diff options
Diffstat (limited to 'src/nbc/count.sml')
-rw-r--r-- | src/nbc/count.sml | 290 |
1 files changed, 0 insertions, 290 deletions
diff --git a/src/nbc/count.sml b/src/nbc/count.sml deleted file mode 100644 index d036050..0000000 --- a/src/nbc/count.sml +++ /dev/null @@ -1,290 +0,0 @@ -datatype sides = Single | Double -datatype labeled = Labeled | Unlabeled -local - (* - val perWord = ref NONE - val total = ref NONE - *) - val order = ref (SOME 15) - val sides = ref Double - val labeled = ref Labeled - val optionsWithoutHelp = [ - (* { - short = "w", long = ["per-word"] - , desc = GetOpt.ReqArg ( - fn file => perWord := SOME file - , "file" - ), help = "file to store per-word counts in" - }, { - short = "t", long = ["total"] - , desc = GetOpt.ReqArg ( - fn file => total := SOME file - , "file" - ), help = "file to store total count in" - }, *) { - short = "r", long = ["order"] - , desc = GetOpt.ReqArg ( - fn size => order := Int.fromString size - , "size" - ), help = "word size" - }, { - short = "1", long = ["single"] - , desc = GetOpt.NoArg (fn () => sides := Single) - , help = "only count one side" - }, { - short = "u", long = ["unlabeled"] - , desc = GetOpt.NoArg (fn () => labeled := Unlabeled) - , help = "emit counts for every possible nmer, without labels" - } - ] - fun usageString () = GetOpt.usageInfo { - header = CommandLine.name () ^ " <options> <input FASTA file> ..." - , options = optionsWithoutHelp - } ^ "\n" - datatype status = Success | Failure - fun displayHelpAndExit status = ( - TextIO.output ( - TextIO.stdErr - , usageString () - ); OS.Process.exit (case status of - Success => OS.Process.success - | Failure => OS.Process.failure - ) - ) - val options = { - short = "h", long = ["help"] - , desc = GetOpt.NoArg (fn () => displayHelpAndExit Success) - , help = "display help" - } :: optionsWithoutHelp -in - val (_, files) = GetOpt.getOpt { - argOrder = GetOpt.Permute - , options = options - , errFn = fn errorMessage => ( - TextIO.output (TextIO.stdErr, errorMessage ^ "\n") - ; displayHelpAndExit Failure - ) - } (CommandLine.arguments ()) - (* - val perWordFileName = case !perWord of - NONE => ( - TextIO.output ( - stdErr - , "per-word file name required but not provided\n" - ); displayHelpAndExit Failure - ) | SOME fileName => fileName - val totalFileName = case !total of - NONE => ( - TextIO.output ( - stdErr - , "total file name required but not provided\n" - ); displayHelpAndExit Failure - ) | SOME fileName => fileName - *) - val order = case !order of - NONE => ( - TextIO.output ( - TextIO.stdErr - , "invalid order\n" - ); displayHelpAndExit Failure - ) | SOME integer => integer - val sides = !sides - val labeled = !labeled -end - -signature COLLECTION = sig - type collection - type nmer - val empty: unit -> collection - val add: collection * nmer -> unit - val get: collection * nmer -> int - val app: (nmer * int -> unit) -> collection -> unit -end - -functor Collection (Nmer: NMER) -:> COLLECTION where type nmer = Nmer.nmer = struct - type nmer = Nmer.nmer - structure Table = HashTableFn ( - type hash_key = nmer - val hashVal = Nmer.hash - val sameKey = Nmer.equal - ) - type collection = int ref Table.hash_table - exception NotFound - fun empty () = Table.mkTable (256 * 1024, NotFound) - fun add (table, nmer) = case Table.find table nmer of - NONE => Table.insert table (nmer, ref 1) - | SOME count => count := !count + 1 - fun get (table, nmer) = case Table.find table nmer of - NONE => 0 - | SOME (ref count) => count - fun app execute table = Table.appi (fn (nmer, ref count) => - execute (nmer, count) - ) table -end - -datatype result = Success | Failure - -signature OUTPUT = sig - type collection - val output: collection -> unit -end - -functor Unlabeled ( - structure Nmer: NMER - structure Collection: COLLECTION - sharing type Collection.nmer = Nmer.nmer -) :> OUTPUT - where type collection = Collection.collection -= struct - type collection = Collection.collection - fun put string = TextIO.output (TextIO.stdOut, string) - fun single count = ( - put (Int.toString count) - ; put "\n" - ) - fun output collection = - let - fun continue nmer = ( - single (Collection.get (collection, nmer)) - ; - if nmer = Nmer.maximum then () - else continue (Nmer.next nmer) - ) - in - continue (Nmer.minimum) - end -end - -functor Labeled ( - structure Nmer: NMER - structure Collection: COLLECTION - sharing type Collection.nmer = Nmer.nmer -) :> OUTPUT - where type collection = Collection.collection -= struct - type collection = Collection.collection - fun put string = TextIO.output (TextIO.stdOut, string) - fun single (nmer, count) = ( - put (Nmer.toString nmer) - ; put " " - ; put (Int.toString count) - ; put "\n" - ) - fun output collection = Collection.app single collection -end - -functor File ( - structure Collection: COLLECTION - structure Output: OUTPUT - sharing type Collection.collection = Output.collection -) :> FILE - where type nmer = Collection.nmer - where type result = result - where type argument = unit -= struct - type argument = unit - type file = Collection.collection - type read = unit - type nmer = Collection.nmer - type result = result - fun startFile _ = Collection.empty () - fun startRead _ = () - fun nmer (counts, (), nmer) = Collection.add (counts, nmer) - fun stopRead (_, ()) = () - fun stopFile counts = ( - Output.output counts - ; Success - ) - fun invalidFormat file = Failure -end - -functor Everything (Nmer: NMER) = struct - structure Collection = Collection (Nmer) - structure Unlabeled = File ( - structure Collection = Collection - structure Output = Unlabeled ( - structure Nmer = Nmer - structure Collection = Collection - ) - ) - structure Labeled = File ( - structure Collection = Collection - structure Output = Labeled ( - structure Nmer = Nmer - structure Collection = Collection - ) - ) - structure SingleSidedUnlabeled = SingleSidedFasta ( - structure Nmer = Nmer - structure File = Unlabeled - ) - structure DoubleSidedUnlabeled = DoubleSidedFasta ( - structure Nmer = Nmer - structure File = Unlabeled - ) - structure SingleSidedLabeled = SingleSidedFasta ( - structure Nmer = Nmer - structure File = Labeled - ) - structure DoubleSidedLabeled = DoubleSidedFasta ( - structure Nmer = Nmer - structure File = Labeled - ) -end - -structure Everything32 = Everything ( - Nmer ( - val order = order - structure Word = Word32 - ) -) -structure Everything64 = Everything ( - Nmer ( - val order = order - structure Word = Word64 - ) -) - -val process = - if order <= 32 then (case sides of - Single => (case labeled of - Unlabeled => Everything32.SingleSidedUnlabeled.process - | Labeled => Everything32.SingleSidedLabeled.process - ) | Double => (case labeled of - Unlabeled => Everything32.DoubleSidedUnlabeled.process - | Labeled => Everything32.DoubleSidedLabeled.process - ) - ) else (case sides of - Single => (case labeled of - Unlabeled => Everything64.SingleSidedUnlabeled.process - | Labeled => Everything64.SingleSidedLabeled.process - ) | Double => (case labeled of - Unlabeled => Everything64.DoubleSidedUnlabeled.process - | Labeled => Everything64.DoubleSidedLabeled.process - ) - ) - -val () = - let - fun one name = - let - val instream = TextIO.openIn name - val result = process ((), instream) - in - TextIO.closeIn instream - ; case result of - Success => true - | Failure => ( - TextIO.output ( - TextIO.stdErr - , name - ^ ": invalid format\n" - ); false - ) - end - fun all names = List.all one names - in - if all files then () - else OS.Process.exit OS.Process.failure - end |