From b632667ce57af89691407bb8668e1512775278ae Mon Sep 17 00:00:00 2001 From: Calvin Date: Fri, 15 Mar 2013 15:26:20 -0400 Subject: nbc added --- src/nbc/count.sml | 290 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 290 insertions(+) create mode 100644 src/nbc/count.sml (limited to 'src/nbc/count.sml') diff --git a/src/nbc/count.sml b/src/nbc/count.sml new file mode 100644 index 0000000..d036050 --- /dev/null +++ b/src/nbc/count.sml @@ -0,0 +1,290 @@ +(* Command-line choices. sides selects between the SingleSided and DoubleSided processors and labeled selects the output format; both are consumed by the process dispatch near the end of this file. *) +datatype sides = Single | Double +datatype labeled = Labeled | Unlabeled +(* Command-line parsing, run once when this file is evaluated. The refs hold the defaults (order 15, Double, Labeled) and are overwritten by the option callbacks; only files, order, sides and labeled are visible after the end of this local block. *) +local + (* + val perWord = ref NONE + val total = ref NONE + *) + val order = ref (SOME 15) + val sides = ref Double + val labeled = ref Labeled + val optionsWithoutHelp = [ + (* { + short = "w", long = ["per-word"] + , desc = GetOpt.ReqArg ( + fn file => perWord := SOME file + , "file" + ), help = "file to store per-word counts in" + }, { + short = "t", long = ["total"] + , desc = GetOpt.ReqArg ( + fn file => total := SOME file + , "file" + ), help = "file to store total count in" + }, *) { + short = "r", long = ["order"] + (* A NONE from Int.fromString is reported as invalid order once parsing has finished. NOTE(review): zero and negative sizes parse successfully and are not rejected anywhere in this file; confirm Nmer copes with them. *) + , desc = GetOpt.ReqArg ( + fn size => order := Int.fromString size + , "size" + ), help = "word size" + }, { + short = "1", long = ["single"] + , desc = GetOpt.NoArg (fn () => sides := Single) + , help = "only count one side" + }, { + short = "u", long = ["unlabeled"] + , desc = GetOpt.NoArg (fn () => labeled := Unlabeled) + , help = "emit counts for every possible nmer, without labels" + } + ] + (* Usage text for stderr. It is built from optionsWithoutHelp, so the help option itself is not listed. *) + fun usageString () = GetOpt.usageInfo { + header = CommandLine.name () ^ " ..." 
+ , options = optionsWithoutHelp + } ^ "\n" + (* Exit status selector for displayHelpAndExit; local to this block. *) + datatype status = Success | Failure + (* Print the usage text to stderr, then exit with the process status that corresponds to the argument. *) + fun displayHelpAndExit status = ( + TextIO.output ( + TextIO.stdErr + , usageString () + ); OS.Process.exit (case status of + Success => OS.Process.success + | Failure => OS.Process.failure + ) + ) + (* Complete option list: the help option is prepended here; its callback prints the usage text and exits successfully. *) + val options = { + short = "h", long = ["help"] + , desc = GetOpt.NoArg (fn () => displayHelpAndExit Success) + , help = "display help" + } :: optionsWithoutHelp +in + (* Parse the arguments. The first component returned by getOpt is discarded because the callbacks above record their results in the refs; the second component is kept as the list of input file names. A parse error is echoed to stderr, followed by the usage text and a failure exit. *) + val (_, files) = GetOpt.getOpt { + argOrder = GetOpt.Permute + , options = options + , errFn = fn errorMessage => ( + TextIO.output (TextIO.stdErr, errorMessage ^ "\n") + ; displayHelpAndExit Failure + ) + } (CommandLine.arguments ()) + (* + val perWordFileName = case !perWord of + NONE => ( + TextIO.output ( + stdErr + , "per-word file name required but not provided\n" + ); displayHelpAndExit Failure + ) | SOME fileName => fileName + val totalFileName = case !total of + NONE => ( + TextIO.output ( + stdErr + , "total file name required but not provided\n" + ); displayHelpAndExit Failure + ) | SOME fileName => fileName + *) + (* Unwrap the order; NONE means Int.fromString rejected the argument, which is reported as invalid order before exiting with failure. *) + val order = case !order of + NONE => ( + TextIO.output ( + TextIO.stdErr + , "invalid order\n" + ); displayHelpAndExit Failure + ) | SOME integer => integer + val sides = !sides + val labeled = !labeled +end + +(* Interface of a mutable nmer counter: empty makes a fresh one, add records one occurrence, get reads a count and app visits every stored nmer with its count. *) +signature COLLECTION = sig + type collection + type nmer + val empty: unit -> collection + val add: collection * nmer -> unit + val get: collection * nmer -> int + val app: (nmer * int -> unit) -> collection -> unit +end + +(* COLLECTION backed by a HashTableFn table keyed with Nmer.hash and Nmer.equal. Counts are stored as int ref so add can increment in place; get answers 0 for an nmer that was never added. 256 * 1024 is the size argument given to mkTable. *) +functor Collection (Nmer: NMER) +:> COLLECTION where type nmer = Nmer.nmer = struct + type nmer = Nmer.nmer + structure Table = HashTableFn ( + type hash_key = nmer + val hashVal = Nmer.hash + val sameKey = Nmer.equal + ) + type collection = int ref Table.hash_table + exception NotFound + fun empty () = Table.mkTable (256 * 1024, NotFound) + fun add (table, nmer) = case Table.find table nmer of + NONE => Table.insert table (nmer, ref 1) + | SOME count => count := !count + 1 
+ fun get (table, nmer) = case Table.find table nmer of + NONE => 0 + | SOME (ref count) => count + fun app execute table = Table.appi (fn (nmer, ref count) => + execute (nmer, count) + ) table +end + +(* Outcome type produced by the File functor below and inspected by the driver at the end of this file. *) +datatype result = Success | Failure + +(* A printer for a whole collection. *) +signature OUTPUT = sig + type collection + val output: collection -> unit +end + +(* Prints one bare count per line on stdout for every nmer from Nmer.minimum to Nmer.maximum inclusive, stepping with Nmer.next, so nmers that were never seen are printed as 0. *) +functor Unlabeled ( + structure Nmer: NMER + structure Collection: COLLECTION + sharing type Collection.nmer = Nmer.nmer +) :> OUTPUT + where type collection = Collection.collection += struct + type collection = Collection.collection + fun put string = TextIO.output (TextIO.stdOut, string) + fun single count = ( + put (Int.toString count) + ; put "\n" + ) + fun output collection = + let + fun continue nmer = ( + single (Collection.get (collection, nmer)) + ; + if nmer = Nmer.maximum then () + else continue (Nmer.next nmer) + ) + in + continue (Nmer.minimum) + end +end + +(* Prints one line per nmer stored in the collection on stdout: the nmer text, a space, then its count. Iteration order is whatever Collection.app provides. *) +functor Labeled ( + structure Nmer: NMER + structure Collection: COLLECTION + sharing type Collection.nmer = Nmer.nmer +) :> OUTPUT + where type collection = Collection.collection += struct + type collection = Collection.collection + fun put string = TextIO.output (TextIO.stdOut, string) + fun single (nmer, count) = ( + put (Nmer.toString nmer) + ; put " " + ; put (Int.toString count) + ; put "\n" + ) + fun output collection = Collection.app single collection +end + +(* Glue between a collection and the FILE signature (declared outside this file): startFile creates an empty collection, nmer counts each reported nmer, stopFile prints the collection and returns Success, and invalidFormat returns Failure without printing. The per-read hooks do nothing. *) +functor File ( + structure Collection: COLLECTION + structure Output: OUTPUT + sharing type Collection.collection = Output.collection +) :> FILE + where type nmer = Collection.nmer + where type result = result + where type argument = unit += struct + type argument = unit + type file = Collection.collection + type read = unit + type nmer = Collection.nmer + type result = result + fun startFile _ = Collection.empty () + fun startRead _ = () + fun nmer (counts, (), nmer) = Collection.add (counts, nmer) + fun stopRead (_, ()) = () + fun stopFile counts = ( + Output.output counts + ; Success + ) + fun invalidFormat file = 
Failure +end + +(* Builds all four combinations of sides and labeling for one Nmer implementation. SingleSidedFasta and DoubleSidedFasta are not defined in this file. *) +functor Everything (Nmer: NMER) = struct + structure Collection = Collection (Nmer) + structure Unlabeled = File ( + structure Collection = Collection + structure Output = Unlabeled ( + structure Nmer = Nmer + structure Collection = Collection + ) + ) + structure Labeled = File ( + structure Collection = Collection + structure Output = Labeled ( + structure Nmer = Nmer + structure Collection = Collection + ) + ) + structure SingleSidedUnlabeled = SingleSidedFasta ( + structure Nmer = Nmer + structure File = Unlabeled + ) + structure DoubleSidedUnlabeled = DoubleSidedFasta ( + structure Nmer = Nmer + structure File = Unlabeled + ) + structure SingleSidedLabeled = SingleSidedFasta ( + structure Nmer = Nmer + structure File = Labeled + ) + structure DoubleSidedLabeled = DoubleSidedFasta ( + structure Nmer = Nmer + structure File = Labeled + ) +end + +(* Word32 and Word64 backed instantiations; both functor applications are evaluated regardless of the order chosen on the command line. *) +structure Everything32 = Everything ( + Nmer ( + val order = order + structure Word = Word32 + ) +) +structure Everything64 = Everything ( + Nmer ( + val order = order + structure Word = Word64 + ) +) + +(* Pick the processor once from order, sides and labeled. NOTE(review): order <= 32 selects the Word32 variant; confirm this bound against how Nmer packs an nmer into a word. *) +val process = + if order <= 32 then (case sides of + Single => (case labeled of + Unlabeled => Everything32.SingleSidedUnlabeled.process + | Labeled => Everything32.SingleSidedLabeled.process + ) | Double => (case labeled of + Unlabeled => Everything32.DoubleSidedUnlabeled.process + | Labeled => Everything32.DoubleSidedLabeled.process + ) + ) else (case sides of + Single => (case labeled of + Unlabeled => Everything64.SingleSidedUnlabeled.process + | Labeled => Everything64.SingleSidedLabeled.process + ) | Double => (case labeled of + Unlabeled => Everything64.DoubleSidedUnlabeled.process + | Labeled => Everything64.DoubleSidedLabeled.process + ) + ) + +(* Driver: open each file, run process on it, close it, and report the file name with an invalid format message on stderr when the result is Failure. List.all stops at the first file that yields false, after which the program exits with failure status. NOTE(review): if process raises, closeIn is skipped and the exception is not handled here. *) +val () = + let + fun one name = + let + val instream = TextIO.openIn name + val result = process ((), instream) + in + TextIO.closeIn instream + ; case result of + Success => true + | Failure => ( + TextIO.output ( + TextIO.stdErr + , name + ^ ": invalid 
format\n" + ); false + ) + end + fun all names = List.all one names + in + if all files then () + else OS.Process.exit OS.Process.failure + end -- cgit v1.2.3