diff options
-rw-r--r-- | src/nbc/LICENSE | 16 | ||||
-rw-r--r-- | src/nbc/Makefile | 16 | ||||
-rw-r--r-- | src/nbc/README | 3 | ||||
-rw-r--r-- | src/nbc/count.mlb | 10 | ||||
-rw-r--r-- | src/nbc/count.sml | 290 | ||||
-rw-r--r-- | src/nbc/fasta-all.mlb | 8 | ||||
-rw-r--r-- | src/nbc/fasta.mlb | 8 | ||||
-rw-r--r-- | src/nbc/fasta.sml | 326 | ||||
-rw-r--r-- | src/nbc/nmer-all.mlb | 6 | ||||
-rw-r--r-- | src/nbc/nmer.mlb | 7 | ||||
-rw-r--r-- | src/nbc/nmer.sml | 557 | ||||
-rw-r--r-- | src/nbc/parse-state.mlb | 5 | ||||
-rw-r--r-- | src/nbc/parse-state.sml | 89 | ||||
-rw-r--r-- | src/nbc/probabilities-by-read.mlb | 8 | ||||
-rw-r--r-- | src/nbc/probabilities-by-read.sml | 210 | ||||
-rw-r--r-- | src/nbc/promise.mlb | 5 | ||||
-rw-r--r-- | src/nbc/promise.sml | 24 | ||||
-rw-r--r-- | src/nbc/stream.mlb | 6 | ||||
-rw-r--r-- | src/nbc/stream.sml | 243 | ||||
-rw-r--r-- | src/nbc/test-library.mlb | 5 | ||||
-rw-r--r-- | src/nbc/test-library.sml | 50 | ||||
-rw-r--r-- | src/nbc/tree.mlb | 6 | ||||
-rw-r--r-- | src/nbc/tree.sml | 225 |
23 files changed, 0 insertions, 2123 deletions
diff --git a/src/nbc/LICENSE b/src/nbc/LICENSE deleted file mode 100644 index 3baecf8..0000000 --- a/src/nbc/LICENSE +++ /dev/null @@ -1,16 +0,0 @@ -GPL: - -Copyright 2008, 2009, 2010 Drexel University - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see http://www.gnu.org/licenses/. diff --git a/src/nbc/Makefile b/src/nbc/Makefile deleted file mode 100644 index 4f9c324..0000000 --- a/src/nbc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -MLTON = mlton -MLYACC = mlyacc -MLLEX = mllex -%.grm.sig %.grm.sml: %.grm - $(MLYACC) $^ -%.lex.sml: %.lex - $(MLLEX) $^ -%: %.mlb - $(MLTON) $(MLTONFLAGS) -output $@ $^ -all: count probabilities-by-read -count: count.mlb -probabilities-by-read: probabilities-by-read.mlb -score: score.mlb -tabulate: tabulate.mlb -clean: - rm -f count probabilities-by-read diff --git a/src/nbc/README b/src/nbc/README deleted file mode 100644 index c4d7853..0000000 --- a/src/nbc/README +++ /dev/null @@ -1,3 +0,0 @@ -This folder contains code to count kmers, from the Naive Bayesian Classifier: - -http://nbc.ece.drexel.edu/ diff --git a/src/nbc/count.mlb b/src/nbc/count.mlb deleted file mode 100644 index e0eb0e6..0000000 --- a/src/nbc/count.mlb +++ /dev/null @@ -1,10 +0,0 @@ -local - $(SML_LIB)/basis/basis.mlb - $(SML_LIB)/smlnj-lib/Util/smlnj-lib.mlb - stream.mlb - tree.mlb - nmer.mlb - fasta.mlb -in - count.sml -end diff --git a/src/nbc/count.sml b/src/nbc/count.sml deleted file mode 100644 index d036050..0000000 --- a/src/nbc/count.sml +++ /dev/null @@ -1,290 +0,0 @@ -datatype sides = Single | Double -datatype labeled = Labeled | Unlabeled -local - (* - val perWord = ref NONE - val total = ref NONE - *) - val order = ref (SOME 15) - val sides = ref Double - val labeled = ref Labeled - val optionsWithoutHelp = [ - (* { - short = "w", long = ["per-word"] - , desc = GetOpt.ReqArg ( - fn file => perWord := SOME file - , "file" - ), help = "file to store per-word counts in" - }, { - short = "t", long = ["total"] - , desc = GetOpt.ReqArg ( - fn file => total := SOME file - , "file" - ), help = "file to store total count in" - }, *) { - short = "r", long = ["order"] - , desc = GetOpt.ReqArg ( - fn size => order := Int.fromString size - , "size" - ), help = "word size" - }, { - short = "1", long = ["single"] - , desc = GetOpt.NoArg (fn () => sides := Single) - , help = "only count one side" - }, { - short = "u", long = ["unlabeled"] - , desc = GetOpt.NoArg (fn () => labeled := Unlabeled) - , help = "emit counts for every possible nmer, without labels" - } - ] - fun usageString () = GetOpt.usageInfo { - header = CommandLine.name () ^ " <options> <input FASTA file> ..." - , options = optionsWithoutHelp - } ^ "\n" - datatype status = Success | Failure - fun displayHelpAndExit status = ( - TextIO.output ( - TextIO.stdErr - , usageString () - ); OS.Process.exit (case status of - Success => OS.Process.success - | Failure => OS.Process.failure - ) - ) - val options = { - short = "h", long = ["help"] - , desc = GetOpt.NoArg (fn () => displayHelpAndExit Success) - , help = "display help" - } :: optionsWithoutHelp -in - val (_, files) = GetOpt.getOpt { - argOrder = GetOpt.Permute - , options = options - , errFn = fn errorMessage => ( - TextIO.output (TextIO.stdErr, errorMessage ^ "\n") - ; displayHelpAndExit Failure - ) - } (CommandLine.arguments ()) - (* - val perWordFileName = case !perWord of - NONE => ( - TextIO.output ( - stdErr - , "per-word file name required but not provided\n" - ); displayHelpAndExit Failure - ) | SOME fileName => fileName - val totalFileName = case !total of - NONE => ( - TextIO.output ( - stdErr - , "total file name required but not provided\n" - ); displayHelpAndExit Failure - ) | SOME fileName => fileName - *) - val order = case !order of - NONE => ( - TextIO.output ( - TextIO.stdErr - , "invalid order\n" - ); displayHelpAndExit Failure - ) | SOME integer => integer - val sides = !sides - val labeled = !labeled -end - -signature COLLECTION = sig - type collection - type nmer - val empty: unit -> collection - val add: collection * nmer -> unit - val get: collection * nmer -> int - val app: (nmer * int -> unit) -> collection -> unit -end - -functor Collection (Nmer: NMER) -:> COLLECTION where type nmer = Nmer.nmer = struct - type nmer = Nmer.nmer - structure Table = HashTableFn ( - type hash_key = nmer - val hashVal = Nmer.hash - val sameKey = Nmer.equal - ) - type collection = int ref Table.hash_table - exception NotFound - fun empty () = Table.mkTable (256 * 1024, NotFound) - fun add (table, nmer) = case Table.find table nmer of - NONE => Table.insert table (nmer, ref 1) - | SOME count => count := !count + 1 - fun get (table, nmer) = case Table.find table nmer of - NONE => 0 - | SOME (ref count) => count - fun app execute table = Table.appi (fn (nmer, ref count) => - execute (nmer, count) - ) table -end - -datatype result = Success | Failure - -signature OUTPUT = sig - type collection - val output: collection -> unit -end - -functor Unlabeled ( - structure Nmer: NMER - structure Collection: COLLECTION - sharing type Collection.nmer = Nmer.nmer -) :> OUTPUT - where type collection = Collection.collection -= struct - type collection = Collection.collection - fun put string = TextIO.output (TextIO.stdOut, string) - fun single count = ( - put (Int.toString count) - ; put "\n" - ) - fun output collection = - let - fun continue nmer = ( - single (Collection.get (collection, nmer)) - ; - if nmer = Nmer.maximum then () - else continue (Nmer.next nmer) - ) - in - continue (Nmer.minimum) - end -end - -functor Labeled ( - structure Nmer: NMER - structure Collection: COLLECTION - sharing type Collection.nmer = Nmer.nmer -) :> OUTPUT - where type collection = Collection.collection -= struct - type collection = Collection.collection - fun put string = TextIO.output (TextIO.stdOut, string) - fun single (nmer, count) = ( - put (Nmer.toString nmer) - ; put " " - ; put (Int.toString count) - ; put "\n" - ) - fun output collection = Collection.app single collection -end - -functor File ( - structure Collection: COLLECTION - structure Output: OUTPUT - sharing type Collection.collection = Output.collection -) :> FILE - where type nmer = Collection.nmer - where type result = result - where type argument = unit -= struct - type argument = unit - type file = Collection.collection - type read = unit - type nmer = Collection.nmer - type result = result - fun startFile _ = Collection.empty () - fun startRead _ = () - fun nmer (counts, (), nmer) = Collection.add (counts, nmer) - fun stopRead (_, ()) = () - fun stopFile counts = ( - Output.output counts - ; Success - ) - fun invalidFormat file = Failure -end - -functor Everything (Nmer: NMER) = struct - structure Collection = Collection (Nmer) - structure Unlabeled = File ( - structure Collection = Collection - structure Output = Unlabeled ( - structure Nmer = Nmer - structure Collection = Collection - ) - ) - structure Labeled = File ( - structure Collection = Collection - structure Output = Labeled ( - structure Nmer = Nmer - structure Collection = Collection - ) - ) - structure SingleSidedUnlabeled = SingleSidedFasta ( - structure Nmer = Nmer - structure File = Unlabeled - ) - structure DoubleSidedUnlabeled = DoubleSidedFasta ( - structure Nmer = Nmer - structure File = Unlabeled - ) - structure SingleSidedLabeled = SingleSidedFasta ( - structure Nmer = Nmer - structure File = Labeled - ) - structure DoubleSidedLabeled = DoubleSidedFasta ( - structure Nmer = Nmer - structure File = Labeled - ) -end - -structure Everything32 = Everything ( - Nmer ( - val order = order - structure Word = Word32 - ) -) -structure Everything64 = Everything ( - Nmer ( - val order = order - structure Word = Word64 - ) -) - -val process = - if order <= 32 then (case sides of - Single => (case labeled of - Unlabeled => Everything32.SingleSidedUnlabeled.process - | Labeled => Everything32.SingleSidedLabeled.process - ) | Double => (case labeled of - Unlabeled => Everything32.DoubleSidedUnlabeled.process - | Labeled => Everything32.DoubleSidedLabeled.process - ) - ) else (case sides of - Single => (case labeled of - Unlabeled => Everything64.SingleSidedUnlabeled.process - | Labeled => Everything64.SingleSidedLabeled.process - ) | Double => (case labeled of - Unlabeled => Everything64.DoubleSidedUnlabeled.process - | Labeled => Everything64.DoubleSidedLabeled.process - ) - ) - -val () = - let - fun one name = - let - val instream = TextIO.openIn name - val result = process ((), instream) - in - TextIO.closeIn instream - ; case result of - Success => true - | Failure => ( - TextIO.output ( - TextIO.stdErr - , name - ^ ": invalid format\n" - ); false - ) - end - fun all names = List.all one names - in - if all files then () - else OS.Process.exit OS.Process.failure - end diff --git a/src/nbc/fasta-all.mlb b/src/nbc/fasta-all.mlb deleted file mode 100644 index 7bf2790..0000000 --- a/src/nbc/fasta-all.mlb +++ /dev/null @@ -1,8 +0,0 @@ -local - $(SML_LIB)/basis/basis.mlb - nmer.mlb - parse-state.mlb - test-library.mlb -in - fasta.sml -end diff --git a/src/nbc/fasta.mlb b/src/nbc/fasta.mlb deleted file mode 100644 index a308ac5..0000000 --- a/src/nbc/fasta.mlb +++ /dev/null @@ -1,8 +0,0 @@ -local - fasta-all.mlb -in - signature FILE - signature FASTA - functor SingleSidedFasta - functor DoubleSidedFasta -end diff --git a/src/nbc/fasta.sml b/src/nbc/fasta.sml deleted file mode 100644 index 8089025..0000000 --- a/src/nbc/fasta.sml +++ /dev/null @@ -1,326 +0,0 @@ -signature FILE = sig - type argument - type file - type read - type nmer - type result - val startFile: argument -> file - val startRead: file * string -> read - val nmer: file * read * nmer -> unit - val stopRead: file * read -> unit - val stopFile: file -> result - val invalidFormat: file -> result -end - -signature FASTA = sig - type argument - type result - val process: argument * TextIO.instream -> result -end - -functor AgnosticFasta ( - structure Nmer: NMER - structure File: FILE - sharing type Nmer.nmer = File.nmer - structure Sides: sig - include NMER_SIDES - type file - type read - val process: file * read * sides -> unit - end - sharing type Nmer.base = Sides.sidesBase - sharing type Nmer.nmer = Sides.sidesNmer - sharing type File.read = Sides.read - sharing type File.file = Sides.file -) :> FASTA - where type argument = File.argument - where type result = File.result -= struct - type argument = File.argument - type result = File.result - - val beforeHeaderBeginningOfLine = ParseState.create () - val beforeHeaderMiddleOfLine = ParseState.create () - val afterHeaderBeginningOfLine = ParseState.create () - val afterHeaderMiddleOfLine = ParseState.create () - - fun inputLineButDiscardNewline instream = - Option.map (fn line => - String.extract (line, 0, SOME (size line - 1)) - ) (TextIO.inputLine instream) - datatype z = datatype ParseState.whichCharacters (* This | Any *) - - local - fun header (instream, (file, sides)) = - case inputLineButDiscardNewline instream of - NONE => File.invalidFormat file - | SOME header => ParseState.enter ( - afterHeaderBeginningOfLine - , instream - , ( - file - , File.startRead ( - file - , header - ), sides - ) - ) - fun space (instream, (file, sides)) = ParseState.enter ( - beforeHeaderMiddleOfLine - , instream - , (file, sides) - ) - fun newline (instream, (file, sides)) = ParseState.enter ( - beforeHeaderBeginningOfLine - , instream - , (file, sides) - ) - fun invalidFormat (_, (file, _)) = File.invalidFormat file - in - val () = ParseState.build { - state = beforeHeaderBeginningOfLine - , characters = [ - (These [#">"], header) - , (These [#"\n"], newline) - , (These [#" ", #"\t", #"\r"], space) - , (Any, invalidFormat) - ], endOfFile = invalidFormat - } - val () = ParseState.build { - state = beforeHeaderMiddleOfLine - , characters = [ - (These [#"\n"], newline) - , (These [#" ", #"\t", #"\r"], space) - , (Any, invalidFormat) - ], endOfFile = invalidFormat - } - end - local - fun base base (instream, (file, read, sides)) = ( - Sides.put (sides, base) - ; - if Sides.isFull sides then - Sides.process (file, read, sides) - else () - ; ParseState.enter ( - afterHeaderMiddleOfLine - , instream - , (file, read, sides) - ) - ) - fun space (instream, (file, read, sides)) = ( - ParseState.enter ( - afterHeaderMiddleOfLine - , instream - , (file, read, sides) - ) - ) - fun other (instream, (file, read, sides)) = ( - Sides.clear sides - ; ParseState.enter ( - afterHeaderMiddleOfLine - , instream - , (file, read, sides) - ) - ) - fun newline (instream, (file, read, sides)) = - ParseState.enter ( - afterHeaderBeginningOfLine - , instream - , (file, read, sides) - ) - fun header (instream, (file, read, sides)) = ( - File.stopRead (file, read) - ; Sides.clear sides - ; case inputLineButDiscardNewline instream of - NONE => File.invalidFormat file - | SOME header => ParseState.enter ( - afterHeaderBeginningOfLine - , instream - , ( - file - , File.startRead ( - file - , header - ), sides - ) - ) - ) - fun success (_, (file, read, _)) = ( - File.stopRead (file, read) - ; File.stopFile file - ) - in - val () = ParseState.build { - state = afterHeaderBeginningOfLine - , characters = [ - (These [#"A", #"a"], base Nmer.a) - , (These [#"C", #"c"], base Nmer.c) - , (These [#"G", #"g"], base Nmer.g) - , (These [#"T", #"t"], base Nmer.t) - , (These [#">"], header) - , (These [#"\n"], newline) - , (These [#" ", #"\t", #"\r"], space) - , (Any, other) - ], endOfFile = success - } - val () = ParseState.build { - state = afterHeaderMiddleOfLine - , characters = [ - (These [#"A", #"a"], base Nmer.a) - , (These [#"C", #"c"], base Nmer.c) - , (These [#"G", #"g"], base Nmer.g) - , (These [#"T", #"t"], base Nmer.t) - , (These [#" ", #"\t", #"\r"], space) - , (These [#"\n"], newline) - , (Any, other) - ], endOfFile = success - } - end - fun process (argument, instream) = ParseState.enter ( - beforeHeaderBeginningOfLine - , instream - , (File.startFile argument, Sides.create ()) - ) -end - -functor SingleSidedFasta ( - structure Nmer: NMER - structure File: FILE - sharing type Nmer.nmer = File.nmer -) = AgnosticFasta ( - structure Nmer = Nmer - structure File = File - structure Sides = struct - type read = File.read - type file = File.file - open Nmer.Single - fun process (file, read, sides) = - File.nmer (file, read, forward sides) - end -) - -functor DoubleSidedFasta ( - structure Nmer: NMER - structure File: FILE - sharing type Nmer.nmer = File.nmer -) = AgnosticFasta ( - structure Nmer = Nmer - structure File = File - structure Sides = struct - type read = File.read - type file = File.file - open Nmer.Double - fun process (file, read, sides) = ( - File.nmer (file, read, forward sides) - ; File.nmer (file, read, reverse sides) - ) - end -) - -functor TestFile (Nmer: NMER) = struct - type argument = unit - type nmer = Nmer.nmer - type read = {header: string, nmers: nmer list ref} - type file = {header: string, nmers: string list} list ref - type result = string - fun startFile () = ref nil - fun stopFile file = String.concatWith ";" ( - map (fn {header, nmers} => - header - ^ ":" - ^ String.concatWith "," (rev nmers) - ) (rev (!file)) - ) - fun startRead (_, header) = - {header = header, nmers = ref nil} - fun nmer (_, {header = _, nmers}, nmer) = - nmers := nmer :: !nmers - fun stopRead (file, {header, nmers = ref nmers}) = - file := { - header = header - , nmers = map Nmer.toString nmers - } :: !file - fun invalidFormat _ = "invalid format" -end - -functor Test () = struct - structure Nmer1 = Nmer ( - val order = 1 - structure Word = Word32 - ) - structure File1 = TestFile (Nmer1) - structure SingleFasta1 = SingleSidedFasta ( - structure Nmer = Nmer1 - structure File = File1 - ) - fun test process input () = process ((), TextIO.openString input) - val single1 = test SingleFasta1.process - structure Nmer2 = Nmer ( - val order = 2 - structure Word = Word32 - ) - structure File2 = TestFile (Nmer2) - structure SingleFasta2 = SingleSidedFasta ( - structure Nmer = Nmer2 - structure File = File2 - ) - val single2 = test SingleFasta2.process - structure DoubleFasta1 = DoubleSidedFasta ( - structure Nmer = Nmer1 - structure File = File1 - ) - val double1 = test DoubleFasta1.process - structure DoubleFasta2 = DoubleSidedFasta ( - structure Nmer = Nmer2 - structure File = File2 - ) - val double2 = test DoubleFasta2.process - val () = Test.list [ - { - description = "single 1: A" - , function = single1 ">foo\nA\n" - , expectedResult = "foo:A" - }, { - description = "single 1: AG" - , function = single1 ">foo\nAG\n" - , expectedResult = "foo:A,G" - }, { - description = "single 2: A" - , function = single2 ">foo\nA\n" - , expectedResult = "foo:" - }, { - description = "single 2: CTGAG" - , function = single2 ">foo\nCTGAG\n" - , expectedResult = "foo:CT,TG,GA,AG" - }, { - description = "double 1: C" - , function = double1 ">bar\nC\n" - , expectedResult = "bar:C,G" - }, { - description = "double 2: T" - , function = double2 ">baz\nT\n" - , expectedResult = "baz:" - }, { - description = "double 2: GC" - , function = double2 ">quux\nGC\n" - , expectedResult = "quux:GC,GC" - }, { - description = "double 2: CCC\\nC\\nCT" - , function = double2 ">goo\nCCC\nC\nCT\n" - , expectedResult = "goo:CC,GG,CC,GG,CC,GG,CC,GG,CT,AG" - }, { - description = "double 2: CC\\nC*\\nT" - , function = double2 ">goo\nCC\nC*\nT\n" - , expectedResult = "goo:CC,GG,CC,GG" - }, { - description = "double 2: foo CATGAC goo TACCAG" - , function = double2 - ">foo\nCATGAC\n>goo\nTACCAG\n" - , expectedResult = ( - "foo:CA,TG,AT,AT,TG,CA,GA,TC,AC,GT" - ^ ";goo:TA,TA,AC,GT,CC,GG,CA,TG,AG,CT" - ) - } - ] -end diff --git a/src/nbc/nmer-all.mlb b/src/nbc/nmer-all.mlb deleted file mode 100644 index dcc3126..0000000 --- a/src/nbc/nmer-all.mlb +++ /dev/null @@ -1,6 +0,0 @@ -local - $(SML_LIB)/basis/basis.mlb - test-library.mlb -in - nmer.sml -end diff --git a/src/nbc/nmer.mlb b/src/nbc/nmer.mlb deleted file mode 100644 index a7f4b01..0000000 --- a/src/nbc/nmer.mlb +++ /dev/null @@ -1,7 +0,0 @@ -local - nmer-all.mlb -in - signature NMER - signature NMER_SIDES - functor Nmer -end diff --git a/src/nbc/nmer.sml b/src/nbc/nmer.sml deleted file mode 100644 index 82c5b53..0000000 --- a/src/nbc/nmer.sml +++ /dev/null @@ -1,557 +0,0 @@ -signature NMER_SIDES = sig - type sidesBase - type sidesNmer - type sides - val create: unit -> sides - val clear: sides -> unit - val put: sides * sidesBase -> unit - val isFull: sides -> bool - val forward: sides -> sidesNmer -end - -signature NMER = sig - eqtype base - val a: base - val c: base - val g: base - val t: base - eqtype nmer - val compare: nmer * nmer -> order - val maximum: nmer - val minimum: nmer - val next: nmer -> nmer - val hash: nmer -> Word.word - val equal: nmer * nmer -> bool - val toString: nmer -> string - val fromString: string -> nmer option - structure Single: NMER_SIDES - where type sidesBase = base - where type sidesNmer = nmer - structure Double: sig - include NMER_SIDES - where type sidesBase = base - where type sidesNmer = nmer - val reverse: sides -> nmer - end -end - -signature NMER_ARGUMENTS = sig - val order: int - structure Word: sig - eqtype word - val fromInt: Int.int -> word - val toInt: word -> Int.int - val + : word * word -> word - val << : word * Word.word -> word - val ~>> : word * Word.word -> word - val andb: word * word -> word - val orb: word * word -> word - val xorb: word * word -> word - val compare: word * word -> order - val toLarge: word -> LargeWord.word - end -end - -functor Nmer (Arguments: NMER_ARGUMENTS) = struct - type base = Arguments.Word.word - val a = Arguments.Word.fromInt 0 - val c = Arguments.Word.fromInt 1 - val g = Arguments.Word.fromInt 2 - val t = Arguments.Word.fromInt 3 - val maximumBase = t - val baseBits = 0w2 - val nmerBits = Word.fromInt (Arguments.order * 2) - fun opposite base = - (* - Conveniently enough, xor properly implements this: - a -> t - c -> g - g -> c - t -> a - *) - Arguments.Word.xorb (base, maximumBase) - type nmer = Arguments.Word.word - val compare = Arguments.Word.compare - val minimum = Arguments.Word.fromInt 0 - local - fun shiftInto (nmer, base) = - Arguments.Word.+ ( - Arguments.Word.<< (nmer, baseBits) - , base - ) - fun maximumOfOrder order = - if order = 0 then minimum - else shiftInto ( - maximumOfOrder (order - 1) - , maximumBase - ) - in - val maximum = maximumOfOrder Arguments.order - end - local - val one = Arguments.Word.fromInt 1 - in - fun next nmer = Arguments.Word.+ (nmer, one) - end - local - fun charFromBase base = case Arguments.Word.toInt base of - 0 => #"A" - | 1 => #"C" - | 2 => #"G" - | 3 => #"T" - | _ => raise Fail "bug in nmer.sml" - fun get (nmer, index) = - let - fun multiplyByTwo word = Word.<< (word, 0w1) - val offset = multiplyByTwo ( - Word.fromInt ( - Arguments.order - 1 - index - ) - ) - in - Arguments.Word.~>> ( - Arguments.Word.andb ( - nmer - , Arguments.Word.<< ( - maximumBase - , offset - ) - ), offset - ) - end - in - fun toString nmer = CharVector.tabulate ( - Arguments.order - , fn index => charFromBase ( - get (nmer, index) - ) - ) - end - fun hash nmer = Word.fromLarge (Arguments.Word.toLarge nmer) - fun equal (a, b) = Arguments.Word.compare (a, b) = EQUAL - structure Undetermined = struct - type sidesBase = base - type sidesNmer = nmer - type 'reverse undeterminedSides = { - forward: nmer ref - , reverse: 'reverse - , count: int ref - } - fun clear {forward = _, reverse = _, count} = count := 0 - fun put ({forward, reverse, count}, base) = ( - forward := Arguments.Word.+ ( - Arguments.Word.andb ( - Arguments.Word.<< ( - !forward - , baseBits - ), maximum - ), base - ); - if !count = Arguments.order then () - else count := !count + 1 - ) - fun isFull {forward = _, reverse = _, count = ref count} = - count = Arguments.order - fun forward {forward = ref forward, reverse = _, count = _} = - forward - end - structure Single = struct - open Undetermined - type sides = unit undeterminedSides - fun create () = { - forward = ref minimum - , reverse = () - , count = ref 0 - } - end - structure Double = struct - open Undetermined - type sides = nmer ref undeterminedSides - fun create () = { - forward = ref minimum - , reverse = ref maximum - , count = ref 0 - } - val put = fn ( - sides as {forward = _, reverse, count = _} - , base - ) => ( - put (sides, base) - ; reverse := Arguments.Word.+ ( - Arguments.Word.~>> ( - !reverse - , baseBits - ), Arguments.Word.<< ( - opposite base - , nmerBits - baseBits - ) - ) - ) - fun reverse {reverse = ref reverse, forward = _, count = _} = - reverse - end - fun fromString string = - let - val side = Single.create () - val char = fn - #"A" => (Single.put (side, a); true) - | #"a" => (Single.put (side, a); true) - | #"C" => (Single.put (side, c); true) - | #"c" => (Single.put (side, c); true) - | #"G" => (Single.put (side, g); true) - | #"g" => (Single.put (side, g); true) - | #"T" => (Single.put (side, t); true) - | #"t" => (Single.put (side, t); true) - | _ => false - - in - if CharVector.all char string then - SOME (Single.forward side) - else NONE - end -end - -functor Test () = struct - structure Nmer1 = Nmer ( - val order = 1 - structure Word = Word32 - ) - structure Nmer2 = Nmer ( - val order = 2 - structure Word = Word32 - ) - structure Nmer3 = Nmer ( - val order = 3 - structure Word = Word32 - ) - val () = Test.list [ - { - description = "opposite a = t" - , function = fn () => - Bool.toString ( - Nmer1.opposite Nmer1.a = Nmer1.t - ) - , expectedResult = "true" - }, { - description = "opposite c = g" - , function = fn () => - Bool.toString ( - Nmer1.opposite Nmer1.c = Nmer1.g - ) - , expectedResult = "true" - }, { - description = "A forward" - , function = fn () => - let - val nmer = Nmer1.Double.create () - in - Nmer1.Double.put (nmer, Nmer1.a) - ; Nmer1.toString ( - Nmer1.Double.forward nmer - ) - end - , expectedResult = "A" - }, { - description = "A reverse" - , function = fn () => - let - val nmer = Nmer1.Double.create () - in - Nmer1.Double.put (nmer, Nmer1.a) - ; Nmer1.toString ( - Nmer1.Double.reverse nmer - ) - end - , expectedResult = "T" - }, { - description = "C forward" - , function = fn () => - let - val nmer = Nmer1.Double.create () - in - Nmer1.Double.put (nmer, Nmer1.c) - ; Nmer1.toString ( - Nmer1.Double.forward nmer - ) - end - , expectedResult = "C" - }, { - description = "C reverse" - , function = fn () => - let - val nmer = Nmer1.Double.create () - in - Nmer1.Double.put (nmer, Nmer1.c) - ; Nmer1.toString ( - Nmer1.Double.reverse nmer - ) - end - , expectedResult = "G" - }, { - description = "G forward" - , function = fn () => - let - val nmer = Nmer1.Double.create () - in - Nmer1.Double.put (nmer, Nmer1.g) - ; Nmer1.toString ( - Nmer1.Double.forward nmer - ) - end - , expectedResult = "G" - }, { - description = "G reverse" - , function = fn () => - let - val nmer = Nmer1.Double.create () - in - Nmer1.Double.put (nmer, Nmer1.g) - ; Nmer1.toString ( - Nmer1.Double.reverse nmer - ) - end - , expectedResult = "C" - }, { - description = "T forward" - , function = fn () => - let - val nmer = Nmer1.Double.create () - in - Nmer1.Double.put (nmer, Nmer1.t) - ; Nmer1.toString ( - Nmer1.Double.forward nmer - ) - end - , expectedResult = "T" - }, { - description = "T reverse" - , function = fn () => - let - val nmer = Nmer1.Double.create () - in - Nmer1.Double.put (nmer, Nmer1.t) - ; Nmer1.toString ( - Nmer1.Double.reverse nmer - ) - end - , expectedResult = "A" - }, { - description = "AA forward" - , function = fn () => - let - val nmer = Nmer2.Double.create () - in - Nmer2.Double.put (nmer, Nmer2.a) - ; Nmer2.Double.put (nmer, Nmer2.a) - ; Nmer2.toString ( - Nmer2.Double.forward nmer - ) - end - , expectedResult = "AA" - }, { - description = "AA reverse" - , function = fn () => - let - val nmer = Nmer2.Double.create () - in - Nmer2.Double.put (nmer, Nmer2.a) - ; Nmer2.Double.put (nmer, Nmer2.a) - ; Nmer2.toString ( - Nmer2.Double.reverse nmer - ) - end - , expectedResult = "TT" - }, { - description = "AC forward" - , function = fn () => - let - val nmer = Nmer2.Double.create () - in - Nmer2.Double.put (nmer, Nmer2.a) - ; Nmer2.Double.put (nmer, Nmer2.c) - ; Nmer2.toString ( - Nmer2.Double.forward nmer - ) - end - , expectedResult = "AC" - }, { - description = "AC reverse" - , function = fn () => - let - val nmer = Nmer2.Double.create () - in - Nmer2.Double.put (nmer, Nmer2.a) - ; Nmer2.Double.put (nmer, Nmer2.c) - ; Nmer2.toString ( - Nmer2.Double.reverse nmer - ) - end - , expectedResult = "GT" - }, { - description = "GTA forward" - , function = fn () => - let - val nmer = Nmer3.Double.create () - in - Nmer3.Double.put (nmer, Nmer3.g) - ; Nmer3.Double.put (nmer, Nmer3.t) - ; Nmer3.Double.put (nmer, Nmer3.a) - ; Nmer3.toString ( - Nmer3.Double.forward nmer - ) - end - , expectedResult = "GTA" - }, { - description = "GTA reverse" - , function = fn () => - let - val nmer = Nmer3.Double.create () - in - Nmer3.Double.put (nmer, Nmer3.g) - ; Nmer3.Double.put (nmer, Nmer3.t) - ; Nmer3.Double.put (nmer, Nmer3.a) - ; Nmer3.toString ( - Nmer3.Double.reverse nmer - ) - end - , expectedResult = "TAC" - }, { - description = "( ) isFull" - , function = fn () => - let - val nmer = Nmer1.Double.create () - in - Bool.toString ( - Nmer1.Double.isFull nmer - ) - end - , expectedResult = "false" - }, { - description = "(C) isFull" - , function = fn () => - let - val nmer = Nmer1.Double.create () - in - Nmer1.Double.put (nmer, Nmer1.g) - ; Bool.toString ( - Nmer1.Double.isFull nmer - ) - end - , expectedResult = "true" - }, { - description = "(C ) isFull" - , function = fn () => - let - val nmer = Nmer2.Double.create () - in - Nmer2.Double.put (nmer, Nmer2.c) - ; Bool.toString ( - Nmer2.Double.isFull nmer - ) - end - , expectedResult = "false" - }, { - description = "(CG) isFull" - , function = fn () => - let - val nmer = Nmer2.Double.create () - in - Nmer2.Double.put (nmer, Nmer2.c) - ; Nmer2.Double.put (nmer, Nmer2.g) - ; Bool.toString ( - Nmer2.Double.isFull nmer - ) - end - , expectedResult = "true" - }, { - description = "C(GA) isFull" - , function = fn () => - let - val nmer = Nmer2.Double.create () - in - Nmer2.Double.put (nmer, Nmer2.c) - ; Nmer2.Double.put (nmer, Nmer2.g) - ; Nmer2.Double.put (nmer, Nmer2.a) - ; Bool.toString ( - Nmer2.Double.isFull nmer - ) - end - , expectedResult = "true" - }, { - description = "CGA( ) isFull" - , function = fn () => - let - val nmer = Nmer2.Double.create () - in - Nmer2.Double.put (nmer, Nmer2.c) - ; Nmer2.Double.put (nmer, Nmer2.g) - ; Nmer2.Double.put (nmer, Nmer2.a) - ; Nmer2.Double.clear nmer - ; Bool.toString ( - Nmer2.Double.isFull nmer - ) - end - , expectedResult = "false" - }, { - description = "CGA (AC) isFull" - , function = fn () => - let - val nmer = Nmer2.Double.create () - in - Nmer2.Double.put (nmer, Nmer2.c) - ; Nmer2.Double.put (nmer, Nmer2.g) - ; Nmer2.Double.put (nmer, Nmer2.a) - ; Nmer2.Double.clear nmer - ; Nmer2.Double.put (nmer, Nmer2.a) - ; Nmer2.Double.put (nmer, Nmer2.c) - ; Bool.toString ( - Nmer2.Double.isFull nmer - ) - end - , expectedResult = "true" - }, { - description = "CGA (AC) forward" - , function = fn () => - let - val nmer = Nmer2.Double.create () - in - Nmer2.Double.put (nmer, Nmer2.c) - ; Nmer2.Double.put (nmer, Nmer2.g) - ; Nmer2.Double.put (nmer, Nmer2.a) - ; Nmer2.Double.clear nmer - ; Nmer2.Double.put (nmer, Nmer2.a) - ; Nmer2.Double.put (nmer, Nmer2.c) - ; Nmer2.toString ( - Nmer2.Double.forward nmer - ) - end - , expectedResult = "AC" - }, { - description = "CGA (AC) reverse" - , function = fn () => - let - val nmer = Nmer2.Double.create () - in - Nmer2.Double.put (nmer, Nmer2.c) - ; Nmer2.Double.put (nmer, Nmer2.g) - ; Nmer2.Double.put (nmer, Nmer2.a) - ; Nmer2.Double.clear nmer - ; Nmer2.Double.put (nmer, Nmer2.a) - ; Nmer2.Double.put (nmer, Nmer2.c) - ; Nmer2.toString ( - Nmer2.Double.reverse nmer - ) - end - , expectedResult = "GT" - }, { - description = "TG fromString" - , function = fn () => - case Nmer2.fromString "TG" of - NONE => "invalid string" - | SOME nmer => Nmer2.toString nmer - , expectedResult = "TG" - } - ] -end - -functor Nmer (Arguments: NMER_ARGUMENTS) :> NMER = Nmer (Arguments) diff --git a/src/nbc/parse-state.mlb b/src/nbc/parse-state.mlb deleted file mode 100644 index 6b0ccf3..0000000 --- a/src/nbc/parse-state.mlb +++ /dev/null @@ -1,5 +0,0 @@ -local - $(SML_LIB)/basis/basis.mlb -in - parse-state.sml -end diff --git a/src/nbc/parse-state.sml b/src/nbc/parse-state.sml deleted file mode 100644 index 8b30a67..0000000 --- a/src/nbc/parse-state.sml +++ /dev/null @@ -1,89 +0,0 @@ -signature PARSE_STATE = sig - type ('argument, 'result) state - val create: unit -> ('argument, 'result) state - type ('argument, 'result) handler = - TextIO.instream * 'argument -> 'result - datatype whichCharacters = These of char list | Any - val build: { - state: ('argument, 'result) state - , characters: - (whichCharacters * ('argument, 'result) handler) list - , endOfFile: ('argument, 'result) handler - } -> unit - val enter: - ('argument, 'result) state - * TextIO.instream - * 'argument - -> 'result -end - -structure ParseState :> PARSE_STATE = struct - type ('argument, 'result) handler = - TextIO.instream * 'argument -> 'result - datatype whichCharacters = These of char list | Any - type ('argument, 'result) state = { - byCharacter: Int8.int vector ref - , byIndex: ('argument, 'result) handler vector ref - , endOfFile: ('argument, 'result) handler option ref - } - fun create () = { - byCharacter = ref (Vector.fromList nil) - , byIndex = ref (Vector.fromList nil) - , endOfFile = ref NONE - } - fun build { - state = {byCharacter, byIndex, endOfFile} - , characters - , endOfFile = newEndOfFile - } = - let - val characters = vector characters - fun equal (one: char) (two: char) = - one = two - fun shallHandle ((whichToHandle, _), char) = - case whichToHandle of - Any => true - | These charactersToHandle => - List.exists (equal char) - charactersToHandle - fun charToIndex char = - case - Vector.findi (fn (_, handler) => - shallHandle (handler, char) - ) characters - of - NONE => raise Fail ( - "ParseState.build: " - ^ Char.toString char - ^ " not found" - ) | SOME (index, _) => - Int8.fromInt index - fun handlerToFunction (_, function) = function - fun indexToFunction index = handlerToFunction ( - Vector.sub (characters, index) - ) - in - byCharacter := Vector.tabulate ( - Char.maxOrd + 1 - , charToIndex o chr - ); byIndex := - Vector.map (fn (_, function) => - function - ) characters - ; endOfFile := SOME newEndOfFile - end - fun enter ( - { - byCharacter = ref byCharacter - , byIndex = ref byIndex - , endOfFile = ref endOfFile - } - , instream - , argument - ) = case TextIO.input1 instream of - NONE => (valOf endOfFile) (instream, argument) - | SOME char => Vector.sub ( - byIndex - , Int8.toInt (Vector.sub (byCharacter, ord char)) - ) (instream, argument) -end diff --git a/src/nbc/probabilities-by-read.mlb b/src/nbc/probabilities-by-read.mlb deleted file mode 100644 index f487180..0000000 --- a/src/nbc/probabilities-by-read.mlb +++ /dev/null @@ -1,8 +0,0 @@ -local - $(SML_LIB)/basis/basis.mlb - $(SML_LIB)/smlnj-lib/Util/smlnj-lib.mlb - nmer.mlb - fasta.mlb -in - probabilities-by-read.sml -end diff --git a/src/nbc/probabilities-by-read.sml b/src/nbc/probabilities-by-read.sml deleted file mode 100644 index acd8041..0000000 --- a/src/nbc/probabilities-by-read.sml +++ /dev/null @@ -1,210 +0,0 @@ -local - fun usage result = ( - TextIO.output ( - TextIO.stdErr - , CommandLine.name () - ^ " <order> <input FASTA> <file of nmers to count>\n" - ); OS.Process.exit result - ) -in - val (order, input, toCount) = case CommandLine.arguments () of - [order, input, toCount] => (case Int.fromString order of - NONE => usage OS.Process.failure - | SOME order => (order, input, toCount) - ) | ["--help"] => usage OS.Process.success - | _ => usage OS.Process.failure -end - -datatype result = Success | Failure -fun warn message = TextIO.output ( - TextIO.stdErr - , ( - "warning: " - ^ message - ^ "\n" - ) -) - -signature NMER_TABLE = sig - type nmer - type table - - (* - Create the table. Only the provided nmers will be counted. - *) - val create: nmer list -> table - - (* - Increment the count for a given nmer. If the nmer was - not provided at table creation, do nothing. - *) - val bump: table * nmer -> unit - - (* - Reset all counts to zero. Do not change the list of - nmers to count. - *) - val clear: table -> unit - - (* - Apply a function to all nmers and their counts, in - lexicographic order. - *) - val app: (nmer * int -> unit) -> table -> unit -end - -functor NmerTable (Nmer: NMER) -:> NMER_TABLE where type nmer = Nmer.nmer -= struct - structure HashTable = HashTableFn ( - type hash_key = Nmer.nmer - val hashVal = Nmer.hash - val sameKey = Nmer.equal - ) - exception NotFound - type nmer = Nmer.nmer - type table = { - indexes: int HashTable.hash_table - , counts: int array - , nmers: nmer vector - } - fun create list = - let - val indexes = HashTable.mkTable (1024, NotFound) - val nmers = - let - val array = Array.fromList list - in - ArrayQSort.sort Nmer.compare array - ; Array.vector array - end - in - Vector.appi (fn (index, nmer) => - HashTable.insert indexes (nmer, index) - ) nmers - ; { - indexes = indexes - , nmers = nmers - , counts = Array.array ( - Vector.length nmers - , 0 - ) - } - end - fun bump ({indexes, nmers = _, counts}, nmer) = - case HashTable.find indexes nmer of - NONE => () - | SOME index => Array.update ( - counts - , index - , Array.sub (counts, index) + 1 - ) - fun clear {indexes = _, nmers = _, counts} = - Array.modify (fn _ => 0) counts - fun app execute {indexes = _, nmers, counts} = - Vector.appi (fn (index, nmer) => - execute (nmer, Array.sub (counts, index)) - ) nmers -end - -functor Input ( - structure Nmer: NMER - structure NmerTable: NMER_TABLE - sharing type Nmer.nmer = NmerTable.nmer -) = SingleSidedFasta ( - structure Nmer = Nmer - structure File = struct - type argument = NmerTable.table - type file = argument - type read = Int64.int ref - type nmer = Nmer.nmer - datatype result = datatype result - exception NotFound - fun startFile table = table - fun startRead (table, _) = ref (0: Int64.int) - fun nmer (table, (total: Int64.int ref), nmer) = ( - total := !total + 1 - ; NmerTable.bump (table, nmer) - ) - fun put string = TextIO.output (TextIO.stdOut, string) - fun stopRead (table, total) = - let - val realFromInt64 = - Real.fromLargeInt o Int64.toLarge - val realTotal = realFromInt64 (!total) - val toString = Real.fmt ( - StringCvt.FIX (SOME 17) - ) - fun probability count = real count / realTotal - infix |> - fun argument |> function = function argument - val first = ref true - in - NmerTable.app (fn (nmer, count) => ( - if !first then first := false - else put "\t" - ; count - |> probability - |> toString - |> put - )) table - ; put "\n" - ; NmerTable.clear table - ; total := 0 - end - fun stopFile _ = Success - fun invalidFormat _ = Failure - end -) - -structure Nmer = Nmer ( - val order = order - structure Word = Word64 -) -structure NmerTable = NmerTable (Nmer) -structure Input = Input ( - structure Nmer = Nmer - structure NmerTable = NmerTable -) - -val table = - let - fun build collect goose = case collect goose of - NONE => nil - | SOME egg => - egg :: build collect goose - fun chopEnd string = String.extract ( - string - , 0 - , SOME (size string - ( - if String.isSuffix "\r\n" string - then 2 - else 1 - )) - ) - val line = Option.map chopEnd o TextIO.inputLine - val lines = build line - val instream = TextIO.openIn toCount - fun fromStringWithWarning string = - case Nmer.fromString string of - NONE => ( - warn ( - string - ^ " is not a valid nmer" - ); NONE - ) | someNmer => someNmer - val nmers = List.mapPartial - fromStringWithWarning - (lines instream) - in - TextIO.closeIn instream - ; NmerTable.create nmers - end - -val () = case Input.process (table, TextIO.openIn input) of - Failure => ( - TextIO.output ( - TextIO.stdErr - , "input is not valid FASTA\n" - ); OS.Process.exit OS.Process.failure - ) | Success => () diff --git a/src/nbc/promise.mlb b/src/nbc/promise.mlb deleted file mode 100644 index 7e1f71f..0000000 --- a/src/nbc/promise.mlb +++ /dev/null @@ -1,5 +0,0 @@ -local - $(SML_LIB)/basis/basis.mlb -in - promise.sml -end diff --git a/src/nbc/promise.sml b/src/nbc/promise.sml deleted file mode 100644 index 6bb2655..0000000 --- a/src/nbc/promise.sml +++ /dev/null @@ -1,24 +0,0 @@ -structure Promise -:> sig - type 'fulfillment promise - val delay: (unit -> 'fulfillment) -> 'fulfillment promise - val force: 'fulfillment promise -> 'fulfillment -end = struct - local - datatype 'expectation lazy = - Delayed of unit -> 'expectation - | Forced of 'expectation - in - type 'expectation promise = 'expectation lazy ref - fun delay fulfill = ref (Delayed fulfill) - fun force promise = case !promise of - Delayed fulfill => - let - val expectation = fulfill () - in - promise := Forced expectation - ; expectation - end - | Forced expectation => expectation - end -end diff --git a/src/nbc/stream.mlb b/src/nbc/stream.mlb deleted file mode 100644 index 639facc..0000000 --- a/src/nbc/stream.mlb +++ /dev/null @@ -1,6 +0,0 @@ -local - $(SML_LIB)/basis/basis.mlb - promise.mlb -in - stream.sml -end diff --git a/src/nbc/stream.sml b/src/nbc/stream.sml deleted file mode 100644 index 00b60ab..0000000 --- a/src/nbc/stream.sml +++ /dev/null @@ -1,243 +0,0 @@ -signature STREAM = sig - type 'element stream - val create: - (unit -> ('element * 'element stream) option) - -> 'element stream - val empty: unit -> 'element stream - val cons: 'element * 'element stream -> 'element stream - val unfold: - ('seed -> ('fruit * 'seed) option) - -> 'seed - -> 'fruit stream - val getItem: 'element stream -> ('element * 'element stream) option - val isEmpty: 'element stream -> bool - val fold: - ('element * 'accumulation -> 'accumulation) - -> 'accumulation - -> 'element stream - -> 'accumulation - val length: 'element stream -> int - val rev: 'element stream -> 'element stream - val map: ('input -> 'output) -> 'input stream -> 'output stream - val mapPartial: - ('input -> 'output option) - -> 'input stream - -> 'output stream - val app: ('element -> unit) -> 'element stream -> unit - val toList: 'element stream -> 'element list - val fromList: 'element list -> 'element stream - val toVector: 'element stream -> 'element vector - val fromVector: 'element vector -> 'element stream - val fromVectorSlice: 'element VectorSlice.slice -> 'element stream - val toArray: 'element stream -> 'element array - val fromArray: 'element array -> 'element stream - val fromArraySlice: 'element ArraySlice.slice -> 'element stream - val fromString: string -> char stream - val fromSubstring: Substring.substring -> char stream - val toString: char stream -> string - val fromTextInstream: TextIO.instream -> char stream - val append: 'element stream * 'element stream -> 'element stream - val concat: 'element stream stream -> 'element stream - val hd: 'element stream -> 'element - val tl: 'element stream -> 'element stream - val find: ('element -> bool) -> 'element stream -> 'element option - val filter: ('element -> bool) -> 'element stream -> 'element stream - val exists: ('element -> bool) -> 'element stream -> bool - val all: ('element -> bool) -> 'element stream -> bool - val partition: - ('element -> bool) - -> 'element stream - -> 'element stream * 'element stream - val take: ('element -> bool) -> 'element stream -> 'element stream - val drop: ('element -> bool) -> 'element stream -> 'element stream - val split: - ('element -> bool) - -> 'element stream - -> 'element stream * 'element stream - val trim: 'element stream * int -> 'element stream - val tokens: - ('element -> bool) - -> 'element stream - -> 'element stream stream - val fields: - ('element -> bool) - -> 'element stream - -> 'element stream stream -end - -structure Stream :> STREAM = struct - datatype 'element stream = - T of unit -> ('element * 'element stream) option - fun create function = T function - fun empty () = create (fn () => NONE) - fun cons headAndTail = create (fn () => SOME headAndTail) - fun unfold harvest seed = create (fn () => - case harvest seed of - NONE => NONE - | SOME (fruit, seed) => SOME ( - fruit - , unfold harvest seed - ) - ) - fun getItem (T function) = function () - fun fromList list = unfold List.getItem list - fun toList stream = case getItem stream of - NONE => nil - | SOME (head, tail) => head :: toList tail - fun fold accumulate accumulation stream = case getItem stream of - NONE => accumulation - | SOME (head, tail) => - fold accumulate (accumulate (head, accumulation)) tail - fun length stream = fold (fn (_, count) => count + 1) 0 stream - fun rev stream = fromList (fold op :: nil stream) - fun map transform stream = unfold (fn stream => - case getItem stream of - NONE => NONE - | SOME (head, tail) => SOME ( - transform head - , tail - ) - ) stream - fun app execute stream = - fold (fn (element, ()) => execute element) () stream - fun fromVectorSlice slice = unfold VectorSlice.getItem slice - fun fromVector vector = fromVectorSlice (VectorSlice.full vector) - fun fromArraySlice slice = unfold ArraySlice.getItem slice - fun fromArray array = fromArraySlice (ArraySlice.full array) - fun fromSubstring substring = unfold Substring.getc substring - fun fromString string = fromSubstring (Substring.full string) - local - fun withTabulate tabulate stream = - let - val position = ref stream - in - tabulate ( - length stream - , fn _ => case getItem (!position) of - NONE => raise Fail "Stream" - | SOME (head, tail) => ( - position := tail - ; head - ) - ) - end - in - fun toVector stream = withTabulate Vector.tabulate stream - fun toArray stream = withTabulate Array.tabulate stream - fun toString stream = withTabulate CharVector.tabulate stream - end - fun fromTextInstream instream = - unfold TextIO.StreamIO.input1 (TextIO.getInstream instream) - fun append (first, second) = create (fn () => - case getItem first of - NONE => getItem second - | SOME (head, tail) => SOME ( - head - , append (tail, second) - ) - ) - fun concat streams = create (fn () => - case getItem streams of - NONE => NONE - | SOME (head, tail) => - getItem (append (head, concat tail)) - ) - fun hd stream = case getItem stream of - NONE => raise Empty - | SOME (head, _) => head - fun tl stream = case getItem stream of - NONE => raise Empty - | SOME (_, tail) => tail - fun last stream = hd (rev stream) - fun drop (stream, count) = - if count < 0 then raise Subscript - else if count = 0 then stream - else case getItem stream of - NONE => raise Subscript - | SOME (_, tail) => drop (tail, count - 1) - fun nth streamAndOffset = case getItem (drop streamAndOffset) of - NONE => raise Subscript - | SOME (head, _) => head - fun mapPartial transform stream = create (fn () => - case getItem stream of - NONE => NONE - | SOME (head, tail) => case transform head of - NONE => getItem (mapPartial transform tail) - | SOME element => SOME ( - element - , mapPartial transform tail - ) - ) - fun find test stream = case getItem stream of - NONE => NONE - | SOME (head, tail) => - if test head then SOME head - else find test tail - fun filter test stream = unfold (fn stream => - case getItem stream of - NONE => NONE - | someHeadAndTail as (SOME (head, tail)) => - if test head then someHeadAndTail - else getItem (filter test tail) - ) stream - fun exists test stream = case find test stream of - NONE => false - | SOME _ => true - fun all test stream = not (exists (not o test) stream) - fun partition test stream = - let - val withResult = map (fn element => - (test element, element) - ) stream - in ( - mapPartial (fn (result, element) => - if result then SOME element - else NONE - ) withResult - , mapPartial (fn (result, element) => - if result then NONE - else SOME element - ) withResult - ) end - fun take test stream = create (fn () => - case getItem stream of - NONE => NONE - | SOME (head, tail) => - if test head then SOME (head, take test tail) - else NONE - ) - fun drop test stream = create (fn () => - case getItem stream of - NONE => NONE - | someHeadAndTail as (SOME (head, tail)) => - if test head then getItem (drop test tail) - else someHeadAndTail - ) - fun split test stream = (take test stream, drop test stream) - fun trim (stream, count) = - if count <= 0 then stream - else create (fn () => - case getItem stream of - NONE => NONE - | SOME (_, tail) => - getItem (trim (tail, count - 1)) - ) - fun isEmpty stream = case getItem stream of - NONE => true - | SOME _ => false - fun tokens isSeparator stream = unfold (fn stream => - let - val skipped = drop isSeparator stream - in - if isEmpty skipped then NONE - else SOME (split (not o isSeparator) skipped) - end - ) stream - fun fields isSeparator stream = unfold (fn stream => - if isEmpty stream then NONE - else SOME ( - take (not o isSeparator) stream - , trim (drop (not o isSeparator) stream, 1) - ) - ) stream -end diff --git a/src/nbc/test-library.mlb b/src/nbc/test-library.mlb deleted file mode 100644 index 2618fdb..0000000 --- a/src/nbc/test-library.mlb +++ /dev/null @@ -1,5 +0,0 @@ -local - $(SML_LIB)/basis/basis.mlb -in - test-library.sml -end diff --git a/src/nbc/test-library.sml b/src/nbc/test-library.sml deleted file mode 100644 index 6821e74..0000000 --- a/src/nbc/test-library.sml +++ /dev/null @@ -1,50 +0,0 @@ -signature TEST = sig - type test = - { - description: string - , expectedResult: string - , function: unit -> string - } - val single: test -> unit - val list: test list -> unit -end - -structure Test = struct - fun single {description, expectedResult, function} = - let - val actualResult = function () - in - if expectedResult = actualResult then - TextIO.output ( - TextIO.stdErr - , ( - description - ^ " succeeded.\n" - ) - ) - else ( - TextIO.output ( - TextIO.stdErr - , ( - description - ^ " was supposed to be " - ^ expectedResult - ^ ", but was actually " - ^ actualResult - ^ ".\n" - ) - ); OS.Process.exit OS.Process.failure - ) - end handle exception' => ( - TextIO.output ( - TextIO.stdErr - , ( - description - ^ " failed with exception " - ^ exnMessage exception' - ^ ".\n" - ) - ); OS.Process.exit OS.Process.failure - ) - fun list tests = app single tests -end diff --git a/src/nbc/tree.mlb b/src/nbc/tree.mlb deleted file mode 100644 index 4a496bf..0000000 --- a/src/nbc/tree.mlb +++ /dev/null @@ -1,6 +0,0 @@ -local - $(SML_LIB)/basis/basis.mlb - stream.mlb -in - tree.sml -end diff --git a/src/nbc/tree.sml b/src/nbc/tree.sml deleted file mode 100644 index 5d64f51..0000000 --- a/src/nbc/tree.sml +++ /dev/null @@ -1,225 +0,0 @@ -functor Tree (Key: sig - type key - val compare: key * key -> order -end) :> sig - type key = Key.key - type 'datum tree - val size: 'datum tree -> int - val empty: 'datum tree - val single: key * 'datum -> 'datum tree - val add: 'datum tree * key * 'datum -> 'datum tree - val find: 'datum tree * key -> 'datum option - val exists: 'datum tree * key -> bool - val up: 'datum tree -> (key * 'datum) Stream.stream - val down: 'datum tree -> (key * 'datum) Stream.stream - val upFrom: 'datum tree * key -> (key * 'datum) Stream.stream - val downFrom: 'datum tree * key -> (key * 'datum) Stream.stream - val fromList: (key * 'datum) list -> 'datum tree - val fromStream: (key * 'datum) Stream.stream -> 'datum tree - (* - val remove: 'datum tree * key -> 'datum tree - *) -end = struct - type key = Key.key - structure Height = Int8 - datatype 'datum tree = - Leaf - | Branch of { - height: Height.int - , less: 'datum tree - , key: key - , datum: 'datum - , greater: 'datum tree - } - fun size tree = case tree of - Leaf => 0 - | Branch {less, greater, ...} => - 1 + size less + size greater - val empty = Leaf - fun single (key, datum) = Branch { - height = 1 - , less = Leaf - , key = key - , datum = datum - , greater = Leaf - } - fun height tree = case tree of - Leaf => 0 - | Branch {height, ...} => height - fun calculateHeight {key, datum, less, greater} = Branch { - key = key - , datum = datum - , less = less - , greater = greater - , height = 1 + Height.max (height less, height greater) - } - fun rotateLess branch = case branch of - (* - b d - a d => b e - c e a c - *) - { - less = a - , key = bKey - , datum = bDatum - , greater = Branch { - less = c - , key = dKey - , datum = dDatum - , greater = e - , height = _ - } - } => calculateHeight { - less = calculateHeight { - less = a - , key = bKey - , datum = bDatum - , greater = c - }, key = dKey - , datum = dDatum - , greater = e - } | _ => raise Fail "rotateLess" - fun rotateGreater branch = case branch of - (* - d b - b e => a d - a c c e - *) - { - less = Branch { - less = a - , key = bKey - , datum = bDatum - , greater = c - , height = _ - }, key = dKey - , datum = dDatum - , greater = e - } => calculateHeight { - less = a - , key = bKey - , datum = bDatum - , greater = calculateHeight { - less = c - , key = dKey - , datum = dDatum - , greater = e - } - } | _ => raise Fail "rotateGreater" - fun balance (branch as {key, datum, less, greater}) = - let - val heightLess = height less - val heightGreater = height greater - in - if heightLess < heightGreater - 2 then - rotateLess branch - else if heightLess > heightGreater + 2 then - rotateGreater branch - else calculateHeight branch - end - fun add (tree, newKey, newDatum) = case tree of - Leaf => single (newKey, newDatum) - | Branch {height, less, key, datum, greater} => - case Key.compare (newKey, key) of - EQUAL => Branch { - height = height - , less = less - , key = newKey - , datum = newDatum - , greater = greater - } | LESS => balance { - less = add (less, newKey, newDatum) - , key = key - , datum = datum - , greater = greater - } | GREATER => balance { - less = less - , key = key - , datum = datum - , greater = add (greater, newKey, newDatum) - } - fun find (tree, desiredKey) = case tree of - Leaf => NONE - | Branch {less, key, datum, greater, ...} => - case Key.compare (desiredKey, key) of - EQUAL => SOME datum - | LESS => find (less, desiredKey) - | GREATER => find (greater, desiredKey) - fun exists (tree, desiredKey) = case find (tree, desiredKey) of - NONE => false - | SOME _ => true - fun up tree = Stream.create (fn () => - case tree of - Leaf => NONE - | Branch {less, key, datum, greater, ...} => - Stream.getItem ( - Stream.append ( - up less - , Stream.cons ( - (key, datum) - , up greater - ) - ) - ) - ) - fun down tree = Stream.create (fn () => - case tree of - Leaf => NONE - | Branch {greater, key, datum, less, ...} => - Stream.getItem ( - Stream.append ( - down greater - , Stream.cons ( - (key, datum) - , down less - ) - ) - ) - ) - fun upFrom (tree, firstKey) = Stream.create (fn () => - case tree of - Leaf => NONE - | Branch {less, key, datum, greater, ...} => - case Key.compare (firstKey, key) of - LESS => Stream.getItem ( - Stream.append ( - upFrom (less, firstKey) - , Stream.cons ( - (key, datum) - , up greater - ) - ) - ) | EQUAL => SOME ( - (key, datum) - , up greater - ) | GREATER => Stream.getItem ( - upFrom (greater, firstKey) - ) - ) - fun downFrom (tree, firstKey) = Stream.create (fn () => - case tree of - Leaf => NONE - | Branch {greater, key, datum, less, ...} => - case Key.compare (firstKey, key) of - LESS => Stream.getItem ( - downFrom (less, firstKey) - ) | EQUAL => SOME ( - (key, datum) - , down less - ) | GREATER => Stream.getItem ( - Stream.append ( - downFrom (greater, firstKey) - , Stream.cons ( - (key, datum) - , down less - ) - ) - ) - ) - fun fromStream stream = - Stream.fold (fn ((key, datum), tree) => - add (tree, key, datum) - ) empty stream - fun fromList list = fromStream (Stream.fromList list) -end |