diff options
Diffstat (limited to 'src/nbc/fasta.sml')
-rw-r--r-- | src/nbc/fasta.sml | 326 |
1 files changed, 0 insertions, 326 deletions
diff --git a/src/nbc/fasta.sml b/src/nbc/fasta.sml deleted file mode 100644 index 8089025..0000000 --- a/src/nbc/fasta.sml +++ /dev/null @@ -1,326 +0,0 @@ -signature FILE = sig - type argument - type file - type read - type nmer - type result - val startFile: argument -> file - val startRead: file * string -> read - val nmer: file * read * nmer -> unit - val stopRead: file * read -> unit - val stopFile: file -> result - val invalidFormat: file -> result -end - -signature FASTA = sig - type argument - type result - val process: argument * TextIO.instream -> result -end - -functor AgnosticFasta ( - structure Nmer: NMER - structure File: FILE - sharing type Nmer.nmer = File.nmer - structure Sides: sig - include NMER_SIDES - type file - type read - val process: file * read * sides -> unit - end - sharing type Nmer.base = Sides.sidesBase - sharing type Nmer.nmer = Sides.sidesNmer - sharing type File.read = Sides.read - sharing type File.file = Sides.file -) :> FASTA - where type argument = File.argument - where type result = File.result -= struct - type argument = File.argument - type result = File.result - - val beforeHeaderBeginningOfLine = ParseState.create () - val beforeHeaderMiddleOfLine = ParseState.create () - val afterHeaderBeginningOfLine = ParseState.create () - val afterHeaderMiddleOfLine = ParseState.create () - - fun inputLineButDiscardNewline instream = - Option.map (fn line => - String.extract (line, 0, SOME (size line - 1)) - ) (TextIO.inputLine instream) - datatype z = datatype ParseState.whichCharacters (* This | Any *) - - local - fun header (instream, (file, sides)) = - case inputLineButDiscardNewline instream of - NONE => File.invalidFormat file - | SOME header => ParseState.enter ( - afterHeaderBeginningOfLine - , instream - , ( - file - , File.startRead ( - file - , header - ), sides - ) - ) - fun space (instream, (file, sides)) = ParseState.enter ( - beforeHeaderMiddleOfLine - , instream - , (file, sides) - ) - fun newline (instream, (file, sides)) = ParseState.enter ( - beforeHeaderBeginningOfLine - , instream - , (file, sides) - ) - fun invalidFormat (_, (file, _)) = File.invalidFormat file - in - val () = ParseState.build { - state = beforeHeaderBeginningOfLine - , characters = [ - (These [#">"], header) - , (These [#"\n"], newline) - , (These [#" ", #"\t", #"\r"], space) - , (Any, invalidFormat) - ], endOfFile = invalidFormat - } - val () = ParseState.build { - state = beforeHeaderMiddleOfLine - , characters = [ - (These [#"\n"], newline) - , (These [#" ", #"\t", #"\r"], space) - , (Any, invalidFormat) - ], endOfFile = invalidFormat - } - end - local - fun base base (instream, (file, read, sides)) = ( - Sides.put (sides, base) - ; - if Sides.isFull sides then - Sides.process (file, read, sides) - else () - ; ParseState.enter ( - afterHeaderMiddleOfLine - , instream - , (file, read, sides) - ) - ) - fun space (instream, (file, read, sides)) = ( - ParseState.enter ( - afterHeaderMiddleOfLine - , instream - , (file, read, sides) - ) - ) - fun other (instream, (file, read, sides)) = ( - Sides.clear sides - ; ParseState.enter ( - afterHeaderMiddleOfLine - , instream - , (file, read, sides) - ) - ) - fun newline (instream, (file, read, sides)) = - ParseState.enter ( - afterHeaderBeginningOfLine - , instream - , (file, read, sides) - ) - fun header (instream, (file, read, sides)) = ( - File.stopRead (file, read) - ; Sides.clear sides - ; case inputLineButDiscardNewline instream of - NONE => File.invalidFormat file - | SOME header => ParseState.enter ( - afterHeaderBeginningOfLine - , instream - , ( - file - , File.startRead ( - file - , header - ), sides - ) - ) - ) - fun success (_, (file, read, _)) = ( - File.stopRead (file, read) - ; File.stopFile file - ) - in - val () = ParseState.build { - state = afterHeaderBeginningOfLine - , characters = [ - (These [#"A", #"a"], base Nmer.a) - , (These [#"C", #"c"], base Nmer.c) - , (These [#"G", #"g"], base Nmer.g) - , (These [#"T", #"t"], base Nmer.t) - , (These [#">"], header) - , (These [#"\n"], newline) - , (These [#" ", #"\t", #"\r"], space) - , (Any, other) - ], endOfFile = success - } - val () = ParseState.build { - state = afterHeaderMiddleOfLine - , characters = [ - (These [#"A", #"a"], base Nmer.a) - , (These [#"C", #"c"], base Nmer.c) - , (These [#"G", #"g"], base Nmer.g) - , (These [#"T", #"t"], base Nmer.t) - , (These [#" ", #"\t", #"\r"], space) - , (These [#"\n"], newline) - , (Any, other) - ], endOfFile = success - } - end - fun process (argument, instream) = ParseState.enter ( - beforeHeaderBeginningOfLine - , instream - , (File.startFile argument, Sides.create ()) - ) -end - -functor SingleSidedFasta ( - structure Nmer: NMER - structure File: FILE - sharing type Nmer.nmer = File.nmer -) = AgnosticFasta ( - structure Nmer = Nmer - structure File = File - structure Sides = struct - type read = File.read - type file = File.file - open Nmer.Single - fun process (file, read, sides) = - File.nmer (file, read, forward sides) - end -) - -functor DoubleSidedFasta ( - structure Nmer: NMER - structure File: FILE - sharing type Nmer.nmer = File.nmer -) = AgnosticFasta ( - structure Nmer = Nmer - structure File = File - structure Sides = struct - type read = File.read - type file = File.file - open Nmer.Double - fun process (file, read, sides) = ( - File.nmer (file, read, forward sides) - ; File.nmer (file, read, reverse sides) - ) - end -) - -functor TestFile (Nmer: NMER) = struct - type argument = unit - type nmer = Nmer.nmer - type read = {header: string, nmers: nmer list ref} - type file = {header: string, nmers: string list} list ref - type result = string - fun startFile () = ref nil - fun stopFile file = String.concatWith ";" ( - map (fn {header, nmers} => - header - ^ ":" - ^ String.concatWith "," (rev nmers) - ) (rev (!file)) - ) - fun startRead (_, header) = - {header = header, nmers = ref nil} - fun nmer (_, {header = _, nmers}, nmer) = - nmers := nmer :: !nmers - fun stopRead (file, {header, nmers = ref nmers}) = - file := { - header = header - , nmers = map Nmer.toString nmers - } :: !file - fun invalidFormat _ = "invalid format" -end - -functor Test () = struct - structure Nmer1 = Nmer ( - val order = 1 - structure Word = Word32 - ) - structure File1 = TestFile (Nmer1) - structure SingleFasta1 = SingleSidedFasta ( - structure Nmer = Nmer1 - structure File = File1 - ) - fun test process input () = process ((), TextIO.openString input) - val single1 = test SingleFasta1.process - structure Nmer2 = Nmer ( - val order = 2 - structure Word = Word32 - ) - structure File2 = TestFile (Nmer2) - structure SingleFasta2 = SingleSidedFasta ( - structure Nmer = Nmer2 - structure File = File2 - ) - val single2 = test SingleFasta2.process - structure DoubleFasta1 = DoubleSidedFasta ( - structure Nmer = Nmer1 - structure File = File1 - ) - val double1 = test DoubleFasta1.process - structure DoubleFasta2 = DoubleSidedFasta ( - structure Nmer = Nmer2 - structure File = File2 - ) - val double2 = test DoubleFasta2.process - val () = Test.list [ - { - description = "single 1: A" - , function = single1 ">foo\nA\n" - , expectedResult = "foo:A" - }, { - description = "single 1: AG" - , function = single1 ">foo\nAG\n" - , expectedResult = "foo:A,G" - }, { - description = "single 2: A" - , function = single2 ">foo\nA\n" - , expectedResult = "foo:" - }, { - description = "single 2: CTGAG" - , function = single2 ">foo\nCTGAG\n" - , expectedResult = "foo:CT,TG,GA,AG" - }, { - description = "double 1: C" - , function = double1 ">bar\nC\n" - , expectedResult = "bar:C,G" - }, { - description = "double 2: T" - , function = double2 ">baz\nT\n" - , expectedResult = "baz:" - }, { - description = "double 2: GC" - , function = double2 ">quux\nGC\n" - , expectedResult = "quux:GC,GC" - }, { - description = "double 2: CCC\\nC\\nCT" - , function = double2 ">goo\nCCC\nC\nCT\n" - , expectedResult = "goo:CC,GG,CC,GG,CC,GG,CC,GG,CT,AG" - }, { - description = "double 2: CC\\nC*\\nT" - , function = double2 ">goo\nCC\nC*\nT\n" - , expectedResult = "goo:CC,GG,CC,GG" - }, { - description = "double 2: foo CATGAC goo TACCAG" - , function = double2 - ">foo\nCATGAC\n>goo\nTACCAG\n" - , expectedResult = ( - "foo:CA,TG,AT,AT,TG,CA,GA,TC,AC,GT" - ^ ";goo:TA,TA,AC,GT,CC,GG,CA,TG,AG,CT" - ) - } - ] -end |