aboutsummaryrefslogtreecommitdiff
path: root/src/nbc/fasta.sml
diff options
context:
space:
mode:
Diffstat (limited to 'src/nbc/fasta.sml')
-rw-r--r--src/nbc/fasta.sml326
1 files changed, 0 insertions, 326 deletions
diff --git a/src/nbc/fasta.sml b/src/nbc/fasta.sml
deleted file mode 100644
index 8089025..0000000
--- a/src/nbc/fasta.sml
+++ /dev/null
@@ -1,326 +0,0 @@
-signature FILE = sig
- type argument
- type file
- type read
- type nmer
- type result
- val startFile: argument -> file
- val startRead: file * string -> read
- val nmer: file * read * nmer -> unit
- val stopRead: file * read -> unit
- val stopFile: file -> result
- val invalidFormat: file -> result
-end
-
-signature FASTA = sig
- type argument
- type result
- val process: argument * TextIO.instream -> result
-end
-
-functor AgnosticFasta (
- structure Nmer: NMER
- structure File: FILE
- sharing type Nmer.nmer = File.nmer
- structure Sides: sig
- include NMER_SIDES
- type file
- type read
- val process: file * read * sides -> unit
- end
- sharing type Nmer.base = Sides.sidesBase
- sharing type Nmer.nmer = Sides.sidesNmer
- sharing type File.read = Sides.read
- sharing type File.file = Sides.file
-) :> FASTA
- where type argument = File.argument
- where type result = File.result
-= struct
- type argument = File.argument
- type result = File.result
-
- val beforeHeaderBeginningOfLine = ParseState.create ()
- val beforeHeaderMiddleOfLine = ParseState.create ()
- val afterHeaderBeginningOfLine = ParseState.create ()
- val afterHeaderMiddleOfLine = ParseState.create ()
-
- fun inputLineButDiscardNewline instream =
- Option.map (fn line =>
- String.extract (line, 0, SOME (size line - 1))
- ) (TextIO.inputLine instream)
- datatype z = datatype ParseState.whichCharacters (* This | Any *)
-
- local
- fun header (instream, (file, sides)) =
- case inputLineButDiscardNewline instream of
- NONE => File.invalidFormat file
- | SOME header => ParseState.enter (
- afterHeaderBeginningOfLine
- , instream
- , (
- file
- , File.startRead (
- file
- , header
- ), sides
- )
- )
- fun space (instream, (file, sides)) = ParseState.enter (
- beforeHeaderMiddleOfLine
- , instream
- , (file, sides)
- )
- fun newline (instream, (file, sides)) = ParseState.enter (
- beforeHeaderBeginningOfLine
- , instream
- , (file, sides)
- )
- fun invalidFormat (_, (file, _)) = File.invalidFormat file
- in
- val () = ParseState.build {
- state = beforeHeaderBeginningOfLine
- , characters = [
- (These [#">"], header)
- , (These [#"\n"], newline)
- , (These [#" ", #"\t", #"\r"], space)
- , (Any, invalidFormat)
- ], endOfFile = invalidFormat
- }
- val () = ParseState.build {
- state = beforeHeaderMiddleOfLine
- , characters = [
- (These [#"\n"], newline)
- , (These [#" ", #"\t", #"\r"], space)
- , (Any, invalidFormat)
- ], endOfFile = invalidFormat
- }
- end
- local
- fun base base (instream, (file, read, sides)) = (
- Sides.put (sides, base)
- ;
- if Sides.isFull sides then
- Sides.process (file, read, sides)
- else ()
- ; ParseState.enter (
- afterHeaderMiddleOfLine
- , instream
- , (file, read, sides)
- )
- )
- fun space (instream, (file, read, sides)) = (
- ParseState.enter (
- afterHeaderMiddleOfLine
- , instream
- , (file, read, sides)
- )
- )
- fun other (instream, (file, read, sides)) = (
- Sides.clear sides
- ; ParseState.enter (
- afterHeaderMiddleOfLine
- , instream
- , (file, read, sides)
- )
- )
- fun newline (instream, (file, read, sides)) =
- ParseState.enter (
- afterHeaderBeginningOfLine
- , instream
- , (file, read, sides)
- )
- fun header (instream, (file, read, sides)) = (
- File.stopRead (file, read)
- ; Sides.clear sides
- ; case inputLineButDiscardNewline instream of
- NONE => File.invalidFormat file
- | SOME header => ParseState.enter (
- afterHeaderBeginningOfLine
- , instream
- , (
- file
- , File.startRead (
- file
- , header
- ), sides
- )
- )
- )
- fun success (_, (file, read, _)) = (
- File.stopRead (file, read)
- ; File.stopFile file
- )
- in
- val () = ParseState.build {
- state = afterHeaderBeginningOfLine
- , characters = [
- (These [#"A", #"a"], base Nmer.a)
- , (These [#"C", #"c"], base Nmer.c)
- , (These [#"G", #"g"], base Nmer.g)
- , (These [#"T", #"t"], base Nmer.t)
- , (These [#">"], header)
- , (These [#"\n"], newline)
- , (These [#" ", #"\t", #"\r"], space)
- , (Any, other)
- ], endOfFile = success
- }
- val () = ParseState.build {
- state = afterHeaderMiddleOfLine
- , characters = [
- (These [#"A", #"a"], base Nmer.a)
- , (These [#"C", #"c"], base Nmer.c)
- , (These [#"G", #"g"], base Nmer.g)
- , (These [#"T", #"t"], base Nmer.t)
- , (These [#" ", #"\t", #"\r"], space)
- , (These [#"\n"], newline)
- , (Any, other)
- ], endOfFile = success
- }
- end
- fun process (argument, instream) = ParseState.enter (
- beforeHeaderBeginningOfLine
- , instream
- , (File.startFile argument, Sides.create ())
- )
-end
-
-functor SingleSidedFasta (
- structure Nmer: NMER
- structure File: FILE
- sharing type Nmer.nmer = File.nmer
-) = AgnosticFasta (
- structure Nmer = Nmer
- structure File = File
- structure Sides = struct
- type read = File.read
- type file = File.file
- open Nmer.Single
- fun process (file, read, sides) =
- File.nmer (file, read, forward sides)
- end
-)
-
-functor DoubleSidedFasta (
- structure Nmer: NMER
- structure File: FILE
- sharing type Nmer.nmer = File.nmer
-) = AgnosticFasta (
- structure Nmer = Nmer
- structure File = File
- structure Sides = struct
- type read = File.read
- type file = File.file
- open Nmer.Double
- fun process (file, read, sides) = (
- File.nmer (file, read, forward sides)
- ; File.nmer (file, read, reverse sides)
- )
- end
-)
-
-functor TestFile (Nmer: NMER) = struct
- type argument = unit
- type nmer = Nmer.nmer
- type read = {header: string, nmers: nmer list ref}
- type file = {header: string, nmers: string list} list ref
- type result = string
- fun startFile () = ref nil
- fun stopFile file = String.concatWith ";" (
- map (fn {header, nmers} =>
- header
- ^ ":"
- ^ String.concatWith "," (rev nmers)
- ) (rev (!file))
- )
- fun startRead (_, header) =
- {header = header, nmers = ref nil}
- fun nmer (_, {header = _, nmers}, nmer) =
- nmers := nmer :: !nmers
- fun stopRead (file, {header, nmers = ref nmers}) =
- file := {
- header = header
- , nmers = map Nmer.toString nmers
- } :: !file
- fun invalidFormat _ = "invalid format"
-end
-
-functor Test () = struct
- structure Nmer1 = Nmer (
- val order = 1
- structure Word = Word32
- )
- structure File1 = TestFile (Nmer1)
- structure SingleFasta1 = SingleSidedFasta (
- structure Nmer = Nmer1
- structure File = File1
- )
- fun test process input () = process ((), TextIO.openString input)
- val single1 = test SingleFasta1.process
- structure Nmer2 = Nmer (
- val order = 2
- structure Word = Word32
- )
- structure File2 = TestFile (Nmer2)
- structure SingleFasta2 = SingleSidedFasta (
- structure Nmer = Nmer2
- structure File = File2
- )
- val single2 = test SingleFasta2.process
- structure DoubleFasta1 = DoubleSidedFasta (
- structure Nmer = Nmer1
- structure File = File1
- )
- val double1 = test DoubleFasta1.process
- structure DoubleFasta2 = DoubleSidedFasta (
- structure Nmer = Nmer2
- structure File = File2
- )
- val double2 = test DoubleFasta2.process
- val () = Test.list [
- {
- description = "single 1: A"
- , function = single1 ">foo\nA\n"
- , expectedResult = "foo:A"
- }, {
- description = "single 1: AG"
- , function = single1 ">foo\nAG\n"
- , expectedResult = "foo:A,G"
- }, {
- description = "single 2: A"
- , function = single2 ">foo\nA\n"
- , expectedResult = "foo:"
- }, {
- description = "single 2: CTGAG"
- , function = single2 ">foo\nCTGAG\n"
- , expectedResult = "foo:CT,TG,GA,AG"
- }, {
- description = "double 1: C"
- , function = double1 ">bar\nC\n"
- , expectedResult = "bar:C,G"
- }, {
- description = "double 2: T"
- , function = double2 ">baz\nT\n"
- , expectedResult = "baz:"
- }, {
- description = "double 2: GC"
- , function = double2 ">quux\nGC\n"
- , expectedResult = "quux:GC,GC"
- }, {
- description = "double 2: CCC\\nC\\nCT"
- , function = double2 ">goo\nCCC\nC\nCT\n"
- , expectedResult = "goo:CC,GG,CC,GG,CC,GG,CC,GG,CT,AG"
- }, {
- description = "double 2: CC\\nC*\\nT"
- , function = double2 ">goo\nCC\nC*\nT\n"
- , expectedResult = "goo:CC,GG,CC,GG"
- }, {
- description = "double 2: foo CATGAC goo TACCAG"
- , function = double2
- ">foo\nCATGAC\n>goo\nTACCAG\n"
- , expectedResult = (
- "foo:CA,TG,AT,AT,TG,CA,GA,TC,AC,GT"
- ^ ";goo:TA,TA,AC,GT,CC,GG,CA,TG,AG,CT"
- )
- }
- ]
-end