aboutsummaryrefslogtreecommitdiff
path: root/src/nbc/probabilities-by-read.sml
diff options
context:
space:
mode:
authorCalvin Morrison <mutantturkey@gmail.com>2013-11-13 13:07:23 -0500
committerCalvin Morrison <mutantturkey@gmail.com>2013-11-13 13:07:23 -0500
commit9066ea884129957fe899d6d10cba1e17547214b9 (patch)
treeaaa3ba9e0e507d9d13d8258def7585675e86f221 /src/nbc/probabilities-by-read.sml
parentba07cda9cbf54d33b657d9ee30bc03c0b67f0b5f (diff)
remove nbc:
Diffstat (limited to 'src/nbc/probabilities-by-read.sml')
-rw-r--r--src/nbc/probabilities-by-read.sml210
1 files changed, 0 insertions, 210 deletions
diff --git a/src/nbc/probabilities-by-read.sml b/src/nbc/probabilities-by-read.sml
deleted file mode 100644
index acd8041..0000000
--- a/src/nbc/probabilities-by-read.sml
+++ /dev/null
@@ -1,210 +0,0 @@
-local
- fun usage result = (
- TextIO.output (
- TextIO.stdErr
- , CommandLine.name ()
- ^ " <order> <input FASTA> <file of nmers to count>\n"
- ); OS.Process.exit result
- )
-in
- val (order, input, toCount) = case CommandLine.arguments () of
- [order, input, toCount] => (case Int.fromString order of
- NONE => usage OS.Process.failure
- | SOME order => (order, input, toCount)
- ) | ["--help"] => usage OS.Process.success
- | _ => usage OS.Process.failure
-end
-
-datatype result = Success | Failure
-fun warn message = TextIO.output (
- TextIO.stdErr
- , (
- "warning: "
- ^ message
- ^ "\n"
- )
-)
-
-signature NMER_TABLE = sig
- type nmer
- type table
-
- (*
- Create the table. Only the provided nmers will be counted.
- *)
- val create: nmer list -> table
-
- (*
- Increment the count for a given nmer. If the nmer was
- not provided at table creation, do nothing.
- *)
- val bump: table * nmer -> unit
-
- (*
- Reset all counts to zero. Do not change the list of
- nmers to count.
- *)
- val clear: table -> unit
-
- (*
- Apply a function to all nmers and their counts, in
- lexicographic order.
- *)
- val app: (nmer * int -> unit) -> table -> unit
-end
-
-functor NmerTable (Nmer: NMER)
-:> NMER_TABLE where type nmer = Nmer.nmer
-= struct
- structure HashTable = HashTableFn (
- type hash_key = Nmer.nmer
- val hashVal = Nmer.hash
- val sameKey = Nmer.equal
- )
- exception NotFound
- type nmer = Nmer.nmer
- type table = {
- indexes: int HashTable.hash_table
- , counts: int array
- , nmers: nmer vector
- }
- fun create list =
- let
- val indexes = HashTable.mkTable (1024, NotFound)
- val nmers =
- let
- val array = Array.fromList list
- in
- ArrayQSort.sort Nmer.compare array
- ; Array.vector array
- end
- in
- Vector.appi (fn (index, nmer) =>
- HashTable.insert indexes (nmer, index)
- ) nmers
- ; {
- indexes = indexes
- , nmers = nmers
- , counts = Array.array (
- Vector.length nmers
- , 0
- )
- }
- end
- fun bump ({indexes, nmers = _, counts}, nmer) =
- case HashTable.find indexes nmer of
- NONE => ()
- | SOME index => Array.update (
- counts
- , index
- , Array.sub (counts, index) + 1
- )
- fun clear {indexes = _, nmers = _, counts} =
- Array.modify (fn _ => 0) counts
- fun app execute {indexes = _, nmers, counts} =
- Vector.appi (fn (index, nmer) =>
- execute (nmer, Array.sub (counts, index))
- ) nmers
-end
-
-functor Input (
- structure Nmer: NMER
- structure NmerTable: NMER_TABLE
- sharing type Nmer.nmer = NmerTable.nmer
-) = SingleSidedFasta (
- structure Nmer = Nmer
- structure File = struct
- type argument = NmerTable.table
- type file = argument
- type read = Int64.int ref
- type nmer = Nmer.nmer
- datatype result = datatype result
- exception NotFound
- fun startFile table = table
- fun startRead (table, _) = ref (0: Int64.int)
- fun nmer (table, (total: Int64.int ref), nmer) = (
- total := !total + 1
- ; NmerTable.bump (table, nmer)
- )
- fun put string = TextIO.output (TextIO.stdOut, string)
- fun stopRead (table, total) =
- let
- val realFromInt64 =
- Real.fromLargeInt o Int64.toLarge
- val realTotal = realFromInt64 (!total)
- val toString = Real.fmt (
- StringCvt.FIX (SOME 17)
- )
- fun probability count = real count / realTotal
- infix |>
- fun argument |> function = function argument
- val first = ref true
- in
- NmerTable.app (fn (nmer, count) => (
- if !first then first := false
- else put "\t"
- ; count
- |> probability
- |> toString
- |> put
- )) table
- ; put "\n"
- ; NmerTable.clear table
- ; total := 0
- end
- fun stopFile _ = Success
- fun invalidFormat _ = Failure
- end
-)
-
-structure Nmer = Nmer (
- val order = order
- structure Word = Word64
-)
-structure NmerTable = NmerTable (Nmer)
-structure Input = Input (
- structure Nmer = Nmer
- structure NmerTable = NmerTable
-)
-
-val table =
- let
- fun build collect goose = case collect goose of
- NONE => nil
- | SOME egg =>
- egg :: build collect goose
- fun chopEnd string = String.extract (
- string
- , 0
- , SOME (size string - (
- if String.isSuffix "\r\n" string
- then 2
- else 1
- ))
- )
- val line = Option.map chopEnd o TextIO.inputLine
- val lines = build line
- val instream = TextIO.openIn toCount
- fun fromStringWithWarning string =
- case Nmer.fromString string of
- NONE => (
- warn (
- string
- ^ " is not a valid nmer"
- ); NONE
- ) | someNmer => someNmer
- val nmers = List.mapPartial
- fromStringWithWarning
- (lines instream)
- in
- TextIO.closeIn instream
- ; NmerTable.create nmers
- end
-
-val () = case Input.process (table, TextIO.openIn input) of
- Failure => (
- TextIO.output (
- TextIO.stdErr
- , "input is not valid FASTA\n"
- ); OS.Process.exit OS.Process.failure
- ) | Success => ()