1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
|
structure Main = struct
fun |> (x, f) = f x
infix |>
val order = Options.order
val genomes = case Options.genomes of
SOME x => x |> TextIO.openIn |> Misc.sequenceLines
| NONE => Options.genomesDir |> Misc.sortedDirectoryNoPrefix |> Sequence.fromList
fun input () =
case Fasta.sequence (Gzip.openIn Options.input) of
NONE => Fail.fail "input file is not FASTA format"
| SOME x =>
Sequence.map (fn (header, data) =>
(header, String.map Char.toUpper data)
) x
fun output genome =
let
fun inner format = case format of
Options.Matlab {variable, file} =>
let
val matlab = Matlab.openOut file
val doubleArray = Matlab.beginDoubleArray (matlab, variable)
in {
write = fn (_, score) =>
Matlab.writeDouble (doubleArray, score)
, close = fn () => (
Matlab.concludeDoubleArray doubleArray
; Matlab.closeOut matlab
)
} end
| Options.Text filename =>
let
val gzip = Gzip.openOut filename
in {
write = fn (header, score) =>
TextIO.output (
gzip
, Real.fmt (StringCvt.FIX (SOME 8)) score
)
, close = fn () => TextIO.closeOut gzip
} end
| Options.Dual {text, matlab} =>
let
val text = inner (Options.Text text)
val matlab = inner (Options.Matlab matlab)
in {
write = fn x => (
#write text x
; #write matlab x
), close = fn () => (
#close text ()
; #close matlab ()
)
} end
in
inner (Options.output genome)
end
fun totalWords (genome, order) =
let
val name = Options.totalWords (genome, order)
val input = TextIO.openIn name
in
(case Option.mapPartial Real.fromString (Misc.inputLine input) of
NONE => Fail.fail ("could not read number from " ^ name)
| SOME r => r
) before TextIO.closeIn input
end
val () =
Sequence.app (fn gname =>
let
val {write, close} = output gname
val totalWords = totalWords (gname, order)
val genome = (
Stopwatch.start ("Loading genome " ^ gname)
; Genome.load (gname, order)
before Stopwatch.finish ()
)
in
Stopwatch.start ("Scoring fragments")
; input () |> Sequence.app (fn (header, fragment) =>
write (
header
, Score.score (
order
, Options.missConstant
, fn nmer => Genome.get (genome, nmer)
, totalWords
, fragment
)
)
); Stopwatch.finish ()
; close ()
end
) genomes
end
|