summaryrefslogtreecommitdiff
path: root/src/nbc/main.sml
blob: 361d9ca6949ccebf336f584674747942893c3016 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
structure Main = struct

fun |> (x, f) = f x
infix |>

val order = Options.order
val genomes = case Options.genomes of
	SOME x => x |> TextIO.openIn |> Misc.sequenceLines
	| NONE => Options.genomesDir |> Misc.sortedDirectoryNoPrefix |> Sequence.fromList
fun input () =
	case Fasta.sequence (Gzip.openIn Options.input) of
		NONE => Fail.fail "input file is not FASTA format"
		| SOME x =>
			Sequence.map (fn (header, data) =>
				(header, String.map Char.toUpper data)
			) x
fun output genome =
	let
		fun inner format = case format of
			Options.Matlab {variable, file} =>
				let
					val matlab = Matlab.openOut file
					val doubleArray = Matlab.beginDoubleArray (matlab, variable)
				in {
					write = fn (_, score) =>
						Matlab.writeDouble (doubleArray, score)
					, close = fn () => (
						Matlab.concludeDoubleArray doubleArray
						; Matlab.closeOut matlab
					)
				} end
			| Options.Text filename =>
				let
					val gzip = Gzip.openOut filename
				in {
					write = fn (header, score) =>
						TextIO.output (
							gzip
							, Real.fmt (StringCvt.FIX (SOME 8)) score
						)
					, close = fn () => TextIO.closeOut gzip
				} end
			| Options.Dual {text, matlab} =>
				let
					val text = inner (Options.Text text)
					val matlab = inner (Options.Matlab matlab)
				in {
					write = fn x => (
						#write text x
						; #write matlab x
					), close = fn () => (
						#close text ()
						; #close matlab ()
					)
				} end
	in
		inner (Options.output genome)
	end
fun totalWords (genome, order) =
        let
		val name = Options.totalWords (genome, order)
		val input = TextIO.openIn name
	in
		(case Option.mapPartial Real.fromString (Misc.inputLine input) of
			NONE => Fail.fail ("could not read number from " ^ name)
			| SOME r => r
		) before TextIO.closeIn input
	end

val () =
	Sequence.app (fn gname =>
		let
			val {write, close} = output gname
			val totalWords = totalWords (gname, order)
			val genome = (
				Stopwatch.start ("Loading genome " ^ gname)
				; Genome.load (gname, order)
				before Stopwatch.finish ()
			)
		in
			Stopwatch.start ("Scoring fragments")
			; input () |> Sequence.app (fn (header, fragment) =>
				write (
					header
					, Score.score (
						order
						, Options.missConstant
						, fn nmer => Genome.get (genome, nmer)
						, totalWords
						, fragment
					)
				)
			); Stopwatch.finish ()
			; close ()
		end
	) genomes

end