summaryrefslogtreecommitdiff
path: root/src/nbc/score.sml
diff options
context:
space:
mode:
Diffstat (limited to 'src/nbc/score.sml')
-rw-r--r--src/nbc/score.sml30
1 files changed, 30 insertions, 0 deletions
diff --git a/src/nbc/score.sml b/src/nbc/score.sml
new file mode 100644
index 0000000..90d569a
--- /dev/null
+++ b/src/nbc/score.sml
@@ -0,0 +1,30 @@
+signature SCORE = sig
+ val score: int * real * (string -> int option) * real * string -> real
+end
+
+structure Score :> SCORE = struct
+ fun |> (x, f) = f x
+ infix |>
+
+ fun addCount (hitsum, fcount, gcount, totalWords) =
+ Kahan.add (hitsum, Real.fromInt fcount * Math.ln (Real.fromInt gcount / totalWords))
+ fun addNmer (totalWords, getGenomeCount) (nmer, ref fcount, (misses, anyhits, hitsum)) =
+ case getGenomeCount nmer of
+ NONE => (misses + 1, anyhits, hitsum)
+ | SOME gcount => (
+ misses, true
+ , addCount (hitsum, fcount, gcount, totalWords)
+ )
+ fun score (order, missConstant, getGenomeCount, totalWords, fragment) =
+ let
+ val add = addNmer (totalWords, getGenomeCount)
+ val seed = (0, false, Kahan.zero)
+ val (misses, anyhits, hitsum) =
+ Nmer.count (order, fragment) |> HashTable.foldi add seed
+ in
+ if anyhits then
+ Kahan.add (hitsum, Math.ln missConstant * Real.fromInt misses)
+ |> Kahan.sum
+ else Real.negInf
+ end
+end