blob: bd97b7b017119a3abb4a1dbc7695667ca9ae324a (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
|
module ExtArray = ExtArray.Array
(** Reverse application: [x |> f] is [f x]. Lets a chain of transformations be
    written left to right. *)
let ( |> ) x f = f x
(** Command-line options.

    Every option is first registered on [parser]; only then is the command
    line parsed. The [total], [per_word] and [order] bindings at the end of
    the module shadow the accessor thunks of the same name with their parsed
    values, so users of this module see plain values. *)
module Options = struct
  (** Parser on which all options below are registered. *)
  let parser = OptParse.OptParser.make ~version:"1.0" ()

  (** [required_file ~short_name ~long_name ~help] registers a string option
      (metavar ["file"]) on [parser] and returns a thunk reading its value.

      The thunk is meant to be called after the command line has been parsed.
      If the option was not supplied it reports which option is missing,
      prints the usage text on stderr and exits with status 1. *)
  let required_file ~short_name ~long_name ~help =
    let option = OptParse.StdOpt.str_option ~metavar:"file" () in
    OptParse.OptParser.add parser ~short_name ~long_name ~help option;
    fun () ->
      match OptParse.Opt.opt option with
      | Some path -> path
      | None ->
        (* Diagnostics go to stderr and name the offending option, so the
           user can tell why the program refused to run. *)
        prerr_endline ("missing required option --" ^ long_name);
        OptParse.OptParser.usage parser ~chn:stderr ();
        exit 1

  (* Registration order is kept as per-word, total, order: the parser lists
     options in the usage text, presumably in registration order. *)
  let per_word =
    required_file ~short_name:'w' ~long_name:"per-word"
      ~help:"file to store per-word counts in"

  let total =
    required_file ~short_name:'t' ~long_name:"total"
      ~help:"file to store total count in"

  (** Accessor for the integer [--order] option; defaults to 15 when the
      option is not given. *)
  let order =
    let option = OptParse.StdOpt.int_option ~default:15 () in
    OptParse.OptParser.add parser ~short_name:'r' ~long_name:"order" option;
    fun () -> OptParse.Opt.get option

  (** Result of parsing the command line with [parser]; used by the rest of
      the program as the list of input file names. *)
  let files = OptParse.OptParser.parse_argv parser

  (* From here on the options have been parsed: replace each accessor by its
     value. A missing required option terminates the program at this point. *)
  let total = total ()
  let per_word = per_word ()
  let order = order ()
end
(** [load_file order judy total name] folds over every word produced by
    [Fasta.enum_words order] on the input obtained from
    [Misc.io_of_gzip name] (presumably a gzip-compressed FASTA file; confirm
    against [Misc]/[Fasta]).

    For each word, [Judy.bump] is applied to [judy] once for the word itself
    and once for [Gene.reverse word]. Returns [total] increased by 2 for every
    word seen. *)
let load_file order judy total name =
  (* Record one word together with its reversed form. *)
  let count_word word seen =
    Judy.bump judy word;
    Judy.bump judy (Gene.reverse word);
    seen + 2
  in
  let words = Fasta.enum_words order (Misc.io_of_gzip name) in
  Enum.fold count_word total words
(** [load_files order names] creates a fresh [Judy] structure and runs
    [load_file] over each file in [names], in list order, threading the
    running total (starting at 0) through the calls.

    Returns the pair [(total, judy)]. *)
let load_files order names =
  let judy = Judy.create () in
  let total =
    List.fold_left
      (fun running name -> load_file order judy running name)
      0 names
  in
  (total, judy)
(** [gzip_output_string c s] writes the whole of [s] to the gzip output
    channel [c]. *)
let gzip_output_string c s =
  let len = String.length s in
  Gzip.output c s 0 len
(* Program entry point: load every input file, then write the two reports. *)
let () =
  let total_words, judy = load_files Options.order Options.files in
  (* Plain-text file holding the total count followed by a newline. *)
  let write_total () =
    let chan = open_out Options.total in
    output_string chan (string_of_int total_words ^ "\n");
    close_out chan
  in
  (* Gzip file with one "<count> <word>" line per entry visited by
     [Judy.iter]. *)
  let write_per_word () =
    let gz = Gzip.open_out Options.per_word in
    let emit word count =
      gzip_output_string gz (string_of_int count ^ " " ^ word ^ "\n")
    in
    Judy.iter emit judy;
    Gzip.close_out gz
  in
  write_total ();
  write_per_word ()
|