From c1a80d2f4ed10b09b2e18ec515978e87fbc81143 Mon Sep 17 00:00:00 2001 From: Calvin Morrison Date: Thu, 6 Mar 2014 20:04:30 -0500 Subject: use kmer_continuous_count instead of kmer_total_count, remove tmp file, update parameters for select_mers.py --- SelectiveGenomeAmplification | 25 ++++++++----------------- src/select_mers.py | 11 +++-------- 2 files changed, 11 insertions(+), 25 deletions(-) diff --git a/SelectiveGenomeAmplification b/SelectiveGenomeAmplification index bb52626..b3fdd2f 100755 --- a/SelectiveGenomeAmplification +++ b/SelectiveGenomeAmplification @@ -91,22 +91,15 @@ mkdir -p $output_directory/$current_run for fasta_file in $foreground $background; do counts=$counts_directory/$(basename $fasta_file) - tmp=$tmp_directory/$(basename $fasta_file) echo pre-processing $fasta_file - # check if our preprocessed file exists - if [[ ! -f $tmp ]]; then - echo "> pre processed $fasta_file" >> $tmp - cat $fasta_file | grep -v "^>" | tr -d '\n' >> $tmp - fi - # run counts if they haven't been created rm $counts-counts for mer in `seq $min_mer_range $max_mer_range`; do if [ ! -e $counts-counts-$mer ]; then echo checking $mer mers for $fasta_file - kmer_total_count -i $tmp -k $mer -l -n >> $counts-counts-$mer + kmer_continuous_count -i $fasta_file -k $mer -l -n >> $counts-counts-$mer else echo "$mer mers already done for $fasta_file" fi @@ -119,9 +112,6 @@ done fg_counts=$counts_directory/$(basename $foreground)-counts bg_counts=$counts_directory/$(basename $background)-counts -fg_tmp=$tmp_directory/$(basename $foreground) -bg_tmp=$tmp_directory/$(basename $background) - selected=$output_directory/$current_run/selected-mers # remove ignored mers @@ -139,15 +129,16 @@ for var in ignore_mers min_mer_range max_check cpus max_consecutive_binding max_ done; echo "checking if mers are within the melting range $min_melting_temp $max_melting_temp" -cat $fg_counts | filter_melting_range $min_melting_temp $max_melting_temp > $output_directory/$current_run/$foreground-counts-non-melting -cat $bg_counts | filter_melting_range $min_melting_temp $max_melting_temp > $output_directory/$current_run/$background-counts-non-melting +cat $fg_counts | filter_melting_range $min_melting_temp $max_melting_temp > $output_directory/$current_run/`basename $foreground`-counts-non-melting || exit 1 +cat $bg_counts | filter_melting_range $min_melting_temp $max_melting_temp > $output_directory/$current_run/`basename $background`-counts-non-melting || exit 1 echo "filtering out elements that have more consecutive binding mers than allowed by default $max_consecutive_binding" -cat $output_directory/$current_run/$foreground-counts-non-melting | filter_max_consecutive_binding.py $max_consecutive_binding > $output_directory/$current_run/$foreground-counts-filtered-binding -cat $output_directory/$current_run/$background-counts-non-melting | filter_max_consecutive_binding.py $max_consecutive_binding > $output_directory/$current_run/$background-counts-filtered-binding +cat $output_directory/$current_run/`basename $foreground`-counts-non-melting | filter_max_consecutive_binding.py $max_consecutive_binding > $output_directory/$current_run/`basename $foreground`-counts-filtered-binding || exit 1 +cat $output_directory/$current_run/`basename $background`-counts-non-melting | filter_max_consecutive_binding.py $max_consecutive_binding > $output_directory/$current_run/`basename $background`-counts-filtered-binding || exit 1 echo "scoring mer selectivity" -select_mers.py $output_directory/$current_run/$foreground-counts-filtered-binding $fg_tmp $output_directory/$current_run/$background-counts-filtered-binding $bg_tmp > $selected +select_mers.py $output_directory/$current_run/`basename $foreground`-counts-filtered-binding \ + $output_directory/$current_run/`basename $background`-counts-filtered-binding > $selected || exit 1 echo "scoring top mers based on selectivity" -score_mers.py $selected $fg_tmp $bg_tmp $output_directory/$current_run/scores-output +score_mers.py $selected <( cat $foreground | grep -v "^>" | tr -d '\n' ) <( cat $background | grep -v "^>" | tr -d '\n') $output_directory/$current_run/scores-output || exit 1 diff --git a/src/select_mers.py b/src/select_mers.py index 21306cc..ef019c0 100755 --- a/src/select_mers.py +++ b/src/select_mers.py @@ -7,18 +7,13 @@ bg_mers = {} min_mer_count = int(os.environ.get("min_mer_count", 0)); -if(len(sys.argv) == 5): +if(len(sys.argv) == 3): fg_count_fn = sys.argv[1] - fg_fasta_fn = sys.argv[2] - bg_count_fn = sys.argv[3] - bg_fasta_fn = sys.argv[4] - - fg_genome_length = os.path.getsize(fg_fasta_fn) - bg_genome_length = os.path.getsize(bg_fasta_fn) + bg_count_fn = sys.argv[2] else: print len(sys.argv) print "please specify your inputs" - print "ex: select_mers.py fg_counts fg_fasta bg_counts bg_fasta" + print "ex: select_mers.py fg_counts bg_counts" exit() -- cgit v1.2.3