aboutsummaryrefslogtreecommitdiff
path: root/select_kmers.sh
blob: 11ee124ad620537ac5a69d42df05765aa1fba870 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/bin/bash
# Tunable settings. Each `: "${name=default}"` line keeps any value the caller
# has already set (for example through the environment), even an empty one,
# and only assigns the default when the variable is completely unset. The
# expansions are quoted so the resulting value is not word-split or globbed
# as arguments to `:` (relevant for the $PWD-based paths).

# range of mer sizes to count: smallest and largest k
: "${min_mer_range=6}"
: "${max_mer_range=10}"
# directory to store our per-size counts and the combined counts
: "${counts_directory=$PWD/counts}"
# temp directory, holds the preprocessed copies of the input fasta files
: "${tmp_directory=$PWD/tmp}"
# maximum kmer melting point (handed to below_melting_temperature.py)
: "${max_melting_temp=20}"
# minimum mer count
# ! you can supply either a percentage written as a fraction (.5)
# ! or a raw number (100)
: "${min_mer_count=1000}"
# maximum number of mers to pick
: "${max_select=15}"

# Exported so that programs started by this script can read these settings
# from their environment.
# NOTE(review): presumably consumed by select_mers.py - confirm.
export min_mer_range max_mer_range max_select min_mer_count


# Make sure the counts and temp directories exist before anything is written
# into them. `mkdir -p` succeeds when the directory is already there and also
# creates missing parent directories; the paths are quoted so that locations
# containing spaces work. Stop immediately if a directory cannot be created,
# because every later step writes into these two directories.
for work_dir in "$counts_directory" "$tmp_directory"; do
	if ! mkdir -p -- "$work_dir"; then
		echo "Could not create directory $work_dir." >&2
		exit 1
	fi
done

# Positional arguments:
#   $1 - foreground fasta file
#   $2 - background fasta file
# Both must be existing regular files; otherwise the script exits with
# status 1. Diagnostics go to stderr so they do not mix with normal output.
if [[ $# -lt 2 ]]; then
	echo "Usage: $0 <foreground fasta> <background fasta>" >&2
	exit 1
fi

foreground=$1
background=$2

for input_fasta in "$foreground" "$background"; do
	if [[ ! -f $input_fasta ]]; then
		echo "Could not open $input_fasta." >&2
		exit 1
	fi
done

# For each input fasta file:
#   1. write a preprocessed copy into $tmp_directory: one header line followed
#      by all non-header lines joined into a single line;
#   2. run kmer_total_count on that copy for every mer size from
#      $min_mer_range to $max_mer_range, caching each result in
#      $counts_directory/<name>-counts-<k>;
#   3. concatenate the per-size results into $counts_directory/<name>-counts.
# Files that are expensive to build are written under a ".partial" name and
# renamed only after the producing command succeeded, so an interrupted or
# failed run cannot leave a truncated file that a later run would treat as
# finished.
for fasta_file in "$foreground" "$background"; do

	fasta_name=$(basename -- "$fasta_file")
	counts=$counts_directory/$fasta_name
	tmp=$tmp_directory/$fasta_name

	echo "pre-processing $fasta_file"

	# reuse the preprocessed file if an earlier run already produced it
	if [[ ! -f $tmp ]]; then
		{
			echo "> pre processed $fasta_file"
			grep -v "^>" -- "$fasta_file" | tr -d '\n'
		} > "$tmp.partial" || exit 1
		mv -- "$tmp.partial" "$tmp" || exit 1
	fi

	# The combined counts file is rebuilt on every run. -f: it does not exist
	# on the first run, which is not an error.
	rm -f -- "$counts-counts"

	# run counts for each mer size, unless they have already been created
	for mer in $(seq "$min_mer_range" "$max_mer_range"); do
		mer_counts=$counts-counts-$mer

		if [[ ! -e $mer_counts ]]; then
			echo "checking $mer mers for $fasta_file"
			# NOTE(review): assumes kmer_total_count exits 0 on success - confirm.
			if ! kmer_total_count -i "$tmp" -k "$mer" -l -n > "$mer_counts.partial"; then
				echo "kmer_total_count failed for $mer mers of $fasta_file." >&2
				rm -f -- "$mer_counts.partial"
				exit 1
			fi
			mv -- "$mer_counts.partial" "$mer_counts" || exit 1
		else
			echo "$mer mers already done for $fasta_file"
		fi

		cat -- "$mer_counts" >> "$counts-counts"

	done
done


# Final stage: filter the foreground counts by melting temperature, then let
# select_mers.py pick the mers from the filtered foreground counts, the
# background counts and both preprocessed fasta files.
# Both python scripts are looked up relative to the current directory, so the
# script has to be started from the directory that contains them.
fg_name=$(basename -- "$foreground")
bg_name=$(basename -- "$background")

fg_counts=$counts_directory/${fg_name}-counts
bg_counts=$counts_directory/${bg_name}-counts

fg_tmp=$tmp_directory/$fg_name
bg_tmp=$tmp_directory/$bg_name

fg_non_melting=${fg_counts}-fg-non-melting

echo "checking if mers are below melting temperature in the foreground"

# Drop the result of a previous run. -f: the file does not exist on the first
# run, which is not an error.
rm -f -- "$fg_non_melting"

# Do not go on to the selection step with an empty or half-written filter
# result if the melting temperature filter fails.
if ! python below_melting_temperature.py "$max_melting_temp" < "$fg_counts" > "$fg_non_melting"; then
	echo "below_melting_temperature.py failed for $fg_counts." >&2
	exit 1
fi

# The selected mers are printed to stdout; the commented-out redirection shows
# how they could be captured in a file named after both inputs.
python ./select_mers.py "$fg_non_melting" "$fg_tmp" "$bg_counts" "$bg_tmp" # > "${fg_name}_${bg_name}_final_mers"