about summary refs log tree commit diff
diff options
context:
space:
mode:
author Calvin Morrison <mutantturkey@gmail.com> 2014-07-16 13:31:53 -0400
committer Calvin Morrison <mutantturkey@gmail.com> 2014-07-16 13:31:53 -0400
commit 7ae2dee4d9445f02535d8fd479ddaacb2f968b86 (patch)
tree f9e017742297d65fc8380f4417a7783d2bf6536f
parent 428fbefaac16d6a5d83baa509cd9b02a65cffc57 (diff)
work on filters
-rw-r--r-- Makefile 4
-rwxr-xr-x SelectiveWholeGenomeAmplification 34
-rwxr-xr-x src/lock 4
-rw-r--r-- src/remove_mers.py 17
4 files changed, 42 insertions, 17 deletions
diff --git a/Makefile b/Makefile
index eac08ee..634b9ee 100644
--- a/Makefile
+++ b/Makefile
@@ -7,6 +7,7 @@ all: output_dir bin/strstream bin/filter_melting_range bin/strstreamone bin/sequ
output_dir:
mkdir -p bin
+
bin/strstream: src/strstream.c
$(CC) src/strstream.c -o bin/strstream $(CLIBS) $(CFLAGS)
bin/strstreamone: src/strstreamone.c
@@ -36,4 +37,5 @@ install: all
install -c src/filter_melting_temperature.py $(DEST)
install -c src/filter_max_consecutive_binding.py $(DEST)
install -c src/filter_average_binding.py $(DEST)
-
+ install -c src/remove_mers.py $(DEST)
+ install -c src/remove_mers_from_file.py $(DEST)
diff --git a/SelectiveWholeGenomeAmplification b/SelectiveWholeGenomeAmplification
index a3e3a24..37ddeb1 100755
--- a/SelectiveWholeGenomeAmplification
+++ b/SelectiveWholeGenomeAmplification
@@ -9,6 +9,9 @@ exit_handler() {
exit 1
}
+arg() { # print the arguments, space-joined, in green (SGR 32), then restore the default colour (SGR 39)
+	local IFS=' '; printf '\e[32m%s\e[39m\n' "$*" # printf keeps backslashes in the arguments literal, unlike echo -e
+}
# check_non_empty
check_non_empty() {
if [[ ! -s $1 ]]; then
@@ -24,11 +27,12 @@ check_mers() {
local counts="$2"
local mer=0
- echo " counting mers in $fasta_file"
+ echo " counting mers in $fasta_file:e"
+ echo -e "\e[32m"
# remove the counts file so we can concatenate
if [[ -e "$counts"-counts ]]; then
- echo " removing $counts-counts"
+ echo " removing $counts-counts"
rm "$counts"-counts
fi
@@ -36,15 +40,16 @@ check_mers() {
lock $tmp_directory/counts-lock
for (( mer = min_mer_range; mer <= max_mer_range; mer++)) ; do
if [[ ! -e "$counts"-counts-"$mer" ]]; then
- echo " checking $mer mers for $fasta_file"
+ echo " checking $mer mers for $fasta_file"
kmer_total_count -c -i "$fasta_file" -k "$mer" -l -n > "$counts"-counts-"$mer" || exit_handler
else
- echo " $mer-mers already done for $fasta_file (assuming no change)"
+ echo " $mer-mers already done for $fasta_file (assuming no change)"
fi
# concatentate
cat "$counts"-counts-"$mer" >> "$counts"-counts
+ echo -e "\e[39m"
done
rmdir $tmp_directory/counts-lock
}
@@ -290,31 +295,32 @@ if [[ -n "$step_filters" ]] || [[ -n "$all" ]]; then
echo "Step 2: filtering mers"
- cp "$fg_counts" "$ignore_mers_counts"
# remove ignored mers
if [[ "$ignore_mers" ]]; then
echo " filtering explicitly ignored mers: $ignore_mers"
- for mer in $ignore_mers; do
- sed -i '/^'"$mer"'\t/d' "$ignore_mers_counts"
- done
+ cat "$fg_counts" | remove_mers.py $ignore_mers > "$ignore_mers_counts"
+ else
+ cp "$fg_counts" "$ignore_mers_counts"
fi
+
check_non_empty "$ignore_mers_counts" "ignore mers"
+ # create full ignore_all_counts
cp "$ignore_mers_counts" "$ignore_all_mers_counts"
# remove ignored mers
if [[ "$ignore_all_mers_from_files" ]]; then
for ignore_file in $ignore_all_mers_from_files; do
-
if [[ -f "$ignore_file" ]]; then
- echo " filtering ignored mers from: $ignore_file"
+ # check mers from next ignore file
counts="$counts_directory/ignore-"$(basename "$ignore_file")
check_mers "$ignore_file" "$counts"
- while read mer_line; do
- mer=$(echo "$mer_line" | sed -e 's/\t.*//g')
- sed -i '/^'"$mer"'\t/d' "$ignore_all_mers_counts"
- done < "$counts-counts"
+ echo " filtering ignored mers from: $ignore_file"
+ remove_mers_from_file.py "$ignore_file" < "$ignore_all_mers_counts" > "$ignore_all_mers_counts-tmp" || exit_handler
+ mv "$ignore_all_mers_counts-tmp" "$ignore_all_mers_counts"
+ # make sure filtering left a non-empty counts file before moving on to the next ignore file
+ check_non_empty "$ignore_all_mers_counts" "ignore all mers from file $ignore_file"
else
echo " $ignore_file not found, continuing..."
fi
diff --git a/src/lock b/src/lock
index 73b2fee..34a3945 100755
--- a/src/lock
+++ b/src/lock
@@ -7,9 +7,9 @@ lock() {
fi
if mkdir "$1" &>/dev/null; then
- echo "lock $1 created" >&2
+ echo " lock $1 created" >&2
else
- echo "lock $1 found, waiting for unlock" >&2
+ echo " lock $1 found, waiting for unlock" >&2
while true; do
sleep 2;
if mkdir "$1" &>/dev/null; then
diff --git a/src/remove_mers.py b/src/remove_mers.py
new file mode 100644
index 0000000..b18992a
--- /dev/null
+++ b/src/remove_mers.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python2.7
+"""Filter mers: copy stdin to stdout, dropping lines whose first field is in argv."""
+import sys
+
+if __name__ == "__main__":
+
+    # Compare by value; "is 1" only works through CPython's small-int caching.
+    if len(sys.argv) == 1:
+        sys.exit("Filter mers, input is stdin, output is stdout, mers are argv")
+
+    mers_to_delete = set(sys.argv[1:])
+
+    for line in sys.stdin:
+        fields = line.split()
+        # Blank lines have no first field; pass them through instead of crashing.
+        if not fields or fields[0] not in mers_to_delete:
+            sys.stdout.write(line)