Skip to content

Commit

Permalink
Merge pull request #52 from campanam/empty-region-bed
Browse files Browse the repository at this point in the history
Empty region bed
  • Loading branch information
campanam authored Jul 17, 2022
2 parents b3f233c + 94b25c5 commit 28f0134
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 26 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.Md
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,10 @@ First version integrated into pipeline
Initial script (plotDPGQ-clean.R)

## ratestools
### Version 0.5.6
Fixed bug in region filtration when empty region bed
bedtools subtract rather than interact for second attempt

### Version 0.5.5
summarizeDNM outputs VCF of candidate DNMs

Expand Down
79 changes: 53 additions & 26 deletions ratestools.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env nextflow

/* RatesTools version 0.5.5
/* RatesTools version 0.5.6
Michael G. Campana and Ellie E. Armstrong, 2020-2022
Smithsonian Institution and Stanford University
Expand Down Expand Up @@ -487,6 +487,7 @@ process simplifyBed {
path "${prefix}_excluded_reduced.bed" into exclude_bed_ch

"""
#!/usr/bin/env bash
if [ ! "\$(wc -l < ${indel_bed})" -eq 0 ]; then
simplify_sorted_bed.rb ${indel_bed} > ${indel_bed.baseName}_sorted.bed
else
Expand Down Expand Up @@ -751,42 +752,68 @@ process filterRegions {
chr = site_vcf.simpleName.split('_chr')[1]
if (task.attempt == 1)
"""
bedtools subtract -a ${site_vcf} -b ${exclude_bed} -header | gzip > ${site_vcf.simpleName}.regionfilt.vcf.gz
if [[ `grep -n 'Error: Invalid record' .command.log | cut -d ':' -f 1` -eq 0]] ; then
vcftools --gzvcf ${site_vcf.simpleName}.regionfilt.vcf.gz
cp .command.log ${site_vcf.simpleName}_regionfilt.tmp
#!/usr/bin/env bash
grep ${chr} ${exclude_bed} > tmp.bed
if [ ! "\$(wc -l < tmp.bed)" -eq 0 ]; then
bedtools subtract -a ${site_vcf} -b tmp.bed -header | gzip > ${site_vcf.simpleName}.regionfilt.vcf.gz
if [[ `grep -n 'Error: Invalid record' .command.log | cut -d ':' -f 1` -eq 0 ]]; then
vcftools --gzvcf ${site_vcf.simpleName}.regionfilt.vcf.gz
cp .command.log ${site_vcf.simpleName}_regionfilt.tmp
else
rm ${site_vcf.simpleName}.regionfilt.vcf.gz
fi
else
rm ${site_vcf.simpleName}.regionfilt.vcf.gz
fi
vcftools --gzvcf ${site_vcf} --recode -c | gzip > ${site_vcf.simpleName}.regionfilt.vcf.gz
cp .command.log ${site_vcf.simpleName}_regionfilt.tmp
fi
"""
else if (task.attempt == 2)
"""
zcat ${site_vcf} | bedtools intersect -a stdin -b ${exclude_bed} -v -header | gzip > ${site_vcf.simpleName}.regionfilt.vcf.gz
if [[ `grep -n 'Error: Invalid record' .command.log | cut -d ':' -f 1` -eq 0]] ; then
vcftools --gzvcf ${site_vcf.simpleName}.regionfilt.vcf.gz
cp .command.log ${site_vcf.simpleName}_regionfilt.tmp
#!/usr/bin/env bash
grep ${chr} ${exclude_bed} > tmp.bed
if [ ! "\$(wc -l < tmp.bed)" -eq 0 ]; then
zcat ${site_vcf} | bedtools subtract -a stdin -b tmp.bed -v -header | gzip > ${site_vcf.simpleName}.regionfilt.vcf.gz
if [[ `grep -n 'Error: Invalid record' .command.log | cut -d ':' -f 1` -eq 0 ]]; then
vcftools --gzvcf ${site_vcf.simpleName}.regionfilt.vcf.gz
cp .command.log ${site_vcf.simpleName}_regionfilt.tmp
else
rm ${site_vcf.simpleName}.regionfilt.vcf.gz
fi
else
rm ${site_vcf.simpleName}.regionfilt.vcf.gz
fi
vcftools --gzvcf ${site_vcf} --recode -c | gzip > ${site_vcf.simpleName}.regionfilt.vcf.gz
cp .command.log ${site_vcf.simpleName}_regionfilt.tmp
fi
"""
else if (task.attempt == 3)
"""
grep ${chr} ${exclude_bed} > tmp.bed
tabix ${site_vcf}
bcftools view -R tmp.bed -Ob -o tmp.bcf ${site_vcf}
tabix tmp.bcf
# bcftools isec gives the target sites not included in the bed
bcftools isec -C -O v ${site_vcf} tmp.bcf | cut -f1,2 > ${site_vcf.simpleName}.targets
# Use the isec output to get the output. Needs to stream (-T) rather than index jump (-R) for efficiency.
bcftools view -T ${site_vcf.simpleName}.targets -Ov ${site_vcf} | gzip > ${site_vcf.simpleName}.regionfilt.vcf.gz
vcftools --gzvcf ${site_vcf.simpleName}.regionfilt.vcf.gz
cp .command.log ${site_vcf.simpleName}_regionfilt.tmp
#!/usr/bin/env bash
grep ${chr} ${exclude_bed} > tmp.bed
if [ ! "\$(wc -l < tmp.bed)" -eq 0 ]; then
tabix ${site_vcf}
bcftools view -R tmp.bed -Ob -o tmp.bcf ${site_vcf}
tabix tmp.bcf
# bcftools isec gives the target sites not included in the bed
bcftools isec -C -O v ${site_vcf} tmp.bcf | cut -f1,2 > ${site_vcf.simpleName}.targets
# Use the isec output to get the output. Needs to stream (-T) rather than index jump (-R) for efficiency.
bcftools view -T ${site_vcf.simpleName}.targets -Ov ${site_vcf} | gzip > ${site_vcf.simpleName}.regionfilt.vcf.gz
vcftools --gzvcf ${site_vcf.simpleName}.regionfilt.vcf.gz
cp .command.log ${site_vcf.simpleName}_regionfilt.tmp
else
vcftools --gzvcf ${site_vcf} --recode -c | gzip > ${site_vcf.simpleName}.regionfilt.vcf.gz
cp .command.log ${site_vcf.simpleName}_regionfilt.tmp
fi
"""
else
"""
grep ${chr} ${exclude_bed} > tmp.bed
vcftools --gzvcf ${site_vcf} --recode -c --exclude-bed tmp.bed | gzip > ${site_vcf.simpleName}.regionfilt.vcf.gz
cp .command.log ${site_vcf.simpleName}_regionfilt.tmp
#!/usr/bin/env bash
grep ${chr} ${exclude_bed} > tmp.bed
if [ ! "\$(wc -l < tmp.bed)" -eq 0 ]; then
vcftools --gzvcf ${site_vcf} --recode -c --exclude-bed tmp.bed | gzip > ${site_vcf.simpleName}.regionfilt.vcf.gz
cp .command.log ${site_vcf.simpleName}_regionfilt.tmp
else
vcftools --gzvcf ${site_vcf} --recode -c | gzip > ${site_vcf.simpleName}.regionfilt.vcf.gz
cp .command.log ${site_vcf.simpleName}_regionfilt.tmp
fi
"""

}
Expand Down

0 comments on commit 28f0134

Please sign in to comment.