Skip to content

Commit

Permalink
Add ability to explore various values of AED and LD. (#93)
Browse files Browse the repository at this point in the history
* Update parameters to read from meta map

* Introduce channel to construct sweep parameter map

* Update nf-core modules

* Add meta map to agat separatebyrecord

* Add meta map to agat filterbyattribute

* Update blast path

* Add meta map to agat keeplongestisoform

* Add meta map to agat filterincompletegenecodingmodels

* Add meta map to agat filterbylocusdistance

* Add meta map to agat extractsequences

* Add meta map to blast blastp

* Add meta map to agat filterbymrnablastvalue

* Add meta map to augustus gff2gbk

* Add meta map to augustus gbk2augustus

* Add meta map to augustus training modules

* Add meta map to agat gff2zff

* Add meta map to snap training

* Update channel logic for abinitio pipeline

* Update README

* Update abinitio test profile

* Fix module links for annotation preprocessing workflow

* Fix module links for functional annotation workflow

* Fix module links

* Update publish paths to include parameter sweep values

* Add container registries to profiles for nf-core

* Remove registries from docker container paths

* Add aed and locus distance parameters to config

* Update annotation preprocessing workflow for updated modules

* Add meta map to interproscan module

* Add meta map to agat managefunctionalannotation

* Update functional annotation workflow to reflect updated modules

* Fix includeInputs placement

* Fix container paths

* Fix workflow for module updates

* Remove references to params.enable_conda

* Update minimum Nextflow version to 22.10.0

* Run split maker evidence once

* Update publishing path

* Update test config to do parameter sweep

* Associate proteindb with protein

* Patch blast/makeblastdb to use a meta map

* Fix functional annotation subworkflow from module change

* Fix file staging

* Update file prefix to include LD and AED values

* Add start of table rank code

* Add rank model to abinitio workflow

* Escape dollars

* Syntax fixes

* Fix syntax

* Add publish path

* Rename output folders

* Append training data gene count to log
  • Loading branch information
mahesh-panchal authored Oct 12, 2023
1 parent 62b725c commit 8b6c7fa
Show file tree
Hide file tree
Showing 42 changed files with 478 additions and 257 deletions.
38 changes: 22 additions & 16 deletions config/abinitio_training_modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -4,44 +4,44 @@ process {
withName: 'SPLIT_MAKER_EVIDENCE' {
ext.args = ''
publishDir = [
path: "${params.outdir}/${publish_subdir}",
path: { "${params.outdir}/${publish_subdir}/split_evidence" },
mode: params.publishDir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
withName: 'MODEL_SELECTION_BY_AED' {
ext.args = [
'--value 0.3',
ext.args = { [
"--value ${meta.aed_value}",
'-a _AED',
'-t ">"'
].join(' ').trim()
].join(' ').trim() }
ext.prefix = 'codingGeneFeatures'
publishDir = [
path: "${params.outdir}/${publish_subdir}/filter",
path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/filter"},
mode: params.publishDir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
withName: 'RETAIN_LONGEST_ISOFORM' {
ext.args = ''
publishDir = [
path: "${params.outdir}/${publish_subdir}/filter",
path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/filter"},
mode: params.publishDir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
withName: 'REMOVE_INCOMPLETE_GENE_MODELS' {
ext.args = ''
publishDir = [
path: "${params.outdir}/${publish_subdir}/filter",
path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/filter"},
mode: params.publishDir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
withName: 'FILTER_BY_LOCUS_DISTANCE' {
ext.args = '-d 3000'
ext.args = { "-d ${meta.locus_distance}" }
publishDir = [
path: "${params.outdir}/${publish_subdir}/filter",
path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/filter"},
mode: params.publishDir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
Expand All @@ -63,15 +63,15 @@ process {
withName: 'GFF_FILTER_BY_BLAST' {
ext.args = ''
publishDir = [
path: "${params.outdir}/${publish_subdir}/blast_filtered_gff",
path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/blast_filtered_gff"},
mode: params.publishDir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
withName: 'GFF2GBK' {
ext.args = params.flank_region_size
publishDir = [
path: "${params.outdir}/${publish_subdir}/augustus/gbk_files",
path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/augustus/gbk_files"},
mode: params.publishDir_mode,
pattern: "*.gbk"
]
Expand All @@ -80,41 +80,47 @@ process {
ext.args = '100'
publishDir = [
[
path: "${params.outdir}/${publish_subdir}/augustus/training_data",
path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/augustus/training_data"},
mode: params.publishDir_mode,
pattern: "*.train"
],
[
path: "${params.outdir}/${publish_subdir}/augustus/test_data",
path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/augustus/test_data"},
mode: params.publishDir_mode,
pattern: "*.test"
]
]
}
withName: 'AUGUSTUS_TRAINING' {
ext.args = ''
ext.prefix = { "${species_label}-LD${meta.locus_distance}-AED${meta.aed_value}" }
publishDir = [
[
path: "${params.outdir}/${publish_subdir}/augustus_training",
path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/augustus_training"},
mode: params.publishDir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
],
[
path: "${params.maker_species_publishdir}",
path: {"${params.maker_species_publishdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}"},
mode: 'copy',
enabled: params.maker_species_publishdir != null,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
]
}
withName: 'RANK_AUGUSTUS_MODELS' {
publishDir = [
path: { "${params.outdir}/${publish_subdir}/augustus_sweep_summary" }
]
}
withName: 'CONVERT_GFF2ZFF' {
ext.args = ''
}
withName: 'SNAP_TRAINING' {
ext.args = "-categorize ${params.flank_region_size}"
ext.args2 = "-export ${params.flank_region_size} -plus"
publishDir = [
path: "${params.outdir}/${publish_subdir}/snap_training",
path: {"${params.outdir}/${publish_subdir}/LD-${meta.locus_distance}_AED-${meta.aed_value}/snap_training"},
mode: params.publishDir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
Expand Down
5 changes: 2 additions & 3 deletions config/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,12 @@ if ( params.subworkflow == 'abinitio_training' ) {
genome = 'https://github.com/nf-core/test-datasets/raw/rnaseq/reference/genome.fa'
species_label = 'test_species' // e.g. 'asecodes_parviclava'
flank_region_size = 500
aed_value = [ 0.3, 0.2 ]
locus_distance = [ 500 ]
}
process {
// Trick: Fully qualified process name has higher priority than simple name
// Otherwise settings are overridden by those in modules.config loaded after this
withName: 'ABINITIO_TRAINING:FILTER_BY_LOCUS_DISTANCE' {
ext.args = '-d 500'
}
withName: 'ABINITIO_TRAINING:GBK2AUGUSTUS' {
ext.args = '10'
}
Expand Down
56 changes: 37 additions & 19 deletions modules.json
Original file line number Diff line number Diff line change
@@ -1,23 +1,41 @@
{
"name": "NBIS Genome Annotation Workflow",
"homePage": "",
"repos": {
"nf-core/modules": {
"blast/makeblastdb": {
"git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
},
"busco": {
"git_sha": "89a84538bede7c6919f7c042fdb4c79e5e2d9d2a"
},
"fastp": {
"git_sha": "9b51362a532a14665f513cf987531f9ea5046b74"
},
"fastqc": {
"git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe"
},
"multiqc": {
"git_sha": "5138acca0985ca01c38a1c4fba917d83772b1106"
}
"name": "NBIS Genome Annotation Workflow",
"homePage": "",
"repos": {
"https://github.com/nf-core/modules.git": {
"modules": {
"nf-core": {
"blast/makeblastdb": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": ["modules"],
"patch": "modules/nf-core/blast/makeblastdb/blast-makeblastdb.diff"
},
"busco": {
"branch": "master",
"git_sha": "6d6552cb582f56b6101c452e16ee7c23073f91de",
"installed_by": ["modules"]
},
"fastp": {
"branch": "master",
"git_sha": "d497a4868ace3302016ea8ed4b395072d5e833cd",
"installed_by": ["modules"]
},
"fastqc": {
"branch": "master",
"git_sha": "9a4517e720bc812e95b56d23d15a1653b6db4f53",
"installed_by": ["modules"]
},
"multiqc": {
"branch": "master",
"git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80",
"installed_by": ["modules"]
}
}
},
"subworkflows": {
"nf-core": {}
}
}
}
}
10 changes: 5 additions & 5 deletions modules/local/agat/extractsequences.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,18 @@ process AGAT_EXTRACTSEQUENCES {
label 'process_single'

// WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
conda (params.enable_conda ? "bioconda::agat=0.9.2" : null)
conda "bioconda::agat=0.9.2"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/agat:0.9.2--pl5321hdfd78af_1':
'quay.io/biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"
'biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"

input:
path gff
tuple val(meta), path (gff)
path genome

output:
path "${gff.baseName}_proteins.fasta", emit: proteins
path "versions.yml" , emit: versions
tuple val(meta), path ("${gff.baseName}_proteins.fasta"), emit: proteins
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when
Expand Down
10 changes: 5 additions & 5 deletions modules/local/agat/filterbyattribute.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,17 @@ process AGAT_FILTERBYATTRIBUTE {
label 'process_single'

// WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
conda (params.enable_conda ? "bioconda::agat=0.9.2" : null)
conda "bioconda::agat=0.9.2"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/agat:0.9.2--pl5321hdfd78af_1':
'quay.io/biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"
'biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"

input:
path mrna_gff
tuple val(meta), path(mrna_gff)

output:
path "*.filter.gff", emit: selected_models
path "versions.yml", emit: versions
tuple val(meta), path("*.filter.gff"), emit: selected_models
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when
Expand Down
10 changes: 5 additions & 5 deletions modules/local/agat/filterbylocusdistance.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,17 @@ process AGAT_FILTERBYLOCUSDISTANCE {
label 'process_single'

// WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
conda (params.enable_conda ? "bioconda::agat=0.9.2" : null)
conda "bioconda::agat=0.9.2"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/agat:0.9.2--pl5321hdfd78af_1':
'quay.io/biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"
'biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"

input:
path coding_gene_features_gff
tuple val(meta), path (coding_gene_features_gff)

output:
path "*.good_distance.gff", emit: distanced_models
path "versions.yml" , emit: versions
tuple val(meta), path ("*.good_distance.gff"), emit: distanced_models
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when
Expand Down
10 changes: 5 additions & 5 deletions modules/local/agat/filterbymrnablastvalue.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,18 @@ process AGAT_FILTERBYMRNABLASTVALUE {
label 'process_single'

// WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
conda (params.enable_conda ? "bioconda::agat=0.9.2" : null)
conda "bioconda::agat=0.9.2"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/agat:0.9.2--pl5321hdfd78af_1':
'quay.io/biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"
'biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"

input:
path gff
tuple val(meta), path(gff)
path blast_tbl

output:
path "*_blast-filtered.gff3", emit: blast_filtered
path "versions.yml" , emit: versions
tuple val(meta), path("*_blast-filtered.gff3"), emit: blast_filtered
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when
Expand Down
10 changes: 5 additions & 5 deletions modules/local/agat/filterincompletegenecodingmodels.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,18 @@ process AGAT_FILTERINCOMPLETEGENECODINGMODELS {
label 'process_single'

// WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
conda (params.enable_conda ? "bioconda::agat=0.9.2" : null)
conda "bioconda::agat=0.9.2"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/agat:0.9.2--pl5321hdfd78af_1':
'quay.io/biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"
'biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"

input:
path coding_gene_features_gff
tuple val(meta), path (coding_gene_features_gff)
path genome

output:
path "*.complete.gff", emit: complete_gene_models
path "versions.yml" , emit: versions
tuple val(meta), path ("*.complete.gff"), emit: complete_gene_models
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when
Expand Down
10 changes: 5 additions & 5 deletions modules/local/agat/gff2zff.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,18 @@ process AGAT_GFF2ZFF {
tag "${annotation}"
label 'process_single'

conda (params.enable_conda ? "bioconda::agat=0.9.2" : null)
conda "bioconda::agat=0.9.2"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/agat:0.9.2--pl5321hdfd78af_1':
'quay.io/biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"
'biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"

input:
path annotation
tuple val(meta), path (annotation)
path genome

output:
path "*.{ann,dna}" , emit: zff
path "versions.yml", emit: versions
tuple val(meta), path ("*.{ann,dna}"), emit: zff
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when
Expand Down
10 changes: 5 additions & 5 deletions modules/local/agat/keeplongestisoform.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,17 @@ process AGAT_KEEPLONGESTISOFORM {
label 'process_single'

// WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
conda (params.enable_conda ? "bioconda::agat=0.9.2" : null)
conda "bioconda::agat=0.9.2"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/agat:0.9.2--pl5321hdfd78af_1':
'quay.io/biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"
'biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"

input:
path coding_gene_features_gff
tuple val(meta), path(coding_gene_features_gff)

output:
path "*.longest_cds.gff", emit: longest_isoform
path "versions.yml" , emit: versions
tuple val(meta), path("*.longest_cds.gff"), emit: longest_isoform
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when
Expand Down
12 changes: 6 additions & 6 deletions modules/local/agat/managefunctionalannotation.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,21 @@ process AGAT_MANAGEFUNCTIONALANNOTATION {
label 'process_single'

// WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
conda (params.enable_conda ? "bioconda::agat=0.9.2" : null)
conda "bioconda::agat=0.9.2"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/agat:0.9.2--pl5321hdfd78af_1':
'quay.io/biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"
'biocontainers/agat:0.9.2--pl5321hdfd78af_1' }"

input:
path gff
tuple val(meta), path(gff)
path merged_blast_results
path merged_interproscan_results
path blast_db

output:
path "*_plus-functional-annotation.gff", emit: gff
path "*.tsv" , emit: tsv, includeInputs: true
path "versions.yml" , emit: versions
tuple val(meta), path("*_plus-functional-annotation.gff"), emit: gff
tuple val(meta), path("*.tsv", includeInputs: true) , emit: tsv
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when
Expand Down
Loading

0 comments on commit 8b6c7fa

Please sign in to comment.