nextflow.config

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    phac-nml/mikrokondo Nextflow config file
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Default config options for all compute environments
----------------------------------------------------------------------------------------
*/


// Global default params, used in configs
params {
    // Pipeline input; unset by default
    input           = null

    // Output file name used when an index is created
    output_idx_name = null


    // Identifiers of every sequencing platform the pipeline accepts
    opt_platforms {
        // ! WARNING: a misspelt value here surfaces as esoteric errors elsewhere
        illumina = "illumina"
        ont      = "nanopore"
        pacbio   = "pacbio"
        hybrid   = "hybrid" // assumes Illumina reads together with long reads
    }

    flye_read_type     = "hq" // one of: hq, corr, raw
    platform           = params.opt_platforms.illumina
    long_read_opt      = params.opt_platforms.ont
    nanopore_chemistry = null

    run_kraken       = false // true: run kraken; false: mash screen on contigs
    hybrid_unicycler = false // run the alternative assembly workflow

    // Read-count filter
    min_reads = 1000 // minimum number of reads a sample needs to continue through the pipeline

    // Sub-sampling
    target_depth = 100

    // Parameter validation settings
    validate_params = true
    show_hidden_params = false
    validationS3PathCheck = true
    validationShowHiddenParams = false
    // Nested tool scopes defined in this file are excluded from schema validation.
    // `rasusa` was the only tool scope defined below that was missing from this list.
    validationSchemaIgnoreParams = 'locidex_summary,allele_scheme_selected,top_hit_method,abricate,locidex,assembly_status,bakta,bandage,checkm,chopper,contigs_too_short,coreutils,coverage_calc_fields,ectyper,fastp,fastqc,filtered_reads,flye,kat,kleborate,kraken,kraken_bin,kraken_species,lissero,mash,mash_meta,medaka,minimap2,mlst,mobsuite_recon,opt_platforms,pilon,pilon_iterative,pointfinder_db_tag,python3,QCReport,QCReport-fields,QCReportFields,quast,racon,rasusa,raw_reads,report_aggregate,r_contaminants,samtools,seqkit,seqtk,seqtk_size,shigeifinder,sistr,spades,spatyper,staramr,subtyping_report,top_hit_species,unicycler'
    validationFailUnrecognisedParams = false // for the qcreport fields

    // SKIP options: each flag turns one pipeline step off when true
    // TODO need to add constants section
    // TODO investigate usage of template scripts to replace mash modules
    skip_report                   = false
    skip_raw_read_metrics         = false
    skip_version_gathering        = false
    skip_subtyping                = false
    skip_bakta                    = true
    skip_abricate                 = false
    skip_checkm                   = false
    skip_depth_sampling           = false // TODO document that this should be turned off for metagenomic runs
    skip_ont_header_cleaning      = true  // TODO an awk script could likely replace this step and run much faster
    skip_polishing                = false // TODO make it clear this does not apply to hybrid assembly
    skip_species_classification   = false
    skip_mlst                     = false
    skip_mobrecon                 = false
    skip_staramr                  = false
    skip_metagenomic_detection    = false // skip classifying whether a sample is metagenomic
    skip_allele_calling           = false
    skip_length_filtering_contigs = false
    // NOTE(review): the original comment was truncated ("Label all samples as");
    // presumably this labels every sample as metagenomic — confirm
    metagenomic_run               = false


    // Reference datasets / databases (all unset by default)
    dehosting_idx = null // minimap2 (mm2) index
    mash_sketch   = null // make sure sketch comments are formatted as taxonomic strings
    bakta_db      = null
    kraken2_db    = null
    staramr_db    = null // recommended usage: rely on the default database shipped in the container


    // FastP parameters (copied into the `fastp` scope further down)
    fp_average_quality           = 25
    fp_cut_tail_mean_quality     = 15
    fp_cut_tail_window_size      = 4
    fp_complexity_threshold      = 20
    fp_qualified_phred           = 15
    fp_unqualified_percent_limit = 40
    fp_polyg_min_len             = 10
    fp_polyx_min_len             = 10
    fp_illumina_length_min       = 35
    fp_illumina_length_max       = 400
    fp_single_end_length_min     = 1000 // affects nanopore only
    fp_dedup_reads               = false

    // Bakta parameters
    ba_min_contig_length = 200


    // Quast parameters
    qt_min_contig_length = 1000

    // Mash parameters
    mh_min_kmer = 10

    // ECTyper parameters
    ec_opid                = 90   // minimum percent identity to determine O antigen presence
    ec_opcov               = 90   // minimum percent coverage of O antigen
    ec_hpid                = 95   // minimum percent identity to determine H antigen presence
    ec_hpcov               = 50   // minimum percent coverage of H antigen
    ec_enable_verification = true // enable species verification in ECTyper

    // SISTR parameters
    sr_full_cgmlst = true // use the full set of cgMLST alleles, which can include highly similar alleles

    // Allele scheme options
    override_allele_scheme = ""   // an allele scheme to use for allele calling of all samples
    lx_allele_database     = null // path to the locidex manifest.json file needed for allele calling

    // Locidex options (copied into the `locidex` scope further down)
    lx_min_evalue        = 0.0001
    lx_min_dna_len       = 1
    lx_min_aa_len        = 1
    lx_max_dna_len       = 10000000
    lx_max_aa_len        = 10000000
    lx_min_dna_ident     = 80.0
    lx_min_aa_ident      = 80.0
    lx_min_dna_match_cov = 80.0
    lx_min_aa_match_cov  = 80.0
    lx_max_target_seqs   = 10
    lx_extraction_mode   = "raw"
    lx_report_mode       = "normal"
    lx_report_prop       = "locus_name"
    lx_report_max_ambig  = 0
    lx_report_max_stop   = 0


    // Boilerplate options
    outdir = null
    // Interpolated when this line is evaluated, i.e. from the value of `outdir` at that point
    tracedir = "${params.outdir}/pipeline_info"
    publish_dir_mode = 'copy'
    email = null
    email_on_fail = null
    plaintext_email = false
    monochrome_logs = false
    hook_url = null
    help = false
    version = false


    // Nextflow automatically creates a kebab-case parameter for every camelCase one,
    // so snake_case is used here

    stage_in_mode = 'symlink'


    // Config options
    custom_config_version = 'master'
    custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
    config_profile_description = null
    config_profile_contact = null
    config_profile_url = null
    config_profile_name = null

    // Slurm executor settings
    // TODO add a max runtime for jobs so cloud, local or cluster resources are not run up
    slurm_p       = false
    slurm_profile = ""

    // Resource ceilings
    // Defaults only; these are expected to be overwritten
    max_memory = '2000.GB'
    max_cpus   = 16
    max_time   = '240.h'

    // Abricate: container image references, extra arguments and report settings
    abricate {
        singularity = "https://depot.galaxyproject.org/singularity/abricate%3A1.0.1--ha8f3691_1"
        docker      = "biocontainers/abricate:1.0.1--ha8f3691_1"
        args        = { "" } // closure yielding an empty argument string
        report_tag  = "Abricate"
        header_p    = true
    }


    // Raw read summary settings
    raw_reads {
        high_precision = false // enabling this makes things really slow
        report_tag     = "RawReadSummary"
    }

    // Container image references for coreutils (e.g. cat)
    coreutils {
        singularity = "https://depot.galaxyproject.org/singularity/coreutils%3A8.31--h14c3975_0"
        docker      = 'biocontainers/coreutils:8.31--h14c3975_0'
    }

    // Python container image references; may be switched for pypy3
    python3 {
        singularity = "https://depot.galaxyproject.org/singularity/python:3.12"
        docker      = "biocontainers/python:3.12"
    }

    // Rasusa: container image references, output extension and fixed seed
    rasusa {
        docker      = "biocontainers/rasusa:2.1.0--h715e4b3_0"
        singularity = "https://depot.galaxyproject.org/singularity/rasusa:2.1.0--h715e4b3_0"
        reads_ext   = ".sampled.fastq.gz"
        seed        = 42
    }

    // Seqtk: container image references, fixed seed, file extensions and report tag
    seqtk {
        singularity    = 'https://depot.galaxyproject.org/singularity/seqtk%3A1.4--he4a0461_1'
        docker         = 'biocontainers/seqtk:1.4--he4a0461_1'
        seed           = 42
        reads_ext      = ".sampled.fastq.gz"
        assembly_fastq = ".fastq.gz"
        report_tag     = "Seqtk"
    }

    // Seqtk base counting: same image tags as the `seqtk` scope, separate report tag
    seqtk_size {
        singularity = 'https://depot.galaxyproject.org/singularity/seqtk%3A1.4--he4a0461_1'
        docker      = 'biocontainers/seqtk:1.4--he4a0461_1'
        report_tag  = "SeqtkBaseCount"
    }

    // Locidex: images, thresholds taken from the top-level lx_* params, and fixed file/key names
    locidex {
        // awaiting singularity image build, so both entries point at the Docker Hub image
        singularity        = "docker.io/mwells14/locidex:0.2.3"
        docker             = "docker.io/mwells14/locidex:0.2.3"
        private_repository = 'mwells14/locidex:0.2.3'

        // Values forwarded from the top-level lx_* parameters
        min_evalue        = params.lx_min_evalue
        min_dna_len       = params.lx_min_dna_len
        min_aa_len        = params.lx_min_aa_len
        max_dna_len       = params.lx_max_dna_len
        max_aa_len        = params.lx_max_aa_len
        min_dna_ident     = params.lx_min_dna_ident
        min_aa_ident      = params.lx_min_aa_ident
        min_dna_match_cov = params.lx_min_dna_match_cov
        min_aa_match_cov  = params.lx_min_aa_match_cov
        max_target_seqs   = params.lx_max_target_seqs
        extraction_mode   = params.lx_extraction_mode
        report_mode       = params.lx_report_mode
        report_prop       = params.lx_report_prop
        report_max_ambig  = params.lx_report_max_ambig
        report_max_stop   = params.lx_report_max_stop
        allele_database   = params.lx_allele_database

        // Fixed names: manifest/config keys, file names, suffixes and output locations
        date_format_string         = "yyyy-MM-dd"
        manifest_db_path           = "path"
        manifest_config_key        = "config"
        manifest_config_name       = "db_name"
        manifest_config_version    = "db_version"
        manifest_name              = "manifest.json"
        config_data_file           = "config.json"
        database_config_value_date = "db_date"
        extracted_seqs_suffix      = ".extracted.seqs.fasta.gz"
        seq_store_suffix           = ".seq_store.json.gz"
        gbk_suffix                 = ".gbk.gz"
        extraction_dir             = "extracted"
        report_suffix              = ".profile.mlst.json.gz"
        db_config_output_name      = "SelectedLocidexConfig.json"
        report_tag                 = "LocidexDatabaseInformation"
    }

    // Locidex summary: report tag, key names and reporting lists
    locidex_summary {
        report_tag            = "LocidexSummary"
        data_key              = "data"
        data_profile_key      = "profile"
        data_sample_key       = "sample_name"
        missing_allele_value  = '-'
        reportable_alleles    = []
        report_exclude_fields = ["MissingAlleles"]
    }

    // Report tag only
    allele_scheme_selected {
        report_tag = "AlleleSchemeUsed"
    }

    // FastP: images, file extensions, thresholds from the top-level fp_* params, and argument strings
    fastp {
        fastq_ext   = ".trimmed.fastq.gz"
        singularity = 'https://depot.galaxyproject.org/singularity/fastp%3A0.23.2--hb7a2d85_2'
        docker      = 'biocontainers/fastp:0.23.2--hb7a2d85_2'
        html_ext    = ".html"
        json_ext    = ".json"
        report_tag  = "FastP"

        // Each value below is interpolated into `args` under the fastp flag named in its comment
        average_quality_e         = params.fp_average_quality           // --average_qual (fastp -e): reads/pairs whose average quality is lower are discarded
        cut_tail_mean_quality     = params.fp_cut_tail_mean_quality     // --cut_tail_mean_quality
        cut_tail_window_size      = params.fp_cut_tail_window_size      // --cut_tail_window_size; fastp default is 4
        complexity_threshold      = params.fp_complexity_threshold      // --complexity_threshold; fastp default is 30, not 20
        qualified_quality_phred   = params.fp_qualified_phred           // --qualified_quality_phred: quality a base needs to count as qualified
        unqualified_percent_limit = params.fp_unqualified_percent_limit // --unqualified_percent_limit: percent of bases allowed to be unqualified
        polyg_min_len             = params.fp_polyg_min_len             // --poly_g_min_len
        polyx_min_len             = params.fp_polyx_min_len             // --poly_x_min_len
        illumina_length_min       = params.fp_illumina_length_min       // --length_required (illumina args)
        illumina_length_max       = params.fp_illumina_length_max       // --length_limit (illumina args)
        single_end_length_min     = params.fp_single_end_length_min     // --length_required (single_end args)
        dedup_reads               = params.fp_dedup_reads               // not part of either args string below

        // -c is used to apply overlap analysis and cut out adapters
        args {
            illumina = "--overrepresentation_analysis --trim_poly_g --poly_g_min_len ${params.fastp.polyg_min_len} --trim_poly_x --poly_x_min_len ${params.fastp.polyx_min_len} --cut_tail --cut_tail_window_size ${params.fastp.cut_tail_window_size} --cut_tail_mean_quality ${params.fastp.cut_tail_mean_quality} --low_complexity_filter --complexity_threshold ${params.fastp.complexity_threshold} --average_qual ${params.fastp.average_quality_e} --qualified_quality_phred ${params.fastp.qualified_quality_phred} --unqualified_percent_limit ${params.fastp.unqualified_percent_limit} --length_limit ${params.fastp.illumina_length_max} --length_required ${params.fastp.illumina_length_min} --detect_adapter_for_pe"
            single_end = "--overrepresentation_analysis -Q --length_required ${params.fastp.single_end_length_min}"
        }
        report_exclude_fields = ["content_curves", "quality_curves", "mean", "kmer_count", "histogram", "overrepresented_sequences"]
    }

    // Chopper options
    // NOTE(review): unlike the neighbouring tool scopes, no `docker` image is defined here — confirm this is intended
    chopper {
        singularity = 'https://depot.galaxyproject.org/singularity/chopper%3A0.5.0--hd03093a_0'
        quality     = 0
        minlength   = 100
        fastq_ext   = ".fastq.gz"
    }

    // Flye options: read-type flags per platform, container images, output extensions and arguments
    // TODO figure out if --keep-haplotypes should be enabled
    flye {
        // TODO add in arg for data type in flye
        // Flye input flag for each read type, keyed the same way as `flye_read_type` (raw, corr, hq)
        nanopore {
            raw = "--nano-raw"
            corr = "--nano-corr"
            hq = "--nano-hq"
        }
        pacbio {
            raw = "--pacbio-raw"
            corr = "--pacbio-corr"
            hq = "--pacbio-hifi" // TODO all reads are marked as hifi
        }
        // Both engines are pinned to the same build. The docker entry previously pointed at
        // flye 2.9.2 while singularity used 2.9, so results depended on the container engine.
        singularity = 'https://depot.galaxyproject.org/singularity/flye:2.9--py39h6935b12_1'
        docker = 'biocontainers/flye:2.9--py39h6935b12_1'
        fasta_ext = ".fasta.gz"
        gfa_ext = ".gfa.gz"
        gv_ext = ".gv.gz"
        txt_ext = ".txt"
        log_ext = ".log"
        json_ext = ".json"
        polishing_iterations = 1
        // Closure, so the iteration count is read when the closure is called.
        // Normally this would be configured through task.ext.args.
        args = { "--iterations ${params.flye.polishing_iterations}" }
    }

    // SPAdes options: container images, output directory and file extensions
    spades {
        singularity         = 'https://depot.galaxyproject.org/singularity/spades:3.15.5--h95f258a_1'
        docker              = 'biocontainers/spades:3.15.5--h95f258a_1'
        outdir              = "assembly"
        scaffolds_ext       = ".scaffolds.fasta.gz"
        contigs_ext         = ".contigs.fasta.gz"
        transcripts_ext     = ".transcripts.fasta.gz"
        gene_clusters_ext   = ".gene_clusters.fasta.gz"
        assembly_graphs_ext = ".assembly.gfa.gz"
        log_ext             = ".log"
    }

    // FastQC output file extensions
    fastqc {
        html_ext = ".html"
        zip_ext  = ".zip"
    }

    // Seqkit: container images, extensions, filter field and report settings
    // TODO add to docs
    seqkit {
        singularity  = 'https://depot.galaxyproject.org/singularity/seqkit:2.2.0--h9ee0642_0'
        docker       = 'biocontainers/seqkit:2.2.0--h9ee0642_0'
        report_ext   = ".tsv"
        fasta_ext    = ".filtered.fasta.gz"
        filter_field = "max_len"
        report_tag   = "Seqkit_stats"
        header_p     = true
    }


    // QUAST: container images, report naming and arguments
    quast {
        // Not all QUAST containers work: a file system error with BWA is possible.
        // If the issue persists, reads can be left out of the module input instead.
        singularity       = 'https://depot.galaxyproject.org/singularity/quast%3A5.2.0--py39pl5321h4e691d4_3'
        docker            = 'biocontainers/quast:5.2.0--py39pl5321h4e691d4_3'
        suffix            = "quast"
        report_base       = "report"
        report_prefix     = "transposed_"
        report_ext        = ".tsv"
        report_tag        = "QUAST"
        min_contig_length = params.qt_min_contig_length
        contigs_field     = "# contigs"
        // need to add a --min-contig arg to set to 0 as it is at 500 currently; add --report-all-metrics
        // also need to see if a sam can be published from quast
        args              = { "--min-contig ${params.quast.min_contig_length} --report-all-metrics" }
        header_p          = true
    }


    // CheckM: container images, output naming and report settings
    checkm {
        // TODO add to troubleshooting: if checkm fails with EOF errors, try changing the container
        singularity   = 'https://depot.galaxyproject.org/singularity/checkm-genome%3A1.2.2--pyhdfd78af_1'
        docker        = 'biocontainers/checkm-genome:1.2.2--pyhdfd78af_1'
        alignment_ext = ".genes.aln"
        results_ext   = ".results.txt"
        tsv_ext       = ".tsv"
        folder_name   = "checkm"
        gzip_ext      = ".gz"
        lineage_ms    = "lineage.ms"
        report_tag    = "CheckM"
        header_p      = true
    }

    // Kraken2: container images, database, output suffixes and report settings
    kraken {
        // NOTE(review): the two image tags below differ after the mulled prefix, so the
        // engines do not pull the same build — confirm whether that is intended
        singularity            = 'https://depot.galaxyproject.org/singularity/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:87fc08d11968d081f3e8a37131c1f1f6715b6542-0'
        docker                 = 'biocontainers/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:941789bd7fe00db16531c26de8bf3c5c985242a5-0'
        db                     = params.kraken2_db
        classified_suffix      = "classified"
        unclassified_suffix    = "unclassified"
        report_suffix          = "report"
        output_suffix          = "output"
        tophit_level           = "S" // level used to select a sample's top hit from kraken2
        save_output_fastqs     = false
        save_reads_assignments = true
        run_kraken_quick       = false
        report_tag             = "KrakenReport"
        header_p               = false
        headers                = ["PercentID", "FragmentsRecovered", "FragmentsAssignmentTaxon", "RankCode", "NCBITaxonID", "ScientificName"]
    }

    // MLST: container images, arguments, extensions and report tag
    mlst {
        singularity = "https://depot.galaxyproject.org/singularity/mlst%3A2.23.0--hdfd78af_1"
        docker      = "quay.io/biocontainers/mlst:2.23.0--hdfd78af_1"
        args        = "" // plain string here, whereas several other scopes use a closure for args
        tsv_ext     = ".tsv"
        json_ext    = ".json"
        report_tag  = "SevenGeneMLSTReport"
    }

    // Mash: container images, file extensions, sketch settings and report settings
    mash {
        singularity       = "https://depot.galaxyproject.org/singularity/mash:2.3--he348c14_1"
        docker            = 'biocontainers/mash:2.3--he348c14_1'
        // going forward, keys labelled with _ext include the leading '.'
        mash_ext          = ".screen" // TODO refactor out of utility workflow
        output_reads_ext  = ".reads.screen"
        output_taxa_ext   = ".taxa.screen"
        output_dir        = "contamination"
        mash_sketch       = params.mash_sketch
        sketch_ext        = ".msh"
        json_ext          = ".json"
        sketch_kmer_size  = 21 // default param in mash
        min_kmer          = params.mh_min_kmer
        final_sketch_name = "GTDB_sketch"
        report_tag        = "Mash"
        header_p          = false
        headers           = ["identity", "Shared Hashes", "Median Multiplicity", "P-Value", "Query ID", "Query Note"]
    }

    // Report tag only
    mash_meta {
        report_tag = "MashMeta"
    }

    // Report tag only
    top_hit_species {
        report_tag = "SpeciesTopHit"
    }

    // Report tag only
    top_hit_method {
        report_tag = "IdentificationMethod"
    }

    // Read decontamination: container images, minimap2 presets and output naming
    r_contaminants {
        // container contains minimap2 and samtools
        singularity = "https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0"
        docker = "biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0"
        phix_fa = ""
        homo_sapiens_fa = ""
        pacbio_mg = ""
        mega_mm2_idx = params.dehosting_idx
        mm2_illumina = "-x sr" // ax outputs sam
        mm2_pac = "-x map-pb"
        mm2_ont = "-x map-ont"
        mm2_output_ext = ".sam"
        samtools_output_ext = ".fastq"
        samtools_singletons_ext = ".singleton.fq"
        samtools_output_suffix = "deconned"
        // `output_ext` used to be assigned twice in this scope; the earlier value
        // (".cleaned.fastq.gz") was always overwritten by this one, so only the
        // effective assignment is kept.
        // NOTE(review): confirm ".cleaned.fastq.gz" was not the intended extension.
        output_ext = "${samtools_output_ext}.gz"
        output_dir = "${params.mash.output_dir}/deconned_reads"
    }

    // Minimap2, used to create indices from assemblies; images are shared with r_contaminants
    minimap2 {
        singularity    = params.r_contaminants.singularity
        docker         = params.r_contaminants.docker
        index_outdir   = "indices"
        index_ext      = ".idx"
        mapped_paf_ext = ".paf"
        mapped_sam_ext = ".sam"
        mapped_outdir  = "mapped"
    }

    // Samtools: images are shared with r_contaminants; BAM and index extensions
    samtools {
        singularity = params.r_contaminants.singularity
        docker      = params.r_contaminants.docker
        bam_ext     = ".bam"
        bai_ext     = ".bai"
    }

    // Racon: container images and consensus output naming
    racon {
        singularity      = 'https://depot.galaxyproject.org/singularity/racon:1.4.20--h9a82719_1'
        docker           = 'biocontainers/racon:1.4.20--h9a82719_1'
        consensus_suffix = "_assembly_consensus.fasta"
        consensus_ext    = ".fasta.gz"
        outdir           = "polished"
    }

    // Pilon: container images, output layout and memory multiplier
    pilon {
        singularity           = 'https://depot.galaxyproject.org/singularity/pilon%3A1.24--hdfd78af_0'
        docker                = 'biocontainers/pilon:1.24--hdfd78af_0'
        outdir                = "pilon"
        fasta_ext             = ".fasta.gz"
        fasta_outdir          = "fasta"
        vcf_ext               = ".vcf"
        vcf_outdir            = "vcf"
        changes_ext           = ".changes"
        changes_outdir        = "changes"
        max_memory_multiplier = 3
    }

    // Iterative Pilon polishing: images, output layout and per-platform polishing limits
    pilon_iterative {
        // Docker containers will be converted to singularity
        singularity            = 'docker.io/mwells14/pilonpolisher:0.0.1'
        docker                 = 'docker.io/mwells14/pilonpolisher:0.0.1'
        private_repository     = 'mwells14/pilonpolisher:0.0.1'
        outdir                 = "pilon"
        fasta_ext              = ".fasta.gz"
        fasta_outdir           = "fasta"
        vcf_ext                = ".vcf"
        vcf_outdir             = "vcf"
        bam_ext                = ".bam"
        bai_ext                = ".bai"
        changes_ext            = ".changes"
        changes_outdir         = "changes"
        max_memory_multiplier  = 3 // how much memory is increased after each run failure
        max_polishing_illumina = 3
        max_polishing_pacbio   = 4
        max_polishing_nanopore = 10
    }

    // Medaka: container images, model (from nanopore_chemistry) and output settings
    medaka {
        singularity = 'https://depot.galaxyproject.org/singularity/medaka%3A1.8.0--py38hdaa7744_0'
        docker      = 'biocontainers/medaka:1.8.0--py38hdaa7744_0'
        model       = params.nanopore_chemistry
        fasta_ext   = ".fa.gz"
        outdir      = "medaka"
        batch_size  = 5
    }

    // Unicycler: container images, output naming and resource modifiers
    unicycler {
        singularity             = 'https://depot.galaxyproject.org/singularity/unicycler%3A0.5.0--py38h3b68952_2'
        docker                  = 'biocontainers/unicycler:0.5.0--py38h3b68952_2'
        scaffolds_ext           = ".scaffolds.fa.gz"
        assembly_ext            = ".assembly.gfa.gz"
        log_ext                 = ".unicycler.log"
        outdir                  = "unicycler"
        mem_modifier            = 1000 // overly large number to provide more memory for unicycler
        threads_increase_factor = 1
    }

    // MOB-suite recon: container images, arguments, output file names and report settings
    mobsuite_recon {
        singularity      = 'https://depot.galaxyproject.org/singularity/mob_suite%3A3.0.3--pyhdfd78af_0'
        docker           = 'biocontainers/mob_suite:3.0.3--pyhdfd78af_0'
        args             = { "" } // closure yielding an empty argument string
        fasta_ext        = ".fasta"
        results_ext      = ".txt"
        mob_results_file = "mobtyper_results.txt"
        contig_report    = "contig_report.txt"
        report_tag       = "MobRecon"
        header_p         = true
    }

    // StarAMR: container images, databases, extensions, arguments and report settings
    staramr {
        // NOTE(review): the singularity entry is a docker-style image name rather than a
        // depot.galaxyproject.org URL like most other scopes — confirm this is intended
        singularity             = "biocontainers/staramr:0.10.0--pyhdfd78af_0"
        docker                  = "biocontainers/staramr:0.10.0--pyhdfd78af_0"
        point_finder_db_default = null
        db                      = null
        tsv_ext                 = ".tsv"
        txt_ext                 = ".txt"
        xlsx_ext                = ".xlsx"
        args                    = { "" } // closure yielding an empty argument string
        // Approved point finder databases
        point_finder_dbs        = [
            'salmonella',
            'campylobacter',
            'enterococcus_faecalis',
            'enterococcus_faecium',
            'escherichia_coli',
            'helicobacter_pylori'
        ]
        report_tag              = "StarAMR"
        header_p                = true
    }

    // Report tag only
    pointfinder_db_tag {
        report_tag = "PointfinderDB"
    }

    // Bakta: container images, database, output extensions, threads and arguments
    bakta {
        // TODO verify the bakta version in scripts so that a release greater than 1.8 is present for force options
        singularity           = 'https://depot.galaxyproject.org/singularity/bakta%3A1.8.1--pyhdfd78af_0'
        docker                = 'biocontainers/bakta:1.8.1--pyhdfd78af_0'
        db                    = params.bakta_db
        output_dir            = "bakta"
        embl_ext              = ".embl"
        faa_ext               = ".faa"
        ffn_ext               = ".ffn"
        fna_ext               = ".fna"
        gbff_ext              = ".gbff"
        gff_ext               = ".gff3"
        threads               = 12 // default number of threads; more is not always better
        hypotheticals_tsv_ext = ".hypotheticals.tsv"
        hypotheticals_faa_ext = ".hypotheticals.faa"
        tsv_ext               = ".tsv"
        txt_ext               = ".txt"
        min_contig_length     = params.ba_min_contig_length
        args                  = { "" } // closure yielding an empty argument string
    }

    // Bandage: container images, image extension and output directory
    bandage {
        singularity = 'https://depot.galaxyproject.org/singularity/bandage:0.8.1--hc9558a2_2'
        docker      = 'biocontainers/bandage:0.8.1--hc9558a2_2'
        svg_ext     = ".svg"
        outdir      = "bandage"
    }

    // Tag appended to the report_tag of each subtyping tool scope below
    subtyping_report {
        report_tag = "Subtyping"
    }

    // ECTyper: container images, output extensions, antigen thresholds and arguments
    ectyper {
        singularity = 'https://depot.galaxyproject.org/singularity/ectyper:1.0.0--pyhdfd78af_1'
        docker = 'biocontainers/ectyper:1.0.0--pyhdfd78af_1'
        log_ext = ".log"
        tsv_ext = ".tsv"
        txt_ext = ".txt"
        // Thresholds forwarded from the top-level ec_* parameters
        opid = params.ec_opid
        opcov = params.ec_opcov
        hpid = params.ec_hpid
        hpcov = params.ec_hpcov
        // "--verify" is only passed when species verification is enabled
        verify = params.ec_enable_verification ? "--verify" : ""
        // Closure, so the values are read when the closure is called.
        // -hpid previously received the hpcov value, so the H-antigen identity
        // threshold (ec_hpid) was never applied; it now receives hpid.
        args =  { "${params.ectyper.verify} -opid ${params.ectyper.opid} -opcov ${params.ectyper.opcov} -hpid ${params.ectyper.hpid} -hpcov ${params.ectyper.hpcov}" }
        report_tag = "ECTyper${params.subtyping_report.report_tag}"
        header_p = true
    }

    // Kleborate: container images, output extension and report settings
    kleborate {
        singularity = 'https://depot.galaxyproject.org/singularity/kleborate:2.1.0--pyhdfd78af_1'
        docker      = 'biocontainers/kleborate:2.1.0--pyhdfd78af_1'
        txt_ext     = ".results.txt"
        report_tag  = "Kleborate${params.subtyping_report.report_tag}"
        header_p    = true
    }

    // SpaTyper: container images, output extension, report settings and optional inputs
    spatyper {
        // TODO figure out what the repeats input is
        singularity  = 'https://depot.galaxyproject.org/singularity/spatyper:0.3.3--pyhdfd78af_3'
        docker       = 'biocontainers/spatyper:0.3.3--pyhdfd78af_3'
        tsv_ext      = ".tsv"
        report_tag   = "SpaTyper${params.subtyping_report.report_tag}"
        header_p     = true
        repeats      = null
        repeat_order = null
    }

    // SISTR: container images, output extensions, cgMLST switch and report settings
    sistr {
        singularity      = "https://depot.galaxyproject.org/singularity/sistr_cmd:1.1.1--pyh864c0ab_2"
        docker           = 'biocontainers/sistr_cmd:1.1.1--pyh864c0ab_2'
        tsv_ext          = ".tab"
        allele_fasta_ext = ".allele.fasta"
        allele_json_ext  = ".allele.json"
        cgmlst_ext       = ".cgmlst.csv"
        full_cgmlst      = params.sr_full_cgmlst
        report_tag       = "SISTR${params.subtyping_report.report_tag}"
        header_p         = true
    }

    // LisSero: container images, output extension and report settings
    lissero {
        singularity = 'https://depot.galaxyproject.org/singularity/lissero:0.4.9--py_0'
        docker      = 'biocontainers/lissero:0.4.9--py_0'
        tsv_ext     = ".tsv"
        report_tag  = "LISSERO${params.subtyping_report.report_tag}"
        header_p    = true
    }

    // ShigEiFinder: version, container images, output extension and report settings
    shigeifinder {
        // Keep this in step with the container tags: shigeifinder does not include version info itself
        container_version = '1.3.2'
        singularity       = 'https://depot.galaxyproject.org/singularity/shigeifinder:1.3.2--pyhdfd78af_0'
        docker            = 'biocontainers/shigeifinder:1.3.2--pyhdfd78af_0'
        tsv_ext           = ".tsv"
        report_tag        = "Shigeifinder${params.subtyping_report.report_tag}"
        header_p          = true
    }

    //shigatyper {
    //    singularity = 'https://depot.galaxyproject.org/singularity/shigatyper%3A2.0.1--pyhdfd78af_0'
    //    docker = 'biocontainers/shigatyper:2.0.1--pyhdfd78af_0'
    //    tsv_ext = ".tsv"
    //    report_tag = "ShigaTyper${params.subtyping_report.report_tag}"
    //    //report_tag = params.subtyping_report.report_tag
    //    header_p = true
    //}

    // Field names used for coverage calculation
    coverage_calc_fields {
        // Alternative kept for reference: params.seqtk_size.report_tag (trimmed base count size)
        bp_field  = [params.raw_reads.report_tag, "combined", "total_bp"]
        fixed_cov = "FixedGenomeSizeDepth"
        auto_cov  = "DetectedGenomeSizeDepth"
    }

    // Report tag only
    assembly_status {
        report_tag = "AssemblyCompleted"
    }

    // Report tag only (the tag spelling is a runtime value and is kept as is)
    contigs_too_short {
        report_tag = "MaxContigToShort"
    }

    // Read-count threshold (from min_reads) and its report tag
    filtered_reads {
        threshold  = params.min_reads
        report_tag = "MeetsReadThreshold"
    }

    // Kraken binning settings
    kraken_bin {
        // Python only
        taxonomic_level = "G"
        fasta_ext       = ".fasta.gz"
    }

    // Suffix for the flattened per-sample JSON
    report_aggregate {
        sample_flat_suffix = ".flat_sample.json"
    }


    /*
    Base fields for an organism for quality control checking
    search: The phrase to search for to determine it is a specific organism
    average_quality: average quality of bases must be greater than or equal to
    min_n50: n50 value must be greater than or equal to
    max_n50: n50 value must be less than or equal to
    min_nr_contigs: lower bound for number of contigs
    max_nr_contigs: upper bound for number of contigs
    fixed_genome_size = A set genome size to use for naively calculating genome size
    min_length: minimum acceptable length
    max_length: maximum acceptable genome length
    search =
    ! Need to provide filtering based on original quality score on raw data
    ! TODO need to determine if this should happen pre-decon
    quality_field = FastP.summary.before_filtering.
    raw_average_quality =
    min_n50 =
    max_n50 =
    min_nr_contigs =
    max_nr_contigs =
    fixed_genome_size =
    min_length =
    max_length =
    max_checkm_contamination = 1.0
    average_coverage =
    TODO add in tag referencing relevant subtyping tool for including results in final summary csv
    */


    QCReport {
        escherichia {
            search = "Escherichia coli"
            raw_average_quality = 30
            min_n50 = 50000
            max_n50 = 6000000
            min_nr_contigs = 1
            max_nr_contigs = 500
            fixed_genome_size = 5000000
            min_length = 4500000
            max_length = 6000000
            max_checkm_contamination = 3.0
            min_average_coverage = 40
        }
        salmonella {
            search = "Salmonella"
            raw_average_quality = 30
            min_n50 = 90000
            max_n50 = 6000000
            min_nr_contigs = 1
            max_nr_contigs = 200
            fixed_genome_size = 5000000
            min_length = 4400000
            max_length = 6000000
            max_checkm_contamination = 3.0
            min_average_coverage = 40
        }
        shigella {
            search = "Shigella"
            raw_average_quality = 30
            min_n50 = 18000
            max_n50 =  5000000
            min_nr_contigs = 1
            max_nr_contigs = 500
            fixed_genome_size = 5000000
            min_length = 4300000
            max_length = 5000000
            max_checkm_contamination = 3.0
            min_average_coverage = 40
        }
        listeria {
            search = "Listeria"
            raw_average_quality = 30
            min_n50 = 50000
            max_n50 = 3200000
            min_nr_contigs = 1
            max_nr_contigs = 200
            fixed_genome_size = 3000000
            min_length = 2700000
            max_length = 3200000
            max_checkm_contamination = 3.0
            min_average_coverage = 30
        }
        campylobacter_jejuni {
            search = "Campylobacter jejuni"
            raw_average_quality = 30
            min_n50 = 100000
            max_n50 = 2000000
            min_nr_contigs = 1
            max_nr_contigs = 150
            fixed_genome_size = 1800000
            min_length = 1400000
            max_length = 2000000
            max_checkm_contamination = 3.0
            min_average_coverage = 30
        }
        campylobacter_coli {
            search = "Campylobacter coli"
            raw_average_quality = 30
            min_n50 = 100000
            max_n50 = 2000000
            min_nr_contigs = 1
            max_nr_contigs = 150
            fixed_genome_size = 1800000
            min_length = 1400000
            max_length = 2000000
            max_checkm_contamination = 3.0
            min_average_coverage = 30
        }
        vibrio_cholerae {
            search = "Vibrio cholerae"
            raw_average_quality = 30
            min_n50 = 100000
            max_n50 = 4300000
            min_nr_contigs = 1
            max_nr_contigs = 150
            fixed_genome_size = 4000000
            min_length = 3800000
            max_length = 4300000
            max_checkm_contamination = 3.0
            min_average_coverage = 40
        }
        // Some of these defaults are made up
        klebsiella {
            search = "Klebsiella"
            raw_average_quality = 30
            min_n50 = 100000
            max_n50 = 6500000
            min_nr_contigs = 1
            max_nr_contigs = 500
            min_length = 4500000
            max_length = 6500000
            max_checkm_contamination = 3.0
            min_average_coverage = 50
        }
        // Organism-specific QC thresholds for entries matching "Staphylococcus".
        // The threshold keys below are the names listed in the `compare_fields` of
        // params.QCReportFields, which is where each comparison type is declared.
        // NOTE(review): no `fixed_genome_size` is set here, unlike the species-level
        // entries and `fallthrough` — confirm the fixed-coverage calculation copes with its absence.
        staphylococcus {
            search = "Staphylococcus" // NOTE(review): presumably matched against the detected organism name — confirm
            raw_average_quality = 30 // threshold for QCReportFields.raw_average_quality (comp_type "ge")
            min_n50 = 30000 // min/max pair for QCReportFields.n50_value (comp_type "range")
            max_n50 = 3500000
            min_nr_contigs = 1 // min/max pair for QCReportFields.nr_contigs (comp_type "range")
            max_nr_contigs = 250
            min_length = 2500000 // min/max pair for QCReportFields.length (comp_type "range")
            max_length = 3500000
            max_checkm_contamination = 3.0 // threshold for QCReportFields.checkm_contamination (comp_type "le")
            min_average_coverage = 50 // threshold for QCReportFields.average_coverage (comp_type 'ge')
        }
        // Organism-specific QC thresholds for entries matching "Enterobacter".
        // The threshold keys below are the names listed in the `compare_fields` of
        // params.QCReportFields, which is where each comparison type is declared.
        // NOTE(review): no `fixed_genome_size` is set here, unlike the species-level
        // entries and `fallthrough` — confirm the fixed-coverage calculation copes with its absence.
        enterobacter {
            search = "Enterobacter" // NOTE(review): presumably matched against the detected organism name — confirm
            raw_average_quality = 30 // threshold for QCReportFields.raw_average_quality (comp_type "ge")
            min_n50 = 100000 // min/max pair for QCReportFields.n50_value (comp_type "range")
            max_n50 = 6500000
            min_nr_contigs = 1 // min/max pair for QCReportFields.nr_contigs (comp_type "range")
            max_nr_contigs = 500
            min_length = 4500000 // min/max pair for QCReportFields.length (comp_type "range")
            max_length = 6500000
            max_checkm_contamination = 3.0 // threshold for QCReportFields.checkm_contamination (comp_type "le")
            min_average_coverage = 50 // threshold for QCReportFields.average_coverage (comp_type 'ge')
        }
        // Organism-specific QC thresholds for entries matching "Citrobacter".
        // The threshold keys below are the names listed in the `compare_fields` of
        // params.QCReportFields, which is where each comparison type is declared.
        // NOTE(review): no `fixed_genome_size` is set here, unlike the species-level
        // entries and `fallthrough` — confirm the fixed-coverage calculation copes with its absence.
        citrobacter {
            search = "Citrobacter" // NOTE(review): presumably matched against the detected organism name — confirm
            raw_average_quality = 30 // threshold for QCReportFields.raw_average_quality (comp_type "ge")
            min_n50 = 100000 // min/max pair for QCReportFields.n50_value (comp_type "range")
            max_n50 = 6500000
            min_nr_contigs = 1 // min/max pair for QCReportFields.nr_contigs (comp_type "range")
            max_nr_contigs = 500
            min_length = 4500000 // min/max pair for QCReportFields.length (comp_type "range")
            max_length = 6500000
            max_checkm_contamination = 3.0 // threshold for QCReportFields.checkm_contamination (comp_type "le")
            min_average_coverage = 50 // threshold for QCReportFields.average_coverage (comp_type 'ge')
        }
        // Organism-specific QC thresholds for entries matching "Acinetobacter".
        // The threshold keys below are the names listed in the `compare_fields` of
        // params.QCReportFields, which is where each comparison type is declared.
        // NOTE(review): no `fixed_genome_size` is set here, unlike the species-level
        // entries and `fallthrough` — confirm the fixed-coverage calculation copes with its absence.
        acinetobacter {
            search = "Acinetobacter" // NOTE(review): presumably matched against the detected organism name — confirm
            raw_average_quality = 30 // threshold for QCReportFields.raw_average_quality (comp_type "ge")
            min_n50 = 100000 // min/max pair for QCReportFields.n50_value (comp_type "range")
            max_n50 = 6500000
            min_nr_contigs = 1 // min/max pair for QCReportFields.nr_contigs (comp_type "range")
            max_nr_contigs = 500
            min_length = 4500000 // min/max pair for QCReportFields.length (comp_type "range")
            max_length = 6500000
            max_checkm_contamination = 3.0 // threshold for QCReportFields.checkm_contamination (comp_type "le")
            min_average_coverage = 50 // threshold for QCReportFields.average_coverage (comp_type 'ge')
        }
        // Organism-specific QC thresholds for entries matching "Pseudomonas".
        // The threshold keys below are the names listed in the `compare_fields` of
        // params.QCReportFields, which is where each comparison type is declared.
        // NOTE(review): no `fixed_genome_size` is set here, unlike the species-level
        // entries and `fallthrough` — confirm the fixed-coverage calculation copes with its absence.
        pseudomonas {
            search = "Pseudomonas" // NOTE(review): presumably matched against the detected organism name — confirm
            raw_average_quality = 30 // threshold for QCReportFields.raw_average_quality (comp_type "ge")
            min_n50 = 100000 // min/max pair for QCReportFields.n50_value (comp_type "range")
            max_n50 = 7000000
            min_nr_contigs = 1 // min/max pair for QCReportFields.nr_contigs (comp_type "range")
            max_nr_contigs = 500
            min_length = 5000000 // min/max pair for QCReportFields.length (comp_type "range")
            max_length = 7000000
            max_checkm_contamination = 3.0 // threshold for QCReportFields.checkm_contamination (comp_type "le")
            min_average_coverage = 50 // threshold for QCReportFields.average_coverage (comp_type 'ge')
        }
        // Organism-specific QC thresholds for entries matching "Enterococcus".
        // The threshold keys below are the names listed in the `compare_fields` of
        // params.QCReportFields, which is where each comparison type is declared.
        // NOTE(review): no `fixed_genome_size` is set here, unlike the species-level
        // entries and `fallthrough` — confirm the fixed-coverage calculation copes with its absence.
        enterococcus {
            search = "Enterococcus" // NOTE(review): presumably matched against the detected organism name — confirm
            raw_average_quality = 30 // threshold for QCReportFields.raw_average_quality (comp_type "ge")
            min_n50 = 15000 // min/max pair for QCReportFields.n50_value (comp_type "range")
            max_n50 = 3500000
            min_nr_contigs = 1 // min/max pair for QCReportFields.nr_contigs (comp_type "range")
            max_nr_contigs = 550
            min_length = 2500000 // min/max pair for QCReportFields.length (comp_type "range")
            max_length = 3500000
            max_checkm_contamination = 3.0 // threshold for QCReportFields.checkm_contamination (comp_type "le")
            min_average_coverage = 50 // threshold for QCReportFields.average_coverage (comp_type 'ge')
        }
            // Organism-specific QC thresholds for entries matching "Clostridioides".
            // The threshold keys below are the names listed in the `compare_fields` of
            // params.QCReportFields, which is where each comparison type is declared.
            // NOTE(review): no `fixed_genome_size` is set here, unlike the species-level
            // entries and `fallthrough` — confirm the fixed-coverage calculation copes with its absence.
            clostridioides {
            search = "Clostridioides" // NOTE(review): presumably matched against the detected organism name — confirm
            raw_average_quality = 30 // threshold for QCReportFields.raw_average_quality (comp_type "ge")
            min_n50 = 20000 // min/max pair for QCReportFields.n50_value (comp_type "range")
            max_n50 = 4500000
            min_nr_contigs = 1 // min/max pair for QCReportFields.nr_contigs (comp_type "range")
            max_nr_contigs = 600
            min_length = 4000000 // min/max pair for QCReportFields.length (comp_type "range")
            max_length = 4500000
            max_checkm_contamination = 3.0 // threshold for QCReportFields.checkm_contamination (comp_type "le")
            min_average_coverage = 50 // threshold for QCReportFields.average_coverage (comp_type 'ge')
        }
        // Default entry carrying the same keys as the species-level entries, with the
        // N50, contig-count, genome-size and length limits left null. Only read quality,
        // CheckM contamination and average coverage carry concrete thresholds.
        // NOTE(review): presumably used when no organism-specific entry matches — confirm in the QC report code.
        fallthrough {
            search = "No organism specific QC data available."
            raw_average_quality = 30 // threshold for QCReportFields.raw_average_quality (comp_type "ge")
            min_n50 = null // no N50 bounds defined for this entry
            max_n50 = null
            min_nr_contigs = null // no contig-count bounds defined for this entry
            max_nr_contigs = null
            fixed_genome_size = null // NOTE(review): presumably disables fixed-size coverage for unknown organisms — confirm
            min_length = null // no genome-length bounds defined for this entry
            max_length = null
            max_checkm_contamination = 3.0 // threshold for QCReportFields.checkm_contamination (comp_type "le")
            min_average_coverage = 30 // threshold for QCReportFields.average_coverage (comp_type 'ge')
        }
    }

    // Declares each QC check reported for a sample. Every entry provides:
    //   path           - list of keys locating the measured value in the final report
    //   coerce_type    - type name the value is converted to ('Float', 'Integer', 'Bool')
    //   compare_fields - names of the per-organism threshold keys (see the organism entries
    //                    in QCReport) that the value is compared against
    //   comp_type      - comparison identifier ("ge", "le", "range", "bool")
    //   on             - whether the check is enabled
    //   low_msg / high_msg - messages attached to the check
    // NOTE(review): the exact semantics of comp_type, and when low_msg/high_msg are emitted,
    // live in the reporting code rather than in this file — confirm there before changing values.
    QCReportFields {
        // Configure paths in the final report where the relevant data is found
        raw_average_quality {
            path = [params.raw_reads.report_tag, "combined", "qual_mean"]
            coerce_type = 'Float'
            compare_fields = ['raw_average_quality']
            comp_type = "ge"
            on = true
            low_msg = "Base quality is poor, resequencing is recommended."
        }
        average_coverage {
            /*
                * Example path below can be used for quast coverage
                path = [params.quast.report_tag, "0", "Avg. coverage depth"]
                * Use the path below to use quast's base_pairs/genome length for coverage
                path = [params.coverage_calc_fields.auto_cov]

            */

            path = [params.coverage_calc_fields.fixed_cov]
            coerce_type = 'Float'
            compare_fields = ['min_average_coverage']
            comp_type = 'ge'
            on = true
            low_msg = "Depth of coverage from assembly is lower than expected. A top-up run is likely needed."
        }
        // Disabled check (on = false) with no thresholds; the value is treated as a boolean.
        metagenomic {
            path = [params.mash_meta.report_tag]
            coerce_type = 'Bool'
            compare_fields = []
            comp_type = "bool"
            on = false
        }
        n50_value {
            path = [params.quast.report_tag, "0", "N50"]
            coerce_type = 'Integer'
            compare_fields = ['min_n50', 'max_n50']
            comp_type = "range"
            on = true
            low_msg = "N50 value is low, this could be due to many reasons involving contamination, poor template quality or insufficient template quantity. Reisolation and resequencing may be needed."
            high_msg = "N50 value is high, this is likely a good thing if you have fewer contigs than expected."
        }
        nr_contigs {
            path = [params.quast.report_tag, "0", params.quast.contigs_field]
            coerce_type = 'Integer'
            compare_fields = ['min_nr_contigs', 'max_nr_contigs']
            comp_type = "range"
            on = true
            low_msg = "Fewer contigs than expected, if your genome length is of an expected size and you have a high N50 you likely just have a high quality assembly."
            high_msg = "More contigs are present than expected, if your N50 is low and your genome length is shorter than expected you likely need to reisolate and resequence/top-up your sample."
        }
        length {
            path = [params.quast.report_tag, "0", "Total length"]
            coerce_type = 'Integer'
            compare_fields = ['min_length', 'max_length']
            comp_type = "range"
            on = true
            low_msg = "Genome length lower than expected, you may need to resequence the sample."
            high_msg = "Genome length is higher than expected, contamination is potentially present."
        }
        checkm_contamination {
            path = [params.checkm.report_tag, "0", "Contamination"]
            coerce_type = 'Float'
            compare_fields = ['max_checkm_contamination']
            comp_type = "le"
            on = true
            high_msg = "Potential contamination is present in your sample. You may need to reisolate and resequence your sample."
        }
    }

}

// Load base.config by default for all pipelines
includeConfig 'conf/base.config'

// Load nf-core custom profiles from different Institutions
// The include is best-effort: if it throws, a warning is written to stderr and
// configuration parsing continues without the institutional profiles.
try {
    includeConfig "${params.custom_config_base}/nfcore_custom.config"
} catch (Exception e) {
    System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config")
}

// Load mk-kondo/mikrokondo custom profiles from different institutions.
// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs!
// try {
//   includeConfig "${params.custom_config_base}/pipeline/mikrokondo.config"
// } catch (Exception e) {
//   System.err.println("WARNING: Could not load nf-core/config/mikrokondo profiles: ${params.custom_config_base}/pipeline/mikrokondo.config")
// }


// Named configuration profiles selectable with `-profile <name>[,<name>...]`.
// Each container-engine profile enables exactly one engine and explicitly
// disables conda and the other engines.
profiles {
    // Debugging aid: dump task hashes, print the host name before every task
    // script, and keep work directories after the run (cleanup = false).
    debug {
        dumpHashes             = true
        process.beforeScript   = 'echo $HOSTNAME'
        cleanup                = false
    }
    docker {
        docker.enabled          = true
        conda.enabled           = false
        singularity.enabled     = false
        podman.enabled          = false
        shifter.enabled         = false
        charliecloud.enabled    = false
        apptainer.enabled       = false
        // Run containers with the invoking user's uid:gid
        docker.runOptions       = '-u $(id -u):$(id -g)'
    }
    // Only overrides docker.runOptions (adds --platform=linux/amd64); it does not
    // enable Docker itself.
    // NOTE(review): presumably meant to be combined with the docker profile — confirm.
    arm {
        docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64'
    }
    singularity {
        singularity.enabled     = true
        singularity.autoMounts  = true
        conda.enabled           = false
        docker.enabled          = false
        podman.enabled          = false
        shifter.enabled         = false
        charliecloud.enabled    = false
        apptainer.enabled       = false
    }
    podman {
        podman.enabled          = true
        conda.enabled           = false
        docker.enabled          = false
        singularity.enabled     = false
        shifter.enabled         = false
        charliecloud.enabled    = false
        apptainer.enabled       = false
    }
    shifter {
        shifter.enabled         = true
        conda.enabled           = false
        docker.enabled          = false
        singularity.enabled     = false
        podman.enabled          = false
        charliecloud.enabled    = false
        apptainer.enabled       = false
    }
    charliecloud {
        charliecloud.enabled    = true
        conda.enabled           = false
        docker.enabled          = false
        singularity.enabled     = false
        podman.enabled          = false
        shifter.enabled         = false
        apptainer.enabled       = false
    }
    apptainer {
        apptainer.enabled       = true
        apptainer.autoMounts    = true
        conda.enabled           = false
        docker.enabled          = false
        singularity.enabled     = false
        podman.enabled          = false
        shifter.enabled         = false
        charliecloud.enabled    = false
    }
    // Fixed local-executor limits (16 CPUs, 60 GB) for Gitpod workspaces
    gitpod {
        executor.name          = 'local'
        executor.cpus          = 16
        executor.memory        = 60.GB
    }
    // Test profiles: each one pulls its settings from the matching file under conf/
    test_assembly {includeConfig 'conf/test_assembly.config'}
    test_illumina {includeConfig 'conf/test_illumina.config'}
    test_nanopore {includeConfig 'conf/test_nanopore.config'}
    test_pacbio {includeConfig 'conf/test_pacbio.config'}
    test_stub {includeConfig 'conf/test_stub.config'}
    // TODO: remove once merged into main
    test_samplesheet {includeConfig 'conf/test_sample_sheet.config'}

    test { includeConfig 'conf/test.config' }
}

// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile
// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled
// Set to your registry if you have a mirror of containers
apptainer.registry   = 'quay.io'
docker.registry      = 'quay.io'
podman.registry      = 'quay.io'
singularity.registry = 'quay.io'

// Override the default Docker registry when required
// `override_configured_container_registry` is a pipeline-defined value stored under process.ext.
// NOTE(review): presumably read where module containers are declared — confirm how it is consumed.
process.ext.override_configured_container_registry = true

// Nextflow plugins required by the pipeline.
plugins {
    // nf-validation is pinned to 1.1.3; the 2.0.0 line is kept commented out for reference
    //id 'nf-validation@2.0.0'
    id 'nf-validation@1.1.3'
    // NOTE(review): nf-prov carries no version pin, unlike the other plugins — consider pinning for reproducible runs
    id 'nf-prov'
    id 'nf-iridanext@0.2.0'
}

includeConfig 'conf/irida_next.config'

// Provenance reporting settings (the `prov` scope; nf-prov is loaded in the plugins block).
// Two outputs are written under params.outdir, each replacing any existing file.
prov {
    enabled = true
    formats {
        // "legacy" format output
        legacy {
            file = "${params.outdir}/manifest.json"
            overwrite = true
        }
        // "bco" format output
        bco {
            file = "${params.outdir}/bco.json"
            overwrite = true
        }
    }
}


// Export these variables to prevent local Python/R libraries from conflicting with those in the container
// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container.
// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable.

// Environment variables exported to every task's execution environment.
env {
    PYTHONNOUSERSITE = 1                        // keep Python from using the user site-packages directory
    R_PROFILE_USER   = "/.Rprofile"             // point R's user profile at a fixed path instead of the home directory
    R_ENVIRON_USER   = "/.Renviron"             // likewise for R's user environment file
    JULIA_DEPOT_PATH = "/usr/local/share/julia" // fixed depot path for Julia packages in the container
}

// Capture exit codes from upstream processes when piping
// bash flags: -e exit on the first failing command, -u treat unset variables as errors,
// -o pipefail make a pipeline fail if any command in it fails
process.shell = ['/bin/bash', '-euo', 'pipefail']

// Execution reports. The timestamp is computed once when this config is evaluated,
// so the timeline, report, trace and DAG files of a run all share the same suffix.
// All four files are written under params.tracedir.
def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
timeline {
    enabled = true
    file    = "${params.tracedir}/execution_timeline_${trace_timestamp}.html"
}
report {
    enabled = true
    file    = "${params.tracedir}/execution_report_${trace_timestamp}.html"
}
trace {
    enabled = true
    // Explicit list of columns written to the trace file
    fields = "task_id,hash,process,tag,name,status,script,scratch,env,container,hostname,duration,realtime,%cpu,rss,vmem,rchar,wchar"
    file    = "${params.tracedir}/execution_trace_${trace_timestamp}.txt"
}
dag {
    enabled = true
    file    = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html"
}

// Pipeline metadata.
manifest {
    name            = 'phac-nml/mikrokondo'
    author          = """Matthew Wells, James Robertson, Aaron Petkau, Christy-Lynn Peterson, Eric Marinier"""
    homePage        = 'https://github.com/phac-nml/mikrokondo'
    description     = """Mikrokondo"""
    mainScript      = 'main.nf'
    // The leading `!` makes the version requirement mandatory: Nextflow stops instead of only warning
    nextflowVersion = '!>=23.04.0'
    version         = '0.4.2'
    defaultBranch   = 'main'
    doi             = ''
}

// Load modules.config for DSL2 module specific options
includeConfig 'conf/modules.config'

// Function to ensure that resource requirements don't go beyond
// a maximum limit
/**
 * Clamp a requested resource value to the corresponding pipeline-wide maximum.
 *
 * @param obj  the requested value (a memory amount, a duration, or a CPU count)
 * @param type which limit applies: 'memory' (params.max_memory), 'time'
 *             (params.max_time) or 'cpus' (params.max_cpus)
 * @return the smaller of `obj` and the configured maximum. `obj` is returned
 *         unchanged when the configured maximum cannot be parsed (an error line
 *         is printed) or when `type` is not one of the recognised names.
 */
def check_max(obj, type) {
    if (type == 'memory') {
        try {
            def max_memory = params.max_memory as nextflow.util.MemoryUnit
            // compareTo() only guarantees the sign of its result, so test `> 0` rather than `== 1`
            return obj.compareTo(max_memory) > 0 ? max_memory : obj
        } catch (all) {
            println "   ### ERROR ###   Max memory '${params.max_memory}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'time') {
        try {
            def max_time = params.max_time as nextflow.util.Duration
            return obj.compareTo(max_time) > 0 ? max_time : obj
        } catch (all) {
            println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'cpus') {
        try {
            return Math.min( obj, params.max_cpus as int )
        } catch (all) {
            println "   ### ERROR ###   Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
            return obj
        }
    }
    // Unrecognised resource type: there is no limit to apply, so hand the request
    // back unchanged instead of silently returning null.
    return obj
}