From 7edf2aa35d75c7000984ec02138c6441bc6e71f2 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Thu, 24 Oct 2024 17:01:39 -0500 Subject: [PATCH] identified sticking point for sample names not being passed to the iridanext config --- conf/irida_next.config | 2 +- main.nf | 1 + modules/local/report.nf | 4 ++ nextflow.config | 3 +- tests/main.nf.test | 126 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 134 insertions(+), 2 deletions(-) diff --git a/conf/irida_next.config b/conf/irida_next.config index 24cc2d07..567e570c 100755 --- a/conf/irida_next.config +++ b/conf/irida_next.config @@ -11,7 +11,7 @@ iridanext { overwrite = true validate = false files { - idkey = "sample" + idkey = "irida_id" global = [ "**/FinalReports/Aggregated/Json/final_report.json", "**/FinalReports/Aggregated/Tables/final_report.tsv" diff --git a/main.nf b/main.nf index 71223572..24336b72 100644 --- a/main.nf +++ b/main.nf @@ -112,6 +112,7 @@ workflow MIKROKONDO { ch_versions = ch_versions.mix(REPORT_AGGREGATE.out.versions) + // TODO need to add logic to merge this channel with a previous one to get its INX id updated_samples = REPORT_AGGREGATE.out.flat_samples.flatten().map{ sample -> def name_trim = sample.getName() diff --git a/modules/local/report.nf b/modules/local/report.nf index 0eccfe08..9b9b015a 100644 --- a/modules/local/report.nf +++ b/modules/local/report.nf @@ -47,6 +47,10 @@ process REPORT{ sample_data[meta_data.sample]["meta"] = [:] } + + // TODO add a condition around this to only be appended if iridanext is enabled + update_map_values(sample_data, meta_data, "irida_id") + update_map_values(sample_data, meta_data, "metagenomic") update_map_values(sample_data, meta_data, "assembly") update_map_values(sample_data, meta_data, "hybrid") diff --git a/nextflow.config b/nextflow.config index c90c43e6..256766f3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -7,6 +7,7 @@ */ + // Global default params, used in configs params { // Input options @@ -43,7 +44,7 @@ params { 
show_hidden_params = false validationS3PathCheck = true validationShowHiddenParams = false - validationSchemaIgnoreParams = 'rasusa,locidex_summary,allele_scheme_selected,top_hit_method,abricate,locidex,assembly_status,bakta,bandage,checkm,chopper,contigs_too_short,coreutils,coverage_calc_fields,ectyper,fastp,fastqc,filtered_reads,flye,kat,kleborate,kraken,kraken_bin,kraken_species,lissero,mash,mash_meta,medaka,minimap2,mlst,mobsuite_recon,opt_platforms,pilon,pilon_iterative,pointfinder_db_tag,python3,QCReport,QCReport-fields,QCReportFields,quast,racon,raw_reads,report_aggregate,r_contaminants,samtools,seqkit,seqtk,seqtk_size,shigeifinder,sistr,spades,spatyper,staramr,subtyping_report,top_hit_species,unicycler' + validationSchemaIgnoreParams = '__in_iridanext,rasusa,locidex_summary,allele_scheme_selected,top_hit_method,abricate,locidex,assembly_status,bakta,bandage,checkm,chopper,contigs_too_short,coreutils,coverage_calc_fields,ectyper,fastp,fastqc,filtered_reads,flye,kat,kleborate,kraken,kraken_bin,kraken_species,lissero,mash,mash_meta,medaka,minimap2,mlst,mobsuite_recon,opt_platforms,pilon,pilon_iterative,pointfinder_db_tag,python3,QCReport,QCReport-fields,QCReportFields,quast,racon,raw_reads,report_aggregate,r_contaminants,samtools,seqkit,seqtk,seqtk_size,shigeifinder,sistr,spades,spatyper,staramr,subtyping_report,top_hit_species,unicycler' validationFailUnrecognisedParams = false // for the qcreport fields // SKIP options diff --git a/tests/main.nf.test b/tests/main.nf.test index 261ff7cb..5d741ee6 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -100,6 +100,132 @@ nextflow_pipeline { } } + test("Should run without failure unzipped IRIDANext id") { + tag "succeed_assembly_inx_id" + + when { + params { + input = "https://raw.githubusercontent.com/phac-nml/mikrokondo/refs/heads/dev/tests/data/samplesheets/samplesheet-small-assembly-inx.csv" + outdir = "results" + platform = "illumina" + + mash_sketch = 
"https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy-staph-ecoli.msh" + mh_min_kmer = 1 + + dehosting_idx = "https://github.com/phac-nml/mikrokondo/raw/dev/tests/data/databases/campy.mmi" + kraken2_db = "$baseDir/tests/data/kraken2/test" + + min_reads = 100 + skip_allele_calling = true + + QCReport { + fallthrough { + search = "No organism specific QC data available." + raw_average_quality = 30 + min_n50 = null + max_n50 = null + min_nr_contigs = null + max_nr_contigs = null + fixed_genome_size = 1000 + min_length = null + max_length = null + max_checkm_contamination = 3.0 + min_average_coverage = 30 + } + } + + + skip_bakta = true + skip_staramr = false + skip_mobrecon = false + skip_checkm = false + skip_raw_read_metrics = false + skip_polishing = false + + max_memory = "2.GB" + max_cpus = 1 + } + } + + then { + + assert workflow.success + assert path("$launchDir/results").exists() + + // parse output json file + def json = path("$launchDir/results/FinalReports/Aggregated/Json/final_report.json").json + + assert json.short.short.RawReadSummary.R1."total_bp".equals(118750) + assert json.short.short.RawReadSummary.R1."total_reads".equals(475) + assert json.short.short.RawReadSummary.R1."read_qual_mean".equals(40.0) + assert json.short.short.RawReadSummary.R1."mean_sequence_length".equals(250.0) + + assert json.short.short.FastP.summary.sequencing.equals("paired end (250 cycles + 250 cycles)") + assert json.short.short.FastP.summary.before_filtering.total_reads.equals(950) + assert json.short.short.FastP.filtering_result.passed_filter_reads.equals(950) + assert json.short.short.FastP.filtering_result.low_quality_reads.equals(0) + assert json.short.short.FastP.insert_size.peak.equals(347) + + //assert json.short.meta.metagenomic.equals(false) // Currently, this is "null". 
+ assert json.short.meta.assembly.equals(false) + assert json.short.meta.hybrid.equals(false) + assert json.short.meta.single_end.equals(false) + assert json.short.meta.merge.equals(false) + assert json.short.meta.downsampled.equals(false) + + assert json.short.short.AssemblyCompleted.equals(true) + assert json.short.short.QUAST."0"."Total length (>= 0 bp)".equals("4949") + assert json.short.short.QUAST."0"."Largest contig".equals("4949") + assert json.short.short.QUAST."0"."GC (%)".equals("52.96") + assert json.short.short.QUAST."0"."Avg. coverage depth".equals("47") + + // Below two values should be empty + assert json.short.short.StarAMR."0"."Genotype".equals("None") + assert json.short.short.StarAMR."0"."Predicted Phenotype".equals("Susceptible") + assert json.short.short.StarAMR."0"."Genome Length".equals("4949") + + assert json.short.short.CheckM."0"."# genomes".equals("5656") + assert json.short.short.CheckM."0"."# markers".equals("56") + assert json.short.short.CheckM."0"."# marker sets".equals("24") + assert json.short.short.CheckM."0".Contamination.equals("0.00") + + assert json.short.short.SevenGeneMLSTReport[0].filename.equals("short.filtered.fasta.gz") + + assert json.short.short.Abricate."0".RESISTANCE.equals("NoData") // All Abricate results for this are "NoData". 
+ + def assembly_path = "$launchDir/results/Assembly/FinalAssembly/short/short.final.filtered.assembly.fasta.gz" + assert path(assembly_path).exists() + + // parse assembly file + def assembly_header = path(assembly_path).linesGzip[0] + assert assembly_header.startsWith(">NODE_1_length_4949_cov_23.917254") // _pilon_pilon_pilon gets appended + + // compare IRIDA Next JSON output + def iridanext_json = path("$launchDir/results/iridanext.output.json").json + def iridanext_global = iridanext_json.files.global + def iridanext_samples = iridanext_json.files.samples + def iridanext_metadata = iridanext_json.metadata.samples + + // output files + assert iridanext_global.findAll { it.path == "FinalReports/Aggregated/Json/final_report.json" }.size() == 1 + assert iridanext_global.findAll { it.path == "FinalReports/Aggregated/Tables/final_report.tsv" }.size() == 1 + assert iridanext_samples.INX.findAll { it.path == "Assembly/FinalAssembly/short/short.final.filtered.assembly.fasta.gz" }.size() == 1 + assert iridanext_samples.INX.findAll { it.path == "Assembly/Quality/QUAST/short/short.transposed_short.quast.quality.tsv" }.size() == 1 + assert iridanext_samples.INX.findAll { it.path == "Assembly/Quality/SeqKitStats/short.seqkit.stats.summary.tsv" }.size() == 1 + assert iridanext_samples.INX.findAll { it.path == "Assembly/PostProcessing/Speciation/MashScreen/short.mash.screen.taxa.screen.screen" }.size() == 1 + assert iridanext_samples.INX.findAll { it.path == "Reads/Quality/Trimmed/MashScreen/short.mash.screen.reads.screen.screen" }.size() == 1 + assert iridanext_samples.INX.findAll { it.path == "Reads/Quality/Trimmed/FastP/short.fastp.summary.json" }.size() == 1 + assert iridanext_samples.INX.findAll { it.path == "Reads/Quality/RawReadQuality/short.read.scan.summary.json" }.size() == 1 + assert iridanext_samples.INX.findAll { it.path == "FinalReports/FlattenedReports/short.flat_sample.json.gz" }.size() == 1 + + // output metadata + assert iridanext_metadata.INX."QC Status" == 
"PASSED" + + } + + } + + test("Should run without failure.") { tag "succeed_assembly"