diff --git a/.github/data/reads_to_simulate.csv b/.github/data/reads_to_simulate.csv
deleted file mode 100644
index 7854e7b..0000000
--- a/.github/data/reads_to_simulate.csv
+++ /dev/null
@@ -1 +0,0 @@
-MK58361X-H3N2,.github/data/assemblies/MK58361X-H3N2.fa
diff --git a/.github/environments/art.yml b/.github/environments/art.yml
deleted file mode 100644
index bb6458a..0000000
--- a/.github/environments/art.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-name: art
-channels:
-  - conda-forge
-  - bioconda
-  - defaults
-dependencies:
-  - art=2016.06.05
diff --git a/.github/scripts/check_outputs.py b/.github/scripts/check_outputs.py
deleted file mode 100755
index c84c323..0000000
--- a/.github/scripts/check_outputs.py
+++ /dev/null
@@ -1,367 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-import csv
-import glob
-import hashlib
-import json
-import os
-import sys
-
-from pathlib import Path
-
-
-def check_expected_files_exist(output_dirs, sample_ids, output_file_mapping_by_sample_id):
-    """
-    Check that the expected files exist in the output directory.
-
-    :param output_dirs: Dictionary with keys ['upstream', 'origin'] and values as the output directories
-    :type output_dirs: Dict[str, Path]
-    :param sample_ids: List of sample IDs
-    :type sample_ids: List[str]
-    :param output_file_mapping_by_sample_id: Dictionary with keys as sample IDs
-        and values as dictionaries.
-
-    :return: List of dictionaries with keys ['sample_id', 'file_type', 'upstream_path', 'origin_path', 'upstream_exists', 'origin_exists', 'both_exist']
-    :rtype: List[Dict[str, str]]
-    """
-    expected_file_checks = []
-    for sample_id, output_files in output_file_mapping_by_sample_id.items():
-        for file_type, paths_by_pipeline in output_files.items():
-            upstream_path = os.path.join(output_dirs['upstream'], paths_by_pipeline['upstream'])
-            origin_path = os.path.join(output_dirs['origin'], paths_by_pipeline['origin'])
-            expected_file_check = {
-                'sample_id': sample_id,
-                'file_type': file_type,
-                'upstream_path': upstream_path,
-                'origin_path': origin_path,
-                'upstream_exists': os.path.exists(upstream_path),
-                'origin_exists': os.path.exists(origin_path),
-                'both_exist': os.path.exists(upstream_path) and os.path.exists(origin_path)
-            }
-            expected_file_checks.append(expected_file_check)
-
-    return expected_file_checks
-
-
-def check_expected_md5sums_match(
-    output_dirs,
-    sample_ids,
-    output_file_mapping_by_sample_id,
-    files_with_header_added_in_origin
-):
-    """
-    Check that the expected md5sums match the actual md5sums in the output directory.
-
-    :param output_dirs: Dictionary with keys ['upstream', 'origin'] and values as the output directories
-    :type output_dirs: Dict[str, Path]
-    :param sample_ids: List of sample IDs
-    :type sample_ids: List[str]
-    :param output_file_mapping_by_sample_id: Dictionary with keys as sample IDs
-        and values as dictionaries.
-    :type output_file_mapping_by_sample_id: Dict[str, Dict[str, Dict[str, str]]]
-    :param files_with_header_added_in_origin: List of file types for which the header is added in the origin version of the file
-    :type files_with_header_added_in_origin: List[str]
-    :return: List of dictionaries with keys ['sample_id', 'file_type', 'upstream_path', 'origin_path', 'upstream_md5sum', 'origin_md5sum', 'md5sums_match']
-    """
-    expected_md5sum_checks = []
-    for sample_id, output_files in output_file_mapping_by_sample_id.items():
-        for file_type, paths_by_pipeline in output_files.items():
-            upstream_path = os.path.join(output_dirs['upstream'], paths_by_pipeline['upstream'])
-            origin_path = os.path.join(output_dirs['origin'], paths_by_pipeline['origin'])
-            upstream_md5sum = None
-            origin_md5sum = None
-            with open(upstream_path, 'r') as f:
-                upstream_md5sum = hashlib.md5(f.read().encode()).hexdigest()
-            with open(origin_path, 'r') as f:
-                # skip header when calculating checksum for
-                # files where header is added in the origin version
-                if file_type in files_with_header_added_in_origin:
-                    f.readline()
-                origin_md5sum = hashlib.md5(f.read().encode()).hexdigest()
-
-            expected_md5sum_check = {
-                'sample_id': sample_id,
-                'file_type': file_type,
-                'upstream_path': upstream_path,
-                'origin_path': origin_path,
-                'upstream_md5sum': upstream_md5sum,
-                'origin_md5sum': origin_md5sum,
-                'md5sums_match': upstream_md5sum == origin_md5sum
-            }
-            expected_md5sum_checks.append(expected_md5sum_check)
-
-    return expected_md5sum_checks
-
-
-def main(args):
-
-    os.makedirs(args.outdir, exist_ok=True)
-
-    # TODO: read this from the 'reads_to_simulate.csv' file
-    sample_ids = [
-        'MK58361X-H3N2'
-    ]
-    analysis_stages = [
-        '00_normalize_depth',
-        '01_assemble_contigs',
-        '02_blast_contigs',
-        '03_scaffolding',
-        '04_read_mapping',
-        '05_variant_calling',
-        '06_report_variants',
-        '07_summary_reporting'
-    ]
-    output_file_mapping_by_sample_id = {}
-    for sample_id in sample_ids:
-        output_file_mapping = {
-            'HA_contigs': {"upstream": os.path.join(sample_id, "HA_contigs.fa"),
-                           "origin": os.path.join(sample_id,
-                                                  "analysis_by_stage",
-                                                  "03_scaffolding",
-                                                  "HA_contigs.fa")},
-            'HA_contigs_alignment': {"upstream": os.path.join(sample_id, "HA_contigs.afa"),
-                                     "origin": os.path.join(sample_id,
-                                                            "analysis_by_stage",
-                                                            "03_scaffolding",
-                                                            "HA_contigs.afa")},
-            'NA_contigs': {"upstream": os.path.join(sample_id, "NA_contigs.fa"),
-                           "origin": os.path.join(sample_id,
-                                                  "analysis_by_stage",
-                                                  "03_scaffolding",
-                                                  "NA_contigs.fa")},
-            'NA_contigs_alignment': {"upstream": os.path.join(sample_id, "NA_contigs.afa"),
-                                     "origin": os.path.join(sample_id,
-                                                            "analysis_by_stage",
-                                                            "03_scaffolding",
-                                                            "NA_contigs.afa")},
-            'NP_contigs': {"upstream": os.path.join(sample_id, "NP_contigs.fa"),
-                           "origin": os.path.join(sample_id,
-                                                  "analysis_by_stage",
-                                                  "03_scaffolding",
-                                                  "NP_contigs.fa")},
-            'NP_contigs_alignment': {"upstream": os.path.join(sample_id, "NP_contigs.afa"),
-                                     "origin": os.path.join(sample_id,
-                                                            "analysis_by_stage",
-                                                            "03_scaffolding",
-                                                            "NP_contigs.afa")},
-            'PA_contigs': {"upstream": os.path.join(sample_id, "PA_contigs.fa"),
-                           "origin": os.path.join(sample_id,
-                                                  "analysis_by_stage",
-                                                  "03_scaffolding",
-                                                  "PA_contigs.fa")},
-            'PA_contigs_alignment': {"upstream": os.path.join(sample_id, "PA_contigs.afa"),
-                                     "origin": os.path.join(sample_id,
-                                                            "analysis_by_stage",
-                                                            "03_scaffolding",
-                                                            "PA_contigs.afa")},
-            'PB1_contigs': {"upstream": os.path.join(sample_id, "PB1_contigs.fa"),
-                            "origin": os.path.join(sample_id,
"analysis_by_stage", - "03_scaffolding", - "PB1_contigs.fa")}, - 'PB1_contigs_alignment': {"upstream": os.path.join(sample_id, "PB1_contigs.afa"), - "origin": os.path.join(sample_id, - "analysis_by_stage", - "03_scaffolding", - "PB1_contigs.afa")}, - 'PB2_contigs': {"upstream": os.path.join(sample_id, "PB2_contigs.fa"), - "origin": os.path.join(sample_id, - "analysis_by_stage", - "03_scaffolding", - "PB2_contigs.fa")}, - 'PB2_contigs_alignment': {"upstream": os.path.join(sample_id, "PB2_contigs.afa"), - "origin": os.path.join(sample_id, - "analysis_by_stage", - "03_scaffolding", - "PB2_contigs.afa")}, - 'normalized_reads_r1': {"upstream": os.path.join(sample_id, "R1.fq"), - "origin": os.path.join(sample_id, - "analysis_by_stage", - "00_normalize_depth", - f"{sample_id}-normalized_R1.fastq.gz")}, - 'normalized_reads_r2': {"upstream": os.path.join(sample_id, "R2.fq"), - "origin": os.path.join(sample_id, - "analysis_by_stage", - "00_normalize_depth", - f"{sample_id}-normalized_R2.fastq.gz")}, - 'assembly_contigs': {"upstream": os.path.join(sample_id, "spades_output", "contigs.fasta"), - "origin": os.path.join(sample_id, - "analysis_by_stage", - "01_assemble_contigs", - f"{sample_id}_contigs.fasta")}, - 'alignment_sam': {"upstream": os.path.join(sample_id, "alignment.sam"), - "origin": os.path.join(sample_id, - "analysis_by_stage", - "04_read_mapping", - f"{sample_id}_alignment.sam")}, - 'ambig_tsv': {"upstream": os.path.join(sample_id, "ambig.tsv"), - "origin": os.path.join(sample_id, - "analysis_by_stage", - "05_variant_calling", - f"{sample_id}_ambig.tsv")}, - 'contigs_blast': {"upstream": os.path.join(sample_id, "contigs_blast.tsv"), - "origin": os.path.join(sample_id, - "analysis_by_stage", - "02_blast_contigs", - f"{sample_id}_contigs_blast.tsv")}, - 'mapping_refs': {"upstream": os.path.join(sample_id, f"{sample_id}_mapping_refs.fa"), - "origin": os.path.join(sample_id, - "analysis_by_stage", - "04_read_mapping", - f"{sample_id}_mapping_refs.fa")}, - 'depth_of_cov_freebayes': {"upstream": os.path.join(sample_id, "depth_of_cov_freebayes.tsv"), - "origin": os.path.join(sample_id, - "analysis_by_stage", - "05_variant_calling", - f"{sample_id}_depth_of_cov_freebayes.tsv")}, - 'depth_of_cov_samtools': {"upstream": os.path.join(sample_id, "depth_of_cov_samtools.tsv"), - "origin": os.path.join(sample_id, - "analysis_by_stage", - "07_summary_reporting", - f"{sample_id}_depth_of_cov_samtools.tsv")}, - 'low_cov': {"upstream": os.path.join(sample_id, "low_cov.tsv"), - "origin": os.path.join(sample_id, - "analysis_by_stage", - "05_variant_calling", - f"{sample_id}_low_cov.tsv")}, - 'masked_bed': {"upstream": os.path.join(sample_id, "masked.bed"), - "origin": os.path.join(sample_id, - "analysis_by_stage", - "05_variant_calling", - f"{sample_id}_masked.bed")}, - 'pileup_vcf': {"upstream": os.path.join(sample_id, "pileup.vcf"), - "origin": os.path.join(sample_id, - "analysis_by_stage", - "05_variant_calling", - f"{sample_id}_variants_raw.vcf")}, - 'reads_mapped_tsv': {"upstream": os.path.join(sample_id, "reads_mapped.tsv"), - "origin": os.path.join(sample_id, - "analysis_by_stage", - "07_summary_reporting", - f"{sample_id}_reads_mapped.tsv")}, - 'scaffolds_fasta': {"upstream": os.path.join(sample_id, "scaffolds.fa"), - "origin": os.path.join(sample_id, - "analysis_by_stage", - "03_scaffolding", - f"{sample_id}_scaffolds.fa")}, - 'scaffolds_blast_tsv': {"upstream": os.path.join(sample_id, "scaffolds_blast.tsv"), - "origin": os.path.join(sample_id, - "analysis_by_stage", - "03_scaffolding", - 
f"{sample_id}_scaffolds_blast.tsv")}, - 'variants_tsv': {"upstream": os.path.join(sample_id, "variants.tsv"), - "origin": os.path.join(sample_id, - "analysis_by_stage", - "05_variant_calling", - f"{sample_id}_variants.tsv")}, - - } - output_file_mapping_by_sample_id[sample_id] = output_file_mapping - - pipeline_outdirs = { - "upstream": args.analysis_outdir_upstream, - "origin": args.analysis_outdir_origin - } - - expected_files_exist_checks = check_expected_files_exist( - pipeline_outdirs, - sample_ids, - output_file_mapping_by_sample_id - ) - expected_outputs_exist_output_path = os.path.join(args.outdir, "check_outputs_exist.csv") - with open(expected_outputs_exist_output_path, 'w') as f: - writer = csv.DictWriter(f, fieldnames=expected_files_exist_checks[0].keys(), extrasaction='ignore') - writer.writeheader() - for check in expected_files_exist_checks: - writer.writerow(check) - - all_expected_files_exist = all([check['upstream_exists'] and check['origin_exists'] for check in expected_files_exist_checks]) - - files_whose_md5sums_are_not_expected_to_match = [ - 'normalized_reads_r1', - 'normalized_reads_r2', - 'alignment_sam', - 'pileup_vcf', - ] - files_with_header_added_in_origin = [ - 'contigs_blast', - 'scaffolds_blast_tsv', - - ] - output_file_mapping_by_sample_id_for_md5sum_check = {} - for sample_id, output_files in output_file_mapping_by_sample_id.items(): - for file_type, paths_by_pipeline in output_files.items(): - if file_type in files_whose_md5sums_are_not_expected_to_match: - continue - if sample_id not in output_file_mapping_by_sample_id_for_md5sum_check: - output_file_mapping_by_sample_id_for_md5sum_check[sample_id] = {} - output_file_mapping_by_sample_id_for_md5sum_check[sample_id][file_type] = paths_by_pipeline - - expected_md5sums_match_checks = check_expected_md5sums_match( - pipeline_outdirs, - sample_ids, - output_file_mapping_by_sample_id_for_md5sum_check, - files_with_header_added_in_origin - ) - def get_analysis_stage_number_from_origin_path(origin_path): - return origin_path.split(os.sep)[-2] - expected_md5sums_match_checks_sorted = sorted(expected_md5sums_match_checks, key=lambda x: get_analysis_stage_number_from_origin_path(x['origin_path'])) - - expected_md5sums_match_output_path = os.path.join(args.outdir, "check_md5sums_match.csv") - with open(expected_md5sums_match_output_path, 'w') as f: - writer = csv.DictWriter(f, fieldnames=expected_md5sums_match_checks[0].keys(), extrasaction='ignore') - writer.writeheader() - for check in expected_md5sums_match_checks_sorted: - writer.writerow(check) - all_expected_md5sums_match = all([check['md5sums_match'] for check in expected_md5sums_match_checks]) - - # TODO: Add more tests - tests = [ - { - "test_name": "all_expected_files_exist", - "test_passed": all_expected_files_exist, - }, - { - "test_name": "all_expected_md5sums_match", - "test_passed": all_expected_md5sums_match, - }, - ] - - output_fields = [ - "test_name", - "test_result" - ] - - output_path = os.path.join(args.outdir, "check_outputs_summary.csv") - with open(output_path, 'w') as f: - file_writer = csv.DictWriter(f, fieldnames=output_fields, extrasaction='ignore') - stdout_writer = csv.DictWriter(sys.stdout, fieldnames=output_fields, extrasaction='ignore', delimiter='\t') - stdout_writer.writeheader() - file_writer.writeheader() - for test in tests: - test["test_result"] = "FAIL" - if test["test_passed"]: - test["test_result"] = "PASS" - - stdout_writer.writerow(test) - file_writer.writerow(test) - - for test in tests: - if not test['test_passed']: - 
print(f"\nTest: {test['test_name']} failed.") - print(f"See {output_path} for more details.") - exit(1) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Check outputs') - parser.add_argument('--analysis-outdir-upstream', type=str, help='Path to the pipeline output directory for the upstream (KevinKuchinski) version of FluViewer') - parser.add_argument('--analysis-outdir-origin', type=str, help='Path to the pipeline output directory for the origin (BCCDC-PHL) version of FluViewer') - parser.add_argument('-o', '--outdir', type=str, help='Path to the directory where the output files will be written') - args = parser.parse_args() - main(args) diff --git a/.github/scripts/check_outputs.sh b/.github/scripts/check_outputs.sh deleted file mode 100755 index 6c7452e..0000000 --- a/.github/scripts/check_outputs.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash - -.github/scripts/check_outputs.py \ - --analysis-outdir-upstream .github/data/test_output/KevinKuchinski-FluViewer-output \ - --analysis-outdir-origin .github/data/test_output/BCCDC-PHL-FluViewer-output \ - --outdir artifacts diff --git a/.github/scripts/create_art_environment.sh b/.github/scripts/create_art_environment.sh deleted file mode 100755 index d393421..0000000 --- a/.github/scripts/create_art_environment.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -conda env create -f .github/environments/art.yml diff --git a/.github/scripts/download_assemblies.sh b/.github/scripts/download_assemblies.sh deleted file mode 100755 index 83eb9d5..0000000 --- a/.github/scripts/download_assemblies.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash - -mkdir -p .github/data/assemblies - -curl -o .github/data/assemblies/MK583610.1_segment_1_PB2_H3N2.fa "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?retmode=text&id=MK583610.1&db=nucleotide&rettype=fasta" -curl -o .github/data/assemblies/MK583611.1_segment_2_PB1_H3N2.fa "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?retmode=text&id=MK583611.1&db=nucleotide&rettype=fasta" -curl -o .github/data/assemblies/MK583612.1_segment_3_PA_H3N2.fa "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?retmode=text&id=MK583612.1&db=nucleotide&rettype=fasta" -curl -o .github/data/assemblies/MK583613.1_segment_4_HA_H3N2.fa "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?retmode=text&id=MK583613.1&db=nucleotide&rettype=fasta" -curl -o .github/data/assemblies/MK583614.1_segment_5_NP_H3N2.fa "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?retmode=text&id=MK583614.1&db=nucleotide&rettype=fasta" -curl -o .github/data/assemblies/MK583615.1_segment_6_NA_H3N2.fa "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?retmode=text&id=MK583615.1&db=nucleotide&rettype=fasta" -curl -o .github/data/assemblies/MK583616.1_segment_7_M_H3N2.fa "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?retmode=text&id=MK583616.1&db=nucleotide&rettype=fasta" -curl -o .github/data/assemblies/MK583617.1_segment_8_NS_H3N2.fa "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?retmode=text&id=MK583617.1&db=nucleotide&rettype=fasta" - -cat .github/data/assemblies/MK58361*.fa > .github/data/assemblies/tmp.fa - -rm .github/data/assemblies/MK58361*.*_segment_*.fa - -mv .github/data/assemblies/tmp.fa .github/data/assemblies/MK58361X-H3N2.fa diff --git a/.github/scripts/download_fluviewer_db.sh b/.github/scripts/download_fluviewer_db.sh deleted file mode 100755 index f0e3898..0000000 --- a/.github/scripts/download_fluviewer_db.sh +++ /dev/null @@ -1,14 +0,0 @@ 
-#!/bin/bash
-
-database_version="v0.1.8"
-
-if [ -f .github/data/fluviewer_db-${database_version}/FluViewer_db.fa ]; then
-    rm .github/data/fluviewer_db-${database_version}/FluViewer_db.fa
-fi
-
-mkdir -p .github/data/fluviewer_db-${database_version}
-
-wget -O .github/data/fluviewer_db-${database_version}/FluViewer_db.fa.gz \
-    https://github.com/BCCDC-PHL/FluViewer-db/raw/${database_version}/FluViewer_db.fa.gz
-
-gunzip .github/data/fluviewer_db-${database_version}/FluViewer_db.fa.gz
diff --git a/.github/scripts/install_conda.sh b/.github/scripts/install_conda.sh
deleted file mode 100755
index fd82b1a..0000000
--- a/.github/scripts/install_conda.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-set -eo pipefail
-
-artifacts_dir="artifacts"
-
-echo "Install Miniconda .." >> ${artifacts_dir}/test.log
-
-export PATH=/opt/miniconda3/bin:$PATH
-
-wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
-
-/bin/bash ~/miniconda.sh -b -p /opt/miniconda3
-
-rm ~/miniconda.sh
-
-echo ". /opt/miniconda3/etc/profile.d/conda.sh" >> ~/.bashrc
-
-conda update -n base -c defaults conda
-
-conda install -y -c conda-forge mamba
-
-conda init bash
diff --git a/.github/scripts/install_fluviewer_bccdc-phl.sh b/.github/scripts/install_fluviewer_bccdc-phl.sh
deleted file mode 100755
index 89834c7..0000000
--- a/.github/scripts/install_fluviewer_bccdc-phl.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash
-
-source ${HOME}/.bashrc
-
-eval "$(conda shell.bash hook)"
-
-conda env create -f environment.yaml -n fluviewer-bccdc-phl
-
-conda activate fluviewer-bccdc-phl
-
-pip install .
diff --git a/.github/scripts/install_fluviewer_kkuchinski.sh b/.github/scripts/install_fluviewer_kkuchinski.sh
deleted file mode 100755
index f7b2bfc..0000000
--- a/.github/scripts/install_fluviewer_kkuchinski.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/bash
-
-source ${HOME}/.bashrc
-
-eval "$(conda shell.bash hook)"
-
-fluviewer_version="0.1.11"
-
-conda env create -f environment.yaml -n fluviewer-kkuchinski
-
-conda activate fluviewer-kkuchinski
-
-pip install fluviewer==${fluviewer_version}
diff --git a/.github/scripts/prepare_artifacts.sh b/.github/scripts/prepare_artifacts.sh
deleted file mode 100755
index 2ac7069..0000000
--- a/.github/scripts/prepare_artifacts.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/bash
-
-artifacts_dir="artifacts"
-
-echo "Prepare artifacts .." >> ${artifacts_dir}/test.log
-
-mkdir -p ${artifacts_dir}/fastq
-
-mv .github/data/fastq/*.fastq.gz ${artifacts_dir}/fastq
-
-mkdir -p ${artifacts_dir}/pipeline_outputs
-
-mv .github/data/test_output/* ${artifacts_dir}/pipeline_outputs
diff --git a/.github/scripts/run_analysis_origin.sh b/.github/scripts/run_analysis_origin.sh
deleted file mode 100755
index 6221972..0000000
--- a/.github/scripts/run_analysis_origin.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/bash
-
-set -eo pipefail
-
-source ${HOME}/.bashrc
-
-eval "$(conda shell.bash hook)"
-
-conda activate fluviewer-bccdc-phl
-
-# Check for a sign that we're in the GitHub Actions environment.
-# Prevents these settings from being applied in other environments.
-if [ -z "${GITHUB_ACTIONS}" ]; then
-    echo "Not running in GitHub Actions environment."
-    num_threads=16
-else
-    echo "Running in GitHub Actions environment."
-    num_threads=2
-fi
-echo "Number of threads used for analysis: ${num_threads}"
-
-database_version="v0.1.8"
-
-mkdir -p .github/data/test_output
-
-while IFS=, read -r sample_id assembly; do
-    echo "Analyzing sample: ${sample_id}"
-    fluviewer \
-        --threads ${num_threads} \
-        --disable-garbage-collection \
-        --forward-reads .github/data/fastq/${sample_id}_R1.fastq.gz \
-        --reverse-reads .github/data/fastq/${sample_id}_R2.fastq.gz \
-        --db .github/data/fluviewer_db-${database_version}/FluViewer_db.fa \
-        --outdir .github/data/test_output/BCCDC-PHL-FluViewer-output/${sample_id} \
-        --output-name ${sample_id}
-    echo "Finished analyzing sample: ${sample_id}"
-
-done < .github/data/reads_to_simulate.csv
-
diff --git a/.github/scripts/run_analysis_upstream.sh b/.github/scripts/run_analysis_upstream.sh
deleted file mode 100755
index 6773f59..0000000
--- a/.github/scripts/run_analysis_upstream.sh
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/bin/bash
-
-set -eo pipefail
-
-source ${HOME}/.bashrc
-
-eval "$(conda shell.bash hook)"
-
-conda activate fluviewer-kkuchinski
-
-# Check for a sign that we're in the GitHub Actions environment.
-# Prevents these settings from being applied in other environments.
-if [ -z "${GITHUB_ACTIONS}" ]; then
-    echo "Not running in GitHub Actions environment."
-    num_threads=16
-else
-    echo "Running in GitHub Actions environment."
-    num_threads=2
-fi
-echo "Number of threads used for analysis: ${num_threads}"
-
-database_version="v0.1.8"
-
-mkdir -p .github/data/test_output/KevinKuchinski-FluViewer-output
-
-while IFS=, read -r sample_id assembly; do
-    echo "Analyzing sample ${sample_id}"
-    rm -rf ./${sample_id}
-    FluViewer \
-        -T ${num_threads} \
-        -g \
-        -f .github/data/fastq/${sample_id}_R1.fastq.gz \
-        -r .github/data/fastq/${sample_id}_R2.fastq.gz \
-        -d .github/data/fluviewer_db-${database_version}/FluViewer_db.fa \
-        -n ${sample_id}
-
-    mv ${sample_id} .github/data/test_output/KevinKuchinski-FluViewer-output/${sample_id}
-    echo "Finished analyzing sample ${sample_id}"
-
-done < .github/data/reads_to_simulate.csv
diff --git a/.github/scripts/run_comparison_check_locally.sh b/.github/scripts/run_comparison_check_locally.sh
deleted file mode 100755
index dc1265b..0000000
--- a/.github/scripts/run_comparison_check_locally.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/bin/bash
-
-# Run the same steps as the CI pipeline, but with the local environment
-
-set -e
-
-rm -rf .github/data/test_output/*
-
-# Install dependencies
-art_env_dir="${HOME}/.conda/envs/art"
-if [ ! -d ${art_env_dir} ]; then
-    .github/scripts/create_art_environment.sh
-fi
-
-.github/scripts/download_assemblies.sh
-
-.github/scripts/simulate_reads.sh
-
-.github/scripts/download_fluviewer_db.sh
-
-fluviewer_kkuchinski_env_dir="${HOME}/.conda/envs/fluviewer-kkuchinski"
-if [ ! -d ${fluviewer_kkuchinski_env_dir} ]; then
-    .github/scripts/install_fluviewer_kkuchinski.sh
-fi
-
-.github/scripts/run_analysis_upstream.sh
-
-fluviewer_bccdc_phl_env_dir="${HOME}/.conda/envs/fluviewer-bccdc-phl"
-if [ ! -d ${fluviewer_bccdc_phl_env_dir} ]; then
-    .github/scripts/install_fluviewer_bccdc-phl.sh
-fi
-
-.github/scripts/run_analysis_origin.sh
-
-.github/scripts/check_outputs.sh
-
-
-# Cleanup
-
-rm -rf .github/data/assemblies/*
-rm -rf .github/data/fastq/*
-rm -rf .github/data/fluviewer_db-v*
diff --git a/.github/scripts/simulate_reads.sh b/.github/scripts/simulate_reads.sh
deleted file mode 100755
index 0689e4e..0000000
--- a/.github/scripts/simulate_reads.sh
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/bin/bash
-
-
-source ${HOME}/.bashrc
-
-eval "$(conda shell.bash hook)"
-
-conda activate art
-
-mkdir -p .github/data/fastq
-
-while IFS=',' read -r sample_id assembly; do
-    art_illumina \
-        --paired \
-        --in ${assembly} \
-        --fcov 500 \
-        --len 150 \
-        --mflen 400 \
-        --sdev 100 \
-        --rndSeed 42 \
-        --qShift 0 \
-        --qShift2 0 \
-        --out .github/data/fastq/${sample_id}_R
-
-    rm -f .github/data/fastq/${sample_id}_R1.aln
-    rm -f .github/data/fastq/${sample_id}_R2.aln
-
-    mv .github/data/fastq/${sample_id}_R1.fq .github/data/fastq/${sample_id}_R1.fastq
-    mv .github/data/fastq/${sample_id}_R2.fq .github/data/fastq/${sample_id}_R2.fastq
-
-    gzip -f .github/data/fastq/${sample_id}_R1.fastq
-    gzip -f .github/data/fastq/${sample_id}_R2.fastq
-
-done < .github/data/reads_to_simulate.csv
-
diff --git a/.github/workflows/consistency_check.yml b/.github/workflows/consistency_check.yml
deleted file mode 100644
index c45e201..0000000
--- a/.github/workflows/consistency_check.yml
+++ /dev/null
@@ -1,55 +0,0 @@
-on:
-  pull_request:
-    branches:
-      - main
-  push:
-    branches:
-      - main
-  workflow_dispatch:
-name: Consistency Check
-jobs:
-  test:
-    name: Check Consistency vs. KevinKuchinski/FluViewer
-    strategy:
-      fail-fast: false
-      matrix:
-        python_version:
-          - "3.8"
-          - "3.9"
-          - "3.10"
-          - "3.11"
-          - "3.12"
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@master
-      - name: Create Artifacts Directory
-        run: mkdir artifacts
-      - name: Install Conda
-        run: bash .github/scripts/install_conda.sh
-      - name: Create ART Read-Simulation Environment
-        run: bash .github/scripts/create_art_environment.sh
-      - name: Download Assemblies
-        run: bash .github/scripts/download_assemblies.sh
-      - name: Simulate Reads
-        run: bash .github/scripts/simulate_reads.sh
-      - name: Download FluViewer db
-        run: bash .github/scripts/download_fluviewer_db.sh
-      - name: Install KevinKuchinski/FluViewer
-        run: bash .github/scripts/install_fluviewer_kkuchinski.sh
-      - name: Run analysis with KevinKuchinski/FluViewer
-        run: bash .github/scripts/run_analysis_upstream.sh
-      - name: Install BCCDC-PHL/FluViewer
-        run: bash .github/scripts/install_fluviewer_bccdc-phl.sh
-      - name: Run analysis with BCCDC-PHL/FluViewer
-        run: bash .github/scripts/run_analysis_origin.sh
-      - name: Check Outputs
-        run: bash .github/scripts/check_outputs.sh
-      - name: Prepare Artifacts
-        if: always()
-        run: bash .github/scripts/prepare_artifacts.sh
-      - name: Upload Artifacts
-        uses: actions/upload-artifact@v4
-        if: always()
-        with:
-          name: artifacts-BCCDC-PHL-FluViewer-python-v${{ matrix.python_version }}-${{ github.run_id }}.${{ github.run_attempt }}
-          path: artifacts
diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index 9949465..0000000
--- a/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-*~
-*.pyc
-*.egg-info
-__pycache__
-dev_scripts