Skip to content

Commit

Permalink
Merge pull request #48 from VukManojlovic/CTX-4505
Browse files Browse the repository at this point in the history
CTX-4505: Added a paired-end demultiplexing command and three clustering commands
  • Loading branch information
igorperic17 authored Sep 1, 2023
2 parents 5583e08 + ca25ad8 commit 70c0a0c
Show file tree
Hide file tree
Showing 2 changed files with 287 additions and 24 deletions.
246 changes: 242 additions & 4 deletions coretex/bioinformatics/ctx_qiime2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
from pathlib import Path

from .utils import compressGzip, createSample, getDemuxSamples, getDenoisedSamples, \
getFastqDPSamples, getFastqMPSamples, getMetadataSample, getPhylogeneticTreeSamples, \
getFastqDPSamples, getFastqMPSamples, getImportedSamples, getMetadata, getPhylogeneticTreeSamples, \
isDemultiplexedSample, isDenoisedSample, isFastqDPSample, isFastqMPSample, \
isMetadataSample, isPhylogeneticTreeSample, sampleNumber
isImportedSample, isPhylogeneticTreeSample, sampleNumber, isPairedEnd

from ..utils import command

Expand Down Expand Up @@ -67,6 +67,55 @@ def demuxEmpSingle(
])


def demuxEmpPaired(
    sequencesPath: Union[str, Path],
    barcodesPath: Union[str, Path],
    barcodesColumn: str,
    perSampleSequences: Union[str, Path],
    errorCorretctionDetailsPath: Union[str, Path]
) -> None:
    """
    Wrapper for QIIME's demux emp-paired command, which performs demultiplexing of multiplexed
    paired-end sequences in the Earth Microbiome Project amplicon sequencing standard format
    http://www.earthmicrobiome.org/protocols-and-standards/.

    Parameters
    ----------
    sequencesPath : Union[str, Path]
        Path to the multiplexed paired-end sequences to be demultiplexed
        (passed as "--i-seqs")
    barcodesPath : Union[str, Path]
        Path to the metadata file which holds the per-sample barcodes
        (passed as "--m-barcodes-file")
    barcodesColumn : str
        Name of the column in the barcodes file which contains the barcodes
        (passed as "--m-barcodes-column")
    perSampleSequences : Union[str, Path]
        Output path to the resulting demultiplexed sequences
        (passed as "--o-per-sample-sequences")
    errorCorretctionDetailsPath : Union[str, Path]
        Output path to the barcode error correction details
        (passed as "--o-error-correction-details")
    """

    # The command line has to consist of plain strings, so every pathlib.Path
    # argument is converted up front; values which are already str pass through.
    sequencesPath, barcodesPath, perSampleSequences, errorCorretctionDetailsPath = [
        str(path) if isinstance(path, Path) else path
        for path in (
            sequencesPath,
            barcodesPath,
            perSampleSequences,
            errorCorretctionDetailsPath
        )
    ]

    command([
        "qiime", "demux", "emp-paired",
        "--i-seqs", sequencesPath,
        "--m-barcodes-file", barcodesPath,
        "--m-barcodes-column", barcodesColumn,
        "--o-per-sample-sequences", perSampleSequences,
        "--o-error-correction-details", errorCorretctionDetailsPath,
        "--verbose"
    ])


def demuxSummarize(dataPath: str, visualizationPath: str) -> None:
command([
"qiime", "demux", "summarize",
Expand Down Expand Up @@ -111,7 +160,7 @@ def dada2DenoiseSingle(
denoisingStatsPath : Union[str, Path]
Output path to the statistics of the denoising
multithreading : bool
Whether to use multithreading
Whether to use multithreading. One thread per CPU core
"""

if isinstance(inputPath, Path):
Expand Down Expand Up @@ -191,7 +240,7 @@ def dada2DenoisePaired(
denoisingStatsPath : Union[str, Path]
Output path to the statistics of the denoising
multithreading : bool
Whether to use multithreading
Whether to use multithreading. One thread per CPU core
"""

if isinstance(inputPath, Path):
Expand Down Expand Up @@ -453,3 +502,192 @@ def taxaCollapse(
"--p-level", str(level),
"--o-collapsed-table", collapsedTablePath
])

def vsearchClusterDeNovo(
    tablePath: Union[str, Path],
    representativeSequencesPath: Union[str, Path],
    percIdentity: float,
    clusteredTablePath: Union[str, Path],
    clusteredSequencesPath: Union[str, Path],
    multithreading: bool = True
) -> None:
    """
    Runs QIIME2's "vsearch cluster-features-de-novo" command (de novo clustering).

    Parameters
    ----------
    tablePath : Union[str, Path]
        Path to the input feature table (e.g. the one generated by DADA2 denoise)
    representativeSequencesPath : Union[str, Path]
        Path to the input representative sequences (e.g. the ones generated by DADA2 denoise)
    percIdentity : float
        Percent identity threshold used for the OTU clustering
    clusteredTablePath : Union[str, Path]
        Output path of the clustered feature table
    clusteredSequencesPath : Union[str, Path]
        Output path of the clustered sequences
    multithreading : bool
        If True "--p-threads 0" is passed (one thread per CPU core),
        otherwise the threads option is not passed at all
    """

    def _asArgument(value: Union[str, Path]) -> str:
        # Only pathlib.Path values need converting, str values are used as they are
        return str(value) if isinstance(value, Path) else value

    threadOptions = ["--p-threads", "0"] if multithreading else []

    command([
        "qiime", "vsearch", "cluster-features-de-novo",
        "--i-table", _asArgument(tablePath),
        "--i-sequences", _asArgument(representativeSequencesPath),
        "--p-perc-identity", str(percIdentity),
        "--o-clustered-table", _asArgument(clusteredTablePath),
        "--o-clustered-sequences", _asArgument(clusteredSequencesPath),
        *threadOptions
    ])


def vsearchClusterClosedReference(
    tablePath: Union[str, Path],
    representativeSequencesPath: Union[str, Path],
    referenceSequencesPath: Union[str, Path],
    percIdentity: float,
    clusteredTablePath: Union[str, Path],
    clusteredSequencesPath: Union[str, Path],
    unmatchedSequencesPath: Union[str, Path],
    multithreading: bool = True
) -> None:
    """
    Runs QIIME2's "vsearch cluster-features-closed-reference" command
    (closed reference clustering).

    Parameters
    ----------
    tablePath : Union[str, Path]
        Path to the input feature table (e.g. the one generated by DADA2 denoise)
    representativeSequencesPath : Union[str, Path]
        Path to the input representative sequences (e.g. the ones generated by DADA2 denoise)
    referenceSequencesPath : Union[str, Path]
        Path to the reference OTU sequences
    percIdentity : float
        Percent identity threshold used for the OTU clustering
    clusteredTablePath : Union[str, Path]
        Output path of the clustered feature table
    clusteredSequencesPath : Union[str, Path]
        Output path of the clustered sequences
    unmatchedSequencesPath : Union[str, Path]
        Output path of the unmatched sequences
    multithreading : bool
        If True "--p-threads 0" is passed (one thread per CPU core),
        otherwise the threads option is not passed at all
    """

    # Insertion order of the dict is the order of the options on the command line
    options = {
        "--i-table": tablePath,
        "--i-sequences": representativeSequencesPath,
        "--i-reference-sequences": referenceSequencesPath,
        "--p-perc-identity": str(percIdentity),
        "--o-clustered-table": clusteredTablePath,
        "--o-clustered-sequences": clusteredSequencesPath,
        "--o-unmatched-sequences": unmatchedSequencesPath
    }

    if multithreading:
        options["--p-threads"] = "0"

    cliArgs = ["qiime", "vsearch", "cluster-features-closed-reference"]
    for flag, value in options.items():
        cliArgs.append(flag)
        # pathlib.Path values are converted to str, str values are used as they are
        cliArgs.append(str(value) if isinstance(value, Path) else value)

    command(cliArgs)


def vsearchClusterOpenReference(
    tablePath: Union[str, Path],
    representativeSequencesPath: Union[str, Path],
    referenceSequencesPath: Union[str, Path],
    percIdentity: float,
    clusteredTablePath: Union[str, Path],
    clusteredSequencesPath: Union[str, Path],
    newReferenceSequencesPath: Union[str, Path],
    multithreading: bool = True
) -> None:
    """
    Runs QIIME2's "vsearch cluster-features-open-reference" command
    (open reference clustering).

    Parameters
    ----------
    tablePath : Union[str, Path]
        Path to the input feature table (e.g. the one generated by DADA2 denoise)
    representativeSequencesPath : Union[str, Path]
        Path to the input representative sequences (e.g. the ones generated by DADA2 denoise)
    referenceSequencesPath : Union[str, Path]
        Path to the reference OTU sequences
    percIdentity : float
        Percent identity threshold used for the OTU clustering
    clusteredTablePath : Union[str, Path]
        Output path of the clustered feature table
    clusteredSequencesPath : Union[str, Path]
        Output path of the clustered sequences
    newReferenceSequencesPath : Union[str, Path]
        Output path of the new reference sequences
    multithreading : bool
        If True "--p-threads 0" is passed (one thread per CPU core),
        otherwise the threads option is not passed at all
    """

    # Convert every pathlib.Path argument to str in one pass,
    # values which are already str are left untouched
    (
        tablePath,
        representativeSequencesPath,
        referenceSequencesPath,
        clusteredTablePath,
        clusteredSequencesPath,
        newReferenceSequencesPath
    ) = (
        str(path) if isinstance(path, Path) else path
        for path in (
            tablePath,
            representativeSequencesPath,
            referenceSequencesPath,
            clusteredTablePath,
            clusteredSequencesPath,
            newReferenceSequencesPath
        )
    )

    cliArgs = [
        "qiime", "vsearch", "cluster-features-open-reference",
        "--i-table", tablePath,
        "--i-sequences", representativeSequencesPath,
        "--i-reference-sequences", referenceSequencesPath,
        "--p-perc-identity", str(percIdentity),
        "--o-clustered-table", clusteredTablePath,
        "--o-clustered-sequences", clusteredSequencesPath,
        "--o-new-reference-sequences", newReferenceSequencesPath
    ]
    cliArgs += ["--p-threads", "0"] if multithreading else []

    command(cliArgs)
Loading

0 comments on commit 70c0a0c

Please sign in to comment.