Skip to content

Commit

Permalink
Merge branch 'devel'
Browse files Browse the repository at this point in the history
  • Loading branch information
pmelsted committed Feb 23, 2019
2 parents 0c950a3 + adb1236 commit eafdfda
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 32 deletions.
21 changes: 16 additions & 5 deletions src/ProcessReads.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -453,15 +453,16 @@ void MasterProcessor::processReads() {
++tmp_counts[umis[j].first];
}
}
for (auto x : tmp_counts) {
num_umi += x;
}

auto& bc = batchCounts[id];
for (int j = 0; j < tmp_counts.size(); j++) {
if (tmp_counts[j] > 0) {
bc.push_back({j,tmp_counts[j]});
}
}
for (auto x : bc) {
num_umi += x.second;
}
}
}
}
Expand Down Expand Up @@ -864,6 +865,7 @@ ReadProcessor::ReadProcessor(const KmerIndex& index, const ProgramOptions& opt,
if (opt.batch_mode) {
assert(id != -1);
batchSR.files = opt.batch_files[id];
batchSR.reserveNfiles(opt.batch_files[id].size());
if (opt.umi) {
batchSR.umi_files = {opt.umi_files[id]};
}
Expand Down Expand Up @@ -1442,6 +1444,7 @@ AlnProcessor::AlnProcessor(const KmerIndex& index, const ProgramOptions& opt, Ma
/* need to check this later */
assert(id != -1);
batchSR.files = opt.batch_files[id];
batchSR.reserveNfiles(opt.batch_files[id].size());
if (opt.umi) {
batchSR.umi_files = {opt.umi_files[id]};
}
Expand Down Expand Up @@ -2575,6 +2578,13 @@ SequenceReader::~SequenceReader() {
}


/**
 * Sizes the per-file reader state so that it can hold n input files.
 *
 * Resizes fp, seq, l and nl to exactly n entries. Newly created entries
 * are value-initialized for fp, nullptr for seq, and 0 for l and nl;
 * entries that already exist (index < n) keep their current values.
 *
 * This function does not modify the nfiles member; callers that need
 * nfiles to match n must set it themselves.
 *
 * @param n number of files to make room for
 */
void SequenceReader::reserveNfiles(int n) {
  // Honor the requested count. The argument used to be ignored in favor of
  // the nfiles member, so callers passing a different count (for example the
  // size of a batch file list) had no effect on the vector sizes.
  fp.resize(n);
  seq.resize(n, nullptr);
  l.resize(n, 0);
  nl.resize(n, 0);
}

void SequenceReader::reset() {
for (auto &f : fp) {
if (f) {
Expand Down Expand Up @@ -2650,16 +2660,17 @@ bool SequenceReader::fetchSequences(char *buf, const int limit, std::vector<std:

// open the next one
for (int i = 0; i < nfiles; i++) {
fp[i] = gzopen(files[current_file].c_str(), "r");
fp[i] = gzopen(files[current_file+i].c_str(), "r");
seq[i] = kseq_init(fp[i]);
l[i] = kseq_read(seq[i]);
current_file++;

}
if (usingUMIfiles) {
// open new umi file
f_umi->open(umi_files[current_file]);
current_file++;
}
current_file+=nfiles;
state = true;
}
}
Expand Down
6 changes: 2 additions & 4 deletions src/ProcessReads.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,7 @@ class SequenceReader {
} else {
nfiles = paired ? 2 : 1;
}
fp.resize(nfiles);
seq.resize(nfiles, nullptr);
l.resize(nfiles, 0);
nl.resize(nfiles, 0);
reserveNfiles(nfiles);
}
SequenceReader() :
paired(false),
Expand All @@ -62,6 +59,7 @@ class SequenceReader {

bool empty();
void reset();
void reserveNfiles(int n);
~SequenceReader();

bool fetchSequences(char *buf, const int limit, std::vector<std::pair<const char*, int>>& seqs,
Expand Down
2 changes: 1 addition & 1 deletion src/common.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef KALLISTO_COMMON_H
#define KALLISTO_COMMON_H

#define KALLISTO_VERSION "0.45.0"
#define KALLISTO_VERSION "0.45.1"

#include <string>
#include <vector>
Expand Down
63 changes: 41 additions & 22 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ void ParseOptionsPseudo(int argc, char **argv, ProgramOptions& opt) {

if (umi_flag) {
opt.umi = true;
opt.single_end = true; // UMI implies single end reads
opt.single_end = true; // UMI implies single-end reads
}

// all other arguments are fast[a/q] files to be read
Expand Down Expand Up @@ -524,30 +524,33 @@ void ParseOptionsMerge(int argc, char **argv, ProgramOptions& opt) {

void ListSingleCellTechnologies() {
//todo, figure this out
cout << "List of supported single cell technologies" << endl << endl
cout << "List of supported single-cell technologies" << endl << endl
<< "short name description" << endl
<< "---------- -----------" << endl
<< "10Xv1 10X chemistry version 1" << endl
<< "10Xv2 10X chemistry verison 2" << endl
<< "DropSeq DropSeq" << endl
<< "inDrop inDrop" << endl
<< "10xv1 10x version 1 chemistry" << endl
<< "10xv2 10x version 2 chemistry" << endl
<< "10xv3 10x version 3 chemistry" << endl
<< "CELSeq CEL-Seq" << endl
<< "CELSeq2 CEL-Seq version 2" << endl
<< "DropSeq DropSeq" << endl
<< "inDrops inDrops" << endl
<< "SCRBSeq SCRB-Seq" << endl
<< "SureCell SureCell for ddSEQ" << endl
<< endl;
}

void ParseOptionsBus(int argc, char **argv, ProgramOptions& opt) {
int list_flag = 0;
const char *opt_string = "i:o:x:t:";
const char *opt_string = "i:o:x:t:l";
static struct option long_options[] = {
{"index", required_argument, 0, 'i'},
{"output-dir", required_argument, 0, 'o'},
{"technology", required_argument, 0, 'x'},
{"list", no_argument, &list_flag, 'l'},
{"list", no_argument, 0, 'l'},
{"threads", required_argument, 0, 't'},
{0,0,0,0}
};

int list_flag = 0;
int c;
int option_index = 0;
while (true) {
Expand All @@ -564,6 +567,10 @@ void ParseOptionsBus(int argc, char **argv, ProgramOptions& opt) {
opt.index = optarg;
break;
}
case 'l': {
list_flag = 1;
break;
}
case 'o': {
opt.output = optarg;
break;
Expand Down Expand Up @@ -711,17 +718,29 @@ bool CheckOptionsBus(ProgramOptions& opt) {
busopt.seq = BUSOptionSubstr(1,0,0); // second file, entire string
busopt.umi = BUSOptionSubstr(0,16,26); // first file [16:26]
busopt.bc.push_back(BUSOptionSubstr(0,0,16));
} else if (opt.technology == "10XV3") {
busopt.nfiles = 2;
busopt.seq = BUSOptionSubstr(1,0,0);
busopt.umi = BUSOptionSubstr(0,16,28);
busopt.bc.push_back(BUSOptionSubstr(0,0,16));
} else if (opt.technology == "10XV1") {
busopt.nfiles = 3;
busopt.seq = BUSOptionSubstr(0,0,0);
busopt.umi = BUSOptionSubstr(1,0,0);
busopt.bc.push_back(BUSOptionSubstr(2,0,0));
} else if (opt.technology == "SURECELL") {
busopt.nfiles = 2;
busopt.seq = BUSOptionSubstr(1,0,0);
busopt.umi = BUSOptionSubstr(0,51,59);
busopt.bc.push_back(BUSOptionSubstr(0,0,6));
busopt.bc.push_back(BUSOptionSubstr(0,21,27));
busopt.bc.push_back(BUSOptionSubstr(0,42,48));
} else if (opt.technology == "DROPSEQ") {
busopt.nfiles = 2;
busopt.seq = BUSOptionSubstr(1,0,0);
busopt.umi = BUSOptionSubstr(0,12,20);
busopt.bc.push_back(BUSOptionSubstr(0,0,12));
} else if (opt.technology == "INDROP") {
} else if (opt.technology == "INDROPS") {
busopt.nfiles = 2;
busopt.seq = BUSOptionSubstr(1,0,0);
busopt.umi = BUSOptionSubstr(0,42,48);
Expand Down Expand Up @@ -833,7 +852,7 @@ bool CheckOptionsEM(ProgramOptions& opt, bool emonly = false) {

/*
if (opt.strand_specific && !opt.single_end) {
cerr << "Error: strand-specific mode requires single end mode" << endl;
cerr << "Error: strand-specific mode requires single-end mode" << endl;
ret = false;
}*/

Expand Down Expand Up @@ -1171,7 +1190,7 @@ bool CheckOptionsPseudo(ProgramOptions& opt) {

/*
if (opt.strand_specific && !opt.single_end) {
cerr << "Error: strand-specific mode requires single end mode" << endl;
cerr << "Error: strand-specific mode requires single-end mode" << endl;
ret = false;
}*/

Expand Down Expand Up @@ -1378,7 +1397,7 @@ void usage() {
<< "Where <CMD> can be one of:" << endl << endl
<< " index Builds a kallisto index "<< endl
<< " quant Runs the quantification algorithm " << endl
<< " bus Generate BUS files for single cell data " << endl
<< " bus Generate BUS files for single-cell data " << endl
<< " pseudo Runs the pseudoalignment step " << endl
<< " merge Merges several batch runs " << endl
<< " h5dump Converts HDF5-formatted results to plaintext" << endl
Expand All @@ -1390,15 +1409,15 @@ void usage() {

void usageBus() {
cout << "kallisto " << KALLISTO_VERSION << endl
<< "Generates BUS files for single cell sequencing" << endl << endl
<< "Generates BUS files for single-cell sequencing" << endl << endl
<< "Usage: kallisto bus [arguments] FASTQ-files" << endl << endl
<< "Required arguments:" << endl
<< "-i, --index=STRING Filename for the kallisto index to be used for" << endl
<< " pseudoalignment" << endl
<< "-o, --output-dir=STRING Directory to write output to" << endl
<< "-x, --technology=STRING Single cell technology used " << endl << endl
<< "-x, --technology=STRING Single-cell technology used " << endl << endl
<< "Optional arguments:" << endl
<< "-l, --list List all single cell technologies supported" << endl
<< "-l, --list List all single-cell technologies supported" << endl
<< "-t, --threads=INT Number of threads to use (default: 1)" << endl;
}

Expand Down Expand Up @@ -1871,13 +1890,13 @@ int main(int argc, char *argv[]) {
}

cerr << endl;
}

if (opt.pseudobam) {
MP.processAln(em, true);
}
}
if (opt.pseudobam) {

MP.processAln(em, true);
}


cerr << endl;
}
Expand Down

0 comments on commit eafdfda

Please sign in to comment.