-
Notifications
You must be signed in to change notification settings - Fork 0
/
BASH_code_for_data_analysis.sh
141 lines (83 loc) · 4.81 KB
/
BASH_code_for_data_analysis.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# Appendix 3.1 BASH code for data analysis
# Grace Seo
# Contains detailed code for data transfer and Guppy basecalling
###===============================================###
### Transfer data from GridION to server computer ###
###===============================================###
# Mirror the run folder to the server every 3 minutes until interrupted
# (Ctrl-C). RUN_NAME and /DATABASE_FOLDER_PATH are placeholders to replace
# with the real run directory and destination path.
# -t preserves modification times: without it rsync's quick check cannot
# recognise files it has already copied, so every pass would update all files
# again instead of only the new ones.
while true; do
  date
  time rsync -rt RUN_NAME /DATABASE_FOLDER_PATH
  sleep 180
done
###================================================================###
### Guppy basecalling on fast5 files using fast basecalling module ###
###================================================================###
### April 9, 2022 fast basecalling vs. high-accuracy basecalling test GC_231A (ligation sequencing kit) and GC_231B (rapid barcoding kit)
###########################################################################
###########################################################################
### ###
### GC_231A and GC_231B working guppy gpu code ###
### ###
###########################################################################
###########################################################################
### (fast basecalling script and COVID nextflow analysis)
###########################################################################
### WORKING BASECALLING METHOD - GUPPY GPU
# Create the analysis folder for this run under $OUTPUT, link the input
# directory into it, and open an editor to create the basecalling script.
# INPUT and OUTPUT must be set beforehand: the :? guards stop an unset or
# empty variable from silently turning "cd" into "cd $HOME" or the mkdir
# target into a path directly under "/".
cd "${INPUT:?INPUT must be set}"
mkdir -p "${OUTPUT:?OUTPUT must be set}/GS_nextflow_analyses_Apr9_1618_FreedV1_FastBC"
cd "${OUTPUT}/GS_nextflow_analyses_Apr9_1618_FreedV1_FastBC"
# Symlink the input directory into the analysis folder (link name = basename of $INPUT).
ln -s "${INPUT}" .
# Create the (empty) script file, then edit it interactively.
touch fast_basecalling_gpu.sh
nano fast_basecalling_gpu.sh
## copy and paste the following into the script
################## BASH SCRIPT ########################
#!/bin/bash
# Basecall every sub-directory of the current directory with guppy_basecaller
# (dna_r9.4.1_450bps_fast.cfg), running several instances at once via parallel.
# Output for each folder is written to fastq_pass_dehosted_only/<folder>.

# Set the number of parallel runs to do for basecalling
parallel_basecalling=6
# Sets the number of gpus on the node; each job's parallel slot number is
# taken modulo this value to pick its cuda:<n> device.
gpus=2

# Collect the sub-directories with a glob instead of parsing `ls`, so a name
# containing spaces is not split into several words. nullglob turns
# "no sub-directories" into an empty list rather than a literal "*/".
shopt -s nullglob
folders=(*/)
shopt -u nullglob

if (( ${#folders[@]} == 0 )); then
  printf 'No sub-directories to basecall in %s\n' "$PWD" >&2
else
  # {} is replaced by parallel with one folder name per job.
  # {= ... =} is evaluated by parallel: \$ keeps $_ and $job away from bash,
  # while ${gpus} is expanded by bash here. The escaped double quotes are
  # passed through so they end up around cuda:<n> in the generated command.
  printf '%s\n' "${folders[@]}" \
    | parallel -j "$parallel_basecalling" \
        guppy_basecaller -c dna_r9.4.1_450bps_fast.cfg -r -i {} \
        -s fastq_pass_dehosted_only/{} \
        -x "\"cuda:{= \$_=\$job->slot()%${gpus}=}\""
fi
################## End of BASH SCRIPT ########################
# Make the basecalling script executable and submit it to Slurm from inside
# the GPU guppy conda environment.
chmod 755 fast_basecalling_gpu.sh
# Load conda's shell functions into THIS shell. `conda init` only edits the
# shell start-up files, so a `conda activate` issued right after it in the
# same session (or in a non-interactive script) is not yet available.
# The eval input is conda's own generated hook, not external data.
eval "$(conda shell.bash hook)"
conda activate guppy-4.0.11-gpu
# $CONFIG is deliberately left unquoted: presumably it holds several sbatch
# options that must be split into separate words — confirm against its definition.
# --parsable makes sbatch print only the job id (optionally ";cluster"), so the
# id can be reused below and as the dependency of the following steps.
# shellcheck disable=SC2086
if submitted_job=$(sbatch --parsable $CONFIG --gres=gpu:v100:2 -J GC_231A_fastBC --wrap="./fast_basecalling_gpu.sh"); then
  PREVIOUS_JOB=${submitted_job%%;*}
  echo "Submitted batch job ${PREVIOUS_JOB}"
else
  echo "sbatch submission of fast_basecalling_gpu.sh failed" >&2
fi
conda deactivate
# Report the elapsed time of the submitted job (re-run this once the job has
# finished to get the final figure).
if [[ -n "${PREVIOUS_JOB:-}" ]]; then
  sacct --format="Elapsed" -j "${PREVIOUS_JOB}"
fi
### took about 1 day + 16 hours
##-------------------------------------------------------------------------
## 1. Create variables and a folder -
##-------------------------------------------------------------------------
# Placeholder paths: fill these in before running the steps that follow.
# PARENT_DIR   - directory the sequencing_summary*.txt files and fast5_pass are linked from
# BASE_DIR     - directory samplesheet.tsv is linked from
# FASTBC_FASTQ - directory whose *.fastq files are concatenated into all_reads
PARENT_DIR=''
BASE_DIR=''
FASTBC_FASTQ=''
# Derived from PARENT_DIR, so it must be assigned after it.
FAST5_PASS="${PARENT_DIR}/fast5_pass"
##-------------------------------------------------------------------------
## 4. Combine all fastq files into one fastq -
##-------------------------------------------------------------------------
# Work inside the analysis folder. It is created under $OUTPUT earlier in this
# file, so $OUTPUT (not $INPUT) is used here as well.
cd "${OUTPUT:?OUTPUT must be set}/GS_nextflow_analyses_Apr9_1618_FreedV1_FastBC"
mkdir -p all_reads
## PREVIOUS_JOB must hold the id of the job this step has to wait for
## (afterok = start only if that job finished successfully).
# FASTBC_FASTQ is expanded now, at submission time; the :? guard prevents an
# empty value from turning the glob into "/*.fastq".
# NOTE(review): ">>" appends, so re-submitting this step adds every read to
# results_all.fastq a second time — remove the file first when re-running.
if combine_job=$(sbatch --parsable -c 4 --mem=16G -p NMLResearch -J combining_fastqfiles --dependency=afterok:"${PREVIOUS_JOB:?PREVIOUS_JOB must be set to the job id to wait for}" --wrap="cat ${FASTBC_FASTQ:?FASTBC_FASTQ must be set}/*.fastq >> ./all_reads/results_all.fastq"); then
  # --parsable prints "jobid" or "jobid;cluster"; keep the id so the next
  # step can depend on this combining job rather than on the earlier one.
  PREVIOUS_JOB=${combine_job%%;*}
  echo "Submitted batch job ${PREVIOUS_JOB}"
else
  echo "sbatch submission of combining_fastqfiles failed" >&2
fi
##-------------------------------------------------------------------------
## 5. Demultiplex newly basecalled fastq file -
##-------------------------------------------------------------------------
## EXAMPLE: sbatch $CONFIG -J stricBarcoding --wrap="guppy_barcoder -i all_reads -s barcodes_strict_demulti --barcode_kits EXP-NBD104"
## EXAMPLE: sbatch $CONFIG -J stricBarcoding --wrap="guppy_barcoder -i all_reads -s barcodes_strict_demulti --barcode_kits EXP-NBD114"
# Submit guppy_barcoder (kit EXP-NBD196) on all_reads from inside the CPU
# guppy environment; results go to barcodes_strict_demulti. The job starts
# only after job $PREVIOUS_JOB has completed successfully.
conda activate guppy-4.0.11-cpu
# $CONFIG and $PREVIOUS_JOB are expanded unquoted, exactly as on a single
# command line.
demux_args=(
  -J stricBarcoding
  --dependency=afterok:$PREVIOUS_JOB
  --wrap="guppy_barcoder -i all_reads -s barcodes_strict_demulti --barcode_kits EXP-NBD196"
)
sbatch $CONFIG "${demux_args[@]}"
conda deactivate
##-------------------------------------------------------------------------
## 6. Make a run folder and move barcodes_strict_demulti to fastq_pass -
##-------------------------------------------------------------------------
# Create run/ and rename the demultiplexed output folder to run/fastq_pass.
mkdir -p run
if [[ -e run/fastq_pass ]]; then
  # If run/fastq_pass already exists (e.g. on a re-run), mv would place the
  # data INSIDE it as run/fastq_pass/barcodes_strict_demulti, so refuse.
  printf 'run/fastq_pass already exists; not moving barcodes_strict_demulti\n' >&2
else
  mv -- barcodes_strict_demulti/ run/fastq_pass
fi
##-------------------------------------------------------------------------
## 7. Change directory and create sym links -
##-------------------------------------------------------------------------
# Link the sample sheet into the analysis folder, and the sequencing summary
# and fast5_pass into run/. Nothing is linked if the analysis folder cannot be
# entered, so links are never created in an unrelated directory.
if cd "${OUTPUT:?OUTPUT must be set}/GS_nextflow_analyses_Apr9_1618_FreedV1_FastBC"; then
  ln -s "${BASE_DIR}/samplesheet.tsv" .
  # Subshell: the cd into run/ is local to it, so the working directory is the
  # analysis folder again afterwards without needing a "cd ../".
  (
    cd run || exit
    # The glob stays outside the quotes so sequencing_summary*.txt still expands.
    ln -s "${PARENT_DIR}"/sequencing_summary*.txt .
    ln -s "${PARENT_DIR}/fast5_pass" .
  )
fi
#pwd: $OUTPUT/GS_nextflow_analyses_Apr9_1618_FreedV1_FastBC
### USE DEHOSTED OPTION SINCE ALREADY REBASECALLED
# Run the pipeline wrapper script on ./run through srun on the NMLResearch
# partition. The :? guard stops an unset NEXTFLOWPIPELINE from resolving the
# script path to /pipeline_nextflow.sh.
srun -p NMLResearch bash "${NEXTFLOWPIPELINE:?NEXTFLOWPIPELINE must be set}/pipeline_nextflow.sh" -d ./run -p freed --dehosted