Skip to content

Commit

Permalink
Slow5 (#298)
Browse files Browse the repository at this point in the history
* Added slow5 parser

* add package docs

* add better flow as suggested by matiasinsaurralde

* Nearly entire rewrite of parser

* Added variable failure reasons

* Add stringbuilder for raw signals

* fixed lint issues.

* update specification link

* separate example file

* Comment where example.slow5 comes from

---------

Co-authored-by: Timothy Stiles <[email protected]>
  • Loading branch information
Koeng101 and TimothyStiles authored Mar 28, 2023
1 parent 22b6bfa commit 931e546
Show file tree
Hide file tree
Showing 25 changed files with 856 additions and 0 deletions.
51 changes: 51 additions & 0 deletions io/slow5/data/example.slow5

Large diffs are not rendered by default.

Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#slow5_version 0.2.0
#num_read_groups 2
@asic_id 4175987214
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#slow5_version 0.2.0
#num_read_groups bad!
@asic_id 4175987214
4 changes: 4 additions & 0 deletions io/slow5/data/header_tests/test_header_without_tabs.slow5
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#slow5_version 0.2.0
#num_read_groups 1
@bad
@asic_id 4175987214
7 changes: 7 additions & 0 deletions io/slow5/data/read_tests/continue.slow5
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#slow5_version 0.2.0
#num_read_groups 1
@asic_id 4175987214
#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char*
#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number
0026631e-33a3-49ab-aa22-3ab157d71f8b 0 . bad 1489.52832 4000 5347 430,472,463 8318394 5383 1 219.133423 5 10

7 changes: 7 additions & 0 deletions io/slow5/data/read_tests/digitisation.slow5
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#slow5_version 0.2.0
#num_read_groups 1
@asic_id 4175987214
#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char*
#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number
0026631e-33a3-49ab-aa22-3ab157d71f8b 0 bad 16 1489.52832 4000 5347 430,472,463 8318394 5383 1 219.133423 5 10

7 changes: 7 additions & 0 deletions io/slow5/data/read_tests/endReason.slow5
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#slow5_version 0.2.0
#num_read_groups 1
@asic_id 4175987214
#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{bad,unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char*
#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number
0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 1489.52832 4000 5347 430,472,463 8318394 5383 1 219.133423 5 10

7 changes: 7 additions & 0 deletions io/slow5/data/read_tests/end_reason.slow5
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#slow5_version 0.2.0
#num_read_groups 1
@asic_id 4175987214
#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char*
#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number
0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 1489.52832 4000 5347 430,472,463 8318394 5383 1 219.133423 bad 10

7 changes: 7 additions & 0 deletions io/slow5/data/read_tests/end_reason_unknown.slow5
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#slow5_version 0.2.0
#num_read_groups 1
@asic_id 4175987214
#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char*
#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number
0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 1489.52832 4000 5347 430,472,463 8318394 5383 1 219.133423 1000 10

7 changes: 7 additions & 0 deletions io/slow5/data/read_tests/len_raw_signal.slow5
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#slow5_version 0.2.0
#num_read_groups 1
@asic_id 4175987214
#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char*
#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number
0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 1489.52832 4000 bad 430,472,463 8318394 5383 1 219.133423 5 10

7 changes: 7 additions & 0 deletions io/slow5/data/read_tests/median_before.slow5
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#slow5_version 0.2.0
#num_read_groups 1
@asic_id 4175987214
#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char*
#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number
0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 1489.52832 4000 5347 430,472,463 8318394 5383 1 bad 5 10

7 changes: 7 additions & 0 deletions io/slow5/data/read_tests/offset.slow5
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#slow5_version 0.2.0
#num_read_groups 1
@asic_id 4175987214
#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char*
#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number
0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 bad 1489.52832 4000 5347 430,472,463 8318394 5383 1 219.133423 5 10

7 changes: 7 additions & 0 deletions io/slow5/data/read_tests/range.slow5
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#slow5_version 0.2.0
#num_read_groups 1
@asic_id 4175987214
#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char*
#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number
0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 bad 4000 5347 430,472,463 8318394 5383 1 219.133423 5 10

7 changes: 7 additions & 0 deletions io/slow5/data/read_tests/raw_signal.slow5
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#slow5_version 0.2.0
#num_read_groups 1
@asic_id 4175987214
#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char*
#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number
0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 1489.52832 4000 5347 430,bad,463 8318394 5383 1 219.133423 5 10

7 changes: 7 additions & 0 deletions io/slow5/data/read_tests/read_group.slow5
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#slow5_version 0.2.0
#num_read_groups 1
@asic_id 4175987214
#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char*
#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number
0026631e-33a3-49ab-aa22-3ab157d71f8b bad 8192 16 1489.52832 4000 5347 430,472,463 8318394 5383 1 219.133423 5 10

7 changes: 7 additions & 0 deletions io/slow5/data/read_tests/read_number.slow5
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#slow5_version 0.2.0
#num_read_groups 1
@asic_id 4175987214
#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char*
#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number
0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 1489.52832 4000 5347 430,472,463 8318394 bad 1 219.133423 5 10

7 changes: 7 additions & 0 deletions io/slow5/data/read_tests/sampling_rate.slow5
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#slow5_version 0.2.0
#num_read_groups 1
@asic_id 4175987214
#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char*
#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number
0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 1489.52832 raw 5347 430,472,463 8318394 5383 1 219.133423 5 10

7 changes: 7 additions & 0 deletions io/slow5/data/read_tests/start_mux.slow5
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#slow5_version 0.2.0
#num_read_groups 1
@asic_id 4175987214
#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char*
#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number
0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 1489.52832 4000 5347 430,472,463 8318394 5383 bad 219.133423 5 10

7 changes: 7 additions & 0 deletions io/slow5/data/read_tests/start_time.slow5
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#slow5_version 0.2.0
#num_read_groups 1
@asic_id 4175987214
#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char*
#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number
0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 1489.52832 4000 5347 430,472,463 bad 5383 1 219.133423 5 10

6 changes: 6 additions & 0 deletions io/slow5/data/read_tests/unknown.slow5
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#slow5_version 0.2.0
#num_read_groups 1
@asic_id 4175987214
#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char*
#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number bad
0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 1489.52832 4000 5347 430,472,463 8318394 5383 1 219.133423 5 10 1
7 changes: 7 additions & 0 deletions io/slow5/data/test_example.slow5
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#slow5_version 0.2.0
#num_read_groups 1
@asic_id 4175987214
#char* uint32_t double double double double uint64_t int16_t* uint64_t int32_t uint8_t double enum{unknown,partial,mux_change,unblock_mux_change,data_service_unblock_mux_change,signal_positive,signal_negative} char*
#read_id read_group digitisation offset range sampling_rate len_raw_signal raw_signal start_time read_number start_mux median_before end_reason channel_number
0026631e-33a3-49ab-aa22-3ab157d71f8b 0 8192 16 1489.52832 4000 5347 430,472,463 8318394 5383 1 219.133423 5 10

32 changes: 32 additions & 0 deletions io/slow5/example_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package slow5_test

import (
"fmt"
"os"

"github.com/TimothyStiles/poly/io/slow5"
)

// ExampleNewParser shows how to open a slow5 file, construct a parser with
// NewParser, collect reads with ParseNext, and print the first ten raw signal
// values of the first read.
func ExampleNewParser() {
	// example.slow5 is a file I generated using slow5tools from nanopore fast5
	// run where I was testing using nanopore for doing COVID testing. It
	// contains real nanopore data.
	file, err := os.Open("data/example.slow5")
	if err != nil {
		// Report the failure instead of continuing with a nil file.
		fmt.Println("opening example file:", err)
		return
	}
	defer file.Close()

	// Set maxLineSize to 64 KiB. If you expect longer reads,
	// make maxLineSize longer!
	const maxLineSize = 2 * 32 * 1024
	// The second return value is not needed by this example.
	parser, _, err := slow5.NewParser(file, maxLineSize)
	if err != nil {
		fmt.Println("creating parser:", err)
		return
	}

	var outputReads []slow5.Read
	for {
		read, err := parser.ParseNext()
		if err != nil {
			// Any error ends the loop; this is how the example stops at the
			// end of the file.
			// NOTE(review): parse errors other than end-of-input also end
			// the loop silently here — confirm that is acceptable.
			break
		}
		outputReads = append(outputReads, read)
	}

	// Guard the indexing below so an empty or short parse fails the example
	// with a readable message rather than a panic.
	if len(outputReads) == 0 || len(outputReads[0].RawSignal) < 10 {
		fmt.Println("expected at least one read with ten raw signal values")
		return
	}

	fmt.Println(outputReads[0].RawSignal[0:10])
	// Output: [430 472 463 467 454 465 463 450 450 449]
}
Loading

0 comments on commit 931e546

Please sign in to comment.