Skip to content

Commit

Permalink
confirmed that alignment is the same for a single read
Browse files Browse the repository at this point in the history
  • Loading branch information
maximilianmordig committed Jun 4, 2024
1 parent 1351418 commit e97d2fd
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 52 deletions.
76 changes: 65 additions & 11 deletions python_wrapper/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,35 +20,89 @@
args = [
"-x", "sensitive", "-t", "8", "-p",
"/home/mmordig/rawhash_project/rawhash2_new/extern/kmer_models/legacy/legacy_r9.4_180mv_450bps_6mer/template_median68pA.model",
# "-d", "example_out/ref.ind",
"/home/mmordig/rawhash_project/rawhash2_new/test/data/d2_ecoli_r94/ref.fa",
"no-revcomp-query",
]
# args += [
# "/home/mmordig/rawhash_project/rawhash2_new/test/data/d2_ecoli_r94/ref.fa",
# "no-revcomp-query",
# ]
args += [
"/home/mmordig/rawhash_project/rawhash2_new/example_out/forwrev_rawhash2_sensitive.ind",
]
# forwrev_rawhash2_sensitive.paf
# 0006a106-77eb-4752-b405-38d1635718fa
# /home/mmordig/rawhash_project/rawhash2_new/example_out/forwonly_rawhash2_sensitive.ind
# /home/mmordig/rawhash_project/rawhash2_new/example_out/forwrev_rawhash2_sensitive.ind
args = ["my_dummy_program"] + args
mapper = cppyy.gbl.RawHashMapper(len(args), args)
mapper.idx_info()

#%%
slow5_filename = "/home/mmordig/rawhash_project/rawhash2_new/test/data/d2_ecoli_r94/small_slow5_files/barcode02_r0barcode02b0_0.blow5"
# we know rawhash finds an alignment since from the file /home/mmordig/rawhash_project/rawhash2_new/example_out/forwrev_rawhash2_sensitive.paf
read_id = "0006a106-77eb-4752-b405-38d1635718fa"
import contextlib
import pyslow5
s5 = pyslow5.Open(slow5_filename, "r")
raw_signal = s5.get_read(read_id, pA=True)["signal"]
raw_signal = raw_signal[(raw_signal > 30.) & (raw_signal < 200.)]
# with contextlib.autoclosing(pyslow5.Open(slow5_filename, "r")) as s5:
raw_signal.shape
#%%
import numpy as np

# Create a numpy array
arr = np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32)
# arr = np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32)
# # Ensure the numpy array is contiguous
# arr = np.ascontiguousarray(arr, dtype=np.float32)

# Ensure the numpy array is contiguous
arr = np.ascontiguousarray(arr, dtype=np.float32)
# arr = raw_signal.astype(np.float32)
arr = np.ascontiguousarray(raw_signal, dtype=np.float32)

import cppyy.ll
cppyy.set_debug()
# cppyy.set_debug()
# Call the C++ function with the numpy array
alignments = mapper.map(cppyy.ll.cast["float*"](arr.ctypes.data), len(arr))

def format_alignment(alignment):
# return alignment
return f"Alignment(contig: {alignment.contig}, start: {alignment.ref_start}, end: {alignment.ref_end}, strand: {alignment.is_pos_strand})"
class Alignment:
def __init__(self, contig, ref_start, ref_end, is_pos_strand):
self.contig = contig
self.ref_start = ref_start
self.ref_end = ref_end
self.is_pos_strand = is_pos_strand
@staticmethod
def from_cppyy(alignment):
return Alignment(
contig=alignment.contig,
ref_start=alignment.ref_start,
ref_end=alignment.ref_end,
is_pos_strand=alignment.is_pos_strand,
)
def __repr__(self):
return f"Alignment(contig: {self.contig}, start: {self.ref_start}, end: {self.ref_end}, pos_strand: {self.is_pos_strand})"

def parse_paf_line(line):
"""returns read_id, alignment"""
fields = line.strip().split("\t")
return fields[0], Alignment(
contig=fields[5],
ref_start=int(fields[7]),
ref_end=int(fields[8]),
is_pos_strand=fields[4] == "+",
)
# def format_alignment(alignment):
# # return alignment
# return f"Alignment(contig: {alignment.contig}, start: {alignment.ref_start}, end: {alignment.ref_end}, pos_strand: {alignment.is_pos_strand})"

for (i, alignment) in enumerate(alignments):
print(f"Alignment {i}: {format_alignment(alignment)}")
print(f"Alignment {i}: {Alignment.from_cppyy(alignment)}")

gt_paf_filename = "/home/mmordig/rawhash_project/rawhash2_new/example_out/forwrev_rawhash2_sensitive.paf"
with open(gt_paf_filename) as f:
for line in f:
rid, gt_alignment = parse_paf_line(line)
if rid == read_id:
print(f"GT Alignment for {rid}: {gt_alignment}")
break
#%%
1

Expand Down
74 changes: 41 additions & 33 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,39 +52,47 @@ set_alternate_linker(mold)
# todo: currently can only compile rawhash2 or the below, not both
# this happens because functions like setup_hdf5 define the hdf5 target and the relationship to the target name
# rather than doing it separately

set(TARGET_NAME rawhash2)
add_executable(${TARGET_NAME} main.cpp)
setup_rawhashlike_target(${TARGET_NAME})
# setup_pod5(${TARGET_NAME}) # does not seem to work when pod5 is disabled
target_compile_definitions(${TARGET_NAME} PRIVATE NPOD5RH=1) # todo: workaround
setup_ruclient(${TARGET_NAME})
setup_hdf5(${TARGET_NAME})
setup_slow5(${TARGET_NAME})
setup_tflite(${TARGET_NAME})

# # set(TARGET_NAME rawhash2_wrapper_example)
# # add_executable(${TARGET_NAME} wrapper_example.cpp rawhash_wrapper.cpp rawhash_wrapper.hpp)
# set(TARGET_NAME rawhash2_wrapper)
# add_library(${TARGET_NAME} SHARED rawhash_wrapper.cpp rawhash_wrapper.hpp)
# setup_rawhashlike_target(${TARGET_NAME})
# # setup_pod5(${TARGET_NAME})
# target_compile_definitions(${TARGET_NAME} PRIVATE NPOD5RH=1) # todo: workaround
# setup_ruclient(${TARGET_NAME})
# setup_hdf5(${TARGET_NAME})
# setup_slow5(${TARGET_NAME})
# setup_tflite(${TARGET_NAME})

# set(TARGET_NAME rawhash2_usinglib)
# add_executable(${TARGET_NAME} wrapper_example.cpp)
# target_link_libraries(${TARGET_NAME} PRIVATE rawhash2_wrapper)
# # setup_rawhashlike_target(${TARGET_NAME})
# # # setup_pod5(${TARGET_NAME})
# # target_compile_definitions(${TARGET_NAME} PRIVATE NPOD5RH=1) # todo: workaround
# # setup_ruclient(${TARGET_NAME})
# # setup_hdf5(${TARGET_NAME})
# # setup_slow5(${TARGET_NAME})
# # setup_tflite(${TARGET_NAME})
set(build_cli OFF)

if (build_cli)
message(STATUS "Building CLI")

set(TARGET_NAME rawhash2)
add_executable(${TARGET_NAME} main.cpp)
setup_rawhashlike_target(${TARGET_NAME})
# setup_pod5(${TARGET_NAME}) # does not seem to work when pod5 is disabled
target_compile_definitions(${TARGET_NAME} PRIVATE NPOD5RH=1) # todo: workaround
setup_ruclient(${TARGET_NAME})
setup_hdf5(${TARGET_NAME})
setup_slow5(${TARGET_NAME})
setup_tflite(${TARGET_NAME})

else()
message(STATUS "Building shared library")

# set(TARGET_NAME rawhash2_wrapper_example)
# add_executable(${TARGET_NAME} wrapper_example.cpp rawhash_wrapper.cpp rawhash_wrapper.hpp)
set(TARGET_NAME rawhash2_wrapper)
add_library(${TARGET_NAME} SHARED rawhash_wrapper.cpp rawhash_wrapper.hpp)
setup_rawhashlike_target(${TARGET_NAME})
# setup_pod5(${TARGET_NAME})
target_compile_definitions(${TARGET_NAME} PRIVATE NPOD5RH=1) # todo: workaround
setup_ruclient(${TARGET_NAME})
setup_hdf5(${TARGET_NAME})
setup_slow5(${TARGET_NAME})
setup_tflite(${TARGET_NAME})

set(TARGET_NAME rawhash2_usinglib)
add_executable(${TARGET_NAME} wrapper_example.cpp)
target_link_libraries(${TARGET_NAME} PRIVATE rawhash2_wrapper)
# setup_rawhashlike_target(${TARGET_NAME})
# # setup_pod5(${TARGET_NAME})
# target_compile_definitions(${TARGET_NAME} PRIVATE NPOD5RH=1) # todo: workaround
# setup_ruclient(${TARGET_NAME})
# setup_hdf5(${TARGET_NAME})
# setup_slow5(${TARGET_NAME})
# setup_tflite(${TARGET_NAME})
endif()


# setup_pod5()
Expand Down
22 changes: 14 additions & 8 deletions src/rawhash_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,13 @@ void RawHashMapper::idx_info() const {

std::vector<Alignment> RawHashMapper::map(float* raw_signal, int signal_length) {
// print
fprintf(stderr, "[M::%s] mapping\n", __func__);
for (int i = 0; i < signal_length; i++) {
fprintf(stderr, "%f ", raw_signal[i]);
// fprintf(stderr, "[M::%s] mapping\n", __func__);

if (ri_verbose >= 3) {
for (int i = 0; (i < signal_length) && (i <= 10); i++) {
fprintf(stderr, "%f ", raw_signal[i]);
}
fprintf(stderr, "...");
}

ri_idx_t* ri = (ri_idx_t*)_ri;
Expand Down Expand Up @@ -143,7 +147,9 @@ std::vector<Alignment> RawHashMapper::map(float* raw_signal, int signal_length)
map_worker_for(&data, 0, 0);

// write out
write_out_mappings_to_stdout(reg0, ri);
if (ri_verbose >= 3) {
write_out_mappings_to_stdout(reg0, ri);
}
std::vector<Alignment> alignments;
for (int m = 0; m < reg0->n_maps; ++m) {
if (reg0->maps[m].ref_id < ri->n_seq) {
Expand All @@ -156,10 +162,10 @@ std::vector<Alignment> RawHashMapper::map(float* raw_signal, int signal_length)
});
}
}
// todo: dummy alignment
alignments.push_back(Alignment {
.contig = "fakealignment_chr123", .ref_start = 123, .ref_end = 456, .is_pos_strand = true
});
// dummy alignment
// alignments.push_back(Alignment {
// .contig = "fakealignment_chr123", .ref_start = 123, .ref_end = 456, .is_pos_strand = true
// });
free_mappings_ri_reg1_t(reg0);

ri_tbuf_destroy(buf);
Expand Down

0 comments on commit e97d2fd

Please sign in to comment.