From c842ee84ba0dd953f86e5c1e0e10616637ece160 Mon Sep 17 00:00:00 2001 From: Harun Mustafa Date: Tue, 8 Oct 2024 11:47:56 +0200 Subject: [PATCH 01/10] Fix for queries with invalid characters --- .../graph/representation/canonical_dbg.cpp | 2 +- .../graph/representation/hash/dbg_sshash.cpp | 47 ++++++++++++++----- .../graph/representation/hash/dbg_sshash.hpp | 6 +++ .../tests/annotation/test_annotated_dbg.cpp | 7 ++- .../annotation/test_annotated_dbg_helpers.cpp | 1 + .../tests/graph/all/test_dbg_helpers.cpp | 7 ++- 6 files changed, 53 insertions(+), 17 deletions(-) diff --git a/metagraph/src/graph/representation/canonical_dbg.cpp b/metagraph/src/graph/representation/canonical_dbg.cpp index fdd4bd683e..a944cd958c 100644 --- a/metagraph/src/graph/representation/canonical_dbg.cpp +++ b/metagraph/src/graph/representation/canonical_dbg.cpp @@ -64,7 +64,7 @@ ::map_to_nodes_sequentially(std::string_view sequence, path.reserve(sequence.size() - get_k() + 1); if (const auto sshash = std::dynamic_pointer_cast(graph_)) { - sshash->map_to_nodes_with_rc<>(sequence, [&](node_index node, bool orientation) { + sshash->map_to_nodes_with_rc(sequence, [&](node_index node, bool orientation) { callback(node && orientation ? reverse_complement(node) : node); }, terminate); return; diff --git a/metagraph/src/graph/representation/hash/dbg_sshash.cpp b/metagraph/src/graph/representation/hash/dbg_sshash.cpp index 9e47d1fe3d..ecda8b57ea 100644 --- a/metagraph/src/graph/representation/hash/dbg_sshash.cpp +++ b/metagraph/src/graph/representation/hash/dbg_sshash.cpp @@ -5,6 +5,7 @@ #include "common/seq_tools/reverse_complement.hpp" #include "common/threads/threading.hpp" #include "common/logger.hpp" +#include "common/algorithms.hpp" #include "kmer/kmer_extractor.hpp" @@ -99,6 +100,37 @@ void DBGSSHash::add_sequence(std::string_view sequence, throw std::logic_error("adding sequences not supported"); } +void DBGSSHash +::map_to_nodes_with_rc_advanced(std::string_view sequence, + const std::function& callback, + bool with_rc, + const std::function& terminate) const { + if (terminate() || sequence.size() < k_) + return; + + std::visit([&](const auto &dict) { + using kmer_t = get_kmer_t; + + std::vector seq_encoded; + seq_encoded.reserve(sequence.size()); + for (size_t i = 0; i < sequence.size(); ++i) { + char enc = kmer_t::canonicalize_basepair_forward_map[static_cast(sequence[i])]; + seq_encoded.emplace_back(enc == '\0'); + } + + auto invalid = utils::drag_and_mark_segments(seq_encoded, 1, k_); + + kmer_t uint_kmer = sshash::util::string_to_uint_kmer(sequence.data(), k_ - 1); + uint_kmer.pad_char(); + for (size_t i = k_ - 1; i < sequence.size() && !terminate(); ++i) { + uint_kmer.drop_char(); + uint_kmer.kth_char_or(k_ - 1, kmer_t::char_to_uint(sequence[i])); + callback(!invalid[i] ? dict.lookup_advanced_uint(uint_kmer, with_rc) + : sshash::lookup_result()); + } + }, dict_); +} + template void DBGSSHash::map_to_nodes_with_rc(std::string_view sequence, const std::function& callback, @@ -113,18 +145,11 @@ void DBGSSHash::map_to_nodes_with_rc(std::string_view sequence, return; } - std::visit([&](const auto &dict) { - using kmer_t = get_kmer_t; - kmer_t uint_kmer = sshash::util::string_to_uint_kmer(sequence.data(), k_ - 1); - uint_kmer.pad_char(); - for (size_t i = k_ - 1; i < sequence.size() && !terminate(); ++i) { - uint_kmer.drop_char(); - uint_kmer.kth_char_or(k_ - 1, kmer_t::char_to_uint(sequence[i])); - auto res = dict.lookup_advanced_uint(uint_kmer, with_rc); - callback(sshash_to_graph_index(res.kmer_id), res.kmer_orientation); - } - }, dict_); + map_to_nodes_with_rc_advanced(sequence, [&](sshash::lookup_result res) { + callback(sshash_to_graph_index(res.kmer_id), res.kmer_orientation); + }, with_rc, terminate); } + template void DBGSSHash::map_to_nodes_with_rc(std::string_view, const std::function&, diff --git a/metagraph/src/graph/representation/hash/dbg_sshash.hpp b/metagraph/src/graph/representation/hash/dbg_sshash.hpp index 12c1a407bd..0f3da5dc5a 100644 --- a/metagraph/src/graph/representation/hash/dbg_sshash.hpp +++ b/metagraph/src/graph/representation/hash/dbg_sshash.hpp @@ -117,6 +117,12 @@ class DBGSSHash : public DeBruijnGraph { size_t num_nodes_; Mode mode_; + void map_to_nodes_with_rc_advanced( + std::string_view sequence, + const std::function& callback, + bool with_rc, + const std::function& terminate = []() { return false; }) const; + size_t dict_size() const; }; diff --git a/metagraph/tests/annotation/test_annotated_dbg.cpp b/metagraph/tests/annotation/test_annotated_dbg.cpp index 278f01f1f0..d1437aa725 100644 --- a/metagraph/tests/annotation/test_annotated_dbg.cpp +++ b/metagraph/tests/annotation/test_annotated_dbg.cpp @@ -4,15 +4,12 @@ #include "gtest/gtest.h" #include "../test_helpers.hpp" +#include "../graph/all/test_dbg_helpers.hpp" #include "common/threads/threading.hpp" #include "common/vectors/bit_vector_dyn.hpp" #include "common/vectors/vector_algorithm.hpp" #include "annotation/representation/column_compressed/annotate_column_compressed.hpp" -#include "graph/representation/bitmap/dbg_bitmap.hpp" -#include "graph/representation/hash/dbg_hash_string.hpp" -#include "graph/representation/hash/dbg_hash_ordered.hpp" -#include "graph/representation/hash/dbg_hash_fast.hpp" #define protected public #define private public @@ -987,6 +984,7 @@ typedef ::testing::Types>, std::pair>, std::pair>, std::pair>, + std::pair>, std::pair, std::pair, std::pair, @@ -1016,6 +1014,7 @@ class AnnotatedDBGNoNTest : public ::testing::Test {}; typedef ::testing::Types>, std::pair>, std::pair>, + std::pair>, std::pair, std::pair, std::pair, diff --git a/metagraph/tests/annotation/test_annotated_dbg_helpers.cpp b/metagraph/tests/annotation/test_annotated_dbg_helpers.cpp index 2c6b1f5735..39e387335e 100644 --- a/metagraph/tests/annotation/test_annotated_dbg_helpers.cpp +++ b/metagraph/tests/annotation/test_annotated_dbg_helpers.cpp @@ -235,6 +235,7 @@ template std::unique_ptr build_anno_graph build_anno_graph>(uint64_t, const std::vector &, const std::vector&, DeBruijnGraph::Mode, bool); template std::unique_ptr build_anno_graph>(uint64_t, const std::vector &, const std::vector&, DeBruijnGraph::Mode, bool); template std::unique_ptr build_anno_graph>(uint64_t, const std::vector &, const std::vector&, DeBruijnGraph::Mode, bool); +template std::unique_ptr build_anno_graph>(uint64_t, const std::vector &, const std::vector&, DeBruijnGraph::Mode, bool); template std::unique_ptr build_anno_graph(uint64_t, const std::vector &, const std::vector&, DeBruijnGraph::Mode, bool); template std::unique_ptr build_anno_graph(uint64_t, const std::vector &, const std::vector&, DeBruijnGraph::Mode, bool); diff --git a/metagraph/tests/graph/all/test_dbg_helpers.cpp b/metagraph/tests/graph/all/test_dbg_helpers.cpp index 82c6878024..a4d151a093 100644 --- a/metagraph/tests/graph/all/test_dbg_helpers.cpp +++ b/metagraph/tests/graph/all/test_dbg_helpers.cpp @@ -1,6 +1,10 @@ #include "test_dbg_helpers.hpp" +#include "../../annotation/test_annotated_dbg_helpers.hpp" +#include "annotation/representation/column_compressed/annotate_column_compressed.hpp" + #include "gtest/gtest.h" +#include "graph/annotated_dbg.hpp" #include "graph/representation/canonical_dbg.hpp" #include "graph/representation/succinct/boss.hpp" #include "graph/representation/succinct/boss_construct.hpp" @@ -146,6 +150,7 @@ void writeFastaFile(const std::vector& sequences, const std::string fastaFile.close(); } + template <> std::shared_ptr build_graph(uint64_t k, @@ -155,7 +160,7 @@ build_graph(uint64_t k, return std::make_shared(k, mode); // use DBGHashString to get contigs for SSHash - auto string_graph = build_graph(k, sequences, mode); + auto string_graph = build_graph(k, sequences, mode); std::vector contigs; size_t num_kmers = 0; From 0f91011c07e7dde6885c996ccd9cc167d2e26626 Mon Sep 17 00:00:00 2001 From: Harun Mustafa Date: Tue, 8 Oct 2024 11:59:00 +0200 Subject: [PATCH 02/10] simplify --- metagraph/src/graph/representation/hash/dbg_sshash.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/metagraph/src/graph/representation/hash/dbg_sshash.cpp b/metagraph/src/graph/representation/hash/dbg_sshash.cpp index ecda8b57ea..23f7266789 100644 --- a/metagraph/src/graph/representation/hash/dbg_sshash.cpp +++ b/metagraph/src/graph/representation/hash/dbg_sshash.cpp @@ -114,8 +114,7 @@ ::map_to_nodes_with_rc_advanced(std::string_view sequence, std::vector seq_encoded; seq_encoded.reserve(sequence.size()); for (size_t i = 0; i < sequence.size(); ++i) { - char enc = kmer_t::canonicalize_basepair_forward_map[static_cast(sequence[i])]; - seq_encoded.emplace_back(enc == '\0'); + seq_encoded.emplace_back(!kmer_t::is_valid(sequence[i])); } auto invalid = utils::drag_and_mark_segments(seq_encoded, 1, k_); From 5860d1521d6e2a1de986d31fe4283c392af51bd3 Mon Sep 17 00:00:00 2001 From: Harun Mustafa Date: Tue, 8 Oct 2024 15:21:59 +0200 Subject: [PATCH 03/10] Update metagraph/tests/graph/all/test_dbg_helpers.cpp Co-authored-by: Oleksandr Kulkov --- metagraph/tests/graph/all/test_dbg_helpers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagraph/tests/graph/all/test_dbg_helpers.cpp b/metagraph/tests/graph/all/test_dbg_helpers.cpp index a4d151a093..da2fc3de37 100644 --- a/metagraph/tests/graph/all/test_dbg_helpers.cpp +++ b/metagraph/tests/graph/all/test_dbg_helpers.cpp @@ -159,7 +159,7 @@ build_graph(uint64_t k, if (sequences.empty()) return std::make_shared(k, mode); - // use DBGHashString to get contigs for SSHash + // use DBGHashFast to get contigs for SSHash auto string_graph = build_graph(k, sequences, mode); std::vector contigs; From 631fcfb97968b39ea6253752287f1b728226eb29 Mon Sep 17 00:00:00 2001 From: Harun Mustafa Date: Tue, 8 Oct 2024 15:53:05 +0200 Subject: [PATCH 04/10] cleanup --- metagraph/tests/graph/all/test_dbg_helpers.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/metagraph/tests/graph/all/test_dbg_helpers.cpp b/metagraph/tests/graph/all/test_dbg_helpers.cpp index da2fc3de37..3af60c0cfb 100644 --- a/metagraph/tests/graph/all/test_dbg_helpers.cpp +++ b/metagraph/tests/graph/all/test_dbg_helpers.cpp @@ -1,10 +1,6 @@ #include "test_dbg_helpers.hpp" -#include "../../annotation/test_annotated_dbg_helpers.hpp" -#include "annotation/representation/column_compressed/annotate_column_compressed.hpp" - #include "gtest/gtest.h" -#include "graph/annotated_dbg.hpp" #include "graph/representation/canonical_dbg.hpp" #include "graph/representation/succinct/boss.hpp" #include "graph/representation/succinct/boss_construct.hpp" From 6889ce00387988b69ab8f38d957df28d9c8bbcf5 Mon Sep 17 00:00:00 2001 From: Harun Mustafa Date: Tue, 8 Oct 2024 16:48:47 +0200 Subject: [PATCH 05/10] minor --- metagraph/src/graph/representation/canonical_dbg.cpp | 4 ++-- metagraph/tests/annotation/test_aligner_labeled.cpp | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/metagraph/src/graph/representation/canonical_dbg.cpp b/metagraph/src/graph/representation/canonical_dbg.cpp index a944cd958c..39b3798001 100644 --- a/metagraph/src/graph/representation/canonical_dbg.cpp +++ b/metagraph/src/graph/representation/canonical_dbg.cpp @@ -180,7 +180,7 @@ void CanonicalDBG::call_outgoing_kmers(node_index node, } if (const auto sshash = std::dynamic_pointer_cast(graph_)) { - sshash->call_outgoing_kmers_with_rc<>(node, [&](node_index next, char c, bool orientation) { + sshash->call_outgoing_kmers_with_rc(node, [&](node_index next, char c, bool orientation) { callback(orientation ? reverse_complement(next) : next, c); }); return; @@ -273,7 +273,7 @@ void CanonicalDBG::call_incoming_kmers(node_index node, } if (const auto sshash = std::dynamic_pointer_cast(graph_)) { - sshash->call_incoming_kmers_with_rc<>(node, [&](node_index prev, char c, bool orientation) { + sshash->call_incoming_kmers_with_rc(node, [&](node_index prev, char c, bool orientation) { callback(orientation ? reverse_complement(prev) : prev, c); }); return; diff --git a/metagraph/tests/annotation/test_aligner_labeled.cpp b/metagraph/tests/annotation/test_aligner_labeled.cpp index 6462cdfc73..bdeddccf7f 100644 --- a/metagraph/tests/annotation/test_aligner_labeled.cpp +++ b/metagraph/tests/annotation/test_aligner_labeled.cpp @@ -52,6 +52,7 @@ class LabeledAlignerTest : public ::testing::Test {}; typedef ::testing::Types>, std::pair>, + std::pair>, std::pair, std::pair, std::pair> FewGraphAnnotationPairTypes; From 3e44b8cb32c27e3133867d4e9138a7d620828857 Mon Sep 17 00:00:00 2001 From: Harun Mustafa Date: Tue, 8 Oct 2024 21:52:44 +0200 Subject: [PATCH 06/10] Update metagraph/src/graph/representation/hash/dbg_sshash.cpp Co-authored-by: Oleksandr Kulkov --- .../graph/representation/hash/dbg_sshash.cpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/metagraph/src/graph/representation/hash/dbg_sshash.cpp b/metagraph/src/graph/representation/hash/dbg_sshash.cpp index 23f7266789..e4b7501f6c 100644 --- a/metagraph/src/graph/representation/hash/dbg_sshash.cpp +++ b/metagraph/src/graph/representation/hash/dbg_sshash.cpp @@ -105,27 +105,26 @@ ::map_to_nodes_with_rc_advanced(std::string_view sequence, const std::function& callback, bool with_rc, const std::function& terminate) const { - if (terminate() || sequence.size() < k_) + size_t n = sequence.size(); + if (terminate() || n < k_) return; std::visit([&](const auto &dict) { using kmer_t = get_kmer_t; - std::vector seq_encoded; - seq_encoded.reserve(sequence.size()); - for (size_t i = 0; i < sequence.size(); ++i) { - seq_encoded.emplace_back(!kmer_t::is_valid(sequence[i])); + std::vector invalid_char(n); + for (size_t i = 0; i < n; ++i) { + invalid_char[i] = !kmer_t::is_valid(sequence[i]); } - - auto invalid = utils::drag_and_mark_segments(seq_encoded, 1, k_); + auto invalid_kmer = utils::drag_and_mark_segments(invalid_char, true, k_); kmer_t uint_kmer = sshash::util::string_to_uint_kmer(sequence.data(), k_ - 1); uint_kmer.pad_char(); - for (size_t i = k_ - 1; i < sequence.size() && !terminate(); ++i) { + for (size_t i = k_ - 1; i < n && !terminate(); ++i) { uint_kmer.drop_char(); uint_kmer.kth_char_or(k_ - 1, kmer_t::char_to_uint(sequence[i])); - callback(!invalid[i] ? dict.lookup_advanced_uint(uint_kmer, with_rc) - : sshash::lookup_result()); + callback(invalid_kmer[i] ? sshash::lookup_result() + : dict.lookup_advanced_uint(uint_kmer, with_rc)); } }, dict_); } From 6a6fd0a40a7b37acfd4dea82000c8b84570470d5 Mon Sep 17 00:00:00 2001 From: Harun Mustafa Date: Tue, 8 Oct 2024 22:07:05 +0200 Subject: [PATCH 07/10] refactor --- .../graph/representation/hash/dbg_sshash.cpp | 54 ++++++++++--------- .../graph/representation/hash/dbg_sshash.hpp | 6 --- 2 files changed, 28 insertions(+), 32 deletions(-) diff --git a/metagraph/src/graph/representation/hash/dbg_sshash.cpp b/metagraph/src/graph/representation/hash/dbg_sshash.cpp index e4b7501f6c..f9ec840671 100644 --- a/metagraph/src/graph/representation/hash/dbg_sshash.cpp +++ b/metagraph/src/graph/representation/hash/dbg_sshash.cpp @@ -100,33 +100,33 @@ void DBGSSHash::add_sequence(std::string_view sequence, throw std::logic_error("adding sequences not supported"); } -void DBGSSHash -::map_to_nodes_with_rc_advanced(std::string_view sequence, - const std::function& callback, - bool with_rc, - const std::function& terminate) const { +template +void map_to_nodes_with_rc_advanced(size_t k, + const Dict &dict, + std::string_view sequence, + const std::function& callback, + const std::function& terminate) { size_t n = sequence.size(); - if (terminate() || n < k_) + if (terminate() || n < k) return; - std::visit([&](const auto &dict) { - using kmer_t = get_kmer_t; + using kmer_t = get_kmer_t; - std::vector invalid_char(n); - for (size_t i = 0; i < n; ++i) { - invalid_char[i] = !kmer_t::is_valid(sequence[i]); - } - auto invalid_kmer = utils::drag_and_mark_segments(invalid_char, true, k_); - - kmer_t uint_kmer = sshash::util::string_to_uint_kmer(sequence.data(), k_ - 1); - uint_kmer.pad_char(); - for (size_t i = k_ - 1; i < n && !terminate(); ++i) { - uint_kmer.drop_char(); - uint_kmer.kth_char_or(k_ - 1, kmer_t::char_to_uint(sequence[i])); - callback(invalid_kmer[i] ? sshash::lookup_result() - : dict.lookup_advanced_uint(uint_kmer, with_rc)); - } - }, dict_); + std::vector seq_encoded(n); + for (size_t i = 0; i < n; ++i) { + seq_encoded[i] = !kmer_t::is_valid(sequence[i]); + } + + auto invalid_kmer = utils::drag_and_mark_segments(seq_encoded, 1, k); + + kmer_t uint_kmer = sshash::util::string_to_uint_kmer(sequence.data(), k - 1); + uint_kmer.pad_char(); + for (size_t i = k - 1; i < n && !terminate(); ++i) { + uint_kmer.drop_char(); + uint_kmer.kth_char_or(k - 1, kmer_t::char_to_uint(sequence[i])); + callback(invalid_kmer[i] ? sshash::lookup_result() + : dict.lookup_advanced_uint(uint_kmer, with_rc)); + } } template @@ -143,9 +143,11 @@ void DBGSSHash::map_to_nodes_with_rc(std::string_view sequence, return; } - map_to_nodes_with_rc_advanced(sequence, [&](sshash::lookup_result res) { - callback(sshash_to_graph_index(res.kmer_id), res.kmer_orientation); - }, with_rc, terminate); + std::visit([&](const auto &dict) { + map_to_nodes_with_rc_advanced(k_, dict, sequence, [&](sshash::lookup_result res) { + callback(sshash_to_graph_index(res.kmer_id), res.kmer_orientation); + }, terminate); + }, dict_); } template diff --git a/metagraph/src/graph/representation/hash/dbg_sshash.hpp b/metagraph/src/graph/representation/hash/dbg_sshash.hpp index 0f3da5dc5a..12c1a407bd 100644 --- a/metagraph/src/graph/representation/hash/dbg_sshash.hpp +++ b/metagraph/src/graph/representation/hash/dbg_sshash.hpp @@ -117,12 +117,6 @@ class DBGSSHash : public DeBruijnGraph { size_t num_nodes_; Mode mode_; - void map_to_nodes_with_rc_advanced( - std::string_view sequence, - const std::function& callback, - bool with_rc, - const std::function& terminate = []() { return false; }) const; - size_t dict_size() const; }; From 844e4e1d92f1b75c3f88baa80915d77471e17432 Mon Sep 17 00:00:00 2001 From: Harun Mustafa Date: Tue, 8 Oct 2024 23:01:11 +0200 Subject: [PATCH 08/10] addressed reviewer comments --- metagraph/src/common/algorithms.hpp | 3 +- .../graph/representation/hash/dbg_sshash.cpp | 31 +++++++++---------- 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/metagraph/src/common/algorithms.hpp b/metagraph/src/common/algorithms.hpp index c203883888..c3879c91b7 100644 --- a/metagraph/src/common/algorithms.hpp +++ b/metagraph/src/common/algorithms.hpp @@ -53,8 +53,7 @@ namespace utils { size_t segment_length) { std::vector mask(array.size(), false); size_t last_occurrence - = std::find(array.data(), array.data() + array.size(), label) - - array.data(); + = std::find(array.begin(), array.end(), label) - array.begin(); for (size_t i = last_occurrence; i < array.size(); ++i) { if (array[i] == label) diff --git a/metagraph/src/graph/representation/hash/dbg_sshash.cpp b/metagraph/src/graph/representation/hash/dbg_sshash.cpp index f9ec840671..6908dec087 100644 --- a/metagraph/src/graph/representation/hash/dbg_sshash.cpp +++ b/metagraph/src/graph/representation/hash/dbg_sshash.cpp @@ -101,18 +101,25 @@ void DBGSSHash::add_sequence(std::string_view sequence, } template -void map_to_nodes_with_rc_advanced(size_t k, - const Dict &dict, - std::string_view sequence, - const std::function& callback, - const std::function& terminate) { +void map_to_nodes_with_rc_impl(size_t k, + const Dict &dict, + std::string_view sequence, + const std::function& callback, + const std::function& terminate) { size_t n = sequence.size(); if (terminate() || n < k) return; + if (!dict.size()) { + for (size_t i = 0; i + k <= sequence.size() && !terminate(); ++i) { + callback(sshash::lookup_result()); + } + return; + } + using kmer_t = get_kmer_t; - std::vector seq_encoded(n); + std::vector seq_encoded(n); for (size_t i = 0; i < n; ++i) { seq_encoded[i] = !kmer_t::is_valid(sequence[i]); } @@ -133,18 +140,8 @@ template void DBGSSHash::map_to_nodes_with_rc(std::string_view sequence, const std::function& callback, const std::function& terminate) const { - if (terminate() || sequence.size() < k_) - return; - - if (!num_nodes()) { - for (size_t i = 0; i < sequence.size() - k_ + 1 && !terminate(); ++i) { - callback(npos, false); - } - return; - } - std::visit([&](const auto &dict) { - map_to_nodes_with_rc_advanced(k_, dict, sequence, [&](sshash::lookup_result res) { + map_to_nodes_with_rc_impl(k_, dict, sequence, [&](sshash::lookup_result res) { callback(sshash_to_graph_index(res.kmer_id), res.kmer_orientation); }, terminate); }, dict_); From fca3569f3580db37f625f22196a984fb111a4054 Mon Sep 17 00:00:00 2001 From: Harun Mustafa Date: Tue, 8 Oct 2024 23:06:58 +0200 Subject: [PATCH 09/10] auto-deduce dict type --- metagraph/src/graph/representation/hash/dbg_sshash.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metagraph/src/graph/representation/hash/dbg_sshash.cpp b/metagraph/src/graph/representation/hash/dbg_sshash.cpp index 6908dec087..50c3a0e3da 100644 --- a/metagraph/src/graph/representation/hash/dbg_sshash.cpp +++ b/metagraph/src/graph/representation/hash/dbg_sshash.cpp @@ -100,7 +100,7 @@ void DBGSSHash::add_sequence(std::string_view sequence, throw std::logic_error("adding sequences not supported"); } -template +template void map_to_nodes_with_rc_impl(size_t k, const Dict &dict, std::string_view sequence, @@ -141,7 +141,7 @@ void DBGSSHash::map_to_nodes_with_rc(std::string_view sequence, const std::function& callback, const std::function& terminate) const { std::visit([&](const auto &dict) { - map_to_nodes_with_rc_impl(k_, dict, sequence, [&](sshash::lookup_result res) { + map_to_nodes_with_rc_impl(k_, dict, sequence, [&](sshash::lookup_result res) { callback(sshash_to_graph_index(res.kmer_id), res.kmer_orientation); }, terminate); }, dict_); From cd03ed267b1a058a37ee3c85b76048ffec705448 Mon Sep 17 00:00:00 2001 From: Oleksandr Kulkov Date: Tue, 8 Oct 2024 23:08:02 +0200 Subject: [PATCH 10/10] Update metagraph/src/graph/representation/hash/dbg_sshash.cpp --- metagraph/src/graph/representation/hash/dbg_sshash.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metagraph/src/graph/representation/hash/dbg_sshash.cpp b/metagraph/src/graph/representation/hash/dbg_sshash.cpp index 50c3a0e3da..07120a28e2 100644 --- a/metagraph/src/graph/representation/hash/dbg_sshash.cpp +++ b/metagraph/src/graph/representation/hash/dbg_sshash.cpp @@ -119,12 +119,12 @@ void map_to_nodes_with_rc_impl(size_t k, using kmer_t = get_kmer_t; - std::vector seq_encoded(n); + std::vector invalid_char(n); for (size_t i = 0; i < n; ++i) { - seq_encoded[i] = !kmer_t::is_valid(sequence[i]); + invalid_char[i] = !kmer_t::is_valid(sequence[i]); } - auto invalid_kmer = utils::drag_and_mark_segments(seq_encoded, 1, k); + auto invalid_kmer = utils::drag_and_mark_segments(invalid_char, true, k); kmer_t uint_kmer = sshash::util::string_to_uint_kmer(sequence.data(), k - 1); uint_kmer.pad_char();