Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
84 commits
Select commit Hold shift + click to select a range
f2e15e4
Add row_diff_traverse, row_diff_successor
adamant-pwn May 23, 2024
f3fb0b0
Update, propagate to DBGSuccinct
adamant-pwn Jul 2, 2024
d7878bf
Use graph::DeBruijnGraph in build_pred_succ and assign_anchors
adamant-pwn Oct 7, 2024
3da66a9
Update download-artifact
adamant-pwn Oct 8, 2024
3a16332
Apply suggestions from code review
adamant-pwn Oct 8, 2024
76b29b7
Merge master to rowdiff (#504)
adamant-pwn Oct 8, 2024
3a9f1c6
Try simplifying build_pred_succ, temporarily rollback to graph.get_la…
adamant-pwn Oct 8, 2024
b18d463
Special handling of last.size()
adamant-pwn Oct 8, 2024
629cb09
Use last instead of rd_succ if it's empty
adamant-pwn Oct 9, 2024
71c19a1
Add checks to fix integration tests
adamant-pwn Oct 9, 2024
90abc08
Use BOSS index space in DBGSuccinct
adamant-pwn Oct 9, 2024
f019a05
override final for call_nodes + add select/rank node + some fixes
adamant-pwn Oct 9, 2024
358e445
num_nodes() -> max_index() for dbg_succ_
adamant-pwn Oct 9, 2024
1068116
Add is_valid checks to nodes in DBGSuccinct
adamant-pwn Oct 9, 2024
42707ff
Fix DBGSuccinct tests
adamant-pwn Oct 9, 2024
a49f07f
Fix AnnotatedDBG test group
adamant-pwn Oct 9, 2024
68dd5f5
Fix RowDiff tests
adamant-pwn Oct 10, 2024
ee0c4a8
Return npos in certain callbacks for dummy nodes
adamant-pwn Oct 10, 2024
a46f6a4
Validate edges on BOSS edge -> DBG node transition
adamant-pwn Oct 10, 2024
60851da
More validate_edge checks
adamant-pwn Oct 10, 2024
91ea56f
Use is_valid in adjacent_outgoing_rc_strand
adamant-pwn Oct 10, 2024
1976be1
Fix identation + annotations without succ
adamant-pwn Oct 10, 2024
4e971da
Fix RowDiff test
adamant-pwn Oct 10, 2024
7737e26
Move get_last, row_diff_traverse, row_diff_successor into row_diff_bu…
adamant-pwn Oct 10, 2024
04740f1
Preserve lifetime of get_last
adamant-pwn Oct 10, 2024
e2347d6
Fix integration tests
adamant-pwn Oct 15, 2024
94e9f90
Fix integration tests + return dict in _get_stats
adamant-pwn Oct 15, 2024
3bc714d
Apply review suggestions
adamant-pwn Nov 4, 2024
3a87651
Use valid_edges_->call_ones
adamant-pwn Nov 4, 2024
9ed4bfd
Use RowDiff with DBGHashFast
adamant-pwn Oct 15, 2024
275149e
Fix unit test and compilation errors
adamant-pwn Oct 16, 2024
40cb43e
vla-cxx-extension -> vla-extension (older versions don't support it)
adamant-pwn Oct 16, 2024
0767479
Add GDB debugging to integration test + fix
adamant-pwn Oct 16, 2024
6c89455
Update
adamant-pwn Oct 16, 2024
fac4222
Update
adamant-pwn Oct 16, 2024
29ded0f
Throw on invalid final state
adamant-pwn Oct 16, 2024
aa8b9fb
Distinguish dummy nodes and nodes with zero outdegree
adamant-pwn Oct 16, 2024
2d1379e
Fixes
adamant-pwn Oct 16, 2024
2bfaa47
Apply review suggestions
adamant-pwn Nov 4, 2024
b9f4da0
Merge remote-tracking branch 'origin/master' into rowdiff-2
hmusta Dec 3, 2024
c808190
Multithreaded get_last
hmusta Dec 4, 2024
c13196f
Update row_diff_builder.cpp
hmusta Dec 4, 2024
e706240
Update row_diff_builder.cpp
hmusta Dec 4, 2024
7a2f680
parallel row_diff_traverse
hmusta Feb 7, 2025
a0828ef
fix
hmusta Feb 7, 2025
d43903f
fix
hmusta Feb 7, 2025
9751cbb
minor
hmusta Feb 7, 2025
2c887e7
Merge remote-tracking branch 'origin/master' into rowdiff-2
hmusta Feb 7, 2025
cca1c69
Merge remote-tracking branch 'origin/master' into rowdiff
hmusta Feb 7, 2025
4f94d96
Merge branch 'rowdiff' into rowdiff-2
hmusta Feb 7, 2025
86a8667
test
hmusta Feb 10, 2025
66cf80c
multi-threaded call_nodes
hmusta Feb 10, 2025
9283562
use parallel call_nodes
hmusta Feb 10, 2025
23bbb34
cleanup
hmusta Feb 10, 2025
8b8d26b
disable automatic valid_edges_ generation
hmusta Feb 10, 2025
de88200
avoid node dropout and unnecessary atomic checks
hmusta Feb 10, 2025
89f725d
progress bar
hmusta Feb 10, 2025
9b5c019
multithreaded dbg succinct call_nodes
hmusta Feb 10, 2025
85750c3
fix integration test
hmusta Feb 10, 2025
32849af
fix
hmusta Feb 10, 2025
798d0d2
multithreaded call_nodes on masked graphs
hmusta Feb 10, 2025
9008f56
compile on mac
hmusta Feb 10, 2025
58e28d5
disable old DBGSuccinct check when loading rowdiff
hmusta Feb 11, 2025
6807a50
Merge remote-tracking branch 'origin/master' into rowdiff
hmusta Mar 4, 2025
2b2518b
Merge remote-tracking branch 'origin/rowdiff' into rowdiff-2
hmusta Mar 4, 2025
99f726f
Merge remote-tracking branch 'origin/master' into rowdiff
hmusta Mar 5, 2025
21864c4
Merge branch 'rowdiff' into rowdiff-2
hmusta Mar 5, 2025
1e128a5
Merge remote-tracking branch 'origin/master' into rowdiff
hmusta Mar 6, 2025
8cb9227
Merge branch 'rowdiff' into rowdiff-2
hmusta Mar 6, 2025
e57c4a5
fix DBGSuccinct::call_nodes and RowDiff::get_columns
hmusta Mar 10, 2025
ee204b1
fix bug where some dummy k-mers were marked as RowDiff termini
hmusta Mar 13, 2025
fc1cf3c
port row_diff_traverse from DBGSuccinct
hmusta Mar 13, 2025
bd994d5
DeBruijnGraph::in_graph
hmusta Mar 14, 2025
3cbfe34
Merge branch 'rowdiff' into rowdiff-2
hmusta Mar 14, 2025
a0112be
revert
hmusta Mar 14, 2025
f522249
extra check
hmusta Mar 14, 2025
7f2d4bc
don't set dummy k-mers as anchors
hmusta Mar 14, 2025
b0e88fe
this works
hmusta Mar 14, 2025
c98716a
minor
hmusta Mar 14, 2025
c0730e0
Merge remote-tracking branch 'origin/rowdiff' into rowdiff-2
hmusta Mar 14, 2025
6ecef56
fixes
hmusta Mar 14, 2025
8271b6f
Merge branch 'rowdiff-2' into rowdiff-3
hmusta Mar 16, 2025
52948cd
optimize forks for non-succinct graphs
hmusta Mar 17, 2025
c955ccd
Merge branch 'rowdiff-2' into rowdiff-3
hmusta Mar 17, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ on:
tags:
- 'v*'
pull_request:
branches:
- master

env:
REGISTRY: ghcr.io
Expand Down
1 change: 1 addition & 0 deletions metagraph/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
add_compile_options(
-Wno-exit-time-destructors
-Wno-deprecated-declarations
-Wno-vla-extension
)

if (NOT CMAKE_CXX_COMPILER_ID MATCHES "AppleClang")
Expand Down
22 changes: 19 additions & 3 deletions metagraph/integration_tests/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@

script_path = os.path.dirname(os.path.realpath(__file__))

METAGRAPH = f'{os.getcwd()}/metagraph'
METAGRAPH_EXE = f'{os.getcwd()}/metagraph'
DNA_MODE = os.readlink(METAGRAPH_EXE).endswith("_DNA")
PROTEIN_MODE = os.readlink(METAGRAPH_EXE).endswith("_Protein")
METAGRAPH = METAGRAPH_EXE

def update_prefix(PREFIX):
global METAGRAPH
METAGRAPH = PREFIX + METAGRAPH_EXE

TEST_DATA_DIR = os.path.join(script_path, '..', 'tests', 'data')

Expand Down Expand Up @@ -37,10 +44,19 @@ def setUpClass(cls):
def _get_stats(graph_path):
stats_command = METAGRAPH + ' stats ' + graph_path + ' --mmap'
res = subprocess.run(stats_command.split(), stdout=PIPE, stderr=PIPE)
assert(res.returncode == 0)
if res.returncode != 0:
raise AssertionError(f"Command '{stats_command}' failed with return code {res.returncode} and error: {res.stderr.decode()}")
stats_command = METAGRAPH + ' stats ' + graph_path + MMAP_FLAG
res = subprocess.run(stats_command.split(), stdout=PIPE, stderr=PIPE)
return res
parsed = dict()
parsed['returncode'] = res.returncode
res = res.stdout.decode().split('\n')[2:]
for line in res:
if ': ' in line:
x, y = map(str.strip, line.split(':', 1))
assert(x not in parsed or parsed[x] == y)
parsed[x] = y
return parsed

@staticmethod
def _build_graph(input, output, k, repr, mode='basic', extra_params=''):
Expand Down
5 changes: 5 additions & 0 deletions metagraph/integration_tests/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import sys
import argparse
from helpers import TimeLoggingTestResult
from base import update_prefix


"""Run all integration tests"""
Expand Down Expand Up @@ -32,7 +33,11 @@ def create_test_suite(filter_pattern="*"):
parser = argparse.ArgumentParser(description='Metagraph integration tests.')
parser.add_argument('--test_filter', dest='filter', type=str, default="*",
help='filter test cases (default: run all)')
parser.add_argument('--gdb', dest='use_gdb', action='store_true',
help='run metagraph with gdb')
args = parser.parse_args()
if args.use_gdb:
update_prefix('gdb -ex run -ex bt -ex quit --args ')

result = unittest.TextTestRunner(
resultclass=TimeLoggingTestResult
Expand Down
119 changes: 53 additions & 66 deletions metagraph/integration_tests/test_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,11 @@
import glob
import os

from base import TestingBase, METAGRAPH, TEST_DATA_DIR, NUM_THREADS
from base import PROTEIN_MODE, DNA_MODE, TestingBase, METAGRAPH, TEST_DATA_DIR, NUM_THREADS


"""Test graph construction and alignment"""

DNA_MODE = os.readlink(METAGRAPH).endswith("_DNA")
PROTEIN_MODE = os.readlink(METAGRAPH).endswith("_Protein")

graph_file_extension = {'succinct': '.dbg',
'bitmap': '.bitmapdbg',
'hash': '.orhashdbg',
Expand All @@ -35,11 +32,10 @@ def test_simple_align_all_graphs(self, representation):
k=11, repr=representation,
extra_params="--mask-dummy")

res = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 11', params_str[0])
self.assertEqual('nodes (k): 16438', params_str[1])
self.assertEqual('mode: basic', params_str[2])
params = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
self.assertEqual('11', params['k'])
self.assertEqual('16438', params['nodes (k)'])
self.assertEqual('basic', params['mode'])

stats_command = '{exe} align --align-only-forwards -i {graph} --align-min-exact-match 0.0 {reads}'.format(
exe=METAGRAPH,
Expand Down Expand Up @@ -68,11 +64,10 @@ def test_simple_align_map_all_graphs(self, representation):
k=11, repr=representation,
extra_params="--mask-dummy")

res = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 11', params_str[0])
self.assertEqual('nodes (k): 16438', params_str[1])
self.assertEqual('mode: basic', params_str[2])
params = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
self.assertEqual('11', params['k'])
self.assertEqual('16438', params['nodes (k)'])
self.assertEqual('basic', params['mode'])

stats_command = '{exe} align -i {graph} --map --count-kmers {reads}'.format(
exe=METAGRAPH,
Expand All @@ -99,11 +94,10 @@ def test_simple_align_map_all_graphs_subk(self, representation):
k=11, repr=representation,
extra_params="--mask-dummy")

res = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 11', params_str[0])
self.assertEqual('nodes (k): 16438', params_str[1])
self.assertEqual('mode: basic', params_str[2])
params = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
self.assertEqual('11', params['k'])
self.assertEqual('16438', params['nodes (k)'])
self.assertEqual('basic', params['mode'])

stats_command = '{exe} align -i {graph} --map --count-kmers --align-length 10 {reads}'.format(
exe=METAGRAPH,
Expand Down Expand Up @@ -134,11 +128,10 @@ def test_simple_align_map_canonical_all_graphs(self, representation):
k=11, repr=representation, mode='canonical',
extra_params="--mask-dummy")

res = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 11', params_str[0])
self.assertEqual('nodes (k): 32782', params_str[1])
self.assertEqual('mode: canonical', params_str[2])
params = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
self.assertEqual('11', params['k'])
self.assertEqual('32782', params['nodes (k)'])
self.assertEqual('canonical', params['mode'])

stats_command = '{exe} align -i {graph} --map --count-kmers {reads}'.format(
exe=METAGRAPH,
Expand All @@ -165,11 +158,10 @@ def test_simple_align_json_all_graphs(self, representation):
k=11, repr=representation,
extra_params="--mask-dummy")

res = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 11', params_str[0])
self.assertEqual('nodes (k): 16438', params_str[1])
self.assertEqual('mode: basic', params_str[2])
params = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
self.assertEqual('11', params['k'])
self.assertEqual('16438', params['nodes (k)'])
self.assertEqual('basic', params['mode'])

stats_command = '{exe} align --align-only-forwards -i {graph} --align-min-exact-match 0.0 {reads}'.format(
exe=METAGRAPH,
Expand All @@ -189,11 +181,10 @@ def test_simple_align_fwd_rev_comp_all_graphs(self, representation):
k=11, repr=representation,
extra_params="--mask-dummy")

res = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 11', params_str[0])
self.assertEqual('nodes (k): 16438', params_str[1])
self.assertEqual('mode: basic', params_str[2])
params = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
self.assertEqual('11', params['k'])
self.assertEqual('16438', params['nodes (k)'])
self.assertEqual('basic', params['mode'])

stats_command = '{exe} align -i {graph} --align-min-exact-match 0.0 {reads}'.format(
exe=METAGRAPH,
Expand Down Expand Up @@ -222,11 +213,10 @@ def test_simple_align_canonical_all_graphs(self, representation):
k=11, repr=representation, mode='canonical',
extra_params="--mask-dummy")

res = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 11', params_str[0])
self.assertEqual('nodes (k): 32782', params_str[1])
self.assertEqual('mode: canonical', params_str[2])
params = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
self.assertEqual('11', params['k'])
self.assertEqual('32782', params['nodes (k)'])
self.assertEqual('canonical', params['mode'])

stats_command = '{exe} align -i {graph} --align-min-exact-match 0.0 {reads}'.format(
exe=METAGRAPH,
Expand Down Expand Up @@ -256,11 +246,10 @@ def test_simple_align_canonical_subk_succinct(self, representation):
k=11, repr=representation, mode='canonical',
extra_params="--mask-dummy")

res = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 11', params_str[0])
self.assertEqual('nodes (k): 32782', params_str[1])
self.assertEqual('mode: canonical', params_str[2])
params = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
self.assertEqual('11', params['k'])
self.assertEqual('32782', params['nodes (k)'])
self.assertEqual('canonical', params['mode'])

stats_command = '{exe} align -i {graph} --align-min-exact-match 0.0 --align-min-seed-length 10 {reads}'.format(
exe=METAGRAPH,
Expand All @@ -286,11 +275,10 @@ def test_simple_align_primary_all_graphs(self, representation):
k=11, repr=representation, mode='primary',
extra_params="--mask-dummy")

res = self._get_stats(self.tempdir.name + '/genome.MT.primary' + graph_file_extension[representation])
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 11', params_str[0])
self.assertEqual('nodes (k): 16391', params_str[1])
self.assertEqual('mode: primary', params_str[2])
params = self._get_stats(self.tempdir.name + '/genome.MT.primary' + graph_file_extension[representation])
self.assertEqual('11', params['k'])
self.assertEqual('16391', params['nodes (k)'])
self.assertEqual('primary', params['mode'])

stats_command = '{exe} align -i {graph} --align-min-exact-match 0.0 {reads}'.format(
exe=METAGRAPH,
Expand Down Expand Up @@ -320,11 +308,10 @@ def test_simple_align_primary_subk_succinct(self, representation):
k=11, repr=representation, mode='primary',
extra_params="--mask-dummy")

res = self._get_stats(self.tempdir.name + '/genome.MT.primary' + graph_file_extension[representation])
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 11', params_str[0])
self.assertEqual('nodes (k): 16391', params_str[1])
self.assertEqual('mode: primary', params_str[2])
params = self._get_stats(self.tempdir.name + '/genome.MT.primary' + graph_file_extension[representation])
self.assertEqual('11', params['k'])
self.assertEqual('16391', params['nodes (k)'])
self.assertEqual('primary', params['mode'])

stats_command = '{exe} align -i {graph} --align-min-exact-match 0.0 --align-min-seed-length 10 {reads}'.format(
exe=METAGRAPH,
Expand All @@ -347,13 +334,13 @@ def test_simple_align_fwd_rev_comp_json_all_graphs(self, representation):

self._build_graph(input=TEST_DATA_DIR + '/genome.MT.fa',
output=self.tempdir.name + '/genome.MT',
k=11, repr=representation)
k=11, repr=representation,
extra_params="--mask-dummy")

res = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 11', params_str[0])
self.assertEqual('nodes (k): 16461', params_str[1])
self.assertEqual('mode: basic', params_str[2])
params = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
self.assertEqual('11', params['k'])
self.assertEqual('16438', params['nodes (k)'])
self.assertEqual('basic', params['mode'])

stats_command = '{exe} align --json -i {graph} --align-min-exact-match 0.0 {reads}'.format(
exe=METAGRAPH,
Expand All @@ -373,13 +360,13 @@ def test_simple_align_edit_distance_all_graphs(self, representation):

self._build_graph(input=TEST_DATA_DIR + '/genome.MT.fa',
output=self.tempdir.name + '/genome.MT',
k=11, repr=representation)
k=11, repr=representation,
extra_params="--mask-dummy")

res = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
params_str = res.stdout.decode().split('\n')[2:]
self.assertEqual('k: 11', params_str[0])
self.assertEqual('nodes (k): 16461', params_str[1])
self.assertEqual('mode: basic', params_str[2])
params = self._get_stats(self.tempdir.name + '/genome.MT' + graph_file_extension[representation])
self.assertEqual('11', params['k'])
self.assertEqual('16438', params['nodes (k)'])
self.assertEqual('basic', params['mode'])

stats_command = '{exe} align --json --align-edit-distance -i {graph} --align-min-exact-match 0.0 {reads}'.format(
exe=METAGRAPH,
Expand Down
Loading
Loading