Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
422 commits
Select commit Hold shift + click to select a range
48ec951
ttt
hmusta Jun 4, 2024
151e1ec
implement no test
hmusta Jun 4, 2024
4f0689f
properly handle exceptions
hmusta Jun 5, 2024
518554f
count scaling
hmusta Jun 5, 2024
92c1972
fixes
hmusta Jun 5, 2024
be0da98
fixes
hmusta Jun 5, 2024
4ebe05c
fisher exact
hmusta Jun 5, 2024
0fc53da
fixes
hmusta Jun 5, 2024
65e4b00
cleanup
hmusta Jun 5, 2024
03e7057
brunner-munzel
hmusta Jun 6, 2024
3b1bd19
proper min p-val calculation for poisson_likelihoodratio
hmusta Jun 6, 2024
e0c844d
stream pvalues vector
hmusta Jun 6, 2024
941ad64
update p estimate for nbinom_exact
hmusta Jun 7, 2024
41fe6bb
skip tests, don't filter out counts
hmusta Jun 7, 2024
7d44395
fix corner cases
hmusta Jun 7, 2024
0ce82e3
only compute pval map when falling back
hmusta Jun 7, 2024
4345dce
don't filter out outliers
hmusta Jun 7, 2024
7888d39
find target_p via MLE
hmusta Jun 9, 2024
6005066
fixes
hmusta Jun 10, 2024
837b1d8
test
hmusta Jun 10, 2024
cf35ca1
many fixes
hmusta Jun 10, 2024
e18c615
cleanup
hmusta Jun 11, 2024
1a932a6
cleanup
hmusta Jun 11, 2024
a57f3c0
more cleanup
hmusta Jun 11, 2024
d94e342
more cleanup
hmusta Jun 11, 2024
1d62939
cleanup
hmusta Jun 11, 2024
c76dbb8
more cleanup
hmusta Jun 11, 2024
85f3774
more cleanup
hmusta Jun 11, 2024
72d2926
minor
hmusta Jun 11, 2024
54141be
unused variable
hmusta Jun 11, 2024
700adb1
cleanup
hmusta Jun 11, 2024
6f0e8c2
cleanup
hmusta Jun 11, 2024
c401264
remove CMH test
hmusta Jun 11, 2024
f122ab3
fix
hmusta Jun 11, 2024
1296ada
fallbacks
hmusta Jun 11, 2024
443c059
simplification
hmusta Jun 11, 2024
ed9a5d1
cleanup
hmusta Jun 11, 2024
96a75bd
fix
hmusta Jun 11, 2024
aba7f15
cleanup
hmusta Jun 11, 2024
a467a80
cleanup
hmusta Jun 11, 2024
39520f2
mark unitig starts
hmusta Jun 13, 2024
6833ea6
cleanup
hmusta Jun 13, 2024
c4a6379
bug fix
hmusta Jun 13, 2024
5f69566
fixes
hmusta Jun 13, 2024
a5c9557
get rid of BY multiple testing correction
hmusta Jun 13, 2024
1d21670
cleanup
hmusta Jun 13, 2024
716760d
fix
hmusta Jun 14, 2024
faddd79
pick extreme based on deviance
hmusta Jun 17, 2024
34563cd
fix fits
hmusta Jun 18, 2024
a3ab60c
minor
hmusta Jun 18, 2024
e5fea40
extra checks
hmusta Jun 18, 2024
629c8e1
cleanup
hmusta Jun 18, 2024
959994f
fix
hmusta Jun 19, 2024
26bb423
fix check filtering
hmusta Jun 20, 2024
6bd53f7
cleanup
hmusta Jun 21, 2024
f265a0f
cleanup2
hmusta Jun 21, 2024
3433fe0
refactor
hmusta Jun 21, 2024
73a0826
add code for general annotators
hmusta Jun 21, 2024
35828b1
test
hmusta Jun 24, 2024
54ea4d0
Revert "test"
hmusta Jun 25, 2024
50664e4
simplier unitig-based test. code cleanup
hmusta Jun 25, 2024
20c3d51
cleanup
hmusta Jun 25, 2024
ea87fb8
more parallel
hmusta Jun 25, 2024
f615a61
parallel
hmusta Jun 26, 2024
d87acdf
int row flat
hmusta Jul 2, 2024
4b915ae
fixes
hmusta Jul 17, 2024
e96a7c3
fixes
hmusta Jul 17, 2024
4fcf1f6
streaming
hmusta Jul 17, 2024
7677bb7
make dumping pvals optional
hmusta Jul 17, 2024
d546977
cleanup
hmusta Jul 17, 2024
51341c1
refactor
hmusta Jul 17, 2024
46859ed
test
hmusta Jul 18, 2024
ca3bb57
scaling
hmusta Jul 19, 2024
fe200e3
much more parallel
hmusta Jul 20, 2024
5ea1a43
t
hmusta Jul 20, 2024
0a1040e
fix
hmusta Jul 20, 2024
1538a68
fixes
hmusta Jul 20, 2024
0cc6768
start
hmusta Jul 21, 2024
36301ef
efficient
hmusta Jul 21, 2024
1ed1d5f
ttt
hmusta Jul 22, 2024
99084cf
check
hmusta Jul 22, 2024
8c34845
push back
hmusta Jul 22, 2024
ef79109
rearrange
hmusta Jul 22, 2024
9397bbb
minor
hmusta Jul 23, 2024
fd1c8f8
test
hmusta Jul 23, 2024
488503c
refactor
hmusta Jul 23, 2024
447639a
remove std::visit
hmusta Jul 24, 2024
00d615d
test
hmusta Jul 24, 2024
d98d613
test
hmusta Jul 24, 2024
91976e9
fixes
hmusta Jul 25, 2024
d826a26
better parallel
hmusta Jul 26, 2024
b654368
optimizations
hmusta Jul 26, 2024
7da05f6
optimize
hmusta Jul 26, 2024
1ea18ab
deviance lookup table
hmusta Jul 27, 2024
d715d52
optim
hmusta Jul 27, 2024
9114af5
minor
hmusta Jul 27, 2024
0b918bb
simplify logic
hmusta Jul 27, 2024
b22f2a6
don't compute p-values if p-min is too high
hmusta Jul 27, 2024
dba15c3
test
hmusta Jul 27, 2024
c195a1f
initial two-pass implementation
hmusta Jul 27, 2024
b814d8c
minor
hmusta Jul 27, 2024
c6e4ee7
fixes
hmusta Jul 27, 2024
8f6ec90
fixes
hmusta Jul 31, 2024
51ad45c
fixed unitig-based
hmusta Jul 31, 2024
81f808c
fixes
hmusta Aug 9, 2024
48a97db
cleanup
hmusta Aug 9, 2024
1250055
first impl of zinb
hmusta Aug 13, 2024
39665dd
zinb
hmusta Aug 14, 2024
9bfb6e7
use em algo
hmusta Aug 15, 2024
cbcc6f1
reduce number of tests
hmusta Aug 15, 2024
3e336fc
cleanup
hmusta Aug 15, 2024
3d4439a
first try of fixes
hmusta Aug 15, 2024
f5587fc
backup
hmusta Aug 16, 2024
a935f88
ttt
hmusta Aug 18, 2024
b612853
this works better: use log-normal
hmusta Aug 18, 2024
fe639b6
lognormal test
hmusta Aug 19, 2024
7cccc9f
tt
hmusta Aug 19, 2024
3c5e1e8
this works
hmusta Aug 19, 2024
bd8b2aa
this works
hmusta Aug 19, 2024
bc82d37
optim
hmusta Aug 19, 2024
f392dd1
this works better
hmusta Aug 19, 2024
72fbe97
try
hmusta Aug 20, 2024
34aefd6
revert to MoM estimators
hmusta Aug 20, 2024
2d309b5
fixes
hmusta Aug 20, 2024
b08fb16
course-grained adding
hmusta Aug 20, 2024
c39f87b
fix sign error
hmusta Aug 20, 2024
860d995
em algo
hmusta Aug 20, 2024
aaad9bc
fix posterior
hmusta Aug 21, 2024
3d69754
approx mle
hmusta Aug 21, 2024
123d615
check
hmusta Aug 22, 2024
3b14b25
finds better maxima
hmusta Aug 22, 2024
748bc04
t
hmusta Aug 22, 2024
5a2974d
fix
hmusta Aug 22, 2024
1dd70e9
this works best
hmusta Aug 22, 2024
a56f19c
cleanup
hmusta Aug 22, 2024
bc35e21
precompute r values
hmusta Aug 23, 2024
bd44e66
precompute
hmusta Aug 23, 2024
dad2cfd
optim
hmusta Aug 23, 2024
a1b0397
better fits
hmusta Aug 25, 2024
494194a
refactor
hmusta Aug 25, 2024
72244fa
per-k-mer prob
hmusta Aug 25, 2024
f4b8e5b
refactor
hmusta Aug 25, 2024
5587591
cleanup
hmusta Aug 25, 2024
07f0747
fix per-unitig tests for gnb
hmusta Aug 25, 2024
5856e77
less merging
hmusta Aug 25, 2024
93c2a87
simplify
hmusta Aug 25, 2024
b3751b7
cleanup
hmusta Aug 25, 2024
4f2ac90
fix bug in computing deviance
hmusta Aug 25, 2024
96d0649
cleanup
hmusta Aug 25, 2024
9f979f6
optim
hmusta Aug 25, 2024
cd9b456
cleanup
hmusta Aug 25, 2024
4d7c7b0
cleanup
hmusta Aug 25, 2024
258a426
optim
hmusta Aug 26, 2024
b45a963
minor
hmusta Aug 26, 2024
98db9a9
early exit
hmusta Aug 26, 2024
cc2f7a0
refactor
hmusta Aug 26, 2024
143a699
cleanup
hmusta Aug 26, 2024
3c8bf92
fixed deviance calculation
hmusta Aug 26, 2024
8cde9dd
refactor
hmusta Aug 26, 2024
81383b7
fix
hmusta Aug 26, 2024
33fb421
t
hmusta Aug 26, 2024
b576588
cleanup
hmusta Aug 26, 2024
89f361c
cleanup
hmusta Aug 26, 2024
84315f4
optim
hmusta Aug 27, 2024
106fc6e
optim
hmusta Aug 28, 2024
221e6a7
optim
hmusta Aug 28, 2024
a0337c3
cleanup
hmusta Aug 28, 2024
b0fc9e9
fix cdf
hmusta Aug 28, 2024
e0f116d
cleanup
hmusta Aug 28, 2024
566cef8
fewer ops
hmusta Aug 28, 2024
a70cae5
optim: use log2 and exp2
hmusta Aug 28, 2024
4c2ceb4
more optim
hmusta Aug 28, 2024
d2bf95b
added test for no counts
hmusta Sep 14, 2024
afec3d0
minor
hmusta Sep 24, 2024
82e003c
Merge remote-tracking branch 'origin/master' into count_diff_assem
hmusta Sep 24, 2024
111d895
fix histogram calculation for column-based annotation
hmusta Sep 25, 2024
33116d5
batch access binary annotations
hmusta Sep 25, 2024
f2aa1f6
minor
hmusta Sep 25, 2024
bfdd6a7
fix segfault
hmusta Sep 25, 2024
2d50666
fix off-by-one in poisson_binom test
hmusta Sep 25, 2024
31c8916
extend
hmusta Sep 25, 2024
70b1c2a
cleanup
hmusta Sep 25, 2024
9d5bd6b
cleanup
hmusta Sep 25, 2024
05ef139
fix assertion fail
hmusta Sep 26, 2024
a9935d5
skip sentinel in protein sequences
hmusta Sep 26, 2024
ce65ac3
fix kmer unit test
hmusta Sep 26, 2024
e27d44c
revert redundant changes
hmusta Sep 26, 2024
4be7fbf
don't encode punctuation
hmusta Sep 26, 2024
15c6c32
Revert "don't encode punctuation"
hmusta Sep 26, 2024
756cfd5
Revert "fix kmer unit test"
hmusta Sep 26, 2024
4939bf7
Revert "skip sentinel in protein sequences"
hmusta Sep 26, 2024
648a77a
fix unit test
hmusta Sep 26, 2024
d5a82a6
cleanup
hmusta Sep 26, 2024
757fcdb
fix compilation issues
hmusta Sep 26, 2024
da75095
fix loading from columns without counts
hmusta Sep 27, 2024
c34cbcd
support independent labels and labels in both sets
hmusta Sep 27, 2024
22d472b
minor
hmusta Sep 27, 2024
4989e70
fixes
hmusta Nov 25, 2024
46f88b7
minor
hmusta Dec 4, 2024
0ec5077
bug fixes
hmusta Dec 4, 2024
8d0376a
no saving rows
hmusta Jan 15, 2025
4ae4b5a
test
hmusta Jan 15, 2025
10921c7
cleanup and lnb
hmusta Feb 14, 2025
59c773d
fix nbinom fit
hmusta Feb 17, 2025
38029eb
rewrite
hmusta Feb 20, 2025
110932c
test
hmusta Feb 20, 2025
66fc606
cleanup
hmusta Feb 20, 2025
baa3ffd
added back unitigs
hmusta Feb 20, 2025
4d87c98
fixes
hmusta Feb 20, 2025
6bd351a
test
hmusta Feb 24, 2025
47b05ed
optim
hmusta Feb 25, 2025
e437355
test
hmusta Feb 25, 2025
ed1b866
test
hmusta Mar 10, 2025
29d6fa2
ttt
hmusta Mar 11, 2025
2343d83
bm
hmusta Mar 12, 2025
fe751b5
tt
hmusta Mar 12, 2025
ea8e76c
fixes
hmusta Mar 13, 2025
52dc145
unitig fixes
hmusta Mar 13, 2025
b1abc44
minor
hmusta Mar 19, 2025
4b4e60f
unitig
hmusta Mar 20, 2025
4460ff9
Merge remote-tracking branch 'origin/master' into count_diff_assem
hmusta Mar 21, 2025
2d4eb47
Merge remote-tracking branch 'origin/master' into count_diff_assem
hmusta Mar 21, 2025
2cdb25a
switch to new eigen gitlab repo
hmusta Mar 21, 2025
3b35599
unitig test
hmusta Mar 25, 2025
a163692
this works
hmusta Mar 25, 2025
7db5855
cleanup
hmusta Mar 25, 2025
678db49
more cleanup
hmusta Mar 25, 2025
a027455
more cleanup
hmusta Mar 25, 2025
61e3687
lots of cleanup
hmusta Mar 25, 2025
9fd1544
fix tests
hmusta Mar 26, 2025
cfcf098
minor
hmusta Mar 27, 2025
be4dc72
fix
hmusta Mar 28, 2025
95de5d3
test
hmusta Mar 31, 2025
dc518bf
update
hmusta Apr 2, 2025
b7d061a
updated chi2 pval calculator
hmusta Apr 11, 2025
94cad1c
fix cutoff
hmusta Apr 11, 2025
6781f59
bh
hmusta Apr 11, 2025
c1c6e61
fall back to LRT
hmusta Apr 11, 2025
4846ab4
test
hmusta Apr 14, 2025
b066ec9
effect sie
hmusta Apr 16, 2025
4b9042d
wald test nb
hmusta May 2, 2025
ec20f8a
fix eff size
hmusta May 6, 2025
0d311f1
Merge remote-tracking branch 'origin/master' into count_diff_assem
hmusta May 7, 2025
f0321d7
better nbinom optim and scale values before wald test
hmusta May 8, 2025
a4cfa1e
minor fix for notest
hmusta May 8, 2025
8ed7c49
switch to Holm
hmusta May 9, 2025
71401a4
Revert "switch to Holm"
hmusta May 9, 2025
8704ee8
simplify
hmusta May 9, 2025
db81e72
fit nb distribution first, then scale
hmusta May 9, 2025
8ec120b
changes
hmusta May 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion metagraph/.clang-format → .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ ColumnLimit: 90
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 8
ContinuationIndentWidth: 4
Cpp11BracedListStyle: false
DerivePointerAlignment: false
DisableFormat: false
Expand All @@ -51,11 +51,13 @@ ForEachMacros:
- foreach
- Q_FOREACH
- BOOST_FOREACH
IncludeBlocks: Regroup
IndentCaseLabels: true
IndentPPDirectives: None
IndentWidth: 4
IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: false
LambdaBodyIndentation: Signature
MaxEmptyLinesToKeep: 2
NamespaceIndentation: None
PenaltyBreakAssignment: 2
Expand Down
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
url = https://github.com/karasikov/caches.git
[submodule "metagraph/external-libraries/eigen"]
path = metagraph/external-libraries/eigen
url = https://github.com/eigenteam/eigen-git-mirror.git
url = https://gitlab.com/libeigen/eigen.git
[submodule "metagraph/external-libraries/KMC"]
path = metagraph/external-libraries/KMC
url = https://github.com/karasikov/KMC.git
Expand Down
75 changes: 75 additions & 0 deletions metagraph/src/annotation/int_matrix/base/int_matrix.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
#include "int_matrix.hpp"

#include <progress_bar.hpp>

#include "common/logger.hpp"
#include "common/threads/threading.hpp"
#include "common/vectors/vector_algorithm.hpp"


namespace mtg {
namespace annot {
Expand All @@ -8,6 +14,75 @@ namespace matrix {
using Row = BinaryMatrix::Row;
using Column = BinaryMatrix::Column;

// Stream every row of the matrix to |callback|, processing rows in parallel.
//
// The row range [0, num_rows) is split into contiguous batches of
// ceil(num_rows / threads) rows, one OpenMP loop iteration per batch. Within
// a batch, rows are fetched with get_row_values() in chunks of at most
// |rows_per_update| rows, and the progress bar is advanced once per chunk.
//
// |callback| is invoked as callback(row index, row values, batch index).
// The batch index is k / batch_size, so it is always smaller than the number
// of batches (which never exceeds the thread count used for the loop).
// Callbacks for different batches may run concurrently.
//
// |ordered| is accepted for interface compatibility but is ignored here.
void IntMatrix::call_row_values(const std::function<void(uint64_t, const RowValues&, size_t)> &callback,
                                bool ordered) const {
    std::ignore = ordered;

    const size_t n = get_binary_matrix().num_rows();
    ProgressBar progress_bar(n, "Streaming rows", std::cerr, !common::get_verbose());

    // With no rows, the batch size computed below would be 0 and the parallel
    // loop would have a zero increment, so bail out before reaching it.
    if (!n)
        return;

    // Clamp the thread count so that neither the division below nor the
    // num_threads() clause ever sees zero.
    const size_t threads = std::max<size_t>(get_num_threads(), 1);
    const size_t batch_size = (n + threads - 1) / threads;
    const size_t rows_per_update = 10000;

    #pragma omp parallel for num_threads(threads) schedule(dynamic)
    for (size_t k = 0; k < n; k += batch_size) {
        const size_t begin = k;
        const size_t end = std::min(begin + batch_size, n);
        // Index of this batch; passed to the callback as its third argument.
        const size_t thread_id = k / batch_size;

        // Reused across chunks to avoid reallocating the id buffer.
        std::vector<Row> row_ids;
        for (size_t row_batch_i = begin; row_batch_i < end; row_batch_i += rows_per_update) {
            const size_t row_batch_end = std::min(row_batch_i + rows_per_update, end);
            row_ids.resize(row_batch_end - row_batch_i);
            std::iota(row_ids.begin(), row_ids.end(), row_batch_i);

            auto row_vals = get_row_values(row_ids);
            for (size_t i = 0; i < row_ids.size(); ++i) {
                callback(i + row_batch_i, row_vals[i], thread_id);
            }

            progress_bar += row_ids.size();
        }
    }
}

// Build one histogram per column, mapping a value to the number of streamed
// rows that contributed that value for the column.
//
// If |min_counts| is non-empty, it is indexed by column:
//   - a row is kept only if at least one of its entries is >= the threshold
//     of its column; other rows are skipped and, if |unmark_discarded| is
//     given, their bit in it is cleared via unset_bit();
//   - within a kept row, entries below their column's threshold are not
//     copied, so that column is tallied under the initial value of the
//     per-row buffer (presumably 0 — confirm that Vector value-initializes).
// If |min_counts| is empty, every row is kept and every entry is copied.
//
// Rows are streamed with call_row_values(); updates to the shared histograms
// are serialized with an OpenMP critical section.
std::vector<VectorMap<uint64_t, size_t>> IntMatrix::get_histograms(const std::vector<size_t> &min_counts,
                                                                   sdsl::bit_vector *unmark_discarded) const {
    common::logger->trace("Calculating count histograms");

    const bool filtering = !min_counts.empty();

    // Forwarded to unset_bit() as its "parallel" argument.
    // NOTE(review): `> 0` holds for any positive thread count — confirm that
    // `> 1` was not intended.
    const bool atomic_unset = get_num_threads() > 0;

    std::vector<VectorMap<uint64_t, size_t>> histograms(get_binary_matrix().num_columns());

    // True if the value may be tallied for the given column.
    const auto meets_threshold = [&](auto col, auto val) {
        return !filtering || val >= min_counts[col];
    };

    std::atomic_thread_fence(std::memory_order_release);

    call_row_values([&](uint64_t row_i, const auto &row, size_t) {
        // Without filtering every row is kept; otherwise look for the first
        // entry that reaches its column's threshold.
        bool any_kept = !filtering;
        for (const auto &[col, val] : row) {
            if (any_kept)
                break;

            any_kept = meets_threshold(col, val);
        }

        if (!any_kept) {
            if (unmark_discarded) {
                unset_bit(unmark_discarded->data(), row_i, atomic_unset,
                          std::memory_order_relaxed);
            }
            return;
        }

        // Dense per-row buffer with one slot per column.
        Vector<uint64_t> per_column(histograms.size());
        for (const auto &[col, val] : row) {
            if (meets_threshold(col, val))
                per_column[col] = val;
        }

        // The histograms are shared by all workers, so update them one row
        // at a time.
        #pragma omp critical
        {
            for (size_t col = 0; col < per_column.size(); ++col) {
                histograms[col][per_column[col]] += 1;
            }
        }
    }, false);

    std::atomic_thread_fence(std::memory_order_acquire);

    return histograms;
}

IntMatrix::RowValues
IntMatrix::sum_row_values(const std::vector<std::pair<Row, size_t>> &index_counts,
Expand Down
9 changes: 9 additions & 0 deletions metagraph/src/annotation/int_matrix/base/int_matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@

#include <vector>

#include <sdsl/int_vector.hpp>

#include "annotation/binary_matrix/base/binary_matrix.hpp"
#include "common/vector_map.hpp"


namespace mtg {
Expand All @@ -20,6 +23,12 @@ class IntMatrix {
// |row| is in [0, num_rows), |column| is in [0, num_columns)
virtual std::vector<RowValues> get_row_values(const std::vector<BinaryMatrix::Row> &rows) const = 0;

// Call |callback| for every row of the matrix. The callback receives the row
// index, the values of that row, and a third index; in the default
// implementation this is the index of the batch of rows being processed.
// The default implementation processes batches in parallel with OpenMP, so
// the callback may be invoked concurrently.
// NOTE(review): the default implementation ignores |ordered| — confirm what
// ordering guarantee overrides are expected to provide.
virtual void call_row_values(const std::function<void(uint64_t, const RowValues&, size_t)> &callback,
bool ordered = true) const;

// Return one histogram per column, mapping a value to the number of rows
// tallied with that value in the column.
// If |min_counts| is non-empty, it is indexed by column: the default
// implementation skips rows in which no entry reaches its column's threshold
// and, when |unmark_discarded| is not null, clears the bit of each skipped
// row in it. Entries of kept rows that are below their column's threshold
// are not tallied under their own value.
virtual std::vector<VectorMap<uint64_t, size_t>> get_histograms(const std::vector<size_t> &min_counts = {},
sdsl::bit_vector *unmark_discarded = nullptr) const;

// sum up values for each column with at least |min_count| non-zero values
virtual RowValues
sum_row_values(const std::vector<std::pair<BinaryMatrix::Row, size_t>> &index_counts,
Expand Down
Loading
Loading