From aa34f9da4a906e2c9a0ee4ee414dbe92cf101448 Mon Sep 17 00:00:00 2001 From: Radu Muntean Date: Wed, 28 Jul 2021 15:46:09 +0200 Subject: [PATCH 01/17] add some small methods that will be further used for tax class Signed-off-by: Radu Muntean --- metagraph/src/graph/annotated_dbg.cpp | 28 +++++++++++++++++++++++++++ metagraph/src/graph/annotated_dbg.hpp | 4 ++++ 2 files changed, 32 insertions(+) diff --git a/metagraph/src/graph/annotated_dbg.cpp b/metagraph/src/graph/annotated_dbg.cpp index 5db5ff76aa..63436cf9a2 100644 --- a/metagraph/src/graph/annotated_dbg.cpp +++ b/metagraph/src/graph/annotated_dbg.cpp @@ -776,6 +776,34 @@ ::call_annotated_nodes(const Label &label, ); } +void AnnotatedDBG::call_annotated_rows(const std::vector &rows, + std::function callback_cell, + std::function callback_row) const { + assert(check_compatibility()); + + auto unique_matrix_rows = annotator_->get_matrix().get_rows(rows); + + //TODO make sure that this function works even if we have duplications in 'rows'. Then, delete this error catch. + if (rows.size() != unique_matrix_rows.size()) { + throw std::runtime_error("The current 'call_annotated_rows' call contains duplication."); + } + + if (unique_matrix_rows.size() >= std::numeric_limits::max()) { + throw std::runtime_error("The current 'call_annotated_rows' call has returned, " + + std::to_string(unique_matrix_rows.size()) + + "rows. The maximum number of rows that can be returned is " + + std::to_string(std::numeric_limits::max()) + + ". 
Please reduce the query batch size"); + } + const auto &label_encoder = annotator_->get_label_encoder(); + for (auto row : unique_matrix_rows) { + for (auto cell : row) { + callback_cell(label_encoder.decode(cell)); + } + callback_row(); + } +} + bool AnnotatedSequenceGraph::check_compatibility() const { return graph_->max_index() == annotator_->num_objects(); } diff --git a/metagraph/src/graph/annotated_dbg.hpp b/metagraph/src/graph/annotated_dbg.hpp index 56e0dfa747..bd87eb85cd 100644 --- a/metagraph/src/graph/annotated_dbg.hpp +++ b/metagraph/src/graph/annotated_dbg.hpp @@ -156,6 +156,10 @@ class AnnotatedDBG : public AnnotatedSequenceGraph { int32_t match_score = 1, int32_t mismatch_score = 2) const; + void call_annotated_rows(const std::vector &rows, + std::function callback_cell, + std::function callback_row) const; + private: DeBruijnGraph &dbg_; }; From 1ff153f3c1412665bb02464bf99737f2af1f08d4 Mon Sep 17 00:00:00 2001 From: Radu Muntean Date: Wed, 28 Jul 2021 18:33:22 +0200 Subject: [PATCH 02/17] address comments --- metagraph/src/graph/annotated_dbg.cpp | 28 --------------------------- metagraph/src/graph/annotated_dbg.hpp | 4 ---- 2 files changed, 32 deletions(-) diff --git a/metagraph/src/graph/annotated_dbg.cpp b/metagraph/src/graph/annotated_dbg.cpp index 63436cf9a2..5db5ff76aa 100644 --- a/metagraph/src/graph/annotated_dbg.cpp +++ b/metagraph/src/graph/annotated_dbg.cpp @@ -776,34 +776,6 @@ ::call_annotated_nodes(const Label &label, ); } -void AnnotatedDBG::call_annotated_rows(const std::vector &rows, - std::function callback_cell, - std::function callback_row) const { - assert(check_compatibility()); - - auto unique_matrix_rows = annotator_->get_matrix().get_rows(rows); - - //TODO make sure that this function works even if we have duplications in 'rows'. Then, delete this error catch. 
- if (rows.size() != unique_matrix_rows.size()) { - throw std::runtime_error("The current 'call_annotated_rows' call contains duplication."); - } - - if (unique_matrix_rows.size() >= std::numeric_limits::max()) { - throw std::runtime_error("The current 'call_annotated_rows' call has returned, " - + std::to_string(unique_matrix_rows.size()) + - "rows. The maximum number of rows that can be returned is " + - std::to_string(std::numeric_limits::max()) + - ". Please reduce the query batch size"); - } - const auto &label_encoder = annotator_->get_label_encoder(); - for (auto row : unique_matrix_rows) { - for (auto cell : row) { - callback_cell(label_encoder.decode(cell)); - } - callback_row(); - } -} - bool AnnotatedSequenceGraph::check_compatibility() const { return graph_->max_index() == annotator_->num_objects(); } diff --git a/metagraph/src/graph/annotated_dbg.hpp b/metagraph/src/graph/annotated_dbg.hpp index bd87eb85cd..56e0dfa747 100644 --- a/metagraph/src/graph/annotated_dbg.hpp +++ b/metagraph/src/graph/annotated_dbg.hpp @@ -156,10 +156,6 @@ class AnnotatedDBG : public AnnotatedSequenceGraph { int32_t match_score = 1, int32_t mismatch_score = 2) const; - void call_annotated_rows(const std::vector &rows, - std::function callback_cell, - std::function callback_row) const; - private: DeBruijnGraph &dbg_; }; From 8f3600499a35de865a7ee1f57c13d4ecbfecd66b Mon Sep 17 00:00:00 2001 From: Radu Muntean Date: Wed, 28 Jul 2021 15:46:09 +0200 Subject: [PATCH 03/17] add some small methods that will be further used for tax class Signed-off-by: Radu Muntean --- metagraph/src/common/utils/string_utils.cpp | 2 +- metagraph/src/graph/annotated_dbg.cpp | 28 +++++++++++++++++++++ metagraph/src/graph/annotated_dbg.hpp | 4 +++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/metagraph/src/common/utils/string_utils.cpp b/metagraph/src/common/utils/string_utils.cpp index 8e801baf78..cc25f06ad5 100644 --- a/metagraph/src/common/utils/string_utils.cpp +++ 
b/metagraph/src/common/utils/string_utils.cpp @@ -11,7 +11,7 @@ bool starts_with(const std::string &str, const std::string &prefix) { if (prefix.size() > str.size()) { return false; } - return prefix == std::string_view(str).substr(0, prefix.size()); + return prefix == str.substr(0, static_cast(prefix.size())); } bool ends_with(const std::string &str, const std::string &suffix) { diff --git a/metagraph/src/graph/annotated_dbg.cpp b/metagraph/src/graph/annotated_dbg.cpp index 5db5ff76aa..a324c673c1 100644 --- a/metagraph/src/graph/annotated_dbg.cpp +++ b/metagraph/src/graph/annotated_dbg.cpp @@ -776,6 +776,34 @@ ::call_annotated_nodes(const Label &label, ); } +void AnnotatedDBG::call_annotated_rows(const std::vector &rows, + std::function callback_cell, + std::function callback_row) const { + assert(check_compatibility()); + + auto unique_matrix_rows = annotator_->get_matrix().get_rows(rows); + + //TODO make sure that this function works even if we have duplications in 'rows'. Then, delete this error catch. + if (rows.size() != unique_matrix_rows.size()) { + throw std::runtime_error("The current 'call_annotated_rows' call contains duplication."); + } + + if (unique_matrix_rows.size() >= std::numeric_limits::max()) { + throw std::runtime_error( + folly::to("The current 'call_annotated_rows' call has returned, ", unique_matrix_rows.size(), + "rows. The maximum number of rows that can be returned is ", + std::numeric_limits::max(), + ". 
Please reduce the query batch size")); + } + const auto &label_encoder = annotator_->get_label_encoder(); + for (auto row : unique_matrix_rows) { + for (auto cell : row) { + callback_cell(label_encoder.decode(cell)); + } + callback_row(); + } +} + bool AnnotatedSequenceGraph::check_compatibility() const { return graph_->max_index() == annotator_->num_objects(); } diff --git a/metagraph/src/graph/annotated_dbg.hpp b/metagraph/src/graph/annotated_dbg.hpp index 56e0dfa747..bd87eb85cd 100644 --- a/metagraph/src/graph/annotated_dbg.hpp +++ b/metagraph/src/graph/annotated_dbg.hpp @@ -156,6 +156,10 @@ class AnnotatedDBG : public AnnotatedSequenceGraph { int32_t match_score = 1, int32_t mismatch_score = 2) const; + void call_annotated_rows(const std::vector &rows, + std::function callback_cell, + std::function callback_row) const; + private: DeBruijnGraph &dbg_; }; From 2912decccec5a267a6ef2bc104f660640967c14e Mon Sep 17 00:00:00 2001 From: Radu Muntean Date: Wed, 28 Jul 2021 15:57:36 +0200 Subject: [PATCH 04/17] add data files used for taxonomic integration and unit tests Signed-off-by: Radu Muntean --- .gitignore | 1 + .../data/taxonomic_data/dumb.accession2taxid | 21 + .../tests/data/taxonomic_data/dumb_nodes.dmp | 20 + .../full_hierarchy_sequences.fa | 161 ++++ .../tests/data/taxonomic_data/tax_input.fa | 96 +++ .../tests/data/taxonomic_data/tax_query.fa | 800 ++++++++++++++++++ 6 files changed, 1099 insertions(+) create mode 100644 metagraph/tests/data/taxonomic_data/dumb.accession2taxid create mode 100644 metagraph/tests/data/taxonomic_data/dumb_nodes.dmp create mode 100644 metagraph/tests/data/taxonomic_data/full_hierarchy_sequences.fa create mode 100644 metagraph/tests/data/taxonomic_data/tax_input.fa create mode 100644 metagraph/tests/data/taxonomic_data/tax_query.fa diff --git a/.gitignore b/.gitignore index 9e165295f4..cf49089ee5 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ *.fai !metagraph/tests/data/*.fa !metagraph/tests/data/*.fai 
+!metagraph/tests/data/taxonomic_data/*.fa metagraph/tests/data/*dump_test* projects/*/temp visualization/geolocation/data/* diff --git a/metagraph/tests/data/taxonomic_data/dumb.accession2taxid b/metagraph/tests/data/taxonomic_data/dumb.accession2taxid new file mode 100644 index 0000000000..ca9ba2566f --- /dev/null +++ b/metagraph/tests/data/taxonomic_data/dumb.accession2taxid @@ -0,0 +1,21 @@ +accession accession.version taxid gi +NC_01 NC_01.1 10001 10001 +NC_02 NC_02.1 10002 10002 +NC_03 NC_04.1 10003 10003 +NC_04 NC_04.1 10004 10004 +NC_05 NC_05.1 10005 10005 +NC_06 NC_06.1 10006 10006 +NC_07 NC_07.1 10007 10007 +NC_08 NC_08.1 10008 10008 +NC_09 NC_09.1 10009 10009 +NC_10 NC_10.1 10010 10010 +NC_11 NC_11.1 10011 10011 +NC_12 NC_12.1 10012 10012 +NC_13 NC_13.1 10013 10013 +NC_14 NC_14.1 10014 10014 +NC_15 NC_15.1 10015 10015 +NC_16 NC_16.1 10016 10016 +NC_17 NC_17.1 10017 10017 +NC_18 NC_18.1 10018 10018 +NC_19 NC_19.1 10019 10019 +NC_20 NC_20.1 10020 10020 diff --git a/metagraph/tests/data/taxonomic_data/dumb_nodes.dmp b/metagraph/tests/data/taxonomic_data/dumb_nodes.dmp new file mode 100644 index 0000000000..c721ad085a --- /dev/null +++ b/metagraph/tests/data/taxonomic_data/dumb_nodes.dmp @@ -0,0 +1,20 @@ +10001 | 10001 +10002 | 10001 +10003 | 10001 +10004 | 10002 +10005 | 10002 +10006 | 10002 +10007 | 10003 +10008 | 10003 +10009 | 10004 +10010 | 10004 +10011 | 10004 +10012 | 10005 +10013 | 10005 +10014 | 10006 +10015 | 10006 +10016 | 10007 +10017 | 10007 +10018 | 10007 +10019 | 10008 +10020 | 10008 diff --git a/metagraph/tests/data/taxonomic_data/full_hierarchy_sequences.fa b/metagraph/tests/data/taxonomic_data/full_hierarchy_sequences.fa new file mode 100644 index 0000000000..36099686d8 --- /dev/null +++ b/metagraph/tests/data/taxonomic_data/full_hierarchy_sequences.fa @@ -0,0 +1,161 @@ +>gi|10001|ref|NC_01.1| Test sample 1 (root) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG 
+AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACGAGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10002|ref|NC_02.1| Test sample 2 (dist to root = 1) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10003|ref|NC_03.1| Test sample 3 (dist to root = 1) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCAAA +TATGACTTAAACGGGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10004|ref|NC_04.1| Test sample 4 (dist to root = 2) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT 
+TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAACCCAGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGAAATACC +>gi|10005|ref|NC_05.1| Test sample 5 (dist to root = 2) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGGGGCTGTGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10006|ref|NC_06.1| Test sample 6 (dist to root = 2) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGGGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGTGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10007|ref|NC_07.1| Test sample 7 (dist to root = 2) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCCATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCACCGTAGCAAA +TATGACTTAAACGGGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC +>gi|10008|ref|NC_08.1| Test sample 8 (dist to root = 2) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG 
+AGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCAAA +TATGACTTAAACGGGGGGGCTGGGGCTGTTCGGAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10009|ref|NC_09.1| Test sample 9 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGTTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGGAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAACCAAGGGGGCTGGGGCTGTTCGCAGGCAAATCTACGCCTACTACAAACTCTAGAAATACC +>gi|10010|ref|NC_10.1| Test sample 10 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGTTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGACACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACCAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCTTAGCCAA +TATGACTTAACCCAAGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGAAATACC +>gi|10011|ref|NC_11.1| Test sample 11 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +GGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT +CTATCTCGCATTCATTGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT 
+TAAATCGAGTGTTGGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TTTGACTTAACCCAGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGAAATACC +>gi|10012|ref|NC_12.1| Test sample 12 (dist to root = 3) +CGGCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAGAGGGTGACTCTGGTGT +CTATCTCGCATTAAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCTCTTCCGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGGGGCTGTGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10013|ref|NC_13.1| Test sample 13 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGAGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT +CAATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTAACAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGGGGCTGTGGCTGTTCGCAGACCAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10014|ref|NC_14.1| Test sample 14 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCACACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCGGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAGAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGATAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGGGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGTGGCTGGGGCTGTTCGCAGACAAATCTACGGCTACTACAAACTCTAGCAATACC +>gi|10015|ref|NC_15.1| Test sample 15 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAAAATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG 
+AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAATGGGTGACTCTGGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGAAATTTAATGGGTTGGTATTCTTGTAGTATGGTCATCGTAGCCAA +TATGACTTAAACCAGGTGGCTGGCGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10016|ref|NC_16.1| Test sample 16 (dist to root = 3) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCCTTTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCACTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACGGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTGGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCACCGTAGCAAA +TATGACTTAAACGTGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC +>gi|10017|ref|NC_17.1| Test sample 17 (dist to root = 3) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCCATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCCATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATAACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC +TTGAAAAATTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTAGCCGAATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCACCGTAGCAAA +TATGACTTAAACGGGGGGGCTCGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC +>gi|10018|ref|NC_18.1| Test sample 18 (dist to root = 3) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCACACAATGCACTTCCGGGGGCCATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTGGCAAAT 
+TAAATCGAGTGTTAGCCGCATCTAGACATTTAATGAGTTGGTTTTCTTGTAGTAGGGTCACCGTAGCAAA +TATGACTTAAACGGGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC +>gi|10019|ref|NC_19.1| Test sample 19 (dist to root = 3) +CGCCGGCCGCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCCGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCATTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCCAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCAAA +TATGACCTAAACGGGGGGGCTGGGGCTGTTCGGAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10020|ref|NC_20.1| Test sample 20 (dist to root = 3) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATTATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACCGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTACTCTTGTAGTAGGGTCATCGTAGCAAA +TATGACTTAAACGGGGGGGCTGGGGCTGTTCGGAGACAAATCTACGCCTACTACCAACTCTAGCAATACC + diff --git a/metagraph/tests/data/taxonomic_data/tax_input.fa b/metagraph/tests/data/taxonomic_data/tax_input.fa new file mode 100644 index 0000000000..3ba4796416 --- /dev/null +++ b/metagraph/tests/data/taxonomic_data/tax_input.fa @@ -0,0 +1,96 @@ +>gi|10009|ref|NC_09.1| Test sample 9 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGTTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGGAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT 
+TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAACCAAGGGGGCTGGGGCTGTTCGCAGGCAAATCTACGCCTACTACAAACTCTAGAAATACC +>gi|10010|ref|NC_10.1| Test sample 10 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGTTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGACACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACCAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCTTAGCCAA +TATGACTTAACCCAAGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGAAATACC +>gi|10011|ref|NC_11.1| Test sample 11 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +GGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT +CTATCTCGCATTCATTGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT +TAAATCGAGTGTTGGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TTTGACTTAACCCAGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGAAATACC +>gi|10012|ref|NC_12.1| Test sample 12 (dist to root = 3) +CGGCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAGAGGGTGACTCTGGTGT +CTATCTCGCATTAAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCTCTTCCGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGGGGCTGTGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10013|ref|NC_13.1| Test sample 13 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGAGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG 
+AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT +CAATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTAACAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGGGGCTGTGGCTGTTCGCAGACCAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10014|ref|NC_14.1| Test sample 14 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCACACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCGGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAGAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGATAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGGGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGTGGCTGGGGCTGTTCGCAGACAAATCTACGGCTACTACAAACTCTAGCAATACC +>gi|10015|ref|NC_15.1| Test sample 15 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAAAATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAATGGGTGACTCTGGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGAAATTTAATGGGTTGGTATTCTTGTAGTATGGTCATCGTAGCCAA +TATGACTTAAACCAGGTGGCTGGCGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10016|ref|NC_16.1| Test sample 16 (dist to root = 3) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCCTTTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCACTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACGGTAGCCTTTGGGACTTTCACTACTTAGCAAAT 
+TAAATCGAGTGTTGGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCACCGTAGCAAA +TATGACTTAAACGTGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC +>gi|10017|ref|NC_17.1| Test sample 17 (dist to root = 3) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCCATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCCATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATAACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC +TTGAAAAATTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTAGCCGAATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCACCGTAGCAAA +TATGACTTAAACGGGGGGGCTCGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC +>gi|10018|ref|NC_18.1| Test sample 18 (dist to root = 3) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCACACAATGCACTTCCGGGGGCCATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTGGCAAAT +TAAATCGAGTGTTAGCCGCATCTAGACATTTAATGAGTTGGTTTTCTTGTAGTAGGGTCACCGTAGCAAA +TATGACTTAAACGGGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC +>gi|10019|ref|NC_19.1| Test sample 19 (dist to root = 3) +CGCCGGCCGCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCCGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCATTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCCAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCAAA +TATGACCTAAACGGGGGGGCTGGGGCTGTTCGGAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10020|ref|NC_20.1| Test sample 20 (dist to root = 3) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG 
+AGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATTATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACCGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTACTCTTGTAGTAGGGTCATCGTAGCAAA +TATGACTTAAACGGGGGGGCTGGGGCTGTTCGGAGACAAATCTACGCCTACTACCAACTCTAGCAATACC diff --git a/metagraph/tests/data/taxonomic_data/tax_query.fa b/metagraph/tests/data/taxonomic_data/tax_query.fa new file mode 100644 index 0000000000..30ea3f2c27 --- /dev/null +++ b/metagraph/tests/data/taxonomic_data/tax_query.fa @@ -0,0 +1,800 @@ +@gi|10001|ref|NC_01.1|-9/1 +TGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATTGATCCCTCTCGAATA ++ +CC=GGGCGGGGGGGJGJJGGJJJCJJJGJJCJJGGJJG=GGJJJGGC8GGGCCGCGCJJGGJ=1CGGJGGCJG=GGGJGGCCGGGCCGCGCGGGG=GGGGCCGGCGGGGGGG=GCGGGC1CGGCGCCGGCC8GG1GCGGGGGGGCGC==C +@gi|10001|ref|NC_01.1|-7/1 +CCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGA ++ +CC8GGGGGGGGGGJCJJGGJJJJJGC(JJGCJJGJGGJJJCGGGJJJJJ=CCJJ8CJ8CGCC=GGJJGGGGGGJGGGGCCGGCGCCGCGGGG1G=CGGGCJCGCGGC1GGCCGGGCGGGCGCGGCGGGGG=CCGGGGGG=CGCCGGGCCG +@gi|10001|ref|NC_01.1|-5/1 +AGTTTGTAGTAGGCGTAGATTTGTCTGCGAACAGCCCCAGCCCCCTCGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTC ++ +C=CGGGGGG=CGGJGJ1JJJGJJCGJJJG=JGC=JCJGJJGJJCGJJGGJJJGJJGGGGJG=CGCCGGC=GGGCGGGG8CCGGGGGGGGGCC8GGCG=GGCCCGG1GGGGGGGGGGCGG8GCCGGGGGGGG1CGGGGGCCCGGGGGCGGC +@gi|10001|ref|NC_01.1|-3/1 +CTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGA ++ 
+CC1GGGGGGGCGGJGJJGJJJGGGJJGJJJJJJJJCGJGJGGJ1JJGCCJGJ=JJ8CGJ8CGGJJGJCJ=CCGGC=GGCGGGGGCGGGGGGGCCG1GGCGJCGGCC(GGCGGG=CGG(GCGG8G1GGGCCGGGGGGGGGGGCCGGGGGGG +@gi|10001|ref|NC_01.1|-1/1 +GAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCCTTGAGGGCGTGTACTTAGCCCA ++ +CCCGGGGGCGGGGGGJJCJJJJJJJJJJ1JJGJGGCJJGJJJJCCCJJGGGJ=GJJJGGJJCCGGGGGGCCC8CCCC=GGGGCGG==GGGGGGGCCC8GGCGGC1GGCGCGGGGGG=GCGGGCCGGC(GGGGGGGGCC8CGGGGGGGGCC +@gi|10002|ref|NC_02.1|-9/1 +AAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTT ++ +CCCGGGGGGGGGCJJJGJGGJJCJJJGGJGJ=JJJJGGGJGJGJGJJJGC=JJCGGGGJGJGGGJ1JGCCCGJGCGC=CGCC==CGGGCC1CGGGGGGCGJCCGGGGGG=CGCCGGGGCGGCCGGGCG1GGGGGCCGCCCCCGGGGGGC8 +@gi|10002|ref|NC_02.1|-7/1 +TGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGA ++ +=CCGCCGGGGGGGGCCJJGGJJJJ8J1JJGGJJ1CJCJJGGG8CJCCGJGJJGGGGGGCJGGGGGGCGGGJGCJGG1GCGCGGGCGGGCGGGGC=(GCG=JCG=GG(GCCCGC=GG(GCCGGGC=1CGCGG=GGCGGCCCGGGGGGCGGC +@gi|10002|ref|NC_02.1|-5/1 +GAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAATATGACTTAAAC ++ +CCCGGCGCGGGGGJJGJJGJJJJGGJG=JJGJGJJJJGJJJJCGJJJJGJGGGJGJGJJ(J=J=JGGG1GCGGCJCJGGCC1GGGCG==GCGG81GGGCGCCCGGCGGGGGCGGGGGGGGCGGGGGCCGG1GGCGCGCGCGGGGCG1GCC +@gi|10002|ref|NC_02.1|-3/1 +CGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGA ++ +CCCGGGGGGGGGGCJ1JJJJCJCGJJJJJJJGGGJJJJGJJJJJJGGGGJGGJGGGJG(GJJG=CGJJJCGGJGCGGGCGGGCG8GG=CGGCG8CGGCGCCC=C=GGGCCGGGGC1GC=G=G8GGCGGGCCGGCGGGGGGCGGCCCCGCC +@gi|10002|ref|NC_02.1|-1/1 
+GGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCAC ++ +CCCGGGGGGGGGGJJJGJCJCJJJJJJJCCJGJJJGJCJJ=JJGGCGGJGC1JJGGJCJJCGJCCGGGGGGGCJCGCGC=GC=GCGCGG1GGGG=GCGGGCGGCGGGGC8GGCGGGGGGGG=CGGGCGGGG=CCG=GCGCCG=GGGCGGG +@gi|10003|ref|NC_03.1|-9/1 +TCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCG ++ +=C1GGGGGGGGCGJJJJGJJGJJCJJJJGJGGGGJJJGJJGCJJGJJCJJGJGC8JCGJCGCGJGGGGGCGGCGGGGGGG=CGCGGCGCCGGGCGGGCGGCGGGGGCCC=GCGGGG1GGC1C8GCGGGGGGCGGGCC=GCCGGGG=GGGG +@gi|10003|ref|NC_03.1|-7/1 +CTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAA ++ +CCCGCGGGGGGGGJJJJJJJGJJGJJGGJJJJJJGJJJJJGJ8JGJJJGGJG=GJJJJ=GGJCJJGGGCGGGCGJGGGGGGC=GGGGGGGG=G=GGGGGGJG=CG8GGGCGCGGCGCGCGGC18CGGGGGGGGGGCGCCGGGGGCGGGG= +@gi|10003|ref|NC_03.1|-5/1 +GGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAGAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAA ++ +CCCGGGGGGGG1GJGJJJJJJGJJCJGGCJGJGJJJJGJJGCJJ8=(JJJGGC8GCJGG(JJJCGGJJGGG=GGGGGGCCGGGGGGGGGCGG=CGGGGGG1CGGGCCCGGGGGC1CCGGGCGGCCCG8GCGGG=CCCGGCGCGGGGGGCC +@gi|10003|ref|NC_03.1|-3/1 +AAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGGATGCCAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTT ++ +CCCGGGGGCGGGGGGGCJJJCJJJGJJGJJJJGJJGJC(JJGJJJJJJJGGJGCGGGGJGJGCC=JGG=CCJGGG=GGGG8C=GCGGGGG=GGCGGCC=GJCC1C1C8GGGGGGGCCG=(GCGGGGGGGGCCGGCCGCC1CGGCCGGCCC +@gi|10003|ref|NC_03.1|-1/1 +ATTGCTAGAGTTTGTAGTAGGCGTAGATTTGTCTGCGAACAGCCCAAGCCCCCCCGTTTAAGTCATATTTGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAG ++ +CC11GGGGGGGGGJJJJJJJJJJGGJ1CJJJJJJ=JGGGJJGGGJ(JGGJJJGGGGCGGJCGJGJ1GCCGGGCJCCJCGCGGCCGGGGGGGGGGGGGC===GGCGCGGC=GCGCGCGGCC=8CGGGGGGGGGGGCC=GGGGGGCCCGGGG 
+@gi|10004|ref|NC_04.1|-9/1 +CTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGC ++ +CCC1GG=CGGGGGJJJJJCJGJJJJJJJJJJJJJJJJJJJG=JCJJJGGJJGGGGGGCJJGGJJJGGJGGGJGC=CGJCCG=GCGGGGGCGGGGGGCCGGJCG=G1CG1GGGGCC1CGG=(CGGGGG=CC=GGGGC8GG8GCGGGCGGGG +@gi|10004|ref|NC_04.1|-7/1 +CAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATAGATCCCTCTC ++ +CCC1GGGGGGGGGJJ1JJJJGJJJGJJJJGGCJJCJGJJJJGJJGJJJGGGGGJGGGJ8=JCCGGCC1GCGGGCGGGGGJG=GC1GGGGGGGGGCGGGGGJCGCGGCCCGCCGGGGC8CGGGCGCGCCGGGGGGGGCGGGGGGCG1CG=C +@gi|10004|ref|NC_04.1|-5/1 +TTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAATATG ++ +=C=GCGGGGGGGGGJJJJJJJJGJJJGJG1JJGCGGJJGJGJ8JJJGGGJ8CJCJGJJGCJGGCGGGCGGJGGGGCG=GGGG1GGG=GGGGGGCGCG8GGJCCCCGGGGGGGCGCGCGCGGGCGGGGGG=CGCGGGGCCGGGCCCGGCG= +@gi|10004|ref|NC_04.1|-3/1 +GTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAATATGACTTAACCCAGGG ++ +CCCGGGGGGGG=GGGCCCGJJJJ=GGGJJJJGCJJJCJGGJJG1JJC8JGJGJCGCGJJJGGJGGGGGGJJJGC1G8=GGCCGCCGCGG8GCGGGCGGGCCCCGGCGGGCGGCCGG1=CGCG=GCGGCCGGGCGGCCG=G=GCGGC=GCC +@gi|10004|ref|NC_04.1|-1/1 +CAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAA ++ +CC8GGGCCG1GGGJJJJJGGJJGJJJJJGG1JJCGGJGGJGG1JJJJGJJGCGJJ=GJJJ=JGJ=GJJJCGCGG8GGGCCGGGGCGGGCGGCGGCGGCGGJCGGCGG1CGCGCGGGCCGGG8GGG1GGC8GGGGCGCGGGGCCGGGGCG= +@gi|10005|ref|NC_05.1|-9/1 +TAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTAC ++ 
+CCCGGGGGGGGGGJJJJJCGJJJGJJGJJGGJJ8GCJJGCJGJGJJJGJJJGGGGG8JGGJGGGGGGCGGCCJGCGG1GGGGGCGCGGG=GGGGCGCGGGJGGGG8CG=CGGGGCGGGGGGGGGGGCGGGCCG=GG=GGCCGGGCCGCG1 +@gi|10005|ref|NC_05.1|-7/1 +CCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGATTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTG ++ +=CCGGGGGGGGGGJ1JJJJGJ(JJJJJJJJJJGJJJGJGJGJGJGGGCGGGCGJG=JCJGGGCGCGGGGGCJGGGGJGGCCCGG=GGGGGCG(GGC=GGGJGGCGC=GC8GGC=GCGG=G1CG1GGCGGGGGG1GCCGGCGGGGGGCG8C +@gi|10005|ref|NC_05.1|-5/1 +TTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTCTGGGACTTTCACTACTAAGCAAATTAAATCTAGTGTTAGCCGCATGTAGACATT ++ +CCCGCGGGGGGGGJJJJJGGGJJGJJJGGCJJGJJJJJJGG=JGGJJ=J8GJJJCJG1JGCG8GJ1JGGGGJGGCGGCGJCCGCC1GG1GCCCCG(CGGGJCGG(=CGCCGGGCGGCGCC=CGCGGCGGGGCCCGCC=GGCGGGCCCGGC +@gi|10005|ref|NC_05.1|-3/1 +GTTTGTAGTAGGCGTAGATTTGTCTGCGAACAGCCACAGCCCCCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGAGTTAATTTGCTTAGTAGTGAAAGTCC ++ +CCCCGGGGGGCGGJJJJJJJJJ8JCJJJJJGJJJCJCJG=JGJGJGJGJJ=JG=GGG1J=GJJGG8CGGGGGGGGGGGCGCCGGCGGGGGGGGGCGGGCGJG8GCGG=CGGCGCGCGGCGGCCG1GGGG8GGGGGCCGGCGCGGCGGCGG +@gi|10005|ref|NC_05.1|-1/1 +AGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGC ++ +CCC=GG=GGGGGGJGJJJJJJJJJJJJJJJJJ=J=JGCGJJJGGGJJCGJJJJCJJJGGGJGGJ(GGGGGJ1CCCGJCGGCCGCCC=CGGGGGCCGGG8=CG=GCGGCGGG1GCC=GCG1GCGGCG=GGCGG=G8GC(GCGGCCGCGGGG +@gi|10006|ref|NC_06.1|-9/1 +GTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTA ++ +CCC1GGG1GGGGCJJJJ=JJJGJJJGJJGGJGG=GJJJJJGJJJJJGJ8JJJCJG1=GJGGGGJGJG8GGGJCGGCCGGCGGCGGGGGGG(GCGGGGCG==CGCGGG8CCGGGGGGGG=GG=GGGGCCGGGGCG=GGGGGGCGGGGCCCC +@gi|10006|ref|NC_06.1|-7/1 
+GCCCCAGCCACCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACCCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTA ++ +CCC1GGGGCGGGCJGJJGGJJJJJJGJJGCJJ=JJJCJJC=GJCJCGGGJJJ=GGJGCJGCGCCJJCJG8=GCGJGJG=CCG=CGGCGGC=GGC1G=CGGCCGGGGCG8CGC1GCGCGCGGGGCGGGGGCGGGCC=GGGGGCCGGGGGG= +@gi|10006|ref|NC_06.1|-5/1 +GTGTCTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATTTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAA ++ +CCCGGGGGGGGGGJGJJJGJJJJJGJJJCGJJJJGJJ8GJGJJJGJGCJJCGCJ(GGGGJGCCCGGCCGGJGGGGGG8JCGCGGGGGG=GCGGGGGGCGCJCG=CCGGCG=CGG8GCGGC=G8GCGGGCC=GGGGCCGGGG=GGGGG==C +@gi|10006|ref|NC_06.1|-3/1 +CAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCCCATCGGAGTAATCTGCATTGAATGCGAGATAGACACCAGAGTCACCCTT ++ +CCCGGGGGGGGGGGJJGJGJJJJGGJGJJJCCJJJGGJGJGJGJGJJGJJJ=GJGJGGCGCCGGJGJC=GGGCGCGGG(GGGG=GGGCCGGGC=C1G=CGJGGGG=CGGCCG=GGGGGGGGC8GGCGGCGCGGCCGCCGCGGGGCGCGGG +@gi|10006|ref|NC_06.1|-1/1 +ATACTTGCTTACTCTTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGGGTTGGTATTCTTGTAGTAGGGTCATCGTAG ++ +CCCCGGCCGGGGG1GJ=JJJGJJJJJJGJCGJGJGJGGJJJ1JJ8CC=JJGJ1=GGJJJGJJGCJGJCJGGGGG8GGCGGGCCGCGGCGCGG8GGGCGCC1=GGGGCGG8GGCCCGCGGGGGCGGGGC=8C=CGGGG8GG=G=CGGGGCG +@gi|10007|ref|NC_07.1|-9/1 +CTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGC ++ +CCCGCCGGGGGGCGGJJGGJJGJGGJJGGCJGJ(JJC=G8JJ1JJGGGJJGCJJGCJGGJG==J=JGC8GCGGCGGGGGCGCCC=GGGCGGGGGG(G=CGJCCCGGGCGGCCGGGGCCGG1C=GGGGGGC1GGGGCCGC=CGC8CGGGCG +@gi|10007|ref|NC_07.1|-7/1 +GCAGTAGCAGACAAGTTTGAATTGGGCGAAACCTACTTGCTTCCTCTTGGAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAAT ++ +CCCG=GGGGGGGGJJJJJJJGJ=J1CJGGJJJ=JJCJJJJGJJJGCJJJGGJ8GJ8GCGJJJGGGJJGGGC=CGGCGG=G8GG=GC=GCGG=GGGGGGCCCCCCCGGCGGGGC=GCCGGGC=GGGGGC=GGCG1GCGCCGGG1=GGCCCG 
+@gi|10007|ref|NC_07.1|-5/1 +TGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCC ++ +CC1GCGGGGGCGGCJJJGJJJJJJJJJJJJJJJJJ=JJGJGGG(GCJJGJJGGGJGGGJCJGGJGGJCCCGGGCGGCCGGGGGCGCGCGGGG8CGGGCGGJGCGGGGCGGGGGGCGGGCGCGGCGCGGGGGGGGGGCGGC8G=GCCGGCG +@gi|10007|ref|NC_07.1|-3/1 +ACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCACCGTAGCAAATATGACTTAAACGGGGGGGCTGGGGCTGTTCGCAGACAAA ++ +=CCGCGGGGGCGGJJJGJJJGGJGJ1GGGGJGJCCJJ8JGJJJJJGGJG1GGCJJJJJJ=GGG1GGJGGGGCGGGCGCGGCCCGGGCGGCGCGCCGG=G8JGCCCCCGGCGGC=CCGGCGGG8GCC=GC=GGG=C=CGGGGGGCGGGGCC +@gi|10007|ref|NC_07.1|-1/1 +AACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGCTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCT ++ +CCCGGGGGCGG=GJGJJJGJJGGGJGJJGCJJJJGGGGCGJJGGJJJGJ8JGGCJJGCGJGJGJCGCGGGJGGGGGJGGGGG(GGGGGG1=CGC==GCGGJCGGGG=G8GGGGGCGCGCCGCGGGGGCGGGG8GG1C1GC1GGG18GCGG +@gi|10008|ref|NC_08.1|-9/1 +CCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCC ++ +CC=GGGGGGGGGGJJJJJJJJGJCJJJJCJJJCJJJJGJ=JJJG8JJJGGJCGCGGJCJJ8GJCCG=GGGGG=GG=CGGGGGGCGGGG8GG=GCCCGCGGJC1G8GGCCGGGGCGCCGGC1=CGCGGGGGCGCCGC8GGCCC8GG8GGGG +@gi|10008|ref|NC_08.1|-7/1 +AAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGG ++ +C=CGGCGCGGCGGJJGJJ1J1JGJJGJJJJJJJJJGJCGJJGJJGJJCGJJGJ=JGCGJG(G=GGJJG=CCGG=GGGCJ=CGGCGCG=GCGGGGCGGCGGJGG18GGGCGGGGCGGCCGGGGCGCGG=GGGGGGGGGGGGGCGCGCGGGG +@gi|10008|ref|NC_08.1|-5/1 +TGCACTTCCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTA ++ 
+=CCGGCGGGGGGGCG(JCGGCJJJJJJJJJG1GCJCGJJJJJJG8GGJGJJGJJJG=GGJGGJGGGGJGCGCCGGCGJG8GGGC=GGCGGCCCCGGGGGGJCGGGCGGCGGGG8C8G=CGGGGGGGGGGG=GGG=G=GCCGGCCGGGCGG +@gi|10008|ref|NC_08.1|-3/1 +TTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCTGCATTGGATG ++ +CC=8CGGGGGGGGJJGJGJJJJGJJJGGJJGGJGGJJ=JJGJ=JJGGGJ8CGJG=GJCJGJJGGCJ=JJGGCGGCGCGGGGGGCGGGGGGCGGGGGG1GCJCGC(GGCCC8GGGCGG=GGGGGCGGGGGGCGGGGGGC=(CCGCGGCC=G +@gi|10008|ref|NC_08.1|-1/1 +AGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTATTCT ++ +CC=GGGGGGGGGGJGJGJCGJJGJ=JJGJJJ=CJJCJGGJJ=GJJGGJJGGGCGJCCGGJG8JGGGGGJJGG8JGGCGGCGGGGGGGG=CCGC=18GGGGJCCGCGGGGCGCCGGGGGG=GCG1CG8=CGGGGGGGCGG==CCCG8GGGG +@gi|10009|ref|NC_09.1|-9/1 +CCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGTTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCCGATG ++ +CCCGGGGGGGGGGJJJJJJJJGJJCCJJJJJJJJ=GJJJG(GJJ8GGGJG8JGJCJJJCGJCCG8CGGGC=GGGGGJ18GG=G=GGGGGGCGGGG1GGGCJGGCGGGGGGCGGGGGGGC=GGGGGCGCGGGCCGGGGCCCGGCGCCGGGG +@gi|10009|ref|NC_09.1|-7/1 +CTGCGAACAGCCCCAGCCCCCTTGGTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAATGAAAGTCCCAAAGGCTACTGTCGCGTGAGTT ++ +CC=GGGGGGGGGGJJGJJJ8JJJGGJJGJJJGJGJJGGGJJJJJJJJJJJJJCJJGGJGGGGJCGJG1=JGGGGCGG=GCG=GGGGCGGG1GGG88GG=GJGGCCG(GGGGGGGGCGGGCGGCCGCCGGCCCCGGGGG=G=GGGGCGC=C +@gi|10009|ref|NC_09.1|-5/1 +TTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTCCTGCTACTGCCGAAGTCACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGATATAGACA ++ +CCCGGGGGGGGGCJJJJGJJ8JJJJJJJGJJ8JGJJJCGGJJJJGJGJJGGJJ=GGGJG(GJ8JGJG8JJGGGGGGG8GGCCGGCGCGGGG=GG(GGGGGJCCCGG=GGGCGGGGGG8GCGGC=GCGCGGGGGGGGGG8CGC=CCGCGGG +@gi|10009|ref|NC_09.1|-3/1 
+GGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGTTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGC ++ +CC1C=GGGGGGGGCJJGJGJGJJGGGJJJJJJGJJGGJJCCGJ8GJJJJJCJGGCJJGGGCGGGGGGGJGG1GG8CCGGGGGGCG1G=GGGGGCCCGGGGCCCGGGC(C1G1GGCCGCCGG=CGCGGGGGCGGGGGCGGGGGGGGG=CGC +@gi|10009|ref|NC_09.1|-1/1 +TGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAATGAAAGTCCCATAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTGCTGCTACTGCCGAAGT ++ +CCCGGGGGGGGGGJ=GJGJGJJJJJJ8GJ1=GGJJGJ8JJJJJ1JJGGJJGGGGGGJGGGGGJGGGJJGGGCGCCCGGGG888GGGGGGGGCGG=CGGCCJGCGCGGC=GG=GGGGCCC=G=GGGGGGGCGCCCGGGGGGGCGG=GCCGC +@gi|10010|ref|NC_10.1|-9/1 +CTTGAAAAGTTGTAACCAAACGTACGAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCTTAGCCAATATGACTTA ++ +CCCGGGGGGGGG8JJGJJGJJJCJJ(CGGJJJGJ8JJGCGGJGGG=GJJJJGGJJJJJCJGGGGCGC(GCGG8GGGG=CGGGG8GGGCC==GGGCGGGGCJ=GGGGCCGGGCCGCG=C8GCCGGGCGCGGGCGC=GGCGGGGGGG=G1CG +@gi|10010|ref|NC_10.1|-7/1 +ATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGTTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCC ++ +CCCGGGGCCGGGGJGJJGGJGJJJ1=CJGJJJJJGCG8GGJJJJJJJJCJGGJJCJJGGJGCGGGJGJJGCG=GGJGCGGCGG=GCGCGGC8GGGCGGCGJC8G=GGGGCGCCGCGGCGGG=1=GGCCCGGGGGGGCGGCG1CGGGC=GC +@gi|10010|ref|NC_10.1|-5/1 +CCCTTGGGTTAAGTCATATTGGCTAAGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAATGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTGGTTACAA ++ +CCC1GGGGGGGGGGJGJGJG=GJGJJJJG=JJJJJC8JJJJJJGGGGJJC1JGGJJJGGGGGCCJJGJGCGGCCGGGJCCCCGGGGCGCGGGCGC=GGGGJCCGGGGGGGGGGGGGGG=GGGGGGGGGGGGCCG=CGGGG=GCGCGGGGC +@gi|10010|ref|NC_10.1|-3/1 +CAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTGTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATT ++ +=CCGGGGG1G=GGJJGGJJJCGJJJJGJJCCCGJJGJGJJGJ=JJJJJGJJGJGJG=C=8CC=GGGGCGGGGGGG=GCGGGGCG=C=CC=CGCCGGGGGGJCCCCGCGGCGGG8==CCCGCGGGGGCG1GGGGCCGGGGGGGCGGCGGGG 
+@gi|10010|ref|NC_10.1|-1/1 +TAATTTGCTTAGTAATGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTGGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGG ++ +CC=CGC=GGGGGGGGJJJJCGJJCJGJJCJJJJJCGJCCJ=JCGJGJGJGGC=GCJGGJGGCGJGG=GGC8GGC8GGGGC=GCCGGGG=G=GGGGGCGG=J(GGCGGGGGGGGG8CCGGGGGGCCCGGGGGGGCGGCGG=GCGC=8CGGG +@gi|10011|ref|NC_11.1|-9/1 +AATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGGGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCATTGCAGATTACTC ++ +CC=GGGGCGGCGGJ1JJGJGJJJJGJJG1JGJGJJJGGJ8JJJCGGGCJCGGJ=CCJGCGCGGGJC=CGJCCGGCCCGGGGGCGGGGG8GCCGGGGGGGCJGCCGGGG=GGGGGGGC=GC1GGGGGGCCGGGCGGGCGGCG88CGGG=GC +@gi|10011|ref|NC_11.1|-7/1 +AAGTCAAATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCCAACACTCGATTTAATTTGCTTAGTAATGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGA ++ +CCC1GGGGGGGGGJJ1JCGJJCJJJJGJJJJGGGGGJJJJGCG=JG=JGGJJGGGCCJJGGGJGGCGGCGGCGGGCJGCCGGCCCGGGGC=GG8CGCGGGJCGCC1GCGGGCGGGGCG=G=CGGCGCGGCGCGGCGGGGGCCCGGGGCCG +@gi|10011|ref|NC_11.1|-5/1 +CAAACAATGCACTTCCGGCGGCTATTCGAGGGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCATTGCAGATTACTCCGATGAGCGAAACAAAGAGTGGG ++ +CCCCCCGCGGGGGJJJJJJJJJJJJJGJJJ=JJGJCGGGJJGJGJJJJGGGJCJGGCGG1=GG8J=CCCGGJGGGG1CCCGGCGCGCGCGGCCGC=GGCCJG=GGGCGGGGG=GCCGGGGCGGGG=GGGGGCGGGCCGG=GCGG=GGGGC +@gi|10011|ref|NC_11.1|-3/1 +GTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGT ++ +CCCGGGGGGGG=GJCJJJGJJJJCJJJJJJJJGGJGJGJGJGGJJGGJJGGJJJJGGJGJCGGGGCGGGGG8GGGC=GGGGCGGGG8=CGGGGGGGGG=G1CGGGGGGGGC1GGGGCGGG8GG8=GGGCGGCGGGGCGCGGGGGGCCGGG +@gi|10011|ref|NC_11.1|-1/1 +GGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGGGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAACGGTCACTCTTGTGTCTATCTCGCATTCA ++ 
+CCCGGGGGGGGGGGJGJJJJJGGJG8JJGJJJJJGJ8JJJGJJJJGJJCGGCJGGJGGGJGGGGCGGGGCCGJGCGGJG8G=CCGGCC=GGGCC=GGGGCJGGGGGGGGGGCGGG1GGCGC(GGCGGGGG=CGG=CGGGGCC8GGGGCCG +@gi|10012|ref|NC_12.1|-9/1 +CGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAGAGGGTGACTCTGGTGTCTATCTCGCATTA ++ +CC=GCGG=GGGGGJGJJGJJJJJ8G=JJJGJJJ=JJJJJGJGJJ=JJGGJJJJJJCJGJGJJ=CJGGGGCGCGGGGGGGGCGGCG=GGGCGGCGGCGCGGJC=GGGCCCCGGGCGGC8CGGGGGG=GCGCCGGGGGCCGGCG=CGGG8GC +@gi|10012|ref|NC_12.1|-7/1 +CAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTTAATGCGAGATAGACACCAGAGTCACCCTCTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATAGATCCCTCTC ++ +CC8GGGGGGGGGGJJJJJJJ1GJJJJJCJJ1GGCJJJJCGJJCJJJGGJJGGCGGCCGCGCCJGGGCGGG=GCCGG=GCGGGCGGGGCGCG=GGGGGGGGJC=CGG1GCCGG1GGGCG1CGGCCCGGCCGGC1GCGGCGGGGGGGCGG8G +@gi|10012|ref|NC_12.1|-5/1 +ATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTA ++ +C=1=1GGGGGGGCGJJCJJGGGJJJJCJJJJJJJJJJJJJCJJCGJ(GGJGJJJJJJ8GGGCCCC8GGGCGGCGGCGGGGCGCCGC8GCGCGGG(GGGGGJGC8G=GGGGG=GGCGCCGGGGCGCCCG=GGGG1GGCCGGCCGCCC8CGC +@gi|10012|ref|NC_12.1|-3/1 +GTAGTAGGCGTAGATTTGTCTGCGAACAGCCACAGCCCCCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAA ++ +CCCGCGGGGGGGGJJJGCGJJJJJJJ=JJJCJJJJJGJJJGJCJJCGJGJGGGGGGJGJJ=CGJGCGJ1GGG8J=G8GCCCGGGGCCGGCG(CGGGGGGGJCGGG=CGGGGGCGCCGGCGGGGGCGGG=GGGCGGGGGCCGCGGGC=GGC +@gi|10012|ref|NC_12.1|-1/1 +ATCTCGCATTAAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCTCTTCCGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGT ++ +CCCGGG=GGGGGGCJJJGJJCGJJJJJJJJJGJJGGJGCGGJGJ8GGGJGGJGJGJGGGJCJ=GGCGGGJJGGCGGGGCCCG1GCGGGG1CGGGGGGCCCCCGGGGG=CCGGGGGGGGGGGGGGC=CGGGGGGGGC=GCCGGGCCGCGCG +@gi|10013|ref|NC_13.1|-9/1 
+CGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCAATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAG ++ +CCC8CGGGGGGGGJJGCGGJJGJJGGJJJJG(J=CJJJJJGGGGGCJGGJJJ=GGCGJCCGGCGGCCJCGGGCGJGGGGCCGGGGGGCCGGGCGCGGCG(==CCGGGGGGGC=GGGCCCG8GGCCGGGGGGGGGCCGGCGCCCC=GCC=C +@gi|10013|ref|NC_13.1|-7/1 +GACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGTTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGC ++ +CCCGGGGGGGGGGGJJJJ1GJ8JJGJJJJJJG=CGGJJJCJJGGCJ=JJ=CC=J8GGJC8GGGGGGGCGCGJCGCCGC=CCCCCGGGC=GGGGGGGCCGGJGCGGG=CGCG=CGG=CCGCGGGGGGGGGGCCGGCCCCGCC=CGGGGCG1 +@gi|10013|ref|NC_13.1|-5/1 +AAGTAAAGGGTGACTCTGGTGTCAATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACAT ++ +C1CCGGGGGGGGGGJ(GJJJG=GC8JCJJJJGJGJGJJGJGJGGCCGGGGJJG=JGJJJJGJ(G8GGJC(GGJGG=JCGGGCCGCGC=GCGGCGGCCGCGJGGCGGGGCGGG8CGC=CCGGCCGGCGCGGGGGGC=CGGGGGCGG=GGCC +@gi|10013|ref|NC_13.1|-3/1 +GCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATTGACACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATAGATCCCTC ++ +=CCGGGGGGGGG=GCJJJJ1JJGGJCGJGJJJGJJJJJGGGGJGJCJGJGGGGJJJJJG=JJGJJCGG18GCCGGGGG=GGGCCCGG1GC=G8GGGGCCGJ=1GGGCG8GGGGGCCGGCGCCCCGG=G8CCGGCGCGC=GGGGGCGG8GG +@gi|10013|ref|NC_13.1|-1/1 +CAGCCCCCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGTTACGTTTTGTT ++ +=C=GGGGGGCG=GJJJGJ8JJGJJJJCGJJJJJJJGGJJGJGGJJJJCGJJJJGCJJCJGJGCGGG=GCGJGCCGCGCC=GGGGGGGC=CGGGGGGGGCGJCCGC=GCC8CCGG1CGGGGGCGG=C=GGCGCGGGGGGGCGCGG=GGGGC +@gi|10014|ref|NC_14.1|-9/1 +ATTGGGCGAAACATACTTGCTTACTCTTGATAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGGGTTGGTATTCTTGTAGTA ++ +CCCGG=GGCGGGGJJJJG8JJJJCJ8JJGJJGJCGJCGGGJ8CGJJJJJJJGGGG=GCGGJ=GGCJCCCGGCGGGGCCC1C=CGCGGGG1GG=GGCGGGGJGGGCGGGGGGGGGC=GGGCGGGGCGGGGCGCCGCGGGGGCCGG=G=GGG 
+@gi|10014|ref|NC_14.1|-7/1 +TACAAGAATACCAACCCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGCCGCGTGAGTTGGTACGTTATGTTACAACTTATCAAGAGTAAGCAAGTATGTTTCGCCCAATTCA ++ +CCCGGGGGGGGGGJJJ=GJGGGGJGJGJGJGJJGGGJ1JGGJJCJJJJJGJ=JCGJJGGJCJGJ1G=GGJCJGJ=GGGGGGGGCG(CCGGCGGGGGGGGCJC=GGCCCCGGGCCGGC==GGCGCGGGGCCGGGGGCGGGGGCGGGGGGGG +@gi|10014|ref|NC_14.1|-5/1 +TCCCCAAAAAATCCCCGGGGGAATATTTCGACCACACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGGTCGGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATGC ++ +CCCGGGGGGGGGGJGJJJCJJJJGJJGCCJGGJJJJJGJGJC=JJGGJCGGJC8JGJJG=GGGCGJJJCGCGGCGGGCGGGGCGGCCG=GGGGCGGCGGCJ==GCGCGGGGGGGCGGGG=GGGGGGGGGGC=GCGCGG1GGGGGGGGGCC +@gi|10014|ref|NC_14.1|-3/1 +ACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAATTGGTACGTTATGTTACAACTTATCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTCTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATG ++ +CCC=GCGGGGGG8JJJJJJJGJJJCJJGJJ1GJJGJJJJJJJCJJCJJJCC=(JJ=JGGJGGCGJGCGGGGCGG8=GGGGGGGGGCGGGCGGGG=GCCCGJC=C8CCGGG8GGG=GGGGGG=1GG==CCC==GCCGCGGGGGGCGGGGCC +@gi|10014|ref|NC_14.1|-1/1 +GCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAGAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGATAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACT ++ +C1CGGGGGGGGGGJJGJ=JJG=JJGJJJGJJ1JJGJJCJGJGJJJGJGG=GGCJGGJCCJJJ(GGG=GGGGGGCGGGGGGGCGGGC=GCCGCCGGGGGCCJGGC=GGGGGCGGCCGCCCGGGGGCGGGC1CGGGCGC=GCGGGGG8C=CC +@gi|10015|ref|NC_15.1|-9/1 +GTCTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAG ++ +CCCGGGGGGGGGGGCJGCJJJJJJGCJGJGGJGGJJJCJCJGGGGGJGGJJJGJJJJGJGJCGCGCCJGGCGGG==G=GGGGGCCGG=CGGCGGCCGGCCCC=GGCGCCGCGG=C=G=CGGG=GCGGCGGGGCGGGGGG8CCGCC=CGCC +@gi|10015|ref|NC_15.1|-7/1 +ATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCCCATCGGAGTAATCTGCATTGAATGCGAGATAGACACCAGAGTCACCCATTACTT ++ 
+CCCGGGGGCGGGGCJJJCJCCGJJGJJJJGGGGGGJJJJJJGGGGJGGJJGJCGGGGGJ1GGGJGGGJGCG(GGC8CGCGGGGGGGCCCCCGGGGGCGGGJCGGGGGCGGGGCGGCCGGGGGCCCCCGGCCGGGGGGGGGCGGGCGGGCC +@gi|10015|ref|NC_15.1|-5/1 +GACCATACTACAAGAATACCAACCCATTAAATTTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGC ++ +CCCGGGGGGGGGGGGJGJJGJJJJJGCJGJJJGJCJJJJGGGJJGJJJCGJ8JJJGJJJ(GJGGGGGCGCGJGGGGGGGGG=GGGGCGGCGGGCCC=GGGCGGGGCGCGG=GGGGGGGCGG1GGCGCCGG=GCGGGGGCGGCCGCGCGGG +@gi|10015|ref|NC_15.1|-3/1 +GATGACCATACTACAAGAATACCAACCCATTAAATTTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAACTTTTCAAGAGTAAGCAAGTATGTTT ++ +8CCGGGGGGCGCGJJGJJJJJGJCJGGJJJGJGGGJJJ1JJJJGGGJCCG8JJGJCJCGGJJJCCC=G8=GJJGCCGGGGCGG1GGCGGGGGGGGGGGGG=CG8GCGG=GCGGCGCGGGGGGGCGGCGGGCGCCGGGGCGGGGCGCGGGC +@gi|10015|ref|NC_15.1|-1/1 +TAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGAAATTTCATGGGT ++ +CCCG=GGGGGCCGCJJJCGJCJJJJJJGGJJGGJJGJJJGGJGGJJCJJGCJGGGG8=JGJGGJG=CGGGJGGCCCGGGGGGGCGGGGGGGGGCGGCGGGJCGGGCCGGGCGGGGCGGGGGGCGGGG8GCGCGGCG8C1GCCC1G(CCG8 +@gi|10016|ref|NC_16.1|-9/1 +TGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCACTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGTTGTAACAAAACGTACC ++ +=CCCCGGGCGGGGJGJCGJJJJJJJJJJJJJJGGJJJJGJJJJGGJJGCJCJJJJJCJ=JGGG1GGGCJGGJGGCGCCCGGCGG=CGCCG=G(GGGGCGGJGGCCC8GCCCCGGGGGCG=GGGGGGGG=GGGCCCGGCCGGCG=GGGG=C +@gi|10016|ref|NC_16.1|-7/1 +CTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCACTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCT ++ +CCCG8GGGGGGGGJJJJJJJGGGJ8JGJGJJGJJJJJGJJJGGJJJCGGJGJGG8G=GGGCCGJGCGCGCGCGJGCGGGCGCGCGGGGC8CGCGCGGG=GJGGCGGGGGGGGGGGGC8GG=GGCGG=GGC1CGGGGGGCCGCGGGCCGCG +@gi|10016|ref|NC_16.1|-5/1 
+CCAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACCGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCA ++ +CCC1GGGCGGCGGGGJJGJJJJJJJJJCJJCGJJGJJJJGJ(JJCJGJJCJG(G=JJJJGGJJ=GCJGCJGCJGC8=GG=GGGGCGGCGCGCCGCG=CGCCCG1GG=GG1GCGGCCCGGGGCG==GG=CGGCGGCGGCCCC=GCGGCGCG +@gi|10016|ref|NC_16.1|-3/1 +GCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGTGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCC ++ +CCCGGG=GGGGGGJJGJJJJGJ1JJJJGJ=GJJJCGJGGJJJJJGCCJJGJJCJJGGJJ=JJGGJJGGJCGJJCGCCJCGG=GGGG=GCGG=GGGGCGCCJG=GGG1CGCCGC1GCGGGGGG8GGGGGGGCCGCGGCGGC=GGCGGGGGC +@gi|10016|ref|NC_16.1|-1/1 +CAAACAATGCACTTCCGGGGGCCTTTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGG ++ +CCCGGGGGGGGCGJCJJCGGJJJJJJJJCJJ1JJJGJJJGJJGJGGJ=GJGJJJGGGCJGJGGGJGGG1G1GCCCGGGG=GG=CGGGCGCGGGCGGGGGGJCGCGCGGG=GGGGGGC(GGCCGGCCGGGCCGGCGGGCGCCGGCCCGCCC +@gi|10017|ref|NC_17.1|-9/1 +CTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCCATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATAACTGTCACTTCGGCAGTAGCAGACAAGTTTGAA ++ +CCCGGGGGGGGGGCCJJGJJJGJJGGJJGCJJGGGJJGGJJGJGGJCG==JJGGGGGCCJGJGGCJCJCGGGGGCCGCGGGG=GGGCGGGGGC8GGGGG=C=GCG=G1G1GGGGGG=G=GGGGGGCCGGGGCGCCGGGGGGGGGGCGGC= +@gi|10017|ref|NC_17.1|-7/1 +CCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATAACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAATTGTAACAAAACGTACCAACTCACGCGA ++ +CCC=GGGG(GG1G1JJ8GJJGJJJGJGGGJGGJGGJJCJJJJGJJJGJ1GGG=1CGJGGCCJGJJGGGGGGGGCCCCGCCGGGGGGCG=GGCGGGGGGG=CCCGGGGCCCCGGGGCGGGGG=G=GGGCGCGGGGCGGCGGCCG1CCCGCG +@gi|10017|ref|NC_17.1|-5/1 +AGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTTATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATGGGATGCGAGATAGAAACCAGAGTCACCCTTTA ++ +CCCCGGGGGGGGG=JJJGJJGCJJJJGJCJJJCJJ(JGGJJCCJCJGJJ(G=CCJGG(GGJJCJGJJGCGGGG1CGGGCGGGGGGGC(GGGGGGG8GCGGJCC=CGG=GGG=CGGGCCGGGCGCGGCGCGCGGGGCGGCGCGGGGGCGGG 
+@gi|10017|ref|NC_17.1|-3/1 +GATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATAACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAATTGTAACAAAACGTACCAACTCACGCGACA ++ +CCCGGGGGGCGGGGCJGGGJGGJCJJGJ8JJJGJGGJJJGJJGGCGGGGG=JJGJGGCCGG1CJGCCG(GGGGGCGCGCGGGGGGC1GGCG=CG=GCC=CJCGGGCGGGGCGCGCC8CGGCCGGCGCGGCGGGGGGGG8CGCG=CGCCCG +@gi|10017|ref|NC_17.1|-1/1 +AAATGTCTACATTCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAATTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGA ++ +CC=GGGGGCGGGGGJJGGJJGJG1JJJJJJJGGJ=JGGJGCGGJGJCJGJGGGGGCGJJJGJ=GG=GJGCCGGG=GGCGGGGCCCGCGGCG8=GGGGCG1JGGGGGG8GCGG=GGGGCGCGCCGGGCGCGGGGGGCGCCCGCGG=GGG=G +@gi|10018|ref|NC_18.1|-9/1 +ATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTGGCAAATTAAATCGAGTGTTAGCCGC ++ +C=CGGGGGGGGGGJGJJJJJJJJJ=JGJGJ8=JJJ=JGJGJCCGJGJJJJGGGC1GJGJCJJCGGGGCG=GCCGGGCGGJCCGGCGGCGGGGGGGCCCGGC1CCGG=GCGGGGCG=GGCGCGCGCGGC=GCCGGGCCG8GGGGCCGC8GG +@gi|10018|ref|NC_18.1|-7/1 +CGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTGGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCTGCATTGG ++ +CCCCGGGGGGGGGJJJJJGJGCJJJJJJJJGJGJJJCJGJGJCJJJJG==GGCGCGGJ8CGGJGGJGGGGJJG=G1GGCGGGGGCCGGGGGCGGGCGGGGCGCGGCGG=CCGG=GGCGC81GCGGCCCGGG=GGGGCGGCG1CG=CCCGG +@gi|10018|ref|NC_18.1|-5/1 +TCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTC ++ +CCCGGGGGGGGGGJJGJJCGJJJJJJJGJJJG8GJJJGJJJJGCGJGJJGJJJJJGGGGJC=CCGJG1CGGG=GGCGC8CG1CGGGGGCGCGGGGGGCGGJC=GCG=81CCGCG=GCGGGCGGG=GGGGGGC===GGCGGCGGGCGGGGG +@gi|10018|ref|NC_18.1|-3/1 +ATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTGGCAAATTAAATCGAGTGTTAGCCGCATCTAGACATTTAATGAGTTGGTTTTCTTGTAGTA ++ 
+CC=GGGGGG8CGGJJJJ1JJJJJGGJJJG8J=JJJJCGJJGJJGGGJJGJJ=CGGGJJJJJGJG=GGCJCGCJCCCG=CGG=GGGGGCGCCGGCCGGGGGJCGGGCGGCGC==G8GGGGGGCG=GGGCGG=C8GGG=GCGGGGGCGCCGC +@gi|10018|ref|NC_18.1|-1/1 +GCTAACACTCGATTTAATTTGCCAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCC ++ +C1CGGGGGGGG1G1JJJ=GJJJGGJG(GJJJJJGJGJGJGGCGJCJJJJJGGGJJJJJCGGJ=CJGGG(8GGC8CCGGGGGG=8CGGCG8GGGGCGGGGGJCGGGC=CG8GGGGGGGGCCGG=CGCCCGGGGCGGGGGCGGGGCGC(GG= +@gi|10019|ref|NC_19.1|-9/1 +CAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAATGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCGGCATTGGATGCGAGATA ++ +CCCGGGGGGGGGCJGGGJGJJCJJJJJJJJJJJJCGJJJJGJGGGGGGGJ1CJGJGGGGJC=G8GGGCGC=GGCGJGCCGGC8GGGCCGGGGGGGGCGCGCCCGGCGCGCCGGCCGCCGGGGCCCG=GGCCGGGGCGGGGGG=G1CGCGC +@gi|10019|ref|NC_19.1|-7/1 +CAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCCAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTATT ++ +C=CGGGGGGGGGGCJJJJJJGJJGJJJGJJJJJ(JJJGGJJGJGGJJGJG8GJJG=JGGGCGG8GJJCC=G8GGGJCCG=GGCCCGCG=GGCCGCGG=8GCGGGGCGGGGGCGGCGGGGGCGGGG=CGCCCCCCGGGGC1GGCGGGGCC= +@gi|10019|ref|NC_19.1|-5/1 +CGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCCGATTACTCCGATGA ++ +CCCGGGCGGGGGGJJJJGJJJJGGJJJ1JJG=JCJJCJGJGGGCJJ8GGJGGGJJ=GG=JCGJJGGGGGGGGCGJ=GGCGGGGGGC8GG=GCGG=GCGGGJGCGGGGCGCGCGG=GGCGGCCGGGGGGGGG8G=GCGGCGGCGGG8CCCG +@gi|10019|ref|NC_19.1|-3/1 +CAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCCGATTACTCCGATGAACGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCATTTCG ++ +CCCGGGGGGC1GGGGJGJJG=JJGJGJJJGCJJJJJJCJJGJGGJJJ=CGGCCJGGCJGJCGGCJGGJG=GGGGGGCGGG1GGCGGGGG(GGCC=CGGC=JCGCCGGGCCGCGCGG1CCGCGGGCGGGGGG8CGCGC8GCCGG88GG==G +@gi|10019|ref|NC_19.1|-1/1 
+TTGTCTCCGAACAGCCCCAGCCCCCCCGTTTAGGTCATATTTGCTACGATGACCCTACTACAAGAATACCAACTCATAAAATGTCTACATGCGGCTAACACTGGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTG ++ +CCC=CGGGGGGGGJGJJJG8CJGGJJGGJJGJJ=JJGGGJJJJJCJGJGJJGGGJCGJGGGGC=GJJJCCGGG=GJ==GGGG(=CGCGGGGGCGGGCGGGJ=CGCGGC(GGGGGGGGGGGGGGGCGGGGGCGGGCGGGCGGC=GGGGGGG +@gi|10020|ref|NC_20.1|-9/1 +TATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATAATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACT ++ +CCCGCGGGGGGGG=JJJJGJGJJ=JGJCJJJJGGJGJCJJJ8GG=GJJCJGGCJGCCGGGCCGGGJG1JGGGG=GGCC=GCGGGG=GGGGCG1GGGCGGCJCGCCGGGGG1GCGCGGGCCGCGGGGC=GGCGG8C=GGGCGGCGGGGCCG +@gi|10020|ref|NC_20.1|-7/1 +ACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACCGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTACTCTTGTAGTAGGGTCATCGTAGCA ++ +CCC=GGGGGGGGGJJJJJCGGGGCJJGJJGGJJJJJJJJJJGJJCJGJGGGGJGGGCJGGJGGGGGGCGC=JGGGGCCGGGGGGCGCCCGGCCGGGGC8CJ=CGGGGGC1GCGCGGG===G=C=G=GGGGGG=GGGGGGCGCCCGCGCC= +@gi|10020|ref|NC_20.1|-5/1 +GGTAGTAGGCGTAGATTTGTCTCCGAACAGCCCCAGCCCCCCCGTTTAAGTCATATTTGCTACGATGACCCTACTACAAGAGTACCAACTCATAAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAA ++ +CCCGGGGGCGGGGJJGJJJJJJGJG=JGJJJJJGJGGJJGJCJCJGGGJJJGJC=JCJ1GGJGJ8=CCGCG=CCCGGGJGCGG==GGCGGGGGCGGC8GGCC1CGCCCG=G=8CC=G1CGGCCGGCGGCGGGGGGGGGGG1CG=CGCCGC +@gi|10020|ref|NC_20.1|-3/1 +ACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACGGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAG ++ +CCCGGGGGGG1CGCJJJGJJJJGJJJGGJJJGGCJ8GGJGCJGJ1JJJ=GGJJJJGCCGJJGCGGCGGGC1GCCCCCGGCCG=GCG=GGGCGGGCCGG8GJ8GGCGGGGGGGCGGGGG1GCCC8GGCCGCCGGGCGGCC=GGGCGCGCGG +@gi|10020|ref|NC_20.1|-1/1 +TACAAGAGTACCAACTCATAAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACGGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCA ++ +CCCGGGGGGGGGGJJJJJJJGJJJ=GJJ8J=JJJGJGGJJGGJGJJCJGCGJJGCJGGGGCGJJ=CGCCCJGGGGCGGG1GGGGGGGCG=GGGGGGG=GGCCGGCCGGCGGCCG=GGG8GGGGCCGGG1GGC=GGCGGG8GGCGCCGGCG 
+@gi|10001|ref|NC_01.1|-9/2 +CCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATACAGA ++ +=C=GGG=GGGGGGJJJJJJGJJJJGJJJJJJJ==JJ1JJJGJJGGCCJ=JCGGJGGG(JGGJCGJGG(G1GJGCCCCCCGCGGGCG=GG81=GCGG8=GG=CCJJJCC(GGC=GGC1GGG81G1CCGCCGGGGCGCGGGGCC=GG(GGGG +@gi|10001|ref|NC_01.1|-7/2 +GCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATTGATCCCTC ++ +CCCCCGGGGGGG(JGJCJJJJJJC(1JJGGGJCJJGJJJ1GGGGJJCJGJJGGGGGGGJGJCCJG==GG8GCCGG=CG8GGGCG8GCCCCGCGC=CCCG=C=CJJJJCGGC=GGGGCCGGGGGC=GCC=1CCCGGCGGGCCGCGCCCGGC +@gi|10001|ref|NC_01.1|-5/2 +AACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCG ++ +C8CGGGGGGGCCGG1JCJJJGGGCCJJJJGJJG=J1GGGJJJJJJJ1JJJGCCJCGGCGGGGJJGCJGGCGGGGGGGGGCGGG=GGCCG=GCGCCGGGG8C1C=JCJ1CCGCCGCGGGGGGCG=GGG=GGCCGGC8G1=CCGCGCG=8CG +@gi|10001|ref|NC_01.1|-3/2 +ACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCA ++ +=CCCCGGGGGGGGJGJJ8GJJ1JJJCJJJCJCGJGJGJGJJJJJGJC=JGGGGCGJG=JCCGGGCGJGGG8G8G=GGCGCGGGGCGGGGGGGC=CGGCG=GGJJCJ8GGG=CG1CGGG=1CGGGGG1G=GGGGCCGGCGGGG8GGC=C8C +@gi|10001|ref|NC_01.1|-1/2 +GTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTA ++ +CC=GGGCGGGCGGJJGJCJJGGJ1JCJJJJJGJJJJJCJGJJJ8JJGGJJ=C=GCGGGCGG(J(JJGGJCJG=GGGGJCCGCCGGGGGCGGGGGGGG8CGC=CC(JCGGGGGGCGGGCCGGCGGGGGG8GCCGGGGG8CGG=CGCGC=1C +@gi|10002|ref|NC_02.1|-9/2 +ACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGA ++ 
+CCCGGGGGGGGCG=JCJJGJJJJJ=JJJ1J1JJG8GJGJJGGJGGGGJJJGGGC8GJGGJCGCJGCJGGJ(GCGCG=GC1CGGGGGCG1GCCGGGGGGCCG==CJCJGGCGCGCGGGC=GGC=GG=CG8CG1G(CCC1GCCGGG=CGGGC +@gi|10002|ref|NC_02.1|-7/2 +GAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAAT ++ +CCCGGGGGGGGCC1JGJJJ=JJJGJGJCJGJJJ1JJJGJJGGJGGGG==JCJJJ8GJJJJCGGGGJGGCCGCGGGGJCG1=GGGGGGGGCGGGGG=GCGGCCCJJJJG1CGCCG=GGGGC==CGGGGGGGGCCG=CGGCG8=GCGGCCGG +@gi|10002|ref|NC_02.1|-5/2 +TGTAGTAGGCGTAGATTTGTCTGCGAACAGCCCCAGCCCCCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAA ++ +CC1CGGGGGCGGGJGGJJJJCJJJGJJGJ=J=JJCJGJGGJGCJJG1J8GJC=GGC=GGGG(G1GJGJGJ=G=CGJGGGG8GG=GCCGGGGCGCCCC1CGGCJJJ1J1GGGGC=GGCGG8GCCCGGGGG1GGCCGGCGGGCGCGGGGGGC +@gi|10002|ref|NC_02.1|-3/2 +TTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAG ++ +CCCGGGGGGGGGG=J==JJJJ1GGCJJGJJJJGGGJGJGJCGJGGJGGCGGJGGJGCGGJCCGGGGJGCCCGGCGJGCG=GGGGCGCGGGCC=GGGCGCGGCJ=JJ=GGGGCCCGGG=CG1G==CGGGCGCG==GGG(CGGGGGCGGCGG +@gi|10002|ref|NC_02.1|-1/2 +GAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTT ++ +CCC=GGGGGGGGGJCJJCGJGJJGGGJJJJCGJJJGCJCGGJCJGCJCGJJGGGJJGJC=CGGJ=JCCGGG1JGGCGGGGCCGGGCG18G=C8GC=GGCCGGCJC8CGGGGGGCGGG=GGGGGG=C1GGCGG(=CCCG=GCGC8CGGG=G +@gi|10003|ref|NC_03.1|-9/2 +CCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATTGATCCCTCT ++ +CCCGGGGG=GGGGJG1JJCJJGJJJJJGJG1JGJCJG=GGJJ1CGGCGJGJJGJGGGJGGCGGGGG8GGCJGCJGGCGJCGGCGGGG1GGCGCGCG=CGGGC8JCJJGCG8CCCGGGGGGC=CCGG8GGGCGGGGC=GGCGGGGCGCCC= +@gi|10003|ref|NC_03.1|-7/2 
+CATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCG ++ +C=CGCGGGGGGGGJGJJCG1JJJGGJGGGJGJJJG8GJJJJGC8=GJJGCJJJGCCGGGGCGGCCGCGCJ=GGCGGGGGG8CGG1GGG=C=CGGGG=GCCGCCJJCCG1GG=CCGCCGGCGGGGC8GCGGG=GGGCGCCG=1GCCGCGG1 +@gi|10003|ref|NC_03.1|-5/2 +CAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACGTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGT ++ +CCCCGGGGGGGGGCGJGJGJJJ1GJJJGGCJJJJCGJGJGGGGJGJJJCJGJJGJG=JGGJGGGCGGGGG8=GGG=JGG8CCGGGGCGGGCGCGGGCGC8=C=JJJJGCGGG=GGGCGGGGCCGGCC=GGCGGCGGCG=GGGGGCCGG=C +@gi|10003|ref|NC_03.1|-3/2 +CTTCCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACG ++ +CCCGGCGGGGGGGJGJJJJGJCJJGJJJGGCJGJGCJJJJGGGJGJGGJGGGGCJJ=JGC=GGC==GG=JGGGG=GGGJGGCGCGG8GGCGGGG=GGGGGG81JJCJGGG8GG1GGGGCGC8G8GGGCCGGGGGCGGGCGGCCCC1GCG8 +@gi|10003|ref|NC_03.1|-1/2 +CTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCAAATATGACTTA ++ +=CCGGGGGGGGGGJG1GGJGJJCGJGJJC1GCJJGGJCJJJJGGJGGJGGJCJCGJGJCGJGGGCJJGJGGGGJC=CGGG8G1G=GGGC1G=GGGGCGGCGCJCCCJGGGC1GCGGGGG1GGCCCGGGCG1CG1GGC=CCGGCGGCCGGC +@gi|10004|ref|NC_04.1|-9/2 +TTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGGTAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAG ++ +CCCGGGGGGGGG=JJJJJJJJGJJGJCGCJJJJJGJJGJJJ=GJJGJJGGGGGGCJJGJJGGJCG(GCGGGCGJ=CCGCGCGGGCGGGCCGG8GGGGGGCGCCCCJJGCGCCGG8GGGGCGGGGGGGCG=CGCGGGCGCCCCG=CGGCCG +@gi|10004|ref|NC_04.1|-7/2 +GGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAG ++ +CC8GGGGGGGGGGGJJJJJGJ=GCJJCJJCJJJJGGGJJJCJJJCJGJGJCJ8GJGG=J=CJG=GJCJGJGJJGG8CGGGGGGGCGGGGGGGGCGGCCGGC=J=JJCGCGGGGC=CGG=CGGCG==GCCGCGGCGCGGCCGGGG=G8GG= 
+@gi|10004|ref|NC_04.1|-5/2 +GTAGTAGGCGTAGATTTGTCTGCGAACAGCCCCAGCCCCCTGGGTTAAGTCATATTGGCCACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAATGAAAGTCCCAAA ++ +8CCGGGGGGG=G1GGJJJJJCJJGCJJ1JJCJJJCJJJJJGJJGCJGJJCJJCG=CCJC=JG=GCC=GCGGGGC=GCCGGGGGG=GGCG=CGCGGGGGCGGGCCCJJGGGCCCGGCGGGCGGGCCGGGC=GGGCC=G=GGGGGGGGCCGG +@gi|10004|ref|NC_04.1|-3/2 +GAGTTTGTAGTAGGCGTAGATTTGTCTGCGAACAGCCCCAGCCCCCTGGGTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAATGAAAGT ++ +CC=GGGGGGCG1GCJ=GC1GJCGCJGGJCJJJJJJCGJGJJCJGCJGGJGJGGJJ1CJGCGGCGGJGGGGGCGCG8G8GGGCCGCGGCGGGC=GGGGCCC=GJJ=CJGGGGGGGGGGGCGGGGGGG=GGGG=CGGCGGCCCGC=GC8GGC +@gi|10004|ref|NC_04.1|-1/2 +TTGCTTAGTAATGAAAGTCACAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGT ++ +CC=GG=GGGGGGGGJJG=J1JGGJGCGJGGJGJJJGJJGJG=JJGJGJJ=JGGJ=GJGCGGCGG=GJ=J=GCCGCGCGC=GGGGGGCGGGGGGCGGGGGGGCJ=(8JCGGCCGGCCCGGCCGG1GGGGGGCGCCGCCCGGCGC1GCC=GC +@gi|10005|ref|NC_05.1|-9/2 +AGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACT ++ +CCCGGGGGCGGGGGJJJJGJJJJJGJJJJJJJJJJGCGJGJGJGJ=JGJJGGGJJJJJJGGGG=GCCGG=GJGGCCGGGJG1CCGC=GGGCCGGGG1GGGGCCCJJC8C=8==CCGGGGCCGGGGCGGGC=GGGG=1C=CCCGCCGCGCG +@gi|10005|ref|NC_05.1|-7/2 +TACACGCCCTCAATGCATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAG ++ +C8CG=GGGGG=GGJGJ1JGJJCGJCJ8GG=JJCGJCJGGGJGJJCJGCGJJCJ=GJJGGJG8GGG8JJGCCCGG=GGG1=CGCCGGCGGGG=GCGGCGGG=(JJJJJGGGGGGG=GGCC1GG=C=1CCGGGGGGGGGCC=G8GCGGGGCG +@gi|10005|ref|NC_05.1|-5/2 +AGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAG ++ 
+CCC1CGGGGGCGGG1JGGJJJJJJGJJJGJJJGJJJJJJ=CJJJCJGGCJGJGCCJC8GGGGGJ8CGG=CGCCG=CGCGG=CCGGGGGGGGCCCCGGG1GGCJ=CJJGCCCCGGGCGCGGCGGG1GGGCGCCG1CGGGCC1GGGGCGGGC +@gi|10005|ref|NC_05.1|-3/2 +TTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAATATGACTTAAACCAGGGG ++ +=CCGGGGGGGGGGJ1JJGJGJJJGJJJJGJGGJGJGGJJCJGJ1JJJJJJJGCGJ=CJGGC1GJGGCG=JGGCCGJGGGGCG=GGGGCGCGGCCGGGG=GGGC=JJ=GGGCGCGC=GGC=CG8C=GGGG1GGGGGGGGGGGGCGCGGGG= +@gi|10005|ref|NC_05.1|-1/2 +TGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGAGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATC ++ +1CCGGGGGCGGGGCJGJGJCJJJJG=JJ=JJCJJJJJJJJJGJJJJG1CJCGGGCJGJJGGGJG1G8JCGGG1GGGGGGGGGC=CC=1CGGGCGCCGCCGGGJJJCJ=8=GGG8GCGGCGGGGGCGCCCC==CGGGGCGGGGGCGCGCGC +@gi|10006|ref|NC_06.1|-9/2 +AAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTT ++ +CCCGCC=GGGGGGCGJJJGJJ1CJJJGJGGCJJJJJJJGJGCJJCJJCJJJGJ(GGJGGCJGCJCGGGGJGG=CGC8GCGCCCCCGGC=CG11GGCGGG1GCJJJJJGCCCGGGGGCG=GGGGGGC==GG=GCG1GGGCG=GCGG=CGGC +@gi|10006|ref|NC_06.1|-7/2 +AAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGGGTTGGTATTC ++ +CCCGGGGGGGGGGCJJGJJJJGJJJJGJJ=J=JG1CJGJGGJJCCJCGGGJ8JJJJCJGG1GGGGGG=CCGGCCGCJGGCCGCG=CGCCCGCGGG=GC=GGC8JJJJGGGCGGGG=GCGGGC=1=G=GCC(GG8CGGCC=GGGGGCGGGC +@gi|10006|ref|NC_06.1|-5/2 +CAAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACGCTTTGT ++ +CCCGCCGGCGGGGJ=JJGGJJG=GJJJJJ1JGGJJGJCJJJJ=CJCJJG(J(CJG=GCGGCGGGCGGJGGGCGGGG1C=CCGCCGGC=G=GCGCGGGCGC=CJJJ=CGCCGG1=GGG=GGC=GGCCG(GCCGGGGCGGGCGGG(GCGCC= +@gi|10006|ref|NC_06.1|-3/2 
+TCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACT ++ +CCCGGGGGGGGGGJJJGGJJGGGCJJJJGJJJJGJJJGJJCJGGGGJJGJGJJJJJCGGGJGJGGJ1GCGCGCGGGGGCJGGGCGG=G=G=GC8GGGGG=CGCJ=CCGGGGGGGGG=GGCGGCGGGGGCCGGGCCGGGGGCCGGGGGGGC +@gi|10006|ref|NC_06.1|-1/2 +GAGTTTGTAGTAGGCGTAGATTTGTCTGCGAACAGCCCCAGCCACCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACCCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGT ++ +CCCGGCGGGGGGGJGGJJGGGGJJJJJJGJCJJGJJGJCJGJJJGC=C=GGGGJJCGGGGGJGG=8JGJCJGGGCG(CG8GGG=GCCCGGGGGCGGCGCCG=JJJJCG8G1GGGGGGGGGGCGGGGGGGG8GGGCGCGGCCGGGCGC=GG +@gi|10007|ref|NC_07.1|-9/2 +TACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGTTGTAACAAAACGTACCAACTCAC ++ +=CCCCGG1GGGGGJGJJJGGJGJ1JG1GJCJJGJCGGJGCCJCJGGGC8JGJ(GJGGJJGJGGCGGGGCJGCGGCGCGGCC88CCGGGGGCGCGGG===C(GJCJJ8GGGCGGCCCCG8GGCCGGGC=GCCGC1CGGCGG8CCGGG1GC8 +@gi|10007|ref|NC_07.1|-7/2 +TTTAAGTCATATTTGCTACGGTGACCCTACTACTAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCA ++ +CC1GGGGGGGGGCJJJ1JJGGJGGJCJ=GGJCJ(JJJGJ=GJJGJJ=JGJJJJ=CGCCGGGGGJGCGGGGGGCCCG8CGGCGGGGGGGG1GCG=CCCGGGC1JCCJCGCGGCGGC1CGG1GCCCCGGGC1GG1CGC=CGC=GGGCGGCGC +@gi|10007|ref|NC_07.1|-5/2 +TCTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGT ++ +CCCGGGGGGGGGGGJJJGCJGCJJ1JJJJJJJCCCJGJGGGJGGCGJCGJ=JJJCJGJG=GCJGG8GGGJJJGCCGGG8CG=(GGCGCCCCGGCGGC=GGGCJJJJJGGGGGGGCGGGG===GGCGGGGCCGGCCG=G=GGGGCGGGGGG +@gi|10007|ref|NC_07.1|-3/2 +GCTAGAGTTTGTAGTAGGCGTAGATTTGTCTGCGAACAGCCCCAGCCCCCCCGTTTAAGTCATATTTGCTACGGTGACCTTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGGAGTGA ++ +CCCGGGGGGGGGGJGJJJJJGJGJJCGGJGJJGGGJCGGJJ1JJGGGCJGJ=JJJJ=JC1C8GGG=CGGG==GJGCC=C(CG=CGGCGGCG(GGGGGGC=CCC1C1CG=GGCCGG=CGG=CG=GCGGGCGGGCCGGC1GCGGGG1CGCCG 
+@gi|10007|ref|NC_07.1|-1/2 +AGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTT ++ +CC=1GGGGGGGGGGGJGGJGJJJGJJGGJGGJGGJ=JCJJGJGG=GCGJJGGJ=JGJJJJG8CGGGGCJCJGCCGCGGGGGGGG=G=GGGGGGGGCG8GGCCJJJJJCCCGCCGGGCGCGGGGGGCGCCCGGGG=C8GCGC=GGGG81CC +@gi|10008|ref|NC_08.1|-9/2 +CAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCT ++ +=CCGGGGGGGGGGJGJJGCGJGJJJGJJCCGGJJJGJJJGGJGGGGCGJGJ=GGGGGJGG8GJGGGJGCGJGGCJCGCGGGCG8G8GC1GG8GGGGGGGGGGC=CCCGCGGG8GGGGCG1CG88GGGCGGGGCG1G=CGC=GCCGGGGCG +@gi|10008|ref|NC_08.1|-7/2 +GTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTG ++ +C=1GGGGGGGGGGJJJJJJGGCJJJC8GJJJJJJJCGJJJJJJGJJJGGGGJGJG==GG==GGGGCGGJGCG=CGGCG8GCGCGGGGGGCGGGCC8G(GGCGJJCJCGCCCGGGCCCGGGCGCGGGGCGGGGGGCGCGCGGGGGGCGCCG +@gi|10008|ref|NC_08.1|-5/2 +TCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTGAGC ++ +CCCGGGGCGGGCGGJ1JJJ=GGJJJJ8CJJJGJJJCGGGGJJGJ8CJJCJ(G8J1JG8JCGGG=CCCGGGGC=G(=JG8=G8GGGGCCG=GCG=CGCCCGGG=JJ=JGGGGCGCGGGG=C=GGGGGGGGGGGCGC1GCGG=C81GGGCGG +@gi|10008|ref|NC_08.1|-3/2 +TCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTC ++ +CCCGGGGGCCGGGGGCGJGGJJJJJJJCGJCJJJGJ8JGCJJJGGJGGJJJJCCGCCCCGJGCGCGJC=CGGCG=GGGGC=GGCGCG=GGGG=GGC1GG=8GJCJ8JCGGGGCGGCGCGC1GG=CGCCCGGCGGGCGCGGGCGCGGGGG8 +@gi|10008|ref|NC_08.1|-1/2 +CCAGCCCCCCCGTTTAAGTCATATTTGCTACGATGACCCTACTACAAGAATACCAACTCATAAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGT ++ 
+CCCGGCGGG1GCGJJGJJGJJGG1JJ=CJJJGJJJGJGJJGGJCJGGGCGGJJGJCGJGJGC=GGGGGGCC8GC=GGG1G1CGGGG1CCCCGGCCCGGGG8GCJCJCG=CG=G=GGGGGGCGGGCG1GGGCGGGCGCGGGGCGGCGCGC= +@gi|10009|ref|NC_09.1|-9/2 +GAAGTGACACTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGA ++ +CCCGCGGGG(=GGGGJGGJJJJJJJG(JJJJJJJJJJCCGCG=GJGJJGJC=G8JJCJJ=JGGCGGGCGGGG1GGGGGCGGGGGGCGGCGGG=GCGGGGCGCJJJJCGGGG=GGGGGGC8CG1CCCG==CCGGCGC8GGGCGGCCGGCC1 +@gi|10009|ref|NC_09.1|-7/2 +TAGCAGGAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGT ++ +CCCGCGGGGGGGGJGJG=GGJGGGJJGJGJJJGCGJJGJGJGJJGJ=GJGJJG88G1GJGJJ=CJGGGGG(GGGGCGJGGCGCGGCCGG1GGGGGGCGG18CCJCC8GCGCG=GCG=G1GGGGCGG88CG8CCG=CGC=GGGCGCGGCCC +@gi|10009|ref|NC_09.1|-5/2 +CGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGG ++ +CCCGGGGGCGCGGJJJJJGJCJJG8J=GJJGJGJJJJGGJJGGGGJJJJCGCJCGJ=GCJGGGG1GGGGGGCJJGCJC=GGGGCGCCGGGGGG=GGG8GCG=CJ8J1GC=GGGG1=CCGGGCGGCCGGCCC=CGGGGGCGGCCGCCGCGC +@gi|10009|ref|NC_09.1|-3/2 +CCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGAACCATAGATCCCTCT ++ +CCC1GGGGG=GGGGGJJJGG=GJJCGGGJGJGJCGJJJJGJC=JGG=CJGG8JGGJGJJCJGGGGGGGGGGCGJGGCGGCCCCGGGGGGGGGCGGGG8GGGC=JCJJGGGGG=CGGCGCGCGGGGCGCCGCGCCGCCCGGGGG1CGGCC= +@gi|10009|ref|NC_09.1|-1/2 +AGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGGAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAAC ++ +CCCGGGGGGGGG=JJJJJGJJGJJJJCJCJGJJJGJGJJGGGJGJJCJGGG=J8GGJJGJGJG=GGJJ8GCCC8GJGGGCC(C188GGG=GCGCGGGCGGCCJCJJCGC8GG8CG=C1G1=GCGGGGGC1CCGCGCGGG=1GG=G1CG8= +@gi|10010|ref|NC_10.1|-9/2 
+AGGCGTAGATTTGTCTGCGAACAGCCCCAGCCCCTTGGGTTAAGTCATATTGGCTAAGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAATGAAAGTCCCAAAGGCTA ++ +CCCGGGGGGGGGGJGJ=JJJGJC1G1CJ1JJGCJJJJGGJJCJJJJGJJGGJJJJ=G==GGJ=GGJGGCGJGGGGGCCG1G=JCGGGC=G1GC=CGGG8=C1JJJJJGG(GC=GGGGGC1GCGGCGCCCGCGG1C=GG1=GGCGGGGGGG +@gi|10010|ref|NC_10.1|-7/2 +CCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTGTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCT ++ +1CC=G1GGGGGCCJJCJGJJG1JJJGCJJJCJCGJJGJCJJCJJJJJCJGC=G8JJGJJJG1GJGJGGGGGCGCJCGGGG=GGCGGC8CCGGCCGG==GGG(CJCCCCGGGGCGGGCGGG8GCCCCGCCGCGGCGGGG=CG1C8GGGGGG +@gi|10010|ref|NC_10.1|-5/2 +GTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACCAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAG ++ +CCCGGGGGCGGGGJGJJJGJGGJJGCGJJJCGJJJJJJJJGCJCJJJGJ8JCGJJGJCJGJ8JGGG=GG8CJCG=GGGCGGCGCGGGGGGCGGC==GCGGCCCCCJC(GGGGG=GGGCG=GGGG1GGGG1GGG1G1GG8CGGGCGGCCGG +@gi|10010|ref|NC_10.1|-3/2 +TATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGACACAAAGAGTGGGCTAAGTACACGCCCTCAATGCA ++ +CCCGGGGGGGGGGJGJJGCGJJJGJGJGC=CGJGGJGJGJJGJ(1CJGGJGGGGGJG==JCC1JGCG(C(GGGGGGGCG=GCCGGGCC8G=CG==CCGCGG(CCJJJC=GC=GGGGGGGGCGGG1GCCGGGGG81CGGCGCGG8CGGGCC +@gi|10010|ref|NC_10.1|-1/2 +GCAGATTACTCCGATGAGCGACACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACCAAACGTACCA ++ +CCCGGGCGGGGGGGJCG1JJCJJCJ=CCJCJGJGCJJCCJ1JJG=JGGJGCJ=CJJCJ8G(JJG=88GCCCCGCGJGGCGCGCCGCCGCCGCCGGCGG=G1C1JJJJGGCCGGG=GGGGGGC=GGGCCGCCGGCGCGGGCGGGGG=CCGG +@gi|10011|ref|NC_11.1|-9/2 +TGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCAATGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCA ++ +CCCGGGGG=GGGGJGJGGGJJJJJJJJJJJJCJJJJJGJJGJGJGGGJJJJGGGJGCJJ8GGCGG8GGG==G=CGGGCCGGGGCGGGGGGGGCGCGGG=CGC=JJJCCGGGGGG=CCGGGGGGGGGCGGG(CG=GCGCGGGCCGGCG8CG 
+@gi|10011|ref|NC_11.1|-7/2 +TCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAATTAAATCGAGTGTTGGCCGCATGTAGACATTT ++ +CCCGGG1GGGGGGCJ=JGJJCGJGJJJJJJ8JGJGJJGJ=JGGJJJJGCGJCGGGGCC1=C8JGJ==GCJGCGGG8GCCGGC1CGGGGGG=GGGCG=CCGCGJJJCJCGCCCCCG=GC8CC=G8=CGCGGGGGCGCCCGGGG=GCGGGGG +@gi|10011|ref|NC_11.1|-5/2 +ACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCCTTGTTTCGCTCATCGGAGTAATTTGCAATGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTG ++ +8=CGGGGGGGGGGJJGJJGCJJGG=JJJJJGJJJJGJJGG=C(JJGJCGCC8JGGJ(CGJJJJGGGCJCGGGGGJCGJC=CG=G=GG=GGGGCGCGGGGGGG=JC=JC==CCGGCGG=GGGGGGGCC=GGGGGG=C8GG=GGCGCGCCG1 +@gi|10011|ref|NC_11.1|-3/2 +CTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCATTGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAA ++ +CCCGGGGGGGGGGJJJJJJJJJGGJJJJJGJC=JGJJGGJJJJGGCJJJGJGCJJGJGCGGGGGGJGCCJGJGGCGG=GGCGGGCG=C=CGGGCGGC=GG8CCJ(JJGGCC=GCGCGGCGG8GGCCCGCGCCCG1CCG=GGC8GCGGCC= +@gi|10011|ref|NC_11.1|-1/2 +ATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCAATGAATGCGAGATAGACACAAGAGACACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATAGATCCCCCTCGAATAGCC ++ +CCCGGGGGGGGG1GJCJJJGGCJGJGJGJJGGGJJGJGCJJJJJJG=G8JJG=CGJGGCCCGGGCGGCGGJGGGGGCG(CGGGGG8GCGGGG1=CCGGGCGCJC8JJC8GCGGGCCGGGCGCGGCCGG=C8GGGCGC=GCGC1CGCGGCC +@gi|10012|ref|NC_12.1|-9/2 +GGCGTATACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTTAATGCGAGATAGACACCAGAGTCACCCTCTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATAGATCCCTCTCGAATAGCCGCCGGA ++ +1CCGG1GGGGGGGJJJJGJJJCJJGJJGJGJJGJ(CJJJ8GGJGJJJJJJCGJ8CCGGGCJJGGGGGGC=GGG=GGGGCGGCG=CCCGGGCGGG=CGGGGGCJ=CJJ8CGGGGGGCG1CGGGGCCGG1=GGGGGGCGCCCGGGGCGCGGG +@gi|10012|ref|NC_12.1|-7/2 +GGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAGAGGGTGACTCTGGTGTCTATCTCGCATTAA ++ 
+=CCGGGCCGGGGGJJJGJCJGJJJJGGJGGJGGJJGGJGCCJJJGJGJJGJCCG8GJCG=GCGG8=JGJGGJC=GG=GGGCCGGGGCGCGGG1GC=GCGGG8JJCCJCCC1GCGCGGGG=CGGCGCCGGGGCGGC=GGC8=CGGCGG=CG +@gi|10012|ref|NC_12.1|-5/2 +TGCGAACAGCCACAGCCCCCTGGTTTAAGTCATATTGGATACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTG ++ +CCCGGGGGGGGGG=JJ8JJJJGCGJGJJJGJJJCJCGG(GJGJGGGJGJJGGJGGGJCJGGJJJJG1CG8GGGCJ1GGGCGGGGCC==GGGGGGGG=GGGGCCCJJC=GGG=G=GCCGCGGGGGCGCGG=CCGGG=CCCG=GGGGGC=GC +@gi|10012|ref|NC_12.1|-3/2 +TACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGC ++ +CC=GGGGCGGG=GJJJJGJJJGCGGJGGJJJGGJJJJCJGGGGJ8=GJGGGJGGJJGCJC8GGCJGGJGCCGGGGGGG==8GGGCGCGGCGGGC8GGGG==GJJCC=C=CCCGGG=G=GCGGC==CGGG=GGGGGCGGC=GGCGG(GCCC +@gi|10012|ref|NC_12.1|-1/2 +AAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCGGAAGAGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTT ++ +CCC=GGCGGGG8G=JJGJGJCJJJJJJJJJJJJJ1JGCGJJJJJJGJ18JJCGCJGGGGC=(=GJCCCCGGJCCG1GG8GCGGGGGCG(GG=GGGGGGG=GC=JJJJCGGGGGGGGCGGCGGCGCG=CGGCCGC=GCGGGGGCGCGGGGG +@gi|10013|ref|NC_13.1|-9/2 +GAAGAGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATTGACACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGA ++ +C=CGG=CG1GGGG=JCJGJJCGJGJJ1J1CGJGGGJGJJGGGGGGJGJJGG8JJCCGC8GGGGGGJ8GGG(G1CCCGG8GCCGGCC=GG=CGGGCGGCG=GCJJJ=CGGCGGGGGGGGGC1GGGG=CGCGGCGCCGGGCC1=CGG=GGGG +@gi|10013|ref|NC_13.1|-7/2 +AATGCATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTAACAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATC ++ +CCCGGGGGGGGGGJJJJJCG1JGGJGJJJJJ1GGGJJ(JGJJGGGJGJJGCJJG8JJJGCGJCJCGGGGGC1GGGGCGGCCGGCCG=GGGCGCCCGGGCCGCCJJJJCGCCGGCC=CC==GCG(GGGGGGGC=CCGGG8GGCGGGGGG=1 +@gi|10013|ref|NC_13.1|-5/2 
+GTCGCGTGAGTTGTTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGAGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCG ++ +CCCGGGCGGGGGGG=(GGJJJJJGGGGCJGJJJJJCGJ1CGJJJJJJG=JCJCJ=GJGGJGJGGJGGCGGGGG8GGGCGG81GCGGGG=8CGGGGGGCCG8CCJJJ=CCG=CGGGGG=CGCGCCCG1GGCG1G=GCGGGCGGGGG=C=CC +@gi|10013|ref|NC_13.1|-3/2 +GAGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCAATCTCGCATTCAATGCAGATTACTCCGATGAGC ++ +C=CGGGGG1GGGGGGJJ=JJJJCJCJJJJGJJGJJJC1(CGJGCJJJGJJJGGJ8GGGGJGJJ=GGGGGGGGGJ1CGGGCGGGCGGGGCCGGCG1GGGCCGGCJJCCGCGGGCCCGC8GCCCCGGCCG=C8GCCG=GGGGGCGGGGGGGC +@gi|10013|ref|NC_13.1|-1/2 +TGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTAACAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGG ++ +CCCGGGGGGGGGGGJG=JJ=CJGJGGGG=GJJJGCGGJJJJGJCGGGGGCGGJGCJJG8JGGGGCGJCCGJ8GGC8JGGCGGGCGGCCCCCGGGG=GGGGGCJJJ=JGG==CGGGGGGGG==G=1GCGGGGCGGGGGGGCGGGGG=CCGG +@gi|10014|ref|NC_14.1|-9/2 +GTAGCCGTAGATTTGTCTGCGAACAGCCCCAGCCACCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACCCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGC ++ +CCCG1GGGGGG1GGJJJJJJGJJJJJJJJCJGGJGGJJGJGJJJJGJJ1CGGJCJG8GCGGGJJG=GJJGJCCG8=CGGGG=GGCGCCCGGGCGGGGCGGGCJJJJ=GGGCGGGGCGGGGCCGCGC1GGGGCCGCGCCG=CGGCGCGGGG +@gi|10014|ref|NC_14.1|-7/2 +CACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAGAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGATAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCA ++ +CCCGGGGGGGGGGGGGJCJJ(GCJ=J1JGJGJJ1GGJJJ=J=JJGCJJCJJCGJJGGJJGC=GJ=GCJGG1CGG==G=G=(GCCGGGGGGGGGGGGCCGGGG=JJCJGCGGCGGGCGCCGGGGGGCCGGGGGGGCCGGGCGCGCG=CGCC +@gi|10014|ref|NC_14.1|-5/2 +TCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCCCATCGGAGTAATCTGCATTGAATGCGAGATAGACACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCCGACCCATAGATC ++ +CCCGGGCGGGCGGJJG1JJ1JJJJJJJJGJGJGGGJJGGCJGJJGGJGCJGJGJJGJGGGCGCGJGGGGJGGCC1GGGGGGCGGCCCCGGGGCGGCG=GGGCJCJJJGGGGCCCCGGCGCG8CCGGCGGC=GGGCGGCCG=GGC==GGGC 
+@gi|10014|ref|NC_14.1|-3/2 +ATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAGAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGATAAGTTGTAACATAACGTAC ++ +8CC=GCCGGGCGGJJJJJGJJJJGJGJJJJJJGJJ8J1JGGJJJJGGGCJCJCGCJJJGGGCGJ8GJ1GGGGG=GGG=GGG1GGGGGGCCC=GGGGCGGGGGCJJCCCC1G=8CGGGGCC=CGG=C=CCGG=GGGCGGG8GG=G8=G8GG +@gi|10014|ref|NC_14.1|-1/2 +TTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAACTTATCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTCTCTGCTACTGCC ++ +CCCGGCGG=GGGCGGGCJGJJCGJJJJJCG=J(JGJ8GJCJGGJGJJGGGCGGG=JGJJG=JC=GGGGGGGCGCJCJJGCCGGGCGGGGG(GCCGGGGGG8CJJJJCGG=188GGCCGGGGGG8CCGGGGCGCGGGGGGGGGGGCCCGGC +@gi|10015|ref|NC_15.1|-9/2 +AATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGG ++ +CCCGGGGGGGG=GJJCCJJCJGGJJJJJGJGCJCC=GGGJJJJJ=JJCCJGGGGJJJGGC8GGGJGGCGJCGG=CGGCGGCCCGGCCGGGGGGGGCC1GG(GCJJJCGGCCCCGGG=(CGGGGCGCGGCCGC=CGG(CGGGGCGGGGGG= +@gi|10015|ref|NC_15.1|-7/2 +TATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGTAGTAATGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGT ++ +CC1GGGGGGGGGGGJJJG=JGG1CJCJJJJJJCJJJ8CGJG(JJGGJ8JJGJJGJGGG(=GGGJGCGGCGCGGCGGGCGCGCCGGCGCGGGG=GCG=GGCCCJJJJJ=GGGG=GGGGGCGCGCGGGGGGGGGCGGGGGGCC=GCGGCGCC +@gi|10015|ref|NC_15.1|-5/2 +CCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTA ++ +CCCGGGGGGGCGGJJGGJCJJJJJG1JJJ==JJJJJJGGJJG(GJ(JCJJJGGJCJGJGCJGCG8JCGCGCGGCCGCGGCGGGGCGCGGGCC=GGCGGGGCGCCCJJGGGGGCC(G8CGG8GGCCGCGGGCGGGG1GGGCCCGGCGCCCG +@gi|10015|ref|NC_15.1|-3/2 +GACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCGAGTGTTAGCCGCAT ++ 
+CCCGGGGGC(GGGJJGJCCJJJGJGJJJJJGJJJJGGG1GGJJJCJCJJGGGCGJGGGCJCGJ=8JGCCGGGJGG=CGGG=GCCGCGGGGGGGG1GGGGGGCCJ==8CGGC8GGC=G=GC=GCGGCCGG1GGGCGGCGGCC=C=GGCCG= +@gi|10015|ref|NC_15.1|-1/2 +ACCTGGTTTAAGTCATATTGGCTACGATGACCATACTACAAGAATACCAACCCATTAAATTTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAAC ++ +CCCGGGGGGGGGGCGJJGJJJCJCCJCGJJGGJJCJGJJJ=GJGGC=JGJGCJCG8GGGGJJGGGGC=GCGGGG(GG=GCGCCGCGGGGCCGCGGGGG(8GGJCJJCGGGGG=1GGGGCGGGGGG1G=GG=GGG8CCCGG=CCCCGGGCG +@gi|10016|ref|NC_16.1|-9/2 +GCGGCCAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACCGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCAT ++ +CCCGGGGGCG=GGGJJJGGJ=JG(JCJJJJJJJGJJJGJ8JJJJJGJGJJGJJGJGJG8GGJGJCCJ=GG=GJGCGJGCGGGCC=GGGGGGGGGGGCG8GG=CJJJJGCGCGCGCCCC=G=GGGGCGGGGGG==GGGGGGCGGGCCGGG8 +@gi|10016|ref|NC_16.1|-7/2 +AAGTCCCAAAGGCTACCGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGTGCGTGTACTTAGCCCACT ++ +CCCCGC1GGGGGGJGJJJJ1JJCJJJJJJGJJJG=JG=GJJGGGCJJJGGJGGJCJGCGCGCCCGCGGGJGGCGCCGG==CGGGGGGGG=GCCGGGCCGGC1CCJJJGCGGGGCCG(GGGGGCCGG=GGGCCGGGGCCGGCGCGGGGGCC +@gi|10016|ref|NC_16.1|-5/2 +TCTGGGTTCTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGATAAGTACACGCACTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCCGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTT ++ +CCCGG1GGCGGGCJJG1GJGJJGJCJJGJJCJC=JJ=GJGGJJG=CJ=GGGJGGJGGG8JCCGGCCGCGGGJ==GCCCCGGGGCCC8GG=GGCC(CCGGGGGJJ=CJCGG1GGGGGGGC1GGGC=GGCG=CG==GCC=GG=GCGGCCGCC +@gi|10016|ref|NC_16.1|-3/2 +GGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCCTTTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGAG ++ +CCCGGGGGGGGGGJJJJCJJGJGJJCCJJJJJJJGJGJJJCJGJGGGCG8JJJJGJJGCG==GJGGGJGGGGGCCGGG=CGGGGGCCGGG=GGGGCCCCGG(=CCCCG=CCG11CGCGCCGC1GGGGCGGGGGGGCCCCCGCCGCG=GGC +@gi|10016|ref|NC_16.1|-1/2 
+AATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGTGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTG ++ +=CCGGGGCGGGGGJGJJJJJJJJJCCJJGJJJJJJJGJJJJGJJGJ=JGJCGJC1JCJGGJGJCJG8CG8=CCGGGGGCCGGGCGGCGGGCCCCGC8GGCGCCJJCJCCGGGCG=GCCGGG=CCCGCCCGGGCCGGGC1GCGGGCGGGGC +@gi|10017|ref|NC_17.1|-9/2 +TGAGTTGGTACGTTTTGTTACAATTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTTATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAA ++ +CCCGG1GGG(GGGCGJJJGJJGJGJGJJJJJGGCGJCG8JGGGGGJ8JGGJJGJ8CCGG=CGCGGGJGGGCCCGGGCGGG8CCG=CGCGCC=GGG==CGGCG(CCJJGCC8GGCGGGG=G8GGG1CG=GGGGCGGGGG1G==CGGCGC8G +@gi|10017|ref|NC_17.1|-7/2 +ATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAATTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTTATGCCAAGATGCATTG ++ +CCCG=GGGCGGGGJGJJJ1JGGJJGJGJJJGJJJJJJ=GJJJGJJJGCGGJJJGGCGJGJGJGJJJGGJGGJGGGGCGGGCCCGGG88GGG8GC1C=GG1GCJCJCJCCCGCCCC=CCCGGGGGCGGCGGCG8C=GGGCGG=8CCCCGGC +@gi|10017|ref|NC_17.1|-5/2 +AGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCCATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGG ++ +CC=CG1GCGCGGGJGJJJJJJJJJCJGGG=8JJCCJC=JJCGGCJJJJGGGJ8GGCJGGCGGCGGCJGCGCGGGGG(GCCGGCGCG8CCGGGGGCCGGG8G=CCJ=JGGCGGC8GGGGGGGCGGGGGGGGG=8=CGCCCGGCCGCGCGC= +@gi|10017|ref|NC_17.1|-3/2 +CATTCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAATTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGT ++ +CCC=GGGCGGGGCGCJJJJJGJJJJCJJGJJJGJJG=JGJJJGJJJGGGC=JG=GCCGJGGGCG=CJ=GC8GGGGG(GCG1C8GCGGGGGGCGCG8CGCG8=CJJJJGGG=G1CCCGGGGGGGGGGGCGGCGGGGGCC=G=CG=CGG8GC +@gi|10017|ref|NC_17.1|-1/2 +CAATGCATCTTGGCATAACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAATTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAAT ++ +CCCG=CGGGGGGCGJJJJJJJJJJGJJCJJ1JGGJGGGJJJJJJCJGJCGGJJGJCGGC1JGGCGGGJGJGGGGCJCGGGJGGGCC=GGCGGGCGCGG=GGGCJJCJGGCCGCGGGG(CCCGGGGCCGCCGC=GGGCGCC1GCCG=GG=8 
+@gi|10018|ref|NC_18.1|-9/2 +GGTGACCCTAATACAAGAAAACCAACTCATTAAATGTCTAGATGCGGCTAACACTCGATTTAATTTGCCAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTT ++ +CCCGGGGGGG8GGJJJJJGJCJJJJJJCJGJGGJCJGJJJJJJGJG=JJGC8JJ8JGJJGCGGGCJCGGGGCGCGGGGGCGCCGGCGGCCGGGGG=GCG=(GCC8JJGGGGGG=CGGGCGC=CGG18=CGCCGCGGCCGG=GGGGCGC81 +@gi|10018|ref|NC_18.1|-7/2 +AAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTG ++ +CCCGGCGGGGGGGJJJJJGJJJJCGJGJ=JGGGJJJJJJJGGJJJGGGJGCJGJJJJCGJJG=CCJJ8GG1JGGGCJCGGGGGCGG=GGGCG=CGCCGCG1CJJ=JJ=GCGGCGCGGCCCGGGGGGC=CG8CGGGGGG=GC=GCGGCCGC +@gi|10018|ref|NC_18.1|-5/2 +AAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTT ++ +CCCGGGGGGGCGCG=GJJG8JJJJJJGGGJGGJJCJCGCCCC=GGCGJCJJ=GJGGGJGJGJCGGG8GGCGGCGGGGCGJ1G8CCGGGGCGC=GGGCGCGGGC=JJJGGCGGGG=GCGGGGCG=CGGCCGCCGGCCCGGGCGGGCGGCGC +@gi|10018|ref|NC_18.1|-3/2 +ACAGCCCCAGCCCCCCCGTTTAAGTCATATTTGCTACGGTGACCCTACTACAAGAAAACCAACTCATTAAATGTCTAGATGCGGCTAACACTCGATTTAATTTGCCAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACG ++ +CCCGGGGGGGGGGJCJJGGJJJJ=JJJJJJ8JJJGJGJJGGGJJCJGJJJJGGCG8JCJJGJGGG(JGGG1G(GG=GCGC1GGGGGGGGGCCCGGGGGCGGCCCJCJCGCGGGG1GCGCG8CCCCG=CGCGCGCGGCCGGGGCGCCCGGC +@gi|10018|ref|NC_18.1|-1/2 +GAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCT ++ +CCCGGGGGGGGGGJJGJGJJJJJJJJJJCJJJJJGGGG=CGJGJGCCGGJC=GJGJGGGGCCGJJGCCCCCGJGGGGGGCGGCGGGGCGCGGGCGG8GGC8GJJJCCGGGGGG=GC1CGGGCGCGCGG=CGCCGCGGGCGGCCCCGGC=8 +@gi|10019|ref|NC_19.1|-9/2 +ACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCCGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCATTTCGGCAGTA ++ 
+CC=GGGGGGCGGGGJJJJGC=CJCJJGJJJGJGGJJJ(JCCJCJ1JGJCJJGJGGGGJ=GGC1JGGGJGC=GJCGJG1G=GCCGGGG=GGCCCCCGGGGGGG=JJJJGGCGCGCGCGGGCGGGGCGG=CCGC=GGGCGG8CGCGGCCGGC +@gi|10019|ref|NC_19.1|-7/2 +AGCCCCCCCGTTTAGGTCATATTTGCTACGATGACCCTACTACAAGAATACCAACTCATAAAATGTCTACATGCGGCTAACACTGGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTA ++ +CC=GGGG=CGGGGJJGGJJGJJJ(JJJ1JJCJJGGJJCJ1CCGGJJJJJJCGC=GGG(GCGJJGJ=CJCGJCJCGG1GGG=GCCGGGCGGGCCCGCC1CGG=JJCJJCGG1GCCCGG=1GGGGGGGC(GCGGGCGGGG=GGCGCCG1CG8 +@gi|10019|ref|NC_19.1|-5/2 +GAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCGGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTAGCTACGGCCGTGATCTGACCCATTGATCCCTCTCGAATAGCCCCC ++ +CCCGGGGGG=GCCCGJGJJJJJJGCJGGJJJJJJGJGJGJJJJ1JJJGGGJJJGG(=JCJC=GCGJJJJGGGCGGCGGC8CC8CGCCG=GGCGCGCCGCCGG==CJCGGGG8GGCGCGGCGCGGGCG1GGCGGGG=CGG=GGCCGGGGGC +@gi|10019|ref|NC_19.1|-3/2 +GCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAATGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCGGCCTTGGATGCGAGATAGAAACCAGAGTCACCCT ++ +CCCGGGGGGGGGGJG8CJ=1JGJJGJGJJJJJJGCGGGJJC(JJJCJJJJJGCGJGCCCJGJGC8GGCCCCCCJ8G=GGGGC=GGGCGCGGGGCGG1GGCGGC8JCJGCGG8GGGCGC(GGGGC=GG=GGCGCGGCGGCCCGCGGC=CG8 +@gi|10019|ref|NC_19.1|-1/2 +GCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCCAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTATTCTTGTAGTAGGGTC ++ +CCCGGG=CGGGGGJGCJJJJ=GJCGJJJGJJJJGCCGGGJJJJJGJJJJJGJCGJGCGCGCJG=CGCGGGGGCCCG=CCG(GCCG=G=GGGGGGGCCCG(GCCJJJJGGGCGGG8GGGGGGCCGCCGGGGGC81CGGGCGGGCCCCG1CG +@gi|10020|ref|NC_20.1|-9/2 +GGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATTATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAA ++ +C81GGGGGGGGG(CGCGJ1JJJJCJ=JGGJCJJCGJJJJJJJJJ(C=GJ=CJJJJGJGJ=CCGCC8JCGGJGCGGCJGGGGGCGC=CCCCCGCGGGGGGGGCCCJCJGGGCGGGCGGGC=GGGGGGGG1CCGGGCCGGGGGGC=GG1C8= +@gi|10020|ref|NC_20.1|-7/2 
+TTGCTAGAGTTGGTAGTAGGCGTAGATTTGTCTCCGAACAGCCCCAGCCCCCCCGTTTAAGTCATATTTGCTACGATGACCCTACTACAAGAGTACCAACTCATAAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGT ++ +CCCGCGG1CGGGC=JCJJJJJGJJGGJGJGGJGJJGGGGGGG8GGJGC=CJGGGGGGGJ=GGJJJGC1GGCGGCC=JGC((=GCG=GCGGCCCC=CGCC=GCJJJCJG=1G==CGGGCGGCC=GGCG1G1=CG8GCCGGCGGGG1GCCCC +@gi|10020|ref|NC_20.1|-5/2 +TTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACCGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTACTCTTGTAGTAGGGTCATCGTAGCAAATATG ++ +CCCGG==GGGGGGGCJJJJJJJJGJJ1JJJ=JJJGJJGJJJGJGJCJG=CJJGGJ1GJGCJGGCG=C(G1G=CGGGGCCGCCCGGCGGCGCGCGCGGCGCCGCJCJJGGGGCGGCGG=G=GGGGGCCGGGGGGCGCG8=GGGCGCCCCCC +@gi|10020|ref|NC_20.1|-3/2 +ATCGAAACAAAGAGTCGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACCGTAG ++ +C1CG=GGG=GGGGCJ1JJGCJC=GGJJGJGGJJGJJGJJGCJGJJGJGJ1JGCJJGGGJCJJGCCCGGJGG==CJGGGCGGG1GGGG=CCCGC=CGCGGCCCCCJCCC=GGGGGGCGCCGGGGGG8CGG==CGGGGGGGGGGGGGGCCGC +@gi|10020|ref|NC_20.1|-1/2 +GCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACCGTAGCCTTTGGGACTTTCACT ++ +C=CGGGGGGGGG=JJJCJJ=JJJ(GJGJJJJJJ=JJJJCGGJJJJJGJGGJGJGGCGG=J=GJGJGGGCGGGGGCCGGGGCGCG=G=GGCG=GGGGCCCGG1CCJJJ=CG1G(CG=GGGGGCGGGCCGGGCCGGG=1CCCGCGGGCG=GC From bb80f5b67355f916f47f621367281198fba7507f Mon Sep 17 00:00:00 2001 From: Radu Muntean Date: Wed, 28 Jul 2021 20:05:27 +0200 Subject: [PATCH 05/17] rebase --- metagraph/src/common/utils/string_utils.cpp | 2 +- metagraph/src/graph/annotated_dbg.cpp | 28 --------------------- metagraph/src/graph/annotated_dbg.hpp | 4 --- 3 files changed, 1 insertion(+), 33 deletions(-) diff --git a/metagraph/src/common/utils/string_utils.cpp b/metagraph/src/common/utils/string_utils.cpp index cc25f06ad5..8e801baf78 100644 --- a/metagraph/src/common/utils/string_utils.cpp +++ b/metagraph/src/common/utils/string_utils.cpp @@ -11,7 +11,7 @@ bool starts_with(const std::string &str, const 
std::string &prefix) { if (prefix.size() > str.size()) { return false; } - return prefix == str.substr(0, static_cast(prefix.size())); + return prefix == std::string_view(str).substr(0, prefix.size()); } bool ends_with(const std::string &str, const std::string &suffix) { diff --git a/metagraph/src/graph/annotated_dbg.cpp b/metagraph/src/graph/annotated_dbg.cpp index a324c673c1..5db5ff76aa 100644 --- a/metagraph/src/graph/annotated_dbg.cpp +++ b/metagraph/src/graph/annotated_dbg.cpp @@ -776,34 +776,6 @@ ::call_annotated_nodes(const Label &label, ); } -void AnnotatedDBG::call_annotated_rows(const std::vector &rows, - std::function callback_cell, - std::function callback_row) const { - assert(check_compatibility()); - - auto unique_matrix_rows = annotator_->get_matrix().get_rows(rows); - - //TODO make sure that this function works even if we have duplications in 'rows'. Then, delete this error catch. - if (rows.size() != unique_matrix_rows.size()) { - throw std::runtime_error("The current 'call_annotated_rows' call contains duplication."); - } - - if (unique_matrix_rows.size() >= std::numeric_limits::max()) { - throw std::runtime_error( - folly::to("The current 'call_annotated_rows' call has returned, ", unique_matrix_rows.size(), - "rows. The maximum number of rows that can be returned is ", - std::numeric_limits::max(), - ". 
Please reduce the query batch size")); - } - const auto &label_encoder = annotator_->get_label_encoder(); - for (auto row : unique_matrix_rows) { - for (auto cell : row) { - callback_cell(label_encoder.decode(cell)); - } - callback_row(); - } -} - bool AnnotatedSequenceGraph::check_compatibility() const { return graph_->max_index() == annotator_->num_objects(); } diff --git a/metagraph/src/graph/annotated_dbg.hpp b/metagraph/src/graph/annotated_dbg.hpp index bd87eb85cd..56e0dfa747 100644 --- a/metagraph/src/graph/annotated_dbg.hpp +++ b/metagraph/src/graph/annotated_dbg.hpp @@ -156,10 +156,6 @@ class AnnotatedDBG : public AnnotatedSequenceGraph { int32_t match_score = 1, int32_t mismatch_score = 2) const; - void call_annotated_rows(const std::vector &rows, - std::function callback_cell, - std::function callback_row) const; - private: DeBruijnGraph &dbg_; }; From d9a98619cfd816f63e3fb90a4c0258ac69e105d7 Mon Sep 17 00:00:00 2001 From: Radu Muntean Date: Fri, 30 Jul 2021 21:45:47 +0200 Subject: [PATCH 06/17] taxonomic classification skeleton with the RMQ preprocessing Signed-off-by: Radu Muntean --- .../annotation/taxonomy/label_to_taxid.cpp | 95 +++++++++ .../annotation/taxonomy/tax_classifier.cpp | 199 ++++++++++++++++++ .../annotation/taxonomy/tax_classifier.hpp | 191 +++++++++++++++++ .../annotation/taxonomy/test_taxonomy.cpp | 99 +++++++++ 4 files changed, 584 insertions(+) create mode 100644 metagraph/src/annotation/taxonomy/label_to_taxid.cpp create mode 100644 metagraph/src/annotation/taxonomy/tax_classifier.cpp create mode 100644 metagraph/src/annotation/taxonomy/tax_classifier.hpp create mode 100644 metagraph/tests/annotation/taxonomy/test_taxonomy.cpp diff --git a/metagraph/src/annotation/taxonomy/label_to_taxid.cpp b/metagraph/src/annotation/taxonomy/label_to_taxid.cpp new file mode 100644 index 0000000000..b3649baf1e --- /dev/null +++ b/metagraph/src/annotation/taxonomy/label_to_taxid.cpp @@ -0,0 +1,95 @@ +#include "tax_classifier.hpp" + +#include 
"common/utils/string_utils.hpp" + +#include "common/logger.hpp" + +namespace mtg { +namespace annot { + +using mtg::common::logger; + +void TaxonomyBase::assign_label_type(const std::string &label, bool *require_accversion_to_taxid_map) { + if (!utils::starts_with(label, ">gi|")) { + // e.g. >gi|1070643132|ref|NC_031224.1| Arthrobacter phage Mudcat, complete genome + this->label_type = GEN_BANK; + *require_accversion_to_taxid_map = true; + } else if (!utils::starts_with(label, ">kraken:")) { + // e.g. >kraken:taxid|2016032|NC_047834.1 Alteromonas virus vB_AspP-H4/4, complete genome + this->label_type = KRAKEN; + *require_accversion_to_taxid_map = false; + } else { + logger->error("Can't determine the type of the given label {}. Please make sure that the labels are in a recognized format.", label); + std::exit(1); + } +} + +bool TaxonomyBase::get_taxid_from_label(const std::string &label, TaxId *taxid) const { + if (this->label_type == KRAKEN) { + *taxid = static_cast(std::stoull(utils::split_string(label, "|")[1])); + return true; + } else if (TaxonomyBase::label_type == GEN_BANK) { + std::string acc_version = this->get_accession_version_from_label(label); + if (not this->accversion_to_taxid_map.count(acc_version)) { + return false; + } + *taxid = this->accversion_to_taxid_map.at(acc_version); + return true; + } + + logger->error("Run get_taxid_from_label() for unknown label {}.", label); + std::exit(1); +} + +std::string TaxonomyBase::get_accession_version_from_label(const std::string &label) const { + if (this->label_type == KRAKEN) { + return utils::split_string(utils::split_string(label, "|")[2], " ")[0]; + } else if (this->label_type == GEN_BANK) { + return utils::split_string(label, "|")[3];; + } + + logger->error("Run get_accession_version_from_label() for unknown label {}.", label); + std::exit(1); +} + +// TODO improve this by parsing the compressed ".gz" version (or use https://github.com/pmenzel/taxonomy-tools) +void 
TaxonomyBase::read_accversion_to_taxid_map(const std::string &filepath, + const graph::AnnotatedDBG *anno_matrix = NULL) { + std::ifstream f(filepath); + if (!f.good()) { + logger->error("Failed to open accession to taxid map table {}", filepath); + exit(1); + } + + std::string line; + getline(f, line); + if (!utils::starts_with(line, "accession\taccession.version\ttaxid\t")) { + logger->error("The accession to taxid map table is not in the standard (*.accession2taxid) format {}.", filepath); + exit(1); + } + + tsl::hopscotch_set input_accessions; + if (anno_matrix != NULL) { + for (const std::string &accversion : anno_matrix->get_annotation().get_all_labels()) { + input_accessions.insert(accversion); + } + } + + while (getline(f, line)) { + if (line == "") { + logger->error("The accession to taxid map table contains empty lines. Please make sure that this file was not manually modified {}.", filepath); + exit(1); + } + std::vector parts = utils::split_string(line, "\t"); + if (parts.size() <= 2) { + logger->error("The accession to taxid map table contains incomplete lines. 
Please make sure that this file was not manually modified {}.", filepath); + exit(1); + } + if (input_accessions.size() == 0 || input_accessions.count(parts[1])) { + this->accversion_to_taxid_map[parts[1]] = static_cast(std::stoull(parts[2])); + } + } +} + +} // namespace annot +} // namespace mtg diff --git a/metagraph/src/annotation/taxonomy/tax_classifier.cpp b/metagraph/src/annotation/taxonomy/tax_classifier.cpp new file mode 100644 index 0000000000..14ec972fdb --- /dev/null +++ b/metagraph/src/annotation/taxonomy/tax_classifier.cpp @@ -0,0 +1,199 @@ +#include "tax_classifier.hpp" + +#include +#include +#include + +#include "annotation/representation/annotation_matrix/annotation_matrix.hpp" +#include "common/unix_tools.hpp" + +#include "common/logger.hpp" + +namespace mtg { +namespace annot { + +using mtg::common::logger; + +TaxonomyClsAnno::TaxonomyClsAnno(const graph::AnnotatedDBG &anno, + const double lca_coverage_rate, + const double kmers_discovery_rate, + const std::string &tax_tree_filepath, + const std::string &label_taxid_map_filepath) : _anno_matrix(&anno) { + _lca_coverage_rate = lca_coverage_rate; + _kmers_discovery_rate = kmers_discovery_rate; + + if (!std::filesystem::exists(tax_tree_filepath)) { + logger->error("Can't open taxonomic tree file {}.", tax_tree_filepath); + std::exit(1); + } + + bool require_accversion_to_taxid_map = false; + assign_label_type(_anno_matrix->get_annotation().get_all_labels()[0], &require_accversion_to_taxid_map); + + Timer timer; + if (require_accversion_to_taxid_map) { + logger->trace("Parsing label_taxid_map file.."); + read_accversion_to_taxid_map(label_taxid_map_filepath, _anno_matrix); + logger->trace("Finished label_taxid_map file in {}s", timer.elapsed()); + } + + timer.reset(); + logger->trace("Parsing taxonomic tree.."); + ChildrenList tree; + read_tree(tax_tree_filepath, &tree); + logger->trace("Finished taxonomic tree read in {}s.", timer.elapsed()); + + timer.reset(); + logger->trace("Calculating tree 
statistics.."); + std::vector tree_linearization; + dfs_statistics(root_node, tree, &tree_linearization); + logger->trace("Finished tree statistics calculation in {}s.", timer.elapsed()); + + timer.reset(); + logger->trace("Starting rmq preprocessing.."); + rmq_preprocessing(tree_linearization); + logger->trace("Finished rmq preprocessing in {}s.", timer.elapsed()); +} + +void TaxonomyClsAnno::read_tree(const std::string &tax_tree_filepath, + ChildrenList *tree) { + std::ifstream f(tax_tree_filepath); + if (!f.good()) { + logger->error("Failed to open Taxonomic Tree file {}.", tax_tree_filepath); + exit(1); + } + + std::string line; + tsl::hopscotch_map full_parents_list; + while (getline(f, line)) { + if (line == "") { + logger->error("The Taxonomic Tree file contains empty lines. Please make sure that this file was not manually modified: {}.", + tax_tree_filepath); + exit(1); + } + std::vector parts = utils::split_string(line, "\t"); + if (parts.size() <= 2) { + logger->error("The Taxonomic tree filepath contains incomplete lines. Please make sure that this file was not manually modified: {}.", + tax_tree_filepath); + exit(1); + } + uint32_t act = static_cast(std::stoull(parts[0])); + uint32_t parent = static_cast(std::stoull(parts[2])); + full_parents_list[act] = parent; + this->node_parent[act] = parent; + } + + std::vector relevant_taxids; + // 'considered_relevant_taxids' is used to make sure that there are no duplications in 'relevant_taxids'. + tsl::hopscotch_set considered_relevant_taxids; + + if (this->accversion_to_taxid_map.size()) { + // Store only the taxonomic nodes that exists in the annotation matrix. + for (const pair &it : this->accversion_to_taxid_map) { + relevant_taxids.push_back(it.second); + considered_relevant_taxids.insert(it.second); + } + } else { + // If 'this->accversion_to_taxid_map' is empty, store the entire taxonomic tree. 
+ for (auto it : full_parents_list) { + relevant_taxids.push_back(it.first); + considered_relevant_taxids.insert(it.first); + } + } + assert(relevant_taxids.size()); + + uint64_t num_taxid_failed = 0; // num_taxid_failed is used for logging only. + for (uint32_t i = 0; i < relevant_taxids.size(); ++i) { + const TaxId taxid = relevant_taxids[i]; + if (!full_parents_list.count(taxid)) { + num_taxid_failed += 1; + continue; + } + + if (considered_relevant_taxids.find(full_parents_list[taxid]) == considered_relevant_taxids.end()) { + relevant_taxids.push_back(full_parents_list[taxid]); + considered_relevant_taxids.insert(full_parents_list[taxid]); + } + + // Check if the current taxid is the root. + if (taxid == full_parents_list[taxid]) { + this->root_node = taxid; + } + } + if (num_taxid_failed) { + logger->warn("During the tax_tree_filepath {} parsing, {} taxids were not found out of {} evaluations.", + tax_tree_filepath, num_taxid_failed, relevant_taxids.size()); + } + + // Construct the output tree. 
+ for (const TaxId &taxid : relevant_taxids) { + if (taxid == this->root_node) { + continue; + } + (*tree)[full_parents_list[taxid]].push_back(taxid); + } +} + +void TaxonomyClsAnno::dfs_statistics(const TaxId node, + const ChildrenList &tree, + std::vector *tree_linearization) { + this->node_to_linearization_idx[node] = tree_linearization->size(); + tree_linearization->push_back(node); + uint32_t depth = 0; + for (const TaxId &child : tree.at(node)) { + dfs_statistics(child, tree, tree_linearization); + tree_linearization->push_back(node); + if (this->node_depth[child] > depth) { + depth = this->node_depth[child]; + } + } + this->node_depth[node] = depth + 1; +} + +void TaxonomyClsAnno::rmq_preprocessing(const std::vector &tree_linearization) { + uint32_t num_rmq_rows = log2(tree_linearization.size()) + 1; + + this->rmq_data.resize(num_rmq_rows); + for (uint32_t i = 0; i < num_rmq_rows; ++i) { + this->rmq_data[i].resize(tree_linearization.size()); + } + + // Copy tree_linearization to rmq[0]. + for (uint32_t i = 0; i < tree_linearization.size(); ++i) { + this->rmq_data[0][i] = tree_linearization[i]; + } + + // Delta represents the size of the RMQ's sliding window (always a power of 2). + uint32_t delta = 1; + for (uint32_t row = 1; row < num_rmq_rows; ++row) { + for (uint32_t i = 0; i + delta < tree_linearization.size(); ++i) { + // rmq_data[row][i] covers an interval of size delta=2^row and returns the node with the maximal depth among positions [i, i+2^row-1] in the linearization. + // According to 'this->dfs_statistics()': node_depth[leaf] = 1 and node_depth[root] = maximum distance to a leaf. + if (this->node_depth[this->rmq_data[row - 1][i]] > + this->node_depth[this->rmq_data[row - 1][i + delta]]) { + this->rmq_data[row][i] = this->rmq_data[row - 1][i]; + } else { + this->rmq_data[row][i] = this->rmq_data[row - 1][i + delta]; + } + } + delta *= 2; + } + + // Compute fast tables for log2 and pow2. 
+ this->fast_log2.resize(tree_linearization.size()); + this->fast_pow2.push_back(1); + for (uint32_t i = 2; i < tree_linearization.size(); ++i) { + this->fast_log2[i] = 1 + this->fast_log2[i/2]; + if (this->fast_log2[i] > this->fast_log2[i-1]) { + this->fast_pow2.push_back(i); + } + } +} + +TaxId TaxonomyClsAnno::assign_class(const std::string &sequence) const { + std::cerr << "assign class not implemented " << sequence << "\n\n"; + return 0; +} + +} // namespace annot +} // namespace mtg diff --git a/metagraph/src/annotation/taxonomy/tax_classifier.hpp b/metagraph/src/annotation/taxonomy/tax_classifier.hpp new file mode 100644 index 0000000000..438d38ae04 --- /dev/null +++ b/metagraph/src/annotation/taxonomy/tax_classifier.hpp @@ -0,0 +1,191 @@ +#ifndef __TAX_CLASSIFIER_HPP__ +#define __TAX_CLASSIFIER_HPP__ + +#ifdef TESTING +#define PRIVATE_TESTABLE public +#define PROTECTED_TESTABLE public +#else +#define PRIVATE_TESTABLE private +#define PROTECTED_TESTABLE protected +#endif + +#include +#include + +#include "graph/annotated_dbg.hpp" + +namespace mtg { +namespace annot { + +using TaxId = std::uint32_t; +using ChildrenList = tsl::hopscotch_map>; + +class TaxonomyBase { +public: + using KmerId = annot::MultiLabelEncoded::Index; + using node_index = graph::SequenceGraph::node_index; + + enum LabelType { + UNASSIGNED, GEN_BANK, KRAKEN + }; + + virtual ~TaxonomyBase() {}; + + // TODO implement + virtual TaxId assign_class(const std::string &sequence) const = 0; + +PROTECTED_TESTABLE: + void assign_label_type(const std::string &label, bool *require_accversion_to_taxid_map); + + // TODO implement. + TaxId find_lca(const std::vector &taxids) const; + + std::string get_accession_version_from_label(const std::string &label) const; + + bool get_taxid_from_label(const std::string &label, TaxId *taxid) const; + + /** Reads the accession version to taxid lookup table. + * + * @param [input] filepath -> a ".accession2taxid" file. 
+ * @param [input] anno_matrix -> pointer to the annotation matrix + */ + void read_accversion_to_taxid_map(const std::string &filepath, + const graph::AnnotatedDBG *anno_matrix); + + // TODO implement. + /** + * Update the current node_scores and best_lca by taking into account the weight of the start_node and all its ancestors. + * + * @param [input] 'start_node' -> the starting node to update 'node_scores'. + * @param [input] 'num_kmers_per_node[taxid]' -> the number of kmers 'k' with taxonomic_map[k]=taxid. + * @param [input] 'desired_number_kmers' -> the threshold score that a node has to exceed in order to be considered as a valid solution. + * @param [modified] 'node_scores' -> the current score for each node in the tree. + * @param [modified] 'nodes_already_propagated' -> the set of nodes that were previously processed. + * @param [modified] 'best_lca' -> the current classification prediction (node that exceeds the `desired_number_kmers` threshold and is placed as close as possible to the leaves). + * @param [modified] 'best_lca_dist_to_root' -> the distance to the root for the current classification prediction. + */ + void update_scores_and_lca(const TaxId start_node, + const tsl::hopscotch_map &num_kmers_per_node, + const uint64_t desired_number_kmers, + tsl::hopscotch_map *node_scores, + tsl::hopscotch_set *nodes_already_propagated, + TaxId *best_lca, + uint32_t *best_lca_dist_to_root); + + LabelType label_type; + + /** + * node_depth returns the depth for each node in the taxonomic tree. + * The root is the unique node with maximal depth and all the leaves have depth equal to 1. + */ + tsl::hopscotch_map node_depth; + + TaxId root_node; + + /** + * node_parent stores a taxonomic tree representation as a taxid to taxid parent list. 
+ */ + tsl::hopscotch_map node_parent; + + tsl::hopscotch_map accversion_to_taxid_map; + + double _lca_coverage_rate; + double _kmers_discovery_rate; +}; + +class TaxonomyClsImportDB : public TaxonomyBase { +public: + // todo implement + TaxonomyClsImportDB(const std::string &taxdb_filepath, + const double lca_coverage_rate, + const double kmers_discovery_rate); + TaxId assign_class(const std::string &sequence) const; +}; + +class TaxonomyClsAnno : public TaxonomyBase { +public: + /** + * TaxonomyCls constructor + * + * @param [input] anno -> the annotation matrix + * @param [input] lca_coverage_rate -> threshold used for taxonomic classification. + * @param [input] kmers_discovery_rate -> threshold used for taxonomic classification. + * @param [input] tax_tree_filepath -> path to a taxonomic tree ("nodes.dmp" file). + * @param [input] label_taxid_map_filepath -> path to acccession version to taxid lookup table (".accession2taxid" file). Mandatory if the label doesn't contain the 'taxid'. + */ + TaxonomyClsAnno(const graph::AnnotatedDBG &anno, + const double lca_coverage_rate, + const double kmers_discovery_rate, + const std::string &tax_tree_filepath, + const std::string &label_taxid_map_filepath = ""); + TaxonomyClsAnno() {}; + ~TaxonomyClsAnno() {}; + + // todo implement + void export_taxdb(const std::string &filepath) const; + + // todo implement + TaxId assign_class(const std::string &sequence) const; + +PRIVATE_TESTABLE: + /** + * Reads and returns the taxonomic tree as a list of children. + * + * @param [input] tax_tree_filepath -> path to a "nodes.dmp" file. + * @param [output] tree -> tree stored as a list of children. + */ + void read_tree(const std::string &tax_tree_filepath, + ChildrenList *tree); + + /** + * rmq_preprocessing computes 'this->rmq_data', 'this->precalc_log' and 'this->precalc_pow2' fields. + * + * @param [input] tree_linearization -> the linearization of the taxonomic tree. 
+ */ + void rmq_preprocessing(const std::vector &tree_linearization); + + /** + * dfs_statistics method calculates the following fields: + * + tree_linearization; + * + this->node_depth; + * + this->node_to_linearization_idx. + * + * @param [input] node -> the node that is currently processed. + * @param [input] tree -> the taxonomic tree stored as a list of children. + * @param [output] tree_linearization -> the linearization of the received tree. + */ + void dfs_statistics(const TaxId node, + const ChildrenList &tree, + std::vector *tree_linearization); + + /** + * rmq_data[0] contains the taxonomic tree linearization + * (e.g. for root 1 and edges={1-2; 1-3}, the linearization is "1 2 1 3 1"). + * rmq_data[l][x] returns the node with the maximal depth among positions [x, x+2^l-1] in the linearization + * (e.g. rmq_data[3][6] return the node with max depth in [6, 13]). + */ + std::vector> rmq_data; + + /** + * node_to_linearization_idx[node] returns the index of the first occurrence of node + * in the tree linearization order. This array will be further used inside a RMQ query. + */ + tsl::hopscotch_map node_to_linearization_idx; + + /** + * fast_log2 is a table for a fast compute of log2(x). + */ + std::vector fast_log2; + + /** + * fast_pow2 is a table for a fast compute of pow2(x). 
+ */ + std::vector fast_pow2; + + const graph::AnnotatedDBG *_anno_matrix = NULL; +}; + +} // namespace annot +} // namespace mtg + +#endif // __TAX_CLASSIFIER_HPP__ diff --git a/metagraph/tests/annotation/taxonomy/test_taxonomy.cpp b/metagraph/tests/annotation/taxonomy/test_taxonomy.cpp new file mode 100644 index 0000000000..09f4d3bf98 --- /dev/null +++ b/metagraph/tests/annotation/taxonomy/test_taxonomy.cpp @@ -0,0 +1,99 @@ +#include "gtest/gtest.h" + +#define TESTING + +#include +#include +#include +#include + +#include "annotation/taxonomy/tax_classifier.hpp" + +namespace mtg { +namespace test { + +TEST (TaxonomyTest, DfsStatistics) { + mtg::annot::TaxonomyClsAnno *tax = new mtg::annot::TaxonomyClsAnno(); + tsl::hopscotch_map> tree { + {0, {1, 2, 3}}, // node 0 -> root + {1, {4, 5}}, // node 1 + {2, {}}, // node 2 + {3, {6}}, // node 3 + {4, {7, 8}}, // node 4 + {5, {}}, + {6, {}}, + {7, {}}, + {8, {}}, + }; + + std::vector expected_linearization = { + 0, 1, 4, 7, 4, 8, 4, 1, 5, 1, 0, 2, 0, 3, 6, 3, 0 + }; + tsl::hopscotch_map expected_node_depths = { + {0, 4}, + {1, 3}, + {2, 1}, + {3, 2}, + {4, 2}, + {5, 1}, + {6, 1}, + {7, 1}, + {8, 1}, + }; + tsl::hopscotch_map expected_node_to_linearization_idx = { + {0, 0}, + {1, 1}, + {2, 11}, + {3, 13}, + {4, 2}, + {5, 8}, + {6, 14}, + {7, 3}, + {8, 5}, + }; + + std::vector tree_linearization; + tax->dfs_statistics(0, tree, &tree_linearization); + EXPECT_EQ(expected_linearization, tree_linearization); + EXPECT_EQ(expected_node_depths, tax->node_depth); + EXPECT_EQ(expected_node_to_linearization_idx, tax->node_to_linearization_idx); +} + +TEST (TaxonomyTest, RmqPreprocessing) { + mtg::annot::TaxonomyClsAnno *tax = new mtg::annot::TaxonomyClsAnno(); + + tax->node_depth = { + {0, 4}, + {1, 3}, + {2, 1}, + {3, 2}, + {4, 2}, + {5, 1}, + {6, 1}, + {7, 1}, + {8, 1}, + }; + + std::vector linearization = { + 0, 1, 4, 7, 4, 8, 4, 1, 5, 1, 0, 2, 0, 3, 6, 3, 0 + }; + std::vector expected_pow2 = {1, 2, 4, 8, 16}; + std::vector 
expected_log2 = { + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4 + }; + std::vector > expected_rmq = { + {0, 1, 4, 7, 4, 8, 4, 1, 5, 1, 0, 2, 0, 3, 6, 3, 0}, + {0, 1, 4, 4, 4, 4, 1, 1, 1, 0, 0, 0, 0, 3, 3, 0, 0}, + {0, 1, 4, 4, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + }; + + tax->rmq_preprocessing(linearization); + EXPECT_EQ(expected_pow2, tax->fast_pow2); + EXPECT_EQ(expected_log2, tax->fast_log2); + EXPECT_EQ(expected_rmq, tax->rmq_data); +} + +} +} From 4fafcb86623ce43f1b8823c6129be9d18bd960c0 Mon Sep 17 00:00:00 2001 From: Radu Muntean Date: Sat, 31 Jul 2021 00:01:12 +0200 Subject: [PATCH 07/17] address comments Signed-off-by: Radu Muntean --- .../annotation/taxonomy/label_to_taxid.cpp | 23 ++++---- .../annotation/taxonomy/tax_classifier.cpp | 54 +++++++------------ .../annotation/taxonomy/tax_classifier.hpp | 24 ++++----- .../annotation/taxonomy/test_taxonomy.cpp | 6 --- 4 files changed, 41 insertions(+), 66 deletions(-) diff --git a/metagraph/src/annotation/taxonomy/label_to_taxid.cpp b/metagraph/src/annotation/taxonomy/label_to_taxid.cpp index b3649baf1e..dc3ed63158 100644 --- a/metagraph/src/annotation/taxonomy/label_to_taxid.cpp +++ b/metagraph/src/annotation/taxonomy/label_to_taxid.cpp @@ -10,13 +10,14 @@ namespace annot { using mtg::common::logger; void TaxonomyBase::assign_label_type(const std::string &label, bool *require_accversion_to_taxid_map) { - if (!utils::starts_with(label, ">gi|")) { + if (utils::starts_with(label, ">gi|")) { // e.g. >gi|1070643132|ref|NC_031224.1| Arthrobacter phage Mudcat, complete genome - this->label_type = GEN_BANK; + label_type = GEN_BANK; *require_accversion_to_taxid_map = true; - } else if (!utils::starts_with(label, ">kraken:")) { + } else if (utils::starts_with(label, ">") && + utils::starts_with(utils::split_string(label, ":")[1], "taxid|")) { // e.g. 
>kraken:taxid|2016032|NC_047834.1 Alteromonas virus vB_AspP-H4/4, complete genome - this->label_type = KRAKEN; + label_type = TAXID; *require_accversion_to_taxid_map = false; } else { logger->error("Can't determine the type of the given label {}. Please make sure that the labels are in a recognized format.", label); @@ -25,15 +26,15 @@ void TaxonomyBase::assign_label_type(const std::string &label, bool *require_acc } bool TaxonomyBase::get_taxid_from_label(const std::string &label, TaxId *taxid) const { - if (this->label_type == KRAKEN) { + if (label_type == TAXID) { *taxid = static_cast(std::stoull(utils::split_string(label, "|")[1])); return true; } else if (TaxonomyBase::label_type == GEN_BANK) { - std::string acc_version = this->get_accession_version_from_label(label); - if (not this->accversion_to_taxid_map.count(acc_version)) { + std::string acc_version = get_accession_version_from_label(label); + if (not accversion_to_taxid_map.count(acc_version)) { return false; } - *taxid = this->accversion_to_taxid_map.at(acc_version); + *taxid = accversion_to_taxid_map.at(acc_version); return true; } @@ -42,9 +43,9 @@ bool TaxonomyBase::get_taxid_from_label(const std::string &label, TaxId *taxid) } std::string TaxonomyBase::get_accession_version_from_label(const std::string &label) const { - if (this->label_type == KRAKEN) { + if (label_type == TAXID) { return utils::split_string(utils::split_string(label, "|")[2], " ")[0]; - } else if (this->label_type == GEN_BANK) { + } else if (label_type == GEN_BANK) { return utils::split_string(label, "|")[3];; } @@ -86,7 +87,7 @@ void TaxonomyBase::read_accversion_to_taxid_map(const std::string &filepath, exit(1); } if (input_accessions.size() == 0 || input_accessions.count(parts[1])) { - this->accversion_to_taxid_map[parts[1]] = static_cast(std::stoull(parts[2])); + accversion_to_taxid_map[parts[1]] = static_cast(std::stoull(parts[2])); } } } diff --git a/metagraph/src/annotation/taxonomy/tax_classifier.cpp 
b/metagraph/src/annotation/taxonomy/tax_classifier.cpp index 14ec972fdb..040ffea689 100644 --- a/metagraph/src/annotation/taxonomy/tax_classifier.cpp +++ b/metagraph/src/annotation/taxonomy/tax_classifier.cpp @@ -18,10 +18,8 @@ TaxonomyClsAnno::TaxonomyClsAnno(const graph::AnnotatedDBG &anno, const double lca_coverage_rate, const double kmers_discovery_rate, const std::string &tax_tree_filepath, - const std::string &label_taxid_map_filepath) : _anno_matrix(&anno) { - _lca_coverage_rate = lca_coverage_rate; - _kmers_discovery_rate = kmers_discovery_rate; - + const std::string &label_taxid_map_filepath) : + TaxonomyBase(lca_coverage_rate, kmers_discovery_rate), _anno_matrix(&anno) { if (!std::filesystem::exists(tax_tree_filepath)) { logger->error("Can't open taxonomic tree file {}.", tax_tree_filepath); std::exit(1); @@ -55,8 +53,7 @@ TaxonomyClsAnno::TaxonomyClsAnno(const graph::AnnotatedDBG &anno, logger->trace("Finished rmq preprocessing in {}s.", timer.elapsed()); } -void TaxonomyClsAnno::read_tree(const std::string &tax_tree_filepath, - ChildrenList *tree) { +void TaxonomyClsAnno::read_tree(const std::string &tax_tree_filepath, ChildrenList *tree) { std::ifstream f(tax_tree_filepath); if (!f.good()) { logger->error("Failed to open Taxonomic Tree file {}.", tax_tree_filepath); @@ -80,16 +77,16 @@ void TaxonomyClsAnno::read_tree(const std::string &tax_tree_filepath, uint32_t act = static_cast(std::stoull(parts[0])); uint32_t parent = static_cast(std::stoull(parts[2])); full_parents_list[act] = parent; - this->node_parent[act] = parent; + node_parent[act] = parent; } std::vector relevant_taxids; // 'considered_relevant_taxids' is used to make sure that there are no duplications in 'relevant_taxids'. tsl::hopscotch_set considered_relevant_taxids; - if (this->accversion_to_taxid_map.size()) { + if (accversion_to_taxid_map.size()) { // Store only the taxonomic nodes that exists in the annotation matrix. 
- for (const pair &it : this->accversion_to_taxid_map) { + for (const pair &it : accversion_to_taxid_map) { relevant_taxids.push_back(it.second); considered_relevant_taxids.insert(it.second); } @@ -117,7 +114,7 @@ void TaxonomyClsAnno::read_tree(const std::string &tax_tree_filepath, // Check if the current taxid is the root. if (taxid == full_parents_list[taxid]) { - this->root_node = taxid; + root_node = taxid; } } if (num_taxid_failed) { @@ -127,7 +124,7 @@ void TaxonomyClsAnno::read_tree(const std::string &tax_tree_filepath, // Construct the output tree. for (const TaxId &taxid : relevant_taxids) { - if (taxid == this->root_node) { + if (taxid == root_node) { continue; } (*tree)[full_parents_list[taxid]].push_back(taxid); @@ -137,30 +134,30 @@ void TaxonomyClsAnno::read_tree(const std::string &tax_tree_filepath, void TaxonomyClsAnno::dfs_statistics(const TaxId node, const ChildrenList &tree, std::vector *tree_linearization) { - this->node_to_linearization_idx[node] = tree_linearization->size(); + node_to_linearization_idx[node] = tree_linearization->size(); tree_linearization->push_back(node); uint32_t depth = 0; for (const TaxId &child : tree.at(node)) { dfs_statistics(child, tree, tree_linearization); tree_linearization->push_back(node); - if (this->node_depth[child] > depth) { - depth = this->node_depth[child]; + if (node_depth[child] > depth) { + depth = node_depth[child]; } } - this->node_depth[node] = depth + 1; + node_depth[node] = depth + 1; } void TaxonomyClsAnno::rmq_preprocessing(const std::vector &tree_linearization) { uint32_t num_rmq_rows = log2(tree_linearization.size()) + 1; - this->rmq_data.resize(num_rmq_rows); + rmq_data.resize(num_rmq_rows); for (uint32_t i = 0; i < num_rmq_rows; ++i) { - this->rmq_data[i].resize(tree_linearization.size()); + rmq_data[i].resize(tree_linearization.size()); } // Copy tree_linearization to rmq[0]. 
for (uint32_t i = 0; i < tree_linearization.size(); ++i) { - this->rmq_data[0][i] = tree_linearization[i]; + rmq_data[0][i] = tree_linearization[i]; } // Delta represents the size of the RMQ's sliding window (always a power of 2). @@ -169,30 +166,19 @@ void TaxonomyClsAnno::rmq_preprocessing(const std::vector &tree_lineariza for (uint32_t i = 0; i + delta < tree_linearization.size(); ++i) { // rmq_data[row][i] covers an interval of size delta=2^row and returns the node with the maximal depth among positions [i, i+2^row-1] in the linearization. // According to 'this->dfs_statistics()': node_depth[leaf] = 1 and node_depth[root] = maximum distance to a leaf. - if (this->node_depth[this->rmq_data[row - 1][i]] > - this->node_depth[this->rmq_data[row - 1][i + delta]]) { - this->rmq_data[row][i] = this->rmq_data[row - 1][i]; + if (node_depth[rmq_data[row - 1][i]] > + node_depth[rmq_data[row - 1][i + delta]]) { + rmq_data[row][i] = rmq_data[row - 1][i]; } else { - this->rmq_data[row][i] = this->rmq_data[row - 1][i + delta]; + rmq_data[row][i] = rmq_data[row - 1][i + delta]; } } delta *= 2; } - - // Compute fast tables for log2 and pow2. - this->fast_log2.resize(tree_linearization.size()); - this->fast_pow2.push_back(1); - for (uint32_t i = 2; i < tree_linearization.size(); ++i) { - this->fast_log2[i] = 1 + this->fast_log2[i/2]; - if (this->fast_log2[i] > this->fast_log2[i-1]) { - this->fast_pow2.push_back(i); - } - } } TaxId TaxonomyClsAnno::assign_class(const std::string &sequence) const { - std::cerr << "assign class not implemented " << sequence << "\n\n"; - return 0; + throw std::runtime_error("Assign class not implemented. 
Received " + sequence); } } // namespace annot diff --git a/metagraph/src/annotation/taxonomy/tax_classifier.hpp b/metagraph/src/annotation/taxonomy/tax_classifier.hpp index 438d38ae04..47c2a053b4 100644 --- a/metagraph/src/annotation/taxonomy/tax_classifier.hpp +++ b/metagraph/src/annotation/taxonomy/tax_classifier.hpp @@ -26,9 +26,15 @@ class TaxonomyBase { using node_index = graph::SequenceGraph::node_index; enum LabelType { - UNASSIGNED, GEN_BANK, KRAKEN + UNASSIGNED, + GEN_BANK, // e.g. ">gi|1070643132|ref|NC_031224.1| Arthrobacter phage Mudcat, complete genome" + TAXID, // e.g. ">kraken:taxid|2016032|NC_047834.1 Alteromonas virus vB_AspP-H4/4, complete genome" }; + TaxonomyBase() {}; + TaxonomyBase(const double lca_coverage_rate, const double kmers_discovery_rate) : + _lca_coverage_rate(lca_coverage_rate), _kmers_discovery_rate(kmers_discovery_rate) {}; + virtual ~TaxonomyBase() {}; // TODO implement @@ -49,8 +55,7 @@ class TaxonomyBase { * @param [input] filepath -> a ".accession2taxid" file. * @param [input] anno_matrix -> pointer to the annotation matrix */ - void read_accversion_to_taxid_map(const std::string &filepath, - const graph::AnnotatedDBG *anno_matrix); + void read_accversion_to_taxid_map(const std::string &filepath, const graph::AnnotatedDBG *anno_matrix); // TODO implement. /** @@ -119,7 +124,6 @@ class TaxonomyClsAnno : public TaxonomyBase { const std::string &tax_tree_filepath, const std::string &label_taxid_map_filepath = ""); TaxonomyClsAnno() {}; - ~TaxonomyClsAnno() {}; // todo implement void export_taxdb(const std::string &filepath) const; @@ -138,7 +142,7 @@ class TaxonomyClsAnno : public TaxonomyBase { ChildrenList *tree); /** - * rmq_preprocessing computes 'this->rmq_data', 'this->precalc_log' and 'this->precalc_pow2' fields. + * rmq_preprocessing computes 'this->rmq_data' field. * * @param [input] tree_linearization -> the linearization of the taxonomic tree. 
*/ @@ -172,16 +176,6 @@ class TaxonomyClsAnno : public TaxonomyBase { */ tsl::hopscotch_map node_to_linearization_idx; - /** - * fast_log2 is a table for a fast compute of log2(x). - */ - std::vector fast_log2; - - /** - * fast_pow2 is a table for a fast compute of pow2(x). - */ - std::vector fast_pow2; - const graph::AnnotatedDBG *_anno_matrix = NULL; }; diff --git a/metagraph/tests/annotation/taxonomy/test_taxonomy.cpp b/metagraph/tests/annotation/taxonomy/test_taxonomy.cpp index 09f4d3bf98..8c442a6fae 100644 --- a/metagraph/tests/annotation/taxonomy/test_taxonomy.cpp +++ b/metagraph/tests/annotation/taxonomy/test_taxonomy.cpp @@ -77,10 +77,6 @@ TEST (TaxonomyTest, RmqPreprocessing) { std::vector linearization = { 0, 1, 4, 7, 4, 8, 4, 1, 5, 1, 0, 2, 0, 3, 6, 3, 0 }; - std::vector expected_pow2 = {1, 2, 4, 8, 16}; - std::vector expected_log2 = { - 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4 - }; std::vector > expected_rmq = { {0, 1, 4, 7, 4, 8, 4, 1, 5, 1, 0, 2, 0, 3, 6, 3, 0}, {0, 1, 4, 4, 4, 4, 1, 1, 1, 0, 0, 0, 0, 3, 3, 0, 0}, @@ -90,8 +86,6 @@ TEST (TaxonomyTest, RmqPreprocessing) { }; tax->rmq_preprocessing(linearization); - EXPECT_EQ(expected_pow2, tax->fast_pow2); - EXPECT_EQ(expected_log2, tax->fast_log2); EXPECT_EQ(expected_rmq, tax->rmq_data); } From 76dcb32017822d4a54f10651b4e668dce44fd9bb Mon Sep 17 00:00:00 2001 From: Radu Muntean Date: Tue, 3 Aug 2021 22:41:58 +0200 Subject: [PATCH 08/17] delete tax test files Signed-off-by: Radu Muntean --- .gitignore | 1 - .../annotation/taxonomy/label_to_taxid.cpp | 9 +- .../annotation/taxonomy/tax_classifier.cpp | 36 +- .../annotation/taxonomy/tax_classifier.hpp | 27 +- .../annotation/taxonomy/test_taxonomy.cpp | 6 +- .../data/taxonomic_data/dumb.accession2taxid | 21 - .../tests/data/taxonomic_data/dumb_nodes.dmp | 20 - .../full_hierarchy_sequences.fa | 161 ---- .../tests/data/taxonomic_data/tax_input.fa | 96 --- .../tests/data/taxonomic_data/tax_query.fa | 800 ------------------ 10 files changed, 52 
insertions(+), 1125 deletions(-) delete mode 100644 metagraph/tests/data/taxonomic_data/dumb.accession2taxid delete mode 100644 metagraph/tests/data/taxonomic_data/dumb_nodes.dmp delete mode 100644 metagraph/tests/data/taxonomic_data/full_hierarchy_sequences.fa delete mode 100644 metagraph/tests/data/taxonomic_data/tax_input.fa delete mode 100644 metagraph/tests/data/taxonomic_data/tax_query.fa diff --git a/.gitignore b/.gitignore index cf49089ee5..9e165295f4 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,6 @@ *.fai !metagraph/tests/data/*.fa !metagraph/tests/data/*.fai -!metagraph/tests/data/taxonomic_data/*.fa metagraph/tests/data/*dump_test* projects/*/temp visualization/geolocation/data/* diff --git a/metagraph/src/annotation/taxonomy/label_to_taxid.cpp b/metagraph/src/annotation/taxonomy/label_to_taxid.cpp index dc3ed63158..ba8dbb13f5 100644 --- a/metagraph/src/annotation/taxonomy/label_to_taxid.cpp +++ b/metagraph/src/annotation/taxonomy/label_to_taxid.cpp @@ -10,12 +10,11 @@ namespace annot { using mtg::common::logger; void TaxonomyBase::assign_label_type(const std::string &label, bool *require_accversion_to_taxid_map) { - if (utils::starts_with(label, ">gi|")) { + if (utils::starts_with(label, "gi|")) { // e.g. >gi|1070643132|ref|NC_031224.1| Arthrobacter phage Mudcat, complete genome label_type = GEN_BANK; *require_accversion_to_taxid_map = true; - } else if (utils::starts_with(label, ">") && - utils::starts_with(utils::split_string(label, ":")[1], "taxid|")) { + } else if (utils::starts_with(utils::split_string(label, ":")[1], "taxid|")) { // e.g. 
>kraken:taxid|2016032|NC_047834.1 Alteromonas virus vB_AspP-H4/4, complete genome label_type = TAXID; *require_accversion_to_taxid_map = false; @@ -71,8 +70,8 @@ void TaxonomyBase::read_accversion_to_taxid_map(const std::string &filepath, tsl::hopscotch_set input_accessions; if (anno_matrix != NULL) { - for (const std::string &accversion : anno_matrix->get_annotation().get_all_labels()) { - input_accessions.insert(accversion); + for (const std::string &label : anno_matrix->get_annotation().get_all_labels()) { + input_accessions.insert(get_accession_version_from_label(label)); } } diff --git a/metagraph/src/annotation/taxonomy/tax_classifier.cpp b/metagraph/src/annotation/taxonomy/tax_classifier.cpp index 040ffea689..247df4cd94 100644 --- a/metagraph/src/annotation/taxonomy/tax_classifier.cpp +++ b/metagraph/src/annotation/taxonomy/tax_classifier.cpp @@ -107,7 +107,7 @@ void TaxonomyClsAnno::read_tree(const std::string &tax_tree_filepath, ChildrenLi continue; } - if (considered_relevant_taxids.find(full_parents_list[taxid]) == considered_relevant_taxids.end()) { + if (not considered_relevant_taxids.count(full_parents_list[taxid])) { relevant_taxids.push_back(full_parents_list[taxid]); considered_relevant_taxids.insert(full_parents_list[taxid]); } @@ -137,11 +137,13 @@ void TaxonomyClsAnno::dfs_statistics(const TaxId node, node_to_linearization_idx[node] = tree_linearization->size(); tree_linearization->push_back(node); uint32_t depth = 0; - for (const TaxId &child : tree.at(node)) { - dfs_statistics(child, tree, tree_linearization); - tree_linearization->push_back(node); - if (node_depth[child] > depth) { - depth = node_depth[child]; + if (tree.count(node)) { + for (const TaxId &child : tree.at(node)) { + dfs_statistics(child, tree, tree_linearization); + tree_linearization->push_back(node); + if (node_depth[child] > depth) { + depth = node_depth[child]; + } } } node_depth[node] = depth + 1; @@ -177,8 +179,26 @@ void TaxonomyClsAnno::rmq_preprocessing(const 
std::vector &tree_lineariza } } -TaxId TaxonomyClsAnno::assign_class(const std::string &sequence) const { - throw std::runtime_error("Assign class not implemented. Received " + sequence); +std::vector TaxonomyClsAnno::get_lca_taxids_for_seq(const std::string_view &sequence, bool reversed) const { + cerr << "Assign class not implemented reversed = " << reversed << "\n"; + throw std::runtime_error("get_lca_taxids_for_seq TaxonomyClsAnno not implemented. Received seq size" + to_string(sequence.size())); + exit(0); +} + +std::vector TaxonomyClsImportDB::get_lca_taxids_for_seq(const std::string_view &sequence, bool reversed) const { + cerr << "Assign class not implemented reversed = " << reversed << "\n"; + throw std::runtime_error("get_lca_taxids_for_seq TaxonomyClsImportDB not implemented. Received seq size" + to_string(sequence.size())); + exit(0); +} + +TaxId TaxonomyClsAnno::find_lca(const std::vector &taxids) const { + throw std::runtime_error("find_lca TaxonomyClsAnno not implemented. Received taxids size" + to_string(taxids.size())); + exit(0); +} + +TaxId TaxonomyClsImportDB::find_lca(const std::vector &taxids) const { + throw std::runtime_error("find_lca TaxonomyClsImportDB not implemented. Received taxids size" + to_string(taxids.size())); + exit(0); } } // namespace annot diff --git a/metagraph/src/annotation/taxonomy/tax_classifier.hpp b/metagraph/src/annotation/taxonomy/tax_classifier.hpp index 47c2a053b4..12ff6e0e06 100644 --- a/metagraph/src/annotation/taxonomy/tax_classifier.hpp +++ b/metagraph/src/annotation/taxonomy/tax_classifier.hpp @@ -37,14 +37,12 @@ class TaxonomyBase { virtual ~TaxonomyBase() {}; - // TODO implement - virtual TaxId assign_class(const std::string &sequence) const = 0; + TaxId assign_class(const std::string &sequence) const; PROTECTED_TESTABLE: void assign_label_type(const std::string &label, bool *require_accversion_to_taxid_map); - // TODO implement. 
- TaxId find_lca(const std::vector &taxids) const; + virtual TaxId find_lca(const std::vector &taxids) const = 0; std::string get_accession_version_from_label(const std::string &label) const; @@ -57,7 +55,6 @@ class TaxonomyBase { */ void read_accversion_to_taxid_map(const std::string &filepath, const graph::AnnotatedDBG *anno_matrix); - // TODO implement. /** * Update the current node_scores and best_lca by taking into account the weight of the start_node and all its ancestors. * @@ -75,7 +72,13 @@ class TaxonomyBase { tsl::hopscotch_map *node_scores, tsl::hopscotch_set *nodes_already_propagated, TaxId *best_lca, - uint32_t *best_lca_dist_to_root); + uint32_t *best_lca_dist_to_root) const; + + /** + * Get the list of LCA taxid for each kmer in a given sequences. + * The sequence can be given in forward or in reversed orientation. + */ + virtual std::vector get_lca_taxids_for_seq(const std::string_view &sequence, bool reversed) const = 0; LabelType label_type; @@ -104,7 +107,10 @@ class TaxonomyClsImportDB : public TaxonomyBase { TaxonomyClsImportDB(const std::string &taxdb_filepath, const double lca_coverage_rate, const double kmers_discovery_rate); - TaxId assign_class(const std::string &sequence) const; + +PRIVATE_TESTABLE: + std::vector get_lca_taxids_for_seq(const std::string_view &sequence, bool reversed) const; + TaxId find_lca(const std::vector &taxids) const; }; class TaxonomyClsAnno : public TaxonomyBase { @@ -128,8 +134,7 @@ class TaxonomyClsAnno : public TaxonomyBase { // todo implement void export_taxdb(const std::string &filepath) const; - // todo implement - TaxId assign_class(const std::string &sequence) const; + TaxId assign_class_toplabels(const std::string &sequence, const double label_fraction) const; PRIVATE_TESTABLE: /** @@ -162,6 +167,10 @@ class TaxonomyClsAnno : public TaxonomyBase { const ChildrenList &tree, std::vector *tree_linearization); + TaxId find_lca(const std::vector &taxids) const; + + std::vector get_lca_taxids_for_seq(const 
std::string_view &sequence, bool reversed) const; + /** * rmq_data[0] contains the taxonomic tree linearization * (e.g. for root 1 and edges={1-2; 1-3}, the linearization is "1 2 1 3 1"). diff --git a/metagraph/tests/annotation/taxonomy/test_taxonomy.cpp b/metagraph/tests/annotation/taxonomy/test_taxonomy.cpp index 8c442a6fae..1543638537 100644 --- a/metagraph/tests/annotation/taxonomy/test_taxonomy.cpp +++ b/metagraph/tests/annotation/taxonomy/test_taxonomy.cpp @@ -4,15 +4,13 @@ #include #include -#include -#include #include "annotation/taxonomy/tax_classifier.hpp" namespace mtg { namespace test { -TEST (TaxonomyTest, DfsStatistics) { +TEST (TaxonomyTest, ClsAnno_DfsStatistics) { mtg::annot::TaxonomyClsAnno *tax = new mtg::annot::TaxonomyClsAnno(); tsl::hopscotch_map> tree { {0, {1, 2, 3}}, // node 0 -> root @@ -59,7 +57,7 @@ TEST (TaxonomyTest, DfsStatistics) { EXPECT_EQ(expected_node_to_linearization_idx, tax->node_to_linearization_idx); } -TEST (TaxonomyTest, RmqPreprocessing) { +TEST (TaxonomyTest, ClsAnno_RmqPreprocessing) { mtg::annot::TaxonomyClsAnno *tax = new mtg::annot::TaxonomyClsAnno(); tax->node_depth = { diff --git a/metagraph/tests/data/taxonomic_data/dumb.accession2taxid b/metagraph/tests/data/taxonomic_data/dumb.accession2taxid deleted file mode 100644 index ca9ba2566f..0000000000 --- a/metagraph/tests/data/taxonomic_data/dumb.accession2taxid +++ /dev/null @@ -1,21 +0,0 @@ -accession accession.version taxid gi -NC_01 NC_01.1 10001 10001 -NC_02 NC_02.1 10002 10002 -NC_03 NC_04.1 10003 10003 -NC_04 NC_04.1 10004 10004 -NC_05 NC_05.1 10005 10005 -NC_06 NC_06.1 10006 10006 -NC_07 NC_07.1 10007 10007 -NC_08 NC_08.1 10008 10008 -NC_09 NC_09.1 10009 10009 -NC_10 NC_10.1 10010 10010 -NC_11 NC_11.1 10011 10011 -NC_12 NC_12.1 10012 10012 -NC_13 NC_13.1 10013 10013 -NC_14 NC_14.1 10014 10014 -NC_15 NC_15.1 10015 10015 -NC_16 NC_16.1 10016 10016 -NC_17 NC_17.1 10017 10017 -NC_18 NC_18.1 10018 10018 -NC_19 NC_19.1 10019 10019 -NC_20 NC_20.1 10020 10020 diff 
--git a/metagraph/tests/data/taxonomic_data/dumb_nodes.dmp b/metagraph/tests/data/taxonomic_data/dumb_nodes.dmp deleted file mode 100644 index c721ad085a..0000000000 --- a/metagraph/tests/data/taxonomic_data/dumb_nodes.dmp +++ /dev/null @@ -1,20 +0,0 @@ -10001 | 10001 -10002 | 10001 -10003 | 10001 -10004 | 10002 -10005 | 10002 -10006 | 10002 -10007 | 10003 -10008 | 10003 -10009 | 10004 -10010 | 10004 -10011 | 10004 -10012 | 10005 -10013 | 10005 -10014 | 10006 -10015 | 10006 -10016 | 10007 -10017 | 10007 -10018 | 10007 -10019 | 10008 -10020 | 10008 diff --git a/metagraph/tests/data/taxonomic_data/full_hierarchy_sequences.fa b/metagraph/tests/data/taxonomic_data/full_hierarchy_sequences.fa deleted file mode 100644 index 36099686d8..0000000000 --- a/metagraph/tests/data/taxonomic_data/full_hierarchy_sequences.fa +++ /dev/null @@ -1,161 +0,0 @@ ->gi|10001|ref|NC_01.1| Test sample 1 (root) -CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG -AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT -CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC -TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT -TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA -TATGACTTAAACGAGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC ->gi|10002|ref|NC_02.1| Test sample 2 (dist to root = 1) -CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG -AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT -CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC -TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT -TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA -TATGACTTAAACCAGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC ->gi|10003|ref|NC_03.1| Test sample 
3 (dist to root = 1) -CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG -AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT -CTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC -TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT -TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCAAA -TATGACTTAAACGGGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC ->gi|10004|ref|NC_04.1| Test sample 4 (dist to root = 2) -CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG -AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT -CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC -TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT -TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA -TATGACTTAACCCAGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGAAATACC ->gi|10005|ref|NC_05.1| Test sample 5 (dist to root = 2) -CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG -AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT -CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC -TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT -TAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA -TATGACTTAAACCAGGGGGCTGTGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC ->gi|10006|ref|NC_06.1| Test sample 6 (dist to root = 2) -CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG -AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT -CTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC 
-TTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT -TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGGGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA -TATGACTTAAACCAGGTGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC ->gi|10007|ref|NC_07.1| Test sample 7 (dist to root = 2) -CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCCATTCGAG -AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT -CTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC -TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT -TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCACCGTAGCAAA -TATGACTTAAACGGGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC ->gi|10008|ref|NC_08.1| Test sample 8 (dist to root = 2) -CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG -AGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT -CTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC -TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT -TAAATCGAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCAAA -TATGACTTAAACGGGGGGGCTGGGGCTGTTCGGAGACAAATCTACGCCTACTACAAACTCTAGCAATACC ->gi|10009|ref|NC_09.1| Test sample 9 (dist to root = 3) -CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG -AGGGATCTATGGTTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT -CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGGAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC -TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT -TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA -TATGACTTAACCAAGGGGGCTGGGGCTGTTCGCAGGCAAATCTACGCCTACTACAAACTCTAGAAATACC ->gi|10010|ref|NC_10.1| Test sample 10 (dist to root = 3) 
-CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGTTATTCGAG -AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT -CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGACACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC -TTGAAAAGTTGTAACCAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT -TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCTTAGCCAA -TATGACTTAACCCAAGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGAAATACC ->gi|10011|ref|NC_11.1| Test sample 11 (dist to root = 3) -CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG -GGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT -CTATCTCGCATTCATTGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC -TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT -TAAATCGAGTGTTGGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA -TTTGACTTAACCCAGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGAAATACC ->gi|10012|ref|NC_12.1| Test sample 12 (dist to root = 3) -CGGCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG -AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAGAGGGTGACTCTGGTGT -CTATCTCGCATTAAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCTCTTCCGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC -TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT -TAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA -TATGACTTAAACCAGGGGGCTGTGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC ->gi|10013|ref|NC_13.1| Test sample 13 (dist to root = 3) -CGCCGGCCTCCCCAAAAAATCCCCGGAGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG -AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT -CAATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC 
-TTGAAAAGTTGTAACAAAACGTAACAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT -TAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA -TATGACTTAAACCAGGGGGCTGTGGCTGTTCGCAGACCAATCTACGCCTACTACAAACTCTAGCAATACC ->gi|10014|ref|NC_14.1| Test sample 14 (dist to root = 3) -CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCACACAATGCACTTCCGGCGGCTATTCGAG -AGGGATCTATGGGTCGGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT -CTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAGAAGTTTGAATTGGGCGAAACATACTTGCTTACTC -TTGATAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT -TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGGGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA -TATGACTTAAACCAGGTGGCTGGGGCTGTTCGCAGACAAATCTACGGCTACTACAAACTCTAGCAATACC ->gi|10015|ref|NC_15.1| Test sample 15 (dist to root = 3) -CGCCGGCCTCCCCAAAAAATCCCCGGGGGAAAATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG -AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAATGGGTGACTCTGGTGT -CTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC -TTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT -TAAATCGAGTGTTAGCCGCATGTAGAAATTTAATGGGTTGGTATTCTTGTAGTATGGTCATCGTAGCCAA -TATGACTTAAACCAGGTGGCTGGCGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC ->gi|10016|ref|NC_16.1| Test sample 16 (dist to root = 3) -CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCCTTTCGAG -AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT -CTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCACTCAATG -CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC -TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACGGTAGCCTTTGGGACTTTCACTACTTAGCAAAT -TAAATCGAGTGTTGGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCACCGTAGCAAA -TATGACTTAAACGTGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC ->gi|10017|ref|NC_17.1| Test sample 17 (dist to root = 3) 
-CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCCATTCGAG -AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT -CTATCTCGCATCCCATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATAACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC -TTGAAAAATTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT -TAAATCGAGTGTTAGCCGAATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCACCGTAGCAAA -TATGACTTAAACGGGGGGGCTCGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC ->gi|10018|ref|NC_18.1| Test sample 18 (dist to root = 3) -CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCACACAATGCACTTCCGGGGGCCATTCGAG -AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT -CTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC -TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTGGCAAAT -TAAATCGAGTGTTAGCCGCATCTAGACATTTAATGAGTTGGTTTTCTTGTAGTAGGGTCACCGTAGCAAA -TATGACTTAAACGGGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC ->gi|10019|ref|NC_19.1| Test sample 19 (dist to root = 3) -CGCCGGCCGCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG -AGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT -CTATCTCGCATCCAATGCCGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCATTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC -TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT -TAAATCCAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCAAA -TATGACCTAAACGGGGGGGCTGGGGCTGTTCGGAGACAAATCTACGCCTACTACAAACTCTAGCAATACC ->gi|10020|ref|NC_20.1| Test sample 20 (dist to root = 3) -CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG -AGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT -CTATCTCGCATCCAATGCAGATTACTCCGATTATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC 
-TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACCGTAGCCTTTGGGACTTTCACTACTTAGCAAAT -TAAATCGAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTACTCTTGTAGTAGGGTCATCGTAGCAAA -TATGACTTAAACGGGGGGGCTGGGGCTGTTCGGAGACAAATCTACGCCTACTACCAACTCTAGCAATACC - diff --git a/metagraph/tests/data/taxonomic_data/tax_input.fa b/metagraph/tests/data/taxonomic_data/tax_input.fa deleted file mode 100644 index 3ba4796416..0000000000 --- a/metagraph/tests/data/taxonomic_data/tax_input.fa +++ /dev/null @@ -1,96 +0,0 @@ ->gi|10009|ref|NC_09.1| Test sample 9 (dist to root = 3) -CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG -AGGGATCTATGGTTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT -CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGGAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC -TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT -TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA -TATGACTTAACCAAGGGGGCTGGGGCTGTTCGCAGGCAAATCTACGCCTACTACAAACTCTAGAAATACC ->gi|10010|ref|NC_10.1| Test sample 10 (dist to root = 3) -CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGTTATTCGAG -AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT -CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGACACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC -TTGAAAAGTTGTAACCAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT -TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCTTAGCCAA -TATGACTTAACCCAAGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGAAATACC ->gi|10011|ref|NC_11.1| Test sample 11 (dist to root = 3) -CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG -GGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT -CTATCTCGCATTCATTGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC 
-TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT -TAAATCGAGTGTTGGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA -TTTGACTTAACCCAGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGAAATACC ->gi|10012|ref|NC_12.1| Test sample 12 (dist to root = 3) -CGGCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG -AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAGAGGGTGACTCTGGTGT -CTATCTCGCATTAAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCTCTTCCGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC -TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT -TAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA -TATGACTTAAACCAGGGGGCTGTGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC ->gi|10013|ref|NC_13.1| Test sample 13 (dist to root = 3) -CGCCGGCCTCCCCAAAAAATCCCCGGAGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG -AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT -CAATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC -TTGAAAAGTTGTAACAAAACGTAACAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT -TAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA -TATGACTTAAACCAGGGGGCTGTGGCTGTTCGCAGACCAATCTACGCCTACTACAAACTCTAGCAATACC ->gi|10014|ref|NC_14.1| Test sample 14 (dist to root = 3) -CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCACACAATGCACTTCCGGCGGCTATTCGAG -AGGGATCTATGGGTCGGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT -CTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAGAAGTTTGAATTGGGCGAAACATACTTGCTTACTC -TTGATAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT -TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGGGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA -TATGACTTAAACCAGGTGGCTGGGGCTGTTCGCAGACAAATCTACGGCTACTACAAACTCTAGCAATACC ->gi|10015|ref|NC_15.1| Test sample 15 (dist to root = 3) 
-CGCCGGCCTCCCCAAAAAATCCCCGGGGGAAAATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG -AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAATGGGTGACTCTGGTGT -CTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC -TTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT -TAAATCGAGTGTTAGCCGCATGTAGAAATTTAATGGGTTGGTATTCTTGTAGTATGGTCATCGTAGCCAA -TATGACTTAAACCAGGTGGCTGGCGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC ->gi|10016|ref|NC_16.1| Test sample 16 (dist to root = 3) -CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCCTTTCGAG -AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT -CTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCACTCAATG -CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC -TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACGGTAGCCTTTGGGACTTTCACTACTTAGCAAAT -TAAATCGAGTGTTGGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCACCGTAGCAAA -TATGACTTAAACGTGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC ->gi|10017|ref|NC_17.1| Test sample 17 (dist to root = 3) -CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCCATTCGAG -AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT -CTATCTCGCATCCCATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATAACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC -TTGAAAAATTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT -TAAATCGAGTGTTAGCCGAATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCACCGTAGCAAA -TATGACTTAAACGGGGGGGCTCGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC ->gi|10018|ref|NC_18.1| Test sample 18 (dist to root = 3) -CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCACACAATGCACTTCCGGGGGCCATTCGAG -AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT -CTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC 
-TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTGGCAAAT -TAAATCGAGTGTTAGCCGCATCTAGACATTTAATGAGTTGGTTTTCTTGTAGTAGGGTCACCGTAGCAAA -TATGACTTAAACGGGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC ->gi|10019|ref|NC_19.1| Test sample 19 (dist to root = 3) -CGCCGGCCGCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG -AGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT -CTATCTCGCATCCAATGCCGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCATTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC -TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT -TAAATCCAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCAAA -TATGACCTAAACGGGGGGGCTGGGGCTGTTCGGAGACAAATCTACGCCTACTACAAACTCTAGCAATACC ->gi|10020|ref|NC_20.1| Test sample 20 (dist to root = 3) -CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG -AGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT -CTATCTCGCATCCAATGCAGATTACTCCGATTATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG -CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC -TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACCGTAGCCTTTGGGACTTTCACTACTTAGCAAAT -TAAATCGAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTACTCTTGTAGTAGGGTCATCGTAGCAAA -TATGACTTAAACGGGGGGGCTGGGGCTGTTCGGAGACAAATCTACGCCTACTACCAACTCTAGCAATACC diff --git a/metagraph/tests/data/taxonomic_data/tax_query.fa b/metagraph/tests/data/taxonomic_data/tax_query.fa deleted file mode 100644 index 30ea3f2c27..0000000000 --- a/metagraph/tests/data/taxonomic_data/tax_query.fa +++ /dev/null @@ -1,800 +0,0 @@ -@gi|10001|ref|NC_01.1|-9/1 -TGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATTGATCCCTCTCGAATA -+ -CC=GGGCGGGGGGGJGJJGGJJJCJJJGJJCJJGGJJG=GGJJJGGC8GGGCCGCGCJJGGJ=1CGGJGGCJG=GGGJGGCCGGGCCGCGCGGGG=GGGGCCGGCGGGGGGG=GCGGGC1CGGCGCCGGCC8GG1GCGGGGGGGCGC==C -@gi|10001|ref|NC_01.1|-7/1 
-CCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGA -+ -CC8GGGGGGGGGGJCJJGGJJJJJGC(JJGCJJGJGGJJJCGGGJJJJJ=CCJJ8CJ8CGCC=GGJJGGGGGGJGGGGCCGGCGCCGCGGGG1G=CGGGCJCGCGGC1GGCCGGGCGGGCGCGGCGGGGG=CCGGGGGG=CGCCGGGCCG -@gi|10001|ref|NC_01.1|-5/1 -AGTTTGTAGTAGGCGTAGATTTGTCTGCGAACAGCCCCAGCCCCCTCGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTC -+ -C=CGGGGGG=CGGJGJ1JJJGJJCGJJJG=JGC=JCJGJJGJJCGJJGGJJJGJJGGGGJG=CGCCGGC=GGGCGGGG8CCGGGGGGGGGCC8GGCG=GGCCCGG1GGGGGGGGGGCGG8GCCGGGGGGGG1CGGGGGCCCGGGGGCGGC -@gi|10001|ref|NC_01.1|-3/1 -CTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGA -+ -CC1GGGGGGGCGGJGJJGJJJGGGJJGJJJJJJJJCGJGJGGJ1JJGCCJGJ=JJ8CGJ8CGGJJGJCJ=CCGGC=GGCGGGGGCGGGGGGGCCG1GGCGJCGGCC(GGCGGG=CGG(GCGG8G1GGGCCGGGGGGGGGGGCCGGGGGGG -@gi|10001|ref|NC_01.1|-1/1 -GAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCCTTGAGGGCGTGTACTTAGCCCA -+ -CCCGGGGGCGGGGGGJJCJJJJJJJJJJ1JJGJGGCJJGJJJJCCCJJGGGJ=GJJJGGJJCCGGGGGGCCC8CCCC=GGGGCGG==GGGGGGGCCC8GGCGGC1GGCGCGGGGGG=GCGGGCCGGC(GGGGGGGGCC8CGGGGGGGGCC -@gi|10002|ref|NC_02.1|-9/1 -AAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTT -+ -CCCGGGGGGGGGCJJJGJGGJJCJJJGGJGJ=JJJJGGGJGJGJGJJJGC=JJCGGGGJGJGGGJ1JGCCCGJGCGC=CGCC==CGGGCC1CGGGGGGCGJCCGGGGGG=CGCCGGGGCGGCCGGGCG1GGGGGCCGCCCCCGGGGGGC8 -@gi|10002|ref|NC_02.1|-7/1 -TGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGA -+ -=CCGCCGGGGGGGGCCJJGGJJJJ8J1JJGGJJ1CJCJJGGG8CJCCGJGJJGGGGGGCJGGGGGGCGGGJGCJGG1GCGCGGGCGGGCGGGGC=(GCG=JCG=GG(GCCCGC=GG(GCCGGGC=1CGCGG=GGCGGCCCGGGGGGCGGC 
-@gi|10002|ref|NC_02.1|-5/1 -GAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAATATGACTTAAAC -+ -CCCGGCGCGGGGGJJGJJGJJJJGGJG=JJGJGJJJJGJJJJCGJJJJGJGGGJGJGJJ(J=J=JGGG1GCGGCJCJGGCC1GGGCG==GCGG81GGGCGCCCGGCGGGGGCGGGGGGGGCGGGGGCCGG1GGCGCGCGCGGGGCG1GCC -@gi|10002|ref|NC_02.1|-3/1 -CGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGA -+ -CCCGGGGGGGGGGCJ1JJJJCJCGJJJJJJJGGGJJJJGJJJJJJGGGGJGGJGGGJG(GJJG=CGJJJCGGJGCGGGCGGGCG8GG=CGGCG8CGGCGCCC=C=GGGCCGGGGC1GC=G=G8GGCGGGCCGGCGGGGGGCGGCCCCGCC -@gi|10002|ref|NC_02.1|-1/1 -GGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCAC -+ -CCCGGGGGGGGGGJJJGJCJCJJJJJJJCCJGJJJGJCJJ=JJGGCGGJGC1JJGGJCJJCGJCCGGGGGGGCJCGCGC=GC=GCGCGG1GGGG=GCGGGCGGCGGGGC8GGCGGGGGGGG=CGGGCGGGG=CCG=GCGCCG=GGGCGGG -@gi|10003|ref|NC_03.1|-9/1 -TCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCG -+ -=C1GGGGGGGGCGJJJJGJJGJJCJJJJGJGGGGJJJGJJGCJJGJJCJJGJGC8JCGJCGCGJGGGGGCGGCGGGGGGG=CGCGGCGCCGGGCGGGCGGCGGGGGCCC=GCGGGG1GGC1C8GCGGGGGGCGGGCC=GCCGGGG=GGGG -@gi|10003|ref|NC_03.1|-7/1 -CTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAA -+ -CCCGCGGGGGGGGJJJJJJJGJJGJJGGJJJJJJGJJJJJGJ8JGJJJGGJG=GJJJJ=GGJCJJGGGCGGGCGJGGGGGGC=GGGGGGGG=G=GGGGGGJG=CG8GGGCGCGGCGCGCGGC18CGGGGGGGGGGCGCCGGGGGCGGGG= -@gi|10003|ref|NC_03.1|-5/1 -GGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAGAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAA -+ 
-CCCGGGGGGGG1GJGJJJJJJGJJCJGGCJGJGJJJJGJJGCJJ8=(JJJGGC8GCJGG(JJJCGGJJGGG=GGGGGGCCGGGGGGGGGCGG=CGGGGGG1CGGGCCCGGGGGC1CCGGGCGGCCCG8GCGGG=CCCGGCGCGGGGGGCC -@gi|10003|ref|NC_03.1|-3/1 -AAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGGATGCCAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTT -+ -CCCGGGGGCGGGGGGGCJJJCJJJGJJGJJJJGJJGJC(JJGJJJJJJJGGJGCGGGGJGJGCC=JGG=CCJGGG=GGGG8C=GCGGGGG=GGCGGCC=GJCC1C1C8GGGGGGGCCG=(GCGGGGGGGGCCGGCCGCC1CGGCCGGCCC -@gi|10003|ref|NC_03.1|-1/1 -ATTGCTAGAGTTTGTAGTAGGCGTAGATTTGTCTGCGAACAGCCCAAGCCCCCCCGTTTAAGTCATATTTGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAG -+ -CC11GGGGGGGGGJJJJJJJJJJGGJ1CJJJJJJ=JGGGJJGGGJ(JGGJJJGGGGCGGJCGJGJ1GCCGGGCJCCJCGCGGCCGGGGGGGGGGGGGC===GGCGCGGC=GCGCGCGGCC=8CGGGGGGGGGGGCC=GGGGGGCCCGGGG -@gi|10004|ref|NC_04.1|-9/1 -CTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGC -+ -CCC1GG=CGGGGGJJJJJCJGJJJJJJJJJJJJJJJJJJJG=JCJJJGGJJGGGGGGCJJGGJJJGGJGGGJGC=CGJCCG=GCGGGGGCGGGGGGCCGGJCG=G1CG1GGGGCC1CGG=(CGGGGG=CC=GGGGC8GG8GCGGGCGGGG -@gi|10004|ref|NC_04.1|-7/1 -CAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATAGATCCCTCTC -+ -CCC1GGGGGGGGGJJ1JJJJGJJJGJJJJGGCJJCJGJJJJGJJGJJJGGGGGJGGGJ8=JCCGGCC1GCGGGCGGGGGJG=GC1GGGGGGGGGCGGGGGJCGCGGCCCGCCGGGGC8CGGGCGCGCCGGGGGGGGCGGGGGGCG1CG=C -@gi|10004|ref|NC_04.1|-5/1 -TTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAATATG -+ -=C=GCGGGGGGGGGJJJJJJJJGJJJGJG1JJGCGGJJGJGJ8JJJGGGJ8CJCJGJJGCJGGCGGGCGGJGGGGCG=GGGG1GGG=GGGGGGCGCG8GGJCCCCGGGGGGGCGCGCGCGGGCGGGGGG=CGCGGGGCCGGGCCCGGCG= -@gi|10004|ref|NC_04.1|-3/1 
-GTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAATATGACTTAACCCAGGG -+ -CCCGGGGGGGG=GGGCCCGJJJJ=GGGJJJJGCJJJCJGGJJG1JJC8JGJGJCGCGJJJGGJGGGGGGJJJGC1G8=GGCCGCCGCGG8GCGGGCGGGCCCCGGCGGGCGGCCGG1=CGCG=GCGGCCGGGCGGCCG=G=GCGGC=GCC -@gi|10004|ref|NC_04.1|-1/1 -CAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAA -+ -CC8GGGCCG1GGGJJJJJGGJJGJJJJJGG1JJCGGJGGJGG1JJJJGJJGCGJJ=GJJJ=JGJ=GJJJCGCGG8GGGCCGGGGCGGGCGGCGGCGGCGGJCGGCGG1CGCGCGGGCCGGG8GGG1GGC8GGGGCGCGGGGCCGGGGCG= -@gi|10005|ref|NC_05.1|-9/1 -TAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTAC -+ -CCCGGGGGGGGGGJJJJJCGJJJGJJGJJGGJJ8GCJJGCJGJGJJJGJJJGGGGG8JGGJGGGGGGCGGCCJGCGG1GGGGGCGCGGG=GGGGCGCGGGJGGGG8CG=CGGGGCGGGGGGGGGGGCGGGCCG=GG=GGCCGGGCCGCG1 -@gi|10005|ref|NC_05.1|-7/1 -CCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGATTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTG -+ -=CCGGGGGGGGGGJ1JJJJGJ(JJJJJJJJJJGJJJGJGJGJGJGGGCGGGCGJG=JCJGGGCGCGGGGGCJGGGGJGGCCCGG=GGGGGCG(GGC=GGGJGGCGC=GC8GGC=GCGG=G1CG1GGCGGGGGG1GCCGGCGGGGGGCG8C -@gi|10005|ref|NC_05.1|-5/1 -TTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTCTGGGACTTTCACTACTAAGCAAATTAAATCTAGTGTTAGCCGCATGTAGACATT -+ -CCCGCGGGGGGGGJJJJJGGGJJGJJJGGCJJGJJJJJJGG=JGGJJ=J8GJJJCJG1JGCG8GJ1JGGGGJGGCGGCGJCCGCC1GG1GCCCCG(CGGGJCGG(=CGCCGGGCGGCGCC=CGCGGCGGGGCCCGCC=GGCGGGCCCGGC -@gi|10005|ref|NC_05.1|-3/1 -GTTTGTAGTAGGCGTAGATTTGTCTGCGAACAGCCACAGCCCCCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGAGTTAATTTGCTTAGTAGTGAAAGTCC -+ -CCCCGGGGGGCGGJJJJJJJJJ8JCJJJJJGJJJCJCJG=JGJGJGJGJJ=JG=GGG1J=GJJGG8CGGGGGGGGGGGCGCCGGCGGGGGGGGGCGGGCGJG8GCGG=CGGCGCGCGGCGGCCG1GGGG8GGGGGCCGGCGCGGCGGCGG 
-@gi|10005|ref|NC_05.1|-1/1 -AGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGC -+ -CCC=GG=GGGGGGJGJJJJJJJJJJJJJJJJJ=J=JGCGJJJGGGJJCGJJJJCJJJGGGJGGJ(GGGGGJ1CCCGJCGGCCGCCC=CGGGGGCCGGG8=CG=GCGGCGGG1GCC=GCG1GCGGCG=GGCGG=G8GC(GCGGCCGCGGGG -@gi|10006|ref|NC_06.1|-9/1 -GTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTA -+ -CCC1GGG1GGGGCJJJJ=JJJGJJJGJJGGJGG=GJJJJJGJJJJJGJ8JJJCJG1=GJGGGGJGJG8GGGJCGGCCGGCGGCGGGGGGG(GCGGGGCG==CGCGGG8CCGGGGGGGG=GG=GGGGCCGGGGCG=GGGGGGCGGGGCCCC -@gi|10006|ref|NC_06.1|-7/1 -GCCCCAGCCACCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACCCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTA -+ -CCC1GGGGCGGGCJGJJGGJJJJJJGJJGCJJ=JJJCJJC=GJCJCGGGJJJ=GGJGCJGCGCCJJCJG8=GCGJGJG=CCG=CGGCGGC=GGC1G=CGGCCGGGGCG8CGC1GCGCGCGGGGCGGGGGCGGGCC=GGGGGCCGGGGGG= -@gi|10006|ref|NC_06.1|-5/1 -GTGTCTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATTTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAA -+ -CCCGGGGGGGGGGJGJJJGJJJJJGJJJCGJJJJGJJ8GJGJJJGJGCJJCGCJ(GGGGJGCCCGGCCGGJGGGGGG8JCGCGGGGGG=GCGGGGGGCGCJCG=CCGGCG=CGG8GCGGC=G8GCGGGCC=GGGGCCGGGG=GGGGG==C -@gi|10006|ref|NC_06.1|-3/1 -CAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCCCATCGGAGTAATCTGCATTGAATGCGAGATAGACACCAGAGTCACCCTT -+ -CCCGGGGGGGGGGGJJGJGJJJJGGJGJJJCCJJJGGJGJGJGJGJJGJJJ=GJGJGGCGCCGGJGJC=GGGCGCGGG(GGGG=GGGCCGGGC=C1G=CGJGGGG=CGGCCG=GGGGGGGGC8GGCGGCGCGGCCGCCGCGGGGCGCGGG -@gi|10006|ref|NC_06.1|-1/1 -ATACTTGCTTACTCTTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGGGTTGGTATTCTTGTAGTAGGGTCATCGTAG -+ 
-CCCCGGCCGGGGG1GJ=JJJGJJJJJJGJCGJGJGJGGJJJ1JJ8CC=JJGJ1=GGJJJGJJGCJGJCJGGGGG8GGCGGGCCGCGGCGCGG8GGGCGCC1=GGGGCGG8GGCCCGCGGGGGCGGGGC=8C=CGGGG8GG=G=CGGGGCG -@gi|10007|ref|NC_07.1|-9/1 -CTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGC -+ -CCCGCCGGGGGGCGGJJGGJJGJGGJJGGCJGJ(JJC=G8JJ1JJGGGJJGCJJGCJGGJG==J=JGC8GCGGCGGGGGCGCCC=GGGCGGGGGG(G=CGJCCCGGGCGGCCGGGGCCGG1C=GGGGGGC1GGGGCCGC=CGC8CGGGCG -@gi|10007|ref|NC_07.1|-7/1 -GCAGTAGCAGACAAGTTTGAATTGGGCGAAACCTACTTGCTTCCTCTTGGAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAAT -+ -CCCG=GGGGGGGGJJJJJJJGJ=J1CJGGJJJ=JJCJJJJGJJJGCJJJGGJ8GJ8GCGJJJGGGJJGGGC=CGGCGG=G8GG=GC=GCGG=GGGGGGCCCCCCCGGCGGGGC=GCCGGGC=GGGGGC=GGCG1GCGCCGGG1=GGCCCG -@gi|10007|ref|NC_07.1|-5/1 -TGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCC -+ -CC1GCGGGGGCGGCJJJGJJJJJJJJJJJJJJJJJ=JJGJGGG(GCJJGJJGGGJGGGJCJGGJGGJCCCGGGCGGCCGGGGGCGCGCGGGG8CGGGCGGJGCGGGGCGGGGGGCGGGCGCGGCGCGGGGGGGGGGCGGC8G=GCCGGCG -@gi|10007|ref|NC_07.1|-3/1 -ACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCACCGTAGCAAATATGACTTAAACGGGGGGGCTGGGGCTGTTCGCAGACAAA -+ -=CCGCGGGGGCGGJJJGJJJGGJGJ1GGGGJGJCCJJ8JGJJJJJGGJG1GGCJJJJJJ=GGG1GGJGGGGCGGGCGCGGCCCGGGCGGCGCGCCGG=G8JGCCCCCGGCGGC=CCGGCGGG8GCC=GC=GGG=C=CGGGGGGCGGGGCC -@gi|10007|ref|NC_07.1|-1/1 -AACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGCTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCT -+ -CCCGGGGGCGG=GJGJJJGJJGGGJGJJGCJJJJGGGGCGJJGGJJJGJ8JGGCJJGCGJGJGJCGCGGGJGGGGGJGGGGG(GGGGGG1=CGC==GCGGJCGGGG=G8GGGGGCGCGCCGCGGGGGCGGGG8GG1C1GC1GGG18GCGG -@gi|10008|ref|NC_08.1|-9/1 
-CCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCC -+ -CC=GGGGGGGGGGJJJJJJJJGJCJJJJCJJJCJJJJGJ=JJJG8JJJGGJCGCGGJCJJ8GJCCG=GGGGG=GG=CGGGGGGCGGGG8GG=GCCCGCGGJC1G8GGCCGGGGCGCCGGC1=CGCGGGGGCGCCGC8GGCCC8GG8GGGG -@gi|10008|ref|NC_08.1|-7/1 -AAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGG -+ -C=CGGCGCGGCGGJJGJJ1J1JGJJGJJJJJJJJJGJCGJJGJJGJJCGJJGJ=JGCGJG(G=GGJJG=CCGG=GGGCJ=CGGCGCG=GCGGGGCGGCGGJGG18GGGCGGGGCGGCCGGGGCGCGG=GGGGGGGGGGGGGCGCGCGGGG -@gi|10008|ref|NC_08.1|-5/1 -TGCACTTCCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTA -+ -=CCGGCGGGGGGGCG(JCGGCJJJJJJJJJG1GCJCGJJJJJJG8GGJGJJGJJJG=GGJGGJGGGGJGCGCCGGCGJG8GGGC=GGCGGCCCCGGGGGGJCGGGCGGCGGGG8C8G=CGGGGGGGGGGG=GGG=G=GCCGGCCGGGCGG -@gi|10008|ref|NC_08.1|-3/1 -TTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCTGCATTGGATG -+ -CC=8CGGGGGGGGJJGJGJJJJGJJJGGJJGGJGGJJ=JJGJ=JJGGGJ8CGJG=GJCJGJJGGCJ=JJGGCGGCGCGGGGGGCGGGGGGCGGGGGG1GCJCGC(GGCCC8GGGCGG=GGGGGCGGGGGGCGGGGGGC=(CCGCGGCC=G -@gi|10008|ref|NC_08.1|-1/1 -AGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTATTCT -+ -CC=GGGGGGGGGGJGJGJCGJJGJ=JJGJJJ=CJJCJGGJJ=GJJGGJJGGGCGJCCGGJG8JGGGGGJJGG8JGGCGGCGGGGGGGG=CCGC=18GGGGJCCGCGGGGCGCCGGGGGG=GCG1CG8=CGGGGGGGCGG==CCCG8GGGG -@gi|10009|ref|NC_09.1|-9/1 -CCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGTTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCCGATG -+ -CCCGGGGGGGGGGJJJJJJJJGJJCCJJJJJJJJ=GJJJG(GJJ8GGGJG8JGJCJJJCGJCCG8CGGGC=GGGGGJ18GG=G=GGGGGGCGGGG1GGGCJGGCGGGGGGCGGGGGGGC=GGGGGCGCGGGCCGGGGCCCGGCGCCGGGG 
-@gi|10009|ref|NC_09.1|-7/1 -CTGCGAACAGCCCCAGCCCCCTTGGTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAATGAAAGTCCCAAAGGCTACTGTCGCGTGAGTT -+ -CC=GGGGGGGGGGJJGJJJ8JJJGGJJGJJJGJGJJGGGJJJJJJJJJJJJJCJJGGJGGGGJCGJG1=JGGGGCGG=GCG=GGGGCGGG1GGG88GG=GJGGCCG(GGGGGGGGCGGGCGGCCGCCGGCCCCGGGGG=G=GGGGCGC=C -@gi|10009|ref|NC_09.1|-5/1 -TTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTCCTGCTACTGCCGAAGTCACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGATATAGACA -+ -CCCGGGGGGGGGCJJJJGJJ8JJJJJJJGJJ8JGJJJCGGJJJJGJGJJGGJJ=GGGJG(GJ8JGJG8JJGGGGGGG8GGCCGGCGCGGGG=GG(GGGGGJCCCGG=GGGCGGGGGG8GCGGC=GCGCGGGGGGGGGG8CGC=CCGCGGG -@gi|10009|ref|NC_09.1|-3/1 -GGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGTTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGC -+ -CC1C=GGGGGGGGCJJGJGJGJJGGGJJJJJJGJJGGJJCCGJ8GJJJJJCJGGCJJGGGCGGGGGGGJGG1GG8CCGGGGGGCG1G=GGGGGCCCGGGGCCCGGGC(C1G1GGCCGCCGG=CGCGGGGGCGGGGGCGGGGGGGGG=CGC -@gi|10009|ref|NC_09.1|-1/1 -TGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAATGAAAGTCCCATAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTGCTGCTACTGCCGAAGT -+ -CCCGGGGGGGGGGJ=GJGJGJJJJJJ8GJ1=GGJJGJ8JJJJJ1JJGGJJGGGGGGJGGGGGJGGGJJGGGCGCCCGGGG888GGGGGGGGCGG=CGGCCJGCGCGGC=GG=GGGGCCC=G=GGGGGGGCGCCCGGGGGGGCGG=GCCGC -@gi|10010|ref|NC_10.1|-9/1 -CTTGAAAAGTTGTAACCAAACGTACGAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCTTAGCCAATATGACTTA -+ -CCCGGGGGGGGG8JJGJJGJJJCJJ(CGGJJJGJ8JJGCGGJGGG=GJJJJGGJJJJJCJGGGGCGC(GCGG8GGGG=CGGGG8GGGCC==GGGCGGGGCJ=GGGGCCGGGCCGCG=C8GCCGGGCGCGGGCGC=GGCGGGGGGG=G1CG -@gi|10010|ref|NC_10.1|-7/1 -ATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGTTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCC -+ 
-CCCGGGGCCGGGGJGJJGGJGJJJ1=CJGJJJJJGCG8GGJJJJJJJJCJGGJJCJJGGJGCGGGJGJJGCG=GGJGCGGCGG=GCGCGGC8GGGCGGCGJC8G=GGGGCGCCGCGGCGGG=1=GGCCCGGGGGGGCGGCG1CGGGC=GC -@gi|10010|ref|NC_10.1|-5/1 -CCCTTGGGTTAAGTCATATTGGCTAAGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAATGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTGGTTACAA -+ -CCC1GGGGGGGGGGJGJGJG=GJGJJJJG=JJJJJC8JJJJJJGGGGJJC1JGGJJJGGGGGCCJJGJGCGGCCGGGJCCCCGGGGCGCGGGCGC=GGGGJCCGGGGGGGGGGGGGGG=GGGGGGGGGGGGCCG=CGGGG=GCGCGGGGC -@gi|10010|ref|NC_10.1|-3/1 -CAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTGTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATT -+ -=CCGGGGG1G=GGJJGGJJJCGJJJJGJJCCCGJJGJGJJGJ=JJJJJGJJGJGJG=C=8CC=GGGGCGGGGGGG=GCGGGGCG=C=CC=CGCCGGGGGGJCCCCGCGGCGGG8==CCCGCGGGGGCG1GGGGCCGGGGGGGCGGCGGGG -@gi|10010|ref|NC_10.1|-1/1 -TAATTTGCTTAGTAATGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTGGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGG -+ -CC=CGC=GGGGGGGGJJJJCGJJCJGJJCJJJJJCGJCCJ=JCGJGJGJGGC=GCJGGJGGCGJGG=GGC8GGC8GGGGC=GCCGGGG=G=GGGGGCGG=J(GGCGGGGGGGGG8CCGGGGGGCCCGGGGGGGCGGCGG=GCGC=8CGGG -@gi|10011|ref|NC_11.1|-9/1 -AATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGGGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCATTGCAGATTACTC -+ -CC=GGGGCGGCGGJ1JJGJGJJJJGJJG1JGJGJJJGGJ8JJJCGGGCJCGGJ=CCJGCGCGGGJC=CGJCCGGCCCGGGGGCGGGGG8GCCGGGGGGGCJGCCGGGG=GGGGGGGC=GC1GGGGGGCCGGGCGGGCGGCG88CGGG=GC -@gi|10011|ref|NC_11.1|-7/1 -AAGTCAAATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCCAACACTCGATTTAATTTGCTTAGTAATGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGA -+ -CCC1GGGGGGGGGJJ1JCGJJCJJJJGJJJJGGGGGJJJJGCG=JG=JGGJJGGGCCJJGGGJGGCGGCGGCGGGCJGCCGGCCCGGGGC=GG8CGCGGGJCGCC1GCGGGCGGGGCG=G=CGGCGCGGCGCGGCGGGGGCCCGGGGCCG -@gi|10011|ref|NC_11.1|-5/1 
-CAAACAATGCACTTCCGGCGGCTATTCGAGGGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCATTGCAGATTACTCCGATGAGCGAAACAAAGAGTGGG -+ -CCCCCCGCGGGGGJJJJJJJJJJJJJGJJJ=JJGJCGGGJJGJGJJJJGGGJCJGGCGG1=GG8J=CCCGGJGGGG1CCCGGCGCGCGCGGCCGC=GGCCJG=GGGCGGGGG=GCCGGGGCGGGG=GGGGGCGGGCCGG=GCGG=GGGGC -@gi|10011|ref|NC_11.1|-3/1 -GTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGT -+ -CCCGGGGGGGG=GJCJJJGJJJJCJJJJJJJJGGJGJGJGJGGJJGGJJGGJJJJGGJGJCGGGGCGGGGG8GGGC=GGGGCGGGG8=CGGGGGGGGG=G1CGGGGGGGGC1GGGGCGGG8GG8=GGGCGGCGGGGCGCGGGGGGCCGGG -@gi|10011|ref|NC_11.1|-1/1 -GGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGGGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAACGGTCACTCTTGTGTCTATCTCGCATTCA -+ -CCCGGGGGGGGGGGJGJJJJJGGJG8JJGJJJJJGJ8JJJGJJJJGJJCGGCJGGJGGGJGGGGCGGGGCCGJGCGGJG8G=CCGGCC=GGGCC=GGGGCJGGGGGGGGGGCGGG1GGCGC(GGCGGGGG=CGG=CGGGGCC8GGGGCCG -@gi|10012|ref|NC_12.1|-9/1 -CGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAGAGGGTGACTCTGGTGTCTATCTCGCATTA -+ -CC=GCGG=GGGGGJGJJGJJJJJ8G=JJJGJJJ=JJJJJGJGJJ=JJGGJJJJJJCJGJGJJ=CJGGGGCGCGGGGGGGGCGGCG=GGGCGGCGGCGCGGJC=GGGCCCCGGGCGGC8CGGGGGG=GCGCCGGGGGCCGGCG=CGGG8GC -@gi|10012|ref|NC_12.1|-7/1 -CAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTTAATGCGAGATAGACACCAGAGTCACCCTCTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATAGATCCCTCTC -+ -CC8GGGGGGGGGGJJJJJJJ1GJJJJJCJJ1GGCJJJJCGJJCJJJGGJJGGCGGCCGCGCCJGGGCGGG=GCCGG=GCGGGCGGGGCGCG=GGGGGGGGJC=CGG1GCCGG1GGGCG1CGGCCCGGCCGGC1GCGGCGGGGGGGCGG8G -@gi|10012|ref|NC_12.1|-5/1 -ATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTA -+ -C=1=1GGGGGGGCGJJCJJGGGJJJJCJJJJJJJJJJJJJCJJCGJ(GGJGJJJJJJ8GGGCCCC8GGGCGGCGGCGGGGCGCCGC8GCGCGGG(GGGGGJGC8G=GGGGG=GGCGCCGGGGCGCCCG=GGGG1GGCCGGCCGCCC8CGC 
-@gi|10012|ref|NC_12.1|-3/1 -GTAGTAGGCGTAGATTTGTCTGCGAACAGCCACAGCCCCCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAA -+ -CCCGCGGGGGGGGJJJGCGJJJJJJJ=JJJCJJJJJGJJJGJCJJCGJGJGGGGGGJGJJ=CGJGCGJ1GGG8J=G8GCCCGGGGCCGGCG(CGGGGGGGJCGGG=CGGGGGCGCCGGCGGGGGCGGG=GGGCGGGGGCCGCGGGC=GGC -@gi|10012|ref|NC_12.1|-1/1 -ATCTCGCATTAAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCTCTTCCGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGT -+ -CCCGGG=GGGGGGCJJJGJJCGJJJJJJJJJGJJGGJGCGGJGJ8GGGJGGJGJGJGGGJCJ=GGCGGGJJGGCGGGGCCCG1GCGGGG1CGGGGGGCCCCCGGGGG=CCGGGGGGGGGGGGGGC=CGGGGGGGGC=GCCGGGCCGCGCG -@gi|10013|ref|NC_13.1|-9/1 -CGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCAATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAG -+ -CCC8CGGGGGGGGJJGCGGJJGJJGGJJJJG(J=CJJJJJGGGGGCJGGJJJ=GGCGJCCGGCGGCCJCGGGCGJGGGGCCGGGGGGCCGGGCGCGGCG(==CCGGGGGGGC=GGGCCCG8GGCCGGGGGGGGGCCGGCGCCCC=GCC=C -@gi|10013|ref|NC_13.1|-7/1 -GACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGTTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGC -+ -CCCGGGGGGGGGGGJJJJ1GJ8JJGJJJJJJG=CGGJJJCJJGGCJ=JJ=CC=J8GGJC8GGGGGGGCGCGJCGCCGC=CCCCCGGGC=GGGGGGGCCGGJGCGGG=CGCG=CGG=CCGCGGGGGGGGGGCCGGCCCCGCC=CGGGGCG1 -@gi|10013|ref|NC_13.1|-5/1 -AAGTAAAGGGTGACTCTGGTGTCAATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACAT -+ -C1CCGGGGGGGGGGJ(GJJJG=GC8JCJJJJGJGJGJJGJGJGGCCGGGGJJG=JGJJJJGJ(G8GGJC(GGJGG=JCGGGCCGCGC=GCGGCGGCCGCGJGGCGGGGCGGG8CGC=CCGGCCGGCGCGGGGGGC=CGGGGGCGG=GGCC -@gi|10013|ref|NC_13.1|-3/1 -GCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATTGACACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATAGATCCCTC -+ 
-=CCGGGGGGGGG=GCJJJJ1JJGGJCGJGJJJGJJJJJGGGGJGJCJGJGGGGJJJJJG=JJGJJCGG18GCCGGGGG=GGGCCCGG1GC=G8GGGGCCGJ=1GGGCG8GGGGGCCGGCGCCCCGG=G8CCGGCGCGC=GGGGGCGG8GG -@gi|10013|ref|NC_13.1|-1/1 -CAGCCCCCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGTTACGTTTTGTT -+ -=C=GGGGGGCG=GJJJGJ8JJGJJJJCGJJJJJJJGGJJGJGGJJJJCGJJJJGCJJCJGJGCGGG=GCGJGCCGCGCC=GGGGGGGC=CGGGGGGGGCGJCCGC=GCC8CCGG1CGGGGGCGG=C=GGCGCGGGGGGGCGCGG=GGGGC -@gi|10014|ref|NC_14.1|-9/1 -ATTGGGCGAAACATACTTGCTTACTCTTGATAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGGGTTGGTATTCTTGTAGTA -+ -CCCGG=GGCGGGGJJJJG8JJJJCJ8JJGJJGJCGJCGGGJ8CGJJJJJJJGGGG=GCGGJ=GGCJCCCGGCGGGGCCC1C=CGCGGGG1GG=GGCGGGGJGGGCGGGGGGGGGC=GGGCGGGGCGGGGCGCCGCGGGGGCCGG=G=GGG -@gi|10014|ref|NC_14.1|-7/1 -TACAAGAATACCAACCCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGCCGCGTGAGTTGGTACGTTATGTTACAACTTATCAAGAGTAAGCAAGTATGTTTCGCCCAATTCA -+ -CCCGGGGGGGGGGJJJ=GJGGGGJGJGJGJGJJGGGJ1JGGJJCJJJJJGJ=JCGJJGGJCJGJ1G=GGJCJGJ=GGGGGGGGCG(CCGGCGGGGGGGGCJC=GGCCCCGGGCCGGC==GGCGCGGGGCCGGGGGCGGGGGCGGGGGGGG -@gi|10014|ref|NC_14.1|-5/1 -TCCCCAAAAAATCCCCGGGGGAATATTTCGACCACACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGGTCGGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATGC -+ -CCCGGGGGGGGGGJGJJJCJJJJGJJGCCJGGJJJJJGJGJC=JJGGJCGGJC8JGJJG=GGGCGJJJCGCGGCGGGCGGGGCGGCCG=GGGGCGGCGGCJ==GCGCGGGGGGGCGGGG=GGGGGGGGGGC=GCGCGG1GGGGGGGGGCC -@gi|10014|ref|NC_14.1|-3/1 -ACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAATTGGTACGTTATGTTACAACTTATCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTCTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATG -+ -CCC=GCGGGGGG8JJJJJJJGJJJCJJGJJ1GJJGJJJJJJJCJJCJJJCC=(JJ=JGGJGGCGJGCGGGGCGG8=GGGGGGGGGCGGGCGGGG=GCCCGJC=C8CCGGG8GGG=GGGGGG=1GG==CCC==GCCGCGGGGGGCGGGGCC -@gi|10014|ref|NC_14.1|-1/1 
-GCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAGAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGATAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACT -+ -C1CGGGGGGGGGGJJGJ=JJG=JJGJJJGJJ1JJGJJCJGJGJJJGJGG=GGCJGGJCCJJJ(GGG=GGGGGGCGGGGGGGCGGGC=GCCGCCGGGGGCCJGGC=GGGGGCGGCCGCCCGGGGGCGGGC1CGGGCGC=GCGGGGG8C=CC -@gi|10015|ref|NC_15.1|-9/1 -GTCTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAG -+ -CCCGGGGGGGGGGGCJGCJJJJJJGCJGJGGJGGJJJCJCJGGGGGJGGJJJGJJJJGJGJCGCGCCJGGCGGG==G=GGGGGCCGG=CGGCGGCCGGCCCC=GGCGCCGCGG=C=G=CGGG=GCGGCGGGGCGGGGGG8CCGCC=CGCC -@gi|10015|ref|NC_15.1|-7/1 -ATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCCCATCGGAGTAATCTGCATTGAATGCGAGATAGACACCAGAGTCACCCATTACTT -+ -CCCGGGGGCGGGGCJJJCJCCGJJGJJJJGGGGGGJJJJJJGGGGJGGJJGJCGGGGGJ1GGGJGGGJGCG(GGC8CGCGGGGGGGCCCCCGGGGGCGGGJCGGGGGCGGGGCGGCCGGGGGCCCCCGGCCGGGGGGGGGCGGGCGGGCC -@gi|10015|ref|NC_15.1|-5/1 -GACCATACTACAAGAATACCAACCCATTAAATTTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGC -+ -CCCGGGGGGGGGGGGJGJJGJJJJJGCJGJJJGJCJJJJGGGJJGJJJCGJ8JJJGJJJ(GJGGGGGCGCGJGGGGGGGGG=GGGGCGGCGGGCCC=GGGCGGGGCGCGG=GGGGGGGCGG1GGCGCCGG=GCGGGGGCGGCCGCGCGGG -@gi|10015|ref|NC_15.1|-3/1 -GATGACCATACTACAAGAATACCAACCCATTAAATTTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAACTTTTCAAGAGTAAGCAAGTATGTTT -+ -8CCGGGGGGCGCGJJGJJJJJGJCJGGJJJGJGGGJJJ1JJJJGGGJCCG8JJGJCJCGGJJJCCC=G8=GJJGCCGGGGCGG1GGCGGGGGGGGGGGGG=CG8GCGG=GCGGCGCGGGGGGGCGGCGGGCGCCGGGGCGGGGCGCGGGC -@gi|10015|ref|NC_15.1|-1/1 -TAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGAAATTTCATGGGT -+ -CCCG=GGGGGCCGCJJJCGJCJJJJJJGGJJGGJJGJJJGGJGGJJCJJGCJGGGG8=JGJGGJG=CGGGJGGCCCGGGGGGGCGGGGGGGGGCGGCGGGJCGGGCCGGGCGGGGCGGGGGGCGGGG8GCGCGGCG8C1GCCC1G(CCG8 
-@gi|10016|ref|NC_16.1|-9/1 -TGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCACTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGTTGTAACAAAACGTACC -+ -=CCCCGGGCGGGGJGJCGJJJJJJJJJJJJJJGGJJJJGJJJJGGJJGCJCJJJJJCJ=JGGG1GGGCJGGJGGCGCCCGGCGG=CGCCG=G(GGGGCGGJGGCCC8GCCCCGGGGGCG=GGGGGGGG=GGGCCCGGCCGGCG=GGGG=C -@gi|10016|ref|NC_16.1|-7/1 -CTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCACTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCT -+ -CCCG8GGGGGGGGJJJJJJJGGGJ8JGJGJJGJJJJJGJJJGGJJJCGGJGJGG8G=GGGCCGJGCGCGCGCGJGCGGGCGCGCGGGGC8CGCGCGGG=GJGGCGGGGGGGGGGGGC8GG=GGCGG=GGC1CGGGGGGCCGCGGGCCGCG -@gi|10016|ref|NC_16.1|-5/1 -CCAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACCGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCA -+ -CCC1GGGCGGCGGGGJJGJJJJJJJJJCJJCGJJGJJJJGJ(JJCJGJJCJG(G=JJJJGGJJ=GCJGCJGCJGC8=GG=GGGGCGGCGCGCCGCG=CGCCCG1GG=GG1GCGGCCCGGGGCG==GG=CGGCGGCGGCCCC=GCGGCGCG -@gi|10016|ref|NC_16.1|-3/1 -GCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGTGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCC -+ -CCCGGG=GGGGGGJJGJJJJGJ1JJJJGJ=GJJJCGJGGJJJJJGCCJJGJJCJJGGJJ=JJGGJJGGJCGJJCGCCJCGG=GGGG=GCGG=GGGGCGCCJG=GGG1CGCCGC1GCGGGGGG8GGGGGGGCCGCGGCGGC=GGCGGGGGC -@gi|10016|ref|NC_16.1|-1/1 -CAAACAATGCACTTCCGGGGGCCTTTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGG -+ -CCCGGGGGGGGCGJCJJCGGJJJJJJJJCJJ1JJJGJJJGJJGJGGJ=GJGJJJGGGCJGJGGGJGGG1G1GCCCGGGG=GG=CGGGCGCGGGCGGGGGGJCGCGCGGG=GGGGGGC(GGCCGGCCGGGCCGGCGGGCGCCGGCCCGCCC -@gi|10017|ref|NC_17.1|-9/1 -CTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCCATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATAACTGTCACTTCGGCAGTAGCAGACAAGTTTGAA -+ 
-CCCGGGGGGGGGGCCJJGJJJGJJGGJJGCJJGGGJJGGJJGJGGJCG==JJGGGGGCCJGJGGCJCJCGGGGGCCGCGGGG=GGGCGGGGGC8GGGGG=C=GCG=G1G1GGGGGG=G=GGGGGGCCGGGGCGCCGGGGGGGGGGCGGC= -@gi|10017|ref|NC_17.1|-7/1 -CCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATAACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAATTGTAACAAAACGTACCAACTCACGCGA -+ -CCC=GGGG(GG1G1JJ8GJJGJJJGJGGGJGGJGGJJCJJJJGJJJGJ1GGG=1CGJGGCCJGJJGGGGGGGGCCCCGCCGGGGGGCG=GGCGGGGGGG=CCCGGGGCCCCGGGGCGGGGG=G=GGGCGCGGGGCGGCGGCCG1CCCGCG -@gi|10017|ref|NC_17.1|-5/1 -AGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTTATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATGGGATGCGAGATAGAAACCAGAGTCACCCTTTA -+ -CCCCGGGGGGGGG=JJJGJJGCJJJJGJCJJJCJJ(JGGJJCCJCJGJJ(G=CCJGG(GGJJCJGJJGCGGGG1CGGGCGGGGGGGC(GGGGGGG8GCGGJCC=CGG=GGG=CGGGCCGGGCGCGGCGCGCGGGGCGGCGCGGGGGCGGG -@gi|10017|ref|NC_17.1|-3/1 -GATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATAACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAATTGTAACAAAACGTACCAACTCACGCGACA -+ -CCCGGGGGGCGGGGCJGGGJGGJCJJGJ8JJJGJGGJJJGJJGGCGGGGG=JJGJGGCCGG1CJGCCG(GGGGGCGCGCGGGGGGC1GGCG=CG=GCC=CJCGGGCGGGGCGCGCC8CGGCCGGCGCGGCGGGGGGGG8CGCG=CGCCCG -@gi|10017|ref|NC_17.1|-1/1 -AAATGTCTACATTCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAATTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGA -+ -CC=GGGGGCGGGGGJJGGJJGJG1JJJJJJJGGJ=JGGJGCGGJGJCJGJGGGGGCGJJJGJ=GG=GJGCCGGG=GGCGGGGCCCGCGGCG8=GGGGCG1JGGGGGG8GCGG=GGGGCGCGCCGGGCGCGGGGGGCGCCCGCGG=GGG=G -@gi|10018|ref|NC_18.1|-9/1 -ATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTGGCAAATTAAATCGAGTGTTAGCCGC -+ -C=CGGGGGGGGGGJGJJJJJJJJJ=JGJGJ8=JJJ=JGJGJCCGJGJJJJGGGC1GJGJCJJCGGGGCG=GCCGGGCGGJCCGGCGGCGGGGGGGCCCGGC1CCGG=GCGGGGCG=GGCGCGCGCGGC=GCCGGGCCG8GGGGCCGC8GG -@gi|10018|ref|NC_18.1|-7/1 
-CGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTGGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCTGCATTGG -+ -CCCCGGGGGGGGGJJJJJGJGCJJJJJJJJGJGJJJCJGJGJCJJJJG==GGCGCGGJ8CGGJGGJGGGGJJG=G1GGCGGGGGCCGGGGGCGGGCGGGGCGCGGCGG=CCGG=GGCGC81GCGGCCCGGG=GGGGCGGCG1CG=CCCGG -@gi|10018|ref|NC_18.1|-5/1 -TCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTC -+ -CCCGGGGGGGGGGJJGJJCGJJJJJJJGJJJG8GJJJGJJJJGCGJGJJGJJJJJGGGGJC=CCGJG1CGGG=GGCGC8CG1CGGGGGCGCGGGGGGCGGJC=GCG=81CCGCG=GCGGGCGGG=GGGGGGC===GGCGGCGGGCGGGGG -@gi|10018|ref|NC_18.1|-3/1 -ATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTGGCAAATTAAATCGAGTGTTAGCCGCATCTAGACATTTAATGAGTTGGTTTTCTTGTAGTA -+ -CC=GGGGGG8CGGJJJJ1JJJJJGGJJJG8J=JJJJCGJJGJJGGGJJGJJ=CGGGJJJJJGJG=GGCJCGCJCCCG=CGG=GGGGGCGCCGGCCGGGGGJCGGGCGGCGC==G8GGGGGGCG=GGGCGG=C8GGG=GCGGGGGCGCCGC -@gi|10018|ref|NC_18.1|-1/1 -GCTAACACTCGATTTAATTTGCCAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCC -+ -C1CGGGGGGGG1G1JJJ=GJJJGGJG(GJJJJJGJGJGJGGCGJCJJJJJGGGJJJJJCGGJ=CJGGG(8GGC8CCGGGGGG=8CGGCG8GGGGCGGGGGJCGGGC=CG8GGGGGGGGCCGG=CGCCCGGGGCGGGGGCGGGGCGC(GG= -@gi|10019|ref|NC_19.1|-9/1 -CAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAATGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCGGCATTGGATGCGAGATA -+ -CCCGGGGGGGGGCJGGGJGJJCJJJJJJJJJJJJCGJJJJGJGGGGGGGJ1CJGJGGGGJC=G8GGGCGC=GGCGJGCCGGC8GGGCCGGGGGGGGCGCGCCCGGCGCGCCGGCCGCCGGGGCCCG=GGCCGGGGCGGGGGG=G1CGCGC -@gi|10019|ref|NC_19.1|-7/1 -CAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCCAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTATT -+ -C=CGGGGGGGGGGCJJJJJJGJJGJJJGJJJJJ(JJJGGJJGJGGJJGJG8GJJG=JGGGCGG8GJJCC=G8GGGJCCG=GGCCCGCG=GGCCGCGG=8GCGGGGCGGGGGCGGCGGGGGCGGGG=CGCCCCCCGGGGC1GGCGGGGCC= 
-@gi|10019|ref|NC_19.1|-5/1 -CGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCCGATTACTCCGATGA -+ -CCCGGGCGGGGGGJJJJGJJJJGGJJJ1JJG=JCJJCJGJGGGCJJ8GGJGGGJJ=GG=JCGJJGGGGGGGGCGJ=GGCGGGGGGC8GG=GCGG=GCGGGJGCGGGGCGCGCGG=GGCGGCCGGGGGGGGG8G=GCGGCGGCGGG8CCCG -@gi|10019|ref|NC_19.1|-3/1 -CAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCCGATTACTCCGATGAACGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCATTTCG -+ -CCCGGGGGGC1GGGGJGJJG=JJGJGJJJGCJJJJJJCJJGJGGJJJ=CGGCCJGGCJGJCGGCJGGJG=GGGGGGCGGG1GGCGGGGG(GGCC=CGGC=JCGCCGGGCCGCGCGG1CCGCGGGCGGGGGG8CGCGC8GCCGG88GG==G -@gi|10019|ref|NC_19.1|-1/1 -TTGTCTCCGAACAGCCCCAGCCCCCCCGTTTAGGTCATATTTGCTACGATGACCCTACTACAAGAATACCAACTCATAAAATGTCTACATGCGGCTAACACTGGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTG -+ -CCC=CGGGGGGGGJGJJJG8CJGGJJGGJJGJJ=JJGGGJJJJJCJGJGJJGGGJCGJGGGGC=GJJJCCGGG=GJ==GGGG(=CGCGGGGGCGGGCGGGJ=CGCGGC(GGGGGGGGGGGGGGGCGGGGGCGGGCGGGCGGC=GGGGGGG -@gi|10020|ref|NC_20.1|-9/1 -TATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATAATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACT -+ -CCCGCGGGGGGGG=JJJJGJGJJ=JGJCJJJJGGJGJCJJJ8GG=GJJCJGGCJGCCGGGCCGGGJG1JGGGG=GGCC=GCGGGG=GGGGCG1GGGCGGCJCGCCGGGGG1GCGCGGGCCGCGGGGC=GGCGG8C=GGGCGGCGGGGCCG -@gi|10020|ref|NC_20.1|-7/1 -ACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACCGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTACTCTTGTAGTAGGGTCATCGTAGCA -+ -CCC=GGGGGGGGGJJJJJCGGGGCJJGJJGGJJJJJJJJJJGJJCJGJGGGGJGGGCJGGJGGGGGGCGC=JGGGGCCGGGGGGCGCCCGGCCGGGGC8CJ=CGGGGGC1GCGCGGG===G=C=G=GGGGGG=GGGGGGCGCCCGCGCC= -@gi|10020|ref|NC_20.1|-5/1 -GGTAGTAGGCGTAGATTTGTCTCCGAACAGCCCCAGCCCCCCCGTTTAAGTCATATTTGCTACGATGACCCTACTACAAGAGTACCAACTCATAAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAA -+ 
-CCCGGGGGCGGGGJJGJJJJJJGJG=JGJJJJJGJGGJJGJCJCJGGGJJJGJC=JCJ1GGJGJ8=CCGCG=CCCGGGJGCGG==GGCGGGGGCGGC8GGCC1CGCCCG=G=8CC=G1CGGCCGGCGGCGGGGGGGGGGG1CG=CGCCGC -@gi|10020|ref|NC_20.1|-3/1 -ACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACGGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAG -+ -CCCGGGGGGG1CGCJJJGJJJJGJJJGGJJJGGCJ8GGJGCJGJ1JJJ=GGJJJJGCCGJJGCGGCGGGC1GCCCCCGGCCG=GCG=GGGCGGGCCGG8GJ8GGCGGGGGGGCGGGGG1GCCC8GGCCGCCGGGCGGCC=GGGCGCGCGG -@gi|10020|ref|NC_20.1|-1/1 -TACAAGAGTACCAACTCATAAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACGGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCA -+ -CCCGGGGGGGGGGJJJJJJJGJJJ=GJJ8J=JJJGJGGJJGGJGJJCJGCGJJGCJGGGGCGJJ=CGCCCJGGGGCGGG1GGGGGGGCG=GGGGGGG=GGCCGGCCGGCGGCCG=GGG8GGGGCCGGG1GGC=GGCGGG8GGCGCCGGCG -@gi|10001|ref|NC_01.1|-9/2 -CCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATACAGA -+ -=C=GGG=GGGGGGJJJJJJGJJJJGJJJJJJJ==JJ1JJJGJJGGCCJ=JCGGJGGG(JGGJCGJGG(G1GJGCCCCCCGCGGGCG=GG81=GCGG8=GG=CCJJJCC(GGC=GGC1GGG81G1CCGCCGGGGCGCGGGGCC=GG(GGGG -@gi|10001|ref|NC_01.1|-7/2 -GCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATTGATCCCTC -+ -CCCCCGGGGGGG(JGJCJJJJJJC(1JJGGGJCJJGJJJ1GGGGJJCJGJJGGGGGGGJGJCCJG==GG8GCCGG=CG8GGGCG8GCCCCGCGC=CCCG=C=CJJJJCGGC=GGGGCCGGGGGC=GCC=1CCCGGCGGGCCGCGCCCGGC -@gi|10001|ref|NC_01.1|-5/2 -AACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCG -+ -C8CGGGGGGGCCGG1JCJJJGGGCCJJJJGJJG=J1GGGJJJJJJJ1JJJGCCJCGGCGGGGJJGCJGGCGGGGGGGGGCGGG=GGCCG=GCGCCGGGG8C1C=JCJ1CCGCCGCGGGGGGCG=GGG=GGCCGGC8G1=CCGCGCG=8CG -@gi|10001|ref|NC_01.1|-3/2 
-ACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCA -+ -=CCCCGGGGGGGGJGJJ8GJJ1JJJCJJJCJCGJGJGJGJJJJJGJC=JGGGGCGJG=JCCGGGCGJGGG8G8G=GGCGCGGGGCGGGGGGGC=CGGCG=GGJJCJ8GGG=CG1CGGG=1CGGGGG1G=GGGGCCGGCGGGG8GGC=C8C -@gi|10001|ref|NC_01.1|-1/2 -GTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTA -+ -CC=GGGCGGGCGGJJGJCJJGGJ1JCJJJJJGJJJJJCJGJJJ8JJGGJJ=C=GCGGGCGG(J(JJGGJCJG=GGGGJCCGCCGGGGGCGGGGGGGG8CGC=CC(JCGGGGGGCGGGCCGGCGGGGGG8GCCGGGGG8CGG=CGCGC=1C -@gi|10002|ref|NC_02.1|-9/2 -ACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGA -+ -CCCGGGGGGGGCG=JCJJGJJJJJ=JJJ1J1JJG8GJGJJGGJGGGGJJJGGGC8GJGGJCGCJGCJGGJ(GCGCG=GC1CGGGGGCG1GCCGGGGGGCCG==CJCJGGCGCGCGGGC=GGC=GG=CG8CG1G(CCC1GCCGGG=CGGGC -@gi|10002|ref|NC_02.1|-7/2 -GAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAAT -+ -CCCGGGGGGGGCC1JGJJJ=JJJGJGJCJGJJJ1JJJGJJGGJGGGG==JCJJJ8GJJJJCGGGGJGGCCGCGGGGJCG1=GGGGGGGGCGGGGG=GCGGCCCJJJJG1CGCCG=GGGGC==CGGGGGGGGCCG=CGGCG8=GCGGCCGG -@gi|10002|ref|NC_02.1|-5/2 -TGTAGTAGGCGTAGATTTGTCTGCGAACAGCCCCAGCCCCCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAA -+ -CC1CGGGGGCGGGJGGJJJJCJJJGJJGJ=J=JJCJGJGGJGCJJG1J8GJC=GGC=GGGG(G1GJGJGJ=G=CGJGGGG8GG=GCCGGGGCGCCCC1CGGCJJJ1J1GGGGC=GGCGG8GCCCGGGGG1GGCCGGCGGGCGCGGGGGGC -@gi|10002|ref|NC_02.1|-3/2 -TTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAG -+ -CCCGGGGGGGGGG=J==JJJJ1GGCJJGJJJJGGGJGJGJCGJGGJGGCGGJGGJGCGGJCCGGGGJGCCCGGCGJGCG=GGGGCGCGGGCC=GGGCGCGGCJ=JJ=GGGGCCCGGG=CG1G==CGGGCGCG==GGG(CGGGGGCGGCGG 
-@gi|10002|ref|NC_02.1|-1/2 -GAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTT -+ -CCC=GGGGGGGGGJCJJCGJGJJGGGJJJJCGJJJGCJCGGJCJGCJCGJJGGGJJGJC=CGGJ=JCCGGG1JGGCGGGGCCGGGCG18G=C8GC=GGCCGGCJC8CGGGGGGCGGG=GGGGGG=C1GGCGG(=CCCG=GCGC8CGGG=G -@gi|10003|ref|NC_03.1|-9/2 -CCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATTGATCCCTCT -+ -CCCGGGGG=GGGGJG1JJCJJGJJJJJGJG1JGJCJG=GGJJ1CGGCGJGJJGJGGGJGGCGGGGG8GGCJGCJGGCGJCGGCGGGG1GGCGCGCG=CGGGC8JCJJGCG8CCCGGGGGGC=CCGG8GGGCGGGGC=GGCGGGGCGCCC= -@gi|10003|ref|NC_03.1|-7/2 -CATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCG -+ -C=CGCGGGGGGGGJGJJCG1JJJGGJGGGJGJJJG8GJJJJGC8=GJJGCJJJGCCGGGGCGGCCGCGCJ=GGCGGGGGG8CGG1GGG=C=CGGGG=GCCGCCJJCCG1GG=CCGCCGGCGGGGC8GCGGG=GGGCGCCG=1GCCGCGG1 -@gi|10003|ref|NC_03.1|-5/2 -CAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACGTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGT -+ -CCCCGGGGGGGGGCGJGJGJJJ1GJJJGGCJJJJCGJGJGGGGJGJJJCJGJJGJG=JGGJGGGCGGGGG8=GGG=JGG8CCGGGGCGGGCGCGGGCGC8=C=JJJJGCGGG=GGGCGGGGCCGGCC=GGCGGCGGCG=GGGGGCCGG=C -@gi|10003|ref|NC_03.1|-3/2 -CTTCCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACG -+ -CCCGGCGGGGGGGJGJJJJGJCJJGJJJGGCJGJGCJJJJGGGJGJGGJGGGGCJJ=JGC=GGC==GG=JGGGG=GGGJGGCGCGG8GGCGGGG=GGGGGG81JJCJGGG8GG1GGGGCGC8G8GGGCCGGGGGCGGGCGGCCCC1GCG8 -@gi|10003|ref|NC_03.1|-1/2 -CTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCAAATATGACTTA -+ 
-=CCGGGGGGGGGGJG1GGJGJJCGJGJJC1GCJJGGJCJJJJGGJGGJGGJCJCGJGJCGJGGGCJJGJGGGGJC=CGGG8G1G=GGGC1G=GGGGCGGCGCJCCCJGGGC1GCGGGGG1GGCCCGGGCG1CG1GGC=CCGGCGGCCGGC -@gi|10004|ref|NC_04.1|-9/2 -TTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGGTAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAG -+ -CCCGGGGGGGGG=JJJJJJJJGJJGJCGCJJJJJGJJGJJJ=GJJGJJGGGGGGCJJGJJGGJCG(GCGGGCGJ=CCGCGCGGGCGGGCCGG8GGGGGGCGCCCCJJGCGCCGG8GGGGCGGGGGGGCG=CGCGGGCGCCCCG=CGGCCG -@gi|10004|ref|NC_04.1|-7/2 -GGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAG -+ -CC8GGGGGGGGGGGJJJJJGJ=GCJJCJJCJJJJGGGJJJCJJJCJGJGJCJ8GJGG=J=CJG=GJCJGJGJJGG8CGGGGGGGCGGGGGGGGCGGCCGGC=J=JJCGCGGGGC=CGG=CGGCG==GCCGCGGCGCGGCCGGGG=G8GG= -@gi|10004|ref|NC_04.1|-5/2 -GTAGTAGGCGTAGATTTGTCTGCGAACAGCCCCAGCCCCCTGGGTTAAGTCATATTGGCCACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAATGAAAGTCCCAAA -+ -8CCGGGGGGG=G1GGJJJJJCJJGCJJ1JJCJJJCJJJJJGJJGCJGJJCJJCG=CCJC=JG=GCC=GCGGGGC=GCCGGGGGG=GGCG=CGCGGGGGCGGGCCCJJGGGCCCGGCGGGCGGGCCGGGC=GGGCC=G=GGGGGGGGCCGG -@gi|10004|ref|NC_04.1|-3/2 -GAGTTTGTAGTAGGCGTAGATTTGTCTGCGAACAGCCCCAGCCCCCTGGGTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAATGAAAGT -+ -CC=GGGGGGCG1GCJ=GC1GJCGCJGGJCJJJJJJCGJGJJCJGCJGGJGJGGJJ1CJGCGGCGGJGGGGGCGCG8G8GGGCCGCGGCGGGC=GGGGCCC=GJJ=CJGGGGGGGGGGGCGGGGGGG=GGGG=CGGCGGCCCGC=GC8GGC -@gi|10004|ref|NC_04.1|-1/2 -TTGCTTAGTAATGAAAGTCACAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGT -+ -CC=GG=GGGGGGGGJJG=J1JGGJGCGJGGJGJJJGJJGJG=JJGJGJJ=JGGJ=GJGCGGCGG=GJ=J=GCCGCGCGC=GGGGGGCGGGGGGCGGGGGGGCJ=(8JCGGCCGGCCCGGCCGG1GGGGGGCGCCGCCCGGCGC1GCC=GC -@gi|10005|ref|NC_05.1|-9/2 
-AGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACT -+ -CCCGGGGGCGGGGGJJJJGJJJJJGJJJJJJJJJJGCGJGJGJGJ=JGJJGGGJJJJJJGGGG=GCCGG=GJGGCCGGGJG1CCGC=GGGCCGGGG1GGGGCCCJJC8C=8==CCGGGGCCGGGGCGGGC=GGGG=1C=CCCGCCGCGCG -@gi|10005|ref|NC_05.1|-7/2 -TACACGCCCTCAATGCATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAG -+ -C8CG=GGGGG=GGJGJ1JGJJCGJCJ8GG=JJCGJCJGGGJGJJCJGCGJJCJ=GJJGGJG8GGG8JJGCCCGG=GGG1=CGCCGGCGGGG=GCGGCGGG=(JJJJJGGGGGGG=GGCC1GG=C=1CCGGGGGGGGGCC=G8GCGGGGCG -@gi|10005|ref|NC_05.1|-5/2 -AGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAG -+ -CCC1CGGGGGCGGG1JGGJJJJJJGJJJGJJJGJJJJJJ=CJJJCJGGCJGJGCCJC8GGGGGJ8CGG=CGCCG=CGCGG=CCGGGGGGGGCCCCGGG1GGCJ=CJJGCCCCGGGCGCGGCGGG1GGGCGCCG1CGGGCC1GGGGCGGGC -@gi|10005|ref|NC_05.1|-3/2 -TTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAATATGACTTAAACCAGGGG -+ -=CCGGGGGGGGGGJ1JJGJGJJJGJJJJGJGGJGJGGJJCJGJ1JJJJJJJGCGJ=CJGGC1GJGGCG=JGGCCGJGGGGCG=GGGGCGCGGCCGGGG=GGGC=JJ=GGGCGCGC=GGC=CG8C=GGGG1GGGGGGGGGGGGCGCGGGG= -@gi|10005|ref|NC_05.1|-1/2 -TGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGAGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATC -+ -1CCGGGGGCGGGGCJGJGJCJJJJG=JJ=JJCJJJJJJJJJGJJJJG1CJCGGGCJGJJGGGJG1G8JCGGG1GGGGGGGGGC=CC=1CGGGCGCCGCCGGGJJJCJ=8=GGG8GCGGCGGGGGCGCCCC==CGGGGCGGGGGCGCGCGC -@gi|10006|ref|NC_06.1|-9/2 -AAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTT -+ -CCCGCC=GGGGGGCGJJJGJJ1CJJJGJGGCJJJJJJJGJGCJJCJJCJJJGJ(GGJGGCJGCJCGGGGJGG=CGC8GCGCCCCCGGC=CG11GGCGGG1GCJJJJJGCCCGGGGGCG=GGGGGGC==GG=GCG1GGGCG=GCGG=CGGC 
-@gi|10006|ref|NC_06.1|-7/2 -AAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGGGTTGGTATTC -+ -CCCGGGGGGGGGGCJJGJJJJGJJJJGJJ=J=JG1CJGJGGJJCCJCGGGJ8JJJJCJGG1GGGGGG=CCGGCCGCJGGCCGCG=CGCCCGCGGG=GC=GGC8JJJJGGGCGGGG=GCGGGC=1=G=GCC(GG8CGGCC=GGGGGCGGGC -@gi|10006|ref|NC_06.1|-5/2 -CAAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACGCTTTGT -+ -CCCGCCGGCGGGGJ=JJGGJJG=GJJJJJ1JGGJJGJCJJJJ=CJCJJG(J(CJG=GCGGCGGGCGGJGGGCGGGG1C=CCGCCGGC=G=GCGCGGGCGC=CJJJ=CGCCGG1=GGG=GGC=GGCCG(GCCGGGGCGGGCGGG(GCGCC= -@gi|10006|ref|NC_06.1|-3/2 -TCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACT -+ -CCCGGGGGGGGGGJJJGGJJGGGCJJJJGJJJJGJJJGJJCJGGGGJJGJGJJJJJCGGGJGJGGJ1GCGCGCGGGGGCJGGGCGG=G=G=GC8GGGGG=CGCJ=CCGGGGGGGGG=GGCGGCGGGGGCCGGGCCGGGGGCCGGGGGGGC -@gi|10006|ref|NC_06.1|-1/2 -GAGTTTGTAGTAGGCGTAGATTTGTCTGCGAACAGCCCCAGCCACCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACCCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGT -+ -CCCGGCGGGGGGGJGGJJGGGGJJJJJJGJCJJGJJGJCJGJJJGC=C=GGGGJJCGGGGGJGG=8JGJCJGGGCG(CG8GGG=GCCCGGGGGCGGCGCCG=JJJJCG8G1GGGGGGGGGGCGGGGGGGG8GGGCGCGGCCGGGCGC=GG -@gi|10007|ref|NC_07.1|-9/2 -TACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGTTGTAACAAAACGTACCAACTCAC -+ -=CCCCGG1GGGGGJGJJJGGJGJ1JG1GJCJJGJCGGJGCCJCJGGGC8JGJ(GJGGJJGJGGCGGGGCJGCGGCGCGGCC88CCGGGGGCGCGGG===C(GJCJJ8GGGCGGCCCCG8GGCCGGGC=GCCGC1CGGCGG8CCGGG1GC8 -@gi|10007|ref|NC_07.1|-7/2 -TTTAAGTCATATTTGCTACGGTGACCCTACTACTAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCA -+ 
-CC1GGGGGGGGGCJJJ1JJGGJGGJCJ=GGJCJ(JJJGJ=GJJGJJ=JGJJJJ=CGCCGGGGGJGCGGGGGGCCCG8CGGCGGGGGGGG1GCG=CCCGGGC1JCCJCGCGGCGGC1CGG1GCCCCGGGC1GG1CGC=CGC=GGGCGGCGC -@gi|10007|ref|NC_07.1|-5/2 -TCTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGT -+ -CCCGGGGGGGGGGGJJJGCJGCJJ1JJJJJJJCCCJGJGGGJGGCGJCGJ=JJJCJGJG=GCJGG8GGGJJJGCCGGG8CG=(GGCGCCCCGGCGGC=GGGCJJJJJGGGGGGGCGGGG===GGCGGGGCCGGCCG=G=GGGGCGGGGGG -@gi|10007|ref|NC_07.1|-3/2 -GCTAGAGTTTGTAGTAGGCGTAGATTTGTCTGCGAACAGCCCCAGCCCCCCCGTTTAAGTCATATTTGCTACGGTGACCTTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGGAGTGA -+ -CCCGGGGGGGGGGJGJJJJJGJGJJCGGJGJJGGGJCGGJJ1JJGGGCJGJ=JJJJ=JC1C8GGG=CGGG==GJGCC=C(CG=CGGCGGCG(GGGGGGC=CCC1C1CG=GGCCGG=CGG=CG=GCGGGCGGGCCGGC1GCGGGG1CGCCG -@gi|10007|ref|NC_07.1|-1/2 -AGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTT -+ -CC=1GGGGGGGGGGGJGGJGJJJGJJGGJGGJGGJ=JCJJGJGG=GCGJJGGJ=JGJJJJG8CGGGGCJCJGCCGCGGGGGGGG=G=GGGGGGGGCG8GGCCJJJJJCCCGCCGGGCGCGGGGGGCGCCCGGGG=C8GCGC=GGGG81CC -@gi|10008|ref|NC_08.1|-9/2 -CAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCT -+ -=CCGGGGGGGGGGJGJJGCGJGJJJGJJCCGGJJJGJJJGGJGGGGCGJGJ=GGGGGJGG8GJGGGJGCGJGGCJCGCGGGCG8G8GC1GG8GGGGGGGGGGC=CCCGCGGG8GGGGCG1CG88GGGCGGGGCG1G=CGC=GCCGGGGCG -@gi|10008|ref|NC_08.1|-7/2 -GTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTG -+ -C=1GGGGGGGGGGJJJJJJGGCJJJC8GJJJJJJJCGJJJJJJGJJJGGGGJGJG==GG==GGGGCGGJGCG=CGGCG8GCGCGGGGGGCGGGCC8G(GGCGJJCJCGCCCGGGCCCGGGCGCGGGGCGGGGGGCGCGCGGGGGGCGCCG -@gi|10008|ref|NC_08.1|-5/2 
-TCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTGAGC -+ -CCCGGGGCGGGCGGJ1JJJ=GGJJJJ8CJJJGJJJCGGGGJJGJ8CJJCJ(G8J1JG8JCGGG=CCCGGGGC=G(=JG8=G8GGGGCCG=GCG=CGCCCGGG=JJ=JGGGGCGCGGGG=C=GGGGGGGGGGGCGC1GCGG=C81GGGCGG -@gi|10008|ref|NC_08.1|-3/2 -TCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTC -+ -CCCGGGGGCCGGGGGCGJGGJJJJJJJCGJCJJJGJ8JGCJJJGGJGGJJJJCCGCCCCGJGCGCGJC=CGGCG=GGGGC=GGCGCG=GGGG=GGC1GG=8GJCJ8JCGGGGCGGCGCGC1GG=CGCCCGGCGGGCGCGGGCGCGGGGG8 -@gi|10008|ref|NC_08.1|-1/2 -CCAGCCCCCCCGTTTAAGTCATATTTGCTACGATGACCCTACTACAAGAATACCAACTCATAAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGT -+ -CCCGGCGGG1GCGJJGJJGJJGG1JJ=CJJJGJJJGJGJJGGJCJGGGCGGJJGJCGJGJGC=GGGGGGCC8GC=GGG1G1CGGGG1CCCCGGCCCGGGG8GCJCJCG=CG=G=GGGGGGCGGGCG1GGGCGGGCGCGGGGCGGCGCGC= -@gi|10009|ref|NC_09.1|-9/2 -GAAGTGACACTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGA -+ -CCCGCGGGG(=GGGGJGGJJJJJJJG(JJJJJJJJJJCCGCG=GJGJJGJC=G8JJCJJ=JGGCGGGCGGGG1GGGGGCGGGGGGCGGCGGG=GCGGGGCGCJJJJCGGGG=GGGGGGC8CG1CCCG==CCGGCGC8GGGCGGCCGGCC1 -@gi|10009|ref|NC_09.1|-7/2 -TAGCAGGAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGT -+ -CCCGCGGGGGGGGJGJG=GGJGGGJJGJGJJJGCGJJGJGJGJJGJ=GJGJJG88G1GJGJJ=CJGGGGG(GGGGCGJGGCGCGGCCGG1GGGGGGCGG18CCJCC8GCGCG=GCG=G1GGGGCGG88CG8CCG=CGC=GGGCGCGGCCC -@gi|10009|ref|NC_09.1|-5/2 -CGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGG -+ -CCCGGGGGCGCGGJJJJJGJCJJG8J=GJJGJGJJJJGGJJGGGGJJJJCGCJCGJ=GCJGGGG1GGGGGGCJJGCJC=GGGGCGCCGGGGGG=GGG8GCG=CJ8J1GC=GGGG1=CCGGGCGGCCGGCCC=CGGGGGCGGCCGCCGCGC 
-@gi|10009|ref|NC_09.1|-3/2 -CCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGAACCATAGATCCCTCT -+ -CCC1GGGGG=GGGGGJJJGG=GJJCGGGJGJGJCGJJJJGJC=JGG=CJGG8JGGJGJJCJGGGGGGGGGGCGJGGCGGCCCCGGGGGGGGGCGGGG8GGGC=JCJJGGGGG=CGGCGCGCGGGGCGCCGCGCCGCCCGGGGG1CGGCC= -@gi|10009|ref|NC_09.1|-1/2 -AGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGGAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAAC -+ -CCCGGGGGGGGG=JJJJJGJJGJJJJCJCJGJJJGJGJJGGGJGJJCJGGG=J8GGJJGJGJG=GGJJ8GCCC8GJGGGCC(C188GGG=GCGCGGGCGGCCJCJJCGC8GG8CG=C1G1=GCGGGGGC1CCGCGCGGG=1GG=G1CG8= -@gi|10010|ref|NC_10.1|-9/2 -AGGCGTAGATTTGTCTGCGAACAGCCCCAGCCCCTTGGGTTAAGTCATATTGGCTAAGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAATGAAAGTCCCAAAGGCTA -+ -CCCGGGGGGGGGGJGJ=JJJGJC1G1CJ1JJGCJJJJGGJJCJJJJGJJGGJJJJ=G==GGJ=GGJGGCGJGGGGGCCG1G=JCGGGC=G1GC=CGGG8=C1JJJJJGG(GC=GGGGGC1GCGGCGCCCGCGG1C=GG1=GGCGGGGGGG -@gi|10010|ref|NC_10.1|-7/2 -CCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTGTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCT -+ -1CC=G1GGGGGCCJJCJGJJG1JJJGCJJJCJCGJJGJCJJCJJJJJCJGC=G8JJGJJJG1GJGJGGGGGCGCJCGGGG=GGCGGC8CCGGCCGG==GGG(CJCCCCGGGGCGGGCGGG8GCCCCGCCGCGGCGGGG=CG1C8GGGGGG -@gi|10010|ref|NC_10.1|-5/2 -GTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACCAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAG -+ -CCCGGGGGCGGGGJGJJJGJGGJJGCGJJJCGJJJJJJJJGCJCJJJGJ8JCGJJGJCJGJ8JGGG=GG8CJCG=GGGCGGCGCGGGGGGCGGC==GCGGCCCCCJC(GGGGG=GGGCG=GGGG1GGGG1GGG1G1GG8CGGGCGGCCGG -@gi|10010|ref|NC_10.1|-3/2 -TATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGACACAAAGAGTGGGCTAAGTACACGCCCTCAATGCA -+ 
-CCCGGGGGGGGGGJGJJGCGJJJGJGJGC=CGJGGJGJGJJGJ(1CJGGJGGGGGJG==JCC1JGCG(C(GGGGGGGCG=GCCGGGCC8G=CG==CCGCGG(CCJJJC=GC=GGGGGGGGCGGG1GCCGGGGG81CGGCGCGG8CGGGCC -@gi|10010|ref|NC_10.1|-1/2 -GCAGATTACTCCGATGAGCGACACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACCAAACGTACCA -+ -CCCGGGCGGGGGGGJCG1JJCJJCJ=CCJCJGJGCJJCCJ1JJG=JGGJGCJ=CJJCJ8G(JJG=88GCCCCGCGJGGCGCGCCGCCGCCGCCGGCGG=G1C1JJJJGGCCGGG=GGGGGGC=GGGCCGCCGGCGCGGGCGGGGG=CCGG -@gi|10011|ref|NC_11.1|-9/2 -TGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCAATGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCA -+ -CCCGGGGG=GGGGJGJGGGJJJJJJJJJJJJCJJJJJGJJGJGJGGGJJJJGGGJGCJJ8GGCGG8GGG==G=CGGGCCGGGGCGGGGGGGGCGCGGG=CGC=JJJCCGGGGGG=CCGGGGGGGGGCGGG(CG=GCGCGGGCCGGCG8CG -@gi|10011|ref|NC_11.1|-7/2 -TCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAATTAAATCGAGTGTTGGCCGCATGTAGACATTT -+ -CCCGGG1GGGGGGCJ=JGJJCGJGJJJJJJ8JGJGJJGJ=JGGJJJJGCGJCGGGGCC1=C8JGJ==GCJGCGGG8GCCGGC1CGGGGGG=GGGCG=CCGCGJJJCJCGCCCCCG=GC8CC=G8=CGCGGGGGCGCCCGGGG=GCGGGGG -@gi|10011|ref|NC_11.1|-5/2 -ACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCCTTGTTTCGCTCATCGGAGTAATTTGCAATGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTG -+ -8=CGGGGGGGGGGJJGJJGCJJGG=JJJJJGJJJJGJJGG=C(JJGJCGCC8JGGJ(CGJJJJGGGCJCGGGGGJCGJC=CG=G=GG=GGGGCGCGGGGGGG=JC=JC==CCGGCGG=GGGGGGGCC=GGGGGG=C8GG=GGCGCGCCG1 -@gi|10011|ref|NC_11.1|-3/2 -CTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCATTGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAA -+ -CCCGGGGGGGGGGJJJJJJJJJGGJJJJJGJC=JGJJGGJJJJGGCJJJGJGCJJGJGCGGGGGGJGCCJGJGGCGG=GGCGGGCG=C=CGGGCGGC=GG8CCJ(JJGGCC=GCGCGGCGG8GGCCCGCGCCCG1CCG=GGC8GCGGCC= -@gi|10011|ref|NC_11.1|-1/2 
-ATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCAATGAATGCGAGATAGACACAAGAGACACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATAGATCCCCCTCGAATAGCC -+ -CCCGGGGGGGGG1GJCJJJGGCJGJGJGJJGGGJJGJGCJJJJJJG=G8JJG=CGJGGCCCGGGCGGCGGJGGGGGCG(CGGGGG8GCGGGG1=CCGGGCGCJC8JJC8GCGGGCCGGGCGCGGCCGG=C8GGGCGC=GCGC1CGCGGCC -@gi|10012|ref|NC_12.1|-9/2 -GGCGTATACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTTAATGCGAGATAGACACCAGAGTCACCCTCTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATAGATCCCTCTCGAATAGCCGCCGGA -+ -1CCGG1GGGGGGGJJJJGJJJCJJGJJGJGJJGJ(CJJJ8GGJGJJJJJJCGJ8CCGGGCJJGGGGGGC=GGG=GGGGCGGCG=CCCGGGCGGG=CGGGGGCJ=CJJ8CGGGGGGCG1CGGGGCCGG1=GGGGGGCGCCCGGGGCGCGGG -@gi|10012|ref|NC_12.1|-7/2 -GGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAGAGGGTGACTCTGGTGTCTATCTCGCATTAA -+ -=CCGGGCCGGGGGJJJGJCJGJJJJGGJGGJGGJJGGJGCCJJJGJGJJGJCCG8GJCG=GCGG8=JGJGGJC=GG=GGGCCGGGGCGCGGG1GC=GCGGG8JJCCJCCC1GCGCGGGG=CGGCGCCGGGGCGGC=GGC8=CGGCGG=CG -@gi|10012|ref|NC_12.1|-5/2 -TGCGAACAGCCACAGCCCCCTGGTTTAAGTCATATTGGATACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTG -+ -CCCGGGGGGGGGG=JJ8JJJJGCGJGJJJGJJJCJCGG(GJGJGGGJGJJGGJGGGJCJGGJJJJG1CG8GGGCJ1GGGCGGGGCC==GGGGGGGG=GGGGCCCJJC=GGG=G=GCCGCGGGGGCGCGG=CCGGG=CCCG=GGGGGC=GC -@gi|10012|ref|NC_12.1|-3/2 -TACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGC -+ -CC=GGGGCGGG=GJJJJGJJJGCGGJGGJJJGGJJJJCJGGGGJ8=GJGGGJGGJJGCJC8GGCJGGJGCCGGGGGGG==8GGGCGCGGCGGGC8GGGG==GJJCC=C=CCCGGG=G=GCGGC==CGGG=GGGGGCGGC=GGCGG(GCCC -@gi|10012|ref|NC_12.1|-1/2 -AAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCGGAAGAGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTT -+ -CCC=GGCGGGG8G=JJGJGJCJJJJJJJJJJJJJ1JGCGJJJJJJGJ18JJCGCJGGGGC=(=GJCCCCGGJCCG1GG8GCGGGGGCG(GG=GGGGGGG=GC=JJJJCGGGGGGGGCGGCGGCGCG=CGGCCGC=GCGGGGGCGCGGGGG 
-@gi|10013|ref|NC_13.1|-9/2 -GAAGAGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATTGACACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGA -+ -C=CGG=CG1GGGG=JCJGJJCGJGJJ1J1CGJGGGJGJJGGGGGGJGJJGG8JJCCGC8GGGGGGJ8GGG(G1CCCGG8GCCGGCC=GG=CGGGCGGCG=GCJJJ=CGGCGGGGGGGGGC1GGGG=CGCGGCGCCGGGCC1=CGG=GGGG -@gi|10013|ref|NC_13.1|-7/2 -AATGCATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTAACAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATC -+ -CCCGGGGGGGGGGJJJJJCG1JGGJGJJJJJ1GGGJJ(JGJJGGGJGJJGCJJG8JJJGCGJCJCGGGGGC1GGGGCGGCCGGCCG=GGGCGCCCGGGCCGCCJJJJCGCCGGCC=CC==GCG(GGGGGGGC=CCGGG8GGCGGGGGG=1 -@gi|10013|ref|NC_13.1|-5/2 -GTCGCGTGAGTTGTTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGAGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCG -+ -CCCGGGCGGGGGGG=(GGJJJJJGGGGCJGJJJJJCGJ1CGJJJJJJG=JCJCJ=GJGGJGJGGJGGCGGGGG8GGGCGG81GCGGGG=8CGGGGGGCCG8CCJJJ=CCG=CGGGGG=CGCGCCCG1GGCG1G=GCGGGCGGGGG=C=CC -@gi|10013|ref|NC_13.1|-3/2 -GAGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCAATCTCGCATTCAATGCAGATTACTCCGATGAGC -+ -C=CGGGGG1GGGGGGJJ=JJJJCJCJJJJGJJGJJJC1(CGJGCJJJGJJJGGJ8GGGGJGJJ=GGGGGGGGGJ1CGGGCGGGCGGGGCCGGCG1GGGCCGGCJJCCGCGGGCCCGC8GCCCCGGCCG=C8GCCG=GGGGGCGGGGGGGC -@gi|10013|ref|NC_13.1|-1/2 -TGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTAACAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGG -+ -CCCGGGGGGGGGGGJG=JJ=CJGJGGGG=GJJJGCGGJJJJGJCGGGGGCGGJGCJJG8JGGGGCGJCCGJ8GGC8JGGCGGGCGGCCCCCGGGG=GGGGGCJJJ=JGG==CGGGGGGGG==G=1GCGGGGCGGGGGGGCGGGGG=CCGG -@gi|10014|ref|NC_14.1|-9/2 -GTAGCCGTAGATTTGTCTGCGAACAGCCCCAGCCACCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACCCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGC -+ 
-CCCG1GGGGGG1GGJJJJJJGJJJJJJJJCJGGJGGJJGJGJJJJGJJ1CGGJCJG8GCGGGJJG=GJJGJCCG8=CGGGG=GGCGCCCGGGCGGGGCGGGCJJJJ=GGGCGGGGCGGGGCCGCGC1GGGGCCGCGCCG=CGGCGCGGGG -@gi|10014|ref|NC_14.1|-7/2 -CACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAGAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGATAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCA -+ -CCCGGGGGGGGGGGGGJCJJ(GCJ=J1JGJGJJ1GGJJJ=J=JJGCJJCJJCGJJGGJJGC=GJ=GCJGG1CGG==G=G=(GCCGGGGGGGGGGGGCCGGGG=JJCJGCGGCGGGCGCCGGGGGGCCGGGGGGGCCGGGCGCGCG=CGCC -@gi|10014|ref|NC_14.1|-5/2 -TCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCCCATCGGAGTAATCTGCATTGAATGCGAGATAGACACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCCGACCCATAGATC -+ -CCCGGGCGGGCGGJJG1JJ1JJJJJJJJGJGJGGGJJGGCJGJJGGJGCJGJGJJGJGGGCGCGJGGGGJGGCC1GGGGGGCGGCCCCGGGGCGGCG=GGGCJCJJJGGGGCCCCGGCGCG8CCGGCGGC=GGGCGGCCG=GGC==GGGC -@gi|10014|ref|NC_14.1|-3/2 -ATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAGAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGATAAGTTGTAACATAACGTAC -+ -8CC=GCCGGGCGGJJJJJGJJJJGJGJJJJJJGJJ8J1JGGJJJJGGGCJCJCGCJJJGGGCGJ8GJ1GGGGG=GGG=GGG1GGGGGGCCC=GGGGCGGGGGCJJCCCC1G=8CGGGGCC=CGG=C=CCGG=GGGCGGG8GG=G8=G8GG -@gi|10014|ref|NC_14.1|-1/2 -TTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAACTTATCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTCTCTGCTACTGCC -+ -CCCGGCGG=GGGCGGGCJGJJCGJJJJJCG=J(JGJ8GJCJGGJGJJGGGCGGG=JGJJG=JC=GGGGGGGCGCJCJJGCCGGGCGGGGG(GCCGGGGGG8CJJJJCGG=188GGCCGGGGGG8CCGGGGCGCGGGGGGGGGGGCCCGGC -@gi|10015|ref|NC_15.1|-9/2 -AATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGG -+ -CCCGGGGGGGG=GJJCCJJCJGGJJJJJGJGCJCC=GGGJJJJJ=JJCCJGGGGJJJGGC8GGGJGGCGJCGG=CGGCGGCCCGGCCGGGGGGGGCC1GG(GCJJJCGGCCCCGGG=(CGGGGCGCGGCCGC=CGG(CGGGGCGGGGGG= -@gi|10015|ref|NC_15.1|-7/2 
-TATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGTAGTAATGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGT -+ -CC1GGGGGGGGGGGJJJG=JGG1CJCJJJJJJCJJJ8CGJG(JJGGJ8JJGJJGJGGG(=GGGJGCGGCGCGGCGGGCGCGCCGGCGCGGGG=GCG=GGCCCJJJJJ=GGGG=GGGGGCGCGCGGGGGGGGGCGGGGGGCC=GCGGCGCC -@gi|10015|ref|NC_15.1|-5/2 -CCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTA -+ -CCCGGGGGGGCGGJJGGJCJJJJJG1JJJ==JJJJJJGGJJG(GJ(JCJJJGGJCJGJGCJGCG8JCGCGCGGCCGCGGCGGGGCGCGGGCC=GGCGGGGCGCCCJJGGGGGCC(G8CGG8GGCCGCGGGCGGGG1GGGCCCGGCGCCCG -@gi|10015|ref|NC_15.1|-3/2 -GACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCGAGTGTTAGCCGCAT -+ -CCCGGGGGC(GGGJJGJCCJJJGJGJJJJJGJJJJGGG1GGJJJCJCJJGGGCGJGGGCJCGJ=8JGCCGGGJGG=CGGG=GCCGCGGGGGGGG1GGGGGGCCJ==8CGGC8GGC=G=GC=GCGGCCGG1GGGCGGCGGCC=C=GGCCG= -@gi|10015|ref|NC_15.1|-1/2 -ACCTGGTTTAAGTCATATTGGCTACGATGACCATACTACAAGAATACCAACCCATTAAATTTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAAC -+ -CCCGGGGGGGGGGCGJJGJJJCJCCJCGJJGGJJCJGJJJ=GJGGC=JGJGCJCG8GGGGJJGGGGC=GCGGGG(GG=GCGCCGCGGGGCCGCGGGGG(8GGJCJJCGGGGG=1GGGGCGGGGGG1G=GG=GGG8CCCGG=CCCCGGGCG -@gi|10016|ref|NC_16.1|-9/2 -GCGGCCAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACCGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCAT -+ -CCCGGGGGCG=GGGJJJGGJ=JG(JCJJJJJJJGJJJGJ8JJJJJGJGJJGJJGJGJG8GGJGJCCJ=GG=GJGCGJGCGGGCC=GGGGGGGGGGGCG8GG=CJJJJGCGCGCGCCCC=G=GGGGCGGGGGG==GGGGGGCGGGCCGGG8 -@gi|10016|ref|NC_16.1|-7/2 -AAGTCCCAAAGGCTACCGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGTGCGTGTACTTAGCCCACT -+ -CCCCGC1GGGGGGJGJJJJ1JJCJJJJJJGJJJG=JG=GJJGGGCJJJGGJGGJCJGCGCGCCCGCGGGJGGCGCCGG==CGGGGGGGG=GCCGGGCCGGC1CCJJJGCGGGGCCG(GGGGGCCGG=GGGCCGGGGCCGGCGCGGGGGCC 
-@gi|10016|ref|NC_16.1|-5/2 -TCTGGGTTCTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGATAAGTACACGCACTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCCGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTT -+ -CCCGG1GGCGGGCJJG1GJGJJGJCJJGJJCJC=JJ=GJGGJJG=CJ=GGGJGGJGGG8JCCGGCCGCGGGJ==GCCCCGGGGCCC8GG=GGCC(CCGGGGGJJ=CJCGG1GGGGGGGC1GGGC=GGCG=CG==GCC=GG=GCGGCCGCC -@gi|10016|ref|NC_16.1|-3/2 -GGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCCTTTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGAG -+ -CCCGGGGGGGGGGJJJJCJJGJGJJCCJJJJJJJGJGJJJCJGJGGGCG8JJJJGJJGCG==GJGGGJGGGGGCCGGG=CGGGGGCCGGG=GGGGCCCCGG(=CCCCG=CCG11CGCGCCGC1GGGGCGGGGGGGCCCCCGCCGCG=GGC -@gi|10016|ref|NC_16.1|-1/2 -AATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGTGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTG -+ -=CCGGGGCGGGGGJGJJJJJJJJJCCJJGJJJJJJJGJJJJGJJGJ=JGJCGJC1JCJGGJGJCJG8CG8=CCGGGGGCCGGGCGGCGGGCCCCGC8GGCGCCJJCJCCGGGCG=GCCGGG=CCCGCCCGGGCCGGGC1GCGGGCGGGGC -@gi|10017|ref|NC_17.1|-9/2 -TGAGTTGGTACGTTTTGTTACAATTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTTATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAA -+ -CCCGG1GGG(GGGCGJJJGJJGJGJGJJJJJGGCGJCG8JGGGGGJ8JGGJJGJ8CCGG=CGCGGGJGGGCCCGGGCGGG8CCG=CGCGCC=GGG==CGGCG(CCJJGCC8GGCGGGG=G8GGG1CG=GGGGCGGGGG1G==CGGCGC8G -@gi|10017|ref|NC_17.1|-7/2 -ATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAATTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTTATGCCAAGATGCATTG -+ -CCCG=GGGCGGGGJGJJJ1JGGJJGJGJJJGJJJJJJ=GJJJGJJJGCGGJJJGGCGJGJGJGJJJGGJGGJGGGGCGGGCCCGGG88GGG8GC1C=GG1GCJCJCJCCCGCCCC=CCCGGGGGCGGCGGCG8C=GGGCGG=8CCCCGGC -@gi|10017|ref|NC_17.1|-5/2 -AGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCCATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGG -+ 
-CC=CG1GCGCGGGJGJJJJJJJJJCJGGG=8JJCCJC=JJCGGCJJJJGGGJ8GGCJGGCGGCGGCJGCGCGGGGG(GCCGGCGCG8CCGGGGGCCGGG8G=CCJ=JGGCGGC8GGGGGGGCGGGGGGGGG=8=CGCCCGGCCGCGCGC= -@gi|10017|ref|NC_17.1|-3/2 -CATTCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAATTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGT -+ -CCC=GGGCGGGGCGCJJJJJGJJJJCJJGJJJGJJG=JGJJJGJJJGGGC=JG=GCCGJGGGCG=CJ=GC8GGGGG(GCG1C8GCGGGGGGCGCG8CGCG8=CJJJJGGG=G1CCCGGGGGGGGGGGCGGCGGGGGCC=G=CG=CGG8GC -@gi|10017|ref|NC_17.1|-1/2 -CAATGCATCTTGGCATAACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAATTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAAT -+ -CCCG=CGGGGGGCGJJJJJJJJJJGJJCJJ1JGGJGGGJJJJJJCJGJCGGJJGJCGGC1JGGCGGGJGJGGGGCJCGGGJGGGCC=GGCGGGCGCGG=GGGCJJCJGGCCGCGGGG(CCCGGGGCCGCCGC=GGGCGCC1GCCG=GG=8 -@gi|10018|ref|NC_18.1|-9/2 -GGTGACCCTAATACAAGAAAACCAACTCATTAAATGTCTAGATGCGGCTAACACTCGATTTAATTTGCCAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTT -+ -CCCGGGGGGG8GGJJJJJGJCJJJJJJCJGJGGJCJGJJJJJJGJG=JJGC8JJ8JGJJGCGGGCJCGGGGCGCGGGGGCGCCGGCGGCCGGGGG=GCG=(GCC8JJGGGGGG=CGGGCGC=CGG18=CGCCGCGGCCGG=GGGGCGC81 -@gi|10018|ref|NC_18.1|-7/2 -AAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTG -+ -CCCGGCGGGGGGGJJJJJGJJJJCGJGJ=JGGGJJJJJJJGGJJJGGGJGCJGJJJJCGJJG=CCJJ8GG1JGGGCJCGGGGGCGG=GGGCG=CGCCGCG1CJJ=JJ=GCGGCGCGGCCCGGGGGGC=CG8CGGGGGG=GC=GCGGCCGC -@gi|10018|ref|NC_18.1|-5/2 -AAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTT -+ -CCCGGGGGGGCGCG=GJJG8JJJJJJGGGJGGJJCJCGCCCC=GGCGJCJJ=GJGGGJGJGJCGGG8GGCGGCGGGGCGJ1G8CCGGGGCGC=GGGCGCGGGC=JJJGGCGGGG=GCGGGGCG=CGGCCGCCGGCCCGGGCGGGCGGCGC -@gi|10018|ref|NC_18.1|-3/2 
-ACAGCCCCAGCCCCCCCGTTTAAGTCATATTTGCTACGGTGACCCTACTACAAGAAAACCAACTCATTAAATGTCTAGATGCGGCTAACACTCGATTTAATTTGCCAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACG -+ -CCCGGGGGGGGGGJCJJGGJJJJ=JJJJJJ8JJJGJGJJGGGJJCJGJJJJGGCG8JCJJGJGGG(JGGG1G(GG=GCGC1GGGGGGGGGCCCGGGGGCGGCCCJCJCGCGGGG1GCGCG8CCCCG=CGCGCGCGGCCGGGGCGCCCGGC -@gi|10018|ref|NC_18.1|-1/2 -GAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCT -+ -CCCGGGGGGGGGGJJGJGJJJJJJJJJJCJJJJJGGGG=CGJGJGCCGGJC=GJGJGGGGCCGJJGCCCCCGJGGGGGGCGGCGGGGCGCGGGCGG8GGC8GJJJCCGGGGGG=GC1CGGGCGCGCGG=CGCCGCGGGCGGCCCCGGC=8 -@gi|10019|ref|NC_19.1|-9/2 -ACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCCGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCATTTCGGCAGTA -+ -CC=GGGGGGCGGGGJJJJGC=CJCJJGJJJGJGGJJJ(JCCJCJ1JGJCJJGJGGGGJ=GGC1JGGGJGC=GJCGJG1G=GCCGGGG=GGCCCCCGGGGGGG=JJJJGGCGCGCGCGGGCGGGGCGG=CCGC=GGGCGG8CGCGGCCGGC -@gi|10019|ref|NC_19.1|-7/2 -AGCCCCCCCGTTTAGGTCATATTTGCTACGATGACCCTACTACAAGAATACCAACTCATAAAATGTCTACATGCGGCTAACACTGGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTA -+ -CC=GGGG=CGGGGJJGGJJGJJJ(JJJ1JJCJJGGJJCJ1CCGGJJJJJJCGC=GGG(GCGJJGJ=CJCGJCJCGG1GGG=GCCGGGCGGGCCCGCC1CGG=JJCJJCGG1GCCCGG=1GGGGGGGC(GCGGGCGGGG=GGCGCCG1CG8 -@gi|10019|ref|NC_19.1|-5/2 -GAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCGGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTAGCTACGGCCGTGATCTGACCCATTGATCCCTCTCGAATAGCCCCC -+ -CCCGGGGGG=GCCCGJGJJJJJJGCJGGJJJJJJGJGJGJJJJ1JJJGGGJJJGG(=JCJC=GCGJJJJGGGCGGCGGC8CC8CGCCG=GGCGCGCCGCCGG==CJCGGGG8GGCGCGGCGCGGGCG1GGCGGGG=CGG=GGCCGGGGGC -@gi|10019|ref|NC_19.1|-3/2 -GCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAATGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCGGCCTTGGATGCGAGATAGAAACCAGAGTCACCCT -+ -CCCGGGGGGGGGGJG8CJ=1JGJJGJGJJJJJJGCGGGJJC(JJJCJJJJJGCGJGCCCJGJGC8GGCCCCCCJ8G=GGGGC=GGGCGCGGGGCGG1GGCGGC8JCJGCGG8GGGCGC(GGGGC=GG=GGCGCGGCGGCCCGCGGC=CG8 
-@gi|10019|ref|NC_19.1|-1/2 -GCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCCAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTATTCTTGTAGTAGGGTC -+ -CCCGGG=CGGGGGJGCJJJJ=GJCGJJJGJJJJGCCGGGJJJJJGJJJJJGJCGJGCGCGCJG=CGCGGGGGCCCG=CCG(GCCG=G=GGGGGGGCCCG(GCCJJJJGGGCGGG8GGGGGGCCGCCGGGGGC81CGGGCGGGCCCCG1CG -@gi|10020|ref|NC_20.1|-9/2 -GGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATTATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAA -+ -C81GGGGGGGGG(CGCGJ1JJJJCJ=JGGJCJJCGJJJJJJJJJ(C=GJ=CJJJJGJGJ=CCGCC8JCGGJGCGGCJGGGGGCGC=CCCCCGCGGGGGGGGCCCJCJGGGCGGGCGGGC=GGGGGGGG1CCGGGCCGGGGGGC=GG1C8= -@gi|10020|ref|NC_20.1|-7/2 -TTGCTAGAGTTGGTAGTAGGCGTAGATTTGTCTCCGAACAGCCCCAGCCCCCCCGTTTAAGTCATATTTGCTACGATGACCCTACTACAAGAGTACCAACTCATAAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGT -+ -CCCGCGG1CGGGC=JCJJJJJGJJGGJGJGGJGJJGGGGGGG8GGJGC=CJGGGGGGGJ=GGJJJGC1GGCGGCC=JGC((=GCG=GCGGCCCC=CGCC=GCJJJCJG=1G==CGGGCGGCC=GGCG1G1=CG8GCCGGCGGGG1GCCCC -@gi|10020|ref|NC_20.1|-5/2 -TTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACCGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTACTCTTGTAGTAGGGTCATCGTAGCAAATATG -+ -CCCGG==GGGGGGGCJJJJJJJJGJJ1JJJ=JJJGJJGJJJGJGJCJG=CJJGGJ1GJGCJGGCG=C(G1G=CGGGGCCGCCCGGCGGCGCGCGCGGCGCCGCJCJJGGGGCGGCGG=G=GGGGGCCGGGGGGCGCG8=GGGCGCCCCCC -@gi|10020|ref|NC_20.1|-3/2 -ATCGAAACAAAGAGTCGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACCGTAG -+ -C1CG=GGG=GGGGCJ1JJGCJC=GGJJGJGGJJGJJGJJGCJGJJGJGJ1JGCJJGGGJCJJGCCCGGJGG==CJGGGCGGG1GGGG=CCCGC=CGCGGCCCCCJCCC=GGGGGGCGCCGGGGGG8CGG==CGGGGGGGGGGGGGGCCGC -@gi|10020|ref|NC_20.1|-1/2 -GCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACCGTAGCCTTTGGGACTTTCACT -+ 
-C=CGGGGGGGGG=JJJCJJ=JJJ(GJGJJJJJJ=JJJJCGGJJJJJGJGGJGJGGCGG=J=GJGJGGGCGGGGGCCGGGGCGCG=G=GGCG=GGGGCCCGG1CCJJJ=CG1G(CG=GGGGGCGGGCCGGGCCGGG=1CCCGCGGGCG=GC From db97283365e85a72a0a31f4126fcb106dceceb29 Mon Sep 17 00:00:00 2001 From: Radu Muntean Date: Thu, 5 Aug 2021 11:15:27 +0200 Subject: [PATCH 09/17] small reformat Signed-off-by: Radu Muntean --- .../annotation/taxonomy/label_to_taxid.cpp | 95 ------------ .../annotation/taxonomy/tax_classifier.cpp | 145 ++++++++++++++---- .../annotation/taxonomy/tax_classifier.hpp | 36 ++--- .../annotation/taxonomy/test_taxonomy.cpp | 9 +- 4 files changed, 138 insertions(+), 147 deletions(-) delete mode 100644 metagraph/src/annotation/taxonomy/label_to_taxid.cpp diff --git a/metagraph/src/annotation/taxonomy/label_to_taxid.cpp b/metagraph/src/annotation/taxonomy/label_to_taxid.cpp deleted file mode 100644 index ba8dbb13f5..0000000000 --- a/metagraph/src/annotation/taxonomy/label_to_taxid.cpp +++ /dev/null @@ -1,95 +0,0 @@ -#include "tax_classifier.hpp" - -#include "common/utils/string_utils.hpp" - -#include "common/logger.hpp" - -namespace mtg { -namespace annot { - -using mtg::common::logger; - -void TaxonomyBase::assign_label_type(const std::string &label, bool *require_accversion_to_taxid_map) { - if (utils::starts_with(label, "gi|")) { - // e.g. >gi|1070643132|ref|NC_031224.1| Arthrobacter phage Mudcat, complete genome - label_type = GEN_BANK; - *require_accversion_to_taxid_map = true; - } else if (utils::starts_with(utils::split_string(label, ":")[1], "taxid|")) { - // e.g. >kraken:taxid|2016032|NC_047834.1 Alteromonas virus vB_AspP-H4/4, complete genome - label_type = TAXID; - *require_accversion_to_taxid_map = false; - } else { - logger->error("Can't determine the type of the given label {}. 
Please make sure that the labels are in a recognized format.", label); - std::exit(1); - } -} - -bool TaxonomyBase::get_taxid_from_label(const std::string &label, TaxId *taxid) const { - if (label_type == TAXID) { - *taxid = static_cast(std::stoull(utils::split_string(label, "|")[1])); - return true; - } else if (TaxonomyBase::label_type == GEN_BANK) { - std::string acc_version = get_accession_version_from_label(label); - if (not accversion_to_taxid_map.count(acc_version)) { - return false; - } - *taxid = accversion_to_taxid_map.at(acc_version); - return true; - } - - logger->error("Run get_taxid_from_label() for unknown label {}.", label); - std::exit(1); -} - -std::string TaxonomyBase::get_accession_version_from_label(const std::string &label) const { - if (label_type == TAXID) { - return utils::split_string(utils::split_string(label, "|")[2], " ")[0]; - } else if (label_type == GEN_BANK) { - return utils::split_string(label, "|")[3];; - } - - logger->error("Run get_accession_version_from_label() for unknown label {}.", label); - std::exit(1); -} - -// TODO improve this by parsing the compressed ".gz" version (or use https://github.com/pmenzel/taxonomy-tools) -void TaxonomyBase::read_accversion_to_taxid_map(const std::string &filepath, - const graph::AnnotatedDBG *anno_matrix = NULL) { - std::ifstream f(filepath); - if (!f.good()) { - logger->error("Failed to open accession to taxid map table {}", filepath); - exit(1); - } - - std::string line; - getline(f, line); - if (!utils::starts_with(line, "accession\taccession.version\ttaxid\t")) { - logger->error("The accession to taxid map table is not in the standard (*.accession2taxid) format {}.", filepath); - exit(1); - } - - tsl::hopscotch_set input_accessions; - if (anno_matrix != NULL) { - for (const std::string &label : anno_matrix->get_annotation().get_all_labels()) { - input_accessions.insert(get_accession_version_from_label(label)); - } - } - - while (getline(f, line)) { - if (line == "") { - 
logger->error("The accession to taxid map table contains empty lines. Please make sure that this file was not manually modified {}.", filepath); - exit(1); - } - std::vector parts = utils::split_string(line, "\t"); - if (parts.size() <= 2) { - logger->error("The accession to taxid map table contains incomplete lines. Please make sure that this file was not manually modified {}.", filepath); - exit(1); - } - if (input_accessions.size() == 0 || input_accessions.count(parts[1])) { - accversion_to_taxid_map[parts[1]] = static_cast(std::stoull(parts[2])); - } - } -} - -} // namespace annot -} // namespace mtg diff --git a/metagraph/src/annotation/taxonomy/tax_classifier.cpp b/metagraph/src/annotation/taxonomy/tax_classifier.cpp index 247df4cd94..e3b1de1472 100644 --- a/metagraph/src/annotation/taxonomy/tax_classifier.cpp +++ b/metagraph/src/annotation/taxonomy/tax_classifier.cpp @@ -6,6 +6,7 @@ #include "annotation/representation/annotation_matrix/annotation_matrix.hpp" #include "common/unix_tools.hpp" +#include "common/utils/string_utils.hpp" #include "common/logger.hpp" @@ -14,49 +15,135 @@ namespace annot { using mtg::common::logger; +bool TaxonomyBase::assign_label_type(const std::string &sample_label) { + if (utils::starts_with(sample_label, "gi|")) { + // e.g. >gi|1070643132|ref|NC_031224.1| Arthrobacter phage Mudcat, complete genome + label_type = GEN_BANK; + return true; + } else if (utils::starts_with(utils::split_string(sample_label, ":")[1], "taxid|")) { + // e.g. >kraken:taxid|2016032|NC_047834.1 Alteromonas virus vB_AspP-H4/4, complete genome + label_type = TAXID; + return false; + } + + logger->error("Can't determine the type of the given label {}. 
" + "Make sure the labels are in a recognized format.", sample_label); + exit(1); +} + +bool TaxonomyBase::get_taxid_from_label(const std::string &label, TaxId *taxid) const { + if (label_type == TAXID) { + *taxid = std::stoul(utils::split_string(label, "|")[1]); + return true; + } else if (TaxonomyBase::label_type == GEN_BANK) { + std::string acc_version = get_accession_version_from_label(label); + if (not accversion_to_taxid_map.count(acc_version)) { + return false; + } + *taxid = accversion_to_taxid_map.at(acc_version); + return true; + } + + logger->error("Error: Could not get the taxid for label {}", label); + exit(1); +} + +std::string TaxonomyBase::get_accession_version_from_label(const std::string &label) const { + if (label_type == TAXID) { + return utils::split_string(utils::split_string(label, "|")[2], " ")[0]; + } else if (label_type == GEN_BANK) { + return utils::split_string(label, "|")[3];; + } + + logger->error("Error: Could not get the accession version for label {}", label); + exit(1); +} + +// TODO improve this by parsing the compressed ".gz" version (or use https://github.com/pmenzel/taxonomy-tools) +void TaxonomyBase::read_accversion_to_taxid_map(const std::string &filepath, + const graph::AnnotatedDBG *anno_matrix = NULL) { + std::ifstream f(filepath); + if (!f.good()) { + logger->error("Failed to open accession to taxid map table {}", filepath); + exit(1); + } + + std::string line; + getline(f, line); + if (!utils::starts_with(line, "accession\taccession.version\ttaxid\t")) { + logger->error("The accession to taxid map table is not in the standard (*.accession2taxid) format {}", + filepath); + exit(1); + } + + tsl::hopscotch_set input_accessions; + if (anno_matrix != NULL) { + for (const std::string &label : anno_matrix->get_annotation().get_all_labels()) { + input_accessions.insert(get_accession_version_from_label(label)); + } + } + + while (getline(f, line)) { + if (line == "") { + logger->error("The accession to taxid map table contains 
empty lines. " + "Please make sure that this file was not manually modified {}", filepath); + exit(1); + } + std::vector parts = utils::split_string(line, "\t"); + if (parts.size() <= 2) { + logger->error("The accession to taxid map table contains incomplete lines. " + "Please make sure that this file was not manually modified {}", filepath); + exit(1); + } + if (input_accessions.size() == 0 || input_accessions.count(parts[1])) { + accversion_to_taxid_map[parts[1]] = std::stoul(parts[2]); + } + } +} + TaxonomyClsAnno::TaxonomyClsAnno(const graph::AnnotatedDBG &anno, const double lca_coverage_rate, const double kmers_discovery_rate, const std::string &tax_tree_filepath, - const std::string &label_taxid_map_filepath) : - TaxonomyBase(lca_coverage_rate, kmers_discovery_rate), _anno_matrix(&anno) { + const std::string &label_taxid_map_filepath) + : TaxonomyBase(lca_coverage_rate, kmers_discovery_rate), + _anno_matrix(&anno) { if (!std::filesystem::exists(tax_tree_filepath)) { - logger->error("Can't open taxonomic tree file {}.", tax_tree_filepath); - std::exit(1); + logger->error("Can't open taxonomic tree file {}", tax_tree_filepath); + exit(1); } - bool require_accversion_to_taxid_map = false; - assign_label_type(_anno_matrix->get_annotation().get_all_labels()[0], &require_accversion_to_taxid_map); + bool require_accversion_to_taxid_map = assign_label_type(_anno_matrix->get_annotation().get_all_labels()[0]); Timer timer; if (require_accversion_to_taxid_map) { - logger->trace("Parsing label_taxid_map file.."); + logger->trace("Parsing label_taxid_map file..."); read_accversion_to_taxid_map(label_taxid_map_filepath, _anno_matrix); - logger->trace("Finished label_taxid_map file in {}s", timer.elapsed()); + logger->trace("Finished label_taxid_map file in {} sec", timer.elapsed()); } timer.reset(); - logger->trace("Parsing taxonomic tree.."); + logger->trace("Parsing taxonomic tree..."); ChildrenList tree; read_tree(tax_tree_filepath, &tree); - logger->trace("Finished 
taxonomic tree read in {}s.", timer.elapsed()); + logger->trace("Finished taxonomic tree read in {} sec.", timer.elapsed()); timer.reset(); - logger->trace("Calculating tree statistics.."); + logger->trace("Calculating tree statistics..."); std::vector tree_linearization; dfs_statistics(root_node, tree, &tree_linearization); - logger->trace("Finished tree statistics calculation in {}s.", timer.elapsed()); + logger->trace("Finished tree statistics calculation in {} sec.", timer.elapsed()); timer.reset(); - logger->trace("Starting rmq preprocessing.."); + logger->trace("Starting rmq preprocessing..."); rmq_preprocessing(tree_linearization); - logger->trace("Finished rmq preprocessing in {}s.", timer.elapsed()); + logger->trace("Finished rmq preprocessing in {} sec.", timer.elapsed()); } void TaxonomyClsAnno::read_tree(const std::string &tax_tree_filepath, ChildrenList *tree) { std::ifstream f(tax_tree_filepath); if (!f.good()) { - logger->error("Failed to open Taxonomic Tree file {}.", tax_tree_filepath); + logger->error("Failed to open Taxonomic Tree file {}", tax_tree_filepath); exit(1); } @@ -64,18 +151,20 @@ void TaxonomyClsAnno::read_tree(const std::string &tax_tree_filepath, ChildrenLi tsl::hopscotch_map full_parents_list; while (getline(f, line)) { if (line == "") { - logger->error("The Taxonomic Tree file contains empty lines. Please make sure that this file was not manually modified: {}.", + logger->error("The Taxonomic Tree file contains empty lines. " + "Please make sure that this file was not manually modified: {}", tax_tree_filepath); exit(1); } std::vector parts = utils::split_string(line, "\t"); if (parts.size() <= 2) { - logger->error("The Taxonomic tree filepath contains incomplete lines. Please make sure that this file was not manually modified: {}.", + logger->error("The Taxonomic tree filepath contains incomplete lines. 
" + "Please make sure that this file was not manually modified: {}", tax_tree_filepath); exit(1); } - uint32_t act = static_cast(std::stoull(parts[0])); - uint32_t parent = static_cast(std::stoull(parts[2])); + uint32_t act = std::stoul(parts[0]); + uint32_t parent = std::stoul(parts[2]); full_parents_list[act] = parent; node_parent[act] = parent; } @@ -118,7 +207,7 @@ void TaxonomyClsAnno::read_tree(const std::string &tax_tree_filepath, ChildrenLi } } if (num_taxid_failed) { - logger->warn("During the tax_tree_filepath {} parsing, {} taxids were not found out of {} evaluations.", + logger->warn("During the tax_tree_filepath {} parsing, {} taxids were not found out of {} total evaluations.", tax_tree_filepath, num_taxid_failed, relevant_taxids.size()); } @@ -181,24 +270,24 @@ void TaxonomyClsAnno::rmq_preprocessing(const std::vector &tree_lineariza std::vector TaxonomyClsAnno::get_lca_taxids_for_seq(const std::string_view &sequence, bool reversed) const { cerr << "Assign class not implemented reversed = " << reversed << "\n"; - throw std::runtime_error("get_lca_taxids_for_seq TaxonomyClsAnno not implemented. Received seq size" + to_string(sequence.size())); - exit(0); + throw std::runtime_error("get_lca_taxids_for_seq TaxonomyClsAnno not implemented. Received seq size" + + to_string(sequence.size())); } std::vector TaxonomyClsImportDB::get_lca_taxids_for_seq(const std::string_view &sequence, bool reversed) const { cerr << "Assign class not implemented reversed = " << reversed << "\n"; - throw std::runtime_error("get_lca_taxids_for_seq TaxonomyClsImportDB not implemented. Received seq size" + to_string(sequence.size())); - exit(0); + throw std::runtime_error("get_lca_taxids_for_seq TaxonomyClsImportDB not implemented. Received seq size" + + to_string(sequence.size())); } TaxId TaxonomyClsAnno::find_lca(const std::vector &taxids) const { - throw std::runtime_error("find_lca TaxonomyClsAnno not implemented. 
Received taxids size" + to_string(taxids.size())); - exit(0); + throw std::runtime_error("find_lca TaxonomyClsAnno not implemented. Received taxids size" + + to_string(taxids.size())); } TaxId TaxonomyClsImportDB::find_lca(const std::vector &taxids) const { - throw std::runtime_error("find_lca TaxonomyClsImportDB not implemented. Received taxids size" + to_string(taxids.size())); - exit(0); + throw std::runtime_error("find_lca TaxonomyClsImportDB not implemented. Received taxids size" + + to_string(taxids.size())); } } // namespace annot diff --git a/metagraph/src/annotation/taxonomy/tax_classifier.hpp b/metagraph/src/annotation/taxonomy/tax_classifier.hpp index 12ff6e0e06..1aa59c34c9 100644 --- a/metagraph/src/annotation/taxonomy/tax_classifier.hpp +++ b/metagraph/src/annotation/taxonomy/tax_classifier.hpp @@ -1,14 +1,6 @@ #ifndef __TAX_CLASSIFIER_HPP__ #define __TAX_CLASSIFIER_HPP__ -#ifdef TESTING -#define PRIVATE_TESTABLE public -#define PROTECTED_TESTABLE public -#else -#define PRIVATE_TESTABLE private -#define PROTECTED_TESTABLE protected -#endif - #include #include @@ -21,7 +13,7 @@ using TaxId = std::uint32_t; using ChildrenList = tsl::hopscotch_map>; class TaxonomyBase { -public: + public: using KmerId = annot::MultiLabelEncoded::Index; using node_index = graph::SequenceGraph::node_index; @@ -32,15 +24,21 @@ class TaxonomyBase { }; TaxonomyBase() {}; - TaxonomyBase(const double lca_coverage_rate, const double kmers_discovery_rate) : - _lca_coverage_rate(lca_coverage_rate), _kmers_discovery_rate(kmers_discovery_rate) {}; - - virtual ~TaxonomyBase() {}; + TaxonomyBase(const double lca_coverage_rate, const double kmers_discovery_rate) + : _lca_coverage_rate(lca_coverage_rate), + _kmers_discovery_rate(kmers_discovery_rate) {}; TaxId assign_class(const std::string &sequence) const; -PROTECTED_TESTABLE: - void assign_label_type(const std::string &label, bool *require_accversion_to_taxid_map); + protected: + /** Recognise the label type by parsing one 
sample_label. + * + * @param [input] sample_label + * @param [output] the returned boolean value is later used to decide if we need to parse the accession version to taxid lookup table: + * if false: then the taxid is part of the label; + * if true: then the taxid is not part of the label; + */ + bool assign_label_type(const std::string &sample_label); virtual TaxId find_lca(const std::vector &taxids) const = 0; @@ -102,19 +100,19 @@ class TaxonomyBase { }; class TaxonomyClsImportDB : public TaxonomyBase { -public: + public: // todo implement TaxonomyClsImportDB(const std::string &taxdb_filepath, const double lca_coverage_rate, const double kmers_discovery_rate); -PRIVATE_TESTABLE: + private: std::vector get_lca_taxids_for_seq(const std::string_view &sequence, bool reversed) const; TaxId find_lca(const std::vector &taxids) const; }; class TaxonomyClsAnno : public TaxonomyBase { -public: + public: /** * TaxonomyCls constructor * @@ -136,7 +134,7 @@ class TaxonomyClsAnno : public TaxonomyBase { TaxId assign_class_toplabels(const std::string &sequence, const double label_fraction) const; -PRIVATE_TESTABLE: + private: /** * Reads and returns the taxonomic tree as a list of children. 
* diff --git a/metagraph/tests/annotation/taxonomy/test_taxonomy.cpp b/metagraph/tests/annotation/taxonomy/test_taxonomy.cpp index 1543638537..de4e4fe2ce 100644 --- a/metagraph/tests/annotation/taxonomy/test_taxonomy.cpp +++ b/metagraph/tests/annotation/taxonomy/test_taxonomy.cpp @@ -1,14 +1,14 @@ #include "gtest/gtest.h" -#define TESTING - #include #include +#define private public +#define protected public + #include "annotation/taxonomy/tax_classifier.hpp" -namespace mtg { -namespace test { +namespace { TEST (TaxonomyTest, ClsAnno_DfsStatistics) { mtg::annot::TaxonomyClsAnno *tax = new mtg::annot::TaxonomyClsAnno(); @@ -88,4 +88,3 @@ TEST (TaxonomyTest, ClsAnno_RmqPreprocessing) { } } -} From e96ead656c5e0b529cc486c00b64dd35fd2611ab Mon Sep 17 00:00:00 2001 From: Radu Muntean Date: Wed, 28 Jul 2021 15:46:09 +0200 Subject: [PATCH 10/17] add some small methods that will be further used for tax class Signed-off-by: Radu Muntean --- metagraph/src/graph/annotated_dbg.cpp | 28 +++++++++++++++++++++++++++ metagraph/src/graph/annotated_dbg.hpp | 4 ++++ 2 files changed, 32 insertions(+) diff --git a/metagraph/src/graph/annotated_dbg.cpp b/metagraph/src/graph/annotated_dbg.cpp index 5db5ff76aa..63436cf9a2 100644 --- a/metagraph/src/graph/annotated_dbg.cpp +++ b/metagraph/src/graph/annotated_dbg.cpp @@ -776,6 +776,34 @@ ::call_annotated_nodes(const Label &label, ); } +void AnnotatedDBG::call_annotated_rows(const std::vector &rows, + std::function callback_cell, + std::function callback_row) const { + assert(check_compatibility()); + + auto unique_matrix_rows = annotator_->get_matrix().get_rows(rows); + + //TODO make sure that this function works even if we have duplications in 'rows'. Then, delete this error catch. 
+ if (rows.size() != unique_matrix_rows.size()) { + throw std::runtime_error("The current 'call_annotated_rows' call contains duplication."); + } + + if (unique_matrix_rows.size() >= std::numeric_limits::max()) { + throw std::runtime_error("The current 'call_annotated_rows' call has returned, " + + std::to_string(unique_matrix_rows.size()) + + "rows. The maximum number of rows that can be returned is " + + std::to_string(std::numeric_limits::max()) + + ". Please reduce the query batch size"); + } + const auto &label_encoder = annotator_->get_label_encoder(); + for (auto row : unique_matrix_rows) { + for (auto cell : row) { + callback_cell(label_encoder.decode(cell)); + } + callback_row(); + } +} + bool AnnotatedSequenceGraph::check_compatibility() const { return graph_->max_index() == annotator_->num_objects(); } diff --git a/metagraph/src/graph/annotated_dbg.hpp b/metagraph/src/graph/annotated_dbg.hpp index 56e0dfa747..bd87eb85cd 100644 --- a/metagraph/src/graph/annotated_dbg.hpp +++ b/metagraph/src/graph/annotated_dbg.hpp @@ -156,6 +156,10 @@ class AnnotatedDBG : public AnnotatedSequenceGraph { int32_t match_score = 1, int32_t mismatch_score = 2) const; + void call_annotated_rows(const std::vector &rows, + std::function callback_cell, + std::function callback_row) const; + private: DeBruijnGraph &dbg_; }; From 456287cb2aa506c9d4a212cf7d9b8633b67d4921 Mon Sep 17 00:00:00 2001 From: Radu Muntean Date: Wed, 28 Jul 2021 18:33:22 +0200 Subject: [PATCH 11/17] address comments --- metagraph/src/graph/annotated_dbg.cpp | 28 --------------------------- metagraph/src/graph/annotated_dbg.hpp | 4 ---- 2 files changed, 32 deletions(-) diff --git a/metagraph/src/graph/annotated_dbg.cpp b/metagraph/src/graph/annotated_dbg.cpp index 63436cf9a2..5db5ff76aa 100644 --- a/metagraph/src/graph/annotated_dbg.cpp +++ b/metagraph/src/graph/annotated_dbg.cpp @@ -776,34 +776,6 @@ ::call_annotated_nodes(const Label &label, ); } -void AnnotatedDBG::call_annotated_rows(const std::vector 
&rows, - std::function callback_cell, - std::function callback_row) const { - assert(check_compatibility()); - - auto unique_matrix_rows = annotator_->get_matrix().get_rows(rows); - - //TODO make sure that this function works even if we have duplications in 'rows'. Then, delete this error catch. - if (rows.size() != unique_matrix_rows.size()) { - throw std::runtime_error("The current 'call_annotated_rows' call contains duplication."); - } - - if (unique_matrix_rows.size() >= std::numeric_limits::max()) { - throw std::runtime_error("The current 'call_annotated_rows' call has returned, " - + std::to_string(unique_matrix_rows.size()) + - "rows. The maximum number of rows that can be returned is " + - std::to_string(std::numeric_limits::max()) + - ". Please reduce the query batch size"); - } - const auto &label_encoder = annotator_->get_label_encoder(); - for (auto row : unique_matrix_rows) { - for (auto cell : row) { - callback_cell(label_encoder.decode(cell)); - } - callback_row(); - } -} - bool AnnotatedSequenceGraph::check_compatibility() const { return graph_->max_index() == annotator_->num_objects(); } diff --git a/metagraph/src/graph/annotated_dbg.hpp b/metagraph/src/graph/annotated_dbg.hpp index bd87eb85cd..56e0dfa747 100644 --- a/metagraph/src/graph/annotated_dbg.hpp +++ b/metagraph/src/graph/annotated_dbg.hpp @@ -156,10 +156,6 @@ class AnnotatedDBG : public AnnotatedSequenceGraph { int32_t match_score = 1, int32_t mismatch_score = 2) const; - void call_annotated_rows(const std::vector &rows, - std::function callback_cell, - std::function callback_row) const; - private: DeBruijnGraph &dbg_; }; From 74747f2763eb37035fcfa26ad622eb6c19ef820d Mon Sep 17 00:00:00 2001 From: Radu Muntean Date: Mon, 2 Aug 2021 14:48:13 +0200 Subject: [PATCH 12/17] implement the taxonomic classification with queries on the annotation matrix Signed-off-by: Radu Muntean --- .../annotation/taxonomy/tax_classifier.cpp | 227 +++++++++++++++++- .../annotation/taxonomy/test_taxonomy.cpp | 
208 ++++++++++++++++ 2 files changed, 432 insertions(+), 3 deletions(-) diff --git a/metagraph/src/annotation/taxonomy/tax_classifier.cpp b/metagraph/src/annotation/taxonomy/tax_classifier.cpp index e3b1de1472..068d00339e 100644 --- a/metagraph/src/annotation/taxonomy/tax_classifier.cpp +++ b/metagraph/src/annotation/taxonomy/tax_classifier.cpp @@ -6,6 +6,7 @@ #include "annotation/representation/annotation_matrix/annotation_matrix.hpp" #include "common/unix_tools.hpp" +#include "common/seq_tools/reverse_complement.hpp" #include "common/utils/string_utils.hpp" #include "common/logger.hpp" @@ -269,9 +270,229 @@ void TaxonomyClsAnno::rmq_preprocessing(const std::vector &tree_lineariza } std::vector TaxonomyClsAnno::get_lca_taxids_for_seq(const std::string_view &sequence, bool reversed) const { - cerr << "Assign class not implemented reversed = " << reversed << "\n"; - throw std::runtime_error("get_lca_taxids_for_seq TaxonomyClsAnno not implemented. Received seq size" - + to_string(sequence.size())); + // num_kmers represents the total number of kmers parsed until the current time. + uint32_t num_kmers = 0; + + // 'kmer_idx' and 'kmer_val' are storing the indexes and values of all the nonzero kmers in the given read. + // The list of kmers, 'kmer_val', will be further sent to "matrix.getrows()" method; + // The list of indexes, 'kmer_idx', will be used to associate one row from "matrix.getrows()" with the corresponding kmer index. + std::vector kmer_idx; + std::vector kmer_val; + + if (sequence.size() >= std::numeric_limits::max()) { + logger->error("The given sequence contains more than 2^32 bp."); + std::exit(1); + } + + auto anno_graph = _anno_matrix->get_graph_ptr(); + anno_graph->map_to_nodes(sequence, [&](node_index i) { + num_kmers++; + if (i <= 0 || i >= anno_graph->max_index()) { + return; + } + kmer_val.push_back(i - 1); + kmer_idx.push_back(num_kmers - 1); + }); + + // Compute the LCA normalized taxid for each nonzero kmer in the given read. 
+ const auto unique_matrix_rows = _anno_matrix->get_annotation().get_matrix().get_rows(kmer_val); + //TODO make sure that this function works even if we have duplications in 'rows'. Then, delete this error catch. + if (kmer_val.size() != unique_matrix_rows.size()) { + throw std::runtime_error("Internal error: There must be no duplications in the received set of 'rows' in 'call_annotated_rows'."); + } + + if (unique_matrix_rows.size() >= std::numeric_limits::max()) { + throw std::runtime_error("Internal error: There must be less than 2^32 unique rows. Reduce the query batch size."); + } + + const auto &label_encoder = _anno_matrix->get_annotation().get_label_encoder(); + + TaxId taxid; + uint64_t cnt_kmer_idx = 0; + std::vector curr_kmer_taxids; + std::vector seq_taxids(num_kmers); + + for (auto row : unique_matrix_rows) { + for (auto cell : row) { + if (get_taxid_from_label(label_encoder.decode(cell), &taxid)) { + curr_kmer_taxids.push_back(taxid); + } + } + if (curr_kmer_taxids.size() != 0) { + if (not reversed) { + seq_taxids[kmer_idx[cnt_kmer_idx]] = find_lca(curr_kmer_taxids); + } else { + seq_taxids[num_kmers - 1 - kmer_idx[cnt_kmer_idx]] = find_lca(curr_kmer_taxids); + } + } + cnt_kmer_idx++; + curr_kmer_taxids.clear(); + } + + return seq_taxids; +} + +TaxId TaxonomyBase::assign_class(const std::string &sequence) const { + std::vector forward_taxids = get_lca_taxids_for_seq(sequence, false); + + std::string reversed_sequence(sequence); + reverse_complement(reversed_sequence.begin(), reversed_sequence.end()); + std::vector backward_taxids = get_lca_taxids_for_seq(reversed_sequence, true); + + tsl::hopscotch_map num_kmers_per_node; + + // total_discovered_kmers represents the number of nonzero kmers according to both forward and reversed read. + uint32_t num_discovered_kmers = 0; + const uint32_t num_total_kmers = forward_taxids.size(); + + // Find the LCA taxid for each kmer without any dependency on the orientation of the read. 
+ for (uint32_t i = 0; i < num_total_kmers; ++i) { + if (forward_taxids[i] == 0 && backward_taxids[i] == 0) { + continue; + } + TaxId curr_taxid; + if (backward_taxids[i] == 0) { + curr_taxid = forward_taxids[i]; + } else if (forward_taxids[i] == 0) { + curr_taxid = backward_taxids[i]; + } else { + // In case that both 'forward_taxid[i]' and 'backward_taxids[i]' are nonzero, compute the LCA. + TaxId forward_taxid = forward_taxids[i]; + TaxId backward_taxid = backward_taxids[i]; + if (forward_taxid == 0) { + curr_taxid = backward_taxid; + } else if (backward_taxid == 0) { + curr_taxid = forward_taxid; + } else { + curr_taxid = find_lca({forward_taxid, backward_taxid}); + } + } + if (curr_taxid) { + num_discovered_kmers ++; + num_kmers_per_node[curr_taxid]++; + } + } + + if (num_discovered_kmers <= _kmers_discovery_rate * num_total_kmers) { + return 0; // 0 is a wildcard for not enough discovered kmers. + } + + tsl::hopscotch_set nodes_already_propagated; + tsl::hopscotch_map node_scores; + + uint32_t desired_number_kmers = num_discovered_kmers * _lca_coverage_rate; + TaxId best_lca = root_node; + uint32_t best_lca_dist_to_root = 1; + + // Update the nodes' score by iterating through all the nodes with nonzero kmers. 
+ for (const pair &node_pair : num_kmers_per_node) { + TaxId start_node = node_pair.first; + this->update_scores_and_lca(start_node, num_kmers_per_node, desired_number_kmers, &node_scores, + &nodes_already_propagated, &best_lca, &best_lca_dist_to_root); + } + return best_lca; +} + + +void TaxonomyBase::update_scores_and_lca(const TaxId start_node, + const tsl::hopscotch_map &num_kmers_per_node, + const uint64_t desired_number_kmers, + tsl::hopscotch_map *node_scores, + tsl::hopscotch_set *nodes_already_propagated, + TaxId *best_lca, + uint32_t *best_lca_dist_to_root) const { + if (nodes_already_propagated->count(start_node)) { + return; + } + uint64_t score_from_processed_parents = 0; + uint64_t score_from_unprocessed_parents = num_kmers_per_node.at(start_node); + + // processed_parents represents the set of nodes on the path start_node->root that have already been processed in the previous iterations. + std::vector processed_parents; + std::vector unprocessed_parents; + + TaxId act_node = start_node; + unprocessed_parents.push_back(act_node); + + while (act_node != root_node) { + act_node = node_parent.at(act_node); + if (!nodes_already_propagated->count(act_node)) { + if (num_kmers_per_node.count(act_node)) { + score_from_unprocessed_parents += num_kmers_per_node.at(act_node); + } + unprocessed_parents.push_back(act_node); + } else { + if (num_kmers_per_node.count(act_node)) { + score_from_processed_parents += num_kmers_per_node.at(act_node); + } + processed_parents.push_back(act_node); + } + } + // The score of all the nodes in 'processed_parents' will be updated with 'score_from_unprocessed_parents' only. + // The nodes in 'unprocessed_parents' will be updated with the sum 'score_from_processed_parents + score_from_unprocessed_parents'. 
+ for (uint64_t i = 0; i < unprocessed_parents.size(); ++i) { + TaxId &act_node = unprocessed_parents[i]; + (*node_scores)[act_node] = + score_from_processed_parents + score_from_unprocessed_parents; + nodes_already_propagated->insert(act_node); + + uint64_t act_dist_to_root = + processed_parents.size() + unprocessed_parents.size() - i; + + // Test if the current node's score would be a better LCA result. + if ((*node_scores)[act_node] >= desired_number_kmers + && (act_dist_to_root > *best_lca_dist_to_root + || (act_dist_to_root == *best_lca_dist_to_root && (*node_scores)[act_node] > (*node_scores)[*best_lca]))) { + *best_lca = act_node; + *best_lca_dist_to_root = act_dist_to_root; + } + } + for (uint64_t i = 0; i < processed_parents.size(); ++i) { + TaxId &act_node = processed_parents[i]; + (*node_scores)[act_node] += score_from_unprocessed_parents; + + uint64_t act_dist_to_root = processed_parents.size() - i; + if ((*node_scores)[act_node] >= desired_number_kmers + && (act_dist_to_root > *best_lca_dist_to_root + || (act_dist_to_root == *best_lca_dist_to_root && (*node_scores)[act_node] > (*node_scores)[*best_lca]))) { + *best_lca = act_node; + *best_lca_dist_to_root = act_dist_to_root; + } + } +} + +TaxId TaxonomyClsAnno::find_lca(const std::vector &taxids) const { + if (taxids.empty()) { + logger->error("Internal error: Can't find LCA for an empty set of normalized taxids."); + std::exit(1); + } + uint64_t left_idx = node_to_linearization_idx.at(taxids[0]); + uint64_t right_idx = node_to_linearization_idx.at(taxids[0]); + + for (const TaxId &taxid : taxids) { + if (node_to_linearization_idx.at(taxid) < left_idx) { + left_idx = node_to_linearization_idx.at(taxid); + } + if (node_to_linearization_idx.at(taxid) > right_idx) { + right_idx = node_to_linearization_idx.at(taxid); + } + } + // The node with maximum node_depth in 'linearization[left_idx : right_idx+1]' is the LCA of the given set. 
+ + // Find the maximum node_depth between the 2 overlapping intervals of size 2^log_dist. + uint32_t log_dist = LOG2(right_idx - left_idx); + if (rmq_data.size() <= log_dist) { + logger->error("Internal error: the RMQ was not precomputed before the LCA queries."); + std::exit(1); + } + + uint32_t left_lca = rmq_data[log_dist][left_idx]; + uint32_t right_lca = rmq_data[log_dist][right_idx - (1 << log_dist) + 1]; + + if (node_depth.at(left_lca) > node_depth.at(right_lca)) { + return left_lca; + } + return right_lca; } std::vector TaxonomyClsImportDB::get_lca_taxids_for_seq(const std::string_view &sequence, bool reversed) const { diff --git a/metagraph/tests/annotation/taxonomy/test_taxonomy.cpp b/metagraph/tests/annotation/taxonomy/test_taxonomy.cpp index de4e4fe2ce..5982e9dbbc 100644 --- a/metagraph/tests/annotation/taxonomy/test_taxonomy.cpp +++ b/metagraph/tests/annotation/taxonomy/test_taxonomy.cpp @@ -2,6 +2,8 @@ #include #include +#include "seq_io/sequence_io.hpp" +#include "../test_annotated_dbg_helpers.hpp" #define private public #define protected public @@ -87,4 +89,210 @@ TEST (TaxonomyTest, ClsAnno_RmqPreprocessing) { EXPECT_EQ(expected_rmq, tax->rmq_data); } +TEST (TaxonomyTest, ClsAnno_FindLca) { + mtg::annot::TaxonomyClsAnno *tax = new mtg::annot::TaxonomyClsAnno(); + /* + * Tree configuration: + * node 0 -> 1 2 3 + * node 1 -> 4 5 + * node 2 -> _ + * node 3 -> 6 + * node 4 -> 7 8 + */ + + tax->rmq_data = { + {0, 1, 4, 7, 4, 8, 4, 1, 5, 1, 0, 2, 0, 3, 6, 3, 0}, + {0, 1, 4, 4, 4, 4, 1, 1, 1, 0, 0, 0, 0, 3, 3, 0, 0}, + {0, 1, 4, 4, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + }; + tax->node_to_linearization_idx = { + {0, 0}, + {1, 1}, + {2, 11}, + {3, 13}, + {4, 2}, + {5, 8}, + {6, 14}, + {7, 3}, + {8, 5}, + }; + tax->node_depth = { + {0, 4}, + {1, 3}, + {2, 1}, + {3, 2}, + {4, 2}, + {5, 1}, + {6, 1}, + {7, 1}, + {8, 1}, + }; + + struct query_lca { + 
std::string test_id; + uint32_t expected; + std::vector nodes; + }; + + std::vector queries = { + {"test1", 0, {7, 6}}, + {"test2", 0, {1, 2}}, + {"test3", 0, {3, 4}}, + {"test4", 0, {1, 2, 5, 6}}, + {"test5", 2, {2}}, + {"test6", 3, {3, 6}}, + {"test6b", 3, {6, 3}}, + {"test7", 1, {7, 8, 5}}, + {"test8", 1, {4, 5}}, + {"test9", 4, {7, 8}}, + {"test10", 0, {0, 1, 2, 3, 4, 5, 6, 7, 8}}, + }; + + for(const auto &it: queries) { + EXPECT_EQ(make_pair(it.test_id, it.expected), + make_pair(it.test_id, tax->find_lca(it.nodes))); + } +} + +TEST (TaxonomyTest, ClsAnno_ClassifierUpdateScoresAndLca) { + mtg::annot::TaxonomyClsAnno tax_classifier; + + tax_classifier.root_node = 1; + tax_classifier.node_parent = { {1, 1}, + {2, 1}, {3, 1}, + {4, 3}, {5, 3}, + {6, 4}, {7, 4} + }; + + tsl::hopscotch_map num_kmers_per_node = { + {1, 20}, {2, 1}, {3, 15}, {4, 25}, {5, 6}, {6, 15}, {7, 3} // leaves 2, 7 and 5 have a smaller number of kmers. + }; + + struct query_tax_map_update { + std::string test_id; + std::string description; + uint64_t desired_number_kmers; + vector> ordered_node_sets; + tsl::hopscotch_map expected_node_scores; + tsl::hopscotch_set expected_nodes_already_propagated; + uint32_t expected_best_lca; + uint32_t expected_best_lca_dist_to_root; + }; + + // All the lists in `ordered_node_sets` are covering the entire taxonomic tree. + // Thus, the evaluation of `update_scores_and_lca` on any of those sets should return the same results. 
+ vector> ordered_node_sets = { + {1, 2, 3, 4, 5, 6, 7}, + {7, 6, 5, 4, 3, 2, 1}, + {7, 4, 6, 3, 5, 1, 2}, + {4, 6, 7, 3, 5, 1, 2}, + {2, 5, 4, 6, 7, 3, 1}, + {2, 6, 7, 5}, + {6, 7, 5, 2}, + {6, 7, 5, 2, 1}, + {3, 5, 6, 7, 2} + }; + + std::vector tests = { + { "test1", + "desired_number_kmers is equal to node_score[6]; expect LCA taxid = 6", + 75, + ordered_node_sets, + {{1, 85}, {2, 21}, {3, 84}, {4, 78}, {5, 41}, {6, 75}, {7, 63}}, + {1, 2, 3, 4, 5, 6, 7}, + 6, + 4 + }, + { "test2", + "desired_number_kmers is equal to node_score[6]+1; expect LCA taxid = 4", + 76, + ordered_node_sets, + {{1, 85}, {2, 21}, {3, 84}, {4, 78}, {5, 41}, {6, 75}, {7, 63}}, + {1, 2, 3, 4, 5, 6, 7}, + 4, + 3 + }, + { "test3", + "desired_number_kmers is equal to node_score[4]+1; expect LCA taxid = 3", + 79, + ordered_node_sets, + {{1, 85}, {2, 21}, {3, 84}, {4, 78}, {5, 41}, {6, 75}, {7, 63}}, + {1, 2, 3, 4, 5, 6, 7}, + 3, + 2 + }, + { "test4", + "desired_number_kmers is equal to node_score[3]+1; expect LCA taxid = 1", + 85, + ordered_node_sets, + {{1, 85}, {2, 21}, {3, 84}, {4, 78}, {5, 41}, {6, 75}, {7, 63}}, + {1, 2, 3, 4, 5, 6, 7}, + 1, + 1 + }, + { "test5", + "Check updated scores after processing only node 4", + 100, + {{4}}, + {{4, 60}, {3, 60}, {1, 60}}, + {1, 3, 4}, + 1, + 1 + }, + { "test6", + "Check updated scores after processing only the nodes 4 and 6", + 100, + {{4, 6}, {6, 4}}, + {{6, 75}, {4, 75}, {3, 75}, {1, 75}}, + {1, 3, 4, 6}, + 1, + 1 + }, + { "test7", + "Check updated scores after processing only the nodes 7 and 5", + 100, + {{7, 5}, {5, 7}}, + {{7, 63}, {4, 63}, {5, 41}, {3, 69}, {1, 69}}, + {1, 3, 4, 5, 7}, + 1, + 1 + }, + { "test8", + "Check updated scores after processing only the nodes 2, 6 and 7", + 100, + {{2, 6, 7}, {2, 7, 6}, {6, 2, 7}, {6, 7, 2}, {7, 2, 6}, {7, 6, 2}}, + {{1, 79}, {2, 21}, {3, 78}, {4, 78}, {6, 75}, {7, 63}}, + {1, 2, 3, 4, 6, 7}, + 1, + 1 + }, + }; + + for (const auto &test: tests) { + for (std::vector nodes_set : test.ordered_node_sets) { 
+ tsl::hopscotch_set nodes_already_propagated; + tsl::hopscotch_map node_scores; + uint32_t best_lca = tax_classifier.root_node; + uint32_t best_lca_dist_to_root = 1; + + for (uint64_t node: nodes_set) { + tax_classifier.update_scores_and_lca(node, num_kmers_per_node, test.desired_number_kmers, + &node_scores, &nodes_already_propagated, + &best_lca, &best_lca_dist_to_root); + } + + EXPECT_EQ(make_pair(test.test_id, test.expected_node_scores), + make_pair(test.test_id, node_scores)); + EXPECT_EQ(make_pair(test.test_id, test.expected_nodes_already_propagated), + make_pair(test.test_id, nodes_already_propagated)); + EXPECT_EQ(make_pair(test.test_id, test.expected_best_lca), + make_pair(test.test_id, best_lca)); + EXPECT_EQ(make_pair(test.test_id, test.expected_best_lca_dist_to_root), + make_pair(test.test_id, best_lca_dist_to_root)); + } + } +} + } From 84d2542e5222dc2022368e17b6381936837580ff Mon Sep 17 00:00:00 2001 From: Radu Muntean Date: Tue, 3 Aug 2021 23:02:00 +0200 Subject: [PATCH 13/17] address review & rebase Signed-off-by: Radu Muntean --- metagraph/src/annotation/taxonomy/tax_classifier.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagraph/src/annotation/taxonomy/tax_classifier.cpp b/metagraph/src/annotation/taxonomy/tax_classifier.cpp index 068d00339e..ff43076b8d 100644 --- a/metagraph/src/annotation/taxonomy/tax_classifier.cpp +++ b/metagraph/src/annotation/taxonomy/tax_classifier.cpp @@ -480,7 +480,7 @@ TaxId TaxonomyClsAnno::find_lca(const std::vector &taxids) const { // The node with maximum node_depth in 'linearization[left_idx : right_idx+1]' is the LCA of the given set. // Find the maximum node_depth between the 2 overlapping intervals of size 2^log_dist. 
- uint32_t log_dist = LOG2(right_idx - left_idx); + uint32_t log_dist = sdsl::bits::hi(right_idx - left_idx); if (rmq_data.size() <= log_dist) { logger->error("Internal error: the RMQ was not precomputed before the LCA queries."); std::exit(1); From 49c768ee94f0c54c0964bde8306130ef957a7a1e Mon Sep 17 00:00:00 2001 From: Radu Muntean Date: Thu, 5 Aug 2021 21:23:29 +0200 Subject: [PATCH 14/17] minor Signed-off-by: Radu Muntean --- .../annotation/taxonomy/tax_classifier.cpp | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/metagraph/src/annotation/taxonomy/tax_classifier.cpp b/metagraph/src/annotation/taxonomy/tax_classifier.cpp index ff43076b8d..73e5832bab 100644 --- a/metagraph/src/annotation/taxonomy/tax_classifier.cpp +++ b/metagraph/src/annotation/taxonomy/tax_classifier.cpp @@ -281,7 +281,7 @@ std::vector TaxonomyClsAnno::get_lca_taxids_for_seq(const std::string_vie if (sequence.size() >= std::numeric_limits::max()) { logger->error("The given sequence contains more than 2^32 bp."); - std::exit(1); + exit(1); } auto anno_graph = _anno_matrix->get_graph_ptr(); @@ -298,11 +298,13 @@ std::vector TaxonomyClsAnno::get_lca_taxids_for_seq(const std::string_vie const auto unique_matrix_rows = _anno_matrix->get_annotation().get_matrix().get_rows(kmer_val); //TODO make sure that this function works even if we have duplications in 'rows'. Then, delete this error catch. if (kmer_val.size() != unique_matrix_rows.size()) { - throw std::runtime_error("Internal error: There must be no duplications in the received set of 'rows' in 'call_annotated_rows'."); + throw std::runtime_error("Internal error: The tool doesn't know how to treat the case of " + "kmer duplications in the same read. Please contact the maintainers."); } if (unique_matrix_rows.size() >= std::numeric_limits::max()) { - throw std::runtime_error("Internal error: There must be less than 2^32 unique rows. 
Reduce the query batch size."); + throw std::runtime_error("Internal error: There must be less than 2^32 unique rows. " + "Please reduce the query batch size."); } const auto &label_encoder = _anno_matrix->get_annotation().get_label_encoder(); @@ -341,7 +343,7 @@ TaxId TaxonomyBase::assign_class(const std::string &sequence) const { tsl::hopscotch_map num_kmers_per_node; - // total_discovered_kmers represents the number of nonzero kmers according to both forward and reversed read. + // num_discovered_kmers represents the number of nonzero kmers according to at least one of the forward and reversed read options. uint32_t num_discovered_kmers = 0; const uint32_t num_total_kmers = forward_taxids.size(); @@ -442,7 +444,10 @@ void TaxonomyBase::update_scores_and_lca(const TaxId start_node, // Test if the current node's score would be a better LCA result. if ((*node_scores)[act_node] >= desired_number_kmers && (act_dist_to_root > *best_lca_dist_to_root - || (act_dist_to_root == *best_lca_dist_to_root && (*node_scores)[act_node] > (*node_scores)[*best_lca]))) { + || (act_dist_to_root == *best_lca_dist_to_root + && (*node_scores)[act_node] > (*node_scores)[*best_lca]) + ) + ) { *best_lca = act_node; *best_lca_dist_to_root = act_dist_to_root; } @@ -454,7 +459,10 @@ void TaxonomyBase::update_scores_and_lca(const TaxId start_node, uint64_t act_dist_to_root = processed_parents.size() - i; if ((*node_scores)[act_node] >= desired_number_kmers && (act_dist_to_root > *best_lca_dist_to_root - || (act_dist_to_root == *best_lca_dist_to_root && (*node_scores)[act_node] > (*node_scores)[*best_lca]))) { + || (act_dist_to_root == *best_lca_dist_to_root + && (*node_scores)[act_node] > (*node_scores)[*best_lca]) + ) + ) { *best_lca = act_node; *best_lca_dist_to_root = act_dist_to_root; } @@ -464,7 +472,7 @@ void TaxonomyBase::update_scores_and_lca(const TaxId start_node, TaxId TaxonomyClsAnno::find_lca(const std::vector &taxids) const { if (taxids.empty()) { logger->error("Internal error: Can't 
find LCA for an empty set of normalized taxids."); - std::exit(1); + exit(1); } uint64_t left_idx = node_to_linearization_idx.at(taxids[0]); uint64_t right_idx = node_to_linearization_idx.at(taxids[0]); @@ -483,7 +491,7 @@ TaxId TaxonomyClsAnno::find_lca(const std::vector &taxids) const { uint32_t log_dist = sdsl::bits::hi(right_idx - left_idx); if (rmq_data.size() <= log_dist) { logger->error("Internal error: the RMQ was not precomputed before the LCA queries."); - std::exit(1); + exit(1); } uint32_t left_lca = rmq_data[log_dist][left_idx]; @@ -501,11 +509,6 @@ std::vector TaxonomyClsImportDB::get_lca_taxids_for_seq(const std::string + to_string(sequence.size())); } -TaxId TaxonomyClsAnno::find_lca(const std::vector &taxids) const { - throw std::runtime_error("find_lca TaxonomyClsAnno not implemented. Received taxids size" - + to_string(taxids.size())); -} - TaxId TaxonomyClsImportDB::find_lca(const std::vector &taxids) const { throw std::runtime_error("find_lca TaxonomyClsImportDB not implemented. 
Received taxids size" + to_string(taxids.size())); From c9b5e9e098ee5e1fe4fc1a5cfceed78417110d56 Mon Sep 17 00:00:00 2001 From: Radu Muntean Date: Mon, 2 Aug 2021 21:47:26 +0200 Subject: [PATCH 15/17] add tax_class metagraph CLI + integration tests --- metagraph/integration_tests/test_taxonomy.py | 197 ++++++++++++++++++ .../annotation/taxonomy/tax_classifier.cpp | 30 +++ metagraph/src/cli/config/config.cpp | 29 +++ metagraph/src/cli/config/config.hpp | 6 + metagraph/src/cli/tax_class.cpp | 140 +++++++++++++ metagraph/src/cli/tax_class.hpp | 14 ++ metagraph/src/main.cpp | 4 + 7 files changed, 420 insertions(+) create mode 100644 metagraph/integration_tests/test_taxonomy.py create mode 100644 metagraph/src/cli/tax_class.cpp create mode 100644 metagraph/src/cli/tax_class.hpp diff --git a/metagraph/integration_tests/test_taxonomy.py b/metagraph/integration_tests/test_taxonomy.py new file mode 100644 index 0000000000..71b89ec7ee --- /dev/null +++ b/metagraph/integration_tests/test_taxonomy.py @@ -0,0 +1,197 @@ +import unittest +import subprocess +from subprocess import PIPE +from parameterized import parameterized + +from tempfile import TemporaryDirectory +import os + + +"""Test taxonomy classification framework""" + +METAGRAPH = './metagraph' +PROTEIN_MODE = os.readlink(METAGRAPH).endswith("_Protein") # TODO - decide if we need to consider this "_Protein" case +TEST_DATA_DIR = os.path.dirname(os.path.realpath(__file__)) + '/../tests/data' +TAX_DATA_DIR = TEST_DATA_DIR + "/taxonomic_data" + +tax_tests = { + 'one_thread': { + 'threads': 1, + }, + 'nine_threads': { + 'threads': 9, + } +} + +test_params = [name for name, _ in tax_tests.items()] + +class TestTaxonomy(unittest.TestCase): + def setUp(self): + self.tempdir = TemporaryDirectory() + self.tax_parent = {} + self.tax_root = -1 + self.lca_coverage = 0.9 + self.k = 20 + tax_lines = open(TAX_DATA_DIR + '/dumb_nodes.dmp').readlines() + for line in tax_lines: + act_node = line.split('\t')[0].strip() + act_parent = 
line.split('\t')[2].strip() + self.tax_parent[act_node] = act_parent + if act_node == act_parent: + self.tax_root = act_node + + def is_descendant(self, node: str, query: str) -> bool: + while query != self.tax_root: + query = self.tax_parent[query] + if query == node: + return True + return False + + def build_graph_and_annotation(self, num_threads: int): + construct_command = '{exe} build -p {num_threads} -k {k} -o {outfile} {input}'.format( + exe=METAGRAPH, + num_threads=num_threads, + k=self.k, + outfile=self.tempdir.name + '/graph', + input=TAX_DATA_DIR + '/tax_input.fa' + ) + res = subprocess.run([construct_command], shell=True) + self.assertEqual(res.returncode, 0) + + annotate_command = '{exe} annotate --anno-header -i {dbg} -o {anno} -p {num_threads} {input_fasta}'.format( + exe=METAGRAPH, + dbg=self.tempdir.name + '/graph.dbg', + anno=self.tempdir.name + '/annotation', + num_threads=num_threads, + input_fasta=TAX_DATA_DIR + '/tax_input.fa' + ) + res = subprocess.run([annotate_command], shell=True) + self.assertEqual(res.returncode, 0) + + def get_prediction_statistics_from(self, res_lines: [str]) -> {}: + result = {} + result["num_correct_tips"] = 0 + result["num_correct_internals"] = 0 + result["num_total_tips"] = 0 + result["num_total_internals"] = 0 + + result["num_descendant_prediction_internals"] = 0 + result["num_ancestor_prediction_tips"] = 0 + result["num_ancestor_prediction_internals"] = 0 + + result["num_wrong_prediction_tips"] = 0 + result["num_wrong_prediction_internals"] = 0 + + result["num_too_few_discovered_kmers"] = 0 + + for line in res_lines: + if line == "": + continue + query_expected = line.split(" ")[1].split("|")[1].strip() + query_prediction = line.split(" ")[7].split("'")[1].strip() + + # TaxId 0 means that there were not enough discovered kmers in order to realize the tax classification. + if query_prediction == "0": + result["num_too_few_discovered_kmers"] += 1 + continue + + # All the tax nodes with ids {10001, 10002 .. 
10008} represent internal nodes. TaxIds >= 10009 are reserved for the leaves. + if int(line.split(" ")[1].split("|")[1]) >= 10009: + # The current taxid is a tip, thus, it has no children in the taxonomic tree. + result["num_total_tips"] += 1 + if query_expected == query_prediction: + result["num_correct_tips"] += 1 + else: + if self.is_descendant(node=query_prediction, query=query_expected): + result["num_ancestor_prediction_tips"] += 1 + else: + result["num_wrong_prediction_tips"] += 1 + else: + # The current taxid is an internal node. + result["num_total_internals"] += 1 + if query_expected == query_prediction: + result["num_correct_internals"] += 1 + else: + if self.is_descendant(node=query_prediction, query=query_expected): + result["num_ancestor_prediction_internals"] += 1 + elif self.is_descendant(node=query_expected, query=query_prediction): + result["num_descendant_prediction_internals"] += 1 + else: + result["num_wrong_prediction_internals"] += 1 + return result + + @parameterized.expand(test_params) + @unittest.skipIf(PROTEIN_MODE, "No canonical mode for Protein alphabets") + def test_taxonomy_getrows(self, tax_test): + self.build_graph_and_annotation(tax_tests[tax_test]['threads']) + tax_class_command = '{exe} tax_class -i {dbg} {fasta_queries} --taxonomic-tree {tax_tree} \ + --lca-coverage-fraction {lca_coverage} --label-taxid-map {label_taxid_map} ' \ + '-p {num_threads} -a {anno}'.format( + exe=METAGRAPH, + dbg=self.tempdir.name + '/graph.dbg', + fasta_queries=TAX_DATA_DIR + '/tax_query.fa', + tax_tree=TAX_DATA_DIR + '/dumb_nodes.dmp', + lca_coverage=self.lca_coverage, + label_taxid_map=TAX_DATA_DIR + '/dumb.accession2taxid', + num_threads=tax_tests[tax_test]['threads'], + anno=self.tempdir.name + '/annotation.column.annodbg', + ) + res = subprocess.run([tax_class_command], shell=True, stdout=PIPE) + self.assertEqual(res.returncode, 0) + + res_lines = res.stdout.decode().rstrip().split('\n') + statistics = 
self.get_prediction_statistics_from(res_lines) + + self.assertEqual(statistics["num_total_tips"], 118) + self.assertEqual(statistics["num_total_internals"], 80) + + self.assertEqual(statistics["num_correct_tips"], 109) + self.assertEqual(statistics["num_correct_internals"], 38) + + self.assertEqual(statistics["num_ancestor_prediction_internals"], 5) + self.assertEqual(statistics["num_descendant_prediction_internals"], 34) + self.assertEqual(statistics["num_ancestor_prediction_tips"], 9) + + self.assertEqual(statistics["num_wrong_prediction_internals"], 3) + self.assertEqual(statistics["num_wrong_prediction_tips"], 0) + + self.assertEqual(statistics["num_too_few_discovered_kmers"], 2) + + @parameterized.expand(test_params) + @unittest.skipIf(PROTEIN_MODE, "No canonical mode for Protein alphabets") + def test_taxonomy_toplabels(self, tax_test): + self.build_graph_and_annotation(tax_tests[tax_test]['threads']) + tax_class_command = '{exe} tax_class -i {dbg} {fasta_queries} --taxonomic-tree {tax_tree} \ + --lca-coverage-fraction {lca_coverage} -p {num_threads} -a {anno} \ + --label-taxid-map {label_taxid_map} \ + --top-label-fraction {top_label_fraction}'.format( + exe=METAGRAPH, + dbg=self.tempdir.name + '/graph.dbg', + fasta_queries=TAX_DATA_DIR + '/tax_query.fa', + tax_tree=TAX_DATA_DIR + '/dumb_nodes.dmp', + lca_coverage=self.lca_coverage, + label_taxid_map=TAX_DATA_DIR + '/dumb.accession2taxid', + num_threads=tax_tests[tax_test]['threads'], + anno=self.tempdir.name + '/annotation.column.annodbg', + top_label_fraction=0.7, + ) + res = subprocess.run([tax_class_command], shell=True, stdout=PIPE) + self.assertEqual(res.returncode, 0) + + res_lines = res.stdout.decode().rstrip().split('\n') + statistics = self.get_prediction_statistics_from(res_lines) + + self.assertEqual(statistics["num_total_tips"], 118) + self.assertEqual(statistics["num_total_internals"], 68) + + self.assertEqual(statistics["num_correct_tips"], 74) + 
self.assertEqual(statistics["num_correct_internals"], 24) + + self.assertEqual(statistics["num_ancestor_prediction_internals"], 27) + self.assertEqual(statistics["num_descendant_prediction_internals"], 15) + self.assertEqual(statistics["num_ancestor_prediction_tips"], 44) + + self.assertEqual(statistics["num_wrong_prediction_internals"], 2) + self.assertEqual(statistics["num_wrong_prediction_tips"], 0) + + self.assertEqual(statistics["num_too_few_discovered_kmers"], 14) diff --git a/metagraph/src/annotation/taxonomy/tax_classifier.cpp b/metagraph/src/annotation/taxonomy/tax_classifier.cpp index 73e5832bab..135cc57f01 100644 --- a/metagraph/src/annotation/taxonomy/tax_classifier.cpp +++ b/metagraph/src/annotation/taxonomy/tax_classifier.cpp @@ -514,5 +514,35 @@ TaxId TaxonomyClsImportDB::find_lca(const std::vector &taxids) const { + to_string(taxids.size())); } +TaxId TaxonomyClsAnno::assign_class_toplabels(const std::string &sequence, const double label_fraction) const { + // Get all the labels with a frequency higher than 'label_fraction' among the kmers in the forward read. + std::vector labels_discovered = _anno_matrix->get_labels(sequence, label_fraction); + + std::string reversed_sequence = sequence; + reverse_complement(reversed_sequence.begin(), reversed_sequence.end()); + // Get all the labels with a frequency higher than 'label_fraction' among the kmers in the reversed read. + std::vector labels_discovered_rev = _anno_matrix->get_labels(reversed_sequence, label_fraction); + + // Usually, only one of the two sets ('labels_discovered', 'labels_discovered_rev') will be nonempty. 
+ + std::vector curr_taxids; + for (uint32_t i = 0; i < labels_discovered.size(); ++i) { + TaxId act; + if(get_taxid_from_label(labels_discovered[i], &act)) { + curr_taxids.push_back(act); + } + } + for (uint32_t i = 0; i < labels_discovered_rev.size(); ++i) { + TaxId act; + if(get_taxid_from_label(labels_discovered_rev[i], &act)) { + curr_taxids.push_back(act); + } + } + if (curr_taxids.size() == 0) { + return 0; // Wildcard for not being able to assign a taxid. + } + return find_lca(curr_taxids); +} + } // namespace annot } // namespace mtg diff --git a/metagraph/src/cli/config/config.cpp b/metagraph/src/cli/config/config.cpp index f2fd023fa8..d63d3f1598 100644 --- a/metagraph/src/cli/config/config.cpp +++ b/metagraph/src/cli/config/config.cpp @@ -73,6 +73,8 @@ Config::Config(int argc, char *argv[]) { identity = ASSEMBLE; } else if (!strcmp(argv[1], "relax_brwt")) { identity = RELAX_BRWT; + } else if (!strcmp(argv[1], "tax_class")) { + identity = TAX_CLASS; } else if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) { print_welcome_message(); print_usage(argv[0]); @@ -274,6 +276,16 @@ Config::Config(int argc, char *argv[]) { anno_labels_delimiter = std::string(get_value(i++)); } else if (!strcmp(argv[i], "--separately")) { separately = true; + } else if (!strcmp(argv[i], "--taxonomic-tree")) { + taxonomic_tree = std::string(get_value(i++)); + } else if (!strcmp(argv[i], "--taxonomic-db")) { + taxonomic_db = std::string(get_value(i++)); + } else if (!strcmp(argv[i], "--lca-coverage-fraction")) { + lca_coverage_fraction = std::stof(get_value(i++)); + } else if (!strcmp(argv[i], "--top-label-fraction")) { + top_label_fraction = std::stof(get_value(i++)); + } else if (!strcmp(argv[i], "--label-taxid-map")) { + label_taxid_map = std::string(get_value(i++)); } else if (!strcmp(argv[i], "--num-top-labels")) { num_top_labels = atoi(get_value(i++)); } else if (!strcmp(argv[i], "--port")) { @@ -858,6 +870,7 @@ void Config::print_usage(const std::string &prog_name, 
IdentityType identity) { fprintf(stderr, "\tquery\t\tannotate sequences from fast[a|q] files\n\n"); fprintf(stderr, "\tserver_query\tannotate received sequences and send annotations back\n\n"); + fprintf(stderr, "\ttax_class \tclassify sequences according to the taxonomic hierarchy\n"); return; } case BUILD: { @@ -1246,6 +1259,22 @@ void Config::print_usage(const std::string &prog_name, IdentityType identity) { fprintf(stderr, "\t-p --parallel [INT] \tmaximum number of parallel connections [1]\n"); // fprintf(stderr, "\t --cache-size [INT] \tnumber of uncompressed rows to store in the cache [0]\n"); } break; + case TAX_CLASS: { + fprintf(stderr, "Usage: %s tax_class [options]\n" + "\t\t -i FILE1 [[FILE2] ...]\n" + "\t\t -a --taxonomic_tree \n" + "\t\t [--label-taxid-map <*.accession2taxid>]" + "\tEach input file is given in FASTA or FASTQ format.\n\n", prog_name.c_str()); + + fprintf(stderr, "Available options for taxonomic classification:\n"); + fprintf(stderr, "\t --lca-coverage-fraction [FLOAT] \tfraction of kmers that needs to be covered\n" + "\t\t\t\t\t\tby the returned LCA, it's subtree and it's ancestors [0.66]\n"); + fprintf(stderr, "\t-p --parallel [INT] \t\t\tuse multiple threads for computation [1]\n"); + fprintf(stderr, "\t --discovery-fraction [FLOAT] \tfraction of labeled k-mers required for a valid annotation [0.7]\n"); + fprintf(stderr, "\t --top-label-fraction [FLOAT] \tif greater than 0, use a faster tax_class algorithm\n" + "\t\t\t\t\t\tthat returns the LCA of the labels linked to at least \n" + "\t\t\t\t\t\t'top_label_fraction' percent of the existent kmers [0]\n"); + } } fprintf(stderr, "\n\tGeneral options:\n"); diff --git a/metagraph/src/cli/config/config.hpp b/metagraph/src/cli/config/config.hpp index b58f4907bd..984c84a093 100644 --- a/metagraph/src/cli/config/config.hpp +++ b/metagraph/src/cli/config/config.hpp @@ -134,6 +134,8 @@ class Config { double alignment_max_nodes_per_seq_char = 12.0; double alignment_max_ram = 200; double 
alignment_min_exact_match = 0.0; + double lca_coverage_fraction = 0.66; + double top_label_fraction = 0; double min_fraction = 0.0; double max_fraction = 1.0; std::vector count_slice_quantiles; @@ -154,6 +156,9 @@ class Config { std::string fasta_anno_comment_delim = UNINITIALIZED_STR; std::string header = ""; std::string host_address; + std::string taxonomic_tree; + std::string taxonomic_db; + std::string label_taxid_map; std::string linkage_file; std::string intersected_columns; @@ -180,6 +185,7 @@ class Config { RELAX_BRWT, QUERY, SERVER_QUERY, + TAX_CLASS, }; IdentityType identity = NO_IDENTITY; diff --git a/metagraph/src/cli/tax_class.cpp b/metagraph/src/cli/tax_class.cpp new file mode 100644 index 0000000000..7a2460c898 --- /dev/null +++ b/metagraph/src/cli/tax_class.cpp @@ -0,0 +1,140 @@ +#include "tax_class.hpp" + +#include "annotation/taxonomy/tax_classifier.hpp" +#include "common/threads/threading.hpp" +#include "common/unix_tools.hpp" +#include "config/config.hpp" +#include "load/load_graph.hpp" +#include "load/load_annotated_graph.hpp" +#include "seq_io/sequence_io.hpp" + +#include "common/logger.hpp" + +namespace mtg { +namespace cli { + +using mtg::common::logger; + +const uint32_t QUERY_SEQ_BATCH_SIZE = 100000; + +void append_new_result(const std::string &seq_label, + const uint32_t taxid, + std::vector > *pair_label_taxid, + std::mutex *tax_mutex) { + std::scoped_lock guard(*tax_mutex); + (*pair_label_taxid).emplace_back(seq_label, taxid); +} + +void print_all_results(const std::vector > &pair_label_taxid, + const std::function &callback) { + for (const std::pair &label_taxid : pair_label_taxid) { + callback(label_taxid.first, label_taxid.second); + } +} + +void execute_fasta_file(const string &file, + std::function > &)> &callback) { + logger->trace("Parsing query sequences from file {}.", file); + + seq_io::FastaParser fasta_parser(file); + std::vector > seq_batch; + + for (const seq_io::kseq_t &kseq : fasta_parser) { + 
seq_batch.push_back({std::string(kseq.seq.s), std::string(kseq.name.s)}); + + if (seq_batch.size() != QUERY_SEQ_BATCH_SIZE) { + continue; + } + callback(seq_batch); + + logger->trace("Processing an another bucket of {} queries from file {}.", QUERY_SEQ_BATCH_SIZE, file); + seq_batch.clear(); + } + callback(seq_batch); +} + +int taxonomic_classification(Config *config) { + assert(config); + + const std::vector &files = config->fnames; + + Timer timer; + logger->trace("Graph loading..."); + auto graph = load_critical_dbg(config->infbase); + logger->trace("Finished graph loading after {}s.", timer.elapsed()); + + timer.reset(); + logger->trace("Processing the classification..."); + ThreadPool thread_pool(std::max(1u, get_num_threads()) - 1, 1000); + + std::function > &)> callback; + + std::vector > pair_label_taxid; + std::mutex tax_mutex; + + std::unique_ptr taxonomy; + std::unique_ptr anno_graph; + + if (config->taxonomic_db != "") { + throw std::runtime_error("internal error: taxonomic classification with taxDB is not implemented."); + } else { + // Use tax_class without any precomputed database. 
+ if (config->infbase_annotators.size() == 0) { + logger->error("The annotation matrix is missing from the command line, please use '-a' flag for the annotation matrix filepath."); + std::exit(1); + } + timer.reset(); + logger->trace("Graph and Annotation loading..."); + graph = load_critical_dbg(config->infbase); + anno_graph = initialize_annotated_dbg(graph, *config); + logger->trace("Finished graph annotation loading after {}s.", timer.elapsed()); + + timer.reset(); + logger->trace("Constructing TaxonomyClsAnno..."); + taxonomy = std::make_unique(*anno_graph, config->lca_coverage_fraction, + config->discovery_fraction, config->taxonomic_tree, + config->label_taxid_map); + logger->trace("Finished TaxonomyDB construction after {}s.", timer.elapsed()); + + if (config->top_label_fraction > 0) { + // Use tax_class version that is returning the LCA of the top labels among the kmers. + // This version is fast, but less precise. + callback = [&](const std::vector > &seq_batch){ + thread_pool.enqueue([&](std::vector > sequences){ + for (std::pair &seq : sequences) { + append_new_result(seq.second, taxonomy->assign_class_toplabels( + seq.first, config->top_label_fraction), &pair_label_taxid, &tax_mutex); + } + }, std::move(seq_batch)); + }; + } else { + // Use tax_class version that computes the LCA taxid for each kmer. + // The prediction result will be identical to the one using taxdb, but the computations will be slower. 
+ callback = [&](const std::vector > &seq_batch){ + thread_pool.enqueue([&](std::vector > sequences){ + for (std::pair &seq : sequences) { + append_new_result(seq.second, taxonomy->assign_class(seq.first), &pair_label_taxid, &tax_mutex); + } + }, std::move(seq_batch)); + }; + } + } + + for (const std::string &file : files) { + execute_fasta_file(file, callback); + } + thread_pool.join(); + + print_all_results(pair_label_taxid, [](const std::string name_seq, const uint32_t &taxid) { + std::string result = fmt::format( + "Sequence '{}' was classified with Tax ID '{}'\n", + name_seq, taxid); + std::cout << result << std::endl; + }); + + logger->trace("Finished all the queries in {}s.", timer.elapsed()); + return 0; +} + +} // namespace cli +} // namespace mtg diff --git a/metagraph/src/cli/tax_class.hpp b/metagraph/src/cli/tax_class.hpp new file mode 100644 index 0000000000..d00084aa35 --- /dev/null +++ b/metagraph/src/cli/tax_class.hpp @@ -0,0 +1,14 @@ +#ifndef __TAX_CLASSIFY_HPP__ +#define __TAX_CLASSIFY_HPP__ + +namespace mtg { +namespace cli { + +class Config; + +int taxonomic_classification(Config *config); + +} // namespace cli +} // namespace mtg + +#endif // __TAX_CLASSIFY_HPP__ diff --git a/metagraph/src/main.cpp b/metagraph/src/main.cpp index 99f7706b3f..b69b5fa77c 100644 --- a/metagraph/src/main.cpp +++ b/metagraph/src/main.cpp @@ -15,6 +15,7 @@ #include "cli/server.hpp" #include "cli/transform_graph.hpp" #include "cli/transform_annotation.hpp" +#include "cli/tax_class.hpp" using namespace mtg; using mtg::common::logger; @@ -86,6 +87,9 @@ int main(int argc, char *argv[]) { case Config::ALIGN: return cli::align_to_graph(config.get()); + case Config::TAX_CLASS: + return cli::taxonomic_classification(config.get()); + case Config::NO_IDENTITY: assert(false); } From 225f957c5ec5cf01696bef7a9ac761459778966a Mon Sep 17 00:00:00 2001 From: Radu Muntean Date: Tue, 3 Aug 2021 23:18:48 +0200 Subject: [PATCH 16/17] rebase Signed-off-by: Radu Muntean --- .gitignore | 1 
+ metagraph/src/cli/config/config.cpp | 2 +- .../data/taxonomic_data/dumb.accession2taxid | 21 + .../tests/data/taxonomic_data/dumb_nodes.dmp | 20 + .../full_hierarchy_sequences.fa | 161 ++++ .../tests/data/taxonomic_data/tax_input.fa | 96 +++ .../tests/data/taxonomic_data/tax_query.fa | 800 ++++++++++++++++++ 7 files changed, 1100 insertions(+), 1 deletion(-) create mode 100644 metagraph/tests/data/taxonomic_data/dumb.accession2taxid create mode 100644 metagraph/tests/data/taxonomic_data/dumb_nodes.dmp create mode 100644 metagraph/tests/data/taxonomic_data/full_hierarchy_sequences.fa create mode 100644 metagraph/tests/data/taxonomic_data/tax_input.fa create mode 100644 metagraph/tests/data/taxonomic_data/tax_query.fa diff --git a/.gitignore b/.gitignore index 9e165295f4..cf49089ee5 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ *.fai !metagraph/tests/data/*.fa !metagraph/tests/data/*.fai +!metagraph/tests/data/taxonomic_data/*.fa metagraph/tests/data/*dump_test* projects/*/temp visualization/geolocation/data/* diff --git a/metagraph/src/cli/config/config.cpp b/metagraph/src/cli/config/config.cpp index d63d3f1598..c307e4ec29 100644 --- a/metagraph/src/cli/config/config.cpp +++ b/metagraph/src/cli/config/config.cpp @@ -1268,7 +1268,7 @@ void Config::print_usage(const std::string &prog_name, IdentityType identity) { fprintf(stderr, "Available options for taxonomic classification:\n"); fprintf(stderr, "\t --lca-coverage-fraction [FLOAT] \tfraction of kmers that needs to be covered\n" - "\t\t\t\t\t\tby the returned LCA, it's subtree and it's ancestors [0.66]\n"); + "\t\t\t\t\t\tby the returned LCA, its subtree and its ancestors [0.66]\n"); fprintf(stderr, "\t-p --parallel [INT] \t\t\tuse multiple threads for computation [1]\n"); fprintf(stderr, "\t --discovery-fraction [FLOAT] \tfraction of labeled k-mers required for a valid annotation [0.7]\n"); fprintf(stderr, "\t --top-label-fraction [FLOAT] \tif greater than 0, use a faster tax_class algorithm\n" diff 
--git a/metagraph/tests/data/taxonomic_data/dumb.accession2taxid b/metagraph/tests/data/taxonomic_data/dumb.accession2taxid new file mode 100644 index 0000000000..ca9ba2566f --- /dev/null +++ b/metagraph/tests/data/taxonomic_data/dumb.accession2taxid @@ -0,0 +1,21 @@ +accession accession.version taxid gi +NC_01 NC_01.1 10001 10001 +NC_02 NC_02.1 10002 10002 +NC_03 NC_04.1 10003 10003 +NC_04 NC_04.1 10004 10004 +NC_05 NC_05.1 10005 10005 +NC_06 NC_06.1 10006 10006 +NC_07 NC_07.1 10007 10007 +NC_08 NC_08.1 10008 10008 +NC_09 NC_09.1 10009 10009 +NC_10 NC_10.1 10010 10010 +NC_11 NC_11.1 10011 10011 +NC_12 NC_12.1 10012 10012 +NC_13 NC_13.1 10013 10013 +NC_14 NC_14.1 10014 10014 +NC_15 NC_15.1 10015 10015 +NC_16 NC_16.1 10016 10016 +NC_17 NC_17.1 10017 10017 +NC_18 NC_18.1 10018 10018 +NC_19 NC_19.1 10019 10019 +NC_20 NC_20.1 10020 10020 diff --git a/metagraph/tests/data/taxonomic_data/dumb_nodes.dmp b/metagraph/tests/data/taxonomic_data/dumb_nodes.dmp new file mode 100644 index 0000000000..c721ad085a --- /dev/null +++ b/metagraph/tests/data/taxonomic_data/dumb_nodes.dmp @@ -0,0 +1,20 @@ +10001 | 10001 +10002 | 10001 +10003 | 10001 +10004 | 10002 +10005 | 10002 +10006 | 10002 +10007 | 10003 +10008 | 10003 +10009 | 10004 +10010 | 10004 +10011 | 10004 +10012 | 10005 +10013 | 10005 +10014 | 10006 +10015 | 10006 +10016 | 10007 +10017 | 10007 +10018 | 10007 +10019 | 10008 +10020 | 10008 diff --git a/metagraph/tests/data/taxonomic_data/full_hierarchy_sequences.fa b/metagraph/tests/data/taxonomic_data/full_hierarchy_sequences.fa new file mode 100644 index 0000000000..36099686d8 --- /dev/null +++ b/metagraph/tests/data/taxonomic_data/full_hierarchy_sequences.fa @@ -0,0 +1,161 @@ +>gi|10001|ref|NC_01.1| Test sample 1 (root) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG 
+CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACGAGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10002|ref|NC_02.1| Test sample 2 (dist to root = 1) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10003|ref|NC_03.1| Test sample 3 (dist to root = 1) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCAAA +TATGACTTAAACGGGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10004|ref|NC_04.1| Test sample 4 (dist to root = 2) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAACCCAGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGAAATACC +>gi|10005|ref|NC_05.1| Test 
sample 5 (dist to root = 2) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGGGGCTGTGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10006|ref|NC_06.1| Test sample 6 (dist to root = 2) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGGGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGTGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10007|ref|NC_07.1| Test sample 7 (dist to root = 2) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCCATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCACCGTAGCAAA +TATGACTTAAACGGGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC +>gi|10008|ref|NC_08.1| Test sample 8 (dist to root = 2) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC 
+TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCAAA +TATGACTTAAACGGGGGGGCTGGGGCTGTTCGGAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10009|ref|NC_09.1| Test sample 9 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGTTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGGAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAACCAAGGGGGCTGGGGCTGTTCGCAGGCAAATCTACGCCTACTACAAACTCTAGAAATACC +>gi|10010|ref|NC_10.1| Test sample 10 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGTTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGACACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACCAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCTTAGCCAA +TATGACTTAACCCAAGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGAAATACC +>gi|10011|ref|NC_11.1| Test sample 11 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +GGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT +CTATCTCGCATTCATTGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT +TAAATCGAGTGTTGGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TTTGACTTAACCCAGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGAAATACC +>gi|10012|ref|NC_12.1| Test sample 12 (dist to root = 3) 
+CGGCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAGAGGGTGACTCTGGTGT +CTATCTCGCATTAAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCTCTTCCGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGGGGCTGTGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10013|ref|NC_13.1| Test sample 13 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGAGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT +CAATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTAACAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGGGGCTGTGGCTGTTCGCAGACCAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10014|ref|NC_14.1| Test sample 14 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCACACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCGGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAGAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGATAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGGGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGTGGCTGGGGCTGTTCGCAGACAAATCTACGGCTACTACAAACTCTAGCAATACC +>gi|10015|ref|NC_15.1| Test sample 15 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAAAATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAATGGGTGACTCTGGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC 
+TTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGAAATTTAATGGGTTGGTATTCTTGTAGTATGGTCATCGTAGCCAA +TATGACTTAAACCAGGTGGCTGGCGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10016|ref|NC_16.1| Test sample 16 (dist to root = 3) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCCTTTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCACTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACGGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTGGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCACCGTAGCAAA +TATGACTTAAACGTGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC +>gi|10017|ref|NC_17.1| Test sample 17 (dist to root = 3) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCCATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCCATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATAACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC +TTGAAAAATTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTAGCCGAATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCACCGTAGCAAA +TATGACTTAAACGGGGGGGCTCGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC +>gi|10018|ref|NC_18.1| Test sample 18 (dist to root = 3) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCACACAATGCACTTCCGGGGGCCATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTGGCAAAT +TAAATCGAGTGTTAGCCGCATCTAGACATTTAATGAGTTGGTTTTCTTGTAGTAGGGTCACCGTAGCAAA +TATGACTTAAACGGGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC +>gi|10019|ref|NC_19.1| Test sample 19 (dist to root = 3) 
+CGCCGGCCGCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCCGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCATTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCCAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCAAA +TATGACCTAAACGGGGGGGCTGGGGCTGTTCGGAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10020|ref|NC_20.1| Test sample 20 (dist to root = 3) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATTATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACCGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTACTCTTGTAGTAGGGTCATCGTAGCAAA +TATGACTTAAACGGGGGGGCTGGGGCTGTTCGGAGACAAATCTACGCCTACTACCAACTCTAGCAATACC + diff --git a/metagraph/tests/data/taxonomic_data/tax_input.fa b/metagraph/tests/data/taxonomic_data/tax_input.fa new file mode 100644 index 0000000000..3ba4796416 --- /dev/null +++ b/metagraph/tests/data/taxonomic_data/tax_input.fa @@ -0,0 +1,96 @@ +>gi|10009|ref|NC_09.1| Test sample 9 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGTTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGGAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAACCAAGGGGGCTGGGGCTGTTCGCAGGCAAATCTACGCCTACTACAAACTCTAGAAATACC +>gi|10010|ref|NC_10.1| Test sample 10 (dist to root = 3) 
+CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGTTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGAGCGACACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACCAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCTTAGCCAA +TATGACTTAACCCAAGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGAAATACC +>gi|10011|ref|NC_11.1| Test sample 11 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +GGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGT +CTATCTCGCATTCATTGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAAT +TAAATCGAGTGTTGGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TTTGACTTAACCCAGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGAAATACC +>gi|10012|ref|NC_12.1| Test sample 12 (dist to root = 3) +CGGCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAGAGGGTGACTCTGGTGT +CTATCTCGCATTAAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCTCTTCCGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGGGGCTGTGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10013|ref|NC_13.1| Test sample 13 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGAGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT +CAATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC 
+TTGAAAAGTTGTAACAAAACGTAACAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGGGGCTGTGGCTGTTCGCAGACCAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10014|ref|NC_14.1| Test sample 14 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCACACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCGGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAGAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGATAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTAATGGGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAA +TATGACTTAAACCAGGTGGCTGGGGCTGTTCGCAGACAAATCTACGGCTACTACAAACTCTAGCAATACC +>gi|10015|ref|NC_15.1| Test sample 15 (dist to root = 3) +CGCCGGCCTCCCCAAAAAATCCCCGGGGGAAAATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAG +AGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAATGGGTGACTCTGGTGT +CTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGAAATTTAATGGGTTGGTATTCTTGTAGTATGGTCATCGTAGCCAA +TATGACTTAAACCAGGTGGCTGGCGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10016|ref|NC_16.1| Test sample 16 (dist to root = 3) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCCTTTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCACTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACGGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTGGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCACCGTAGCAAA +TATGACTTAAACGTGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC +>gi|10017|ref|NC_17.1| Test sample 17 (dist to root = 3) 
+CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCCATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCCATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATAACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC +TTGAAAAATTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTAGCCGAATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCACCGTAGCAAA +TATGACTTAAACGGGGGGGCTCGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC +>gi|10018|ref|NC_18.1| Test sample 18 (dist to root = 3) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCACACAATGCACTTCCGGGGGCCATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTGGCAAAT +TAAATCGAGTGTTAGCCGCATCTAGACATTTAATGAGTTGGTTTTCTTGTAGTAGGGTCACCGTAGCAAA +TATGACTTAAACGGGGGGGCTGGGGCTGTTCGCAGACAAATCTACGCCTACTACAAACTCTAGCCATACC +>gi|10019|ref|NC_19.1| Test sample 19 (dist to root = 3) +CGCCGGCCGCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCCGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCATTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC +TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCCAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCAAA +TATGACCTAAACGGGGGGGCTGGGGCTGTTCGGAGACAAATCTACGCCTACTACAAACTCTAGCAATACC +>gi|10020|ref|NC_20.1| Test sample 20 (dist to root = 3) +CGCCGGCCCCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAG +AGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTT +CTATCTCGCATCCAATGCAGATTACTCCGATTATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATG +CATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTC 
+TTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACCGTAGCCTTTGGGACTTTCACTACTTAGCAAAT +TAAATCGAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTACTCTTGTAGTAGGGTCATCGTAGCAAA +TATGACTTAAACGGGGGGGCTGGGGCTGTTCGGAGACAAATCTACGCCTACTACCAACTCTAGCAATACC diff --git a/metagraph/tests/data/taxonomic_data/tax_query.fa b/metagraph/tests/data/taxonomic_data/tax_query.fa new file mode 100644 index 0000000000..30ea3f2c27 --- /dev/null +++ b/metagraph/tests/data/taxonomic_data/tax_query.fa @@ -0,0 +1,800 @@ +@gi|10001|ref|NC_01.1|-9/1 +TGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATTGATCCCTCTCGAATA ++ +CC=GGGCGGGGGGGJGJJGGJJJCJJJGJJCJJGGJJG=GGJJJGGC8GGGCCGCGCJJGGJ=1CGGJGGCJG=GGGJGGCCGGGCCGCGCGGGG=GGGGCCGGCGGGGGGG=GCGGGC1CGGCGCCGGCC8GG1GCGGGGGGGCGC==C +@gi|10001|ref|NC_01.1|-7/1 +CCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGA ++ +CC8GGGGGGGGGGJCJJGGJJJJJGC(JJGCJJGJGGJJJCGGGJJJJJ=CCJJ8CJ8CGCC=GGJJGGGGGGJGGGGCCGGCGCCGCGGGG1G=CGGGCJCGCGGC1GGCCGGGCGGGCGCGGCGGGGG=CCGGGGGG=CGCCGGGCCG +@gi|10001|ref|NC_01.1|-5/1 +AGTTTGTAGTAGGCGTAGATTTGTCTGCGAACAGCCCCAGCCCCCTCGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTC ++ +C=CGGGGGG=CGGJGJ1JJJGJJCGJJJG=JGC=JCJGJJGJJCGJJGGJJJGJJGGGGJG=CGCCGGC=GGGCGGGG8CCGGGGGGGGGCC8GGCG=GGCCCGG1GGGGGGGGGGCGG8GCCGGGGGGGG1CGGGGGCCCGGGGGCGGC +@gi|10001|ref|NC_01.1|-3/1 +CTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGA ++ +CC1GGGGGGGCGGJGJJGJJJGGGJJGJJJJJJJJCGJGJGGJ1JJGCCJGJ=JJ8CGJ8CGGJJGJCJ=CCGGC=GGCGGGGGCGGGGGGGCCG1GGCGJCGGCC(GGCGGG=CGG(GCGG8G1GGGCCGGGGGGGGGGGCCGGGGGGG +@gi|10001|ref|NC_01.1|-1/1 +GAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCCTTGAGGGCGTGTACTTAGCCCA ++ 
+CCCGGGGGCGGGGGGJJCJJJJJJJJJJ1JJGJGGCJJGJJJJCCCJJGGGJ=GJJJGGJJCCGGGGGGCCC8CCCC=GGGGCGG==GGGGGGGCCC8GGCGGC1GGCGCGGGGGG=GCGGGCCGGC(GGGGGGGGCC8CGGGGGGGGCC +@gi|10002|ref|NC_02.1|-9/1 +AAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTT ++ +CCCGGGGGGGGGCJJJGJGGJJCJJJGGJGJ=JJJJGGGJGJGJGJJJGC=JJCGGGGJGJGGGJ1JGCCCGJGCGC=CGCC==CGGGCC1CGGGGGGCGJCCGGGGGG=CGCCGGGGCGGCCGGGCG1GGGGGCCGCCCCCGGGGGGC8 +@gi|10002|ref|NC_02.1|-7/1 +TGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGA ++ +=CCGCCGGGGGGGGCCJJGGJJJJ8J1JJGGJJ1CJCJJGGG8CJCCGJGJJGGGGGGCJGGGGGGCGGGJGCJGG1GCGCGGGCGGGCGGGGC=(GCG=JCG=GG(GCCCGC=GG(GCCGGGC=1CGCGG=GGCGGCCCGGGGGGCGGC +@gi|10002|ref|NC_02.1|-5/1 +GAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAATATGACTTAAAC ++ +CCCGGCGCGGGGGJJGJJGJJJJGGJG=JJGJGJJJJGJJJJCGJJJJGJGGGJGJGJJ(J=J=JGGG1GCGGCJCJGGCC1GGGCG==GCGG81GGGCGCCCGGCGGGGGCGGGGGGGGCGGGGGCCGG1GGCGCGCGCGGGGCG1GCC +@gi|10002|ref|NC_02.1|-3/1 +CGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGA ++ +CCCGGGGGGGGGGCJ1JJJJCJCGJJJJJJJGGGJJJJGJJJJJJGGGGJGGJGGGJG(GJJG=CGJJJCGGJGCGGGCGGGCG8GG=CGGCG8CGGCGCCC=C=GGGCCGGGGC1GC=G=G8GGCGGGCCGGCGGGGGGCGGCCCCGCC +@gi|10002|ref|NC_02.1|-1/1 +GGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCAC ++ +CCCGGGGGGGGGGJJJGJCJCJJJJJJJCCJGJJJGJCJJ=JJGGCGGJGC1JJGGJCJJCGJCCGGGGGGGCJCGCGC=GC=GCGCGG1GGGG=GCGGGCGGCGGGGC8GGCGGGGGGGG=CGGGCGGGG=CCG=GCGCCG=GGGCGGG +@gi|10003|ref|NC_03.1|-9/1 
+TCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCG ++ +=C1GGGGGGGGCGJJJJGJJGJJCJJJJGJGGGGJJJGJJGCJJGJJCJJGJGC8JCGJCGCGJGGGGGCGGCGGGGGGG=CGCGGCGCCGGGCGGGCGGCGGGGGCCC=GCGGGG1GGC1C8GCGGGGGGCGGGCC=GCCGGGG=GGGG +@gi|10003|ref|NC_03.1|-7/1 +CTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAA ++ +CCCGCGGGGGGGGJJJJJJJGJJGJJGGJJJJJJGJJJJJGJ8JGJJJGGJG=GJJJJ=GGJCJJGGGCGGGCGJGGGGGGC=GGGGGGGG=G=GGGGGGJG=CG8GGGCGCGGCGCGCGGC18CGGGGGGGGGGCGCCGGGGGCGGGG= +@gi|10003|ref|NC_03.1|-5/1 +GGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAGAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAA ++ +CCCGGGGGGGG1GJGJJJJJJGJJCJGGCJGJGJJJJGJJGCJJ8=(JJJGGC8GCJGG(JJJCGGJJGGG=GGGGGGCCGGGGGGGGGCGG=CGGGGGG1CGGGCCCGGGGGC1CCGGGCGGCCCG8GCGGG=CCCGGCGCGGGGGGCC +@gi|10003|ref|NC_03.1|-3/1 +AAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGGATGCCAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTT ++ +CCCGGGGGCGGGGGGGCJJJCJJJGJJGJJJJGJJGJC(JJGJJJJJJJGGJGCGGGGJGJGCC=JGG=CCJGGG=GGGG8C=GCGGGGG=GGCGGCC=GJCC1C1C8GGGGGGGCCG=(GCGGGGGGGGCCGGCCGCC1CGGCCGGCCC +@gi|10003|ref|NC_03.1|-1/1 +ATTGCTAGAGTTTGTAGTAGGCGTAGATTTGTCTGCGAACAGCCCAAGCCCCCCCGTTTAAGTCATATTTGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAG ++ +CC11GGGGGGGGGJJJJJJJJJJGGJ1CJJJJJJ=JGGGJJGGGJ(JGGJJJGGGGCGGJCGJGJ1GCCGGGCJCCJCGCGGCCGGGGGGGGGGGGGC===GGCGCGGC=GCGCGCGGCC=8CGGGGGGGGGGGCC=GGGGGGCCCGGGG +@gi|10004|ref|NC_04.1|-9/1 +CTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGC ++ +CCC1GG=CGGGGGJJJJJCJGJJJJJJJJJJJJJJJJJJJG=JCJJJGGJJGGGGGGCJJGGJJJGGJGGGJGC=CGJCCG=GCGGGGGCGGGGGGCCGGJCG=G1CG1GGGGCC1CGG=(CGGGGG=CC=GGGGC8GG8GCGGGCGGGG 
+@gi|10004|ref|NC_04.1|-7/1 +CAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATAGATCCCTCTC ++ +CCC1GGGGGGGGGJJ1JJJJGJJJGJJJJGGCJJCJGJJJJGJJGJJJGGGGGJGGGJ8=JCCGGCC1GCGGGCGGGGGJG=GC1GGGGGGGGGCGGGGGJCGCGGCCCGCCGGGGC8CGGGCGCGCCGGGGGGGGCGGGGGGCG1CG=C +@gi|10004|ref|NC_04.1|-5/1 +TTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAATATG ++ +=C=GCGGGGGGGGGJJJJJJJJGJJJGJG1JJGCGGJJGJGJ8JJJGGGJ8CJCJGJJGCJGGCGGGCGGJGGGGCG=GGGG1GGG=GGGGGGCGCG8GGJCCCCGGGGGGGCGCGCGCGGGCGGGGGG=CGCGGGGCCGGGCCCGGCG= +@gi|10004|ref|NC_04.1|-3/1 +GTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAATATGACTTAACCCAGGG ++ +CCCGGGGGGGG=GGGCCCGJJJJ=GGGJJJJGCJJJCJGGJJG1JJC8JGJGJCGCGJJJGGJGGGGGGJJJGC1G8=GGCCGCCGCGG8GCGGGCGGGCCCCGGCGGGCGGCCGG1=CGCG=GCGGCCGGGCGGCCG=G=GCGGC=GCC +@gi|10004|ref|NC_04.1|-1/1 +CAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAA ++ +CC8GGGCCG1GGGJJJJJGGJJGJJJJJGG1JJCGGJGGJGG1JJJJGJJGCGJJ=GJJJ=JGJ=GJJJCGCGG8GGGCCGGGGCGGGCGGCGGCGGCGGJCGGCGG1CGCGCGGGCCGGG8GGG1GGC8GGGGCGCGGGGCCGGGGCG= +@gi|10005|ref|NC_05.1|-9/1 +TAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTAC ++ +CCCGGGGGGGGGGJJJJJCGJJJGJJGJJGGJJ8GCJJGCJGJGJJJGJJJGGGGG8JGGJGGGGGGCGGCCJGCGG1GGGGGCGCGGG=GGGGCGCGGGJGGGG8CG=CGGGGCGGGGGGGGGGGCGGGCCG=GG=GGCCGGGCCGCG1 +@gi|10005|ref|NC_05.1|-7/1 +CCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGATTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTG ++ 
+=CCGGGGGGGGGGJ1JJJJGJ(JJJJJJJJJJGJJJGJGJGJGJGGGCGGGCGJG=JCJGGGCGCGGGGGCJGGGGJGGCCCGG=GGGGGCG(GGC=GGGJGGCGC=GC8GGC=GCGG=G1CG1GGCGGGGGG1GCCGGCGGGGGGCG8C +@gi|10005|ref|NC_05.1|-5/1 +TTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTCTGGGACTTTCACTACTAAGCAAATTAAATCTAGTGTTAGCCGCATGTAGACATT ++ +CCCGCGGGGGGGGJJJJJGGGJJGJJJGGCJJGJJJJJJGG=JGGJJ=J8GJJJCJG1JGCG8GJ1JGGGGJGGCGGCGJCCGCC1GG1GCCCCG(CGGGJCGG(=CGCCGGGCGGCGCC=CGCGGCGGGGCCCGCC=GGCGGGCCCGGC +@gi|10005|ref|NC_05.1|-3/1 +GTTTGTAGTAGGCGTAGATTTGTCTGCGAACAGCCACAGCCCCCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGAGTTAATTTGCTTAGTAGTGAAAGTCC ++ +CCCCGGGGGGCGGJJJJJJJJJ8JCJJJJJGJJJCJCJG=JGJGJGJGJJ=JG=GGG1J=GJJGG8CGGGGGGGGGGGCGCCGGCGGGGGGGGGCGGGCGJG8GCGG=CGGCGCGCGGCGGCCG1GGGG8GGGGGCCGGCGCGGCGGCGG +@gi|10005|ref|NC_05.1|-1/1 +AGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGC ++ +CCC=GG=GGGGGGJGJJJJJJJJJJJJJJJJJ=J=JGCGJJJGGGJJCGJJJJCJJJGGGJGGJ(GGGGGJ1CCCGJCGGCCGCCC=CGGGGGCCGGG8=CG=GCGGCGGG1GCC=GCG1GCGGCG=GGCGG=G8GC(GCGGCCGCGGGG +@gi|10006|ref|NC_06.1|-9/1 +GTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTA ++ +CCC1GGG1GGGGCJJJJ=JJJGJJJGJJGGJGG=GJJJJJGJJJJJGJ8JJJCJG1=GJGGGGJGJG8GGGJCGGCCGGCGGCGGGGGGG(GCGGGGCG==CGCGGG8CCGGGGGGGG=GG=GGGGCCGGGGCG=GGGGGGCGGGGCCCC +@gi|10006|ref|NC_06.1|-7/1 +GCCCCAGCCACCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACCCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTA ++ +CCC1GGGGCGGGCJGJJGGJJJJJJGJJGCJJ=JJJCJJC=GJCJCGGGJJJ=GGJGCJGCGCCJJCJG8=GCGJGJG=CCG=CGGCGGC=GGC1G=CGGCCGGGGCG8CGC1GCGCGCGGGGCGGGGGCGGGCC=GGGGGCCGGGGGG= +@gi|10006|ref|NC_06.1|-5/1 
+GTGTCTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATTTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAA ++ +CCCGGGGGGGGGGJGJJJGJJJJJGJJJCGJJJJGJJ8GJGJJJGJGCJJCGCJ(GGGGJGCCCGGCCGGJGGGGGG8JCGCGGGGGG=GCGGGGGGCGCJCG=CCGGCG=CGG8GCGGC=G8GCGGGCC=GGGGCCGGGG=GGGGG==C +@gi|10006|ref|NC_06.1|-3/1 +CAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCCCATCGGAGTAATCTGCATTGAATGCGAGATAGACACCAGAGTCACCCTT ++ +CCCGGGGGGGGGGGJJGJGJJJJGGJGJJJCCJJJGGJGJGJGJGJJGJJJ=GJGJGGCGCCGGJGJC=GGGCGCGGG(GGGG=GGGCCGGGC=C1G=CGJGGGG=CGGCCG=GGGGGGGGC8GGCGGCGCGGCCGCCGCGGGGCGCGGG +@gi|10006|ref|NC_06.1|-1/1 +ATACTTGCTTACTCTTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGGGTTGGTATTCTTGTAGTAGGGTCATCGTAG ++ +CCCCGGCCGGGGG1GJ=JJJGJJJJJJGJCGJGJGJGGJJJ1JJ8CC=JJGJ1=GGJJJGJJGCJGJCJGGGGG8GGCGGGCCGCGGCGCGG8GGGCGCC1=GGGGCGG8GGCCCGCGGGGGCGGGGC=8C=CGGGG8GG=G=CGGGGCG +@gi|10007|ref|NC_07.1|-9/1 +CTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGC ++ +CCCGCCGGGGGGCGGJJGGJJGJGGJJGGCJGJ(JJC=G8JJ1JJGGGJJGCJJGCJGGJG==J=JGC8GCGGCGGGGGCGCCC=GGGCGGGGGG(G=CGJCCCGGGCGGCCGGGGCCGG1C=GGGGGGC1GGGGCCGC=CGC8CGGGCG +@gi|10007|ref|NC_07.1|-7/1 +GCAGTAGCAGACAAGTTTGAATTGGGCGAAACCTACTTGCTTCCTCTTGGAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAAT ++ +CCCG=GGGGGGGGJJJJJJJGJ=J1CJGGJJJ=JJCJJJJGJJJGCJJJGGJ8GJ8GCGJJJGGGJJGGGC=CGGCGG=G8GG=GC=GCGG=GGGGGGCCCCCCCGGCGGGGC=GCCGGGC=GGGGGC=GGCG1GCGCCGGG1=GGCCCG +@gi|10007|ref|NC_07.1|-5/1 +TGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCC ++ +CC1GCGGGGGCGGCJJJGJJJJJJJJJJJJJJJJJ=JJGJGGG(GCJJGJJGGGJGGGJCJGGJGGJCCCGGGCGGCCGGGGGCGCGCGGGG8CGGGCGGJGCGGGGCGGGGGGCGGGCGCGGCGCGGGGGGGGGGCGGC8G=GCCGGCG 
+@gi|10007|ref|NC_07.1|-3/1 +ACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCACCGTAGCAAATATGACTTAAACGGGGGGGCTGGGGCTGTTCGCAGACAAA ++ +=CCGCGGGGGCGGJJJGJJJGGJGJ1GGGGJGJCCJJ8JGJJJJJGGJG1GGCJJJJJJ=GGG1GGJGGGGCGGGCGCGGCCCGGGCGGCGCGCCGG=G8JGCCCCCGGCGGC=CCGGCGGG8GCC=GC=GGG=C=CGGGGGGCGGGGCC +@gi|10007|ref|NC_07.1|-1/1 +AACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGCTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCT ++ +CCCGGGGGCGG=GJGJJJGJJGGGJGJJGCJJJJGGGGCGJJGGJJJGJ8JGGCJJGCGJGJGJCGCGGGJGGGGGJGGGGG(GGGGGG1=CGC==GCGGJCGGGG=G8GGGGGCGCGCCGCGGGGGCGGGG8GG1C1GC1GGG18GCGG +@gi|10008|ref|NC_08.1|-9/1 +CCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCC ++ +CC=GGGGGGGGGGJJJJJJJJGJCJJJJCJJJCJJJJGJ=JJJG8JJJGGJCGCGGJCJJ8GJCCG=GGGGG=GG=CGGGGGGCGGGG8GG=GCCCGCGGJC1G8GGCCGGGGCGCCGGC1=CGCGGGGGCGCCGC8GGCCC8GG8GGGG +@gi|10008|ref|NC_08.1|-7/1 +AAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGG ++ +C=CGGCGCGGCGGJJGJJ1J1JGJJGJJJJJJJJJGJCGJJGJJGJJCGJJGJ=JGCGJG(G=GGJJG=CCGG=GGGCJ=CGGCGCG=GCGGGGCGGCGGJGG18GGGCGGGGCGGCCGGGGCGCGG=GGGGGGGGGGGGGCGCGCGGGG +@gi|10008|ref|NC_08.1|-5/1 +TGCACTTCCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTA ++ +=CCGGCGGGGGGGCG(JCGGCJJJJJJJJJG1GCJCGJJJJJJG8GGJGJJGJJJG=GGJGGJGGGGJGCGCCGGCGJG8GGGC=GGCGGCCCCGGGGGGJCGGGCGGCGGGG8C8G=CGGGGGGGGGGG=GGG=G=GCCGGCCGGGCGG +@gi|10008|ref|NC_08.1|-3/1 +TTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCTGCATTGGATG ++ 
+CC=8CGGGGGGGGJJGJGJJJJGJJJGGJJGGJGGJJ=JJGJ=JJGGGJ8CGJG=GJCJGJJGGCJ=JJGGCGGCGCGGGGGGCGGGGGGCGGGGGG1GCJCGC(GGCCC8GGGCGG=GGGGGCGGGGGGCGGGGGGC=(CCGCGGCC=G +@gi|10008|ref|NC_08.1|-1/1 +AGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTATTCT ++ +CC=GGGGGGGGGGJGJGJCGJJGJ=JJGJJJ=CJJCJGGJJ=GJJGGJJGGGCGJCCGGJG8JGGGGGJJGG8JGGCGGCGGGGGGGG=CCGC=18GGGGJCCGCGGGGCGCCGGGGGG=GCG1CG8=CGGGGGGGCGG==CCCG8GGGG +@gi|10009|ref|NC_09.1|-9/1 +CCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGTTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCCGATG ++ +CCCGGGGGGGGGGJJJJJJJJGJJCCJJJJJJJJ=GJJJG(GJJ8GGGJG8JGJCJJJCGJCCG8CGGGC=GGGGGJ18GG=G=GGGGGGCGGGG1GGGCJGGCGGGGGGCGGGGGGGC=GGGGGCGCGGGCCGGGGCCCGGCGCCGGGG +@gi|10009|ref|NC_09.1|-7/1 +CTGCGAACAGCCCCAGCCCCCTTGGTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAATGAAAGTCCCAAAGGCTACTGTCGCGTGAGTT ++ +CC=GGGGGGGGGGJJGJJJ8JJJGGJJGJJJGJGJJGGGJJJJJJJJJJJJJCJJGGJGGGGJCGJG1=JGGGGCGG=GCG=GGGGCGGG1GGG88GG=GJGGCCG(GGGGGGGGCGGGCGGCCGCCGGCCCCGGGGG=G=GGGGCGC=C +@gi|10009|ref|NC_09.1|-5/1 +TTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTCCTGCTACTGCCGAAGTCACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGATATAGACA ++ +CCCGGGGGGGGGCJJJJGJJ8JJJJJJJGJJ8JGJJJCGGJJJJGJGJJGGJJ=GGGJG(GJ8JGJG8JJGGGGGGG8GGCCGGCGCGGGG=GG(GGGGGJCCCGG=GGGCGGGGGG8GCGGC=GCGCGGGGGGGGGG8CGC=CCGCGGG +@gi|10009|ref|NC_09.1|-3/1 +GGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGTTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGC ++ +CC1C=GGGGGGGGCJJGJGJGJJGGGJJJJJJGJJGGJJCCGJ8GJJJJJCJGGCJJGGGCGGGGGGGJGG1GG8CCGGGGGGCG1G=GGGGGCCCGGGGCCCGGGC(C1G1GGCCGCCGG=CGCGGGGGCGGGGGCGGGGGGGGG=CGC +@gi|10009|ref|NC_09.1|-1/1 
+TGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAATGAAAGTCCCATAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTGCTGCTACTGCCGAAGT ++ +CCCGGGGGGGGGGJ=GJGJGJJJJJJ8GJ1=GGJJGJ8JJJJJ1JJGGJJGGGGGGJGGGGGJGGGJJGGGCGCCCGGGG888GGGGGGGGCGG=CGGCCJGCGCGGC=GG=GGGGCCC=G=GGGGGGGCGCCCGGGGGGGCGG=GCCGC +@gi|10010|ref|NC_10.1|-9/1 +CTTGAAAAGTTGTAACCAAACGTACGAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCTTAGCCAATATGACTTA ++ +CCCGGGGGGGGG8JJGJJGJJJCJJ(CGGJJJGJ8JJGCGGJGGG=GJJJJGGJJJJJCJGGGGCGC(GCGG8GGGG=CGGGG8GGGCC==GGGCGGGGCJ=GGGGCCGGGCCGCG=C8GCCGGGCGCGGGCGC=GGCGGGGGGG=G1CG +@gi|10010|ref|NC_10.1|-7/1 +ATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGTTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCC ++ +CCCGGGGCCGGGGJGJJGGJGJJJ1=CJGJJJJJGCG8GGJJJJJJJJCJGGJJCJJGGJGCGGGJGJJGCG=GGJGCGGCGG=GCGCGGC8GGGCGGCGJC8G=GGGGCGCCGCGGCGGG=1=GGCCCGGGGGGGCGGCG1CGGGC=GC +@gi|10010|ref|NC_10.1|-5/1 +CCCTTGGGTTAAGTCATATTGGCTAAGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAATGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTGGTTACAA ++ +CCC1GGGGGGGGGGJGJGJG=GJGJJJJG=JJJJJC8JJJJJJGGGGJJC1JGGJJJGGGGGCCJJGJGCGGCCGGGJCCCCGGGGCGCGGGCGC=GGGGJCCGGGGGGGGGGGGGGG=GGGGGGGGGGGGCCG=CGGGG=GCGCGGGGC +@gi|10010|ref|NC_10.1|-3/1 +CAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTGTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATT ++ +=CCGGGGG1G=GGJJGGJJJCGJJJJGJJCCCGJJGJGJJGJ=JJJJJGJJGJGJG=C=8CC=GGGGCGGGGGGG=GCGGGGCG=C=CC=CGCCGGGGGGJCCCCGCGGCGGG8==CCCGCGGGGGCG1GGGGCCGGGGGGGCGGCGGGG +@gi|10010|ref|NC_10.1|-1/1 +TAATTTGCTTAGTAATGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTGGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGG ++ +CC=CGC=GGGGGGGGJJJJCGJJCJGJJCJJJJJCGJCCJ=JCGJGJGJGGC=GCJGGJGGCGJGG=GGC8GGC8GGGGC=GCCGGGG=G=GGGGGCGG=J(GGCGGGGGGGGG8CCGGGGGGCCCGGGGGGGCGGCGG=GCGC=8CGGG 
+@gi|10011|ref|NC_11.1|-9/1 +AATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGGGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCATTGCAGATTACTC ++ +CC=GGGGCGGCGGJ1JJGJGJJJJGJJG1JGJGJJJGGJ8JJJCGGGCJCGGJ=CCJGCGCGGGJC=CGJCCGGCCCGGGGGCGGGGG8GCCGGGGGGGCJGCCGGGG=GGGGGGGC=GC1GGGGGGCCGGGCGGGCGGCG88CGGG=GC +@gi|10011|ref|NC_11.1|-7/1 +AAGTCAAATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCCAACACTCGATTTAATTTGCTTAGTAATGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGA ++ +CCC1GGGGGGGGGJJ1JCGJJCJJJJGJJJJGGGGGJJJJGCG=JG=JGGJJGGGCCJJGGGJGGCGGCGGCGGGCJGCCGGCCCGGGGC=GG8CGCGGGJCGCC1GCGGGCGGGGCG=G=CGGCGCGGCGCGGCGGGGGCCCGGGGCCG +@gi|10011|ref|NC_11.1|-5/1 +CAAACAATGCACTTCCGGCGGCTATTCGAGGGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCATTGCAGATTACTCCGATGAGCGAAACAAAGAGTGGG ++ +CCCCCCGCGGGGGJJJJJJJJJJJJJGJJJ=JJGJCGGGJJGJGJJJJGGGJCJGGCGG1=GG8J=CCCGGJGGGG1CCCGGCGCGCGCGGCCGC=GGCCJG=GGGCGGGGG=GCCGGGGCGGGG=GGGGGCGGGCCGG=GCGG=GGGGC +@gi|10011|ref|NC_11.1|-3/1 +GTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGT ++ +CCCGGGGGGGG=GJCJJJGJJJJCJJJJJJJJGGJGJGJGJGGJJGGJJGGJJJJGGJGJCGGGGCGGGGG8GGGC=GGGGCGGGG8=CGGGGGGGGG=G1CGGGGGGGGC1GGGGCGGG8GG8=GGGCGGCGGGGCGCGGGGGGCCGGG +@gi|10011|ref|NC_11.1|-1/1 +GGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGGGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAACGGTCACTCTTGTGTCTATCTCGCATTCA ++ +CCCGGGGGGGGGGGJGJJJJJGGJG8JJGJJJJJGJ8JJJGJJJJGJJCGGCJGGJGGGJGGGGCGGGGCCGJGCGGJG8G=CCGGCC=GGGCC=GGGGCJGGGGGGGGGGCGGG1GGCGC(GGCGGGGG=CGG=CGGGGCC8GGGGCCG +@gi|10012|ref|NC_12.1|-9/1 +CGGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAGAGGGTGACTCTGGTGTCTATCTCGCATTA ++ 
+CC=GCGG=GGGGGJGJJGJJJJJ8G=JJJGJJJ=JJJJJGJGJJ=JJGGJJJJJJCJGJGJJ=CJGGGGCGCGGGGGGGGCGGCG=GGGCGGCGGCGCGGJC=GGGCCCCGGGCGGC8CGGGGGG=GCGCCGGGGGCCGGCG=CGGG8GC +@gi|10012|ref|NC_12.1|-7/1 +CAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTTAATGCGAGATAGACACCAGAGTCACCCTCTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATAGATCCCTCTC ++ +CC8GGGGGGGGGGJJJJJJJ1GJJJJJCJJ1GGCJJJJCGJJCJJJGGJJGGCGGCCGCGCCJGGGCGGG=GCCGG=GCGGGCGGGGCGCG=GGGGGGGGJC=CGG1GCCGG1GGGCG1CGGCCCGGCCGGC1GCGGCGGGGGGGCGG8G +@gi|10012|ref|NC_12.1|-5/1 +ATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTA ++ +C=1=1GGGGGGGCGJJCJJGGGJJJJCJJJJJJJJJJJJJCJJCGJ(GGJGJJJJJJ8GGGCCCC8GGGCGGCGGCGGGGCGCCGC8GCGCGGG(GGGGGJGC8G=GGGGG=GGCGCCGGGGCGCCCG=GGGG1GGCCGGCCGCCC8CGC +@gi|10012|ref|NC_12.1|-3/1 +GTAGTAGGCGTAGATTTGTCTGCGAACAGCCACAGCCCCCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAA ++ +CCCGCGGGGGGGGJJJGCGJJJJJJJ=JJJCJJJJJGJJJGJCJJCGJGJGGGGGGJGJJ=CGJGCGJ1GGG8J=G8GCCCGGGGCCGGCG(CGGGGGGGJCGGG=CGGGGGCGCCGGCGGGGGCGGG=GGGCGGGGGCCGCGGGC=GGC +@gi|10012|ref|NC_12.1|-1/1 +ATCTCGCATTAAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCTCTTCCGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGT ++ +CCCGGG=GGGGGGCJJJGJJCGJJJJJJJJJGJJGGJGCGGJGJ8GGGJGGJGJGJGGGJCJ=GGCGGGJJGGCGGGGCCCG1GCGGGG1CGGGGGGCCCCCGGGGG=CCGGGGGGGGGGGGGGC=CGGGGGGGGC=GCCGGGCCGCGCG +@gi|10013|ref|NC_13.1|-9/1 +CGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCAATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAG ++ +CCC8CGGGGGGGGJJGCGGJJGJJGGJJJJG(J=CJJJJJGGGGGCJGGJJJ=GGCGJCCGGCGGCCJCGGGCGJGGGGCCGGGGGGCCGGGCGCGGCG(==CCGGGGGGGC=GGGCCCG8GGCCGGGGGGGGGCCGGCGCCCC=GCC=C +@gi|10013|ref|NC_13.1|-7/1 
+GACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGTTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGC ++ +CCCGGGGGGGGGGGJJJJ1GJ8JJGJJJJJJG=CGGJJJCJJGGCJ=JJ=CC=J8GGJC8GGGGGGGCGCGJCGCCGC=CCCCCGGGC=GGGGGGGCCGGJGCGGG=CGCG=CGG=CCGCGGGGGGGGGGCCGGCCCCGCC=CGGGGCG1 +@gi|10013|ref|NC_13.1|-5/1 +AAGTAAAGGGTGACTCTGGTGTCAATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACAT ++ +C1CCGGGGGGGGGGJ(GJJJG=GC8JCJJJJGJGJGJJGJGJGGCCGGGGJJG=JGJJJJGJ(G8GGJC(GGJGG=JCGGGCCGCGC=GCGGCGGCCGCGJGGCGGGGCGGG8CGC=CCGGCCGGCGCGGGGGGC=CGGGGGCGG=GGCC +@gi|10013|ref|NC_13.1|-3/1 +GCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATTGACACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATAGATCCCTC ++ +=CCGGGGGGGGG=GCJJJJ1JJGGJCGJGJJJGJJJJJGGGGJGJCJGJGGGGJJJJJG=JJGJJCGG18GCCGGGGG=GGGCCCGG1GC=G8GGGGCCGJ=1GGGCG8GGGGGCCGGCGCCCCGG=G8CCGGCGCGC=GGGGGCGG8GG +@gi|10013|ref|NC_13.1|-1/1 +CAGCCCCCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGTTACGTTTTGTT ++ +=C=GGGGGGCG=GJJJGJ8JJGJJJJCGJJJJJJJGGJJGJGGJJJJCGJJJJGCJJCJGJGCGGG=GCGJGCCGCGCC=GGGGGGGC=CGGGGGGGGCGJCCGC=GCC8CCGG1CGGGGGCGG=C=GGCGCGGGGGGGCGCGG=GGGGC +@gi|10014|ref|NC_14.1|-9/1 +ATTGGGCGAAACATACTTGCTTACTCTTGATAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGGGTTGGTATTCTTGTAGTA ++ +CCCGG=GGCGGGGJJJJG8JJJJCJ8JJGJJGJCGJCGGGJ8CGJJJJJJJGGGG=GCGGJ=GGCJCCCGGCGGGGCCC1C=CGCGGGG1GG=GGCGGGGJGGGCGGGGGGGGGC=GGGCGGGGCGGGGCGCCGCGGGGGCCGG=G=GGG +@gi|10014|ref|NC_14.1|-7/1 +TACAAGAATACCAACCCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGCCGCGTGAGTTGGTACGTTATGTTACAACTTATCAAGAGTAAGCAAGTATGTTTCGCCCAATTCA ++ +CCCGGGGGGGGGGJJJ=GJGGGGJGJGJGJGJJGGGJ1JGGJJCJJJJJGJ=JCGJJGGJCJGJ1G=GGJCJGJ=GGGGGGGGCG(CCGGCGGGGGGGGCJC=GGCCCCGGGCCGGC==GGCGCGGGGCCGGGGGCGGGGGCGGGGGGGG 
+@gi|10014|ref|NC_14.1|-5/1 +TCCCCAAAAAATCCCCGGGGGAATATTTCGACCACACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGGTCGGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATGC ++ +CCCGGGGGGGGGGJGJJJCJJJJGJJGCCJGGJJJJJGJGJC=JJGGJCGGJC8JGJJG=GGGCGJJJCGCGGCGGGCGGGGCGGCCG=GGGGCGGCGGCJ==GCGCGGGGGGGCGGGG=GGGGGGGGGGC=GCGCGG1GGGGGGGGGCC +@gi|10014|ref|NC_14.1|-3/1 +ACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAATTGGTACGTTATGTTACAACTTATCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTCTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATG ++ +CCC=GCGGGGGG8JJJJJJJGJJJCJJGJJ1GJJGJJJJJJJCJJCJJJCC=(JJ=JGGJGGCGJGCGGGGCGG8=GGGGGGGGGCGGGCGGGG=GCCCGJC=C8CCGGG8GGG=GGGGGG=1GG==CCC==GCCGCGGGGGGCGGGGCC +@gi|10014|ref|NC_14.1|-1/1 +GCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAGAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGATAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACT ++ +C1CGGGGGGGGGGJJGJ=JJG=JJGJJJGJJ1JJGJJCJGJGJJJGJGG=GGCJGGJCCJJJ(GGG=GGGGGGCGGGGGGGCGGGC=GCCGCCGGGGGCCJGGC=GGGGGCGGCCGCCCGGGGGCGGGC1CGGGCGC=GCGGGGG8C=CC +@gi|10015|ref|NC_15.1|-9/1 +GTCTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAG ++ +CCCGGGGGGGGGGGCJGCJJJJJJGCJGJGGJGGJJJCJCJGGGGGJGGJJJGJJJJGJGJCGCGCCJGGCGGG==G=GGGGGCCGG=CGGCGGCCGGCCCC=GGCGCCGCGG=C=G=CGGG=GCGGCGGGGCGGGGGG8CCGCC=CGCC +@gi|10015|ref|NC_15.1|-7/1 +ATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCCCATCGGAGTAATCTGCATTGAATGCGAGATAGACACCAGAGTCACCCATTACTT ++ +CCCGGGGGCGGGGCJJJCJCCGJJGJJJJGGGGGGJJJJJJGGGGJGGJJGJCGGGGGJ1GGGJGGGJGCG(GGC8CGCGGGGGGGCCCCCGGGGGCGGGJCGGGGGCGGGGCGGCCGGGGGCCCCCGGCCGGGGGGGGGCGGGCGGGCC +@gi|10015|ref|NC_15.1|-5/1 +GACCATACTACAAGAATACCAACCCATTAAATTTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGC ++ 
+CCCGGGGGGGGGGGGJGJJGJJJJJGCJGJJJGJCJJJJGGGJJGJJJCGJ8JJJGJJJ(GJGGGGGCGCGJGGGGGGGGG=GGGGCGGCGGGCCC=GGGCGGGGCGCGG=GGGGGGGCGG1GGCGCCGG=GCGGGGGCGGCCGCGCGGG +@gi|10015|ref|NC_15.1|-3/1 +GATGACCATACTACAAGAATACCAACCCATTAAATTTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAACTTTTCAAGAGTAAGCAAGTATGTTT ++ +8CCGGGGGGCGCGJJGJJJJJGJCJGGJJJGJGGGJJJ1JJJJGGGJCCG8JJGJCJCGGJJJCCC=G8=GJJGCCGGGGCGG1GGCGGGGGGGGGGGGG=CG8GCGG=GCGGCGCGGGGGGGCGGCGGGCGCCGGGGCGGGGCGCGGGC +@gi|10015|ref|NC_15.1|-1/1 +TAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGAAATTTCATGGGT ++ +CCCG=GGGGGCCGCJJJCGJCJJJJJJGGJJGGJJGJJJGGJGGJJCJJGCJGGGG8=JGJGGJG=CGGGJGGCCCGGGGGGGCGGGGGGGGGCGGCGGGJCGGGCCGGGCGGGGCGGGGGGCGGGG8GCGCGGCG8C1GCCC1G(CCG8 +@gi|10016|ref|NC_16.1|-9/1 +TGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCACTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGTTGTAACAAAACGTACC ++ +=CCCCGGGCGGGGJGJCGJJJJJJJJJJJJJJGGJJJJGJJJJGGJJGCJCJJJJJCJ=JGGG1GGGCJGGJGGCGCCCGGCGG=CGCCG=G(GGGGCGGJGGCCC8GCCCCGGGGGCG=GGGGGGGG=GGGCCCGGCCGGCG=GGGG=C +@gi|10016|ref|NC_16.1|-7/1 +CTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCACTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCT ++ +CCCG8GGGGGGGGJJJJJJJGGGJ8JGJGJJGJJJJJGJJJGGJJJCGGJGJGG8G=GGGCCGJGCGCGCGCGJGCGGGCGCGCGGGGC8CGCGCGGG=GJGGCGGGGGGGGGGGGC8GG=GGCGG=GGC1CGGGGGGCCGCGGGCCGCG +@gi|10016|ref|NC_16.1|-5/1 +CCAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACCGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCA ++ +CCC1GGGCGGCGGGGJJGJJJJJJJJJCJJCGJJGJJJJGJ(JJCJGJJCJG(G=JJJJGGJJ=GCJGCJGCJGC8=GG=GGGGCGGCGCGCCGCG=CGCCCG1GG=GG1GCGGCCCGGGGCG==GG=CGGCGGCGGCCCC=GCGGCGCG +@gi|10016|ref|NC_16.1|-3/1 
+GCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGTGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCC ++ +CCCGGG=GGGGGGJJGJJJJGJ1JJJJGJ=GJJJCGJGGJJJJJGCCJJGJJCJJGGJJ=JJGGJJGGJCGJJCGCCJCGG=GGGG=GCGG=GGGGCGCCJG=GGG1CGCCGC1GCGGGGGG8GGGGGGGCCGCGGCGGC=GGCGGGGGC +@gi|10016|ref|NC_16.1|-1/1 +CAAACAATGCACTTCCGGGGGCCTTTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGG ++ +CCCGGGGGGGGCGJCJJCGGJJJJJJJJCJJ1JJJGJJJGJJGJGGJ=GJGJJJGGGCJGJGGGJGGG1G1GCCCGGGG=GG=CGGGCGCGGGCGGGGGGJCGCGCGGG=GGGGGGC(GGCCGGCCGGGCCGGCGGGCGCCGGCCCGCCC +@gi|10017|ref|NC_17.1|-9/1 +CTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCCATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATAACTGTCACTTCGGCAGTAGCAGACAAGTTTGAA ++ +CCCGGGGGGGGGGCCJJGJJJGJJGGJJGCJJGGGJJGGJJGJGGJCG==JJGGGGGCCJGJGGCJCJCGGGGGCCGCGGGG=GGGCGGGGGC8GGGGG=C=GCG=G1G1GGGGGG=G=GGGGGGCCGGGGCGCCGGGGGGGGGGCGGC= +@gi|10017|ref|NC_17.1|-7/1 +CCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATAACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAATTGTAACAAAACGTACCAACTCACGCGA ++ +CCC=GGGG(GG1G1JJ8GJJGJJJGJGGGJGGJGGJJCJJJJGJJJGJ1GGG=1CGJGGCCJGJJGGGGGGGGCCCCGCCGGGGGGCG=GGCGGGGGGG=CCCGGGGCCCCGGGGCGGGGG=G=GGGCGCGGGGCGGCGGCCG1CCCGCG +@gi|10017|ref|NC_17.1|-5/1 +AGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTTATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATGGGATGCGAGATAGAAACCAGAGTCACCCTTTA ++ +CCCCGGGGGGGGG=JJJGJJGCJJJJGJCJJJCJJ(JGGJJCCJCJGJJ(G=CCJGG(GGJJCJGJJGCGGGG1CGGGCGGGGGGGC(GGGGGGG8GCGGJCC=CGG=GGG=CGGGCCGGGCGCGGCGCGCGGGGCGGCGCGGGGGCGGG +@gi|10017|ref|NC_17.1|-3/1 +GATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATAACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAATTGTAACAAAACGTACCAACTCACGCGACA ++ +CCCGGGGGGCGGGGCJGGGJGGJCJJGJ8JJJGJGGJJJGJJGGCGGGGG=JJGJGGCCGG1CJGCCG(GGGGGCGCGCGGGGGGC1GGCG=CG=GCC=CJCGGGCGGGGCGCGCC8CGGCCGGCGCGGCGGGGGGGG8CGCG=CGCCCG 
+@gi|10017|ref|NC_17.1|-1/1 +AAATGTCTACATTCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAATTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGA ++ +CC=GGGGGCGGGGGJJGGJJGJG1JJJJJJJGGJ=JGGJGCGGJGJCJGJGGGGGCGJJJGJ=GG=GJGCCGGG=GGCGGGGCCCGCGGCG8=GGGGCG1JGGGGGG8GCGG=GGGGCGCGCCGGGCGCGGGGGGCGCCCGCGG=GGG=G +@gi|10018|ref|NC_18.1|-9/1 +ATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTGGCAAATTAAATCGAGTGTTAGCCGC ++ +C=CGGGGGGGGGGJGJJJJJJJJJ=JGJGJ8=JJJ=JGJGJCCGJGJJJJGGGC1GJGJCJJCGGGGCG=GCCGGGCGGJCCGGCGGCGGGGGGGCCCGGC1CCGG=GCGGGGCG=GGCGCGCGCGGC=GCCGGGCCG8GGGGCCGC8GG +@gi|10018|ref|NC_18.1|-7/1 +CGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTGGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCTGCATTGG ++ +CCCCGGGGGGGGGJJJJJGJGCJJJJJJJJGJGJJJCJGJGJCJJJJG==GGCGCGGJ8CGGJGGJGGGGJJG=G1GGCGGGGGCCGGGGGCGGGCGGGGCGCGGCGG=CCGG=GGCGC81GCGGCCCGGG=GGGGCGGCG1CG=CCCGG +@gi|10018|ref|NC_18.1|-5/1 +TCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTC ++ +CCCGGGGGGGGGGJJGJJCGJJJJJJJGJJJG8GJJJGJJJJGCGJGJJGJJJJJGGGGJC=CCGJG1CGGG=GGCGC8CG1CGGGGGCGCGGGGGGCGGJC=GCG=81CCGCG=GCGGGCGGG=GGGGGGC===GGCGGCGGGCGGGGG +@gi|10018|ref|NC_18.1|-3/1 +ATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTGGCAAATTAAATCGAGTGTTAGCCGCATCTAGACATTTAATGAGTTGGTTTTCTTGTAGTA ++ +CC=GGGGGG8CGGJJJJ1JJJJJGGJJJG8J=JJJJCGJJGJJGGGJJGJJ=CGGGJJJJJGJG=GGCJCGCJCCCG=CGG=GGGGGCGCCGGCCGGGGGJCGGGCGGCGC==G8GGGGGGCG=GGGCGG=C8GGG=GCGGGGGCGCCGC +@gi|10018|ref|NC_18.1|-1/1 +GCTAACACTCGATTTAATTTGCCAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCC ++ 
+C1CGGGGGGGG1G1JJJ=GJJJGGJG(GJJJJJGJGJGJGGCGJCJJJJJGGGJJJJJCGGJ=CJGGG(8GGC8CCGGGGGG=8CGGCG8GGGGCGGGGGJCGGGC=CG8GGGGGGGGCCGG=CGCCCGGGGCGGGGGCGGGGCGC(GG= +@gi|10019|ref|NC_19.1|-9/1 +CAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAATGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCGGCATTGGATGCGAGATA ++ +CCCGGGGGGGGGCJGGGJGJJCJJJJJJJJJJJJCGJJJJGJGGGGGGGJ1CJGJGGGGJC=G8GGGCGC=GGCGJGCCGGC8GGGCCGGGGGGGGCGCGCCCGGCGCGCCGGCCGCCGGGGCCCG=GGCCGGGGCGGGGGG=G1CGCGC +@gi|10019|ref|NC_19.1|-7/1 +CAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCCAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTATT ++ +C=CGGGGGGGGGGCJJJJJJGJJGJJJGJJJJJ(JJJGGJJGJGGJJGJG8GJJG=JGGGCGG8GJJCC=G8GGGJCCG=GGCCCGCG=GGCCGCGG=8GCGGGGCGGGGGCGGCGGGGGCGGGG=CGCCCCCCGGGGC1GGCGGGGCC= +@gi|10019|ref|NC_19.1|-5/1 +CGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCCGATTACTCCGATGA ++ +CCCGGGCGGGGGGJJJJGJJJJGGJJJ1JJG=JCJJCJGJGGGCJJ8GGJGGGJJ=GG=JCGJJGGGGGGGGCGJ=GGCGGGGGGC8GG=GCGG=GCGGGJGCGGGGCGCGCGG=GGCGGCCGGGGGGGGG8G=GCGGCGGCGGG8CCCG +@gi|10019|ref|NC_19.1|-3/1 +CAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCCGATTACTCCGATGAACGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCATTTCG ++ +CCCGGGGGGC1GGGGJGJJG=JJGJGJJJGCJJJJJJCJJGJGGJJJ=CGGCCJGGCJGJCGGCJGGJG=GGGGGGCGGG1GGCGGGGG(GGCC=CGGC=JCGCCGGGCCGCGCGG1CCGCGGGCGGGGGG8CGCGC8GCCGG88GG==G +@gi|10019|ref|NC_19.1|-1/1 +TTGTCTCCGAACAGCCCCAGCCCCCCCGTTTAGGTCATATTTGCTACGATGACCCTACTACAAGAATACCAACTCATAAAATGTCTACATGCGGCTAACACTGGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTG ++ +CCC=CGGGGGGGGJGJJJG8CJGGJJGGJJGJJ=JJGGGJJJJJCJGJGJJGGGJCGJGGGGC=GJJJCCGGG=GJ==GGGG(=CGCGGGGGCGGGCGGGJ=CGCGGC(GGGGGGGGGGGGGGGCGGGGGCGGGCGGGCGGC=GGGGGGG +@gi|10020|ref|NC_20.1|-9/1 
+TATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATAATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACT ++ +CCCGCGGGGGGGG=JJJJGJGJJ=JGJCJJJJGGJGJCJJJ8GG=GJJCJGGCJGCCGGGCCGGGJG1JGGGG=GGCC=GCGGGG=GGGGCG1GGGCGGCJCGCCGGGGG1GCGCGGGCCGCGGGGC=GGCGG8C=GGGCGGCGGGGCCG +@gi|10020|ref|NC_20.1|-7/1 +ACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACCGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTACTCTTGTAGTAGGGTCATCGTAGCA ++ +CCC=GGGGGGGGGJJJJJCGGGGCJJGJJGGJJJJJJJJJJGJJCJGJGGGGJGGGCJGGJGGGGGGCGC=JGGGGCCGGGGGGCGCCCGGCCGGGGC8CJ=CGGGGGC1GCGCGGG===G=C=G=GGGGGG=GGGGGGCGCCCGCGCC= +@gi|10020|ref|NC_20.1|-5/1 +GGTAGTAGGCGTAGATTTGTCTCCGAACAGCCCCAGCCCCCCCGTTTAAGTCATATTTGCTACGATGACCCTACTACAAGAGTACCAACTCATAAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAA ++ +CCCGGGGGCGGGGJJGJJJJJJGJG=JGJJJJJGJGGJJGJCJCJGGGJJJGJC=JCJ1GGJGJ8=CCGCG=CCCGGGJGCGG==GGCGGGGGCGGC8GGCC1CGCCCG=G=8CC=G1CGGCCGGCGGCGGGGGGGGGGG1CG=CGCCGC +@gi|10020|ref|NC_20.1|-3/1 +ACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACGGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAG ++ +CCCGGGGGGG1CGCJJJGJJJJGJJJGGJJJGGCJ8GGJGCJGJ1JJJ=GGJJJJGCCGJJGCGGCGGGC1GCCCCCGGCCG=GCG=GGGCGGGCCGG8GJ8GGCGGGGGGGCGGGGG1GCCC8GGCCGCCGGGCGGCC=GGGCGCGCGG +@gi|10020|ref|NC_20.1|-1/1 +TACAAGAGTACCAACTCATAAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACGGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCA ++ +CCCGGGGGGGGGGJJJJJJJGJJJ=GJJ8J=JJJGJGGJJGGJGJJCJGCGJJGCJGGGGCGJJ=CGCCCJGGGGCGGG1GGGGGGGCG=GGGGGGG=GGCCGGCCGGCGGCCG=GGG8GGGGCCGGG1GGC=GGCGGG8GGCGCCGGCG +@gi|10001|ref|NC_01.1|-9/2 +CCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATACAGA ++ +=C=GGG=GGGGGGJJJJJJGJJJJGJJJJJJJ==JJ1JJJGJJGGCCJ=JCGGJGGG(JGGJCGJGG(G1GJGCCCCCCGCGGGCG=GG81=GCGG8=GG=CCJJJCC(GGC=GGC1GGG81G1CCGCCGGGGCGCGGGGCC=GG(GGGG 
+@gi|10001|ref|NC_01.1|-7/2 +GCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATTGATCCCTC ++ +CCCCCGGGGGGG(JGJCJJJJJJC(1JJGGGJCJJGJJJ1GGGGJJCJGJJGGGGGGGJGJCCJG==GG8GCCGG=CG8GGGCG8GCCCCGCGC=CCCG=C=CJJJJCGGC=GGGGCCGGGGGC=GCC=1CCCGGCGGGCCGCGCCCGGC +@gi|10001|ref|NC_01.1|-5/2 +AACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCG ++ +C8CGGGGGGGCCGG1JCJJJGGGCCJJJJGJJG=J1GGGJJJJJJJ1JJJGCCJCGGCGGGGJJGCJGGCGGGGGGGGGCGGG=GGCCG=GCGCCGGGG8C1C=JCJ1CCGCCGCGGGGGGCG=GGG=GGCCGGC8G1=CCGCGCG=8CG +@gi|10001|ref|NC_01.1|-3/2 +ACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCA ++ +=CCCCGGGGGGGGJGJJ8GJJ1JJJCJJJCJCGJGJGJGJJJJJGJC=JGGGGCGJG=JCCGGGCGJGGG8G8G=GGCGCGGGGCGGGGGGGC=CGGCG=GGJJCJ8GGG=CG1CGGG=1CGGGGG1G=GGGGCCGGCGGGG8GGC=C8C +@gi|10001|ref|NC_01.1|-1/2 +GTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTA ++ +CC=GGGCGGGCGGJJGJCJJGGJ1JCJJJJJGJJJJJCJGJJJ8JJGGJJ=C=GCGGGCGG(J(JJGGJCJG=GGGGJCCGCCGGGGGCGGGGGGGG8CGC=CC(JCGGGGGGCGGGCCGGCGGGGGG8GCCGGGGG8CGG=CGCGC=1C +@gi|10002|ref|NC_02.1|-9/2 +ACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGA ++ +CCCGGGGGGGGCG=JCJJGJJJJJ=JJJ1J1JJG8GJGJJGGJGGGGJJJGGGC8GJGGJCGCJGCJGGJ(GCGCG=GC1CGGGGGCG1GCCGGGGGGCCG==CJCJGGCGCGCGGGC=GGC=GG=CG8CG1G(CCC1GCCGGG=CGGGC +@gi|10002|ref|NC_02.1|-7/2 +GAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAAT ++ 
+CCCGGGGGGGGCC1JGJJJ=JJJGJGJCJGJJJ1JJJGJJGGJGGGG==JCJJJ8GJJJJCGGGGJGGCCGCGGGGJCG1=GGGGGGGGCGGGGG=GCGGCCCJJJJG1CGCCG=GGGGC==CGGGGGGGGCCG=CGGCG8=GCGGCCGG +@gi|10002|ref|NC_02.1|-5/2 +TGTAGTAGGCGTAGATTTGTCTGCGAACAGCCCCAGCCCCCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAA ++ +CC1CGGGGGCGGGJGGJJJJCJJJGJJGJ=J=JJCJGJGGJGCJJG1J8GJC=GGC=GGGG(G1GJGJGJ=G=CGJGGGG8GG=GCCGGGGCGCCCC1CGGCJJJ1J1GGGGC=GGCGG8GCCCGGGGG1GGCCGGCGGGCGCGGGGGGC +@gi|10002|ref|NC_02.1|-3/2 +TTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAG ++ +CCCGGGGGGGGGG=J==JJJJ1GGCJJGJJJJGGGJGJGJCGJGGJGGCGGJGGJGCGGJCCGGGGJGCCCGGCGJGCG=GGGGCGCGGGCC=GGGCGCGGCJ=JJ=GGGGCCCGGG=CG1G==CGGGCGCG==GGG(CGGGGGCGGCGG +@gi|10002|ref|NC_02.1|-1/2 +GAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTT ++ +CCC=GGGGGGGGGJCJJCGJGJJGGGJJJJCGJJJGCJCGGJCJGCJCGJJGGGJJGJC=CGGJ=JCCGGG1JGGCGGGGCCGGGCG18G=C8GC=GGCCGGCJC8CGGGGGGCGGG=GGGGGG=C1GGCGG(=CCCG=GCGC8CGGG=G +@gi|10003|ref|NC_03.1|-9/2 +CCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATTGATCCCTCT ++ +CCCGGGGG=GGGGJG1JJCJJGJJJJJGJG1JGJCJG=GGJJ1CGGCGJGJJGJGGGJGGCGGGGG8GGCJGCJGGCGJCGGCGGGG1GGCGCGCG=CGGGC8JCJJGCG8CCCGGGGGGC=CCGG8GGGCGGGGC=GGCGGGGCGCCC= +@gi|10003|ref|NC_03.1|-7/2 +CATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCG ++ +C=CGCGGGGGGGGJGJJCG1JJJGGJGGGJGJJJG8GJJJJGC8=GJJGCJJJGCCGGGGCGGCCGCGCJ=GGCGGGGGG8CGG1GGG=C=CGGGG=GCCGCCJJCCG1GG=CCGCCGGCGGGGC8GCGGG=GGGCGCCG=1GCCGCGG1 +@gi|10003|ref|NC_03.1|-5/2 
+CAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACGTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGT ++ +CCCCGGGGGGGGGCGJGJGJJJ1GJJJGGCJJJJCGJGJGGGGJGJJJCJGJJGJG=JGGJGGGCGGGGG8=GGG=JGG8CCGGGGCGGGCGCGGGCGC8=C=JJJJGCGGG=GGGCGGGGCCGGCC=GGCGGCGGCG=GGGGGCCGG=C +@gi|10003|ref|NC_03.1|-3/2 +CTTCCGGGGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACG ++ +CCCGGCGGGGGGGJGJJJJGJCJJGJJJGGCJGJGCJJJJGGGJGJGGJGGGGCJJ=JGC=GGC==GG=JGGGG=GGGJGGCGCGG8GGCGGGG=GGGGGG81JJCJGGG8GG1GGGGCGC8G8GGGCCGGGGGCGGGCGGCCCC1GCG8 +@gi|10003|ref|NC_03.1|-1/2 +CTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCAAATATGACTTA ++ +=CCGGGGGGGGGGJG1GGJGJJCGJGJJC1GCJJGGJCJJJJGGJGGJGGJCJCGJGJCGJGGGCJJGJGGGGJC=CGGG8G1G=GGGC1G=GGGGCGGCGCJCCCJGGGC1GCGGGGG1GGCCCGGGCG1CG1GGC=CCGGCGGCCGGC +@gi|10004|ref|NC_04.1|-9/2 +TTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGGTAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAG ++ +CCCGGGGGGGGG=JJJJJJJJGJJGJCGCJJJJJGJJGJJJ=GJJGJJGGGGGGCJJGJJGGJCG(GCGGGCGJ=CCGCGCGGGCGGGCCGG8GGGGGGCGCCCCJJGCGCCGG8GGGGCGGGGGGGCG=CGCGGGCGCCCCG=CGGCCG +@gi|10004|ref|NC_04.1|-7/2 +GGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAG ++ +CC8GGGGGGGGGGGJJJJJGJ=GCJJCJJCJJJJGGGJJJCJJJCJGJGJCJ8GJGG=J=CJG=GJCJGJGJJGG8CGGGGGGGCGGGGGGGGCGGCCGGC=J=JJCGCGGGGC=CGG=CGGCG==GCCGCGGCGCGGCCGGGG=G8GG= +@gi|10004|ref|NC_04.1|-5/2 +GTAGTAGGCGTAGATTTGTCTGCGAACAGCCCCAGCCCCCTGGGTTAAGTCATATTGGCCACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAATGAAAGTCCCAAA ++ +8CCGGGGGGG=G1GGJJJJJCJJGCJJ1JJCJJJCJJJJJGJJGCJGJJCJJCG=CCJC=JG=GCC=GCGGGGC=GCCGGGGGG=GGCG=CGCGGGGGCGGGCCCJJGGGCCCGGCGGGCGGGCCGGGC=GGGCC=G=GGGGGGGGCCGG 
+@gi|10004|ref|NC_04.1|-3/2 +GAGTTTGTAGTAGGCGTAGATTTGTCTGCGAACAGCCCCAGCCCCCTGGGTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAATGAAAGT ++ +CC=GGGGGGCG1GCJ=GC1GJCGCJGGJCJJJJJJCGJGJJCJGCJGGJGJGGJJ1CJGCGGCGGJGGGGGCGCG8G8GGGCCGCGGCGGGC=GGGGCCC=GJJ=CJGGGGGGGGGGGCGGGGGGG=GGGG=CGGCGGCCCGC=GC8GGC +@gi|10004|ref|NC_04.1|-1/2 +TTGCTTAGTAATGAAAGTCACAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGT ++ +CC=GG=GGGGGGGGJJG=J1JGGJGCGJGGJGJJJGJJGJG=JJGJGJJ=JGGJ=GJGCGGCGG=GJ=J=GCCGCGCGC=GGGGGGCGGGGGGCGGGGGGGCJ=(8JCGGCCGGCCCGGCCGG1GGGGGGCGCCGCCCGGCGC1GCC=GC +@gi|10005|ref|NC_05.1|-9/2 +AGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACT ++ +CCCGGGGGCGGGGGJJJJGJJJJJGJJJJJJJJJJGCGJGJGJGJ=JGJJGGGJJJJJJGGGG=GCCGG=GJGGCCGGGJG1CCGC=GGGCCGGGG1GGGGCCCJJC8C=8==CCGGGGCCGGGGCGGGC=GGGG=1C=CCCGCCGCGCG +@gi|10005|ref|NC_05.1|-7/2 +TACACGCCCTCAATGCATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAG ++ +C8CG=GGGGG=GGJGJ1JGJJCGJCJ8GG=JJCGJCJGGGJGJJCJGCGJJCJ=GJJGGJG8GGG8JJGCCCGG=GGG1=CGCCGGCGGGG=GCGGCGGG=(JJJJJGGGGGGG=GGCC1GG=C=1CCGGGGGGGGGCC=G8GCGGGGCG +@gi|10005|ref|NC_05.1|-5/2 +AGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAG ++ +CCC1CGGGGGCGGG1JGGJJJJJJGJJJGJJJGJJJJJJ=CJJJCJGGCJGJGCCJC8GGGGGJ8CGG=CGCCG=CGCGG=CCGGGGGGGGCCCCGGG1GGCJ=CJJGCCCCGGGCGCGGCGGG1GGGCGCCG1CGGGCC1GGGGCGGGC +@gi|10005|ref|NC_05.1|-3/2 +TTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGCCAATATGACTTAAACCAGGGG ++ 
+=CCGGGGGGGGGGJ1JJGJGJJJGJJJJGJGGJGJGGJJCJGJ1JJJJJJJGCGJ=CJGGC1GJGGCG=JGGCCGJGGGGCG=GGGGCGCGGCCGGGG=GGGC=JJ=GGGCGCGC=GGC=CG8C=GGGG1GGGGGGGGGGGGCGCGGGG= +@gi|10005|ref|NC_05.1|-1/2 +TGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGAGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATC ++ +1CCGGGGGCGGGGCJGJGJCJJJJG=JJ=JJCJJJJJJJJJGJJJJG1CJCGGGCJGJJGGGJG1G8JCGGG1GGGGGGGGGC=CC=1CGGGCGCCGCCGGGJJJCJ=8=GGG8GCGGCGGGGGCGCCCC==CGGGGCGGGGGCGCGCGC +@gi|10006|ref|NC_06.1|-9/2 +AAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTT ++ +CCCGCC=GGGGGGCGJJJGJJ1CJJJGJGGCJJJJJJJGJGCJJCJJCJJJGJ(GGJGGCJGCJCGGGGJGG=CGC8GCGCCCCCGGC=CG11GGCGGG1GCJJJJJGCCCGGGGGCG=GGGGGGC==GG=GCG1GGGCG=GCGG=CGGC +@gi|10006|ref|NC_06.1|-7/2 +AAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGGGTTGGTATTC ++ +CCCGGGGGGGGGGCJJGJJJJGJJJJGJJ=J=JG1CJGJGGJJCCJCGGGJ8JJJJCJGG1GGGGGG=CCGGCCGCJGGCCGCG=CGCCCGCGGG=GC=GGC8JJJJGGGCGGGG=GCGGGC=1=G=GCC(GG8CGGCC=GGGGGCGGGC +@gi|10006|ref|NC_06.1|-5/2 +CAAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACGCTTTGT ++ +CCCGCCGGCGGGGJ=JJGGJJG=GJJJJJ1JGGJJGJCJJJJ=CJCJJG(J(CJG=GCGGCGGGCGGJGGGCGGGG1C=CCGCCGGC=G=GCGCGGGCGC=CJJJ=CGCCGG1=GGG=GGC=GGCCG(GCCGGGGCGGGCGGG(GCGCC= +@gi|10006|ref|NC_06.1|-3/2 +TCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACT ++ +CCCGGGGGGGGGGJJJGGJJGGGCJJJJGJJJJGJJJGJJCJGGGGJJGJGJJJJJCGGGJGJGGJ1GCGCGCGGGGGCJGGGCGG=G=G=GC8GGGGG=CGCJ=CCGGGGGGGGG=GGCGGCGGGGGCCGGGCCGGGGGCCGGGGGGGC +@gi|10006|ref|NC_06.1|-1/2 
+GAGTTTGTAGTAGGCGTAGATTTGTCTGCGAACAGCCCCAGCCACCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACCCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGT ++ +CCCGGCGGGGGGGJGGJJGGGGJJJJJJGJCJJGJJGJCJGJJJGC=C=GGGGJJCGGGGGJGG=8JGJCJGGGCG(CG8GGG=GCCCGGGGGCGGCGCCG=JJJJCG8G1GGGGGGGGGGCGGGGGGGG8GGGCGCGGCCGGGCGC=GG +@gi|10007|ref|NC_07.1|-9/2 +TACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGTTGTAACAAAACGTACCAACTCAC ++ +=CCCCGG1GGGGGJGJJJGGJGJ1JG1GJCJJGJCGGJGCCJCJGGGC8JGJ(GJGGJJGJGGCGGGGCJGCGGCGCGGCC88CCGGGGGCGCGGG===C(GJCJJ8GGGCGGCCCCG8GGCCGGGC=GCCGC1CGGCGG8CCGGG1GC8 +@gi|10007|ref|NC_07.1|-7/2 +TTTAAGTCATATTTGCTACGGTGACCCTACTACTAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCA ++ +CC1GGGGGGGGGCJJJ1JJGGJGGJCJ=GGJCJ(JJJGJ=GJJGJJ=JGJJJJ=CGCCGGGGGJGCGGGGGGCCCG8CGGCGGGGGGGG1GCG=CCCGGGC1JCCJCGCGGCGGC1CGG1GCCCCGGGC1GG1CGC=CGC=GGGCGGCGC +@gi|10007|ref|NC_07.1|-5/2 +TCTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGT ++ +CCCGGGGGGGGGGGJJJGCJGCJJ1JJJJJJJCCCJGJGGGJGGCGJCGJ=JJJCJGJG=GCJGG8GGGJJJGCCGGG8CG=(GGCGCCCCGGCGGC=GGGCJJJJJGGGGGGGCGGGG===GGCGGGGCCGGCCG=G=GGGGCGGGGGG +@gi|10007|ref|NC_07.1|-3/2 +GCTAGAGTTTGTAGTAGGCGTAGATTTGTCTGCGAACAGCCCCAGCCCCCCCGTTTAAGTCATATTTGCTACGGTGACCTTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGGAGTGA ++ +CCCGGGGGGGGGGJGJJJJJGJGJJCGGJGJJGGGJCGGJJ1JJGGGCJGJ=JJJJ=JC1C8GGG=CGGG==GJGCC=C(CG=CGGCGGCG(GGGGGGC=CCC1C1CG=GGCCGG=CGG=CG=GCGGGCGGGCCGGC1GCGGGG1CGCCG +@gi|10007|ref|NC_07.1|-1/2 +AGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTT ++ +CC=1GGGGGGGGGGGJGGJGJJJGJJGGJGGJGGJ=JCJJGJGG=GCGJJGGJ=JGJJJJG8CGGGGCJCJGCCGCGGGGGGGG=G=GGGGGGGGCG8GGCCJJJJJCCCGCCGGGCGCGGGGGGCGCCCGGGG=C8GCGC=GGGG81CC 
+@gi|10008|ref|NC_08.1|-9/2 +CAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCT ++ +=CCGGGGGGGGGGJGJJGCGJGJJJGJJCCGGJJJGJJJGGJGGGGCGJGJ=GGGGGJGG8GJGGGJGCGJGGCJCGCGGGCG8G8GC1GG8GGGGGGGGGGC=CCCGCGGG8GGGGCG1CG88GGGCGGGGCG1G=CGC=GCCGGGGCG +@gi|10008|ref|NC_08.1|-7/2 +GTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTG ++ +C=1GGGGGGGGGGJJJJJJGGCJJJC8GJJJJJJJCGJJJJJJGJJJGGGGJGJG==GG==GGGGCGGJGCG=CGGCG8GCGCGGGGGGCGGGCC8G(GGCGJJCJCGCCCGGGCCCGGGCGCGGGGCGGGGGGCGCGCGGGGGGCGCCG +@gi|10008|ref|NC_08.1|-5/2 +TCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTGAGC ++ +CCCGGGGCGGGCGGJ1JJJ=GGJJJJ8CJJJGJJJCGGGGJJGJ8CJJCJ(G8J1JG8JCGGG=CCCGGGGC=G(=JG8=G8GGGGCCG=GCG=CGCCCGGG=JJ=JGGGGCGCGGGG=C=GGGGGGGGGGGCGC1GCGG=C81GGGCGG +@gi|10008|ref|NC_08.1|-3/2 +TCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTC ++ +CCCGGGGGCCGGGGGCGJGGJJJJJJJCGJCJJJGJ8JGCJJJGGJGGJJJJCCGCCCCGJGCGCGJC=CGGCG=GGGGC=GGCGCG=GGGG=GGC1GG=8GJCJ8JCGGGGCGGCGCGC1GG=CGCCCGGCGGGCGCGGGCGCGGGGG8 +@gi|10008|ref|NC_08.1|-1/2 +CCAGCCCCCCCGTTTAAGTCATATTTGCTACGATGACCCTACTACAAGAATACCAACTCATAAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGT ++ +CCCGGCGGG1GCGJJGJJGJJGG1JJ=CJJJGJJJGJGJJGGJCJGGGCGGJJGJCGJGJGC=GGGGGGCC8GC=GGG1G1CGGGG1CCCCGGCCCGGGG8GCJCJCG=CG=G=GGGGGGCGGGCG1GGGCGGGCGCGGGGCGGCGCGC= +@gi|10009|ref|NC_09.1|-9/2 +GAAGTGACACTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGA ++ 
+CCCGCGGGG(=GGGGJGGJJJJJJJG(JJJJJJJJJJCCGCG=GJGJJGJC=G8JJCJJ=JGGCGGGCGGGG1GGGGGCGGGGGGCGGCGGG=GCGGGGCGCJJJJCGGGG=GGGGGGC8CG1CCCG==CCGGCGC8GGGCGGCCGGCC1 +@gi|10009|ref|NC_09.1|-7/2 +TAGCAGGAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTAATGAGT ++ +CCCGCGGGGGGGGJGJG=GGJGGGJJGJGJJJGCGJJGJGJGJJGJ=GJGJJG88G1GJGJJ=CJGGGGG(GGGGCGJGGCGCGGCCGG1GGGGGGCGG18CCJCC8GCGCG=GCG=G1GGGGCGG88CG8CCG=CGC=GGGCGCGGCCC +@gi|10009|ref|NC_09.1|-5/2 +CGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGG ++ +CCCGGGGGCGCGGJJJJJGJCJJG8J=GJJGJGJJJJGGJJGGGGJJJJCGCJCGJ=GCJGGGG1GGGGGGCJJGCJC=GGGGCGCCGGGGGG=GGG8GCG=CJ8J1GC=GGGG1=CCGGGCGGCCGGCCC=CGGGGGCGGCCGCCGCGC +@gi|10009|ref|NC_09.1|-3/2 +CCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGAACCATAGATCCCTCT ++ +CCC1GGGGG=GGGGGJJJGG=GJJCGGGJGJGJCGJJJJGJC=JGG=CJGG8JGGJGJJCJGGGGGGGGGGCGJGGCGGCCCCGGGGGGGGGCGGGG8GGGC=JCJJGGGGG=CGGCGCGCGGGGCGCCGCGCCGCCCGGGGG1CGGCC= +@gi|10009|ref|NC_09.1|-1/2 +AGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGGAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAAC ++ +CCCGGGGGGGGG=JJJJJGJJGJJJJCJCJGJJJGJGJJGGGJGJJCJGGG=J8GGJJGJGJG=GGJJ8GCCC8GJGGGCC(C188GGG=GCGCGGGCGGCCJCJJCGC8GG8CG=C1G1=GCGGGGGC1CCGCGCGGG=1GG=G1CG8= +@gi|10010|ref|NC_10.1|-9/2 +AGGCGTAGATTTGTCTGCGAACAGCCCCAGCCCCTTGGGTTAAGTCATATTGGCTAAGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAATGAAAGTCCCAAAGGCTA ++ +CCCGGGGGGGGGGJGJ=JJJGJC1G1CJ1JJGCJJJJGGJJCJJJJGJJGGJJJJ=G==GGJ=GGJGGCGJGGGGGCCG1G=JCGGGC=G1GC=CGGG8=C1JJJJJGG(GC=GGGGGC1GCGGCGCCCGCGG1C=GG1=GGCGGGGGGG +@gi|10010|ref|NC_10.1|-7/2 
+CCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTGTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCT ++ +1CC=G1GGGGGCCJJCJGJJG1JJJGCJJJCJCGJJGJCJJCJJJJJCJGC=G8JJGJJJG1GJGJGGGGGCGCJCGGGG=GGCGGC8CCGGCCGG==GGG(CJCCCCGGGGCGGGCGGG8GCCCCGCCGCGGCGGGG=CG1C8GGGGGG +@gi|10010|ref|NC_10.1|-5/2 +GTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACCAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAATTAAATCGAGTGTTAGCCGCATGTAG ++ +CCCGGGGGCGGGGJGJJJGJGGJJGCGJJJCGJJJJJJJJGCJCJJJGJ8JCGJJGJCJGJ8JGGG=GG8CJCG=GGGCGGCGCGGGGGGCGGC==GCGGCCCCCJC(GGGGG=GGGCG=GGGG1GGGG1GGG1G1GG8CGGGCGGCCGG +@gi|10010|ref|NC_10.1|-3/2 +TATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGAGCGACACAAAGAGTGGGCTAAGTACACGCCCTCAATGCA ++ +CCCGGGGGGGGGGJGJJGCGJJJGJGJGC=CGJGGJGJGJJGJ(1CJGGJGGGGGJG==JCC1JGCG(C(GGGGGGGCG=GCCGGGCC8G=CG==CCGCGG(CCJJJC=GC=GGGGGGGGCGGG1GCCGGGGG81CGGCGCGG8CGGGCC +@gi|10010|ref|NC_10.1|-1/2 +GCAGATTACTCCGATGAGCGACACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACCAAACGTACCA ++ +CCCGGGCGGGGGGGJCG1JJCJJCJ=CCJCJGJGCJJCCJ1JJG=JGGJGCJ=CJJCJ8G(JJG=88GCCCCGCGJGGCGCGCCGCCGCCGCCGGCGG=G1C1JJJJGGCCGGG=GGGGGGC=GGGCCGCCGGCGCGGGCGGGGG=CCGG +@gi|10011|ref|NC_11.1|-9/2 +TGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCAATGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCA ++ +CCCGGGGG=GGGGJGJGGGJJJJJJJJJJJJCJJJJJGJJGJGJGGGJJJJGGGJGCJJ8GGCGG8GGG==G=CGGGCCGGGGCGGGGGGGGCGCGGG=CGC=JJJCCGGGGGG=CCGGGGGGGGGCGGG(CG=GCGCGGGCCGGCG8CG +@gi|10011|ref|NC_11.1|-7/2 +TCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCATTACTAAGCAAATTAAATCGAGTGTTGGCCGCATGTAGACATTT ++ +CCCGGG1GGGGGGCJ=JGJJCGJGJJJJJJ8JGJGJJGJ=JGGJJJJGCGJCGGGGCC1=C8JGJ==GCJGCGGG8GCCGGC1CGGGGGG=GGGCG=CCGCGJJJCJCGCCCCCG=GC8CC=G8=CGCGGGGGCGCCCGGGG=GCGGGGG 
+@gi|10011|ref|NC_11.1|-5/2 +ACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCCTTGTTTCGCTCATCGGAGTAATTTGCAATGAATGCGAGATAGACACAAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTG ++ +8=CGGGGGGGGGGJJGJJGCJJGG=JJJJJGJJJJGJJGG=C(JJGJCGCC8JGGJ(CGJJJJGGGCJCGGGGGJCGJC=CG=G=GG=GGGGCGCGGGGGGG=JC=JC==CCGGCGG=GGGGGGGCC=GGGGGG=C8GG=GGCGCGCCG1 +@gi|10011|ref|NC_11.1|-3/2 +CTCAATCACTCAGAAGTAAAGGGTGACTCTTGTGTCTATCTCGCATTCATTGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAA ++ +CCCGGGGGGGGGGJJJJJJJJJGGJJJJJGJC=JGJJGGJJJJGGCJJJGJGCJJGJGCGGGGGGJGCCJGJGGCGG=GGCGGGCG=C=CGGGCGGC=GG8CCJ(JJGGCC=GCGCGGCGG8GGCCCGCGCCCG1CCG=GGC8GCGGCC= +@gi|10011|ref|NC_11.1|-1/2 +ATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCAATGAATGCGAGATAGACACAAGAGACACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATAGATCCCCCTCGAATAGCC ++ +CCCGGGGGGGGG1GJCJJJGGCJGJGJGJJGGGJJGJGCJJJJJJG=G8JJG=CGJGGCCCGGGCGGCGGJGGGGGCG(CGGGGG8GCGGGG1=CCGGGCGCJC8JJC8GCGGGCCGGGCGCGGCCGG=C8GGGCGC=GCGC1CGCGGCC +@gi|10012|ref|NC_12.1|-9/2 +GGCGTATACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTTAATGCGAGATAGACACCAGAGTCACCCTCTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGACCCATAGATCCCTCTCGAATAGCCGCCGGA ++ +1CCGG1GGGGGGGJJJJGJJJCJJGJJGJGJJGJ(CJJJ8GGJGJJJJJJCGJ8CCGGGCJJGGGGGGC=GGG=GGGGCGGCG=CCCGGGCGGG=CGGGGGCJ=CJJ8CGGGGGGCG1CGGGGCCGG1=GGGGGGCGCCCGGGGCGCGGG +@gi|10012|ref|NC_12.1|-7/2 +GGCCTCCCCAAAAAATCCCCGGGGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAGAGGGTGACTCTGGTGTCTATCTCGCATTAA ++ +=CCGGGCCGGGGGJJJGJCJGJJJJGGJGGJGGJJGGJGCCJJJGJGJJGJCCG8GJCG=GCGG8=JGJGGJC=GG=GGGCCGGGGCGCGGG1GC=GCGGG8JJCCJCCC1GCGCGGGG=CGGCGCCGGGGCGGC=GGC8=CGGCGG=CG +@gi|10012|ref|NC_12.1|-5/2 +TGCGAACAGCCACAGCCCCCTGGTTTAAGTCATATTGGATACGATGACCCTACTACAAGAATACCAACTCATTAAATGTCTACATGCGGCTAACACTAGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTG ++ 
+CCCGGGGGGGGGG=JJ8JJJJGCGJGJJJGJJJCJCGG(GJGJGGGJGJJGGJGGGJCJGGJJJJG1CG8GGGCJ1GGGCGGGGCC==GGGGGGGG=GGGGCCCJJC=GGG=G=GCCGCGGGGGCGCGG=CCGGG=CCCG=GGGGGC=GC +@gi|10012|ref|NC_12.1|-3/2 +TACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGGGTCATCGTAGC ++ +CC=GGGGCGGG=GJJJJGJJJGCGGJGGJJJGGJJJJCJGGGGJ8=GJGGGJGGJJGCJC8GGCJGGJGCCGGGGGGG==8GGGCGCGGCGGGC8GGGG==GJJCC=C=CCCGGG=G=GCGGC==CGGG=GGGGGCGGC=GGCGG(GCCC +@gi|10012|ref|NC_12.1|-1/2 +AAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCGGAAGAGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTT ++ +CCC=GGCGGGG8G=JJGJGJCJJJJJJJJJJJJJ1JGCGJJJJJJGJ18JJCGCJGGGGC=(=GJCCCCGGJCCG1GG8GCGGGGGCG(GG=GGGGGGG=GC=JJJJCGGGGGGGGCGGCGGCGCG=CGGCCGC=GCGGGGGCGCGGGGG +@gi|10013|ref|NC_13.1|-9/2 +GAAGAGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGAATGCGAGATTGACACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCTGA ++ +C=CGG=CG1GGGG=JCJGJJCGJGJJ1J1CGJGGGJGJJGGGGGGJGJJGG8JJCCGC8GGGGGGJ8GGG(G1CCCGG8GCCGGCC=GG=CGGGCGGCG=GCJJJ=CGGCGGGGGGGGGC1GGGG=CGCGGCGCCGGGCC1=CGG=GGGG +@gi|10013|ref|NC_13.1|-7/2 +AATGCATCTTGGCATGACTGTCTCTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTAACAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATC ++ +CCCGGGGGGGGGGJJJJJCG1JGGJGJJJJJ1GGGJJ(JGJJGGGJGJJGCJJG8JJJGCGJCJCGGGGGC1GGGGCGGCCGGCCG=GGGCGCCCGGGCCGCCJJJJCGCCGGCC=CC==GCG(GGGGGGGC=CCGGG8GGCGGGGGG=1 +@gi|10013|ref|NC_13.1|-5/2 +GTCGCGTGAGTTGTTACGTTTTGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGAGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCG ++ +CCCGGGCGGGGGGG=(GGJJJJJGGGGCJGJJJJJCGJ1CGJJJJJJG=JCJCJ=GJGGJGJGGJGGCGGGGG8GGGCGG81GCGGGG=8CGGGGGGCCG8CCJJJ=CCG=CGGGGG=CGCGCCCG1GGCG1G=GCGGGCGGGGG=C=CC +@gi|10013|ref|NC_13.1|-3/2 
+GAGGAATATTTCGACCAAACAATGCACTTCCGGCGGCTATTCGAGAGGGATCTATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTGTCAATCTCGCATTCAATGCAGATTACTCCGATGAGC ++ +C=CGGGGG1GGGGGGJJ=JJJJCJCJJJJGJJGJJJC1(CGJGCJJJGJJJGGJ8GGGGJGJJ=GGGGGGGGGJ1CGGGCGGGCGGGGCCGGCG1GGGCCGGCJJCCGCGGGCCCGC8GCCCCGGCCG=C8GCCG=GGGGGCGGGGGGGC +@gi|10013|ref|NC_13.1|-1/2 +TGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTAACAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCTAGTGTTAGCCGCATGTAGACATTTAATGAGTTGGTATTCTTGTAGTAGG ++ +CCCGGGGGGGGGGGJG=JJ=CJGJGGGG=GJJJGCGGJJJJGJCGGGGGCGGJGCJJG8JGGGGCGJCCGJ8GGC8JGGCGGGCGGCCCCCGGGG=GGGGGCJJJ=JGG==CGGGGGGGG==G=1GCGGGGCGGGGGGGCGGGGG=CCGG +@gi|10014|ref|NC_14.1|-9/2 +GTAGCCGTAGATTTGTCTGCGAACAGCCCCAGCCACCTGGTTTAAGTCATATTGGCTACGATGACCCTACTACAAGAATACCAACCCATTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGC ++ +CCCG1GGGGGG1GGJJJJJJGJJJJJJJJCJGGJGGJJGJGJJJJGJJ1CGGJCJG8GCGGGJJG=GJJGJCCG8=CGGGG=GGCGCCCGGGCGGGGCGGGCJJJJ=GGGCGGGGCGGGGCCGCGC1GGGGCCGCGCCG=CGGCGCGGGG +@gi|10014|ref|NC_14.1|-7/2 +CACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAGAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGATAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCA ++ +CCCGGGGGGGGGGGGGJCJJ(GCJ=J1JGJGJJ1GGJJJ=J=JJGCJJCJJCGJJGGJJGC=GJ=GCJGG1CGG==G=G=(GCCGGGGGGGGGGGGCCGGGG=JJCJGCGGCGGGCGCCGGGGGGCCGGGGGGGCCGGGCGCGCG=CGCC +@gi|10014|ref|NC_14.1|-5/2 +TCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCCCATCGGAGTAATCTGCATTGAATGCGAGATAGACACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTTGCTACGGCCGTGATCCGACCCATAGATC ++ +CCCGGGCGGGCGGJJG1JJ1JJJJJJJJGJGJGGGJJGGCJGJJGGJGCJGJGJJGJGGGCGCGJGGGGJGGCC1GGGGGGCGGCCCCGGGGCGGCG=GGGCJCJJJGGGGCCCCGGCGCG8CCGGCGGC=GGGCGGCCG=GGC==GGGC +@gi|10014|ref|NC_14.1|-3/2 +ATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAGAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGATAAGTTGTAACATAACGTAC ++ +8CC=GCCGGGCGGJJJJJGJJJJGJGJJJJJJGJJ8J1JGGJJJJGGGCJCJCGCJJJGGGCGJ8GJ1GGGGG=GGG=GGG1GGGGGGCCC=GGGGCGGGGGCJJCCCC1G=8CGGGGCC=CGG=C=CCGG=GGGCGGG8GG=G8=G8GG 
+@gi|10014|ref|NC_14.1|-1/2 +TTAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAACTTATCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTCTCTGCTACTGCC ++ +CCCGGCGG=GGGCGGGCJGJJCGJJJJJCG=J(JGJ8GJCJGGJGJJGGGCGGG=JGJJG=JC=GGGGGGGCGCJCJJGCCGGGCGGGGG(GCCGGGGGG8CJJJJCGG=188GGCCGGGGGG8CCGGGGCGCGGGGGGGGGGGCCCGGC +@gi|10015|ref|NC_15.1|-9/2 +AATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAACTTTTCAAGAGTAAGCAAGTATGTTTCGCCCAATTCAAACTTTTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGG ++ +CCCGGGGGGGG=GJJCCJJCJGGJJJJJGJGCJCC=GGGJJJJJ=JJCCJGGGGJJJGGC8GGGJGGCGJCGG=CGGCGGCCCGGCCGGGGGGGGCC1GG(GCJJJCGGCCCCGGG=(CGGGGCGCGGCCGC=CGG(CGGGGCGGGGGG= +@gi|10015|ref|NC_15.1|-7/2 +TATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGTAGTAATGGGTGACTCTGGTGTCTATCTCGCATTCAATGCAGATTACTCCGATGGGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGT ++ +CC1GGGGGGGGGGGJJJG=JGG1CJCJJJJJJCJJJ8CGJG(JJGGJ8JJGJJGJGGG(=GGGJGCGGCGCGGCGGGCGCGCCGGCGCGGGG=GCG=GGCCCJJJJJ=GGGG=GGGGGCGCGCGGGGGGGGGCGGGGGGCC=GCGGCGCC +@gi|10015|ref|NC_15.1|-5/2 +CCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTA ++ +CCCGGGGGGGCGGJJGGJCJJJJJG1JJJ==JJJJJJGGJJG(GJ(JCJJJGGJCJGJGCJGCG8JCGCGCGGCCGCGGCGGGGCGCGGGCC=GGCGGGGCGCCCJJGGGGGCC(G8CGG8GGCCGCGGGCGGGG1GGGCCCGGCGCCCG +@gi|10015|ref|NC_15.1|-3/2 +GACTGTCACTTCGGCAGTAGCAGAAAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACATAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTAAGCAAATTAAATCGAGTGTTAGCCGCAT ++ +CCCGGGGGC(GGGJJGJCCJJJGJGJJJJJGJJJJGGG1GGJJJCJCJJGGGCGJGGGCJCGJ=8JGCCGGGJGG=CGGG=GCCGCGGGGGGGG1GGGGGGCCJ==8CGGC8GGC=G=GC=GCGGCCGG1GGGCGGCGGCC=C=GGCCG= +@gi|10015|ref|NC_15.1|-1/2 +ACCTGGTTTAAGTCATATTGGCTACGATGACCATACTACAAGAATACCAACCCATTAAATTTCTACATGCGGCTAACACTCGATTTAATTTGCTTAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTATGTTACAAC ++ 
+CCCGGGGGGGGGGCGJJGJJJCJCCJCGJJGGJJCJGJJJ=GJGGC=JGJGCJCG8GGGGJJGGGGC=GCGGGG(GG=GCGCCGCGGGGCCGCGGGGG(8GGJCJJCGGGGG=1GGGGCGGGGGG1G=GG=GGG8CCCGG=CCCCGGGCG +@gi|10016|ref|NC_16.1|-9/2 +GCGGCCAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACCGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCAT ++ +CCCGGGGGCG=GGGJJJGGJ=JG(JCJJJJJJJGJJJGJ8JJJJJGJGJJGJJGJGJG8GGJGJCCJ=GG=GJGCGJGCGGGCC=GGGGGGGGGGGCG8GG=CJJJJGCGCGCGCCCC=G=GGGGCGGGGGG==GGGGGGCGGGCCGGG8 +@gi|10016|ref|NC_16.1|-7/2 +AAGTCCCAAAGGCTACCGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGTGCGTGTACTTAGCCCACT ++ +CCCCGC1GGGGGGJGJJJJ1JJCJJJJJJGJJJG=JG=GJJGGGCJJJGGJGGJCJGCGCGCCCGCGGGJGGCGCCGG==CGGGGGGGG=GCCGGGCCGGC1CCJJJGCGGGGCCG(GGGGGCCGG=GGGCCGGGGCCGGCGCGGGGGCC +@gi|10016|ref|NC_16.1|-5/2 +TCTGGGTTCTATCTCGCATCCAATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGATAAGTACACGCACTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCCGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTT ++ +CCCGG1GGCGGGCJJG1GJGJJGJCJJGJJCJC=JJ=GJGGJJG=CJ=GGGJGGJGGG8JCCGGCCGCGGGJ==GCCCCGGGGCCC8GG=GGCC(CCGGGGGJJ=CJCGG1GGGGGGGC1GGGC=GGCG=CG==GCC=GG=GCGGCCGCC +@gi|10016|ref|NC_16.1|-3/2 +GGGGGAATATTTCGACCAAACAATGCACTTCCGGGGGCCTTTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGAG ++ +CCCGGGGGGGGGGJJJJCJJGJGJJCCJJJJJJJGJGJJJCJGJGGGCG8JJJJGJJGCG==GJGGGJGGGGGCCGGG=CGGGGGCCGGG=GGGGCCCCGG(=CCCCG=CCG11CGCGCCGC1GGGGCGGGGGGGCCCCCGCCGCG=GGC +@gi|10016|ref|NC_16.1|-1/2 +AATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGTGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTG ++ +=CCGGGGCGGGGGJGJJJJJJJJJCCJJGJJJJJJJGJJJJGJJGJ=JGJCGJC1JCJGGJGJCJG8CG8=CCGGGGGCCGGGCGGCGGGCCCCGC8GGCGCCJJCJCCGGGCG=GCCGGG=CCCGCCCGGGCCGGGC1GCGGGCGGGGC +@gi|10017|ref|NC_17.1|-9/2 
+TGAGTTGGTACGTTTTGTTACAATTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTTATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGCTCATCGGAGTAA ++ +CCCGG1GGG(GGGCGJJJGJJGJGJGJJJJJGGCGJCG8JGGGGGJ8JGGJJGJ8CCGG=CGCGGGJGGGCCCGGGCGGG8CCG=CGCGCC=GGG==CGGCG(CCJJGCC8GGCGGGG=G8GGG1CG=GGGGCGGGGG1G==CGGCGC8G +@gi|10017|ref|NC_17.1|-7/2 +ATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAATTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTTATGCCAAGATGCATTG ++ +CCCG=GGGCGGGGJGJJJ1JGGJJGJGJJJGJJJJJJ=GJJJGJJJGCGGJJJGGCGJGJGJGJJJGGJGGJGGGGCGGGCCCGGG88GGG8GC1C=GG1GCJCJCJCCCGCCCC=CCCGGGGGCGGCGGCG8C=GGGCGG=8CCCCGGC +@gi|10017|ref|NC_17.1|-5/2 +AGAGGGATCAATGGGTCAGATCACGGCCGTAGCAAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCCATGCAGATTACTCCGATGAGCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGG ++ +CC=CG1GCGCGGGJGJJJJJJJJJCJGGG=8JJCCJC=JJCGGCJJJJGGGJ8GGCJGGCGGCGGCJGCGCGGGGG(GCCGGCGCG8CCGGGGGCCGGG8G=CCJ=JGGCGGC8GGGGGGGCGGGGGGGGG=8=CGCCCGGCCGCGCGC= +@gi|10017|ref|NC_17.1|-3/2 +CATTCGGCTAACACTCGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAATTTTTCAAGAGGAAGCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGT ++ +CCC=GGGCGGGGCGCJJJJJGJJJJCJJGJJJGJJG=JGJJJGJJJGGGC=JG=GCCGJGGGCG=CJ=GC8GGGGG(GCG1C8GCGGGGGGCGCG8CGCG8=CJJJJGGG=G1CCCGGGGGGGGGGGCGGCGGGGGCC=G=CG=CGG8GC +@gi|10017|ref|NC_17.1|-1/2 +CAATGCATCTTGGCATAACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAATTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAAT ++ +CCCG=CGGGGGGCGJJJJJJJJJJGJJCJJ1JGGJGGGJJJJJJCJGJCGGJJGJCGGC1JGGCGGGJGJGGGGCJCGGGJGGGCC=GGCGGGCGCGG=GGGCJJCJGGCCGCGGGG(CCCGGGGCCGCCGC=GGGCGCC1GCCG=GG=8 +@gi|10018|ref|NC_18.1|-9/2 +GGTGACCCTAATACAAGAAAACCAACTCATTAAATGTCTAGATGCGGCTAACACTCGATTTAATTTGCCAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTACAACTTTTCAAGAGGAAGCAAGTATGTTT ++ +CCCGGGGGGG8GGJJJJJGJCJJJJJJCJGJGGJCJGJJJJJJGJG=JJGC8JJ8JGJJGCGGGCJCGGGGCGCGGGGGCGCCGGCGGCCGGGGG=GCG=(GCC8JJGGGGGG=CGGGCGC=CGG18=CGCCGCGGCCGG=GGGGCGC81 
+@gi|10018|ref|NC_18.1|-7/2 +AAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTG ++ +CCCGGCGGGGGGGJJJJJGJJJJCGJGJ=JGGGJJJJJJJGGJJJGGGJGCJGJJJJCGJJG=CCJJ8GG1JGGGCJCGGGGGCGG=GGGCG=CGCCGCG1CJJ=JJ=GCGGCGCGGCCCGGGGGGC=CG8CGGGGGG=GC=GCGGCCGC +@gi|10018|ref|NC_18.1|-5/2 +AAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAGTGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCTGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTT ++ +CCCGGGGGGGCGCG=GJJG8JJJJJJGGGJGGJJCJCGCCCC=GGCGJCJJ=GJGGGJGJGJCGGG8GGCGGCGGGGCGJ1G8CCGGGGCGC=GGGCGCGGGC=JJJGGCGGGG=GCGGGGCG=CGGCCGCCGGCCCGGGCGGGCGGCGC +@gi|10018|ref|NC_18.1|-3/2 +ACAGCCCCAGCCCCCCCGTTTAAGTCATATTTGCTACGGTGACCCTACTACAAGAAAACCAACTCATTAAATGTCTAGATGCGGCTAACACTCGATTTAATTTGCCAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACG ++ +CCCGGGGGGGGGGJCJJGGJJJJ=JJJJJJ8JJJGJGJJGGGJJCJGJJJJGGCG8JCJJGJGGG(JGGG1G(GG=GCGC1GGGGGGGGGCCCGGGGGCGGCCCJCJCGCGGGG1GCGCG8CCCCG=CGCGCGCGGCCGGGGCGCCCGGC +@gi|10018|ref|NC_18.1|-1/2 +GAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTCCTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCT ++ +CCCGGGGGGGGGGJJGJGJJJJJJJJJJCJJJJJGGGG=CGJGJGCCGGJC=GJGJGGGGCCGJJGCCCCCGJGGGGGGCGGCGGGGCGCGGGCGG8GGC8GJJJCCGGGGGG=GC1CGGGCGCGCGG=CGCCGCGGGCGGCCCCGGC=8 +@gi|10019|ref|NC_19.1|-9/2 +ACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCCGATTACTCCGATGATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCATTTCGGCAGTA ++ +CC=GGGGGGCGGGGJJJJGC=CJCJJGJJJGJGGJJJ(JCCJCJ1JGJCJJGJGGGGJ=GGC1JGGGJGC=GJCGJG1G=GCCGGGG=GGCCCCCGGGGGGG=JJJJGGCGCGCGCGGGCGGGGCGG=CCGC=GGGCGG8CGCGGCCGGC +@gi|10019|ref|NC_19.1|-7/2 +AGCCCCCCCGTTTAGGTCATATTTGCTACGATGACCCTACTACAAGAATACCAACTCATAAAATGTCTACATGCGGCTAACACTGGATTTAATTTGCTAAGTAGTGAAAGTCCCAAAGGCTACTGTCGCGTGAGTTGGTACGTTTTGTTA ++ 
+CC=GGGG=CGGGGJJGGJJGJJJ(JJJ1JJCJJGGJJCJ1CCGGJJJJJJCGC=GGG(GCGJJGJ=CJCGJCJCGG1GGG=GCCGGGCGGGCCCGCC1CGG=JJCJJCGG1GCCCGG=1GGGGGGGC(GCGGGCGGGG=GGCGCCG1CG8 +@gi|10019|ref|NC_19.1|-5/2 +GAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCGGCATTGGATGCGAGATAGAAACCAGAGTCACCCTTTACTTCTGAGTGATTGAGCTTAGCTACGGCCGTGATCTGACCCATTGATCCCTCTCGAATAGCCCCC ++ +CCCGGGGGG=GCCCGJGJJJJJJGCJGGJJJJJJGJGJGJJJJ1JJJGGGJJJGG(=JCJC=GCGJJJJGGGCGGCGGC8CC8CGCCG=GGCGCGCCGCCGG==CJCGGGG8GGCGCGGCGCGGGCG1GGCGGGG=CGG=GGCCGGGGGC +@gi|10019|ref|NC_19.1|-3/2 +GCAAGTATGTTTCGCCCAATTCAAACTTGTCTGCTACTGCCGAAATGACAGTCATGCCAAGATGCATTGAGGGCGTGTACTTAGCCCACTCTTTGTTTCGATCATCGGAGTAATCGGCCTTGGATGCGAGATAGAAACCAGAGTCACCCT ++ +CCCGGGGGGGGGGJG8CJ=1JGJJGJGJJJJJJGCGGGJJC(JJJCJJJJJGCGJGCCCJGJGC8GGCCCCCCJ8G=GGGGC=GGGCGCGGGGCGG1GGCGGC8JCJGCGG8GGGCGC(GGGGC=GG=GGCGCGGCGGCCCGCGGC=CG8 +@gi|10019|ref|NC_19.1|-1/2 +GCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACAGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCCAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTATTCTTGTAGTAGGGTC ++ +CCCGGG=CGGGGGJGCJJJJ=GJCGJJJGJJJJGCCGGGJJJJJGJJJJJGJCGJGCGCGCJG=CGCGGGGGCCCG=CCG(GCCG=G=GGGGGGGCCCG(GCCJJJJGGGCGGG8GGGGGGCCGCCGGGGGC81CGGGCGGGCCCCG1CG +@gi|10020|ref|NC_20.1|-9/2 +GGGCTATTCGAGAGGGATCAATGGGTCAGATCACGGCCGTAGCTAAGCTCAATCACTCAGAAGTAAAGGGTGACTCTGGTTTCTATCTCGCATCCAATGCAGATTACTCCGATTATCGAAACAAAGAGTGGGCTAAGTACACGCCCTCAA ++ +C81GGGGGGGGG(CGCGJ1JJJJCJ=JGGJCJJCGJJJJJJJJJ(C=GJ=CJJJJGJGJ=CCGCC8JCGGJGCGGCJGGGGGCGC=CCCCCGCGGGGGGGGCCCJCJGGGCGGGCGGGC=GGGGGGGG1CCGGGCCGGGGGGC=GG1C8= +@gi|10020|ref|NC_20.1|-7/2 +TTGCTAGAGTTGGTAGTAGGCGTAGATTTGTCTCCGAACAGCCCCAGCCCCCCCGTTTAAGTCATATTTGCTACGATGACCCTACTACAAGAGTACCAACTCATAAAATGTCTACATGCGGCTAACACTCGATTTAATTTGCTAAGTAGT ++ +CCCGCGG1CGGGC=JCJJJJJGJJGGJGJGGJGJJGGGGGGG8GGJGC=CJGGGGGGGJ=GGJJJGC1GGCGGCC=JGC((=GCG=GCGGCCCC=CGCC=GCJJJCJG=1G==CGGGCGGCC=GGCG1G1=CG8GCCGGCGGGG1GCCCC +@gi|10020|ref|NC_20.1|-5/2 
+TTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACCGTAGCCTTTGGGACTTTCACTACTTAGCAAATTAAATCGAGTGTTAGCCGCATGTAGACATTTTATGAGTTGGTACTCTTGTAGTAGGGTCATCGTAGCAAATATG ++ +CCCGG==GGGGGGGCJJJJJJJJGJJ1JJJ=JJJGJJGJJJGJGJCJG=CJJGGJ1GJGCJGGCG=C(G1G=CGGGGCCGCCCGGCGGCGCGCGCGGCGCCGCJCJJGGGGCGGCGG=G=GGGGGCCGGGGGGCGCG8=GGGCGCCCCCC +@gi|10020|ref|NC_20.1|-3/2 +ATCGAAACAAAGAGTCGGCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACCGTAG ++ +C1CG=GGG=GGGGCJ1JJGCJC=GGJJGJGGJJGJJGJJGCJGJJGJGJ1JGCJJGGGJCJJGCCCGGJGG==CJGGGCGGG1GGGG=CCCGC=CGCGGCCCCCJCCC=GGGGGGCGCCGGGGGG8CGG==CGGGGGGGGGGGGGGCCGC +@gi|10020|ref|NC_20.1|-1/2 +GCTAAGTACACGCCCTCAATGCATCTTGGCATGACTGTCACTTCGGCAGTAGCAGACAAGTTTGAATTGGGCGAAACATACTTGCTTACTCTTGAAAAGTTGTAACAAAACGTACCAACTCACGCGACCGTAGCCTTTGGGACTTTCACT ++ +C=CGGGGGGGGG=JJJCJJ=JJJ(GJGJJJJJJ=JJJJCGGJJJJJGJGGJGJGGCGG=J=GJGJGGGCGGGGGCCGGGGCGCG=G=GGCG=GGGGCCCGG1CCJJJ=CG1G(CG=GGGGGCGGGCCGGGCCGGG=1CCCGCGGGCG=GC From 4111adb04b1ab3d38595a874c886765c0908681b Mon Sep 17 00:00:00 2001 From: Radu Muntean Date: Thu, 5 Aug 2021 21:42:23 +0200 Subject: [PATCH 17/17] rebase Signed-off-by: Radu Muntean --- metagraph/src/cli/tax_class.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/metagraph/src/cli/tax_class.cpp b/metagraph/src/cli/tax_class.cpp index 7a2460c898..7a9f2bba4e 100644 --- a/metagraph/src/cli/tax_class.cpp +++ b/metagraph/src/cli/tax_class.cpp @@ -61,7 +61,7 @@ int taxonomic_classification(Config *config) { Timer timer; logger->trace("Graph loading..."); auto graph = load_critical_dbg(config->infbase); - logger->trace("Finished graph loading after {}s.", timer.elapsed()); + logger->trace("Finished graph loading after {} sec.", timer.elapsed()); timer.reset(); logger->trace("Processing the classification..."); @@ -80,21 +80,22 @@ int taxonomic_classification(Config *config) { } else { // Use tax_class without any precomputed database. 
if (config->infbase_annotators.size() == 0) { - logger->error("The annotation matrix is missing from the command line, please use '-a' flag for the annotation matrix filepath."); + logger->error("The annotation matrix is missing from the command line, " + "please use '-a' flag for the annotation matrix filepath."); std::exit(1); } timer.reset(); logger->trace("Graph and Annotation loading..."); graph = load_critical_dbg(config->infbase); anno_graph = initialize_annotated_dbg(graph, *config); - logger->trace("Finished graph annotation loading after {}s.", timer.elapsed()); + logger->trace("Finished graph annotation loading after {} sec.", timer.elapsed()); timer.reset(); logger->trace("Constructing TaxonomyClsAnno..."); taxonomy = std::make_unique(*anno_graph, config->lca_coverage_fraction, config->discovery_fraction, config->taxonomic_tree, config->label_taxid_map); - logger->trace("Finished TaxonomyDB construction after {}s.", timer.elapsed()); + logger->trace("Finished TaxonomyDB construction after {} sec.", timer.elapsed()); if (config->top_label_fraction > 0) { // Use tax_class version that is returning the LCA of the top labels among the kmers. @@ -132,7 +133,7 @@ int taxonomic_classification(Config *config) { std::cout << result << std::endl; }); - logger->trace("Finished all the queries in {}s.", timer.elapsed()); + logger->trace("Finished all the queries in {} sec.", timer.elapsed()); return 0; }