Skip to content

Commit 5da4fb7

Browse files
authored
Merge pull request #21 from PathwayMerger/bel-specification
Bel specification
2 parents a5acc00 + 3531cf5 commit 5da4fb7

File tree

8 files changed

+130
-47
lines changed

8 files changed

+130
-47
lines changed

setup.cfg

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,8 @@ install_requires =
5555
bio2bel==0.2.1
5656
bio2bel_hgnc==0.2.2
5757
bio2bel_chebi==0.2.1
58-
bio2bel_kegg==0.2.3
59-
bio2bel_wikipathways==0.2.2
58+
bio2bel_kegg==0.2.5
59+
bio2bel_wikipathways==0.2.3
6060
bio2bel_reactome==0.2.3
6161
pybel==0.13.2
6262
pybel-tools>=0.7.2

src/pathme/constants.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,15 +90,16 @@ def ensure_pathme_folders(): # TODO why is this a function?
9090
'methylation': 'Me',
9191
}
9292
KEGG_CITATION = '10592173'
93+
REACTOME_CITATION = '29145629'
9394

9495
# FIXME why doesn't this just import the compath_resources package?
9596
KEGG_WIKIPATHWAYS_MAPPINGS = 'https://github.com/ComPath/curation/raw/master/mappings/kegg_wikipathways.xlsx'
9697
KEGG_REACTOME_MAPPINGS = 'https://github.com/ComPath/curation/raw/master/mappings/kegg_reactome.xlsx'
9798
WIKIPATHWAYS_REACTOME_MAPPINGS = 'https://github.com/ComPath/curation/raw/master/mappings/wikipathways_reactome.xlsx'
9899

99100
KEGG_KGML_URL = 'http://rest.kegg.jp/get/{}/kgml'
100-
RDF_REACTOME = ' ftp://ftp.ebi.ac.uk/pub/databases/RDF/reactome/r67/reactome-biopax.tar.bz2'
101-
RDF_WIKIPATHWAYS = 'http://data.wikipathways.org/20190310/rdf/wikipathways-20190310-rdf-wp.zip'
101+
RDF_REACTOME = 'ftp://ftp.ebi.ac.uk/pub/databases/RDF/reactome/r67/reactome-biopax.tar.bz2'
102+
RDF_WIKIPATHWAYS = 'http://data.wikipathways.org/20190610/rdf/wikipathways-20190610-rdf-wp.zip'
102103

103104
KEGG_STATS_COLUMN_NAMES = {
104105
'nodes': 'BEL Nodes',

src/pathme/export_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import logging
66
import os
7-
from typing import List, Iterable
7+
from typing import Iterable, List, Tuple
88

99
import click
1010
import networkx as nx
@@ -36,7 +36,7 @@ def add_annotation_key(graph):
3636
graph[u][v][k][ANNOTATIONS] = {}
3737

3838

39-
def get_all_pickles(kegg_path, reactome_path, wikipathways_path):
39+
def get_all_pickles(kegg_path: str, reactome_path: str, wikipathways_path: str) -> Tuple[List, List, List]:
4040
"""Return a list with all pickle paths."""
4141
kegg_pickles = get_paths_in_folder(kegg_path)
4242

src/pathme/kegg/convert_to_bel.py

Lines changed: 50 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from pybel import BELGraph, to_pickle
1111
from pybel.dsl.edges import activity
1212
from pybel.dsl.node_classes import CentralDogma
13-
from pybel.dsl.nodes import abundance, bioprocess, complex_abundance, composite_abundance, pmod, protein, reaction
13+
from pybel.dsl.nodes import abundance, bioprocess, complex_abundance, composite_abundance, pmod, protein, reaction, rna
1414
from pybel.struct.summary import count_functions, edge_summary
1515

1616
from pathme.constants import *
@@ -171,7 +171,8 @@ def gene_to_bel_node(graph, node):
171171
return protein_node
172172

173173
elif UNIPROT in attribute:
174-
protein_node = protein(namespace=UNIPROT.upper(), name=attribute[UNIPROT], identifier=attribute[UNIPROT])
174+
protein_node = protein(namespace=UNIPROT.upper(), name=attribute[UNIPROT],
175+
identifier=attribute[UNIPROT])
175176
graph.add_node_from_data(protein_node)
176177
return protein_node
177178

@@ -505,13 +506,23 @@ def add_simple_edge(graph, u, v, relation_type):
505506

506507
# Add increases edge if pmod subtype is coupled with activation subtype
507508
if relation_type[0] == 'activation':
508-
graph.add_increases(u, v_modified, citation='', evidence='', subject_modifier=activity(),
509-
annotations={})
509+
graph.add_increases(
510+
u, v_modified,
511+
citation=KEGG_CITATION, evidence='Extracted from KEGG',
512+
subject_modifier=activity() if u in {protein, complex_abundance, rna} else None,
513+
# Add the activity function if subject is one of the following nodes (BEL 2.0 specifications)
514+
annotations={},
515+
)
510516

511517
# Add decreases edge if pmod subtype is coupled with inhibition subtype
512518
elif relation_type[0] == 'inhibition':
513-
graph.add_decreases(u, v_modified, citation='', evidence='', subject_modifier=activity(),
514-
annotations={})
519+
graph.add_decreases(
520+
u, v_modified,
521+
citation=KEGG_CITATION, evidence='Extracted from KEGG',
522+
subject_modifier=activity() if u in {protein, complex_abundance, rna} else None,
523+
# Add the activity function if subject is one of the following nodes (BEL 2.0 specifications)
524+
annotations={},
525+
)
515526

516527
# TODO: add pmod of v activates v
517528
# TODO: how to represent abundance modification in BEL?
@@ -522,48 +533,72 @@ def add_simple_edge(graph, u, v, relation_type):
522533
# If the object is a gene, miRNA, RNA, or protein, add protein modification
523534
if isinstance(v, CentralDogma):
524535
v_modified = v.with_variants(pmod(KEGG_MODIFICATIONS[relation_type]))
525-
graph.add_increases(u, v_modified, citation='', evidence='', subject_modifier=activity(),
526-
annotations={})
536+
graph.add_increases(
537+
u, v_modified,
538+
citation=KEGG_CITATION, evidence='Extracted from KEGG',
539+
subject_modifier=activity() if u in {protein, complex_abundance, rna} else None,
540+
annotations={},
541+
)
527542

528543
# Subject activity decreases protein modification (i.e. dephosphorylation) of object
529544
elif relation_type == 'dephosphorylation':
530545

531546
# If the object is a gene, miRNA, RNA, or protein, add protein modification
532547
if isinstance(v, CentralDogma):
533548
v = v.with_variants(pmod('Ph'))
534-
graph.add_decreases(u, v, citation=KEGG_CITATION, evidence='', subject_modifier=activity(), annotations={})
549+
graph.add_decreases(
550+
u, v,
551+
citation=KEGG_CITATION, evidence='Extracted from KEGG',
552+
subject_modifier=activity() if u in {protein, complex_abundance, rna} else None,
553+
annotations={},
554+
)
535555

536556
# Subject increases activity of object
537557
elif relation_type == 'activation':
538-
graph.add_increases(u, v, citation=KEGG_CITATION, evidence='', object_modifier=activity(), annotations={})
558+
graph.add_increases(
559+
u, v,
560+
citation=KEGG_CITATION, evidence='Extracted from KEGG',
561+
object_modifier=activity() if v in {protein, complex_abundance, rna} else None,
562+
annotations={},
563+
)
539564

540565
# Catalytic activity of subject increases transformation of reactant(s) to product(s)
541566
elif relation_type in {'reversible', 'irreversible'}:
542-
graph.add_increases(u, v, citation=KEGG_CITATION, evidence='', subject_modifier=activity('cat'), annotations={})
567+
graph.add_increases(
568+
u, v,
569+
citation=KEGG_CITATION, evidence='Extracted from KEGG',
570+
subject_modifier=activity('cat') if u in {protein, complex_abundance, rna} else None,
571+
annotations={},
572+
)
543573

544574
# Subject decreases activity of object
545575
elif relation_type == 'inhibition':
546-
graph.add_decreases(u, v, citation=KEGG_CITATION, evidence='', object_modifier=activity(), annotations={})
576+
graph.add_decreases(
577+
u, v,
578+
citation=KEGG_CITATION, evidence='Extracted from KEGG',
579+
object_modifier=activity() if v in {protein, complex_abundance, rna} else None,
580+
annotations={},
581+
)
547582

548583
# Indirect effect and binding/association are noted to be equivalent relation types
549584
elif relation_type in {'indirect effect', 'binding/association'}:
550-
graph.add_association(u, v, citation=KEGG_CITATION, evidence='', annotations={})
585+
graph.add_association(u, v, citation=KEGG_CITATION, evidence='Extracted from KEGG', annotations={})
551586

552587
# Subject increases expression of object
553588
elif relation_type == 'expression':
554589

555590
# Expression object is converted to RNA abundance
556591
if isinstance(v, CentralDogma):
557592
v = v.get_rna()
558-
graph.add_increases(u, v, citation=KEGG_CITATION, evidence='', annotations={})
593+
graph.add_increases(u, v, citation=KEGG_CITATION, evidence='Extracted from KEGG', annotations={})
559594

560595
# Subject decreases expression of object
561596
elif relation_type == 'repression':
562597

563598
# Repression object is converted to RNA abundance
564599
if isinstance(v, CentralDogma):
565600
v = v.get_rna()
566-
graph.add_decreases(u, v, citation=KEGG_CITATION, evidence='', annotations={})
601+
graph.add_decreases(u, v, citation=KEGG_CITATION, evidence='Extracted from KEGG', annotations={})
567602

568603
elif relation_type in {'dissociation', 'hidden compound', 'missing interaction', 'state change'}:
569604
pass

src/pathme/kegg/kegg_xml_parser.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
import json
77
import logging
88
import os
9-
from xml.etree.ElementTree import parse
109
from collections import defaultdict
10+
from xml.etree.ElementTree import parse
1111

1212
import requests
1313
from bio2bel_kegg.constants import API_KEGG_GET
@@ -92,10 +92,12 @@ def _post_process_api_query(node_meta_data, hgnc_manager, chebi_manager):
9292
for chebi_id in identifier.split(' '):
9393
chebi_entry = chebi_manager.get_chemical_by_chebi_id(chebi_id)
9494

95-
if not chebi_entry:
96-
continue
97-
98-
node_dict[CHEBI_NAME] = chebi_entry.name
95+
# If the ChEBI ID is found in the database, use the name of the matching entry
96+
if chebi_entry:
97+
node_dict[CHEBI_NAME] = chebi_entry.name
98+
# Otherwise, fall back to the default KEGG entry name so that the node still gets a name in the graph
99+
elif "ENTRY_NAME" in node_meta_data:
100+
node_dict[CHEBI_NAME] = node_meta_data["ENTRY_NAME"]
99101

100102
return node_dict
101103

src/pathme/reactome/convert_to_bel.py

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
from bio2bel_chebi import Manager as ChebiManager
99
from bio2bel_hgnc import Manager as HgncManager
10-
1110
from pybel import BELGraph
1211
from pybel.dsl import (
1312
abundance,
@@ -23,7 +22,7 @@
2322
NamedComplexAbundance
2423
)
2524

26-
from pathme.constants import UNKNOWN
25+
from pathme.constants import UNKNOWN, REACTOME_CITATION
2726
from pathme.reactome.utils import get_valid_node_parameters, process_multiple_proteins
2827
from pathme.utils import parse_id_uri
2928

@@ -36,6 +35,7 @@
3635

3736
def convert_to_bel(nodes: Dict[str, Dict], interactions: List[Tuple[str, str, Dict]], pathway_info: Dict,
3837
hgnc_manager: HgncManager, chebi_manager: ChebiManager) -> BELGraph:
38+
"""Convert RDF graph dictionary into BEL graph."""
3939
uri_id = pathway_info['uri_reactome_id']
4040

4141
if uri_id != UNKNOWN:
@@ -69,7 +69,8 @@ def convert_to_bel(nodes: Dict[str, Dict], interactions: List[Tuple[str, str, Di
6969
return graph
7070

7171

72-
def nodes_to_bel(nodes: Dict[str, Dict], graph: BELGraph, hgnc_manager: HgncManager, chebi_manager: ChebiManager) -> Dict[str, BaseEntity]:
72+
def nodes_to_bel(nodes: Dict[str, Dict], graph: BELGraph, hgnc_manager: HgncManager, chebi_manager: ChebiManager) -> \
73+
Dict[str, BaseEntity]:
7374
"""Convert dictionary values to BEL nodes."""
7475
return {
7576
node_id: node_to_bel(node_att, graph, hgnc_manager, chebi_manager)
@@ -125,18 +126,16 @@ def node_to_bel(node: Dict, graph, hgnc_manager: HgncManager, chebi_manager: Che
125126
namespace=namespace.upper()
126127
)
127128

128-
129129
elif 'Pathway' in node_types:
130130
bioprocess_node = bioprocess(identifier=identifier, name=name, namespace=namespace.upper())
131131
graph.add_node_from_data(bioprocess_node)
132132
return bioprocess_node
133-
134133
else:
135134
log.warning('Entity type not recognized', node_types)
136135

137136

138137
def add_edges(graph: BELGraph, participants, nodes, att: Dict):
139-
uri_id = att['uri_id']
138+
"""Add edges into the graph."""
140139
edge_types = att['interaction_type']
141140

142141
if isinstance(participants, dict):
@@ -157,17 +156,25 @@ def add_edges(graph: BELGraph, participants, nodes, att: Dict):
157156
elif isinstance(participants, tuple):
158157
u = nodes[participants[0]]
159158
v = nodes[participants[1]]
160-
add_simple_edge(graph, u, v, edge_types, uri_id)
159+
add_simple_edge(graph, u, v, edge_types)
161160

162161

163-
def add_simple_edge(graph: BELGraph, u, v, edge_types, uri_id):
162+
def add_simple_edge(graph: BELGraph, u, v, edge_types):
163+
"""Add a simple edge into the graph."""
164164
if 'ACTIVATION' in edge_types:
165-
# TODO anadir pubmed y descripcion
166-
graph.add_increases(u, v, citation=uri_id, evidence='', object_modifier=activity(), annotations={})
165+
graph.add_increases(
166+
u, v,
167+
citation=REACTOME_CITATION, evidence='Extracted from Reactome',
168+
object_modifier=activity() if v in {protein, complex_abundance, rna} else None,
169+
annotations={},
170+
)
167171

168172
elif 'INHIBITION' in edge_types:
169-
# TODO anadir pubmed y descripcion
170-
graph.add_decreases(u, v, citation=uri_id, evidence='', object_modifier=activity(), annotations={})
171-
173+
graph.add_decreases(
174+
u, v,
175+
citation=REACTOME_CITATION, evidence='Extracted from Reactome',
176+
object_modifier=activity() if v in {protein, complex_abundance, rna} else None,
177+
annotations={},
178+
)
172179
else:
173180
log.warning('edge type %s', edge_types)

src/pathme/wikipathways/convert_to_bel.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -200,20 +200,35 @@ def add_simple_edge(graph: BELGraph, u, v, edge_types, uri_id):
200200
:param uri_id: citation URI
201201
"""
202202
if 'Stimulation' in edge_types:
203-
graph.add_increases(u, v, citation=uri_id, evidence='', object_modifier=activity())
203+
graph.add_increases(
204+
u, v,
205+
citation=uri_id, evidence='Extracted from WikiPathways',
206+
object_modifier=activity() if v in {protein, complex_abundance, rna} else None,
207+
annotations={},
208+
)
204209

205210
elif 'Inhibition' in edge_types:
206-
graph.add_decreases(u, v, citation=uri_id, evidence='', object_modifier=activity())
211+
graph.add_decreases(
212+
u, v,
213+
citation=uri_id, evidence='Extracted from WikiPathways',
214+
object_modifier=activity() if v in {protein, complex_abundance, rna} else None,
215+
annotations={},
216+
)
207217

208218
elif 'Catalysis' in edge_types:
209-
graph.add_increases(u, v, citation=uri_id, evidence='', object_modifier=activity())
219+
graph.add_increases(
220+
u, v,
221+
citation=uri_id, evidence='Extracted from WikiPathways',
222+
object_modifier=activity() if v in {protein, complex_abundance, rna} else None,
223+
annotations={},
224+
)
210225

211226
elif 'DirectedInteraction' in edge_types:
212227
graph.add_qualified_edge(
213228
u, v,
214229
relation=REGULATES,
215230
citation=uri_id,
216-
evidence='',
231+
evidence='Extracted from WikiPathways',
217232
annotations={
218233
'EdgeTypes': edge_types,
219234
},

0 commit comments

Comments
 (0)