Skip to content

Commit c87ad9f

Browse files
authored
Merge pull request #49 from bcgsc/release/v1.0.0
Release/v1.0.0
2 parents 3a7392b + a9e1e27 commit c87ad9f

27 files changed

+6497
-4817
lines changed

pori_python/graphkb/genes.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from __future__ import annotations
44

55
from typing import Any, Dict, List, Sequence, Set, Tuple, cast
6+
from typing_extensions import deprecated
67

78
from pori_python.types import IprGene, Ontology, Record, Statement, Variant
89

@@ -256,6 +257,7 @@ def get_preferred_gene_name(
256257
return gene_names[0]
257258

258259

260+
@deprecated("Use get_gene_linked_cancer_predisposition_info instead")
259261
def get_cancer_predisposition_info(
260262
conn: GraphKBConnection, source: str = PREFERRED_GENE_SOURCE_NAME
261263
) -> Tuple[List[str], Dict[str, str]]:
@@ -360,6 +362,7 @@ def get_gene_linked_cancer_predisposition_info(
360362
return sorted(genes), variants
361363

362364

365+
@deprecated("Use get_gene_linked_pharmacogenomic_info instead")
363366
def get_pharmacogenomic_info(
364367
conn: GraphKBConnection, source: str = PREFERRED_GENE_SOURCE_NAME
365368
) -> Tuple[List[str], Dict[str, str]]:

pori_python/graphkb/match.py

Lines changed: 95 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,12 @@
3131
looks_like_rid,
3232
stringifyVariant,
3333
)
34-
from .vocab import get_equivalent_terms, get_term_tree, get_terms_set
34+
from .vocab import (
35+
get_equivalent_terms,
36+
get_term_by_name,
37+
get_term_tree,
38+
get_terms_set,
39+
)
3540

3641
FEATURES_CACHE: Set[str] = set()
3742

@@ -124,50 +129,62 @@ def cache_missing_features(conn: GraphKBConnection) -> None:
124129

125130
def match_category_variant(
126131
conn: GraphKBConnection,
127-
gene_name: str,
132+
reference_name: str,
128133
category: str,
129134
root_exclude_term: str = "",
130135
gene_source: str = "",
131136
gene_is_source_id: bool = False,
132137
ignore_cache: bool = False,
138+
reference_class: str = 'Feature',
133139
) -> List[Variant]:
134140
"""
135141
Returns a list of variants matching the input variant
136142
137143
Args:
138144
conn (GraphKBConnection): the graphkb connection object
139-
gene_name (str): the name of the gene the variant is in reference to
145+
reference_name (str): the name of the Feature(gene)/Signature the variant is in reference to
140146
category (str): the variant category (ex. copy loss)
141147
gene_source: The source database the gene is defined by (ex. ensembl)
142148
gene_is_source_id: Indicates the gene name(s) input should be treated as sourceIds not names
149+
reference_class (str): Class name of the variant reference. Default to 'Feature'
143150
Raises:
144151
FeatureNotFoundError: The gene could not be found in GraphKB
145152
146153
Returns:
147154
Array.<dict>: List of variant records from GraphKB which match the input
148155
"""
149-
# disambiguate the gene to find all equivalent representations
150-
features = convert_to_rid_list(
151-
get_equivalent_features(
152-
conn,
153-
gene_name,
154-
source=gene_source,
155-
is_source_id=gene_is_source_id,
156-
ignore_cache=ignore_cache,
156+
# disambiguate the reference to find all equivalent representations
157+
references: List[str] = []
158+
if reference_class == 'Feature':
159+
references = convert_to_rid_list(
160+
get_equivalent_features(
161+
conn,
162+
reference_name,
163+
source=gene_source,
164+
is_source_id=gene_is_source_id,
165+
ignore_cache=ignore_cache,
166+
)
157167
)
158-
)
159-
160-
if not features:
161-
raise FeatureNotFoundError(
162-
f"unable to find the gene ({gene_name}) or any equivalent representations"
168+
if not references:
169+
raise FeatureNotFoundError(
170+
f"unable to find the gene ({reference_name}) or any equivalent representations"
171+
)
172+
if reference_class == 'Signature':
173+
references = convert_to_rid_list(
174+
get_equivalent_terms(
175+
conn,
176+
reference_name.lower(),
177+
ontology_class='Signature',
178+
ignore_cache=ignore_cache,
179+
)
163180
)
164181

165182
# get the list of terms that we should match
166-
terms = convert_to_rid_list(
183+
types = convert_to_rid_list(
167184
get_term_tree(conn, category, root_exclude_term, ignore_cache=ignore_cache)
168185
)
169186

170-
if not terms:
187+
if not types:
171188
raise ValueError(f"unable to find the term/category ({category}) or any equivalent")
172189

173190
# find the variant list
@@ -178,8 +195,8 @@ def match_category_variant(
178195
"target": {
179196
"target": "CategoryVariant",
180197
"filters": [
181-
{"reference1": features, "operator": "IN"},
182-
{"type": terms, "operator": "IN"},
198+
{"reference1": references, "operator": "IN"},
199+
{"type": types, "operator": "IN"},
183200
],
184201
},
185202
"queryType": "similarTo",
@@ -275,7 +292,55 @@ def positions_overlap(
275292
return start is None or pos == start
276293

277294

295+
def equivalent_types(
    conn: GraphKBConnection,
    type1: str,
    type2: str,
    strict: bool = False,
) -> bool:
    """
    Compare 2 variant types to determine if they should match

    Args:
        conn: the graphkb connection object
        type1: type from the observed variant we want to match to the DB
        type2: type from the DB variant
        strict: whether or not only the specific-to-generic ones are considered.
            By default (false), not only specific types can match more generic ones,
            but generic types can also match more specific ones.

    Returns:
        bool: True if the types can be matched
    """
    # Convert rid to displayName if needed, so both types are compared by name
    if looks_like_rid(type1):
        type1 = conn.get_records_by_id([type1])[0]['displayName']
    if looks_like_rid(type2):
        type2 = conn.get_records_by_id([type2])[0]['displayName']

    # Get type terms from observed variant
    if strict:
        # Strict: only the record id of the term named exactly `type1` is a candidate.
        try:
            terms1 = [get_term_by_name(conn, type1)['@rid']]
        except Exception:
            # Best-effort lookup: a term that cannot be resolved simply matches
            # nothing. Catch Exception rather than using a bare except so that
            # KeyboardInterrupt/SystemExit still propagate.
            # NOTE(review): narrow further once the exact exception raised by
            # get_term_by_name for a missing term is confirmed.
            terms1 = []
    else:
        # Non-strict: use the whole set returned for `type1` by get_terms_set
        # (presumably the related/equivalent terms -- verify against vocab module).
        terms1 = get_terms_set(conn, [type1])

    # Get type terms from DB variant
    terms2 = get_terms_set(conn, [type2])

    # The types match when the two term collections share at least one entry
    return bool(terms2.intersection(terms1))
340+
341+
278342
def compare_positional_variants(
343+
conn: GraphKBConnection,
279344
variant: Union[PositionalVariant, ParsedVariant],
280345
reference_variant: Union[PositionalVariant, ParsedVariant],
281346
generic: bool = True,
@@ -378,6 +443,11 @@ def compare_positional_variants(
378443
elif len(variant["refSeq"]) != len(reference_variant["refSeq"]): # type: ignore
379444
return False
380445

446+
# Equivalent types
447+
if variant.get('type') and reference_variant.get('type'):
448+
if not equivalent_types(conn, variant["type"], reference_variant["type"]):
449+
return False
450+
381451
return True
382452

383453

@@ -598,10 +668,14 @@ def match_positional_variant(
598668
):
599669
# TODO: Check if variant and reference_variant should be interchanged
600670
if compare_positional_variants(
601-
variant=parsed, reference_variant=cast(PositionalVariant, row), generic=True
671+
conn,
672+
variant=parsed,
673+
reference_variant=cast(PositionalVariant, row),
674+
generic=True,
602675
):
603676
filtered_similarAndGeneric.append(row)
604677
if compare_positional_variants(
678+
conn,
605679
variant=parsed,
606680
reference_variant=cast(PositionalVariant, row),
607681
generic=False, # Similar variants only

0 commit comments

Comments
 (0)