3131 looks_like_rid ,
3232 stringifyVariant ,
3333)
34- from .vocab import get_equivalent_terms , get_term_tree , get_terms_set
34+ from .vocab import (
35+ get_equivalent_terms ,
36+ get_term_by_name ,
37+ get_term_tree ,
38+ get_terms_set ,
39+ )
3540
3641FEATURES_CACHE : Set [str ] = set ()
3742
@@ -124,50 +129,62 @@ def cache_missing_features(conn: GraphKBConnection) -> None:
124129
125130def match_category_variant (
126131 conn : GraphKBConnection ,
127- gene_name : str ,
132+ reference_name : str ,
128133 category : str ,
129134 root_exclude_term : str = "" ,
130135 gene_source : str = "" ,
131136 gene_is_source_id : bool = False ,
132137 ignore_cache : bool = False ,
138+ reference_class : str = 'Feature' ,
133139) -> List [Variant ]:
134140 """
135141 Returns a list of variants matching the input variant
136142
137143 Args:
138144 conn (GraphKBConnection): the graphkb connection object
139- gene_name (str): the name of the gene the variant is in reference to
145+ reference_name (str): the name of the Feature( gene)/Signature the variant is in reference to
140146 category (str): the variant category (ex. copy loss)
141147 gene_source: The source database the gene is defined by (ex. ensembl)
142148 gene_is_source_id: Indicates the gene name(s) input should be treated as sourceIds not names
149+ reference_class (str): Class name of the variant reference. Default to 'Feature'
143150 Raises:
144151 FeatureNotFoundError: The gene could not be found in GraphKB
145152
146153 Returns:
147154 Array.<dict>: List of variant records from GraphKB which match the input
148155 """
149- # disambiguate the gene to find all equivalent representations
150- features = convert_to_rid_list (
151- get_equivalent_features (
152- conn ,
153- gene_name ,
154- source = gene_source ,
155- is_source_id = gene_is_source_id ,
156- ignore_cache = ignore_cache ,
156+ # disambiguate the reference to find all equivalent representations
157+ references : List [str ] = []
158+ if reference_class == 'Feature' :
159+ references = convert_to_rid_list (
160+ get_equivalent_features (
161+ conn ,
162+ reference_name ,
163+ source = gene_source ,
164+ is_source_id = gene_is_source_id ,
165+ ignore_cache = ignore_cache ,
166+ )
157167 )
158- )
159-
160- if not features :
161- raise FeatureNotFoundError (
162- f"unable to find the gene ({ gene_name } ) or any equivalent representations"
168+ if not references :
169+ raise FeatureNotFoundError (
170+ f"unable to find the gene ({ reference_name } ) or any equivalent representations"
171+ )
172+ if reference_class == 'Signature' :
173+ references = convert_to_rid_list (
174+ get_equivalent_terms (
175+ conn ,
176+ reference_name .lower (),
177+ ontology_class = 'Signature' ,
178+ ignore_cache = ignore_cache ,
179+ )
163180 )
164181
165182 # get the list of terms that we should match
166- terms = convert_to_rid_list (
183+ types = convert_to_rid_list (
167184 get_term_tree (conn , category , root_exclude_term , ignore_cache = ignore_cache )
168185 )
169186
170- if not terms :
187+ if not types :
171188 raise ValueError (f"unable to find the term/category ({ category } ) or any equivalent" )
172189
173190 # find the variant list
@@ -178,8 +195,8 @@ def match_category_variant(
178195 "target" : {
179196 "target" : "CategoryVariant" ,
180197 "filters" : [
181- {"reference1" : features , "operator" : "IN" },
182- {"type" : terms , "operator" : "IN" },
198+ {"reference1" : references , "operator" : "IN" },
199+ {"type" : types , "operator" : "IN" },
183200 ],
184201 },
185202 "queryType" : "similarTo" ,
@@ -275,7 +292,55 @@ def positions_overlap(
275292 return start is None or pos == start
276293
277294
def equivalent_types(
    conn: GraphKBConnection,
    type1: str,
    type2: str,
    strict: bool = False,
) -> bool:
    """
    Compare 2 variant types to determine if they should match

    Args:
        conn: the graphkb connection object
        type1: type from the observed variant we want to match to the DB
            (either a type name or a record ID)
        type2: type from the DB variant (either a type name or a record ID)
        strict: whether or not only the specific-to-generic matches are considered.
            By default (False), not only can specific types match more generic ones,
            but generic types can also match more specific ones.
            When True, type1 is resolved to the single term found by name
            instead of being expanded with get_terms_set.

    Returns:
        bool: True if the types can be matched
    """
    # Convert rid to displayName if needed
    if looks_like_rid(type1):
        type1 = conn.get_records_by_id([type1])[0]['displayName']
    if looks_like_rid(type2):
        type2 = conn.get_records_by_id([type2])[0]['displayName']

    # Get type terms from observed variant
    terms1: Set[str] = set()
    if strict:
        try:
            terms1.add(get_term_by_name(conn, type1)['@rid'])
        except Exception:
            # Best effort: a type that cannot be resolved to a term leaves
            # terms1 empty, so the comparison below reports "no match".
            # Only Exception is caught so that KeyboardInterrupt/SystemExit
            # still propagate.
            pass
    else:
        terms1 = get_terms_set(conn, [type1])

    # Get type terms from DB variant
    terms2 = get_terms_set(conn, [type2])

    # The types match when the two term collections share at least one element
    return not terms2.isdisjoint(terms1)
340+
341+
278342def compare_positional_variants (
343+ conn : GraphKBConnection ,
279344 variant : Union [PositionalVariant , ParsedVariant ],
280345 reference_variant : Union [PositionalVariant , ParsedVariant ],
281346 generic : bool = True ,
@@ -378,6 +443,11 @@ def compare_positional_variants(
378443 elif len (variant ["refSeq" ]) != len (reference_variant ["refSeq" ]): # type: ignore
379444 return False
380445
446+ # Equivalent types
447+ if variant .get ('type' ) and reference_variant .get ('type' ):
448+ if not equivalent_types (conn , variant ["type" ], reference_variant ["type" ]):
449+ return False
450+
381451 return True
382452
383453
@@ -598,10 +668,14 @@ def match_positional_variant(
598668 ):
599669 # TODO: Check if variant and reference_variant should be interchanged
600670 if compare_positional_variants (
601- variant = parsed , reference_variant = cast (PositionalVariant , row ), generic = True
671+ conn ,
672+ variant = parsed ,
673+ reference_variant = cast (PositionalVariant , row ),
674+ generic = True ,
602675 ):
603676 filtered_similarAndGeneric .append (row )
604677 if compare_positional_variants (
678+ conn ,
605679 variant = parsed ,
606680 reference_variant = cast (PositionalVariant , row ),
607681 generic = False , # Similar variants only
0 commit comments