# Separator character for multi-entry values (going by the name; its uses
# are outside the section visible here -- confirm against callers).
MULTI_ENTRY_SEP = ','
1919
2020
21-
# Protein change spelled with three-letter amino-acid codes, with or without
# the parenthesis after "p." -- e.g. p.(Tyr831SerfsTer9) or p.Tyr831Ser.
# Raw string with an escaped dot so that only a literal "p." prefix matches.
three_letters_aa = re.compile(r'p\.\(?[A-Z][a-z]{2}[0-9]+[A-Z][a-z]{2}')


def _is_bic_designation(s):
    """Return True if the name `s` looks like a BIC designation.

    The check is heuristic. A name is accepted when any of these holds:
      * it contains 'ins', 'del' or 'dup' (case-insensitive);
      * it does not start with 'p.' and contains '>';
      * it starts with 'p.', contains no ':' and is not written with
        three-letter amino-acid codes (see `three_letters_aa`).

    `None` and the empty string are never BIC designations.
    """
    if not s:
        # Covers None (e.g. the text of an empty XML element) as well as "".
        return False

    lowered = s.lower()  # computed once rather than once per keyword
    if any(k in lowered for k in ('ins', 'del', 'dup')):
        return True

    if not s.startswith('p.'):
        return '>' in s

    # A 'p.' name using three-letter codes is HGVS-style protein notation,
    # which should not be reported as a BIC designator.
    return ':' not in s and three_letters_aa.match(s) is None
2727
2828
29- #
30- # 11/27/2024: ClinVar no longer supplies the BIC nomenclature terms in their records
31- def _fetch_bic (cvs_el ):
32- return default_val
33-
34-
3529def _compute_protein_changes (hgvs_cdna , hgvs_util ):
3630 if hgvs_cdna is not None :
3731 v_protein = hgvs_util .cdna_to_protein (hgvs_cdna , return_str = False )
@@ -53,6 +47,17 @@ def _compute_protein_changes(hgvs_cdna, hgvs_util):
5347 return (default_val , default_val )
5448
5549
def _fetch_bic(va_el):
    """Collect the BIC designations listed among a record's other names.

    Looks at every ./ClassifiedRecord/SimpleAllele/OtherNameList/Name element
    under `va_el` and keeps the text of those that `_is_bic_designation`
    accepts.

    :param va_el: XML element to search; must offer an ElementTree-style
        `findall`.
    :return: the accepted names joined with '|', or `default_val` when
        there are none.
    """
    name_elements = va_el.findall("./ClassifiedRecord/SimpleAllele/OtherNameList/Name")
    # An empty <Name/> element has text None; skip it instead of handing
    # None to _is_bic_designation.
    bic_names = [
        el.text for el in name_elements
        if el.text and _is_bic_designation(el.text)
    ]
    return '|'.join(bic_names) if bic_names else default_val
59+
60+
5661
5762
5863
@@ -79,8 +84,8 @@ def _parse_engima_assertion(enigma_assertion, hgvs_util):
7984 rec ["Assertion_method" ] = enigma_assertion .assertionMethod
8085 rec ["Assertion_method_citation" ] = enigma_assertion .assertionMethodCitation
8186 rec ["Comment_on_clinical_significance" ] = enigma_assertion .summaryEvidence
82- rec ["Collection_method" ] = ',' .join (enigma_assertion .method .capitalize () )
83- rec ["Allele_origin" ] = ',' .join (enigma_assertion .origin .capitalize () )
87+ rec ["Collection_method" ] = ',' .join (enigma_assertion .method ) .capitalize ()
88+ rec ["Allele_origin" ] = ',' .join (enigma_assertion .origin ) .capitalize ()
8489 rec ["ClinVarAccession" ] = "%s.%s" % (enigma_assertion .accession ,
8590 enigma_assertion .accession_version )
8691 return rec
@@ -155,13 +160,17 @@ def parse_record(va_el, hgvs_util, symbols, mane_transcript,
155160 rec ["Reference_sequence" ] = transcript .split (":" )[0 ]
156161 rec ["HGVS_cDNA" ] = transcript .split (":" )[1 ]
157162 rec ["Condition_ID_type" ] = va .classification .condition_type
158- if va .classification_condition_value == None :
163+ if va .classification . condition_value == None :
159164 rec ["Condition_ID_value" ] = "not provided"
160165 else :
161166 rec ["Condition_ID_value" ] = va .classification .condition_value
162167 #
163- # 11/25/2024: the trait set / condition category has been deprecated (is no longer displayed)
164- rec ["Condition_category" ] = default_val
168+ # 2/28/25: it seems counter-intuitive that the condition category
169+ # comes from the ClinVar condition type field, while the condition
170+ # type is left blank, but that's the way it is. FWIW, neither of these
171+ # fields is actually displayed.
172+ rec ["Condition_category" ] = va .classification .condition_type
173+ rec ["Condition_type" ] = default_val
165174 for scv_accession in va .otherAssertions .keys ():
166175 oa = va .otherAssertions [scv_accession ]
167176 if oa .reviewStatus == "reviewed by expert panel" :
0 commit comments