Skip to content

Commit ff8f48d

Browse files
authored
Merge pull request #1549 from BRCAChallenge/enigma_cleanup
Enigma cleanup
2 parents a914f1d + 8af9379 commit ff8f48d

File tree

2 files changed

+25
-13
lines changed

2 files changed

+25
-13
lines changed

pipeline/clinvar/clinvar_common.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ def isCurrent(element):
1717
else:
1818
return(rr.text == "current")
1919

20+
2021
def textIfPresent(element, field):
2122
"""Return the text associated with a field under the element, or
2223
None if the field is not present"""
@@ -28,6 +29,7 @@ def textIfPresent(element, field):
2829
else:
2930
return None
3031

32+
3133
def findUniqueElement(name, parent):
3234
"""Find a child element directly or indirectly underneath this parent
3335
element which should occur only once (i.e. there should be no other
@@ -256,13 +258,14 @@ def __init__(self, element, debug=False):
256258
self.element = element
257259
self.condition_type = None
258260
self.condition_value = None
261+
self.condition_category = None
259262
self.condition_db_id = list()
260263
for trait in element.iter("Trait"):
261264
self.condition_type = trait.get("Type")
262265
for name in trait.iter("Name"):
263266
ev = name.find("ElementValue")
264267
if ev.get("Type") == "Preferred":
265-
self.condition_value = ev.text
268+
self.condition_category = ev.text
266269
for xref in trait.iter("XRef"):
267270
if not re.search("Genetic Testing Registry", xref.get("DB")):
268271
xref_string = xref.get("DB") + "_" + xref.get("ID")

pipeline/clinvar/enigma_from_clinvar.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,20 +18,14 @@
1818
MULTI_ENTRY_SEP = ','
1919

2020

21-
2221
three_letters_aa = re.compile('p.\\(?[A-Z][a-z]{2}[0-9]+[A-Z][a-z]{2}') # e.g. p.(Tyr831SerfsTer9)
22+
2323
def _is_bic_designation(s):
2424
return any(k in s.lower() for k in {'ins', 'del', 'dup'}) or \
2525
(not s.startswith('p.') and '>' in s) or \
2626
(s.startswith('p.') and ':' not in s and three_letters_aa.match(s) is None) # shouldn't match for a BIC designator
2727

2828

29-
#
30-
# 11/27/2024: ClinVar no longer supplies the BIC nomenclature terms in their records
31-
def _fetch_bic(cvs_el):
32-
return default_val
33-
34-
3529
def _compute_protein_changes(hgvs_cdna, hgvs_util):
3630
if hgvs_cdna is not None:
3731
v_protein = hgvs_util.cdna_to_protein(hgvs_cdna, return_str=False)
@@ -53,6 +47,17 @@ def _compute_protein_changes(hgvs_cdna, hgvs_util):
5347
return (default_val, default_val)
5448

5549

50+
def _fetch_bic(va_el):
    """Return the BIC-style names listed for a variant, joined with '|'.

    Looks at every ``Name`` element found under
    ``./ClassifiedRecord/SimpleAllele/OtherNameList`` relative to ``va_el``
    and keeps the ones accepted by ``_is_bic_designation``.

    Returns the accepted names joined with '|', or ``default_val`` when no
    name qualifies.
    """
    bic_names = []
    for name_el in va_el.findall("./ClassifiedRecord/SimpleAllele/OtherNameList/Name"):
        name = name_el.text
        # An empty <Name/> element has text None, which would raise inside
        # _is_bic_designation; an empty name can never qualify, so skip it.
        if name and _is_bic_designation(name):
            bic_names.append(name)
    if bic_names:
        return '|'.join(bic_names)
    return default_val
59+
60+
5661

5762

5863

@@ -79,8 +84,8 @@ def _parse_engima_assertion(enigma_assertion, hgvs_util):
7984
rec["Assertion_method"] = enigma_assertion.assertionMethod
8085
rec["Assertion_method_citation"] = enigma_assertion.assertionMethodCitation
8186
rec["Comment_on_clinical_significance"] = enigma_assertion.summaryEvidence
82-
rec["Collection_method"] = ','.join(enigma_assertion.method.capitalize())
83-
rec["Allele_origin"] = ','.join(enigma_assertion.origin.capitalize())
87+
rec["Collection_method"] = ','.join(enigma_assertion.method).capitalize()
88+
rec["Allele_origin"] = ','.join(enigma_assertion.origin).capitalize()
8489
rec["ClinVarAccession"] = "%s.%s" % (enigma_assertion.accession,
8590
enigma_assertion.accession_version)
8691
return rec
@@ -155,13 +160,17 @@ def parse_record(va_el, hgvs_util, symbols, mane_transcript,
155160
rec["Reference_sequence"] = transcript.split(":")[0]
156161
rec["HGVS_cDNA"] = transcript.split(":")[1]
157162
rec["Condition_ID_type"] = va.classification.condition_type
158-
if va.classification_condition_value == None:
163+
if va.classification.condition_value == None:
159164
rec["Condition_ID_value"] = "not provided"
160165
else:
161166
rec["Condition_ID_value"] = va.classification.condition_value
162167
#
163-
# 11/25/2024: the trait set / condition category has been deprecated (is no longer displayed)
164-
rec["Condition_category"] = default_val
168+
# 2/28/25: it seems counter-intuitive that the condition category
169+
# comes from the ClinVar condition type field, while the condition
170+
# type is blank, but that's the way it is. FWIW, neither of these
171+
# fields are actually displayed.
172+
rec["Condition_category"] = va.classification.condition_type
173+
rec["Condition_type"] = default_val
165174
for scv_accession in va.otherAssertions.keys():
166175
oa = va.otherAssertions[scv_accession]
167176
if oa.reviewStatus == "reviewed by expert panel":

0 commit comments

Comments
 (0)