Skip to content

Commit 0631068

Browse files
authored
Merge pull request #285 from /issues/284-fix-iars-without-normal-event
Fix event with inconsistency in normals with many IARS(resolves #284)
2 parents 101565a + 3e195d9 commit 0631068

File tree

1 file changed

+34
-30
lines changed

1 file changed

+34
-30
lines changed

src/protect/binding_prediction/common.py

Lines changed: 34 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ def pept_diff(p1, p2):
329329
return sum([p1[i] != p2[i] for i in range(len(p1))])
330330

331331

332-
def _get_normal_peptides(mhc_df, iars, peplen):
332+
def _get_normal_peptides(job, mhc_df, iars, peplen):
333333
"""
334334
Get the corresponding normal peptides for the tumor peptides that have already been subjected to
335335
mhc:peptide binding prediction.
@@ -345,38 +345,42 @@ def _get_normal_peptides(mhc_df, iars, peplen):
345345
for pred in mhc_df.itertuples():
346346
containing_iars = [i for i, sl in iars.items() if pred.pept in sl[0]]
347347
assert len(containing_iars) != 0, "No IARS contained the peptide"
348-
if len(containing_iars) > 1:
349-
# If there are multiple IARs, they all or none of them have to have a corresponding
350-
# normal.
351-
assert len(set([len(y) for x, y in iars.items() if x in containing_iars])) == 1
352348
if len(iars[containing_iars[0]]) == 1:
353349
# This is a fusion and has no corresponding normal
354350
normal_peptides.append('N' * peplen)
355351
else:
356-
tum, norm = iars[containing_iars[0]]
357-
pos = tum.find(pred.pept)
358-
temp_normal_pept = norm[pos:pos + peplen]
359-
ndiff = pept_diff(pred.pept, temp_normal_pept)
360-
assert ndiff != 0
361-
if ndiff == 1:
362-
normal_peptides.append(norm[pos:pos + peplen])
352+
# If there are multiple IARs, they all or none of them have to have a corresponding
353+
# normal.
354+
if len(set([len(y) for x, y in iars.items() if x in containing_iars])) != 1:
355+
job.fileStore.logToMaster('Some IARS were found to contain the substring but were'
356+
'inconsistent with the presence of a corresponding '
357+
'normal.')
358+
normal_peptides.append('N' * peplen)
363359
else:
364-
if len(tum) == len(norm):
365-
# Too (2+) many single nucleotide changes to warrant having a normal counterpart
366-
# This might be an artifact
367-
normal_peptides.append('N' * peplen)
360+
tum, norm = iars[containing_iars[0]]
361+
pos = tum.find(pred.pept)
362+
temp_normal_pept = norm[pos:pos + peplen]
363+
ndiff = pept_diff(pred.pept, temp_normal_pept)
364+
assert ndiff != 0
365+
if ndiff == 1:
366+
normal_peptides.append(norm[pos:pos + peplen])
368367
else:
369-
# There is an indel in play. The difference cannot be in the last AA as that
370-
# would have come out properly in the first case. There is a possibility that
371-
# the indel was in the first AA causing a shift. We can handle that by looking
372-
# at the suffix.
373-
pos = norm.find(pred.pept[1:])
374-
if pos != -1:
375-
# The suffix was found,
376-
normal_peptides.append(norm[pos-1:pos + peplen])
377-
else:
378-
# The indel was too large to warrant having a normal counterpart
368+
if len(tum) == len(norm):
369+
# Too (2+) many single nucleotide changes to warrant having a normal
370+
# counterpart. This might be an artifact
379371
normal_peptides.append('N' * peplen)
372+
else:
373+
# There is an indel in play. The difference cannot be in the last AA as that
374+
# would have come out properly in the first case. There is a possibility
375+
# that the indel was in the first AA causing a shift. We can handle that by
376+
# looking at the suffix.
377+
pos = norm.find(pred.pept[1:])
378+
if pos != -1:
379+
# The suffix was found,
380+
normal_peptides.append(norm[pos-1:pos + peplen])
381+
else:
382+
# The indel was too large to warrant having a normal counterpart
383+
normal_peptides.append('N' * peplen)
380384

381385
mhc_df['normal_pept'] = normal_peptides
382386
return mhc_df, normal_peptides
@@ -417,7 +421,7 @@ def predict_normal_binding(job, binding_result, transgened_files, allele, peplen
417421
'predictor': None}
418422
elif predictor == 'Consensus':
419423
results = _process_consensus_mhcii(mhc_file)
420-
results, peptides = _get_normal_peptides(results, iars, peplen)
424+
results, peptides = _get_normal_peptides(job, results, iars, peplen)
421425
with open('peptides.faa', 'w') as pfile:
422426
for pept in peptides:
423427
print('>', pept, '\n', pept, sep='', file=pfile)
@@ -431,7 +435,7 @@ def predict_normal_binding(job, binding_result, transgened_files, allele, peplen
431435
'predictor': 'Consensus'}
432436
elif predictor == 'Sturniolo':
433437
results = _process_sturniolo_mhcii(mhc_file)
434-
results, peptides = _get_normal_peptides(results, iars, peplen)
438+
results, peptides = _get_normal_peptides(job, results, iars, peplen)
435439
with open('peptides.faa', 'w') as pfile:
436440
for pept in peptides:
437441
print('>', pept, '\n', pept, sep='', file=pfile)
@@ -445,7 +449,7 @@ def predict_normal_binding(job, binding_result, transgened_files, allele, peplen
445449
'predictor': 'Sturniolo'}
446450
elif predictor == 'netMHCIIpan':
447451
results = _process_net_mhcii(mhc_file)
448-
results, peptides = _get_normal_peptides(results, iars, peplen)
452+
results, peptides = _get_normal_peptides(job, results, iars, peplen)
449453
with open('peptides.faa', 'w') as pfile:
450454
for pept in peptides:
451455
print('>', pept, '\n', pept, sep='', file=pfile)
@@ -465,7 +469,7 @@ def predict_normal_binding(job, binding_result, transgened_files, allele, peplen
465469
mhc_file = job.fileStore.readGlobalFile(binding_result,
466470
os.path.join(work_dir, 'mhci_results'))
467471
results = _process_mhci(mhc_file)
468-
results, peptides = _get_normal_peptides(results, iars, peplen)
472+
results, peptides = _get_normal_peptides(job, results, iars, peplen)
469473
with open('peptides.faa', 'w') as pfile:
470474
for pept in peptides:
471475
print('>', pept, '\n', pept, sep='', file=pfile)

0 commit comments

Comments
 (0)