@@ -329,7 +329,7 @@ def pept_diff(p1, p2):
329
329
return sum ([p1 [i ] != p2 [i ] for i in range (len (p1 ))])
330
330
331
331
332
- def _get_normal_peptides (mhc_df , iars , peplen ):
332
+ def _get_normal_peptides (job , mhc_df , iars , peplen ):
333
333
"""
334
334
Get the corresponding normal peptides for the tumor peptides that have already been subjected to
335
335
mhc:peptide binding prediction.
@@ -345,38 +345,42 @@ def _get_normal_peptides(mhc_df, iars, peplen):
345
345
for pred in mhc_df .itertuples ():
346
346
containing_iars = [i for i , sl in iars .items () if pred .pept in sl [0 ]]
347
347
assert len (containing_iars ) != 0 , "No IARS contained the peptide"
348
- if len (containing_iars ) > 1 :
349
- # If there are multiple IARs, they all or none of them have to have a corresponding
350
- # normal.
351
- assert len (set ([len (y ) for x , y in iars .items () if x in containing_iars ])) == 1
352
348
if len (iars [containing_iars [0 ]]) == 1 :
353
349
# This is a fusion and has no corresponding normal
354
350
normal_peptides .append ('N' * peplen )
355
351
else :
356
- tum , norm = iars [ containing_iars [ 0 ]]
357
- pos = tum . find ( pred . pept )
358
- temp_normal_pept = norm [ pos : pos + peplen ]
359
- ndiff = pept_diff ( pred . pept , temp_normal_pept )
360
- assert ndiff != 0
361
- if ndiff == 1 :
362
- normal_peptides .append (norm [ pos : pos + peplen ] )
352
+ # If there are multiple IARs, they all or none of them have to have a corresponding
353
+ # normal.
354
+ if len ( set ([ len ( y ) for x , y in iars . items () if x in containing_iars ])) != 1 :
355
+ job . fileStore . logToMaster ( 'Some IARS were found to contain the substring but were'
356
+ 'inconsistent with the presence of a corresponding '
357
+ 'normal.' )
358
+ normal_peptides .append ('N' * peplen )
363
359
else :
364
- if len (tum ) == len (norm ):
365
- # Too (2+) many single nucleotide changes to warrant having a normal counterpart
366
- # This might be an artifact
367
- normal_peptides .append ('N' * peplen )
360
+ tum , norm = iars [containing_iars [0 ]]
361
+ pos = tum .find (pred .pept )
362
+ temp_normal_pept = norm [pos :pos + peplen ]
363
+ ndiff = pept_diff (pred .pept , temp_normal_pept )
364
+ assert ndiff != 0
365
+ if ndiff == 1 :
366
+ normal_peptides .append (norm [pos :pos + peplen ])
368
367
else :
369
- # There is an indel in play. The difference cannot be in the last AA as that
370
- # would have come out properly in the first case. There is a possibility that
371
- # the indel was in the first AA causing a shift. We can handle that by looking
372
- # at the suffix.
373
- pos = norm .find (pred .pept [1 :])
374
- if pos != - 1 :
375
- # The suffix was found,
376
- normal_peptides .append (norm [pos - 1 :pos + peplen ])
377
- else :
378
- # The indel was too large to warrant having a normal counterpart
368
+ if len (tum ) == len (norm ):
369
+ # Too (2+) many single nucleotide changes to warrant having a normal
370
+ # counterpart. This might be an artifact
379
371
normal_peptides .append ('N' * peplen )
372
+ else :
373
+ # There is an indel in play. The difference cannot be in the last AA as that
374
+ # would have come out properly in the first case. There is a possibility
375
+ # that the indel was in the first AA causing a shift. We can handle that by
376
+ # looking at the suffix.
377
+ pos = norm .find (pred .pept [1 :])
378
+ if pos != - 1 :
379
+ # The suffix was found,
380
+ normal_peptides .append (norm [pos - 1 :pos + peplen ])
381
+ else :
382
+ # The indel was too large to warrant having a normal counterpart
383
+ normal_peptides .append ('N' * peplen )
380
384
381
385
mhc_df ['normal_pept' ] = normal_peptides
382
386
return mhc_df , normal_peptides
@@ -417,7 +421,7 @@ def predict_normal_binding(job, binding_result, transgened_files, allele, peplen
417
421
'predictor' : None }
418
422
elif predictor == 'Consensus' :
419
423
results = _process_consensus_mhcii (mhc_file )
420
- results , peptides = _get_normal_peptides (results , iars , peplen )
424
+ results , peptides = _get_normal_peptides (job , results , iars , peplen )
421
425
with open ('peptides.faa' , 'w' ) as pfile :
422
426
for pept in peptides :
423
427
print ('>' , pept , '\n ' , pept , sep = '' , file = pfile )
@@ -431,7 +435,7 @@ def predict_normal_binding(job, binding_result, transgened_files, allele, peplen
431
435
'predictor' : 'Consensus' }
432
436
elif predictor == 'Sturniolo' :
433
437
results = _process_sturniolo_mhcii (mhc_file )
434
- results , peptides = _get_normal_peptides (results , iars , peplen )
438
+ results , peptides = _get_normal_peptides (job , results , iars , peplen )
435
439
with open ('peptides.faa' , 'w' ) as pfile :
436
440
for pept in peptides :
437
441
print ('>' , pept , '\n ' , pept , sep = '' , file = pfile )
@@ -445,7 +449,7 @@ def predict_normal_binding(job, binding_result, transgened_files, allele, peplen
445
449
'predictor' : 'Sturniolo' }
446
450
elif predictor == 'netMHCIIpan' :
447
451
results = _process_net_mhcii (mhc_file )
448
- results , peptides = _get_normal_peptides (results , iars , peplen )
452
+ results , peptides = _get_normal_peptides (job , results , iars , peplen )
449
453
with open ('peptides.faa' , 'w' ) as pfile :
450
454
for pept in peptides :
451
455
print ('>' , pept , '\n ' , pept , sep = '' , file = pfile )
@@ -465,7 +469,7 @@ def predict_normal_binding(job, binding_result, transgened_files, allele, peplen
465
469
mhc_file = job .fileStore .readGlobalFile (binding_result ,
466
470
os .path .join (work_dir , 'mhci_results' ))
467
471
results = _process_mhci (mhc_file )
468
- results , peptides = _get_normal_peptides (results , iars , peplen )
472
+ results , peptides = _get_normal_peptides (job , results , iars , peplen )
469
473
with open ('peptides.faa' , 'w' ) as pfile :
470
474
for pept in peptides :
471
475
print ('>' , pept , '\n ' , pept , sep = '' , file = pfile )
0 commit comments