Skip to content

Commit

Permalink
Fixed issues reported in #8
Browse files Browse the repository at this point in the history
  • Loading branch information
jakeb1996 committed Jan 11, 2024
1 parent bb50af9 commit cacfbfc
Showing 1 changed file with 22 additions and 13 deletions.
35 changes: 22 additions & 13 deletions src/crackling/Crackling.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ def processSequence(sequence):

duplicatePercent = round(numDuplicateGuides / numIdentifiedGuides * 100.0, 3)
printer(f'\tIdentified {numIdentifiedGuides:,} possible target sites in this file.')
printer(f'\tOf these, {len(duplicateGuides):,} are not unique. These sites occur a total of {numDuplicateGuides} times.')
printer(f'\tOf these, {len(duplicateGuides):,} are not unique. These sites occur a total of {numDuplicateGuides:,} times.')
printer(f'\tRemoving {numDuplicateGuides:,} of {numIdentifiedGuides:,} ({duplicatePercent}%) guides.')
printer(f'\t{len(candidateGuides):,} distinct guides have been discovered so far.')

Expand All @@ -277,7 +277,7 @@ def processSequence(sequence):
for batchFile in guideBatchinator:
batchStartTime = time.time()

printer(f'Processing batch file {(batchFileId+1):,} of {len(guideBatchinator)}')
printer(f'Processing batch file {(batchFileId+1):,} of {len(guideBatchinator):,}')

# Create new candidate guide dictionary
candidateGuides = {}
Expand Down Expand Up @@ -401,14 +401,17 @@ def processSequence(sequence):
errorCount = 0
notFoundCount = 0

pgLength = int(configMngr['rnafold']['page-length'])
pgLength = min(
int(configMngr['input']['batch-size']),
int(configMngr['rnafold']['page-length'])
)

for pgIdx, pageCandidateGuides in Paginator(
filterCandidateGuides(candidateGuides, MODULE_MM10DB),
pgLength
):
if pgLength > 0:
printer(f'\tProcessing page {(pgIdx+1)} ({pgLength:,} per page).')
printer(f'\tProcessing page {(pgIdx+1):,} (max {pgLength:,} per page).')

if os.path.exists(configMngr['rnafold']['output']):
os.remove(configMngr['rnafold']['output'])
Expand Down Expand Up @@ -501,10 +504,10 @@ def processSequence(sequence):
printer(f'\t{failedCount:,} of {testedCount:,} failed here.')

if errorCount > 0:
printer(f'\t{errorCount} of {testedCount} erred here.')
printer(f'\t{errorCount:,} of {testedCount:,} erred here.')

if notFoundCount > 0:
printer(f'\t{notFoundCount} of {testedCount} not found in RNAfold output.')
printer(f'\t{notFoundCount:,} of {testedCount:,} not found in RNAfold output.')

#########################################
## Calc mm10db result ##
Expand All @@ -529,9 +532,9 @@ def processSequence(sequence):
candidateGuides[target23]['acceptedByMm10db'] = CODE_ACCEPTED
acceptedCount += 1

printer(f'\t{acceptedCount} accepted.')
printer(f'\t{acceptedCount:,} accepted.')

printer(f'\t{failedCount} failed.')
printer(f'\t{failedCount:,} failed.')

del acceptedCount

Expand Down Expand Up @@ -606,15 +609,18 @@ def processSequence(sequence):
testedCount = 0
failedCount = 0

pgLength = int(configMngr['bowtie2']['page-length'])
pgLength = min(
int(configMngr['input']['batch-size']),
int(configMngr['bowtie2']['page-length'])
)

for pgIdx, pageCandidateGuides in Paginator(
filterCandidateGuides(candidateGuides, MODULE_SPECIFICITY),
pgLength
):

if pgLength > 0:
printer(f'\tProcessing page {(pgIdx+1)} ({pgLength:,} per page).')
printer(f'\tProcessing page {(pgIdx+1):,} (max {pgLength:,} per page).')

if os.path.exists(configMngr['bowtie2']['output']):
os.remove(configMngr['bowtie2']['output'])
Expand Down Expand Up @@ -732,15 +738,18 @@ def processSequence(sequence):
testedCount = 0
failedCount = 0

pgLength = int(configMngr['offtargetscore']['page-length'])
pgLength = min(
int(configMngr['input']['batch-size']),
int(configMngr['offtargetscore']['page-length'])
)

for pgIdx, pageCandidateGuides in Paginator(
filterCandidateGuides(candidateGuides, MODULE_SPECIFICITY),
pgLength
):

if pgLength > 0:
printer(f'\tProcessing page {(pgIdx+1)} ({pgLength:,} per page).')
printer(f'\tProcessing page {(pgIdx+1):,} (max {pgLength:,} per page).')

# prepare the list of candidate guides to score
guidesInPage = 0
Expand Down Expand Up @@ -873,7 +882,7 @@ def processSequence(sequence):
#########################################
printer('Done.')

printer(f'{len(candidateGuides)} guides evaluated.')
printer(f'{len(candidateGuides):,} guides evaluated.')

printer('This batch ran in {} (dd hh:mm:ss) or {} seconds'.format(
time.strftime('%d %H:%M:%S', time.gmtime((time.time() - batchStartTime))),
Expand Down

0 comments on commit cacfbfc

Please sign in to comment.