Skip to content

Commit

Permalink
Updates.
Browse files Browse the repository at this point in the history
  • Loading branch information
DailyDreaming committed Feb 3, 2025
1 parent 7dc1a7c commit 06b7a40
Show file tree
Hide file tree
Showing 11 changed files with 1,173 additions and 234 deletions.
1 change: 0 additions & 1 deletion .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ lint:
- ${MAIN_PYTHON_PKG} -m virtualenv venv && . venv/bin/activate && make prepare && make develop extras=[all]
- ${MAIN_PYTHON_PKG} -m pip freeze
- ${MAIN_PYTHON_PKG} --version
- make mypy
- make docs
- check-jsonschema --schemafile https://json.schemastore.org/dependabot-2.0.json .github/dependabot.yml
# - make diff_pydocstyle_report
Expand Down
13 changes: 12 additions & 1 deletion contrib/admin/mypy-with-ignore.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,17 @@ def main():
'src/toil/lib/aws/__init__.py',
'src/toil/server/utils.py',
'src/toil/test',
'src/toil/utils/toilStats.py'
'src/toil/utils/toilStats.py',
'src/toil/server/utils.py',
'src/toil/jobStores/aws/jobStore.py',
'src/toil/jobStores/exceptions.py',
'src/toil/lib/aws/config.py',
'src/toil/lib/aws/s3.py',
'src/toil/lib/retry.py',
'src/toil/lib/pipes.py',
'src/toil/lib/checksum.py',
'src/toil/lib/conversions.py',
'src/toil/lib/iterables.py'
]]

def ignore(file_path):
Expand All @@ -99,6 +109,7 @@ def ignore(file_path):
for file_path in all_files_to_check:
if not ignore(file_path):
filtered_files_to_check.append(file_path)
print(f'Checking: {filtered_files_to_check}')
args = ['mypy', '--color-output', '--show-traceback'] + filtered_files_to_check
p = subprocess.run(args=args)
exit(p.returncode)
Expand Down
205 changes: 110 additions & 95 deletions src/toil/jobStores/aws/jobStore.py

Large diffs are not rendered by default.

78 changes: 78 additions & 0 deletions src/toil/jobStores/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Copyright (C) 2015-2021 Regents of the University of California
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import urllib.parse as urlparse


class InvalidImportExportUrlException(Exception):
    """Indicates that the given URL is invalid."""

    def __init__(self, url):
        """
        :param urlparse.ParseResult url: the parsed URL that was rejected; its
               ``geturl()`` value is embedded in the error message
        """
        super().__init__(f"The URL '{url.geturl()}' is invalid.")


class NoSuchJobException(Exception):
    """Indicates that the specified job does not exist."""

    def __init__(self, jobStoreID):
        """
        :param str jobStoreID: the jobStoreID that was mistakenly assumed to exist
        """
        # An f-string always formats the ID as a single value; "%s" % x would
        # try to unpack x if it ever happened to be a tuple.
        super().__init__(f"The job '{jobStoreID}' does not exist.")


class ConcurrentFileModificationException(Exception):
    """Indicates that the file was attempted to be modified by multiple processes at once."""

    def __init__(self, jobStoreFileID):
        """
        :param str jobStoreFileID: the ID of the file that was modified by multiple workers
               or processes concurrently
        """
        # An f-string always formats the ID as a single value; "%s" % x would
        # try to unpack x if it ever happened to be a tuple.
        super().__init__(f'Concurrent update to file {jobStoreFileID} detected.')


class NoSuchFileException(Exception):
    """Indicates that the specified file does not exist."""

    def __init__(self, jobStoreFileID, customName=None, *extra):
        """
        :param str jobStoreFileID: the ID of the file that was mistakenly assumed to exist
        :param str customName: optionally, an alternate name for the nonexistent file
        :param list extra: optional extra information to add to the error message
        """
        # Having the extra argument may help resolve the __init__() takes at
        # most three arguments error reported in
        # https://github.com/DataBiosphere/toil/issues/2589#issuecomment-481912211
        if customName is None:
            # f-string rather than "%s" % jobStoreFileID, which would try to
            # unpack the ID if it ever happened to be a tuple.
            message = f"File '{jobStoreFileID}' does not exist."
        else:
            message = f"File '{customName}' ({jobStoreFileID}) does not exist."

        if extra:
            # Append extra data, space-separated, after stringifying each item.
            message += " Extra info: " + " ".join(map(str, extra))

        super().__init__(message)


class NoSuchJobStoreException(Exception):
    """Indicates that the specified job store does not exist."""

    def __init__(self, locator):
        """
        :param locator: identifies the missing job store; it is embedded in the
               error message
        """
        # An f-string always formats the locator as a single value; "%s" % x
        # would try to unpack x if it ever happened to be a tuple.
        super().__init__(f"The job store '{locator}' does not exist, so there is nothing to restart.")


class JobStoreExistsException(Exception):
    """Indicates that the specified job store already exists."""

    def __init__(self, locator):
        """
        :param locator: identifies the job store that already exists; it is
               embedded in the error message
        """
        # An f-string always formats the locator as a single value; "%s" % x
        # would try to unpack x if it ever happened to be a tuple.
        super().__init__(
            f"The job store '{locator}' already exists. Use --restart to resume the workflow, or remove "
            "the job store with 'toil clean' to start the workflow from scratch.")
22 changes: 22 additions & 0 deletions src/toil/lib/aws/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Binary size units, expressed in bytes.
KiB = 1024
MiB = KiB * KiB

S3_PARALLELIZATION_FACTOR = 8
# 16 MiB.
S3_PART_SIZE = 16 * MiB

# Files must be larger than this before we consider multipart uploads.
AWS_MIN_CHUNK_SIZE = 64 * MiB
# Convenience variable for Boto3 TransferConfig(multipart_threshold=).
MULTIPART_THRESHOLD = 1 + AWS_MIN_CHUNK_SIZE
# Maximum number of parts allowed in a multipart upload. This is a limitation imposed by S3.
AWS_MAX_MULTIPART_COUNT = 10_000


def get_s3_multipart_chunk_size(filesize: int) -> int:
    """
    Return the chunk size of the S3 multipart object, given a file's size in bytes.

    Files that fit within AWS_MAX_MULTIPART_COUNT chunks of AWS_MIN_CHUNK_SIZE
    use AWS_MIN_CHUNK_SIZE. Larger files use the smallest whole number of MiB
    that keeps the chunk count at or below AWS_MAX_MULTIPART_COUNT.

    :param filesize: size of the file in bytes
    :return: the chunk size in bytes
    """
    # Guard clause: the minimum chunk size already keeps us within the part limit.
    if filesize <= AWS_MAX_MULTIPART_COUNT * AWS_MIN_CHUNK_SIZE:
        return AWS_MIN_CHUNK_SIZE

    # Ceiling division: bytes per chunk when the file is spread over the
    # maximum number of parts.
    per_part, leftover = divmod(filesize, AWS_MAX_MULTIPART_COUNT)
    if leftover:
        per_part += 1

    # Round the per-part size up to the next whole MiB.
    whole_mib = (per_part + MiB - 1) // MiB
    return whole_mib * MiB
Loading

0 comments on commit 06b7a40

Please sign in to comment.