Skip to content

Commit

Permalink
Add size validation option and workflow for monthly minor releases
Browse files Browse the repository at this point in the history
  • Loading branch information
aws-tianquaw committed Apr 12, 2024
1 parent 19d7553 commit 1ddddf3
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 2 deletions.
36 changes: 36 additions & 0 deletions .github/workflows/check-image-size.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Workflow that runs `generate-size-report --validate` for one image version.
# It can be started manually (workflow_dispatch) or invoked from another
# workflow (workflow_call); both entry points take the same `image-version`
# input.
name: Check Image Size of Current Release
on:
  # Manually call
  workflow_dispatch:
    inputs:
      image-version:
        type: string
        required: true
        description: Image version
  # Call from other workflow
  workflow_call:
    inputs:
      image-version:
        type: string
        required: true

jobs:
  check-image-size:
    name: Check image size
    runs-on: ubuntu-latest
    # Only run in the upstream repository, not in forks.
    if: github.repository == 'aws/sagemaker-distribution'
    permissions:
      pull-requests: write
      contents: write
    steps:
      - uses: actions/checkout@v4
      - uses: mamba-org/setup-micromamba@v1
        with:
          environment-file: ./environment.yml
          environment-name: sagemaker-distribution
          init-shell: bash
      - name: Free up disk space
        run: rm -rf /opt/hostedtoolcache
      # NOTE(review): every `run` step starts a new shell process, so this
      # activation may not carry over to the next step - confirm that the
      # following step really uses the sagemaker-distribution environment.
      - name: Activate sagemaker-distribution
        run: micromamba activate sagemaker-distribution
      - name: Run size validation
        # Pass the input through an environment variable instead of expanding
        # it inside the script, so its value is never parsed as shell syntax.
        env:
          IMAGE_VERSION: ${{ inputs.image-version }}
        run: python ./src/main.py generate-size-report --target-patch-version "$IMAGE_VERSION" --validate
12 changes: 12 additions & 0 deletions .github/workflows/monthly-minor-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,15 @@ jobs:
with:
release-type: "minor"
base-version: ${{ matrix.version }}
# Run the image size check once per version in the generated matrix.
check-image-size:
  name: Check Image Size
  # `generate-version-matrix` must be a direct dependency: the `needs` context
  # only exposes outputs of jobs listed here, and the matrix below reads
  # `needs.generate-version-matrix.outputs.matrix`.
  needs: [generate-version-matrix, start-monthly-minor]
  permissions:
    pull-requests: write
    contents: write
  strategy:
    matrix: ${{ fromJson(needs.generate-version-matrix.outputs.matrix) }}
    fail-fast: false
  uses: aws/sagemaker-distribution/.github/workflows/check-image-size.yml@main
  with:
    # The called workflow declares a single required input, `image-version`.
    # NOTE(review): it forwards this value to `--target-patch-version`; confirm
    # that `matrix.version` is the version that should be validated here.
    image-version: ${{ matrix.version }}
5 changes: 5 additions & 0 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,11 @@ def get_arg_parser():
required=True,
help="Specify the target patch version for which the package size report needs to be " "generated.",
)
package_size_parser.add_argument(
"--validate",
action="store_true",
help="Validate package size delta and raise error if the validation failed.",
)
return parser


Expand Down
49 changes: 47 additions & 2 deletions src/package_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,42 @@ def _get_installed_package_versions_and_conda_versions(
return target_packages_match_spec_out, latest_package_versions_in_upstream


def _validate_new_package_size(
    new_package_total_size,
    target_total_size,
    image_type,
    target_version,
    new_package_total_size_percent_threshold=5,
):
    """Check the share of newly introduced packages in the total package size.

    Prints a one-line summary of the size and share of the new packages and
    reports a violation when that share exceeds the threshold.

    Args:
        new_package_total_size: Combined size of the packages that are new in
            the target image.
        target_total_size: Total Python package size of the target image.
        image_type: Image type label used in the violation message.
        target_version: Target image version used in the violation message.
        new_package_total_size_percent_threshold: Maximum allowed share, in
            percent, of the new packages. Defaults to 5.

    Returns:
        A message describing the violation when the share is above the
        threshold, otherwise None.
    """
    # An empty target has no packages at all, so the share of new packages is
    # zero; guarding here also avoids a ZeroDivisionError.
    if target_total_size:
        new_package_total_size_percent = round(new_package_total_size / target_total_size * 100, 2)
    else:
        new_package_total_size_percent = 0.0

    validate_result = None
    new_package_total_size_percent_string = str(new_package_total_size_percent)
    if new_package_total_size_percent > new_package_total_size_percent_threshold:
        validate_result = (
            "The total size of newly introduced Python packages accounts for more than "
            f"{new_package_total_size_percent_threshold!s}% of the total Python package size of "
            f"{image_type} image, version {target_version!s}! "
            f"({new_package_total_size_percent!s}%)"
        )
        # Raw string: "\c" is not a valid escape sequence in a normal literal.
        # The emitted text (LaTeX-style red colouring) is unchanged.
        new_package_total_size_percent_string = (
            r"${\color{red}" + str(new_package_total_size_percent) + "}$"
        )

    print(
        "The total size of newly introduced Python packages is "
        f"{sizeof_fmt(new_package_total_size)}, accounts for "
        f"{new_package_total_size_percent_string}% of the total package size."
    )
    return validate_result


def _generate_python_package_size_report_per_image(
base_pkg_metadata, target_pkg_metadata, image_config, base_version, target_version
):
print("\n# Python Package Size Report " + "(" + image_config["image_type"].upper() + ")\n")
validate_result = None
image_type = image_config["image_type"].upper()
print("\n# Python Package Size Report " + "(" + image_type + ")\n")
print("\n### Target Image Version: " + str(target_version) + " | Base Image Version: " + str(base_version) + "\n")
if not base_pkg_metadata or not base_version:
print("WARNING: No Python package metadata file found for base image, only partial results will be shown.")
Expand Down Expand Up @@ -148,6 +180,7 @@ def _generate_python_package_size_report_per_image(
# Print out the size delta for each changed/new package in the image, sorted descending by size.
if base_pkg_metadata:
print("\n## Python Package Size Delta\n")
new_package_total_size = 0
package_size_delta_list = []
for k, v in target_pkg_metadata.items():
if k not in base_pkg_metadata or base_pkg_metadata[k]["version"] != v["version"]:
Expand All @@ -162,12 +195,17 @@ def _generate_python_package_size_report_per_image(
"size_delta_rel": (size_delta_abs / base_pkg_size) if base_pkg_size else None,
}
)
if k not in base_pkg_metadata:
new_package_total_size += v["size"]
# Sort the package size delta based on absolute size diff in descending order.
package_size_delta_list = sorted(package_size_delta_list, key=lambda item: item["size_delta_abs"], reverse=True)
for v in package_size_delta_list:
v["size_delta_rel"] = str(round(v["size_delta_rel"] * 100, 2)) if v["size_delta_rel"] else "-"
v["size_delta_abs"] = sizeof_fmt(v["size_delta_abs"])

validate_result = _validate_new_package_size(
new_package_total_size, target_total_size, image_type, target_version
)
print(
create_markdown_table(
[
Expand All @@ -180,6 +218,7 @@ def _generate_python_package_size_report_per_image(
package_size_delta_list,
)
)
return validate_result


def generate_package_staleness_report(args):
Expand All @@ -206,10 +245,16 @@ def generate_package_size_report(args):
source_patch_version = f.readline()
base_version = get_semver(source_patch_version)
base_version_dir = get_dir_for_version(base_version) if base_version else None
validate_results = []
for image_config in _image_generator_configs:
base_pkg_metadata = pull_conda_package_metadata(image_config, base_version_dir) if base_version else None
target_pkg_metadata = pull_conda_package_metadata(image_config, target_version_dir)

_generate_python_package_size_report_per_image(
validate_result = _generate_python_package_size_report_per_image(
base_pkg_metadata, target_pkg_metadata, image_config, base_version, target_version
)
if validate_result:
validate_results.append(validate_result)

if args.validate and validate_results:
raise Exception(f"Size Validation Failed! Issues found: {validate_results}")
6 changes: 6 additions & 0 deletions test/test_package_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,12 @@ def test_generate_package_size_report(capsys):
assert "libclang|18.1.2|18.38MB" in captured.out
assert "tqdm|4.66.2|87.47KB" in captured.out

# Assert size validation message
assert (
"The total size of newly introduced Python packages is 18.38MB, accounts for ${\color{red}21.39}$% of the total package size."
in captured.out
)


def test_generate_package_size_report_when_base_version_is_not_present(capsys):
target_pkg_metadata = _create_target_image_package_metadata()
Expand Down

0 comments on commit 1ddddf3

Please sign in to comment.