diff --git a/.github/workflows/check-image-size.yml b/.github/workflows/check-image-size.yml
new file mode 100644
index 00000000..62cb0b0c
--- /dev/null
+++ b/.github/workflows/check-image-size.yml
@@ -0,0 +1,36 @@
+name: Check Image Size of Current Release
+on:
+  # Manually call
+  workflow_dispatch:
+    inputs:
+      image-version:
+        required: true
+        description: Image version=
+  # Call from other workflow
+  workflow_call:
+    inputs:
+      image-version:
+        type: string
+        required: true
+
+jobs:
+  check-image-size:
+    name: Check image size
+    runs-on: ubuntu-latest
+    if: github.repository == 'aws/sagemaker-distribution'
+    permissions:
+      pull-requests: write
+      contents: write
+    steps:
+      - uses: actions/checkout@v4
+      - uses: mamba-org/setup-micromamba@v1
+        with:
+          environment-file: ./environment.yml
+          environment-name: sagemaker-distribution
+          init-shell: bash
+      - name: Free up disk space
+        run: rm -rf /opt/hostedtoolcache
+      - name: Run size validation
+        # Steps do not share shell state: activate and run in ONE step, from a login shell that loads the micromamba hook.
+        shell: bash -el {0}
+        run: micromamba activate sagemaker-distribution && python ./src/main.py generate-size-report --target-patch-version ${{ inputs.image-version }} --validate
diff --git a/.github/workflows/monthly-minor-release.yml b/.github/workflows/monthly-minor-release.yml
index c10bf91a..2de953bb 100644
--- a/.github/workflows/monthly-minor-release.yml
+++ b/.github/workflows/monthly-minor-release.yml
@@ -43,3 +43,15 @@ jobs:
     with:
       release-type: "minor"
       base-version: ${{ matrix.version }}
+  check-image-size:
+    name: Check Image Size
+    needs: [generate-version-matrix, start-monthly-minor]
+    permissions:
+      pull-requests: write
+      contents: write
+    strategy:
+      matrix: ${{ fromJson(needs.generate-version-matrix.outputs.matrix) }}
+      fail-fast: false
+    uses: aws/sagemaker-distribution/.github/workflows/check-image-size.yml@main
+    with:
+      image-version: ${{ matrix.version }}  # NOTE(review): matrix.version is the release's base version - confirm it is the version to size-check
diff --git a/src/main.py b/src/main.py
index f1a39361..0b59f3cf 100644
--- a/src/main.py
+++ b/src/main.py
@@ -412,6 +412,11 @@ def get_arg_parser():
         required=True,
         help="Specify the target patch version for which the package size report needs to be " "generated.",
     )
+    package_size_parser.add_argument(
+        "--validate",
+        action="store_true",
+        help="Validate package size delta and raise error if the validation failed.",
+    )
 
     return parser
 
diff --git a/src/package_report.py b/src/package_report.py
index 08af88cc..7258a98d 100644
--- a/src/package_report.py
+++ b/src/package_report.py
@@ -105,10 +105,42 @@ def _get_installed_package_versions_and_conda_versions(
     return target_packages_match_spec_out, latest_package_versions_in_upstream
 
 
+def _validate_new_package_size(new_package_total_size, target_total_size, image_type, target_version):
+    """Print the share of the target image's Python package size taken by new packages and validate it.
+
+    Returns None if the share is within the threshold, otherwise a message describing the violation.
+    A target_total_size of 0 is treated as a 0% share instead of raising ZeroDivisionError.
+    """
+    # New packages must account for <= 5% of the total Python package size of the target image.
+    new_package_total_size_percent_threshold = 5
+    validate_result = None
+    new_package_total_size_percent = (
+        round(new_package_total_size / target_total_size * 100, 2) if target_total_size else 0.0
+    )
+    new_package_total_size_percent_string = str(new_package_total_size_percent)
+    if new_package_total_size_percent > new_package_total_size_percent_threshold:
+        validate_result = (
+            "The total size of newly introduced Python packages accounts for more than "
+            f"{new_package_total_size_percent_threshold}% of the total Python package size of "
+            f"{image_type} image, version {target_version!s}! ({new_package_total_size_percent}%)"
+        )
+        # Raw string: "\c" is an invalid escape sequence in a regular string literal; the output is unchanged.
+        new_package_total_size_percent_string = r"${\color{red}" + str(new_package_total_size_percent) + "}$"
+
+    print(
+        "The total size of newly introduced Python packages is "
+        f"{sizeof_fmt(new_package_total_size)}, accounts for "
+        f"{new_package_total_size_percent_string}% of the total package size."
+    )
+    return validate_result
+
+
 def _generate_python_package_size_report_per_image(
     base_pkg_metadata, target_pkg_metadata, image_config, base_version, target_version
 ):
-    print("\n# Python Package Size Report " + "(" + image_config["image_type"].upper() + ")\n")
+    validate_result = None
+    image_type = image_config["image_type"].upper()
+    print("\n# Python Package Size Report " + "(" + image_type + ")\n")
     print("\n### Target Image Version: " + str(target_version) + " | Base Image Version: " + str(base_version) + "\n")
     if not base_pkg_metadata or not base_version:
         print("WARNING: No Python package metadata file found for base image, only partial results will be shown.")
@@ -148,6 +180,7 @@ def _generate_python_package_size_report_per_image(
     # Print out the size delta for each changed/new package in the image, sorted decending by size.
     if base_pkg_metadata:
         print("\n## Python Package Size Delta\n")
+        new_package_total_size = 0
         package_size_delta_list = []
         for k, v in target_pkg_metadata.items():
             if k not in base_pkg_metadata or base_pkg_metadata[k]["version"] != v["version"]:
@@ -162,12 +195,17 @@ def _generate_python_package_size_report_per_image(
                         "size_delta_rel": (size_delta_abs / base_pkg_size) if base_pkg_size else None,
                     }
                 )
+                if k not in base_pkg_metadata:
+                    new_package_total_size += v["size"]
 
         # Sort the package size delta based on absolute size diff in decending order.
         package_size_delta_list = sorted(package_size_delta_list, key=lambda item: item["size_delta_abs"], reverse=True)
         for v in package_size_delta_list:
             v["size_delta_rel"] = str(round(v["size_delta_rel"] * 100, 2)) if v["size_delta_rel"] else "-"
             v["size_delta_abs"] = sizeof_fmt(v["size_delta_abs"])
+        validate_result = _validate_new_package_size(
+            new_package_total_size, target_total_size, image_type, target_version
+        )
         print(
             create_markdown_table(
                 [
@@ -180,6 +218,7 @@ def _generate_python_package_size_report_per_image(
                 package_size_delta_list,
             )
         )
+    return validate_result
 
 
 def generate_package_staleness_report(args):
@@ -206,10 +245,16 @@ def generate_package_size_report(args):
             source_patch_version = f.readline()
             base_version = get_semver(source_patch_version)
     base_version_dir = get_dir_for_version(base_version) if base_version else None
+    validate_results = []
 
     for image_config in _image_generator_configs:
         base_pkg_metadata = pull_conda_package_metadata(image_config, base_version_dir) if base_version else None
         target_pkg_metadata = pull_conda_package_metadata(image_config, target_version_dir)
-        _generate_python_package_size_report_per_image(
+        validate_result = _generate_python_package_size_report_per_image(
             base_pkg_metadata, target_pkg_metadata, image_config, base_version, target_version
         )
+        if validate_result:
+            validate_results.append(validate_result)
+
+    if args.validate and validate_results:
+        raise Exception(f"Size Validation Failed! Issues found: {validate_results}")
diff --git a/test/test_package_report.py b/test/test_package_report.py
index 3602282d..c59c5e72 100644
--- a/test/test_package_report.py
+++ b/test/test_package_report.py
@@ -139,6 +139,12 @@ def test_generate_package_size_report(capsys):
     assert "libclang|18.1.2|18.38MB" in captured.out
     assert "tqdm|4.66.2|87.47KB" in captured.out
 
+    # Assert size validation message
+    assert (
+        r"The total size of newly introduced Python packages is 18.38MB, accounts for ${\color{red}21.39}$% of the total package size."
+        in captured.out
+    )
+
 
 def test_generate_package_size_report_when_base_version_is_not_present(capsys):
     target_pkg_metadata = _create_target_image_package_metadata()