From 0cf306ce19e3f0cab4a57194f74442cbb47f074a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sondre=20Lilleb=C3=B8=20Gundersen?= Date: Tue, 9 Jul 2024 00:12:38 +0200 Subject: [PATCH 1/2] wip: Fetch multi-platform image digests https://github.com/distribution/distribution/blob/main/docs/content/spec/api.md --- Cargo.lock | 1 + Cargo.toml | 1 + justfile | 6 +-- src/client/builder.rs | 25 ++++++++++--- src/client/client.rs | 87 +++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 31 ++++++++++----- 6 files changed, 133 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8a7f4a3..2b5f4a1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -263,6 +263,7 @@ name = "container-retention-policy" version = "3.0.0" dependencies = [ "assert_cmd", + "base64", "chrono", "clap", "color-eyre", diff --git a/Cargo.toml b/Cargo.toml index 844c672..97476c7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,7 @@ tracing-indicatif = "0.3.6" url = { version = "2.5.0" , default-features = false} urlencoding = { version="2.1.3" } wildmatch = { version = "2.3.3" } +base64 = "0.22.1" [dev-dependencies] assert_cmd = "2.0.14" diff --git a/justfile b/justfile index 194e19a..899147e 100644 --- a/justfile +++ b/justfile @@ -50,11 +50,11 @@ run: RUST_LOG=container_retention_policy=debug cargo r -- \ --account snok \ --token $DELETE_PACKAGES_CLASSIC_TOKEN \ - --tag-selection untagged \ + --tag-selection both \ --image-names "container-retention-policy" \ --image-tags "!latest !test-1* !v*" \ --shas-to-skip "" \ --keep-n-most-recent 5 \ --timestamp-to-use "updated_at" \ - --cut-off 1h \ - --dry-run false + --cut-off 1m \ + --dry-run true diff --git a/src/client/builder.rs b/src/client/builder.rs index 3ca80f6..8dbfa11 100644 --- a/src/client/builder.rs +++ b/src/client/builder.rs @@ -1,10 +1,10 @@ -use std::sync::Arc; -use std::time::Duration; - +use base64::{alphabet, engine, engine::general_purpose::GeneralPurpose, Engine as _}; use color_eyre::eyre::Result; use reqwest::header::HeaderMap; use reqwest::Client; use secrecy::ExposeSecret; +use std::sync::Arc; +use std::time::Duration; use tokio::sync::Mutex; use tower::limit::{ConcurrencyLimit, RateLimit}; use tower::ServiceBuilder; @@ -14,12 +14,13 @@ use url::Url; use crate::cli::models::{Account, Token}; use crate::client::client::PackagesClient; use crate::client::urls::Urls; - +use base64::prelude::*; pub type RateLimitedService = Arc>>>; #[derive(Debug)] pub struct PackagesClientBuilder { pub headers: Option, + pub oci_headers: Option, pub urls: Option, pub token: Option, pub fetch_package_service: Option, @@ -33,6 +34,7 @@ impl PackagesClientBuilder { pub fn new() -> Self { Self { headers: None, + oci_headers: None, urls: None, fetch_package_service: None, list_packages_service: None, @@ -51,12 +53,24 @@ impl PackagesClientBuilder { Token::Temporal(token) | Token::ClassicPersonalAccess(token) => token.expose_secret(), } ); + let engine = GeneralPurpose::new(&alphabet::STANDARD, engine::general_purpose::PAD); + let encoded_auth_header_value = format!( + "Bearer {}", + match &token { + Token::Temporal(token) | Token::ClassicPersonalAccess(token) => engine.encode(token.expose_secret()), + } + ); let mut headers = HeaderMap::new(); headers.insert("Authorization", auth_header_value.as_str().parse()?); headers.insert("X-GitHub-Api-Version", "2022-11-28".parse()?); headers.insert("Accept", "application/vnd.github+json".parse()?); headers.insert("User-Agent", "snok/container-retention-policy".parse()?); - self.headers = Some(headers); + self.headers = Some(headers.clone()); + + headers.insert("Accept", "application/vnd.oci.image.index.v1+json".parse()?); + headers.insert("Authorization", encoded_auth_header_value.as_str().parse()?); + self.oci_headers = Some(headers); + self.token = Some(token); Ok(self) } @@ -143,6 +157,7 @@ impl PackagesClientBuilder { // Create PackageVersionsClient instance let client = PackagesClient { headers: self.headers.unwrap(), + oci_headers: self.oci_headers.unwrap(), urls: self.urls.unwrap(), fetch_package_service: self.fetch_package_service.unwrap(), list_packages_service: self.list_packages_service.unwrap(), diff --git a/src/client/client.rs b/src/client/client.rs index 6dedcef..89dce48 100644 --- a/src/client/client.rs +++ b/src/client/client.rs @@ -6,6 +6,7 @@ use chrono::{DateTime, Utc}; use color_eyre::eyre::{eyre, Result}; use reqwest::header::HeaderMap; use reqwest::{Client, Method, Request, StatusCode}; +use serde_json::Value; use tokio::time::sleep; use tower::{Service, ServiceExt}; use tracing::{debug, error, info, Span}; @@ -22,6 +23,7 @@ use crate::{Counts, PackageVersions}; #[derive(Debug)] pub struct PackagesClient { pub headers: HeaderMap, + pub oci_headers: HeaderMap, pub urls: Urls, pub fetch_package_service: RateLimitedService, pub list_packages_service: RateLimitedService, @@ -478,6 +480,91 @@ impl PackagesClient { response_headers.x_ratelimit_reset, )) } + + pub async fn fetch_image_manifest(&self, tag: &str) -> Result> { + debug!(tag = tag, "Retrieving image manifest"); + + let url = format!("https://ghcr.io/v2/snok%2Fcontainer-retention-policy/manifests/{tag}"); + + // Construct initial request + let response = Client::new().get(url).headers(self.oci_headers.clone()).send().await?; + + let raw_json = response.text().await?; + let resp: OCIImageIndex = match serde_json::from_str(&raw_json) { + Ok(t) => t, + Err(e) => { + println!("{}", raw_json); + return Ok(vec![]); + } + }; + + Ok(resp + .manifests + .unwrap_or(vec![]) + .iter() + .map(|manifest| manifest.digest.to_string()) + .collect()) + } +} + +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +struct OCIImageIndex { + schema_version: u32, + media_type: String, + manifests: Option>, +} + +#[derive(Serialize, Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +struct Manifest { + media_type: String, + digest: String, + size: u64, + platform: Option, + annotations: Option, +} + +#[derive(Serialize, Deserialize, Debug)] +struct Platform { + architecture: String, + os: String, +} + +#[derive(Serialize, Deserialize, Debug)] +struct Annotations { + #[serde(rename = "vnd.docker.reference.digest")] + docker_reference_digest: Option, + + #[serde(rename = "vnd.docker.reference.type")] + docker_reference_type: Option, +} + +#[derive(Serialize, Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +struct DockerDistributionManifest { + schema_version: u32, + media_type: String, + config: Config, + layers: Vec, +} + +#[derive(Serialize, Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +struct Config { + media_type: String, + size: u64, + digest: String, +} + +#[derive(Serialize, Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +struct Layer { + media_type: String, + size: u64, + digest: String, } #[cfg(test)] diff --git a/src/main.rs b/src/main.rs index ab24fb6..9945541 100644 --- a/src/main.rs +++ b/src/main.rs @@ -150,16 +150,27 @@ async fn main() -> Result<()> { *counts.remaining_requests.read().await ); - let (deleted_packages, failed_packages) = - delete_package_versions(package_version_map, client, counts.clone(), input.dry_run) - .instrument(info_span!("deleting package versions")) - .await; - - let mut github_output = env::var("GITHUB_OUTPUT").unwrap_or_default(); - - github_output.push_str(&format!("deleted={}", deleted_packages.join(","))); - github_output.push_str(&format!("failed={}", failed_packages.join(","))); - env::set_var("GITHUB_OUTPUT", github_output); + for (package, package_versions) in package_version_map.iter() { + info!("Print package {package}"); + for package_version in &package_versions.tagged { + for tag in &package_version.metadata.container.tags { + info!("Print tag {tag}"); + let digests = client.fetch_image_manifest(tag).await.unwrap(); + + println!("digests: {digests:?}"); + } + } + } + // let (deleted_packages, failed_packages) = + // delete_package_versions(package_version_map, client, counts.clone(), input.dry_run) + // .instrument(info_span!("deleting package versions")) + // .await; + // + // let mut github_output = env::var("GITHUB_OUTPUT").unwrap_or_default(); + // + // github_output.push_str(&format!("deleted={}", deleted_packages.join(","))); + // github_output.push_str(&format!("failed={}", failed_packages.join(","))); + // env::set_var("GITHUB_OUTPUT", github_output); Ok(()) } From 03d5514357ef057ce1f0b45d5e5b3c6a42c2c13c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sondre=20Lilleb=C3=B8=20Gundersen?= Date: Tue, 9 Jul 2024 22:56:39 +0200 Subject: [PATCH 2/2] wip: Move digest fetching into package version filtering --- src/client/builder.rs | 1 - src/client/client.rs | 26 +++++--- src/core/select_package_versions.rs | 96 +++++++++++++++++++++++++++-- src/main.rs | 31 +++------- 4 files changed, 119 insertions(+), 35 deletions(-) diff --git a/src/client/builder.rs b/src/client/builder.rs index 8dbfa11..bfe02e3 100644 --- a/src/client/builder.rs +++ b/src/client/builder.rs @@ -14,7 +14,6 @@ use url::Url; use crate::cli::models::{Account, Token}; use crate::client::client::PackagesClient; use crate::client::urls::Urls; -use base64::prelude::*; pub type RateLimitedService = Arc>>>; #[derive(Debug)] diff --git a/src/client/client.rs b/src/client/client.rs index 89dce48..fcde9a5 100644 --- a/src/client/client.rs +++ b/src/client/client.rs @@ -6,7 +6,6 @@ use chrono::{DateTime, Utc}; use color_eyre::eyre::{eyre, Result}; use reqwest::header::HeaderMap; use reqwest::{Client, Method, Request, StatusCode}; -use serde_json::Value; use tokio::time::sleep; use tower::{Service, ServiceExt}; use tracing::{debug, error, info, Span}; @@ -481,7 +480,11 @@ impl PackagesClient { )) } - pub async fn fetch_image_manifest(&self, tag: &str) -> Result> { + pub async fn fetch_image_manifest( + &self, + package_name: String, + tag: String, + ) -> Result<(String, String, Vec)> { debug!(tag = tag, "Retrieving image manifest"); let url = format!("https://ghcr.io/v2/snok%2Fcontainer-retention-policy/manifests/{tag}"); @@ -494,16 +497,21 @@ impl PackagesClient { Ok(t) => t, Err(e) => { println!("{}", raw_json); - return Ok(vec![]); + return Err(eyre!( + "Failed to fetch image manifest for \x1b[34m{package_name}\x1b[0m:\x1b[32m{tag}\x1b[0m: {e}" + )); } }; - Ok(resp - .manifests - .unwrap_or(vec![]) - .iter() - .map(|manifest| manifest.digest.to_string()) - .collect()) + Ok(( + package_name, + tag, + resp.manifests + .unwrap_or(vec![]) + .iter() + .map(|manifest| manifest.digest.to_string()) + .collect(), + )) } } diff --git a/src/core/select_package_versions.rs b/src/core/select_package_versions.rs index db7c30d..b2a8a58 100644 --- a/src/core/select_package_versions.rs +++ b/src/core/select_package_versions.rs @@ -8,7 +8,7 @@ use chrono::Utc; use color_eyre::Result; use humantime::Duration as HumantimeDuration; use indicatif::ProgressStyle; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::sync::Arc; use std::time::Duration; use tokio::task::JoinSet; @@ -287,16 +287,104 @@ pub async fn select_package_versions( ); } - let mut package_version_map = HashMap::new(); + let mut all_package_versions = vec![]; + let mut fetch_digest_set = JoinSet::new(); debug!("Fetching package versions"); + while let Some(r) = set.join_next().await { // Get all the package versions for a package let (package_name, mut package_versions) = r??; + // Queue fetching of digests for each tag + for package_version in &package_versions.tagged { + for tag in &package_version.metadata.container.tags { + fetch_digest_set.spawn(client.fetch_image_manifest(package_name.clone(), tag.clone())); + } + } + + all_package_versions.push((package_name, package_versions)); + } + + debug!("Fetching package versions"); + let mut digests = HashSet::new(); + let mut digest_tag = HashMap::new(); + + while let Some(r) = fetch_digest_set.join_next().await { + // Get all the digests for the package + let (package_name, tag, package_digests) = r??; + + if package_digests.is_empty() { + debug!( + package_name = package_name, + "Found {} associated digests for \x1b[34m{package_name}\x1b[0m:\x1b[32m{tag}\x1b[0m", + package_digests.len() + ); + } else { + info!( + package_name = package_name, + "Found {} associated digests for \x1b[34m{package_name}\x1b[0m:\x1b[32m{tag}\x1b[0m", + package_digests.len() + ); + } + + digests.extend(package_digests.clone()); + for digest in package_digests.into_iter() { + digest_tag.insert(digest, format!("\x1b[34m{package_name}\x1b[0m:\x1b[32m{tag}\x1b[0m")); + } + } + + let mut package_version_map = HashMap::new(); + + for (package_name, mut package_versions) in all_package_versions { + package_versions.untagged = package_versions + .untagged + .into_iter() + .filter_map(|package_version| { + if digests.contains(&package_version.name) { + let x: String = package_version.name.clone(); + let association: &String = digest_tag.get(&x as &str).unwrap(); + debug!( + "Skipping deletion of {} because it's associated with {association}", + package_version.name + ); + None + } else { + Some(package_version) + } + }) + .collect(); + let count_before = package_versions.tagged.len(); + package_versions.tagged = package_versions + .tagged + .into_iter() + .filter(|package_version| { + if digests.contains(&package_version.name) { + let association = digest_tag.get(&*(package_version.name)).unwrap(); + debug!( + "Skipping deletion of {} because it's associated with {association}", + package_version.name + ); + false + } else { + true + } + }) + .collect(); + + let adjusted_keep_n_most_recent = + if keep_n_most_recent as i64 - (count_before as i64 - package_versions.tagged.len() as i64) < 0 { + 0 + } else { + keep_n_most_recent as i64 - (count_before as i64 - package_versions.tagged.len() as i64) + }; + // Keep n package versions per package, if specified - package_versions.tagged = - handle_keep_n_most_recent(package_versions.tagged, keep_n_most_recent, timestamp_to_use); + package_versions.tagged = handle_keep_n_most_recent( + package_versions.tagged, + adjusted_keep_n_most_recent as u32, + timestamp_to_use, + ); info!( package_name = package_name, diff --git a/src/main.rs b/src/main.rs index 9945541..1a334c2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use color_eyre::eyre::Result; use tokio::sync::RwLock; -use tracing::{debug, error, info, info_span, trace, Instrument}; +use tracing::{debug, error, info_span, trace, Instrument}; use tracing_indicatif::IndicatifLayer; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; @@ -150,27 +150,16 @@ async fn main() -> Result<()> { *counts.remaining_requests.read().await ); - for (package, package_versions) in package_version_map.iter() { - info!("Print package {package}"); - for package_version in &package_versions.tagged { - for tag in &package_version.metadata.container.tags { - info!("Print tag {tag}"); - let digests = client.fetch_image_manifest(tag).await.unwrap(); + let (deleted_packages, failed_packages) = + delete_package_versions(package_version_map, client, counts.clone(), input.dry_run) + .instrument(info_span!("deleting package versions")) + .await; - println!("digests: {digests:?}"); - } - } - } - // let (deleted_packages, failed_packages) = - // delete_package_versions(package_version_map, client, counts.clone(), input.dry_run) - // .instrument(info_span!("deleting package versions")) - // .await; - // - // let mut github_output = env::var("GITHUB_OUTPUT").unwrap_or_default(); - // - // github_output.push_str(&format!("deleted={}", deleted_packages.join(","))); - // github_output.push_str(&format!("failed={}", failed_packages.join(","))); - // env::set_var("GITHUB_OUTPUT", github_output); + let mut github_output = env::var("GITHUB_OUTPUT").unwrap_or_default(); + + github_output.push_str(&format!("deleted={}", deleted_packages.join(","))); + github_output.push_str(&format!("failed={}", failed_packages.join(","))); + env::set_var("GITHUB_OUTPUT", github_output); Ok(()) }