From 5fa0f7f33beedfa455966f409aa2f80ccd3af9ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C5=A1per=20Grom?= Date: Thu, 10 Jul 2025 11:53:41 +0200 Subject: [PATCH 1/5] feat: widget endpoint support for multi repo filtering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Gašper Grom --- .../tinybird/pipes/activities_filtered.pipe | 6 +- ...activities_filtered_historical_cutoff.pipe | 6 +- .../pipes/activities_filtered_retention.pipe | 6 +- .../pipes/activityRepositories_filtered.pipe | 4 + ..._heatmap_by_weekday_and_2hours_blocks.pipe | 46 ++--- .../libs/tinybird/pipes/member_roles.pipe | 6 +- .../libs/tinybird/pipes/package_metrics.pipe | 190 ++++++++++++++++++ services/libs/tinybird/pipes/packages.pipe | 26 +++ .../pipes/pull_requests_filtered.pipe | 6 +- .../pipes/security_and_best_practices.pipe | 4 + 10 files changed, 268 insertions(+), 32 deletions(-) create mode 100644 services/libs/tinybird/pipes/package_metrics.pipe create mode 100644 services/libs/tinybird/pipes/packages.pipe diff --git a/services/libs/tinybird/pipes/activities_filtered.pipe b/services/libs/tinybird/pipes/activities_filtered.pipe index 8a389b4250..8b10699884 100644 --- a/services/libs/tinybird/pipes/activities_filtered.pipe +++ b/services/libs/tinybird/pipes/activities_filtered.pipe @@ -4,7 +4,7 @@ SQL > SELECT id, timestamp, type, platform, memberId, organizationId, segmentId FROM activities_with_relations_sorted_deduplicated_ds a where - segmentId = (SELECT id FROM segments_filtered) + segmentId = (SELECT segmentId FROM segments_filtered) {% if defined(startDate) %} AND a.timestamp > {{ DateTime(startDate, description="Filter activity timestamp after", required=False) }} @@ -16,6 +16,10 @@ SQL > {% if defined(repo) %} AND a.channel = {{ String(repo, description="Filter activity repo", required=False) }} {% end %} + {% if defined(repos) %} + AND a.channel + IN {{ Array(repos, 'String', description="Filter activity repo list", required=False) }} + {% end %} {% if defined(platform) %} AND a.platform = {{ String(platform, description="Filter activity platform", required=False) }} diff --git a/services/libs/tinybird/pipes/activities_filtered_historical_cutoff.pipe b/services/libs/tinybird/pipes/activities_filtered_historical_cutoff.pipe index dca09936ea..8baa8926e7 100644 --- a/services/libs/tinybird/pipes/activities_filtered_historical_cutoff.pipe +++ b/services/libs/tinybird/pipes/activities_filtered_historical_cutoff.pipe @@ -7,7 +7,7 @@ SQL > SELECT id, timestamp, type, platform, memberId, organizationId, segmentId FROM activities_with_relations_sorted_deduplicated_ds a where - segmentId = (SELECT id FROM segments_filtered) + segmentId = (SELECT segmentId FROM segments_filtered) {% if defined(startDate) %} AND a.timestamp <= {{ DateTime(startDate, description="Filter activity timestamp after", required=False) }} @@ -20,6 +20,10 @@ SQL > {% if defined(repo) %} AND a.channel = {{ String(repo, description="Filter activity repo", required=False) }} {% end %} + {% if defined(repos) %} + AND a.channel + IN {{ Array(repos, 'String', description="Filter activity repo list", required=False) }} + {% end %} {% if defined(platform) %} AND a.platform = {{ String(platform, description="Filter activity platform", required=False) }} diff --git a/services/libs/tinybird/pipes/activities_filtered_retention.pipe b/services/libs/tinybird/pipes/activities_filtered_retention.pipe index 69ca418284..7ddacf7f8f 100644 --- a/services/libs/tinybird/pipes/activities_filtered_retention.pipe +++ b/services/libs/tinybird/pipes/activities_filtered_retention.pipe @@ -7,7 +7,7 @@ SQL > SELECT id, timestamp, type, platform, memberId, organizationId, segmentId FROM activities_with_relations_sorted_deduplicated_ds a where - segmentId = (SELECT id FROM segments_filtered) + segmentId = (SELECT segmentId FROM segments_filtered) {% if defined(startDate) %} AND a.timestamp > {% if defined(granularity) and granularity == "daily" %} @@ -36,6 +36,10 @@ SQL > {% if defined(repo) %} AND a.channel = {{ String(repo, description="Filter activity repo", required=False) }} {% end %} + {% if defined(repos) %} + AND a.channel + IN {{ Array(repos, 'String', description="Filter activity repo list", required=False) }} + {% end %} {% if defined(platform) %} AND a.platform = {{ String(platform, description="Filter activity platform", required=False) }} diff --git a/services/libs/tinybird/pipes/activityRepositories_filtered.pipe b/services/libs/tinybird/pipes/activityRepositories_filtered.pipe index 6c43ea78cf..49d651e1bd 100644 --- a/services/libs/tinybird/pipes/activityRepositories_filtered.pipe +++ b/services/libs/tinybird/pipes/activityRepositories_filtered.pipe @@ -12,6 +12,10 @@ SQL > {% if defined(repo) %} AND repo = {{ String(repo, description="Filter project repo", required=False) }} {% end %} + {% if defined(repos) %} + AND repo + IN {{ Array(repos, 'String', description="Filter activity repo list", required=False) }} + {% end %} {% if defined(search) %} AND repo like 'https://%' diff --git a/services/libs/tinybird/pipes/activity_heatmap_by_weekday_and_2hours_blocks.pipe b/services/libs/tinybird/pipes/activity_heatmap_by_weekday_and_2hours_blocks.pipe index 43abf19271..f14d6de7fc 100644 --- a/services/libs/tinybird/pipes/activity_heatmap_by_weekday_and_2hours_blocks.pipe +++ b/services/libs/tinybird/pipes/activity_heatmap_by_weekday_and_2hours_blocks.pipe @@ -1,35 +1,29 @@ DESCRIPTION > Serves the `Development - Contributions outside work hours` widget -NODE country_mapping_array -SQL > - SELECT groupArray((country, flag, country_code, timezone_offset)) AS country_data - FROM country_mapping - -NODE members_with_location_information -SQL > - SELECT - m.id, - m.location, - arrayFilter( - x -> position(coalesce(nullIf(upper(m.country), ''), upper(m.location)), upper(x .1)) > 0, - (SELECT country_data FROM country_mapping_array) - ) AS matched_countries, - arrayJoin( - if(empty(matched_countries), [('Unknown', '❓', 'XX', 0)], matched_countries) - ) AS country_data - FROM members_sorted AS m - where country_data .1 != 'Unknown' and m.id in (select memberId from activities_filtered) NODE activities_with_local_timestamp SQL > - select - count(id) as activityCount, - toDayOfWeek(addHours(af.timestamp, mwli.country_data .4)) as weekday, - intDiv(toHour(addHours(af.timestamp, mwli.country_data .4)), 2) * 2 AS two_hours_block - from activities_filtered af - join members_with_location_information mwli on mwli.id = af.memberId - where platform in ('git', 'github', 'gitlab', 'gerrit') + % + select count(id) as activityCount, weekday, two_hours_block + from contributions_with_local_time_ds a + where + segmentId = (SELECT segmentId FROM segments_filtered) + {% if defined(startDate) %} + AND a.timestamp + > {{ DateTime(startDate, description="Filter activity timestamp after", required=False) }} + {% end %} + {% if defined(endDate) %} + AND a.timestamp + < {{ DateTime(endDate, description="Filter activity timestamp before", required=False) }} + {% end %} + {% if defined(repo) %} + AND a.channel = {{ String(repo, description="Filter activity repo", required=False) }} + {% end %} + {% if defined(repos) %} + AND a.channel + IN {{ Array(repos, 'String', description="Filter activity repo list", required=False) }} + {% end %} group by weekday, two_hours_block NODE weekday_hours diff --git a/services/libs/tinybird/pipes/member_roles.pipe b/services/libs/tinybird/pipes/member_roles.pipe index 6e61ca9b69..1954670dfa 100644 --- a/services/libs/tinybird/pipes/member_roles.pipe +++ b/services/libs/tinybird/pipes/member_roles.pipe @@ -1,5 +1,3 @@ -TOKEN "member_roles_endpoint_read_1372" READ - NODE member_roles_result SQL > % @@ -13,4 +11,8 @@ SQL > {% if defined(repo) %} AND repoUrl = {{ String(repo, description="Filter activity repo", required=False) }} {% end %} + {% if defined(repos) %} + AND repoUrl + IN {{ Array(repos, 'String', description="Filter activity repo list", required=False) }} + {% end %} GROUP BY memberId diff --git a/services/libs/tinybird/pipes/package_metrics.pipe b/services/libs/tinybird/pipes/package_metrics.pipe new file mode 100644 index 0000000000..a5ce5897c7 --- /dev/null +++ b/services/libs/tinybird/pipes/package_metrics.pipe @@ -0,0 +1,190 @@ +NODE package_downloads_filtered +SQL > + % + SELECT * + FROM packageDownloads p FINAL + where + insightsProjectId = (select insightsProjectId from segments_filtered) + {% if defined(startDate) %} + AND p.date > toDate( + {{ DateTime(startDate, description="Filter package downloads after", required=False) }} + ) + {% end %} + {% if defined(endDate) %} + AND p.date < toDate( + {{ DateTime(endDate, description="Filter package downloads before", required=False) }} + ) + {% end %} + {% if defined(ecosystem) %} + AND p.ecosystem + = {{ String(ecosystem, description="Filter package download ecosystem", required=False) }} + {% end %} + {% if defined(name) %} + AND p.name = {{ String(name, description="Filter by package name", required=False) }} + {% end %} + {% if defined(repo) %} + AND p.repo = {{ String(repo, description="Filter by repo", required=False) }} + {% end %} + {% if defined(repos) %} + AND p.repo + IN {{ Array(repos, 'String', description="Filter activity repo list", required=False) }} + {% end %} + +NODE package_downloads_timeseries_bounds +SQL > + % + {% if defined(startDate) and not defined(endDate) %} + select toDate({{ startDate }}) as actual_start_date, toDate(now()) as actual_end_date + {% end %} + {% if not defined(startDate) and defined(endDate) %} + select + min(package_downloads_filtered.date) as actual_start_date, + toDate({{ endDate }}) as actual_end_date + from package_downloads_filtered + where package_downloads_filtered.date > '1980-01-01' + {% end %} + {% if not defined(startDate) and not defined(endDate) %} + select + min(toDate(package_downloads_filtered.date)) as actual_start_date, + toDate(now()) as actual_end_date + from package_downloads_filtered + where package_downloads_filtered.date > '1980-01-01' + {% end %} + {% if defined(startDate) and defined(endDate) %} + select toDate({{ startDate }}) as actual_start_date, toDate({{ endDate }}) as actual_end_date + {% end %} + +NODE package_downloads_generate_timeseries +SQL > + % + SELECT DISTINCT + CASE + WHEN {{ granularity }} = 'daily' + THEN toDate(addDays(package_downloads_timeseries_bounds.actual_start_date, number)) + WHEN {{ granularity }} = 'weekly' + THEN + toStartOfWeek( + addDays(package_downloads_timeseries_bounds.actual_start_date, number * 7) + ) + WHEN {{ granularity }} = 'monthly' + THEN + toStartOfMonth(addMonths(package_downloads_timeseries_bounds.actual_start_date, number)) + WHEN {{ granularity }} = 'quarterly' + THEN + toStartOfQuarter( + addMonths(package_downloads_timeseries_bounds.actual_start_date, number * 3) + ) + WHEN {{ granularity }} = 'yearly' + THEN toStartOfYear(addYears(package_downloads_timeseries_bounds.actual_start_date, number)) + END AS "startDate", + CASE + WHEN {{ granularity }} = 'daily' + THEN toDate(addDays(actual_start_date, number)) + WHEN {{ granularity }} = 'weekly' + THEN toDate(toStartOfWeek(addDays(actual_start_date, number * 7)) + INTERVAL 6 DAY) + WHEN {{ granularity }} = 'monthly' + THEN + toDate( + toStartOfMonth(addMonths(actual_start_date, number)) + + INTERVAL 1 MONTH + - INTERVAL 1 DAY + ) + WHEN {{ granularity }} = 'quarterly' + THEN + toDate( + toStartOfQuarter(addMonths(actual_start_date, number * 3)) + + INTERVAL 3 MONTH + - INTERVAL 1 DAY + ) + WHEN {{ granularity }} = 'yearly' + THEN + toDate( + toStartOfYear(addYears(actual_start_date, number)) + + INTERVAL 1 YEAR + - INTERVAL 1 DAY + ) + END AS "endDate" + FROM numbers(1000) + CROSS JOIN + ( + SELECT + CASE + WHEN {{ granularity }} = 'weekly' + THEN toStartOfWeek(actual_start_date) + WHEN {{ granularity }} = 'monthly' + THEN toStartOfMonth(actual_start_date) + WHEN {{ granularity }} = 'quarterly' + THEN toStartOfQuarter(actual_start_date) + WHEN {{ granularity }} = 'yearly' + THEN toStartOfYear(actual_start_date) + ELSE actual_start_date + END AS actual_start_date, + actual_end_date + FROM package_downloads_timeseries_bounds + ) package_downloads_timeseries_bounds + WHERE "startDate" >= actual_start_date AND "startDate" < actual_end_date + +NODE package_downloads_initial_aggregation +SQL > + % + {% if defined(granularity) %} + SELECT + p.date, + sum(p.downloadsCount) as downloadsCount, + sum(p.dockerDownloadsCount) as "dockerDownloadsCount", + sum(p.dockerDependentsCount) as "dockerDependentsCount", + sum(p.dependentPackagesCount) as "dependentPackagesCount", + sum(p.dependentReposCount) as "dependentReposCount" + FROM package_downloads_filtered p + group by p.insightsProjectId, p.date + {% else %} + SELECT + max(p.downloadsCount) as downloadsCount, + max(p.dockerDownloadsCount) as "dockerDownloadsCount", + max(p.dockerDependentsCount) as "dockerDependentsCount", + max(p.dependentPackagesCount) as "dependentPackagesCount", + max(p.dependentReposCount) as "dependentReposCount" + FROM package_downloads_filtered p + group by p.insightsProjectId + {% end %} + +NODE package_downloads_timeseries_merge +SQL > + % + {% set onlyContributions = False %} + {% if defined(granularity) %} + SELECT + ds."startDate", + ds."endDate", + max(pdf.downloadsCount) AS "downloadsCount", + max(pdf.dockerDownloadsCount) AS "dockerDownloadsCount", + max(pdf.dockerDependentsCount) AS "dockerDependentsCount", + max(pdf.dependentPackagesCount) AS "dependentPackagesCount", + max(pdf.dependentReposCount) AS "dependentReposCount" + FROM package_downloads_generate_timeseries ds + LEFT JOIN + package_downloads_initial_aggregation pdf + ON CASE + WHEN {{ granularity }} = 'daily' + THEN toDate(pdf.date) + WHEN {{ granularity }} = 'weekly' + THEN toStartOfWeek(pdf.date) + WHEN {{ granularity }} = 'monthly' + THEN toStartOfMonth(pdf.date) + WHEN {{ granularity }} = 'quarterly' + THEN toStartOfQuarter(pdf.date) + WHEN {{ granularity }} = 'yearly' + THEN toStartOfYear(pdf.date) + END + = ds."startDate" + GROUP BY ds."startDate", ds."endDate" + order by ds."startDate" + {% else %} SELECT 1 + {% end %} + +NODE package_downloads_result +SQL > + % + {% if not defined(granularity) %} SELECT * FROM package_downloads_initial_aggregation + {% else %} select * from package_downloads_timeseries_merge + {% end %} diff --git a/services/libs/tinybird/pipes/packages.pipe b/services/libs/tinybird/pipes/packages.pipe new file mode 100644 index 0000000000..2770eaf451 --- /dev/null +++ b/services/libs/tinybird/pipes/packages.pipe @@ -0,0 +1,26 @@ +NODE packages_0 +SQL > + % + SELECT distinct p.repo, p.name, p.ecosystem + FROM packageDownloads p + where + p.insightsProjectId = (select insightsProjectId from segments_filtered) + {% if defined(search) %} + AND ( + p.name + ilike '%' + || {{ String(search, description="Filter package download repo", required=False) }} + || '%' + OR p.ecosystem + ilike '%' + || {{ String(search, description="Filter package download ecosystem", required=False) }} + || '%' + ) + {% end %} + {% if defined(repo) %} + AND p.repo = {{ String(repo, description="Filter by repo", required=False) }} + {% end %} + {% if defined(repos) %} + AND p.repo + IN {{ Array(repos, 'String', description="Filter activity repo list", required=False) }} + {% end %} diff --git a/services/libs/tinybird/pipes/pull_requests_filtered.pipe b/services/libs/tinybird/pipes/pull_requests_filtered.pipe index d094b3a6f1..df60c28daf 100644 --- a/services/libs/tinybird/pipes/pull_requests_filtered.pipe +++ b/services/libs/tinybird/pipes/pull_requests_filtered.pipe @@ -7,7 +7,11 @@ SQL > SELECT * FROM pull_requests_analyzed pra where - pra.segmentId = (SELECT id FROM segments_filtered) + pra.segmentId = (SELECT segmentId FROM segments_filtered) {% if defined(repo) %} AND pra.channel = {{ String(repo, description="Filter activity repo", required=False) }} {% end %} + {% if defined(repos) %} + AND a.channel + IN {{ Array(repos, 'String', description="Filter activity repo list", required=False) }} + {% end %} diff --git a/services/libs/tinybird/pipes/security_and_best_practices.pipe b/services/libs/tinybird/pipes/security_and_best_practices.pipe index 0275c5f135..f9a31a34d4 100644 --- a/services/libs/tinybird/pipes/security_and_best_practices.pipe +++ b/services/libs/tinybird/pipes/security_and_best_practices.pipe @@ -82,3 +82,7 @@ SQL > {% if defined(repo) %} AND s.repo = {{ String(repo, description="Filter activity repo", required=False) }} {% end %} + {% if defined(repos) %} + AND s.repo + IN {{ Array(repos, 'String', description="Filter activity repo list", required=False) }} + {% end %} From de8a8351cb74523596f5b166ab18cbb25cbb79e9 Mon Sep 17 00:00:00 2001 From: anilb Date: Thu, 10 Jul 2025 22:55:25 +0200 Subject: [PATCH 2/5] fix: formatting --- .../libs/tinybird/pipes/package_metrics.pipe | 237 +++++++++--------- 1 file changed, 116 insertions(+), 121 deletions(-) diff --git a/services/libs/tinybird/pipes/package_metrics.pipe b/services/libs/tinybird/pipes/package_metrics.pipe index abfb64c489..83506a39d9 100644 --- a/services/libs/tinybird/pipes/package_metrics.pipe +++ b/services/libs/tinybird/pipes/package_metrics.pipe @@ -32,100 +32,100 @@ SQL > NODE package_downloads_timeseries_bounds SQL > - % - {% if defined(startDate) and not defined(endDate) %} - select toDate({{ startDate }}) as actual_start_date, toDate(now()) as actual_end_date - {% end %} - {% if not defined(startDate) and defined(endDate) %} - select - min(package_downloads_filtered.date) as actual_start_date, - toDate({{ endDate }}) as actual_end_date - from package_downloads_filtered - where package_downloads_filtered.date > '1980-01-01' - {% end %} - {% if not defined(startDate) and not defined(endDate) %} - select - min(toDate(package_downloads_filtered.date)) as actual_start_date, - toDate(now()) as actual_end_date - from package_downloads_filtered - where package_downloads_filtered.date > '1980-01-01' - {% end %} - {% if defined(startDate) and defined(endDate) %} - select toDate({{ startDate }}) as actual_start_date, toDate({{ endDate }}) as actual_end_date - {% end %} - - + {% if defined(startDate) and not defined(endDate) %} + select toDate({{ startDate }}) as actual_start_date, toDate(now()) as actual_end_date + {% end %} + {% if not defined(startDate) and defined(endDate) %} + select + min(package_downloads_filtered.date) as actual_start_date, + toDate({{ endDate }}) as actual_end_date + from package_downloads_filtered + where package_downloads_filtered.date > '1980-01-01' + {% end %} + {% if not defined(startDate) and not defined(endDate) %} + select + min(toDate(package_downloads_filtered.date)) as actual_start_date, + toDate(now()) as actual_end_date + from package_downloads_filtered + where package_downloads_filtered.date > '1980-01-01' + {% end %} + {% if defined(startDate) and defined(endDate) %} + select toDate({{ startDate }}) as actual_start_date, toDate({{ endDate }}) as actual_end_date + {% end %} NODE package_downloads_generate_timeseries SQL > - % - SELECT DISTINCT - CASE - WHEN {{ granularity }} = 'daily' - THEN toDate(addDays(package_downloads_timeseries_bounds.actual_start_date, number)) - WHEN {{ granularity }} = 'weekly' - THEN toStartOfWeek(addDays(package_downloads_timeseries_bounds.actual_start_date, number * 7)) - WHEN {{ granularity }} = 'monthly' - THEN toStartOfMonth(addMonths(package_downloads_timeseries_bounds.actual_start_date, number)) - WHEN {{ granularity }} = 'quarterly' - THEN toStartOfQuarter(addMonths(package_downloads_timeseries_bounds.actual_start_date, number * 3)) - WHEN {{ granularity }} = 'yearly' - THEN toStartOfYear(addYears(package_downloads_timeseries_bounds.actual_start_date, number)) - END AS "startDate", - CASE - WHEN {{ granularity }} = 'daily' - THEN toDate(addDays(actual_start_date, number)) - WHEN {{ granularity }} = 'weekly' - THEN toDate(toStartOfWeek(addDays(actual_start_date, number * 7)) + INTERVAL 6 DAY) - WHEN {{ granularity }} = 'monthly' - THEN - toDate( - toStartOfMonth(addMonths(actual_start_date, number)) - + INTERVAL 1 MONTH - - INTERVAL 1 DAY - ) - WHEN {{ granularity }} = 'quarterly' - THEN - toDate( - toStartOfQuarter(addMonths(actual_start_date, number * 3)) - + INTERVAL 3 MONTH - - INTERVAL 1 DAY - ) - WHEN {{ granularity }} = 'yearly' - THEN - toDate( - toStartOfYear(addYears(actual_start_date, number)) - + INTERVAL 1 YEAR - - INTERVAL 1 DAY - ) - END AS "endDate" - FROM numbers(1000) - CROSS JOIN - ( - SELECT - CASE - WHEN {{ granularity }} = 'weekly' - THEN toStartOfWeek(actual_start_date) - WHEN {{ granularity }} = 'monthly' - THEN toStartOfMonth(actual_start_date) - WHEN {{ granularity }} = 'quarterly' - THEN toStartOfQuarter(actual_start_date) - WHEN {{ granularity }} = 'yearly' - THEN toStartOfYear(actual_start_date) - ELSE actual_start_date - END AS actual_start_date, - actual_end_date - FROM package_downloads_timeseries_bounds - ) package_downloads_timeseries_bounds - WHERE "startDate" >= actual_start_date AND "startDate" < actual_end_date - - + SELECT DISTINCT + CASE + WHEN {{ granularity }} = 'daily' + THEN toDate(addDays(package_downloads_timeseries_bounds.actual_start_date, number)) + WHEN {{ granularity }} = 'weekly' + THEN + toStartOfWeek( + addDays(package_downloads_timeseries_bounds.actual_start_date, number * 7) + ) + WHEN {{ granularity }} = 'monthly' + THEN + toStartOfMonth(addMonths(package_downloads_timeseries_bounds.actual_start_date, number)) + WHEN {{ granularity }} = 'quarterly' + THEN + toStartOfQuarter( + addMonths(package_downloads_timeseries_bounds.actual_start_date, number * 3) + ) + WHEN {{ granularity }} = 'yearly' + THEN toStartOfYear(addYears(package_downloads_timeseries_bounds.actual_start_date, number)) + END AS "startDate", + CASE + WHEN {{ granularity }} = 'daily' + THEN toDate(addDays(actual_start_date, number)) + WHEN {{ granularity }} = 'weekly' + THEN toDate(toStartOfWeek(addDays(actual_start_date, number * 7)) + INTERVAL 6 DAY) + WHEN {{ granularity }} = 'monthly' + THEN + toDate( + toStartOfMonth(addMonths(actual_start_date, number)) + + INTERVAL 1 MONTH + - INTERVAL 1 DAY + ) + WHEN {{ granularity }} = 'quarterly' + THEN + toDate( + toStartOfQuarter(addMonths(actual_start_date, number * 3)) + + INTERVAL 3 MONTH + - INTERVAL 1 DAY + ) + WHEN {{ granularity }} = 'yearly' + THEN + toDate( + toStartOfYear(addYears(actual_start_date, number)) + + INTERVAL 1 YEAR + - INTERVAL 1 DAY + ) + END AS "endDate" + FROM numbers(1000) + CROSS JOIN + ( + SELECT + CASE + WHEN {{ granularity }} = 'weekly' + THEN toStartOfWeek(actual_start_date) + WHEN {{ granularity }} = 'monthly' + THEN toStartOfMonth(actual_start_date) + WHEN {{ granularity }} = 'quarterly' + THEN toStartOfQuarter(actual_start_date) + WHEN {{ granularity }} = 'yearly' + THEN toStartOfYear(actual_start_date) + ELSE actual_start_date + END AS actual_start_date, + actual_end_date + FROM package_downloads_timeseries_bounds + ) package_downloads_timeseries_bounds + WHERE "startDate" >= actual_start_date AND "startDate" < actual_end_date NODE package_downloads_initial_aggregation SQL > - % SELECT {% if defined(granularity) %} p.date, {% end %} @@ -137,44 +137,39 @@ SQL > FROM package_downloads_filtered p group by p.insightsProjectId {% if defined(granularity) %}, p.date {% end %} - - NODE package_downloads_timeseries_merge SQL > - % - {% set onlyContributions = False %} - {% if defined(granularity) %} - SELECT - ds."startDate", - ds."endDate", - max(pdf.downloadsCount) AS "downloadsCount", - max(pdf.dockerDownloadsCount) AS "dockerDownloadsCount", - max(pdf.dockerDependentsCount) AS "dockerDependentsCount", - max(pdf.dependentPackagesCount) AS "dependentPackagesCount", - max(pdf.dependentReposCount) AS "dependentReposCount" - FROM package_downloads_generate_timeseries ds - LEFT JOIN - package_downloads_initial_aggregation pdf - ON CASE - WHEN {{ granularity }} = 'daily' - THEN toDate(pdf.date) - WHEN {{ granularity }} = 'weekly' - THEN toStartOfWeek(pdf.date) - WHEN {{ granularity }} = 'monthly' - THEN toStartOfMonth(pdf.date) - WHEN {{ granularity }} = 'quarterly' - THEN toStartOfQuarter(pdf.date) - WHEN {{ granularity }} = 'yearly' - THEN toStartOfYear(pdf.date) - END - = ds."startDate" - GROUP BY ds."startDate", ds."endDate" - order by ds."startDate" - {% else %} SELECT 1 - {% end %} - - + {% set onlyContributions = False %} + {% if defined(granularity) %} + SELECT + ds."startDate", + ds."endDate", + max(pdf.downloadsCount) AS "downloadsCount", + max(pdf.dockerDownloadsCount) AS "dockerDownloadsCount", + max(pdf.dockerDependentsCount) AS "dockerDependentsCount", + max(pdf.dependentPackagesCount) AS "dependentPackagesCount", + max(pdf.dependentReposCount) AS "dependentReposCount" + FROM package_downloads_generate_timeseries ds + LEFT JOIN + package_downloads_initial_aggregation pdf + ON CASE + WHEN {{ granularity }} = 'daily' + THEN toDate(pdf.date) + WHEN {{ granularity }} = 'weekly' + THEN toStartOfWeek(pdf.date) + WHEN {{ granularity }} = 'monthly' + THEN toStartOfMonth(pdf.date) + WHEN {{ granularity }} = 'quarterly' + THEN toStartOfQuarter(pdf.date) + WHEN {{ granularity }} = 'yearly' + THEN toStartOfYear(pdf.date) + END + = ds."startDate" + GROUP BY ds."startDate", ds."endDate" + order by ds."startDate" + {% else %} SELECT 1 + {% end %} NODE package_downloads_result SQL > From ae39ac6d95ebc42609e7fea47326886826cbd824 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C5=A1per=20Grom?= Date: Fri, 11 Jul 2025 13:38:08 +0200 Subject: [PATCH 3/5] fix: pr filtered MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Gašper Grom --- services/libs/tinybird/pipes/pull_requests_filtered.pipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/libs/tinybird/pipes/pull_requests_filtered.pipe b/services/libs/tinybird/pipes/pull_requests_filtered.pipe index df60c28daf..150b02c202 100644 --- a/services/libs/tinybird/pipes/pull_requests_filtered.pipe +++ b/services/libs/tinybird/pipes/pull_requests_filtered.pipe @@ -12,6 +12,6 @@ SQL > AND pra.channel = {{ String(repo, description="Filter activity repo", required=False) }} {% end %} {% if defined(repos) %} - AND a.channel + AND pra.channel IN {{ Array(repos, 'String', description="Filter activity repo list", required=False) }} {% end %} From 41ecd3f6d231000e346446b5e13accc147a3d5e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C5=A1per=20Grom?= Date: Fri, 11 Jul 2025 13:56:48 +0200 Subject: [PATCH 4/5] feat: filter segments by multi repos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Gašper Grom --- services/libs/tinybird/pipes/segments_filtered.pipe | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/services/libs/tinybird/pipes/segments_filtered.pipe b/services/libs/tinybird/pipes/segments_filtered.pipe index 50e1bf523a..239e1475be 100644 --- a/services/libs/tinybird/pipes/segments_filtered.pipe +++ b/services/libs/tinybird/pipes/segments_filtered.pipe @@ -15,3 +15,9 @@ SQL > {{ String(repo, description="Filter activity repo", required=False) }} ) {% end %} + {% if defined(repos) %} + AND arrayAll( + r -> has(insightsProjects.repositories, r), + {{ Array(repos, 'String', description="Filter activity repo list", required=False) }} + ) + {% end %} From 38de86ca29757383ab67c28256de620578ca8242 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C5=A1per=20Grom?= Date: Mon, 14 Jul 2025 21:38:28 +0200 Subject: [PATCH 5/5] feat: remove single repo filtering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Gašper Grom --- .../tinybird/pipes/activities_filtered.pipe | 3 -- ...activities_filtered_historical_cutoff.pipe | 3 -- .../pipes/activities_filtered_retention.pipe | 3 -- .../pipes/activityRepositories_filtered.pipe | 3 -- ..._heatmap_by_weekday_and_2hours_blocks.pipe | 3 -- .../libs/tinybird/pipes/member_roles.pipe | 3 -- .../libs/tinybird/pipes/package_metrics.pipe | 32 ++++++++++++------- services/libs/tinybird/pipes/packages.pipe | 5 +-- .../pipes/pull_requests_filtered.pipe | 3 -- .../pipes/security_and_best_practices.pipe | 3 -- .../tinybird/pipes/segments_filtered.pipe | 6 ---- 11 files changed, 21 insertions(+), 46 deletions(-) diff --git a/services/libs/tinybird/pipes/activities_filtered.pipe b/services/libs/tinybird/pipes/activities_filtered.pipe index ecf32e2887..970e124773 100644 --- a/services/libs/tinybird/pipes/activities_filtered.pipe +++ b/services/libs/tinybird/pipes/activities_filtered.pipe @@ -13,9 +13,6 @@ SQL > AND a.timestamp < {{ DateTime(endDate, description="Filter activity timestamp before", required=False) }} {% end %} - {% if defined(repo) %} - AND a.channel = {{ String(repo, description="Filter activity repo", required=False) }} - {% end %} {% if defined(repos) %} AND a.channel IN {{ Array(repos, 'String', description="Filter activity repo list", required=False) }} diff --git a/services/libs/tinybird/pipes/activities_filtered_historical_cutoff.pipe b/services/libs/tinybird/pipes/activities_filtered_historical_cutoff.pipe index ec6af8d1dc..ebb1cf235f 100644 --- a/services/libs/tinybird/pipes/activities_filtered_historical_cutoff.pipe +++ b/services/libs/tinybird/pipes/activities_filtered_historical_cutoff.pipe @@ -17,9 +17,6 @@ SQL > AND a.timestamp < {{ DateTime(endDate, description="Filter activity timestamp before", required=False) }} {% end %} - {% if defined(repo) %} - AND a.channel = {{ String(repo, description="Filter activity repo", required=False) }} - {% end %} {% if defined(repos) %} AND a.channel IN {{ Array(repos, 'String', description="Filter activity repo list", required=False) }} diff --git a/services/libs/tinybird/pipes/activities_filtered_retention.pipe b/services/libs/tinybird/pipes/activities_filtered_retention.pipe index 367893b6cd..13da259951 100644 --- a/services/libs/tinybird/pipes/activities_filtered_retention.pipe +++ b/services/libs/tinybird/pipes/activities_filtered_retention.pipe @@ -33,9 +33,6 @@ SQL > AND a.timestamp < {{ DateTime(endDate, description="Filter activity timestamp before", required=False) }} {% end %} - {% if defined(repo) %} - AND a.channel = {{ String(repo, description="Filter activity repo", required=False) }} - {% end %} {% if defined(repos) %} AND a.channel IN {{ Array(repos, 'String', description="Filter activity repo list", required=False) }} diff --git a/services/libs/tinybird/pipes/activityRepositories_filtered.pipe b/services/libs/tinybird/pipes/activityRepositories_filtered.pipe index 49d651e1bd..e625afbca9 100644 --- a/services/libs/tinybird/pipes/activityRepositories_filtered.pipe +++ b/services/libs/tinybird/pipes/activityRepositories_filtered.pipe @@ -9,9 +9,6 @@ SQL > from insightsProjects final where 1 = 1 - {% if defined(repo) %} - AND repo = {{ String(repo, description="Filter project repo", required=False) }} - {% end %} {% if defined(repos) %} AND repo IN {{ Array(repos, 'String', description="Filter activity repo list", required=False) }} diff --git a/services/libs/tinybird/pipes/activity_heatmap_by_weekday_and_2hours_blocks.pipe b/services/libs/tinybird/pipes/activity_heatmap_by_weekday_and_2hours_blocks.pipe index d6cf31480f..67a630de07 100644 --- a/services/libs/tinybird/pipes/activity_heatmap_by_weekday_and_2hours_blocks.pipe +++ b/services/libs/tinybird/pipes/activity_heatmap_by_weekday_and_2hours_blocks.pipe @@ -16,9 +16,6 @@ SQL > AND a.timestamp < {{ DateTime(endDate, description="Filter activity timestamp before", required=False) }} {% end %} - {% if defined(repo) %} - AND a.channel = {{ String(repo, description="Filter activity repo", required=False) }} - {% end %} {% if defined(repos) %} AND a.channel IN {{ Array(repos, 'String', description="Filter activity repo list", required=False) }} diff --git a/services/libs/tinybird/pipes/member_roles.pipe b/services/libs/tinybird/pipes/member_roles.pipe index 1954670dfa..f613b22ac5 100644 --- a/services/libs/tinybird/pipes/member_roles.pipe +++ b/services/libs/tinybird/pipes/member_roles.pipe @@ -8,9 +8,6 @@ SQL > {% if defined(project) %} AND insightsProjectId = (SELECT insightsProjectId FROM segments_filtered) {% end %} - {% if defined(repo) %} - AND repoUrl = {{ String(repo, description="Filter activity repo", required=False) }} - {% end %} {% if defined(repos) %} AND repoUrl IN {{ Array(repos, 'String', description="Filter activity repo list", required=False) }} diff --git a/services/libs/tinybird/pipes/package_metrics.pipe b/services/libs/tinybird/pipes/package_metrics.pipe index 83506a39d9..c797db4e18 100644 --- a/services/libs/tinybird/pipes/package_metrics.pipe +++ b/services/libs/tinybird/pipes/package_metrics.pipe @@ -22,9 +22,6 @@ SQL > {% if defined(name) %} AND p.name = {{ String(name, description="Filter by package name", required=False) }} {% end %} - {% if defined(repo) %} - AND p.repo = {{ String(repo, description="Filter by repo", required=False) }} - {% end %} {% if defined(repos) %} AND p.repo IN {{ Array(repos, 'String', description="Filter activity repo list", required=False) }} @@ -127,15 +124,26 @@ SQL > NODE package_downloads_initial_aggregation SQL > % - SELECT - {% if defined(granularity) %} p.date, {% end %} - sum(p.downloadsCount) as downloadsCount, - sum(p.dockerDownloadsCount) as "dockerDownloadsCount", - sum(p.dockerDependentsCount) as "dockerDependentsCount", - sum(p.dependentPackagesCount) as "dependentPackagesCount", - sum(p.dependentReposCount) as "dependentReposCount" - FROM package_downloads_filtered p - group by p.insightsProjectId {% if defined(granularity) %}, p.date {% end %} + {% if defined(granularity) %} + SELECT + p.date, + sum(p.downloadsCount) as downloadsCount, + sum(p.dockerDownloadsCount) as "dockerDownloadsCount", + sum(p.dockerDependentsCount) as "dockerDependentsCount", + sum(p.dependentPackagesCount) as "dependentPackagesCount", + sum(p.dependentReposCount) as "dependentReposCount" + FROM package_downloads_filtered p + group by p.insightsProjectId, p.date + {% else %} + SELECT + max(p.downloadsCount) as downloadsCount, + max(p.dockerDownloadsCount) as "dockerDownloadsCount", + max(p.dockerDependentsCount) as "dockerDependentsCount", + max(p.dependentPackagesCount) as "dependentPackagesCount", + max(p.dependentReposCount) as "dependentReposCount" + FROM package_downloads_filtered p + group by p.insightsProjectId + {% end %} NODE package_downloads_timeseries_merge SQL > diff --git a/services/libs/tinybird/pipes/packages.pipe b/services/libs/tinybird/pipes/packages.pipe index 7e6e3787ee..3966b39d40 100644 --- a/services/libs/tinybird/pipes/packages.pipe +++ b/services/libs/tinybird/pipes/packages.pipe @@ -4,7 +4,7 @@ SQL > SELECT distinct p.repo, p.name, p.ecosystem FROM packageDownloads p where - insightsProjectId = (select insightsProjectId from segments_filtered) + p.insightsProjectId = (select insightsProjectId from segments_filtered) {% if defined(search) %} AND ( p.name @@ -17,9 +17,6 @@ SQL > || '%' ) {% end %} - {% if defined(repo) %} - AND p.repo = {{ String(repo, description="Filter by repo", required=False) }} - {% end %} {% if defined(repos) %} AND p.repo IN {{ Array(repos, 'String', description="Filter activity repo list", required=False) }} diff --git a/services/libs/tinybird/pipes/pull_requests_filtered.pipe b/services/libs/tinybird/pipes/pull_requests_filtered.pipe index 150b02c202..6c35bb4d65 100644 --- a/services/libs/tinybird/pipes/pull_requests_filtered.pipe +++ b/services/libs/tinybird/pipes/pull_requests_filtered.pipe @@ -8,9 +8,6 @@ SQL > FROM pull_requests_analyzed pra where pra.segmentId = (SELECT segmentId FROM segments_filtered) - {% if defined(repo) %} - AND pra.channel = {{ String(repo, description="Filter activity repo", required=False) }} - {% end %} {% if defined(repos) %} AND pra.channel IN {{ Array(repos, 'String', description="Filter activity repo list", required=False) }} diff --git a/services/libs/tinybird/pipes/security_and_best_practices.pipe b/services/libs/tinybird/pipes/security_and_best_practices.pipe index f9a31a34d4..33ec1e65f7 100644 --- a/services/libs/tinybird/pipes/security_and_best_practices.pipe +++ b/services/libs/tinybird/pipes/security_and_best_practices.pipe @@ -79,9 +79,6 @@ SQL > = {{ String(project, description="Filter by project slug", required=True) }} {% else %} AND false {% end %} - {% if defined(repo) %} - AND s.repo = {{ String(repo, description="Filter activity repo", required=False) }} - {% end %} {% if defined(repos) %} AND s.repo IN {{ Array(repos, 'String', description="Filter activity repo list", required=False) }} diff --git a/services/libs/tinybird/pipes/segments_filtered.pipe b/services/libs/tinybird/pipes/segments_filtered.pipe index 239e1475be..eeb70f549c 100644 --- a/services/libs/tinybird/pipes/segments_filtered.pipe +++ b/services/libs/tinybird/pipes/segments_filtered.pipe @@ -9,12 +9,6 @@ SQL > AND slug = {{ String(project, description="Filter by project slug", required=True) }} {% else %} AND false {% end %} - {% if defined(repo) %} - AND has( - insightsProjects.repositories, - {{ String(repo, description="Filter activity repo", required=False) }} - ) - {% end %} {% if defined(repos) %} AND arrayAll( r -> has(insightsProjects.repositories, r),