From 80f94a46c7a8532e32e740b9f9771e08a13e9826 Mon Sep 17 00:00:00 2001 From: Jason Baumohl Date: Thu, 16 Mar 2023 03:29:35 +0000 Subject: [PATCH] fixed bug in query and out of memory issue --- .../get_downloaders_lookup.py | 32 ++++++++++++++++--- .../methods_upload_doi_metrics.py | 4 +-- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/source/monthly_cron_jobs/get_downloaders_lookup.py b/source/monthly_cron_jobs/get_downloaders_lookup.py index cdb8208..7478148 100644 --- a/source/monthly_cron_jobs/get_downloaders_lookup.py +++ b/source/monthly_cron_jobs/get_downloaders_lookup.py @@ -147,7 +147,7 @@ def pull_downloading_jobs(downloaders_set, problem_refs_lookup): earliest_year = 2016 today = date.today() current_year = int(today.year) - part_of_year_list = (1,2,3,4) + part_of_year_list = (1,2,3,4,5,6,7,8,9,10,11,12) years_to_do = range(earliest_year,(current_year + 1)) @@ -162,15 +162,39 @@ def pull_downloading_jobs(downloaders_set, problem_refs_lookup): for part_of_year in part_of_year_list: if part_of_year == 1: begin = int(datetime(year_to_do, 1, 1, 0, 0).timestamp()) * 1000 - end = int(datetime(year_to_do, 3, 31, 23, 59).timestamp()) * 1000 + end = int(datetime(year_to_do, 1, 31, 23, 59).timestamp()) * 1000 elif part_of_year == 2: + begin = int(datetime(year_to_do, 2, 1, 0, 0).timestamp()) * 1000 + end = int(datetime(year_to_do, 3, 1, 23, 59).timestamp()) * 1000 + elif part_of_year == 3: + begin = int(datetime(year_to_do, 3, 2, 0, 0).timestamp()) * 1000 + end = int(datetime(year_to_do, 3, 31, 23, 59).timestamp()) * 1000 + elif part_of_year == 4: begin = int(datetime(year_to_do, 4, 1, 0, 0).timestamp()) * 1000 + end = int(datetime(year_to_do, 4, 30, 23, 59).timestamp()) * 1000 + elif part_of_year == 5: + begin = int(datetime(year_to_do, 5, 1, 0, 0).timestamp()) * 1000 + end = int(datetime(year_to_do, 5, 31, 23, 59).timestamp()) * 1000 + elif part_of_year == 6: + begin = int(datetime(year_to_do, 6, 1, 0, 0).timestamp()) * 1000 end = 
int(datetime(year_to_do, 6, 30, 23, 59).timestamp()) * 1000 - elif part_of_year == 3: + elif part_of_year == 7: begin = int(datetime(year_to_do, 7, 1, 0, 0).timestamp()) * 1000 + end = int(datetime(year_to_do, 7, 31, 23, 59).timestamp()) * 1000 + elif part_of_year == 8: + begin = int(datetime(year_to_do, 8, 1, 0, 0).timestamp()) * 1000 + end = int(datetime(year_to_do, 8, 31, 23, 59).timestamp()) * 1000 + elif part_of_year == 9: + begin = int(datetime(year_to_do, 9, 1, 0, 0).timestamp()) * 1000 end = int(datetime(year_to_do, 9, 30, 23, 59).timestamp()) * 1000 - else: + elif part_of_year == 10: begin = int(datetime(year_to_do, 10, 1, 0, 0).timestamp()) * 1000 + end = int(datetime(year_to_do, 10, 31, 23, 59).timestamp()) * 1000 + elif part_of_year == 11: + begin = int(datetime(year_to_do, 11, 1, 0, 0).timestamp()) * 1000 + end = int(datetime(year_to_do, 11, 30, 23, 59).timestamp()) * 1000 + elif part_of_year == 12: + begin = int(datetime(year_to_do, 12, 1, 0, 0).timestamp()) * 1000 end = int(datetime(year_to_do, 12, 31, 23, 59).timestamp()) * 1000 yearly_start_time = time.time() diff --git a/source/monthly_cron_jobs/methods_upload_doi_metrics.py b/source/monthly_cron_jobs/methods_upload_doi_metrics.py index 7c21038..401d068 100644 --- a/source/monthly_cron_jobs/methods_upload_doi_metrics.py +++ b/source/monthly_cron_jobs/methods_upload_doi_metrics.py @@ -780,7 +780,7 @@ def upload_doi_externally_derived_data(doi_results_map, ws_owners_lookup): doi_total_downloads_statement = ("select count(*) as total_downloads from " "(select distinct downloaded_ws_obj_id, job_id " - "from copy_doi_unique_downloads where doi_ws_id = %s);") + "from copy_doi_unique_downloads where doi_ws_id = %s) tdl;") doi_total_unique_download_users = ( "select count(*) from (select distinct downloader_username from copy_doi_unique_downloads where doi_ws_id = %s) as user_count;") @@ -829,7 +829,7 @@ def upload_doi_externally_derived_data(doi_results_map, ws_owners_lookup): 
doi_total_downloads_statement = ("select count(*) as total_downloads from " "(select distinct downloaded_ws_obj_id, job_id " - "from doi_unique_downloads where doi_ws_id = %s);") + "from doi_unique_downloads where doi_ws_id = %s) tdl;") doi_total_unique_download_users = ( "select count(*) from (select distinct downloader_username from doi_unique_downloads where doi_ws_id = %s) as user_count;")