Skip to content

Commit

Permalink
Merge pull request #110 from kbase/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
jkbaumohl authored Apr 5, 2023
2 parents 9246355 + a7b0edd commit 4a3a0b4
Show file tree
Hide file tree
Showing 3 changed files with 242 additions and 17 deletions.
56 changes: 39 additions & 17 deletions source/custom_scripts/dump_query_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,22 +30,10 @@ def dump_query_results():
query = "use " + metrics
cursor.execute(query)

# CHANGE QUERY HERE
# query = "select username, display_name, email, orcid, kb_internal_user, institution, country, signup_date, last_signin_date from user_info order by signup_date"
# Query for Adam Narratives dump of information:
# select wc.* from metrics.user_info ui inner join metrics_reporting.workspaces_current wc on ui.username = wc.username
# where ui.kb_internal_user = 0 and wc.narrative_version > 0 and is_deleted = 0 and is_temporary = 0;
#query = ("select * from metrics_reporting.narrative_app_flows")
query = ("select * from metrics_reporting.user_super_summary")
# CHANGE COLUMN HEADERS HERE TO MATCH QUERY HEADERS
# print("username\temail\tlast_signin_date\tmax_last_seen\tHasBeenSeen")
# print("ws_id\tusername\tmod_date\tinitial_save_date\trecord_date\ttop_lvl_object_count\ttotal_object_count\tvisible_app_cells_count\tnarrative_version\thidden_object_count\tdeleted_object_count\ttotal_size\ttop_lvl_size\tis_public\tis_temporary\tnumber_of_shares")
# Headers for Adam's narratives query (Note if more columns added, may need to update this
# print(
# "ws_id\tusername\tmod_date\tinitial_save_date\trecord_date\ttop_lvl_object_count\ttotal_object_count\tvisible_app_cells_count\tcode_cells_count\t"
# "narrative_version\thidden_object_count\tdeleted_object_count\ttotal_size\ttop_lvl_size\tis_public\tis_temporary\tis_deleted\tnumber_of_shares\t"
# "num_nar_obj_ids\tstatic_narratives_count"
# )
# CHANGE QUERIES AND HEADERS HERE

# USER SUPER SUMMARY
query = ("select * from metrics_reporting.user_super_summary")
# HEADERS FOR user_super_summary
print(
"username\tdisplay_name\temail\tkb_internal_user\tuser_id\tglobus_login\tgoogle_login\torcid\tsession_info_country\tcountry\tstate\t"
Expand All @@ -58,9 +46,43 @@ def dump_query_results():
"total_apps_run_last90\ttotal_apps_run_last30\ttotal_app_errors_all_time\tfirst_app_run\tlast_app_run\ttotal_run_time_hours\t"
"total_queue_time_hours\ttotal_CPU_hours\tsession_count_all_time\tsession_count_last_year\tsession_count_last_90\tsession_count_last_30"
)
#Header for Adam's narrative_app_flow

# APP FLOWS - for Adam's narrative_app_flow
#query = ("select * from metrics_reporting.narrative_app_flows")
#print("ws_id\tusername\tapp_name\tfunc_name\tstart_date\tfinish_date")

# app popularity growth
#query = ("select uau.app_name, DATE_FORMAT(`finish_date`,'%Y-%m') as run_month, count(*) as run_count, sum(run_time)/3600 as total_run_hours\
# from metrics.user_app_usage uau inner join metrics.user_info ui on uau.username = ui.username\
# where ui.kb_internal_user = 0\
# group by uau.app_name, run_month\
# order by run_month, app_name")
#print("app_name\trun_month\trun_count\ttotal_run_hours")

# App category run totals
#query = ("select uau.app_name,\
# IFNULL(app_category, \"No Category Association\") as app_cat,\
# DATE_FORMAT(`finish_date`,'%Y-%m') as run_month, count(*) as run_count,\
# sum(run_time)/3600 as total_run_hours\
# from metrics.user_app_usage uau inner join\
# metrics.user_info ui on uau.username = ui.username\
# left outer join\
# metrics.app_name_category_map anm on uau.app_name = anm.app_name\
# where ui.kb_internal_user = 0\
# group by uau.app_name, app_cat, run_month\
# order by run_month, app_name;")
#print("app_name\tapp_cat\trun_month\trun_count\ttotal_run_hours")

# USER SESSION STATS:
#query = ("select si.username, count(*) as session_count, sum(estimated_hrs_active) total_hours_active,\
# avg(estimated_hrs_active) avg_hours_active, std(estimated_hrs_active) std_hours_active,\
# min(first_seen), max(last_seen)\
# from metrics.user_info ui inner join metrics.session_info si on ui.username = si.username\
# where estimated_hrs_active < 24\
# group by username\
# order by avg_hours_active desc, session_count, total_hours_active")
#print("username\tsession_count\ttotal_hours_active\tavg_hours_active\tstd_hours_active\tfirst_seen\tlast_seen")

cursor.execute(query)
row_values = list()

Expand Down
189 changes: 189 additions & 0 deletions source/monthly_cron_jobs/backfill_static_views_for_doi_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
import os
import requests
import mysql.connector as mysql

################################################################################################################################################################################################
################################################################################################################################################################################################
#
# NOTE: THIS SCRIPT WILL NOT WORK ANYMORE. STATIC NARRATIVE VIEWS WERE MOVED OUT OF DOI_METRICS
# AND INTO ANOTHER LOCATION (THE ORIGINAL NOTE WAS CUT OFF BEFORE NAMING THE DESTINATION).
#
################################################################################################################################################################################################
################################################################################################################################################################################################




# Required configuration, read from the environment at import time;
# a missing variable raises KeyError before anything else runs.
metrics_mysql_password = os.environ["METRICS_MYSQL_PWD"]

# MySQL host, and the database selected with "use <QUERY_ON>" after connecting.
sql_host = os.environ["SQL_HOST"]
query_on = os.environ["QUERY_ON"]

# Silence urllib3 warnings raised through requests.
requests.packages.urllib3.disable_warnings()

# URL requested with tqx=out:csv and sheet=Monthly to fetch the page-view data.
kb_google_analytics_url = os.environ["KB_GOOGLE_ANALYTICS_URL"]

def _parse_monthly_page_views(csv_text):
    """
    Parse the "Monthly" sheet CSV into per-workspace monthly page views.

    Rows before the header row (first column "Landing Page") are ignored.
    Each later row is read as: landing page path, year, month, page views;
    the workspace id is the third "/"-separated piece of the landing page.
    Blank lines are skipped. Any other malformed row still raises
    (IndexError / ValueError), so a bad sheet aborts before the DB is touched.

    Returns a dict: ws_id -> year -> month -> page views (duplicate rows summed).
    """
    # Local import so this block does not depend on the file's import header.
    import csv

    monthly_page_views = dict()
    row_number = 1
    found_header_line = False
    # splitlines() avoids the trailing empty piece and stray "\r" that
    # split("\n") leaves behind; both used to break the parsing below.
    for line in csv_text.splitlines():
        if not line.strip():
            continue
        # csv handles the quoting instead of blindly slicing off the first
        # and last character of every field.
        fields = next(csv.reader([line]), [])
        first_element = fields[0].strip() if fields else ""
        if not found_header_line:
            found_header_line = first_element == "Landing Page"
            continue

        print("in if")
        ws_id = int(first_element.split("/")[2])
        print("ws_id :" + str(ws_id))
        year = int(fields[1].strip())
        print("year :" + str(year))
        month = int(fields[2].strip())
        print("month :" + str(month))
        page_views = int(fields[3].strip())
        print("page_views :" + str(page_views))

        months_for_year = monthly_page_views.setdefault(ws_id, dict()).setdefault(
            year, dict()
        )
        months_for_year[month] = months_for_year.get(month, 0) + page_views
        print(str(row_number) + " :: " + line)
        row_number += 1
    return monthly_page_views


def _cumulative_views_for_observed_months(monthly_page_views):
    """
    Running total of page views per workspace, only for months present in the data.

    Returns a dict: ws_id -> year -> month -> cumulative page views.
    """
    summary = dict()
    for ws_id, years in monthly_page_views.items():
        running_total = 0
        summary[ws_id] = dict()
        for year in sorted(years):
            summary[ws_id][year] = dict()
            for month in sorted(years[year]):
                running_total += years[year][month]
                summary[ws_id][year][month] = running_total
    return summary


def _cumulative_views_for_all_months(monthly_page_views, years_to_do, months_to_do):
    """
    Running total of page views per workspace for every requested year/month.

    Months without data carry the previous total forward. Data for years not
    listed in years_to_do is not counted (same as before this rewrite).

    Returns a dict: ws_id -> year -> month -> cumulative page views.
    """
    complete = dict()
    for ws_id, years in monthly_page_views.items():
        running_total = 0
        complete[ws_id] = dict()
        for year in years_to_do:
            complete[ws_id][year] = dict()
            views_in_year = years.get(year, dict())
            for month in months_to_do:
                # The earlier code first stored a placeholder dict() for months
                # with no data and then overwrote it; only the total is kept.
                running_total += views_in_year.get(month, 0)
                complete[ws_id][year][month] = running_total
    return complete


def _fetch_existing_doi_months(cursor):
    """
    Read which (ws_id, year, month) combinations already have doi_metrics rows.

    Returns a dict: ws_id -> year (int) -> set of months (int).
    """
    existing = dict()
    get_existing_doi_metrics_statement = (
        "select ws_id, DATE_FORMAT(`record_date`,'%Y') as year, DATE_FORMAT(`record_date`,'%m') as month "
        "from doi_metrics")
    cursor.execute(get_existing_doi_metrics_statement)
    for (ws_id, year, month) in cursor:
        existing.setdefault(ws_id, dict()).setdefault(int(year), set()).add(int(month))
    return existing


def get_kbase_google_analytics():
    """
    Backfill metrics.doi_metrics.static_narrative_views from the Google sheet.

    Downloads the "Monthly" sheet as CSV, builds cumulative page-view totals
    per workspace and month, and updates every existing doi_metrics row whose
    record_date falls in the month AFTER the month the total was computed for.

    Returns 0 when the sheet could not be downloaded (non-200 response),
    1 after the updates have been committed.
    """
    params = (("tqx", "out:csv"), ("sheet", "Monthly"))
    # A timeout keeps the job from hanging forever on a stalled connection.
    response = requests.get(kb_google_analytics_url, params=params, timeout=60)
    if response.status_code != 200:
        print(
            "ERROR - KBase Google analytics GOOGLE SHEET RESPONSE STATUS CODE : "
            + str(response.status_code)
        )
        print(
            "KBase Google analytics."
        )
        return 0

    # key ws_id -> year -> month -> monthly_page_view
    static_narrative_view_monthly_stats = _parse_monthly_page_views(response.text)
    print(str(static_narrative_view_monthly_stats))
    print("Length static_narrative_view_monthly_stats : " + str(len(static_narrative_view_monthly_stats)))

    # Only printed for inspection; the updates below use the "complete" totals.
    static_narrative_view_summary_stats = _cumulative_views_for_observed_months(
        static_narrative_view_monthly_stats
    )
    print(str(static_narrative_view_summary_stats))
    print("Length static_narrative_view_summary_stats : " + str(len(static_narrative_view_summary_stats)))

    years_to_do = [2020, 2021, 2022, 2023]
    months_to_do = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    static_narrative_view_complete_stats = _cumulative_views_for_all_months(
        static_narrative_view_monthly_stats, years_to_do, months_to_do
    )
    print(str(static_narrative_view_complete_stats))
    print("Length static_narrative_view_complete_stats : " + str(len(static_narrative_view_complete_stats)))

    # connect to mysql
    db_connection = mysql.connect(
        host=sql_host, user="metrics", passwd=metrics_mysql_password, database="metrics"
    )
    try:
        cursor = db_connection.cursor()
        try:
            cursor.execute("use " + query_on)
            existing_doi_metrics_dict = _fetch_existing_doi_months(cursor)
        finally:
            cursor.close()

        udate_narratives_views_statement = (
            "update metrics.doi_metrics set static_narrative_views = %s "
            "where ws_id = %s and DATE_FORMAT(`record_date`,'%Y-%m') = %s;"
        )

        updates_performed = 0

        # Hard-coded workspace traced in the output. .get() avoids the KeyError
        # the direct lookup raised when this workspace has no doi_metrics rows.
        debug_ws_id = 133260
        print("WS ID: 133260 " + str(existing_doi_metrics_dict.get(debug_ws_id)))

        update_prep_cursor = db_connection.cursor(prepared=True)
        try:
            for ws_id, years in static_narrative_view_complete_stats.items():
                existing_years = existing_doi_metrics_dict.get(ws_id)
                if existing_years is None:
                    # No doi_metrics rows for this workspace: nothing to update.
                    continue
                is_debug_ws = ws_id == debug_ws_id
                for year, months in years.items():
                    for month, total_views in months.items():
                        # The total through month M is written to the row dated
                        # in month M+1 (presumably rows are recorded at the start
                        # of the following month -- confirm).
                        temp_month = month + 1
                        temp_year = year
                        if temp_month == 13:
                            temp_month = 1
                            temp_year = year + 1
                        date_used = "{}-{:02d}".format(temp_year, temp_month)

                        if is_debug_ws:
                            print("FOUND WS ID: 133260 " + str(existing_doi_metrics_dict[133260]))
                        if temp_year not in existing_years:
                            continue
                        if is_debug_ws:
                            print("FOUND WS ID: 133260 YEAR : " + str(temp_year) + "::" + str(existing_doi_metrics_dict[133260]))
                        if temp_month not in existing_years[temp_year]:
                            continue
                        if is_debug_ws:
                            print("FOUND WS ID: 133260 YEAR : " + str(temp_year) + "::MONTH " + str(temp_month) + "::" + str(existing_doi_metrics_dict[133260]))

                        # Do update statement
                        update_params = (total_views, ws_id, date_used)
                        print("udate_narratives_views_statement : " + udate_narratives_views_statement)
                        print("input : " + str(update_params))
                        update_prep_cursor.execute(udate_narratives_views_statement, update_params)
                        updates_performed += 1
        finally:
            update_prep_cursor.close()

        db_connection.commit()
    finally:
        # Always release the connection; an error before commit() leaves the
        # updates uncommitted.
        db_connection.close()

    print("Total updates performed: " + str(updates_performed))

    return 1

# Script entry point: the backfill runs whenever this module is executed or imported.
get_kbase_google_analytics()
14 changes: 14 additions & 0 deletions sql_create_statements/sql_reporting_views_and_tables.sql
Original file line number Diff line number Diff line change
Expand Up @@ -1592,3 +1592,17 @@ dmc.derived_object_count, dmc.copied_only_object_count, dmc.fully_derived_object
from metrics.doi_ws_map dwm inner join metrics_reporting.doi_metrics_current dmc
on dwm.ws_id =dmc.ws_id
order by dwm.doi_url, is_parent_ws desc);


-- Report view: one row per DOI/workspace mapping, combining the DOI metadata
-- (doi_ws_map), the current DOI metrics (doi_metrics_current) and the static
-- narrative view count taken from workspaces_current.
CREATE OR REPLACE VIEW metrics_reporting.doi_metrics_current_report
AS (
    SELECT
        dwm.doi_url,
        dwm.title,
        dwm.is_parent_ws,
        dmc.ws_id,
        dmc.record_date,
        dmc.unique_users_count,
        dmc.unique_ws_ids_count,
        dmc.ttl_dls_cnt,
        dmc.ttl_uniq_dl_users_cnt,
        dmc.ttl_dl_user_doi_obj_cnt,
        dmc.ttl_dl_users_dled_obj_cnt,
        dmc.derived_object_count,
        dmc.copied_only_object_count,
        dmc.fully_derived_object_pair_counts,
        wc.static_narratives_views
    FROM metrics.doi_ws_map dwm
    INNER JOIN metrics_reporting.doi_metrics_current dmc
        ON dwm.ws_id = dmc.ws_id
    INNER JOIN metrics_reporting.workspaces_current wc
        ON dmc.ws_id = wc.ws_id
    ORDER BY dwm.doi_url, dwm.is_parent_ws DESC
);

0 comments on commit 4a3a0b4

Please sign in to comment.