Skip to content

Commit

Permalink
Merge pull request #106 from kbase/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
jkbaumohl authored Mar 10, 2023
2 parents 1366a94 + 7c5d842 commit 0ceab07
Show file tree
Hide file tree
Showing 3 changed files with 222 additions and 3 deletions.
1 change: 1 addition & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ services:
PROFILE_URL: ${PROFILE_URL}
KB_INTERNAL_USER_URL: ${KB_INTERNAL_USER_URL}
KB_OUTREACH_EVENTS_URL: ${KB_OUTREACH_EVENTS_URL}
KB_GOOGLE_ANALYTICS_URL: ${KB_GOOGLE_ANALYTICS_URL}
MONGO_PATH: ${MONGO_PATH}
METRICSRO_MONGO_PATH: ${METRICSRO_MONGO_PATH}
ELASTICSEARCH_URL: ${ELASTICSEARCH_URL}
Expand Down
174 changes: 174 additions & 0 deletions source/monthly_cron_jobs/backfill_static_views_for_workspaces.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
import os
import requests
import mysql.connector as mysql

# Password for the "metrics" MySQL user used by the backfill below.
metrics_mysql_password = os.environ["METRICS_MYSQL_PWD"]

# MySQL host to connect to, and the database name issued in the "use" statement.
sql_host = os.environ["SQL_HOST"]
query_on = os.environ["QUERY_ON"]

# Silence urllib3 warnings (urllib3 is reached through requests.packages).
requests.packages.urllib3.disable_warnings()

# URL the Google analytics sheet is requested from (fetched as CSV below).
kb_google_analytics_url = os.environ["KB_GOOGLE_ANALYTICS_URL"]

def _parse_monthly_page_views(csv_text):
    """
    Parses the CSV text of the "Monthly" sheet.

    Returns a dict keyed ws_id -> year -> month -> monthly page views.
    Every line up to and including the "Landing Page" header row is ignored.
    Blank lines (for example a trailing newline) are skipped. Page views of
    rows that share the same (ws_id, year, month) are summed.

    Any other malformed data row still raises (IndexError / ValueError), so
    parsing fails before anything is written to the database.
    """
    # Local import so this block does not depend on the file's import section.
    import csv

    monthly_stats = dict()
    found_header_line = False
    i = 1
    # splitlines() also copes with "\r\n" line endings.
    for line in csv_text.splitlines():
        if not line.strip():
            continue
        # The csv module handles the quoting (including commas inside quoted
        # fields) instead of slicing off the first and last character.
        line_elements = [element.strip() for element in next(csv.reader([line]))]
        first_element = line_elements[0]
        if found_header_line:
            print("in if")
            # The workspace id is the third "/"-separated piece of the landing page.
            landing_page_elements = first_element.split("/")
            ws_id = int(landing_page_elements[2])
            print("ws_id :" + str(ws_id))
            year = int(line_elements[1])
            print("year :" + str(year))
            month = int(line_elements[2])
            print("month :" + str(month))
            page_views = int(line_elements[3])
            print("page_views :" + str(page_views))

            months = monthly_stats.setdefault(ws_id, dict()).setdefault(year, dict())
            months[month] = months.get(month, 0) + page_views
            print(str(i) + " :: " + line)
            i += 1
        elif first_element == "Landing Page":
            found_header_line = True
    return monthly_stats


def _running_totals_for_recorded_months(monthly_stats):
    """
    Returns ws_id -> year -> month -> cumulative page views, with entries only
    for the months that are present in monthly_stats.
    """
    summary_stats = dict()
    for ws_id, years in monthly_stats.items():
        running_total_page_views = 0
        summary_stats[ws_id] = dict()
        for year in sorted(years):
            summary_stats[ws_id][year] = dict()
            for month in sorted(years[year]):
                running_total_page_views += years[year][month]
                summary_stats[ws_id][year][month] = running_total_page_views
    return summary_stats


def _running_totals_for_all_months(monthly_stats, years_to_do, months_to_do):
    """
    Returns ws_id -> year -> month -> cumulative page views, with an entry for
    every (year, month) in years_to_do x months_to_do.

    Months without recorded views carry the previous total forward. Views
    recorded for years outside years_to_do are not counted.
    """
    complete_stats = dict()
    for ws_id, years in monthly_stats.items():
        running_total_page_views = 0
        complete_stats[ws_id] = dict()
        for year in years_to_do:
            complete_stats[ws_id][year] = dict()
            for month in months_to_do:
                running_total_page_views += years.get(year, dict()).get(month, 0)
                complete_stats[ws_id][year][month] = running_total_page_views
    return complete_stats


def get_kbase_google_analytics():
    """
    Backfills metrics.workspaces.static_narratives_views from the KBase
    Google analytics "Monthly" sheet.

    Downloads the sheet as CSV, builds cumulative page views per workspace for
    every month of 2020-2023, and updates the metrics.workspaces rows whose
    record_date falls in the month after each counted month.

    Returns 0 if the sheet could not be downloaded (non-200 response),
    otherwise 1 after the updates have been committed.
    """
    params = (("tqx", "out:csv"), ("sheet", "Monthly"))
    response = requests.get(kb_google_analytics_url, params=params)
    if response.status_code != 200:
        print(
            "ERROR - KBase Google analytics GOOGLE SHEET RESPONSE STATUS CODE : "
            + str(response.status_code)
        )
        print(
            "KBase Google analytics."
        )
        return 0

    # key ws_id -> year -> month -> monthly_page_view
    static_narrative_view_monthly_stats = _parse_monthly_page_views(response.text)
    print(str(static_narrative_view_monthly_stats))
    print("Length static_narrative_view_monthly_stats : " + str(len(static_narrative_view_monthly_stats)))

    # Only used for the diagnostic output below.
    static_narrative_view_summary_stats = _running_totals_for_recorded_months(
        static_narrative_view_monthly_stats
    )
    print(str(static_narrative_view_summary_stats))
    print("Length static_narrative_view_summary_stats : " + str(len(static_narrative_view_summary_stats)))

    years_to_do = [2020, 2021, 2022, 2023]
    months_to_do = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    static_narrative_view_complete_stats = _running_totals_for_all_months(
        static_narrative_view_monthly_stats, years_to_do, months_to_do
    )
    print(str(static_narrative_view_complete_stats))
    print("Length static_narrative_view_complete_stats : " + str(len(static_narrative_view_complete_stats)))

    update_narratives_views_statement = (
        "update metrics.workspaces set static_narratives_views = %s "
        "where ws_id = %s and DATE_FORMAT(`record_date`,'%Y-%m') = %s;"
    )
    updates_performed = 0

    # connect to mysql
    db_connection = mysql.connect(
        host=sql_host, user="metrics", passwd=metrics_mysql_password, database="metrics"
    )
    try:
        cursor = db_connection.cursor()
        cursor.execute("use " + query_on)

        update_prep_cursor = db_connection.cursor(prepared=True)

        for ws_id in static_narrative_view_complete_stats:
            for year in static_narrative_view_complete_stats[ws_id]:
                for month in static_narrative_view_complete_stats[ws_id][year]:
                    # The total through month N is written to the rows recorded
                    # in month N + 1 (December rolls over to January).
                    # NOTE(review): presumably because the monthly job records
                    # the previous month's state - confirm.
                    temp_month = month + 1
                    temp_year = year
                    if temp_month == 13:
                        temp_month = 1
                        temp_year = year + 1
                    date_used = "{}-{:02d}".format(temp_year, temp_month)
                    update_params = (
                        static_narrative_view_complete_stats[ws_id][year][month],
                        ws_id,
                        date_used,
                    )
                    print("udate_narratives_views_statement : " + update_narratives_views_statement)
                    print("input : " + str(update_params))
                    update_prep_cursor.execute(update_narratives_views_statement, update_params)
                    updates_performed += 1

        db_connection.commit()
    finally:
        # Always release the connection; uncommitted work is discarded on error.
        db_connection.close()

    # Spot-check output for one workspace; .get() avoids a KeyError (after the
    # commit) when that workspace is not present in the sheet.
    print("15253 summary: ========")
    print(str(static_narrative_view_summary_stats.get(15253)))

    print("15253 complete: ========")
    print(str(static_narrative_view_complete_stats.get(15253)))

    print("Total updates performed: " + str(updates_performed))

    return 1

# Run the backfill; this call is at module level, so it runs whenever the file is executed or imported.
get_kbase_google_analytics()
50 changes: 47 additions & 3 deletions source/monthly_cron_jobs/methods_upload_workspace_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@
ws_user_token = os.environ["METRICS_WS_USER_TOKEN"]
to_workspace = os.environ["WRK_SUFFIX"]

# URL of the Google analytics sheet for static narrative page views
kb_google_analytics_url = os.environ["KB_GOOGLE_ANALYTICS_URL"]

def get_static_narrative_counts():
"""
returns a dict of ws_id to count of the nuber of static_workspaces made for it.
Expand All @@ -41,6 +44,42 @@ def get_static_narrative_counts():
static_narrative_counts[int(ws_id)] = len(stats[0][ws_id])
return static_narrative_counts;

def get_static_narrative_page_views():
    """
    Returns static narrative page views from universal Google analytics.

    Downloads the "Monthly" sheet as CSV and sums the page views of every data
    row per workspace. The result is a dict of ws_id -> total page views.

    If the sheet cannot be downloaded (non-200 response) an error is printed
    and an empty dict is returned, so callers can still test
    "ws_id in <result>" without a TypeError.
    """
    # Local import so this block does not depend on the file's import section.
    import csv

    params = (("tqx", "out:csv"), ("sheet", "Monthly"))
    response = requests.get(kb_google_analytics_url, params=params)
    if response.status_code != 200:
        print(
            "ERROR - KBase Google analytics GOOGLE SHEET RESPONSE STATUS CODE : "
            + str(response.status_code)
        )
        print(
            "KBase Google analytics."
        )
        return dict()

    static_narratives_total_views = dict()
    found_header_line = False
    # splitlines() also copes with "\r\n" line endings.
    for line in response.text.splitlines():
        # Skip blank lines such as the one produced by a trailing newline.
        if not line.strip():
            continue
        # The csv module handles the quoting (including commas inside quoted
        # fields) instead of slicing off the first and last character.
        line_elements = [element.strip() for element in next(csv.reader([line]))]
        first_element = line_elements[0]
        if found_header_line:
            # The workspace id is the third "/"-separated piece of the landing page.
            landing_page_elements = first_element.split("/")
            ws_id = int(landing_page_elements[2])
            page_views = int(line_elements[3])
            static_narratives_total_views[ws_id] = (
                static_narratives_total_views.get(ws_id, 0) + page_views
            )
        elif first_element == "Landing Page":
            # Data rows start after this header row.
            found_header_line = True

    print(str(static_narratives_total_views))
    print("Length static_narratives_total_views : " + str(len(static_narratives_total_views)))
    return static_narratives_total_views

def get_workspaces(db, static_narrative_counts):
"""
gets narrative workspaces information for non temporary workspaces
Expand Down Expand Up @@ -630,6 +669,7 @@ def upload_workspace_stats():
db = client.workspace

static_narrative_counts = get_static_narrative_counts()
static_narrative_page_views = get_static_narrative_page_views()
workspaces_dict = get_workspaces(db, static_narrative_counts)
get_ws_top_info_time = time.time() - start_time
kbase_staff = get_kbase_staff(db_connection)
Expand Down Expand Up @@ -666,12 +706,15 @@ def upload_workspace_stats():
"visible_app_cells_count, code_cells_count, narrative_version, "
"hidden_object_count, deleted_object_count, "
"total_size, top_lvl_size, is_public, "
"is_temporary, is_deleted, number_of_shares, "
"num_nar_obj_ids, static_narratives_count,unique_object_types_count) "
"values(%s,%s, %s, %s, now(), %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);"
"is_temporary, is_deleted, number_of_shares, num_nar_obj_ids, "
"static_narratives_count, static_narratives_views, unique_object_types_count) "
"values(%s,%s, %s, %s, now(), %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);"
)

for ws_id in sorted(workspaces_dict.keys()):
static_narrative_views = 0
if ws_id in static_narrative_page_views:
static_narrative_views = static_narrative_page_views[ws_id]
input = (
ws_id,
workspaces_dict[ws_id]["username"],
Expand All @@ -692,6 +735,7 @@ def upload_workspace_stats():
workspaces_dict[ws_id]["number_of_shares"],
workspaces_dict[ws_id]["num_nar_obj_ids"],
workspaces_dict[ws_id]["static_narrative_count"],
static_narrative_views,
workspaces_dict[ws_id]["unique_object_types_count"],
)
prep_cursor.execute(workspaces_insert_statement, input)
Expand Down

0 comments on commit 0ceab07

Please sign in to comment.