
Commit

Merge pull request #118 from kbase/develop
Develop
jkbaumohl authored Oct 10, 2024
2 parents 7b0e3b5 + 5869feb commit 938b79f
Showing 33 changed files with 11,294 additions and 21 deletions.
16 changes: 12 additions & 4 deletions README.md
@@ -43,6 +43,15 @@ source/daily/upload_public_narratives_count.py
source/daily/make_reporting_tables.py


-------------------
Within the logstash Dockerfile there is:
https://github.com/kbase/logstash/blob/41778da1238129a65296bdddcb6ff26e9c694779/Dockerfile#L24-L29
The rm at the end is, I believe, just cleaning up after itself. This was set up by Steve for Cheyenne's work.
This is used by this code:
https://github.com/kbase/metrics/blob/master/source/daily_cron_jobs/methods_upload_elasticsearch_sumrydicts.py



-------------------

CRON Jobs are run from mysql-metrics
@@ -53,23 +62,22 @@ There are nightly CRON jobs located in bin/master_cron_shell.sh,
which runs scripts from the source/daily directory.

Then there are also monthly CRON jobs, located in bin/upload_workspace_stats.sh.
It used to be workspaces only (user info needed first for potential FK issues).
Runs scripts from the source/monthly directory.

There is also a doi_monthly CRON job for Credit Engine, located in bin/upload_doi_metrics.sh.

These create logs to keep track of (note: nightly metrics calls master_cron_shell.sh):
01 17 * * * /root/metrics/nightly_metrics.sh >>/mnt/metrics_logs/crontab_nightly 2>&1
01 0 1 * * /root/metrics/monthly_metrics.sh >>/mnt/metrics_logs/crontab_monthly 2>&1
01 0 15 * * /root/metrics/monthly_metrics.sh >>/mnt/metrics_logs/crontab_doi_monthly 2>&1
01 07 * * * /root/metrics/nightly_errorlogs.sh >>/mnt/metrics_logs/crontab_errorlogs 2>&1

From Docker03 the logs can be checked by doing the following:
cat /mnt/nfs3/data1/metrics/crontab_logs/crontab_nightly
cat /mnt/nfs3/data1/metrics/crontab_logs/crontab_monthly
cat /mnt/nfs3/data1/metrics/crontab_logs/crontab_doi_monthly


You can also confirm things ran by looking in the database (if not, backfills need to be done).
Example (there should be a row set for the first of each month):
select DATE_FORMAT(`record_date`,'%Y-%m') as narrative_cron_month, count(*) as narrative_count from metrics.workspaces ws group by narrative_cron_month;
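A similar spot check can be done for the daily jobs. For example, assuming the new blobstore upload writes dated rows into metrics.blobstore_stats (the table name appears in dump_query_results.py below; the record_date column here is an assumption):
-- hypothetical check; assumes blobstore_stats has a record_date column like the other upload tables
select DATE_FORMAT(`record_date`,'%Y-%m-%d') as blobstore_cron_day, count(*) as blobstore_row_count from metrics.blobstore_stats group by blobstore_cron_day;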
3 changes: 3 additions & 0 deletions bin/dump_get_copy_info_for_narratives.sh
@@ -0,0 +1,3 @@
#!/bin/bash

python custom_scripts/get_copy_info_for_narratives.py
3 changes: 3 additions & 0 deletions bin/dump_weekly_ADAM_app_categories.sh
@@ -0,0 +1,3 @@
#!/bin/bash

python custom_scripts/dump_weekly_ADAM_app_categories.py
3 changes: 3 additions & 0 deletions bin/dump_weekly_app_categories_v2.sh
@@ -0,0 +1,3 @@
#!/bin/bash

python custom_scripts/dump_weekly_app_categories_v2.py
2 changes: 2 additions & 0 deletions bin/master_cron_shell.sh
@@ -14,6 +14,8 @@ python daily_cron_jobs/upload_public_narratives_count.py

python daily_cron_jobs/upload_user_orcid_count.py

python daily_cron_jobs/upload_blobstore_details.py

python daily_cron_jobs/make_reporting_tables.py


1 change: 1 addition & 0 deletions docker-compose.yml
@@ -31,5 +31,6 @@ services:
GRP_SUFFIX: ${GRP_SUFFIX}
WRK_SUFFIX: ${WRK_SUFFIX}
BLOBSTORE_SUFFIX: ${BLOBSTORE_SUFFIX}
HANDLE_DB_SUFFIX: ${HANDLE_DB_SUFFIX}
SERVICE_WIZARD_URL: ${SERVICE_WIZARD_URL}
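These values reach the container as environment variables, so the new HANDLE_DB_SUFFIX would be read the same way the dump scripts below read their settings (a minimal sketch; only the environment read itself is shown, and any further use of the value is hypothetical):

import os

# hypothetical read, mirroring the os.environ lookups in the cron scripts
handle_db_suffix = os.environ["HANDLE_DB_SUFFIX"]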

10 changes: 7 additions & 3 deletions source/custom_scripts/dump_narratives_results.py
@@ -30,12 +30,16 @@ def dump_narratives_results():

# CHANGE QUERY HERE
# Query for Adam Narratives dump of information:
query = ("select wc.* from metrics.user_info ui inner join metrics_reporting.workspaces_current wc on ui.username = wc.username "
"where ui.kb_internal_user = 0 and wc.narrative_version > 0 and is_deleted = 0 and is_temporary = 0")
query = ("select * from metrics.workspaces_current_plus_users ")
# query = ("select wc.* from metrics.user_info ui inner join metrics_reporting.workspaces_current wc on ui.username = wc.username "
# "where ui.kb_internal_user = 0 and wc.narrative_version > 0 and is_deleted = 0 and is_temporary = 0")
# Headers for Adam's narratives query (Note: if more columns are added, this may need to be updated)
print("ws_id\tusername\tmod_date\tinitial_save_date\trecord_date\ttop_lvl_object_count\ttotal_object_count\tvisible_app_cells_count\tcode_cells_count\t"
"narrative_version\thidden_object_count\tdeleted_object_count\ttotal_size\ttop_lvl_size\tis_public\tis_temporary\tis_deleted\tnumber_of_shares\t"
"num_nar_obj_ids\tstatic_narratives_count\tstatic_narratives_views\tunique_object_types_count")
"num_nar_obj_ids\tstatic_narratives_count\tstatic_narratives_views\tunique_object_types_count\t"
"orig_saver_count\tnon_orig_saver_count\torig_saver_size_GB\tnon_orig_saver_size_GB")

# "num_nar_obj_ids\tstatic_narratives_count\tstatic_narratives_views\tunique_object_types_count")

cursor.execute(query)
row_values = list()
19 changes: 18 additions & 1 deletion source/custom_scripts/dump_query_results.py
@@ -42,7 +42,9 @@ def dump_query_results():
"last_narrative_modified_date\ttotal_narrative_objects_count\ttop_lvl_narrative_objects_count\ttotal_narrative_objects_size\t"
"top_lvl_narrative_objects_size\ttotal_narrative_count\ttotal_public_narrative_count\tdistinct_static_narratives_count\t"
"static_narratives_created_count\ttotal_visible_app_cells\ttotal_code_cells_count\tfirst_file_date\tlast_file_date\t"
"total_file_sizes_MB\ttotal_file_count\tmost_used_app\tdistinct_apps_used\ttotal_apps_run_all_time\ttotal_apps_run_last365\t"
"total_file_sizes_MB\ttotal_file_count\tblobstore_orig_saver_count\tblobstore_non_orig_saver_count\t"
"blobstore_orig_saver_size_GB\tblobstore_non_orig_saver_size_GB\t"
"most_used_app\tdistinct_apps_used\ttotal_apps_run_all_time\ttotal_apps_run_last365\t"
"total_apps_run_last90\ttotal_apps_run_last30\ttotal_app_errors_all_time\tfirst_app_run\tlast_app_run\ttotal_run_time_hours\t"
"total_queue_time_hours\ttotal_CPU_hours\tsession_count_all_time\tsession_count_last_year\tsession_count_last_90\tsession_count_last_30"
)
@@ -73,6 +75,11 @@ def dump_query_results():
# order by run_month, app_name;")
#print("app_name\tapp_cat\trun_month\trun_count\ttotal_run_hours")

# UserObjectTypeCount
# query = ("select DATE_FORMAT(`record_date`,'%Y-%m') as month, object_type, sum(top_lvl_object_count) as user_object_count\
# from users_workspace_object_counts group by month, object_type")
# print("month\tobject_type\tuser_object_count")

# USER SESSION STATS:
#query = ("select si.username, count(*) as session_count, sum(estimated_hrs_active) total_hours_active,\
# avg(estimated_hrs_active) avg_hours_active, std(estimated_hrs_active) std_hours_active,\
@@ -82,6 +89,16 @@ # group by username\
# group by username\
# order by avg_hours_active desc, session_count, total_hours_active")
#print("username\tsession_count\ttotal_hours_active\tavg_hours_active\tstd_hours_active\tfirst_seen\tlast_seen")

# Custom apps updates for RSV
# query = ("select app_name, git_commit_hash, min(finish_date) as first_run_date from user_app_usage \
# group by app_name, git_commit_hash having first_run_date > '2021-01-01'")
# print("appname\tgit_commit_hash\tfirst_run_date")

# Blobstore cumulative sizes over users
# query = ("select sum(total_size) as blobstore_size, bs.username from blobstore_stats bs \
# group by username order by blobstore_size")
# print("blobstore_size\tusername")

cursor.execute(query)
row_values = list()
59 changes: 59 additions & 0 deletions source/custom_scripts/dump_weekly_ADAM_app_categories.py
@@ -0,0 +1,59 @@
#!/usr/local/bin/python

import os
import mysql.connector as mysql

metrics_mysql_password = os.environ["METRICS_MYSQL_PWD"]
sql_host = os.environ["SQL_HOST"]
metrics = os.environ["QUERY_ON"]


def dump_weekly_app_categories():
    # Dumps the weekly app category users report used in the quarterly report

    # connect to mysql
    db_connection = mysql.connect(
        host=sql_host,  # "mysql1", #"localhost",
        user="metrics",  # "root",
        passwd=metrics_mysql_password,
        database="metrics",  # "datacamp"
    )

    cursor = db_connection.cursor()
    query = "use " + metrics
    cursor.execute(query)

    # CHANGE QUERY HERE
    # Regular weekly app categories
    # query = ("select * from metrics_reporting.app_category_unique_users_weekly")

    # ADAM's special category mappings from late 2023 / early 2024
    query = ("select in_query.week_run, in_query.master_category, count(*) as unique_users "
             "from (select distinct DATE_FORMAT(`finish_date`,'%Y-%u') as week_run, "
             "IFNULL(master_category,'None') as master_category, uau.username "
             "from metrics.user_app_usage uau inner join "
             "metrics.user_info ui on uau.username = ui.username "
             "left outer join "
             "metrics.adams_app_name_category_map anc on uau.app_name = anc.app_name "
             "where ui.kb_internal_user = 0 "
             "and func_name != 'kb_gtdbtk/run_kb_gtdbtk') as in_query "
             "group by in_query.week_run, in_query.master_category;")
    # CHANGE COLUMN HEADERS HERE TO MATCH QUERY HEADERS
    print("week_run\tmaster_category\tunique_users")

    cursor.execute(query)
    row_values = list()

    # Print each result row as a tab-separated line; None values become empty fields
    for row_values in cursor:
        temp_string = ""
        for i in range(len(row_values) - 1):
            if row_values[i] is not None:
                temp_string += str(row_values[i])
            temp_string += "\t"
        if row_values[-1] is not None:
            temp_string += str(row_values[-1])
        print(temp_string)
    return 1


dump_weekly_app_categories()
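The None-handling print loop above could equivalently be written with a single join (a sketch, not code from this commit):

# equivalent to the loop: None becomes an empty field, values are tab-separated
print("\t".join("" if v is None else str(v) for v in row_values))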
46 changes: 46 additions & 0 deletions source/custom_scripts/dump_weekly_app_categories_v2.py
@@ -0,0 +1,46 @@
#!/usr/local/bin/python

import os
import mysql.connector as mysql

metrics_mysql_password = os.environ["METRICS_MYSQL_PWD"]
sql_host = os.environ["SQL_HOST"]
metrics = os.environ["QUERY_ON"]


def dump_weekly_app_categories():
    # Dumps the weekly app category users report used in the quarterly report

    # connect to mysql
    db_connection = mysql.connect(
        host=sql_host,  # "mysql1", #"localhost",
        user="metrics",  # "root",
        passwd=metrics_mysql_password,
        database="metrics",  # "datacamp"
    )

    cursor = db_connection.cursor()
    query = "use " + metrics
    cursor.execute(query)

    # CHANGE QUERY HERE
    query = ("select * from metrics_reporting.app_category_unique_users_weekly_v2")
    # CHANGE COLUMN HEADERS HERE TO MATCH QUERY HEADERS
    print("week_run\tapp_category\tunique_users")

    cursor.execute(query)
    row_values = list()

    # Print each result row as a tab-separated line; None values become empty fields
    for row_values in cursor:
        temp_string = ""
        for i in range(len(row_values) - 1):
            if row_values[i] is not None:
                temp_string += str(row_values[i])
            temp_string += "\t"
        if row_values[-1] is not None:
            temp_string += str(row_values[-1])
        print(temp_string)
    return 1


dump_weekly_app_categories()