diff --git a/askbot/management/commands/askbot_compile_analytics_events.py b/askbot/management/commands/askbot_compile_analytics_events.py index 9abb549a70..6a7ef3a766 100644 --- a/askbot/management/commands/askbot_compile_analytics_events.py +++ b/askbot/management/commands/askbot_compile_analytics_events.py @@ -3,10 +3,13 @@ per-user and per-group Summary tables. """ import datetime -from django.db import transaction +from django.db import models, transaction from django.core.management.base import BaseCommand from askbot.utils.console import ProgressBar -from askbot.models.analytics import Event, GroupDailySummary, UserDailySummary +from askbot.models.analytics import ( + Event, DailyGroupSummary, HourlyGroupSummary, + DailyUserSummary, HourlyUserSummary, Session +) class Command(BaseCommand): # pylint: disable=missing-class-docstring, too-few-public-methods @@ -15,70 +18,144 @@ def add_arguments(self, parser): # pylint: disable=missing-function-docstring def handle(self, *args, **options): # pylint: disable=missing-function-docstring """ - Filters uncompiled analytics events. + Filters unsummarized analytics events. Iterates over the events, and calculates per user summaries per date. THen iterates over the per-user summaries and combines them into the per-group summaries. 
""" - events = Event.objects.filter(compiled=False).order_by('timestamp') # pylint: disable=no-member + now = datetime.datetime.now() + self.summarize_events(options) # to hourly user summaries + self.extract_time_on_site_from_sessions(options) + self.compile_hourly_user_summaries(options, now) # to daily user and hourly group summaries + self.compile_hourly_group_summaries(options, now) + + + def summarize_events(self, options): + """Compiles events into daily per-user summaries""" + events = Event.objects.filter(summarized=False).order_by('timestamp') # pylint: disable=no-member events_count = events.count() message = 'Compiling Events:' silent = options['silent'] + # 1) Populate daily summaries per user for event in ProgressBar(events.iterator(), events_count, message=message, silent=silent): - self.compile_event(event) - - daily_summaries = UserDailySummary.objects.filter(compiled=False).order_by('date') # pylint: disable=no-member - message = 'Compiling User Daily Summaries:' - summaries_count = daily_summaries.count() - iterator = daily_summaries.iterator() - for daily_summary in ProgressBar(iterator, summaries_count, message=message, silent=silent): - self.compile_user_daily_summary(daily_summary) - - # todo: - # update the time on site (how?) 
- # update the total number of users per group - # maybe: record number of active users per group within period - message = 'Count users per group:' - group_daily_summaries = GroupDailySummary.objects.filter(compiled=False) # pylint: disable=no-member - count = group_daily_summaries.count() - iterator = group_daily_summaries.iterator() # pylint: disable=no-member - for group_summary in ProgressBar(iterator, count, message=message, silent=silent): - self.update_users_count_per_group(group_summary) + self.summarize_event(event) @transaction.atomic - def update_users_count_per_group(self, group_summary): - """Counts the number of users in the group at the end of the day""" - join_date_cutoff = group_summary.date + datetime.timedelta(days=1) - users = group_summary.group.user_set.filter(date_joined__lte=join_date_cutoff) # pylint: disable=no-member - group_summary.num_users = users.count() - group_summary.compiled = True - group_summary.save() - - - @transaction.atomic - def compile_event(self, event): + def summarize_event(self, event): """Adds up event stats into the user daily summary""" - date = event.timestamp.date() + hour = event.timestamp.replace(minute=0, second=0, microsecond=0) user = event.session.user - user_summary, _ = UserDailySummary.objects.get_or_create(date=date, # pylint: disable=no-member + user_summary, _ = HourlyUserSummary.objects.get_or_create(hour=hour, # pylint: disable=no-member user=user) user_summary.add_event(event) user_summary.save() - Event.objects.filter(id=event.id).update(compiled=True) # pylint: disable=no-member + Event.objects.filter(id=event.id).update(summarized=True) # pylint: disable=no-member + + + def extract_time_on_site_from_sessions(self, options): + """Updates the time on site in the per-user hourly summaries""" + message = 'Updating the time on site:' + sessions = Session.objects.filter(last_summarized_at__lt=models.F('updated_at')) # pylint: disable=no-member + sessions = sessions.order_by('updated_at') + for session 
in ProgressBar(sessions.iterator(), sessions.count(), + message=message, silent=options['silent']): + self.extract_time_on_site_from_session(session) @transaction.atomic - def compile_user_daily_summary(self, user_daily_summary): - groups = user_daily_summary.user.get_groups(used_for_analytics=True) + def extract_time_on_site_from_session(self, session): + """Adds the session time accrued since the last summarization to the hourly per-user summaries""" + if session.updated_at <= session.last_summarized_at: + return + + sess_start = session.last_summarized_at # resume where the previous run stopped, so that time is not counted twice + sess_end = session.updated_at + user_id = session.user_id + hour = sess_start.replace(minute=0, second=0, microsecond=0) + while hour < sess_end: # strict: no empty window when the session ends exactly on the hour + window_start = max(sess_start, hour) + window_end = min(sess_end, hour + datetime.timedelta(hours=1)) + window_duration = window_end - window_start + summary, _ = HourlyUserSummary.objects.get_or_create(hour=hour, user_id=user_id) # pylint: disable=no-member + summary.time_on_site += window_duration + summary.save() + + hour += datetime.timedelta(hours=1) + + session.last_summarized_at = sess_end + session.save() + + + def compile_hourly_user_summaries(self, options, cutoff_time): + """Compiles hourly per-user summaries into daily per-user summaries""" + hourly_summaries = HourlyUserSummary.objects.filter(summarized=False) # pylint: disable=no-member + cutoff_hour = cutoff_time.replace(minute=0, second=0, microsecond=0) + hourly_summaries = hourly_summaries.filter(hour__lt=cutoff_hour) # hour must be completed + hourly_summaries = hourly_summaries.order_by('hour') + count = hourly_summaries.count() + message = 'Compiling User Hourly Summaries:' + silent = options['silent'] + for hourly_summary in ProgressBar(hourly_summaries.iterator(), count, + message=message, silent=silent): + self.compile_hourly_user_summary(hourly_summary) + + + @transaction.atomic + def compile_hourly_user_summary(self, hourly_user_summary): + """Adds up user hourly summaries into the user daily summaries + and the group hourly summaries""" + 
groups = hourly_user_summary.user.get_groups(used_for_analytics=True) + hour = hourly_user_summary.hour for group in groups: - date = user_daily_summary.date - group_summary, _ = GroupDailySummary.objects.get_or_create(date=date, # pylint: disable=no-member - group=group) - group_summary += user_daily_summary + group_summary, _ = HourlyGroupSummary.objects.get_or_create(hour=hour, # pylint: disable=no-member + group=group) + group_summary += hourly_user_summary group_summary.save() - UserDailySummary.objects.filter(id=user_daily_summary.id).update(compiled=True) # pylint: disable=no-member + daily_user_summary, _ = DailyUserSummary.objects.get_or_create( # pylint: disable=no-member + date=hourly_user_summary.hour.date(), + user=hourly_user_summary.user) + daily_user_summary += hourly_user_summary + daily_user_summary.save() + + HourlyUserSummary.objects.filter(id=hourly_user_summary.id).update(summarized=True) # pylint: disable=no-member + + + def compile_hourly_group_summaries(self, options, cutoff_time): + """ + 1. Compiles hourly per-group summaries into daily per-group summaries + 2. Updates the total number of users in the group that joined before the end of the hour + """ + message = 'Compile hourly group summaries: ' + hourly_group_summaries = HourlyGroupSummary.objects.filter(summarized=False) # pylint: disable=no-member + cutoff_hour = cutoff_time.replace(minute=0, second=0, microsecond=0) + # hour must be completed + hourly_group_summaries = hourly_group_summaries.filter(hour__lt=cutoff_hour) + count = hourly_group_summaries.count() + iterator = hourly_group_summaries.iterator() # pylint: disable=no-member + for group_summary in ProgressBar(iterator, count, message=message, + silent=options['silent']): + self.compile_hourly_group_summary(group_summary) + + + @transaction.atomic + def compile_hourly_group_summary(self, hourly_group_summary): + """ + 1. Adds hourly per-group summary into daily per-group summary + 2. 
Updates the total number of users in the group that joined before the end of the hour + """ + join_date_cutoff = hourly_group_summary.hour + datetime.timedelta(hours=1) + users = hourly_group_summary.group.user_set.filter(date_joined__lt=join_date_cutoff) # pylint: disable=no-member + hourly_group_summary.num_users = users.count() + + daily_group_summary, _ = DailyGroupSummary.objects.get_or_create( # pylint: disable=no-member + date=hourly_group_summary.hour.date(), + group=hourly_group_summary.group) + daily_group_summary += hourly_group_summary + daily_group_summary.save() + hourly_group_summary.summarized = True + hourly_group_summary.save() diff --git a/askbot/migrations/0028_userdailysummary_groupdailysummary.py b/askbot/migrations/0028_dailyusersummary_dailygroupsummary.py similarity index 96% rename from askbot/migrations/0028_userdailysummary_groupdailysummary.py rename to askbot/migrations/0028_dailyusersummary_dailygroupsummary.py index f9520ac676..7a7c6c5ff7 100644 --- a/askbot/migrations/0028_userdailysummary_groupdailysummary.py +++ b/askbot/migrations/0028_dailyusersummary_dailygroupsummary.py @@ -14,7 +14,7 @@ class Migration(migrations.Migration): operations = [ migrations.CreateModel( - name='UserDailySummary', + name='DailyUserSummary', fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('num_questions', models.PositiveIntegerField(default=0)), @@ -31,7 +31,7 @@ class Migration(migrations.Migration): }, ), migrations.CreateModel( - name='GroupDailySummary', + name='DailyGroupSummary', fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('num_questions', models.PositiveIntegerField(default=0)), diff --git a/askbot/migrations/0029_group_visibility.py b/askbot/migrations/0029_group_visibility.py index 7252db4eeb..d2a09a13b9 100644 --- a/askbot/migrations/0029_group_visibility.py +++ b/askbot/migrations/0029_group_visibility.py @@ -6,7 
+6,7 @@ class Migration(migrations.Migration): dependencies = [ - ('askbot', '0028_userdailysummary_groupdailysummary'), + ('askbot', '0028_dailyusersummary_dailygroupsummary'), ] operations = [ diff --git a/askbot/migrations/0030_event_compiled_group_used_for_analytics_and_more.py b/askbot/migrations/0030_event_summarized_group_used_for_analytics_and_more.py similarity index 75% rename from askbot/migrations/0030_event_compiled_group_used_for_analytics_and_more.py rename to askbot/migrations/0030_event_summarized_group_used_for_analytics_and_more.py index b273b3c22c..73fcafecef 100644 --- a/askbot/migrations/0030_event_compiled_group_used_for_analytics_and_more.py +++ b/askbot/migrations/0030_event_summarized_group_used_for_analytics_and_more.py @@ -12,8 +12,8 @@ class Migration(migrations.Migration): operations = [ migrations.AddField( model_name='event', - name='compiled', - field=models.BooleanField(default=False, help_text='True if the event is compiled into a summary'), + name='summarized', + field=models.BooleanField(default=False, help_text='True if the event is included into a summary'), ), migrations.AddField( model_name='group', @@ -21,13 +21,13 @@ class Migration(migrations.Migration): field=models.BooleanField(default=False), ), migrations.AddField( - model_name='groupdailysummary', - name='compiled', + model_name='dailygroupsummary', + name='summarized', field=models.BooleanField(default=False), ), migrations.AddField( - model_name='userdailysummary', - name='compiled', + model_name='dailyusersummary', + name='summarized', field=models.BooleanField(default=False), ), ] diff --git a/askbot/migrations/0031_session_last_summarized_at.py b/askbot/migrations/0031_session_last_summarized_at.py new file mode 100644 index 0000000000..9a290f0a8e --- /dev/null +++ b/askbot/migrations/0031_session_last_summarized_at.py @@ -0,0 +1,29 @@ +# Generated by Django 4.2.4 on 2024-07-22 15:52 +# edited by hand to add the populate_last_summarized_at function +# and remove 
the null=True from the field +from django.db import migrations, models + +def populate_last_summarized_at(apps, schema_editor): + Session = apps.get_model('askbot', 'Session') + Session.objects.all().update(last_summarized_at=models.F('created_at')) + + +class Migration(migrations.Migration): + + dependencies = [ + ('askbot', '0030_event_summarized_group_used_for_analytics_and_more'), + ] + + operations = [ + migrations.AddField( + model_name='session', + name='last_summarized_at', + field=models.DateTimeField(null=True), + ), + migrations.RunPython(populate_last_summarized_at, reverse_code=migrations.RunPython.noop), + migrations.AlterField( + model_name='session', + name='last_summarized_at', + field=models.DateTimeField(null=False) + ), + ] diff --git a/askbot/migrations/0032_hourlyusersummary_hourlygroupsummary.py b/askbot/migrations/0032_hourlyusersummary_hourlygroupsummary.py new file mode 100644 index 0000000000..2e84fdb187 --- /dev/null +++ b/askbot/migrations/0032_hourlyusersummary_hourlygroupsummary.py @@ -0,0 +1,54 @@ +# Generated by Django 4.2.4 on 2024-07-28 21:52 + +import datetime +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ('askbot', '0031_session_last_summarized_at'), + ] + + operations = [ + migrations.CreateModel( + name='HourlyUserSummary', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('num_questions', models.PositiveIntegerField(default=0)), + ('num_answers', models.PositiveIntegerField(default=0)), + ('num_upvotes', models.PositiveIntegerField(default=0)), + ('num_downvotes', models.PositiveIntegerField(default=0)), + ('question_views', models.PositiveIntegerField(default=0)), + ('time_on_site', models.DurationField(default=datetime.timedelta(0))), + ('summarized', 
models.BooleanField(default=False)), + ('hour', models.DateTimeField(db_index=True)), + ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), + ], + options={ + 'abstract': False, + }, + ), + migrations.CreateModel( + name='HourlyGroupSummary', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('num_questions', models.PositiveIntegerField(default=0)), + ('num_answers', models.PositiveIntegerField(default=0)), + ('num_upvotes', models.PositiveIntegerField(default=0)), + ('num_downvotes', models.PositiveIntegerField(default=0)), + ('question_views', models.PositiveIntegerField(default=0)), + ('time_on_site', models.DurationField(default=datetime.timedelta(0))), + ('summarized', models.BooleanField(default=False)), + ('hour', models.DateTimeField(db_index=True)), + ('num_users', models.PositiveIntegerField(default=0)), + ('group', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='askbot.group')), + ], + options={ + 'abstract': False, + }, + ), + ] diff --git a/askbot/models/analytics.py b/askbot/models/analytics.py index b0654d67a7..6f22989814 100644 --- a/askbot/models/analytics.py +++ b/askbot/models/analytics.py @@ -133,6 +133,7 @@ class Session(models.Model): user_agent = models.CharField(max_length=512, null=True, blank=True) created_at = models.DateTimeField() # no auto_now_add or auto_now for created_at and updated_at updated_at = models.DateTimeField() # b/c we want to set it manually for the testing purposes + last_summarized_at = models.DateTimeField() # used for calculating the time on site def __str__(self): created_at = self.created_at.isoformat() # pylint: disable=no-member @@ -149,8 +150,8 @@ class Event(models.Model): content_type = models.ForeignKey(ContentType, on_delete=models.CASCADE) object_id = models.PositiveIntegerField(db_index=True) content_object = GenericForeignKey('content_type', 'object_id') - compiled = 
models.BooleanField(default=False, - help_text="True if the event is compiled into a summary") + summarized = models.BooleanField(default=False, + help_text="True if the event is included into a summary") def __str__(self): timestamp = self.timestamp.isoformat() # pylint: disable=no-member @@ -168,7 +169,7 @@ class BaseSummary(models.Model): num_downvotes = models.PositiveIntegerField(default=0) question_views = models.PositiveIntegerField(default=0) time_on_site = models.DurationField(default=datetime.timedelta(0)) - compiled = models.BooleanField(default=False) + summarized = models.BooleanField(default=False) class Meta: # pylint: disable=too-few-public-methods, missing-class-docstring abstract = True @@ -199,6 +200,25 @@ def __add__(self, other): return self +class HourlySummary(BaseSummary): + """An abstract class for hourly summaries.""" + hour = models.DateTimeField(db_index=True) + + class Meta: # pylint: disable=too-few-public-methods, missing-class-docstring + abstract = True + + +class HourlyUserSummary(HourlySummary): + """User summary for each hour with activity.""" + user = models.ForeignKey(User, on_delete=models.CASCADE) + + +class HourlyGroupSummary(HourlySummary): + """Group summary for each hour with activity.""" + group = models.ForeignKey(AskbotGroup, on_delete=models.CASCADE) + num_users = models.PositiveIntegerField(default=0) + + class DailySummary(BaseSummary): """An abstract class for daily summaries.""" date = models.DateField(db_index=True) @@ -206,27 +226,17 @@ class DailySummary(BaseSummary): class Meta: # pylint: disable=too-few-public-methods, missing-class-docstring abstract = True - def add_event(self, event): - """Increments the attribute appropriate for the event type. - In addition adds up the time on site for all matching sessions. 
- """ - super().add_event(event) - # todo: get all sessions intersecting the date - # for each session, calculate the intersection with the date - # add up those intervals - # assumes that sessions do not overlap - -class UserDailySummary(DailySummary): +class DailyUserSummary(DailySummary): """User summary for each day with activity.""" user = models.ForeignKey(User, on_delete=models.CASCADE) -class GroupDailySummary(DailySummary): +class DailyGroupSummary(DailySummary): """Group summary for each day with activity.""" group = models.ForeignKey(AskbotGroup, on_delete=models.CASCADE) num_users = models.PositiveIntegerField(default=0) def add_event(self, event): - raise RuntimeError("Cannot add events to GroupDailySummary") + raise RuntimeError("Cannot add events to DailyGroupSummary")