diff --git a/askbot/__init__.py b/askbot/__init__.py index 57696fd9c3..8365fd0c2b 100644 --- a/askbot/__init__.py +++ b/askbot/__init__.py @@ -54,6 +54,7 @@ 'requirements': 'requirements-parser>=0.2.0', 'robots': 'django-robots==5.0', 'regex': 'regex', + 'tldextract': 'tldextract==5.1.2', 'unidecode': 'unidecode', 'urllib3': 'urllib3<1.27,>=1.21.1' } diff --git a/askbot/management/commands/askbot_compile_analytics_events.py b/askbot/management/commands/askbot_compile_analytics_events.py new file mode 100644 index 0000000000..9abb549a70 --- /dev/null +++ b/askbot/management/commands/askbot_compile_analytics_events.py @@ -0,0 +1,84 @@ +"""Management commands for Askbot Analytics Events. +Compiles summaries of Askbot Analytics Events in the +per-user and per-group Summary tables. +""" +import datetime +from django.db import transaction +from django.core.management.base import BaseCommand +from askbot.utils.console import ProgressBar +from askbot.models.analytics import Event, GroupDailySummary, UserDailySummary + +class Command(BaseCommand): # pylint: disable=missing-class-docstring, too-few-public-methods + + def add_arguments(self, parser): # pylint: disable=missing-function-docstring + parser.add_argument('--silent', action='store_true', help='Do not print progress on the console') + + def handle(self, *args, **options): # pylint: disable=missing-function-docstring + """ + Filters uncompiled analytics events. + Iterates over the events, and calculates per user summaries + per date. + + Then iterates over the per-user summaries and combines them + into the per-group summaries. 
+ """ + events = Event.objects.filter(compiled=False).order_by('timestamp') # pylint: disable=no-member + events_count = events.count() + message = 'Compiling Events:' + silent = options['silent'] + for event in ProgressBar(events.iterator(), events_count, message=message, silent=silent): + self.compile_event(event) + + daily_summaries = UserDailySummary.objects.filter(compiled=False).order_by('date') # pylint: disable=no-member + message = 'Compiling User Daily Summaries:' + summaries_count = daily_summaries.count() + iterator = daily_summaries.iterator() + for daily_summary in ProgressBar(iterator, summaries_count, message=message, silent=silent): + self.compile_user_daily_summary(daily_summary) + + # todo: + # update the time on site (how?) + # update the total number of users per group + # maybe: record number of active users per group within period + message = 'Count users per group:' + group_daily_summaries = GroupDailySummary.objects.filter(compiled=False) # pylint: disable=no-member + count = group_daily_summaries.count() + iterator = group_daily_summaries.iterator() # pylint: disable=no-member + for group_summary in ProgressBar(iterator, count, message=message, silent=silent): + self.update_users_count_per_group(group_summary) + + + @transaction.atomic + def update_users_count_per_group(self, group_summary): + """Counts the number of users in the group at the end of the day""" + join_date_cutoff = group_summary.date + datetime.timedelta(days=1) + users = group_summary.group.user_set.filter(date_joined__lte=join_date_cutoff) # pylint: disable=no-member + group_summary.num_users = users.count() + group_summary.compiled = True + group_summary.save() + + + @transaction.atomic + def compile_event(self, event): + """Adds up event stats into the user daily summary""" + date = event.timestamp.date() + user = event.session.user + user_summary, _ = UserDailySummary.objects.get_or_create(date=date, # pylint: disable=no-member + user=user) + 
user_summary.add_event(event) + user_summary.save() + Event.objects.filter(id=event.id).update(compiled=True) # pylint: disable=no-member + + + @transaction.atomic + def compile_user_daily_summary(self, user_daily_summary): + groups = user_daily_summary.user.get_groups(used_for_analytics=True) + for group in groups: + date = user_daily_summary.date + group_summary, _ = GroupDailySummary.objects.get_or_create(date=date, # pylint: disable=no-member + group=group) + group_summary += user_daily_summary + group_summary.save() + + UserDailySummary.objects.filter(id=user_daily_summary.id).update(compiled=True) # pylint: disable=no-member + diff --git a/askbot/management/commands/askbot_create_per_email_domain_groups.py b/askbot/management/commands/askbot_create_per_email_domain_groups.py index e51078d13f..6a4e106034 100644 --- a/askbot/management/commands/askbot_create_per_email_domain_groups.py +++ b/askbot/management/commands/askbot_create_per_email_domain_groups.py @@ -9,6 +9,9 @@ class Command(BaseCommand): # pylint: disable=missing-docstring help = 'Create groups for each email domain in the database.' + def add_arguments(self, parser): # pylint: disable=missing-docstring + parser.add_argument('--silent', action='store_true', help='Do not print progress messages.') + def handle(self, *args, **options): # pylint: disable=missing-docstring, unused-argument """Obtains a list of unique email domains names. Creates a group for each domain name, if such group does not exist. 
@@ -20,19 +23,27 @@ def handle(self, *args, **options): # pylint: disable=missing-docstring, unused- created_groups = [] unchanged_groups = [] done_lowercased_domains = [] - for domain in ProgressBar(domains.iterator(), count, message): + silent = options['silent'] + for domain in ProgressBar(domains.iterator(), count, message=message, silent=silent): - domain_name = domain['domain'] - if domain_name.lower in done_lowercased_domains: + domain_name = domain['domain'] or 'Unknown Organization' + if domain_name.lower() in done_lowercased_domains: continue - else: - done_lowercased_domains.append(domain_name.lower()) + + done_lowercased_domains.append(domain_name.lower()) organization_name = get_organization_name_from_domain(domain_name) group, created = Group.objects.get_or_create( name=organization_name, - visibility=askbot_settings.PER_EMAIL_DOMAIN_GROUP_DEFAULT_VISIBILITY + visibility=askbot_settings.PER_EMAIL_DOMAIN_GROUP_DEFAULT_VISIBILITY, + used_for_analytics=True ) + + if not created: + if not group.used_for_analytics: + group.used_for_analytics = True + group.save() + users = User.objects.filter(email__endswith='@' + domain_name) for user in users.iterator(): user.join_group(group, force=True) @@ -41,4 +52,3 @@ def handle(self, *args, **options): # pylint: disable=missing-docstring, unused- created_groups.append(group) else: unchanged_groups.append(group) - diff --git a/askbot/migrations/0028_userdailysummary_groupdailysummary.py b/askbot/migrations/0028_userdailysummary_groupdailysummary.py index 88abbff2fa..f9520ac676 100644 --- a/askbot/migrations/0028_userdailysummary_groupdailysummary.py +++ b/askbot/migrations/0028_userdailysummary_groupdailysummary.py @@ -1,5 +1,5 @@ # Generated by Django 4.2.4 on 2024-06-24 21:15 - +import datetime from django.conf import settings from django.db import migrations, models import django.db.models.deletion @@ -17,12 +17,12 @@ class Migration(migrations.Migration): name='UserDailySummary', fields=[ ('id', 
models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('num_questions', models.PositiveIntegerField()), - ('num_answers', models.PositiveIntegerField()), - ('num_upvotes', models.PositiveIntegerField()), - ('num_downvotes', models.PositiveIntegerField()), - ('question_views', models.PositiveIntegerField()), - ('time_on_site', models.DurationField()), + ('num_questions', models.PositiveIntegerField(default=0)), + ('num_answers', models.PositiveIntegerField(default=0)), + ('num_upvotes', models.PositiveIntegerField(default=0)), + ('num_downvotes', models.PositiveIntegerField(default=0)), + ('question_views', models.PositiveIntegerField(default=0)), + ('time_on_site', models.DurationField(default=datetime.timedelta(0))), ('date', models.DateField(db_index=True)), ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), ], @@ -34,14 +34,14 @@ class Migration(migrations.Migration): name='GroupDailySummary', fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('num_questions', models.PositiveIntegerField()), - ('num_answers', models.PositiveIntegerField()), - ('num_upvotes', models.PositiveIntegerField()), - ('num_downvotes', models.PositiveIntegerField()), - ('question_views', models.PositiveIntegerField()), - ('time_on_site', models.DurationField()), + ('num_questions', models.PositiveIntegerField(default=0)), + ('num_answers', models.PositiveIntegerField(default=0)), + ('num_upvotes', models.PositiveIntegerField(default=0)), + ('num_downvotes', models.PositiveIntegerField(default=0)), + ('question_views', models.PositiveIntegerField(default=0)), + ('time_on_site', models.DurationField(default=datetime.timedelta(0))), ('date', models.DateField(db_index=True)), - ('num_users', models.PositiveIntegerField()), + ('num_users', models.PositiveIntegerField(default=0)), ('group', 
models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='askbot.group')), ], options={ diff --git a/askbot/migrations/0030_event_compiled_group_used_for_analytics_and_more.py b/askbot/migrations/0030_event_compiled_group_used_for_analytics_and_more.py new file mode 100644 index 0000000000..b273b3c22c --- /dev/null +++ b/askbot/migrations/0030_event_compiled_group_used_for_analytics_and_more.py @@ -0,0 +1,33 @@ +# Generated by Django 4.2.4 on 2024-07-02 00:10 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('askbot', '0029_group_visibility'), + ] + + operations = [ + migrations.AddField( + model_name='event', + name='compiled', + field=models.BooleanField(default=False, help_text='True if the event is compiled into a summary'), + ), + migrations.AddField( + model_name='group', + name='used_for_analytics', + field=models.BooleanField(default=False), + ), + migrations.AddField( + model_name='groupdailysummary', + name='compiled', + field=models.BooleanField(default=False), + ), + migrations.AddField( + model_name='userdailysummary', + name='compiled', + field=models.BooleanField(default=False), + ), + ] diff --git a/askbot/models/__init__.py b/askbot/models/__init__.py index d69fd7bdfc..a76a907743 100644 --- a/askbot/models/__init__.py +++ b/askbot/models/__init__.py @@ -2932,10 +2932,11 @@ def get_profile_link(self, text=None): return mark_safe(profile_link) -def user_get_groups(self, private=False): +def user_get_groups(self, private=False, used_for_analytics=False): """returns a query set of groups to which user belongs""" #todo: maybe cache this query - return Group.objects.get_for_user(self, private=private) + return Group.objects.get_for_user(self, private=private, + used_for_analytics=used_for_analytics) def user_join_default_groups(self): """adds user to "global" and "personal" groups""" diff --git a/askbot/models/analytics.py b/askbot/models/analytics.py index 7d029d51be..b0654d67a7 
100644 --- a/askbot/models/analytics.py +++ b/askbot/models/analytics.py @@ -1,4 +1,5 @@ """Models for the Analytics feature""" +import datetime from django.db import models from django.db.models import Q from django.db.models import Value @@ -47,21 +48,36 @@ #TYPE_ACTIVITY_QUESTION_VIEWED = 52 #TYPE_ACTIVITY_ANSWER_VIEWED = 53 +EVENT_TYPE_USER_REGISTERED = 1 +EVENT_TYPE_LOGGED_IN = 2 +EVENT_TYPE_LOGGED_OUT = 3 +EVENT_TYPE_QUESTION_VIEWED = 4 +EVENT_TYPE_ANSWER_VIEWED = 5 +EVENT_TYPE_UPVOTED = 6 +EVENT_TYPE_DOWNVOTED = 7 +EVENT_TYPE_VOTE_CANCELED = 8 +EVENT_TYPE_ASKED = 9 +EVENT_TYPE_ANSWERED = 10 +EVENT_TYPE_QUESTION_COMMENTED = 11 +EVENT_TYPE_ANSWER_COMMENTED = 12 +EVENT_TYPE_QUESTION_RETAGGED = 13 +EVENT_TYPE_SEARCHED = 14 + EVENT_TYPES = ( - (1, _('registered')), # Activity.activity_type == 51 - (2, _('logged in')), - (3, _('logged out')), - (4, _('question viewed')), # Activity.activity_type == 52 - (5, _('answer viewed')), # Activity.activity_type == 53 - (6, _('upvoted')), # Activity.activity_type == 9 - (7, _('downvoted')), # Activity.activity_type == 10 - (8, _('canceled vote')), # Activity.activity_type == 11 - (9, _('asked')), # Activity.activity_type == 1 - (10, _('answered')), # Activity.activity_type == 2 - (11, _('commented question')), # Activity.activity_type == 3 - (12, _('commented answer')), # Activity.activity_type == 4 - (13, _('retagged question')), # Activity.activity_type == 15 - (14, _('searched')), + (EVENT_TYPE_USER_REGISTERED, _('registered')), # Activity.activity_type == 51 + (EVENT_TYPE_LOGGED_IN, _('logged in')), + (EVENT_TYPE_LOGGED_OUT, _('logged out')), + (EVENT_TYPE_QUESTION_VIEWED, _('question viewed')), # Activity.activity_type == 52 + (EVENT_TYPE_ANSWER_VIEWED, _('answer viewed')), # Activity.activity_type == 53 + (EVENT_TYPE_UPVOTED, _('upvoted')), # Activity.activity_type == 9 + (EVENT_TYPE_DOWNVOTED, _('downvoted')), # Activity.activity_type == 10 + (EVENT_TYPE_VOTE_CANCELED, _('canceled vote')), # Activity.activity_type == 
11 + (EVENT_TYPE_ASKED, _('asked')), # Activity.activity_type == 1 + (EVENT_TYPE_ANSWERED, _('answered')), # Activity.activity_type == 2 + (EVENT_TYPE_QUESTION_COMMENTED, _('commented question')), # Activity.activity_type == 3 + (EVENT_TYPE_ANSWER_COMMENTED, _('commented answer')), # Activity.activity_type == 4 + (EVENT_TYPE_QUESTION_RETAGGED, _('retagged question')), # Activity.activity_type == 15 + (EVENT_TYPE_SEARCHED, _('searched')), ) # Dimension and Metric would make a generic implementation of the analytics feature @@ -107,7 +123,7 @@ def get_organizations_count(): def get_unique_user_email_domains(): """Returns a list of unique email domain names""" - return list(get_user_organization_domains_qs().values_list('domain', flat=True)) + return list(get_unique_user_email_domains_qs().values_list('domain', flat=True)) class Session(models.Model): @@ -133,6 +149,8 @@ class Event(models.Model): content_type = models.ForeignKey(ContentType, on_delete=models.CASCADE) object_id = models.PositiveIntegerField(db_index=True) content_object = GenericForeignKey('content_type', 'object_id') + compiled = models.BooleanField(default=False, + help_text="True if the event is compiled into a summary") def __str__(self): timestamp = self.timestamp.isoformat() # pylint: disable=no-member @@ -144,17 +162,43 @@ class BaseSummary(models.Model): An abstract model for per-interval summaries. An interval name is defined in the subclass. 
""" - num_questions = models.PositiveIntegerField() - num_answers = models.PositiveIntegerField() - num_upvotes = models.PositiveIntegerField() - num_downvotes = models.PositiveIntegerField() - question_views = models.PositiveIntegerField() - time_on_site = models.DurationField() + num_questions = models.PositiveIntegerField(default=0) + num_answers = models.PositiveIntegerField(default=0) + num_upvotes = models.PositiveIntegerField(default=0) + num_downvotes = models.PositiveIntegerField(default=0) + question_views = models.PositiveIntegerField(default=0) + time_on_site = models.DurationField(default=datetime.timedelta(0)) + compiled = models.BooleanField(default=False) class Meta: # pylint: disable=too-few-public-methods, missing-class-docstring abstract = True + def add_event(self, event): + """Increments the attribute appropriate for the event type""" + if event.event_type == EVENT_TYPE_ASKED: + self.num_questions += 1 + elif event.event_type == EVENT_TYPE_ANSWERED: + self.num_answers += 1 + elif event.event_type == EVENT_TYPE_UPVOTED: + self.num_upvotes += 1 + elif event.event_type == EVENT_TYPE_DOWNVOTED: + self.num_downvotes += 1 + elif event.event_type == EVENT_TYPE_QUESTION_VIEWED: + self.question_views += 1 + + + def __add__(self, other): + """Adds the attributes of two summaries""" + self.num_questions += other.num_questions + self.num_answers += other.num_answers + self.num_upvotes += other.num_upvotes + self.num_downvotes += other.num_downvotes + self.question_views += other.question_views + self.time_on_site += other.time_on_site + return self + + class DailySummary(BaseSummary): """An abstract class for daily summaries.""" date = models.DateField(db_index=True) @@ -162,6 +206,16 @@ class DailySummary(BaseSummary): class Meta: # pylint: disable=too-few-public-methods, missing-class-docstring abstract = True + def add_event(self, event): + """Increments the attribute appropriate for the event type. 
+ In addition adds up the time on site for all matching sessions. + """ + super().add_event(event) + # todo: get all sessions intersecting the date + # for each session, calculate the intersection with the date + # add up those intervals + # assumes that sessions do not overlap + class UserDailySummary(DailySummary): """User summary for each day with activity.""" @@ -171,4 +225,8 @@ class UserDailySummary(DailySummary): class GroupDailySummary(DailySummary): """Group summary for each day with activity.""" group = models.ForeignKey(AskbotGroup, on_delete=models.CASCADE) - num_users = models.PositiveIntegerField() + num_users = models.PositiveIntegerField(default=0) + + + def add_event(self, event): + raise RuntimeError("Cannot add events to GroupDailySummary") diff --git a/askbot/models/user.py b/askbot/models/user.py index 8c69a8c3fe..aad0e463fe 100644 --- a/askbot/models/user.py +++ b/askbot/models/user.py @@ -12,6 +12,7 @@ from django.utils import translation, timezone from django.utils.translation import gettext as _ from django.utils.translation import gettext_lazy +import tldextract from askbot import const from askbot.conf import settings as askbot_settings from askbot.utils import functions @@ -25,8 +26,9 @@ def get_organization_name_from_domain(domain): The organization name is the second level domain name, sentence-cased. 
""" - base_domain = domain.split('.')[-2] - return base_domain.capitalize() + result = tldextract.extract(domain) + raw_name = result.domain + return '-'.join(part.capitalize() for part in raw_name.split('-')) class InvitedModerator(object): """Mock user class to represent invited moderators""" @@ -531,13 +533,13 @@ def get_personal(self): name__startswith=PERSONAL_GROUP_NAME_PREFIX ) - def get_for_user(self, user=None, private=False): + def get_for_user(self, user=None, private=False, used_for_analytics=False): gms = GroupMembership.objects.filter(user=user) if private: global_group = Group.objects.get_global_group() gms = gms.exclude(group=global_group) group_ids = gms.values_list('group_id', flat=True) - return Group.objects.filter(pk__in=group_ids) + return Group.objects.filter(pk__in=group_ids, used_for_analytics=used_for_analytics) def get_by_name(self, group_name = None): from askbot.models.tag import clean_group_name#todo - delete this @@ -573,7 +575,9 @@ def create(self, **kwargs): pass return super().create(**kwargs) - def get_or_create(self, name=None, user=None, openness=None, visibility=None): + def get_or_create(self, name=None, + user=None, openness=None, + visibility=None, used_for_analytics=False): """creates a group tag or finds one, if exists""" #todo: here we might fill out the group profile try: @@ -588,7 +592,10 @@ def get_or_create(self, name=None, user=None, openness=None, visibility=None): if openness is None: openness = self.model.DEFAULT_OPENNESS - group = self.create(name=name, openness=openness, visibility=visibility) + group = self.create(name=name, + openness=openness, + visibility=visibility, + used_for_analytics=used_for_analytics) created = True return group, created @@ -619,6 +626,7 @@ class Group(AuthGroup): openness = models.SmallIntegerField(default=DEFAULT_OPENNESS, choices=OPENNESS_CHOICES) visibility = models.SmallIntegerField(default=const.GROUP_VISIBILITY_PUBLIC, choices=const.GROUP_VISIBILITY_CHOICES) + used_for_analytics 
= models.BooleanField(default=False) # preapproved email addresses and domain names to auto-join groups # trick - the field is padded with space and all tokens are space separated diff --git a/askbot/tests/test_management_commands.py b/askbot/tests/test_management_commands.py index c44d59813c..5a2aaf2eea 100644 --- a/askbot/tests/test_management_commands.py +++ b/askbot/tests/test_management_commands.py @@ -12,7 +12,7 @@ from django.core import management, mail from django.conf import settings as django_settings from django.contrib import auth -from django.contrib.auth.models import User +from django.contrib.auth.models import User, Group as AuthGroup from askbot.utils.html import site_url from askbot.utils.url_utils import reload_urlconf from askbot.tests.utils import AskbotTestCase @@ -386,5 +386,7 @@ def test_askbot_create_per_email_domain_groups(self): management.call_command('askbot_create_per_email_domain_groups') count = models.Group.objects.filter(name__in=('Org1', 'Org2')).count() self.assertEqual(count, 2) + self.assertEqual(AuthGroup.objects.filter(name='Org1').count(), 1) + self.assertEqual(AuthGroup.objects.filter(name='Org2').count(), 1) diff --git a/askbot/utils/console.py b/askbot/utils/console.py index 562e09cce1..fde9eb38ff 100644 --- a/askbot/utils/console.py +++ b/askbot/utils/console.py @@ -165,7 +165,7 @@ def simple_dialog(prompt_phrase, required=False, default=None): if response or required is False: return response - + time.sleep(.1) @@ -250,14 +250,15 @@ class ProgressBar(object): """A wrapper for an iterator, that prints a progress bar along the way of iteration """ - def __init__(self, iterable, length, message = ''): + def __init__(self, iterable, length, message = '', silent=False): self.iterable = iterable self.length = length self.counter = float(0) self.max_barlen = 60 self.curr_barlen: float = 0 self.progress = '' - if message and length > 0: + self.silent = silent + if message and length > 0 and not self.silent: print(message) @@ 
-286,6 +287,8 @@ def backspace_progress_percent(self): def print_progress_percent(self): """prints percent of achieved progress""" + if self.silent: + return self.progress = ' %.2f%%' % (100 * (self.counter/self.length)) sys.stdout.write(self.progress) sys.stdout.flush() @@ -300,12 +303,14 @@ def __next__(self): try: result = next(self.iterable) except StopIteration: - if self.length > 0: + if self.length > 0 and not self.silent: self.finish_progress_bar() self.print_progress_percent() sys.stdout.write('\n') raise - self.print_progress_bar() + if not self.silent: + self.print_progress_bar() + self.counter += 1 return result diff --git a/askbot/views/commands.py b/askbot/views/commands.py index 7e858cd9d7..fcaba3691a 100644 --- a/askbot/views/commands.py +++ b/askbot/views/commands.py @@ -1410,6 +1410,7 @@ def publish_post(request): if request.user.is_authenticated: if request.user.is_administrator_or_moderator() is False: raise exceptions.PermissionDenied(denied_msg) + #todo: assert permission post_id = IntegerField().clean(request.POST['post_id']) post = models.Post.objects.get(pk=post_id) diff --git a/askbot_requirements.txt b/askbot_requirements.txt index 4005182b0d..c2fa623cac 100644 --- a/askbot_requirements.txt +++ b/askbot_requirements.txt @@ -34,5 +34,6 @@ regex requests-oauthlib>=1.2.0 requirements-parser>=0.2.0 responses>=0.9.0,<=0.23.1 +tldextract==5.1.2 unidecode urllib3<1.27,>=1.21.1