set review caching table

pennlabs · Aug 14, 2023 · 490b986 · 490b986
1 parent 4baaafc
commit 490b986
Showing 1 changed file with 123 additions and 40 deletions.
diff --git a/backend/review/models.py b/backend/review/models.py
@@ -2,10 +2,12 @@
 from django.db import models
 from django.db.models import Avg, Q, UniqueConstraint, QuerySet
 from django.core.exceptions import ObjectDoesNotExist
+from backend.review.annotations import annotate_with_matching_reviews
 from courses.models import Topic, Instructor, Department
 from django.db import transaction
 
-from review.annotations import review_averages
+from review.annotations import review_averages, annotate_average_and_recent
+from review.views import review_filters_pcr, section_filters_pcr, course_filters_pcr
 
 class Review(models.Model):
     """
@@ -141,6 +143,17 @@ def __str__(self):
     "semester_count"
 ]
 ALL_METRIC_SLUGS = FIELD_SLUGS + EXTRA_METRICS_SLUGS + SEMESTER_AGGREGATION_SLUGS
+# Annotation names carrying the "average_" prefix, one per metric slug.
+_ALL_METRiC_SLUGS_AVERAGE = ["average_" + metric for metric in ALL_METRIC_SLUGS]
+# Annotation names carrying the "recent_" prefix, one per metric slug.
+_ALL_METRiC_SLUGS_RECENT = ["recent_" + metric for metric in ALL_METRIC_SLUGS]
+# Both families in a single list, "average_" names first.
+_ALL_METRiC_SLUGS_AVERAGE_RECENT = _ALL_METRiC_SLUGS_AVERAGE + _ALL_METRiC_SLUGS_RECENT
+
 
 class AverageBit(models.Model):
     """
@@ -152,31 +165,32 @@ class AverageBit(models.Model):
 
     field = models.CharField(max_length=32, db_index=True)
     average = models.DecimalField(max_digits=6, decimal_places=5) # TODO: check how n/a values are handled
-    average_reviews = models.ForeignKey("AverageReviews", on_delete=models.CASCADE, related_name='bits', db_index=True) # TODO: add help strings
+    count = models.PositiveIntegerField(help_text="Number of reviews that this average is based on")  # Django's option is `help_text`; `help=` raises TypeError
+    average_review = models.ForeignKey("AverageReview", on_delete=models.CASCADE, related_name='bits', db_index=True) # TODO: add help strings
+    average_or_recent = models.BooleanField()  # True for an "average_" bit, False for a "recent_" bit
+
 
-class AverageReviews(models.Model):
+# (index stored in AverageReview.model, model name, model class)
+MODEL_MAP = (
+    (1, "Topic", Topic),
+    (2, "Instructor", Instructor),  # the comma matters: without it Python tries to call this tuple
+    (3, "Department", Department),
+)
+# (index, name) pairs, used as the `choices` of AverageReview.model
+MODEL_OPTIONS = tuple((index, name) for index, name, _ in MODEL_MAP)
+# MODEL_OPTIONS holds 2-tuples, so these two unpack exactly two values
+MODEL_OPTIONS_DICT = {index: name for index, name in MODEL_OPTIONS}
+REV_MODEL_OPTIONS_DICT = {name: index for index, name in MODEL_OPTIONS}
+MODEL_INDEX_TO_CLASS = {index: model_class for index, _, model_class in MODEL_MAP}
+
+
+class AverageReview(models.Model):
     """
     The reviews for a model (e.g., topic or instructor). 
     This is used to cache the reviews for a given instance of that model. 
     It is expected that subclasses of BaseReviewAverage will be instantiated by a cron job.
-
-    The average_or_recent field is used to distinguish between the average and recent reviews.
     """
 
-    MODEL_MAP = (
-        (1, "Topic", Topic),
-        (2, "Instructor", Instructor)
-        (3, "Department", Department)
-    )
-    MODEL_OPTIONS = tuple([(k, v) for k, v, _ in MODEL_MAP])
-    MODEL_OPTIONS_DICT = {k: v for k, v, _ in MODEL_OPTIONS}
-    MODEL_OPTIONS_REV_DICT = {v: k for k, v, _ in MODEL_OPTIONS}
-    MODEL_INDEX_TO_CLASS = {k: v for k, _, v in MODEL_MAP}
-
     model = models.SmallIntegerField(choices=MODEL_OPTIONS) 
     instance_id = models.PositiveBigIntegerField() # note: this is used to store primary keys, but it supports a 0 value (not supported for primary keys)
-    updated_at = models.DateTimeField(auto_now=True, help="Tracks the freshness of the average")
-    average_or_recent = models.BooleanField()
+    # Django's option is `help_text`; passing `help=` raises TypeError when the class body runs
+    updated_at = models.DateTimeField(auto_now=True, help_text="Tracks the freshness of the average")
 
     class Meta:
         constraints = [
@@ -187,39 +201,108 @@ class Meta:
         ]
 
     @classmethod
-    def get_average(
+    def get_or_set_average(
         cls,
         model: str,
         instance_id: int,
-        average_or_recent: bool
+        average_or_recent_or_both: bool | None
     ) -> QuerySet["AverageBit"]:
-        model_index = cls.MODEL_OPTIONS_REV_DICT[model]
+        """
+        Returns the cached AverageBits for one instance, computing them on a cache miss.
+        :param model: The name of the model ("Topic", "Instructor" or "Department")
+        :param instance_id: The primary key of the instance of `model`
+        :param average_or_recent_or_both: True for the average bits, False for the recent bits.
+            If None, both are returned.
+        :return: The queryset of matching AverageBits
+        :raises KeyError: if `model` is not a key of REV_MODEL_OPTIONS_DICT
+        :raises ObjectDoesNotExist: if no AverageReview exists even after computing
+        """
+        # The lookup tables live at module level, not on this class.
+        model_index = REV_MODEL_OPTIONS_DICT[model]
         try:
-            bits = cls.objects.get(model=model_index, average_or_recent=average_or_recent, instance_id=instance_id).bits.all()
-            assert len(bits) == len(ALL_METRIC_SLUGS) # TODO: remove in production
-            return bits
+            average_review = cls.objects.get(
+                model=model_index,
+                instance_id=instance_id
+            )
         except ObjectDoesNotExist:
-            # call code to try to compute it
-            with transaction.atomic():
-                average_reviews = cls.create(
-                    model=model,
-                    instance_id=instance_id,
-                    average_or_recent=average_or_recent
+            # Cache miss: compute the averages for just this instance, then re-read
+            # the row so that both paths return a queryset of AverageBits.
+            cls.set_averages(
+                model,
+                queryset=MODEL_INDEX_TO_CLASS[model_index].objects.filter(pk=instance_id),
+                average_or_recent_or_both=average_or_recent_or_both
+            )
+            average_review = cls.objects.get(
+                model=model_index,
+                instance_id=instance_id
+            )
+        if average_or_recent_or_both is None:
+            return average_review.bits.all()
+        return average_review.bits.filter(average_or_recent=average_or_recent_or_both)
+
+
+    @classmethod
+    def set_averages(
+        cls,
+        model: str,
+        queryset: QuerySet,
+        average_or_recent_or_both: bool | None
+    ):
+        """
+        Creates and returns averages for all of the items in the queryset.
+        :param model: The name of the model to create averages for
+            ("Instructor", "Topic" or "Department")
+        :param queryset: The queryset of instances of `model` to create averages for
+        :param average_or_recent_or_both: True to create averages, False to create recent
+            averages. If None, both are created.
+        :return: The list of AverageReviews that were created or refreshed
+        :raises ValueError: if `model` is not one of the supported model names
+        """
+
+        # Dict dispatch instead of `match`, since prod doesn't support updated python.
+        # Each entry is (filter for sections, filter for reviews).
+        match_on = {
+            "Instructor": (
+                Q(instructor__in=queryset),
+                Q(instructor__in=queryset),
+            ),
+            "Topic": (
+                Q(course__topic__in=queryset),
+                Q(section__course__topic__in=queryset),
+            ),
+            "Department": (
+                Q(course__department__in=queryset),
+                Q(section__course__department__in=queryset),
+            ),
+        }
+        if model not in match_on:
+            raise ValueError(f"Unsupported model for averages: {model!r}")
+        match_section_on, match_review_on = match_on[model]
+        # AverageReview.model is an integer field, so store the index, not the name.
+        model_index = REV_MODEL_OPTIONS_DICT[model]
+
+        want_average = average_or_recent_or_both is None or average_or_recent_or_both
+        want_recent = average_or_recent_or_both is None or not average_or_recent_or_both
+
+        # Chain both annotation passes onto one queryset so every requested field
+        # can be read from the same row.
+        fields = []
+        annotated = queryset
+        if want_average:
+            annotated = annotate_with_matching_reviews(
+                annotated,
+                match_section_on=match_section_on & section_filters_pcr,
+                match_review_on=match_review_on & review_filters_pcr,
+                most_recent=False,
+                prefix="average_",
+                extra_metrics=True,
+            )
+            fields += _ALL_METRiC_SLUGS_AVERAGE
+        if want_recent:
+            annotated = annotate_with_matching_reviews(
+                annotated,
+                match_section_on=match_section_on & section_filters_pcr,
+                match_review_on=match_review_on & review_filters_pcr,
+                most_recent=True,  # was False, which recomputed the averages under the "recent_" prefix
+                prefix="recent_",
+                extra_metrics=True,
+            )
+            fields += _ALL_METRiC_SLUGS_RECENT
+
+        with transaction.atomic():
+            average_reviews = []
+            # .values() yields one dict per instance, so annotations are read by name
+            # and each row carries its own pk (no zip over separately ordered querysets).
+            for row in annotated.values("pk", *fields).distinct():
+                # update_or_create saves the row, which refreshes `updated_at`
+                average_review, _ = cls.objects.update_or_create(
+                    model=model_index,
+                    instance_id=row["pk"],
+                )
+                for field in fields:
+                    value = row[field]
+                    if value is None:
+                        # AverageBit.average is not nullable, so a metric with no
+                        # value gets no bit.
+                        continue
+                    # NOTE(review): AverageBit.count is required but no annotation
+                    # visible here provides it; add it to `defaults` once its source
+                    # is confirmed, otherwise creating a new bit fails.
+                    AverageBit.objects.update_or_create(
+                        average_review=average_review,
+                        field=field,
+                        average_or_recent=field.startswith("average_"),
+                        # keep the value out of the lookup so a changed average
+                        # updates the existing bit instead of adding a second one
+                        defaults={"average": value},
+                    )
+                average_reviews.append(average_review)
+
+            return average_reviews
-                )
-
-                annotation_dict = review_averages(
-                    queryset=cls.MODEL_INDEX_TO_CLASS[model_index].objects.filter(pk=instance_id),
-                    reviewbit_subfilters=[], # TODO
-                    section_subfilters=[], # TODO
-                    fields=FIELD_SLUGS,
-                    semester_aggregations=True # TODO: figure out where this is used
-                ).values().get()
-
-                # create the AverageBit for each one
-                for field in ALL_METRIC_SLUGS:
-                    AverageBit.objects.create(
-                        field=field,
-                        average=annotation_dict[field],
-                        average_reviews=average_reviews
-                    )                
-            return average_reviews.bits.all()