Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated caching strategy #576

Merged
merged 47 commits into from
Nov 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
488aa1e
Add New Cache Strategy
shiva-menta Jan 31, 2024
0780c5b
lint
shiva-menta Jan 31, 2024
fef73cb
Update clear cache + refactor precompute into separate command
AaDalal Jan 31, 2024
2ae285e
fix circular imports
AaDalal Jan 31, 2024
302473c
add cache backends + get it running
AaDalal Jan 31, 2024
81c3b04
Explicitly only cache most recent topic of full_code
AaDalal Jan 31, 2024
28fc5ba
switch to redis cache for B/G caches
AaDalal Jan 31, 2024
21c80e9
Working with course reviews
AaDalal Jan 31, 2024
7b8117d
Fix superseded logic if there are no superseding courses
AaDalal Jan 31, 2024
1c467e9
Add tests
AaDalal Feb 4, 2024
04906e6
fix failing test
AaDalal Feb 4, 2024
f2891b3
fix failing old pcr test (also fail on master)
AaDalal Feb 4, 2024
32c727d
add cache clearing
AaDalal Feb 4, 2024
b09a0b0
handle null returns from superseded aggregation
AaDalal Feb 9, 2024
dc3fd4b
Updated DB Strategy
shiva-menta Mar 22, 2024
2eaa186
Change to DB Cache
shiva-menta Mar 23, 2024
bf05b33
Add Transaction and Output Counts
shiva-menta Mar 25, 2024
574d8a0
Modify CronTime and Fix Caching
shiva-menta Apr 21, 2024
3616679
Change Courses cronjob
shiva-menta Apr 22, 2024
2766b66
Add Initial Tests
shiva-menta Apr 23, 2024
82463ca
Modify Command for Course Selection
shiva-menta Apr 24, 2024
dd91f16
Fix Tests
shiva-menta Apr 24, 2024
0db42a2
Remove Old Method Code Leftover
shiva-menta Apr 24, 2024
139bf45
Lint
shiva-menta Apr 24, 2024
11321d3
Lint again..
shiva-menta Apr 24, 2024
78964ea
Fix Failing Tests with Topic fix
shiva-menta Apr 24, 2024
1c2f442
Lint error
shiva-menta Apr 24, 2024
54d9995
Merge branch 'master' of github.com:pennlabs/penn-courses into update…
shiva-menta Apr 24, 2024
c36b84f
Resolving PR Nits
shiva-menta May 17, 2024
2b7e704
Resolve Migration Issue
shiva-menta May 17, 2024
dad1130
Linting Migration Files
shiva-menta May 17, 2024
5159a38
fix(type)
esinx May 17, 2024
7b0c121
fix(lint)
esinx May 17, 2024
5896fae
Merge branch 'master' into updated_caching_strategy
shiva-menta May 17, 2024
44c1b71
Merge in Master
shiva-menta Oct 22, 2024
33da750
Script Cleanup
shiva-menta Oct 25, 2024
721fd48
Write More Comprehensive Tests
shiva-menta Oct 25, 2024
80937d1
Merge in Master
shiva-menta Oct 25, 2024
0e710b0
fix lint error
shiva-menta Oct 25, 2024
949fe74
Remove Old Migration Files
shiva-menta Oct 26, 2024
63a8b15
Clean Up Migration Files
shiva-menta Oct 26, 2024
2db49cb
Add Manual Cache Prefix
shiva-menta Oct 26, 2024
c7fd22e
Remove tsbuildinfo Files
shiva-menta Oct 26, 2024
c21473a
Revert registrarimport cronjob to original times
shiva-menta Oct 26, 2024
39d76fb
Final fix
shiva-menta Oct 26, 2024
c70e1c2
Final Changes
shiva-menta Nov 1, 2024
9d88925
Final Changes
shiva-menta Nov 1, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions backend/PennCourses/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,3 +238,6 @@
# The name of the schedule that is created/verified by Penn Mobile,
# containing the user's active course registrations from Path.
PATH_REGISTRATION_SCHEDULE_NAME = "Path Registration"

# Manually Set Cache Prefix
CACHE_PREFIX = "MANUAL_CACHE_"
3 changes: 3 additions & 0 deletions backend/courses/management/commands/registrarimport.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from courses.models import Department, Section
from courses.util import get_current_semester, upsert_course_from_opendata
from review.management.commands.clearcache import clear_cache
from review.management.commands.precompute_pcr_views import precompute_pcr_views


def registrar_import(semester=None, query=""):
Expand Down Expand Up @@ -45,6 +46,8 @@ def registrar_import(semester=None, query=""):
# (cron job only does current semester, which is either fall or spring)
registrar_import(semester=semester[:-1] + "B", query=query)

precompute_pcr_views(verbose=True, is_new_data=False)


class Command(BaseCommand):
help = "Load in courses, sections and associated models from the Penn registrar and requirements data sources." # noqa: E501
Expand Down
5 changes: 5 additions & 0 deletions backend/review/management/commands/clearcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from django.core.cache import cache
from django.core.management import BaseCommand

from PennCourses.settings.base import CACHE_PREFIX


def clear_cache():
# If we are not using redis as the cache backend, then we can delete everything from the cache.
Expand All @@ -22,6 +24,9 @@ def clear_cache():
for key in r.scan_iter("*views.decorators.cache*"):
r.delete(key)
del_count += 1
for key in r.scan_iter(f"*{CACHE_PREFIX}*"):
r.delete(key)
del_count += 1
return del_count


Expand Down
2 changes: 2 additions & 0 deletions backend/review/management/commands/iscimport.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
)
from review.import_utils.parse_sql import load_sql_dump
from review.management.commands.clearcache import clear_cache
from review.management.commands.precompute_pcr_views import precompute_pcr_views
from review.models import Review


Expand Down Expand Up @@ -277,6 +278,7 @@ def handle(self, *args, **kwargs):

print("Recomputing Section.has_reviews...")
recompute_has_reviews()
precompute_pcr_views(verbose=True, is_new_data=True)

print("Done.")
return 0
113 changes: 113 additions & 0 deletions backend/review/management/commands/precompute_pcr_views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import logging

from django.core.cache import cache
from django.core.management.base import BaseCommand
from django.db import transaction
from tqdm import tqdm

from courses.models import Topic
from PennCourses.settings.base import CACHE_PREFIX
from review.models import CachedReviewResponse
from review.views import manual_course_reviews


def precompute_pcr_views(verbose=False, is_new_data=False):
if verbose:
print("Now precomputing PCR reviews.")

objs_to_insert = []
objs_to_update = []
cache_deletes = set()
valid_reviews_in_db = total_reviews = 0

with transaction.atomic():
shiva-menta marked this conversation as resolved.
Show resolved Hide resolved
# Mark all the topics as expired.
CachedReviewResponse.objects.all().update(expired=True)
cached_responses = CachedReviewResponse.objects.all()
topic_id_to_response_obj = {response.topic_id: response for response in cached_responses}

# Iterate through all topics.
for topic in tqdm(
Topic.objects.all().select_related("most_recent").order_by("most_recent__semester"),
disable=not verbose,
):
# get topic id
course_id_list, course_code_list = zip(*topic.courses.values_list("id", "full_code"))
topic_id = ".".join([str(id) for id in sorted(course_id_list)])
total_reviews += 1

if topic_id in topic_id_to_response_obj:
# current topic id is already cached
valid_reviews_in_db += 1
response_obj = topic_id_to_response_obj[topic_id]
response_obj.expired = False

if is_new_data:
cache_deletes.add(CACHE_PREFIX + topic_id)
review_data = manual_course_reviews(
topic.most_recent.full_code, topic.most_recent.semester
)
if not review_data:
logging.info(
f"Invalid review data for ("
f"topic_id={topic_id},"
f"course_code={course_code_list[0]},"
f"semester={topic.most_recent.semester})"
)
continue
response_obj.response = review_data
objs_to_update.append(response_obj)
else:
# current topic id is not cached
review_data = manual_course_reviews(
topic.most_recent.full_code, topic.most_recent.semester
)
if not review_data:
logging.info(
f"Invalid review data for ("
f"topic_id={topic_id},"
f"course_code={course_code_list[0]},"
f"semester={topic.most_recent.semester})"
)
continue

objs_to_insert.append(
CachedReviewResponse(topic_id=topic_id, response=review_data, expired=False)
)
for course_code in course_code_list:
curr_topic_id = cache.get(CACHE_PREFIX + course_code)
if curr_topic_id:
cache_deletes.add(CACHE_PREFIX + curr_topic_id)
cache_deletes.add(CACHE_PREFIX + course_code)

if verbose:
print(
f"{total_reviews} course reviews covered, "
f"{valid_reviews_in_db} of which were already in the database. "
f"{len(objs_to_insert)} course reviews were created. "
f"{len(objs_to_update)} course reviews were updated."
)

# Bulk create / update objects.
if verbose:
print("Creating and updating objects.")
CachedReviewResponse.objects.bulk_create(objs_to_insert)
CachedReviewResponse.objects.bulk_update(objs_to_update, ["response", "expired"])

# Bulk delete objects.
if verbose:
print("Deleting expired objects.")
CachedReviewResponse.objects.filter(expired=True).delete()
cache.delete_many(cache_deletes)


class Command(BaseCommand):
help = "Precompute PCR views for all topics"

def add_arguments(self, parser):
parser.add_argument(
"--new_data", action="store_true", help="Include this flag to recalculate review data."
)

def handle(self, *args, **kwargs):
precompute_pcr_views(verbose=True, is_new_data=kwargs["new_data"])
27 changes: 27 additions & 0 deletions backend/review/migrations/0006_cachedreviewresponse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Generated by Django 5.0.2 on 2024-10-26 03:37

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
("review", "0005_review_comments"),
]

operations = [
migrations.CreateModel(
name="CachedReviewResponse",
fields=[
(
"id",
models.AutoField(
auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
),
),
("topic_id", models.CharField(db_index=True, max_length=1000, unique=True)),
("response", models.JSONField()),
("expired", models.BooleanField(default=True)),
],
),
]
13 changes: 13 additions & 0 deletions backend/review/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,19 @@ def get_averages(topic_id, instructor_name=None, fields=None):
ALL_FIELD_SLUGS = [x[2] for x in REVIEW_BIT_LABEL]


class CachedReviewResponse(models.Model):
"""
Represents a mapping from temp_topic_id (string-delimited list of sorted courses within a topic)
shiva-menta marked this conversation as resolved.
Show resolved Hide resolved
to a JSON object storing summarized course review data (all the data frontend uses to display
reviews).
"""

shiva-menta marked this conversation as resolved.
Show resolved Hide resolved
# Using large string to account for topics with many course ids.
topic_id = models.CharField(max_length=1000, db_index=True, unique=True)
response = models.JSONField()
expired = models.BooleanField(default=True)


class ReviewBit(models.Model):
"""
A single key/value pair associated with a review. Fields are things like "course_quality",
Expand Down
2 changes: 1 addition & 1 deletion backend/review/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
urlpatterns = [
path(
"course/<slug:course_code>",
cache_page(MONTH_IN_SECONDS)(course_reviews),
course_reviews,
name="course-reviews",
),
path(
Expand Down
Loading
Loading