Updated caching strategy #576

Merged (47 commits, Nov 1, 2024). The diff below shows changes from 13 of the 47 commits.

Commits:
488aa1e  Add New Cache Strategy (shiva-menta, Jan 31, 2024)
0780c5b  lint (shiva-menta, Jan 31, 2024)
fef73cb  Update clear cache + refactor precompute into separate command (AaDalal, Jan 31, 2024)
2ae285e  fix circular imports (AaDalal, Jan 31, 2024)
302473c  add cache backends + get it running (AaDalal, Jan 31, 2024)
81c3b04  Explicitly only cache most recent topic of full_code (AaDalal, Jan 31, 2024)
28fc5ba  switch to redis cache for B/G caches (AaDalal, Jan 31, 2024)
21c80e9  Working with course reviews (AaDalal, Jan 31, 2024)
7b8117d  Fix superseded logic if there are no superseding courses (AaDalal, Jan 31, 2024)
1c467e9  Add tests (AaDalal, Feb 4, 2024)
04906e6  fix failing test (AaDalal, Feb 4, 2024)
f2891b3  fix failing old pcr test (also fail on master) (AaDalal, Feb 4, 2024)
32c727d  add cache clearing (AaDalal, Feb 4, 2024)
b09a0b0  handle null returns from superseded aggregation (AaDalal, Feb 9, 2024)
dc3fd4b  Updated DB Strategy (shiva-menta, Mar 22, 2024)
2eaa186  Change to DB Cache (shiva-menta, Mar 23, 2024)
bf05b33  Add Transaction and Output Counts (shiva-menta, Mar 25, 2024)
574d8a0  Modify CronTime and Fix Caching (shiva-menta, Apr 21, 2024)
3616679  Change Courses cronjob (shiva-menta, Apr 22, 2024)
2766b66  Add Initial Tests (shiva-menta, Apr 23, 2024)
82463ca  Modify Command for Course Selection (shiva-menta, Apr 24, 2024)
dd91f16  Fix Tests (shiva-menta, Apr 24, 2024)
0db42a2  Remove Old Method Code Leftover (shiva-menta, Apr 24, 2024)
139bf45  Lint (shiva-menta, Apr 24, 2024)
11321d3  Lint again.. (shiva-menta, Apr 24, 2024)
78964ea  Fix Failing Tests with Topic fix (shiva-menta, Apr 24, 2024)
1c2f442  Lint error (shiva-menta, Apr 24, 2024)
54d9995  Merge branch 'master' of github.com:pennlabs/penn-courses into update… (shiva-menta, Apr 24, 2024)
c36b84f  Resolving PR Nits (shiva-menta, May 17, 2024)
2b7e704  Resolve Migration Issue (shiva-menta, May 17, 2024)
dad1130  Linting Migration Files (shiva-menta, May 17, 2024)
5159a38  fix(type) (esinx, May 17, 2024)
7b0c121  fix(lint) (esinx, May 17, 2024)
5896fae  Merge branch 'master' into updated_caching_strategy (shiva-menta, May 17, 2024)
44c1b71  Merge in Master (shiva-menta, Oct 22, 2024)
33da750  Script Cleanup (shiva-menta, Oct 25, 2024)
721fd48  Write More Comprehensive Tests (shiva-menta, Oct 25, 2024)
80937d1  Merge in Master (shiva-menta, Oct 25, 2024)
0e710b0  fix lint error (shiva-menta, Oct 25, 2024)
949fe74  Remove Old Migration Files (shiva-menta, Oct 26, 2024)
63a8b15  Clean Up Migration Files (shiva-menta, Oct 26, 2024)
2db49cb  Add Manual Cache Prefix (shiva-menta, Oct 26, 2024)
c7fd22e  Remove tsbuildinfo Files (shiva-menta, Oct 26, 2024)
c21473a  Revert registrarimport cronjob to original times (shiva-menta, Oct 26, 2024)
39d76fb  Final fix (shiva-menta, Oct 26, 2024)
c70e1c2  Final Changes (shiva-menta, Nov 1, 2024)
9d88925  Final Changes (shiva-menta, Nov 1, 2024)
Files changed
backend/PennCourses/settings/base.py (1 addition, 0 deletions)

@@ -186,6 +186,7 @@
 # Redis
 REDIS_URL = os.environ.get("REDIS_URL", "redis://localhost:6379/1")
+REDIS_URL_BASE = REDIS_URL.rsplit("/", 1)[0]

 # Celery
 MESSAGE_BROKER_URL = REDIS_URL
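
For context (not part of the diff), a minimal sketch of what the new REDIS_URL_BASE setting evaluates to with the default local REDIS_URL, and how the blue/green settings below append a Redis database index to it:

# Minimal sketch (not part of the PR) of the REDIS_URL_BASE derivation,
# using the default local REDIS_URL from base.py.
REDIS_URL = "redis://localhost:6379/1"
REDIS_URL_BASE = REDIS_URL.rsplit("/", 1)[0]  # "redis://localhost:6379"

blue_location = REDIS_URL_BASE + "/2"   # matches the "blue" cache LOCATION below
green_location = REDIS_URL_BASE + "/3"  # matches the "green" cache LOCATION below
print(REDIS_URL_BASE, blue_location, green_location)
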
backend/PennCourses/settings/development.py (15 additions, 1 deletion)

@@ -16,5 +16,19 @@
 CACHES = {
     "default": {
         "BACKEND": "django.core.cache.backends.dummy.DummyCache",
-    }
+    },
+    "blue": {
+        "BACKEND": "django_redis.cache.RedisCache",
+        "LOCATION": REDIS_URL_BASE + "/2",
+        "OPTIONS": {
+            "CLIENT_CLASS": "django_redis.client.DefaultClient",
+        },
+    },
+    "green": {
+        "BACKEND": "django_redis.cache.RedisCache",
+        "LOCATION": REDIS_URL_BASE + "/3",
+        "OPTIONS": {
+            "CLIENT_CLASS": "django_redis.client.DefaultClient",
+        },
+    },
 }
backend/PennCourses/settings/production.py (16 additions, 1 deletion)

@@ -32,7 +32,22 @@
         "OPTIONS": {
             "CLIENT_CLASS": "django_redis.client.DefaultClient",
         },
-    }
+    },
+    "blue": {
+        "BACKEND": "django_redis.cache.RedisCache",
+        "LOCATION": REDIS_URL_BASE + "/2",
+        "OPTIONS": {
+            "CLIENT_CLASS": "django_redis.client.DefaultClient",
+        },
+    },
+    "green": {
+        "BACKEND": "django_redis.cache.RedisCache",
+        "LOCATION": REDIS_URL_BASE + "/3",
+        "OPTIONS": {
+            "CLIENT_CLASS": "django_redis.client.DefaultClient",
+        },
+    },
+
 }

 MOBILE_NOTIFICATION_SECRET = os.environ.get("MOBILE_NOTIFICATION_SECRET", "")
backend/courses/management/commands/registrarimport.py (4 additions, 0 deletions)

@@ -1,7 +1,9 @@
 import logging

+from django.core.cache import caches
 from django.core.management.base import BaseCommand
 from tqdm import tqdm
+from review.management.commands.precompute_pcr_views import precompute_pcr_views

 from courses import registrar
 from courses.management.commands.loadstatus import set_all_status

@@ -44,6 +46,8 @@ def registrar_import(semester=None, query=""):
         # Make sure to load in summer course data as well
         # (cron job only does current semester, which is either fall or spring)
         registrar_import(semester=semester[:-1] + "B", query=query)
+
+    precompute_pcr_views()


 class Command(BaseCommand):
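
With this change, a registrar import finishes by rebuilding the precomputed PCR review cache. A hedged sketch of triggering the same command programmatically with Django's call_command (a configured settings module is assumed, and any command-specific flags are omitted):

# Hedged sketch (not part of the PR): run the registrarimport command from code.
# Equivalent to "python manage.py registrarimport"; assumes DJANGO_SETTINGS_MODULE
# is configured. After the import completes, precompute_pcr_views() repopulates
# the blue/green PCR caches.
from django.core.management import call_command

call_command("registrarimport")
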
backend/review/management/commands/clearcache.py (30 additions, 16 deletions)

@@ -2,33 +2,47 @@

 import redis
 from django.conf import settings
-from django.core.cache import cache
+from django.core.cache import caches
 from django.core.management import BaseCommand


-def clear_cache():
+def clear_cache(clear_pcr_cache=False, _cache_alias="default", _redis_delete_keys="*views.decorators.cache*"):
     # If we are not using redis as the cache backend, then we can delete everything from the cache.
+    del_count = -1
     if (
         settings.CACHES is None
-        or settings.CACHES.get("default").get("BACKEND") != "django_redis.cache.RedisCache"
+        or settings.CACHES.get(_cache_alias).get("BACKEND") != "django_redis.cache.RedisCache"
     ):
-        cache.clear()
-        return -1
+        caches[_cache_alias].clear()
+    else:
+        # If redis is the cache backend, then we need to be careful to only delete django cache entries,
+        # since celery also uses redis as a message broker backend.
+        redis_url = settings.CACHES.get(_cache_alias).get("LOCATION")
+        r = redis.Redis.from_url(redis_url)
+        del_count = 0
+        for key in r.scan_iter(_redis_delete_keys):
+            r.delete(key)
+            del_count += 1

-    # If redis is the cache backend, then we need to be careful to only delete django cache entries,
-    # since celery also uses redis as a message broker backend.
-    r = redis.Redis.from_url(settings.REDIS_URL)
-    del_count = 0
-    for key in r.scan_iter("*views.decorators.cache*"):
-        r.delete(key)
-        del_count += 1
+    if clear_pcr_cache:
+        # avoid circular import
+        if del_count == -1:  # TODO: this hacky
+            del_count = 0
+        del_count += clear_cache(clear_pcr_cache=False, _cache_alias="green", _redis_delete_keys=f"*")
+        del_count += clear_cache(clear_pcr_cache=False, _cache_alias="blue", _redis_delete_keys=f"*")
     return del_count


 class Command(BaseCommand):
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "--clear_pcr_cache",
+            action="store_true",
+            help="If set, will clear the PCR cache as well.",
+        )
+
     def handle(self, *args, **options):
         root_logger = logging.getLogger("")
         root_logger.setLevel(logging.DEBUG)

-        del_count = clear_cache()
-        print(f"{del_count if del_count >=0 else 'all'} cache entries removed.")
+        del_count = clear_cache(clear_pcr_cache=options["clear_pcr_cache"])
+        print(f"{'At least ' if options['clear_pcr_cache'] else ''}"
+              f"{del_count if del_count >=0 else 'all'} cache entries removed.")
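
For reference, the updated command can be driven either from the CLI or via call_command; the new --clear_pcr_cache flag additionally wipes both the blue and green caches. A hedged sketch:

# Hedged sketch (not part of the PR): invoking the updated clearcache command.
# Equivalent to "python manage.py clearcache [--clear_pcr_cache]".
from django.core.management import call_command

call_command("clearcache")                        # clears only the default cache
call_command("clearcache", clear_pcr_cache=True)  # also clears the blue and green PCR caches
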
backend/review/management/commands/iscimport.py (1 addition, 1 deletion)

@@ -270,7 +270,7 @@ def handle(self, *args, **kwargs):
         self.close_files(files)
         # invalidate cached views
         print("Invalidating cache...")
-        del_count = clear_cache()
+        del_count = clear_cache(clear_pcr_cache=True)
         print(f"{del_count if del_count >=0 else 'all'} cache entries removed.")

         gc.collect()
backend/review/management/commands/precompute_pcr_views.py (new file, 55 additions)

@@ -0,0 +1,55 @@
+
+from django.core.management.base import BaseCommand
+from tqdm import tqdm
+
+from review.views import manual_course_reviews
+from courses.models import Topic
+from django.core.cache import caches
+from django.conf import settings
+from django.http import Http404
+import logging
+import redis
+
+def precompute_pcr_views(verbose=False):
+    blue_cache = caches["blue"]
+    green_cache = caches["green"]
+    green_cache.clear()
+
+    # Note: we only cache the most recent topic for each full_code (any other cache entries get overwritten)
+    for topic in tqdm(
+        Topic.objects
+        .all()
+        .select_related("most_recent")
+        .order_by("most_recent__semester"),
+        disable=not verbose
+    ):
+        course_id_list, course_code_list = zip(*topic.courses.values_list("id", "full_code"))
+        topic_id = ".".join([str(id) for id in sorted(course_id_list)])
+        if blue_cache.get(topic_id) is None:
+            try:
+                green_cache.set(
+                    topic_id,
+                    manual_course_reviews(course_code_list[0], topic.most_recent.semester),
+                )
+            except Http404:
+                logging.info(f"Topic returned 404 (topic_id {topic_id}, "
+                             f"course_code {course_code_list[0]}, semester {topic.most_recent.semester})")
+        else:
+            green_cache.set(topic_id, blue_cache.get(topic_id))
+
+        for course_code in course_code_list:
+            green_cache.set(course_code, topic_id)
+
+    blue_redis_url, blue_redis_db = settings.CACHES.get("blue").get("LOCATION").rsplit("/", 1)
+    green_redis_url, green_redis_db = settings.CACHES.get("green").get("LOCATION").rsplit("/", 1)
+    assert blue_redis_url == green_redis_url, "Expect blue and green to use the same redis instance! Aborting blue green swap."
+    r = redis.Redis.from_url(blue_redis_url)
+
+    # swap blue and green
+    r.swapdb(int(blue_redis_db), int(green_redis_db))
+
+class Command(BaseCommand):
+    help = "Precompute PCR views for all topics"
+
+    def handle(self, *args, **kwargs):
+        precompute_pcr_views(verbose=True)
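
The cutover at the end of the command relies on Redis SWAPDB: readers keep using the database number behind the "blue" alias, and the freshly written "green" contents become visible there in a single atomic step. A standalone sketch of that primitive against a local Redis (keys and values here are hypothetical):

# Standalone sketch (not part of the PR) of the SWAPDB-based blue/green cutover,
# assuming a local Redis. Keys/values are hypothetical; the real command stores
# review payloads keyed by topic id plus course_code -> topic_id pointers.
import redis

admin = redis.Redis.from_url("redis://localhost:6379")    # any DB can issue SWAPDB
blue = redis.Redis.from_url("redis://localhost:6379/2")   # what readers query
green = redis.Redis.from_url("redis://localhost:6379/3")  # freshly precomputed data

blue.set("CIS-1200", "stale-topic-id")
green.set("CIS-1200", "fresh-topic-id")

admin.swapdb(2, 3)            # atomic swap of the contents of DB 2 and DB 3
print(blue.get("CIS-1200"))   # b'fresh-topic-id' -- readers now see the new data
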
backend/review/urls.py (1 addition, 1 deletion)

@@ -18,7 +18,7 @@
 urlpatterns = [
     path(
         "course/<slug:course_code>",
-        cache_page(MONTH_IN_SECONDS)(course_reviews),
+        course_reviews,
         name="course-reviews",
     ),
     path(
backend/review/views.py (31 additions, 20 deletions)

@@ -1,6 +1,7 @@
 from collections import Counter, defaultdict

 from dateutil.tz import gettz
+from django.core.cache import caches
 from django.db.models import F, Max, OuterRef, Q, Subquery, Value
 from django.http import Http404
 from django.shortcuts import get_object_or_404

@@ -129,13 +130,25 @@ def extra_metrics_section_filters_pcr(current_semester=None):
 )
 @permission_classes([IsAuthenticated])
 def course_reviews(request, course_code, semester=None):
+    request_semester = request.GET.get("semester")
+    cache = caches["blue"]
+    topic_id = cache.get(course_code)
+    if topic_id is None:
+        return Response(manual_course_reviews(course_code, request_semester, semester))
+    response = cache.get(topic_id)
+    if response is None:
+        return Response(manual_course_reviews(course_code, request_semester, semester))
+    return Response(response)
+
+
+def manual_course_reviews(course_code, request_semester, semester=None):
     """
     Get all reviews for the topic of a given course and other relevant information.
     Different aggregation views are provided, such as reviews spanning all semesters,
     only the most recent semester, and instructor-specific views.
     """
     try:
-        semester = request.GET.get("semester")
+        semester = request_semester
         course = (
             Course.objects.filter(
                 course_filters_pcr,

@@ -168,8 +181,8 @@
                 course_filters_pcr,
                 full_code=course_code,
             )
-            .aggregate(max_semester=Max("semester"))
-            .get("max_semester")
+            .aggregate(max_semester=Max("semester", default=""))  # default of "" handles case when no course is found
+            .get("max_semester", "")
             > course.semester
             if semester
             else False

@@ -247,23 +260,21 @@
             section_filters_pcr,
             course__topic=topic,
         )
-
-        return Response(
-            {
-                "code": course["full_code"],
-                "last_offered_sem_if_superceded": last_offered_sem_if_superceded,
-                "name": course["title"],
-                "description": course["description"],
-                "aliases": aliases,
-                "historical_codes": historical_codes,
-                "latest_semester": course["semester"],
-                "num_sections": num_sections,
-                "num_sections_recent": num_sections_recent,
-                "instructors": instructors,
-                "registration_metrics": num_registration_metrics > 0,
-                **get_average_and_recent_dict_single(course),
-            }
-        )
+
+        return {
+            "code": course["full_code"],
+            "last_offered_sem_if_superceded": last_offered_sem_if_superceded,
+            "name": course["title"],
+            "description": course["description"],
+            "aliases": aliases,
+            "historical_codes": historical_codes,
+            "latest_semester": course["semester"],
+            "num_sections": num_sections,
+            "num_sections_recent": num_sections_recent,
+            "instructors": instructors,
+            "registration_metrics": num_registration_metrics > 0,
+            **get_average_and_recent_dict_single(course),
+        }


 @api_view(["GET"])
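
To summarize the new read path in course_reviews: the course code is looked up in the "blue" cache as a pointer to a topic id, the topic id is then looked up for the precomputed payload, and manual_course_reviews is only computed on a miss. A condensed sketch of that logic (the helper name is hypothetical; the real view wraps the same data in a DRF Response):

# Condensed sketch (not part of the PR) of the cache lookup flow used by
# course_reviews, assuming the "blue" cache alias defined in settings.
from django.core.cache import caches

from review.views import manual_course_reviews


def cached_course_reviews(course_code, request_semester, semester=None):
    blue = caches["blue"]
    topic_id = blue.get(course_code)          # course code -> topic id pointer
    if topic_id is not None:
        payload = blue.get(topic_id)          # topic id -> precomputed review payload
        if payload is not None:
            return payload                    # fast path: served from the blue cache
    # Miss on either lookup: fall back to computing the reviews on the fly.
    return manual_course_reviews(course_code, request_semester, semester)
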