Skip to content

Commit

Permalink
Merge pull request #662 from pennlabs/revert-661-revert-597-pcx-histo…
Browse files Browse the repository at this point in the history
…ry-probabilities
  • Loading branch information
AaDalal authored Apr 13, 2024
2 parents 2420e54 + b78cfe8 commit 13ab45f
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 2 deletions.
28 changes: 26 additions & 2 deletions backend/courses/management/commands/recompute_topics.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from django.core.management.base import BaseCommand
from django.db import transaction
from django.db.models import Count, OuterRef, Subquery
from tqdm import tqdm

from courses.models import Course, Topic
from courses.util import all_semesters
from courses.util import all_semesters, historical_semester_probability


def garbage_collect_topics():
Expand Down Expand Up @@ -151,5 +152,28 @@ def handle(self, *args, **kwargs):
assert (
min_semester in all_semesters()
), f"--min-semester={min_semester} is not a valid semester."

semesters = sorted(
[sem for sem in all_semesters() if not min_semester or sem >= min_semester]
)
recompute_topics(min_semester, verbose=True, allow_null_parent_topic=bool(min_semester))
recompute_historical_semester_probabilities(current_semester=semesters[-1], verbose=True)


def recompute_historical_semester_probabilities(current_semester, verbose=False):
    """
    Recompute the per-season historical probabilities stored on every Topic.

    For each topic, the semesters of its courses are collected in ascending
    order and passed to `historical_semester_probability`; the three values it
    returns are written to the topic's `historical_probabilities_spring`,
    `historical_probabilities_summer` and `historical_probabilities_fall`
    fields (in that order).

    :param current_semester: The semester the probabilities are computed
        relative to; forwarded unchanged to `historical_semester_probability`.
    :param verbose: If True, print a status line and show a progress bar.
    """
    if verbose:
        print("Recomputing historical probabilities for all topics...")
    topics = Topic.objects.all()
    for topic in tqdm(topics, disable=not verbose, total=topics.count()):
        # Only the semester strings are needed, so avoid loading full Course rows.
        ordered_semesters = list(
            topic.courses.order_by("semester").values_list("semester", flat=True)
        )
        spring, summer, fall = historical_semester_probability(
            current_semester, ordered_semesters
        )
        topic.historical_probabilities_spring = spring
        topic.historical_probabilities_summer = summer
        topic.historical_probabilities_fall = fall
        # Write back only the recomputed columns so that unrelated Topic fields
        # are not overwritten with whatever values this instance was loaded with.
        topic.save(
            update_fields=[
                "historical_probabilities_spring",
                "historical_probabilities_summer",
                "historical_probabilities_fall",
            ]
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Generated by Django 5.0.2 on 2024-04-11 05:55

from django.db import migrations, models


class Migration(migrations.Migration):
    """
    Add the `historical_probabilities_fall`, `historical_probabilities_spring`
    and `historical_probabilities_summer` float fields (default 0) to `topic`.
    """

    dependencies = [("courses", "0064_auto_20240327_0249")]

    # The three fields differ only in the season embedded in the field name and
    # in the help text, so they are generated from one template.
    operations = [
        migrations.AddField(
            model_name="topic",
            name=f"historical_probabilities_{season}",
            field=models.FloatField(
                default=0,
                help_text=(
                    "\nThe historical probability of a student taking a course in this topic "
                    f"in the {season}\nsemester, based on historical data. "
                    "This field is recomputed nightly from the\n"
                    "`parent_course` graph (in the recompute_soft_state cron job).\n"
                ),
            ),
        )
        for season in ("fall", "spring", "summer")
    ]
30 changes: 30 additions & 0 deletions backend/courses/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,36 @@ class Topic(models.Model):
),
)

historical_probabilities_spring = models.FloatField(
default=0,
help_text=dedent(
"""
The historical probability of a student taking a course in this topic in the spring
semester, based on historical data. This field is recomputed nightly from the
`parent_course` graph (in the recompute_soft_state cron job).
"""
),
)
historical_probabilities_summer = models.FloatField(
default=0,
help_text=dedent(
"""
The historical probability of a student taking a course in this topic in the summer
semester, based on historical data. This field is recomputed nightly from the
`parent_course` graph (in the recompute_soft_state cron job).
"""
),
)
historical_probabilities_fall = models.FloatField(
default=0,
help_text=dedent(
"""
The historical probability of a student taking a course in this topic in the fall
semester, based on historical data. This field is recomputed nightly from the
`parent_course` graph (in the recompute_soft_state cron job).
"""
),
)
branched_from = models.ForeignKey(
"Topic",
related_name="branched_to",
Expand Down
50 changes: 50 additions & 0 deletions backend/courses/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -721,3 +721,53 @@ def get_semesters(semesters: str = None) -> list[str]:
if s not in possible_semesters:
raise ValueError(f"Provided semester {s} was not found in the db.")
return sorted(semesters)


def historical_semester_probability(current_semester: str, semesters: list[str]):
    """
    Compute, for each season (A, B, C), a weighted score in [0, 1] from the
    semesters in which something was offered during the five years preceding
    the current year. More recent years carry more weight.

    Only semesters whose year lies in `[current_year - 5, current_year - 1]`
    are considered. Each distinct semester is counted once, and the input does
    not need to be sorted.

    :param current_semester: The current semester in the 20XX(A|B|C) format.
    :type current_semester: str
    :param semesters: Semester strings in the 20XX(A|B|C) format (the season
        letter may be lower-case). Duplicates are ignored.
    :type semesters: list[str]
    :returns: A list of 3 floats `[A, B, C]`, each rounded to 2 decimal places
        and capped at 1.0. All zeros if no semester falls inside the window.
    :rtype: list
    :raises KeyError: If an in-window semester ends in a letter other than
        A, B or C.
    """
    # Weight of an offering made 1, 2, 3, 4 and 5 years before the current year.
    PROB_DISTRIBUTION = [0.4, 0.3, 0.15, 0.1, 0.05]
    window = len(PROB_DISTRIBUTION)

    current_year = int(current_semester[:-1])
    # Semesters are compared as strings against bare years: "2019A" > "2019"
    # (a longer string sorts after its own prefix) and "2024A" is not < "2024",
    # so this keeps exactly the years current_year - 5 .. current_year - 1.
    lower_bound = str(current_year - window)
    upper_bound = str(current_year)
    # A set drops repeated semesters so that one semester cannot contribute its
    # weight more than once; sorting makes the oldest offering come first
    # regardless of the caller's ordering and keeps the summation order stable.
    offerings = sorted(
        {
            (int(semester[:-1]), semester[-1].upper())
            for semester in semesters
            if lower_bound < semester < upper_bound
        }
    )
    if not offerings:
        return [0.0, 0.0, 0.0]

    years_of_history = current_year - offerings[0][0]
    if years_of_history < window:
        # The oldest in-window offering is less than five years old, so only
        # the first `years_of_history` weights can ever be used; rescale them
        # so that they sum to (approximately) 1.
        usable = PROB_DISTRIBUTION[:years_of_history]
        total = sum(usable)
        weights = [round(weight / total, 3) for weight in usable]
    else:
        weights = PROB_DISTRIBUTION

    semester_probabilities = {"A": 0.0, "B": 0.0, "C": 0.0}
    for year, season in offerings:
        semester_probabilities[season] += weights[current_year - year - 1]

    # Rounded rescaled weights can sum to slightly more than 1, hence the cap.
    return [min(round(semester_probabilities[season], 2), 1.00) for season in ("A", "B", "C")]

0 comments on commit 13ab45f

Please sign in to comment.