Skip to content

Commit

Permalink
Switch to a query builder approach for the annotation counts
Browse files Browse the repository at this point in the history
This allows to reuse the same code for multiple cases.

Introduce the h_userids filter which with the new approach is available in
all queries.
  • Loading branch information
marcospri committed Jun 14, 2024
1 parent a900f08 commit fa36724
Show file tree
Hide file tree
Showing 6 changed files with 211 additions and 220 deletions.
163 changes: 93 additions & 70 deletions h/services/bulk_api/lms_stats.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# pylint:disable=not-callable,use-implicit-booleaness-not-comparison-to-zero,singleton-comparison
from dataclasses import dataclass
from datetime import datetime
from enum import Flag, auto

from sqlalchemy import case, func, select
from sqlalchemy.orm import Session
Expand All @@ -9,30 +10,35 @@


@dataclass
class _AnnotationCounts:
class AnnotationCounts:
annotations: int
replies: int
last_activity: datetime

assignment_id: str | None = None
display_name: str | None = None
userid: str | None = None

@dataclass
class CountsByUser(_AnnotationCounts):
display_name: str
userid: str

class CountsGroupBy(Flag):
"""Allowed values to group the queries by."""

@dataclass
class CountsByAssignment(_AnnotationCounts):
assignment_id: str
USER = auto()
ASSIGNMENT = auto()


class BulkLMSStatsService:
def __init__(self, db: Session, authorized_authority: str):
self._db = db
self._authorized_authority = authorized_authority

def _annotation_query(self, groups: list[str]):
return (
def _annotation_query(
self,
groups: list[str],
h_userids: list[str] | None = None,
assignment_id: str | None = None,
):
query = (
select(
AnnotationSlim,
AnnotationMetadata.data,
Expand Down Expand Up @@ -71,85 +77,102 @@ def _annotation_query(self, groups: list[str]):
Group.authority_provided_id.in_(groups),
)
)

def get_counts_by_user(
self, groups: list[str], assignment_id: str
) -> list[CountsByUser]:
"""
Get basic stats per user for an LMS assignment.
:param groups: List of "authority_provided_id" to filter groups by.
:param assignment_id: ID of the assignment we are generating the stats for.
"""
annos_query = (
self._annotation_query(groups)
.where(
if assignment_id:
query = query.where(
AnnotationMetadata.data["lms"]["assignment"]["resource_link_id"].astext
== assignment_id,
)
.cte("annotations")
)

query = (
select(
User.display_name,
# Unfortunately all the magic around User.userid doesn't work in this context
func.concat("acct:", User.username, "@", User.authority).label(
"userid"
),
func.count(annos_query.c.id)
.filter(annos_query.c.type == "annotation")
.label("annotations"),
func.count(annos_query.c.id)
.filter(annos_query.c.type == "reply")
.label("replies"),
func.max(annos_query.c.created).label("last_activity"),
if h_userids:
query = query.where(
func.concat("acct:", User.username, "@", User.authority).in_(h_userids)
)
.join(annos_query, annos_query.c.user_id == User.id)
.group_by(User.id)
)

results = self._db.execute(query)
return [
CountsByUser(
userid=row.userid,
display_name=row.display_name,
annotations=row.annotations,
replies=row.replies,
last_activity=row.last_activity,
)
for row in results
]
return query

def _count_columns(self, counts_query) -> tuple:
return (
func.count(counts_query.c.id)
.filter(counts_query.c.type == "annotation")
.label("annotations"),
func.count(counts_query.c.id)
.filter(counts_query.c.type == "reply")
.label("replies"),
func.max(counts_query.c.created).label("last_activity"),
)

def get_counts_by_assignment(self, groups: list[str]) -> list[CountsByAssignment]:
def get_annotation_counts(
self,
groups: list[str],
group_by: CountsGroupBy,
h_userids: list[str] | None = None,
assignment_id: str | None = None,
) -> list[AnnotationCounts]:
"""
Get basic stats per assignment for an LMS course.
Get basic stats per user for an LMS assignment.
:param groups: List of "authority_provided_id" to filter groups by.
:param group_by: By which column to aggregate the data.
:param h_userids: List of User.userid to filter annotations by
:param assignment_id: ID of the assignment to filter annotations by
"""
annos_query = self._annotation_query(groups).cte("annotations")
query = select(
annos_query.c.data["lms"]["assignment"]["resource_link_id"].astext.label(
"assignment_id"
annos_query = self._annotation_query(
groups, h_userids=h_userids, assignment_id=assignment_id
).cte("annotations")

# Alias some columns
query_assignment_id = annos_query.c.data["lms"]["assignment"][
"resource_link_id"
].astext
# Unfortunately all the magic around User.userid doesn't work in this context
query_userid = func.concat("acct:", User.username, "@", User.authority)

# What to group_by depending on the selection
group_by_clause = {
CountsGroupBy.USER: User.id,
CountsGroupBy.ASSIGNMENT: query_assignment_id,
}

# What columns to include, depending on the group by
group_by_select_columns = {
CountsGroupBy.USER: (
query_userid.label("userid"),
User.display_name,
),
func.count(annos_query.c.id)
.filter(annos_query.c.type == "annotation")
.label("annotations"),
func.count(annos_query.c.id)
.filter(annos_query.c.type == "reply")
.label("replies"),
func.max(annos_query.c.created).label("last_activity"),
).group_by(annos_query.c.data["lms"]["assignment"]["resource_link_id"].astext)
CountsGroupBy.ASSIGNMENT: (query_assignment_id.label("assignment_id"),),
}

# What joins to include, depending on the group by
group_by_select_joins = {
CountsGroupBy.USER: ((annos_query, annos_query.c.user_id == User.id),),
CountsGroupBy.ASSIGNMENT: [],
}

query = select(
# Include the relevant columnns based on group_by
*group_by_select_columns[group_by],
# Always include the counts column
*self._count_columns(annos_query)
)

# Apply relevant joins
for join in group_by_select_joins[group_by]:
query = query.join(*join)

# And finally the group by
query = query.group_by(group_by_clause[group_by])

results = self._db.execute(query)
return [
CountsByAssignment(
assignment_id=row.assignment_id,
AnnotationCounts(
assignment_id=row.get("assignment_id"),
userid=row.get("userid"),
display_name=row.get("display_name"),
annotations=row.annotations,
replies=row.replies,
last_activity=row.last_activity,
)
for row in results
for row in results.mappings()
]


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,18 @@
"properties": {
"filter": {
"$ref": "#/$defs/filter"
},
"group_by": {
"type": "string",
"enum": [
"assignment",
"user"
]
}
},
"required": [
"filter"
"filter",
"group_by"
],
"additionalProperties": true,
"$defs": {
Expand All @@ -18,20 +26,27 @@
"type": "object",
"properties": {
"groups": {
"$ref": "#/$defs/groupsFilter"
"$ref": "#/$defs/nonEmptyStringArray"
},
"h_userids": {
"$ref": "#/$defs/nonEmptyStringArray"
},
"assignment_id": {
"type": "string"
"type": [
"string",
"null"
]
}
},
"required": [
"groups",
"assignment_id"
"groups"
],
"additionalProperties": false
},
"groupsFilter": {
"type": "array",
"nonEmptyStringArray": {
"type": [
"array"
],
"minItems": 1,
"items": {
"type": "string"
Expand Down
53 changes: 9 additions & 44 deletions h/views/api/bulk/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,12 @@

from h.schemas.base import JSONSchema
from h.security import Permission
from h.services.bulk_api import BulkLMSStatsService
from h.services.bulk_api.lms_stats import BulkLMSStatsService, CountsGroupBy
from h.views.api.config import api_config


class AssignmentStatsSchema(JSONSchema):
_SCHEMA_FILE = files("h.views.api.bulk") / "stats_assignment.json"
schema_version = 7
schema = json.loads(_SCHEMA_FILE.read_text(encoding="utf-8"))


class CourseStatsSchema(JSONSchema):
_SCHEMA_FILE = files("h.views.api.bulk") / "stats_course.json"
_SCHEMA_FILE = files("h.views.api.bulk") / "annotation_counts.json"
schema_version = 7
schema = json.loads(_SCHEMA_FILE.read_text(encoding="utf-8"))

Expand All @@ -30,52 +24,23 @@ class CourseStatsSchema(JSONSchema):
subtype="x-ndjson",
permission=Permission.API.BULK_ACTION,
)
def get_counts_by_user(request):
def get_annotation_counts(request):
data = AssignmentStatsSchema().validate(request.json)
query_filter = data["filter"]

stats = request.find_service(BulkLMSStatsService).get_counts_by_user(
groups=query_filter["groups"],
assignment_id=query_filter["assignment_id"],
)

return Response(
json=[
{
"display_name": row.display_name,
"userid": row.userid,
"annotations": row.annotations,
"replies": row.replies,
"last_activity": row.last_activity.isoformat(),
}
for row in stats
],
status=200,
content_type="application/x-ndjson",
)


@api_config(
versions=["v1", "v2"],
route_name="api.bulk.stats.assignments",
request_method="POST",
description="Retrieve stats grouped by assignment",
link_name="bulk.stats.course",
subtype="x-ndjson",
permission=Permission.API.BULK_ACTION,
)
def get_counts_by_assignment(request):
data = CourseStatsSchema().validate(request.json)
query_filter = data["filter"]

stats = request.find_service(BulkLMSStatsService).get_counts_by_assignment(
stats = request.find_service(BulkLMSStatsService).get_annotation_counts(
group_by=CountsGroupBy[data["group_by"].upper()],
groups=query_filter["groups"],
assignment_id=query_filter.get("assignment_id"),
h_userids=query_filter.get("h_userids"),
)

return Response(
json=[
{
"assignment_id": row.assignment_id,
"userid": row.userid,
"display_name": row.display_name,
"annotations": row.annotations,
"replies": row.replies,
"last_activity": row.last_activity.isoformat(),
Expand Down
36 changes: 0 additions & 36 deletions h/views/api/bulk/stats_course.json

This file was deleted.

0 comments on commit fa36724

Please sign in to comment.