Switch to a query builder approach for the annotation counts

This allows to reuse the same code for multiple cases. Introduce the h_userids filter which with the new approach is available in all queries.
hypothesis · Jun 14, 2024 · fa36724 · fa36724
1 parent a900f08
commit fa36724
Show file tree

Hide file tree

Showing 6 changed files with 211 additions and 220 deletions.
diff --git a/h/services/bulk_api/lms_stats.py b/h/services/bulk_api/lms_stats.py
@@ -1,6 +1,7 @@
 # pylint:disable=not-callable,use-implicit-booleaness-not-comparison-to-zero,singleton-comparison
 from dataclasses import dataclass
 from datetime import datetime
+from enum import Flag, auto
 
 from sqlalchemy import case, func, select
 from sqlalchemy.orm import Session
@@ -9,30 +10,35 @@
 
 
 @dataclass
-class _AnnotationCounts:
+class AnnotationCounts:
  annotations: int
  replies: int
  last_activity: datetime
 
+ assignment_id: str | None = None
+ display_name: str | None = None
+ userid: str | None = None
 
-@dataclass
-class CountsByUser(_AnnotationCounts):
- display_name: str
- userid: str
 
+class CountsGroupBy(Flag):
+ """Allowed values to group the queries by."""
 
-@dataclass
-class CountsByAssignment(_AnnotationCounts):
- assignment_id: str
+ USER = auto()
+ ASSIGNMENT = auto()
 
 
 class BulkLMSStatsService:
  def __init__(self, db: Session, authorized_authority: str):
  self._db = db
  self._authorized_authority = authorized_authority
 
- def _annotation_query(self, groups: list[str]):
- return (
+ def _annotation_query(
+ self,
+ groups: list[str],
+ h_userids: list[str] | None = None,
+ assignment_id: str | None = None,
+ ):
+ query = (
  select(
  AnnotationSlim,
  AnnotationMetadata.data,
@@ -71,85 +77,102 @@ def _annotation_query(self, groups: list[str]):
  Group.authority_provided_id.in_(groups),
  )
  )
-
- def get_counts_by_user(
- self, groups: list[str], assignment_id: str
- ) -> list[CountsByUser]:
- """
- Get basic stats per user for an LMS assignment.
-
- :param groups: List of "authority_provided_id" to filter groups by.
- :param assignment_id: ID of the assignment we are generating the stats for.
- """
- annos_query = (
- self._annotation_query(groups)
- .where(
+ if assignment_id:
+ query = query.where(
  AnnotationMetadata.data["lms"]["assignment"]["resource_link_id"].astext
  == assignment_id,
  )
- .cte("annotations")
- )
 
- query = (
- select(
- User.display_name,
- # Unfortunately all the magic around User.userid doesn't work in this context
- func.concat("acct:", User.username, "@", User.authority).label(
- "userid"
- ),
- func.count(annos_query.c.id)
- .filter(annos_query.c.type == "annotation")
- .label("annotations"),
- func.count(annos_query.c.id)
- .filter(annos_query.c.type == "reply")
- .label("replies"),
- func.max(annos_query.c.created).label("last_activity"),
+ if h_userids:
+ query = query.where(
+ func.concat("acct:", User.username, "@", User.authority).in_(h_userids)
  )
- .join(annos_query, annos_query.c.user_id == User.id)
- .group_by(User.id)
- )
 
- results = self._db.execute(query)
- return [
- CountsByUser(
- userid=row.userid,
- display_name=row.display_name,
- annotations=row.annotations,
- replies=row.replies,
- last_activity=row.last_activity,
- )
- for row in results
- ]
+ return query
+
+ def _count_columns(self, counts_query) -> tuple:
+ return (
+ func.count(counts_query.c.id)
+ .filter(counts_query.c.type == "annotation")
+ .label("annotations"),
+ func.count(counts_query.c.id)
+ .filter(counts_query.c.type == "reply")
+ .label("replies"),
+ func.max(counts_query.c.created).label("last_activity"),
+ )
 
- def get_counts_by_assignment(self, groups: list[str]) -> list[CountsByAssignment]:
+ def get_annotation_counts(
+ self,
+ groups: list[str],
+ group_by: CountsGroupBy,
+ h_userids: list[str] | None = None,
+ assignment_id: str | None = None,
+ ) -> list[AnnotationCounts]:
  """
- Get basic stats per assignment for an LMS course.
+ Get basic stats per user for an LMS assignment.
 
  :param groups: List of "authority_provided_id" to filter groups by.
+ :param group_by: By which column to aggregate the data.
+ :param h_userids: List of User.userid to filter annotations by
+ :param assignment_id: ID of the assignment to filter annotations by
  """
- annos_query = self._annotation_query(groups).cte("annotations")
- query = select(
- annos_query.c.data["lms"]["assignment"]["resource_link_id"].astext.label(
- "assignment_id"
+ annos_query = self._annotation_query(
+ groups, h_userids=h_userids, assignment_id=assignment_id
+ ).cte("annotations")
+
+ # Alias some columns
+ query_assignment_id = annos_query.c.data["lms"]["assignment"][
+ "resource_link_id"
+ ].astext
+ # Unfortunately all the magic around User.userid doesn't work in this context
+ query_userid = func.concat("acct:", User.username, "@", User.authority)
+
+ # What to group_by depending on the selection
+ group_by_clause = {
+ CountsGroupBy.USER: User.id,
+ CountsGroupBy.ASSIGNMENT: query_assignment_id,
+ }
+
+ # What columns to include, depending on the group by
+ group_by_select_columns = {
+ CountsGroupBy.USER: (
+ query_userid.label("userid"),
+ User.display_name,
  ),
- func.count(annos_query.c.id)
- .filter(annos_query.c.type == "annotation")
- .label("annotations"),
- func.count(annos_query.c.id)
- .filter(annos_query.c.type == "reply")
- .label("replies"),
- func.max(annos_query.c.created).label("last_activity"),
- ).group_by(annos_query.c.data["lms"]["assignment"]["resource_link_id"].astext)
+ CountsGroupBy.ASSIGNMENT: (query_assignment_id.label("assignment_id"),),
+ }
+
+ # What joins to include, depending on the group by
+ group_by_select_joins = {
+ CountsGroupBy.USER: ((annos_query, annos_query.c.user_id == User.id),),
+ CountsGroupBy.ASSIGNMENT: [],
+ }
+
+ query = select(
+ # Include the relevant columnns based on group_by
+ *group_by_select_columns[group_by],
+ # Always include the counts column
+ *self._count_columns(annos_query)
+ )
+
+ # Apply relevant joins
+ for join in group_by_select_joins[group_by]:
+ query = query.join(*join)
+
+ # And finally the group by
+ query = query.group_by(group_by_clause[group_by])
 
  results = self._db.execute(query)
  return [
- CountsByAssignment(
- assignment_id=row.assignment_id,
+ AnnotationCounts(
+ assignment_id=row.get("assignment_id"),
+ userid=row.get("userid"),
+ display_name=row.get("display_name"),
  annotations=row.annotations,
  replies=row.replies,
  last_activity=row.last_activity,
  )
- for row in results
+ for row in results.mappings()
  ]
 
 

diff --git a/h/views/api/bulk/stats_assignment.json → h/views/api/bulk/annotation_counts.json b/h/views/api/bulk/stats_assignment.json → h/views/api/bulk/annotation_counts.json
@@ -5,10 +5,18 @@
  "properties": {
  "filter": {
  "$ref": "#/$defs/filter"
+ },
+ "group_by": {
+ "type": "string",
+ "enum": [
+ "assignment",
+ "user"
+ ]
  }
  },
  "required": [
- "filter"
+ "filter",
+ "group_by"
  ],
  "additionalProperties": true,
  "$defs": {
@@ -18,20 +26,27 @@
  "type": "object",
  "properties": {
  "groups": {
- "$ref": "#/$defs/groupsFilter"
+ "$ref": "#/$defs/nonEmptyStringArray"
+ },
+ "h_userids": {
+ "$ref": "#/$defs/nonEmptyStringArray"
  },
  "assignment_id": {
- "type": "string"
+ "type": [
+ "string",
+ "null"
+ ]
  }
  },
  "required": [
- "groups",
- "assignment_id"
+ "groups"
  ],
  "additionalProperties": false
  },
- "groupsFilter": {
- "type": "array",
+ "nonEmptyStringArray": {
+ "type": [
+ "array"
+ ],
  "minItems": 1,
  "items": {
  "type": "string"

diff --git a/h/views/api/bulk/stats.py b/h/views/api/bulk/stats.py
@@ -5,18 +5,12 @@
 
 from h.schemas.base import JSONSchema
 from h.security import Permission
-from h.services.bulk_api import BulkLMSStatsService
+from h.services.bulk_api.lms_stats import BulkLMSStatsService, CountsGroupBy
 from h.views.api.config import api_config
 
 
 class AssignmentStatsSchema(JSONSchema):
- _SCHEMA_FILE = files("h.views.api.bulk") / "stats_assignment.json"
- schema_version = 7
- schema = json.loads(_SCHEMA_FILE.read_text(encoding="utf-8"))
-
-
-class CourseStatsSchema(JSONSchema):
- _SCHEMA_FILE = files("h.views.api.bulk") / "stats_course.json"
+ _SCHEMA_FILE = files("h.views.api.bulk") / "annotation_counts.json"
  schema_version = 7
  schema = json.loads(_SCHEMA_FILE.read_text(encoding="utf-8"))
 
@@ -30,52 +24,23 @@ class CourseStatsSchema(JSONSchema):
  subtype="x-ndjson",
  permission=Permission.API.BULK_ACTION,
 )
-def get_counts_by_user(request):
+def get_annotation_counts(request):
  data = AssignmentStatsSchema().validate(request.json)
  query_filter = data["filter"]
 
- stats = request.find_service(BulkLMSStatsService).get_counts_by_user(
- groups=query_filter["groups"],
- assignment_id=query_filter["assignment_id"],
- )
-
- return Response(
- json=[
- {
- "display_name": row.display_name,
- "userid": row.userid,
- "annotations": row.annotations,
- "replies": row.replies,
- "last_activity": row.last_activity.isoformat(),
- }
- for row in stats
- ],
- status=200,
- content_type="application/x-ndjson",
- )
-
-
-@api_config(
- versions=["v1", "v2"],
- route_name="api.bulk.stats.assignments",
- request_method="POST",
- description="Retrieve stats grouped by assignment",
- link_name="bulk.stats.course",
- subtype="x-ndjson",
- permission=Permission.API.BULK_ACTION,
-)
-def get_counts_by_assignment(request):
- data = CourseStatsSchema().validate(request.json)
- query_filter = data["filter"]
-
- stats = request.find_service(BulkLMSStatsService).get_counts_by_assignment(
+ stats = request.find_service(BulkLMSStatsService).get_annotation_counts(
+ group_by=CountsGroupBy[data["group_by"].upper()],
  groups=query_filter["groups"],
+ assignment_id=query_filter.get("assignment_id"),
+ h_userids=query_filter.get("h_userids"),
  )
 
  return Response(
  json=[
  {
  "assignment_id": row.assignment_id,
+ "userid": row.userid,
+ "display_name": row.display_name,
  "annotations": row.annotations,
  "replies": row.replies,
  "last_activity": row.last_activity.isoformat(),

diff --git a/h/views/api/bulk/stats_course.json b/h/views/api/bulk/stats_course.json