Skip to content

Commit b13d45e

Browse files
authored
Fixes issue #1557 Changes to support switching to bigquery. (#1568)
* WIP: Changes to support switching to bigquery. The queries currently aren't working but it feels like it might be making progress! * Converting queries over to use context_store_entity and context_store_keymap Currently gets as far as assignment table. * Getting up to submission now! * Changes to get this through submission, up to resources now * Fix up resources and report queries * Removing commented out methods Adding in tbyte calculation to the new bq method * Some cleanup around the bytes informational displays * Removing support for DATA_WAREHOUSE and postgres * Fixing up codacy issues, removing unused values * Resolving SQL injection warning * Missing import for Optional * Incorrect value in the return * Fixing time for course date * Adding more explicit support for DATE and DATETIME * Removing helper utility functions and passing explicit QueryJobConfig * Removing some additional references to DATA_WAREHOUSE * Removing unused imports
1 parent ba990de commit b13d45e

File tree

6 files changed

+295
-299
lines changed

6 files changed

+295
-299
lines changed

config/cron_udp.hjson

Lines changed: 88 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,18 @@
55
// and it wouldn’t reflect when the data was actually dumped from canvas.
66
// More info on UDP's batch-ingest DAG process can be found here: https://resources.unizin.org/display/UDP/Batch-ingest+application
77
'''
8-
select 'canvasdatadate' as pkey, min(dag_run) as pvalue from report.publish_info pi2
8+
SELECT * FROM EXTERNAL_QUERY("us.context_store", "select 'canvasdatadate' as pkey, min(dag_run) as pvalue from report.publish_info pi2");
99
''',
1010
"user" :
1111
'''
1212
select
1313
(
14-
cast(%(canvas_data_id_increment)s as bigint)
14+
cast(@canvas_data_id_increment as bigint)
1515
+
1616
cast(p2.lms_ext_id as bigint)
1717
) as user_id,
1818
case
19-
when pe.email_address is not null then lower(split_part(pe.email_address , '@', 1))
19+
WHEN pe.email_address IS NOT NULL THEN LOWER(REGEXP_EXTRACT(pe.email_address, r'^([^@]+)'))
2020
else p2.sis_ext_id end as sis_name,
2121
cast(co.lms_int_id as bigint) as course_id,
2222
cg.le_current_score as current_grade,
@@ -27,22 +27,22 @@
2727
when cse.role = 'Teacher' then 'TeacherEnrollment'
2828
else '' end
2929
as enrollment_type
30-
from entity.course_section_enrollment cse
31-
left join entity.course_section cs
30+
from context_store_entity.course_section_enrollment cse
31+
left join context_store_entity.course_section cs
3232
on cse.course_section_id = cs.course_section_id
33-
left join keymap.course_offering co
33+
left join context_store_keymap.course_offering co
3434
on cs.le_current_course_offering_id = co.id
35-
left join entity.person p
35+
left join context_store_entity.person p
3636
on cse.person_id = p.person_id
37-
left join keymap.person p2
37+
left join context_store_keymap.person p2
3838
on p.person_id = p2.id
39-
left join entity.person_email pe
39+
left join context_store_entity.person_email pe
4040
on p.person_id = pe.person_id
41-
left join entity.course_grade cg
41+
left join context_store_entity.course_grade cg
4242
on cse.course_section_id = cg.course_section_id and cse.person_id = cg.person_id
4343
where
44-
co.lms_int_id = ANY(%(course_ids)s)
45-
and cse.role = ANY(ARRAY['Student', 'Teacher', 'TeachingAssistant']::text[])
44+
co.lms_int_id IN UNNEST(@course_ids)
45+
and cse.role IN UNNEST(ARRAY['Student', 'Teacher', 'TeachingAssistant'])
4646
and cse.role_status = 'Enrolled'
4747
and cse.enrollment_status = 'Active'
4848
order by user_id
@@ -51,28 +51,31 @@
5151
'''
5252
with assignment_details as (
5353
select la.due_date, title, la.course_offering_id, la.learner_activity_id, la.points_possible, la.learner_activity_group_id
54-
from entity.learner_activity la, keymap.course_offering co
54+
from context_store_entity.learner_activity la, context_store_keymap.course_offering co
5555
where
5656
la.visibility = 'everyone'
5757
and la.status = 'published'
5858
and la.course_offering_id = co.id
59-
and co.lms_int_id = ANY(%(course_ids)s)
59+
and co.lms_int_id IN UNNEST(@course_ids)
6060
), assignment_grp as (
6161
select lg.*
62-
from entity.learner_activity_group lg, keymap.course_offering co
62+
from context_store_entity.learner_activity_group lg, context_store_keymap.course_offering co
6363
where
6464
lg.status = 'available'
6565
and lg.course_offering_id = co.id
66-
and co.lms_int_id = ANY(%(course_ids)s)
66+
and co.lms_int_id IN UNNEST(@course_ids)
6767
), assign_more as (
6868
select distinct(a.learner_activity_group_id), da.group_points
6969
from assignment_details a
70-
join (
71-
select learner_activity_group_id, sum(points_possible) as group_points
72-
from assignment_details
73-
group by learner_activity_group_id
74-
) as da
75-
on a.learner_activity_group_id = da.learner_activity_group_id
70+
JOIN UNNEST((
71+
SELECT ARRAY_AGG(STRUCT(learner_activity_group_id, group_points))
72+
FROM (
73+
select learner_activity_group_id, sum(points_possible) as group_points
74+
from assignment_details
75+
group by learner_activity_group_id
76+
)
77+
)) as da
78+
on a.learner_activity_group_id = da.learner_activity_group_id
7679
), grp_full as (
7780
select a.group_points, b.learner_activity_group_id
7881
from assign_more a
@@ -81,7 +84,7 @@
8184
), assign_rules as (
8285
select distinct ad.learner_activity_group_id, agr.drop_lowest_amount as drop_lowest, agr.drop_highest_amount as drop_highest
8386
from grp_full ad
84-
join entity.learner_activity_group agr
87+
join context_store_entity.learner_activity_group agr
8588
on ad.learner_activity_group_id = agr.learner_activity_group_id
8689
), assignment_grp_points as (
8790
select ag.*, am.group_points AS group_points, ar.drop_lowest as drop_lowest, ar.drop_highest as drop_highest
@@ -90,16 +93,16 @@
9093
join assign_rules ar on ag.learner_activity_group_id = ar.learner_activity_group_id
9194
)
9295
select
93-
cast(lag_km.lms_int_id as BIGINT) as id,
94-
cast(co_km.lms_int_id as BIGINT) as course_id,
95-
cast(agp.group_weight as float) as weight,
96+
cast(lag_km.lms_int_id as INT64) as id,
97+
cast(co_km.lms_int_id as INT64) as course_id,
98+
cast(agp.group_weight as FLOAT64) as weight,
9699
agp.name as name,
97100
agp.group_points as group_points,
98101
agp.drop_lowest as drop_lowest,
99102
agp.drop_highest as drop_highest
100103
from assignment_grp_points agp,
101-
keymap.course_offering co_km,
102-
keymap.learner_activity_group lag_km
104+
context_store_keymap.course_offering co_km,
105+
context_store_keymap.learner_activity_group lag_km
103106
where agp.course_offering_id = co_km.id
104107
and agp.learner_activity_group_id = lag_km.id
105108
order by id
@@ -109,22 +112,22 @@
109112
with assignment_info as
110113
(
111114
select
112-
la.due_date AT TIME ZONE 'UTC' as due_date,
115+
la.due_date as due_date,
113116
la.title as name,
114-
cast(co.lms_int_id as BIGINT) as course_id,
115-
cast(la_km.lms_int_id as BIGINT) as id,
117+
cast(co.lms_int_id as INT64) as course_id,
118+
cast(la_km.lms_int_id as INT64) as id,
116119
la.points_possible as points_possible,
117-
cast(lag_km.lms_int_id as BIGINT) as assignment_group_id
120+
cast(lag_km.lms_int_id as INT64) as assignment_group_id
118121
from
119-
entity.learner_activity la,
120-
keymap.course_offering co,
121-
keymap.learner_activity la_km,
122-
keymap.learner_activity_group lag_km
122+
context_store_entity.learner_activity la,
123+
context_store_keymap.course_offering co,
124+
context_store_keymap.learner_activity la_km,
125+
context_store_keymap.learner_activity_group lag_km
123126
where
124127
la.visibility = 'everyone'
125128
and la.status = 'published'
126129
and la.course_offering_id = co.id
127-
and co.lms_int_id = ANY(%(course_ids)s)
130+
and co.lms_int_id IN UNNEST(@course_ids)
128131
and la.learner_activity_id = la_km.id
129132
and la.learner_activity_group_id = lag_km.id
130133
)
@@ -142,24 +145,24 @@
142145
cast(0 as boolean)
143146
end as consider_weight
144147
from
145-
entity.learner_activity_group lag,
146-
keymap.course_offering co_km
148+
context_store_entity.learner_activity_group lag,
149+
context_store_keymap.course_offering co_km
147150
where
148151
lag.course_offering_id = co_km.id
149-
and co_km.lms_int_id = ANY(%(course_ids)s)
152+
and co_km.lms_int_id IN UNNEST(@course_ids)
150153
group by co_km.lms_int_id
151154
''',
152155
"term":
153156
'''
154157
select
155-
cast(ka.lms_int_id as BIGINT) as id,
156-
cast(ka.lms_ext_id as BIGINT) as canvas_id,
158+
cast(ka.lms_int_id as INT64) as id,
159+
cast(ka.lms_ext_id as INT64) as canvas_id,
157160
a.name as name,
158-
a.le_term_begin_date::timestamp without time zone as date_start,
159-
a.le_term_end_date::timestamp without time zone as date_end
161+
a.le_term_begin_date as date_start,
162+
a.le_term_end_date as date_end
160163
from
161-
entity.academic_term as a
162-
left join keymap.academic_term as ka on ka.id = a.academic_term_id
164+
context_store_entity.academic_term as a
165+
left join context_store_keymap.academic_term as ka on ka.id = a.academic_term_id
163166
where
164167
ka.lms_ext_id is not null
165168
order by id
@@ -170,18 +173,18 @@
170173
"course":
171174
'''
172175
SELECT
173-
cast(co2.lms_int_id as BIGINT) as id,
174-
cast(co2.lms_ext_id as BIGINT) as canvas_id,
175-
cast(at2.lms_int_id as BIGINT) as enrollment_term_id,
176+
cast(co2.lms_int_id as INT64) as id,
177+
cast(co2.lms_ext_id as INT64) as canvas_id,
178+
cast(at2.lms_int_id as INT64) as enrollment_term_id,
176179
co.le_code as name,
177-
co.le_start_date::timestamp without time zone as start_at,
178-
co.le_end_date::timestamp without time zone as conclude_at
180+
TIMESTAMP(co.le_start_date) as start_at,
181+
TIMESTAMP(co.le_end_date) as conclude_at
179182
FROM
180-
entity.course_offering co
181-
LEFT OUTER JOIN entity.academic_term at1 on (co.academic_term_id = at1.academic_term_id),
182-
keymap.course_offering co2,
183-
keymap.academic_term at2
184-
WHERE co2.lms_int_id = ANY(%(course_ids)s)
183+
context_store_entity.course_offering co
184+
LEFT OUTER JOIN context_store_entity.academic_term at1 on (co.academic_term_id = at1.academic_term_id),
185+
context_store_keymap.course_offering co2,
186+
context_store_keymap.academic_term at2
187+
WHERE co2.lms_int_id IN UNNEST(@course_ids)
185188
and co.course_offering_id = co2.id
186189
and at1.academic_term_id = at2.id
187190
''',
@@ -191,29 +194,28 @@
191194
cast(f_km.lms_int_id as BIGINT) as id,
192195
f.status as file_state,
193196
f.display_name as display_name
194-
from entity.file f, keymap.file f_km, keymap.course_offering co_km
197+
from context_store_entity.file f, context_store_keymap.file f_km, context_store_keymap.course_offering co_km
195198
where
196199
f.course_offering_id = co_km.id
197200
and f.file_id = f_km.id
198-
and co_km.lms_int_id = ANY(%(course_ids)s)
201+
and co_km.lms_int_id IN UNNEST(@course_ids)
199202
order by id
200203
''',
201204
"submission":
202205
'''
203-
create temporary table all_assign_sub as (
204206
with enrollment as
205207
(
206208
select
207209
distinct cse.person_id as user_id
208-
from entity.course_section_enrollment cse
209-
left join entity.course_section cs
210+
from context_store_entity.course_section_enrollment cse
211+
left join context_store_entity.course_section cs
210212
on cse.course_section_id = cs.course_section_id
211-
left join keymap.course_offering co
213+
left join context_store_keymap.course_offering co
212214
on cs.le_current_course_offering_id = co.id
213215
where
214-
co.lms_int_id = ANY(:course_ids)
216+
co.lms_int_id in UNNEST(@course_ids)
215217
and cse.role_status ='Enrolled'
216-
and cse."role" = 'Student'
218+
and cse.role = 'Student'
217219
and cse.enrollment_status = 'Active'
218220
),
219221
submission as
@@ -222,7 +224,7 @@
222224
la.status,
223225
la.visibility,
224226
la2.lms_int_id as assignment_id,
225-
cast(co.lms_int_id as BIGINT) as course_id,
227+
co.lms_int_id as course_id,
226228
la.title as assignment_title,
227229
lar.published_score as published_score,
228230
lar.response_date as submitted_at,
@@ -232,22 +234,24 @@
232234
la.title as title,
233235
lar.learner_activity_result_id as learner_activity_result_id,
234236
lar.person_id as short_user_id,
235-
cast(lar2.lms_int_id as BIGINT) as submission_id,
236-
(cast(:canvas_data_id_increment as bigint) + cast(p.lms_ext_id as bigint)) as canvas_user_id
237-
from entity.learner_activity_result lar
237+
lar2.lms_int_id as submission_id,
238+
CAST(@canvas_data_id_increment AS INT64) + CAST(p.lms_ext_id AS INT64) as canvas_user_id
239+
from context_store_entity.learner_activity_result lar
238240
join enrollment on lar.person_id= enrollment.user_id
239241
join enrollment e on lar.person_id = e.user_id
240-
join keymap.learner_activity_result lar2 on lar.learner_activity_result_id = lar2.id
241-
left join entity.learner_activity la on lar.learner_activity_id = la.learner_activity_id
242-
left join keymap.learner_activity la2 on la.learner_activity_id = la2.id
243-
left join keymap.course_offering co on co.id = la.course_offering_id
244-
join keymap.person p on p.id = lar.person_id
242+
join context_store_keymap.learner_activity_result lar2 on lar.learner_activity_result_id = lar2.id
243+
left join context_store_entity.learner_activity la on lar.learner_activity_id = la.learner_activity_id
244+
left join context_store_keymap.learner_activity la2 on la.learner_activity_id = la2.id
245+
left join context_store_keymap.course_offering co on co.id = la.course_offering_id
246+
join context_store_keymap.person p on p.id = lar.person_id
245247
where
246-
co.lms_int_id = ANY(:course_ids)
248+
co.lms_int_id in UNNEST(@course_ids)
247249
and la.status = 'published'
248-
)
250+
),
251+
all_assign_sub as
252+
(
249253
select
250-
cast(submission_id as BIGINT) AS id,
254+
submission_id AS id,
251255
assignment_id AS assignment_id,
252256
course_id,
253257
canvas_user_id,
@@ -264,22 +268,20 @@
264268
submitted_at AS submitted_at,
265269
graded_at AS graded_date,
266270
grade_posted
267-
from
268-
submission
271+
from
272+
submission
273+
order by assignment_id
269274
)
270-
''',
271-
"submission_with_avg_score":
272-
'''
273275
select
274-
f.id::bigint,
275-
f.assignment_id::bigint assignment_id,
276+
f.id,
277+
CAST(f.assignment_id AS INT64) AS assignment_id,
276278
f.course_id,
277-
f.canvas_user_id::bigint as user_id,
278-
f.score::float,
279+
CAST(f.canvas_user_id AS INT64) AS user_id,
280+
CAST(f.score AS FLOAT64) AS score,
279281
f.submitted_at,
280282
f.graded_date,
281283
f.grade_posted,
282-
cast(f1.avg_score as float) as avg_score
284+
CAST(f1.avg_score AS FLOAT64) AS avg_score
283285
from
284286
all_assign_sub f join
285287
(

config/env_sample.hjson

Lines changed: 2 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -185,26 +185,8 @@
185185
# By default this is empty and no views are disabled
186186
# options are as described in course_view_options table column names [\"show_assignment_planning\", \"show_grade_distribution\"]
187187
"VIEWS_DISABLED": "",
188-
# Data Warehouse configuration
189-
# Uncomment these variables and fill them in if you're using cron to load
190-
# from a data warehouse. These are optional
191-
# Database engine driver
192-
"DATA_WAREHOUSE": {
193-
"ENGINE": "django.db.backends.postgresql",
194-
# database name
195-
"NAME": "",
196-
# database user
197-
"USER": "",
198-
# database password
199-
"PASSWORD": "",
200-
# database host
201-
"HOST": "",
202-
# database port
203-
"PORT": 5432,
204-
# Enable/Disable Unizin Date Warehouse specific features/data
205-
"IS_UNIZIN": true
206-
},
207-
# Learning Record Store configuration
188+
# Data Warehouse and Learning Record Store configuration
189+
# The warehouse and LRS are combined now in the same data source
208190
"LRS": {
209191
# LRS database engine driver (use `google.cloud.bigquery` for bigquery). no other LRS settings needed
210192
"ENGINE": "google.cloud.bigquery",

0 commit comments

Comments
 (0)