forked from viodotcom/analytics-engineer-assignment
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path06-daily-ticket-size.sql
152 lines (102 loc) · 3.14 KB
/
06-daily-ticket-size.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
with
-- import CTEs
source_events as (
select * from event_clean
),
source_daily_sales as (
select * from daily_sales
),
target_table as (
select
datetime(coalesce(max(date), '1900-01-01')) as latest_date
from daily_ticket_size
),
-- logical CTEs
tickets as (
select
distinct
date(event_time) as date,
sum(price) over (partition by date(event_time), user_session) as ticket_size
from source_events
where event_type = 'purchase'
),
items as (
select
date,
count(*) as total_number
from tickets
group by date
),
tickets_ranks as (
select
date,
ticket_size,
dense_rank() over (partition by date order by ticket_size) as ticket_rank
from tickets
),
percentiles as (
select
date,
cast(round(0.25 * (total_number + 1), 0) as integer) as percentile_rank_25,
cast(round(0.50 * (total_number + 1), 0) as integer) as percentile_rank_50,
cast(round(0.75 * (total_number + 1), 0) as integer) as percentile_rank_75
from items
),
percentile_tickets as (
select
percentiles.date,
perc_25.ticket_size as perc_ticket_25th,
perc_50.ticket_size as perc_ticket_50th,
perc_75.ticket_size as perc_ticket_75th
from percentiles
inner join tickets_ranks as perc_25 on percentiles.date = perc_25.date
and percentiles.percentile_rank_25 = perc_25.ticket_rank
inner join tickets_ranks as perc_50 on percentiles.date = perc_50.date
and percentiles.percentile_rank_50 = perc_50.ticket_rank
inner join tickets_ranks as perc_75 on percentiles.date = perc_75.date
and percentiles.percentile_rank_75 = perc_75.ticket_rank
),
agg_tickets as (
select
date,
min(ticket_size) as min_ticket,
max(ticket_size) as max_ticket
from tickets
group by date
),
-- final CTE
final as (
select
source_daily_sales.date,
source_daily_sales.total_sales,
printf("%.2d", agg_tickets.min_ticket) as min_ticket,
printf("%.2f", percentile_tickets.perc_ticket_25th) as perc_ticket_25th,
printf("%.2f", percentile_tickets.perc_ticket_50th) as perc_ticket_50th,
printf("%.2f", percentile_tickets.perc_ticket_75th) as perc_ticket_75th,
printf("%.2d", agg_tickets.max_ticket) as max_ticket
from source_daily_sales
inner join agg_tickets on source_daily_sales.date = agg_tickets.date
inner join percentile_tickets on agg_tickets.date = percentile_tickets.date
where source_daily_sales.date > (
select latest_date from target_table
)
)
-- simple insert from select statement
insert into daily_ticket_size (
date,
total_sales,
min_ticket,
perc_ticket_25th,
perc_ticket_50th,
perc_ticket_75th,
max_ticket
)
select
date,
total_sales,
min_ticket,
perc_ticket_25th,
perc_ticket_50th,
perc_ticket_75th,
max_ticket
from final;