-
Notifications
You must be signed in to change notification settings - Fork 42
Expand file tree
/
Copy pathv_0_1_x.py
More file actions
573 lines (503 loc) · 23.6 KB
/
v_0_1_x.py
File metadata and controls
573 lines (503 loc) · 23.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
# Migration code for upgrading from a 0.1.x SOGS database. The database structure is completely
# changed, and so this is technically more like an import than a migration.
import os
import logging
import time
from .exc import DatabaseUpgradeRequired
def migrate(conn, *, check_only):
    """
    Detect and (optionally) perform an import from a v0.1.x SOGS database.

    Returns False when there is nothing to do: either the new database already
    contains rooms, or no old ``database.db`` file exists.  Otherwise, when
    `check_only` is set, raises DatabaseUpgradeRequired to signal that an
    import is pending; when not set, runs the import and returns True.
    """
    (room_count,) = conn.execute("SELECT COUNT(*) FROM rooms").first()

    # Only attempt a v0.1.x import into a brand-new (room-less) database when
    # the legacy database file is actually present:
    if room_count or not os.path.exists("database.db"):
        return False

    logging.warning("No rooms found, but database.db exists; attempting migration")

    if check_only:
        raise DatabaseUpgradeRequired("v0.1.x import")

    try:
        import_from_0_1_x(conn)
    except Exception:
        logging.critical(
            "database.db exists but migration failed! Please report this bug!\n\n"
            "If no migration from 0.1.x is needed then rename or delete database.db to "
            "start up with a fresh (new) database.\n\n"
        )
        raise

    return True
def sqlite_connect_readonly(path):
    """
    Open the sqlite database at `path` read-only.

    The connection uses autocommit mode (isolation_level=None) and sqlite3.Row
    rows so columns can be accessed by name.
    """
    import sqlite3

    uri = f"file:{path}?mode=ro"
    ro_conn = sqlite3.connect(uri, uri=True, isolation_level=None)
    ro_conn.row_factory = sqlite3.Row
    return ro_conn
def import_from_0_1_x(conn):
    """
    Import the contents of a v0.1.x SOGS installation into the new database.

    Reads the old ``database.db`` room list plus each per-room
    ``rooms/{token}.db`` sqlite database and copies rooms, messages, deletion
    tombstones, uploaded files, the room image, bans, moderators, and user
    activity into the new schema via `conn`.  Creates the
    ``room_import_hacks`` and ``file_id_hacks`` compatibility tables so that
    existing Session clients (which still reference old per-room message/file
    ids) keep working; either table is dropped again at the end if it turned
    out to be unneeded.  Finally renames ``database.db`` to
    ``old-database.db`` so the import will not be attempted again.

    Raises RuntimeError on inconsistent old data (bad signature length, or a
    message whose deleted/data/signature columns disagree); any exception
    propagates to the caller (`migrate`), which logs and re-raises.
    """
    from .. import config, db, utils

    # Old database database.db is a single table database containing just the list of rooms:
    #     CREATE TABLE IF NOT EXISTS main (
    #         id TEXT PRIMARY KEY, -- now called token
    #         name TEXT,
    #         image_id TEXT -- entirely unused.
    #     )
    with sqlite_connect_readonly("database.db") as main_db:
        rooms = [(r[0], r[1]) for r in main_db.execute("SELECT id, name FROM main")]

    logging.warning(f"{len(rooms)} rooms to import")

    # Compatibility table mapping old per-room message id ranges onto the new
    # global message ids (see the long comment further down for why).
    db.query(
        """
        CREATE TABLE IF NOT EXISTS room_import_hacks (
            room BIGINT PRIMARY KEY NOT NULL REFERENCES rooms(id) ON DELETE CASCADE,
            old_message_id_max BIGINT NOT NULL,
            message_id_offset BIGINT NOT NULL
        )
        """,
        dbconn=conn,
    )
    # Compatibility table mapping old random file ids onto new file rows.
    db.query(
        """
        CREATE TABLE IF NOT EXISTS file_id_hacks (
            room BIGINT NOT NULL REFERENCES rooms(id) ON DELETE CASCADE,
            old_file_id BIGINT NOT NULL,
            file BIGINT NOT NULL REFERENCES files(id) ON DELETE CASCADE,
            PRIMARY KEY(room, old_file_id)
        )
        """,
        dbconn=conn,
    )
    used_room_hacks, used_file_hacks = False, False

    def ins_user(session_id):
        # Insert session_id into the users table (with epoch last_active) if
        # it isn't there already.
        if not db.query(
            "SELECT COUNT(*) FROM users WHERE session_id = :session_id",
            dbconn=conn,
            session_id=session_id,
        ).first()[0]:
            db.query(
                "INSERT INTO users (session_id, last_active) VALUES (:session_id, 0.0)",
                session_id=session_id,
                dbconn=conn,
            )

    total_rooms, total_msgs, total_files = 0, 0, 0

    for room_token, room_name in rooms:
        room_db_path = f"rooms/{room_token}.db"
        if not os.path.exists(room_db_path):
            logging.warning(f"Skipping room {room_token}: database {room_db_path} does not exist")
            continue

        logging.info(f"Importing room {room_token} -- {room_name}...")

        room_id = db.insert_and_get_pk(
            "INSERT INTO rooms (token, name) VALUES (:token, :name)",
            "id",
            token=room_token,
            name=room_name,
            dbconn=conn,
        )

        with sqlite_connect_readonly(room_db_path) as rconn:
            # Messages were stored in this:
            #
            #     CREATE TABLE IF NOT EXISTS messages (
            #         id INTEGER PRIMARY KEY,
            #         public_key TEXT,
            #         timestamp INTEGER,
            #         data TEXT,
            #         signature TEXT,
            #         is_deleted INTEGER
            #     );
            #
            # where public_key is the session_id (in hex), timestamp is in milliseconds since
            # unix epoch, data and signature are in base64 (wtf), data is typically padded from
            # the client (i.e. to the next multiple, with lots of 0s on the end). If the
            # message was deleted then it remains here but `is_deleted` is set to 1 (data and
            # signature should be NULL as well, but older versions apparently didn't do that and
            # migrations never fixed it), plus we have a row in here:
            #
            #     CREATE TABLE IF NOT EXISTS deleted_messages (
            #         id INTEGER PRIMARY KEY,
            #         deleted_message_id INTEGER
            #     );
            #
            # where the `id` of this table is returned to the Session client so that they can
            # query for "deletions since [id]".
            #
            # This introduces some major complications, though: Session message polling works by
            # requesting messages (and deletions) since a given id, but we can't preserve IDs
            # because there are guaranteed to be duplicates across rooms. So we use this
            # room_import_hacks defined above to figure this out:
            #
            # - if requesting messages in a room since some id <= old_message_id_max then we
            #   actually query messages in the room since id + message_id_offset.
            #
            # Deletions doesn't have the same complication because in the new database they use
            # a monotonic message_sequence field that we can make conform (for imported rows) to
            # the imported deletion ids.
            id_offset = db.query(
                "SELECT COALESCE( MAX(id), 0 ) + 1 FROM messages", dbconn=conn
            ).first()[0]
            top_old_id, seqno, imported_msgs = -1, 0, 0
            n_msgs = rconn.execute("SELECT COUNT(*) FROM messages").fetchone()[0]
            last_id, dupe_dels = -1, 0
            for id, session_id, timestamp, data, signature, deleted in rconn.execute(
                """
                SELECT messages.id, public_key AS session_id, timestamp, data, signature,
                    CASE WHEN is_deleted THEN deleted_messages.id ELSE NULL END AS deleted
                FROM messages LEFT JOIN deleted_messages
                    ON messages.id = deleted_messages.deleted_message_id
                ORDER BY messages.id
                """
            ):
                # On the first row, shift the offset so that (lowest old id +
                # id_offset) lands just past the current MAX(id) in the new table.
                if top_old_id == -1:
                    id_offset -= id
                if id > top_old_id:
                    top_old_id = id
                if id == last_id:
                    # There are duplicates in the deleted_messages table (WTF) that can give us
                    # multiple rows through the join, so skip duplicates if they occur.
                    dupe_dels += 1
                    continue
                else:
                    last_id = id

                # NB: the old database cleared the session ID when deleting a message (which is
                # bad, because no auditability at all), but also cleared it by setting it to the
                # fixed string 'deleted' because I guess the author didn't know NULL was a
                # thing?  We import them as such because our session_ids *can't* be null (and we
                # no longer clear it when deleting), but the data is gone from the imported
                # table so there's not much else we can do.
                ins_user(session_id)

                if config.IMPORT_ADJUST_MS:
                    timestamp += config.IMPORT_ADJUST_MS

                # Timestamp is in unix epoch *milliseconds* for some non-standard reason.
                timestamp /= 1000.0

                if data is not None and signature is not None and deleted is None:
                    # Regular message

                    # Data was pointlessly stored padded *and* base64 encoded, so decode and
                    # unpad it:
                    data = utils.decode_base64(data)
                    data_size = len(data)
                    data = utils.remove_session_message_padding(data)

                    # Signature was just base64 encoded:
                    signature = utils.decode_base64(signature)
                    if len(signature) != 64:
                        raise RuntimeError(
                            f"Unexpected data: {room_db_path} message id={id} "
                            "has invalid signature"
                        )

                    db.query(
                        """
                        INSERT INTO messages
                            (id, room, "user", posted, data, data_size, signature)
                        VALUES (:m, :r, (SELECT id FROM users WHERE session_id = :session_id),
                            :posted, :data, :data_size, :signature)
                        """,
                        m=id + id_offset,
                        r=room_id,
                        session_id=session_id,
                        posted=timestamp,
                        data=data,
                        data_size=data_size,
                        signature=signature,
                        dbconn=conn,
                    )

                elif (
                    deleted is not None
                    # Deleted messages are usually set to the fixed string "deleted" (why not
                    # NULL?) for data and signature, so accept either null or that string if the
                    # other columns indicate a deleted message.
                    and data in (None, "deleted")
                    and signature in (None, "deleted")
                ):
                    # Deleted message; we still need to insert a tombstone for it, and copy the
                    # deletion id as the "seqno" field.  (We do this with a second query
                    # because the first query is going to trigger an automatic update of the
                    # field).
                    seqno += 1
                    db.query(
                        """
                        INSERT INTO messages (id, room, "user", posted)
                        VALUES (:m, :r, (SELECT id FROM users WHERE session_id = :session_id),
                            :posted)
                        """,
                        m=id + id_offset,
                        r=room_id,
                        session_id=session_id,
                        posted=timestamp,
                        dbconn=conn,
                    )

                else:
                    raise RuntimeError(
                        "Inconsistent message in {} database: message id={} has inconsistent "
                        "deletion state (data: {}, signature: {}, del row: {})".format(
                            room_db_path,
                            id,
                            data is not None,
                            signature is not None,
                            deleted is not None,
                        )
                    )

                # Second query (see comment above): pin the seqno fields for the
                # row we just inserted, overriding any trigger-assigned value.
                db.query(
                    "UPDATE messages SET seqno = :s, seqno_creation = 0, seqno_data = :s"
                    " WHERE id = :m",
                    s=seqno,
                    m=id + id_offset,
                    dbconn=conn,
                )
                imported_msgs += 1
                if imported_msgs % 5000 == 0:
                    logging.info(f"- ... imported {imported_msgs}/{n_msgs} messages")

            logging.info(
                f"- migrated {imported_msgs} messages, {dupe_dels} duplicate deletions ignored"
            )

            # Old SOGS has a bug where it inserts duplicate deletion tombstones (see above), but
            # this means that our seqno count might not be large enough for existing Session
            # clients to not break: they will be fetching deletion ids > X, but if we have 100
            # duplicates, the room's update counter would be X-100 and so existing clients
            # wouldn't actually fetch any new deletions until the counter catches up.  Fix that
            # up by incrementing the message_sequence counter if necessary.
            top_del_id = rconn.execute("SELECT MAX(id) FROM deleted_messages").fetchone()[0]
            if top_del_id is None:
                top_del_id = 0

            db.query(
                "UPDATE rooms SET message_sequence = :ms WHERE id = :r",
                ms=max(seqno, top_del_id),
                r=room_id,
                dbconn=conn,
            )

            # If we have to offset rowids then make sure the hack table exists and insert our
            # hack.
            if id_offset != 0:
                used_room_hacks = True
                db.query(
                    """
                    INSERT INTO room_import_hacks (room, old_message_id_max, message_id_offset)
                    VALUES (:r, :old_max, :offset)
                    """,
                    r=room_id,
                    old_max=top_old_id,
                    offset=id_offset,
                    dbconn=conn,
                )

            # Files were stored in:
            #
            #     CREATE TABLE files (
            #         id TEXT PRIMARY KEY,
            #         timestamp INTEGER
            #     );
            #
            # where `id` is a randomized integer value, but stored in a TEXT because yeah.  (The
            # randomization was completely pointless: it was there for the file server mode,
            # which we don't care about).  `timestamp` is milliseconds rather than seconds
            # because yeah.
            #
            # The actual file on disk is stored in ./files/{ROOM}_files/{id}.  We leave it there
            # (rather than moving it to the new location) as it isn't a big deal to let it rest
            # there until expiry.
            #
            # Since we have to preserve the old random IDs for existing Session clients (until
            # they expire), we need to track old_id -> new_id, and we do that using the
            # file_id_hacks table that we created above.  If this table exists then attempting
            # to download a file with an id that doesn't exist does a second check into the
            # hacks table to see if it exists in the mapping.
            imported_files = 0
            n_files = rconn.execute("SELECT COUNT(*) FROM files").fetchone()[0]
            for file_id, timestamp in rconn.execute("SELECT id, timestamp FROM files"):
                # file_id is an integer value but stored in a TEXT field, of course.
                file_id = int(file_id)
                path = f"files/{room_token}_files/{file_id}"
                try:
                    size = os.path.getsize(path)
                except Exception as e:
                    # Best-effort: a missing/unreadable file on disk just skips this upload.
                    logging.warning(
                        f"Error accessing file {path} ({e}); skipping import of this upload"
                    )
                    continue

                # Heuristic: a value this large can't be a plausible epoch-seconds
                # upload time, so fall back to "now".
                if timestamp > 10000000000:
                    logging.warning(
                        f"- file {path} has nonsensical timestamp {timestamp}; "
                        "importing it with current time"
                    )
                    timestamp = time.time()

                exp = None
                if config.UPLOAD_DEFAULT_EXPIRY:
                    exp = timestamp + config.UPLOAD_DEFAULT_EXPIRY
                new_id = db.insert_and_get_pk(
                    """
                    INSERT INTO files (room, size, uploaded, expiry, path)
                    VALUES (:r, :size, :uploaded, :expiry, :path)
                    """,
                    "id",
                    r=room_id,
                    size=size,
                    uploaded=timestamp,
                    expiry=exp,
                    path=path,
                    dbconn=conn,
                )
                db.query(
                    """
                    INSERT INTO file_id_hacks (room, old_file_id, file)
                    VALUES (:r, :old, :new)
                    """,
                    r=room_id,
                    old=file_id,
                    new=new_id,
                    dbconn=conn,
                )
                imported_files += 1
                if imported_files % 1000 == 0:
                    logging.info(f"- ... imported {imported_files}/{n_files} files")

            if imported_files > 0:
                used_file_hacks = True

            logging.info(f"- migrated {imported_files} files")

            # There's also a potential room image, which is just stored on disk and not
            # referenced in the database at all because why bother with proper structure when
            # you can just do random stuff.
            #
            # Unlike the regular files (which will expire in 15 days) this one doesn't expire,
            # so we hard link it into the new uploads directory so that (after 15 days) the old
            # dirs can be cleared out without deleting it.  (There is a potential that the old
            # one remains if the room image gets replaced, but since this is one image per room,
            # a few temporarily forgotten room images left around isn't a big deal).
            #
            # (Unlike regular attachments we don't need a file hack row because the room image
            # is not referenced by id from existing clients.)
            room_image_path = "files/" + room_token
            old_stat = None
            try:
                old_stat = os.stat(room_image_path, follow_symlinks=False)
            except Exception:
                # No room image (or unreadable) — handled by the `else` below.
                pass
            if old_stat is not None:
                files_dir = "uploads/" + room_token
                os.makedirs(files_dir, exist_ok=True)
                # Insert with a placeholder path first: the real path needs the
                # new row's id, which we only know after the insert.
                file_id = db.insert_and_get_pk(
                    """
                    INSERT INTO files (room, size, uploaded, expiry, path)
                    VALUES (:r, :size, :uploaded, NULL, :path)
                    """,
                    "id",
                    r=room_id,
                    size=os.path.getsize(room_image_path),
                    uploaded=os.path.getmtime(room_image_path),
                    path='tmp',
                    dbconn=conn,
                )
                new_path = f"uploads/{room_token}/{file_id}_(imported_room_image)"
                if os.path.exists(new_path):
                    os.remove(new_path)
                os.link(room_image_path, new_path)
                db.query(
                    "UPDATE files SET path = :p WHERE id = :f", p=new_path, f=file_id, dbconn=conn
                )
                db.query(
                    "UPDATE rooms SET image = :f WHERE id = :r", f=file_id, r=room_id, dbconn=conn
                )
                logging.info("- migrated room image")
            else:
                logging.info("- no room image")

            # Banned users.  These are just dumped in a table called "block_list" with just a
            # "public_key" TEXT field containing the session id.
            imported_bans = 0
            for (session_id,) in rconn.execute("SELECT public_key FROM block_list"):
                ins_user(session_id)
                db.query(
                    """
                    INSERT INTO user_permission_overrides (room, "user", banned)
                    VALUES (:r, (SELECT id FROM users WHERE session_id = :session_id), TRUE)
                    ON CONFLICT (room, "user") DO UPDATE SET banned = TRUE
                    """,
                    r=room_id,
                    session_id=session_id,
                    dbconn=conn,
                )
                imported_bans += 1

            # Moderators.  Since the older version didn't have the concept of moderators and
            # admins, old moderators had all the permissions that new admins have, so import
            # them all as admins.
            imported_mods = 0
            for (session_id,) in rconn.execute("SELECT public_key from moderators"):
                ins_user(session_id)
                db.query(
                    """
                    INSERT INTO user_permission_overrides
                        (room, "user", read, write, upload, moderator, admin)
                    VALUES (:r, (SELECT id FROM users WHERE session_id = :session_id),
                        TRUE, TRUE, TRUE, TRUE, TRUE)
                    ON CONFLICT (room, "user") DO UPDATE SET banned = FALSE,
                        read = TRUE, write = TRUE, upload = TRUE, moderator = TRUE, admin = TRUE
                    """,
                    r=room_id,
                    session_id=session_id,
                    dbconn=conn,
                )
                imported_mods += 1

            # User activity.  For some reason, unlike all the other timestamps in the database,
            # the timestamp here is stored in milliseconds.
            #
            # NOTE(review): despite the comment above, `last_active` is compared against a
            # seconds-based cutoff and inserted as-is with no /1000 conversion — verify the
            # units actually stored by the old schema before relying on these values.
            imported_activity, imported_active = 0, 0

            # Don't import rows we're going to immediately prune:
            import_cutoff = time.time() - config.ROOM_ACTIVE_PRUNE_THRESHOLD
            active_cutoff = time.time() - config.ROOM_DEFAULT_ACTIVE_THRESHOLD
            n_activity = rconn.execute(
                "SELECT COUNT(*) FROM user_activity WHERE last_active > ?", (import_cutoff,)
            ).fetchone()[0]
            for session_id, last_active in rconn.execute(
                """
                SELECT public_key, last_active
                FROM user_activity
                WHERE last_active > ? AND public_key IS NOT NULL
                """,
                (import_cutoff,),
            ):
                ins_user(session_id)

                # Record per-room activity, keeping whichever timestamp is newest:
                db.query(
                    """
                    INSERT INTO room_users (room, "user", last_active)
                    VALUES (:r, (SELECT id FROM users WHERE session_id = :session_id),
                        :active)
                    ON CONFLICT (room, "user") DO UPDATE
                        SET last_active = excluded.last_active
                        WHERE excluded.last_active > room_users.last_active
                    """,
                    r=room_id,
                    session_id=session_id,
                    active=last_active,
                    dbconn=conn,
                )
                # Also bump the global per-user activity if this is newer:
                db.query(
                    """
                    UPDATE users
                    SET last_active = :active
                    WHERE session_id = :session_id AND last_active < :active
                    """,
                    active=last_active,
                    session_id=session_id,
                    dbconn=conn,
                )

                if last_active >= active_cutoff:
                    imported_active += 1
                imported_activity += 1
                if imported_activity % 5000 == 0:
                    logging.info(
                        "- ... imported {}/{} user activity records ({} active)".format(
                            imported_activity, n_activity, imported_active
                        )
                    )

            logging.warning(
                "Imported room {}: "
                "{} messages, {} files, {} moderators, {} bans, {} users ({} active)".format(
                    room_token,
                    imported_msgs,
                    imported_files,
                    imported_mods,
                    imported_bans,
                    imported_activity,
                    imported_active,
                )
            )

        total_msgs += imported_msgs
        total_files += imported_files
        total_rooms += 1

    # Drop the compatibility tables again if nothing ended up needing them:
    if not used_room_hacks:
        db.query("DROP TABLE room_import_hacks", dbconn=conn)
    if not used_file_hacks:
        db.query("DROP TABLE file_id_hacks", dbconn=conn)

    logging.warning(
        "Import finished! Imported {} messages/{} files in {} rooms".format(
            total_msgs, total_files, total_rooms
        )
    )

    # Move the old database out of the way so this import won't trigger again
    # on the next startup; a failure here is non-fatal (best-effort).
    try:
        os.rename("database.db", "old-database.db")
    except Exception as e:
        logging.warning(f"Failed to rename database.db -> old-database.db: {e}")