Skip to content

Commit 576d9dd

Browse files
committed
Fix various issues with broken link checking
1 parent b31268c commit 576d9dd

File tree

2 files changed

+41
-24
lines changed

2 files changed

+41
-24
lines changed

app/tasks/pkgtasks.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
import datetime
1818
import re
1919
import sys
20+
from time import sleep
21+
from urllib.parse import urlparse
2022
from typing import Optional
2123

2224
import requests
@@ -76,13 +78,7 @@ def notify_about_git_forum_links():
7678
"package will improve the user experience.\n\nHere are some URLs you might wish to replace:\n"
7779

7880
for x in links:
79-
line = f"\n* {x[1].replace('%', '')} -> {x[0].get_url('packages.view', absolute=True)}"
80-
line_added = msg + line
81-
if len(line_added) > 2000 - 150:
82-
post_bot_message(package, title, msg)
83-
msg = f"(...continued)\n{line}"
84-
else:
85-
msg = line_added
81+
msg += f"\n* {x[1].replace('%', '')} -> {x[0].get_url('packages.view', absolute=True)}"
8682

8783
post_bot_message(package, title, msg)
8884

@@ -111,11 +107,14 @@ def clear_removed_packages(all_packages: bool):
111107

112108
def _url_exists(url: str) -> str:
113109
try:
114-
with requests.get(url, stream=True, timeout=10) as response:
110+
headers = {
111+
"User-Agent": "Mozilla/5.0 (compatible; ContentDB link checker; +https://content.minetest.net/)",
112+
}
113+
with requests.get(url, stream=True, headers=headers, timeout=10) as response:
115114
response.raise_for_status()
116115
return ""
117116
except requests.exceptions.HTTPError as e:
118-
print(f" - [{e.response.status_code}] {url}", file=sys.stderr)
117+
print(f" - [{e.response.status_code}] <{url}>", file=sys.stderr)
119118
return str(e.response.status_code)
120119
except requests.exceptions.ConnectionError:
121120
return "ConnectionError"
@@ -143,10 +142,16 @@ def _check_for_dead_links(package: Package) -> dict[str, str]:
143142
if link is None:
144143
continue
145144

145+
url = urlparse(link)
146+
if url.scheme != "http" and url.scheme != "https":
147+
continue
148+
146149
res = _url_exists(link)
147150
if res != "":
148151
bad_urls[link] = res
149152

153+
sleep(0.5)
154+
150155
return bad_urls
151156

152157

app/utils/models.py

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,29 @@ def add_system_audit_log(severity: AuditSeverity, title: str, url: str, package=
120120
return add_audit_log(severity, get_system_user(), title, url, package, description)
121121

122122

123+
def add_replies(thread: Thread, user: User, message: str, continuation: str = "(continued)\n\n", is_status_update=False):
    """Append ``message`` to ``thread`` as one or more replies authored by ``user``.

    A message longer than 1900 characters is split into several replies.
    Each split happens at the last newline inside the first 1900 characters
    when there is one; otherwise the text is cut at exactly 1900 characters.
    Every truncated reply ends with an ellipsis marker, and every reply after
    the first is prefixed with ``continuation``.

    This function only appends to ``thread.replies``; an empty ``message``
    adds no reply at all.

    Args:
        thread: Thread whose ``replies`` collection receives the new replies.
        user: Author assigned to every reply created here.
        message: Full text to post.
        continuation: Prefix prepended to the second and later replies.
        is_status_update: Copied onto each created reply.
    """
    # NOTE(review): 1900 presumably leaves headroom below a ~2000 character
    # comment limit; the continuation prefix is not counted against it - confirm.
    max_length = 1900

    is_first = True
    while message != "":
        if len(message) > max_length:
            idx = message[:max_length].rfind("\n")
            if idx <= 0:
                # rfind() returns -1 when the window has no newline, and 0 when
                # the only newline is the leading one (always the case for the
                # remainder of a previous split). Slicing at either value makes
                # no progress: 0 loops forever, -1 emits almost the whole
                # over-long message at once. Fall back to a hard cut instead.
                idx = max_length
            this_reply = message[:idx] + "\n\n…"
            message = message[idx:]
        else:
            this_reply = message
            message = ""

        reply = ThreadReply()
        reply.thread = thread
        reply.author = user
        reply.is_status_update = is_status_update
        if is_first:
            reply.comment = this_reply
        else:
            reply.comment = f"{continuation}{this_reply}"
        thread.replies.append(reply)
        is_first = False
144+
145+
123146
def post_bot_message(package: Package, title: str, message: str, session=None):
124147
if session is None:
125148
session = db.session
@@ -137,16 +160,12 @@ def post_bot_message(package: Package, title: str, message: str, session=None):
137160
session.add(thread)
138161
session.flush()
139162

140-
reply = ThreadReply()
141-
reply.thread = thread
142-
reply.author = system_user
143-
reply.comment = "**{}**\n\n{}\n\nThis is an automated message, but you can reply if you need help".format(title, message)
144-
session.add(reply)
163+
add_replies(thread, system_user,
164+
f"**{title}**\n\n{message}\n\nThis is an automated message, but you can reply if you need help",
165+
continuation=f"(continued)\n\n**{title}**\n\n")
145166

146167
add_notification(thread.watchers, system_user, NotificationType.BOT, title, thread.get_view_url(), thread.package, session)
147168

148-
thread.replies.append(reply)
149-
150169

151170
def post_to_approval_thread(package: Package, user: User, message: str, is_status_update=True, create_thread=False):
152171
thread = package.review_thread
@@ -163,12 +182,7 @@ def post_to_approval_thread(package: Package, user: User, message: str, is_statu
163182
else:
164183
return
165184

166-
reply = ThreadReply()
167-
reply.thread = thread
168-
reply.author = user
169-
reply.is_status_update = is_status_update
170-
reply.comment = message
171-
db.session.add(reply)
185+
add_replies(thread, user, message, is_status_update=is_status_update)
172186

173187
if is_status_update:
174188
msg = f"{message} - {thread.title}"
@@ -177,8 +191,6 @@ def post_to_approval_thread(package: Package, user: User, message: str, is_statu
177191

178192
add_notification(thread.watchers, user, NotificationType.THREAD_REPLY, msg, thread.get_view_url(), package)
179193

180-
thread.replies.append(reply)
181-
182194

183195
def get_games_from_csv(session: sqlalchemy.orm.Session, csv: str) -> List[Package]:
184196
return get_games_from_list(session, [name.strip() for name in csv.split(",")])

0 commit comments

Comments
 (0)