Skip to content

Commit 6d1c2eb

Browse files
dralley authored and ggainey committed
Made sync more tolerant of poorly configured webservers
Treeinfo download will ignore results that look like HTML. Some webservers return 200 with an HTML error page rather than 404. Closes #3599. (Cherry picked from commit 298f3a9.)
1 parent 0b82c47 commit 6d1c2eb

File tree

3 files changed

+19
-5
lines changed

3 files changed

+19
-5
lines changed

CHANGES/3599.bugfix

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Made sync more tolerant of poorly configured webservers.

pulp_rpm/app/kickstart/treeinfo.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -17,13 +17,13 @@ class PulpTreeInfo(TreeInfo):
1717
1818
"""
1919

20-
def load(self, f):
20+
def loads(self, s):
2121
"""
22-
Load data from a file.
22+
Load data from a string.
2323
2424
"""
2525
try:
26-
super().load(f)
26+
super().loads(s)
2727
except MissingSectionHeaderError:
2828
raise TypeError(_("Treeinfo file should have INI format"))
2929

pulp_rpm/app/tasks/synchronizing.py

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -404,8 +404,9 @@ def get_treeinfo_data(remote, remote_url):
404404

405405
namespaces = [".treeinfo", "treeinfo"]
406406
for namespace in namespaces:
407+
treeinfo_url = urlpath_sanitize(remote_url, namespace)
407408
downloader = remote.get_downloader(
408-
url=urlpath_sanitize(remote_url, namespace),
409+
url=treeinfo_url,
409410
silence_errors_for_response_status_codes={403, 404},
410411
)
411412

@@ -415,7 +416,19 @@ def get_treeinfo_data(remote, remote_url):
415416
continue
416417

417418
treeinfo = PulpTreeInfo()
418-
treeinfo.load(f=result.path)
419+
with open(result.path, "r") as f:
420+
treeinfo_str = f.read()
421+
# some impolitely configured webservers return HTTP 200 with an HTML error page
422+
# when a resource isn't found, instead of returning an HTTP 404 code
423+
if treeinfo_str.startswith("<"):
424+
# in the event that the response looks like HTML rather than an INI file,
425+
# let's just pretend it returned 404
426+
log.debug(
427+
f"Server returned 200 for {treeinfo_url}, but the result looks like HTML"
428+
" rather than treeinfo. Ignoring it."
429+
)
430+
continue
431+
treeinfo.loads(treeinfo_str)
419432
sha256 = result.artifact_attributes["sha256"]
420433
treeinfo_data = TreeinfoData(treeinfo.parsed_sections())
421434

0 commit comments

Comments
 (0)