Skip to content

Commit 9e0fce7

Browse files
ROB: Fixing infinite loop in ArrayObject read_from_stream (py-pdf#2928)
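Fix an infinite loop in ArrayObject.read_from_stream when the stream ends unexpectedly: a read at end of stream returns b"", and the parser now breaks out of its loop when it sees that empty token.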
1 parent 9f647e6 commit 9e0fce7

File tree

2 files changed

+14
-0
lines changed

2 files changed

+14
-0
lines changed

pypdf/generic/_data_structures.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -251,6 +251,8 @@ def read_from_stream(
251251
tok = stream.read(1)
252252
while tok.isspace():
253253
tok = stream.read(1)
254+
if tok == b"":
255+
break
254256
if tok == b"%":
255257
stream.seek(-1, 1)
256258
skip_over_comment(stream)

tests/test_text_extraction.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -260,3 +260,15 @@ def test_layout_mode_space_vertically_font_height_weight():
260260
"PDF extracted text differs from expected value.\n\n"
261261
"Expected:\n\n%r\n\nExtracted:\n\n%r\n\n" % (pdftext, text)
262262
)
263+
264+
265+
@pytest.mark.enable_socket
def test_infinite_loop_arrays():
    """
    Regression test for #2928.

    Downloads a sample PDF (``arrayabruptending.pdf``), extracts the text of
    its first page and checks that a known phrase is present in the result.
    """
    # Remote location of the sample document and the name passed to the
    # download helper.
    pdf_url = "https://github.com/user-attachments/files/17576546/arrayabruptending.pdf"
    pdf_name = "arrayabruptending.pdf"

    pdf_bytes = get_data_from_url(pdf_url, name=pdf_name)
    first_page = PdfReader(BytesIO(pdf_bytes)).pages[0]

    # Extraction has to finish and still contain the expected phrase.
    assert "RNA structure comparison" in first_page.extract_text()

0 commit comments

Comments
 (0)