Skip to content

Commit 5587d5b

Browse files
committed
**Changed**: Replace the legacy urllib.request.urlretrieve with urllib.request.urlopen.
1 parent f13deda commit 5587d5b

File tree

15 files changed

+57
-64
lines changed

15 files changed

+57
-64
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
## Releases
22

3+
- **Changed:** Replace the legacy `urllib.request.urlretrieve` with `urllib.request.urlopen`.
4+
35
### v1.3.17 (2025-09-12)
46

57
- **Added:** The `load_archive` function now supports loading data directly from a URL.

jsonl.py

Lines changed: 30 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import fnmatch
1919
import functools
2020
import gzip
21+
import io
2122
import json
2223
import logging
2324
import lzma
@@ -138,16 +139,22 @@ def _del_archive_extension(path, /):
138139
return os.path.normpath(arcpath)
139140

140141

141-
def _iterfind_zip_members(filename, pattern, pwd, /):
142-
with zipfile.ZipFile(filename) as zf:
142+
def _iterfind_zip_members(name_or_obj, pattern, pwd, /):
143+
with zipfile.ZipFile(name_or_obj) as zf:
143144
for name in fnmatch.filter(zf.namelist(), pattern):
144145
file = zf.open(name, pwd=pwd)
145146
with file:
146147
yield file
147148

148149

149-
def _iterfind_tar_members(filename, pattern, /):
150-
with tarfile.open(filename) as archive:
150+
def _iterfind_tar_members(name_or_obj, pattern, /):
151+
args, kwargs = (), {}
152+
if isinstance(name_or_obj, io.BytesIO):
153+
name_or_obj.seek(0) # Ensure the pointer is at the start
154+
kwargs = {"fileobj": name_or_obj, "mode": "r:*"}
155+
else:
156+
args = (name_or_obj,)
157+
with tarfile.open(*args, **kwargs) as archive:
151158
for name in fnmatch.filter(archive.getnames(), pattern):
152159
if file := archive.extractfile(name):
153160
with file:
@@ -288,14 +295,17 @@ def load(source, /, *, opener=None, broken=False, json_loads=None, **json_loads_
288295
:rtype: Iterable[Any]
289296
"""
290297

291-
# If a URL or Request object is provided, download the file first.
298+
# URL or Request object handling
292299
if _looks_like_url(source):
293300
if opener is not None:
294301
raise ValueError("Custom opener is not supported for URLs or Request objects.")
295-
source, _ = urllib.request.urlretrieve(source)
296-
302+
with urllib.request.urlopen(source) as fd:
303+
charset = fd.headers.get_content_charset(failobj=_utf_8)
304+
# Wrap the file descriptor to handle text encoding.
305+
stream = io.TextIOWrapper(fd, encoding=charset)
306+
yield from loader(stream, broken, json_loads=json_loads, **json_loads_kwargs)
297307
# Filename handling
298-
if isinstance(source, (str, os.PathLike)):
308+
elif isinstance(source, (str, os.PathLike)):
299309
filename = source if isinstance(source, str) else os.fspath(source) # Ensure it's a string path
300310
openhook = opener or _xopen
301311
with openhook(filename, mode="rb", encoding=None) as fd:
@@ -341,14 +351,15 @@ def load_archive(
341351
if _looks_like_url(file):
342352
if opener is not None:
343353
raise ValueError("Custom opener is not supported for URLs or Request objects.")
344-
file, _ = urllib.request.urlretrieve(file)
354+
with urllib.request.urlopen(file) as file:
355+
file = io.BytesIO(file.read()) # noqa: PLW2901
345356

346357
if zipfile.is_zipfile(file):
347358
members = _iterfind_zip_members(file, pattern, pwd)
348359
elif tarfile.is_tarfile(file):
349360
members = _iterfind_tar_members(file, pattern)
350361
else:
351-
raise ValueError(f"Unsupported archive format: {file}")
362+
raise ValueError("Unsupported archive format")
352363

353364
for member in members:
354365
filename = member.name
@@ -358,15 +369,15 @@ def load_archive(
358369

359370

360371
def dump_archive(
361-
path,
362-
data,
363-
/,
364-
*,
365-
opener=None,
366-
text_mode=True,
367-
dump_if_empty=True,
368-
json_dumps=None,
369-
**json_dumps_kwargs,
372+
path,
373+
data,
374+
/,
375+
*,
376+
opener=None,
377+
text_mode=True,
378+
dump_if_empty=True,
379+
json_dumps=None,
380+
**json_dumps_kwargs,
370381
):
371382
"""
372383
Dump multiple JSON Lines items into an archive file (zip or tar) with the specified path.

tests/acceptance/__init__.py

Whitespace-only changes.

tests/acceptance/tests.py

Lines changed: 0 additions & 20 deletions
This file was deleted.

tests/conftest.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,23 +35,23 @@ def log_message(self, fmt, *args): # pragma: no cover
3535
functools.partial(MyHandler, directory=os.path.abspath(directory)),
3636
)
3737
name, port = server.socket.getsockname()
38-
uri = "http://{}:{}/".format(name, port)
38+
url = "http://{}:{}/".format(name, port)
3939

4040
server_thread = threading.Thread(target=server.serve_forever, name="http_server")
4141
server_thread.start()
4242

4343
try:
4444
with server.socket:
45-
yield uri
45+
yield url
4646
finally:
4747
server.shutdown()
4848
server_thread.join()
4949

5050

5151
@pytest.fixture(scope="session")
52-
def http_server_uri():
53-
with manage_http_server(DATA_DIR) as uri:
54-
yield uri
52+
def http_server():
53+
with manage_http_server(DATA_DIR) as url:
54+
yield url
5555

5656

5757
@pytest.fixture(scope="package", params=(True, False))
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# -*- coding: utf-8 -*-
2+
23
import contextlib
34
import io
45
import json

0 commit comments

Comments
 (0)