1818import fnmatch
1919import functools
2020import gzip
21+ import io
2122import json
2223import logging
2324import lzma
@@ -138,16 +139,22 @@ def _del_archive_extension(path, /):
138139 return os .path .normpath (arcpath )
139140
140141
def _iterfind_zip_members(name_or_obj, pattern, pwd, /):
    """
    Yield open binary streams for the zip members whose names match a pattern.

    :param name_or_obj: Path to a zip archive, or a seekable binary file
        object containing one; passed straight to ``zipfile.ZipFile``.
    :param pattern: ``fnmatch``-style pattern tested against every member name.
    :param pwd: Password (bytes) for encrypted members, or ``None``.
    :return: Generator of readable member streams, in archive order.

    Each yielded stream is closed as soon as the generator is resumed, and the
    archive itself is closed when the generator is exhausted or closed, so a
    member must be fully consumed before asking for the next one.
    """
    with zipfile.ZipFile(name_or_obj) as zf:
        for name in fnmatch.filter(zf.namelist(), pattern):
            # Open the member inside the ``with`` so it is always released,
            # even if the consumer abandons the generator mid-iteration.
            with zf.open(name, pwd=pwd) as member:
                yield member
147148
148149
149- def _iterfind_tar_members (filename , pattern , / ):
150- with tarfile .open (filename ) as archive :
150+ def _iterfind_tar_members (name_or_obj , pattern , / ):
151+ args , kwargs = (), {}
152+ if isinstance (name_or_obj , io .BytesIO ):
153+ name_or_obj .seek (0 ) # Ensure the pointer is at the start
154+ kwargs = {"fileobj" : name_or_obj , "mode" : "r:*" }
155+ else :
156+ args = (name_or_obj ,)
157+ with tarfile .open (* args , ** kwargs ) as archive :
151158 for name in fnmatch .filter (archive .getnames (), pattern ):
152159 if file := archive .extractfile (name ):
153160 with file :
@@ -288,14 +295,17 @@ def load(source, /, *, opener=None, broken=False, json_loads=None, **json_loads_
288295 :rtype: Iterable[Any]
289296 """
290297
291- # If a URL or Request object is provided, download the file first.
298+ # URL or Request object handling
292299 if _looks_like_url (source ):
293300 if opener is not None :
294301 raise ValueError ("Custom opener is not supported for URLs or Request objects." )
295- source , _ = urllib .request .urlretrieve (source )
296-
302+ with urllib .request .urlopen (source ) as fd :
303+ charset = fd .headers .get_content_charset (failobj = _utf_8 )
304+ # Wrap the file descriptor to handle text encoding.
305+ stream = io .TextIOWrapper (fd , encoding = charset )
306+ yield from loader (stream , broken , json_loads = json_loads , ** json_loads_kwargs )
297307 # Filename handling
298- if isinstance (source , (str , os .PathLike )):
308+ elif isinstance (source , (str , os .PathLike )):
299309 filename = source if isinstance (source , str ) else os .fspath (source ) # Ensure it's a string path
300310 openhook = opener or _xopen
301311 with openhook (filename , mode = "rb" , encoding = None ) as fd :
@@ -341,14 +351,15 @@ def load_archive(
341351 if _looks_like_url (file ):
342352 if opener is not None :
343353 raise ValueError ("Custom opener is not supported for URLs or Request objects." )
344- file , _ = urllib .request .urlretrieve (file )
354+ with urllib .request .urlopen (file ) as file :
355+ file = io .BytesIO (file .read ()) # noqa: PLW2901
345356
346357 if zipfile .is_zipfile (file ):
347358 members = _iterfind_zip_members (file , pattern , pwd )
348359 elif tarfile .is_tarfile (file ):
349360 members = _iterfind_tar_members (file , pattern )
350361 else :
351- raise ValueError (f "Unsupported archive format: { file } " )
362+ raise ValueError ("Unsupported archive format" )
352363
353364 for member in members :
354365 filename = member .name
@@ -358,15 +369,15 @@ def load_archive(
358369
359370
360371def dump_archive (
361- path ,
362- data ,
363- / ,
364- * ,
365- opener = None ,
366- text_mode = True ,
367- dump_if_empty = True ,
368- json_dumps = None ,
369- ** json_dumps_kwargs ,
372+ path ,
373+ data ,
374+ / ,
375+ * ,
376+ opener = None ,
377+ text_mode = True ,
378+ dump_if_empty = True ,
379+ json_dumps = None ,
380+ ** json_dumps_kwargs ,
370381):
371382 """
372383 Dump multiple JSON Lines items into an archive file (zip or tar) with the specified path.
0 commit comments