@@ -62,6 +62,7 @@ def __init__(
62
62
alternative_document_service : Optional [Dict [str , str ]] = None ,
63
63
detect_file_duplication : Optional [bool ] = False ,
64
64
skip_storage_download : Optional [bool ] = False ,
65
+ download_using_prefix : Optional [bool ] = False ,
65
66
** kwargs : Any ,
66
67
) -> None :
67
68
"""Initialize S3 bucket and key, along with credentials if needed.
@@ -138,6 +139,7 @@ def __init__(
138
139
139
140
self .verbose = verbose
140
141
self .download_dir = download_dir if download_dir else tempfile .mkdtemp ()
142
+ self .download_using_prefix = download_using_prefix
141
143
if not os .path .exists (self .download_dir ):
142
144
os .makedirs (self .download_dir )
143
145
@@ -558,15 +560,20 @@ def get_files(self) -> list[str]:
558
560
return file_paths
559
561
560
562
561
- def download_s3_file (self , key : str , temp_dir : str , file_paths : list ):
563
+ def download_s3_file (self , prefix_key : str , temp_dir : str , file_paths : list ):
562
564
"""Download a single file"""
563
565
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/client/download_file.html#S3.Client.download_file
566
+ key = prefix_key
567
+ original_key = key
568
+ bucket = self .bucket
569
+ if self .prefix and self .download_using_prefix is False :
570
+ key = key .replace (f"{ self .prefix } /" , "" )
564
571
filepath = f"{ temp_dir } /{ key } "
565
572
folder_path = os .path .dirname (filepath )
566
573
os .makedirs (folder_path , exist_ok = True )
567
- original_key = key
574
+
568
575
try :
569
- self .s3 .meta .client .download_file (self . bucket , original_key , filepath )
576
+ self .s3 .meta .client .download_file (bucket , original_key , filepath )
570
577
file_paths .append (filepath )
571
578
logging .getLogger ().debug (f" { original_key } to { key } " )
572
579
except Exception as e :
0 commit comments