1
1
import boto3
2
2
from botocore .exceptions import ClientError
3
3
from objwrapper .exceptions import InvalidConfigError
4
+ import requests
5
+ from datetime import datetime
6
+ import hmac
7
+ import hashlib
8
+ import base64
9
+ from lxml import etree
4
10
5
11
class S3Conf (object ):
6
12
def __init__ (self , key_id , key , bucket_name , host , port , use_v4_sig , aws_region , use_https , path_style_request , sse_c_key ):
@@ -24,6 +30,7 @@ def __init__(self, conf):
24
30
self .conf = conf
25
31
self .client = None
26
32
self .bucket = None
33
+ self .enpoint_url = None ;
27
34
self .do_connect ()
28
35
29
36
def do_connect (self ):
@@ -37,6 +44,10 @@ def do_connect(self):
37
44
config = boto3 .session .Config (signature_version = 's3' ,s3 = {'addressing_style' :addressing_style })
38
45
39
46
if self .conf .host is None :
47
+ if self .conf .use_https :
48
+ self .endpoint_url = f'https://s3.{ self .conf .aws_region } .amazonaws.com'
49
+ else :
50
+ self .endpoint_url = f'http://s3.{ self .conf .aws_region } .amazonaws.com'
40
51
self .client = boto3 .client ('s3' ,
41
52
region_name = self .conf .aws_region ,
42
53
aws_access_key_id = self .conf .key_id ,
@@ -48,6 +59,7 @@ def do_connect(self):
48
59
endpoint_url = 'https://%s' % self .conf .host if self .conf .use_https else 'http://%s' % self .conf .host
49
60
if self .conf .port :
50
61
endpoint_url = '%s:%s' % (endpoint_url , self .conf .port )
62
+ self .endpoint_url = endpoint_url
51
63
self .client = boto3 .client ('s3' ,
52
64
aws_access_key_id = self .conf .key_id ,
53
65
aws_secret_access_key = self .conf .key ,
@@ -67,6 +79,11 @@ def get_name(self):
67
79
return 'S3 storage backend'
68
80
69
81
def list_objs (self , prefix = None ):
82
+ if not self .conf .use_v4_sig and self .conf .path_style_request :
83
+ # When using AWS signature version 2 (use_v4_sig disabled) together with path-style requests, boto3 is unable to list objects properly.
84
+ # We manually sign the requests and then use the list-objects API to list the objects in the bucket.
85
+ yield from self .list_objs_v2 (prefix )
86
+ return
70
87
paginator = self .client .get_paginator ('list_objects_v2' )
71
88
if prefix :
72
89
iterator = paginator .paginate (Bucket = self .bucket , Prefix = prefix )
@@ -79,7 +96,6 @@ def list_objs(self, prefix=None):
79
96
obj = [tokens , content .get ('Size' , 0 )]
80
97
yield obj
81
98
82
-
83
99
def obj_exists (self , key ):
84
100
bucket = self .bucket
85
101
try :
@@ -128,3 +144,65 @@ def get_ctime(self, key):
128
144
return float (ctime )
129
145
except :
130
146
return 0
147
+
148
def get_signature(self, date):
    """Return the base64 HMAC-SHA1 signature for a GET on the bucket root.

    The signed text is the verb ``GET``, two empty lines, *date*, and the
    resource path ``/<self.bucket>/``; it is keyed with ``self.conf.key``.

    :param date: value that will be sent in the request's ``Date`` header.
    :return: the signature as a UTF-8 decoded base64 string.
    """
    secret = self.conf.key.encode('utf-8')
    string_to_sign = f"GET\n\n\n{date}\n/{self.bucket}/".encode('utf-8')
    raw_digest = hmac.new(secret, string_to_sign, hashlib.sha1).digest()
    return base64.b64encode(raw_digest).decode('utf-8')
155
+
156
def list_bucket_v2(self, marker, prefix):
    """Fetch one page of the bucket listing with a manually signed GET.

    The request goes to ``<self.endpoint_url>/<self.bucket>/`` and carries a
    ``Date`` header plus an ``Authorization: AWS <key_id>:<signature>``
    header built from :meth:`get_signature`.

    :param marker: key to continue listing after; omitted when falsy.
    :param prefix: key prefix to filter on; a trailing ``/`` is appended
        before it is sent. Omitted when falsy.
    :return: the response body text on HTTP 200, otherwise ``None``.
    """
    # Local imports keep this block self-contained with respect to the
    # file's existing import section.
    from email.utils import formatdate
    from urllib.parse import quote, urlencode

    # RFC 1123 date in GMT; unlike strftime('%a ... %b') this does not
    # depend on the process locale, and it avoids the deprecated utcnow().
    date = formatdate(usegmt=True)
    signature = self.get_signature(date)

    headers = {
        'Date': date,
        'Authorization': f"AWS {self.conf.key_id}:{signature}",
    }

    # Same parameters, in the same order, as before: marker first, then
    # prefix with its trailing slash.
    params = {}
    if marker:
        params['marker'] = marker
    if prefix:
        params['prefix'] = f'{prefix}/'

    url = f'{self.endpoint_url}/{self.bucket}/'
    if params:
        # Percent-encode values so keys containing '&', '#', '+', spaces or
        # non-ASCII characters cannot corrupt the query string.
        url = f"{url}?{urlencode(params, safe='/', quote_via=quote)}"

    response = requests.get(url, headers=headers, timeout=300)
    if response.status_code != 200:
        return None
    return response.text
181
+
182
def list_objs_v2(self, prefix):
    """Yield bucket entries by paging through :meth:`list_bucket_v2`.

    Each page is parsed as XML. For every ``Contents`` element a list is
    yielded holding the ``Key`` text and the ``Size`` as an ``int``, in the
    order those children appear in the document. The last key seen becomes
    the marker for the next page.

    Listing stops when a request returns nothing, when the root element is
    not a ``ListBucketResult``, when the page is not flagged as truncated,
    or when a truncated page did not advance the marker.

    :param prefix: key prefix passed through to :meth:`list_bucket_v2`.
    """
    marker = None
    while True:
        rsp = self.list_bucket_v2(marker, prefix)
        if not rsp:
            break

        root = etree.fromstring(rsp.encode('utf-8'))
        if "ListBucketResult" not in root.tag:
            break

        # Start every page as "not truncated" so that a response lacking an
        # IsTruncated element ends the listing instead of reusing the flag
        # left over from the previous page.
        is_truncated = False
        last_key = marker
        for child in root:
            if "IsTruncated" in child.tag:
                is_truncated = child.text == "true"
            if "Contents" in child.tag:
                obj = []
                for contents in child:
                    if "Key" in contents.tag:
                        last_key = contents.text
                        obj.append(contents.text)
                    if "Size" in contents.tag:
                        obj.append(int(contents.text))
                if obj:
                    yield obj

        # A truncated page that did not move the marker would make the next
        # request identical to this one; stop rather than loop forever.
        if not is_truncated or last_key == marker:
            break
        marker = last_key
0 commit comments