Skip to content

Commit 83b7fd1

Browse files
committed
feat: 支持给流式下载传入参数 + 适配host为空的情况
1 parent ddc4224 commit 83b7fd1

File tree

3 files changed

+91
-8
lines changed

3 files changed

+91
-8
lines changed

docs/docs/tutorial/advance/utils.md

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
---
2+
title: 工具函数
3+
---
4+
5+
6+
7+
在实现 `pepperbot` 的过程中,有一些比较通用的功能被封装成了工具函数,可以直接导入使用。
8+
9+
## `download_file`
10+
11+
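基于 httpx 异步、流式地下载文件。可以通过 `client_kwargs`、`stream_kwargs` 分别向 `httpx.AsyncClient` 和 `client.stream` 传入额外参数(例如 `timeout`),并通过 `chunk_size` 调整每次写入的块大小(默认 8192 字节)。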
12+
13+
```python
14+
15+
from pepperbot.core.message.segment import download_file
16+
17+
```

docs/sidebars.js

+1
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ module.exports = {
102102
"tutorial/advance/log",
103103
"tutorial/advance/deploy",
104104
"tutorial/advance/test",
105+
"tutorial/advance/utils",
105106
],
106107
},
107108
{

pepperbot/core/message/segment.py

+73-8
Original file line numberDiff line numberDiff line change
@@ -201,10 +201,26 @@ async def download(
201201
save_dir: Optional[str] = None,
202202
file_name: Optional[str] = None,
203203
extension: str = "jpg",
204+
client_kwargs: Optional[Dict] = None,
205+
stream_kwargs: Optional[Dict] = None,
206+
chunk_size: int = 8192,
204207
) -> str:
205-
"""返回下载后的绝对(完整)路径
208+
"""
209+
使用httpx,异步流式下载
210+
211+
httpx默认timeout仅为5s,download_file中已将默认timeout调整为60s;如果下载大文件仍然超时,需要手动设置更大的timeout
212+
可以通过client_kwargs、stream_kwargs传入参数,分别对应client和stream的参数
213+
214+
Args:
215+
full_path : 如果提供了full_path(需要提供文件的扩展类型),则直接下载到该路径,其他参数无效
216+
client_kwargs : https://www.python-httpx.org/api/#asyncclient
217+
stream_kwargs : https://www.python-httpx.org/api/#request
218+
219+
Raises:
220+
EventHandleError: _description_
206221
207-
如果提供了full_path(需要提供文件的扩展类型),则直接下载到该路径,其他参数无效
222+
Returns:
223+
返回图片下载后的绝对(完整)路径
208224
"""
209225

210226
if full_path:
@@ -218,9 +234,21 @@ async def download(
218234
save_path = os.path.abspath(save_path)
219235

220236
if self.temporary_file_path.startswith("http"):
221-
await download_file(self.file_path, save_path)
237+
await download_file(
238+
url=self.file_path,
239+
save_path=save_path,
240+
client_kwargs=client_kwargs,
241+
stream_kwargs=stream_kwargs,
242+
chunk_size=chunk_size,
243+
)
222244
return save_path
223245

246+
# 这里只处理了host为空的情况(3个斜杠)
247+
# https://en.wikipedia.org/wiki/File_URI_scheme
248+
elif self.temporary_file_path.startswith("file:///"):
249+
return self.file_path[8:]
250+
251+
# TODO 不太确定带host的情况下,path是否正确
224252
elif self.temporary_file_path.startswith("file://"):
225253
return self.file_path[7:]
226254

@@ -263,8 +291,23 @@ def hash_string(string: str):
263291
return h.hexdigest()
264292

265293

266-
async def download_file(url: str, save_path: str):
267-
"""使用httpx,流式下载"""
294+
async def download_file(
295+
url: str,
296+
save_path: str,
297+
client_kwargs: Optional[Dict] = None,
298+
stream_kwargs: Optional[Dict] = None,
299+
chunk_size: int = 8192,
300+
):
301+
"""使用httpx,流式下载
302+
303+
默认使用GET,可以通过stream_kwargs传入method参数,来指定请求方法
304+
305+
不要在stream_kwargs中再传入url:stream_kwargs会覆盖默认参数,其中的url会取代函数参数url,导致实际请求的地址与传入的url不一致
306+
307+
Args:
308+
client_kwargs : https://www.python-httpx.org/api/#asyncclient
309+
stream_kwargs : https://www.python-httpx.org/api/#request
310+
"""
268311

269312
# 以防万一,还是再调用一次,保证绝对是绝对路径
270313
absolute_path = os.path.abspath(save_path)
@@ -273,10 +316,32 @@ async def download_file(url: str, save_path: str):
273316
if not os.path.exists(directory): # 如果文件夹不存在
274317
os.makedirs(directory) # 创建文件夹
275318

276-
async with httpx.AsyncClient() as client:
277-
async with client.stream("GET", url) as response:
319+
# 设置默认参数
320+
default_client_kwargs: Dict = dict(
321+
timeout=60,
322+
)
323+
324+
default_stream_kwargs: Dict = dict(
325+
method="GET",
326+
url=url,
327+
timeout=60,
328+
)
329+
330+
# 确保kwargs不为None
331+
if client_kwargs:
332+
client_kwargs = {**default_client_kwargs, **client_kwargs}
333+
else:
334+
client_kwargs = default_client_kwargs
335+
336+
if stream_kwargs:
337+
stream_kwargs = {**default_stream_kwargs, **stream_kwargs}
338+
else:
339+
stream_kwargs = default_stream_kwargs
340+
341+
async with httpx.AsyncClient(**client_kwargs) as client:
342+
async with client.stream(**stream_kwargs) as response:
278343
async with aiofiles.open(absolute_path, "wb") as f:
279-
async for chunk in response.aiter_bytes(chunk_size=8192):
344+
async for chunk in response.aiter_bytes(chunk_size=chunk_size):
280345
await f.write(chunk)
281346

282347

0 commit comments

Comments
 (0)