class Proxies(SqliteStore):
    """SQLite-backed store of proxy URLs with an enable/disable flag.

    Each row holds ``id`` (autoincrement primary key), ``url``, ``enable``
    (0 or 1), ``ct`` (creation unix timestamp) and ``ut`` (last-update unix
    timestamp).
    """

    def __init__(self, store_path):
        super().__init__(store_path)
        self.primary_key = 'id'
        self.table_name = 'proxies'
        self._create_table()

    def _create_table(self):
        """Create the proxies table if it does not exist yet.

        Best effort: a failure is logged, not raised.
        """
        with closing(self._get_sync_connection()) as conn, closing(conn.cursor()) as cursor:
            try:
                sql = f'''
                CREATE TABLE IF NOT EXISTS {self.table_name} (
                    {self.primary_key} INTEGER PRIMARY KEY AUTOINCREMENT,
                    url VARCHAR(512) NOT NULL,
                    enable INTEGER NOT NULL,
                    ct INTEGER NOT NULL,
                    ut INTEGER NOT NULL
                )
                '''
                cursor.execute(sql)
                conn.commit()
            except Exception as e:
                logger.error(f'failed to create table, error: {e}')

    async def save(self, url: str = '', enable: int = 1, id: int = 0) -> bool:
        """Update the proxy with the given ``id``, or insert a new row.

        An UPDATE is attempted first; when it touches no row (for example
        with the default ``id=0``) a new row is inserted instead.

        Returns:
            True when the statement(s) were committed, False on any error
            (the transaction is rolled back and the error is logged).
        """
        now = int(time.time())
        async with self._get_connection() as conn:
            try:
                sql = f'UPDATE {self.table_name} SET url = ?, enable = ?, ut = ? WHERE id = ?'
                cursor = await conn.execute(sql, (url, enable, now, id))
                # Use this statement's own rowcount rather than the
                # connection-wide total_changes counter: total_changes is
                # cumulative for the connection, so it only equals "rows
                # updated just now" on a brand-new connection.
                if cursor.rowcount == 0:
                    sql = f'INSERT INTO {self.table_name} (url, enable, ct, ut) VALUES (?, ?, ?, ?)'
                    await conn.execute(sql, (url, enable, now, now))
                await conn.commit()
                return True
            except Exception as e:
                logger.error(f'failed to save proxy, error: {e}')
                await conn.rollback()
                return False

    async def remove(self, id: int) -> bool:
        """Delete the proxy with the given ``id``.

        Returns:
            True when the DELETE was committed (also when no row had that
            id), False on any error.
        """
        async with self._get_connection() as conn:
            try:
                sql = f'DELETE FROM {self.table_name} WHERE id = ?'
                await conn.execute(sql, (id,))
                await conn.commit()
                return True
            except Exception as e:
                logger.error(f'failed to remove proxy, error: {e}')
                await conn.rollback()
                return False

    async def load(self, enable: int = -1, offset: int = 0, limit: int = 0) -> list:
        """Return proxies as a list of dicts, ordered by primary key.

        Args:
            enable: filter on the ``enable`` column; -1 means no filter.
            offset: number of leading rows to skip.
            limit: maximum number of rows; 0 (or less) means no limit.

        Returns:
            The matching rows, or an empty list on any error.
        """
        async with self._get_connection() as conn:
            try:
                sql = f'SELECT * FROM {self.table_name}'
                params = []

                if enable != -1:
                    sql += ' WHERE enable = ?'
                    params.append(enable)

                # Explicit ordering keeps paging and time-based rotation
                # stable; without it the row order is unspecified.
                sql += f' ORDER BY {self.primary_key}'

                if limit > 0 or offset > 0:
                    # SQLite only accepts OFFSET together with LIMIT; a
                    # negative LIMIT means "no upper bound", so an offset
                    # without a limit is honoured instead of dropped.
                    sql += ' LIMIT ? OFFSET ?'
                    params.extend([limit if limit > 0 else -1, offset])

                cursor = await conn.execute(sql, params)
                rows = await cursor.fetchall()
                return [dict(row) for row in rows]
            except Exception as e:
                # Read-only path: there is nothing to roll back.
                logger.error(f'failed to load proxies, error: {e}')
                return []

    async def _set_enable(self, id: int, enable: int) -> bool:
        """Write the ``enable`` flag and refresh ``ut`` for one proxy."""
        action = 'enable' if enable else 'disable'
        now = int(time.time())
        async with self._get_connection() as conn:
            try:
                sql = f'UPDATE {self.table_name} SET enable = ?, ut = ? WHERE id = ?'
                await conn.execute(sql, (enable, now, id))
                await conn.commit()
                return True
            except Exception as e:
                logger.error(f'failed to {action} proxy, id: {id}, error: {e}')
                await conn.rollback()
                return False

    async def enable(self, id: int) -> bool:
        """Mark the proxy as enabled.

        Returns:
            True when the UPDATE was committed (also when no row had that
            id), False on any error.
        """
        return await self._set_enable(id, 1)

    async def disable(self, id: int) -> bool:
        """Mark the proxy as disabled.

        Returns:
            True when the UPDATE was committed (also when no row had that
            id), False on any error.
        """
        return await self._set_enable(id, 0)
async def get_proxy(rotate_seconds: int = 300):
    """Pick the proxy URL to use for the current time window.

    The enabled proxies are reloaded from ``proxyModel`` on every call and
    one of them is selected by the index of the current time window, so
    all requests inside the same window share a proxy and consecutive
    windows walk through the list in order.

    Args:
        rotate_seconds: length of one rotation window in seconds
            (default 300, i.e. five minutes).

    Returns:
        The selected proxy URL, or None when no proxy is enabled.

    Raises:
        ValueError: if ``rotate_seconds`` is not positive.
    """
    if rotate_seconds <= 0:
        raise ValueError('rotate_seconds must be positive')
    # The module-level list is still refreshed so that any code reading
    # ``proxies`` keeps seeing the most recently loaded set.
    global proxies
    proxies = await proxyModel.load(enable=1)
    if not proxies:
        return None
    # Integer floor division avoids the round trip through float.
    window = int(time.time()) // rotate_seconds
    return proxies[window % len(proxies)]['url']


@retry_request
async def get(url, headers=None, params=None) -> Response:
    """Send a GET request through the currently selected proxy (if any)."""
    proxy = await get_proxy()
    async with httpx.AsyncClient(proxy=proxy) as client:
        response = await client.get(url, headers=headers, params=params)
        return Response(response.status_code, response.text)


@retry_request
async def post(url, headers=None, data=None, json=None) -> Response:
    """Send a POST request through the currently selected proxy (if any)."""
    proxy = await get_proxy()
    async with httpx.AsyncClient(proxy=proxy) as client:
        response = await client.post(url, headers=headers, json=json, data=data)
        return Response(response.status_code, response.text)
class Param(BaseModel):
    # Request body: proxy URLs to register.
    urls: List[str]


async def add(param: Param):
    """Add proxy URLs; every new proxy starts out enabled.

    Each URL is saved individually. URLs whose save failed are logged and
    reported back under ``data["failed"]``, mirroring the enable / disable /
    remove views. When every URL was saved the reply is the plain default
    one.
    """
    failed_list = []
    for url in param.urls:
        # save() reports failure through its return value, so check it
        # instead of logging success unconditionally.
        if await proxies.save(url, 1):
            logger.info(f'add proxy, url: {url}')
        else:
            failed_list.append(url)
            logger.error(f'add proxy failed, url: {url}')
    if failed_list:
        return reply(data={"failed": failed_list})
    return reply()
class Param(BaseModel):
    # Request body: ids of the proxies to delete.
    ids: List[int]


async def remove(param: Param):
    """Remove proxies by id.

    Every id is removed individually. Ids whose removal failed are logged
    and returned under ``data["failed"]``; otherwise ``data`` is None.
    """
    failed_list = []
    for proxy_id in param.ids:
        removed = await proxies.remove(proxy_id)
        if removed:
            continue
        failed_list.append(proxy_id)
        logger.error(f"remove proxy failed, id: {proxy_id}")
    payload = {"failed": failed_list} if failed_list else None
    return reply(data=payload)
newline at end of file