Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Proxy #56

Merged
merged 5 commits into from
Oct 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 101 additions & 0 deletions data/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,104 @@ async def expire(self, id: str) -> bool:
logger.error(f'failed to save cookies, error: {e}')
await conn.rollback()
return False

class Proxies(SqliteStore):
    """SQLite-backed store of proxy URLs and their enabled state.

    Each row holds an auto-increment ``id``, the proxy ``url``, an ``enable``
    flag (callers in this project use 1 for enabled and 0 for disabled) and
    the ``ct`` / ``ut`` creation and last-update times as Unix seconds.

    Every public coroutine reports failure through its return value (``False``
    or an empty list) after logging the error; none of them re-raise.
    """

    def __init__(self, store_path):
        """Set up table metadata and make sure the table exists.

        Args:
            store_path: forwarded unchanged to ``SqliteStore.__init__``.
        """
        super().__init__(store_path)
        self.primary_key = 'id'
        self.table_name = 'proxies'
        self._create_table()

    def _create_table(self):
        """Create the proxies table if it is missing.

        Runs on the synchronous connection because it is called from
        ``__init__``. A failure is logged and otherwise ignored.
        """
        with closing(self._get_sync_connection()) as conn, closing(conn.cursor()) as cursor:
            try:
                sql = f'''
                CREATE TABLE IF NOT EXISTS {self.table_name} (
                    {self.primary_key} INTEGER PRIMARY KEY AUTOINCREMENT,
                    url VARCHAR(512) NOT NULL,
                    enable INTEGER NOT NULL,
                    ct INTEGER NOT NULL,
                    ut INTEGER NOT NULL
                )
                '''
                cursor.execute(sql)
                conn.commit()
            except Exception as e:
                logger.error(f'failed to create table, error: {e}')

    async def save(self, url: str = '', enable: int = 1, id: int = 0) -> bool:
        """Update the proxy with the given id, or insert a new one.

        Args:
            url: proxy address to store.
            enable: enabled flag to store.
            id: row to update. When no row has this id (including the default
                0) a new row is inserted and SQLite assigns its id.

        Returns:
            True when the change was committed, False when it failed and was
            rolled back.
        """
        ct = ut = int(time.time())
        async with self._get_connection() as conn:
            try:
                sql = f'UPDATE {self.table_name} SET url = ?, enable = ?, ut = ? WHERE id = ?'
                cursor = await conn.execute(sql, (url, enable, ut, id))
                # Look at this statement's own row count. conn.total_changes
                # counts every change made since the connection was opened, so
                # it is only a valid "nothing updated" test on a fresh
                # connection.
                if cursor.rowcount == 0:
                    sql = f'INSERT INTO {self.table_name} (url, enable, ct, ut) VALUES (?, ?, ?, ?)'
                    await conn.execute(sql, (url, enable, ct, ut))
                await conn.commit()
                return True
            except Exception as e:
                logger.error(f'failed to save proxy, error: {e}')
                await conn.rollback()
                return False

    async def remove(self, id: int) -> bool:
        """Delete the proxy with the given id.

        Returns:
            True when the DELETE was committed (also when no row matched),
            False when it failed and was rolled back.
        """
        async with self._get_connection() as conn:
            try:
                sql = f'DELETE FROM {self.table_name} WHERE id = ?'
                await conn.execute(sql, (id,))
                await conn.commit()
                return True
            except Exception as e:
                logger.error(f'failed to remove proxy, error: {e}')
                await conn.rollback()
                return False

    async def load(self, enable: int = -1, offset: int = 0, limit: int = 0) -> list:
        """Return stored proxies as a list of dicts, ordered by id.

        Args:
            enable: only return rows with this ``enable`` value; -1 (the
                default) returns rows regardless of the flag.
            offset: number of leading rows to skip.
            limit: maximum number of rows to return; 0 or less means no limit.

        Returns:
            One dict per row, or an empty list when the query fails.
        """
        async with self._get_connection() as conn:
            try:
                base_sql = f'SELECT * FROM {self.table_name}'
                params = []

                if enable != -1:
                    base_sql += ' WHERE enable = ?'
                    params.append(enable)

                # A stable order keeps paging and index-based rotation over
                # the result deterministic.
                base_sql += f' ORDER BY {self.primary_key}'

                if limit > 0:
                    base_sql += ' LIMIT ? OFFSET ?'
                    params.extend([limit, offset])
                elif offset > 0:
                    # SQLite only accepts OFFSET after LIMIT; a negative LIMIT
                    # means "no upper bound", so the offset is still honoured.
                    base_sql += ' LIMIT -1 OFFSET ?'
                    params.append(offset)

                cursor = await conn.execute(base_sql, params)
                results = await cursor.fetchall()
                # NOTE(review): dict(row) assumes the connection yields
                # mapping-like rows (e.g. sqlite3.Row); that is configured
                # outside this class - confirm in SqliteStore.
                return [dict(row) for row in results]
            except Exception as e:
                logger.error(f'failed to load proxies, error: {e}')
                await conn.rollback()
                return []

    async def _set_enable(self, id: int, enable: int, action: str) -> bool:
        """Write the ``enable`` flag of one proxy and refresh its ``ut``.

        ``action`` is only used to word the error log ("enable" / "disable").
        Returns True when the UPDATE was committed (also when no row matched),
        False when it failed and was rolled back.
        """
        ut = int(time.time())
        async with self._get_connection() as conn:
            try:
                sql = f'UPDATE {self.table_name} SET enable = ?, ut = ? WHERE id = ?'
                await conn.execute(sql, (enable, ut, id))
                await conn.commit()
                return True
            except Exception as e:
                logger.error(f'failed to {action} proxy, error: {e}')
                await conn.rollback()
                return False

    async def enable(self, id: int) -> bool:
        """Mark the proxy with the given id as enabled (``enable = 1``)."""
        return await self._set_enable(id, 1, 'enable')

    async def disable(self, id: int) -> bool:
        """Mark the proxy with the given id as disabled (``enable = 0``)."""
        return await self._set_enable(id, 0, 'disable')
Empty file added data/proxies/.gitkeep
Empty file.
145 changes: 145 additions & 0 deletions docs/api/proxies/proxies.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
# API 文档

## 代理

以下POST请求参数均使用raw json格式,例如添加代理接口参数为:`{"urls": ["http://example.com:1234", "http://example2.com:2345"] }`

### 添加代理

- **功能说明**

添加完成后,默认为启用状态(`enable = 1`)。

- **URL**

`/proxies/add`

- **Method**

`POST`

- **Data Params**

| 参数 | 必选 | 类型 | 说明 |
|:---:|:---:|:---:|:---:|
| urls | true | [string] | url列表 |

- **Response**

| 参数 | 必选 | 类型 | 说明 |
|:---:|:---:|:---:|:---:|
| code | true | int | 0: 成功 1: 参数错误 2: 服务器错误 |
| data | false | null | 数据 |
| msg | true | string | 请求说明(成功、参数错误、服务器错误) |

### 移除代理

- **URL**

`/proxies/remove`

- **Method**

`POST`

- **Data Params**

| 参数 | 必选 | 类型 | 说明 |
|:---:|:---:|:---:|:---:|
| ids | true | [int] | 需要移除的代理id列表,id可以从代理的list接口获取 |

- **Response**

| 参数 | 必选 | 类型 | 说明 |
|:---:|:---:|:---:|:---:|
| code | true | int | 0: 成功 1: 参数错误 2: 服务器错误 |
| data | false | null | 数据 |
| msg | true | string | 请求说明(成功、参数错误、服务器错误) |

### 代理列表

- **URL**

`/proxies/list`

- **Method**

`GET`

- **URL Params**

None

- **Response**

| 参数 | 必选 | 类型 | 说明 |
|:---:|:---:|:---:|:---:|
| code | true | int | 0: 成功 1: 参数错误 2: 服务器错误 |
| data | true | list | [ [代理列表](#代理信息) ] |
| msg | true | string | 请求说明(成功、参数错误、服务器错误) |

#### 代理信息

| 参数 | 必选 | 类型 | 说明 |
|:---:|:---:|:---:|:---:|
| id | true | int | id(用于管理) |
| url | true | string | 代理地址 |
| ct | true | int | 创建时间戳 |
| ut | true | int | 更新时间戳 |
| enable | true | int | 0: 不启用 1: 启用 |

### 启用代理

- **功能说明**

请求会轮换使用处于启用状态下的代理。

- **URL**

`/proxies/enable`

- **Method**

`POST`

- **Data Params**

| 参数 | 必选 | 类型 | 说明 |
|:---:|:---:|:---:|:---:|
| ids | true | [int] | 需要启用的代理id列表,id可以从代理的list接口获取 |

- **Response**

| 参数 | 必选 | 类型 | 说明 |
|:---:|:---:|:---:|:---:|
| code | true | int | 0: 成功 1: 参数错误 2: 服务器错误 |
| data | false | null | 数据 |
| msg | true | string | 请求说明(成功、参数错误、服务器错误) |

### 禁用代理

- **功能说明**

禁用状态下的代理不会被使用。

- **URL**

`/proxies/disable`

- **Method**

`POST`

- **Data Params**

| 参数 | 必选 | 类型 | 说明 |
|:---:|:---:|:---:|:---:|
| ids | true | [int] | 需要禁用的代理id列表,id可以从代理的list接口获取 |

- **Response**

| 参数 | 必选 | 类型 | 说明 |
|:---:|:---:|:---:|:---:|
| code | true | int | 0: 成功 1: 参数错误 2: 服务器错误 |
| data | false | struct | 全部成功时为 null;存在禁用失败的 id 时为 `{"failed": [id, ...]}` |
| msg | true | string | 请求说明(成功、参数错误、服务器错误) |
2 changes: 1 addition & 1 deletion docs/api/weibo/weibo.md
Original file line number Diff line number Diff line change
Expand Up @@ -250,4 +250,4 @@
|:---:|:---:|:---:|:---:|
| code | true | int | 0: 成功 1: 参数错误 2: 服务器错误 |
| data | true | struct | 数据 |
| msg | true | string | 请求说明(成功、参数错误、服务器错误) |
| msg | true | string | 请求说明(成功、参数错误、服务器错误) |
12 changes: 12 additions & 0 deletions docs/doc.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,15 @@
- 微博用户信息和作品获取

微博:[API 文档](api/weibo/weibo.md)

## 代理

目前支持以下接口:

- 添加代理url
- 移除代理url
- 启用代理
- 禁用代理
- 获取代理url列表

代理:[API 文档](api/proxies/proxies.md)
19 changes: 17 additions & 2 deletions lib/requests/requests.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import httpx
import json
import time
from data.driver import Proxies

# Proxy store that get_proxy() queries for the currently enabled proxies.
proxyModel = Proxies("data/proxies/proxies.db")
# Result of the most recent proxyModel.load(enable=1); get_proxy() rebinds this
# module-level name on every call.
proxies = []

class Response:
def __init__(self, status_code, text):
Expand All @@ -21,14 +26,24 @@ async def wrapper(*args, **kwargs):

return wrapper

async def get_proxy():
    """Return the URL of the proxy to use right now, or None if none is enabled.

    The enabled proxies are reloaded from the store on every call and kept in
    the module-level ``proxies`` list. The choice rotates with wall-clock
    time: the index advances by one every 300 seconds and wraps around the
    list.
    """
    global proxies
    proxies = await proxyModel.load(enable=1)
    if not proxies:
        return None
    # Number of whole 300-second windows since the epoch selects the slot.
    window = int(time.time()) // 300
    chosen = proxies[window % len(proxies)]
    return chosen['url']

@retry_request
async def get(url, headers=None, params=None) -> Response:
    """Send a GET request through the currently selected proxy.

    Args:
        url: target URL.
        headers: optional request headers, passed to httpx unchanged.
        params: optional query parameters, passed to httpx unchanged.

    Returns:
        A Response built from the status code and body text of the reply.
    """
    # get_proxy() returns None when no proxy is enabled; that None is passed
    # to httpx as-is.
    proxy = await get_proxy()
    # Only one client is opened: the proxy-less AsyncClient() that preceded
    # this one was never used for the request.
    async with httpx.AsyncClient(proxy=proxy) as client:
        response = await client.get(url, headers=headers, params=params)
        return Response(response.status_code, response.text)

@retry_request
async def post(url, headers=None, data=None, json=None) -> Response:
    """Send a POST request through the currently selected proxy.

    Args:
        url: target URL.
        headers: optional request headers, passed to httpx unchanged.
        data: optional request body, passed to httpx as ``data``.
        json: optional JSON body, passed to httpx as ``json``. Inside this
            function the name shadows the module-level ``json`` import.

    Returns:
        A Response built from the status code and body text of the reply.
    """
    # get_proxy() returns None when no proxy is enabled; that None is passed
    # to httpx as-is.
    proxy = await get_proxy()
    # Only one client is opened: the proxy-less AsyncClient() that preceded
    # this one was never used for the request.
    async with httpx.AsyncClient(proxy=proxy) as client:
        response = await client.post(url, headers=headers, json=json, data=data)
        return Response(response.status_code, response.text)
2 changes: 1 addition & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
CONFIG_PATH = ''

app = FastAPI()
# Service names that register_router() loops over. The list is assigned once;
# the earlier assignment without 'proxies' was immediately overwritten and has
# been dropped.
services = ['xhs', 'weibo', 'taobao', 'kuaishou', 'jd', 'douyin', 'bilibili', 'proxies']

def register_router():
for service in services:
Expand Down
1 change: 1 addition & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
- 小红书[无水印视频](docs/api/xhs/xhs.md#获取笔记详情)
- 哔哩哔哩视频[一键下载](docs/api/bilibili/bilibili.md#bilibili视频下载)
- 微博媒体资源[预览接口](docs/api/weibo/weibo.md#微博媒体预览)
- 支持[IP代理池](https://github.com/ShilongLee/Crawler/wiki/%E4%BB%A3%E7%90%86)(轮换算法)。

## 快速开始

Expand Down
3 changes: 3 additions & 0 deletions service/proxies/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from data.driver import Proxies

# Module-level Proxies store that the proxies view modules import as
# `from ..models import proxies`.
proxies = Proxies("data/proxies/proxies.db")
10 changes: 10 additions & 0 deletions service/proxies/urls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from . import views
from fastapi import APIRouter

# Router for the proxy-management endpoints; every path below is served under
# the /proxies prefix.
router = APIRouter(prefix='/proxies')

# Each path is bound to the view of the same name from the views package.
# /list is the only GET route; the others take a POST.
router.add_api_route('/add', views.add, methods=['POST'])
router.add_api_route('/list', views.list, methods=['GET'])
router.add_api_route('/remove', views.remove, methods=['POST'])
router.add_api_route('/disable', views.disable, methods=['POST'])
router.add_api_route('/enable', views.enable, methods=['POST'])
5 changes: 5 additions & 0 deletions service/proxies/views/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from .add import add
from .disable import disable
from .enable import enable
from .list import list
from .remove import remove
17 changes: 17 additions & 0 deletions service/proxies/views/add.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from utils.reply import reply
from ..models import proxies
from pydantic import BaseModel
from lib.logger import logger
from typing import List

# Request body accepted by the add view.
class Param(BaseModel):
    # Proxy URLs to store; the add view saves each one with enable = 1.
    urls: List[str]

async def add(param: Param):
    '''
    Add proxy URLs; every new proxy starts out enabled.

    Each URL in ``param.urls`` is saved with ``enable = 1``. The result of
    every save is checked: a URL that could not be stored is logged as an
    error and returned to the caller as ``{"failed": [url, ...]}`` in the
    reply data. When every URL is stored the plain success reply is returned.
    '''
    failed_list = []
    for url in param.urls:
        if await proxies.save(url, 1):
            logger.info(f'add proxy, url: {url}')
        else:
            # save() returns False after logging and rolling back; do not
            # report this URL as added.
            failed_list.append(url)
            logger.error(f'add proxy failed, url: {url}')
    if failed_list:
        return reply(data={"failed": failed_list})
    return reply()
23 changes: 23 additions & 0 deletions service/proxies/views/disable.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from utils.reply import reply
from ..models import proxies
from pydantic import BaseModel
from lib.logger import logger
from typing import List

# Request body accepted by the disable view.
class Param(BaseModel):
    # Ids of the proxies to disable.
    ids: List[int]

async def disable(param: Param):
    '''
    Disable the proxies whose ids are listed in ``param.ids``.

    Ids are processed one at a time. An id whose disable call reports failure
    is logged and collected; the reply data is ``{"failed": [id, ...]}`` when
    at least one id failed and ``None`` otherwise.
    '''
    rejected = []
    for proxy_id in param.ids:
        if await proxies.disable(proxy_id):
            continue
        rejected.append(proxy_id)
        logger.error(f"disable proxy failed, id: {proxy_id}")
    payload = {"failed": rejected} if rejected else None
    return reply(data=payload)
Loading
Loading