Skip to content

Commit 6f6d4cd

Browse files
QiuZiXian and 邱梓咸 authored
添加searxng搜索引擎支持 (#293)
* 添加searxng搜索引擎支持 * doc 修改为en * pre-commit * remove and change to en --------- Co-authored-by: 邱梓咸 <[email protected]>
1 parent cca9365 commit 6f6d4cd

File tree

2 files changed

+140
-0
lines changed

2 files changed

+140
-0
lines changed

lagent/actions/web_browser.py

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import hmac
44
import json
55
import logging
6+
import os
67
import random
78
import re
89
import time
@@ -204,6 +205,128 @@ def _parse_response(self, response: dict) -> dict:
204205
return self._filter_results(raw_results)
205206

206207

208+
class SearxngSearch(BaseSearch):
    """Search client for a self-hosted SearXNG instance.

    Set up your own SearXNG server first (see https://docs.searxng.org/).
    Requests are sent with ``format=json``, so the server must allow the
    JSON output format.

    Server address:
        The endpoint is resolved once, at construction time:

        1. the ``SEARXNG_URL`` environment variable, if set and non-empty,
           e.g. ``export SEARXNG_URL="http://192.168.26.xxx:18080/search"``;
        2. otherwise the ``url`` argument.

        The environment variable takes precedence when both are given.

    Authentication:
        Every request carries a single header ``{auth_name: api_key}``.
        For servers that do not check it (a default SearXNG setup) the
        placeholder defaults can be left untouched. For servers that
        require an API key, set ``auth_name`` to the header name and
        ``api_key`` to its value. Custom request parameters are not yet
        supported.

    Args:
        api_key (str): Value of the authentication header,
            default is 'sk-xxxx'.
        auth_name (str): Name of the authentication header,
            default is 'searxng'.
        language (str): Language setting, default is 'zh'.
        categories (str): Category setting, default is 'general'.
        url (str): URL of the SearXNG search endpoint,
            default is 'http://127.0.0.1:18883'.
        topk (int): Number of top results to return, default is 3.
        black_list (List[str]): Domains to drop from the search results.
        **kwargs: Other keyword arguments. Only ``proxy`` is read: a
            ``requests``-style mapping such as ``{'http': ..., 'https': ...}``.
    """

    def __init__(
        self,
        api_key: str = 'sk-xxxx',
        auth_name: str = 'searxng',
        language: str = 'zh',
        categories: str = 'general',
        url: str = 'http://127.0.0.1:18883',
        topk: int = 3,
        black_list: List[str] = [
            'enoN',
            'youtube.com',
            'bilibili.com',
            'researchgate.net',
        ],
        **kwargs,
    ):
        self.api_key = api_key
        self.auth_name = auth_name
        self.language = language
        self.categories = categories
        self.proxy = kwargs.get('proxy')
        # An unset *or empty* environment variable falls back to ``url``.
        self.SEARXNG_URL = os.getenv('SEARXNG_URL') or url
        # Hand the base class a private copy so that the default list object
        # in the signature is never shared between instances.
        if black_list is not None:
            black_list = list(black_list)
        super().__init__(topk, black_list)

    @cached(cache=TTLCache(maxsize=100, ttl=600))
    def search(self, query: str, max_retry: int = 3) -> dict:
        """Run ``query`` against SearXNG, retrying on any failure.

        Args:
            query (str): The search query.
            max_retry (int): Maximum number of attempts, default is 3.

        Returns:
            dict: The filtered results produced by ``_parse_response``.

        Raises:
            Exception: If every attempt failed.
        """
        for attempt in range(max_retry):
            try:
                response = self._call_searxng_api(query)
                return self._parse_response(response)
            except Exception as e:
                logging.exception(str(e))
                warnings.warn(f'Retry {attempt + 1}/{max_retry} due to error: {e}')
                # Back off only when another attempt will follow.
                if attempt + 1 < max_retry:
                    time.sleep(random.randint(2, 5))
        raise Exception('Failed to get search results from SearXNG after retries.')

    @acached(cache=TTLCache(maxsize=100, ttl=600))
    async def asearch(self, query: str, max_retry: int = 3) -> dict:
        """Asynchronous counterpart of :meth:`search`.

        Args:
            query (str): The search query.
            max_retry (int): Maximum number of attempts, default is 3.

        Returns:
            dict: The filtered results produced by ``_parse_response``.

        Raises:
            Exception: If every attempt failed.
        """
        for attempt in range(max_retry):
            try:
                response = await self._async_call_searxng_api(query)
                return self._parse_response(response)
            except Exception as e:
                logging.exception(str(e))
                warnings.warn(f'Retry {attempt + 1}/{max_retry} due to error: {e}')
                # Back off only when another attempt will follow.
                if attempt + 1 < max_retry:
                    await asyncio.sleep(random.randint(2, 5))
        raise Exception('Failed to get search results from SearXNG after retries.')

    def _build_request(self, query: str) -> tuple:
        """Return the ``(params, headers)`` pair shared by both API callers."""
        params = {
            'q': query,
            'categories': self.categories,
            'language': self.language,
            'format': 'json',
            # NOTE(review): 'count' is not among the parameters listed in the
            # SearXNG search API docs; kept as-is -- confirm the server uses it.
            'count': f'{self.topk * 2}',
        }
        headers = {self.auth_name: self.api_key or ''}
        return params, headers

    def _call_searxng_api(self, query: str) -> dict:
        """Send a blocking GET request and return the decoded JSON body.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        params, headers = self._build_request(query)
        response = requests.get(self.SEARXNG_URL, headers=headers, params=params, proxies=self.proxy)
        response.raise_for_status()
        return response.json()

    async def _async_call_searxng_api(self, query: str) -> dict:
        """Send the GET request through aiohttp and return the decoded JSON body.

        The session is created with ``raise_for_status=True``, so an error
        status raises instead of returning a body.
        """
        params, headers = self._build_request(query)
        # aiohttp takes a single proxy URL, whereas ``self.proxy`` is a
        # requests-style mapping: prefer its 'http' entry, then 'https'.
        proxy = self.proxy and (self.proxy.get('http') or self.proxy.get('https'))
        async with aiohttp.ClientSession(raise_for_status=True) as session:
            async with session.get(
                self.SEARXNG_URL,
                headers=headers,
                params=params,
                proxy=proxy,
            ) as resp:
                return await resp.json()

    def _parse_response(self, response: dict) -> dict:
        """Convert a SearXNG JSON response into filtered search results.

        Args:
            response (dict): Decoded JSON body; must contain a 'results' list.

        Returns:
            dict: Whatever ``_filter_results`` returns for the collected
                ``(url, content, title)`` tuples.

        Raises:
            KeyError: If ``response`` has no 'results' entry, so that the
                caller's retry logic kicks in.
        """
        raw_results = []
        for result in response['results']:
            url = result.get('url')
            if not url:
                # A hit without a URL cannot be filtered or cited.
                continue
            # Tolerate hits that come without a snippet or a title instead of
            # failing the whole query with a KeyError.
            content = result.get('content') or ''
            title = result.get('title') or ''
            raw_results.append((url, content, title))
        return self._filter_results(raw_results)
328+
329+
207330
class BraveSearch(BaseSearch):
208331
"""
209332
Wrapper around the Brave Search API.
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import os
2+
from unittest import TestCase, mock
3+
4+
from lagent.actions.web_browser import SearxngSearch
5+
6+
7+
class TestGoogleSearch(TestCase):
    """Offline test for ``SearxngSearch``.

    NOTE(review): the class name is a leftover from the test this file was
    copied from; it is kept unchanged here and can be renamed separately.
    """

    @mock.patch.object(SearxngSearch, '_call_searxng_api')
    def test_search_tool(self, mock_call_api):
        # Patch only the HTTP layer so that ``search`` and the response
        # parsing really run. Patching ``search`` itself would make the call
        # return a bare MagicMock, whose ``len()`` is 0.
        mock_call_api.return_value = {
            'results': [
                {
                    'title': 'Beijing',
                    'url': 'https://example.com/beijing',
                    'content': 'Beijing is the capital of China.',
                },
            ],
        }
        query = "What's the capital of China?"

        # ``patch.dict`` restores the environment afterwards, so the server
        # address does not leak into other tests.
        with mock.patch.dict(os.environ, {'SEARXNG_URL': 'http://192.168.26.xx:18080/search'}):
            search_tool = SearxngSearch(api_key='abc')
            tool_return = search_tool.search(query)

        mock_call_api.assert_called_once_with(query)
        self.assertGreater(len(tool_return), 0)

0 commit comments

Comments
 (0)