|
3 | 3 | import hmac
|
4 | 4 | import json
|
5 | 5 | import logging
|
| 6 | +import os |
6 | 7 | import random
|
7 | 8 | import re
|
8 | 9 | import time
|
@@ -204,6 +205,128 @@ def _parse_response(self, response: dict) -> dict:
|
204 | 205 | return self._filter_results(raw_results)
|
205 | 206 |
|
206 | 207 |
|
class SearxngSearch(BaseSearch):
    """
    Client for a self-hosted SearXNG search engine.

    Setup notes:
        1. First set up your own SearXNG server: https://docs.searxng.org/
        2. For servers (like a stock SearXNG) that require no API key, you can
           ignore ``auth_name`` and ``api_key``.
        3. For servers that do require an API key, ``auth_name`` is the header
           name and ``api_key`` is the header value.
        Custom query parameters are not yet supported.

    Server address resolution (the environment variable takes precedence):
        - Method one: ``export SEARXNG_URL="http://192.168.26.xxx:18080/search"``
        - Method two: pass it at construction time: ``SearxngSearch(url=...)``

    Args:
        api_key (str): API key sent as the auth header value. Default 'sk-xxxx'.
        auth_name (str): Header name carrying the API key. Default 'searxng'.
        language (str): Search language. Default 'zh'.
        categories (str): SearXNG result category. Default 'general'.
        url (str): URL of the SearXNG service; used only when the
            ``SEARXNG_URL`` environment variable is unset or empty.
            Default 'http://127.0.0.1:18883'.
        topk (int): Number of top results to return. Default 3.
        black_list (List[str]): Domains to exclude from search results.
            ``None`` selects the built-in default blacklist.
        **kwargs: Extra keyword arguments, e.g. ``proxy`` (a requests-style
            proxies mapping).
    """

    # Domains filtered out of results when the caller does not supply a list.
    DEFAULT_BLACK_LIST = (
        'enoN',
        'youtube.com',
        'bilibili.com',
        'researchgate.net',
    )

    def __init__(
        self,
        api_key: str = 'sk-xxxx',
        auth_name: str = 'searxng',
        language: str = 'zh',
        categories: str = 'general',
        url: str = 'http://127.0.0.1:18883',
        topk: int = 3,
        black_list: List[str] = None,  # None -> DEFAULT_BLACK_LIST (avoids the mutable-default pitfall)
        **kwargs,
    ):
        if black_list is None:
            black_list = list(self.DEFAULT_BLACK_LIST)
        self.api_key = api_key
        self.auth_name = auth_name
        self.language = language
        self.categories = categories
        self.proxy = kwargs.get('proxy')
        # Environment variable takes precedence over the constructor argument.
        self.SEARXNG_URL = os.getenv('SEARXNG_URL')
        if not self.SEARXNG_URL:
            self.SEARXNG_URL = url
        super().__init__(topk, black_list)

    @cached(cache=TTLCache(maxsize=100, ttl=600))
    def search(self, query: str, max_retry: int = 3) -> dict:
        """Search synchronously, retrying up to ``max_retry`` times.

        Raises:
            Exception: if every attempt fails.
        """
        for attempt in range(max_retry):
            try:
                response = self._call_searxng_api(query)
                return self._parse_response(response)
            except Exception as e:
                logging.exception(str(e))
                warnings.warn(f'Retry {attempt + 1}/{max_retry} due to error: {e}')
                time.sleep(random.randint(2, 5))
        raise Exception('Failed to get search results from SearXNG after retries.')

    @acached(cache=TTLCache(maxsize=100, ttl=600))
    async def asearch(self, query: str, max_retry: int = 3) -> dict:
        """Async counterpart of :meth:`search`.

        Raises:
            Exception: if every attempt fails.
        """
        for attempt in range(max_retry):
            try:
                response = await self._async_call_searxng_api(query)
                return self._parse_response(response)
            except Exception as e:
                logging.exception(str(e))
                warnings.warn(f'Retry {attempt + 1}/{max_retry} due to error: {e}')
                await asyncio.sleep(random.randint(2, 5))
        raise Exception('Failed to get search results from SearXNG after retries.')

    def _build_params(self, query: str) -> dict:
        """Build the query-string parameters shared by the sync/async calls."""
        return {
            'q': query,
            'categories': self.categories,
            'language': self.language,
            'format': 'json',  # ask SearXNG for a JSON payload instead of HTML
            'count': f'{self.topk * 2}',  # over-fetch so blacklist filtering still leaves topk
        }

    def _call_searxng_api(self, query: str) -> dict:
        """Issue a blocking GET to the SearXNG server and return the JSON body."""
        headers = {self.auth_name: self.api_key or ''}
        response = requests.get(
            self.SEARXNG_URL, headers=headers, params=self._build_params(query), proxies=self.proxy)
        response.raise_for_status()
        return response.json()

    async def _async_call_searxng_api(self, query: str) -> dict:
        """Issue a non-blocking GET to the SearXNG server and return the JSON body."""
        headers = {self.auth_name: self.api_key or ''}
        async with aiohttp.ClientSession(raise_for_status=True) as session:
            async with session.get(
                self.SEARXNG_URL,
                headers=headers,
                params=self._build_params(query),
                # aiohttp takes a single proxy URL, not a requests-style mapping.
                proxy=self.proxy and (self.proxy.get('http') or self.proxy.get('https')),
            ) as resp:
                return await resp.json()

    def _parse_response(self, response: dict) -> dict:
        """Extract (url, content, title) triples and apply the blacklist filter."""
        raw_results = []
        for result in response['results']:
            raw_results.append((result['url'], result['content'], result['title']))
        return self._filter_results(raw_results)
| 329 | + |
207 | 330 | class BraveSearch(BaseSearch):
|
208 | 331 | """
|
209 | 332 | Wrapper around the Brave Search API.
|
|
0 commit comments