Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

urllib3 Retry feature + support for custom HTTP adapters #1

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 42 additions & 21 deletions sickle/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import time

import requests
from urllib3.util.retry import Retry

from sickle.iterator import BaseOAIIterator, OAIItemIterator
from sickle.response import OAIResponse
Expand All @@ -33,6 +34,14 @@
'Identify': Identify,
}

def get_adapter(max_retries, custom_adapter=None):
    """Return the transport adapter to mount on the session.

    :param max_retries: retry configuration to apply to the adapter (the
                        caller in this module passes a ``urllib3`` ``Retry``
                        instance).
    :param custom_adapter: optional, already constructed adapter instance.
                           When given, its ``max_retries`` attribute is
                           overwritten with ``max_retries`` and the very same
                           object is returned.
    :return: ``custom_adapter`` if one was supplied, otherwise a new
             ``requests.adapters.HTTPAdapter`` configured with ``max_retries``.
    """
    # Compare against None explicitly: an adapter object that happens to be
    # falsy (e.g. defines __len__ or __bool__) must still be honoured rather
    # than silently replaced by the default adapter.
    if custom_adapter is not None:
        custom_adapter.max_retries = max_retries
        return custom_adapter
    return requests.adapters.HTTPAdapter(max_retries=max_retries)


class Sickle(object):
"""Client for harvesting OAI interfaces.
Expand All @@ -56,11 +65,11 @@ class Sickle(object):
use the value from the retry-after header (if present) and will wait the specified number of
seconds between retries.
:type max_retries: int
:param retry_status_codes: HTTP status codes to retry (default will only retry on 503)
:param retry_status_codes: HTTP status codes to retry (default will retry on 429, 500, 502, 503 and 504)
:type retry_status_codes: iterable
:param default_retry_after: default number of seconds to wait between retries in case no retry-after header is found
on the response (defaults to 60 seconds)
:type default_retry_after: int
:param retry_backoff_factor: Backoff factor to apply between retries after the second try,
if no Retry-After header is sent by the server. Default: 2.0
:type retry_backoff_factor: float
:type protocol_version: str
:param class_mapping: A dictionary that maps OAI verbs to classes representing
OAI items. If not provided,
Expand All @@ -73,6 +82,8 @@ class Sickle(object):
information is missing, `requests` will fallback to
`'ISO-8859-1'`.
:type encoding: str
:param custom_http_adapter: instance of subclass of requests.adapters.HTTPAdapter
:param custom_https_adapter: instance of subclass of requests.adapters.HTTPAdapter
:param request_args: Arguments to be passed to requests when issuing HTTP
requests. Useful examples are `auth=('username', 'password')`
for basic auth-protected endpoints or `timeout=<int>`.
Expand All @@ -86,9 +97,12 @@ def __init__(self, endpoint,
iterator=OAIItemIterator,
max_retries=0,
retry_status_codes=None,
default_retry_after=60,
default_retry_after=None,
retry_backoff_factor=2,
class_mapping=None,
encoding=None,
custom_http_adapter=None,
custom_https_adapter=None,
**request_args):

self.endpoint = endpoint
Expand All @@ -104,14 +118,27 @@ def __init__(self, endpoint,
else:
raise TypeError(
"Argument 'iterator' must be subclass of %s" % BaseOAIIterator.__name__)
self.max_retries = max_retries
self.retry_status_codes = retry_status_codes or [503]
self.default_retry_after = default_retry_after

if default_retry_after is not None:
logger.warning("default_retry_after is no longer supported, please use retry_backoff_factor instead.")

max_retries = Retry(
total=max_retries,
backoff_factor=retry_backoff_factor,
status_forcelist=retry_status_codes or [429, 500, 502, 503, 504],
method_whitelist=frozenset(['GET', 'POST'])
)

self.session = requests.Session()

self.session.mount('https://', get_adapter(max_retries, custom_https_adapter))
self.session.mount('http://', get_adapter(max_retries, custom_http_adapter))

self.oai_namespace = OAI_NAMESPACE % self.protocol_version
self.class_mapping = class_mapping or DEFAULT_CLASS_MAP
self.encoding = encoding
self.request_args = request_args
self.session = requests.Session()
self.request_args = request_args


def harvest(self, **kwargs): # pragma: no cover
"""Make HTTP requests to the OAI server.
Expand All @@ -120,23 +147,17 @@ def harvest(self, **kwargs): # pragma: no cover
:rtype: :class:`sickle.OAIResponse`
"""
http_response = self._request(kwargs)
for _ in range(self.max_retries):
if self._is_error_code(http_response.status_code) \
and http_response.status_code in self.retry_status_codes:
retry_after = self.get_retry_after(http_response)
logger.warning(
"HTTP %d! Retrying after %d seconds..." % (http_response.status_code, retry_after))
time.sleep(retry_after)
http_response = self._request(kwargs)
http_response.raise_for_status()
if self.encoding:
http_response.encoding = self.encoding
return OAIResponse(http_response, params=kwargs)

def _request(self, kwargs):
if self.http_method == 'GET':
return self.session.get(self.endpoint, params=kwargs, **self.request_args)
return self.session.post(self.endpoint, data=kwargs, **self.request_args)
response = self.session.get(self.endpoint, params=kwargs, **self.request_args)
else:
response = self.session.post(self.endpoint, data=kwargs, **self.request_args)
response.raise_for_status()
return response

def ListRecords(self, ignore_deleted=False, **kwargs):
"""Issue a ListRecords request.
Expand Down
2 changes: 1 addition & 1 deletion sickle/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from lxml import etree

XMLParser = etree.XMLParser(remove_blank_text=True, recover=True, resolve_entities=False)
XMLParser = etree.XMLParser(remove_blank_text=True, huge_tree=True, recover=True, resolve_entities=False)


class OAIResponse(object):
Expand Down