diff --git a/data_collection/gazette/spiders/base/diof.py b/data_collection/gazette/spiders/base/diof.py index f2f753155..2abacde37 100644 --- a/data_collection/gazette/spiders/base/diof.py +++ b/data_collection/gazette/spiders/base/diof.py @@ -15,7 +15,7 @@ class BaseDiofSpider(BaseGazetteSpider): """ - Base Spider for all cases with use DIOF/SAI service + Base Spider for all cases that use DIOF/SAI service Attributes ---------- @@ -27,7 +27,7 @@ class BaseDiofSpider(BaseGazetteSpider): - https://sai.io.org.br/ba/abare/site/diariooficial """ - custom_settings = {"DOWNLOAD_DELAY": 1} + custom_settings = {"DOWNLOAD_DELAY": 0.5} handle_httpstatus_list = [404] api_url = "https://diof.io.org.br/api" @@ -85,6 +85,13 @@ def interval_request(self, response): ) def parse_items(self, response): + """ + The SAI service appears to be migrating its backend to consume a DIOF API, + but some gazettes are only collectible through the old URL. So, this method + checks whether the document exists in the new URL and, if not, collects + it using the old URL. + """ + for gazette_date in json.loads(response.text): for gazette in gazette_date["elements"]: date = gazette["dat_envio"]