Skip to content

Commit

Permalink
Adiciona documentação específica a diof.py
Browse files Browse the repository at this point in the history
  • Loading branch information
trevineju committed Jan 29, 2025
1 parent 889c99e commit 622e5c9
Showing 1 changed file with 11 additions and 4 deletions.
15 changes: 11 additions & 4 deletions data_collection/gazette/spiders/base/diof.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@

class BaseDiofSpider(BaseGazetteSpider):
"""
Base Spider for all cases with use DIOF/SAI service
Base Spider for all cases that use DIOF/SAI service
The spider
Attributes
----------
Expand All @@ -27,10 +29,8 @@ class BaseDiofSpider(BaseGazetteSpider):
- https://sai.io.org.br/ba/abare/site/diariooficial
"""

custom_settings = {"DOWNLOAD_DELAY": 1}
handle_httpstatus_list = [404]

api_url = "https://diof.io.org.br/api"
handle_httpstatus_list = [404]

def start_requests(self):
self._set_allowed_domains()
Expand Down Expand Up @@ -85,6 +85,13 @@ def interval_request(self, response):
)

def parse_items(self, response):
"""
The SAI service appears to be migrating its backend to consume the DIOF API,
but some gazettes are still only collectible through the old URL. So this method
checks whether the document exists at the new URL and, if it does not, collects
it from the old URL.
"""

for gazette_date in json.loads(response.text):
for gazette in gazette_date["elements"]:
date = gazette["dat_envio"]
Expand Down

0 comments on commit 622e5c9

Please sign in to comment.