-
-
Notifications
You must be signed in to change notification settings - Fork 409
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Novo spider base]: NucleoGov - Anápolis, GO #1147
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
import json | ||
from datetime import datetime | ||
|
||
import scrapy | ||
from dateutil.rrule import DAILY, rrule | ||
|
||
from gazette.items import Gazette | ||
from gazette.spiders.base import BaseGazetteSpider | ||
|
||
|
||
class NucleoGovGazetteSpider(BaseGazetteSpider):
    """Base spider for gazette sites served by the NucleoGov JSON API.

    The spider issues one request per day between ``start_date`` and
    ``end_date`` and turns every entry of the JSON response into a
    ``Gazette`` item.

    Subclasses must define ``url_base``. Two forms are accepted:

    * the site root (e.g. ``"https://dom.example.gov.br"``), in which case
      ``api_path`` is appended by this class; or
    * a complete URL template containing ``{}`` for the date, which is used
      as-is (kept so existing subclasses continue to work).
    """

    # Path and query string of the per-day listing endpoint; ``{}`` receives
    # the date formatted as YYYY-MM-DD.
    api_path = "/api/diarios?data={}&calendar=true&situacao=2"

    def _build_day_url(self, day):
        """Return the API URL listing the gazettes published on ``day``."""
        formatted_day = day.strftime("%Y-%m-%d")
        if "{}" in self.url_base:
            # Subclass supplied the whole template itself.
            return self.url_base.format(formatted_day)
        return self.url_base.rstrip("/") + self.api_path.format(formatted_day)

    def start_requests(self):
        """Yield one API request for each day in the configured date range."""
        days = rrule(freq=DAILY, dtstart=self.start_date, until=self.end_date)
        for day in days:
            yield scrapy.Request(self._build_day_url(day))

    def parse(self, response):
        """Yield a ``Gazette`` for each entry under the response's ``data`` key.

        File URLs come from ``media_legacy`` when it is set; otherwise from
        the ``url`` of every element in ``midias``. Entries for which no file
        URL can be found are skipped.
        """
        payload = json.loads(response.text)

        # ``or []`` guards against the key being absent or explicitly null,
        # both of which would otherwise raise TypeError when iterated.
        for gazette in payload.get("data") or []:
            legacy_url = gazette.get("media_legacy")
            if legacy_url:
                gazette_urls = [legacy_url]
            else:
                gazette_urls = [
                    midia.get("url")
                    for midia in gazette.get("midias") or []
                    if midia.get("url")
                ]

            if not gazette_urls:
                # Nothing to download for this entry, so there is no gazette
                # to emit.
                continue

            gazette_date = datetime.strptime(gazette.get("data"), "%Y-%m-%d")

            yield Gazette(
                date=gazette_date.date(),
                file_urls=gazette_urls,
                edition_number=gazette.get("numero"),
                power="executive",
                is_extra_edition=False,
            )
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,16 @@ | ||||||||||
import datetime as dt | ||||||||||
|
||||||||||
from gazette.spiders.base.nucleogov import NucleoGovGazetteSpider | ||||||||||
|
||||||||||
|
||||||||||
class GoAnapolisSpider(NucleoGovGazetteSpider): | ||||||||||
name = "go_anapolis" | ||||||||||
TERRITORY_ID = "5201108" | ||||||||||
allowed_domains = [ | ||||||||||
"dom.anapolis.go.gov.br", | ||||||||||
] | ||||||||||
url_base = ( | ||||||||||
"https://dom.anapolis.go.gov.br/api/diarios?data={}&calendar=true&situacao=2" | ||||||||||
) | ||||||||||
Comment on lines
+12
to
+14
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reflete aqui a mudança na classe base
Suggested change
|
||||||||||
|
||||||||||
start_date = dt.date(2010, 5, 31) |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,14 @@ | ||||||
import datetime as dt | ||||||
|
||||||
from gazette.spiders.base.nucleogov import NucleoGovGazetteSpider | ||||||
|
||||||
|
||||||
class GoValparaisoDeGoiasSpider(NucleoGovGazetteSpider): | ||||||
name = "go_valparaiso_de_goias" | ||||||
TERRITORY_ID = "5221858" | ||||||
allowed_domains = [ | ||||||
"diariooficial.valparaisodegoias.go.gov.br", | ||||||
] | ||||||
url_base = "https://diariooficial.valparaisodegoias.go.gov.br/api/diarios?data={}&calendar=true&situacao=2" | ||||||
|
||||||
Comment on lines
+12
to
+13
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reflete aqui a mudança na classe base
Suggested change
|
||||||
start_date = dt.date(2021, 2, 17) |
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,16 @@ | ||||||||||
import datetime as dt | ||||||||||
|
||||||||||
from gazette.spiders.base.nucleogov import NucleoGovGazetteSpider | ||||||||||
|
||||||||||
|
||||||||||
class ToCaririDoTocantinsSpider(NucleoGovGazetteSpider): | ||||||||||
name = "to_cariri_do_tocantins" | ||||||||||
TERRITORY_ID = "1703867" | ||||||||||
allowed_domains = [ | ||||||||||
"dom.cariri.to.gov.br", | ||||||||||
] | ||||||||||
url_base = ( | ||||||||||
"https://dom.cariri.to.gov.br/api/diarios?data={}&calendar=true&situacao=2" | ||||||||||
) | ||||||||||
Comment on lines
+12
to
+14
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reflete aqui a mudança na classe base
Suggested change
|
||||||||||
|
||||||||||
start_date = dt.date(2023, 1, 30) |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,14 @@ | ||||||
import datetime as dt | ||||||
|
||||||
from gazette.spiders.base.nucleogov import NucleoGovGazetteSpider | ||||||
|
||||||
|
||||||
class ToJauDoTocantinsSpider(NucleoGovGazetteSpider): | ||||||
name = "to_jau_do_tocantins" | ||||||
TERRITORY_ID = "1711506" | ||||||
allowed_domains = [ | ||||||
"diariooficial.jau.to.gov.br", | ||||||
] | ||||||
url_base = "https://diariooficial.jau.to.gov.br/api/diarios?data={}&calendar=true&situacao=2" | ||||||
|
||||||
Comment on lines
+12
to
+13
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reflete aqui a mudança na classe base
Suggested change
|
||||||
start_date = dt.date(2023, 5, 15) |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,14 @@ | ||||||
import datetime as dt | ||||||
|
||||||
from gazette.spiders.base.nucleogov import NucleoGovGazetteSpider | ||||||
|
||||||
|
||||||
class ToParanaSpider(NucleoGovGazetteSpider): | ||||||
name = "to_parana" | ||||||
TERRITORY_ID = "1716208" | ||||||
allowed_domains = [ | ||||||
"diariooficial.parana.to.gov.br", | ||||||
] | ||||||
url_base = "https://diariooficial.parana.to.gov.br/api/diarios?data={}&calendar=true&situacao=2" | ||||||
|
||||||
Comment on lines
+12
to
+13
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Reflete aqui a mudança na classe base
Suggested change
|
||||||
start_date = dt.date(2023, 5, 8) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Como a string de acesso à API (`/api/diarios?data={}&calendar=true&situacao=2`) é igual em todas as classes filhas, nós temos adotado o padrão de deixar essa construção da URL na classe mãe, escondendo isso da classe filha e deixando mais simples o raspador do município.