Skip to content

Commit

Permalink
Ajustar regex de coleta de metadado #1187 (#1188)
Browse files Browse the repository at this point in the history
  • Loading branch information
trevineju authored Jul 16, 2024
2 parents 48589c6 + c71c350 commit 7a87087
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion data_collection/gazette/spiders/base/adiarios_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,20 @@ def parse_page(self, response):
date = datetime.strptime(date, "%d/%m/%Y").date()

text = element.css("span strong::text").get()
- edition_number = re.search(r":\s*(\d+).*/", text).group(1)
+
+ try:
+     edition_number = re.search(r":\s*(\d+).*/", text).group(1)
+ except AttributeError:
+     edition_number = ""

title = element.css("span::text").getall()[1]
is_extra_edition = bool(
re.search(
r"complementar|suplementar|extra|especial", title, re.IGNORECASE
)
+ or re.search(
+ r"complementar|suplementar|extra|especial", text, re.IGNORECASE
+ )
)
power = self.get_power(title)

Expand Down

0 comments on commit 7a87087

Please sign in to comment.