Skip to content

Commit

Permalink
Merge pull request #5075 from openstates/va-events-location-name-truncate
Browse files Browse the repository at this point in the history

VA: events fix DB import error on location name too long
  • Loading branch information
jessemortenson authored Nov 6, 2024
2 parents d3ebdf9 + 32ce8c6 commit d36adfb
Showing 1 changed file with 7 additions and 1 deletion.
8 changes: 7 additions & 1 deletion scrapers/va/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
import re


# Precompiled, non-greedy pattern matching a single "<...>" span; used to strip
# simple HTML tags out of text. "." does not match newlines here (no DOTALL flag).
simple_html_tag_regex = re.compile(r"<.*?>")


class VaEventScraper(Scraper):
_tz = pytz.timezone("America/New_York")

Expand Down Expand Up @@ -43,7 +46,10 @@ def scrape(self, start_date=None):
if "RoomDescription" in row:
location = row["RoomDescription"]
else:
location = row["Description"]
# The Description property is sloppy: it can include part of the title
# and sometimes links to the agenda and livestream.
# Strip anything inside HTML tags (the location never seems to be bolded or linked)
# and truncate to 200 characters so an overly long location name does not break the DB import.
location = re.sub(simple_html_tag_regex, "", row["Description"])[:200]

if location == "":
location = "See Agenda"
Expand Down

0 comments on commit d36adfb

Please sign in to comment.