Skip to content

Commit 29b69e7

Browse files
authored
Corrige modificação do banco em atualização de raspadores (#1151)
#### Descrição

Ao modificar o `TERRITORY_ID` ou o `start_date` de um raspador, ocorria um erro, pois o raspador era detectado como um novo raspador e a inserção da nova entrada no banco era bloqueada porque o nome do raspador é a chave primária. Agora, atualizações em `TERRITORY_ID` e `start_date` podem ser realizadas, e os campos são atualizados no banco sem a tentativa de inserção de um novo registro.
2 parents e67c439 + 7e3849d commit 29b69e7

File tree

1 file changed

+7
-9
lines changed

1 file changed

+7
-9
lines changed

data_collection/gazette/database/models.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -49,28 +49,28 @@ def load_territories(engine):
4949
logger.info("Populating 'territories' table - Done!")
5050

5151

52-
def get_new_spiders(session, territory_spider_map):
52+
def get_new_or_modified_spiders(session, territory_spider_map):
5353
registered_spiders = session.query(QueridoDiarioSpider).all()
5454
registered_spiders_set = {
5555
(spider.spider_name, territory.id, spider.date_from)
5656
for spider in registered_spiders
5757
for territory in spider.territories
5858
}
59-
only_new_spiders = [
59+
only_new_or_modified_spiders = [
6060
spider_info
6161
for spider_info in territory_spider_map
6262
if spider_info not in registered_spiders_set
6363
]
64-
return only_new_spiders
64+
return only_new_or_modified_spiders
6565

6666

6767
def load_spiders(engine, territory_spider_map):
6868
Session = sessionmaker(bind=engine)
6969
session = Session()
7070

7171
table_is_populated = session.query(QueridoDiarioSpider).count() > 0
72-
new_spiders = (
73-
get_new_spiders(session, territory_spider_map)
72+
spiders_to_persist = (
73+
get_new_or_modified_spiders(session, territory_spider_map)
7474
if table_is_populated
7575
else territory_spider_map
7676
)
@@ -80,20 +80,18 @@ def load_spiders(engine, territory_spider_map):
8080
territories = session.query(Territory).all()
8181
territory_map = {t.id: t for t in territories}
8282

83-
spiders = []
84-
for info in new_spiders:
83+
for info in spiders_to_persist:
8584
spider_name, territory_id, date_from = info
8685
territory = territory_map.get(territory_id)
8786
if territory is not None:
88-
spiders.append(
87+
session.merge(
8988
QueridoDiarioSpider(
9089
spider_name=spider_name,
9190
date_from=date_from,
9291
territories=[territory],
9392
)
9493
)
9594

96-
session.add_all(spiders)
9795
session.commit()
9896
logger.info("Populating 'querido_diario_spider' table - Done!")
9997

0 commit comments

Comments
 (0)