Skip to content

Commit dab59d5

Browse files
committed
replace '_' with ' ' when matching Wikipedia article names
1 parent 28a5ef8 commit dab59d5

File tree

1 file changed

+5
-1
lines changed

1 file changed

+5
-1
lines changed

wikidata/import.sh

+5-1
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,9 @@ $PSQL -c "alter table entity add column description_en text"
2020
# Fill entity.description_en with the description from the entity_description row
# that has the same entity_id and language = 'en'.
$PSQL -c "update entity set description_en = description from entity_description where entity.entity_id = entity_description.entity_id and language = 'en'"
2121

2222
# Bulk-load totals.txt (space-delimited CSV) into import_link_hit via COPY ... FROM STDIN.
# The file is attached by redirection rather than `cat |`, so a missing or unreadable
# totals.txt aborts this step instead of letting psql run on empty input and load nothing.
$PSQL -c "COPY import_link_hit from STDIN WITH CSV DELIMITER ' '" < totals.txt
23-
# Sum the imported hits into link_hit, one row per (target||'wiki', catch_decode_url_part(value)).
$PSQL -c "insert into link_hit select target||'wiki', catch_decode_url_part(value), sum(hits) from import_link_hit group by target||'wiki', catch_decode_url_part(value)"
23+
# Remove every row from link_hit before it is repopulated.
$PSQL -c "truncate link_hit"
24+
# Populate link_hit with summed hits per (target||'wiki', title), where title is
# catch_decode_url_part(value) with every '_' replaced by ' ' so that it lines up with
# space-separated article names. Rows whose title expression is NULL are skipped
# (presumably catch_decode_url_part yields NULL for input it cannot decode - TODO confirm).
# The GROUP BY expression must be the same expression as in the select list; it previously
# called the misspelled function "dcatch_decode_url_part".
$PSQL -c "insert into link_hit select target||'wiki', replace(catch_decode_url_part(value), '_', ' '), sum(hits) from import_link_hit where replace(catch_decode_url_part(value), '_', ' ') is not null group by target||'wiki', replace(catch_decode_url_part(value), '_', ' ')"
25+
# Remove every row from entity_link_hit before it is repopulated.
$PSQL -c "truncate entity_link_hit"
2426
# Attach hit counts to every entity_link row by matching link_hit on (target, value).
# The left outer join keeps links that have no link_hit row; coalesce gives those 0 hits.
$PSQL -c "insert into entity_link_hit select entity_id, target, value, coalesce(hits,0) from entity_link left outer join link_hit using (target, value)"
27+
# Create entity_hit holding the total of entity_link_hit.hits for each entity_id.
# NOTE(review): plain CREATE TABLE AS errors if entity_hit already exists, unlike the
# truncate-and-insert steps for the other tables - confirm whether re-runs are expected.
$PSQL -c "create table entity_hit as select entity_id,sum(hits) as hits from entity_link_hit group by entity_id"
28+
# Add a unique btree index on entity_hit.entity_id.
$PSQL -c "create unique index idx_entity_hit on entity_hit using btree (entity_id)"

0 commit comments

Comments
 (0)