-
Notifications
You must be signed in to change notification settings - Fork 3
/
osm_wikidata.py
49 lines (43 loc) · 1.36 KB
/
osm_wikidata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import json
from requests import get
from gzip import decompress
from rdflib import Graph, URIRef, Literal, Namespace
# Load the OSM wiki Wikibase RDF dump into an rdflib graph.  A Turtle file
# named "wikibase-rdf.ttl" in the working directory is used when it exists;
# otherwise the gzip-compressed dump is downloaded and parsed from memory.
g = Graph()
if os.path.isfile("wikibase-rdf.ttl"):
    print("Parsing local file")
    g.parse("wikibase-rdf.ttl", format="turtle")
else:
    print("Parsing remote file")
    # The timeout bounds connection setup and each socket read so a stalled
    # server cannot hang the script forever.
    response = get(
        "https://wiki.openstreetmap.org/dump/wikibase-rdf.ttl.gz",
        timeout=60,
    )
    # Surface HTTP failures directly instead of letting an error page reach
    # decompress() and fail there with a misleading gzip error.
    response.raise_for_status()
    osm_wikibase = decompress(response.content)
    # Hand the in-memory document over explicitly as `data` rather than
    # relying on how a positional `source` of type bytes is interpreted.
    g.parse(data=osm_wikibase, format="turtle")
# Namespaces used with the OSM Wikibase data.  The module-level names are
# kept so each namespace stays addressable from Python code.
wikibase = Namespace("http://wikiba.se/ontology#")
wd = Namespace("//wiki.openstreetmap.org/entity/")
wdt = Namespace("//wiki.openstreetmap.org/prop/direct/")
p = Namespace("//wiki.openstreetmap.org/prop/")
ps = Namespace("//wiki.openstreetmap.org/prop/statement/")

# Register every namespace on the graph under its prefix, in declaration
# order.  NOTE(review): presumably this lets the SPARQL query file use these
# prefixes without declaring them itself - confirm against the query.
for prefix, namespace in (
    ("wikibase", wikibase),
    ("wd", wd),
    ("wdt", wdt),
    ("p", p),
    ("ps", ps),
):
    g.bind(prefix, namespace)
# Read the SPARQL query text from "tag2link.dataitem.sparql".
with open("tag2link.dataitem.sparql", "r", encoding="UTF-8") as sparql_file:
    query = sparql_file.read()


def _to_plain(term):
    """Return the plain Python value for one bound RDF term.

    Literals yield their ``value`` attribute and URI references yield the
    result of ``toPython()``.  Any other term type raises ``Exception``.
    """
    if isinstance(term, Literal):
        return term.value
    if isinstance(term, URIRef):
        return term.toPython()
    raise Exception("Unknown type: " + str(type(term)))


# Run the query against the graph and turn each result row into a dict of
# plain values, keyed by the names that ``asdict()`` reports for that row.
results = [
    {name: _to_plain(term) for name, term in row.asdict().items()}
    for row in g.query(query)
]

# Write all converted rows to "wikidata.json" as a single JSON array.
with open("wikidata.json", "w") as json_file:
    json.dump(results, json_file)