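// cache.js
//
// Rewrites pre-schema-2 OpenAddresses source JSON files to the schema 2
// layout and, using an S3 inventory of data.openaddresses.io imported into
// postgres, points each converted source at its most recently processed zip.
//
// Usage: node cache.js <openaddresses path> <s3 inventory dir>
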
const fs = require('fs');
const glob = require('glob');
const path = require('path');
const { Pool } = require('pg');
const copyFrom = require('pg-copy-streams').from;

// Assumes a local postgres with an existing `oalogs` database
const pool = new Pool({
    host: 'localhost',
    user: 'postgres',
    database: 'oalogs'
});
if (!process.argv[2] || !process.argv[3]) {
    console.error();
    console.error('Usage: node cache.js <openaddresses path> <s3 inventory dir>');
    console.error();
    console.error('Download the s3 inventory from s3://logs.openaddresses.io/s3inventories/data.openaddresses.io/all-objects-in-data/data/');
    console.error();
    process.exit(1);
}

main();
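// Stage 1 (logs) and stage 2 (logs_optimize) build and index the inventory
// tables; they are left commented out below, presumably after a first run.
// Uncomment them to rebuild the `logs` and `parsed` tables from scratch.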
async function main() {
    try {
        //await logs();
        //await logs_optimize();
        await convert();
    } catch (err) {
        console.error(err);
        process.exitCode = 1;
    } finally {
        await pool.end(); // close pooled connections so the process can exit
    }
}
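/**
 * Recreate the `logs` and `parsed` tables, then bulk-import every S3
 * inventory CSV found in the directory given as the second argument.
 */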
async function logs() {
    return new Promise((resolve, reject) => {
        pool.connect(async (err, client, done) => {
            if (err) return reject(err);

            try {
                await client.query(`
                    CREATE TABLE IF NOT EXISTS logs (
                        bucket TEXT,
                        file TEXT,
                        size BIGINT,
                        modified TIMESTAMP,
                        hash TEXT,
                        zone TEXT
                    );
                `);

                await client.query(`
                    CREATE TABLE IF NOT EXISTS parsed (
                        file TEXT,
                        run BIGINT,
                        source TEXT,
                        modified TIMESTAMP
                    );
                `);

                // Start from a clean slate on every import
                await client.query(`
                    DELETE FROM logs;
                `);

                await client.query(`
                    DELETE FROM parsed;
                `);

                await client.query(`
                    DROP INDEX IF EXISTS log_source;
                `);

                glob(path.resolve(process.argv[3], '*.csv'), {
                    nodir: false
                }, async (err, files) => {
                    if (err) {
                        done();
                        return reject(err);
                    }

                    for (const file of files) {
                        console.error(`ok - importing ${file}`);

                        try {
                            await single_log(client, file);
                        } catch (err) {
                            done();
                            return reject(err);
                        }
                    }

                    done(); // release the client back to the pool
                    return resolve();
                });
            } catch (err) {
                done();
                return reject(err);
            }
        });
    });
}
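/**
 * Stream one inventory CSV into the `logs` table via COPY ... FROM STDIN,
 * which is much faster than inserting the rows one at a time.
 */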
async function single_log(client, file) {
    return new Promise((resolve, reject) => {
        const stream = client.query(copyFrom(`
            COPY logs (
                bucket,
                file,
                size,
                modified,
                hash,
                zone
            ) FROM STDIN DELIMITER ',' CSV;
        `));

        const fileStream = fs.createReadStream(file);

        fileStream.on('error', reject);
        stream.on('error', reject);
        stream.on('finish', () => {
            return resolve();
        });

        fileStream.pipe(stream);
    });
}
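/**
 * Derive the `parsed` table from `logs`: for each processed zip under
 * runs/<run id>/, extract the numeric run id and a dash-joined source name
 * (e.g. us/ca/alameda.zip => us-ca-alameda), then index `parsed` by source.
 */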
async function logs_optimize() {
    return new Promise((resolve, reject) => {
        console.error('ok - optimizing logs');

        pool.connect(async (err, client, done) => {
            if (err) return reject(err);

            try {
                console.error('ok - populating parsed');

                await client.query(`
                    INSERT INTO parsed (
                        file,
                        run,
                        source,
                        modified
                    ) (
                        SELECT
                            file,
                            regexp_replace(replace(file, 'runs/', ''), '/.*', '')::BIGINT,
                            replace(replace(regexp_replace(file, 'runs/([0-9]+)/', ''), '/', '-'), '.zip', '') AS source,
                            modified
                        FROM
                            logs
                        WHERE
                            file ~ 'runs/[0-9]+/.*zip'
                            AND file NOT LIKE 'runs/%/cache.zip'
                    )
                `);

                console.error('ok - creating source index');

                await client.query(`
                    CREATE INDEX log_source
                        ON parsed (source);
                `);

                done(); // release the client back to the pool
                console.error('ok - optimization complete');
                return resolve();
            } catch (err) {
                done();
                return reject(err);
            }
        });
    });
}
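/**
 * Return the URL of the most recently modified processed zip for the given
 * source name, or false if the source has never been processed.
 */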
async function lookup(client, source) {
    // Pass the source name as a bound parameter rather than interpolating
    // it into the SQL string
    const pgres = await client.query(`
        SELECT
            *
        FROM
            parsed
        WHERE
            source = $1
        ORDER BY
            modified DESC
        LIMIT 1
    `, [source]);

    if (!pgres.rows[0]) return false;
    return `http://data.openaddresses.io/${pgres.rows[0].file}`;
}
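/**
 * Rewrite every pre-schema-2 source JSON under <openaddresses path>/sources:
 * move the top-level conform keys into layers.addresses[0], name the layer
 * after the most specific coverage key, and point it at its processed zip.
 */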
async function convert() {
    return new Promise((resolve, reject) => {
        console.error('ok - beginning conversion');

        pool.connect((err, client, done) => {
            if (err) return reject(err);

            glob(path.resolve(process.argv[2], 'sources/**/*.json'), {
                nodir: false
            }, async (err, files) => {
                if (err) {
                    done();
                    return reject(err);
                }

                for (const file of files) {
                    const psd = JSON.parse(fs.readFileSync(file));

                    if (psd.schema === 2) continue;

                    // Name the layer after the most specific coverage key;
                    // declared per-iteration so a value can't leak between files
                    let name;
                    if (psd.coverage.city) {
                        name = 'city';
                    } else if (psd.coverage.town) {
                        name = 'town';
                    } else if (psd.coverage.county) {
                        name = 'county';
                    } else if (psd.coverage.district) {
                        name = 'district';
                    } else if (psd.coverage.region) {
                        name = 'region';
                    } else if (psd.coverage.province) {
                        name = 'province';
                    } else if (psd.coverage.state) {
                        name = 'state';
                    } else if (psd.coverage.country) {
                        name = 'country';
                    }

                    psd.schema = 2;
                    psd.layers = {
                        addresses: [{
                            name: name
                        }]
                    };

                    // Move every other top-level key into the addresses layer
                    for (const key of Object.keys(psd)) {
                        if (['coverage', 'layers', 'schema'].includes(key)) continue;
                        psd.layers.addresses[0][key] = psd[key];
                        delete psd[key];
                    }

                    // us/ca/alameda.json => us-ca-alameda
                    const url = await lookup(client, path.parse(file.replace(/.*sources\//, '').replace(/\//g, '-')).name);

                    if (url) {
                        psd.layers.addresses[0].data = url;
                        psd.layers.addresses[0].protocol = 'http';
                        psd.layers.addresses[0].compression = 'zip';
                        psd.layers.addresses[0].conform = {
                            format: 'csv',
                            lon: 'LON',
                            lat: 'LAT',
                            number: 'NUMBER',
                            street: 'STREET',
                            unit: 'UNIT',
                            city: 'CITY',
                            district: 'DISTRICT',
                            region: 'REGION',
                            postcode: 'POSTCODE',
                            id: 'ID'
                        };
                    }

                    fs.writeFileSync(file, JSON.stringify(psd, null, 4));
                }

                done(); // release the client back to the pool
                return resolve();
            });
        });
    });
}