-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit e3ed000
Showing
11 changed files
with
3,792 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
|
||
# Created by https://www.gitignore.io/api/node,macos | ||
# Edit at https://www.gitignore.io/?templates=node,macos | ||
|
||
### macOS ### | ||
# General | ||
.DS_Store | ||
.AppleDouble | ||
.LSOverride | ||
|
||
# Icon must end with two \r | ||
Icon | ||
|
||
# Thumbnails | ||
._* | ||
|
||
# Files that might appear in the root of a volume | ||
.DocumentRevisions-V100 | ||
.fseventsd | ||
.Spotlight-V100 | ||
.TemporaryItems | ||
.Trashes | ||
.VolumeIcon.icns | ||
.com.apple.timemachine.donotpresent | ||
|
||
# Directories potentially created on remote AFP share | ||
.AppleDB | ||
.AppleDesktop | ||
Network Trash Folder | ||
Temporary Items | ||
.apdisk | ||
|
||
### Node ### | ||
# Logs | ||
logs | ||
*.log | ||
npm-debug.log* | ||
yarn-debug.log* | ||
yarn-error.log* | ||
lerna-debug.log* | ||
|
||
# Diagnostic reports (https://nodejs.org/api/report.html) | ||
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json | ||
|
||
# Runtime data | ||
pids | ||
*.pid | ||
*.seed | ||
*.pid.lock | ||
|
||
# Directory for instrumented libs generated by jscoverage/JSCover | ||
lib-cov | ||
|
||
# Coverage directory used by tools like istanbul | ||
coverage | ||
|
||
# nyc test coverage | ||
.nyc_output | ||
|
||
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) | ||
.grunt | ||
|
||
# Bower dependency directory (https://bower.io/) | ||
bower_components | ||
|
||
# node-waf configuration | ||
.lock-wscript | ||
|
||
# Compiled binary addons (https://nodejs.org/api/addons.html) | ||
build/Release | ||
|
||
# Dependency directories | ||
node_modules/ | ||
jspm_packages/ | ||
|
||
# TypeScript v1 declaration files | ||
typings/ | ||
|
||
# Optional npm cache directory | ||
.npm | ||
|
||
# Optional eslint cache | ||
.eslintcache | ||
|
||
# Optional REPL history | ||
.node_repl_history | ||
|
||
# Output of 'npm pack' | ||
*.tgz | ||
|
||
# Yarn Integrity file | ||
.yarn-integrity | ||
|
||
# dotenv environment variables file | ||
.env | ||
.env.test | ||
|
||
# parcel-bundler cache (https://parceljs.org/) | ||
.cache | ||
|
||
# next.js build output | ||
.next | ||
|
||
# nuxt.js build output | ||
.nuxt | ||
|
||
# vuepress build output | ||
.vuepress/dist | ||
|
||
# Serverless directories | ||
.serverless/ | ||
|
||
# FuseBox cache | ||
.fusebox/ | ||
|
||
# DynamoDB Local files | ||
.dynamodb/ | ||
|
||
# End of https://www.gitignore.io/api/node,macos | ||
|
||
/data |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
MIT License | ||
|
||
Copyright (c) 2019 Etalab | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
#!/usr/bin/env node --max-old-space-size=4096 | ||
/* eslint camelcase: off */ | ||
|
||
const {join} = require('path') | ||
const {createReadStream} = require('fs') | ||
const {Transform} = require('stream') | ||
const bluebird = require('bluebird') | ||
const {ensureDir} = require('fs-extra') | ||
const csvParser = require('csv-parser') | ||
const {createGunzip} = require('gunzip-stream') | ||
const {groupBy} = require('lodash') | ||
const pumpify = require('pumpify').obj | ||
const getStream = require('get-stream') | ||
const {writeCsv} = require('./lib/csv') | ||
const {getCulturesMap, getCulturesSpecialesMap} = require('./lib/cultures') | ||
const {getDateMutation, getIdParcelle, getCodeCommune, getCodePostal} = require('./lib/parse') | ||
const {getCodeDepartement} = require('./lib/util') | ||
|
||
/**
 * Parses a raw "Valeur fonciere" cell, which uses a decimal comma.
 * Returns the parsed number (including 0), or '' when the cell holds no number.
 */
function parseValeurFonciere(rawValue) {
  const parsed = Number.parseFloat(rawValue.replace(',', '.'))
  // Only NaN means "no value"; a plain `|| ''` would also erase a legitimate 0
  return Number.isNaN(parsed) ? '' : parsed
}

/**
 * Converts one raw DVF record (keyed by the source column headers) into the
 * normalized output row (snake_case keys).
 *
 * The key order of the returned object is kept stable on purpose: the rows are
 * handed to a CSV writer afterwards.
 *
 * @param {Object} row - Raw record as produced by the CSV parser.
 * @param {Object} maps
 * @param {Object} maps.culturesMap - Culture code => label.
 * @param {Object} maps.culturesSpecialesMap - Special culture code => label.
 * @returns {Object} The normalized row.
 */
function convertRow(row, {culturesMap, culturesSpecialesMap}) {
  const codeCommune = getCodeCommune(row)
  const codeCulture = row['Nature culture']
  const codeCultureSpeciale = row['Nature culture speciale']

  return {
    date_mutation: getDateMutation(row),
    nature_mutation: row['Nature mutation'],
    valeur_fonciere: parseValeurFonciere(row['Valeur fonciere']),
    adresse_numero: row['No voie'],
    adresse_suffixe: row['B/T/Q'],
    // Street type and street name are merged; empty parts are dropped
    adresse_nom_voie: [row['Type de voie'], row.Voie].filter(Boolean).join(' '),
    adresse_code_voie: row['Code voie'],
    code_postal: getCodePostal(row) || '',
    code_commune: codeCommune,
    nom_commune: row.Commune,
    code_departement: getCodeDepartement(codeCommune),
    id_parcelle: getIdParcelle(row),
    numero_volume: row['No Volume'],
    lot1_numero: row['1er lot'],
    lot1_surface_carrez: row['Surface Carrez du 1er lot'],
    lot2_numero: row['2e lot'],
    lot2_surface_carrez: row['Surface Carrez du 2e lot'],
    lot3_numero: row['3e lot'],
    lot3_surface_carrez: row['Surface Carrez du 3e lot'],
    lot4_numero: row['4e lot'],
    lot4_surface_carrez: row['Surface Carrez du 4e lot'],
    lot5_numero: row['5e lot'],
    lot5_surface_carrez: row['Surface Carrez du 5e lot'],
    nombre_lots: row['Nombre de lots'],
    code_type_local: row['Code type local'],
    type_local: row['Type local'],
    surface_reelle_bati: row['Surface reelle bati'],
    nombre_pieces_principales: row['Nombre pieces principales'],
    code_nature_culture: codeCulture,
    // Unknown codes get an empty label rather than `undefined`
    nature_culture: codeCulture in culturesMap ? culturesMap[codeCulture] : '',
    code_nature_culture_speciale: codeCultureSpeciale,
    nature_culture_speciale: codeCultureSpeciale in culturesSpecialesMap ? culturesSpecialesMap[codeCultureSpeciale] : '',
    surface_terrain: row['Surface terrain']
  }
}
|
||
const millesimes = ['2018', '2017', '2016', '2015', '2014'] | ||
|
||
/**
 * Entry point: for every year listed in `millesimes`, reads the gzipped
 * pipe-separated source file from `data/`, converts each record, then writes
 * one CSV per commune (`dist/<year>/communes/<departement>/<commune>.csv`) and
 * one gzipped CSV per departement (`dist/<year>/departements/<departement>.csv.gz`).
 *
 * Years are processed one after the other; within a year, files are written
 * with at most 8 concurrent writes. All rows of a year are held in memory.
 */
async function main() {
  const culturesMap = await getCulturesMap()
  const culturesSpecialesMap = await getCulturesSpecialesMap()

  // Reads and converts every record of one yearly export
  const loadRows = millesime => {
    const sourcePath = join(__dirname, 'data', `valeursfoncieres-${millesime}.txt.gz`)
    const converter = new Transform({
      objectMode: true,
      transform(rawRow, encoding, done) {
        done(null, convertRow(rawRow, {culturesMap, culturesSpecialesMap}))
      }
    })

    return getStream.array(pumpify(
      createReadStream(sourcePath),
      createGunzip(),
      csvParser({separator: '|'}),
      converter
    ))
  }

  await bluebird.each(millesimes, async millesime => {
    const rows = await loadRows(millesime)
    const millesimePath = join(__dirname, 'dist', millesime)

    // One plain CSV per commune, stored under its departement directory
    const communesEntries = Object.entries(groupBy(rows, 'code_commune'))
    await bluebird.map(communesEntries, async ([codeCommune, communeRows]) => {
      console.log(codeCommune)
      const departementPath = join(millesimePath, 'communes', getCodeDepartement(codeCommune))
      await ensureDir(departementPath)
      await writeCsv(join(departementPath, `${codeCommune}.csv`), communeRows)
    }, {concurrency: 8})

    // One gzipped CSV per departement
    const departementsEntries = Object.entries(groupBy(rows, 'code_departement'))
    await bluebird.map(departementsEntries, async ([codeDepartement, departementRows]) => {
      console.log(codeDepartement)
      const departementsPath = join(millesimePath, 'departements')
      await ensureDir(departementsPath)
      await writeCsv(join(departementsPath, `${codeDepartement}.csv.gz`), departementRows)
    }, {concurrency: 8})
  })
}
|
||
main().catch(error => { | ||
console.error(error) | ||
process.exit(1) | ||
}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
const {promisify} = require('util') | ||
const finished = promisify(require('stream').finished) | ||
const {PassThrough} = require('stream') | ||
const {createGzip} = require('zlib') | ||
const {createReadStream, createWriteStream} = require('fs') | ||
const csvParser = require('csv-parser') | ||
const csvWriter = require('csv-write-stream') | ||
const getStream = require('get-stream') | ||
const intoStream = require('into-stream') | ||
const pumpify = require('pumpify') | ||
|
||
/**
 * Reads a whole CSV file and resolves with its parsed rows.
 *
 * @param {string} filePath - Path of the CSV file to read.
 * @param {Object} [options]
 * @param {string} [options.separator] - Column separator, ',' when not given.
 * @returns {Promise<Object[]>} One object per parsed row.
 */
function readCsv(filePath, options = {}) {
  const separator = options.separator || ','
  const parsedRows = pumpify.obj(
    createReadStream(filePath),
    csvParser({separator})
  )

  return getStream.array(parsedRows)
}
|
||
/**
 * Writes rows to a CSV file; the output is gzipped when the path ends with '.gz'.
 *
 * @param {string} filePath - Destination path.
 * @param {Object[]} rows - Rows to serialize.
 * @param {Object} [options]
 * @param {string} [options.separator] - Column separator, ',' when not given.
 * @returns {Promise} Settles once the stream chain has finished or failed.
 */
function writeCsv(filePath, rows, options = {}) {
  const separator = options.separator || ','
  // Pass-through keeps the chain shape identical whether or not we compress
  const compression = filePath.endsWith('.gz') ? createGzip() : new PassThrough()

  const output = pumpify(
    intoStream.object(rows),
    csvWriter({separator}),
    compression,
    createWriteStream(filePath)
  )

  return finished(output)
}
|
||
module.exports = {readCsv, writeCsv} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
const {join} = require('path') | ||
const {readCsv} = require('./csv') | ||
|
||
/**
 * Loads `table-cultures.csv` (one directory above this module) and builds a
 * lookup object: culture code => culture label.
 *
 * The first parsed row is skipped.
 * NOTE(review): presumably the table has a second header-like line — confirm
 * against the CSV file, otherwise the first real entry is being dropped.
 *
 * @returns {Promise<Object>} Labels keyed by code.
 */
async function getCulturesMap() {
  const tablePath = join(__dirname, '..', 'table-cultures.csv')
  const [, ...entries] = await readCsv(tablePath)

  const labelsByCode = {}
  for (const entry of entries) {
    labelsByCode[entry['Code Nature de Culture']] = entry['Libellé Nature de Culture']
  }

  return labelsByCode
}
|
||
/**
 * Loads `table-cultures-speciales.csv` (one directory above this module) and
 * builds a lookup object: special culture code => special culture label.
 *
 * The first parsed row is skipped.
 * NOTE(review): presumably the table has a second header-like line — confirm
 * against the CSV file, otherwise the first real entry is being dropped.
 *
 * @returns {Promise<Object>} Labels keyed by code.
 */
async function getCulturesSpecialesMap() {
  const tablePath = join(__dirname, '..', 'table-cultures-speciales.csv')
  const [, ...entries] = await readCsv(tablePath)

  const labelsByCode = {}
  for (const entry of entries) {
    labelsByCode[entry['Code Nature Culture Spéciale']] = entry['Libellé Nature Culture Spéciale']
  }

  return labelsByCode
}
|
||
module.exports = {getCulturesMap, getCulturesSpecialesMap} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
/**
 * Builds the full commune code from the 'Code departement' and 'Code commune'
 * fields of a raw record.
 *
 * Departement codes starting with '97' are kept as-is and followed by the
 * commune code padded to 2 characters; any other departement code is padded to
 * 2 characters and followed by the commune code padded to 3 characters.
 *
 * @param {Object} row - Raw record.
 * @returns {string} Concatenated departement + commune code.
 */
function getCodeCommune(row) {
  const departement = row['Code departement']
  const commune = row['Code commune']

  if (departement.startsWith('97')) {
    return departement + commune.padStart(2, '0')
  }

  return departement.padStart(2, '0') + commune.padStart(3, '0')
}
|
||
/**
 * Converts the 'Date mutation' field of a raw record from day-first notation
 * (DD/MM/YYYY) to ISO notation (YYYY-MM-DD).
 *
 * @param {Object} row - Raw record.
 * @returns {string} The ISO date, or '' when the field is missing or does not
 *   look like a DD?MM?YYYY date (instead of a meaningless string such as '--').
 */
function getDateMutation(row) {
  // Any single non-digit separator is accepted, as the positional slicing did before
  const match = /^(\d{2})\D(\d{2})\D(\d{4})/.exec(row['Date mutation'] || '')

  if (!match) {
    return ''
  }

  const [, day, month, year] = match
  return `${year}-${month}-${day}`
}
|
||
/**
 * Builds the parcel identifier of a raw record by concatenating, in order:
 * the commune code, the section prefix (padded to 3 characters, '000' when
 * absent), the section (padded to 2 characters) and the plan number (padded to
 * 4 characters). Padding uses '0' on the left.
 *
 * @param {Object} row - Raw record.
 * @returns {string} The parcel identifier.
 */
function getIdParcelle(row) {
  const prefixeSection = row['Prefixe de section']

  const parts = [
    getCodeCommune(row),
    prefixeSection ? prefixeSection.padStart(3, '0') : '000',
    row.Section.padStart(2, '0'),
    row['No plan'].padStart(4, '0')
  ]

  return parts.join('')
}
|
||
/**
 * Returns the 'Code postal' field of a raw record, left-padded with '0' to
 * 5 characters.
 *
 * @param {Object} row - Raw record.
 * @returns {string|undefined} The padded postal code, or undefined when the
 *   field is empty or absent.
 */
function getCodePostal(row) {
  const codePostal = row['Code postal']
  return codePostal ? codePostal.padStart(5, '0') : undefined
}
|
||
module.exports = {getDateMutation, getCodeCommune, getIdParcelle, getCodePostal} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
/**
 * Extracts the departement code from a commune code: the first 3 characters
 * when the code starts with '97', the first 2 characters otherwise.
 *
 * @param {string} codeCommune - Full commune code.
 * @returns {string} The departement code.
 */
function getCodeDepartement(codeCommune) {
  // `slice` replaces the deprecated `substr`
  const length = codeCommune.startsWith('97') ? 3 : 2
  return codeCommune.slice(0, length)
}
|
||
module.exports = {getCodeDepartement} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
{ | ||
"name": "@etalab/dvf", | ||
"version": "0.0.0", | ||
"description": "Scripts permettant de manipuler les données DVF", | ||
"main": "index.js", | ||
"repository": "https://github.com/etalab/dvf", | ||
"author": "Jérôme Desboeufs <[email protected]>", | ||
"license": "MIT", | ||
"private": false, | ||
"scripts": { | ||
"lint": "xo", | ||
"improve-csv": "node --max-old-space-size=4096 improve-csv" | ||
}, | ||
"dependencies": { | ||
"bluebird": "^3.5.4", | ||
"csv-parser": "^2.2.0", | ||
"csv-write-stream": "^2.0.0", | ||
"fs-extra": "^7.0.1", | ||
"get-stream": "^5.1.0", | ||
"gunzip-stream": "^1.0.1", | ||
"into-stream": "^5.1.0", | ||
"lodash": "^4.17.11", | ||
"pumpify": "^1.5.1" | ||
}, | ||
"devDependencies": { | ||
"xo": "^0.24.0" | ||
}, | ||
"xo": { | ||
"semicolon": false, | ||
"space": "2" | ||
}, | ||
"engines": { | ||
"node": ">= 10" | ||
} | ||
} |
Oops, something went wrong.