-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.js
executable file
·107 lines (94 loc) · 2.8 KB
/
main.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#! /usr/bin/env node
const flow = require('xml-flow')
const https = require('https')
const path = require('path')
const tar = require('tar')
const fs = require('fs')
// Dataset API endpoint: the first command-line argument when one is given,
// otherwise the public endpoint below.
const api =
  process.argv[2] ||
  'https://data.public.lu/api/1/datasets/letzebuerger-online-dictionnaire/'
// Matches archive entry paths that end in ".xml".
const regex = new RegExp('\\.xml$')
// Run statistics: number of JSON files written and names of files that failed.
const infos = { countJson: 0, writeFail: [] }
// High-resolution timestamp taken at start-up; used to compute the run time.
const hrstart = process.hrtime()
// Absolute path of the JSON output directory; assigned by createFolders().
let jsonFolder
/**
 * Requests the dataset description from `api` and resolves with the `url` of
 * the first element of its `resources` array.
 *
 * @returns {Promise<string>} Resolves with `resources[0].url`. Rejects when the
 *   request or response stream fails, the status code is not 200, the body is
 *   not valid JSON, or the first resource is missing or has no `url` key.
 */
const getURLfromAPI = () => {
  return new Promise((resolve, reject) => {
    https
      .get(api, (resp) => {
        if (resp.statusCode !== 200) {
          // The body is not needed; drain it so the connection is released.
          resp.resume()
          return reject(new Error(resp.statusCode + ' : ' + resp.statusMessage))
        }
        // Decode at stream level so a multi-byte UTF-8 character that is split
        // across two chunks is not corrupted by per-chunk string conversion.
        resp.setEncoding('utf8')
        let body = ''
        resp.on('data', (chunk) => (body += chunk))
        resp.on('error', reject)
        resp.on('end', () => {
          let payload
          try {
            payload = JSON.parse(body)
          } catch (err) {
            // Stop here: there is nothing to inspect when parsing failed.
            return reject(err)
          }
          // Valid JSON can still be null, a primitive or an object without a
          // usable `resources` array; treat all of those as "not found"
          // instead of throwing inside this event handler.
          const list =
            payload !== null && typeof payload === 'object'
              ? payload.resources
              : undefined
          const first = Array.isArray(list) ? list[0] : undefined
          if (first !== null && typeof first === 'object' && 'url' in first)
            return resolve(first.url)
          reject(new Error('URL ressource not found'))
        })
      })
      .on('error', (err) => reject(err))
  })
}
/**
 * Makes sure the output directory and its `json` sub-directory exist below the
 * current working directory, and stores the sub-directory path in `jsonFolder`.
 *
 * @param {string} [distFolder='dist'] Output directory, relative to the
 *   current working directory.
 */
const createFolders = (distFolder = 'dist') => {
  const root = path.join(process.cwd(), distFolder)
  jsonFolder = path.join(root, 'json')
  // Only directories that are not there yet are created.
  const missing = [root, jsonFolder].filter((dir) => !fs.existsSync(dir))
  missing.forEach((dir) => {
    fs.mkdirSync(dir, { recursive: true })
  })
}
/**
 * Downloads the archive at `url`, streams it through `tar.t()` and passes
 * every entry whose path matches `regex` to `parse`.
 *
 * Failures (request error, response error, archive stream error, or a status
 * code other than 200) are logged with `console.error` instead of surfacing as
 * unhandled 'error' events.
 *
 * @param {string} url Location of the archive to download.
 * @returns {object} The request object returned by `https.get`.
 */
const extract = (url) =>
  https
    .get(url, (resp) => {
      if (resp.statusCode !== 200) {
        // Do not feed an error page or redirect body to the tar parser; drain
        // it so the connection is released.
        resp.resume()
        console.error(new Error(resp.statusCode + ' : ' + resp.statusMessage))
        return
      }
      console.info(`\nExtracting from : ${url}\n`)
      resp.on('error', (err) => console.error(err))
      resp
        .pipe(tar.t())
        .on('entry', (entry) => {
          if (regex.test(entry.path)) parse(entry)
        })
        .on('error', (err) => console.error(err))
    })
    .on('error', (err) => console.error(err))
/**
 * Feeds an archive entry to `flow` and handles what it emits: each
 * `tag:lod:item` is shown as progress and written to disk under its id, errors
 * are logged, and `feedBack` runs on `end`.
 *
 * @param {object} entry Archive entry; its `path` is logged and the entry
 *   itself is handed to `flow`.
 * @returns {object} Whatever the chained `.on()` calls on `flow(entry)` return.
 */
const parse = (entry) => {
  console.info(`Parsing from : ${entry.path}\n`)

  // The item id lives at item['lod:meta']['lod:id'].
  const handleItem = (item) => {
    const { 'lod:meta': meta } = item
    const { 'lod:id': id } = meta
    printProgress(id)
    writeJson(id, item)
  }
  const logError = (err) => console.error(err)

  return flow(entry)
    .on('tag:lod:item', handleItem)
    .on('error', logError)
    .on('end', feedBack)
}
/**
 * Serialises `item` as pretty-printed JSON into `<jsonFolder>/<id>.json`.
 * A successful write increments `infos.countJson`; when serialising or writing
 * throws, the file name is appended to `infos.writeFail` instead.
 *
 * @param {string} id Used as the file name, without extension.
 * @param {*} item Value to serialise.
 */
const writeJson = (id, item) => {
  const filename = `${id}.json`
  const target = path.join(jsonFolder, filename)
  try {
    const serialised = JSON.stringify(item, null, 2)
    fs.writeFileSync(target, serialised)
    infos.countJson += 1
  } catch (failure) {
    // Best effort: remember the failure and carry on with the next item.
    infos.writeFail.push(filename)
  }
}
/**
 * Shows the current progress value on standard output.
 *
 * On a terminal the current line is cleared and overwritten, so the value
 * updates in place. When stdout is not a TTY (piped or redirected) the value
 * is written on its own line, because `clearLine` and `cursorTo` are only
 * available on TTY streams and calling them would throw.
 *
 * @param {*} progress Value to display; converted with `String()`.
 */
const printProgress = (progress) => {
  const out = process.stdout
  // write() rejects anything that is not a string or buffer.
  const text = String(progress)
  if (!out.isTTY) {
    out.write(`${text}\n`)
    return
  }
  out.clearLine(0) // 0 = clear the entire line
  out.cursorTo(0)
  out.write(text)
}
/**
 * Prints the run summary and terminates the process: elapsed time since
 * `hrstart` formatted as HH:MM:SS, the number of failed writes (only when
 * there are any) and the number of JSON files written.
 *
 * The progress line is only cleared when stdout is a TTY, because `cursorTo`
 * and `clearLine` do not exist on non-TTY streams.
 */
const feedBack = () => {
  const [seconds] = process.hrtime(hrstart)
  // Characters 11..18 of an ISO timestamp are the HH:MM:SS part.
  const time = new Date(seconds * 1000).toISOString().slice(11, 19)
  if (process.stdout.isTTY) {
    process.stdout.cursorTo(0)
    process.stdout.clearLine(0)
  }
  console.info('⦿ Execution time : %s', time)
  // Failures are collected silently while writing; surface them here.
  if (infos.writeFail.length > 0)
    console.error('✗ Write failures : %s', infos.writeFail.length)
  console.info('√ Json files : %s', infos.countJson, '\n')
  process.exit()
}
/**
 * Entry point: prints the summary on Ctrl-C, looks up the archive URL through
 * the API, prepares the output folders (named after the archive without its
 * `.tar.gz` suffix) and starts the extraction. Any failure in that chain is
 * logged with `console.error`.
 */
const main = () => {
  process.on('SIGINT', feedBack)

  const download = (url) => {
    const archiveName = path.basename(url)
    createFolders(archiveName.replace('.tar.gz', ''))
    extract(url)
  }

  getURLfromAPI().then(download).catch(console.error)
}

main()