Skip to content

Commit

Permalink
feat(geo-index): Refactor library to use quadtree index for geographi…
Browse files Browse the repository at this point in the history
…c lookup of tz data

This refactor aims to improve the speed and lower the memory usage of this library.  Before, all of

the world's tz data was loaded into memory and then every single tz was searched until there was a

match.  This refactor adds automatic generation of a quadtree index of the tz data, although the

library still searches much smaller files to determine the exact timezone when needed.
  • Loading branch information
evansiroky committed May 17, 2016
1 parent 0195838 commit fdaf32e
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 153 deletions.
121 changes: 32 additions & 89 deletions lib/createGeoIndex.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ var _ = require('lodash'),
mkdirp = require('mkdirp'),
polygon = require('turf-polygon')

var DATA_DIR = './data'

var geoJsonReader = new jsts.io.GeoJSONReader(),
geoJsonWriter = new jsts.io.GeoJSONWriter()

Expand All @@ -34,35 +32,8 @@ var intersection = function(a, b) {

var result = _a.intersection(_b)

try {
result = geoJsonWriter.write(result);
} catch(e) {
console.log('error interseting')
async.parallel([
function(cb) {
var filePath = DATA_DIR + '/err-a.json',
writeStream = fs.createWriteStream(filePath)

writeStream.end(JSON.stringify(a, null, 2), function(err) {
if(err) { throw err }
cb()
})
}, function(cb) {
var filePath = DATA_DIR + '/err-b.json',
writeStream = fs.createWriteStream(filePath)

writeStream.end(JSON.stringify(b, null, 2), function(err) {
if(err) { throw err }
cb()
})
}], function() {
throw e
})
result = geoJsonWriter.write(result);

return 'error'


}
if(result.type === 'GeometryCollection' && result.geometries.length === 0) {
return undefined
} else {
Expand All @@ -74,7 +45,7 @@ var intersection = function(a, b) {
}
}

module.exports = function(tzGeojson, callback) {
module.exports = function(tzGeojson, dataDir, callback) {

console.log('indexing')

Expand Down Expand Up @@ -129,7 +100,7 @@ module.exports = function(tzGeojson, callback) {
cb()
})
})
}, 2)
}, 10)

// create array and index lookup of timezone names
for(i = 0; i < tzGeojson.features.length; i++) {
Expand All @@ -142,29 +113,32 @@ module.exports = function(tzGeojson, callback) {
expectedAtLevel = 4,
curZones = [{
id: 'a',
bounds: [0, 0, 180, 90]
bounds: [0, 0, 179.9999, 89.9999]
}, {
id: 'b',
bounds: [-180, 0, 0, 90],
bounds: [-179.9999, 0, 0, 89.9999],
}, {
id: 'c',
bounds: [-180, -90, 0, 0],
bounds: [-179.9999, -89.9999, 0, 0],
}, {
id: 'd',
bounds: [0, -90, 180, 0]
}]
bounds: [0, -89.9999, 179.9999, 0]
}],
printMod

while(curPctIndexed < 0.90 && curZones.length < 40) {
while(curPctIndexed < 0.99) {
var nextZones = []

console.log('*********************************************')
console.log('level', curLevel, ' pct Indexed: ', curPctIndexed)
console.log('*********************************************')

for(i = 0; i < curZones.length; i++) {
printMod = Math.round(curZones.length / 5)

if(i % 1000 == 0) {
console.log('inspecting index area ', i + 1, ' of ', curZones.length)
for (i = curZones.length - 1; i >= 0; i--) {

if(i % printMod == 0) {
console.log('inspecting index area ', curZones.length - i, ' of ', curZones.length)
}

var curZone = curZones[i],
Expand All @@ -177,17 +151,6 @@ module.exports = function(tzGeojson, callback) {
[curBounds[0], curBounds[1]]
]]).geometry

var subZones = [],
debug = polygon([[
[curBounds[0], curBounds[1]],
[curBounds[0], curBounds[3]],
[curBounds[2], curBounds[3]],
[curBounds[2], curBounds[1]],
[curBounds[0], curBounds[1]]
]])

subZones.push(debug)

// calculate intersection with timezone boundaries
var timezonesToInspect = []

Expand All @@ -196,7 +159,7 @@ module.exports = function(tzGeojson, callback) {
timezonesToInspect = curZone.tzs
} else {
// first iteration, find all intersections in world
for (j = 0; j < tzGeojson.features.length; j++) {
for (var j = tzGeojson.features.length - 1; j >= 0; j--) {
timezonesToInspect.push(j)
}
}
Expand All @@ -205,13 +168,6 @@ module.exports = function(tzGeojson, callback) {
intersectedZones = result.intersectedZones,
foundExactMatch = result.foundExactMatch

/*for (j = 0; j < timezonesToInspect.length; j++) {
var zIntersect = intersection(tzGeojson.features[timezonesToInspect[j]].geometry, curBoundsGeoJson)
if(zIntersect) {
subZones.push(zIntersect)
}
}*/

var zoneResult = -1 // defaults to no zones found

// check the results
Expand Down Expand Up @@ -279,12 +235,6 @@ module.exports = function(tzGeojson, callback) {

_.set(data.lookup, curZone.id, zoneResult)

/*fileWritingQueue.push({
folder: DATA_DIR,
filename: 'level' + curLevel + '-sz-' + i + '.json',
data: featurecollection(subZones),
})*/

}

// recalculate pct indexed after this round
Expand All @@ -294,14 +244,18 @@ module.exports = function(tzGeojson, callback) {
curLevel++
}

console.log('*********************************************')
console.log('reached target index: ', curPctIndexed)
console.log('writing unindexable zone data')

var allSubZones = []
printMod = Math.round(curZones.length / 5)

// process remaining zones and write out individual geojson for each small region
for(i = 0; i < curZones.length; i++) {
for (i = curZones.length - 1; i >= 0; i--) {

console.log('writing zone data ', i ,'of', curZones.length)
if(i % printMod == 0) {
console.log('inspecting unindexable area ', curZones.length - i, ' of ', curZones.length)
}

var curZone = curZones[i],
curBounds = curZone.bounds,
Expand All @@ -312,15 +266,14 @@ module.exports = function(tzGeojson, callback) {
[curBounds[2], curBounds[1]],
[curBounds[0], curBounds[1]]
]]).geometry
errFound = false

console.log(curZone.id)
//console.log('writing zone data `', curZone.id, '`', i ,'of', curZones.length)

var result = inspectZones(curZone.tzs, curBoundsGeoJson),
intersectedZones = result.intersectedZones,
foundExactMatch = result.foundExactMatch

console.log('intersectedZones', intersectedZones.length, 'exact:', foundExactMatch)
//console.log('intersectedZones', intersectedZones.length, 'exact:', foundExactMatch)

var zoneResult = -1 // defaults to no zones found

Expand All @@ -334,27 +287,18 @@ module.exports = function(tzGeojson, callback) {
for (j = intersectedZones.length - 1; j >= 0; j--) {
var tzIdx = intersectedZones[j]

console.log('intersecting', tzGeojson.features[tzIdx].properties)
//console.log('intersecting', tzGeojson.features[tzIdx].properties)

var intersectedArea = intersection(tzGeojson.features[tzIdx].geometry, curBoundsGeoJson)

if(intersectedArea === 'error') {
errFound = true
break
if(intersectedArea) {
intersectedArea.properties.TZID = data.timezones[tzIdx]
features.push(intersectedArea)
}

intersectedArea.properties.TZID = data.timezones[tzIdx]
features.push(intersectedArea)
allSubZones.push(intersectedArea)
}

if(errFound) {
break
}

var areaGeoJson = featurecollection(features),
path = './data/' + curZone.id.replace(/\./g, '/')

path = dataDir + '/' + curZone.id.replace(/\./g, '/')

fileWritingQueue.push({ folder: path, filename: 'geo.json', data: areaGeoJson })

Expand All @@ -369,12 +313,11 @@ module.exports = function(tzGeojson, callback) {
console.log('writing index file')

fileWritingQueue.drain = function(err) {
console.log('drained')
console.log('done indexing')
callback(err)
}

// write index data to file
fileWritingQueue.push({ folder: DATA_DIR, filename: 'index.json', data: data })
fileWritingQueue.push({ folder: DATA_DIR, filename: 'finalSubzones.json', data: featurecollection(allSubZones) })
fileWritingQueue.push({ folder: dataDir, filename: 'index.json', data: data })

}
5 changes: 1 addition & 4 deletions lib/find.js
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ var getTimezone = function(lat, lon) {

// analyze result of current depth
if(curTzData === 'f') {
console.log('looking up tz from file', quadPos)
// exact boundaries saved in file
// parse geojson for exact boundaries
var filepath = quadPos.split('').join('/'),
Expand All @@ -92,13 +91,11 @@ var getTimezone = function(lat, lon) {
// not within subarea, therefore no valid timezone
return null
} else if(curTzData === -1) {
console.log('no timezone at index')
// no timezone at this gps location
return null
} else if(typeof curTzData === 'number') {
// exact match found
console.log('exact match at index')
return tzData.timezones[curTzData[nextQuad]]
return tzData.timezones[curTzData]
} else if(typeof curTzData !== 'object') {
// not another nested quad index, throw error
var err = new Error('Unexpected data type')
Expand Down
Loading

0 comments on commit fdaf32e

Please sign in to comment.