Skip to content

Commit

Permalink
feat(default labels): Add region to label if different from city
Browse files Browse the repository at this point in the history
This adds the region to the default labels, but only if the _region
name_ is different from the _city name_ (defined as locality or
localadmin name).

The intent is to handle major world cities like Berlin, Sao Paulo,
Paris, etc., that are contained within an administrative region of the
same name and are so well known that they do not require any additional
specifiers.

Differences such as capitalization and accents are ignored for
comparison purposes, since they often exist in real data.

In the more common case where the region and city names are different,
the region abbreviation is preferred, with the region name being
returned only if the abbreviation is not available.
  • Loading branch information
orangejulius committed Feb 9, 2021
1 parent 546734a commit 85c36e4
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 8 deletions.
40 changes: 40 additions & 0 deletions labelSchema.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
var _ = require('lodash');
const removeAccents = require('remove-accents');

// Build a comparison-friendly form of a name: lowercased, with every run of
// spaces, commas and hyphens collapsed to a single space, and accents removed.
// Any falsy input (undefined, null, empty string) yields the empty string.
function normalizeString(str){
  if (!str) {
    return '';
  }

  const words = str.toLowerCase().split(/[ ,-]+/);
  return removeAccents(words.join(' '));
}

// Two-letter codes of the French overseas territories, in order:
// French Guiana, Guadeloupe, Martinique, Reunion, Mayotte
const FRA_OVERSEAS = ['GF', 'GP', 'MQ', 'RE', 'YT'];
Expand Down Expand Up @@ -41,9 +50,39 @@ function getRegionalValue(record) {
} else if (!_.isEmpty(record.region)) {
// return the full name when there's no region code available
return record.region[0];
}
}

// The same as getRegionalValue above, but only returns a region if the region name
// is distinct from the locality/localadmin/city name.
// This works best for large cities in countries where the region name/abbr is not
// _always_ included in the label.
//
// record: a document whose admin fields (region, region_a, locality, localadmin,
//         dependency, dependency_a) are arrays of strings, plus a `layer` string.
// returns: the region abbreviation or name to use in the label, or undefined when
//          no regional value should be shown.
function getUniqueRegionalValue(record) {
  // records belonging to a dependency never get a regional value
  if (!_.isEmpty(record.dependency) || !_.isEmpty(record.dependency_a)) {
    return;
  }

  const regionName = _.isEmpty(record.region) ? undefined : record.region[0];

  // handle the region value where this record itself is a region:
  // return the full region name, since it is the most granular piece of info
  if ('region' === record.layer) {
    return regionName;
  }

  const localityValue = getFirstProperty(['locality', 'localadmin'])(record);
  const normalizedLocality = normalizeString(localityValue);

  // Skip returning anything when the region and locality name are identical
  // (ignoring case, accents and some punctuation).
  // This handles major cities in their own region like Berlin, Tokyo, Paris, Sao Paulo, etc.
  // The comparison is only meaningful when there actually is a locality name:
  // two missing names both normalize to '' and must not count as a match,
  // otherwise an available region abbreviation would be dropped.
  if (normalizedLocality !== '' && normalizedLocality === normalizeString(regionName)) {
    return;
  }

  // prefer the region abbreviation, fall back to the region name if no abbreviation
  if (!_.isEmpty(record.region_a)) {
    return record.region_a[0];
  }

  return regionName;
}

// this function generates the last field of the labels for US records
Expand Down Expand Up @@ -150,6 +189,7 @@ module.exports = {
'default': {
'valueFunctions': {
'local': getFirstProperty(['locality', 'localadmin']),
'regional': getUniqueRegionalValue,
'country': getFirstProperty(['dependency', 'country'])
}
},
Expand Down
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
"url": "https://github.com/pelias/labels.git"
},
"dependencies": {
"lodash": "^4.16.4"
"lodash": "^4.16.4",
"remove-accents": "^0.4.2"
},
"devDependencies": {
"difflet": "^1.0.1",
Expand Down
59 changes: 52 additions & 7 deletions test/labelGenerator_default.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,12 @@ module.exports.tests.default_country = function(test, common) {
'county': ['county name'],
'macrocounty': ['macrocounty name'],
'region': ['region name'],
'region_a': ['region abbrv'],
'macroregion': ['macroregion name'],
'country_a': ['country code'],
'country': ['country name']
};
t.equal(generator(doc),'venue name, locality name, country name');
t.equal(generator(doc),'venue name, locality name, region abbrv, country name');
t.end();
});

Expand All @@ -43,11 +44,12 @@ module.exports.tests.default_country = function(test, common) {
'county': ['county name'],
'macrocounty': ['macrocounty name'],
'region': ['region name'],
'region_a': ['region abbrv'],
'macroregion': ['macroregion name'],
'country_a': ['country code'],
'country': ['country name']
};
t.equal(generator(doc),'venue name, localadmin name, country name');
t.equal(generator(doc),'venue name, localadmin name, region abbrv, country name');
t.end();
});

Expand All @@ -63,11 +65,12 @@ module.exports.tests.default_country = function(test, common) {
'county': ['county name'],
'macrocounty': ['macrocounty name'],
'region': ['region name'],
'region_a': ['region abbrv'],
'macroregion': ['macroregion name'],
'country_a': ['country code'],
'country': ['country name']
};
t.equal(generator(doc),'house number street name, locality name, country name');
t.equal(generator(doc),'house number street name, locality name, region abbrv, country name');
t.end();
});

Expand All @@ -81,11 +84,12 @@ module.exports.tests.default_country = function(test, common) {
'county': ['county name'],
'macrocounty': ['macrocounty name'],
'region': ['region name'],
'region_a': ['region abbrv'],
'macroregion': ['macroregion name'],
'country_a': ['country code'],
'country': ['country name']
};
t.equal(generator(doc),'neighbourhood name, locality name, country name');
t.equal(generator(doc),'neighbourhood name, locality name, region abbrv, country name');
t.end();
});

Expand All @@ -98,6 +102,43 @@ module.exports.tests.default_country = function(test, common) {
'county': ['county name'],
'macrocounty': ['macrocounty name'],
'region': ['region name'],
'region_a': ['region abbrv'],
'macroregion': ['macroregion name'],
'country_a': ['country code'],
'country': ['country name']
};
t.equal(generator(doc),'locality name, region abbrv, country name');
t.end();
});

// When the region name is identical to the locality name, the expected label
// contains neither the region name nor its abbreviation ('region abbrv').
test('locality for a major city with a region of the same name', function(t) {
var doc = {
'name': { 'default': 'locality name' },
'layer': 'locality',
'locality': ['locality name'],
'localadmin': ['localadmin name'],
'county': ['county name'],
'macrocounty': ['macrocounty name'],
'region': ['locality name'], // same as locality
'region_a': ['region abbrv'],
'macroregion': ['macroregion name'],
'country_a': ['country code'],
'country': ['country name']
};
t.equal(generator(doc),'locality name, country name');
t.end();
});

test('locality for a major city with a region of the same name, minor formatting differences', function(t) {
var doc = {
'name': { 'default': 'locality name' },
'layer': 'locality',
'locality': ['locality name'],
'localadmin': ['localadmin name'],
'county': ['county name'],
'macrocounty': ['macrocounty name'],
'region': ['Locality nÁme'], // same as locality
'region_a': ['region abbrv'],
'macroregion': ['macroregion name'],
'country_a': ['country code'],
'country': ['country name']
Expand All @@ -114,11 +155,12 @@ module.exports.tests.default_country = function(test, common) {
'county': ['county name'],
'macrocounty': ['macrocounty name'],
'region': ['region name'],
'region_a': ['region abbrv'],
'macroregion': ['macroregion name'],
'country_a': ['country code'],
'country': ['country name']
};
t.equal(generator(doc),'localadmin name, country name');
t.equal(generator(doc),'localadmin name, region abbrv, country name');
t.end();
});

Expand All @@ -129,11 +171,12 @@ module.exports.tests.default_country = function(test, common) {
'county': ['county name'],
'macrocounty': ['macrocounty name'],
'region': ['region name'],
'region_a': ['region abbrv'],
'macroregion': ['macroregion name'],
'country_a': ['country code'],
'country': ['country name']
};
t.equal(generator(doc),'county name, country name');
t.equal(generator(doc),'county name, region abbrv, country name');
t.end();
});

Expand All @@ -143,11 +186,12 @@ module.exports.tests.default_country = function(test, common) {
'layer': 'macrocounty',
'macrocounty': ['macrocounty name'],
'region': ['region name'],
'region_a': ['region abbrv'],
'macroregion': ['macroregion name'],
'country_a': ['country code'],
'country': ['country name']
};
t.equal(generator(doc),'macrocounty name, country name');
t.equal(generator(doc),'macrocounty name, region abbrv, country name');
t.end();
});

Expand All @@ -156,6 +200,7 @@ module.exports.tests.default_country = function(test, common) {
'name': { 'default': 'region name' },
'layer': 'region',
'region': ['region name'],
'region_a': ['region abbrv'],
'macroregion': ['macroregion name'],
'country_a': ['country code'],
'country': ['country name']
Expand Down
42 changes: 42 additions & 0 deletions test/labelGenerator_examples.js
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,48 @@ module.exports.tests.france = function(test, common) {
t.end();
});

// Locality and region carry exactly the same name, so the expected label goes
// straight from the city to the country, without the region abbreviation 'SP'.
test('São Paulo, major city in Brazil', function(t) {
const doc = {
'name': { 'default': 'São Paulo'},
'layer': 'locality',
'locality': ['São Paulo'],
'region': ['São Paulo'],
'region_a': ['SP'],
'country_a': ['BRA'],
'country': ['Brazil']
};
t.equal(generator(doc),'São Paulo, Brazil');
t.end();
});

// Here the region name is spelled without the accent ('Sao Paulo') while the
// locality keeps it; the expected label still omits the region, i.e. the accent
// difference is expected not to matter for the comparison.
test('São Paulo, major city in Brazil. Language set to English', function(t) {
const doc = {
'name': { 'default': 'São Paulo'},
'layer': 'locality',
'locality': ['São Paulo'],
'region': ['Sao Paulo'],
'region_a': ['SP'],
'country_a': ['BRA'],
'country': ['Brazil']
};
t.equal(generator(doc),'São Paulo, Brazil');
t.end();
});

// Counter-example: the locality is in a region with a different name
// ('Amazonas'), so the expected label includes the region abbreviation 'AM'.
test('São Paulo, Amazonas - small village in Brazil', function(t) {
const doc = {
'name': { 'default': 'São Paulo'},
'layer': 'locality',
'locality': ['São Paulo'],
'region': ['Amazonas'],
'region_a': ['AM'],
'country_a': ['BRA'],
'country': ['Brazil']
};
t.equal(generator(doc),'São Paulo, AM, Brazil');
t.end();
});

};
module.exports.tests.italy = function(test, common) {
test('Italian street address', function(t) {
Expand Down

0 comments on commit 85c36e4

Please sign in to comment.