-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathwikidata-backend-adapter.js
144 lines (121 loc) · 4.76 KB
/
wikidata-backend-adapter.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
'use strict';
// External deps
const request = require('request-promise-native');
const config = require('config');
const escapeHTML = require('escape-html');
const debug = require('../util/debug');
// Internal deps
const AbstractBackendAdapter = require('./abstract-backend-adapter');
const languages = require('../locales/languages');
// How do lib.reviews language codes translate to Wikidata language codes?
// Since Wikidata supports a superset of languages and most language codes
// are identical, we only enumerate exceptions.
//
// Keys are lib.reviews ("native") codes, values are the corresponding
// Wikidata codes. getWikidataLanguageCode() reads this map by key, while
// getNativeLanguageCode() scans the values case-insensitively to map back.
const nativeToWikidata = {
pt: 'pt-br',
'pt-PT': 'pt'
};
// Endpoint that lookup() sends its wbgetentities requests to.
const apiBaseURL = 'https://www.wikidata.org/w/api.php';
// Backend adapter that resolves a Wikidata item URL to multilingual label
// and description strings by querying the Wikidata API (wbgetentities).
class WikidataBackendAdapter extends AbstractBackendAdapter {

  constructor() {
    super();
    // Matches http(s) URLs on wikidata.org / www.wikidata.org that point to
    // an item via /entity/ or /wiki/, optionally followed by a #fragment.
    // Dots are escaped and the "s" / "www." parts may occur at most once, so
    // look-alike hosts (e.g., "wwwXwikidataYorg") are rejected. The Q number
    // is capture group 4; the "i" flag also admits a lower case "q".
    this.supportedPattern = /^http(s)?:\/\/(www\.)?wikidata\.org\/(entity|wiki)\/(Q\d+)(?:#.*)?$/i;
    this.supportedFields = ['label', 'description'];
    this.sourceID = 'wikidata';
    this.sourceURL = 'https://www.wikidata.org/';
  }

  // Look up label and description for the Wikidata item referenced by `url`.
  //
  // Resolves to `{ data: { label, description }, sourceID }`, where label and
  // description are multilingual string objects keyed by lib.reviews language
  // code (label capped at 512 characters, description at 256).
  //
  // Rejects with an Error if the URL is not a supported Wikidata URL, if the
  // API response does not contain the entity, or if no label exists in any
  // supported language. Errors raised by the HTTP request itself propagate
  // unchanged.
  async lookup(url) {
    // String() lets non-string input fail with the descriptive error below
    // rather than with a TypeError on a missing method.
    const match = this.supportedPattern.exec(String(url));
    if (!match)
      throw new Error('URL does not appear to contain a Q number (e.g., Q42) or is not a Wikidata URL.');

    // in case the URL had a lower case "q"
    const qNumber = match[4].toUpperCase();

    // Note we don't specify fallback, so we won't get results for languages
    // that don't have content
    const options = {
      uri: apiBaseURL,
      qs: {
        action: 'wbgetentities',
        format: 'json',
        languages: this.getAcceptedWikidataLanguageList(),
        props: 'labels|descriptions',
        ids: qNumber
      },
      headers: {
        'User-Agent': config.adapterUserAgent
      },
      json: true,
      timeout: config.adapterTimeout
    };

    const data = await request(options);
    debug.adapters(`Received data from Wikidata adapter:\n${JSON.stringify(data, null, 2)}`);

    // `!data` comes first because typeof null is 'object'; without it a null
    // response body would blow up on the property reads that follow.
    if (!data || typeof data !== 'object' || !data.success || !data.entities ||
      !data.entities[qNumber])
      throw new Error(`Did not get a valid Wikidata entity for query: ${qNumber}`);

    const entity = data.entities[qNumber];

    // Descriptions result will be an empty object if no description is
    // available, so will always pass this test
    if (!entity.labels || !entity.descriptions)
      throw new Error(`Did not get label and description information for query: ${qNumber}`);

    // Get multilingual strings for descriptions and labels
    const description = this.convertToMlString(entity.descriptions, 256);
    const label = this.convertToMlString(entity.labels, 512);

    // A description is optional, a label is not
    if (!Object.keys(label).length)
      throw new Error(`Did not get a label for ${qNumber} in any supported language.`);

    return {
      data: {
        label,
        description
      },
      sourceID: this.sourceID
    };
  }

  // Convert a Wikidata string object to a lib.reviews multilingual string.
  // They are similar, but language codes differ, and Wikidata nests
  // one level deeper in order to sometimes convey that a string
  // represents a fallback for another language.
  //
  // Wikidata strings may also contain unescaped special characters,
  // while ml-strings may not, and we impose a maximum length if provided
  // (applied to the escaped length).
  //
  // wdObj: object mapping Wikidata language codes to `{ language, value }`
  //   entries; a missing object is treated as empty.
  // maxLength: optional number; each escaped string is cut to at most this
  //   many characters.
  // Returns a plain object mapping lib.reviews language codes to escaped
  // strings. Languages lib.reviews does not know, fallback entries, malformed
  // entries and empty values are left out.
  convertToMlString(wdObj, maxLength) {
    const mlStr = {};
    for (const language of Object.keys(wdObj || {})) {
      const native = this.getNativeLanguageCode(language);
      // Can't handle this language in lib.reviews, ignore
      if (native === null)
        continue;

      const entry = wdObj[language];
      // Only accept well-formed entries whose own language matches the key
      // they are filed under (anything else is a fallback from another
      // language) and that carry a non-empty value.
      if (entry === null || typeof entry !== 'object' || entry.language !== language ||
        !entry.value)
        continue;

      let wdStr = escapeHTML(entry.value);
      // NOTE: the cut is made on the escaped string, so an HTML entity that
      // straddles the limit ends up shortened.
      if (typeof maxLength === 'number')
        wdStr = wdStr.substring(0, maxLength);

      // Store the escaped and length-limited string
      mlStr[native] = wdStr;
    }
    return mlStr;
  }

  // Return the Wikimedia code for a lib.reviews language code
  getWikidataLanguageCode(language) {
    const code = nativeToWikidata[language] || language;
    // WMF codes are consistently lower case
    return code.toLowerCase();
  }

  // Return the native code for a Wikidata language code. Returns null if
  // not a valid native language.
  getNativeLanguageCode(language) {
    const wanted = language.toUpperCase();
    // Explicit exceptions win over identical codes: e.g., Wikidata "pt" is
    // lib.reviews "pt-PT" even though "pt" is itself a lib.reviews code.
    for (const native of Object.keys(nativeToWikidata)) {
      if (nativeToWikidata[native].toUpperCase() === wanted)
        return native;
    }
    return languages.isValid(language) ? language : null;
  }

  // Return array of the codes we can handle
  getAcceptedWikidataLanguageCodes() {
    return languages
      .getValidLanguages()
      .map(language => this.getWikidataLanguageCode(language));
  }

  // Return codes in list format expected by API ("|"-separated)
  getAcceptedWikidataLanguageList() {
    return this.getAcceptedWikidataLanguageCodes().join('|');
  }

}
module.exports = WikidataBackendAdapter;