forked from github/docs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpage-data.js
290 lines (253 loc) · 10.3 KB
/
page-data.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
import { fileURLToPath } from 'url'
import path from 'path'
import languages from './languages.js'
import { allVersions } from './all-versions.js'
import createTree, { getBasePath } from './create-tree.js'
import renderContent from './render-content/index.js'
import loadSiteData from './site-data.js'
import nonEnterpriseDefaultVersion from './non-enterprise-default-version.js'
import Page from './page.js'
import shortVersionsMiddleware from '../middleware/contextualizers/short-versions.js'
// ES modules have no built-in `__dirname`, so derive this file's directory from its URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url))
// Every version identifier, i.e. the keys of `allVersions`.
const versions = Object.keys(allVersions)
// The subset of `versions` whose identifier starts with 'enterprise-server@'.
const enterpriseServerVersions = versions.filter((v) => v.startsWith('enterprise-server@'))
// Options passed to Liquid's `parseAndRender` when `versionPages` renders page titles.
const renderOpts = { textOnly: true, encodeEntities: true }
// Exceptions to translation backfilling (see `correctTranslationOrphans`).
// These URI prefixes should match what you'll find in crowdin.yml.
// If an English page's relative path starts with one of these prefixes, we
// don't bother to "backfill" a translation in its spot.
// For example, `/en/github/site-policy-deprecated/foo` works
// only in English and we don't bother making `/ja/github/site-policy-deprecated/foo`
// work too.
const TRANSLATION_DRIFT_EXCEPTIONS = [
'github/site-policy-deprecated',
// Early access stuff never has translations.
'early-access',
]
/**
 * Build the unversioned page tree for each language.
 *
 * Pages don't change per version, so this (the most expensive step) only has to
 * run once per language. The resulting nested object, with pages attached, is the
 * basis for the siteTree once versioning is applied, and the pageList can be
 * derived from it as well.
 *
 * @param {string[] | null} [languagesOnly] - Keys of `languages` to restrict loading
 *   to. Every language is loaded when this is omitted or falsy.
 * @returns {Promise<Object>} Object keyed by each language's `code`, holding the
 *   value `createTree` resolved to for that language's `content` directory.
 * @throws {Error} When `languagesOnly` is truthy but not an array.
 */
export async function loadUnversionedTree(languagesOnly = null) {
  if (languagesOnly && !Array.isArray(languagesOnly)) {
    throw new Error("'languagesOnly' has to be an array")
  }

  const treeByLanguage = {}
  const pendingTrees = []

  for (const [languageKey, langObj] of Object.entries(languages)) {
    // Skip languages the caller did not ask for.
    if (languagesOnly && !languagesOnly.includes(languageKey)) continue

    // Start every language's tree build right away so they run concurrently.
    pendingTrees.push(
      (async () => {
        const contentDir = path.posix.join(__dirname, '..', langObj.dir, 'content')
        treeByLanguage[langObj.code] = await createTree(contentDir, langObj)
      })()
    )
  }

  await Promise.all(pendingTrees)
  return treeByLanguage
}
/**
 * Build the siteTree: a nested object with pages for every language and version.
 * It is useful for navigation because it keeps parent, child, and sibling
 * relationships:
 *
 *   siteTree[languageCode][version].childPages[<array of pages>].childPages[<array of pages>] (etc...)
 *
 * Starting from the unversioned tree of all pages per language, each language's
 * tree is walked once per version (see `versionPages`) to:
 * 1. Add a versioned href to every item (the permalink relevant to that version).
 * 2. Drop any child pages that are not available in that version.
 *
 * Order of languages and versions doesn't matter, but order of child page arrays
 * DOES matter (for navigation).
 *
 * @param {Object} [unversionedTree] - Result of `loadUnversionedTree`; loaded on
 *   demand when omitted.
 * @param {Object} [siteData] - Site data keyed by language code; falls back to
 *   `loadSiteData()` when omitted.
 * @returns {Promise<Object>} `siteTree[langCode][version]` for every key of
 *   `languages` and every entry of `versions`.
 */
export async function loadSiteTree(unversionedTree, siteData) {
  const site = siteData || loadSiteData()
  // Shallow copy so the caller's tree object itself is never written to.
  const rawTree = { ...(unversionedTree || (await loadUnversionedTree())) }
  const siteTree = {}

  // Versions one language: every version gets its own shallow copy of the root.
  const buildLanguage = async (langCode) => {
    const treePerVersion = {}
    const pendingVersions = versions.map(async (version) => {
      const rootCopy = { ...rawTree[langCode] }
      treePerVersion[version] = await versionPages(rootCopy, version, langCode, site)
    })
    await Promise.all(pendingVersions)
    siteTree[langCode] = treePerVersion
  }

  const pendingLanguages = Object.keys(languages).map((langCode) => buildLanguage(langCode))
  await Promise.all(pendingLanguages)

  return siteTree
}
/**
 * Apply one version to a tree node, in place, and recurse into its children.
 *
 * Sets `href`, `renderedFullTitle` and (when the page has a short title)
 * `renderedShortTitle` on `obj`, then replaces `obj.childPages` with versioned
 * shallow copies of the children that apply to `version`.
 *
 * @param {Object} obj - Tree node with a `page` and optionally `childPages`. Mutated.
 * @param {string} version - Version to apply.
 * @param {string} langCode - Language code, used as `currentLanguage` and to pick
 *   `site[langCode].site`.
 * @param {Object} site - Site data keyed by language code.
 * @returns {Promise<Object>} The same `obj` that was passed in.
 */
export async function versionPages(obj, version, langCode, site) {
  // A permalink matches when it names this version directly, or when it is the
  // 'homepage' permalink and we are versioning the non-enterprise default version.
  const isPermalinkForVersion = (permalink) => {
    if (permalink.pageVersion === version) return true
    return permalink.pageVersion === 'homepage' && version === nonEnterpriseDefaultVersion
  }

  // Add a versioned href as a convenience for use in layouts.
  obj.href = obj.page.permalinks.find(isPermalinkForVersion).href

  const req = {
    context: {
      allVersions,
      enterpriseServerVersions,
      currentLanguage: langCode,
      currentVersion: version,
      site: site[langCode].site,
    },
  }
  await shortVersionsMiddleware(req, null, () => {})

  // The Liquid parseAndRender method is MUCH faster than renderContent or renderProp.
  // That shortcut only works for titles and short titles: they contain no other
  // Markdown that needs converting to HTML, so parsing _only_ Liquid is enough.
  // Titles without a '{' are used as they are.
  if (obj.page.title.includes('{')) {
    obj.renderedFullTitle = await renderContent.liquid.parseAndRender(
      obj.page.title,
      req.context,
      renderOpts
    )
  } else {
    obj.renderedFullTitle = obj.page.title
  }

  if (obj.page.shortTitle) {
    if (obj.page.shortTitle.includes('{')) {
      obj.renderedShortTitle = await renderContent.liquid.parseAndRender(
        obj.page.shortTitle,
        req.context,
        renderOpts
      )
    } else {
      obj.renderedShortTitle = obj.page.shortTitle
    }
  }

  if (!obj.childPages) return obj

  // Drop child pages that do not apply to the current version...
  const applicableChildren = obj.childPages.filter((child) =>
    child.page.applicableVersions.includes(version)
  )
  // ...and version the remaining ones recursively, each on its own shallow copy.
  const pendingChildren = applicableChildren.map((child) =>
    versionPages({ ...child }, version, langCode, site)
  )
  obj.childPages = await Promise.all(pendingChildren)

  return obj
}
/**
 * Derive a flat array of Page objects from the unversioned tree.
 *
 * Each language's tree is walked depth-first, parent before children, in the
 * order of `languagesOnly` (or of the keys of `languages` when not given).
 * A node without a `page` is skipped together with everything beneath it.
 *
 * @param {Object} [unversionedTree] - Result of `loadUnversionedTree`; loaded
 *   (restricted to `languagesOnly`) when omitted.
 * @param {string[] | null} [languagesOnly] - Language codes to include.
 * @returns {Promise<Array>} The collected `page` values.
 * @throws {Error} When `languagesOnly` is truthy but not an array.
 */
export async function loadPageList(unversionedTree, languagesOnly = null) {
  if (languagesOnly && !Array.isArray(languagesOnly)) {
    throw new Error("'languagesOnly' has to be an array")
  }

  const rawTree = unversionedTree || (await loadUnversionedTree(languagesOnly))
  const pageList = []

  // Pre-order walk: record the node's page, then descend into its children.
  const collectPages = (node) => {
    if (!node.page) return
    pageList.push(node.page)
    if (!node.childPages) return
    for (const child of node.childPages) {
      collectPages(child)
    }
  }

  for (const langCode of languagesOnly || Object.keys(languages)) {
    collectPages(rawTree[langCode])
  }

  return pageList
}
export const loadPages = loadPageList
/**
 * Index a list of pages by permalink for fast lookup.
 *
 * @param {Array} pageList - Pages, each with a `permalinks` array of `{ href }` items.
 * @returns {Object} Plain object mapping every permalink `href` to its page. When
 *   two pages share an href, the one later in `pageList` wins.
 */
export function createMapFromArray(pageList) {
  const pagesByHref = {}
  for (const page of pageList) {
    for (const permalink of page.permalinks) {
      pagesByHref[permalink.href] = page
    }
  }
  return pagesByHref
}
/**
 * Build the permalink-to-page lookup object.
 *
 * The page list (loaded via `loadPageList` when not supplied) is first passed
 * through `correctTranslationOrphans`, then indexed with `createMapFromArray`.
 *
 * @param {Array} [pageList] - Pages to index.
 * @returns {Promise<Object>} Object keyed by permalink href.
 */
export async function loadPageMap(pageList) {
  const sourcePages = pageList || (await loadPageList())
  const correctedPages = await correctTranslationOrphans(sourcePages)
  return createMapFromArray(correctedPages)
}
/**
 * Reconcile translated pages against the English pages.
 *
 * - If a translation page exists that doesn't have an English equivalent,
 *   remove it.
 * - If an English page exists that doesn't have a translation equivalent,
 *   add it (backfilled from the English file), unless its relative path starts
 *   with one of the TRANSLATION_DRIFT_EXCEPTIONS prefixes.
 *
 * Note, this function is exported purely for the benefit of the unit tests.
 *
 * @param {Array} pageList - Pages with `languageCode` and `relativePath`.
 * @param {string | null} [basePath] - Base path to read backfilled pages from.
 *   Defaults to the English base path; unit tests can pass their
 *   "tests/fixtures" directory instead.
 * @returns {Promise<Array>} A new list: the kept pages in their original order,
 *   followed by the backfilled pages.
 */
export async function correctTranslationOrphans(pageList, basePath = null) {
  const englishRelativePaths = new Set()
  for (const { languageCode, relativePath } of pageList) {
    if (languageCode === 'en') englishRelativePaths.add(relativePath)
  }

  // Start with an empty set for *every* non-English language, not only the
  // languages that show up in `pageList`. Otherwise a language with no pages
  // at all would never be noticed as missing anything.
  const knownPathsByLanguage = new Map(
    Object.keys(languages)
      .filter((code) => code !== 'en')
      .map((code) => [code, new Set()])
  )

  const englishBasePath = basePath || getBasePath(languages.en.dir)

  // Keep every English page, and every translated page that still has an
  // English counterpart. Anything else is excess: e.g. after an English doc was
  // renamed from `content/foo.md` to `content/bar.md`, the stale
  // `translations/*/content/foo.md` may still have been scooped up from disk.
  const keptPages = []
  for (const page of pageList) {
    if (page.languageCode !== 'en') {
      if (!englishRelativePaths.has(page.relativePath)) continue
      // Remember that this language does have this relative path.
      knownPathsByLanguage.get(page.languageCode).add(page.relativePath)
    }
    keptPages.push(page)
  }

  // This check runs only for paths a language is actually missing. Once the
  // translation pipeline has done its work that should be rare, so there is no
  // point testing every single English relativePath against the exceptions.
  const isDriftException = (relativePath) =>
    TRANSLATION_DRIFT_EXCEPTIONS.some((prefix) => relativePath.startsWith(prefix))

  const pendingBackfills = []
  for (const relativePath of englishRelativePaths) {
    for (const [languageCode, knownPaths] of knownPathsByLanguage) {
      if (knownPaths.has(relativePath)) continue
      if (isDriftException(relativePath)) continue

      // Instances of class Page can't be cloned, so a new one is created for
      // this language, pointed at the English base path and relative path so it
      // has something to read. E.g. if `translations/ja-JP/content/foo.md`
      // doesn't exist, we pretend we can use `foo.md` under `content/`.
      pendingBackfills.push(
        Page.init({
          basePath: englishBasePath,
          relativePath,
          languageCode,
        })
      )
    }
  }

  const backfilledPages = await Promise.all(pendingBackfills)
  return [...keptPages, ...backfilledPages]
}
// Default export bundling the loader functions; `loadPages` is an alias of `loadPageList`.
export default {
loadUnversionedTree,
loadSiteTree,
loadPages: loadPageList,
loadPageMap,
}