@@ -2,15 +2,29 @@ import { fileURLToPath } from 'url'
2
2
import path from 'path'
3
3
import languages from './languages.js'
4
4
import { allVersions } from './all-versions.js'
5
- import createTree from './create-tree.js'
5
+ import createTree , { getBasePath } from './create-tree.js'
6
6
import renderContent from './render-content/index.js'
7
7
import loadSiteData from './site-data.js'
8
8
import nonEnterpriseDefaultVersion from './non-enterprise-default-version.js'
9
+ import Page from './page.js'
9
10
// Directory that contains this module file, derived from `import.meta.url`.
const __dirname = path . dirname ( fileURLToPath ( import . meta. url ) )
10
11
// Every version identifier, i.e. the keys of the imported `allVersions` object.
const versions = Object . keys ( allVersions )
11
12
// The subset of `versions` whose identifier starts with 'enterprise-server@'.
const enterpriseServerVersions = versions . filter ( ( v ) => v . startsWith ( 'enterprise-server@' ) )
12
13
// NOTE(review): presumably the options handed to `renderContent`; no use of
// this constant is visible in this part of the file, so confirm at the call site.
const renderOpts = { textOnly : true , encodeEntities : true }
13
14
15
// Exceptions to the rule that an English page missing from a translation
// gets "backfilled" from its English equivalent.
// These URI prefixes should match what you'll find in crowdin.yml.
// If a URI starts with one of these prefixes, it basically means we don't
// bother to "backfill" a translation in its spot.
// For example, `/en/github/site-policy-deprecated/foo` works
// only in English and we don't bother making `/ja/github/site-policy-deprecated/foo`
// work too.
// Frozen because this is a shared module-level constant that must never be
// mutated at runtime.
const TRANSLATION_DRIFT_EXCEPTIONS = Object.freeze([
  'github/site-policy-deprecated',
  // Early access stuff never has translations.
  'early-access',
])
27
+
14
28
/**
15
29
* We only need to initialize pages _once per language_ since pages don't change per version. So we do that
16
30
* first since it's the most expensive work. This gets us a nested object with pages attached that we can use
@@ -152,8 +166,112 @@ export function createMapFromArray(pageList) {
152
166
}
153
167
154
168
/**
 * Build the page map, after first reconciling translated pages against
 * their English equivalents via `correctTranslationOrphans`.
 *
 * @param {Array<object>} [pageList] - Pages to index. When omitted (or
 *   otherwise falsy) the list is loaded with `loadPageList()`.
 * @returns {Promise<object>} Whatever `createMapFromArray` returns for the
 *   corrected list of pages.
 */
export async function loadPageMap(pageList) {
  // Only hit the (expensive) loader when the caller did not supply a list.
  const pages = await correctTranslationOrphans(pageList || (await loadPageList()))
  return createMapFromArray(pages)
}
173
+
174
/**
 * Reconcile translated pages against the English pages.
 *
 * - If a translation page exists that doesn't have an English equivalent,
 *   remove it.
 * - If an English page exists that doesn't have a translation equivalent,
 *   add it ("backfill"), unless its relative path starts with one of the
 *   `TRANSLATION_DRIFT_EXCEPTIONS` prefixes.
 *
 * Note, this function is exported purely for the benefit of the unit tests.
 *
 * @param {Array<object>} pageList - Pages to reconcile. Only `languageCode`
 *   and `relativePath` are read from each page.
 * @param {string|null} [basePath=null] - Base path used when creating the
 *   backfilled pages. Defaults to the English base path; unit tests can pass
 *   their "tests/fixtures" directory instead.
 * @returns {Promise<Array<object>>} The kept pages in their original order,
 *   followed by the backfilled pages.
 */
export async function correctTranslationOrphans(pageList, basePath = null) {
  const englishRelativePaths = new Set()
  for (const page of pageList) {
    if (page.languageCode === 'en') {
      englishRelativePaths.add(page.relativePath)
    }
  }

  // Prime the Map with an empty set for each language prefix.
  // It's important that we do this for *every* language rather than
  // just populating `nonEnglish` based on those pages that *are* present.
  // Otherwise, we won't have an index of all the languages
  // that *might* be missing.
  const nonEnglish = new Map()
  for (const languageCode of Object.keys(languages)) {
    if (languageCode !== 'en') {
      nonEnglish.set(languageCode, new Set())
    }
  }

  // By default, when backfilling, we set the `basePath` to be that of
  // English. But for the benefit of being able to do unit tests,
  // we make this an optional argument. Then, unit tests can use
  // its "tests/fixtures" directory.
  const englishBasePath = basePath || getBasePath(languages.en.dir)

  const isDevelopment = process.env.NODE_ENV === 'development'

  // Filter out all non-English pages that appear to be excess.
  // E.g. if an English doc was renamed from `content/foo.md` to
  // `content/bar.md` what will happen is that `translations/*/content/foo.md`
  // will still linger around and we want to remove that even if it was
  // scooped up from disk.
  const newPageList = []
  for (const page of pageList) {
    if (page.languageCode === 'en') {
      // English pages are never considered "excess"
      newPageList.push(page)
      continue
    }

    // If this translation page exists in English, keep it but also
    // add it to the set of relative paths that is known.
    if (englishRelativePaths.has(page.relativePath)) {
      // A page whose language is not a key of `languages` has no primed set.
      // Keep the page rather than crashing; it simply isn't tracked for
      // backfilling.
      const knownPaths = nonEnglish.get(page.languageCode)
      if (knownPaths) {
        knownPaths.add(page.relativePath)
      }
      newPageList.push(page)
      continue
    }

    // All else is considered "excess" and should be excluded from
    // the new list of pages. So do nothing.
    if (isDevelopment) {
      console.log(
        `For ${page.languageCode}, omit the page ${page.relativePath} even though it's present on disk.`
      )
    }
  }

  const pageLoadPromises = []
  for (const relativePath of englishRelativePaths) {
    // Whether this path matches an exception prefix. Computed lazily, and at
    // most once per path: when the translation pipeline has done its magic,
    // missing translations should be very rare, so it's unnecessary to do
    // this check on every single English relativePath.
    let isException

    for (const [languageCode, relativePaths] of nonEnglish) {
      if (relativePaths.has(relativePath)) {
        continue
      }

      // At this point, we've found an English `relativePath` that is
      // not used by this language.
      // But before we decide to "backfill" it from the English equivalent
      // we first need to figure out if it should be excluded.
      if (isException === undefined) {
        isException = TRANSLATION_DRIFT_EXCEPTIONS.some((exception) =>
          relativePath.startsWith(exception)
        )
      }
      if (isException) {
        // The exception applies to every language, so skip the rest.
        break
      }

      // The magic right here!
      // We can't clone instances of class Page. We need to create them for
      // this language, but we use the English relative path so it has
      // something to read.
      // For example, if we have figured out that
      // `translations/ja-JP/content/foo.md` doesn't exist, we pretend
      // that we can use `foo.md` and the base path of `content/`.
      pageLoadPromises.push(
        Page.init({
          basePath: englishBasePath,
          relativePath,
          languageCode,
        })
      )
      if (isDevelopment) {
        console.log(`Backfill ${relativePath} for ${languageCode} from English`)
      }
    }
  }
  const additionalPages = await Promise.all(pageLoadPromises)
  newPageList.push(...additionalPages)

  return newPageList
}
158
276
159
277
export default {
0 commit comments