@@ -2,15 +2,29 @@ import { fileURLToPath } from 'url'
22import path from 'path'
33import languages from './languages.js'
44import { allVersions } from './all-versions.js'
5- import createTree from './create-tree.js'
5+ import createTree , { getBasePath } from './create-tree.js'
66import renderContent from './render-content/index.js'
77import loadSiteData from './site-data.js'
88import nonEnterpriseDefaultVersion from './non-enterprise-default-version.js'
9+ import Page from './page.js'
// Directory containing this module; ES modules have no built-in `__dirname`,
// so it is derived from `import.meta.url`.
const __dirname = path.dirname(fileURLToPath(import.meta.url))

// Every version identifier, i.e. the keys of `allVersions`.
const versions = Object.keys(allVersions)

// Only the version identifiers that start with "enterprise-server@".
const enterpriseServerVersions = versions.filter((versionName) =>
  versionName.startsWith('enterprise-server@')
)

// Shared rendering options: text-only output with entities encoded.
// NOTE(review): presumably handed to `renderContent`; the usage is outside this view.
const renderOpts = { textOnly: true, encodeEntities: true }
1314
// Exceptions to the "backfill missing translations from English" rule.
// These relative-path prefixes should match what you'll find in crowdin.yml.
// If an English page's relative path starts with one of these prefixes, we
// don't bother to "backfill" a translation in its spot.
// For example, `/en/github/site-policy-deprecated/foo` works only in English
// and we don't bother making `/ja/github/site-policy-deprecated/foo` work too.
//
// Frozen because this is a shared module-level constant that is only ever
// read; freezing turns an accidental mutation into a hard error.
const TRANSLATION_DRIFT_EXCEPTIONS = Object.freeze([
  'github/site-policy-deprecated',
  // Early access stuff never has translations.
  'early-access',
])
27+
1428/**
1529 * We only need to initialize pages _once per language_ since pages don't change per version. So we do that
1630 * first since it's the most expensive work. This gets us a nested object with pages attached that we can use
@@ -152,8 +166,112 @@ export function createMapFromArray(pageList) {
152166}
153167
/**
 * Build the page map for the site.
 *
 * Uses the given `pageList` when it is truthy, otherwise loads the list via
 * `loadPageList()`. The list is passed through `correctTranslationOrphans()`
 * before being turned into a map by `createMapFromArray()`.
 *
 * @param {Array} [pageList] - Optional pre-loaded list of pages.
 * @returns {Promise<*>} Whatever `createMapFromArray()` returns for the corrected list.
 */
export async function loadPageMap(pageList) {
  const loadedPages = pageList || (await loadPageList())
  const correctedPages = await correctTranslationOrphans(loadedPages)
  return createMapFromArray(correctedPages)
}
173+
/**
 * Tell whether an English relative path is exempt from backfilling, i.e.
 * whether it starts with one of the `TRANSLATION_DRIFT_EXCEPTIONS` prefixes.
 *
 * @param {string} relativePath - Relative path of an English page.
 * @returns {boolean} `true` when the path must not be backfilled.
 */
function isTranslationDriftException(relativePath) {
  return TRANSLATION_DRIFT_EXCEPTIONS.some((prefix) => relativePath.startsWith(prefix))
}

/**
 * Reconcile translated pages with the English pages.
 *
 * If a translation page exists that doesn't have an English equivalent,
 * remove it.
 * If an English page exists that doesn't have a translation equivalent,
 * add it (a "backfill" created from the English file).
 *
 * Note, this function is exported purely for the benefit of the unit tests.
 *
 * @param {Array<{languageCode: string, relativePath: string}>} pageList
 *   Pages as loaded from disk. The input array is not mutated.
 * @param {string|null} [basePath=null] - Base path used when backfilling.
 *   Defaults to the English base path; unit tests can pass their
 *   "tests/fixtures" directory instead.
 * @returns {Promise<Array>} The kept pages in their original order, followed
 *   by the backfilled pages.
 */
export async function correctTranslationOrphans(pageList, basePath = null) {
  const englishRelativePaths = new Set()
  for (const page of pageList) {
    if (page.languageCode === 'en') {
      englishRelativePaths.add(page.relativePath)
    }
  }

  // Prime the Map with an empty set for each language prefix.
  // It's important that we do this for *every* language rather than
  // just populating `nonEnglish` based on those pages that *are* present.
  // Otherwise, we won't have an index of all the languages
  // that *might* be missing.
  const nonEnglish = new Map()
  for (const languageCode of Object.keys(languages)) {
    if (languageCode !== 'en') {
      nonEnglish.set(languageCode, new Set())
    }
  }

  // By default, when backfilling, we set the `basePath` to be that of
  // English. The optional argument exists so unit tests can override it.
  const englishBasePath = basePath || getBasePath(languages.en.dir)

  const isDevelopment = process.env.NODE_ENV === 'development'

  // Filter out all non-English pages that appear to be excess.
  // E.g. if an English doc was renamed from `content/foo.md` to
  // `content/bar.md` what will happen is that `translations/*/content/foo.md`
  // will still linger around and we want to remove that even if it was
  // scooped up from disk.
  const newPageList = []
  for (const page of pageList) {
    if (page.languageCode === 'en') {
      // English pages are never considered "excess"
      newPageList.push(page)
      continue
    }

    // If this translation page exists in English, keep it but also
    // add it to the set of relative paths that is known.
    if (englishRelativePaths.has(page.relativePath)) {
      // A page may carry a language code that is not configured in
      // `languages`. Such a page is kept, but there is no set to record it
      // in (and nothing gets backfilled for that language), so guard the
      // lookup instead of crashing with a TypeError.
      const knownRelativePaths = nonEnglish.get(page.languageCode)
      if (knownRelativePaths) {
        knownRelativePaths.add(page.relativePath)
      }
      newPageList.push(page)
      continue
    }

    // All else is considered "excess" and is left out of the new list.
    if (isDevelopment) {
      console.log(
        `For ${page.languageCode}, omit the page ${page.relativePath} even though it's present on disk.`
      )
    }
  }

  const pageLoadPromises = []
  for (const relativePath of englishRelativePaths) {
    // The exception check does not depend on the language, so it is computed
    // at most once per path, and only when some language is actually missing
    // the page. In general, when the translation pipeline has done its magic,
    // that should be rare, so most paths never pay for the check at all.
    let isException = null
    for (const [languageCode, relativePaths] of nonEnglish) {
      if (relativePaths.has(relativePath)) {
        continue
      }

      // We've found an English `relativePath` that is not used by this
      // language. Before backfilling, figure out if it should be excluded.
      if (isException === null) {
        isException = isTranslationDriftException(relativePath)
      }
      if (isException) {
        // Excluded for every language alike; nothing more to do for this path.
        break
      }

      // We can't clone instances of class Page. We need to create them for
      // this language, but using the English base path and relative path so
      // there is something to read. For example, if we have figured out that
      // `translations/ja-JP/content/foo.md` doesn't exist, we pretend
      // that we can use `foo.md` and the base path of `content/`.
      // NOTE(review): assumes `Page.init` always resolves to a page object;
      // confirm it cannot resolve to a falsy value for an existing English file.
      pageLoadPromises.push(
        Page.init({
          basePath: englishBasePath,
          relativePath,
          languageCode,
        })
      )
      if (isDevelopment) {
        console.log(`Backfill ${relativePath} for ${languageCode} from English`)
      }
    }
  }

  const additionalPages = await Promise.all(pageLoadPromises)
  newPageList.push(...additionalPages)

  return newPageList
}
158276
159277export default {
0 commit comments