Skip to content

repo sync #23026

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion lib/languages.js
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,8 @@ if (process.env.ENABLED_LANGUAGES) {
Object.keys(languages).forEach((code) => {
if (!process.env.ENABLED_LANGUAGES.includes(code)) delete languages[code]
})
console.log(`ENABLED_LANGUAGES: ${process.env.ENABLED_LANGUAGES}`)
// This makes the translation health report not valid JSON
// console.log(`ENABLED_LANGUAGES: ${process.env.ENABLED_LANGUAGES}`)
}
} else if (process.env.NODE_ENV === 'test') {
// Unless explicitly set, when running tests default to just English
Expand Down
14 changes: 8 additions & 6 deletions lib/page-data.js
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,13 @@ async function translateTree(dir, langObj, enTree) {
// has something wrong with, say, the `versions` frontmatter key
// we don't even care because we won't be using it anyway.
if (translatableFrontmatterKeys.includes(property)) {
const msg = `frontmatter error on '${property}' (in ${fullPath}) so falling back to English`
const message = `frontmatter error on '${property}' (in ${fullPath}) so falling back to English`
if (DEBUG_TRANSLATION_FALLBACKS) {
console.warn(msg)
// The object format is so the health report knows which path the issue is on
console.warn({ message, path: relativePath })
}
if (THROW_TRANSLATION_ERRORS) {
throw new Error(msg)
throw new Error(message)
}
data[property] = enData[property]
}
Expand All @@ -128,12 +129,13 @@ async function translateTree(dir, langObj, enTree) {
if (error.code === 'ENOENT' || error instanceof FrontmatterParsingError) {
data = enData
content = enPage.markdown
const msg = `Unable to initialized ${fullPath} because translation content file does not exist.`
const message = `Unable to initialize ${fullPath} because translation content file does not exist.`
if (DEBUG_TRANSLATION_FALLBACKS) {
console.warn(msg)
// The object format is so the health report knows which path the issue is on
console.warn({ message, path: relativePath })
}
if (THROW_TRANSLATION_ERRORS) {
throw new Error(msg)
throw new Error(message)
}
} else {
throw error
Expand Down
2 changes: 1 addition & 1 deletion lib/redirects/precompile.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ const EXCEPTIONS_FILE = path.join(__dirname, './static/redirect-exceptions.txt')

// This function runs at server warmup and precompiles possible redirect routes.
// It outputs them as key-value pairs within a plain JavaScript object: { oldPath: newPath }
async function precompileRedirects(pageList) {
export async function precompileRedirects(pageList) {
const allRedirects = readCompressedJsonFileFallback('./lib/redirects/static/developer.json')

const externalRedirects = readCompressedJsonFileFallback('./lib/redirects/external-sites.json')
Expand Down
4 changes: 1 addition & 3 deletions lib/render-content/plugins/rewrite-asset-urls.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,7 @@ function getNewSrc(node) {
} catch (err) {
console.warn(
`Failed to get a hash for ${src} ` +
'(This is mostly harmless and can happen with outdated translations). ' +
'Full error output:',
err
'(This is mostly harmless and can happen with outdated translations).'
)
}
}
225 changes: 95 additions & 130 deletions script/i18n/create-translation-health-report.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,155 +9,120 @@
/* Nota bene:
If you are getting more errors all of a sudden, try running this:
$ script/i18n/create-translation-health-report.js -l en -r 000
If there are any errors, const context = { ... } probably needs more data.
If there are any errors before getting the JSON output,
const context = { ... } probably needs more data.
*/

import { program } from 'commander'
import fs from 'fs/promises'
import { pick } from 'lodash-es'

import { loadPages, loadPageMap } from '../../lib/page-data.js'
import loadSiteData from '../../lib/site-data.js'
import loadRedirects from '../../lib/redirects/precompile.js'
import { allVersions, allVersionKeys } from '../../lib/all-versions.js'
import { languageKeys } from '../../lib/languages.js'
import { getProductStringFromPath } from '../../lib/path-utils.js'

program
.description('Create a translation health report for one language.')
.requiredOption('-l, --language <language>', 'The language to health check')
.requiredOption('-r, --gitref <sha>', 'Language repo latest git commit short SHA')
.option('-r, --gitref <sha>', 'Language repo latest git commit short SHA')
.parse(process.argv)

/**
 * Read the popularity data the search uses, so errors can be prioritized.
 *
 * The file `lib/search/popular-pages.json` is read line by line, each line
 * being parsed as its own JSON document. Lines that fail to parse (or that
 * cannot be destructured) are skipped on purpose.
 *
 * @returns {Promise<Object>} map of `path_article` -> `path_count`
 */
async function fetchPopularityData() {
  const rawLines = await fs.readFile('lib/search/popular-pages.json', 'utf8')
  const countsByArticle = {}
  rawLines.split('\n').forEach((rawLine) => {
    try {
      const { path_article: article, path_count: count } = JSON.parse(rawLine)
      countsByArticle[article] = count
    } catch {}
  })
  return countsByArticle
}
// Report translation fallbacks through console.warn instead of silently falling back to English
process.env.DEBUG_TRANSLATION_FALLBACKS = true
// (The throw-on-error option would stop everything, but we want to continue and generate the full report.)
// Only enable English plus the language being checked
process.env.ENABLED_LANGUAGES = `en,${program.opts().language}`

/**
 * Render `page` once per applicable version and gather the errors per version.
 *
 * @param {object} page - needs `applicableVersions` and an async `render(context, errors)`
 * @param {object} options - `language`, `data`, `redirects`, `plainPath`, `pageMap`
 * @returns {Promise<Object|undefined>} `{ [version]: errors[] }`, or `undefined`
 *   when no version produced an error
 */
async function collectPageErrors(page, { language, data, redirects, plainPath, pageMap }) {
  // Renders a single version; resolves to [version, errors] or undefined when clean
  const renderVersion = async (version) => {
    const collected = []
    try {
      const currentPath = `/${language}/${version}/${plainPath}`
      // Reference middleware/context.js for data shape
      const context = {
        ...data, // needed for all pages
        currentVersion: version, // needed for all pages
        currentLanguage: language, // needed for all pages
        currentPath, // needed for all pages
        currentVersionObj: allVersions[version], // needed for ifversion tag
        currentProduct: getProductStringFromPath(currentPath), // needed for learning-track on guides pages
        pages: pageMap, // needed for learning-track on guides pages
        redirects, // needed for learning-track on guides pages
      }
      await page.render(context, collected)
    } catch (err) {
      collected.push(err)
    }
    if (collected.length === 0) {
      return undefined
    }
    // Keep only the properties that give translators the clearest picture
    // of the error. Other fields: Object.getOwnPropertyNames(err)
    const trimmed = collected.map((err) => pick(err, ['name', 'message', 'token.content']))
    return [version, trimmed]
  }

  const applicable = allVersionKeys.filter((version) => page.applicableVersions.includes(version))
  const settled = await Promise.all(applicable.map(renderVersion))
  const entries = settled.filter(Boolean)
  return entries.length ? Object.fromEntries(entries) : undefined
}
// In debug mode, it will call console.warn ... so overriding :)
// Want to make sure the result is valid JSON
const prevConsoleWarn = console.warn
const prevConsoleError = console.error

function groupErrors(errors) {
return errors
.map((page) => Object.values(page.versions).flat())
.flat()
.map((version) => version.message)
.reduce((sum, val) => {
sum[val] = sum[val] || 0
sum[val]++
return sum
}, {})
// While this override is installed, calls to console.warn / console.error are
// recorded in `issues` instead of being printed.
let issues = []
console.warn = console.error = (...args) => {
if (args.length > 1) {
// Several arguments: stringify each one and join them into a single message
issues.push({ message: args.map(String).join(' '), score: 0 })
} else if (typeof args[0] === 'string') {
// A single plain string message
issues.push({ message: args[0], score: 0 })
} else if (args[0]?.constructor === Object) {
// A single plain object: reads its `path` and `message` properties.
// '/index.md' and '.md' are stripped from the path before it is used as the
// lookup key into `scores`.
const path = args[0].path?.replace('/index.md', '').replace('.md', '')
issues.push({ path, message: args[0].message, score: scores[path] || 0 })
}
// Any other single argument (anything that is neither a string nor a plain
// object) matches no branch and is dropped.
}

async function createReport() {
// Check that the language is valid
const { language, gitref } = program.opts()
if (!languageKeys.includes(language)) {
throw new Error(`Language ${language} is not in ${languageKeys.join()}.`)
}
// Weird import syntax, but forces it to load after process.env... changes
const { languageKeys } = await import('../../lib/languages.js')
const { loadPages, loadPageMap } = await import('../../lib/page-data.js')
const { precompileRedirects } = await import('../../lib/redirects/precompile.js')
const { allVersions, allVersionKeys } = await import('../../lib/all-versions.js')
const { getProductStringFromPath } = await import('../../lib/path-utils.js')

// Load popularity data to sort errors
const popularity = await fetchPopularityData()
// Check that the language is valid
const { language, gitref } = program.opts()
if (!languageKeys.includes(language)) {
throw new Error(`Language ${language} is not in ${languageKeys.join()}.`)
}

// Load all pages
const allPages = await loadPages()
const dataErrors = []
const data = loadSiteData(dataErrors)[language]
const pages = allPages
.filter((page) => page.languageCode === language)
// Early access pages log to the console, which would show in the report
.filter((page) => !page.relativePath.includes('early-access'))
const pageMap = await loadPageMap(pages)
const redirects = await loadRedirects(pages)
// Gather popularity data the search uses to prioritize errors
const scores = {}
const popularPagesRaw = await fs.readFile('lib/search/popular-pages.json', 'utf8')
for (const line of popularPagesRaw.split('\n')) {
try {
const row = JSON.parse(line)
scores[row.path_article] = row.path_count
} catch {}
}

// Try to render each page
const pageErrors = (
await Promise.all(
pages.map(async (page) => {
const plainPath = page.relativePath.replace('/index.md', '').replace('.md', '')
const errorsByVersion = await collectPageErrors(page, {
language,
data,
redirects,
plainPath,
pageMap,
})
if (errorsByVersion) {
return {
path: plainPath,
popularity: popularity[plainPath] || 0,
versions: errorsByVersion,
}
}
})
)
)
.filter(Boolean)
// Sort by popularity desc so the translators know what to focus on first
.sort((a, b) => b.popularity - a.popularity)
// Load all pages in language
const allPages = await loadPages()
const pages = allPages.filter((page) => page.languageCode === language)
const pageMap = await loadPageMap(pages)
const redirects = await precompileRedirects(pages)

// Begin an output report
const report = {
language,
gitref,
datetime: new Date().toJSON(),
totalPages: pages.length,
totalErrorPages: pageErrors.length,
pageErrors,
// To group errors by message instead
groupedPageErrors: groupErrors(pageErrors),
// Filter down properties to make it easier for
// translators to get the clearest information on the error
dataErrors: dataErrors.map((err) => pick(err, ['name', 'message', 'token.content'])),
// Try to render each page
for (const page of pages) {
const plainPath = page.relativePath.replace('/index.md', '').replace('.md', '')
// Go through each version...
const versions = allVersionKeys.filter((version) => page.applicableVersions.includes(version))
const pageIssues = {}
for (const version of versions) {
const path = `/${language}/${version}/${plainPath}`
// Reference middleware/context.js for shape
const context = {
currentVersion: version, // needed for all pages
currentLanguage: language, // needed for all pages
currentPath: path, // needed for all pages
currentVersionObj: allVersions[version], // needed for ifversion tag
currentProduct: getProductStringFromPath(path), // needed for learning-track on guides pages
pages: pageMap, // needed for learning-track on guides pages
redirects, // needed for learning-track on guides pages
}
try {
await page.render(context)
} catch (err) {
// Which messages apply to which versions
pageIssues[err.message] = pageIssues[err.message] || []
pageIssues[err.message].push(version)
}
}
if (Object.keys(pageIssues).length) {
issues.push({
path: plainPath,
messages: pageIssues,
score: scores[plainPath] || 0,
})
}
}

// Sort by score desc so the translators know what to focus on first
// Issues with more information should be higher
issues = issues
.filter((issue) => !issue.message?.includes('early-access'))
.sort((a, b) => b.score - a.score || JSON.stringify(b).length - JSON.stringify(a).length)

return report
// Begin an output report
const report = {
language,
gitref,
datetime: new Date().toJSON(),
issuesCount: issues.length,
issues,
}

console.warn = () => {} // shhh
console.log(JSON.stringify(await createReport(), null, 2))
console.warn = prevConsoleWarn
console.error = prevConsoleError
console.log(JSON.stringify(report, null, 2))