Skip to content

Commit

Permalink
fix: Limit translated chunk size
Browse files Browse the repository at this point in the history
  • Loading branch information
3y3 committed Dec 22, 2023
1 parent f9cdb38 commit 00d8852
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 35 deletions.
5 changes: 4 additions & 1 deletion .eslintrc
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
{
"extends": ["@diplodoc/eslint-config"]
"extends": ["@diplodoc/eslint-config"],
"env": {
"node": true
}
}
111 changes: 77 additions & 34 deletions src/cmd/translate/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ const translate = {

// Glob pattern selecting every `.md` file at any directory depth.
const MD_GLOB = '**/*.md';
// NOTE(review): the use of this limit is outside the visible code — presumably it caps
// the number of simultaneous translation requests; confirm against the caller.
const REQUESTS_LIMIT = 20;
const RETRY_LIMIT = 8;
// Upper bound for the accumulated size of the texts sent in a single translate request.
// NOTE(review): it is compared against `String.length` (UTF-16 code units), not an
// encoded byte count — confirm which unit the translation service actually limits.
const BYTES_LIMIT = 10000;
// Passed as `times` to `retry` inside `backoff`.
const RETRY_LIMIT = 3;
// Suffix appended to the source/target language codes of the generated XLIFF document.
const MTRANS_LOCALE = 'MTRANS';

function builder<T>(argv: Argv<T>) {
Expand Down Expand Up @@ -148,6 +149,23 @@ function translator(params: TranslatorParams) {

const session = new Session({oauthToken});
const client = session.client(TranslationServiceClient);
// Prepares a deferred translation call for one batch of texts.
// Calling `request(texts)` sends nothing; the request is built and sent only when
// the returned thunk is invoked, which lets a retry wrapper re-run it from scratch.
// The thunk resolves to the translated strings taken from `translations[].text`.
const request = (texts: string[]) => () => {
    const payload = TranslateRequest.fromPartial({
        texts,
        folderId,
        sourceLanguageCode: sourceLanguage,
        targetLanguageCode: targetLanguage,
        glossaryConfig: {
            glossaryData: {glossaryPairs: yandexCloudTranslateGlossaryPairs},
        },
        format: Format.PLAIN_TEXT,
    });

    return client
        .translate(payload)
        .then((response) => response.translations.map((translation) => translation.text));
};

return async (mdPath: string) => {
try {
Expand All @@ -171,45 +189,57 @@ function translator(params: TranslatorParams) {

const texts = parseSourcesFromXLIFF(xlf);

// Split the source texts into translate requests whose accumulated length stays
// within BYTES_LIMIT. Chunks are queued strictly in source order, so flattening
// `parts` yields exactly one target per entry of `texts`, at the same index.
const promises: Promise<string[]>[] = [];
let buffer: string[] = [];
let bufferSize = 0;

// Sends the pending batch (if any) and starts a new one.
// The emptiness guard prevents issuing a request with no texts.
const flush = () => {
    if (buffer.length > 0) {
        promises.push(backoff(request(buffer)));
        buffer = [];
        bufferSize = 0;
    }
};

for (const text of texts) {
    if (text.length >= BYTES_LIMIT) {
        logger.warn(mdPath, 'Skip document part for translation. Part is too big.');
        // Send what is already buffered first; otherwise the untranslated part
        // would be placed ahead of earlier texts and shift every later target.
        flush();
        // The oversized part is passed through untranslated.
        promises.push(Promise.resolve([text]));
        continue;
    }

    // Adding this text would exceed the limit, so close the current batch.
    if (bufferSize + text.length > BYTES_LIMIT) {
        flush();
    }

    buffer.push(text);
    bufferSize += text.length;
}

// The tail of the document is still buffered at this point; without this final
// flush the last texts would never be translated.
flush();

const parts = await Promise.all(promises);

const createXLIFFDocumentParams = {
const translations = ([] as string[]).concat(...parts);

const translatedXLIFF = createXLIFFDocument({
sourceLanguage: sourceLanguage + '-' + MTRANS_LOCALE,
targetLanguage: targetLanguage + '-' + MTRANS_LOCALE,
sources: texts,
targets: translations as string[],
};

const translatedXLIFF = createXLIFFDocument(createXLIFFDocumentParams);
targets: translations,
});

const composed = await markdownTranslation.compose({
xlf: translatedXLIFF,
Expand All @@ -230,7 +260,20 @@ function translator(params: TranslatorParams) {
};
}

function parseSourcesFromXLIFF(xliff: string) {
/**
 * Runs `action` through `retry`, pausing longer after each failed attempt.
 *
 * `RETRY_LIMIT` is passed as the `times` option. The pause handed to `retry`
 * is `(1 << count) * 1000`, i.e. it doubles with every attempt
 * (presumably interpreted as milliseconds by `retry` — confirm against the library).
 *
 * @param action - deferred operation producing the translated strings
 * @returns whatever `retry` yields for the wrapped action
 */
function backoff(action: () => Promise<string[]>): Promise<string[]> {
    // eslint-disable-next-line no-bitwise
    const interval = (count: number) => (1 << count) * 1000;
    const options = {times: RETRY_LIMIT, interval};

    return retry(options, asyncify(action));
}

function parseSourcesFromXLIFF(xliff: string): string[] {
const parser = new XMLParser();

const inputs = parser.parse(xliff)?.xliff?.file?.body['trans-unit'] ?? [];
Expand Down

0 comments on commit 00d8852

Please sign in to comment.