From c477efd2fa769013d2150f1dcf152c4c11adc4c6 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Wed, 30 Aug 2023 17:32:39 +0300 Subject: [PATCH 01/14] Introduce renderName parameter for scraping process --- src/mwoffliner.lib.ts | 2 +- test/e2e/bm.e2e.test.ts | 150 ++++++++++++++++++---------------- test/e2e/rendererList.test.ts | 55 +++++++++++++ test/e2e/rendererList.ts | 1 + 4 files changed, 136 insertions(+), 72 deletions(-) create mode 100644 test/e2e/rendererList.test.ts create mode 100644 test/e2e/rendererList.ts diff --git a/src/mwoffliner.lib.ts b/src/mwoffliner.lib.ts index 782f7a8d..91e3a8f2 100644 --- a/src/mwoffliner.lib.ts +++ b/src/mwoffliner.lib.ts @@ -218,7 +218,7 @@ async function execute(argv: any) { RedisStore.setOptions(argv.redis || config.defaults.redisPath) await RedisStore.connect() const { articleDetailXId, filesToDownloadXPath, filesToRetryXPath, redirectsXId } = RedisStore - + await downloader.setBaseUrls(forceRender) // Output directory const outputDirectory = path.isAbsolute(_outputDirectory || '') ? 
_outputDirectory : path.join(process.cwd(), _outputDirectory || 'out') await mkdirPromise(outputDirectory) diff --git a/test/e2e/bm.e2e.test.ts b/test/e2e/bm.e2e.test.ts index e957a333..3412db56 100644 --- a/test/e2e/bm.e2e.test.ts +++ b/test/e2e/bm.e2e.test.ts @@ -1,5 +1,6 @@ import * as mwoffliner from '../../src/mwoffliner.lib.js' import { execa } from 'execa' +import { renderers } from './rendererList.js' import rimraf from 'rimraf' import { zimcheckAvailable, zimcheck } from '../util.js' import 'dotenv/config.js' @@ -9,78 +10,85 @@ import { zimdumpAvailable, zimdump } from '../util.js' jest.setTimeout(200000) describe('bm', () => { - const now = new Date() - const testId = `mwo-test-${+now}` - - const parameters = { - mwUrl: 'https://bm.wikipedia.org', - adminEmail: 'test@kiwix.org', - outputDirectory: testId, - redis: process.env.REDIS, - format: ['nopic'], - } - - test('Simple articleList', async () => { - await execa('redis-cli flushall', { shell: true }) - - const outFiles = await mwoffliner.execute(parameters) - - // Created 1 output - expect(outFiles).toHaveLength(1) - - for (const dump of outFiles) { - if (dump.nopic) { - // nopic has enough files - expect(dump.status.files.success).toBeGreaterThan(14) - // nopic has enough redirects - expect(dump.status.redirects.written).toBeGreaterThan(170) - // nopic has enough articles - expect(dump.status.articles.success).toBeGreaterThan(700) + for (const renderer of renderers) { + // Test only render API that is supported by the wiki + if (renderer === 'WikimediaDesktop') { + const now = new Date() + const testId = `mwo-test-${+now}` + + const parameters = { + mwUrl: 'https://bm.wikipedia.org', + adminEmail: 'test@kiwix.org', + outputDirectory: testId, + redis: process.env.REDIS, + format: ['nopic'], } - } - - if (await zimcheckAvailable()) { - await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError() - } else { - console.log('Zimcheck not installed, skipping test') - } - if (await 
zimdumpAvailable()) { - const discussionArticlesStr = await zimdump(`list --ns A/Discussion ${outFiles[0].outFile}`) - // Articles with "Discussion" namespace should be only with option addNamespaces: 1 - expect(discussionArticlesStr.length).toBe(0) - } else { - console.log('Zimdump not installed, skipping test') + const renderParameters = { ...parameters, renderName: renderer } + + test('Simple articleList', async () => { + await execa('redis-cli flushall', { shell: true }) + + const outFiles = await mwoffliner.execute(renderParameters) + + // Created 1 output + expect(outFiles).toHaveLength(1) + + for (const dump of outFiles) { + if (dump.nopic) { + // nopic has enough files + expect(dump.status.files.success).toBeGreaterThan(14) + // nopic has enough redirects + expect(dump.status.redirects.written).toBeGreaterThan(170) + // nopic has enough articles + expect(dump.status.articles.success).toBeGreaterThan(700) + } + } + + if (await zimcheckAvailable()) { + await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError() + } else { + console.log('Zimcheck not installed, skipping test') + } + + if (await zimdumpAvailable()) { + const discussionArticlesStr = await zimdump(`list --ns A/Discussion ${outFiles[0].outFile}`) + // Articles with "Discussion" namespace should be only with option addNamespaces: 1 + expect(discussionArticlesStr.length).toBe(0) + } else { + console.log('Zimdump not installed, skipping test') + } + + // TODO: clear test dir + rimraf.sync(`./${testId}`) + + const redisScan = await execa('redis-cli --scan', { shell: true }) + // Redis has been cleared + expect(redisScan.stdout).toEqual('') + }) + + test('Articles with "Discussion" namespace', async () => { + await execa('redis-cli flushall', { shell: true }) + + const outFiles = await mwoffliner.execute({ ...renderParameters, addNamespaces: 1 }) + // Created 1 output + expect(outFiles).toHaveLength(1) + + if (await zimdumpAvailable()) { + const discussionArticlesStr = await zimdump(`list 
--ns A/Discussion ${outFiles[0].outFile}`) + const discussionArticlesList = discussionArticlesStr.match(/Discussion:/g) + expect(discussionArticlesList.length).toBeGreaterThan(30) + } else { + console.log('Zimdump not installed, skipping test') + } + + // TODO: clear test dir + rimraf.sync(`./${testId}`) + + const redisScan = await execa('redis-cli --scan', { shell: true }) + // Redis has been cleared + expect(redisScan.stdout).toEqual('') + }) } - - // TODO: clear test dir - rimraf.sync(`./${testId}`) - - const redisScan = await execa('redis-cli --scan', { shell: true }) - // Redis has been cleared - expect(redisScan.stdout).toEqual('') - }) - - test('Articles with "Discussion" namespace', async () => { - await execa('redis-cli flushall', { shell: true }) - - const outFiles = await mwoffliner.execute({ ...parameters, addNamespaces: 1 }) - // Created 1 output - expect(outFiles).toHaveLength(1) - - if (await zimdumpAvailable()) { - const discussionArticlesStr = await zimdump(`list --ns A/Discussion ${outFiles[0].outFile}`) - const discussionArticlesList = discussionArticlesStr.match(/Discussion:/g) - expect(discussionArticlesList.length).toBeGreaterThan(30) - } else { - console.log('Zimdump not installed, skipping test') - } - - // TODO: clear test dir - rimraf.sync(`./${testId}`) - - const redisScan = await execa('redis-cli --scan', { shell: true }) - // Redis has been cleared - expect(redisScan.stdout).toEqual('') - }) + } }) diff --git a/test/e2e/rendererList.test.ts b/test/e2e/rendererList.test.ts new file mode 100644 index 00000000..9190ce13 --- /dev/null +++ b/test/e2e/rendererList.test.ts @@ -0,0 +1,55 @@ +import * as mwoffliner from '../../src/mwoffliner.lib.js' +import { execa } from 'execa' +import rimraf from 'rimraf' +import { jest } from '@jest/globals' +import { zimcheckAvailable, zimcheck } from '../util.js' + +jest.setTimeout(200000) + +describe('renderName', () => { + const now = new Date() + const testId = `mwo-test-${+now}` + + const parameters = 
{ + mwUrl: 'https://bm.wikipedia.org', + adminEmail: 'test@kiwix.org', + outputDirectory: testId, + redis: process.env.REDIS, + format: ['nopic'], + articleList: 'France', + } + + beforeAll(async () => { + await execa('redis-cli flushall', { shell: true }) + }) + + test('Scrape article from bm wiki using WikimediaDesktop renderName', async () => { + const renderName = 'WikimediaDesktop' + const outFiles = await mwoffliner.execute({ ...parameters, renderName }) + + if (await zimcheckAvailable()) { + await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError() + } else { + console.log('Zimcheck not installed, skipping test') + } + + rimraf.sync(`./${testId}`) + const redisScan = await execa('redis-cli --scan', { shell: true }) + // Redis has been cleared + expect(redisScan.stdout).toEqual('') + }) + + test('Scrape article from bm wiki should throw error when using VisualEditor renderName', async () => { + const renderName = 'VisualEditor' + expect(async () => { + await mwoffliner.execute({ ...parameters, renderName }) + }).rejects.toThrowError() + }) + + test('Scrape article from bm wiki should throw error when using wrong renderName', async () => { + const renderName = 'unknownRenderName' + expect(async () => { + await mwoffliner.execute({ ...parameters, renderName }) + }).rejects.toThrowError('Unable to find specific API end-point to retrieve article HTML') + }) +}) diff --git a/test/e2e/rendererList.ts b/test/e2e/rendererList.ts new file mode 100644 index 00000000..45b14dee --- /dev/null +++ b/test/e2e/rendererList.ts @@ -0,0 +1 @@ +export const renderers = ['WikimediaDesktop', 'VisualEditor'] From 2befc40e2fedf35df83babef103152266fd147ae Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Thu, 31 Aug 2023 10:27:52 +0300 Subject: [PATCH 02/14] Update renderList test --- test/e2e/rendererList.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/rendererList.test.ts b/test/e2e/rendererList.test.ts index 9190ce13..8fd1ef03 
100644 --- a/test/e2e/rendererList.test.ts +++ b/test/e2e/rendererList.test.ts @@ -50,6 +50,6 @@ describe('renderName', () => { const renderName = 'unknownRenderName' expect(async () => { await mwoffliner.execute({ ...parameters, renderName }) - }).rejects.toThrowError('Unable to find specific API end-point to retrieve article HTML') + }).rejects.toThrowError() }) }) From d8c61225c40590c93c08b46ac521ff3c338a2be0 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Wed, 20 Sep 2023 18:53:03 +0300 Subject: [PATCH 03/14] Add e2e test for article treatments per renderer --- test/e2e/bm.e2e.test.ts | 5 +- test/e2e/rendererList.test.ts | 2 +- test/e2e/treatments.e2e.test.ts | 92 +++++++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+), 4 deletions(-) create mode 100644 test/e2e/treatments.e2e.test.ts diff --git a/test/e2e/bm.e2e.test.ts b/test/e2e/bm.e2e.test.ts index 3412db56..99830323 100644 --- a/test/e2e/bm.e2e.test.ts +++ b/test/e2e/bm.e2e.test.ts @@ -2,10 +2,9 @@ import * as mwoffliner from '../../src/mwoffliner.lib.js' import { execa } from 'execa' import { renderers } from './rendererList.js' import rimraf from 'rimraf' -import { zimcheckAvailable, zimcheck } from '../util.js' +import { zimcheckAvailable, zimdumpAvailable, zimcheck, zimdump } from '../util.js' import 'dotenv/config.js' import { jest } from '@jest/globals' -import { zimdumpAvailable, zimdump } from '../util.js' jest.setTimeout(200000) @@ -24,7 +23,7 @@ describe('bm', () => { format: ['nopic'], } - const renderParameters = { ...parameters, renderName: renderer } + const renderParameters = { ...parameters, forceRender: renderer } test('Simple articleList', async () => { await execa('redis-cli flushall', { shell: true }) diff --git a/test/e2e/rendererList.test.ts b/test/e2e/rendererList.test.ts index 8fd1ef03..a144a247 100644 --- a/test/e2e/rendererList.test.ts +++ b/test/e2e/rendererList.test.ts @@ -16,7 +16,7 @@ describe('renderName', () => { outputDirectory: testId, redis: 
process.env.REDIS, format: ['nopic'], - articleList: 'France', + articleList: 'Fàransi, Kanada', } beforeAll(async () => { diff --git a/test/e2e/treatments.e2e.test.ts b/test/e2e/treatments.e2e.test.ts new file mode 100644 index 00000000..38b559eb --- /dev/null +++ b/test/e2e/treatments.e2e.test.ts @@ -0,0 +1,92 @@ +import 'dotenv/config.js' +import * as mwoffliner from '../../src/mwoffliner.lib.js' +import domino from 'domino' +import rimraf from 'rimraf' +import { execa } from 'execa' +import { jest } from '@jest/globals' +import { renderers } from './rendererList.js' +import { zimcheckAvailable, zimdumpAvailable, zimcheck, zimdump } from '../util.js' + +jest.setTimeout(200000) + +let zimcheckIsAvailable +let zimdumpIsAvailable + +beforeAll(async () => { + zimcheckIsAvailable = await zimcheckAvailable() + zimdumpIsAvailable = await zimdumpAvailable() +}) + +const setDefaultParams = (renderName: string, testId: string) => { + return { + mwUrl: 'https://en.wikipedia.org', + adminEmail: 'test@kiwix.org', + outputDirectory: testId, + redis: process.env.REDIS, + articleList: 'User:Kelson/MWoffliner_CI_reference', + forceRender: renderName, + } +} + +// Check the integrity of img elements between zim file and article html taken from it +const verifyImgElements = (imgFilesArr: string[], imgElements: DominoElement[]) => { + for (const img of imgElements) { + for (const imgFile of imgFilesArr) { + if (img.getAttribute('src').includes(imgFile)) { + return true + } + } + } + return false +} + +const commonTreatmentTest = async (renderer) => { + if (!zimcheckIsAvailable) { + console.log('Zimcheck not installed, skipping test') + return + } + if (!zimdumpIsAvailable) { + console.log('Zimcdump not installed, skipping test') + return + } + const now = new Date() + const testId = `mwo-test-${+now}` + + const parameters = setDefaultParams(renderer, testId) + await execa('redis-cli flushall', { shell: true }) + const outFiles = await mwoffliner.execute(parameters) + await 
expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError() + + const articleFromDump = await zimdump(`show --url A/${parameters.articleList} ${outFiles[0].outFile}`) + const articleDoc = domino.createDocument(articleFromDump) + + // TODO: test collapsible sections + + // Test page header title + expect(articleDoc.querySelector('h1.article-header')).toBeTruthy() + + // Check media files + const mediaFiles = await zimdump(`list --ns I ${outFiles[0].outFile}`) + const mediaFilesArr = mediaFiles.split('\n') + const imgFilesArr = mediaFilesArr.filter((elem) => elem.endsWith('pdf') || elem.endsWith('png') || elem.endsWith('jpg')) + const imgElements = Array.from(articleDoc.querySelectorAll('img')) + + expect(verifyImgElements(imgFilesArr, imgElements)).toBe(true) + + rimraf.sync(`./${testId}`) +} + +describe('Treatments e2e', () => { + for (const renderer of renderers) { + if (renderer === 'WikimediaDesktop') { + test('WikimediaDesktop e2e', async () => { + await commonTreatmentTest(renderer) + }) + } + if (renderer === 'VisualEditor') { + test('VisualEditor e2e', async () => { + await commonTreatmentTest(renderer) + }) + } + } +}) From 2962723b1cf562774c0351380a9ca7dc9e179265 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Thu, 21 Sep 2023 11:29:51 +0300 Subject: [PATCH 04/14] Minor refactoring of treatments e2e tests --- src/sanitize-argument.ts | 4 +-- src/util/const.ts | 1 + test/e2e/bm.e2e.test.ts | 4 +-- test/e2e/rendererList.test.ts | 6 ++-- test/e2e/rendererList.ts | 1 - test/e2e/treatments.e2e.test.ts | 49 ++++++++++++++++----------------- 6 files changed, 31 insertions(+), 34 deletions(-) delete mode 100644 test/e2e/rendererList.ts diff --git a/src/sanitize-argument.ts b/src/sanitize-argument.ts index 0b2b49d1..41d3cef4 100644 --- a/src/sanitize-argument.ts +++ b/src/sanitize-argument.ts @@ -11,6 +11,7 @@ import { isValidEmail } from './util/index.js' import * as path from 'path' import { fileURLToPath } from 'url' import { parameterDescriptions } 
from './parameterList.js' +import { RENDERERS_LIST } from './util/const.js' const __filename = fileURLToPath(import.meta.url) const __dirname = path.dirname(__filename) @@ -192,11 +193,10 @@ export function sanitize_customFlavour(customFlavour: string): string { } export function sanitize_forceRender(renderName: string): string { - const renderNames = ['VisualEditor', 'WikimediaDesktop', 'WikimediaMobile'] const checkRenderName = (arr: string[], val: string) => { return arr.some((arrVal) => val === arrVal) } - if (checkRenderName(renderNames, renderName)) { + if (checkRenderName(RENDERERS_LIST, renderName)) { return renderName } throw new Error(`Invalid render name: ${renderName}`) diff --git a/src/util/const.ts b/src/util/const.ts index f7bbb515..6c511f56 100644 --- a/src/util/const.ts +++ b/src/util/const.ts @@ -20,3 +20,4 @@ export const RULE_TO_REDIRECT = /window\.top !== window\.self/ export const WEBP_HANDLER_URL = 'https://gist.githubusercontent.com/rgaudin/60bb9cc6f187add506584258028b8ee1/raw/9d575b8e25d67eed2a9c9a91d3e053a0062d2fc7/web-handler.js' export const MAX_FILE_DOWNLOAD_RETRIES = 5 export const BLACKLISTED_NS = ['Story'] // 'Story' Wikipedia namespace is content, but not indgestable by Parsoid https://github.com/openzim/mwoffliner/issues/1853 +export const RENDERERS_LIST = ['WikimediaDesktop', 'VisualEditor'] diff --git a/test/e2e/bm.e2e.test.ts b/test/e2e/bm.e2e.test.ts index 99830323..e750bb15 100644 --- a/test/e2e/bm.e2e.test.ts +++ b/test/e2e/bm.e2e.test.ts @@ -1,6 +1,6 @@ import * as mwoffliner from '../../src/mwoffliner.lib.js' import { execa } from 'execa' -import { renderers } from './rendererList.js' +import { RENDERERS_LIST } from '../../src/util/const.js' import rimraf from 'rimraf' import { zimcheckAvailable, zimdumpAvailable, zimcheck, zimdump } from '../util.js' import 'dotenv/config.js' @@ -9,7 +9,7 @@ import { jest } from '@jest/globals' jest.setTimeout(200000) describe('bm', () => { - for (const renderer of renderers) { + for 
(const renderer of RENDERERS_LIST) { // Test only render API that is supported by the wiki if (renderer === 'WikimediaDesktop') { const now = new Date() diff --git a/test/e2e/rendererList.test.ts b/test/e2e/rendererList.test.ts index a144a247..8e6c4f91 100644 --- a/test/e2e/rendererList.test.ts +++ b/test/e2e/rendererList.test.ts @@ -23,7 +23,7 @@ describe('renderName', () => { await execa('redis-cli flushall', { shell: true }) }) - test('Scrape article from bm wiki using WikimediaDesktop renderName', async () => { + test('Scrape article from bm.wikipedia.org using WikimediaDesktop renderName', async () => { const renderName = 'WikimediaDesktop' const outFiles = await mwoffliner.execute({ ...parameters, renderName }) @@ -39,14 +39,14 @@ describe('renderName', () => { expect(redisScan.stdout).toEqual('') }) - test('Scrape article from bm wiki should throw error when using VisualEditor renderName', async () => { + test('Scrape article from bm.wikipedia.org should throw error when using VisualEditor renderName', async () => { const renderName = 'VisualEditor' expect(async () => { await mwoffliner.execute({ ...parameters, renderName }) }).rejects.toThrowError() }) - test('Scrape article from bm wiki should throw error when using wrong renderName', async () => { + test('Scrape article from bm.wikipedia.org should throw error when using wrong renderName', async () => { const renderName = 'unknownRenderName' expect(async () => { await mwoffliner.execute({ ...parameters, renderName }) diff --git a/test/e2e/rendererList.ts b/test/e2e/rendererList.ts deleted file mode 100644 index 45b14dee..00000000 --- a/test/e2e/rendererList.ts +++ /dev/null @@ -1 +0,0 @@ -export const renderers = ['WikimediaDesktop', 'VisualEditor'] diff --git a/test/e2e/treatments.e2e.test.ts b/test/e2e/treatments.e2e.test.ts index 38b559eb..de64ed8b 100644 --- a/test/e2e/treatments.e2e.test.ts +++ b/test/e2e/treatments.e2e.test.ts @@ -4,7 +4,7 @@ import domino from 'domino' import rimraf from 'rimraf' 
import { execa } from 'execa' import { jest } from '@jest/globals' -import { renderers } from './rendererList.js' +import { RENDERERS_LIST } from '../../src/util/const.js' import { zimcheckAvailable, zimdumpAvailable, zimcheck, zimdump } from '../util.js' jest.setTimeout(200000) @@ -17,15 +17,20 @@ beforeAll(async () => { zimdumpIsAvailable = await zimdumpAvailable() }) -const setDefaultParams = (renderName: string, testId: string) => { - return { - mwUrl: 'https://en.wikipedia.org', +async function getOutFiles(renderName: string, testId: string, articleList: string, mwUrl: string): Promise { + const parameters = { + mwUrl, adminEmail: 'test@kiwix.org', outputDirectory: testId, redis: process.env.REDIS, - articleList: 'User:Kelson/MWoffliner_CI_reference', + articleList, forceRender: renderName, } + + await execa('redis-cli flushall', { shell: true }) + const outFiles = await mwoffliner.execute(parameters) + + return outFiles } // Check the integrity of img elements between zim file and article html taken from it @@ -40,24 +45,18 @@ const verifyImgElements = (imgFilesArr: string[], imgElements: DominoElement[]) return false } -const commonTreatmentTest = async (renderer) => { - if (!zimcheckIsAvailable) { - console.log('Zimcheck not installed, skipping test') - return - } - if (!zimdumpIsAvailable) { - console.log('Zimcdump not installed, skipping test') +const commonTreatmentTest = async (renderer: string, articleList: string, mwUrl: string) => { + if (!zimcheckIsAvailable || !zimdumpIsAvailable) { + const missingTool = !zimcheckIsAvailable ? 
'Zimcheck' : 'Zimdump' + console.log(`${missingTool} not installed, skipping test`) return } const now = new Date() const testId = `mwo-test-${+now}` - const parameters = setDefaultParams(renderer, testId) - await execa('redis-cli flushall', { shell: true }) - const outFiles = await mwoffliner.execute(parameters) + const outFiles = await getOutFiles(renderer, testId, articleList, mwUrl) await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError() - - const articleFromDump = await zimdump(`show --url A/${parameters.articleList} ${outFiles[0].outFile}`) + const articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) const articleDoc = domino.createDocument(articleFromDump) // TODO: test collapsible sections @@ -77,15 +76,13 @@ const commonTreatmentTest = async (renderer) => { } describe('Treatments e2e', () => { - for (const renderer of renderers) { - if (renderer === 'WikimediaDesktop') { - test('WikimediaDesktop e2e', async () => { - await commonTreatmentTest(renderer) - }) - } - if (renderer === 'VisualEditor') { - test('VisualEditor e2e', async () => { - await commonTreatmentTest(renderer) + const mwUrl = 'https://en.wikipedia.org' + const articleList = 'User:Kelson/MWoffliner_CI_reference' + + for (const renderer of RENDERERS_LIST) { + if (['WikimediaDesktop', 'VisualEditor'].includes(renderer)) { + test(`${renderer} e2e`, async () => { + await commonTreatmentTest(renderer, articleList, mwUrl) }) } } From 47b408f14eb4eaf30ece6d379f2c087fc8b5b8e7 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Thu, 21 Sep 2023 12:08:59 +0300 Subject: [PATCH 05/14] Optimize nodet tests --- test/unit/saveArticles.test.ts | 109 ++++++++++++++------------------- 1 file changed, 46 insertions(+), 63 deletions(-) diff --git a/test/unit/saveArticles.test.ts b/test/unit/saveArticles.test.ts index 525f80b0..3bd3c1b8 100644 --- a/test/unit/saveArticles.test.ts +++ b/test/unit/saveArticles.test.ts @@ -10,6 +10,7 @@ import { jest } from 
'@jest/globals' import { getArticleUrl } from '../../src/util/saveArticles.js' import { WikimediaDesktopRenderer } from '../../src/renderers/wikimedia-desktop.renderer.js' import { VisualEditorRenderer } from '../../src/renderers/visual-editor.renderer.js' +import { RENDERERS_LIST } from '../../src/util/const.js' jest.setTimeout(40000) @@ -79,69 +80,51 @@ describe('saveArticles', () => { expect(articleDoc.querySelector('h1.article-header')).toBeTruthy() }) - test('Check nodet article for en.wikipedia.org using Visual Editor renderer', async () => { - const visualEditorRenderer = new VisualEditorRenderer() - const { downloader, dump } = await setupScrapeClasses({ mwUrl: 'https://en.wikipedia.org', format: 'nodet' }) // en wikipedia - await downloader.setBaseUrls('VisualEditor') - const articleId = 'Canada' - const articleUrl = getArticleUrl(downloader, dump, articleId) - const _articleDetailsRet = await downloader.getArticleDetailsIds([articleId]) - const articlesDetail = mwRetToArticleDetail(_articleDetailsRet) - const { articleDetailXId } = RedisStore - const articleDetail = { title: articleId, timestamp: '2023-09-10T17:36:04Z' } - const _moduleDependencies = await downloader.getModuleDependencies(articleDetail.title) - articleDetailXId.setMany(articlesDetail) - const result = await downloader.getArticle( - downloader.webp, - _moduleDependencies, - articleId, - articleDetailXId, - visualEditorRenderer, - articleUrl, - dump, - articleDetail, - dump.isMainPage(articleId), - ) - - const articleDoc = domino.createDocument(result[0].html) - - const sections = Array.from(articleDoc.querySelectorAll('section')) - const leadSection = sections[0] - expect(sections.length).toEqual(1) - expect(leadSection.getAttribute('data-mw-section-id')).toEqual('0') - }) - - test('Check nodet article for en.wikipedia.org using Wikimedia Desktop renderer', async () => { - const wikimediaDesktopRenderer = new WikimediaDesktopRenderer() - const { downloader, dump } = await 
setupScrapeClasses({ mwUrl: 'https://en.wikipedia.org', format: 'nodet' }) // en wikipedia - await downloader.setBaseUrls('WikimediaDesktop') - const articleId = 'London' - const articleUrl = getArticleUrl(downloader, dump, articleId) - const _articleDetailsRet = await downloader.getArticleDetailsIds([articleId]) - const articlesDetail = mwRetToArticleDetail(_articleDetailsRet) - const { articleDetailXId } = RedisStore - const articleDetail = { title: articleId } - const _moduleDependencies = await downloader.getModuleDependencies(articleDetail.title) - articleDetailXId.setMany(articlesDetail) - const result = await downloader.getArticle( - downloader.webp, - _moduleDependencies, - articleId, - articleDetailXId, - wikimediaDesktopRenderer, - articleUrl, - dump, - articleDetail, - dump.isMainPage(articleId), - ) - - const articleDoc = domino.createDocument(result[0].html) - - const sections = Array.from(articleDoc.querySelectorAll('section')) - const leadSection = sections[0] - expect(sections.length).toEqual(1) - expect(leadSection.getAttribute('data-mw-section-id')).toEqual('0') - }) + for (const renderer of RENDERERS_LIST) { + if (['WikimediaDesktop', 'VisualEditor'].includes(renderer)) { + test(`Check nodet article for en.wikipedia.org using ${renderer} renderer`, async () => { + let rendererInstance + switch (renderer) { + case 'VisualEditor': + rendererInstance = new VisualEditorRenderer() + break + case 'WikimediaDesktop': + rendererInstance = new WikimediaDesktopRenderer() + break + default: + throw new Error(`Unknown renderer: ${renderer}`) + } + const { downloader, dump } = await setupScrapeClasses({ mwUrl: 'https://en.wikipedia.org', format: 'nodet' }) // en wikipedia + await downloader.setBaseUrls(renderer) + const articleId = 'Canada' + const articleUrl = getArticleUrl(downloader, dump, articleId) + const _articleDetailsRet = await downloader.getArticleDetailsIds([articleId]) + const articlesDetail = mwRetToArticleDetail(_articleDetailsRet) + const { 
articleDetailXId } = RedisStore + const articleDetail = { title: articleId, timestamp: '2023-09-10T17:36:04Z' } + const _moduleDependencies = await downloader.getModuleDependencies(articleDetail.title) + articleDetailXId.setMany(articlesDetail) + const result = await downloader.getArticle( + downloader.webp, + _moduleDependencies, + articleId, + articleDetailXId, + rendererInstance, + articleUrl, + dump, + articleDetail, + dump.isMainPage(articleId), + ) + + const articleDoc = domino.createDocument(result[0].html) + + const sections = Array.from(articleDoc.querySelectorAll('section')) + const leadSection = sections[0] + expect(sections.length).toEqual(1) + expect(leadSection.getAttribute('data-mw-section-id')).toEqual('0') + }) + } + } test('Load main page and check that it is without header', async () => { const wikimediaDesktopRenderer = new WikimediaDesktopRenderer() From a98d7809da94211cd05cbe8322748298bf39689b Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Fri, 22 Sep 2023 09:26:50 +0300 Subject: [PATCH 06/14] Connect VE to bm.e2e.test.ts --- test/e2e/bm.e2e.test.ts | 125 ++++++++++++++++++++-------------------- 1 file changed, 61 insertions(+), 64 deletions(-) diff --git a/test/e2e/bm.e2e.test.ts b/test/e2e/bm.e2e.test.ts index e750bb15..0d7db10e 100644 --- a/test/e2e/bm.e2e.test.ts +++ b/test/e2e/bm.e2e.test.ts @@ -10,84 +10,81 @@ jest.setTimeout(200000) describe('bm', () => { for (const renderer of RENDERERS_LIST) { - // Test only render API that is supported by the wiki - if (renderer === 'WikimediaDesktop') { - const now = new Date() - const testId = `mwo-test-${+now}` - - const parameters = { - mwUrl: 'https://bm.wikipedia.org', - adminEmail: 'test@kiwix.org', - outputDirectory: testId, - redis: process.env.REDIS, - format: ['nopic'], - } + const now = new Date() + const testId = `mwo-test-${+now}` + + const parameters = { + mwUrl: 'https://bm.wikipedia.org', + adminEmail: 'test@kiwix.org', + outputDirectory: testId, + redis: process.env.REDIS, + 
format: ['nopic'], + } - const renderParameters = { ...parameters, forceRender: renderer } + const renderParameters = { ...parameters, forceRender: renderer } - test('Simple articleList', async () => { - await execa('redis-cli flushall', { shell: true }) + test(`Simple articleList (${renderer} renderer)`, async () => { + await execa('redis-cli flushall', { shell: true }) - const outFiles = await mwoffliner.execute(renderParameters) + const outFiles = await mwoffliner.execute(renderParameters) - // Created 1 output - expect(outFiles).toHaveLength(1) + // Created 1 output + expect(outFiles).toHaveLength(1) - for (const dump of outFiles) { - if (dump.nopic) { - // nopic has enough files - expect(dump.status.files.success).toBeGreaterThan(14) - // nopic has enough redirects - expect(dump.status.redirects.written).toBeGreaterThan(170) - // nopic has enough articles - expect(dump.status.articles.success).toBeGreaterThan(700) - } + for (const dump of outFiles) { + if (dump.nopic) { + // nopic has enough files + expect(dump.status.files.success).toBeGreaterThan(14) + // nopic has enough redirects + expect(dump.status.redirects.written).toBeGreaterThan(170) + // nopic has enough articles + expect(dump.status.articles.success).toBeGreaterThan(700) } + } - if (await zimcheckAvailable()) { - await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError() - } else { - console.log('Zimcheck not installed, skipping test') - } + if (await zimcheckAvailable()) { + await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError() + } else { + console.log('Zimcheck not installed, skipping test') + } - if (await zimdumpAvailable()) { - const discussionArticlesStr = await zimdump(`list --ns A/Discussion ${outFiles[0].outFile}`) - // Articles with "Discussion" namespace should be only with option addNamespaces: 1 - expect(discussionArticlesStr.length).toBe(0) - } else { - console.log('Zimdump not installed, skipping test') - } + if (await zimdumpAvailable()) { + const 
discussionArticlesStr = await zimdump(`list --ns A/Discussion ${outFiles[0].outFile}`) + // Articles with "Discussion" namespace should be only with option addNamespaces: 1 + expect(discussionArticlesStr.length).toBe(0) + } else { + console.log('Zimdump not installed, skipping test') + } - // TODO: clear test dir - rimraf.sync(`./${testId}`) + // TODO: clear test dir + rimraf.sync(`./${testId}`) - const redisScan = await execa('redis-cli --scan', { shell: true }) - // Redis has been cleared - expect(redisScan.stdout).toEqual('') - }) + const redisScan = await execa('redis-cli --scan', { shell: true }) + // Redis has been cleared + expect(redisScan.stdout).toEqual('') + }) - test('Articles with "Discussion" namespace', async () => { - await execa('redis-cli flushall', { shell: true }) + test(`Articles with "Discussion" namespace (${renderer} renderer)`, async () => { + await execa('redis-cli flushall', { shell: true }) - const outFiles = await mwoffliner.execute({ ...renderParameters, addNamespaces: 1 }) - // Created 1 output - expect(outFiles).toHaveLength(1) + const outFiles = await mwoffliner.execute({ ...renderParameters, addNamespaces: 1 }) + // Created 1 output + expect(outFiles).toHaveLength(1) - if (await zimdumpAvailable()) { - const discussionArticlesStr = await zimdump(`list --ns A/Discussion ${outFiles[0].outFile}`) - const discussionArticlesList = discussionArticlesStr.match(/Discussion:/g) - expect(discussionArticlesList.length).toBeGreaterThan(30) - } else { - console.log('Zimdump not installed, skipping test') - } + if (await zimdumpAvailable()) { + const discussionArticlesStr = await zimdump(`list --ns A/Discussion ${outFiles[0].outFile}`) + const discussionArticlesList = discussionArticlesStr.match(/Discussion:/g) + expect(discussionArticlesList.length).toBeGreaterThan(30) + } else { + console.log('Zimdump not installed, skipping test') + } - // TODO: clear test dir - rimraf.sync(`./${testId}`) + // TODO: clear test dir + rimraf.sync(`./${testId}`) 
- const redisScan = await execa('redis-cli --scan', { shell: true }) - // Redis has been cleared - expect(redisScan.stdout).toEqual('') - }) - } + const redisScan = await execa('redis-cli --scan', { shell: true }) + // Redis has been cleared + expect(redisScan.stdout).toEqual('') + }) } }) From 9afab57ca97d6908769a0b5100f6343fe84dc43a Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Fri, 22 Sep 2023 09:47:16 +0300 Subject: [PATCH 07/14] Increase timeout to bm.e2e.test.ts since it scrapes data using multiple renderers --- test/e2e/bm.e2e.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/bm.e2e.test.ts b/test/e2e/bm.e2e.test.ts index 0d7db10e..d66dc3a7 100644 --- a/test/e2e/bm.e2e.test.ts +++ b/test/e2e/bm.e2e.test.ts @@ -6,7 +6,7 @@ import { zimcheckAvailable, zimdumpAvailable, zimcheck, zimdump } from '../util. import 'dotenv/config.js' import { jest } from '@jest/globals' -jest.setTimeout(200000) +jest.setTimeout(800000) describe('bm', () => { for (const renderer of RENDERERS_LIST) { From 003dfd2b98db58b3884ed9f906d138258859ec9f Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Thu, 28 Sep 2023 21:49:06 +0300 Subject: [PATCH 08/14] Add multiple variants of e2e templates --- test/e2e/en.e2e-callback.test.ts | 52 +++++++++++++++ test/e2e/en.e2e.test.ts | 10 +++ test/e2e/treatments.e2e.test.ts | 89 -------------------------- test/testAllRendersTemplate.ts | 78 ++++++++++++++++++++++ test/testAllRendersTemplateCallback.ts | 51 +++++++++++++++ 5 files changed, 191 insertions(+), 89 deletions(-) create mode 100644 test/e2e/en.e2e-callback.test.ts create mode 100644 test/e2e/en.e2e.test.ts delete mode 100644 test/e2e/treatments.e2e.test.ts create mode 100644 test/testAllRendersTemplate.ts create mode 100644 test/testAllRendersTemplateCallback.ts diff --git a/test/e2e/en.e2e-callback.test.ts b/test/e2e/en.e2e-callback.test.ts new file mode 100644 index 00000000..2d279a0e --- /dev/null +++ b/test/e2e/en.e2e-callback.test.ts @@ -0,0 
+1,52 @@ +import domino from 'domino' +import rimraf from 'rimraf' +import testWithAllRenders from '../testAllRendersTemplateCallback.js' + +const parameters = { + mwUrl: 'https://en.wikipedia.org', + articleList: 'BMW', + format: '', +} + +// Check the integrity of img elements between zim file and article html taken from it +const verifyImgElements = (imgFilesArr: string[], imgElements: DominoElement[]) => { + for (const img of imgElements) { + for (const imgFile of imgFilesArr) { + if (img.getAttribute('src').includes(imgFile)) { + return true + } + } + } + return false +} + +test('testing e2e callback', (done) => { + async function callback(data) { + try { + data.outFilesArr.forEach(async (outFile) => { + const articleFromDump = await data.zimdump(`show --url A/${parameters.articleList} ${outFile[0].outFile}`) + const articleDoc = domino.createDocument(articleFromDump) + + // TODO: test collapsible sections + + // Test page header title + expect(articleDoc.querySelector('h1.article-header')).toBeTruthy() + + // Check media files + const mediaFiles = await data.zimdump(`list --ns I ${outFile[0].outFile}`) + const mediaFilesArr = mediaFiles.split('\n') + const imgFilesArr = mediaFilesArr.filter((elem) => elem.endsWith('pdf') || elem.endsWith('png') || elem.endsWith('jpg')) + const imgElements = Array.from(articleDoc.querySelectorAll('img')) + + expect(verifyImgElements(imgFilesArr, imgElements)).toBe(true) + + rimraf.sync(`./${outFile[0].testId}`) + + done() + }) + } catch (err) { + throw new Error(err) + } + } + testWithAllRenders(callback, parameters) +}) diff --git a/test/e2e/en.e2e.test.ts b/test/e2e/en.e2e.test.ts new file mode 100644 index 00000000..82be318b --- /dev/null +++ b/test/e2e/en.e2e.test.ts @@ -0,0 +1,10 @@ +import testWithAllRenders from '../testAllRendersTemplate.js' + +describe('e2e test en.wikipedia.org', () => { + it('Run all renders test', async () => { + const mwUrl = 'https://en.wikipedia.org' + const articleList = 
'User:Kelson/MWoffliner_CI_reference' + const format = '' + await testWithAllRenders(mwUrl, articleList, format) + }) +}) diff --git a/test/e2e/treatments.e2e.test.ts b/test/e2e/treatments.e2e.test.ts deleted file mode 100644 index de64ed8b..00000000 --- a/test/e2e/treatments.e2e.test.ts +++ /dev/null @@ -1,89 +0,0 @@ -import 'dotenv/config.js' -import * as mwoffliner from '../../src/mwoffliner.lib.js' -import domino from 'domino' -import rimraf from 'rimraf' -import { execa } from 'execa' -import { jest } from '@jest/globals' -import { RENDERERS_LIST } from '../../src/util/const.js' -import { zimcheckAvailable, zimdumpAvailable, zimcheck, zimdump } from '../util.js' - -jest.setTimeout(200000) - -let zimcheckIsAvailable -let zimdumpIsAvailable - -beforeAll(async () => { - zimcheckIsAvailable = await zimcheckAvailable() - zimdumpIsAvailable = await zimdumpAvailable() -}) - -async function getOutFiles(renderName: string, testId: string, articleList: string, mwUrl: string): Promise { - const parameters = { - mwUrl, - adminEmail: 'test@kiwix.org', - outputDirectory: testId, - redis: process.env.REDIS, - articleList, - forceRender: renderName, - } - - await execa('redis-cli flushall', { shell: true }) - const outFiles = await mwoffliner.execute(parameters) - - return outFiles -} - -// Check the integrity of img elements between zim file and article html taken from it -const verifyImgElements = (imgFilesArr: string[], imgElements: DominoElement[]) => { - for (const img of imgElements) { - for (const imgFile of imgFilesArr) { - if (img.getAttribute('src').includes(imgFile)) { - return true - } - } - } - return false -} - -const commonTreatmentTest = async (renderer: string, articleList: string, mwUrl: string) => { - if (!zimcheckIsAvailable || !zimdumpIsAvailable) { - const missingTool = !zimcheckIsAvailable ? 
'Zimcheck' : 'Zimdump' - console.log(`${missingTool} not installed, skipping test`) - return - } - const now = new Date() - const testId = `mwo-test-${+now}` - - const outFiles = await getOutFiles(renderer, testId, articleList, mwUrl) - await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError() - const articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) - const articleDoc = domino.createDocument(articleFromDump) - - // TODO: test collapsible sections - - // Test page header title - expect(articleDoc.querySelector('h1.article-header')).toBeTruthy() - - // Check media files - const mediaFiles = await zimdump(`list --ns I ${outFiles[0].outFile}`) - const mediaFilesArr = mediaFiles.split('\n') - const imgFilesArr = mediaFilesArr.filter((elem) => elem.endsWith('pdf') || elem.endsWith('png') || elem.endsWith('jpg')) - const imgElements = Array.from(articleDoc.querySelectorAll('img')) - - expect(verifyImgElements(imgFilesArr, imgElements)).toBe(true) - - rimraf.sync(`./${testId}`) -} - -describe('Treatments e2e', () => { - const mwUrl = 'https://en.wikipedia.org' - const articleList = 'User:Kelson/MWoffliner_CI_reference' - - for (const renderer of RENDERERS_LIST) { - if (['WikimediaDesktop', 'VisualEditor'].includes(renderer)) { - test(`${renderer} e2e`, async () => { - await commonTreatmentTest(renderer, articleList, mwUrl) - }) - } - } -}) diff --git a/test/testAllRendersTemplate.ts b/test/testAllRendersTemplate.ts new file mode 100644 index 00000000..c0d5a5cf --- /dev/null +++ b/test/testAllRendersTemplate.ts @@ -0,0 +1,78 @@ +import * as mwoffliner from '../src/mwoffliner.lib.js' +import domino from 'domino' +import { execa } from 'execa' +import rimraf from 'rimraf' +import { jest } from '@jest/globals' +import { RENDERERS_LIST } from '../src/util/const.js' +import { zimcheckAvailable, zimdumpAvailable, zimcheck, zimdump } from './util.js' +import 'dotenv/config.js' + +jest.setTimeout(200000) + +// Check the integrity 
of img elements between zim file and article html taken from it +const verifyImgElements = (imgFilesArr: string[], imgElements: DominoElement[]) => { + for (const img of imgElements) { + for (const imgFile of imgFilesArr) { + if (img.getAttribute('src').includes(imgFile)) { + return true + } + } + } + return false +} + +async function getOutFiles(renderName: string, testId: string, articleList: string, mwUrl: string, format?: string): Promise { + const parameters = { + mwUrl, + adminEmail: 'test@kiwix.org', + outputDirectory: testId, + redis: process.env.REDIS, + articleList, + forceRender: renderName, + format, + } + + await execa('redis-cli flushall', { shell: true }) + const outFiles = await mwoffliner.execute(parameters) + + return outFiles +} + +const testWithAllRenders = async (mwUrl: string, articleList: string, format?: string) => { + const zimcheckIsAvailable = await zimcheckAvailable() + const zimdumpIsAvailable = await zimdumpAvailable() + + if (!zimcheckIsAvailable || !zimdumpIsAvailable) { + const missingTool = !zimcheckIsAvailable ? 
'Zimcheck' : 'Zimdump' + console.log(`${missingTool} not installed, skipping test`) + return + } + + for (const renderer of RENDERERS_LIST) { + const now = new Date() + const testId = `mwo-test-${+now}` + + const outFiles = await getOutFiles(renderer, testId, articleList, mwUrl, format) + await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError() + + const articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) + const articleDoc = domino.createDocument(articleFromDump) + + // TODO: test collapsible sections + + // Test page header title + expect(articleDoc.querySelector('h1.article-header')).toBeTruthy() + + // Check media files + const mediaFiles = await zimdump(`list --ns I ${outFiles[0].outFile}`) + const mediaFilesArr = mediaFiles.split('\n') + const imgFilesArr = mediaFilesArr.filter((elem) => elem.endsWith('pdf') || elem.endsWith('png') || elem.endsWith('jpg')) + const imgElements = Array.from(articleDoc.querySelectorAll('img')) + + expect(verifyImgElements(imgFilesArr, imgElements)).toBe(true) + + rimraf.sync(`./${testId}`) + } +} + +export default testWithAllRenders diff --git a/test/testAllRendersTemplateCallback.ts b/test/testAllRendersTemplateCallback.ts new file mode 100644 index 00000000..85af5309 --- /dev/null +++ b/test/testAllRendersTemplateCallback.ts @@ -0,0 +1,51 @@ +import * as mwoffliner from '../src/mwoffliner.lib.js' +import { jest } from '@jest/globals' +import { RENDERERS_LIST } from '../src/util/const.js' +import { zimcheckAvailable, zimdumpAvailable, zimcheck, zimdump } from './util.js' +import 'dotenv/config.js' +import { execa } from 'execa' + +jest.setTimeout(200000) + +async function getOutFiles(renderName: string, testId: string, articleList: string, mwUrl: string, format?: string): Promise { + const parameters = { + mwUrl, + adminEmail: 'test@kiwix.org', + outputDirectory: testId, + redis: process.env.REDIS, + articleList, + forceRender: renderName, + format, + } + + await execa('redis-cli 
flushall', { shell: true }) + const outFiles = await mwoffliner.execute(parameters) + + return outFiles +} + +const testWithAllRenders = async (back, parameters) => { + const zimcheckIsAvailable = await zimcheckAvailable() + const zimdumpIsAvailable = await zimdumpAvailable() + const outFilesArr = [] + + if (!zimcheckIsAvailable || !zimdumpIsAvailable) { + const missingTool = !zimcheckIsAvailable ? 'Zimcheck' : 'Zimdump' + console.log(`${missingTool} not installed, skipping test`) + return + } + + for (const renderer of RENDERERS_LIST) { + const now = new Date() + const testId = `mwo-test-${+now}` + const outFiles = await getOutFiles(renderer, testId, parameters.articleList, parameters.mwUrl, parameters.format) + await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError() + // Update outFiles dump with the renderer info and testId as we need them for futher testing + outFiles[0].testId = testId + outFiles[0].renderer = renderer + outFilesArr.push(outFiles) + } + return back({ outFilesArr, zimdump }) +} + +export default testWithAllRenders From 956fd7ad2c75f569d37253e4c8c2f56e02926ec8 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Fri, 29 Sep 2023 14:46:20 +0300 Subject: [PATCH 09/14] Update e2e template --- test/e2e/en.e2e-callback.test.ts | 52 ------------- test/e2e/en.e2e.test.ts | 49 +++++++++++- ...sTemplateCallback.ts => testAllRenders.ts} | 32 ++++---- test/testAllRendersTemplate.ts | 78 ------------------- 4 files changed, 62 insertions(+), 149 deletions(-) delete mode 100644 test/e2e/en.e2e-callback.test.ts rename test/{testAllRendersTemplateCallback.ts => testAllRenders.ts} (55%) delete mode 100644 test/testAllRendersTemplate.ts diff --git a/test/e2e/en.e2e-callback.test.ts b/test/e2e/en.e2e-callback.test.ts deleted file mode 100644 index 2d279a0e..00000000 --- a/test/e2e/en.e2e-callback.test.ts +++ /dev/null @@ -1,52 +0,0 @@ -import domino from 'domino' -import rimraf from 'rimraf' -import testWithAllRenders from 
'../testAllRendersTemplateCallback.js' - -const parameters = { - mwUrl: 'https://en.wikipedia.org', - articleList: 'BMW', - format: '', -} - -// Check the integrity of img elements between zim file and article html taken from it -const verifyImgElements = (imgFilesArr: string[], imgElements: DominoElement[]) => { - for (const img of imgElements) { - for (const imgFile of imgFilesArr) { - if (img.getAttribute('src').includes(imgFile)) { - return true - } - } - } - return false -} - -test('testing e2e callback', (done) => { - async function callback(data) { - try { - data.outFilesArr.forEach(async (outFile) => { - const articleFromDump = await data.zimdump(`show --url A/${parameters.articleList} ${outFile[0].outFile}`) - const articleDoc = domino.createDocument(articleFromDump) - - // TODO: test collapsible sections - - // Test page header title - expect(articleDoc.querySelector('h1.article-header')).toBeTruthy() - - // Check media files - const mediaFiles = await data.zimdump(`list --ns I ${outFile[0].outFile}`) - const mediaFilesArr = mediaFiles.split('\n') - const imgFilesArr = mediaFilesArr.filter((elem) => elem.endsWith('pdf') || elem.endsWith('png') || elem.endsWith('jpg')) - const imgElements = Array.from(articleDoc.querySelectorAll('img')) - - expect(verifyImgElements(imgFilesArr, imgElements)).toBe(true) - - rimraf.sync(`./${outFile[0].testId}`) - - done() - }) - } catch (err) { - throw new Error(err) - } - } - testWithAllRenders(callback, parameters) -}) diff --git a/test/e2e/en.e2e.test.ts b/test/e2e/en.e2e.test.ts index 82be318b..d0404c54 100644 --- a/test/e2e/en.e2e.test.ts +++ b/test/e2e/en.e2e.test.ts @@ -1,10 +1,51 @@ -import testWithAllRenders from '../testAllRendersTemplate.js' +import { testAllRenders } from '../testAllRenders.js' +import domino from 'domino' +import { zimdump } from '../util.js' +import 'dotenv/config.js' +import { jest } from '@jest/globals' +import rimraf from 'rimraf' -describe('e2e test en.wikipedia.org', () => { - it('Run all 
renders test', async () => { +jest.setTimeout(60000) + +// Check the integrity of img elements between zim file and article html taken from it +const verifyImgElements = (imgFilesArr, imgElements) => { + for (const img of imgElements) { + for (const imgFile of imgFilesArr) { + if (img.getAttribute('src').includes(imgFile)) { + return true + } + } + } + return false +} + +// TODO: Jest requires that tests within a describe block be defined synchronously, no async () functions in callback +describe('e2e test for en.wikipedia.org', () => { + test('Test article structure', async () => { const mwUrl = 'https://en.wikipedia.org' const articleList = 'User:Kelson/MWoffliner_CI_reference' const format = '' - await testWithAllRenders(mwUrl, articleList, format) + await testAllRenders(mwUrl, articleList, format, async (outFiles) => { + /* + TODO: + We have to call expect() here in a callback but without test('', () => {...}) + Otherwise, receive Error: Tests cannot be nested. Test "some test" cannot run because it is nested within "Test article structure". 
+ Uncomment code below to see the issue + */ + /* + test('some test', () => { + expect(1).toBe(1) + }) + */ + const articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) + const articleDoc = domino.createDocument(articleFromDump) + expect(articleDoc.querySelector('h1.article-header')).toBeTruthy() + const mediaFiles = await zimdump(`list --ns I ${outFiles[0].outFile}`) + const mediaFilesArr = mediaFiles.split('\n') + const imgFilesArr = mediaFilesArr.filter((elem) => elem.endsWith('pdf') || elem.endsWith('png') || elem.endsWith('jpg')) + const imgElements = Array.from(articleDoc.querySelectorAll('img')) + expect(verifyImgElements(imgFilesArr, imgElements)).toBe(true) + rimraf.sync(`./${outFiles[0].testId}`) + }) }) }) diff --git a/test/testAllRendersTemplateCallback.ts b/test/testAllRenders.ts similarity index 55% rename from test/testAllRendersTemplateCallback.ts rename to test/testAllRenders.ts index 85af5309..7b3475df 100644 --- a/test/testAllRendersTemplateCallback.ts +++ b/test/testAllRenders.ts @@ -1,13 +1,16 @@ import * as mwoffliner from '../src/mwoffliner.lib.js' -import { jest } from '@jest/globals' -import { RENDERERS_LIST } from '../src/util/const.js' -import { zimcheckAvailable, zimdumpAvailable, zimcheck, zimdump } from './util.js' -import 'dotenv/config.js' import { execa } from 'execa' +import { RENDERERS_LIST } from '../src/util/const.js' +import { zimcheckAvailable, zimdumpAvailable } from './util.js' -jest.setTimeout(200000) +/* + This is the template for e2e tests of different wikis + 1. Verify zimcheck and zimdump availability + 2. Gets output file and checks its integrity + 3. 
Returns output file per renderer in the callback function +*/ -async function getOutFiles(renderName: string, testId: string, articleList: string, mwUrl: string, format?: string): Promise { +async function getOutFiles(renderName: string, testId: string, articleList: string, mwUrl: string, format?: string | string[]): Promise { const parameters = { mwUrl, adminEmail: 'test@kiwix.org', @@ -24,10 +27,9 @@ async function getOutFiles(renderName: string, testId: string, articleList: stri return outFiles } -const testWithAllRenders = async (back, parameters) => { +export async function testAllRenders(mwUrl: string, articleList: string, format: string | string[], callback) { const zimcheckIsAvailable = await zimcheckAvailable() const zimdumpIsAvailable = await zimdumpAvailable() - const outFilesArr = [] if (!zimcheckIsAvailable || !zimdumpIsAvailable) { const missingTool = !zimcheckIsAvailable ? 'Zimcheck' : 'Zimdump' @@ -38,14 +40,14 @@ const testWithAllRenders = async (back, parameters) => { for (const renderer of RENDERERS_LIST) { const now = new Date() const testId = `mwo-test-${+now}` - const outFiles = await getOutFiles(renderer, testId, parameters.articleList, parameters.mwUrl, parameters.format) - await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError() - // Update outFiles dump with the renderer info and testId as we need them for futher testing + const outFiles = await getOutFiles(renderer, testId, articleList, mwUrl, format) outFiles[0].testId = testId outFiles[0].renderer = renderer - outFilesArr.push(outFiles) + /* + TODO: render name should be passed to the test name somehow. 
+ But since jest test methods are not available inside the callback, this is impossible given this pattern + */ + console.log('test renderer:', renderer) + await callback(outFiles) } - return back({ outFilesArr, zimdump }) } - -export default testWithAllRenders diff --git a/test/testAllRendersTemplate.ts b/test/testAllRendersTemplate.ts deleted file mode 100644 index c0d5a5cf..00000000 --- a/test/testAllRendersTemplate.ts +++ /dev/null @@ -1,78 +0,0 @@ -import * as mwoffliner from '../src/mwoffliner.lib.js' -import domino from 'domino' -import { execa } from 'execa' -import rimraf from 'rimraf' -import { jest } from '@jest/globals' -import { RENDERERS_LIST } from '../src/util/const.js' -import { zimcheckAvailable, zimdumpAvailable, zimcheck, zimdump } from './util.js' -import 'dotenv/config.js' - -jest.setTimeout(200000) - -// Check the integrity of img elements between zim file and article html taken from it -const verifyImgElements = (imgFilesArr: string[], imgElements: DominoElement[]) => { - for (const img of imgElements) { - for (const imgFile of imgFilesArr) { - if (img.getAttribute('src').includes(imgFile)) { - return true - } - } - } - return false -} - -async function getOutFiles(renderName: string, testId: string, articleList: string, mwUrl: string, format?: string): Promise { - const parameters = { - mwUrl, - adminEmail: 'test@kiwix.org', - outputDirectory: testId, - redis: process.env.REDIS, - articleList, - forceRender: renderName, - format, - } - - await execa('redis-cli flushall', { shell: true }) - const outFiles = await mwoffliner.execute(parameters) - - return outFiles -} - -const testWithAllRenders = async (mwUrl: string, articleList: string, format?: string) => { - const zimcheckIsAvailable = await zimcheckAvailable() - const zimdumpIsAvailable = await zimdumpAvailable() - - if (!zimcheckIsAvailable || !zimdumpIsAvailable) { - const missingTool = !zimcheckIsAvailable ? 
'Zimcheck' : 'Zimdump' - console.log(`${missingTool} not installed, skipping test`) - return - } - - for (const renderer of RENDERERS_LIST) { - const now = new Date() - const testId = `mwo-test-${+now}` - - const outFiles = await getOutFiles(renderer, testId, articleList, mwUrl, format) - await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError() - - const articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) - const articleDoc = domino.createDocument(articleFromDump) - - // TODO: test collapsible sections - - // Test page header title - expect(articleDoc.querySelector('h1.article-header')).toBeTruthy() - - // Check media files - const mediaFiles = await zimdump(`list --ns I ${outFiles[0].outFile}`) - const mediaFilesArr = mediaFiles.split('\n') - const imgFilesArr = mediaFilesArr.filter((elem) => elem.endsWith('pdf') || elem.endsWith('png') || elem.endsWith('jpg')) - const imgElements = Array.from(articleDoc.querySelectorAll('img')) - - expect(verifyImgElements(imgFilesArr, imgElements)).toBe(true) - - rimraf.sync(`./${testId}`) - } -} - -export default testWithAllRenders From 0e3e103a140172b858c09754209b32af0da75d23 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Fri, 29 Sep 2023 17:34:45 +0300 Subject: [PATCH 10/14] Improve e2e template --- test/e2e/en.e2e.test.ts | 40 ++++++++++++++++++++-------------------- test/testAllRenders.ts | 25 ++++++++++--------------- 2 files changed, 30 insertions(+), 35 deletions(-) diff --git a/test/e2e/en.e2e.test.ts b/test/e2e/en.e2e.test.ts index d0404c54..541f05ba 100644 --- a/test/e2e/en.e2e.test.ts +++ b/test/e2e/en.e2e.test.ts @@ -19,32 +19,32 @@ const verifyImgElements = (imgFilesArr, imgElements) => { return false } -// TODO: Jest requires that tests within a describe block be defined synchronously, no async () functions in callback -describe('e2e test for en.wikipedia.org', () => { - test('Test article structure', async () => { - const mwUrl = 'https://en.wikipedia.org' - 
const articleList = 'User:Kelson/MWoffliner_CI_reference' - const format = '' - await testAllRenders(mwUrl, articleList, format, async (outFiles) => { - /* - TODO: - We have to call expect() here in a callback but without test('', () => {...}) - Otherwise, receive Error: Tests cannot be nested. Test "some test" cannot run because it is nested within "Test article structure". - Uncomment code below to see the issue - */ - /* - test('some test', () => { - expect(1).toBe(1) - }) - */ - const articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) - const articleDoc = domino.createDocument(articleFromDump) +const mwUrl = 'https://en.wikipedia.org' +const articleList = 'User:Kelson/MWoffliner_CI_reference' +const format = '' + +await testAllRenders(mwUrl, articleList, format, async (outFiles) => { + const describeIf = outFiles.isSkipped ? describe.skip : describe + + let articleFromDump + if (!outFiles.isSkipped) { + articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) + } + + describeIf('e2e test for en.wikipedia.org', () => { + const articleDoc = domino.createDocument(articleFromDump) + test(`test article header for ${outFiles[0]?.renderer} renderer`, async () => { expect(articleDoc.querySelector('h1.article-header')).toBeTruthy() + }) + test(`test article image integrity for ${outFiles[0]?.renderer} renderer`, async () => { const mediaFiles = await zimdump(`list --ns I ${outFiles[0].outFile}`) const mediaFilesArr = mediaFiles.split('\n') const imgFilesArr = mediaFilesArr.filter((elem) => elem.endsWith('pdf') || elem.endsWith('png') || elem.endsWith('jpg')) const imgElements = Array.from(articleDoc.querySelectorAll('img')) expect(verifyImgElements(imgFilesArr, imgElements)).toBe(true) + }) + + afterAll(() => { rimraf.sync(`./${outFiles[0].testId}`) }) }) diff --git a/test/testAllRenders.ts b/test/testAllRenders.ts index 7b3475df..6d357e97 100644 --- a/test/testAllRenders.ts +++ b/test/testAllRenders.ts @@ 
-34,20 +34,15 @@ export async function testAllRenders(mwUrl: string, articleList: string, format: if (!zimcheckIsAvailable || !zimdumpIsAvailable) { const missingTool = !zimcheckIsAvailable ? 'Zimcheck' : 'Zimdump' console.log(`${missingTool} not installed, skipping test`) - return - } - - for (const renderer of RENDERERS_LIST) { - const now = new Date() - const testId = `mwo-test-${+now}` - const outFiles = await getOutFiles(renderer, testId, articleList, mwUrl, format) - outFiles[0].testId = testId - outFiles[0].renderer = renderer - /* - TODO: render name should be passed to the test name somehow. - But since jest test methods are not available inside the callback, this is impossible given this pattern - */ - console.log('test renderer:', renderer) - await callback(outFiles) + return callback({ isSkipped: true }) + } else { + for (const renderer of RENDERERS_LIST) { + const now = new Date() + const testId = `mwo-test-${+now}` + const outFiles = await getOutFiles(renderer, testId, articleList, mwUrl, format) + outFiles[0].testId = testId + outFiles[0].renderer = renderer + await callback(outFiles) + } } } From 4f83356d325dcc8e82d0661fcb888e24a43803d6 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Fri, 29 Sep 2023 19:18:36 +0300 Subject: [PATCH 11/14] Roll back bm.e2e.test.ts --- test/e2e/bm.e2e.test.ts | 130 ++++++++++++++++------------------ test/e2e/rendererList.test.ts | 7 -- 2 files changed, 63 insertions(+), 74 deletions(-) diff --git a/test/e2e/bm.e2e.test.ts b/test/e2e/bm.e2e.test.ts index d66dc3a7..e957a333 100644 --- a/test/e2e/bm.e2e.test.ts +++ b/test/e2e/bm.e2e.test.ts @@ -1,90 +1,86 @@ import * as mwoffliner from '../../src/mwoffliner.lib.js' import { execa } from 'execa' -import { RENDERERS_LIST } from '../../src/util/const.js' import rimraf from 'rimraf' -import { zimcheckAvailable, zimdumpAvailable, zimcheck, zimdump } from '../util.js' +import { zimcheckAvailable, zimcheck } from '../util.js' import 'dotenv/config.js' import { jest } from 
'@jest/globals' +import { zimdumpAvailable, zimdump } from '../util.js' -jest.setTimeout(800000) +jest.setTimeout(200000) describe('bm', () => { - for (const renderer of RENDERERS_LIST) { - const now = new Date() - const testId = `mwo-test-${+now}` - - const parameters = { - mwUrl: 'https://bm.wikipedia.org', - adminEmail: 'test@kiwix.org', - outputDirectory: testId, - redis: process.env.REDIS, - format: ['nopic'], - } - - const renderParameters = { ...parameters, forceRender: renderer } + const now = new Date() + const testId = `mwo-test-${+now}` + + const parameters = { + mwUrl: 'https://bm.wikipedia.org', + adminEmail: 'test@kiwix.org', + outputDirectory: testId, + redis: process.env.REDIS, + format: ['nopic'], + } - test(`Simple articleList (${renderer} renderer)`, async () => { - await execa('redis-cli flushall', { shell: true }) + test('Simple articleList', async () => { + await execa('redis-cli flushall', { shell: true }) - const outFiles = await mwoffliner.execute(renderParameters) + const outFiles = await mwoffliner.execute(parameters) - // Created 1 output - expect(outFiles).toHaveLength(1) + // Created 1 output + expect(outFiles).toHaveLength(1) - for (const dump of outFiles) { - if (dump.nopic) { - // nopic has enough files - expect(dump.status.files.success).toBeGreaterThan(14) - // nopic has enough redirects - expect(dump.status.redirects.written).toBeGreaterThan(170) - // nopic has enough articles - expect(dump.status.articles.success).toBeGreaterThan(700) - } + for (const dump of outFiles) { + if (dump.nopic) { + // nopic has enough files + expect(dump.status.files.success).toBeGreaterThan(14) + // nopic has enough redirects + expect(dump.status.redirects.written).toBeGreaterThan(170) + // nopic has enough articles + expect(dump.status.articles.success).toBeGreaterThan(700) } + } - if (await zimcheckAvailable()) { - await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError() - } else { - console.log('Zimcheck not installed, skipping 
test') - } + if (await zimcheckAvailable()) { + await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError() + } else { + console.log('Zimcheck not installed, skipping test') + } - if (await zimdumpAvailable()) { - const discussionArticlesStr = await zimdump(`list --ns A/Discussion ${outFiles[0].outFile}`) - // Articles with "Discussion" namespace should be only with option addNamespaces: 1 - expect(discussionArticlesStr.length).toBe(0) - } else { - console.log('Zimdump not installed, skipping test') - } + if (await zimdumpAvailable()) { + const discussionArticlesStr = await zimdump(`list --ns A/Discussion ${outFiles[0].outFile}`) + // Articles with "Discussion" namespace should be only with option addNamespaces: 1 + expect(discussionArticlesStr.length).toBe(0) + } else { + console.log('Zimdump not installed, skipping test') + } - // TODO: clear test dir - rimraf.sync(`./${testId}`) + // TODO: clear test dir + rimraf.sync(`./${testId}`) - const redisScan = await execa('redis-cli --scan', { shell: true }) - // Redis has been cleared - expect(redisScan.stdout).toEqual('') - }) + const redisScan = await execa('redis-cli --scan', { shell: true }) + // Redis has been cleared + expect(redisScan.stdout).toEqual('') + }) - test(`Articles with "Discussion" namespace (${renderer} renderer)`, async () => { - await execa('redis-cli flushall', { shell: true }) + test('Articles with "Discussion" namespace', async () => { + await execa('redis-cli flushall', { shell: true }) - const outFiles = await mwoffliner.execute({ ...renderParameters, addNamespaces: 1 }) - // Created 1 output - expect(outFiles).toHaveLength(1) + const outFiles = await mwoffliner.execute({ ...parameters, addNamespaces: 1 }) + // Created 1 output + expect(outFiles).toHaveLength(1) - if (await zimdumpAvailable()) { - const discussionArticlesStr = await zimdump(`list --ns A/Discussion ${outFiles[0].outFile}`) - const discussionArticlesList = discussionArticlesStr.match(/Discussion:/g) - 
expect(discussionArticlesList.length).toBeGreaterThan(30) - } else { - console.log('Zimdump not installed, skipping test') - } + if (await zimdumpAvailable()) { + const discussionArticlesStr = await zimdump(`list --ns A/Discussion ${outFiles[0].outFile}`) + const discussionArticlesList = discussionArticlesStr.match(/Discussion:/g) + expect(discussionArticlesList.length).toBeGreaterThan(30) + } else { + console.log('Zimdump not installed, skipping test') + } - // TODO: clear test dir - rimraf.sync(`./${testId}`) + // TODO: clear test dir + rimraf.sync(`./${testId}`) - const redisScan = await execa('redis-cli --scan', { shell: true }) - // Redis has been cleared - expect(redisScan.stdout).toEqual('') - }) - } + const redisScan = await execa('redis-cli --scan', { shell: true }) + // Redis has been cleared + expect(redisScan.stdout).toEqual('') + }) }) diff --git a/test/e2e/rendererList.test.ts b/test/e2e/rendererList.test.ts index 8e6c4f91..517729f5 100644 --- a/test/e2e/rendererList.test.ts +++ b/test/e2e/rendererList.test.ts @@ -39,13 +39,6 @@ describe('renderName', () => { expect(redisScan.stdout).toEqual('') }) - test('Scrape article from bm.wikipedia.org should throw error when using VisualEditor renderName', async () => { - const renderName = 'VisualEditor' - expect(async () => { - await mwoffliner.execute({ ...parameters, renderName }) - }).rejects.toThrowError() - }) - test('Scrape article from bm.wikipedia.org should throw error when using wrong renderName', async () => { const renderName = 'unknownRenderName' expect(async () => { From 7d9ff2fcfeabe52409d91bd0b11ed6ece3e7b5d3 Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Fri, 29 Sep 2023 19:48:01 +0300 Subject: [PATCH 12/14] Remove redundant rendererList test --- test/e2e/rendererList.test.ts | 48 ----------------------------------- 1 file changed, 48 deletions(-) delete mode 100644 test/e2e/rendererList.test.ts diff --git a/test/e2e/rendererList.test.ts b/test/e2e/rendererList.test.ts deleted file 
mode 100644 index 517729f5..00000000 --- a/test/e2e/rendererList.test.ts +++ /dev/null @@ -1,48 +0,0 @@ -import * as mwoffliner from '../../src/mwoffliner.lib.js' -import { execa } from 'execa' -import rimraf from 'rimraf' -import { jest } from '@jest/globals' -import { zimcheckAvailable, zimcheck } from '../util.js' - -jest.setTimeout(200000) - -describe('renderName', () => { - const now = new Date() - const testId = `mwo-test-${+now}` - - const parameters = { - mwUrl: 'https://bm.wikipedia.org', - adminEmail: 'test@kiwix.org', - outputDirectory: testId, - redis: process.env.REDIS, - format: ['nopic'], - articleList: 'Fàransi, Kanada', - } - - beforeAll(async () => { - await execa('redis-cli flushall', { shell: true }) - }) - - test('Scrape article from bm.wikipedia.org using WikimediaDesktop renderName', async () => { - const renderName = 'WikimediaDesktop' - const outFiles = await mwoffliner.execute({ ...parameters, renderName }) - - if (await zimcheckAvailable()) { - await expect(zimcheck(outFiles[0].outFile)).resolves.not.toThrowError() - } else { - console.log('Zimcheck not installed, skipping test') - } - - rimraf.sync(`./${testId}`) - const redisScan = await execa('redis-cli --scan', { shell: true }) - // Redis has been cleared - expect(redisScan.stdout).toEqual('') - }) - - test('Scrape article from bm.wikipedia.org should throw error when using wrong renderName', async () => { - const renderName = 'unknownRenderName' - expect(async () => { - await mwoffliner.execute({ ...parameters, renderName }) - }).rejects.toThrowError() - }) -}) From d84b294610dfe1b94aff69111afbb2b4ee39664d Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Mon, 2 Oct 2023 09:33:31 +0300 Subject: [PATCH 13/14] Exit test process if no zim tools available --- test/e2e/en.e2e.test.ts | 10 +--- test/testAllRenders.ts | 23 +++++----- test/unit/saveArticles.test.ts | 84 +++++++++++++++++----------------- 3 files changed, 55 insertions(+), 62 deletions(-) diff --git 
a/test/e2e/en.e2e.test.ts b/test/e2e/en.e2e.test.ts index 541f05ba..19f66928 100644 --- a/test/e2e/en.e2e.test.ts +++ b/test/e2e/en.e2e.test.ts @@ -24,14 +24,8 @@ const articleList = 'User:Kelson/MWoffliner_CI_reference' const format = '' await testAllRenders(mwUrl, articleList, format, async (outFiles) => { - const describeIf = outFiles.isSkipped ? describe.skip : describe - - let articleFromDump - if (!outFiles.isSkipped) { - articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) - } - - describeIf('e2e test for en.wikipedia.org', () => { + const articleFromDump = await zimdump(`show --url A/${articleList} ${outFiles[0].outFile}`) + describe('e2e test for en.wikipedia.org', () => { const articleDoc = domino.createDocument(articleFromDump) test(`test article header for ${outFiles[0]?.renderer} renderer`, async () => { expect(articleDoc.querySelector('h1.article-header')).toBeTruthy() diff --git a/test/testAllRenders.ts b/test/testAllRenders.ts index 6d357e97..f7280da4 100644 --- a/test/testAllRenders.ts +++ b/test/testAllRenders.ts @@ -1,3 +1,4 @@ +import * as logger from '../src/Logger.js' import * as mwoffliner from '../src/mwoffliner.lib.js' import { execa } from 'execa' import { RENDERERS_LIST } from '../src/util/const.js' @@ -33,16 +34,16 @@ export async function testAllRenders(mwUrl: string, articleList: string, format: if (!zimcheckIsAvailable || !zimdumpIsAvailable) { const missingTool = !zimcheckIsAvailable ? 
'Zimcheck' : 'Zimdump' - console.log(`${missingTool} not installed, skipping test`) - return callback({ isSkipped: true }) - } else { - for (const renderer of RENDERERS_LIST) { - const now = new Date() - const testId = `mwo-test-${+now}` - const outFiles = await getOutFiles(renderer, testId, articleList, mwUrl, format) - outFiles[0].testId = testId - outFiles[0].renderer = renderer - await callback(outFiles) - } + logger.error(`${missingTool} not installed, exiting test`) + process.exit(1) + } + + for (const renderer of RENDERERS_LIST) { + const now = new Date() + const testId = `mwo-test-${+now}` + const outFiles = await getOutFiles(renderer, testId, articleList, mwUrl, format) + outFiles[0].testId = testId + outFiles[0].renderer = renderer + await callback(outFiles) } } diff --git a/test/unit/saveArticles.test.ts b/test/unit/saveArticles.test.ts index 3bd3c1b8..d1644e4b 100644 --- a/test/unit/saveArticles.test.ts +++ b/test/unit/saveArticles.test.ts @@ -81,49 +81,47 @@ describe('saveArticles', () => { }) for (const renderer of RENDERERS_LIST) { - if (['WikimediaDesktop', 'VisualEditor'].includes(renderer)) { - test(`Check nodet article for en.wikipedia.org using ${renderer} renderer`, async () => { - let rendererInstance - switch (renderer) { - case 'VisualEditor': - rendererInstance = new VisualEditorRenderer() - break - case 'WikimediaDesktop': - rendererInstance = new WikimediaDesktopRenderer() - break - default: - throw new Error(`Unknown renderer: ${renderer}`) - } - const { downloader, dump } = await setupScrapeClasses({ mwUrl: 'https://en.wikipedia.org', format: 'nodet' }) // en wikipedia - await downloader.setBaseUrls(renderer) - const articleId = 'Canada' - const articleUrl = getArticleUrl(downloader, dump, articleId) - const _articleDetailsRet = await downloader.getArticleDetailsIds([articleId]) - const articlesDetail = mwRetToArticleDetail(_articleDetailsRet) - const { articleDetailXId } = RedisStore - const articleDetail = { title: articleId, 
timestamp: '2023-09-10T17:36:04Z' } - const _moduleDependencies = await downloader.getModuleDependencies(articleDetail.title) - articleDetailXId.setMany(articlesDetail) - const result = await downloader.getArticle( - downloader.webp, - _moduleDependencies, - articleId, - articleDetailXId, - rendererInstance, - articleUrl, - dump, - articleDetail, - dump.isMainPage(articleId), - ) - - const articleDoc = domino.createDocument(result[0].html) - - const sections = Array.from(articleDoc.querySelectorAll('section')) - const leadSection = sections[0] - expect(sections.length).toEqual(1) - expect(leadSection.getAttribute('data-mw-section-id')).toEqual('0') - }) - } + test(`Check nodet article for en.wikipedia.org using ${renderer} renderer`, async () => { + let rendererInstance + switch (renderer) { + case 'VisualEditor': + rendererInstance = new VisualEditorRenderer() + break + case 'WikimediaDesktop': + rendererInstance = new WikimediaDesktopRenderer() + break + default: + throw new Error(`Unknown renderer: ${renderer}`) + } + const { downloader, dump } = await setupScrapeClasses({ mwUrl: 'https://en.wikipedia.org', format: 'nodet' }) // en wikipedia + await downloader.setBaseUrls(renderer) + const articleId = 'Canada' + const articleUrl = getArticleUrl(downloader, dump, articleId) + const _articleDetailsRet = await downloader.getArticleDetailsIds([articleId]) + const articlesDetail = mwRetToArticleDetail(_articleDetailsRet) + const { articleDetailXId } = RedisStore + const articleDetail = { title: articleId, timestamp: '2023-09-10T17:36:04Z' } + const _moduleDependencies = await downloader.getModuleDependencies(articleDetail.title) + articleDetailXId.setMany(articlesDetail) + const result = await downloader.getArticle( + downloader.webp, + _moduleDependencies, + articleId, + articleDetailXId, + rendererInstance, + articleUrl, + dump, + articleDetail, + dump.isMainPage(articleId), + ) + + const articleDoc = domino.createDocument(result[0].html) + + const sections = 
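Array.from(articleDoc.querySelectorAll('section')) - const leadSection = sections[0] - expect(sections.length).toEqual(1) - expect(leadSection.getAttribute('data-mw-section-id')).toEqual('0') - }) - } + test(`Check nodet article for en.wikipedia.org using ${renderer} renderer`, async () => { + let rendererInstance + switch (renderer) { + case 'VisualEditor': + rendererInstance = new VisualEditorRenderer() + break + case 'WikimediaDesktop': + rendererInstance = new WikimediaDesktopRenderer() + break + default: + throw new Error(`Unknown renderer: ${renderer}`) + } + const { downloader, dump } = await setupScrapeClasses({ mwUrl: 'https://en.wikipedia.org', format: 'nodet' }) // en wikipedia + await downloader.setBaseUrls(renderer) + const articleId = 'Canada' + const articleUrl = getArticleUrl(downloader, dump, articleId) + const _articleDetailsRet = await downloader.getArticleDetailsIds([articleId]) + const articlesDetail = mwRetToArticleDetail(_articleDetailsRet) + const { articleDetailXId } = RedisStore + const articleDetail = { title: articleId, timestamp: '2023-09-10T17:36:04Z' } + const _moduleDependencies = await downloader.getModuleDependencies(articleDetail.title) + articleDetailXId.setMany(articlesDetail) + const result = await downloader.getArticle( + downloader.webp, + _moduleDependencies, + articleId, + articleDetailXId, + rendererInstance, + articleUrl, + dump, + articleDetail, + dump.isMainPage(articleId), + ) + + const articleDoc = domino.createDocument(result[0].html) + + const sections = Array.from(articleDoc.querySelectorAll('section')) + const leadSection = sections[0] + expect(sections.length).toEqual(1) + expect(leadSection.getAttribute('data-mw-section-id')).toEqual('0') + }) } test('Load main page and check that it is without header', async () => { From 3c5a0d023069b206adf251f8e62fddb9ba70332e Mon Sep 17 00:00:00 2001 From: Vadim Kovalenko Date: Wed, 4 Oct 2023 09:32:32 +0300 Subject: [PATCH 14/14] Refactor zim tool checking as lazy method --- test/testAllRenders.ts | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/test/testAllRenders.ts b/test/testAllRenders.ts index f7280da4..16c7e330 100644 --- a/test/testAllRenders.ts +++ b/test/testAllRenders.ts @@ -6,11 +6,29 @@ import { zimcheckAvailable, zimdumpAvailable } from './util.js' /* This is the template for e2e tests of different wikis - 1. Verify zimcheck and zimdump availability + 1. Verifies zimcheck and zimdump availability and caches the result 2. Gets output file and checks its integrity 3. Returns output file per renderer in the callback function */ +let zimToolsChecked = false +async function checkZimTools() { + if (zimToolsChecked) { + return + } + + const zimcheckIsAvailable = await zimcheckAvailable() + const zimdumpIsAvailable = await zimdumpAvailable() + + if (!zimcheckIsAvailable || !zimdumpIsAvailable) { + const missingTool = !zimcheckIsAvailable ?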
'Zimcheck' : 'Zimdump' + logger.error(`${missingTool} not installed, exiting test`) + process.exit(1) + } + + zimToolsChecked = true +} + async function getOutFiles(renderName: string, testId: string, articleList: string, mwUrl: string, format?: string | string[]): Promise { const parameters = { mwUrl, @@ -29,15 +47,7 @@ async function getOutFiles(renderName: string, testId: string, articleList: stri } export async function testAllRenders(mwUrl: string, articleList: string, format: string | string[], callback) { - const zimcheckIsAvailable = await zimcheckAvailable() - const zimdumpIsAvailable = await zimdumpAvailable() - - if (!zimcheckIsAvailable || !zimdumpIsAvailable) { - const missingTool = !zimcheckIsAvailable ? 'Zimcheck' : 'Zimdump' - logger.error(`${missingTool} not installed, exiting test`) - process.exit(1) - } - + await checkZimTools() for (const renderer of RENDERERS_LIST) { const now = new Date() const testId = `mwo-test-${+now}`