From 8ac9e6f00fa29f500dd30701fa736b47847b0a20 Mon Sep 17 00:00:00 2001 From: Travis Briggs Date: Fri, 21 Jun 2024 15:23:27 -0700 Subject: [PATCH 1/2] Use data-data-file-original-src in WikimediaMobileRenderer --- package-lock.json | 9 ++ package.json | 1 + src/renderers/wikimedia-mobile.renderer.ts | 18 +++- test/unit/renderers/mobile.renderer.test.ts | 100 ++++++++++++++++++++ 4 files changed, 125 insertions(+), 3 deletions(-) create mode 100644 test/unit/renderers/mobile.renderer.test.ts diff --git a/package-lock.json b/package-lock.json index 5b7c9542..813f3adb 100644 --- a/package-lock.json +++ b/package-lock.json @@ -57,6 +57,7 @@ "split-by-grapheme": "^1.0.1", "swig-templates": "^2.0.3", "typescript": "^4.9.4", + "url-join": "^5.0.0", "utf8-binary-cutter": "^0.9.2", "webp-hero": "0.0.2", "yargs": "^17.7.1" @@ -18806,6 +18807,14 @@ "integrity": "sha512-Am1ousAhSLBeB9cG/7k7r2R0zj50uDRlZHPGbazid5s9rlF1F/QKYObEKSIunSjIOkJZqwRRLpvewjEkM7pSqg==", "deprecated": "Please see https://github.com/lydell/urix#deprecated" }, + "node_modules/url-join": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/url-join/-/url-join-5.0.0.tgz", + "integrity": "sha512-n2huDr9h9yzd6exQVnH/jU5mr+Pfx08LRXXZhkLLetAMESRj+anQsTAh940iMrIetKAmry9coFuZQ2jY8/p3WA==", + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + } + }, "node_modules/url-parse-lax": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/url-parse-lax/-/url-parse-lax-1.0.0.tgz", diff --git a/package.json b/package.json index ea4e7bbe..3f1f2646 100644 --- a/package.json +++ b/package.json @@ -106,6 +106,7 @@ "split-by-grapheme": "^1.0.1", "swig-templates": "^2.0.3", "typescript": "^4.9.4", + "url-join": "^5.0.0", "utf8-binary-cutter": "^0.9.2", "webp-hero": "0.0.2", "yargs": "^17.7.1" diff --git a/src/renderers/wikimedia-mobile.renderer.ts b/src/renderers/wikimedia-mobile.renderer.ts index 1697f493..b1e8b950 100644 --- a/src/renderers/wikimedia-mobile.renderer.ts +++ b/src/renderers/wikimedia-mobile.renderer.ts @@ -1,4 +1,5 @@ import * as domino from 'domino' +import urlJoin from 'url-join' import * as logger from '../Logger.js' import { MobileRenderer } from './abstractMobile.render.js' import { getStrippedTitleFromHtml } from '../util/misc.js' @@ -33,7 +34,7 @@ export class WikimediaMobileRenderer extends MobileRenderer { const mobileHTML = domino.createDocument(data) const finalHTMLMobile = await this.pipeMobileTransformations( mobileHTML, - this.convertLazyLoadToImages, + this.INTERNAL.convertLazyLoadToImages, this.removeEditContainer, this.removeHiddenClass, async (doc) => { @@ -91,7 +92,7 @@ export class WikimediaMobileRenderer extends MobileRenderer { return doc } - private convertLazyLoadToImages(doc: DominoElement) { + private convertLazyLoadToImagesImpl(doc: DominoElement) { const protocol = 'https://' const spans = doc.querySelectorAll('.pcs-lazy-load-placeholder') @@ -100,7 +101,14 @@ export class WikimediaMobileRenderer extends MobileRenderer { const img = doc.createElement('img') as DominoElement // Set the attributes for the img element based on the data attributes in the span - img.src = protocol + span.getAttribute('data-src') + + // The data-data-file-original-src attribute is the URL of the image that was used in the original article. + // It is preferred over the data-src attribute, which is a "mobile" image that may be scaled up to 320px + // or 640px in order to be "full width" on mobile devices. However, if the mobile API didn't scale the + // image up, then the data-data-file-original-src attribute will be missing, and we should use the data-src. + // See https://github.com/openzim/mwoffliner/issues/1925. + const imgSrc = span.getAttribute('data-data-file-original-src') || span.getAttribute('data-src') + img.src = urlJoin(protocol, imgSrc) img.setAttribute('decoding', 'async') img.width = span.getAttribute('data-width') img.height = span.getAttribute('data-height') @@ -148,4 +156,8 @@ export class WikimediaMobileRenderer extends MobileRenderer { return doc } + + public readonly INTERNAL = { + convertLazyLoadToImages: this.convertLazyLoadToImagesImpl, + } } diff --git a/test/unit/renderers/mobile.renderer.test.ts b/test/unit/renderers/mobile.renderer.test.ts new file mode 100644 index 00000000..83c1deaa --- /dev/null +++ b/test/unit/renderers/mobile.renderer.test.ts @@ -0,0 +1,100 @@ +import * as domino from 'domino' + +import { WikimediaMobileRenderer } from '../../../src/renderers/wikimedia-mobile.renderer' + +describe('mobile renderer', () => { + let window + + describe('image converter', () => { + beforeEach(() => { + window = domino.createWindow( + ` +
+ + + + + +
Bamako
+
+
+ + + + + +
Bamako Pont, mi bɛ Niger baw kan
+
+ +

+ San 2021 mɔgɔ 3 000 000 dɛ tun sigin len bɛ Mali faba kɔnɔ. An bɛ yoro mi nan farafina be kono Bamakɔ fanga wili tɔgɔ ka bɔ ni bɛɛ ta ye. wa + dumia kɔnɔ a bɛ la wɔrɔ dugu la dɛ la singɛ munu la +

+ +

bamakɔ dɛ yɛ ka famgadɔda yɛ.wa nafa ka bɔ a lamini mara bɛ ma

+ +

+ Bamako faaba kila nɛ dɔ ni ki woro dɛ yɛ. ni o niɛmogo tɔgɔ IBRAHIMA N’DIAYE ni ɔ ba fɔ ɔ ma maire + Kinw minu bɛ Bamakɔ kɔnɔ: +

+ `, + 'http://bm.wikipedia.org/api/rest_v1/page/mobile-html/BamakBamakɔ', + ) + }) + + test('it converts lazy load to images with the proper size', async () => { + const mobileRenderer = new WikimediaMobileRenderer() + + const actual = mobileRenderer.INTERNAL.convertLazyLoadToImages(window.document) + const spans = actual.querySelectorAll('.pcs-lazy-load-placeholder') + const imgs = actual.querySelectorAll('img') + + expect(spans.length).toBe(0) + expect(imgs.length).toBe(2) + expect(imgs[0].src).toEqual('https://upload.wikimedia.org/wikipedia/commons/thumb/8/8f/Bamako_et_fleuve_Niger.jpg/250px-Bamako_et_fleuve_Niger.jpg') + expect(imgs[1].src).toEqual('https://upload.wikimedia.org/wikipedia/commons/thumb/2/20/Bamako_bridge2.jpg/250px-Bamako_bridge2.jpg') + }) + }) +}) From fdccec9c95f195acf46ceaab3e9c0add486e2ddb Mon Sep 17 00:00:00 2001 From: Travis Briggs Date: Sun, 23 Jun 2024 10:10:13 -0700 Subject: [PATCH 2/2] Update test for mobile images --- test/unit/renderers/mobile.renderer.test.ts | 29 +++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/test/unit/renderers/mobile.renderer.test.ts b/test/unit/renderers/mobile.renderer.test.ts index 83c1deaa..e11aa661 100644 --- a/test/unit/renderers/mobile.renderer.test.ts +++ b/test/unit/renderers/mobile.renderer.test.ts @@ -96,5 +96,34 @@ describe('mobile renderer', () => { expect(imgs[0].src).toEqual('https://upload.wikimedia.org/wikipedia/commons/thumb/8/8f/Bamako_et_fleuve_Niger.jpg/250px-Bamako_et_fleuve_Niger.jpg') expect(imgs[1].src).toEqual('https://upload.wikimedia.org/wikipedia/commons/thumb/2/20/Bamako_bridge2.jpg/250px-Bamako_bridge2.jpg') }) + + test('it uses the data-src when data-data-file-original-src is not available', async () => { + const test_window = domino.createWindow( + ` + + `, + 'http://en.wikipedia.org/api/rest_v1/page/mobile-html/BMW', + ) + const mobileRenderer = new WikimediaMobileRenderer() + + const actual = mobileRenderer.INTERNAL.convertLazyLoadToImages(test_window.document) + const spans = actual.querySelectorAll('.pcs-lazy-load-placeholder') + const imgs = actual.querySelectorAll('img') + + expect(spans.length).toBe(0) + expect(imgs.length).toBe(1) + expect(imgs[0].src).toEqual('https://upload.wikimedia.org/wikipedia/commons/thumb/4/44/BMW.svg/150px-BMW.svg.png') + }) }) })