Skip to content

Commit 0f9fb80

Browse files
committed
feat: localizable images through urls in captions
1 parent 5ceb7f2 commit 0f9fb80

File tree

4 files changed

+203
-30
lines changed

4 files changed

+203
-30
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ notion-pull-mdx makes some attempt to keep the right order of things, but there
8181

8282
Localize your files in Crowdin (or whatever) based on the markdown files, not in Notion. For how to do this with Docusaurus, see [Docusaurus i18n](https://docusaurus.io/docs/i18n/crowdin).
8383

84+
You may also need to localize screenshots. Crowdin can handle localizing assets, but this library currently supports a different approach. If you place, for example, `fr https://imgur.com/1234.png` in the caption of a screenshot in Notion, `notion-pull-mdx` will fetch that image and save it locally with the same name as the primary screenshot, but with "-fr" appended. So you'd get, for example, `static/img/9876.png` and `static/img/9876-fr.png`. To get the French version to show, you'd need to add that "-fr" to the markdown link when you localize the page's text in Crowdin. If a way can be found, this modification of the markdown may be made automatic in the future so that the right image version is used without manual edits.
85+
8486
# Automated builds with Github Actions
8587

8688
Here is a working Github Action script to copy and customize: https://github.com/BloomBooks/bloom-docs/blob/master/.github/workflows/release.yml

src/CustomTranformers.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ async function notionColumnToMarkdown(
5151
notionClient: Client,
5252
block: ListBlockChildrenResponseResult
5353
): Promise<string> {
54-
console.log(JSON.stringify(block));
54+
//console.log(JSON.stringify(block));
5555
const { id, has_children } = block as any; // "any" because the notion api type system is complex with a union that don't know how to help TS to cope with
5656

5757
if (!has_children) return "";

src/NotionImage.spec.ts

Lines changed: 127 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,134 @@
11
import { parseImageBlock } from "./NotionImage";
22

3+
const kPrimaryImageUrl =
4+
"https://s3.us-west-2.amazonaws.com/primaryImage.png?Blah=foo";
5+
36
/* eslint-disable @typescript-eslint/require-await */
4-
describe("image caption", () => {
5-
it("should find a caption", async () => {
6-
const img = parseImageBlock(kImageBlock);
7-
expect(img.url).toBe("https://someimage.png");
8-
});
7+
test("finds primary image url", async () => {
8+
const img = parseImageBlock(kImageBlockWithTwoLocalizedImages);
9+
expect(img.primaryUrl).toBe(kPrimaryImageUrl);
10+
});
11+
12+
test("primary caption content after image links are removed", async () => {
13+
const img = parseImageBlock(
14+
kImageBlockWithTwoLocalizedImagesWrappedWithActualCaptionText
15+
);
16+
expect(img.caption).toBe("Caption before images.\nCaption after images.");
917
});
1018

11-
const kImageBlock = {
19+
test("gets localized image links", async () => {
20+
const img = parseImageBlock(
21+
kImageBlockWithTwoLocalizedImagesWrappedWithActualCaptionText
22+
);
23+
expect(img.localizedUrls.length).toBe(2);
24+
expect(img.localizedUrls[0].iso632Code).toBe("FR");
25+
expect(img.localizedUrls[1].iso632Code).toBe("ES");
26+
expect(img.localizedUrls[0].url).toBe("https://i.imgur.com/pYmE7OJ.png");
27+
expect(img.localizedUrls[1].url).toBe("https://i.imgur.com/8paSZ0i.png");
28+
});
29+
30+
const kImageBlockWithTwoLocalizedImagesWrappedWithActualCaptionText = {
31+
object: "block",
32+
id: "20b821b4-7c5b-41dc-8e30-92c23c125580",
33+
parent: { type: "page_id", page_id: "9dd05134-0401-47f6-b159-1e6b76b9aad3" },
34+
created_time: "2022-07-25T23:05:00.000Z",
35+
last_edited_time: "2022-07-26T15:31:00.000Z",
36+
created_by: { object: "user", id: "11fb7f16-0560-4aee-ab88-ed75a850cfc4" },
37+
last_edited_by: {
38+
object: "user",
39+
id: "11fb7f16-0560-4aee-ab88-ed75a850cfc4",
40+
},
41+
has_children: false,
42+
archived: false,
43+
type: "image",
44+
image: {
45+
caption: [
46+
{
47+
type: "text",
48+
text: { content: "Caption before images. fr-", link: null },
49+
annotations: {
50+
bold: false,
51+
italic: false,
52+
strikethrough: false,
53+
underline: false,
54+
code: false,
55+
color: "default",
56+
},
57+
plain_text: "Caption before images.\nfr ",
58+
href: null,
59+
},
60+
{
61+
type: "text",
62+
text: {
63+
content: "https://i.imgur.com/pYmE7OJ.png",
64+
link: { url: "https://i.imgur.com/pYmE7OJ.png" },
65+
},
66+
annotations: {
67+
bold: false,
68+
italic: false,
69+
strikethrough: false,
70+
underline: false,
71+
code: false,
72+
color: "default",
73+
},
74+
plain_text: "https://i.imgur.com/pYmE7OJ.png",
75+
href: "https://i.imgur.com/pYmE7OJ.png",
76+
},
77+
{
78+
type: "text",
79+
text: { content: " es-", link: null },
80+
annotations: {
81+
bold: false,
82+
italic: false,
83+
strikethrough: false,
84+
underline: false,
85+
code: false,
86+
color: "default",
87+
},
88+
plain_text: "\nES ",
89+
href: null,
90+
},
91+
{
92+
type: "text",
93+
text: {
94+
content: "https://i.imgur.com/8paSZ0i.png",
95+
link: { url: "https://i.imgur.com/8paSZ0i.png" },
96+
},
97+
annotations: {
98+
bold: false,
99+
italic: false,
100+
strikethrough: false,
101+
underline: false,
102+
code: false,
103+
color: "default",
104+
},
105+
plain_text: "https://i.imgur.com/8paSZ0i.png",
106+
href: "https://i.imgur.com/8paSZ0i.png",
107+
},
108+
{
109+
type: "text",
110+
text: { content: "\nCaption after images", link: null },
111+
annotations: {
112+
bold: false,
113+
italic: false,
114+
strikethrough: false,
115+
underline: false,
116+
code: false,
117+
color: "default",
118+
},
119+
plain_text: "\nCaption after images.",
120+
href: null,
121+
},
122+
],
123+
type: "file",
124+
file: {
125+
url: kPrimaryImageUrl,
126+
expiry_time: "2022-07-26T16:35:44.029Z",
127+
},
128+
},
129+
};
130+
131+
const kImageBlockWithTwoLocalizedImages = {
12132
object: "block",
13133
id: "20b821b4-7c5b-41dc-8e30-92c23c125580",
14134
parent: {
@@ -95,7 +215,7 @@ const kImageBlock = {
95215
],
96216
type: "file",
97217
file: {
98-
url: "https://someimage.png",
218+
url: kPrimaryImageUrl,
99219
expiry_time: "2022-07-26T00:19:09.096Z",
100220
},
101221
},

src/NotionImage.ts

Lines changed: 73 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@ export async function initImageHandling(
2323
}
2424

2525
async function saveImage(
26-
url: string,
26+
imageSet: ImageSet,
2727
imageFolderPath: string
2828
): Promise<string> {
29-
const response = await fetch(url);
29+
const response = await fetch(imageSet.primaryUrl);
3030
const arrayBuffer = await response.arrayBuffer();
3131
const buffer = Buffer.from(arrayBuffer);
3232
const fileType = await FileType.fromBuffer(buffer);
@@ -35,29 +35,55 @@ async function saveImage(
3535
// Images that are stored by notion come to us with a complex url that changes over time, so we pick out the UUID that doesn't change. Example:
3636
// https://s3.us-west-2.amazonaws.com/secure.notion-static.com/d1058f46-4d2f-4292-8388-4ad393383439/Untitled.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIAT73L2G45EIPT3X45%2F20220516%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20220516T233630Z&X-Amz-Expires=3600&X-Amz-Signature=f215704094fcc884d37073b0b108cf6d1c9da9b7d57a898da38bc30c30b4c4b5&X-Amz-SignedHeaders=host&x-id=GetObject
3737

38-
let thingToHash = url;
39-
const m = /.*secure\.notion-static\.com\/(.*)\//gm.exec(url);
38+
let thingToHash = imageSet.primaryUrl;
39+
const m = /.*secure\.notion-static\.com\/(.*)\//gm.exec(
40+
imageSet.primaryUrl
41+
);
4042
if (m && m.length > 1) {
4143
thingToHash = m[1];
4244
}
4345

4446
const hash = hashOfString(thingToHash);
4547
const outputFileName = `${hash}.${fileType.ext}`;
46-
const path = imageFolderPath + "/" + outputFileName;
47-
imageWasSeen(path);
48-
if (!fs.pathExistsSync(path)) {
49-
// // I think that this ok that this is writing async as we continue
50-
console.log("Adding image " + path);
51-
fs.createWriteStream(path).write(buffer);
48+
const primaryFilePath = writeImageIfNew(
49+
imageFolderPath,
50+
outputFileName,
51+
buffer
52+
);
53+
54+
// if there are localized images, save them too, using the same
55+
// name as the primary but with their language code attached
56+
for (const localizedImage of imageSet.localizedUrls) {
57+
const outputFileName = `${hash}-${localizedImage.iso632Code}.${fileType.ext}`;
58+
console.log("Saving localized image to " + outputFileName);
59+
const response = await fetch(localizedImage.url);
60+
const arrayBuffer = await response.arrayBuffer();
61+
const buffer = Buffer.from(arrayBuffer);
62+
writeImageIfNew(imageFolderPath, outputFileName, buffer);
5263
}
53-
return outputFileName;
64+
65+
return primaryFilePath;
5466
} else {
5567
console.error(
56-
`Something wrong with the filetype extension on the blob we got from ${url}`
68+
`Something wrong with the filetype extension on the blob we got from ${imageSet.primaryUrl}`
5769
);
5870
return "error";
5971
}
6072
}
73+
function writeImageIfNew(
74+
imageFolderPath: string,
75+
outputFileName: string,
76+
buffer: Buffer
77+
) {
78+
const path = imageFolderPath + "/" + outputFileName;
79+
imageWasSeen(path);
80+
if (!fs.pathExistsSync(path)) {
81+
console.log("Adding image " + path);
82+
fs.createWriteStream(path).write(buffer); // async but we're not waiting
83+
}
84+
return outputFileName;
85+
}
86+
6187
function hashOfString(s: string) {
6288
let hash = 0;
6389
for (let i = 0; i < s.length; ++i)
@@ -66,33 +92,58 @@ function hashOfString(s: string) {
6692
return Math.abs(hash);
6793
}
6894

69-
type LocalizableImageWithCaption = {
70-
url: string;
95+
// we parse a notion image and its caption into what we need, which includes any urls to localized versions of the image that may be embedded in the caption
96+
type ImageSet = {
97+
primaryUrl: string;
7198
caption?: string;
7299
localizedUrls: Array<{ iso632Code: string; url: string }>;
73100
};
74-
export function parseImageBlock(b: any): LocalizableImageWithCaption {
75-
const img: LocalizableImageWithCaption = {
76-
url: "",
101+
export function parseImageBlock(b: any): ImageSet {
102+
const imageSet: ImageSet = {
103+
primaryUrl: "",
104+
caption: "",
77105
localizedUrls: [],
78106
};
79107

80108
if ("file" in b.image) {
81-
img.url = b.image.file.url; // image saved on notion (actually AWS)
109+
imageSet.primaryUrl = b.image.file.url; // image saved on notion (actually AWS)
82110
} else {
83-
img.url = b.image.external.url; // image still pointing somewhere else. I've see this happen when copying a Google Doc into Notion. Notion kep pointing at the google doc.
111+
imageSet.primaryUrl = b.image.external.url; // image still pointing somewhere else. I've see this happen when copying a Google Doc into Notion. Notion kep pointing at the google doc.
84112
}
85113

86-
return img;
114+
const mergedCaption: string = b.image.caption
115+
// eslint-disable-next-line @typescript-eslint/no-unsafe-return
116+
.map((c: any) => c.plain_text)
117+
.join("");
118+
const lines = mergedCaption.split("\n");
119+
120+
// Example:
121+
// Caption before images.\nfr https://i.imgur.com/pYmE7OJ.png\nES https://i.imgur.com/8paSZ0i.png\nCaption after images
122+
123+
lines.forEach(l => {
124+
const match = /\s*(..)\s*(https:\/\/.*)/.exec(l);
125+
if (match) {
126+
imageSet.localizedUrls.push({
127+
iso632Code: match[1].toUpperCase(),
128+
url: match[2],
129+
});
130+
} else {
131+
imageSet.caption += l + "\n";
132+
}
133+
});
134+
imageSet.caption = imageSet.caption?.trim();
135+
//console.log(JSON.stringify(imageSet, null, 2));
136+
137+
return imageSet;
87138
}
88139

89140
// Download the image if we don't have it, give it a good name, and
90141
// change the src to point to our copy of the image.
91142
export async function processImageBlock(b: any): Promise<void> {
143+
//console.log(JSON.stringify(b));
92144
const img = parseImageBlock(b);
93145

94-
const newPath =
95-
imagePrefix + "/" + (await saveImage(img.url, imageOutputPath));
146+
const newPath = imagePrefix + "/" + (await saveImage(img, imageOutputPath));
96147

97148
// change the src to point to our copy of the image
98149
if ("file" in b.image) {

0 commit comments

Comments
 (0)