Skip to content

Commit 07f66e0

Browse files
committed
Finish refactoring transformers
1 parent be5677b commit 07f66e0

File tree

7 files changed

+154
-125
lines changed

7 files changed

+154
-125
lines changed

src/pull.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ async function outputPage(page: NotionPage) {
235235
);
236236

237237
// One half of a horrible hack to make heading links work.
238-
// See the other half and explanation in CustomTransformers.ts => headingCustomTransformer.
238+
// See the other half and explanation in HeadingTransformer.ts.
239239
for (const block_t of blocks) {
240240
const block = block_t as any;
241241
if (block.type.startsWith("heading"))

src/transformers/CalloutTransformer.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,13 @@ import { ListBlockChildrenResponseResult } from "notion-to-md/build/types";
33
import { Client } from "@notionhq/client";
44
import { getBlockChildren } from "./CustomTransformers";
55

6+
// In Notion, you can make a callout and change its emoji. We map 5 of these
7+
// to the 5 Docusaurus admonition styles.
8+
// This is mostly a copy of the callout code from notion-to-md. The change is to output docusaurus
9+
// admonitions instead of emulating a callout with markdown > syntax.
10+
// Note: I haven't yet tested this with any emoji except "💡"/"tip", nor the case where the
11+
// callout has-children. Not even sure what that would mean, since the document I was testing
12+
// with has quite complex markup inside the callout, but still takes the no-children branch.
613
export async function notionCalloutToAdmonition(
714
notionToMarkdown: NotionToMarkdown,
815
notionClient: Client,
src/transformers/ColumnListTransformer.ts

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import { Client } from "@notionhq/client";
2+
import { NotionToMarkdown } from "notion-to-md";
3+
import { ListBlockChildrenResponseResult } from "notion-to-md/build/types";
4+
import { getBlockChildren } from "./CustomTransformers";
5+
6+
/**
 * Renders a Notion column_list block as a `<div class='notion-row'>` element
 * wrapping the markdown of each of its child blocks.
 *
 * Enhance: The @notionhq/client, which uses the official API, cannot yet get at
 * column formatting information (column_ratio). However
 * https://github1s.com/NotionX/react-notion-x/blob/master/packages/react-notion-x/src/block.tsx#L528
 * can get it.
 *
 * @param notionToMarkdown converter used to render each child block
 * @param notionClient client handed to getBlockChildren to fetch the children
 * @param block the column_list block to convert
 * @returns the wrapped markup, or "" when the block reports no children
 */
export async function notionColumnListToMarkdown(
  notionToMarkdown: NotionToMarkdown,
  notionClient: Client,
  block: ListBlockChildrenResponseResult
): Promise<string> {
  // Cast to "any": the Notion API block type is a complex union that we
  // don't know how to help TS narrow here.
  const listBlock = block as any;
  if (!listBlock.has_children) {
    return "";
  }

  // NOTE(review): 100 is getBlockChildren's third argument; presumably a
  // limit or page size -- confirm against its definition.
  const childBlocks = await getBlockChildren(notionClient, listBlock.id, 100);

  // Render all children concurrently; Promise.all keeps them in child order.
  const renderedColumns: string[] = await Promise.all(
    childBlocks.map(async child => notionToMarkdown.blockToMarkdown(child))
  );

  const inner = renderedColumns.join("\n\n");
  return `<div class='notion-row'>\n${inner}\n</div>`;
}

src/transformers/ColumnTransformer.ts

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import { Client } from "@notionhq/client";
2+
import { NotionToMarkdown } from "notion-to-md";
3+
import { ListBlockChildrenResponseResult } from "notion-to-md/build/types";
4+
import { getBlockChildren } from "./CustomTransformers";
5+
6+
/**
 * Renders a single Notion column block as a `<div class='notion-column'>`
 * element wrapping the markdown of each of its child blocks.
 *
 * @param notionToMarkdown converter used to render each child block
 * @param notionClient client handed to getBlockChildren to fetch the children
 * @param block the column block to convert
 * @returns the wrapped markup, or "" when the block reports no children
 */
export async function notionColumnToMarkdown(
  notionToMarkdown: NotionToMarkdown,
  notionClient: Client,
  block: ListBlockChildrenResponseResult
): Promise<string> {
  // Cast to "any": the Notion API block type is a complex union that we
  // don't know how to help TS narrow here.
  const columnBlock = block as any;
  if (!columnBlock.has_children) {
    return "";
  }

  // NOTE(review): 100 is getBlockChildren's third argument; presumably a
  // limit or page size -- confirm against its definition.
  const childBlocks = await getBlockChildren(notionClient, columnBlock.id, 100);

  // Render all children concurrently; Promise.all keeps them in child order.
  const renderedChildren: string[] = await Promise.all(
    childBlocks.map(async child => notionToMarkdown.blockToMarkdown(child))
  );

  // note: it would look better in the markup with \n, but that
  // causes notion-to-md to give us ":::A" instead of \n for some reason.
  const inner = renderedChildren.join("\n\n");
  return `<div class='notion-column'>\n\n${inner}\n\n</div>`;
}

src/transformers/CustomTransformers.ts

Lines changed: 8 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
import { Client } from "@notionhq/client";
22
import { ListBlockChildrenResponse } from "@notionhq/client/build/src/api-endpoints";
33
import { NotionToMarkdown } from "notion-to-md";
4-
import markdownTable from "markdown-table";
54
import {
65
ListBlockChildrenResponseResult,
76
ListBlockChildrenResponseResults,
87
} from "notion-to-md/build/types";
98
import { notionCalloutToAdmonition } from "./CalloutTransformer";
109
import { numberedListTransformer } from "./NumberedListTransformer";
10+
import { notionColumnToMarkdown } from "./ColumnTransformer";
11+
import { notionColumnListToMarkdown } from "./ColumnListTransformer";
12+
import { tableTransformer } from "./TableTransformer";
13+
import { headingTransformer } from "./HeadingTransformer";
1114

1215
export function setupCustomTransformers(
1316
notionToMarkdown: NotionToMarkdown,
@@ -25,66 +28,12 @@ export function setupCustomTransformers(
2528
notionColumnToMarkdown(notionToMarkdown, notionClient, block)
2629
);
2730

28-
// This is mostly a copy of the table handler from notion-to-md. The change is to handle newlines in the
29-
// notion cell content.
3031
notionToMarkdown.setCustomTransformer(
3132
"table",
32-
async (block: ListBlockChildrenResponseResult) => {
33-
const { id, has_children } = block as any;
34-
const tableArr: string[][] = [];
35-
if (has_children) {
36-
const tableRows = await getBlockChildren(notionClient, id, 100);
37-
// console.log(">>", tableRows);
38-
const rowsPromise = tableRows?.map(async row => {
39-
const { type } = row as any;
40-
const cells = (row as any)[type]["cells"];
41-
42-
/**
43-
* this is more like a hack since matching the type text was
44-
* difficult. So converting each cell to paragraph type to
45-
* reuse the blockToMarkdown function
46-
*/
47-
const cellStringPromise = cells.map(
48-
async (cell: any) =>
49-
await notionToMarkdown.blockToMarkdown({
50-
type: "paragraph",
51-
paragraph: { rich_text: cell },
52-
} as ListBlockChildrenResponseResult)
53-
);
54-
55-
const cellStringArrRaw: string[] = await Promise.all(
56-
cellStringPromise
57-
);
58-
// This is our patch to the original notion-to-md code.
59-
const cellStringArr = cellStringArrRaw.map(c =>
60-
c
61-
// Trailing newlines are almost certainly not wanted, and converting to br's gives weird results
62-
.replace(/[\r\n]+$/, "")
63-
// Preserving line breaks within cells can't be done in stock markdown. Since we're producing
64-
// mdx, which supports embedded HTML, we can handle it with <br/>.
65-
// I'm not sure exactly what line breaks might occur in the input, depending on platform,
66-
// so handle all the common cases.
67-
.replaceAll("\r\n", "<br/>")
68-
.replaceAll("\n", "<br/>")
69-
.replaceAll("\r", "<br/>")
70-
);
71-
// console.log("~~", cellStringArr);
72-
tableArr.push(cellStringArr);
73-
// console.log(tableArr);
74-
});
75-
await Promise.all(rowsPromise || []);
76-
}
77-
return markdownTable(tableArr);
78-
}
33+
(block: ListBlockChildrenResponseResult) =>
34+
tableTransformer(notionToMarkdown, notionClient, block)
7935
);
8036

81-
// In Notion, you can make a callout and change its emoji. We map 5 of these
82-
// to the 5 Docusaurus admonition styles.
83-
// This is mostly a copy of the callout code from notion-to-md. The change is to output docusaurus
84-
// admonitions instead of emulating a callout with markdown > syntax.
85-
// Note: I haven't yet tested this with any emoji except "💡"/"tip", nor the case where the
86-
// callout has-children. Not even sure what that would mean, since the document I was testing
87-
// with has quite complex markup inside the callout, but still takes the no-children branch.
8837
notionToMarkdown.setCustomTransformer(
8938
"callout",
9039
(block: ListBlockChildrenResponseResult) =>
@@ -97,26 +46,8 @@ export function setupCustomTransformers(
9746
numberedListTransformer(notionToMarkdown, notionClient, block)
9847
);
9948

100-
const headingCustomTransformer = async (
101-
block: ListBlockChildrenResponseResult
102-
) => {
103-
// This is the other half of the horrible hack in pull.ts which sets the type
104-
// of every heading_n to my_heading_n. We have to do this because if
105-
// we simply set a custom transformer to heading_n, it will keep
106-
// recursively calling this code, with blockToMarkdown using the custom transformer
107-
// over and over. Instead, we want blockToMarkdown to give us the normal
108-
// result, to which we will append the block ID to enable heading links.
109-
(block as any).type = (block as any).type.replace("my_", "");
110-
111-
const unmodifiedMarkdown = await notionToMarkdown.blockToMarkdown(block);
112-
// For some reason, inline links come in without the dashes, so we have to strip
113-
// dashes here to match them.
114-
const blockIdSansDashes = block.id.replaceAll("-", "");
115-
// To make heading links work in docusaurus, you make them look like:
116-
// ### Hello World {#my-explicit-id}
117-
// See https://docusaurus.io/docs/markdown-features/toc#heading-ids.
118-
return `${unmodifiedMarkdown} {#${blockIdSansDashes}}`;
119-
};
49+
const headingCustomTransformer = (block: ListBlockChildrenResponseResult) =>
50+
headingTransformer(notionToMarkdown, notionClient, block);
12051
notionToMarkdown.setCustomTransformer(
12152
"my_heading_1",
12253
headingCustomTransformer
@@ -134,53 +65,6 @@ export function setupCustomTransformers(
13465
// page so we don't do it here.
13566
}
13667

137-
async function notionColumnListToMarkdown(
138-
notionToMarkdown: NotionToMarkdown,
139-
notionClient: Client,
140-
block: ListBlockChildrenResponseResult
141-
): Promise<string> {
142-
// Enhance: The @notionhq/client, which uses the official API, cannot yet get at column formatting information (column_ratio)
143-
// However https://github1s.com/NotionX/react-notion-x/blob/master/packages/react-notion-x/src/block.tsx#L528 can get it.
144-
145-
const { id, has_children } = block as any; // "any" because the notion api type system is complex with a union that don't know how to help TS to cope with
146-
147-
if (!has_children) return "";
148-
149-
const column_list_children = await getBlockChildren(notionClient, id, 100);
150-
151-
const column_list_promise = column_list_children.map(
152-
async column => await notionToMarkdown.blockToMarkdown(column)
153-
);
154-
155-
const columns: string[] = await Promise.all(column_list_promise);
156-
157-
return `<div class='notion-row'>\n${columns.join("\n\n")}\n</div>`;
158-
}
159-
async function notionColumnToMarkdown(
160-
notionToMarkdown: NotionToMarkdown,
161-
notionClient: Client,
162-
block: ListBlockChildrenResponseResult
163-
): Promise<string> {
164-
//console.log(JSON.stringify(block));
165-
const { id, has_children } = block as any; // "any" because the notion api type system is complex with a union that don't know how to help TS to cope with
166-
167-
if (!has_children) return "";
168-
169-
const children = await getBlockChildren(notionClient, id, 100);
170-
171-
const childrenPromise = children.map(
172-
async column => await notionToMarkdown.blockToMarkdown(column)
173-
);
174-
175-
const childrenStrings: string[] = await Promise.all(childrenPromise);
176-
177-
// note: it would look better in the markup with \n, but that
178-
// causes notion-to-md to give us ":::A" instead of \n for some reason.
179-
return `<div class='notion-column'>\n\n${childrenStrings.join(
180-
"\n\n"
181-
)}\n\n</div>`;
182-
}
183-
18468
export async function getBlockChildren(
18569
notionClient: Client,
18670
block_id: string,
src/transformers/HeadingTransformer.ts

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import { Client } from "@notionhq/client";
2+
import { NotionToMarkdown } from "notion-to-md";
3+
import { ListBlockChildrenResponseResult } from "notion-to-md/build/types";
4+
5+
/**
 * Renders a heading block with the stock notion-to-md output and appends an
 * explicit Docusaurus heading id derived from the block id, so that links to
 * the heading resolve.
 *
 * This is the other half of the horrible hack in pull.ts, which sets the type
 * of every heading_n to my_heading_n. If a custom transformer were registered
 * directly on heading_n, blockToMarkdown would call that transformer again,
 * recursing over and over. Restoring the real type here lets blockToMarkdown
 * give us the normal result, to which the id is appended.
 *
 * @param notionToMarkdown converter used to produce the unmodified heading markdown
 * @param notionClient not used by this function
 * @param block the heading block; its `type` is rewritten in place
 * @returns the heading markdown followed by ` {#<block id without dashes>}`
 */
export async function headingTransformer(
  notionToMarkdown: NotionToMarkdown,
  notionClient: Client,
  block: ListBlockChildrenResponseResult
): Promise<string> {
  // Undo the "my_" prefix so the stock heading handling applies to this block.
  const heading = block as any;
  heading.type = heading.type.replace("my_", "");

  const stockMarkdown = await notionToMarkdown.blockToMarkdown(block);

  // For some reason, inline links come in without the dashes, so we have to
  // strip dashes here to match them.
  const anchorId = block.id.split("-").join("");

  // To make heading links work in docusaurus, you make them look like:
  //   ### Hello World {#my-explicit-id}
  // See https://docusaurus.io/docs/markdown-features/toc#heading-ids.
  return `${stockMarkdown} {#${anchorId}}`;
}

src/transformers/TableTransformer.ts

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import { Client } from "@notionhq/client";
2+
import { NotionToMarkdown } from "notion-to-md";
3+
import { ListBlockChildrenResponseResult } from "notion-to-md/build/types";
4+
import { getBlockChildren } from "./CustomTransformers";
5+
import markdownTable from "markdown-table";
6+
7+
// This is mostly a copy of the table handler from notion-to-md. The change is to handle newlines in the
8+
// notion cell content.
9+
/**
 * Makes the markdown rendered for one table cell safe to place inside a
 * markdown table row.
 */
function cellMarkdownToMdx(cellMarkdown: string): string {
  return (
    cellMarkdown
      // Trailing newlines are almost certainly not wanted, and converting to br's gives weird results
      .replace(/[\r\n]+$/, "")
      // Preserving line breaks within cells can't be done in stock markdown. Since we're producing
      // mdx, which supports embedded HTML, we can handle it with <br/>.
      // I'm not sure exactly what line breaks might occur in the input, depending on platform,
      // so handle all the common cases (\r\n is matched first so it yields a single <br/>).
      .replace(/\r\n|\n|\r/g, "<br/>")
  );
}

/**
 * Renders a Notion table block as a markdown table.
 *
 * This is mostly a copy of the table handler from notion-to-md. The change is
 * to handle newlines in the notion cell content (see cellMarkdownToMdx).
 *
 * Rows are collected from the ordered result of Promise.all rather than being
 * pushed onto a shared array from concurrently running callbacks, so the
 * output row order always matches the order of the fetched rows regardless of
 * which row finishes rendering first.
 *
 * @param notionToMarkdown converter used to render each cell's rich text
 * @param notionClient client handed to getBlockChildren to fetch the rows
 * @param block the table block to convert
 * @returns the result of markdownTable for the rendered rows (for an empty
 *          row list when the block reports no children)
 */
export async function tableTransformer(
  notionToMarkdown: NotionToMarkdown,
  notionClient: Client,
  block: ListBlockChildrenResponseResult
): Promise<string> {
  const { id, has_children } = block as any;
  if (!has_children) {
    return markdownTable([]);
  }

  // NOTE(review): 100 is getBlockChildren's third argument; presumably a
  // limit or page size -- confirm against its definition.
  const tableRows = await getBlockChildren(notionClient, id, 100);

  const tableArr: string[][] = await Promise.all(
    (tableRows ?? []).map(async row => {
      const { type } = row as any;
      const cells = (row as any)[type]["cells"];

      /**
       * this is more like a hack since matching the type text was
       * difficult. So converting each cell to paragraph type to
       * reuse the blockToMarkdown function
       */
      const cellStringArrRaw: string[] = await Promise.all(
        cells.map(async (cell: any) =>
          notionToMarkdown.blockToMarkdown({
            type: "paragraph",
            paragraph: { rich_text: cell },
          } as ListBlockChildrenResponseResult)
        )
      );

      // This is our patch to the original notion-to-md code.
      return cellStringArrRaw.map(c => cellMarkdownToMdx(c));
    })
  );

  return markdownTable(tableArr);
}

0 commit comments

Comments
 (0)