Add import handling for images in Bear notes

This isn't an optimal solution: Bear uploads files/images to its cloud and doesn't preserve any metadata about a (possible) local path to the file on disk. Instead, we have to specify some search paths, do a recursive glob, and attempt to locate the source files with matching filenames ourselves.
solomonhawk · Sep 26, 2022 · 6cd9172 · 6cd9172
1 parent 4df22b7
commit 6cd9172
Show file tree

Hide file tree

Showing 7 changed files with 213 additions and 18 deletions.
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -17,12 +17,15 @@
     "@astrojs/sitemap": "^0.1.0",
     "@astrojs/tailwind": "^0.2.1",
     "@mertasan/tailwindcss-variables": "^2.5.0",
+    "@types/github-slugger": "^1.3.0",
     "@types/node": "^18.7.14",
     "@types/react": "^18.0.18",
     "astro": "^1.0.0-beta.28",
     "astro-compress": "1.0.7",
     "autoprefixer": "^10.4.8",
+    "fast-glob": "^3.2.12",
     "fp-ts": "^2.12.3",
+    "github-slugger": "^1.4.0",
     "io-ts": "^2.2.18",
     "postcss": "^8.4.16",
     "react": "^18.1.0",

diff --git a/public/assets/images/linters-formatters.png b/public/assets/images/linters-formatters.png
diff --git a/scripts/import-from-bear/config.ts b/scripts/import-from-bear/config.ts
@@ -0,0 +1,23 @@
+/** Settings for the script that imports Bear notes as blog posts. */
+type Config = {
+  /** Source of the regular expression that strips the blog prefix from a Bear tag. */
+  blogTagPattern: string;
+  /** Directories that are searched recursively for images embedded in Bear notes. */
+  imageSearchPaths: string[];
+  /** Layout written into the front matter of every generated post. */
+  defaultLayout: string;
+  /** Directory, relative to the repository root, that generated posts are written to. */
+  postsPath: string;
+  /** URL prefix under which imported images are referenced from a post. */
+  assetsUrl: string;
+}
+
+const config: Config = {
+  // Matches a leading `blog`, `blog/`, `blog/solomonhawk` or `blog/solomonhawk/`.
+  // The whole pattern is anchored (in `^(a)|(b)` the `^` only applies to the
+  // first branch) and the prefix must be followed by `/` or the end of the
+  // tag, so tags such as `weblog/x` or `blogging` are left untouched.
+  blogTagPattern: '^blog(?:/solomonhawk)?(?:/|$)',
+  // XXX: this isn't great - is there a better way to find the images that were
+  // embedded in a bear post?
+  imageSearchPaths: [
+    '~/Downloads',
+    '~/Documents',
+    '~/Pictures',
+  ],
+  defaultLayout: '@layouts/BlogPost.astro',
+  postsPath: 'src/pages/writing/posts',
+  assetsUrl: '/assets/images'
+}
+
+export default config;
diff --git a/scripts/import-from-bear/index.ts b/scripts/import-from-bear/index.ts
@@ -1,15 +1,20 @@
 import { pipe } from 'fp-ts/lib/function';
 import * as ra from 'fp-ts/ReadonlyArray';
-import { readJSONFromStdIn, convertToMarkdown, parsePost, writePostAsMarkdown } from './lib';
+import { readJSONFromStdIn, parsePost, convertToMarkdown, extractImageFilenames, writePostAsMarkdown, copyFilesToAssets } from './lib';
 
+/**
+ * Entry point of the import script: reads the exported Bear posts as JSON from
+ * stdin, converts each post that `parsePost` accepts to markdown and writes it
+ * to disk, then copies the images those posts reference into the site's assets.
+ */
 async function main() {
+  // Filled in as a side effect of the `extractImageFilenames` step below.
+  const imageFilenames = new Set<string>();
+
   pipe(
     await readJSONFromStdIn(),
     ra.filterMap(parsePost),
     ra.map(convertToMarkdown),
-    ra.map(writePostAsMarkdown)
+    ra.map(extractImageFilenames(imageFilenames)),
+    ra.map(writePostAsMarkdown),
   );
 
+  // Runs after the pipeline so that every post has contributed its images.
+  await copyFilesToAssets(imageFilenames);
+
   console.log('> Finished processing JSON.');
 }
 

diff --git a/scripts/import-from-bear/lib.ts b/scripts/import-from-bear/lib.ts
@@ -2,8 +2,14 @@ import { isRight } from 'fp-ts/lib/Either';
 import { pipe } from 'fp-ts/lib/function';
 import * as O from 'fp-ts/Option';
 import fs from 'fs';
+import fg from 'fast-glob';
+import os from 'os';
 import path from 'path';
+import GithubSlugger from 'github-slugger';
 import { PostData, PostSchema } from '../../src/types/post';
+import config from './config';
+
+const slugger = new GithubSlugger();
 
 type DBPost = {
   title: string;
@@ -12,6 +18,15 @@ type DBPost = {
   tags: string;
 }
 
+/** Lookup from an image's base file name to the state of the search for it. */
+type FileInfoMap = Record<string, {
+  /** Full path the image is copied to. */
+  filename: string;
+  /** `filename` split into its components by `path.parse`. */
+  info: path.ParsedPath;
+  /** Whether a source file has been located in one of the search paths. */
+  found: boolean;
+  /** Location of the source file on disk, or `null` until one is found. */
+  path: string | null;
+}>;
+
 export async function readJSONFromStdIn(): Promise<DBPost[]> {
   const data = await fs.promises.readFile('/dev/stdin', 'utf-8');
   return JSON.parse(data);
@@ -44,29 +59,32 @@ function postDate(date: number): string {
   return d.toISOString();
 }
 
+/**
+ * Normalizes the tags of a Bear note for use in a post's front matter: strips
+ * the blog prefix described by `config.blogTagPattern` from every tag, then
+ * drops tags that end up empty as well as the "published" marker tag.
+ *
+ * @param tags the tags attached to the Bear note
+ * @returns the tags to publish with the post
+ */
 function filterBearTags(tags: string[]): string[] {
-  return tags.map(tag => tag.replace(/^(blog\/solomonhawk\/?)|(blog\/?)/, '')).filter(Boolean).filter(tag => tag !== 'published');
+  // Compile the configured pattern once instead of once per tag.
+  const blogTagPrefix = new RegExp(config.blogTagPattern);
+
+  return tags
+    .map(tag => tag.replace(blogTagPrefix, ''))
+    .filter(tag => tag !== '' && tag !== 'published');
 }
 
+/**
+ * Renders a parsed post as the contents of an MDX file.
+ *
+ * The result starts with a front matter block (layout taken from
+ * `config.defaultLayout`, then title, publish date and tags) followed by the
+ * note's markdown with its title line and Bear tags stripped and its image
+ * references rewritten.
+ *
+ * @param post the parsed post to convert
+ * @returns the slug of the post's title, used as the file name without
+ *          extension, and the contents of the file
+ */
 export function convertToMarkdown(post: PostData): { filename: string, markdown: string } {
   console.log(`> Converting "${post.title}" to MDX`);
 
   return {
-    filename: slugify(post.title),
+    filename: slugger.slug(post.title),
     markdown: `---
-layout: '@layouts/BlogPost.astro'
+layout: '${config.defaultLayout}'
 title: ${post.title}
 publishDate: ${post.publishDate}
 tags: [${post.tags?.join(', ')}]
 ---
-${pipe(post.markdown?.trim() || '', stripNoteTitle, stripBearTags).trim()}
+${pipe(post.markdown?.trim() || '', stripNoteTitle, stripBearTags, rewriteImageRefs).trim()}
 `};
 }
 
-function slugify(str: string): string {
-  return str.toLowerCase().replace(/ /g, '-');
-}
-
 /**
  * Removes the first line of the note (H1 title).
  *
@@ -85,14 +103,97 @@ function stripNoteTitle(markdown: string): string {
  * @returns string with Bear tags removed
  */
 function stripBearTags(markdown: string): string {
-  return markdown.replace(/(?<!`)\B#[\w\/]+\b/gm, '');
+  // A tag is a `#` that follows neither a word character nor a backtick, plus
+  // the run of word characters, slashes and hyphens after it (`#blog/my-tag`).
+  return markdown.replace(/(?<!`)\B#[\w\/-]+\b/gm, '');
+}
+
+/**
+ * Builds a pipeline step that records every local image a post refers to.
+ *
+ * Each image URL found in the post's markdown is joined onto the `public`
+ * directory below the current working directory and added to
+ * `imageFilenames`. The post itself is passed through unchanged.
+ *
+ * @param imageFilenames set that collects the image paths; mutated in place
+ * @returns a function that scans one post and returns that same post
+ */
+export function extractImageFilenames(imageFilenames: Set<string>) {
+  return (post: { filename: string, markdown: string }) => {
+    // Matches the URL in a markdown image tag like `![alt text](<url>)`. The alt
+    // text is matched lazily: a greedy `.*` would run on to the last `](` of the
+    // line, so only the last of several images on one line would be recorded.
+    const pattern = /!\[.*?\]\((?<filename>[\w\/-]+\.\w+)\)/gm;
+
+    for (const match of post.markdown.matchAll(pattern)) {
+      const filename = match.groups?.filename;
+
+      if (!filename) {
+        console.error(`[!] Failed to extract image filename for "${post.filename}"`);
+        continue;
+      }
+
+      imageFilenames.add(path.join(process.cwd(), 'public', filename));
+    }
+
+    return post;
+  }
+}
+
+/**
+ * Replaces Bear's image references (`[image:<path>/<name>.<ext>]` at the end of
+ * a line) with markdown image tags that point at `config.assetsUrl`. The file
+ * name without its extension is used as the alt text.
+ *
+ * @param markdown Markdown string with possible embedded image refs
+ * @returns Markdown with image refs transformed to image tags
+ */
+function rewriteImageRefs(markdown: string): string {
+  // `[^\]\n]*` keeps a match inside one pair of brackets. A greedy `.*` could
+  // start at an earlier `[image:` on the same line and swallow the text between.
+  return markdown.replace(/\[image:[^\]\n]*\/([\w-]+)\.(\w+)\]$/gm, `![$1](${config.assetsUrl}/$1.$2)`);
 }
 
+/**
+ * Writes a converted post to `<config.postsPath>/<filename>.mdx`, resolved
+ * against the directory two levels above this script. The write is
+ * synchronous, and the relative target path is logged first.
+ *
+ * @param post the file name (without extension) and contents to write
+ */
 export function writePostAsMarkdown({filename, markdown}: { filename: string, markdown: string }): void {
-  const relPath = `./src/pages/writing/posts/${filename}.mdx`;
+  const relPath = path.join('./', config.postsPath, `${filename}.mdx`);
   const fullPath = path.resolve(__dirname, '../../', relPath);
 
   console.log(`> Writing ${relPath}`);
 
   fs.writeFileSync(fullPath, markdown);
 }
+
+/**
+ * Locates every requested image in `config.imageSearchPaths` and copies it to
+ * its destination path.
+ *
+ * Images that cannot be found or copied are reported on stderr; one failure
+ * does not stop the remaining images from being processed.
+ *
+ * @param filenames destination paths of the images to import; the base name of
+ *                  each path is the file name that is searched for
+ */
+export async function copyFilesToAssets(filenames: Set<string>): Promise<void> {
+  // Nothing to look for: skip the recursive scan of every search path.
+  if (filenames.size === 0) {
+    return;
+  }
+
+  const fileInfos: FileInfoMap = collectFileInfos(filenames);
+
+  for (const searchDir of config.imageSearchPaths) {
+    for await (const file of streamMatches(searchDir, fileInfos)) {
+      updateFileInfos(fileInfos, file);
+    }
+  }
+
+  for (const [name, { filename: destination, found, path: source }] of Object.entries(fileInfos)) {
+    if (!found || !source) {
+      console.error(`[!] Failed to find "${name}" in search paths`);
+      continue;
+    }
+
+    try {
+      // The destination directory may not exist yet (e.g. on a fresh checkout).
+      await fs.promises.mkdir(path.dirname(destination), { recursive: true });
+      await fs.promises.copyFile(source, destination);
+      console.log(`> Copied "${name}" to "${config.assetsUrl}"`);
+    } catch (err) {
+      // Include the cause so that a failed copy can be diagnosed.
+      console.error(`[!] Failed to copy "${name}" to assets`, err);
+    }
+  }
+}
+
+/**
+ * Builds the lookup table used while searching for images. Every path in
+ * `filenames` gets an entry keyed by its base name (e.g. `photo.png`), and
+ * each entry starts out as not found.
+ *
+ * @param filenames destination paths of the images to look for
+ * @returns the table of search states, keyed by base name
+ */
+function collectFileInfos(filenames: Set<string>): FileInfoMap {
+  const entries: Array<[string, FileInfoMap[string]]> = [];
+
+  for (const filename of filenames) {
+    const info = path.parse(filename);
+    entries.push([info.base, { filename, info, found: false, path: null }]);
+  }
+
+  return Object.fromEntries(entries);
+}
+
+/**
+ * Starts a search below `dir`, at any depth, for the files tracked in
+ * `fileInfos`. A `~` in the resulting pattern is expanded by `resolveDir`.
+ * Symbolic links are not followed and errors raised during the search are
+ * suppressed.
+ *
+ * @param dir directory to search
+ * @param fileInfos table whose keys are the file names to look for
+ * @returns the stream produced by fast-glob for the pattern
+ */
+function streamMatches(dir: string, fileInfos: FileInfoMap): NodeJS.ReadableStream {
+  // "$SEARCHDIR/**/@(file1|file2|file3)", glob match for exactly the files we want
+  const wantedNames = Object.keys(fileInfos).join('|');
+  const glob = resolveDir(path.join(dir, "**", `@(${wantedNames})`));
+
+  return fg.stream(glob, { followSymbolicLinks: false, suppressErrors: true });
+}
+
+/**
+ * Records that a search has located `file`: the entry keyed by the file's base
+ * name is marked as found and remembers where the file lives on disk. A later
+ * match for the same base name replaces an earlier one.
+ *
+ * @param fileInfos table of search states, keyed by base name; mutated in place
+ * @param file path of the matched file as emitted by the search stream
+ */
+function updateFileInfos(fileInfos: FileInfoMap, file: string | Buffer): void {
+  if (typeof file !== 'string') {
+    console.warn(`[!] Unexpected file type Buffer: ${file}`);
+    return;
+  }
+
+  const { base } = path.parse(file);
+
+  // Ignore a match that was never asked for instead of crashing on the lookup.
+  if (!Object.prototype.hasOwnProperty.call(fileInfos, base)) {
+    console.warn(`[!] Ignoring unexpected match "${file}"`);
+    return;
+  }
+
+  const entry = fileInfos[base];
+  entry.found = true;
+  entry.path = file;
+}
+
+/**
+ * Expands a leading `~` in a path or glob pattern to the current user's home
+ * directory. A `~` anywhere else (e.g. `/data/foo~bar`) is left as is.
+ *
+ * @param dir path or pattern that may start with `~`
+ * @returns the path with a leading `~` replaced by the home directory
+ */
+function resolveDir(dir: string): string {
+  // Only a `~` that forms the whole first path segment stands for home. The
+  // replacer function inserts the home path literally, even if it contains `$`.
+  return dir.replace(/^~(?=$|[\\/])/, () => os.homedir());
+}
diff --git a/src/pages/writing/posts/is-code-formatting-a-linter-concern.mdx b/src/pages/writing/posts/is-code-formatting-a-linter-concern.mdx
@@ -0,0 +1,17 @@
+---
+layout: '@layouts/BlogPost.astro'
+title: Is code formatting a linter concern?
+publishDate: 2022-09-26T04:00:00.000Z
+tags: [draft, linting, code-quality, formatting, prettier, eslint]
+---
+Linters and code-formatters are both crucial tools I rely on for writing quality software. I’m always happy to delegate effort to a tool that can statically analyze my work and provide helpful guidance.
+
+According to the [wiki](https://en.wikipedia.org/wiki/Lint_(software)), linting encompasses automated checks for programming errors, bugs, stylistic errors and “suspicious constructs”.
+
+Code-formatters are constrained to consider style and presentation only. The [wiki](https://en.wikipedia.org/wiki/Prettyprint#Programming_code_formatting) entry under `Prettyprint` describes converting source code from one format to another.
+
+A fair question to ask is how best to integrate tools of these 2 categories. It seems that, to a degree, the category of tools that lint code encompasses those that format it.
+
+![linters-formatters](/assets/images/linters-formatters.png)
+
+In the JavaScript ecosystem, the standard toolset includes [ESLint](https://eslint.org/) and [Prettier](https://prettier.io/). One notable difference between the two is configurability and extensibility. Prettier is opinionated and has limited customization through its configuration file, which is a feature, not a bug.