From 47448205e8b8ccd0506145f15561777fc1b858dd Mon Sep 17 00:00:00 2001 From: Vaibhav Raj <68665948+dead8309@users.noreply.github.com> Date: Wed, 25 Dec 2024 22:22:28 +0530 Subject: [PATCH 1/4] correct ppt support info --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2ecfc19..a4c8583 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,6 @@ It supports: - [x] PDF -- [x] PowerPoint - [x] Word (.docx) - [x] Excel (.xlsx) - [x] Images (EXIF metadata extraction and optional LLM-based description) @@ -17,6 +16,7 @@ It supports: - [x] Jupyter Notebooks (.ipynb) - [x] Bing Search Result Pages (SERP) - [x] ZIP files (recursively iterates over contents) +- [ ] PowerPoint > [!NOTE] > From 3461eb9c630b00545e60aa5823700701aae9900d Mon Sep 17 00:00:00 2001 From: Vaibhav Raj Date: Thu, 2 Jan 2025 19:22:55 +0530 Subject: [PATCH 2/4] remove ai from peerDependencies - import unzipper dynamically and show error if not installed --- package.json | 7 ++----- src/converters/zip.ts | 10 +++++++++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/package.json b/package.json index 694fbce..32891f8 100644 --- a/package.json +++ b/package.json @@ -59,10 +59,10 @@ "mime-types": "^2.1.35", "pdf-ts": "^0.0.2", "turndown": "^7.2.0", - "xlsx": "^0.18.5" + "xlsx": "^0.18.5", + "ai": "^4.0.22" }, "peerDependencies": { - "ai": "^4.0.22", "youtube-transcript": "^1.2.1", "unzipper": "^0.12.3" }, @@ -70,9 +70,6 @@ "youtube-transcript": { "optional": true }, - "ai": { - "optional": true - }, "unzipper": { "optional": true } diff --git a/src/converters/zip.ts b/src/converters/zip.ts index 5979d3b..a9c370d 100644 --- a/src/converters/zip.ts +++ b/src/converters/zip.ts @@ -1,7 +1,6 @@ import { ConverterOptions, ConverterResult, DocumentConverter } from "../types"; import * as fs from "fs/promises"; import * as path from "path"; -import * as unzipper from "unzipper"; export class ZipConverter implements DocumentConverter { async convert( @@ -29,6 +28,15 @@ export class ZipConverter implements DocumentConverter { text_content: `[ERROR] Invalid zip file path: ${localPath}` }; } + let unzipper; + try { + unzipper = await import("unzipper").then((mod) => mod.default); + } catch (error) { + console.error( + "Optional dependency 'unzipper' is not installed. Run `npm install unzipper` to enable this feature." + ); + return null; + } try { await fs.mkdir(newFolder, { recursive: true }); const zip = await unzipper.Open.file(localPath); From 8c017a520a53506f243006e27d8517772c14a75a Mon Sep 17 00:00:00 2001 From: Vaibhav Raj Date: Thu, 2 Jan 2025 19:24:51 +0530 Subject: [PATCH 3/4] fix(xlsx): xlsx does not contain a default export (imported as 'XLSX') ref: #1 --- src/converters/xlsx.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/converters/xlsx.ts b/src/converters/xlsx.ts index ebc6875..a3eeeae 100644 --- a/src/converters/xlsx.ts +++ b/src/converters/xlsx.ts @@ -1,7 +1,7 @@ import { ConverterOptions, ConverterResult } from "../types"; import { HtmlConverter } from "./html"; import * as fs from "fs"; -import XLSX from "xlsx"; +import * as XLSX from "xlsx"; export class XlsxConverter extends HtmlConverter { async convert(local_path: string, options: ConverterOptions): Promise { From 30d07220e6ff66edbbbfde836d65846647bb2191 Mon Sep 17 00:00:00 2001 From: Vaibhav Raj Date: Thu, 2 Jan 2025 19:27:43 +0530 Subject: [PATCH 4/4] chore: release v0.0.4 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 32891f8..6fd71e0 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "markitdown-ts", - "version": "0.0.3", + "version": "0.0.4", "description": "", "keywords": [], "homepage": "https://github.com/dead8309/markitdown-ts#readme",