Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 55 additions & 3 deletions packages/markitdown/src/markitdown/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,22 @@ def main():
help="Keep data URIs (like base64-encoded images) in the output. By default, data URIs are truncated.",
)

parser.add_argument(
"--llm-client",
choices=["openai"],
help="LLM client to use for converters that support vision or OCR. Currently supports 'openai'.",
)

parser.add_argument(
"--llm-model",
help="LLM model to pass to converters that use --llm-client.",
)

parser.add_argument(
"--llm-prompt",
help="Optional prompt override for converters that use --llm-client.",
)

parser.add_argument("filename", nargs="?")
args = parser.parse_args()

Expand Down Expand Up @@ -200,6 +216,8 @@ def main():
)
sys.exit(0)

llm_kwargs = _parse_llm_options(args)

if args.use_docintel:
if args.endpoint is None:
_exit_with_error(
Expand All @@ -209,7 +227,7 @@ def main():
_exit_with_error("Filename is required when using Document Intelligence.")

markitdown = MarkItDown(
enable_plugins=args.use_plugins, docintel_endpoint=args.endpoint
enable_plugins=args.use_plugins, docintel_endpoint=args.endpoint, **llm_kwargs
)
elif args.use_cu:
if args.cu_endpoint is None:
Expand Down Expand Up @@ -240,9 +258,9 @@ def main():
_exit_with_error(f"Unknown file type: {name}")
cu_kwargs["cu_file_types"] = cu_types

markitdown = MarkItDown(enable_plugins=args.use_plugins, **cu_kwargs)
markitdown = MarkItDown(enable_plugins=args.use_plugins, **llm_kwargs, **cu_kwargs)
else:
markitdown = MarkItDown(enable_plugins=args.use_plugins)
markitdown = MarkItDown(enable_plugins=args.use_plugins, **llm_kwargs)

if args.filename is None:
result = markitdown.convert_stream(
Expand All @@ -258,6 +276,40 @@ def main():
_handle_output(args, result)


def _parse_llm_options(args) -> dict[str, Any]:
if args.llm_client is None:
if args.llm_model or args.llm_prompt:
_exit_with_error("--llm-model and --llm-prompt require --llm-client.")
return {}

if not args.llm_model:
_exit_with_error("--llm-client requires --llm-model.")

llm_kwargs: dict[str, Any] = {
"llm_client": _create_llm_client(args.llm_client),
"llm_model": args.llm_model,
}
if args.llm_prompt:
llm_kwargs["llm_prompt"] = args.llm_prompt
return llm_kwargs


def _create_llm_client(client_name: str) -> Any:
    """Build the LLM client object selected with ``--llm-client``.

    Args:
        client_name: Name of the client to create. Only ``"openai"`` is
            handled.

    Returns:
        The object returned by ``OpenAI()`` called with no arguments.

    Every failure (unknown client name, missing ``openai`` package, or the
    client refusing to initialise) is reported through ``_exit_with_error``.
    The trailing ``raise AssertionError`` statements only guard against
    ``_exit_with_error`` returning, which is presumably never the case.
    """
    if client_name != "openai":
        _exit_with_error(f"Unsupported LLM client: {client_name}")
        raise AssertionError("unreachable")

    try:
        # Imported here rather than at module level so the CLI keeps working
        # when the package is not installed and no LLM client is requested.
        from openai import OpenAI, OpenAIError
    except ImportError as ex:
        _exit_with_error(
            "The OpenAI client is required for --llm-client openai. Install it with `pip install openai`."
        )
        raise AssertionError("unreachable") from ex

    try:
        return OpenAI()
    except OpenAIError as ex:
        # The constructor raises when it cannot find credentials (for
        # example, no API key configured); report that like any other CLI
        # error instead of letting a traceback escape.
        _exit_with_error(f"Could not create the OpenAI client: {ex}")
        raise AssertionError("unreachable") from ex


def _handle_output(args, result: DocumentConverterResult):
"""Handle output to stdout or file"""
if args.output:
Expand Down
61 changes: 59 additions & 2 deletions packages/markitdown/tests/test_cli_misc.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
#!/usr/bin/env python3 -m pytest
import subprocess
import sys
from types import SimpleNamespace
from unittest.mock import Mock, patch

import pytest
from markitdown import __version__

# This file contains CLI tests that are not directly tested by the FileTestVectors.
Expand All @@ -8,7 +13,7 @@

def test_version() -> None:
result = subprocess.run(
["python", "-m", "markitdown", "--version"], capture_output=True, text=True
[sys.executable, "-m", "markitdown", "--version"], capture_output=True, text=True
)

assert result.returncode == 0, f"CLI exited with error: {result.stderr}"
Expand All @@ -17,7 +22,7 @@ def test_version() -> None:

def test_invalid_flag() -> None:
result = subprocess.run(
["python", "-m", "markitdown", "--foobar"], capture_output=True, text=True
[sys.executable, "-m", "markitdown", "--foobar"], capture_output=True, text=True
)

assert result.returncode != 0, f"CLI exited with error: {result.stderr}"
Expand All @@ -27,6 +32,58 @@ def test_invalid_flag() -> None:
assert "SYNTAX" in result.stderr, "Expected 'SYNTAX' to appear in STDERR"


def test_llm_cli_options_are_passed_to_markitdown(monkeypatch, capsys) -> None:
    """The ``--llm-*`` flags are forwarded to the ``MarkItDown`` constructor."""
    import markitdown.__main__ as markitdown_cli

    fake_client = object()
    fake_converter = Mock()
    fake_converter.convert.return_value = SimpleNamespace(markdown="converted")

    cli_argv = [
        "markitdown",
        "document.pdf",
        "--use-plugins",
        "--llm-client",
        "openai",
        "--llm-model",
        "gpt-4o",
        "--llm-prompt",
        "Extract the text.",
    ]
    monkeypatch.setattr(sys, "argv", cli_argv)

    # Replace both the client factory and the converter class so that neither
    # the real client nor a real conversion is involved.
    factory_patch = patch.object(
        markitdown_cli, "_create_llm_client", return_value=fake_client
    )
    class_patch = patch.object(
        markitdown_cli, "MarkItDown", return_value=fake_converter
    )
    with factory_patch as create_llm_client, class_patch as markitdown_cls:
        markitdown_cli.main()

    create_llm_client.assert_called_once_with("openai")
    markitdown_cls.assert_called_once_with(
        enable_plugins=True,
        llm_client=fake_client,
        llm_model="gpt-4o",
        llm_prompt="Extract the text.",
    )
    fake_converter.convert.assert_called_once_with(
        "document.pdf", stream_info=None, keep_data_uris=False
    )

    captured = capsys.readouterr()
    assert captured.out.strip() == "converted"


def test_llm_model_requires_llm_client(monkeypatch, capsys) -> None:
    """Giving ``--llm-model`` without ``--llm-client`` exits with an explanation."""
    import markitdown.__main__ as markitdown_cli

    cli_argv = ["markitdown", "document.pdf", "--llm-model", "gpt-4o"]
    monkeypatch.setattr(sys, "argv", cli_argv)

    with pytest.raises(SystemExit):
        markitdown_cli.main()

    # The message is looked up in captured standard output.
    expected_message = "--llm-model and --llm-prompt require --llm-client"
    printed = capsys.readouterr().out
    assert expected_message in printed


if __name__ == "__main__":
"""Runs this file's tests from the command line."""
test_version()
Expand Down