Skip to content
Merged

Dev #60

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ members = ["rust", "python"]
resolver = "2"

[workspace.package]
version = "0.1.25"
version = "0.1.26"
edition = "2024"
authors = ["zTgx <beautifularea@gmail.com>"]
license = "Apache-2.0"
Expand Down
28 changes: 28 additions & 0 deletions examples/batch_indexing/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Batch Indexing Example

Demonstrates indexing multiple documents at once using:
- `from_paths` -- explicit list of file paths
- `from_dir` -- all supported files in a directory
- `from_bytes` -- raw in-memory content

Also shows cross-document querying with `with_doc_ids`.

## Setup

```bash
pip install vectorless
```

## Run

```bash
python main.py
```

## Environment Variables

| Variable | Description | Default |
|------------------------|----------------------|-----------|
| `VECTORLESS_API_KEY` | LLM API key | `sk-...` |
| `VECTORLESS_MODEL` | LLM model name | `gpt-4o` |
| `VECTORLESS_ENDPOINT` | Custom API endpoint | `None` |
183 changes: 183 additions & 0 deletions examples/batch_indexing/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
"""
Batch indexing example -- demonstrates indexing multiple documents at once
using from_paths, from_dir, and from_bytes.

Usage:
pip install vectorless
python main.py
"""

import asyncio
import os

from vectorless import (
Engine,
IndexContext,
IndexOptions,
QueryContext,
VectorlessError,
)

# --- Configuration ---
# Every setting can be overridden via environment variables; the defaults let
# the example run out of the box ("sk-..." is a placeholder, not a real key).
API_KEY = os.environ.get("VECTORLESS_API_KEY", "sk-...")
MODEL = os.environ.get("VECTORLESS_MODEL", "gpt-4o")
ENDPOINT = os.environ.get("VECTORLESS_ENDPOINT", None)  # None -> provider default endpoint
WORKSPACE = "./workspace"  # directory where the engine persists its index state

# Sample documents for demonstration.
# Maps filename -> markdown content; written to disk by write_sample_docs().
DOCS = {
    "alpha.md": """\
# Alpha Report

## Summary

Alpha is a distributed key-value store designed for low-latency reads.
It uses a log-structured merge tree for storage.

## Architecture

Write requests go through a write-ahead log, then are buffered in memory.
When the buffer is full, it is flushed to disk as an immutable SSTable.
""",
    "beta.md": """\
# Beta Report

## Summary

Beta is a stream processing engine that consumes events from Kafka topics
and applies real-time transformations using a DAG-based execution model.

## Performance

Beta processes up to 2 million events per second per node on commodity hardware.
""",
    "gamma.md": """\
# Gamma Report

## Summary

Gamma is a feature store that bridges the gap between offline feature
computation and online serving. Features are computed in Spark and served
via a low-latency gRPC endpoint.

## Integration

Gamma integrates with Alpha for feature metadata storage and Beta for
real-time feature updates.
""",
}


def write_sample_docs(base_dir: str) -> list[str]:
"""Write sample markdown files and return their paths."""
paths = []
for name, content in DOCS.items():
path = os.path.join(base_dir, name)
with open(path, "w") as f:
f.write(content)
paths.append(path)
return paths


async def main() -> None:
    """Run the batch-indexing walkthrough end to end.

    Demonstrates the three batch constructors (``from_paths``, ``from_dir``,
    ``from_bytes``), cross-document querying via ``with_doc_ids``, and
    cleanup. Performs network calls through the configured LLM endpoint.
    """
    engine = Engine(
        workspace=WORKSPACE,
        api_key=API_KEY,
        model=MODEL,
        endpoint=ENDPOINT,
    )

    # Create a temp directory with sample documents
    docs_dir = "./batch_docs"
    os.makedirs(docs_dir, exist_ok=True)
    paths = write_sample_docs(docs_dir)

    # ---- 1. Index multiple files at once via from_paths ----
    print("=" * 50)
    print(" from_paths -- index a list of files")
    print("=" * 50)

    ctx = IndexContext.from_paths(paths)
    result = await engine.index(ctx)

    # Batch results expose successfully indexed docs in `items` and
    # per-source failures in `failed` (checked via has_failures()).
    print(f" Indexed {len(result.items)} document(s)")
    for item in result.items:
        print(f" - {item.name} ({item.doc_id[:8]}...)")
    if result.has_failures():
        for f in result.failed:
            print(f" ! Failed: {f.source} -- {f.error}")
    print()

    # Collect the ids so the next query can span all indexed documents.
    doc_ids = [item.doc_id for item in result.items]

    # ---- 2. Query across all batch-indexed documents ----
    print("=" * 50)
    print(" Query across multiple documents")
    print("=" * 50)

    answer = await engine.query(
        QueryContext(
            "Which system processes the most events per second?"
        ).with_doc_ids(doc_ids)
    )
    for item in answer.items:
        print(f" [{item.doc_id[:8]}...] score={item.score:.2f}")
        print(f" {item.content[:200]}...")
    print()

    # ---- 3. Index a directory via from_dir ----
    print("=" * 50)
    print(" from_dir -- index all supported files in a directory")
    print("=" * 50)

    # Clear first so we see fresh results
    await engine.clear()

    # with_options enables LLM-generated summaries/descriptions per document.
    ctx = IndexContext.from_dir(docs_dir).with_options(
        IndexOptions(generate_summaries=True, generate_description=True)
    )
    result = await engine.index(ctx)

    print(f" Indexed {len(result.items)} document(s)")
    for item in result.items:
        # description may be None when generation produced nothing.
        desc = item.description[:80] if item.description else "N/A"
        print(f" - {item.name}: {desc}...")
    print()

    # ---- 4. Index from raw bytes via from_bytes ----
    print("=" * 50)
    print(" from_bytes -- index in-memory content")
    print("=" * 50)

    md_bytes = b"""# Delta Notes

## Key Points

- Delta uses CRDTs for conflict-free replication.
- Writes are locally committed then asynchronously propagated.
- Read repair ensures eventual consistency across all replicas.
"""

    # The second argument is the content format; with_name labels the doc
    # since raw bytes carry no filename.
    ctx = IndexContext.from_bytes(md_bytes, "markdown").with_name("delta")
    result = await engine.index(ctx)

    # NOTE(review): unlike the batch results above, this result appears to
    # expose `doc_id` directly (single-document shape) -- confirm in the API.
    print(f" Indexed: {result.doc_id}")
    print()

    # ---- Cleanup ----
    print("=" * 50)
    print(" Cleanup")
    print("=" * 50)

    removed = await engine.clear()
    print(f" Removed {removed} document(s)")

    # Remove temp files
    for p in paths:
        os.remove(p)
    os.rmdir(docs_dir)
    print(f" Cleaned up {docs_dir}/")


if __name__ == "__main__":
    asyncio.run(main())
28 changes: 28 additions & 0 deletions examples/document_management/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Document Management Example

Demonstrates CRUD operations on indexed documents:

- `engine.list()` -- list all documents
- `engine.exists(doc_id)` -- check if a document exists
- `engine.remove(doc_id)` -- remove a single document
- `engine.clear()` -- remove all documents

## Setup

```bash
pip install vectorless
```

## Run

```bash
python main.py
```

## Environment Variables

| Variable | Description | Default |
|------------------------|----------------------|-----------|
| `VECTORLESS_API_KEY` | LLM API key | `sk-...` |
| `VECTORLESS_MODEL` | LLM model name | `gpt-4o` |
| `VECTORLESS_ENDPOINT` | Custom API endpoint | `None` |
135 changes: 135 additions & 0 deletions examples/document_management/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
"""
Document management example -- demonstrates CRUD operations on indexed documents:
list, exists, remove, and clear.

Usage:
pip install vectorless
python main.py
"""

import asyncio
import os

from vectorless import (
Engine,
IndexContext,
QueryContext,
VectorlessError,
)

# --- Configuration ---
# Every setting can be overridden via environment variables; the defaults let
# the example run out of the box ("sk-..." is a placeholder, not a real key).
API_KEY = os.environ.get("VECTORLESS_API_KEY", "sk-...")
MODEL = os.environ.get("VECTORLESS_MODEL", "gpt-4o")
ENDPOINT = os.environ.get("VECTORLESS_ENDPOINT", None)  # None -> provider default endpoint
WORKSPACE = "./workspace"  # directory where the engine persists its index state

# Sample documents: two small markdown docs indexed as "alpha" and "beta".
SAMPLE_A = """\
# Project Alpha

## Overview

Project Alpha is a next-generation database engine written in Rust.
It supports ACID transactions and serializable isolation.

## Features

- MVCC concurrency control
- B-tree and LSM storage engines
- Query planner with cost-based optimization
"""

SAMPLE_B = """\
# Project Beta

## Overview

Project Beta is a web framework for building real-time applications.
It uses WebSocket-based communication and server-side rendering.

## Features

- Hot module reloading
- Built-in authentication middleware
- Automatic code splitting
"""


async def main() -> None:
    """Walk through document CRUD end to end.

    Indexes two sample documents, then demonstrates ``list``, ``exists``,
    a doc-scoped query, ``remove``, and ``clear``. Performs network calls
    through the configured LLM endpoint.
    """
    engine = Engine(
        workspace=WORKSPACE,
        api_key=API_KEY,
        model=MODEL,
        endpoint=ENDPOINT,
    )

    # ---- Index two documents ----
    print("Indexing two documents...")

    # from_content takes the text plus its format; with_name labels the doc.
    result_a = await engine.index(
        IndexContext.from_content(SAMPLE_A, "markdown").with_name("alpha")
    )
    doc_id_a = result_a.doc_id
    print(f" A: {doc_id_a}")

    result_b = await engine.index(
        IndexContext.from_content(SAMPLE_B, "markdown").with_name("beta")
    )
    doc_id_b = result_b.doc_id
    print(f" B: {doc_id_b}")
    print()

    # ---- list() -- show all indexed documents ----
    print("--- list() ---")
    docs = await engine.list()
    for doc in docs:
        # page_count / line_count are optional metadata; only show when set.
        pages = f", pages={doc.page_count}" if doc.page_count else ""
        lines = f", lines={doc.line_count}" if doc.line_count else ""
        print(f" {doc.name} id={doc.id[:8]}... format={doc.format}{pages}{lines}")
    print(f" Total: {len(docs)} document(s)\n")

    # ---- exists() -- check if a document is indexed ----
    print("--- exists() ---")
    # The third entry is a deliberately unknown id to show the False case.
    for did, label in [(doc_id_a, "A"), (doc_id_b, "B"), ("nonexistent-id", "?")]:
        found = await engine.exists(did)
        print(f" {label}: exists={found}")
    print()

    # ---- Query a specific document ----
    print("--- query(doc_id_a) ---")
    # with_doc_id scopes the query to document A only.
    answer = await engine.query(
        QueryContext("What storage engines does Alpha support?").with_doc_id(doc_id_a)
    )
    # single() returns the sole answer item, or a falsy value when empty.
    item = answer.single()
    if item:
        print(f" Score: {item.score:.2f}")
        print(f" Answer: {item.content[:200]}...\n")

    # ---- remove() -- delete a single document ----
    print("--- remove(doc_id_a) ---")
    removed = await engine.remove(doc_id_a)
    print(f" Removed A: {removed}")

    # Verify it's gone
    exists_a = await engine.exists(doc_id_a)
    print(f" exists(A) after removal: {exists_a}")
    print()

    # ---- list() again -- only B should remain ----
    print("--- list() after removal ---")
    docs = await engine.list()
    for doc in docs:
        print(f" {doc.name} id={doc.id[:8]}...")
    print(f" Total: {len(docs)} document(s)\n")

    # ---- clear() -- remove all remaining documents ----
    print("--- clear() ---")
    cleared = await engine.clear()
    print(f" Cleared {cleared} document(s)")

    docs = await engine.list()
    print(f" Remaining: {len(docs)} document(s)")


if __name__ == "__main__":
    asyncio.run(main())
Loading