Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions .github/ISSUE_TEMPLATE/issue-template.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,7 @@ assignees: ''

### Target(s)

<replace w/ one or more of the following options: `server`, `search`, `chat`>

### Requirement to close

<please describe what is required to close this issue here>
<replace with the name of the service(s) associated with this issue>

### Community channels

Expand Down
149 changes: 149 additions & 0 deletions .github/workflows/push-pdf2md-server.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# Builds the PDF2MD Docker images (server, chunk worker, supervisor worker)
# and pushes them to Docker Hub.
name: Create PDF2MD Docker Images

# Allow one run per workflow and ref at a time; a newer run cancels the one
# still in progress.
# `github.head_ref` is only populated for pull_request events, which this
# workflow never receives, so fall back to `github.ref`. Without the fallback
# every run (including manual runs on other branches) shares a single group.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
  cancel-in-progress: true

on:
  # Manual runs from the Actions tab.
  workflow_dispatch:
  # Automatic runs when server sources change on main.
  push:
    branches:
      - "main"
    paths:
      # NOTE(review): the build steps use `pdf2md/` as the Docker context, so
      # anything the Dockerfiles copy from outside `pdf2md/server/` would not
      # trigger a rebuild - confirm this filter is wide enough.
      - "pdf2md/server/**"

# Three independent jobs, one per image. Each job checks out the repository,
# logs in to Docker Hub, derives tags/labels with docker/metadata-action, and
# then builds and pushes the image using `pdf2md/` as the build context.
#
# Every job runs on a one-combination matrix (one runner, one platform). The
# former `exclude` entries (amd64 runner + linux/arm64, arm runner +
# linux/amd64) could never match that matrix and were removed as dead
# configuration; reintroduce them if an arm runner and `linux/arm64` are
# added to the lists below.
jobs:
  pdf2md-server:
    name: Push PDF2MD Server image
    runs-on: ${{ matrix.runner }}
    strategy:
      matrix:
        runner: [blacksmith-8vcpu-ubuntu-2204]
        platform: [linux/amd64]
    steps:
      - name: Checkout the repo
        uses: actions/checkout@v4

      - name: Setup buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      # Produces the tag and label lists consumed by the build step below:
      # a fixed `latest` tag plus a commit-SHA-based tag.
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: |
            trieve/pdf2md-server
          tags: |
            type=raw,latest
            type=sha

      - name: Build and push Docker image
        uses: useblacksmith/build-push-action@v1.0.0-beta
        with:
          platforms: ${{ matrix.platform }}
          context: pdf2md/
          file: ./pdf2md/server/Dockerfile.pdf2md-server
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}

  chunk-worker:
    name: Push PDF2MD Chunk Worker image
    runs-on: ${{ matrix.runner }}
    strategy:
      matrix:
        runner: [blacksmith-8vcpu-ubuntu-2204]
        platform: [linux/amd64]
    steps:
      - name: Checkout the repo
        uses: actions/checkout@v4

      - name: Setup buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      # Produces the tag and label lists consumed by the build step below:
      # a fixed `latest` tag plus a commit-SHA-based tag.
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: |
            trieve/chunk-worker
          tags: |
            type=raw,latest
            type=sha

      - name: Build and push Docker image
        uses: useblacksmith/build-push-action@v1.0.0-beta
        with:
          platforms: ${{ matrix.platform }}
          context: pdf2md/
          file: ./pdf2md/server/Dockerfile.chunk-worker
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}

  supervisor-worker:
    name: Push PDF2MD Supervisor Worker image
    runs-on: ${{ matrix.runner }}
    strategy:
      matrix:
        runner: [blacksmith-8vcpu-ubuntu-2204]
        platform: [linux/amd64]
    steps:
      - name: Checkout the repo
        uses: actions/checkout@v4

      - name: Setup buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      # Produces the tag and label lists consumed by the build step below:
      # a fixed `latest` tag plus a commit-SHA-based tag.
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: |
            trieve/supervisor-worker
          tags: |
            type=raw,latest
            type=sha

      - name: Build and push Docker image
        uses: useblacksmith/build-push-action@v1.0.0-beta
        with:
          platforms: ${{ matrix.platform }}
          context: pdf2md/
          file: ./pdf2md/server/Dockerfile.supervisor-worker
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -94,4 +94,4 @@ dist/**


clients/python-sdk/dist
file-chunker/ch_migrations/chm.toml
pdf2md/ch_migrations/chm.toml
9 changes: 6 additions & 3 deletions pdf2md/server/.env.dist → pdf2md/.env.dist
Original file line number Diff line number Diff line change
@@ -1,24 +1,27 @@
# Template environment for the PDF2MD stack.
# NOTE(review): presumably copied to `.env`, which docker-compose.yml loads
# via `env_file: .env` - confirm the intended workflow.
# All credentials below are placeholder values.

# Redis
# REDIS_URL embeds the same password as REDIS_PASSWORD; keep the two in sync.
# REDIS_PASSWORD is also read by the redis healthcheck in docker-compose.yml.
REDIS_URL=redis://:thisredispasswordisverysecureandcomplex@localhost:6379
REDIS_PASSWORD=thisredispasswordisverysecureandcomplex

# Clickhouse
# CLICKHOUSE_USER matches the user configured for the clickhouse-db service
# in docker-compose.yml.
CLICKHOUSE_URL=http://localhost:8123
CLICKHOUSE_DB=default
CLICKHOUSE_USER=clickhouse
CLICKHOUSE_PASSWORD=password

# S3
# The endpoint port (9000) is the one published by the compose `s3` service.
S3_ENDPOINT=http://127.0.0.1:9000
S3_ACCESS_KEY=ZaaZZaaZZaaZZaaZZaaZ
S3_SECRET_KEY=ssssssssssssssssssssTTTTTTTTTTTTTTTTTTTT
S3_BUCKET=trieve

# S3 dockerfile auto-configuration
# NOTE(review): presumably the root credentials for the MinIO container that
# backs the `s3` service - confirm against the full docker-compose.yml.
MINIO_ROOT_USER=rootuser
MINIO_ROOT_PASSWORD=rootpassword

# PDF2MD conversion worker services
# LLM_API_KEY is intentionally blank in this template.
# NOTE(review): presumably it must be set before the workers can call the LLM.
LLM_BASE_URL=https://openrouter.ai/api/v1
LLM_API_KEY=
LLM_MODEL=gpt-4o-mini

# PDF2MD HTTP API server
# Same value as the CLI's default `--api-key` ("admin").
API_KEY=admin

# NOTE(review): origin (scheme://host:port) of the PDLA server; this file
# does not say which service that is - confirm and document.
PDLA_SERVER_ORIGIN=http://localhost:5060

12 changes: 6 additions & 6 deletions pdf2md/cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,21 @@ pub mod operators;
#[command(author, version)]
#[command(
name = "tr-chunk",
about = "Trieve File Chunker CLI - CLI for Trieve File Chunker",
long_about = "Trieve File Chunker CLI is a CLI for the Trieve File Chunker.
about = "PDF2MD CLI - CLI for PDF2MD",
long_about = "PDF2MD CLI is a CLI for the PDF2MD.

It allows you to interact with the Trieve File Chunker from the command line by creating and polling tasks."
It allows you to interact with the PDF2MD from the command line by creating and polling tasks."
)]
#[command(arg_required_else_help(true))]
struct Cli {
#[command(subcommand)]
command: Option<Commands>,

/// The base URL of the Trieve File Chunker server
/// The base URL of the PDF2MD server
#[arg(
short,
long,
env = "TRIEVE_FILE_CHUNKER_BASE_URL",
env = "PDF2MD_BASE_URL",
default_value = "http://localhost:8081"
)]
base_url: String,
Expand All @@ -30,7 +30,7 @@ struct Cli {
#[arg(
short,
long,
env = "TRIEVE_FILE_CHUNKER_API_KEY",
env = "PDF2MD_API_KEY",
default_value = "admin"
)]
api_key: String,
Expand Down
33 changes: 33 additions & 0 deletions pdf2md/server/docker-compose.yml → pdf2md/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
services:
redis:
image: redis:7.2.2
profiles: ["dev", "prod-deps"]
restart: always
healthcheck:
test: ["CMD", "redis-cli", "-a", "${REDIS_PASSWORD}", "ping"]
Expand All @@ -17,6 +18,7 @@ services:

s3:
image: minio/minio:RELEASE.2023-09-27T15-22-50Z
profiles: ["dev", "prod-deps"]
ports:
- 9000:9000
- 42625:42625
Expand All @@ -36,6 +38,7 @@ services:

s3-client:
image: minio/mc
profiles: ["dev", "prod-deps"]
depends_on:
s3:
condition: service_healthy
Expand All @@ -56,6 +59,7 @@ services:

clickhouse-db:
image: trieve/clickhouse:latest
profiles: ["dev", "prod-deps"]
restart: always
environment:
- CLICKHOUSE_USER=clickhouse
Expand All @@ -70,6 +74,35 @@ services:
networks:
- app-network

# Service for the trieve/pdf2md-server image; it is only started when the
# "prod" profile is enabled.
# NOTE(review): the build context here is ./server/, while the CI workflow
# builds the same Dockerfile with `pdf2md/` as context - confirm the
# Dockerfile's paths work with both.
pdf2md-server:
image: trieve/pdf2md-server:latest
profiles: ["prod"]
# Uses the host's network stack rather than the compose `app-network`.
# NOTE(review): presumably so the localhost URLs in .env resolve - confirm.
network_mode: "host"
build:
context: ./server/
dockerfile: Dockerfile.pdf2md-server
# Container environment is read from the .env file next to this compose file.
env_file: .env

# Service for the trieve/supervisor-worker image; it is only started when the
# "prod" profile is enabled.
# NOTE(review): the build context here is ./server/, while the CI workflow
# builds the same Dockerfile with `pdf2md/` as context - confirm the
# Dockerfile's paths work with both.
supervisor-worker:
image: trieve/supervisor-worker:latest
profiles: ["prod"]
# Uses the host's network stack rather than the compose `app-network`.
# NOTE(review): presumably so the localhost URLs in .env resolve - confirm.
network_mode: "host"
build:
context: ./server/
dockerfile: Dockerfile.supervisor-worker
# Container environment is read from the .env file next to this compose file.
env_file: .env

# Service for the trieve/chunk-worker image; it is only started when the
# "prod" profile is enabled.
# NOTE(review): the build context here is ./server/, while the CI workflow
# builds the same Dockerfile with `pdf2md/` as context - confirm the
# Dockerfile's paths work with both.
chunk-worker:
image: trieve/chunk-worker:latest
profiles: ["prod"]
# Uses the host's network stack rather than the compose `app-network`.
# NOTE(review): presumably so the localhost URLs in .env resolve - confirm.
network_mode: "host"
build:
context: ./server/
dockerfile: Dockerfile.chunk-worker
# Container environment is read from the .env file next to this compose file.
env_file: .env
# Requests five replicas of this worker.
deploy:
replicas: 5

networks:
app-network:
driver: bridge
Expand Down
68 changes: 34 additions & 34 deletions pdf2md/server/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading