Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More fixes trtllm #2342

Merged
merged 5 commits into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions backends/trtllm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,18 @@ homepage.workspace = true
[dependencies]
async-trait = "0.1"
async-stream = "0.3"
clap = { version = "4.5", features = ["derive"] }
cxx = "1.0"
log = { version = "0.4", features = [] }
text-generation-router = { path = "../../router" }
tokenizers = { version = "0.19", features = ["hf-hub"] }
tokio = { version = "1.38", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync"] }
tokio-stream = "0.1.15"
clap = { version = "4.5", features = ["derive"] }
thiserror = "1.0.62"
tracing = "0.1"
tracing-opentelemetry = "0.24"
tracing-subscriber = { version = "0.3", features = ["json", "env-filter"] }
log = { version = "0.4", features = [] }
parking_lot = "0.12"

[build-dependencies]
cmake = "0.1"
Expand Down
9 changes: 5 additions & 4 deletions backends/trtllm/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ ARG OMPI_VERSION="4.1.6"

# Build dependencies resolver stage
FROM lukemathwalker/cargo-chef:latest AS chef
WORKDIR /usr/src/text-generation-inference
WORKDIR /usr/src/text-generation-inference/backends/trtllm

FROM chef AS planner
COPY . .
Expand Down Expand Up @@ -42,7 +42,7 @@ RUN wget "https://download.open-mpi.org/release/open-mpi/v4.1/$OMPI_TARBALL_FILE
mkdir /usr/src/mpi && \
tar -xf "/opt/src/$OMPI_TARBALL_FILENAME" -C /usr/src/mpi --strip-components=1 && \
cd /usr/src/mpi && \
./configure --prefix=/usr/local/mpi --with-cuda=/usr/local/cuda --without-slurm && \
./configure --prefix=/usr/local/mpi --with-cuda=/usr/local/cuda && \
make -j all && \
make install && \
rm -rf "/opt/src/$OMPI_TARBALL_FILENAME"
Expand All @@ -66,7 +66,7 @@ ENV PATH="/root/.cargo/bin:$PATH"
RUN cargo install cargo-chef

# Cache dependencies
COPY --from=planner /usr/src/text-generation-inference/recipe.json .
COPY --from=planner /usr/src/text-generation-inference/backends/trtllm/recipe.json .
RUN cargo chef cook --release --recipe-path recipe.json

# Build actual TGI
Expand All @@ -79,7 +79,8 @@ COPY . .
COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt
COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi
RUN mkdir $TGI_INSTALL_PREFIX && mkdir "$TGI_INSTALL_PREFIX/include" && mkdir "$TGI_INSTALL_PREFIX/lib" && \
CMAKE_INSTALL_PREFIX=$TGI_INSTALL_PREFIX cargo build --release --bin text-generation-backends-trtllm
cd backends/trtllm && \
CMAKE_INSTALL_PREFIX=$TGI_INSTALL_PREFIX cargo build --release

FROM nvidia/cuda:12.5.1-cudnn-runtime-ubuntu22.04 AS runtime
WORKDIR /usr/local/tgi/bin
Expand Down
3 changes: 2 additions & 1 deletion backends/trtllm/src/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,13 @@ use cxx::UniquePtr;
use log::{error, warn};
use tokenizers::Tokenizer;
use tokio::sync::mpsc::{unbounded_channel, UnboundedSender};
use tokio::sync::RwLock;
use tokio::time::{sleep, Instant};
use tokio_stream::wrappers::UnboundedReceiverStream;
use tokio_stream::{Stream, StreamExt};
use tracing::{instrument, span, Level};

// use tokio::sync::RwLock;
use parking_lot::RwLock;
use text_generation_router::infer::{Backend, GeneratedText, InferError, InferStreamResponse};
use text_generation_router::validation::ValidationError::UnsupportedModality;
use text_generation_router::validation::{Chunk, ValidGenerateRequest, ValidationError};
Expand Down
8 changes: 4 additions & 4 deletions backends/trtllm/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
use clap::Parser;
use std::collections::HashMap;
use std::path::PathBuf;

use clap::Parser;
use tokenizers::{FromPretrainedParameters, Tokenizer};

use text_generation_backends_trtllm::errors::TensorRtLlmBackendError;
use text_generation_backends_trtllm::TensorRtLlmBackend;
use text_generation_router::server;
use tokenizers::{FromPretrainedParameters, Tokenizer};

/// App Configuration
#[derive(Parser, Debug)]
Expand Down Expand Up @@ -160,6 +158,8 @@ async fn main() -> Result<(), TensorRtLlmBackendError> {
messages_api_enabled,
true,
max_client_batch_size,
false,
false,
)
.await?;
Ok(())
Expand Down
Loading