Rework Backend to Native HTTP Requests and Enhance API Compatibility & Performance #91

Merged (18 commits) on Mar 12, 2025
3 changes: 2 additions & 1 deletion .pre-commit-config.yaml
@@ -21,10 +21,11 @@ repos:
ftfy,
loguru,
numpy,
openai,
pillow,
pydantic,
pydantic_settings,
pyyaml,
respx,
requests,
rich,
transformers,
13 changes: 6 additions & 7 deletions pyproject.toml
@@ -28,9 +28,10 @@ dependencies = [
"click",
"datasets",
"ftfy>=6.0.0",
"httpx[http2]<1.0.0",
"loguru",
"numpy",
"openai",
"pillow",
"pydantic>=2.0.0",
"pydantic-settings>=2.0.0",
"pyyaml>=6.0.0",
@@ -48,12 +49,14 @@ dev = [
"tox~=4.16.0",

# testing
"lorem~=0.1.1",
"pytest~=8.2.2",
"pytest-asyncio~=0.23.8",
"pytest-cov~=5.0.0",
"pytest-mock~=3.14.0",
"pytest-rerunfailures~=14.0",
"requests-mock~=1.12.1",
"respx~=0.22.0",

# code quality
"mypy~=1.10.1",
@@ -82,10 +85,6 @@ guidellm-config = "guidellm.config:print_config"
# ********** Code Quality Tools **********
# ************************************************

[tool.black]
line-length = 88
target-version = ['py38']


[tool.isort]
profile = "black"
@@ -127,8 +126,8 @@ ignore = [
"TCH002",
"PLW1514", # allow Path.open without encoding
"RET505", # allow `else` blocks
"RET506" # allow `else` blocks

"RET506", # allow `else` blocks
"PD011", # ignore .values usage since ruff assumes it's a Pandas DataFrame
]
select = [
# Rules reference: https://docs.astral.sh/ruff/rules/
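The swap of requests-mock for respx in the dev extras mirrors the runtime swap from requests/openai to httpx: respx is a mocking library built for httpx. A minimal sketch of the pattern the test suite can now use; the URL and JSON payload below are illustrative, not taken from this diff:

import httpx
import respx


@respx.mock
def test_models_endpoint() -> None:
    # Register a mocked route for the models listing an OpenAI-compatible
    # backend would query (endpoint path assumed for illustration).
    respx.get("http://localhost:8000/v1/models").respond(
        json={"data": [{"id": "test-model"}]}
    )
    response = httpx.get("http://localhost:8000/v1/models")
    assert response.json()["data"][0]["id"] == "test-model"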
23 changes: 17 additions & 6 deletions src/guidellm/backend/__init__.py
@@ -1,10 +1,21 @@
from .base import Backend, BackendEngine, BackendEnginePublic, GenerativeResponse
from .openai import OpenAIBackend
from .backend import (
Backend,
BackendType,
)
from .openai import OpenAIHTTPBackend
from .response import (
RequestArgs,
ResponseSummary,
StreamingResponseType,
StreamingTextResponse,
)

__all__ = [
"StreamingResponseType",
"StreamingTextResponse",
"RequestArgs",
"ResponseSummary",
"Backend",
"BackendEngine",
"BackendEnginePublic",
"GenerativeResponse",
"OpenAIBackend",
"BackendType",
"OpenAIHTTPBackend",
]
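The package surface now exposes the HTTP-native names (Backend, BackendType, OpenAIHTTPBackend, and the response types) in place of the removed OpenAIBackend, BackendEngine, BackendEnginePublic, and GenerativeResponse. A sketch of the intended entry point; the constructor keyword (target) is an assumption, since OpenAIHTTPBackend's signature is not part of this file:

from guidellm.backend import Backend

# "openai_http" is the only BackendType this PR registers.
backend = Backend.create("openai_http", target="http://localhost:8000")
backend.validate()  # checks setup, lists models, runs a one-token test request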
223 changes: 223 additions & 0 deletions src/guidellm/backend/backend.py
@@ -0,0 +1,223 @@
import asyncio
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any, AsyncGenerator, Dict, List, Literal, Optional, Type, Union

from loguru import logger
from PIL import Image

from guidellm.backend.response import ResponseSummary, StreamingTextResponse

__all__ = [
"Backend",
"BackendType",
]


BackendType = Literal["openai_http"]


class Backend(ABC):
"""
Abstract base class for generative AI backends.

This class provides a common interface for creating and interacting with different
generative AI backends. Subclasses should implement the abstract methods to
define specific backend behavior.

:cvar _registry: A registration dictionary that maps BackendType to backend classes.
:param type_: The type of the backend.
"""

_registry: Dict[BackendType, "Type[Backend]"] = {}

@classmethod
def register(cls, backend_type: BackendType):
"""
A decorator to register a backend class in the backend registry.

:param backend_type: The type of backend to register.
:type backend_type: BackendType
:return: The decorated backend class.
:rtype: Type[Backend]
"""
if backend_type in cls._registry:
raise ValueError(f"Backend type already registered: {backend_type}")

if not issubclass(cls, Backend):
raise TypeError("Only subclasses of Backend can be registered")

def inner_wrapper(wrapped_class: Type["Backend"]):
cls._registry[backend_type] = wrapped_class
logger.info("Registered backend type: {}", backend_type)
return wrapped_class

return inner_wrapper

@classmethod
def create(cls, type_: BackendType, **kwargs) -> "Backend":
"""
Factory method to create a backend instance based on the backend type.

:param type_: The type of backend to create.
:type type_: BackendType
:param kwargs: Additional arguments for backend initialization.
:return: An instance of a subclass of Backend.
:rtype: Backend
:raises ValueError: If the backend type is not registered.
"""

logger.info("Creating backend of type {}", type_)

if type_ not in cls._registry:
err = ValueError(f"Unsupported backend type: {type_}")
logger.error("{}", err)
raise err

return Backend._registry[type_](**kwargs)
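# Illustrative pairing of register() and create(), comment-only and not part
# of this file: a subclass registers itself under a BackendType key, and the
# factory later resolves that key back to the class.
#
#     @Backend.register("openai_http")
#     class OpenAIHTTPBackend(Backend):
#         ...
#
#     backend = Backend.create("openai_http")  # instantiates OpenAIHTTPBackend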

def __init__(self, type_: BackendType):
self._type = type_

@property
def type_(self) -> BackendType:
"""
:return: The type of the backend.
"""
return self._type

@property
@abstractmethod
def target(self) -> str:
"""
:return: The target location for the backend.
"""
...

@property
@abstractmethod
def model(self) -> Optional[str]:
"""
:return: The model used for the backend requests.
"""
...

def validate(self):
"""
Handle final setup and validate that the backend is ready for use.
If unsuccessful, raises the appropriate exception.
"""
logger.info("{} validating backend {}", self.__class__.__name__, self.type_)
self.check_setup()
models = self.available_models()
if not models:
raise ValueError("No models available for the backend")

async def _test_request():
async for _ in self.text_completions(
prompt="Test connection", output_token_count=1
): # type: ignore[attr-defined]
pass

asyncio.run(_test_request())

@abstractmethod
def check_setup(self):
"""
Check the setup for the backend.
If unsuccessful, raises the appropriate exception.

:raises ValueError: If the setup check fails.
"""
...

@abstractmethod
def available_models(self) -> List[str]:
"""
Get the list of available models for the backend.

:return: The list of available models.
:rtype: List[str]
"""
...

@abstractmethod
async def text_completions(
self,
prompt: Union[str, List[str]],
request_id: Optional[str] = None,
prompt_token_count: Optional[int] = None,
output_token_count: Optional[int] = None,
**kwargs,
) -> AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None]:
"""
Generate text-only completions for the given prompt.
Does not support multiple modalities, complicated chat interfaces,
or chat templates; the request is made with only the prompt.

:param prompt: The prompt (or list of prompts) to generate a completion for.
If a list is supplied, these are concatenated and run through the model
for a single prompt.
:param request_id: The unique identifier for the request, if any.
Added to logging statements and the response for tracking purposes.
:param prompt_token_count: The number of tokens measured in the prompt, if any.
Returned in the response stats for later analysis, if applicable.
:param output_token_count: If supplied, the number of tokens to enforce
generation of for the output for this request.
:param kwargs: Additional keyword arguments to pass with the request.
:return: An async generator that yields a StreamingTextResponse for start,
a StreamingTextResponse for each received iteration,
and a ResponseSummary for the final response.
"""
...

@abstractmethod
async def chat_completions(
self,
content: Union[
str,
List[Union[str, Dict[str, Union[str, Dict[str, str]]], Path, Image.Image]],
Any,
],
request_id: Optional[str] = None,
prompt_token_count: Optional[int] = None,
output_token_count: Optional[int] = None,
raw_content: bool = False,
**kwargs,
) -> AsyncGenerator[Union[StreamingTextResponse, ResponseSummary], None]:
"""
Generate chat completions for the given content.
Supports multiple modalities, complicated chat interfaces, and chat templates.
The request is made with the content, which can be any combination of
text, images, and audio, provided the target model supports it,
and the output text is returned. Additionally, any chat templates
for the model are applied within the backend.

:param content: The content (or list of content) to generate a completion for.
This supports any combination of text, images, and audio (model dependent).
Supported text only request examples:
content="Sample prompt", content=["Sample prompt", "Second prompt"],
content=[{"type": "text", "value": "Sample prompt"}].
Supported text and image request examples:
content=["Describe the image", PIL.Image.open("image.jpg")],
content=["Describe the image", Path("image.jpg")],
content=["Describe the image", {"type": "image_url",
"image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}].
Supported text and audio request examples:
content=["Transcribe the audio", Path("audio.wav")],
content=["Transcribe the audio", {"type": "input_audio",
"input_audio": {"data": f"{base64_bytes}", "format": "wav}].
Additionally, if raw_content=True then the content is passed directly to the
backend without any processing.
:param request_id: The unique identifier for the request, if any.
Added to logging statements and the response for tracking purposes.
:param prompt_token_count: The number of tokens measured in the prompt, if any.
Returned in the response stats for later analysis, if applicable.
:param output_token_count: If supplied, the number of tokens to enforce
generation of for the output for this request.
:param kwargs: Additional keyword arguments to pass with the request.
:return: An async generator that yields a StreamingTextResponse for start,
a StreamingTextResponse for each received iteration,
and a ResponseSummary for the final response.
"""
...
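Both generator methods yield StreamingTextResponse items (a start event, then one per received iteration) and end with a ResponseSummary, so a consumer dispatches on the yielded type. A minimal sketch; only the class names are confirmed by this diff, and the constructor kwargs are assumed:

import asyncio

from guidellm.backend import Backend, ResponseSummary, StreamingTextResponse


async def run(backend: Backend) -> None:
    async for response in backend.text_completions(
        prompt="Hello", output_token_count=16
    ):
        if isinstance(response, StreamingTextResponse):
            ...  # start event or an incremental text update
        elif isinstance(response, ResponseSummary):
            ...  # final stats for the completed request


asyncio.run(run(Backend.create("openai_http", target="http://localhost:8000")))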