14 changes: 14 additions & 0 deletions .env.example
@@ -0,0 +1,14 @@
# Redis Configuration
REDIS_URL=redis://localhost:6379

# Ollama/Llama Server Configuration
LLAMA_SERVER_URL=http://localhost:11434
OLLAMA_HOST=http://localhost:11434 # Used by ollama Python library
DEFAULT_MODEL=deepseek-coder-v2:latest

# OpenTelemetry Configuration
OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317

# Optional: Llama.cpp CLI configuration (for local llama.cpp usage)
# LLAMA_CPP_CLI=/data1/llama.cpp/bin/llama-cli
# GGUF_DIR=/data1/GGUF
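For reference, a minimal sketch of how these variables get picked up at startup, mirroring the `load_dotenv()`/`os.getenv()` pattern in `markus_ai_server/server.py` below (the fallback values here are copied from this `.env.example` and are assumptions, not necessarily the code's actual defaults):

```python
import os

from dotenv import load_dotenv  # python-dotenv: reads .env into the process environment

# Values already set in the environment take precedence over .env entries
load_dotenv()

REDIS_URL = os.getenv('REDIS_URL', 'redis://localhost:6379')
LLAMA_SERVER_URL = os.getenv('LLAMA_SERVER_URL', 'http://localhost:11434')
DEFAULT_MODEL = os.getenv('DEFAULT_MODEL', 'deepseek-coder-v2:latest')
OTEL_EXPORTER_OTLP_ENDPOINT = os.getenv('OTEL_EXPORTER_OTLP_ENDPOINT', 'http://localhost:4317')
```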
26 changes: 25 additions & 1 deletion README.md
@@ -5,7 +5,9 @@
To install project dependencies, including development dependencies:

```console
$ pip install -e .[dev]
$ source venv/bin/activate

$ pip install -e '.[dev]'
```

To install pre-commit hooks:
@@ -19,3 +21,25 @@ To run the test suite:
```console
$ pytest
```

To run locally:

Prerequisites: Redis and a llama server (e.g., Ollama) must be up and running.

```console
$ docker compose -f opentelemetry_collector/docker-compose.yml up -d

$ REDIS_URL='redis://localhost:6379' LLAMA_SERVER_URL='http://localhost:11434' python3 -m ai_server.__main__
```
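Before launching, a quick pre-flight check can confirm both prerequisites are reachable. A sketch, assuming the `redis` and `requests` packages are installed and Ollama is the llama server (`/api/tags` is Ollama's model-listing endpoint):

```python
import redis
import requests

# Ping Redis at the same URL passed to the server; raises on failure
redis.from_url('redis://localhost:6379').ping()

# Ask the llama server for its model list; raises if it is not up
resp = requests.get('http://localhost:11434/api/tags', timeout=5)
resp.raise_for_status()
print(f"OK: Redis is up, llama server has {len(resp.json().get('models', []))} models")
```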

Send a request, for example:

```console
$ curl --location 'localhost:5000/chat' \
    --form 'content="asdf asdf asdasdf ad"'
```
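The same request from Python, for scripting against the endpoint (a sketch; `data=` populates Flask's `request.form` just as curl's `--form` does, and the response shape is whatever `/chat` returns):

```python
import requests

# POST a form field named 'content' to the locally running server
resp = requests.post('http://localhost:5000/chat', data={'content': 'asdf asdf asdasdf ad'})
print(resp.status_code, resp.text)
```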
47 changes: 46 additions & 1 deletion markus_ai_server/server.py
@@ -12,6 +15,15 @@
import requests
from dotenv import load_dotenv
from flask import Flask, abort, jsonify, request
from opentelemetry import metrics, trace
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.instrumentation.flask import FlaskInstrumentor
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

from .redis_helper import REDIS_CONNECTION

@@ -22,7 +31,42 @@
# Load environment variables from .env file
load_dotenv()

# OpenTelemetry endpoint configuration
OTEL_EXPORTER_OTLP_ENDPOINT = os.getenv('OTEL_EXPORTER_OTLP_ENDPOINT', 'http://localhost:4317')

# Configure OpenTelemetry - Shared Resource (identifies this service)
resource = Resource.create({"service.name": "ai-server"})

# ========== TRACES CONFIGURATION ==========
# TracerProvider: Factory for creating tracers (for distributed tracing)
tracer_provider = TracerProvider(resource=resource)

# OTLP Trace Exporter: Sends traces to collector
otlp_trace_exporter = OTLPSpanExporter(endpoint=OTEL_EXPORTER_OTLP_ENDPOINT, insecure=True)
span_processor = BatchSpanProcessor(otlp_trace_exporter)
tracer_provider.add_span_processor(span_processor)

# Set the global tracer provider (FlaskInstrumentor will use this)
trace.set_tracer_provider(tracer_provider)
tracer = trace.get_tracer("ai-server.tracer")

# ========== METRICS CONFIGURATION ==========
# OTLP Metric Exporter: Sends metrics to collector
otlp_metric_exporter = OTLPMetricExporter(endpoint=OTEL_EXPORTER_OTLP_ENDPOINT, insecure=True)

# PeriodicExportingMetricReader: Collects and exports metrics every 10 seconds
metric_reader = PeriodicExportingMetricReader(
    exporter=otlp_metric_exporter, export_interval_millis=10000  # Export every 10 seconds
)

# MeterProvider: Factory for creating meters (for metrics collection)
meter_provider = MeterProvider(resource=resource, metric_readers=[metric_reader])

# Set the global meter provider (FlaskInstrumentor will use this for HTTP metrics)
metrics.set_meter_provider(meter_provider)

app = Flask('AI server')
FlaskInstrumentor().instrument_app(app)

# Configuration from environment variables
DEFAULT_MODEL = os.getenv('DEFAULT_MODEL', 'deepseek-coder-v2:latest')
@@ -32,7 +76,8 @@
GGUF_DIR = os.getenv('GGUF_DIR', '/data1/GGUF')

# Llama server configuration
_llama_server_url = os.getenv('LLAMA_SERVER_URL') # e.g., http://localhost:8080 or localhost:8080
# e.g., http://localhost:8080 or localhost:8080
_llama_server_url = os.getenv('LLAMA_SERVER_URL')
LLAMA_SERVER_URL = (
    f"http://{_llama_server_url}"
    if _llama_server_url and not _llama_server_url.startswith(('http://', 'https://'))
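With the tracer and meter providers registered globally as above, individual handlers can layer custom spans and metrics on top of the automatic Flask instrumentation. A hypothetical sketch (the route body and instrument names are illustrative, not part of this diff):

```python
# Hypothetical usage inside markus_ai_server/server.py, after the setup above
meter = metrics.get_meter("ai-server.meter")

# Counter flushed every 10 seconds by the PeriodicExportingMetricReader
chat_requests = meter.create_counter(
    "chat.requests", description="Number of /chat requests received"
)

@app.route('/chat', methods=['POST'])
def chat():
    chat_requests.add(1)
    # Child span nested under the request span created by FlaskInstrumentor
    with tracer.start_as_current_span("generate_completion") as span:
        content = request.form.get('content', '')
        span.set_attribute("chat.content_length", len(content))
        ...  # forward the prompt to LLAMA_SERVER_URL here
    return jsonify({'status': 'ok'})
```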