Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 41 additions & 4 deletions hindsight-api/hindsight_api/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,18 +71,29 @@
ENV_EMBEDDINGS_OPENAI_MODEL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_MODEL"
ENV_EMBEDDINGS_OPENAI_BASE_URL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_BASE_URL"

ENV_COHERE_API_KEY = "HINDSIGHT_API_COHERE_API_KEY"
# Cohere configuration (separate for embeddings and reranker)
ENV_EMBEDDINGS_COHERE_API_KEY = "HINDSIGHT_API_EMBEDDINGS_COHERE_API_KEY"
ENV_EMBEDDINGS_COHERE_MODEL = "HINDSIGHT_API_EMBEDDINGS_COHERE_MODEL"
ENV_EMBEDDINGS_COHERE_BASE_URL = "HINDSIGHT_API_EMBEDDINGS_COHERE_BASE_URL"
ENV_RERANKER_COHERE_API_KEY = "HINDSIGHT_API_RERANKER_COHERE_API_KEY"
ENV_RERANKER_COHERE_MODEL = "HINDSIGHT_API_RERANKER_COHERE_MODEL"
ENV_RERANKER_COHERE_BASE_URL = "HINDSIGHT_API_RERANKER_COHERE_BASE_URL"

# LiteLLM gateway configuration (for embeddings and reranker via LiteLLM proxy)
ENV_LITELLM_API_BASE = "HINDSIGHT_API_LITELLM_API_BASE"
ENV_LITELLM_API_KEY = "HINDSIGHT_API_LITELLM_API_KEY"
# Deprecated: Legacy shared Cohere API key (for backward compatibility)
ENV_COHERE_API_KEY = "HINDSIGHT_API_COHERE_API_KEY"

# LiteLLM configuration (separate for embeddings and reranker)
ENV_EMBEDDINGS_LITELLM_API_BASE = "HINDSIGHT_API_EMBEDDINGS_LITELLM_API_BASE"
ENV_EMBEDDINGS_LITELLM_API_KEY = "HINDSIGHT_API_EMBEDDINGS_LITELLM_API_KEY"
ENV_EMBEDDINGS_LITELLM_MODEL = "HINDSIGHT_API_EMBEDDINGS_LITELLM_MODEL"
ENV_RERANKER_LITELLM_API_BASE = "HINDSIGHT_API_RERANKER_LITELLM_API_BASE"
ENV_RERANKER_LITELLM_API_KEY = "HINDSIGHT_API_RERANKER_LITELLM_API_KEY"
ENV_RERANKER_LITELLM_MODEL = "HINDSIGHT_API_RERANKER_LITELLM_MODEL"

# Deprecated: Legacy shared LiteLLM config (for backward compatibility)
ENV_LITELLM_API_BASE = "HINDSIGHT_API_LITELLM_API_BASE"
ENV_LITELLM_API_KEY = "HINDSIGHT_API_LITELLM_API_KEY"

ENV_RERANKER_PROVIDER = "HINDSIGHT_API_RERANKER_PROVIDER"
ENV_RERANKER_LOCAL_MODEL = "HINDSIGHT_API_RERANKER_LOCAL_MODEL"
ENV_RERANKER_LOCAL_FORCE_CPU = "HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU"
Expand Down Expand Up @@ -395,7 +406,12 @@ class HindsightConfig:
embeddings_local_force_cpu: bool
embeddings_tei_url: str | None
embeddings_openai_base_url: str | None
embeddings_cohere_api_key: str | None
embeddings_cohere_model: str
embeddings_cohere_base_url: str | None
embeddings_litellm_api_base: str
embeddings_litellm_api_key: str | None
embeddings_litellm_model: str

# Reranker
reranker_provider: str
Expand All @@ -406,7 +422,12 @@ class HindsightConfig:
reranker_tei_batch_size: int
reranker_tei_max_concurrent: int
reranker_max_candidates: int
reranker_cohere_api_key: str | None
reranker_cohere_model: str
reranker_cohere_base_url: str | None
reranker_litellm_api_base: str
reranker_litellm_api_key: str | None
reranker_litellm_model: str

# Server
host: str
Expand Down Expand Up @@ -588,7 +609,15 @@ def from_env(cls) -> "HindsightConfig":
in ("true", "1"),
embeddings_tei_url=os.getenv(ENV_EMBEDDINGS_TEI_URL),
embeddings_openai_base_url=os.getenv(ENV_EMBEDDINGS_OPENAI_BASE_URL) or None,
# Cohere embeddings (with backward-compatible fallback to shared API key)
embeddings_cohere_api_key=os.getenv(ENV_EMBEDDINGS_COHERE_API_KEY) or os.getenv(ENV_COHERE_API_KEY),
embeddings_cohere_model=os.getenv(ENV_EMBEDDINGS_COHERE_MODEL, DEFAULT_EMBEDDINGS_COHERE_MODEL),
embeddings_cohere_base_url=os.getenv(ENV_EMBEDDINGS_COHERE_BASE_URL) or None,
# LiteLLM embeddings (with backward-compatible fallback to shared config)
embeddings_litellm_api_base=os.getenv(ENV_EMBEDDINGS_LITELLM_API_BASE)
or os.getenv(ENV_LITELLM_API_BASE, DEFAULT_LITELLM_API_BASE),
embeddings_litellm_api_key=os.getenv(ENV_EMBEDDINGS_LITELLM_API_KEY) or os.getenv(ENV_LITELLM_API_KEY),
embeddings_litellm_model=os.getenv(ENV_EMBEDDINGS_LITELLM_MODEL, DEFAULT_EMBEDDINGS_LITELLM_MODEL),
# Reranker
reranker_provider=os.getenv(ENV_RERANKER_PROVIDER, DEFAULT_RERANKER_PROVIDER),
reranker_local_model=os.getenv(ENV_RERANKER_LOCAL_MODEL, DEFAULT_RERANKER_LOCAL_MODEL),
Expand All @@ -605,7 +634,15 @@ def from_env(cls) -> "HindsightConfig":
os.getenv(ENV_RERANKER_TEI_MAX_CONCURRENT, str(DEFAULT_RERANKER_TEI_MAX_CONCURRENT))
),
reranker_max_candidates=int(os.getenv(ENV_RERANKER_MAX_CANDIDATES, str(DEFAULT_RERANKER_MAX_CANDIDATES))),
# Cohere reranker (with backward-compatible fallback to shared API key)
reranker_cohere_api_key=os.getenv(ENV_RERANKER_COHERE_API_KEY) or os.getenv(ENV_COHERE_API_KEY),
reranker_cohere_model=os.getenv(ENV_RERANKER_COHERE_MODEL, DEFAULT_RERANKER_COHERE_MODEL),
reranker_cohere_base_url=os.getenv(ENV_RERANKER_COHERE_BASE_URL) or None,
# LiteLLM reranker (with backward-compatible fallback to shared config)
reranker_litellm_api_base=os.getenv(ENV_RERANKER_LITELLM_API_BASE)
or os.getenv(ENV_LITELLM_API_BASE, DEFAULT_LITELLM_API_BASE),
reranker_litellm_api_key=os.getenv(ENV_RERANKER_LITELLM_API_KEY) or os.getenv(ENV_LITELLM_API_KEY),
reranker_litellm_model=os.getenv(ENV_RERANKER_LITELLM_MODEL, DEFAULT_RERANKER_LITELLM_MODEL),
# Server
host=os.getenv(ENV_HOST, DEFAULT_HOST),
port=int(os.getenv(ENV_PORT, DEFAULT_PORT)),
Expand Down
27 changes: 13 additions & 14 deletions hindsight-api/hindsight_api/engine/cross_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,10 @@
DEFAULT_RERANKER_PROVIDER,
DEFAULT_RERANKER_TEI_BATCH_SIZE,
DEFAULT_RERANKER_TEI_MAX_CONCURRENT,
ENV_COHERE_API_KEY,
ENV_LITELLM_API_BASE,
ENV_LITELLM_API_KEY,
ENV_RERANKER_COHERE_BASE_URL,
ENV_RERANKER_COHERE_API_KEY,
ENV_RERANKER_COHERE_MODEL,
ENV_RERANKER_FLASHRANK_CACHE_DIR,
ENV_RERANKER_FLASHRANK_MODEL,
ENV_RERANKER_LITELLM_MODEL,
ENV_RERANKER_LOCAL_FORCE_CPU,
ENV_RERANKER_LOCAL_MAX_CONCURRENT,
ENV_RERANKER_LOCAL_MODEL,
Expand Down Expand Up @@ -849,21 +845,24 @@ def create_cross_encoder_from_env() -> CrossEncoderModel:
force_cpu=config.reranker_local_force_cpu,
)
elif provider == "cohere":
api_key = os.environ.get(ENV_COHERE_API_KEY)
api_key = config.reranker_cohere_api_key
if not api_key:
raise ValueError(f"{ENV_COHERE_API_KEY} is required when {ENV_RERANKER_PROVIDER} is 'cohere'")
model = os.environ.get(ENV_RERANKER_COHERE_MODEL, DEFAULT_RERANKER_COHERE_MODEL)
base_url = os.environ.get(ENV_RERANKER_COHERE_BASE_URL) or None
return CohereCrossEncoder(api_key=api_key, model=model, base_url=base_url)
raise ValueError(f"{ENV_RERANKER_COHERE_API_KEY} is required when {ENV_RERANKER_PROVIDER} is 'cohere'")
return CohereCrossEncoder(
api_key=api_key,
model=config.reranker_cohere_model,
base_url=config.reranker_cohere_base_url,
)
elif provider == "flashrank":
model = os.environ.get(ENV_RERANKER_FLASHRANK_MODEL, DEFAULT_RERANKER_FLASHRANK_MODEL)
cache_dir = os.environ.get(ENV_RERANKER_FLASHRANK_CACHE_DIR, DEFAULT_RERANKER_FLASHRANK_CACHE_DIR)
return FlashRankCrossEncoder(model_name=model, cache_dir=cache_dir)
elif provider == "litellm":
api_base = os.environ.get(ENV_LITELLM_API_BASE, DEFAULT_LITELLM_API_BASE)
api_key = os.environ.get(ENV_LITELLM_API_KEY)
model = os.environ.get(ENV_RERANKER_LITELLM_MODEL, DEFAULT_RERANKER_LITELLM_MODEL)
return LiteLLMCrossEncoder(api_base=api_base, api_key=api_key, model=model)
return LiteLLMCrossEncoder(
api_base=config.reranker_litellm_api_base,
api_key=config.reranker_litellm_api_key,
model=config.reranker_litellm_model,
)
elif provider == "rrf":
return RRFPassthroughCrossEncoder()
else:
Expand Down
28 changes: 13 additions & 15 deletions hindsight-api/hindsight_api/engine/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,14 @@
DEFAULT_EMBEDDINGS_OPENAI_MODEL,
DEFAULT_EMBEDDINGS_PROVIDER,
DEFAULT_LITELLM_API_BASE,
ENV_COHERE_API_KEY,
ENV_EMBEDDINGS_COHERE_BASE_URL,
ENV_EMBEDDINGS_COHERE_MODEL,
ENV_EMBEDDINGS_LITELLM_MODEL,
ENV_EMBEDDINGS_COHERE_API_KEY,
ENV_EMBEDDINGS_LOCAL_FORCE_CPU,
ENV_EMBEDDINGS_LOCAL_MODEL,
ENV_EMBEDDINGS_OPENAI_API_KEY,
ENV_EMBEDDINGS_OPENAI_BASE_URL,
ENV_EMBEDDINGS_OPENAI_MODEL,
ENV_EMBEDDINGS_PROVIDER,
ENV_EMBEDDINGS_TEI_URL,
ENV_LITELLM_API_BASE,
ENV_LITELLM_API_KEY,
ENV_LLM_API_KEY,
)

Expand Down Expand Up @@ -754,17 +749,20 @@ def create_embeddings_from_env() -> Embeddings:
base_url = os.environ.get(ENV_EMBEDDINGS_OPENAI_BASE_URL) or None
return OpenAIEmbeddings(api_key=api_key, model=model, base_url=base_url)
elif provider == "cohere":
api_key = os.environ.get(ENV_COHERE_API_KEY)
api_key = config.embeddings_cohere_api_key
if not api_key:
raise ValueError(f"{ENV_COHERE_API_KEY} is required when {ENV_EMBEDDINGS_PROVIDER} is 'cohere'")
model = os.environ.get(ENV_EMBEDDINGS_COHERE_MODEL, DEFAULT_EMBEDDINGS_COHERE_MODEL)
base_url = os.environ.get(ENV_EMBEDDINGS_COHERE_BASE_URL) or None
return CohereEmbeddings(api_key=api_key, model=model, base_url=base_url)
raise ValueError(f"{ENV_EMBEDDINGS_COHERE_API_KEY} is required when {ENV_EMBEDDINGS_PROVIDER} is 'cohere'")
return CohereEmbeddings(
api_key=api_key,
model=config.embeddings_cohere_model,
base_url=config.embeddings_cohere_base_url,
)
elif provider == "litellm":
api_base = os.environ.get(ENV_LITELLM_API_BASE, DEFAULT_LITELLM_API_BASE)
api_key = os.environ.get(ENV_LITELLM_API_KEY)
model = os.environ.get(ENV_EMBEDDINGS_LITELLM_MODEL, DEFAULT_EMBEDDINGS_LITELLM_MODEL)
return LiteLLMEmbeddings(api_base=api_base, api_key=api_key, model=model)
return LiteLLMEmbeddings(
api_base=config.embeddings_litellm_api_base,
api_key=config.embeddings_litellm_api_key,
model=config.embeddings_litellm_model,
)
else:
raise ValueError(
f"Unknown embeddings provider: {provider}. Supported: 'local', 'tei', 'openai', 'cohere', 'litellm'"
Expand Down
10 changes: 10 additions & 0 deletions hindsight-api/hindsight_api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,12 @@ def main():
embeddings_local_force_cpu=config.embeddings_local_force_cpu,
embeddings_tei_url=config.embeddings_tei_url,
embeddings_openai_base_url=config.embeddings_openai_base_url,
embeddings_cohere_api_key=config.embeddings_cohere_api_key,
embeddings_cohere_model=config.embeddings_cohere_model,
embeddings_cohere_base_url=config.embeddings_cohere_base_url,
embeddings_litellm_api_base=config.embeddings_litellm_api_base,
embeddings_litellm_api_key=config.embeddings_litellm_api_key,
embeddings_litellm_model=config.embeddings_litellm_model,
reranker_provider=config.reranker_provider,
reranker_local_model=config.reranker_local_model,
reranker_local_force_cpu=config.reranker_local_force_cpu,
Expand All @@ -208,7 +213,12 @@ def main():
reranker_tei_batch_size=config.reranker_tei_batch_size,
reranker_tei_max_concurrent=config.reranker_tei_max_concurrent,
reranker_max_candidates=config.reranker_max_candidates,
reranker_cohere_api_key=config.reranker_cohere_api_key,
reranker_cohere_model=config.reranker_cohere_model,
reranker_cohere_base_url=config.reranker_cohere_base_url,
reranker_litellm_api_base=config.reranker_litellm_api_base,
reranker_litellm_api_key=config.reranker_litellm_api_key,
reranker_litellm_model=config.reranker_litellm_model,
host=args.host,
port=args.port,
log_level=args.log_level,
Expand Down
25 changes: 14 additions & 11 deletions hindsight-docs/docs/developer/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -273,11 +273,11 @@ export HINDSIGHT_API_RETAIN_LLM_MAX_BACKOFF=120.0 # Cap at 2min instead of 1m
| `HINDSIGHT_API_EMBEDDINGS_OPENAI_API_KEY` | OpenAI API key (falls back to `HINDSIGHT_API_LLM_API_KEY`) | - |
| `HINDSIGHT_API_EMBEDDINGS_OPENAI_MODEL` | OpenAI embedding model | `text-embedding-3-small` |
| `HINDSIGHT_API_EMBEDDINGS_OPENAI_BASE_URL` | Custom base URL for OpenAI-compatible API (e.g., Azure OpenAI) | - |
| `HINDSIGHT_API_COHERE_API_KEY` | Cohere API key (shared for embeddings and reranker) | - |
| `HINDSIGHT_API_EMBEDDINGS_COHERE_API_KEY` | Cohere API key for embeddings (falls back to the deprecated shared `HINDSIGHT_API_COHERE_API_KEY`) | - |
| `HINDSIGHT_API_EMBEDDINGS_COHERE_MODEL` | Cohere embedding model | `embed-english-v3.0` |
| `HINDSIGHT_API_EMBEDDINGS_COHERE_BASE_URL` | Custom base URL for Cohere-compatible API (e.g., Azure-hosted) | - |
| `HINDSIGHT_API_LITELLM_API_BASE` | LiteLLM proxy base URL (shared for embeddings and reranker) | `http://localhost:4000` |
| `HINDSIGHT_API_LITELLM_API_KEY` | LiteLLM proxy API key (optional, depends on proxy config) | - |
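| `HINDSIGHT_API_EMBEDDINGS_LITELLM_API_BASE` | LiteLLM proxy base URL for embeddings (falls back to the deprecated shared `HINDSIGHT_API_LITELLM_API_BASE`) | `http://localhost:4000` |
| `HINDSIGHT_API_EMBEDDINGS_LITELLM_API_KEY` | LiteLLM proxy API key for embeddings (optional, depends on proxy config; falls back to the deprecated shared `HINDSIGHT_API_LITELLM_API_KEY`) | - |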
| `HINDSIGHT_API_EMBEDDINGS_LITELLM_MODEL` | LiteLLM embedding model (use provider prefix, e.g., `cohere/embed-english-v3.0`) | `text-embedding-3-small` |

```bash
Expand All @@ -302,19 +302,19 @@ export HINDSIGHT_API_EMBEDDINGS_TEI_URL=http://localhost:8080

# Cohere - cloud-based embeddings
export HINDSIGHT_API_EMBEDDINGS_PROVIDER=cohere
export HINDSIGHT_API_COHERE_API_KEY=your-api-key
export HINDSIGHT_API_EMBEDDINGS_COHERE_API_KEY=your-api-key
export HINDSIGHT_API_EMBEDDINGS_COHERE_MODEL=embed-english-v3.0 # 1024 dimensions

# Azure-hosted Cohere - embeddings via custom endpoint
export HINDSIGHT_API_EMBEDDINGS_PROVIDER=cohere
export HINDSIGHT_API_COHERE_API_KEY=your-azure-api-key
export HINDSIGHT_API_EMBEDDINGS_COHERE_API_KEY=your-azure-api-key
export HINDSIGHT_API_EMBEDDINGS_COHERE_MODEL=embed-english-v3.0
export HINDSIGHT_API_EMBEDDINGS_COHERE_BASE_URL=https://your-azure-cohere-endpoint.com

# LiteLLM proxy - unified gateway for multiple providers
export HINDSIGHT_API_EMBEDDINGS_PROVIDER=litellm
export HINDSIGHT_API_LITELLM_API_BASE=http://localhost:4000
export HINDSIGHT_API_LITELLM_API_KEY=your-litellm-key # optional
export HINDSIGHT_API_EMBEDDINGS_LITELLM_API_BASE=http://localhost:4000
export HINDSIGHT_API_EMBEDDINGS_LITELLM_API_KEY=your-litellm-key # optional
export HINDSIGHT_API_EMBEDDINGS_LITELLM_MODEL=text-embedding-3-small # or cohere/embed-english-v3.0
```

Expand Down Expand Up @@ -344,8 +344,11 @@ Supported OpenAI embedding dimensions:
| `HINDSIGHT_API_RERANKER_TEI_URL` | TEI server URL | - |
| `HINDSIGHT_API_RERANKER_TEI_BATCH_SIZE` | Batch size for TEI reranking | `128` |
| `HINDSIGHT_API_RERANKER_TEI_MAX_CONCURRENT` | Max concurrent TEI reranking requests | `8` |
| `HINDSIGHT_API_RERANKER_COHERE_API_KEY` | Cohere API key for reranking (falls back to the deprecated shared `HINDSIGHT_API_COHERE_API_KEY`) | - |
| `HINDSIGHT_API_RERANKER_COHERE_MODEL` | Cohere rerank model | `rerank-english-v3.0` |
| `HINDSIGHT_API_RERANKER_COHERE_BASE_URL` | Custom base URL for Cohere-compatible API (e.g., Azure-hosted) | - |
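| `HINDSIGHT_API_RERANKER_LITELLM_API_BASE` | LiteLLM proxy base URL for reranking (falls back to the deprecated shared `HINDSIGHT_API_LITELLM_API_BASE`) | `http://localhost:4000` |
| `HINDSIGHT_API_RERANKER_LITELLM_API_KEY` | LiteLLM proxy API key for reranking (optional, depends on proxy config; falls back to the deprecated shared `HINDSIGHT_API_LITELLM_API_KEY`) | - |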
| `HINDSIGHT_API_RERANKER_LITELLM_MODEL` | LiteLLM rerank model (use provider prefix, e.g., `cohere/rerank-english-v3.0`) | `cohere/rerank-english-v3.0` |
| `HINDSIGHT_API_RERANKER_FLASHRANK_MODEL` | FlashRank model for fast CPU-based reranking | `ms-marco-MiniLM-L-12-v2` |
| `HINDSIGHT_API_RERANKER_FLASHRANK_CACHE_DIR` | Cache directory for FlashRank models | System default |
Expand All @@ -361,19 +364,19 @@ export HINDSIGHT_API_RERANKER_TEI_URL=http://localhost:8081

# Cohere - cloud-based reranking
export HINDSIGHT_API_RERANKER_PROVIDER=cohere
export HINDSIGHT_API_COHERE_API_KEY=your-api-key # shared with embeddings
export HINDSIGHT_API_RERANKER_COHERE_API_KEY=your-api-key
export HINDSIGHT_API_RERANKER_COHERE_MODEL=rerank-english-v3.0

# Azure-hosted Cohere - reranking via custom endpoint
export HINDSIGHT_API_RERANKER_PROVIDER=cohere
export HINDSIGHT_API_COHERE_API_KEY=your-azure-api-key
export HINDSIGHT_API_RERANKER_COHERE_API_KEY=your-azure-api-key
export HINDSIGHT_API_RERANKER_COHERE_MODEL=rerank-english-v3.0
export HINDSIGHT_API_RERANKER_COHERE_BASE_URL=https://your-azure-cohere-endpoint.com

# LiteLLM proxy - unified gateway for multiple reranking providers
export HINDSIGHT_API_RERANKER_PROVIDER=litellm
export HINDSIGHT_API_LITELLM_API_BASE=http://localhost:4000
export HINDSIGHT_API_LITELLM_API_KEY=your-litellm-key # optional
export HINDSIGHT_API_RERANKER_LITELLM_API_BASE=http://localhost:4000
export HINDSIGHT_API_RERANKER_LITELLM_API_KEY=your-litellm-key # optional
export HINDSIGHT_API_RERANKER_LITELLM_MODEL=cohere/rerank-english-v3.0 # or voyage/rerank-2, together_ai/...
```

Expand Down
Loading