Canner · yichieh-lu · Apr 10, 2025 · Apr 10, 2025 · Apr 10, 2025 · Apr 10, 2025
diff --git a/wren-ai-service/tools/providers-setup/Dockerfile b/wren-ai-service/tools/providers-setup/Dockerfile
@@ -0,0 +1,58 @@
+# Use official slim Python 3.12 base image
+FROM python:3.12.0-slim
+
+# -------------------------------
+# System Dependencies for Poetry
+# -------------------------------
+# Install minimal system packages: curl (for downloading), build tools (for native extensions)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    curl build-essential gcc \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# -------------------------------
+# Install Poetry (Python package manager)
+# -------------------------------
+ENV POETRY_VERSION=1.8.2
+ENV POETRY_HOME="/opt/poetry"
+ENV PATH="$POETRY_HOME/bin:$PATH"
+ENV POETRY_VIRTUALENVS_CREATE=false  
+# Don't use virtualenvs inside the container
+
+RUN curl -sSL https://install.python-poetry.org | python3 - \
+    && ln -s $POETRY_HOME/bin/poetry /usr/local/bin/poetry
+
+# -------------------------------
+# Set working directory for app
+# -------------------------------
+WORKDIR /app
+
+# -------------------------------
+# Install Python dependencies via Poetry
+# -------------------------------
+# Copy only dependency files first to leverage Docker layer caching
+COPY pyproject.toml poetry.lock ./
+RUN poetry install --no-interaction --no-ansi
+
+# -------------------------------
+# Copy remaining app code
+# -------------------------------
+COPY . .
+
+# -------------------------------
+# Environment variables for Streamlit
+# -------------------------------
+ENV PYTHONUNBUFFERED=1
+ENV STREAMLIT_SERVER_HEADLESS=true
+ENV STREAMLIT_SERVER_PORT=8501
+ENV STREAMLIT_SERVER_ENABLECORS=false
+
+# -------------------------------
+# Expose Streamlit port
+# -------------------------------
+EXPOSE 8501
+
+# -------------------------------
+# Default command to run the Streamlit app
+# -------------------------------
+CMD ["streamlit", "run", "app.py"]
-# Use official slim Python 3.12 base image
-FROM python:3.12.0-slim
-
-# -------------------------------
-# System Dependencies for Poetry
-# -------------------------------
-# Install minimal system packages: curl (for downloading), build tools (for native extensions)
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    curl build-essential gcc \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
-
-# -------------------------------
-# Install Poetry (Python package manager)
-# -------------------------------
-ENV POETRY_VERSION=1.8.2
-ENV POETRY_HOME="/opt/poetry"
-ENV PATH="$POETRY_HOME/bin:$PATH"
-ENV POETRY_VIRTUALENVS_CREATE=false  
-# Don't use virtualenvs inside the container
-
-RUN curl -sSL https://install.python-poetry.org | python3 - \
-    && ln -s $POETRY_HOME/bin/poetry /usr/local/bin/poetry
-
-# -------------------------------
-# Set working directory for app
-# -------------------------------
-WORKDIR /app
-
-# -------------------------------
-# Install Python dependencies via Poetry
-# -------------------------------
-# Copy only dependency files first to leverage Docker layer caching
-COPY pyproject.toml poetry.lock ./
-RUN poetry install --no-interaction --no-ansi
-
-# -------------------------------
-# Copy remaining app code
-# -------------------------------
-COPY . .
-
-# -------------------------------
-# Environment variables for Streamlit
-# -------------------------------
-ENV PYTHONUNBUFFERED=1
-ENV STREAMLIT_SERVER_HEADLESS=true
-ENV STREAMLIT_SERVER_PORT=8501
-ENV STREAMLIT_SERVER_ENABLECORS=false
-
-# -------------------------------
-# Expose Streamlit port
-# -------------------------------
-EXPOSE 8501
-
-# -------------------------------
-# Default command to run the Streamlit app
-# -------------------------------
-CMD ["streamlit", "run", "app.py"]
+# Use official slim Python 3.12 base image
+FROM python:3.12.0-slim
+
+# -------------------------------
+# System Dependencies for Poetry
+# -------------------------------
+# Install minimal system packages: curl (for downloading), build tools (for native extensions)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    curl build-essential gcc \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# -------------------------------
+# Install Poetry (Python package manager)
+# -------------------------------
+ENV POETRY_VERSION=1.8.2
+ENV POETRY_HOME="/opt/poetry"
+ENV PATH="$POETRY_HOME/bin:$PATH"
+ENV POETRY_VIRTUALENVS_CREATE=false  
+# Don't use virtualenvs inside the container
+
+RUN curl -sSL https://install.python-poetry.org | python3 - \
+    && ln -s $POETRY_HOME/bin/poetry /usr/local/bin/poetry
+
+# -------------------------------
+# Set working directory for app
+# -------------------------------
+WORKDIR /app
+
+# -------------------------------
+# Install Python dependencies via Poetry
+# -------------------------------
+# Copy only dependency files first to leverage Docker layer caching
+COPY pyproject.toml poetry.lock ./
+RUN poetry install --no-interaction --no-ansi
+
+# -------------------------------
+# Copy remaining app code
+# -------------------------------
+COPY . .
+
+# -------------------------------
+# Create and switch to non-root user
+# -------------------------------
+RUN useradd -m streamlituser
+USER streamlituser
+
+# -------------------------------
+# Environment variables for Streamlit
+# -------------------------------
+ENV PYTHONUNBUFFERED=1
+ENV STREAMLIT_SERVER_HEADLESS=true
+ENV STREAMLIT_SERVER_PORT=8501
+ENV STREAMLIT_SERVER_ENABLECORS=false
+
+# -------------------------------
+# Expose Streamlit port
+# -------------------------------
+EXPOSE 8501
+
+# -------------------------------
+# Default command to run the Streamlit app
+# -------------------------------
+CMD ["streamlit", "run", "app.py"]
-# Use official slim Python 3.12 base image
-FROM python:3.12.0-slim
-
-# -------------------------------
-# System Dependencies for Poetry
-# -------------------------------
-# Install minimal system packages: curl (for downloading), build tools (for native extensions)
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    curl build-essential gcc \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
-
-# -------------------------------
-# Install Poetry (Python package manager)
-# -------------------------------
-ENV POETRY_VERSION=1.8.2
-ENV POETRY_HOME="/opt/poetry"
-ENV PATH="$POETRY_HOME/bin:$PATH"
-ENV POETRY_VIRTUALENVS_CREATE=false  
-# Don't use virtualenvs inside the container
-
-RUN curl -sSL https://install.python-poetry.org | python3 - \
-    && ln -s $POETRY_HOME/bin/poetry /usr/local/bin/poetry
-
-# -------------------------------
-# Set working directory for app
-# -------------------------------
-WORKDIR /app
-
-# -------------------------------
-# Install Python dependencies via Poetry
-# -------------------------------
-# Copy only dependency files first to leverage Docker layer caching
-COPY pyproject.toml poetry.lock ./
-RUN poetry install --no-interaction --no-ansi
-
-# -------------------------------
-# Copy remaining app code
-# -------------------------------
-COPY . .
-
-# -------------------------------
-# Environment variables for Streamlit
-# -------------------------------
-ENV PYTHONUNBUFFERED=1
-ENV STREAMLIT_SERVER_HEADLESS=true
-ENV STREAMLIT_SERVER_PORT=8501
-ENV STREAMLIT_SERVER_ENABLECORS=false
-
-# -------------------------------
-# Expose Streamlit port
-# -------------------------------
-EXPOSE 8501
-
-# -------------------------------
-# Default command to run the Streamlit app
-# -------------------------------
-CMD ["streamlit", "run", "app.py"]
+# Base image: official slim Python build, pinned to 3.12.0
+FROM python:3.12.0-slim
+
+# -------------------------------
+# System Dependencies for Poetry
+# -------------------------------
+# curl downloads the Poetry installer; build-essential and gcc are the build
+# tools for native extensions. The apt caches are removed in the same layer
+# that created them.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        build-essential \
+        curl \
+        gcc \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# -------------------------------
+# Install Poetry (Python package manager)
+# -------------------------------
+# POETRY_VERSION pins the release fetched by the installer and POETRY_HOME is
+# where it is installed. POETRY_VIRTUALENVS_CREATE=false: don't use
+# virtualenvs inside the container.
+ENV POETRY_VERSION=1.8.2 \
+    POETRY_HOME="/opt/poetry" \
+    POETRY_VIRTUALENVS_CREATE=false
+# Separate instruction: $POETRY_HOME is only expandable once the ENV above has been applied.
+ENV PATH="$POETRY_HOME/bin:$PATH"
+
+# Make a failed download fail the build: without pipefail the pipeline's exit
+# status is python3's alone, and -f makes curl return non-zero on HTTP errors.
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+RUN curl -fsSL https://install.python-poetry.org | python3 - \
+    && ln -s "$POETRY_HOME/bin/poetry" /usr/local/bin/poetry
+
+# -------------------------------
+# Set working directory for app
+# -------------------------------
+WORKDIR /app
+
+# -------------------------------
+# Install Python dependencies via Poetry
+# -------------------------------
+# Copy only dependency files first to leverage Docker layer caching.
+COPY pyproject.toml poetry.lock ./
+# --no-root: the project's own source is not in the image at this point (it is
+# copied in a later step), so only the locked dependencies are installed.
+RUN poetry install --no-interaction --no-ansi --no-root
+
+# -------------------------------
+# Create non-root user
+# -------------------------------
+# Created before the source copy so this layer stays cached when only the
+# application code changes.
+RUN useradd -m streamlituser
+
+# -------------------------------
+# Copy remaining app code
+# -------------------------------
+# --chown makes the copied files belong to the user the app runs as,
+# instead of root.
+COPY --chown=streamlituser:streamlituser . .
+
+# -------------------------------
+# Switch to non-root user
+# -------------------------------
+USER streamlituser
+
+# -------------------------------
+# Runtime environment for Python and Streamlit
+# -------------------------------
+# PYTHONUNBUFFERED disables Python's output buffering; the STREAMLIT_SERVER_*
+# variables set Streamlit's headless, port and enableCORS server options.
+ENV PYTHONUNBUFFERED=1 \
+    STREAMLIT_SERVER_HEADLESS=true \
+    STREAMLIT_SERVER_PORT=8501 \
+    STREAMLIT_SERVER_ENABLECORS=false
+
+# -------------------------------
+# Document the Streamlit port (matches STREAMLIT_SERVER_PORT)
+# -------------------------------
+EXPOSE 8501
+
+# -------------------------------
+# Default command: launch the Streamlit app from the working directory
+# -------------------------------
+CMD ["streamlit", "run", "app.py"]
diff --git a/wren-ai-service/tools/providers-setup/app.py b/wren-ai-service/tools/providers-setup/app.py
@@ -0,0 +1,89 @@
+from config_loader import load_config_yaml_blocks, group_blocks
+from session_state import ConfigState
+from ui_components import (
+    render_llm_config, 
+    render_embedder_config,  
+    render_import_yaml, 
+    render_pipeline_config,
+    render_preview,
+    render_apikey,
+    render_generate_button
+)
+import streamlit as st
+
+# Page setup: wide layout for more horizontal space, sidebar expanded by default
+st.set_page_config(layout="wide", initial_sidebar_state="expanded")
+
+# Load the YAML documents and group them by block type
+yaml_list = load_config_yaml_blocks()
+blocks = group_blocks(yaml_list)
+
+# Pull out each configuration section, falling back to an empty value
+llm_block = blocks.get("llm", {})
+embedder_block = blocks.get("embedder", {})
+document_store_block = blocks.get("document_store", {})
+engine_blocks = blocks.get("engine", [])
+pipeline_block = blocks.get("pipeline", {})
+settings_block = blocks.get("settings", {})
+
+
+# A required block counts as present only when it is a non-empty dict
+required_blocks = (
+    ("LLM", llm_block),
+    ("Embedder", embedder_block),
+    ("Document Store", document_store_block),
+    ("Pipeline", pipeline_block),
+)
+missing_blocks = [
+    label
+    for label, candidate in required_blocks
+    if not (isinstance(candidate, dict) and candidate)
+]
+
+if missing_blocks:
+    st.warning(
+        f"⚠️ Missing or empty configuration blocks: {', '.join(missing_blocks)}. "
+        "Default values will be used where applicable."
+    )
+
+# Seed the session state from the blocks gathered above
+ConfigState.init(llm_block, embedder_block, document_store_block, pipeline_block)
+
+# ----------------------
+# Streamlit UI rendering
+# ----------------------
+st.title("Custom Provider Config Generator")
+
+# Layout: two columns at a 1.5 : 1 width ratio – left for inputs, right for preview/export
+col1, col2 = st.columns([1.5, 1])  
+
+with col1:
+
+    # API key input section
+    st.subheader("API_KEY Configuration")
+    render_apikey()
+
+    # LLM section heading. The YAML import widget is rendered first under it
+    # (per its name it uploads/parses a YAML file into session state — see ui_components).
+    st.subheader("LLM Configuration") 
+    render_import_yaml()
+
+    # LLM model configuration UI (shares the "LLM Configuration" heading above)
+    render_llm_config()
+
+    # Embedding model configuration UI
+    st.subheader("Embedder Configuration")
+    render_embedder_config()
+
+    # Pipeline flow configuration UI
+    st.subheader("Pipeline Configuration")
+    render_pipeline_config()
+
+    # Generate config.yaml and save configuration button.
+    # The engine and settings blocks are handed over as loaded above.
+    render_generate_button(engine_blocks, settings_block)
+
+with col2:
+    # Final preview and export of the combined configuration as YAML
+    render_preview(engine_blocks, settings_block)
+
diff --git a/wren-ai-service/tools/providers-setup/config_loader.py b/wren-ai-service/tools/providers-setup/config_loader.py
@@ -0,0 +1,127 @@
+import requests
+import yaml
+from session_state import ConfigState
+from pathlib import Path
+import constants as cst
+from typing import Any, Dict, List
+import streamlit as st
+
+def load_config_yaml_blocks() -> List[Dict[str, Any]]:
+    """
+    Return the YAML documents of the active config.yaml.
+
+    The path given by cst.get_config_path() is read when it exists; any
+    failure while reading it is reported with st.error and yields an empty
+    list. When the file does not exist, the documents are fetched from
+    cst.CONFIG_URL instead (nothing is written to disk here).
+    """
+    local_config = cst.get_config_path()
+    if not local_config.exists():
+        return fetch_yaml_from_url(cst.CONFIG_URL)
+
+    try:
+        return load_yaml_list(local_config)
+    except Exception as e:
+        st.error(f"❌ Failed to parse local config.yaml: {e}")
+        return []
+
+def load_selected_example_yaml(selected_example: str) -> List[Dict[str, Any]]:
+    """
+    Fetch a selected YAML example file from GitHub and return it as a list of blocks.
+
+    Args:
+        selected_example: File name appended to cst.CONFIG_EXAMPLES_SELECTED_URL.
+
+    Returns:
+        One entry per YAML document in the file, or an empty list when the
+        download fails or the body is not valid YAML (reported via st.error).
+    """
+    selected_url = cst.CONFIG_EXAMPLES_SELECTED_URL + selected_example
+    try:
+        response = requests.get(selected_url, timeout=cst.REQUEST_TIMEOUT)
+        response.raise_for_status()
+        return list(yaml.safe_load_all(response.text))
+    except (requests.RequestException, yaml.YAMLError) as e:
+        # yaml.YAMLError is handled too, so a malformed example is reported
+        # in the UI instead of propagating to the caller.
+        st.error(f"❌ Error loading config from GitHub: {e}")
+        return []
+
+def fetch_yaml_from_url(url: str) -> List[Dict[str, Any]]:
+    """
+    Download `url` and parse its body as a stream of YAML documents.
+
+    Request errors, YAML errors and an empty document stream are all
+    reported with st.error; in each of those cases an empty list is returned.
+    """
+    try:
+        reply = requests.get(url, timeout=cst.REQUEST_TIMEOUT)
+        reply.raise_for_status()
+        documents = [doc for doc in yaml.safe_load_all(reply.text)]
+
+        if documents:
+            return documents
+
+        # No documents at all: routed through the same error report below
+        raise ValueError(f"⚠️ Received empty YAML content from: {url}")
+
+    except (requests.RequestException, ValueError, yaml.YAMLError) as e:
+        st.error(f"❌ Error loading config from {url}: {e}")
+
+    return []
+
+def extract_config_blocks(config_list: List[Dict[str, Any]]) -> Dict[str, Any]:
+    """
+    Pick one block per type ("llm", "embedder", "document_store", "pipeline").
+
+    The list is grouped with group_blocks first. Where a type maps to a list
+    of blocks its first element is used; a missing, empty or falsy entry
+    becomes an empty dict.
+    """
+    grouped = group_blocks(config_list)
+    wanted_types = ("llm", "embedder", "document_store", "pipeline")
+
+    selected: Dict[str, Any] = {}
+    for block_type in wanted_types:
+        entry = grouped.get(block_type, {})
+        if isinstance(entry, list):
+            selected[block_type] = entry[0] if entry else {}
+        else:
+            selected[block_type] = entry or {}
+    return selected
+
+def load_yaml_list(path: Path) -> List[Dict[str, Any]]:
+    """
+    Read the file at `path` as UTF-8 and return every YAML document in it, in order.
+    """
+    with path.open("r", encoding="utf-8") as handle:
+        # The generator must be consumed while the file is still open
+        documents = [document for document in yaml.safe_load_all(handle)]
+    return documents
+
+def group_blocks(blocks: List[Dict[str, Any]]) -> Dict[str, Any]:
+    """
+    Group YAML blocks by their 'type' field.
+
+    A block without a truthy 'type' is filed under "settings" when it has a
+    'settings' key and is skipped otherwise. Entries that are not dicts are
+    skipped as well.
+
+    Args:
+        blocks: Parsed YAML documents; None is treated as an empty list.
+
+    Returns:
+        A dict keyed by block type. A type seen once maps to that block
+        itself; a type seen several times maps to a list of its blocks in
+        input order.
+    """
+    save_blocks: Dict[str, Any] = {}
+    for block in blocks or []:
+        # yaml.safe_load_all yields None for an empty document and scalars or
+        # lists for non-mapping documents; none of those can carry a 'type'.
+        if not isinstance(block, dict):
+            continue
+        key = block.get("type") or ("settings" if "settings" in block else None)
+        if not key:
+            continue
+        if key not in save_blocks:
+            save_blocks[key] = block
+        elif isinstance(save_blocks[key], list):
+            save_blocks[key].append(block)
+        else:
+            # Second block of this type: promote the single entry to a list
+            save_blocks[key] = [save_blocks[key], block]
+    return save_blocks
+
+def fetch_example_yaml_filenames() -> List[str]:
+    """
+    Fetch the filenames of all .yaml example configs from the GitHub directory
+    (does not download the content).
+
+    Returns:
+        The "name" of every listed entry that ends in ".yaml", or an empty
+        list when the request fails or the response is not a JSON list
+        (reported via st.error).
+    """
+    try:
+        response = requests.get(cst.CONFIG_EXAMPLES_URL, timeout=cst.REQUEST_TIMEOUT)
+        response.raise_for_status()
+        file_list = response.json()
+    except (requests.RequestException, ValueError) as e:
+        # ValueError covers a body that is not valid JSON
+        st.error(f"Error fetching config example filenames: {e}")
+        return []
+
+    # A directory listing is a JSON array; anything else (e.g. a single
+    # object) cannot be iterated as a list of file entries.
+    if not isinstance(file_list, list):
+        st.error("Error fetching config example filenames: unexpected response format")
+        return []
+
+    return [
+        entry["name"]
+        for entry in file_list
+        if isinstance(entry, dict)
+        and isinstance(entry.get("name"), str)
+        and entry["name"].endswith(".yaml")
+    ]
+
+def apply_config_blocks(config_blocks: List[Dict[str, Any]]):
+    """
+    Reduce the given blocks to one per type via extract_config_blocks and
+    re-initialise ConfigState from them with force=True.
+    """
+    selected = extract_config_blocks(config_blocks)
+    llm, embedder, document_store, pipeline = (
+        selected[block_type]
+        for block_type in ("llm", "embedder", "document_store", "pipeline")
+    )
+
+    ConfigState.init(llm, embedder, document_store, pipeline, force=True)
diff --git a/wren-ai-service/tools/providers-setup/constants.py b/wren-ai-service/tools/providers-setup/constants.py
@@ -0,0 +1,87 @@
+from pathlib import Path
+import os
+import requests
+
+# -------------------------------
+# Fetch Latest Release Version
+# -------------------------------
+
+def get_latest_config_version():
+    """
+    Look up the tag of the latest WrenAI release through the GitHub API.
+
+    Returns:
+        str: The "tag_name" of the latest release when the request answers
+             with status 200; otherwise "main". A non-200 status or any
+             exception is printed and also leads to "main".
+    """
+    releases_url = "https://api.github.com/repos/Canner/WrenAI/releases/latest"
+    try:
+        reply = requests.get(releases_url, timeout=10)
+        if reply.status_code != 200:
+            print(f"Failed to get latest release: {reply.status_code}")
+        else:
+            return reply.json()["tag_name"]
+    except Exception as e:
+        print(f"Error fetching latest config version: {e}")
+
+    # Reached on a non-200 status or after an exception
+    return "main"
+
+
+# -------------------------------
+# Constants for Config Loading
+# -------------------------------
+
+# Resolved once, when this module is imported: the call below performs the
+# HTTP request in get_latest_config_version() and yields "main" if it fails.
+# The value is interpolated into every URL below as the git ref.
+CONFIG_VERSION = get_latest_config_version()
+
+# URL for the default config YAML (used if no local config is found)
+CONFIG_URL = f"https://raw.githubusercontent.com/Canner/WrenAI/{CONFIG_VERSION}/docker/config.example.yaml"
+
+# GitHub API URL to list config examples (only metadata)
+CONFIG_EXAMPLES_URL = (
+    f"https://api.github.com/repos/Canner/WrenAI/contents/wren-ai-service/docs/config_examples?ref={CONFIG_VERSION}"
+)
+
+# Base URL to fetch individual example YAML files by filename
+# (ends with "/", so a filename can be appended directly)
+CONFIG_EXAMPLES_SELECTED_URL = (
+    f"https://raw.githubusercontent.com/Canner/WrenAI/{CONFIG_VERSION}/wren-ai-service/docs/config_examples/"
+)
+
+# -------------------------------
+# Local Config Paths
+# -------------------------------
+
+# Data directory inside the container; the get_*_path helpers in this module
+# prefer a file found here over the one under the user's home directory.
+volume_app_data = Path("/app/data")
+
+# Global HTTP request timeout in seconds
+REQUEST_TIMEOUT = 10
+
+def get_config_done_path():
+    # Candidate locations: the mounted data directory (Docker) and ~/.wrenai
+    mounted_marker = volume_app_data / "config.done"
+    home_marker = Path.home() / ".wrenai" / "config.done"
+
+    # The mounted file wins whenever it exists
+    return mounted_marker if mounted_marker.exists() else home_marker
+
+def get_config_path():
+    # Candidate locations: the mounted data directory (Docker) and ~/.wrenai
+    mounted_config = volume_app_data / "config.yaml"
+    home_config = Path.home() / ".wrenai" / "config.yaml"
+
+    # The mounted file wins whenever it exists
+    return mounted_config if mounted_config.exists() else home_config
+
+# Path to the .env file
+def get_env_path():
+    # Docker environment: mounted .env
+    docker_path = volume_app_data / ".env"
+    # Same lowercase ".wrenai" directory as get_config_path() and
+    # get_config_done_path(); a differently-cased name would be a different
+    # directory on a case-sensitive filesystem.
+    local_path = Path.home() / ".wrenai" / ".env"
+
+    if docker_path.exists():
+        return docker_path
+    else:
+        return local_path