From 2597b7178bd3358b139c08d7efb048163ff00814 Mon Sep 17 00:00:00 2001 From: wanhakim Date: Tue, 2 Dec 2025 11:27:21 +0800 Subject: [PATCH 1/2] Finetuning + fixes (#74) * revamp setup scripts supporting proxies and added studio cleanup scripts Signed-off-by: wwanarif * update build genaicomp script and fix proxy bugs Signed-off-by: wwanarif * enable development env for studio-frontend Signed-off-by: wwanarif * initial push for finetuning UI Signed-off-by: wwanarif * update docker-compose.dev.yml Signed-off-by: wwanarif * update dev env Signed-off-by: wwanarif * initial push for finetuning api server Signed-off-by: wwanarif * updates on the db and ft job view Signed-off-by: wwanarif * added logs in ft job table and streamlined its format. updated ft job create modal Signed-off-by: wwanarif * integrate finetuning server in studio k8 namespace Signed-off-by: wwanarif * update improvements to ft job table and modal Signed-off-by: wwanarif * initial push for ft job outputs downloading Signed-off-by: wwanarif * enable ws connections for ft status and output downloading Signed-off-by: wwanarif * updated the ft configs UI Signed-off-by: wwanarif * updated the ft training configs UI Signed-off-by: wwanarif * fixes for depreciated components and hardfix the genaicomp images versions Signed-off-by: wwanarif * added ws connection for finetuning in studio nginx Signed-off-by: wwanarif * add proxy for github workflows Signed-off-by: wwanarif * udpate studio-frontend image Signed-off-by: wwanarif * add kubectl logs when deploy studio fail in e2e wf Signed-off-by: wwanarif * update otel collector contrib version and remove redundant IconApps Signed-off-by: wwanarif * update otel deployment Signed-off-by: wwanarif * fix e2e test to make sure it exits properly when ansible fails Signed-off-by: wwanarif --------- Signed-off-by: wwanarif --- .../workflows/_build-image-to-registry.yml | 2 +- .github/workflows/_e2e-test.yml | 36 +- app-backend/Dockerfile | 10 + 
app-backend/megaservice.py | 2 +- app-backend/orchestrator.py | 4 - app-frontend/Dockerfile | 14 +- .../react/src/components/SideBar/SideBar.tsx | 1 - .../buildpush-genaicomps-images.yml | 28 +- .../buildpush-genaistudio-images.yml | 15 +- .../build-image-to-registry/vars.yml | 9 +- .../cleanup-genai-studio.sh | 218 +++ .../setup-genai-studio/genai-studio.yml | 6 + .../helm-values/mysqldb.yaml | 13 + .../helm-values/otel-collector.yaml | 4 +- .../manifests/studio-manifest.yaml | 132 +- .../playbooks/create-ssh-secrets.yml | 12 +- .../playbooks/deploy-monitoring.yml | 35 +- .../playbooks/deploy-mysqldb.yml | 33 +- .../playbooks/deploy-studio.yml | 29 +- .../playbooks/deploy-tracing.yml | 46 +- .../playbooks/install-prerequisites.yml | 29 + .../playbooks/setup-local-storageclass.yml | 24 + setup-scripts/setup-genai-studio/readme.md | 30 +- .../setup-genai-studio/studio-config.yaml | 5 +- setup-scripts/setup-genai-studio/vars.yml | 8 +- studio-backend/Dockerfile | 10 + studio-backend/app/routers/debuglog_router.py | 28 + .../app/services/exporter_service.py | 16 +- .../app/templates/app/app.manifest.yaml | 8 +- .../microsvc-manifests/asr-usvc.yaml | 6 +- .../microsvc-manifests/data-prep.yaml | 6 +- .../microsvc-manifests/embedding-usvc.yaml | 6 +- .../microsvc-manifests/reranking-usvc.yaml | 6 +- .../microsvc-manifests/retriever-usvc.yaml | 6 +- studio-backend/app/utils/exporter_utils.py | 2 +- .../app/utils/placeholders_utils.py | 18 +- studio-frontend/.env.development | 6 + studio-frontend/Dockerfile | 16 +- studio-frontend/docker-compose.dev.yml | 47 + studio-frontend/package.json | 7 +- studio-frontend/packages/server/package.json | 4 +- .../src/controllers/finetuning/index.ts | 211 +++ .../database/entities/FineTuningCheckpoint.ts | 19 + .../src/database/entities/FineTuningJob.ts | 34 + .../server/src/database/entities/index.ts | 4 +- .../1760424809635-AddFineTuningTables.ts | 38 + .../src/database/migrations/mysql/index.ts | 4 +- 
.../1760424809635-AddFineTuningTables.ts | 39 + .../src/database/migrations/sqlite/index.ts | 2 + studio-frontend/packages/server/src/index.ts | 44 +- .../server/src/middlewares/errors/index.ts | 14 +- .../server/src/routes/finetuning/index.ts | 32 + .../packages/server/src/routes/index.ts | 2 + .../server/src/services/chatflows/index.ts | 42 +- .../server/src/services/finetuning/index.ts | 752 +++++++++ .../src/utils/webSocketDownloadManager.ts | 0 .../server/src/ws/finetuningDownload.ts | 172 +++ .../server/src/ws/finetuningStatus.ts | 226 +++ .../packages/ui/public/silent-check-sso.html | 16 + .../packages/ui/src/KeycloakContext.jsx | 106 +- studio-frontend/packages/ui/src/api/client.js | 2 +- .../packages/ui/src/api/finetuning.js | 299 ++++ .../packages/ui/src/hooks/useApi.jsx | 2 + .../src/hooks/useFineTuningDownloadSocket.js | 0 .../ui/src/hooks/useWebSocketDownload.js | 0 .../ui/src/layout/MainLayout/Header/index.jsx | 35 +- .../MenuList/CollapsedMenuList/index.jsx | 104 ++ .../Sidebar/MenuList/NavGroup/index.jsx | 4 +- .../src/layout/MainLayout/Sidebar/index.jsx | 210 ++- .../ui/src/layout/MainLayout/ViewHeader.jsx | 5 +- .../ui/src/layout/MainLayout/index.jsx | 62 +- .../packages/ui/src/menu-items/dashboard.js | 30 +- .../packages/ui/src/routes/MainRoutes.jsx | 7 + .../packages/ui/src/store/constant.js | 1 + .../src/ui-component/button/AnimateButton.jsx | 12 +- .../ui/src/ui-component/cards/MainCard.jsx | 10 +- .../ui/src/ui-component/extended/Logo.jsx | 3 +- .../src/ui-component/extended/Transitions.jsx | 8 +- .../src/ui-component/table/FlowListTable.jsx | 13 +- .../packages/ui/src/views/debuglogs/index.jsx | 37 +- .../src/views/finetuning/FileUploadArea.jsx | 317 ++++ .../views/finetuning/FinetuningJobModal.jsx | 1345 +++++++++++++++++ .../views/finetuning/FinetuningJobsTable.jsx | 782 ++++++++++ .../ui/src/views/finetuning/index.jsx | 311 ++++ .../packages/ui/src/views/opeaflows/index.jsx | 95 +- .../packages/ui/src/views/tracer/index.jsx | 31 +- 
studio-frontend/packages/ui/vite.config.js | 21 +- 87 files changed, 6011 insertions(+), 429 deletions(-) create mode 100755 setup-scripts/setup-genai-studio/cleanup-genai-studio.sh create mode 100644 setup-scripts/setup-genai-studio/playbooks/install-prerequisites.yml create mode 100644 setup-scripts/setup-genai-studio/playbooks/setup-local-storageclass.yml create mode 100644 studio-frontend/.env.development create mode 100644 studio-frontend/docker-compose.dev.yml create mode 100644 studio-frontend/packages/server/src/controllers/finetuning/index.ts create mode 100644 studio-frontend/packages/server/src/database/entities/FineTuningCheckpoint.ts create mode 100644 studio-frontend/packages/server/src/database/entities/FineTuningJob.ts create mode 100644 studio-frontend/packages/server/src/database/migrations/mysql/1760424809635-AddFineTuningTables.ts create mode 100644 studio-frontend/packages/server/src/database/migrations/sqlite/1760424809635-AddFineTuningTables.ts create mode 100644 studio-frontend/packages/server/src/routes/finetuning/index.ts create mode 100644 studio-frontend/packages/server/src/services/finetuning/index.ts create mode 100644 studio-frontend/packages/server/src/utils/webSocketDownloadManager.ts create mode 100644 studio-frontend/packages/server/src/ws/finetuningDownload.ts create mode 100644 studio-frontend/packages/server/src/ws/finetuningStatus.ts create mode 100644 studio-frontend/packages/ui/public/silent-check-sso.html create mode 100644 studio-frontend/packages/ui/src/api/finetuning.js create mode 100644 studio-frontend/packages/ui/src/hooks/useFineTuningDownloadSocket.js create mode 100644 studio-frontend/packages/ui/src/hooks/useWebSocketDownload.js create mode 100644 studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/MenuList/CollapsedMenuList/index.jsx create mode 100644 studio-frontend/packages/ui/src/views/finetuning/FileUploadArea.jsx create mode 100644 
studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx create mode 100644 studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx create mode 100644 studio-frontend/packages/ui/src/views/finetuning/index.jsx diff --git a/.github/workflows/_build-image-to-registry.yml b/.github/workflows/_build-image-to-registry.yml index 40778c5..3bf7828 100644 --- a/.github/workflows/_build-image-to-registry.yml +++ b/.github/workflows/_build-image-to-registry.yml @@ -39,5 +39,5 @@ jobs: - name: Build Image and Push Image run: | sudo apt install ansible -y - ansible-playbook buildpush-genaistudio-images.yml -e "container_registry=${OPEA_IMAGE_REPO}opea" -e "container_tag=${{ inputs.tag }}" + ansible-playbook buildpush-genaistudio-images.yml -e "container_registry=${OPEA_IMAGE_REPO}opea" -e "container_tag=${{ inputs.tag }}" -e "http_proxy=${http_proxy}" working-directory: ${{ github.workspace }}/setup-scripts/build-image-to-registry/ \ No newline at end of file diff --git a/.github/workflows/_e2e-test.yml b/.github/workflows/_e2e-test.yml index f19bdff..9441d84 100644 --- a/.github/workflows/_e2e-test.yml +++ b/.github/workflows/_e2e-test.yml @@ -39,10 +39,19 @@ jobs: - name: Deploy GenAI Studio run: | sudo apt install ansible -y - ansible-playbook genai-studio.yml -e "container_registry=${OPEA_IMAGE_REPO}opea" -e "container_tag=${{ inputs.tag }}" -e "mysql_host=mysql.mysql.svc.cluster.local" + ansible-playbook genai-studio.yml -e "container_registry=${OPEA_IMAGE_REPO}opea" -e "container_tag=${{ inputs.tag }}" -e "mysql_host=mysql.mysql.svc.cluster.local" -e "http_proxy=${http_proxy}" || { + echo "Ansible playbook failed. Checking pod status..." 
+ echo "=== Pods in studio namespace ===" + kubectl get pods -n studio + echo "=== Logs and events for non-Ready pods in studio namespace ===" + for pod in $(kubectl get pods -n studio --field-selector=status.phase!=Running,status.phase!=Succeeded -o jsonpath='{.items[*].metadata.name}'); do + echo "--- Pod: $pod ---" + kubectl describe pod $pod -n studio + kubectl logs $pod -n studio --all-containers=true --tail=100 || echo "Could not fetch logs for $pod" + done + exit 1 + } sleep 5 - kubectl wait --for=condition=ready pod --all --namespace=studio --timeout=300s --field-selector=status.phase!=Succeeded - kubectl wait --for=condition=ready pod --all --namespace=monitoring --timeout=300s --field-selector=status.phase!=Succeeded working-directory: ${{ github.workspace }}/setup-scripts/setup-genai-studio/ - name: Set up Node.js @@ -74,25 +83,10 @@ jobs: name: playwright-test-results path: ${{ github.workspace }}/tests/playwright/playwright-report - - name: Cleanup sandbox namespaces + - name: Cleanup GenAI Studio if: always() run: | - if kubectl get namespace mysql; then - kubectl delete ns mysql || true - fi - if kubectl get namespace tracing; then - kubectl delete ns tracing || true - fi - for ns in $(kubectl get namespaces -o jsonpath='{.items[*].metadata.name}' | tr ' ' '\n' | grep '^sandbox-'); do - kubectl delete namespace $ns || true - done - if kubectl get namespace studio; then - kubectl delete -f manifests/studio-manifest.yaml || true - kubectl wait --for=delete pod --all --namespace=studio --timeout=300s - fi - if kubectl get namespace monitoring; then - kubectl delete -f manifests/monitoring-manifest.yaml || true - kubectl wait --for=delete pod --all --namespace=monitoring --timeout=300s - fi + echo "Running GenAI Studio cleanup script..." 
+ ./cleanup-genai-studio.sh || echo "Cleanup script completed with warnings" working-directory: ${{ github.workspace }}/setup-scripts/setup-genai-studio/ diff --git a/app-backend/Dockerfile b/app-backend/Dockerfile index eddbb4d..ee9995b 100644 --- a/app-backend/Dockerfile +++ b/app-backend/Dockerfile @@ -1,5 +1,15 @@ FROM python:3.11-slim +# Accept proxy build arguments +ARG http_proxy +ARG https_proxy +ARG no_proxy + +# Set proxy environment variables for package managers +ENV http_proxy=${http_proxy} +ENV https_proxy=${https_proxy} +ENV no_proxy=${no_proxy} + RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ libsqlite3-0 \ libjemalloc-dev \ diff --git a/app-backend/megaservice.py b/app-backend/megaservice.py index 3335323..4cb7e13 100644 --- a/app-backend/megaservice.py +++ b/app-backend/megaservice.py @@ -170,7 +170,7 @@ def add_remote_service(self): microservice_name = node['name'].split('@')[1] if "docsum" in microservice_name: self.is_docsum = True - service_node_ip = node_id.split('@')[1].replace('_','-') if USE_NODE_ID_AS_IP else HOST_IP + service_node_ip = f"opea-{node_id.split('@')[1].replace('_','-')}" if USE_NODE_ID_AS_IP else HOST_IP microservice = templates[microservice_name].get_service(host_ip=service_node_ip, node_id_as_ip=USE_NODE_ID_AS_IP, port=os.getenv(f"{node_id.split('@')[1]}_port", None)) microservice.name = node_id self.services[node_id] = microservice diff --git a/app-backend/orchestrator.py b/app-backend/orchestrator.py index f4b949b..f6357f6 100644 --- a/app-backend/orchestrator.py +++ b/app-backend/orchestrator.py @@ -272,7 +272,6 @@ async def execute( url=endpoint, data=json.dumps(inputs), headers={"Content-type": "application/json", "Authorization": f"Bearer {access_token}"}, - proxies={"http": None}, stream=True, timeout=2000, ) @@ -283,7 +282,6 @@ async def execute( headers={ "Content-type": "application/json", }, - proxies={"http": None}, stream=True, timeout=2000, ) @@ -316,7 +314,6 @@ def 
generate(): "Content-type": "application/json", "Authorization": f"Bearer {access_token}", }, - proxies={"http": None}, timeout=2000, ) else: @@ -326,7 +323,6 @@ def generate(): headers={ "Content-type": "application/json", }, - proxies={"http": None}, timeout=2000, ) res_json = res.json() diff --git a/app-frontend/Dockerfile b/app-frontend/Dockerfile index 4c4d727..f1f41c6 100644 --- a/app-frontend/Dockerfile +++ b/app-frontend/Dockerfile @@ -2,13 +2,23 @@ # SPDX-License-Identifier: Apache-2.0 # Use node 20.11.1 as the base image -FROM node:20.11.1 as vite-app +FROM node:20.11.1 AS vite-app + +# Accept proxy build arguments +ARG http_proxy +ARG https_proxy +ARG no_proxy + +# Set proxy environment variables for package managers +ENV http_proxy=${http_proxy} +ENV https_proxy=${https_proxy} +ENV no_proxy=${no_proxy} COPY ./react /usr/app/react WORKDIR /usr/app/react -RUN ["npm", "install"] +RUN ["npm", "install", "--legacy-peer-deps"] RUN ["npm", "run", "build"] diff --git a/app-frontend/react/src/components/SideBar/SideBar.tsx b/app-frontend/react/src/components/SideBar/SideBar.tsx index ee356c9..26873e8 100644 --- a/app-frontend/react/src/components/SideBar/SideBar.tsx +++ b/app-frontend/react/src/components/SideBar/SideBar.tsx @@ -79,7 +79,6 @@ export const LinkedMenuItem: React.FC = ({ to={toWithQuery(to)} onClick={onClick} tabIndex={open ? 
0 : -1} - aria-hidden={!open} > {children} diff --git a/setup-scripts/build-image-to-registry/buildpush-genaicomps-images.yml b/setup-scripts/build-image-to-registry/buildpush-genaicomps-images.yml index 55207b0..02e1d4a 100755 --- a/setup-scripts/build-image-to-registry/buildpush-genaicomps-images.yml +++ b/setup-scripts/build-image-to-registry/buildpush-genaicomps-images.yml @@ -15,13 +15,21 @@ dest: /tmp/GenAIComps clone: yes update: no + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" when: not genaicomp_dir.stat.exists - - name: Pull latest changes in GenAIComps repo + - name: Checkout specific GenAIComps tag git: repo: https://github.com/opea-project/GenAIComps.git dest: /tmp/GenAIComps - update: yes + version: "{{ genaicomps_tag }}" + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" when: genaicomp_dir.stat.exists - name: Build and push GenAIComps images @@ -38,11 +46,25 @@ - { name: 'asr', dockerfile: 'comps/asr/src/Dockerfile' } block: - name: Build image - command: docker build -t {{ container_registry }}/{{ item.name }}:{{ container_tag }} -f {{ item.dockerfile }} . + command: > + docker build + --build-arg http_proxy="{{ http_proxy }}" + --build-arg https_proxy="{{ http_proxy }}" + --build-arg no_proxy="{{ no_proxy }}" + -t {{ container_registry }}/{{ item.name }}:{{ container_tag }} + -f {{ item.dockerfile }} . 
args: chdir: /tmp/GenAIComps + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" loop: "{{ genaicomp_images }}" - name: Push image command: docker push {{ container_registry }}/{{ item.name }}:{{ container_tag }} + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" loop: "{{ genaicomp_images }}" diff --git a/setup-scripts/build-image-to-registry/buildpush-genaistudio-images.yml b/setup-scripts/build-image-to-registry/buildpush-genaistudio-images.yml index e4f916f..20839a2 100755 --- a/setup-scripts/build-image-to-registry/buildpush-genaistudio-images.yml +++ b/setup-scripts/build-image-to-registry/buildpush-genaistudio-images.yml @@ -5,9 +5,18 @@ - vars.yml tasks: - name: Build Docker image - command: docker build -t "{{ container_registry }}/{{ item.image_name }}:{{ container_tag }}" . + command: > + docker build + --build-arg http_proxy="{{ http_proxy }}" + --build-arg https_proxy="{{ http_proxy }}" + --build-arg no_proxy="{{ no_proxy }}" + -t "{{ container_registry }}/{{ item.image_name }}:{{ container_tag }}" . 
args: chdir: "{{ item.directory }}" + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" loop: - { directory: '../../studio-frontend/', image_name: 'studio-frontend' } - { directory: '../../studio-backend/', image_name: 'studio-backend' } @@ -17,6 +26,10 @@ - name: Push Docker image command: docker push "{{ container_registry }}/{{ item.image_name }}:{{ container_tag }}" + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" loop: - { image_name: 'studio-frontend' } - { image_name: 'studio-backend' } diff --git a/setup-scripts/build-image-to-registry/vars.yml b/setup-scripts/build-image-to-registry/vars.yml index 2c97a25..7646ae8 100644 --- a/setup-scripts/build-image-to-registry/vars.yml +++ b/setup-scripts/build-image-to-registry/vars.yml @@ -1,2 +1,7 @@ -container_registry: 'opea' -container_tag: 'latest' \ No newline at end of file +# Container registry configuration +# Replace {{ ansible_default_ipv4.address }} with your Kubernetes master/API endpoint IP if needed +container_registry: '{{ ansible_default_ipv4.address }}:5000/opea' +container_tag: 'latest' +genaicomps_tag: 'main' +http_proxy: '' +no_proxy: 'localhost,127.0.0.1,.local,.svc.cluster.local,{{ ansible_default_ipv4.address }}' \ No newline at end of file diff --git a/setup-scripts/setup-genai-studio/cleanup-genai-studio.sh b/setup-scripts/setup-genai-studio/cleanup-genai-studio.sh new file mode 100755 index 0000000..dc4fcb4 --- /dev/null +++ b/setup-scripts/setup-genai-studio/cleanup-genai-studio.sh @@ -0,0 +1,218 @@ +#!/bin/bash + +# GenAI Studio Complete Cleanup Script +# This script ensures all namespaces and resources are properly deleted + +set -e + +echo "========================================" +echo "GenAI Studio Complete Cleanup Script" +echo "========================================" +echo + +# Function to check if namespace exists +namespace_exists() { + kubectl get namespace 
"$1" &>/dev/null +} + +# Function to delete namespace with comprehensive cleanup +cleanup_namespace() { + local ns=$1 + local manifest_file=$2 + + if ! namespace_exists "$ns"; then + echo "✅ Namespace '$ns' does not exist, skipping..." + return 0 + fi + + echo "🧹 Cleaning up namespace: $ns" + + # Delete resources using manifest if provided + echo " - Deleting resources using manifest: $manifest_file" + if [ ! -z "$manifest_file" ] && [ -f "$manifest_file" ]; then + kubectl delete -f "$manifest_file" --timeout=120s --ignore-not-found=true || true + elif [ ! -z "$manifest_file" ]; then + echo " - Namespace $ns has no manifest" + fi + + # Delete Helm releases in the namespace + echo " - Checking for Helm releases in $ns..." + helm list -n "$ns" -q 2>/dev/null | xargs -r -I {} helm delete {} -n "$ns" --timeout=120s || true + + # Wait for pods to terminate gracefully + echo " - Waiting for pods to terminate gracefully..." + if kubectl get pods -n "$ns" --no-headers 2>/dev/null | grep -q .; then + kubectl wait --for=delete pod --all --namespace="$ns" --timeout=180s || true + else + echo " No pods found in namespace $ns" + fi + + # Force delete any remaining pods + echo " - Force deleting any remaining pods..." + REMAINING_PODS=$(kubectl get pods -n "$ns" --no-headers 2>/dev/null | awk '{print $1}' || true) + if [ ! -z "$REMAINING_PODS" ]; then + echo " Found remaining pods: $REMAINING_PODS" + echo "$REMAINING_PODS" | xargs -r kubectl delete pod -n "$ns" --force --grace-period=0 || true + else + echo " No remaining pods to force delete" + fi + + # Delete PVCs + echo " - Deleting PersistentVolumeClaims..." + PVCS=$(kubectl get pvc -n "$ns" --no-headers 2>/dev/null | awk '{print $1}' || true) + if [ ! -z "$PVCS" ]; then + echo " Found PVCs: $PVCS" + echo "$PVCS" | xargs -r kubectl delete pvc -n "$ns" --timeout=60s || true + else + echo " No PVCs found in namespace $ns" + fi + + # Delete secrets (except default service account token) + echo " - Deleting secrets..." 
+ SECRETS=$(kubectl get secrets -n "$ns" --no-headers 2>/dev/null | grep -v "default-token" | awk '{print $1}' || true) + if [ ! -z "$SECRETS" ]; then + echo " Found secrets: $SECRETS" + echo "$SECRETS" | xargs -r kubectl delete secret -n "$ns" || true + else + echo " No custom secrets found in namespace $ns" + fi + + # Delete configmaps + echo " - Deleting configmaps..." + CONFIGMAPS=$(kubectl get configmaps -n "$ns" --no-headers 2>/dev/null | grep -v "kube-root-ca.crt" | awk '{print $1}' || true) + if [ ! -z "$CONFIGMAPS" ]; then + echo " Found configmaps: $CONFIGMAPS" + echo "$CONFIGMAPS" | xargs -r kubectl delete configmap -n "$ns" || true + else + echo " No custom configmaps found in namespace $ns" + fi + + # Finally delete the namespace + echo " - Deleting namespace..." + kubectl delete namespace "$ns" --timeout=120s || true + + # If namespace still exists, patch it to remove finalizers + if namespace_exists "$ns"; then + echo " - Namespace still exists, removing finalizers..." + kubectl patch namespace "$ns" -p '{"metadata":{"finalizers":[]}}' --type=merge || true + kubectl delete namespace "$ns" --force --grace-period=0 || true + fi + + # Final check + if namespace_exists "$ns"; then + echo " ❌ WARNING: Namespace '$ns' still exists after cleanup" + return 1 + else + echo " ✅ SUCCESS: Namespace '$ns' has been deleted" + return 0 + fi +} + +# Main cleanup process +echo "Starting comprehensive cleanup..." +echo + +# Change to the setup directory +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" + +# Clean up sandbox namespaces first +echo "🔍 Looking for sandbox namespaces..." +SANDBOX_NAMESPACES=$(kubectl get namespaces -o jsonpath='{.items[*].metadata.name}' | tr ' ' '\n' | grep '^sandbox-' || true) + +if [ ! 
-z "$SANDBOX_NAMESPACES" ]; then + echo "Found sandbox namespaces: $SANDBOX_NAMESPACES" + for ns in $SANDBOX_NAMESPACES; do + cleanup_namespace "$ns" + done +else + echo "✅ No sandbox namespaces found" +fi + +echo + +# Clean up main namespaces +MAIN_NAMESPACES=( + "studio:manifests/studio-manifest.yaml" + "monitoring:manifests/monitoring-manifest.yaml" + "tracing:" + "mysql:" +) + +for ns_info in "${MAIN_NAMESPACES[@]}"; do + IFS=':' read -r ns manifest <<< "$ns_info" + cleanup_namespace "$ns" "$manifest" + echo +done + +# # Clean up any remaining Helm releases globally +# echo "🧹 Cleaning up any remaining Helm releases..." +# helm list --all-namespaces --filter="mysql|kube-prometheus-stack|clickhouse|pascaliske" -q 2>/dev/null | \ +# while read -r release; do +# if [ ! -z "$release" ]; then +# echo " - Deleting Helm release: $release" +# helm delete "$release" --timeout=60s || true +# fi +# done + +# Check local-path-storage namespace and explain why it's preserved +echo "🔒 Checking local-path-storage namespace..." 
+if namespace_exists "local-path-storage"; then + echo " ✅ INTENTIONALLY PRESERVED: local-path-storage namespace exists" + echo " 📝 This namespace provides storage provisioning and is NOT cleaned up because:" + echo " - It may be used by other applications beyond GenAI Studio" + echo " - Deleting it would break any existing PVCs using local-path storage" + echo " - The local-path StorageClass would become non-functional" + echo " - It's a cluster-wide infrastructure component" + echo "" + echo " 💡 To manually remove local-path-storage later (if you're sure it's safe):" + echo " kubectl delete namespace local-path-storage" + echo " kubectl delete storageclass local-path" +else + echo " ℹ️ local-path-storage namespace does not exist" +fi + + +echo +echo "========================================" +echo "Cleanup Summary" +echo "========================================" + +# Final verification +FAILED_CLEANUP=() +NAMESPACES_TO_CHECK="studio monitoring tracing mysql" + +for ns in $NAMESPACES_TO_CHECK; do + if namespace_exists "$ns"; then + echo "❌ FAILED: Namespace '$ns' still exists" + FAILED_CLEANUP+=("$ns") + else + echo "✅ SUCCESS: Namespace '$ns' deleted" + fi +done + +# Special handling for local-path-storage (intentionally preserved) +if namespace_exists "local-path-storage"; then + echo "🔒 PRESERVED: Namespace 'local-path-storage' intentionally kept" +else + echo "ℹ️ INFO: Namespace 'local-path-storage' was not present" +fi + +# Check for remaining sandbox namespaces +REMAINING_SANDBOX=$(kubectl get namespaces -o jsonpath='{.items[*].metadata.name}' | tr ' ' '\n' | grep '^sandbox-' || true) +if [ ! -z "$REMAINING_SANDBOX" ]; then + echo "❌ FAILED: Remaining sandbox namespaces: $REMAINING_SANDBOX" + FAILED_CLEANUP+=("sandbox namespaces") +else + echo "✅ SUCCESS: All sandbox namespaces deleted" +fi + +echo +if [ ${#FAILED_CLEANUP[@]} -eq 0 ]; then + echo "🎉 All namespaces have been successfully cleaned up!" 
+ exit 0 +else + echo "⚠️ Some namespaces failed to cleanup: ${FAILED_CLEANUP[*]}" + echo "You may need to manually investigate and clean up these namespaces." + exit 1 +fi diff --git a/setup-scripts/setup-genai-studio/genai-studio.yml b/setup-scripts/setup-genai-studio/genai-studio.yml index 10fdc80..0c86d5b 100644 --- a/setup-scripts/setup-genai-studio/genai-studio.yml +++ b/setup-scripts/setup-genai-studio/genai-studio.yml @@ -1,3 +1,9 @@ +- name: Install prerequisites (Helm, etc.) + import_playbook: playbooks/install-prerequisites.yml + +- name: Setup local path storage provisioner + import_playbook: playbooks/setup-local-storageclass.yml + - name: Create ssh secrets import_playbook: playbooks/create-ssh-secrets.yml diff --git a/setup-scripts/setup-genai-studio/helm-values/mysqldb.yaml b/setup-scripts/setup-genai-studio/helm-values/mysqldb.yaml index 1a0e46b..9abe0c2 100644 --- a/setup-scripts/setup-genai-studio/helm-values/mysqldb.yaml +++ b/setup-scripts/setup-genai-studio/helm-values/mysqldb.yaml @@ -2,6 +2,8 @@ auth: rootPassword: root image: + registry: docker.io + repository: bitnamilegacy/mysql tag: "8.0" primary: @@ -14,6 +16,17 @@ primary: - ReadWriteOnce size: 8Gi +volumePermissions: + enabled: true + image: + registry: docker.io + repository: bitnamilegacy/os-shell + tag: "latest" + +global: + security: + allowInsecureImages: true + initdbScripts: 01-create-users-and-dbs.sql: | -- Create 'studio' user for '%' host diff --git a/setup-scripts/setup-genai-studio/helm-values/otel-collector.yaml b/setup-scripts/setup-genai-studio/helm-values/otel-collector.yaml index 0e2c9d4..a962e82 100644 --- a/setup-scripts/setup-genai-studio/helm-values/otel-collector.yaml +++ b/setup-scripts/setup-genai-studio/helm-values/otel-collector.yaml @@ -2,7 +2,7 @@ mode: deployment image: repository: "otel/opentelemetry-collector-contrib" - tag: "latest" + tag: "0.139.0" config: receivers: @@ -22,6 +22,8 @@ config: clickhouse: endpoint: 
tcp://clickhouse.tracing.svc.cluster.local:9000 database: otel + username: default + password: "" ttl: 72h traces_table_name: otel_traces logs_table_name: otel_logs diff --git a/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml b/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml index 721f860..4b5440e 100644 --- a/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml +++ b/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml @@ -150,6 +150,25 @@ data: proxy_set_header Connection "upgrade"; } + # WebSocket connection for fine-tuning job monitoring and downloads + # Routes directly to studio-frontend's socket.io handlers + location /socket.io { + proxy_pass http://${STUDIO_FRONTEND_DNS}; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # Ensure WebSocket upgrade headers + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + + # Disable buffering for WebSocket + proxy_buffering off; + proxy_cache off; + } + location /studio-backend { proxy_pass http://${STUDIO_BACKEND_DNS}; proxy_set_header Host $host; @@ -347,21 +366,21 @@ spec: spec: containers: - name: studio-backend - image: ${REGISTRY}/studio-backend:${TAG} + image: "${REGISTRY}/studio-backend:${TAG}" imagePullPolicy: Always env: - name: APP_FRONTEND_IMAGE - value: ${REGISTRY}/app-frontend:${TAG} + value: "${REGISTRY}/app-frontend:${TAG}" - name: APP_BACKEND_IMAGE - value: ${REGISTRY}/app-backend:${TAG} + value: "${REGISTRY}/app-backend:${TAG}" - name: REGISTRY - value: ${REGISTRY} + value: "opea" - name: TAG - value: ${TAG} + value: "1.4" - name: SBX_HTTP_PROXY value: ${HTTP_PROXY} - name: SBX_NO_PROXY - value: ${NO_PROXY} + value: ${NO_PROXY} envFrom: - configMapRef: name: studio-config @@ -397,6 +416,19 @@ spec: serviceAccountName: studio-backend-sa --- apiVersion: v1 
+kind: PersistentVolumeClaim +metadata: + name: shared-finetuning-output + namespace: studio +spec: + accessModes: + - ReadWriteOnce + storageClassName: local-path + resources: + requests: + storage: 1Gi +--- +apiVersion: v1 kind: Service metadata: name: studio-frontend @@ -432,8 +464,11 @@ spec: containers: - name: studio-frontend securityContext: {} - image: ${REGISTRY}/studio-frontend:${TAG} + image: "${REGISTRY}/studio-frontend:${TAG}" imagePullPolicy: Always + envFrom: + - configMapRef: + name: studio-config env: - name: DATABASE_TYPE value: mysql @@ -449,6 +484,12 @@ spec: value: studio - name: DATABASE_SSL value: "false" + - name: HTTP_PROXY + value: "${HTTP_PROXY}" + - name: HTTPS_PROXY + value: "${HTTP_PROXY}" + - name: NO_PROXY + value: "${NO_PROXY}" ports: - name: studio-frontend containerPort: 8080 @@ -460,6 +501,8 @@ spec: - name: ssh-key-volume mountPath: /root/.ssh readOnly: true + - name: shared-output-volume + mountPath: /tmp/finetuning/output volumes: - name: tmp emptyDir: {} @@ -470,6 +513,9 @@ spec: - key: studio-id_rsa.pub path: id_rsa.pub mode: 0644 + - name: shared-output-volume + persistentVolumeClaim: + claimName: shared-finetuning-output --- apiVersion: apps/v1 kind: Deployment @@ -502,6 +548,13 @@ spec: DOWNLOAD_URL="https://codeload.github.com/${OWNER}/${REPO}/tar.gz/${BRANCH}" curl "${DOWNLOAD_URL}" | tar -xz --strip-components=4 -C /opt/keycloak/themes "${REPO}-${BRANCH}/${KC_ASSETS_DIR}/themes" curl "${DOWNLOAD_URL}" | tar -xz --strip-components=4 -C /opt/keycloak/data "${REPO}-${BRANCH}/${KC_ASSETS_DIR}/data" + env: + - name: http_proxy + value: ${HTTP_PROXY} + - name: https_proxy + value: ${HTTP_PROXY} + - name: NO_PROXY + value: ${NO_PROXY} envFrom: - configMapRef: name: studio-config @@ -515,7 +568,7 @@ spec: runAsGroup: 0 containers: - name: keycloak - image: quay.io/keycloak/keycloak:latest + image: quay.io/keycloak/keycloak:26.2.5 volumeMounts: - name: app-tls mountPath: /etc/ssl @@ -598,4 +651,65 @@ spec: port: 8443 
targetPort: 8443 selector: - app: keycloak \ No newline at end of file + app: keycloak + +--- +apiVersion: v1 +kind: Service +metadata: + name: finetuning-server + namespace: studio +spec: + type: ClusterIP + ports: + - name: api + protocol: TCP + port: 8015 + targetPort: 8015 + - name: dashboard + protocol: TCP + port: 8265 + targetPort: 8265 + selector: + app: finetuning-server + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: finetuning-server + namespace: studio + labels: + app: finetuning-server +spec: + replicas: 1 + selector: + matchLabels: + app: finetuning-server + template: + metadata: + labels: + app: finetuning-server + spec: + containers: + - name: finetuning-server + image: opea/finetuning:latest + imagePullPolicy: IfNotPresent + ports: + - containerPort: 8015 + - containerPort: 8265 + env: + - name: http_proxy + value: ${HTTP_PROXY} + - name: https_proxy + value: ${HTTP_PROXY} + - name: NO_PROXY + value: ${NO_PROXY} + resources: {} + volumeMounts: + - name: shared-output-volume + mountPath: /home/user/comps/finetuning/src/output + volumes: + - name: shared-output-volume + persistentVolumeClaim: + claimName: shared-finetuning-output \ No newline at end of file diff --git a/setup-scripts/setup-genai-studio/playbooks/create-ssh-secrets.yml b/setup-scripts/setup-genai-studio/playbooks/create-ssh-secrets.yml index a4532e5..7841188 100644 --- a/setup-scripts/setup-genai-studio/playbooks/create-ssh-secrets.yml +++ b/setup-scripts/setup-genai-studio/playbooks/create-ssh-secrets.yml @@ -1,5 +1,7 @@ - name: Create ssh keys in k8 secrets using shell and kubectl commands hosts: localhost + vars_files: + - ../vars.yml tasks: @@ -23,9 +25,17 @@ command: kubectl wait --for=condition=Ready pod/ubuntu-ssh-keygen -n studio --timeout=60s when: "'NotFound' in kubectl_secret_check.stderr" + - name: Install openssh-client in pod + shell: | + kubectl exec -n studio ubuntu-ssh-keygen -- bash -c " + export http_proxy='{{ http_proxy }}' + export https_proxy='{{ 
http_proxy }}' + export no_proxy='{{ no_proxy }}' + apt-get update && apt-get install -y openssh-client" + when: "'NotFound' in kubectl_secret_check.stderr" + - name: Generate SSH key inside pod shell: | - kubectl exec -n studio ubuntu-ssh-keygen -- bash -c "apt-get update && apt-get install -y openssh-client" kubectl exec -n studio ubuntu-ssh-keygen -- bash -c "ssh-keygen -t rsa -b 2048 -f /tmp/id_rsa -N '' -C ''" when: "'NotFound' in kubectl_secret_check.stderr" diff --git a/setup-scripts/setup-genai-studio/playbooks/deploy-monitoring.yml b/setup-scripts/setup-genai-studio/playbooks/deploy-monitoring.yml index f64b1cd..5bfe0f2 100644 --- a/setup-scripts/setup-genai-studio/playbooks/deploy-monitoring.yml +++ b/setup-scripts/setup-genai-studio/playbooks/deploy-monitoring.yml @@ -1,38 +1,39 @@ - name: Deploy prometheus and grafana with local-path-storage hosts: localhost + vars_files: + - ../vars.yml tasks: - - name: Check if local-path-storage namespace exists - shell: kubectl get namespace local-path-storage --ignore-not-found - register: namespace_check - ignore_errors: yes + - name: Check if monitoring namespace exists + shell: kubectl get namespace monitoring --ignore-not-found + register: monitoring_namespace_check changed_when: false - - name: Install local-path-provisioner if namespace does not exist - shell: kubectl apply -f https://raw.githubusercontent.com/rancher/local-path-provisioner/v0.0.30/deploy/local-path-storage.yaml - when: namespace_check.stdout == "" - register: apply_output - - - name: Wait for local-path-provisioner to be ready - shell: kubectl wait --for=condition=Ready pod -l app=local-path-provisioner -n local-path-storage --timeout=120s - when: namespace_check.stdout == "" - - name: Create monitoring namespace command: kubectl create namespace monitoring - ignore_errors: yes - - - name: Install Helm - shell: curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash + when: monitoring_namespace_check.stdout == "" - 
name: Add Prometheus Helm repository command: helm repo add prometheus-community https://prometheus-community.github.io/helm-charts + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" - name: Update Helm repositories command: helm repo update + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" # Installing the CRDs needed - name: Helm install kube-prometheus-stack command: helm install kube-prometheus-stack prometheus-community/kube-prometheus-stack -n monitoring ignore_errors: yes + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" - name: Delete kube-prometheus-stack command: helm delete kube-prometheus-stack -n monitoring diff --git a/setup-scripts/setup-genai-studio/playbooks/deploy-mysqldb.yml b/setup-scripts/setup-genai-studio/playbooks/deploy-mysqldb.yml index 591d826..89cb0fe 100644 --- a/setup-scripts/setup-genai-studio/playbooks/deploy-mysqldb.yml +++ b/setup-scripts/setup-genai-studio/playbooks/deploy-mysqldb.yml @@ -1,42 +1,23 @@ - name: Deploy mysql database hosts: localhost + vars_files: + - ../vars.yml tasks: - - name: Check if local-path-storage namespace exists - shell: kubectl get namespace local-path-storage --ignore-not-found - register: namespace_check - ignore_errors: yes - changed_when: false - - - name: Install local-path-provisioner if namespace does not exist - shell: kubectl apply -f https://raw.githubusercontent.com/rancher/local-path-provisioner/v0.0.30/deploy/local-path-storage.yaml - when: namespace_check.stdout == "" - register: apply_output - - - name: Wait for local-path-provisioner to be ready - shell: kubectl wait --for=condition=Ready pod -l app=local-path-provisioner -n local-path-storage --timeout=120s - when: namespace_check.stdout == "" - - name: Check if mysql namespace exists shell: kubectl get namespace mysql --ignore-not-found register: 
namespace_check - ignore_errors: yes changed_when: false - name: End playbook if mysql namespace exists meta: end_play when: namespace_check.stdout != "" - - name: Add bitnami Helm repository - command: helm repo add bitnami https://charts.bitnami.com/bitnami - - - name: Update Helm repositories - command: helm repo update - - - name: Create 'mysql' namespace - command: kubectl create ns mysql - - name: Install MySQL using Helm - command: helm install mysql bitnami/mysql -n mysql -f ../helm-values/mysqldb.yaml + command: helm install mysql oci://registry-1.docker.io/bitnamicharts/mysql -n mysql --create-namespace -f ../helm-values/mysqldb.yaml + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" - name: Wait for mysql-0 pod to be ready command: kubectl wait --for=condition=ready pod -l app.kubernetes.io/instance=mysql -n mysql --timeout=300s \ No newline at end of file diff --git a/setup-scripts/setup-genai-studio/playbooks/deploy-studio.yml b/setup-scripts/setup-genai-studio/playbooks/deploy-studio.yml index 1d2ecef..f28c231 100644 --- a/setup-scripts/setup-genai-studio/playbooks/deploy-studio.yml +++ b/setup-scripts/setup-genai-studio/playbooks/deploy-studio.yml @@ -3,29 +3,14 @@ vars_files: - ../vars.yml tasks: - - name: Check if local-path-storage namespace exists - shell: kubectl get namespace local-path-storage --ignore-not-found - register: namespace_check - ignore_errors: yes - changed_when: false - - - name: Install local-path-provisioner if namespace does not exist - shell: kubectl apply -f https://raw.githubusercontent.com/rancher/local-path-provisioner/v0.0.30/deploy/local-path-storage.yaml - when: namespace_check.stdout == "" - register: apply_output - - - name: Wait for local-path-provisioner to be ready - shell: kubectl wait --for=condition=Ready pod -l app=local-path-provisioner -n local-path-storage --timeout=120s - when: namespace_check.stdout == "" - - name: Check if studio namespace 
exists - command: kubectl get namespace studio - register: studio_namespace - ignore_errors: yes + shell: kubectl get namespace studio --ignore-not-found + register: studio_namespace_check + changed_when: false - name: Create studio namespace command: kubectl create namespace studio - when: studio_namespace.rc != 0 + when: studio_namespace_check.stdout == "" - name: Check for coredns service shell: kubectl get svc coredns -n kube-system --ignore-not-found @@ -38,16 +23,16 @@ when: coredns_check.stdout != '' - name: Check if app-tls exists in studio namespace - command: kubectl get secret app-tls -n studio + shell: kubectl get secret app-tls -n studio --ignore-not-found register: app_tls_secret_check - ignore_errors: yes + changed_when: false - name: Generate TLS certificate and create app-tls shell: | openssl req -x509 -nodes -days 365 -newkey rsa:4096 -keyout app-tls.key -out app-tls.crt -subj "/CN=studio/O=studio" kubectl create secret generic app-tls --from-file=app-tls.crt --from-file=app-tls.key -n studio rm app-tls.key app-tls.crt - when: app_tls_secret_check.rc != 0 + when: app_tls_secret_check.stdout == "" - name: Apply studio configuration command: kubectl apply -f ../studio-config.yaml diff --git a/setup-scripts/setup-genai-studio/playbooks/deploy-tracing.yml b/setup-scripts/setup-genai-studio/playbooks/deploy-tracing.yml index 75bed58..07ebcd1 100644 --- a/setup-scripts/setup-genai-studio/playbooks/deploy-tracing.yml +++ b/setup-scripts/setup-genai-studio/playbooks/deploy-tracing.yml @@ -1,26 +1,12 @@ --- - name: Deploy clickhouse and otel collector for tracing hosts: localhost + vars_files: + - ../vars.yml tasks: - - name: Check if local-path-storage namespace exists - shell: kubectl get namespace local-path-storage --ignore-not-found - register: namespace_check - ignore_errors: yes - changed_when: false - - - name: Install local-path-provisioner if namespace does not exist - shell: kubectl apply -f 
https://raw.githubusercontent.com/rancher/local-path-provisioner/v0.0.30/deploy/local-path-storage.yaml - when: namespace_check.stdout == "" - register: apply_output - - - name: Wait for local-path-provisioner to be ready - shell: kubectl wait --for=condition=Ready pod -l app=local-path-provisioner -n local-path-storage --timeout=120s - when: namespace_check.stdout == "" - - name: Check if tracing namespace exists shell: kubectl get namespace tracing --ignore-not-found register: namespace_check - ignore_errors: yes changed_when: false - name: End playbook if tracing namespace exists @@ -29,15 +15,27 @@ - name: Add Pascaliske Helm repository command: helm repo add pascaliske https://charts.pascaliske.dev + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" - name: Update Helm repositories command: helm repo update + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" - name: Create 'tracing' namespace command: kubectl create ns tracing - name: Install Clickhouse Helm chart in 'tracing' namespace - command: helm install clickhouse pascaliske/clickhouse -n tracing --set persistentVolumeClaim.storageClassName=local-path + command: helm install clickhouse pascaliske/clickhouse --version 0.3.1 -n tracing --set persistentVolumeClaim.storageClassName=local-path + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" - name: Wait for Clickhouse pod to be ready command: kubectl wait --namespace tracing --for=condition=ready pod -l app.kubernetes.io/name=clickhouse --timeout=120s @@ -47,12 +45,24 @@ - name: Add OpenTelemetry Helm repository command: helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" - name: Update Helm repositories command: helm repo update + 
environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" - name: Install OpenTelemetry Collector Helm chart in 'tracing' namespace - command: helm install tracing open-telemetry/opentelemetry-collector -n tracing -f ../helm-values/otel-collector.yaml + command: helm install tracing open-telemetry/opentelemetry-collector --version 0.139.1 -n tracing -f ../helm-values/otel-collector.yaml + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" - name: Wait for OpenTelemetry Collector pod to be ready command: kubectl wait --namespace tracing --for=condition=ready pod -l app.kubernetes.io/name=opentelemetry-collector --timeout=120s \ No newline at end of file diff --git a/setup-scripts/setup-genai-studio/playbooks/install-prerequisites.yml b/setup-scripts/setup-genai-studio/playbooks/install-prerequisites.yml new file mode 100644 index 0000000..67ad486 --- /dev/null +++ b/setup-scripts/setup-genai-studio/playbooks/install-prerequisites.yml @@ -0,0 +1,29 @@ +--- +- name: Install prerequisites for GenAI Studio + hosts: localhost + vars_files: + - ../vars.yml + tasks: + - name: Check if Helm is installed + command: helm version --short + register: helm_check + failed_when: false + changed_when: false + + - name: Install Helm + shell: curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" + when: helm_check.rc != 0 + + - name: Verify Helm installation + command: helm version --short + changed_when: false + + - name: Check if kubectl is available + command: kubectl version --client + register: kubectl_check + failed_when: kubectl_check.rc != 0 + changed_when: false diff --git a/setup-scripts/setup-genai-studio/playbooks/setup-local-storageclass.yml b/setup-scripts/setup-genai-studio/playbooks/setup-local-storageclass.yml new file mode 
100644 index 0000000..af504c1 --- /dev/null +++ b/setup-scripts/setup-genai-studio/playbooks/setup-local-storageclass.yml @@ -0,0 +1,24 @@ +--- +- name: Setup local path storage provisioner + hosts: localhost + vars_files: + - ../vars.yml + tasks: + - name: Check if local-path-storage namespace exists + shell: kubectl get namespace local-path-storage --ignore-not-found + register: namespace_check + ignore_errors: yes + changed_when: false + + - name: Install local-path-provisioner if namespace does not exist + shell: kubectl apply -f https://raw.githubusercontent.com/rancher/local-path-provisioner/v0.0.30/deploy/local-path-storage.yaml + environment: + http_proxy: "{{ http_proxy }}" + https_proxy: "{{ http_proxy }}" + no_proxy: "{{ no_proxy }}" + when: namespace_check.stdout == "" + register: apply_output + + - name: Wait for local-path-provisioner to be ready + shell: kubectl wait --for=condition=Ready pod -l app=local-path-provisioner -n local-path-storage --timeout=120s + when: namespace_check.stdout == "" diff --git a/setup-scripts/setup-genai-studio/readme.md b/setup-scripts/setup-genai-studio/readme.md index 58815c6..328e1e8 100644 --- a/setup-scripts/setup-genai-studio/readme.md +++ b/setup-scripts/setup-genai-studio/readme.md @@ -21,7 +21,6 @@ The genai-studio playbook script will: Run below commands: ```sh -curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash sudo apt install ansible -y ansible-playbook genai-studio.yml ``` @@ -32,3 +31,32 @@ Run below commands to do a /health test: ```sh curl http://localhost:30007/studio-backend/health ``` + +## Cleanup + +To completely remove GenAI Studio and all its components: + +```sh +./cleanup-genai-studio.sh +``` + +This script will: +- Delete all GenAI Studio namespaces (studio, monitoring, tracing, mysql) +- Remove all sandbox namespaces +- Clean up Helm releases +- Remove PVCs, secrets, and configmaps +- Provide detailed feedback on the cleanup process + +### Important Notes + 
+**Local Path Storage Preservation:** +The cleanup script intentionally **does NOT** remove the `local-path-storage` namespace because: +- It may be used by other applications beyond GenAI Studio +- Deleting it would break existing PVCs that use the `local-path` StorageClass +- It's a cluster-wide infrastructure component that should be managed separately + +If you need to remove local-path-storage after ensuring it's safe to do so: +```sh +kubectl delete namespace local-path-storage +kubectl delete storageclass local-path +``` \ No newline at end of file diff --git a/setup-scripts/setup-genai-studio/studio-config.yaml b/setup-scripts/setup-genai-studio/studio-config.yaml index 8574900..e876692 100644 --- a/setup-scripts/setup-genai-studio/studio-config.yaml +++ b/setup-scripts/setup-genai-studio/studio-config.yaml @@ -13,5 +13,6 @@ data: APP_FRONTEND_DNS: "app-frontend.$namespace.svc.cluster.local:5275" APP_BACKEND_DNS: "app-backend.$namespace.svc.cluster.local:8899" APP_CHATHISTORY_DNS: "chathistory-mongo.$namespace.svc.cluster.local:6012" - PREPARE_DOC_REDIS_PREP_DNS: "prepare-doc-redis-prep-0.$namespace.svc.cluster.local:6007" - STUDIO_BACKEND_DNS: "studio-backend.studio.svc.cluster.local:5000" \ No newline at end of file + PREPARE_DOC_REDIS_PREP_DNS: "opea-prepare-doc-redis-prep-0.$namespace.svc.cluster.local:6007" + STUDIO_BACKEND_DNS: "studio-backend.studio.svc.cluster.local:5000" + FINETUNING_HOST: "finetuning-server.studio.svc.cluster.local" \ No newline at end of file diff --git a/setup-scripts/setup-genai-studio/vars.yml b/setup-scripts/setup-genai-studio/vars.yml index d53d819..b277acf 100644 --- a/setup-scripts/setup-genai-studio/vars.yml +++ b/setup-scripts/setup-genai-studio/vars.yml @@ -1,5 +1,7 @@ -container_registry: 'opea' +# Container registry configuration +# Replace {{ ansible_default_ipv4.address }} with your Kubernetes master/API endpoint IP if needed +container_registry: '{{ ansible_default_ipv4.address }}:5000/opea' container_tag: 'latest' 
+mysql_host: 'mysql.mysql.svc.cluster.local' http_proxy: '' -no_proxy: '' -mysql_host: 'mysql.mysql.svc.cluster.local' \ No newline at end of file +no_proxy: 'localhost,127.0.0.1,.local,.svc.cluster.local,{{ ansible_default_ipv4.address }}' \ No newline at end of file diff --git a/studio-backend/Dockerfile b/studio-backend/Dockerfile index 00b557d..71848f0 100644 --- a/studio-backend/Dockerfile +++ b/studio-backend/Dockerfile @@ -1,6 +1,16 @@ # Use an official Python runtime as a parent image FROM python:3.11-slim +# Accept proxy build arguments +ARG http_proxy +ARG https_proxy +ARG no_proxy + +# Set proxy environment variables for package managers +ENV http_proxy=${http_proxy} +ENV https_proxy=${https_proxy} +ENV no_proxy=${no_proxy} + # Set the working directory in the container WORKDIR /usr/src/ diff --git a/studio-backend/app/routers/debuglog_router.py b/studio-backend/app/routers/debuglog_router.py index 0d62100..760a072 100644 --- a/studio-backend/app/routers/debuglog_router.py +++ b/studio-backend/app/routers/debuglog_router.py @@ -85,6 +85,34 @@ def find_pod_dependencies(pod, all_pods, services, namespace, core_v1_api): # Combine all environment variables for further analysis all_env_vars = env_vars + init_env_vars + configmap_env_vars + # Special handling for app-backend pods - filter out dependent services + is_app_backend = pod.metadata.name and 'app-backend' in pod.metadata.name + if is_app_backend: + # For app-backend, we want to exclude references from dependent_services + # but keep direct OPEA service references + filtered_env_vars = [] + for env_val in all_env_vars: + # Skip if this looks like workflow-info.json content with dependent_services + if isinstance(env_val, str) and '"dependent_services"' in env_val: + # Parse the JSON to extract only direct service references, not dependent ones + try: + import json + workflow_data = json.loads(env_val) + if 'nodes' in workflow_data: + # Only include OPEA service names, not their dependencies + 
opea_services = [] + for node_id, node_data in workflow_data['nodes'].items(): + if node_data.get('name', '').startswith('opea_service@'): + opea_services.append(node_data['name']) + # Add these as simple strings for pattern matching + filtered_env_vars.extend(opea_services) + except: + # If JSON parsing fails, skip this env var + pass + else: + filtered_env_vars.append(env_val) + all_env_vars = filtered_env_vars + # # Debug output # print(f"Analyzing dependencies for pod: {pod.metadata.name}") # print(f"ConfigMap refs: {configmap_refs}") diff --git a/studio-backend/app/services/exporter_service.py b/studio-backend/app/services/exporter_service.py index 31d6911..0dd0d49 100644 --- a/studio-backend/app/services/exporter_service.py +++ b/studio-backend/app/services/exporter_service.py @@ -24,7 +24,21 @@ def convert_proj_info_to_manifest(proj_info_json, output_file=None): with open(service_file_path, "r") as service_file: service_manifest_read = service_file.read() service_manifest_raw = list(ordered_load_all(replace_dynamic_manifest_placeholder(service_manifest_read, service_info, proj_info_json), yaml.SafeLoader)) - service_manifest = [replace_manifest_placeholders(doc, service_info) for doc in service_manifest_raw] + # For app-backend, include all service endpoints in variables so it can connect to all services + if service_info.get('service_type') == 'app': + # Add only OPEA service endpoints to app-backend's variables + opea_service_endpoints = {} + for svc_name, svc_info in opea_services["services"].items(): + if 'endpoint' in svc_info and svc_info['endpoint'].startswith('opea-'): + # Clean the service name for use as variable key (remove @ symbols) + clean_svc_name = svc_name.replace('@', '_').replace('opea_service_', '') + opea_service_endpoints[f"{clean_svc_name}_endpoint"] = svc_info['endpoint'] + + # Merge with existing service_info + enhanced_service_info = {**service_info, **opea_service_endpoints} + service_manifest = [replace_manifest_placeholders(doc, 
enhanced_service_info) for doc in service_manifest_raw] + else: + service_manifest = [replace_manifest_placeholders(doc, service_info) for doc in service_manifest_raw] output_manifest.extend((doc, service_name) for doc in service_manifest) # print("Manifest generation completed.") diff --git a/studio-backend/app/templates/app/app.manifest.yaml b/studio-backend/app/templates/app/app.manifest.yaml index 9b43420..06d48a1 100644 --- a/studio-backend/app/templates/app/app.manifest.yaml +++ b/studio-backend/app/templates/app/app.manifest.yaml @@ -53,6 +53,12 @@ spec: value: 'true' - name: LOGFLAG value: 'True' + - name: http_proxy + value: "${HTTP_PROXY}" + - name: https_proxy + value: "${HTTP_PROXY}" + - name: no_proxy + value: "${NO_PROXY}" __TELEMETRY_ENDPOINT__ securityContext: allowPrivilegeEscalation: false @@ -219,7 +225,7 @@ spec: spec: containers: - name: chathistory-mongo - image: opea/chathistory-mongo:latest + image: opea/chathistory-mongo:1.3 imagePullPolicy: IfNotPresent ports: - containerPort: 6012 diff --git a/studio-backend/app/templates/microsvc-manifests/asr-usvc.yaml b/studio-backend/app/templates/microsvc-manifests/asr-usvc.yaml index cff1ece..868643b 100644 --- a/studio-backend/app/templates/microsvc-manifests/asr-usvc.yaml +++ b/studio-backend/app/templates/microsvc-manifests/asr-usvc.yaml @@ -10,9 +10,9 @@ metadata: data: HEALTHCHECK_ENDPOINT: "{whisper_endpoint}:{whisper_port}" ASR_ENDPOINT: "http://{whisper_endpoint}:{whisper_port}" - http_proxy: "" - https_proxy: "" - no_proxy: "" + http_proxy: "${HTTP_PROXY}" + https_proxy: "${HTTP_PROXY}" + no_proxy: "${NO_PROXY}" LOGFLAG: "True" --- diff --git a/studio-backend/app/templates/microsvc-manifests/data-prep.yaml b/studio-backend/app/templates/microsvc-manifests/data-prep.yaml index 734821a..31337b6 100644 --- a/studio-backend/app/templates/microsvc-manifests/data-prep.yaml +++ b/studio-backend/app/templates/microsvc-manifests/data-prep.yaml @@ -17,9 +17,9 @@ data: SEARCH_BATCH_SIZE: "10" 
HF_TOKEN: "{huggingFaceToken}" HF_HOME: "/tmp/.cache/huggingface" - http_proxy: "" - https_proxy: "" - no_proxy: "" + http_proxy: "${HTTP_PROXY}" + https_proxy: "${HTTP_PROXY}" + no_proxy: "${NO_PROXY}" LOGFLAG: "True" --- # Source: data-prep/templates/service.yaml diff --git a/studio-backend/app/templates/microsvc-manifests/embedding-usvc.yaml b/studio-backend/app/templates/microsvc-manifests/embedding-usvc.yaml index 997d13c..e6befed 100644 --- a/studio-backend/app/templates/microsvc-manifests/embedding-usvc.yaml +++ b/studio-backend/app/templates/microsvc-manifests/embedding-usvc.yaml @@ -10,9 +10,9 @@ metadata: data: HEALTHCHECK_ENDPOINT: "{tei_endpoint}:{tei_port}" TEI_EMBEDDING_ENDPOINT: "http://{tei_endpoint}:{tei_port}" - http_proxy: "" - https_proxy: "" - no_proxy: "" + http_proxy: "${HTTP_PROXY}" + https_proxy: "${HTTP_PROXY}" + no_proxy: "${NO_PROXY}" TRANSFORMERS_CACHE: "/tmp/transformers_cache" HF_HOME: "/tmp/.cache/huggingface" LOGFLAG: "True" diff --git a/studio-backend/app/templates/microsvc-manifests/reranking-usvc.yaml b/studio-backend/app/templates/microsvc-manifests/reranking-usvc.yaml index 07cec32..4916907 100644 --- a/studio-backend/app/templates/microsvc-manifests/reranking-usvc.yaml +++ b/studio-backend/app/templates/microsvc-manifests/reranking-usvc.yaml @@ -10,9 +10,9 @@ metadata: data: HEALTHCHECK_ENDPOINT: "{tei_endpoint}:{tei_port}" TEI_RERANKING_ENDPOINT: "http://{tei_endpoint}:{tei_port}" - http_proxy: "" - https_proxy: "" - no_proxy: "" + http_proxy: "${HTTP_PROXY}" + https_proxy: "${HTTP_PROXY}" + no_proxy: "${NO_PROXY}" LOGFLAG: "True" --- # Source: reranking-usvc/templates/service.yaml diff --git a/studio-backend/app/templates/microsvc-manifests/retriever-usvc.yaml b/studio-backend/app/templates/microsvc-manifests/retriever-usvc.yaml index a508bb1..7b09a81 100644 --- a/studio-backend/app/templates/microsvc-manifests/retriever-usvc.yaml +++ b/studio-backend/app/templates/microsvc-manifests/retriever-usvc.yaml @@ -14,9 +14,9 @@ 
data: REDIS_URL: "redis://{redis_vector_store_endpoint}:{redis_vector_store_port}" INDEX_NAME: "rag-redis" EASYOCR_MODULE_PATH: "/tmp/.EasyOCR" - http_proxy: "" - https_proxy: "" - no_proxy: "" + http_proxy: "${HTTP_PROXY}" + https_proxy: "${HTTP_PROXY}" + no_proxy: "${NO_PROXY}" HF_HOME: "/tmp/.cache/huggingface" HF_TOKEN: "{huggingFaceToken}" LOGFLAG: "True" diff --git a/studio-backend/app/utils/exporter_utils.py b/studio-backend/app/utils/exporter_utils.py index 0fd6fe7..766034a 100644 --- a/studio-backend/app/utils/exporter_utils.py +++ b/studio-backend/app/utils/exporter_utils.py @@ -206,7 +206,7 @@ def process_opea_services(proj_info_json): # Remove the 'opea_service@' prefix and append the node_name suffix if any node_suffix = node_name.split('_')[-1] if '_' in node_name else '' service_type_cleaned = node_info['service_type'].replace('opea_service@', '') - opea_service_endpoint = f"{service_type_cleaned.replace('_','-')}-{node_suffix}".strip('-') + opea_service_endpoint = f"opea-{service_type_cleaned.replace('_','-')}-{node_suffix}".strip('-') # Iterate through the dependent_services to map to the service info for service_type, service_info in node_info.get('dependent_services', {}).items(): diff --git a/studio-backend/app/utils/placeholders_utils.py b/studio-backend/app/utils/placeholders_utils.py index 6e686fd..17d4fca 100644 --- a/studio-backend/app/utils/placeholders_utils.py +++ b/studio-backend/app/utils/placeholders_utils.py @@ -63,7 +63,23 @@ def replace_manifest_placeholders(obj, variables): value = value.replace("${REGISTRY}", os.getenv("REGISTRY", "opea")) value = value.replace("${TAG}", os.getenv("TAG", "latest")) value = value.replace("${HTTP_PROXY}", os.getenv("SBX_HTTP_PROXY", "")) - value = value.replace("${NO_PROXY}", os.getenv("SBX_NO_PROXY", "")) + + # Enhanced NO_PROXY handling - extract service hostnames from variables + base_no_proxy = os.getenv("SBX_NO_PROXY", "") + if "${NO_PROXY}" in value and variables: + service_hostnames = [] + # 
Extract hostnames from all services in variables + for var_key, var_value in variables.items(): + if var_key.endswith('_endpoint') and isinstance(var_value, str): + service_hostnames.append(var_value) + + if service_hostnames: + enhanced_no_proxy = f"{base_no_proxy},{','.join(service_hostnames)}" if base_no_proxy else ','.join(service_hostnames) + value = value.replace("${NO_PROXY}", enhanced_no_proxy) + else: + value = value.replace("${NO_PROXY}", base_no_proxy) + else: + value = value.replace("${NO_PROXY}", base_no_proxy) # Attempt to replace placeholders in the string formatted_value = value.format(**variables) # If the key is a port-related field and the formatted value is a digit, convert to int diff --git a/studio-frontend/.env.development b/studio-frontend/.env.development new file mode 100644 index 0000000..0a6398e --- /dev/null +++ b/studio-frontend/.env.development @@ -0,0 +1,6 @@ +NODE_TLS_REJECT_UNAUTHORIZED=0 +VITE_DISABLE_KEYCLOAK=true +NODE_ENV=development +VITE_HOST=0.0.0.0 +VITE_PORT=8088 +FINETUNING_HOST= # Command to get your host ip: ip route get 1.1.1.1 | awk '{print $7}' \ No newline at end of file diff --git a/studio-frontend/Dockerfile b/studio-frontend/Dockerfile index e3079f4..e59da67 100644 --- a/studio-frontend/Dockerfile +++ b/studio-frontend/Dockerfile @@ -1,4 +1,14 @@ -FROM node:23-alpine +FROM node:20-alpine + +# Accept proxy build arguments +ARG http_proxy +ARG https_proxy +ARG no_proxy + +# Set proxy environment variables for package managers +ENV http_proxy=${http_proxy} +ENV https_proxy=${https_proxy} +ENV no_proxy=${no_proxy} # Install necessary packages RUN apk update && apk upgrade && \ @@ -7,10 +17,8 @@ RUN apk update && apk upgrade && \ build-base cairo-dev pango-dev \ # Install Chromium chromium && \ - # Update npm to the latest version - npm install -g npm@latest && \ # Install PNPM globally - npm install -g pnpm@latest + npm install -g pnpm@9 # Debug step to verify git installation RUN git --version diff --git 
a/studio-frontend/docker-compose.dev.yml b/studio-frontend/docker-compose.dev.yml new file mode 100644 index 0000000..3624dcb --- /dev/null +++ b/studio-frontend/docker-compose.dev.yml @@ -0,0 +1,47 @@ +version: '3.8' + +services: + finetuning-server: + image: opea/finetuning:latest + container_name: finetuning-server + user: "0:0" + ipc: host + ports: + - "8015:8015" + - "8265:8265" + environment: + - http_proxy=${http_proxy} + - https_proxy=${https_proxy} + - no_proxy=${no_proxy} + volumes: + - finetune-output:/home/user/comps/finetuning/src/output + restart: unless-stopped + + studio-frontend: + image: studio-frontend:latest + build: + context: . + dockerfile: Dockerfile + container_name: studio-frontend-dev + ports: + - "3000:3000" + - "8088:8088" + volumes: + - .:/usr/src + - node_modules:/usr/src/node_modules + - pnpm_store:/usr/src/.pnpm-store + - /usr/src/packages/ui/build + - finetune-output:/tmp/finetuning/output + command: ["sh", "-c", "pnpm install && pnpm dev"] + environment: + - http_proxy=${http_proxy} + - https_proxy=${https_proxy} + - no_proxy=${no_proxy} + stdin_open: true + tty: true + restart: unless-stopped + +volumes: + node_modules: + pnpm_store: + finetune-output: \ No newline at end of file diff --git a/studio-frontend/package.json b/studio-frontend/package.json index 115e3d0..0205332 100644 --- a/studio-frontend/package.json +++ b/studio-frontend/package.json @@ -60,6 +60,10 @@ "overrides": { "set-value": "^3.0.3", "form-data": "4.0.4" + }, + "peerDependencyRules": { + "ignoreMissing": [], + "allowAny": [] } }, "engines": { @@ -81,7 +85,8 @@ "cross-spawn": ">=7.0.5", "solid-js": ">=1.9.4", "tar-fs": ">=3.0.8", - "form-data": "4.0.4" + "form-data": "4.0.4", + "zod": ">=3.23.0" }, "eslintIgnore": [ "**/dist", diff --git a/studio-frontend/packages/server/package.json b/studio-frontend/packages/server/package.json index 5ab2abd..7ef55d6 100644 --- a/studio-frontend/packages/server/package.json +++ b/studio-frontend/packages/server/package.json 
@@ -76,6 +76,7 @@ "moment-timezone": "^0.5.34", "multer": "^1.4.5-lts.1", "mysql2": "^3.9.2", + "form-data": "^4.0.0", "openai": "^4.57.3", "pg": "^8.11.1", "posthog-node": "^3.5.0", @@ -85,7 +86,8 @@ "sqlite3": "^5.1.6", "typeorm": "^0.3.6", "uuid": "^9.0.1", - "winston": "^3.9.0" + "winston": "^3.9.0", + "https-proxy-agent": "^7.0.4" }, "devDependencies": { "@types/content-disposition": "0.5.8", diff --git a/studio-frontend/packages/server/src/controllers/finetuning/index.ts b/studio-frontend/packages/server/src/controllers/finetuning/index.ts new file mode 100644 index 0000000..add0c5a --- /dev/null +++ b/studio-frontend/packages/server/src/controllers/finetuning/index.ts @@ -0,0 +1,211 @@ +import { Request, Response, NextFunction } from 'express' +import { StatusCodes } from 'http-status-codes' +import { InternalFlowiseError } from '../../errors/internalFlowiseError' +import finetuningService from '../../services/finetuning' + +/** + * Upload a training file + * POST /api/v1/finetuning/files + */ +const uploadTrainingFile = async (req: Request, res: Response, next: NextFunction) => { + try { + if (!req.file) { + // Debug: log request body and files to help trace upload issues + console.debug('finetuningController.uploadTrainingFile - no file received. 
req.body=', req.body, 'req.files=', (req as any).files) + throw new InternalFlowiseError(StatusCodes.BAD_REQUEST, 'Error: finetuningController.uploadTrainingFile - file not provided!') + } + + const purpose = req.body.purpose || 'fine-tune' + const apiResponse = await finetuningService.uploadTrainingFile(req.file, purpose) + return res.json(apiResponse) + } catch (error) { + next(error) + } +} + +/** + * Create a fine-tuning job + * POST /api/v1/finetuning/jobs + */ +const createFineTuningJob = async (req: Request, res: Response, next: NextFunction) => { + try { + const hasFile = !!req.body?.training_file + if (!req.body || !hasFile || !req.body.model) { + throw new InternalFlowiseError( + StatusCodes.BAD_REQUEST, + 'Error: finetuningController.createFineTuningJob - model and training_file are required!' + ) + } + + const apiResponse = await finetuningService.createFineTuningJob(req.body) + return res.json(apiResponse) + } catch (error) { + next(error) + } +} + +/** + * List all fine-tuning jobs + * GET /api/v1/finetuning/jobs + */ +const listFineTuningJobs = async (req: Request, res: Response, next: NextFunction) => { + try { + const apiResponse = await finetuningService.listFineTuningJobs() + return res.json(apiResponse) + } catch (error) { + next(error) + } +} + +/** + * Retrieve a specific fine-tuning job + * POST /api/v1/finetuning/jobs/retrieve + */ +const retrieveFineTuningJob = async (req: Request, res: Response, next: NextFunction) => { + try { + if (typeof req.body === 'undefined' || !req.body.fine_tuning_job_id) { + throw new InternalFlowiseError( + StatusCodes.BAD_REQUEST, + 'Error: finetuningController.retrieveFineTuningJob - fine_tuning_job_id not provided!' 
+ ) + } + + const apiResponse = await finetuningService.retrieveFineTuningJob(req.body.fine_tuning_job_id) + return res.json(apiResponse) + } catch (error) { + next(error) + } +} + +/** + * Cancel a fine-tuning job + * POST /api/v1/finetuning/jobs/cancel + */ +const cancelFineTuningJob = async (req: Request, res: Response, next: NextFunction) => { + try { + if (typeof req.body === 'undefined' || !req.body.fine_tuning_job_id) { + throw new InternalFlowiseError( + StatusCodes.BAD_REQUEST, + 'Error: finetuningController.cancelFineTuningJob - fine_tuning_job_id not provided!' + ) + } + + const apiResponse = await finetuningService.cancelFineTuningJob(req.body.fine_tuning_job_id) + return res.json(apiResponse) + } catch (error) { + next(error) + } +} + +/** + * Delete a fine-tuning job (cancel remote if possible and remove local records) + * POST /api/v1/finetuning/jobs/delete + */ +const deleteFineTuningJob = async (req: Request, res: Response, next: NextFunction) => { + try { + if (typeof req.body === 'undefined' || !req.body.fine_tuning_job_id) { + throw new InternalFlowiseError( + StatusCodes.BAD_REQUEST, + 'Error: finetuningController.deleteFineTuningJob - fine_tuning_job_id not provided!' + ) + } + + const apiResponse = await finetuningService.deleteFineTuningJob(req.body.fine_tuning_job_id) + return res.json(apiResponse) + } catch (error) { + next(error) + } +} + + + +/** + * Fetch Ray/job logs for a fine-tuning job + * POST /api/v1/finetuning/jobs/logs + * body: { fine_tuning_job_id: string, ray_job_id?: string, tail?: number } + */ +const getFineTuningJobLogs = async (req: Request, res: Response, next: NextFunction) => { + try { + if (typeof req.body === 'undefined' || !req.body.fine_tuning_job_id) { + throw new InternalFlowiseError( + StatusCodes.BAD_REQUEST, + 'Error: finetuningController.getFineTuningJobLogs - fine_tuning_job_id not provided!' 
+ ) + } + + const fine_tuning_job_id = req.body.fine_tuning_job_id + const ray_job_id = req.body.ray_job_id + + try { + const apiResponse = await finetuningService.getFineTuningJobLogs(fine_tuning_job_id, { ray_job_id }) + // Service returns either { logs: string } or { logs: '', error: string } + return res.json(apiResponse) + } catch (err: any) { + // If the service throws, return a structured error payload instead of propagating a 500 + const message = err?.message || String(err) || 'Unknown error fetching logs' + return res.json({ logs: '', error: `Error: ${message}` }) + } + } catch (error) { + next(error) + } +} + + +/** + * Download fine-tuning job output as a zip file + * GET /api/v1/finetuning/download-ft/:jobId + */ +const downloadFineTuningOutput = async (req: Request, res: Response, next: NextFunction) => { + try { + const { jobId } = req.params + + if (!jobId) { + throw new InternalFlowiseError( + StatusCodes.BAD_REQUEST, + 'Error: finetuningController.downloadFineTuningOutput - jobId is required!' 
+ ) + } + + // Get the zip file path (creates if needed, but returns immediately if already exists) + const filePath = await finetuningService.downloadFineTuningOutput(jobId) + if (!filePath) { + throw new InternalFlowiseError( + StatusCodes.NOT_FOUND, + `Error: finetuningController.downloadFineTuningOutput - output not found for job: ${jobId}` + ) + } + + // Set response headers for file download + const fileName = `${jobId}-output.zip` + res.setHeader('Content-Type', 'application/zip') + res.setHeader('Content-Disposition', `attachment; filename="${fileName}"`) + + // Stream the file + const fs = require('fs') + const fileStream = fs.createReadStream(filePath) + fileStream.on('error', (err: any) => { + console.error('Error streaming fine-tuning output file:', err) + if (!res.headersSent) { + res.status(StatusCodes.INTERNAL_SERVER_ERROR).json({ + error: 'Error streaming fine-tuning output file' + }) + } + }) + fileStream.pipe(res) + } catch (error) { + next(error) + } +} + + + +export default { + uploadTrainingFile, + createFineTuningJob, + listFineTuningJobs, + retrieveFineTuningJob, + cancelFineTuningJob, + deleteFineTuningJob, + getFineTuningJobLogs, + downloadFineTuningOutput +} diff --git a/studio-frontend/packages/server/src/database/entities/FineTuningCheckpoint.ts b/studio-frontend/packages/server/src/database/entities/FineTuningCheckpoint.ts new file mode 100644 index 0000000..9d119f3 --- /dev/null +++ b/studio-frontend/packages/server/src/database/entities/FineTuningCheckpoint.ts @@ -0,0 +1,19 @@ +import { Entity, Column, PrimaryColumn, CreateDateColumn } from 'typeorm' + +@Entity('fine_tuning_checkpoint') +export class FineTuningCheckpoint { + @PrimaryColumn() + id!: string + + @Column() + fine_tuning_job_id!: string + + @Column() + filename!: string + + @Column({ type: 'text', nullable: true }) + metadata?: string + + @CreateDateColumn({ type: 'datetime' }) + createdDate!: Date +} diff --git 
a/studio-frontend/packages/server/src/database/entities/FineTuningJob.ts b/studio-frontend/packages/server/src/database/entities/FineTuningJob.ts new file mode 100644 index 0000000..ba74cf9 --- /dev/null +++ b/studio-frontend/packages/server/src/database/entities/FineTuningJob.ts @@ -0,0 +1,34 @@ +import { Entity, Column, PrimaryColumn, CreateDateColumn } from 'typeorm' + +@Entity('fine_tuning_job') +export class FineTuningJob { + @PrimaryColumn() + id!: string + + @Column({ nullable: true }) + model?: string + + @Column({ nullable: true }) + task?: string + + @Column({ nullable: true }) + status?: string + + @Column({ nullable: true }) + training_file?: string + + @Column({ type: 'text', nullable: true }) + hyperparameters?: string + + @Column({ type: 'text', nullable: true }) + result_files?: string + + @Column({ type: 'text', nullable: true }) + error?: string + + @Column({ nullable: true, type: 'int' }) + trained_tokens?: number + + @CreateDateColumn({ type: 'datetime' }) + createdDate!: Date +} diff --git a/studio-frontend/packages/server/src/database/entities/index.ts b/studio-frontend/packages/server/src/database/entities/index.ts index 4cb079b..ba8e1fa 100644 --- a/studio-frontend/packages/server/src/database/entities/index.ts +++ b/studio-frontend/packages/server/src/database/entities/index.ts @@ -11,6 +11,7 @@ import { Lead } from './Lead' import { UpsertHistory } from './UpsertHistory' import { ApiKey } from './ApiKey' import { CustomTemplate } from './CustomTemplate' +import { FineTuningJob } from './FineTuningJob' export const entities = { ChatFlow, @@ -25,5 +26,6 @@ export const entities = { Lead, UpsertHistory, ApiKey, - CustomTemplate + CustomTemplate, + FineTuningJob } diff --git a/studio-frontend/packages/server/src/database/migrations/mysql/1760424809635-AddFineTuningTables.ts b/studio-frontend/packages/server/src/database/migrations/mysql/1760424809635-AddFineTuningTables.ts new file mode 100644 index 0000000..67381d4 --- /dev/null +++ 
b/studio-frontend/packages/server/src/database/migrations/mysql/1760424809635-AddFineTuningTables.ts @@ -0,0 +1,38 @@ +import { MigrationInterface, QueryRunner } from 'typeorm' + +export class AddFineTuningTables1760424809635 implements MigrationInterface { + public async up(queryRunner: QueryRunner): Promise { + await queryRunner.query( + `CREATE TABLE IF NOT EXISTS fine_tuning_job ( + id varchar(255) PRIMARY KEY NOT NULL, + model varchar(255), + task varchar(255), + status varchar(255), + training_file varchar(255), + hyperparameters longtext, + result_files longtext, + error longtext, + trained_tokens int, + createdDate datetime NOT NULL DEFAULT CURRENT_TIMESTAMP + ) ENGINE=InnoDB; + ` + ) + + await queryRunner.query( + `CREATE TABLE IF NOT EXISTS fine_tuning_checkpoint ( + id varchar(255) PRIMARY KEY NOT NULL, + fine_tuning_job_id varchar(255) NOT NULL, + filename varchar(255) NOT NULL, + metadata longtext, + createdDate datetime NOT NULL DEFAULT CURRENT_TIMESTAMP, + INDEX IDX_fine_tuning_checkpoint_job (fine_tuning_job_id) + ) ENGINE=InnoDB; + ` + ) + } + + public async down(queryRunner: QueryRunner): Promise { + await queryRunner.query(`DROP TABLE IF EXISTS fine_tuning_checkpoint`) + await queryRunner.query(`DROP TABLE IF EXISTS fine_tuning_job`) + } +} diff --git a/studio-frontend/packages/server/src/database/migrations/mysql/index.ts b/studio-frontend/packages/server/src/database/migrations/mysql/index.ts index 3645d89..6df72eb 100644 --- a/studio-frontend/packages/server/src/database/migrations/mysql/index.ts +++ b/studio-frontend/packages/server/src/database/migrations/mysql/index.ts @@ -30,6 +30,7 @@ import { AddStudioFieldsToChatFlow1733282099772 } from './1733282099772-AddStudi import { AddSandboxTracerUrlToChatFlow1743740099772 } from './1743740099772-AddSandboxTracerUrlToChatFlow' import { AddSandboxDebugLogsUrlToChatFlow1749612373191 } from './1749612373191-AddSandboxDebugLogsUrlToChatFlow' import { AddDeploymentStatusToChatFlow1754700956637 } from 
'./1754700956637-AddDeploymentStatusToChatFlow' +import { AddFineTuningTables1760424809635 } from './1760424809635-AddFineTuningTables' export const mysqlMigrations = [ @@ -64,5 +65,6 @@ export const mysqlMigrations = [ AddStudioFieldsToChatFlow1733282099772, AddSandboxTracerUrlToChatFlow1743740099772, AddSandboxDebugLogsUrlToChatFlow1749612373191, - AddDeploymentStatusToChatFlow1754700956637 + AddDeploymentStatusToChatFlow1754700956637, + AddFineTuningTables1760424809635 ] diff --git a/studio-frontend/packages/server/src/database/migrations/sqlite/1760424809635-AddFineTuningTables.ts b/studio-frontend/packages/server/src/database/migrations/sqlite/1760424809635-AddFineTuningTables.ts new file mode 100644 index 0000000..83e91d6 --- /dev/null +++ b/studio-frontend/packages/server/src/database/migrations/sqlite/1760424809635-AddFineTuningTables.ts @@ -0,0 +1,39 @@ +import { MigrationInterface, QueryRunner } from 'typeorm' + +export class AddFineTuningTables1760424809635 implements MigrationInterface { + public async up(queryRunner: QueryRunner): Promise { + await queryRunner.query( + `CREATE TABLE IF NOT EXISTS "fine_tuning_job" ( + "id" varchar PRIMARY KEY NOT NULL, + "model" varchar, + "task" varchar, + "status" varchar, + "training_file" varchar, + "hyperparameters" text, + "result_files" text, + "error" text, + "trained_tokens" integer, + "createdDate" datetime NOT NULL DEFAULT (datetime('now')) + );` + ) + + await queryRunner.query( + `CREATE TABLE IF NOT EXISTS "fine_tuning_checkpoint" ( + "id" varchar PRIMARY KEY NOT NULL, + "fine_tuning_job_id" varchar NOT NULL, + "filename" varchar NOT NULL, + "metadata" text, + "createdDate" datetime NOT NULL DEFAULT (datetime('now')) + );` + ) + + await queryRunner.query( + `CREATE INDEX IF NOT EXISTS "IDX_fine_tuning_checkpoint_job" ON "fine_tuning_checkpoint" ("fine_tuning_job_id") ;` + ) + } + + public async down(queryRunner: QueryRunner): Promise { + await queryRunner.query(`DROP TABLE IF EXISTS 
fine_tuning_checkpoint`) + await queryRunner.query(`DROP TABLE IF EXISTS fine_tuning_job`) + } +} diff --git a/studio-frontend/packages/server/src/database/migrations/sqlite/index.ts b/studio-frontend/packages/server/src/database/migrations/sqlite/index.ts index c9ed343..1b87c17 100644 --- a/studio-frontend/packages/server/src/database/migrations/sqlite/index.ts +++ b/studio-frontend/packages/server/src/database/migrations/sqlite/index.ts @@ -29,6 +29,7 @@ import { AddStudioFieldsToChatFlow1733282099772 } from './1733282099772-AddStudi import { AddSandboxTracerUrlToChatFlow1743740099772 } from './1743740099772-AddSandboxTracerUrlToChatFlow' import { AddSandboxDebugLogsUrlToChatFlow1749612373191 } from './1749612373191-AddSandboxDebugLogsUrlToChatFlow' import { AddDeploymentStatusToChatFlow1754700956637 } from './1754700956637-AddDeploymentStatusToChatFlow' +import { AddFineTuningTables1760424809635 } from './1760424809635-AddFineTuningTables' export const sqliteMigrations = [ Init1693835579790, @@ -62,4 +63,5 @@ export const sqliteMigrations = [ AddSandboxTracerUrlToChatFlow1743740099772, AddSandboxDebugLogsUrlToChatFlow1749612373191, AddDeploymentStatusToChatFlow1754700956637 + ,AddFineTuningTables1760424809635 ] diff --git a/studio-frontend/packages/server/src/index.ts b/studio-frontend/packages/server/src/index.ts index 9fa5dad..06b236d 100644 --- a/studio-frontend/packages/server/src/index.ts +++ b/studio-frontend/packages/server/src/index.ts @@ -22,6 +22,8 @@ import flowiseApiV1Router from './routes' import errorHandlerMiddleware from './middlewares/errors' import { SSEStreamer } from './utils/SSEStreamer' import { validateAPIKey } from './utils/validateKey' +import { setupFineTuningDownloadHandlers } from './ws/finetuningDownload' +import { setupFineTuningStatusHandlers } from './ws/finetuningStatus' declare global { namespace Express { @@ -141,7 +143,8 @@ export class App { '/api/v1/leads', '/api/v1/get-upload-file', '/api/v1/ip', - '/api/v1/ping' + 
'/api/v1/ping', + '/api/v1/finetuning/download-ft/' ] const URL_CASE_INSENSITIVE_REGEX: RegExp = /\/api\/v1\//i const URL_CASE_SENSITIVE_REGEX: RegExp = /\/api\/v1\// @@ -227,13 +230,36 @@ export class App { const packagePath = getNodeModulesPackagePath('flowise-ui') const uiBuildPath = path.join(packagePath, 'build') const uiHtmlPath = path.join(packagePath, 'build', 'index.html') + const nodeEnv = process.env.NODE_ENV || 'undefined' + + // Treat any non-production environment as development for the landing page + if (nodeEnv === 'development') { + this.app.get('/', (req: Request, res: Response) => { + res.send(` + + + + + Flowise Server (development) + + + +
+                        <body>
+                            <h1>Flowise Server</h1>
+                            <p>Mode: development</p>
+                            <p>Server is listening on port 3000.</p>
+                            <p>UI is listening on port 8088.</p>
+                            <p><a href="/api/v1/ping">Ping API</a></p>
+                        </body>
+                    </html>
+ + `) + }) + } else { + this.app.use('/', express.static(uiBuildPath)) - this.app.use('/', express.static(uiBuildPath)) - - // All other requests not handled will return React app - this.app.use((req: Request, res: Response) => { - res.sendFile(uiHtmlPath) - }) + // All other requests not handled will return React app + this.app.use((req: Request, res: Response) => { + res.sendFile(uiHtmlPath) + }) + } // Error handling this.app.use(errorHandlerMiddleware) @@ -267,6 +293,10 @@ export async function start(): Promise { cors: getCorsOptions() }) + // Setup WebSocket handlers + setupFineTuningDownloadHandlers(io) + setupFineTuningStatusHandlers(io) + await serverApp.initDatabase() await serverApp.config(io) diff --git a/studio-frontend/packages/server/src/middlewares/errors/index.ts b/studio-frontend/packages/server/src/middlewares/errors/index.ts index 75cd2c2..06b5422 100644 --- a/studio-frontend/packages/server/src/middlewares/errors/index.ts +++ b/studio-frontend/packages/server/src/middlewares/errors/index.ts @@ -5,14 +5,24 @@ import { InternalFlowiseError } from '../../errors/internalFlowiseError' // we need eslint because we have to pass next arg for the error middleware // eslint-disable-next-line async function errorHandlerMiddleware(err: InternalFlowiseError, req: Request, res: Response, next: NextFunction) { - let displayedError = { + // Safely read streaming flag from body (req.body may be undefined) + const streamingFlag = req && (req as any).body ? (req as any).body.streaming : undefined + + // Build the response payload + const displayedError = { statusCode: err.statusCode || StatusCodes.INTERNAL_SERVER_ERROR, success: false, message: err.message, // Provide error stack trace only in development stack: process.env.NODE_ENV === 'development' ? 
err.stack : {} } - if (!req.body.streaming || req.body.streaming === 'false') { + + // Log the error server-side for easier debugging + // Keep this server-side only; we still control what is returned to the client + // eslint-disable-next-line no-console + console.error('Unhandled error caught by errorHandlerMiddleware:', err) + + if (!streamingFlag || streamingFlag === 'false') { res.setHeader('Content-Type', 'application/json') res.status(displayedError.statusCode).json(displayedError) } diff --git a/studio-frontend/packages/server/src/routes/finetuning/index.ts b/studio-frontend/packages/server/src/routes/finetuning/index.ts new file mode 100644 index 0000000..35c6114 --- /dev/null +++ b/studio-frontend/packages/server/src/routes/finetuning/index.ts @@ -0,0 +1,32 @@ +import express from 'express' +import multer from 'multer' +import finetuningController from '../../controllers/finetuning' + +const router = express.Router() + +// Use memory storage for multer to store files in buffer +const upload = multer({ storage: multer.memoryStorage() }) + +// Upload training file +router.post('/files', upload.single('file'), finetuningController.uploadTrainingFile) + +// Create fine-tuning job +router.post('/jobs', finetuningController.createFineTuningJob) + +// List all fine-tuning jobs +router.get('/jobs', finetuningController.listFineTuningJobs) + +// Retrieve a specific fine-tuning job +router.post('/jobs/retrieve', finetuningController.retrieveFineTuningJob) + +// Fetch logs for a fine-tuning job +router.post('/jobs/logs', finetuningController.getFineTuningJobLogs) + +// Cancel a fine-tuning job +router.post('/jobs/cancel', finetuningController.cancelFineTuningJob) +router.post('/jobs/delete', finetuningController.deleteFineTuningJob) + +// Download fine-tuning job output +router.get('/download-ft/:jobId', finetuningController.downloadFineTuningOutput) + +export default router diff --git a/studio-frontend/packages/server/src/routes/index.ts 
b/studio-frontend/packages/server/src/routes/index.ts index 6501f55..e1a92a5 100644 --- a/studio-frontend/packages/server/src/routes/index.ts +++ b/studio-frontend/packages/server/src/routes/index.ts @@ -13,6 +13,7 @@ import documentStoreRouter from './documentstore' import exportImportRouter from './export-import' import feedbackRouter from './feedback' import fetchLinksRouter from './fetch-links' +import finetuningRouter from './finetuning' import flowConfigRouter from './flow-config' import getUploadFileRouter from './get-upload-file' import getUploadPathRouter from './get-upload-path' @@ -59,6 +60,7 @@ router.use('/document-store', documentStoreRouter) router.use('/export-import', exportImportRouter) router.use('/feedback', feedbackRouter) router.use('/fetch-links', fetchLinksRouter) +router.use('/finetuning', finetuningRouter) router.use('/flow-config', flowConfigRouter) router.use('/internal-chatmessage', internalChatmessagesRouter) router.use('/internal-prediction', internalPredictionRouter) diff --git a/studio-frontend/packages/server/src/services/chatflows/index.ts b/studio-frontend/packages/server/src/services/chatflows/index.ts index 4ae6c1d..d5abbcd 100644 --- a/studio-frontend/packages/server/src/services/chatflows/index.ts +++ b/studio-frontend/packages/server/src/services/chatflows/index.ts @@ -13,8 +13,25 @@ import { containsBase64File, updateFlowDataWithFilePaths } from '../../utils/fil import { getRunningExpressApp } from '../../utils/getRunningExpressApp' import { utilGetUploadsConfig } from '../../utils/getUploadsConfig' import logger from '../../utils/logger' -import axios from 'axios' -import { Readable } from 'stream' +import axios, { AxiosRequestConfig } from 'axios' +import { HttpsProxyAgent } from 'https-proxy-agent' + +// Configure github axios to support HTTP_PROXY/HTTPS_PROXY environment variables +const getGithubAxiosConfig = (): AxiosRequestConfig => { + const http_proxy = process.env.http_proxy || process.env.HTTP_PROXY + const agent 
= (http_proxy && http_proxy.trim() !== "") ? new HttpsProxyAgent(http_proxy) : undefined + + return { + headers: { + Accept: 'application/vnd.github.v3+json', + }, + proxy: false, + ...(agent && { + httpAgent: agent, + httpsAgent: agent, + }), + } +} const STUDIO_SERVER_URL = process.env.STUDIO_SERVER_URL || 'http://studio-backend.studio.svc.cluster.local:5000' @@ -161,17 +178,23 @@ const getAllChatflowsbyUserId = async (userid: string, type?: ChatflowType): Pro const importSampleChatflowsbyUserId = async (userid: string, type?: ChatflowType): Promise => { try { - const response = await axios.get('https://api.github.com/repos/opea-project/GenAIStudio/contents/sample-workflows'); + const axiosConfig = getGithubAxiosConfig() + + console.log('Importing sample chatflows for user:', userid); + + const response = await axios.get( + 'https://api.github.com/repos/opea-project/GenAIStudio/contents/sample-workflows', + axiosConfig + ); + + console.log('Response from GitHub:', response.data); + const files = response.data.filter((item: any) => item.type === 'file'); - console.log(`Number of files: ${files.length}`); const chatflows: Partial[] = []; - for (const file of files) { - console.log(`Download URL: ${file.download_url}`); - const fileResponse = await axios.get(file.download_url); + const fileResponse = await axios.get(file.download_url, axiosConfig); const parsedFlowData = fileResponse.data; - const newChatflow: Partial = { userid: userid, name: file.name.replace('.json', ''), @@ -180,7 +203,6 @@ const importSampleChatflowsbyUserId = async (userid: string, type?: ChatflowType deployed: false, isPublic: false }; - chatflows.push(newChatflow); } const insertResponse = await importChatflows(chatflows); @@ -617,4 +639,4 @@ export default { getSinglePublicChatbotConfig, oneClickDeploymentService, updateDeploymentStatus -} +} \ No newline at end of file diff --git a/studio-frontend/packages/server/src/services/finetuning/index.ts 
b/studio-frontend/packages/server/src/services/finetuning/index.ts new file mode 100644 index 0000000..0fed1c8 --- /dev/null +++ b/studio-frontend/packages/server/src/services/finetuning/index.ts @@ -0,0 +1,752 @@ +import axios, { AxiosInstance } from 'axios' +import http from 'http' +import https from 'https' +import * as fs from 'fs' +import * as path from 'path' +import { exec } from 'child_process' +import { promisify } from 'util' +import { StatusCodes } from 'http-status-codes' +import { InternalFlowiseError } from '../../errors/internalFlowiseError' +import { getErrorMessage } from '../../errors/utils' +import { getRunningExpressApp } from '../../utils/getRunningExpressApp' +import { FineTuningJob } from '../../database/entities/FineTuningJob' +import logger from '../../utils/logger' + +const execAsync = promisify(exec) + +const FINETUNING_SERVICE_URL = process.env.FINETUNING_HOST ? `http://${process.env.FINETUNING_HOST}:8015` : 'undefined' +console.debug('finetuningService - FINETUNING_SERVICE_URL', FINETUNING_SERVICE_URL) + +// Create an axios client with keep-alive to reduce connection churn +const agentOptions = { keepAlive: true, maxSockets: 20 } +const httpAgent = new http.Agent(agentOptions) +const httpsAgent = new https.Agent(agentOptions) + +const axiosClient: AxiosInstance = axios.create({ + baseURL: FINETUNING_SERVICE_URL, + timeout: 60000, // increase timeout to 60s + httpAgent, + httpsAgent, + headers: { + 'Content-Type': 'application/json' + } +}) + +// In-memory mapping: filename (raw and decoded) -> { id, rawFilename } +const uploadedFileIdMap: Map = new Map() + +/** + * Helper function to zip a fine-tuning job output directory + * Checks if zip already exists and is up-to-date before creating a new one + * @param outputDir - Full path to the output directory for the job + * @param jobId - ID of the fine-tuning job + * @returns Path to the zipped file or null if failed + */ +const ensureFineTuningOutputZip = async (outputDir: string, jobId: 
string): Promise => { + try { + // eslint-disable-next-line no-console + console.debug(`finetuningService.ensureFineTuningOutputZip - processing output for job: ${jobId}`) + + // Validate output directory exists + if (!fs.existsSync(outputDir)) { + // eslint-disable-next-line no-console + console.warn(`finetuningService.ensureFineTuningOutputZip - output directory not found: ${outputDir}`) + return null + } + + const zipFilePath = `${outputDir}.zip` + const outputStats = fs.statSync(outputDir) + + // Check if zip exists and is up-to-date + if (fs.existsSync(zipFilePath)) { + const zipStats = fs.statSync(zipFilePath) + // If zip is newer than the output directory, skip re-zipping + if (zipStats.mtimeMs > outputStats.mtimeMs) { + // eslint-disable-next-line no-console + console.debug(`finetuningService.ensureFineTuningOutputZip - zip already up-to-date: ${zipFilePath}`) + return zipFilePath + } + // Remove outdated zip + try { + fs.unlinkSync(zipFilePath) + // eslint-disable-next-line no-console + console.debug(`finetuningService.ensureFineTuningOutputZip - removed outdated zip: ${zipFilePath}`) + } catch (e) { + // eslint-disable-next-line no-console + console.warn(`finetuningService.ensureFineTuningOutputZip - failed to remove outdated zip: ${e}`) + } + } + + // Create zip file using tar (more efficient than node zip libraries) + // eslint-disable-next-line no-console + console.debug(`finetuningService.ensureFineTuningOutputZip - starting to zip output for job ${jobId}`) + try { + const parentDir = path.dirname(outputDir) + const dirName = path.basename(outputDir) + const cmd = `cd "${parentDir}" && tar -czf "${path.basename(zipFilePath)}" "${dirName}"` + await execAsync(cmd, { + maxBuffer: 1024 * 1024 * 100, // 100MB buffer for large outputs + timeout: 600000 // 10 minute timeout + }) + + // eslint-disable-next-line no-console + console.debug(`finetuningService.ensureFineTuningOutputZip - zip created successfully for job ${jobId}: ${zipFilePath}`) + return 
zipFilePath + } catch (execErr: any) { + // eslint-disable-next-line no-console + console.error(`finetuningService.ensureFineTuningOutputZip - tar failed for job ${jobId}: ${execErr?.message || execErr}`) + return null + } + } catch (error: any) { + // eslint-disable-next-line no-console + console.error(`finetuningService.ensureFineTuningOutputZip - error: ${error?.message || error}`) + return null + } +} + +/** + * Upload a training file to the finetuning service + */ +const uploadTrainingFile = async (file: Express.Multer.File, purpose: string = 'fine-tune') => { + try { + // Create FormData using the browser/Node.js FormData API + const FormData = require('form-data') + const formData = new FormData() + + formData.append('file', file.buffer, { + filename: file.originalname, + contentType: file.mimetype + }) + formData.append('purpose', purpose) + + const response = await axios.post(`${FINETUNING_SERVICE_URL}/v1/files`, formData, { + headers: { + ...formData.getHeaders() + } + }) + + // Debug: log the response from the finetuning service for uploaded file + try { + // eslint-disable-next-line no-console + console.debug('finetuningService.uploadTrainingFile - response.data:', response.data) + } catch (logErr) { + // ignore logging errors + } + + return response.data + } catch (error: any) { + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Error: finetuningService.uploadTrainingFile - ${getErrorMessage(error)}` + ) + } +} + +// Helper: persist or update a fine-tuning job record in the local DB +const persistJobToDb = async (jobData: any) => { + try { + if (!jobData) return + const appServer = getRunningExpressApp() + if (!appServer || !appServer.AppDataSource) return + + const repo = appServer.AppDataSource.getRepository(FineTuningJob) + + // Determine canonical id from the response + const id = jobData.id || jobData.job_id || jobData.fine_tuning_job_id || jobData.fine_tuning_id + if (!id) return + + // Build entity object mapping common 
fields; fall back to stringifying objects + let taskVal: any = jobData.task || undefined + try { + if (!taskVal && jobData.General) { + if (typeof jobData.General === 'string') { + const parsed = JSON.parse(jobData.General) + taskVal = parsed?.task || taskVal + } else if (typeof jobData.General === 'object') { + taskVal = jobData.General?.task || taskVal + } + } + } catch (e) { + // ignore parse errors + } + + const entity: any = { + id: String(id), + name: jobData.name || jobData.id || undefined, + model: jobData.model || undefined, + status: jobData.status || jobData.state || undefined, + training_file: jobData.training_file || jobData.trainingFile || undefined, + task: taskVal || undefined, + progress: typeof jobData.progress === 'number' ? jobData.progress : undefined, + trained_tokens: typeof jobData.trained_tokens === 'number' ? jobData.trained_tokens : undefined + } + + + if (jobData.hyperparameters) { + try { + entity.hyperparameters = typeof jobData.hyperparameters === 'object' ? JSON.stringify(jobData.hyperparameters) : String(jobData.hyperparameters) + } catch (e) {} + } + + if (jobData.result_files) { + try { + entity.result_files = typeof jobData.result_files === 'object' ? JSON.stringify(jobData.result_files) : String(jobData.result_files) + } catch (e) {} + } + + if (jobData.error) { + try { + entity.error = typeof jobData.error === 'object' ? 
JSON.stringify(jobData.error) : String(jobData.error) + } catch (e) {} + } + + if (jobData.estimated_finish) { + entity.estimated_finish = new Date(jobData.estimated_finish) + } + if (jobData.finishedDate || jobData.finished_at || jobData.completed_at) { + entity.finishedDate = new Date(jobData.finishedDate || jobData.finished_at || jobData.completed_at) + } + + // Upsert: merge if exists + let existing = await repo.findOneBy({ id: String(id) }) + if (!existing) { + const created = repo.create(entity) + await repo.save(created) + } else { + repo.merge(existing, entity) + await repo.save(existing) + } + } catch (e) { + // Don't fail the main flow if DB persistence fails; only log + try { + // eslint-disable-next-line no-console + console.error('finetuningService.persistJobToDb - failed to persist job', e) + } catch (logErr) { + // ignore + } + } +} + +// Helper: update specific fields for a job in the DB +const updateJobInDb = async (jobId: string, updates: Partial) => { + try { + if (!jobId) return + const appServer = getRunningExpressApp() + if (!appServer || !appServer.AppDataSource) return + const repo = appServer.AppDataSource.getRepository(FineTuningJob) + const existing = await repo.findOneBy({ id: String(jobId) }) + if (!existing) return + repo.merge(existing, updates) + await repo.save(existing) + } catch (e) { + try { + // eslint-disable-next-line no-console + console.error('finetuningService.updateJobInDb - failed to update job', jobId, e) + } catch (logErr) { + // ignore + } + } +} + +/** + * Create a fine-tuning job + */ +const createFineTuningJob = async (jobConfig: { + training_file: string + model: string + General?: { + task?: string + lora_config?: any + } + Dataset?: { + max_length?: number + query_max_len?: number + passage_max_len?: number + padding?: string + } + Training?: { + epochs?: number + batch_size?: number + gradient_accumulation_steps?: number + } +}) => { + try { + // Work with the jobConfig as-provided by the UI. 
+ const forwardedJobConfig = { ...jobConfig } + + // (Removed verbose initial jobConfig logging to reduce noise) + const sanitizedPayload = JSON.parse(JSON.stringify(forwardedJobConfig)) + + // Remove empty nested objects that may confuse the server + if (sanitizedPayload.General && Object.keys(sanitizedPayload.General).length === 0) { + delete sanitizedPayload.General + } + if (sanitizedPayload.Dataset && Object.keys(sanitizedPayload.Dataset).length === 0) { + delete sanitizedPayload.Dataset + } + if (sanitizedPayload.Training && Object.keys(sanitizedPayload.Training).length === 0) { + delete sanitizedPayload.Training + } + + if (sanitizedPayload.training_file && typeof sanitizedPayload.training_file === 'string') { + const originalFilename = sanitizedPayload.training_file + + // Try to decode first in case it's URL-encoded + let lookupKey = originalFilename + try { + const decoded = decodeURIComponent(originalFilename) + lookupKey = decoded + } catch (e) { + // ignore decode errors + } + + // Check if we have a stored mapping from the upload + let stored = uploadedFileIdMap.get(lookupKey) + if (!stored && lookupKey !== originalFilename) { + // Also try the original (encoded) key + stored = uploadedFileIdMap.get(originalFilename) + } + + if (stored && stored.rawFilename) { + sanitizedPayload.training_file = stored.rawFilename + } + } + + // Try a sequence of attempts to accommodate naming/encoding/id differences. + const attemptPost = async (payload: any, label = 'attempt') => { + try { + // eslint-disable-next-line no-console + console.debug(`finetuningService.createFineTuningJob - ${label} payload:`, payload) + const resp = await axiosClient.post('/v1/fine_tuning/jobs', payload) + // eslint-disable-next-line no-console + console.debug(`finetuningService.createFineTuningJob - ${label} response:`, typeof resp?.data === 'string' ? 
resp.data : JSON.stringify(resp?.data)) + return resp + } catch (err: any) { + // Log detailed info for debugging + try { + // eslint-disable-next-line no-console + console.error(`finetuningService.createFineTuningJob - ${label} failed`, { + message: err?.message, + status: err?.response?.status, + responseData: typeof err?.response?.data === 'string' ? err.response.data : JSON.stringify(err?.response?.data), + payload + }) + } catch (logErr) { + // ignore logging errors + } + throw err + } + } + + // Send the sanitized payload + const resp = await attemptPost(sanitizedPayload, 'final') + const respData = resp.data + // If the external service didn't echo back the task, preserve task from our sanitized payload + try { + const payloadTask = sanitizedPayload?.General?.task || sanitizedPayload?.task + if (payloadTask && !respData.task) { + // attach task so persistJobToDb stores it + try { respData.task = payloadTask } catch (e) { /* ignore */ } + } + } catch (e) { + // ignore + } + + // Persist to local DB + try { + await persistJobToDb(respData) + } catch (e) { + // ignore + } + return respData + } catch (error: any) { + // Log error details from external service if available for debugging + try { + // eslint-disable-next-line no-console + console.error('finetuningService.createFineTuningJob - axios error:', { + message: error.message, + responseData: error.response ? (typeof error.response.data === 'string' ? error.response.data : JSON.stringify(error.response.data)) : undefined, + status: error.response ? error.response.status : undefined, + headers: error.response ? 
error.response.headers : undefined + }) + } catch (logErr) { + // ignore logging errors + } + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Error: finetuningService.createFineTuningJob - ${getErrorMessage(error)}` + ) + } +} + +/** + * List all fine-tuning jobs + */ +const listFineTuningJobs = async () => { + try { + // First try to read persisted jobs from local DB + try { + const appServer = getRunningExpressApp() + const repo = appServer.AppDataSource.getRepository(FineTuningJob) + const persisted = await repo.find() + if (persisted && persisted.length > 0) { + return persisted + } + } catch (e) { + // If DB read fails, we'll fall back to external service + // eslint-disable-next-line no-console + console.debug('finetuningService.listFineTuningJobs - DB read failed, falling back to external service', e) + } + + // Fallback: query external finetuning service and persist results + const response = await axiosClient.get('/v1/fine_tuning/jobs') + const data = response.data + try { + if (Array.isArray(data)) { + for (const j of data) { + // best-effort persist + // eslint-disable-next-line no-await-in-loop + await persistJobToDb(j) + } + } + } catch (e) { + // ignore persistence errors + } + + return data + } catch (error: any) { + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Error: finetuningService.listFineTuningJobs - ${getErrorMessage(error)}` + ) + } +} + +/** + * Retrieve a specific fine-tuning job + */ +const retrieveFineTuningJob = async (fineTuningJobId: string) => { + const maxAttempts = 3 + const baseDelayMs = 500 + + const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms)) + + for (let attempt = 1; attempt <= maxAttempts; attempt++) { + try { + const response = await axiosClient.post('/v1/fine_tuning/jobs/retrieve', { + fine_tuning_job_id: fineTuningJobId + }) + const respData = response.data + // Persist/update DB with latest status + try { + await persistJobToDb(respData) + } 
catch (e) { + // ignore + } + return respData + } catch (error: any) { + const msg = getErrorMessage(error) + const isTransient = msg && ( + msg.toLowerCase().includes('socket hang up') || + msg.toLowerCase().includes('econnreset') || + msg.toLowerCase().includes('etimedout') || + msg.toLowerCase().includes('timeout') || + msg.toLowerCase().includes('connect') + ) + + if (attempt < maxAttempts && isTransient) { + const delay = baseDelayMs * Math.pow(2, attempt - 1) + // back off and retry silently + // eslint-disable-next-line no-await-in-loop + await sleep(delay) + continue + } + + // Only log a concise warning when this is the final attempt + if (attempt === maxAttempts) { + logger.warn(`finetuningService.retrieveFineTuningJob - final attempt ${attempt} failed for job ${fineTuningJobId}: ${msg}`) + } + + // Final failure: log details and throw + try { + logger.error('finetuningService.retrieveFineTuningJob - error details:', { + message: error?.message, + status: error?.response?.status, + responseData: error?.response?.data + }) + } catch (logErr) { + // ignore logging errors + } + + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Error: finetuningService.retrieveFineTuningJob - ${msg}` + ) + } + } + + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Error: finetuningService.retrieveFineTuningJob - failed after ${maxAttempts} attempts` + ) +} + +/** + * Cancel a fine-tuning job + */ +const cancelFineTuningJob = async (fineTuningJobId: string) => { + try { + const response = await axiosClient.post('/v1/fine_tuning/jobs/cancel', { + fine_tuning_job_id: fineTuningJobId + }) + // Update local DB to reflect cancelled status + try { + await updateJobInDb(fineTuningJobId, { status: 'cancelled', finishedDate: new Date() }) + } catch (e) { + // ignore + } + return response.data + } catch (error: any) { + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Error: finetuningService.cancelFineTuningJob - 
${getErrorMessage(error)}` + ) + } +} + +/** + * Delete a fine-tuning job locally and attempt to cancel it remotely. + * This will cancel the external job and remove DB records for the job and checkpoints. + */ +const deleteFineTuningJob = async (fineTuningJobId: string) => { + try { + // Attempt to cancel external job + try { + await axiosClient.post('/v1/fine_tuning/jobs/cancel', { + fine_tuning_job_id: fineTuningJobId + }) + } catch (e) { + // ignore external cancel errors + try { + // eslint-disable-next-line no-console + console.debug('finetuningService.deleteFineTuningJob - external cancel failed, continuing to delete locally', e) + } catch (logErr) {} + } + + // Remove local DB records + try { + const appServer = getRunningExpressApp() + const repo = appServer.AppDataSource.getRepository(FineTuningJob) + + // delete job + await repo.delete({ id: String(fineTuningJobId) }) + } catch (e) { + try { + // eslint-disable-next-line no-console + console.error('finetuningService.deleteFineTuningJob - failed to delete local DB records', e) + } catch (logErr) {} + } + + // Attempt to remove any output files/directories for this job under /tmp/finetuning/output + try { + const OUTPUT_BASE_DIR = '/tmp/finetuning/output' + const jobOutputDir = path.join(OUTPUT_BASE_DIR, String(fineTuningJobId)) + const resolvedJobDir = path.resolve(jobOutputDir) + const resolvedBaseDir = path.resolve(OUTPUT_BASE_DIR) + + // Safety: ensure the resolved path is within the expected base directory + if (resolvedJobDir.startsWith(resolvedBaseDir)) { + // Remove directory recursively if it exists + if (fs.existsSync(resolvedJobDir)) { + try { + // Use fs.rmSync when available; fallback to recursive unlink if necessary + if (typeof fs.rmSync === 'function') { + fs.rmSync(resolvedJobDir, { recursive: true, force: true }) + } else { + // older Node versions: remove files inside then rmdir + const rimraf = require('rimraf') + rimraf.sync(resolvedJobDir) + } + // eslint-disable-next-line no-console 
+ console.debug(`finetuningService.deleteFineTuningJob - removed output dir: ${resolvedJobDir}`) + } catch (rmErr) { + try { console.warn('finetuningService.deleteFineTuningJob - failed to remove output dir', rmErr) } catch (ignore) {} + } + } + + // Also remove zip file if present + const zipPath = `${resolvedJobDir}.zip` + if (fs.existsSync(zipPath)) { + try { + fs.unlinkSync(zipPath) + // eslint-disable-next-line no-console + console.debug(`finetuningService.deleteFineTuningJob - removed zip: ${zipPath}`) + } catch (zipErr) { + try { console.warn('finetuningService.deleteFineTuningJob - failed to remove zip file', zipErr) } catch (ignore) {} + } + } + } else { + try { console.warn('finetuningService.deleteFineTuningJob - output path outside base dir, skipping removal:', resolvedJobDir) } catch (ignore) {} + } + } catch (e) { + try { console.warn('finetuningService.deleteFineTuningJob - error while removing output files', e) } catch (ignore) {} + } + + return { success: true } + } catch (error: any) { + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Error: finetuningService.deleteFineTuningJob - ${getErrorMessage(error)}` + ) + } +} + +/** + * Download fine-tuning job output as a zip file + * Creates zip if needed, or returns existing zip immediately + * @param jobId - ID of the fine-tuning job + * @returns Path to the zipped file or null if not found + */ +const downloadFineTuningOutput = async (jobId: string): Promise => { + try { + if (!jobId) { + throw new InternalFlowiseError(StatusCodes.BAD_REQUEST, 'Job ID is required') + } + + const OUTPUT_BASE_DIR = '/tmp/finetuning/output' + const jobOutputDir = path.join(OUTPUT_BASE_DIR, jobId) + + // eslint-disable-next-line no-console + console.debug(`finetuningService.downloadFineTuningOutput - checking for output: ${jobOutputDir}`) + + // Verify job output directory exists + if (!fs.existsSync(jobOutputDir)) { + // eslint-disable-next-line no-console + 
console.warn(`finetuningService.downloadFineTuningOutput - output directory not found: ${jobOutputDir}`) + throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `Fine-tuning job output not found for job: ${jobId}`) + } + + // Security check: ensure path is within the expected directory + const resolvedJobDir = path.resolve(jobOutputDir) + const resolvedBaseDir = path.resolve(OUTPUT_BASE_DIR) + if (!resolvedJobDir.startsWith(resolvedBaseDir)) { + // eslint-disable-next-line no-console + console.error(`finetuningService.downloadFineTuningOutput - path traversal attempt: ${jobOutputDir}`) + throw new InternalFlowiseError(StatusCodes.FORBIDDEN, 'Invalid job output path') + } + + // Ensure the output is zipped (returns immediately if zip is up-to-date) + const finalZipPath = await ensureFineTuningOutputZip(jobOutputDir, jobId) + if (!finalZipPath) { + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Failed to create zip for job ${jobId}` + ) + } + + // eslint-disable-next-line no-console + console.debug(`finetuningService.downloadFineTuningOutput - file ready for download: ${finalZipPath}`) + return finalZipPath + } catch (error: any) { + if (error instanceof InternalFlowiseError) { + throw error + } + // eslint-disable-next-line no-console + console.error(`finetuningService.downloadFineTuningOutput - error: ${error?.message || error}`) + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Error: finetuningService.downloadFineTuningOutput - ${getErrorMessage(error)}` + ) + } +} + +/** + * Get logs for a fine-tuning job by querying the Ray head node HTTP API. + * It will call: http:///api/jobs//logs + * Environment: set RAY_HEAD_NODE to the host:port of the Ray head (e.g. "ray-head.example.com:8265"). + */ +const getFineTuningJobLogs = async ( + fineTuningJobId: string, + options: { ray_job_id?: string } = {} +) => { + try { + const rayHost = process.env.FINETUNING_HOST ? 
`${process.env.FINETUNING_HOST}:8265` : 'undefined' + + // If caller provided an explicit ray_job_id, use it. Otherwise attempt to discover the Ray submission id + let submissionId: string | undefined = options.ray_job_id + + // Query Ray /api/jobs/ and select entries where entrypoint contains the FT id (jq-like) + const listUrl = `http://${rayHost}/api/jobs/` + console.debug('finetuningService.getFineTuningJobLogs - listUrl:', listUrl) + try { + const listResp = await axios.get(listUrl, { timeout: 20000 }) + // Debug: log status and length of Ray /api/jobs/ output; full dump only when explicitly enabled + try { + const raw = listResp.data + const len = typeof raw === 'string' ? raw.length : JSON.stringify(raw).length + console.debug('finetuningService.getFineTuningJobLogs - Ray /api/jobs/ status=', listResp.status, 'len=', len) + if (String(process.env.RAY_DUMP_JOBS).toLowerCase() === 'true') { + try { + const pretty = typeof raw === 'string' ? raw : JSON.stringify(raw, null, 2) + console.debug('finetuningService.getFineTuningJobLogs - Ray /api/jobs/ FULL DUMP:\n' + pretty) + } catch (e) { + try { console.debug('finetuningService.getFineTuningJobLogs - failed to stringify full Ray jobs list', String(e)) } catch (ignore) {} + } + } + } catch (logErr) { + try { console.debug('finetuningService.getFineTuningJobLogs - failed to inspect Ray jobs list', String(logErr)) } catch (ignore) {} + } + const jobsList = Array.isArray(listResp.data) ? 
listResp.data : [] + // Apply strict filter: entrypoint contains the exact FT id + const match = jobsList.find((j: any) => { + try { + const entrypoint = j?.entrypoint || '' + return String(entrypoint).includes(String(fineTuningJobId)) + } catch (e) { + return false + } + }) + if (match) { + submissionId = match.submission_id || match.job_id + } + } catch (e) { + try { console.error('finetuningService.getFineTuningJobLogs - failed to list Ray jobs', String(e)) } catch (err) {} + } + + // Construct logs URL with optional tail and fetch logs + const url = `http://${rayHost}/api/jobs/${encodeURIComponent(String(submissionId))}/logs` + const resp = await axios.get(url, { timeout: 30000 }) + // Normalize logs response so newlines are preserved and objects/arrays are readable + try { + const rawLogs = resp.data + if (typeof rawLogs === 'string') { + // string likely contains proper newlines + return { logs: rawLogs } + } + if (Array.isArray(rawLogs)) { + return { logs: rawLogs.join('\n') } + } + // object -> pretty-print with indentation to preserve newlines + return { logs: JSON.stringify(rawLogs, null, 2) } + } catch (e) { + // fallback to safe stringify + return { logs: JSON.stringify(resp.data, null, 2) } + } + } catch (error: any) { + // Provide helpful error details and return a structured error instead of throwing + const msg = `Error fetching logs: ${getErrorMessage(error)}` + try { (globalThis as any).console?.error && (globalThis as any).console.error('finetuningService.getFineTuningJobLogs -', String(error)) } catch (e) {} + return { logs: '', error: msg } + } +} + +export default { + uploadTrainingFile, + createFineTuningJob, + listFineTuningJobs, + retrieveFineTuningJob, + cancelFineTuningJob, + deleteFineTuningJob, + getFineTuningJobLogs, + downloadFineTuningOutput +} diff --git a/studio-frontend/packages/server/src/utils/webSocketDownloadManager.ts b/studio-frontend/packages/server/src/utils/webSocketDownloadManager.ts new file mode 100644 index 
0000000..e69de29 diff --git a/studio-frontend/packages/server/src/ws/finetuningDownload.ts b/studio-frontend/packages/server/src/ws/finetuningDownload.ts new file mode 100644 index 0000000..484d2e8 --- /dev/null +++ b/studio-frontend/packages/server/src/ws/finetuningDownload.ts @@ -0,0 +1,172 @@ +import { Server, Socket } from 'socket.io' +import finetuningService from '../services/finetuning' +import logger from '../utils/logger' + +// Declare timer globals so this file compiles regardless of lib settings +declare function setTimeout(cb: (...args: any[]) => void, ms?: number): any +declare function clearTimeout(id: any): void + +/** + * Setup WebSocket handlers for fine-tuning output downloads + * This allows non-blocking, asynchronous zip creation and download + */ +export const setupFineTuningDownloadHandlers = (io: Server) => { + + logger.info('[WS Download] Setting up fine-tuning download namespace: /finetuning-download') + + // Create a dedicated namespace so download sockets don't mix with other WS handlers + const nsp = io.of('/finetuning-download') + + /** + * In-memory tracking of ongoing download tasks so multiple sockets can + * subscribe to the same job and reconnect (page refresh) without losing state. 
+ * + * Map, + * downloadUrl?: string, + * fileName?: string, + * error?: string, + * timeoutHandle?: any + * }> + */ + const downloadTasks = new Map() + + // Grace period to keep completed task info for late reconnects (ms) + const COMPLETED_TASK_RETENTION_MS = 60 * 1000 // 60s + + nsp.on('connection', (socket: Socket) => { + logger.info(`[WS Download] Client connected - Socket ID: ${socket.id}`) + + const attachSubscriber = (jobId: string) => { + let task = downloadTasks.get(jobId) + if (!task) { + task = { + status: 'starting', + subscribers: new Set(), + downloadUrl: null, + fileName: null, + error: null, + timeoutHandle: null + } + downloadTasks.set(jobId, task) + } + + task.subscribers.add(socket) + return task + } + + // Handle fine-tuning output download request + // Client sends: { jobId: string } + socket.on('download-finetuning-output', async (data: { jobId: string }) => { + try { + const { jobId } = data + logger.info(`[WS Download] Download requested - Socket ID: ${socket.id}, Job ID: ${jobId}`) + + if (!jobId) { + socket.emit('download-finetuning-error', { + jobId: null, + error: 'Job ID is required' + }) + return + } + + // Attach this socket as a subscriber for this job + const task = attachSubscriber(jobId) + + // If task already completed, reply immediately with complete event + if (task.status === 'complete') { + socket.emit('download-finetuning-complete', { + jobId, + downloadUrl: task.downloadUrl, + fileName: task.fileName + }) + return + } + + // Emit current progress state to the newly connected socket + socket.emit('download-finetuning-progress', { + jobId, + status: task.status, + message: task.status === 'starting' ? 'Preparing download...' 
: 'Creating zip archive (this may take a few minutes)' + }) + + // If task is already zipping or starting and has a running promise, do nothing else + if (task.promise) { + // existing background work will notify subscribers when done + return + } + + // Kick off the async preparation and store the promise so others can join + task.status = 'zipping' + task.promise = (async () => { + try { + // Call the service to prepare the zip file (returns path) + const zipFilePath = await finetuningService.downloadFineTuningOutput(jobId) + + if (!zipFilePath) { + task.status = 'error' + task.error = 'Failed to create output archive' + // Notify all subscribers + task.subscribers.forEach((s: Socket) => { + s.emit('download-finetuning-error', { jobId, error: task.error }) + }) + return + } + + task.status = 'complete' + task.downloadUrl = `/api/v1/finetuning/download-ft/${jobId}` + task.fileName = `${jobId}-output.zip` + + logger.info(`[WS Download] Download ready for job: ${jobId}`) + + // Emit completion to all current subscribers + task.subscribers.forEach((s: Socket) => { + s.emit('download-finetuning-complete', { + jobId, + downloadUrl: task.downloadUrl, + fileName: task.fileName + }) + }) + + // Schedule cleanup of the completed task after retention period + task.timeoutHandle = setTimeout(() => { + downloadTasks.delete(jobId) + }, COMPLETED_TASK_RETENTION_MS) + + } catch (error: any) { + task.status = 'error' + task.error = error?.message || String(error) + logger.error(`[WS Download] Error preparing download for job ${jobId}: ${task.error}`) + task.subscribers.forEach((s: Socket) => { + s.emit('download-finetuning-error', { jobId, error: task.error }) + }) + // cleanup soon + task.timeoutHandle = setTimeout(() => { + downloadTasks.delete(jobId) + }, 5000) + } + })() + } catch (error: any) { + const errorMessage = error?.message || String(error) || 'Unknown error' + logger.error(`[WS Download] Handler error: ${errorMessage}`) + socket.emit('download-finetuning-error', { + 
jobId: data?.jobId || null, + error: errorMessage + }) + } + }) + + socket.on('disconnect', (reason: any) => { + logger.info(`[WS Download] Client disconnected - Socket ID: ${socket.id}, Reason: ${reason}`) + // Remove this socket from all task subscriber lists + downloadTasks.forEach((task, jobId) => { + if (task.subscribers && task.subscribers.has(socket)) { + task.subscribers.delete(socket) + } + }) + }) + + logger.debug(`[WS Download] Fine-tuning download handlers attached to socket ${socket.id}`) + }) +} diff --git a/studio-frontend/packages/server/src/ws/finetuningStatus.ts b/studio-frontend/packages/server/src/ws/finetuningStatus.ts new file mode 100644 index 0000000..3a78788 --- /dev/null +++ b/studio-frontend/packages/server/src/ws/finetuningStatus.ts @@ -0,0 +1,226 @@ +import { Server, Socket } from 'socket.io' +import finetuningService from '../services/finetuning' +import logger from '../utils/logger' + +// Declare timer globals so this file compiles regardless of lib settings +declare function setInterval(cb: (...args: any[]) => void, ms?: number): any +declare function clearInterval(id: any): void +declare function setTimeout(cb: (...args: any[]) => void, ms?: number): any + +// Store active job subscriptions: jobId -> Set of socket IDs +const jobSubscriptions = new Map>() + +// Background monitoring state +let monitoringInterval: any | null = null +const POLLING_INTERVAL = 5000 // 5 seconds - backend polls Ray API + +/** + * Setup WebSocket handlers for fine-tuning job status monitoring + * Clients can subscribe to specific job updates and receive real-time status changes + */ +export const setupFineTuningStatusHandlers = (io: Server) => { + io.on('connection', (socket: Socket) => { + logger.info(`[WS Status] Client connected - Socket ID: ${socket.id}`) + + /** + * Subscribe to job status updates + * Client sends: { jobIds: string[] } + * Server will emit 'job-status-update' events for these jobs + */ + socket.on('subscribe-job-status', (data: { 
jobIds: string[] }) => { + try { + const { jobIds } = data + + if (!Array.isArray(jobIds) || jobIds.length === 0) { + return + } + + // Add this socket to each job's subscription set + jobIds.forEach(jobId => { + if (!jobSubscriptions.has(jobId)) { + jobSubscriptions.set(jobId, new Set()) + } + jobSubscriptions.get(jobId)!.add(socket.id) + }) + + // Start background monitoring if not already running + startBackgroundMonitoring(io) + + // Send immediate acknowledgment + socket.emit('subscription-confirmed', { + jobIds, + message: 'Subscribed to job updates' + }) + + } catch (error: any) { + socket.emit('subscription-error', { + error: error?.message || 'Failed to subscribe' + }) + } + }) + + /** + * Unsubscribe from job status updates + * Client sends: { jobIds: string[] } + */ + socket.on('unsubscribe-job-status', (data: { jobIds: string[] }) => { + try { + const { jobIds } = data + + if (!Array.isArray(jobIds)) return + + jobIds.forEach(jobId => { + const subscribers = jobSubscriptions.get(jobId) + if (subscribers) { + subscribers.delete(socket.id) + if (subscribers.size === 0) { + jobSubscriptions.delete(jobId) + } + } + }) + + // Stop monitoring if no more subscriptions + if (jobSubscriptions.size === 0) { + stopBackgroundMonitoring() + } + + } catch (error: any) { + // Silent error handling + } + }) + + /** + * Handle client disconnect - clean up subscriptions + */ + socket.on('disconnect', (reason: any) => { + logger.info(`[WS Status] Client disconnected - Socket ID: ${socket.id}`) + + // Remove this socket from all job subscriptions + let removedCount = 0 + jobSubscriptions.forEach((subscribers, jobId) => { + if (subscribers.has(socket.id)) { + subscribers.delete(socket.id) + removedCount++ + if (subscribers.size === 0) { + jobSubscriptions.delete(jobId) + } + } + }) + + // Stop monitoring if no more subscriptions + if (jobSubscriptions.size === 0) { + stopBackgroundMonitoring() + } + }) + }) +} + +/** + * Start background monitoring of subscribed jobs + * 
Polls the fine-tuning service and emits updates via WebSocket + */ +function startBackgroundMonitoring(io: Server) { + // Already running + if (monitoringInterval) return + + // Poll immediately, then at regular intervals + checkJobStatuses(io) + + // Use global.setInterval to satisfy TypeScript without depending on DOM lib + // store as any to avoid NodeJS type issues in this repository's tsconfig + monitoringInterval = (setInterval(() => { + checkJobStatuses(io) + }, POLLING_INTERVAL) as unknown) as any +} + +/** + * Stop background monitoring + */ +function stopBackgroundMonitoring() { + if (!monitoringInterval) return + + clearInterval(monitoringInterval as any) + monitoringInterval = null +} + +/** + * Check status of all subscribed jobs and emit updates + */ +async function checkJobStatuses(io: Server) { + const jobIds = Array.from(jobSubscriptions.keys()) + + if (jobIds.length === 0) { + stopBackgroundMonitoring() + return + } + + // Keep routine checks quiet - debug level only + logger.debug(`[WS Status] Checking ${jobIds.length} subscribed jobs`) + + // Retrieve all subscribed jobs in parallel (non-blocking) + const promises = jobIds.map(async (jobId) => { + try { + const jobData = await finetuningService.retrieveFineTuningJob(jobId) + return { jobId, jobData, error: null } + } catch (error: any) { + logger.error(`[WS Status] Error retrieving job ${jobId}: ${error?.message || error}`) + return { jobId, jobData: null, error: error?.message || 'Failed to retrieve job' } + } + }) + + const results = await Promise.allSettled(promises) + + // Emit updates to subscribed clients + results.forEach((result) => { + if (result.status === 'rejected') { + logger.error(`[WS Status] Promise rejected: ${result.reason}`) + return + } + + const { jobId, jobData, error } = result.value + + // Get subscribers for this job + const subscribers = jobSubscriptions.get(jobId) + if (!subscribers || subscribers.size === 0) return + + if (error || !jobData) { + // Emit error to 
subscribers + subscribers.forEach(socketId => { + io.to(socketId).emit('job-status-error', { + jobId, + error: error || 'No data returned' + }) + }) + return + } + + // Normalize job data + const normalizedJob = { + id: jobData.id || jobData.job_id || jobData.fine_tuning_job_id || jobId, + name: jobData.name || jobData.id || jobId, + status: jobData.status || jobData.state || 'unknown', + model: jobData.model || 'N/A', + dataset: jobData.dataset || jobData.training_file || jobData.trainingFile || 'N/A', + createdDate: jobData.createdDate || jobData.created_at || jobData.createdAt || new Date().toISOString(), + // Include all original data + ...jobData + } + + // Emit update to all subscribers + subscribers.forEach(socketId => { + io.to(socketId).emit('job-status-update', normalizedJob) + }) + + // If job is no longer running, automatically unsubscribe after a delay + const finalStatuses = ['succeeded', 'completed', 'failed', 'cancelled', 'canceled'] + if (finalStatuses.includes((normalizedJob.status || '').toLowerCase())) { + // Delay cleanup slightly to allow any final events to be delivered + setTimeout(() => { + const subs = jobSubscriptions.get(jobId) + if (subs) { + jobSubscriptions.delete(jobId) + } + }, 10000) // Keep sending updates for 10 more seconds, then clean up + } + }) +} diff --git a/studio-frontend/packages/ui/public/silent-check-sso.html b/studio-frontend/packages/ui/public/silent-check-sso.html new file mode 100644 index 0000000..fb906ac --- /dev/null +++ b/studio-frontend/packages/ui/public/silent-check-sso.html @@ -0,0 +1,16 @@ + + + + Silent Check SSO + + + + + diff --git a/studio-frontend/packages/ui/src/KeycloakContext.jsx b/studio-frontend/packages/ui/src/KeycloakContext.jsx index 9753ee6..48b9001 100644 --- a/studio-frontend/packages/ui/src/KeycloakContext.jsx +++ b/studio-frontend/packages/ui/src/KeycloakContext.jsx @@ -1,51 +1,89 @@ import React, { createContext, useContext, useEffect, useState } from 'react'; -import Keycloak from 
'keycloak-js'; // Create the Keycloak context const KeycloakContext = createContext(null); +// Check if Keycloak is disabled via environment variable +const isKeycloakDisabled = import.meta.env.VITE_DISABLE_KEYCLOAK === 'true'; +console.log('isKeycloakDisabled: ', isKeycloakDisabled); + +// Simple user object for when Keycloak is disabled +const createAdminUser = () => ({ + authenticated: true, + tokenParsed: { + email: 'admin@admin.com', + preferred_username: 'admin', + name: 'Admin User', + given_name: 'Admin', + family_name: 'User', + resource_access: { + genaistudio: { + roles: ['admin'] + } + } + }, + logout: () => { + console.log('Logout called - refreshing page'); + window.location.href = '/'; + } +}); + // Provide the Keycloak context to the application export const KeycloakProvider = ({ children }) => { const [keycloak, setKeycloak] = useState(null); const [isInitialized, setIsInitialized] = useState(false); useEffect(() => { - if (!window.crypto || !window.crypto.subtle) { - console.error("Web Crypto API is not available. This may cause security issues."); + // If Keycloak is disabled, use simple admin user + if (isKeycloakDisabled) { + console.info("Keycloak authentication is disabled. 
Using admin@admin.com as default user."); + const adminUser = createAdminUser(); + setKeycloak(adminUser); + setIsInitialized(true); + return; } - const initOptions = { - url: '/auth/', - realm: 'genaistudio', - clientId: 'genaistudio', - onLoad: 'login-required', // check-sso | login-required - responseType: 'code', // Corrected from KeycloakResponseType to responseType - silentCheckSsoRedirectUri: window.location.origin + "/silent-check-sso.html", - checkLoginIframe: false, - }; - - const kc = new Keycloak(initOptions); - - kc.init({ - onLoad: initOptions.onLoad, - responseType: 'code', // Corrected from KeycloakResponseType to responseType - }).then((auth) => { - if (!auth) { - window.location.reload(); - } else { - console.info("Authenticated"); - console.log('auth', auth); - console.log('Keycloak', kc); - - kc.onTokenExpired = () => { - console.log('token expired'); - }; - - setKeycloak(kc); // Set the Keycloak instance in state - setIsInitialized(true); // Mark initialization as complete + // Keycloak is enabled - dynamically import and initialize + import('keycloak-js').then((KeycloakModule) => { + const Keycloak = KeycloakModule.default; + + if (!window.crypto || !window.crypto.subtle) { + console.error("Web Crypto API is not available. 
This may cause security issues."); } - }).catch((error) => { - console.error("Authentication Failed", error); + + const initOptions = { + url: '/auth/', + realm: 'genaistudio', + clientId: 'genaistudio', + onLoad: 'login-required', + responseType: 'code', + silentCheckSsoRedirectUri: window.location.origin + "/silent-check-sso.html", + checkLoginIframe: false, + }; + + const kc = new Keycloak(initOptions); + + kc.init({ + onLoad: initOptions.onLoad, + responseType: 'code', + }).then((auth) => { + if (!auth) { + window.location.reload(); + } else { + console.info("Authenticated with Keycloak"); + console.log('auth', auth); + console.log('Keycloak', kc); + + kc.onTokenExpired = () => { + console.log('token expired'); + }; + + setKeycloak(kc); + setIsInitialized(true); + } + }).catch((error) => { + console.error("Authentication Failed", error); + }); }); }, []); diff --git a/studio-frontend/packages/ui/src/api/client.js b/studio-frontend/packages/ui/src/api/client.js index d2dd873..7606ec2 100644 --- a/studio-frontend/packages/ui/src/api/client.js +++ b/studio-frontend/packages/ui/src/api/client.js @@ -4,7 +4,7 @@ import { baseURL } from '@/store/constant' const apiClient = axios.create({ baseURL: `${baseURL}/api/v1`, headers: { - 'Content-type': 'application/json', + 'x-request-from': 'internal' } }) diff --git a/studio-frontend/packages/ui/src/api/finetuning.js b/studio-frontend/packages/ui/src/api/finetuning.js new file mode 100644 index 0000000..b3f4e4a --- /dev/null +++ b/studio-frontend/packages/ui/src/api/finetuning.js @@ -0,0 +1,299 @@ +import client from './client' +import { io } from 'socket.io-client' +import { baseURL as apiBaseURL } from '@/store/constant' + +// Get the base URL for WebSocket connection +const getSocketUrl = () => { + // Use the base URL from constants (without /api/v1) + return apiBaseURL || window.location.origin +} + +// Track active download sockets per jobId to avoid duplicate connections +const downloadSocketMap = new Map() + +const 
finetuningApi = { + // Upload training file + uploadFile: (file, purpose = 'fine-tune', onUploadProgress) => { + const formData = new FormData() + formData.append('file', file) + formData.append('purpose', purpose) + + return client.post('/finetuning/files', formData, { + // DO NOT set Content-Type here; letting axios set it ensures the multipart boundary is included + onUploadProgress + }) + }, + + // Create new fine-tuning job + createJob: (jobData) => { + const payload = { + training_file: jobData.training_file, + model: jobData.model + } + + // Add optional General configuration + if (jobData.General) { + payload.General = jobData.General + } + + // Add optional Dataset configuration + if (jobData.Dataset) { + payload.Dataset = jobData.Dataset + } + + // Add optional Training configuration + if (jobData.Training) { + payload.Training = jobData.Training + } + + return client.post('/finetuning/jobs', payload) + }, + + // List all fine-tuning jobs + getAllJobs: () => client.get('/finetuning/jobs'), + + // Retrieve specific fine-tuning job + getJob: (fineTuningJobId) => { + return client.post('/finetuning/jobs/retrieve', { + fine_tuning_job_id: fineTuningJobId + }) + }, + + // Cancel a fine-tuning job + cancelJob: (fineTuningJobId) => { + return client.post('/finetuning/jobs/cancel', { + fine_tuning_job_id: fineTuningJobId + }) + }, + + // Get logs for a fine-tuning job + getJobLogs: (fineTuningJobId, opts = {}) => { + return client.post('/finetuning/jobs/logs', { + fine_tuning_job_id: fineTuningJobId, + ray_job_id: opts.ray_job_id + }) + }, + + // Download fine-tuning job output as a zip file + // This returns a blob that can be saved as a file + // Accepts optional `onDownloadProgress` callback (progress event) and `signal` (AbortSignal) + downloadFinetuningOutput: (jobId, onDownloadProgress = undefined, signal = undefined) => { + const cfg = { + responseType: 'blob', + // allow long-running / large downloads + timeout: 0, + maxContentLength: Infinity, + 
maxBodyLength: Infinity + } + if (typeof onDownloadProgress === 'function') cfg.onDownloadProgress = onDownloadProgress + if (signal) cfg.signal = signal + return client.get(`/finetuning/download-ft/${encodeURIComponent(jobId)}`, cfg) + }, + + /** + * Download fine-tuning output using WebSocket for async zip preparation + * Each download gets its own dedicated WebSocket connection + * No timeout - waits indefinitely until zip is ready + * @param {string} jobId - The fine-tuning job ID + * @param {Object} callbacks - Callback functions { onProgress, onComplete, onError } + * @returns {Function} Cleanup function to disconnect this socket + */ + downloadFinetuningOutputWS: (jobId, callbacks = {}) => { + const { onProgress, onComplete, onError } = callbacks + + // Reuse existing socket for this jobId if present (even if not yet connected). + // This prevents duplicate sockets when React StrictMode mounts components twice. + const existingSocket = downloadSocketMap.get(jobId) + if (existingSocket) { + console.log(`[WS Download ${jobId}] Reusing existing socket (id: ${existingSocket.id || 'pending'})`) + // Attach provided callbacks to the existing socket + if (onProgress) existingSocket.on('download-finetuning-progress', onProgress) + if (onComplete) existingSocket.on('download-finetuning-complete', onComplete) + if (onError) existingSocket.on('download-finetuning-error', onError) + + // Return cleanup that detaches these listeners + return () => { + try { + if (onProgress) existingSocket.off('download-finetuning-progress', onProgress) + if (onComplete) existingSocket.off('download-finetuning-complete', onComplete) + if (onError) existingSocket.off('download-finetuning-error', onError) + } catch (e) {} + } + } + + // Connect specifically to the '/finetuning-download' namespace so server-side + // download handlers are isolated from status sockets. 
+ const socketUrl = getSocketUrl() + + // Create dedicated socket for this download namespace + // Append the namespace to the URL so socket.io-client connects to it directly + const socket = io(`${socketUrl}/finetuning-download`, { + transports: ['websocket', 'polling'], + reconnection: true, + reconnectionDelay: 10000, + reconnectionAttempts: 5, + timeout: false + }) + + // Store socket for this job so future calls reuse it + try { downloadSocketMap.set(jobId, socket) } catch (e) {} + + // Handle connection + socket.on('connect', () => { + // Notify that the download socket is connected for this jobId + console.log(`[WS Download ${jobId}] Connected (socket id: ${socket.id})`) + // Request download preparation + socket.emit('download-finetuning-output', { jobId }) + }) + + // Handle completion + socket.on('download-finetuning-complete', (data) => { + console.log(`[WS Download ${jobId}] Complete`) + if (onComplete) onComplete(data) + // Disconnect after completion + try { socket.disconnect() } catch (e) {} + // remove from map + try { downloadSocketMap.delete(jobId) } catch (e) {} + }) + + // Handle errors + socket.on('download-finetuning-error', (data) => { + console.error(`[WS Download ${jobId}] Error:`, data) + if (onError) onError(data) + try { socket.disconnect() } catch (e) {} + try { downloadSocketMap.delete(jobId) } catch (e) {} + }) + + // Handle connection errors + socket.on('connect_error', (error) => { + console.error(`[WS Download ${jobId}] Connection error:`, error.message) + // Don't call onError for connection errors - let it retry + }) + + // Handle disconnect + socket.on('disconnect', (reason) => { + console.log(`[WS Download ${jobId}] Disconnected:`, reason) + try { downloadSocketMap.delete(jobId) } catch (e) {} + }) + + // Return cleanup function + return () => { + try { + if (onProgress) socket.off('download-finetuning-progress', onProgress) + if (onComplete) socket.off('download-finetuning-complete', onComplete) + if (onError) 
socket.off('download-finetuning-error', onError) + } catch (e) {} + try { + if (socket && socket.connected) { + console.log(`[WS Download ${jobId}] Manually disconnecting`) + socket.disconnect() + } + } catch (e) {} + try { downloadSocketMap.delete(jobId) } catch (e) {} + } + }, + + /** + * Subscribe to real-time job status updates via WebSocket + * Creates a dedicated WebSocket connection per job ID + * @param {string} jobId - Single job ID to monitor + * @param {Object} callbacks - Callback functions { onUpdate, onError, onConnected } + * @returns {Function} Cleanup function to disconnect + */ + subscribeToJobStatus: (jobId, callbacks = {}) => { + const { onUpdate, onError, onConnected } = callbacks + + if (!jobId) { + if (onError) onError({ error: 'No job ID provided' }) + return () => {} + } + + const socketUrl = getSocketUrl() + const socket = io(socketUrl, { + transports: ['websocket', 'polling'], + reconnection: true, + reconnectionDelay: 1000, + reconnectionAttempts: Infinity, + timeout: false + }) + + const FINAL_STATUSES = ['succeeded', 'completed', 'failed', 'cancelled', 'canceled'] + + socket.on('connect', () => { + console.log(`[WS] Connected for job ${jobId}`) + socket.emit('subscribe-job-status', { jobIds: [jobId] }) + }) + + socket.on('subscription-confirmed', (data) => { + if (onConnected) onConnected(data) + }) + + socket.on('subscription-error', (data) => { + if (onError) onError(data) + }) + + socket.on('job-status-update', (jobData) => { + // Only process updates for this specific job + if (jobData.id === jobId) { + if (onUpdate) onUpdate(jobData) + + // Check if job reached final status + const status = (jobData.status || '').toString().toLowerCase() + if (FINAL_STATUSES.includes(status)) { + // Auto-disconnect after final status + setTimeout(() => { + if (socket && socket.connected) { + socket.disconnect() + } + }, 1000) + } + } + }) + + socket.on('job-status-error', (err) => { + if (err.jobId === jobId && onError) { + onError(err) + } + }) + 
+ socket.on('disconnect', () => { + console.log(`[WS] Disconnected for job ${jobId}`) + }) + + socket.on('connect_error', (error) => { + console.error(`[WS] Connection error for job ${jobId}:`, error.message) + }) + + // Return cleanup function + return () => { + if (socket && socket.connected) { + socket.emit('unsubscribe-job-status', { jobIds: [jobId] }) + socket.disconnect() + } + } + }, + + // Delete job API + deleteJob: (jobId) => { + // Call the backend delete endpoint which will cancel remote job and remove local DB records + return client.post('/finetuning/jobs/delete', { fine_tuning_job_id: jobId }) + }, + + // Get available base models (to be implemented on backend) + getBaseModels: () => { + // Return common models for now + return Promise.resolve({ + data: [ + 'meta-llama/Llama-2-7b-chat-hf', + 'meta-llama/Llama-2-7b-hf', + 'meta-llama/Llama-2-13b-hf', + 'BAAI/bge-reranker-large', + 'BAAI/bge-base-en-v1.5', + 'Qwen/Qwen2.5-3B', + 'Qwen/Qwen2.5-7B' + ] + }) + } +} + +export default finetuningApi \ No newline at end of file diff --git a/studio-frontend/packages/ui/src/hooks/useApi.jsx b/studio-frontend/packages/ui/src/hooks/useApi.jsx index 932f0a6..8f594d9 100644 --- a/studio-frontend/packages/ui/src/hooks/useApi.jsx +++ b/studio-frontend/packages/ui/src/hooks/useApi.jsx @@ -10,6 +10,8 @@ export default (apiFunc) => { try { const result = await apiFunc(...args) setData(result.data) + // return the data so callers awaiting request() get the payload + return result.data } catch (err) { setError(err || 'Unexpected Error!') } finally { diff --git a/studio-frontend/packages/ui/src/hooks/useFineTuningDownloadSocket.js b/studio-frontend/packages/ui/src/hooks/useFineTuningDownloadSocket.js new file mode 100644 index 0000000..e69de29 diff --git a/studio-frontend/packages/ui/src/hooks/useWebSocketDownload.js b/studio-frontend/packages/ui/src/hooks/useWebSocketDownload.js new file mode 100644 index 0000000..e69de29 diff --git 
a/studio-frontend/packages/ui/src/layout/MainLayout/Header/index.jsx b/studio-frontend/packages/ui/src/layout/MainLayout/Header/index.jsx index f276535..5ddb510 100644 --- a/studio-frontend/packages/ui/src/layout/MainLayout/Header/index.jsx +++ b/studio-frontend/packages/ui/src/layout/MainLayout/Header/index.jsx @@ -5,8 +5,9 @@ import { useNavigate } from 'react-router-dom' // material-ui import { useTheme } from '@mui/material/styles' -import { Avatar, Box, ButtonBase, Switch } from '@mui/material' +import { Avatar, Box, ButtonBase, Switch, Typography, IconButton, useMediaQuery } from '@mui/material' import { styled } from '@mui/material/styles' +import MenuIcon from '@mui/icons-material/Menu' // project imports import LogoSection from '../LogoSection' @@ -87,9 +88,10 @@ const MaterialUISwitch = styled(Switch)(({ theme }) => ({ } })) -const Header = ({userId}) => { +const Header = ({userId, handleLeftDrawerToggle}) => { // console.log ('Header', userId) const theme = useTheme() + const matchDownMd = useMediaQuery(theme.breakpoints.down('md')) // const navigate = useNavigate() // const customization = useSelector((state) => state.customization) @@ -122,18 +124,39 @@ const Header = ({userId}) => { width: '100%', // Full width of the parent container }} > - {/* Logo Section */} + {/* Left Section - Mobile menu + Logo */} - + {/* Mobile menu button */} + {matchDownMd && handleLeftDrawerToggle && ( + + + + )} + + {/* Logo - always visible on mobile, hidden on desktop in header */} + + + + + {/* Desktop logo - hidden on mobile */} + diff --git a/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/MenuList/CollapsedMenuList/index.jsx b/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/MenuList/CollapsedMenuList/index.jsx new file mode 100644 index 0000000..8d24ca3 --- /dev/null +++ b/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/MenuList/CollapsedMenuList/index.jsx @@ -0,0 +1,104 @@ +import { forwardRef } from 'react' +import { Link } from 
'react-router-dom' +import { useDispatch, useSelector } from 'react-redux' + +// material-ui +import { useTheme } from '@mui/material/styles' +import { Box, IconButton, Tooltip, useMediaQuery } from '@mui/material' + +// project imports +import { MENU_OPEN, SET_MENU } from '@/store/actions' +import config from '@/config' +import menuItem from '@/menu-items' + +// ==============================|| COLLAPSED SIDEBAR MENU LIST ||============================== // + +const CollapsedMenuList = () => { + const theme = useTheme() + const dispatch = useDispatch() + const customization = useSelector((state) => state.customization) + const matchesSM = useMediaQuery(theme.breakpoints.down('lg')) + + // Get all menu items + const getAllMenuItems = (items) => { + let allItems = [] + items.forEach(item => { + if (item.type === 'group' && item.children) { + item.children.forEach(child => { + if (child.type === 'item') { + allItems.push(child) + } + }) + } + }) + return allItems + } + + const menuItems = getAllMenuItems(menuItem.items) + + const itemHandler = (item) => { + dispatch({ type: MENU_OPEN, id: item.id }) + if (matchesSM) dispatch({ type: SET_MENU, opened: false }) + } + + const CollapsedNavItem = ({ item }) => { + const Icon = item.icon + const isSelected = customization.isOpen.findIndex((id) => id === item.id) > -1 + + let itemTarget = '_self' + if (item.target) { + itemTarget = '_blank' + } + + let linkProps = { + component: forwardRef(function CollapsedNavItemComponent(props, ref) { + return + }) + } + if (item?.external) { + linkProps = { component: 'a', href: item.url, target: itemTarget } + } + + return ( + + itemHandler(item)} + sx={{ + width: '40px', + height: '40px', + margin: '4px 0', + backgroundColor: isSelected ? theme.palette.action.selected : 'transparent', + color: isSelected ? 
theme.palette.primary.main : theme.palette.text.secondary, + '&:hover': { + backgroundColor: theme.palette.action.hover, + color: theme.palette.primary.main + }, + borderRadius: '8px' + }} + disabled={item.disabled} + > + {item.icon ? : null} + + + ) + } + + return ( + + {menuItems.map((item) => ( + + ))} + + ) +} + +export default CollapsedMenuList \ No newline at end of file diff --git a/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/MenuList/NavGroup/index.jsx b/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/MenuList/NavGroup/index.jsx index 0625d54..f93ecfd 100644 --- a/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/MenuList/NavGroup/index.jsx +++ b/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/MenuList/NavGroup/index.jsx @@ -44,13 +44,11 @@ const NavGroup = ({ item }) => { ) } - sx={{ py: '20px' }} + sx={{ py: 0 }} > {items} - {/* group divider */} - ) } diff --git a/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/index.jsx b/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/index.jsx index efdda72..6ce5c01 100644 --- a/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/index.jsx +++ b/studio-frontend/packages/ui/src/layout/MainLayout/Sidebar/index.jsx @@ -2,7 +2,10 @@ import PropTypes from 'prop-types' // material-ui import { useTheme } from '@mui/material/styles' -import { Box, Drawer, useMediaQuery } from '@mui/material' +import { Box, Drawer, useMediaQuery, IconButton, Tooltip, Typography } from '@mui/material' +import ChevronLeftIcon from '@mui/icons-material/ChevronLeft' +import ChevronRightIcon from '@mui/icons-material/ChevronRight' +import MenuIcon from '@mui/icons-material/Menu' // third-party import PerfectScrollbar from 'react-perfect-scrollbar' @@ -10,8 +13,9 @@ import { BrowserView, MobileView } from 'react-device-detect' // project imports import MenuList from './MenuList' +import CollapsedMenuList from './MenuList/CollapsedMenuList' import LogoSection from '../LogoSection' 
-import { drawerWidth, headerHeight } from '@/store/constant' +import { drawerWidth, drawerWidthCollapsed, headerHeight } from '@/store/constant' // ==============================|| SIDEBAR DRAWER ||============================== // @@ -19,23 +23,86 @@ const Sidebar = ({ drawerOpen, drawerToggle, window }) => { const theme = useTheme() const matchUpMd = useMediaQuery(theme.breakpoints.up('md')) - const drawer = ( + // Desktop collapsed drawer content + const collapsedDrawer = ( + + {/* Sidebar icon when collapsed with tooltip */} + + + + + + + + + {/* Collapsed Menu Items */} + + + + + ) + + // Desktop expanded drawer content + const expandedDrawer = ( <> + {/* Header with GenAI Studio text and collapse button */} - - - + + GenAI Studio + + + + + + + + + {/* Menu content */} { - - - - - ) - const container = window !== undefined ? () => window.document.body : undefined - - return ( - - + - {drawer} - - + + GenAI Studio + + + + + + + {/* Mobile Menu content */} + + + + + ) + + const container = window !== undefined ? () => window.document.body : undefined + + return ( + <> + {/* Desktop Sidebar - Always present, changes width */} + {matchUpMd && ( + + {drawerOpen ? 
expandedDrawer : collapsedDrawer} + + )} + + {/* Mobile Sidebar - Overlay */} + {!matchUpMd && ( + + {mobileDrawer} + + )} + ) } diff --git a/studio-frontend/packages/ui/src/layout/MainLayout/ViewHeader.jsx b/studio-frontend/packages/ui/src/layout/MainLayout/ViewHeader.jsx index 9648888..cf39de8 100644 --- a/studio-frontend/packages/ui/src/layout/MainLayout/ViewHeader.jsx +++ b/studio-frontend/packages/ui/src/layout/MainLayout/ViewHeader.jsx @@ -24,17 +24,18 @@ const ViewHeader = ({ const theme = useTheme() return ( - + - + {isBackButton && ( diff --git a/studio-frontend/packages/ui/src/layout/MainLayout/index.jsx b/studio-frontend/packages/ui/src/layout/MainLayout/index.jsx index f53facc..491708f 100644 --- a/studio-frontend/packages/ui/src/layout/MainLayout/index.jsx +++ b/studio-frontend/packages/ui/src/layout/MainLayout/index.jsx @@ -4,12 +4,13 @@ import { Outlet } from 'react-router-dom' // material-ui import { styled, useTheme } from '@mui/material/styles' -import { AppBar, Box, Chip, CssBaseline, Toolbar, useMediaQuery } from '@mui/material' +import { AppBar, Box, Chip, CssBaseline, Toolbar, useMediaQuery, IconButton, Fab } from '@mui/material' +import ChevronRightIcon from '@mui/icons-material/ChevronRight' // project imports import Header from './Header' import Sidebar from './Sidebar' -import { drawerWidth, headerHeight } from '@/store/constant' +import { drawerWidth, drawerWidthCollapsed, headerHeight } from '@/store/constant' import { SET_MENU } from '@/store/actions' import {useKeycloak } from '../../KeycloakContext.jsx' @@ -17,43 +18,31 @@ import {useKeycloak } from '../../KeycloakContext.jsx' // styles const Main = styled('main', { shouldForwardProp: (prop) => prop !== 'open' })(({ theme, open }) => ({ ...theme.typography.mainContent, - ...(!open && { - backgroundColor: 'transparent', - borderBottomLeftRadius: 0, - borderBottomRightRadius: 0, - transition: theme.transitions.create('all', { - easing: theme.transitions.easing.sharp, - duration: 
theme.transitions.duration.leavingScreen - }), - marginRight: 0, - [theme.breakpoints.up('md')]: { - marginLeft: -drawerWidth, - width: `calc(100% - ${drawerWidth}px)` - }, - [theme.breakpoints.down('md')]: { - marginLeft: '20px', - width: `calc(100% - ${drawerWidth}px)`, - padding: '16px' - }, - [theme.breakpoints.down('sm')]: { - marginLeft: '10px', - width: `calc(100% - ${drawerWidth}px)`, - padding: '16px', - marginRight: '10px' - } + backgroundColor: 'transparent', + borderBottomLeftRadius: 0, + borderBottomRightRadius: 0, + transition: theme.transitions.create(['margin', 'width'], { + easing: theme.transitions.easing.sharp, + duration: theme.transitions.duration.leavingScreen }), - ...(open && { - backgroundColor: 'transparent', - transition: theme.transitions.create('all', { + marginRight: 0, + [theme.breakpoints.up('md')]: { + marginLeft: 0, + width: `calc(100% - ${open ? drawerWidth : drawerWidthCollapsed}px)`, + transition: theme.transitions.create(['margin', 'width'], { easing: theme.transitions.easing.easeOut, duration: theme.transitions.duration.enteringScreen }), + paddingLeft: '8px', + paddingRight: '8px', + paddingBottom: '8px', + paddingTop: '2px' + }, + [theme.breakpoints.down('md')]: { marginLeft: 0, - marginRight: 0, - borderBottomLeftRadius: 0, - borderBottomRightRadius: 0, - width: `calc(100% - ${drawerWidth}px)` - }) + width: '100%', + padding: '16px' + } })) // ==============================|| MAIN LAYOUT ||============================== // @@ -81,6 +70,7 @@ const MainLayout = () => { } useEffect(() => { + // On desktop, start with sidebar open; on mobile, keep it closed until user opens setTimeout(() => dispatch({ type: SET_MENU, opened: !matchDownMd }), 0) // eslint-disable-next-line react-hooks/exhaustive-deps }, [matchDownMd]) @@ -101,12 +91,12 @@ const MainLayout = () => { }} > -
+
{/* drawer */} - {/* */} + {/* main content */} (
diff --git a/studio-frontend/packages/ui/src/menu-items/dashboard.js b/studio-frontend/packages/ui/src/menu-items/dashboard.js index 6c7c3b1..ba88227 100644 --- a/studio-frontend/packages/ui/src/menu-items/dashboard.js +++ b/studio-frontend/packages/ui/src/menu-items/dashboard.js @@ -8,11 +8,13 @@ import { IconLock, IconRobot, IconVariable, - IconFiles + IconFiles, + IconApps, + IconBrain } from '@tabler/icons-react' // constant -const icons = { IconUsersGroup, IconHierarchy, IconBuildingStore, IconKey, IconTool, IconLock, IconRobot, IconVariable, IconFiles } +const icons = { IconUsersGroup, IconHierarchy, IconBuildingStore, IconKey, IconTool, IconLock, IconRobot, IconVariable, IconFiles, IconApps, IconBrain } // ==============================|| DASHBOARD MENU ITEMS ||============================== // @@ -23,27 +25,19 @@ const dashboard = { children: [ { id: 'opeaflows', - title: 'OPEA Flows', + title: 'Workflows', type: 'item', url: '/opeaflows', - icon: icons.IconHierarchy, - breakcrumbs: true + icon: icons.IconApps, + breadcrumbs: true }, { - id: 'sandbox', - title: 'OPEA Sandbox Evaluation', + id: 'finetuning', + title: 'Fine-tuning', type: 'item', - url: '/opeaflows', - icon: icons.IconHierarchy, - breakcrumbs: true - }, - { - id: 'opeadeployment', - title: 'OPEA Deployment', - type: 'item', - url: '/opeaflows', - icon: icons.IconHierarchy, - breakcrumbs: true + url: '/finetuning', + icon: icons.IconBrain, + breadcrumbs: true } // { // id: 'chatflows', diff --git a/studio-frontend/packages/ui/src/routes/MainRoutes.jsx b/studio-frontend/packages/ui/src/routes/MainRoutes.jsx index cd77b07..6b7a388 100644 --- a/studio-frontend/packages/ui/src/routes/MainRoutes.jsx +++ b/studio-frontend/packages/ui/src/routes/MainRoutes.jsx @@ -7,6 +7,9 @@ import Loadable from '@/ui-component/loading/Loadable' // chatflows routing const Opeaflows = Loadable(lazy(() => import('@/views/opeaflows'))) +// finetuning routing +const Finetuning = Loadable(lazy(() => 
import('@/views/finetuning'))) + // tracer routing const Tracer = Loadable(lazy(() => import('@/views/tracer'))) @@ -59,6 +62,10 @@ const MainRoutes = { path: '/opeaflows', element: }, + { + path: '/finetuning', + element: + }, { path:'/tracer/:ns', element: diff --git a/studio-frontend/packages/ui/src/store/constant.js b/studio-frontend/packages/ui/src/store/constant.js index de700eb..1143c56 100644 --- a/studio-frontend/packages/ui/src/store/constant.js +++ b/studio-frontend/packages/ui/src/store/constant.js @@ -1,6 +1,7 @@ // constant export const gridSpacing = 3 export const drawerWidth = 260 +export const drawerWidthCollapsed = 64 export const appDrawerWidth = 320 export const headerHeight = 80 export const maxScroll = 100000 diff --git a/studio-frontend/packages/ui/src/ui-component/button/AnimateButton.jsx b/studio-frontend/packages/ui/src/ui-component/button/AnimateButton.jsx index ce2d3fb..98b8852 100644 --- a/studio-frontend/packages/ui/src/ui-component/button/AnimateButton.jsx +++ b/studio-frontend/packages/ui/src/ui-component/button/AnimateButton.jsx @@ -5,7 +5,7 @@ import { motion, useCycle } from 'framer-motion' // ==============================|| ANIMATION BUTTON ||============================== // -const AnimateButton = forwardRef(function AnimateButton({ children, type, direction, offset, scale }, ref) { +const AnimateButton = forwardRef(function AnimateButton({ children, type = 'scale', direction = 'right', offset = 10, scale = { hover: 1, tap: 0.9 } }, ref) { let offset1 let offset2 switch (direction) { @@ -84,14 +84,6 @@ AnimateButton.propTypes = { scale: PropTypes.oneOfType([PropTypes.number, PropTypes.object]) } -AnimateButton.defaultProps = { - type: 'scale', - offset: 10, - direction: 'right', - scale: { - hover: 1, - tap: 0.9 - } -} +// defaults handled via function parameter defaults export default AnimateButton diff --git a/studio-frontend/packages/ui/src/ui-component/cards/MainCard.jsx 
b/studio-frontend/packages/ui/src/ui-component/cards/MainCard.jsx index 61bbf75..a295737 100644 --- a/studio-frontend/packages/ui/src/ui-component/cards/MainCard.jsx +++ b/studio-frontend/packages/ui/src/ui-component/cards/MainCard.jsx @@ -18,8 +18,9 @@ const MainCard = forwardRef(function MainCard( content = true, contentClass = '', contentSX = { - px: 2, - py: 0 + px: 3, + pt: 3, + pb: 3 }, darkTitle, secondary, @@ -39,8 +40,9 @@ const MainCard = forwardRef(function MainCard( ':hover': { boxShadow: boxShadow ? shadow || '0 2px 14px 0 rgb(32 40 45 / 8%)' : 'inherit' }, - maxWidth: '1280px', - mx: 'auto', + width: '100%', + mx: 0, + my: 0, ...sx }} > diff --git a/studio-frontend/packages/ui/src/ui-component/extended/Logo.jsx b/studio-frontend/packages/ui/src/ui-component/extended/Logo.jsx index e1a862d..153bb1f 100644 --- a/studio-frontend/packages/ui/src/ui-component/extended/Logo.jsx +++ b/studio-frontend/packages/ui/src/ui-component/extended/Logo.jsx @@ -15,8 +15,7 @@ const Logo = () => { Flowise ) diff --git a/studio-frontend/packages/ui/src/ui-component/extended/Transitions.jsx b/studio-frontend/packages/ui/src/ui-component/extended/Transitions.jsx index 4942dee..1b4ba51 100644 --- a/studio-frontend/packages/ui/src/ui-component/extended/Transitions.jsx +++ b/studio-frontend/packages/ui/src/ui-component/extended/Transitions.jsx @@ -6,7 +6,7 @@ import { Collapse, Fade, Box, Grow, Slide, Zoom } from '@mui/material' // ==============================|| TRANSITIONS ||============================== // -const Transitions = forwardRef(function Transitions({ children, position, type, direction, ...others }, ref) { +const Transitions = forwardRef(function Transitions({ children, position = 'top-left', type = 'grow', direction = 'up', ...others }, ref) { let positionSX = { transformOrigin: '0 0 0' } @@ -98,10 +98,6 @@ Transitions.propTypes = { direction: PropTypes.oneOf(['up', 'down', 'left', 'right']) } -Transitions.defaultProps = { - type: 'grow', - position: 
'top-left', - direction: 'up' -} +// defaults handled via function parameter defaults export default Transitions diff --git a/studio-frontend/packages/ui/src/ui-component/table/FlowListTable.jsx b/studio-frontend/packages/ui/src/ui-component/table/FlowListTable.jsx index bcc415a..28703c4 100644 --- a/studio-frontend/packages/ui/src/ui-component/table/FlowListTable.jsx +++ b/studio-frontend/packages/ui/src/ui-component/table/FlowListTable.jsx @@ -367,7 +367,7 @@ export const FlowListTable = ({ data, images, isLoading, filterFunction, updateF setDeployWebSocketForId(id, wsInstance); wsInstance.onopen = () => { - console.log('[WebSocket] Connected for click deployment monitoring', id); + console.log('[WS] Connected for click deployment monitoring', id); wsInstance.send(JSON.stringify({ hostname: deploymentConfig.hostname, username: deploymentConfig.username, @@ -378,7 +378,7 @@ export const FlowListTable = ({ data, images, isLoading, filterFunction, updateF wsInstance.onmessage = (event) => { let data; try { data = JSON.parse(event.data); } catch { return; } - console.log('[WebSocket] Click deployment message:', data); + console.log('[WS] Click deployment message:', data); if (data.status === 'Success') { setDeployStatusForId(id, ['Success', data.message]); @@ -431,19 +431,19 @@ export const FlowListTable = ({ data, images, isLoading, filterFunction, updateF }; wsInstance.onerror = (error) => { - console.error('[WebSocket] Click deployment error:', error); + console.error('[WS] Click deployment error:', error); setDeployStatusForId(id, ['Error', 'Connection error during deployment monitoring']); wsInstance.close(); setDeployWebSocketForId(id, null); }; wsInstance.onclose = (event) => { - console.log(`[WebSocket] Click deployment closed: code=${event.code}, reason='${event.reason}', wasClean=${event.wasClean}`); + console.log(`[WS] Click deployment closed: code=${event.code}, reason='${event.reason}', wasClean=${event.wasClean}`); setDeployWebSocketForId(id, null); // 
Check deployment status if abnormal closure if (event.code !== 1000 && event.code !== 1001) { - console.log('[WebSocket] Abnormal closure detected, checking deployment status...'); + console.log('[WS] Abnormal closure detected, checking deployment status...'); setTimeout(async () => { try { const response = await chatflowsApi.getSpecificChatflow(id); @@ -617,11 +617,10 @@ export const FlowListTable = ({ data, images, isLoading, filterFunction, updateF {userRole === 'admin' && - + User diff --git a/studio-frontend/packages/ui/src/views/debuglogs/index.jsx b/studio-frontend/packages/ui/src/views/debuglogs/index.jsx index fb92fc2..0201e18 100644 --- a/studio-frontend/packages/ui/src/views/debuglogs/index.jsx +++ b/studio-frontend/packages/ui/src/views/debuglogs/index.jsx @@ -10,6 +10,7 @@ import { Paper, Button, Box, + Stack, Typography, Divider, Fade, @@ -127,17 +128,30 @@ export default function PodLogsView() { const selectedEventPod = podsData.pods.find(p => p.name === selectedPodEvents); return ( - // - - - {workflowName && ( - - Workflow name: {workflowName} - - )} - {/* Namespace: {podsData.namespace} */} - - + + + + + Debug Logs + + {workflowName && ( + + Workflow name: {workflowName} + + )} + + {/* Namespace: {podsData.namespace} */} + + Auto refresh: + + + ) : ( + + + {/* File Info */} + + + + + {uploadedFile.name} + + } + label="Selected" + color="primary" + size="small" + variant="outlined" + /> + + + + + + + + {formatFileSize(uploadedFile.size)} + + + {/* Upload Progress */} + {uploadProgress > 0 && uploadProgress < 100 && ( + + )} + + {/* File Preview */} + {preview && ( + + + Preview ({preview.totalLines} total lines): + + + + {preview.lines.join('\n')} + {preview.lines.length < preview.totalLines && '\n...'} + + + + )} + + + )} + + {/* Error Display */} + {(uploadError || error) && ( + } + > + {uploadError || error} + + )} + + ) +} + +FileUploadArea.propTypes = { + onFileUpload: PropTypes.func.isRequired, + acceptedTypes: 
PropTypes.arrayOf(PropTypes.string), + maxSizeMB: PropTypes.number, + error: PropTypes.string +} + +export default FileUploadArea \ No newline at end of file diff --git a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx new file mode 100644 index 0000000..e9c9b13 --- /dev/null +++ b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx @@ -0,0 +1,1345 @@ +import { useState } from 'react' +import PropTypes from 'prop-types' + +// material-ui +import { + Box, + Button, + Dialog, + DialogActions, + DialogContent, + DialogTitle, + TextField, + FormControl, + InputLabel, + Select, + MenuItem, + Checkbox, + FormControlLabel, + Typography, + Stack, + IconButton, + CircularProgress, +} from '@mui/material' +import Autocomplete from '@mui/material/Autocomplete' +import { useTheme } from '@mui/material/styles' + +// icons +import { IconX } from '@tabler/icons-react' + +// components +import FileUploadArea from './FileUploadArea' + +// API +import finetuningApi from '@/api/finetuning' + +const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { + const theme = useTheme() + + const [formData, setFormData] = useState({ + baseModel: '', + trainingDataset: null, + hf_token: '', + // OpenAI standard parameters + openai_params: { + n_epochs: 3, + batch_size: 16, + learning_rate_multiplier: 1.0, + prompt_loss_weight: 0.01 + }, + // Extended configuration + general: { + task: 'instruction_tuning', + output_dir: './tmp', + report_to: 'none', + save_strategy: 'no', + enable_gradient_checkpointing: false, + trust_remote_code: false + }, + dataset: { + max_length: 512, + block_size: 512, + validation_split_percentage: 5, + padding_side: 'right', + truncation_side: 'right', + max_source_length: 384, + pad_to_max: false, + query_max_len: 128, + passage_max_len: 128, + train_group_size: 8, + query_instruction_for_retrieval: '', + passage_instruction_for_retrieval: '', + 
reasoning_dataset_keys: ['Question', 'Complex_CoT', 'Response'], + // raw input string to preserve trailing commas/spaces while editing + reasoning_dataset_keys_input: 'Question, Complex_CoT, Response', + max_prompt_length: 512, + data_preprocess_type: 'neural_chat', + data_preprocess_neural_chat: true, + padding: 'true', + truncation: true, + mask_input: true, + mask_response: true + }, + training: { + optimizer: 'adamw_torch', + device: 'cpu', + batch_size: 2, + epochs: 1, + max_train_steps: null, + learning_rate: 5.0e-5, + lr_scheduler: 'linear', + weight_decay: 0.0, + num_training_workers: 1, + accelerate_mode: 'DDP', + mixed_precision: 'no', + gradient_accumulation_steps: 1, + logging_steps: 10, + dpo_beta: 0.1 + , + // Embedding-specific training config (only used when task === 'embedding') + embedding_training_config: { + + temperature: 0.02, + sentence_pooling_method: 'cls', + normalized: true, + use_inbatch_neg: true + } + }, + lora: { + r: 8, + lora_alpha: 32, + lora_dropout: 0.1, + task_type: 'CAUSAL_LM' + } + }) + + const [errors, setErrors] = useState({}) + const [isSubmitting, setIsSubmitting] = useState(false) + + const [loraEnabled, setLoraEnabled] = useState(false) + + + const baseModels = [ + 'meta-llama/Llama-2-7b-chat-hf', + 'meta-llama/Llama-2-7b-hf', + 'meta-llama/Llama-2-13b-hf', + 'BAAI/bge-reranker-large', + 'BAAI/bge-base-en-v1.5', + 'Qwen/Qwen2.5-3B', + 'Qwen/Qwen2.5-7B' + ] + + const taskTypes = [ + { value: 'instruction_tuning', label: 'Instruction Tuning' }, + { value: 'rerank', label: 'Reranking' }, + { value: 'embedding', label: 'Embedding' }, + { value: 'pretraining', label: 'Pretraining' }, + { value: 'dpo', label: 'Direct Preference Optimization (DPO)' }, + { value: 'reasoning', label: 'Reasoning' } + ] + + const handleInputChange = (field, value) => { + setFormData(prev => ({ + ...prev, + [field]: value + })) + + // Clear error for this field + if (errors[field]) { + setErrors(prev => ({ + ...prev, + [field]: null + })) + } + } + 
+ const handleOpenAIParamChange = (param, value) => { + setFormData(prev => ({ + ...prev, + openai_params: { + ...prev.openai_params, + [param]: value + } + })) + } + + const handleConfigChange = (section, param, value) => { + setFormData(prev => ({ + ...prev, + [section]: { + ...prev[section], + [param]: value + } + })) + } + + // When a file is selected in FileUploadArea, just store the File object locally. + // The actual upload to the server will happen when the user clicks Create Job. + const handleFileUpload = (fileType, file) => { + if (!file) { + setFormData(prev => ({ + ...prev, + [fileType]: null + })) + return + } + + // Store the raw File object and its name; do not upload now + const fileEntry = { + file, + name: file.name + } + + setFormData(prev => ({ + ...prev, + [fileType]: fileEntry + })) + + // Clear any previous error for this field + if (errors[fileType]) { + setErrors(prev => ({ + ...prev, + [fileType]: null + })) + } + } + + const validateForm = () => { + const newErrors = {} + + // Base validation + if (!formData.baseModel) { + newErrors.baseModel = 'Base model is required' + } + + if (!formData.trainingDataset) { + newErrors.trainingDataset = 'Training dataset is required' + } + + // OpenAI parameters validation + if (formData.openai_params.learning_rate_multiplier <= 0) { + newErrors.learning_rate_multiplier = 'Learning rate multiplier must be greater than 0' + } + + if (formData.openai_params.batch_size <= 0) { + newErrors.batch_size = 'Batch size must be greater than 0' + } + + if (formData.openai_params.n_epochs <= 0) { + newErrors.n_epochs = 'Number of epochs must be greater than 0' + } + + // Training parameters validation + if (formData.training.learning_rate <= 0) { + newErrors.learning_rate = 'Learning rate must be greater than 0' + } + + if (formData.training.epochs <= 0) { + newErrors.epochs = 'Epochs must be greater than 0' + } + + if (formData.training.logging_steps <= 0) { + newErrors.logging_steps = 'Logging steps must be 
greater than 0' + } + + // General validation + if (!formData.general.output_dir) { + newErrors.output_dir = 'Output directory is required' + } + + // Dataset validation + if (formData.dataset.max_length <= 0) { + newErrors.dataset_max_length = 'Max length must be greater than 0' + } + + // LoRA parameters validation (only when enabled) + if (loraEnabled) { + if (formData.lora.r <= 0) { + newErrors.lora_r = 'LoRA rank must be greater than 0' + } + + if (formData.lora.lora_alpha <= 0) { + newErrors.lora_alpha = 'LoRA alpha must be greater than 0' + } + + if (formData.lora.lora_dropout < 0 || formData.lora.lora_dropout > 1) { + newErrors.lora_dropout = 'LoRA dropout must be between 0 and 1' + } + } + + setErrors(newErrors) + return Object.keys(newErrors).length === 0 + } + + const handleSubmit = async () => { + if (!validateForm()) { + return + } + + setIsSubmitting(true) + + try { + // Create the job configuration payload + // Build General object and set lora_config based on the LoRA checkbox + const generalPayload = { ...formData.general } + // If user enabled LoRA, include the object; otherwise send explicit null + generalPayload.lora_config = loraEnabled ? 
formData.lora : null + + // If the user selected a file but hasn't uploaded it yet, upload it now + let trainingFileName = formData.trainingDataset?.uploadedName || null + if (formData.trainingDataset && formData.trainingDataset.file) { + try { + setIsSubmitting(true) + const uploadResp = await finetuningApi.uploadFile(formData.trainingDataset.file, 'fine-tune', () => {}) + trainingFileName = uploadResp.data?.filename || null + } catch (err) { + console.error('Error uploading training file before job creation:', err) + setErrors(prev => ({ ...prev, trainingDataset: 'Failed to upload training file: ' + (err.message || 'Unknown') })) + setIsSubmitting(false) + return + } + } + + // Build payload + const jobPayload = { + model: formData.baseModel, + training_file: trainingFileName + } + + // General configuration with LoRA config + const gen = { ...formData.general } + gen.lora_config = loraEnabled ? formData.lora : null + gen.config = gen.config || {} + if (formData.hf_token) { + gen.config.token = formData.hf_token + } + jobPayload.General = gen + jobPayload.task = gen.task || 'instruction_tuning' + + // Dataset configuration + jobPayload.Dataset = { + max_length: formData.dataset.max_length, + block_size: formData.dataset.block_size, + max_source_length: formData.dataset.max_source_length, + padding_side: formData.dataset.padding_side, + truncation_side: formData.dataset.truncation_side, + padding: formData.dataset.padding, + truncation: formData.dataset.truncation, + mask_input: formData.dataset.mask_input, + mask_response: formData.dataset.mask_response, + query_max_len: formData.dataset.query_max_len, + passage_max_len: formData.dataset.passage_max_len, + train_group_size: formData.dataset.train_group_size, + query_instruction_for_retrieval: formData.dataset.query_instruction_for_retrieval, + passage_instruction_for_retrieval: formData.dataset.passage_instruction_for_retrieval, + pad_to_max: formData.dataset.pad_to_max, + data_preprocess_type: 
formData.dataset.data_preprocess_neural_chat ? 'neural_chat' : null + } + + // Training configuration + jobPayload.Training = { + optimizer: formData.training.optimizer, + device: formData.training.device, + batch_size: formData.training.batch_size, + epochs: formData.training.epochs, + max_train_steps: formData.training.max_train_steps, + learning_rate: formData.training.learning_rate, + lr_scheduler: formData.training.lr_scheduler, + weight_decay: formData.training.weight_decay, + num_training_workers: formData.training.num_training_workers, + accelerate_mode: formData.training.accelerate_mode, + mixed_precision: formData.training.mixed_precision, + gradient_accumulation_steps: formData.training.gradient_accumulation_steps, + logging_steps: formData.training.logging_steps, + // embedding_training_config will be attached below only for embedding task + dpo_beta: formData.training.dpo_beta + } + + // If embedding task, attach embedding_training_config + if (jobPayload.task === 'embedding') { + jobPayload.Training.embedding_training_config = formData.training.embedding_training_config + } + + // Call the actual API + const response = await finetuningApi.createJob(jobPayload) + + // Create job object from response + const newJob = { + id: response.data?.id || response.data?.fine_tuning_job_id || Date.now().toString(), + status: response.data?.status || 'pending', + model: formData.baseModel, + task: jobPayload.task || 'instruction_tuning', + dataset: formData.trainingDataset?.suffixedName || formData.trainingDataset?.name || 'Unknown', + progress: '0%', + createdDate: response.data?.created_at || new Date().toISOString(), + training_file: jobPayload.training_file, + openai_params: formData.openai_params, + training: formData.training, + general: formData.general, + dataset_config: formData.dataset + } + + if (formData.hf_token) { + newJob.general = { ...newJob.general, config: { ...(newJob.general.config || {}), token: formData.hf_token } } + } + + 
onJobCreated(newJob) + handleClose() + } catch (error) { + console.error('Error creating fine-tuning job:', error) + // TODO: Show error notification + } finally { + setIsSubmitting(false) + } + } + + const handleClose = () => { + setFormData({ + baseModel: '', + trainingDataset: null, + hf_token: '', + // OpenAI standard parameters + openai_params: { + n_epochs: 3, + batch_size: 16, + learning_rate_multiplier: 1.0, + prompt_loss_weight: 0.01 + }, + // Extended configuration + general: { + task: 'instruction_tuning', + output_dir: './tmp', + report_to: 'none', + save_strategy: 'no', + enable_gradient_checkpointing: false, + trust_remote_code: false + }, + dataset: { + max_length: 512, + block_size: 512, + validation_split_percentage: 5, + padding_side: 'right', + truncation_side: 'right', + max_source_length: 384, + pad_to_max: false, + query_max_len: 128, + passage_max_len: 128, + train_group_size: 8, + query_instruction_for_retrieval: '', + passage_instruction_for_retrieval: '', + reasoning_dataset_keys: ['Question', 'Complex_CoT', 'Response'], + reasoning_dataset_keys_input: 'Question, Complex_CoT, Response', + max_prompt_length: 512, + data_preprocess_type: 'neural_chat', + data_preprocess_neural_chat: true, + padding: 'true', + truncation: true, + mask_input: true, + mask_response: true + }, + training: { + optimizer: 'adamw_torch', + device: 'cpu', + batch_size: 2, + epochs: 1, + max_train_steps: null, + learning_rate: 5.0e-5, + lr_scheduler: 'linear', + weight_decay: 0.0, + num_training_workers: 1, + accelerate_mode: 'DDP', + mixed_precision: 'no', + gradient_accumulation_steps: 1, + logging_steps: 10, + dpo_beta: 0.1 + , + embedding_training_config: { + temperature: 0.02, + sentence_pooling_method: 'cls', + normalized: true, + use_inbatch_neg: true + } + }, + lora: { + r: 8, + lora_alpha: 32, + lora_dropout: 0.1, + task_type: 'CAUSAL_LM' + } + }) + setLoraEnabled(false) + setErrors({}) + setIsSubmitting(false) + onClose() + } + + return ( + + + + Create New 
Fine-tuning Job + + + + {/* Close button moved out of title: absolutely positioned within the dialog Paper */} + + + + + + + {/* Left Column: Model & Task Setup */} + + + {/* Base Model */} + + handleInputChange('baseModel', newValue || '')} + onInputChange={(event, newInputValue) => handleInputChange('baseModel', newInputValue)} + renderInput={(params) => ( + + )} + /> + {errors.baseModel && ( + + {errors.baseModel} + + )} + + + {/* HF Token */} + handleInputChange('hf_token', e.target.value)} + fullWidth + size="medium" + sx={{ '& .MuiInputBase-root': { minHeight: 48 } }} + /> + + {/* Task Type */} + + Task Type + + + + {/* Inline Instruction Tuning config shown right under Task Type */} + {formData.general.task === 'instruction_tuning' && ( + + + {/* 2-column responsive CSS grid for short-value fields */} + + + handleConfigChange('dataset', 'max_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + handleConfigChange('dataset', 'block_size', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + handleConfigChange('dataset', 'max_source_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + handleConfigChange('dataset', 'max_prompt_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + + handleConfigChange('dataset', 'padding_side', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + handleConfigChange('dataset', 'truncation_side', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + handleConfigChange('dataset', 'padding', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + handleConfigChange('dataset', 'truncation', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + handleConfigChange('dataset', 'mask_input', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + handleConfigChange('dataset', 'mask_response', e.target.value)} + size="small" 
+ sx={{ width: '100%' }} + /> + + + + handleConfigChange('dataset', 'data_preprocess_neural_chat', e.target.checked)} + /> + } + label="Use neural_chat for data preprocess type" + size="small" + sx={{ mt: 0 }} + /> + + + )} + + {/* Reasoning task dataset config (mirrors instruction tuning controls) */} + {formData.general.task === 'reasoning' && ( + + + {/* Comma-separated keys field that maps to array */} + { + const raw = e.target.value + // update raw input so trailing separators are preserved while typing + handleConfigChange('dataset', 'reasoning_dataset_keys_input', raw) + // allow comma or whitespace as separators to derive the array + const arr = raw.split(/[,\s]+/).map(s => s.trim()).filter(Boolean) + handleConfigChange('dataset', 'reasoning_dataset_keys', arr) + }} + size="small" + fullWidth + /> + + {/* Numeric fields: inline+scroll on small screens, 3-column fluid layout on md+ (no scrollbar) */} + + + handleConfigChange('dataset', 'max_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + handleConfigChange('dataset', 'max_source_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + handleConfigChange('dataset', 'max_prompt_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + {/* 2-column responsive CSS grid for short-value fields */} + + + handleConfigChange('dataset', 'padding_side', e.target.value)} + size="small" + sx={{ width: '100%', maxWidth: 240 }} + /> + + + + handleConfigChange('dataset', 'truncation_side', e.target.value)} + size="small" + sx={{ width: '100%', maxWidth: 240 }} + /> + + + + handleConfigChange('dataset', 'padding', e.target.value)} + size="small" + sx={{ width: '100%', maxWidth: 240 }} + /> + + + + handleConfigChange('dataset', 'truncation', e.target.value)} + size="small" + sx={{ width: '100%', maxWidth: 240 }} + /> + + + + handleConfigChange('dataset', 'mask_input', e.target.value)} + size="small" + sx={{ width: 
'100%', maxWidth: 240 }} + /> + + + + handleConfigChange('dataset', 'mask_response', e.target.value)} + size="small" + sx={{ width: '100%', maxWidth: 240 }} + /> + + + + + )} + + {/* Pretraining task dataset config: minimal fields (max_length, truncation, padding) */} + {formData.general.task === 'pretraining' && ( + + + + + handleConfigChange('dataset', 'max_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + handleConfigChange('dataset', 'block_size', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + handleConfigChange('dataset', 'padding', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + handleConfigChange('dataset', 'truncation', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + + + )} + + {/* Rerank task dataset config */} + {formData.general.task === 'rerank' && ( + + + + + handleConfigChange('dataset', 'max_length', e.target.value)} + size="small" + fullWidth + /> + + + handleConfigChange('dataset', 'train_group_size', e.target.value)} + size="small" + fullWidth + /> + + + + + )} + + {/* Embedding task dataset config */} + {formData.general.task === 'embedding' && ( + + + handleConfigChange('dataset', 'query_instruction_for_retrieval', e.target.value)} + size="small" + fullWidth + /> + + handleConfigChange('dataset', 'passage_instruction_for_retrieval', e.target.value)} + size="small" + fullWidth + /> + + + + handleConfigChange('dataset', 'query_max_len', e.target.value)} + size="small" + fullWidth + /> + + + handleConfigChange('dataset', 'passage_max_len', e.target.value)} + size="small" + fullWidth + /> + + + + handleConfigChange('dataset', 'padding', e.target.value)} + size="small" + fullWidth + /> + + handleConfigChange('dataset', 'train_group_size', e.target.value)} + size="small" + fullWidth + /> + + + )} + + {/* DPO task dataset config: max_length, max_prompt_length, pad_to_max */} + {formData.general.task === 'dpo' && ( + + + + + 
handleConfigChange('dataset', 'max_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + handleConfigChange('dataset', 'max_prompt_length', e.target.value)} + size="small" + sx={{ width: '100%' }} + fullWidth + /> + + + + handleConfigChange('dataset', 'pad_to_max', e.target.value)} + size="small" + sx={{ width: '100%' }} + /> + + + + + )} + + + + {/* Right Column: Training Dataset & Training Parameters */} + + + {/* Training Dataset Upload */} + + handleFileUpload('trainingDataset', file)} + acceptedTypes={['.json', '.jsonl', '.csv']} + maxSizeMB={100} + error={errors.trainingDataset} + /> + + + {/* Training Parameters */} + + + {/* compact grid similar to task-type configs */} + + + handleConfigChange('training', 'epochs', parseInt(e.target.value))} + error={!!errors.epochs} + inputProps={{ min: 1, step: 1 }} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + handleConfigChange('training', 'batch_size', parseInt(e.target.value))} + error={!!errors.batch_size} + inputProps={{ min: 1, max: 256, step: 1 }} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + + + + handleConfigChange('training', 'learning_rate', parseFloat(e.target.value))} + error={!!errors.learning_rate} + inputProps={{ min: 0.00001, max: 0.01, step: 0.00001 }} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + handleConfigChange('training', 'max_train_steps', e.target.value ? 
parseInt(e.target.value) : null)} + inputProps={{ min: 1, step: 1 }} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + + + + handleConfigChange('training', 'optimizer', e.target.value)} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + handleConfigChange('training', 'gradient_accumulation_steps', parseInt(e.target.value))} + inputProps={{ min: 1, step: 1 }} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + + + + + Device + + + + + + Mixed Precision + + + + + + Accelerate Mode + + + + + + + + handleConfigChange('training', 'weight_decay', parseFloat(e.target.value))} + inputProps={{ min: 0, step: 0.01 }} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + handleConfigChange('training', 'logging_steps', parseInt(e.target.value))} + error={!!errors.logging_steps} + inputProps={{ min: 1, step: 1 }} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + handleConfigChange('training', 'lr_scheduler', e.target.value)} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + + + + {}} + InputProps={{ readOnly: true }} + inputProps={{ min: 1, step: 1, 'aria-readonly': true }} + disabled + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + handleConfigChange('training', 'dpo_beta', parseFloat(e.target.value))} + inputProps={{ min: 0, step: 0.01 }} + size="small" + fullWidth + sx={{ '& .MuiInputBase-root': { minHeight: 40 } }} + /> + + + + {formData.general.task === 'embedding' ? ( + + + + handleConfigChange('training', 'embedding_training_config', { + ...formData.training.embedding_training_config, + temperature: e.target.value === '' ? 
null : parseFloat(e.target.value) + })} + inputProps={{ step: 0.01 }} + size="small" + fullWidth + /> + + + handleConfigChange('training', 'embedding_training_config', { + ...formData.training.embedding_training_config, + sentence_pooling_method: e.target.value + })} + size="small" + fullWidth + /> + + + + handleConfigChange('training', 'embedding_training_config', { + ...formData.training.embedding_training_config, + normalized: e.target.checked + })} + /> + } + label="Normalized embeddings" + /> + + handleConfigChange('training', 'embedding_training_config', { + ...formData.training.embedding_training_config, + use_inbatch_neg: e.target.checked + })} + /> + } + label="Use in-batch negatives" + /> + + ) : null } + + + + + + + + + + + + + ) +} + +FinetuningJobModal.propTypes = { + open: PropTypes.bool.isRequired, + onClose: PropTypes.func.isRequired, + onJobCreated: PropTypes.func.isRequired +} + +export default FinetuningJobModal \ No newline at end of file diff --git a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx new file mode 100644 index 0000000..7137603 --- /dev/null +++ b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx @@ -0,0 +1,782 @@ +import { useState, useMemo, useEffect, useRef } from 'react' +import { useSelector } from 'react-redux' +import PropTypes from 'prop-types' + +// material-ui +import { + Box, + Button, + Chip, + LinearProgress, + Paper, + Stack, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + TableSortLabel, + Typography, + IconButton, + Tooltip, + Menu, + MenuItem, + Dialog, + DialogTitle, + DialogContent, + DialogActions +} from '@mui/material' +import { useTheme, styled } from '@mui/material/styles' +import { tableCellClasses } from '@mui/material/TableCell' +import { CircularProgress } from '@mui/material' + +// icons +import { IconDots, IconEye, IconTrash, IconDownload, 
IconPlayerStop } from '@tabler/icons-react' + +// API +import finetuningApi from '@/api/finetuning' + +// utils - format created date as 'MonthName DayOrdinal, Year' e.g. 'September 4th, 2025' +const formatDate = (date) => { + if (!date) return 'Unknown' + let dt + try { + if (typeof date === 'number') { + dt = date < 1e12 ? new Date(date * 1000) : new Date(date) + } else if (typeof date === 'string' && /^\d+$/.test(date)) { + const n = parseInt(date, 10) + dt = n < 1e12 ? new Date(n * 1000) : new Date(n) + } else { + dt = new Date(date) + } + if (isNaN(dt.getTime())) return 'Unknown' + + const month = dt.toLocaleString('default', { month: 'long' }) + const day = dt.getDate() + const year = dt.getFullYear() + + const ordinal = (n) => { + const s = ["th", "st", "nd", "rd"] + const v = n % 100 + return s[(v - 20) % 10] || s[v] || s[0] + } + + return `${month} ${day}${ordinal(day)}, ${year}` + } catch (e) { + return 'Unknown' + } +} + +const StyledTableCell = styled(TableCell)(({ theme }) => ({ + borderColor: theme.palette.grey[900] + 25, + + [`&.${tableCellClasses.head}`]: { + color: theme.palette.grey[900] + }, + [`&.${tableCellClasses.body}`]: { + fontSize: 14, + height: 64 + } +})) + +const StyledTableRow = styled(TableRow)(() => ({ + '&:last-child td, &:last-child th': { + border: 0 + } +})) + +const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null, filterFunction = null }) => { + const theme = useTheme() + const customization = useSelector((state) => state.customization) + // sorting + const [order, setOrder] = useState('asc') + const [orderBy, setOrderBy] = useState('createdDate') + const handleRequestSort = (property) => { + const isAsc = orderBy === property && order === 'asc' + setOrder(isAsc ? 
'desc' : 'asc') + setOrderBy(property) + } + // sorted data + const sortedData = useMemo(() => { + if (!data) return [] + return [...data].sort((a, b) => { + let cmp = 0 + if (orderBy === 'id') { + cmp = String(a.id).localeCompare(String(b.id)) + } else if (orderBy === 'createdDate') { + cmp = new Date(a.createdDate) - new Date(b.createdDate) + } else { + cmp = 0 + } + return order === 'asc' ? cmp : -cmp + }) + }, [data, order, orderBy]) + const [anchorEl, setAnchorEl] = useState(null) + const [selectedJob, setSelectedJob] = useState(null) + const [actionLoading, setActionLoading] = useState(false) + // Track multiple concurrent downloads: { [jobId]: { progress: number } } + const [downloadingJobs, setDownloadingJobs] = useState({}) + const [downloadDialogOpen, setDownloadDialogOpen] = useState(false) + const [downloadProgress, setDownloadProgress] = useState(0) + const [detailsOpen, setDetailsOpen] = useState(false) + const [detailsData, setDetailsData] = useState(null) + const [logsOpen, setLogsOpen] = useState(false) + const [logsData, setLogsData] = useState('') + const [logsLoading, setLogsLoading] = useState(false) + const logsContainerRef = useRef(null) + + // Auto-refresh logs every 5 seconds when logs dialog is open + useEffect(() => { + if (!logsOpen || !selectedJob) return + + const fetchLogs = async () => { + try { + const response = await finetuningApi.getJobLogs(selectedJob.id) + const body = response.data + // Support two shapes: { logs: string } or raw string + if (body && typeof body === 'object' && 'logs' in body) { + // If the service provided an error message, prefer showing that when logs are empty + const logsStr = body.logs || '' + if (!logsStr && body.error) { + setLogsData(`Error: ${body.error}`) + } else { + setLogsData(normalizeLogs(logsStr)) + } + } else if (typeof body === 'string') { + setLogsData(normalizeLogs(body)) + } else { + setLogsData(JSON.stringify(body, null, 2)) + } + } catch (error) { + console.error('Error auto-refreshing 
logs:', error) + } + } + + // Initial fetch when dialog opens + fetchLogs() + + // Set up interval for auto-refresh every 5 seconds + const intervalId = setInterval(fetchLogs, 5000) + + return () => clearInterval(intervalId) + }, [logsOpen, selectedJob]) + + // When logs dialog opens or logsData changes, scroll to bottom + useEffect(() => { + if (!logsOpen) return + // scroll after next paint to ensure content is rendered + const id = setTimeout(() => { + try { + const el = logsContainerRef.current + if (el) { + el.scrollTop = el.scrollHeight + } + } catch (e) { + // ignore + } + }, 0) + return () => clearTimeout(id) + }, [logsOpen, logsData]) + + const handleMenuClick = (event, job) => { + setAnchorEl(event.currentTarget) + setSelectedJob(job) + } + + const handleMenuClose = () => { + setAnchorEl(null) + setSelectedJob(null) + } + + const handleCancelJob = async () => { + if (!selectedJob) return + + setActionLoading(true) + try { + await finetuningApi.cancelJob(selectedJob.id) + handleMenuClose() + if (onRefresh) onRefresh() + } catch (error) { + console.error('Error canceling job:', error) + alert('Failed to cancel job: ' + (error.message || 'Unknown error')) + } finally { + setActionLoading(false) + } + } + + const handleDownloadFinetuningOutput = async (job) => { + if (!job) { + alert('Job is required') + return + } + + const id = String(job.id) + setDownloadProgress(0) + // mark this job as preparing; show dialog (user can close dialog without cancelling) + setDownloadingJobs((prev) => ({ ...(prev || {}), [id]: { progress: 0 } })) + setDownloadDialogOpen(true) + + // Persist pending download so we can recover on page refresh + try { + const pending = JSON.parse(sessionStorage.getItem('ft_pending_downloads') || '[]') + if (!pending.includes(id)) { + pending.push(id) + sessionStorage.setItem('ft_pending_downloads', JSON.stringify(pending)) + } + } catch (e) { + // ignore sessionStorage errors + } + + // Use WebSocket-based download for non-blocking zip creation 
+ const cleanup = finetuningApi.downloadFinetuningOutputWS(job.id, { + onProgress: (data) => { + // Update UI to show preparation is in progress + setDownloadingJobs((prev) => ({ + ...(prev || {}), + [id]: { progress: 0, status: data.status, message: data.message } + })) + }, + onComplete: async (data) => { + // File is ready - trigger native browser download + // No authentication needed (endpoint is whitelisted) + const downloadUrl = data.downloadUrl || `/api/v1/finetuning/download-ft/${job.id}` + console.log('Starting native browser download:', downloadUrl) + + // Use window.location.href to trigger native browser download + // Browser will show download in download manager with progress bar + window.location.href = downloadUrl + + // Remove from pending list + try { + const pending = JSON.parse(sessionStorage.getItem('ft_pending_downloads') || '[]') + const filtered = (pending || []).filter((x) => x !== id) + sessionStorage.setItem('ft_pending_downloads', JSON.stringify(filtered)) + } catch (e) {} + + // Mark this job finished and close dialog + setDownloadingJobs((prev) => ({ ...(prev || {}), [id]: { progress: 100 } })) + setDownloadProgress(100) + setTimeout(() => { + setDownloadingJobs((prev) => { + const copy = { ...(prev || {}) } + delete copy[id] + return copy + }) + setDownloadDialogOpen(false) + }, 800) + }, + onError: (data) => { + console.error('Download preparation error:', data) + alert('Failed to prepare download: ' + (data.error || 'Unknown error')) + // Remove from pending list + try { + const pending = JSON.parse(sessionStorage.getItem('ft_pending_downloads') || '[]') + const filtered = (pending || []).filter((x) => x !== id) + sessionStorage.setItem('ft_pending_downloads', JSON.stringify(filtered)) + } catch (e) {} + + // Clear downloading state + setDownloadingJobs((prev) => { + const copy = { ...(prev || {}) } + delete copy[id] + return copy + }) + setDownloadProgress(0) + setActionLoading(false) + setDownloadDialogOpen(false) + } + }) + + // 
// Normalize a raw log payload for display:
// - iteratively decode common escaped sequences ("\\n", "\\r", "\\t", "\\xHH",
//   "\\uXXXX", octal-style "\\033"/"\\33" for ESC) — handles double-escaped strings
// - strip ANSI escape sequences (CSI/SGR colors, OSC titles, stray ESC runs)
// - remove C0 control chars except newline, carriage return and tab
// - normalize CR to NL so progress-bar carriage returns become visible lines
// Note: the hex/unicode decode rules already cover "\\x1b" and "\\u001b", so no
// dedicated ESC-token replacements are needed (the previous version repeated
// them several times, including a no-op ESC-to-ESC replace).
const normalizeLogs = (raw) => {
    // Preserve numeric 0 ("0" is a valid log payload); everything else falsy -> ''
    if (!raw && raw !== 0) return ''
    try {
        let s = String(raw)

        // Iteratively decode escaped sequences up to a safe depth (handles
        // double-escaped strings); stop early once a pass changes nothing.
        for (let i = 0; i < 6; i++) {
            const prev = s
            // common single-character escapes
            s = s.replace(/\\r/g, '\r').replace(/\\n/g, '\n').replace(/\\t/g, '\t')
            // hex and unicode escapes (covers "\x1b" and "\u001b" tokens too)
            s = s.replace(/\\x([0-9A-Fa-f]{2})/g, (_, h) => String.fromCharCode(parseInt(h, 16)))
            s = s.replace(/\\u([0-9A-Fa-f]{4})/g, (_, h) => String.fromCharCode(parseInt(h, 16)))
            // octal-ish common sequence for ESC ("\033" or "\33")
            s = s.replace(/\\0?33/g, '\x1b')
            if (s === prev) break
        }

        // Remove ANSI CSI/SGR sequences (ESC [ ... final letter)
        s = s.replace(/\x1b\[[0-9;=?]*[A-Za-z]/g, '')
        // Remove OSC sequences (ESC ] ... terminated by BEL or ESC \)
        s = s.replace(/\x1b\][^\x1b]*?(\x07|\x1b\\)/g, '')
        // Remove any leftover ESC followed by a non-newline run
        s = s.replace(/\x1b[^\n\r]*/g, '')

        // Remove C0 control chars except newline (10), carriage return (13) and tab (9)
        s = s.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, '')

        // Normalize CR to newline so progress carriage returns become visible lines
        s = s.replace(/\r\n/g, '\n').replace(/\r/g, '\n')

        // Collapse runs of 3+ blank lines down to one blank line
        s = s.replace(/\n{3,}/g, '\n\n')

        // Trim trailing whitespace (incl. NBSP) per line, keeping indentation
        s = s.split('\n').map((l) => l.replace(/[\s\u00A0]+$/u, '')).join('\n')

        return s
    } catch (e) {
        // On any failure just return the original string representation
        return String(raw)
    }
}
pending = JSON.parse(sessionStorage.getItem('ft_pending_downloads') || '[]') + if (Array.isArray(pending) && pending.length > 0) { + // For each pending job id, re-attach a download WS to get status + pending.forEach((jobId) => { + // avoid duplicate entries in state + if (!downloadingJobs || !downloadingJobs[jobId]) { + setDownloadingJobs((prev) => ({ ...(prev || {}), [jobId]: { progress: 0 } })) + } + finetuningApi.downloadFinetuningOutputWS(jobId, { + onProgress: (data) => { + setDownloadingJobs((prev) => ({ ...(prev || {}), [jobId]: { progress: 0, status: data.status, message: data.message } })) + setDownloadDialogOpen(true) + }, + onComplete: (data) => { + // Trigger native download + const downloadUrl = data.downloadUrl || `/api/v1/finetuning/download-ft/${jobId}` + window.location.href = downloadUrl + // cleanup pending + try { + const pending2 = JSON.parse(sessionStorage.getItem('ft_pending_downloads') || '[]') + const filtered = (pending2 || []).filter((x) => x !== jobId) + sessionStorage.setItem('ft_pending_downloads', JSON.stringify(filtered)) + } catch (e) {} + setDownloadingJobs((prev) => { + const copy = { ...(prev || {}) } + delete copy[jobId] + return copy + }) + setDownloadDialogOpen(false) + }, + onError: (data) => { + console.error('Recovered download preparation error:', data) + try { + const pending2 = JSON.parse(sessionStorage.getItem('ft_pending_downloads') || '[]') + const filtered = (pending2 || []).filter((x) => x !== jobId) + sessionStorage.setItem('ft_pending_downloads', JSON.stringify(filtered)) + } catch (e) {} + setDownloadingJobs((prev) => { + const copy = { ...(prev || {}) } + delete copy[jobId] + return copy + }) + setDownloadDialogOpen(false) + } + }) + }) + } + } catch (e) { + // ignore sessionStorage parse errors + } + }, []) + + const getStatusColor = (status) => { + switch (status?.toLowerCase()) { + case 'completed': + case 'succeeded': + return 'success' + case 'running': + return 'primary' + case 'failed': + return 'error' 
// Convert a job progress value into a number usable by LinearProgress.
// Accepts either a number (returned as-is) or a percentage string like "42%".
// Fix: the previous version called parseInt without a radix and returned NaN
// for non-numeric strings (e.g. "N/A%"), which leaked NaN into the progress bar.
const getProgressValue = (progress) => {
    if (typeof progress === 'string' && progress.includes('%')) {
        const n = parseInt(progress.replace('%', ''), 10)
        return Number.isNaN(n) ? 0 : n
    }
    return progress || 0
}

// Only allow downloads when the job status indicates success.
// NOTE(review): getStatusColor treats both 'completed' and 'succeeded' as
// success, but only 'succeeded' is downloadable here — confirm against the
// backend's terminal status value before widening this check.
const isDownloadableStatus = (status) => {
    if (!status) return false
    const s = String(status).toLowerCase()
    return s === 'succeeded'
}
( + + + {job.status} + + ) : ( + + ) + })()} + + + + + {job.model || 'N/A'} + + + + + {job.task || job.task_type || job.taskType || 'N/A'} + + + + + {job.dataset || 'N/A'} + + + + {(() => { + const jid = String(job.id) + const isPreparing = Boolean(downloadingJobs && downloadingJobs[jid]) + return ( + + + handleDownloadFinetuningOutput(job)} + disabled={ + actionLoading || + isPreparing || + !isDownloadableStatus(job.status) + } + title={isPreparing ? 'Preparing download' : 'Download fine-tuning output'} + > + {isPreparing ? ( + + ) : ( + + )} + + + + ) + })()} + + + + handleViewLogs(job)} title="View Logs"> + + + + + + handleMenuClick(e, job)} + > + + + + + + {job.createdDate ? formatDate(job.createdDate) : 'Unknown'} + + + + )) + )} + +
+
+ + + + View Details + + + + Cancel Job + + { + if (!selectedJob) return + if (!window.confirm('Are you sure you want to delete this job?')) return + setActionLoading(true) + try { + await finetuningApi.deleteJob(selectedJob.id) + handleMenuClose() + if (onRefresh) onRefresh() + } catch (error) { + console.error('Error deleting job:', error) + alert('Failed to delete job: ' + (error.message || 'Unknown error')) + } finally { + setActionLoading(false) + } + }} + disabled={actionLoading} + > + + Delete Job + + + + {/* Preparing Download Dialog */} + setDownloadDialogOpen(false)} maxWidth="xs" fullWidth> + Preparing download + + + The server is preparing the job output for download. This may take a few moments for large outputs. + + 0 ? 'determinate' : 'indeterminate'} value={downloadProgress} /> + + + + + + + {/* Details Dialog */} + setDetailsOpen(false)} maxWidth="md" fullWidth> + Job Details + + {detailsData ? ( +
{JSON.stringify(detailsData, null, 2)}
+ ) : ( + No details available + )} +
+ + + +
+ + {/* Logs Dialog */} + { + setLogsOpen(false) + // clear selected job when dialog closes to avoid stale selection + setSelectedJob(null) + }} + maxWidth="lg" + fullWidth + > + Job Logs + + {logsLoading ? ( + Loading logs... + ) : ( +
+
{logsData || 'No logs available'}
+
+ )} +
+ + + +
+ + ) +} + +FinetuningJobsTable.propTypes = { + data: PropTypes.array.isRequired, + isLoading: PropTypes.bool, + onRefresh: PropTypes.func, + filterFunction: PropTypes.func +} + +export default FinetuningJobsTable \ No newline at end of file diff --git a/studio-frontend/packages/ui/src/views/finetuning/index.jsx b/studio-frontend/packages/ui/src/views/finetuning/index.jsx new file mode 100644 index 0000000..617da61 --- /dev/null +++ b/studio-frontend/packages/ui/src/views/finetuning/index.jsx @@ -0,0 +1,311 @@ +import { useEffect, useState, useRef } from 'react' +import { useNavigate } from 'react-router-dom' + +// material-ui +import { Box, Skeleton, Stack, ToggleButton, ToggleButtonGroup, Typography, Input } from '@mui/material' +import { useTheme } from '@mui/material/styles' + +// project imports +import MainCard from '@/ui-component/cards/MainCard' +import ItemCard from '@/ui-component/cards/ItemCard' +import { gridSpacing } from '@/store/constant' +import WorkflowEmptySVG from '@/assets/images/workflow_empty.svg' +import LoginDialog from '@/ui-component/dialog/LoginDialog' +import ConfirmDialog from '@/ui-component/dialog/ConfirmDialog' +import { StyledButton } from '@/ui-component/button/StyledButton' +import ViewHeader from '@/layout/MainLayout/ViewHeader' +import ErrorBoundary from '@/ErrorBoundary' +import FinetuningJobsTable from './FinetuningJobsTable' +import FinetuningJobModal from './FinetuningJobModal' + +// API +import finetuningApi from '@/api/finetuning' + +// Hooks +import useApi from '@/hooks/useApi' + +// icons +import { IconPlus, IconLayoutGrid, IconList, IconSearch } from '@tabler/icons-react' + +//keycloak +import { useKeycloak } from '../../KeycloakContext' + +// ==============================|| Fine-tuning ||============================== // + +const Finetuning = () => { + const keycloak = useKeycloak() + const navigate = useNavigate() + const theme = useTheme() + + const [isLoading, setLoading] = useState(true) + const [error, setError] 
= useState(null) + const [search, setSearch] = useState('') + const [loginDialogOpen, setLoginDialogOpen] = useState(false) + const [loginDialogProps, setLoginDialogProps] = useState({}) + const [jobs, setJobs] = useState([]) + const [jobModalOpen, setJobModalOpen] = useState(false) + + let userRole = keycloak?.tokenParsed?.resource_access?.genaistudio?.roles[0] + let getAllJobsApi = null + + if (keycloak.authenticated) { + getAllJobsApi = useApi(finetuningApi.getAllJobs) + } + + useEffect(() => { + loadJobs() + + // Cleanup all WebSocket connections on unmount + return () => { + Object.keys(jobSocketsRef.current).forEach(jobId => { + if (jobSocketsRef.current[jobId]) { + jobSocketsRef.current[jobId]() + } + }) + jobSocketsRef.current = {} + } + }, []) + + // Store cleanup functions for each job's WebSocket connection + const jobSocketsRef = useRef({}) + + // Function to start monitoring a specific job + const startJobMonitoring = (jobId) => { + // Don't create duplicate connections + if (jobSocketsRef.current[jobId]) { + return + } + + // Subscribe to this job's status updates + const cleanup = finetuningApi.subscribeToJobStatus(jobId, { + onUpdate: (jobData) => { + // Update the job in state + setJobs(prev => prev.map(j => + j.id === jobData.id ? 
// Map a raw job object (TypeORM entity or external fine-tuning API shape)
// onto the normalized shape the jobs table expects. Unknown fields are kept
// via spread; known fields are coalesced across the naming variants the
// different backends use. Returns null for falsy input.
const normalizeJob = (j) => {
    if (!j) return null

    // Fall back to a timestamp-based id only when every known id field is absent.
    const id = j.id || j.job_id || j.fine_tuning_job_id || String(Date.now())

    // Numeric progress is rendered as "<n>%", string progress is passed through.
    const progress = typeof j.progress === 'number' ? `${j.progress}%` : (j.progress || '0%')

    return {
        ...j,
        id,
        name: j.name || id,
        status: j.status || j.state || 'pending',
        model: j.model || 'N/A',
        dataset: j.dataset || j.training_file || j.trainingFile || 'N/A',
        progress,
        createdDate: j.createdDate || j.created_at || j.createdAt || new Date().toISOString()
    }
}
response.map(normalizeJob).filter(Boolean) : [] + setJobs(jobsData) + setLoading(false) + + // Start monitoring any active jobs + const activeStatuses = ['pending', 'validating_files', 'running'] + jobsData.forEach(job => { + const status = (job?.status || '').toString().toLowerCase() + if (activeStatuses.includes(status)) { + startJobMonitoring(job.id) + } + }) + } catch (error) { + console.error('Error loading fine-tuning jobs:', error) + setJobs([]) + setError(error) + setLoading(false) + } + } + + const handleCreateJob = () => { + try { + if (document.activeElement instanceof HTMLElement) { + document.activeElement.blur() + } + } catch (e) { + // ignore in non-browser environments + } + setTimeout(() => setJobModalOpen(true), 0) + } + + const handleJobCreated = (newJob) => { + setJobs(prev => [...prev, newJob]) + setJobModalOpen(false) + + // Start monitoring the new job if it's in an active state + const activeStatuses = ['pending', 'validating_files', 'running'] + const status = (newJob?.status || '').toString().toLowerCase() + if (activeStatuses.includes(status)) { + startJobMonitoring(newJob.id) + } + } + + const onSearchChange = (event) => { + setSearch(event.target.value) + } + + // Predicate function used by FinetuningJobsTable to show/hide rows + const filterJobs = (job) => { + if (!search || search.trim() === '') return true + const q = search.toLowerCase() + const id = (job?.id || '').toString().toLowerCase() + const name = (job?.name || '').toString().toLowerCase() + const model = (job?.model || '').toString().toLowerCase() + const dataset = (job?.dataset || job?.training_file || '').toString().toLowerCase() + const task = (job?.task || job?.task_type || job?.taskType || '').toString().toLowerCase() + const status = (job?.status || '').toString().toLowerCase() + return id.includes(q) || name.includes(q) || model.includes(q) || dataset.includes(q) || task.includes(q) || status.includes(q) + } + + return ( + <> + + + + + Fine-tuning Jobs + + + + } + 
sx={{ borderRadius: 2, height: 40 }} + > + Create New Job + + + + + + + } + type='search' + /> + +
+ + {isLoading ? ( + + + + + + ) : ( + + {jobs.length === 0 ? ( + + + WorkflowEmptySVG + +

No Fine-tuning Jobs Yet

+ Create your first fine-tuning job to get started! +
+
+
+ ) : ( + + )} +
+ )} + + + + setJobModalOpen(false)} + onJobCreated={handleJobCreated} + /> + + setLoginDialogOpen(false)} + onConfirm={() => setLoginDialogOpen(false)} + /> + + ) +} + +export default Finetuning \ No newline at end of file diff --git a/studio-frontend/packages/ui/src/views/opeaflows/index.jsx b/studio-frontend/packages/ui/src/views/opeaflows/index.jsx index 1beefee..5e7e28f 100644 --- a/studio-frontend/packages/ui/src/views/opeaflows/index.jsx +++ b/studio-frontend/packages/ui/src/views/opeaflows/index.jsx @@ -2,7 +2,7 @@ import { useEffect, useState } from 'react' import { useNavigate } from 'react-router-dom' // material-ui -import { Box, Skeleton, Stack, ToggleButton, ToggleButtonGroup } from '@mui/material' +import { Box, Skeleton, Stack, Input, Typography } from '@mui/material' import { useTheme } from '@mui/material/styles' // project imports @@ -27,7 +27,7 @@ import useApi from '@/hooks/useApi' import { baseURL } from '@/store/constant' // icons -import { IconPlus, IconLayoutGrid, IconList } from '@tabler/icons-react' +import { IconPlus, IconLayoutGrid, IconList, IconSearch } from '@tabler/icons-react' //keycloak import { useKeycloak } from '../../KeycloakContext' @@ -161,47 +161,58 @@ const Opeaflows = () => { ) : ( - - {/* + - - - - - - - */} - - - } sx={{ borderRadius: 2, height: 40, width: 250 }}> - Create New Workflow - - } sx={{ borderRadius: 2, height: 40, width: 250 }}> - Import Sample Workflows - + Workflows + + + + } sx={{ borderRadius: 2, height: 40, width: 250 }}> + Create New Workflow + + } sx={{ borderRadius: 2, height: 40, width: 250 }}> + Import Sample Workflows + + + + + + + } + type='search' + /> +
{!view || view === 'card' ? ( <> diff --git a/studio-frontend/packages/ui/src/views/tracer/index.jsx b/studio-frontend/packages/ui/src/views/tracer/index.jsx index f6034f0..d3fa4e9 100644 --- a/studio-frontend/packages/ui/src/views/tracer/index.jsx +++ b/studio-frontend/packages/ui/src/views/tracer/index.jsx @@ -11,6 +11,7 @@ import { Paper, Button, Box, + Stack, Typography, Divider, TablePagination, @@ -170,15 +171,27 @@ export default function LLMTraces() { return ( - // - - - - {workflowName && ( - - Workflow name: {workflowName} + + + + + LLM Call Traces - )} + {workflowName && ( + + Workflow name: {workflowName} + + )} + {traceList.length > 0 ? ( <> Traces: @@ -215,7 +228,7 @@ export default function LLMTraces() { ) : ( No traces found )} - + {selectedTrace && ( diff --git a/studio-frontend/packages/ui/vite.config.js b/studio-frontend/packages/ui/vite.config.js index c987920..a2591b2 100644 --- a/studio-frontend/packages/ui/vite.config.js +++ b/studio-frontend/packages/ui/vite.config.js @@ -9,16 +9,17 @@ export default defineConfig(async ({ mode }) => { const serverEnv = dotenv.config({ processEnv: {}, path: '../server/.env' }).parsed const serverHost = serverEnv?.['HOST'] ?? 'localhost' const serverPort = parseInt(serverEnv?.['PORT'] ?? 
'3000') - if (!Number.isNaN(serverPort) && serverPort > 0 && serverPort < 65535) { - proxy = { - '/api': { - target: `http://${serverHost}:${serverPort}`, - changeOrigin: true - }, - '/socket.io': { - target: `http://${serverHost}:${serverPort}`, - changeOrigin: true - } + proxy = { + '/api': { + target: `http://${serverHost}:${serverPort}`, + changeOrigin: true, + secure: false + }, + '/socket.io': { + target: `http://${serverHost}:${serverPort}`, + changeOrigin: true, + ws: true, + secure: false } } } From 73705880e3ea20534849abcdbfe334228cee73d9 Mon Sep 17 00:00:00 2001 From: wanhakim Date: Mon, 15 Dec 2025 01:57:30 +0800 Subject: [PATCH 2/2] added finetuning playwright and fixes (#75) * added test case 005 for V1.5 Signed-off-by: wwanarif * add more finetuning tests Signed-off-by: Hee, Tyan Huey * fix finetuning issues + add error UI for import sample workflow fails Signed-off-by: wwanarif * fix validation issues Signed-off-by: wwanarif * optimizing finetuning downloads Signed-off-by: wwanarif * implement test name changes Signed-off-by: wwanarif * fix race condition and update some UI Signed-off-by: wwanarif * cve fixes Signed-off-by: wwanarif * add compat shim for urllib3 2.x Signed-off-by: wwanarif --------- Signed-off-by: wwanarif Signed-off-by: Hee, Tyan Huey Co-authored-by: Hee, Tyan Huey --- .github/workflows/manual-docker-scan.yml | 1 + app-backend/Dockerfile | 3 +- .../manifests/studio-manifest.yaml | 2 +- studio-backend/app/__init__.py | 4 + studio-backend/app/compat.py | 22 + studio-backend/app/main.py | 6 + studio-backend/app/requirements.txt | 5 +- studio-backend/app/routers/sandbox_router.py | 2 + studio-backend/tests/requirements.txt | 5 +- studio-frontend/Dockerfile | 68 +- studio-frontend/package.json | 60 +- studio-frontend/packages/server/package.json | 31 +- .../packages/server/src/NodesPool.ts | 2 +- .../packages/server/src/commands/start.ts | 4 +- .../src/controllers/finetuning/index.ts | 37 +- .../src/controllers/get-upload-file/index.ts 
| 9 +- .../controllers/openai-assistants/index.ts | 3 +- .../server/src/routes/apikey/index.ts | 2 +- .../server/src/routes/assistants/index.ts | 2 +- .../server/src/routes/chat-messages/index.ts | 2 +- .../src/routes/chatflows-sandbox/index.ts | 2 +- .../src/routes/chatflows-streaming/index.ts | 2 +- .../src/routes/chatflows-uploads/index.ts | 2 +- .../server/src/routes/chatflows/index.ts | 2 +- .../components-credentials-icon/index.ts | 2 +- .../routes/components-credentials/index.ts | 2 +- .../server/src/routes/credentials/index.ts | 2 +- .../server/src/routes/documentstore/index.ts | 2 +- .../server/src/routes/export-import/index.ts | 2 +- .../server/src/routes/feedback/index.ts | 2 +- .../server/src/routes/fetch-links/index.ts | 2 +- .../server/src/routes/finetuning/index.ts | 2 +- .../server/src/routes/flow-config/index.ts | 2 +- .../src/routes/get-upload-file/index.ts | 2 +- .../src/routes/get-upload-path/index.ts | 2 +- .../packages/server/src/routes/index.ts | 2 +- .../routes/internal-chat-messages/index.ts | 2 +- .../src/routes/internal-predictions/index.ts | 2 +- .../packages/server/src/routes/leads/index.ts | 2 +- .../server/src/routes/load-prompts/index.ts | 2 +- .../server/src/routes/marketplaces/index.ts | 2 +- .../server/src/routes/node-configs/index.ts | 2 +- .../src/routes/node-custom-functions/index.ts | 2 +- .../server/src/routes/node-icons/index.ts | 2 +- .../src/routes/node-load-methods/index.ts | 2 +- .../packages/server/src/routes/nodes/index.ts | 2 +- .../routes/openai-assistants-files/index.ts | 2 +- .../openai-assistants-vector-store/index.ts | 2 +- .../src/routes/openai-assistants/index.ts | 2 +- .../packages/server/src/routes/ping/index.ts | 2 +- .../server/src/routes/predictions/index.ts | 2 +- .../server/src/routes/prompts-lists/index.ts | 2 +- .../src/routes/public-chatbots/index.ts | 2 +- .../src/routes/public-chatflows/index.ts | 2 +- .../packages/server/src/routes/stats/index.ts | 2 +- .../packages/server/src/routes/tools/index.ts 
| 2 +- .../server/src/routes/upsert-history/index.ts | 2 +- .../server/src/routes/variables/index.ts | 2 +- .../server/src/routes/vectors/index.ts | 2 +- .../server/src/routes/verify/index.ts | 2 +- .../server/src/routes/versions/index.ts | 2 +- .../server/src/services/finetuning/index.ts | 595 ++++++++++++------ .../openai-assistants-vector-store/index.ts | 20 +- .../src/services/openai-assistants/index.ts | 4 +- .../packages/server/src/utils/SSEStreamer.ts | 107 ++++ .../server/src/utils/buildAgentGraph.ts | 2 +- .../server/src/utils/fileRepository.ts | 8 +- .../server/src/ws/finetuningDownload.ts | 12 +- studio-frontend/packages/ui/package.json | 24 +- .../ui-component/dialog/InputHintDialog.jsx | 27 +- .../dialog/PromptLangsmithHubDialog.jsx | 28 +- .../dialog/ViewMessagesDialog.jsx | 90 +-- .../markdown/MemoizedReactMarkdown.jsx | 165 ++++- .../ui/src/views/chatmessage/ChatMessage.jsx | 73 +-- .../views/finetuning/FinetuningJobModal.jsx | 12 +- .../views/finetuning/FinetuningJobsTable.jsx | 51 +- .../packages/ui/src/views/opeaflows/index.jsx | 27 +- studio-frontend/turbo.json | 5 +- .../005_test_finetuning_rerank.spec.ts | 68 ++ .../006_test_finetuning_embedding.spec.ts | 68 ++ .../007_test_finetuning_reasoning.spec.ts | 68 ++ tests/playwright/utils.ts | 2 +- tests/test-files/medical_o1_sft_50.json | 52 ++ tests/test-files/toy_finetune_data.jsonl | 10 + 84 files changed, 1294 insertions(+), 576 deletions(-) create mode 100644 studio-backend/app/compat.py create mode 100644 tests/playwright/studio-e2e/005_test_finetuning_rerank.spec.ts create mode 100644 tests/playwright/studio-e2e/006_test_finetuning_embedding.spec.ts create mode 100644 tests/playwright/studio-e2e/007_test_finetuning_reasoning.spec.ts create mode 100644 tests/test-files/medical_o1_sft_50.json create mode 100644 tests/test-files/toy_finetune_data.jsonl diff --git a/.github/workflows/manual-docker-scan.yml b/.github/workflows/manual-docker-scan.yml index 0f7f42e..4fa5e0c 100644 --- 
a/.github/workflows/manual-docker-scan.yml +++ b/.github/workflows/manual-docker-scan.yml @@ -69,6 +69,7 @@ jobs: ignore-unfixed: true vuln-type: 'os,library' severity: 'CRITICAL,HIGH' + # timeout: '10m0s' - name: Cleanup if: always() diff --git a/app-backend/Dockerfile b/app-backend/Dockerfile index ee9995b..a539919 100644 --- a/app-backend/Dockerfile +++ b/app-backend/Dockerfile @@ -26,7 +26,8 @@ RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git WORKDIR /home/user/GenAIComps RUN pip install --no-cache-dir --upgrade pip==24.3.1 setuptools==78.1.1 && \ pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \ - pip install --no-cache-dir --upgrade mcp==1.10.0 pillow==11.3.0 + pip install --no-cache-dir --upgrade mcp==1.23.0 pillow==11.3.0 \ + langchain-core==0.3.80 urllib3==2.6.0 starlette==0.49.1 COPY ./templates/microservices/* /home/user/templates/microservices/ COPY ./megaservice.py /home/user/megaservice.py diff --git a/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml b/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml index 4b5440e..d2e2f5b 100644 --- a/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml +++ b/setup-scripts/setup-genai-studio/manifests/studio-manifest.yaml @@ -426,7 +426,7 @@ spec: storageClassName: local-path resources: requests: - storage: 1Gi + storage: 30Gi --- apiVersion: v1 kind: Service diff --git a/studio-backend/app/__init__.py b/studio-backend/app/__init__.py index e69de29..68623b9 100644 --- a/studio-backend/app/__init__.py +++ b/studio-backend/app/__init__.py @@ -0,0 +1,4 @@ +from app.compat import ensure_urllib3_getheaders + +# Ensure urllib3 2.x exposes HTTPResponse.getheaders for kubernetes client compatibility. 
+ensure_urllib3_getheaders() diff --git a/studio-backend/app/compat.py b/studio-backend/app/compat.py new file mode 100644 index 0000000..ecbb7b3 --- /dev/null +++ b/studio-backend/app/compat.py @@ -0,0 +1,22 @@ +""" +Compatibility shims for third-party libraries. + +Currently used to keep kubernetes-python working with urllib3 2.x, which +removed HTTPResponse.getheaders(). Older kubernetes versions still call +getheaders when building ApiException objects. This shim reintroduces a +minimal getheaders that mirrors the previous behavior. +""" +from urllib3.response import HTTPResponse + + +def ensure_urllib3_getheaders() -> None: + """Add HTTPResponse.getheaders if urllib3 2.x removed it. + + Returns the header items as a list of (key, value) tuples, matching the + old http.client.HTTPResponse API used by kubernetes-python. + """ + if not hasattr(HTTPResponse, "getheaders"): + def _getheaders(self): # type: ignore[override] + return list(self.headers.items()) + + HTTPResponse.getheaders = _getheaders # type: ignore[attr-defined] \ No newline at end of file diff --git a/studio-backend/app/main.py b/studio-backend/app/main.py index ed54961..85c291b 100644 --- a/studio-backend/app/main.py +++ b/studio-backend/app/main.py @@ -1,5 +1,11 @@ from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware + +from app.compat import ensure_urllib3_getheaders + +# Restore HTTPResponse.getheaders expected by kubernetes-python when running with urllib3 2.x. 
+ensure_urllib3_getheaders() + from kubernetes import config # Load the kubeconfig file diff --git a/studio-backend/app/requirements.txt b/studio-backend/app/requirements.txt index bb1ba66..fea8e19 100644 --- a/studio-backend/app/requirements.txt +++ b/studio-backend/app/requirements.txt @@ -1,9 +1,10 @@ -fastapi==0.115.4 +fastapi==0.121.0 uvicorn==0.30.6 kubernetes==30.1.0 requests==2.32.3 +urllib3==2.6.0 pydantic==1.10.18 -starlette==0.41.2 +starlette==0.49.1 websockets==10.3 clickhouse-driver==0.2.9 paramiko==3.5.1 \ No newline at end of file diff --git a/studio-backend/app/routers/sandbox_router.py b/studio-backend/app/routers/sandbox_router.py index dddd905..5bdf4d7 100644 --- a/studio-backend/app/routers/sandbox_router.py +++ b/studio-backend/app/routers/sandbox_router.py @@ -19,6 +19,8 @@ async def deploy_sandbox(request: PipelineFlow): try: response = deploy_manifest_in_namespace(core_v1_api, apps_v1_api, json.loads(workflow_info.export_to_json())) except Exception as e: + import traceback + traceback.print_exc() raise HTTPException(status_code=500, detail=str(e)) return response diff --git a/studio-backend/tests/requirements.txt b/studio-backend/tests/requirements.txt index a329e9a..82a03fb 100644 --- a/studio-backend/tests/requirements.txt +++ b/studio-backend/tests/requirements.txt @@ -1,5 +1,6 @@ pytest==8.3.3 -fastapi==0.115.0 +fastapi==0.121.0 httpx==0.27.2 kubernetes==30.1.0 -pydantic==1.10.18 \ No newline at end of file +pydantic==1.10.18 +urllib3==2.6.0 \ No newline at end of file diff --git a/studio-frontend/Dockerfile b/studio-frontend/Dockerfile index e59da67..828818e 100644 --- a/studio-frontend/Dockerfile +++ b/studio-frontend/Dockerfile @@ -1,4 +1,5 @@ -FROM node:20-alpine +# Build stage +FROM node:20-alpine AS builder # Accept proxy build arguments ARG http_proxy @@ -10,18 +11,51 @@ ENV http_proxy=${http_proxy} ENV https_proxy=${https_proxy} ENV no_proxy=${no_proxy} -# Install necessary packages +# Install build dependencies RUN apk update 
&& apk upgrade && \ apk add --no-cache gcompat python3 make g++ git \ - # Needed for pdfjs-dist - build-base cairo-dev pango-dev \ - # Install Chromium - chromium && \ - # Install PNPM globally + build-base cairo-dev pango-dev && \ npm install -g pnpm@9 -# Debug step to verify git installation -RUN git --version +ENV NODE_OPTIONS=--max-old-space-size=8192 + +WORKDIR /usr/src + +# Copy package files first for better layer caching +COPY package.json pnpm-workspace.yaml turbo.json ./ +COPY packages/server/package.json ./packages/server/ +COPY packages/ui/package.json ./packages/ui/ + +# Install dependencies +RUN pnpm install + +# Copy source code +COPY . . + +# Build the app and clean up +RUN pnpm build && \ + # Prune to production dependencies only + pnpm prune --prod && \ + rm -rf node_modules/.cache && \ + rm -rf packages/*/node_modules/.cache + +# Production stage +FROM node:20-alpine + +# Accept proxy build arguments +ARG http_proxy +ARG https_proxy +ARG no_proxy + +ENV http_proxy=${http_proxy} +ENV https_proxy=${https_proxy} +ENV no_proxy=${no_proxy} + +# Install only runtime dependencies with patched npm +RUN apk update && apk upgrade && \ + apk add --no-cache gcompat chromium && \ + npm install -g npm@latest pnpm@latest && \ + rm -rf /var/cache/apk/* ENV PUPPETEER_SKIP_DOWNLOAD=true ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium-browser @@ -29,18 +63,10 @@ ENV NODE_OPTIONS=--max-old-space-size=8192 WORKDIR /usr/src -# Copy app source -COPY . . 
- -# Install dependencies and build the app -RUN pnpm config set store-dir .pnpm-store && \ - pnpm install && \ - pnpm update cross-spawn@7.0.5 && \ - pnpm build && \ - pnpm remove esbuild && \ - rm -rf .pnpm-store && \ - rm -rf /root/.local/share/pnpm && \ - pnpm prune --prod +# Copy only necessary files from builder +COPY --from=builder /usr/src/package.json /usr/src/pnpm-workspace.yaml ./ +COPY --from=builder /usr/src/packages ./packages +COPY --from=builder /usr/src/node_modules ./node_modules EXPOSE 3000 diff --git a/studio-frontend/package.json b/studio-frontend/package.json index 0205332..b6751a4 100644 --- a/studio-frontend/package.json +++ b/studio-frontend/package.json @@ -1,17 +1,15 @@ { "name": "flowise", - "version": "2.1.4", + "version": "3.0.10", "private": true, "homepage": "https://flowiseai.com", "workspaces": [ - "packages/*", - "flowise", - "ui" + "packages/*" ], "scripts": { "build": "turbo run build", "build-force": "pnpm clean && turbo run build --force", - "dev": "turbo run dev --parallel", + "dev": "turbo run dev --parallel --no-cache", "start": "run-script-os", "start:windows": "cd packages/server/bin && run start", "start:default": "cd packages/server/bin && ./run start", @@ -32,7 +30,6 @@ "@babel/preset-typescript": "7.18.6", "@types/express": "^4.17.13", "@typescript-eslint/typescript-estree": "^7.13.1", - "esbuild": ">=0.25.0", "eslint": "^8.24.0", "eslint-config-prettier": "^8.3.0", "eslint-config-react-app": "^7.0.1", @@ -42,6 +39,9 @@ "eslint-plugin-react": "^7.26.1", "eslint-plugin-react-hooks": "^4.6.0", "eslint-plugin-unused-imports": "^2.0.0", + "cross-spawn": "^7.0.6", + "glob": "^10.5.0", + "tar-fs": "^3.1.1", "husky": "^8.0.1", "kill-port": "^2.0.1", "lint-staged": "^13.0.3", @@ -49,7 +49,7 @@ "pretty-quick": "^3.1.3", "rimraf": "^3.0.2", "run-script-os": "^1.1.6", - "turbo": "latest", + "turbo": "^2.3.3", "typescript": "^5.4.5" }, "pnpm": { @@ -58,8 +58,30 @@ "sqlite3" ], "overrides": { - "set-value": "^3.0.3", - 
"form-data": "4.0.4" + "@modelcontextprotocol/sdk": ">=1.24.0", + "axios": "1.12.0", + "body-parser": "2.0.2", + "braces": "3.0.3", + "cross-spawn": "7.0.6", + "esbuild": "0.27.1", + "form-data": "4.0.4", + "glob": "10.5.0", + "glob-parent": "6.0.2", + "http-proxy-middleware": "3.0.3", + "json5": "2.2.3", + "nth-check": "2.1.1", + "path-to-regexp": "0.1.12", + "prismjs": "1.29.0", + "rollup": "4.45.0", + "semver": "7.7.1", + "set-value": "4.1.0", + "solid-js": "1.9.7", + "tar-fs": ">=3.1.1", + "unset-value": "2.0.1", + "webpack-dev-middleware": "7.4.2", + "ws": "8.18.3", + "xlsx": "https://cdn.sheetjs.com/xlsx-0.20.3/xlsx-0.20.3.tgz", + "zod": "^3.25.0" }, "peerDependencyRules": { "ignoreMissing": [], @@ -71,22 +93,12 @@ "pnpm": ">=9" }, "resolutions": { + "@google/generative-ai": "^0.24.0", + "@grpc/grpc-js": "^1.10.10", + "@langchain/core": "0.3.61", "@qdrant/openapi-typescript-fetch": "1.2.6", - "@google/generative-ai": "^0.15.0", - "openai": "4.57.3", - "@langchain/core": "0.2.18", - "axios": "1.8.2", - "nth-check": "2.0.1", - "pdfjs-dist": "4.2.67", - "prismjs": "1.27.0", - "semver": "7.5.2", - "ws": "8.17.1", - "esbuild": ">=0.25.0", - "cross-spawn": ">=7.0.5", - "solid-js": ">=1.9.4", - "tar-fs": ">=3.0.8", - "form-data": "4.0.4", - "zod": ">=3.23.0" + "openai": "4.96.0", + "protobufjs": "7.4.0" }, "eslintIgnore": [ "**/dist", diff --git a/studio-frontend/packages/server/package.json b/studio-frontend/packages/server/package.json index 7ef55d6..f35f64f 100644 --- a/studio-frontend/packages/server/package.json +++ b/studio-frontend/packages/server/package.json @@ -1,6 +1,6 @@ { "name": "flowise", - "version": "2.1.4", + "version": "3.0.10", "description": "Flowiseai Server", "main": "dist/index", "types": "dist/index.d.ts", @@ -26,7 +26,7 @@ "nuke": "rimraf dist node_modules .turbo", "start:windows": "cd bin && run start", "start:default": "cd bin && ./run start", - "dev": "tsc-watch --noClear -p ./tsconfig.json --onSuccess \"pnpm start\"", + "dev": 
"nodemon", "oclif-dev": "run-script-os", "oclif-dev:windows": "cd bin && dev start", "oclif-dev:default": "cd bin && ./dev start", @@ -35,13 +35,14 @@ "typeorm": "typeorm-ts-node-commonjs", "typeorm:migration-generate": "pnpm typeorm migration:generate -d ./src/utils/typeormDataSource.ts", "typeorm:migration-run": "pnpm typeorm migration:run -d ./src/utils/typeormDataSource.ts", + "typeorm:migration-revert": "pnpm typeorm migration:revert -d ./src/utils/typeormDataSource.ts", "watch": "tsc --watch", "version": "oclif readme && git add README.md", "cypress:open": "cypress open", "cypress:run": "cypress run", "e2e": "start-server-and-test dev http://localhost:3000 cypress:run", "cypress:ci": "START_SERVER_AND_TEST_INSECURE=1 start-server-and-test start https-get://localhost:3000 cypress:run", - "test": "jest" + "test": "jest --runInBand --detectOpenHandles --forceExit" }, "keywords": [], "homepage": "https://flowiseai.com", @@ -54,11 +55,11 @@ }, "license": "SEE LICENSE IN LICENSE.md", "dependencies": { - "@oclif/core": "^1.13.10", - "@types/lodash": "^4.14.202", + "@oclif/core": "4.0.7", + "@types/lodash": "^4.17.20", "@types/uuid": "^9.0.7", "async-mutex": "^0.4.0", - "axios": "^1.8.2", + "axios": "1.12.0", "content-disposition": "0.5.4", "cors": "^2.8.5", "crypto-js": "^4.1.1", @@ -66,18 +67,20 @@ "express": "^4.17.3", "express-basic-auth": "^1.2.1", "express-rate-limit": "^6.9.0", - "flowise-components": "^2.1.4", + "flowise-components": "^3.0.8", "flowise-ui": "workspace:^", "http-errors": "^2.0.0", "http-status-codes": "^2.3.0", + "@langchain/core": "^0.2.0", + "@langchain/langgraph": "^0.0.15", "langchainhub": "^0.0.11", "lodash": "^4.17.21", "moment": "^2.29.3", "moment-timezone": "^0.5.34", "multer": "^1.4.5-lts.1", - "mysql2": "^3.9.2", + "mysql2": "^3.11.3", "form-data": "^4.0.0", - "openai": "^4.57.3", + "openai": "^4.96.0", "pg": "^8.11.1", "posthog-node": "^3.5.0", "reflect-metadata": "^0.1.13", @@ -87,24 +90,28 @@ "typeorm": "^0.3.6", "uuid": "^9.0.1", 
"winston": "^3.9.0", - "https-proxy-agent": "^7.0.4" + "https-proxy-agent": "^7.0.4", + "archiver": "^6.0.1", + "turndown": "^7.2.0" }, "devDependencies": { "@types/content-disposition": "0.5.8", "@types/cors": "^2.8.12", + "@types/express": "^4.17.17", "@types/crypto-js": "^4.1.1", "@types/multer": "^1.4.7", "@types/sanitize-html": "^2.9.5", "concurrently": "^7.1.0", "cypress": "^13.13.0", "nodemon": "^2.0.22", - "oclif": "^3", + "oclif": "^4.20.5", "rimraf": "^5.0.5", "run-script-os": "^1.1.6", "shx": "^0.3.3", "start-server-and-test": "^2.0.3", "ts-node": "^10.7.0", "tsc-watch": "^6.0.4", - "typescript": "^5.4.5" + "typescript": "^5.4.5", + "@types/archiver": "^6.0.2" } } diff --git a/studio-frontend/packages/server/src/NodesPool.ts b/studio-frontend/packages/server/src/NodesPool.ts index c433bf7..62c5595 100644 --- a/studio-frontend/packages/server/src/NodesPool.ts +++ b/studio-frontend/packages/server/src/NodesPool.ts @@ -108,7 +108,7 @@ export class NodesPool { private async getFiles(dir: string): Promise { const dirents = await promises.readdir(dir, { withFileTypes: true }) const files = await Promise.all( - dirents.map((dirent: Dirent) => { + dirents.map((dirent) => { const res = path.resolve(dir, dirent.name) return dirent.isDirectory() ? 
this.getFiles(res) : res }) diff --git a/studio-frontend/packages/server/src/commands/start.ts b/studio-frontend/packages/server/src/commands/start.ts index a31c37c..579053a 100644 --- a/studio-frontend/packages/server/src/commands/start.ts +++ b/studio-frontend/packages/server/src/commands/start.ts @@ -1,4 +1,4 @@ -import { Command, Flags } from '@oclif/core' +import { Command, Flags, Args } from '@oclif/core' import path from 'path' import * as Server from '../index' import * as DataSource from '../DataSource' @@ -14,7 +14,7 @@ enum EXIT_CODE { let processExitCode = EXIT_CODE.SUCCESS export default class Start extends Command { - static args = [] + static args = {} static flags = { FLOWISE_USERNAME: Flags.string(), FLOWISE_PASSWORD: Flags.string(), diff --git a/studio-frontend/packages/server/src/controllers/finetuning/index.ts b/studio-frontend/packages/server/src/controllers/finetuning/index.ts index add0c5a..a8e6dcc 100644 --- a/studio-frontend/packages/server/src/controllers/finetuning/index.ts +++ b/studio-frontend/packages/server/src/controllers/finetuning/index.ts @@ -3,6 +3,9 @@ import { StatusCodes } from 'http-status-codes' import { InternalFlowiseError } from '../../errors/internalFlowiseError' import finetuningService from '../../services/finetuning' +// Declare timer globals for Node.js +declare function setTimeout(cb: (...args: any[]) => void, ms?: number): any + /** * Upload a training file * POST /api/v1/finetuning/files @@ -154,6 +157,7 @@ const getFineTuningJobLogs = async (req: Request, res: Response, next: NextFunct /** * Download fine-tuning job output as a zip file * GET /api/v1/finetuning/download-ft/:jobId + * Creates zip, streams it to client, then deletes the zip file after download completes */ const downloadFineTuningOutput = async (req: Request, res: Response, next: NextFunction) => { try { @@ -166,31 +170,56 @@ const downloadFineTuningOutput = async (req: Request, res: Response, next: NextF ) } - // Get the zip file path (creates if 
needed, but returns immediately if already exists) + // Get the zip file path from service const filePath = await finetuningService.downloadFineTuningOutput(jobId) + if (!filePath) { throw new InternalFlowiseError( StatusCodes.NOT_FOUND, - `Error: finetuningController.downloadFineTuningOutput - output not found for job: ${jobId}` + `Error: finetuningController.downloadFineTuningOutput - zip file not found for job: ${jobId}. Please request download via WebSocket first.` ) } + const fs = require('fs') + + // Get file stats for Content-Length header (enables browser progress bar) + const fileStats = fs.statSync(filePath) + const fileSize = fileStats.size + // Set response headers for file download const fileName = `${jobId}-output.zip` res.setHeader('Content-Type', 'application/zip') res.setHeader('Content-Disposition', `attachment; filename="${fileName}"`) + res.setHeader('Content-Length', fileSize) // Stream the file - const fs = require('fs') const fileStream = fs.createReadStream(filePath) + + // Log when stream opens + fileStream.on('open', () => { + console.debug(`finetuningController.downloadFineTuningOutput - starting to stream: ${filePath} (${fileSize} bytes)`) + }) + + // Log when the file stream closes (end of stream on server side) + fileStream.on('close', () => { + console.debug(`finetuningController.downloadFineTuningOutput - end stream: ${filePath}`) + }) + + // Multiple users can download the same ZIP simultaneously fileStream.on('error', (err: any) => { - console.error('Error streaming fine-tuning output file:', err) + console.error('finetuningController.downloadFineTuningOutput - error streaming file:', err) if (!res.headersSent) { res.status(StatusCodes.INTERNAL_SERVER_ERROR).json({ error: 'Error streaming fine-tuning output file' }) } }) + + // Log when HTTP response finishes sending bytes to client + res.on('finish', () => { + console.debug(`finetuningController.downloadFineTuningOutput - response finished streaming: ${filePath}`) + }) + 
fileStream.pipe(res) } catch (error) { next(error) diff --git a/studio-frontend/packages/server/src/controllers/get-upload-file/index.ts b/studio-frontend/packages/server/src/controllers/get-upload-file/index.ts index a33b73e..09a8784 100644 --- a/studio-frontend/packages/server/src/controllers/get-upload-file/index.ts +++ b/studio-frontend/packages/server/src/controllers/get-upload-file/index.ts @@ -5,6 +5,12 @@ import { streamStorageFile } from 'flowise-components' import { StatusCodes } from 'http-status-codes' import { InternalFlowiseError } from '../../errors/internalFlowiseError' +interface AuthenticatedRequest extends Request { + user?: { + activeOrganizationId?: string + } +} + const streamUploadedFile = async (req: Request, res: Response, next: NextFunction) => { try { if (!req.query.chatflowId || !req.query.chatId || !req.query.fileName) { @@ -13,8 +19,9 @@ const streamUploadedFile = async (req: Request, res: Response, next: NextFunctio const chatflowId = req.query.chatflowId as string const chatId = req.query.chatId as string const fileName = req.query.fileName as string + const orgId = (req as AuthenticatedRequest).user?.activeOrganizationId || '' res.setHeader('Content-Disposition', contentDisposition(fileName)) - const fileStream = await streamStorageFile(chatflowId, chatId, fileName) + const fileStream = await streamStorageFile(chatflowId, chatId, fileName, orgId) if (!fileStream) throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `Error: streamStorageFile`) diff --git a/studio-frontend/packages/server/src/controllers/openai-assistants/index.ts b/studio-frontend/packages/server/src/controllers/openai-assistants/index.ts index 448d678..8c4175e 100644 --- a/studio-frontend/packages/server/src/controllers/openai-assistants/index.ts +++ b/studio-frontend/packages/server/src/controllers/openai-assistants/index.ts @@ -53,8 +53,9 @@ const getFileFromAssistant = async (req: Request, res: Response, next: NextFunct const chatflowId = 
req.body.chatflowId as string const chatId = req.body.chatId as string const fileName = req.body.fileName as string + const orgId = '' // Organization ID not required in this context res.setHeader('Content-Disposition', contentDisposition(fileName)) - const fileStream = await streamStorageFile(chatflowId, chatId, fileName) + const fileStream = await streamStorageFile(chatflowId, chatId, fileName, orgId) if (!fileStream) throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `Error: getFileFromAssistant`) diff --git a/studio-frontend/packages/server/src/routes/apikey/index.ts b/studio-frontend/packages/server/src/routes/apikey/index.ts index dbc043d..84b8fb4 100644 --- a/studio-frontend/packages/server/src/routes/apikey/index.ts +++ b/studio-frontend/packages/server/src/routes/apikey/index.ts @@ -1,6 +1,6 @@ import express from 'express' import apikeyController from '../../controllers/apikey' -const router = express.Router() +const router: express.Router = express.Router() // CREATE router.post('/', apikeyController.createApiKey) diff --git a/studio-frontend/packages/server/src/routes/assistants/index.ts b/studio-frontend/packages/server/src/routes/assistants/index.ts index 7b01cdd..1499855 100644 --- a/studio-frontend/packages/server/src/routes/assistants/index.ts +++ b/studio-frontend/packages/server/src/routes/assistants/index.ts @@ -1,7 +1,7 @@ import express from 'express' import assistantsController from '../../controllers/assistants' -const router = express.Router() +const router: express.Router = express.Router() // CREATE router.post('/', assistantsController.createAssistant) diff --git a/studio-frontend/packages/server/src/routes/chat-messages/index.ts b/studio-frontend/packages/server/src/routes/chat-messages/index.ts index ca90abc..e050453 100644 --- a/studio-frontend/packages/server/src/routes/chat-messages/index.ts +++ b/studio-frontend/packages/server/src/routes/chat-messages/index.ts @@ -1,6 +1,6 @@ import express from 'express' import 
chatMessageController from '../../controllers/chat-messages' -const router = express.Router() +const router: express.Router = express.Router() // CREATE router.post(['/', '/:id'], chatMessageController.createChatMessage) diff --git a/studio-frontend/packages/server/src/routes/chatflows-sandbox/index.ts b/studio-frontend/packages/server/src/routes/chatflows-sandbox/index.ts index 1b22281..13d9735 100644 --- a/studio-frontend/packages/server/src/routes/chatflows-sandbox/index.ts +++ b/studio-frontend/packages/server/src/routes/chatflows-sandbox/index.ts @@ -1,6 +1,6 @@ import express from 'express' import chatflowsController from '../../controllers/chatflows' -const router = express.Router() +const router: express.Router = express.Router() // Deploy a chatflow to sandbox router.post(['/deploy/','/deploy/:id'], chatflowsController.deployChatflowSandbox) diff --git a/studio-frontend/packages/server/src/routes/chatflows-streaming/index.ts b/studio-frontend/packages/server/src/routes/chatflows-streaming/index.ts index cc8dc25..41ad33b 100644 --- a/studio-frontend/packages/server/src/routes/chatflows-streaming/index.ts +++ b/studio-frontend/packages/server/src/routes/chatflows-streaming/index.ts @@ -1,7 +1,7 @@ import express from 'express' import chatflowsController from '../../controllers/chatflows' -const router = express.Router() +const router: express.Router = express.Router() // READ router.get(['/', '/:id'], chatflowsController.checkIfChatflowIsValidForStreaming) diff --git a/studio-frontend/packages/server/src/routes/chatflows-uploads/index.ts b/studio-frontend/packages/server/src/routes/chatflows-uploads/index.ts index 591718c..bcc55a8 100644 --- a/studio-frontend/packages/server/src/routes/chatflows-uploads/index.ts +++ b/studio-frontend/packages/server/src/routes/chatflows-uploads/index.ts @@ -1,7 +1,7 @@ import express from 'express' import chatflowsController from '../../controllers/chatflows' -const router = express.Router() +const router: express.Router = 
express.Router() // READ router.get(['/', '/:id'], chatflowsController.checkIfChatflowIsValidForUploads) diff --git a/studio-frontend/packages/server/src/routes/chatflows/index.ts b/studio-frontend/packages/server/src/routes/chatflows/index.ts index 669aaf1..aee15e5 100644 --- a/studio-frontend/packages/server/src/routes/chatflows/index.ts +++ b/studio-frontend/packages/server/src/routes/chatflows/index.ts @@ -1,6 +1,6 @@ import express from 'express' import chatflowsController from '../../controllers/chatflows' -const router = express.Router() +const router: express.Router = express.Router() // CREATE router.post('/', chatflowsController.saveChatflow) diff --git a/studio-frontend/packages/server/src/routes/components-credentials-icon/index.ts b/studio-frontend/packages/server/src/routes/components-credentials-icon/index.ts index 50d2213..5541202 100644 --- a/studio-frontend/packages/server/src/routes/components-credentials-icon/index.ts +++ b/studio-frontend/packages/server/src/routes/components-credentials-icon/index.ts @@ -1,6 +1,6 @@ import express from 'express' import componentsCredentialsController from '../../controllers/components-credentials' -const router = express.Router() +const router: express.Router = express.Router() // CREATE diff --git a/studio-frontend/packages/server/src/routes/components-credentials/index.ts b/studio-frontend/packages/server/src/routes/components-credentials/index.ts index 16aff2f..2d0ccfc 100644 --- a/studio-frontend/packages/server/src/routes/components-credentials/index.ts +++ b/studio-frontend/packages/server/src/routes/components-credentials/index.ts @@ -1,6 +1,6 @@ import express from 'express' import componentsCredentialsController from '../../controllers/components-credentials' -const router = express.Router() +const router: express.Router = express.Router() // READ router.get('/', componentsCredentialsController.getAllComponentsCredentials) diff --git a/studio-frontend/packages/server/src/routes/credentials/index.ts 
b/studio-frontend/packages/server/src/routes/credentials/index.ts index 9f118b4..1c143b0 100644 --- a/studio-frontend/packages/server/src/routes/credentials/index.ts +++ b/studio-frontend/packages/server/src/routes/credentials/index.ts @@ -1,6 +1,6 @@ import express from 'express' import credentialsController from '../../controllers/credentials' -const router = express.Router() +const router: express.Router = express.Router() // CREATE router.post('/', credentialsController.createCredential) diff --git a/studio-frontend/packages/server/src/routes/documentstore/index.ts b/studio-frontend/packages/server/src/routes/documentstore/index.ts index 3f4cb94..ddc774a 100644 --- a/studio-frontend/packages/server/src/routes/documentstore/index.ts +++ b/studio-frontend/packages/server/src/routes/documentstore/index.ts @@ -1,6 +1,6 @@ import express from 'express' import documentStoreController from '../../controllers/documentstore' -const router = express.Router() +const router: express.Router = express.Router() /** Document Store Routes */ // Create document store diff --git a/studio-frontend/packages/server/src/routes/export-import/index.ts b/studio-frontend/packages/server/src/routes/export-import/index.ts index 40c3930..ac6ac00 100644 --- a/studio-frontend/packages/server/src/routes/export-import/index.ts +++ b/studio-frontend/packages/server/src/routes/export-import/index.ts @@ -1,6 +1,6 @@ import express from 'express' import exportImportController from '../../controllers/export-import' -const router = express.Router() +const router: express.Router = express.Router() router.post('/export', exportImportController.exportData) diff --git a/studio-frontend/packages/server/src/routes/feedback/index.ts b/studio-frontend/packages/server/src/routes/feedback/index.ts index bcec7c7..b642535 100644 --- a/studio-frontend/packages/server/src/routes/feedback/index.ts +++ b/studio-frontend/packages/server/src/routes/feedback/index.ts @@ -1,6 +1,6 @@ import express from 'express' import 
feedbackController from '../../controllers/feedback' -const router = express.Router() +const router: express.Router = express.Router() // CREATE router.post(['/', '/:id'], feedbackController.createChatMessageFeedbackForChatflow) diff --git a/studio-frontend/packages/server/src/routes/fetch-links/index.ts b/studio-frontend/packages/server/src/routes/fetch-links/index.ts index a02abd5..462296a 100644 --- a/studio-frontend/packages/server/src/routes/fetch-links/index.ts +++ b/studio-frontend/packages/server/src/routes/fetch-links/index.ts @@ -1,6 +1,6 @@ import express from 'express' import fetchLinksController from '../../controllers/fetch-links' -const router = express.Router() +const router: express.Router = express.Router() // READ router.get('/', fetchLinksController.getAllLinks) diff --git a/studio-frontend/packages/server/src/routes/finetuning/index.ts b/studio-frontend/packages/server/src/routes/finetuning/index.ts index 35c6114..27c68a5 100644 --- a/studio-frontend/packages/server/src/routes/finetuning/index.ts +++ b/studio-frontend/packages/server/src/routes/finetuning/index.ts @@ -2,7 +2,7 @@ import express from 'express' import multer from 'multer' import finetuningController from '../../controllers/finetuning' -const router = express.Router() +const router: express.Router = express.Router() // Use memory storage for multer to store files in buffer const upload = multer({ storage: multer.memoryStorage() }) diff --git a/studio-frontend/packages/server/src/routes/flow-config/index.ts b/studio-frontend/packages/server/src/routes/flow-config/index.ts index bd84150..1d2cb69 100644 --- a/studio-frontend/packages/server/src/routes/flow-config/index.ts +++ b/studio-frontend/packages/server/src/routes/flow-config/index.ts @@ -1,6 +1,6 @@ import express from 'express' import flowConfigsController from '../../controllers/flow-configs' -const router = express.Router() +const router: express.Router = express.Router() // CREATE diff --git 
a/studio-frontend/packages/server/src/routes/get-upload-file/index.ts b/studio-frontend/packages/server/src/routes/get-upload-file/index.ts index 319c73e..714af31 100644 --- a/studio-frontend/packages/server/src/routes/get-upload-file/index.ts +++ b/studio-frontend/packages/server/src/routes/get-upload-file/index.ts @@ -1,6 +1,6 @@ import express from 'express' import getUploadFileController from '../../controllers/get-upload-file' -const router = express.Router() +const router: express.Router = express.Router() // READ router.get('/', getUploadFileController.streamUploadedFile) diff --git a/studio-frontend/packages/server/src/routes/get-upload-path/index.ts b/studio-frontend/packages/server/src/routes/get-upload-path/index.ts index 48827c9..ddfe3f0 100644 --- a/studio-frontend/packages/server/src/routes/get-upload-path/index.ts +++ b/studio-frontend/packages/server/src/routes/get-upload-path/index.ts @@ -1,6 +1,6 @@ import express from 'express' import getUploadPathController from '../../controllers/get-upload-path' -const router = express.Router() +const router: express.Router = express.Router() // READ router.get('/', getUploadPathController.getPathForUploads) diff --git a/studio-frontend/packages/server/src/routes/index.ts b/studio-frontend/packages/server/src/routes/index.ts index e1a92a5..bd57a14 100644 --- a/studio-frontend/packages/server/src/routes/index.ts +++ b/studio-frontend/packages/server/src/routes/index.ts @@ -43,7 +43,7 @@ import vectorRouter from './vectors' import verifyRouter from './verify' import versionRouter from './versions' -const router = express.Router() +const router: express.Router = express.Router() router.use('/ping', pingRouter) router.use('/apikey', apikeyRouter) diff --git a/studio-frontend/packages/server/src/routes/internal-chat-messages/index.ts b/studio-frontend/packages/server/src/routes/internal-chat-messages/index.ts index 5dcf1e6..b5814c8 100644 --- 
a/studio-frontend/packages/server/src/routes/internal-chat-messages/index.ts +++ b/studio-frontend/packages/server/src/routes/internal-chat-messages/index.ts @@ -1,6 +1,6 @@ import express from 'express' import chatMessagesController from '../../controllers/chat-messages' -const router = express.Router() +const router: express.Router = express.Router() // CREATE diff --git a/studio-frontend/packages/server/src/routes/internal-predictions/index.ts b/studio-frontend/packages/server/src/routes/internal-predictions/index.ts index 8e39dce..80fb8e1 100644 --- a/studio-frontend/packages/server/src/routes/internal-predictions/index.ts +++ b/studio-frontend/packages/server/src/routes/internal-predictions/index.ts @@ -1,6 +1,6 @@ import express from 'express' import internalPredictionsController from '../../controllers/internal-predictions' -const router = express.Router() +const router: express.Router = express.Router() // CREATE router.post(['/', '/:id'], internalPredictionsController.createInternalPrediction) diff --git a/studio-frontend/packages/server/src/routes/leads/index.ts b/studio-frontend/packages/server/src/routes/leads/index.ts index 64209ee..8acdfc4 100644 --- a/studio-frontend/packages/server/src/routes/leads/index.ts +++ b/studio-frontend/packages/server/src/routes/leads/index.ts @@ -1,6 +1,6 @@ import express from 'express' import leadsController from '../../controllers/leads' -const router = express.Router() +const router: express.Router = express.Router() // CREATE router.post('/', leadsController.createLeadInChatflow) diff --git a/studio-frontend/packages/server/src/routes/load-prompts/index.ts b/studio-frontend/packages/server/src/routes/load-prompts/index.ts index a12afba..b11e7c1 100644 --- a/studio-frontend/packages/server/src/routes/load-prompts/index.ts +++ b/studio-frontend/packages/server/src/routes/load-prompts/index.ts @@ -1,6 +1,6 @@ import express from 'express' import loadPromptsController from '../../controllers/load-prompts' -const router = 
express.Router() +const router: express.Router = express.Router() // CREATE router.post('/', loadPromptsController.createPrompt) diff --git a/studio-frontend/packages/server/src/routes/marketplaces/index.ts b/studio-frontend/packages/server/src/routes/marketplaces/index.ts index d97f96f..3242789 100644 --- a/studio-frontend/packages/server/src/routes/marketplaces/index.ts +++ b/studio-frontend/packages/server/src/routes/marketplaces/index.ts @@ -1,6 +1,6 @@ import express from 'express' import marketplacesController from '../../controllers/marketplaces' -const router = express.Router() +const router: express.Router = express.Router() // READ router.get('/templates', marketplacesController.getAllTemplates) diff --git a/studio-frontend/packages/server/src/routes/node-configs/index.ts b/studio-frontend/packages/server/src/routes/node-configs/index.ts index 4c44513..232c15b 100644 --- a/studio-frontend/packages/server/src/routes/node-configs/index.ts +++ b/studio-frontend/packages/server/src/routes/node-configs/index.ts @@ -1,6 +1,6 @@ import express from 'express' import nodeConfigsController from '../../controllers/node-configs' -const router = express.Router() +const router: express.Router = express.Router() // CREATE router.post('/', nodeConfigsController.getAllNodeConfigs) diff --git a/studio-frontend/packages/server/src/routes/node-custom-functions/index.ts b/studio-frontend/packages/server/src/routes/node-custom-functions/index.ts index 9fa33d4..8a66627 100644 --- a/studio-frontend/packages/server/src/routes/node-custom-functions/index.ts +++ b/studio-frontend/packages/server/src/routes/node-custom-functions/index.ts @@ -1,6 +1,6 @@ import express from 'express' import nodesRouter from '../../controllers/nodes' -const router = express.Router() +const router: express.Router = express.Router() // CREATE diff --git a/studio-frontend/packages/server/src/routes/node-icons/index.ts b/studio-frontend/packages/server/src/routes/node-icons/index.ts index 3dc51b9..27d755a 
100644 --- a/studio-frontend/packages/server/src/routes/node-icons/index.ts +++ b/studio-frontend/packages/server/src/routes/node-icons/index.ts @@ -1,6 +1,6 @@ import express from 'express' import nodesController from '../../controllers/nodes' -const router = express.Router() +const router: express.Router = express.Router() // CREATE diff --git a/studio-frontend/packages/server/src/routes/node-load-methods/index.ts b/studio-frontend/packages/server/src/routes/node-load-methods/index.ts index 317fd81..954252c 100644 --- a/studio-frontend/packages/server/src/routes/node-load-methods/index.ts +++ b/studio-frontend/packages/server/src/routes/node-load-methods/index.ts @@ -1,6 +1,6 @@ import express from 'express' import nodesRouter from '../../controllers/nodes' -const router = express.Router() +const router: express.Router = express.Router() router.post(['/', '/:name'], nodesRouter.getSingleNodeAsyncOptions) diff --git a/studio-frontend/packages/server/src/routes/nodes/index.ts b/studio-frontend/packages/server/src/routes/nodes/index.ts index 1c9e59c..e8a2b76 100644 --- a/studio-frontend/packages/server/src/routes/nodes/index.ts +++ b/studio-frontend/packages/server/src/routes/nodes/index.ts @@ -1,6 +1,6 @@ import express from 'express' import nodesController from '../../controllers/nodes' -const router = express.Router() +const router: express.Router = express.Router() // READ router.get('/', nodesController.getAllNodes) diff --git a/studio-frontend/packages/server/src/routes/openai-assistants-files/index.ts b/studio-frontend/packages/server/src/routes/openai-assistants-files/index.ts index 771e5fd..419fcef 100644 --- a/studio-frontend/packages/server/src/routes/openai-assistants-files/index.ts +++ b/studio-frontend/packages/server/src/routes/openai-assistants-files/index.ts @@ -3,7 +3,7 @@ import multer from 'multer' import path from 'path' import openaiAssistantsController from '../../controllers/openai-assistants' -const router = express.Router() +const router: 
express.Router = express.Router() const upload = multer({ dest: `${path.join(__dirname, '..', '..', '..', 'uploads')}/` }) router.post('/download/', openaiAssistantsController.getFileFromAssistant) diff --git a/studio-frontend/packages/server/src/routes/openai-assistants-vector-store/index.ts b/studio-frontend/packages/server/src/routes/openai-assistants-vector-store/index.ts index 1560e37..efd0f8e 100644 --- a/studio-frontend/packages/server/src/routes/openai-assistants-vector-store/index.ts +++ b/studio-frontend/packages/server/src/routes/openai-assistants-vector-store/index.ts @@ -3,7 +3,7 @@ import multer from 'multer' import path from 'path' import openaiAssistantsVectorStoreController from '../../controllers/openai-assistants-vector-store' -const router = express.Router() +const router: express.Router = express.Router() const upload = multer({ dest: `${path.join(__dirname, '..', '..', '..', 'uploads')}/` }) // CREATE diff --git a/studio-frontend/packages/server/src/routes/openai-assistants/index.ts b/studio-frontend/packages/server/src/routes/openai-assistants/index.ts index 1c82a92..cfac2ea 100644 --- a/studio-frontend/packages/server/src/routes/openai-assistants/index.ts +++ b/studio-frontend/packages/server/src/routes/openai-assistants/index.ts @@ -1,6 +1,6 @@ import express from 'express' import openaiAssistantsController from '../../controllers/openai-assistants' -const router = express.Router() +const router: express.Router = express.Router() // CREATE diff --git a/studio-frontend/packages/server/src/routes/ping/index.ts b/studio-frontend/packages/server/src/routes/ping/index.ts index b026710..205f0b7 100644 --- a/studio-frontend/packages/server/src/routes/ping/index.ts +++ b/studio-frontend/packages/server/src/routes/ping/index.ts @@ -1,6 +1,6 @@ import express from 'express' import pingController from '../../controllers/ping' -const router = express.Router() +const router: express.Router = express.Router() // GET router.get('/', 
pingController.getPing) diff --git a/studio-frontend/packages/server/src/routes/predictions/index.ts b/studio-frontend/packages/server/src/routes/predictions/index.ts index ded2d34..a8cf640 100644 --- a/studio-frontend/packages/server/src/routes/predictions/index.ts +++ b/studio-frontend/packages/server/src/routes/predictions/index.ts @@ -3,7 +3,7 @@ import multer from 'multer' import path from 'path' import predictionsController from '../../controllers/predictions' -const router = express.Router() +const router: express.Router = express.Router() const upload = multer({ dest: `${path.join(__dirname, '..', '..', '..', 'uploads')}/` }) diff --git a/studio-frontend/packages/server/src/routes/prompts-lists/index.ts b/studio-frontend/packages/server/src/routes/prompts-lists/index.ts index 9b92c36..51800a4 100644 --- a/studio-frontend/packages/server/src/routes/prompts-lists/index.ts +++ b/studio-frontend/packages/server/src/routes/prompts-lists/index.ts @@ -1,6 +1,6 @@ import express from 'express' import promptsListController from '../../controllers/prompts-lists' -const router = express.Router() +const router: express.Router = express.Router() // CREATE router.post('/', promptsListController.createPromptsList) diff --git a/studio-frontend/packages/server/src/routes/public-chatbots/index.ts b/studio-frontend/packages/server/src/routes/public-chatbots/index.ts index 18ee9e4..7750fe7 100644 --- a/studio-frontend/packages/server/src/routes/public-chatbots/index.ts +++ b/studio-frontend/packages/server/src/routes/public-chatbots/index.ts @@ -1,6 +1,6 @@ import express from 'express' import chatflowsController from '../../controllers/chatflows' -const router = express.Router() +const router: express.Router = express.Router() // CREATE diff --git a/studio-frontend/packages/server/src/routes/public-chatflows/index.ts b/studio-frontend/packages/server/src/routes/public-chatflows/index.ts index 640fe3a..31c930f 100644 --- 
a/studio-frontend/packages/server/src/routes/public-chatflows/index.ts +++ b/studio-frontend/packages/server/src/routes/public-chatflows/index.ts @@ -1,6 +1,6 @@ import express from 'express' import chatflowsController from '../../controllers/chatflows' -const router = express.Router() +const router: express.Router = express.Router() // CREATE diff --git a/studio-frontend/packages/server/src/routes/stats/index.ts b/studio-frontend/packages/server/src/routes/stats/index.ts index 8ca64d3..7708b3a 100644 --- a/studio-frontend/packages/server/src/routes/stats/index.ts +++ b/studio-frontend/packages/server/src/routes/stats/index.ts @@ -1,7 +1,7 @@ import express from 'express' import statsController from '../../controllers/stats' -const router = express.Router() +const router: express.Router = express.Router() // READ router.get(['/', '/:id'], statsController.getChatflowStats) diff --git a/studio-frontend/packages/server/src/routes/tools/index.ts b/studio-frontend/packages/server/src/routes/tools/index.ts index e97fb5c..b29e823 100644 --- a/studio-frontend/packages/server/src/routes/tools/index.ts +++ b/studio-frontend/packages/server/src/routes/tools/index.ts @@ -1,7 +1,7 @@ import express from 'express' import toolsController from '../../controllers/tools' -const router = express.Router() +const router: express.Router = express.Router() // CREATE router.post('/', toolsController.createTool) diff --git a/studio-frontend/packages/server/src/routes/upsert-history/index.ts b/studio-frontend/packages/server/src/routes/upsert-history/index.ts index 3e3c9c1..cdf5512 100644 --- a/studio-frontend/packages/server/src/routes/upsert-history/index.ts +++ b/studio-frontend/packages/server/src/routes/upsert-history/index.ts @@ -1,6 +1,6 @@ import express from 'express' import upsertHistoryController from '../../controllers/upsert-history' -const router = express.Router() +const router: express.Router = express.Router() // CREATE diff --git 
a/studio-frontend/packages/server/src/routes/variables/index.ts b/studio-frontend/packages/server/src/routes/variables/index.ts index f6d3625..945f99f 100644 --- a/studio-frontend/packages/server/src/routes/variables/index.ts +++ b/studio-frontend/packages/server/src/routes/variables/index.ts @@ -1,7 +1,7 @@ import express from 'express' import variablesController from '../../controllers/variables' -const router = express.Router() +const router: express.Router = express.Router() // CREATE router.post('/', variablesController.createVariable) diff --git a/studio-frontend/packages/server/src/routes/vectors/index.ts b/studio-frontend/packages/server/src/routes/vectors/index.ts index cc257bc..843ea63 100644 --- a/studio-frontend/packages/server/src/routes/vectors/index.ts +++ b/studio-frontend/packages/server/src/routes/vectors/index.ts @@ -3,7 +3,7 @@ import multer from 'multer' import path from 'path' import vectorsController from '../../controllers/vectors' -const router = express.Router() +const router: express.Router = express.Router() const upload = multer({ dest: `${path.join(__dirname, '..', '..', '..', 'uploads')}/` }) diff --git a/studio-frontend/packages/server/src/routes/verify/index.ts b/studio-frontend/packages/server/src/routes/verify/index.ts index b00a59a..15d358c 100644 --- a/studio-frontend/packages/server/src/routes/verify/index.ts +++ b/studio-frontend/packages/server/src/routes/verify/index.ts @@ -1,6 +1,6 @@ import express from 'express' import apikeyController from '../../controllers/apikey' -const router = express.Router() +const router: express.Router = express.Router() // READ router.get(['/apikey/', '/apikey/:apikey'], apikeyController.verifyApiKey) diff --git a/studio-frontend/packages/server/src/routes/versions/index.ts b/studio-frontend/packages/server/src/routes/versions/index.ts index 8aa60a2..1dd434d 100644 --- a/studio-frontend/packages/server/src/routes/versions/index.ts +++ 
b/studio-frontend/packages/server/src/routes/versions/index.ts @@ -1,6 +1,6 @@ import express from 'express' import versionsController from '../../controllers/versions' -const router = express.Router() +const router: express.Router = express.Router() // READ router.get('/', versionsController.getVersion) diff --git a/studio-frontend/packages/server/src/services/finetuning/index.ts b/studio-frontend/packages/server/src/services/finetuning/index.ts index 0fed1c8..da74ff9 100644 --- a/studio-frontend/packages/server/src/services/finetuning/index.ts +++ b/studio-frontend/packages/server/src/services/finetuning/index.ts @@ -6,12 +6,17 @@ import * as path from 'path' import { exec } from 'child_process' import { promisify } from 'util' import { StatusCodes } from 'http-status-codes' +import archiver from 'archiver' import { InternalFlowiseError } from '../../errors/internalFlowiseError' import { getErrorMessage } from '../../errors/utils' import { getRunningExpressApp } from '../../utils/getRunningExpressApp' import { FineTuningJob } from '../../database/entities/FineTuningJob' import logger from '../../utils/logger' +// Declare timer globals for Node.js +declare function setTimeout(cb: (...args: any[]) => void, ms?: number): any +declare function clearTimeout(id: any): void + const execAsync = promisify(exec) const FINETUNING_SERVICE_URL = process.env.FINETUNING_HOST ? 
`http://${process.env.FINETUNING_HOST}:8015` : 'undefined' @@ -35,75 +40,6 @@ const axiosClient: AxiosInstance = axios.create({ // In-memory mapping: filename (raw and decoded) -> { id, rawFilename } const uploadedFileIdMap: Map = new Map() -/** - * Helper function to zip a fine-tuning job output directory - * Checks if zip already exists and is up-to-date before creating a new one - * @param outputDir - Full path to the output directory for the job - * @param jobId - ID of the fine-tuning job - * @returns Path to the zipped file or null if failed - */ -const ensureFineTuningOutputZip = async (outputDir: string, jobId: string): Promise => { - try { - // eslint-disable-next-line no-console - console.debug(`finetuningService.ensureFineTuningOutputZip - processing output for job: ${jobId}`) - - // Validate output directory exists - if (!fs.existsSync(outputDir)) { - // eslint-disable-next-line no-console - console.warn(`finetuningService.ensureFineTuningOutputZip - output directory not found: ${outputDir}`) - return null - } - - const zipFilePath = `${outputDir}.zip` - const outputStats = fs.statSync(outputDir) - - // Check if zip exists and is up-to-date - if (fs.existsSync(zipFilePath)) { - const zipStats = fs.statSync(zipFilePath) - // If zip is newer than the output directory, skip re-zipping - if (zipStats.mtimeMs > outputStats.mtimeMs) { - // eslint-disable-next-line no-console - console.debug(`finetuningService.ensureFineTuningOutputZip - zip already up-to-date: ${zipFilePath}`) - return zipFilePath - } - // Remove outdated zip - try { - fs.unlinkSync(zipFilePath) - // eslint-disable-next-line no-console - console.debug(`finetuningService.ensureFineTuningOutputZip - removed outdated zip: ${zipFilePath}`) - } catch (e) { - // eslint-disable-next-line no-console - console.warn(`finetuningService.ensureFineTuningOutputZip - failed to remove outdated zip: ${e}`) - } - } - - // Create zip file using tar (more efficient than node zip libraries) - // 
eslint-disable-next-line no-console - console.debug(`finetuningService.ensureFineTuningOutputZip - starting to zip output for job ${jobId}`) - try { - const parentDir = path.dirname(outputDir) - const dirName = path.basename(outputDir) - const cmd = `cd "${parentDir}" && tar -czf "${path.basename(zipFilePath)}" "${dirName}"` - await execAsync(cmd, { - maxBuffer: 1024 * 1024 * 100, // 100MB buffer for large outputs - timeout: 600000 // 10 minute timeout - }) - - // eslint-disable-next-line no-console - console.debug(`finetuningService.ensureFineTuningOutputZip - zip created successfully for job ${jobId}: ${zipFilePath}`) - return zipFilePath - } catch (execErr: any) { - // eslint-disable-next-line no-console - console.error(`finetuningService.ensureFineTuningOutputZip - tar failed for job ${jobId}: ${execErr?.message || execErr}`) - return null - } - } catch (error: any) { - // eslint-disable-next-line no-console - console.error(`finetuningService.ensureFineTuningOutputZip - error: ${error?.message || error}`) - return null - } -} - /** * Upload a training file to the finetuning service */ @@ -248,136 +184,298 @@ const updateJobInDb = async (jobId: string, updates: Partial) => { } } -/** - * Create a fine-tuning job - */ -const createFineTuningJob = async (jobConfig: { - training_file: string - model: string - General?: { - task?: string - lora_config?: any + +// Utility: convert "true"/"false" (string) to boolean; leave non-boolean inputs as-is. +const coerceBooleanString = (v: any): any => { + if (typeof v === 'string') { + const s = v.trim().toLowerCase(); + if (s === 'true') return true; + if (s === 'false') return false; + } + return v; +}; + +// Utility: ensure padding is one of allowed values or a boolean. +// If pad_to_max is true, force "max_length". 
+const sanitizePadding = (dataset: any) => { + if (!dataset) return; + + // Coerce common string booleans first + if (dataset.hasOwnProperty('padding')) { + const val = dataset.padding; + const coerced = coerceBooleanString(val); + + // Allowed enum values + const allowedEnums = ['longest', 'max_length', 'do_not_pad'] as const; + + if (typeof coerced === 'boolean') { + dataset.padding = coerced; // transformers accepts boolean true/false + } else if (typeof coerced === 'string') { + const s = coerced.trim().toLowerCase(); + if (allowedEnums.includes(s as any)) { + dataset.padding = s; // valid enum string + } else if (s === 'true') { + // Defensive: sometimes people pass "true" explicitly + dataset.padding = true; + } else if (s === 'false') { + dataset.padding = false; + } else { + // Fallback: pick a safe default + dataset.padding = 'max_length'; + } + } else { + // Fallback if something weird comes in + dataset.padding = 'max_length'; } - Dataset?: { - max_length?: number - query_max_len?: number - passage_max_len?: number - padding?: string + } + + // If pad_to_max is present as string, coerce to boolean. + if (dataset.hasOwnProperty('pad_to_max')) { + dataset.pad_to_max = coerceBooleanString(dataset.pad_to_max); + } + + // If pad_to_max is true, padding MUST be "max_length" for consistency + if (dataset.pad_to_max === true) { + dataset.padding = 'max_length'; + // Also ensure max_length is set when needed + if (!dataset.max_length && (dataset.query_max_len || dataset.passage_max_len)) { + // If per-type max lengths exist, we can keep them; otherwise default global max_length. + dataset.max_length = Math.max( + Number(dataset.query_max_len || 0), + Number(dataset.passage_max_len || 0), + 512 + ); } - Training?: { - epochs?: number - batch_size?: number - gradient_accumulation_steps?: number + } +}; + +// Utility: coerce other Dataset booleans and align preprocessor with task. 
+const sanitizeDataset = (payload: any) => { + const ds = payload?.Dataset; + if (!ds) return; + + // Coerce known booleans that might arrive as strings + if (ds.hasOwnProperty('truncation')) ds.truncation = coerceBooleanString(ds.truncation); + if (ds.hasOwnProperty('mask_input')) ds.mask_input = coerceBooleanString(ds.mask_input); + if (ds.hasOwnProperty('mask_response')) ds.mask_response = coerceBooleanString(ds.mask_response); + + sanitizePadding(ds); + + // Align preprocessor with task if embedding + const task = payload?.General?.task || payload?.task; + if (task === 'embedding') { + if (ds.data_preprocess_type === 'neural_chat') { + ds.data_preprocess_type = 'embedding'; } -}) => { - try { - // Work with the jobConfig as-provided by the UI. - const forwardedJobConfig = { ...jobConfig } + // Masking typically not used for embeddings + if (typeof ds.mask_input !== 'undefined') ds.mask_input = false; + if (typeof ds.mask_response !== 'undefined') ds.mask_response = false; + } + + // Optional: ensure padding_side/truncation_side sanity (keep defaults if not set) + if (!ds.padding_side) ds.padding_side = 'right'; + if (!ds.truncation_side) ds.truncation_side = 'right'; +}; + +// Utility: avoid DDP for single-worker CPU runs (if these fields are present) +const sanitizeTraining = (payload: any) => { + const tr = payload?.Training; + if (!tr) return; + // If accelerate_mode is DDP but workers <= 1, remove accelerate_mode + // so the backend can choose a sensible default. + const workers = Number(tr.num_training_workers ?? 1); + const device = (tr.device ?? '').toString().toLowerCase(); + if (tr.accelerate_mode === 'DDP' && workers <= 1) { + try { + delete tr.accelerate_mode + } catch (e) { + // Fallback: set to undefined to avoid sending an invalid literal + // when the payload is serialized. 
+ // eslint-disable-next-line no-param-reassign + tr.accelerate_mode = undefined as any + } +} - // (Removed verbose initial jobConfig logging to reduce noise) - const sanitizedPayload = JSON.parse(JSON.stringify(forwardedJobConfig)) + // Optional: if device unspecified but accelerate_mode DDP, leave as-is; trainer may decide backend. +}; - // Remove empty nested objects that may confuse the server - if (sanitizedPayload.General && Object.keys(sanitizedPayload.General).length === 0) { - delete sanitizedPayload.General - } - if (sanitizedPayload.Dataset && Object.keys(sanitizedPayload.Dataset).length === 0) { - delete sanitizedPayload.Dataset - } - if (sanitizedPayload.Training && Object.keys(sanitizedPayload.Training).length === 0) { - delete sanitizedPayload.Training - } - - if (sanitizedPayload.training_file && typeof sanitizedPayload.training_file === 'string') { - const originalFilename = sanitizedPayload.training_file - - // Try to decode first in case it's URL-encoded - let lookupKey = originalFilename - try { - const decoded = decodeURIComponent(originalFilename) - lookupKey = decoded - } catch (e) { - // ignore decode errors - } - - // Check if we have a stored mapping from the upload - let stored = uploadedFileIdMap.get(lookupKey) - if (!stored && lookupKey !== originalFilename) { - // Also try the original (encoded) key - stored = uploadedFileIdMap.get(originalFilename) - } - - if (stored && stored.rawFilename) { - sanitizedPayload.training_file = stored.rawFilename - } - } +// Central sanitizer that mutates the payload +const sanitizeFineTuningPayload = (payload: any) => { + if (!payload || typeof payload !== 'object') return payload; - // Try a sequence of attempts to accommodate naming/encoding/id differences. 
- const attemptPost = async (payload: any, label = 'attempt') => { - try { - // eslint-disable-next-line no-console - console.debug(`finetuningService.createFineTuningJob - ${label} payload:`, payload) - const resp = await axiosClient.post('/v1/fine_tuning/jobs', payload) - // eslint-disable-next-line no-console - console.debug(`finetuningService.createFineTuningJob - ${label} response:`, typeof resp?.data === 'string' ? resp.data : JSON.stringify(resp?.data)) - return resp - } catch (err: any) { - // Log detailed info for debugging - try { - // eslint-disable-next-line no-console - console.error(`finetuningService.createFineTuningJob - ${label} failed`, { - message: err?.message, - status: err?.response?.status, - responseData: typeof err?.response?.data === 'string' ? err.response.data : JSON.stringify(err?.response?.data), - payload - }) - } catch (logErr) { - // ignore logging errors - } - throw err + // Coerce top-level task if provided as string-like (no-op if already OK) + if (payload.General && typeof payload.General.task === 'string') { + payload.General.task = payload.General.task.trim(); + } + + // If lora_config is explicitly null/empty, remove it entirely so we don't + // send `lora_config: null` to the backend which may assert on it. 
+ try { + if (payload.General && Object.prototype.hasOwnProperty.call(payload.General, 'lora_config')) { + const lc = payload.General.lora_config + const isEmptyObject = lc && typeof lc === 'object' && Object.keys(lc).length === 0 + if (lc === null || typeof lc === 'undefined' || isEmptyObject) { + delete payload.General.lora_config } } + } catch (e) { + // ignore + } + + // Apply dataset and training sanitizers + sanitizeDataset(payload); + sanitizeTraining(payload); + + return payload; +}; + +const createFineTuningJob = async (jobConfig: { + training_file: string; + model: string; + General?: { + task?: string; + lora_config?: any; + }; + Dataset?: { + max_length?: number; + query_max_len?: number; + passage_max_len?: number; + padding?: string; + }; + Training?: { + epochs?: number; + batch_size?: number; + gradient_accumulation_steps?: number; + }; +}) => { + try { + // Work with the jobConfig as-provided by the UI. + const forwardedJobConfig = { ...jobConfig }; + + // (Removed verbose initial jobConfig logging to reduce noise) + const sanitizedPayload = JSON.parse(JSON.stringify(forwardedJobConfig)); + + // Remove empty nested objects that may confuse the server + if (sanitizedPayload.General && Object.keys(sanitizedPayload.General).length === 0) { + delete sanitizedPayload.General; + } + if (sanitizedPayload.Dataset && Object.keys(sanitizedPayload.Dataset).length === 0) { + delete sanitizedPayload.Dataset; + } + if (sanitizedPayload.Training && Object.keys(sanitizedPayload.Training).length === 0) { + delete sanitizedPayload.Training; + } - // Send the sanitized payload - const resp = await attemptPost(sanitizedPayload, 'final') - const respData = resp.data - // If the external service didn't echo back the task, preserve task from our sanitized payload + // Normalize file name using uploadedFileIdMap + if (sanitizedPayload.training_file && typeof sanitizedPayload.training_file === 'string') { + const originalFilename = sanitizedPayload.training_file; + + // 
Try to decode first in case it's URL-encoded + let lookupKey = originalFilename; + try { + const decoded = decodeURIComponent(originalFilename); + lookupKey = decoded; + } catch (e) { + // ignore decode errors + } + + // Check if we have a stored mapping from the upload + let stored = uploadedFileIdMap.get(lookupKey); + if (!stored && lookupKey !== originalFilename) { + // Also try the original (encoded) key + stored = uploadedFileIdMap.get(originalFilename); + } + + if (stored && stored.rawFilename) { + sanitizedPayload.training_file = stored.rawFilename; + } + } + + // >>> NEW: sanitize fragile fields (padding, truncation, preprocessor, DDP, etc.) + sanitizeFineTuningPayload(sanitizedPayload); + + // Try a sequence of attempts to accommodate naming/encoding/id differences. + const attemptPost = async (payload: any, label = 'attempt') => { + try { + // eslint-disable-next-line no-console + console.debug(`finetuningService.createFineTuningJob - ${label} payload:`, payload); + const resp = await axiosClient.post('/v1/fine_tuning/jobs', payload); + // eslint-disable-next-line no-console + console.debug( + `finetuningService.createFineTuningJob - ${label} response:`, + typeof resp?.data === 'string' ? resp.data : JSON.stringify(resp?.data) + ); + return resp; + } catch (err: any) { + // Log detailed info for debugging try { - const payloadTask = sanitizedPayload?.General?.task || sanitizedPayload?.task - if (payloadTask && !respData.task) { - // attach task so persistJobToDb stores it - try { respData.task = payloadTask } catch (e) { /* ignore */ } - } - } catch (e) { - // ignore + // eslint-disable-next-line no-console + console.error(`finetuningService.createFineTuningJob - ${label} failed`, { + message: err?.message, + status: err?.response?.status, + responseData: + typeof err?.response?.data === 'string' + ? 
err.response.data + : JSON.stringify(err?.response?.data), + payload, + }); + } catch (logErr) { + // ignore logging errors } + throw err; + } + }; + + // Send the sanitized payload + const resp = await attemptPost(sanitizedPayload, 'final'); + const respData = resp.data; - // Persist to local DB + // If the external service didn't echo back the task, preserve task from our sanitized payload + try { + const payloadTask = sanitizedPayload?.General?.task || sanitizedPayload?.task; + if (payloadTask && !respData.task) { + // attach task so persistJobToDb stores it try { - await persistJobToDb(respData) + respData.task = payloadTask; } catch (e) { - // ignore + /* ignore */ } - return respData - } catch (error: any) { - // Log error details from external service if available for debugging - try { - // eslint-disable-next-line no-console - console.error('finetuningService.createFineTuningJob - axios error:', { - message: error.message, - responseData: error.response ? (typeof error.response.data === 'string' ? error.response.data : JSON.stringify(error.response.data)) : undefined, - status: error.response ? error.response.status : undefined, - headers: error.response ? error.response.headers : undefined - }) - } catch (logErr) { - // ignore logging errors - } - throw new InternalFlowiseError( - StatusCodes.INTERNAL_SERVER_ERROR, - `Error: finetuningService.createFineTuningJob - ${getErrorMessage(error)}` - ) + } + } catch (e) { + // ignore } -} + + // Persist to local DB + try { + await persistJobToDb(respData); + } catch (e) { + // ignore + } + return respData; + } catch (error: any) { + // Log error details from external service if available for debugging + try { + // eslint-disable-next-line no-console + console.error('finetuningService.createFineTuningJob - axios error:', { + message: error.message, + responseData: error.response + ? typeof error.response.data === 'string' + ? 
error.response.data + : JSON.stringify(error.response.data) + : undefined, + status: error.response ? error.response.status : undefined, + headers: error.response ? error.response.headers : undefined, + }); + } catch (logErr) { + // ignore logging errors + } + throw new InternalFlowiseError( + StatusCodes.INTERNAL_SERVER_ERROR, + `Error: finetuningService.createFineTuningJob - ${getErrorMessage(error)}` + ); + } +}; + /** * List all fine-tuning jobs @@ -602,12 +700,12 @@ const deleteFineTuningJob = async (fineTuningJobId: string) => { } /** - * Download fine-tuning job output as a zip file - * Creates zip if needed, or returns existing zip immediately + * Prepare fine-tuning job output as a zip file for download + * Called by WebSocket to create and cache the zip * @param jobId - ID of the fine-tuning job * @returns Path to the zipped file or null if not found */ -const downloadFineTuningOutput = async (jobId: string): Promise => { +const prepareFineTuningOutputZip = async (jobId: string): Promise => { try { if (!jobId) { throw new InternalFlowiseError(StatusCodes.BAD_REQUEST, 'Job ID is required') @@ -635,18 +733,82 @@ const downloadFineTuningOutput = async (jobId: string): Promise = throw new InternalFlowiseError(StatusCodes.FORBIDDEN, 'Invalid job output path') } - // Ensure the output is zipped (returns immediately if zip is up-to-date) - const finalZipPath = await ensureFineTuningOutputZip(jobOutputDir, jobId) - if (!finalZipPath) { - throw new InternalFlowiseError( - StatusCodes.INTERNAL_SERVER_ERROR, - `Failed to create zip for job ${jobId}` - ) - } + const zipFilePath = `${jobOutputDir}.zip` + // Create zip file using archiver // eslint-disable-next-line no-console - console.debug(`finetuningService.downloadFineTuningOutput - file ready for download: ${finalZipPath}`) - return finalZipPath + console.debug(`finetuningService.downloadFineTuningOutput - creating zip for job ${jobId}`) + + // Log directory contents for diagnostics + try { + const dirContents = 
fs.readdirSync(jobOutputDir) + // eslint-disable-next-line no-console + console.debug(`finetuningService.downloadFineTuningOutput - output directory contains ${dirContents.length} items: ${dirContents.slice(0, 10).join(', ')}${dirContents.length > 10 ? '...' : ''}`) + } catch (e) { + // eslint-disable-next-line no-console + console.warn(`finetuningService.downloadFineTuningOutput - could not list directory: ${e}`) + } + + try { + return await new Promise((resolve, reject) => { + const output = fs.createWriteStream(zipFilePath) + const archive = archiver('zip', { + zlib: { level: 0 } // no compression for speed + }) + + const zipTimeoutMs = 30 * 60 * 1000 // 30 minutes + let resolved = false + + const timeoutHandle = setTimeout(() => { + if (!resolved) { + resolved = true + // eslint-disable-next-line no-console + console.error(`finetuningService.downloadFineTuningOutput - archiver timeout for job ${jobId}`) + try { output.destroy() } catch (e) {} + try { archive.destroy() } catch (e) {} + reject(new Error('Archiver timeout')) + } + }, zipTimeoutMs) + + output.on('close', () => { + if (!resolved) { + resolved = true + clearTimeout(timeoutHandle) + // eslint-disable-next-line no-console + console.debug(`finetuningService.downloadFineTuningOutput - zip created: ${zipFilePath}`) + resolve(zipFilePath) + } + }) + + output.on('error', (err: any) => { + if (!resolved) { + resolved = true + clearTimeout(timeoutHandle) + // eslint-disable-next-line no-console + console.error(`finetuningService.downloadFineTuningOutput - write stream error: ${err?.message || err}`) + reject(err) + } + }) + + archive.on('error', (err: any) => { + if (!resolved) { + resolved = true + clearTimeout(timeoutHandle) + // eslint-disable-next-line no-console + console.error(`finetuningService.downloadFineTuningOutput - archiver error: ${err?.message || err}`) + reject(err) + } + }) + + archive.pipe(output) + archive.directory(jobOutputDir, path.basename(jobOutputDir)) + archive.finalize() + }) + } 
catch (archiverErr: any) { + // eslint-disable-next-line no-console + console.error(`finetuningService.downloadFineTuningOutput - archiver failed for job ${jobId}: ${archiverErr?.message || archiverErr}`) + return null + } } catch (error: any) { if (error instanceof InternalFlowiseError) { throw error @@ -660,6 +822,46 @@ const downloadFineTuningOutput = async (jobId: string): Promise = } } +/** + * Download fine-tuning job output - HTTP endpoint + * Returns path to cached ZIP file + * @param jobId - ID of the fine-tuning job + * @returns Path to the zipped file or null if not found + */ +const downloadFineTuningOutput = async (jobId: string): Promise => { + try { + if (!jobId) { + return null + } + + const OUTPUT_BASE_DIR = '/tmp/finetuning/output' + const zipFilePath = `${OUTPUT_BASE_DIR}/${jobId}.zip` + + // Check if zip file exists + if (fs.existsSync(zipFilePath)) { + try { + const stat = fs.statSync(zipFilePath) + if (stat.size > 0) { + // eslint-disable-next-line no-console + console.debug(`finetuningService.downloadFineTuningOutput - returning cached zip: ${zipFilePath}`) + return zipFilePath + } + } catch (e) { + // eslint-disable-next-line no-console + console.warn(`finetuningService.downloadFineTuningOutput - could not stat zip file: ${e}`) + } + } + + // eslint-disable-next-line no-console + console.warn(`finetuningService.downloadFineTuningOutput - zip file not found: ${zipFilePath}`) + return null + } catch (error: any) { + // eslint-disable-next-line no-console + console.error(`finetuningService.downloadFineTuningOutput - error: ${error?.message || error}`) + return null + } +} + /** * Get logs for a fine-tuning job by querying the Ray head node HTTP API. 
* It will call: http:///api/jobs//logs @@ -748,5 +950,6 @@ export default { cancelFineTuningJob, deleteFineTuningJob, getFineTuningJobLogs, + prepareFineTuningOutputZip, downloadFineTuningOutput } diff --git a/studio-frontend/packages/server/src/services/openai-assistants-vector-store/index.ts b/studio-frontend/packages/server/src/services/openai-assistants-vector-store/index.ts index 46f9c18..7b598e1 100644 --- a/studio-frontend/packages/server/src/services/openai-assistants-vector-store/index.ts +++ b/studio-frontend/packages/server/src/services/openai-assistants-vector-store/index.ts @@ -24,7 +24,7 @@ const getAssistantVectorStore = async (credentialId: string, vectorStoreId: stri } const openai = new OpenAI({ apiKey: openAIApiKey }) - const dbResponse = await openai.beta.vectorStores.retrieve(vectorStoreId) + const dbResponse = await openai.vectorStores.retrieve(vectorStoreId) return dbResponse } catch (error) { throw new InternalFlowiseError( @@ -51,7 +51,7 @@ const listAssistantVectorStore = async (credentialId: string) => { } const openai = new OpenAI({ apiKey: openAIApiKey }) - const dbResponse = await openai.beta.vectorStores.list() + const dbResponse = await openai.vectorStores.list() return dbResponse.data } catch (error) { throw new InternalFlowiseError( @@ -61,7 +61,7 @@ const listAssistantVectorStore = async (credentialId: string) => { } } -const createAssistantVectorStore = async (credentialId: string, obj: OpenAI.Beta.VectorStores.VectorStoreCreateParams) => { +const createAssistantVectorStore = async (credentialId: string, obj: OpenAI.VectorStores.VectorStoreCreateParams) => { try { const appServer = getRunningExpressApp() const credential = await appServer.AppDataSource.getRepository(Credential).findOneBy({ @@ -78,7 +78,7 @@ const createAssistantVectorStore = async (credentialId: string, obj: OpenAI.Beta } const openai = new OpenAI({ apiKey: openAIApiKey }) - const dbResponse = await openai.beta.vectorStores.create(obj) + const dbResponse = await 
openai.vectorStores.create(obj) return dbResponse } catch (error) { throw new InternalFlowiseError( @@ -91,7 +91,7 @@ const createAssistantVectorStore = async (credentialId: string, obj: OpenAI.Beta const updateAssistantVectorStore = async ( credentialId: string, vectorStoreId: string, - obj: OpenAI.Beta.VectorStores.VectorStoreUpdateParams + obj: OpenAI.VectorStores.VectorStoreUpdateParams ) => { try { const appServer = getRunningExpressApp() @@ -109,8 +109,8 @@ const updateAssistantVectorStore = async ( } const openai = new OpenAI({ apiKey: openAIApiKey }) - const dbResponse = await openai.beta.vectorStores.update(vectorStoreId, obj) - const vectorStoreFiles = await openai.beta.vectorStores.files.list(vectorStoreId) + const dbResponse = await openai.vectorStores.update(vectorStoreId, obj) + const vectorStoreFiles = await openai.vectorStores.files.list(vectorStoreId) if (vectorStoreFiles.data?.length) { const files = [] for (const file of vectorStoreFiles.data) { @@ -145,7 +145,7 @@ const deleteAssistantVectorStore = async (credentialId: string, vectorStoreId: s } const openai = new OpenAI({ apiKey: openAIApiKey }) - const dbResponse = await openai.beta.vectorStores.del(vectorStoreId) + const dbResponse = await openai.vectorStores.del(vectorStoreId) return dbResponse } catch (error) { throw new InternalFlowiseError( @@ -189,7 +189,7 @@ const uploadFilesToAssistantVectorStore = async ( const file_ids = [...uploadedFiles.map((file) => file.id)] - const res = await openai.beta.vectorStores.fileBatches.createAndPoll(vectorStoreId, { + const res = await openai.vectorStores.fileBatches.createAndPoll(vectorStoreId, { file_ids }) if (res.status === 'completed' && res.file_counts.completed === uploadedFiles.length) return uploadedFiles @@ -231,7 +231,7 @@ const deleteFilesFromAssistantVectorStore = async (credentialId: string, vectorS const deletedFileIds = [] let count = 0 for (const file of file_ids) { - const res = await openai.beta.vectorStores.files.del(vectorStoreId, 
file) + const res = await openai.vectorStores.files.del(vectorStoreId, file) if (res.deleted) { deletedFileIds.push(file) count += 1 diff --git a/studio-frontend/packages/server/src/services/openai-assistants/index.ts b/studio-frontend/packages/server/src/services/openai-assistants/index.ts index c908a54..7c3df3e 100644 --- a/studio-frontend/packages/server/src/services/openai-assistants/index.ts +++ b/studio-frontend/packages/server/src/services/openai-assistants/index.ts @@ -68,10 +68,10 @@ const getSingleOpenaiAssistant = async (credentialId: string, assistantId: strin if (dbResponse.tool_resources?.file_search?.vector_store_ids?.length) { // Since there can only be 1 vector store per assistant const vectorStoreId = dbResponse.tool_resources.file_search.vector_store_ids[0] - const vectorStoreFiles = await openai.beta.vectorStores.files.list(vectorStoreId) + const vectorStoreFiles = await openai.vectorStores.files.list(vectorStoreId) const fileIds = vectorStoreFiles.data?.map((file) => file.id) ?? 
[] ;(dbResponse.tool_resources.file_search as any).files = [...existingFiles.filter((file) => fileIds.includes(file.id))] - ;(dbResponse.tool_resources.file_search as any).vector_store_object = await openai.beta.vectorStores.retrieve(vectorStoreId) + ;(dbResponse.tool_resources.file_search as any).vector_store_object = await openai.vectorStores.retrieve(vectorStoreId) } return dbResponse } catch (error) { diff --git a/studio-frontend/packages/server/src/utils/SSEStreamer.ts b/studio-frontend/packages/server/src/utils/SSEStreamer.ts index 4217925..ef7139d 100644 --- a/studio-frontend/packages/server/src/utils/SSEStreamer.ts +++ b/studio-frontend/packages/server/src/utils/SSEStreamer.ts @@ -211,8 +211,115 @@ export class SSEStreamer implements IServerSideEventStreamer { if (apiResponse.memoryType) { metadataJson['memoryType'] = apiResponse.memoryType } + if (apiResponse.followUpPrompts) { + metadataJson['followUpPrompts'] = + typeof apiResponse.followUpPrompts === 'string' ? JSON.parse(apiResponse.followUpPrompts) : apiResponse.followUpPrompts + } + if (apiResponse.flowVariables) { + metadataJson['flowVariables'] = + typeof apiResponse.flowVariables === 'string' ? 
JSON.parse(apiResponse.flowVariables) : apiResponse.flowVariables + } if (Object.keys(metadataJson).length > 0) { this.streamCustomEvent(chatId, 'metadata', metadataJson) } } + + streamCalledToolsEvent(chatId: string, data: any): void { + const client = this.clients[chatId] + if (client) { + const clientResponse = { + event: 'calledTools', + data: data + } + client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n') + } + } + + streamAgentFlowExecutedDataEvent(chatId: string, data: any): void { + const client = this.clients[chatId] + if (client) { + const clientResponse = { + event: 'agentFlowExecutedData', + data: data + } + client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n') + } + } + + streamAgentFlowEvent(chatId: string, data: any): void { + const client = this.clients[chatId] + if (client) { + const clientResponse = { + event: 'agentFlowEvent', + data: data + } + client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n') + } + } + + streamNextAgentFlowEvent(chatId: string, data: any): void { + const client = this.clients[chatId] + if (client) { + const clientResponse = { + event: 'nextAgentFlow', + data: data + } + client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n') + } + } + + streamUsageMetadataEvent(chatId: string, data: any): void { + const client = this.clients[chatId] + if (client) { + const clientResponse = { + event: 'usageMetadata', + data: data + } + client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n') + } + } + + streamTTSStartEvent(chatId: string, chatMessageId: string, format: string): void { + const client = this.clients[chatId] + if (client) { + const clientResponse = { + event: 'tts_start', + data: { chatMessageId, format } + } + client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n') + } + } + + streamTTSDataEvent(chatId: string, chatMessageId: string, audioChunk: string): void { 
+ const client = this.clients[chatId] + if (client) { + const clientResponse = { + event: 'tts_data', + data: { chatMessageId, audioChunk } + } + client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n') + } + } + + streamTTSEndEvent(chatId: string, chatMessageId: string): void { + const client = this.clients[chatId] + if (client) { + const clientResponse = { + event: 'tts_end', + data: { chatMessageId } + } + client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n') + } + } + + streamTTSAbortEvent(chatId: string, chatMessageId: string): void { + const client = this.clients[chatId] + if (client) { + const clientResponse = { + event: 'tts_abort', + data: { chatMessageId } + } + client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n') + } + } } diff --git a/studio-frontend/packages/server/src/utils/buildAgentGraph.ts b/studio-frontend/packages/server/src/utils/buildAgentGraph.ts index ea44382..fd9a198 100644 --- a/studio-frontend/packages/server/src/utils/buildAgentGraph.ts +++ b/studio-frontend/packages/server/src/utils/buildAgentGraph.ts @@ -361,7 +361,7 @@ export const buildAgentGraph = async ( const connectedToolNode = reactFlowNodes.find((node) => node.id === tooNodeId) // Map raw tool calls to used tools, to be shown on interrupted message - const mappedToolCalls = lastMessageRaw.tool_calls.map((toolCall) => { + const mappedToolCalls = lastMessageRaw.tool_calls.map((toolCall: any) => { return { tool: toolCall.name, toolInput: toolCall.args, toolOutput: '' } }) diff --git a/studio-frontend/packages/server/src/utils/fileRepository.ts b/studio-frontend/packages/server/src/utils/fileRepository.ts index 1147aeb..245b9ec 100644 --- a/studio-frontend/packages/server/src/utils/fileRepository.ts +++ b/studio-frontend/packages/server/src/utils/fileRepository.ts @@ -46,7 +46,7 @@ export const containsBase64File = (chatflow: ChatFlow) => { return found } -export const updateFlowDataWithFilePaths = 
async (chatflowid: string, flowData: string) => { +export const updateFlowDataWithFilePaths = async (chatflowid: string, flowData: string, orgId: string = '') => { try { const parsedFlowData: IReactFlowObject = JSON.parse(flowData) const re = new RegExp('^data.*;base64', 'i') @@ -75,14 +75,16 @@ export const updateFlowDataWithFilePaths = async (chatflowid: string, flowData: for (let j = 0; j < files.length; j++) { const file = files[j] if (re.test(file)) { - node.data.inputs[key] = await addBase64FilesToStorage(file, chatflowid, fileNames) + const result = await addBase64FilesToStorage(file, chatflowid, fileNames, orgId) + node.data.inputs[key] = result.path } } } catch (e) { continue } } else if (re.test(input)) { - node.data.inputs[key] = await addBase64FilesToStorage(input, chatflowid, fileNames) + const result = await addBase64FilesToStorage(input, chatflowid, fileNames, orgId) + node.data.inputs[key] = result.path } } } diff --git a/studio-frontend/packages/server/src/ws/finetuningDownload.ts b/studio-frontend/packages/server/src/ws/finetuningDownload.ts index 484d2e8..eebd900 100644 --- a/studio-frontend/packages/server/src/ws/finetuningDownload.ts +++ b/studio-frontend/packages/server/src/ws/finetuningDownload.ts @@ -99,10 +99,18 @@ export const setupFineTuningDownloadHandlers = (io: Server) => { // Kick off the async preparation and store the promise so others can join task.status = 'zipping' + + // Emit progress update to socket immediately + socket.emit('download-finetuning-progress', { + jobId, + status: 'zipping', + message: 'Creating zip archive (this may take a few minutes)...' 
+ }) + task.promise = (async () => { try { - // Call the service to prepare the zip file (returns path) - const zipFilePath = await finetuningService.downloadFineTuningOutput(jobId) + // Call the service to prepare the zip file + const zipFilePath = await finetuningService.prepareFineTuningOutputZip(jobId) if (!zipFilePath) { task.status = 'error' diff --git a/studio-frontend/packages/ui/package.json b/studio-frontend/packages/ui/package.json index 94840aa..6051cb7 100644 --- a/studio-frontend/packages/ui/package.json +++ b/studio-frontend/packages/ui/package.json @@ -1,6 +1,6 @@ { "name": "flowise-ui", - "version": "2.1.4", + "version": "3.0.10", "license": "SEE LICENSE IN LICENSE.md", "homepage": "https://flowiseai.com", "author": { @@ -13,23 +13,29 @@ "@opentelemetry/sdk-trace-node":"latest", "@codemirror/lang-javascript": "^6.2.1", "@codemirror/lang-json": "^6.0.1", - "@codemirror/view": "^6.22.3", + "@codemirror/lang-markdown": "^6.2.5", + "@codemirror/view": "^6.26.3", "@emotion/cache": "^11.4.0", "@emotion/react": "^11.10.6", "@emotion/styled": "^11.10.6", + "@lezer/highlight": "^1.2.1", "@microsoft/fetch-event-source": "^2.0.1", "@mui/base": "5.0.0-beta.40", "@mui/icons-material": "^5.15.0", "@mui/lab": "5.0.0-alpha.156", "@mui/material": "5.15.0", + "@mui/system": "^5.15.0", "@mui/x-data-grid": "6.8.0", + "@mui/x-tree-view": "^6.17.0", "@react-keycloak/web": "^3.4.0", - "@tabler/icons-react": "3.7.0", + "@reduxjs/toolkit": "^2.2.7", + "@tabler/icons-react": "^3.30.0", "@uiw/codemirror-theme-sublime": "^4.21.21", "@uiw/codemirror-theme-vscode": "^4.21.21", "@uiw/react-codemirror": "^4.21.21", - "axios": "1.8.2", + "axios": "1.12.0", "clsx": "^1.1.1", + "dompurify": "^3.2.6", "dotenv": "^16.0.0", "flowise-embed": "latest", "flowise-embed-react": "latest", @@ -40,11 +46,12 @@ "html-react-parser": "^3.0.4", "keycloak-js": "^26.0.5", "lodash": "^4.17.21", + "lowlight": "^3.3.0", "moment": "^2.29.3", "notistack": "^2.0.4", "prop-types": "^15.7.2", "react": 
"^18.2.0", - "react-code-blocks": "^0.0.9-0", + "react-code-blocks": "^0.1.6", "react-color": "^2.19.3", "react-datepicker": "^4.21.0", "react-device-detect": "^1.17.0", @@ -52,21 +59,24 @@ "react-markdown": "^8.0.6", "react-perfect-scrollbar": "^1.5.8", "react-redux": "^8.0.5", + "react-rewards": "^2.1.0", "react-router": "~6.3.0", "react-router-dom": "~6.3.0", "react-syntax-highlighter": "^15.5.0", "reactflow": "^11.5.6", + "recharts": "^2.12.6", "redux": "^4.0.5", "rehype-mathjax": "^4.0.2", - "rehype-raw": "^7.0.0", "remark-gfm": "^3.0.1", "remark-math": "^5.1.1", + "showdown": "^2.1.0", "socket.io-client": "^4.6.1", + "tippy.js": "^6.3.7", "uuid": "^9.0.1", "yup": "^0.32.9" }, "scripts": { - "dev": "vite --host", + "dev": "vite", "start": "vite", "build": "vite build", "clean": "rimraf build", diff --git a/studio-frontend/packages/ui/src/ui-component/dialog/InputHintDialog.jsx b/studio-frontend/packages/ui/src/ui-component/dialog/InputHintDialog.jsx index 0eb8651..d1ec20e 100644 --- a/studio-frontend/packages/ui/src/ui-component/dialog/InputHintDialog.jsx +++ b/studio-frontend/packages/ui/src/ui-component/dialog/InputHintDialog.jsx @@ -1,11 +1,6 @@ import { createPortal } from 'react-dom' import PropTypes from 'prop-types' -import rehypeMathjax from 'rehype-mathjax' -import rehypeRaw from 'rehype-raw' -import remarkGfm from 'remark-gfm' -import remarkMath from 'remark-math' import { MemoizedReactMarkdown } from '@/ui-component/markdown/MemoizedReactMarkdown' -import { CodeBlock } from '@/ui-component/markdown/CodeBlock' import { Dialog, DialogContent, DialogTitle } from '@mui/material' const InputHintDialog = ({ show, dialogProps, onCancel }) => { @@ -24,27 +19,7 @@ const InputHintDialog = ({ show, dialogProps, onCancel }) => { {dialogProps.label} - - ) : ( - - {children} - - ) - } - }} - > + {dialogProps?.value} diff --git a/studio-frontend/packages/ui/src/ui-component/dialog/PromptLangsmithHubDialog.jsx 
b/studio-frontend/packages/ui/src/ui-component/dialog/PromptLangsmithHubDialog.jsx index acc246f..00ee677 100644 --- a/studio-frontend/packages/ui/src/ui-component/dialog/PromptLangsmithHubDialog.jsx +++ b/studio-frontend/packages/ui/src/ui-component/dialog/PromptLangsmithHubDialog.jsx @@ -3,11 +3,6 @@ import { useState, useEffect } from 'react' import { useDispatch, useSelector } from 'react-redux' import PropTypes from 'prop-types' -import rehypeMathjax from 'rehype-mathjax' -import rehypeRaw from 'rehype-raw' -import remarkGfm from 'remark-gfm' -import remarkMath from 'remark-math' - // MUI import { Box, @@ -536,28 +531,7 @@ const PromptLangsmithHubDialog = ({ promptType, show, onCancel, onSubmit }) => { } }} > - - ) : ( - - {children} - - ) - } - }} - > + {selectedPrompt?.readme} diff --git a/studio-frontend/packages/ui/src/ui-component/dialog/ViewMessagesDialog.jsx b/studio-frontend/packages/ui/src/ui-component/dialog/ViewMessagesDialog.jsx index 5b40d1e..ed8fc2d 100644 --- a/studio-frontend/packages/ui/src/ui-component/dialog/ViewMessagesDialog.jsx +++ b/studio-frontend/packages/ui/src/ui-component/dialog/ViewMessagesDialog.jsx @@ -3,10 +3,6 @@ import { useDispatch, useSelector } from 'react-redux' import { useState, useEffect, forwardRef } from 'react' import PropTypes from 'prop-types' import moment from 'moment' -import rehypeMathjax from 'rehype-mathjax' -import rehypeRaw from 'rehype-raw' -import remarkGfm from 'remark-gfm' -import remarkMath from 'remark-math' import axios from 'axios' import { cloneDeep } from 'lodash' @@ -642,27 +638,8 @@ const ViewMessagesDialog = ({ show, dialogProps, onCancel }) => { } else { return ( - ) : ( - - {children} - - ) - } - }} + chatflowid={dialogProps.chatflow.id} + isDialog={true} > {item.data} @@ -1113,44 +1090,8 @@ const ViewMessagesDialog = ({ show, dialogProps, onCancel }) => { )} {agent.messages.length > 0 && ( - ) : ( - - {children} - - ) - } - }} + chatflowid={dialogProps.chatflow.id} + isDialog={true} > 
{agent.messages.length > 1 ? agent.messages.join('\\n') @@ -1264,27 +1205,8 @@ const ViewMessagesDialog = ({ show, dialogProps, onCancel }) => {
{/* Messages are being rendered in Markdown format */} - ) : ( - - {children} - - ) - } - }} + chatflowid={dialogProps.chatflow.id} + isDialog={true} > {message.message} diff --git a/studio-frontend/packages/ui/src/ui-component/markdown/MemoizedReactMarkdown.jsx b/studio-frontend/packages/ui/src/ui-component/markdown/MemoizedReactMarkdown.jsx index 523585f..680dd22 100644 --- a/studio-frontend/packages/ui/src/ui-component/markdown/MemoizedReactMarkdown.jsx +++ b/studio-frontend/packages/ui/src/ui-component/markdown/MemoizedReactMarkdown.jsx @@ -1,19 +1,168 @@ -import { memo } from 'react' +import { memo, useMemo } from 'react' import PropTypes from 'prop-types' import ReactMarkdown from 'react-markdown' import './Markdown.css' +import { CodeBlock } from '../markdown/CodeBlock' +import remarkGfm from 'remark-gfm' +import remarkMath from 'remark-math' +import rehypeMathjax from 'rehype-mathjax' +/** + * Checks if text likely contains LaTeX math notation + * @param {string} text - Text to check for LaTeX math + * @param {Object[]} customPatterns - Additional regex patterns to check + * @returns {boolean} - Whether LaTeX math is likely present + */ +const containsLaTeX = (text, customPatterns = []) => { + if (!text || typeof text !== 'string') return false + + // Common LaTeX patterns - more permissive to catch edge cases + const defaultPatterns = [ + { regex: /\$\$.+?\$\$/s, name: 'Block math: $$...$$' }, + { regex: /\\\(.+?\\\)/s, name: 'Inline math: \\(...\\)' }, + { regex: /\\\[[\s\S]*?\\\]/, name: 'Display math: \\[...\\]' }, + { + regex: /\\begin{(equation|align|gather|math|matrix|bmatrix|pmatrix|vmatrix|cases)}.+?\\end{\1}/s, + name: 'Environment math' + }, + { regex: /\$(.*?[\\{}_^].*?)\$/, name: 'Inline math with $' }, + { regex: /\\frac/, name: 'LaTeX command: \\frac' }, + { regex: /\\sqrt/, name: 'LaTeX command: \\sqrt' }, + { regex: /\\pm/, name: 'LaTeX command: \\pm' }, + { regex: /\\cdot/, name: 'LaTeX command: \\cdot' }, + { regex: /\\text/, name: 'LaTeX 
command: \\text' }, + { regex: /\\sum/, name: 'LaTeX command: \\sum' }, + { regex: /\\prod/, name: 'LaTeX command: \\prod' }, + { regex: /\\int/, name: 'LaTeX command: \\int' } + ] + + // Combine default and custom patterns + const patterns = [...defaultPatterns, ...customPatterns] + + for (const pattern of patterns) { + if (pattern.regex.test(text)) { + return true + } + } + + return false +} + +/** + * Preprocesses text to make LaTeX syntax more compatible with Markdown + * @param {string} text - Original text with potentially problematic LaTeX syntax + * @returns {string} - Text with LaTeX syntax adjusted for better compatibility + */ +const preprocessLatex = (text) => { + if (!text || typeof text !== 'string') return text + + // Replace problematic LaTeX patterns with more compatible alternatives + const processedText = text + // Convert display math with indentation to dollar-dollar format + .replace(/(\n\s*)\\\[([\s\S]*?)\\\](\s*\n|$)/g, (match, before, content, after) => { + // Preserve indentation but use $$ format which is more reliably handled + return `${before}$$${content}$$${after}` + }) + // Convert inline math to dollar format with spaces to avoid conflicts + .replace(/\\\(([\s\S]*?)\\\)/g, '$ $1 $') + + return processedText +} + +/** + * Enhanced Markdown component with memoization for better performance + * Supports various plugins and custom rendering components + */ export const MemoizedReactMarkdown = memo( - ({ children, ...props }) => ( -
- {children} -
- ), - (prevProps, nextProps) => prevProps.children === nextProps.children + ({ children, ...props }) => { + // Preprocess text to improve LaTeX compatibility + const processedChildren = useMemo( + () => (typeof children === 'string' ? preprocessLatex(children) : children), + [children] + ) + + // Enable math by default unless explicitly disabled + const shouldEnableMath = useMemo(() => { + const hasLatex = processedChildren && containsLaTeX(processedChildren, props.mathPatterns || []) + return props.disableMath === true ? false : props.forceMath || hasLatex + }, [processedChildren, props.forceMath, props.disableMath, props.mathPatterns]) + + // Configure plugins based on content + const remarkPlugins = useMemo(() => { + if (props.remarkPlugins) return props.remarkPlugins + return shouldEnableMath ? [remarkGfm, remarkMath] : [remarkGfm] + }, [props.remarkPlugins, shouldEnableMath]) + + const rehypePlugins = useMemo(() => { + if (props.rehypePlugins) return props.rehypePlugins + return shouldEnableMath ? [rehypeMathjax] : [] + }, [props.rehypePlugins, shouldEnableMath]) + + return ( +
+ + ) : ( + + {children} + + ) + }, + p({ children }) { + return

{children}

+ }, + ...props.components + }} + {...props} + > + {processedChildren} +
+
+ ) + }, + (prevProps, nextProps) => { + // More detailed comparison for better memoization + if (prevProps.children !== nextProps.children) return false + + // Check if other props have changed + const prevEntries = Object.entries(prevProps).filter(([key]) => key !== 'children') + const nextEntries = Object.entries(nextProps).filter(([key]) => key !== 'children') + + if (prevEntries.length !== nextEntries.length) return false + + // Simple shallow comparison of remaining props + for (const [key, value] of prevEntries) { + if (key === 'components' || key === 'remarkPlugins' || key === 'rehypePlugins') continue // Skip complex objects + if (nextProps[key] !== value) return false + } + + return true + } ) MemoizedReactMarkdown.displayName = 'MemoizedReactMarkdown' MemoizedReactMarkdown.propTypes = { - children: PropTypes.any + children: PropTypes.any, + chatflowid: PropTypes.string, + isFullWidth: PropTypes.bool, + remarkPlugins: PropTypes.array, + rehypePlugins: PropTypes.array, + components: PropTypes.object, + forceMath: PropTypes.bool, + disableMath: PropTypes.bool, + mathPatterns: PropTypes.array } diff --git a/studio-frontend/packages/ui/src/views/chatmessage/ChatMessage.jsx b/studio-frontend/packages/ui/src/views/chatmessage/ChatMessage.jsx index ebabf12..91afdae 100644 --- a/studio-frontend/packages/ui/src/views/chatmessage/ChatMessage.jsx +++ b/studio-frontend/packages/ui/src/views/chatmessage/ChatMessage.jsx @@ -2,10 +2,6 @@ import { useState, useRef, useEffect, useCallback, Fragment } from 'react' import { useSelector, useDispatch } from 'react-redux' import PropTypes from 'prop-types' import { cloneDeep } from 'lodash' -import rehypeMathjax from 'rehype-mathjax' -import rehypeRaw from 'rehype-raw' -import remarkGfm from 'remark-gfm' -import remarkMath from 'remark-math' import axios from 'axios' import { v4 as uuidv4 } from 'uuid' import { EventStreamContentType, fetchEventSource } from '@microsoft/fetch-event-source' @@ -1342,27 +1338,8 @@ export const 
ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, preview } else { return ( - ) : ( - - {children} - - ) - } - }} + chatflowid={chatflowid} + isDialog={isDialog} > {item.data} @@ -1588,27 +1565,8 @@ export const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, preview )} {agent.messages.length > 0 && ( - ) : ( - - {children} - - ) - } - }} + chatflowid={chatflowid} + isDialog={isDialog} > {agent.messages.length > 1 ? agent.messages.join('\\n') @@ -1774,27 +1732,8 @@ export const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, preview <> {/* Messages are being rendered in Markdown format */} - ) : ( - - {children} - - ) - } - }} + chatflowid={chatflowid} + isDialog={isDialog} > {message.message} diff --git a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx index e9c9b13..03653df 100644 --- a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx +++ b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobModal.jsx @@ -779,7 +779,7 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { value={formData.dataset.padding_side} onChange={(e) => handleConfigChange('dataset', 'padding_side', e.target.value)} size="small" - sx={{ width: '100%', maxWidth: 240 }} + sx={{ width: '100%' }} /> @@ -789,7 +789,7 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { value={formData.dataset.truncation_side} onChange={(e) => handleConfigChange('dataset', 'truncation_side', e.target.value)} size="small" - sx={{ width: '100%', maxWidth: 240 }} + sx={{ width: '100%' }} /> @@ -799,7 +799,7 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { value={String(formData.dataset.padding)} onChange={(e) => handleConfigChange('dataset', 'padding', e.target.value)} size="small" - sx={{ width: '100%', maxWidth: 240 }} + sx={{ width: '100%' }} /> @@ -809,7 +809,7 @@ const FinetuningJobModal = ({ open, 
onClose, onJobCreated }) => { value={String(formData.dataset.truncation)} onChange={(e) => handleConfigChange('dataset', 'truncation', e.target.value)} size="small" - sx={{ width: '100%', maxWidth: 240 }} + sx={{ width: '100%' }} /> @@ -819,7 +819,7 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { value={String(formData.dataset.mask_input)} onChange={(e) => handleConfigChange('dataset', 'mask_input', e.target.value)} size="small" - sx={{ width: '100%', maxWidth: 240 }} + sx={{ width: '100%' }} /> @@ -829,7 +829,7 @@ const FinetuningJobModal = ({ open, onClose, onJobCreated }) => { value={String(formData.dataset.mask_response)} onChange={(e) => handleConfigChange('dataset', 'mask_response', e.target.value)} size="small" - sx={{ width: '100%', maxWidth: 240 }} + sx={{ width: '100%' }} /> diff --git a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx index 7137603..998d34b 100644 --- a/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx +++ b/studio-frontend/packages/ui/src/views/finetuning/FinetuningJobsTable.jsx @@ -124,6 +124,7 @@ const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null, filter const [logsOpen, setLogsOpen] = useState(false) const [logsData, setLogsData] = useState('') const [logsLoading, setLogsLoading] = useState(false) + const [deleteConfirmOpen, setDeleteConfirmOpen] = useState(false) const logsContainerRef = useRef(null) // Auto-refresh logs every 3 seconds when logs dialog is open @@ -687,18 +688,7 @@ const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null, filter { if (!selectedJob) return - if (!window.confirm('Are you sure you want to delete this job?')) return - setActionLoading(true) - try { - await finetuningApi.deleteJob(selectedJob.id) - handleMenuClose() - if (onRefresh) onRefresh() - } catch (error) { - console.error('Error deleting job:', error) - alert('Failed 
to delete job: ' + (error.message || 'Unknown error')) - } finally { - setActionLoading(false) - } + setDeleteConfirmOpen(true) }} disabled={actionLoading} > @@ -768,6 +758,43 @@ const FinetuningJobsTable = ({ data, isLoading = false, onRefresh = null, filter + + {/* Delete Confirmation Dialog */} + setDeleteConfirmOpen(false)} maxWidth="xs" fullWidth> + Delete Job + + + Are you sure you want to delete this job? This action cannot be undone. + + + + + + + ) } diff --git a/studio-frontend/packages/ui/src/views/opeaflows/index.jsx b/studio-frontend/packages/ui/src/views/opeaflows/index.jsx index 5e7e28f..b3c95db 100644 --- a/studio-frontend/packages/ui/src/views/opeaflows/index.jsx +++ b/studio-frontend/packages/ui/src/views/opeaflows/index.jsx @@ -2,7 +2,7 @@ import { useEffect, useState } from 'react' import { useNavigate } from 'react-router-dom' // material-ui -import { Box, Skeleton, Stack, Input, Typography } from '@mui/material' +import { Box, Skeleton, Stack, Input, Typography, Alert } from '@mui/material' import { useTheme } from '@mui/material/styles' // project imports @@ -41,6 +41,7 @@ const Opeaflows = () => { const [isLoading, setLoading] = useState(true) const [error, setError] = useState(null) + const [importError, setImportError] = useState(null) const [images, setImages] = useState({}) const [search, setSearch] = useState('') const [loginDialogOpen, setLoginDialogOpen] = useState(false) @@ -95,11 +96,20 @@ const Opeaflows = () => { const importSamples = () => { setLoading(true); - chatflowsApi.importSampleChatflowsbyUserId(keycloak.tokenParsed.email).then(() => { - getAllOpeaflowsApi.request(); - }).catch(() => { - setLoading(false); - }); + setImportError(null); + chatflowsApi.importSampleChatflowsbyUserId(keycloak.tokenParsed.email) + .then(() => { + getAllOpeaflowsApi.request(); + setImportError(null); + }) + .catch((error) => { + setLoading(false); + const errorMessage = error?.response?.data?.message || + error?.message || + 'Failed to import 
sample workflows. Please try again later.'; + setImportError(errorMessage); + console.error('Error importing sample chatflows:', error); + }); } const goToCanvas = (selectedChatflow) => { @@ -161,6 +171,11 @@ const Opeaflows = () => { ) : ( + {importError && ( + setImportError(null)} sx={{ mb: 2 }}> + Import Failed: {importError} + + )} { + if (response.url().includes('/v1/app-backend') && response.request().method() === 'POST') { + const contentType = response.headers()['content-type']; + if (contentType.includes('text/event-stream')) { + const responseBody = await response.text(); + // Parse SSE stream + const events = responseBody.split('\n\n'); + for (const event of events) { + const lines = event.split('\n'); + for (const line of lines) { + if (line.startsWith('data: ')) { + const cleanedData = line.slice(6, -1).trim(); // Remove 'data: ' prefix + apiResponse.value += cleanedData + " "; + } + } + } + } else { + console.error('Response is not SSE'); + } + } + }); +} + +test('005_test_finetuning_rerank', async ({ browser, baseURL }) => { + test.setTimeout(1200000); + let apiResponse = { value: '' }; + const context = await browser.newContext({ + ignoreHTTPSErrors: true, + recordVideo: { + dir: './videos/', + size: { width: 1280, height: 720 } + } + }); + const page = await context.newPage(); + const IDC_URL = baseURL || "" + await page.goto(IDC_URL); + await page.getByLabel('Username or email').fill('test_automation@gmail.com'); + await page.getByLabel('Password', { exact: true }).click(); + await page.getByLabel('Password', { exact: true }).fill('test'); + await page.getByRole('button', { name: 'Sign In' }).click(); + await page.getByRole('button', { name: 'Fine-tuning' }).click(); + await page.getByRole('button', { name: 'Create New Job' }).click(); + await page.getByRole('combobox', { name: 'Base Model' }).click(); + await page.getByRole('option', { name: 'BAAI/bge-reranker-large' }).click(); + await page.getByText('Instruction Tuning').click(); + await 
page.getByRole('option', { name: 'Rerank' }).click(); + let fileChooserPromise = page.waitForEvent('filechooser'); + await page.getByRole('button', { name: 'Choose File' }).click(); + let fileChooser = await fileChooserPromise; + await fileChooser.setFiles(trainDataset); + await page.waitForTimeout(5000); + await page.getByRole('button', { name: 'Create Job' }).click(); + await page.waitForTimeout(20000); + await expect(page.getByRole('cell', { name: 'running' })).toHaveText('running'); + await expect(page.locator('div').filter({ hasText: 'Fine-tuning JobsCreate New' }).nth(3)).toContainText('rerank'); + await waitForStatusText(page, 'td.MuiTableCell-root div.MuiChip-root', 'succeeded', 20, 60000); + + await page.locator('button').nth(5).click(); + await page.getByRole('menuitem', { name: 'Delete Job' }).click(); + await page.getByRole('button', { name: 'Delete', exact: true }).click(); +}); \ No newline at end of file diff --git a/tests/playwright/studio-e2e/006_test_finetuning_embedding.spec.ts b/tests/playwright/studio-e2e/006_test_finetuning_embedding.spec.ts new file mode 100644 index 0000000..7886c38 --- /dev/null +++ b/tests/playwright/studio-e2e/006_test_finetuning_embedding.spec.ts @@ -0,0 +1,68 @@ +import { test, expect } from '@playwright/test'; +import { waitForStatusText } from '../utils'; +import path from 'path'; + +const trainDataset = path.resolve(__dirname, '../../test-files/toy_finetune_data.jsonl'); + +async function setupResponseListener(page, apiResponse) { + page.on('response', async (response) => { + if (response.url().includes('/v1/app-backend') && response.request().method() === 'POST') { + const contentType = response.headers()['content-type']; + if (contentType.includes('text/event-stream')) { + const responseBody = await response.text(); + // Parse SSE stream + const events = responseBody.split('\n\n'); + for (const event of events) { + const lines = event.split('\n'); + for (const line of lines) { + if (line.startsWith('data: ')) { + 
const cleanedData = line.slice(6, -1).trim(); // Remove 'data: ' prefix + apiResponse.value += cleanedData + " "; + } + } + } + } else { + console.error('Response is not SSE'); + } + } + }); +} + +test('006_test_finetuning_embedding', async ({ browser, baseURL }) => { + test.setTimeout(1200000); + let apiResponse = { value: '' }; + const context = await browser.newContext({ + ignoreHTTPSErrors: true, + recordVideo: { + dir: './videos/', + size: { width: 1280, height: 720 } + } + }); + const page = await context.newPage(); + const IDC_URL = baseURL || "" + await page.goto(IDC_URL); + await page.getByLabel('Username or email').fill('test_automation@gmail.com'); + await page.getByLabel('Password', { exact: true }).click(); + await page.getByLabel('Password', { exact: true }).fill('test'); + await page.getByRole('button', { name: 'Sign In' }).click(); + await page.getByRole('button', { name: 'Fine-tuning' }).click(); + await page.getByRole('button', { name: 'Create New Job' }).click(); + await page.getByRole('combobox', { name: 'Base Model' }).click(); + await page.getByRole('option', { name: 'BAAI/bge-base-en-v1.5' }).click(); + await page.getByText('Instruction Tuning').click(); + await page.getByRole('option', { name: 'Embedding' }).click(); + let fileChooserPromise = page.waitForEvent('filechooser'); + await page.getByRole('button', { name: 'Choose File' }).click(); + let fileChooser = await fileChooserPromise; + await fileChooser.setFiles(trainDataset); + await page.waitForTimeout(5000); + await page.getByRole('button', { name: 'Create Job' }).click(); + await page.waitForTimeout(20000); + await expect(page.getByRole('cell', { name: 'running' })).toHaveText('running'); + await expect(page.locator('div').filter({ hasText: 'Fine-tuning JobsCreate New' }).nth(3)).toContainText('embedding'); + await waitForStatusText(page, 'td.MuiTableCell-root div.MuiChip-root', 'succeeded', 20, 60000); + + await page.locator('button').nth(5).click(); + await 
page.getByRole('menuitem', { name: 'Delete Job' }).click(); + await page.getByRole('button', { name: 'Delete', exact: true }).click(); +}); \ No newline at end of file diff --git a/tests/playwright/studio-e2e/007_test_finetuning_reasoning.spec.ts b/tests/playwright/studio-e2e/007_test_finetuning_reasoning.spec.ts new file mode 100644 index 0000000..72472ba --- /dev/null +++ b/tests/playwright/studio-e2e/007_test_finetuning_reasoning.spec.ts @@ -0,0 +1,68 @@ +import { test, expect } from '@playwright/test'; +import { waitForStatusText } from '../utils'; +import path from 'path'; + +const trainDataset = path.resolve(__dirname, '../../test-files/medical_o1_sft_50.json'); + +async function setupResponseListener(page, apiResponse) { + page.on('response', async (response) => { + if (response.url().includes('/v1/app-backend') && response.request().method() === 'POST') { + const contentType = response.headers()['content-type']; + if (contentType.includes('text/event-stream')) { + const responseBody = await response.text(); + // Parse SSE stream + const events = responseBody.split('\n\n'); + for (const event of events) { + const lines = event.split('\n'); + for (const line of lines) { + if (line.startsWith('data: ')) { + const cleanedData = line.slice(6, -1).trim(); // Remove 'data: ' prefix + apiResponse.value += cleanedData + " "; + } + } + } + } else { + console.error('Response is not SSE'); + } + } + }); +} + +test('007_test_finetuning_reasoning', async ({ browser, baseURL }) => { + test.setTimeout(1200000); + let apiResponse = { value: '' }; + const context = await browser.newContext({ + ignoreHTTPSErrors: true, + recordVideo: { + dir: './videos/', + size: { width: 1280, height: 720 } + } + }); + const page = await context.newPage(); + const IDC_URL = baseURL || "" + await page.goto(IDC_URL); + await page.getByLabel('Username or email').fill('test_automation@gmail.com'); + await page.getByLabel('Password', { exact: true }).click(); + await page.getByLabel('Password', { 
exact: true }).fill('test'); + await page.getByRole('button', { name: 'Sign In' }).click(); + await page.getByRole('button', { name: 'Fine-tuning' }).click(); + await page.getByRole('button', { name: 'Create New Job' }).click(); + await page.getByRole('combobox', { name: 'Base Model' }).click(); + await page.getByRole('option', { name: 'Qwen/Qwen2.5-7B' }).click(); + await page.getByText('Instruction Tuning').click(); + await page.getByRole('option', { name: 'Reasoning' }).click(); + let fileChooserPromise = page.waitForEvent('filechooser'); + await page.getByRole('button', { name: 'Choose File' }).click(); + let fileChooser = await fileChooserPromise; + await fileChooser.setFiles(trainDataset); + await page.waitForTimeout(5000); + await page.getByRole('button', { name: 'Create Job' }).click(); + await page.waitForTimeout(20000); + await expect(page.getByRole('cell', { name: 'running' })).toHaveText('running'); + await expect(page.locator('div').filter({ hasText: 'Fine-tuning JobsCreate New' }).nth(3)).toContainText('reasoning'); + await waitForStatusText(page, 'td.MuiTableCell-root div.MuiChip-root', 'succeeded', 20, 60000); + + await page.locator('button').nth(5).click(); + await page.getByRole('menuitem', { name: 'Delete Job' }).click(); + await page.getByRole('button', { name: 'Delete', exact: true }).click(); +}); \ No newline at end of file diff --git a/tests/playwright/utils.ts b/tests/playwright/utils.ts index 1a299d2..f042ff9 100644 --- a/tests/playwright/utils.ts +++ b/tests/playwright/utils.ts @@ -4,7 +4,7 @@ export async function waitForStatusText(page: any, selector: string, statusText: for (let i = 0; i < maxAttempts; i++) { try { const text = await page.locator(selector).first().innerText(); - if (text === 'Error') { + if (text === 'Error' || text === 'failed') { throw new Error(`Encountered unwanted status text "Error" in element "${selector}"`); } await expect(page.locator(selector).first()).toHaveText(statusText, { timeout: intervalTimeout }); 
diff --git a/tests/test-files/medical_o1_sft_50.json b/tests/test-files/medical_o1_sft_50.json new file mode 100644 index 0000000..3afe666 --- /dev/null +++ b/tests/test-files/medical_o1_sft_50.json @@ -0,0 +1,52 @@ +[ + { + "Question": "Given the symptoms of sudden weakness in the left arm and leg, recent long-distance travel, and the presence of swollen and tender right lower leg, what specific cardiac abnormality is most likely to be found upon further evaluation that could explain these findings?", + "Complex_CoT": "Okay, let's see what's going on here. We've got sudden weakness in the person's left arm and leg - and that screams something neuro-related, maybe a stroke?\n\nBut wait, there's more. The right lower leg is swollen and tender, which is like waving a big flag for deep vein thrombosis, especially after a long flight or sitting around a lot.\n\nSo, now I'm thinking, how could a clot in the leg end up causing issues like weakness or stroke symptoms?\n\nOh, right! There's this thing called a paradoxical embolism. It can happen if there's some kind of short circuit in the heart - like a hole that shouldn't be there.\n\nLet's put this together: if a blood clot from the leg somehow travels to the left side of the heart, it could shoot off to the brain and cause that sudden weakness by blocking blood flow there.\n\nHmm, but how would the clot get from the right side of the heart to the left without going through the lungs and getting filtered out?\n\nHere's where our cardiac anomaly comes in: a patent foramen ovale or PFO. 
That's like a sneaky little shortcut in the heart between the right and left atria.\n\nAnd it's actually pretty common, found in about a quarter of adults, which definitely makes it the top suspect here.\n\nSo with all these pieces - long travel, leg clot, sudden weakness - a PFO fits the bill perfectly, letting a clot cross over and cause all this.\n\nEverything fits together pretty neatly, so I'd bet PFO is the heart issue waiting to be discovered. Yeah, that really clicks into place!", + "Response": "The specific cardiac abnormality most likely to be found in this scenario is a patent foramen ovale (PFO). This condition could allow a blood clot from the venous system, such as one from a deep vein thrombosis in the leg, to bypass the lungs and pass directly into the arterial circulation. This can occur when the clot moves from the right atrium to the left atrium through the PFO. Once in the arterial system, the clot can travel to the brain, potentially causing an embolic stroke, which would explain the sudden weakness in the left arm and leg. The connection between the recent travel, which increases the risk of deep vein thrombosis, and the neurological symptoms suggests the presence of a PFO facilitating a paradoxical embolism." + }, + { + "Question": "A 33-year-old woman is brought to the emergency department 15 minutes after being stabbed in the chest with a screwdriver. Given her vital signs of pulse 110/min, respirations 22/min, and blood pressure 90/65 mm Hg, along with the presence of a 5-cm deep stab wound at the upper border of the 8th rib in the left midaxillary line, which anatomical structure in her chest is most likely to be injured?", + "Complex_CoT": "Okay, let's figure out what's going on here. A woman comes in with a stab wound from a screwdriver. It's in her chest, upper border of the 8th rib, left side, kind of around the midaxillary line. First thought, that's pretty close to where the lung sits, right?\n\nLet's talk about location first. 
This spot is along the left side of her body. Above the 8th rib, like that, is where a lot of important stuff lives, like the bottom part of the left lung, possibly the diaphragm too, especially considering how deep the screwdriver went.\n\nThe wound is 5 cm deep. That sounds pretty deep. I mean, it could definitely reach down to the lung tissue or maybe the diaphragm. Given that it's midaxillary, we're in the territory where the lower lobe of the left lung hangs out. It's also possible there's some intersection with where the diaphragm begins, but the lung feels more probable somehow.\n\nNow, her vitals are concerning: elevated heart rate and low blood pressure. This is serious. My gut says that this kind of vital sign picture could mean something like pneumothorax or maybe hemothorax. Both can happen if the lung gets punctured, and they can make the blood pressure drop and the heart rate skyrocket since she's obviously distressed.\n\nSo, putting it all together, the most obvious culprit is the lower lobe of the left lung. The wound's depth and her condition point that way. And, yeah, this adds up with pneumothorax or maybe blood collecting in the chest—the kind of stuff that can really mess with breathing and circulation.\n\nAlright, with this in mind, it sure seems like the lung is the most likely thing that got hurt here. Makes sense given the situation, where the wound is, considering her symptoms, and the whole setup.", + "Response": "In this scenario, the most likely anatomical structure to be injured is the lower lobe of the left lung. The location of the stab wound—at the upper border of the 8th rib in the left midaxillary line—indicates proximity to the lower lobe of the lung. The depth of the wound (5 cm) suggests it is sufficient to reach lung tissue. 
Her vital signs of elevated heart rate and low blood pressure could signal complications like a pneumothorax or hemothorax, common consequences of lung trauma that would result from a penetrating injury in this area. Given these considerations, the lower lobe of the left lung is the most probable structure injured." + }, + { + "Question": "A 61-year-old woman with a long history of involuntary urine loss during activities like coughing or sneezing but no leakage at night undergoes a gynecological exam and Q-tip test. Based on these findings, what would cystometry most likely reveal about her residual volume and detrusor contractions?", + "Complex_CoT": "Okay, let's think about this step by step. There's a 61-year-old woman here who's been dealing with involuntary urine leakages whenever she's doing something that ups her abdominal pressure like coughing or sneezing. This sounds a lot like stress urinary incontinence to me. Now, it's interesting that she doesn't have any issues at night; she isn't experiencing leakage while sleeping. This likely means her bladder's ability to hold urine is fine when she isn't under physical stress. Hmm, that's a clue that we're dealing with something related to pressure rather than a bladder muscle problem. \n\nThe fact that she underwent a Q-tip test is intriguing too. This test is usually done to assess urethral mobility. In stress incontinence, a Q-tip might move significantly, showing urethral hypermobility. This kind of movement often means there's a weakness in the support structures that should help keep the urethra closed during increases in abdominal pressure. So, that's aligning well with stress incontinence.\n\nNow, let's think about what would happen during cystometry. Since stress incontinence isn't usually about sudden bladder contractions, I wouldn't expect to see involuntary detrusor contractions during this test. Her bladder isn't spasming or anything; it's more about the support structure failing under stress. 
Plus, she likely empties her bladder completely because stress incontinence doesn't typically involve incomplete emptying. So, her residual volume should be pretty normal. \n\nAll in all, it seems like if they do a cystometry on her, it will likely show a normal residual volume and no involuntary contractions. Yup, I think that makes sense given her symptoms and the typical presentations of stress urinary incontinence.", + "Response": "Cystometry in this case of stress urinary incontinence would most likely reveal a normal post-void residual volume, as stress incontinence typically does not involve issues with bladder emptying. Additionally, since stress urinary incontinence is primarily related to physical exertion and not an overactive bladder, you would not expect to see any involuntary detrusor contractions during the test." + }, + { + "Question": "A 45-year-old man with a history of alcohol use, who has been abstinent for the past 10 years, presents with sudden onset dysarthria, shuffling gait, and intention tremors. Given this clinical presentation and history, what is the most likely diagnosis?", + "Complex_CoT": "Alright, let’s break this down. We have a 45-year-old man here, who suddenly starts showing some pretty specific symptoms: dysarthria, shuffling gait, and those intention tremors. This suggests something's going wrong with motor control, probably involving the cerebellum or its connections.\n\nNow, what's intriguing is that he's had a history of alcohol use, but he's been off it for the past 10 years. Alcohol can do a number on the cerebellum, leading to degeneration, and apparently, the effects can hang around or even appear long after one stops drinking.\n\nAt first glance, these symptoms look like they could be some kind of chronic degeneration, maybe something like alcoholic cerebellar degeneration, but hold on. This looks different. The symptoms just came on all of a sudden. 
Chronic degenerations typically have a more gradual onset.\n\nOkay, let’s reconsider this sudden nature. It’s making me think of something more acute, more rapid onset. Hmm, if we dig back into his history of drinking, there might have been some damage done, leading to nutritional deficiencies, like a lack of thiamine.\n\nWernicke’s encephalopathy is a classic possibility here. That condition pops up due to thiamine deficiency, often after a history of alcohol use. It’s known for causing ataxia, confusion, and eye movement issues. However, he’s not showing the full triad of symptoms; there's no mention of confusion or eye problems, so maybe it doesn’t fit perfectly.\n\nOh, wait a second, maybe we're missing something simpler. Given the acute nature of the symptoms, maybe this is more in line with something like a stroke. Sudden onset can definitely suggest a vascular event. With his alcohol history, he’s at risk for things like high blood pressure, which can increase stroke risk.\n\nIn particular, lacunar strokes can mess with motor coordination, speech, the works. These are small, but significant enough to cause these exact symptoms: dysarthria, tremors, and a shuffling gait.\n\nBut hang on, what if there’s another angle we’re missing? Chronic alcohol effects might still play a role here, just in a different way. There’s a condition called acquired hepatocerebral degeneration. This can happen in people with a history of liver disease due to alcohol, even years after they’ve quit. It can result in symptoms like these because of how substances get deposited in the brain.\n\nLinking this back to our patient, his history with alcohol could’ve led to some liver issues, and now, here we are with these symptoms showing up suddenly. Makes sense, right? So, considering everything, acquired hepatocerebral degeneration seems to fit quite nicely. 
That’s probably our most fitting diagnosis for this situation.", + "Response": "Considering the clinical presentation of sudden onset dysarthria, shuffling gait, and intention tremors in a 45-year-old man with a history of alcohol use who has been abstinent for the past 10 years, the most likely diagnosis is acquired hepatocerebral degeneration.\n\nThis condition is associated with chronic liver disease, which can often be a consequence of long-term alcohol use. Despite the patient's abstinence from alcohol for a decade, previous alcohol use may have led to underlying liver dysfunction. This dysfunction, even if subclinical, can cause encephalopathy due to the accumulation of neurotoxic substances that affect the brain. The sudden onset of these neurological symptoms aligns with how acquired hepatocerebral degeneration can manifest, making it a probable diagnosis in this scenario." + }, + { + "Question": "A 45-year-old man presents with symptoms including a wide-based gait, a blank facial expression, hallucinations, memory issues, a resting tremor that resolves with movement, and bradykinesia. Based on these clinical findings, what is most likely to be observed in the histological specimen of his brain?", + "Complex_CoT": "Okay, so here's a 45-year-old guy who's experiencing some pretty concerning symptoms. Let's break this down. He's got a wide-based gait. Right off the bat, that makes me think about balance and coordination issues; something affecting his motor control could be at play here.\n\nThen there's the blank facial expression. That's often referred to as facial masking, commonly seen in Parkinson's disease due to a lack of facial muscle movement.\n\nHallucinations and memory issues, though, those are interesting. Hallucinations make me think about some sort of cognitive or psychiatric involvement. Memory issues definitely point towards a possibility of cognitive decline.\n\nNow, a resting tremor that stops with movement is a classic sign of Parkinson's. 
It's one of those distinguishing features. Paired with bradykinesia, which means slowness of movement, these two symptoms are screaming Parkinson’s disease to me.\n\nWide-based gait might initially make one consider cerebellar issues, but in the context of Parkinsonism, it can also be due to balance problems stemming from basal ganglia dysfunction or even Parkinson's-related gait disturbances.\n\nWhen I think about all these symptoms coming together, it certainly makes Parkinson's disease a strong contender, especially considering his age and the symptom profile.\n\nBut here's the thing, I can't ignore the cognitive and psychiatric symptoms. These could suggest something like Lewy body dementia, which is related but incorporates more of the psychiatric features alongside Parkinsonian symptoms.\n\nIn terms of what's going on at the cellular level, Parkinson's disease and dementia with Lewy bodies share a common pathological hallmark: the presence of Lewy bodies. These are clumps of proteins, mainly alpha-synuclein, that accumulate in the brain cells and can cause both movement and cognitive problems.\n\nSo, taking all of this into account, when they look at the brain tissue under a microscope, I would expect them to find Lewy bodies. Those would explain the whole picture—movement issues, cognitive decline, and hallucinations. It ties everything together nicely.", + "Response": "Based on the clinical findings presented—wide-based gait, blank facial expression, hallucinations, memory issues, resting tremor that resolves with movement, and bradykinesia—it is likely that the 45-year-old man is experiencing a condition related to Parkinsonism, possibly Parkinson's disease or dementia with Lewy bodies. Both of these conditions are associated with the presence of Lewy bodies in the brain. Lewy bodies are abnormal aggregates of protein, primarily alpha-synuclein, which can cause both the motor and cognitive symptoms observed in this patient. 
Therefore, in the histological specimen of his brain, you would most likely observe the presence of Lewy bodies." + }, + { + "Question": "A patient with psoriasis was treated with systemic steroids, and upon discontinuation of the treatment, developed generalized pustules all over the body. What is the most likely cause of this condition?", + "Complex_CoT": "I'm thinking about this patient with psoriasis who's been on systemic steroids. Okay, so psoriasis is this chronic skin condition that can be pretty stubborn and flares up now and then. They stopped their steroid treatment, and then suddenly there are these generalized pustules appearing all over the place. What's going on here? \n\nAlright, pustules are like little bumps that are filled with pus—they can be a sign of different skin issues, but in someone with psoriasis, a major change like stopping treatment can really mess with things. Steroids are powerful at tamping down inflammation and managing the immune system. When you take someone off steroids, particularly suddenly, the body can react in unexpected ways. It's like the immune system gets a bit confused and overreacts—a rebound effect. \n\nOh, right, I remember reading that systemic steroids can sometimes mask the severity of psoriasis, keeping it under control. But pulling those steroids too fast can allow the psoriasis to come back stronger, even shifting into another form. This sounds familiar—there's a type called pustular psoriasis. If I recall, that's when the psoriasis turns into this more extreme version with pustules. \n\nSo connecting the dots: it's likely that stopping the steroids led to a rebound and we might be seeing a case of generalized pustular psoriasis. That makes sense given the scenario—I've seen this mentioned before where physicians need to be cautious about how they taper steroids off in psoriasis patients to prevent exactly this kind of escalation. 
It fits pretty well with what I'd expect based on what I know about steroid withdrawal and psoriasis flare-ups.", + "Response": "The development of generalized pustules in a patient with psoriasis following the discontinuation of systemic steroid treatment is most likely due to a condition called generalized pustular psoriasis (GPP). This condition can occur as a rebound effect when systemic steroids are suddenly stopped or rapidly tapered down. In patients with psoriasis, steroids can suppress the immune system and mitigate symptoms. When these drugs are withdrawn too quickly, the immune system can overreact, leading to more severe symptoms, including the transformation of psoriasis into a pustular form. Careful management and slow tapering of steroids are critical in psoriasis patients to prevent such exacerbations." + }, + { + "Question": "What is the most likely diagnosis for a 2-year-old 70 kg child who presents with limitation of abduction and internal rotation, tenderness in Scarpa's triangle, and abduction of the limb upon flexing the hip?", + "Complex_CoT": "Okay, so we're dealing with a 2-year-old child here. But wait, this kid is listed as 70 kg – that's a lot for a toddler! It might be a typo, or maybe there's something else going on. Anyway, let's focus on the symptoms.\n\nThis child can't abduct the hip or internally rotate it very well. That generally screams hip issues. We're looking at something more than just temporary discomfort, I think.\n\nThere's tenderness in the Scarpa’s triangle. That points us right to the hip, to something maybe structural or possibly some inflammation around the joint. So, the focus needs to stay around the hip region.\n\nHere's something interesting: when you flex the hip, the limb abducts. That feels like a tell-tale sign. 
Normally, you'd associate this with instability or maybe a slipped joint or something affecting the way the joint moves.\n\nTransient synovitis is a common condition in young kids that affects the hip – it makes sense initially since it causes hip pain and a limp. But, hmm, that doesn’t line up perfectly with the limb movement upon flexion.\n\nWe must consider hip dysplasia or even something affecting joint congruence, but keeping age in mind, these shouldn't be the first thoughts.\n\nNow, considering this obvious weight issue combined with hip symptoms: has to lead us to think about conditions tied to growth or structural issues.\n\nOn the other hand, slipped capital femoral epiphysis (SCFE) is more common in older kids, true, but it's also linked with heavier weight, and that annoying abduction on flexion does fit.\n\nLet’s face it, the child’s weight and the weird abduction movement on flexion make SCFE pretty suspicious, even if the kid is very young for it.\n\nLogically, considering all these factors and given the hip symptoms and the weight, the signs point more convincingly towards SCFE. And yes, this seems like the most plausible conclusion given the current scenario. Even if it's a stretch for their age.", + "Response": "Based on the described symptoms and the unusual weight for a 2-year-old child, the most likely diagnosis is Slipped Capital Femoral Epiphysis (SCFE). Even though SCFE typically occurs in older children, mainly adolescents, the combination of excessive weight, limited hip abduction and internal rotation, tenderness in the hip area, and the characteristic limb movement (abduction upon hip flexion) strongly points towards SCFE as the most plausible diagnosis in this scenario." + }, + { + "Question": "Scientists are developing a new non-steroidal anti-inflammatory drug for osteoarthritis, aiming for higher potency but the same efficacy as ibuprofen to reduce gastrointestinal side effects. 
If ibuprofen is represented by curve C in the figure and the desired therapeutic effect is marked by the dashed line Y, what curve would represent the new drug that fulfills these criteria?", + "Complex_CoT": "Okay, so I need to figure out the characteristics of this new drug compared to ibuprofen. Let's see, we're looking at two main things here: potency and efficacy. If I remember correctly, potency is about how much of a drug we need to get a particular effect. So if a drug is more potent, we need less of it to get the job done. Efficacy, on the other hand, is about how good the drug is at its best; it's the maximum effect it can achieve.\n\nNow, for this new drug, they want it to be more potent than ibuprofen. That means it should work at a lower dose. But they still want it to be just as effective in terms of the maximum impact – so, same efficacy.\n\nThinking about how this would look on a graph, if I have an efficacy curve for ibuprofen already, any drug that's more potent will have its curve shift to the left. Why? Oh right, because it achieves the effect at a lower dose. But the peak of the curve, or the height, would stay the same because our maximum effect, or efficacy, isn't changing.\n\nAlright, looking for which curve could represent the new drug requires me to find one that hits the desired effect line – they mentioned a dashed line Y – at a lower concentration than ibuprofen, but still reaches the same top level or height.\n\nHmm, with this in mind, I need to verify with the graph. Curve C is for ibuprofen, so I need to spot another curve that rises quicker and hits line Y before ibuprofen does. But crucially, it has to level out at the same height, or else it won't meet the efficacy requirement.\n\nOh, I see. The new drug's curve needs to not just start lower at achieving effects but also maintain its power up to the height of line Y. 
This means while the curve is left compared to Curve C, it should still touch the same maximum point.\n\nThat's clear in my head now. A drug curve to the left of C that reaches the same ultimate height signifies greater potency without sacrificing efficacy. Perfect, this should match the target for the new drug!", + "Response": "To determine which curve would represent the new drug meeting the criteria of higher potency but the same efficacy as ibuprofen (represented by curve C), we need to find a curve that achieves the desired therapeutic effect (marked by the dashed line Y) at a lower concentration than curve C. Additionally, this curve should plateau at the same maximum effect level as curve C to indicate identical efficacy.\n\nThis means we are looking for a curve that is shifted to the left of curve C, indicating the drug requires a lower dose to achieve the therapeutic effect, without surpassing the height or plateau level of curve C, ensuring the same maximum efficacy.\n\nIf you have a set of labeled curves to choose from, the ideal curve for the new drug would be the one that intersects the dashed line Y before curve C does, but at the same height. Please identify such a curve based on your graph's available options." + }, + { + "Question": "A 15-year-old boy presents with decreased facial hair, bilateral breast enlargement, long extremities, scant pubic hair, small testes, and normal-sized penis. He is at the 97th percentile for height and the 50th percentile for weight. What karyotype is most likely to be found in further evaluation?", + "Complex_CoT": "Alright, we have a 15-year-old boy with some intriguing symptoms. He’s got decreased facial and pubic hair and that’s worth noting right away because these are signs that sexual development might be off.\n\nThen there’s the breast enlargement—gynecomastia—and, interestingly, his limbs are unusually long. It sounds like his body proportions fall into what’s sometimes called eunuchoid proportions. 
That’s quite a specific term, but it basically means he’s got long arms and legs relative to his torso.\n\nAnother telltale sign is his small testes, but he has a normal-sized penis. This combination usually points towards an imbalance in sex hormones, often linked with hypogonadism.\n\nNow, when we consider his height, he’s at the 97th percentile. This is quite tall, especially when compared to his average weight at the 50th percentile. Hmm, this hint about height immediately brings Klinefelter syndrome to mind because guys with this condition are often taller due to that extra X chromosome.\n\nEverything seems to fit together like puzzle pieces: gynecomastia, hypogonadism, long limbs, and his tall stature. These are classic symptoms of Klinefelter.\n\nSo, what’s going on at the chromosomal level? If he has Klinefelter syndrome, it means his karyotype would typically be 47,XXY. That extra X chromosome explains a lot about these symptoms.\n\nGiven all of this, I’m confident in saying that 47,XXY is the most likely karyotype we’d see in this kid. Yep, everything aligns well with Klinefelter syndrome. That makes perfect sense!", + "Response": "Given the symptoms described, the most likely karyotype for this 15-year-old boy is 47,XXY, which is characteristic of Klinefelter syndrome. The combination of decreased facial and pubic hair, gynecomastia, small testes, long extremities, and tall stature aligns with this chromosomal pattern. Klinefelter syndrome is caused by the presence of an extra X chromosome, leading to the 47,XXY karyotype." + }, + { + "Question": "In a case where a child with pneumonia is diagnosed, and their chest X-ray indicates a pattern consistent with a common bacterial infection, what is the most likely causative organism based on typical presentations?", + "Complex_CoT": "Okay, so we have a child diagnosed with pneumonia, and their chest X-ray shows a pattern. Let's see... Pneumonia in children, hmm... 
the most common bacterial culprit is usually Streptococcus pneumoniae, right? I've read it's quite a leading cause in these cases. Now, about the chest X-ray... what did it show again? Ah, lobar consolidation. That's a classic sign for bacterial infections, especially when we're talking about Streptococcus pneumoniae. It tends to show these sorts of patterns. Children with bacterial pneumonia often exhibit symptoms like a high fever, productive cough, and maybe even trouble breathing. These seem to match up with what we have on the X-ray: lobar consolidation. Yep, this lines up well. So it looks like Streptococcus pneumoniae is probably the organism causing this.\n\nBut, wait a sec, let's just verify and consider other possibilities too. Other bacteria can cause pneumonia in kids, like Haemophilus influenzae or maybe Mycoplasma pneumoniae, among others. However, Mycoplasma pneumoniae usually shows a different kind of X-ray pattern — more diffuse, more interstitial changes — and I doubt it's the same situation here. What about Haemophilus influenzae? Well, it could show similar X-ray findings, but it's not as prevalent as Streptococcus pneumoniae.\n\nFor shallow confirmation... The symptoms we're seeing, the lobar consolidation—all this strongly aligns with Streptococcus pneumoniae. It's a common cause after all. I should also consider Staphylococcus aureus, especially if there are severe symptoms or unusual X-ray features like round pneumonia—yes, that's something Staph might do. But if we are just sticking with the typical presentations and commonality in children, Streptococcus pneumoniae really seems to fit the bill.\n\nLet's not forget, Staphylococcus aureus can sometimes show distinctive round or sometimes patchy infiltrates on X-rays, and can lead to more severe symptoms, which might make you think twice. 
However, unless there's specific clinical context—like recent skin infections—it might not be the first go-to diagnosis for a straightforward case like this.\n\nSo, all things considered, if we're talking about likelihood and fitting the most common presentation seen on a chest X-ray in a child with pneumonia, I would feel confident saying it's Streptococcus pneumoniae. Still, keep an eye on any variations in the X-ray or clinical presentation that might point toward Staph or something else.", + "Response": "In a case where a child's pneumonia diagnosis is supported by a chest X-ray showing a pattern of lobar consolidation, the most likely causative organism is Streptococcus pneumoniae. This bacterium is the leading cause of bacterial pneumonia in children and typically presents with the kind of X-ray patterns described. Other bacteria can also cause pneumonia, but based on commonality and typical presentation, Streptococcus pneumoniae is the most probable culprit." + } +] \ No newline at end of file diff --git a/tests/test-files/toy_finetune_data.jsonl b/tests/test-files/toy_finetune_data.jsonl new file mode 100644 index 0000000..51cad7d --- /dev/null +++ b/tests/test-files/toy_finetune_data.jsonl @@ -0,0 +1,10 @@ +{"query": "Five women walk along a beach wearing flip-flops.", "pos": ["Some women with flip-flops on, are walking along the beach"], "neg": ["The 4 women are sitting on the beach.", "There was a reform in 1996.", "She's not going to court to clear her record.", "The man is talking about hawaii.", "A woman is standing outside.", "The battle was over. 
", "A group of people plays volleyball."]} +{"query": "A woman standing on a high cliff on one leg looking over a river.", "pos": ["A woman is standing on a cliff."], "neg": ["A woman sits on a chair.", "George Bush told the Republicans there was no way he would let them even consider this foolish idea, against his top advisors advice.", "The family was falling apart.", "no one showed up to the meeting", "A boy is sitting outside playing in the sand.", "Ended as soon as I received the wire.", "A child is reading in her bedroom."]} +{"query": "Two woman are playing instruments; one a clarinet, the other a violin.", "pos": ["Some people are playing a tune."], "neg": ["Two women are playing a guitar and drums.", "A man is skiing down a mountain.", "The fatal dose was not taken when the murderer thought it would be.", "Person on bike", "The girl is standing, leaning against the archway.", "A group of women watch soap operas.", "No matter how old people get they never forget. "]} +{"query": "A girl with a blue tank top sitting watching three dogs.", "pos": ["A girl is wearing blue."], "neg": ["A girl is with three cats.", "The people are watching a funeral procession.", "The child is wearing black.", "Financing is an issue for us in public schools.", "Kids at a pool.", "It is calming to be assaulted.", "I face a serious problem at eighteen years old. "]} +{"query": "A yellow dog running along a forest path.", "pos": ["a dog is running"], "neg": ["a cat is running", "Steele did not keep her original story.", "The rule discourages people to pay their child support.", "A man in a vest sits in a car.", "Person in black clothing, with white bandanna and sunglasses waits at a bus stop.", "Neither the Globe or Mail had comments on the current state of Canada's road system. 
", "The Spring Creek facility is old and outdated."]} +{"query": "It sets out essential activities in each phase along with critical factors related to those activities.", "pos": ["Critical factors for essential activities are set out."], "neg": ["It lays out critical activities but makes no provision for critical factors related to those activities.", "People are assembled in protest.", "The state would prefer for you to do that.", "A girl sits beside a boy.", "Two males are performing.", "Nobody is jumping", "Conrad was being plotted against, to be hit on the head."]} +{"query": "A man giving a speech in a restaurant.", "pos": ["A person gives a speech."], "neg": ["The man sits at the table and eats food.", "This is definitely not an endorsement.", "They sold their home because they were retiring and not because of the loan.", "The seal of Missouri is perfect.", "Someone is raising their hand.", "An athlete is competing in the 1500 meter swimming competition.", "Two men watching a magic show."]} +{"query": "Indians having a gathering with coats and food and drinks.", "pos": ["A group of Indians are having a gathering with food and drinks"], "neg": ["A group of Indians are having a funeral", "It is only staged on Winter afternoons in Palma's large bullring.", "Right information can empower the legal service practices and the justice system. 
", "Meanwhile, the mainland was empty of population.", "Two children is sleeping.", "a fisherman is trying to catch a monkey", "the people are in a train"]} +{"query": "A woman with violet hair rides her bicycle outside.", "pos": ["A woman is riding her bike."], "neg": ["A woman is jogging in the park.", "The street was lined with white-painted houses.", "A group watches a movie inside.", "man at picnics cut steak", "Several chefs are sitting down and talking about food.", "The Commission notes that no significant alternatives were considered.", "We ran out of firewood and had to use pine needles for the fire."]} +{"query": "A man pulls two women down a city street in a rickshaw.", "pos": ["A man is in a city."], "neg": ["A man is a pilot of an airplane.", "It is boring and mundane.", "The morning sunlight was shining brightly and it was warm. ", "Two people jumped off the dock.", "People watching a spaceship launch.", "Mother Teresa is an easy choice.", "It's worth being able to go at a pace you prefer."]} \ No newline at end of file