diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 5033ca6483..fc1636b743 100755 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -5,6 +5,7 @@ /AgentQnA/ abolfazl.shahbazi@intel.com kaokao.lv@intel.com minmin.hou@intel.com xinyu.ye@intel.com /AudioQnA/ sihan.chen@intel.com wenjiao.yue@intel.com /AvatarChatbot/ chun.tao@intel.com kaokao.lv@intel.com xinyu.ye@intel.com +/BrowserUseAgent/ letong.han@intel.com yi.a.yao@intel.com /ChatQnA/ liang1.lv@intel.com letong.han@intel.com /CodeGen/ liang1.lv@intel.com qing.yao@intel.com /CodeTrans/ sihan.chen@intel.com letong.han@intel.com diff --git a/.github/code_spell_ignore.txt b/.github/code_spell_ignore.txt index c72099bfd8..49716a1268 100644 --- a/.github/code_spell_ignore.txt +++ b/.github/code_spell_ignore.txt @@ -1,4 +1,5 @@ ModelIn modelin pressEnter -PromptIn \ No newline at end of file +PromptIn +OT \ No newline at end of file diff --git a/.github/env/_build_image.sh b/.github/env/_build_image.sh deleted file mode 100644 index 61b0d902ed..0000000000 --- a/.github/env/_build_image.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -# Copyright (C) 2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -export VLLM_VER=v0.10.0 -export VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0 diff --git a/.github/workflows/_build_image.yml b/.github/workflows/_build_image.yml index a91c522847..09c698a427 100644 --- a/.github/workflows/_build_image.yml +++ b/.github/workflows/_build_image.yml @@ -78,7 +78,7 @@ jobs: fi - name: Checkout out GenAIExamples - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: ref: ${{ env.CHECKOUT_REF }} fetch-depth: 0 @@ -87,13 +87,6 @@ jobs: run: | cd ${{ github.workspace }}/${{ inputs.example }}/docker_image_build docker_compose_path=${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml - source ${{ github.workspace }}/.github/env/_build_image.sh - if [[ $(grep -c "vllm:" ${docker_compose_path}) != 0 ]]; then - git clone -b ${VLLM_VER} --single-branch https://github.com/vllm-project/vllm.git - fi - if [[ $(grep -c "vllm-gaudi:" ${docker_compose_path}) != 0 ]]; then - git clone -b ${VLLM_FORK_VER} --single-branch https://github.com/HabanaAI/vllm-fork.git - fi git clone --depth 1 --branch ${{ inputs.opea_branch }} https://github.com/opea-project/GenAIComps.git cd GenAIComps && git rev-parse HEAD && cd ../ diff --git a/.github/workflows/_get-image-list.yml b/.github/workflows/_get-image-list.yml index 0061070762..7f8e785cc7 100644 --- a/.github/workflows/_get-image-list.yml +++ b/.github/workflows/_get-image-list.yml @@ -31,10 +31,10 @@ jobs: run_matrix: ${{ steps.get-matrix.outputs.run_matrix }} steps: - name: Checkout out Repo - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 - name: Checkout GenAIComps Repository - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: repository: opea-project/GenAIComps path: GenAIComps @@ -45,7 +45,7 @@ jobs: image_list=[] run_matrix="{\"include\":[" if [[ ! 
-z "${{ inputs.examples }}" ]]; then - pip install yq + pip install yq==3.4.3 examples=($(echo ${{ inputs.examples }} | tr ',' ' ')) for example in ${examples[@]} do diff --git a/.github/workflows/_get-test-matrix.yml b/.github/workflows/_get-test-matrix.yml index 6608884eae..f710564d2d 100644 --- a/.github/workflows/_get-test-matrix.yml +++ b/.github/workflows/_get-test-matrix.yml @@ -47,7 +47,7 @@ jobs: echo "checkout ref ${{ env.CHECKOUT_REF }}" - name: Checkout out Repo - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: ref: ${{ env.CHECKOUT_REF }} fetch-depth: 0 diff --git a/.github/workflows/_helm-e2e.yml b/.github/workflows/_helm-e2e.yml index 3bc03826c5..f05f046cdc 100644 --- a/.github/workflows/_helm-e2e.yml +++ b/.github/workflows/_helm-e2e.yml @@ -55,7 +55,7 @@ jobs: echo "checkout ref ${CHECKOUT_REF}" - name: Checkout Repo - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: ref: ${{ steps.get-checkout-ref.outputs.CHECKOUT_REF }} fetch-depth: 0 @@ -128,7 +128,7 @@ jobs: echo "checkout ref ${CHECKOUT_REF}" - name: Checkout Repo - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: ref: ${{ steps.get-checkout-ref.outputs.CHECKOUT_REF }} fetch-depth: 0 diff --git a/.github/workflows/_run-docker-compose.yml b/.github/workflows/_run-docker-compose.yml index 33efac6c0e..b8fb7ff6e4 100644 --- a/.github/workflows/_run-docker-compose.yml +++ b/.github/workflows/_run-docker-compose.yml @@ -61,7 +61,7 @@ jobs: echo "checkout ref ${CHECKOUT_REF}" - name: Checkout out Repo - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: ref: ${{ steps.get-checkout-ref.outputs.CHECKOUT_REF }} fetch-depth: 0 @@ -69,17 +69,19 @@ jobs: - name: Get test matrix shell: bash id: test-case-matrix + env: + HARDWARE: ${{ inputs.hardware }} run: | example_l=$(echo ${{ inputs.example }} | tr '[:upper:]' '[:lower:]') cd ${{ github.workspace }}/${{ inputs.example }}/tests run_test_cases="" - if [[ "${{ inputs.hardware }}" == "gaudi"* ]]; then + if [[ "$HARDWARE" == "gaudi"* ]]; then hardware="gaudi" - elif [[ "${{ inputs.hardware }}" == "xeon"* ]]; then + elif [[ "$HARDWARE" == "xeon"* ]]; then hardware="xeon" else - hardware="${{ inputs.hardware }}" + hardware="$HARDWARE" fi default_test_case=$(find . -type f -name "test_compose_on_$hardware.sh" | cut -d/ -f2) if [ "$default_test_case" ]; then run_test_cases="$default_test_case"; fi @@ -110,6 +112,13 @@ jobs: run_test_cases=$other_test_cases fi + if [[ "$hardware" == "xeon"* ]]; then + if [ -f "${{ github.workspace }}/${{ inputs.example }}/tests/test_ui_on_xeon.sh" ]; then + run_test_cases="$run_test_cases test_ui_on_xeon.sh"; + elif [ -f "${{ github.workspace }}/${{ inputs.example }}/tests/test_ui_on_gaudi.sh" ]; then + run_test_cases="$run_test_cases test_ui_on_gaudi.sh"; + fi + fi test_cases=$(echo $run_test_cases | tr ' ' '\n' | sort -u | jq -R '.' 
| jq -sc '.') echo "test_cases=$test_cases" echo "test_cases=$test_cases" >> $GITHUB_OUTPUT @@ -141,7 +150,7 @@ jobs: docker images - name: Checkout out Repo - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: ref: ${{ needs.get-test-case.outputs.CHECKOUT_REF }} fetch-depth: 0 @@ -234,7 +243,7 @@ jobs: - name: Publish pipeline artifact if: ${{ !cancelled() }} - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 with: name: ${{ inputs.hardware }}_${{ inputs.example }}_${{ matrix.test_case }} path: ${{ github.workspace }}/${{ inputs.example }}/tests/*.log diff --git a/.github/workflows/_run-one-click.yml b/.github/workflows/_run-one-click.yml index a44822e2d2..78527d5938 100644 --- a/.github/workflows/_run-one-click.yml +++ b/.github/workflows/_run-one-click.yml @@ -27,6 +27,10 @@ on: description: 'Test examples with docker compose or k8s charts' required: true type: string + os: + description: OS to run the test on + required: true + type: string hardware: description: Hardware to run the test on required: true @@ -56,7 +60,7 @@ jobs: - name: Checkout out Repo if: ${{ inputs.deploy_method == 'docker' }} - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: fetch-depth: 0 @@ -90,7 +94,7 @@ jobs: - name: Checkout out Repo if: ${{ inputs.deploy_method == 'k8s' }} - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: fetch-depth: 0 @@ -103,7 +107,8 @@ jobs: INPUT_DATA=$( jq -r --arg type "${{ inputs.deploy_method }}" --arg id "${{ inputs.example }}" \ '.[$type][$id].inputs.clear[]' ${{ github.workspace }}/.github/workflows/one-click-inputs-nobuild.json | \ - sed "s/\$hardware/${{ inputs.hardware }}/g" + sed "s|\$hardware|${{ inputs.hardware }}|g; \ + s|\$os|${{ inputs.os }}|g" ) echo "Generated input:" echo "$INPUT_DATA" @@ -122,20 +127,11 @@ jobs: id: get-default-params if: ${{ inputs.deploy_method == 'k8s' }} run: | - cd ${{ github.workspace }} - example=$(echo "${{ inputs.example }}" | cut -d'-' -f1) - PARAMS_JSON=$(python3 .github/workflows/scripts/get-params.py $example) - echo "LLM_model=$(echo "$PARAMS_JSON" | jq -r '.llm_model')" >> $GITHUB_ENV - if [ "$example" = "ChatQnA" ]; then - echo "LLM_model=$(echo "$PARAMS_JSON" | jq -r '.llm_model')" >> $GITHUB_ENV - echo "Embedding_model=$(echo "$PARAMS_JSON" | jq -r '.embed_model')" >> $GITHUB_ENV - echo "Reranking_model=$(echo "$PARAMS_JSON" | jq -r '.rerank_model')" >> $GITHUB_ENV - echo "Mount_dir=$(echo "$PARAMS_JSON" | jq -r '.mount_dir')" >> $GITHUB_ENV - elif [ "$example" = "VisualQnA" ]; then - echo "LVM_model=$(echo "$PARAMS_JSON" | jq -r '.lvm_model')" >> $GITHUB_ENV - elif [ "$example" = "AgentQnA" ]; then - echo "LLM_model=$(echo "$PARAMS_JSON" | jq -r '.gaudi.llm_model')" >> $GITHUB_ENV - fi + echo "LLM_model=" >> $GITHUB_ENV + echo "Embedding_model=" >> $GITHUB_ENV + echo "Reranking_model=" >> $GITHUB_ENV + echo "Mount_dir=" >> $GITHUB_ENV + echo "LVM_model=" >> $GITHUB_ENV - name: deploy and test shell: bash @@ -157,7 +153,7 @@ jobs: LVM_model: ${{ env.LVM_model }} run: | cd ${{ github.workspace }}/one_click_deploy - python3 -m pip install -r requirements.txt + python3 -m pip install --require-hashes -r requirements.txt if [ "${{ inputs.deploy_method }}" = "k8s" ]; then export OPEA_K8S_VLLM_SKIP_WARMUP=TRUE @@ -168,6 +164,7 @@ jobs: jq -r --arg type "${{ inputs.deploy_method }}" --arg id "${{ inputs.example }}" \ 
'.[$type][$id].inputs.deploy[]' ${{ github.workspace }}/.github/workflows/one-click-inputs-nobuild.json | \ sed "s|\$hardware|${{ inputs.hardware }}|g; \ + s|\$os|${{ inputs.os }}|g; \ s|\$HF_TOKEN|$HF_TOKEN|g; \ s|\$LLM_model|$LLM_model|g; \ s|\$Embedding_model|$Embedding_model|g; \ @@ -196,7 +193,8 @@ jobs: INPUT_DATA=$( jq -r --arg type "${{ inputs.deploy_method }}" --arg id "${{ inputs.example }}" \ '.[$type][$id].inputs.clear[]' ${{ github.workspace }}/.github/workflows/one-click-inputs-nobuild.json | \ - sed "s/\$hardware/${{ inputs.hardware }}/g" + sed "s|\$hardware|${{ inputs.hardware }}|g; \ + s|\$os|${{ inputs.os }}|g" ) echo "Generated input:" echo "$INPUT_DATA" @@ -207,7 +205,7 @@ jobs: - name: Publish pipeline artifact if: ${{ !cancelled() }} - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 with: name: ${{ inputs.deploy_method }} path: ${{ github.workspace }}/${{ inputs.deploy_method }}-tests/test-results.log diff --git a/.github/workflows/_trivy-scan.yml b/.github/workflows/_trivy-scan.yml index 0ad85891ea..c90d74350b 100644 --- a/.github/workflows/_trivy-scan.yml +++ b/.github/workflows/_trivy-scan.yml @@ -43,20 +43,13 @@ jobs: sudo rm -rf ${{github.workspace}}/* || true docker system prune -f - name: Checkout out Repo - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 - name: Install Dependencies run: | sudo apt-get update sudo apt-get install -y jq - name: Clone Required Repo run: | - source ${{ github.workspace }}/.github/env/_build_image.sh - if [[ "${{ inputs.image }}" == "vllm" ]]; then - git clone -b ${VLLM_VER} --single-branch https://github.com/vllm-project/vllm.git - fi - if [[ "${{ inputs.image }}" == "vllm-gaudi" ]]; then - git clone -b ${VLLM_FORK_VER} --single-branch https://github.com/HabanaAI/vllm-fork.git - fi git clone --depth 1 https://github.com/opea-project/GenAIComps.git cd GenAIComps && git rev-parse HEAD && cd ../ - name: Pull Image @@ -98,7 +91,7 @@ jobs: shell: bash - name: Security Scan Container - uses: aquasecurity/trivy-action@0.24.0 + uses: aquasecurity/trivy-action@6e7b7d1fd3e4fef0c5fa8cce1229c54b2c9bd0d8 if: ${{ inputs.trivy_scan }} with: image-ref: ${{ env.OPEA_IMAGE_REPO }}opea/${{ inputs.image }}:${{ inputs.tag }} diff --git a/.github/workflows/check-online-doc-build.yml b/.github/workflows/check-online-doc-build.yml index fecb227503..3a3db1c4c8 100644 --- a/.github/workflows/check-online-doc-build.yml +++ b/.github/workflows/check-online-doc-build.yml @@ -17,12 +17,12 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: path: GenAIExamples - name: Checkout docs - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: repository: opea-project/docs path: docs diff --git a/.github/workflows/daily-update-vllm-version.yml b/.github/workflows/daily-update-vllm-version.yml.disabled similarity index 98% rename from .github/workflows/daily-update-vllm-version.yml rename to .github/workflows/daily-update-vllm-version.yml.disabled index 0263c75239..ea579157e0 100644 --- a/.github/workflows/daily-update-vllm-version.yml +++ b/.github/workflows/daily-update-vllm-version.yml.disabled @@ -32,7 +32,7 @@ jobs: fail-fast: false steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: fetch-depth: 0 ref: ${{ github.ref }} diff --git 
a/.github/workflows/docker/code-scan.dockerfile b/.github/workflows/docker/code-scan.dockerfile index 73fad03a26..0a91d86320 100644 --- a/.github/workflows/docker/code-scan.dockerfile +++ b/.github/workflows/docker/code-scan.dockerfile @@ -1,8 +1,8 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -ARG UBUNTU_VER=22.04 -FROM ubuntu:${UBUNTU_VER} as devel +# ARG UBUNTU_VER=22.04 +FROM ubuntu@sha256:4e0171b9275e12d375863f2b3ae9ce00a4c53ddda176bd55868df97ac6f21a6e as devel ENV LANG=C.UTF-8 diff --git a/.github/workflows/dockerhub-description.yml b/.github/workflows/dockerhub-description.yml index 296f464f47..a737eb5399 100644 --- a/.github/workflows/dockerhub-description.yml +++ b/.github/workflows/dockerhub-description.yml @@ -84,25 +84,25 @@ jobs: fail-fast: false steps: - name: Checkout GenAIExamples - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: repository: opea-project/GenAIExamples path: GenAIExamples - name: Checkout GenAIComps - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: repository: opea-project/GenAIComps path: GenAIComps - name: Checkout vllm-openvino - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: repository: vllm-project/vllm path: vllm - name: Checkout vllm-gaudi - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: repository: HabanaAI/vllm-fork ref: habana_main diff --git a/.github/workflows/manual-docker-scan.yml b/.github/workflows/manual-docker-scan.yml index 4aec879c02..b4c0e26988 100644 --- a/.github/workflows/manual-docker-scan.yml +++ b/.github/workflows/manual-docker-scan.yml @@ -59,7 +59,7 @@ jobs: runs-on: "docker-build-${{ inputs.node }}" if: always() steps: - - uses: actions/upload-artifact@v4.3.4 + - uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b with: name: trivy-scan-${{ inputs.tag }}-${{ github.run_number }} path: /tmp/scan-${{ inputs.tag }}-${{ github.run_number }}/*-trivy-scan.txt diff --git a/.github/workflows/manual-freeze-tag.yml b/.github/workflows/manual-freeze-tag.yml index 88c1bb6c85..bda0d134c4 100644 --- a/.github/workflows/manual-freeze-tag.yml +++ b/.github/workflows/manual-freeze-tag.yml @@ -3,7 +3,7 @@ name: Freeze OPEA images release tag permissions: - contents: read + contents: write on: workflow_dispatch: inputs: @@ -19,7 +19,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: fetch-depth: 0 ref: ${{ github.ref }} diff --git a/.github/workflows/manual-one-click-workflow.yml b/.github/workflows/manual-one-click-workflow.yml index 0463da0f19..a1aec74c40 100644 --- a/.github/workflows/manual-one-click-workflow.yml +++ b/.github/workflows/manual-one-click-workflow.yml @@ -24,6 +24,14 @@ on: description: "Tag to apply to images" required: true type: string + os: + default: "debian" + description: "OS to run the test on debian or openeuler" + type: choice + required: true + options: + - debian + - openeuler deploy_methods: default: "docker,k8s" description: 'Test examples with docker compose or k8s charts(exclude AgentQnA k8s on Xeon)' @@ -41,6 +49,7 @@ jobs: examples: ${{ steps.get-matrix.outputs.examples }} nodes: ${{ steps.get-matrix.outputs.nodes }} deploy_methods: ${{ steps.get-matrix.outputs.deploy_methods }} + OSs: ${{ steps.get-matrix.outputs.OSs }} steps: - name: Create Matrix id: get-matrix @@ 
-54,9 +63,13 @@ jobs: deploy_methods=($(echo ${{ inputs.deploy_methods }} | tr ',' ' ')) deploy_methods_json=$(printf '%s\n' "${deploy_methods[@]}" | sort -u | jq -R '.' | jq -sc '.') echo "deploy_methods=$deploy_methods_json" >> $GITHUB_OUTPUT + OSs=($(echo ${{ inputs.os }} | tr ',' ' ')) + OSs_json=$(printf '%s\n' "${OSs[@]}" | sort -u | jq -R '.' | jq -sc '.') + echo "OSs=$OSs_json" >> $GITHUB_OUTPUT echo "examples=$examples_json" echo "nodes=$nodes_json" echo "deploy_methods=$deploy_methods_json" + echo "OSs=$OSs_json" run-examples: # permissions: @@ -81,12 +94,14 @@ jobs: example: ${{ fromJson(needs.get-test-matrix.outputs.examples) }} hardware: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }} deploy_method: ${{ fromJson(needs.get-test-matrix.outputs.deploy_methods) }} + os: ${{ fromJson(needs.get-test-matrix.outputs.OSs) }} fail-fast: false uses: ./.github/workflows/_run-one-click.yml with: hardware: ${{ matrix.hardware }} example: ${{ matrix.example }} deploy_method: ${{ matrix.deploy_method }} + os: ${{ matrix.os }} tag: ${{ inputs.tag }} registry: "opea" secrets: inherit diff --git a/.github/workflows/mix-trellix.yml b/.github/workflows/mix-trellix.yml index 65f18e6dbc..bad18fa631 100644 --- a/.github/workflows/mix-trellix.yml +++ b/.github/workflows/mix-trellix.yml @@ -17,7 +17,7 @@ jobs: run: sudo rm -rf ${{github.workspace}}/* - name: Checkout out Repo - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 - name: Run Trellix Scanner env: @@ -26,6 +26,6 @@ jobs: - name: Publish pipeline artifact if: ${{ !cancelled() }} - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 with: path: ${{ github.workspace }}/.github/workflows/scripts/codeScan/report.html diff --git a/.github/workflows/one-click-inputs-nobuild.json b/.github/workflows/one-click-inputs-nobuild.json index e1fef7ea43..610b20f563 100644 --- a/.github/workflows/one-click-inputs-nobuild.json +++ b/.github/workflows/one-click-inputs-nobuild.json @@ -7,6 +7,7 @@ "1", "1", "docker", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -21,7 +22,7 @@ "y", "Y" ], - "clear": ["1", "1", "2", "docker", "$hardware", "Y"] + "clear": ["1", "1", "2", "docker", "$os", "$hardware", "Y"] } }, "ChatQnA-Qwen": { @@ -31,6 +32,7 @@ "1", "1", "docker", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -45,7 +47,7 @@ "y", "Y" ], - "clear": ["1", "1", "2", "docker", "$hardware", "Y"] + "clear": ["1", "1", "2", "docker", "$os", "$hardware", "Y"] } }, "CodeTrans": { @@ -55,6 +57,7 @@ "1", "1", "docker", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -66,7 +69,7 @@ "y", "Y" ], - "clear": ["2", "1", "2", "docker", "$hardware", "Y"] + "clear": ["2", "1", "2", "docker", "$os", "$hardware", "Y"] } }, "DocSum": { @@ -76,6 +79,7 @@ "1", "1", "docker", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -87,7 +91,7 @@ "y", "Y" ], - "clear": ["3", "1", "2", "docker", "$hardware", "Y"] + "clear": ["3", "1", "2", "docker", "$os", "$hardware", "Y"] } }, "CodeGen": { @@ -97,6 +101,7 @@ "1", "1", "docker", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -108,7 +113,7 @@ "y", "Y" ], - "clear": ["4", "1", "2", "docker", "$hardware", "Y"] + "clear": ["4", "1", "2", "docker", "$os", "$hardware", "Y"] } }, "AudioQnA": { @@ -118,6 +123,7 @@ "1", "1", "docker", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -129,7 +135,7 @@ "y", "Y" ], - "clear": ["5", "1", "2", "docker", "$hardware", "Y"] + "clear": ["5", "1", "2", "docker", "$os", "$hardware", 
"Y"] } }, "VisualQnA": { @@ -139,6 +145,7 @@ "1", "1", "docker", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -150,7 +157,7 @@ "y", "Y" ], - "clear": ["6", "1", "2", "docker", "$hardware", "Y"] + "clear": ["6", "1", "2", "docker", "$os", "$hardware", "Y"] } }, "FaqGen": { @@ -160,6 +167,7 @@ "1", "1", "docker", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -171,7 +179,7 @@ "y", "Y" ], - "clear": ["7", "1", "2", "docker", "$hardware", "Y"] + "clear": ["7", "1", "2", "docker", "$os", "$hardware", "Y"] } }, "AgentQnA": { @@ -181,6 +189,7 @@ "1", "1", "docker", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -193,7 +202,7 @@ "y", "Y" ], - "clear": ["8", "1", "2", "docker", "$hardware", "Y"] + "clear": ["8", "1", "2", "docker", "$os", "$hardware", "Y"] } } }, @@ -205,6 +214,7 @@ "1", "1", "k8s", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -219,7 +229,7 @@ "8080", "Y" ], - "clear": ["1", "1", "2", "k8s", "y", "Y"] + "clear": ["1", "1", "2", "k8s", "$os", "y", "Y"] } }, "ChatQnA-Qwen": { @@ -229,6 +239,7 @@ "1", "1", "k8s", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -243,7 +254,7 @@ "8080", "Y" ], - "clear": ["1", "1", "2", "k8s", "y", "Y"] + "clear": ["1", "1", "2", "k8s", "$os", "y", "Y"] } }, "CodeTrans": { @@ -253,6 +264,7 @@ "1", "1", "k8s", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -265,7 +277,7 @@ "8080", "Y" ], - "clear": ["2", "1", "2", "k8s", "y", "Y"] + "clear": ["2", "1", "2", "k8s", "$os", "y", "Y"] } }, "DocSum": { @@ -275,6 +287,7 @@ "1", "1", "k8s", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -287,7 +300,7 @@ "8080", "Y" ], - "clear": ["3", "1", "2", "k8s", "y", "Y"] + "clear": ["3", "1", "2", "k8s", "$os", "y", "Y"] } }, "CodeGen": { @@ -297,6 +310,7 @@ "1", "1", "k8s", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -309,7 +323,7 @@ "8080", "Y" ], - "clear": ["4", "1", "2", "k8s", "y", "Y"] + "clear": ["4", "1", "2", "k8s", "$os", "y", "Y"] } }, "AudioQnA": { @@ -319,6 +333,7 @@ "1", "1", "k8s", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -331,7 +346,7 @@ "8080", "Y" ], - "clear": ["5", "1", "2", "k8s", "y", "Y"] + "clear": ["5", "1", "2", "k8s", "$os", "y", "Y"] } }, "VisualQnA": { @@ -341,6 +356,7 @@ "1", "1", "k8s", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -353,7 +369,7 @@ "8080", "Y" ], - "clear": ["6", "1", "2", "k8s", "y", "Y"] + "clear": ["6", "1", "2", "k8s", "$os", "y", "Y"] } }, "FaqGen": { @@ -363,6 +379,7 @@ "1", "1", "k8s", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -375,7 +392,7 @@ "8080", "Y" ], - "clear": ["7", "1", "2", "k8s", "y", "Y"] + "clear": ["7", "1", "2", "k8s", "$os", "y", "Y"] } }, "AgentQnA": { @@ -385,6 +402,7 @@ "1", "1", "k8s", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -398,7 +416,7 @@ "8080", "Y" ], - "clear": ["8", "1", "2", "k8s", "y", "Y"] + "clear": ["8", "1", "2", "k8s", "$os", "y", "Y"] } } } diff --git a/.github/workflows/one-click-inputs.json b/.github/workflows/one-click-inputs.json index dd2c516a2d..f42b163996 100644 --- a/.github/workflows/one-click-inputs.json +++ b/.github/workflows/one-click-inputs.json @@ -6,6 +6,7 @@ "1", "1", "docker", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -22,7 +23,7 @@ "y", "Y" ], - "clear": ["1", "2", "docker", "$hardware", "Y"] + "clear": ["1", "2", "docker", "$os", "$hardware", "Y"] } }, "CodeTrans": { @@ -31,6 +32,7 @@ "2", "1", "docker", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -44,7 +46,7 @@ "y", "Y" ], - "clear": ["2", "2", "docker", "$hardware", "Y"] + "clear": ["2", "2", 
"docker", "$os", "$hardware", "Y"] } }, "DocSum": { @@ -53,6 +55,7 @@ "3", "1", "docker", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -66,7 +69,7 @@ "y", "Y" ], - "clear": ["3", "2", "docker", "$hardware", "Y"] + "clear": ["3", "2", "docker", "$os", "$hardware", "Y"] } }, "CodeGen": { @@ -75,6 +78,7 @@ "4", "1", "docker", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -88,7 +92,7 @@ "y", "Y" ], - "clear": ["4", "2", "docker", "$hardware", "Y"] + "clear": ["4", "2", "docker", "$os", "$hardware", "Y"] } }, "AudioQnA": { @@ -97,6 +101,7 @@ "5", "1", "docker", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -110,7 +115,7 @@ "y", "Y" ], - "clear": ["5", "2", "docker", "$hardware", "Y"] + "clear": ["5", "2", "docker", "$os", "$hardware", "Y"] } }, "VisualQnA": { @@ -119,6 +124,7 @@ "6", "1", "docker", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -132,7 +138,7 @@ "y", "Y" ], - "clear": ["6", "2", "docker", "$hardware", "Y"] + "clear": ["6", "2", "docker", "$os", "$hardware", "Y"] } }, "FaqGen": { @@ -141,6 +147,7 @@ "7", "1", "docker", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -154,7 +161,7 @@ "y", "Y" ], - "clear": ["7", "2", "docker", "$hardware", "Y"] + "clear": ["7", "2", "docker", "$os", "$hardware", "Y"] } }, "AgentQnA": { @@ -163,6 +170,7 @@ "8", "1", "docker", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -177,7 +185,7 @@ "y", "Y" ], - "clear": ["8", "2", "docker", "$hardware", "Y"] + "clear": ["8", "2", "docker", "$os", "$hardware", "Y"] } } }, @@ -188,6 +196,7 @@ "1", "1", "k8s", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -204,7 +213,7 @@ "8080", "Y" ], - "clear": ["1", "2", "k8s", "y", "Y"] + "clear": ["1", "2", "k8s", "$os", "y", "Y"] } }, "CodeTrans": { @@ -213,6 +222,7 @@ "2", "1", "k8s", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -227,7 +237,7 @@ "8080", "Y" ], - "clear": ["2", "2", "k8s", "y", "Y"] + "clear": ["2", "2", "k8s", "$os", "y", "Y"] } }, "DocSum": { @@ -236,6 +246,7 @@ "3", "1", "k8s", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -250,7 +261,7 @@ "8080", "Y" ], - "clear": ["3", "2", "k8s", "y", "Y"] + "clear": ["3", "2", "k8s", "$os", "y", "Y"] } }, "CodeGen": { @@ -259,6 +270,7 @@ "4", "1", "k8s", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -273,7 +285,7 @@ "8080", "Y" ], - "clear": ["4", "2", "k8s", "y", "Y"] + "clear": ["4", "2", "k8s", "$os", "y", "Y"] } }, "AudioQnA": { @@ -282,6 +294,7 @@ "5", "1", "k8s", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -296,7 +309,7 @@ "8080", "Y" ], - "clear": ["5", "2", "k8s", "y", "Y"] + "clear": ["5", "2", "k8s", "$os", "y", "Y"] } }, "VisualQnA": { @@ -305,6 +318,7 @@ "6", "1", "k8s", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -319,7 +333,7 @@ "8080", "Y" ], - "clear": ["6", "2", "k8s", "y", "Y"] + "clear": ["6", "2", "k8s", "$os", "y", "Y"] } }, "FaqGen": { @@ -328,6 +342,7 @@ "7", "1", "k8s", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -342,7 +357,7 @@ "8080", "Y" ], - "clear": ["7", "2", "k8s", "y", "Y"] + "clear": ["7", "2", "k8s", "$os", "y", "Y"] } }, "AgentQnA": { @@ -351,6 +366,7 @@ "8", "1", "k8s", + "$os", "$hardware", "$HF_TOKEN", "$http_proxy", @@ -366,7 +382,7 @@ "8080", "Y" ], - "clear": ["8", "2", "k8s", "y", "Y"] + "clear": ["8", "2", "k8s", "$os", "y", "Y"] } } } diff --git a/.github/workflows/pr-chart-e2e.yml b/.github/workflows/pr-chart-e2e.yml index 3990e5fce9..256151fb09 100644 --- a/.github/workflows/pr-chart-e2e.yml +++ b/.github/workflows/pr-chart-e2e.yml @@ -28,7 +28,7 @@ jobs: run_matrix: ${{ 
steps.get-test-matrix.outputs.run_matrix }} steps: - name: Checkout Repo - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: ref: "refs/pull/${{ github.event.number }}/merge" fetch-depth: 0 diff --git a/.github/workflows/pr-check-duplicated-image.yml b/.github/workflows/pr-check-duplicated-image.yml index 2922b8f4fe..8c39c64fd8 100644 --- a/.github/workflows/pr-check-duplicated-image.yml +++ b/.github/workflows/pr-check-duplicated-image.yml @@ -27,11 +27,11 @@ jobs: run: sudo rm -rf ${{github.workspace}}/* - name: Checkout Repo - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 - name: Check all the docker image build files run: | - pip install PyYAML + pip install PyYAML==6.0.3 cd ${{github.workspace}} build_files="" for f in `find . -path "*/docker_image_build/build.yaml"`; do diff --git a/.github/workflows/pr-code-scan.yml b/.github/workflows/pr-code-scan.yml index f64184c519..839c196311 100644 --- a/.github/workflows/pr-code-scan.yml +++ b/.github/workflows/pr-code-scan.yml @@ -34,7 +34,7 @@ jobs: fail-fast: false steps: - name: Checkout out Repo - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 - name: Check Dangerous Command Injection uses: opea-project/validation/actions/check-cmd@main @@ -62,7 +62,7 @@ jobs: - name: Publish pipeline artifact if: ${{ !cancelled() }} - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 with: name: ${{ matrix.job_name }} path: ${{ github.workspace }}/.github/workflows/scripts/codeScan/${{ matrix.job_name }}.* @@ -74,7 +74,7 @@ jobs: contents: read steps: - name: Repository checkout - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: fetch-depth: 0 @@ -98,7 +98,7 @@ jobs: - name: ShellCheck # if: ${{ env.changed_files != '' }} if: steps.get-changed-files.outputs.changed_files_exist == 'true' - uses: ludeeus/action-shellcheck@2.0.0 + uses: ludeeus/action-shellcheck@00cae500b08a931fb5698e11e79bfbd38e612a38 env: SHELLCHECK_OPTS: -e SC2164 -e SC2154 -e SC2034 -e SC2155 -e SC1090 -e SC2153 -e SC2046 with: diff --git a/.github/workflows/pr-dependency-review.yml b/.github/workflows/pr-dependency-review.yml index 0867382269..01527452a8 100644 --- a/.github/workflows/pr-dependency-review.yml +++ b/.github/workflows/pr-dependency-review.yml @@ -13,7 +13,7 @@ jobs: pull-requests: write steps: - name: "Checkout Repository" - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 - name: Dependency Review uses: actions/dependency-review-action@v4 with: diff --git a/.github/workflows/pr-dockerfile-path-and-build-yaml-scan.yml b/.github/workflows/pr-dockerfile-path-and-build-yaml-scan.yml index 1894776d3f..7ecbd9ef09 100644 --- a/.github/workflows/pr-dockerfile-path-and-build-yaml-scan.yml +++ b/.github/workflows/pr-dockerfile-path-and-build-yaml-scan.yml @@ -17,7 +17,7 @@ jobs: run: sudo rm -rf ${{github.workspace}}/* - name: Checkout Repo GenAIExamples - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 - name: Clone Repo GenAIComps run: | @@ -50,7 +50,7 @@ jobs: run: sudo rm -rf ${{github.workspace}}/* - name: Checkout Repo GenAIExamples - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: fetch-depth: 0 @@ -85,11 +85,11 @@ jobs: run: sudo rm -rf ${{github.workspace}}/* - name: Checkout Repo GenAIExamples - 
uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 - name: Check name agreement in build.yaml run: | - pip install ruamel.yaml + pip install ruamel.yaml==0.18.6 cd ${{github.workspace}} consistency="TRUE" build_yamls=$(find . -name 'build.yaml') diff --git a/.github/workflows/pr-image-size.yml b/.github/workflows/pr-image-size.yml index bbed2b0d88..ffd432c7bd 100644 --- a/.github/workflows/pr-image-size.yml +++ b/.github/workflows/pr-image-size.yml @@ -23,7 +23,7 @@ jobs: files: ${{ steps.changed-dockerfiles.outputs.files }} steps: - name: Checkout PR branch - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: fetch-depth: 0 @@ -46,16 +46,16 @@ jobs: matrix: dockerfile: ${{ fromJson(needs.get-check-list.outputs.files) }} fail-fast: false - # outputs: - # comments: ${{ steps.build-check.outputs.comment_message }} + outputs: + skip: ${{ steps.build-check.outputs.skip }} steps: - name: Checkout PR branch - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: fetch-depth: 0 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@d70bba72b1f3fd22344832f00baa16ece964efeb - name: Build and check image sizes id: build-check @@ -92,7 +92,11 @@ jobs: echo "Building base image for $dockerfile" git checkout ${{ github.event.pull_request.base.sha }} echo "::group::Build image_base" - docker build -f $file -t "$image_base" --no-cache . || true + if ! docker build -f "$file" -t "$image_base" --no-cache .; then + echo "skip=true" >> $GITHUB_ENV + echo "skip=true" >> $GITHUB_OUTPUT + exit 0 + fi echo "::endgroup::" size_base=$(docker image inspect "$image_base" | jq '.[0].Size / (1024 * 1024) | round') @@ -123,20 +127,23 @@ jobs: echo "summary_path=${{github.workspace}}/build-$image_name.md" >> $GITHUB_ENV - name: Download origin artifact log - uses: actions/download-artifact@v4 + if: env.skip != 'true' + uses: actions/download-artifact@v4.1.3 with: name: build-comments path: merged-files continue-on-error: true - name: Merge logs + if: env.skip != 'true' run: | mkdir -p merged-files ls merged-files/ cp ${{ env.summary_path }} merged-files/ - name: Save Summary as Artifact - uses: actions/upload-artifact@v4 + if: env.skip != 'true' + uses: actions/upload-artifact@c7d193f32edcb7bfad88892161225aeda64e9392 with: name: build-comments path: merged-files/ @@ -146,13 +153,13 @@ jobs: needs: build-and-check permissions: actions: read - if: always() + if: always() && needs.build-and-check.outputs.skip != 'true' runs-on: ubuntu-latest outputs: all_comments: ${{ steps.summary.outputs.all_comments }} steps: - name: Download Summary - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v4.1.3 with: name: build-comments path: downloaded-files diff --git a/.github/workflows/pr-link-path-scan.yml b/.github/workflows/pr-link-path-scan.yml index 932646a52d..a14eaf3cd3 100644 --- a/.github/workflows/pr-link-path-scan.yml +++ b/.github/workflows/pr-link-path-scan.yml @@ -19,7 +19,7 @@ jobs: run: sudo rm -rf ${{github.workspace}}/* - name: Checkout Repo GenAIExamples - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: fetch-depth: 0 @@ -84,7 +84,7 @@ jobs: run: sudo rm -rf ${{github.workspace}}/* - name: Checkout Repo GenAIExamples - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: fetch-depth: 0 diff --git 
a/.github/workflows/pr-one-click.yml b/.github/workflows/pr-one-click.yml index eb90bb0e26..b403a21c35 100644 --- a/.github/workflows/pr-one-click.yml +++ b/.github/workflows/pr-one-click.yml @@ -26,12 +26,14 @@ jobs: strategy: matrix: deploy_method: ["docker", "k8s"] + os: ["debian", "openeuler"] fail-fast: false uses: ./.github/workflows/_run-one-click.yml with: registry: "opea" tag: "ci" example: ChatQnA - hardware: gaudi + os: ${{ matrix.os }} + hardware: xeon deploy_method: ${{ matrix.deploy_method }} secrets: inherit diff --git a/.github/workflows/push-images-path-detection.yml b/.github/workflows/push-images-path-detection.yml index 9edfad2678..7137cb225c 100644 --- a/.github/workflows/push-images-path-detection.yml +++ b/.github/workflows/push-images-path-detection.yml @@ -19,7 +19,7 @@ jobs: run: sudo rm -rf ${{github.workspace}}/* - name: Checkout repo GenAIExamples - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 - name: Check the validity of links run: | diff --git a/.github/workflows/push-infra-issue-creation.yml b/.github/workflows/push-infra-issue-creation.yml index 3f8a22baf8..0175ff0e86 100644 --- a/.github/workflows/push-infra-issue-creation.yml +++ b/.github/workflows/push-infra-issue-creation.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout out Repo - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: fetch-depth: 0 @@ -42,7 +42,7 @@ jobs: echo "EOF" >> $GITHUB_ENV - name: Create Issue - uses: dacbd/create-issue-action@main + uses: dacbd/create-issue-action@fec641442c0897e734fad173cfe83ae21a2284a3 with: token: ${{ secrets.ACTION_TOKEN }} owner: opea-project diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index e6286dfa5d..b08b860059 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -76,6 +76,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard (optional). # Commenting out will disable upload of results to your repo's Code Scanning dashboard - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@v3 + uses: github/codeql-action/upload-sarif@303c0aef88fc2fe5ff6d63d3b1596bfd83dfa1f9 with: sarif_file: results.sarif diff --git a/.github/workflows/scripts/docker_compose_clean_up.sh b/.github/workflows/scripts/docker_compose_clean_up.sh index 1e88bb2c95..b83a8f0ca7 100644 --- a/.github/workflows/scripts/docker_compose_clean_up.sh +++ b/.github/workflows/scripts/docker_compose_clean_up.sh @@ -19,12 +19,12 @@ case "$1" in containers=$(cat $yaml_file | grep container_name | cut -d':' -f2) for container_name in $containers; do cid=$(docker ps -aq --filter "name=$container_name") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi + if [[ -n "$cid" ]]; then docker stop "$cid" && docker rm "$cid" && sleep 1s; fi done ;; ports) echo "Release all ports used by the services in $yaml_file ..." - pip install jq yq + pip install jq==1.10.0 yq==3.4.3 ports=$(yq '.services[].ports[] | split(":")[0]' $yaml_file | grep -o '[0-9a-zA-Z_-]\+') echo "All ports list..." echo "$ports" @@ -32,11 +32,9 @@ case "$1" in if [[ $port =~ [a-zA-Z_-] ]]; then echo "Search port value $port from the test case..." port_fix=$(grep -E "export $port=" tests/$test_case | cut -d'=' -f2) - if [[ "$port_fix" == "" ]]; then - echo "Can't find the port value from the test case, use the default value in yaml..." 
- port_fix=$(yq '.services[].ports[]' $yaml_file | grep $port | cut -d':' -f2 | grep -o '[0-9a-zA-Z]\+') + if [[ "$port_fix" ]]; then + port=$port_fix fi - port=$port_fix fi if [[ $port =~ [0-9] ]]; then if [[ $port == 5000 ]]; then @@ -45,7 +43,7 @@ case "$1" in fi echo "Check port $port..." cid=$(docker ps --filter "publish=${port}" --format "{{.ID}}") - if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && echo "release $port"; fi + if [[ -n "$cid" ]]; then docker stop "$cid" && docker rm "$cid" && echo "release $port"; fi fi done ;; diff --git a/.github/workflows/scripts/get-params.py b/.github/workflows/scripts/get-params.py deleted file mode 100644 index d597facaa9..0000000000 --- a/.github/workflows/scripts/get-params.py +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (C) 2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import json -import os -import sys - -sys.path.append(os.path.join(os.path.dirname(__file__), "../../../one_click_deploy/core/")) -from config import EXAMPLE_CONFIGS - - -def get_example_defaults(example_name): - if example_name not in EXAMPLE_CONFIGS: - print(f"error: example '{example_name}' not found in EXAMPLE_CONFIGS") - sys.exit(1) - - example_config = EXAMPLE_CONFIGS[example_name] - params = example_config.get("interactive_params", {}) - - if isinstance(params, list): - return {param["name"]: param["default"] for param in params} - elif isinstance(params, dict): - return { - device: {param["name"]: param["default"] for param in device_params} - for device, device_params in params.items() - } - else: - return {"error": "Invalid params format in EXAMPLE_CONFIGS for example: " + example_name} - - -def main(): - example_name = sys.argv[1] - - defaults = get_example_defaults(example_name) - - print(json.dumps(defaults, indent=2, ensure_ascii=False)) - - -if __name__ == "__main__": - main() diff --git a/.github/workflows/scripts/k8s-utils.sh b/.github/workflows/scripts/k8s-utils.sh index 0676a80d38..aa2a4f6d05 100755 --- a/.github/workflows/scripts/k8s-utils.sh +++ b/.github/workflows/scripts/k8s-utils.sh @@ -2,7 +2,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -set -e +# set -e function dump_pod_log() { pod_name=$1 diff --git a/.github/workflows/weekly-one-click-test.yml b/.github/workflows/weekly-one-click-test.yml index 39174073bf..d60d7971e4 100644 --- a/.github/workflows/weekly-one-click-test.yml +++ b/.github/workflows/weekly-one-click-test.yml @@ -26,7 +26,7 @@ jobs: deploy_methods: ${{ steps.get-matrix.outputs.deploy_methods }} steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: fetch-depth: 0 @@ -55,11 +55,16 @@ jobs: example: ${{ fromJson(needs.get-test-matrix.outputs.examples) }} node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }} deploy_method: ${{ fromJson(needs.get-test-matrix.outputs.deploy_methods) }} + exclude: + - example: AgentQnA + node: xeon + deploy_method: k8s fail-fast: false uses: ./.github/workflows/_run-one-click.yml with: hardware: ${{ matrix.node }} example: ${{ matrix.example }} deploy_method: ${{ matrix.deploy_method }} + os: "debian" registry: "opea" secrets: inherit diff --git a/.github/workflows/weekly-update-images.yml b/.github/workflows/weekly-update-images.yml index 9a8d1498a0..134476aeac 100644 --- a/.github/workflows/weekly-update-images.yml +++ b/.github/workflows/weekly-update-images.yml @@ -18,7 +18,7 @@ jobs: BRANCH_NAME: "update_images_tag" steps: - name: 
Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2 with: fetch-depth: 0 ref: "main" diff --git a/AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml index 5f197b49c2..8680b83443 100644 --- a/AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -128,7 +128,7 @@ services: OPENAI_API_BASE_URLS: ${SUPERVISOR_AGENT_ENDPOINT} ENABLE_OLLAMA_API: False vllm-service: - image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} + image: opea/vllm-gaudi:1.4 container_name: vllm-gaudi-server ports: - "8086:8000" diff --git a/AgentQnA/docker_image_build/build.yaml b/AgentQnA/docker_image_build/build.yaml index 7db63b6fa8..cf757968de 100644 --- a/AgentQnA/docker_image_build/build.yaml +++ b/AgentQnA/docker_image_build/build.yaml @@ -17,12 +17,6 @@ services: dockerfile: ./docker/Dockerfile extends: agent image: ${REGISTRY:-opea}/agent-ui:${TAG:-latest} - vllm-gaudi: - build: - context: vllm-fork - dockerfile: Dockerfile.hpu - extends: agent - image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} vllm-rocm: build: context: GenAIComps diff --git a/AgentQnA/kubernetes/helm/rocm-tgi-values.yaml b/AgentQnA/kubernetes/helm/rocm-tgi-values.yaml new file mode 100644 index 0000000000..9c2bc98eb8 --- /dev/null +++ b/AgentQnA/kubernetes/helm/rocm-tgi-values.yaml @@ -0,0 +1,57 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Accelerate inferencing in heaviest components to improve performance +# by overriding their subchart values +vllm: + enabled: false +tgi: + enabled: true + accelDevice: "rocm" + image: + repository: ghcr.io/huggingface/text-generation-inference + tag: "3.0.0-rocm" + LLM_MODEL_ID: meta-llama/Llama-3.3-70B-Instruct + MAX_INPUT_LENGTH: "2048" + MAX_TOTAL_TOKENS: "4096" + PYTORCH_TUNABLEOP_ENABLED: "0" + USE_FLASH_ATTENTION: "true" + FLASH_ATTENTION_RECOMPUTE: "false" + HIP_VISIBLE_DEVICES: "0,1" + MAX_BATCH_SIZE: "4" + extraCmdArgs: [ "--num-shard","2" ] + resources: + limits: + amd.com/gpu: "2" + requests: + cpu: 1 + memory: 16Gi + securityContext: + readOnlyRootFilesystem: false + runAsNonRoot: false + runAsUser: 0 + capabilities: + add: + - SYS_PTRACE + readinessProbe: + initialDelaySeconds: 60 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 + startupProbe: + initialDelaySeconds: 60 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 +supervisor: + llm_endpoint_url: http://{{ .Release.Name }}-tgi + llm_engine: tgi + model: "meta-llama/Llama-3.3-70B-Instruct" +ragagent: + llm_endpoint_url: http://{{ .Release.Name }}-tgi + llm_engine: tgi + model: "meta-llama/Llama-3.3-70B-Instruct" +sqlagent: + llm_endpoint_url: http://{{ .Release.Name }}-tgi + llm_engine: tgi + model: "meta-llama/Llama-3.3-70B-Instruct" diff --git a/AgentQnA/kubernetes/helm/rocm-values.yaml b/AgentQnA/kubernetes/helm/rocm-values.yaml new file mode 100644 index 0000000000..0d5393b70f --- /dev/null +++ b/AgentQnA/kubernetes/helm/rocm-values.yaml @@ -0,0 +1,52 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Accelerate inferencing in heaviest components to improve performance +# by overriding their subchart values + +tgi: + enabled: false +vllm: + enabled: true + accelDevice: "rocm" + image: + repository: opea/vllm-rocm + tag: latest + env: + LLM_MODEL_ID: meta-llama/Llama-3.3-70B-Instruct + HIP_VISIBLE_DEVICES: "0,1" + TENSOR_PARALLEL_SIZE: "2" + 
HF_HUB_DISABLE_PROGRESS_BARS: "1" + HF_HUB_ENABLE_HF_TRANSFER: "0" + VLLM_USE_TRITON_FLASH_ATTN: "0" + VLLM_WORKER_MULTIPROC_METHOD: "spawn" + PYTORCH_JIT: "0" + HF_HOME: "/data" + extraCmd: + command: [ "python3", "/workspace/api_server.py" ] + extraCmdArgs: [ "--swap-space", "16", + "--disable-log-requests", + "--dtype", "float16", + "--num-scheduler-steps", "1", + "--distributed-executor-backend", "mp" ] + resources: + limits: + amd.com/gpu: "2" + startupProbe: + failureThreshold: 180 + securityContext: + readOnlyRootFilesystem: false + runAsNonRoot: false + runAsUser: 0 +supervisor: + llm_endpoint_url: http://{{ .Release.Name }}-vllm + llm_engine: vllm + model: "meta-llama/Llama-3.3-70B-Instruct" +ragagent: + llm_endpoint_url: http://{{ .Release.Name }}-vllm + llm_engine: vllm + model: "meta-llama/Llama-3.3-70B-Instruct" +sqlagent: + llm_endpoint_url: http://{{ .Release.Name }}-vllm + llm_engine: vllm + model: "meta-llama/Llama-3.3-70B-Instruct" diff --git a/AgentQnA/tests/step1_build_images.sh b/AgentQnA/tests/step1_build_images.sh index 58b5c8d6e8..5a13c8a52b 100644 --- a/AgentQnA/tests/step1_build_images.sh +++ b/AgentQnA/tests/step1_build_images.sh @@ -40,12 +40,8 @@ function build_agent_docker_image_gaudi_vllm() { cd $WORKDIR/GenAIExamples/AgentQnA/docker_image_build/ get_genai_comps - git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork - VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0 - git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../ - echo "Build agent image with --no-cache..." - service_list="agent agent-ui vllm-gaudi" + service_list="agent agent-ui" docker compose -f build.yaml build ${service_list} --no-cache } @@ -83,6 +79,7 @@ function main() { "rocm_vllm") echo "==================== Build agent docker image for ROCm VLLM ====================" build_agent_docker_image_rocm_vllm + docker image ls | grep vllm ;; "gaudi_vllm") echo "==================== Build agent docker image for Gaudi ====================" @@ -97,8 +94,6 @@ function main() { exit 1 ;; esac - - docker image ls | grep vllm } main $1 diff --git a/ArbPostHearingAssistant/Dockerfile b/ArbPostHearingAssistant/Dockerfile new file mode 100644 index 0000000000..9e739b204d --- /dev/null +++ b/ArbPostHearingAssistant/Dockerfile @@ -0,0 +1,17 @@ +# Copyright (C) 2025 Zensar Technologies Private Ltd. +# SPDX-License-Identifier: Apache-2.0 + +ARG IMAGE_REPO=opea +ARG BASE_TAG=latest +FROM opea/comps-base:latest + +USER root +# FFmpeg needed for media processing +RUN apt-get update && \ + apt-get install -y --no-install-recommends ffmpeg && \ + apt-get clean && rm -rf /var/lib/apt/lists/* +USER user + +COPY ./arb_post_hearing_assistant.py $HOME/arb_post_hearing_assistant.py + +ENTRYPOINT ["python", "arb_post_hearing_assistant.py"] diff --git a/ArbPostHearingAssistant/README.md b/ArbPostHearingAssistant/README.md new file mode 100644 index 0000000000..536257b50c --- /dev/null +++ b/ArbPostHearingAssistant/README.md @@ -0,0 +1,32 @@ +# Arbitration Post-Hearing Assistant + +The Arbitration Post-Hearing Assistant is a GenAI-based module designed to process and summarize post-hearing transcripts or arbitration-related documents. It intelligently extracts key entities and insights to assist arbitrators, legal teams, and case managers in managing case follow-ups efficiently. + +## Table of contents + +1. [Architecture](#architecture) +2. 
[Deployment Options](#deployment-options) + +## Architecture + +The architecture of the ArbPostHearingAssistant Application is illustrated below: + +![Architecture](./assets/img/arbitration_architecture.png) + +The ArbPostHearingAssistant example is implemented using the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). + +## Deployment Options + +The table below lists currently available deployment options. They outline in detail the implementation of this example on selected hardware. + +| Category | Deployment Option | Description | +| ---------------------- | ---------------------- | ------------------------------------------------------------------------------- | +| On-premise Deployments | Docker Compose (Xeon) | [ArbPostHearingAssistant deployment on Xeon](./docker_compose/intel/cpu/xeon) | +| | Docker Compose (Gaudi) | [ArbPostHearingAssistant deployment on Gaudi](./docker_compose/intel/hpu/gaudi) | + +## Validated Configurations + +| **Deploy Method** | **LLM Engine** | **LLM Model** | **Hardware** | +| ----------------- | -------------- | ---------------------------------- | ------------ | +| Docker Compose | vLLM, TGI | mistralai/Mistral-7B-Instruct-v0.2 | Intel Gaudi | +| Docker Compose | vLLM, TGI | mistralai/Mistral-7B-Instruct-v0.2 | Intel Xeon | diff --git a/ArbPostHearingAssistant/README_miscellaneous.md b/ArbPostHearingAssistant/README_miscellaneous.md new file mode 100644 index 0000000000..13d87b6913 --- /dev/null +++ b/ArbPostHearingAssistant/README_miscellaneous.md @@ -0,0 +1,45 @@ +# Table of Contents + +- [Table of Contents](#table-of-contents) + - [Build MegaService Docker Image](#build-megaservice-docker-image) + - [Build UI Docker Image](#build-ui-docker-image) + - [Generate a HuggingFace Access Token](#generate-a-huggingface-access-token) + - [Troubleshooting](#troubleshooting) + +## Build MegaService Docker Image + +To construct the Megaservice of ArbPostHearingAssistant, the [GenAIExamples](https://github.com/opea-project/GenAIExamples.git) repository is utilized. Build MegaService Docker image via command below: + +```bash +git clone https://github.com/opea-project/GenAIExamples.git +cd GenAIExamples/ArbPostHearingAssistant +docker build --no-cache -t opea/arb-post-hearing-assistant:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . +``` + +## Build UI Docker Image + +Build frontend Docker image via below command: + +```bash +cd GenAIExamples/ArbPostHearingAssistant/ui +docker build -t opea/arb-post-hearing-assistant-gradio-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile . +``` + +## Generate a HuggingFace Access Token + +Some HuggingFace resources, such as certain models, are only accessible if the developer has an access token. If you don't have a HuggingFace access token, you can create one by registering at [HuggingFace](https://huggingface.co/) and following [these steps](https://huggingface.co/docs/transformers.js/en/guides/private#step-1-generating-a-user-access-token). + +## Troubleshooting + +1. If you get errors like "Access Denied", [validate micro service](./README.md#validate-microservices) first. A simple example: + + ```bash + http_proxy="" + curl http://${host_ip}:8008/generate \ + -X POST \ + -d '{"inputs":" ### System: Please translate the following Golang codes into Python codes. 
### Original codes: '\'''\'''\''Golang \npackage main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n '\'''\'''\'' ### Translated codes:","parameters":{"max_tokens":17, "do_sample": true}}' \ + -H 'Content-Type: application/json' + ``` + +2. (Docker only) If all microservices work well, check the port ${host_ip}:7777, the port may be allocated by other users, you can modify the `compose.yaml`. +3. (Docker only) If you get errors like "The container name is in use", change container name in `compose.yaml`. diff --git a/ArbPostHearingAssistant/arb_post_hearing_assistant.py b/ArbPostHearingAssistant/arb_post_hearing_assistant.py new file mode 100644 index 0000000000..30ae2271d3 --- /dev/null +++ b/ArbPostHearingAssistant/arb_post_hearing_assistant.py @@ -0,0 +1,148 @@ +# Copyright (C) 2025 Zensar Technologies Private Ltd. +# SPDX-License-Identifier: Apache-2.0 + +import asyncio +import base64 +import json +import os +import subprocess +import uuid +from typing import List + +from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType +from comps.cores.mega.utils import handle_message +from comps.cores.proto.api_protocol import ( + ArbPostHearingAssistantChatCompletionRequest, + ChatCompletionRequest, + ChatCompletionResponse, + ChatCompletionResponseChoice, + ChatMessage, + UsageInfo, +) +from fastapi import Request +from fastapi.responses import StreamingResponse + +MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888)) + +LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0") +LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000)) + + +def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs): + if self.services[cur_node].service_type == ServiceType.ARB_POST_HEARING_ASSISTANT: + for key_to_replace in ["text", "asr_result"]: + if key_to_replace in inputs: + inputs["messages"] = inputs[key_to_replace] + del inputs[key_to_replace] + + arbPostHearingAssistant_parameters = kwargs.get("arbPostHearingAssistant_parameters", None) + if arbPostHearingAssistant_parameters: + arbPostHearingAssistant_parameters = arbPostHearingAssistant_parameters.model_dump() + del arbPostHearingAssistant_parameters["messages"] + inputs.update(arbPostHearingAssistant_parameters) + if "id" in inputs: + del inputs["id"] + if "max_new_tokens" in inputs: + del inputs["max_new_tokens"] + if "input" in inputs: + del inputs["input"] + return inputs + + +def align_outputs(self, data, *args, **kwargs): + return data + + +class OpeaArbPostHearingAssistantService: + def __init__(self, host="0.0.0.0", port=8000): + self.host = host + self.port = port + ServiceOrchestrator.align_inputs = align_inputs + ServiceOrchestrator.align_outputs = align_outputs + self.megaservice = ServiceOrchestrator() + self.endpoint = "/v1/arb-post-hearing" + + def add_remote_service(self): + + arb_post_hearing_assistant = MicroService( + name="opea_service@arb_post_hearing_assistant", + host=LLM_SERVICE_HOST_IP, + port=LLM_SERVICE_PORT, + endpoint="/v1/arb-post-hearing", + use_remote_service=True, + service_type=ServiceType.ARB_POST_HEARING_ASSISTANT, + ) + self.megaservice.add(arb_post_hearing_assistant) + + async def handle_request(self, request: Request): + """Accept pure text.""" + if "application/json" in request.headers.get("content-type"): + data = await request.json() + chunk_size = data.get("chunk_size", -1) + chunk_overlap = data.get("chunk_overlap", -1) + chat_request = 
ArbPostHearingAssistantChatCompletionRequest.model_validate(data) + prompt = handle_message(chat_request.messages) + print(f"messages:{chat_request.messages}") + print(f"prompt: {prompt}") + initial_inputs_data = {data["type"]: prompt} + else: + raise ValueError(f"Unknown request type: {request.headers.get('content-type')}") + + arbPostHearingAssistant_parameters = ArbPostHearingAssistantChatCompletionRequest( + messages=chat_request.messages, + max_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024, + top_k=chat_request.top_k if chat_request.top_k else 10, + top_p=chat_request.top_p if chat_request.top_p else 0.95, + temperature=chat_request.temperature if chat_request.temperature else 0.01, + frequency_penalty=chat_request.frequency_penalty if chat_request.frequency_penalty else 0.0, + presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0, + repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03, + model=chat_request.model if chat_request.model else None, + language=chat_request.language if chat_request.language else "en", + chunk_overlap=chunk_overlap, + chunk_size=chunk_size, + ) + result_dict, runtime_graph = await self.megaservice.schedule( + initial_inputs=initial_inputs_data, arbPostHearingAssistant_parameters=arbPostHearingAssistant_parameters + ) + + for node, response in result_dict.items(): + # Here it suppose the last microservice in the megaservice is LLM. + if ( + isinstance(response, StreamingResponse) + and node == list(self.megaservice.services.keys())[-1] + and self.megaservice.services[node].service_type == ServiceType.ARB_POST_HEARING_ASSISTANT + ): + return response + + last_node = runtime_graph.all_leaves()[-1] + response = result_dict[last_node]["text"] + choices = [] + usage = UsageInfo() + choices.append( + ChatCompletionResponseChoice( + index=0, + message=ChatMessage(role="assistant", content=response), + finish_reason="stop", + ) + ) + return ChatCompletionResponse(model="arbPostHearingAssistant", choices=choices, usage=usage) + + def start(self): + self.service = MicroService( + self.__class__.__name__, + service_role=ServiceRoleType.MEGASERVICE, + host=self.host, + port=self.port, + endpoint=self.endpoint, + input_datatype=ArbPostHearingAssistantChatCompletionRequest, + output_datatype=ChatCompletionResponse, + ) + self.service.add_route(self.endpoint, self.handle_request, methods=["POST"]) + self.service.start() + + +if __name__ == "__main__": + arbPostHearingAssistant = OpeaArbPostHearingAssistantService(port=MEGA_SERVICE_PORT) + arbPostHearingAssistant.add_remote_service() + arbPostHearingAssistant.start() diff --git a/ArbPostHearingAssistant/assets/img/arbitration_architecture.png b/ArbPostHearingAssistant/assets/img/arbitration_architecture.png new file mode 100644 index 0000000000..1baa205e65 Binary files /dev/null and b/ArbPostHearingAssistant/assets/img/arbitration_architecture.png differ diff --git a/ArbPostHearingAssistant/assets/img/arbritation_post_hearing_ui_gradio_text.png b/ArbPostHearingAssistant/assets/img/arbritation_post_hearing_ui_gradio_text.png new file mode 100644 index 0000000000..ee6355a476 Binary files /dev/null and b/ArbPostHearingAssistant/assets/img/arbritation_post_hearing_ui_gradio_text.png differ diff --git a/ArbPostHearingAssistant/benchmark_arb_post_hearing_assistant.yaml b/ArbPostHearingAssistant/benchmark_arb_post_hearing_assistant.yaml new file mode 100644 index 0000000000..9e3eda52b1 --- /dev/null +++ 
b/ArbPostHearingAssistant/benchmark_arb_post_hearing_assistant.yaml @@ -0,0 +1,77 @@ +# Copyright (C) 2025 Zensar Technologies Private Ltd. +# SPDX-License-Identifier: Apache-2.0 + +deploy: + device: gaudi + version: 1.3.0 + modelUseHostPath: /mnt/models + HF_TOKEN: "" # mandatory + node: [1] + namespace: "" + node_name: [] + timeout: 1000 # timeout in seconds for services to be ready, default 30 minutes + interval: 5 # interval in seconds between service ready checks, default 5 seconds + + services: + backend: + resources: + enabled: False + cores_per_instance: "16" + memory_capacity: "8000Mi" + replicaCount: [1] + + llm: + engine: vllm # or tgi + model_id: "mistralai/Mistral-7B-Instruct-v0.2" # mandatory + replicaCount: [1] + resources: + enabled: False + cards_per_instance: 1 + model_params: + vllm: # VLLM specific parameters + batch_params: + enabled: True + max_num_seqs: "8" # Each value triggers an LLM service upgrade + token_params: + enabled: True + max_input_length: "" + max_total_tokens: "" + max_batch_total_tokens: "" + max_batch_prefill_tokens: "" + tgi: # TGI specific parameters + batch_params: + enabled: True + max_batch_size: [1] # Each value triggers an LLM service upgrade + token_params: + enabled: False + max_input_length: "1280" + max_total_tokens: "2048" + max_batch_total_tokens: "65536" + max_batch_prefill_tokens: "4096" + + arbPostHearingAssistant-ui: + replicaCount: [1] + + llm-uservice: + model_id: "mistralai/Mistral-7B-Instruct-v0.2" # mandatory + replicaCount: [1] + + nginx: + replicaCount: [1] + +benchmark: + # http request behavior related fields + user_queries: [16] + concurrency: [4] + load_shape_type: "constant" # "constant" or "poisson" + poisson_arrival_rate: 1.0 # only used when load_shape_type is "poisson" + warmup_iterations: 10 + seed: 1024 + collect_service_metric: True + + # workload, all of the test cases will run for benchmark + bench_target: ["arbPostHearingAssistantfixed"] # specify the bench_target for benchmark + dataset: "/home/sdp/pubmed_10.txt" # specify the absolute path to the dataset file + llm: + # specify the llm output token size + max_token_size: [1024] diff --git a/ArbPostHearingAssistant/docker_compose/amd/cpu/epyc/README.md b/ArbPostHearingAssistant/docker_compose/amd/cpu/epyc/README.md new file mode 100644 index 0000000000..dd4a1f7ee1 --- /dev/null +++ b/ArbPostHearingAssistant/docker_compose/amd/cpu/epyc/README.md @@ -0,0 +1,186 @@ +# Deploy Arbitration Post-Hearing Assistant Application on AMD EPYC™ Processors with Docker Compose + +This document details the deployment procedure for a Arbitration Post-Hearing Assistant application using OPEA components on an AMD EPYC™ Processors. + +This example includes the following sections: + +- [Arbitration Post-Hearing Assistant Quick Start Deployment](#arb-post-hearing-assistant-quick-start-deployment): Demonstrates how to quickly deploy a Arbitration Post-Hearing Assistant application/pipeline on AMD EPYC platform. +- [Arbitration Post-Hearing Assistant Docker Compose Files](#arb-post-hearing-assistant-docker-compose-files): Describes some example deployments and their docker compose files. +- [Arbitration Post-Hearing Assistant Detailed Usage](#arb-post-hearing-assistant-detailed-usage): Provide more detailed usage. +- [Launch the UI](#launch-the-ui): Guideline for UI usage + +## arb-post-hearing-assistant Quick Start Deployment + +This section explains how to quickly deploy and manually test the Arbitration Post-Hearing Assistant service on an AMD EPYC platform. 
The process involves the following basic steps: + +1. [Access the Code](#access-the-code) +2. [Install Docker](#install-docker) +3. [Determine your host external IP address](#determine-your-host-external-ip-address) +4. [Generate a HuggingFace Access Token](#generate-a-huggingface-access-token) +5. [Set Up Environment](#set-up-environment) +6. [Deploy the Services Using Docker Compose](#deploy-the-services-using-docker-compose) +7. [Check the Deployment Status](#check-the-deployment-status) +8. [Test the Pipeline](#test-the-pipeline) +9. [Cleanup the Deployment](#cleanup-the-deployment) + +### Access the Code + +Clone the GenAIExample repository and access the Arbitration Post-Hearing Assistant AMD EPYC platform Docker Compose files and supporting scripts: + +```bash +git clone https://github.com/opea-project/GenAIExamples.git +cd GenAIExamples/ArbPostHearingAssistant/docker_compose/amd/cpu/epyc +``` + +### Install Docker + +Ensure Docker is installed on your system. If Docker is not already installed, use the provided script to set it up: + + source ./install_docker.sh + +This script installs Docker and its dependencies. After running it, verify the installation by checking the Docker version: + + docker --version + +If Docker is already installed, this step can be skipped. + +### Determine your host external IP address + +Run the following command in your terminal to list network interfaces: + + ifconfig + +Look for the inet address associated with your active network interface (e.g., enp99s0). For example: + + enp99s0: flags=4163 mtu 1500 + inet 10.101.16.119 netmask 255.255.255.0 broadcast 10.101.16.255 + +In this example, the (`host_ip`) would be (`10.101.16.119`). + + # Replace with your host's external IP address + export host_ip="your_external_ip_address" + +### Generate a HuggingFace Access Token + +Some HuggingFace resources, such as some models, are only accessible if you have an access token. If you do not already have a HuggingFace access token, you can create one by first creating an account by following the steps provided at [HuggingFace](https://huggingface.co/) and then generating a [user access token](https://huggingface.co/docs/transformers.js/en/guides/private#step-1-generating-a-user-access-token). + +```bash +export HF_TOKEN="your_huggingface_token" +``` + +### Set Up Environment + +Set the environment variables + +```bash +source ./set_env.sh +``` + +NOTE: by default vLLM does "warmup" at start, to optimize its performance for the specified model and the underlying platform, which can take long time. For development (and e.g. autoscaling) it can be skipped with `export VLLM_SKIP_WARMUP=true`. + +### Deploy the Services Using Docker Compose + +To deploy the Arbitration Post-Hearing Assistant services, execute the `docker compose up` command with the appropriate arguments. For a default deployment, execute: + +```bash +docker compose up -d +``` + +**Note**: developers should build docker image from source when: + +- Developing off the git main branch (as the container's ports in the repo may be different from the published docker image). +- Unable to download the docker image. +- Use a specific version of Docker image. 
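+
+`docker compose up -d` returns before the model server has finished downloading the model and warming up, so the pipeline may not be usable immediately. As an optional convenience (not part of the shipped example), the sketch below polls the services from Python before you proceed to the status check and pipeline test later in this guide; it assumes the default ports exported by `set_env.sh` (8008 for the model server, 8888 for the megaservice).
+
+```python
+# wait_for_services.py - minimal readiness sketch; assumes the default set_env.sh ports.
+import os
+import socket
+import time
+import urllib.request
+
+HOST_IP = os.environ.get("host_ip", "localhost")
+LLM_HEALTH_URL = f"http://{HOST_IP}:8008/health"  # vLLM/TGI health endpoint
+BACKEND_PORT = 8888                               # megaservice port
+
+
+def wait_for_http(url, timeout_s=1800):
+    """Poll an HTTP health endpoint until it returns 200 or the timeout expires."""
+    deadline = time.time() + timeout_s
+    while time.time() < deadline:
+        try:
+            with urllib.request.urlopen(url, timeout=5) as resp:
+                if resp.status == 200:
+                    print(f"{url} is ready")
+                    return
+        except OSError:
+            pass
+        time.sleep(10)
+    raise TimeoutError(f"{url} did not become ready within {timeout_s}s")
+
+
+def wait_for_port(host, port, timeout_s=300):
+    """Wait until a TCP port accepts connections."""
+    deadline = time.time() + timeout_s
+    while time.time() < deadline:
+        try:
+            with socket.create_connection((host, port), timeout=5):
+                print(f"{host}:{port} is accepting connections")
+                return
+        except OSError:
+            time.sleep(5)
+    raise TimeoutError(f"{host}:{port} did not open within {timeout_s}s")
+
+
+if __name__ == "__main__":
+    wait_for_http(LLM_HEALTH_URL)         # model server has loaded the model
+    wait_for_port(HOST_IP, BACKEND_PORT)  # megaservice is listening
+```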
+ +Please refer to the table below to build different microservices from source: + +| Microservice | Deployment Guide | +| ------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------- | +| vLLM | [vLLM build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/third_parties/vllm#build-docker) | +| llm-arb-post-hearing-assistant | [LLM-ArbPostHearingAssistant build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/arb_post_hearing_assistant/src/#12-build-docker-image) | +| MegaService | [MegaService build guide](../../../../README_miscellaneous.md#build-megaservice-docker-image) | +| UI | [Basic UI build guide](../../../../README_miscellaneous.md#build-ui-docker-image) | + +### Check the Deployment Status + +After running docker compose, to check if all the containers launched via docker compose have started: + +```bash +docker ps -a +``` + +For the default deployment, the following 4 containers should be running: + +```bash +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +24bd78300413 opea/arb-post-hearing-assistant-gradio-ui:latest "python arb_post_hea…" 2 hours ago Up 2 hours 0.0.0.0:5173->5173/tcp, [::]:5173->5173/tcp arb-post-hearing-assistant-xeon-ui-server +59e60c954e26 opea/arb-post-hearing-assistant:latest "python arb_post_hea…" 2 hours ago Up 2 hours 0.0.0.0:8888->8888/tcp, [::]:8888->8888/tcp arb-post-hearing-assistant-xeon-backend-server +32afc12de996 opea/llm-arb-post-hearing-assistant:latest "python comps/arb_po…" 2 hours ago Up 2 hours 0.0.0.0:9000->9000/tcp, [::]:9000->9000/tcp arb-post-hearing-assistant-xeon-llm-server +c8e539360aff ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu "text-generation-lau…" 2 hours ago Up 2 hours (healthy) 0.0.0.0:8008->80/tcp, [::]:8008->80/tcp arb-post-hearing-assistant-xeon-tgi-server +``` + +### Test the Pipeline + +Once the Arbitration Post-Hearing Assistant services are running, test the pipeline using the following command: + +```bash +curl -X POST http://${host_ip}:8888/v1/arb-post-hearing \ + -H "Content-Type: application/json" \ + -d '{"type": "text", [10:00 AM] Arbitrator Hon. Rebecca Lawson: Good morning. This hearing is now in session for Case No. ARB/2025/0917. Lets begin with appearances. [10:01 AM] Attorney Michael Grant for Mr. Jonathan Reed: Good morning Your Honor. I represent the claimant Mr. Jonathan Reed. [10:01 AM] Attorney Lisa Chen for Ms. Rachel Morgan: Good morning. I represent the respondent Ms. Rachel Morgan. [10:03 AM] Arbitrator Hon. Rebecca Lawson: Thank you. Lets proceed with Mr. Reeds opening statement. [10:04 AM] Attorney Michael Grant: Ms. Morgan failed to deliver services as per the agreement dated March 15 2023. We have submitted relevant documentation including email correspondence and payment records. The delay caused substantial financial harm to our client. [10:15 AM] Attorney Lisa Chen: We deny any breach of contract. The delays were due to regulatory issues outside our control. Furthermore Mr. Reed did not provide timely approvals which contributed to the delay. [10:30 AM] Arbitrator Hon. Rebecca Lawson: Lets turn to Clause Z of the agreement. Id like both parties to submit written briefs addressing the applicability of the force majeure clause and the timeline of approvals. [11:00 AM] Attorney Michael Grant: Understood. Well submit by the deadline. [11:01 AM] Attorney Lisa Chen: Agreed. [11:02 AM] Arbitrator Hon. 
Rebecca Lawson: The next hearing is scheduled for October 22 2025 at 1030 AM Eastern Time. Please ensure your witnesses are available for cross examination. [4:45 PM] Arbitrator Hon. Rebecca Lawson: This session is adjourned. Thank you everyone.","max_tokens":2000,"language":"en"}' +``` + +**Note** The value of _host_ip_ was set using the _set_env.sh_ script and can be found in the _.env_ file. + +### Cleanup the Deployment + +To stop the containers associated with the deployment, execute the following command: + +```bash +docker compose -f compose.yaml down +``` + +All the Arbitration Post-Hearing Assistant containers will be stopped and then removed on completion of the "down" command. + +## arb-post-hearing-assistant Docker Compose Files + +In the context of deploying a Arbitration Post-Hearing Assistant pipeline on an AMD EPYC platform, we can pick and choose different large language model serving frameworks. The table below outlines the various configurations that are available as part of the application. + +| File | Description | +| -------------------------------------- | ----------------------------------------------------------------------------------------- | +| [compose.yaml](./compose.yaml) | Default compose file using vllm as serving framework | +| [compose_tgi.yaml](./compose_tgi.yaml) | The LLM serving framework is TGI. All other configurations remain the same as the default | + +## arb-post-hearing-assistant Assistant Detailed Usage + +There are also some customized usage. + +### Query with text + +```bash +# form input. Use English mode (default). +curl http://${host_ip}:8888/v1/arb-post-hearing \ + -H "Content-Type: multipart/form-data" \ + -F "type=text" \ + -F "messages=[10:00 AM] Arbitrator Hon. Rebecca Lawson: Good morning. This hearing is now in session for Case No. ARB/2025/0917. Lets begin with appearances. [10:01 AM] Attorney Michael Grant for Mr. Jonathan Reed: Good morning Your Honor. I represent the claimant Mr. Jonathan Reed. [10:01 AM] Attorney Lisa Chen for Ms. Rachel Morgan: Good morning. I represent the respondent Ms. Rachel Morgan. [10:03 AM] Arbitrator Hon. Rebecca Lawson: Thank you. Lets proceed with Mr. Reeds opening statement. [10:04 AM] Attorney Michael Grant: Ms. Morgan failed to deliver services as per the agreement dated March 15 2023. We have submitted relevant documentation including email correspondence and payment records. The delay caused substantial financial harm to our client. [10:15 AM] Attorney Lisa Chen: We deny any breach of contract. The delays were due to regulatory issues outside our control. Furthermore Mr. Reed did not provide timely approvals which contributed to the delay. [10:30 AM] Arbitrator Hon. Rebecca Lawson: Lets turn to Clause Z of the agreement. Id like both parties to submit written briefs addressing the applicability of the force majeure clause and the timeline of approvals. [11:00 AM] Attorney Michael Grant: Understood. Well submit by the deadline. [11:01 AM] Attorney Lisa Chen: Agreed. [11:02 AM] Arbitrator Hon. Rebecca Lawson: The next hearing is scheduled for October 22 2025 at 1030 AM Eastern Time. Please ensure your witnesses are available for cross examination. [4:45 PM] Arbitrator Hon. Rebecca Lawson: This session is adjourned. Thank you everyone." \ + -F "max_tokens=2000" \ + -F "language=en" + +## Launch the UI + +### Gradio UI + +Open this URL `http://{host_ip}:5173` in your browser to access the Gradio based frontend. 
+![project-screenshot](../../../../assets/img/arbritation_post_hearing_ui_gradio_text.png) + +### Profile Microservices + +To further analyze MicroService Performance, users could follow the instructions to profile MicroServices. + +#### 1. vLLM backend Service + +Users could follow previous section to testing vLLM microservice or Arbitration Post-Hearing Assistant MegaService. By default, vLLM profiling is not enabled. Users could start and stop profiling by following commands. + +## Conclusion + +This guide should enable developer to deploy the default configuration or any of the other compose yaml files for different configurations. It also highlights the configurable parameters that can be set before deployment. +``` diff --git a/ArbPostHearingAssistant/docker_compose/amd/cpu/epyc/compose.yaml b/ArbPostHearingAssistant/docker_compose/amd/cpu/epyc/compose.yaml new file mode 100644 index 0000000000..2f3a622180 --- /dev/null +++ b/ArbPostHearingAssistant/docker_compose/amd/cpu/epyc/compose.yaml @@ -0,0 +1,87 @@ +# Copyright (C) 2025 Zensar Technologies Private Ltd. +# SPDX-License-Identifier: Apache-2.0 + +services: + vllm-service: + image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1 + container_name: arb-post-hearing-assistant-epyc-vllm-service + ports: + - "8008:80" + volumes: + - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HF_TOKEN} + MODEL_CACHE: ${MODEL_CACHE} + LLM_MODEL_ID: ${LLM_MODEL_ID} + VLLM_CPU_KVCACHE_SPACE: 40 + VLLM_TORCH_PROFILER_DIR: "/mnt" + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80 + + llm-arbPostHearingAssistant-vllm: + image: ${REGISTRY:-opea}/llm-arb-post-hearing-assistant:${TAG:-latest} + container_name: arb-post-hearing-assistant-epyc-llm-server + depends_on: + vllm-service: + condition: service_healthy + ports: + - ${LLM_PORT:-9000}:9000 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: ${LLM_ENDPOINT} + LLM_MODEL_ID: ${LLM_MODEL_ID} + HF_TOKEN: ${HF_TOKEN} + MODEL_CACHE: ${MODEL_CACHE} + MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS} + MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS} + OPEA_ARB_POSTHEARING_ASSISTANT_COMPONENT_NAME: ${OPEA_ARB_POSTHEARING_ASSISTANT_COMPONENT_NAME} + LOGFLAG: ${LOGFLAG:-False} + restart: unless-stopped + + arbPostHearingAssistant-epyc-backend-server: + image: ${REGISTRY:-opea}/arb-post-hearing-assistant:${TAG:-latest} + container_name: arb-post-hearing-assistant-epyc-backend-server + depends_on: + - vllm-service + - llm-arbPostHearingAssistant-vllm + ports: + - "${BACKEND_SERVICE_PORT:-8888}:8888" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} + - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + ipc: host + restart: always + + arbPostHearingAssistant-gradio-ui: + image: ${REGISTRY:-opea}/arb-post-hearing-assistant-gradio-ui:${TAG:-latest} + container_name: arb-post-hearing-assistant-epyc-ui-server + depends_on: + - arbPostHearingAssistant-epyc-backend-server + ports: + - "${FRONTEND_SERVICE_PORT:-5173}:5173" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT} + - 
DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/ArbPostHearingAssistant/docker_compose/amd/cpu/epyc/compose_tgi.yaml b/ArbPostHearingAssistant/docker_compose/amd/cpu/epyc/compose_tgi.yaml new file mode 100644 index 0000000000..cb6b8c9f48 --- /dev/null +++ b/ArbPostHearingAssistant/docker_compose/amd/cpu/epyc/compose_tgi.yaml @@ -0,0 +1,86 @@ +# Copyright (C) 2025 Zensar Technologies Private Ltd. +# SPDX-License-Identifier: Apache-2.0 + +services: + tgi-server: + image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu + container_name: arb-post-hearing-assistant-epyc-tgi-server + ports: + - ${LLM_ENDPOINT_PORT:-8008}:80 + volumes: + - "${MODEL_CACHE:-./data}:/data" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} + HF_TOKEN: ${HF_TOKEN} + MODEL_CACHE: ${MODEL_CACHE} + host_ip: ${host_ip} + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + shm_size: 1g + command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS} + + llm-arbPostHearingAssistant-tgi: + image: ${REGISTRY:-opea}/llm-arb-post-hearing-assistant:${TAG:-latest} + container_name: arb-post-hearing-assistant-epyc-llm-server + depends_on: + tgi-server: + condition: service_healthy + ports: + - ${LLM_PORT:-9000}:9000 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: ${LLM_ENDPOINT} + LLM_MODEL_ID: ${LLM_MODEL_ID} + HF_TOKEN: ${HF_TOKEN} + MODEL_CACHE: ${MODEL_CACHE} + MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS} + MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS} + OPEA_ARB_POSTHEARING_ASSISTANT_COMPONENT_NAME: ${OPEA_ARB_POSTHEARING_ASSISTANT_COMPONENT_NAME} + LOGFLAG: ${LOGFLAG:-False} + restart: unless-stopped + + arbPostHearingAssistant-epyc-backend-server: + image: ${REGISTRY:-opea}/arb-post-hearing-assistant:${TAG:-latest} + container_name: arb-post-hearing-assistant-epyc-backend-server + depends_on: + - tgi-server + - llm-arbPostHearingAssistant-tgi + ports: + - "${BACKEND_SERVICE_PORT:-8888}:8888" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} + - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + ipc: host + restart: always + + arbPostHearingAssistant-gradio-ui: + image: ${REGISTRY:-opea}/arb-post-hearing-assistant-gradio-ui:${TAG:-latest} + container_name: arb-post-hearing-assistant-epyc-ui-server + depends_on: + - arbPostHearingAssistant-epyc-backend-server + ports: + - "${FRONTEND_SERVICE_PORT:-5173}:5173" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT} + - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/ArbPostHearingAssistant/docker_compose/amd/cpu/epyc/install_docker.sh b/ArbPostHearingAssistant/docker_compose/amd/cpu/epyc/install_docker.sh new file mode 100644 index 0000000000..7a3b3a0a10 --- /dev/null +++ b/ArbPostHearingAssistant/docker_compose/amd/cpu/epyc/install_docker.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash + +# Copyright (C) 2025 Zensar Technologies Private Ltd. 
+# SPDX-License-Identifier: Apache-2.0 + +# Update the package index +sudo apt-get -y update + +# Install prerequisites +sudo apt-get -y install ca-certificates curl + +# Create the directory for the Docker GPG key +sudo install -m 0755 -d /etc/apt/keyrings + +# Add Docker's official GPG key +sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc + +# Set permissions for the GPG key +sudo chmod a+r /etc/apt/keyrings/docker.asc + +# Add Docker repository to the sources list +echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ + $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + +# Update the package index with Docker packages +sudo apt-get -y update + +# Install Docker packages +sudo apt-get -y install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + +# Add the current user to the Docker group +sudo usermod -aG docker $USER + +# Optional: Verify that Docker is installed correctly +docker --version + +echo "Docker installation is complete. Log out and back in for the group changes to take effect." diff --git a/ArbPostHearingAssistant/docker_compose/amd/cpu/epyc/set_env.sh b/ArbPostHearingAssistant/docker_compose/amd/cpu/epyc/set_env.sh new file mode 100644 index 0000000000..9a8a24af82 --- /dev/null +++ b/ArbPostHearingAssistant/docker_compose/amd/cpu/epyc/set_env.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash + +# Copyright (C) 2025 Advanced Micro Devices, Inc. +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +host_ip=$(hostname -I | awk '{print $1}') +export host_ip # Example: host_ip="192.168.1.1" + +export no_proxy="${no_proxy},${host_ip}" # Example: no_proxy="localhost,127.0.0.1,192.168.1.1" +export http_proxy=$http_proxy +export https_proxy=$https_proxy +export HF_TOKEN=${HF_TOKEN} + +export LLM_ENDPOINT_PORT=8008 +export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.2" +export MAX_INPUT_TOKENS=1024 +export MAX_TOTAL_TOKENS=2048 + +export LLM_PORT=9000 +export LLM_SERVICE_PORT=9000 +export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" +export OPEA_ARB_POSTHEARING_ASSISTANT_COMPONENT_NAME="OpeaArbPostHearingAssistantTgi" # OpeaArbPostHearingAssistantVllm +export FRONTEND_SERVICE_PORT=5173 +export MEGA_SERVICE_HOST_IP=${host_ip} +export LLM_SERVICE_HOST_IP=${host_ip} + +export BACKEND_SERVICE_PORT=8888 +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_SERVICE_PORT}/v1/arb-post-hearing" + +export LOGFLAG=True +export MODEL_CACHE=${model_cache:-"./data"} + +export NUM_CARDS=1 +export BLOCK_SIZE=128 +export MAX_NUM_SEQS=256 +export MAX_SEQ_LEN_TO_CAPTURE=2048 diff --git a/ArbPostHearingAssistant/docker_compose/amd/gpu/rocm/README.md b/ArbPostHearingAssistant/docker_compose/amd/gpu/rocm/README.md new file mode 100644 index 0000000000..6261e8d280 --- /dev/null +++ b/ArbPostHearingAssistant/docker_compose/amd/gpu/rocm/README.md @@ -0,0 +1,217 @@ +# Example Arbitration Post-Hearing Assistant deployments on AMD GPU (ROCm) + +This document outlines the deployment process for a Arbitration Post-Hearing Assistant application utilizing OPEA components on an AMD GPU server. 
+ +This example includes the following sections: + +- [Arbitration Post-Hearing Assistant Quick Start Deployment](#arb-post-hearing-assistant-quick-start-deployment): Demonstrates how to quickly deploy a Arbitration Post-Hearing Assistant application/pipeline on AMD GPU platform. +- [Arbitration Post-Hearing Assistant Docker Compose Files](#arb-post-hearing-assistant-docker-compose-files): Describes some example deployments and their docker compose files. +- [Arbitration Post-Hearing Assistant Detailed Usage](#arb-post-hearing-assistant-detailed-usage): Provide more detailed usage. +- [Launch the UI](#launch-the-ui): Guideline for UI usage + +## arb-post-hearing-assistant Quick Start Deployment + +This section describes how to quickly deploy and test the Arbitration Post-Hearing Assistant service manually on an AMD GPU platform. The basic steps are: + +1. [Access the Code](#access-the-code) +2. [Generate a HuggingFace Access Token](#generate-a-huggingface-access-token) +3. [Configure the Deployment Environment](#configure-the-deployment-environment) +4. [Deploy the Services Using Docker Compose](#deploy-the-services-using-docker-compose) +5. [Check the Deployment Status](#check-the-deployment-status) +6. [Test the Pipeline](#test-the-pipeline) +7. [Cleanup the Deployment](#cleanup-the-deployment) + +### Access the Code + +Clone the GenAIExample repository and access the Arbitration Post-Hearing Assistant AMD GPU platform Docker Compose files and supporting scripts: + +```bash +git clone https://github.com/opea-project/GenAIExamples.git +cd GenAIExamples/ArbPostHearingAssistant/docker_compose/amd/gpu/rocm +``` + +### Generate a HuggingFace Access Token + +Some HuggingFace resources, such as some models, are only accessible if you have an access token. If you do not already have a HuggingFace access token, you can create one by first creating an account by following the steps provided at [HuggingFace](https://huggingface.co/) and then generating a [user access token](https://huggingface.co/docs/transformers.js/en/guides/private#step-1-generating-a-user-access-token). + +### Configure the Deployment Environment + +To set up environment variables for deploying Arbitration Post-Hearing Assistant services, set up some parameters specific to the deployment environment and source the `set_env_*.sh` script in this directory: + +- if used vLLM - set_env_vllm.sh +- if used TGI - set_env.sh + +Set the values of the variables: + +- **HOST_IP, HOST_IP_EXTERNAL** - These variables are used to configure the name/address of the service in the operating system environment for the application services to interact with each other and with the outside world. + + If your server uses only an internal address and is not accessible from the Internet, then the values for these two variables will be the same and the value will be equal to the server's internal name/address. + + If your server uses only an external, Internet-accessible address, then the values for these two variables will be the same and the value will be equal to the server's external name/address. + + If your server is located on an internal network, has an internal address, but is accessible from the Internet via a proxy/firewall/load balancer, then the HOST_IP variable will have a value equal to the internal name/address of the server, and the EXTERNAL_HOST_IP variable will have a value equal to the external name/address of the proxy/firewall/load balancer behind which the server is located. 
+
+  We set these values in the corresponding `set_env*.sh` file (`set_env.sh` for TGI, `set_env_vllm.sh` for vLLM).
+
+- **Variables with names ending in `_PORT`** - These variables set the IP port numbers used to establish network connections to the application services.
+  The values shown in `set_env.sh` and `set_env_vllm.sh` are the values used for development and testing of the application, configured for the environment in which development was performed. They must comply with the network access rules of your environment's server and must not overlap with IP ports already in use by other applications.
+
+Set the variables in the operating system environment:
+
+```bash
+export HF_TOKEN="Your_HuggingFace_API_Token"
+source ./set_env_*.sh # replace the script name with the appropriate one
+```
+
+Consult the section on [Arbitration Post-Hearing Assistant Service configuration](#arb-post-hearing-assistant-configuration) for information on how service-specific configuration parameters affect deployments.
+
+### Deploy the Services Using Docker Compose
+
+To deploy the Arbitration Post-Hearing Assistant services, execute the `docker compose up` command with the appropriate arguments. For a default deployment with TGI, execute the command below, which uses the `compose.yaml` file.
+
+```bash
+cd docker_compose/amd/gpu/rocm
+# if using TGI
+docker compose -f compose.yaml up -d
+# if using vLLM
+# docker compose -f compose_vllm.yaml up -d
+```
+
+To enable GPU support for AMD GPUs, the following configuration is added to the Docker Compose file:
+
+- `compose_vllm.yaml` - for the vLLM-based application
+- `compose.yaml` - for the TGI-based application
+
+```yaml
+shm_size: 1g
+devices:
+  - /dev/kfd:/dev/kfd
+  - /dev/dri:/dev/dri
+cap_add:
+  - SYS_PTRACE
+group_add:
+  - video
+security_opt:
+  - seccomp:unconfined
+```
+
+This configuration forwards all available GPUs to the container. To use a specific GPU, specify its `cardN` and `renderN` device IDs. For example:
+
+```yaml
+shm_size: 1g
+devices:
+  - /dev/kfd:/dev/kfd
+  - /dev/dri/card0:/dev/dri/card0
+  - /dev/dri/render128:/dev/dri/render128
+cap_add:
+  - SYS_PTRACE
+group_add:
+  - video
+security_opt:
+  - seccomp:unconfined
+```
+
+**How to Identify GPU Device IDs:**
+Use AMD GPU driver utilities to determine the correct `cardN` and `renderN` IDs for your GPU (a small helper sketch follows the note below).
+
+> **Note**: developers should build the Docker image from source when:
+>
+> - Developing off the git main branch (as the container's ports in the repo may differ from the published Docker image).
+> - Unable to download the Docker image.
+> - Using a specific version of the Docker image.
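+
+As a convenience for the "How to Identify GPU Device IDs" step above, the short sketch below lists the DRM device nodes present on the host and prints ready-to-paste `devices:` entries. It only assumes the standard `/dev/dri` layout and is not part of the shipped example; cross-check which node belongs to which GPU with AMD utilities such as `rocm-smi` before editing the compose file.
+
+```python
+# list_dri_nodes.py - list /dev/dri card/render nodes for the compose `devices:` section.
+from pathlib import Path
+
+
+def list_dri_nodes(dri_dir="/dev/dri"):
+    path = Path(dri_dir)
+    if not path.exists():
+        print(f"{dri_dir} not found - are the AMD GPU drivers installed?")
+        return
+    cards = sorted(p.name for p in path.iterdir() if p.name.startswith("card"))
+    renders = sorted(p.name for p in path.iterdir() if p.name.startswith("render"))
+    print("card nodes:  ", ", ".join(cards) or "none")
+    print("render nodes:", ", ".join(renders) or "none")
+    print("Example compose device entries:")
+    for name in cards + renders:
+        print(f"  - /dev/dri/{name}:/dev/dri/{name}")
+
+
+if __name__ == "__main__":
+    list_dri_nodes()
+```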
+ +Please refer to the table below to build different microservices from source: + +| Microservice | Deployment Guide | +| ------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------- | +| TGI | [TGI project](https://github.com/huggingface/text-generation-inference.git) | +| vLLM | [vLLM build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/third_parties/vllm#build-docker) | +| llm-arb-post-hearing-assistant | [LLM-ArbPostHearingAssistant build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/arb_post_hearing_assistant/src/#12-build-docker-image) | +| MegaService | [MegaService build guide](../../../../README_miscellaneous.md#build-megaservice-docker-image) | +| UI | [Basic UI build guide](../../../../README_miscellaneous.md#build-ui-docker-image) | + +### Check the Deployment Status + +After running docker compose, check if all the containers launched via docker compose have started: + +``` +docker ps -a +``` + +For the default deployment, the following 4 containers should have started: + +If used TGI: + +```bash +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +24bd78300413 opea/arb-post-hearing-assistant-gradio-ui:latest "python arb_post_hea…" 2 hours ago Up 2 hours 0.0.0.0:5173->5173/tcp, [::]:5173->5173/tcp arb-post-hearing-assistant-xeon-ui-server +59e60c954e26 opea/arb-post-hearing-assistant:latest "python arb_post_hea…" 2 hours ago Up 2 hours 0.0.0.0:8888->8888/tcp, [::]:8888->8888/tcp arb-post-hearing-assistant-xeon-backend-server +32afc12de996 opea/llm-arb-post-hearing-assistant:latest "python comps/arb_po…" 2 hours ago Up 2 hours 0.0.0.0:9000->9000/tcp, [::]:9000->9000/tcp arb-post-hearing-assistant-xeon-llm-server +c8e539360aff ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu "text-generation-lau…" 2 hours ago Up 2 hours (healthy) 0.0.0.0:8008->80/tcp, [::]:8008->80/tcp arb-post-hearing-assistant-xeon-tgi-server +``` + +### Test the Pipeline + +Once the Arbitration Post-Hearing Assistant services are running, test the pipeline using the following command: + +```bash +curl -X POST http://${host_ip}:8888/v1/arb-post-hearing \ + -H "Content-Type: application/json" \ + -d '{"type": "text", [10:00 AM] Arbitrator Hon. Rebecca Lawson: Good morning. This hearing is now in session for Case No. ARB/2025/0917. Lets begin with appearances. [10:01 AM] Attorney Michael Grant for Mr. Jonathan Reed: Good morning Your Honor. I represent the claimant Mr. Jonathan Reed. [10:01 AM] Attorney Lisa Chen for Ms. Rachel Morgan: Good morning. I represent the respondent Ms. Rachel Morgan. [10:03 AM] Arbitrator Hon. Rebecca Lawson: Thank you. Lets proceed with Mr. Reeds opening statement. [10:04 AM] Attorney Michael Grant: Ms. Morgan failed to deliver services as per the agreement dated March 15 2023. We have submitted relevant documentation including email correspondence and payment records. The delay caused substantial financial harm to our client. [10:15 AM] Attorney Lisa Chen: We deny any breach of contract. The delays were due to regulatory issues outside our control. Furthermore Mr. Reed did not provide timely approvals which contributed to the delay. [10:30 AM] Arbitrator Hon. Rebecca Lawson: Lets turn to Clause Z of the agreement. Id like both parties to submit written briefs addressing the applicability of the force majeure clause and the timeline of approvals. [11:00 AM] Attorney Michael Grant: Understood. 
Well submit by the deadline. [11:01 AM] Attorney Lisa Chen: Agreed. [11:02 AM] Arbitrator Hon. Rebecca Lawson: The next hearing is scheduled for October 22 2025 at 1030 AM Eastern Time. Please ensure your witnesses are available for cross examination. [4:45 PM] Arbitrator Hon. Rebecca Lawson: This session is adjourned. Thank you everyone.","max_tokens":2000,"language":"en"}' +``` + +**Note** The value of _host_ip_ was set using the _set_env.sh_ script and can be found in the _.env_ file. + +### Cleanup the Deployment + +To stop the containers associated with the deployment, execute the following command: + +```bash +docker compose -f compose.yaml down +``` + +All the Arbitration Post-Hearing Assistant containers will be stopped and then removed on completion of the "down" command. + +## arb-post-hearing-assistant Docker Compose Files + +In the context of deploying a Arbitration Post-Hearing Assistant pipeline on an AMD GPU platform, we can pick and choose different large language model serving frameworks. The table below outlines the various configurations that are available as part of the application. + +| File | Description | +| ---------------------------------------- | ------------------------------------------------------------------------------------------ | +| [compose.yaml](./compose.yaml) | Default compose file using tgi as serving framework | +| [compose_vllm.yaml](./compose_vllm.yaml) | The LLM serving framework is vLLM. All other configurations remain the same as the default | + +## arb-post-hearing-assistant Assistant Detailed Usage + +There are also some customized usage. + +### Query with text + +```bash +# form input. Use English mode (default). +curl http://${host_ip}:8888/v1/arb-post-hearing \ + -H "Content-Type: multipart/form-data" \ + -F "type=text" \ + -F "messages=[10:00 AM] Arbitrator Hon. Rebecca Lawson: Good morning. This hearing is now in session for Case No. ARB/2025/0917. Lets begin with appearances. [10:01 AM] Attorney Michael Grant for Mr. Jonathan Reed: Good morning Your Honor. I represent the claimant Mr. Jonathan Reed. [10:01 AM] Attorney Lisa Chen for Ms. Rachel Morgan: Good morning. I represent the respondent Ms. Rachel Morgan. [10:03 AM] Arbitrator Hon. Rebecca Lawson: Thank you. Lets proceed with Mr. Reeds opening statement. [10:04 AM] Attorney Michael Grant: Ms. Morgan failed to deliver services as per the agreement dated March 15 2023. We have submitted relevant documentation including email correspondence and payment records. The delay caused substantial financial harm to our client. [10:15 AM] Attorney Lisa Chen: We deny any breach of contract. The delays were due to regulatory issues outside our control. Furthermore Mr. Reed did not provide timely approvals which contributed to the delay. [10:30 AM] Arbitrator Hon. Rebecca Lawson: Lets turn to Clause Z of the agreement. Id like both parties to submit written briefs addressing the applicability of the force majeure clause and the timeline of approvals. [11:00 AM] Attorney Michael Grant: Understood. Well submit by the deadline. [11:01 AM] Attorney Lisa Chen: Agreed. [11:02 AM] Arbitrator Hon. Rebecca Lawson: The next hearing is scheduled for October 22 2025 at 1030 AM Eastern Time. Please ensure your witnesses are available for cross examination. [4:45 PM] Arbitrator Hon. Rebecca Lawson: This session is adjourned. Thank you everyone." 
\ + -F "max_tokens=2000" \ + -F "language=en" + +## Launch the UI + +### Gradio UI + +Open this URL `http://{host_ip}:5173` in your browser to access the Gradio based frontend. +![project-screenshot](../../../../assets/img/arbritation_post_hearing_ui_gradio_text.png) + +### Profile Microservices + +To further analyze MicroService Performance, users could follow the instructions to profile MicroServices. + +#### 1. vLLM backend Service + +Users could follow previous section to testing vLLM microservice or Arbitration Post-Hearing Assistant MegaService. By default, vLLM profiling is not enabled. Users could start and stop profiling by following commands. + +## Conclusion + +This guide should enable developer to deploy the default configuration or any of the other compose yaml files for different configurations. It also highlights the configurable parameters that can be set before deployment. +``` diff --git a/ArbPostHearingAssistant/docker_compose/amd/gpu/rocm/compose.yaml b/ArbPostHearingAssistant/docker_compose/amd/gpu/rocm/compose.yaml new file mode 100644 index 0000000000..833cd25bc4 --- /dev/null +++ b/ArbPostHearingAssistant/docker_compose/amd/gpu/rocm/compose.yaml @@ -0,0 +1,96 @@ +# Copyright (C) 2025 Zensar Technologies Private Ltd. +# SPDX-License-Identifier: Apache-2.0 + +services: + arbPostHearingAssistant-tgi-service: + image: ghcr.io/huggingface/text-generation-inference:2.4.1-rocm + container_name: arb-post-hearing-assistant-tgi-service + ports: + - "${ARB_POSTHEARING_ASSISTANT_TGI_SERVICE_PORT:-8008}:80" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: "http://${HOST_IP}:${ARB_POSTHEARING_ASSISTANT_TGI_SERVICE_PORT}" + HF_TOKEN: ${ARB_POSTHEARING_ASSISTANT_HUGGINGFACEHUB_API_TOKEN} + host_ip: ${host_ip} + ARB_POSTHEARING_ASSISTANT_TGI_SERVICE_PORT: ${ARB_POSTHEARING_ASSISTANT_TGI_SERVICE_PORT} + volumes: + - "${MODEL_CACHE:-./data}:/data" + shm_size: 20g + devices: + - /dev/kfd:/dev/kfd + - /dev/dri/:/dev/dri/ + cap_add: + - SYS_PTRACE + group_add: + - video + security_opt: + - seccomp:unconfined + ipc: host + healthcheck: + test: ["CMD-SHELL", "curl -f http://${host_ip}:${ARB_POSTHEARING_ASSISTANT_TGI_SERVICE_PORT}/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + command: --model-id ${ARB_POSTHEARING_ASSISTANT_LLM_MODEL_ID} --max-input-length ${ARB_POSTHEARING_ASSISTANT_MAX_INPUT_TOKENS} --max-total-tokens ${ARB_POSTHEARING_ASSISTANT_MAX_TOTAL_TOKENS} + + arbPostHearingAssistant-llm-server: + image: ${REGISTRY:-opea}/llm-arb-post-hearing-assistant:${TAG:-latest} + container_name: arb-post-hearing-assistant-llm-server + depends_on: + arbPostHearingAssistant-tgi-service: + condition: service_healthy + ports: + - "${ARB_POSTHEARING_ASSISTANT_LLM_SERVER_PORT}:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: ${ARB_POSTHEARING_ASSISTANT_TGI_LLM_ENDPOINT} + HF_TOKEN: ${ARB_POSTHEARING_ASSISTANT_HUGGINGFACEHUB_API_TOKEN} + MAX_INPUT_TOKENS: ${ARB_POSTHEARING_ASSISTANT_MAX_INPUT_TOKENS} + MAX_TOTAL_TOKENS: ${ARB_POSTHEARING_ASSISTANT_MAX_TOTAL_TOKENS} + LLM_MODEL_ID: ${ARB_POSTHEARING_ASSISTANT_LLM_MODEL_ID} + OPEA_ARB_POSTHEARING_ASSISTANT_COMPONENT_NAME: "OpeaArbPostHearingAssistantTgi" + LOGFLAG: ${LOGFLAG:-False} + restart: unless-stopped + + arbPostHearingAssistant-backend-server: + image: ${REGISTRY:-opea}/arb-post-hearing-assistant:${TAG:-latest} + container_name: arb-post-hearing-assistant-backend-server + 
depends_on: + - arbPostHearingAssistant-tgi-service + - arbPostHearingAssistant-llm-server + ports: + - "${ARB_POSTHEARING_ASSISTANT_BACKEND_SERVER_PORT}:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${HOST_IP} + LLM_SERVICE_HOST_IP: ${HOST_IP} + LLM_SERVICE_PORT: ${ARB_POSTHEARING_ASSISTANT_LLM_SERVER_PORT} + ipc: host + restart: always + + arbPostHearingAssistant-gradio-ui: + image: ${REGISTRY:-opea}/arb-post-hearing-assistant-gradio-ui:${TAG:-latest} + container_name: arb-post-hearing-assistant-ui-server + depends_on: + - arbPostHearingAssistant-backend-server + ports: + - "5173:5173" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + BACKEND_SERVICE_ENDPOINT: ${BACKEND_SERVICE_ENDPOINT} + DOC_BASE_URL: ${BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/ArbPostHearingAssistant/docker_compose/amd/gpu/rocm/compose_vllm.yaml b/ArbPostHearingAssistant/docker_compose/amd/gpu/rocm/compose_vllm.yaml new file mode 100644 index 0000000000..4884644117 --- /dev/null +++ b/ArbPostHearingAssistant/docker_compose/amd/gpu/rocm/compose_vllm.yaml @@ -0,0 +1,97 @@ +# Copyright (C) 2025 Zensar Technologies Private Ltd. +# SPDX-License-Identifier: Apache-2.0 + +services: + arbPostHearingAssistant-vllm-service: + image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest} + container_name: arb-post-hearing-assistant-vllm-service + ports: + - "${ARB_POSTHEARING_ASSISTANT_VLLM_SERVICE_PORT:-8081}:8011" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HF_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + VLLM_USE_TRITON_FLASH_ATTENTION: 0 + PYTORCH_JIT: 0 + healthcheck: + test: [ "CMD-SHELL", "curl -f http://${HOST_IP}:${ARB_POSTHEARING_ASSISTANT_VLLM_SERVICE_PORT:-8081}/health || exit 1" ] + interval: 10s + timeout: 10s + retries: 100 + volumes: + - "${MODEL_CACHE:-./data}:/data" + shm_size: 20G + devices: + - /dev/kfd:/dev/kfd + - /dev/dri/:/dev/dri/ + cap_add: + - SYS_PTRACE + group_add: + - video + security_opt: + - seccomp:unconfined + - apparmor=unconfined + command: "--model ${ARB_POSTHEARING_ASSISTANT_LLM_MODEL_ID} --swap-space 16 --disable-log-requests --dtype float16 --tensor-parallel-size 4 --host 0.0.0.0 --port 8011 --num-scheduler-steps 1 --distributed-executor-backend \"mp\"" + ipc: host + + arbPostHearingAssistant-llm-server: + image: ${REGISTRY:-opea}/llm-arb-post-hearing-assistant:${TAG:-latest} + container_name: arb-post-hearing-assistant-llm-server + depends_on: + arbPostHearingAssistant-vllm-service: + condition: service_healthy + ports: + - "${ARB_POSTHEARING_ASSISTANT_LLM_SERVER_PORT}:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: ${ARB_POSTHEARING_ASSISTANT_LLM_ENDPOINT} + HF_TOKEN: ${ARB_POSTHEARING_ASSISTANT_HUGGINGFACEHUB_API_TOKEN} + MAX_INPUT_TOKENS: ${ARB_POSTHEARING_ASSISTANT_MAX_INPUT_TOKENS} + MAX_TOTAL_TOKENS: ${ARB_POSTHEARING_ASSISTANT_MAX_TOTAL_TOKENS} + LLM_MODEL_ID: ${ARB_POSTHEARING_ASSISTANT_LLM_MODEL_ID} + OPEA_ARB_POSTHEARING_ASSISTANT_COMPONENT_NAME: "OpeaArbPostHearingAssistantVllm" + LOGFLAG: ${LOGFLAG:-False} + restart: unless-stopped + + arbPostHearingAssistant-backend-server: + image: ${REGISTRY:-opea}/arb-post-hearing-assistant:${TAG:-latest} + container_name: arb-post-hearing-assistant-backend-server + depends_on: + - 
arbPostHearingAssistant-vllm-service + - arbPostHearingAssistant-llm-server + ports: + - "${ARB_POSTHEARING_ASSISTANT_BACKEND_SERVER_PORT}:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${HOST_IP} + LLM_SERVICE_HOST_IP: ${HOST_IP} + ipc: host + restart: always + + arbPostHearingAssistant-gradio-ui: + image: ${REGISTRY:-opea}/arb-post-hearing-assistant-gradio-ui:${TAG:-latest} + container_name: arb-post-hearing-assistant-ui-server + depends_on: + - arbPostHearingAssistant-backend-server + ports: + - "${ARB_POSTHEARING_ASSISTANT_FRONTEND_PORT:-5173}:5173" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + BACKEND_SERVICE_ENDPOINT: ${BACKEND_SERVICE_ENDPOINT} + DOC_BASE_URL: ${BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/ArbPostHearingAssistant/docker_compose/amd/gpu/rocm/set_env.sh b/ArbPostHearingAssistant/docker_compose/amd/gpu/rocm/set_env.sh new file mode 100644 index 0000000000..330cacca9f --- /dev/null +++ b/ArbPostHearingAssistant/docker_compose/amd/gpu/rocm/set_env.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +# Copyright (C) 2025 Zensar Technologies Private Ltd. +# SPDX-License-Identifier: Apache-2.0 + +export HOST_IP=${ip_address} +export ARB_POSTHEARING_ASSISTANT_MAX_INPUT_TOKENS="2048" +export ARB_POSTHEARING_ASSISTANT_MAX_TOTAL_TOKENS="4096" +export ARB_POSTHEARING_ASSISTANT_LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.2" +export ARB_POSTHEARING_ASSISTANT_TGI_SERVICE_PORT="8008" +export ARB_POSTHEARING_ASSISTANT_TGI_LLM_ENDPOINT="http://${HOST_IP}:${ARB_POSTHEARING_ASSISTANT_TGI_SERVICE_PORT}" +export HF_TOKEN=${HF_TOKEN} +export ARB_POSTHEARING_ASSISTANT_LLM_SERVER_PORT="9000" +export ARB_POSTHEARING_ASSISTANT_BACKEND_SERVER_PORT="18072" +export ARB_POSTHEARING_ASSISTANT_FRONTEND_PORT="18073" +export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${ARB_POSTHEARING_ASSISTANT_BACKEND_SERVER_PORT}/v1/arb-post-hearing" diff --git a/ArbPostHearingAssistant/docker_compose/amd/gpu/rocm/set_env_vllm.sh b/ArbPostHearingAssistant/docker_compose/amd/gpu/rocm/set_env_vllm.sh new file mode 100644 index 0000000000..7ea62528f4 --- /dev/null +++ b/ArbPostHearingAssistant/docker_compose/amd/gpu/rocm/set_env_vllm.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +# Copyright (C) 2025 Zensar Technologies Private Ltd. 
+# SPDX-License-Identifier: Apache-2.0 + +export HOST_IP=${ip_address} +export HF_TOKEN=${HF_TOKEN} +export ARB_POSTHEARING_ASSISTANT_MAX_INPUT_TOKENS=2048 +export ARB_POSTHEARING_ASSISTANT_MAX_TOTAL_TOKENS=4096 +export ARB_POSTHEARING_ASSISTANT_LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.2" +export ARB_POSTHEARING_ASSISTANT_VLLM_SERVICE_PORT="8008" +export ARB_POSTHEARING_ASSISTANT_LLM_ENDPOINT="http://${HOST_IP}:${ARB_POSTHEARING_ASSISTANT_VLLM_SERVICE_PORT}" +export ARB_POSTHEARING_ASSISTANT_LLM_SERVER_PORT="9000" +export ARB_POSTHEARING_ASSISTANT_BACKEND_SERVER_PORT="18072" +export ARB_POSTHEARING_ASSISTANT_FRONTEND_PORT="18073" +export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${ARB_POSTHEARING_ASSISTANT_BACKEND_SERVER_PORT}/v1/arb-post-hearing" diff --git a/ArbPostHearingAssistant/docker_compose/intel/cpu/xeon/README.md b/ArbPostHearingAssistant/docker_compose/intel/cpu/xeon/README.md new file mode 100644 index 0000000000..c82ba63ced --- /dev/null +++ b/ArbPostHearingAssistant/docker_compose/intel/cpu/xeon/README.md @@ -0,0 +1,176 @@ +# Example Arbitration Post-Hearing Assistant deployments on Intel Xeon Processor + +This document outlines the deployment process for a Arbitration Post-Hearing Assistant application utilizing OPEA components on an Intel Xeon server. + +This example includes the following sections: + +- [Arbitration Post-Hearing Assistant Quick Start Deployment](#arb-post-hearing-assistant-quick-start-deployment): Demonstrates how to quickly deploy a Arbitration Post-Hearing Assistant application/pipeline on Intel Xeon platform. +- [Arbitration Post-Hearing Assistant Docker Compose Files](#arb-post-hearing-assistant-docker-compose-files): Describes some example deployments and their docker compose files. +- [Arbitration Post-Hearing Assistant Detailed Usage](#arb-post-hearing-assistant-detailed-usage): Provide more detailed usage. +- [Launch the UI](#launch-the-ui): Guideline for UI usage + +## arb-post-hearing-assistant Quick Start Deployment + +This section describes how to quickly deploy and test the Arbitration Post-Hearing Assistant service manually on an Intel Xeon platform. 
The basic steps are: + +- [Example Arbitration Post-Hearing Assistant deployments on Intel Xeon Processor](#example-arbitration-post-hearing-assistant-deployments-on-intel-xeon-processor) + - [arb-post-hearing-assistant Quick Start Deployment](#arb-post-hearing-assistant-quick-start-deployment) + - [Access the Code and Set Up Environment](#access-the-code-and-set-up-environment) + - [Generate a HuggingFace Access Token](#generate-a-huggingface-access-token) + - [Deploy the Services Using Docker Compose](#deploy-the-services-using-docker-compose) + - [Check the Deployment Status](#check-the-deployment-status) + - [Test the Pipeline](#test-the-pipeline) + - [Cleanup the Deployment](#cleanup-the-deployment) + - [arb-post-hearing-assistant Docker Compose Files](#arb-post-hearing-assistant-docker-compose-files) + - [Running LLM models with remote endpoints](#running-llm-models-with-remote-endpoints) + - [arb-post-hearing-assistant Detailed Usage](#arb-post-hearing-assistant-detailed-usage) + - [Query with text](#query-with-text) + +### Access the Code and Set Up Environment + +Clone the GenAIExample repository and access the Arbitration Post-Hearing Assistant Intel Xeon platform Docker Compose files and supporting scripts: + +```bash +git clone https://github.com/opea-project/GenAIExamples.git +cd GenAIExamples/ArbPostHearingAssistant/docker_compose +source intel/set_env.sh +``` + +> NOTE: by default vLLM does "warmup" at start, to optimize its performance for the specified model and the underlying platform, which can take long time. For development (and e.g. autoscaling) it can be skipped with `export VLLM_SKIP_WARMUP=true`. + +> NOTE: If any port on your local machine is occupied (like `9000/8008/8888`, etc.), modify it in `set_env.sh`, then run `source set_env.sh` again. + +### Generate a HuggingFace Access Token + +Some HuggingFace resources, such as some models, are only accessible if you have an access token. If you do not already have a HuggingFace access token, you can create one by first creating an account by following the steps provided at [HuggingFace](https://huggingface.co/) and then generating a [user access token](https://huggingface.co/docs/transformers.js/en/guides/private#step-1-generating-a-user-access-token). + +### Deploy the Services Using Docker Compose + +To deploy the Arbitration Post-Hearing Assistant services, execute the `docker compose up` command with the appropriate arguments. For a default deployment, execute: + +```bash +cd intel/cpu/xeon/ +docker compose up -d +``` + +**Note**: developers should build docker image from source when: + +- Developing off the git main branch (as the container's ports in the repo may be different from the published docker image). +- Unable to download the docker image. +- Use a specific version of Docker image. 
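+
+Besides the curl command shown in the "Test the Pipeline" section below, the megaservice endpoint can also be exercised from Python. The sketch below is not part of the shipped example; it assumes the default `BACKEND_SERVICE_PORT` (8888) from `set_env.sh` and uses only the request fields documented in this guide (`type`, `messages`, `max_tokens`, `language`).
+
+```python
+# query_arb_post_hearing.py - minimal client sketch for the megaservice endpoint.
+import json
+import os
+import urllib.request
+
+HOST_IP = os.environ.get("host_ip", "localhost")
+URL = f"http://{HOST_IP}:8888/v1/arb-post-hearing"
+
+payload = {
+    "type": "text",
+    "messages": "[10:00 AM] Arbitrator Hon. Rebecca Lawson: Good morning. "
+    "This hearing is now in session for Case No. ARB/2025/0917. ...",
+    "max_tokens": 2000,
+    "language": "en",
+}
+
+req = urllib.request.Request(
+    URL,
+    data=json.dumps(payload).encode("utf-8"),
+    headers={"Content-Type": "application/json"},
+    method="POST",
+)
+with urllib.request.urlopen(req, timeout=600) as resp:
+    body = json.loads(resp.read().decode("utf-8"))
+    # The backend returns an OpenAI-style ChatCompletionResponse; print the summary text.
+    print(body["choices"][0]["message"]["content"])
+```
+
+Replace the truncated `messages` string with the full hearing transcript you want summarized.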
+ +Please refer to the table below to build different microservices from source: + +| Microservice | Deployment Guide | +| ------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------- | +| vLLM | [vLLM build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/third_parties/vllm#build-docker) | +| llm-arb-post-hearing-assistant | [LLM-ArbPostHearingAssistant build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/arb_post_hearing_assistant/src/#12-build-docker-image) | +| MegaService | [MegaService build guide](../../../../README_miscellaneous.md#build-megaservice-docker-image) | +| UI | [Basic UI build guide](../../../../README_miscellaneous.md#build-ui-docker-image) | + +### Check the Deployment Status + +After running docker compose, to check if all the containers launched via docker compose have started: + +```bash +docker ps -a +``` + +For the default deployment, the following 4 containers should be running: + +```bash +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +24bd78300413 opea/arb-post-hearing-assistant-gradio-ui:latest "python arb_post_hea…" 2 hours ago Up 2 hours 0.0.0.0:5173->5173/tcp, [::]:5173->5173/tcp arb-post-hearing-assistant-xeon-ui-server +59e60c954e26 opea/arb-post-hearing-assistant:latest "python arb_post_hea…" 2 hours ago Up 2 hours 0.0.0.0:8888->8888/tcp, [::]:8888->8888/tcp arb-post-hearing-assistant-xeon-backend-server +32afc12de996 opea/llm-arb-post-hearing-assistant:latest "python comps/arb_po…" 2 hours ago Up 2 hours 0.0.0.0:9000->9000/tcp, [::]:9000->9000/tcp arb-post-hearing-assistant-xeon-llm-server +c8e539360aff ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu "text-generation-lau…" 2 hours ago Up 2 hours (healthy) 0.0.0.0:8008->80/tcp, [::]:8008->80/tcp arb-post-hearing-assistant-xeon-tgi-server +``` + +### Test the Pipeline + +Once the Arbitration Post-Hearing Assistant services are running, test the pipeline using the following command: + +```bash +curl -X POST http://${host_ip}:8888/v1/arb-post-hearing \ + -H "Content-Type: application/json" \ + -d '{"type": "text", [10:00 AM] Arbitrator Hon. Rebecca Lawson: Good morning. This hearing is now in session for Case No. ARB/2025/0917. Lets begin with appearances. [10:01 AM] Attorney Michael Grant for Mr. Jonathan Reed: Good morning Your Honor. I represent the claimant Mr. Jonathan Reed. [10:01 AM] Attorney Lisa Chen for Ms. Rachel Morgan: Good morning. I represent the respondent Ms. Rachel Morgan. [10:03 AM] Arbitrator Hon. Rebecca Lawson: Thank you. Lets proceed with Mr. Reeds opening statement. [10:04 AM] Attorney Michael Grant: Ms. Morgan failed to deliver services as per the agreement dated March 15 2023. We have submitted relevant documentation including email correspondence and payment records. The delay caused substantial financial harm to our client. [10:15 AM] Attorney Lisa Chen: We deny any breach of contract. The delays were due to regulatory issues outside our control. Furthermore Mr. Reed did not provide timely approvals which contributed to the delay. [10:30 AM] Arbitrator Hon. Rebecca Lawson: Lets turn to Clause Z of the agreement. Id like both parties to submit written briefs addressing the applicability of the force majeure clause and the timeline of approvals. [11:00 AM] Attorney Michael Grant: Understood. Well submit by the deadline. [11:01 AM] Attorney Lisa Chen: Agreed. [11:02 AM] Arbitrator Hon. 
Rebecca Lawson: The next hearing is scheduled for October 22 2025 at 1030 AM Eastern Time. Please ensure your witnesses are available for cross examination. [4:45 PM] Arbitrator Hon. Rebecca Lawson: This session is adjourned. Thank you everyone.","max_tokens":2000,"language":"en"}' +``` + +**Note** The value of _host_ip_ was set using the _set_env.sh_ script and can be found in the _.env_ file. + +### Cleanup the Deployment + +To stop the containers associated with the deployment, execute the following command: + +```bash +docker compose -f compose.yaml down +``` + +All the Arbitration Post-Hearing Assistant containers will be stopped and then removed on completion of the "down" command. + +## arb-post-hearing-assistant Docker Compose Files + +In the context of deploying a arb-post-hearing-assistant pipeline on an Intel® Xeon® platform, we can pick and choose different large language model serving frameworks. The table below outlines the various configurations that are available as part of the application. + +| File | Description | +| -------------------------------------------- | -------------------------------------------------------------------------------------- | +| [compose.yaml](./compose.yaml) | Default compose file using vllm as serving framework | +| [compose_tgi.yaml](./compose_tgi.yaml) | The LLM serving framework is TGI. All other configurations remain the same as default | +| [compose_remote.yaml](./compose_remote.yaml) | Uses remote inference endpoints for LLMs. All other configurations are same as default | + +### Running LLM models with remote endpoints + +When models are deployed on a remote server, a base URL and an API key are required to access them. To set up a remote server and acquire the base URL and API key, refer to [Intel® AI for Enterprise Inference](https://www.intel.com/content/www/us/en/developer/topic-technology/artificial-intelligence/enterprise-inference.html) offerings. + +Set the following environment variables. + +- `REMOTE_ENDPOINT` is the HTTPS endpoint of the remote server with the model of choice (i.e. https://api.example.com). **Note:** If the API for the models does not use LiteLLM, the second part of the model card needs to be appended to the URL. For example, set `REMOTE_ENDPOINT` to https://api.example.com/Llama-3.3-70B-Instruct if the model card is `meta-llama/Llama-3.3-70B-Instruct`. +- `API_KEY` is the access token or key to access the model(s) on the server. +- `LLM_MODEL_ID` is the model card which may need to be overwritten depending on what it is set to `set_env.sh`. + +```bash +export REMOTE_ENDPOINT= +export API_KEY= +export LLM_MODEL_ID= +``` + +After setting these environment variables, run `docker compose` with `compose_remote.yaml`: + +```bash +docker compose -f compose_remote.yaml up -d +``` + +## arb-post-hearing-assistant Detailed Usage + +There are also some customized usage. + +### Query with text + +```bash +# form input. Use English mode (default). +curl http://${host_ip}:8888/v1/arb-post-hearing \ + -H "Content-Type: application/json" \ + -F "type=text" \ + -F "messages=[10:00 AM] Arbitrator Hon. Rebecca Lawson: Good morning. This hearing is now in session for Case No. ARB/2025/0917. Lets begin with appearances. [10:01 AM] Attorney Michael Grant for Mr. Jonathan Reed: Good morning Your Honor. I represent the claimant Mr. Jonathan Reed. [10:01 AM] Attorney Lisa Chen for Ms. Rachel Morgan: Good morning. I represent the respondent Ms. Rachel Morgan. [10:03 AM] Arbitrator Hon. Rebecca Lawson: Thank you. Lets proceed with Mr. 
Reeds opening statement. [10:04 AM] Attorney Michael Grant: Ms. Morgan failed to deliver services as per the agreement dated March 15 2023. We have submitted relevant documentation including email correspondence and payment records. The delay caused substantial financial harm to our client. [10:15 AM] Attorney Lisa Chen: We deny any breach of contract. The delays were due to regulatory issues outside our control. Furthermore Mr. Reed did not provide timely approvals which contributed to the delay. [10:30 AM] Arbitrator Hon. Rebecca Lawson: Lets turn to Clause Z of the agreement. Id like both parties to submit written briefs addressing the applicability of the force majeure clause and the timeline of approvals. [11:00 AM] Attorney Michael Grant: Understood. Well submit by the deadline. [11:01 AM] Attorney Lisa Chen: Agreed. [11:02 AM] Arbitrator Hon. Rebecca Lawson: The next hearing is scheduled for October 22 2025 at 1030 AM Eastern Time. Please ensure your witnesses are available for cross examination. [4:45 PM] Arbitrator Hon. Rebecca Lawson: This session is adjourned. Thank you everyone." \
+  -F "max_tokens=2000" \
+  -F "language=en"
+```
+
+## Launch the UI
+
+### Gradio UI
+
+Open this URL `http://{host_ip}:5173` in your browser to access the Gradio-based frontend.
+![project-screenshot](../../../../assets/img/arbritation_post_hearing_ui_gradio_text.png)
+
+### Profile Microservices
+
+To further analyze microservice performance, users can profile the individual microservices as described below.
+
+#### 1. vLLM backend Service
+
+Users can follow the previous sections to exercise the vLLM microservice or the Arbitration Post-Hearing Assistant MegaService. By default, vLLM profiling is not enabled. Because the compose files set `VLLM_TORCH_PROFILER_DIR` for the vLLM service, profiling can be started and stopped through vLLM's `/start_profile` and `/stop_profile` endpoints; refer to the vLLM documentation for details.
+
+## Conclusion
+
+This guide should enable developers to deploy the default configuration or any of the other compose YAML files for different configurations. It also highlights the configurable parameters that can be set before deployment.
diff --git a/ArbPostHearingAssistant/docker_compose/intel/cpu/xeon/compose.yaml b/ArbPostHearingAssistant/docker_compose/intel/cpu/xeon/compose.yaml
new file mode 100644
index 0000000000..954a78adc2
--- /dev/null
+++ b/ArbPostHearingAssistant/docker_compose/intel/cpu/xeon/compose.yaml
@@ -0,0 +1,86 @@
+# Copyright (C) 2025 Zensar Technologies Private Ltd.
+# SPDX-License-Identifier: Apache-2.0 + +services: + vllm-service: + image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1 + container_name: arb-post-hearing-assistant-xeon-vllm-service + ports: + - ${LLM_ENDPOINT_PORT:-8008}:80 + volumes: + - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HF_TOKEN} + LLM_MODEL_ID: ${LLM_MODEL_ID} + VLLM_TORCH_PROFILER_DIR: "/mnt" + VLLM_CPU_KVCACHE_SPACE: 40 + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80 + + llm-arbPostHearingAssistant-vllm: + image: ${REGISTRY:-opea}/llm-arb-post-hearing-assistant:${TAG:-latest} + container_name: arb-post-hearing-assistant-xeon-llm-server + depends_on: + vllm-service: + condition: service_healthy + ports: + - ${LLM_PORT:-9000}:9000 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: ${LLM_ENDPOINT} + LLM_MODEL_ID: ${LLM_MODEL_ID} + HF_TOKEN: ${HF_TOKEN} + MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS} + MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS} + OPEA_ARB_POSTHEARING_ASSISTANT_COMPONENT_NAME: ${OPEA_ARB_POSTHEARING_ASSISTANT_COMPONENT_NAME} + LOGFLAG: ${LOGFLAG:-False} + restart: unless-stopped + + arbPostHearingAssistant-xeon-backend-server: + image: ${REGISTRY:-opea}/arb-post-hearing-assistant:${TAG:-latest} + container_name: arb-post-hearing-assistant-xeon-backend-server + depends_on: + - vllm-service + - llm-arbPostHearingAssistant-vllm + ports: + - "${BACKEND_SERVICE_PORT:-8888}:8888" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} + - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + - LLM_SERVICE_PORT=${LLM_PORT} + ipc: host + restart: always + + arbPostHearingAssistant-gradio-ui: + image: ${REGISTRY:-opea}/arb-post-hearing-assistant-gradio-ui:${TAG:-latest} + container_name: arb-post-hearing-assistant-xeon-ui-server + depends_on: + - arbPostHearingAssistant-xeon-backend-server + ports: + - "${FRONTEND_SERVICE_PORT:-5173}:5173" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT} + - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/ArbPostHearingAssistant/docker_compose/intel/cpu/xeon/compose_remote.yaml b/ArbPostHearingAssistant/docker_compose/intel/cpu/xeon/compose_remote.yaml new file mode 100644 index 0000000000..a9726b2df4 --- /dev/null +++ b/ArbPostHearingAssistant/docker_compose/intel/cpu/xeon/compose_remote.yaml @@ -0,0 +1,61 @@ +# Copyright (C) 2025 Zensar Technologies Private Ltd. 
+# SPDX-License-Identifier: Apache-2.0 + +services: + llm-arbPostHearingAssistant-vllm: + image: ${REGISTRY:-opea}/llm-arb-post-hearing-assistant:${TAG:-latest} + container_name: arb-post-hearing-assistant-xeon-llm-server + ports: + - ${LLM_PORT:-9000}:9000 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: ${REMOTE_ENDPOINT} + LLM_MODEL_ID: ${LLM_MODEL_ID} + OPENAI_API_KEY: ${API_KEY} + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + HF_TOKEN: ${HF_TOKEN} + MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS} + MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS} + OPEA_ARB_POSTHEARING_ASSISTANT_COMPONENT_NAME: ${OPEA_ARB_POSTHEARING_ASSISTANT_COMPONENT_NAME} + + LOGFLAG: ${LOGFLAG:-False} + restart: unless-stopped + + arbPostHearingAssistant-xeon-backend-server: + image: ${REGISTRY:-opea}/arb-post-hearing-assistant:${TAG:-latest} + container_name: arb-post-hearing-assistant-xeon-backend-server + depends_on: + - llm-arbPostHearingAssistant-vllm + ports: + - "${BACKEND_SERVICE_PORT:-8888}:8888" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} + - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + ipc: host + restart: always + + arbPostHearingAssistant-gradio-ui: + image: ${REGISTRY:-opea}/arb-post-hearing-assistant-gradio-ui:${TAG:-latest} + container_name: arb-post-hearing-assistant-xeon-ui-server + depends_on: + - arbPostHearingAssistant-xeon-backend-server + ports: + - "${FRONTEND_SERVICE_PORT:-5173}:5173" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT} + - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/ArbPostHearingAssistant/docker_compose/intel/cpu/xeon/compose_tgi.yaml b/ArbPostHearingAssistant/docker_compose/intel/cpu/xeon/compose_tgi.yaml new file mode 100644 index 0000000000..fe0d31388f --- /dev/null +++ b/ArbPostHearingAssistant/docker_compose/intel/cpu/xeon/compose_tgi.yaml @@ -0,0 +1,85 @@ +# Copyright (C) 2025 Zensar Technologies Private Ltd. 
+# SPDX-License-Identifier: Apache-2.0 + +services: + tgi-server: + image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu + container_name: arb-post-hearing-assistant-xeon-tgi-server + ports: + - ${LLM_ENDPOINT_PORT:-8008}:80 + volumes: + - "${MODEL_CACHE:-./data}:/data" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} + HF_TOKEN: ${HF_TOKEN} + host_ip: ${host_ip} + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + shm_size: 1g + command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS} + + llm-arbPostHearingAssistant-tgi: + image: ${REGISTRY:-opea}/llm-arb-post-hearing-assistant:${TAG:-latest} + container_name: arb-post-hearing-assistant-xeon-llm-server + depends_on: + tgi-server: + condition: service_healthy + ports: + - ${LLM_PORT:-9000}:9000 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: ${LLM_ENDPOINT} + LLM_MODEL_ID: ${LLM_MODEL_ID} + HF_TOKEN: ${HF_TOKEN} + MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS} + MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS} + OPEA_ARB_POSTHEARING_ASSISTANT_COMPONENT_NAME: ${OPEA_ARB_POSTHEARING_ASSISTANT_COMPONENT_NAME} + LOGFLAG: ${LOGFLAG:-False} + restart: unless-stopped + + arbPostHearingAssistant-xeon-backend-server: + image: ${REGISTRY:-opea}/arb-post-hearing-assistant:${TAG:-latest} + container_name: arb-post-hearing-assistant-xeon-backend-server + depends_on: + - tgi-server + - llm-arbPostHearingAssistant-tgi + ports: + - "${BACKEND_SERVICE_PORT:-8888}:8888" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} + - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + - LLM_SERVICE_PORT=${LLM_PORT} + ipc: host + restart: always + + arbPostHearingAssistant-gradio-ui: + image: ${REGISTRY:-opea}/arb-post-hearing-assistant-gradio-ui:${TAG:-latest} + container_name: arb-post-hearing-assistant-xeon-ui-server + depends_on: + - arbPostHearingAssistant-xeon-backend-server + ports: + - "${FRONTEND_SERVICE_PORT:-5173}:5173" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT} + - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/ArbPostHearingAssistant/docker_compose/intel/hpu/gaudi/README.md b/ArbPostHearingAssistant/docker_compose/intel/hpu/gaudi/README.md new file mode 100644 index 0000000000..23eac48e93 --- /dev/null +++ b/ArbPostHearingAssistant/docker_compose/intel/hpu/gaudi/README.md @@ -0,0 +1,150 @@ +# Example Arbitration Post-Hearing Assistant deployments on Intel® Gaudi® Platform + +This document outlines the deployment process for a Arbitration Post-Hearing Assistant application utilizing OPEA components on Intel® Gaudi® AI Accelerators. + +This example includes the following sections: + +- [Arbitration Post-Hearing Assistant Quick Start Deployment](#arb-post-hearing-assistant-quick-start-deployment): Demonstrates how to quickly deploy a Arbitration Post-Hearing Assistant application/pipeline on Intel® Gaudi® platform. 
+- [Arbitration Post-Hearing Assistant Docker Compose Files](#arbitration-post-hearing-assistant-docker-compose-files): Describes some example deployments and their docker compose files.
+- [Arbitration Post-Hearing Assistant Detailed Usage](#arbitration-post-hearing-assistant-detailed-usage): Provides more detailed usage examples.
+- [Launch the UI](#launch-the-ui): Guidelines for UI usage.
+
+**Note** This example requires access to a properly installed Intel® Gaudi® platform with a functional Docker service configured to use the habanalabs-container-runtime. Please consult the [Intel® Gaudi® software Installation Guide](https://docs.habana.ai/en/v1.20.0/Installation_Guide/Driver_Installation.html) for more information.
+
+## arb-post-hearing-assistant Quick Start Deployment
+
+This section describes how to quickly deploy and test the arb-post-hearing-assistant service manually on an Intel® Gaudi® platform. The basic steps are:
+
+1. [Access the Code](#access-the-code-and-set-up-environment)
+2. [Generate a HuggingFace Access Token](#generate-a-huggingface-access-token)
+3. [Configure the Deployment Environment](#access-the-code-and-set-up-environment)
+4. [Deploy the Services Using Docker Compose](#deploy-the-services-using-docker-compose)
+5. [Check the Deployment Status](#check-the-deployment-status)
+6. [Test the Pipeline](#test-the-pipeline)
+7. [Cleanup the Deployment](#cleanup-the-deployment)
+
+### Access the Code and Set Up Environment
+
+Clone the GenAIExamples repository and access the Arbitration Post-Hearing Assistant Intel® Gaudi® platform Docker Compose files and supporting scripts:
+
+```bash
+git clone https://github.com/opea-project/GenAIExamples.git
+cd GenAIExamples/ArbPostHearingAssistant/docker_compose
+source intel/set_env.sh
+```
+
+> NOTE: By default vLLM does a "warmup" at start to optimize its performance for the specified model and the underlying platform, which can take a long time. For development (and e.g. autoscaling) it can be skipped with `export VLLM_SKIP_WARMUP=true`.
+
+> NOTE: If any port on your local machine is occupied (like `9000/8008/8888`, etc.), modify it in `set_env.sh`, then run `source set_env.sh` again.
+
+### Generate a HuggingFace Access Token
+
+Some HuggingFace resources, such as some models, are only accessible if you have an access token. If you do not already have a HuggingFace access token, you can create one by first creating an account by following the steps provided at [HuggingFace](https://huggingface.co/) and then generating a [user access token](https://huggingface.co/docs/transformers.js/en/guides/private#step-1-generating-a-user-access-token).
+
+### Deploy the Services Using Docker Compose
+
+To deploy the Arbitration Post-Hearing Assistant services, execute the `docker compose up` command with the appropriate arguments. For a default deployment, execute:
+
+```bash
+cd intel/hpu/gaudi/
+docker compose up -d
+```
+
+**Note**: developers should build the docker images from source when:
+
+- Developing off the git main branch (as the container's ports in the repo may be different from the published docker image).
+- Unable to download the docker image.
+- Using a specific version of the Docker image (see the build example after this list).
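+
+For reference, the three application images used above can also be built locally with the `build.yaml` bundled in this example. The sketch below mirrors the repository's test scripts and assumes the default `REGISTRY`/`TAG` values; adjust them as needed:
+
+```bash
+cd GenAIExamples/ArbPostHearingAssistant/docker_image_build
+# build.yaml builds the LLM microservice image from a local GenAIComps checkout
+git clone --depth 1 https://github.com/opea-project/GenAIComps.git
+# MegaService, Gradio UI and LLM microservice images
+service_list="arb-post-hearing-assistant arb-post-hearing-assistant-gradio-ui llm-arb-post-hearing-assistant"
+docker compose -f build.yaml build ${service_list} --no-cache
+```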
+
+Please refer to the table below to build the different microservices from source:
+
+| Microservice                   | Deployment Guide                                                                                                                                             |
+| ------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| vLLM                           | [vLLM build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/third_parties/vllm#build-docker)                                              |
+| llm-arb-post-hearing-assistant | [LLM-ArbPostHearingAssistant build guide](https://github.com/opea-project/GenAIComps/tree/main/comps/arb_post_hearing_assistant/src/#12-build-docker-image) |
+| MegaService                    | [MegaService build guide](../../../../README_miscellaneous.md#build-megaservice-docker-image)                                                               |
+| UI                             | [Basic UI build guide](../../../../README_miscellaneous.md#build-ui-docker-image)                                                                           |
+
+### Check the Deployment Status
+
+After running docker compose, check if all the containers launched via docker compose have started:
+
+```bash
+docker ps -a
+```
+
+For the default deployment, 4 containers should be running. The sample output below was captured on the Xeon deployment; on Gaudi the container names contain `gaudi` instead of `xeon` and the model server is the `opea/vllm-gaudi` based `arb-post-hearing-assistant-gaudi-vllm-service`:
+
+```bash
+CONTAINER ID   IMAGE                                                            COMMAND                  CREATED       STATUS                 PORTS                                         NAMES
+24bd78300413   opea/arb-post-hearing-assistant-gradio-ui:latest                 "python arb_post_hea…"   2 hours ago   Up 2 hours             0.0.0.0:5173->5173/tcp, [::]:5173->5173/tcp   arb-post-hearing-assistant-xeon-ui-server
+59e60c954e26   opea/arb-post-hearing-assistant:latest                           "python arb_post_hea…"   2 hours ago   Up 2 hours             0.0.0.0:8888->8888/tcp, [::]:8888->8888/tcp   arb-post-hearing-assistant-xeon-backend-server
+32afc12de996   opea/llm-arb-post-hearing-assistant:latest                       "python comps/arb_po…"   2 hours ago   Up 2 hours             0.0.0.0:9000->9000/tcp, [::]:9000->9000/tcp   arb-post-hearing-assistant-xeon-llm-server
+c8e539360aff   ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu   "text-generation-lau…"   2 hours ago   Up 2 hours (healthy)   0.0.0.0:8008->80/tcp, [::]:8008->80/tcp       arb-post-hearing-assistant-xeon-tgi-server
+```
+
+### Test the Pipeline
+
+Once the Arbitration Post-Hearing Assistant services are running, test the pipeline using the following command:
+
+```bash
+curl -X POST http://${host_ip}:8888/v1/arb-post-hearing \
+  -H "Content-Type: application/json" \
+  -d '{"type": "text", "messages": "[10:00 AM] Arbitrator Hon. Rebecca Lawson: Good morning. This hearing is now in session for Case No. ARB/2025/0917. Lets begin with appearances. [10:01 AM] Attorney Michael Grant for Mr. Jonathan Reed: Good morning Your Honor. I represent the claimant Mr. Jonathan Reed. [10:01 AM] Attorney Lisa Chen for Ms. Rachel Morgan: Good morning. I represent the respondent Ms. Rachel Morgan. [10:03 AM] Arbitrator Hon. Rebecca Lawson: Thank you. Lets proceed with Mr. Reeds opening statement. [10:04 AM] Attorney Michael Grant: Ms. Morgan failed to deliver services as per the agreement dated March 15 2023. We have submitted relevant documentation including email correspondence and payment records. The delay caused substantial financial harm to our client. [10:15 AM] Attorney Lisa Chen: We deny any breach of contract. The delays were due to regulatory issues outside our control. Furthermore Mr. Reed did not provide timely approvals which contributed to the delay. [10:30 AM] Arbitrator Hon. Rebecca Lawson: Lets turn to Clause Z of the agreement. Id like both parties to submit written briefs addressing the applicability of the force majeure clause and the timeline of approvals. [11:00 AM] Attorney Michael Grant: Understood. Well submit by the deadline. [11:01 AM] Attorney Lisa Chen: Agreed. [11:02 AM] Arbitrator Hon. 
Rebecca Lawson: The next hearing is scheduled for October 22 2025 at 1030 AM Eastern Time. Please ensure your witnesses are available for cross examination. [4:45 PM] Arbitrator Hon. Rebecca Lawson: This session is adjourned. Thank you everyone.","max_tokens":2000,"language":"en"}'
+```
+
+**Note:** The value of _host_ip_ was set using the _set_env.sh_ script and can be found in the _.env_ file.
+
+### Cleanup the Deployment
+
+To stop the containers associated with the deployment, execute the following command:
+
+```bash
+docker compose -f compose.yaml down
+```
+
+All the Arbitration Post-Hearing Assistant containers will be stopped and then removed on completion of the "down" command.
+
+## Arbitration Post-Hearing Assistant Docker Compose Files
+
+In the context of deploying an Arbitration Post-Hearing Assistant pipeline on an Intel® Gaudi® platform, the allocation and utilization of Gaudi devices across different services are important considerations for optimizing performance and resource efficiency. Each of the example deployments, defined by the example Docker compose yaml files, demonstrates a unique approach to leveraging Gaudi hardware, reflecting different priorities and operational strategies.
+
+| File                                   | Description                                                                                |
+| -------------------------------------- | ------------------------------------------------------------------------------------------ |
+| [compose.yaml](./compose.yaml)         | Default compose file using vLLM as the serving framework                                     |
+| [compose_tgi.yaml](./compose_tgi.yaml) | The LLM serving framework is TGI. All other configurations remain the same as the default   |
+
+## Arbitration Post-Hearing Assistant Detailed Usage
+
+There are also some customized usage examples, shown below.
+
+### Query with text
+
+```bash
+# Form input. Use English mode (default). Let curl set the multipart Content-Type header automatically.
+curl http://${host_ip}:8888/v1/arb-post-hearing \
+  -F "type=text" \
+  -F "messages=[10:00 AM] Arbitrator Hon. Rebecca Lawson: Good morning. This hearing is now in session for Case No. ARB/2025/0917. Lets begin with appearances. [10:01 AM] Attorney Michael Grant for Mr. Jonathan Reed: Good morning Your Honor. I represent the claimant Mr. Jonathan Reed. [10:01 AM] Attorney Lisa Chen for Ms. Rachel Morgan: Good morning. I represent the respondent Ms. Rachel Morgan. [10:03 AM] Arbitrator Hon. Rebecca Lawson: Thank you. Lets proceed with Mr. Reeds opening statement. [10:04 AM] Attorney Michael Grant: Ms. Morgan failed to deliver services as per the agreement dated March 15 2023. We have submitted relevant documentation including email correspondence and payment records. The delay caused substantial financial harm to our client. [10:15 AM] Attorney Lisa Chen: We deny any breach of contract. The delays were due to regulatory issues outside our control. Furthermore Mr. Reed did not provide timely approvals which contributed to the delay. [10:30 AM] Arbitrator Hon. Rebecca Lawson: Lets turn to Clause Z of the agreement. Id like both parties to submit written briefs addressing the applicability of the force majeure clause and the timeline of approvals. [11:00 AM] Attorney Michael Grant: Understood. Well submit by the deadline. [11:01 AM] Attorney Lisa Chen: Agreed. [11:02 AM] Arbitrator Hon. Rebecca Lawson: The next hearing is scheduled for October 22 2025 at 1030 AM Eastern Time. Please ensure your witnesses are available for cross examination. [4:45 PM] Arbitrator Hon. Rebecca Lawson: This session is adjourned. Thank you everyone." 
\
+  -F "max_tokens=2000" \
+  -F "language=en"
+```
+
+## Launch the UI
+
+### Gradio UI
+
+Open this URL `http://{host_ip}:5173` in your browser to access the Gradio-based frontend.
+![project-screenshot](../../../../assets/img/arbritation_post_hearing_ui_gradio_text.png)
+
+### Profile Microservices
+
+To further analyze microservice performance, users can profile the individual microservices as described below.
+
+#### 1. vLLM backend Service
+
+Users can follow the previous sections to exercise the vLLM microservice or the Arbitration Post-Hearing Assistant MegaService. By default, vLLM profiling is not enabled. Because the compose files set `VLLM_TORCH_PROFILER_DIR` for the vLLM service, profiling can be started and stopped through vLLM's `/start_profile` and `/stop_profile` endpoints; refer to the vLLM documentation for details.
+
+## Conclusion
+
+This guide should enable developers to deploy the default configuration or any of the other compose YAML files for different configurations. It also highlights the configurable parameters that can be set before deployment.
diff --git a/ArbPostHearingAssistant/docker_compose/intel/hpu/gaudi/compose.yaml b/ArbPostHearingAssistant/docker_compose/intel/hpu/gaudi/compose.yaml
new file mode 100644
index 0000000000..de9a98cda1
--- /dev/null
+++ b/ArbPostHearingAssistant/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -0,0 +1,97 @@
+# Copyright (C) 2025 Zensar Technologies Private Ltd.
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  vllm-service:
+    image: opea/vllm-gaudi:1.4
+    container_name: arb-post-hearing-assistant-gaudi-vllm-service
+    ports:
+      - ${LLM_ENDPOINT_PORT:-8008}:80
+    volumes:
+      - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub"
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HF_TOKEN: ${HF_TOKEN}
+      HABANA_VISIBLE_DEVICES: all
+      OMPI_MCA_btl_vader_single_copy_mechanism: none
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
+      NUM_CARDS: ${NUM_CARDS}
+      VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false}
+      VLLM_TORCH_PROFILER_DIR: "/mnt"
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
+    runtime: habana
+    cap_add:
+      - SYS_NICE
+    ipc: host
+    command: --model $LLM_MODEL_ID --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size ${BLOCK_SIZE} --max-num-seqs ${MAX_NUM_SEQS} --max-seq-len-to-capture ${MAX_SEQ_LEN_TO_CAPTURE}
+
+  llm-arbPostHearingAssistant-vllm:
+    image: ${REGISTRY:-opea}/llm-arb-post-hearing-assistant:${TAG:-latest}
+    container_name: arb-post-hearing-assistant-gaudi-llm-server
+    depends_on:
+      vllm-service:
+        condition: service_healthy
+    ports:
+      - ${LLM_PORT:-9000}:9000
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HF_TOKEN: ${HF_TOKEN}
+      MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
+      MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
+      LLM_ENDPOINT: ${LLM_ENDPOINT}
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
+      OPEA_ARB_POSTHEARING_ASSISTANT_COMPONENT_NAME: ${OPEA_ARB_POSTHEARING_ASSISTANT_COMPONENT_NAME}
+      LOGFLAG: ${LOGFLAG:-False}
+    restart: unless-stopped
+
+  arbPostHearingAssistant-gaudi-backend-server:
+    image: ${REGISTRY:-opea}/arb-post-hearing-assistant:${TAG:-latest}
+    container_name: arb-post-hearing-assistant-gaudi-backend-server
+    depends_on:
+      - vllm-service
+      - llm-arbPostHearingAssistant-vllm
+    ports:
+      - "${BACKEND_SERVICE_PORT:-8888}:8888"
+    environment:
+      - no_proxy=${no_proxy}
+      - https_proxy=${https_proxy}
+      - http_proxy=${http_proxy}
+      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
+      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
+      - LLM_SERVICE_PORT=${LLM_PORT}
+      - ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
+      - 
ASR_SERVICE_PORT=${ASR_SERVICE_PORT} + ipc: host + restart: always + + arbPostHearingAssistant-gradio-ui: + image: ${REGISTRY:-opea}/arb-post-hearing-assistant-gradio-ui:${TAG:-latest} + container_name: arb-post-hearing-assistant-gaudi-ui-server + depends_on: + - arbPostHearingAssistant-gaudi-backend-server + ports: + - "${FRONTEND_SERVICE_PORT:-5173}:5173" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT} + - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + +networks: + default: + driver: bridge + +volumes: + data: diff --git a/ArbPostHearingAssistant/docker_compose/intel/hpu/gaudi/compose_tgi.yaml b/ArbPostHearingAssistant/docker_compose/intel/hpu/gaudi/compose_tgi.yaml new file mode 100644 index 0000000000..3664ff31ca --- /dev/null +++ b/ArbPostHearingAssistant/docker_compose/intel/hpu/gaudi/compose_tgi.yaml @@ -0,0 +1,98 @@ +# Copyright (C) 2025 Zensar Technologies Private Ltd. +# SPDX-License-Identifier: Apache-2.0 + +services: + tgi-gaudi-server: + image: ghcr.io/huggingface/tgi-gaudi:2.3.1 + container_name: arb-post-hearing-assistant-gaudi-tgi-server + ports: + - ${LLM_ENDPOINT_PORT:-8008}:80 + volumes: + - "${MODEL_CACHE}:/data" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HF_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + ENABLE_HPU_GRAPH: true + LIMIT_HPU_GRAPH: true + USE_FLASH_ATTENTION: true + FLASH_ATTENTION_RECOMPUTE: true + host_ip: ${host_ip} + LLM_ENDPOINT_PORT: ${LLM_ENDPOINT_PORT} + runtime: habana + cap_add: + - SYS_NICE + ipc: host + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + command: --model-id ${LLM_MODEL_ID} --max-input-length ${MAX_INPUT_TOKENS} --max-total-tokens ${MAX_TOTAL_TOKENS} + + llm-arbPostHearingAssistant-tgi: + image: ${REGISTRY:-opea}/llm-arb-post-hearing-assistant:${TAG:-latest} + container_name: arb-post-hearing-assistant-gaudi-llm-server + depends_on: + tgi-gaudi-server: + condition: service_healthy + ports: + - ${LLM_PORT:-9000}:9000 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HF_TOKEN} + MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS} + MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS} + LLM_ENDPOINT: ${LLM_ENDPOINT} + LLM_MODEL_ID: ${LLM_MODEL_ID} + OPEA_ARB_POSTHEARING_ASSISTANT_COMPONENT_NAME: ${OPEA_ARB_POSTHEARING_ASSISTANT_COMPONENT_NAME} + LOGFLAG: ${LOGFLAG:-False} + restart: unless-stopped + + arbPostHearingAssistant-gaudi-backend-server: + image: ${REGISTRY:-opea}/arb-post-hearing-assistant:${TAG:-latest} + container_name: arb-post-hearing-assistant-gaudi-backend-server + depends_on: + - tgi-gaudi-server + - llm-arbPostHearingAssistant-tgi + ports: + - "${BACKEND_SERVICE_PORT:-8888}:8888" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} + - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + - LLM_SERVICE_PORT=${LLM_PORT} + - ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP} + - ASR_SERVICE_PORT=${ASR_SERVICE_PORT} + ipc: host + restart: always + + arbPostHearingAssistant-gradio-ui: + image: ${REGISTRY:-opea}/arb-post-hearing-assistant-gradio-ui:${TAG:-latest} + container_name: 
arb-post-hearing-assistant-gaudi-ui-server + depends_on: + - arbPostHearingAssistant-gaudi-backend-server + ports: + - "${FRONTEND_SERVICE_PORT:-5173}:5173" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT} + - DOC_BASE_URL=${BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/ArbPostHearingAssistant/docker_compose/intel/set_env.sh b/ArbPostHearingAssistant/docker_compose/intel/set_env.sh new file mode 100644 index 0000000000..f21595cb12 --- /dev/null +++ b/ArbPostHearingAssistant/docker_compose/intel/set_env.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +# Copyright (C) 2025 Zensar Technologies Private Ltd. +# SPDX-License-Identifier: Apache-2.0 +SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +pushd "${SCRIPT_DIR}/../../.." > /dev/null +source .set_env.sh +popd > /dev/null + +export host_ip=$(hostname -I | awk '{print $1}') # Example: host_ip="192.168.1.1" +export no_proxy="${no_proxy},${host_ip}" # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" +export http_proxy=$http_proxy +export https_proxy=$https_proxy +export HF_TOKEN=${HF_TOKEN} #Enter your HF Token here + +export LLM_ENDPOINT_PORT=8008 +export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.2" + +export BLOCK_SIZE=128 +export MAX_NUM_SEQS=256 +export MAX_SEQ_LEN_TO_CAPTURE=2048 +export NUM_CARDS=1 +export MAX_INPUT_TOKENS=1024 +export MAX_TOTAL_TOKENS=2048 + +export LLM_PORT=9000 +export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" +export OPEA_ARB_POSTHEARING_ASSISTANT_COMPONENT_NAME="OpeaArbPostHearingAssistantTgi" # OpeaArbPostHearingAssistantVllm +export FRONTEND_SERVICE_PORT=5173 + +export MEGA_SERVICE_HOST_IP=${host_ip} #Example: MEGA_SERVICE_HOST_IP="localhost" +export LLM_SERVICE_HOST_IP=${host_ip} #Example: LLM_SERVICE_HOST_IP="localhost" + +# uncomment below during development +# export VLLM_SKIP_WARMUP=true + +export BACKEND_SERVICE_PORT=8888 +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_SERVICE_PORT}/v1/arb-post-hearing" + +export LOGFLAG=True diff --git a/ArbPostHearingAssistant/docker_image_build/build.yaml b/ArbPostHearingAssistant/docker_image_build/build.yaml new file mode 100644 index 0000000000..3ab58ba7ec --- /dev/null +++ b/ArbPostHearingAssistant/docker_image_build/build.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2025 Zensar Technologies Private Ltd. 
+# SPDX-License-Identifier: Apache-2.0 + +services: + arb-post-hearing-assistant: + build: + args: + IMAGE_REPO: ${REGISTRY} + BASE_TAG: ${TAG} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + context: ../ + dockerfile: ./Dockerfile + image: ${REGISTRY:-opea}/arb-post-hearing-assistant:${TAG:-latest} + arb-post-hearing-assistant-gradio-ui: + build: + args: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + context: ../ui + dockerfile: ./docker/Dockerfile.gradio + extends: arb-post-hearing-assistant + image: ${REGISTRY:-opea}/arb-post-hearing-assistant-gradio-ui:${TAG:-latest} + llm-arb-post-hearing-assistant: + build: + context: GenAIComps + dockerfile: comps/arb_post_hearing_assistant/src/Dockerfile + extends: arb-post-hearing-assistant + image: ${REGISTRY:-opea}/llm-arb-post-hearing-assistant:${TAG:-latest} diff --git a/ArbPostHearingAssistant/kubernetes/gmc/README.md b/ArbPostHearingAssistant/kubernetes/gmc/README.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ArbPostHearingAssistant/kubernetes/helm/README.md b/ArbPostHearingAssistant/kubernetes/helm/README.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/ArbPostHearingAssistant/tests/README.md b/ArbPostHearingAssistant/tests/README.md new file mode 100644 index 0000000000..ba28b90048 --- /dev/null +++ b/ArbPostHearingAssistant/tests/README.md @@ -0,0 +1,15 @@ +# DocSum E2E test scripts + +## Set the required environment variable + +```bash +export HF_TOKEN="Your_Huggingface_API_Token" +``` + +## Run test + +On Intel Xeon with TGI: + +```bash +bash test_compose_tgi_on_xeon.sh +``` diff --git a/ArbPostHearingAssistant/tests/test_compose_tgi_on_xeon.sh b/ArbPostHearingAssistant/tests/test_compose_tgi_on_xeon.sh new file mode 100644 index 0000000000..e8ab41c43b --- /dev/null +++ b/ArbPostHearingAssistant/tests/test_compose_tgi_on_xeon.sh @@ -0,0 +1,161 @@ +#!/bin/bash +# Copyright (C) 2025 Zensar Technologies Private Ltd. +# SPDX-License-Identifier: Apache-2.0 + +set -xe + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +export http_proxy=$http_proxy +export https_proxy=$https_proxy +export host_ip=$(hostname -I | awk '{print $1}') +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" +export REGISTRY=${IMAGE_REPO} +export TAG=${IMAGE_TAG} + +source $WORKPATH/docker_compose/intel/set_env.sh +export MODEL_CACHE=${model_cache:-"./data"} + +export MAX_INPUT_TOKENS=2048 +export MAX_TOTAL_TOKENS=4096 + +export OPEA_ARB_POSTHEARING_ASSISTANT_COMPONENT_NAME="OpeaArbPostHearingAssistantTgi" + +# Get the root folder of the current script +ROOT_FOLDER=$(dirname "$(readlink -f "$0")") + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + pushd GenAIComps + echo "GenAIComps test commit is $(git rev-parse HEAD)" + docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . + popd && sleep 1s + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
+ service_list="arb-post-hearing-assistant arb-post-hearing-assistant-gradio-ui llm-arb-post-hearing-assistant" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker images && sleep 1s +} + +function start_services() { + cd $WORKPATH/docker_compose/intel/cpu/xeon/ + export no_proxy="localhost,127.0.0.1,$ip_address" + docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log + sleep 1m +} + +function validate_service() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local VALIDATE_TYPE="$5" + local INPUT_DATA="$6" + + if [[ $VALIDATE_TYPE == *"json"* ]]; then + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + else + echo "Skipping request: VALIDATE_TYPE does not contain 'json'." + fi + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + + # check response status + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + fi + # check response body + if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then + echo "EXPECTED_RESULT==> $EXPECTED_RESULT" + echo "RESPONSE_BODY==> $RESPONSE_BODY" + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." + fi + + sleep 1s +} + +function validate_microservices() { + # Check if the microservices are running correctly. + + # tgi for llm service + validate_service \ + "${host_ip}:${LLM_ENDPOINT_PORT}/generate" \ + "generated_text" \ + "tgi-server" \ + "arb-post-hearing-assistant-xeon-tgi-server" \ + "json" \ + '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' + + # llm microservice + validate_service \ + "${host_ip}:${LLM_PORT}/v1/arb-post-hearing" \ + "case_number" \ + "llm-arbPostHearingAssistant-tgi" \ + "arb-post-hearing-assistant-xeon-llm-server" \ + "json" \ + '{"messages": "[10:00 AM] Arbitrator Hon. Rebecca Lawson: Good morning. This hearing is now in session for Case No. ARB/2025/0917. Lets begin with appearances. [10:01 AM] Attorney Michael Grant for Mr. Jonathan Reed: Good morning Your Honor. I represent the claimant Mr. Jonathan Reed. [10:01 AM] Attorney Lisa Chen for Ms. Rachel Morgan: Good morning. I represent the respondent Ms. Rachel Morgan. [10:03 AM] Arbitrator Hon. Rebecca Lawson: Thank you. Lets proceed with Mr. Reeds opening statement. [10:04 AM] Attorney Michael Grant: Ms. Morgan failed to deliver services as per the agreement dated March 15 2023. We have submitted relevant documentation including email correspondence and payment records. The delay caused substantial financial harm to our client. [10:15 AM] Attorney Lisa Chen: We deny any breach of contract. The delays were due to regulatory issues outside our control. Furthermore Mr. Reed did not provide timely approvals which contributed to the delay. [10:30 AM] Arbitrator Hon. Rebecca Lawson: Lets turn to Clause Z of the agreement. Id like both parties to submit written briefs addressing the applicability of the force majeure clause and the timeline of approvals. [11:00 AM] Attorney Michael Grant: Understood. 
Well submit by the deadline. [11:01 AM] Attorney Lisa Chen: Agreed. [11:02 AM] Arbitrator Hon. Rebecca Lawson: The next hearing is scheduled for October 22 2025 at 1030 AM Eastern Time. Please ensure your witnesses are available for cross examination. [4:45 PM] Arbitrator Hon. Rebecca Lawson: This session is adjourned. Thank you everyone.","type":"text","language":"en"}' + +} + +function validate_megaservice_text() { + echo ">>> Checking text data in json format" + validate_service \ + "${host_ip}:${BACKEND_SERVICE_PORT}/v1/arb-post-hearing" \ + "summary" \ + "arbPostHearingAssistant-xeon-backend-server" \ + "arb-post-hearing-assistant-xeon-backend-server" \ + "json" \ + '{"messages": "[10:00 AM] Arbitrator Hon. Rebecca Lawson: Good morning. This hearing is now in session for Case No. ARB/2025/0917. Lets begin with appearances. [10:01 AM] Attorney Michael Grant for Mr. Jonathan Reed: Good morning Your Honor. I represent the claimant Mr. Jonathan Reed. [10:01 AM] Attorney Lisa Chen for Ms. Rachel Morgan: Good morning. I represent the respondent Ms. Rachel Morgan. [10:03 AM] Arbitrator Hon. Rebecca Lawson: Thank you. Lets proceed with Mr. Reeds opening statement. [10:04 AM] Attorney Michael Grant: Ms. Morgan failed to deliver services as per the agreement dated March 15 2023. We have submitted relevant documentation including email correspondence and payment records. The delay caused substantial financial harm to our client. [10:15 AM] Attorney Lisa Chen: We deny any breach of contract. The delays were due to regulatory issues outside our control. Furthermore Mr. Reed did not provide timely approvals which contributed to the delay. [10:30 AM] Arbitrator Hon. Rebecca Lawson: Lets turn to Clause Z of the agreement. Id like both parties to submit written briefs addressing the applicability of the force majeure clause and the timeline of approvals. [11:00 AM] Attorney Michael Grant: Understood. Well submit by the deadline. [11:01 AM] Attorney Lisa Chen: Agreed. [11:02 AM] Arbitrator Hon. Rebecca Lawson: The next hearing is scheduled for October 22 2025 at 1030 AM Eastern Time. Please ensure your witnesses are available for cross examination. [4:45 PM] Arbitrator Hon. Rebecca Lawson: This session is adjourned. Thank you everyone.","type":"text","language":"en"}' + +} + +function stop_docker() { + cd $WORKPATH/docker_compose/intel/cpu/xeon/ + docker compose -f compose_tgi.yaml stop && docker compose rm -f +} + +function main() { + + echo "::group:: Stopping any running Docker containers..." + stop_docker + echo "::endgroup::" + + echo "::group::build_docker_images" + if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi + echo "::endgroup::" + + echo "::group::start_services" + start_services + echo "::endgroup::" + + echo "::group:: Validating microservices" + validate_microservices + echo "::endgroup::" + + echo "::group::validate_megaservice_text" + validate_megaservice_text + echo "::endgroup::" + + echo "::group::stop_docker" + stop_docker + echo "::endgroup::" + + docker system prune -f + +} + +main diff --git a/ArbPostHearingAssistant/ui/docker/Dockerfile.gradio b/ArbPostHearingAssistant/ui/docker/Dockerfile.gradio new file mode 100644 index 0000000000..7739d8f05b --- /dev/null +++ b/ArbPostHearingAssistant/ui/docker/Dockerfile.gradio @@ -0,0 +1,34 @@ +# Copyright (C) 2025 Zensar Technologies Private Ltd. 
+# SPDX-License-Identifier: Apache-2.0
+
+# Use the official Python 3.11 slim image as the base image
+FROM python:3.11-slim
+
+# Set the default language environment variable
+ENV LANG=C.UTF-8
+
+# Define a build argument for architecture (default is "cpu")
+ARG ARCH="cpu"
+
+# Update the package list and install necessary packages
+RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing build-essential
+
+# Create a directory for the application
+RUN mkdir -p /home/user
+
+# Copy the application code and requirements file to the container
+COPY ./gradio/arb_post_hearing_assistant_ui_gradio.py /home/user/arb_post_hearing_assistant_ui_gradio.py
+COPY ./gradio/requirements.txt /home/user/requirements.txt
+
+# Install Python dependencies
+RUN pip install --no-cache-dir --upgrade pip setuptools && \
+    pip install --no-cache-dir -r /home/user/requirements.txt
+
+# Set the working directory
+WORKDIR /home/user/
+
+# Expose the port that the application will run on
+EXPOSE 5173
+
+# Define the command to run the application
+CMD ["python", "arb_post_hearing_assistant_ui_gradio.py"]
diff --git a/ArbPostHearingAssistant/ui/gradio/README.md b/ArbPostHearingAssistant/ui/gradio/README.md
new file mode 100644
index 0000000000..fb6aaaaf05
--- /dev/null
+++ b/ArbPostHearingAssistant/ui/gradio/README.md
@@ -0,0 +1,97 @@
+# Arbitration Post-Hearing Assistant
+
+The Arbitration Post-Hearing Assistant is a GenAI-based module designed to process and summarize post-hearing transcripts or arbitration-related documents. It intelligently extracts key entities and insights to assist arbitrators, legal teams, and case managers in managing case follow-ups efficiently.
+
+## Key Features
+
+Automated Information Extraction:
+Identifies and extracts essential details such as:
+
+- Case number
+- Parties involved (claimant/respondent)
+- Arbitrator(s)
+- Hearing date and time
+- Next hearing schedule and purpose
+- Hearing outcomes and reasons
+
+## Docker
+
+### Build UI Docker Image
+
+To build the frontend Docker image, navigate to the `GenAIExamples/ArbPostHearingAssistant/ui` directory and run the following command:
+
+```bash
+cd GenAIExamples/ArbPostHearingAssistant/ui
+docker build -t opea/arb-post-hearing-assistant-gradio-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile.gradio .
+```
+
+This command builds the Docker image with the tag `opea/arb-post-hearing-assistant-gradio-ui:latest`. It also passes the proxy settings as build arguments to ensure that the build process can access the internet if you are behind a corporate firewall.
+
+### Run UI Docker Image
+
+To run the frontend Docker image, navigate to the `GenAIExamples/ArbPostHearingAssistant/ui/docker` directory and execute the following commands:
+
+```bash
+cd GenAIExamples/ArbPostHearingAssistant/ui/docker
+
+ip_address=$(hostname -I | awk '{print $1}')
+docker run -d -p 5173:5173 --ipc=host \
+  -e http_proxy=$http_proxy \
+  -e https_proxy=$https_proxy \
+  -e no_proxy=$no_proxy \
+  -e BACKEND_SERVICE_ENDPOINT=http://${ip_address}:8888/v1/arb-post-hearing \
+  opea/arb-post-hearing-assistant-gradio-ui:latest
+```
+
+This command runs the Docker container in detached mode, mapping port 5173 of the host to port 5173 of the container. It also sets several environment variables, including the backend service endpoint (built from the `ip_address` captured above), which is required for the frontend to communicate with the backend service.
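+
+A quick way to confirm the UI container is serving (a simple check, assuming the default port mapping above; the Gradio app answers on its root path once it has finished starting):
+
+```bash
+# expect an HTTP 200 status code from the UI
+curl -s -o /dev/null -w "%{http_code}\n" http://localhost:5173/
+```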
+
+### Python
+
+To run the frontend application directly using Python, navigate to the `GenAIExamples/ArbPostHearingAssistant/ui/gradio` directory and run the following command:
+
+```bash
+cd GenAIExamples/ArbPostHearingAssistant/ui/gradio
+python arb_post_hearing_assistant_ui_gradio.py
+```
+
+This command starts the frontend application using Python.
+
+## 📸 Project Screenshots
+
+![project-screenshot](../../assets/img/arbritation_post_hearing_ui_gradio_text.png)
+
+## 🧐 Features
+
+Here are some of the project's features:
+
+- **Automated Case Extraction:** Extracts key arbitration details including case number, claimant/respondent, arbitrator, hearing dates, next hearing schedule, and outcome.
+- **Hearing Summarization:** Generates concise summaries of post-hearing proceedings.
+- **LLM-Powered Processing:** Integrates with vLLM or TGI backends for natural language understanding.
+- **Structured Output:** Returns all extracted information in JSON format for easy storage, display, or integration with case management systems.
+- **Easy Deployment:** Containerized microservice, lightweight and reusable across legal workflows.
+- **Typical Flow:**
+  1. Upload or stream post-hearing transcript.
+  2. LLM backend analyzes text and extracts entities.
+  3. Returns structured JSON with case details and summary.
+
+## Additional Information
+
+### Prerequisites
+
+Ensure you have Docker installed and running on your system. Also, make sure you have the necessary proxy settings configured if you are behind a corporate firewall.
+
+### Environment Variables
+
+- `http_proxy`: Proxy setting for HTTP connections.
+- `https_proxy`: Proxy setting for HTTPS connections.
+- `no_proxy`: Comma-separated list of hosts that should be excluded from proxying.
+- `BACKEND_SERVICE_ENDPOINT`: The endpoint of the backend service that the frontend will communicate with.
+
+### Troubleshooting
+
+- Docker Build Issues: If you encounter issues while building the Docker image, ensure that your proxy settings are correctly configured and that you have internet access.
+- Docker Run Issues: If the Docker container fails to start, check the environment variables and ensure that the backend service is running and accessible.
+
+This README file provides detailed instructions and explanations for building and running the Dockerized frontend application, as well as running it directly using Python. It also highlights the key features of the project and provides additional information for troubleshooting and configuring the environment.
diff --git a/ArbPostHearingAssistant/ui/gradio/arb_post_hearing_assistant_ui_gradio.py b/ArbPostHearingAssistant/ui/gradio/arb_post_hearing_assistant_ui_gradio.py
new file mode 100644
index 0000000000..92db2872e7
--- /dev/null
+++ b/ArbPostHearingAssistant/ui/gradio/arb_post_hearing_assistant_ui_gradio.py
@@ -0,0 +1,120 @@
+# Copyright (C) 2025 Zensar Technologies Private Ltd.
+# SPDX-License-Identifier: Apache-2.0 + +import json +import logging +import os +import re + +import gradio as gr +import requests +import uvicorn +from fastapi import FastAPI + +# Configure logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +class ArbPostHearingAssistantUI: + def __init__(self): + """Initialize class with headers and backend endpoint.""" + self.HEADERS = {"Content-Type": "application/json"} + self.BACKEND_SERVICE_ENDPOINT = os.getenv( + "BACKEND_SERVICE_ENDPOINT", "http://localhost:8888/v1/arb-post-hearing" + ) + + def extract_json(self, text: str): + """Extract and clean embedded JSON from text.""" + try: + match = re.search(r"\{.*\}", text, re.DOTALL) + if match: + json_str = match.group(0) + return json.loads(json_str) + except Exception as e: + logger.error("Error extracting JSON: %s", e) + return None + + def summarize_arbitration_transcript(self, transcript): + """Generate a summary for the given document content.""" + logger.info(">>> BACKEND_SERVICE_ENDPOINT - %s", self.BACKEND_SERVICE_ENDPOINT) + + data = {"messages": transcript, "type": "text", "language": "en"} + + try: + response = requests.post( + url=self.BACKEND_SERVICE_ENDPOINT, + headers=self.HEADERS, + data=json.dumps(data), + proxies={ + "http": os.environ.get("http_proxy", ""), + "https": os.environ.get("https_proxy", ""), + }, + ) + + if response.status_code == 200: + result = response.json() + raw_text = result["choices"][0]["message"]["content"] + extracted_json = self.extract_json(raw_text) + + # Return pretty JSON if available + if extracted_json: + return json.dumps(extracted_json, indent=4) + + # Fallback if no JSON found + return json.dumps({"message": "something went wrong, please try again"}, indent=4) + + except requests.exceptions.RequestException as e: + logger.error("Request exception: %s", e) + return json.dumps({"message": "something went wrong, please try again"}, indent=4) + + return json.dumps({"message": "something went wrong, please try again"}, indent=4) + + def render(self): + """Render the Gradio UI.""" + logger.info(">>> Rendering Gradio UI") + + with gr.Blocks() as text_ui: + with gr.Row(): + with gr.Column(): + input_text = gr.TextArea( + label="Enter your arbitration transcript to process:", + placeholder="Please enter arbitration transcript before submitting", + lines=20, + ) + submit_btn = gr.Button("Generate") + with gr.Column(): + # ✅ Use Textbox to show formatted JSON properly + generated_text = gr.JSON(label="Generated arbitration Summary", height=462, max_height=500) + submit_btn.click(fn=self.summarize_arbitration_transcript, inputs=[input_text], outputs=[generated_text]) + + with gr.Blocks() as self.demo: + gr.Markdown( + "

⚖️ Arbitration Post Hearing Assistant

", + elem_classes=["centered-title"], + ) + with gr.Tabs(): + with gr.TabItem("Paste Arbitration Transcript"): + text_ui.render() + + return self.demo + + +# FastAPI + Gradio Integration +app = FastAPI() + +demo = ArbPostHearingAssistantUI().render() +demo.queue() +app = gr.mount_gradio_app(app, demo, path="/") + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--host", type=str, default="0.0.0.0") + parser.add_argument("--port", type=int, default=5173) + + args = parser.parse_args() + logger.info(">>> Starting server at %s:%d", args.host, args.port) + + uvicorn.run("arb_post_hearing_assistant_ui_gradio:app", host=args.host, port=args.port) diff --git a/ArbPostHearingAssistant/ui/gradio/requirements.txt b/ArbPostHearingAssistant/ui/gradio/requirements.txt new file mode 100644 index 0000000000..095dee2b06 --- /dev/null +++ b/ArbPostHearingAssistant/ui/gradio/requirements.txt @@ -0,0 +1,4 @@ +gradio>5.22.0,<=5.34.0 +numpy==1.26.4 +Pillow==10.3.0 + diff --git a/AudioQnA/Dockerfile.openEuler b/AudioQnA/Dockerfile.openEuler new file mode 100644 index 0000000000..3a6c5928a5 --- /dev/null +++ b/AudioQnA/Dockerfile.openEuler @@ -0,0 +1,10 @@ +# Copyright (C) 2025 Huawei Technologies Co., Ltd. +# SPDX-License-Identifier: Apache-2.0 + +ARG IMAGE_REPO=opea +ARG BASE_TAG=latest +FROM $IMAGE_REPO/comps-base:$BASE_TAG-openeuler + +COPY ./audioqna.py $HOME/audioqna.py + +ENTRYPOINT ["python", "audioqna.py"] \ No newline at end of file diff --git a/AudioQnA/docker_compose/amd/cpu/epyc/compose.yaml b/AudioQnA/docker_compose/amd/cpu/epyc/compose.yaml index 9d614bd738..588113d72b 100644 --- a/AudioQnA/docker_compose/amd/cpu/epyc/compose.yaml +++ b/AudioQnA/docker_compose/amd/cpu/epyc/compose.yaml @@ -26,7 +26,7 @@ services: https_proxy: ${https_proxy} restart: unless-stopped vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1 container_name: vllm-service ports: - ${LLM_SERVER_PORT:-3006}:80 diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/README.md b/AudioQnA/docker_compose/intel/cpu/xeon/README.md index 78be7c12da..bbb604c394 100644 --- a/AudioQnA/docker_compose/intel/cpu/xeon/README.md +++ b/AudioQnA/docker_compose/intel/cpu/xeon/README.md @@ -15,12 +15,19 @@ Note: The default LLM is `meta-llama/Meta-Llama-3-8B-Instruct`. Before deploying This section describes how to quickly deploy and test the AudioQnA service manually on an Intel® Xeon® processor. The basic steps are: -1. [Access the Code](#access-the-code) -2. [Configure the Deployment Environment](#configure-the-deployment-environment) -3. [Deploy the Services Using Docker Compose](#deploy-the-services-using-docker-compose) -4. [Check the Deployment Status](#check-the-deployment-status) -5. [Validate the Pipeline](#validate-the-pipeline) -6. 
[Cleanup the Deployment](#cleanup-the-deployment) +- [Deploying AudioQnA on Intel® Xeon® Processors](#deploying-audioqna-on-intel-xeon-processors) + - [Table of Contents](#table-of-contents) + - [AudioQnA Quick Start Deployment](#audioqna-quick-start-deployment) + - [Access the Code](#access-the-code) + - [Configure the Deployment Environment](#configure-the-deployment-environment) + - [Deploy the Services Using Docker Compose](#deploy-the-services-using-docker-compose) + - [Check the Deployment Status](#check-the-deployment-status) + - [Validate the Pipeline](#validate-the-pipeline) + - [Cleanup the Deployment](#cleanup-the-deployment) + - [AudioQnA Docker Compose Files](#audioqna-docker-compose-files) + - [Running LLM models with remote endpoints](#running-llm-models-with-remote-endpoints) + - [Validate MicroServices](#validate-microservices) + - [Conclusion](#conclusion) ### Access the Code @@ -59,7 +66,7 @@ To deploy the AudioQnA services, execute the `docker compose up` command with th ```bash cd docker_compose/intel/cpu/xeon -docker compose -f compose.yaml up -d +docker compose -f compose_tgi.yaml up -d ``` > **Note**: developers should build docker image from source when: @@ -80,6 +87,13 @@ Please refer to the table below to build different microservices from source: | MegaService | [MegaService build guide](../../../../README_miscellaneous.md#build-megaservice-docker-image) | | UI | [Basic UI build guide](../../../../README_miscellaneous.md#build-ui-docker-image) | +(Optional) Enabling monitoring using the command: + +```bash +cd docker_compose/intel/cpu/xeon +docker compose -f compose_tgi.yaml -f compose.monitoring.yaml up -d +``` + ### Check the Deployment Status After running docker compose, check if all the containers launched via docker compose have started: @@ -127,19 +141,26 @@ curl http://${host_ip}:3008/v1/audioqna \ To stop the containers associated with the deployment, execute the following command: ```bash -docker compose -f compose.yaml down +docker compose -f compose_tgi.yaml down +``` + +If monitoring is enabled, stop the containers using the following command: + +```bash +docker compose -f compose_tgi.yaml -f compose.monitoring.yaml down ``` ## AudioQnA Docker Compose Files In the context of deploying an AudioQnA pipeline on an Intel® Xeon® platform, we can pick and choose different large language model serving frameworks, or single English TTS/multi-language TTS component. The table below outlines the various configurations that are available as part of the application. These configurations can be used as templates and can be extended to different components available in [GenAIComps](https://github.com/opea-project/GenAIComps.git). -| File | Description | -| -------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| [compose.yaml](./compose.yaml) | Default compose file using vllm as serving framework and redis as vector database | -| [compose_tgi.yaml](./compose_tgi.yaml) | The LLM serving framework is TGI. All other configurations remain the same as the default | -| [compose_multilang.yaml](./compose_multilang.yaml) | The TTS component is GPT-SoVITS. All other configurations remain the same as the default | -| [compose_remote.yaml](./compose_remote.yaml) | The LLM used is hosted on a remote server and an endpoint is used to access this model. 
Additional environment variables need to be set before running. See [instructions](#running-llm-models-with-remote-endpoints) below. | +| File | Description | +| ---------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| [compose.yaml](./compose.yaml) | Default compose file using vllm as serving framework and redis as vector database | +| [compose_tgi.yaml](./compose_tgi.yaml) | The LLM serving framework is TGI. All other configurations remain the same as the default | +| [compose_multilang.yaml](./compose_multilang.yaml) | The TTS component is GPT-SoVITS. All other configurations remain the same as the default | +| [compose_remote.yaml](./compose_remote.yaml) | The LLM used is hosted on a remote server and an endpoint is used to access this model. Additional environment variables need to be set before running. See [instructions](#running-llm-models-with-remote-endpoints) below. | +| [compose.monitoring.yaml](./compose.monitoring.yaml) | Helper file for monitoring features. Can be used along with any compose files | ### Running LLM models with remote endpoints diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/README_vllm.md b/AudioQnA/docker_compose/intel/cpu/xeon/README_vllm.md index 8b94343e32..76e8eb2ea4 100644 --- a/AudioQnA/docker_compose/intel/cpu/xeon/README_vllm.md +++ b/AudioQnA/docker_compose/intel/cpu/xeon/README_vllm.md @@ -23,7 +23,7 @@ git clone https://github.com/opea-project/GenAIExamples.git If you are using the main branch, then you do not need to make the transition, the main branch is used by default ```bash -cd ~/searchqna-test/GenAIExamples/SearchQnA/docker_image_build +cd GenAIExamples/AudioQnA/docker_image_build git clone https://github.com/opea-project/GenAIComps.git ``` @@ -31,7 +31,7 @@ If you are using a specific branch or tag, then we perform git checkout to the d ```bash ### Replace "v1.3" with the code version you need (branch or tag) -cd cd ~/searchqna-test/GenAIExamples/SearchQnA/docker_image_build && git checkout v1.3 +cd GenAIExamples/AudioQnA/docker_image_build && git checkout v1.3 git clone https://github.com/opea-project/GenAIComps.git ``` @@ -74,7 +74,7 @@ export HF_TOKEN='your_huggingfacehub_token' ### Setting variables in the file set_env_vllm.sh ```bash -cd cd cd ~/searchqna-test/GenAIExamples/SearchQnA/docker_compose/amd/gpu/rocm +cd ~/searchqna-test/GenAIExamples/SearchQnA/docker_compose/amd/gpu/rocm ### The example uses the Nano text editor. You can use any convenient text editor nano set_env_vllm.sh ``` @@ -106,8 +106,8 @@ export https_proxy="Your_HTTPs_Proxy" ### 3.1. Deploying applications using Docker Compose ```bash -cd cd ~/audioqna-test/GenAIExamples/AudioQnA/docker_compose/amd/gpu/rocm/ -docker compose -f compose_vllm up -d +cd GenAIExamples/AudioQnA/docker_compose/intel/cpu/xeon/ +docker compose up -d ``` After starting the containers, you need to view their status with the command: @@ -126,6 +126,12 @@ The following containers should be running: Containers should not restart. +(Optional) Enabling monitoring using the command: + +```bash +docker compose -f compose.yaml -f compose.monitoring.yaml up -d +``` + #### 3.1.1. Configuring GPU forwarding By default, in the Docker Compose file, compose_vllm.yaml is configured to forward all GPUs to the audioqna-vllm-service container. 
diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/compose.monitoring.yaml b/AudioQnA/docker_compose/intel/cpu/xeon/compose.monitoring.yaml new file mode 100644 index 0000000000..187427d348 --- /dev/null +++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose.monitoring.yaml @@ -0,0 +1,59 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + prometheus: + image: prom/prometheus:v2.52.0 + container_name: opea_prometheus + user: root + volumes: + - ./prometheus.yaml:/etc/prometheus/prometheus.yaml + - ./prometheus_data:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yaml' + ports: + - '9090:9090' + ipc: host + restart: unless-stopped + + grafana: + image: grafana/grafana:11.0.0 + container_name: grafana + volumes: + - ./grafana_data:/var/lib/grafana + - ./grafana/dashboards:/var/lib/grafana/dashboards + - ./grafana/provisioning:/etc/grafana/provisioning + user: root + environment: + GF_SECURITY_ADMIN_PASSWORD: admin + GF_RENDERING_CALLBACK_URL: http://grafana:3000/ + GF_LOG_FILTERS: rendering:debug + no_proxy: ${no_proxy} + host_ip: ${host_ip} + depends_on: + - prometheus + ports: + - '3000:3000' + ipc: host + restart: unless-stopped + + node-exporter: + image: prom/node-exporter + container_name: node-exporter + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + command: + - '--path.procfs=/host/proc' + - '--path.sysfs=/host/sys' + - --collector.filesystem.ignored-mount-points + - "^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)" + environment: + no_proxy: ${no_proxy} + ports: + - 9100:9100 + ipc: host + restart: always + deploy: + mode: global diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml b/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml index a9020a4b89..b48593a233 100644 --- a/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml +++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose.yaml @@ -25,7 +25,7 @@ services: https_proxy: ${https_proxy} restart: unless-stopped vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1 container_name: vllm-service ports: - ${LLM_SERVER_PORT:-3006}:80 diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml b/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml index 16b72813e2..21b3ecfc4d 100644 --- a/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml +++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose_multilang.yaml @@ -29,7 +29,7 @@ services: # - ./pretrained_models/:/home/user/GPT-SoVITS/GPT_SoVITS/pretrained_models/ restart: unless-stopped vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1 container_name: vllm-service ports: - ${LLM_SERVER_PORT:-3006}:80 diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/compose_openeuler.yaml b/AudioQnA/docker_compose/intel/cpu/xeon/compose_openeuler.yaml new file mode 100644 index 0000000000..5ae931a78f --- /dev/null +++ b/AudioQnA/docker_compose/intel/cpu/xeon/compose_openeuler.yaml @@ -0,0 +1,91 @@ +# Copyright (C) 2025 Huawei Technologies Co., Ltd. 
+# SPDX-License-Identifier: Apache-2.0 + +services: + whisper-service: + image: ${REGISTRY:-opea}/whisper:${TAG:-latest}-openeuler + container_name: whisper-service + ports: + - ${WHISPER_SERVER_PORT:-7066}:7066 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + restart: unless-stopped + speecht5-service: + image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}-openeuler + container_name: speecht5-service + ports: + - ${SPEECHT5_SERVER_PORT:-7055}:7055 + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + restart: unless-stopped + vllm-service: + image: openeuler/vllm-cpu:0.10.1-oe2403lts + container_name: vllm-service + ports: + - ${LLM_SERVER_PORT:-3006}:80 + volumes: + - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub" + shm_size: 128g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HF_TOKEN} + LLM_MODEL_ID: ${LLM_MODEL_ID} + VLLM_TORCH_PROFILER_DIR: "/mnt" + LLM_SERVER_PORT: ${LLM_SERVER_PORT} + VLLM_CPU_OMP_THREADS_BIND: all + VLLM_CPU_KVCACHE_SPACE: 30 + healthcheck: + test: ["CMD-SHELL", "curl -f http://$host_ip:${LLM_SERVER_PORT}/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 80 + audioqna-xeon-backend-server: + image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}-openeuler + container_name: audioqna-xeon-backend-server + depends_on: + - whisper-service + - vllm-service + - speecht5-service + ports: + - "3008:8888" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} + - WHISPER_SERVER_HOST_IP=${WHISPER_SERVER_HOST_IP} + - WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT} + - LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP} + - LLM_SERVER_PORT=${LLM_SERVER_PORT} + - LLM_MODEL_ID=${LLM_MODEL_ID} + - SPEECHT5_SERVER_HOST_IP=${SPEECHT5_SERVER_HOST_IP} + - SPEECHT5_SERVER_PORT=${SPEECHT5_SERVER_PORT} + ipc: host + restart: always + audioqna-xeon-ui-server: + image: ${REGISTRY:-opea}/audioqna-ui:${TAG:-latest}-openeuler + container_name: audioqna-xeon-ui-server + depends_on: + - audioqna-xeon-backend-server + ports: + - "5173:5173" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - CHAT_URL=${BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/grafana/dashboards/download_opea_dashboard.sh b/AudioQnA/docker_compose/intel/cpu/xeon/grafana/dashboards/download_opea_dashboard.sh new file mode 100644 index 0000000000..52cc7632d3 --- /dev/null +++ b/AudioQnA/docker_compose/intel/cpu/xeon/grafana/dashboards/download_opea_dashboard.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +if ls *.json 1> /dev/null 2>&1; then + rm *.json +fi + +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/vllm_grafana.json +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/tgi_grafana.json +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/audioqna_megaservice_grafana.json +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/node_grafana.json diff --git 
a/AudioQnA/docker_compose/intel/cpu/xeon/grafana/provisioning/dashboards/local.yaml b/AudioQnA/docker_compose/intel/cpu/xeon/grafana/provisioning/dashboards/local.yaml new file mode 100644 index 0000000000..13922a769b --- /dev/null +++ b/AudioQnA/docker_compose/intel/cpu/xeon/grafana/provisioning/dashboards/local.yaml @@ -0,0 +1,14 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: 1 + +providers: +- name: 'default' + orgId: 1 + folder: '' + type: file + disableDeletion: false + updateIntervalSeconds: 10 #how often Grafana will scan for changed dashboards + options: + path: /var/lib/grafana/dashboards diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/grafana/provisioning/datasources/datasource.yaml b/AudioQnA/docker_compose/intel/cpu/xeon/grafana/provisioning/datasources/datasource.yaml new file mode 100644 index 0000000000..a206521d67 --- /dev/null +++ b/AudioQnA/docker_compose/intel/cpu/xeon/grafana/provisioning/datasources/datasource.yaml @@ -0,0 +1,54 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# config file version +apiVersion: 1 + +# list of datasources that should be deleted from the database +deleteDatasources: + - name: Prometheus + orgId: 1 + +# list of datasources to insert/update depending +# what's available in the database +datasources: + # name of the datasource. Required +- name: Prometheus + # datasource type. Required + type: prometheus + # access mode. direct or proxy. Required + access: proxy + # org id. will default to orgId 1 if not specified + orgId: 1 + # url + url: http://$host_ip:9090 + # database password, if used + password: + # database user, if used + user: + # database name, if used + database: + # enable/disable basic auth + basicAuth: false + # basic auth username, if used + basicAuthUser: + # basic auth password, if used + basicAuthPassword: + # enable/disable with credentials headers + withCredentials: + # mark as default datasource. Max one per org + isDefault: true + # fields that will be converted to json and stored in json_data + jsonData: + httpMethod: GET + graphiteVersion: "1.1" + tlsAuth: false + tlsAuthWithCACert: false + # json object of data that will be encrypted. + secureJsonData: + tlsCACert: "..." + tlsClientCert: "..." + tlsClientKey: "..." + version: 1 + # allow users to edit datasources from the UI. 
+ editable: true diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/prometheus.yaml b/AudioQnA/docker_compose/intel/cpu/xeon/prometheus.yaml new file mode 100644 index 0000000000..0f1e5ff81d --- /dev/null +++ b/AudioQnA/docker_compose/intel/cpu/xeon/prometheus.yaml @@ -0,0 +1,29 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# [IP_ADDR]:{PORT_OUTSIDE_CONTAINER} -> {PORT_INSIDE_CONTAINER} / {PROTOCOL} +global: + scrape_interval: 5s + external_labels: + monitor: "my-monitor" +scrape_configs: + - job_name: "prometheus" + static_configs: + - targets: ["opea_prometheus:9090"] + - job_name: "vllm" + metrics_path: /metrics + static_configs: + - targets: ["vllm-service:80"] + - job_name: "tgi" + metrics_path: /metrics + static_configs: + - targets: ["tgi-service:80"] + - job_name: "audioqna-backend-server" + metrics_path: /metrics + static_configs: + - targets: ["audioqna-xeon-backend-server:8888"] + - job_name: "prometheus-node-exporter" + scrape_interval: 30s + scrape_timeout: 25s + metrics_path: /metrics + static_configs: + - targets: ["node-exporter:9100"] diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/set_env.sh b/AudioQnA/docker_compose/intel/cpu/xeon/set_env.sh index 3fb001855d..0c0f0a410f 100644 --- a/AudioQnA/docker_compose/intel/cpu/xeon/set_env.sh +++ b/AudioQnA/docker_compose/intel/cpu/xeon/set_env.sh @@ -3,6 +3,8 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd) + # export host_ip= export host_ip=$(hostname -I | awk '{print $1}') export HF_TOKEN=${HF_TOKEN} @@ -21,3 +23,9 @@ export SPEECHT5_SERVER_PORT=7055 export LLM_SERVER_PORT=3006 export BACKEND_SERVICE_ENDPOINT=http://${host_ip}:3008/v1/audioqna + +pushd "${SCRIPT_DIR}/grafana/dashboards" > /dev/null +source download_opea_dashboard.sh +popd > /dev/null + +export no_proxy="${no_proxy},localhost,127.0.0.1,${host_ip},node-exporter,opea_prometheus,grafana" diff --git a/AudioQnA/docker_compose/intel/hpu/gaudi/README.md b/AudioQnA/docker_compose/intel/hpu/gaudi/README.md index ae2ede434e..031fc81f79 100644 --- a/AudioQnA/docker_compose/intel/hpu/gaudi/README.md +++ b/AudioQnA/docker_compose/intel/hpu/gaudi/README.md @@ -15,12 +15,18 @@ Note: The default LLM is `meta-llama/Meta-Llama-3-8B-Instruct`. Before deploying This section describes how to quickly deploy and test the AudioQnA service manually on an Intel® Gaudi® processor. The basic steps are: -1. [Access the Code](#access-the-code) -2. [Configure the Deployment Environment](#configure-the-deployment-environment) -3. [Deploy the Services Using Docker Compose](#deploy-the-services-using-docker-compose) -4. [Check the Deployment Status](#check-the-deployment-status) -5. [Validate the Pipeline](#validate-the-pipeline) -6. 
[Cleanup the Deployment](#cleanup-the-deployment) +- [Deploying AudioQnA on Intel® Gaudi® Processors](#deploying-audioqna-on-intel-gaudi-processors) + - [Table of Contents](#table-of-contents) + - [AudioQnA Quick Start Deployment](#audioqna-quick-start-deployment) + - [Access the Code](#access-the-code) + - [Configure the Deployment Environment](#configure-the-deployment-environment) + - [Deploy the Services Using Docker Compose](#deploy-the-services-using-docker-compose) + - [Check the Deployment Status](#check-the-deployment-status) + - [Validate the Pipeline](#validate-the-pipeline) + - [Cleanup the Deployment](#cleanup-the-deployment) + - [AudioQnA Docker Compose Files](#audioqna-docker-compose-files) + - [Validate MicroServices](#validate-microservices) + - [Conclusion](#conclusion) ### Access the Code @@ -79,6 +85,13 @@ Please refer to the table below to build different microservices from source: | MegaService | [MegaService build guide](../../../../README_miscellaneous.md#build-megaservice-docker-image) | | UI | [Basic UI build guide](../../../../README_miscellaneous.md#build-ui-docker-image) | +(Optional) Enabling monitoring using the command: + +```bash +cd docker_compose/intel/hpu/gaudi +docker compose -f compose.yaml -f compose.monitoring.yaml up -d +``` + ### Check the Deployment Status After running docker compose, check if all the containers launched via docker compose have started: @@ -128,6 +141,12 @@ To stop the containers associated with the deployment, execute the following com docker compose -f compose.yaml down ``` +If monitoring is enabled, stop the containers using the following command: + +```bash +docker compose -f compose.yaml -f compose.monitoring.yaml down +``` + ## AudioQnA Docker Compose Files In the context of deploying an AudioQnA pipeline on an Intel® Gaudi® platform, we can pick and choose different large language model serving frameworks. The table below outlines the various configurations that are available as part of the application. These configurations can be used as templates and can be extended to different components available in [GenAIComps](https://github.com/opea-project/GenAIComps.git). 
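+Whichever compose file is chosen, if monitoring is enabled a quick smoke test can confirm the stack is healthy. The check below is illustrative and assumes the default ports published by `compose.monitoring.yaml` (9090 for Prometheus, 3000 for Grafana):
+
+```bash
+# Prometheus should list its scrape jobs (vllm/tgi, the AudioQnA backend, node-exporter).
+curl -s http://${host_ip}:9090/api/v1/targets | grep -o '"job":"[^"]*"' | sort -u
+# Grafana should report a healthy database; dashboards are served at http://<host_ip>:3000 (admin/admin by default).
+curl -s http://${host_ip}:3000/api/health
+```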
diff --git a/AudioQnA/docker_compose/intel/hpu/gaudi/compose.monitoring.yaml b/AudioQnA/docker_compose/intel/hpu/gaudi/compose.monitoring.yaml new file mode 100644 index 0000000000..d64b97673e --- /dev/null +++ b/AudioQnA/docker_compose/intel/hpu/gaudi/compose.monitoring.yaml @@ -0,0 +1,74 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + prometheus: + image: prom/prometheus:v2.52.0 + container_name: opea_prometheus + user: root + volumes: + - ./prometheus.yaml:/etc/prometheus/prometheus.yaml + - ./prometheus_data:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yaml' + ports: + - '9090:9090' + ipc: host + restart: unless-stopped + + grafana: + image: grafana/grafana:11.0.0 + container_name: grafana + volumes: + - ./grafana_data:/var/lib/grafana + - ./grafana/dashboards:/var/lib/grafana/dashboards + - ./grafana/provisioning:/etc/grafana/provisioning + user: root + environment: + GF_SECURITY_ADMIN_PASSWORD: admin + GF_RENDERING_CALLBACK_URL: http://grafana:3000/ + GF_LOG_FILTERS: rendering:debug + no_proxy: ${no_proxy} + host_ip: ${host_ip} + depends_on: + - prometheus + ports: + - '3000:3000' + ipc: host + restart: unless-stopped + + node-exporter: + image: prom/node-exporter + container_name: node-exporter + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + command: + - '--path.procfs=/host/proc' + - '--path.sysfs=/host/sys' + - --collector.filesystem.mount-points-exclude + - "^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)" + environment: + no_proxy: ${no_proxy} + ports: + - 9100:9100 + ipc: host + restart: always + deploy: + mode: global + + gaudi-metrics-exporter: + image: vault.habana.ai/gaudi-metric-exporter/metric-exporter:latest + privileged: true + container_name: gaudi-metrics-exporter + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + - /dev:/dev + deploy: + mode: global + ports: + - 41611:41611 + restart: unless-stopped diff --git a/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml index 3cfd68c9b1..b99050f722 100644 --- a/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -35,7 +35,7 @@ services: - SYS_NICE restart: unless-stopped vllm-service: - image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} + image: opea/vllm-gaudi:1.22.0 container_name: vllm-gaudi-service ports: - ${LLM_SERVER_PORT:-3006}:80 diff --git a/AudioQnA/docker_compose/intel/hpu/gaudi/grafana/dashboards/download_opea_dashboard.sh b/AudioQnA/docker_compose/intel/hpu/gaudi/grafana/dashboards/download_opea_dashboard.sh new file mode 100644 index 0000000000..0dcd3d9b04 --- /dev/null +++ b/AudioQnA/docker_compose/intel/hpu/gaudi/grafana/dashboards/download_opea_dashboard.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +if ls *.json 1> /dev/null 2>&1; then + rm *.json +fi + +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/vllm_grafana.json +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/tgi_grafana.json +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/gaudi_grafana_v2.json +wget 
https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/audioqna_megaservice_grafana.json +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/node_grafana.json diff --git a/AudioQnA/docker_compose/intel/hpu/gaudi/grafana/provisioning/dashboards/local.yaml b/AudioQnA/docker_compose/intel/hpu/gaudi/grafana/provisioning/dashboards/local.yaml new file mode 100644 index 0000000000..13922a769b --- /dev/null +++ b/AudioQnA/docker_compose/intel/hpu/gaudi/grafana/provisioning/dashboards/local.yaml @@ -0,0 +1,14 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: 1 + +providers: +- name: 'default' + orgId: 1 + folder: '' + type: file + disableDeletion: false + updateIntervalSeconds: 10 #how often Grafana will scan for changed dashboards + options: + path: /var/lib/grafana/dashboards diff --git a/AudioQnA/docker_compose/intel/hpu/gaudi/grafana/provisioning/datasources/datasource.yaml b/AudioQnA/docker_compose/intel/hpu/gaudi/grafana/provisioning/datasources/datasource.yaml new file mode 100644 index 0000000000..a206521d67 --- /dev/null +++ b/AudioQnA/docker_compose/intel/hpu/gaudi/grafana/provisioning/datasources/datasource.yaml @@ -0,0 +1,54 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# config file version +apiVersion: 1 + +# list of datasources that should be deleted from the database +deleteDatasources: + - name: Prometheus + orgId: 1 + +# list of datasources to insert/update depending +# what's available in the database +datasources: + # name of the datasource. Required +- name: Prometheus + # datasource type. Required + type: prometheus + # access mode. direct or proxy. Required + access: proxy + # org id. will default to orgId 1 if not specified + orgId: 1 + # url + url: http://$host_ip:9090 + # database password, if used + password: + # database user, if used + user: + # database name, if used + database: + # enable/disable basic auth + basicAuth: false + # basic auth username, if used + basicAuthUser: + # basic auth password, if used + basicAuthPassword: + # enable/disable with credentials headers + withCredentials: + # mark as default datasource. Max one per org + isDefault: true + # fields that will be converted to json and stored in json_data + jsonData: + httpMethod: GET + graphiteVersion: "1.1" + tlsAuth: false + tlsAuthWithCACert: false + # json object of data that will be encrypted. + secureJsonData: + tlsCACert: "..." + tlsClientCert: "..." + tlsClientKey: "..." + version: 1 + # allow users to edit datasources from the UI. 
+ editable: true diff --git a/AudioQnA/docker_compose/intel/hpu/gaudi/prometheus.yaml b/AudioQnA/docker_compose/intel/hpu/gaudi/prometheus.yaml new file mode 100644 index 0000000000..cdd5170266 --- /dev/null +++ b/AudioQnA/docker_compose/intel/hpu/gaudi/prometheus.yaml @@ -0,0 +1,34 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# [IP_ADDR]:{PORT_OUTSIDE_CONTAINER} -> {PORT_INSIDE_CONTAINER} / {PROTOCOL} +global: + scrape_interval: 5s + external_labels: + monitor: "my-monitor" +scrape_configs: + - job_name: "prometheus" + static_configs: + - targets: ["opea_prometheus:9090"] + - job_name: "vllm" + metrics_path: /metrics + static_configs: + - targets: ["vllm-gaudi-service:80"] + - job_name: "tgi" + metrics_path: /metrics + static_configs: + - targets: ["tgi-gaudi-service:80"] + - job_name: "audioqna-backend-server" + metrics_path: /metrics + static_configs: + - targets: ["audioqna-gaudi-backend-server:8888"] + - job_name: "prometheus-node-exporter" + scrape_interval: 30s + scrape_timeout: 25s + metrics_path: /metrics + static_configs: + - targets: ["node-exporter:9100"] + - job_name: "gaudi-metrics-exporter" + scrape_interval: 30s + metrics_path: /metrics + static_configs: + - targets: ["gaudi-metrics-exporter:41611"] diff --git a/AudioQnA/docker_compose/intel/hpu/gaudi/set_env.sh b/AudioQnA/docker_compose/intel/hpu/gaudi/set_env.sh index 4b19d19c08..4e18b81ac7 100644 --- a/AudioQnA/docker_compose/intel/hpu/gaudi/set_env.sh +++ b/AudioQnA/docker_compose/intel/hpu/gaudi/set_env.sh @@ -3,6 +3,8 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd) + # export host_ip= export host_ip=$(hostname -I | awk '{print $1}') export HF_TOKEN=${HF_TOKEN} @@ -26,3 +28,9 @@ export SPEECHT5_SERVER_PORT=7055 export LLM_SERVER_PORT=3006 export BACKEND_SERVICE_ENDPOINT=http://${host_ip}:3008/v1/audioqna + +pushd "${SCRIPT_DIR}/grafana/dashboards" > /dev/null +source download_opea_dashboard.sh +popd > /dev/null + +export no_proxy="${no_proxy},localhost,127.0.0.1,${host_ip},node-exporter,opea_prometheus,grafana" diff --git a/AudioQnA/docker_image_build/build.yaml b/AudioQnA/docker_image_build/build.yaml index e7688555c1..696a2bfdb6 100644 --- a/AudioQnA/docker_image_build/build.yaml +++ b/AudioQnA/docker_image_build/build.yaml @@ -13,12 +13,29 @@ services: context: ../ dockerfile: ./Dockerfile image: ${REGISTRY:-opea}/audioqna:${TAG:-latest} + audioqna-openeuler: + build: + args: + IMAGE_REPO: ${REGISTRY} + BASE_TAG: ${TAG} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + context: ../ + dockerfile: ./Dockerfile.openEuler + image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}-openeuler audioqna-ui: build: context: ../ui dockerfile: ./docker/Dockerfile extends: audioqna image: ${REGISTRY:-opea}/audioqna-ui:${TAG:-latest} + audioqna-ui-openeuler: + build: + context: ../ui + dockerfile: ./docker/Dockerfile.openEuler + extends: audioqna + image: ${REGISTRY:-opea}/audioqna-ui:${TAG:-latest}-openeuler audioqna-multilang: build: context: ../ @@ -37,6 +54,12 @@ services: dockerfile: comps/third_parties/whisper/src/Dockerfile extends: audioqna image: ${REGISTRY:-opea}/whisper:${TAG:-latest} + whisper-openeuler: + build: + context: GenAIComps + dockerfile: comps/third_parties/whisper/src/Dockerfile.openEuler + extends: audioqna + image: ${REGISTRY:-opea}/whisper:${TAG:-latest}-openeuler asr: build: context: GenAIComps @@ -61,6 +84,12 @@ services: 
dockerfile: comps/third_parties/speecht5/src/Dockerfile extends: audioqna image: ${REGISTRY:-opea}/speecht5:${TAG:-latest} + speecht5-openeuler: + build: + context: GenAIComps + dockerfile: comps/third_parties/speecht5/src/Dockerfile.openEuler + extends: audioqna + image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}-openeuler tts: build: context: GenAIComps @@ -73,18 +102,6 @@ services: dockerfile: comps/third_parties/gpt-sovits/src/Dockerfile extends: audioqna image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest} - vllm: - build: - context: vllm - dockerfile: docker/Dockerfile.cpu - extends: audioqna - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} - vllm-gaudi: - build: - context: vllm-fork - dockerfile: Dockerfile.hpu - extends: audioqna - image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} vllm-rocm: build: context: GenAIComps diff --git a/AudioQnA/kubernetes/helm/cpu-openeuler-values.yaml b/AudioQnA/kubernetes/helm/cpu-openeuler-values.yaml new file mode 100644 index 0000000000..a9f0559098 --- /dev/null +++ b/AudioQnA/kubernetes/helm/cpu-openeuler-values.yaml @@ -0,0 +1,40 @@ +# Copyright (C) 2025 Huawei Technologies Co., Ltd. +# SPDX-License-Identifier: Apache-2.0 + +# This file is based on cpu-values.yaml and overrides image tags to 'latest-openeuler' +# for all enabled services to run on openEuler. +image: + tag: latest-openeuler + +tgi: + enabled: false + +# Overrides from cpu-values.yaml +vllm: + enabled: true + image: + repository: openeuler/vllm-cpu + tag: 0.10.1-oe2403lts + +speecht5: + enabled: true + image: + tag: latest-openeuler + +whisper: + enabled: true + image: + tag: latest-openeuler + +gpt-sovits: + enabled: false + +nginx: + image: + tag: latest-openeuler + service: + type: NodePort + +audioqna-ui: + image: + tag: latest-openeuler diff --git a/AudioQnA/tests/test_compose_multilang_on_xeon.sh b/AudioQnA/tests/test_compose_multilang_on_xeon.sh index 770838c1e2..22e569dc26 100644 --- a/AudioQnA/tests/test_compose_multilang_on_xeon.sh +++ b/AudioQnA/tests/test_compose_multilang_on_xeon.sh @@ -25,14 +25,8 @@ function build_docker_images() { docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/vllm-project/vllm.git - cd ./vllm/ - VLLM_VER=v0.10.0 - echo "Check out vLLM tag ${VLLM_VER}" - git checkout ${VLLM_VER} &> /dev/null && cd ../ - echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="audioqna-multilang audioqna-ui whisper gpt-sovits vllm" + service_list="audioqna-multilang audioqna-ui whisper gpt-sovits" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -41,6 +35,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ export host_ip=${ip_address} + export no_proxy="localhost,127.0.0.1,$ip_address" source set_env.sh # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env diff --git a/AudioQnA/tests/test_compose_on_epyc.sh b/AudioQnA/tests/test_compose_on_epyc.sh index 6fc56775a8..ac154e3632 100644 --- a/AudioQnA/tests/test_compose_on_epyc.sh +++ b/AudioQnA/tests/test_compose_on_epyc.sh @@ -27,20 +27,8 @@ function build_docker_images() { docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . 
popd && sleep 1s - git clone https://github.com/vllm-project/vllm.git - cd ./vllm/ - VLLM_VER=v0.10.0 - echo "Check out vLLM tag ${VLLM_VER}" - git checkout ${VLLM_VER} &>/dev/null - VLLM_REQ_FILE="requirements/cpu.txt" - if ! grep -q "^transformers" "$VLLM_REQ_FILE"; then - echo "Adding transformers<4.54.0 to $VLLM_REQ_FILE" - echo "transformers<4.54.0" >>"$VLLM_REQ_FILE" - fi - cd ../ - echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="audioqna audioqna-ui whisper speecht5 vllm" + service_list="audioqna audioqna-ui whisper speecht5" docker compose -f build.yaml build ${service_list} --no-cache >${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -49,6 +37,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/amd/cpu/epyc/ export host_ip=${ip_address} + export no_proxy="localhost,127.0.0.1,$ip_address" source set_env.sh # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env diff --git a/AudioQnA/tests/test_compose_on_gaudi.sh b/AudioQnA/tests/test_compose_on_gaudi.sh index c24f5ff82e..b42d5cee72 100644 --- a/AudioQnA/tests/test_compose_on_gaudi.sh +++ b/AudioQnA/tests/test_compose_on_gaudi.sh @@ -25,14 +25,8 @@ function build_docker_images() { docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/HabanaAI/vllm-fork.git - cd vllm-fork/ - VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0 - echo "Check out vLLM tag ${VLLM_FORK_VER}" - git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../ - echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="audioqna audioqna-ui whisper-gaudi speecht5-gaudi vllm-gaudi" + service_list="audioqna audioqna-ui whisper-gaudi speecht5-gaudi" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -41,14 +35,15 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/hpu/gaudi export host_ip=${ip_address} + export no_proxy="localhost,127.0.0.1,$ip_address" source set_env.sh # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env # Start Docker Containers - docker compose up -d > ${LOG_PATH}/start_services_with_compose.log + docker compose -f compose.yaml -f compose.monitoring.yaml up -d > ${LOG_PATH}/start_services_with_compose.log n=0 until [[ "$n" -ge 200 ]]; do - docker logs vllm-gaudi-service > $LOG_PATH/vllm_service_start.log 2>&1 + docker logs vllm-gaudi-service 2>&1| tee $LOG_PATH/vllm_service_start.log if grep -q complete $LOG_PATH/vllm_service_start.log; then break fi @@ -58,8 +53,8 @@ function start_services() { n=0 until [[ "$n" -ge 100 ]]; do - docker logs whisper-service > $LOG_PATH/whisper_service_start.log - if grep -q "Uvicorn server setup on port" $LOG_PATH/whisper_service_start.log; then + docker logs whisper-service 2>&1| tee $LOG_PATH/whisper_service_start.log + if grep -q "Uvicorn running on" $LOG_PATH/whisper_service_start.log; then break fi sleep 5s @@ -70,16 +65,15 @@ function start_services() { function validate_megaservice() { response=$(http_proxy="" curl http://${ip_address}:3008/v1/audioqna -XPOST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json') - # always print the log - docker logs whisper-service > $LOG_PATH/whisper-service.log - 
docker logs speecht5-service > $LOG_PATH/tts-service.log - docker logs vllm-gaudi-service > $LOG_PATH/vllm-gaudi-service.log - docker logs audioqna-gaudi-backend-server > $LOG_PATH/audioqna-gaudi-backend-server.log echo "$response" | sed 's/^"//;s/"$//' | base64 -d > speech.mp3 if [[ $(file speech.mp3) == *"RIFF"* ]]; then echo "Result correct." else + docker logs whisper-service > $LOG_PATH/whisper-service.log + docker logs speecht5-service > $LOG_PATH/tts-service.log + docker logs vllm-gaudi-service > $LOG_PATH/vllm-gaudi-service.log + docker logs audioqna-gaudi-backend-server > $LOG_PATH/audioqna-gaudi-backend-server.log echo "Result wrong." exit 1 fi @@ -87,7 +81,7 @@ function validate_megaservice() { function stop_docker() { cd $WORKPATH/docker_compose/intel/hpu/gaudi - docker compose -f compose.yaml stop && docker compose rm -f + docker compose -f compose.yaml -f compose.monitoring.yaml down } function main() { diff --git a/AudioQnA/tests/test_compose_on_rocm.sh b/AudioQnA/tests/test_compose_on_rocm.sh index 9456bf6bd1..8a235a6728 100644 --- a/AudioQnA/tests/test_compose_on_rocm.sh +++ b/AudioQnA/tests/test_compose_on_rocm.sh @@ -35,6 +35,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/amd/gpu/rocm/ + export no_proxy="localhost,127.0.0.1,$ip_address" source set_env.sh # Start Docker Containers docker compose up -d > ${LOG_PATH}/start_services_with_compose.log diff --git a/AudioQnA/tests/test_compose_on_xeon.sh b/AudioQnA/tests/test_compose_on_xeon.sh index a83e4a598e..023a5999b7 100644 --- a/AudioQnA/tests/test_compose_on_xeon.sh +++ b/AudioQnA/tests/test_compose_on_xeon.sh @@ -25,14 +25,8 @@ function build_docker_images() { docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/vllm-project/vllm.git - cd ./vllm/ - VLLM_VER=v0.10.0 - echo "Check out vLLM tag ${VLLM_VER}" - git checkout ${VLLM_VER} &> /dev/null && cd ../ - echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="audioqna audioqna-ui whisper speecht5 vllm" + service_list="audioqna audioqna-ui whisper speecht5" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -41,11 +35,12 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ export host_ip=${ip_address} + export no_proxy="localhost,127.0.0.1,$ip_address" source set_env.sh # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env # Start Docker Containers - docker compose up -d > ${LOG_PATH}/start_services_with_compose.log + docker compose -f compose.yaml -f compose.monitoring.yaml up -d > ${LOG_PATH}/start_services_with_compose.log n=0 until [[ "$n" -ge 200 ]]; do docker logs vllm-service > $LOG_PATH/vllm_service_start.log 2>&1 @@ -79,7 +74,7 @@ function validate_megaservice() { function stop_docker() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ - docker compose -f compose.yaml stop && docker compose rm -f + docker compose -f compose.yaml -f compose.monitoring.yaml down } function main() { diff --git a/AudioQnA/tests/test_compose_openeuler_on_xeon.sh b/AudioQnA/tests/test_compose_openeuler_on_xeon.sh new file mode 100644 index 0000000000..3b30adfe2d --- /dev/null +++ b/AudioQnA/tests/test_compose_openeuler_on_xeon.sh @@ -0,0 +1,104 @@ +#!/bin/bash +# Copyright (C) 2025 Huawei Technologies Co., Ltd. 
+# SPDX-License-Identifier: Apache-2.0 + +set -e +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" +export REGISTRY=${IMAGE_REPO} +export TAG=${IMAGE_TAG} +export MODEL_CACHE=${model_cache:-"./data"} + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + pushd GenAIComps + echo "GenAIComps test commit is $(git rev-parse HEAD)" + docker build --no-cache -t ${REGISTRY}/comps-base:${TAG}-openeuler --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.openEuler . + popd && sleep 1s + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." + service_list="audioqna-openeuler audioqna-ui-openeuler whisper-openeuler speecht5-openeuler" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker images && sleep 1s +} + +function start_services() { + cd $WORKPATH/docker_compose/intel/cpu/xeon/ + export host_ip=${ip_address} + export no_proxy="localhost,127.0.0.1,$ip_address" + source set_env.sh + # sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env + + # Start Docker Containers + docker compose -f compose_openeuler.yaml up -d > ${LOG_PATH}/start_services_with_compose.log + n=0 + until [[ "$n" -ge 200 ]]; do + docker logs vllm-service > $LOG_PATH/vllm_service_start.log 2>&1 + if grep -q complete $LOG_PATH/vllm_service_start.log; then + break + fi + sleep 5s + n=$((n+1)) + done +} + + +function validate_megaservice() { + response=$(http_proxy="" curl http://${ip_address}:3008/v1/audioqna -XPOST -d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' -H 'Content-Type: application/json') + # always print the log + docker logs whisper-service > $LOG_PATH/whisper-service.log + docker logs speecht5-service > $LOG_PATH/tts-service.log + docker logs vllm-service > $LOG_PATH/vllm-service.log + docker logs audioqna-xeon-backend-server > $LOG_PATH/audioqna-xeon-backend-server.log + echo "$response" | sed 's/^"//;s/"$//' | base64 -d > speech.mp3 + + if [[ $(file speech.mp3) == *"RIFF"* ]]; then + echo "Result correct." + else + echo "Result wrong." 
+ exit 1 + fi + +} + +function stop_docker() { + cd $WORKPATH/docker_compose/intel/cpu/xeon/ + docker compose -f compose_openeuler.yaml stop && docker compose rm -f +} + +function main() { + + echo "::group::stop_docker" + stop_docker + echo "::endgroup::" + + echo "::group::build_docker_images" + if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi + echo "::endgroup::" + + echo "::group::start_services" + start_services + echo "::endgroup::" + + echo "::group::validate_megaservice" + validate_megaservice + echo "::endgroup::" + + echo "::group::stop_docker" + stop_docker + docker system prune -f + echo "::endgroup::" + +} + +main diff --git a/AudioQnA/tests/test_compose_tgi_on_epyc.sh b/AudioQnA/tests/test_compose_tgi_on_epyc.sh index b442a155a8..e9c671839e 100644 --- a/AudioQnA/tests/test_compose_tgi_on_epyc.sh +++ b/AudioQnA/tests/test_compose_tgi_on_epyc.sh @@ -38,6 +38,7 @@ function build_docker_images() { function start_services() { echo $WORKPATH cd $WORKPATH/docker_compose/amd/cpu/epyc/ + export no_proxy="localhost,127.0.0.1,$ip_address" source set_env.sh # Start Docker Containers docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log diff --git a/AudioQnA/tests/test_compose_tgi_on_gaudi.sh b/AudioQnA/tests/test_compose_tgi_on_gaudi.sh index dd68dfe770..13f170c865 100644 --- a/AudioQnA/tests/test_compose_tgi_on_gaudi.sh +++ b/AudioQnA/tests/test_compose_tgi_on_gaudi.sh @@ -35,9 +35,10 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/hpu/gaudi export host_ip=${ip_address} + export no_proxy="localhost,127.0.0.1,$ip_address" source set_env.sh # Start Docker Containers - docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log + docker compose -f compose_tgi.yaml -f compose.monitoring.yaml up -d > ${LOG_PATH}/start_services_with_compose.log n=0 until [[ "$n" -ge 200 ]]; do docker logs tgi-gaudi-service > $LOG_PATH/tgi_service_start.log @@ -51,7 +52,7 @@ function start_services() { n=0 until [[ "$n" -ge 100 ]]; do docker logs whisper-service > $LOG_PATH/whisper_service_start.log - if grep -q "Uvicorn server setup on port" $LOG_PATH/whisper_service_start.log; then + if grep -q "Uvicorn running on" $LOG_PATH/whisper_service_start.log; then break fi sleep 5s @@ -80,7 +81,7 @@ function validate_megaservice() { function stop_docker() { cd $WORKPATH/docker_compose/intel/hpu/gaudi - docker compose -f compose_tgi.yaml stop && docker compose rm -f + docker compose -f compose_tgi.yaml -f compose.monitoring.yaml down } function main() { diff --git a/AudioQnA/tests/test_compose_tgi_on_xeon.sh b/AudioQnA/tests/test_compose_tgi_on_xeon.sh index bc1f945062..d6e588f266 100644 --- a/AudioQnA/tests/test_compose_tgi_on_xeon.sh +++ b/AudioQnA/tests/test_compose_tgi_on_xeon.sh @@ -35,9 +35,10 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ export host_ip=${ip_address} + export no_proxy="localhost,127.0.0.1,$ip_address" source set_env.sh # Start Docker Containers - docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log + docker compose -f compose_tgi.yaml -f compose.monitoring.yaml up -d > ${LOG_PATH}/start_services_with_compose.log n=0 until [[ "$n" -ge 200 ]]; do docker logs tgi-service > $LOG_PATH/tgi_service_start.log @@ -71,7 +72,7 @@ function validate_megaservice() { function stop_docker() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ - docker compose -f compose_tgi.yaml stop && docker 
compose rm -f + docker compose -f compose_tgi.yaml -f compose.monitoring.yaml down } function main() { diff --git a/AudioQnA/tests/test_compose_vllm_on_rocm.sh b/AudioQnA/tests/test_compose_vllm_on_rocm.sh index 925b0ba9d8..0d15500107 100644 --- a/AudioQnA/tests/test_compose_vllm_on_rocm.sh +++ b/AudioQnA/tests/test_compose_vllm_on_rocm.sh @@ -33,6 +33,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/amd/gpu/rocm/ + export no_proxy="localhost,127.0.0.1,$ip_address" source set_env_vllm.sh sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env diff --git a/AudioQnA/ui/docker/Dockerfile.openEuler b/AudioQnA/ui/docker/Dockerfile.openEuler new file mode 100644 index 0000000000..7392bb62fd --- /dev/null +++ b/AudioQnA/ui/docker/Dockerfile.openEuler @@ -0,0 +1,31 @@ +# Copyright (C) 2025 Huawei Technologies Co., Ltd. +# SPDX-License-Identifier: Apache-2.0 + +# Use node 20.11.1 as the base image +FROM openeuler/node:20.11.1-oe2403lts@sha256:25c790f93c2243b361919620c069812319f614fd697e32e433402ae706a19ffd + +# Update package manager and install Git +RUN yum update -y && \ + yum install -y \ + git && \ + yum clean all && \ + rm -rf /var/cache/yum + +# Copy the front-end code repository +COPY svelte /home/user/svelte + +# Set the working directory +WORKDIR /home/user/svelte + +# Install front-end dependencies +RUN npm install --package-lock-only +RUN npm ci + +# Build the front-end application +RUN npm run build + +# Expose the port of the front-end application +EXPOSE 5173 + +# Run the front-end application in preview mode +CMD ["npm", "run", "preview", "--", "--port", "5173", "--host", "0.0.0.0"] diff --git a/AudioQnA/ui/svelte/package.json b/AudioQnA/ui/svelte/package.json index c956cea6f6..928abd8b0a 100644 --- a/AudioQnA/ui/svelte/package.json +++ b/AudioQnA/ui/svelte/package.json @@ -18,7 +18,7 @@ "@fortawesome/free-solid-svg-icons": "6.2.0", "@playwright/test": "^1.45.2", "@sveltejs/adapter-auto": "^3.0.0", - "@sveltejs/kit": "2.0.0", + "@sveltejs/kit": "2.20.6", "@sveltejs/vite-plugin-svelte": "^3.0.0", "@tailwindcss/typography": "0.5.7", "@types/debug": "4.1.7", diff --git a/BrowserUseAgent/Dockerfile b/BrowserUseAgent/Dockerfile new file mode 100644 index 0000000000..ad7b3377db --- /dev/null +++ b/BrowserUseAgent/Dockerfile @@ -0,0 +1,21 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +ARG IMAGE_REPO=opea +ARG BASE_TAG=latest +FROM $IMAGE_REPO/comps-base:$BASE_TAG + +USER root + +COPY ./requirements.txt $HOME/requirements.txt +COPY ./browser_use_agent.py $HOME/browser_use_agent.py + +ARG uvpip='uv pip install --system --no-cache-dir' +RUN uv pip install --system --upgrade pip setuptools uv && \ + $uvpip pytest-playwright && \ + playwright install chromium --with-deps --no-shell && \ + $uvpip -r requirements.txt && \ + $uvpip posthog==5.4.0 + +USER user +ENTRYPOINT ["python", "browser_use_agent.py"] \ No newline at end of file diff --git a/BrowserUseAgent/README.md b/BrowserUseAgent/README.md new file mode 100644 index 0000000000..a15ca1e5d5 --- /dev/null +++ b/BrowserUseAgent/README.md @@ -0,0 +1,18 @@ +# Browser-use Agent Application + +Browser-use agent empowers anyone to automate repetitive web tasks. It controls your web browser to perform tasks like visiting websites and extracting data. The application is powered by [browser-use](https://github.com/browser-use/browser-use) and OPEA LLM serving microservice. 
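+Once deployed (see the deployment options below), the agent exposes a single REST endpoint. The request below is an illustrative sketch that assumes the default service port `8022` and the request fields defined in `browser_use_agent.py`; only `task_prompt` is required:
+
+```bash
+# Ask the agent to browse a site and extract information; the URL here is just an example.
+curl -X POST http://localhost:8022/v1/browser_use_agent \
+  -H "Content-Type: application/json" \
+  -d '{"task_prompt": "Open https://example.com and summarize the main heading.", "use_vision": true, "agent_max_steps": 10}'
+```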
+ +## Deployment Options + +The table below lists currently available deployment options. They outline in detail the implementation of this example on selected hardware. + +| Category | Deployment Option | Description | +| ---------------------- | ---------------------- | ----------------------------------------------------------------- | +| On-premise Deployments | Docker Compose (Gaudi) | [Deployment on Gaudi](./docker_compose/intel/hpu/gaudi/README.md) | + +## Validated Configurations + +| **Deploy Method** | **LLM Engine** | **LLM Model** | **Hardware** | +| ----------------- | -------------- | ---------------------------- | ------------ | +| Docker Compose | vLLM | Qwen/Qwen2.5-VL-32B-Instruct | Intel Gaudi | +| Docker Compose | vLLM | Qwen/Qwen2.5-VL-72B-Instruct | Intel Gaudi | diff --git a/BrowserUseAgent/browser_use_agent.py b/BrowserUseAgent/browser_use_agent.py new file mode 100644 index 0000000000..78b97d464b --- /dev/null +++ b/BrowserUseAgent/browser_use_agent.py @@ -0,0 +1,90 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +import os + +from browser_use import Agent, BrowserProfile +from comps import opea_microservices, register_microservice +from comps.cores.telemetry.opea_telemetry import opea_telemetry +from fastapi import Request +from langchain_openai import ChatOpenAI +from pydantic import BaseModel, SecretStr + +LLM = None +BROWSER_PROFILE = None +LLM_ENDPOINT = os.getenv("LLM_ENDPOINT", "http://0.0.0.0:8008") +LLM_MODEL = os.getenv("LLM_MODEL", "Qwen/Qwen2.5-VL-32B-Instruct") + + +def initiate_llm_and_browser(llm_endpoint: str, model: str, secret_key: str = "sk-xxxxxx"): + # Initialize global LLM and BrowserProfile if not already initialized + global LLM, BROWSER_PROFILE + if not LLM: + LLM = ChatOpenAI(base_url=f"{llm_endpoint}/v1", model=model, api_key=SecretStr(secret_key), temperature=0.1) + if not BROWSER_PROFILE: + BROWSER_PROFILE = BrowserProfile( + headless=True, + chromium_sandbox=False, + ) + return LLM, BROWSER_PROFILE + + +class BrowserUseRequest(BaseModel): + task_prompt: str + use_vision: bool = True + secret_key: str = "sk-xxxxxx" + llm_endpoint: str = LLM_ENDPOINT + llm_model: str = LLM_MODEL + agent_max_steps: int = 10 + + +class BrowserUseResponse(BaseModel): + is_success: bool = False + model: str + task_prompt: str + use_vision: bool + agent_researched_urls: list[str] = [] + agent_actions: list[str] = [] + agent_durations: float + agent_steps: int + final_result: str + + +@register_microservice( + name="opea_service@browser_use_agent", + endpoint="/v1/browser_use_agent", + host="0.0.0.0", + port=8022, +) +@opea_telemetry +async def run(request: Request): + data = await request.json() + chat_request = BrowserUseRequest.model_validate(data) + llm, browser_profile = initiate_llm_and_browser( + llm_endpoint=chat_request.llm_endpoint, model=chat_request.llm_model, secret_key=chat_request.secret_key + ) + agent = Agent( + task=chat_request.task_prompt, + llm=llm, + use_vision=chat_request.use_vision, + enable_memory=False, + browser_profile=browser_profile, + ) + history = await agent.run(max_steps=chat_request.agent_max_steps) + + return BrowserUseResponse( + is_success=history.is_successful() if history.is_successful() is not None else False, + model=chat_request.llm_model, + task_prompt=chat_request.task_prompt, + use_vision=chat_request.use_vision, + agent_researched_urls=history.urls(), + agent_actions=history.action_names(), + agent_durations=round(history.total_duration_seconds(), 3), + 
agent_steps=history.number_of_steps(), + final_result=history.final_result() if history.is_successful() else f"Task failed: {history.errors()}", + ) + + +if __name__ == "__main__": + opea_microservices["opea_service@browser_use_agent"].start() diff --git a/BrowserUseAgent/docker_compose/intel/hpu/gaudi/README.md b/BrowserUseAgent/docker_compose/intel/hpu/gaudi/README.md new file mode 100644 index 0000000000..7a82c0aebe --- /dev/null +++ b/BrowserUseAgent/docker_compose/intel/hpu/gaudi/README.md @@ -0,0 +1,94 @@ +# Example BrowserUseAgent deployments on an Intel® Gaudi® Platform + +This example covers the single-node on-premises deployment of the BrowserUseAgent example using OPEA components. This example begins with a Quick Start section and then documents how to modify deployments, leverage new models and configure the number of allocated devices. + +**Note** This example requires access to a properly installed Intel® Gaudi® platform with a functional Docker service configured to use the habanalabs-container-runtime. Please consult the [Intel® Gaudi® software Installation Guide](https://docs.habana.ai/en/v1.20.1/Installation_Guide/Driver_Installation.html) for more information. + +## Quick Start Deployment + +This section describes how to quickly deploy and test the BrowserUseAgent service manually on an Intel® Gaudi® platform. The basic steps are: + +1. [Access the Code](#access-the-code) +2. [Generate a HuggingFace Access Token](#generate-a-huggingface-access-token) +3. [Configure the Deployment Environment](#configure-the-deployment-environment) +4. [Deploy the Services Using Docker Compose](#deploy-the-services-using-docker-compose) +5. [Check the Deployment Status](#check-the-deployment-status) +6. [Test the Pipeline](#test-the-pipeline) +7. [Cleanup the Deployment](#cleanup-the-deployment) + +### Access the Code + +Clone the GenAIExample repository and access the BrowserUseAgent Intel® Gaudi® platform Docker Compose files and supporting scripts: + +```bash +git clone https://github.com/opea-project/GenAIExamples.git +cd GenAIExamples/BrowserUseAgent/docker_compose/intel/hpu/gaudi/ +``` + +Checkout a released version, such as v1.5: + +```bash +git checkout v1.5 +``` + +### Generate a HuggingFace Access Token + +Some HuggingFace resources, such as some models, are only accessible if you have an access token. If you do not already have a HuggingFace access token, you can create one by first creating an account by following the steps provided at [HuggingFace](https://huggingface.co/) and then generating a [user access token](https://huggingface.co/docs/transformers.js/en/guides/private#step-1-generating-a-user-access-token). + +### Configure the Deployment Environment + +To set up environment variables for deploying BrowserUseAgent services, source the _setup_env.sh_ script in this directory: + +```bash +source ./set_env.sh +``` + +The _set_env.sh_ script will prompt for required and optional environment variables used to configure the BrowserUseAgent services. If a value is not entered, the script will use a default value for the same. Users need to check if the values fit your deployment environment. + +### Deploy the Services Using Docker Compose + +To deploy the BrowserUseAgent services, execute the `docker compose up` command with the appropriate arguments. For a default deployment, execute: + +```bash +docker compose up -d +``` + +The BrowserUseAgent docker images should automatically be downloaded from the `OPEA registry` and deployed on the Intel® Gaudi® Platform. 
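+To deploy with the larger validated model or a different number of Gaudi cards, override the variables that `set_env.sh` falls back to before sourcing it, then re-run the deployment. The values below are illustrative:
+
+```bash
+# Optional overrides read by set_env.sh (defaults: Qwen/Qwen2.5-VL-32B-Instruct, NUM_CARDS=4, MAX_TOTAL_TOKENS=12288).
+export LLM_MODEL_ID="Qwen/Qwen2.5-VL-72B-Instruct"   # the other validated model
+export NUM_CARDS=8                                   # example value; match the cards available on your platform
+source ./set_env.sh
+docker compose up -d
+```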
+ +### Check the Deployment Status + +After running docker compose, check if all the containers launched via docker compose have started: + +```bash +docker ps -a +``` + +For the default deployment, the following 10 containers should have started: + +``` +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +96cb590c749c opea/browser-use-agent:latest "python browser_use_…" 9 seconds ago Up 8 seconds 0.0.0.0:8022->8022/tcp, :::8022->8022/tcp browser-use-agent-server +8072e1c33a4b opea/vllm-gaudi:1.22.0 "python3 -m vllm.ent…" 9 seconds ago Up 8 seconds (health: starting) 0.0.0.0:8008->80/tcp, [::]:8008->80/tcp vllm-gaudi-server +``` + +### Test the Pipeline + +If you don't have existing websites to test, follow the [guide](./../../../../tests/webarena/README.md) to deploy one in your local environment. + +Once the BrowserUseAgent services are running, test the pipeline using the following command: + +```bash +curl -X POST http://${host_ip}:${BROWSER_USE_AGENT_PORT}/v1/browser_use_agent \ + -H "Content-Type: application/json" \ + -d '{"task_prompt": "Navigate to http://10.7.4.57:8083/admin and login with the credentials: username: admin, password: admin1234. Then, find out What are the top-2 best-selling product in 2022?"}' +``` + +- Note that Update the `task_prompt` to match the evaluation question relevant to your configured website. + +### Cleanup the Deployment + +To stop the containers associated with the deployment, execute the following command: + +```bash +docker compose -f compose.yaml down +``` diff --git a/BrowserUseAgent/docker_compose/intel/hpu/gaudi/compose.yaml b/BrowserUseAgent/docker_compose/intel/hpu/gaudi/compose.yaml new file mode 100644 index 0000000000..e7a2ffd867 --- /dev/null +++ b/BrowserUseAgent/docker_compose/intel/hpu/gaudi/compose.yaml @@ -0,0 +1,50 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +x-common-environment: + &common-env + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + +services: + vllm-gaudi-server: + image: opea/vllm-gaudi:1.22.0 + container_name: vllm-gaudi-server + ports: + - ${LLM_ENDPOINT_PORT:-8008}:80 + volumes: + - "${DATA_PATH:-./data}:/data" + environment: + <<: *common-env + HF_TOKEN: ${HF_TOKEN} + HF_HOME: /data + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + LLM_MODEL_ID: ${LLM_MODEL_ID} + VLLM_TORCH_PROFILER_DIR: "/mnt" + VLLM_SKIP_WARMUP: true + PT_HPU_ENABLE_LAZY_COLLECTIVES: true + runtime: habana + cap_add: + - SYS_NICE + ipc: host + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"] + interval: 10s + timeout: 10s + retries: 150 + command: --model $LLM_MODEL_ID --tensor-parallel-size $NUM_CARDS --host 0.0.0.0 --port 80 --max-seq-len-to-capture $MAX_TOTAL_TOKENS + + browser-use-agent-server: + image: ${REGISTRY:-opea}/browser-use-agent:${TAG:-latest} + container_name: browser-use-agent-server + depends_on: + - vllm-gaudi-server + ports: + - ${BROWSER_USE_AGENT_PORT:-8022}:8022 + environment: + <<: *common-env + LLM_ENDPOINT: ${LLM_ENDPOINT-http://0.0.0.0:8008} + LLM_MODEL: ${LLM_MODEL_ID-Qwen/Qwen2-VL-72B-Instruct} + ipc: host diff --git a/BrowserUseAgent/docker_compose/intel/hpu/gaudi/set_env.sh b/BrowserUseAgent/docker_compose/intel/hpu/gaudi/set_env.sh new file mode 100644 index 0000000000..b11bbf903d --- /dev/null +++ b/BrowserUseAgent/docker_compose/intel/hpu/gaudi/set_env.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + 
+# Navigate to the parent directory and source the environment +SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd) + +pushd "$SCRIPT_DIR/../../../../../" > /dev/null +source .set_env.sh +popd > /dev/null + +# Function to check if a variable is set +check_var() { + if [ "$#" -ne 1 ]; then + echo "Error: Usage: check_var " >&2 + return 2 + fi + + local var_name="$1" + if [ -n "${!var_name}" ]; then + # Variable value is non-empty + return 0 + else + # Variable is unset or set to an empty string + return 1 + fi +} + +check_var "HF_TOKEN" +export ip_address=$(hostname -I | awk '{print $1}') + +export LLM_ENDPOINT_PORT="${LLM_ENDPOINT_PORT:-8008}" +export LLM_ENDPOINT="http://${ip_address}:${LLM_ENDPOINT_PORT}" +export DATA_PATH="${DATA_PATH-"./data"}" +export LLM_MODEL_ID="${LLM_MODEL_ID-"Qwen/Qwen2.5-VL-32B-Instruct"}" +export MAX_TOTAL_TOKENS="${MAX_TOTAL_TOKENS-12288}" +export NUM_CARDS="${NUM_CARDS-4}" diff --git a/BrowserUseAgent/docker_image_build/build.yaml b/BrowserUseAgent/docker_image_build/build.yaml new file mode 100644 index 0000000000..b7a7268d58 --- /dev/null +++ b/BrowserUseAgent/docker_image_build/build.yaml @@ -0,0 +1,15 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + browser-use-agent: + build: + args: + IMAGE_REPO: ${REGISTRY} + BASE_TAG: ${TAG} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + context: ../ + dockerfile: ./Dockerfile + image: ${REGISTRY:-opea}/browser-use-agent:${TAG:-latest} diff --git a/BrowserUseAgent/requirements.txt b/BrowserUseAgent/requirements.txt new file mode 100644 index 0000000000..08afe30806 --- /dev/null +++ b/BrowserUseAgent/requirements.txt @@ -0,0 +1 @@ +browser-use==0.3.2 diff --git a/BrowserUseAgent/tests/README.md b/BrowserUseAgent/tests/README.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/BrowserUseAgent/tests/test_compose_on_gaudi.sh b/BrowserUseAgent/tests/test_compose_on_gaudi.sh new file mode 100644 index 0000000000..63f234345a --- /dev/null +++ b/BrowserUseAgent/tests/test_compose_on_gaudi.sh @@ -0,0 +1,149 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" +export REGISTRY=${IMAGE_REPO} +export TAG=${IMAGE_TAG} +export MODEL_CACHE=${model_cache:-"./data"} +export HF_TOKEN=${HF_TOKEN} +export LLM_ENDPOINT_PORT=8008 +export LLM_ENDPOINT="http://0.0.0.0:${LLM_ENDPOINT_PORT}" +export BROWSER_USE_AGENT_PORT=8022 +export LLM_MODEL_ID="Qwen/Qwen2.5-VL-32B-Instruct" +export MAX_TOTAL_TOKENS=131072 +export NUM_CARDS=4 + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + pushd GenAIComps + echo "GenAIComps test commit is $(git rev-parse HEAD)" + docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . + popd && sleep 1s + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
+ docker compose -f build.yaml build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy > ${LOG_PATH}/docker_image_build.log + + docker images && sleep 1s +} + +function start_services() { + cd $WORKPATH/docker_compose/intel/hpu/gaudi + source set_env.sh + + # Start Docker Containers + docker compose -f compose.yaml up -d --quiet-pull > ${LOG_PATH}/start_services_with_compose.log + n=0 + until [[ "$n" -ge 200 ]]; do + echo "n=$n" + docker logs vllm-gaudi-server > vllm_service_start.log 2>&1 + if grep -q "Application startup complete" vllm_service_start.log; then + break + fi + sleep 5s + n=$((n+1)) + done +} + +function validate_service() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_microservices() { + # vllm for llm service + validate_service \ + "${ip_address}:${LLM_ENDPOINT_PORT}/v1/chat/completions" \ + "content" \ + "vllm-llm" \ + "vllm-gaudi-server" \ + '{"model": "'${LLM_MODEL_ID}'", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' +} + +function validate_megaservice() { + # start web server for testing + cd $WORKPATH/tests/webarena + bash shopping_admin.sh start + + # Curl the Mega Service + validate_service \ + "${ip_address}:${BROWSER_USE_AGENT_PORT}/v1/browser_use_agent" \ + "\"is_success\":true" \ + "browser-use-agent" \ + "browser-use-agent-server" \ + '{"task_prompt": "Navigate to http://'${ip_address}':8084/admin and login with the credentials: username: admin, password: admin1234. 
Then, find out What are the top-3 best-selling product in 2022?"}' +} + +function stop_docker() { + cd $WORKPATH/docker_compose/intel/hpu/gaudi + docker compose -f compose.yaml down + + cd $WORKPATH/tests/webarena + bash shopping_admin.sh stop +} + +function main() { + + echo "::group::stop_docker" + stop_docker + echo "::endgroup::" + + echo "::group::build_docker_images" + if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi + echo "::endgroup::" + + echo "::group::start_services" + start_services + sleep 30 + echo "::endgroup::" + + echo "::group::validate_microservices" + validate_microservices + echo "::endgroup::" + + echo "::group::validate_megaservice" + validate_megaservice + echo "::endgroup::" + + echo "::group::stop_docker" + stop_docker + echo "::endgroup::" + + docker system prune -f + +} + +main diff --git a/BrowserUseAgent/tests/webarena/README.md b/BrowserUseAgent/tests/webarena/README.md new file mode 100644 index 0000000000..12be9ebade --- /dev/null +++ b/BrowserUseAgent/tests/webarena/README.md @@ -0,0 +1,23 @@ +# Setup Scripts for Webarena + +We will launch a shopping admin website, part of [WebArena](https://github.com/web-arena-x/webarena), to serve as a web server for agent evaluation. The deployment process will follow the instructions in the [webarena-setup](https://github.com/gasse/webarena-setup) repository. + +## Download Docker Image + +1. Download shopping_admin_final_0719.tar from the [official webarena repo](https://github.com/web-arena-x/webarena/tree/main/environment_docker). + +2. Place the archive file, shopping_admin_final_0719.tar, into the directory specified by the `ARCHIVES_LOCATION` parameter within `tests/webarena/set_env.sh` + +## Launch the Web Site + +Please ensure Docker services work in your environment, and perform the following command to launch the web site: + +```bash +bash shopping_admin.sh start +``` + +## Stop the Web Site + +```bash +bash shopping_admin.sh stop +``` diff --git a/BrowserUseAgent/tests/webarena/set_env.sh b/BrowserUseAgent/tests/webarena/set_env.sh new file mode 100644 index 0000000000..0acd826453 --- /dev/null +++ b/BrowserUseAgent/tests/webarena/set_env.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +WORKING_DIR="$(pwd)" +PUBLIC_HOSTNAME="$(hostname -I | awk '{print $1}')" +SHOPPING_ADMIN_USER="admin" +SHOPPING_ADMIN_PASSWORD="admin1234" +SHOPPING_ADMIN_PORT=8084 +SHOPPING_ADMIN_URL="http://${PUBLIC_HOSTNAME}:${SHOPPING_ADMIN_PORT}/admin" +ARCHIVES_LOCATION="/data2/hf_model" diff --git a/BrowserUseAgent/tests/webarena/shopping_admin.sh b/BrowserUseAgent/tests/webarena/shopping_admin.sh new file mode 100644 index 0000000000..e3a5a998be --- /dev/null +++ b/BrowserUseAgent/tests/webarena/shopping_admin.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# Reference: https://github.com/gasse/webarena-setup + +# stop if any error occur +set -e + +BASE_DIR=`dirname "${BASH_SOURCE[0]}"` +source ${BASE_DIR}/set_env.sh + +assert() { + if ! "$@"; then + echo "Assertion failed: $*" >&2 + exit 1 + fi +} + +load_docker_image() { + local IMAGE_NAME="$1" + local INPUT_FILE="$2" + + if ! docker images --format "{{.Repository}}:{{.Tag}}" | grep -q "^${IMAGE_NAME}:"; then + echo "Loading Docker image ${IMAGE_NAME} from ${INPUT_FILE}" + docker load --input "${INPUT_FILE}" + else + echo "Docker image ${IMAGE_NAME} is already loaded." 
+ fi +} + +start() { + # Verify that the docker image archive file exists + assert [ -f ${ARCHIVES_LOCATION}/shopping_admin_final_0719.tar ] + + # Load image + load_docker_image "shopping_admin_final_0719" ${ARCHIVES_LOCATION}/shopping_admin_final_0719.tar + + # Create and run the container + docker create --name shopping_admin_server -p ${SHOPPING_ADMIN_PORT}:80 shopping_admin_final_0719 + + # Start the container + docker start shopping_admin_server + echo -n -e "Waiting 60 seconds for all services to start..." + sleep 60 + echo -n -e " done\n" + + echo -n -e "Configuring Magento settings inside the container..." + docker exec shopping_admin_server php /var/www/magento2/bin/magento config:set admin/security/password_is_forced 0 + docker exec shopping_admin_server php /var/www/magento2/bin/magento config:set admin/security/password_lifetime 0 + docker exec shopping_admin_server /var/www/magento2/bin/magento setup:store-config:set --base-url="http://${PUBLIC_HOSTNAME}:${SHOPPING_ADMIN_PORT}" + docker exec shopping_admin_server mysql -u magentouser -pMyPassword magentodb -e "UPDATE core_config_data SET value='http://$PUBLIC_HOSTNAME:$SHOPPING_ADMIN_PORT/' WHERE path = 'web/secure/base_url';" + docker exec shopping_admin_server /var/www/magento2/bin/magento cache:flush + echo -n -e " done\n" +} + +stop() { + docker stop shopping_admin_server || true + docker rm shopping_admin_server || true +} + +case "$1" in + start) + echo "Starting shopping_admin server..." + start + echo "shopping_admin server started." + ;; + stop) + echo "Stopping shopping_admin server..." + stop + echo "shopping_admin server stopped." + ;; + restart) + echo "Restarting shopping_admin server..." + stop + sleep 2 + start + echo "shopping_admin server restarted." + ;; + *) + echo "Usage: $0 {start|stop|restart}" + exit 1 + ;; +esac diff --git a/ChatQnA/chatqna.py b/ChatQnA/chatqna.py index 2e462b0f6e..ac67f4a16c 100644 --- a/ChatQnA/chatqna.py +++ b/ChatQnA/chatqna.py @@ -3,10 +3,11 @@ import argparse import json +import logging import os import re -from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType +from comps import CustomLogger, MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType from comps.cores.mega.utils import handle_message from comps.cores.proto.api_protocol import ( ChatCompletionRequest, @@ -20,6 +21,10 @@ from fastapi.responses import StreamingResponse from langchain_core.prompts import PromptTemplate +logger = CustomLogger(__name__) +log_level = logging.DEBUG if os.getenv("LOGFLAG", "").lower() == "true" else logging.INFO +logging.basicConfig(level=log_level, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") + class ChatTemplate: @staticmethod @@ -62,6 +67,10 @@ def generate_rag_prompt(question, documents): def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs): + logger.debug( + f"Aligning inputs for service: {self.services[cur_node].name}, type: {self.services[cur_node].service_type}" + ) + if self.services[cur_node].service_type == ServiceType.EMBEDDING: inputs["inputs"] = inputs["text"] del inputs["text"] @@ -83,6 +92,9 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k # next_inputs["repetition_penalty"] = inputs["repetition_penalty"] next_inputs["temperature"] = inputs["temperature"] inputs = next_inputs + + # Log the aligned inputs (be careful with sensitive data) + logger.debug(f"Aligned inputs for {self.services[cur_node].name}: 
{type(inputs)}") return inputs @@ -123,7 +135,9 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di elif input_variables == ["question"]: prompt = prompt_template.format(question=data["initial_query"]) else: - print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']") + logger.warning( + f"{prompt_template} not used, we only support 2 input variables ['question', 'context']" + ) prompt = ChatTemplate.generate_rag_prompt(data["initial_query"], docs) else: prompt = ChatTemplate.generate_rag_prompt(data["initial_query"], docs) @@ -152,7 +166,7 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di elif input_variables == ["question"]: prompt = prompt_template.format(question=prompt) else: - print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']") + logger.warning(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']") prompt = ChatTemplate.generate_rag_prompt(prompt, reranked_docs) else: prompt = ChatTemplate.generate_rag_prompt(prompt, reranked_docs) @@ -171,27 +185,65 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di def align_generator(self, gen, **kwargs): - # OpenAI response format - # b'data:{"id":"","object":"text_completion","created":1725530204,"model":"meta-llama/Meta-Llama-3-8B-Instruct","system_fingerprint":"2.0.1-native","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"logprobs":null,"finish_reason":null}]}\n\n' + """Aligns the generator output to match ChatQnA's format of sending bytes. + + Handles different LLM output formats (TGI, OpenAI) and properly filters + empty or null content chunks to avoid UI display issues. + """ + # OpenAI response format example: + # b'data:{"id":"","object":"text_completion","created":1725530204,"model":"meta-llama/Meta-Llama-3-8B-Instruct", + # "system_fingerprint":"2.0.1-native","choices":[{"index":0,"delta":{"role":"assistant","content":"?"}, + # "logprobs":null,"finish_reason":null}]}\n\n' + for line in gen: - line = line.decode("utf-8") - chunks = [chunk.strip() for chunk in line.split("\n\n") if chunk.strip()] - for line in chunks: + try: + line = line.decode("utf-8") start = line.find("{") end = line.rfind("}") + 1 + + # Skip lines with invalid JSON structure + if start == -1 or end <= start: + logger.debug("Skipping line with invalid JSON structure") + continue + json_str = line[start:end] - try: - # sometimes yield empty chunk, do a fallback here - json_data = json.loads(json_str) - if "ops" in json_data and "op" in json_data["ops"][0]: - if "value" in json_data["ops"][0] and isinstance(json_data["ops"][0]["value"], str): - yield f"data: {repr(json_data['ops'][0]['value'].encode('utf-8'))}\n\n" - else: - pass - elif "content" in json_data["choices"][0]["delta"]: - yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n" - except Exception as e: - yield f"data: {repr(json_str.encode('utf-8'))}\n\n" + + # Parse the JSON data + json_data = json.loads(json_str) + + # Handle TGI format responses + if "ops" in json_data and "op" in json_data["ops"][0]: + if "value" in json_data["ops"][0] and isinstance(json_data["ops"][0]["value"], str): + yield f"data: {repr(json_data['ops'][0]['value'].encode('utf-8'))}\n\n" + # Empty value chunks are silently skipped + + # Handle OpenAI format responses + elif "choices" in json_data and len(json_data["choices"]) > 0: + # Only yield content if it exists and is 
not null + if ( + "delta" in json_data["choices"][0] + and "content" in json_data["choices"][0]["delta"] + and json_data["choices"][0]["delta"]["content"] is not None + ): + content = json_data["choices"][0]["delta"]["content"] + yield f"data: {repr(content.encode('utf-8'))}\n\n" + # Null content chunks are silently skipped + elif ( + "delta" in json_data["choices"][0] + and "content" in json_data["choices"][0]["delta"] + and json_data["choices"][0]["delta"]["content"] is None + ): + logger.debug("Skipping null content chunk") + + except json.JSONDecodeError as e: + # Log the error with the problematic JSON string for better debugging + logger.error(f"JSON parsing error in align_generator: {e}\nProblematic JSON: {json_str[:200]}") + # Skip sending invalid JSON to avoid UI issues + continue + except Exception as e: + logger.error(f"Unexpected error in align_generator: {e}, line snippet: {line[:100]}...") + # Skip sending to avoid UI issues + continue yield "data: [DONE]\n\n" diff --git a/ChatQnA/docker_compose/amd/cpu/epyc/compose.yaml b/ChatQnA/docker_compose/amd/cpu/epyc/compose.yaml index ee5e810900..9b44783b5b 100644 --- a/ChatQnA/docker_compose/amd/cpu/epyc/compose.yaml +++ b/ChatQnA/docker_compose/amd/cpu/epyc/compose.yaml @@ -90,7 +90,7 @@ services: HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1 container_name: vllm-service ports: - "9009:80" diff --git a/ChatQnA/docker_compose/amd/cpu/epyc/compose_faqgen.yaml b/ChatQnA/docker_compose/amd/cpu/epyc/compose_faqgen.yaml index 13888c3e81..61580e1865 100644 --- a/ChatQnA/docker_compose/amd/cpu/epyc/compose_faqgen.yaml +++ b/ChatQnA/docker_compose/amd/cpu/epyc/compose_faqgen.yaml @@ -83,7 +83,7 @@ services: HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1 container_name: vllm-server ports: - ${LLM_ENDPOINT_PORT:-9009}:80 diff --git a/ChatQnA/docker_compose/amd/cpu/epyc/compose_milvus.yaml b/ChatQnA/docker_compose/amd/cpu/epyc/compose_milvus.yaml index ef1b271d76..7936c11135 100644 --- a/ChatQnA/docker_compose/amd/cpu/epyc/compose_milvus.yaml +++ b/ChatQnA/docker_compose/amd/cpu/epyc/compose_milvus.yaml @@ -147,7 +147,7 @@ services: command: --model-id ${RERANK_MODEL_ID} --auto-truncate vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1 container_name: vllm-service ports: - "9009:80" diff --git a/ChatQnA/docker_compose/amd/cpu/epyc/compose_pinecone.yaml b/ChatQnA/docker_compose/amd/cpu/epyc/compose_pinecone.yaml index 2681a92642..514a8803f6 100644 --- a/ChatQnA/docker_compose/amd/cpu/epyc/compose_pinecone.yaml +++ b/ChatQnA/docker_compose/amd/cpu/epyc/compose_pinecone.yaml @@ -77,7 +77,7 @@ services: HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1 container_name: vllm-service ports: - "9009:80" diff --git a/ChatQnA/docker_compose/amd/cpu/epyc/compose_qdrant.yaml b/ChatQnA/docker_compose/amd/cpu/epyc/compose_qdrant.yaml index 821bc02450..2850318d50 100644 --- a/ChatQnA/docker_compose/amd/cpu/epyc/compose_qdrant.yaml +++ b/ChatQnA/docker_compose/amd/cpu/epyc/compose_qdrant.yaml @@ -83,7 +83,7 @@ 
services: HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1 container_name: vllm-service ports: - "6042:80" diff --git a/ChatQnA/docker_compose/amd/cpu/epyc/compose_without_rerank.yaml b/ChatQnA/docker_compose/amd/cpu/epyc/compose_without_rerank.yaml index 1b701c0279..296546e986 100644 --- a/ChatQnA/docker_compose/amd/cpu/epyc/compose_without_rerank.yaml +++ b/ChatQnA/docker_compose/amd/cpu/epyc/compose_without_rerank.yaml @@ -67,7 +67,7 @@ services: RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" restart: unless-stopped vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1 container_name: vllm-service ports: - "9009:80" diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README.md b/ChatQnA/docker_compose/intel/cpu/xeon/README.md index 08b61642db..899ee51db2 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/README.md +++ b/ChatQnA/docker_compose/intel/cpu/xeon/README.md @@ -190,7 +190,7 @@ docker compose -f compose_remote.yaml up -d ## ChatQnA with Conversational UI (Optional) -To access the Conversational UI (react based) frontend, modify the UI service in the `compose` file used to deploy. Replace `chaqna-xeon-ui-server` service with the `chatqna-xeon-conversation-ui-server` service as per the config below: +To access the Conversational UI (react based) frontend, modify the UI service in the `compose` file used to deploy. Replace `chatqna-xeon-ui-server` service with the `chatqna-xeon-conversation-ui-server` service as per the config below: ```yaml chatqna-xeon-conversation-ui-server: @@ -202,7 +202,7 @@ chatqna-xeon-conversation-ui-server: ports: - "5174:80" depends_on: - - chaqna-xeon-backend-server + - chatqna-xeon-backend-server ipc: host restart: always ``` @@ -210,7 +210,7 @@ chatqna-xeon-conversation-ui-server: Once the services are up, open the following URL in the browser: http://{host_ip}:5174. By default, the UI runs on port 80 internally. If the developer prefers to use a different host port to access the frontend, it can be modified by port mapping in the `compose.yaml` file as shown below: ```yaml - chaqna-gaudi-conversation-ui-server: + chatqna-gaudi-conversation-ui-server: image: opea/chatqna-conversation-ui:latest ... ports: diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README_endpoint_openai.md b/ChatQnA/docker_compose/intel/cpu/xeon/README_endpoint_openai.md new file mode 100644 index 0000000000..f9e84d07ec --- /dev/null +++ b/ChatQnA/docker_compose/intel/cpu/xeon/README_endpoint_openai.md @@ -0,0 +1,453 @@ +# Build Mega Service of ChatQnA on Xeon with an LLM Endpoint + +This document outlines the single node deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservices on Intel Xeon server. The steps include pulling Docker images, container deployment via Docker Compose, and service execution to integrate microservices such as `embedding`, `retriever`, `rerank` and `llm`. + +## Table of contents + +1. [ChatQnA Quick Start Deployment](#chatqna-quick-start-Deployment) +2. [ChatQnA Docker Compose file Options](#chatqna-docker-compose-files) +3. [ChatQnA with Conversational UI](#chatqna-with-conversational-ui-optional) + +## ChatQnA Quick Start Deployment + +This section describes how to quickly deploy and test the ChatQnA service manually on an Intel® Xeon® processor. 
The basic steps are:
+
+1. [Access the Code](#access-the-code)
+2. [Generate a HuggingFace Access Token](#generate-a-huggingface-access-token)
+3. [Configure the Deployment Environment](#configure-the-deployment-environment)
+4. [Deploy the Services Using Docker Compose](#deploy-the-services-using-docker-compose)
+5. [Check the Deployment Status](#check-the-deployment-status)
+6. [Test the Pipeline](#test-the-pipeline)
+7. [Cleanup the Deployment](#cleanup-the-deployment)
+
+### Access the Code
+
+First clone the GenAIComps repository and build the `opea/llm-textgen` image, then clone the GenAIExamples repository and change into the ChatQnA Intel® Xeon® platform Docker Compose directory:
+
+```bash
+git clone https://github.com/opea-project/GenAIComps
+cd GenAIComps
+
+# Build the opea/llm-textgen image.
+
+docker build \
+  --no-cache \
+  --build-arg https_proxy=$https_proxy \
+  --build-arg http_proxy=$http_proxy \
+  -t opea/llm-textgen:latest \
+  -f comps/llms/src/text-generation/Dockerfile .
+
+cd ../
+git clone https://github.com/opea-project/GenAIExamples.git
+cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
+```
+
+### Generate a HuggingFace Access Token
+
+Some HuggingFace resources, such as certain models, are only accessible with an access token. If the developer does not have one, they can create a [HuggingFace](https://huggingface.co/) account and then generate a [user access token](https://huggingface.co/docs/transformers.js/en/guides/private#step-1-generating-a-user-access-token).
+
+### Endpoint Access
+
+An OpenAI-compatible endpoint is required, e.g., OpenRouter.ai. Obtain a valid API key for it.
+
+### Configure the Deployment Environment
+
+To set up environment variables for deploying ChatQnA services, source the _set_env.sh_ script in this directory and then override the parameters specific to the endpoint-based deployment:
+
+```bash
+cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon
+source set_env.sh # source environment variables, then override below.
+
+export host_ip="External_Public_IP" # e.g. export host_ip=$(hostname -I | awk '{print $1}')
+export HF_TOKEN="Your_Huggingface_API_Token"
+export OPENAI_API_KEY="key for the OpenAI-like endpoint"
+
+export LLM_MODEL_ID="" # e.g. "google/gemma-3-1b-it:free"
+export LLM_ENDPOINT="" # e.g. "https://openrouter.ai/api" (please make sure to omit the /v1 suffix)
+export no_proxy="" # Set if any no_proxy entries are needed. See set_env.sh
+```
+
+Consult the section on [ChatQnA Docker Compose Files](#chatqna-docker-compose-files) for information on how service-specific configuration parameters affect deployments.
+
+### Deploy the Services Using Docker Compose
+
+To deploy the ChatQnA services, execute the `docker compose up` command with the appropriate arguments. For this endpoint-based deployment, execute the command below. It uses the `compose_endpoint_openai.yaml` file.
+
+```bash
+NGINX_PORT=8080 docker compose -f compose_endpoint_openai.yaml up -d
+```
+
+Setting `NGINX_PORT=8080` exposes the chat console on `localhost:8080`, which is useful when port 80 is already in use on the host.
+
+To enable OpenTelemetry tracing, merge the `compose.telemetry.yaml` file with the compose file used above. CPU example with the OpenTelemetry feature:
+
+> NOTE: To get the supported Grafana dashboards, run `download_opea_dashboard.sh` as shown in the commands below.
+ +```bash +./grafana/dashboards/download_opea_dashboard.sh +NGINX_PORT=8080 docker compose -f compose_endpoint_openai.yaml -f compose.telemetry.yaml up -d +``` + +**Note**: developers should build docker image from source when: + +- Developing off the git main branch (as the container's ports in the repo may be different from the published docker image). +- Unable to download the docker image. +- Use a specific version of Docker image. + +Please refer to the table below to build different microservices from source: + +| Microservice | Deployment Guide | +| ------------ | --------------------------------------------------------------------------------------------- | +| Dataprep | https://github.com/opea-project/GenAIComps/tree/main/comps/dataprep | +| Embedding | https://github.com/opea-project/GenAIComps/tree/main/comps/embeddings | +| Retriever | https://github.com/opea-project/GenAIComps/tree/main/comps/retrievers | +| Reranker | https://github.com/opea-project/GenAIComps/tree/main/comps/rerankings | +| LLM | https://github.com/opea-project/GenAIComps/tree/main/comps/llms | +| Megaservice | [Megaservice build guide](../../../../README_miscellaneous.md#build-megaservice-docker-image) | +| UI | [Basic UI build guide](../../../../README_miscellaneous.md#build-ui-docker-image) | + +### Check the Deployment Status + +After running docker compose, check if all the containers launched via docker compose have started: + +``` +docker ps -a +``` + +For the endpoint-based deployment, the following 9 containers should be running: + +```bash +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +04f0e3607457 opea/nginx:${RELEASE_VERSION} "/docker-entrypoint.…" 17 minutes ago Up 16 minutes 0.0.0.0:8080->80/tcp, [::]:8080->80/tcp chatqna-xeon-nginx-server +6d7fe1bfd0a5 opea/chatqna-ui:${RELEASE_VERSION} "docker-entrypoint.s…" 17 minutes ago Up 16 minutes 0.0.0.0:5173->5173/tcp, :::5173->5173/tcp chatqna-xeon-ui-server +71d01fe8bc94 opea/chatqna:${RELEASE_VERSION} "python chatqna.py" 17 minutes ago Up 16 minutes 0.0.0.0:8888->8888/tcp, :::8888->8888/tcp chatqna-xeon-backend-server +ea12fab1c70e opea/retriever:${RELEASE_VERSION} "python opea_retriev…" 17 minutes ago Up 17 minutes 0.0.0.0:7000->7000/tcp, :::7000->7000/tcp retriever-redis-server +253622403ed6 opea/dataprep:${RELEASE_VERSION} "sh -c 'python $( [ …" 17 minutes ago Up 17 minutes (healthy) 0.0.0.0:6007->5000/tcp, [::]:6007->5000/tcp dataprep-redis-server +a552cf4f0dd0 redis/redis-stack:7.2.0-v9 "/entrypoint.sh" 17 minutes ago Up 17 minutes (healthy) 0.0.0.0:6379->6379/tcp, :::6379->6379/tcp, 0.0.0.0:8001->8001/tcp, :::8001->8001/tcp redis-vector-db +6795a52137f7 ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 "text-embeddings-rou…" 17 minutes ago Up 17 minutes 0.0.0.0:6006->80/tcp, [::]:6006->80/tcp tei-embedding-server +3e55313e714b opea/llm-textgen:${RELEASE_VERSION} "bash entrypoint.sh" 17 minutes ago Up 17 minutes 0.0.0.0:9000->9000/tcp, :::9000->9000/tcp textgen-service-endpoint-openai +10318f82c943 ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 "text-embeddings-rou…" 17 minutes ago Up 17 minutes 0.0.0.0:8808->80/tcp, [::]:8808->80/tcp tei-reranking-server +``` + +If any issues are encountered during deployment, refer to the [troubleshooting](../../../../README_miscellaneous.md##troubleshooting) section. + +### Test the Pipeline + +Once the ChatQnA services are running, test the pipeline using the following command. This will send a sample query to the ChatQnA service and return a response. 
+ +```bash +curl http://${host_ip}:8888/v1/chatqna \ + -H "Content-Type: application/json" \ + -d '{ + "messages": "What is the revenue of Nike in 2023?" + }' +``` + +**Note** : Access the ChatQnA UI by web browser through this URL: `http://${host_ip}:8080`. Please confirm the `8080` port is opened in the firewall. To validate each microservice used in the pipeline refer to the [Validate microservices](#validate-microservices) section. + +### Cleanup the Deployment + +To stop the containers associated with the deployment, execute the following command: + +``` +docker compose -f compose.yaml down +``` + +## ChatQnA Docker Compose Files + +In the context of deploying a ChatQnA pipeline on an Intel® Xeon® platform, we can pick and choose different vector databases, large language model serving frameworks, and remove pieces of the pipeline such as the reranker. The table below outlines the various configurations that are available as part of the application. These configurations can be used as templates and can be extended to different components available in [GenAIComps](https://github.com/opea-project/GenAIComps.git). + +| File | Description | +| -------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| [compose.yaml](./compose.yaml) | Default compose file using vllm as serving framework and redis as vector database | +| [compose_endpoint_openai.yaml](./compose_endpoint_openai.yaml) | Uses OpenAI-compatible endpoint (remote or local) as LLM serving framework with redis as vector database. | +| [compose_milvus.yaml](./compose_milvus.yaml) | Uses Milvus as the vector database. All other configurations remain the same as the default | +| [compose_pinecone.yaml](./compose_pinecone.yaml) | Uses Pinecone as the vector database. All other configurations remain the same as the default. For more details, refer to [README_pinecone.md](./README_pinecone.md). | +| [compose_qdrant.yaml](./compose_qdrant.yaml) | Uses Qdrant as the vector database. All other configurations remain the same as the default. For more details, refer to [README_qdrant.md](./README_qdrant.md). | +| [compose_tgi.yaml](./compose_tgi.yaml) | Uses TGI as the LLM serving framework. All other configurations remain the same as the default | +| [compose_without_rerank.yaml](./compose_without_rerank.yaml) | Default configuration without the reranker | +| [compose_faqgen.yaml](./compose_faqgen.yaml) | Enables FAQ generation using vLLM as the LLM serving framework. For more details, refer to [README_faqgen.md](./README_faqgen.md). | +| [compose_faqgen_tgi.yaml](./compose_faqgen_tgi.yaml) | Enables FAQ generation using TGI as the LLM serving framework. For more details, refer to [README_faqgen.md](./README_faqgen.md). | +| [compose.telemetry.yaml](./compose.telemetry.yaml) | Helper file for telemetry features for vllm. Can be used along with any compose files that serves vllm | +| [compose_tgi.telemetry.yaml](./compose_tgi.telemetry.yaml) | Helper file for telemetry features for tgi. Can be used along with any compose files that serves tgi | +| [compose_mariadb.yaml](./compose_mariadb.yaml) | Uses MariaDB Server as the vector database. All other configurations remain the same as the default | + +## ChatQnA with Conversational UI (Optional) + +To access the Conversational UI (react based) frontend, modify the UI service in the `compose` file used to deploy. 
Replace `chatqna-xeon-ui-server` service with the `chatqna-xeon-conversation-ui-server` service as per the config below: + +```yaml +chatqna-xeon-conversation-ui-server: + image: opea/chatqna-conversation-ui:latest + container_name: chatqna-xeon-conversation-ui-server + environment: + - APP_BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT} + - APP_DATA_PREP_SERVICE_URL=${DATAPREP_SERVICE_ENDPOINT} + ports: + - "5174:80" + depends_on: + - chatqna-xeon-backend-server + ipc: host + restart: always +``` + +Once the services are up, open the following URL in the browser: http://{host_ip}:5174. By default, the UI runs on port 80 internally. If the developer prefers to use a different host port to access the frontend, it can be modified by port mapping in the `compose.yaml` file as shown below: + +```yaml + chatqna-xeon-conversation-ui-server: + image: opea/chatqna-conversation-ui:latest + ... + ports: + - "80:80" +``` + +Here is an example of running ChatQnA (default UI): + +![project-screenshot](../../../../assets/img/chat_ui_response.png) + +Here is an example of running ChatQnA with Conversational UI (React): + +![project-screenshot](../../../../assets/img/conversation_ui_response.png) + +### Validate Microservices + +Note, when verifying the microservices by curl or API from remote client, please make sure the **ports** of the microservices are opened in the firewall of the cloud node. +Follow the instructions to validate MicroServices. +For details on how to verify the correctness of the response, refer to [how-to-validate_service](../../hpu/gaudi/how_to_validate_service.md). + +1. **TEI Embedding Service** + Send a test request to the TEI Embedding Service to ensure it is running correctly: + + ```bash + curl http://${host_ip}:6006/embed \ + -X POST \ + -d '{"inputs":"What is Deep Learning?"}' \ + -H 'Content-Type: application/json' + ``` + + If you receive a connection error, ensure that the service is running and the port 6006 is open in the firewall. + +2. **Retriever Microservice** + + To consume the retriever microservice, you need to generate a mock embedding vector by Python script. The length of embedding vector + is determined by the embedding model. + Here we use the model `EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"`, which vector size is 768. + + Check the vector dimension of your embedding model, set `your_embedding` dimension equal to it. + + ```bash + export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + curl http://${host_ip}:7000/v1/retrieval \ + -X POST \ + -d "{\"text\":\"test\",\"embedding\":${your_embedding}}" \ + -H 'Content-Type: application/json' + ``` + + If the response indicates an invalid embedding vector, verify that the vector size matches the model's expected dimension. + +3. **TEI Reranking Service** + + To test the TEI Reranking Service, use the following `curl` command: + + > Skip for ChatQnA without Rerank pipeline + + ```bash + curl http://${host_ip}:8808/rerank \ + -X POST \ + -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \ + -H 'Content-Type: application/json' + ``` + +4. **LLM Backend Service** + + In the first startup, this service will take more time to download, load and warm up the model. After it's finished, the service will be ready. + + Try the command below to check whether the LLM serving is ready. 
+
+   ```bash
+   docker logs textgen-service-endpoint-openai 2>&1 | grep complete
+   # If the service is ready, you will get a response like below.
+   INFO:     Application startup complete.
+   ```
+
+   Then try the `cURL` commands below to validate the services.
+
+You may also test the underlying LLM endpoint directly. For example, with OpenRouter.ai:
+
+```bash
+# The "'"..."'" quoting closes the single-quoted JSON so ${LLM_MODEL_ID} expands.
+curl https://openrouter.ai/api/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer $OPENAI_API_KEY" \
+  -d '{
+  "model": "'"${LLM_MODEL_ID}"'",
+  "messages": [
+    {
+      "role": "user",
+      "content": "What is the meaning of life?"
+    }
+  ]
+}'
+```
+
+To test the OPEA service that is built on top of it:
+
+```bash
+curl http://${host_ip}:9000/v1/chat/completions \
+  -X POST \
+  -d '{"model": "'"${LLM_MODEL_ID}"'", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \
+  -H 'Content-Type: application/json'
+```
+
+5. **MegaService**
+
+   Use the following `curl` command to test the MegaService:
+
+   ```bash
+   curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
+        "messages": "What is the revenue of Nike in 2023?"
+        }'
+   ```
+
+6. **Nginx Service**
+
+   Use the following curl command to test the Nginx Service:
+
+   ```bash
+   curl http://${host_ip}:${NGINX_PORT}/v1/chatqna \
+       -H "Content-Type: application/json" \
+       -d '{"messages": "What is the revenue of Nike in 2023?"}'
+   ```
+
+7. **Dataprep Microservice (Optional)**
+
+   If you want to update the default knowledge base, you can use the following commands:
+
+   Update the knowledge base via the local file [nke-10k-2023.pdf](https://github.com/opea-project/GenAIComps/blob/v1.1/comps/retrievers/redis/data/nke-10k-2023.pdf). Either click [here](https://raw.githubusercontent.com/opea-project/GenAIComps/v1.1/comps/retrievers/redis/data/nke-10k-2023.pdf) to download the file via a web browser, or run this command in a terminal:
+
+   ```bash
+   wget https://raw.githubusercontent.com/opea-project/GenAIComps/v1.1/comps/retrievers/redis/data/nke-10k-2023.pdf
+   ```
+
+   Upload:
+
+   ```bash
+   curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
+     -H "Content-Type: multipart/form-data" \
+     -F "files=@./nke-10k-2023.pdf"
+   ```
+
+   This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment.
+
+   Add knowledge base content via HTTP links:
+
+   ```bash
+   curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
+     -H "Content-Type: multipart/form-data" \
+     -F 'link_list=["https://opea.dev"]'
+   ```
+
+   This command updates a knowledge base by submitting a list of HTTP links for processing.
+
+   You can also list the files that have been uploaded:
+
+   ```bash
+   curl -X POST "http://${host_ip}:6007/v1/dataprep/get" \
+     -H "Content-Type: application/json"
+   ```
+
+   The response is JSON like the following. Notice that the returned `name`/`id` of an uploaded link ends in `.txt`, e.g. `https://xxx.txt`.
+
+   ```json
+   [
+     {
+       "name": "nke-10k-2023.pdf",
+       "id": "nke-10k-2023.pdf",
+       "type": "File",
+       "parent": ""
+     },
+     {
+       "name": "https://opea.dev.txt",
+       "id": "https://opea.dev.txt",
+       "type": "File",
+       "parent": ""
+     }
+   ]
+   ```
+
+   To delete a file or link you uploaded, use the `id` returned by the `/v1/dataprep/get` API as the `file_path`:
+
+   ```bash
+   # delete link
+   curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \
+     -d '{"file_path": "https://opea.dev.txt"}' \
+     -H "Content-Type: application/json"
+
+   # delete file
+   curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \
+     -d '{"file_path": "nke-10k-2023.pdf"}' \
+     -H "Content-Type: application/json"
+
+   # delete all uploaded files and links
+   curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \
+     -d '{"file_path": "all"}' \
+     -H "Content-Type: application/json"
+   ```
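+
+   As a convenience, the dataprep calls above can also be scripted. The snippet below is a minimal sketch using the third-party Python `requests` package (not part of this deployment); it assumes the default dataprep port `6007` used throughout this guide and reads `host_ip` from the environment, so adjust both for your setup.
+
+   ```python
+   import os
+
+   import requests
+
+   # Assumption: host_ip exported as in this guide; 6007 is the default dataprep port.
+   base_url = f"http://{os.environ.get('host_ip', 'localhost')}:6007/v1/dataprep"
+
+   # Upload a local file (multipart upload, same as the curl -F "files=@..." example above).
+   with open("nke-10k-2023.pdf", "rb") as f:
+       resp = requests.post(f"{base_url}/ingest", files={"files": f})
+       resp.raise_for_status()
+
+   # Ingest a list of HTTP links (same as the curl -F 'link_list=[...]' example above).
+   resp = requests.post(f"{base_url}/ingest", files={"link_list": (None, '["https://opea.dev"]')})
+   resp.raise_for_status()
+
+   # List everything that has been ingested so far.
+   resp = requests.post(f"{base_url}/get", headers={"Content-Type": "application/json"})
+   print(resp.json())
+   ```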
+
+### Profile Microservices
+
+To further analyze microservice performance, users can follow the instructions below to profile the microservices.
+
+#### 1. LLM Endpoint Service
+
+Users can profile the performance of the endpoint service using standard HTTP/network profiling tools such as:
+
+- cURL timing statistics
+- Browser developer tools
+- Network monitoring tools
+
+Example using cURL with timing data:
+
+```bash
+curl -w "\nTime Statistics:\n-----------------\n\
+DNS Lookup: %{time_namelookup}s\n\
+TCP Connect: %{time_connect}s\n\
+TLS Handshake: %{time_appconnect}s\n\
+First Byte: %{time_starttransfer}s\n\
+Total Time: %{time_total}s\n" \
+-H "Content-Type: application/json" \
+-H "Authorization: Bearer $OPENAI_API_KEY" \
+-d '{
+  "model": "'"${LLM_MODEL_ID}"'",
+  "messages": [
+    {
+      "role": "user",
+      "content": "What is machine learning?"
+    }
+  ]
+}' \
+${LLM_ENDPOINT}/v1/chat/completions
+```
+
+You can also use tools like `ab` (Apache Benchmark) for load testing, where `payload.json` contains the JSON request body shown above:
+
+```bash
+ab -n 100 -c 10 -p payload.json -T 'application/json' \
+   -H "Authorization: Bearer $OPENAI_API_KEY" \
+   ${LLM_ENDPOINT}/v1/chat/completions
+```
+
+For detailed API latency monitoring, consider using:
+
+- Grafana for visualization
+- Prometheus for metrics collection
+- OpenTelemetry for distributed tracing
+
+## Conclusion
+
+This guide should enable developers to deploy the default configuration or any of the other compose YAML files for different configurations. It also highlights the configurable parameters that can be set before deployment.
diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README_opengauss.md b/ChatQnA/docker_compose/intel/cpu/xeon/README_opengauss.md
new file mode 100644
index 0000000000..4cf2670150
--- /dev/null
+++ b/ChatQnA/docker_compose/intel/cpu/xeon/README_opengauss.md
@@ -0,0 +1,424 @@
+# Deploying ChatQnA with openGauss on Intel® Xeon® Processors
+
+This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel® Xeon® servers. The pipeline integrates **openGauss** as the vector database (VectorDB) and includes microservices such as `embedding`, `retriever`, `rerank`, and `llm`.
+
+---
+
+## Table of Contents
+
+1. [Quick Start](#quick-start)
+2. [Build Docker Images](#build-docker-images)
+3. [Validate Microservices](#validate-microservices)
+4. [Launch the UI](#launch-the-ui)
+5. [Launch the Conversational UI (Optional)](#launch-the-conversational-ui-optional)
+
+---
+
+## Quick Start
+
+### 1. Set up Environment Variables
+
+To set up environment variables for deploying ChatQnA services, follow these steps:
+
+1.
Set the required environment variables: + + ```bash + # Example: host_ip="192.168.1.1" + export host_ip="External_Public_IP" + export HF_TOKEN="Your_Huggingface_API_Token" + export GS_USER="gaussdb" + export GS_PASSWORD="openGauss@123" + export GS_DB="postgres" + export GS_CONNECTION_STRING="opengauss+psycopg2://${GS_USER}:${GS_PASSWORD}@${ip_address}:5432/${GS_DB}" + ``` + +2. If you are in a proxy environment, also set the proxy-related environment variables: + + ```bash + export http_proxy="Your_HTTP_Proxy" + export https_proxy="Your_HTTPS_Proxy" + # Example: no_proxy="localhost,127.0.0.1,192.168.1.1" + export no_proxy="Your_No_Proxy",chatqna-xeon-ui-server,chatqna-xeon-backend-server,dataprep-opengauss-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm-service + ``` + +3. Set up other environment variables: + ```bash + source ./set_env.sh + ``` + +### 2.Run Docker Compose + +```bash +docker compose -f compose_opengauss.yaml up -d +``` + +It will automatically download the Docker image on `Docker hub`: + +```bash +docker pull opea/chatqna:latest +docker pull opea/chatqna-ui:latest +``` + +Note: You should build docker image from source by yourself if: + +- You are developing off the git main branch (as the container's ports in the repo may be different from the published docker image). +- You can't download the docker image. +- You want to use a specific version of Docker image. + +Please refer to ['Build Docker Images'](#build-docker-images) in below. + +### 3.Consume the ChatQnA Service + +```bash +curl http://${host_ip}:8888/v1/chatqna \ + -H "Content-Type: application/json" \ + -d '{ + "messages": "What is the revenue of Nike in 2023?" + }' +``` + +## Build Docker Images + +First of all, you need to build Docker Images locally and install the python package of it. + +```bash +git clone https://github.com/opea-project/GenAIComps.git +cd GenAIComps +``` + +### 1. Build Retriever Image + +```bash +docker build --no-cache -t opea/retriever:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . +``` + +### 2. Build Dataprep Image + +```bash +docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . +cd .. +``` + +### 3. Build MegaService Docker Image + +1. MegaService with Rerank + + To construct the Mega Service with Rerank, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna.py` Python script. Build MegaService Docker image via below command: + + ```bash + git clone https://github.com/opea-project/GenAIExamples.git + cd GenAIExamples/ChatQnA + docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . + ``` + +2. MegaService without Rerank + + To construct the Mega Service without Rerank, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna_without_rerank.py` Python script. Build MegaService Docker image via below command: + + ```bash + git clone https://github.com/opea-project/GenAIExamples.git + cd GenAIExamples/ChatQnA + docker build --no-cache -t opea/chatqna-without-rerank:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.without_rerank . + ``` + +### 4. 
Build UI Docker Image + +Build frontend Docker image via below command: + +```bash +cd GenAIExamples/ChatQnA/ui +docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile . +``` + +### 5. Build Conversational React UI Docker Image (Optional) + +Build frontend Docker image that enables Conversational experience with ChatQnA megaservice via below command: + +**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable** + +```bash +cd GenAIExamples/ChatQnA/ui +docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react . +``` + +### 6. Build Nginx Docker Image + +```bash +cd GenAIComps +docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/nginx/src/Dockerfile . +``` + +Then run the command `docker images`, you will have the following 5 Docker Images: + +1. `opea/dataprep:latest` +2. `opea/retriever:latest` +3. `opea/chatqna:latest` or `opea/chatqna-without-rerank:latest` +4. `opea/chatqna-ui:latest` +5. `opea/nginx:latest` + +## 🚀 Start Microservices + +### Required Models + +By default, the embedding, reranking and LLM models are set to a default value as listed below: + +| Service | Model | +| --------- | ----------------------------------- | +| Embedding | BAAI/bge-base-en-v1.5 | +| Reranking | BAAI/bge-reranker-base | +| LLM | meta-llama/Meta-Llama-3-8B-Instruct | + +Change the `xxx_MODEL_ID` below for your needs. + +For users in China who are unable to download models directly from Huggingface, you can use [ModelScope](https://www.modelscope.cn/models) or a Huggingface mirror to download models. The vLLM can load the models either online or offline as described below: + +1. Online + + ```bash + export HF_TOKEN=${your_hf_token} + export HF_ENDPOINT="https://hf-mirror.com" + model_name="meta-llama/Meta-Llama-3-8B-Instruct" + docker run -p 8008:80 -v ./data:/root/.cache/huggingface/hub --name vllm-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 128g opea/vllm:latest --model $model_name --host 0.0.0.0 --port 80 + ``` + +2. Offline + + - Search your model name in ModelScope. For example, check [this page](https://modelscope.cn/models/LLM-Research/Meta-Llama-3-8B-Instruct/files) for model `Meta-Llama-3-8B-Instruct`. + + - Click on `Download this model` button, and choose one way to download the model to your local path `/path/to/model`. + + - Run the following command to start the LLM service. + + ```bash + export HF_TOKEN=${your_hf_token} + export model_path="/path/to/model" + docker run -p 8008:80 -v $model_path:/root/.cache/huggingface/hub --name vllm-service --shm-size 128g opea/vllm:latest --model /root/.cache/huggingface/hub --host 0.0.0.0 --port 80 + ``` + +### Set up Environment Variables + +1. Set the required environment variables: + + ```bash + # Example: host_ip="192.168.1.1" + export host_ip="External_Public_IP" + export HF_TOKEN="Your_Huggingface_API_Token" + # Example: NGINX_PORT=80 + export NGINX_PORT=${your_nginx_port} + export GS_USER="gaussdb" + export GS_PASSWORD="openGauss@123" + export GS_DB="postgres" + export GS_CONNECTION_STRING="opengauss+psycopg2://${GS_USER}:${GS_PASSWORD}@${ip_address}:5432/${GS_DB}" + ``` + +2. 
If you are in a proxy environment, also set the proxy-related environment variables: + + ```bash + export http_proxy="Your_HTTP_Proxy" + export https_proxy="Your_HTTPs_Proxy" + # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" + export no_proxy="Your_No_Proxy",chatqna-xeon-ui-server,chatqna-xeon-backend-server,dataprep-opengauss-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm-service + ``` + +3. Set up other environment variables, make sure to update the INDEX_NAME variable to use openGauss Index name: + + ```bash + source ./set_env.sh + ``` + +### Start all the services Docker Containers + +> Before running the docker compose command, you need to be in the folder that has the docker compose yaml file + +```bash +cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/ +``` + +If use vLLM backend. + +```bash +# Start ChatQnA with Rerank Pipeline +docker compose -f compose_opengauss.yaml up -d +``` + +## Validate Microservices + +Note, when verify the microservices by curl or API from remote client, please make sure the **ports** of the microservices are opened in the firewall of the cloud node. +Follow the instructions to validate MicroServices. +For details on how to verify the correctness of the response, refer to [how-to-validate_service](../../hpu/gaudi/how_to_validate_service.md). + +1. TEI Embedding Service + + ```bash + curl ${host_ip}:6006/embed \ + -X POST \ + -d '{"inputs":"What is Deep Learning?"}' \ + -H 'Content-Type: application/json' + ``` + +2. Retriever Microservice + + To consume the retriever microservice, you need to generate a mock embedding vector by Python script. The length of embedding vector + is determined by the embedding model. + Here we use the model `EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"`, which vector size is 768. + + Check the vector dimension of your embedding model, set `your_embedding` dimension equals to it. + + ```bash + export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + curl http://${host_ip}:7000/v1/retrieval \ + -X POST \ + -d "{\"text\":\"test\",\"embedding\":${your_embedding}}" \ + -H 'Content-Type: application/json' + ``` + +3. TEI Reranking Service + + > Skip for ChatQnA without Rerank pipeline + + ```bash + curl http://${host_ip}:8808/rerank \ + -X POST \ + -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \ + -H 'Content-Type: application/json' + ``` + +4. LLM backend Service + + In the first startup, this service will take more time to download, load and warm up the model. After it's finished, the service will be ready. + + Try the command below to check whether the LLM serving is ready. + + ```bash + docker logs vllm-service 2>&1 | grep complete + ``` + + If the service is ready, you will get the response like below. + + ```text + INFO: Application startup complete. + ``` + + Then try the `cURL` command below to validate services. + + ```bash + curl http://${host_ip}:9009/v1/chat/completions \ + -X POST \ + -d '{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens":17}' \ + -H 'Content-Type: application/json' + ``` + +5. MegaService + + ```bash + curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{ + "messages": "What is the revenue of Nike in 2023?" + }' + ``` + +6. 
Nginx Service + + ```bash + curl http://${host_ip}:${NGINX_PORT}/v1/chatqna \ + -H "Content-Type: application/json" \ + -d '{"messages": "What is the revenue of Nike in 2023?"}' + ``` + +7. Dataprep Microservice(Optional) + +If you want to update the default knowledge base, you can use the following commands: + +Update Knowledge Base via Local File [nke-10k-2023.pdf](https://raw.githubusercontent.com/opea-project/GenAIComps/v1.3/comps/third_parties/pathway/src/data/nke-10k-2023.pdf). Or +click [here](https://raw.githubusercontent.com/opea-project/GenAIComps/v1.3/comps/third_parties/pathway/src/data/nke-10k-2023.pdf) to download the file via any web browser. +Or run this command to get the file on a terminal. + +```bash +wget https://raw.githubusercontent.com/opea-project/GenAIComps/v1.3/comps/third_parties/pathway/src/data/nke-10k-2023.pdf + +``` + +Upload: + +```bash +curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./nke-10k-2023.pdf" +``` + +This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment. + +Add Knowledge Base via HTTP Links: + +```bash +curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \ + -H "Content-Type: multipart/form-data" \ + -F 'link_list=["https://opea.dev"]' +``` + +This command updates a knowledge base by submitting a list of HTTP links for processing. + +To delete the files/link you uploaded: + +```bash +# delete all uploaded files and links +curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \ + -d '{"file_path": "all"}' \ + -H "Content-Type: application/json" +``` + +## Launch the UI + +### Launch with origin port + +To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below: + +```yaml + chaqna-gaudi-ui-server: + image: opea/chatqna-ui:latest + ... + ports: + - "80:5173" +``` + +### Launch with Nginx + +If you want to launch the UI using Nginx, open this URL: `http://${host_ip}:${NGINX_PORT}` in your browser to access the frontend. + +## Launch the Conversational UI (Optional) + +To access the Conversational UI (react based) frontend, modify the UI service in the `compose.yaml` file. Replace `chaqna-xeon-ui-server` service with the `chatqna-xeon-conversation-ui-server` service as per the config below: + +```yaml +chaqna-xeon-conversation-ui-server: + image: opea/chatqna-conversation-ui:latest + container_name: chatqna-xeon-conversation-ui-server + environment: + - APP_BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT} + - APP_DATA_PREP_SERVICE_URL=${DATAPREP_SERVICE_ENDPOINT} + ports: + - "5174:80" + depends_on: + - chaqna-xeon-backend-server + ipc: host + restart: always +``` + +Once the services are up, open the following URL in your browser: http://{host_ip}:5174. By default, the UI runs on port 80 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below: + +```yaml + chaqna-gaudi-conversation-ui-server: + image: opea/chatqna-conversation-ui:latest + ... 
+ ports: + - "80:80" +``` + +![project-screenshot](../../../../assets/img/chat_ui_init.png) + +Here is an example of running ChatQnA: + +![project-screenshot](../../../../assets/img/chat_ui_response.png) + +Here is an example of running ChatQnA with Conversational UI (React): + +![project-screenshot](../../../../assets/img/conversation_ui_response.png) diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml index fb9d4ce9c8..e276089e36 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose.yaml @@ -89,7 +89,7 @@ services: HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1 container_name: vllm-service ports: - "9009:80" diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_endpoint_openai.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_endpoint_openai.yaml new file mode 100644 index 0000000000..ed8045276e --- /dev/null +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_endpoint_openai.yaml @@ -0,0 +1,173 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + redis-vector-db: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + ports: + - "6379:6379" + - "8001:8001" + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 5s + timeout: 3s + retries: 10 + dataprep-redis-service: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-redis-server + depends_on: + redis-vector-db: + condition: service_healthy + tei-embedding-service: + condition: service_started + ports: + - "6007:5000" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: redis://redis-vector-db:6379 + REDIS_HOST: redis-vector-db + INDEX_NAME: ${INDEX_NAME} + TEI_ENDPOINT: http://tei-embedding-service:80 + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] + interval: 10s + timeout: 5s + retries: 50 + restart: unless-stopped + tei-embedding-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.7 + container_name: tei-embedding-server + ports: + - "6006:80" + volumes: + - "${MODEL_CACHE:-./data}:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate + retriever: + image: ${REGISTRY:-opea}/retriever:${TAG:-latest} + container_name: retriever-redis-server + depends_on: + - redis-vector-db + ports: + - "7000:7000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: redis://redis-vector-db:6379 + REDIS_HOST: redis-vector-db + INDEX_NAME: ${INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + LOGFLAG: ${LOGFLAG} + RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" + restart: unless-stopped + tei-reranking-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.7 + container_name: tei-reranking-server + ports: + - "8808:80" + volumes: + - "${MODEL_CACHE:-./data}:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + command: --model-id ${RERANK_MODEL_ID} --auto-truncate + # Substitute 
vllm with OpeaTextGenService + textgen-service-endpoint-openai: # Used instead of vllm + image: opea/llm-textgen:${TAG:-latest} # Changed image + container_name: textgen-service-endpoint-openai # Updated container name + ipc: host + ports: + - "9000:9000" # Changed port mapping + environment: + LLM_COMPONENT_NAME: OpeaTextGenService + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_MODEL_ID: ${LLM_MODEL_ID} # Set to model ID + LLM_ENDPOINT: ${LLM_ENDPOINT} # An openai compatible endpoint, e.g. Hugging Face, OpenRouter, OpenAI + OPENAI_API_KEY: ${OPENAI_API_KEY} # Add OpenRouter API Key + chatqna-xeon-backend-server: + image: ${REGISTRY:-opea}/chatqna:${TAG:-latest} + container_name: chatqna-xeon-backend-server + depends_on: + redis-vector-db: + condition: service_started + dataprep-redis-service: + condition: service_healthy + tei-embedding-service: + condition: service_started + retriever: + condition: service_started + tei-reranking-service: + condition: service_started + textgen-service-endpoint-openai: + condition: service_started + ports: + - "8888:8888" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - MEGA_SERVICE_HOST_IP=chatqna-xeon-backend-server + - EMBEDDING_SERVER_HOST_IP=tei-embedding-service + - EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80} + - RETRIEVER_SERVICE_HOST_IP=retriever + - RERANK_SERVER_HOST_IP=tei-reranking-service + - RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80} + - LLM_SERVER_HOST_IP=textgen-service-endpoint-openai # Updated host IP + - LLM_SERVER_PORT=${LLM_SERVER_PORT:-9000} + - LLM_MODEL=${LLM_MODEL_ID} + - LOGFLAG=${LOGFLAG} + ipc: host + restart: always + chatqna-xeon-ui-server: + image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest} + container_name: chatqna-xeon-ui-server + depends_on: + - chatqna-xeon-backend-server + ports: + - "5173:5173" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + ipc: host + restart: always + chatqna-xeon-nginx-server: + image: ${REGISTRY:-opea}/nginx:${TAG:-latest} + container_name: chatqna-xeon-nginx-server + depends_on: + - chatqna-xeon-backend-server + - chatqna-xeon-ui-server + ports: + - "${NGINX_PORT:-80}:80" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - FRONTEND_SERVICE_IP=chatqna-xeon-ui-server + - FRONTEND_SERVICE_PORT=5173 + - BACKEND_SERVICE_NAME=chatqna + - BACKEND_SERVICE_IP=chatqna-xeon-backend-server + - BACKEND_SERVICE_PORT=8888 + - DATAPREP_SERVICE_IP=dataprep-redis-service + - DATAPREP_SERVICE_PORT=5000 + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_faqgen.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_faqgen.yaml index e34f6f0062..a1a7d05fba 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_faqgen.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_faqgen.yaml @@ -81,7 +81,7 @@ services: HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1 container_name: vllm-server ports: - ${LLM_ENDPOINT_PORT:-9009}:80 diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_mariadb.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_mariadb.yaml index ccd55bbce3..ab5217f359 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_mariadb.yaml +++ 
b/ChatQnA/docker_compose/intel/cpu/xeon/compose_mariadb.yaml @@ -90,7 +90,7 @@ services: HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1 container_name: vllm-service ports: - "9009:80" diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_milvus.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_milvus.yaml index 67226bd404..2f6fe6d439 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_milvus.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_milvus.yaml @@ -144,7 +144,7 @@ services: command: --model-id ${RERANK_MODEL_ID} --auto-truncate vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1 container_name: vllm-service ports: - "9009:80" diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_openeuler.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_openeuler.yaml index 44fef78971..679ddd2fd8 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_openeuler.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_openeuler.yaml @@ -88,14 +88,13 @@ services: HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate vllm-service: - image: openeuler/vllm-cpu:0.9.1-oe2403lts + image: openeuler/vllm-cpu:0.10.1-oe2403lts container_name: vllm-service ports: - "9009:80" volumes: - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub" shm_size: 128g - privileged: true environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_opengauss.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_opengauss.yaml new file mode 100644 index 0000000000..de131ba873 --- /dev/null +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_opengauss.yaml @@ -0,0 +1,185 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + opengauss-db: + image: opengauss/opengauss:7.0.0-RC2.B015 + container_name: opengauss-db + ports: + - "${GS_PORT:-5432}:5432" + restart: always + environment: + - GS_PASSWORD=${GS_PASSWORD} + healthcheck: + test: ["CMD-SHELL", "sleep 10 && exit 0"] + interval: 1s + timeout: 15s + retries: 1 + dataprep-opengauss-service: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + container_name: dataprep-opengauss-server + depends_on: + opengauss-db: + condition: service_healthy + tei-embedding-service: + condition: service_started + ports: + - "6007:5000" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + GS_CONNECTION_STRING: ${GS_CONNECTION_STRING} + DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_OPENGAUSS" + TEI_ENDPOINT: http://tei-embedding-service:80 + TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 + HF_TOKEN: ${HF_TOKEN} + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] + interval: 10s + timeout: 5s + retries: 50 + restart: unless-stopped + tei-embedding-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.7 + container_name: tei-embedding-server + ports: + - "6006:80" + volumes: + - "${MODEL_CACHE:-./data}:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate + retriever: + image: ${REGISTRY:-opea}/retriever:${TAG:-latest} + container_name: retriever-opengauss-server + depends_on: + 
opengauss-db: + condition: service_healthy + ports: + - "7000:7000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 + HF_TOKEN: ${HF_TOKEN} + LOGFLAG: ${LOGFLAG} + GS_CONNECTION_STRING: ${GS_CONNECTION_STRING} + RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_OPENGAUSS" + restart: unless-stopped + tei-reranking-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.7 + container_name: tei-reranking-server + ports: + - "8808:80" + volumes: + - "${MODEL_CACHE:-./data}:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HF_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + command: --model-id ${RERANK_MODEL_ID} --auto-truncate + vllm-service: + image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1 + container_name: vllm-service + ports: + - "9009:80" + volumes: + - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub" + shm_size: 128g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HF_TOKEN} + LLM_MODEL_ID: ${LLM_MODEL_ID} + VLLM_TORCH_PROFILER_DIR: "/mnt" + HF_HUB_OFFLINE: ${HF_HUB_OFFLINE:-0} + VLLM_CPU_KVCACHE_SPACE: 40 + healthcheck: + test: ["CMD-SHELL", "curl -f http://$host_ip:9009/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80 + chatqna-xeon-backend-server: + image: ${REGISTRY:-opea}/chatqna:${TAG:-latest} + container_name: chatqna-xeon-backend-server + depends_on: + opengauss-db: + condition: service_started + dataprep-opengauss-service: + condition: service_healthy + tei-embedding-service: + condition: service_started + retriever: + condition: service_started + tei-reranking-service: + condition: service_started + vllm-service: + condition: service_healthy + ports: + - "8888:8888" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - MEGA_SERVICE_HOST_IP=chatqna-xeon-backend-server + - EMBEDDING_SERVER_HOST_IP=tei-embedding-service + - EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80} + - RETRIEVER_SERVICE_HOST_IP=retriever + - RERANK_SERVER_HOST_IP=tei-reranking-service + - RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80} + - LLM_SERVER_HOST_IP=vllm-service + - LLM_SERVER_PORT=80 + - LLM_MODEL=${LLM_MODEL_ID} + - LOGFLAG=${LOGFLAG} + ipc: host + restart: always + chatqna-xeon-ui-server: + image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest} + container_name: chatqna-xeon-ui-server + depends_on: + - chatqna-xeon-backend-server + ports: + - "5173:5173" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + ipc: host + restart: always + chatqna-xeon-nginx-server: + image: ${REGISTRY:-opea}/nginx:${TAG:-latest} + container_name: chatqna-xeon-nginx-server + depends_on: + - chatqna-xeon-backend-server + - chatqna-xeon-ui-server + ports: + - "${NGINX_PORT:-80}:80" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - FRONTEND_SERVICE_IP=chatqna-xeon-ui-server + - FRONTEND_SERVICE_PORT=5173 + - BACKEND_SERVICE_NAME=chatqna + - BACKEND_SERVICE_IP=chatqna-xeon-backend-server + - BACKEND_SERVICE_PORT=8888 + - DATAPREP_SERVICE_IP=dataprep-opengauss-service + - DATAPREP_SERVICE_PORT=5000 + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git 
a/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml index cfa6c5aebe..ffa4dd67f8 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_pinecone.yaml @@ -76,7 +76,7 @@ services: HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1 container_name: vllm-service ports: - "9009:80" diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml index fb12b77e1a..3fdd295374 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_qdrant.yaml @@ -81,7 +81,7 @@ services: HF_HUB_ENABLE_HF_TRANSFER: 0 command: --model-id ${RERANK_MODEL_ID} --auto-truncate vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1 container_name: vllm-service ports: - "6042:80" diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml b/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml index 841a0ad531..00e6474b86 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml +++ b/ChatQnA/docker_compose/intel/cpu/xeon/compose_without_rerank.yaml @@ -65,7 +65,7 @@ services: RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" restart: unless-stopped vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1 container_name: vllm-service ports: - "9009:80" diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md index 9a1cd54bb8..40d282cc43 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md @@ -294,6 +294,43 @@ The `tgi-guardrails-service` uses the `GUARDRAILS_MODEL_ID` parameter to select The `vllm-guardrails-service` uses the `GUARDRAILS_MODEL_ID` parameter to select a [supported model](https://docs.vllm.ai/en/latest/models/supported_models.html) for the associated `opea/vllm-gaudi:latest` image. It uses the `NUM_CARDS` parameter. +## ChatQnA with Conversational UI (Optional) + +To access the Conversational UI (react based) frontend, modify the UI service in the `compose` file used to deploy. Replace `chatqna-xeon-ui-server` service with the `chatqna-xeon-conversation-ui-server` service as per the config below: + +```yaml +chatqna-xeon-conversation-ui-server: + image: opea/chatqna-conversation-ui:latest + container_name: chatqna-xeon-conversation-ui-server + environment: + - APP_BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT} + - APP_DATA_PREP_SERVICE_URL=${DATAPREP_SERVICE_ENDPOINT} + ports: + - "5174:80" + depends_on: + - chatqna-xeon-backend-server + ipc: host + restart: always +``` + +Once the services are up, open the following URL in the browser: http://{host_ip}:5174. By default, the UI runs on port 80 internally. If the developer prefers to use a different host port to access the frontend, it can be modified by port mapping in the `compose.yaml` file as shown below: + +```yaml + chatqna-gaudi-conversation-ui-server: + image: opea/chatqna-conversation-ui:latest + ... 
+ ports: + - "80:80" +``` + +Here is an example of running ChatQnA (default UI): + +![project-screenshot](../../../../assets/img/chat_ui_response.png) + +Here is an example of running ChatQnA with Conversational UI (React): + +![project-screenshot](../../../../assets/img/conversation_ui_response.png) + ## Conclusion In examining the various services and configurations across different deployments, developers should gain a comprehensive understanding of how each component contributes to the overall functionality and performance of a ChatQnA pipeline on an Intel® Gaudi® platform. Key services such as the `vllm-service`, `tei-embedding-service`, `tei-reranking-service`, `tgi-guardrails-service`and `vllm-guardrails-service` each consume Gaudi accelerators, leveraging specific models and hardware resources to optimize their respective tasks. The `LLM_MODEL_ID`, `EMBEDDING_MODEL_ID`, `RERANK_MODEL_ID`, and `GUARDRAILS_MODEL_ID` parameters specify the models used, directly impacting the quality and effectiveness of language processing, embedding, reranking, and safety operations. diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.telemetry.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.telemetry.yaml index 00ace1e451..428271991c 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.telemetry.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.telemetry.yaml @@ -62,7 +62,7 @@ services: command: - '--path.procfs=/host/proc' - '--path.sysfs=/host/sys' - - --collector.filesystem.ignored-mount-points + - --collector.filesystem.mount-points-exclude - "^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)" ports: - 9100:9100 diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml index 23e7cee19b..a762b21767 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -92,7 +92,7 @@ services: MAX_WARMUP_SEQUENCE_LENGTH: 512 command: --model-id ${RERANK_MODEL_ID} --auto-truncate vllm-service: - image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} + image: opea/vllm-gaudi:1.22.0 container_name: vllm-gaudi-server ports: - "8007:80" diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_faqgen.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_faqgen.yaml index 3fa1b5c4af..182c2b5bc2 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_faqgen.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_faqgen.yaml @@ -85,7 +85,7 @@ services: MAX_WARMUP_SEQUENCE_LENGTH: 512 command: --model-id ${RERANK_MODEL_ID} --auto-truncate vllm-service: - image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} + image: opea/vllm-gaudi:1.22.0 container_name: vllm-gaudi-server ports: - ${LLM_ENDPOINT_PORT:-8007}:80 diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml index e2ec071f1e..46ccee4773 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml @@ -32,7 +32,7 @@ services: retries: 50 restart: unless-stopped vllm-guardrails-service: - image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} + image: opea/vllm-gaudi:1.22.0 container_name: vllm-guardrails-server ports: - "8088:80" @@ -130,7 +130,7 @@ services: MAX_WARMUP_SEQUENCE_LENGTH: 512 command: --model-id ${RERANK_MODEL_ID} --auto-truncate 
vllm-service: - image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} + image: opea/vllm-gaudi:1.22.0 container_name: vllm-gaudi-server ports: - "8008:80" diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml index 9c38b38772..017bf8d204 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml @@ -65,7 +65,7 @@ services: RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" restart: unless-stopped vllm-service: - image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} + image: opea/vllm-gaudi:1.22.0 container_name: vllm-gaudi-server ports: - "8007:80" diff --git a/ChatQnA/docker_image_build/build.yaml b/ChatQnA/docker_image_build/build.yaml index 3a5f6d4be3..2a5f8ccd62 100644 --- a/ChatQnA/docker_image_build/build.yaml +++ b/ChatQnA/docker_image_build/build.yaml @@ -114,18 +114,6 @@ services: context: GenAIComps dockerfile: comps/third_parties/vllm/src/Dockerfile.amd_gpu image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest} - vllm: - build: - context: vllm - dockerfile: docker/Dockerfile.cpu - extends: chatqna - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} - vllm-gaudi: - build: - context: vllm-fork - dockerfile: Dockerfile.hpu - extends: chatqna - image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} nginx: build: context: GenAIComps diff --git a/ChatQnA/kubernetes/helm/README.md b/ChatQnA/kubernetes/helm/README.md index 8ada19b070..5186fe3c8d 100644 --- a/ChatQnA/kubernetes/helm/README.md +++ b/ChatQnA/kubernetes/helm/README.md @@ -14,7 +14,7 @@ helm install chatqna oci://ghcr.io/opea-project/charts/chatqna --set global.HUG ``` export HFTOKEN="insert-your-huggingface-token-here" -helm install chatqna oci://ghcr.io/opea-project/charts/chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-vllm-values.yaml +helm install chatqna oci://ghcr.io/opea-project/charts/chatqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml ``` ## Deploy variants of ChatQnA diff --git a/ChatQnA/kubernetes/helm/cpu-openeuler-values.yaml b/ChatQnA/kubernetes/helm/cpu-openeuler-values.yaml new file mode 100644 index 0000000000..5206d1abf9 --- /dev/null +++ b/ChatQnA/kubernetes/helm/cpu-openeuler-values.yaml @@ -0,0 +1,58 @@ +# Copyright (C) 2025 Huawei Technologies Co., Ltd. +# SPDX-License-Identifier: Apache-2.0 + +# This file is based on cpu-values.yaml and overrides image tags to 'latest-openeuler' +# for all enabled services to run on openEuler. + +# Overrides for the main chart image +image: + tag: latest-openeuler + +# Overrides from cpu-values.yaml +vllm: + image: + repository: openeuler/vllm-cpu + tag: 0.10.1-oe2403lts + LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct + + # Uncomment the following model specific settings for DeepSeek models + #VLLM_CPU_KVCACHE_SPACE: 40 + #resources: + # requests: + # memory: 60Gi # 40G for KV cache, and 20G for DeepSeek-R1-Distill-Qwen-7B, need to adjust it for other models + +# Overrides for subchart images +# Based on the default values in opea-project/GenAIInfra/helm-charts/chatqna/values.yaml, +# the following services are enabled by default. 
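+
+# Usage sketch (hypothetical; follows the helm install pattern shown in
+# ChatQnA/kubernetes/helm/README.md, so verify the chart reference and token for your cluster):
+#   export HFTOKEN="insert-your-huggingface-token-here"
+#   helm install chatqna oci://ghcr.io/opea-project/charts/chatqna \
+#     --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-openeuler-values.yaml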
+ +# data-prep service +data-prep: + image: + tag: latest-openeuler + +# retriever-usvc service +retriever-usvc: + image: + tag: latest-openeuler + +# tei-rerank service +teirerank: + image: + repository: openeuler/text-embeddings-inference-cpu + tag: 1.7.0-oe2403lts + +# tei service +tei: + image: + repository: openeuler/text-embeddings-inference-cpu + tag: 1.7.0-oe2403lts + +# nginx service +nginx: + image: + tag: latest-openeuler + +# chatqna-ui service +chatqna-ui: + image: + tag: latest-openeuler diff --git a/ChatQnA/kubernetes/helm/faqgen-gaudi-tgi-values.yaml b/ChatQnA/kubernetes/helm/faqgen-gaudi-tgi-values.yaml index 88fca4ed55..99b7fb5c58 100644 --- a/ChatQnA/kubernetes/helm/faqgen-gaudi-tgi-values.yaml +++ b/ChatQnA/kubernetes/helm/faqgen-gaudi-tgi-values.yaml @@ -49,8 +49,8 @@ teirerank: OMPI_MCA_btl_vader_single_copy_mechanism: "none" MAX_WARMUP_SEQUENCE_LENGTH: "512" image: - repository: ghcr.io/huggingface/tei-gaudi - tag: 1.5.0 + repository: ghcr.io/huggingface/text-embeddings-inference + tag: hpu-1.7 resources: limits: habana.ai/gaudi: 1 diff --git a/ChatQnA/kubernetes/helm/faqgen-gaudi-values.yaml b/ChatQnA/kubernetes/helm/faqgen-gaudi-values.yaml index 7dd455e112..6e34ab4b09 100644 --- a/ChatQnA/kubernetes/helm/faqgen-gaudi-values.yaml +++ b/ChatQnA/kubernetes/helm/faqgen-gaudi-values.yaml @@ -42,8 +42,8 @@ teirerank: OMPI_MCA_btl_vader_single_copy_mechanism: "none" MAX_WARMUP_SEQUENCE_LENGTH: "512" image: - repository: ghcr.io/huggingface/tei-gaudi - tag: 1.5.0 + repository: ghcr.io/huggingface/text-embeddings-inference + tag: hpu-1.7 resources: limits: habana.ai/gaudi: 1 diff --git a/ChatQnA/kubernetes/helm/faqgen-rocm-tgi-values.yaml b/ChatQnA/kubernetes/helm/faqgen-rocm-tgi-values.yaml new file mode 100644 index 0000000000..19ca79f10a --- /dev/null +++ b/ChatQnA/kubernetes/helm/faqgen-rocm-tgi-values.yaml @@ -0,0 +1,52 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +CHATQNA_TYPE: "CHATQNA_FAQGEN" +llm-uservice: + enabled: true + image: + repository: opea/llm-faqgen + LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct + FAQGEN_BACKEND: "TGI" + service: + port: 80 +tgi: + enabled: true + accelDevice: "rocm" + image: + repository: ghcr.io/huggingface/text-generation-inference + tag: "3.0.0-rocm" + LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct + MAX_INPUT_LENGTH: "2048" + MAX_TOTAL_TOKENS: "4096" + USE_FLASH_ATTENTION: "true" + FLASH_ATTENTION_RECOMPUTE: "false" + PYTORCH_TUNABLEOP_ENABLED: "0" + HIP_VISIBLE_DEVICES: "0,1" + MAX_BATCH_SIZE: "4" + extraCmdArgs: [ "--num-shard","2" ] + resources: + limits: + amd.com/gpu: "2" + requests: + cpu: 1 + memory: 16Gi + securityContext: + readOnlyRootFilesystem: false + runAsNonRoot: false + runAsUser: 0 + capabilities: + add: + - SYS_PTRACE + readinessProbe: + initialDelaySeconds: 60 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 + startupProbe: + initialDelaySeconds: 60 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 +vllm: + enabled: false diff --git a/ChatQnA/kubernetes/helm/faqgen-rocm-values.yaml b/ChatQnA/kubernetes/helm/faqgen-rocm-values.yaml new file mode 100644 index 0000000000..e8941d8153 --- /dev/null +++ b/ChatQnA/kubernetes/helm/faqgen-rocm-values.yaml @@ -0,0 +1,45 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +CHATQNA_TYPE: "CHATQNA_FAQGEN" +llm-uservice: + enabled: true + image: + repository: opea/llm-faqgen + LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct + FAQGEN_BACKEND: "vLLM" + service: + port: 80 
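+
+# Backend selection: FAQGEN_BACKEND above is "vLLM", so tgi is disabled below and the ROCm
+# build of vLLM serves the FAQGen model; the TGI counterpart of this setup is
+# faqgen-rocm-tgi-values.yaml. To deploy, pass this file to helm install with
+# -f faqgen-rocm-values.yaml (a sketch that assumes the same install pattern as the other
+# values files in this directory).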
+tgi: + enabled: false +vllm: + enabled: true + accelDevice: "rocm" + image: + repository: opea/vllm-rocm + tag: latest + env: + HIP_VISIBLE_DEVICES: "0" + TENSOR_PARALLEL_SIZE: "1" + HF_HUB_DISABLE_PROGRESS_BARS: "1" + HF_HUB_ENABLE_HF_TRANSFER: "0" + VLLM_USE_TRITON_FLASH_ATTN: "0" + VLLM_WORKER_MULTIPROC_METHOD: "spawn" + PYTORCH_JIT: "0" + HF_HOME: "/data" + extraCmd: + command: [ "python3", "/workspace/api_server.py" ] + extraCmdArgs: [ "--swap-space", "16", + "--disable-log-requests", + "--dtype", "float16", + "--num-scheduler-steps", "1", + "--distributed-executor-backend", "mp" ] + resources: + limits: + amd.com/gpu: "1" + startupProbe: + failureThreshold: 180 + securityContext: + readOnlyRootFilesystem: false + runAsNonRoot: false + runAsUser: 0 diff --git a/ChatQnA/kubernetes/helm/gaudi-tgi-values.yaml b/ChatQnA/kubernetes/helm/gaudi-tgi-values.yaml index 027fecb3fd..c5fc0fa8f8 100644 --- a/ChatQnA/kubernetes/helm/gaudi-tgi-values.yaml +++ b/ChatQnA/kubernetes/helm/gaudi-tgi-values.yaml @@ -43,8 +43,8 @@ teirerank: OMPI_MCA_btl_vader_single_copy_mechanism: "none" MAX_WARMUP_SEQUENCE_LENGTH: "512" image: - repository: ghcr.io/huggingface/tei-gaudi - tag: 1.5.0 + repository: ghcr.io/huggingface/text-embeddings-inference + tag: hpu-1.7 resources: limits: habana.ai/gaudi: 1 @@ -60,8 +60,8 @@ teirerank: # OMPI_MCA_btl_vader_single_copy_mechanism: "none" # MAX_WARMUP_SEQUENCE_LENGTH: "512" # image: -# repository: ghcr.io/huggingface/tei-gaudi -# tag: 1.5.0 +# repository: ghcr.io/huggingface/text-embeddings-inference +# tag: hpu-1.7 # resources: # limits: # habana.ai/gaudi: 1 diff --git a/ChatQnA/kubernetes/helm/gaudi-values.yaml b/ChatQnA/kubernetes/helm/gaudi-values.yaml index 19471c0e43..36a1ee29de 100644 --- a/ChatQnA/kubernetes/helm/gaudi-values.yaml +++ b/ChatQnA/kubernetes/helm/gaudi-values.yaml @@ -37,8 +37,8 @@ teirerank: OMPI_MCA_btl_vader_single_copy_mechanism: "none" MAX_WARMUP_SEQUENCE_LENGTH: "512" image: - repository: ghcr.io/huggingface/tei-gaudi - tag: 1.5.0 + repository: ghcr.io/huggingface/text-embeddings-inference + tag: hpu-1.7 resources: limits: habana.ai/gaudi: 1 diff --git a/ChatQnA/kubernetes/helm/guardrails-gaudi-values.yaml b/ChatQnA/kubernetes/helm/guardrails-gaudi-values.yaml index 5c88e73d0f..bc2faef9a8 100644 --- a/ChatQnA/kubernetes/helm/guardrails-gaudi-values.yaml +++ b/ChatQnA/kubernetes/helm/guardrails-gaudi-values.yaml @@ -19,8 +19,8 @@ guardrails-usvc: # tei: # accelDevice: "gaudi" # image: -# repository: ghcr.io/huggingface/tei-gaudi -# tag: 1.5.0 +# repository: ghcr.io/huggingface/text-embeddings-inference +# tag: hpu-1.7 # resources: # limits: # habana.ai/gaudi: 1 @@ -32,8 +32,8 @@ teirerank: OMPI_MCA_btl_vader_single_copy_mechanism: "none" MAX_WARMUP_SEQUENCE_LENGTH: "512" image: - repository: ghcr.io/huggingface/tei-gaudi - tag: "1.5.0" + repository: ghcr.io/huggingface/text-embeddings-inference + tag: hpu-1.7 resources: limits: habana.ai/gaudi: 1 diff --git a/ChatQnA/kubernetes/helm/rocm-tgi-values.yaml b/ChatQnA/kubernetes/helm/rocm-tgi-values.yaml new file mode 100644 index 0000000000..1a76b460d3 --- /dev/null +++ b/ChatQnA/kubernetes/helm/rocm-tgi-values.yaml @@ -0,0 +1,47 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Accelerate inferencing in heaviest components to improve performance +# by overriding their subchart values + +tgi: + enabled: true + accelDevice: "rocm" + image: + repository: ghcr.io/huggingface/text-generation-inference + tag: "3.0.0-rocm" + LLM_MODEL_ID: 
meta-llama/Meta-Llama-3-8B-Instruct + MAX_INPUT_LENGTH: "2048" + MAX_TOTAL_TOKENS: "4096" + PYTORCH_TUNABLEOP_ENABLED: "0" + USE_FLASH_ATTENTION: "true" + FLASH_ATTENTION_RECOMPUTE: "true" + HIP_VISIBLE_DEVICES: "0,1" + MAX_BATCH_SIZE: "4" + extraCmdArgs: [ "--num-shard","2" ] + resources: + limits: + amd.com/gpu: "2" + requests: + cpu: 1 + memory: 16Gi + securityContext: + readOnlyRootFilesystem: false + runAsNonRoot: false + runAsUser: 0 + capabilities: + add: + - SYS_PTRACE + readinessProbe: + initialDelaySeconds: 60 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 + startupProbe: + initialDelaySeconds: 60 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 + +vllm: + enabled: false diff --git a/ChatQnA/kubernetes/helm/rocm-values.yaml b/ChatQnA/kubernetes/helm/rocm-values.yaml new file mode 100644 index 0000000000..4d637bd8c6 --- /dev/null +++ b/ChatQnA/kubernetes/helm/rocm-values.yaml @@ -0,0 +1,39 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Accelerate inferencing in heaviest components to improve performance +# by overriding their subchart values + +tgi: + enabled: false +vllm: + enabled: true + accelDevice: "rocm" + image: + repository: opea/vllm-rocm + tag: latest + env: + HIP_VISIBLE_DEVICES: "0" + TENSOR_PARALLEL_SIZE: "1" + HF_HUB_DISABLE_PROGRESS_BARS: "1" + HF_HUB_ENABLE_HF_TRANSFER: "0" + VLLM_USE_TRITON_FLASH_ATTN: "0" + VLLM_WORKER_MULTIPROC_METHOD: "spawn" + PYTORCH_JIT: "0" + HF_HOME: "/data" + extraCmd: + command: [ "python3", "/workspace/api_server.py" ] + extraCmdArgs: [ "--swap-space", "16", + "--disable-log-requests", + "--dtype", "float16", + "--num-scheduler-steps", "1", + "--distributed-executor-backend", "mp" ] + resources: + limits: + amd.com/gpu: "1" + startupProbe: + failureThreshold: 180 + securityContext: + readOnlyRootFilesystem: false + runAsNonRoot: false + runAsUser: 0 diff --git a/ChatQnA/tests/test_compose_faqgen_on_epyc.sh b/ChatQnA/tests/test_compose_faqgen_on_epyc.sh index 1571349095..c480fd1f2e 100644 --- a/ChatQnA/tests/test_compose_faqgen_on_epyc.sh +++ b/ChatQnA/tests/test_compose_faqgen_on_epyc.sh @@ -25,19 +25,9 @@ function build_docker_images() { echo "GenAIComps test commit is $(git rev-parse HEAD)" docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/vllm-project/vllm.git && cd vllm - VLLM_VER=v0.10.0 - echo "Check out vLLM tag ${VLLM_VER}" - git checkout ${VLLM_VER} &>/dev/null - VLLM_REQ_FILE="requirements/cpu.txt" - if ! grep -q "^transformers" "$VLLM_REQ_FILE"; then - echo "Adding transformers<4.54.0 to $VLLM_REQ_FILE" - echo "transformers<4.54.0" >>"$VLLM_REQ_FILE" - fi - cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="chatqna chatqna-ui dataprep retriever llm-faqgen vllm nginx" + service_list="chatqna chatqna-ui dataprep retriever llm-faqgen nginx" docker compose -f build.yaml build ${service_list} --no-cache >${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -206,37 +196,6 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" 
- else - conda create -n ${conda_env_name} python=3.12 -y - fi - CONDA_ROOT=$(conda info --base) - source "${CONDA_ROOT}/etc/profile.d/conda.sh" - conda activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - # npm install && npm ci && npx playwright install --with-deps - npm install && npm ci && npx playwright install - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/amd/cpu/epyc docker compose -f compose_faqgen.yaml down @@ -264,10 +223,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/ChatQnA/tests/test_compose_faqgen_on_gaudi.sh b/ChatQnA/tests/test_compose_faqgen_on_gaudi.sh index 58ab7526c2..08e718e1ca 100644 --- a/ChatQnA/tests/test_compose_faqgen_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_faqgen_on_gaudi.sh @@ -23,12 +23,9 @@ function build_docker_images() { echo "GenAIComps test commit is $(git rev-parse HEAD)" docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork - VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0 - git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="chatqna chatqna-ui dataprep retriever llm-faqgen vllm-gaudi nginx" + service_list="chatqna chatqna-ui dataprep retriever llm-faqgen nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -36,6 +33,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/hpu/gaudi + export no_proxy="localhost,127.0.0.1,$ip_address" source set_env_faqgen.sh # Start Docker Containers @@ -187,34 +185,6 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? 
- - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/intel/hpu/gaudi docker compose -f compose_faqgen.yaml down @@ -242,10 +212,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/ChatQnA/tests/test_compose_faqgen_on_rocm.sh b/ChatQnA/tests/test_compose_faqgen_on_rocm.sh index 893807377f..e3248c7999 100644 --- a/ChatQnA/tests/test_compose_faqgen_on_rocm.sh +++ b/ChatQnA/tests/test_compose_faqgen_on_rocm.sh @@ -163,36 +163,6 @@ function validate_megaservice() { } -function validate_frontend() { - echo "[ TEST INFO ]: --------- frontend test started ---------" - cd "$WORKPATH"/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniconda3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - echo "[ TEST INFO ]: --------- conda env activated ---------" - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd "$WORKPATH"/docker_compose/amd/gpu/rocm docker compose -f compose_faqgen.yaml stop && docker compose rm -f @@ -220,10 +190,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/ChatQnA/tests/test_compose_faqgen_on_xeon.sh b/ChatQnA/tests/test_compose_faqgen_on_xeon.sh index bb18e3db55..76270813a8 100644 --- a/ChatQnA/tests/test_compose_faqgen_on_xeon.sh +++ b/ChatQnA/tests/test_compose_faqgen_on_xeon.sh @@ -23,13 +23,9 @@ function build_docker_images() { echo "GenAIComps test commit is $(git rev-parse HEAD)" docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/vllm-project/vllm.git && cd vllm - VLLM_VER=v0.10.0 - echo "Check out vLLM tag ${VLLM_VER}" - git checkout ${VLLM_VER} &> /dev/null && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="chatqna chatqna-ui dataprep retriever llm-faqgen vllm nginx" + service_list="chatqna chatqna-ui dataprep retriever llm-faqgen nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -198,34 +194,6 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" 
- else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/intel/cpu/xeon docker compose -f compose_faqgen.yaml down @@ -253,10 +221,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/ChatQnA/tests/test_compose_faqgen_tgi_on_epyc.sh b/ChatQnA/tests/test_compose_faqgen_tgi_on_epyc.sh index 75d86a4a6e..d9800d543e 100644 --- a/ChatQnA/tests/test_compose_faqgen_tgi_on_epyc.sh +++ b/ChatQnA/tests/test_compose_faqgen_tgi_on_epyc.sh @@ -25,16 +25,6 @@ function build_docker_images() { echo "GenAIComps test commit is $(git rev-parse HEAD)" docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/vllm-project/vllm.git && cd vllm - VLLM_VER=v0.10.0 - echo "Check out vLLM tag ${VLLM_VER}" - git checkout ${VLLM_VER} &>/dev/null - VLLM_REQ_FILE="requirements/cpu.txt" - if ! grep -q "^transformers" "$VLLM_REQ_FILE"; then - echo "Adding transformers<4.54.0 to $VLLM_REQ_FILE" - echo "transformers<4.54.0" >>"$VLLM_REQ_FILE" - fi - cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." service_list="chatqna chatqna-ui dataprep retriever llm-faqgen nginx" @@ -206,37 +196,6 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - CONDA_ROOT=$(conda info --base) - source "${CONDA_ROOT}/etc/profile.d/conda.sh" - conda activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - # npm install && npm ci && npx playwright install --with-deps - npm install && npm ci && npx playwright install - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? 
- - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/amd/cpu/epyc docker compose -f compose_faqgen_tgi.yaml down @@ -264,10 +223,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/ChatQnA/tests/test_compose_faqgen_tgi_on_gaudi.sh b/ChatQnA/tests/test_compose_faqgen_tgi_on_gaudi.sh index e9868e0052..b346d8018d 100644 --- a/ChatQnA/tests/test_compose_faqgen_tgi_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_faqgen_tgi_on_gaudi.sh @@ -34,6 +34,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/hpu/gaudi export FAQGen_COMPONENT_NAME="OpeaFaqGenTgi" + export no_proxy="localhost,127.0.0.1,$ip_address" source set_env_faqgen.sh # Start Docker Containers @@ -185,34 +186,6 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/intel/hpu/gaudi docker compose -f compose_faqgen_tgi.yaml down @@ -240,10 +213,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/ChatQnA/tests/test_compose_faqgen_tgi_on_xeon.sh b/ChatQnA/tests/test_compose_faqgen_tgi_on_xeon.sh index d0b823f204..ca3993b8be 100644 --- a/ChatQnA/tests/test_compose_faqgen_tgi_on_xeon.sh +++ b/ChatQnA/tests/test_compose_faqgen_tgi_on_xeon.sh @@ -198,34 +198,6 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? 
- - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/intel/cpu/xeon docker compose -f compose_faqgen_tgi.yaml down @@ -253,10 +225,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/ChatQnA/tests/test_compose_faqgen_vllm_on_rocm.sh b/ChatQnA/tests/test_compose_faqgen_vllm_on_rocm.sh index f344f0030a..8c24185f31 100644 --- a/ChatQnA/tests/test_compose_faqgen_vllm_on_rocm.sh +++ b/ChatQnA/tests/test_compose_faqgen_vllm_on_rocm.sh @@ -138,35 +138,6 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniconda3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/amd/gpu/rocm docker compose -f compose_vllm.yaml down @@ -194,10 +165,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh b/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh index da8bc25b48..09bfffabe1 100644 --- a/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_guardrails_on_gaudi.sh @@ -23,12 +23,9 @@ function build_docker_images() { echo "GenAIComps test commit is $(git rev-parse HEAD)" docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork - VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0 - git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="chatqna chatqna-ui dataprep retriever vllm-gaudi guardrails nginx" + service_list="chatqna chatqna-ui dataprep retriever guardrails nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -154,34 +151,6 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" 
- else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/intel/hpu/gaudi docker compose -f compose_guardrails.yaml down @@ -209,10 +178,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/ChatQnA/tests/test_compose_mariadb_on_xeon.sh b/ChatQnA/tests/test_compose_mariadb_on_xeon.sh index 4d834e2abc..3c369357e4 100644 --- a/ChatQnA/tests/test_compose_mariadb_on_xeon.sh +++ b/ChatQnA/tests/test_compose_mariadb_on_xeon.sh @@ -23,15 +23,9 @@ function build_docker_images() { echo "GenAIComps test commit is $(git rev-parse HEAD)" docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/vllm-project/vllm.git && cd vllm - VLLM_VER=v0.10.0 - echo "Check out vLLM tag ${VLLM_VER}" - git checkout ${VLLM_VER} &> /dev/null - # make sure NOT change the pwd - cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="chatqna chatqna-ui dataprep retriever vllm nginx" + service_list="chatqna chatqna-ui dataprep retriever nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -40,6 +34,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/cpu/xeon export MARIADB_PASSWORD="test" + export no_proxy="localhost,127.0.0.1,$ip_address" source set_env_mariadb.sh # Start Docker Containers diff --git a/ChatQnA/tests/test_compose_milvus_on_epyc.sh b/ChatQnA/tests/test_compose_milvus_on_epyc.sh index 851f6cb33d..efba619f58 100644 --- a/ChatQnA/tests/test_compose_milvus_on_epyc.sh +++ b/ChatQnA/tests/test_compose_milvus_on_epyc.sh @@ -26,19 +26,9 @@ function build_docker_images() { echo "GenAIComps test commit is $(git rev-parse HEAD)" docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/vllm-project/vllm.git && cd vllm - VLLM_VER=v0.10.0 - echo "Check out vLLM tag ${VLLM_VER}" - git checkout ${VLLM_VER} &>/dev/null - VLLM_REQ_FILE="requirements/cpu.txt" - if ! grep -q "^transformers" "$VLLM_REQ_FILE"; then - echo "Adding transformers<4.54.0 to $VLLM_REQ_FILE" - echo "transformers<4.54.0" >>"$VLLM_REQ_FILE" - fi - cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="chatqna chatqna-ui dataprep retriever vllm nginx" + service_list="chatqna chatqna-ui dataprep retriever nginx" docker compose -f build.yaml build ${service_list} --no-cache >${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -181,39 +171,6 @@ function validate_megaservice() { } -function validate_frontend() { - echo "[ TEST INFO ]: --------- frontend test started ---------" - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - CONDA_ROOT=$(conda info --base) - source "${CONDA_ROOT}/etc/profile.d/conda.sh" - conda activate ${conda_env_name} - echo "[ TEST INFO ]: --------- conda env activated ---------" - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - # npm install && npm ci && npx playwright install --with-deps - npm install && npm ci && npx playwright install - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { echo "In stop docker" echo $WORKPATH @@ -243,10 +200,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/ChatQnA/tests/test_compose_milvus_on_xeon.sh b/ChatQnA/tests/test_compose_milvus_on_xeon.sh index 2bd97cb05a..338c5515dc 100644 --- a/ChatQnA/tests/test_compose_milvus_on_xeon.sh +++ b/ChatQnA/tests/test_compose_milvus_on_xeon.sh @@ -24,15 +24,9 @@ function build_docker_images() { echo "GenAIComps test commit is $(git rev-parse HEAD)" docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/vllm-project/vllm.git && cd vllm - VLLM_VER=v0.10.0 - echo "Check out vLLM tag ${VLLM_VER}" - git checkout ${VLLM_VER} &> /dev/null - # make sure NOT change the pwd - cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="chatqna chatqna-ui dataprep retriever vllm nginx" + service_list="chatqna chatqna-ui dataprep retriever nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -40,6 +34,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ export LOGFLAG=true + export no_proxy="localhost,127.0.0.1,$ip_address" source set_env.sh # Start Docker Containers @@ -175,36 +170,6 @@ function validate_megaservice() { } -function validate_frontend() { - echo "[ TEST INFO ]: --------- frontend test started ---------" - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" 
- else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - echo "[ TEST INFO ]: --------- conda env activated ---------" - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { echo "In stop docker" echo $WORKPATH @@ -234,10 +199,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/ChatQnA/tests/test_compose_on_epyc.sh b/ChatQnA/tests/test_compose_on_epyc.sh index 851f6cb33d..efba619f58 100644 --- a/ChatQnA/tests/test_compose_on_epyc.sh +++ b/ChatQnA/tests/test_compose_on_epyc.sh @@ -26,19 +26,9 @@ function build_docker_images() { echo "GenAIComps test commit is $(git rev-parse HEAD)" docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/vllm-project/vllm.git && cd vllm - VLLM_VER=v0.10.0 - echo "Check out vLLM tag ${VLLM_VER}" - git checkout ${VLLM_VER} &>/dev/null - VLLM_REQ_FILE="requirements/cpu.txt" - if ! grep -q "^transformers" "$VLLM_REQ_FILE"; then - echo "Adding transformers<4.54.0 to $VLLM_REQ_FILE" - echo "transformers<4.54.0" >>"$VLLM_REQ_FILE" - fi - cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="chatqna chatqna-ui dataprep retriever vllm nginx" + service_list="chatqna chatqna-ui dataprep retriever nginx" docker compose -f build.yaml build ${service_list} --no-cache >${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -181,39 +171,6 @@ function validate_megaservice() { } -function validate_frontend() { - echo "[ TEST INFO ]: --------- frontend test started ---------" - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - CONDA_ROOT=$(conda info --base) - source "${CONDA_ROOT}/etc/profile.d/conda.sh" - conda activate ${conda_env_name} - echo "[ TEST INFO ]: --------- conda env activated ---------" - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - # npm install && npm ci && npx playwright install --with-deps - npm install && npm ci && npx playwright install - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? 
- - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { echo "In stop docker" echo $WORKPATH @@ -243,10 +200,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/ChatQnA/tests/test_compose_on_gaudi.sh b/ChatQnA/tests/test_compose_on_gaudi.sh index 221f59c6e5..ace8e10b41 100644 --- a/ChatQnA/tests/test_compose_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_on_gaudi.sh @@ -23,12 +23,9 @@ function build_docker_images() { echo "GenAIComps test commit is $(git rev-parse HEAD)" docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork - VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0 - git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="chatqna chatqna-ui dataprep retriever vllm-gaudi nginx" + service_list="chatqna chatqna-ui dataprep retriever nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -133,36 +130,6 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - export no_proxy="localhost,127.0.0.1,$ip_address" - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/intel/hpu/gaudi docker compose -f compose.yaml -f compose.telemetry.yaml down @@ -190,10 +157,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/ChatQnA/tests/test_compose_on_rocm.sh b/ChatQnA/tests/test_compose_on_rocm.sh index 5e31ea9969..63d978b92b 100644 --- a/ChatQnA/tests/test_compose_on_rocm.sh +++ b/ChatQnA/tests/test_compose_on_rocm.sh @@ -148,36 +148,6 @@ function validate_megaservice() { } -function validate_frontend() { - echo "[ TEST INFO ]: --------- frontend test started ---------" - cd "$WORKPATH"/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniconda3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" 
- else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - echo "[ TEST INFO ]: --------- conda env activated ---------" - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd "$WORKPATH"/docker_compose/amd/gpu/rocm docker compose stop && docker compose rm -f @@ -205,10 +175,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/ChatQnA/tests/test_compose_on_xeon.sh b/ChatQnA/tests/test_compose_on_xeon.sh index 965ddb34dd..a241e4eeff 100644 --- a/ChatQnA/tests/test_compose_on_xeon.sh +++ b/ChatQnA/tests/test_compose_on_xeon.sh @@ -23,15 +23,9 @@ function build_docker_images() { echo "GenAIComps test commit is $(git rev-parse HEAD)" docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/vllm-project/vllm.git && cd vllm - VLLM_VER=v0.10.0 - echo "Check out vLLM tag ${VLLM_VER}" - git checkout ${VLLM_VER} &> /dev/null - # make sure NOT change the pwd - cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="chatqna chatqna-ui dataprep retriever vllm nginx" + service_list="chatqna chatqna-ui dataprep retriever nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -39,7 +33,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/cpu/xeon - + export no_proxy="localhost,127.0.0.1,$ip_address" source set_env.sh # Start Docker Containers @@ -132,35 +126,6 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? 
- - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/intel/cpu/xeon docker compose -f compose.yaml -f compose.telemetry.yaml down @@ -188,10 +153,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/ChatQnA/tests/test_compose_openeuler_on_xeon.sh b/ChatQnA/tests/test_compose_openeuler_on_xeon.sh index 5f641f5d62..38c275dd9c 100644 --- a/ChatQnA/tests/test_compose_openeuler_on_xeon.sh +++ b/ChatQnA/tests/test_compose_openeuler_on_xeon.sh @@ -33,7 +33,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/cpu/xeon - + export no_proxy="localhost,127.0.0.1,$ip_address" source set_env.sh # Start Docker Containers @@ -126,36 +126,6 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - - echo "[TEST INFO]: Preparing frontend test using Docker..." - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - echo "[TEST INFO]: Running frontend tests in Docker..." - exit_status=0 - - docker run --rm \ - --network="host" \ - -v $PWD:/work \ - -w /work \ - mcr.microsoft.com/playwright:v1.40.0-focal \ - /bin/bash -c " - npm install && - npm ci && - npx playwright install && - npx playwright test - " || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/intel/cpu/xeon docker compose -f compose_openeuler.yaml -f compose.telemetry.yaml down @@ -183,10 +153,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/ChatQnA/tests/test_compose_opengauss_on_xeon.sh b/ChatQnA/tests/test_compose_opengauss_on_xeon.sh new file mode 100644 index 0000000000..cda5c20b03 --- /dev/null +++ b/ChatQnA/tests/test_compose_opengauss_on_xeon.sh @@ -0,0 +1,215 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" +export REGISTRY=${IMAGE_REPO} +export TAG=${IMAGE_TAG} +export MODEL_CACHE=${model_cache:-"./data"} + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') +export host_ip=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + pushd GenAIComps + echo "GenAIComps test commit is $(git rev-parse HEAD)" + docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . + popd && sleep 1s + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
+ service_list="chatqna chatqna-ui dataprep retriever nginx" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker images && sleep 1s +} +function start_services() { + cd $WORKPATH/docker_compose/intel/cpu/xeon/ + export LOGFLAG=true + export no_proxy="localhost,127.0.0.1,$ip_address" + export GS_USER="gaussdb" + export GS_PASSWORD="openGauss@123" + export GS_DB="postgres" + export GS_CONNECTION_STRING="opengauss+psycopg2://${GS_USER}:${GS_PASSWORD}@$ip_address:5432/${GS_DB}" + + source set_env.sh + + # Start Docker Containers + docker compose -f compose_opengauss.yaml up -d --quiet-pull > ${LOG_PATH}/start_services_with_compose.log + + n=0 + until [[ "$n" -ge 100 ]]; do + docker logs vllm-service > ${LOG_PATH}/vllm_service_start.log 2>&1 + if grep -q complete ${LOG_PATH}/vllm_service_start.log; then + break + fi + sleep 5s + n=$((n+1)) + done +} + +function validate_service() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + if [[ $SERVICE_NAME == *"dataprep_upload_file"* ]]; then + cd $LOG_PATH + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL") + elif [[ $SERVICE_NAME == *"dataprep_del"* ]]; then + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "all"}' -H 'Content-Type: application/json' "$URL") + else + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + fi + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + + + # check response status + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + fi + echo "Response" + echo $RESPONSE_BODY + echo "Expected Result" + echo $EXPECTED_RESULT + # check response body + if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + exit 1 + else + echo "[ $SERVICE_NAME ] Content is as expected." + fi + + sleep 1s +} + +function validate_microservices() { + # Check if the microservices are running correctly. + + # tei for embedding service + validate_service \ + "${ip_address}:6006/embed" \ + "[[" \ + "tei-embedding" \ + "tei-embedding-server" \ + '{"inputs":"What is Deep Learning?"}' + + sleep 1m # retrieval can't curl as expected, try to wait for more time + + # test /v1/dataprep/ingest upload file + echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." 
> $LOG_PATH/dataprep_file.txt
+    validate_service \
+        "http://${ip_address}:6007/v1/dataprep/ingest" \
+        "Data preparation succeeded" \
+        "dataprep_upload_file" \
+        "dataprep-opengauss-server"
+
+    # test /v1/dataprep/delete
+    validate_service \
+        "http://${ip_address}:6007/v1/dataprep/delete" \
+        '{"status":true}' \
+        "dataprep_del" \
+        "dataprep-opengauss-server"
+
+    # test /v1/dataprep/delete
+    validate_service \
+        "http://${ip_address}:6007/v1/dataprep/delete" \
+        '{"status":true}' \
+        "dataprep_del" \
+        "dataprep-opengauss-server"
+
+
+    # retrieval microservice
+    test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
+    validate_service \
+        "${ip_address}:7000/v1/retrieval" \
+        " " \
+        "retrieval" \
+        "retriever-opengauss-server" \
+        "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}"
+
+    # tei for rerank microservice
+    echo "Validating reranking service"
+    validate_service \
+        "${ip_address}:8808/rerank" \
+        '{"index":1,"score":' \
+        "tei-rerank" \
+        "tei-reranking-server" \
+        '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}'
+
+
+    # vllm for llm service
+    echo "Validating llm service"
+    validate_service \
+        "${ip_address}:9009/v1/chat/completions" \
+        "content" \
+        "vllm-llm" \
+        "vllm-service" \
+        '{"model": "meta-llama/Meta-Llama-3-8B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}'
+}
+
+function validate_megaservice() {
+    # Curl the Mega Service
+    validate_service \
+        "${ip_address}:8888/v1/chatqna" \
+        "Nike" \
+        "chatqna-megaservice" \
+        "chatqna-xeon-backend-server" \
+        '{"messages": "What is the revenue of Nike in 2023?"}'
+
+}
+
+function stop_docker() {
+    echo "In stop docker"
+    echo $WORKPATH
+    cd $WORKPATH/docker_compose/intel/cpu/xeon/
+    docker compose -f compose_opengauss.yaml down
+}
+
+function main() {
+
+    echo "::group::stop_docker"
+    stop_docker
+    echo "::endgroup::"
+
+    echo "::group::build_docker_images"
+    if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
+    echo "::endgroup::"
+
+    echo "::group::start_services"
+    start_services
+    echo "::endgroup::"
+
+    echo "::group::validate_microservices"
+    validate_microservices
+    echo "::endgroup::"
+
+    echo "::group::validate_megaservice"
+    validate_megaservice
+    echo "::endgroup::"
+
+    echo "::group::stop_docker"
+    stop_docker
+    echo "::endgroup::"
+
+    docker system prune -f
+
+}
+
+main
diff --git a/ChatQnA/tests/test_compose_pinecone_on_epyc.sh b/ChatQnA/tests/test_compose_pinecone_on_epyc.sh
index 02bddd1450..10bb819ca5 100755
--- a/ChatQnA/tests/test_compose_pinecone_on_epyc.sh
+++ b/ChatQnA/tests/test_compose_pinecone_on_epyc.sh
@@ -26,19 +26,9 @@ function build_docker_images() {
     echo "GenAIComps test commit is $(git rev-parse HEAD)"
     docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
     popd && sleep 1s
-    git clone https://github.com/vllm-project/vllm.git && cd vllm
-    VLLM_VER=v0.10.0
-    echo "Check out vLLM tag ${VLLM_VER}"
-    git checkout ${VLLM_VER} &>/dev/null
-    VLLM_REQ_FILE="requirements/cpu.txt"
-    if ! grep -q "^transformers" "$VLLM_REQ_FILE"; then
-        echo "Adding transformers<4.54.0 to $VLLM_REQ_FILE"
-        echo "transformers<4.54.0" >>"$VLLM_REQ_FILE"
-    fi
-    cd ../
 
     echo "Build all the images with --no-cache, check docker_image_build.log for details..."
- service_list="chatqna chatqna-ui dataprep retriever vllm nginx" + service_list="chatqna chatqna-ui dataprep retriever nginx" docker compose -f build.yaml build ${service_list} --no-cache >${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -176,39 +166,6 @@ function validate_megaservice() { } -function validate_frontend() { - echo "[ TEST INFO ]: --------- frontend test started ---------" - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - CONDA_ROOT=$(conda info --base) - source "${CONDA_ROOT}/etc/profile.d/conda.sh" - conda activate ${conda_env_name} - echo "[ TEST INFO ]: --------- conda env activated ---------" - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - # npm install && npm ci && npx playwright install --with-deps - npm install && npm ci && npx playwright install - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { echo "In stop docker" echo $WORKPATH diff --git a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh index 64c8efc7df..f5efee6c80 100755 --- a/ChatQnA/tests/test_compose_pinecone_on_xeon.sh +++ b/ChatQnA/tests/test_compose_pinecone_on_xeon.sh @@ -24,15 +24,9 @@ function build_docker_images() { echo "GenAIComps test commit is $(git rev-parse HEAD)" docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/vllm-project/vllm.git && cd vllm - VLLM_VER=v0.10.0 - echo "Check out vLLM tag ${VLLM_VER}" - git checkout ${VLLM_VER} &> /dev/null - # Not change the pwd - cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="chatqna chatqna-ui dataprep retriever vllm nginx" + service_list="chatqna chatqna-ui dataprep retriever nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -174,36 +168,6 @@ function validate_megaservice() { } -function validate_frontend() { - echo "[ TEST INFO ]: --------- frontend test started ---------" - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - echo "[ TEST INFO ]: --------- conda env activated ---------" - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? 
- - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { echo "In stop docker" echo $WORKPATH diff --git a/ChatQnA/tests/test_compose_qdrant_on_epyc.sh b/ChatQnA/tests/test_compose_qdrant_on_epyc.sh index 1489a346c7..e93983ec31 100644 --- a/ChatQnA/tests/test_compose_qdrant_on_epyc.sh +++ b/ChatQnA/tests/test_compose_qdrant_on_epyc.sh @@ -24,19 +24,9 @@ function build_docker_images() { echo "GenAIComps test commit is $(git rev-parse HEAD)" docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/vllm-project/vllm.git && cd vllm - VLLM_VER=v0.10.0 - echo "Check out vLLM tag ${VLLM_VER}" - git checkout ${VLLM_VER} &>/dev/null - VLLM_REQ_FILE="requirements/cpu.txt" - if ! grep -q "^transformers" "$VLLM_REQ_FILE"; then - echo "Adding transformers<4.54.0 to $VLLM_REQ_FILE" - echo "transformers<4.54.0" >>"$VLLM_REQ_FILE" - fi - cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="chatqna chatqna-ui dataprep retriever vllm nginx" + service_list="chatqna chatqna-ui dataprep retriever nginx" docker compose -f build.yaml build ${service_list} --no-cache >${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -167,36 +157,6 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - CONDA_ROOT=$(conda info --base) - source "${CONDA_ROOT}/etc/profile.d/conda.sh" - conda activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - # npm install && npm ci && npx playwright install --with-deps - npm install && npm ci && npx playwright install - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/amd/cpu/epyc docker compose -f compose_qdrant.yaml down @@ -224,10 +184,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/ChatQnA/tests/test_compose_qdrant_on_xeon.sh b/ChatQnA/tests/test_compose_qdrant_on_xeon.sh index c7a17aac49..2a522fa7a8 100644 --- a/ChatQnA/tests/test_compose_qdrant_on_xeon.sh +++ b/ChatQnA/tests/test_compose_qdrant_on_xeon.sh @@ -23,15 +23,9 @@ function build_docker_images() { echo "GenAIComps test commit is $(git rev-parse HEAD)" docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/vllm-project/vllm.git && cd vllm - VLLM_VER=v0.10.0 - echo "Check out vLLM tag ${VLLM_VER}" - git checkout ${VLLM_VER} &> /dev/null - # Not change the pwd - cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="chatqna chatqna-ui dataprep retriever vllm nginx" + service_list="chatqna chatqna-ui dataprep retriever nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -39,7 +33,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/cpu/xeon - + export no_proxy="localhost,127.0.0.1,$ip_address" export INDEX_NAME="rag-qdrant" source set_env.sh @@ -159,33 +153,6 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/intel/cpu/xeon docker compose -f compose_qdrant.yaml down @@ -213,10 +180,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/ChatQnA/tests/test_compose_tgi_on_epyc.sh b/ChatQnA/tests/test_compose_tgi_on_epyc.sh index 81cce137e9..98efb16223 100644 --- a/ChatQnA/tests/test_compose_tgi_on_epyc.sh +++ b/ChatQnA/tests/test_compose_tgi_on_epyc.sh @@ -178,39 +178,6 @@ function validate_megaservice() { } -function validate_frontend() { - echo "[ TEST INFO ]: --------- frontend test started ---------" - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - CONDA_ROOT=$(conda info --base) - source "${CONDA_ROOT}/etc/profile.d/conda.sh" - conda activate ${conda_env_name} - echo "[ TEST INFO ]: --------- conda env activated ---------" - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - # npm install && npm ci && npx playwright install --with-deps - npm install && npm ci && npx playwright install - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? 
- - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/amd/cpu/epyc docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml down @@ -238,10 +205,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/ChatQnA/tests/test_compose_tgi_on_gaudi.sh b/ChatQnA/tests/test_compose_tgi_on_gaudi.sh index 356914ea1c..eb79b34440 100644 --- a/ChatQnA/tests/test_compose_tgi_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_tgi_on_gaudi.sh @@ -35,6 +35,7 @@ function start_services() { export NON_INTERACTIVE=true export host_ip=${ip_address} export telemetry=yes + export no_proxy="localhost,127.0.0.1,$ip_address" source set_env.sh # Start Docker Containers @@ -172,36 +173,6 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - export no_proxy="localhost,127.0.0.1,$ip_address" - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/intel/hpu/gaudi docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml down @@ -229,10 +200,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/ChatQnA/tests/test_compose_tgi_on_xeon.sh b/ChatQnA/tests/test_compose_tgi_on_xeon.sh index b2a56091a9..f3c1e8bffd 100644 --- a/ChatQnA/tests/test_compose_tgi_on_xeon.sh +++ b/ChatQnA/tests/test_compose_tgi_on_xeon.sh @@ -32,7 +32,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/cpu/xeon - + export no_proxy="localhost,127.0.0.1,$ip_address" source set_env.sh # Start Docker Containers @@ -170,36 +170,6 @@ function validate_megaservice() { } -function validate_frontend() { - echo "[ TEST INFO ]: --------- frontend test started ---------" - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - echo "[ TEST INFO ]: --------- conda env activated ---------" - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? 
- - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/intel/cpu/xeon docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml down @@ -227,10 +197,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/ChatQnA/tests/test_compose_vllm_on_rocm.sh b/ChatQnA/tests/test_compose_vllm_on_rocm.sh index 9f61c05b89..d0e856960f 100644 --- a/ChatQnA/tests/test_compose_vllm_on_rocm.sh +++ b/ChatQnA/tests/test_compose_vllm_on_rocm.sh @@ -129,35 +129,6 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniconda3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/amd/gpu/rocm docker compose -f compose_vllm.yaml down @@ -185,10 +156,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/ChatQnA/tests/test_compose_without_rerank_on_epyc.sh b/ChatQnA/tests/test_compose_without_rerank_on_epyc.sh index 06f96f9290..1f14c10d10 100644 --- a/ChatQnA/tests/test_compose_without_rerank_on_epyc.sh +++ b/ChatQnA/tests/test_compose_without_rerank_on_epyc.sh @@ -24,19 +24,9 @@ function build_docker_images() { echo "GenAIComps test commit is $(git rev-parse HEAD)" docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/vllm-project/vllm.git && cd vllm - VLLM_VER=v0.10.0 - echo "Check out vLLM tag ${VLLM_VER}" - git checkout ${VLLM_VER} &>/dev/null - VLLM_REQ_FILE="requirements/cpu.txt" - if ! grep -q "^transformers" "$VLLM_REQ_FILE"; then - echo "Adding transformers<4.54.0 to $VLLM_REQ_FILE" - echo "transformers<4.54.0" >>"$VLLM_REQ_FILE" - fi - cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="chatqna chatqna-ui dataprep retriever vllm nginx" + service_list="chatqna chatqna-ui dataprep retriever nginx" docker compose -f build.yaml build ${service_list} --no-cache >${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -176,39 +166,6 @@ function validate_megaservice() { } -function validate_frontend() { - echo "[ TEST INFO ]: --------- frontend test started ---------" - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - CONDA_ROOT=$(conda info --base) - source "${CONDA_ROOT}/etc/profile.d/conda.sh" - conda activate ${conda_env_name} - echo "[ TEST INFO ]: --------- conda env activated ---------" - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - # npm install && npm ci && npx playwright install --with-deps - npm install && npm ci && npx playwright install - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/amd/cpu/epyc/ docker compose -f compose_without_rerank.yaml down @@ -236,10 +193,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh b/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh index b6fb222b39..62cbe30d3e 100644 --- a/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh +++ b/ChatQnA/tests/test_compose_without_rerank_on_gaudi.sh @@ -23,12 +23,9 @@ function build_docker_images() { echo "GenAIComps test commit is $(git rev-parse HEAD)" docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork - VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0 - git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="chatqna chatqna-ui dataprep retriever vllm-gaudi nginx" + service_list="chatqna chatqna-ui dataprep retriever nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -166,36 +163,6 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - export no_proxy="localhost,127.0.0.1,$ip_address" - - exit_status=0 - npx playwright test || exit_status=$? 
- - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/intel/hpu/gaudi docker compose -f compose_without_rerank.yaml down @@ -223,10 +190,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/ChatQnA/tests/test_compose_without_rerank_on_xeon.sh b/ChatQnA/tests/test_compose_without_rerank_on_xeon.sh index f236a8ab76..7e2157cee7 100644 --- a/ChatQnA/tests/test_compose_without_rerank_on_xeon.sh +++ b/ChatQnA/tests/test_compose_without_rerank_on_xeon.sh @@ -23,16 +23,9 @@ function build_docker_images() { echo "GenAIComps test commit is $(git rev-parse HEAD)" docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/vllm-project/vllm.git && cd vllm - - VLLM_VER=v0.10.0 - echo "Check out vLLM tag ${VLLM_VER}" - git checkout ${VLLM_VER} &> /dev/null - # Not change the pwd - cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="chatqna chatqna-ui dataprep retriever vllm nginx" + service_list="chatqna chatqna-ui dataprep retriever nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -40,7 +33,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/cpu/xeon - + export no_proxy="localhost,127.0.0.1,$ip_address" source set_env.sh # Start Docker Containers @@ -169,36 +162,6 @@ function validate_megaservice() { } -function validate_frontend() { - echo "[ TEST INFO ]: --------- frontend test started ---------" - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - echo "[ TEST INFO ]: --------- conda env activated ---------" - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? 
- - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ docker compose -f compose_without_rerank.yaml down @@ -226,10 +189,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/ChatQnA/tests/test_ui_on_xeon.sh b/ChatQnA/tests/test_ui_on_xeon.sh new file mode 100644 index 0000000000..5c3eb88d3f --- /dev/null +++ b/ChatQnA/tests/test_ui_on_xeon.sh @@ -0,0 +1,79 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" +export REGISTRY=${IMAGE_REPO} +export TAG=${IMAGE_TAG} +export MODEL_CACHE=${model_cache:-"./data"} + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + pushd GenAIComps + echo "GenAIComps test commit is $(git rev-parse HEAD)" + docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . + popd && sleep 1s + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." + service_list="chatqna chatqna-ui dataprep retriever nginx" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker images && sleep 1s +} + +function start_services() { + cd $WORKPATH/docker_compose/intel/cpu/xeon + + source set_env.sh + + # Start Docker Containers + docker compose -f compose.yaml -f compose.telemetry.yaml up -d --quiet-pull > ${LOG_PATH}/start_services_with_compose.log + n=0 + until [[ "$n" -ge 100 ]]; do + docker logs vllm-service > ${LOG_PATH}/vllm_service_start.log 2>&1 + if grep -q complete ${LOG_PATH}/vllm_service_start.log; then + break + fi + sleep 5s + n=$((n+1)) + done +} + +function stop_docker() { + cd $WORKPATH/docker_compose/intel/cpu/xeon + docker compose -f compose.yaml -f compose.telemetry.yaml down +} + +function main() { + + echo "::group::stop_docker" + stop_docker + echo "::endgroup::" + + echo "::group::build_docker_images" + if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi + echo "::endgroup::" + + echo "::group::start_services" + start_services + echo "::endgroup::" + + echo "::group::stop_docker" + stop_docker + echo "::endgroup::" + + docker system prune -f + +} + +main diff --git a/ChatQnA/ui/docker/Dockerfile.openEuler b/ChatQnA/ui/docker/Dockerfile.openEuler index cb594a33bb..b61654e440 100644 --- a/ChatQnA/ui/docker/Dockerfile.openEuler +++ b/ChatQnA/ui/docker/Dockerfile.openEuler @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 # Use node 20.11.1 as the base image -FROM openeuler/node:20.11.1-oe2403lts +FROM openeuler/node:20.11.1-oe2403lts@sha256:25c790f93c2243b361919620c069812319f614fd697e32e433402ae706a19ffd # Update package manager and install Git RUN yum update -y && \ diff --git a/ChatQnA/ui/docker/Dockerfile.react.openEuler b/ChatQnA/ui/docker/Dockerfile.react.openEuler 
index edd8a21bca..3a89471fda 100644 --- a/ChatQnA/ui/docker/Dockerfile.react.openEuler +++ b/ChatQnA/ui/docker/Dockerfile.react.openEuler @@ -2,13 +2,13 @@ # SPDX-License-Identifier: Apache-2.0 # Use node 20.11.1 as the base image -FROM openeuler/node:20.11.1-oe2403lts as vite-app +FROM openeuler/node:20.11.1-oe2403lts@sha256:25c790f93c2243b361919620c069812319f614fd697e32e433402ae706a19ffd as vite-app COPY react /usr/app/react WORKDIR /usr/app/react - -RUN ["npm", "install"] +RUN ["npm", "install", "--package-lock-only"] +RUN ["npm", "ci"] RUN ["npm", "run", "build"] diff --git a/ChatQnA/ui/svelte/package.json b/ChatQnA/ui/svelte/package.json index eaa981570a..d62d64712b 100644 --- a/ChatQnA/ui/svelte/package.json +++ b/ChatQnA/ui/svelte/package.json @@ -18,7 +18,7 @@ "@fortawesome/free-solid-svg-icons": "6.2.0", "@playwright/test": "^1.45.2", "@sveltejs/adapter-auto": "^3.0.0", - "@sveltejs/kit": "2.0.0", + "@sveltejs/kit": "2.20.6", "@sveltejs/vite-plugin-svelte": "^3.0.0", "@tailwindcss/typography": "0.5.7", "@types/debug": "4.1.7", diff --git a/ChatQnA/ui/svelte/src/lib/network/chat/Network.ts b/ChatQnA/ui/svelte/src/lib/network/chat/Network.ts index 060c5a5ffb..f7fb57c9b3 100644 --- a/ChatQnA/ui/svelte/src/lib/network/chat/Network.ts +++ b/ChatQnA/ui/svelte/src/lib/network/chat/Network.ts @@ -18,7 +18,7 @@ import { SSE } from "sse.js"; const CHAT_BASE_URL = env.CHAT_BASE_URL; const MODEL_ID = env.MODEL_ID; -export async function fetchTextStream(query: string) { +export async function fetchTextStream(query: object) { let payload = {}; let url = ""; let modelId = "meta-llama/Meta-Llama-3-8B-Instruct"; diff --git a/ChatQnA/ui/svelte/src/routes/+page.svelte b/ChatQnA/ui/svelte/src/routes/+page.svelte index bcd0b8b708..3d7f6afadf 100644 --- a/ChatQnA/ui/svelte/src/routes/+page.svelte +++ b/ChatQnA/ui/svelte/src/routes/+page.svelte @@ -102,7 +102,7 @@ return decoded; } - const callTextStream = async (query: string, startSendTime: number) => { + const callTextStream = async (query: object, startSendTime: number) => { try { const eventSource = await fetchTextStream(query); eventSource.addEventListener("error", (e: any) => { @@ -179,6 +179,22 @@ } }; + function mapRole(r: number): "user" | "assistant" | "system" { + if (r === 1) return "user"; + if (r === 0) return "assistant"; + return "system"; + } + + function multiMessages( + history: any[] + ): { role: "user" | "assistant" | "system"; content: string }[] { + return history.map((m) => ({ + role: mapRole(m.role), + content: + typeof m.content === "string" ? m.content : String(m.content ?? ""), + })); + } + const handleTextSubmit = async () => { loading = true; const newMessage = { @@ -192,7 +208,7 @@ storeMessages(); query = ""; - await callTextStream(newMessage.content, getCurrentTimeStamp()); + await callTextStream(multiMessages(chatMessages), getCurrentTimeStamp()); scrollToBottom(scrollToDiv); storeMessages(); diff --git a/CodeGen/Dockerfile.openEuler b/CodeGen/Dockerfile.openEuler new file mode 100644 index 0000000000..ef31614245 --- /dev/null +++ b/CodeGen/Dockerfile.openEuler @@ -0,0 +1,10 @@ +# Copyright (C) 2025 Huawei Technologies Co., Ltd. 
+# SPDX-License-Identifier: Apache-2.0 + +ARG IMAGE_REPO=opea +ARG BASE_TAG=latest +FROM $IMAGE_REPO/comps-base:$BASE_TAG-openeuler + +COPY ./codegen.py $HOME/codegen.py + +ENTRYPOINT ["python", "codegen.py"] diff --git a/CodeGen/README.md b/CodeGen/README.md index 479ad11f29..9aebba4472 100644 --- a/CodeGen/README.md +++ b/CodeGen/README.md @@ -106,19 +106,58 @@ flowchart LR This CodeGen example can be deployed manually on various hardware platforms using Docker Compose or Kubernetes. Select the appropriate guide based on your target environment: -| Hardware | Deployment Mode | Guide Link | -| :-------------- | :------------------- | :----------------------------------------------------------------------- | -| Intel Xeon CPU | Single Node (Docker) | [Xeon Docker Compose Guide](./docker_compose/intel/cpu/xeon/README.md) | -| Intel Gaudi HPU | Single Node (Docker) | [Gaudi Docker Compose Guide](./docker_compose/intel/hpu/gaudi/README.md) | -| AMD EPYC CPU | Single Node (Docker) | [EPYC Docker Compose Guide](./docker_compose/amd/cpu/epyc/README.md) | -| AMD ROCm GPU | Single Node (Docker) | [ROCm Docker Compose Guide](./docker_compose/amd/gpu/rocm/README.md) | -| Intel Xeon CPU | Kubernetes (Helm) | [Kubernetes Helm Guide](./kubernetes/helm/README.md) | -| Intel Gaudi HPU | Kubernetes (Helm) | [Kubernetes Helm Guide](./kubernetes/helm/README.md) | -| Intel Xeon CPU | Kubernetes (GMC) | [Kubernetes GMC Guide](./kubernetes/gmc/README.md) | -| Intel Gaudi HPU | Kubernetes (GMC) | [Kubernetes GMC Guide](./kubernetes/gmc/README.md) | +| Hardware | Deployment Mode | Guide Link | +| :-------------- | :----------------------------------- | :--------------------------------------------------------------------------------------- | +| Intel Xeon CPU | Single Node (Docker) | [Xeon Docker Compose Guide](./docker_compose/intel/cpu/xeon/README.md) | +| Intel Xeon CPU | Single Node (Docker) with Monitoring | [Xeon Docker Compose with Monitoring Guide](./docker_compose/intel/cpu/xeon/README.md) | +| Intel Gaudi HPU | Single Node (Docker) | [Gaudi Docker Compose Guide](./docker_compose/intel/hpu/gaudi/README.md) | +| Intel Gaudi HPU | Single Node (Docker) with Monitoring | [Gaudi Docker Compose with Monitoring Guide](./docker_compose/intel/hpu/gaudi/README.md) | +| AMD EPYC CPU | Single Node (Docker) | [EPYC Docker Compose Guide](./docker_compose/amd/cpu/epyc/README.md) | +| AMD ROCm GPU | Single Node (Docker) | [ROCm Docker Compose Guide](./docker_compose/amd/gpu/rocm/README.md) | +| Intel Xeon CPU | Kubernetes (Helm) | [Kubernetes Helm Guide](./kubernetes/helm/README.md) | +| Intel Gaudi HPU | Kubernetes (Helm) | [Kubernetes Helm Guide](./kubernetes/helm/README.md) | +| Intel Xeon CPU | Kubernetes (GMC) | [Kubernetes GMC Guide](./kubernetes/gmc/README.md) | +| Intel Gaudi HPU | Kubernetes (GMC) | [Kubernetes GMC Guide](./kubernetes/gmc/README.md) | _Note: Building custom microservice images can be done using the resources in [GenAIComps](https://github.com/opea-project/GenAIComps)._ +## Monitoring + +The CodeGen example supports monitoring capabilities for Intel Xeon and Intel Gaudi platforms. 
Monitoring includes: + +- **Prometheus**: For metrics collection and querying +- **Grafana**: For visualization and dashboards +- **Node Exporter**: For system metrics collection + +### Monitoring Features + +- Real-time metrics collection from all CodeGen microservices +- Pre-configured dashboards for: + - vLLM/TGI performance metrics + - CodeGen MegaService metrics + - System resource utilization + - Node-level metrics + +### Enabling Monitoring + +Monitoring can be enabled by using the `compose.monitoring.yaml` file along with the main compose file: + +```bash +# For Intel Xeon +docker compose -f compose.yaml -f compose.monitoring.yaml up -d + +# For Intel Gaudi +docker compose -f compose.yaml -f compose.monitoring.yaml up -d +``` + +### Accessing Monitoring Services + +Once deployed with monitoring, you can access: + +- **Prometheus**: `http://${HOST_IP}:9090` +- **Grafana**: `http://${HOST_IP}:3000` (username: `admin`, password: `admin`) +- **Node Exporter**: `http://${HOST_IP}:9100` + ## Benchmarking Guides for evaluating the performance and accuracy of this CodeGen deployment are available: diff --git a/CodeGen/docker_compose/amd/cpu/epyc/compose.yaml b/CodeGen/docker_compose/amd/cpu/epyc/compose.yaml index 3622248e0b..3c19a7e459 100644 --- a/CodeGen/docker_compose/amd/cpu/epyc/compose.yaml +++ b/CodeGen/docker_compose/amd/cpu/epyc/compose.yaml @@ -4,7 +4,7 @@ services: vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1 container_name: vllm-server ports: - "8028:80" diff --git a/CodeGen/docker_compose/intel/cpu/xeon/README.md b/CodeGen/docker_compose/intel/cpu/xeon/README.md index f8cb1e03ff..ceda275bdc 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/README.md +++ b/CodeGen/docker_compose/intel/cpu/xeon/README.md @@ -49,7 +49,8 @@ This uses the default vLLM-based deployment using `compose.yaml`. # export https_proxy="your_https_proxy" # export no_proxy="localhost,127.0.0.1,${HOST_IP}" # Add other hosts if necessary source intel/set_env.sh - cd /intel/cpu/xeon + cd intel/cpu/xeon + bash grafana/dashboards/download_opea_dashboard.sh ``` _Note: The compose file might read additional variables from set_env.sh. Ensure all required variables like ports (`LLM_SERVICE_PORT`, `MEGA_SERVICE_PORT`, etc.) are set if not using defaults from the compose file._ @@ -83,13 +84,13 @@ Different Docker Compose files are available to select the LLM serving backend. - **Compose File:** `compose.yaml` - **Description:** Uses vLLM optimized for Intel CPUs as the LLM serving engine. This is the default deployment option used in the Quick Start. -- **Services Deployed:** `codegen-vllm-server`, `codegen-llm-server`, `codegen-tei-embedding-server`, `codegen-retriever-server`, `redis-vector-db`, `codegen-dataprep-server`, `codegen-backend-server`, `codegen-gradio-ui-server`. +- **Services Deployed:** `codegen-vllm-server`, `codegen-llm-server`, `codegen-tei-embedding-server`, `codegen-retriever-server`, `redis-vector-db`, `codegen-dataprep-server`, `codegen-backend-server`, `codegen-ui-server`. #### TGI-based Deployment (`compose_tgi.yaml`) - **Compose File:** `compose_tgi.yaml` - **Description:** Uses Hugging Face Text Generation Inference (TGI) optimized for Intel CPUs as the LLM serving engine. -- **Services Deployed:** `codegen-tgi-server`, `codegen-llm-server`, `codegen-tei-embedding-server`, `codegen-retriever-server`, `redis-vector-db`, `codegen-dataprep-server`, `codegen-backend-server`, `codegen-gradio-ui-server`. 
+- **Services Deployed:** `codegen-tgi-server`, `codegen-llm-server`, `codegen-tei-embedding-server`, `codegen-retriever-server`, `redis-vector-db`, `codegen-dataprep-server`, `codegen-backend-server`, `codegen-ui-server`. - **To Run:** ```bash @@ -101,7 +102,7 @@ Different Docker Compose files are available to select the LLM serving backend. - **Compose File:** `compose_remote.yaml` - **Description:** Uses remote endpoints to access the served LLM's. This is the default configurations except for the LLM serving engine. -- **Services Deployed:** `codegen-tei-embedding-server`, `codegen-retriever-server`, `redis-vector-db`, `codegen-dataprep-server`, `codegen-backend-server`, `codegen-gradio-ui-server`. +- **Services Deployed:** `codegen-tei-embedding-server`, `codegen-retriever-server`, `redis-vector-db`, `codegen-dataprep-server`, `codegen-backend-server`, `codegen-ui-server`. - **To Run:** When models are deployed on a remote server, a base URL and an API key are required to access them. To set up a remote server and acquire the base URL and API key, refer to [Intel® AI for Enterprise Inference](https://www.intel.com/content/www/us/en/developer/topic-technology/artificial-intelligence/enterprise-inference.html) offerings. @@ -146,7 +147,7 @@ Key parameters are configured via environment variables set before running `dock Most of these parameters are in `set_env.sh`, you can either modify this file or overwrite the env variables by setting them. ```shell -source CodeGen/docker_compose/set_env.sh +source CodeGen/docker_compose/intel/set_env.sh ``` #### Compose Files @@ -158,7 +159,7 @@ Different Docker Compose files (`compose.yaml`, `compose_tgi.yaml`) control whic If you need to modify the microservices: 1. Clone the [OPEA GenAIComps](https://github.com/opea-project/GenAIComps) repository. -2. Follow build instructions in the respective component directories (e.g., `comps/llms/text-generation`, `comps/codegen`, `comps/ui/gradio`, etc.). Use the provided Dockerfiles (e.g., `CodeGen/Dockerfile`, `CodeGen/ui/docker/Dockerfile.gradio`). +2. Follow build instructions in the respective component directories (e.g., `comps/llms/text-generation`, `comps/codegen`, etc.). Use the provided Dockerfiles (e.g., `CodeGen/Dockerfile`, `CodeGen/ui/docker/Dockerfile`). 3. Tag your custom images appropriately (e.g., `my-custom-codegen:latest`). 4. Update the `image:` fields in the compose files (`compose.yaml` or `compose_tgi.yaml`) to use your custom image tags. @@ -206,18 +207,17 @@ Use `curl` commands to test the main service endpoints. Ensure `HOST_IP` is corr Multiple UI options can be configured via the compose files. -### Gradio UI (Default) +### Svelte UI (Default) -Access the default Gradio UI by navigating to: +Access the default Svelte UI by navigating to: `http://{HOST_IP}:5173` -_(Port `5173` is the default host mapping for `codegen-gradio-ui-server`)_ +_(Port `5173` is the default host mapping for `codegen-ui-server`)_ -![Gradio UI - Code Generation](../../../../assets/img/codegen_gradio_ui_main.png) -![Gradio UI - Resource Management](../../../../assets/img/codegen_gradio_ui_dataprep.png) +![Svelte UI - Code Generation](../../../../assets/img/codeGen_ui_init.jpg) -### Svelte UI (Optional) +### Gradio UI (Optional) -1. Modify the compose file (either `compose.yaml` or `compose_tgi.yaml`): Comment out the `codegen-gradio-ui-server` service and uncomment/add the `codegen-xeon-ui-server` (Svelte) service definition, ensuring the port mapping is correct (e.g., `"- 5173:5173"`). +1. 
Modify the compose file (either `compose.yaml` or `compose_tgi.yaml`): Comment out the `codegen-xeon-ui-server` (Svelte) service and uncomment/add the `codegen-gradio-ui-server` service definition, ensuring the port mapping is correct (e.g., `"- 5173:5173"`).
 2. Restart Docker Compose: `docker compose up -d` or `docker compose -f compose_tgi.yaml up -d`
 3. Access: `http://{HOST_IP}:5173` (or the host port you mapped).
@@ -252,7 +252,63 @@ Users can interact with the backend service using the `Neural Copilot` VS Code e
 - **"Container name is in use"**: Stop existing containers (`docker compose down`) or change `container_name` in the compose file.
 - **Resource Issues:** CodeGen models can be memory-intensive. Monitor host RAM usage. Increase Docker resources if needed.
 
-## Stopping the Application
+## Monitoring Deployment
+
+To enable monitoring for the CodeGen application, you can use the monitoring Docker Compose file along with the main deployment.
+
+### Option #1: Default Deployment (without monitoring)
+
+To deploy the CodeGen services without monitoring, execute:
+
+```bash
+docker compose up -d
+```
+
+### Option #2: Deployment with Monitoring
+
+> NOTE: To enable monitoring, the `compose.monitoring.yaml` file needs to be merged with the default `compose.yaml` file.
+
+To deploy with monitoring:
+
+```bash
+bash grafana/dashboards/download_opea_dashboard.sh
+docker compose -f compose.yaml -f compose.monitoring.yaml up -d
+```
+
+### Accessing Monitoring Services
+
+Once deployed with monitoring, you can access:
+
+- **Prometheus**: `http://${HOST_IP}:9090`
+- **Grafana**: `http://${HOST_IP}:3000` (username: `admin`, password: `admin`)
+- **Node Exporter**: `http://${HOST_IP}:9100`
+
+### Monitoring Components
+
+The monitoring stack includes:
+
+- **Prometheus**: For metrics collection and querying
+- **Grafana**: For visualization and dashboards
+- **Node Exporter**: For system metrics collection
+
+### Monitoring Dashboards
+
+The following dashboards are automatically downloaded and configured:
+
+- vLLM Dashboard
+- TGI Dashboard
+- CodeGen MegaService Dashboard
+- Node Exporter Dashboard
+
+### Stopping the Application
+
+If monitoring is enabled, execute the following command:
+
+```bash
+docker compose -f compose.yaml -f compose.monitoring.yaml down
+```
+
+If monitoring is not enabled, execute:
 
 ```bash
 docker compose down # for vLLM (compose.yaml)
diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.monitoring.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.monitoring.yaml
new file mode 100644
index 0000000000..dea34085b3
--- /dev/null
+++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.monitoring.yaml
@@ -0,0 +1,58 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  prometheus:
+    image: prom/prometheus:v2.52.0
+    container_name: opea_prometheus
+    user: root
+    volumes:
+      - ./prometheus.yaml:/etc/prometheus/prometheus.yaml
+      - ./prometheus_data:/prometheus
+    command:
+      - '--config.file=/etc/prometheus/prometheus.yaml'
+    ports:
+      - '9090:9090'
+    ipc: host
+    restart: unless-stopped
+
+  grafana:
+    image: grafana/grafana:11.0.0
+    container_name: grafana
+    volumes:
+      - ./grafana_data:/var/lib/grafana
+      - ./grafana/dashboards:/var/lib/grafana/dashboards
+      - ./grafana/provisioning:/etc/grafana/provisioning
+    user: root
+    environment:
+      GF_SECURITY_ADMIN_PASSWORD: admin
+      GF_RENDERING_CALLBACK_URL: http://grafana:3000/
+      GF_LOG_FILTERS: rendering:debug
+      no_proxy: ${no_proxy}
+      host_ip: ${host_ip}
+    depends_on:
+      - prometheus
ports: + - '3000:3000' + ipc: host + restart: unless-stopped + + node-exporter: + image: prom/node-exporter + container_name: node-exporter + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + command: + - '--path.procfs=/host/proc' + - '--path.sysfs=/host/sys' + - --collector.filesystem.ignored-mount-points + - "^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)" + environment: + no_proxy: ${no_proxy} + ports: + - 9100:9100 + restart: always + deploy: + mode: global diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml index abb1933789..99cbe8a373 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml @@ -4,7 +4,7 @@ services: vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1 container_name: vllm-server ports: - "8028:80" @@ -66,7 +66,7 @@ services: ipc: host restart: always codegen-xeon-ui-server: - image: ${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest} + image: ${REGISTRY:-opea}/codegen-ui:${TAG:-latest} container_name: codegen-xeon-ui-server depends_on: - codegen-xeon-backend-server diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose_openeuler.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose_openeuler.yaml new file mode 100644 index 0000000000..fd2b9fc9ba --- /dev/null +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose_openeuler.yaml @@ -0,0 +1,174 @@ +# Copyright (C) 2025 Huawei Technologies Co., Ltd. +# SPDX-License-Identifier: Apache-2.0 + +services: + + vllm-service: + image: openeuler/vllm-cpu:0.10.1-oe2403lts + container_name: vllm-server + ports: + - "8028:80" + volumes: + - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HF_TOKEN} + host_ip: ${host_ip} + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + command: --model ${LLM_MODEL_ID} --host 0.0.0.0 --port 80 + llm-base: + image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}-openeuler + container_name: llm-textgen-server + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: ${LLM_ENDPOINT} + LLM_MODEL_ID: ${LLM_MODEL_ID} + HF_TOKEN: ${HF_TOKEN} + restart: unless-stopped + llm-vllm-service: + extends: llm-base + container_name: llm-codegen-vllm-server + ports: + - "9000:9000" + ipc: host + depends_on: + vllm-service: + condition: service_healthy + codegen-xeon-backend-server: + image: ${REGISTRY:-opea}/codegen:${TAG:-latest}-openeuler + container_name: codegen-xeon-backend-server + depends_on: + llm-base: + condition: service_started + dataprep-redis-server: + condition: service_healthy + ports: + - "7778:7778" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} + - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + - RETRIEVAL_SERVICE_HOST_IP=${RETRIEVAL_SERVICE_HOST_IP} + - REDIS_RETRIEVER_PORT=${REDIS_RETRIEVER_PORT} + - TEI_EMBEDDING_HOST_IP=${TEI_EMBEDDING_HOST_IP} + - EMBEDDER_PORT=${EMBEDDER_PORT} + ipc: host + restart: always + codegen-xeon-ui-server: + image: ${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest}-openeuler + 
container_name: codegen-xeon-ui-server + depends_on: + - codegen-xeon-backend-server + ports: + - "5173:5173" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - BASIC_URL=${BACKEND_SERVICE_ENDPOINT} + - MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT} + - host_ip=${host_ip} + - DATAPREP_ENDPOINT=${DATAPREP_ENDPOINT} + - DATAPREP_REDIS_PORT=${DATAPREP_REDIS_PORT} + ipc: host + restart: always + redis-vector-db: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + ports: + - "${REDIS_DB_PORT:-6379}:${REDIS_DB_PORT:-6379}" + - "${REDIS_INSIGHTS_PORT:-8001}:${REDIS_INSIGHTS_PORT:-8001}" + dataprep-redis-server: + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}-openeuler + container_name: dataprep-redis-server + depends_on: + - redis-vector-db + ports: + - "${DATAPREP_REDIS_PORT}:5000" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + REDIS_HOST: ${host_ip} + INDEX_NAME: ${INDEX_NAME} + HF_TOKEN: ${HF_TOKEN} + LOGFLAG: true + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] + interval: 10s + timeout: 5s + retries: 10 + restart: unless-stopped + tei-embedding-serving: + image: openeuler/text-embeddings-inference-cpu:1.7.0-oe2403lts + container_name: tei-embedding-serving + entrypoint: /bin/sh -c "yum update -y && yum install -y curl && text-embeddings-router --json-output --model-id ${EMBEDDING_MODEL_ID} --auto-truncate" + ports: + - "${TEI_EMBEDDER_PORT:-12000}:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + host_ip: ${host_ip} + HF_TOKEN: ${HF_TOKEN} + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:80/health"] + interval: 10s + timeout: 6s + retries: 48 + tei-embedding-server: + image: ${REGISTRY:-opea}/embedding:${TAG:-latest}-openeuler + container_name: tei-embedding-server + ports: + - "${EMBEDDER_PORT:-10201}:6000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + EMBEDDING_COMPONENT_NAME: "OPEA_TEI_EMBEDDING" + depends_on: + tei-embedding-serving: + condition: service_healthy + restart: unless-stopped + retriever-redis: + image: ${REGISTRY:-opea}/retriever:${TAG:-latest}-openeuler + container_name: retriever-redis + depends_on: + - redis-vector-db + ports: + - "${REDIS_RETRIEVER_PORT:-7000}:${REDIS_RETRIEVER_PORT:-7000}" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + REDIS_DB_PORT: ${REDIS_DB_PORT} + REDIS_INSIGHTS_PORT: ${REDIS_INSIGHTS_PORT} + REDIS_RETRIEVER_PORT: ${REDIS_RETRIEVER_PORT} + INDEX_NAME: ${INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HF_TOKEN: ${HF_TOKEN} + LOGFLAG: ${LOGFLAG} + RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS} + restart: unless-stopped +networks: + default: + driver: bridge diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose_remote.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose_remote.yaml index e4ad0fd364..62b79796ee 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose_remote.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose_remote.yaml @@ -26,7 +26,7 @@ services: ipc: host restart: always codegen-xeon-ui-server: - image: ${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest} + image: 
${REGISTRY:-opea}/codegen-ui:${TAG:-latest} container_name: codegen-xeon-ui-server depends_on: - codegen-xeon-backend-server diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose_tgi.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose_tgi.yaml index 7c1c3802e5..0da9cdddd3 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/compose_tgi.yaml +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose_tgi.yaml @@ -66,7 +66,7 @@ services: ipc: host restart: always codegen-xeon-ui-server: - image: ${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest} + image: ${REGISTRY:-opea}/codegen-ui:${TAG:-latest} container_name: codegen-xeon-ui-server depends_on: - codegen-xeon-backend-server diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose_tgi_openeuler.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose_tgi_openeuler.yaml new file mode 100644 index 0000000000..93e997371f --- /dev/null +++ b/CodeGen/docker_compose/intel/cpu/xeon/compose_tgi_openeuler.yaml @@ -0,0 +1,174 @@ +# Copyright (C) 2025 Huawei Technologies Co., Ltd. +# SPDX-License-Identifier: Apache-2.0 + +services: + + tgi-service: + image: openeuler/text-generation-inference-cpu:2.4.0-oe2403lts + container_name: tgi-server + ports: + - "8028:80" + volumes: + - "${MODEL_CACHE:-./data}:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HF_TOKEN} + host_ip: ${host_ip} + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 + llm-base: + image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}-openeuler + container_name: llm-textgen-server + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: ${LLM_ENDPOINT} + LLM_MODEL_ID: ${LLM_MODEL_ID} + HF_TOKEN: ${HF_TOKEN} + restart: unless-stopped + llm-tgi-service: + extends: llm-base + container_name: llm-codegen-tgi-server + ports: + - "9000:9000" + ipc: host + depends_on: + tgi-service: + condition: service_healthy + codegen-xeon-backend-server: + image: ${REGISTRY:-opea}/codegen:${TAG:-latest}-openeuler + container_name: codegen-xeon-backend-server + depends_on: + llm-base: + condition: service_started + dataprep-redis-server: + condition: service_healthy + ports: + - "7778:7778" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} + - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + - RETRIEVAL_SERVICE_HOST_IP=${RETRIEVAL_SERVICE_HOST_IP} + - REDIS_RETRIEVER_PORT=${REDIS_RETRIEVER_PORT} + - TEI_EMBEDDING_HOST_IP=${TEI_EMBEDDING_HOST_IP} + - EMBEDDER_PORT=${EMBEDDER_PORT} + ipc: host + restart: always + codegen-xeon-ui-server: + image: ${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest}-openeuler + container_name: codegen-xeon-ui-server + depends_on: + - codegen-xeon-backend-server + ports: + - "5173:5173" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - BASIC_URL=${BACKEND_SERVICE_ENDPOINT} + - MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT} + - host_ip=${host_ip} + - DATAPREP_ENDPOINT=${DATAPREP_ENDPOINT} + - DATAPREP_REDIS_PORT=${DATAPREP_REDIS_PORT} + ipc: host + restart: always + redis-vector-db: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + ports: + - "${REDIS_DB_PORT}:${REDIS_DB_PORT}" + - "${REDIS_INSIGHTS_PORT}:${REDIS_INSIGHTS_PORT}" + dataprep-redis-server: + image: 
${REGISTRY:-opea}/dataprep:${TAG:-latest}-openeuler + container_name: dataprep-redis-server + depends_on: + - redis-vector-db + ports: + - "${DATAPREP_REDIS_PORT}:5000" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + REDIS_HOST: ${host_ip} + INDEX_NAME: ${INDEX_NAME} + HF_TOKEN: ${HF_TOKEN} + LOGFLAG: true + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:5000/v1/health_check || exit 1"] + interval: 10s + timeout: 5s + retries: 10 + restart: unless-stopped + tei-embedding-serving: + image: openeuler/text-embeddings-inference-cpu:1.7.0-oe2403lts + container_name: tei-embedding-serving + entrypoint: /bin/sh -c "yum update -y && yum install -y curl && text-embeddings-router --json-output --model-id ${EMBEDDING_MODEL_ID} --auto-truncate" + ports: + - "${TEI_EMBEDDER_PORT:-12000}:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + host_ip: ${host_ip} + HF_TOKEN: ${HF_TOKEN} + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:80/health"] + interval: 10s + timeout: 6s + retries: 48 + tei-embedding-server: + image: ${REGISTRY:-opea}/embedding:${TAG:-latest}-openeuler + container_name: tei-embedding-server + ports: + - "${EMBEDDER_PORT:-10201}:6000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + EMBEDDING_COMPONENT_NAME: "OPEA_TEI_EMBEDDING" + depends_on: + tei-embedding-serving: + condition: service_healthy + restart: unless-stopped + retriever-redis: + image: ${REGISTRY:-opea}/retriever:${TAG:-latest}-openeuler + container_name: retriever-redis + depends_on: + - redis-vector-db + ports: + - "${REDIS_RETRIEVER_PORT}:${REDIS_RETRIEVER_PORT}" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + REDIS_DB_PORT: ${REDIS_DB_PORT} + REDIS_INSIGHTS_PORT: ${REDIS_INSIGHTS_PORT} + REDIS_RETRIEVER_PORT: ${REDIS_RETRIEVER_PORT} + INDEX_NAME: ${INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HF_TOKEN: ${HF_TOKEN} + LOGFLAG: ${LOGFLAG} + RETRIEVER_COMPONENT_NAME: ${RETRIEVER_COMPONENT_NAME:-OPEA_RETRIEVER_REDIS} + restart: unless-stopped +networks: + default: + driver: bridge diff --git a/CodeGen/docker_compose/intel/cpu/xeon/grafana/dashboards/download_opea_dashboard.sh b/CodeGen/docker_compose/intel/cpu/xeon/grafana/dashboards/download_opea_dashboard.sh new file mode 100644 index 0000000000..48a4d78cf9 --- /dev/null +++ b/CodeGen/docker_compose/intel/cpu/xeon/grafana/dashboards/download_opea_dashboard.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" +if ls *.json 1> /dev/null 2>&1; then + rm *.json +fi + +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/vllm_grafana.json +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/tgi_grafana.json +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/codegen_megaservice_grafana.json +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/node_grafana.json diff --git 
a/CodeGen/docker_compose/intel/cpu/xeon/grafana/provisioning/dashboards/local.yaml b/CodeGen/docker_compose/intel/cpu/xeon/grafana/provisioning/dashboards/local.yaml new file mode 100644 index 0000000000..13922a769b --- /dev/null +++ b/CodeGen/docker_compose/intel/cpu/xeon/grafana/provisioning/dashboards/local.yaml @@ -0,0 +1,14 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: 1 + +providers: +- name: 'default' + orgId: 1 + folder: '' + type: file + disableDeletion: false + updateIntervalSeconds: 10 #how often Grafana will scan for changed dashboards + options: + path: /var/lib/grafana/dashboards diff --git a/CodeGen/docker_compose/intel/cpu/xeon/grafana/provisioning/datasources/datasource.yml b/CodeGen/docker_compose/intel/cpu/xeon/grafana/provisioning/datasources/datasource.yml new file mode 100644 index 0000000000..a206521d67 --- /dev/null +++ b/CodeGen/docker_compose/intel/cpu/xeon/grafana/provisioning/datasources/datasource.yml @@ -0,0 +1,54 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# config file version +apiVersion: 1 + +# list of datasources that should be deleted from the database +deleteDatasources: + - name: Prometheus + orgId: 1 + +# list of datasources to insert/update depending +# what's available in the database +datasources: + # name of the datasource. Required +- name: Prometheus + # datasource type. Required + type: prometheus + # access mode. direct or proxy. Required + access: proxy + # org id. will default to orgId 1 if not specified + orgId: 1 + # url + url: http://$host_ip:9090 + # database password, if used + password: + # database user, if used + user: + # database name, if used + database: + # enable/disable basic auth + basicAuth: false + # basic auth username, if used + basicAuthUser: + # basic auth password, if used + basicAuthPassword: + # enable/disable with credentials headers + withCredentials: + # mark as default datasource. Max one per org + isDefault: true + # fields that will be converted to json and stored in json_data + jsonData: + httpMethod: GET + graphiteVersion: "1.1" + tlsAuth: false + tlsAuthWithCACert: false + # json object of data that will be encrypted. + secureJsonData: + tlsCACert: "..." + tlsClientCert: "..." + tlsClientKey: "..." + version: 1 + # allow users to edit datasources from the UI. 
+ editable: true diff --git a/CodeGen/docker_compose/intel/cpu/xeon/prometheus.yaml b/CodeGen/docker_compose/intel/cpu/xeon/prometheus.yaml new file mode 100644 index 0000000000..27d0940e09 --- /dev/null +++ b/CodeGen/docker_compose/intel/cpu/xeon/prometheus.yaml @@ -0,0 +1,27 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# [IP_ADDR]:{PORT_OUTSIDE_CONTAINER} -> {PORT_INSIDE_CONTAINER} / {PROTOCOL} +global: + scrape_interval: 5s + external_labels: + monitor: "my-monitor" +scrape_configs: + - job_name: "prometheus" + static_configs: + - targets: ["opea_prometheus:9090"] + - job_name: "vllm" + metrics_path: /metrics + static_configs: + - targets: ["vllm-server:80"] + - job_name: "tgi" + metrics_path: /metrics + static_configs: + - targets: [ "tgi-service:80" ] + - job_name: "codegen-backend-server" + metrics_path: /metrics + static_configs: + - targets: ["codegen-xeon-backend-server:7778"] + - job_name: "prometheus-node-exporter" + metrics_path: /metrics + static_configs: + - targets: ["node-exporter:9100"] diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/README.md b/CodeGen/docker_compose/intel/hpu/gaudi/README.md index 0f1e438cf8..2d72155ff3 100644 --- a/CodeGen/docker_compose/intel/hpu/gaudi/README.md +++ b/CodeGen/docker_compose/intel/hpu/gaudi/README.md @@ -49,7 +49,10 @@ This uses the default vLLM-based deployment using `compose.yaml`. # export https_proxy="your_https_proxy" # export no_proxy="localhost,127.0.0.1,${HOST_IP}" # Add other hosts if necessary source intel/set_env.sh - cd /intel/hpu/gaudi + cd intel/hpu/gaudi + cd grafana/dashboards + bash download_opea_dashboard.sh + cd ../.. ``` _Note: The compose file might read additional variables from set_env.sh. Ensure all required variables like ports (`LLM_SERVICE_PORT`, `MEGA_SERVICE_PORT`, etc.) are set if not using defaults from the compose file._ @@ -83,7 +86,7 @@ There are two separate Docker Compose files to select the LLM serving backend ac - **Compose File:** `compose.yaml` - **Description:** Uses vLLM optimized for Intel Gaudi HPUs as the LLM serving engine. This is the default deployment used in the Quick Start. - **Gaudi Service:** `codegen-vllm-gaudi-server` -- **Other Services:** `codegen-llm-server`, `codegen-tei-embedding-server` (CPU), `codegen-retriever-server` (CPU), `redis-vector-db` (CPU), `codegen-dataprep-server` (CPU), `codegen-backend-server` (CPU), `codegen-gradio-ui-server` (CPU). +- **Other Services:** `codegen-llm-server`, `codegen-tei-embedding-server` (CPU), `codegen-retriever-server` (CPU), `redis-vector-db` (CPU), `codegen-dataprep-server` (CPU), `codegen-backend-server` (CPU), `codegen-ui-server` (CPU). ### TGI-based Deployment (`compose_tgi.yaml`) @@ -194,23 +197,23 @@ Use `curl` commands targeting the main service endpoints. Ensure `HOST_IP` is co UI options are similar to the Xeon deployment. -### Gradio UI (Default) +### Svelte UI (Default) -Access the default Gradio UI: +Access the default Svelte UI: `http://{HOST_IP}:5173` _(Port `5173` is the default host mapping)_ -![Gradio UI](../../../../assets/img/codegen_gradio_ui_main.png) +![Svelte UI](../../../../assets/img/codeGen_ui_init.jpg) -### Svelte UI (Optional) +### Gradio UI (Optional) -1. Modify the compose file (either `compose.yaml` or `compose_tgi.yaml`): Swap Gradio service for Svelte (`codegen-gaudi-ui-server`), check port map (e.g., `5173:5173`). +1. 
Modify the compose file (either `compose.yaml` or `compose_tgi.yaml`): Swap Svelte service for Gradio (`codegen-gradio-ui-server`), check port map (e.g., `5173:5173`). 2. Restart: `docker compose up -d` or `docker compose -f compose_tgi.yaml up -d` 3. Access: `http://{HOST_IP}:5173` ### React UI (Optional) -1. Modify the compose file (either `compose.yaml` or `compose_tgi.yaml`): Swap Gradio service for React (`codegen-gaudi-react-ui-server`), check port map (e.g., `5174:80`). +1. Modify the compose file (either `compose.yaml` or `compose_tgi.yaml`): Swap Svelte service for React (`codegen-gaudi-react-ui-server`), check port map (e.g., `5174:80`). 2. Restart: `docker compose up -d` or `docker compose -f compose_tgi.yaml up -d` 3. Access: `http://{HOST_IP}:5174` @@ -228,7 +231,62 @@ Use the `Neural Copilot` extension configured with the CodeGen backend URL: `htt - **Model Download Issues:** Check `HF_TOKEN`, internet access, proxy settings. Check LLM service logs. - **Connection Errors:** Verify `HOST_IP`, ports, and proxy settings. Use `docker ps` and check service logs. -## Stopping the Application +## Monitoring Deployment + +To enable monitoring for the CodeGen application on Gaudi, you can use the monitoring Docker Compose file along with the main deployment. + +### Option #1: Default Deployment (without monitoring) + +To deploy the CodeGen services without monitoring, execute: + +```bash +docker compose up -d +``` + +### Option #2: Deployment with Monitoring + +> NOTE: To enable monitoring, the `compose.monitoring.yaml` file needs to be merged with the default `compose.yaml` file. + +To deploy with monitoring: + +```bash +docker compose -f compose.yaml -f compose.monitoring.yaml up -d +``` + +### Accessing Monitoring Services + +Once deployed with monitoring, you can access: + +- **Prometheus**: `http://${HOST_IP}:9090` +- **Grafana**: `http://${HOST_IP}:3000` (username: `admin`, password: `admin`) +- **Node Exporter**: `http://${HOST_IP}:9100` + +### Monitoring Components + +The monitoring stack includes: + +- **Prometheus**: For metrics collection and querying +- **Grafana**: For visualization and dashboards +- **Node Exporter**: For system metrics collection + +### Monitoring Dashboards + +The following dashboards are automatically downloaded and configured: + +- vLLM Dashboard +- TGI Dashboard +- CodeGen MegaService Dashboard +- Node Exporter Dashboard +- Gaudi Dashboard (`gaudi_grafana_v2.json`) + +### Stopping the Application + +If monitoring is enabled, execute the following command: + +```bash +docker compose -f compose.yaml -f compose.monitoring.yaml down +``` + +If monitoring is not enabled, execute: ```bash docker compose down # for vLLM (compose.yaml) diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/compose.monitoring.yaml b/CodeGen/docker_compose/intel/hpu/gaudi/compose.monitoring.yaml new file mode 100644 index 0000000000..cd891a445d --- /dev/null +++ b/CodeGen/docker_compose/intel/hpu/gaudi/compose.monitoring.yaml @@ -0,0 +1,73 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + prometheus: + image: prom/prometheus:v2.52.0 + container_name: opea_prometheus + user: root + volumes: + - ./prometheus.yaml:/etc/prometheus/prometheus.yaml + - ./prometheus_data:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yaml' + ports: + - '9090:9090' + ipc: host + restart: unless-stopped + + grafana: + image: grafana/grafana:11.0.0 + container_name: grafana + volumes: + - ./grafana_data:/var/lib/grafana + - ./grafana/dashboards:/var/lib/grafana/dashboards + -
./grafana/provisioning:/etc/grafana/provisioning + user: root + environment: + GF_SECURITY_ADMIN_PASSWORD: admin + GF_RENDERING_CALLBACK_URL: http://grafana:3000/ + GF_LOG_FILTERS: rendering:debug + no_proxy: ${no_proxy} + host_ip: ${host_ip} + depends_on: + - prometheus + ports: + - '3000:3000' + ipc: host + restart: unless-stopped + + node-exporter: + image: prom/node-exporter + container_name: node-exporter + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + command: + - '--path.procfs=/host/proc' + - '--path.sysfs=/host/sys' + - --collector.filesystem.ignored-mount-points + - "^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)" + environment: + no_proxy: ${no_proxy} + ports: + - 9100:9100 + restart: always + deploy: + mode: global + + gaudi-metrics-exporter: + image: vault.habana.ai/gaudi-metric-exporter/metric-exporter:latest + privileged: true + container_name: gaudi-metrics-exporter + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + - /dev:/dev + deploy: + mode: global + ports: + - 41611:41611 + restart: unless-stopped diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml index 2e56d1b913..6b33db7e82 100644 --- a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml @@ -3,7 +3,7 @@ services: vllm-service: - image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} + image: opea/vllm-gaudi:1.4 container_name: vllm-gaudi-server ports: - "8028:80" @@ -74,7 +74,7 @@ services: ipc: host restart: always codegen-gaudi-ui-server: - image: ${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest} + image: ${REGISTRY:-opea}/codegen-ui:${TAG:-latest} container_name: codegen-gaudi-ui-server depends_on: - codegen-gaudi-backend-server diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/compose_tgi.yaml b/CodeGen/docker_compose/intel/hpu/gaudi/compose_tgi.yaml index 8339451000..aa8f4a51ea 100644 --- a/CodeGen/docker_compose/intel/hpu/gaudi/compose_tgi.yaml +++ b/CodeGen/docker_compose/intel/hpu/gaudi/compose_tgi.yaml @@ -74,7 +74,7 @@ services: ipc: host restart: always codegen-gaudi-ui-server: - image: ${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest} + image: ${REGISTRY:-opea}/codegen-ui:${TAG:-latest} container_name: codegen-gaudi-ui-server depends_on: - codegen-gaudi-backend-server diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/grafana/dashboards/download_opea_dashboard.sh b/CodeGen/docker_compose/intel/hpu/gaudi/grafana/dashboards/download_opea_dashboard.sh new file mode 100644 index 0000000000..40f2cb12a6 --- /dev/null +++ b/CodeGen/docker_compose/intel/hpu/gaudi/grafana/dashboards/download_opea_dashboard.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" + +if ls *.json 1> /dev/null 2>&1; then + rm *.json +fi + +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/vllm_grafana.json +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/tgi_grafana.json +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/codegen_megaservice_grafana.json +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/node_grafana.json 
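+# The extra gaudi_grafana_v2.json dashboard below covers Gaudi accelerator metrics (presumably fed by the gaudi-metrics-exporter target scraped in prometheus.yaml).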
+wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/gaudi_grafana_v2.json diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/grafana/provisioning/dashboards/local.yaml b/CodeGen/docker_compose/intel/hpu/gaudi/grafana/provisioning/dashboards/local.yaml new file mode 100644 index 0000000000..13922a769b --- /dev/null +++ b/CodeGen/docker_compose/intel/hpu/gaudi/grafana/provisioning/dashboards/local.yaml @@ -0,0 +1,14 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: 1 + +providers: +- name: 'default' + orgId: 1 + folder: '' + type: file + disableDeletion: false + updateIntervalSeconds: 10 #how often Grafana will scan for changed dashboards + options: + path: /var/lib/grafana/dashboards diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/grafana/provisioning/datasources/datasource.yml b/CodeGen/docker_compose/intel/hpu/gaudi/grafana/provisioning/datasources/datasource.yml new file mode 100644 index 0000000000..a206521d67 --- /dev/null +++ b/CodeGen/docker_compose/intel/hpu/gaudi/grafana/provisioning/datasources/datasource.yml @@ -0,0 +1,54 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# config file version +apiVersion: 1 + +# list of datasources that should be deleted from the database +deleteDatasources: + - name: Prometheus + orgId: 1 + +# list of datasources to insert/update depending +# what's available in the database +datasources: + # name of the datasource. Required +- name: Prometheus + # datasource type. Required + type: prometheus + # access mode. direct or proxy. Required + access: proxy + # org id. will default to orgId 1 if not specified + orgId: 1 + # url + url: http://$host_ip:9090 + # database password, if used + password: + # database user, if used + user: + # database name, if used + database: + # enable/disable basic auth + basicAuth: false + # basic auth username, if used + basicAuthUser: + # basic auth password, if used + basicAuthPassword: + # enable/disable with credentials headers + withCredentials: + # mark as default datasource. Max one per org + isDefault: true + # fields that will be converted to json and stored in json_data + jsonData: + httpMethod: GET + graphiteVersion: "1.1" + tlsAuth: false + tlsAuthWithCACert: false + # json object of data that will be encrypted. + secureJsonData: + tlsCACert: "..." + tlsClientCert: "..." + tlsClientKey: "..." + version: 1 + # allow users to edit datasources from the UI. 
+ editable: true diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/prometheus.yaml b/CodeGen/docker_compose/intel/hpu/gaudi/prometheus.yaml new file mode 100644 index 0000000000..f259e2f7f9 --- /dev/null +++ b/CodeGen/docker_compose/intel/hpu/gaudi/prometheus.yaml @@ -0,0 +1,32 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# [IP_ADDR]:{PORT_OUTSIDE_CONTAINER} -> {PORT_INSIDE_CONTAINER} / {PROTOCOL} +global: + scrape_interval: 5s + external_labels: + monitor: "my-monitor" +scrape_configs: + - job_name: "prometheus" + static_configs: + - targets: ["opea_prometheus:9090"] + - job_name: "vllm" + metrics_path: /metrics + static_configs: + - targets: ["codegen-vllm-gaudi-server:80"] + - job_name: "tgi" + metrics_path: /metrics + static_configs: + - targets: ["codegen-tgi-gaudi-server:80"] + - job_name: "codegen-backend-server" + metrics_path: /metrics + static_configs: + - targets: ["codegen-gaudi-backend-server:7778"] + - job_name: "prometheus-node-exporter" + metrics_path: /metrics + static_configs: + - targets: ["node-exporter:9100"] + - job_name: "gaudi-metrics-exporter" + scrape_interval: 30s + metrics_path: /metrics + static_configs: + - targets: [ "gaudi-metrics-exporter:41611" ] diff --git a/CodeGen/docker_compose/intel/set_env.sh b/CodeGen/docker_compose/intel/set_env.sh index 543e9fee88..5e08df213e 100644 --- a/CodeGen/docker_compose/intel/set_env.sh +++ b/CodeGen/docker_compose/intel/set_env.sh @@ -51,3 +51,9 @@ export DATAPREP_ENDPOINT="http://${HOST_IP}:${DATAPREP_REDIS_PORT}/v1/dataprep" export LOGFLAG=false export MODEL_CACHE=${model_cache:-"./data"} export NUM_CARDS=1 + + +# Set network proxy settings +export no_proxy="${no_proxy},${HOST_IP},vllm-server,codegen-xeon-backend-server,codegen-xeon-ui-server,redis-vector-db,dataprep-redis-server,tei-embedding-serving,tei-embedding-server,retriever-redis,opea_prometheus,grafana,node-exporter,$JAEGER_IP" # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" +export http_proxy=$http_proxy +export https_proxy=$https_proxy diff --git a/CodeGen/docker_image_build/build.yaml b/CodeGen/docker_image_build/build.yaml index 282c29766c..f0bfa23580 100644 --- a/CodeGen/docker_image_build/build.yaml +++ b/CodeGen/docker_image_build/build.yaml @@ -31,45 +31,86 @@ services: dockerfile: ./docker/Dockerfile.gradio extends: codegen image: ${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest} + codegen-openeuler: + build: + args: + IMAGE_REPO: ${REGISTRY} + BASE_TAG: ${TAG} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + context: ../ + dockerfile: ./Dockerfile.openEuler + image: ${REGISTRY:-opea}/codegen:${TAG:-latest}-openeuler + codegen-ui-openeuler: + build: + context: ../ui + dockerfile: ./docker/Dockerfile.openEuler + extends: codegen + image: ${REGISTRY:-opea}/codegen-ui:${TAG:-latest}-openeuler + codegen-react-ui-openeuler: + build: + context: ../ui + dockerfile: ./docker/Dockerfile.react.openEuler + extends: codegen + image: ${REGISTRY:-opea}/codegen-react-ui:${TAG:-latest}-openeuler + codegen-gradio-ui-openeuler: + build: + context: ../ui + dockerfile: ./docker/Dockerfile.gradio.openEuler + extends: codegen + image: ${REGISTRY:-opea}/codegen-gradio-ui:${TAG:-latest}-openeuler llm-textgen: build: context: GenAIComps dockerfile: comps/llms/src/text-generation/Dockerfile extends: codegen image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} + llm-textgen-openeuler: + build: + context: GenAIComps + dockerfile: comps/llms/src/text-generation/Dockerfile.openEuler + extends: 
codegen + image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}-openeuler vllm-rocm: build: context: GenAIComps dockerfile: comps/third_parties/vllm/src/Dockerfile.amd_gpu extends: codegen image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest} - vllm: - build: - context: vllm - dockerfile: docker/Dockerfile.cpu - extends: codegen - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} - vllm-gaudi: - build: - context: vllm-fork - dockerfile: Dockerfile.hpu - extends: codegen - image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} dataprep: build: context: GenAIComps dockerfile: comps/dataprep/src/Dockerfile extends: codegen image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} + dataprep-openeuler: + build: + context: GenAIComps + dockerfile: comps/dataprep/src/Dockerfile.openEuler + extends: codegen + image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}-openeuler retriever: build: context: GenAIComps dockerfile: comps/retrievers/src/Dockerfile extends: codegen image: ${REGISTRY:-opea}/retriever:${TAG:-latest} + retriever-openeuler: + build: + context: GenAIComps + dockerfile: comps/retrievers/src/Dockerfile.openEuler + extends: codegen + image: ${REGISTRY:-opea}/retriever:${TAG:-latest}-openeuler embedding: build: context: GenAIComps dockerfile: comps/embeddings/src/Dockerfile extends: codegen image: ${REGISTRY:-opea}/embedding:${TAG:-latest} + embedding-openeuler: + build: + context: GenAIComps + dockerfile: comps/embeddings/src/Dockerfile.openEuler + extends: codegen + image: ${REGISTRY:-opea}/embedding:${TAG:-latest}-openeuler diff --git a/CodeGen/kubernetes/helm/cpu-openeuler-values.yaml b/CodeGen/kubernetes/helm/cpu-openeuler-values.yaml new file mode 100644 index 0000000000..884d7177c7 --- /dev/null +++ b/CodeGen/kubernetes/helm/cpu-openeuler-values.yaml @@ -0,0 +1,40 @@ +# Copyright (C) 2025 Huawei Technologies Co., Ltd. +# SPDX-License-Identifier: Apache-2.0 + +# This file is based on cpu-values.yaml and overrides image tags to 'latest-openeuler' +# for all enabled services to run on openEuler. +image: + tag: latest-openeuler + +tgi: + enabled: false +vllm: + image: + repository: openeuler/vllm-cpu + tag: 0.10.1-oe2403lts + enabled: true +llm-uservice: + image: + tag: latest-openeuler + TEXTGEN_BACKEND: vLLM + +tei: + image: + repository: openeuler/text-embeddings-inference-cpu + tag: 1.7.0-oe2403lts + +data-prep: + image: + tag: latest-openeuler + +retriever-usvc: + image: + tag: latest-openeuler + +embedding-usvc: + image: + tag: latest-openeuler + +codegen-ui: + image: + tag: latest-openeuler diff --git a/CodeGen/kubernetes/helm/cpu-tgi-openeuler-values.yaml b/CodeGen/kubernetes/helm/cpu-tgi-openeuler-values.yaml new file mode 100644 index 0000000000..42839003e0 --- /dev/null +++ b/CodeGen/kubernetes/helm/cpu-tgi-openeuler-values.yaml @@ -0,0 +1,42 @@ +# Copyright (C) 2025 Huawei Technologies Co., Ltd. +# SPDX-License-Identifier: Apache-2.0 + +# This file is based on cpu-values.yaml and overrides image tags to 'latest-openeuler' +# for all enabled services to run on openEuler. 
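+# Compared with cpu-openeuler-values.yaml, this profile enables TGI as the LLM serving backend and disables vLLM.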
+ +image: + tag: latest-openeuler + +tgi: + enabled: true + image: + repository: openeuler/text-generation-inference-cpu + tag: 2.4.0-oe2403lts + +vllm: + enabled: false +llm-uservice: + image: + tag: latest-openeuler + TEXTGEN_BACKEND: TGI + +tei: + image: + repository: openeuler/text-embeddings-inference-cpu + tag: 1.7.0-oe2403lts + +data-prep: + image: + tag: latest-openeuler + +retriever-usvc: + image: + tag: latest-openeuler + +embedding-usvc: + image: + tag: latest-openeuler + +codegen-ui: + image: + tag: latest-openeuler diff --git a/CodeGen/tests/test_compose_on_epyc.sh b/CodeGen/tests/test_compose_on_epyc.sh index efe3c25598..56dfb45b37 100644 --- a/CodeGen/tests/test_compose_on_epyc.sh +++ b/CodeGen/tests/test_compose_on_epyc.sh @@ -28,19 +28,8 @@ function build_docker_images() { docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/vllm-project/vllm.git && cd vllm - VLLM_VER=v0.10.0 - echo "Check out vLLM tag ${VLLM_VER}" - git checkout ${VLLM_VER} &>/dev/null - VLLM_REQ_FILE="requirements/cpu.txt" - if ! grep -q "^transformers" "$VLLM_REQ_FILE"; then - echo "Adding transformers<4.54.0 to $VLLM_REQ_FILE" - echo "transformers<4.54.0" >>"$VLLM_REQ_FILE" - fi - cd ../ - echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="codegen codegen-gradio-ui llm-textgen vllm dataprep retriever embedding" + service_list="codegen codegen-ui llm-textgen dataprep retriever embedding" docker compose -f build.yaml build ${service_list} --no-cache >${LOG_PATH}/docker_image_build.log @@ -51,7 +40,7 @@ function build_docker_images() { function start_services() { local compose_file="$1" local llm_container_name="$2" - + export no_proxy="localhost,127.0.0.1,$ip_address" cd $WORKPATH/docker_compose/amd/cpu/epyc/ # Start Docker Containers @@ -170,47 +159,6 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - # npm install && npm ci && npx playwright install --with-deps - npm install && npm ci && npx playwright install - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - -function validate_gradio() { - local URL="http://${ip_address}:5173/health" - local HTTP_STATUS=$(curl "$URL") - local SERVICE_NAME="Gradio" - - if [ "$HTTP_STATUS" = '{"status":"ok"}' ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. UI server is running successfully..." - else - echo "[ $SERVICE_NAME ] UI server has failed..." 
- fi -} - function stop_docker() { local compose_file="$1" @@ -258,10 +206,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_gradio" - validate_gradio - echo "::endgroup::" - stop_docker "${docker_compose_files[${i}]}" sleep 5s done diff --git a/CodeGen/tests/test_compose_on_gaudi.sh b/CodeGen/tests/test_compose_on_gaudi.sh index 516624827f..88c2c9aeee 100644 --- a/CodeGen/tests/test_compose_on_gaudi.sh +++ b/CodeGen/tests/test_compose_on_gaudi.sh @@ -25,14 +25,8 @@ function build_docker_images() { docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - # Download Gaudi vllm of latest tag - git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork - VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0 - echo "Check out vLLM tag ${VLLM_FORK_VER}" - git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../ - echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="codegen codegen-gradio-ui llm-textgen vllm-gaudi dataprep retriever embedding" + service_list="codegen codegen-ui llm-textgen dataprep retriever embedding" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -41,11 +35,13 @@ function build_docker_images() { function start_services() { local compose_file="$1" local llm_container_name="$2" - + export no_proxy="localhost,127.0.0.1,$ip_address" cd $WORKPATH/docker_compose/intel/hpu/gaudi + # download grafana dashboard + bash grafana/dashboards/download_opea_dashboard.sh # Start Docker Containers - docker compose -f ${compose_file} up -d | tee ${LOG_PATH}/start_services_with_compose.log + docker compose -f ${compose_file} -f compose.monitoring.yaml up -d | tee ${LOG_PATH}/start_services_with_compose.log n=0 until [[ "$n" -ge 100 ]]; do @@ -152,51 +148,11 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - -function validate_gradio() { - local URL="http://${ip_address}:5173/health" - local HTTP_STATUS=$(curl "$URL") - local SERVICE_NAME="Gradio" - - if [ "$HTTP_STATUS" = '{"status":"ok"}' ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. UI server is running successfully..." - else - echo "[ $SERVICE_NAME ] UI server has failed..." 
- fi -} - function stop_docker() { local compose_file="$1" cd $WORKPATH/docker_compose/intel/hpu/gaudi - docker compose -f ${compose_file} down + docker compose -f ${compose_file} -f compose.monitoring.yaml down } function main() { @@ -240,10 +196,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_gradio" - validate_gradio - echo "::endgroup::" - stop_docker "${docker_compose_files[${i}]}" sleep 5s done diff --git a/CodeGen/tests/test_compose_on_rocm.sh b/CodeGen/tests/test_compose_on_rocm.sh index 173a0538fa..a0b9f5e024 100644 --- a/CodeGen/tests/test_compose_on_rocm.sh +++ b/CodeGen/tests/test_compose_on_rocm.sh @@ -111,35 +111,6 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniconda3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - - function stop_docker() { cd $WORKPATH/docker_compose/amd/gpu/rocm/ docker compose stop && docker compose rm -f @@ -167,10 +138,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/CodeGen/tests/test_compose_on_xeon.sh b/CodeGen/tests/test_compose_on_xeon.sh index 0b138b2235..678108c494 100644 --- a/CodeGen/tests/test_compose_on_xeon.sh +++ b/CodeGen/tests/test_compose_on_xeon.sh @@ -26,14 +26,8 @@ function build_docker_images() { docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/vllm-project/vllm.git && cd vllm - VLLM_VER=v0.10.0 - echo "Check out vLLM tag ${VLLM_VER}" - git checkout ${VLLM_VER} &> /dev/null - cd ../ - echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="codegen codegen-gradio-ui llm-textgen vllm dataprep retriever embedding" + service_list="codegen codegen-ui llm-textgen dataprep retriever embedding" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log @@ -44,11 +38,14 @@ function build_docker_images() { function start_services() { local compose_file="$1" local llm_container_name="$2" - + export no_proxy="localhost,127.0.0.1,$ip_address" cd $WORKPATH/docker_compose/intel/cpu/xeon/ + # download grafana dashboard + bash grafana/dashboards/download_opea_dashboard.sh + # Start Docker Containers - docker compose -f ${compose_file} up -d > ${LOG_PATH}/start_services_with_compose.log + docker compose -f ${compose_file} -f compose.monitoring.yaml up -d > ${LOG_PATH}/start_services_with_compose.log n=0 until [[ "$n" -ge 100 ]]; do @@ -163,51 +160,11 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - -function validate_gradio() { - local URL="http://${ip_address}:5173/health" - local HTTP_STATUS=$(curl "$URL") - local SERVICE_NAME="Gradio" - - if [ "$HTTP_STATUS" = '{"status":"ok"}' ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. UI server is running successfully..." - else - echo "[ $SERVICE_NAME ] UI server has failed..." - fi -} - function stop_docker() { local compose_file="$1" cd $WORKPATH/docker_compose/intel/cpu/xeon/ - docker compose -f ${compose_file} down + docker compose -f ${compose_file} -f compose.monitoring.yaml down } function main() { @@ -250,10 +207,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_gradio" - validate_gradio - echo "::endgroup::" - stop_docker "${docker_compose_files[${i}]}" sleep 5s done diff --git a/CodeGen/tests/test_compose_openeuler_on_xeon.sh b/CodeGen/tests/test_compose_openeuler_on_xeon.sh new file mode 100644 index 0000000000..b07dc99dcd --- /dev/null +++ b/CodeGen/tests/test_compose_openeuler_on_xeon.sh @@ -0,0 +1,214 @@ +#!/bin/bash +# Copyright (C) 2025 Huawei Technologies Co., Ltd. 
+# SPDX-License-Identifier: Apache-2.0 + +set -xe +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" +export REGISTRY=${IMAGE_REPO} +export TAG=${IMAGE_TAG} +export MODEL_CACHE=${model_cache:-"./data"} + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') +source $WORKPATH/docker_compose/intel/set_env.sh + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + pushd GenAIComps + echo "GenAIComps test commit is $(git rev-parse HEAD)" + docker build --no-cache -t ${REGISTRY}/comps-base:${TAG}-openeuler --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.openEuler . + popd && sleep 1s + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." + service_list="codegen-openeuler codegen-gradio-ui-openeuler llm-textgen-openeuler dataprep-openeuler retriever-openeuler embedding-openeuler" + + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker pull openeuler/text-generation-inference-cpu:2.4.0-oe2403lts + docker images && sleep 1s +} + +function start_services() { + local compose_file="$1" + local llm_container_name="$2" + export no_proxy="localhost,127.0.0.1,$ip_address" + cd $WORKPATH/docker_compose/intel/cpu/xeon/ + + # Start Docker Containers + docker compose -f ${compose_file} up -d > ${LOG_PATH}/start_services_with_compose.log + + n=0 + until [[ "$n" -ge 100 ]]; do + docker logs ${llm_container_name} > ${LOG_PATH}/llm_service_start.log 2>&1 + if grep -E "Connected|complete" ${LOG_PATH}/llm_service_start.log; then + break + fi + sleep 5s + n=$((n+1)) + done +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + if [[ "$SERVICE_NAME" == "ingest" ]]; then + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$INPUT_DATA" -F index_name=test_redis -H 'Content-Type: multipart/form-data' "$URL") + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Data preparation succeeded..." + else + echo "[ $SERVICE_NAME ] Data preparation failed..." + fi + + else + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. 
Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + fi + sleep 5s +} + +function validate_microservices() { + local llm_container_name="$1" + + # tgi for llm service + validate_services \ + "${ip_address}:8028/v1/chat/completions" \ + "completion_tokens" \ + "llm-service" \ + "${llm_container_name}" \ + '{"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 256}' + + # llm microservice + validate_services \ + "${ip_address}:9000/v1/chat/completions" \ + "data: " \ + "llm" \ + "llm-textgen-server" \ + '{"query":"def print_hello_world():", "max_tokens": 256}' + + # Data ingest microservice + validate_services \ + "${ip_address}:6007/v1/dataprep/ingest" \ + "Data preparation succeeded" \ + "ingest" \ + "dataprep-redis-server" \ + 'link_list=["https://modin.readthedocs.io/en/latest/index.html"]' + +} + +function validate_megaservice() { + # Curl the Mega Service + validate_services \ + "${ip_address}:7778/v1/codegen" \ + "print" \ + "mega-codegen" \ + "codegen-xeon-backend-server" \ + '{"messages": "def print_hello_world():", "max_tokens": 256}' + + # Curl the Mega Service with stream as false + validate_services \ + "${ip_address}:7778/v1/codegen" \ + "" \ + "mega-codegen" \ + "codegen-xeon-backend-server" \ + '{ "messages": "def print_hello_world():", "max_tokens": 256, "stream": false}' + + # Curl the Mega Service with index_name and agents_flag + validate_services \ + "${ip_address}:7778/v1/codegen" \ + "" \ + "mega-codegen" \ + "codegen-xeon-backend-server" \ + '{ "index_name": "test_redis", "agents_flag": "True", "messages": "def print_hello_world():", "max_tokens": 256}' + + validate_services \ + "${ip_address}:7778/v1/codegen" \ + "class" \ + "mega-codegen" \ + "codegen-xeon-backend-server" \ + '{"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "messages": [{"role": "user", "content": "Implement a basic Python class"}], "max_tokens":32}' + +} + +function stop_service() { + local compose_file="$1" + + cd $WORKPATH/docker_compose/intel/cpu/xeon/ + docker compose -f ${compose_file} down +} + +function main() { + # all docker docker compose files for Xeon Platform + docker_compose_files=("compose_openeuler.yaml") + docker_llm_container_names=( "vllm-server") + + # get number of compose files and LLM docker container names + len_compose_files=${#docker_compose_files[@]} + len_containers=${#docker_llm_container_names[@]} + + # number of compose files and docker container names must be matched + if [ ${len_compose_files} -ne ${len_containers} ]; then + echo "Error: number of docker compose files ${len_compose_files} and container names ${len_containers} mismatched" + exit 1 + fi + + # stop_service, stop all compose files + for ((i = 0; i < len_compose_files; i++)); do + stop_service "${docker_compose_files[${i}]}" + done + + # build docker images + if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi + + # loop all compose files + for ((i = 0; i < len_compose_files; i++)); do + echo "Process [${i}]: ${docker_compose_files[$i]}, ${docker_llm_container_names[${i}]}" + docker ps -a + + echo "::group::start_services" + start_services "${docker_compose_files[${i}]}" "${docker_llm_container_names[${i}]}" + echo "::endgroup::" + + echo "::group::validate_microservices" + validate_microservices "${docker_llm_container_names[${i}]}" + echo "::endgroup::" + + echo "::group::validate_megaservice" + validate_megaservice + echo "::endgroup::" + + stop_service 
"${docker_compose_files[${i}]}" + sleep 5s + done + + docker system prune -f +} + +main diff --git a/CodeGen/tests/test_compose_tgi_on_epyc.sh b/CodeGen/tests/test_compose_tgi_on_epyc.sh index 9476925d3f..347b322ccc 100644 --- a/CodeGen/tests/test_compose_tgi_on_epyc.sh +++ b/CodeGen/tests/test_compose_tgi_on_epyc.sh @@ -28,19 +28,8 @@ function build_docker_images() { docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/vllm-project/vllm.git && cd vllm - VLLM_VER=v0.10.0 - echo "Check out vLLM tag ${VLLM_VER}" - git checkout ${VLLM_VER} &>/dev/null - VLLM_REQ_FILE="requirements/cpu.txt" - if ! grep -q "^transformers" "$VLLM_REQ_FILE"; then - echo "Adding transformers<4.54.0 to $VLLM_REQ_FILE" - echo "transformers<4.54.0" >>"$VLLM_REQ_FILE" - fi - cd ../ - echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="codegen codegen-gradio-ui llm-textgen vllm dataprep retriever embedding" + service_list="codegen codegen-ui llm-textgen dataprep retriever embedding" docker compose -f build.yaml build ${service_list} --no-cache >${LOG_PATH}/docker_image_build.log @@ -51,7 +40,7 @@ function build_docker_images() { function start_services() { local compose_file="$1" local llm_container_name="$2" - + export no_proxy="localhost,127.0.0.1,$ip_address" cd $WORKPATH/docker_compose/amd/cpu/epyc/ # Start Docker Containers @@ -170,47 +159,6 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - # npm install && npm ci && npx playwright install --with-deps - npm install && npm ci && npx playwright install - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - -function validate_gradio() { - local URL="http://${ip_address}:5173/health" - local HTTP_STATUS=$(curl "$URL") - local SERVICE_NAME="Gradio" - - if [ "$HTTP_STATUS" = '{"status":"ok"}' ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. UI server is running successfully..." - else - echo "[ $SERVICE_NAME ] UI server has failed..." - fi -} - function stop_docker() { local compose_file="$1" @@ -258,10 +206,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_gradio" - validate_gradio - echo "::endgroup::" - stop_docker "${docker_compose_files[${i}]}" sleep 5s done diff --git a/CodeGen/tests/test_compose_vllm_on_rocm.sh b/CodeGen/tests/test_compose_vllm_on_rocm.sh index 33fef0b279..053d8aeffb 100644 --- a/CodeGen/tests/test_compose_vllm_on_rocm.sh +++ b/CodeGen/tests/test_compose_vllm_on_rocm.sh @@ -110,35 +110,6 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniconda3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" 
- else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - - function stop_docker() { echo "OPENAI_API_KEY - ${OPENAI_API_KEY}" cd $WORKPATH/docker_compose/amd/gpu/rocm/ @@ -167,10 +138,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/CodeGen/tests/test_ui_on_xeon.sh b/CodeGen/tests/test_ui_on_xeon.sh new file mode 100644 index 0000000000..e83b6c4079 --- /dev/null +++ b/CodeGen/tests/test_ui_on_xeon.sh @@ -0,0 +1,193 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" +export REGISTRY=${IMAGE_REPO} +export TAG=${IMAGE_TAG} +export MODEL_CACHE=${model_cache:-"./data"} + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') +source $WORKPATH/docker_compose/intel/set_env.sh + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + pushd GenAIComps + echo "GenAIComps test commit is $(git rev-parse HEAD)" + docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . + popd && sleep 1s + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." + service_list="codegen codegen-ui llm-textgen dataprep retriever embedding" + + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu + docker images && sleep 1s +} + +function start_services() { + local compose_file="$1" + local llm_container_name="$2" + export no_proxy="localhost,127.0.0.1,$ip_address" + cd $WORKPATH/docker_compose/intel/cpu/xeon/ + + # Start Docker Containers + docker compose -f ${compose_file} up -d > ${LOG_PATH}/start_services_with_compose.log + + n=0 + until [[ "$n" -ge 200 ]]; do + docker logs ${llm_container_name} > ${LOG_PATH}/llm_service_start.log 2>&1 + if grep -E "Connected|complete|healthy" ${LOG_PATH}/llm_service_start.log; then + break + fi + sleep 5s + n=$((n+1)) + done +} + +function validate_frontend() { + cd $WORKPATH/ui/svelte + local conda_env_name="OPEA_e2e" + export PATH=${HOME}/miniforge3/bin/:$PATH + if conda info --envs | grep -q "$conda_env_name"; then + echo "$conda_env_name exist!" 
+ else + conda create -n ${conda_env_name} python=3.12 -y + fi + source activate ${conda_env_name} + + sed -i "s/localhost/$ip_address/g" playwright.config.ts + + conda install -c conda-forge nodejs=22.6.0 -y + npm install && npm ci && npx playwright install --with-deps + node -v && npm -v && pip list + + export no_proxy="localhost,127.0.0.1,$ip_address" + + # Wait for backend service to be ready + echo "Waiting for backend service to be ready..." + n=0 + until [[ "$n" -ge 60 ]]; do + HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "http://${ip_address}:7778/v1/chatcompletions") + if [ "$HTTP_STATUS" = "200" ] || [ "$HTTP_STATUS" = "500" ]; then # 500 might be expected if model is loading + echo "Backend service is ready" + break + fi + echo "Waiting for backend service... (attempt $n/60), HTTP status: $HTTP_STATUS" + sleep 5s + n=$((n+1)) + done + + # Wait for UI service to be ready + echo "Waiting for UI service to be ready..." + n=0 + until [[ "$n" -ge 60 ]]; do + HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "http://${ip_address}:5173/health") + if [ "$HTTP_STATUS" = "200" ]; then + echo "UI service is ready" + break + fi + echo "Waiting for UI service... (attempt $n/60), HTTP status: $HTTP_STATUS" + sleep 5s + n=$((n+1)) + done + + # Run tests with better logging + echo "Starting Playwright tests..." + exit_status=0 + npx playwright test --reporter=list > ${LOG_PATH}/frontend_test.log 2>&1 || exit_status=$? + + if [ $exit_status -ne 0 ]; then + echo "[TEST INFO]: ---------frontend test failed---------" + echo "Test logs:" + cat ${LOG_PATH}/frontend_test.log + exit $exit_status + else + echo "[TEST INFO]: ---------frontend test passed---------" + fi +} + +function validate_gradio() { + local URL="http://${ip_address}:5173/health" + local HTTP_STATUS=$(curl -s "$URL") + local SERVICE_NAME="CodeGen UI" + + if [ "$HTTP_STATUS" = '{"status":"ok"}' ] || [ "$HTTP_STATUS" = "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. UI server is running successfully..." + else + echo "[ $SERVICE_NAME ] UI server health check failed. Response: $HTTP_STATUS" + fi +} + +function stop_docker() { + local compose_file="$1" + + cd $WORKPATH/docker_compose/intel/cpu/xeon/ + docker compose -f ${compose_file} down +} + +function main() { + # all docker docker compose files for Xeon Platform + docker_compose_files=("compose_tgi.yaml" "compose.yaml") + docker_llm_container_names=("tgi-server" "vllm-server") + + # get number of compose files and LLM docker container names + len_compose_files=${#docker_compose_files[@]} + len_containers=${#docker_llm_container_names[@]} + + # number of compose files and docker container names must be matched + if [ ${len_compose_files} -ne ${len_containers} ]; then + echo "Error: number of docker compose files ${len_compose_files} and container names ${len_containers} mismatched" + exit 1 + fi + + # stop_docker, stop all compose files + for ((i = 0; i < len_compose_files; i++)); do + stop_docker "${docker_compose_files[${i}]}" + done + + # build docker images + if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi + + # loop all compose files + for ((i = 0; i < len_compose_files; i++)); do + echo "Process [${i}]: ${docker_compose_files[$i]}, ${docker_llm_container_names[${i}]}" + docker ps -a + + echo "::group::start_services" + start_services "${docker_compose_files[${i}]}" "${docker_llm_container_names[${i}]}" + echo "::endgroup::" + + echo "::group::validate_ui" + validate_frontend + if [ $? 
-ne 0 ]; then + echo "Frontend validation failed, checking logs..." + if [ -f "${LOG_PATH}/frontend_test.log" ]; then + echo "Frontend test logs:" + cat "${LOG_PATH}/frontend_test.log" + fi + fi + echo "::endgroup::" + + echo "::group::validate_gradio" + validate_gradio + echo "::endgroup::" + + stop_docker "${docker_compose_files[${i}]}" + sleep 5s + done + + docker system prune -f +} + +main diff --git a/CodeGen/ui/docker/Dockerfile.gradio.openEuler b/CodeGen/ui/docker/Dockerfile.gradio.openEuler new file mode 100644 index 0000000000..a0605e33d2 --- /dev/null +++ b/CodeGen/ui/docker/Dockerfile.gradio.openEuler @@ -0,0 +1,30 @@ +# Copyright (C) 2025 Huawei Technologies Co., Ltd. +# SPDX-License-Identifier: Apache-2.0 + +FROM openeuler/python:3.11.13-oe2403lts@sha256:472759fb1852f35c9c0be20ff931aba6fbac1487175a0e46d6a73b6f6c33dd3c + +ENV LANG=C.UTF-8 + +ARG ARCH="cpu" + +RUN yum update -y && \ + yum install -y \ + gcc \ + g++ \ + make \ + java-21-openjdk \ + jemalloc-devel \ + ffmpeg \ + wget && \ + yum clean all && \ + rm -rf /var/cache/yum + +RUN mkdir -p /home/user + +COPY gradio /home/user/gradio + +RUN pip install --no-cache-dir --upgrade pip setuptools && \ +pip install --no-cache-dir -r /home/user/gradio/requirements.txt + +WORKDIR /home/user/gradio +ENTRYPOINT ["python", "codegen_ui_gradio.py"] diff --git a/CodeGen/ui/docker/Dockerfile.openEuler b/CodeGen/ui/docker/Dockerfile.openEuler new file mode 100644 index 0000000000..efa2cf2723 --- /dev/null +++ b/CodeGen/ui/docker/Dockerfile.openEuler @@ -0,0 +1,32 @@ +# Copyright (C) 2025 Huawei Technologies Co., Ltd. +# SPDX-License-Identifier: Apache-2.0 + +# Use node 20.11.1 as the base image + +FROM openeuler/node:20.11.1-oe2403lts@sha256:25c790f93c2243b361919620c069812319f614fd697e32e433402ae706a19ffd + +# Update package manager and install Git +RUN yum update -y && \ + yum install -y \ + git && \ + yum clean all && \ + rm -rf /var/cache/yum + +# Copy the front-end code repository +COPY svelte /home/user/svelte + +# Set the working directory +WORKDIR /home/user/svelte + +# Install front-end dependencies +RUN npm install --package-lock-only +RUN npm ci + +# Build the front-end application +RUN npm run build + +# Expose the port of the front-end application +EXPOSE 5173 + +# Run the front-end application in preview mode +CMD ["npm", "run", "preview", "--", "--port", "5173", "--host", "0.0.0.0"] diff --git a/CodeGen/ui/docker/Dockerfile.react.openEuler b/CodeGen/ui/docker/Dockerfile.react.openEuler new file mode 100644 index 0000000000..9cba915a56 --- /dev/null +++ b/CodeGen/ui/docker/Dockerfile.react.openEuler @@ -0,0 +1,21 @@ +# Copyright (C) 2025 Huawei Technologies Co., Ltd. 
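The new `test_ui_on_xeon.sh` above loops over both Xeon compose variants (TGI first, then vLLM), waits for the backend and UI health endpoints, and only then runs the Playwright suite, so it can double as a local smoke test. A minimal invocation sketch is below; the token value is a placeholder, and the script has to be launched from `CodeGen/tests` because it derives `WORKPATH` from its own working directory.

```bash
# Local run of the new CodeGen UI test (placeholder values are assumptions).
export HF_TOKEN=your_huggingface_token   # placeholder; needed by the model-serving containers
export IMAGE_REPO=opea                   # "opea" makes the script rebuild images with --no-cache
export IMAGE_TAG=latest

cd CodeGen/tests
bash test_ui_on_xeon.sh 2>&1 | tee local_ui_test.log
```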
+# SPDX-License-Identifier: Apache-2.0 + +# Use node 20.11.1 as the base image +FROM openeuler/node@sha256:25c790f93c2243b361919620c069812319f614fd697e32e433402ae706a19ffd as vite-app + +COPY react /usr/app/react +WORKDIR /usr/app/react + +RUN ["npm", "install", "--package-lock-only"] +RUN ["npm", "ci"] +RUN ["npm", "run", "build"] + + +FROM openeuler/nginx:1.29.0-oe2403lts@sha256:80dcb7274946ef6c0c5c6ccaac5c81b8ac9c4c7c49f1f0bf61d4d9371b165b18 + +COPY --from=vite-app /usr/app/react/dist /usr/share/nginx/html +COPY ./react/env.sh /docker-entrypoint.d/env.sh + +COPY ./react/nginx.conf /etc/nginx/conf.d/default.conf +RUN chmod +x /docker-entrypoint.d/env.sh diff --git a/CodeGen/ui/gradio/codegen_ui_gradio.py b/CodeGen/ui/gradio/codegen_ui_gradio.py index b84269a577..10207d1090 100644 --- a/CodeGen/ui/gradio/codegen_ui_gradio.py +++ b/CodeGen/ui/gradio/codegen_ui_gradio.py @@ -164,7 +164,13 @@ def generate_code(query, index=None, use_agent=False): def ingest_file(file, index=None, chunk_size=100, chunk_overlap=150): headers = {} - file_input = {"files": open(file, "rb")} + # Restrict file access to UPLOAD_ROOT directory + UPLOAD_ROOT = os.path.abspath("./") + normalized_path = os.path.normpath(os.path.join(UPLOAD_ROOT, file)) + # Ensure the constructed path is still within the upload root + if not normalized_path.startswith(UPLOAD_ROOT): + raise Exception("Access to the specified file is not allowed.") + file_input = {"files": open(normalized_path, "rb")} if index: data = {"index_name": index, "chunk_size": chunk_size, "chunk_overlap": chunk_overlap} diff --git a/CodeGen/ui/gradio/requirements.txt b/CodeGen/ui/gradio/requirements.txt index 2a4c8e1a30..14b3080e22 100644 --- a/CodeGen/ui/gradio/requirements.txt +++ b/CodeGen/ui/gradio/requirements.txt @@ -1,4 +1,4 @@ -gradio==5.22.0 +gradio>5.22.0,<=5.34.0 numpy==1.26.4 opencv-python==4.10.0.82 Pillow==10.3.0 diff --git a/CodeGen/ui/svelte/package.json b/CodeGen/ui/svelte/package.json index cb2f611c58..050bd5b351 100644 --- a/CodeGen/ui/svelte/package.json +++ b/CodeGen/ui/svelte/package.json @@ -18,7 +18,7 @@ "@fortawesome/free-solid-svg-icons": "6.2.0", "@playwright/test": "^1.45.2", "@sveltejs/adapter-auto": "^3.0.0", - "@sveltejs/kit": "2.0.0", + "@sveltejs/kit": "2.20.6", "@sveltejs/vite-plugin-svelte": "^3.0.0", "@tailwindcss/typography": "0.5.7", "@types/debug": "4.1.7", diff --git a/CodeGen/ui/svelte/src/lib/modules/chat/Output.svelte b/CodeGen/ui/svelte/src/lib/modules/chat/Output.svelte index 015375ad6f..5bf3012431 100644 --- a/CodeGen/ui/svelte/src/lib/modules/chat/Output.svelte +++ b/CodeGen/ui/svelte/src/lib/modules/chat/Output.svelte @@ -37,12 +37,24 @@ import bash from "svelte-highlight/languages/bash"; import sql from "svelte-highlight/languages/sql"; import { marked } from "marked"; - export let label = ""; + import { afterUpdate, onMount } from "svelte"; + export let output = ""; - export let languages = "Python"; + export let lang = "Python"; export let isCode = false; + export let md_output = ""; + export let segments: Segment[] = []; + let outputEl: HTMLDivElement; let copyText = "copy"; + let shouldAutoscroll = true; + + type Segment = { + id: number; + type: "text" | "code"; + content: string; + lang?: string; + }; const languagesTag = { Typescript: typescript, @@ -65,53 +77,194 @@ Lua: lua, Bash: bash, Sql: sql, - } as { [key: string]: any }; - - function copyToClipboard(text) { - const textArea = document.createElement("textarea"); - textArea.value = text; - document.body.appendChild(textArea); - textArea.select(); - 
document.execCommand("copy"); - document.body.removeChild(textArea); + } as const; + + type LangKey = keyof typeof languagesTag; + + const aliasMap: Record = { + javascript: "Javascript", + js: "Javascript", + jsx: "Javascript", + typescript: "Typescript", + ts: "Typescript", + tsx: "Typescript", + + python: "Python", + py: "Python", + + c: "C", + "c++": "Cpp", + cpp: "Cpp", + cxx: "Cpp", + csharp: "Csharp", + "c#": "Csharp", + + go: "Go", + golang: "Go", + java: "Java", + swift: "Swift", + ruby: "Ruby", + rust: "Rust", + php: "Php", + kotlin: "Kotlin", + objectivec: "Objectivec", + objc: "Objectivec", + "objective-c": "Objectivec", + perl: "Perl", + matlab: "Matlab", + r: "R", + lua: "Lua", + + bash: "Bash", + sh: "Bash", + shell: "Bash", + zsh: "Bash", + + sql: "Sql", + }; + + $: normalizedLangKey = (() => { + const raw = (lang ?? "").toString().trim(); + if (!raw) return null; + const lower = raw.toLowerCase(); + + if (lower in aliasMap) return aliasMap[lower]; + + const hit = (Object.keys(languagesTag) as LangKey[]).find( + (k) => k.toLowerCase() === lower + ); + return hit ?? null; + })(); + + $: fullText = buildFullText(); + + function atBottom(el: HTMLElement, threshold = 8) { + return el.scrollHeight - el.scrollTop - el.clientHeight <= threshold; + } + + function handleScroll() { + if (!outputEl) return; + shouldAutoscroll = atBottom(outputEl); } - function handelCopy() { - copyToClipboard(output); + function scrollToBottom() { + if (!outputEl) return; + requestAnimationFrame(() => + requestAnimationFrame(() => { + if (outputEl.scrollHeight) { + outputEl.scrollTop = outputEl.scrollHeight; + } + }) + ); + } + + onMount(() => { + scrollToBottom(); + }); + + afterUpdate(() => { + if (shouldAutoscroll) scrollToBottom(); + }); + async function copyAllFromDiv() { + await navigator.clipboard.writeText(outputEl.innerText); copyText = "copied!"; - setTimeout(() => { - copyText = "copy"; - }, 1000); + setTimeout(() => (copyText = "copy"), 1000); + } + + function copyToClipboard(text: string) { + if (navigator?.clipboard?.writeText) { + navigator.clipboard.writeText(text); + } else { + const textArea = document.createElement("textarea"); + textArea.value = text; + document.body.appendChild(textArea); + textArea.select(); + document.execCommand("copy"); + document.body.removeChild(textArea); + } + } + + function normalizeToKey(raw?: string | null) { + const s = (raw ?? "").trim().toLowerCase(); + if (!s) return null; + if (s in aliasMap) return aliasMap[s as keyof typeof aliasMap]; + const hit = ( + Object.keys(languagesTag) as (keyof typeof languagesTag)[] + ).find((k) => k.toLowerCase() === s); + return hit ?? null; + } + + function buildFullText(): string { + if (segments && segments.length > 0) { + return segments + .map((seg) => { + if (seg.type === "code") { + const key = normalizeToKey(seg.lang) ?? "text"; + return ["```" + key.toLowerCase(), seg.content, "```"].join("\n"); + } + return seg.content; + }) + .join("\n\n"); + } + + const parts: string[] = []; + if (isCode && output) { + const key = (normalizedLangKey ?? "text").toLowerCase(); + parts.push(["```" + key, output, "```"].join("\n")); + } + if (md_output) { + parts.push(md_output); + } + return parts.join("\n\n"); }
- {label} -
{copyText}
+
- {#if isCode} - - - + {#if segments && segments.length > 0} + {#each segments as seg (seg.id)} + {#if seg.type === "code"} +
+ + + +
+ {:else} +
{@html marked(seg.content)}
+ {/if} + {/each} {:else} -
- {@html marked(output)} -
+ {#if isCode && output} + + + + {/if} + {#if md_output} +
+ {@html marked(md_output)} +
+ {/if} {/if}
@@ -120,17 +273,8 @@ .hiddenScroll::-webkit-scrollbar { display: none; } - .hiddenScroll { -ms-overflow-style: none; /* IE and Edge */ scrollbar-width: none; /* Firefox */ } - - .code-format-style { - resize: none; - font-size: 16px; - border: solid rgba(128, 0, 128, 0) 4px; - box-shadow: 0 0 8px rgba(0, 0, 0, 0.19); - transition: 0.1s linear; - } diff --git a/CodeGen/ui/svelte/src/routes/+page.svelte b/CodeGen/ui/svelte/src/routes/+page.svelte index 0e7d43beaf..d927ea9322 100644 --- a/CodeGen/ui/svelte/src/routes/+page.svelte +++ b/CodeGen/ui/svelte/src/routes/+page.svelte @@ -23,38 +23,196 @@ import PaperAirplane from "$lib/assets/chat/svelte/PaperAirplane.svelte"; import Output from "$lib/modules/chat/Output.svelte"; - let code_output: string = ""; let query: string = ""; let loading: boolean = false; - let deleteFlag: boolean = false; + let inFence = false; + let tickRun = 0; + let skipLangLine = false; + let langBuf = ""; + let currentLang = ""; + + type Segment = { + id: number; + type: "text" | "code"; + content: string; + lang?: string; + }; + let segments: Segment[] = []; + let _sid = 0; + + const languageAliases: Record = { + javascript: "Javascript", + js: "Javascript", + jsx: "Javascript", + typescript: "Typescript", + ts: "Typescript", + tsx: "Typescript", + + python: "Python", + py: "Python", + + c: "C", + "c++": "Cpp", + cpp: "Cpp", + cxx: "Cpp", + csharp: "Csharp", + "c#": "Csharp", + + go: "Go", + golang: "Go", + java: "Java", + swift: "Swift", + ruby: "Ruby", + rust: "Rust", + php: "Php", + kotlin: "Kotlin", + objectivec: "Objectivec", + objc: "Objectivec", + "objective-c": "Objectivec", + perl: "Perl", + matlab: "Matlab", + r: "R", + lua: "Lua", + + bash: "Bash", + sh: "Bash", + shell: "Bash", + zsh: "Bash", + + sql: "Sql", + }; + + function canonicalLang(raw?: string | null): string | null { + const s = (raw ?? "").toString().trim(); + if (!s) return null; + const lower = s.toLowerCase(); + return languageAliases[lower] ?? s; + } + + function appendText(s: string) { + if (!s) return; + const last = segments[segments.length - 1]; + if (!last || last.type !== "text") { + segments = [...segments, { id: ++_sid, type: "text", content: "" }]; + } + segments[segments.length - 1].content += s; + } + + function appendCode(s: string) { + if (!s) return; + const last = segments[segments.length - 1]; + if (!last || last.type !== "code") { + segments = [ + ...segments, + { + id: ++_sid, + type: "code", + content: "", + lang: currentLang || "python", + }, + ]; + } + segments[segments.length - 1].content += s; + } + + function settleTicks() { + if (tickRun === 0) return; + + if (tickRun >= 3) { + const toggles = Math.floor(tickRun / 3); + for (let i = 0; i < toggles; i++) { + inFence = !inFence; + if (inFence) { + skipLangLine = true; + langBuf = ""; + currentLang = ""; + } else { + skipLangLine = false; + } + } + const leftovers = tickRun % 3; + if (leftovers) (inFence ? appendCode : appendText)("`".repeat(leftovers)); + } else { + (inFence ? appendCode : appendText)("`".repeat(tickRun)); + } + tickRun = 0; + } + + function consumeChunk(s: string) { + for (let i = 0; i < s.length; i++) { + const ch = s[i]; + + if (ch === "`") { + tickRun++; + continue; + } + + settleTicks(); + + if (skipLangLine) { + if (ch === "\n") { + skipLangLine = false; + const canon = canonicalLang(langBuf); + currentLang = canon ?? 
(langBuf.trim() || "python"); + langBuf = ""; + } else { + langBuf += ch; + } + continue; + } + + if (inFence) appendCode(ch); + else appendText(ch); + } + } const callTextStream = async (query: string) => { loading = true; - code_output = ""; + + segments = []; + _sid = 0; + inFence = false; + tickRun = 0; + skipLangLine = false; + langBuf = ""; + currentLang = ""; + const eventSource = await fetchTextStream(query); eventSource.addEventListener("message", (e: any) => { - let res = e.data; + const raw = String(e.data); + const payloads = raw + .split(/\r?\n/) + .map((l) => l.replace(/^data:\s*/, "").trim()) + .filter((l) => l.length > 0); - if (res === "[DONE]") { - deleteFlag = false; - loading = false; - query = ''; - } else { - let Msg = JSON.parse(res).choices[0].text; - if (Msg.includes("'''")) { - deleteFlag = true; - } else if (deleteFlag && Msg.includes("\\n")) { - deleteFlag = false; - } else if (Msg !== "" && !deleteFlag) { - code_output += Msg.replace(/\\n/g, "\n"); - } + for (const part of payloads) { + if (part === "[DONE]") { + settleTicks(); + loading = false; + return; } + try { + const json = JSON.parse(part); + const msg = + json.choices?.[0]?.delta?.content ?? json.choices?.[0]?.text ?? ""; + if (!msg || msg === "") continue; + consumeChunk(msg); + } catch (err) { + console.error("JSON chunk parse error:", err, part); + } + } + }); + + eventSource.addEventListener("error", () => { + loading = false; }); + eventSource.stream(); }; const handleTextSubmit = async () => { + if (!query) return; await callTextStream(query); }; @@ -62,48 +220,47 @@
-
-
-
- +
+
+ { - if (event.key === "Enter" && !event.shiftKey && query) { - event.preventDefault(); - handleTextSubmit(); - } - }} - /> - -
+ type="text" + data-testid="code-input" + placeholder="Enter prompt here" + disabled={loading} + maxlength="1200" + bind:value={query} + on:keydown={(event) => { + if (event.key === "Enter" && !event.shiftKey && query) { + event.preventDefault(); + handleTextSubmit(); + } + }} + /> + +
- {#if code_output !== ""} -
- -
+ {#if segments.length} +
+ +
{/if} {#if loading} diff --git a/CodeGen/ui/svelte/tests/codeGen.spec.ts b/CodeGen/ui/svelte/tests/codeGen.spec.ts index 751dd92b57..5c3de2789e 100644 --- a/CodeGen/ui/svelte/tests/codeGen.spec.ts +++ b/CodeGen/ui/svelte/tests/codeGen.spec.ts @@ -18,8 +18,12 @@ async function enterMessageToChat(page: Page, message: string) { await page.getByTestId("code-input").click(); await page.getByTestId("code-input").fill(message); await page.getByTestId("code-input").press("Enter"); - await page.waitForTimeout(10000); - await expect(page.getByTestId("code-output")).toContainText("copy"); + + // Wait for the output container to appear first + await expect(page.getByTestId("code-output")).toBeVisible({ timeout: 30000 }); + + // Wait for content to be loaded and copy button to appear + await expect(page.getByTestId("code-output")).toContainText("copy", { timeout: 30000 }); } // Test description: New Code Gen diff --git a/CodeTrans/Dockerfile.openEuler b/CodeTrans/Dockerfile.openEuler new file mode 100644 index 0000000000..9f62fe3280 --- /dev/null +++ b/CodeTrans/Dockerfile.openEuler @@ -0,0 +1,10 @@ +# Copyright (C) 2025 Huawei Technologies Co., Ltd. +# SPDX-License-Identifier: Apache-2.0 + +ARG IMAGE_REPO=opea +ARG BASE_TAG=latest +FROM $IMAGE_REPO/comps-base:$BASE_TAG-openeuler + +COPY ./code_translation.py $HOME/code_translation.py + +ENTRYPOINT ["python", "code_translation.py"] diff --git a/CodeTrans/docker_compose/amd/cpu/epyc/compose.yaml b/CodeTrans/docker_compose/amd/cpu/epyc/compose.yaml index 3b499de95b..d1b539e5f4 100644 --- a/CodeTrans/docker_compose/amd/cpu/epyc/compose.yaml +++ b/CodeTrans/docker_compose/amd/cpu/epyc/compose.yaml @@ -4,7 +4,7 @@ services: vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1 container_name: codetrans-epyc-vllm-service ports: - "8008:80" diff --git a/CodeTrans/docker_compose/intel/cpu/xeon/README.md b/CodeTrans/docker_compose/intel/cpu/xeon/README.md index 131f03a999..43e76c8e6c 100755 --- a/CodeTrans/docker_compose/intel/cpu/xeon/README.md +++ b/CodeTrans/docker_compose/intel/cpu/xeon/README.md @@ -54,6 +54,8 @@ Consult the section on [CodeTrans Service configuration](#codetrans-configuratio ### Deploy the Services Using Docker Compose +#### Option #1 + To deploy the CodeTrans services, execute the `docker compose up` command with the appropriate arguments. For a default deployment, execute the command below. It uses the 'compose.yaml' file. ```bash @@ -61,6 +63,19 @@ cd cpu/xeon docker compose -f compose.yaml up -d ``` +#### Option #2 + +> NOTE : To enable monitoring, `compose.monitoring.yaml` file need to be merged along with default `compose.yaml` file. + +To deploy with monitoring: + +```bash +cd cpu/xeon/ +# download grafana dashboard +bash grafana/dashboards/download_opea_dashboard.sh +docker compose -f compose.yaml -f compose.monitoring.yaml up -d +``` + > **Note**: developers should build docker image from source when: > > - Developing off the git main branch (as the container's ports in the repo may be different > from the published docker image). 
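Once the merged monitoring deployment is up, it is worth confirming that the monitoring services actually respond before opening Grafana. A quick probe sketch, assuming the default host ports from `compose.monitoring.yaml` (Prometheus on 9090, Grafana on 3000, node-exporter on 9100):

```bash
#!/bin/bash
# Health probes for the optional monitoring stack (default ports assumed).
host_ip=${host_ip:-localhost}

# Prometheus liveness plus a summary of its scrape-target health
curl -sf "http://${host_ip}:9090/-/healthy" && echo
curl -s  "http://${host_ip}:9090/api/v1/targets" | grep -o '"health":"[a-z]*"' | sort | uniq -c

# Grafana API health (the compose file sets the admin password to "admin")
curl -sf "http://${host_ip}:3000/api/health" && echo

# Node exporter metrics endpoint
curl -sf "http://${host_ip}:9100/metrics" | head -n 3
```

A target reported as `"health":"down"` usually means the job name in `prometheus.yaml` does not match the corresponding container name in the compose files.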
@@ -117,6 +132,15 @@ To stop the containers associated with the deployment, execute the following com docker compose -f compose.yaml down ``` +If monitoring is enabled, execute the following command: + +```bash +cd cpu/xeon/ +# download grafana dashboard +bash grafana/dashboards/download_opea_dashboard.sh +docker compose -f compose.yaml -f compose.monitoring.yaml down +``` + ## Configuration Parameters Key parameters are configured via environment variables set before running `docker compose up`. @@ -137,11 +161,12 @@ Key parameters are configured via environment variables set before running `dock In the context of deploying a CodeTrans pipeline on an Intel® Xeon® platform, we can pick and choose different large language model serving frameworks. The table below outlines the various configurations that are available as part of the application. These configurations can be used as templates and can be extended to different components available in [GenAIComps](https://github.com/opea-project/GenAIComps.git). -| File | Description | -| -------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| [compose.yaml](./compose.yaml) | Default compose file using vllm as serving framework and redis as vector database. | -| [compose_tgi.yaml](./compose_tgi.yaml) | The LLM serving framework is TGI. All other configurations remain the same as the default. | -| [compose_remote.yaml](./compose_remote.yaml) | The LLM used is hosted on a remote server and an endpoint is used to access this model. vLLM is the serving framework. Additional environment variables need to be set before running. See [instructions](#running-llm-models-with-remote-endpoints) below. | +| File | Description | +| ---------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| [compose.yaml](./compose.yaml) | Default compose file using vllm as serving framework and redis as vector database. | +| [compose_tgi.yaml](./compose_tgi.yaml) | The LLM serving framework is TGI. All other configurations remain the same as the default. | +| [compose_remote.yaml](./compose_remote.yaml) | The LLM used is hosted on a remote server and an endpoint is used to access this model. vLLM is the serving framework. Additional environment variables need to be set before running. See [instructions](#running-llm-models-with-remote-endpoints) below. | +| [compose.monitoring.yaml](./compose.monitoring.yaml) | Helper file for monitoring features. 
Can be used along with any compose files | ### Running LLM models with remote endpoints diff --git a/CodeTrans/docker_compose/intel/cpu/xeon/compose.monitoring.yaml b/CodeTrans/docker_compose/intel/cpu/xeon/compose.monitoring.yaml new file mode 100644 index 0000000000..dea34085b3 --- /dev/null +++ b/CodeTrans/docker_compose/intel/cpu/xeon/compose.monitoring.yaml @@ -0,0 +1,58 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + prometheus: + image: prom/prometheus:v2.52.0 + container_name: opea_prometheus + user: root + volumes: + - ./prometheus.yaml:/etc/prometheus/prometheus.yaml + - ./prometheus_data:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yaml' + ports: + - '9090:9090' + ipc: host + restart: unless-stopped + + grafana: + image: grafana/grafana:11.0.0 + container_name: grafana + volumes: + - ./grafana_data:/var/lib/grafana + - ./grafana/dashboards:/var/lib/grafana/dashboards + - ./grafana/provisioning:/etc/grafana/provisioning + user: root + environment: + GF_SECURITY_ADMIN_PASSWORD: admin + GF_RENDERING_CALLBACK_URL: http://grafana:3000/ + GF_LOG_FILTERS: rendering:debug + no_proxy: ${no_proxy} + host_ip: ${host_ip} + depends_on: + - prometheus + ports: + - '3000:3000' + ipc: host + restart: unless-stopped + + node-exporter: + image: prom/node-exporter + container_name: node-exporter + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + command: + - '--path.procfs=/host/proc' + - '--path.sysfs=/host/sys' + - --collector.filesystem.ignored-mount-points + - "^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)" + environment: + no_proxy: ${no_proxy} + ports: + - 9100:9100 + restart: always + deploy: + mode: global diff --git a/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml b/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml index f950c770ec..4e4464f220 100644 --- a/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml +++ b/CodeTrans/docker_compose/intel/cpu/xeon/compose.yaml @@ -3,7 +3,7 @@ services: vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1 container_name: codetrans-xeon-vllm-service ports: - "8008:80" diff --git a/CodeTrans/docker_compose/intel/cpu/xeon/compose_openeuler.yaml b/CodeTrans/docker_compose/intel/cpu/xeon/compose_openeuler.yaml new file mode 100644 index 0000000000..4fa6f9f5c3 --- /dev/null +++ b/CodeTrans/docker_compose/intel/cpu/xeon/compose_openeuler.yaml @@ -0,0 +1,96 @@ +# Copyright (C) 2025 Huawei Technologies Co., Ltd. 
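Because the CPU compose files now point at the published vLLM CPU image instead of a locally built `opea/vllm` image, the model server no longer has to be compiled as part of a deployment; the image can be pre-pulled, and the same endpoint the compose healthcheck uses can be polled once the stack is started. A small sketch, assuming the default host port 8008:

```bash
# Pre-pull the prebuilt vLLM CPU image referenced by the updated compose files
docker pull public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1

# After `docker compose up -d`, wait on the same /health endpoint the healthcheck probes
host_ip=${host_ip:-localhost}
until curl -sf "http://${host_ip}:8008/health" >/dev/null; do
  echo "waiting for vllm-service..."
  sleep 10
done
echo "vllm-service is healthy"
```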
+# SPDX-License-Identifier: Apache-2.0 + +services: + vllm-service: + image: openeuler/vllm-cpu:0.10.1-oe2403lts + container_name: codetrans-xeon-vllm-service + ports: + - "8008:80" + volumes: + - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HF_TOKEN} + LLM_MODEL_ID: ${LLM_MODEL_ID} + VLLM_TORCH_PROFILER_DIR: "/mnt" + healthcheck: + test: ["CMD-SHELL", "curl -f http://$host_ip:8008/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80 + llm: + image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}-openeuler + container_name: codetrans-xeon-llm-server + depends_on: + vllm-service: + condition: service_healthy + ports: + - "9000:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: ${LLM_ENDPOINT} + LLM_MODEL_ID: ${LLM_MODEL_ID} + LLM_COMPONENT_NAME: ${LLM_COMPONENT_NAME} + HF_TOKEN: ${HF_TOKEN} + restart: unless-stopped + codetrans-xeon-backend-server: + image: ${REGISTRY:-opea}/codetrans:${TAG:-latest}-openeuler + container_name: codetrans-xeon-backend-server + depends_on: + - vllm-service + - llm + ports: + - "${BACKEND_SERVICE_PORT:-7777}:7777" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} + - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + ipc: host + restart: always + codetrans-xeon-ui-server: + image: ${REGISTRY:-opea}/codetrans-ui:${TAG:-latest}-openeuler + container_name: codetrans-xeon-ui-server + depends_on: + - codetrans-xeon-backend-server + ports: + - "${FRONTEND_SERVICE_PORT:-5173}:5173" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - BASE_URL=${BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + codetrans-xeon-nginx-server: + image: ${REGISTRY:-opea}/nginx:${TAG:-latest}-openeuler + container_name: codetrans-xeon-nginx-server + depends_on: + - codetrans-xeon-backend-server + - codetrans-xeon-ui-server + ports: + - "${NGINX_PORT:-80}:80" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP} + - FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT} + - BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME} + - BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP} + - BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/CodeTrans/docker_compose/intel/cpu/xeon/compose_tgi_openeuler.yaml b/CodeTrans/docker_compose/intel/cpu/xeon/compose_tgi_openeuler.yaml new file mode 100644 index 0000000000..2806472fc9 --- /dev/null +++ b/CodeTrans/docker_compose/intel/cpu/xeon/compose_tgi_openeuler.yaml @@ -0,0 +1,95 @@ +# Copyright (C) 2025 Huawei Technologies Co., Ltd. 
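The openEuler variant above keeps the service topology and environment variables of the default Xeon deployment and only swaps in `-openeuler`-suffixed images, so bringing it up follows the usual flow; the new `test_compose_openeuler_on_xeon.sh` later in this patch automates the full build-and-test cycle. A condensed manual sketch, assuming the `*-openeuler` images have already been built:

```bash
# Assumes the *-openeuler images already exist locally or in your registry.
cd CodeTrans/docker_compose/intel
export HF_TOKEN=your_huggingface_token   # placeholder
source set_env.sh

cd cpu/xeon
docker compose -f compose_openeuler.yaml up -d

# Check the same readiness marker the test script greps for
docker logs codetrans-xeon-vllm-service 2>&1 | grep -q complete && echo "model server ready"
```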
+# SPDX-License-Identifier: Apache-2.0 + +services: + tgi-service: + image: openeuler/text-generation-inference-cpu:2.4.0-oe2403lts + container_name: codetrans-xeon-tgi-service + ports: + - "8008:80" + volumes: + - "${MODEL_CACHE}:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HF_TOKEN: ${HF_TOKEN} + host_ip: ${host_ip} + healthcheck: + test: ["CMD-SHELL", "curl -f http://$host_ip:8008/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 + llm: + image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}-openeuler + container_name: codetrans-xeon-llm-server + depends_on: + tgi-service: + condition: service_healthy + ports: + - "9000:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + LLM_ENDPOINT: ${LLM_ENDPOINT} + LLM_MODEL_ID: ${LLM_MODEL_ID} + LLM_COMPONENT_NAME: ${LLM_COMPONENT_NAME} + HF_TOKEN: ${HF_TOKEN} + restart: unless-stopped + codetrans-xeon-backend-server: + image: ${REGISTRY:-opea}/codetrans:${TAG:-latest}-openeuler + container_name: codetrans-xeon-backend-server + depends_on: + - tgi-service + - llm + ports: + - "${BACKEND_SERVICE_PORT:-7777}:7777" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} + - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP} + ipc: host + restart: always + codetrans-xeon-ui-server: + image: ${REGISTRY:-opea}/codetrans-ui:${TAG:-latest}-openeuler + container_name: codetrans-xeon-ui-server + depends_on: + - codetrans-xeon-backend-server + ports: + - "${FRONTEND_SERVICE_PORT:-5173}:5173" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - BASE_URL=${BACKEND_SERVICE_ENDPOINT} + ipc: host + restart: always + codetrans-xeon-nginx-server: + image: ${REGISTRY:-opea}/nginx:${TAG:-latest}-openeuler + container_name: codetrans-xeon-nginx-server + depends_on: + - codetrans-xeon-backend-server + - codetrans-xeon-ui-server + ports: + - "${NGINX_PORT:-80}:80" + environment: + - no_proxy=${no_proxy} + - https_proxy=${https_proxy} + - http_proxy=${http_proxy} + - FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP} + - FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT} + - BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME} + - BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP} + - BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT} + ipc: host + restart: always + +networks: + default: + driver: bridge diff --git a/CodeTrans/docker_compose/intel/cpu/xeon/grafana/dashboards/download_opea_dashboard.sh b/CodeTrans/docker_compose/intel/cpu/xeon/grafana/dashboards/download_opea_dashboard.sh new file mode 100644 index 0000000000..47d4f84587 --- /dev/null +++ b/CodeTrans/docker_compose/intel/cpu/xeon/grafana/dashboards/download_opea_dashboard.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" +if ls *.json 1> /dev/null 2>&1; then + rm *.json +fi + +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/vllm_grafana.json +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/tgi_grafana.json +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/codetrans_megaservice_grafana.json +wget 
https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/node_grafana.json diff --git a/CodeTrans/docker_compose/intel/cpu/xeon/grafana/provisioning/dashboards/local.yaml b/CodeTrans/docker_compose/intel/cpu/xeon/grafana/provisioning/dashboards/local.yaml new file mode 100644 index 0000000000..13922a769b --- /dev/null +++ b/CodeTrans/docker_compose/intel/cpu/xeon/grafana/provisioning/dashboards/local.yaml @@ -0,0 +1,14 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: 1 + +providers: +- name: 'default' + orgId: 1 + folder: '' + type: file + disableDeletion: false + updateIntervalSeconds: 10 #how often Grafana will scan for changed dashboards + options: + path: /var/lib/grafana/dashboards diff --git a/CodeTrans/docker_compose/intel/cpu/xeon/grafana/provisioning/datasources/datasource.yml b/CodeTrans/docker_compose/intel/cpu/xeon/grafana/provisioning/datasources/datasource.yml new file mode 100644 index 0000000000..a206521d67 --- /dev/null +++ b/CodeTrans/docker_compose/intel/cpu/xeon/grafana/provisioning/datasources/datasource.yml @@ -0,0 +1,54 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# config file version +apiVersion: 1 + +# list of datasources that should be deleted from the database +deleteDatasources: + - name: Prometheus + orgId: 1 + +# list of datasources to insert/update depending +# what's available in the database +datasources: + # name of the datasource. Required +- name: Prometheus + # datasource type. Required + type: prometheus + # access mode. direct or proxy. Required + access: proxy + # org id. will default to orgId 1 if not specified + orgId: 1 + # url + url: http://$host_ip:9090 + # database password, if used + password: + # database user, if used + user: + # database name, if used + database: + # enable/disable basic auth + basicAuth: false + # basic auth username, if used + basicAuthUser: + # basic auth password, if used + basicAuthPassword: + # enable/disable with credentials headers + withCredentials: + # mark as default datasource. Max one per org + isDefault: true + # fields that will be converted to json and stored in json_data + jsonData: + httpMethod: GET + graphiteVersion: "1.1" + tlsAuth: false + tlsAuthWithCACert: false + # json object of data that will be encrypted. + secureJsonData: + tlsCACert: "..." + tlsClientCert: "..." + tlsClientKey: "..." + version: 1 + # allow users to edit datasources from the UI. 
+ editable: true diff --git a/CodeTrans/docker_compose/intel/cpu/xeon/prometheus.yaml b/CodeTrans/docker_compose/intel/cpu/xeon/prometheus.yaml new file mode 100644 index 0000000000..57bbf6e0db --- /dev/null +++ b/CodeTrans/docker_compose/intel/cpu/xeon/prometheus.yaml @@ -0,0 +1,23 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# [IP_ADDR]:{PORT_OUTSIDE_CONTAINER} -> {PORT_INSIDE_CONTAINER} / {PROTOCOL} +global: + scrape_interval: 5s + external_labels: + monitor: "my-monitor" +scrape_configs: + - job_name: "prometheus" + static_configs: + - targets: ["opea_prometheus:9090"] + - job_name: "vllm" + metrics_path: /metrics + static_configs: + - targets: ["codetrans-xeon-vllm-service:80"] + - job_name: "codetrans-backend-server" + metrics_path: /metrics + static_configs: + - targets: ["codetrans-xeon-backend-server:7777"] + - job_name: "prometheus-node-exporter" + metrics_path: /metrics + static_configs: + - targets: ["node-exporter:9100"] diff --git a/CodeTrans/docker_compose/intel/hpu/gaudi/README.md b/CodeTrans/docker_compose/intel/hpu/gaudi/README.md index 2597e1aeb6..830e9f7a7c 100755 --- a/CodeTrans/docker_compose/intel/hpu/gaudi/README.md +++ b/CodeTrans/docker_compose/intel/hpu/gaudi/README.md @@ -54,6 +54,8 @@ Consult the section on [CodeTrans Service configuration](#codetrans-configuratio ### Deploy the Services Using Docker Compose +#### Option #1 + To deploy the CodeTrans services, execute the `docker compose up` command with the appropriate arguments. For a default deployment, execute the command below. It uses the 'compose.yaml' file. ```bash @@ -61,6 +63,19 @@ cd hpu/gaudi docker compose -f compose.yaml up -d ``` +#### Option #2 + +> NOTE : To enable monitoring, `compose.monitoring.yaml` file need to be merged along with default `compose.yaml` file. + +To deploy with monitoring: + +```bash +cd hpu/gaudi/ +# download grafana dashboard +bash grafana/dashboards/download_opea_dashboard.sh +docker compose -f compose.yaml -f compose.monitoring.yaml up -d +``` + > **Note**: developers should build docker image from source when: > > - Developing off the git main branch (as the container's ports in the repo may be different > from the published docker image). @@ -117,6 +132,15 @@ To stop the containers associated with the deployment, execute the following com docker compose -f compose.yaml down ``` +If monitoring is enabled, execute the following command: + +```bash +cd hpu/gaudi/ +# download grafana dashboard +bash grafana/dashboards/download_opea_dashboard.sh +docker compose -f compose.yaml -f compose.monitoring.yaml down +``` + ## Configuration Parameters Key parameters are configured via environment variables set before running `docker compose up`. 
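On Gaudi, the monitoring overlay shown next adds a `gaudi-metrics-exporter` on port 41611 alongside the Prometheus/Grafana/node-exporter trio, so after `docker compose -f compose.yaml -f compose.monitoring.yaml up -d` it is useful to confirm that HPU metrics are actually being scraped. A small verification sketch, assuming the default ports from the Gaudi monitoring and Prometheus configs:

```bash
#!/bin/bash
# Verify the Gaudi-specific monitoring targets (default ports assumed).
host_ip=${host_ip:-localhost}

# HPU metrics straight from the exporter
curl -sf "http://${host_ip}:41611/metrics" | head -n 5

# Ask Prometheus which scrape pools it sees and whether they are up
curl -s "http://${host_ip}:9090/api/v1/targets" \
  | tr ',' '\n' \
  | grep -E '"scrapePool"|"health"'
```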
diff --git a/CodeTrans/docker_compose/intel/hpu/gaudi/compose.monitoring.yaml b/CodeTrans/docker_compose/intel/hpu/gaudi/compose.monitoring.yaml new file mode 100644 index 0000000000..691671e656 --- /dev/null +++ b/CodeTrans/docker_compose/intel/hpu/gaudi/compose.monitoring.yaml @@ -0,0 +1,75 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + prometheus: + image: prom/prometheus:v2.52.0 + container_name: opea_prometheus + user: root + volumes: + - ./prometheus.yaml:/etc/prometheus/prometheus.yaml + - ./prometheus_data:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yaml' + ports: + - '9090:9090' + ipc: host + restart: unless-stopped + + grafana: + image: grafana/grafana:11.0.0 + container_name: grafana + volumes: + - ./grafana_data:/var/lib/grafana + - ./grafana/dashboards:/var/lib/grafana/dashboards + - ./grafana/provisioning:/etc/grafana/provisioning + user: root + environment: + GF_SECURITY_ADMIN_PASSWORD: admin + GF_RENDERING_CALLBACK_URL: http://grafana:3000/ + GF_LOG_FILTERS: rendering:debug + no_proxy: ${no_proxy} + host_ip: ${host_ip} + depends_on: + - prometheus + ports: + - '3000:3000' + ipc: host + restart: unless-stopped + + node-exporter: + image: prom/node-exporter + container_name: node-exporter + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + command: + - '--path.procfs=/host/proc' + - '--path.rootfs=/rootfs' + - '--path.sysfs=/host/sys' + - '--path.udev.data=/rootfs/run/udev/data' + - --collector.filesystem.ignored-mount-points + - "^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)" + environment: + no_proxy: ${no_proxy} + ports: + - 9100:9100 + restart: always + deploy: + mode: global + + gaudi-metrics-exporter: + image: vault.habana.ai/gaudi-metric-exporter/metric-exporter:latest + privileged: true + container_name: gaudi-metrics-exporter + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + - /dev:/dev + deploy: + mode: global + ports: + - 41611:41611 + restart: unless-stopped diff --git a/CodeTrans/docker_compose/intel/hpu/gaudi/compose.yaml b/CodeTrans/docker_compose/intel/hpu/gaudi/compose.yaml index 60728feabf..96ea18b3b0 100644 --- a/CodeTrans/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/CodeTrans/docker_compose/intel/hpu/gaudi/compose.yaml @@ -3,7 +3,7 @@ services: vllm-service: - image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} + image: opea/vllm-gaudi:1.4 container_name: codetrans-gaudi-vllm-service ports: - "8008:80" diff --git a/CodeTrans/docker_compose/intel/hpu/gaudi/grafana/dashboards/download_opea_dashboard.sh b/CodeTrans/docker_compose/intel/hpu/gaudi/grafana/dashboards/download_opea_dashboard.sh new file mode 100644 index 0000000000..b601762739 --- /dev/null +++ b/CodeTrans/docker_compose/intel/hpu/gaudi/grafana/dashboards/download_opea_dashboard.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" +if ls *.json 1> /dev/null 2>&1; then + rm *.json +fi + +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/vllm_grafana.json +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/tgi_grafana.json +wget 
https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/gaudi_grafana_v2.json +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/codetrans_megaservice_grafana.json +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/node_grafana.json diff --git a/CodeTrans/docker_compose/intel/hpu/gaudi/grafana/provisioning/dashboards/local.yaml b/CodeTrans/docker_compose/intel/hpu/gaudi/grafana/provisioning/dashboards/local.yaml new file mode 100644 index 0000000000..13922a769b --- /dev/null +++ b/CodeTrans/docker_compose/intel/hpu/gaudi/grafana/provisioning/dashboards/local.yaml @@ -0,0 +1,14 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: 1 + +providers: +- name: 'default' + orgId: 1 + folder: '' + type: file + disableDeletion: false + updateIntervalSeconds: 10 #how often Grafana will scan for changed dashboards + options: + path: /var/lib/grafana/dashboards diff --git a/CodeTrans/docker_compose/intel/hpu/gaudi/grafana/provisioning/datasources/datasource.yml b/CodeTrans/docker_compose/intel/hpu/gaudi/grafana/provisioning/datasources/datasource.yml new file mode 100644 index 0000000000..a206521d67 --- /dev/null +++ b/CodeTrans/docker_compose/intel/hpu/gaudi/grafana/provisioning/datasources/datasource.yml @@ -0,0 +1,54 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# config file version +apiVersion: 1 + +# list of datasources that should be deleted from the database +deleteDatasources: + - name: Prometheus + orgId: 1 + +# list of datasources to insert/update depending +# what's available in the database +datasources: + # name of the datasource. Required +- name: Prometheus + # datasource type. Required + type: prometheus + # access mode. direct or proxy. Required + access: proxy + # org id. will default to orgId 1 if not specified + orgId: 1 + # url + url: http://$host_ip:9090 + # database password, if used + password: + # database user, if used + user: + # database name, if used + database: + # enable/disable basic auth + basicAuth: false + # basic auth username, if used + basicAuthUser: + # basic auth password, if used + basicAuthPassword: + # enable/disable with credentials headers + withCredentials: + # mark as default datasource. Max one per org + isDefault: true + # fields that will be converted to json and stored in json_data + jsonData: + httpMethod: GET + graphiteVersion: "1.1" + tlsAuth: false + tlsAuthWithCACert: false + # json object of data that will be encrypted. + secureJsonData: + tlsCACert: "..." + tlsClientCert: "..." + tlsClientKey: "..." + version: 1 + # allow users to edit datasources from the UI. 
+ editable: true diff --git a/CodeTrans/docker_compose/intel/hpu/gaudi/prometheus.yaml b/CodeTrans/docker_compose/intel/hpu/gaudi/prometheus.yaml new file mode 100644 index 0000000000..a9c3b5fc14 --- /dev/null +++ b/CodeTrans/docker_compose/intel/hpu/gaudi/prometheus.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# [IP_ADDR]:{PORT_OUTSIDE_CONTAINER} -> {PORT_INSIDE_CONTAINER} / {PROTOCOL} +global: + scrape_interval: 5s + external_labels: + monitor: "my-monitor" +scrape_configs: + - job_name: "prometheus" + static_configs: + - targets: ["opea_prometheus:9090"] + - job_name: "vllm" + metrics_path: /metrics + static_configs: + - targets: ["codetrans-gaudi-vllm-service:80"] + - job_name: "codetrans-backend-server" + metrics_path: /metrics + static_configs: + - targets: ["codetrans-gaudi-backend-server:7777"] + - job_name: "prometheus-node-exporter" + scrape_interval: 30s + scrape_timeout: 25s + metrics_path: /metrics + static_configs: + - targets: ["node-exporter:9100"] + - job_name: "gaudi-metrics-exporter" + scrape_interval: 30s + metrics_path: /metrics + static_configs: + - targets: ["gaudi-metrics-exporter:41611"] diff --git a/CodeTrans/docker_compose/intel/set_env.sh b/CodeTrans/docker_compose/intel/set_env.sh index 04c4048c52..0c2d0883c5 100644 --- a/CodeTrans/docker_compose/intel/set_env.sh +++ b/CodeTrans/docker_compose/intel/set_env.sh @@ -24,3 +24,9 @@ export FRONTEND_SERVICE_PORT=5173 export BACKEND_SERVICE_NAME=codetrans export BACKEND_SERVICE_IP=${host_ip} export BACKEND_SERVICE_PORT=7777 + + +# Set network proxy settings +export no_proxy="${no_proxy},${HOST_IP},vllm-server,codetrans-xeon-backend-server,codetrans-xeon-ui-server,redis-vector-db,dataprep-redis-server,tei-embedding-serving,tei-embedding-server,retriever-redis,opea_prometheus,grafana,node-exporter,$JAEGER_IP" # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" +export http_proxy=$http_proxy +export https_proxy=$https_proxy diff --git a/CodeTrans/docker_image_build/build.yaml b/CodeTrans/docker_image_build/build.yaml index b230d1d4ec..81cd9c9f50 100644 --- a/CodeTrans/docker_image_build/build.yaml +++ b/CodeTrans/docker_image_build/build.yaml @@ -13,36 +13,53 @@ services: context: ../ dockerfile: ./Dockerfile image: ${REGISTRY:-opea}/codetrans:${TAG:-latest} + codetrans-openeuler: + build: + args: + IMAGE_REPO: ${REGISTRY:-opea} + BASE_TAG: ${TAG:-latest} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + context: ../ + dockerfile: ./Dockerfile.openEuler + image: ${REGISTRY:-opea}/codetrans:${TAG:-latest}-openeuler codetrans-ui: build: context: ../ui dockerfile: ./docker/Dockerfile extends: codetrans image: ${REGISTRY:-opea}/codetrans-ui:${TAG:-latest} + codetrans-ui-openeuler: + build: + context: ../ui + dockerfile: ./docker/Dockerfile.openEuler + extends: codetrans + image: ${REGISTRY:-opea}/codetrans-ui:${TAG:-latest}-openeuler llm-textgen: build: context: GenAIComps dockerfile: comps/llms/src/text-generation/Dockerfile extends: codetrans image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} - vllm: - build: - context: vllm - dockerfile: docker/Dockerfile.cpu - extends: codetrans - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} - vllm-gaudi: + llm-textgen-openeuler: build: - context: vllm-fork - dockerfile: Dockerfile.hpu + context: GenAIComps + dockerfile: comps/llms/src/text-generation/Dockerfile.openEuler extends: codetrans - image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} + image: 
${REGISTRY:-opea}/llm-textgen:${TAG:-latest}-openeuler nginx: build: context: GenAIComps dockerfile: comps/third_parties/nginx/src/Dockerfile extends: codetrans image: ${REGISTRY:-opea}/nginx:${TAG:-latest} + nginx-openeuler: + build: + context: GenAIComps + dockerfile: comps/third_parties/nginx/src/Dockerfile.openEuler + extends: codetrans + image: ${REGISTRY:-opea}/nginx:${TAG:-latest}-openeuler vllm-rocm: build: context: GenAIComps diff --git a/CodeTrans/tests/test_compose_on_epyc.sh b/CodeTrans/tests/test_compose_on_epyc.sh index 50a9fb68b0..28f31f675b 100644 --- a/CodeTrans/tests/test_compose_on_epyc.sh +++ b/CodeTrans/tests/test_compose_on_epyc.sh @@ -28,19 +28,8 @@ function build_docker_images() { docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/vllm-project/vllm.git && cd vllm - VLLM_VER=v0.10.0 - echo "Check out vLLM tag ${VLLM_VER}" - git checkout ${VLLM_VER} &>/dev/null - VLLM_REQ_FILE="requirements/cpu.txt" - if ! grep -q "^transformers" "$VLLM_REQ_FILE"; then - echo "Adding transformers<4.54.0 to $VLLM_REQ_FILE" - echo "transformers<4.54.0" >>"$VLLM_REQ_FILE" - fi - cd ../ - echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="codetrans codetrans-ui llm-textgen vllm nginx" + service_list="codetrans codetrans-ui llm-textgen nginx" docker compose -f build.yaml build ${service_list} --no-cache >${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -49,7 +38,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/amd/cpu/epyc export HF_TOKEN=${HF_TOKEN} - + export no_proxy="localhost,127.0.0.1,$ip_address" export NGINX_PORT=80 source set_env.sh @@ -129,37 +118,6 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - CONDA_ROOT=$(conda info --base) - source "${CONDA_ROOT}/etc/profile.d/conda.sh" - conda activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - # npm install && npm ci && npx playwright install --with-deps - npm install && npm ci && npx playwright install - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? 
- - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/amd/cpu/epyc/ docker compose -f compose.yaml stop && docker compose rm -f @@ -187,10 +145,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/CodeTrans/tests/test_compose_on_gaudi.sh b/CodeTrans/tests/test_compose_on_gaudi.sh index 07af411cc1..edfca863d7 100644 --- a/CodeTrans/tests/test_compose_on_gaudi.sh +++ b/CodeTrans/tests/test_compose_on_gaudi.sh @@ -25,12 +25,8 @@ function build_docker_images() { docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork - VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0 - git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../ - echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="codetrans codetrans-ui llm-textgen vllm-gaudi nginx" + service_list="codetrans codetrans-ui llm-textgen nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -40,13 +36,17 @@ function start_services() { cd $WORKPATH/docker_compose/intel export HF_TOKEN=${HF_TOKEN} export NGINX_PORT=80 + export no_proxy="localhost,127.0.0.1,$ip_address" source set_env.sh cd hpu/gaudi sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env + # download grafana dashboard + bash grafana/dashboards/download_opea_dashboard.sh + # Start Docker Containers - docker compose up -d > ${LOG_PATH}/start_services_with_compose.log + docker compose -f compose.yaml -f compose.monitoring.yaml up -d > ${LOG_PATH}/start_services_with_compose.log n=0 until [[ "$n" -ge 100 ]]; do @@ -118,38 +118,9 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? 
- - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi - -} - function stop_docker() { cd $WORKPATH/docker_compose/intel/hpu/gaudi - docker compose -f compose.yaml stop && docker compose rm -f + docker compose -f compose.yaml -f compose.monitoring.yaml stop && docker compose rm -f } function main() { @@ -174,10 +145,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/CodeTrans/tests/test_compose_on_rocm.sh b/CodeTrans/tests/test_compose_on_rocm.sh index ecc6a4fdfc..5524c1757b 100644 --- a/CodeTrans/tests/test_compose_on_rocm.sh +++ b/CodeTrans/tests/test_compose_on_rocm.sh @@ -120,34 +120,6 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniconda3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/amd/gpu/rocm/ docker compose stop && docker compose rm -f @@ -175,10 +147,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/CodeTrans/tests/test_compose_on_xeon.sh b/CodeTrans/tests/test_compose_on_xeon.sh index 8418ba05d1..bbd75208e8 100644 --- a/CodeTrans/tests/test_compose_on_xeon.sh +++ b/CodeTrans/tests/test_compose_on_xeon.sh @@ -25,14 +25,8 @@ function build_docker_images() { docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/vllm-project/vllm.git && cd vllm - VLLM_VER=v0.10.0 - echo "Check out vLLM tag ${VLLM_VER}" - git checkout ${VLLM_VER} &> /dev/null - cd ../ - echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="codetrans codetrans-ui llm-textgen vllm nginx" + service_list="codetrans codetrans-ui llm-textgen nginx" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -41,15 +35,18 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel export HF_TOKEN=${HF_TOKEN} - + export no_proxy="localhost,127.0.0.1,$ip_address" export NGINX_PORT=80 source set_env.sh cd cpu/xeon/ + # download grafana dashboard + bash grafana/dashboards/download_opea_dashboard.sh + sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env # Start Docker Containers - docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log + docker compose -f compose.yaml -f compose.monitoring.yaml up -d > ${LOG_PATH}/start_services_with_compose.log n=0 until [[ "$n" -ge 100 ]]; do @@ -122,37 +119,9 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ - docker compose -f compose.yaml stop && docker compose rm -f + docker compose -f compose.yaml -f compose.monitoring.yaml down } function main() { @@ -177,10 +146,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/CodeTrans/tests/test_compose_openeuler_on_xeon.sh b/CodeTrans/tests/test_compose_openeuler_on_xeon.sh new file mode 100644 index 0000000000..3b8d2010e2 --- /dev/null +++ b/CodeTrans/tests/test_compose_openeuler_on_xeon.sh @@ -0,0 +1,154 @@ +#!/bin/bash +# Copyright (C) 2025 Huawei Technologies Co., Ltd. +# SPDX-License-Identifier: Apache-2.0 + +set -xe +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" +export REGISTRY=${IMAGE_REPO} +export TAG=${IMAGE_TAG} +export MODEL_CACHE=${model_cache:-"./data"} + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + pushd GenAIComps + echo "GenAIComps test commit is $(git rev-parse HEAD)" + docker build --no-cache -t ${REGISTRY}/comps-base:${TAG}-openeuler --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.openEuler . + popd && sleep 1s + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
+ service_list="codetrans-openeuler codetrans-ui-openeuler llm-textgen-openeuler nginx-openeuler" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker images && sleep 1s +} + +function start_services() { + cd $WORKPATH/docker_compose/intel + export HF_TOKEN=${HF_TOKEN} + export no_proxy="localhost,127.0.0.1,$ip_address" + export NGINX_PORT=80 + source set_env.sh + cd cpu/xeon/ + + sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env + + # Start Docker Containers + docker compose -f compose_openeuler.yaml up -d > ${LOG_PATH}/start_services_with_compose.log + + n=0 + until [[ "$n" -ge 100 ]]; do + docker logs codetrans-xeon-vllm-service > ${LOG_PATH}/vllm_service_start.log 2>&1 + if grep -q complete ${LOG_PATH}/vllm_service_start.log; then + break + fi + sleep 5s + n=$((n+1)) + done + + sleep 1m +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 5s +} + +function validate_microservices() { + # llm microservice + validate_services \ + "${ip_address}:9000/v1/chat/completions" \ + "data: " \ + "llm" \ + "codetrans-xeon-llm-server" \ + '{"query":" ### System: Please translate the following Golang codes into Python codes. 
### Original codes: '\'''\'''\''Golang \npackage main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n '\'''\'''\'' ### Translated codes:"}' + +} + +function validate_megaservice() { + # Curl the Mega Service + validate_services \ + "${ip_address}:${BACKEND_SERVICE_PORT}/v1/codetrans" \ + "print" \ + "mega-codetrans" \ + "codetrans-xeon-backend-server" \ + '{"language_from": "Golang","language_to": "Python","source_code": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}"}' + + # test the megaservice via nginx + validate_services \ + "${ip_address}:${NGINX_PORT}/v1/codetrans" \ + "print" \ + "mega-codetrans-nginx" \ + "codetrans-xeon-nginx-server" \ + '{"language_from": "Golang","language_to": "Python","source_code": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}"}' + +} + +function stop_service() { + cd $WORKPATH/docker_compose/intel/cpu/xeon/ + docker compose -f compose_openeuler.yaml stop && docker compose rm -f +} + +function main() { + + echo "::group::stop_service" + stop_service + echo "::endgroup::" + + echo "::group::build_docker_images" + if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi + echo "::endgroup::" + + echo "::group::start_services" + start_services + echo "::endgroup::" + + echo "::group::validate_microservices" + validate_microservices + echo "::endgroup::" + + echo "::group::validate_megaservice" + validate_megaservice + echo "::endgroup::" + + echo "::group::stop_service" + stop_service + echo "::endgroup::" + + docker system prune -f + +} + +main diff --git a/CodeTrans/tests/test_compose_tgi_on_epyc.sh b/CodeTrans/tests/test_compose_tgi_on_epyc.sh index 2eb464c485..53dcb0caa0 100644 --- a/CodeTrans/tests/test_compose_tgi_on_epyc.sh +++ b/CodeTrans/tests/test_compose_tgi_on_epyc.sh @@ -39,7 +39,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/amd/cpu/epyc/ export HF_TOKEN=${HF_TOKEN} - + export no_proxy="localhost,127.0.0.1,$ip_address" export NGINX_PORT=80 source set_env.sh @@ -127,37 +127,6 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - CONDA_ROOT=$(conda info --base) - source "${CONDA_ROOT}/etc/profile.d/conda.sh" - conda activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - # npm install && npm ci && npx playwright install --with-deps - npm install && npm ci && npx playwright install - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? 
- - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/amd/cpu/epyc/ docker compose -f compose_tgi.yaml stop && docker compose rm -f @@ -185,10 +154,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/CodeTrans/tests/test_compose_tgi_on_gaudi.sh b/CodeTrans/tests/test_compose_tgi_on_gaudi.sh index 129e677149..964f6b6f01 100644 --- a/CodeTrans/tests/test_compose_tgi_on_gaudi.sh +++ b/CodeTrans/tests/test_compose_tgi_on_gaudi.sh @@ -36,15 +36,18 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel export HF_TOKEN=${HF_TOKEN} - + export no_proxy="localhost,127.0.0.1,$ip_address" export NGINX_PORT=80 source set_env.sh cd hpu/gaudi/ sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env + # download grafana dashboard + bash grafana/dashboards/download_opea_dashboard.sh + # Start Docker Containers - docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log + docker compose -f compose_tgi.yaml -f compose.monitoring.yaml up -d > ${LOG_PATH}/start_services_with_compose.log n=0 until [[ "$n" -ge 100 ]]; do @@ -125,37 +128,9 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? 
- - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/intel/hpu/gaudi/ - docker compose -f compose_tgi.yaml stop && docker compose rm -f + docker compose -f compose_tgi.yaml -f compose.monitoring.yaml stop && docker compose rm -f } function main() { @@ -180,10 +155,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/CodeTrans/tests/test_compose_tgi_on_xeon.sh b/CodeTrans/tests/test_compose_tgi_on_xeon.sh index aacd5dfda5..2957093520 100644 --- a/CodeTrans/tests/test_compose_tgi_on_xeon.sh +++ b/CodeTrans/tests/test_compose_tgi_on_xeon.sh @@ -36,15 +36,18 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel export HF_TOKEN=${HF_TOKEN} - + export no_proxy="localhost,127.0.0.1,$ip_address" export NGINX_PORT=80 source set_env.sh cd cpu/xeon/ + # download grafana dashboard + bash grafana/dashboards/download_opea_dashboard.sh + sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env # Start Docker Containers - docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log + docker compose -f compose_tgi.yaml -f compose.monitoring.yaml up -d > ${LOG_PATH}/start_services_with_compose.log n=0 until [[ "$n" -ge 100 ]]; do @@ -125,37 +128,9 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ - docker compose -f compose_tgi.yaml stop && docker compose rm -f + docker compose -f compose_tgi.yaml -f compose.monitoring.yaml stop && docker compose rm -f } function main() { @@ -180,10 +155,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/CodeTrans/tests/test_compose_tgi_openeuler_on_xeon.sh b/CodeTrans/tests/test_compose_tgi_openeuler_on_xeon.sh new file mode 100644 index 0000000000..42c8cdee62 --- /dev/null +++ b/CodeTrans/tests/test_compose_tgi_openeuler_on_xeon.sh @@ -0,0 +1,163 @@ +#!/bin/bash +# Copyright (C) 2025 Huawei Technologies Co., Ltd. 
+# SPDX-License-Identifier: Apache-2.0 + +set -xe +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" +export REGISTRY=${IMAGE_REPO} +export TAG=${IMAGE_TAG} +export MODEL_CACHE=${model_cache:-"./data"} + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + pushd GenAIComps + echo "GenAIComps test commit is $(git rev-parse HEAD)" + docker build --no-cache -t ${REGISTRY}/comps-base:${TAG}-openeuler --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.openEuler . + popd && sleep 1s + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." + service_list="codetrans-openeuler codetrans-ui-openeuler llm-textgen-openeuler nginx-openeuler" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker pull openeuler/text-generation-inference-cpu:2.4.0-oe2403lts + docker images && sleep 1s +} + +function start_services() { + cd $WORKPATH/docker_compose/intel + export HF_TOKEN=${HF_TOKEN} + export no_proxy="localhost,127.0.0.1,$ip_address" + export NGINX_PORT=80 + source set_env.sh + cd cpu/xeon/ + + sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env + + # Start Docker Containers + docker compose -f compose_tgi_openeuler.yaml up -d > ${LOG_PATH}/start_services_with_compose.log + + n=0 + until [[ "$n" -ge 100 ]]; do + docker logs codetrans-xeon-tgi-service > ${LOG_PATH}/tgi_service_start.log + if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then + break + fi + sleep 5s + n=$((n+1)) + done + + sleep 1m +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + + local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) + + if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then + echo "[ $SERVICE_NAME ] Content is as expected." + else + echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 5s +} + +function validate_microservices() { + # tgi for embedding service + validate_services \ + "${ip_address}:8008/generate" \ + "generated_text" \ + "tgi" \ + "codetrans-xeon-tgi-service" \ + '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' + + # llm microservice + validate_services \ + "${ip_address}:9000/v1/chat/completions" \ + "data: " \ + "llm" \ + "codetrans-xeon-llm-server" \ + '{"query":" ### System: Please translate the following Golang codes into Python codes. 
### Original codes: '\'''\'''\''Golang \npackage main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n '\'''\'''\'' ### Translated codes:"}' + +} + +function validate_megaservice() { + # Curl the Mega Service + validate_services \ + "${ip_address}:${BACKEND_SERVICE_PORT}/v1/codetrans" \ + "print" \ + "mega-codetrans" \ + "codetrans-xeon-backend-server" \ + '{"language_from": "Golang","language_to": "Python","source_code": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}"}' + + # test the megaservice via nginx + validate_services \ + "${ip_address}:${NGINX_PORT}/v1/codetrans" \ + "print" \ + "mega-codetrans-nginx" \ + "codetrans-xeon-nginx-server" \ + '{"language_from": "Golang","language_to": "Python","source_code": "package main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n}"}' + +} + +function stop_service() { + cd $WORKPATH/docker_compose/intel/cpu/xeon/ + docker compose -f compose_tgi_openeuler.yaml stop && docker compose rm -f +} + +function main() { + + echo "::group::stop_service" + stop_service + echo "::endgroup::" + + echo "::group::build_docker_images" + if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi + echo "::endgroup::" + + echo "::group::start_services" + start_services + echo "::endgroup::" + + echo "::group::validate_microservices" + validate_microservices + echo "::endgroup::" + + echo "::group::validate_megaservice" + validate_megaservice + echo "::endgroup::" + + echo "::group::stop_service" + stop_service + echo "::endgroup::" + + docker system prune -f + +} + +main diff --git a/CodeTrans/tests/test_compose_vllm_on_rocm.sh b/CodeTrans/tests/test_compose_vllm_on_rocm.sh index 2ef8709607..6023b59467 100644 --- a/CodeTrans/tests/test_compose_vllm_on_rocm.sh +++ b/CodeTrans/tests/test_compose_vllm_on_rocm.sh @@ -119,34 +119,6 @@ function validate_megaservice() { } -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniconda3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? 
- - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/amd/gpu/rocm/ docker compose -f compose_vllm.yaml stop && docker compose -f compose_vllm.yaml rm -f @@ -174,10 +146,6 @@ function main() { validate_megaservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/CodeTrans/tests/test_ui_on_xeon.sh b/CodeTrans/tests/test_ui_on_xeon.sh new file mode 100644 index 0000000000..4ee17bad47 --- /dev/null +++ b/CodeTrans/tests/test_ui_on_xeon.sh @@ -0,0 +1,120 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" +export REGISTRY=${IMAGE_REPO} +export TAG=${IMAGE_TAG} +export MODEL_CACHE=${model_cache:-"./data"} + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + pushd GenAIComps + echo "GenAIComps test commit is $(git rev-parse HEAD)" + docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . + popd && sleep 1s + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." + service_list="codetrans codetrans-ui llm-textgen nginx" + docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log + + docker images && sleep 1s +} + +function start_services() { + cd $WORKPATH/docker_compose/intel + export HF_TOKEN=${HF_TOKEN} + export no_proxy="localhost,127.0.0.1,$ip_address" + export NGINX_PORT=80 + source set_env.sh + cd cpu/xeon/ + + sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env + + # Start Docker Containers + docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log + + n=0 + until [[ "$n" -ge 100 ]]; do + docker logs codetrans-xeon-vllm-service > ${LOG_PATH}/vllm_service_start.log 2>&1 + if grep -q complete ${LOG_PATH}/vllm_service_start.log; then + break + fi + sleep 5s + n=$((n+1)) + done + + sleep 1m +} + +function validate_frontend() { + cd $WORKPATH/ui/svelte + local conda_env_name="OPEA_e2e" + export PATH=${HOME}/miniforge3/bin/:$PATH + if conda info --envs | grep -q "$conda_env_name"; then + echo "$conda_env_name exist!" + else + conda create -n ${conda_env_name} python=3.12 -y + fi + source activate ${conda_env_name} + + sed -i "s/localhost/$ip_address/g" playwright.config.ts + + conda install -c conda-forge nodejs=22.6.0 -y + npm install && npm ci && npx playwright install --with-deps + node -v && npm -v && pip list + + exit_status=0 + npx playwright test || exit_status=$? 
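+  # Capture the Playwright exit code ('|| exit_status=$?') so 'set -xe' does not abort before the result is reported below.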
+ + if [ $exit_status -ne 0 ]; then + echo "[TEST INFO]: ---------frontend test failed---------" + exit $exit_status + else + echo "[TEST INFO]: ---------frontend test passed---------" + fi +} + +function stop_docker() { + cd $WORKPATH/docker_compose/intel/cpu/xeon/ + docker compose -f compose.yaml stop && docker compose rm -f +} + +function main() { + + echo "::group::stop_docker" + stop_docker + echo "::endgroup::" + + echo "::group::build_docker_images" + if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi + echo "::endgroup::" + + echo "::group::start_services" + start_services + echo "::endgroup::" + + echo "::group::validate_frontend" + validate_frontend + echo "::endgroup::" + + echo "::group::stop_docker" + stop_docker + echo "::endgroup::" + + docker system prune -f + +} + +main diff --git a/CodeTrans/ui/docker/Dockerfile b/CodeTrans/ui/docker/Dockerfile index 1d5115f4b5..3a6ca7f7c9 100644 --- a/CodeTrans/ui/docker/Dockerfile +++ b/CodeTrans/ui/docker/Dockerfile @@ -1,8 +1,8 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -# Use node 20.11.1 as the base image -FROM node:20.11.1 +# Use node 20.19.0 as the base image (required for chokidar@5.0.0 compatibility) +FROM node:20.19.0 # Update package manager and install Git RUN apt-get update -y && apt-get install -y git diff --git a/CodeTrans/ui/docker/Dockerfile.openEuler b/CodeTrans/ui/docker/Dockerfile.openEuler new file mode 100644 index 0000000000..08d34fea47 --- /dev/null +++ b/CodeTrans/ui/docker/Dockerfile.openEuler @@ -0,0 +1,30 @@ +# Copyright (C) 2025 Huawei Technologies Co., Ltd. +# SPDX-License-Identifier: Apache-2.0 + +# Use node 20.19.0 as the base image +FROM openeuler/node:20.19.0-oe2403lts + +# Update package manager and install Git +RUN yum update -y && \ + yum install -y \ + git && \ + yum clean all && \ + rm -rf /var/cache/yum + +# Copy the front-end code repository +COPY svelte /home/user/svelte + +# Set the working directory +WORKDIR /home/user/svelte + +# Install front-end dependencies +RUN npm install + +# Build the front-end application +RUN npm run build + +# Expose the port of the front-end application +EXPOSE 5173 + +# Run the front-end application in preview mode +CMD ["npm", "run", "preview", "--", "--port", "5173", "--host", "0.0.0.0"] diff --git a/CodeTrans/ui/svelte/package.json b/CodeTrans/ui/svelte/package.json index ed45ce245f..855f74265c 100644 --- a/CodeTrans/ui/svelte/package.json +++ b/CodeTrans/ui/svelte/package.json @@ -16,7 +16,7 @@ "devDependencies": { "@playwright/test": "^1.44.1", "@sveltejs/adapter-auto": "^3.0.0", - "@sveltejs/kit": "2.0.0", + "@sveltejs/kit": "2.20.6", "@sveltejs/package": "^2.0.0", "@sveltejs/vite-plugin-svelte": "^3.0.0", "@types/prismjs": "^1.26.3", diff --git a/CodeTrans/ui/svelte/src/routes/+page.svelte b/CodeTrans/ui/svelte/src/routes/+page.svelte index fd6be39310..af19e26d31 100644 --- a/CodeTrans/ui/svelte/src/routes/+page.svelte +++ b/CodeTrans/ui/svelte/src/routes/+page.svelte @@ -83,12 +83,37 @@ let deleteFlag: boolean = false; let inputClick: boolean = true; - function handelCopy() { - navigator.clipboard.writeText(output); - copyText = "copied!"; + async function handelCopy() { + try { + if (navigator.clipboard && navigator.clipboard.writeText) { + await navigator.clipboard.writeText(output); + copyText = "copied!"; + } else { + const textArea = document.createElement('textarea'); + textArea.value = output; + textArea.style.position = 'fixed'; + textArea.style.left = '-999999px'; + textArea.style.top = '-999999px'; 
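+ // The textarea is kept off-screen so the execCommand('copy') fallback never flashes visible UI.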
+ document.body.appendChild(textArea); + textArea.focus(); + textArea.select(); + + if (document.execCommand('copy')) { + copyText = "copied!"; + } else { + copyText = "copy failed"; + } + + document.body.removeChild(textArea); + } + } catch (err) { + console.error('Copy failed:', err); + copyText = "copy failed"; + } + setTimeout(() => { copyText = "copy"; - }, 1000); + }, 2000); } function handelInputClick() { diff --git a/DBQnA/docker_compose/amd/gpu/rocm/README.md b/DBQnA/docker_compose/amd/gpu/rocm/README.md index f15b5db2b4..a0eda0c9bf 100644 --- a/DBQnA/docker_compose/amd/gpu/rocm/README.md +++ b/DBQnA/docker_compose/amd/gpu/rocm/README.md @@ -9,13 +9,17 @@ This document outlines the deployment process for DBQnA application which helps This section describes how to quickly deploy and test the DBQnA service manually on AMD GPU (ROCm). The basic steps are: -1. [Access the Code](#access-the-code) -2. [Generate a HuggingFace Access Token](#generate-a-huggingface-access-token) -3. [Configure the Deployment Environment](#configure-the-deployment-environment) -4. [Deploy the Service Using Docker Compose](#deploy-the-service-using-docker-compose) -5. [Check the Deployment Status](#check-the-deployment-status) -6. [Test the Pipeline](#test-the-pipeline) -7. [Cleanup the Deployment](#cleanup-the-deployment) +- [Example DBQnA Deployment on AMD GPU (ROCm)](#example-dbqna-deployment-on-amd-gpu-rocm) + - [DBQnA Quick Start Deployment](#dbqna-quick-start-deployment) + - [Access the Code](#access-the-code) + - [Generate a HuggingFace Access Token](#generate-a-huggingface-access-token) + - [Configure the Deployment Environment](#configure-the-deployment-environment) + - [Deploy the Service Using Docker Compose](#deploy-the-service-using-docker-compose) + - [Check the Deployment Status](#check-the-deployment-status) + - [Test the Pipeline](#test-the-pipeline) + - [Cleanup the Deployment](#cleanup-the-deployment) + - [DBQnA Docker Compose Files](#dbqna-docker-compose-files) + - [DBQnA Service Configuration for AMD GPUs](#dbqna-service-configuration-for-amd-gpus) ### Access the Code @@ -73,10 +77,11 @@ For the default deployment, the following 4 containers should be running. 
Once the DBQnA service are running, test the pipeline using the following command: ```bash -curl http://${host_ip}:${DBQNA_TEXT_TO_SQL_PORT}/v1/texttosql \ +url="postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${host_ip}:5442/${POSTGRES_DB}" +curl --connect-timeout 5 --max-time 120000 http://${host_ip}:9090/v1/text2query\ -X POST \ - -d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${host_ip}'", "port": "5442", "database": "'${POSTGRES_DB}'"}}' \ - -H 'Content-Type: application/json' + -d '{"query": "Find the total number of Albums.","conn_type": "sql", "conn_url": "'${url}'", "conn_user": "'${POSTGRES_USER}'","conn_password": "'${POSTGRES_PASSWORD}'","conn_dialect": "postgresql" }' \ + -H 'Content-Type: application/json' ``` ### Cleanup the Deployment @@ -97,7 +102,7 @@ The compose.yaml is default compose file using tgi as serving framework | ----------------- | -------------------------------------------------------- | | dbqna-tgi-service | ghcr.io/huggingface/text-generation-inference:2.4.1-rocm | | postgres | postgres:latest | -| text2sql | opea/text2sql:latest | +| text2sql | opea/text2query-sql:latest | | text2sql-react-ui | opea/text2sql-react-ui:latest | ## DBQnA Service Configuration for AMD GPUs @@ -108,5 +113,5 @@ The table provides a comprehensive overview of the DBQnA service utilized across | ----------------- | -------------------------------------------------------- | -------- | --------------------------------------------------------------------------------------------------- | | dbqna-tgi-service | ghcr.io/huggingface/text-generation-inference:2.4.1-rocm | No | Specific to the TGI deployment, focuses on text generation inference using AMD GPU (ROCm) hardware. | | postgres | postgres:latest | No | Provides the relational database backend for storing and querying data used by the DBQnA pipeline. | -| text2sql | opea/text2sql:latest | No | Handles text-to-SQL conversion tasks. | +| text2sql | opea/text2query-sql:latest | No | Handles text-to-SQL conversion tasks. | | text2sql-react-ui | opea/text2sql-react-ui:latest | No | Provides the user interface for the DBQnA service. | diff --git a/DBQnA/docker_compose/amd/gpu/rocm/compose.yaml b/DBQnA/docker_compose/amd/gpu/rocm/compose.yaml index f9585acf00..deaf099774 100644 --- a/DBQnA/docker_compose/amd/gpu/rocm/compose.yaml +++ b/DBQnA/docker_compose/amd/gpu/rocm/compose.yaml @@ -47,12 +47,16 @@ services: - ./chinook.sql:/docker-entrypoint-initdb.d/chinook.sql text2sql: - image: opea/text2sql:latest + image: opea/text2query-sql:latest container_name: text2sql ports: - - "${DBQNA_TEXT_TO_SQL_PORT:-9090}:8080" + - "${DBQNA_TEXT_TO_SQL_PORT:-9090}:9097" environment: TGI_LLM_ENDPOINT: ${DBQNA_TGI_LLM_ENDPOINT} + TEXT2QUERY_COMPONENT_NAME: OPEA_TEXT2QUERY_SQL + depends_on: + - dbqna-tgi-service + - postgres text2sql-react-ui: image: opea/text2sql-react-ui:latest diff --git a/DBQnA/docker_compose/intel/cpu/xeon/README.md b/DBQnA/docker_compose/intel/cpu/xeon/README.md index 1e816fed2a..08b57c4341 100644 --- a/DBQnA/docker_compose/intel/cpu/xeon/README.md +++ b/DBQnA/docker_compose/intel/cpu/xeon/README.md @@ -9,13 +9,17 @@ This document outlines the deployment process for DBQnA application which helps This section describes how to quickly deploy and test the DBQnA service manually on Intel® Xeon® platform. The basic steps are: -1. [Access the Code](#access-the-code) -2. 
[Generate a HuggingFace Access Token](#generate-a-huggingface-access-token) -3. [Configure the Deployment Environment](#configure-the-deployment-environment) -4. [Deploy the Service Using Docker Compose](#deploy-the-service-using-docker-compose) -5. [Check the Deployment Status](#check-the-deployment-status) -6. [Test the Pipeline](#test-the-pipeline) -7. [Cleanup the Deployment](#cleanup-the-deployment) +- [Example DBQnA Deployment on Intel® Xeon® Platform](#example-dbqna-deployment-on-intel-xeon-platform) + - [DBQnA Quick Start Deployment](#dbqna-quick-start-deployment) + - [Access the Code](#access-the-code) + - [Generate a HuggingFace Access Token](#generate-a-huggingface-access-token) + - [Configure the Deployment Environment](#configure-the-deployment-environment) + - [Deploy the Service Using Docker Compose](#deploy-the-service-using-docker-compose) + - [Check the Deployment Status](#check-the-deployment-status) + - [Test the Pipeline](#test-the-pipeline) + - [Cleanup the Deployment](#cleanup-the-deployment) + - [DBQnA Docker Compose Files](#dbqna-docker-compose-files) + - [DBQnA Service Configuration](#dbqna-service-configuration) ### Access the Code @@ -80,7 +84,7 @@ CONTAINER ID IMAGE 2728db31368b opea/text2sql-react-ui:latest "nginx -g 'daemon of…" 9 minutes ago Up 9 minutes 0.0.0.0:5174->80/tcp, :::5174->80/tcp dbqna-xeon-react-ui-server 0ab75b92c300 postgres:latest "docker-entrypoint.s…" 9 minutes ago Up 9 minutes 0.0.0.0:5442->5432/tcp, :::5442->5432/tcp postgres-container 2662a69b515b ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu "text-generation-lau…" 9 minutes ago Up 9 minutes 0.0.0.0:8008->80/tcp, :::8008->80/tcp tgi-service -bb44512be80e opea/text2sql:latest "python opea_text2sq…" 9 minutes ago Up 9 minutes 0.0.0.0:9090->8080/tcp, :::9090->8080/tcp text2sql-service +bb44512be80e opea/text2query-sql:latest "python opea_text2sq…" 9 minutes ago Up 9 minutes 0.0.0.0:9090->8080/tcp, :::9090->8080/tcp text2sql-service ``` ### Test the Pipeline @@ -88,10 +92,11 @@ bb44512be80e opea/text2sql:latest Once the DBQnA service are running, test the pipeline using the following command: ```bash -curl http://${host_ip}:9090/v1/text2sql\ +url="postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${host_ip}:5442/${POSTGRES_DB}" +curl --connect-timeout 5 --max-time 120000 http://${host_ip}:9090/v1/text2query\ -X POST \ - -d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${host_ip}'", "port": "5442", "database": "'${POSTGRES_DB}'"}}' \ - -H 'Content-Type: application/json' + -d '{"query": "Find the total number of Albums.","conn_type": "sql", "conn_url": "'${url}'", "conn_user": "'${POSTGRES_USER}'","conn_password": "'${POSTGRES_PASSWORD}'","conn_dialect": "postgresql" }' \ + -H 'Content-Type: application/json' ``` ### Cleanup the Deployment @@ -121,7 +126,7 @@ The compose.yaml is default compose file using tgi as serving framework | -------------------------- | ------------------------------------------------------------- | | tgi-service | ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu | | postgres | postgres:latest | -| text2sql | opea/text2sql:latest | +| text2sql | opea/text2query-sql:latest | | dbqna-xeon-react-ui-server | opea/text2sql-react-ui:latest | ## DBQnA Service Configuration @@ -132,5 +137,5 @@ The table provides a comprehensive overview of the DBQnA service utilized across | -------------------------- | 
------------------------------------------------------------- | -------- | --------------------------------------------------------------------------------------------------- | | tgi-service | ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu | No | Specific to the TGI deployment, focuses on text generation inference using AMD GPU (ROCm) hardware. | | postgres | postgres:latest | No | Provides the relational database backend for storing and querying data used by the DBQnA pipeline. | -| text2sql | opea/text2sql:latest | No | Handles text-to-SQL conversion tasks. | +| text2sql | opea/text2query-sql:latest | No | Handles text-to-SQL conversion tasks. | | dbqna-xeon-react-ui-server | opea/text2sql-react-ui:latest | No | Provides the user interface for the DBQnA service. | diff --git a/DBQnA/docker_compose/intel/cpu/xeon/compose.yaml b/DBQnA/docker_compose/intel/cpu/xeon/compose.yaml index cb9339b834..b12eb32f3f 100644 --- a/DBQnA/docker_compose/intel/cpu/xeon/compose.yaml +++ b/DBQnA/docker_compose/intel/cpu/xeon/compose.yaml @@ -31,12 +31,16 @@ services: - ./chinook.sql:/docker-entrypoint-initdb.d/chinook.sql text2sql-service: - image: ${REGISTRY:-opea}/text2sql:${TAG:-latest} + image: ${REGISTRY:-opea}/text2query-sql:${TAG:-latest} container_name: text2sql-service ports: - - "${TEXT2SQL_PORT}:8080" + - "${TEXT2SQL_PORT}:9097" environment: - TGI_LLM_ENDPOINT=${TGI_LLM_ENDPOINT} + - TEXT2QUERY_COMPONENT_NAME=OPEA_TEXT2QUERY_SQL + depends_on: + - tgi-service + - postgres dbqna-xeon-react-ui-server: image: ${REGISTRY:-opea}/text2sql-react-ui:${TAG:-latest} diff --git a/DBQnA/docker_image_build/build.yaml b/DBQnA/docker_image_build/build.yaml index 11d7f518b6..39d00f5916 100644 --- a/DBQnA/docker_image_build/build.yaml +++ b/DBQnA/docker_image_build/build.yaml @@ -2,22 +2,22 @@ # SPDX-License-Identifier: Apache-2.0 services: - text2sql: + text2query-sql: build: context: GenAIComps - dockerfile: comps/text2sql/src/Dockerfile + dockerfile: comps/text2query/src/Dockerfile args: IMAGE_REPO: ${REGISTRY:-opea} BASE_TAG: ${TAG:-latest} http_proxy: ${http_proxy} https_proxy: ${https_proxy} no_proxy: ${no_proxy} - image: ${REGISTRY:-opea}/text2sql:${TAG:-latest} + image: ${REGISTRY:-opea}/text2query-sql:${TAG:-latest} text2sql-react-ui: build: context: ../ui dockerfile: ./docker/Dockerfile.react args: texttosql_url: ${build_texttosql_url} - extends: text2sql + extends: text2query-sql image: ${REGISTRY:-opea}/text2sql-react-ui:${TAG:-latest} diff --git a/DBQnA/tests/test_compose_on_rocm.sh b/DBQnA/tests/test_compose_on_rocm.sh index e2dc0b81d6..f162c073c4 100644 --- a/DBQnA/tests/test_compose_on_rocm.sh +++ b/DBQnA/tests/test_compose_on_rocm.sh @@ -48,9 +48,10 @@ function start_services() { } function validate_microservice() { - result=$(http_proxy="" curl --connect-timeout 5 --max-time 120000 http://${ip_address}:${DBQNA_TEXT_TO_SQL_PORT}/v1/text2sql \ + url="postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${ip_address}:5442/${POSTGRES_DB}" + result=$(http_proxy="" curl --connect-timeout 5 --max-time 120000 http://${ip_address}:$TEXT2SQL_PORT/v1/text2query\ -X POST \ - -d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${ip_address}'", "port": "5442", "database": "'${POSTGRES_DB}'" }}' \ + -d '{"query": "Find the total number of Albums.","conn_type": "sql", "conn_url": "'${url}'", "conn_user": "'${POSTGRES_USER}'","conn_password": "'${POSTGRES_PASSWORD}'","conn_dialect": "postgresql" }' \ -H 
'Content-Type: application/json') if echo "$result" | jq -e '.result.output' > /dev/null 2>&1; then @@ -66,35 +67,6 @@ function validate_microservice() { } -function validate_frontend() { - echo "[ TEST INFO ]: --------- frontend test started ---------" - cd $WORKPATH/ui/react - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniconda3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - - source activate ${conda_env_name} - echo "[ TEST INFO ]: --------- conda env activated ---------" - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci - node -v && npm -v && pip list - - exit_status=0 - npm run test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/amd/gpu/rocm/ docker compose stop && docker compose rm -f @@ -118,10 +90,6 @@ function main() { validate_microservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/DBQnA/tests/test_compose_on_xeon.sh b/DBQnA/tests/test_compose_on_xeon.sh index c410cc48f8..09eef4494f 100755 --- a/DBQnA/tests/test_compose_on_xeon.sh +++ b/DBQnA/tests/test_compose_on_xeon.sh @@ -29,6 +29,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/cpu/xeon + export no_proxy="localhost,127.0.0.1,$ip_address" source ./set_env.sh # Start Docker Containers @@ -47,9 +48,10 @@ function start_services() { } function validate_microservice() { - result=$(http_proxy="" curl --connect-timeout 5 --max-time 120000 http://${ip_address}:$TEXT2SQL_PORT/v1/text2sql\ + url="postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${ip_address}:5442/${POSTGRES_DB}" + result=$(http_proxy="" curl --connect-timeout 5 --max-time 120000 http://${ip_address}:$TEXT2SQL_PORT/v1/text2query\ -X POST \ - -d '{"input_text": "Find the total number of Albums.","conn_str": {"user": "'${POSTGRES_USER}'","password": "'${POSTGRES_PASSWORD}'","host": "'${ip_address}'", "port": "5442", "database": "'${POSTGRES_DB}'" }}' \ + -d '{"query": "Find the total number of Albums.","conn_type": "sql", "conn_url": "'${url}'", "conn_user": "'${POSTGRES_USER}'","conn_password": "'${POSTGRES_PASSWORD}'","conn_dialect": "postgresql" }' \ -H 'Content-Type: application/json') if echo "$result" | jq -e '.result.output' > /dev/null 2>&1; then @@ -65,35 +67,6 @@ function validate_microservice() { } -function validate_frontend() { - echo "[ TEST INFO ]: --------- frontend test started ---------" - cd $WORKPATH/ui/react - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - - source activate ${conda_env_name} - echo "[ TEST INFO ]: --------- conda env activated ---------" - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci - node -v && npm -v && pip list - - exit_status=0 - npm run test || exit_status=$? 
- - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - function stop_docker() { cd $WORKPATH/docker_compose/intel/cpu/xeon docker compose stop && docker compose rm -f @@ -117,10 +90,6 @@ function main() { validate_microservice echo "::endgroup::" - echo "::group::validate_frontend" - validate_frontend - echo "::endgroup::" - echo "::group::stop_docker" stop_docker echo "::endgroup::" diff --git a/DBQnA/tests/test_ui_on_xeon.sh b/DBQnA/tests/test_ui_on_xeon.sh new file mode 100755 index 0000000000..650157e9ce --- /dev/null +++ b/DBQnA/tests/test_ui_on_xeon.sh @@ -0,0 +1,110 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -xe + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" +export REGISTRY=${IMAGE_REPO} +export TAG=${IMAGE_TAG} +export MODEL_CACHE=${model_cache:-"./data"} + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH/docker_image_build + git clone --single-branch --branch "${opea_branch:-"main"}" https://github.com/opea-project/GenAIComps.git + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." + docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log + + docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu + docker images && sleep 1s +} + +function start_services() { + cd $WORKPATH/docker_compose/intel/cpu/xeon + export no_proxy="localhost,127.0.0.1,$ip_address" + source ./set_env.sh + + # Start Docker Containers + docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log + + # check whether tgi is fully ready. + n=0 + until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do + docker logs tgi-service > ${LOG_PATH}/tgi.log + n=$((n+1)) + if grep -q Connected ${LOG_PATH}/tgi.log; then + break + fi + sleep 5s + done +} + +function validate_frontend() { + echo "[ TEST INFO ]: --------- frontend test started ---------" + cd $WORKPATH/ui/react + local conda_env_name="OPEA_e2e" + export PATH=${HOME}/miniforge3/bin/:$PATH + if conda info --envs | grep -q "$conda_env_name"; then + echo "$conda_env_name exist!" + else + conda create -n ${conda_env_name} python=3.12 -y + fi + + source activate ${conda_env_name} + echo "[ TEST INFO ]: --------- conda env activated ---------" + + conda install -c conda-forge nodejs=22.6.0 -y + npm install && npm ci + node -v && npm -v && pip list + + exit_status=0 + npm run test || exit_status=$? 
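+  # Record the npm test exit code instead of letting 'set -xe' abort, so pass/fail can be logged before exiting.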
+ + if [ $exit_status -ne 0 ]; then + echo "[TEST INFO]: ---------frontend test failed---------" + exit $exit_status + else + echo "[TEST INFO]: ---------frontend test passed---------" + fi +} + +function stop_docker() { + cd $WORKPATH/docker_compose/intel/cpu/xeon + docker compose stop && docker compose rm -f +} + +function main() { + + echo "::group::stop_docker" + stop_docker + echo "::endgroup::" + + echo "::group::build_docker_images" + if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi + echo "::endgroup::" + + echo "::group::start_services" + start_services + echo "::endgroup::" + + echo "::group::validate_frontend" + validate_frontend + echo "::endgroup::" + + echo "::group::stop_docker" + stop_docker + echo "::endgroup::" + + docker system prune -f + +} + +main diff --git a/DBQnA/ui/react/src/App.test.tsx b/DBQnA/ui/react/src/App.test.tsx index 5b346e8cb1..db16928d3a 100644 --- a/DBQnA/ui/react/src/App.test.tsx +++ b/DBQnA/ui/react/src/App.test.tsx @@ -21,21 +21,17 @@ const getHostIP = () => { test('testing api with dynamic host', async () => { // Get the dynamic host IP const host = await getHostIP(); - const endpointUrl = `http://${host}:9090/v1/text2sql`; - - const formData = { - user: 'postgres', - database: 'chinook', - host: host, - password: 'testpwd', - port: '5442', - }; - + const endpointUrl = `http://${host}:9090/v1/text2query`; + const connUrl = `postgresql://postgres:testpwd@${host}:5442/chinook`; const question = "Find the total number of invoices."; const payload = { - input_text: question, - conn_str: formData, + query: question, + conn_type: "sql", + conn_url: connUrl, + conn_user: "postgres", + conn_password: "testpwd", + conn_dialect: "postgresql", }; const response = await axios.post(endpointUrl, payload); @@ -47,6 +43,6 @@ test('testing api with dynamic host', async () => { expect(result.hasOwnProperty('sql')).toBe(true); expect(result.hasOwnProperty('output')).toBe(true); expect(result.hasOwnProperty('input')).toBe(true); - expect(result.input.input_text).toBe(question); + expect(result.input.query).toBe(question); }, apiTimeOutInSeconds * 1000); diff --git a/DBQnA/ui/react/src/components/DbConnect/DBConnect.tsx b/DBQnA/ui/react/src/components/DbConnect/DBConnect.tsx index bd41665e73..41e1a5b114 100644 --- a/DBQnA/ui/react/src/components/DbConnect/DBConnect.tsx +++ b/DBQnA/ui/react/src/components/DbConnect/DBConnect.tsx @@ -42,8 +42,15 @@ const DBConnect: React.FC = () => { e.preventDefault(); try { let api_response: Record; - let unifiedConnData = {"conn_str":formData}; - api_response = await axios.post(`${TEXT_TO_SQL_URL}/postgres/health`, unifiedConnData); + let connUrl = `postgresql://${formData.user}:${formData.password}@${formData.host}:${formData.port}/${formData.database}`; + let unifiedConnData = { + conn_type: "sql", + conn_url: connUrl, + conn_user: formData.user, + conn_password: formData.password, + conn_dialect: "postgresql", + }; + api_response = await axios.post(`${TEXT_TO_SQL_URL}/db/health`, unifiedConnData); setSqlStatus(null); setSqlError(null); @@ -74,13 +81,18 @@ const DBConnect: React.FC = () => { e.preventDefault(); setIsLoading(true); try { + const connUrl = `postgresql://${formData.user}:${formData.password}@${formData.host}:${formData.port}/${formData.database}`; const payload = { - input_text: question, - conn_str: formData, + query: question, + conn_type: "sql", + conn_url: connUrl, + conn_user: formData.user, + conn_password: formData.password, + conn_dialect: "postgresql", }; let api_response: Record; - api_response 
= await axios.post(`${TEXT_TO_SQL_URL}/text2sql`, payload); + api_response = await axios.post(`${TEXT_TO_SQL_URL}/text2query`, payload); setSqlQuery(api_response.data.result.sql); // Assuming the API returns an SQL query setQueryOutput(api_response.data.result.output); diff --git a/DeepResearchAgent/Dockerfile b/DeepResearchAgent/Dockerfile index d35d3ea7ea..e84b5e34ae 100644 --- a/DeepResearchAgent/Dockerfile +++ b/DeepResearchAgent/Dockerfile @@ -3,7 +3,7 @@ ARG IMAGE_REPO=opea ARG BASE_TAG=latest -FROM $IMAGE_REPO/comps-base:$BASE_TAG +FROM opea/comps-base:$BASE_TAG COPY ./deep_researcher.yaml $HOME/deep_researcher.yaml COPY ./utils.py $HOME/utils.py @@ -17,4 +17,4 @@ RUN pip install --no-cache-dir --upgrade pip setuptools uv && \ USER user -ENTRYPOINT ["python", "research_agent.py"] +ENTRYPOINT ["python", "research_agent.py"] \ No newline at end of file diff --git a/DeepResearchAgent/docker_compose/intel/hpu/gaudi/compose.yaml b/DeepResearchAgent/docker_compose/intel/hpu/gaudi/compose.yaml index dc7a05c270..d49af13a94 100644 --- a/DeepResearchAgent/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/DeepResearchAgent/docker_compose/intel/hpu/gaudi/compose.yaml @@ -20,7 +20,7 @@ x-common-agent-environment: services: vllm-service: - image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} + image: opea/vllm-gaudi:1.22.0 container_name: vllm-gaudi-server ports: - "8000:8000" diff --git a/DeepResearchAgent/docker_compose/intel/hpu/gaudi/set_env.sh b/DeepResearchAgent/docker_compose/intel/hpu/gaudi/set_env.sh index e38d0ef378..9df0330f46 100644 --- a/DeepResearchAgent/docker_compose/intel/hpu/gaudi/set_env.sh +++ b/DeepResearchAgent/docker_compose/intel/hpu/gaudi/set_env.sh @@ -4,7 +4,7 @@ # SPDX-License-Identifier: Apache-2.0 # Navigate to the parent directory and source the environment -SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd) +SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)" pushd "$SCRIPT_DIR/../../../../../" > /dev/null source .set_env.sh diff --git a/DeepResearchAgent/docker_image_build/build.yaml b/DeepResearchAgent/docker_image_build/build.yaml index 5ac2e17a90..09ef66da26 100644 --- a/DeepResearchAgent/docker_image_build/build.yaml +++ b/DeepResearchAgent/docker_image_build/build.yaml @@ -13,9 +13,3 @@ services: context: ../ dockerfile: ./Dockerfile image: ${REGISTRY:-opea}/deep-research-agent:${TAG:-latest} - vllm-gaudi: - build: - context: vllm-fork - dockerfile: Dockerfile.hpu - extends: deep-research-agent - image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} diff --git a/DeepResearchAgent/tests/test_compose_on_gaudi.sh b/DeepResearchAgent/tests/test_compose_on_gaudi.sh index c180640201..e76a66b9cc 100644 --- a/DeepResearchAgent/tests/test_compose_on_gaudi.sh +++ b/DeepResearchAgent/tests/test_compose_on_gaudi.sh @@ -25,9 +25,6 @@ function build_docker_images() { echo "GenAIComps test commit is $(git rev-parse HEAD)" docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork - VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0 - git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../ echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log @@ -37,6 +34,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/hpu/gaudi + export no_proxy="localhost,127.0.0.1,$ip_address" source set_env.sh # Start Docker Containers diff --git a/DocIndexRetriever/tests/test_compose_milvus_on_gaudi.sh b/DocIndexRetriever/tests/test_compose_milvus_on_gaudi.sh index 46c5f22f0e..5f3f90842b 100644 --- a/DocIndexRetriever/tests/test_compose_milvus_on_gaudi.sh +++ b/DocIndexRetriever/tests/test_compose_milvus_on_gaudi.sh @@ -35,6 +35,7 @@ function build_docker_images() { function start_services() { echo "Starting Docker Services...." cd $WORKPATH/docker_compose/intel/hpu/gaudi + export no_proxy="localhost,127.0.0.1,$ip_address" source ./set_env.sh # Start Docker Containers diff --git a/DocIndexRetriever/tests/test_compose_milvus_on_xeon.sh b/DocIndexRetriever/tests/test_compose_milvus_on_xeon.sh index 37bf681dcf..80f18fa515 100755 --- a/DocIndexRetriever/tests/test_compose_milvus_on_xeon.sh +++ b/DocIndexRetriever/tests/test_compose_milvus_on_xeon.sh @@ -35,6 +35,7 @@ function build_docker_images() { function start_services() { echo "Starting Docker Services...." cd $WORKPATH/docker_compose/intel/cpu/xeon + export no_proxy="localhost,127.0.0.1,$ip_address" source ./set_env.sh # Start Docker Containers diff --git a/DocIndexRetriever/tests/test_compose_on_gaudi.sh b/DocIndexRetriever/tests/test_compose_on_gaudi.sh index 11541eca1e..e65dbb8a15 100644 --- a/DocIndexRetriever/tests/test_compose_on_gaudi.sh +++ b/DocIndexRetriever/tests/test_compose_on_gaudi.sh @@ -35,6 +35,7 @@ function build_docker_images() { function start_services() { echo "Starting Docker Services...." cd $WORKPATH/docker_compose/intel/hpu/gaudi + export no_proxy="localhost,127.0.0.1,$ip_address" source ./set_env.sh # Start Docker Containers diff --git a/DocIndexRetriever/tests/test_compose_on_xeon.sh b/DocIndexRetriever/tests/test_compose_on_xeon.sh index 229e47efea..92289e8bf5 100644 --- a/DocIndexRetriever/tests/test_compose_on_xeon.sh +++ b/DocIndexRetriever/tests/test_compose_on_xeon.sh @@ -35,6 +35,7 @@ function build_docker_images() { function start_services() { echo "Starting Docker Services...." cd $WORKPATH/docker_compose/intel/cpu/xeon + export no_proxy="localhost,127.0.0.1,$ip_address" source ./set_env.sh # Start Docker Containers diff --git a/DocIndexRetriever/tests/test_compose_without_rerank_on_xeon.sh b/DocIndexRetriever/tests/test_compose_without_rerank_on_xeon.sh index 37c477b2ad..96a8da8f6d 100644 --- a/DocIndexRetriever/tests/test_compose_without_rerank_on_xeon.sh +++ b/DocIndexRetriever/tests/test_compose_without_rerank_on_xeon.sh @@ -39,6 +39,7 @@ function build_docker_images() { function start_services() { echo "Starting Docker Services...." 
cd $WORKPATH/docker_compose/intel/cpu/xeon + export no_proxy="localhost,127.0.0.1,$ip_address" source ./set_env.sh # Start Docker Containers diff --git a/DocSum/docker_compose/amd/cpu/epyc/compose.yaml b/DocSum/docker_compose/amd/cpu/epyc/compose.yaml index ba0c4c0178..3a687b9b9c 100644 --- a/DocSum/docker_compose/amd/cpu/epyc/compose.yaml +++ b/DocSum/docker_compose/amd/cpu/epyc/compose.yaml @@ -4,7 +4,7 @@ services: vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1 container_name: docsum-epyc-vllm-service ports: - "8008:80" diff --git a/DocSum/docker_compose/intel/cpu/xeon/README.md b/DocSum/docker_compose/intel/cpu/xeon/README.md index e0b2ab26c0..acd64b9eca 100644 --- a/DocSum/docker_compose/intel/cpu/xeon/README.md +++ b/DocSum/docker_compose/intel/cpu/xeon/README.md @@ -13,13 +13,26 @@ This example includes the following sections: This section describes how to quickly deploy and test the DocSum service manually on an Intel Xeon platform. The basic steps are: -1. [Access the Code](#access-the-code) -2. [Generate a HuggingFace Access Token](#generate-a-huggingface-access-token) -3. [Configure the Deployment Environment](#configure-the-deployment-environment) -4. [Deploy the Services Using Docker Compose](#deploy-the-services-using-docker-compose) -5. [Check the Deployment Status](#check-the-deployment-status) -6. [Test the Pipeline](#test-the-pipeline) -7. [Cleanup the Deployment](#cleanup-the-deployment) +- [Example DocSum deployments on Intel Xeon Processor](#example-docsum-deployments-on-intel-xeon-processor) + - [DocSum Quick Start Deployment](#docsum-quick-start-deployment) + - [Access the Code and Set Up Environment](#access-the-code-and-set-up-environment) + - [Generate a HuggingFace Access Token](#generate-a-huggingface-access-token) + - [Deploy the Services Using Docker Compose](#deploy-the-services-using-docker-compose) + - [Option #1](#option-1) + - [Option #2](#option-2) + - [Check the Deployment Status](#check-the-deployment-status) + - [Test the Pipeline](#test-the-pipeline) + - [Cleanup the Deployment](#cleanup-the-deployment) + - [DocSum Docker Compose Files](#docsum-docker-compose-files) + - [Running LLM models with remote endpoints](#running-llm-models-with-remote-endpoints) + - [DocSum Detailed Usage](#docsum-detailed-usage) + - [Query with text](#query-with-text) + - [Query with audio and video](#query-with-audio-and-video) + - [Query with long context](#query-with-long-context) + - [Launch the UI](#launch-the-ui) + - [Gradio UI](#gradio-ui) + - [Launch the Svelte UI](#launch-the-svelte-ui) + - [Launch the React UI (Optional)](#launch-the-react-ui-optional) ### Access the Code and Set Up Environment @@ -28,7 +41,7 @@ Clone the GenAIExample repository and access the ChatQnA Intel Xeon platform Doc ```bash git clone https://github.com/opea-project/GenAIExamples.git cd GenAIExamples/DocSum/docker_compose -source intel/set_env.sh +source intel/cpu/xeon/set_env.sh ``` > NOTE: by default vLLM does "warmup" at start, to optimize its performance for the specified model and the underlying platform, which can take long time. For development (and e.g. autoscaling) it can be skipped with `export VLLM_SKIP_WARMUP=true`. @@ -47,6 +60,8 @@ Some HuggingFace resources, such as some models, are only accessible if you have ### Deploy the Services Using Docker Compose +#### Option #1 + To deploy the DocSum services, execute the `docker compose up` command with the appropriate arguments. 
For a default deployment, execute: ```bash @@ -54,6 +69,17 @@ cd intel/cpu/xeon/ docker compose up -d ``` +#### Option #2 + +> NOTE: To enable monitoring, the `compose.monitoring.yaml` file needs to be merged with the default `compose.yaml` file. + +To deploy with monitoring: + +```bash +cd intel/cpu/xeon/ +docker compose -f compose.yaml -f compose.monitoring.yaml up -d +``` + **Note**: developers should build docker image from source when: - Developing off the git main branch (as the container's ports in the repo may be different from the published docker image). @@ -109,17 +135,25 @@ To stop the containers associated with the deployment, execute the following command: docker compose -f compose.yaml down ``` +If monitoring is enabled, execute the following command: + +```bash +cd intel/cpu/xeon/ +docker compose -f compose.yaml -f compose.monitoring.yaml down +``` + All the DocSum containers will be stopped and then removed on completion of the "down" command. ## DocSum Docker Compose Files In the context of deploying a DocSum pipeline on an Intel® Xeon® platform, we can pick and choose different large language model serving frameworks. The table below outlines the various configurations that are available as part of the application. -| File | Description | | -------------------------------------------- | -------------------------------------------------------------------------------------- | | [compose.yaml](./compose.yaml) | Default compose file using vllm as serving framework | | [compose_tgi.yaml](./compose_tgi.yaml) | The LLM serving framework is TGI. All other configurations remain the same as default | | [compose_remote.yaml](./compose_remote.yaml) | Uses remote inference endpoints for LLMs. All other configurations are same as default | +| File | Description | | ---------------------------------------------------- | -------------------------------------------------------------------------------------- | | [compose.yaml](./compose.yaml) | Default compose file using vllm as serving framework | | [compose_tgi.yaml](./compose_tgi.yaml) | The LLM serving framework is TGI. All other configurations remain the same as default | | [compose_remote.yaml](./compose_remote.yaml) | Uses remote inference endpoints for LLMs. All other configurations are same as default | +| [compose.monitoring.yaml](./compose.monitoring.yaml) | Helper file for monitoring features.
Can be used along with any compose files | ### Running LLM models with remote endpoints diff --git a/DocSum/docker_compose/intel/cpu/xeon/compose.monitoring.yaml b/DocSum/docker_compose/intel/cpu/xeon/compose.monitoring.yaml new file mode 100644 index 0000000000..187427d348 --- /dev/null +++ b/DocSum/docker_compose/intel/cpu/xeon/compose.monitoring.yaml @@ -0,0 +1,59 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + prometheus: + image: prom/prometheus:v2.52.0 + container_name: opea_prometheus + user: root + volumes: + - ./prometheus.yaml:/etc/prometheus/prometheus.yaml + - ./prometheus_data:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yaml' + ports: + - '9090:9090' + ipc: host + restart: unless-stopped + + grafana: + image: grafana/grafana:11.0.0 + container_name: grafana + volumes: + - ./grafana_data:/var/lib/grafana + - ./grafana/dashboards:/var/lib/grafana/dashboards + - ./grafana/provisioning:/etc/grafana/provisioning + user: root + environment: + GF_SECURITY_ADMIN_PASSWORD: admin + GF_RENDERING_CALLBACK_URL: http://grafana:3000/ + GF_LOG_FILTERS: rendering:debug + no_proxy: ${no_proxy} + host_ip: ${host_ip} + depends_on: + - prometheus + ports: + - '3000:3000' + ipc: host + restart: unless-stopped + + node-exporter: + image: prom/node-exporter + container_name: node-exporter + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + command: + - '--path.procfs=/host/proc' + - '--path.sysfs=/host/sys' + - --collector.filesystem.ignored-mount-points + - "^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)" + environment: + no_proxy: ${no_proxy} + ports: + - 9100:9100 + ipc: host + restart: always + deploy: + mode: global diff --git a/DocSum/docker_compose/intel/cpu/xeon/compose.yaml b/DocSum/docker_compose/intel/cpu/xeon/compose.yaml index 917bfc1140..163129ce95 100644 --- a/DocSum/docker_compose/intel/cpu/xeon/compose.yaml +++ b/DocSum/docker_compose/intel/cpu/xeon/compose.yaml @@ -3,7 +3,7 @@ services: vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + image: public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.10.1 container_name: docsum-xeon-vllm-service ports: - ${LLM_ENDPOINT_PORT:-8008}:80 diff --git a/DocSum/docker_compose/intel/cpu/xeon/grafana/dashboards/download_opea_dashboard.sh b/DocSum/docker_compose/intel/cpu/xeon/grafana/dashboards/download_opea_dashboard.sh new file mode 100644 index 0000000000..5b59b3cd34 --- /dev/null +++ b/DocSum/docker_compose/intel/cpu/xeon/grafana/dashboards/download_opea_dashboard.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +if ls *.json 1> /dev/null 2>&1; then + rm *.json +fi + +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/vllm_grafana.json +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/tgi_grafana.json +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/docsum_megaservice_grafana.json +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/node_grafana.json diff --git a/DocSum/docker_compose/intel/cpu/xeon/grafana/provisioning/dashboards/local.yaml b/DocSum/docker_compose/intel/cpu/xeon/grafana/provisioning/dashboards/local.yaml new file mode 100644 index 0000000000..13922a769b --- 
/dev/null +++ b/DocSum/docker_compose/intel/cpu/xeon/grafana/provisioning/dashboards/local.yaml @@ -0,0 +1,14 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: 1 + +providers: +- name: 'default' + orgId: 1 + folder: '' + type: file + disableDeletion: false + updateIntervalSeconds: 10 #how often Grafana will scan for changed dashboards + options: + path: /var/lib/grafana/dashboards diff --git a/DocSum/docker_compose/intel/cpu/xeon/grafana/provisioning/datasources/datasource.yml b/DocSum/docker_compose/intel/cpu/xeon/grafana/provisioning/datasources/datasource.yml new file mode 100644 index 0000000000..a206521d67 --- /dev/null +++ b/DocSum/docker_compose/intel/cpu/xeon/grafana/provisioning/datasources/datasource.yml @@ -0,0 +1,54 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# config file version +apiVersion: 1 + +# list of datasources that should be deleted from the database +deleteDatasources: + - name: Prometheus + orgId: 1 + +# list of datasources to insert/update depending +# what's available in the database +datasources: + # name of the datasource. Required +- name: Prometheus + # datasource type. Required + type: prometheus + # access mode. direct or proxy. Required + access: proxy + # org id. will default to orgId 1 if not specified + orgId: 1 + # url + url: http://$host_ip:9090 + # database password, if used + password: + # database user, if used + user: + # database name, if used + database: + # enable/disable basic auth + basicAuth: false + # basic auth username, if used + basicAuthUser: + # basic auth password, if used + basicAuthPassword: + # enable/disable with credentials headers + withCredentials: + # mark as default datasource. Max one per org + isDefault: true + # fields that will be converted to json and stored in json_data + jsonData: + httpMethod: GET + graphiteVersion: "1.1" + tlsAuth: false + tlsAuthWithCACert: false + # json object of data that will be encrypted. + secureJsonData: + tlsCACert: "..." + tlsClientCert: "..." + tlsClientKey: "..." + version: 1 + # allow users to edit datasources from the UI. 
+ editable: true diff --git a/DocSum/docker_compose/intel/cpu/xeon/prometheus.yaml b/DocSum/docker_compose/intel/cpu/xeon/prometheus.yaml new file mode 100644 index 0000000000..758627c077 --- /dev/null +++ b/DocSum/docker_compose/intel/cpu/xeon/prometheus.yaml @@ -0,0 +1,27 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# [IP_ADDR]:{PORT_OUTSIDE_CONTAINER} -> {PORT_INSIDE_CONTAINER} / {PROTOCOL} +global: + scrape_interval: 5s + external_labels: + monitor: "my-monitor" +scrape_configs: + - job_name: "prometheus" + static_configs: + - targets: ["opea_prometheus:9090"] + - job_name: "vllm" + metrics_path: /metrics + static_configs: + - targets: ["docsum-xeon-vllm-service:80"] + - job_name: "tgi" + metrics_path: /metrics + static_configs: + - targets: ["docsum-xeon-tgi-server:80"] + - job_name: "docsum-backend-server" + metrics_path: /metrics + static_configs: + - targets: ["docsum-xeon-backend-server:8888"] + - job_name: "prometheus-node-exporter" + metrics_path: /metrics + static_configs: + - targets: ["node-exporter:9100"] diff --git a/DocSum/docker_compose/intel/set_env.sh b/DocSum/docker_compose/intel/cpu/xeon/set_env.sh similarity index 68% rename from DocSum/docker_compose/intel/set_env.sh rename to DocSum/docker_compose/intel/cpu/xeon/set_env.sh index 0411335847..07f734f36e 100644 --- a/DocSum/docker_compose/intel/set_env.sh +++ b/DocSum/docker_compose/intel/cpu/xeon/set_env.sh @@ -2,15 +2,14 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -pushd "${SCRIPT_DIR}/../../.." > /dev/null + +SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd) + +pushd "$SCRIPT_DIR/../../../../../" > /dev/null source .set_env.sh popd > /dev/null export host_ip=$(hostname -I | awk '{print $1}') # Example: host_ip="192.168.1.1" -export no_proxy="${no_proxy},${host_ip}" # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" -export http_proxy=$http_proxy -export https_proxy=$https_proxy export HF_TOKEN=${HF_TOKEN} export LLM_ENDPOINT_PORT=8008 @@ -41,3 +40,13 @@ export NUM_CARDS=1 export BLOCK_SIZE=128 export MAX_NUM_SEQS=256 export MAX_SEQ_LEN_TO_CAPTURE=2048 + +# Download Grafana configurations +pushd "${SCRIPT_DIR}/grafana/dashboards" > /dev/null +source download_opea_dashboard.sh +popd > /dev/null + +# Set network proxy settings +export no_proxy="${no_proxy},${host_ip},docsum-xeon-vllm-service,docsum-xeon-tgi-server,docsum-xeon-backend-server,opea_prometheus,grafana,node-exporter,$JAEGER_IP" # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" +export http_proxy=$http_proxy +export https_proxy=$https_proxy diff --git a/DocSum/docker_compose/intel/hpu/gaudi/README.md b/DocSum/docker_compose/intel/hpu/gaudi/README.md index 03e53101e1..70e251e869 100644 --- a/DocSum/docker_compose/intel/hpu/gaudi/README.md +++ b/DocSum/docker_compose/intel/hpu/gaudi/README.md @@ -15,13 +15,25 @@ This example includes the following sections: This section describes how to quickly deploy and test the DocSum service manually on an Intel® Gaudi® platform. The basic steps are: -1. [Access the Code](#access-the-code) -2. [Generate a HuggingFace Access Token](#generate-a-huggingface-access-token) -3. [Configure the Deployment Environment](#configure-the-deployment-environment) -4. [Deploy the Services Using Docker Compose](#deploy-the-services-using-docker-compose) -5. [Check the Deployment Status](#check-the-deployment-status) -6. [Test the Pipeline](#test-the-pipeline) -7. 
[Cleanup the Deployment](#cleanup-the-deployment) +- [Example DocSum deployments on Intel® Gaudi® Platform](#example-docsum-deployments-on-intel-gaudi-platform) + - [DocSum Quick Start Deployment](#docsum-quick-start-deployment) + - [Access the Code and Set Up Environment](#access-the-code-and-set-up-environment) + - [Generate a HuggingFace Access Token](#generate-a-huggingface-access-token) + - [Deploy the Services Using Docker Compose](#deploy-the-services-using-docker-compose) + - [Option #1](#option-1) + - [Option #2](#option-2) + - [Check the Deployment Status](#check-the-deployment-status) + - [Test the Pipeline](#test-the-pipeline) + - [Cleanup the Deployment](#cleanup-the-deployment) + - [DocSum Docker Compose Files](#docsum-docker-compose-files) + - [DocSum Detailed Usage](#docsum-detailed-usage) + - [Query with text](#query-with-text) + - [Query with audio and video](#query-with-audio-and-video) + - [Query with long context](#query-with-long-context) + - [Launch the UI](#launch-the-ui) + - [Gradio UI](#gradio-ui) + - [Launch the Svelte UI](#launch-the-svelte-ui) + - [Launch the React UI (Optional)](#launch-the-react-ui-optional) ### Access the Code and Set Up Environment @@ -30,7 +42,7 @@ Clone the GenAIExample repository and access the DocSum Intel® Gaudi® platform ```bash git clone https://github.com/opea-project/GenAIExamples.git cd GenAIExamples/DocSum/docker_compose -source intel/set_env.sh +source intel/hpu/gaudi/set_env.sh ``` > NOTE: by default vLLM does "warmup" at start, to optimize its performance for the specified model and the underlying platform, which can take long time. For development (and e.g. autoscaling) it can be skipped with `export VLLM_SKIP_WARMUP=true`. @@ -49,6 +61,8 @@ Some HuggingFace resources, such as some models, are only accessible if you have ### Deploy the Services Using Docker Compose +#### Option #1 + To deploy the DocSum services, execute the `docker compose up` command with the appropriate arguments. For a default deployment, execute: ```bash @@ -56,6 +70,17 @@ cd intel/hpu/gaudi/ docker compose up -d ``` +#### Option #2 + +> NOTE: To enable monitoring, the `compose.monitoring.yaml` file needs to be merged with the default `compose.yaml` file. + +To deploy with monitoring: + +```bash +cd intel/hpu/gaudi/ +docker compose -f compose.yaml -f compose.monitoring.yaml up -d +``` + **Note**: developers should build docker image from source when: - Developing off the git main branch (as the container's ports in the repo may be different from the published docker image). @@ -117,10 +142,11 @@ All the DocSum containers will be stopped and then removed on completion of the In the context of deploying a DocSum pipeline on an Intel® Gaudi® platform, the allocation and utilization of Gaudi devices across different services are important considerations for optimizing performance and resource efficiency. Each of the example deployments, defined by the example Docker compose yaml files, demonstrates a unique approach to leveraging Gaudi hardware, reflecting different priorities and operational strategies. -| File | Description | | -------------------------------------- | ----------------------------------------------------------------------------------------- | | [compose.yaml](./compose.yaml) | Default compose file using vllm as serving framework | | [compose_tgi.yaml](./compose_tgi.yaml) | The LLM serving framework is TGI.
All other configurations remain the same as the default | +| File | Description | +| ---------------------------------------------------- | ----------------------------------------------------------------------------------------- | +| [compose.yaml](./compose.yaml) | Default compose file using vllm as serving framework | +| [compose_tgi.yaml](./compose_tgi.yaml) | The LLM serving framework is TGI. All other configurations remain the same as the default | +| [compose.monitoring.yaml](./compose.monitoring.yaml) | Helper file for monitoring features. Can be used along with any compose files | ## DocSum Detailed Usage diff --git a/DocSum/docker_compose/intel/hpu/gaudi/compose.monitoring.yaml b/DocSum/docker_compose/intel/hpu/gaudi/compose.monitoring.yaml new file mode 100644 index 0000000000..691671e656 --- /dev/null +++ b/DocSum/docker_compose/intel/hpu/gaudi/compose.monitoring.yaml @@ -0,0 +1,75 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + prometheus: + image: prom/prometheus:v2.52.0 + container_name: opea_prometheus + user: root + volumes: + - ./prometheus.yaml:/etc/prometheus/prometheus.yaml + - ./prometheus_data:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yaml' + ports: + - '9090:9090' + ipc: host + restart: unless-stopped + + grafana: + image: grafana/grafana:11.0.0 + container_name: grafana + volumes: + - ./grafana_data:/var/lib/grafana + - ./grafana/dashboards:/var/lib/grafana/dashboards + - ./grafana/provisioning:/etc/grafana/provisioning + user: root + environment: + GF_SECURITY_ADMIN_PASSWORD: admin + GF_RENDERING_CALLBACK_URL: http://grafana:3000/ + GF_LOG_FILTERS: rendering:debug + no_proxy: ${no_proxy} + host_ip: ${host_ip} + depends_on: + - prometheus + ports: + - '3000:3000' + ipc: host + restart: unless-stopped + + node-exporter: + image: prom/node-exporter + container_name: node-exporter + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + command: + - '--path.procfs=/host/proc' + - '--path.rootfs=/rootfs' + - '--path.sysfs=/host/sys' + - '--path.udev.data=/rootfs/run/udev/data' + - --collector.filesystem.ignored-mount-points + - "^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)" + environment: + no_proxy: ${no_proxy} + ports: + - 9100:9100 + restart: always + deploy: + mode: global + + gaudi-metrics-exporter: + image: vault.habana.ai/gaudi-metric-exporter/metric-exporter:latest + privileged: true + container_name: gaudi-metrics-exporter + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + - /dev:/dev + deploy: + mode: global + ports: + - 41611:41611 + restart: unless-stopped diff --git a/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml b/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml index 9311817f32..fbc10c06ca 100644 --- a/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/DocSum/docker_compose/intel/hpu/gaudi/compose.yaml @@ -3,7 +3,7 @@ services: vllm-service: - image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} + image: opea/vllm-gaudi:1.4 container_name: docsum-gaudi-vllm-service ports: - ${LLM_ENDPOINT_PORT:-8008}:80 diff --git a/DocSum/docker_compose/intel/hpu/gaudi/grafana/dashboards/download_opea_dashboard.sh b/DocSum/docker_compose/intel/hpu/gaudi/grafana/dashboards/download_opea_dashboard.sh new file mode 100644 index 0000000000..b02827a300 --- /dev/null +++ 
b/DocSum/docker_compose/intel/hpu/gaudi/grafana/dashboards/download_opea_dashboard.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +if ls *.json 1> /dev/null 2>&1; then + rm *.json +fi + +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/vllm_grafana.json +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/tgi_grafana.json +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/gaudi_grafana_v2.json +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/docsum_megaservice_grafana.json +wget https://raw.githubusercontent.com/opea-project/GenAIEval/refs/heads/main/evals/benchmark/grafana/node_grafana.json diff --git a/DocSum/docker_compose/intel/hpu/gaudi/grafana/provisioning/dashboards/local.yaml b/DocSum/docker_compose/intel/hpu/gaudi/grafana/provisioning/dashboards/local.yaml new file mode 100644 index 0000000000..13922a769b --- /dev/null +++ b/DocSum/docker_compose/intel/hpu/gaudi/grafana/provisioning/dashboards/local.yaml @@ -0,0 +1,14 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: 1 + +providers: +- name: 'default' + orgId: 1 + folder: '' + type: file + disableDeletion: false + updateIntervalSeconds: 10 #how often Grafana will scan for changed dashboards + options: + path: /var/lib/grafana/dashboards diff --git a/DocSum/docker_compose/intel/hpu/gaudi/grafana/provisioning/datasources/datasource.yml b/DocSum/docker_compose/intel/hpu/gaudi/grafana/provisioning/datasources/datasource.yml new file mode 100644 index 0000000000..a206521d67 --- /dev/null +++ b/DocSum/docker_compose/intel/hpu/gaudi/grafana/provisioning/datasources/datasource.yml @@ -0,0 +1,54 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# config file version +apiVersion: 1 + +# list of datasources that should be deleted from the database +deleteDatasources: + - name: Prometheus + orgId: 1 + +# list of datasources to insert/update depending +# what's available in the database +datasources: + # name of the datasource. Required +- name: Prometheus + # datasource type. Required + type: prometheus + # access mode. direct or proxy. Required + access: proxy + # org id. will default to orgId 1 if not specified + orgId: 1 + # url + url: http://$host_ip:9090 + # database password, if used + password: + # database user, if used + user: + # database name, if used + database: + # enable/disable basic auth + basicAuth: false + # basic auth username, if used + basicAuthUser: + # basic auth password, if used + basicAuthPassword: + # enable/disable with credentials headers + withCredentials: + # mark as default datasource. Max one per org + isDefault: true + # fields that will be converted to json and stored in json_data + jsonData: + httpMethod: GET + graphiteVersion: "1.1" + tlsAuth: false + tlsAuthWithCACert: false + # json object of data that will be encrypted. + secureJsonData: + tlsCACert: "..." + tlsClientCert: "..." + tlsClientKey: "..." + version: 1 + # allow users to edit datasources from the UI. 
+ editable: true diff --git a/DocSum/docker_compose/intel/hpu/gaudi/prometheus.yaml b/DocSum/docker_compose/intel/hpu/gaudi/prometheus.yaml new file mode 100644 index 0000000000..16693ae112 --- /dev/null +++ b/DocSum/docker_compose/intel/hpu/gaudi/prometheus.yaml @@ -0,0 +1,34 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# [IP_ADDR]:{PORT_OUTSIDE_CONTAINER} -> {PORT_INSIDE_CONTAINER} / {PROTOCOL} +global: + scrape_interval: 5s + external_labels: + monitor: "my-monitor" +scrape_configs: + - job_name: "prometheus" + static_configs: + - targets: ["opea_prometheus:9090"] + - job_name: "vllm" + metrics_path: /metrics + static_configs: + - targets: ["docsum-gaudi-vllm-service:80"] + - job_name: "tgi" + metrics_path: /metrics + static_configs: + - targets: ["docsum-gaudi-tgi-server:80"] + - job_name: "docsum-backend-server" + metrics_path: /metrics + static_configs: + - targets: ["docsum-gaudi-backend-server:8888"] + - job_name: "prometheus-node-exporter" + scrape_interval: 30s + scrape_timeout: 25s + metrics_path: /metrics + static_configs: + - targets: ["node-exporter:9100"] + - job_name: "gaudi-metrics-exporter" + scrape_interval: 30s + metrics_path: /metrics + static_configs: + - targets: ["gaudi-metrics-exporter:41611"] diff --git a/DocSum/docker_compose/intel/hpu/gaudi/set_env.sh b/DocSum/docker_compose/intel/hpu/gaudi/set_env.sh new file mode 100644 index 0000000000..e571ad82ab --- /dev/null +++ b/DocSum/docker_compose/intel/hpu/gaudi/set_env.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd) + +pushd "$SCRIPT_DIR/../../../../../" > /dev/null +source .set_env.sh +popd > /dev/null + +export host_ip=$(hostname -I | awk '{print $1}') # Example: host_ip="192.168.1.1" +export HF_TOKEN=${HF_TOKEN} + +export LLM_ENDPOINT_PORT=8008 +export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct" + +export BLOCK_SIZE=128 +export MAX_NUM_SEQS=256 +export MAX_SEQ_LEN_TO_CAPTURE=2048 +export NUM_CARDS=1 +export MAX_INPUT_TOKENS=1024 +export MAX_TOTAL_TOKENS=2048 + +export LLM_PORT=9000 +export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}" +export ASR_SERVICE_PORT=7066 +export DocSum_COMPONENT_NAME="OpeaDocSumvLLM" # OpeaDocSumTgi +export FRONTEND_SERVICE_PORT=5173 +export MEGA_SERVICE_HOST_IP=${host_ip} +export LLM_SERVICE_HOST_IP=${host_ip} +export ASR_SERVICE_HOST_IP=${host_ip} + +export BACKEND_SERVICE_PORT=8888 +export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum" + +export LOGFLAG=True + +export NUM_CARDS=1 +export BLOCK_SIZE=128 +export MAX_NUM_SEQS=256 +export MAX_SEQ_LEN_TO_CAPTURE=2048 + +# Download Grafana configurations +pushd "${SCRIPT_DIR}/grafana/dashboards" > /dev/null +source download_opea_dashboard.sh +popd > /dev/null + +# Set network proxy settings +export no_proxy="${no_proxy},${host_ip},docsum-gaudi-vllm-service,docsum-gaudi-tgi-server,docsum-gaudi-backend-server,gaudi-metrics-exporter,opea_prometheus,grafana,node-exporter,$JAEGER_IP" # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" +export http_proxy=$http_proxy +export https_proxy=$https_proxy diff --git a/DocSum/docker_image_build/build.yaml b/DocSum/docker_image_build/build.yaml index b4a2eb9c54..dcd4433ad0 100644 --- a/DocSum/docker_image_build/build.yaml +++ b/DocSum/docker_image_build/build.yaml @@ -54,15 +54,3 @@ services: context: GenAIComps dockerfile: 
comps/third_parties/vllm/src/Dockerfile.amd_gpu image: ${REGISTRY:-opea}/vllm-rocm:${TAG:-latest} - vllm: - build: - context: vllm - dockerfile: docker/Dockerfile.cpu - extends: docsum - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} - vllm-gaudi: - build: - context: vllm-fork - dockerfile: Dockerfile.hpu - extends: docsum - image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} diff --git a/DocSum/tests/test_compose_on_epyc.sh b/DocSum/tests/test_compose_on_epyc.sh index 1a4959366a..a67e05c681 100644 --- a/DocSum/tests/test_compose_on_epyc.sh +++ b/DocSum/tests/test_compose_on_epyc.sh @@ -38,19 +38,8 @@ function build_docker_images() { docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/vllm-project/vllm.git && cd vllm - VLLM_VER=v0.10.0 - echo "Check out vLLM tag ${VLLM_VER}" - git checkout ${VLLM_VER} &>/dev/null - VLLM_REQ_FILE="requirements/cpu.txt" - if ! grep -q "^transformers" "$VLLM_REQ_FILE"; then - echo "Adding transformers<4.54.0 to $VLLM_REQ_FILE" - echo "transformers<4.54.0" >>"$VLLM_REQ_FILE" - fi - cd ../ - echo "Build all the images with --no-cache, check docker_image_build.log for details..." - service_list="docsum docsum-gradio-ui whisper llm-docsum vllm" + service_list="docsum docsum-gradio-ui whisper llm-docsum" docker compose -f build.yaml build ${service_list} --no-cache >${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -58,6 +47,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/amd/cpu/epyc/ + export no_proxy="localhost,127.0.0.1,$ip_address" docker compose -f compose.yaml up -d >${LOG_PATH}/start_services_with_compose.log sleep 1m } diff --git a/DocSum/tests/test_compose_on_gaudi.sh b/DocSum/tests/test_compose_on_gaudi.sh index 2e13e41c9d..654ad01282 100644 --- a/DocSum/tests/test_compose_on_gaudi.sh +++ b/DocSum/tests/test_compose_on_gaudi.sh @@ -16,7 +16,7 @@ echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" echo "TAG=IMAGE_TAG=${IMAGE_TAG}" export REGISTRY=${IMAGE_REPO} export TAG=${IMAGE_TAG} -source $WORKPATH/docker_compose/intel/set_env.sh +source $WORKPATH/docker_compose/intel/hpu/gaudi/set_env.sh export MODEL_CACHE=${model_cache:-"./data"} @@ -27,12 +27,6 @@ export MAX_SEQ_LEN_TO_CAPTURE=2048 export MAX_INPUT_TOKENS=2048 export MAX_TOTAL_TOKENS=4096 -# set service host and no_proxy -export LLM_ENDPOINT="http://vllm-service:80" -export LLM_SERVICE_HOST_IP="llm-docsum-vllm" -export ASR_SERVICE_HOST_IP="whisper" -export no_proxy=$no_proxy,$LLM_SERVICE_HOST_IP,$ASR_SERVICE_HOST_IP,"vllm-service" - # Get the root folder of the current script ROOT_FOLDER=$(dirname "$(readlink -f "$0")") @@ -46,12 +40,8 @@ function build_docker_images() { docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork - VLLM_FORK_VER=v0.6.6.post1+Gaudi-1.20.0 - git checkout ${VLLM_FORK_VER} &> /dev/null && cd ../ - echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="docsum docsum-gradio-ui whisper llm-docsum vllm-gaudi" + service_list="docsum docsum-gradio-ui whisper llm-docsum" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log 2>&1 docker images && sleep 1s @@ -59,7 +49,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/hpu/gaudi - docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log + docker compose -f compose.yaml -f compose.monitoring.yaml up -d > ${LOG_PATH}/start_services_with_compose.log sleep 2m } @@ -359,7 +349,7 @@ function validate_megaservice_long_text() { function stop_docker() { cd $WORKPATH/docker_compose/intel/hpu/gaudi - docker compose -f compose.yaml stop && docker compose rm -f + docker compose -f compose.yaml -f compose.monitoring.yaml stop && docker compose -f compose.yaml -f compose.monitoring.yaml rm -f } function main() { diff --git a/DocSum/tests/test_compose_on_xeon.sh b/DocSum/tests/test_compose_on_xeon.sh index 5ceed1ec03..0b7d678db2 100644 --- a/DocSum/tests/test_compose_on_xeon.sh +++ b/DocSum/tests/test_compose_on_xeon.sh @@ -17,7 +17,7 @@ echo "TAG=IMAGE_TAG=${IMAGE_TAG}" export REGISTRY=${IMAGE_REPO} export TAG=${IMAGE_TAG} -source $WORKPATH/docker_compose/intel/set_env.sh +source $WORKPATH/docker_compose/intel/cpu/xeon/set_env.sh export MODEL_CACHE=${model_cache:-"./data"} export MAX_INPUT_TOKENS=2048 @@ -36,14 +36,8 @@ function build_docker_images() { docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . popd && sleep 1s - git clone https://github.com/vllm-project/vllm.git && cd vllm - VLLM_VER=v0.10.0 - echo "Check out vLLM tag ${VLLM_VER}" - git checkout ${VLLM_VER} &> /dev/null - cd ../ - echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- service_list="docsum docsum-gradio-ui whisper llm-docsum vllm" + service_list="docsum docsum-gradio-ui whisper llm-docsum" docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker images && sleep 1s @@ -51,7 +45,8 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ - docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log + export no_proxy="localhost,127.0.0.1,$ip_address" + docker compose -f compose.yaml -f compose.monitoring.yaml up -d > ${LOG_PATH}/start_services_with_compose.log sleep 1m } @@ -351,7 +346,7 @@ function validate_megaservice_long_text() { function stop_docker() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ - docker compose stop && docker compose rm -f + docker compose -f compose.yaml -f compose.monitoring.yaml down } function main() { diff --git a/DocSum/tests/test_compose_tgi_on_epyc.sh b/DocSum/tests/test_compose_tgi_on_epyc.sh index b1caf7685c..33442bb270 100644 --- a/DocSum/tests/test_compose_tgi_on_epyc.sh +++ b/DocSum/tests/test_compose_tgi_on_epyc.sh @@ -47,6 +47,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/amd/cpu/epyc/ + export no_proxy="localhost,127.0.0.1,$ip_address" docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log sleep 1m } diff --git a/DocSum/tests/test_compose_tgi_on_gaudi.sh b/DocSum/tests/test_compose_tgi_on_gaudi.sh index c6133515cf..19111a3ded 100644 --- a/DocSum/tests/test_compose_tgi_on_gaudi.sh +++ b/DocSum/tests/test_compose_tgi_on_gaudi.sh @@ -16,7 +16,7 @@ echo "TAG=IMAGE_TAG=${IMAGE_TAG}" export REGISTRY=${IMAGE_REPO} export TAG=${IMAGE_TAG} -source $WORKPATH/docker_compose/intel/set_env.sh +source $WORKPATH/docker_compose/intel/hpu/gaudi/set_env.sh export MODEL_CACHE=${model_cache:-"./data"} export MAX_INPUT_TOKENS=2048 @@ -45,7 +45,7 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/hpu/gaudi - docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log + docker compose -f compose_tgi.yaml -f compose.monitoring.yaml up -d > ${LOG_PATH}/start_services_with_compose.log sleep 1m } @@ -88,36 +88,51 @@ function validate_service() { local FORM_DATA5="${11}" local FORM_DATA6="${12}" - if [[ $VALIDATE_TYPE == *"json"* ]]; then - HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - else - CURL_CMD=(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "$FORM_DATA1" -F "$FORM_DATA2" -F "$FORM_DATA3" -F "$FORM_DATA4" -F "$FORM_DATA5" -H 'Content-Type: multipart/form-data' "$URL") - if [[ -n "$FORM_DATA6" ]]; then - CURL_CMD+=(-F "$FORM_DATA6") + local MAX_RETRIES=${13:-3} + local RETRY_DELAY=${14:-2} + + local retry_count=0 + local success=false + + while [ $retry_count -lt $MAX_RETRIES ] && [ "$success" = false ]; do + retry_count=$((retry_count + 1)) + + echo "[ $SERVICE_NAME ] Attempt $retry_count/$MAX_RETRIES..."
+ if [[ $VALIDATE_TYPE == *"json"* ]]; then + HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + else + CURL_CMD=(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F "$FORM_DATA1" -F "$FORM_DATA2" -F "$FORM_DATA3" -F "$FORM_DATA4" -F "$FORM_DATA5" -H 'Content-Type: multipart/form-data' "$URL") + if [[ -n "$FORM_DATA6" ]]; then + CURL_CMD+=(-F "$FORM_DATA6") + fi + HTTP_RESPONSE=$("${CURL_CMD[@]}") + fi + HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') + RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') + + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + + # check response status + if [ "$HTTP_STATUS" -ne "200" ]; then + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + exit 1 + else + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + fi + # check response body + if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then + echo "EXPECTED_RESULT==> $EXPECTED_RESULT" + echo "RESPONSE_BODY==> $RESPONSE_BODY" + echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" + if [ $retry_count -lt $MAX_RETRIES ]; then + echo "[ $SERVICE_NAME ] will retry after $RETRY_DELAY seconds ..." + sleep ${RETRY_DELAY}s + fi + else + success=true + echo "[ $SERVICE_NAME ] Content is as expected." fi - HTTP_RESPONSE=$("${CURL_CMD[@]}") - fi - HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://') - RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g') - - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - - # check response status - if [ "$HTTP_STATUS" -ne "200" ]; then - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - exit 1 - else - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - fi - # check response body - if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then - echo "EXPECTED_RESULT==> $EXPECTED_RESULT" - echo "RESPONSE_BODY==> $RESPONSE_BODY" - echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY" - exit 1 - else - echo "[ $SERVICE_NAME ] Content is as expected."
- fi + done sleep 1s } @@ -354,7 +369,7 @@ function validate_megaservice_long_text() { function stop_docker() { cd $WORKPATH/docker_compose/intel/hpu/gaudi - docker compose -f compose_tgi.yaml stop && docker compose rm -f + docker compose -f compose_tgi.yaml -f compose.monitoring.yaml stop && docker compose -f compose_tgi.yaml -f compose.monitoring.yaml rm -f } function main() { diff --git a/DocSum/tests/test_compose_tgi_on_xeon.sh b/DocSum/tests/test_compose_tgi_on_xeon.sh index e107bfca63..1fbaa4d357 100644 --- a/DocSum/tests/test_compose_tgi_on_xeon.sh +++ b/DocSum/tests/test_compose_tgi_on_xeon.sh @@ -16,7 +16,7 @@ echo "TAG=IMAGE_TAG=${IMAGE_TAG}" export REGISTRY=${IMAGE_REPO} export TAG=${IMAGE_TAG} -source $WORKPATH/docker_compose/intel/set_env.sh +source $WORKPATH/docker_compose/intel/cpu/xeon/set_env.sh export MODEL_CACHE=${model_cache:-"./data"} export MAX_INPUT_TOKENS=2048 @@ -45,7 +45,8 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ - docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log + export no_proxy="localhost,127.0.0.1,$ip_address" + docker compose -f compose_tgi.yaml -f compose.monitoring.yaml up -d > ${LOG_PATH}/start_services_with_compose.log sleep 1m } @@ -354,7 +355,7 @@ function validate_megaservice_long_text() { function stop_docker() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ - docker compose -f compose_tgi.yaml stop && docker compose rm -f + docker compose -f compose_tgi.yaml -f compose.monitoring.yaml stop && docker compose -f compose_tgi.yaml -f compose.monitoring.yaml rm -f } function main() { diff --git a/DocSum/ui/docker/Dockerfile b/DocSum/ui/docker/Dockerfile index 1d5115f4b5..3a6ca7f7c9 100644 --- a/DocSum/ui/docker/Dockerfile +++ b/DocSum/ui/docker/Dockerfile @@ -1,8 +1,8 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -# Use node 20.11.1 as the base image -FROM node:20.11.1 +# Use node 20.19.0 as the base image (required for chokidar@5.0.0 compatibility) +FROM node:20.19.0 # Update package manager and install Git RUN apt-get update -y && apt-get install -y git diff --git a/DocSum/ui/gradio/requirements.txt b/DocSum/ui/gradio/requirements.txt index 5824f07218..27687edf4e 100644 --- a/DocSum/ui/gradio/requirements.txt +++ b/DocSum/ui/gradio/requirements.txt @@ -1,5 +1,5 @@ docx2txt -gradio==5.11.0 +gradio>5.22.0,<=5.34.0 langchain_community moviepy==1.0.3 numpy==1.26.4 diff --git a/DocSum/ui/svelte/package.json b/DocSum/ui/svelte/package.json index d5efc343c0..f6054bc229 100644 --- a/DocSum/ui/svelte/package.json +++ b/DocSum/ui/svelte/package.json @@ -27,7 +27,7 @@ "devDependencies": { "@playwright/test": "^1.44.1", "@sveltejs/adapter-auto": "^3.0.0", - "@sveltejs/kit": "2.0.0", + "@sveltejs/kit": "2.20.6", "@sveltejs/package": "^2.0.0", "@sveltejs/vite-plugin-svelte": "^3.0.0", "autoprefixer": "^10.4.16", diff --git a/EdgeCraftRAG/Dockerfile.server b/EdgeCraftRAG/Dockerfile.server old mode 100755 new mode 100644 index 4ac52700a8..f910252616 --- a/EdgeCraftRAG/Dockerfile.server +++ b/EdgeCraftRAG/Dockerfile.server @@ -1,30 +1,17 @@ FROM python:3.11-slim SHELL ["/bin/bash", "-o", "pipefail", "-c"] - -RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ - libjemalloc-dev \ - libmagic1 \ - libglib2.0-0 \ - poppler-utils \ - tesseract-ocr - -RUN apt-get update && apt-get install -y gnupg wget git -RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \ - gpg --yes --dearmor --output 
/usr/share/keyrings/intel-graphics.gpg -RUN echo "deb [arch=amd64,i386 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy client" | \ - tee /etc/apt/sources.list.d/intel-gpu-jammy.list -RUN apt-get update && apt-get install -y \ - intel-opencl-icd intel-level-zero-gpu \ - intel-level-zero-gpu-raytracing \ - intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \ - libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \ - libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \ - mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo +RUN apt-get update && apt-get install -y gnupg2 wget git +RUN apt-get remove -y libze-intel-gpu1 libigc1 libigdfcl1 libze-dev || true; \ + apt-get update; \ + apt-get install -y curl +RUN curl -sL 'https://keyserver.ubuntu.com/pks/lookup?fingerprint=on&op=get&search=0x0C0E6AF955CE463C03FC51574D098D70AFBE5E1F' | tee /etc/apt/trusted.gpg.d/driver.asc +RUN echo -e "Types: deb\nURIs: https://ppa.launchpadcontent.net/kobuk-team/intel-graphics/ubuntu/\nSuites: plucky\nComponents: main\nSigned-By: /etc/apt/trusted.gpg.d/driver.asc" > /etc/apt/sources.list.d/driver.sources +RUN apt-get update && apt-get install -y libze-intel-gpu1 libze1 intel-metrics-discovery intel-opencl-icd clinfo intel-gsc && apt-get install -y libze-intel-gpu1 libze1 intel-metrics-discovery intel-opencl-icd clinfo intel-gsc && apt-get install -y libze-dev intel-ocloc libze-intel-gpu-raytracing RUN useradd -m -s /bin/bash user && \ mkdir -p /home/user && \ - chown -R user /home/user/ + chown -R user /home/user/ RUN mkdir /templates && \ chown -R user /templates @@ -33,17 +20,21 @@ RUN chown -R user /templates/default_prompt.txt COPY ./edgecraftrag /home/user/edgecraftrag -RUN mkdir -p /home/user/ui_cache +RUN mkdir -p /home/user/ui_cache ENV UI_UPLOAD_PATH=/home/user/ui_cache USER user WORKDIR /home/user/edgecraftrag -RUN pip install --no-cache-dir --upgrade pip setuptools==70.0.0 && \ - pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r requirements.txt +RUN pip3 install --no-cache-dir --upgrade setuptools==70.0.0 --break-system-packages && \ + pip3 install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r requirements.txt --break-system-packages + +RUN pip3 install --no-cache-dir docarray==0.40.0 --break-system-packages WORKDIR /home/user/ RUN git clone https://github.com/openvinotoolkit/openvino.genai.git genai ENV PYTHONPATH="$PYTHONPATH:/home/user/genai/tools/llm_bench" -ENTRYPOINT ["python", "-m", "edgecraftrag.server"] \ No newline at end of file +RUN python3 -m nltk.downloader -d /home/user/nltk_data punkt_tab averaged_perceptron_tagger_eng + +ENTRYPOINT ["python3", "-m", "edgecraftrag.server"] diff --git a/EdgeCraftRAG/README.md b/EdgeCraftRAG/README.md old mode 100755 new mode 100644 index 314feb3480..0f225c9b91 --- a/EdgeCraftRAG/README.md +++ b/EdgeCraftRAG/README.md @@ -5,6 +5,13 @@ Retrieval-Augmented Generation system for edge solutions. It is designed to curate the RAG pipeline to meet hardware requirements at edge with guaranteed quality and performance. +## What's New + +1. Support Agent component and enable deep_search agent +2. Optimize pipeline execution performance with asynchronous api +3. Support session list display in UI +4. Support vllm-based embedding service + ## Table of contents 1. 
[Architecture](#architecture) diff --git a/EdgeCraftRAG/assets/img/kbadmin_index.png b/EdgeCraftRAG/assets/img/kbadmin_index.png new file mode 100644 index 0000000000..7383a01c79 Binary files /dev/null and b/EdgeCraftRAG/assets/img/kbadmin_index.png differ diff --git a/EdgeCraftRAG/assets/img/kbadmin_kb.png b/EdgeCraftRAG/assets/img/kbadmin_kb.png new file mode 100644 index 0000000000..40f6909a9b Binary files /dev/null and b/EdgeCraftRAG/assets/img/kbadmin_kb.png differ diff --git a/EdgeCraftRAG/assets/img/kbadmin_type.png b/EdgeCraftRAG/assets/img/kbadmin_type.png new file mode 100644 index 0000000000..012ebb0bd8 Binary files /dev/null and b/EdgeCraftRAG/assets/img/kbadmin_type.png differ diff --git a/EdgeCraftRAG/chatqna.py b/EdgeCraftRAG/chatqna.py old mode 100755 new mode 100644 index 1073a66eb4..48b7359cb4 --- a/EdgeCraftRAG/chatqna.py +++ b/EdgeCraftRAG/chatqna.py @@ -44,7 +44,7 @@ async def handle_request(self, request: Request): input = await request.json() stream_opt = input.get("stream", False) input["user"] = request.headers.get("sessionid", None) - chat_request = ChatCompletionRequest.parse_obj(input) + chat_request = ChatCompletionRequest.construct(**input) parameters = LLMParams( max_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024, top_k=chat_request.top_k if chat_request.top_k else 10, diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/README.md b/EdgeCraftRAG/docker_compose/intel/gpu/arc/README.md index d4d283d8f1..9a5663ddb2 100755 --- a/EdgeCraftRAG/docker_compose/intel/gpu/arc/README.md +++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/README.md @@ -10,25 +10,27 @@ This document outlines the deployment process for Edge Craft Retrieval-Augmented This section describes how to quickly deploy and test the EdgeCraftRAG service manually on Intel® Arc® platform. The basic steps are: -1. [Prerequisites](#prerequisites) -2. [Access the Code](#access-the-code) -3. [Prepare models](#prepare-models) -4. [Prepare env variables and configurations](#prepare-env-variables-and-configurations) -5. [Configure the Deployment Environment](#configure-the-deployment-environment) -6. [Deploy the Service Using Docker Compose](#deploy-the-service-using-docker-compose) -7. [Access UI](#access-ui) -8. [Cleanup the Deployment](#cleanup-the-deployment) +1. [Prerequisites](#1-prerequisites) +2. [Access the Code](#2-access-the-code) +3. [Prepare models](#3-prepare-models) +4. [Prepare env variables and configurations](#4-prepare-env-variables-and-configurations) +5. [Deploy the Service on Arc GPU Using Docker Compose](#5-deploy-the-service-on-intel-gpu-using-docker-compose) +6. [Access UI](#6-access-ui) +7. [Cleanup the Deployment](#7-cleanup-the-deployment) -### Prerequisites +### 1. Prerequisites EC-RAG supports vLLM deployment(default method) and local OpenVINO deployment for Intel Arc GPU. Prerequisites are shown as below: Hardware: Intel Arc A770 OS: Ubuntu Server 22.04.1 or newer (at least 6.2 LTS kernel) Driver & libraries: please to [Installing GPUs Drivers](https://dgpu-docs.intel.com/driver/installation-rolling.html#installing-gpu-drivers) for detailed driver & libraries setup +Hardware: Intel Arc B60 +please refer to [Install Native Environment](https://github.com/intel/llm-scaler/tree/main/vllm#11-install-native-environment) for detailed setup + Below steps are based on **vLLM** as inference engine, if you want to choose **OpenVINO**, please refer to [OpenVINO Local Inference](../../../../docs/Advanced_Setup.md#openvino-local-inference) -### Access the Code +### 2.
Access the Code Clone the GenAIExample repository and access the EdgeCraftRAG Intel® Arc® platform Docker Compose files and supporting scripts: @@ -43,7 +45,7 @@ Checkout a released version, such as v1.3: git checkout v1.3 ``` -### Prepare models +### 3. Prepare models ```bash # Prepare models for embedding, reranking: @@ -62,9 +64,7 @@ modelscope download --model $LLM_MODEL --local_dir "${MODEL_PATH}/${LLM_MODEL}" # huggingface-cli download $LLM_MODEL --local-dir "${MODEL_PATH}/${LLM_MODEL}" ``` -### Prepare env variables and configurations - -Below steps are for single Intel Arc GPU inference, if you want to setup multi Intel Arc GPUs inference, please refer to [Multi-ARC Setup](../../../../docs/Advanced_Setup.md#multi-arc-setup) +### 4. Prepare env variables and configurations #### Prepare env variables for vLLM deployment @@ -77,32 +77,25 @@ export HOST_IP=$ip_address # Your host ip export VIDEOGROUPID=$(getent group video | cut -d: -f3) export RENDERGROUPID=$(getent group render | cut -d: -f3) -# If you have a proxy configured, uncomment below line -# export no_proxy=${no_proxy},${HOST_IP},edgecraftrag,edgecraftrag-server -# export NO_PROXY=${NO_PROXY},${HOST_IP},edgecraftrag,edgecraftrag-server +# If you have a proxy configured, execute below line +export no_proxy=${no_proxy},${HOST_IP},edgecraftrag,edgecraftrag-server +export NO_PROXY=${NO_PROXY},${HOST_IP},edgecraftrag,edgecraftrag-server # If you have a HF mirror configured, it will be imported to the container # export HF_ENDPOINT=https://hf-mirror.com # your HF mirror endpoint" # Make sure all 3 folders have 1000:1000 permission, otherwise -# chown 1000:1000 ${MODEL_PATH} ${PWD} # the default value of DOC_PATH and TMPFILE_PATH is PWD ,so here we give permission to ${PWD} +export DOC_PATH=${PWD}/tests +export TMPFILE_PATH=${PWD}/tests +chown 1000:1000 ${MODEL_PATH} ${DOC_PATH} ${TMPFILE_PATH} # In addition, also make sure the .cache folder has 1000:1000 permission, otherwise -# chown 1000:1000 -R $HOME/.cache +chown 1000:1000 -R $HOME/.cache ``` For more advanced env variables and configurations, please refer to [Prepare env variables for vLLM deployment](../../../../docs/Advanced_Setup.md#prepare-env-variables-for-vllm-deployment) -#### Generate nginx config file +### 5. Deploy the Service on Intel GPU Using Docker Compose -```bash -export VLLM_SERVICE_PORT_0=8100 # You can set your own port for vllm service -# Generate your nginx config file -# nginx-conf-generator.sh requires 2 parameters: DP_NUM and output filepath -bash nginx/nginx-conf-generator.sh 1 nginx/nginx.conf -# set NGINX_CONFIG_PATH -export NGINX_CONFIG_PATH="${PWD}/nginx/nginx.conf" -``` - -### Deploy the Service Using Docker Compose +set Milvus DB and chat history round for inference: ```bash # EC-RAG support Milvus as persistent database, by default milvus is disabled, you can choose to set MILVUS_ENABLED=1 to enable it @@ -112,12 +105,40 @@ export MILVUS_ENABLED=0 # EC-RAG support chat history round setting, by default chat history is disabled, you can set CHAT_HISTORY_ROUND to control it # export CHAT_HISTORY_ROUND= # change to your preference +``` + +#### option a. Deploy the Service on Arc A770 Using Docker Compose + +```bash +export VLLM_SERVICE_PORT_A770=8086 # You can set your own port for vllm service # Launch EC-RAG service with compose -docker compose -f docker_compose/intel/gpu/arc/compose_vllm.yaml up -d +docker compose --profile a770 -f docker_compose/intel/gpu/arc/compose.yaml up -d +``` + +#### option b. 
Deploy the Service on Arc B60 Using Docker Compose + +```bash +# Besides MILVUS_ENABLED and CHAT_HISTORY_ROUND, below environments are exposed for vLLM config, you can change them to your preference: +# export VLLM_SERVICE_PORT_B60=8086 +# export DTYPE=float16 +# export TP=1 # for multi GPU, you can change TP value +# export DP=1 +# export ZE_AFFINITY_MASK=0 # for multi GPU, you can export ZE_AFFINITY_MASK=0,1,2... +# export ENFORCE_EAGER=1 +# export TRUST_REMOTE_CODE=1 +# export DISABLE_SLIDING_WINDOW=1 +# export GPU_MEMORY_UTIL=0.8 +# export NO_ENABLE_PREFIX_CACHING=1 +# export MAX_NUM_BATCHED_TOKENS=8192 +# export DISABLE_LOG_REQUESTS=1 +# export MAX_MODEL_LEN=49152 +# export BLOCK_SIZE=64 +# export QUANTIZATION=fp8 +docker compose --profile b60 -f docker_compose/intel/gpu/arc/compose.yaml up -d ``` -### Access UI +### 6. Access UI Open your browser, access http://${HOST_IP}:8082 @@ -126,12 +147,12 @@ Open your browser, access http://${HOST_IP}:8082 Below is the UI front page, for detailed operations on UI and EC-RAG settings, please refer to [Explore_Edge_Craft_RAG](../../../../docs/Explore_Edge_Craft_RAG.md) ![front_page](../../../../assets/img/front_page.png) -### Cleanup the Deployment +### 7. Cleanup the Deployment To stop the containers associated with the deployment, execute the following command: ``` -docker compose -f docker_compose/intel/gpu/arc/compose_vllm.yaml down +docker compose -f docker_compose/intel/gpu/arc/compose.yaml down ``` All the EdgeCraftRAG containers will be stopped and then removed on completion of the "down" command. diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml old mode 100755 new mode 100644 index 488bccdf7a..707be2328c --- a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml +++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml @@ -1,9 +1,11 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 + services: etcd: container_name: milvus-etcd image: quay.io/coreos/etcd:v3.5.5 + restart: always environment: - ETCD_AUTO_COMPACTION_MODE=revision - ETCD_AUTO_COMPACTION_RETENTION=1000 @@ -22,6 +24,7 @@ services: minio: container_name: milvus-minio image: minio/minio:RELEASE.2023-03-20T20-16-18Z + restart: always environment: MINIO_ACCESS_KEY: minioadmin MINIO_SECRET_KEY: minioadmin @@ -41,6 +44,7 @@ services: milvus-standalone: container_name: milvus-standalone image: milvusdb/milvus:v2.4.6 + restart: always command: ["milvus", "run", "standalone"] security_opt: - seccomp:unconfined @@ -48,7 +52,7 @@ services: ETCD_ENDPOINTS: etcd:2379 MINIO_ADDRESS: minio:9000 volumes: - - ./milvus.yaml:/milvus/configs/milvus.yaml + - ./milvus-config.yaml:/milvus/configs/milvus.yaml - ${DOCKER_VOLUME_DIRECTORY:-${PWD}}/volumes/milvus:/var/lib/milvus healthcheck: test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"] @@ -71,16 +75,16 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HF_ENDPOINT: ${HF_ENDPOINT} - vLLM_ENDPOINT: ${vLLM_ENDPOINT:-http://${HOST_IP}:${NGINX_PORT:-8086}} + vLLM_ENDPOINT: ${vLLM_ENDPOINT:-http://${HOST_IP}:${VLLM_SERVICE_PORT_B60:-8086}} + LLM_MODEL: ${LLM_MODEL} ENABLE_BENCHMARK: ${ENABLE_BENCHMARK:-false} - MAX_MODEL_LEN: ${MAX_MODEL_LEN:-5000} + MAX_MODEL_LEN: ${MAX_MODEL_LEN:-49152} CHAT_HISTORY_ROUND: ${CHAT_HISTORY_ROUND:-0} + METADATA_DATABASE_URL: ${METADATA_DATABASE_URL:-""} volumes: - ${MODEL_PATH:-${PWD}}:/home/user/models - ${DOC_PATH:-${PWD}}:/home/user/docs - 
${TMPFILE_PATH:-${PWD}}:/home/user/ui_cache - - ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache - ${PROMPT_PATH:-${PWD}}:/templates/custom restart: always ports: @@ -127,6 +131,96 @@ services: depends_on: - edgecraftrag-server - ecrag + llm-serving-xpu-b60: + container_name: ipex-serving-xpu-container + image: intel/llm-scaler-vllm:1.1-preview + privileged: true + restart: always + ports: + - ${VLLM_SERVICE_PORT_B60:-8086}:${VLLM_SERVICE_PORT_B60:-8086} + volumes: + - ${MODEL_PATH}:/workspace/vllm/models + devices: + - /dev/dri:/dev/dri + environment: + DTYPE: ${DTYPE:-float16} + VLLM_SERVICE_PORT_B60: ${VLLM_SERVICE_PORT_B60:-8086} + ZE_AFFINITY_MASK: ${ZE_AFFINITY_MASK:-0} + ENFORCE_EAGER: ${ENFORCE_EAGER:-1} + TRUST_REMOTE_CODE: ${TRUST_REMOTE_CODE:-1} + DISABLE_SLIDING_WINDOW: ${DISABLE_SLIDING_WINDOW:-1} + GPU_MEMORY_UTIL: ${GPU_MEMORY_UTIL:-0.8} + NO_ENABLE_PREFIX_CACHING: ${NO_ENABLE_PREFIX_CACHING:-1} + MAX_NUM_BATCHED_TOKENS: ${MAX_NUM_BATCHED_TOKENS:-8192} + DISABLE_LOG_REQUESTS: ${DISABLE_LOG_REQUESTS:-1} + MAX_MODEL_LEN: ${MAX_MODEL_LEN:-49152} + BLOCK_SIZE: ${BLOCK_SIZE:-64} + QUANTIZATION: ${QUANTIZATION:-fp8} + LLM_MODEL: ${LLM_MODEL} + TP: ${TP:-1} + DP: ${DP:-1} + entrypoint: + /bin/bash -c " + cd /workspace/vllm/models && source /opt/intel/oneapi/setvars.sh --force && + VLLM_OFFLOAD_WEIGHTS_BEFORE_QUANT=1 \ + TORCH_LLM_ALLREDUCE=1 \ + VLLM_USE_V1=1 \ + CCL_ZE_IPC_EXCHANGE=pidfd \ + VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \ + VLLM_WORKER_MULTIPROC_METHOD=spawn \ + python3 -m vllm.entrypoints.openai.api_server \ + --model $${LLM_MODEL} \ + --dtype $${DTYPE} \ + --enforce-eager \ + --port $${VLLM_SERVICE_PORT_B60} \ + --trust-remote-code \ + --disable-sliding-window \ + --gpu-memory-util $${GPU_MEMORY_UTIL} \ + --no-enable-prefix-caching \ + --max-num-batched-tokens $${MAX_NUM_BATCHED_TOKENS} \ + --disable-log-requests \ + --max-model-len $${MAX_MODEL_LEN} \ + --block-size $${BLOCK_SIZE} \ + --quantization $${QUANTIZATION} \ + -tp=$${TP} \ + -dp=$${DP}" + profiles: + - b60 + llm-serving-xpu-770: + container_name: ipex-llm-serving-xpu-770 + image: intelanalytics/ipex-llm-serving-xpu:0.8.3-b20 + privileged: true + restart: always + ports: + - ${VLLM_SERVICE_PORT_A770:-8086}:${VLLM_SERVICE_PORT_A770:-8086} + group_add: + - video + - ${VIDEOGROUPID:-44} + - ${RENDERGROUPID:-109} + volumes: + - ${LLM_MODEL_PATH:-${MODEL_PATH}/${LLM_MODEL}}:/llm/models + devices: + - /dev/dri + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + MODEL_PATH: "/llm/models" + SERVED_MODEL_NAME: ${LLM_MODEL} + TENSOR_PARALLEL_SIZE: ${TENSOR_PARALLEL_SIZE:-1} + MAX_NUM_SEQS: ${MAX_NUM_SEQS:-64} + MAX_NUM_BATCHED_TOKENS: ${MAX_NUM_BATCHED_TOKENS:-10240} + MAX_MODEL_LEN: ${MAX_MODEL_LEN:-10240} + LOAD_IN_LOW_BIT: ${LOAD_IN_LOW_BIT:-fp8} + CCL_DG2_USM: ${CCL_DG2_USM:-""} + PORT: ${VLLM_SERVICE_PORT_A770:-8086} + ZE_AFFINITY_MASK: ${SELECTED_XPU_0:-0} + shm_size: '32g' + entrypoint: /bin/bash -c "\ + cd /llm && \ + bash start-vllm-service.sh" + profiles: + - a770 networks: default: driver: bridge diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml deleted file mode 100644 index a1afa20eb9..0000000000 --- a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml +++ /dev/null @@ -1,177 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -services: - etcd: - container_name: milvus-etcd - image: quay.io/coreos/etcd:v3.5.5 - environment: - - 
ETCD_AUTO_COMPACTION_MODE=revision - - ETCD_AUTO_COMPACTION_RETENTION=1000 - - ETCD_QUOTA_BACKEND_BYTES=4294967296 - - ETCD_SNAPSHOT_COUNT=50000 - volumes: - - ${DOCKER_VOLUME_DIRECTORY:-${PWD}}/volumes/etcd:/etcd - command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd - healthcheck: - test: ["CMD", "etcdctl", "endpoint", "health"] - interval: 30s - timeout: 20s - retries: 3 - deploy: - replicas: ${MILVUS_ENABLED:-0} - minio: - container_name: milvus-minio - image: minio/minio:RELEASE.2023-03-20T20-16-18Z - environment: - MINIO_ACCESS_KEY: minioadmin - MINIO_SECRET_KEY: minioadmin - ports: - - "${MINIO_PORT1:-5044}:9001" - - "${MINIO_PORT2:-5043}:9000" - volumes: - - ${DOCKER_VOLUME_DIRECTORY:-${PWD}}/volumes/minio:/minio_data - command: minio server /minio_data --console-address ":9001" - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] - interval: 30s - timeout: 20s - retries: 3 - deploy: - replicas: ${MILVUS_ENABLED:-0} - milvus-standalone: - container_name: milvus-standalone - image: milvusdb/milvus:v2.4.6 - command: ["milvus", "run", "standalone"] - security_opt: - - seccomp:unconfined - environment: - ETCD_ENDPOINTS: etcd:2379 - MINIO_ADDRESS: minio:9000 - volumes: - - ./milvus.yaml:/milvus/configs/milvus.yaml - - ${DOCKER_VOLUME_DIRECTORY:-${PWD}}/volumes/milvus:/var/lib/milvus - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"] - interval: 30s - start_period: 90s - timeout: 20s - retries: 3 - ports: - - "19530:19530" - - "${MILVUS_STANDALONE_PORT:-9091}:9091" - depends_on: - - "etcd" - - "minio" - deploy: - replicas: ${MILVUS_ENABLED:-0} - edgecraftrag-server: - image: ${REGISTRY:-opea}/edgecraftrag-server:${TAG:-latest} - container_name: edgecraftrag-server - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HF_ENDPOINT: ${HF_ENDPOINT} - vLLM_ENDPOINT: ${vLLM_ENDPOINT:-http://${HOST_IP}:${NGINX_PORT:-8086}} - LLM_MODEL: ${LLM_MODEL} - ENABLE_BENCHMARK: ${ENABLE_BENCHMARK:-false} - MAX_MODEL_LEN: ${MAX_MODEL_LEN:-5000} - CHAT_HISTORY_ROUND: ${CHAT_HISTORY_ROUND:-0} - volumes: - - ${MODEL_PATH:-${PWD}}:/home/user/models - - ${DOC_PATH:-${PWD}}:/home/user/docs - - ${TMPFILE_PATH:-${PWD}}:/home/user/ui_cache - - ${HF_CACHE:-${HOME}/.cache}:/home/user/.cache - - ${PROMPT_PATH:-${PWD}}:/templates/custom - restart: always - ports: - - ${PIPELINE_SERVICE_PORT:-16010}:${PIPELINE_SERVICE_PORT:-16010} - devices: - - /dev/dri:/dev/dri - group_add: - - ${VIDEOGROUPID:-44} - - ${RENDERGROUPID:-109} - ecrag: - image: ${REGISTRY:-opea}/edgecraftrag:${TAG:-latest} - container_name: edgecraftrag - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT:-16011} - MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}} - PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010} - PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}} - restart: always - ports: - - ${MEGA_SERVICE_PORT:-16011}:${MEGA_SERVICE_PORT:-16011} - depends_on: - - edgecraftrag-server - nginx: - image: nginx:latest - restart: always - ports: - - ${NGINX_PORT:-8086}:8086 - volumes: - - ${NGINX_CONFIG_PATH:-${PWD}}:/etc/nginx/nginx.conf - depends_on: - - edgecraftrag-server - edgecraftrag-ui: - image: ${REGISTRY:-opea}/edgecraftrag-ui:${TAG:-latest} - container_name: edgecraftrag-ui - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: 
${https_proxy} - MEGA_SERVICE_PORT: ${MEGA_SERVICE_PORT:-16011} - MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP:-${HOST_IP}} - PIPELINE_SERVICE_PORT: ${PIPELINE_SERVICE_PORT:-16010} - PIPELINE_SERVICE_HOST_IP: ${PIPELINE_SERVICE_HOST_IP:-${HOST_IP}} - UI_SERVICE_PORT: ${UI_SERVICE_PORT:-8082} - UI_SERVICE_HOST_IP: ${UI_SERVICE_HOST_IP:-0.0.0.0} - volumes: - - ${TMPFILE_PATH:-${PWD}}:/home/user/ui_cache - restart: always - ports: - - ${UI_SERVICE_PORT:-8082}:${UI_SERVICE_PORT:-8082} - depends_on: - - edgecraftrag-server - - ecrag - llm-serving-xpu-0: - container_name: ipex-llm-serving-xpu-container-0 - image: intelanalytics/ipex-llm-serving-xpu:0.8.3-b20 - privileged: true - restart: always - ports: - - ${VLLM_SERVICE_PORT_0:-8100}:${VLLM_SERVICE_PORT_0:-8100} - group_add: - - video - - ${VIDEOGROUPID:-44} - - ${RENDERGROUPID:-109} - volumes: - - ${LLM_MODEL_PATH:-${MODEL_PATH}/${LLM_MODEL}}:/llm/models - devices: - - /dev/dri - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HF_ENDPOINT: ${HF_ENDPOINT} - MODEL_PATH: "/llm/models" - SERVED_MODEL_NAME: ${LLM_MODEL} - TENSOR_PARALLEL_SIZE: ${TENSOR_PARALLEL_SIZE:-1} - MAX_NUM_SEQS: ${MAX_NUM_SEQS:-64} - MAX_NUM_BATCHED_TOKENS: ${MAX_NUM_BATCHED_TOKENS:-10240} - MAX_MODEL_LEN: ${MAX_MODEL_LEN:-10240} - LOAD_IN_LOW_BIT: ${LOAD_IN_LOW_BIT:-fp8} - CCL_DG2_USM: ${CCL_DG2_USM:-""} - PORT: ${VLLM_SERVICE_PORT_0:-8100} - ZE_AFFINITY_MASK: ${SELECTED_XPU_0:-0} - shm_size: '32g' - entrypoint: /bin/bash -c "\ - cd /llm && \ - bash start-vllm-service.sh" -networks: - default: - driver: bridge diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/milvus.yaml b/EdgeCraftRAG/docker_compose/intel/gpu/arc/milvus-config.yaml similarity index 100% rename from EdgeCraftRAG/docker_compose/intel/gpu/arc/milvus.yaml rename to EdgeCraftRAG/docker_compose/intel/gpu/arc/milvus-config.yaml diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/multi-arc-yaml-generator.sh b/EdgeCraftRAG/docker_compose/intel/gpu/arc/multi-arc-yaml-generator.sh deleted file mode 100644 index 2a819eb552..0000000000 --- a/EdgeCraftRAG/docker_compose/intel/gpu/arc/multi-arc-yaml-generator.sh +++ /dev/null @@ -1,202 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# Check if the correct number of arguments is provided -if [ "$#" -ne 2 ]; then - echo "Usage: $0 DP_NUM output-file-path" - exit 1 -fi - -# Get the port number from the command line argument -PORT_NUM=$1 - -# Start generating the Nginx configuration -cat < $2 -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -services: - etcd: - container_name: milvus-etcd - image: quay.io/coreos/etcd:v3.5.5 - environment: - - ETCD_AUTO_COMPACTION_MODE=revision - - ETCD_AUTO_COMPACTION_RETENTION=1000 - - ETCD_QUOTA_BACKEND_BYTES=4294967296 - - ETCD_SNAPSHOT_COUNT=50000 - volumes: - - \${DOCKER_VOLUME_DIRECTORY:-\${PWD}}/volumes/etcd:/etcd - command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd - healthcheck: - test: ["CMD", "etcdctl", "endpoint", "health"] - interval: 30s - timeout: 20s - retries: 3 - deploy: - replicas: \${MILVUS_ENABLED:-0} - minio: - container_name: milvus-minio - image: minio/minio:RELEASE.2023-03-20T20-16-18Z - environment: - MINIO_ACCESS_KEY: minioadmin - MINIO_SECRET_KEY: minioadmin - ports: - - "\${MINIO_PORT1:-5044}:9001" - - "\${MINIO_PORT2:-5043}:9000" - volumes: - - \${DOCKER_VOLUME_DIRECTORY:-\${PWD}}/volumes/minio:/minio_data 
- command: minio server /minio_data --console-address ":9001" - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] - interval: 30s - timeout: 20s - retries: 3 - deploy: - replicas: \${MILVUS_ENABLED:-0} - milvus-standalone: - container_name: milvus-standalone - image: milvusdb/milvus:v2.4.6 - command: ["milvus", "run", "standalone"] - security_opt: - - seccomp:unconfined - environment: - ETCD_ENDPOINTS: etcd:2379 - MINIO_ADDRESS: minio:9000 - volumes: - - ./milvus.yaml:/milvus/configs/milvus.yaml - - \${DOCKER_VOLUME_DIRECTORY:-\${PWD}}/volumes/milvus:/var/lib/milvus - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"] - interval: 30s - start_period: 90s - timeout: 20s - retries: 3 - ports: - - "19530:19530" - - "\${MILVUS_STANDALONE_PORT:-9091}:9091" - depends_on: - - "etcd" - - "minio" - deploy: - replicas: \${MILVUS_ENABLED:-0} - edgecraftrag-server: - image: \${REGISTRY:-opea}/edgecraftrag-server:\${TAG:-latest} - container_name: edgecraftrag-server - environment: - no_proxy: \${no_proxy} - http_proxy: \${http_proxy} - https_proxy: \${https_proxy} - HF_ENDPOINT: \${HF_ENDPOINT} - vLLM_ENDPOINT: \${vLLM_ENDPOINT:-http://\${HOST_IP}:\${NGINX_PORT:-8086}} - LLM_MODEL: \${LLM_MODEL} - ENABLE_BENCHMARK: \${ENABLE_BENCHMARK:-false} - MAX_MODEL_LEN: \${MAX_MODEL_LEN:-5000} - CHAT_HISTORY_ROUND: \${CHAT_HISTORY_ROUND:-0} - volumes: - - \${MODEL_PATH:-\${PWD}}:/home/user/models - - \${DOC_PATH:-\${PWD}}:/home/user/docs - - \${TMPFILE_PATH:-\${PWD}}:/home/user/ui_cache - - \${HF_CACHE:-\${HOME}/.cache}:/home/user/.cache - - \${PROMPT_PATH:-\${PWD}}:/templates/custom - restart: always - ports: - - \${PIPELINE_SERVICE_PORT:-16010}:\${PIPELINE_SERVICE_PORT:-16010} - devices: - - /dev/dri:/dev/dri - group_add: - - \${VIDEOGROUPID:-44} - - \${RENDERGROUPID:-109} - ecrag: - image: \${REGISTRY:-opea}/edgecraftrag:\${TAG:-latest} - container_name: edgecraftrag - environment: - no_proxy: \${no_proxy} - http_proxy: \${http_proxy} - https_proxy: \${https_proxy} - MEGA_SERVICE_PORT: \${MEGA_SERVICE_PORT:-16011} - MEGA_SERVICE_HOST_IP: \${MEGA_SERVICE_HOST_IP:-\${HOST_IP}} - PIPELINE_SERVICE_PORT: \${PIPELINE_SERVICE_PORT:-16010} - PIPELINE_SERVICE_HOST_IP: \${PIPELINE_SERVICE_HOST_IP:-\${HOST_IP}} - restart: always - ports: - - \${MEGA_SERVICE_PORT:-16011}:\${MEGA_SERVICE_PORT:-16011} - depends_on: - - edgecraftrag-server - nginx: - image: nginx:latest - restart: always - ports: - - \${NGINX_PORT:-8086}:8086 - volumes: - - \${NGINX_CONFIG_PATH:-\${PWD}}:/etc/nginx/nginx.conf - depends_on: - - edgecraftrag-server - edgecraftrag-ui: - image: \${REGISTRY:-opea}/edgecraftrag-ui:\${TAG:-latest} - container_name: edgecraftrag-ui - environment: - no_proxy: \${no_proxy} - http_proxy: \${http_proxy} - https_proxy: \${https_proxy} - MEGA_SERVICE_PORT: \${MEGA_SERVICE_PORT:-16011} - MEGA_SERVICE_HOST_IP: \${MEGA_SERVICE_HOST_IP:-\${HOST_IP}} - PIPELINE_SERVICE_PORT: \${PIPELINE_SERVICE_PORT:-16010} - PIPELINE_SERVICE_HOST_IP: \${PIPELINE_SERVICE_HOST_IP:-\${HOST_IP}} - UI_SERVICE_PORT: \${UI_SERVICE_PORT:-8082} - UI_SERVICE_HOST_IP: \${UI_SERVICE_HOST_IP:-0.0.0.0} - volumes: - - \${TMPFILE_PATH:-\${PWD}}:/home/user/ui_cache - restart: always - ports: - - \${UI_SERVICE_PORT:-8082}:\${UI_SERVICE_PORT:-8082} - depends_on: - - edgecraftrag-server - - ecrag -EOL - -for ((i = 0; i < PORT_NUM; i++)); do - cat <> $2 - llm-serving-xpu-$i: - container_name: ipex-llm-serving-xpu-container-$i - image: intelanalytics/ipex-llm-serving-xpu:0.8.3-b20 - privileged: true - 
restart: always - ports: - - \${VLLM_SERVICE_PORT_$i:-8$((i+1))00}:\${VLLM_SERVICE_PORT_$i:-8$((i+1))00} - group_add: - - video - - \${VIDEOGROUPID:-44} - - \${RENDERGROUPID:-109} - volumes: - - \${LLM_MODEL_PATH:-\${MODEL_PATH}/\${LLM_MODEL}}:/llm/models - devices: - - /dev/dri - environment: - no_proxy: \${no_proxy} - http_proxy: \${http_proxy} - https_proxy: \${https_proxy} - HF_ENDPOINT: \${HF_ENDPOINT} - MODEL_PATH: "/llm/models" - SERVED_MODEL_NAME: \${LLM_MODEL} - TENSOR_PARALLEL_SIZE: \${TENSOR_PARALLEL_SIZE:-1} - MAX_NUM_SEQS: \${MAX_NUM_SEQS:-64} - MAX_NUM_BATCHED_TOKENS: \${MAX_NUM_BATCHED_TOKENS:-10240} - MAX_MODEL_LEN: \${MAX_MODEL_LEN:-10240} - LOAD_IN_LOW_BIT: \${LOAD_IN_LOW_BIT:-fp8} - CCL_DG2_USM: \${CCL_DG2_USM:-""} - PORT: \${VLLM_SERVICE_PORT_$i:-8$((i+1))00} - ZE_AFFINITY_MASK: \${SELECTED_XPU_$i:-$i} - shm_size: '32g' - entrypoint: /bin/bash -c "\\ - cd /llm && \\ - bash start-vllm-service.sh" -EOL -done -cat <> $2 -networks: - default: - driver: bridge -EOL - -echo "compose_vllm.yaml generated" diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/set_env.sh b/EdgeCraftRAG/docker_compose/intel/gpu/arc/set_env.sh index b587c32426..3247f3ce91 100755 --- a/EdgeCraftRAG/docker_compose/intel/gpu/arc/set_env.sh +++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/set_env.sh @@ -17,20 +17,15 @@ export HF_TOKEN=${HF_TOKEN} export no_proxy="localhost, 127.0.0.1, 192.168.1.1" export UI_UPLOAD_PATH=${UI_UPLOAD_PATH} export LLM_MODEL_PATH=${LLM_MODEL_PATH} -export NGINX_PORT_0=${NGINX_PORT_0} -export NGINX_PORT_1=${NGINX_PORT_1} -export NGINX_PORT_2=${NGINX_PORT_2} -export VLLM_SERVICE_PORT_0=${VLLM_SERVICE_PORT_0} -export VLLM_SERVICE_PORT_1=${VLLM_SERVICE_PORT_1} -export VLLM_SERVICE_PORT_2=${VLLM_SERVICE_PORT_2} + +export VLLM_SERVICE_PORT_B60=${VLLM_SERVICE_PORT_B60} +export VLLM_SERVICE_PORT_A770=${VLLM_SERVICE_PORT_A770} export TENSOR_PARALLEL_SIZE=${TENSOR_PARALLEL_SIZE} -export NGINX_CONFIG_PATH=${NGINX_CONFIG_PATH} -export SELECTED_XPU_0=${SELECTED_XPU_0} -export SELECTED_XPU_1=${SELECTED_XPU_1} -export SELECTED_XPU_2=${SELECTED_XPU_2} + export vLLM_ENDPOINT=${vLLM_ENDPOINT} export MAX_NUM_SEQS=${MAX_NUM_SEQS} export MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS} export MAX_MODEL_LEN=${MAX_MODEL_LEN} export LOAD_IN_LOW_BIT=${LOAD_IN_LOW_BIT} export CCL_DG2_USM=${CCL_DG2_USM} +export ZE_AFFINITY_MASK=${ZE_AFFINITY_MASK} diff --git a/EdgeCraftRAG/docs/API_Guide.md b/EdgeCraftRAG/docs/API_Guide.md index 804fa45397..c13753596c 100644 --- a/EdgeCraftRAG/docs/API_Guide.md +++ b/EdgeCraftRAG/docs/API_Guide.md @@ -205,3 +205,18 @@ curl -X POST http://${HOST_IP}:16010/v1/chatqna/prompt/reset -H "Content-Type: a ```bash curl -X POST http://${HOST_IP}:16010/v1/chatqna/prompt-file -H "Content-Type: multipart/form-data" -F "file=@your_prompt_file.txt" ``` + +## ChatQnA + +### Retrieval API + +```bash +curl -X POST http://${HOST_IP}:16010/v1/retrieval -H "Content-Type: application/json" -d '{"messages":"#Please enter the question you need to retrieve here#", "top_n":5, "max_tokens":512}' | jq '.' + +``` + +### ChatQnA API + +```bash +curl -X POST http://${HOST_IP}:16011/v1/chatqna -H "Content-Type: application/json" -d '{"messages":"#REPLACE WITH YOUR QUESTION HERE#", "top_n":5, "max_tokens":512}' | jq '.' 
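+# Note: 16011 is the default MEGA_SERVICE_PORT of the EC-RAG megaservice, while the
+# retrieval endpoint above uses the pipeline server on PIPELINE_SERVICE_PORT (16010).
+# "top_n" and "max_tokens" are illustrative per-request values; tune them as needed.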
+``` diff --git a/EdgeCraftRAG/docs/Advanced_Setup.md b/EdgeCraftRAG/docs/Advanced_Setup.md index ef864fc1f4..af1cce8522 100644 --- a/EdgeCraftRAG/docs/Advanced_Setup.md +++ b/EdgeCraftRAG/docs/Advanced_Setup.md @@ -1,49 +1,5 @@ # Edge Craft Retrieval-Augmented Generation Advanced Setup -## Query Search - -Query Search features allow EC-RAG to do query pre-process before retrieval and reranking. To enable query search, vllm inference is required. Chinese version is available in [Query Search Zh](Query_Search_Zh.md) - -### 1. Sub-question file(s) example - -Sub-question files need to end with `.json` and follow json file format: main question as json key, sub-questions as json value. See below example: - -```json -{ - "Issue1": "Sub-question1.1? Sub-question1.2?", - "Issue2": "Sub-question2.1? Sub-question2.2? Sub-question2.3?" -} -``` - -> Note: 1. At lease one sub-question file is required. 2. Increasing main question amount would increase query time for EC-RAG. - -### 2. Sub-question file(s) location - -All sub-question files need to be placed under `${TMPFILE_PATH}/configs/search_dir`. - -### 3. Config file example - -Configure file includes variables such as prompts, temperature, etc. - -`instruction`, `input_template`, `output_template` would affect final prompt for query search. -`json_key` and `json_levels` are related to each other. For example, if `json_key` is set to "similarity", `json_levels` need list options for "similarity", such as "Low, Medium, High". - -One example for DeesSeep-R1-Distill-Qwen-32B configs is listed below: - -```yaml -query_matcher: - instructions: "Decide similarity of two queries. For exactly the same, mark as High, for totally different, mark as Low.\n" - input_template: " {} \n {} \n" - output_template: "output from {json_levels}.\n" - json_key: "similarity" - json_levels: ["Low", "Medium", "High"] - temperature: 3.7 -``` - -### 4. Config file location - -Config file needs to be placed under `${TMPFILE_PATH}/configs` and named as `search_config.yaml`, which gives final path as `${TMPFILE_PATH}/configs/search_config.yaml`. - ## OpenVINO Local Inference EC-RAG support using local OpenVINO models to do inference, please follow below steps to run local inference: @@ -55,9 +11,9 @@ EC-RAG support using local OpenVINO models to do inference, please follow below ```bash git clone https://github.com/opea-project/GenAIExamples.git cd GenAIExamples/EdgeCraftRAG -docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --build-arg no_proxy="$no_proxy" -t opea/edgecraftrag:latest -f Dockerfile . -docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --build-arg no_proxy="$no_proxy" -t opea/edgecraftrag-server:latest -f Dockerfile.server . -docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --build-arg no_proxy="$no_proxy" -t opea/edgecraftrag-ui:latest -f ui/docker/Dockerfile.ui . +docker build --no-cache --pull --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --build-arg no_proxy="$no_proxy" -t opea/edgecraftrag:latest -f Dockerfile . +docker build --no-cache --pull --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --build-arg no_proxy="$no_proxy" -t opea/edgecraftrag-server:latest -f Dockerfile.server . +docker build --no-cache --pull --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --build-arg no_proxy="$no_proxy" -t opea/edgecraftrag-ui:latest -f ui/docker/Dockerfile.ui . ``` ### 2. 
Prepare models @@ -79,28 +35,22 @@ ip_address=$(hostname -I | awk '{print $1}') # Use `ip a` to check your active ip export HOST_IP=$ip_address # Your host ip -export DOC_PATH=${PWD} # Your doc path for uploading a dir of files -export TMPFILE_PATH=${PWD} # Your UI cache path for transferring files - # Check group id of video and render export VIDEOGROUPID=$(getent group video | cut -d: -f3) export RENDERGROUPID=$(getent group render | cut -d: -f3) -# If you have a proxy configured, uncomment below line -# export no_proxy=${no_proxy},${HOST_IP},edgecraftrag,edgecraftrag-server -# export NO_PROXY=${NO_PROXY},${HOST_IP},edgecraftrag,edgecraftrag-server +# If you have a proxy configured, execute below line +export no_proxy=${no_proxy},${HOST_IP},edgecraftrag,edgecraftrag-server +export NO_PROXY=${NO_PROXY},${HOST_IP},edgecraftrag,edgecraftrag-server # If you have a HF mirror configured, it will be imported to the container # export HF_ENDPOINT=https://hf-mirror.com # your HF mirror endpoint" -# By default, the ports of the containers are set, uncomment if you want to change -# export MEGA_SERVICE_PORT=16011 -# export PIPELINE_SERVICE_PORT=16010 -# export UI_SERVICE_PORT="8082" - # Make sure all 3 folders have 1000:1000 permission, otherwise -# chown 1000:1000 ${MODEL_PATH} ${DOC_PATH} ${TMPFILE_PATH} +export DOC_PATH=${PWD}/tests +export TMPFILE_PATH=${PWD}/tests +chown 1000:1000 -R ${MODEL_PATH} ${DOC_PATH} ${TMPFILE_PATH} # In addition, also make sure the .cache folder has 1000:1000 permission, otherwise -# chown 1000:1000 -R $HOME/.cache +chown 1000:1000 -R $HOME/.cache ``` ### 4. Start Edge Craft RAG Services with Docker Compose @@ -117,82 +67,22 @@ export MILVUS_ENABLED=0 # EC-RAG support pipeline performance benchmark, use ENABLE_BENCHMARK=true/false to turn on/off benchmark # export ENABLE_BENCHMARK= # change to your preference +export MAX_MODEL_LEN=5000 # Launch EC-RAG service with compose docker compose -f docker_compose/intel/gpu/arc/compose.yaml up -d ``` -## Multi-ARC Setup +## EC-RAG with Kbadmin -EC-RAG support run inference with multi-ARC in multiple isolated containers -Docker Images preparation is the same as local inference section, please refer to [Build Docker Images](#1-optional-build-docker-images-for-mega-service-server-and-ui-by-your-own) +EC-RAG support kbadmin as a knowledge base manager +Please make sure all the kbadmin services have been launched +EC-RAG Docker Images preparation is the same as local inference section, please refer to [Build Docker Images](#1-optional-build-docker-images-for-mega-service-server-and-ui-by-your-own) Model preparation is the same as vLLM inference section, please refer to [Prepare models](../docker_compose/intel/gpu/arc/README.md#2-prepare-models) -After docker images preparation and model preparation, please follow below steps to run multi-ARC Setup(Below steps show 2 vLLM container(2 DP) with multi Intel Arc GPUs): -### 1. Prepare env variables and configurations +### 1. 
Start Edge Craft RAG Services with Docker Compose -#### Prepare env variables for vLLM deployment - -```bash -ip_address=$(hostname -I | awk '{print $1}') -# Use `ip a` to check your active ip -export HOST_IP=$ip_address # Your host ip - -# The default LLM_MODEL_PATH is "${MODEL_PATH}/${LLM_MODEL}", you can change to your model path -# export LLM_MODEL_PATH= # change to your model path -export DOC_PATH=${PWD} # Your doc path for uploading a dir of files -export TMPFILE_PATH=${PWD} # Your UI cache path for transferring files - -# Check group id of video and render -export VIDEOGROUPID=$(getent group video | cut -d: -f3) -export RENDERGROUPID=$(getent group render | cut -d: -f3) - -# If you have a proxy configured, uncomment below line -# export no_proxy=${no_proxy},${HOST_IP},edgecraftrag,edgecraftrag-server -# export NO_PROXY=${NO_PROXY},${HOST_IP},edgecraftrag,edgecraftrag-server -# If you have a HF mirror configured, it will be imported to the container -# export HF_ENDPOINT=https://hf-mirror.com # your HF mirror endpoint" - -# By default, the ports of the containers are set, uncomment if you want to change -# export MEGA_SERVICE_PORT=16011 -# export PIPELINE_SERVICE_PORT=16010 -# export UI_SERVICE_PORT="8082" - -# Make sure all 3 folders have 1000:1000 permission, otherwise -# chown 1000:1000 ${MODEL_PATH} ${DOC_PATH} ${TMPFILE_PATH} -# In addition, also make sure the .cache folder has 1000:1000 permission, otherwise -# chown 1000:1000 -R $HOME/.cache - -export NGINX_PORT=8086 # Set port for nginx -export vLLM_ENDPOINT="http://${HOST_IP}:${NGINX_PORT}" -export DP_NUM=2 # How many containers you want to start to run inference -export VLLM_SERVICE_PORT_0=8100 # You can set your own vllm service port -export VLLM_SERVICE_PORT_1=8200 # You can set your own vllm service port -export TENSOR_PARALLEL_SIZE=1 # Your Intel Arc GPU number to do TP inference -export SELECTED_XPU_0=0 # Which GPU to select to run for container 0 -export SELECTED_XPU_1=1 # Which GPU to select to run for container 1 - -# Below are the extra env you can set for vllm -export MAX_NUM_SEQS=64 # MAX_NUM_SEQS value -export MAX_NUM_BATCHED_TOKENS=5000 # MAX_NUM_BATCHED_TOKENS value -export MAX_MODEL_LEN=5000 # MAX_MODEL_LEN value -export LOAD_IN_LOW_BIT=fp8 # the weight type value, expected: sym_int4, asym_int4, sym_int5, asym_int5 or sym_int8 -export CCL_DG2_USM="" # Need to set to 1 on Core to enable USM (Shared Memory GPUDirect). Xeon supports P2P and doesn't need this. -``` - -### 2. Generate nginx config file and compose yaml file - -```bash -# Generate your nginx config file -# nginx-conf-generator.sh requires 2 parameters: DP_NUM and output filepath -bash nginx/nginx-conf-generator.sh $DP_NUM nginx/nginx.conf # You can change TEMP_FILE_PATH to your reference -# set NGINX_CONFIG_PATH -export NGINX_CONFIG_PATH="${PWD}/nginx/nginx.conf" - -# Generate compose_vllm.yaml file -# multi-arc-yaml-generator.sh requires 2 parameters: DP_NUM and output filepath -bash docker_compose/intel/gpu/arc/multi-arc-yaml-generator.sh $DP_NUM docker_compose/intel/gpu/arc/compose_vllm.yaml -``` +This section is the same as default vLLM inference section, please refer to [Prepare env variables and configurations](../docker_compose/intel/gpu/arc/README.md#prepare-env-variables-and-configurations) and [Start Edge Craft RAG Services with Docker Compose](../docker_compose/intel/gpu/arc/README.md#deploy-the-service-on-arc-a770-using-docker-compose) -### 3. Start Edge Craft RAG Services with Docker Compose +### 2. 
Access Kbadmin UI -This section is the same as default vLLM inference section, please refer to [Start Edge Craft RAG Services with Docker Compose](../docker_compose/intel/gpu/arc/README.md#deploy-the-service-using-docker-compose) +please refer to [ChatQnA with Kbadmin in UI](./Explore_Edge_Craft_RAG.md#chatqna-with-kbadmin-in-ui) diff --git a/EdgeCraftRAG/docs/Explore_Edge_Craft_RAG.md b/EdgeCraftRAG/docs/Explore_Edge_Craft_RAG.md index a63b3a9f0e..0a5a91ba5e 100644 --- a/EdgeCraftRAG/docs/Explore_Edge_Craft_RAG.md +++ b/EdgeCraftRAG/docs/Explore_Edge_Craft_RAG.md @@ -33,3 +33,23 @@ After knowledge base creation, you can upload the documents for retrieval. Then, you can submit messages in the chat box in `Chat` page. ![chat_with_rag](../assets/img/chatqna.png) + +## ChatQnA with Kbadmin in UI + +### Kbadmin Pipeline + +In the `Node Parser` page, select 'kbadmin' as node parser. +![kbadmin_node_parser](../assets/img/kbadmin_type.png) + +In the `Indexer` page, input embedding and vector DB information, please note the embedding service port is 13020; the vector DB port is 29530. +![kbadmin_indexer](../assets/img/kbadmin_index.png) + +### Upload files & ChatQnA + +After the pipeline creation, you can go to `Knowledge Base` page and click `Create Knowledge Base` button to create your knowledge base. +Please select 'kbadmin' in `Type`and select kb name from the kbs you created in kbadmin UI page. + +![upload_data](../assets/img/kbadmin_kb.png) + +Then, you can submit messages in the chat box in `Chat` page. +![chat_with_rag](../assets/img/chatqna.png) diff --git a/EdgeCraftRAG/edgecraftrag/VERSION b/EdgeCraftRAG/edgecraftrag/VERSION old mode 100755 new mode 100644 index 40766d6bb4..0198c14133 --- a/EdgeCraftRAG/edgecraftrag/VERSION +++ b/EdgeCraftRAG/edgecraftrag/VERSION @@ -1 +1 @@ -25.05-Release +25.11-Dev diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/agent.py b/EdgeCraftRAG/edgecraftrag/api/v1/agent.py new file mode 100644 index 0000000000..00a1b5db09 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/api/v1/agent.py @@ -0,0 +1,148 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import os +import time + +from edgecraftrag.api_schema import AgentCreateIn +from edgecraftrag.base import AgentType +from edgecraftrag.config_repository import MilvusConfigRepository, save_agent_configurations +from edgecraftrag.context import ctx +from edgecraftrag.env import AGENT_FILE +from fastapi import FastAPI, HTTPException, status + +agent_app = FastAPI() + + +# GET Agents +@agent_app.get(path="/v1/settings/agents") +async def get_all_agents(): + out = [] + agents = ctx.get_agent_mgr().get_agents() + active_id = ctx.get_agent_mgr().get_active_agent_id() + for k, agent in agents.items(): + out.append( + AgentCreateIn( + idx=agent.idx, + name=agent.name, + type=agent.comp_subtype, + pipeline_idx=agent.pipeline_idx, + configs=agent.configs, + active=True if agent.idx == active_id else False, + ) + ) + return out + + +# GET Agent +@agent_app.get(path="/v1/settings/agents/{name}") +async def get_agent(name): + agent = ctx.get_agent_mgr().get_agent_by_name(name) + if agent: + isactive = True if agent.idx == ctx.get_agent_mgr().get_active_agent_id() else False + return AgentCreateIn( + idx=agent.idx, + name=agent.name, + type=agent.comp_subtype, + pipeline_idx=agent.pipeline_idx, + configs=agent.configs, + active=isactive, + ) + else: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND) + + +# POST Agent +@agent_app.post(path="/v1/settings/agents") +async def 
create_agent(request: AgentCreateIn, status_code=status.HTTP_201_CREATED): + try: + agent = ctx.get_agent_mgr().create_agent(request) + if agent: + await save_agent_configurations("add", ctx.get_agent_mgr().get_agents()) + return agent + except (ValueError, Exception) as e: + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) + + +# PATCH Agent +@agent_app.patch(path="/v1/settings/agents/{name}") +async def update_agent(name, request: AgentCreateIn): + try: + agentmgr = ctx.get_agent_mgr() + if agentmgr.get_agent_by_name(name): + ret = agentmgr.update_agent(name, request) + if ret: + await save_agent_configurations("update", ctx.get_agent_mgr().get_agents()) + return ret + else: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND) + except (ValueError, Exception) as e: + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) + + +# DELETE Agent +@agent_app.delete(path="/v1/settings/agents/{name}", status_code=status.HTTP_204_NO_CONTENT) +async def delete_agent(name): + try: + agentmgr = ctx.get_agent_mgr() + if agentmgr.get_agent_by_name(name): + if agentmgr.remove_agent(name): + await save_agent_configurations("delete", ctx.get_agent_mgr().get_agents()) + return + else: + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR) + else: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND) + except (ValueError, Exception) as e: + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) + + +# GET Agent Type default configs +@agent_app.get(path="/v1/settings/agents/configs/{agent_type}") +async def get_agent_default_configs(agent_type): + try: + if agent_type in [e.value for e in AgentType]: + return ctx.get_agent_mgr().get_agent_default_configs(agent_type) + else: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND) + except (ValueError, Exception) as e: + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) + + +# Restore agent configurations +async def restore_agent_configurations(): + milvus_repo = MilvusConfigRepository.create_connection("agent_config", 1) + all_agents = [] + if milvus_repo: + time.sleep(10) + all_agents_repo = milvus_repo.get_configs() + for agent in all_agents_repo: + all_agents.append(agent.get("config_json")) + else: + if os.path.exists(AGENT_FILE): + with open(AGENT_FILE, "r", encoding="utf-8") as f: + all_agents = f.read() + if all_agents: + all_agents = json.loads(all_agents) + try: + for agent_data in all_agents: + agent_req = AgentCreateIn(**agent_data) + await load_agent(agent_req) + except Exception as e: + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) + + +async def load_agent(request: AgentCreateIn): + agentmgr = ctx.get_agent_mgr() + agent = agentmgr.get_agent_by_name(request.name) + if agent is None: + # TODO: Restore idx back + # TODO: Update agent by import a json + agent = agentmgr.create_agent(request) + try: + await save_agent_configurations("add", ctx.get_agent_mgr().get_agents()) + except (ValueError, Exception) as e: + agentmgr.remove_agent_by_name(request.name) + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) + return agent diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py b/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py old mode 100755 new mode 100644 index c3facef41b..cb3fcdb409 --- a/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py @@ -1,23 +1,36 @@ # 
Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import asyncio +import json +from concurrent.futures import ThreadPoolExecutor +from typing import List + import requests -from comps import GeneratedDoc from comps.cores.proto.api_protocol import ChatCompletionRequest from edgecraftrag.api_schema import RagOut from edgecraftrag.context import ctx -from edgecraftrag.utils import serialize_contexts, set_current_session -from fastapi import Body, FastAPI, File, HTTPException, UploadFile, status +from edgecraftrag.utils import chain_async_generators, serialize_contexts, stream_generator +from fastapi import Body, FastAPI, HTTPException, status from fastapi.responses import StreamingResponse chatqna_app = FastAPI() +thread_pool = ThreadPoolExecutor(max_workers=16) # Retrieval @chatqna_app.post(path="/v1/retrieval") async def retrieval(request: ChatCompletionRequest): try: - contexts = ctx.get_pipeline_mgr().run_retrieve(chat_request=request) + active_kb = ctx.knowledgemgr.get_active_knowledge_base() + if active_kb: + request.user = active_kb + else: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Retrieval needs to have an active knowledgebase", + ) + contexts = await ctx.get_pipeline_mgr().run_retrieve_postprocess(chat_request=request) serialized_contexts = serialize_contexts(contexts) ragout = RagOut(query=request.messages, contexts=serialized_contexts, response="") @@ -30,38 +43,81 @@ async def retrieval(request: ChatCompletionRequest): @chatqna_app.post(path="/v1/chatqna") async def chatqna(request: ChatCompletionRequest): try: + active_pl = ctx.get_pipeline_mgr().get_active_pipeline() sessionid = request.user - set_current_session(sessionid) - generator = ctx.get_pipeline_mgr().get_active_pipeline().generator - if generator: - request.model = generator.model_id + ctx.get_session_mgr().set_current_session(sessionid) + experience_kb = ctx.knowledgemgr.get_active_experience() + active_kb = ctx.knowledgemgr.get_active_knowledge_base() + request.user = active_kb if active_kb else None + if experience_kb: + request.tool_choice = "auto" if experience_kb.experience_active else "none" + + request.input = ctx.get_session_mgr().concat_history( + sessionid, active_pl.generator.inference_type, request.messages + ) + + # Run agent if activated, otherwise, run pipeline + if ctx.get_agent_mgr().get_active_agent(): + run_agent_gen = await ctx.get_agent_mgr().run_agent(chat_request=request) + return StreamingResponse(save_session(sessionid, run_agent_gen), media_type="text/plain") + + else: + generator = active_pl.generator + if generator: + request.model = generator.model_id + if request.stream: - ret, contexts = ctx.get_pipeline_mgr().run_pipeline(chat_request=request) - return ret + run_pipeline_gen, contexts = await ctx.get_pipeline_mgr().run_pipeline(chat_request=request) + return StreamingResponse(save_session(sessionid, run_pipeline_gen), media_type="text/plain") else: - ret, contexts = ctx.get_pipeline_mgr().run_pipeline(chat_request=request) + ret, contexts = await ctx.get_pipeline_mgr().run_pipeline(chat_request=request) + ctx.get_session_mgr().save_current_message(sessionid, "assistant", str(ret)) return str(ret) + except Exception as e: - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"ChatQnA Error: {e}", + ) # RAGQnA @chatqna_app.post(path="/v1/ragqna") async def ragqna(request: ChatCompletionRequest): try: 
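+        # The active knowledge base is carried via request.user; when an experience KB
+        # is active, tool_choice switches experience lookup on or off. Streaming requests
+        # rebuild the RagOut JSON on the fly by chaining async generators, while
+        # non-streaming requests return a RagOut object directly.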
- res, contexts = ctx.get_pipeline_mgr().run_pipeline(chat_request=request) - if isinstance(res, GeneratedDoc): - res = res.text - elif isinstance(res, StreamingResponse): - collected_data = [] - async for chunk in res.body_iterator: - collected_data.append(chunk) - res = "".join(collected_data) + sessionid = request.user + experience_kb = ctx.knowledgemgr.get_active_experience() + active_kb = ctx.knowledgemgr.get_active_knowledge_base() + request.user = active_kb if active_kb else None + if experience_kb: + request.tool_choice = "auto" if experience_kb.experience_active else "none" + generator = ctx.get_pipeline_mgr().get_active_pipeline().generator + if generator: + request.model = generator.model_id + if request.stream: + res_gen, contexts = await ctx.get_pipeline_mgr().run_pipeline(chat_request=request) - serialized_contexts = serialize_contexts(contexts) + # Escape newlines for json format as value + async def res_gen_json(): + async for token in res_gen: + yield token.replace("\n", "\\n") + + # Reconstruct RagOut in stream response + query_gen = stream_generator('{"query":"' + request.messages + '",') + + s_contexts = json.dumps(serialize_contexts(contexts)) + context_gen = stream_generator('"contexts":' + s_contexts + ',"response":"') + final_gen = stream_generator('"}') + output_gen = chain_async_generators([query_gen, context_gen, res_gen_json(), final_gen]) + + return StreamingResponse(output_gen, media_type="text/plain") + else: + ret, contexts = await ctx.get_pipeline_mgr().run_pipeline(chat_request=request) + serialized_contexts = serialize_contexts(contexts) + + ragout = RagOut(query=request.messages, contexts=serialized_contexts, response=str(ret)) + return ragout - ragout = RagOut(query=request.messages, contexts=serialized_contexts, response=str(res)) - return ragout except Exception as e: raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) @@ -82,3 +138,16 @@ def check_vllm(request_data: dict = Body(...)): raise HTTPException(status_code=500) except Exception as e: return {"status": "500", "message": f"connection failed: {str(e)}"} + + +async def save_session(sessionid, run_agent_gen): + collected_data = [] + session_mgr = ctx.get_session_mgr() + async for chunk in run_agent_gen: + if chunk: + collected_data.append(chunk) + current_content = "".join(collected_data) + session_mgr.update_current_message(sessionid, "assistant", current_content) + yield chunk or "" + await asyncio.sleep(0) + session_mgr.save_current_message(sessionid, "assistant", current_content) diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/data.py b/EdgeCraftRAG/edgecraftrag/api/v1/data.py old mode 100755 new mode 100644 index 35ef7e25b7..9d5472f105 --- a/EdgeCraftRAG/edgecraftrag/api/v1/data.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/data.py @@ -1,12 +1,15 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import json import os +from typing import List from edgecraftrag.api_schema import DataIn, FilesIn +from edgecraftrag.config_repository import MilvusConfigRepository from edgecraftrag.context import ctx +from edgecraftrag.env import UI_DIRECTORY from fastapi import FastAPI, File, HTTPException, UploadFile, status -from werkzeug.utils import secure_filename data_app = FastAPI() @@ -14,16 +17,17 @@ # Upload a text or files @data_app.post(path="/v1/data") async def add_data(request: DataIn): + pl = ctx.get_pipeline_mgr().get_active_pipeline() docs = [] if request.text is not None: docs.extend(ctx.get_file_mgr().add_text(text=request.text)) if 
request.local_path is not None: docs.extend(ctx.get_file_mgr().add_files(docs=request.local_path)) - nodelist = ctx.get_pipeline_mgr().run_data_prepare(docs=docs) - if nodelist is None or len(nodelist) == 0: - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="File not found") - pl = ctx.get_pipeline_mgr().get_active_pipeline() + nodelist = await ctx.get_pipeline_mgr().run_data_prepare(docs=docs) + if pl.indexer.comp_subtype != "kbadmin_indexer": + if nodelist is None or len(nodelist) == 0: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="File not found") ctx.get_node_mgr().add_nodes(pl.node_parser.idx, nodelist) return "Done" @@ -32,8 +36,8 @@ async def add_data(request: DataIn): @data_app.post(path="/v1/data/reindex") async def redindex_data(): pl = ctx.get_pipeline_mgr().get_active_pipeline() - ctx.get_node_mgr().del_nodes_by_np_idx(pl.node_parser.idx) + pl.indexer.reinitialize_indexer() pl.update_indexer_to_retriever() @@ -45,6 +49,71 @@ async def redindex_data(): return "Done" +# Gets the current nodelist +@data_app.get(path="/v1/data/nodes") +async def get_nodes_with_kb(kb_name=None): + node_lists = {} + active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + if kb_name: + kb = ctx.get_knowledge_mgr().get_knowledge_base_by_name_or_id(kb_name) + else: + kb = ctx.get_knowledge_mgr().get_active_knowledge_base() + if active_pl.indexer.comp_subtype == "faiss_vector": + return active_pl.indexer.docstore.docs + elif active_pl.indexer.comp_subtype == "milvus_vector": + collection_name = kb.name + active_pl.name + Milvus_node_list = MilvusConfigRepository.create_connection(collection_name, 1, active_pl.indexer.vector_url) + results = Milvus_node_list.get_configs(output_fields=["text", "_node_content", "doc_id"]) + for node_list in results: + text = node_list.get("text") + node_content = json.loads(node_list.get("_node_content")) + node_content["doc_id"] = node_list.get("doc_id") + node_content["text"] = text + node_lists[node_content.get("id_")] = node_content + return node_lists + node_list = ctx.get_node_mgr().get_nodes(active_pl.node_parser.idx) + return node_list + + +# GET chunks by document name +@data_app.get(path="/v1/data/{document_name}/nodes") +async def get_nodes_by_document_name(document_name: str): + all_nodes = await get_nodes_with_kb() + matching_nodes = [] + for node in all_nodes.values() if isinstance(all_nodes, dict) else all_nodes: + metadata = node.get("metadata", {}) if isinstance(node, dict) else getattr(node, "metadata", {}) + node_file_name = metadata.get("file_name", "") + node_file_path = metadata.get("file_path", "") + if node_file_name == document_name or document_name in node_file_name or document_name in node_file_path: + matching_nodes.append(node) + return matching_nodes + + +# GET available document names +@data_app.get(path="/v1/data/documents") +async def get_document_names(): + all_nodes = await get_nodes_with_kb() + if not all_nodes: + return {"documents": []} + + documents = {} + for node in all_nodes.values() if isinstance(all_nodes, dict) else all_nodes: + metadata = node.get("metadata", {}) if isinstance(node, dict) else getattr(node, "metadata", {}) + file_name = metadata.get("file_name") + file_path = metadata.get("file_path") + if file_name and file_name not in documents: + documents[file_name] = { + "file_name": file_name, + "file_path": file_path, + "file_type": metadata.get("file_type", "unknown"), + "chunk_count": 0, + } + if file_name: + documents[file_name]["chunk_count"] += 1 + + return 
{"total_documents": len(documents), "documents": list(documents.values())} + + # Upload files by a list of file_path @data_app.post(path="/v1/data/files") async def add_files(request: FilesIn): @@ -103,16 +172,13 @@ async def upload_file(file_name: str, file: UploadFile = File(...)): ) try: # DIR for server to save files uploaded by UI - UI_DIRECTORY = os.getenv("TMPFILE_PATH", "/home/user/ui_cache") UPLOAD_DIRECTORY = os.path.normpath(os.path.join(UI_DIRECTORY, file_name)) if not UPLOAD_DIRECTORY.startswith(os.path.abspath(UI_DIRECTORY) + os.sep): raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid file_name: directory traversal detected" ) os.makedirs(UPLOAD_DIRECTORY, exist_ok=True) - safe_filename = secure_filename(file.filename) - # Sanitize the uploaded file's name - safe_filename = secure_filename(file.filename) + safe_filename = file.filename file_path = os.path.normpath(os.path.join(UPLOAD_DIRECTORY, safe_filename)) # Ensure file_path is within UPLOAD_DIRECTORY if not file_path.startswith(os.path.abspath(UPLOAD_DIRECTORY)): diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/knowledge_base.py b/EdgeCraftRAG/edgecraftrag/api/v1/knowledge_base.py old mode 100755 new mode 100644 index 21e0c0621e..6379b9c5ac --- a/EdgeCraftRAG/edgecraftrag/api/v1/knowledge_base.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/knowledge_base.py @@ -1,24 +1,32 @@ # Copyright (C) 2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import copy import json import os import re - -from edgecraftrag.api.v1.data import add_data -from edgecraftrag.api_schema import DataIn, KnowledgeBaseCreateIn -from edgecraftrag.base import IndexerType +from typing import Dict, List, Union + +from edgecraftrag.api.v1.data import get_nodes_with_kb +from edgecraftrag.api_schema import DataIn, ExperienceIn, KnowledgeBaseCreateIn +from edgecraftrag.components.query_preprocess import query_search +from edgecraftrag.components.retriever import get_kbs_info +from edgecraftrag.config_repository import ( + MilvusConfigRepository, + save_knowledge_configurations, + save_pipeline_configurations, +) from edgecraftrag.context import ctx -from edgecraftrag.utils import compare_mappings +from edgecraftrag.env import ( + KNOWLEDGEBASE_FILE, + SEARCH_CONFIG_PATH, + SEARCH_DIR, + UI_DIRECTORY, +) from fastapi import FastAPI, HTTPException, status -from pymilvus.exceptions import MilvusException +from llama_index.core.schema import Document kb_app = FastAPI() -# Define the root directory for knowledge base files -KNOWLEDGE_BASE_ROOT = "/home/user/ui_cache" - # Get all knowledge bases @kb_app.get(path="/v1/knowledge") @@ -41,16 +49,24 @@ async def get_knowledge_base(knowledge_name: str): async def create_knowledge_base(knowledge: KnowledgeBaseCreateIn): try: active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + if not active_pl: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Please activate pipeline", + ) if not re.match(r"^[a-zA-Z_][a-zA-Z0-9_]*$", knowledge.name): raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail="Knowledge base names must begin with a letter or underscore", ) - kb = ctx.knowledgemgr.create_knowledge_base(knowledge) - if kb.active: - active_pl.indexer.reinitialize_indexer(kb.name) + + if knowledge.active and knowledge.comp_type == "knowledge" and knowledge.comp_subtype == "origin_kb": + active_pl.indexer.reinitialize_indexer(knowledge.name) active_pl.update_indexer_to_retriever() - await save_knowledge_to_file() + elif knowledge.active and 
knowledge.comp_subtype == "kbadmin_kb": + active_pl.retriever.config_kbadmin_milvus(knowledge.name) + kb = ctx.knowledgemgr.create_knowledge_base(knowledge) + await save_knowledge_configurations("add", kb) return "Create knowledge base successfully" except Exception as e: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) @@ -63,19 +79,33 @@ async def delete_knowledge_base(knowledge_name: str): rm_kb = ctx.knowledgemgr.get_knowledge_base_by_name_or_id(knowledge_name) active_kb = ctx.knowledgemgr.get_active_knowledge_base() active_pl = ctx.get_pipeline_mgr().get_active_pipeline() - if active_kb.name == knowledge_name or active_kb.idx == knowledge_name: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Cannot delete a running knowledge base." - ) - kb_file_path = rm_kb.get_file_paths() - if kb_file_path: - if active_pl.indexer.comp_subtype == "milvus_vector": - await remove_file_handler([], knowledge_name) + if rm_kb.comp_type == "knowledge" and rm_kb.comp_subtype == "origin_kb": if active_kb: - active_pl.indexer.reinitialize_indexer(active_kb.name) - active_pl.update_indexer_to_retriever() + if active_kb.name == knowledge_name or active_kb.idx == knowledge_name: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Cannot delete a running knowledge base.", + ) + kb_file_path = rm_kb.get_file_paths() + if kb_file_path: + if active_pl.indexer.comp_subtype == "milvus_vector": + active_pl.indexer.clear_milvus_collection(knowledge_name) + active_pl.clear_document_cache(knowledge_name) + if active_kb: + active_pl.indexer.reinitialize_indexer(active_kb.name) + active_pl.update_indexer_to_retriever() + rm_kb.clear_documents(active_pl.name) + if rm_kb.comp_type == "experience": + if rm_kb.experience_active: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Cannot delete a running experience knowledge base.", + ) + else: + rm_kb.clear_experiences() result = ctx.knowledgemgr.delete_knowledge_base(knowledge_name) - await save_knowledge_to_file() + await save_knowledge_configurations("delete", rm_kb) + await save_pipeline_configurations("update", active_pl) return result except Exception as e: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) @@ -87,28 +117,51 @@ async def update_knowledge_base(knowledge: KnowledgeBaseCreateIn): try: kb = ctx.knowledgemgr.get_knowledge_base_by_name_or_id(knowledge.name) active_pl = ctx.get_pipeline_mgr().get_active_pipeline() - if active_pl.indexer.comp_subtype != "milvus_vector": - if knowledge.active and knowledge.active != kb.active: - file_paths = kb.get_file_paths() - await update_knowledge_base_handler(file_paths, knowledge.name) - elif not knowledge.active and kb.description != knowledge.description: - pass - elif not knowledge.active: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Must have an active knowledge base" - ) - else: + if active_pl.indexer.comp_subtype == "kbadmin_indexer" and kb.comp_subtype != "kbadmin_kb": + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="The kbadmin pipeline must correspond to the kbadmin type kb.", + ) + if active_pl.indexer.comp_subtype != "kbadmin_indexer" and kb.comp_subtype == "kbadmin_kb": + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Not kbadmin pipeline cannot active kbadmin type kb.", + ) + if kb.comp_type == "knowledge" and kb.comp_subtype == "origin_kb": + if 
active_pl.indexer.comp_subtype != "milvus_vector": + if knowledge.active and knowledge.active != kb.active: + await handle_reload_data(kb, active_pl) + elif not knowledge.active and kb.description != knowledge.description: + pass + else: + if knowledge.active and knowledge.active != kb.active: + current_paths = kb.file_paths + file_paths = active_pl.compare_file_lists(kb.name, current_paths) + if "del_docs" not in file_paths: + await handle_pipeline_change(kb, active_pl, file_paths) + else: + need_delete_document_path = file_paths["del_docs"] + need_add_document_path = file_paths["add_docs"] + active_pl.indexer.reinitialize_indexer(kb.name) + if need_delete_document_path: + for file_path in need_delete_document_path: + await remove_file_from_knowledge_base(kb.name, DataIn(local_path=file_path)) + if need_add_document_path: + for file_path in need_add_document_path: + add_document = await add_file_to_knowledge_base( + kb.name, DataIn(local_path=file_path), False + ) + await add_document_handler(add_document) + active_pl.indexer.reinitialize_indexer(kb.name) + active_pl.update_indexer_to_retriever() + elif not knowledge.active and kb.description != knowledge.description: + pass + elif kb.comp_subtype == "kbadmin_kb": if knowledge.active and knowledge.active != kb.active: - active_pl.indexer.reinitialize_indexer(knowledge.name) - active_pl.update_indexer_to_retriever() - elif not knowledge.active and kb.description != knowledge.description: - pass - elif not knowledge.active: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Must have an active knowledge base" - ) + active_pl.retriever.config_kbadmin_milvus(kb.name) result = ctx.knowledgemgr.update_knowledge_base(knowledge) - await save_knowledge_to_file() + await save_knowledge_configurations("update", kb) + await save_pipeline_configurations("update", active_pl) return result except Exception as e: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) @@ -116,49 +169,68 @@ async def update_knowledge_base(knowledge: KnowledgeBaseCreateIn): # Add a files to the knowledge base @kb_app.post(path="/v1/knowledge/{knowledge_name}/files") -async def add_file_to_knowledge_base(knowledge_name, file_path: DataIn): +async def add_file_to_knowledge_base(knowledge_name, file_path: DataIn, only_add_file: bool = True): try: active_pl = ctx.get_pipeline_mgr().get_active_pipeline() kb = ctx.knowledgemgr.get_knowledge_base_by_name_or_id(knowledge_name) + if kb.comp_type == "experience": + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="The experience type cannot perform file operations.", + ) + if kb.comp_subtype == "kbadmin_kb" or active_pl.indexer.comp_subtype == "kbadmin_indexer": + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Please proceed to the kbadmin interface to perform the operation.", + ) + # Validate and normalize the user-provided path user_path = file_path.local_path - normalized_path = os.path.normpath(os.path.join(KNOWLEDGE_BASE_ROOT, user_path)) - if not normalized_path.startswith(KNOWLEDGE_BASE_ROOT): + add_document = ctx.get_file_mgr().add_files(docs=user_path) + normalized_path = os.path.normpath(os.path.join(UI_DIRECTORY, user_path)) + if not normalized_path.startswith(UI_DIRECTORY): raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid file path") if os.path.isdir(normalized_path): for root, _, files in os.walk(normalized_path): for file in files: file_full_path = os.path.join(root, file) if file_full_path not 
in kb.get_file_paths(): - kb.add_file_path(file_full_path) + kb.add_file_path(file_full_path, add_document, active_pl.name, only_add_file) + active_pl.add_docs_to_list(knowledge_name, file_full_path) else: - raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail="File upload failed") - elif os.path.isfile(normalized_path) and normalized_path not in kb.get_file_paths(): - kb.add_file_path(normalized_path) + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail=f"File already exists {file_full_path}", + ) + elif os.path.isfile(normalized_path) and normalized_path in kb.get_file_paths() and only_add_file: + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail=f"File already exists {normalized_path}", + ) + elif os.path.isfile(normalized_path) and only_add_file: + kb.add_file_path(normalized_path, add_document, active_pl.name, only_add_file) + active_pl.add_docs_to_list(knowledge_name, user_path) + elif os.path.isfile(normalized_path): + kb.add_file_path(normalized_path, add_document, active_pl.name, only_add_file) + active_pl.add_docs_to_list(knowledge_name, user_path) + return add_document else: - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="File upload failed") + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Error uploading file.") active_kb = ctx.knowledgemgr.get_active_knowledge_base() - kb_file_path = kb.get_file_paths() if active_pl.indexer.comp_subtype == "milvus_vector": - if active_kb: - if active_kb.name == knowledge_name or active_kb.idx == knowledge_name: - active_pl.indexer.reinitialize_indexer(active_kb.name) - active_pl.update_indexer_to_retriever() - await update_knowledge_base_handler(file_path, knowledge_name, add_file=True) - else: - await update_knowledge_base_handler(kb_file_path, knowledge_name) - active_pl.indexer.reinitialize_indexer(active_kb.name) - active_pl.update_indexer_to_retriever() + if knowledge_name == active_kb.name: + await add_document_handler(add_document) else: - await update_knowledge_base_handler(kb_file_path, knowledge_name) + active_pl.indexer.reinitialize_indexer(knowledge_name) + await add_document_handler(add_document) active_pl.indexer.reinitialize_indexer(active_kb.name) active_pl.update_indexer_to_retriever() else: if active_kb: if active_kb.name == knowledge_name or active_kb.idx == knowledge_name: - await update_knowledge_base_handler(file_path, knowledge_name, add_file=True) - - await save_knowledge_to_file() + await add_document_handler(add_document) + await save_knowledge_configurations("update", kb) + await save_pipeline_configurations("update", active_pl) return "File upload successfully" except ValueError as e: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) @@ -170,193 +242,325 @@ async def remove_file_from_knowledge_base(knowledge_name, file_path: DataIn): try: active_pl = ctx.get_pipeline_mgr().get_active_pipeline() kb = ctx.knowledgemgr.get_knowledge_base_by_name_or_id(knowledge_name) - active_kb = ctx.knowledgemgr.get_active_knowledge_base() - if file_path.local_path in kb.get_file_paths(): - kb.remove_file_path(file_path.local_path) - else: - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="File remove failure") - - kb_file_path = kb.get_file_paths() - if active_pl.indexer.comp_subtype == "milvus_vector": - if active_kb: - if active_kb.name == knowledge_name or active_kb.idx == knowledge_name: - await remove_file_handler(kb_file_path, knowledge_name) - else: - await remove_file_handler(kb_file_path, 
knowledge_name) - active_pl.indexer.reinitialize_indexer(active_kb.name) - active_pl.update_indexer_to_retriever() - else: - await remove_file_handler(kb_file_path, knowledge_name) - active_pl.indexer.reinitialize_indexer(active_kb.name) - active_pl.update_indexer_to_retriever() - elif active_kb: - if active_kb.name == knowledge_name or active_kb.idx == knowledge_name: - await update_knowledge_base_handler(kb_file_path, knowledge_name) - await save_knowledge_to_file() + if kb.comp_type == "experience": + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="The experience type cannot perform file operations.", + ) + if kb.comp_subtype == "kbadmin_kb" or active_pl.indexer.comp_subtype == "kbadmin_indexer": + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Please proceed to the kbadmin interface to perform the operation.", + ) + document_list = kb.remove_file_path(file_path.local_path, active_pl.name) + active_pl.del_docs_to_list(knowledge_name, file_path.local_path) + if not document_list: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Deleted file documents not found", + ) + await remove_document_handler(document_list, knowledge_name) + await save_knowledge_configurations("update", kb) + await save_pipeline_configurations("update", active_pl) return "File deleted successfully" except ValueError as e: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) +@kb_app.post("/v1/experience") +def get_experience_by_id_or_question(req: ExperienceIn): + kb = ctx.knowledgemgr.get_experience_kb() + result = kb.get_experience_by_id_or_question(req) + if not result: + raise HTTPException(404, detail="Experience not found") + return result + + +@kb_app.get("/v1/experiences") +def get_all_experience(): + kb = ctx.knowledgemgr.get_experience_kb() + if kb: + return kb.get_all_experience() + else: + return kb + + +@kb_app.patch("/v1/experiences") +def update_experience(experience: ExperienceIn): + kb = ctx.knowledgemgr.get_experience_kb() + result = kb.update_experience(experience.idx, experience.question, experience.content) + if not result: + raise HTTPException(404, detail="Question_idx or question not found") + return result + + +@kb_app.delete("/v1/experiences") +def delete_experience(req: ExperienceIn): + kb = ctx.knowledgemgr.get_experience_kb() + success = kb.delete_experience(req.idx) + if not success: + raise HTTPException(404, detail=f"Question {req.question} not found") + return {"message": "Question deleted"} + + +@kb_app.post("/v1/multiple_experiences/check") +def check_duplicate_multiple_experiences( + experiences: List[Dict[str, Union[str, List[str]]]], +): + kb = ctx.knowledgemgr.get_experience_kb() + if not kb: + raise HTTPException(404, detail="No active experience type knowledge base") + all_existing = kb.get_all_experience() + existing_questions = {item["question"] for item in all_existing if "question" in item} + new_questions = [exp["question"] for exp in experiences if "question" in exp and exp["question"]] + duplicate_questions = [q for q in new_questions if q in existing_questions] + if duplicate_questions: + return { + "code": 2001, + "detail": "Duplicate experiences are appended OR overwritten!", + } + else: + kb.add_multiple_experiences(experiences, True) + return { + "status": "success", + "detail": "No duplicate experiences, added successfully", + } + + +@kb_app.post("/v1/multiple_experiences/confirm") +def confirm_multiple_experiences(experiences: List[Dict[str, Union[str, List[str]]]], 
flag: bool): + kb = ctx.knowledgemgr.get_experience_kb() + try: + if not kb: + raise HTTPException(404, detail="No active experience type knowledge base") + kb.add_multiple_experiences(experiences, flag) + return {"status": "success", "detail": "Experiences added successfully"} + except Exception as e: + raise HTTPException(status_code=500, detail=f"Add Failure:{str(e)}") + + +@kb_app.post("/v1/experiences/files") +def add_experiences_from_file(req: DataIn): + kb = ctx.knowledgemgr.get_experience_kb() + try: + kb.add_experiences_from_file(req.local_path) + return {"status": "success"} + except Exception as e: + raise HTTPException(status_code=400, detail=str(e)) + + +@kb_app.post(path="/v1/view_sub_questions") +async def view_sub_questions(que: ExperienceIn): + active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + top1_issue, sub_questions_result = await query_search( + user_input=que.question, + SEARCH_CONFIG_PATH=SEARCH_CONFIG_PATH, + SEARCH_DIR=SEARCH_DIR, + pl=active_pl, + ) + return sub_questions_result + + +@kb_app.get("/v1/kbadmin/kbs_list") +def get_kbs_list(): + active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + try: + if not active_pl or active_pl.indexer.comp_subtype != "kbadmin_indexer": + return [] + CONNECTION_ARGS = {"uri": active_pl.indexer.vector_url} + kbs_list = get_kbs_info(CONNECTION_ARGS) + kb_names = [name for name in kbs_list.keys()] + return kb_names + except Exception as e: + raise HTTPException(status_code=400, detail=str(e)) + + # Update knowledge base data -async def update_knowledge_base_handler(file_path=None, knowledge_name: str = "default_kb", add_file: bool = False): +async def add_document_handler(all_document=None): if ctx.get_pipeline_mgr().get_active_pipeline() is None: - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Please activate pipeline") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Please activate pipeline", + ) - pl = ctx.get_pipeline_mgr().get_active_pipeline() - if add_file and file_path: - return await add_data(file_path) - else: - try: - ctx.get_node_mgr().del_nodes_by_np_idx(pl.node_parser.idx) - pl.indexer.reinitialize_indexer(knowledge_name) - pl.update_indexer_to_retriever() - if file_path: - for file in file_path: - request = DataIn(local_path=file) - await add_data(request) - except MilvusException as e: - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) - return "Done" + active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + if all_document: + nodelist = await ctx.get_pipeline_mgr().run_data_prepare(docs=all_document) + if active_pl.indexer.comp_subtype != "kbadmin_indexer": + if nodelist is None or len(nodelist) == 0: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="File not found") + ctx.get_node_mgr().add_nodes(active_pl.node_parser.idx, nodelist) + return "success update file" # Update knowledge base data -async def remove_file_handler(file_path=None, knowledge_name: str = "default_kb"): +async def remove_document_handler(document_list=None, knowledge_name: str = "default_kb"): if ctx.get_pipeline_mgr().get_active_pipeline() is None: - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Please activate pipeline") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Please activate pipeline", + ) - pl = ctx.get_pipeline_mgr().get_active_pipeline() - ctx.get_node_mgr().del_nodes_by_np_idx(pl.node_parser.idx) - try: - 
pl.indexer.clear_milvus_collection(knowledge_name) - pl.indexer.reinitialize_indexer(knowledge_name) - except MilvusException as e: - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) - pl.update_indexer_to_retriever() - if file_path: - for file in file_path: - request = DataIn(local_path=file) - await add_data(request) - return "Done" + active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + active_kb = ctx.get_knowledge_mgr().get_active_knowledge_base() + ctx.get_node_mgr().del_nodes_by_np_idx(active_pl.node_parser.idx) + if active_pl.indexer.comp_subtype == "milvus_vector": + active_pl.indexer.reinitialize_indexer(knowledge_name) + active_pl.indexer.delete(document_list) + if active_kb: + active_pl.indexer.reinitialize_indexer(active_kb.name) + active_pl.update_indexer_to_retriever() + elif active_kb.name == knowledge_name: + await handle_reload_data(active_kb, active_pl) # Restore knowledge base configuration -async def load_knowledge_from_file(): - CONFIG_DIR = "/home/user/ui_cache/configs" - KNOWLEDGEBASE_FILE = os.path.join(CONFIG_DIR, "knowledgebase.json") +async def restore_knowledge_configurations(): + knowledgebase_config_repo = MilvusConfigRepository.create_connection("knowledgebase_config", 1) + all_datas = [] active_pl = ctx.get_pipeline_mgr().get_active_pipeline() - if os.path.exists(KNOWLEDGEBASE_FILE): - with open(KNOWLEDGEBASE_FILE, "r", encoding="utf-8") as f: - all_Knowledgebases = f.read() - try: + if knowledgebase_config_repo: + all_Knowledgebases_repo = knowledgebase_config_repo.get_configs() + for Knowledgebase_data in all_Knowledgebases_repo: + config_json = Knowledgebase_data.get("config_json") + all_datas.append(config_json) + else: + if os.path.exists(KNOWLEDGEBASE_FILE): + with open(KNOWLEDGEBASE_FILE, "r", encoding="utf-8") as f: + all_Knowledgebases = f.read() all_data = json.loads(all_Knowledgebases) for Knowledgebase_data in all_data: - pipeline_req = KnowledgeBaseCreateIn(**Knowledgebase_data) - kb = ctx.knowledgemgr.create_knowledge_base(pipeline_req) - if Knowledgebase_data["file_map"]: + all_datas.append(Knowledgebase_data) + try: + for Knowledgebase_data in all_datas: + Knoweldge_req = KnowledgeBaseCreateIn(**Knowledgebase_data) + kb = ctx.knowledgemgr.create_knowledge_base(Knoweldge_req) + if kb.comp_type == "knowledge" and kb.comp_subtype == "origin_kb": + if Knowledgebase_data["file_paths"]: if active_pl.indexer.comp_subtype != "milvus_vector" and Knowledgebase_data["active"]: - for file_path in Knowledgebase_data["file_map"].values(): - await update_knowledge_base_handler( - DataIn(local_path=file_path), Knowledgebase_data["name"], add_file=True - ) - kb.add_file_path(file_path) + await handle_reload_data(kb, active_pl) elif Knowledgebase_data["active"]: active_pl.indexer.reinitialize_indexer(Knowledgebase_data["name"]) active_pl.update_indexer_to_retriever() - for file_path in Knowledgebase_data["file_map"].values(): - kb.add_file_path(file_path) else: - for file_path in Knowledgebase_data["file_map"].values(): - kb.add_file_path(file_path) - except Exception as e: - print(f"Error load Knowledge base: {e}") - - -# Configuration of knowledge base for persistence -async def save_knowledge_to_file(): - CONFIG_DIR = "/home/user/ui_cache/configs" - KNOWLEDGEBASE_FILE = os.path.join(CONFIG_DIR, "knowledgebase.json") - if not os.path.exists(CONFIG_DIR): - os.makedirs(CONFIG_DIR, exist_ok=True) - try: - kb_base = ctx.knowledgemgr.get_all_knowledge_bases() - knowledgebases_data = [] - for kb in kb_base: - kb_json = 
{"name": kb.name, "description": kb.description, "active": kb.active, "file_map": kb.file_map} - knowledgebases_data.append(kb_json) - json_str = json.dumps(knowledgebases_data, indent=2, ensure_ascii=False) - with open(KNOWLEDGEBASE_FILE, "w", encoding="utf-8") as f: - f.write(json_str) + pass + elif kb.comp_subtype == "kbadmin_kb": + if Knowledgebase_data["active"]: + active_pl.retriever.config_kbadmin_milvus(kb.name) except Exception as e: - print(f"Error saving Knowledge base: {e}") - - -all_pipeline_milvus_maps = {} -current_pipeline_kb_map = {} - - -async def refresh_milvus_map(milvus_name): - current_pipeline_kb_map.clear() - knowledge_bases_list = await get_all_knowledge_bases() - for kb in knowledge_bases_list: - current_pipeline_kb_map[kb.name] = kb.file_map - all_pipeline_milvus_maps[milvus_name] = copy.deepcopy(current_pipeline_kb_map) + print(f"Error load Knowledge base: {e}") async def Synchronizing_vector_data(old_active_pl, new_active_pl): try: active_kb = ctx.knowledgemgr.get_active_knowledge_base() active_pl = ctx.get_pipeline_mgr().get_active_pipeline() - milvus_name = ( - old_active_pl.name + str(old_active_pl.indexer.model_extra["d"]) if old_active_pl else "default_kb" - ) + # Determine whether it is kbadmin type + if old_active_pl: + if ( + old_active_pl.retriever.comp_subtype == "kbadmin_retriever" + and new_active_pl.retriever.comp_subtype == "kbadmin_retriever" + ): + if active_kb: + if active_kb.comp_subtype == "kbadmin_kb": + new_active_pl.retriever.config_kbadmin_milvus(active_kb.name) + return True + elif old_active_pl.retriever.comp_subtype == "kbadmin_retriever": + return True if not active_kb: return True - if not active_pl: - if old_active_pl: - if old_active_pl.indexer.comp_subtype == "milvus_vector": - await refresh_milvus_map(milvus_name) + if new_active_pl.retriever.comp_subtype == "kbadmin_retriever": + if active_kb: + if active_kb.comp_subtype == "kbadmin_kb": + new_active_pl.retriever.config_kbadmin_milvus(active_kb.name) return True - + # Perform milvus data synchronization if new_active_pl.indexer.comp_subtype == "milvus_vector": - new_milvus_map = {} - kb_list = await get_all_knowledge_bases() - for kb in kb_list: - new_milvus_map[kb.name] = kb.file_map - added_files, deleted_files = compare_mappings( - new_milvus_map, - all_pipeline_milvus_maps.get(new_active_pl.name + str(new_active_pl.indexer.model_extra["d"]), {}), - ) - # Synchronization of deleted files - for kb_name, file_paths in deleted_files.items(): - if file_paths: - new_active_pl.indexer.clear_milvus_collection(kb_name) - if kb_name not in new_milvus_map.keys(): - continue - kb = await get_knowledge_base(kb_name) - new_active_pl.indexer.reinitialize_indexer(kb_name) - file_paths = kb.get_file_paths() - if file_paths: - for file in file_paths: - await add_data(DataIn(local_path=file)) - # Synchronization of added files - for kb_name, file_paths in added_files.items(): - if file_paths: - for file_path in file_paths.values(): - new_active_pl.indexer.reinitialize_indexer(kb_name) - await add_data(DataIn(local_path=file_path)) - - new_active_pl.indexer.reinitialize_indexer(active_kb.name) - new_active_pl.update_indexer_to_retriever() - await refresh_milvus_map(milvus_name) + # Pipeline component state not changed + current_paths = active_kb.file_paths + file_paths = active_pl.compare_file_lists(active_kb.name, current_paths) + if "del_docs" not in file_paths: + await handle_pipeline_change(active_kb, active_pl, file_paths) + else: + need_delete_document_path = file_paths["del_docs"] + 
need_add_document_path = file_paths["add_docs"] + active_pl.indexer.reinitialize_indexer(active_kb.name) + if need_delete_document_path: + for file_path in need_delete_document_path: + await remove_file_from_knowledge_base(active_kb.name, DataIn(local_path=file_path)) + if need_add_document_path: + for file_path in need_add_document_path: + add_document = await add_file_to_knowledge_base( + active_kb.name, DataIn(local_path=file_path), False + ) + await add_document_handler(add_document) + active_pl.indexer.reinitialize_indexer(active_kb.name) + active_pl.update_indexer_to_retriever() else: - new_active_pl.indexer.reinitialize_indexer() - new_active_pl.update_indexer_to_retriever() - add_list = active_kb.get_file_paths() - for file in add_list: - await add_data(DataIn(local_path=file)) - if old_active_pl: - if old_active_pl.indexer.comp_subtype == "milvus_vector": - await refresh_milvus_map(milvus_name) + await handle_reload_data(active_kb, active_pl) + await save_knowledge_configurations("update", active_kb) except Exception as e: raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=e) + + +# When the pipeline is changed, the current kb and the data of the pipeline are reconstructed +async def handle_pipeline_change(kb, pl, file_paths): + exist_file = False + need_add_document_path = file_paths["add_docs"] + node_lists = await get_nodes_with_kb(kb.name) + pl.indexer.clear_milvus_collection(kb.name) + if need_add_document_path: + if os.path.isfile(need_add_document_path[0]): + kb.clear_documents(pl.name) + exist_file = True + pl.indexer.reinitialize_indexer(kb.name) + for file_path in need_add_document_path: + if exist_file: + add_document = await add_file_to_knowledge_base(kb.name, DataIn(local_path=file_path), False) + await add_document_handler(add_document) + else: + add_document = [] + document = {} + documents_list = kb.get_all_document(file_path, pl.name) + for document in documents_list: + need_add_node_list = {} + for node in node_lists.values(): + if document.get("doc_id") == node.get("doc_id"): + need_add_node_list[node["id_"]] = node + docuement_text = pl.nodes_to_document(need_add_node_list) + document["id_"] = document.get("doc_id") + document["text"] = docuement_text + document["excluded_embed_metadata_keys"] = [ + "file_name", + "file_type", + "file_size", + "creation_date", + "last_modified_date", + "last_accessed_date", + ] + document["excluded_llm_metadata_keys"] = [ + "file_name", + "file_type", + "file_size", + "creation_date", + "last_modified_date", + "last_accessed_date", + ] + document["metadata"] = document.get("metadata") + result_document = Document.from_dict(data=document) + add_document.append(result_document) + pl.add_docs_to_list(kb.name, file_path) + await add_document_handler(add_document) + + +# reloading data that is not a milvus indexer +async def handle_reload_data(kb, pl): + pl.indexer.reinitialize_indexer() + pl.update_indexer_to_retriever() + need_add_document_path = kb.get_file_paths() + ctx.get_node_mgr().del_nodes_by_np_idx(pl.node_parser.idx) + kb.clear_documents(pl.name) + if need_add_document_path: + for file_path in need_add_document_path: + add_document = await add_file_to_knowledge_base(kb.name, DataIn(local_path=file_path), False) + await add_document_handler(add_document) diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/model.py b/EdgeCraftRAG/edgecraftrag/api/v1/model.py index bbc0d9806b..8707384cdf 100644 --- a/EdgeCraftRAG/edgecraftrag/api/v1/model.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/model.py @@ -3,10 +3,12 
@@ import gc import os +from typing import Optional +import requests from edgecraftrag.api_schema import ModelIn from edgecraftrag.context import ctx -from fastapi import FastAPI, HTTPException, status +from fastapi import FastAPI, HTTPException, Query, status model_app = FastAPI() @@ -30,11 +32,22 @@ async def get_model_weight(model_id): # Search available model id @model_app.get(path="/v1/settings/avail-models/{model_type}") -async def get_model_id(model_type): +async def get_model_id( + model_type: str, + server_address: Optional[str] = Query(default=None, description="vLLM server address (optional)"), +): try: - return get_available_models(model_type) + if model_type == "vLLM": + if not server_address: + server_address = "http://localhost:8086" + return get_available_vllm_models(server_address) + else: + return get_available_models(model_type) except Exception as e: - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=" GET model failed") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=" GET model failed", + ) # GET Models @@ -123,7 +136,10 @@ def get_available_weights(model_path): def get_available_models(model_type): avail_models = [] - if model_type == "LLM": + if model_type == "vLLM": + LLM_MODEL = os.getenv("LLM_MODEL", "Qwen/Qwen3-8B") + avail_models.append(LLM_MODEL) + elif model_type == "LLM": items = os.listdir(CONTAINER_MODEL_PATH) for item in items: if item == "BAAI": @@ -134,9 +150,29 @@ def get_available_models(model_type): avail_models.append(item + "/" + sub_path) else: avail_models.append(item) + elif model_type == "kbadmin_embedding_model": + return ["BAAI/bge-large-zh-v1.5"] else: for item in os.listdir(CONTAINER_MODEL_PATH + "BAAI"): if (model_type == "reranker" and "rerank" in item) or (model_type == "embedding" and "rerank" not in item): avail_models.append("BAAI/" + item) return avail_models + + +@model_app.get(path="/v1/available_models") +def get_available_vllm_models(server_address: str): + try: + url = f"{server_address}/v1/models" + response = requests.get(url, timeout=60) + response.raise_for_status() + response_data = response.json() + model_entries = response_data.get("data", []) + models = [entry.get("id") for entry in model_entries if entry.get("id")] + + return models + + except requests.exceptions.RequestException as e: + raise HTTPException(status_code=500, detail=f"Failed to connect to vLLM server: {str(e)}") + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error processing request: {str(e)}") diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py b/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py old mode 100755 new mode 100644 index e1cd5b8345..216145fb9f --- a/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py @@ -4,24 +4,42 @@ import asyncio import json import os +import re +import time import weakref from concurrent.futures import ThreadPoolExecutor from edgecraftrag.api.v1.knowledge_base import Synchronizing_vector_data from edgecraftrag.api_schema import MilvusConnectRequest, PipelineCreateIn -from edgecraftrag.base import IndexerType, InferenceType, ModelType, NodeParserType, PostProcessorType, RetrieverType +from edgecraftrag.base import ( + GeneratorType, + IndexerType, + InferenceType, + ModelType, + NodeParserType, + PostProcessorType, + RetrieverType, +) from edgecraftrag.components.benchmark import Benchmark -from edgecraftrag.components.generator import QnAGenerator -from edgecraftrag.components.indexer 
import VectorIndexer +from edgecraftrag.components.generator import FreeChatGenerator, QnAGenerator +from edgecraftrag.components.indexer import KBADMINIndexer, VectorIndexer from edgecraftrag.components.node_parser import ( HierarchyNodeParser, + KBADMINParser, SimpleNodeParser, SWindowNodeParser, UnstructedNodeParser, ) from edgecraftrag.components.postprocessor import MetadataReplaceProcessor, RerankProcessor -from edgecraftrag.components.retriever import AutoMergeRetriever, SimpleBM25Retriever, VectorSimRetriever +from edgecraftrag.components.retriever import ( + AutoMergeRetriever, + KBadminRetriever, + SimpleBM25Retriever, + VectorSimRetriever, +) +from edgecraftrag.config_repository import MilvusConfigRepository, save_pipeline_configurations from edgecraftrag.context import ctx +from edgecraftrag.env import PIPELINE_FILE from fastapi import FastAPI, File, HTTPException, UploadFile, status from pymilvus import connections @@ -51,17 +69,33 @@ async def get_pipeline_json(name): # GET Pipeline benchmark -@pipeline_app.get(path="/v1/settings/pipelines/{name}/benchmark") -async def get_pipeline_benchmark(name): - pl = ctx.get_pipeline_mgr().get_pipeline_by_name_or_id(name) +@pipeline_app.get(path="/v1/settings/pipeline/benchmark") +async def get_pipeline_benchmark(): + pl = ctx.get_pipeline_mgr().get_active_pipeline() if pl and pl.benchmark: return pl.benchmark +# GET Pipeline benchmark +@pipeline_app.get(path="/v1/settings/pipelines/{name}/benchmarks") +async def get_pipeline_benchmarks(name): + pl = ctx.get_pipeline_mgr().get_pipeline_by_name_or_id(name) + if pl and pl.benchmark: + return pl.benchmark.benchmark_data_list + + # POST Pipeline @pipeline_app.post(path="/v1/settings/pipelines") async def add_pipeline(request: PipelineCreateIn): - return load_pipeline(request) + pattern = re.compile(r"^[a-zA-Z0-9_]+$") + if not pattern.fullmatch(request.name): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Pipeline name must consist of letters, numbers, and underscores.", + ) + pl = await load_pipeline(request) + await save_pipeline_configurations("add", pl) + return pl # PATCH Pipeline @@ -73,15 +107,18 @@ async def update_pipeline(name, request: PipelineCreateIn): active_pl = ctx.get_pipeline_mgr().get_active_pipeline() if pl == active_pl: if request.active: - raise HTTPException(status_code=status.HTTP_423_LOCKED, detail="Unable to patch an active pipeline...") + raise HTTPException( + status_code=status.HTTP_423_LOCKED, + detail="Unable to patch an active pipeline...", + ) async with ctx.get_pipeline_mgr()._lock: try: - update_pipeline_handler(pl, request) + await update_pipeline_handler(pl, request) pipeline_dict = request.dict() pl.update_pipeline_json(pipeline_dict) except (ValueError, Exception) as e: raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) - save_pipeline_to_file() + await save_pipeline_configurations("update", pl) return pl @@ -89,8 +126,12 @@ async def update_pipeline(name, request: PipelineCreateIn): @pipeline_app.delete(path="/v1/settings/pipelines/{name}") async def remove_pipeline(name): try: + pl = ctx.get_pipeline_mgr().get_pipeline_by_name_or_id(name) + for _, agent in ctx.agentmgr.get_agents().items(): + if pl.idx == agent.pipeline_idx: + raise Exception(f"Please cancel the {agent.name}'s agent associated with the current pipeline first") res = ctx.get_pipeline_mgr().remove_pipeline_by_name_or_id(name) - save_pipeline_to_file() + await save_pipeline_configurations("delete", pl) return res except 
Exception as e: raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) @@ -102,30 +143,37 @@ async def upload_file(file: UploadFile = File(...)): content = await file.read() request = json.loads(content) pipeline_req = PipelineCreateIn(**request) - return load_pipeline(pipeline_req) + pl = await load_pipeline(pipeline_req) + await save_pipeline_configurations("add", pl) + return pl -def load_pipeline(request): +async def load_pipeline(request): pl = ctx.get_pipeline_mgr().get_pipeline_by_name_or_id(request.name) if pl is None: pipeline_json = request.model_dump_json() - pl = ctx.get_pipeline_mgr().create_pipeline(request.name, pipeline_json) + if request.idx is not None: + pl = ctx.get_pipeline_mgr().create_pipeline(request, pipeline_json) + else: + pl = ctx.get_pipeline_mgr().create_pipeline(request.name, pipeline_json) active_pl = ctx.get_pipeline_mgr().get_active_pipeline() if pl == active_pl and request.active: - raise HTTPException(status_code=status.HTTP_423_LOCKED, detail="Unable to patch an active pipeline...") + raise HTTPException( + status_code=status.HTTP_423_LOCKED, + detail="Unable to patch an active pipeline...", + ) try: - update_pipeline_handler(pl, request) - save_pipeline_to_file() + await update_pipeline_handler(pl, request) except (ValueError, Exception) as e: ctx.get_pipeline_mgr().remove_pipeline_by_name_or_id(request.name) raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) return pl -def update_pipeline_handler(pl, req): +async def update_pipeline_handler(pl, req): active_kb = ctx.knowledgemgr.get_active_knowledge_base() active_pipeline = ctx.get_pipeline_mgr().get_active_pipeline() - kb_name = active_kb.name if active_kb else "default_kb" + kb_name = active_kb.name if active_kb else "default" if req.node_parser is not None: np = req.node_parser @@ -153,12 +201,10 @@ def update_pipeline_handler(pl, req): pl.node_parser = SWindowNodeParser.from_defaults(window_size=np.window_size) case NodeParserType.UNSTRUCTURED: pl.node_parser = UnstructedNodeParser(chunk_size=np.chunk_size, chunk_overlap=np.chunk_overlap) + case NodeParserType.KBADMINPARSER: + pl.node_parser = KBADMINParser() ctx.get_node_parser_mgr().add(pl.node_parser) - all_docs = ctx.get_file_mgr().get_all_docs() - nodelist = pl.node_parser.run(docs=all_docs) - if nodelist is not None and len(nodelist) > 0: - ctx.get_node_mgr().add_nodes(pl.node_parser.idx, nodelist) pl._node_changed = True if req.indexer is not None: @@ -168,17 +214,24 @@ def update_pipeline_handler(pl, req): pl.indexer = found_indexer else: embed_model = None - if ind.embedding_model: - embed_model = ctx.get_model_mgr().search_model(ind.embedding_model) - if embed_model is None: - ind.embedding_model.model_type = ModelType.EMBEDDING - embed_model = ctx.get_model_mgr().load_model(ind.embedding_model) - ctx.get_model_mgr().add(embed_model) match ind.indexer_type: case IndexerType.DEFAULT_VECTOR | IndexerType.FAISS_VECTOR | IndexerType.MILVUS_VECTOR: + if ind.embedding_model: + embed_model = ctx.get_model_mgr().search_model(ind.embedding_model) + if embed_model is None: + ind.embedding_model.model_type = ModelType.EMBEDDING + embed_model = ctx.get_model_mgr().load_model(ind.embedding_model) + ctx.get_model_mgr().add(embed_model) # TODO: **RISK** if considering 2 pipelines with different # nodes, but same indexer, what will happen? 
- pl.indexer = VectorIndexer(embed_model, ind.indexer_type, ind.vector_uri, kb_name) + pl.indexer = VectorIndexer(embed_model, ind.indexer_type, ind.vector_url, kb_name) + case IndexerType.KBADMIN_INDEXER: + kbadmin_embedding_url = ind.embedding_url + KBADMIN_VECTOR_URL = ind.vector_url + embed_model = ind.embedding_model.model_id + pl.indexer = KBADMINIndexer( + embed_model, ind.indexer_type, kbadmin_embedding_url, KBADMIN_VECTOR_URL + ) case _: pass ctx.get_indexer_mgr().add(pl.indexer) @@ -208,6 +261,8 @@ def update_pipeline_handler(pl, req): pl.retriever = SimpleBM25Retriever(pl.indexer, similarity_top_k=retr.retrieve_topk) else: return Exception("No indexer") + case RetrieverType.KBADMIN_RETRIEVER: + pl.retriever = KBadminRetriever(pl.indexer, similarity_top_k=retr.retrieve_topk) case _: pass # Index is updated to retriever @@ -252,9 +307,12 @@ def update_pipeline_handler(pl, req): ctx.get_model_mgr().add(model) # Use weakref to achieve model deletion and memory release model_ref = weakref.ref(model) - pl.generator = QnAGenerator( - model_ref, gen.prompt_path, gen.inference_type, gen.vllm_endpoint, gen.prompt_content - ) + if gen.generator_type == GeneratorType.CHATQNA: + pl.generator = QnAGenerator( + model_ref, gen.prompt_path, gen.inference_type, gen.vllm_endpoint, gen.prompt_content + ) + elif gen.generator_type == GeneratorType.FREECHAT: + pl.generator = FreeChatGenerator(model_ref, gen.inference_type, gen.vllm_endpoint) if pl.enable_benchmark: if "tokenizer" not in locals() or tokenizer is None: _, tokenizer, bench_hook = ctx.get_model_mgr().load_model_ben(gen.model) @@ -268,73 +326,49 @@ def update_pipeline_handler(pl, req): ctx.get_pipeline_mgr().activate_pipeline(pl.name, req.active, ctx.get_node_mgr(), kb_name) # Create and set up a separate event loop to run asynchronous tasks in threads - def run_async_task(): - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - try: - loop.run_until_complete(Synchronizing_vector_data(active_pipeline, pl)) - except Exception as e: - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Synchronization error: {e}") - finally: - loop.close() - - with ThreadPoolExecutor(max_workers=1) as executor: - future = executor.submit(run_async_task) - future.result() + if req.active: + await Synchronizing_vector_data(active_pipeline, pl) return pl # Restore pipeline configuration -def load_pipeline_from_file(): - CONFIG_DIR = "/home/user/ui_cache/configs" - PIPELINE_FILE = os.path.join(CONFIG_DIR, "pipeline.json") - if os.path.exists(PIPELINE_FILE): - with open(PIPELINE_FILE, "r", encoding="utf-8") as f: - all_pipelines = f.read() - try: - all_da = json.loads(all_pipelines) - for pipeline_data in all_da: - one_pipelinejson = json.loads(pipeline_data) - pipeline_req = PipelineCreateIn(**one_pipelinejson) - load_pipeline(pipeline_req) - except Exception as e: - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) - - -# Configuration of the persistence pipeline -def save_pipeline_to_file(): - CONFIG_DIR = "/home/user/ui_cache/configs" - PIPELINE_FILE = os.path.join(CONFIG_DIR, "pipeline.json") - - if not os.path.exists(CONFIG_DIR): - os.makedirs(CONFIG_DIR, exist_ok=True) +async def restore_pipeline_configurations(): + milvus_repo = MilvusConfigRepository.create_connection("pipeline_config", 20) + all_pipelines = [] + if milvus_repo: + time.sleep(10) + all_pipelines_repo = milvus_repo.get_configs() + for pipeline in all_pipelines_repo: + 
all_pipelines.append(pipeline.get("config_json")) + else: + if os.path.exists(PIPELINE_FILE): + with open(PIPELINE_FILE, "r", encoding="utf-8") as f: + all_pipelines = f.read() + if all_pipelines: + all_pipelines = json.loads(all_pipelines) try: - pipelines_data = ctx.get_pipeline_mgr().get_pipelines() - all_pipeline_json = [] - for pipeline in pipelines_data: - all_pipeline_json.append(pipeline.get_pipeline_json) - json_str = json.dumps(all_pipeline_json, indent=2, ensure_ascii=False) - with open(PIPELINE_FILE, "w", encoding="utf-8") as f: - f.write(json_str) + for pipeline_data in all_pipelines: + pipeline_req = PipelineCreateIn(**pipeline_data) + await load_pipeline(pipeline_req) except Exception as e: - print(f"Error saving pipelines: {e}") + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) # Detecting if milvus is connected @pipeline_app.post(path="/v1/check/milvus") async def check_milvus(request: MilvusConnectRequest): - vector_uri = request.vector_uri + vector_url = request.vector_url try: - if vector_uri.startswith("http://"): - host_port = vector_uri.replace("http://", "") - elif vector_uri.startswith("https://"): - host_port = vector_uri.replace("https://", "") + if vector_url.startswith("http://"): + host_port = vector_url.replace("http://", "") + elif vector_url.startswith("https://"): + host_port = vector_url.replace("https://", "") else: - host_port = vector_uri + host_port = vector_url host, port = host_port.split(":", 1) - connections.connect(alias="default", host=host, port=port) + connections.connect(alias="knowledge_default", host=host, port=port) - if connections.has_connection("default"): + if connections.has_connection("knowledge_default"): return {"status": "200", "message": "Milvus connection successful."} else: return {"status": "404", "message": "Milvus connection failed."} diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/prompt.py b/EdgeCraftRAG/edgecraftrag/api/v1/prompt.py index 86639a40a7..51ed5be6d4 100644 --- a/EdgeCraftRAG/edgecraftrag/api/v1/prompt.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/prompt.py @@ -1,8 +1,10 @@ # Copyright (C) 2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +from edgecraftrag.api.v1.pipeline import save_pipeline_configurations from edgecraftrag.api_schema import PromptIn from edgecraftrag.context import ctx +from edgecraftrag.utils import DEFAULT_TEMPLATE from fastapi import FastAPI, File, HTTPException, UploadFile, status prompt_app = FastAPI() @@ -12,11 +14,13 @@ @prompt_app.post(path="/v1/chatqna/prompt-file") async def load_prompt_file(file: UploadFile = File(...)): try: - generator = ctx.get_pipeline_mgr().get_active_pipeline().generator + pl = ctx.get_pipeline_mgr().get_active_pipeline() + generator = pl.generator if generator: content = await file.read() prompt_str = content.decode("utf-8") generator.set_prompt(prompt_str) + await save_pipeline_configurations("update", pl) return "Set LLM Prompt Successfully" except Exception as e: raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) @@ -26,10 +30,12 @@ async def load_prompt_file(file: UploadFile = File(...)): @prompt_app.post(path="/v1/chatqna/prompt") async def load_prompt(request: PromptIn): try: - generator = ctx.get_pipeline_mgr().get_active_pipeline().generator + pl = ctx.get_pipeline_mgr().get_active_pipeline() + generator = pl.generator if generator: prompt_str = request.prompt generator.set_prompt(prompt_str) + await save_pipeline_configurations("update", pl) return "Set LLM Prompt 
Successfully" except Exception as e: raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) @@ -38,14 +44,30 @@ async def load_prompt(request: PromptIn): # Get prompt of LLM ChatQnA @prompt_app.get(path="/v1/chatqna/prompt") async def get_prompt(): + try: + generator = ctx.get_pipeline_mgr().get_active_pipeline().generator + if generator: + return generator.original_template + except Exception as e: + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) + + +@prompt_app.get(path="/v1/chatqna/prompt/tagged") +async def get_tagged_prompt(): try: generator = ctx.get_pipeline_mgr().get_active_pipeline().generator if generator: return generator.prompt + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Tagged prompt not found") except Exception as e: raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) +@prompt_app.get(path="/v1/chatqna/prompt/default") +async def get_default_prompt(): + return DEFAULT_TEMPLATE + + # Reset prompt for LLM ChatQnA @prompt_app.post(path="/v1/chatqna/prompt/reset") async def reset_prompt(): diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/session.py b/EdgeCraftRAG/edgecraftrag/api/v1/session.py new file mode 100644 index 0000000000..d2427f7334 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/api/v1/session.py @@ -0,0 +1,19 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from edgecraftrag.api_schema import SessionIn +from edgecraftrag.context import ctx +from fastapi import FastAPI + +session_app = FastAPI() + + +@session_app.get("/v1/sessions") +def get_all_sessions(): + return ctx.get_session_mgr().get_all_sessions() + + +@session_app.get("/v1/session/{idx}") +def get_session_by_id(idx: str): + content = ctx.get_session_mgr().get_session_by_id(idx) + return {"session_id": idx, "session_content": content} diff --git a/EdgeCraftRAG/edgecraftrag/api_schema.py b/EdgeCraftRAG/edgecraftrag/api_schema.py index d7ae1c8478..b57b2da604 100644 --- a/EdgeCraftRAG/edgecraftrag/api_schema.py +++ b/EdgeCraftRAG/edgecraftrag/api_schema.py @@ -1,8 +1,9 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Optional +from typing import Any, Dict, Optional +from edgecraftrag import base from pydantic import BaseModel @@ -12,6 +13,7 @@ class ModelIn(BaseModel): model_path: Optional[str] = "./" weight: Optional[str] = "INT4" device: Optional[str] = "cpu" + api_base: Optional[str] = None class NodeParserIn(BaseModel): @@ -25,7 +27,8 @@ class NodeParserIn(BaseModel): class IndexerIn(BaseModel): indexer_type: str embedding_model: Optional[ModelIn] = None - vector_uri: Optional[str] = None + embedding_url: Optional[str] = None + vector_url: Optional[str] = None class RetrieverIn(BaseModel): @@ -40,6 +43,7 @@ class PostProcessorIn(BaseModel): class GeneratorIn(BaseModel): + generator_type: str prompt_path: Optional[str] = None prompt_content: Optional[str] = None model: Optional[ModelIn] = None @@ -48,6 +52,7 @@ class GeneratorIn(BaseModel): class PipelineCreateIn(BaseModel): + idx: Optional[str] = None name: Optional[str] = None node_parser: Optional[NodeParserIn] = None indexer: Optional[IndexerIn] = None @@ -55,6 +60,7 @@ class PipelineCreateIn(BaseModel): postprocessor: Optional[list[PostProcessorIn]] = None generator: Optional[GeneratorIn] = None active: Optional[bool] = False + documents_cache: Optional[Dict] = None class DataIn(BaseModel): @@ -77,10 +83,35 @@ class PromptIn(BaseModel): class 
KnowledgeBaseCreateIn(BaseModel): + idx: Optional[str] = None name: str description: Optional[str] = None active: Optional[bool] = None + comp_type: Optional[str] = "knowledge" + comp_subtype: Optional[str] = "origin_kb" + experience_active: Optional[bool] = None + all_document_maps: Optional[Dict] = None + file_paths: Optional[list] = None + + +class ExperienceIn(BaseModel): + idx: Optional[str] = None + question: Optional[str] = None + content: list[str] = None class MilvusConnectRequest(BaseModel): - vector_uri: str + vector_url: str + + +class AgentCreateIn(BaseModel): + idx: Optional[str] = None + name: Optional[str] = "" + type: Optional[base.AgentType] = None + pipeline_idx: Optional[str] = None + configs: Optional[dict] = None + active: Optional[bool] = False + + +class SessionIn(BaseModel): + idx: Optional[str] = None diff --git a/EdgeCraftRAG/edgecraftrag/base.py b/EdgeCraftRAG/edgecraftrag/base.py index db1dc414b8..195302451c 100644 --- a/EdgeCraftRAG/edgecraftrag/base.py +++ b/EdgeCraftRAG/edgecraftrag/base.py @@ -4,7 +4,7 @@ import abc import uuid from enum import Enum -from typing import Any, Callable, List, Optional +from typing import Any, Optional from pydantic import BaseModel, ConfigDict, Field, model_serializer @@ -19,7 +19,12 @@ class CompType(str, Enum): RETRIEVER = "retriever" POSTPROCESSOR = "postprocessor" GENERATOR = "generator" + QUERYSEARCH = "querysearch" FILE = "file" + CHUNK_NUM = "chunk_num" + KNOWLEDGE = "knowledge" + AGENT = "agent" + SESSION = "session" class ModelType(str, Enum): @@ -28,6 +33,7 @@ class ModelType(str, Enum): RERANKER = "reranker" LLM = "llm" VLLM = "vllm" + VLLM_EMBEDDING = "vllm_embedding" class FileType(str, Enum): @@ -44,6 +50,7 @@ class NodeParserType(str, Enum): HIERARCHY = "hierarchical" SENTENCEWINDOW = "sentencewindow" UNSTRUCTURED = "unstructured" + KBADMINPARSER = "kbadmin_parser" class IndexerType(str, Enum): @@ -51,6 +58,7 @@ class IndexerType(str, Enum): FAISS_VECTOR = "faiss_vector" DEFAULT_VECTOR = "vector" MILVUS_VECTOR = "milvus_vector" + KBADMIN_INDEXER = "kbadmin_indexer" class RetrieverType(str, Enum): @@ -58,6 +66,7 @@ class RetrieverType(str, Enum): VECTORSIMILARITY = "vectorsimilarity" AUTOMERGE = "auto_merge" BM25 = "bm25" + KBADMIN_RETRIEVER = "kbadmin_retriever" class PostProcessorType(str, Enum): @@ -69,6 +78,7 @@ class PostProcessorType(str, Enum): class GeneratorType(str, Enum): CHATQNA = "chatqna" + FREECHAT = "freechat" class InferenceType(str, Enum): @@ -81,7 +91,18 @@ class CallbackType(str, Enum): DATAPREP = "dataprep" RETRIEVE = "retrieve" + RETRIEVE_POSTPROCESS = "retrieve_postprocess" + POSTPROCESS = "postprocess" + GENERATE = "generate" PIPELINE = "pipeline" + RUNAGENT = "run_agent" + QUERYSEARCH = "query_search" + + +class AgentType(str, Enum): + + SIMPLE = "simple" + DEEPSEARCH = "deep_search" class BaseComponent(BaseModel): @@ -113,9 +134,19 @@ class BaseMgr: def __init__(self): self.components = {} - def add(self, comp: BaseComponent): + def add(self, comp: BaseComponent, name: str = None): + if name: + self.components[name] = comp + return True self.components[comp.idx] = comp + def append(self, comp: BaseComponent, name: str = None): + key = name if name else comp.idx + if key not in self.components: + self.components[key] = [] + self.components[key].append(comp) + return True + def get(self, idx: str) -> BaseComponent: if idx in self.components: return self.components[idx] diff --git a/EdgeCraftRAG/edgecraftrag/components/agent.py b/EdgeCraftRAG/edgecraftrag/components/agent.py new file mode 
100644 index 0000000000..fd6c4ff7a2 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/agent.py @@ -0,0 +1,114 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +from abc import abstractmethod + +from comps.cores.proto.api_protocol import ChatCompletionRequest +from edgecraftrag.base import BaseComponent, CallbackType, CompType +from edgecraftrag.components.agents.utils import remove_think_tags +from edgecraftrag.utils import stream_generator +from langgraph.config import get_stream_writer +from pydantic import model_serializer + + +class Agent(BaseComponent): + + def __init__(self, name, agent_type, pipeline_idx, configs): + super().__init__(name=name, comp_type=CompType.AGENT, comp_subtype=agent_type) + if self.name == "" or self.name is None: + self.name = self.idx + self.enable_benchmark = os.getenv("ENABLE_BENCHMARK", "False").lower() == "true" + self.pipeline_idx = pipeline_idx + self.manager = None + self.configs = configs + + @classmethod + @abstractmethod + def get_default_configs(cls): + pass + + def get_bound_pipeline(self): + if self.manager is not None: + pl = self.manager.get_pipeline_by_name_or_id(self.pipeline_idx) + return pl + raise ValueError("No pipeline bound or bound pipeline not found") + + def get_active_knowledge_base(self): + if self.manager is not None: + kb = self.manager.get_active_knowledge_base() + return kb + return None + + async def llm_generate(self, request: ChatCompletionRequest, streaming): + request.stream = streaming + request.messages = self._messages + response = await self._run_pipeline_generate(request) + return response + + async def llm_generate_astream_writer(self, request, prefix=None, suffix=None) -> str: + response = "" + writer = get_stream_writer() + first = True + generator = await self.llm_generate(request, True) + async for chunk in generator: + if first and prefix: + writer(prefix + chunk) + first = False + else: + writer(chunk) + response += chunk + if suffix: + writer(suffix) + response = remove_think_tags(response) + return response + + # wrappers for calling pipeline + async def run_pipeline_chatqna(self, request): + pl = self.get_bound_pipeline() + if pl is not None: + return await pl.run(cbtype=CallbackType.PIPELINE, chat_request=request) + + async def _run_pipeline_generate(self, request): + pl = self.get_bound_pipeline() + if pl is not None: + return await pl.run(cbtype=CallbackType.GENERATE, chat_request=request) + + async def run_pipeline_retrieve_and_rerank(self, request): + pl = self.get_bound_pipeline() + if pl is not None: + return await pl.run(cbtype=CallbackType.RETRIEVE_POSTPROCESS, chat_request=request) + + async def run_pipeline_retrieve(self, request): + pl = self.get_bound_pipeline() + if pl is not None: + return await pl.run(cbtype=CallbackType.RETRIEVE, chat_request=request) + + async def run_pipeline_rerank(self, request, contexts): + pl = self.get_bound_pipeline() + if pl is not None: + return await pl.run(cbtype=CallbackType.POSTPROCESS, chat_request=request, contexts=contexts) + + async def run_pipeline_query_search(self, request): + pl = self.get_bound_pipeline() + if pl is not None: + return await pl.run(cbtype=CallbackType.QUERYSEARCH, chat_request=request) + + @model_serializer + def ser_model(self): + isactive = True if self.idx == self.manager.get_active_agent_id() else False + set = { + "idx": self.idx, + "name": self.name, + "type": self.comp_subtype, + "pipeline_idx": self.pipeline_idx, + "configs": self.configs, + "active": isactive, + } + return 
set + + +async def stream_writer(input): + writer = get_stream_writer() + async for chunk in stream_generator(input): + writer(chunk) diff --git a/EdgeCraftRAG/edgecraftrag/components/agents/__init__.py b/EdgeCraftRAG/edgecraftrag/components/agents/__init__.py new file mode 100644 index 0000000000..4057dc0163 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/agents/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/__init__.py b/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/__init__.py new file mode 100644 index 0000000000..4057dc0163 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/__init__.py @@ -0,0 +1,2 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 diff --git a/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/cfgs/default.json b/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/cfgs/default.json new file mode 100644 index 0000000000..98afc83cb3 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/cfgs/default.json @@ -0,0 +1,24 @@ +{ + "system_instruction": "As an expert AI assistant for TCB Bonder systems, your goal is to provide accurate solutions. Analyze the user's question, create a retrieval plan, gather information, and synthesize a step-by-step answer. Follow all instructions.", + "plan_instruction": "To maximize retrieval recall, create a multi-step query plan. First, deconstruct the user's question into its core components and symptoms. Then, generate hypotheses about the potential root causes. Finally, create a numbered list of 2-5 queries to investigate these hypotheses.\n\n* **Step 1 (Rephrase and Broaden):** Start with a comprehensive query that rephrases the user's question, including synonyms and alternative phrasings to ensure broad initial coverage.\n* **Subsequent Steps (Hypothesis Testing):** Each following query should be a targeted, self-contained question designed to confirm or deny a specific hypothesis. These queries must include precise technical terms, component names, and potential error codes to retrieve the most relevant documents.\n\nYour final output must be only the numbered list of queries.", + "query_instruction": "After each retrieval, evaluate if you have enough information to solve the problem. If not, and if your plan has more steps, formulate the next query. This query must be a concise, targeted sub-question with precise keywords to fill a specific knowledge gap. Do not use prefixes like 'Query:'./no_think", + "answer_instruction": "As a TCB Bonder expert, synthesize the retrieved information into a final, actionable answer for the user.\n\n**User's Question:**\n{question}\n\n**Retrieved Information:**\n{plan_with_information}\n\n**Your Task:**\n1. **Synthesize and Filter:** Review all retrieved context, using only the most relevant information to address the user's problem.\n2. **Structure and Format:** Organize the solution into a clear, step-by-step guide. Present it as a numbered or bulleted list, highlighting any warnings at the beginning./no_think", + "domain_knowledge": "", + "prompt_templates": { + "system": "{system_instruction}\n\n{query_instruction}\n\n{domain_knowledge}\n\n{experiences}\n", + "generate_query": "Now generate a query for the next retrieval./no_think", + "make_plan": "Now generate a plan based on the user's question above. 
\n\n{plan_instruction}\n\nFormat the plan as a (Python) list containing the ordered steps, each step is a string./no_think", + "plan": "The following is the plan to step by step retrieve knowledge needed and work out an answer to user's question:\n{plan_steps}\n", + "plan_step": "Step {num}: {step}.", + "context": "\n{context}\n\n", + "contexts": "The following are the retrieved contexts for current query.\n{contexts}\n", + "continue_decision": "Is more information needed? Answer Yes or No. Then explain why or why not.", + "experiences": "The following are question-plan examples by human experts. Refer to them to better make your plan. If you find that there is a question that is highly similar or exactly match the input question, then strictly follow the subquestions to make the plan.\n\n{experiences}\n" + }, + + "retrieve_top_k": 60, + "rerank_top_k": 3, + "mece_retrieval": true, + "max_retrievals": 3, + "max_plan_steps": 3 +} diff --git a/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/cfgs/tcb_demo_v8_rs_v3.json b/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/cfgs/tcb_demo_v8_rs_v3.json new file mode 100644 index 0000000000..914924a59c --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/cfgs/tcb_demo_v8_rs_v3.json @@ -0,0 +1,25 @@ +{ + "system_instruction": "As an expert AI assistant for TCB Bonder systems, your goal is to provide accurate solutions. Analyze the user's question, create a retrieval plan, gather information, and synthesize a step-by-step answer. Follow all instructions.", + "plan_instruction": "To maximize retrieval recall, create a multi-step query plan. First, deconstruct the user's question into its core components and symptoms. Then, generate hypotheses about the potential root causes. Finally, create a numbered list of 2-5 queries to investigate these hypotheses.\n\n* **Step 1 (Rephrase and Broaden):** Start with a comprehensive query that rephrases the user's question, including synonyms and alternative phrasings to ensure broad initial coverage.\n* **Subsequent Steps (Hypothesis Testing):** Each following query should be a targeted, self-contained question designed to confirm or deny a specific hypothesis. These queries must include precise technical terms, component names, and potential error codes to retrieve the most relevant documents.\n\nYour final output must be only the numbered list of queries.", + "query_instruction": "After each retrieval, evaluate if you have enough information to solve the problem. If not, and if your plan has more steps, formulate the next query. This query must be a concise, targeted sub-question with precise keywords to fill a specific knowledge gap. Do not use prefixes like 'Query:'.", + "answer_instruction": "As a TCB Bonder expert, your task is to synthesize the raw, unfiltered retrieved documents into a final, actionable answer for the user.\n\n**User's Question:**\n{question}\n\n**Retrieved Information:**\n{plan_with_information}\n\n**Your Task:**\n1. **Synthesize and Filter:** The retrieved information consists of raw, and potentially noisy, document chunks. Critically evaluate the relevance and accuracy of all retrieved context. You must filter out irrelevant, redundant, or contradictory information to distill only the most pertinent facts for solving the user's problem.\n2. **Structure and Format:** Organize the solution into a clear, step-by-step guide. Present it as a numbered or bulleted list, highlighting any warnings at the beginning. 
Your answer must be based *only* on the provided retrieved information.", + "recur_summarize_instruction": "Now, identify the useful context (and ignore the irrelevant text) from the previous search steps and summarize the search process in a concise manner.", + "domain_knowledge": "", + "prompt_templates": { + "system": "{system_instruction}\n\n{query_instruction}\n\n{domain_knowledge}\n\n{experiences}\n", + "generate_query": "Now generate a query for the next retrieval.", + "make_plan": "Now generate a plan based on the user's question above. \n\n{plan_instruction}\n\nFormat the plan as a (Python) list containing the ordered steps, each step is a string.", + "plan": "The following is the plan to step by step retrieve knolwedge needed and work out a answer to user's question:\n{plan_steps}\n", + "plan_step": "Step {num}: {step}.", + "context": "\n{context}\n\n", + "contexts": "The following are the retrieved contexts for current query.\n{contexts}\n", + "continue_decision": "Is more information needed? Answer Yes or No. Then explain why or why not.", + "experiences": "The following are question-plan examples by human experts. Refer to them to better make your plan. If you find that there is a question that is highly similar or exactly match the input question, then strictly follow the subquestions to make the plan.\n\n{experiences}\n" + }, + + "retrieve_top_k": 60, + "rerank_top_k": 3, + "mece_retrieval": true, + "max_retrievals": 3, + "max_plan_steps": 6 +} diff --git a/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/cfgs/tcb_demo_v8_rss_v3.json b/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/cfgs/tcb_demo_v8_rss_v3.json new file mode 100644 index 0000000000..b236df0a27 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/cfgs/tcb_demo_v8_rss_v3.json @@ -0,0 +1,26 @@ +{ + "system_instruction": "As an expert AI assistant for TCB Bonder systems, your goal is to provide accurate solutions. Analyze the user's question, create a retrieval plan, gather information, and synthesize a step-by-step answer. Follow all instructions.", + "plan_instruction": "To maximize retrieval recall, create a multi-step query plan. First, deconstruct the user's question into its core components and symptoms. Then, generate hypotheses about the potential root causes. Finally, create a numbered list of 2-5 queries to investigate these hypotheses.\n\n* **Step 1 (Rephrase and Broaden):** Start with a comprehensive query that rephrases the user's question, including synonyms and alternative phrasings to ensure broad initial coverage.\n* **Subsequent Steps (Hypothesis Testing):** Each following query should be a targeted, self-contained question designed to confirm or deny a specific hypothesis. These queries must include precise technical terms, component names, and potential error codes to retrieve the most relevant documents.\n\nYour final output must be only the numbered list of queries.", + "query_instruction": "After each retrieval, evaluate if you have enough information to solve the problem. If not, and if your plan has more steps, formulate the next query. This query must be a concise, targeted sub-question with precise keywords to fill a specific knowledge gap. Do not use prefixes like 'Query:'.", + "answer_instruction": "As a TCB Bonder expert, your task is to synthesize the pre-processed, summarized information into a final, actionable answer for the user.\n\n**User's Question:**\n{question}\n\n**Retrieved Information:**\n{plan_with_information}\n\n**Your Task:**\n1. 
**Synthesize and Filter:** The retrieved information consists of concise summaries from multiple retrieval steps. Your primary goal is to integrate these summaries into a single, coherent response. While the information is pre-filtered, you must still identify and use only the most relevant details to address the user's specific problem.\n2. **Structure and Format:** Organize the solution into a clear, step-by-step guide. Present it as a numbered or bulleted list, highlighting any warnings at the beginning. Your answer must be based *only* on the provided summarized information.", + "recur_summarize_instruction": "Now, identify the useful context (and ignore the irrelevant text) from the previous search steps and summarize the search process in a concise manner.", + "domain_knowledge": "", + "prompt_templates": { + "system": "{system_instruction}\n\n{query_instruction}\n\n{domain_knowledge}\n\n{experiences}\n", + "generate_query": "Now generate a query for the next retrieval.", + "make_plan": "Now generate a plan based on the user's question above. \n\n{plan_instruction}\n\nFormat the plan as a (Python) list containing the ordered steps, each step is a string.", + "plan": "The following is the plan to step by step retrieve knolwedge needed and work out a answer to user's question:\n{plan_steps}\n", + "plan_step": "Step {num}: {step}.", + "context": "\n{context}\n\n", + "contexts": "The following are the retrieved contexts for current query.\n{contexts}\n", + "continue_decision": "Is more information needed? Answer Yes or No. Then explain why or why not.", + "experiences": "The following are question-plan examples by human experts. Refer to them to better make your plan. If you find that there is a question that is highly similar or exactly match the input question, then strictly follow the subquestions to make the plan.\n\n{experiences}\n" + }, + + "retrieve_top_k": 60, + "rerank_top_k": 3, + "mece_retrieval": true, + "max_retrievals": 3, + "max_plan_steps": 6, + "use_summarized_context": true +} diff --git a/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/config.py b/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/config.py new file mode 100644 index 0000000000..1c40ed023c --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/config.py @@ -0,0 +1,86 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +"""Configuration models and helpers for Mini Deep Search.""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any, Dict + +from pydantic import BaseModel, Field + + +class PromptTemplates(BaseModel): + """Collection of prompt templates used by the DeepSearch workflow.""" + + system: str = Field(..., description="Template for the system prompt.") + generate_query: str = Field(..., description="Instruction for generating the next retrieval query.") + make_plan: str = Field(..., description="Instruction for constructing the retrieval plan.") + plan: str = Field(..., description="Format string for presenting the plan back to the model.") + plan_step: str = Field(..., description="Template used for each individual plan step.") + context: str = Field(..., description="Template for wrapping a single context chunk.") + contexts: str = Field(..., description="Template for presenting all contexts for evaluation.") + continue_decision: str = Field(..., description="Instruction asking the model whether more retrieval is needed.") + experiences: str = Field(..., description="Template used 
when experience search results are available.") + + +class Config(BaseModel): + """Runtime configuration for the Mini Deep Search pipeline.""" + + system_instruction: str + plan_instruction: str = "" + query_instruction: str + answer_instruction: str + domain_knowledge: str + retrieve_top_k: int + rerank_top_k: int + mece_retrieval: bool = False + max_retrievals: int + max_plan_steps: int = 7 + recur_summarize_instruction: str = "" + postproc: str = "defaults.py" + use_summarized_context: bool = False + prompt_templates: PromptTemplates + + +def _resolve_path(value: str, base_path: Path) -> str: + """Resolve value relative to ``base_path`` if it is an existing file.""" + if not value: + return value + value_path = Path(value) + if value_path.is_absolute(): + return str(value_path) + candidate = base_path / value + return str(candidate) if candidate.exists() else value + + +def load_config(config_path: str) -> Config: + """Load and normalise a configuration file. + + Args: + config_path: Path to the configuration JSON. + + Returns: + A fully-populated :class:`Config` instance. + """ + + config_file = Path(config_path).expanduser().resolve() + with config_file.open("r", encoding="utf-8") as handle: + config_dict: Dict[str, Any] = json.load(handle) + + base_dir = config_file.parent + + # Resolve relative paths where applicable. + for key in ("domain_knowledge", "postproc"): + if key in config_dict and isinstance(config_dict[key], str): + config_dict[key] = _resolve_path(config_dict[key], base_dir) + + cfg = Config(**config_dict) + + # Expand domain knowledge file lazily if it points to a file. + domain_path = Path(cfg.domain_knowledge) + if domain_path.exists() and domain_path.is_file(): + cfg.domain_knowledge = domain_path.read_text(encoding="utf-8") + + return cfg diff --git a/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/deep_search.py b/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/deep_search.py new file mode 100644 index 0000000000..ada6713be1 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/deep_search.py @@ -0,0 +1,628 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +"""Core DeepSearch implementation.""" + +from __future__ import annotations + +import asyncio +import os +from typing import Any, List, Tuple + +from comps.cores.proto.api_protocol import ChatCompletionRequest +from edgecraftrag.base import AgentType, CallbackType, CompType +from edgecraftrag.components.agent import Agent, stream_writer +from langgraph.graph import END, START, StateGraph +from pydantic import BaseModel, Field + +from .config import load_config +from .logging_utils import format_terminal_str, log_status +from .postprocessing import postproc_answer as default_postproc_answer +from .postprocessing import postproc_plan as default_postproc_plan +from .postprocessing import postproc_query as default_postproc_query +from .utils import Role, import_module_from_path + +DEFAULT_CONFIG = "./edgecraftrag/components/agents/deep_search/cfgs/default.json" + + +class Retrieval(BaseModel): + step: str + query: str + retrieved: List[Any] = Field(...) + reranked: List[Any] = Field(...) 
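For orientation, `Retrieval` above is the per-step record that the `DeepSearchState` defined next accumulates in its `retrievals` list. A minimal usage sketch follows, assuming a `DeepSearchState` instance named `state`; the string values are placeholders for the LlamaIndex node objects the real workflow stores.

```python
# Hypothetical usage sketch, not part of the diff: recording one retrieval step.
step_record = Retrieval(
    step="Investigate bond head over-temperature alarms",            # current plan step
    query="TCB bonder bond head over-temperature alarm root cause",  # query sent to the retriever
    retrieved=["chunk-12", "chunk-48", "chunk-97"],                   # placeholders for retrieved nodes
    reranked=["chunk-48"],                                            # placeholders for top reranked nodes
)
state.retrievals.append(step_record)        # DeepSearchState.retrievals keeps the step history
state.context_chunk_ids.append("chunk-48")  # later steps consult this list for de-duplication
```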
+ + +class DeepSearchState(BaseModel): + question: str + query: str + step: int + num_retrievals: int + answer: str + + plan: List[str] = Field(default_factory=list) + retrievals: List[Retrieval] = Field(default_factory=list) + context_chunk_ids: List[str] = Field(default_factory=list) + search_summaries: List[str] = Field(default_factory=list) + + request: ChatCompletionRequest + + +class DeepSearchAgent(Agent): + """Driver class orchestrating the deep search workflow.""" + + def __init__(self, idx, name, pipeline_idx, cfg): + super().__init__(name=name, agent_type=AgentType.DEEPSEARCH, pipeline_idx=pipeline_idx, configs=cfg) + + # Load the configuration + # TODO: remove deep path + self.cfg = load_config(DEFAULT_CONFIG) + if idx is not None: + self.idx = idx + if "retrieve_top_k" in cfg: + self.cfg.retrieve_top_k = cfg["retrieve_top_k"] + if "rerank_top_k" in cfg: + self.cfg.rerank_top_k = cfg["rerank_top_k"] + if "mece_retrieval" in cfg: + self.cfg.mece_retrieval = cfg["mece_retrieval"] + if "max_retrievals" in cfg: + self.cfg.max_retrievals = cfg["max_retrievals"] + if "max_plan_steps" in cfg: + self.cfg.max_plan_steps = cfg["max_plan_steps"] + + self.graph = self._build_graph() + self._messages: List[dict] = [] + self.conversation_history: List[dict] = [] + + postproc_module = None + if self.cfg.postproc: + try: + postproc_module = import_module_from_path(self.cfg.postproc) + except ImportError as exc: + log_status( + "⚠️", + format_terminal_str( + f"Failed to import postproc module '{self.cfg.postproc}': {exc}", color="yellow" + ), + ) + postproc_module = postproc_module or None + self.postproc_query = getattr(postproc_module, "postproc_query", default_postproc_query) + self.postproc_answer = getattr(postproc_module, "postproc_answer", default_postproc_answer) + self.postproc_plan = getattr(postproc_module, "postproc_plan", default_postproc_plan) + + @classmethod + def get_default_configs(cls): + cfg = load_config(DEFAULT_CONFIG) + return { + "retrieve_top_k": cfg.retrieve_top_k, + "rerank_top_k": cfg.rerank_top_k, + "mece_retrieval": cfg.mece_retrieval, + "max_retrievals": cfg.max_retrievals, + "max_plan_steps": cfg.max_plan_steps, + } + + def update(self, cfg): + retrieve = cfg.get("retrieve_top_k", None) + if retrieve and isinstance(retrieve, int): + self.cfg.retrieve_top_k = retrieve + self.configs["retrieve_top_k"] = retrieve + + rerank = cfg.get("rerank_top_k", None) + if rerank and isinstance(rerank, int): + self.cfg.rerank_top_k = rerank + self.configs["rerank_top_k"] = rerank + + mr = cfg.get("mece_retrieval", None) + if mr and isinstance(mr, int): + self.cfg.mece_retrieval = mr + self.configs["mece_retrieval"] = mr + + maxr = cfg.get("max_retrievals", None) + if maxr and isinstance(maxr, int): + self.cfg.max_retrievals = maxr + self.configs["max_retrievals"] = maxr + + mps = cfg.get("max_plan_steps", None) + if mps and isinstance(mps, int): + self.cfg.max_plan_steps = mps + self.configs["max_plan_steps"] = mps + + async def _build_init_messages(self, request: ChatCompletionRequest) -> List[dict]: + if os.path.isfile(self.cfg.domain_knowledge): + with open(self.cfg.domain_knowledge, "r", encoding="utf-8") as file: + self.cfg.domain_knowledge = file.read() + + experiences_block = "" + experience_status = True if request.tool_choice == "auto" else False + if experience_status: + log_status( + "🔍", + format_terminal_str( + "Retrieving experiences from experience knowledge base ...\n", + color="cyan", + bold=True, + ), + ) + _, query_search_result = await 
self.run_pipeline_query_search(request) + raw_examples = query_search_result + if isinstance(query_search_result, dict): + raw_examples = query_search_result.get("results") or "" + if isinstance(raw_examples, str): + examples = [chunk for chunk in raw_examples.split("\n\n") if chunk.strip()] + elif isinstance(raw_examples, list): + examples = [chunk for chunk in raw_examples if isinstance(chunk, str) and chunk.strip()] + else: + examples = [] + if examples: + num_retrieved = len(examples) + num_max_examples = 3 + if num_retrieved > num_max_examples: + examples = examples[:num_max_examples] + log_status( + "📚", + f"Retrieved {format_terminal_str(str(num_retrieved), color='cyan', bold=True)} similar questions from experience database.", + ) + log_status( + "⚠️", + f"Truncated to top {format_terminal_str(str(num_max_examples), color='cyan', bold=True)} examples for prompt.\n", + ) + else: + log_status( + "📚", + f"Retrieved {format_terminal_str(str(num_retrieved), color='cyan', bold=True)} similar questions from experience database.\n", + ) + experiences_block = self.cfg.prompt_templates.experiences.format(experiences="\n\n".join(examples)) + return [ + { + "role": Role.SYSTEM.value, + "content": self.cfg.prompt_templates.system.format( + system_instruction=self.cfg.system_instruction, + query_instruction=self.cfg.query_instruction, + domain_knowledge=self.cfg.domain_knowledge, + experiences=experiences_block, + ), + } + ] + + async def _retrieve_and_rerank( + self, state: DeepSearchState, mece_retrieve: bool = False + ) -> Tuple[List[Any], List[Any], List[str]]: + retrieval_query, rerank_query = self.postproc_query(state.query, state) + mece_retrieve = mece_retrieve or self.cfg.mece_retrieval + request = state.request + request.messages = retrieval_query + contexts = await self.run_pipeline_retrieve(request) + # Llamaindex NodeWithScore Structure + retrieved = contexts[CompType.RETRIEVER] + + if mece_retrieve: + new_retrieved = [node for node in retrieved if node.node_id not in state.context_chunk_ids] + # TODO: Using top_k from request, need to change? 
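+            # MECE retrieval: keep only chunks whose node_id has not been used in an
+            # earlier step, then cap the result at the request's top_k (see TODO above).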
+ new_retrieved = new_retrieved[: request.k] + else: + new_retrieved = retrieved + + contexts[CompType.RETRIEVER] = new_retrieved + + request = state.request + request.messages = rerank_query + contexts = await self.run_pipeline_rerank(request, contexts) + reranked = contexts[CompType.POSTPROCESSOR] + reranked_chunk_ids = [node.node_id for node in reranked] + return new_retrieved, reranked, state.context_chunk_ids + reranked_chunk_ids + + async def retrieve(self, state: DeepSearchState) -> dict: + retrieved, reranked, updated_chunk_ids = await self._retrieve_and_rerank(state) + log_status( + "🔍", + f"Retrieved {format_terminal_str(str(len(retrieved)), color='magenta', bold=True)} documents, " + f"Reranked to top {format_terminal_str(str(len(reranked)), color='magenta', bold=True)}.", + ) + await stream_writer(f"\n\n🔍 **Retrieved {str(len(retrieved))}, Reranked to top {str(len(reranked))}**\n\n") + new_retrieval = Retrieval( + step=state.plan[state.step], + query=state.query, + retrieved=retrieved, + reranked=reranked, + ) + return { + "num_retrievals": state.num_retrievals + 1, + "retrievals": [*state.retrievals, new_retrieval], + "context_chunk_ids": updated_chunk_ids, + } + + async def check_retrieved(self, state: DeepSearchState) -> str: + log_status("🤔", format_terminal_str("Evaluating if more information is needed", color="green")) + await stream_writer("\n\n🤔 **Evaluating if more information is needed**\n\n") + contexts = self.cfg.prompt_templates.contexts.format( + contexts="\n".join( + [self.cfg.prompt_templates.context.format(context=doc.text) for doc in state.retrievals[-1].reranked] + ) + ) + messages = [ + { + "role": Role.SYSTEM.value, + "content": contexts, + }, + { + "role": Role.SYSTEM.value, + "content": self.cfg.prompt_templates.continue_decision, + }, + ] + self._messages.extend(messages) + self.conversation_history.extend(messages) + if state.num_retrievals >= self.cfg.max_retrievals: + log_status( + "⚠️", + format_terminal_str( + f"Reached maximum retrievals: {self.cfg.max_retrievals}, stopping search\n", + color="yellow", + bold=True, + ), + ) + await stream_writer(f"\n\n⚠️ **Reached maximum retrievals: {self.cfg.max_retrievals}, stopping search**\n\n") + return "stop" + + response = await self.llm_generate_astream_writer(state.request) + + message = { + "role": Role.ASSISTANT.value, + "content": response, + } + self._messages.append(message) + self.conversation_history.append(message) + if response.upper().startswith("NO"): + log_status( + "✅", + format_terminal_str("Information is sufficient, moving to next step\n", color="green"), + ) + await stream_writer("\n\n✅ **Information is sufficient, moving to next step**\n\n") + return "stop" + log_status( + "🔄", + format_terminal_str("Need more information, generating new query ...", color="green"), + ) + await stream_writer("\n\n🔄 **Need more information, generating new query**\n\n") + return "continue" + + async def generate_query(self, state: DeepSearchState) -> dict: + await stream_writer("\n\n💡 **Generating a query to help to understand the question**\n\n") + message = { + "role": Role.SYSTEM.value, + "content": self.cfg.prompt_templates.generate_query, + } + self._messages.append(message) + self.conversation_history.append(message) + + response = await self.llm_generate_astream_writer(state.request) + + message = { + "role": Role.ASSISTANT.value, + "content": response, + } + self._messages.append(message) + self.conversation_history.append(message) + return {"query": response} + + async def execute_next_step(self, 
state: DeepSearchState) -> None: + step = state.plan[state.step] + title_str = format_terminal_str( + f"Executing Step {state.step + 1}/{len(state.plan)}:", + color="green", + bold=True, + ) + log_status("🚀", f"{title_str} {format_terminal_str(step, italic=True)}") + log_status("💡", format_terminal_str("Generating the initial query ...", color="green")) + await stream_writer(f'') + message = { + "role": Role.SYSTEM.value, + "content": f"Start to execute the step: {step}\n", + } + self._messages.append(message) + self.conversation_history.append(message) + + async def finish_search(self, state: DeepSearchState) -> dict: + await stream_writer("") + return {"step": state.step + 1, "num_retrievals": 0} + + async def check_execution(self, state: DeepSearchState) -> str: + if state.step >= len(state.plan): + log_status("🏁", format_terminal_str("All planned steps completed", color="cyan", bold=True)) + await stream_writer('') + return "stop" + return "continue" + + async def make_plan(self, state: DeepSearchState) -> dict: + log_status("📋", format_terminal_str("Making a plan ...", color="cyan", bold=True)) + await stream_writer('') + messages = [ + { + "role": Role.USER.value, + "content": state.question, + }, + { + "role": Role.SYSTEM.value, + "content": self.cfg.prompt_templates.make_plan.format(plan_instruction=self.cfg.plan_instruction), + }, + ] + self._messages.extend(messages) + self.conversation_history.extend(messages) + + response = await self.llm_generate(state.request, False) + + plan = self.postproc_plan(response, state, self.cfg) + num_plan_step = len(plan) + for i, step in enumerate(plan): + step_num_str = format_terminal_str(f"Step{i+1: >2d}:", color="green", bold=True) + step_str = format_terminal_str(step, bold=False, italic=True) + suffix = "\n" if i == num_plan_step - 1 else "" + log_status("📌", f"{step_num_str} {step_str}{suffix}") + await stream_writer(f"📌 Step{i+1: >2d}: {step}\n\n") + await stream_writer("") + plan_prompt = self.cfg.prompt_templates.plan.format( + plan_steps="\n".join( + [self.cfg.prompt_templates.plan_step.format(num=i + 1, step=step) for i, step in enumerate(plan)] + ) + ) + message = { + "role": Role.ASSISTANT.value, + "content": plan_prompt, + } + self._messages.append(message) + self.conversation_history.append(message) + return {"plan": plan, "step": 0, "num_retrievals": 0} + + async def summarize_search(self, state: DeepSearchState) -> dict: + log_status("📝", format_terminal_str("Summarizing the search process ...", color="cyan", bold=True)) + await stream_writer("📝 **Summarizing the search process**") + messages = [ + { + "role": Role.SYSTEM.value, + "content": self.cfg.recur_summarize_instruction, + } + ] + self._messages.extend(messages) + self.conversation_history.extend(messages) + + response = await self.llm_generate_astream_writer(state.request) + + message = { + "role": Role.ASSISTANT.value, + "content": response, + } + self.conversation_history.append(message) + self._messages = [ + self._messages[0], + self._messages[1], + self._messages[3], + ] + self._messages.append( + { + "role": Role.ASSISTANT.value, + "content": "The following is the summarized information from previous search steps:\n" + response, + } + ) + log_status("✅", format_terminal_str("Search process summarized\n", color="cyan", bold=True)) + await stream_writer("✅ **Search process summarized**") + return {"search_summaries": [*state.search_summaries, response]} + + async def generate_answer(self, state: DeepSearchState) -> dict: + log_status("📝", 
format_terminal_str("Generating the final answer ...", color="cyan", bold=True)) + await stream_writer('') + + if self.cfg.use_summarized_context and state.search_summaries: + plan_with_information = "Plan with Summarized Information:\n" + for i, step in enumerate(state.plan): + plan_with_information += f"Step {i+1}: {step}\n" + if i < len(state.search_summaries): + plan_with_information += f"- Summary: {state.search_summaries[i]}\n\n" + else: + plan_with_information += "- Summary: N/A\n\n" + else: + if not self.cfg.mece_retrieval: + plan_with_information = ( + "Plan:\n" + "\n".join([f"{i+1}. {step}" for i, step in enumerate(state.plan)]) + "\n\n" + ) + plan_with_information += "Retrieved Information:\n" + presented_ids = [] + for retrieval in state.retrievals: + for doc in retrieval.reranked: + node_id = doc.node_id + if node_id not in presented_ids: + plan_with_information += f"{doc.text}\n\n" + presented_ids.append(node_id) + else: + plan_with_information = "Plan with Retrieved Information:\n" + for i, step in enumerate(state.plan): + plan_with_information += f"Step {i+1}: {step}\n" + related_docs = [] + for retrieval in state.retrievals: + if retrieval.step == step: + related_docs = retrieval.reranked + break + for doc in related_docs: + plan_with_information += f"- {doc.text}\n" + plan_with_information += "\n" + + self._messages = [ + { + "role": Role.SYSTEM.value, + "content": self.cfg.answer_instruction.format( + question=state.question, + plan_with_information=plan_with_information, + ), + } + ] + self.conversation_history.extend(self._messages) + + response = await self.llm_generate_astream_writer(state.request) + + self.conversation_history.append( + { + "role": Role.ASSISTANT.value, + "content": response, + } + ) + answer = self.postproc_answer(response, state) + title_str = format_terminal_str("Final Answer:", color="blue", bold=True) + log_status( + "✅", + format_terminal_str( + f"{title_str}\n{format_terminal_str(answer, italic=True, bold=True)}", + color="blue", + bold=True, + ), + ) + return {"answer": answer} + + def _build_graph(self): + search = StateGraph(DeepSearchState) + search.add_node("generate_query", self.generate_query) + search.add_node("retrieve", self.retrieve) + search.add_node("finish_search", self.finish_search) + + search.add_edge(START, "generate_query") + search.add_edge("generate_query", "retrieve") + search.add_conditional_edges( + "retrieve", + self.check_retrieved, + { + "stop": "finish_search", + "continue": "generate_query", + }, + ) + if self.cfg.recur_summarize_instruction: + search.add_edge("finish_search", "summarize") + search.add_node("summarize", self.summarize_search) + search.add_edge("summarize", END) + else: + search.add_edge("finish_search", END) + + deep_search = StateGraph(DeepSearchState) + deep_search.add_node("make_plan", self.make_plan) + deep_search.add_node("execute_search_step", self.execute_next_step) + deep_search.add_node("search", search.compile()) + deep_search.add_node("final_answer", self.generate_answer) + + deep_search.add_edge(START, "make_plan") + deep_search.add_edge("make_plan", "execute_search_step") + deep_search.add_edge("execute_search_step", "search") + deep_search.add_conditional_edges( + "search", + self.check_execution, + { + "stop": "final_answer", + "continue": "execute_search_step", + }, + ) + deep_search.add_edge("final_answer", END) + + return deep_search.compile() + + def generate_report(self, result: dict, report_path: str) -> str: + import datetime + + log_status( + "📝", + format_terminal_str( + 
f"Generating markdown report at {report_path}", + color="cyan", + bold=True, + ), + ) + question = result.get("question", "No question provided") + plan = result.get("plan", []) + answer = result.get("answer", "No answer provided") + retrievals = result.get("retrievals", []) + search_summaries = result.get("search_summaries", []) + graph_mermaid = result.get("graph_mermaid", "") + timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + report = [ + "# Deep Search Report", + f"*Generated on: {timestamp}*", + "", + "## Original Question", + f"> {question}", + "", + "## Research Plan", + ] + for i, step in enumerate(plan): + report.append(f"{i+1}. {step}") + report.append("") + report.append("---") + report.append("## Search Statistics") + report.append(f"- **Total Retrieval Operations:** {len(retrievals)}") + if retrievals: + total_docs = sum(len(r.retrieved) for r in retrievals) + report.append(f"- **Total Documents Retrieved:** {total_docs}") + total_reranked = sum(len(r.reranked) for r in retrievals) + report.append(f"- **Total Documents After Reranking:** {total_reranked}") + report.append("") + report.append("---") + report.append("## Final Answer") + report.append(answer) + if search_summaries: + report.append("## Search Summaries") + for i, summary in enumerate(search_summaries): + report.append(f"### Summary for Step {i+1}") + report.append(summary) + report.append("") + report.append("---") + report.append("## Search Process Details") + for i, retrieval in enumerate(retrievals): + step_index = i + 1 + step_desc = retrieval.step + report.append(f"### Retrieval {step_index}: {step_desc}") + report.append(f'**Query:** "{retrieval.query}"') + report.append("#### Retrieved Documents Summary") + for j, doc in enumerate(retrieval.reranked[:3]): + doc_content = doc.text + if len(doc_content) > 500: + doc_content = doc_content[:500] + "..." 
+ doc_content = doc_content.replace("\n", "\n> ") + report.append(f"**Document {j+1}:**") + report.append(f"> {doc_content}") + report.append("") + if i < len(retrievals) - 1: + report.append("---") + if graph_mermaid: + report.append("## Search Graph") + report.append("```mermaid") + report.append(graph_mermaid) + report.append("```") + report.append("") + with open(report_path, "w", encoding="utf-8") as handle: + handle.write("\n\n".join(report)) + return report_path + + # Implement abstract run function + # callback dispatcher + async def run(self, **kwargs) -> Any: + if "cbtype" in kwargs: + if kwargs["cbtype"] == CallbackType.RUNAGENT: + request = kwargs["chat_request"] + + log_status( + "🤿", + f"{format_terminal_str('Starting DeepSearch:', color='cyan', bold=True)} {format_terminal_str(request.messages, italic=True)}\n", + ) + state = DeepSearchState( + question=request.messages, + query="", + step=0, + num_retrievals=0, + answer="", + plan=[], + retrievals=[], + request=request, + ) + self._messages = await self._build_init_messages(request) + + async def async_gen(): + async for event, chunk in self.graph.astream(state, subgraphs=True, stream_mode="custom"): + yield chunk + await asyncio.sleep(0) + + # log_status("✅", format_terminal_str("DeepSearch process completed", color="cyan", bold=True)) + # result["conversation"] = [*self.conversation_history] + # result["graph_mermaid"] = self.graph.get_graph(xray=True).draw_mermaid() + return async_gen() diff --git a/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/logging_utils.py b/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/logging_utils.py new file mode 100644 index 0000000000..bfa596dedb --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/logging_utils.py @@ -0,0 +1,74 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +"""Logging helpers for Mini Deep Search.""" + +import logging +import os +from typing import List + +_LOGGER_NAME = "deep_search" + + +def _configure_logger() -> logging.Logger: + logger = logging.getLogger(_LOGGER_NAME) + if logger.handlers: + return logger + + logger.setLevel(logging.INFO) + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.INFO) + formatter = logging.Formatter("[%(asctime)s] %(message)s", datefmt="%H:%M:%S") + console_handler.setFormatter(formatter) + logger.addHandler(console_handler) + return logger + + +LOGGER = _configure_logger() + + +def log_status(emoji: str, message: str, indent: int = 0) -> None: + """Emit a formatted status message with optional indent.""" + indent_str = " " * indent + LOGGER.info("%s%s %s", indent_str, emoji, message) + + +def format_terminal_str(text: str, color: str = "", bold: bool = False, italic: bool = False) -> str: + """Format ``text`` with ANSI colours, bold or italics.""" + if text is None: + text = "" + + if os.environ.get("NO_COLOR"): + return text + + color_map = { + "black": 30, + "red": 31, + "green": 32, + "yellow": 33, + "blue": 34, + "magenta": 35, + "cyan": 36, + "white": 37, + "bright_black": 90, + "bright_red": 91, + "bright_green": 92, + "bright_yellow": 93, + "bright_blue": 94, + "bright_magenta": 95, + "bright_cyan": 96, + "bright_white": 97, + } + + style_seq: List[str] = [] + if color and color.lower() in color_map: + style_seq.append(str(color_map[color.lower()])) + if bold: + style_seq.append("1") + if italic: + style_seq.append("3") + + if not style_seq: + return text + + prefix = f"\033[{';'.join(style_seq)}m" + return 
f"{prefix}{text}\033[0m" diff --git a/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/postproc_jqa.py b/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/postproc_jqa.py new file mode 100644 index 0000000000..4a0939a2ee --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/postproc_jqa.py @@ -0,0 +1,36 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json_repair +from mini_deep_search.logging_utils import format_terminal_str, log_status + + +def postproc_query(response_text, state): + """ + load query for retrieval and rerank from a predefined JSON: + { + "keywords": "keywords for retrieval", + "query": "query for rerank" + } + """ + # Default use the raw response text as the query for both retrieval and rerank + try: + # Attempt to parse the response text as JSON + parsed_json = json_repair.loads(response_text) + keywords_str = parsed_json.get("keywords", "") + query_str = parsed_json.get("query", "") + log_status( + "🧲", + f"{format_terminal_str('Keywords for retrieval:', color='magenta')} {format_terminal_str(keywords_str, italic=True)}", + ) + log_status( + "🔮", + f"{format_terminal_str('Query for reranking:', color='magenta')} {format_terminal_str(query_str, italic=True)}", + ) + except Exception as e: + # If parsing fails, return the original response text + print("Failed to parse JSON, returning original response text.") + print(e) + return response_text, response_text + # return keywords_str, query_str + return f"{state.question}\n{state.step}\n{keywords_str}", f"{query_str}" diff --git a/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/postprocessing.py b/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/postprocessing.py new file mode 100644 index 0000000000..e48e3a61c6 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/postprocessing.py @@ -0,0 +1,135 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +"""Default post-processing logic for Mini Deep Search.""" + +from __future__ import annotations + +import re +from typing import List, Optional, Tuple + +import json_repair + +from .logging_utils import format_terminal_str, log_status + + +def _merge_plan_steps(plan: List[str], max_steps: int) -> List[str]: + """Merge plan steps if the generated plan exceeds ``max_steps``.""" + if len(plan) <= max_steps: + return plan + + merged_plan: List[str] = [] + steps_per_group = len(plan) // max_steps + extra_steps = len(plan) % max_steps + index = 0 + for i in range(max_steps): + group_size = steps_per_group + 1 if i < extra_steps else steps_per_group + if index < len(plan): + merged_plan.append(" ".join(plan[index : index + group_size])) + index += group_size + log_status( + "✨", + format_terminal_str( + f"Merged plan from {len(plan)} steps to {len(merged_plan)} steps.", + color="yellow", + bold=True, + ), + ) + return merged_plan + + +def _extract_pattern_and_text(line: str) -> Optional[Tuple[str, int, str]]: + match = re.match(r"^(.*?)(\d+)(.*)", line) + if match: + prefix, digit_str, text = match.groups() + if text.strip(): + return prefix, int(digit_str), text.strip() + return None + + +def parse_plan_from_text(text_content: str) -> List[str]: + """Parse a block of text to extract a numbered plan.""" + lines = text_content.splitlines() + longest_plan: List[str] = [] + for i, start_line in enumerate(lines): + processed_line = start_line.strip() + if processed_line.lower().startswith("step"): + processed_line = re.sub(r"^step\s*[:\-\s#]*", 
"", processed_line, flags=re.IGNORECASE) + pattern_info = _extract_pattern_and_text(processed_line) + if not pattern_info: + continue + prefix, digit, text = pattern_info + if digit not in (0, 1): + continue + current_plan = [text] + expected_digit = digit + 1 + for next_line in lines[i + 1 :]: + processed_next_line = next_line.strip() + if processed_next_line.lower().startswith("step"): + processed_next_line = re.sub(r"^step\s*[:\-\s#]*", "", processed_next_line, flags=re.IGNORECASE) + expected_pattern = re.match(f"^{re.escape(prefix)}{expected_digit}(.*)", processed_next_line) + if not expected_pattern: + break + next_text = expected_pattern.group(1).strip() + if not next_text: + break + current_plan.append(next_text) + expected_digit += 1 + if len(current_plan) > len(longest_plan): + longest_plan = current_plan + return [step.lstrip(" .:-") for step in longest_plan] + + +def postproc_plan(text: str, state, cfg) -> List[str]: # type: ignore[valid-type] + try: + plan = json_repair.loads(text) + if not plan: + try: + plan = parse_plan_from_text(text) + except Exception as exc: # pragma: no cover - defensive logging only + log_status( + "⚠️", + format_terminal_str( + f"Error parsing plan from text: {exc}. Using question as single step plan", + color="red", + bold=True, + ), + ) + plan = None + elif any(not isinstance(step, str) for step in plan): + new_plan = [] + for step in plan: + if isinstance(step, str): + new_plan.append(step) + elif isinstance(step, dict) and "step" in step: + new_plan.append(step["step"]) + elif isinstance(step, list) and all(isinstance(s, str) for s in step): + new_plan.extend(step) + else: + log_status("⚠️", f"Invalid step format: {step}. Using as-is.") + new_plan.append(str(step)) + plan = new_plan + log_status( + "✨", + format_terminal_str(f"Plan created with {len(plan)} steps.", color="cyan", bold=True), + ) + except Exception as exc: # pragma: no cover - defensive logging only + log_status( + "⚠️", + format_terminal_str( + f"Error evaluating plan: {exc}. 
Using question as single step plan", + color="red", + bold=True, + ), + ) + plan = None + plan = plan or [state.question] + return _merge_plan_steps(plan, cfg.max_plan_steps) + + +def postproc_query(text: str, state): # type: ignore[valid-type] + log_status("💡", f"{format_terminal_str('Query generated:', color='cyan', bold=True)} '{text}'") + return text, text + + +def postproc_answer(text: str, state): # type: ignore[valid-type] + return text diff --git a/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/utils.py b/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/utils.py new file mode 100644 index 0000000000..964be4cf13 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/utils.py @@ -0,0 +1,45 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +"""Internal helper utilities for Mini Deep Search.""" + +from __future__ import annotations + +import importlib.util +import os +import re +import sys +from enum import Enum +from typing import Optional + + +class Role(str, Enum): + SYSTEM = "system" + ASSISTANT = "assistant" + USER = "user" + + +def import_module_from_path(file_path: str): + """Import and return a Python module from the given path.""" + if not os.path.isfile(file_path): + raise ImportError(f"File not found: {file_path}") + + module_name = os.path.splitext(os.path.basename(file_path))[0] + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Failed to create spec for: {file_path}") + + module = importlib.util.module_from_spec(spec) + sys.modules[module_name] = module + try: + if spec.loader is None: + raise ImportError(f"Module loader missing for: {file_path}") + spec.loader.exec_module(module) + except Exception as exc: # pragma: no cover - propagates import errors + raise ImportError(f"Error executing module {module_name}: {exc}") from exc + return module + + +def remove_tagged(text: str, tag: str = "think") -> str: + """Remove sections wrapped in a custom tag from ``text``.""" + pattern = f"<{tag}>.*?" + return re.sub(pattern, "", text, flags=re.DOTALL).strip() diff --git a/EdgeCraftRAG/edgecraftrag/components/agents/simple.py b/EdgeCraftRAG/edgecraftrag/components/agents/simple.py new file mode 100644 index 0000000000..722bd3acda --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/agents/simple.py @@ -0,0 +1,274 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import asyncio +from typing import Any, List + +from comps.cores.proto.api_protocol import ChatCompletionRequest +from edgecraftrag.base import AgentType, CallbackType, CompType +from edgecraftrag.components.agent import Agent, stream_writer +from edgecraftrag.components.agents.utils import ROLE, format_terminal_str +from langgraph.graph import END, START, StateGraph +from pydantic import BaseModel, Field + + +class Retrieval(BaseModel): + step: int + query: str + retrieved: List[Any] = Field(...) + reranked: List[Any] = Field(...) 
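+
+# NOTE: QnaState is the trimmed-down state for the simple RAG flow: the graph loops
+# generate_query -> retrieve until the LLM judges the retrieved context sufficient or
+# max_retrievals is reached, then generates the final answer.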
+ + +class QnaState(BaseModel): + question: str + query: str + num_retrievals: int + answer: str + + retrievals: List[Retrieval] = Field(default_factory=list) + context_chunk_ids: List[str] = Field(default_factory=list) + request: ChatCompletionRequest + + +class SimpleRAGAgent(Agent): + + def __init__(self, idx, name, pipeline_idx, cfg): + super().__init__(name=name, agent_type=AgentType.SIMPLE, pipeline_idx=pipeline_idx, configs=cfg) + self.graph = self._build_graph() + self._messages = [] + self.conversation_history = [] + if idx is not None: + self.idx = idx + if "max_retrievals" in cfg: + self.max_retrievals = int(cfg["max_retrievals"]) + else: + self.max_retrievals = 3 + self.postproc_query = postproc_query + self.postproc_answer = postproc_answer + + @classmethod + def get_default_configs(cls): + return {"max_retrievals": 3} + + def update(self, cfg): + max_r = cfg.get("max_retrievals", None) + if max_r and isinstance(max_r, int): + self.max_retrievals = int(max_r) + self.configs["max_retrievals"] = self.max_retrievals + return True + else: + return False + + def _build_graph(self): + + qnagraph = StateGraph(QnaState) + qnagraph.add_node("generate_query", self.generate_query) + qnagraph.add_node("retrieve", self.retrieve) + qnagraph.add_node("final_answer", self.generate_answer) + + qnagraph.add_edge(START, "generate_query") + qnagraph.add_edge("generate_query", "retrieve") + qnagraph.add_conditional_edges( + "retrieve", self.check_retrieved, {"stop": "final_answer", "continue": "generate_query"} + ) + qnagraph.add_edge("final_answer", END) + + return qnagraph.compile() + + async def retrieve(self, state: QnaState) -> dict: + # print(f"State Retrieve {state}") + request = state.request + request.messages = state.query + contexts = await self.run_pipeline_retrieve_and_rerank(request) + + retrieved = contexts[CompType.RETRIEVER] + reranked = contexts[CompType.POSTPROCESSOR] + print( + "🔍", + f"Retrieved {format_terminal_str(str(len(retrieved)), color='magenta', bold=True)} documents, Reranked to top {format_terminal_str(str(len(reranked)), color='magenta', bold=True)}.", + ) + await stream_writer( + f"\n\n🔍 **Retrieved {str(len(retrieved))} documents, Reranked to top {str(len(reranked))}**\n\n" + ) + + new_retrieval = Retrieval(step=state.num_retrievals, query=state.query, retrieved=retrieved, reranked=reranked) + return { + "num_retrievals": state.num_retrievals + 1, + "retrievals": [*state.retrievals, new_retrieval], + } + + async def generate_query(self, state: QnaState) -> dict: + # print(f"State generate_query {state}") + await stream_writer('') + + messages = [ + {"role": ROLE.USER, "content": state.question}, + {"role": ROLE.SYSTEM, "content": PROMPT_TEMPLATE.GENERATE_QUERY}, + ] + self._messages.extend(messages) + self.conversation_history.extend(messages) + + response = await self.llm_generate_astream_writer(state.request) + + message = {"role": ROLE.ASSISTANT, "content": response} + self._messages.append(message) + self.conversation_history.append(message) + return { + "query": response, + } + + async def check_retrieved(self, state: QnaState) -> str: + # print(f"State check_retrieved {state}") + print("🤔", format_terminal_str("Evaluating if more information is needed", color="green")) + await stream_writer("🤔 **Evaluating if more information is needed...**\n\n") + + # Format context for the next decision + contexts = PROMPT_TEMPLATE.CONTEXTS.format( + contexts="\n".join( + [PROMPT_TEMPLATE.CONTEXT.format(context=doc.text) for doc in state.retrievals[-1].reranked] + ) + ) + 
messages = [ + {"role": ROLE.SYSTEM, "content": contexts}, + {"role": ROLE.SYSTEM, "content": PROMPT_TEMPLATE.CONTINUE}, + ] + self._messages.extend(messages) + self.conversation_history.extend(messages) + if state.num_retrievals >= self.max_retrievals: + print( + "⚠️", + format_terminal_str( + f"Reached maximum retrievals: {self.max_retrievals}, stopping search\n", color="yellow", bold=True + ), + ) + await stream_writer( + f"\n\n⚠️ **Reached maximum retrievals: {self.max_retrievals}, stopping searching...**\n\n" + ) + return "stop" + else: + response = await self.llm_generate_astream_writer(state.request) + message = {"role": ROLE.ASSISTANT, "content": response} + self._messages.append(message) + self.conversation_history.append(message) + if response.upper().startswith("NO"): + print("✅", format_terminal_str("Information is sufficient, moving to next step\n", color="green")) + await stream_writer("\n\n✅ **Information is sufficient, moving to next step...**\n\n") + return "stop" + else: + print("🔄", format_terminal_str("Need more information, generating new query ...", color="green")) + await stream_writer("\n\n🔄 **Need more information, generating new query...**\n\n") + return "continue" + + async def generate_answer(self, state: QnaState) -> dict: + # print(f"State generate_answer {state}") + print("📝", format_terminal_str("Generating the final answer ...", color="cyan", bold=True)) + await stream_writer('') + plan_with_information = "" + prev_step = "" + for i, r in enumerate(state.retrievals): + if r.step != prev_step: + plan_with_information += f"Step {i+1}\n\nRetrieved:\n" + for doc in r.reranked: + plan_with_information += doc.text + "\n" + plan_with_information += "\n" + prev_step = r.step + + self._messages = [ + { + "role": ROLE.SYSTEM, + "content": answer_instruction.format( + question=state.question, plan_with_information=plan_with_information + ), + } + ] + self.conversation_history.extend(self._messages) + + response = await self.llm_generate_astream_writer(state.request) + + self.conversation_history.append({"role": ROLE.ASSISTANT, "content": response}) + answer = self.postproc_answer(response, state) + title_str = format_terminal_str("Final Answer:", color="blue", bold=True) + print( + "✅", + format_terminal_str( + f"{title_str}\n{format_terminal_str(answer, italic=True, bold=True)}", color="blue", bold=True + ), + ) + return {"answer": answer} + + # Implement abstract run function + # callback dispatcher + async def run(self, **kwargs) -> Any: + if "cbtype" in kwargs: + if kwargs["cbtype"] == CallbackType.RUNAGENT: + request = kwargs["chat_request"] + + print( + "🤿", + f"{format_terminal_str('Starting DeepSearch:', color='cyan', bold=True)} {format_terminal_str(request.messages, bold=False, italic=True)}\n", + ) + # Initialize state + state = QnaState( + question=request.messages, query="", num_retrievals=0, answer="", retrievals=[], request=request + ) + self._messages = self._build_init_messages(request.messages) + + async def async_gen(): + async for chunk in self.graph.astream(state, stream_mode="custom"): + yield chunk + await asyncio.sleep(0) + + print("✅", format_terminal_str("RAG process completed", color="cyan", bold=True)) + return async_gen() + + def _build_init_messages(self, question) -> List[dict]: + return [ + { + "role": ROLE.SYSTEM, + "content": PROMPT_TEMPLATE.SYSTEM.format( + system_instruction=system_instruction, + query_instruction=query_instruction, + domain_knowledge="", + ), + } + ] + + +def postproc_query(text, state): + """Default post-process 
the response text generated for new query. + + This function is a placeholder for any specific post-processing logic needed. + The returned values are a tuple of (retrieval_query, rerank_query). + """ + print("💡", f"{format_terminal_str('Query generated:', color='cyan', bold=True)} '{text}'") + # Default use the raw response text as the query for both retrieval and rerank + return text, text + + +def postproc_answer(text, state): + return text + + +system_instruction = "You will be provided with a question from a user, and you need to create queries and execute them based on the question for the final answer.\nYou should only use the information provided in the search results to answer the user's question. \nMake your response in the same language as the user's question./no_think" +query_instruction = 'Every time when asked if more information is needed, check the retrieved contexts and try to identify new content that is related. Then based on what you get and all above, decide if a new query is needed to gather more potential useful information. The query should be a very concise and clear sub-question that is specific to the user\'s question. A good query should include all the related actions or keywords that can help to retrieve the most related context. Response with the query directly.\nDO NOT use any prefix, such as "Query:"/no_think' +answer_instruction = "You have been provided with a question from user:\n{question}\n\nThe following are the plan steps you generated and the corresponding retrieved information:{plan_with_information}\n\nBased on the above, come up with a final answer for the user's question. Format the answer as a list of steps that can guide the user to solve the problem./no_think" + + +class PROMPT_TEMPLATE: + # only contain formatting related instructions here + + SYSTEM = """{system_instruction} + +{query_instruction} + +{domain_knowledge} + +""" + GENERATE_QUERY = "Now generate a query for the next retrieval." + + CONTEXT = """\n{context}\n\n""" + CONTEXTS = """The following are the retrieved contexts for current query.\n{contexts}\n""" + + CONTINUE = "Is more information needed? Answer Yes or No. Then explain why or why not." + + EXPERIENCES = """The following are question-plan examples by human experts. Refer to them to better make your plan. 
If you find that there is a question that is highly similar or exactly match the input question, then strictly follow the subquestions to make the plan.\n\n{experiences}\n""" diff --git a/EdgeCraftRAG/edgecraftrag/components/agents/utils.py b/EdgeCraftRAG/edgecraftrag/components/agents/utils.py new file mode 100644 index 0000000000..e96c2af9da --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/agents/utils.py @@ -0,0 +1,401 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +import asyncio +import importlib.util +import json +import logging +import os +import re +import sys +from typing import Any, Dict, List, Optional, Tuple, Union + +import numpy +from pydantic import BaseModel + +# from wrapped_atomic_apis import call_logits_next_token + +# Configure logging +logger = logging.getLogger("deep_search") +logger.setLevel(logging.INFO) + +# Create console handler with a formatter that includes timestamps and emojis +if not logger.handlers: + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.INFO) + # Format: [YYYY-MM-DD HH:MM:SS] Message with emoji + formatter = logging.Formatter("[%(asctime)s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S") + formatter = logging.Formatter("[%(asctime)s] %(message)s", datefmt="%H:%M:%S") + console_handler.setFormatter(formatter) + logger.addHandler(console_handler) + + +def log_status(emoji: str, message: str, indent: int = 0) -> None: + """Log a formatted status message with emoji indicator using Python's logging module. + + Args: + emoji: Emoji character to prepend to the message + message: The message text to log + indent: Number of indentation levels (2 spaces each) + """ + indent_str = " " * indent + logger.info(f"{indent_str}{emoji} {message}") + + +class Config(BaseModel): + system_instruction: str + plan_instruction: str = "" + query_instruction: str + answer_instruction: str + domain_knowledge: str + retrieve_top_k: int + rerank_top_k: int + max_retrievals: int + max_plan_steps: int = 7 + embedding_endpoint: str + reranker_endpoint: str + llm_endpoint: str + query_search_endpoint: str = "" + generation_config: Dict[str, Any] = {} + postproc: str = "defaults.py" + + +def import_module_from_path(file_path: str): + """Import a module from a full file path. + + Args: + file_path: Full path to the Python file to import + + Returns: + The imported module + + Raises: + ImportError: If the module cannot be imported + """ + if not os.path.isfile(file_path): + raise ImportError(f"File not found: {file_path}") + + # Get the module name (filename without extension) + module_name = os.path.splitext(os.path.basename(file_path))[0] + + # Create the spec + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Failed to create spec for: {file_path}") + + # Create the module + module = importlib.util.module_from_spec(spec) + + # Add the module to sys.modules + sys.modules[module_name] = module + + # Execute the module + try: + spec.loader.exec_module(module) + except Exception as e: + raise ImportError(f"Error executing module {module_name}: {e}") + + return module + + +def load_config(config_path: str) -> Config: + """Load configuration from a JSON file. 
+ + Args: + config_path: Path to the configuration JSON file + + Returns: + Config object with loaded configuration + """ + with open(config_path, "r") as f: + config_dict = json.load(f) + cfg = Config(**config_dict) + if os.path.isfile(cfg.domain_knowledge): + with open(cfg.domain_knowledge, "r") as f: + cfg.domain_knowledge = f.read() + return cfg + + +class ROLE: + SYSTEM = "system" + ASSISTANT = "assistant" + USER = "user" + + +def remove_tagged(text, tag="think"): + pattern = f"<{tag}>.*?" + return re.sub(pattern, "", text, flags=re.DOTALL).strip() + + +def _extract_pattern_and_text(line: str) -> Optional[Tuple[str, int, str, str]]: + """Checks if a line matches the pattern [prefix][digit][suffix][text]. + + Args: + line: The line to check. + + Returns: + A tuple of (prefix, digit, suffix, step_text) if a match is found, + otherwise None. + - prefix: Characters before the digit (e.g., " "). Can be empty. + - digit: The integer value of the digit. + - suffix: The separator characters after the digit (e.g., ". "). + - step_text: The actual description of the step. + """ + # Pattern: Start, any prefix (non-greedy), a digit, a suffix of non-digits, + # and the rest of the line as text. + # The suffix (\D+) is followed by (.*) which will grab the text. + # This structure correctly separates the separator from the text. + match = re.match(r"^(.*?)(\d+)(.*)", line) + if match: + prefix, digit_str, text = match.groups() + # We require actual text for it to be a valid step + if text.strip(): + return prefix, int(digit_str), text.strip() + return None + + +def parse_plan_from_text(text_content: str) -> List[str]: + """Parses a block of text to extract a list of plan steps by finding a + consecutive sequence of numbered lines. + + Args: + text_content: A string containing the plan. + + Returns: + A list of strings, where each string is a single plan step. + """ + lines = text_content.splitlines() + longest_plan = [] + + # Iterate through each line, treating it as a potential start of a plan + for i, start_line in enumerate(lines): + + # 1. Pre-process the line + processed_line = start_line.strip() + if processed_line.lower().startswith("step"): + # Remove "step" and any space/punctuation immediately after + processed_line = re.sub(r"^step\s*[:\-\s#]*", "", processed_line, flags=re.IGNORECASE) + + # 2. Check if it matches the generic pattern and starts with 0 or 1 + pattern_info = _extract_pattern_and_text(processed_line) + if pattern_info: + prefix, digit, text = pattern_info + + # Allow multi-digit numbers but only start a plan on 0 or 1 + if digit in [0, 1]: + current_plan = [text] + expected_digit = digit + 1 + + # 3. 
If it's a valid start, check subsequent lines for the same pattern + for next_line in lines[i + 1 :]: + + # Pre-process the next line similarly + processed_next_line = next_line.strip() + if processed_next_line.lower().startswith("step"): + processed_next_line = re.sub(r"^step\s*[:\-\s#]*", "", processed_next_line, flags=re.IGNORECASE) + + # Check if the next line matches the *exact* pattern with the next number + # We escape prefix/suffix in case they contain special regex characters + expected_pattern = re.match(f"^{re.escape(prefix)}{expected_digit}(.*)", processed_next_line) + + if expected_pattern: + next_text = expected_pattern.group(1).strip() + if next_text: # Ensure the step is not empty + current_plan.append(next_text) + expected_digit += 1 + else: + break # Empty step text breaks sequence + else: + # The consecutive sequence is broken + break + + # If the plan we just found is the longest so far, save it + if len(current_plan) > len(longest_plan): + longest_plan = current_plan + longest_plan = [_.lstrip(" .:-") for _ in longest_plan] + return longest_plan + + +def format_terminal_str(text: str, color: str = "", bold: bool = False, italic: bool = False) -> str: + """Format a string with ANSI escape codes for color, bold, and italic. + + Args: + text: The text to format. + color: The color name (e.g., 'red', 'green', 'blue'). + bold: Whether to apply bold formatting. + italic: Whether to apply italic formatting. + + Returns: + The formatted string with ANSI codes. + + Notes: + - If the environment variable NO_COLOR is set (per https://no-color.org/), + the function returns the original text without styling. + - Unsupported color names are ignored (text returned with other + requested styles, if any). + - Color names are case-insensitive. Both standard and bright variants + are supported (e.g., 'red', 'bright_red'). + """ + if text is None: + text = "" + + # Honor NO_COLOR convention + if os.environ.get("NO_COLOR"): + return text + + color_lower = color.lower() + + # Standard and bright ANSI color codes + color_map = { + "black": 30, + "red": 31, + "green": 32, + "yellow": 33, + "blue": 34, + "magenta": 35, + "cyan": 36, + "white": 37, + "bright_black": 90, + "bright_red": 91, + "bright_green": 92, + "bright_yellow": 93, + "bright_blue": 94, + "bright_magenta": 95, + "bright_cyan": 96, + "bright_white": 97, + } + + style_seq: List[str] = [] + + # Add color if valid + if color_lower in color_map: + style_seq.append(str(color_map[color_lower])) + + # Bold and italic attributes + if bold: + style_seq.append("1") + if italic: + style_seq.append("3") + + # If no styling requested or recognized, return original text + if not style_seq: + return text + + prefix = f"\033[{';'.join(style_seq)}m" + suffix = "\033[0m" + return f"{prefix}{text}{suffix}" + + +_DEFAULT_TEMPLATE_MESSAGES = [ + { + "role": "system", + "content": """You are an impartial quality rater for TCB Bonder troubleshooting answers. Your task is to rate if the answer by user well covers the steps in the reference answer. + +Task instructions: +- Parse the reference answer into its essential checkpoints (split on punctuation such as "?", ";", or line breaks) and understand what each step expects the technician to do or verify. The order of the checkpoints has low importance. +- Examine the user's answer and decide if each checkpoint is substantively addressed with accurate, actionable guidance. +- Treat synonymous language or additional helpful context as a match when it fulfills the intent of the checkpoint. 
+- Mark a checkpoint as uncovered if the user's answer omits it, contradicts it, or gives incorrect or unsafe guidance. +- Ignore extra steps that do not conflict with the reference; they should not reduce the score. +- The mismatch of the step number between user's answer and reference answer does not matter, as long as all the content is well covered. +- Keep all reasoning internal; do not expose the intermediate analysis in the final reply. +- Focus solely on the provided texts. Do not rely on your knowledge. + +The following are domain terms you may refer to: +BFS: Bulk feed system +BH: Bondhead +BS: Bond stage/Bond pedestal +CAH: Carrier handler +COG: Chrome on glass +COS: Cover opening station +COT: Change over trolley +DCP: Data collection plan +DFH: Die feed handler +DL: Down-look +DPA: Die pick arm +DTA: Die transfer arm +FOV: Field of view +HW: Highway +MBH: Media buffer handler +MHS: Material Handling System +NCF: No clean flux +OT: Optics table +PFLU: Power feeder loading unit +PHS: Pre/Post heat station +PR: Pattern recognition +SC: Station controller +BLPCS: Bond line process control system +""", + }, + { + "role": "user", + "content": """User's answer: +{llm_answer} + +Reference answer: +{ref_answer} +""", + "template_message": True, + }, + { + "role": "system", + "content": """Does the user's answer well cover the steps in the reference answer? Yes or No. + +Scoring rubric: +- Answer "Yes" only when every checkpoint from the reference is fully covered and nothing in the user's answer conflicts with the reference guidance. +- Answer "No" if any checkpoint is missing, incorrectly addressed, or contradicted by the user's answer. +""", + }, + {"role": "assistant", "content": '{"label": "'}, +] +DEFAULT_TARGET_TOKENS = ["No", "Yes"] +DEFAULT_TRANSFORM_PARAMS = (5, -1, 10) # a, b, T + + +def batch_cal_score(x, a=1.0, b=0.0, T=1.0, s=10): + _, d = x.shape + levels = numpy.arange(d)[None,] / (d - 1) + transformed = numpy.exp(x / T) + probs = transformed / transformed.sum(axis=1, keepdims=True) + expected_levels = (probs * levels).sum(axis=1) + scores = a * expected_levels + b + return s * numpy.tanh(scores) + + +def llm_evaluate( + ref_answer, + llm_answer, + eval_endpoint, + template_messages=_DEFAULT_TEMPLATE_MESSAGES, + target_tokens=DEFAULT_TARGET_TOKENS, + transform_params=DEFAULT_TRANSFORM_PARAMS, + return_logits=False, +) -> Union[float, List[float]]: + messages = [ + ( + message + if not message.get("template_message") + else { + "role": message["role"], + "content": message["content"].format(ref_answer=ref_answer, llm_answer=llm_answer), + } + ) + for message in template_messages + ] + # result_json = call_logits_next_token( + # endpoint=eval_endpoint, + # messages=messages, + # target_tokens=target_tokens + # ) + result_json = "" + raw_logits = {_["token"]: _["logit"] for _ in result_json["target_token_logits"]} + raw_logits = [raw_logits[k] for k in target_tokens] + if return_logits: + return raw_logits + else: + score = batch_cal_score(numpy.array(raw_logits)[None,], *transform_params)[0] + return score + + +def remove_think_tags(text: str) -> str: + """Remove ... 
sections from Qwen3 output.""" + return re.sub(r".*?", "", text, flags=re.DOTALL).strip() diff --git a/EdgeCraftRAG/edgecraftrag/components/benchmark.py b/EdgeCraftRAG/edgecraftrag/components/benchmark.py index fc3801b5d3..3bf2a7e602 100644 --- a/EdgeCraftRAG/edgecraftrag/components/benchmark.py +++ b/EdgeCraftRAG/edgecraftrag/components/benchmark.py @@ -49,7 +49,14 @@ def cal_input_token_size(self, input_text_list): return input_token_size def init_benchmark_data(self): - pipeline_comp = [CompType.RETRIEVER, CompType.POSTPROCESSOR, CompType.GENERATOR] + pipeline_comp = [ + CompType.NODEPARSER, + CompType.CHUNK_NUM, + CompType.RETRIEVER, + CompType.POSTPROCESSOR, + CompType.QUERYSEARCH, + CompType.GENERATOR, + ] if self.is_enabled(): with self._idx_lock: self.last_idx += 1 @@ -58,6 +65,8 @@ def init_benchmark_data(self): data["idx"] = idx for comp in pipeline_comp: data[comp] = "" + data[CompType.NODEPARSER] = 0 + data[CompType.CHUNK_NUM] = 0 return idx, data def update_benchmark_data(self, idx, comp_type, start, end): diff --git a/EdgeCraftRAG/edgecraftrag/components/data.py b/EdgeCraftRAG/edgecraftrag/components/data.py old mode 100755 new mode 100644 diff --git a/EdgeCraftRAG/edgecraftrag/components/generator.py b/EdgeCraftRAG/edgecraftrag/components/generator.py old mode 100755 new mode 100644 index cb170fcd10..fe5974e71f --- a/EdgeCraftRAG/edgecraftrag/components/generator.py +++ b/EdgeCraftRAG/edgecraftrag/components/generator.py @@ -2,16 +2,16 @@ # SPDX-License-Identifier: Apache-2.0 import asyncio -import dataclasses import json import os import urllib.request +from concurrent.futures import ThreadPoolExecutor from urllib.parse import urlparse +from comps.cores.proto.api_protocol import ChatCompletionRequest from edgecraftrag.base import BaseComponent, CompType, GeneratorType, InferenceType, NodeParserType -from edgecraftrag.utils import concat_history, get_prompt_template, save_history +from edgecraftrag.utils import get_prompt_template from fastapi.responses import StreamingResponse -from langchain_core.prompts import PromptTemplate from llama_index.llms.openai_like import OpenAILike from pydantic import model_serializer from unstructured.staging.base import elements_from_base64_gzipped_json @@ -44,11 +44,11 @@ def extract_unstructured_eles(retrieved_nodes=[], text_gen_context=""): continue metadata = node.node.metadata # extract referenced docs - if "filename" in metadata: + if "file_name" in metadata: reference_doc = ( - metadata["filename"] + metadata["file_name"] if "page_number" not in metadata - else metadata["filename"] + " --page" + str(metadata["page_number"]) + else metadata["file_name"] + " --page" + str(metadata["page_number"]) ) reference_docs.add(reference_doc) # extract hyperlinks in chunk @@ -69,48 +69,52 @@ def extract_unstructured_eles(retrieved_nodes=[], text_gen_context=""): # extract hyperlinks in chunk link_urls.extend(extract_urls(text_gen_context)) unstructured_str = "" - if image_paths: - unstructured_str += "\n\n参考图片:\n\n" - for image_path in image_paths: - unstructured_str += f"![]({image_path})" - if link_urls: - unstructured_str += "\n\n相关链接:\n\n" - for link in link_urls: - unstructured_str += f"[{link}]({link})\n\n" if reference_docs: - unstructured_str += "\n\n内容来源:\n\n" + unstructured_str += "\n\n --- \n\n### Document Source:\n" for reference_doc in reference_docs: - unstructured_str += f"{reference_doc}\n\n" + unstructured_str += f"- {reference_doc}\n\n" return unstructured_str +def build_stream_response(status=None, content=None, error=None): + 
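+    """Build a status/content/error payload dict for stream responses (contentType is always "text")."""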
response = {"status": status, "contentType": "text"} + if content is not None: + response["content"] = content + if error is not None: + response["error"] = error + return response + + async def local_stream_generator(lock, llm, prompt_str, unstructured_str): async with lock: - response = llm.stream_complete(prompt_str) - collected_data = [] - for r in response: - collected_data.append(r.delta) - yield r.delta - await asyncio.sleep(0) - if unstructured_str: - collected_data.append(unstructured_str) - yield unstructured_str - res = "".join(collected_data) - save_history(res) + response = await llm.astream_complete(prompt_str) + try: + async for r in response: + yield r.delta or "" + await asyncio.sleep(0) + if unstructured_str: + yield unstructured_str + except Exception as e: + start_idx = str(e).find("message") + len("message") + result_error = str(e)[start_idx:] + yield f"code:0000{result_error}" async def stream_generator(llm, prompt_str, unstructured_str): - response = llm.stream_complete(prompt_str) - collected_data = [] - for r in response: - collected_data.append(r.delta) - yield r.delta - await asyncio.sleep(0) - if unstructured_str: - collected_data.append(unstructured_str) - yield unstructured_str - res = "".join(collected_data) - save_history(res) + response = await llm.astream_complete(prompt_str) + try: + async for r in response: + yield r.delta or "" + await asyncio.sleep(0) + if unstructured_str: + yield unstructured_str + await asyncio.sleep(0) + except asyncio.CancelledError as e: + response.aclose() + except Exception as e: + start_idx = str(e).find("message") + len("message") + result_error = str(e)[start_idx:] + yield f"code:0000{result_error}" class QnAGenerator(BaseComponent): @@ -127,22 +131,25 @@ def __init__(self, llm_model, prompt_template_file, inference_type, vllm_endpoin ("\t\n", "\n"), ) self.enable_think = False - self.llm = llm_model - if isinstance(llm_model, str): - self.model_id = llm_model - else: - self.model_id = llm_model().model_id - if self.inference_type == InferenceType.LOCAL: - self.lock = asyncio.Lock() + self.enable_rag_retrieval = True self.prompt_content = prompt_content self.prompt_template_file = prompt_template_file - self.prompt = self.init_prompt(self.model_id, self.prompt_content, self.prompt_template_file) - - self.llm = llm_model if isinstance(llm_model, str): self.model_id = llm_model + self.model_path = llm_model else: - self.model_id = llm_model().model_id + llm_instance = llm_model() + if llm_instance.model_path is None or llm_instance.model_path == "": + self.model_id = llm_instance.model_id + self.model_path = os.path.join("/home/user/models", os.getenv("LLM_MODEL", "Qwen/Qwen3-8B")) + else: + self.model_id = llm_instance.model_id + self.model_path = llm_instance.model_path + self.original_template, self.prompt = self.prompt_handler( + self.model_path, self.prompt_content, self.prompt_template_file + ) + + self.llm = llm_model if self.inference_type == InferenceType.LOCAL: self.lock = asyncio.Lock() if self.inference_type == InferenceType.VLLM: @@ -151,45 +158,43 @@ def __init__(self, llm_model, prompt_template_file, inference_type, vllm_endpoin vllm_endpoint = os.getenv("vLLM_ENDPOINT", "http://localhost:8086") self.vllm_endpoint = vllm_endpoint - def init_prompt(self, model_id, prompt_content=None, prompt_template_file=None, enable_think=False): - # using the prompt template enhancement strategy(only tested on Qwen2-7B-Instruction) if template_enhance_on is true - template_enhance_on = True if "Qwen2" in self.model_id else 
False + def prompt_handler( + self, model_path, prompt_content=None, prompt_template_file=None, enable_think=False, enable_rag_retrieval=True + ): if prompt_content: - self.set_prompt(prompt_content) - return get_prompt_template(model_id, prompt_content, prompt_template_file, enable_think) + return get_prompt_template(model_path, prompt_content, prompt_template_file, enable_think) elif prompt_template_file is None: print("There is no template file, using the default template.") - prompt_template = get_prompt_template(model_id, prompt_content, prompt_template_file, enable_think) - return ( - DocumentedContextRagPromptTemplate.from_template(prompt_template) - if template_enhance_on - else prompt_template - ) + prompt_template = get_prompt_template(model_path, prompt_content, prompt_template_file, enable_think) + return prompt_template else: - safe_root = "/templates" + if enable_rag_retrieval: + safe_root = "/templates" + else: + prompt_content = "### User Guide ###You are a helpful assistant. Please respond to user inquiries with concise and professional answers.### Historical Content ###{chat_history}" + return get_prompt_template(model_path, prompt_content, prompt_template_file, enable_think) + prompt_template_file = os.path.normpath(os.path.join(safe_root, prompt_template_file)) if not prompt_template_file.startswith(safe_root): raise ValueError("Invalid template path") if not os.path.exists(prompt_template_file): raise ValueError("Template file not exists") - if template_enhance_on: - return DocumentedContextRagPromptTemplate.from_file(prompt_template_file) - else: - return get_prompt_template(model_id, prompt_content, prompt_template_file, enable_think) + return get_prompt_template(model_path, prompt_content, prompt_template_file, enable_think) def set_prompt(self, prompt): if "{context}" not in prompt: prompt += "\n<|im_start|>{context}<|im_end|>" if "{chat_history}" not in prompt: prompt += "\n<|im_start|>{chat_history}" - self.prompt = prompt + self.prompt_content = prompt + self.original_template, self.prompt = self.prompt_handler( + self.model_path, self.prompt_content, self.prompt_template_file + ) def reset_prompt(self): - prompt_template = get_prompt_template(self.model_id) - self.prompt = ( - DocumentedContextRagPromptTemplate.from_template(prompt_template) - if self.template_enhance_on - else prompt_template + self.prompt_content = None + self.original_template, self.prompt = self.prompt_handler( + self.model_path, self.prompt_content, self.prompt_template_file ) def clean_string(self, string): @@ -206,20 +211,30 @@ def query_transform(self, chat_request, retrieved_nodes, sub_questions=None): :return: Generated text_gen_context and prompt_str.""" text_gen_context = "" for n in retrieved_nodes: - origin_text = n.node.get_text() + origin_text = n.node.text text_gen_context += self.clean_string(origin_text.strip()) query = chat_request.messages - chat_history = concat_history(chat_request.messages) + chat_history = chat_request.input # Modify model think status if chat_request.chat_template_kwargs: - if self.enable_think != chat_request.chat_template_kwargs["enable_thinking"]: - self.prompt = self.init_prompt( - self.model_id, + change_flag = False + if "enable_rag_retrieval" in chat_request.chat_template_kwargs: + if self.enable_rag_retrieval != chat_request.chat_template_kwargs["enable_rag_retrieval"]: + self.enable_rag_retrieval = chat_request.chat_template_kwargs["enable_rag_retrieval"] + change_flag = True + if "enable_thinking" in chat_request.chat_template_kwargs: + if 
self.enable_think != chat_request.chat_template_kwargs["enable_thinking"]: + self.enable_think = chat_request.chat_template_kwargs["enable_thinking"] + change_flag = True + if change_flag: + self.original_template, self.prompt = self.prompt_handler( + self.model_path, self.prompt_content, self.prompt_template_file, - chat_request.chat_template_kwargs["enable_thinking"], + self.enable_think, + self.enable_rag_retrieval, ) - self.enable_think = chat_request.chat_template_kwargs["enable_thinking"] + if sub_questions: final_query = f"{query}\n\n### Sub-questions ###\nThe following list is how you should consider the answer, you MUST follow these steps when responding:\n\n{sub_questions}" else: @@ -227,7 +242,7 @@ def query_transform(self, chat_request, retrieved_nodes, sub_questions=None): prompt_str = self.prompt.format(input=final_query, chat_history=chat_history, context=text_gen_context) return text_gen_context, prompt_str - def run(self, chat_request, retrieved_nodes, node_parser_type, **kwargs): + async def run(self, chat_request, retrieved_nodes, node_parser_type, **kwargs): if self.llm() is None: # This could happen when User delete all LLMs through RESTful API raise ValueError("No LLM available, please load LLM") @@ -245,19 +260,21 @@ def run(self, chat_request, retrieved_nodes, node_parser_type, **kwargs): self.llm().generate_kwargs = generate_kwargs self.llm().max_new_tokens = chat_request.max_tokens unstructured_str = "" - if node_parser_type == NodeParserType.UNSTRUCTURED: + if node_parser_type == NodeParserType.UNSTRUCTURED or node_parser_type == NodeParserType.SIMPLE: unstructured_str = extract_unstructured_eles(retrieved_nodes, text_gen_context) if chat_request.stream: - return StreamingResponse( - local_stream_generator(self.lock, self.llm(), prompt_str, unstructured_str), - media_type="text/event-stream", - ) + # Asynchronous generator + async def generator(): + async for chunk in local_stream_generator(self.lock, self.llm(), prompt_str, unstructured_str): + yield chunk or "" + await asyncio.sleep(0) + + return generator() else: result = self.llm().complete(prompt_str) - save_history(str(result.text)) return result - def run_vllm(self, chat_request, retrieved_nodes, node_parser_type, **kwargs): + async def run_vllm(self, chat_request, retrieved_nodes, node_parser_type, **kwargs): # query transformation sub_questions = kwargs.get("sub_questions", None) text_gen_context, prompt_str = self.query_transform(chat_request, retrieved_nodes, sub_questions=sub_questions) @@ -273,15 +290,19 @@ def run_vllm(self, chat_request, retrieved_nodes, node_parser_type, **kwargs): repetition_penalty=chat_request.repetition_penalty, ) unstructured_str = "" - if node_parser_type == NodeParserType.UNSTRUCTURED: + if node_parser_type == NodeParserType.UNSTRUCTURED or node_parser_type == NodeParserType.SIMPLE: unstructured_str = extract_unstructured_eles(retrieved_nodes, text_gen_context) if chat_request.stream: - return StreamingResponse( - stream_generator(llm, prompt_str, unstructured_str), media_type="text/event-stream" - ) + + # Asynchronous generator + async def generator(): + async for chunk in stream_generator(llm, prompt_str, unstructured_str): + yield chunk or "" + await asyncio.sleep(0) + + return generator() else: - result = llm.complete(prompt_str) - save_history(str(result)) + result = await llm.acomplete(prompt_str) return result @model_serializer @@ -296,69 +317,89 @@ def ser_model(self): return set -@dataclasses.dataclass -class INSTRUCTIONS: - IM_START = "You are an AI assistant that 
helps users answer questions given a specific context." - SUCCINCT = "Ensure your response is succinct" - ACCURATE = "Ensure your response is accurate." - SUCCINCT_AND_ACCURATE = "Ensure your response is succinct. Try to be accurate if possible." - ACCURATE_AND_SUCCINCT = "Ensure your response is accurate. Try to be succinct if possible." - NO_RAMBLING = "Avoid posing new questions or self-questioning and answering, and refrain from repeating words in your response." - SAY_SOMETHING = "Avoid meaningless answer such a random symbol or blanks." - ENCOURAGE = "If you cannot well understand the question, try to translate it into English, and translate the answer back to the language of the question." - NO_IDEA = ( - 'If the answer is not discernible, please respond with "Sorry. I have no idea" in the language of the question.' - ) - CLOZE_TEST = """The task is a fill-in-the-blank/cloze test.""" - NO_MEANINGLESS_SYMBOLS = "Meaningless symbols and ``` should not be included in your response." - ADAPT_NATIVE_LANGUAGE = "Please try to think like a person that speak the same language that the question used." - - -def _is_cloze(question): - return ("()" in question or "()" in question) and ("填" in question or "fill" in question or "cloze" in question) - - -# depreciated -def get_instructions(question): - # naive pre-retrieval rewrite - # cloze - if _is_cloze(question): - instructions = [ - INSTRUCTIONS.CLOZE_TEST, - ] - else: - instructions = [ - INSTRUCTIONS.ACCURATE_AND_SUCCINCT, - INSTRUCTIONS.NO_RAMBLING, - INSTRUCTIONS.NO_MEANINGLESS_SYMBOLS, - ] - return ["System: {}".format(_) for _ in instructions] - - -def preprocess_question(question): - if _is_cloze(question): - question = question.replace(" ", "").replace("(", "(").replace(")", ")") - # .replace("()", " <|blank|> ") - ret = "User: Please finish the following fill-in-the-blank question marked by $$$ at the beginning and end. 
Make sure all the () are filled.\n$$$\n{}\n$$$\nAssistant: ".format( - question +class FreeChatGenerator(BaseComponent): + + def __init__(self, llm_model, inference_type, vllm_endpoint, **kwargs): + BaseComponent.__init__( + self, + comp_type=CompType.GENERATOR, + comp_subtype=GeneratorType.FREECHAT, ) - else: - ret = "User: {}\nAssistant: 从上下文提供的信息中可以知道,".format(question) - return ret - - -class DocumentedContextRagPromptTemplate(PromptTemplate): - - def format(self, **kwargs) -> str: - # context = '\n'.join([clean_string(f"{_.page_content}".strip()) for i, _ in enumerate(kwargs["context"])]) - context = kwargs["context"] - question = kwargs["input"] - preprocessed_question = preprocess_question(question) - if "instructions" in self.template: - instructions = get_instructions(question) - prompt_str = self.template.format( - context=context, instructions="\n".join(instructions), input=preprocessed_question - ) + self.inference_type = inference_type + self.prompt_content = "" + self.prompt_template_file = "" + self._REPLACE_PAIRS = ( + ("\n\n", "\n"), + ("\t\n", "\n"), + ) + self.enable_think = False + if isinstance(llm_model, str): + self.model_id = llm_model + self.model_path = llm_model else: - prompt_str = self.template.format(context=context, input=preprocessed_question) - return prompt_str + llm_instance = llm_model() + if llm_instance.model_path is None or llm_instance.model_path == "": + self.model_id = llm_instance.model_id + self.model_path = os.path.join("/home/user/models", os.getenv("LLM_MODEL", "Qwen/Qwen3-8B")) + else: + self.model_id = llm_instance.model_id + self.model_path = llm_instance.model_path + + self.llm = llm_model + if self.inference_type == InferenceType.VLLM: + self.vllm_name = llm_model().model_id + if vllm_endpoint == "": + vllm_endpoint = os.getenv("vLLM_ENDPOINT", "http://localhost:8086") + self.vllm_endpoint = vllm_endpoint + + async def run(self, chat_request, retrieved_nodes, node_parser_type, **kwargs): + response = await self.run_vllm(chat_request, retrieved_nodes, node_parser_type, **kwargs) + return response + + async def run_vllm(self, chat_request, retrieved_nodes, node_parser_type, **kwargs): + llm = OpenAILike( + api_key="fake", + api_base=self.vllm_endpoint + "/v1", + max_tokens=chat_request.max_tokens, + model=self.vllm_name, + top_p=chat_request.top_p, + top_k=chat_request.top_k, + temperature=chat_request.temperature, + streaming=chat_request.stream, + repetition_penalty=chat_request.repetition_penalty, + ) + prompt_str = chatcompletion_to_chatml(chat_request) + if chat_request.stream: + + # Asynchronous generator + async def generator(): + gen = await llm.astream_complete(prompt_str) + async for chunk in gen: + yield chunk.delta or "" + await asyncio.sleep(0) + + return generator() + else: + result = await llm.acomplete(prompt_str) + return str(result) + + @model_serializer + def ser_model(self): + set = { + "idx": self.idx, + "generator_type": self.comp_subtype, + "inference_type": self.inference_type, + "model": self.llm(), + "vllm_endpoint": self.vllm_endpoint, + } + return set + + +def chatcompletion_to_chatml(request: ChatCompletionRequest) -> str: + """Convert a ChatCompletionRequest dict to a ChatML-formatted string.""" + chatml = "" + for msg in request.messages: + chatml += f"<|im_start|>{msg.get('role', '')}\n{msg.get('content', '')}<|im_end|>\n" + # start generation from assistant role + chatml += "<|im_start|>assistant\n" + return chatml diff --git a/EdgeCraftRAG/edgecraftrag/components/indexer.py 
b/EdgeCraftRAG/edgecraftrag/components/indexer.py index 842122964f..e2a517101d 100644 --- a/EdgeCraftRAG/edgecraftrag/components/indexer.py +++ b/EdgeCraftRAG/edgecraftrag/components/indexer.py @@ -10,11 +10,11 @@ from llama_index.vector_stores.faiss import FaissVectorStore from llama_index.vector_stores.milvus import MilvusVectorStore from pydantic import model_serializer +from pymilvus import Collection, connections class VectorIndexer(BaseComponent, VectorStoreIndex): - - def __init__(self, embed_model, vector_type, milvus_uri="http://localhost:19530", kb_name="default_kb"): + def __init__(self, embed_model, vector_type, vector_url="http://localhost:19530", kb_name="default_kb"): BaseComponent.__init__( self, comp_type=CompType.INDEXER, @@ -26,15 +26,19 @@ def __init__(self, embed_model, vector_type, milvus_uri="http://localhost:19530" from llama_index.core import Settings Settings.embed_model = None - self.milvus_uri = milvus_uri - self._initialize_indexer(embed_model, vector_type, milvus_uri, kb_name) + self.vector_url = vector_url + self._initialize_indexer(embed_model, vector_type, vector_url, kb_name) - def _initialize_indexer(self, embed_model, vector_type, milvus_uri, kb_name): + def _initialize_indexer(self, embed_model, vector_type, vector_url, kb_name): # get active name pl = ctx.get_pipeline_mgr().get_active_pipeline() - plname = pl.name if pl else "" + collection_name = kb_name + pl.name if pl else "default" if embed_model: - self.d = embed_model._model.request.outputs[0].get_partial_shape()[2].get_length() + try: + self.d = len(embed_model.get_text_embedding("test")) + except Exception: + # Fallback for OpenVINO models if the above fails + self.d = embed_model._model.request.outputs[0].get_partial_shape()[2].get_length() else: self.d = 128 match vector_type: @@ -46,24 +50,24 @@ def _initialize_indexer(self, embed_model, vector_type, milvus_uri, kb_name): VectorStoreIndex.__init__(self, embed_model=embed_model, nodes=[], storage_context=faiss_store) case IndexerType.MILVUS_VECTOR: milvus_vector_store = MilvusVectorStore( - uri=milvus_uri, + uri=vector_url, dim=self.d, - collection_name=kb_name + plname + str(self.d), + collection_name=collection_name, overwrite=False, ) milvus_store = StorageContext.from_defaults(vector_store=milvus_vector_store) VectorStoreIndex.__init__(self, embed_model=embed_model, nodes=[], storage_context=milvus_store) def reinitialize_indexer(self, kb_name="default_kb"): - self._initialize_indexer(self.model, self.comp_subtype, self.milvus_uri, kb_name) + self._initialize_indexer(self.model, self.comp_subtype, self.vector_url, kb_name) def clear_milvus_collection(self, kb_name="default_kb"): # get active name pl = ctx.get_pipeline_mgr().get_active_pipeline() plname = pl.name if pl else "" milvus_vector_store = MilvusVectorStore( - uri=self.milvus_uri, - collection_name=kb_name + plname + str(self.d), + uri=self.vector_url, + collection_name=kb_name + plname, overwrite=False, ) milvus_vector_store.clear() @@ -75,3 +79,42 @@ def run(self, **kwargs) -> Any: def ser_model(self): set = {"idx": self.idx, "indexer_type": self.comp_subtype, "model": self.model} return set + + +class KBADMINIndexer(BaseComponent): + # Handled in the kbadmin project + def __init__(self, embed_model, vector_type, kbadmin_embedding_url, vector_url="http://localhost:29530"): + BaseComponent.__init__( + self, + comp_type=CompType.INDEXER, + comp_subtype=IndexerType.KBADMIN_INDEXER, + ) + self.embed_model = embed_model + self.kbadmin_embedding_url = kbadmin_embedding_url + 
self.vector_url = vector_url + + def insert_nodes(self, nodes): + return None + + def _index_struct(self, nodes): + return None + + def run(self, **kwargs) -> Any: + return None + + def reinitialize_indexer(self, kb_name="default_kb"): + return None + + def clear_milvus_collection(self, **kwargs): + return None + + @model_serializer + def ser_model(self): + set = { + "idx": self.idx, + "indexer_type": self.comp_subtype, + "model": {"model_id": self.embed_model}, + "kbadmin_embedding_url": self.kbadmin_embedding_url, + "vector_url": self.vector_url, + } + return set diff --git a/EdgeCraftRAG/edgecraftrag/components/knowledge_base.py b/EdgeCraftRAG/edgecraftrag/components/knowledge_base.py index 259c4a463f..93f578080c 100644 --- a/EdgeCraftRAG/edgecraftrag/components/knowledge_base.py +++ b/EdgeCraftRAG/edgecraftrag/components/knowledge_base.py @@ -1,40 +1,363 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import json import os -from typing import Any, List, Optional +import uuid +from typing import Any, Dict, List, Optional, Union -from edgecraftrag.base import BaseComponent +from edgecraftrag.base import BaseComponent, CompType +from edgecraftrag.config_repository import ( + MilvusConfigRepository, + MilvusDocumentRecordRepository, +) +from edgecraftrag.env import DOCUMENT_DATA_FILE, EXPERIENCE_FILE +from llama_index.core.schema import Document from pydantic import model_serializer class Knowledge(BaseComponent): - file_paths: Optional[List[str]] = [] - file_map: Optional[List[str]] = {} - description: Optional[str] = "None" - comp_type: str = "knowledge" - active: bool + def __init__( + self, + name: str, + description: Optional[str] = None, + active: bool = True, + comp_type: Optional[str] = None, + comp_subtype: Optional[str] = None, + experience_active: bool = False, + idx: Optional[str] = None, + all_document_maps: Optional[Dict] = None, + file_paths: Optional[list] = None, + **kwargs, + ): + super().__init__(name=name, comp_type=CompType.KNOWLEDGE, **kwargs) + + self.description = description + self.experience_active = experience_active + self.active = active + self.comp_type = comp_type + self.comp_subtype = comp_subtype + if idx is not None: + self.idx = str(idx) + if all_document_maps is not None: + self.all_document_maps = all_document_maps + else: + self.all_document_maps: Dict[str, Dict[str, str]] = {} + + self.document_records: List[Dict[str, str]] = [] + + if file_paths is not None: + self.file_paths = file_paths + self._update_file_names() + else: + self.file_paths: List[str] = [] + self.file_map: Dict[str, str] = {} + + self.experience_repo = MilvusConfigRepository.create_connection("experience_data", 1) + self.document_record_repo = MilvusDocumentRecordRepository.create_connection("document_records", 1) def _update_file_names(self) -> None: self.file_map = {os.path.basename(path): path for path in self.file_paths if path is not None} - def add_file_path(self, file_path: str) -> bool: - if file_path not in self.file_paths: + def add_file_path( + self, + file_path: str, + documents: List[Document], + pl_name: str, + only_add_file: bool = True, + ) -> bool: + if pl_name not in self.all_document_maps: + self.all_document_maps[pl_name] = {} + if file_path not in self.all_document_maps[pl_name]: + file_id = str(uuid.uuid4()) + self.all_document_maps[pl_name][file_path] = file_id + else: + file_id = self.all_document_maps[pl_name][file_path] + + records = [ + { + "file_id": file_id, + "file_path": file_path, + "doc_id": doc.id_, + "metadata": 
doc.metadata, + } + for doc in documents + ] + self._add_document_records(records) + + if only_add_file and file_path not in self.file_paths: self.file_paths.append(file_path) self._update_file_names() - return True - return False - def remove_file_path(self, file_path: str) -> bool: - if file_path in self.file_paths: - self.file_paths.remove(file_path) - self._update_file_names() - return True - return False + def remove_file_path(self, file_path: str, pl_name: str) -> List[str]: + removed_doc_ids = [] + if pl_name in self.all_document_maps and file_path in self.all_document_maps[pl_name]: + file_id = self.all_document_maps[pl_name][file_path] + removed_doc_ids = self._remove_document_records_by_file_id(file_id) + + del self.all_document_maps[pl_name][file_path] + if file_path in self.file_paths: + self.file_paths.remove(file_path) + self._update_file_names() + + return removed_doc_ids def get_file_paths(self) -> List[str]: return self.file_paths + # Content related to experience + def _read_experience_file(self) -> List[Dict]: + if self.experience_repo: + return [item["config_json"] for item in self.experience_repo.get_configs()] + else: + if EXPERIENCE_FILE not in self.file_paths: + self.file_paths.append(EXPERIENCE_FILE) + if not os.path.isfile(self.file_paths[0]): + self.ensure_file_exists(self.file_paths[0]) + with open(self.file_paths[0], "r", encoding="utf-8") as f: + return json.load(f) + + def _write_experience_file(self, data: List[Dict]) -> None: + if self.experience_repo: + return True + else: + with open(self.file_paths[0], "w", encoding="utf-8") as f: + json.dump(data, f, ensure_ascii=False, indent=4) + return True + + def get_all_experience(self) -> List[Dict]: + return self._read_experience_file() + + def get_experience_by_id_or_question(self, req: str) -> Optional[Dict]: + for item in self.get_all_experience(): + if item.get("idx") == req.idx or item.get("question") == req.question: + return item + return None + + def add_multiple_experiences( + self, experiences: List[Dict[str, Union[str, List[str]]]], flag: bool = True + ) -> List[Dict]: + result = [] + if self.experience_repo: + for exp in experiences: + question = exp.get("question") + if not question: + raise ValueError("Each experience must have a 'question'") + content = exp.get("content", []) + exp_idx = exp.get("idx") or str(uuid.uuid4()) + + existing = self.experience_repo.get_configs(idx=exp_idx) + if not existing: + all_exps = self.get_all_experience() + existing = [item for item in all_exps if item.get("question") == question] + else: + existing = [item["config_json"] for item in existing] + + if existing: + existing_item = existing[0] + exp_idx = existing_item.get("idx") + if flag: + existing_item["content"].extend([c for c in content if c not in existing_item["content"]]) + else: + existing_item["content"] = content + existing_item["question"] = question + success = self.experience_repo.update_config_by_idx(exp_idx, existing_item) + if success: + result.append(existing_item) + else: + new_item = { + "idx": exp_idx, + "question": question, + "content": content, + } + success = self.experience_repo.add_config_by_idx(exp_idx, new_item) + if success: + result.append(new_item) + return result + else: + all_exp = self._read_experience_file() + for exp in experiences: + question = exp.get("question") + if not question: + raise ValueError("Each experience must have a 'question'") + content = exp.get("content", []) + exp_idx = exp.get("idx") or str(uuid.uuid4()) + existing_idx = None + existing_item = None + for i, 
item in enumerate(all_exp): + if item.get("idx") == exp_idx: + existing_idx = i + existing_item = item + break + if existing_idx is None: + for i, item in enumerate(all_exp): + if item.get("question") == question: + existing_idx = i + existing_item = item + break + if existing_idx is not None: + if flag: + existing_item["content"].extend([c for c in content if c not in existing_item["content"]]) + existing_item["question"] = question + else: + existing_item["content"] = content + existing_item["question"] = question + all_exp[existing_idx] = existing_item + result.append(existing_item) + else: + new_item = { + "idx": exp_idx, + "question": question, + "content": content, + } + all_exp.append(new_item) + result.append(new_item) + self._write_experience_file(all_exp) + return result + + def delete_experience(self, exp_idx: str) -> bool: + if self.experience_repo: + return self.experience_repo.delete_config_by_idx(exp_idx) + else: + all_exp = self._read_experience_file() + remaining = [item for item in all_exp if item.get("idx") != exp_idx] + if len(remaining) != len(all_exp): + self._write_experience_file(remaining) + return True + return False + + def clear_experiences(self) -> bool: + if self.experience_repo: + try: + self.experience_repo.clear_all_config() + return True + except Exception as e: + print(f"Clear Milvus experiences failed: {e}") + return False + else: + self._write_experience_file([]) + return True + + def update_experience(self, exp_idx: str, new_question: str, new_content: List[str]) -> Optional[Dict]: + updated_item = { + "idx": exp_idx, + "question": new_question, + "content": new_content, + } + if self.experience_repo: + success = self.experience_repo.update_config_by_idx(exp_idx, updated_item) + return updated_item if success else None + else: + all_exp = self._read_experience_file() + for i, item in enumerate(all_exp): + if item.get("idx") == exp_idx: + all_exp[i] = updated_item + self._write_experience_file(all_exp) + return updated_item + return None + + def add_experiences_from_file(self, file_path: str, flag: bool = False) -> List[Dict]: + if not file_path.endswith(".json"): + raise ValueError("File must be a JSON file") + try: + with open(file_path, "r", encoding="utf-8") as f: + data = json.load(f) + if not isinstance(data, list): + raise ValueError("File content must be a list of experiences") + experiences = [] + for item in data: + exp = { + "idx": item.get("idx") or str(uuid.uuid4()), + "question": item.get("question", ""), + "content": item.get("content", []), + } + experiences.append(exp) + + return self.add_multiple_experiences(experiences=experiences, flag=flag) + except json.JSONDecodeError as e: + raise ValueError(f"Invalid JSON: {str(e)}") + except Exception as e: + raise ValueError(f"File error: {str(e)}") + + # Related content of document + def _add_document_records(self, records: List[Dict[str, str]]) -> None: + if records and self.document_record_repo: + self.document_record_repo.save_records(records) + elif records: + if not os.path.isfile(DOCUMENT_DATA_FILE): + self.ensure_file_exists(DOCUMENT_DATA_FILE) + if os.path.exists(DOCUMENT_DATA_FILE): + with open(DOCUMENT_DATA_FILE, "r", encoding="utf-8") as f: + existing_data = json.load(f) + else: + existing_data = [] + existing_data.extend(records) + with open(DOCUMENT_DATA_FILE, "w", encoding="utf-8") as f: + json.dump(existing_data, f, ensure_ascii=False, indent=4) + + def _remove_document_records_by_file_id(self, file_id: str) -> List[Dict[str, str]]: + deleted_records = [] + if 
self.document_record_repo: + deleted_records = self.document_record_repo.delete_records_by_file_id(file_id) + else: + if os.path.exists(DOCUMENT_DATA_FILE): + with open(DOCUMENT_DATA_FILE, "r", encoding="utf-8") as f: + all_document_data = json.load(f) + deleted_records = [item.get("doc_id") for item in all_document_data if item.get("file_id") == file_id] + result_documents = [item for item in all_document_data if item.get("file_id") != file_id] + if len(deleted_records) > 0: + with open(DOCUMENT_DATA_FILE, "w", encoding="utf-8") as f: + json.dump(result_documents, f, ensure_ascii=False, indent=4) + return deleted_records + + def get_all_document(self, file_path, pl_name) -> List[Dict[str, Any]]: + doc_info_list = [] + if pl_name not in self.all_document_maps: + return doc_info_list + file_id = self.all_document_maps[pl_name].get(file_path) + if not file_id: + return doc_info_list + + if self.document_record_repo: + records = self.document_record_repo.get_records_by_file_id(file_id) + doc_info_list = [{"doc_id": rec["doc_id"], "metadata": rec.get("metadata", {})} for rec in records] + else: + if os.path.exists(DOCUMENT_DATA_FILE): + with open(DOCUMENT_DATA_FILE, "r", encoding="utf-8") as f: + all_data = json.load(f) + doc_info_list = [ + {"doc_id": item["doc_id"], "metadata": item.get("metadata", {})} + for item in all_data + if item.get("file_id") == file_id + ] + return doc_info_list + + def clear_documents(self, pl_name): + if pl_name not in self.all_document_maps: + return + for file_id in self.all_document_maps[pl_name].values(): + self._remove_document_records_by_file_id(file_id) + self.all_document_maps[pl_name] = {} + return True + + # Make sure the folder and its files exist + def ensure_file_exists(self, file_paths): + dir_path = os.path.dirname(file_paths) + os.makedirs(dir_path, exist_ok=True) + if not os.path.exists(file_paths): + with open(file_paths, "w", encoding="utf-8") as f: + json.dump([], f, ensure_ascii=False, indent=4) + + # Calculate the number of files or experience + def calculate_totals(self): + if self.comp_type == "knowledge": + total = len(self.file_paths) + elif self.comp_type == "experience": + total = len(self.get_all_experience()) + else: + total = None + return total + def run(self, **kwargs) -> Any: pass @@ -44,8 +367,11 @@ def ser_model(self): "idx": self.idx, "name": self.name, "comp_type": self.comp_type, + "comp_subtype": self.comp_subtype, "file_map": self.file_map, "description": self.description, "active": self.active, + "experience_active": self.experience_active, + "total": self.calculate_totals(), } return set diff --git a/EdgeCraftRAG/edgecraftrag/components/model.py b/EdgeCraftRAG/edgecraftrag/components/model.py index a41b4a7f01..6cb27e623b 100644 --- a/EdgeCraftRAG/edgecraftrag/components/model.py +++ b/EdgeCraftRAG/edgecraftrag/components/model.py @@ -6,6 +6,7 @@ from edgecraftrag.base import BaseComponent, CompType, ModelType from llama_index.embeddings.huggingface_openvino import OpenVINOEmbedding +from llama_index.embeddings.openai import OpenAIEmbedding from llama_index.llms.openvino import OpenVINOLLM from llama_index.postprocessor.openvino_rerank import OpenVINORerank from pydantic import Field, model_serializer @@ -26,6 +27,7 @@ class BaseModelComponent(BaseComponent): model_path: Optional[str] = Field(default="") weight: Optional[str] = Field(default="") device: Optional[str] = Field(default="cpu") + api_base: Optional[str] = Field(default=None) def run(self, **kwargs) -> Any: pass @@ -39,10 +41,24 @@ def ser_model(self): 
"model_path": self.model_path, "weight": self.weight, "device": self.device, + "api_base": self.api_base, } return set +class OpenAIEmbeddingModel(BaseModelComponent, OpenAIEmbedding): + def __init__(self, model_id, api_base, **kwargs): + super().__init__( + model_id=model_id, + api_base=api_base, + api_key="unused", + **kwargs, + ) + OpenAIEmbedding.__init__(self, model=model_id, api_base=api_base, api_key="unused") + self.comp_type = CompType.MODEL + self.comp_subtype = ModelType.VLLM_EMBEDDING + + class OpenVINOEmbeddingModel(BaseModelComponent, OpenVINOEmbedding): def __init__(self, model_id, model_path, device, weight): diff --git a/EdgeCraftRAG/edgecraftrag/components/node_parser.py b/EdgeCraftRAG/edgecraftrag/components/node_parser.py index 0f386bc61f..0bd49b91b4 100644 --- a/EdgeCraftRAG/edgecraftrag/components/node_parser.py +++ b/EdgeCraftRAG/edgecraftrag/components/node_parser.py @@ -168,3 +168,25 @@ def ser_model(self): "chunk_overlap": self.chunk_overlap, } return set + + +class KBADMINParser(BaseComponent): + # Handled in the kbadmin project + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.comp_type = CompType.NODEPARSER + self.comp_subtype = NodeParserType.KBADMINPARSER + + def run(self, **kwargs) -> Any: + return None + + def insert_nodes(self): + return None + + @model_serializer + def ser_model(self): + set = { + "idx": self.idx, + "parser_type": self.comp_subtype, + } + return set diff --git a/EdgeCraftRAG/edgecraftrag/components/pipeline.py b/EdgeCraftRAG/edgecraftrag/components/pipeline.py index 41780ef88f..8bc0a3c15f 100644 --- a/EdgeCraftRAG/edgecraftrag/components/pipeline.py +++ b/EdgeCraftRAG/edgecraftrag/components/pipeline.py @@ -9,10 +9,11 @@ from typing import Any, Callable, List, Optional from comps.cores.proto.api_protocol import ChatCompletionRequest -from edgecraftrag.base import BaseComponent, CallbackType, CompType, InferenceType, RetrieverType +from edgecraftrag.base import BaseComponent, CallbackType, CompType, InferenceType, NodeParserType, RetrieverType from edgecraftrag.components.postprocessor import RerankProcessor from edgecraftrag.components.query_preprocess import query_search from edgecraftrag.components.retriever import AutoMergeRetriever, SimpleBM25Retriever, VectorSimRetriever +from edgecraftrag.env import SEARCH_CONFIG_PATH, SEARCH_DIR from fastapi.responses import StreamingResponse from llama_index.core.schema import Document, QueryBundle from pydantic import BaseModel, Field, model_serializer @@ -32,22 +33,37 @@ class Pipeline(BaseComponent): benchmark: Optional[BaseComponent] = Field(default=None) status: PipelineStatus = Field(default=PipelineStatus()) run_pipeline_cb: Optional[Callable[..., Any]] = Field(default=None) + run_retriever_postprocessor_cb: Optional[Callable[..., Any]] = Field(default=None) run_retriever_cb: Optional[Callable[..., Any]] = Field(default=None) + run_postprocessor_cb: Optional[Callable[..., Any]] = Field(default=None) run_data_prepare_cb: Optional[Callable[..., Any]] = Field(default=None) + run_query_search_cb: Optional[Callable[..., Any]] = Field(default=None) def __init__( self, name, origin_json=None, + idx=None, + documents_cache=None, ): super().__init__(name=name, comp_type=CompType.PIPELINE) if self.name == "" or self.name is None: self.name = self.idx + if idx is not None: + self.idx = str(idx) + if documents_cache is not None: + self.documents_cache = documents_cache + else: + self.documents_cache = {} + self.enable_benchmark = os.getenv("ENABLE_BENCHMARK", "False").lower() == 
"true" - self.run_pipeline_cb = run_generator_ben if self.enable_benchmark else run_generator + self.run_pipeline_cb = run_pipeline + self.run_retriever_postprocessor_cb = run_retrieve_postprocess self.run_retriever_cb = run_retrieve + self.run_postprocessor_cb = run_postprocess + self.run_generator_cb = run_generator self.run_data_prepare_cb = run_simple_doc - + self.run_query_search_cb = run_query_search self._node_changed = False self._index_changed = False self._index_to_retriever_updated = True @@ -97,12 +113,10 @@ def check_active(self, nodelist, kb_name): # TODO: update doc changes # TODO: more operations needed, add, del, modify def update_nodes(self, nodes): - print(f"Updating {len(nodes)} nodes ...") if self.indexer is not None: self.indexer.insert_nodes(nodes) def update_indexer_to_retriever(self): - print("Updating indexer to retriever ...") if self.indexer is not None and self.retriever is not None: old_retriever = self.retriever retriever_type = old_retriever.comp_subtype @@ -121,18 +135,31 @@ def update_indexer_to_retriever(self): # Implement abstract run function # callback dispatcher - def run(self, **kwargs) -> Any: - print(kwargs) + async def run(self, **kwargs) -> Any: if "cbtype" in kwargs: if kwargs["cbtype"] == CallbackType.DATAPREP: if "docs" in kwargs: - return self.run_data_prepare_cb(self, docs=kwargs["docs"]) + return await self.run_data_prepare_cb(self, docs=kwargs["docs"]) + if kwargs["cbtype"] == CallbackType.RETRIEVE_POSTPROCESS: + if "chat_request" in kwargs: + return await self.run_retriever_postprocessor_cb(self, chat_request=kwargs["chat_request"]) if kwargs["cbtype"] == CallbackType.RETRIEVE: if "chat_request" in kwargs: - return self.run_retriever_cb(self, chat_request=kwargs["chat_request"]) + return await self.run_retriever_cb(self, chat_request=kwargs["chat_request"]) + if kwargs["cbtype"] == CallbackType.POSTPROCESS: + if "chat_request" in kwargs and "contexts" in kwargs: + return await self.run_postprocessor_cb( + self, chat_request=kwargs["chat_request"], contexts=kwargs["contexts"] + ) + if kwargs["cbtype"] == CallbackType.GENERATE: + if "chat_request" in kwargs: + return await self.run_generator_cb(self, chat_request=kwargs["chat_request"]) if kwargs["cbtype"] == CallbackType.PIPELINE: if "chat_request" in kwargs: - return self.run_pipeline_cb(self, chat_request=kwargs["chat_request"]) + return await self.run_pipeline_cb(self, chat_request=kwargs["chat_request"]) + if kwargs["cbtype"] == CallbackType.QUERYSEARCH: + if "chat_request" in kwargs: + return await self.run_query_search_cb(self, chat_request=kwargs["chat_request"]) def update(self, node_parser=None, indexer=None, retriever=None, postprocessor=None, generator=None): if node_parser is not None: @@ -146,6 +173,86 @@ def update(self, node_parser=None, indexer=None, retriever=None, postprocessor=N if generator is not None: self.generator = generator + def add_docs_to_list(self, kb_name, file_paths): + if self.indexer.comp_subtype != "milvus_vector": + return None + target_config = self.connect_target_config() + if kb_name not in self.documents_cache: + self.documents_cache[kb_name] = {"files": [], "config": target_config} + if isinstance(file_paths, str): + file_paths = [file_paths] + self.documents_cache[kb_name]["files"].extend(file_paths) + + def del_docs_to_list(self, kb_name, file_paths): + if kb_name not in self.documents_cache: + return None + if isinstance(file_paths, str): + file_paths = [file_paths] + for file_path in file_paths: + if file_path in 
self.documents_cache[kb_name]["files"]: + self.documents_cache[kb_name]["files"].remove(file_path) + + def clear_document_cache(self, kb_name): + if kb_name in self.documents_cache: + del self.documents_cache[kb_name] + + def compare_file_lists(self, kb_name, current_files): + self.add_docs_to_list(kb_name, []) + target_config = self.connect_target_config() + if self.documents_cache[kb_name]["config"] == target_config: + diff = self.compare_mappings(self.documents_cache[kb_name]["files"], current_files) + else: + self.documents_cache[kb_name] = {"files": [], "config": self.connect_target_config()} + diff = {"add_docs": current_files} + return diff + + def compare_mappings(self, stored_files, new_files): + stored = set(stored_files) + new = set(new_files) + return {"add_docs": list(new - stored), "del_docs": list(stored - new)} + + def connect_target_config(self): + target_config = "" + if self.node_parser.comp_subtype == NodeParserType.SIMPLE: + target_config = ( + "simple" + + str(self.node_parser.chunk_size) + + str(self.node_parser.chunk_overlap) + + self.indexer.model.model_id + ) + elif self.node_parser.comp_subtype == NodeParserType.SENTENCEWINDOW: + target_config = "sentencewindow" + str(self.node_parser.window_size) + self.indexer.model.model_id + elif self.node_parser.comp_subtype == NodeParserType.HIERARCHY: + target_config = "hierarchical" + self.indexer.model.model_id + elif self.node_parser.comp_subtype == NodeParserType.UNSTRUCTURED: + target_config = ( + "target_config" + + str(self.node_parser.chunk_size) + + str(self.node_parser.chunk_overlap) + + self.indexer.model.model_id + ) + return target_config + + def nodes_to_document(self, node_dict: dict): + nodes = [] + for node_info in node_dict.values(): + nodes.append({"start": int(node_info["start_char_idx"]), "text": node_info["text"]}) + nodes_sorted = sorted(nodes, key=lambda x: x["start"]) + if not nodes_sorted: + return "" + merged_text = nodes_sorted[0]["text"] + for i in range(1, len(nodes_sorted)): + prev_text = merged_text + curr_text = nodes_sorted[i]["text"] + max_possible_overlap = min(len(prev_text), len(curr_text)) + overlap_len = 0 + for j in range(max_possible_overlap, 0, -1): + if prev_text.endswith(curr_text[:j]): + overlap_len = j + break + merged_text += curr_text[overlap_len:] + return merged_text + @model_serializer def ser_model(self): set = { @@ -181,11 +288,55 @@ def model_existed(self, model_id: str) -> bool: return False -# Test callback to retrieve nodes from query -def run_retrieve(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: +async def run_retrieve(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: + benchmark_data = {} + query = chat_request.messages + top_k = None if chat_request.k == ChatCompletionRequest.model_fields["k"].default else chat_request.k + contexts = {} + start = 0 + if pl.enable_benchmark: + _, benchmark_data = pl.benchmark.init_benchmark_data() + start = time.perf_counter() + retri_res = pl.retriever.run(query=query, top_k=top_k) + if pl.enable_benchmark: + benchmark_data[CompType.RETRIEVER] = time.perf_counter() - start + pl.benchmark.insert_benchmark_data(benchmark_data) + contexts[CompType.RETRIEVER] = retri_res + return contexts + + +async def run_postprocess(pl: Pipeline, chat_request: ChatCompletionRequest, contexts) -> Any: + if CompType.RETRIEVER not in contexts: + raise ValueError("No retrieved contexts identified.") + query = chat_request.messages + query_bundle = QueryBundle(query) + if pl.postprocessor: + # TODO: Consider multiple 
postprocessors + for processor in pl.postprocessor: + if ( + isinstance(processor, RerankProcessor) + and chat_request.top_n != ChatCompletionRequest.model_fields["top_n"].default + ): + processor.top_n = chat_request.top_n + retri_res = processor.run(retri_res=contexts.get(CompType.RETRIEVER), query_bundle=query_bundle) + contexts[CompType.POSTPROCESSOR] = retri_res + return contexts + + +# Test callback to retrieve and rerank nodes from query +async def run_retrieve_postprocess(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: + benchmark_data = {} query = chat_request.messages + top_k = None if chat_request.k == ChatCompletionRequest.model_fields["k"].default else chat_request.k contexts = {} - retri_res = pl.retriever.run(query=query) + start = 0 + if pl.enable_benchmark: + _, benchmark_data = pl.benchmark.init_benchmark_data() + start = time.perf_counter() + retri_res = pl.retriever.run(query=query, top_k=top_k) + if pl.enable_benchmark: + benchmark_data[CompType.RETRIEVER] = time.perf_counter() - start + pl.benchmark.insert_benchmark_data(benchmark_data) contexts[CompType.RETRIEVER] = retri_res query_bundle = QueryBundle(query) if pl.postprocessor: @@ -200,14 +351,41 @@ def run_retrieve(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: return contexts -def run_simple_doc(pl: Pipeline, docs: List[Document]) -> Any: +async def run_simple_doc(pl: Pipeline, docs: List[Document]) -> Any: + start = 0 + benchmark_data = {} + if pl.enable_benchmark: + _, benchmark_data = pl.benchmark.init_benchmark_data() + start = time.perf_counter() n = pl.node_parser.run(docs=docs) if pl.indexer is not None: pl.indexer.insert_nodes(n) - print(pl.indexer._index_struct) + if pl.enable_benchmark: + benchmark_data[CompType.NODEPARSER] += time.perf_counter() - start + benchmark_data[CompType.CHUNK_NUM] += len(n) + pl.benchmark.insert_benchmark_data(benchmark_data) return n +async def run_query_search(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: + query = chat_request.messages + + def run_async_query_search(): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + return loop.run_until_complete(query_search(query, SEARCH_CONFIG_PATH, SEARCH_DIR, pl)) + finally: + loop.close() + + with ThreadPoolExecutor(max_workers=1) as executor: + future = executor.submit(run_async_query_search) + top1_issue, sub_questionss_result = future.result() + if sub_questionss_result: + query = query + sub_questionss_result + return query, sub_questionss_result + + def benchmark_response(ret, benchmark, benchmark_index, benchmark_data, input_token_size, start): if isinstance(ret, StreamingResponse): original_body_iterator = ret.body_iterator @@ -225,114 +403,94 @@ async def timing_wrapper(): return ret -def run_generator_ben(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: - benchmark_index, benchmark_data = pl.benchmark.init_benchmark_data() +async def run_pipeline(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: + if pl.enable_benchmark: + benchmark_index, benchmark_data = pl.benchmark.init_benchmark_data() contexts = {} - start = time.perf_counter() + retri_res = [] + active_kb = chat_request.user if chat_request.user else None + enable_rag_retrieval = ( + chat_request.chat_template_kwargs.get("enable_rag_retrieval", True) + if chat_request.chat_template_kwargs + else True + ) + if not active_kb: + enable_rag_retrieval = False + elif pl.retriever.comp_subtype == "kbadmin_retriever" and active_kb.comp_subtype == "origin_kb": + enable_rag_retrieval = False + elif 
pl.retriever.comp_subtype != "kbadmin_retriever" and active_kb.comp_subtype == "kbadmin_kb": + enable_rag_retrieval = False query = chat_request.messages - if pl.generator.inference_type == InferenceType.VLLM: - UI_DIRECTORY = os.getenv("TMPFILE_PATH", "/home/user/ui_cache") - search_config_path = os.path.join(UI_DIRECTORY, "configs/search_config.yaml") - search_dir = os.path.join(UI_DIRECTORY, "configs/search_dir") - - def run_async_query_search(): - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - try: - return loop.run_until_complete(query_search(query, search_config_path, search_dir, pl)) - finally: - loop.close() - - with ThreadPoolExecutor(max_workers=1) as executor: - future = executor.submit(run_async_query_search) - top1_issue, sub_questionss_result = future.result() - if sub_questionss_result: - query = query + sub_questionss_result - - retri_res = pl.retriever.run(query=query) - query_bundle = QueryBundle(query) - benchmark_data[CompType.RETRIEVER] = time.perf_counter() - start - contexts[CompType.RETRIEVER] = retri_res - - start = time.perf_counter() - if pl.postprocessor: - for processor in pl.postprocessor: - if ( - isinstance(processor, RerankProcessor) - and chat_request.top_n != ChatCompletionRequest.model_fields["top_n"].default - ): - processor.top_n = chat_request.top_n - retri_res = processor.run(retri_res=retri_res, query_bundle=query_bundle) - contexts[CompType.POSTPROCESSOR] = retri_res - benchmark_data[CompType.POSTPROCESSOR] = time.perf_counter() - start + sub_questionss_result = None + experience_status = True if chat_request.tool_choice == "auto" else False + if enable_rag_retrieval: + start = 0 + if pl.enable_benchmark: + start = time.perf_counter() + if pl.generator.inference_type == InferenceType.VLLM and experience_status: + query, sub_questionss_result = await run_query_search(pl, chat_request) + if pl.enable_benchmark: + benchmark_data[CompType.QUERYSEARCH] = time.perf_counter() - start + start = time.perf_counter() + top_k = ( + None + if chat_request.k == pl.retriever.topk or chat_request.k != 0 or chat_request.k is None + else chat_request.k + ) + retri_res = pl.retriever.run(query=query, top_k=top_k) + if pl.enable_benchmark: + benchmark_data[CompType.RETRIEVER] = time.perf_counter() - start + contexts[CompType.RETRIEVER] = retri_res + query_bundle = QueryBundle(query) + if pl.enable_benchmark: + start = time.perf_counter() + if pl.postprocessor: + for processor in pl.postprocessor: + if ( + isinstance(processor, RerankProcessor) + and chat_request.top_n != processor.top_n + and chat_request.top_n != 0 + and chat_request.top_n is not None + ): + processor.top_n = chat_request.top_n + retri_res = processor.run(retri_res=retri_res, query_bundle=query_bundle) + contexts[CompType.POSTPROCESSOR] = retri_res + if pl.enable_benchmark: + benchmark_data[CompType.POSTPROCESSOR] = time.perf_counter() - start if pl.generator is None: raise ValueError("No Generator Specified") - text_gen_context, prompt_str = pl.generator.query_transform(chat_request, retri_res) - input_token_size = pl.benchmark.cal_input_token_size(prompt_str) + if pl.enable_benchmark: + _, prompt_str = pl.generator.query_transform(chat_request, retri_res) + input_token_size = pl.benchmark.cal_input_token_size(prompt_str) np_type = pl.node_parser.comp_subtype - start = time.perf_counter() + if pl.enable_benchmark: + start = time.perf_counter() if pl.generator.inference_type == InferenceType.LOCAL: - ret = pl.generator.run(chat_request, retri_res, np_type) + ret = await 
pl.generator.run(chat_request, retri_res, np_type) elif pl.generator.inference_type == InferenceType.VLLM: - ret = pl.generator.run_vllm(chat_request, retri_res, np_type, sub_questions=sub_questionss_result) + ret = await pl.generator.run_vllm(chat_request, retri_res, np_type, sub_questions=sub_questionss_result) else: raise ValueError("LLM inference_type not supported") - end = time.perf_counter() - - if isinstance(ret, StreamingResponse): - ret = benchmark_response(ret, pl.benchmark, benchmark_index, benchmark_data, input_token_size, start) - else: - benchmark_data[CompType.GENERATOR] = end - start - pl.benchmark.insert_llm_data(benchmark_index, input_token_size) - pl.benchmark.insert_benchmark_data(benchmark_data) + if pl.enable_benchmark: + end = time.perf_counter() + if isinstance(ret, StreamingResponse): + ret = benchmark_response(ret, pl.benchmark, benchmark_index, benchmark_data, input_token_size, start) + else: + benchmark_data[CompType.GENERATOR] = end - start + pl.benchmark.insert_llm_data(benchmark_index, input_token_size) + pl.benchmark.insert_benchmark_data(benchmark_data) return ret, contexts -def run_generator(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: - query = chat_request.messages - contexts = {} - if pl.generator.inference_type == InferenceType.VLLM: - UI_DIRECTORY = os.getenv("TMPFILE_PATH", "/home/user/ui_cache") - search_config_path = os.path.join(UI_DIRECTORY, "configs/search_config.yaml") - search_dir = os.path.join(UI_DIRECTORY, "configs/search_dir") - - def run_async_query_search(): - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - try: - return loop.run_until_complete(query_search(query, search_config_path, search_dir, pl)) - finally: - loop.close() - - with ThreadPoolExecutor(max_workers=1) as executor: - future = executor.submit(run_async_query_search) - top1_issue, sub_questionss_result = future.result() - if sub_questionss_result: - query = query + sub_questionss_result - retri_res = pl.retriever.run(query=query) - contexts[CompType.RETRIEVER] = retri_res - query_bundle = QueryBundle(query) - - if pl.postprocessor: - for processor in pl.postprocessor: - if ( - isinstance(processor, RerankProcessor) - and chat_request.top_n != ChatCompletionRequest.model_fields["top_n"].default - ): - processor.top_n = chat_request.top_n - retri_res = processor.run(retri_res=retri_res, query_bundle=query_bundle) - contexts[CompType.POSTPROCESSOR] = retri_res - - if pl.generator is None: - raise ValueError("No Generator Specified") +async def run_generator(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: np_type = pl.node_parser.comp_subtype if pl.generator.inference_type == InferenceType.LOCAL: - ret = pl.generator.run(chat_request, retri_res, np_type) + ret = await pl.generator.run(chat_request, [], np_type) elif pl.generator.inference_type == InferenceType.VLLM: - ret = pl.generator.run_vllm(chat_request, retri_res, np_type, sub_questions=sub_questionss_result) + ret = await pl.generator.run_vllm(chat_request, [], np_type) else: raise ValueError("LLM inference_type not supported") - return ret, contexts + return ret diff --git a/EdgeCraftRAG/edgecraftrag/components/postprocessor.py b/EdgeCraftRAG/edgecraftrag/components/postprocessor.py index bb59cc3d21..cbd387f59e 100644 --- a/EdgeCraftRAG/edgecraftrag/components/postprocessor.py +++ b/EdgeCraftRAG/edgecraftrag/components/postprocessor.py @@ -60,5 +60,5 @@ def run(self, **kwargs) -> Any: @model_serializer def ser_model(self): - set = {"idx": self.idx, "processor_type": 
self.comp_subtype, "model": None, "top_n": None} + set = {"idx": self.idx, "processor_type": self.comp_subtype, "top_n": None} return set diff --git a/EdgeCraftRAG/edgecraftrag/components/query_preprocess.py b/EdgeCraftRAG/edgecraftrag/components/query_preprocess.py index 124014a038..e17ab9d724 100644 --- a/EdgeCraftRAG/edgecraftrag/components/query_preprocess.py +++ b/EdgeCraftRAG/edgecraftrag/components/query_preprocess.py @@ -8,6 +8,7 @@ import aiohttp import numpy +from edgecraftrag.config_repository import MilvusConfigRepository from omegaconf import OmegaConf @@ -85,6 +86,7 @@ def __init__( output_template="", json_key="relevance", json_levels=["Low", "High"], + scores_weight=None, temperature=1.0, API_BASE=None, **kwargs, @@ -107,6 +109,19 @@ def __init__( self.json_levels = json_levels self.API_BASE = API_BASE + # dynamically set scores_weight, use default if not provided + if scores_weight is None: + # generate default weights based on json_levels count + if len(json_levels) == 2: + self.scores_weight = [0.0, 1.0] # Low, High + elif len(json_levels) == 3: + self.scores_weight = [0.0, 0.5, 1.0] # Low, Medium, High + else: + # for other counts, generate evenly spaced weights + self.scores_weight = [i / (len(json_levels) - 1) for i in range(len(json_levels))] + else: + self.scores_weight = scores_weight + async def invoke_vllm(self, input_texts): headers = {"Content-Type": "application/json"} payload = { @@ -152,18 +167,22 @@ async def _calculate_logits_score(self, user_input, issue): def _calculate_token_score_vllm(self, outputs, output_index=1, transform="exp"): generated_scores = outputs[output_index] - three_scores = [ - generated_scores.get("Low", -9999.0), - generated_scores.get("Medium", -9999.0), - generated_scores.get("High", -9999.0), - ] - level_scores = [score / self.temperature for score in three_scores] + + # dynamically get scores for all levels + level_scores = [] + for level in self.json_levels: + level_scores.append(generated_scores.get(level, -9999.0)) + + # apply temperature scaling + level_scores = [score / self.temperature for score in level_scores] level_scores_np = numpy.array(level_scores) level_scores_np = numpy.where(level_scores_np < -1000, -1000, level_scores_np) level_scores_np_exp = numpy.exp(level_scores_np - numpy.max(level_scores_np)) scores_probs = level_scores_np_exp / level_scores_np_exp.sum() - scores_weight = numpy.array([0.0, 0.5, 1.0]) # Low=0, Medium=0.5, High=1 + + # using dynamic scores_weight + scores_weight = numpy.array(self.scores_weight) final_score = numpy.dot(scores_probs, scores_weight) return final_score @@ -172,40 +191,51 @@ async def compute_score(self, input_pair): return await self._calculate_logits_score(*input_pair) -def read_json_files(directory: str) -> dict: - result = {} - for filename in os.listdir(directory): - if filename.endswith(".json"): - file_path = os.path.join(directory, filename) - if os.path.isfile(file_path): - try: - with open(file_path, "r", encoding="utf-8") as file: - data = json.load(file) - result.update(data) - except Exception: - continue - return result +experience_repo = MilvusConfigRepository.create_connection("experience_data", 1) + +def read_json_files(file_path: str) -> dict: + experience_lists = [] + if experience_repo: + experience_lists = [] + for experience in experience_repo.get_configs(): + experience_lists.append(experience["config_json"]) + else: + if os.path.isfile(file_path): + with open(file_path, "r", encoding="utf-8") as f: + experience_lists = json.load(f) + return experience_lists 
-async def query_search(user_input, search_config_path, search_dir, pl): +async def query_search(user_input, SEARCH_CONFIG_PATH, SEARCH_DIR, pl): top1_issue = None - sub_questionss_result = None - if not os.path.exists(search_dir): - return top1_issue, sub_questionss_result + sub_questions_result = None model_id = pl.generator.model_id vllm_endpoint = pl.generator.vllm_endpoint - cfg = OmegaConf.load(search_config_path) - cfg.query_matcher.model_id = model_id - cfg.query_matcher.API_BASE = os.path.join(vllm_endpoint, "v1/completions") - query_matcher = LogitsEstimatorJSON(**cfg.query_matcher) - maintenance_data = read_json_files(search_dir) - issues = list(maintenance_data.keys()) + maintenance_data = read_json_files(SEARCH_DIR) + issues = [] + for i in range(len(maintenance_data)): + issues.append(maintenance_data[i]["question"]) if not issues: - return top1_issue, sub_questionss_result - + return top1_issue, sub_questions_result + + cfg = {} + if not os.path.exists(SEARCH_CONFIG_PATH): + cfg["query_matcher"] = { + "instructions": "You're a knowledgeable assistant. Your task is to judge if two queries ask for the same information about the same primary subject. Output only 'Yes' or 'No'. Yes = same subject entity AND same information need, with only wording or stylistic differences. No = different subject entity, different spec or numeric constraint, different attribute/metric, or scope changed by adding/removing a restricting condition. Entity changes MUST lead to No.", + "input_template": "Query 1: {}\nQuery 2: {}\n", + "output_template": "\nAre these queries equivalent? Answer 'Yes' or 'No':", + "json_key": "similarity", + "json_levels": ["No", "Yes"], + "temperature": 0.1, + } + else: + cfg = OmegaConf.load(SEARCH_CONFIG_PATH) + cfg["query_matcher"]["model_id"] = model_id + cfg["query_matcher"]["API_BASE"] = os.path.join(vllm_endpoint, "v1/completions") + query_matcher = LogitsEstimatorJSON(**cfg["query_matcher"]) semaphore = asyncio.Semaphore(200) async def limited_compute_score(query_matcher, user_input, issue): @@ -219,9 +249,9 @@ async def limited_compute_score(query_matcher, user_input, issue): # Maximum less than 0.6, we don't use query search. 
if match_scores[0][1] < 0.6: - return top1_issue, sub_questionss_result + return top1_issue, sub_questions_result top1_issue = match_scores[0][0] - for key, value in maintenance_data.items(): - if key == top1_issue: - sub_questionss_result = value - return top1_issue, sub_questionss_result + for i in range(len(maintenance_data)): + if maintenance_data[i]["question"] == top1_issue: + sub_questions_result = "\n".join(maintenance_data[i]["content"]) + return top1_issue, sub_questions_result diff --git a/EdgeCraftRAG/edgecraftrag/components/retriever.py b/EdgeCraftRAG/edgecraftrag/components/retriever.py index fa8553346a..cdd3fe0bc2 100644 --- a/EdgeCraftRAG/edgecraftrag/components/retriever.py +++ b/EdgeCraftRAG/edgecraftrag/components/retriever.py @@ -1,14 +1,19 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from typing import Any, List, cast +import warnings +from typing import Any, List, Optional, cast +import requests from edgecraftrag.base import BaseComponent, CompType, RetrieverType +from langchain_milvus import Milvus +from langchain_openai import OpenAIEmbeddings from llama_index.core.indices.vector_store.retrievers import VectorIndexRetriever from llama_index.core.retrievers import AutoMergingRetriever -from llama_index.core.schema import BaseNode +from llama_index.core.schema import BaseNode, Document, NodeWithScore from llama_index.retrievers.bm25 import BM25Retriever from pydantic import model_serializer +from pymilvus import Collection, MilvusException, connections, utility class VectorSimRetriever(BaseComponent, VectorIndexRetriever): @@ -39,6 +44,8 @@ def __init__(self, indexer, **kwargs): def run(self, **kwargs) -> Any: for k, v in kwargs.items(): if k == "query": + top_k = kwargs["top_k"] if kwargs["top_k"] else self.topk + self.similarity_top_k = top_k return self.retrieve(v) return None @@ -75,8 +82,9 @@ def __init__(self, indexer, **kwargs): def run(self, **kwargs) -> Any: for k, v in kwargs.items(): if k == "query": + top_k = kwargs["top_k"] if kwargs["top_k"] else self.topk # vector_retriever needs to be updated - self._vector_retriever = self._index.as_retriever(similarity_top_k=self.topk) + self._vector_retriever = self._index.as_retriever(similarity_top_k=top_k) return self.retrieve(v) return None @@ -108,8 +116,9 @@ def __init__(self, indexer, **kwargs): def run(self, **kwargs) -> Any: for k, v in kwargs.items(): if k == "query": + top_k = kwargs["top_k"] if kwargs["top_k"] else self.topk nodes = cast(List[BaseNode], list(self._docstore.docs.values())) - similarity_top_k = min(len(nodes), self.topk) + similarity_top_k = min(len(nodes), top_k) bm25_retr = BM25Retriever.from_defaults(nodes=nodes, similarity_top_k=similarity_top_k) return bm25_retr.retrieve(v) @@ -123,3 +132,139 @@ def ser_model(self): "retrieve_topk": self.topk, } return set + + +class KBadminRetriever(BaseComponent): + def __init__(self, indexer, **kwargs): + BaseComponent.__init__( + self, + comp_type=CompType.RETRIEVER, + comp_subtype=RetrieverType.KBADMIN_RETRIEVER, + ) + self.vector_db = None + self.collection_name = None + self.topk = kwargs.get("similarity_top_k", 30) + self.KBADMIN_MILVUS_URL = indexer.vector_url + self.CONNECTION_ARGS = {"uri": indexer.vector_url} + self.vector_field = "q_1024_vec" + self.text_field = "content_with_weight" + self.embedding_model_name = indexer.embed_model + self.embedding_url = indexer.kbadmin_embedding_url + "/v3" + self.embedding = OpenAIEmbeddings( + model=self.embedding_model_name, + api_key="unused", + 
base_url=self.embedding_url, + tiktoken_enabled=False, + embedding_ctx_length=510, + ) + + def config_kbadmin_milvus(self, knowledge_name): + collection_name = knowledge_name + if not kbs_rev_maps: + get_kbs_info(self.CONNECTION_ARGS) + collection_name = kbs_rev_maps[collection_name] + self.vector_db = Milvus( + self.embedding, + connection_args=self.CONNECTION_ARGS, + collection_name=collection_name, + vector_field=self.vector_field, + text_field=self.text_field, + enable_dynamic_field=True, + index_params={"index_type": "FLAT", "metric_type": "IP", "params": {}}, + ) + + def similarity_search_with_embedding(self, query: str, k) -> list[tuple[Document, float]]: + url = self.embedding_url + "/embeddings" + embedding_info = {"model": self.embedding_model_name, "input": query} + # Get embedding result from embedding service + response = requests.post(url, headers={"Content-Type": "application/json"}, json=embedding_info) + embedding_json = response.json() + embedding = embedding_json["data"][0]["embedding"] + docs_and_scores = self.vector_db.similarity_search_with_score_by_vector(embedding=embedding, k=k) + relevance_score_fn = self.vector_db._select_relevance_score_fn() + return [(doc, relevance_score_fn(score)) for doc, score in docs_and_scores] + + def run(self, **kwargs) -> Any: + query = kwargs["query"] + top_k = kwargs["top_k"] if kwargs["top_k"] else self.topk + # langchain retrieval + docs_and_similarities = self.similarity_search_with_embedding(query=query, k=top_k) + node_with_scores: List[NodeWithScore] = [] + for doc, similarity in docs_and_similarities: + score: Optional[float] = None + if similarity is not None: + score = similarity + # convert langchain store format into llamaindex + node = Document.from_langchain_format(doc) + node_with_scores.append(NodeWithScore(node=node, score=score)) + return node_with_scores + + @model_serializer + def ser_model(self): + set = {"idx": self.idx, "retriever_type": self.comp_subtype, "CONNECTION_ARGS": self.CONNECTION_ARGS} + return set + + +# global kbs maps. 
+global kbs_rev_maps +kbs_rev_maps = {} + + +def get_kbs_info(CONNECTION_ARGS): + alias = "default" + try: + connections.connect("default", **CONNECTION_ARGS) + collections = utility.list_collections() + all_kb_infos = {} + new_infos = {} + for kb in collections: + collection = Collection(kb) + collection.load() + try: + if any(field.name == "kb_id" for field in collection.schema.fields): + docs = collection.query( + expr="pk != 0", + output_fields=["kb_name", "kb_id", "docnm_kwd"], + timeout=10, + ) + else: + docs = collection.query( + expr="pk != 0", + output_fields=["filename"], + timeout=10, + ) + collection.release() + except MilvusException as e: + continue + this_kbinfo = {} + for doc in docs: + try: + if "kb_name" in doc: + if not this_kbinfo: + this_kbinfo["name"] = doc["kb_name"] + this_kbinfo["uuid"] = doc["kb_id"] + this_kbinfo["files"] = set([doc["docnm_kwd"]]) + else: + this_kbinfo["files"].add(doc["docnm_kwd"]) + else: + if not this_kbinfo: + this_kbinfo["name"] = kb + this_kbinfo["uuid"] = "" + this_kbinfo["files"] = set([doc["filename"]]) + else: + this_kbinfo["files"].add(doc["filename"]) + except KeyError: + this_kbinfo = None + break + if this_kbinfo: + unique_files = list(this_kbinfo["files"]) + this_kbinfo["files"] = unique_files + new_infos[kb] = this_kbinfo + all_kb_infos.update(new_infos) + kbs_rev_maps.clear() + for kb_id in all_kb_infos: + kbs_rev_maps[all_kb_infos[kb_id]["name"]] = kb_id + return kbs_rev_maps + finally: + if connections.has_connection(alias): + connections.disconnect(alias) diff --git a/EdgeCraftRAG/edgecraftrag/components/session.py b/EdgeCraftRAG/edgecraftrag/components/session.py new file mode 100644 index 0000000000..65b1aeb4fd --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/components/session.py @@ -0,0 +1,75 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from datetime import datetime +from typing import Any, Dict, List, Optional + +from edgecraftrag.base import BaseComponent, CompType +from pydantic import model_serializer + + +class Session(BaseComponent): + def __init__(self, session_id: str): + super().__init__(comp_type=CompType.SESSION) + self.session_id = session_id + self.messages: List[Dict[str, str]] = [] + self.created_at: datetime = datetime.now() + self.current_messages: Optional[Dict[str, str]] = None + + def add_message(self, role: str, content: str) -> None: + if role not in ("user", "assistant"): + raise ValueError("Role should be 'user' or 'assistant'") + self.messages.append({"role": role, "content": content}) + self.current_messages = None + + def get_messages(self) -> List[Dict[str, str]]: + return self.messages.copy() + + def clear_messages(self) -> None: + self.messages = [] + + def get_user_message_titel(self) -> Optional[str]: + for msg in self.messages: + if msg["role"] == "user": + return msg["content"] + return None + + def to_dict(self) -> Dict[str, Any]: + concat_messages = self.messages.copy() + if self.current_messages: + concat_messages.append(self.current_messages) + return { + "session_id": self.session_id, + "created_at": self.created_at.isoformat(), + "messages": concat_messages, + "idx": self.idx, + } + + def update_current_message(self, role: str, content: str) -> None: + self.current_messages = {"role": role, "content": content} + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "Session": + session_id = data.get("session_id", f"session_{data.get('idx', 'unknown')}") + session = cls(session_id) + session.idx = data.get("idx", session.idx) + 
created_at_str = data.get("created_at") + session.created_at = datetime.fromisoformat(created_at_str) if created_at_str else datetime.now() + for item in data.get("messages", []): + if isinstance(item, dict) and "role" in item and "content" in item: + role = item["role"] + content = item["content"] + if role in ("user", "assistant") and isinstance(content, str): + session.add_message(role, content) + return session + + def run(self, **kwargs) -> Any: + pass + + @model_serializer + def ser_model(self): + return { + "session_id": self.session_id, + "created_at": self.created_at.isoformat(), + "messages": self.messages, + } diff --git a/EdgeCraftRAG/edgecraftrag/config_repository.py b/EdgeCraftRAG/edgecraftrag/config_repository.py new file mode 100644 index 0000000000..761297f4f7 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/config_repository.py @@ -0,0 +1,420 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import os +import time +from typing import Dict, List, Optional + +from edgecraftrag.env import AGENT_FILE, KNOWLEDGEBASE_FILE, PIPELINE_FILE +from pymilvus import ( + Collection, + CollectionSchema, + DataType, + FieldSchema, + connections, + utility, +) + + +class MilvusConfigRepository: + def __init__( + self, + Repo_config_name: Optional[str] = "pipeline_config", + vector_url: Optional[str] = None, + ): + self.vector_url = vector_url or os.getenv("METADATA_DATABASE_URL") + self.host, self.port = None, None + if self.vector_url: + host_port = self.vector_url.replace("http://", "").replace("https://", "") + if ":" in host_port: + self.host, self.port = host_port.split(":", 1) + self.collection_name = Repo_config_name + self.alias = Repo_config_name + self.collection = None + self.connected = False + + def _connect(self) -> None: + try: + connections.connect(host=self.host, port=self.port, alias=self.alias) + except Exception as e: + raise RuntimeError(f"Connect Milvus failed: {str(e)}") + + def _init_collection(self) -> Collection: + if not utility.has_collection(self.collection_name, using=self.alias): + fields = [ + FieldSchema( + name="idx", + dtype=DataType.VARCHAR, + max_length=100, + is_primary=True, + auto_id=False, + ), + FieldSchema(name="config_json", dtype=DataType.JSON), + FieldSchema(name="dummy_vector", dtype=DataType.FLOAT_VECTOR, dim=2), + ] + schema = CollectionSchema(fields, description="Config storage (idx as primary key)") + collection = Collection(self.collection_name, schema, using=self.alias) + collection.create_index("dummy_vector", {"index_type": "FLAT", "metric_type": "L2"}) + return collection + return Collection(self.collection_name, using=self.alias) + + @classmethod + def create_connection( + cls, + Repo_config_name: Optional[str] = "pipeline_config", + max_retries: Optional[int] = 10, + vector_url: Optional[str] = None, + ): + instance = cls(Repo_config_name, vector_url) + retry_interval = 6 + if instance.host: + for retry in range(max_retries): + try: + instance._connect() + instance.collection = instance._init_collection() + instance.collection.load() + instance.connected = True + return instance + except Exception as e: + print(f"Attempt {retry + 1} failed: {str(e)}") + if retry < max_retries - 1: + time.sleep(retry_interval) + raise ConnectionError(f"Max retries ({max_retries}) reached") + return None + + def save_configs(self, configs: List[Dict]) -> None: + self.collection.delete("idx != ''") + insert_data = [] + for config in configs: + insert_data.append( + { + "idx": config["idx"], + "config_json": 
config, + "dummy_vector": [0.0, 0.0], + } + ) + if insert_data: + idx_list = [i["idx"] for i in insert_data] + configs_list = [i["config_json"] for i in insert_data] + vectors = [i["dummy_vector"] for i in insert_data] + self.collection.insert([idx_list, configs_list, vectors]) + self.collection.flush() + else: + print("No data to insert") + + def get_configs(self, idx: Optional[str] = None, output_fields: Optional[list] = None) -> List[Dict]: + try: + self.collection.load() + output_fields = output_fields or ["idx", "config_json"] + if idx: + expr = f'idx == "{idx}"' + else: + expr = "idx != ''" + results = self.collection.query(expr=expr, output_fields=output_fields) + return results + except Exception as e: + print(f"Read error: {e}") + return [] + + def add_config_by_idx(self, idx: str, config_json: Dict) -> bool: + if not self.connected or not self.collection: + raise RuntimeError("Not connected to Milvus") + try: + self.collection.load() + self.collection.insert([[idx], [config_json], [[0.0, 0.0]]]) + return True + except Exception as e: + print(f"Add failed: {e}") + return False + + def delete_config_by_idx(self, idx: str) -> int: + if not self.connected or not self.collection: + raise RuntimeError("Not connected to Milvus") + try: + self.collection.load() + res = self.collection.delete(f'idx == "{idx}"') + self.collection.flush() + return True + except Exception as e: + print(f"Delete failed: {e}") + return 0 + + def update_config_by_idx(self, idx: str, new_config_json: Dict) -> bool: + if not self.connected or not self.collection: + raise RuntimeError("Not connected to Milvus") + try: + self.collection.load() + upsert_data = [[idx], [new_config_json], [[0.0, 0.0]]] + self.collection.upsert(upsert_data) + return True + except Exception as e: + print(f"Upsert failed: {str(e)}") + return False + + def clear_all_config(self): + try: + self.collection.load() + res = self.collection.delete("idx != ''") + self.collection.flush() + return True + except Exception as e: + print(f"Clear all configs failed: {e}") + return False + + +class MilvusDocumentRecordRepository: + def __init__( + self, + repo_name: Optional[str] = "document_records", + vector_url: Optional[str] = None, + ): + if vector_url: + self.vector_url = vector_url + else: + self.vector_url = os.getenv("METADATA_DATABASE_URL") + self.host, self.port = None, None + if self.vector_url: + if self.vector_url.startswith(("http://", "https://")): + host_port = self.vector_url.replace("http://", "").replace("https://", "") + else: + host_port = self.vector_url + if ":" in host_port: + self.host, self.port = host_port.split(":", 1) + + self.collection_name = repo_name + self.alias = repo_name + self.collection = None + self.connected = False + + def _connect(self) -> None: + try: + connections.connect(host=self.host, port=self.port, alias=self.alias) + except Exception as e: + raise RuntimeError(f"Unable to connect to Milvus server: {str(e)}") + + def _init_collection(self) -> Collection: + if not utility.has_collection(self.collection_name, using=self.alias): + fields = [ + FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True), + FieldSchema(name="file_id", dtype=DataType.VARCHAR, max_length=100), + FieldSchema(name="file_path", dtype=DataType.VARCHAR, max_length=512), + FieldSchema(name="doc_id", dtype=DataType.VARCHAR, max_length=100), + FieldSchema(name="metadata", dtype=DataType.JSON), + FieldSchema(name="dummy_vector", dtype=DataType.FLOAT_VECTOR, dim=2), + ] + schema = CollectionSchema(fields, 
description="File-Document association records (with metadata)") + collection = Collection(name=self.collection_name, schema=schema, using=self.alias) + index_params = {"index_type": "FLAT", "metric_type": "L2"} + collection.create_index(field_name="dummy_vector", index_params=index_params) + return collection + else: + return Collection(self.collection_name, using=self.alias) + + @classmethod + def create_connection( + cls, + repo_name: Optional[str] = "document_records", + max_retries: Optional[int] = 10, + vector_url: Optional[str] = None, + ): + instance = cls(repo_name, vector_url) + retry_interval = 6 + if instance.host: + for retry in range(max_retries): + try: + instance._connect() + instance.collection = instance._init_collection() + instance.collection.load() + instance.connected = True + return instance + except Exception as e: + print(f"Attempt {retry + 1} failed: {str(e)}") + if retry < max_retries - 1: + print(f"Retrying in {retry_interval}s...") + time.sleep(retry_interval) + raise ConnectionError(f"Max retries ({max_retries}) reached. Failed to connect to Milvus") + else: + return None + + def save_records(self, records: List[Dict[str, str]]) -> None: + if not records: + print("No records to save") + return + insert_data = [ + [rec["file_id"] for rec in records], + [rec["file_path"] for rec in records], + [rec["doc_id"] for rec in records], + [rec.get("metadata", {}) for rec in records], + [[0.0, 0.0] for _ in records], + ] + self.collection.insert(insert_data) + + def delete_records_by_file_id(self, file_id: str) -> List[str]: + expr = f'file_id == "{file_id}"' + results = self.collection.query(expr=expr, output_fields=["doc_id"]) + deleted_doc_ids = [res["doc_id"] for res in results] + + if deleted_doc_ids: + self.collection.delete(expr=expr) + return deleted_doc_ids + + def get_records_by_file_id(self, file_id: str) -> List[Dict]: + expr = f'file_id == "{file_id}"' + results = self.collection.query( + expr=expr, + output_fields=["id", "file_id", "file_path", "doc_id", "metadata"], + ) + return results + + +# Configuration of the persistence pipeline +pipeline_milvus_repo = MilvusConfigRepository.create_connection("pipeline_config", 20) + + +async def save_pipeline_configurations(operation: str = None, pipeline=None): + try: + json_str = pipeline.get_pipeline_json + target_data = json.loads(json_str) + target_data["idx"] = pipeline.idx + target_idx = target_data.get("idx") + if "generator" in target_data and operation != "delete": + target_data["generator"]["prompt_content"] = pipeline.generator.prompt_content + target_data["documents_cache"] = pipeline.documents_cache + target_data["active"] = pipeline.status.active + + if pipeline_milvus_repo: + if operation == "add": + success = pipeline_milvus_repo.add_config_by_idx(target_idx, target_data) + elif operation == "delete": + success = pipeline_milvus_repo.delete_config_by_idx(target_idx) + elif operation == "update": + success = pipeline_milvus_repo.update_config_by_idx(target_idx, target_data) + if not success: + return False + return True + else: + existing_pipelines = [] + if os.path.exists(PIPELINE_FILE): + with open(PIPELINE_FILE, "r", encoding="utf-8") as f: + existing_pipelines = json.load(f) + if not isinstance(existing_pipelines, list): + existing_pipelines = [] + + if operation == "add": + if any(p.get("idx") == target_idx for p in existing_pipelines): + return {"message": "Pipeline already exists"} + existing_pipelines.append(target_data) + elif operation == "delete": + existing_pipelines = [p for p in 
existing_pipelines if p.get("idx") != target_idx] + elif operation == "update": + for i in range(len(existing_pipelines)): + if existing_pipelines[i].get("idx") == target_idx: + existing_pipelines[i] = target_data + else: + return {"message": f"Invalid operation: {operation}"} + with open(PIPELINE_FILE, "w", encoding="utf-8") as f: + json.dump(existing_pipelines, f, indent=2, ensure_ascii=False) + return True + except Exception as e: + print(f"Error saving pipelines: {e}") + + +# Configuration of knowledge base for persistence +knowledgebase_config_repo = MilvusConfigRepository.create_connection("knowledgebase_config", 1) + + +async def save_knowledge_configurations(operation: str = None, kb=None): + try: + if not kb: + return {"message": "Missing knowledgebase data"} + target_kb = { + "idx": kb.idx, + "name": kb.name, + "description": kb.description, + "active": kb.active, + "file_paths": kb.file_paths, + "comp_type": kb.comp_type, + "comp_subtype": kb.comp_subtype, + "experience_active": kb.experience_active, + "all_document_maps": kb.all_document_maps, + } + target_idx = target_kb.get("idx") + if not target_idx: + return {"message": "Missing 'idx' in knowledgebase data"} + + if knowledgebase_config_repo: + if operation == "add": + success = knowledgebase_config_repo.add_config_by_idx(target_idx, target_kb) + elif operation == "delete": + success = knowledgebase_config_repo.delete_config_by_idx(target_idx) + elif operation == "update": + success = knowledgebase_config_repo.update_config_by_idx(target_idx, target_kb) + else: + return {"message": f"Invalid operation: {operation}"} + return success + else: + existing_kbs = [] + if os.path.exists(KNOWLEDGEBASE_FILE): + with open(KNOWLEDGEBASE_FILE, "r", encoding="utf-8") as f: + existing_kbs = json.load(f) + if not isinstance(existing_kbs, list): + existing_kbs = [] + if operation == "add": + existing_kbs.append(target_kb) + elif operation == "delete": + existing_kbs = [item for item in existing_kbs if item.get("idx") != target_idx] + elif operation == "update": + for i in range(len(existing_kbs)): + if existing_kbs[i].get("idx") == target_idx: + existing_kbs[i] = target_kb + else: + return {"message": f"Invalid operation: {operation}"} + with open(KNOWLEDGEBASE_FILE, "w", encoding="utf-8") as f: + json.dump(existing_kbs, f, indent=2, ensure_ascii=False) + return True + except Exception as e: + print(f"Error saving Knowledge base: {e}") + + +# Configuration of the persistence agent +agent_milvus_repo = MilvusConfigRepository.create_connection("agent_config", 1) + + +async def save_agent_configurations(operation: str = None, agents=None): + try: + if agent_milvus_repo: + if not agents: + return False + for agent in agents.values(): + target_data = agent.model_dump(mode="json") + if operation == "delete": + success = agent_milvus_repo.delete_config_by_idx(agent.idx) + if not success: + return False + continue + + target_idx = target_data.get("idx") + if not target_idx: + return {"message": "Missing 'idx' in data"} + + if operation == "add": + success = agent_milvus_repo.add_config_by_idx(target_idx, target_data) + + elif operation == "update": + success = agent_milvus_repo.update_config_by_idx(target_idx, target_data) + + if not success: + return False + return True + else: + if not agents: + return False + agent_list = [] + for agent in agents.values(): + agent_list.append(agent.model_dump(mode="json")) + json_str = json.dumps(agent_list, indent=2, ensure_ascii=False) + with open(AGENT_FILE, "w", encoding="utf-8") as f: + f.write(json_str) + 
except Exception as e: + print(f"Error saving agents: {e}") diff --git a/EdgeCraftRAG/edgecraftrag/context.py b/EdgeCraftRAG/edgecraftrag/context.py index 4d013b9bd2..94a5d91c40 100644 --- a/EdgeCraftRAG/edgecraftrag/context.py +++ b/EdgeCraftRAG/edgecraftrag/context.py @@ -1,12 +1,20 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from edgecraftrag.controllers.compmgr import GeneratorMgr, IndexerMgr, NodeParserMgr, PostProcessorMgr, RetrieverMgr +from edgecraftrag.controllers.agentmgr import AgentManager +from edgecraftrag.controllers.compmgr import ( + GeneratorMgr, + IndexerMgr, + NodeParserMgr, + PostProcessorMgr, + RetrieverMgr, +) from edgecraftrag.controllers.filemgr import FilelMgr from edgecraftrag.controllers.knowledge_basemgr import KnowledgeManager from edgecraftrag.controllers.modelmgr import ModelMgr from edgecraftrag.controllers.nodemgr import NodeMgr from edgecraftrag.controllers.pipelinemgr import PipelineMgr +from edgecraftrag.controllers.sessionmgr import SessionManager class Context: @@ -22,6 +30,8 @@ def __init__(self): self.genmgr = GeneratorMgr() self.filemgr = FilelMgr() self.knowledgemgr = KnowledgeManager() + self.agentmgr = AgentManager(self.plmgr) + self.sessionmgr = SessionManager() def get_pipeline_mgr(self): return self.plmgr @@ -53,5 +63,11 @@ def get_file_mgr(self): def get_knowledge_mgr(self): return self.knowledgemgr + def get_agent_mgr(self): + return self.agentmgr + + def get_session_mgr(self): + return self.sessionmgr + ctx = Context() diff --git a/EdgeCraftRAG/edgecraftrag/controllers/agentmgr.py b/EdgeCraftRAG/edgecraftrag/controllers/agentmgr.py new file mode 100644 index 0000000000..db03dab3fd --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/controllers/agentmgr.py @@ -0,0 +1,117 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from typing import Any, Dict, Optional + +from comps.cores.proto.api_protocol import ChatCompletionRequest +from edgecraftrag.api_schema import AgentCreateIn +from edgecraftrag.base import AgentType, BaseMgr, CallbackType +from edgecraftrag.components.agent import Agent +from edgecraftrag.components.agents.deep_search.deep_search import DeepSearchAgent +from edgecraftrag.components.agents.simple import SimpleRAGAgent + + +class AgentManager(BaseMgr): + + active_agent_idx: Optional[str] = None + + def __init__(self, pipeline_mgr): + super().__init__() + self.active_agent_idx = None + self.agents = {} + self.pipeline_mgr = pipeline_mgr + + def set_manager(self, agent: Agent): + agent.manager = self + + def get_pipeline_by_name_or_id(self, name_or_id): + return self.pipeline_mgr.get_pipeline_by_name_or_id(name_or_id) + + def get_agents(self) -> Dict[str, Any]: + return self.agents + + def get_agent_by_id(self, idx): + return self.agents.get(idx, None) + + def get_agent_by_name(self, name): + for k, a in self.agents.items(): + if a.name == name: + return a + return None + + def get_agent_id_by_name(self, name): + for k, a in self.agents.items(): + if a.name == name: + return k + return None + + def create_agent(self, cfgs: AgentCreateIn): + new_agent = None + if not self.get_pipeline_by_name_or_id(cfgs.pipeline_idx): + return "Create Agent failed. Pipeline id not found." 
+ if cfgs.type == AgentType.SIMPLE: + new_agent = SimpleRAGAgent(cfgs.idx, cfgs.name, cfgs.pipeline_idx, cfgs.configs) + elif cfgs.type == AgentType.DEEPSEARCH: + new_agent = DeepSearchAgent(cfgs.idx, cfgs.name, cfgs.pipeline_idx, cfgs.configs) + if new_agent is not None: + self.set_manager(new_agent) + self.agents[new_agent.idx] = new_agent + if cfgs.active: + self.active_agent_idx = new_agent.idx + return new_agent + else: + return "Create Agent failed." + + def update_agent(self, name, cfgs: AgentCreateIn): + idx = self.get_agent_id_by_name(name) + if idx: + agent = self.get_agent_by_id(idx) + if cfgs.configs: + agent.update(cfgs.configs) + if cfgs.active: + return self.activate_agent(idx) + else: + return self.deactivate_agent(idx) + return True + else: + return False + + def remove_agent(self, name): + idx = self.get_agent_id_by_name(name) + if self.agents.pop(idx, None): + return True + return False + + def activate_agent(self, idx): + if idx in self.agents: + self.active_agent_idx = idx + return True + else: + return False + + def deactivate_agent(self, idx): + if idx in self.agents: + self.active_agent_idx = None + return True + else: + return False + + def get_active_agent_id(self): + return self.active_agent_idx + + def get_active_agent(self): + if self.active_agent_idx: + return self.agents[self.active_agent_idx] + else: + return None + + def get_agent_default_configs(self, agent_type): + if agent_type == AgentType.SIMPLE: + return SimpleRAGAgent.get_default_configs() + if agent_type == AgentType.DEEPSEARCH: + return DeepSearchAgent.get_default_configs() + + async def run_agent(self, chat_request: ChatCompletionRequest) -> Any: + active_agent = self.get_active_agent() + if active_agent is not None: + return await active_agent.run(cbtype=CallbackType.RUNAGENT, chat_request=chat_request) diff --git a/EdgeCraftRAG/edgecraftrag/controllers/compmgr.py b/EdgeCraftRAG/edgecraftrag/controllers/compmgr.py index b8dd82ab7b..4e5d93896a 100644 --- a/EdgeCraftRAG/edgecraftrag/controllers/compmgr.py +++ b/EdgeCraftRAG/edgecraftrag/controllers/compmgr.py @@ -43,6 +43,7 @@ def search_indexer(self, indin: IndexerIn) -> BaseComponent: (v.model.model_id_or_path == indin.embedding_model.model_id) or (v.model.model_id_or_path == indin.embedding_model.model_path) ) + and v.model.device == indin.embedding_model.device ): return v return None diff --git a/EdgeCraftRAG/edgecraftrag/controllers/knowledge_basemgr.py b/EdgeCraftRAG/edgecraftrag/controllers/knowledge_basemgr.py index dc69943eb2..233a2bd5bd 100644 --- a/EdgeCraftRAG/edgecraftrag/controllers/knowledge_basemgr.py +++ b/EdgeCraftRAG/edgecraftrag/controllers/knowledge_basemgr.py @@ -13,6 +13,7 @@ class KnowledgeManager(BaseMgr): def __init__(self): super().__init__() self.active_knowledge_idx: Optional[str] = None + self.active_experience_idx: Optional[str] = None def get_knowledge_base_by_name_or_id(self, name: str): for _, kb in self.components.items(): @@ -26,8 +27,17 @@ def get_active_knowledge_base(self) -> Optional[Knowledge]: else: return None + def get_active_experience(self): + if self.active_experience_idx: + return self.get_knowledge_base_by_name_or_id(self.active_experience_idx) + else: + return None + def active_knowledge(self, knowledge: KnowledgeBaseCreateIn): kb = self.get_knowledge_base_by_name_or_id(knowledge.name) + if kb.comp_type != "knowledge": + raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail="Experience type cannot be active") + kb = self.get_knowledge_base_by_name_or_id(knowledge.name) 
self.active_knowledge_idx = kb.idx if knowledge.active else None for idx, comp in self.components.items(): @@ -35,16 +45,47 @@ def active_knowledge(self, knowledge: KnowledgeBaseCreateIn): comp.active = idx == self.active_knowledge_idx return kb + def active_experience(self, knowledge: KnowledgeBaseCreateIn): + kb = self.get_knowledge_base_by_name_or_id(knowledge.name) + if kb.comp_type != "experience": + raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail="Knowledge type cannot be active") + self.active_experience_idx = kb.idx if knowledge.experience_active else None + if kb.experience_active != knowledge.experience_active: + for idx, comp in self.components.items(): + if isinstance(comp, Knowledge): + comp.experience_active = idx == self.active_experience_idx + return kb + def create_knowledge_base(self, knowledge: KnowledgeBaseCreateIn) -> Knowledge: for _, kb in self.components.items(): if kb.name == knowledge.name: raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail="The knowledge base already exists.") + if knowledge.comp_type == "experience": + for idx, kb in self.components.items(): + if kb.comp_type == "experience": + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, detail="Only one experience class can be created." + ) + if knowledge.comp_type == "experience": + knowledge.active = False if knowledge.active is None: knowledge.active = False - kb = Knowledge(name=knowledge.name, description=knowledge.description, active=knowledge.active) + kb = Knowledge( + idx=knowledge.idx, + name=knowledge.name, + description=knowledge.description, + active=knowledge.active, + comp_type=knowledge.comp_type, + comp_subtype=knowledge.comp_subtype, + experience_active=knowledge.experience_active, + all_document_maps=knowledge.all_document_maps, + file_paths=knowledge.file_paths, + ) self.add(kb) if knowledge.active: self.active_knowledge(knowledge) + if knowledge.experience_active: + self.active_experience(knowledge) return kb def delete_knowledge_base(self, name: str): @@ -54,12 +95,16 @@ def delete_knowledge_base(self, name: str): def update_knowledge_base(self, knowledge) -> Knowledge: kb = self.get_knowledge_base_by_name_or_id(knowledge.name) - - if knowledge.description is not None: - kb.description = knowledge.description - - if knowledge.active is not None and kb.active != knowledge.active: - kb = self.active_knowledge(knowledge) + if kb.comp_type == "knowledge": + if knowledge.description is not None: + kb.description = knowledge.description + if knowledge.active is not None and kb.active != knowledge.active: + kb = self.active_knowledge(knowledge) + if kb.comp_type == "experience": + if knowledge.description is not None: + kb.description = knowledge.description + if knowledge.experience_active is not None and kb.experience_active != knowledge.experience_active: + kb = self.active_experience(knowledge) return "Knowledge base update successfully" def get_all_knowledge_bases(self) -> List[Dict[str, Any]]: @@ -67,3 +112,8 @@ def get_all_knowledge_bases(self) -> List[Dict[str, Any]]: for idx, kb in self.components.items(): kb_list.append(kb) return kb_list + + def get_experience_kb(self): + for idx, kb in self.components.items(): + if kb.comp_type == "experience": + return kb diff --git a/EdgeCraftRAG/edgecraftrag/controllers/modelmgr.py b/EdgeCraftRAG/edgecraftrag/controllers/modelmgr.py index b2715c27fa..966632705e 100644 --- a/EdgeCraftRAG/edgecraftrag/controllers/modelmgr.py +++ b/EdgeCraftRAG/edgecraftrag/controllers/modelmgr.py @@ -7,6 +7,7 @@ from 
edgecraftrag.base import BaseComponent, BaseMgr, CompType, ModelType from edgecraftrag.components.model import ( BaseModelComponent, + OpenAIEmbeddingModel, OpenVINOEmbeddingModel, OpenVINOLLMModel, OpenVINORerankModel, @@ -92,6 +93,8 @@ def load_model(model_para: ModelIn): device=model_para.device, weight=model_para.weight, ) + case ModelType.VLLM_EMBEDDING: + model = OpenAIEmbeddingModel(model_id=model_para.model_id, api_base=model_para.api_base) case ModelType.LLM: model = OpenVINOLLMModel( model_id=model_para.model_id, diff --git a/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py b/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py index 81524a3754..d44a227325 100644 --- a/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py +++ b/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py @@ -19,8 +19,15 @@ def __init__(self): self._lock = asyncio.Lock() super().__init__() - def create_pipeline(self, name: str, origin_json: str): - pl = Pipeline(name, origin_json) + def create_pipeline(self, request, origin_json: str): + if isinstance(request, str): + name = request + idx, documents_cache = None, None + else: + name = request.name + idx = request.idx + documents_cache = request.documents_cache + pl = Pipeline(name, origin_json, idx, documents_cache) self.add(pl) return pl @@ -66,8 +73,8 @@ def activate_pipeline(self, name: str, active: bool, nm: NodeMgr, kb_name: None) return nodelist = None - if pl.node_changed: - nodelist = nm.get_nodes(pl.node_parser.idx) + # if pl.node_changed: + # nodelist = nm.get_nodes(pl.node_parser.idx) pl.check_active(nodelist, kb_name) prevactive = self._active_pipeline if prevactive: @@ -83,24 +90,38 @@ def notify_node_change(self): for _, pl in self.components.items(): pl.set_node_change() - def run_pipeline(self, chat_request: ChatCompletionRequest) -> Any: + async def run_pipeline(self, chat_request: ChatCompletionRequest) -> Any: + ap = self.get_active_pipeline() + if ap is not None: + return await ap.run(cbtype=CallbackType.PIPELINE, chat_request=chat_request) + return -1 + + async def run_retrieve_postprocess(self, chat_request: ChatCompletionRequest) -> Any: + ap = self.get_active_pipeline() + out = None + if ap is not None: + out = await ap.run(cbtype=CallbackType.RETRIEVE_POSTPROCESS, chat_request=chat_request) + return out + return -1 + + async def run_retrieve(self, chat_request: ChatCompletionRequest) -> Any: ap = self.get_active_pipeline() out = None if ap is not None: - out = ap.run(cbtype=CallbackType.PIPELINE, chat_request=chat_request) + out = await ap.run(cbtype=CallbackType.RETRIEVE, chat_request=chat_request) return out return -1 - def run_retrieve(self, chat_request: ChatCompletionRequest) -> Any: + async def run_postprocess(self, chat_request: ChatCompletionRequest, contexts) -> Any: ap = self.get_active_pipeline() out = None if ap is not None: - out = ap.run(cbtype=CallbackType.RETRIEVE, chat_request=chat_request) + out = await ap.run(cbtype=CallbackType.POSTPROCESS, chat_request=chat_request, contexts=contexts) return out return -1 - def run_data_prepare(self, docs: List[Document]) -> Any: + async def run_data_prepare(self, docs: List[Document]) -> Any: ap = self.get_active_pipeline() if ap is not None: - return ap.run(cbtype=CallbackType.DATAPREP, docs=docs) + return await ap.run(cbtype=CallbackType.DATAPREP, docs=docs) return -1 diff --git a/EdgeCraftRAG/edgecraftrag/controllers/sessionmgr.py b/EdgeCraftRAG/edgecraftrag/controllers/sessionmgr.py new file mode 100644 index 0000000000..53dac2bc04 --- /dev/null +++ 
b/EdgeCraftRAG/edgecraftrag/controllers/sessionmgr.py @@ -0,0 +1,180 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import os +from typing import Any, Dict, List, Optional + +from edgecraftrag.api_schema import SessionIn +from edgecraftrag.base import BaseMgr, InferenceType +from edgecraftrag.components.session import Session +from edgecraftrag.config_repository import MilvusConfigRepository +from edgecraftrag.env import SESSION_FILE + + +class SessionManager(BaseMgr): + def __init__(self): + super().__init__() + self._current_session_id: Optional[str] = None + self.session_file = SESSION_FILE + + self.milvus_repo = MilvusConfigRepository.create_connection(Repo_config_name="session_storage", max_retries=1) + self.components: Dict[str, Session] = {} + + if self.milvus_repo and self.milvus_repo.connected: + self._load_from_milvus() + else: + self.load_from_file() + + def set_current_session(self, session_id: str) -> None: + self._current_session_id = session_id if session_id and session_id not in ("None", "") else "default_session" + if self._current_session_id not in self.components: + new_session = Session(self._current_session_id) + self.add(new_session, name=self._current_session_id) + + def get_current_session(self) -> Optional[Session]: + if not self._current_session_id: + return None + return self.components.get(self._current_session_id) + + def create_session(self, session: SessionIn) -> str: + session_id = session.idx if session and session.idx else None + + if not session_id or session_id in ("", "None"): + # keep incrementing the suffix so the loop always terminates even if an id is already taken + next_id = len(self.components) + 1 + session_id = f"session_{next_id}" + while session_id in self.components: + next_id += 1 + session_id = f"session_{next_id}" + + if session_id in self.components: + raise ValueError(f"Session ID {session_id} already exists") + + new_session = Session(session_id) + self.add(new_session, name=session_id) + return session_id + + def add(self, session: Session, name: str) -> None: + self.components[name] = session + if self.milvus_repo and self.milvus_repo.connected: + self.milvus_repo.add_config_by_idx(name, session.to_dict()) + else: + self.save_to_file() + + def clear_current_history(self) -> None: + current_session = self.get_current_session() + if current_session: + current_session.clear_messages() + self._persist_session(current_session.idx) + + def save_current_message(self, sessionid: str, role: str, content: str) -> str: + current_session = self.get(sessionid) + if not current_session: + return "No current session set" + + try: + current_session.add_message(role, content) + self._persist_session(sessionid) + return "Message added successfully" + except ValueError as e: + return f"Failed to add message: {str(e)}" + + def update_current_message(self, sessionid: str, role: str, content: str) -> str: + current_session = self.get(sessionid) + if not current_session: + return "No current session set" + try: + current_session.update_current_message(role, content) + return "Message updated successfully" + except ValueError as e: + return f"Failed to update message: {str(e)}" + + def concat_history(self, sessionid: str, inference_type: str, user_message: str) -> str: + max_token = 6000 + if inference_type == InferenceType.VLLM: + vllm_max_len = int(os.getenv("MAX_MODEL_LEN", "10240")) + if vllm_max_len > 5000: + max_token = vllm_max_len - 1024 + + current_session = self.get(sessionid) + if not current_session: + return "" + history_messages = current_session.get_messages() + recent_str =
self.get_recent_chat_rounds(history_messages) + + self.save_current_message(sessionid, "user", user_message) + return recent_str[-max_token:] if len(recent_str) > max_token else recent_str + + def get_recent_chat_rounds(self, messages: List[Dict[str, str]]) -> str: + history_num = int(os.getenv("CHAT_HISTORY_ROUND", "0")) + if history_num <= 0: + return "" + total = len(messages) + start_idx = max(0, total - (history_num * 2)) + return str(messages[start_idx:]) + + def get_all_sessions(self): + return { + sid: session.get_user_message_titel() + for sid, session in reversed(self.components.items()) + if isinstance(session, Session) + } + + def get_session_by_id(self, session_id: str) -> Dict[str, Any]: + session = self.get(session_id) + if not session or not isinstance(session, Session): + return {"session_id": session_id, "exists": False} + return session.to_dict() + + def _persist_session(self, session_id: str): + session = self.components.get(session_id) + if not session: + return + + if self.milvus_repo and self.milvus_repo.connected: + self.milvus_repo.update_config_by_idx(session_id, session.to_dict()) + else: + self.save_to_file() + + def save_to_file(self) -> Dict[str, str]: + try: + os.makedirs(os.path.dirname(self.session_file), exist_ok=True) + data = {sid: session.to_dict() for sid, session in self.components.items() if isinstance(session, Session)} + with open(self.session_file, "w", encoding="utf-8") as f: + json.dump(data, f, indent=2, ensure_ascii=False) + return {"status": "success", "message": f"Saved to {self.session_file}"} + except Exception as e: + return {"status": "error", "message": f"Save failed: {str(e)}"} + + def load_from_file(self) -> Dict[str, str]: + try: + if not os.path.exists(self.session_file): + return {"status": "warning", "message": "Session file does not exist"} + with open(self.session_file, "r", encoding="utf-8") as f: + loaded_data = json.load(f) + if not isinstance(loaded_data, dict): + raise ValueError("Invalid session file format: expected dict") + + self.components.clear() + for session_id, session_data in loaded_data.items(): + session = Session.from_dict(session_data) + self.components[session_id] = session + return { + "status": "success", + "message": f"Loaded {len(self.components)} sessions from {self.session_file}", + } + except Exception as e: + return {"status": "error", "message": f"Load failed: {str(e)}"} + + def _load_from_milvus(self): + try: + milvus_sessions = self.milvus_repo.get_configs() + for item in milvus_sessions: + session_id = item.get("idx") + config_json = item.get("config_json", {}) + if session_id and isinstance(config_json, dict): + session = Session.from_dict(config_json) + self.components[session_id] = session + print(f"Loaded {len(self.components)} sessions from Milvus.") + except Exception as e: + print(f"Error loading sessions from Milvus: {str(e)}") diff --git a/EdgeCraftRAG/edgecraftrag/env.py b/EdgeCraftRAG/edgecraftrag/env.py new file mode 100644 index 0000000000..b5371278b1 --- /dev/null +++ b/EdgeCraftRAG/edgecraftrag/env.py @@ -0,0 +1,24 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +UI_DIRECTORY = os.getenv("TMPFILE_PATH", "/home/user/ui_cache") +# Define the root directory for knowledge base files +CONFIG_DIRECTORY = os.path.join(UI_DIRECTORY, "configs") +if not os.path.exists(CONFIG_DIRECTORY): + os.makedirs(CONFIG_DIRECTORY, exist_ok=True) + +IMG_OUTPUT_DIR = os.path.join(UI_DIRECTORY, "pic") +os.makedirs(IMG_OUTPUT_DIR, exist_ok=True) + 
+KNOWLEDGEBASE_FILE = os.path.join(CONFIG_DIRECTORY, "knowledgebase.json") +PIPELINE_FILE = os.path.join(CONFIG_DIRECTORY, "pipeline.json") +AGENT_FILE = os.path.join(CONFIG_DIRECTORY, "agent.json") + +EXPERIENCE_FILE = os.path.join(UI_DIRECTORY, "experience_dir/experience.json") +DOCUMENT_DATA_FILE = os.path.join(UI_DIRECTORY, "document_data.json") +SESSION_FILE = os.path.join(UI_DIRECTORY, "session.json") + +SEARCH_CONFIG_PATH = os.path.join(UI_DIRECTORY, "configs/search_config.yaml") +SEARCH_DIR = os.path.join(UI_DIRECTORY, "configs/experience_dir/experience.json") diff --git a/EdgeCraftRAG/edgecraftrag/requirements.txt b/EdgeCraftRAG/edgecraftrag/requirements.txt index 94cbef7b6c..6c4b7e4451 100644 --- a/EdgeCraftRAG/edgecraftrag/requirements.txt +++ b/EdgeCraftRAG/edgecraftrag/requirements.txt @@ -2,8 +2,12 @@ docx2txt EbookLib>=0.18 faiss-cpu>=1.8.0.post1 html2text>=2025.4.15 -langchain-core==0.3.60 -llama-index==0.12.41 +json-repair==0.52.0 +langchain-core==0.3.80 +langchain-milvus +langchain-openai +langgraph==0.6.10 +llama-index==0.12.36 llama-index-core==0.12.37 llama-index-embeddings-openvino==0.5.2 llama-index-llms-openai==0.3.44 @@ -20,6 +24,10 @@ pillow>=10.4.0 py-cpuinfo>=9.0.0 pymilvus==2.5.10 python-docx==1.1.2 -unstructured==0.16.11 +torch==2.8.0+cpu +torchvision==0.23.0+cpu +transformers==4.53.3 +unstructured +unstructured[all-docs] unstructured[pdf] werkzeug==3.1.3 diff --git a/EdgeCraftRAG/edgecraftrag/server.py b/EdgeCraftRAG/edgecraftrag/server.py index a8117cf014..44b63c4d32 100644 --- a/EdgeCraftRAG/edgecraftrag/server.py +++ b/EdgeCraftRAG/edgecraftrag/server.py @@ -4,14 +4,16 @@ import os import uvicorn +from edgecraftrag.api.v1.agent import agent_app, restore_agent_configurations from edgecraftrag.api.v1.chatqna import chatqna_app from edgecraftrag.api.v1.data import data_app -from edgecraftrag.api.v1.knowledge_base import kb_app, load_knowledge_from_file +from edgecraftrag.api.v1.knowledge_base import kb_app, restore_knowledge_configurations from edgecraftrag.api.v1.model import model_app -from edgecraftrag.api.v1.pipeline import load_pipeline_from_file, pipeline_app +from edgecraftrag.api.v1.pipeline import pipeline_app, restore_pipeline_configurations from edgecraftrag.api.v1.prompt import prompt_app +from edgecraftrag.api.v1.session import session_app from edgecraftrag.api.v1.system import system_app -from edgecraftrag.utils import UI_DIRECTORY +from edgecraftrag.env import UI_DIRECTORY from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from fastapi.staticfiles import StaticFiles @@ -19,10 +21,14 @@ async def lifespan(app: FastAPI): - print("Restore pipeline configuration and knowledge base configuration...") - load_pipeline_from_file() - await load_knowledge_from_file() - yield + try: + print("Restore pipeline configuration and knowledge base configuration...") + await restore_pipeline_configurations() + await restore_knowledge_configurations() + await restore_agent_configurations() + yield + except Exception as e: + raise app = FastAPI(lifespan=lifespan) @@ -36,7 +42,17 @@ async def lifespan(app: FastAPI): ) -sub_apps = [data_app, model_app, pipeline_app, chatqna_app, system_app, prompt_app, kb_app] +sub_apps = [ + data_app, + model_app, + pipeline_app, + chatqna_app, + system_app, + prompt_app, + kb_app, + agent_app, + session_app, +] for sub_app in sub_apps: for route in sub_app.routes: app.router.routes.append(route) diff --git a/EdgeCraftRAG/edgecraftrag/utils.py b/EdgeCraftRAG/edgecraftrag/utils.py old mode 100755 new mode 
100644 index 18a43e5879..a4b06ebf97 --- a/EdgeCraftRAG/edgecraftrag/utils.py +++ b/EdgeCraftRAG/edgecraftrag/utils.py @@ -1,6 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import asyncio import io import os from pathlib import Path @@ -8,16 +9,12 @@ from docx.text.paragraph import Paragraph from edgecraftrag.base import InferenceType -from edgecraftrag.context import ctx +from edgecraftrag.env import IMG_OUTPUT_DIR from PIL import Image as Img from transformers import AutoTokenizer from unstructured.documents.elements import ElementMetadata, Image from unstructured.partition.docx import DocxPartitionerOptions -UI_DIRECTORY = os.getenv("TMPFILE_PATH", "/home/user/ui_cache") -IMG_OUTPUT_DIR = os.path.join(UI_DIRECTORY, "pic") -os.makedirs(IMG_OUTPUT_DIR, exist_ok=True) - DEFAULT_TEMPLATE = """You are an AI assistant. Your task is to learn from the following context. Then answer the user's question based on what you learned from the context but not your own knowledge. {context} @@ -44,15 +41,21 @@ def iter_elements(cls, paragraph: Paragraph, opts: DocxPartitionerOptions) -> It yield Image(text="IMAGE", metadata=element_metadata) -def get_prompt_template(model_id, prompt_content=None, template_path=None, enable_think=False): +def get_prompt_template(model_path, prompt_content=None, template_path=None, enable_think=False): if prompt_content is not None: template = prompt_content elif template_path is not None: - template = Path(template_path).read_text(encoding=None) + # Safely load the template only if it is inside /templates (or other safe root) + safe_root = "/templates" + normalized_path = os.path.normpath(os.path.join(safe_root, template_path)) + if not normalized_path.startswith(safe_root): + raise ValueError("Template path is outside of the allowed directory.") + if not os.path.exists(normalized_path): + raise FileNotFoundError("Template file does not exist.") + template = Path(normalized_path).read_text(encoding=None) else: template = DEFAULT_TEMPLATE - tokenizer = AutoTokenizer.from_pretrained(model_id) - model_id = model_id.split("/")[-1] + tokenizer = AutoTokenizer.from_pretrained(model_path) messages = [{"role": "system", "content": template}, {"role": "user", "content": "\n{input}\n"}] prompt_template = tokenizer.apply_chat_template( messages, @@ -60,10 +63,13 @@ def get_prompt_template(model_id, prompt_content=None, template_path=None, enabl add_generation_prompt=True, enable_thinking=enable_think, # Switches between thinking and non-thinking modes. Default is True. 
) - return prompt_template + return template, prompt_template def serialize_node_with_score(node_with_score): + # relationships is not serializable + # No need for this information right now + node_with_score.node.relationships = {} return { "node": node_with_score.node.__dict__, "score": node_with_score.score.item() if hasattr(node_with_score.score, "item") else node_with_score.score, @@ -74,74 +80,13 @@ def serialize_contexts(contexts): return {key: [serialize_node_with_score(node) for node in nodes] for key, nodes in contexts.items()} -def compare_mappings(new_dict, old_dict): - added_files = {} - deleted_files = {} - for key in set(new_dict) - set(old_dict): - added_files[key] = new_dict[key] - for key in set(old_dict) - set(new_dict): - deleted_files[key] = old_dict[key] - for key in set(new_dict) & set(old_dict): - new_files = new_dict[key] - old_files = old_dict[key] - added = {name: new_files[name] for name in set(new_files) - set(old_files)} - if added: - added_files[key] = added - deleted = {name: old_files[name] for name in set(old_files) - set(new_files)} - if deleted: - deleted_files[key] = deleted - - for key in list(added_files.keys()): - if key in deleted_files: - del added_files[key] - return added_files, deleted_files - - -_history_map = {} -_current_session_id: Optional[str] = None - - -def set_current_session(session_id: str) -> None: - global _current_session_id - _current_session_id = session_id if session_id not in (None, "", "None") else "default_session" - - -def get_current_session() -> Optional[str]: - return _current_session_id - - -def clear_history() -> None: - session_id = get_current_session() - if session_id in _history_map: - _history_map[session_id] = [] - - -def save_history(message: str) -> str: - session_id = get_current_session() - _history_map.setdefault(session_id, []).append(f"content: {message}") - return "History appended successfully" - - -def concat_history(message: str) -> str: - max_token = 6000 - active_pl = ctx.get_pipeline_mgr().get_active_pipeline() - if active_pl.generator.inference_type == InferenceType.VLLM: - vllm_max_len = int(os.getenv("MAX_MODEL_LEN", "5000")) - if vllm_max_len > 5000: - max_token = vllm_max_len - 1024 - - history_id = get_current_session() - history_id_list = _history_map.get(history_id, []) - str_message = get_recent_chat_rounds(history_id_list) - _history_map.setdefault(history_id, []).append(f"user: {message}") - return str_message[-max_token:] if len(str_message) > max_token else str_message +async def stream_generator(string: str): + for token in iter(string): + yield token + await asyncio.sleep(0) -def get_recent_chat_rounds(history_id_list: List[str]) -> str: - history_num = int(os.getenv("CHAT_HISTORY_ROUND", "0")) - actual_rounds = min(history_num, len(history_id_list) // 2) - if actual_rounds <= 0: - return "" - start_index = max(0, len(history_id_list) - (actual_rounds * 2 + 1)) - recent_history = history_id_list[start_index:] - return "".join(recent_history) +async def chain_async_generators(gen_list: List): + for stream in gen_list: + async for token in stream: + yield token diff --git a/EdgeCraftRAG/nginx/nginx-conf-generator.sh b/EdgeCraftRAG/nginx/nginx-conf-generator.sh deleted file mode 100644 index bd8e5b194c..0000000000 --- a/EdgeCraftRAG/nginx/nginx-conf-generator.sh +++ /dev/null @@ -1,68 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# Check if the correct number of arguments is provided -if [ "$#" -ne 2 ]; then - echo "Usage: $0 DP_NUM 
output-file-path" - exit 1 -fi - -# Get the port number from the command line argument -PORT_NUM=$1 - -# Start generating the Nginx configuration -cat < $2 -worker_processes auto; -events { - worker_connections 1024; -} -http { - - upstream multi-arc-serving-container { -EOL - -# Generate the server lines -for ((i=0; i> $2 -done - -# Close the upstream block and the http block -cat <> $2 - } - include /etc/nginx/mime.types; - default_type application/octet-stream; - client_max_body_size 50M; - sendfile on; - - keepalive_timeout 65; - keepalive_requests 1000; - server { - listen 8086; - server_name _; - location / { - root /usr/share/nginx/html; - index index.html index.htm; - add_header Cache-Control "no-cache"; - try_files $uri $uri/ /index.html; - } - location /v1/completions { - proxy_pass http://multi-arc-serving-container/v1/completions; - proxy_http_version 1.1; - proxy_set_header Connection ""; - } - location /metrics { - proxy_pass http://multi-arc-serving-container/metrics; - proxy_http_version 1.1; - proxy_set_header Connection ""; - } - - location ~ /\. { - deny all; - } - } -} -EOL - -echo "Nginx configuration generated in nginx.conf" diff --git a/EdgeCraftRAG/tests/configs/test_pipeline_ipex_vllm.json b/EdgeCraftRAG/tests/configs/test_pipeline_ipex_vllm.json index 097309d7e2..cac241ec73 100644 --- a/EdgeCraftRAG/tests/configs/test_pipeline_ipex_vllm.json +++ b/EdgeCraftRAG/tests/configs/test_pipeline_ipex_vllm.json @@ -38,6 +38,7 @@ "device": "", "weight": "" }, + "generator_type": "chatqna", "prompt_path": "./default_prompt.txt", "vllm_endpoint": "" }, diff --git a/EdgeCraftRAG/tests/configs/test_pipeline_local_llm.json b/EdgeCraftRAG/tests/configs/test_pipeline_local_llm.json index 39ee2ef0f1..811e119690 100644 --- a/EdgeCraftRAG/tests/configs/test_pipeline_local_llm.json +++ b/EdgeCraftRAG/tests/configs/test_pipeline_local_llm.json @@ -37,6 +37,7 @@ "device": "auto", "weight": "INT4" }, + "generator_type": "chatqna", "prompt_path": "./default_prompt.txt", "inference_type": "local" }, diff --git a/EdgeCraftRAG/tests/test_compose_on_arc.sh b/EdgeCraftRAG/tests/test_compose_on_arc.sh index d8d0fbd0da..0e2148dc28 100755 --- a/EdgeCraftRAG/tests/test_compose_on_arc.sh +++ b/EdgeCraftRAG/tests/test_compose_on_arc.sh @@ -21,10 +21,11 @@ HOST_IP=$ip_address COMPOSE_FILE="compose.yaml" EC_RAG_SERVICE_PORT=16010 -MODEL_PATH="${HOME}/models" +export MODEL_PATH="${HOME}/models" # MODEL_PATH="$WORKPATH/models" DOC_PATH="$WORKPATH/tests" UI_UPLOAD_PATH="$WORKPATH/tests" +MAX_MODEL_LEN=5000 HF_ENDPOINT=https://hf-mirror.com diff --git a/EdgeCraftRAG/tests/test_compose_vllm_on_arc.sh b/EdgeCraftRAG/tests/test_compose_vllm_on_arc.sh index 192e18b67c..50f458ae8a 100755 --- a/EdgeCraftRAG/tests/test_compose_vllm_on_arc.sh +++ b/EdgeCraftRAG/tests/test_compose_vllm_on_arc.sh @@ -18,7 +18,7 @@ LOG_PATH="$WORKPATH/tests" ip_address=$(hostname -I | awk '{print $1}') HOST_IP=$ip_address -COMPOSE_FILE="compose_vllm.yaml" +COMPOSE_FILE="compose.yaml" EC_RAG_SERVICE_PORT=16010 MODEL_PATH="${HOME}/models" @@ -27,18 +27,12 @@ DOC_PATH="$WORKPATH/tests" UI_UPLOAD_PATH="$WORKPATH/tests" HF_ENDPOINT=https://hf-mirror.com -NGINX_PORT=8086 -NGINX_PORT_0=8100 -NGINX_PORT_1=8100 -VLLM_SERVICE_PORT_0=8100 +VLLM_SERVICE_PORT_A770=8086 TENSOR_PARALLEL_SIZE=1 -SELECTED_XPU_0=0 -vLLM_ENDPOINT="http://${HOST_IP}:${NGINX_PORT}" +vLLM_ENDPOINT="http://${HOST_IP}:${VLLM_SERVICE_PORT_A770}" LLM_MODEL="Qwen/Qwen3-8B" -LLM_MODEL_PATH="${HOME}/qwen/" -NGINX_CONFIG_PATH="$WORKPATH/nginx/nginx.conf" 
+LLM_MODEL_PATH="${MODEL_PATH}/${LLM_MODEL}" VLLM_IMAGE_TAG="0.8.3-b20" -DP_NUM=1 function build_docker_images() { opea_branch=${opea_branch:-"main"} @@ -61,17 +55,13 @@ function build_docker_images() { function start_services() { cd $WORKPATH/docker_compose/intel/gpu/arc source set_env.sh - # generate nginx config file according to container count - bash $WORKPATH/nginx/nginx-conf-generator.sh $DP_NUM $WORKPATH/nginx/nginx.conf - # generate yaml file according to container count - bash multi-arc-yaml-generator.sh $DP_NUM $COMPOSE_FILE # Start Docker Containers - docker compose -f $COMPOSE_FILE up -d > ${LOG_PATH}/start_services_with_compose.log + docker compose --profile a770 -f $COMPOSE_FILE up -d > ${LOG_PATH}/start_services_with_compose.log echo "ipex-llm-serving-xpu is booting, please wait." sleep 30s n=0 until [[ "$n" -ge 100 ]]; do - docker logs ipex-llm-serving-xpu-container-0 > ${LOG_PATH}/ipex-llm-serving-xpu-container.log 2>&1 + docker logs ipex-llm-serving-xpu-770 > ${LOG_PATH}/ipex-llm-serving-xpu-container.log 2>&1 if grep -q "Starting vLLM API server on http://0.0.0.0:" ${LOG_PATH}/ipex-llm-serving-xpu-container.log; then break fi @@ -129,7 +119,7 @@ function validate_rag() { "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/chatqna" \ "1234567890" \ "query" \ - "ipex-llm-serving-xpu-container-0" \ + "ipex-llm-serving-xpu-770" \ '{"messages":"What is the test id?","max_tokens":5}' } @@ -139,12 +129,13 @@ function validate_megaservice() { "${HOST_IP}:16011/v1/chatqna" \ "1234567890" \ "query" \ - "ipex-llm-serving-xpu-container-0" \ + "ipex-llm-serving-xpu-770" \ '{"messages":"What is the test id?","max_tokens":5}' } function stop_docker() { cd $WORKPATH/docker_compose/intel/gpu/arc + export MODEL_PATH="${HOME}/models" docker compose -f $COMPOSE_FILE down } diff --git a/EdgeCraftRAG/tests/test_compose_vllm_on_arc_b60.sh b/EdgeCraftRAG/tests/test_compose_vllm_on_arc_b60.sh new file mode 100755 index 0000000000..0d443b391e --- /dev/null +++ b/EdgeCraftRAG/tests/test_compose_vllm_on_arc_b60.sh @@ -0,0 +1,174 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -e +source ./common.sh + +IMAGE_REPO=${IMAGE_REPO:-"opea"} +IMAGE_TAG=${IMAGE_TAG:-"latest"} +echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" +echo "TAG=IMAGE_TAG=${IMAGE_TAG}" +export REGISTRY=${IMAGE_REPO} +export TAG=${IMAGE_TAG} + +WORKPATH=$(dirname "$PWD") +LOG_PATH="$WORKPATH/tests" + +ip_address=$(hostname -I | awk '{print $1}') +HOST_IP=$ip_address + +COMPOSE_FILE="compose.yaml" +EC_RAG_SERVICE_PORT=16010 + +MODEL_PATH="${HOME}/models" +# MODEL_PATH="$WORKPATH/models" +DOC_PATH="$WORKPATH/tests" +UI_UPLOAD_PATH="$WORKPATH/tests" + +HF_ENDPOINT=https://hf-mirror.com +VLLM_SERVICE_PORT_B60=8086 +TP=1 +vLLM_ENDPOINT="http://${HOST_IP}:${VLLM_SERVICE_PORT_B60}" +LLM_MODEL="Qwen/Qwen3-8B" +VLLM_IMAGE_TAG="1.1-preview" +DP=1 +ZE_AFFINITY_MASK=1 + +function build_docker_images() { + opea_branch=${opea_branch:-"main"} + cd $WORKPATH/docker_image_build + git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git + pushd GenAIComps + echo "GenAIComps test commit is $(git rev-parse HEAD)" + docker build --no-cache -t ${REGISTRY}/comps-base:${TAG} --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . + popd && sleep 1s + + echo "Pull intel/llm-scaler-vllm image" + docker pull intel/llm-scaler-vllm:${VLLM_IMAGE_TAG} + + echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
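With the nginx balancer and its generator script removed, the A770 and B60 compose tests point vLLM_ENDPOINT straight at the serving container's port (8086). The sketch below is an illustrative smoke check against the OpenAI-compatible /v1/completions route that the deleted nginx config used to proxy; the host, model name, and request fields are assumptions drawn from the test scripts, not something this diff adds.

# Illustrative smoke check (not part of the diff): verify the vLLM serving
# container answers on the direct endpoint that replaces the nginx balancer.
# Host, port, and model name are assumptions; adjust to your deployment.
import json
import urllib.request

VLLM_ENDPOINT = "http://localhost:8086"   # ${HOST_IP}:${VLLM_SERVICE_PORT_A770}
MODEL = "Qwen/Qwen3-8B"                   # LLM_MODEL from the test script

payload = {
    "model": MODEL,
    "prompt": "What is the test id?",
    "max_tokens": 5,
}
req = urllib.request.Request(
    f"{VLLM_ENDPOINT}/v1/completions",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req, timeout=30) as resp:
    body = json.loads(resp.read())
    # An OpenAI-compatible server returns a "choices" list with the completion text.
    print(resp.status, body.get("choices", [{}])[0].get("text", ""))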
+ docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log + + docker images && sleep 1s +} + +function start_services() { + cd $WORKPATH/docker_compose/intel/gpu/arc + source set_env.sh + # Start Docker Containers + docker compose --profile b60 -f $COMPOSE_FILE up -d > ${LOG_PATH}/start_services_with_compose.log + echo "ipex-serving-xpu is booting, please wait." + sleep 30s + n=0 + until [[ "$n" -ge 100 ]]; do + docker logs ipex-serving-xpu-container > ${LOG_PATH}/ipex-serving-xpu-container.log 2>&1 + if grep -q "Starting vLLM API server" ${LOG_PATH}/ipex-serving-xpu-container.log; then + break + fi + sleep 6s + n=$((n+1)) + done +} + +function validate_services() { + local URL="$1" + local EXPECTED_RESULT="$2" + local SERVICE_NAME="$3" + local DOCKER_NAME="$4" + local INPUT_DATA="$5" + + echo "[ $SERVICE_NAME ] Validating $SERVICE_NAME service..." + local RESPONSE=$(curl -s -w "%{http_code}" -o ${LOG_PATH}/${SERVICE_NAME}.log -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") + while [ ! -f ${LOG_PATH}/${SERVICE_NAME}.log ]; do + sleep 1 + done + local HTTP_STATUS="${RESPONSE: -3}" + local CONTENT=$(cat ${LOG_PATH}/${SERVICE_NAME}.log) + + if [ "$HTTP_STATUS" -eq 200 ]; then + echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." + else + echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" + docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log + exit 1 + fi + sleep 1s +} + +function validate_rag() { + cd $WORKPATH/tests + + # setup pipeline + validate_services \ + "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/settings/pipelines" \ + "active" \ + "pipeline" \ + "edgecraftrag-server" \ + '@configs/test_pipeline_ipex_vllm.json' + + # add data + validate_services \ + "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/data" \ + "Done" \ + "data" \ + "edgecraftrag-server" \ + '@configs/test_data.json' + + # query + validate_services \ + "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/chatqna" \ + "1234567890" \ + "query" \ + "ipex-serving-xpu-container" \ + '{"messages":"What is the test id?","max_tokens":5}' +} + +function validate_megaservice() { + # Curl the Mega Service + validate_services \ + "${HOST_IP}:16011/v1/chatqna" \ + "1234567890" \ + "query" \ + "ipex-serving-xpu-container" \ + '{"messages":"What is the test id?","max_tokens":5}' +} + +function stop_docker() { + cd $WORKPATH/docker_compose/intel/gpu/arc + export MODEL_PATH="${HOME}/models" + docker compose -f $COMPOSE_FILE down +} + + +function main() { + mkdir -p $LOG_PATH + + echo "::group::stop_docker" + stop_docker + echo "::endgroup::" + + echo "::group::build_docker_images" + if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi + echo "::endgroup::" + + echo "::group::start_services" + start_services + echo "::endgroup::" + + echo "::group::validate_rag" + validate_rag + echo "::endgroup::" + + echo "::group::validate_megaservice" + validate_megaservice + echo "::endgroup::" + + echo "::group::stop_docker" + stop_docker + echo y | docker system prune + echo "::endgroup::" + +} + +main diff --git a/EdgeCraftRAG/tests/test_pipeline_ipex_vllm.json b/EdgeCraftRAG/tests/test_pipeline_ipex_vllm.json index 097309d7e2..cac241ec73 100644 --- a/EdgeCraftRAG/tests/test_pipeline_ipex_vllm.json +++ b/EdgeCraftRAG/tests/test_pipeline_ipex_vllm.json @@ -38,6 +38,7 @@ "device": "", "weight": "" }, + "generator_type": "chatqna", "prompt_path": "./default_prompt.txt", "vllm_endpoint": "" }, diff --git a/EdgeCraftRAG/tests/test_pipeline_local_llm.json 
b/EdgeCraftRAG/tests/test_pipeline_local_llm.json index 39ee2ef0f1..811e119690 100644 --- a/EdgeCraftRAG/tests/test_pipeline_local_llm.json +++ b/EdgeCraftRAG/tests/test_pipeline_local_llm.json @@ -37,6 +37,7 @@ "device": "auto", "weight": "INT4" }, + "generator_type": "chatqna", "prompt_path": "./default_prompt.txt", "inference_type": "local" }, diff --git a/EdgeCraftRAG/tools/quick_start.sh b/EdgeCraftRAG/tools/quick_start.sh index fdefa797f0..909e34dd43 100755 --- a/EdgeCraftRAG/tools/quick_start.sh +++ b/EdgeCraftRAG/tools/quick_start.sh @@ -5,6 +5,8 @@ set -e WORKPATH=$(dirname "$(pwd)") +ip_address=$(hostname -I | awk '{print $1}') +HOST_IP=$ip_address get_user_input() { local var_name=$1 @@ -21,7 +23,7 @@ get_enable_function() { } function start_vllm_services() { - COMPOSE_FILE="compose_vllm.yaml" + COMPOSE_FILE="compose.yaml" echo "stop former service..." docker compose -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE down @@ -32,7 +34,7 @@ function start_vllm_services() { MILVUS_ENABLED=$(get_enable_function "MILVUS DB(Enter 1 for enable)" "0") CHAT_HISTORY_ROUND=$(get_user_input "chat history round" "0") LLM_MODEL=$(get_user_input "your LLM model" "Qwen/Qwen3-8B") - MODEL_PATH=$(get_user_input "your model path" "${HOME}/models") + MODEL_PATH=$(get_user_input "your model path" "${PWD}/models") read -p "Have you prepare models in ${MODEL_PATH}:(yes/no) [yes]" user_input user_input=${user_input:-"yes"} @@ -61,18 +63,11 @@ function start_vllm_services() { sudo chown -R 1000:1000 ${HF_CACHE} HF_ENDPOINT=https://hf-mirror.com # vllm ENV - export NGINX_PORT=8086 - export vLLM_ENDPOINT="http://${HOST_IP}:${NGINX_PORT}" - TENSOR_PARALLEL_SIZE=$(get_user_input "your tp size" 1) - read -p "selected GPU [$(seq -s, 0 $((TENSOR_PARALLEL_SIZE - 1)))] " SELECTED_XPU_0; SELECTED_XPU_0=${SELECTED_XPU_0:-$(seq -s, 0 $((TENSOR_PARALLEL_SIZE - 1)))} - DP_NUM=$(get_user_input "DP number(how many containers to run vLLM)" 1) - for (( x=0; x ipex-llm-serving-xpu-container.log 2>&1 + if grep -q "Starting vLLM API server on http://0.0.0.0:" ipex-llm-serving-xpu-container.log; then + break + fi + sleep 6s + n=$((n+1)) + done + rm -rf ipex-llm-serving-xpu-container.log + echo "service launched, please visit UI at ${HOST_IP}:8082" +} + + +function quick_start_ov_services() { + COMPOSE_FILE="compose.yaml" + echo "stop former service..." + docker compose -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE down + + ip_address=$(hostname -I | awk '{print $1}') + export HOST_IP=${HOST_IP:-"${ip_address}"} + export DOC_PATH=${DOC_PATH:-"$WORKPATH/tests"} + export TMPFILE_PATH=${TMPFILE_PATH:-"$WORKPATH/tests"} + export MILVUS_ENABLED=${MILVUS_ENABLED:-1} + export CHAT_HISTORY_ROUND=${CHAT_HISTORY_ROUND:-"0"} + export LLM_MODEL=${LLM_MODEL:-"Qwen/Qwen3-8B"} + export MODEL_PATH=${MODEL_PATH:-"${PWD}/models"} + export VIDEOGROUPID=$(getent group video | cut -d: -f3) + export RENDERGROUPID=$(getent group render | cut -d: -f3) + export MAX_MODEL_LEN=5000 + + check_baai_folder + export HF_CACHE=${HF_CACHE:-"${HOME}/.cache"} + if [ ! -d "${HF_CACHE}" ]; then + mkdir -p "${HF_CACHE}" + echo "Created directory: ${HF_CACHE}" + fi + + sudo chown 1000:1000 "${MODEL_PATH}" "${DOC_PATH}" "${TMPFILE_PATH}" + sudo chown -R 1000:1000 "${HF_CACHE}" + export HF_ENDPOINT=${HF_ENDPOINT:-"https://hf-mirror.com"} + export no_proxy="localhost, 127.0.0.1, 192.168.1.1, ${HOST_IP}" + export CCL_DG2_USM=${CCL_DG2_USM:-0} + + echo "Starting service..." 
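Both the compose tests and quick_start.sh gate validation on the same readiness signal: poll docker logs until the serving container prints "Starting vLLM API server". A hedged Python equivalent of that wait loop is sketched below; the container name is taken from the A770 profile and the retry budget mirrors the scripts' 100 x 6-second loop, but the function itself is illustrative rather than part of the diff.

# Illustrative readiness wait (not part of the diff): poll docker logs until
# the vLLM API server reports that it is listening, as the test scripts do.
import subprocess
import sys
import time

CONTAINER = "ipex-llm-serving-xpu-770"   # container name from the A770 profile
MARKER = "Starting vLLM API server"


def wait_for_vllm(container: str, marker: str, retries: int = 100, delay: float = 6.0) -> bool:
    for _ in range(retries):
        logs = subprocess.run(
            ["docker", "logs", container],
            capture_output=True,
            text=True,
        )
        if marker in logs.stdout or marker in logs.stderr:
            return True
        time.sleep(delay)
    return False


if __name__ == "__main__":
    if not wait_for_vllm(CONTAINER, MARKER):
        sys.exit(f"{CONTAINER} did not report '{MARKER}' in time")
    print(f"{CONTAINER} is ready")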
+ docker compose -f "$WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE" up -d +} + + +function start_vLLM_B60_services() { + COMPOSE_FILE="compose.yaml" + echo "stop former service..." + export MODEL_PATH=${MODEL_PATH:-"${PWD}/models"} + docker compose -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE down + + ip_address=$(hostname -I | awk '{print $1}') + HOST_IP=$(get_user_input "host ip" "${ip_address}") + DOC_PATH=$(get_user_input "DOC_PATH" "$WORKPATH/tests") + TMPFILE_PATH=$(get_user_input "TMPFILE_PATH" "$WORKPATH/tests") + MILVUS_ENABLED=$(get_enable_function "MILVUS DB(Enter 1 for enable)" "0") + CHAT_HISTORY_ROUND=$(get_user_input "chat history round" "0") + LLM_MODEL=$(get_user_input "your LLM model" "Qwen/Qwen3-8B") + MODEL_PATH=$(get_user_input "your model path" "${PWD}/models") + read -p "Have you prepare models in ${MODEL_PATH}:(yes/no) [yes]" user_input + user_input=${user_input:-"yes"} + + if [ "$user_input" == "yes" ]; then + # 模型文件路径请参考以下形式存放, llm为huggingface + # Indexer: ${MODEL_PATH}/BAAI/bge-small-en-v1.5 + # Reranker: ${MODEL_PATH}/BAAI/bge-reranker-large + # llm :${MODEL_PATH}/${LLM_MODEL} (从huggingface或modelscope下载的原始模型,而不是经过OpenVINO转换的模型!) + echo "you skipped model downloading, please make sure you have prepared all models under ${MODEL_PATH}" + else + echo "you have not prepare models, starting to download models into ${MODEL_PATH}..." + mkdir -p $MODEL_PATH + pip install --upgrade --upgrade-strategy eager "optimum[openvino]" + optimum-cli export openvino -m BAAI/bge-small-en-v1.5 ${MODEL_PATH}/BAAI/bge-small-en-v1.5 --task sentence-similarity + optimum-cli export openvino -m BAAI/bge-reranker-large ${MODEL_PATH}/BAAI/bge-reranker-large --task text-classification + pip install -U huggingface_hub + huggingface-cli download $LLM_MODEL --local-dir "${MODEL_PATH}/${LLM_MODEL}" + fi + echo "give permission to related path..." + sudo chown 1000:1000 ${MODEL_PATH} ${DOC_PATH} ${TMPFILE_PATH} + # vllm ENV + export VLLM_SERVICE_PORT_B60=8086 + export vLLM_ENDPOINT="http://${HOST_IP}:${VLLM_SERVICE_PORT_B60}" + read -p "DP number(how many containers to run B60_vLLM) [4] , press Enter to confirm, or type a new value:" DP; DP=${DP:-4} + read -p "Tensor parallel size(your tp size [1]), press Enter to confirm, or type a new value:" TP; TP=${TP:-1} + DTYPE=$(get_user_input "DTYPE (vLLM data type, e.g. float16/bfloat16)" "float16") + ZE_AFFINITY_MASK=$(get_user_input "ZE_AFFINITY_MASK (GPU affinity mask, multi-GPU use 0,1,2...)" "0,1,2,3") + ENFORCE_EAGER=$(get_user_input "ENFORCE_EAGER (enable eager execution, 1=enable/0=disable)" "1") + TRUST_REMOTE_CODE=$(get_user_input "TRUST_REMOTE_CODE (trust remote code for custom models, 1=enable/0=disable)" "1") + DISABLE_SLIDING_WINDOW=$(get_user_input "DISABLE_SLIDING_WINDOW (disable sliding window attention, 1=disable/0=enable)" "1") + GPU_MEMORY_UTIL=$(get_user_input "GPU_MEMORY_UTIL (GPU memory utilization, range 0.1-1.0)" "0.8") + NO_ENABLE_PREFIX_CACHING=$(get_user_input "NO_ENABLE_PREFIX_CACHING (disable prefix caching, 1=disable/0=enable)" "1") + MAX_NUM_BATCHED_TOKENS=$(get_user_input "MAX_NUM_BATCHED_TOKENS (max number of batched tokens)" "8192") + DISABLE_LOG_REQUESTS=$(get_user_input "DISABLE_LOG_REQUESTS (disable request logs, 1=disable/0=enable)" "1") + MAX_MODEL_LEN=$(get_user_input "MAX_MODEL_LEN (max model context length, e.g. 40000/10240)" "40000") + BLOCK_SIZE=$(get_user_input "BLOCK_SIZE (vLLM block size)" "64") + QUANTIZATION=$(get_user_input "QUANTIZATION (model quantization method, e.g. 
fp8/int4)" "fp8") + # export ENV + export HOST_IP=${HOST_IP:-"${ip_address}"} + export MODEL_PATH=${MODEL_PATH} + export DOC_PATH=${DOC_PATH} + export TMPFILE_PATH=${TMPFILE_PATH} + export LLM_MODEL=${LLM_MODEL} + export no_proxy="localhost, 127.0.0.1, 192.168.1.1, ${HOST_IP}" + export MILVUS_ENABLED=${MILVUS_ENABLED} + export CHAT_HISTORY_ROUND=${CHAT_HISTORY_ROUND} + export SELECTED_XPU_0=${SELECTED_XPU_0} + export VIDEOGROUPID=$(getent group video | cut -d: -f3) + export RENDERGROUPID=$(getent group render | cut -d: -f3) + # export vllm ENV + export DP=${DP} + export TP=${TP} + export DTYPE=${DTYPE} + export ZE_AFFINITY_MASK=${ZE_AFFINITY_MASK} + export ENFORCE_EAGER=${ENFORCE_EAGER} + export TRUST_REMOTE_CODE=${TRUST_REMOTE_CODE} + export DISABLE_SLIDING_WINDOW=${DISABLE_SLIDING_WINDOW} + export GPU_MEMORY_UTIL=${GPU_MEMORY_UTIL} + export NO_ENABLE_PREFIX_CACHING=${NO_ENABLE_PREFIX_CACHING} + export MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS} + export DISABLE_LOG_REQUESTS=${DISABLE_LOG_REQUESTS} + export MAX_MODEL_LEN=${MAX_MODEL_LEN} + export BLOCK_SIZE=${BLOCK_SIZE} + export QUANTIZATION=${QUANTIZATION} + + # Start Docker Containers + docker compose --profile b60 -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE up -d + echo "ipex-llm-serving-xpu is booting, please wait..." + n=0 + until [[ "$n" -ge 100 ]]; do + docker logs ipex-llm-serving-xpu-container-0 > ipex-llm-serving-xpu-container.log 2>&1 + if grep -q "Starting vLLM API server on http://0.0.0.0:" ipex-llm-serving-xpu-container.log; then + break + fi + sleep 6s + n=$((n+1)) + done + rm -rf ipex-llm-serving-xpu-container.log + echo "service launched, please visit UI at ${HOST_IP}:8082" +} + + +function quick_start_vllm_B60_services() { + WORKPATH=$(dirname "$PWD") + COMPOSE_FILE="compose.yaml" + EC_RAG_SERVICE_PORT=16010 + docker compose -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE down + + ip_address=$(hostname -I | awk '{print $1}') + export HOST_IP=${HOST_IP:-"${ip_address}"} + export MODEL_PATH=${MODEL_PATH:-"${PWD}/models"} + export DOC_PATH=${DOC_PATH:-"$WORKPATH/tests"} + export TMPFILE_PATH=${TMPFILE_PATH:-"$WORKPATH/tests"} + export MILVUS_ENABLED=${MILVUS_ENABLED:-1} + export CHAT_HISTORY_ROUND=${CHAT_HISTORY_ROUND:-2} + export LLM_MODEL=${LLM_MODEL:-Qwen/Qwen3-8B} + export VIDEOGROUPID=$(getent group video | cut -d: -f3) + export RENDERGROUPID=$(getent group render | cut -d: -f3) + # export vllm ENV + export DP=${DP:-1} + export TP=${TP:-1} + export DTYPE=${DTYPE:-float16} + export ZE_AFFINITY_MASK=${ZE_AFFINITY_MASK:-0} + export ENFORCE_EAGER=${ENFORCE_EAGER:-1} + export TRUST_REMOTE_CODE=${TRUST_REMOTE_CODE:-1} + export DISABLE_SLIDING_WINDOW=${DISABLE_SLIDING_WINDOW:-1} + export GPU_MEMORY_UTIL=${GPU_MEMORY_UTIL:-0.8} + export NO_ENABLE_PREFIX_CACHING=${NO_ENABLE_PREFIX_CACHING:-1} + export MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-8192} + export DISABLE_LOG_REQUESTS=${disable_LOG_REQUESTS:-1} + export MAX_MODEL_LEN=${MAX_MODEL_LEN:-40000} + export BLOCK_SIZE=${BLOCK_SIZE:-64} + export QUANTIZATION=${QUANTIZATION:-fp8} + + + check_baai_folder + export no_proxy="localhost, 127.0.0.1, 192.168.1.1, ${HOST_IP}" + sudo chown -R 1000:1000 ${MODEL_PATH} ${DOC_PATH} ${TMPFILE_PATH} + docker compose --profile b60 -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE up -d + echo "ipex-llm-serving-xpu is booting, please wait..." 
+ n=0 + until [[ "$n" -ge 100 ]]; do + docker logs ipex-llm-serving-xpu-container-0 > ipex-llm-serving-xpu-container.log 2>&1 + if grep -q "Starting vLLM API server on http://0.0.0.0:" ipex-llm-serving-xpu-container.log; then + break + fi + sleep 6s + n=$((n+1)) + done + rm -rf ipex-llm-serving-xpu-container.log + echo "service launched, please visit UI at ${HOST_IP}:8082" +} + + +function main { + if [[ $- == *i* ]]; then + read -p "Do you want to start vLLM or local OpenVINO services? (vLLM_A770/vLLM_B60/ov) [vLLM_A770]: " user_input + user_input=${user_input:-"vLLM_A770"} + if [[ "$user_input" == "vLLM_A770" ]]; then + start_vllm_services + elif [[ "$user_input" == "vLLM_B60" ]]; then + start_vLLM_B60_services + else + start_services + fi else - start_services + export COMPOSE_PROFILES=${COMPOSE_PROFILES:-""} + if [[ "$COMPOSE_PROFILES" == "vLLM_A770" || "$COMPOSE_PROFILES" == "vLLM" || "$COMPOSE_PROFILES" == "vllm_on_a770" ]]; then + quick_start_vllm_services + elif [[ "$COMPOSE_PROFILES" == "vLLM_B60" || "$COMPOSE_PROFILES" == "vLLM_b60" || "$COMPOSE_PROFILES" == "vllm_on_b60" ]]; then + quick_start_vllm_B60_services + else + quick_start_ov_services + fi fi } diff --git a/EdgeCraftRAG/ui/vue/.env.development b/EdgeCraftRAG/ui/vue/.env.development index d7ef344a8a..ea6834f8a0 100644 --- a/EdgeCraftRAG/ui/vue/.env.development +++ b/EdgeCraftRAG/ui/vue/.env.development @@ -2,5 +2,5 @@ ENV = development # Local Api -VITE_API_URL = http://10.67.106.238:16010/ -VITE_CHATBOT_URL = http://10.67.106.238:16011/ +VITE_API_URL = / +VITE_CHATBOT_URL = / diff --git a/EdgeCraftRAG/ui/vue/components.d.ts b/EdgeCraftRAG/ui/vue/components.d.ts index 7959bda79a..5103652b37 100644 --- a/EdgeCraftRAG/ui/vue/components.d.ts +++ b/EdgeCraftRAG/ui/vue/components.d.ts @@ -10,7 +10,7 @@ export {}; /* prettier-ignore */ declare module 'vue' { export interface GlobalComponents { - AAffix: typeof import('ant-design-vue/es')['Affix'] + AAutoComplete: typeof import('ant-design-vue/es')['AutoComplete'] AButton: typeof import('ant-design-vue/es')['Button'] ACheckbox: typeof import('ant-design-vue/es')['Checkbox'] ACheckboxGroup: typeof import('ant-design-vue/es')['CheckboxGroup'] @@ -29,6 +29,7 @@ declare module 'vue' { AImage: typeof import('ant-design-vue/es')['Image'] AInput: typeof import('ant-design-vue/es')['Input'] AInputNumber: typeof import('ant-design-vue/es')['InputNumber'] + AInputSearch: typeof import('ant-design-vue/es')['InputSearch'] ALayout: typeof import('ant-design-vue/es')['Layout'] ALayoutContent: typeof import('ant-design-vue/es')['LayoutContent'] ALayoutHeader: typeof import('ant-design-vue/es')['LayoutHeader'] @@ -45,13 +46,18 @@ declare module 'vue' { ASelectOption: typeof import('ant-design-vue/es')['SelectOption'] ASlider: typeof import('ant-design-vue/es')['Slider'] ASpace: typeof import('ant-design-vue/es')['Space'] + ASpin: typeof import('ant-design-vue/es')['Spin'] ASteps: typeof import('ant-design-vue/es')['Steps'] + ASwitch: typeof import('ant-design-vue/es')['Switch'] ATable: typeof import('ant-design-vue/es')['Table'] + ATabPane: typeof import('ant-design-vue/es')['TabPane'] + ATabs: typeof import('ant-design-vue/es')['Tabs'] ATag: typeof import('ant-design-vue/es')['Tag'] ATextarea: typeof import('ant-design-vue/es')['Textarea'] ATooltip: typeof import('ant-design-vue/es')['Tooltip'] AUploadDragger: typeof import('ant-design-vue/es')['UploadDragger'] FormTooltip: typeof import('./src/components/FormTooltip.vue')['default'] + PartialLoading: typeof 
import('./src/components/PartialLoading.vue')['default'] RouterLink: typeof import('vue-router')['RouterLink'] RouterView: typeof import('vue-router')['RouterView'] SvgIcon: typeof import('./src/components/SvgIcon.vue')['default'] diff --git a/EdgeCraftRAG/ui/vue/index.html b/EdgeCraftRAG/ui/vue/index.html index df137679ef..c871332d3c 100644 --- a/EdgeCraftRAG/ui/vue/index.html +++ b/EdgeCraftRAG/ui/vue/index.html @@ -9,6 +9,7 @@ + Edge Craft RAG based Q&A Chatbot diff --git a/EdgeCraftRAG/ui/vue/nginx.conf b/EdgeCraftRAG/ui/vue/nginx.conf index 6d9a233bf8..8b6701e78a 100644 --- a/EdgeCraftRAG/ui/vue/nginx.conf +++ b/EdgeCraftRAG/ui/vue/nginx.conf @@ -28,7 +28,7 @@ http { proxy_pass http://edgecraftrag-server:16010; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_http_version 1.1; - proxy_read_timeout 180s; + proxy_read_timeout 600s; proxy_set_header Connection ""; } diff --git a/EdgeCraftRAG/ui/vue/package.json b/EdgeCraftRAG/ui/vue/package.json index 516e870406..b11bf4d991 100644 --- a/EdgeCraftRAG/ui/vue/package.json +++ b/EdgeCraftRAG/ui/vue/package.json @@ -9,7 +9,6 @@ "preview": "vite preview" }, "dependencies": { - "@vueuse/i18n": "^4.0.0-beta.12", "ant-design-vue": "^4.0.0-rc.6", "axios": "^1.7.9", "clipboard": "^2.0.11", @@ -19,6 +18,7 @@ "highlight.js": "^11.11.1", "http": "^0.0.1-security", "js-cookie": "^3.0.5", + "jszip": "^3.10.1", "lodash": "^4.17.21", "marked": "^15.0.6", "mitt": "^3.0.1", diff --git a/EdgeCraftRAG/ui/vue/src/api/agent/index.ts b/EdgeCraftRAG/ui/vue/src/api/agent/index.ts new file mode 100644 index 0000000000..f4b27ed609 --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/api/agent/index.ts @@ -0,0 +1,55 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +import request from "../request"; + +export const getAgentList = () => { + return request({ + url: "/v1/settings/agents", + method: "get", + }); +}; + +export const getAgentDetailByName = (name: String) => { + return request({ + url: `/v1/settings/agents/${name}`, + method: "get", + }); +}; +export const requestAgentCreate = (data: Object) => { + return request({ + url: "/v1/settings/agents", + method: "post", + data, + showLoading: true, + showSuccessMsg: true, + successMsg: "request.agent.createSucc", + }); +}; +export const requestAgentUpdate = (name: String, data: Object) => { + return request({ + url: `/v1/settings/agents/${name}`, + method: "patch", + data, + showLoading: true, + showSuccessMsg: true, + successMsg: "request.agent.updateSucc", + }); +}; + +export const requestAgentDelete = (name: String) => { + return request({ + url: `/v1/settings/agents/${name}`, + method: "delete", + showLoading: true, + showSuccessMsg: true, + successMsg: "request.agent.deleteSucc", + }); +}; + +export const getAgentConfigs = (type: String) => { + return request({ + url: `/v1/settings/agents/configs/${type}`, + method: "get", + }); +}; diff --git a/EdgeCraftRAG/ui/vue/src/api/chatbot/index.ts b/EdgeCraftRAG/ui/vue/src/api/chatbot/index.ts index f7946ad72d..0182bd6c16 100644 --- a/EdgeCraftRAG/ui/vue/src/api/chatbot/index.ts +++ b/EdgeCraftRAG/ui/vue/src/api/chatbot/index.ts @@ -21,9 +21,33 @@ export const requestChatbotConfig = (data: Object) => { }); }; -export const getBenchmark = (name: String) => { +export const getBenchmark = () => { return request({ - url: `/v1/settings/pipelines/${name}/benchmark`, + url: `/v1/settings/pipeline/benchmark`, method: "get", }); }; + +export const getHistorySessionList = () => { + return request({ + url: "/v1/sessions", + method: 
"get", + }); +}; + +export const getSessionDetailById = (SessionId: String) => { + return request({ + url: `v1/session/${SessionId}`, + method: "get", + }); +}; + +export const requestSessionDelete = (SessionId: String) => { + return request({ + url: `/v1/session/${SessionId}`, + method: "delete", + showLoading: true, + showSuccessMsg: true, + successMsg: "request.knowledge.deleteSucc", + }); +}; diff --git a/EdgeCraftRAG/ui/vue/src/api/knowledgeBase/index.ts b/EdgeCraftRAG/ui/vue/src/api/knowledgeBase/index.ts index bb7bc9a494..fe300d6b33 100644 --- a/EdgeCraftRAG/ui/vue/src/api/knowledgeBase/index.ts +++ b/EdgeCraftRAG/ui/vue/src/api/knowledgeBase/index.ts @@ -10,7 +10,7 @@ export const getKnowledgeBaseList = () => { }); }; -export const getKnowledgeBaseDetialByName = (kbName: String) => { +export const getKnowledgeBaseDetailByName = (kbName: String) => { return request({ url: `/v1/knowledge/${kbName}`, method: "get", @@ -54,15 +54,14 @@ export const requestKnowledgeBaseRelation = (kbName: String, data: Object) => { url: `/v1/knowledge/${kbName}/files`, method: "post", data, - showLoading: true, showSuccessMsg: true, successMsg: "request.knowledge.uploadSucc", }); }; -export const requestFileDelete = (kbName: String, data: Object) => { +export const requestFileDelete = (name: String, data: Object) => { return request({ - url: `/v1/knowledge/${kbName}/files`, + url: `/v1/knowledge/${name}/files`, method: "delete", data, showLoading: true, @@ -71,4 +70,86 @@ export const requestFileDelete = (kbName: String, data: Object) => { }); }; +export const getExperienceList = () => { + return request({ + url: "/v1/experiences", + method: "get", + }); +}; + +export const requestExperienceCreate = (data: EmptyArrayType) => { + return request({ + url: "/v1/multiple_experiences/check", + method: "post", + data, + showLoading: true, + }); +}; +export const requestExperienceConfirm = (flag: Boolean, data: EmptyArrayType) => { + return request({ + url: `/v1/multiple_experiences/confirm?flag=${flag}`, + method: "post", + data, + showLoading: true, + showSuccessMsg: true, + successMsg: "request.experience.createSucc", + }); +}; +export const getExperienceDetailByName = (data: Object) => { + return request({ + url: `/v1/experience`, + method: "post", + data, + }); +}; + +export const requestExperienceUpdate = (data: Object) => { + return request({ + url: `/v1/experiences`, + method: "patch", + data, + showLoading: true, + showSuccessMsg: true, + successMsg: "request.experience.updateSucc", + }); +}; + +export const requestExperienceDelete = (data: Object) => { + return request({ + url: `/v1/experiences`, + method: "delete", + data, + showLoading: true, + showSuccessMsg: true, + successMsg: "request.experience.deleteSucc", + }); +}; + +export const requestExperienceRelation = (data: Object) => { + return request({ + url: "/v1/experiences/files", + method: "post", + data, + showLoading: true, + showSuccessMsg: true, + successMsg: "experience.importSuccTip", + }); +}; + +export const getkbadminList = () => { + return request({ + url: "/v1/kbadmin/kbs_list", + method: "get", + }); +}; + +export const requestUploadFileUrl = (kbName: String, data: Object) => { + return request({ + url: `v1/data/file/${kbName}`, + method: "post", + data, + type: "files", + }); +}; + export const uploadFileUrl = `${import.meta.env.VITE_API_URL}v1/data/file/`; diff --git a/EdgeCraftRAG/ui/vue/src/api/pipeline/index.ts b/EdgeCraftRAG/ui/vue/src/api/pipeline/index.ts index fd06d1d3d8..0da1fc3015 100644 --- 
a/EdgeCraftRAG/ui/vue/src/api/pipeline/index.ts +++ b/EdgeCraftRAG/ui/vue/src/api/pipeline/index.ts @@ -14,7 +14,6 @@ export const getPipelineList = () => { return request({ url: "/v1/settings/pipelines", method: "get", - showLoading: true, }); }; @@ -75,10 +74,11 @@ export const getRunDevice = () => { }); }; -export const getModelList = (type: string) => { +export const getModelList = (type: string, params?: Object) => { return request({ url: `/v1/settings/avail-models/${type}`, method: "get", + params, }); }; diff --git a/EdgeCraftRAG/ui/vue/src/api/request.ts b/EdgeCraftRAG/ui/vue/src/api/request.ts index 91805dbab5..44f6cf2051 100644 --- a/EdgeCraftRAG/ui/vue/src/api/request.ts +++ b/EdgeCraftRAG/ui/vue/src/api/request.ts @@ -7,8 +7,6 @@ import axios, { AxiosInstance } from "axios"; import qs from "qs"; import i18n from "@/i18n"; -const antNotification = serviceManager.getService("antNotification"); - const service: AxiosInstance = axios.create({ baseURL: import.meta.env.VITE_API_URL, timeout: 600000, @@ -39,6 +37,8 @@ service.interceptors.response.use( if (NextLoading) NextLoading.done(); const res = response.data; if (config.showSuccessMsg) { + const antNotification = serviceManager.getService("antNotification"); + if (antNotification) antNotification("success", i18n.global.t("common.success"), i18n.global.t(config.successMsg)); } @@ -55,6 +55,7 @@ service.interceptors.response.use( } else { errorMessage = error.message; } + const antNotification = serviceManager.getService("antNotification"); if (antNotification) antNotification("error", i18n.global.t("common.error"), errorMessage); return Promise.reject(error); diff --git a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.css b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.css index dedd20b357..d5a513f8c4 100644 --- a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.css +++ b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.css @@ -1,9 +1,9 @@ @font-face { font-family: "iconfont"; /* Project id 4784207 */ src: - url("iconfont.woff2?t=1754038546130") format("woff2"), - url("iconfont.woff?t=1754038546130") format("woff"), - url("iconfont.ttf?t=1754038546130") format("truetype"); + url("iconfont.woff2?t=1762502048420") format("woff2"), + url("iconfont.woff?t=1762502048420") format("woff"), + url("iconfont.ttf?t=1762502048420") format("truetype"); } .iconfont { @@ -14,6 +14,58 @@ -moz-osx-font-smoothing: grayscale; } +.icon-simple-robot:before { + content: "\e604"; +} + +.icon-deep_search:before { + content: "\e620"; +} + +.icon-jiqiren-05:before { + content: "\ec1e"; +} + +.icon-recursive:before { + content: "\e61e"; +} + +.icon-simple:before { + content: "\e61f"; +} + +.icon-lujing:before { + content: "\e61d"; +} + +.icon-agent:before { + content: "\e62c"; +} + +.icon-pipeline:before { + content: "\e650"; +} + +.icon-skip1:before { + content: "\e67f"; +} + +.icon-annotation:before { + content: "\e62e"; +} + +.icon-groundtruth:before { + content: "\e9b7"; +} + +.icon-kb:before { + content: "\e639"; +} + +.icon-experience:before { + content: "\e68e"; +} + .icon-deep-think:before { content: "\e772"; } diff --git a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.js b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.js index f6731b5c1a..6c670f647f 100644 --- a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.js +++ b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.js @@ -2,12 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 (window._iconfont_svg_string_4784207 = - ''), - ((h) => { + ''), + ((c) => { var l = (a = (a = 
document.getElementsByTagName("script"))[a.length - 1]).getAttribute("data-injectcss"), a = a.getAttribute("data-disable-injectsvg"); if (!a) { - var c, + var h, t, i, o, @@ -15,8 +15,8 @@ m = function (l, a) { a.parentNode.insertBefore(l, a); }; - if (l && !h.__iconfont__svg__cssinject__) { - h.__iconfont__svg__cssinject__ = !0; + if (l && !c.__iconfont__svg__cssinject__) { + c.__iconfont__svg__cssinject__ = !0; try { document.write( "", @@ -25,10 +25,10 @@ console && console.log(l); } } - (c = function () { + (h = function () { var l, a = document.createElement("div"); - (a.innerHTML = h._iconfont_svg_string_4784207), + (a.innerHTML = c._iconfont_svg_string_4784207), (a = a.getElementsByTagName("svg")[0]) && (a.setAttribute("aria-hidden", "true"), (a.style.position = "absolute"), @@ -40,29 +40,29 @@ }), document.addEventListener ? ~["complete", "loaded", "interactive"].indexOf(document.readyState) - ? setTimeout(c, 0) + ? setTimeout(h, 0) : ((t = function () { - document.removeEventListener("DOMContentLoaded", t, !1), c(); + document.removeEventListener("DOMContentLoaded", t, !1), h(); }), document.addEventListener("DOMContentLoaded", t, !1)) : document.attachEvent && - ((i = c), - (o = h.document), + ((i = h), + (o = c.document), (v = !1), - s(), + p(), (o.onreadystatechange = function () { - "complete" == o.readyState && ((o.onreadystatechange = null), e()); + "complete" == o.readyState && ((o.onreadystatechange = null), z()); })); } - function e() { + function z() { v || ((v = !0), i()); } - function s() { + function p() { try { o.documentElement.doScroll("left"); } catch (l) { - return void setTimeout(s, 50); + return void setTimeout(p, 50); } - e(); + z(); } })(window); diff --git a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.json b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.json index a8fea13f43..54eab718fe 100644 --- a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.json +++ b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.json @@ -5,6 +5,97 @@ "css_prefix_text": "icon-", "description": "", "glyphs": [ + { + "icon_id": "13542590", + "name": "simple-robot", + "font_class": "simple-robot", + "unicode": "e604", + "unicode_decimal": 58884 + }, + { + "icon_id": "14551080", + "name": "deep_search", + "font_class": "deep_search", + "unicode": "e620", + "unicode_decimal": 58912 + }, + { + "icon_id": "38847045", + "name": "机器人-05", + "font_class": "jiqiren-05", + "unicode": "ec1e", + "unicode_decimal": 60446 + }, + { + "icon_id": "46000445", + "name": "recursive", + "font_class": "recursive", + "unicode": "e61e", + "unicode_decimal": 58910 + }, + { + "icon_id": "46000444", + "name": "simple", + "font_class": "simple", + "unicode": "e61f", + "unicode_decimal": 58911 + }, + { + "icon_id": "45982435", + "name": "路径", + "font_class": "lujing", + "unicode": "e61d", + "unicode_decimal": 58909 + }, + { + "icon_id": "3638339", + "name": "agent", + "font_class": "agent", + "unicode": "e62c", + "unicode_decimal": 58924 + }, + { + "icon_id": "4772844", + "name": "Pipeline", + "font_class": "pipeline", + "unicode": "e650", + "unicode_decimal": 58960 + }, + { + "icon_id": "15838538", + "name": "skip", + "font_class": "skip1", + "unicode": "e67f", + "unicode_decimal": 59007 + }, + { + "icon_id": "12753474", + "name": "annotation", + "font_class": "annotation", + "unicode": "e62e", + "unicode_decimal": 58926 + }, + { + "icon_id": "33279726", + "name": "groundtruth", + "font_class": "groundtruth", + "unicode": "e9b7", + "unicode_decimal": 59831 + }, + { + "icon_id": "687788", + "name": 
"知识库", + "font_class": "kb", + "unicode": "e639", + "unicode_decimal": 58937 + }, + { + "icon_id": "5299955", + "name": "experience", + "font_class": "experience", + "unicode": "e68e", + "unicode_decimal": 59022 + }, { "icon_id": "44419262", "name": "deep-think", diff --git a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.ttf b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.ttf index d49803ef9c..8c90a44513 100644 Binary files a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.ttf and b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.ttf differ diff --git a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.woff b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.woff index e9b153a7f5..b18a749da0 100644 Binary files a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.woff and b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.woff differ diff --git a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.woff2 b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.woff2 index 1f66b46dff..75fdccb56d 100644 Binary files a/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.woff2 and b/EdgeCraftRAG/ui/vue/src/assets/iconFont/iconfont.woff2 differ diff --git a/EdgeCraftRAG/ui/vue/src/components.d.ts b/EdgeCraftRAG/ui/vue/src/components.d.ts index f79d1e1d69..35e756d199 100644 --- a/EdgeCraftRAG/ui/vue/src/components.d.ts +++ b/EdgeCraftRAG/ui/vue/src/components.d.ts @@ -10,11 +10,51 @@ export {}; /* prettier-ignore */ declare module 'vue' { export interface GlobalComponents { + AAffix: typeof import('ant-design-vue/es')['Affix'] AButton: typeof import('ant-design-vue/es')['Button'] + ACheckbox: typeof import('ant-design-vue/es')['Checkbox'] + ACheckboxGroup: typeof import('ant-design-vue/es')['CheckboxGroup'] + ACol: typeof import('ant-design-vue/es')['Col'] + ACollapse: typeof import('ant-design-vue/es')['Collapse'] + ACollapsePanel: typeof import('ant-design-vue/es')['CollapsePanel'] + AConfigProvider: typeof import('ant-design-vue/es')['ConfigProvider'] + ADescriptions: typeof import('ant-design-vue/es')['Descriptions'] + ADescriptionsItem: typeof import('ant-design-vue/es')['DescriptionsItem'] + ADivider: typeof import('ant-design-vue/es')['Divider'] + ADrawer: typeof import('ant-design-vue/es')['Drawer'] + ADropdown: typeof import('ant-design-vue/es')['Dropdown'] + AEmpty: typeof import('ant-design-vue/es')['Empty'] + AForm: typeof import('ant-design-vue/es')['Form'] + AFormItem: typeof import('ant-design-vue/es')['FormItem'] + AImage: typeof import('ant-design-vue/es')['Image'] AInput: typeof import('ant-design-vue/es')['Input'] + AInputNumber: typeof import('ant-design-vue/es')['InputNumber'] ALayout: typeof import('ant-design-vue/es')['Layout'] ALayoutContent: typeof import('ant-design-vue/es')['LayoutContent'] + ALayoutHeader: typeof import('ant-design-vue/es')['LayoutHeader'] + ALayoutSider: typeof import('ant-design-vue/es')['LayoutSider'] + AMenu: typeof import('ant-design-vue/es')['Menu'] + AMenuItem: typeof import('ant-design-vue/es')['MenuItem'] + AModal: typeof import('ant-design-vue/es')['Modal'] + APagination: typeof import('ant-design-vue/es')['Pagination'] + APopover: typeof import('ant-design-vue/es')['Popover'] + ARadio: typeof import('ant-design-vue/es')['Radio'] + ARadioGroup: typeof import('ant-design-vue/es')['RadioGroup'] + ARow: typeof import('ant-design-vue/es')['Row'] + ASelect: typeof import('ant-design-vue/es')['Select'] + ASelectOption: typeof import('ant-design-vue/es')['SelectOption'] + ASlider: typeof import('ant-design-vue/es')['Slider'] + ASpace: typeof 
import('ant-design-vue/es')['Space'] + ASteps: typeof import('ant-design-vue/es')['Steps'] + ATable: typeof import('ant-design-vue/es')['Table'] + ATag: typeof import('ant-design-vue/es')['Tag'] + ATextarea: typeof import('ant-design-vue/es')['Textarea'] + ATooltip: typeof import('ant-design-vue/es')['Tooltip'] + AUploadDragger: typeof import('ant-design-vue/es')['UploadDragger'] + FormTooltip: typeof import('./components/FormTooltip.vue')['default'] RouterLink: typeof import('vue-router')['RouterLink'] RouterView: typeof import('vue-router')['RouterView'] + SvgIcon: typeof import('./components/SvgIcon.vue')['default'] + TableColumns: typeof import('./components/TableColumns.vue')['default'] } } diff --git a/EdgeCraftRAG/ui/vue/src/components/PartialLoading.vue b/EdgeCraftRAG/ui/vue/src/components/PartialLoading.vue new file mode 100644 index 0000000000..b744f43e11 --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/components/PartialLoading.vue @@ -0,0 +1,70 @@ + + + + + diff --git a/EdgeCraftRAG/ui/vue/src/i18n/en.ts b/EdgeCraftRAG/ui/vue/src/i18n/en.ts index 39d3cf0fa8..9f267a1cf0 100644 --- a/EdgeCraftRAG/ui/vue/src/i18n/en.ts +++ b/EdgeCraftRAG/ui/vue/src/i18n/en.ts @@ -26,7 +26,21 @@ export default { clear: "Clear", all: "All", reset: "Reset", + export: "Export", uploadTip: "Click or drag file to this area to upload", + loading: "Loading", + waitTip: "Please wait patiently and do not refresh the page during this period.", + copy: "Copy", + send: "Send", + regenerate: "Regenerate", + copySucc: "Copy successful !", + copyError: "Copy failed !", + emptyText: "The content is empty !", + yes: "Yes", + no: "No", + manual: "Create Now", + inputTip: "Please enter ", + connect: "Connect", }, system: { title: "System Status", @@ -38,7 +52,7 @@ export default { notUsed: "Not Used", info: "System Information", kernel: "Kernel", - processor: "KerProcessornel", + processor: "Processor", os: "OS", time: "Current time", }, @@ -85,10 +99,12 @@ export default { indexer: "Indexer", indexerType: "Indexer Type", embedding: "Embedding Model", + embeddingUrl: "Embedding URL", embeddingDevice: "Embedding run device", retriever: "Retriever", retrieverType: "Retriever Type", topk: "Search top k", + topn: "Rerank top n", postProcessor: "PostProcessor", postProcessorType: "PostProcessor Type", rerank: "Rerank Model", @@ -99,15 +115,17 @@ export default { language: "Large Language Model", llmDevice: "LLM run device", weights: "Weights", - local: "Local", - vllm: "Vllm", - vector_uri: "Vector Uri", + local: "Local (OpenVINO)", + vllm: "Remote (vLLM)", + vector_url: "Vector Database URL", modelName: "Model Name", - vllm_url: "Vllm Url", + vllm_url: "vLLM URL", + kbadmin: "kbadmin", }, valid: { nameValid1: "Please input name", nameValid2: "Name should be between 2 and 30 characters", + nameValid3: "The name only supports letters, numbers, and underscores.", nodeParserType: "Please select Node Parser Type", chunkSizeValid1: "Please select Chunk Size", chunkSizeValid2: "The value of Chunk Size cannot be less than Chunk Overlap", @@ -115,25 +133,43 @@ export default { chunkOverlapValid2: "The value of Chunk Overlap cannot be greater than Chunk Size", windowSize: "Please select Chunk Window Size", indexerType: "Please select Indexer Type", - embedding: "Please select Embedding Model", + embedding: "Please select embedding Model", + embeddingUrl: "IP : Port, (e.g. 
192.168.1.1:13020)", embeddingDevice: "Please select Embedding run device", retrieverType: "Please select Retriever Type", + retrieverTypeFormat: "Retriever type can only select kbadmin", topk: "Please select Top k", + topn: "Please select Top n", postProcessorType: "Please select PostProcessor Type", rerank: "Please select Rerank Model", rerankDevice: "Please select Rerank run device", generatorType: "Please select Generator Type", - language: "Please select Large Language Model", + language: "Please select or enter Large Language Model", llmDevice: "Please select LLM run device", weights: "Please select Weights", - vector_uri: "IP : Port, (e.g. 192.168.1.1:19530)", + kb_vector_url: "IP : Port, (e.g. 192.168.1.1:29530)", + vector_url: "IP : Port, (e.g. 192.168.1.1:19530)", vllm_url: "IP : Port, (e.g. 192.168.1.1:8086)", - urlValid1: "Please enter url", + urlValid1: "Please enter vector url", urlValid2: "Please enter the correct url", urlValid3: "URL cannot be accessed", urlValid4: "Test passed !", urlValid5: "The URL has not passed verification yet", modelName: "Please enter model name", + vllmUrlValid1: "Please enter vLLM url", + vllmUrlValid2: "Please enter the correct url", + vllmUrlValid3: "URL cannot be accessed", + vllmUrlValid4: "Test passed !", + vllmUrlValid5: "The URL has not passed verification yet", + nodeParserTypeTip: "Both Indexer Type and Retriever Type will be set to kbadmin at the same time", + indexerTypeTip: "Both Node Parser Type and Retriever Type will be set to kbadmin at the same time", + retrieverTypeTip: "Both Node Parser Type and Indexer Type will be set to kbadmin at the same time", + retrieverChangeTip: "Please go to the Indexer stage to complete the data", + indexerTypeValid1: "Indexer type can only select kbadmin", + modelRequired: "Please enter embedding model url", + modelFormat: "Please enter the correct url", + retrieverValid: "Please return to the Indexer stage to supplement information.", + modelTip: "Please connect to vLLM service", }, desc: { name: "The name identifier of the pipeline", @@ -143,14 +179,15 @@ export default { windowSize: "The number of sentences on each side of a sentence to capture", indexerType: "The type of index structure responsible for building based on the parsed nodes", embedding: "Embed the text data to represent it and build a vector index", - embeddingDevice: "The device used by the Embedding Model", + embeddingUrl: "Connecting embedding model url", + embeddingDevice: "The device used by the embedding model", retrieverType: - "The retrieval type used when retrieving relevant nodes from the index according to the user's query", + "The retrieval type used when retrieving relevant nodes from the index according to the user's experience", topk: "The number of top k results to return", postProcessorType: "Select postprocessors for post-processing of the context", rerank: "Rerank Model", rerankDevice: "Rerank run device", - generatorType: "Local inference generator or vllm generator", + generatorType: "Local inference generator or vLLM generator", language: "The large model used for generating dialogues", llmDevice: "The device used by the LLM", weights: "Model weight", @@ -167,15 +204,17 @@ export default { "Sentence window node parser. Splits a document into Nodes, with each node being a sentence. 
Each node contains a window from the surrounding sentences in the metadata.", unstructured: "UnstructedNodeParser is a component that processes unstructured data.", milvusVector: "Embedding vectors stored in milvus", - vector_uri: "Connecting milvus uri", + vector_url: "Connecting milvus vector url", test: "Test", - modelName: "Vllm model name", - vllm_url: " Test if Vllm url is available ", + modelName: "vLLM model name", + vllm_url: " Test if vLLM url is available ", + kbadmin: "Third party knowledge base engine", }, }, generation: { title: "Generation Configuration", retriever: "Retriever Configuration", + tips: "Setting the parameter to 0 will inherit the default value from the currently active Pipeline.", config: { top_n: "Rerank top n", temperature: "Temperature", @@ -203,12 +242,14 @@ export default { tip4: "Please enter your question...", new: "New Chat", rag: "EC RAG", - setting: "Pipeline Setting", + setting: "Settings", clear: "Clear Message", think: "Think", reason: "Think", thinkStart: "Thinking...", thinkEnd: "Thoughts", + history: "Chat History", + notExist: "Session not found.", }, knowledge: { title: "Knowledge Base", @@ -218,18 +259,36 @@ export default { edit: "Edit Knowledge Base", deleteTip: "Are you sure delete this knowledge base?", activeTip: "Are you sure activate this knowledge base?", - uploadTip: "Supports PDF, Word, TXT,Doc,Html,PPT formats, with a single file size not exceeding 200M", + uploadTip: "Supports PDF, Word, TXT,Doc,Html,PPT,ZIP formats, with a single file size not exceeding 200M", notFileTip: "The knowledge base is empty. Go upload your files.", name: "Name", des: "Description", activated: "Activated", nameValid1: "Please input knowledge base name", nameValid2: "Name should be between 2 and 30 characters", - nameValid3: "The name cannot start with a number", + nameValid3: "Alphanumeric and underscore only, starting with a letter or underscore.", desValid: "Please input knowledge base description", activeValid: "Please select whether to activate", - uploadValid: "Single file size not exceeding 50M.", + uploadValid: "Single file size not exceeding 200M.", + zipNoFiles: "The ZIP file is empty.", deleteFileTip: "Are you sure delete this file?", + selectTitle: "Create Type Select", + selectDes: "Please select the type you want to create", + experience: "Experience", + experienceDes: + "Experience refers to the knowledge and skills acquired through practical involvement, trial, and reflection, serving as a key foundation for solving real-world problems.", + kbDes: + "A Knowledge Base is a centralized repository for storing organized information such as documents, FAQs, and guides, enabling teams or users to quickly access and share knowledge.", + type: "Type", + original: "Original", + kbadmin: "kbadmin", + typeValid: "Please select knowledge base type", + nameRequired: "Please select kbadmin name", + waitTip: "Please be patient and wait for the file upload to complete.", + done: "Finished", + successfully: "Successfully ", + failed: "Failed", + totalTip: "files", }, request: { pipeline: { @@ -237,6 +296,7 @@ export default { updateSucc: "Pipeline update successfully !", deleteSucc: "Pipeline deleted successfully !", switchSucc: "Pipeline state switch successful !", + connectSucc: "vLLM service connection successful!", }, chatbot: { updateSucc: "Configuration update successful !", @@ -248,9 +308,97 @@ export default { updateSucc: "Knowledge Base update successfully !", deleteSucc: "Knowledge Base deleted successfully !", }, + experience: { + createSucc: 
"Experience created successfully!", + updateSucc: "Experience update successful!", + deleteSucc: "Experience deleted successfully!", + }, + agent: { + createSucc: "Agent created successfully!", + updateSucc: "Agent update successful!", + deleteSucc: "Agent deleted successfully!", + }, }, error: { notFoundTip: "Uh oh! It seems like you're lost", back: "Go Home", }, + experience: { + create: "Create Experience", + edit: "Edit Experience", + import: "Import Experience", + fileFormatTip: "Supports JSON format, with file size not exceeding 100M.", + importSuccTip: "Files upload successful!", + importErrTip: "Files upload failed!", + uploadValid: "Single file size not exceeding 100M.", + experience: "Experience", + detail: "Detail", + operation: "Operation", + deleteTip: "Are you sure delete this experience?", + addExperience: "Add Experience", + delExperience: "Delete Experience", + addContent: "Add Content", + delContent: "Delete Content", + total: "Total experience: ", + unique: "Unique", + selectTip: "Please choose an appropriate method for data update", + cover: "Cover", + increase: "Append", + deactivateTip: "Are you sure deactivate this experience?", + activeTip: "Are you sure activate this experience?", + label: { + experience: "Experience", + contents: "Experience Content", + content: "Content", + }, + placeholder: { + experience: "Please enter Experience", + content: "Please enter content", + }, + valid: { + experience: "Experience cannot be empty", + content: "Content cannot be empty", + }, + }, + agent: { + agent: "Agent", + create: "Create Agent", + edit: "Edit Agent", + detail: "Agent Details", + import: "Import Agent", + activated: "Activated", + inactive: "Inactive", + isActive: "Activated", + importSuccTip: "Files upload successful!", + importErrTip: "Files upload failed!", + name: "Name", + id: "ID", + status: "Status", + operation: "Operation", + configs: "Configs", + deactivateTip: "Are you sure deactivate this agent?", + activeTip: "Are you sure activate this agent?", + deleteTip: "Are you sure you want to delete this conversation record ?", + addConfigs: "Add Configs", + delConfigs: "Delete Config", + noData: "No settings configured", + think: "Agent Thinking", + pipeline: "Pipeline ID", + label: { + name: "Agent Name", + type: "Agent Type ", + pipeline: "Pipeline Name", + configs: "Agent Configs", + key: "Config Key", + value: "Config Value", + }, + valid: { + name: "Please enter name", + type: "Please select agent type", + pipeline: "Please select pipeline name", + configs: "Please enter agent configs", + key: "Please enter config key", + value: "Please enter config value", + }, + }, }; diff --git a/EdgeCraftRAG/ui/vue/src/i18n/zh.ts b/EdgeCraftRAG/ui/vue/src/i18n/zh.ts index 2a1a318851..f374633a1a 100644 --- a/EdgeCraftRAG/ui/vue/src/i18n/zh.ts +++ b/EdgeCraftRAG/ui/vue/src/i18n/zh.ts @@ -26,7 +26,21 @@ export default { clear: "清除", all: "全选", reset: "重置", + export: "导出", uploadTip: "点击或将文件拖到此区域进行上传", + loading: "加载中", + waitTip: "请耐心等待,在此期间不要刷新页面。", + copy: "复制", + send: "发送", + regenerate: "重新生成", + copySucc: "复制成功!", + copyError: "复制失败!", + emptyText: "内容为空!", + yes: "是", + no: "否", + manual: "去创建", + inputTip: "请输入 ", + connect: "连接", }, system: { title: "系统状态", @@ -84,10 +98,12 @@ export default { indexer: "索引器", indexerType: "索引器类型", embedding: "嵌入模型", + embeddingUrl: "嵌入模型地址", embeddingDevice: "模型运行设备", retriever: "检索器", retrieverType: "检索器类型", topk: "检索 top k", + topn: "Top n", postProcessor: "节点后处理器", postProcessorType: "节点后处理器类型", rerank: "重排模型", @@ -98,15 
+114,17 @@ export default { language: "语言大模型", llmDevice: "运行设备", weights: "权重", - local: "本地", - vllm: "Vllm", - vector_uri: "Vector Uri", + local: "本地(OpenVINO)", + vllm: "远程(vLLM)", + vector_url: "向量数据库地址", modelName: "模型名称", - vllm_url: "Vllm 地址", + vllm_url: "vLLM 地址", + kbadmin: "kbadmin", }, valid: { nameValid1: "请输入名称", nameValid2: "请输入 2 到 30 个字符的名称", + nameValid3: "名称仅支持字母、数字和下划线", nodeParserType: "请选择节点解析器类型", chunkSizeValid1: "请选择分块大小", chunkSizeValid2: "分块大小的值不能小于分块重叠值", @@ -115,24 +133,42 @@ export default { windowSize: "请选择句子上下文窗口大小", indexerType: "请选择索引器类型", embedding: "请选择嵌入模型", + embeddingUrl: "IP : 端口,(例如 192.168.1.1:13020)", embeddingDevice: "请选择嵌入模型运行设备", retrieverType: "请选择检索器类型", + retrieverTypeFormat: "检索器类型只能选择kbadmin", topk: "请选择Top k", + topn: "请选择Top n", postProcessorType: "请选择后处理器类型", rerank: "请选择重排模型", rerankDevice: "请选择重排模型运行设备", generatorType: "请选择生成器类型", - language: "请选择大语言模型", + language: "请选择或输入大语言模型", llmDevice: "请选择大语言模型运行设备", weights: "请选择模型权重", - vector_uri: "IP : 端口,(例如 192.168.1.1:19530)", + kb_vector_url: "IP : 端口,(例如 192.168.1.1:29530)", + vector_url: "IP : 端口,(例如 192.168.1.1:19530)", vllm_url: "IP : 端口,(例如 192.168.1.1:8086)", - urlValid1: "URL 不能为空", - urlValid2: "请输入正确的URL", - urlValid3: "URL无法访问", + urlValid1: "向量数据库地址不能为空", + urlValid2: "请输入正确的向量数据库地址", + urlValid3: "向量数据库地址无法访问", urlValid4: "测试通过!", - urlValid5: "URL还未通过校验", + urlValid5: "向量数据库地址还未通过校验", modelName: "请输入模型名称", + vllmUrlValid1: "vLLM地址不能为空", + vllmUrlValid2: "请输入正确的vLLM地址", + vllmUrlValid3: "vLLM地址无法访问", + vllmUrlValid4: "测试通过!", + vllmUrlValid5: "vLLM地址还未通过校验", + nodeParserTypeTip: "索引器类型和检索器类型将同时设置为kbadmin", + indexerTypeTip: "节点解析器类型和检索器类型将同时设置为kbadmin", + retrieverTypeTip: "索引器类型和节点解析器类型将同时设置为kbadmin", + retrieverChangeTip: "请前往索引器阶段补全数据", + indexerTypeValid1: "索引器类型只能选择kbadmin", + modelRequired: "请输入嵌入模型地址", + modelFormat: "请输入正确的模型地址", + retrieverValid: "请回到Indexer阶段补充信息", + modelTip: "请先连接vLLM服务", }, desc: { name: "Pipeline的名称标识,用于区分不同工作流", @@ -142,13 +178,14 @@ export default { windowSize: "每个节点捕获的上下文句子窗口大小,用于增强语义完整性", indexerType: "基于解析节点构建的索引结构类型", embedding: "将文本转换为向量表示的过程", + embeddingUrl: "嵌入模型地址", embeddingDevice: "执行嵌入模型推理的硬件设备(CPU/GPU)", retrieverType: "根据用户查询从索引中检索节点的算法类型", topk: "检索时返回的最相关结果数量", postProcessorType: "对检索结果进行后处理的组件类型", rerank: "对检索结果进行二次排序的模型,提升答案相关性", rerankDevice: "执行重排模型推理的硬件设备(CPU/GPU)", - generatorType: "回答生成方式的类型(本地部署模型或 vllm 高效推理框架)", + generatorType: "回答生成方式的类型(本地部署模型或 vLLM 高效推理框架)", language: "用于生成自然语言回答的大模型(如 LLaMA、ChatGLM)", llmDevice: "大语言模型推理的硬件设备(需匹配模型规模要求)", weights: "大模型的权重", @@ -157,22 +194,24 @@ export default { vectorsimilarity: "根据向量相似性进行检索", autoMerge: "该检索器会尝试将上下文合并到父级上下文中", bm25: "使用BM25算法检索节点的BM25检索器", - faissVector: "嵌入存储在Faiss索引中。", + faissVector: "矢量索引存储在Faiss中。", vector: "矢量存储索引", simple: "解析文本,优先选择完整的句子。", - hierarchical: "使用借点解析将文档分割成递归层次节点", + hierarchical: "使用NodeParser将文档拆分为递归层次结构的节点。", sentencewindow: "将文档分割成节点,每个节点代表一个句子。每个节点包含一个来自元数据中周围句子的窗口", unstructured: "一个处理非结构化数据的组件", - milvusVector: "嵌入存储在Milvus索引中", - vector_uri: "测试Milvus地址是否可用", + milvusVector: "矢量索引存储在Milvus中", + vector_url: "测试Milvus地址是否可用", test: "测 试", - modelName: "Vllm 模型名称", - vllm_url: "测试Vllm地址是否可用", + modelName: "vLLM 模型名称", + vllm_url: "测试vLLM地址是否可用", + kbadmin: "第三方知识库系统", }, }, generation: { title: "生成器配置", retriever: "检索器配置", + tips: "参数为 0 时,将继承当前激活 Pipeline 的默认值。", config: { top_n: "Rerank top n", temperature: "Temperature", @@ -199,12 +238,14 @@ export default { tip4: "有什么问题?请在这里输入...", new: "开启新对话", rag: "EC RAG", - setting: 
"Pipeline 配置", + setting: "配置", clear: "清除消息", think: "思考", reason: "思考", thinkStart: "思考中...", thinkEnd: "思考过程", + history: "历史会话", + notExist: "会话不存在!", }, knowledge: { title: "知识库", @@ -214,18 +255,36 @@ export default { edit: "编辑知识库", deleteTip: "您确定要删除此知识库吗?此操作不可恢复。", activeTip: "您确定要激活此知识库吗?", - uploadTip: "支持 PDF、Word、TXT、Doc、HTML、PPT 格式,单个文件大小不超过 200M。", + uploadTip: "支持 PDF、Word、TXT、Doc、HTML、PPT、ZIP 格式,单个文件大小不超过 200M。", notFileTip: "您还没有上传任何文件,点击“上传”按钮开始添加内容吧~", name: "名称", des: "描述", activated: "激活状态", nameValid1: "请输入知识库名称", nameValid2: "请输入 2 到 30 个字符的名称", - nameValid3: "名称不能以数字开头", + nameValid3: "仅支持字母、数字和下划线,必须以字母或下划线开头。", desValid: "请输入知识库描述", activeValid: "请选择是否启用该功能。", - uploadValid: "单个文件大小不得超过 50MB", + uploadValid: "单个文件大小不得超过 200MB", + zipNoFiles: "上传的 ZIP 压缩包为空", deleteFileTip: "您确定要删除此文档吗?此操作不可恢复。", + selectTitle: "创建类型选择", + selectDes: "请选择要创建的数据类型", + experience: "经验注入", + experienceDes: + "Experience是指个人或团队在实践过程中积累的知识和技能,通常通过实际操作、试错和反思获得,是解决实际问题的重要依据", + kbDes: + "知识库是系统化存储信息的集合,用于集中管理文档、常见问题、操作指南等知识内容,便于团队或用户快速查找和共享信息。", + type: "类型", + original: "原始的", + kbadmin: "kbadmin", + typeValid: "请选择知识库类型", + nameRequired: "请选择kbadmin名称", + waitTip: "请耐心等待所有文件上传完成!", + done: "已完成", + successfully: "成功", + failed: "失败", + totalTip: "个文件", }, request: { pipeline: { @@ -233,6 +292,7 @@ export default { updateSucc: "Pipeline 更新成功!", deleteSucc: "Pipeline 删除成功!", switchSucc: "Pipeline 状态切换成功!", + connectSucc: "vLLM 服务连接成功!", }, chatbot: { updateSucc: "配置更新成功!", @@ -242,11 +302,100 @@ export default { deleteFileSucc: "文档删除成功!", createSucc: "知识库创建成功!", updateSucc: "知识库更新成功!", - deleteSucc: " 知识库删除成功!", + deleteSucc: "知识库删除成功!", + }, + experience: { + createSucc: "经验创建成功!", + updateSucc: "经验更新成功!", + deleteSucc: "经验删除成功!", + }, + agent: { + createSucc: "智能体创建成功!", + updateSucc: "智能体更新成功!", + deleteSucc: "智能体删除成功!", }, }, error: { notFoundTip: "Oops 好像走错地方啦~", back: "首页", }, + experience: { + create: "新建经验", + edit: "编辑经验", + import: "导入经验", + fileFormatTip: "仅支持JSON格式,文件大小不超过100M", + importSuccTip: "文件上传成功!", + importErrTip: "文件上传失败!", + uploadValid: "单个文件大小不得超过 200MB", + experience: "经验", + detail: "详情", + operation: "操作", + deleteTip: "确定要删除这个经验?此操作不可恢复。", + addExperience: "新增经验", + delExperience: "删除经验", + addContent: "新增内容", + delContent: "删除内容", + total: "经验总数: ", + unique: "唯一", + selectTip: "请选择合适的方式进行数据更新", + cover: "覆盖", + increase: "追加", + deactivateTip: "您确定要停用该经验库吗?", + activeTip: "您确定要启用该经验库吗?", + label: { + experience: "经验", + contents: "经验内容", + content: "内容", + }, + placeholder: { + experience: "请输入经验", + content: "请输入内容", + }, + valid: { + experience: "经验不能为空", + content: "内容不能为空", + }, + }, + agent: { + agent: "智能体", + create: "创建智能体", + edit: "编辑智能体", + detail: "智能体详情", + import: "导入智能体", + isActive: "状态", + activated: "已启用", + inactive: "已停用", + pipelineFormatTip: "仅支持JSON格式,文件大小不超过10M", + importSuccTip: "智能体配置导入成功!", + importErrTip: "智能体配置导入失败!", + name: "名称", + id: "ID", + status: "状态", + operation: "操作", + configs: "配置", + deactivateTip: "您确定要停用该智能体吗?", + activeTip: "您确定要启用该智能体吗?", + deleteTip: "确认删除这条对话记录吗?此操作不可恢复。", + addConfigs: "新增配置", + delConfigs: "删除配置", + noData: "暂无配置", + think: "智能体思考", + pipeline: "Pipeline ID", + label: { + name: "智能体名称", + type: "智能体类型 ", + pipeline: "Pipeline 名称", + configs: "智能体配置", + key: "配置名称", + value: "配置内容", + }, + valid: { + name: "请输入名称", + type: "请选择智能体类型", + pipeline: "请选择Pipeline名称", + configs: "请录入智能体配置", + key: "请输入配置名称", + value: "请输入配置内容", + }, + }, }; diff --git a/EdgeCraftRAG/ui/vue/src/layout/Header.vue 
b/EdgeCraftRAG/ui/vue/src/layout/Header.vue index 33d0038d62..0de80ece3d 100644 --- a/EdgeCraftRAG/ui/vue/src/layout/Header.vue +++ b/EdgeCraftRAG/ui/vue/src/layout/Header.vue @@ -2,7 +2,7 @@
[Header.vue template hunk: element markup lost in extraction; only the changed-line markers remain]
+ diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/ChatHistory.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/ChatHistory.vue new file mode 100644 index 0000000000..9816964249 --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/ChatHistory.vue @@ -0,0 +1,436 @@ + + + + + diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/ConfigDrawer.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/ConfigDrawer.vue index 16258dd73e..8e1aaa2ba6 100644 --- a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/ConfigDrawer.vue +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/ConfigDrawer.vue @@ -18,7 +18,13 @@ class="form-wrap" >
[ConfigDrawer.vue template hunk, element markup lost in extraction: the {{ $t("generation.retriever") }} heading is re-wrapped, a {{ $t("generation.tips") }} note is added, the existing {{ $t("generation.desc.top_n") }} description is kept, and a new control described by {{ $t("pipeline.desc.topk") }} is added ahead of the {{ $t("generation.title") }} section.]
@@ -128,7 +149,10 @@ diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/SseService.ts b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/SseService.ts index 1f57ce25b1..4c7b47138f 100644 --- a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/SseService.ts +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/SseService.ts @@ -1,49 +1,97 @@ // Copyright (C) 2025 Intel Corporation // SPDX-License-Identifier: Apache-2.0 + import { getChatSessionId } from "@/utils/common"; +import { message } from "ant-design-vue"; + +export interface StreamController { + cancel: () => void; +} -export const handleMessageSend = async ( +export const handleMessageSend = ( url: string, postData: any, - onDisplay: (data: any) => void, + onDisplay: (data: string) => void, onEnd?: () => void, -): Promise => { - try { - const response = await fetch(url, { - method: "POST", - headers: { - "Content-Type": "application/json", - Accept: "text/event-stream", - sessionid: getChatSessionId(), - }, - body: JSON.stringify(postData), - }); - - if (!response.ok) { - throw new Error(`Network response was not ok: ${response.statusText}`); - } - - const reader = response.body?.getReader(); - const decoder = new TextDecoder("utf-8"); - let buffer = ""; +): StreamController => { + let reader: ReadableStreamDefaultReader | undefined; + const controller = new AbortController(); + const execute = async () => { try { + const response = await fetch(url, { + method: "POST", + headers: { + "Content-Type": "application/json", + Accept: "text/event-stream", + sessionid: getChatSessionId(), + }, + body: JSON.stringify(postData), + signal: controller.signal, + }); + + if (!response.ok) { + let errorMessage = ""; + try { + const errorText = await response.text(); + if (errorText) { + errorMessage = errorText; + } + } catch (parseError) { + console.warn("Failed to read error response:", parseError); + } + message.error(errorMessage || "Request failed"); + onEnd?.(); + return; + } + + reader = response.body?.getReader(); + if (!reader) { + throw new Error("Readable stream is not available"); + } + + const decoder = new TextDecoder("utf-8"); + let buffer = ""; + while (true) { const { done, value } = await reader.read(); + if (done) { onEnd?.(); break; } - buffer += decoder.decode(value, { stream: true }); + buffer += decoder.decode(value, { stream: true }); onDisplay(buffer); } - } catch (error) { - console.error(error); + } catch (error: any) { + if (error.name === "AbortError") { + console.log("Stream was aborted by user."); + } else { + console.error("Request or stream error:", error); + if (error.message !== "Request failed") { + message.error(error.message || "Stream error"); + } + } onEnd?.(); + } finally { + if (reader) { + try { + await reader.cancel(); + } catch (cancelError) { + console.warn("Failed to cancel reader:", cancelError); + } + } } - } catch (error) { - console.error(error); - onEnd?.(); - } + }; + + execute().catch(console.error); + + return { + cancel: () => { + if (!controller.signal.aborted) { + controller.abort(); + } + }, + }; }; diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/index.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/index.vue index 4d46a60654..4e03e048ae 100644 --- a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/index.vue +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/Chatbot/index.vue @@ -22,7 +22,8 @@ import { Local } from "@/utils/storage"; const chatbotStore = chatbotAppStore(); let chatbotConfiguration 
= reactive({ - top_n: 25, + top_n: 0, + k: 0, temperature: 0.01, top_p: 0.95, top_k: 10, diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/DetailComponent.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/DetailComponent.vue new file mode 100644 index 0000000000..5ba1d94094 --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/DetailComponent.vue @@ -0,0 +1,93 @@ + + + + + diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/ExperienceDetail.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/ExperienceDetail.vue new file mode 100644 index 0000000000..c81057bb8e --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/ExperienceDetail.vue @@ -0,0 +1,225 @@ + + + + + diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/ImportDialog.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/ImportDialog.vue new file mode 100644 index 0000000000..b558bac195 --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/ImportDialog.vue @@ -0,0 +1,100 @@ + + + + + diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/UpdateDialog.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/UpdateDialog.vue new file mode 100644 index 0000000000..a379762400 --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/UpdateDialog.vue @@ -0,0 +1,399 @@ + + + + + diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/index.ts b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/index.ts new file mode 100644 index 0000000000..c9d1df62ed --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/Experience/index.ts @@ -0,0 +1,7 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +import UpdateDialog from "./UpdateDialog.vue"; +import ImportDialog from "./ImportDialog.vue"; + +export { UpdateDialog, ImportDialog }; diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/KnowledgeDetail.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/KnowledgeDetail.vue new file mode 100644 index 0000000000..956b2dbe2b --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/KnowledgeDetail.vue @@ -0,0 +1,398 @@ + + + + + diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/KnowledgeDetial.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/KnowledgeDetial.vue deleted file mode 100644 index 290a85cd8e..0000000000 --- a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/KnowledgeDetial.vue +++ /dev/null @@ -1,302 +0,0 @@ - - - - - diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/SelectTypeDialog.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/SelectTypeDialog.vue new file mode 100644 index 0000000000..8b9ab7978e --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/SelectTypeDialog.vue @@ -0,0 +1,134 @@ + + + + + diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateDialog.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateDialog.vue index f987ff5cb4..e95e7436f6 100644 --- 
a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateDialog.vue +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateDialog.vue @@ -17,14 +17,45 @@ autocomplete="off" :label-col="{ style: { width: '100px' } }" > - + + + {{ $t("knowledge.original") }} + {{ $t("knowledge.kbadmin") }} + + + + + + {{ + item + }} + + - + {{ $t("pipeline.activated") }} {{ $t("pipeline.inactive") }} @@ -59,10 +90,11 @@ import { requestKnowledgeBaseCreate, requestKnowledgeBaseUpdate, + getkbadminList, } from "@/api/knowledgeBase"; import { isValidName } from "@/utils/validate"; import { FormInstance } from "ant-design-vue"; -import { computed, ref } from "vue"; +import { computed, ref, onMounted } from "vue"; import { useI18n } from "vue-i18n"; const props = defineProps({ @@ -74,11 +106,17 @@ const props = defineProps({ type: String, default: "create", }, + dialogFlag: { + type: String, + default: "knowledge", + }, }); interface FormType { - name: string; + name: string | undefined; description: string; + comp_type: string; active: boolean; + comp_subtype: string; } const validateName = async (rule: any, value: string) => { @@ -89,7 +127,6 @@ const validateName = async (rule: any, value: string) => { if (len < 2 || len > 30) { return Promise.reject(t("knowledge.nameValid2")); } - console.log(isValidName(value)); if (!isValidName(value)) { return Promise.reject(t("knowledge.nameValid3")); } @@ -98,9 +135,11 @@ const validateName = async (rule: any, value: string) => { const { t } = useI18n(); const emit = defineEmits(["close", "switch"]); +const { dialogFlag } = props; + const typeMap = { - create: t("knowledge.create"), - edit: t("knowledge.edit"), + create: t(`${dialogFlag}.create`), + edit: t(`${dialogFlag}.edit`), } as const; const dialogTitle = computed(() => { return typeMap[props.dialogType as keyof typeof typeMap]; @@ -108,20 +147,39 @@ const dialogTitle = computed(() => { const isEdit = computed(() => { return props.dialogType === "edit"; }); -const isActivated = computed(() => { - return props.dialogData?.active; +const isExperience = computed(() => { + return props.dialogFlag === "experience"; +}); + +const isOriginal = computed(() => { + return form.comp_subtype === "origin_kb"; }); const modelVisible = ref(true); const submitLoading = ref(false); const formRef = ref(); -const { name = "", description = "", active = false } = props.dialogData; +const { + comp_subtype = "origin_kb", + name = undefined, + description = "", + active = false, + experience_active = false, +} = props.dialogData; const form = reactive({ - name, + comp_subtype, + name: isExperience.value ? "Experience" : name, description, - active, + comp_type: dialogFlag, + active: isExperience.value ? 
experience_active : active, }); - -const rules = reactive({ +const kbList = ref([]); +const rules: FormRules = reactive({ + comp_subtype: [ + { + required: true, + message: t("knowledge.typeValid"), + trigger: "change", + }, + ], name: [ { required: true, @@ -129,6 +187,13 @@ const rules = reactive({ trigger: ["blur", "change"], }, ], + kbName: [ + { + required: true, + message: t("knowledge.nameRequired"), + trigger: "change", + }, + ], active: [ { required: true, @@ -137,17 +202,36 @@ const rules = reactive({ }, ], }); +const handleTypeChange = () => { + form.name = undefined; +}; +const queryKbadmin = async () => { + const data: any = await getkbadminList(); + kbList.value = [].concat(data); +}; +// Format parameter +const formatFormParam = () => { + const { name, description, comp_type, active, comp_subtype } = form; + return { + name, + description, + comp_type, + comp_subtype: !isExperience.value ? comp_subtype : undefined, + active: !isExperience.value ? active : undefined, + experience_active: isExperience.value ? active : undefined, + }; +}; // Submit const handleSubmit = () => { formRef.value?.validate().then(() => { submitLoading.value = true; const { name } = form; - const apiUrl = - props.dialogType === "edit" - ? requestKnowledgeBaseUpdate - : requestKnowledgeBaseCreate; - apiUrl(form) + const apiUrl = isEdit.value + ? requestKnowledgeBaseUpdate + : requestKnowledgeBaseCreate; + + apiUrl(formatFormParam()) .then(() => { emit("switch", name); handleCancel(); @@ -165,6 +249,9 @@ const handleSubmit = () => { const handleCancel = () => { emit("close"); }; +onMounted(() => { + queryKbadmin(); +}); diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/type.ts b/EdgeCraftRAG/ui/vue/src/views/chatbot/type.ts index b426be5df8..044af6ad9f 100644 --- a/EdgeCraftRAG/ui/vue/src/views/chatbot/type.ts +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/type.ts @@ -9,13 +9,17 @@ export interface Benchmark { export interface IMessage { role: string; content: string; + query?: string; + errorMessage?: string; benchmark?: Benchmark | undefined; } export interface ThinkType { enable_thinking?: boolean; + enable_rag_retrieval?: boolean; } export interface ConfigType { top_n: number; + k: number; temperature: number; top_p: number; top_k: number; diff --git a/EdgeCraftRAG/ui/vue/src/views/error/404.vue b/EdgeCraftRAG/ui/vue/src/views/error/404.vue index 4683c892f2..ce18c7340a 100644 --- a/EdgeCraftRAG/ui/vue/src/views/error/404.vue +++ b/EdgeCraftRAG/ui/vue/src/views/error/404.vue @@ -2,12 +2,11 @@

{{ $t("error.notFoundTip") }}

- {{ $t("error.back") }} + + + {{ $t("error.back") }}
@@ -16,8 +15,6 @@ import notFound from "@/assets/svgs/404-icon.svg"; import router from "@/router"; import { HomeFilled } from "@ant-design/icons-vue"; -import { h } from "vue"; - const handleGoHome = () => { router.push("/"); }; diff --git a/EdgeCraftRAG/ui/vue/src/views/pipeline/components/QuickStart.vue b/EdgeCraftRAG/ui/vue/src/views/main/QuickStart.vue similarity index 98% rename from EdgeCraftRAG/ui/vue/src/views/pipeline/components/QuickStart.vue rename to EdgeCraftRAG/ui/vue/src/views/main/QuickStart.vue index a817fbba68..93c5abb1b9 100644 --- a/EdgeCraftRAG/ui/vue/src/views/pipeline/components/QuickStart.vue +++ b/EdgeCraftRAG/ui/vue/src/views/main/QuickStart.vue @@ -50,7 +50,7 @@ - - - diff --git a/EdgeCraftRAG/ui/vue/src/views/pipeline/components/UpdateDialog/Retriever.vue b/EdgeCraftRAG/ui/vue/src/views/pipeline/components/UpdateDialog/Retriever.vue deleted file mode 100644 index eb12bef097..0000000000 --- a/EdgeCraftRAG/ui/vue/src/views/pipeline/components/UpdateDialog/Retriever.vue +++ /dev/null @@ -1,135 +0,0 @@ - - - - - diff --git a/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/columnsList.ts b/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/columnsList.ts new file mode 100644 index 0000000000..f508282abd --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/columnsList.ts @@ -0,0 +1,64 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +const getTableColumns = (t: (key: string) => string): TableColumns[] => [ + { + title: t("agent.name"), + key: "name", + dataIndex: "name", + fixed: "left", + minWidth: 100, + visible: true, + disabled: true, + }, + { + title: t("agent.id"), + dataIndex: "idx", + key: "idx", + minWidth: 100, + ellipsis: true, + visible: true, + }, + { + title: t("agent.pipeline"), + dataIndex: "pipeline_idx", + key: "pipeline_idx", + minWidth: 100, + ellipsis: true, + visible: true, + }, + { + title: t("agent.label.type"), + dataIndex: "type", + key: "type", + minWidth: 60, + ellipsis: true, + visible: true, + }, + { + title: t("agent.configs"), + dataIndex: "configs", + key: "configs", + minWidth: 120, + ellipsis: true, + visible: true, + }, + { + title: t("agent.status"), + dataIndex: "active", + key: "active", + minWidth: 80, + ellipsis: true, + visible: true, + }, + { + title: t("pipeline.operation"), + key: "operation", + dataIndex: "operation", + fixed: "right", + visible: true, + disabled: true, + }, +]; + +export default getTableColumns; diff --git a/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/components/DynamicConfigs.vue b/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/components/DynamicConfigs.vue new file mode 100644 index 0000000000..b767282194 --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/components/DynamicConfigs.vue @@ -0,0 +1,116 @@ + + + + + diff --git a/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/components/Table.vue b/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/components/Table.vue new file mode 100644 index 0000000000..48764d1b53 --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/components/Table.vue @@ -0,0 +1,204 @@ + + + + + diff --git a/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/components/UpdateDialog.vue b/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/components/UpdateDialog.vue new file mode 100644 index 0000000000..09f2dc9bd5 --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/components/UpdateDialog.vue @@ 
-0,0 +1,382 @@ + + + + + diff --git a/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/components/index.ts b/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/components/index.ts new file mode 100644 index 0000000000..cc6876330d --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/components/index.ts @@ -0,0 +1,8 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +import Table from "./Table.vue"; +import UpdateDialog from "./UpdateDialog.vue"; +import DynamicConfigs from "./DynamicConfigs.vue"; + +export { Table, UpdateDialog, DynamicConfigs }; diff --git a/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/enum.ts b/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/enum.ts new file mode 100644 index 0000000000..b9f2afa271 --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/enum.ts @@ -0,0 +1,13 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +export const AgentType = [ + { + name: "Simple", + value: "simple", + }, + { + name: "Deep Search", + value: "deep_search", + }, +] as const; diff --git a/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/index.vue b/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/index.vue new file mode 100644 index 0000000000..47da93d7ed --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/index.vue @@ -0,0 +1,74 @@ + + + + + diff --git a/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/type.ts b/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/type.ts new file mode 100644 index 0000000000..1ceaa77944 --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/settings/components/Agent/type.ts @@ -0,0 +1,10 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +export interface ModelType { + model_id: string | undefined; + model_path: string; + model_url?: string; + device: string; + weight?: string; +} diff --git a/EdgeCraftRAG/ui/vue/src/views/pipeline/components/columnsList.ts b/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/columnsList.ts similarity index 100% rename from EdgeCraftRAG/ui/vue/src/views/pipeline/components/columnsList.ts rename to EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/columnsList.ts diff --git a/EdgeCraftRAG/ui/vue/src/views/pipeline/components/DetailDrawer.vue b/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/components/DetailDrawer.vue similarity index 90% rename from EdgeCraftRAG/ui/vue/src/views/pipeline/components/DetailDrawer.vue rename to EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/components/DetailDrawer.vue index 616ca925f2..a2aca72f05 100644 --- a/EdgeCraftRAG/ui/vue/src/views/pipeline/components/DetailDrawer.vue +++ b/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/components/DetailDrawer.vue @@ -83,6 +83,17 @@ formData.indexer.indexer_type }} +
  • + {{ + $t("pipeline.config.embeddingUrl") + }} + {{ + formData.indexer?.embedding_url + }} +
  • {{ $t("pipeline.config.embedding") @@ -101,13 +112,17 @@
  • {{ - $t("pipeline.config.vector_uri") + $t("pipeline.config.vector_url") }} {{ - formData.indexer.vector_uri + formData.indexer?.vector_url }}
  • @@ -165,6 +180,10 @@ {{ item.processor_type }}