Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion onnxruntime/contrib_ops/webgpu/bert/group_query_attention.cc
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,12 @@ Status GroupQueryAttention::ComputeInternal(onnxruntime::webgpu::ComputeContext&
Tensor* present_value = context.Output(2, present_kv_shape);
parameters.past_present_share_buffer_ = present_key != nullptr && present_value != nullptr && past_key != nullptr && past_value != nullptr && past_key->DataRaw() == present_key->DataRaw() && past_value->DataRaw() == present_value->DataRaw();

ORT_ENFORCE(parameters.total_sequence_length_ <= parameters.seqlen_present_kv_cache_, "Total sequence length cannot be greater than the existing KV cache length.");
// Use a sliding window if the total sequence exceeds the window's length.
bool use_sliding_window = (local_window_size_ != -1 && local_window_size_ < parameters.total_sequence_length_);
if (!do_rotary_ &&
head_sink == nullptr && !use_smooth_softmax_ &&
local_window_size_ == -1 &&
!use_sliding_window &&
CanApplyFlashAttention(attention_bias, present_key, present_value, parameters, context)) {
return ApplyFlashAttention(query, key, value, attention_bias, output, past_key, present_key, past_value,
present_value, parameters, context);
Expand Down
134 changes: 17 additions & 117 deletions tools/ci_build/github/azure-pipelines/c-api-noopenmp-test-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,109 +121,6 @@ stages:
parameters:
StageSuffix: 'macOS_CPU_x64'

- template: templates/final-jar-testing.yml
parameters:
OS: Windows
PoolName: 'onnxruntime-Win-CPU-2022'

- template: templates/final-jar-testing.yml
parameters:
OS: Linux
PoolName: 'onnxruntime-Ubuntu2204-AMD-CPU'

- template: templates/final-jar-testing.yml
parameters:
OS: MacOS
PoolName: 'macOS-14'


- stage: GPU_JAR_Testing
dependsOn: Setup
jobs:
- job: Final_Jar_Testing_Windows_GPU
workspace:
clean: all
pool: 'onnxruntime-Win2022-GPU-A10'
timeoutInMinutes: 60
variables:
- name: runCodesignValidationInjection
value: false

steps:
- template: templates/set-version-number-variables-step.yml

- template: templates/jobs/download_win_gpu_library.yml
parameters:
CudaVersion: 12.2
DownloadCUDA: true
DownloadTRT: true

- template: templates/flex-downloadPipelineArtifact.yml
parameters:
StepName: 'Download Final Jar'
ArtifactName: onnxruntime-java-gpu
TargetPath: '$(Build.BinariesDirectory)\final-jar'

- template: templates/flex-downloadPipelineArtifact.yml
parameters:
StepName: 'Download Jar Tools'
ArtifactName: onnxruntime-java-tools
TargetPath: '$(Build.BinariesDirectory)\final-jar'

- task: CmdLine@2
inputs:
script: |
mkdir test
pushd test
jar xf $(Build.BinariesDirectory)\final-jar\testing.jar
popd
java -DUSE_CUDA=1 -jar junit-platform-console-standalone-1.6.2.jar -cp .;.\test;protobuf-java-3.25.5.jar;onnxruntime_gpu-$(OnnxRuntimeVersion).jar --scan-class-path --fail-if-no-tests --disable-banner
workingDirectory: '$(Build.BinariesDirectory)\final-jar'

- job: Final_Jar_Testing_Linux_GPU
workspace:
clean: all
pool:
name: 'Onnxruntime-Linux-GPU-A10'
os: linux
variables:
- name: runCodesignValidationInjection
value: false
- name: docker_base_image
value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc12:20250724.1
timeoutInMinutes: 60
steps:
- checkout: self
submodules: false
- template: templates/set-version-number-variables-step.yml

- template: templates/flex-downloadPipelineArtifact.yml
parameters:
StepName: 'Download Final Jar'
ArtifactName: onnxruntime-java-gpu
TargetPath: '$(Build.BinariesDirectory)/final-jar'

- template: templates/get-docker-image-steps.yml
parameters:
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda_tensorrt10_0
Context: tools/ci_build/github/linux/docker/
DockerBuildArgs: "
--build-arg BUILD_UID=$( id -u )
--build-arg BASEIMAGE=${{ variables.docker_base_image }}
--build-arg TRT_VERSION=${{ variables.linux_trt_version }}
"
Repository: onnxruntimeubi8packagestest

- bash: |
docker run -e SYSTEM_COLLECTIONURI --rm \
--gpus all \
--volume $(Build.SourcesDirectory):/onnxruntime_src \
--volume $(Build.BinariesDirectory):/build \
--volume /data/models:/build/models:ro \
onnxruntimeubi8packagestest \
/bin/bash /onnxruntime_src/tools/ci_build/github/linux/java_linux_final_test.sh -r /build -v $(OnnxRuntimeVersion)
displayName: 'Test'

- template: nuget/templates/test_win.yml
parameters:
AgentPool: 'onnxruntime-Win2022-GPU-A10'
Expand Down Expand Up @@ -274,13 +171,14 @@ stages:
clean: true
submodules: none

- template: templates/flex-downloadPipelineArtifact.yml
parameters:
ArtifactName: "Windows_Packaging_cuda_build_artifacts"
StepName: 'Download Pipeline Artifact - Windows GPU Packages Build'
TargetPath: '$(Build.BinariesDirectory)/RelWithDebInfo/'
- download: build
artifact: 'Windows_Packaging_cuda_build_artifacts'
displayName: 'Download Windows GPU Packages Build'

- template: templates/telemetry-steps.yml
- task: CmdLine@2
inputs:
script: |
move $(Pipeline.Workspace)/build/Windows_Packaging_cuda_build_artifacts $(Build.BinariesDirectory)/RelWithDebInfo

- template: templates/set-version-number-variables-step.yml

Expand Down Expand Up @@ -323,7 +221,7 @@ stages:
displayName: 'test'
inputs:
scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py'
arguments: '--config RelWithDebInfo --use_binskim_compliant_compile_flags --enable_lto --disable_rtti --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --enable_onnx_tests $(TelemetryOption) '
arguments: '--config RelWithDebInfo --use_binskim_compliant_compile_flags --enable_lto --disable_rtti --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --enable_onnx_tests'
workingDirectory: '$(Build.BinariesDirectory)'
# The previous stage only assembles the Java binaries; testing is done in this stage on a GPU machine.
- template: templates/make_java_win_binaries.yml
Expand All @@ -345,13 +243,15 @@ stages:
clean: true
submodules: none

- template: templates/flex-downloadPipelineArtifact.yml
parameters:
ArtifactName: "Windows_Packaging_tensorrt_build_artifacts"
StepName: 'Download Pipeline Artifact - Windows GPU Packages Build'
TargetPath: '$(Build.BinariesDirectory)/RelWithDebInfo/'

- template: templates/telemetry-steps.yml
- download: build
artifact: 'Windows_Packaging_tensorrt_build_artifacts'
displayName: 'Download Windows GPU Packages Build'

- task: CmdLine@2
inputs:
script: |
move $(Pipeline.Workspace)/build/Windows_Packaging_tensorrt_build_artifacts $(Build.BinariesDirectory)/RelWithDebInfo

- template: templates/set-version-number-variables-step.yml

Expand Down Expand Up @@ -394,7 +294,7 @@ stages:
displayName: 'test'
inputs:
scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py'
arguments: '--config RelWithDebInfo --use_binskim_compliant_compile_flags --enable_lto --disable_rtti --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --enable_onnx_tests $(TelemetryOption) '
arguments: '--config RelWithDebInfo --use_binskim_compliant_compile_flags --enable_lto --disable_rtti --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --enable_onnx_tests'
workingDirectory: '$(Build.BinariesDirectory)'
# The previous stage only assembles the Java binaries; testing is done in this stage on a GPU machine.
- template: templates/make_java_win_binaries.yml
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ extends:
targetType: 'inline'
script: |
mkdir -p $(Build.BinariesDirectory)/osx-x64
Move-Item -Path $(Build.BinariesDirectory)/osx/onnxruntime-osx-x86_64* -Destination $(Build.BinariesDirectory)/osx-x64
Move-Item -Path $(Build.BinariesDirectory)/osx/onnxruntime-osx-x86_64* -Destination $(Build.BinariesDirectory)/osx-x64

mkdir -p $(Build.BinariesDirectory)/osx-arm64
Move-Item -Path $(Build.BinariesDirectory)/osx/onnxruntime-osx-arm64* -Destination $(Build.BinariesDirectory)/osx-arm64
Expand Down
162 changes: 162 additions & 0 deletions tools/ci_build/github/azure-pipelines/jar_package_testing.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
# Tests the Java (JAR) packages published by the packaging pipeline.
# The final-jar-testing templates are instantiated for Windows, Linux and
# macOS pools; the GPU JAR is tested on Windows and Linux GPU pools in the
# GPU_JAR_Testing stage defined inline below.
resources:
  pipelines:
    # Pipeline resource that provides the artifacts. Steps below refer to it
    # by the alias `build` (e.g. `download: build`), and its artifacts are
    # downloaded under $(Pipeline.Workspace)/build/<artifact name>.
    - pipeline: build
      source: 'Zip-Nuget-Java-Nodejs Packaging Pipeline'
      trigger: true
      branch: main

variables:
  # Maven release downloaded by the "Install Maven (Linux)" step.
  mavenVersion: '3.9.8'

stages:
  - template: templates/final-jar-testing-win.yml
    parameters:
      PoolName: 'onnxruntime-Win-CPU-2022'

  - template: templates/final-jar-testing-linux.yml
    parameters:
      OS: Linux
      PoolName: 'onnxruntime-Ubuntu2204-AMD-CPU'

  # macOS reuses the same template as Linux, selected through the OS parameter.
  - template: templates/final-jar-testing-linux.yml
    parameters:
      OS: MacOS
      PoolName: 'macOS-14'

  - stage: GPU_JAR_Testing
    # Explicitly empty: this stage does not wait for any other stage.
    dependsOn: []
    jobs:
      - job: Final_Jar_Testing_Windows_GPU
        workspace:
          clean: all
        pool: 'onnxruntime-Win2022-GPU-A10'
        timeoutInMinutes: 60
        variables:
          - name: runCodesignValidationInjection
            value: false

        steps:
          - template: templates/set-version-number-variables-step.yml

          - template: templates/jobs/download_win_gpu_library.yml
            parameters:
              CudaVersion: 12.2
              DownloadCUDA: true
              DownloadTRT: true

          - template: templates/download_maven_for_tests.yml
          - download: build
            artifact: 'onnxruntime-java-gpu'
            displayName: 'Download Final Jar'
          # Put the GPU jars next to the other jars in build\onnxruntime-java.
          # NOTE(review): this assumes that directory already exists at this
          # point (presumably created by download_maven_for_tests.yml) - confirm.
          - script: |
              move $(Pipeline.Workspace)\build\onnxruntime-java-gpu\*.jar $(Pipeline.Workspace)\build\onnxruntime-java\

          # Drops signature/checksum/pom files, unpacks testing.jar into
          # build/tests, then runs the JUnit console launcher with every
          # remaining jar in build\onnxruntime-java on the classpath.
          - task: PowerShell@2
            displayName: 'Run Java Tests with PowerShell'
            inputs:
              targetType: 'inline'
              script: |
                # Exit script on any error
                $ErrorActionPreference = "Stop"

                cd $(Pipeline.Workspace)/build/onnxruntime-java
                del *.asc
                del *.sha256
                del *.sha512
                del *.pom
                cd ..
                mkdir tests
                cd tests
                jar xf $(Pipeline.Workspace)/build/onnxruntime-java/testing.jar
                del $(Pipeline.Workspace)/build/onnxruntime-java/testing.jar
                dir $(Pipeline.Workspace)/build/tests
                Write-Host "Running JUnit Tests..."
                & java -DUSE_CUDA=1 `
                  -cp "$(Pipeline.Workspace)\build\tests;$(Pipeline.Workspace)\build\onnxruntime-java\*" org.junit.platform.console.ConsoleLauncher --scan-classpath=$(Pipeline.Workspace)\build\tests `
                  --fail-if-no-tests --disable-banner --reports-dir "$($env:Build_ArtifactStagingDirectory)/TestResults"

          - task: PublishTestResults@2
            displayName: 'Publish Test Results'
            inputs:
              testResultsFormat: 'JUnit'
              testResultsFiles: '$(Build.ArtifactStagingDirectory)/TestResults/TEST-junit-jupiter.xml'
              failTaskOnFailedTests: true

      - job: Final_Jar_Testing_Linux_GPU
        workspace:
          clean: all
        pool:
          name: 'Onnxruntime-Linux-GPU-A10'
        variables:
          - name: runCodesignValidationInjection
            value: false
          - name: docker_base_image
            value: onnxruntimebuildcache.azurecr.io/internal/azureml/onnxruntime/build/cuda12_x64_almalinux8_gcc12:20250724.1
        timeoutInMinutes: 60
        steps:
          - checkout: self
            submodules: false

          - template: templates/set-version-number-variables-step.yml

          # Install the JDK that the Maven task below points at via jdkDirectory.
          # `dpkg -l` is the last command, so the step fails if the package is
          # not installed.
          - bash: |
              sudo apt-get install -y msopenjdk-17
              dpkg -l msopenjdk-17

          - bash: |
              # Fail the step as soon as a command fails. Without this the step
              # result is the exit code of the final `echo`, which would hide a
              # failed download or extraction.
              set -e
              echo "Downloading and installing Maven $(mavenVersion) for Linux..."
              MAVEN_DIR="$(Agent.TempDirectory)/apache-maven-$(mavenVersion)"
              # Download Maven binary
              wget https://archive.apache.org/dist/maven/maven-3/$(mavenVersion)/binaries/apache-maven-$(mavenVersion)-bin.tar.gz -O $(Agent.TempDirectory)/maven.tar.gz

              # Extract to the temp directory
              mkdir -p ${MAVEN_DIR}
              tar -xzf $(Agent.TempDirectory)/maven.tar.gz -C $(Agent.TempDirectory)

              # Add Maven's bin directory to the PATH for subsequent tasks in the job
              echo "##vso[task.prependpath]${MAVEN_DIR}/bin"
            displayName: 'Install Maven (Linux)'

          # Sanity check that the prepended PATH entry took effect.
          - script: |
              echo "Maven is now on the PATH."
              mvn --version

          - download: build
            artifact: 'onnxruntime-java-gpu'
            displayName: 'Download Final Jar'

          # Rename the downloaded folder
          - script: |
              mv $(Pipeline.Workspace)/build/onnxruntime-java-gpu $(Pipeline.Workspace)/build/onnxruntime-java

          # Copy the dependencies declared in tools/ci_build/java/pom.xml into
          # the same folder as the downloaded jars.
          - task: Maven@4
            displayName: 'Download Dependencies'
            inputs:
              mavenPomFile: '$(Build.SourcesDirectory)/tools/ci_build/java/pom.xml'
              goals: 'dependency:copy-dependencies'
              options: '-DoutputDirectory=$(Pipeline.Workspace)/build/onnxruntime-java'
              publishJUnitTestResults: false
              javaHomeOption: 'Path'
              jdkDirectory: '/usr/lib/jvm/msopenjdk-17-amd64'
              jdkVersionOption: 'Default'
              mavenVersionOption: 'Default'

          # Now all the jars are under $(Pipeline.Workspace)/build, which is
          # mounted into the test container as /build.

          - template: templates/get-docker-image-steps.yml
            parameters:
              Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.package_ubi8_cuda_tensorrt10_0
              Context: tools/ci_build/github/linux/docker/
              DockerBuildArgs: "--build-arg BUILD_UID=$( id -u ) --build-arg BASEIMAGE=${{ variables.docker_base_image }} --build-arg TRT_VERSION=${{ variables.linux_trt_version }}"
              Repository: onnxruntimeubi8packagestest

          # The container runs with networking disabled (--network=none), so
          # everything the test script needs has to be present in the mounted
          # volumes.
          - bash: |
              docker run --network=none --rm \
                --gpus all \
                --volume $(Build.SourcesDirectory):/onnxruntime_src \
                --volume $(Pipeline.Workspace)/build:/build \
                --volume /data/models:/build/models:ro \
                onnxruntimeubi8packagestest \
                /bin/bash /onnxruntime_src/tools/ci_build/github/linux/java_linux_final_test.sh -r /build -v $(OnnxRuntimeVersion)
            displayName: 'Test'
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,13 @@ steps:
displayName: 'Download NPM_packages'
artifact: 'NPM_packages'

- script: |
mv $(Pipeline.Workspace)/build/NPM_packages '$(Build.BinariesDirectory)/nodejs-artifact'

- task: PowerShell@2
displayName: 'Move Artifact Directory'
inputs:
targetType: 'inline'
script: |
Move-Item -Path "$(Pipeline.Workspace)/build/NPM_packages" -Destination "$(Build.BinariesDirectory)/nodejs-artifact"

- script: mkdir e2e_test
workingDirectory: '$(Build.BinariesDirectory)'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@ stages:
clean: all
timeoutInMinutes: 120
pool:
name: 'Azure Pipelines'
image: 'macOS-15'
os: 'macOS'
vmImage: 'macOS-15'

variables:
- name: OnnxRuntimeBuildDirectory
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ extends:
sourceAnalysisPool:
name: onnxruntime-Win-CPU-2022
os: windows
codeql:
compiled:
enabled: false
justificationForDisabling: 'CodeQL causes the React Native Android tests to fail when trying to load Linux x64 .so'

stages:
- template: templates/web-ci.yml
parameters:
Expand Down
Loading
Loading