Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add C API for streaming HLG decoding #734

Merged
merged 7 commits into from
Apr 5, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Fix swift api
  • Loading branch information
csukuangfj committed Apr 4, 2024
commit 41af8534b36a572394f8eed2e81bdc776cfe8678
5 changes: 5 additions & 0 deletions .github/scripts/test-swift.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ echo "pwd: $PWD"
cd swift-api-examples
ls -lh

./run-streaming-hlg-decode-file.sh
rm ./streaming-hlg-decode-file
rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18

./run-spoken-language-identification.sh
rm -rf sherpa-onnx-whisper*

Expand All @@ -31,4 +35,5 @@ sed -i.bak '20d' ./decode-file.swift

./run-decode-file-non-streaming.sh


ls -lh
8 changes: 8 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ option(SHERPA_ONNX_ENABLE_C_API "Whether to build C API" ON)
# Fix typo in help text: "webscoket" -> "websocket" (help string only; the
# option name and default are unchanged).
option(SHERPA_ONNX_ENABLE_WEBSOCKET "Whether to build websocket server/client" ON)
option(SHERPA_ONNX_ENABLE_GPU "Enable ONNX Runtime GPU support" OFF)
option(SHERPA_ONNX_ENABLE_WASM "Whether to enable WASM" OFF)
option(SHERPA_ONNX_ENABLE_WASM_THREAD "Whether to enable WASM thread" OFF)
option(SHERPA_ONNX_ENABLE_WASM_TTS "Whether to enable WASM for TTS" OFF)
option(SHERPA_ONNX_ENABLE_WASM_ASR "Whether to enable WASM for ASR" OFF)
option(SHERPA_ONNX_ENABLE_WASM_KWS "Whether to enable WASM for KWS" OFF)
Expand Down Expand Up @@ -110,6 +111,7 @@ message(STATUS "SHERPA_ONNX_ENABLE_C_API ${SHERPA_ONNX_ENABLE_C_API}")
message(STATUS "SHERPA_ONNX_ENABLE_WEBSOCKET ${SHERPA_ONNX_ENABLE_WEBSOCKET}")
message(STATUS "SHERPA_ONNX_ENABLE_GPU ${SHERPA_ONNX_ENABLE_GPU}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM ${SHERPA_ONNX_ENABLE_WASM}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM_THREAD ${SHERPA_ONNX_ENABLE_WASM_THREAD}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM_TTS ${SHERPA_ONNX_ENABLE_WASM_TTS}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM_ASR ${SHERPA_ONNX_ENABLE_WASM_ASR}")
message(STATUS "SHERPA_ONNX_ENABLE_WASM_KWS ${SHERPA_ONNX_ENABLE_WASM_KWS}")
Expand All @@ -119,6 +121,12 @@ message(STATUS "SHERPA_ONNX_ENABLE_TTS ${SHERPA_ONNX_ENABLE_TTS}")
message(STATUS "SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY ${SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY}")
message(STATUS "SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE ${SHERPA_ONNX_USE_PRE_INSTALLED_ONNXRUNTIME_IF_AVAILABLE}")

# WASM thread support builds on top of the base WASM support, so it is an
# error to request SHERPA_ONNX_ENABLE_WASM_THREAD without SHERPA_ONNX_ENABLE_WASM.
if(SHERPA_ONNX_ENABLE_WASM_THREAD AND NOT SHERPA_ONNX_ENABLE_WASM)
  message(FATAL_ERROR "Please enable WASM when WASM thread is enabled")
endif()

if(SHERPA_ONNX_ENABLE_TTS)
message(STATUS "TTS is enabled")
add_definitions(-DSHERPA_ONNX_ENABLE_TTS=1)
Expand Down
1 change: 1 addition & 0 deletions build-wasm-simd-nodejs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ cmake \
-DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
-DSHERPA_ONNX_ENABLE_GPU=OFF \
-DSHERPA_ONNX_ENABLE_WASM=ON \
-DSHERPA_ONNX_ENABLE_WASM_THREAD=ON \
-DSHERPA_ONNX_ENABLE_WASM_NODEJS=ON \
-DSHERPA_ONNX_ENABLE_BINARY=OFF \
-DSHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY=OFF \
Expand Down
60 changes: 60 additions & 0 deletions cmake/onnxruntime-wasm-simd-thread.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Copyright (c) 2022-2024 Xiaomi Corporation

# Downloads a prebuilt onnxruntime static library for WebAssembly
# (SIMD + threads) and exposes its headers and static libs to the caller.
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")

if(NOT SHERPA_ONNX_ENABLE_WASM)
  message(FATAL_ERROR "This file is for WebAssembly.")
endif()

if(BUILD_SHARED_LIBS)
  message(FATAL_ERROR "BUILD_SHARED_LIBS should be OFF for WebAssembly")
endif()

# Primary URL plus a mirror; both are passed to FetchContent_Declare below,
# which tries them in order.
#
# BUG FIX: the original assigned onnxruntime_URL twice, so the first (GitHub)
# URL was silently discarded and only the mirror was ever used. The second
# assignment is meant to populate onnxruntime_URL2 — the variable that is
# cleared when a local copy is found and consumed in FetchContent_Declare.
set(onnxruntime_URL  "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-wasm-static_lib-simd-threads-1.17.1.zip")
set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.1/onnxruntime-wasm-static_lib-simd-threads-1.17.1.zip")
set(onnxruntime_HASH "SHA256=6d2a668d6fad94038fb4dff7ca5eecd67ce6e4d38cbbde0c993b4aead5476986")

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/onnxruntime-wasm-static_lib-simd-threads-1.17.1.zip
  ${CMAKE_SOURCE_DIR}/onnxruntime-wasm-static_lib-simd-threads-1.17.1.zip
  ${CMAKE_BINARY_DIR}/onnxruntime-wasm-static_lib-simd-threads-1.17.1.zip
  /tmp/onnxruntime-wasm-static_lib-simd-threads-1.17.1.zip
  /star-fj/fangjun/download/github/onnxruntime-wasm-static_lib-simd-threads-1.17.1.zip
)

# Prefer a pre-downloaded local archive when one exists; clear the mirror so
# only the local file is used.
foreach(f IN LISTS possible_file_locations)
  if(EXISTS "${f}")
    set(onnxruntime_URL "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    set(onnxruntime_URL2)
    break()
  endif()
endforeach()

FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
    ${onnxruntime_URL2}
  URL_HASH ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

# for static libraries, we use onnxruntime_lib_files directly below
include_directories(${onnxruntime_SOURCE_DIR}/include)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/lib*.a")

# This file is included from inside a function (download_onnxruntime in
# cmake/onnxruntime.cmake), so export the list to the caller's scope.
set(onnxruntime_lib_files ${onnxruntime_lib_files} PARENT_SCOPE)

message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
install(FILES ${onnxruntime_lib_files} DESTINATION lib)
4 changes: 4 additions & 0 deletions cmake/onnxruntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@ function(download_onnxruntime)
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
if(SHERPA_ONNX_ENABLE_WASM)
if(SHERPA_ONNX_ENABLE_WASM_THREAD)
include(onnxruntime-wasm-simd-thread)
else()
include(onnxruntime-wasm-simd)
endif()
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL riscv64)
if(BUILD_SHARED_LIBS)
include(onnxruntime-linux-riscv64)
Expand Down
1 change: 1 addition & 0 deletions swift-api-examples/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ vits-vctk
sherpa-onnx-paraformer-zh-2023-09-14
!*.sh
*.bak
streaming-hlg-decode-file
16 changes: 14 additions & 2 deletions swift-api-examples/SherpaOnnx.swift
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,15 @@ func sherpaOnnxFeatureConfig(
feature_dim: Int32(featureDim))
}

/// Convenience builder for the C struct `SherpaOnnxOnlineCtcFstDecoderConfig`.
///
/// - Parameters:
///   - graph: Path to the decoding graph (e.g. an HLG.fst file); converted
///     to a C string pointer for the `graph` field.
///   - maxActive: Value stored in the C struct's `max_active` field
///     (narrowed to Int32).
func sherpaOnnxOnlineCtcFstDecoderConfig(
  graph: String = "",
  maxActive: Int = 3000
) -> SherpaOnnxOnlineCtcFstDecoderConfig {
  let config = SherpaOnnxOnlineCtcFstDecoderConfig(
    graph: toCPointer(graph),
    max_active: Int32(maxActive)
  )
  return config
}

func sherpaOnnxOnlineRecognizerConfig(
featConfig: SherpaOnnxFeatureConfig,
modelConfig: SherpaOnnxOnlineModelConfig,
Expand All @@ -121,7 +130,8 @@ func sherpaOnnxOnlineRecognizerConfig(
decodingMethod: String = "greedy_search",
maxActivePaths: Int = 4,
hotwordsFile: String = "",
hotwordsScore: Float = 1.5
hotwordsScore: Float = 1.5,
ctcFstDecoderConfig: SherpaOnnxOnlineCtcFstDecoderConfig = sherpaOnnxOnlineCtcFstDecoderConfig()
) -> SherpaOnnxOnlineRecognizerConfig {
return SherpaOnnxOnlineRecognizerConfig(
feat_config: featConfig,
Expand All @@ -133,7 +143,9 @@ func sherpaOnnxOnlineRecognizerConfig(
rule2_min_trailing_silence: rule2MinTrailingSilence,
rule3_min_utterance_length: rule3MinUtteranceLength,
hotwords_file: toCPointer(hotwordsFile),
hotwords_score: hotwordsScore)
hotwords_score: hotwordsScore,
ctc_fst_decoder_config: ctcFstDecoderConfig
)
}

/// Wrapper for recognition result.
Expand Down
36 changes: 36 additions & 0 deletions swift-api-examples/run-streaming-hlg-decode-file.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/usr/bin/env bash

# Build (if needed) and run the streaming HLG decoding example for the
# Swift API. Run this script from inside swift-api-examples/.

set -ex

# The native sherpa-onnx library must have been built first; it provides the
# headers and libraries that swiftc links against below.
if [ ! -d ../build-swift-macos ]; then
echo "Please run ../build-swift-macos.sh first!"
exit 1
fi

# Download and unpack the pre-trained streaming zipformer CTC model
# (including HLG.fst) on the first run only.
if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst ]; then
echo "Downloading the pre-trained model for testing."

wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
fi

# Compile the example binary only when it does not already exist.
if [ ! -e ./streaming-hlg-decode-file ]; then
# Note: We use -lc++ to link against libc++ instead of libstdc++
swiftc \
-lc++ \
-I ../build-swift-macos/install/include \
-import-objc-header ./SherpaOnnx-Bridging-Header.h \
./streaming-hlg-decode-file.swift ./SherpaOnnx.swift \
-L ../build-swift-macos/install/lib/ \
-l sherpa-onnx \
-l onnxruntime \
-o streaming-hlg-decode-file

strip ./streaming-hlg-decode-file
else
echo "./streaming-hlg-decode-file exists - skip building"
fi

# The linked libraries live in the install tree, so extend the dynamic
# loader search path before launching the example.
export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./streaming-hlg-decode-file
79 changes: 79 additions & 0 deletions swift-api-examples/streaming-hlg-decode-file.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import AVFoundation

extension AudioBuffer {
  /// Copy this buffer's contents into a Swift [Float] array.
  /// NOTE(review): assumes the underlying data holds Float32 samples — the
  /// caller asserts pcmFormatFloat32 before using this; confirm for any
  /// other call sites.
  func array() -> [Float] {
    return Array(UnsafeBufferPointer(self))
  }
}

extension AVAudioPCMBuffer {
  /// Flatten the first audio buffer (mBuffers) into a Swift [Float] array.
  /// NOTE(review): reads only mBuffers, i.e. effectively the first channel —
  /// the caller asserts channelCount == 1 before using this.
  func array() -> [Float] {
    return self.audioBufferList.pointee.mBuffers.array()
  }
}

/// Decode one wave file with the streaming zipformer2 CTC model plus an HLG
/// FST graph, and print the recognition result.
func run() {
  // Paths inside the model directory downloaded by
  // run-streaming-hlg-decode-file.sh.
  let waveFilename =
    "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav"
  let modelPath =
    "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx"
  let tokensPath = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt"

  // Assemble the recognizer configuration.
  let ctcModelConfig = sherpaOnnxOnlineZipformer2CtcModelConfig(
    model: modelPath
  )
  let modelConfig = sherpaOnnxOnlineModelConfig(
    tokens: tokensPath,
    zipformer2Ctc: ctcModelConfig
  )
  let featConfig = sherpaOnnxFeatureConfig(
    sampleRate: 16000,
    featureDim: 80
  )
  let fstDecoderConfig = sherpaOnnxOnlineCtcFstDecoderConfig(
    graph: "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst",
    maxActive: 3000
  )
  var config = sherpaOnnxOnlineRecognizerConfig(
    featConfig: featConfig,
    modelConfig: modelConfig,
    ctcFstDecoderConfig: fstDecoderConfig
  )

  let recognizer = SherpaOnnxRecognizer(config: &config)

  // Load the whole wave file; the example only supports mono Float32 PCM.
  let fileURL: NSURL = NSURL(fileURLWithPath: waveFilename)
  let audioFile = try! AVAudioFile(forReading: fileURL as URL)
  let audioFormat = audioFile.processingFormat
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let frameCount = UInt32(audioFile.length)
  let pcmBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: frameCount)
  try! audioFile.read(into: pcmBuffer!)
  let samples: [Float]! = pcmBuffer?.array()

  // Feed the audio, then some trailing silence, and signal end of input so
  // the final frames are flushed through the model.
  recognizer.acceptWaveform(samples: samples, sampleRate: Int(audioFormat.sampleRate))
  let tailPadding = [Float](repeating: 0.0, count: 3200)
  recognizer.acceptWaveform(samples: tailPadding, sampleRate: Int(audioFormat.sampleRate))
  recognizer.inputFinished()

  // Drain the decoder.
  while recognizer.isReady() {
    recognizer.decode()
  }

  let result = recognizer.getResult()
  print("\nresult is:\n\(result.text)")
}

/// Program entry point: runs the streaming HLG decoding example once.
@main
struct App {
  static func main() {
    run()
  }
}