Add benchmark for Inference

PhilippNaused · PhilippNaused · commit e24e5883f20b · 2025-06-28T00:14:32.000+02:00
diff --git a/Benchmark/Benchmark.csproj b/Benchmark/Benchmark.csproj
@@ -10,6 +10,7 @@
 
     <ItemGroup>
         <PackageReference Include="BenchmarkDotNet" Version="0.15.2" />
+        <PackageReference Include="Microsoft.ML.OnnxRuntime.Gpu" Version="1.22.0" />
     </ItemGroup>
 
     <ItemGroup>
diff --git a/Benchmark/Inference.cs b/Benchmark/Inference.cs
@@ -0,0 +1,62 @@
+﻿using BenchmarkDotNet.Attributes;
+
+using KokoroSharp;
+using KokoroSharp.Core;
+
+namespace Benchmark;
+
+/// <summary>
+/// Benchmarks raw Kokoro inference for every <see cref="KModel"/> variant, once with default session
+/// options (the CPU benchmark) and once with the CUDA execution provider appended (the CUDA benchmark).
+/// </summary>
+[InProcess]
+public class Inference {
+    const string text = "This is a performance benchmark of Kokoro.";
+
+    // Static on purpose: [InProcess] runs every benchmark case inside this process, so the inputs and
+    // sessions prepared by the first Setup() call can be shared by all later cases.
+    static readonly Dictionary<(KModel, bool UseCuda), KokoroModel> models = [];
+    static int[]? tokens;
+    static KokoroVoice? voice;
+
+    /// <summary>
+    /// Prepares the shared inputs and makes sure a CPU and a CUDA session exist for every model variant.
+    /// </summary>
+    /// <remarks>
+    /// BenchmarkDotNet invokes the global setup before each benchmark case (method and parameter
+    /// combination), not once per class. Whatever an earlier call prepared is therefore reused:
+    /// rebuilding would overwrite the cached sessions without ever releasing the previous ones.
+    /// </remarks>
+    [GlobalSetup]
+    public void Setup() {
+        tokens ??= KokoroSharp.Processing.Tokenizer.Tokenize(text);
+        voice ??= KokoroVoiceManager.GetVoice("af_heart");
+
+        foreach (var model in Enum.GetValues<KModel>()) {
+            if (!KokoroTTS.IsDownloaded(model))
+                KokoroTTS.LoadModel(model).Dispose(); // downloads the model if not already present.
+
+            foreach (var useCuda in new[] { false, true }) {
+                if (models.ContainsKey((model, useCuda)))
+                    continue; // session survives from a previous case; keep it instead of replacing it.
+
+                var options = new Microsoft.ML.OnnxRuntime.SessionOptions();
+                if (useCuda)
+                    options.AppendExecutionProvider_CUDA(); // Use CUDA for GPU inference.
+
+                models[(model, useCuda)] = new KokoroModel(KokoroTTS.ModelNamesMap[model], options);
+            }
+        }
+    }
+
+    /// <summary>Model variant under test; BenchmarkDotNet runs each benchmark once per enum value.</summary>
+    [ParamsAllValues]
+    public KModel Model { get; set; }
+
+    /// <summary>Runs one inference pass on the session created with default options.</summary>
+    [Benchmark]
+    public float[] CPU() => models[(Model, false)].Infer(tokens!, voice!.Features);
+
+    /// <summary>Runs one inference pass on the session created with the CUDA execution provider.</summary>
+    [Benchmark]
+    public float[] CUDA() => models[(Model, true)].Infer(tokens!, voice!.Features);
+}
diff --git a/KokoroSharp/HighLevel/KokoroLoader.cs b/KokoroSharp/HighLevel/KokoroLoader.cs
@@ -8,7 +8,7 @@
 public enum KModel { float32, float16, int8 }
 
 public partial class KokoroTTS {
-    static IReadOnlyDictionary<KModel, string> ModelNamesMap { get; } = new Dictionary<KModel, string>() {
+    internal static IReadOnlyDictionary<KModel, string> ModelNamesMap { get; } = new Dictionary<KModel, string>() {
         { float32, "kokoro.onnx" },
         { float16, "kokoro-quant.onnx" },
         { int8,    "kokoro-quant-convinteger.onnx" },