ooples
diff --git a/‎AiDotNet.sln‎
Lines changed: 12 additions & 0 deletions b/‎AiDotNet.sln‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎src/AiDotNet.Serving/AiDotNet.Serving.csproj‎
Lines changed: 21 additions & 0 deletions b/‎src/AiDotNet.Serving/AiDotNet.Serving.csproj‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎src/AiDotNet.Serving/Configuration/ServingOptions.cs‎
Lines changed: 64 additions & 0 deletions b/‎src/AiDotNet.Serving/Configuration/ServingOptions.cs‎
Lines changed: 64 additions & 0 deletions
diff --git a/‎src/AiDotNet.Serving/Controllers/InferenceController.cs‎
Lines changed: 218 additions & 0 deletions b/‎src/AiDotNet.Serving/Controllers/InferenceController.cs‎
Lines changed: 218 additions & 0 deletions
@@ -11,6 +11,10 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AiDotNetTests", "tests\AiDo
 EndProject
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AiDotNetBenchmarkTests", "AiDotNetBenchmarkTests\AiDotNetBenchmarkTests.csproj", "{42B9395F-DD55-46EB-9AF5-E7837AA5BB1C}"
 EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AiDotNet.Serving", "src\AiDotNet.Serving\AiDotNet.Serving.csproj", "{E8B7F9A1-3C4D-4E5F-9A7B-8C1D2E3F4A5B}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AiDotNet.Serving.Tests", "tests\AiDotNet.Serving.Tests\AiDotNet.Serving.Tests.csproj", "{F9C8E7D6-4B3A-5E2F-8A9B-1D0C3E2F5A4B}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -33,6 +37,14 @@ Global
 		{42B9395F-DD55-46EB-9AF5-E7837AA5BB1C}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{42B9395F-DD55-46EB-9AF5-E7837AA5BB1C}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{42B9395F-DD55-46EB-9AF5-E7837AA5BB1C}.Release|Any CPU.Build.0 = Release|Any CPU
+		{E8B7F9A1-3C4D-4E5F-9A7B-8C1D2E3F4A5B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{E8B7F9A1-3C4D-4E5F-9A7B-8C1D2E3F4A5B}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{E8B7F9A1-3C4D-4E5F-9A7B-8C1D2E3F4A5B}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{E8B7F9A1-3C4D-4E5F-9A7B-8C1D2E3F4A5B}.Release|Any CPU.Build.0 = Release|Any CPU
+		{F9C8E7D6-4B3A-5E2F-8A9B-1D0C3E2F5A4B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{F9C8E7D6-4B3A-5E2F-8A9B-1D0C3E2F5A4B}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{F9C8E7D6-4B3A-5E2F-8A9B-1D0C3E2F5A4B}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{F9C8E7D6-4B3A-5E2F-8A9B-1D0C3E2F5A4B}.Release|Any CPU.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
 
@@ -0,0 +1,21 @@
+<Project Sdk="Microsoft.NET.Sdk.Web">
+
+  <PropertyGroup>
+    <TargetFramework>net8.0</TargetFramework>
+    <Nullable>enable</Nullable>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <RootNamespace>AiDotNet.Serving</RootNamespace>
+    <GenerateDocumentationFile>true</GenerateDocumentationFile>
+    <NoWarn>$(NoWarn);1591</NoWarn>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="Microsoft.AspNetCore.OpenApi" Version="8.0.0" />
+    <PackageReference Include="Swashbuckle.AspNetCore" Version="6.5.0" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="..\AiDotNet.csproj" />
+  </ItemGroup>
+
+</Project>
@@ -0,0 +1,64 @@
+namespace AiDotNet.Serving.Configuration;
+
+/// <summary>
+/// Configuration options for the model serving framework.
+/// This class defines settings for server behavior, request batching, and startup model loading.
+/// </summary>
+public class ServingOptions
+{
+    /// <summary>
+    /// Gets or sets the port number on which the server will listen.
+    /// Default is 5000.
+    /// </summary>
+    public int Port { get; set; } = 5000;
+
+    /// <summary>
+    /// Gets or sets the batching window in milliseconds.
+    /// This is the maximum time the batcher will wait before processing accumulated requests.
+    /// Default is 10 milliseconds.
+    /// </summary>
+    public int BatchingWindowMs { get; set; } = 10;
+
+    /// <summary>
+    /// Gets or sets the maximum batch size for inference requests.
+    /// If set to 0 or less, there is no limit on batch size.
+    /// Default is 100.
+    /// </summary>
+    public int MaxBatchSize { get; set; } = 100;
+
+    /// <summary>
+    /// Gets or sets the root directory where model files are stored.
+    /// Model paths are restricted to this directory for security.
+    /// Default is "models" relative to the application directory.
+    /// </summary>
+    public string ModelDirectory { get; set; } = "models";
+
+    /// <summary>
+    /// Gets or sets the list of models to load at startup.
+    /// </summary>
+    public List<StartupModel> StartupModels { get; set; } = new();
+}
+
+/// <summary>
+/// Represents a model to be loaded when the server starts.
+/// </summary>
+public class StartupModel
+{
+    /// <summary>
+    /// Gets or sets the name of the model.
+    /// This will be used as the identifier for API requests.
+    /// </summary>
+    public string Name { get; set; } = string.Empty;
+
+    /// <summary>
+    /// Gets or sets the file path to the serialized model.
+    /// </summary>
+    public string Path { get; set; } = string.Empty;
+
+    /// <summary>
+    /// Gets or sets the numeric type used by the model.
+    /// Supported values: "double", "float", "decimal"
+    /// Default is "double".
+    /// </summary>
+    public string NumericType { get; set; } = "double";
+}
@@ -0,0 +1,218 @@
+using System.Diagnostics;
+using Microsoft.AspNetCore.Mvc;
+using AiDotNet.LinearAlgebra;
+using AiDotNet.Serving.Models;
+using AiDotNet.Serving.Services;
+
+namespace AiDotNet.Serving.Controllers;
+
+/// <summary>
+/// Controller for model inference operations.
+/// Handles prediction requests and routes them through the request batcher
+/// for high-performance batch processing.
+/// </summary>
+[ApiController]
+[Route("api/[controller]")]
+[Produces("application/json")]
+public class InferenceController : ControllerBase
+{
+    private readonly IModelRepository _modelRepository;
+    private readonly IRequestBatcher _requestBatcher;
+    private readonly ILogger<InferenceController> _logger;
+
+    /// <summary>
+    /// Initializes a new instance of the InferenceController.
+    /// </summary>
+    /// <param name="modelRepository">The model repository service</param>
+    /// <param name="requestBatcher">The request batcher service</param>
+    /// <param name="logger">Logger for diagnostics</param>
+    public InferenceController(
+        IModelRepository modelRepository,
+        IRequestBatcher requestBatcher,
+        ILogger<InferenceController> logger)
+    {
+        _modelRepository = modelRepository ?? throw new ArgumentNullException(nameof(modelRepository));
+        _requestBatcher = requestBatcher ?? throw new ArgumentNullException(nameof(requestBatcher));
+        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
+    }
+
+    /// <summary>
+    /// Performs prediction using the specified model.
+    /// Requests are automatically batched for optimal throughput.
+    /// </summary>
+    /// <param name="modelName">The name of the model to use</param>
+    /// <param name="request">The prediction request containing input features</param>
+    /// <returns>Prediction results</returns>
+    /// <response code="200">Prediction completed successfully</response>
+    /// <response code="400">Invalid request format</response>
+    /// <response code="404">Model not found</response>
+    /// <response code="500">Error during prediction</response>
+    [HttpPost("predict/{modelName}")]
+    [ProducesResponseType(typeof(PredictionResponse), StatusCodes.Status200OK)]
+    [ProducesResponseType(StatusCodes.Status400BadRequest)]
+    [ProducesResponseType(StatusCodes.Status404NotFound)]
+    [ProducesResponseType(StatusCodes.Status500InternalServerError)]
+    public async Task<IActionResult> Predict(string modelName, [FromBody] PredictionRequest request)
+    {
+        var sw = Stopwatch.StartNew();
+
+        try
+        {
+            _logger.LogDebug("Received prediction request for model '{ModelName}'", modelName);
+
+            // Validate request
+            if (request.Features == null || request.Features.Length == 0)
+            {
+                return BadRequest(new { error = "Features array is required and cannot be empty" });
+            }
+
+            // Check if model exists
+            var modelInfo = _modelRepository.GetModelInfo(modelName);
+            if (modelInfo == null)
+            {
+                _logger.LogWarning("Model '{ModelName}' not found", modelName);
+                return NotFound(new { error = $"Model '{modelName}' not found" });
+            }
+
+            // Validate feature dimensions
+            for (int i = 0; i < request.Features.Length; i++)
+            {
+                if (request.Features[i].Length != modelInfo.InputDimension)
+                {
+                    return BadRequest(new
+                    {
+                        error = $"Feature vector at index {i} has {request.Features[i].Length} dimensions, " +
+                                $"but model '{modelName}' expects {modelInfo.InputDimension} dimensions"
+                    });
+                }
+            }
+
+            // Process based on numeric type
+            double[][] predictions;
+            int batchSize = request.Features.Length;
+
+            switch (modelInfo.NumericType.ToLower())
+            {
+                case "double":
+                    predictions = await PredictWithType<double>(modelName, request.Features);
+                    break;
+                case "single":
+                    predictions = await PredictWithType<float>(modelName, request.Features);
+                    break;
+                case "decimal":
+                    predictions = await PredictWithType<decimal>(modelName, request.Features);
+                    break;
+                default:
+                    return BadRequest(new { error = $"Unsupported numeric type: {modelInfo.NumericType}" });
+            }
+
+            sw.Stop();
+
+            var response = new PredictionResponse
+            {
+                Predictions = predictions,
+                RequestId = request.RequestId,
+                ProcessingTimeMs = sw.ElapsedMilliseconds,
+                BatchSize = batchSize
+            };
+
+            _logger.LogInformation(
+                "Prediction completed for model '{ModelName}' in {ElapsedMs}ms (batch size: {BatchSize})",
+                modelName, sw.ElapsedMilliseconds, batchSize);
+
+            return Ok(response);
+        }
+        catch (InvalidOperationException ex)
+        {
+            _logger.LogError(ex, "Invalid operation during prediction for model '{ModelName}'", modelName);
+            return StatusCode(500, new { error = $"Model operation error: {ex.Message}" });
+        }
+        catch (NotSupportedException ex)
+        {
+            _logger.LogError(ex, "Unsupported operation for model '{ModelName}'", modelName);
+            return StatusCode(500, new { error = $"Unsupported operation: {ex.Message}" });
+        }
+        catch (ArgumentException ex)
+        {
+            _logger.LogError(ex, "Invalid argument during prediction for model '{ModelName}'", modelName);
+            return BadRequest(new { error = $"Invalid input: {ex.Message}" });
+        }
+        catch (Exception ex)
+        {
+            _logger.LogError(ex, "Unexpected error during prediction for model '{ModelName}'", modelName);
+            return StatusCode(500, new { error = $"An unexpected error occurred during prediction: {ex.Message}" });
+        }
+    }
+
+    /// <summary>
+    /// Performs prediction with a specific numeric type.
+    /// </summary>
+    private async Task<double[][]> PredictWithType<T>(string modelName, double[][] features)
+    {
+        // Queue all requests first to enable batching
+        var tasks = features.Select(featureArray =>
+        {
+            var inputVector = ConvertToVector<T>(featureArray);
+            return _requestBatcher.QueueRequest(modelName, inputVector);
+        }).ToArray();
+
+        // Await all requests together
+        var resultVectors = await Task.WhenAll(tasks);
+
+        // Convert results back to double arrays
+        var predictions = new double[resultVectors.Length][];
+        for (int i = 0; i < resultVectors.Length; i++)
+        {
+            predictions[i] = ConvertFromVector(resultVectors[i]);
+        }
+
+        return predictions;
+    }
+
+    /// <summary>
+    /// Converts a double array to a Vector of the specified type.
+    /// </summary>
+    private static Vector<T> ConvertToVector<T>(double[] values)
+    {
+        var result = new Vector<T>(values.Length);
+        for (int i = 0; i < values.Length; i++)
+        {
+            result[i] = ConvertValue<T>(values[i]);
+        }
+        return result;
+    }
+
+    /// <summary>
+    /// Converts a Vector back to a double array.
+    /// </summary>
+    private static double[] ConvertFromVector<T>(Vector<T> vector)
+    {
+        var result = new double[vector.Length];
+        for (int i = 0; i < vector.Length; i++)
+        {
+            result[i] = Convert.ToDouble(vector[i]);
+        }
+        return result;
+    }
+
+    /// <summary>
+    /// Converts a double value to the specified type.
+    /// </summary>
+    private static T ConvertValue<T>(double value)
+    {
+        return (T)Convert.ChangeType(value, typeof(T));
+    }
+
+    /// <summary>
+    /// Gets statistics about the request batcher's performance.
+    /// </summary>
+    /// <returns>Batcher statistics</returns>
+    /// <response code="200">Returns batcher statistics</response>
+    [HttpGet("stats")]
+    [ProducesResponseType(typeof(Dictionary<string, object>), StatusCodes.Status200OK)]
+    public ActionResult<Dictionary<string, object>> GetStatistics()
+    {
+        var stats = _requestBatcher.GetStatistics();
+        return Ok(stats);
+    }
+}