Skip to content

L-BFGS optimization hangs when using TorchSharp.Cuda and not TorchSharp.CPU #1541

@GeorgeS2019

Description

@GeorgeS2019

An xUnit test hangs indefinitely during L-BFGS optimization on CUDA when using TorchSharp version 0.105.2.

FYI: I am not using the latest version (0.106.0) because it no longer supports Compute Capability 6.1.

using System;
using TorchSharp;
using TorchSharp.Modules;
using Xunit;
using static TorchSharp.torch;

/// <summary>
/// Stress test that repeatedly runs L-BFGS optimizer steps against a tensor placed on the
/// CUDA device and fails if any single step does not finish within <see cref="StepTimeout"/>.
/// </summary>
public class LbfgsCudaStressTests
{
    /// <summary>Number of consecutive <c>step()</c> calls issued by the stress loop.</summary>
    private const int Iterations = 200;

    /// <summary>Maximum time a single <c>step()</c> call may take before it is reported as a hang.</summary>
    private static readonly TimeSpan StepTimeout = TimeSpan.FromSeconds(2);

    private readonly Device _gpu = CUDA;

    /// <summary>
    /// Minimizes the quadratic loss <c>(w - 3)^2</c> with L-BFGS on CUDA, running every step on a
    /// worker task so that a step which never returns is turned into a test failure instead of
    /// blocking the test runner forever.
    /// </summary>
    /// <remarks>
    /// Returns immediately (test passes) when CUDA is not available. If the optimizer step throws,
    /// <c>Task.Wait</c> surfaces the failure as an <see cref="AggregateException"/>.
    /// </remarks>
    [Fact]
    public void Lbfgs_Should_Not_Hang_On_CUDA()
    {
        if (!cuda.is_available())
            return; // Skip on machines without CUDA

        // NOTE(review): tensors created inside the worker task below may not be tracked by this
        // scope if dispose scopes are per-thread in TorchSharp — confirm.
        using var scope = NewDisposeScope();

        var device = _gpu;

        // Simple 1-parameter model
        var w = torch.randn(new long[] { 1 }, device: device, requiresGrad: true);

        var optimizer = optim.LBFGS(new[] { w }, lr: 1.0);

        // Stress loop: the reported hang shows up when step() is called repeatedly on CUDA.
        for (int i = 0; i < Iterations; i++)
        {
            // Timeout guard: run the step on a worker task so a hang cannot block this thread.
            var task = System.Threading.Tasks.Task.Run(() =>
            {
                optimizer.step(() =>
                {
                    // L-BFGS may evaluate the closure several times per step; backward()
                    // accumulates into w.grad, so stale gradients must be cleared first.
                    optimizer.zero_grad();

                    // Simple quadratic loss: (w - 3)^2
                    var loss = (w - 3).pow(2).sum();
                    loss.backward();
                    return loss;
                });
            });

            // Wait returns false when the timeout elapses first; that is the hang signal.
            // NOTE(review): the first iteration may also pay one-time CUDA warm-up cost —
            // confirm the timeout is generous enough to avoid false positives.
            bool finishedInTime = task.Wait(StepTimeout);

            Assert.True(finishedInTime, $"L-BFGS hang detected on CUDA at iteration {i}");
        }
    }
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug — Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions