
Adding autograd.backward and unit tests. #692

Merged
merged 1 commit on Aug 9, 2022
15 changes: 15 additions & 0 deletions src/Native/LibTorchSharp/THSAutograd.cpp
@@ -33,3 +33,18 @@ void THSAutograd_grad(
for (size_t i = 0; i < sz; i++)
result[i] = ResultTensor(res[i]);
}

void THSAutograd_backward(
Tensor* tensors, const int64_t tLength,
Tensor* grad_tensors, const int64_t gtLength,
bool retain_graph, bool create_graph,
Tensor* inputs, const int64_t iLength)
{
CATCH(
torch::autograd::backward(
toTensors<at::Tensor>((torch::Tensor**)tensors, tLength),
toTensors<at::Tensor>((torch::Tensor**)grad_tensors, gtLength),
retain_graph, create_graph,
toTensors<at::Tensor>((torch::Tensor**)inputs, iLength));
);
}
6 changes: 6 additions & 0 deletions src/Native/LibTorchSharp/THSAutograd.h
@@ -17,3 +17,9 @@ EXPORT_API(void) THSAutograd_grad(
Tensor* grad_outs, const int64_t gLenght,
bool retain_graph, bool create_graph, bool allow_unused,
Tensor* (*allocator)(size_t length));

EXPORT_API(void) THSAutograd_backward(
Tensor* tensors, const int64_t tLength,
Tensor* grad_tensors, const int64_t gtLength,
bool retain_graph, bool create_graph,
Tensor* inputs, const int64_t iLength);
118 changes: 117 additions & 1 deletion src/TorchSharp/Autograd.cs
@@ -105,7 +105,9 @@ private static extern void THSAutograd_grad(
IntPtr outputs, long oLength,
IntPtr inputs, long iLength,
IntPtr grad_outs, long gLength,
bool retain_graph, bool create_graph, bool allow_unused,
[MarshalAs(UnmanagedType.U1)] bool retain_graph,
[MarshalAs(UnmanagedType.U1)] bool create_graph,
[MarshalAs(UnmanagedType.U1)] bool allow_unused,
AllocatePinnedArray allocator);

/// <summary>
@@ -149,9 +151,123 @@ public static IList<Tensor> grad(IList<Tensor> outputs, IList<Tensor> inputs, IL
}

return result.Select(x => new Tensor(x)).ToList();
}

[DllImport("LibTorchSharp")]
private static extern void THSAutograd_backward(
IntPtr tensors, long tLength,
IntPtr grad_tensors, long gtLength,
[MarshalAs(UnmanagedType.U1)] bool retain_graph,
[MarshalAs(UnmanagedType.U1)] bool create_graph,
IntPtr inputs, long iLength);

/// <summary>
/// Computes the sum of gradients of given tensors with respect to graph leaves.
/// </summary>
/// <param name="tensors">Tensors of which the derivative will be computed.</param>
/// <param name="grad_tensors">
/// The “vector” in the Jacobian-vector product, usually gradients w.r.t. each element of corresponding tensors.
/// Null values can be specified for scalar Tensors or ones that don’t require grad.
/// If a null value would be acceptable for all grad_tensors, then this argument is optional.
/// </param>
/// <param name="retain_graph">If false, the graph used to compute the grad will be freed.
/// Note that in nearly all cases setting this option to true is not needed and often can be worked around in a much more efficient way.
/// Defaults to the value of create_graph.</param>
/// <param name="create_graph">If true, graph of the derivative will be constructed, allowing to compute higher order derivative products. Defaults to false.</param>
/// <param name="inputs">
/// Inputs w.r.t. which the gradient be will accumulated into .grad. All other Tensors will be ignored.
/// If not provided, the gradient is accumulated into all the leaf Tensors that were used to compute the attr::tensors.
/// </param>
/// <remarks>
/// The graph is differentiated using the chain rule. If any of tensors are non-scalar (i.e. their data has more than one element) and require gradient,
/// then the Jacobian-vector product would be computed, in this case the function additionally requires specifying grad_tensors.
///
/// It should be a sequence of matching length, that contains the “vector” in the Jacobian-vector product, usually the gradient of the differentiated
/// function w.r.t. corresponding tensors (null is an acceptable value for all tensors that don’t need gradient tensors).
///
/// This function accumulates gradients in the leaves - you might need to zero the .grad properties or set them to null before calling it.
/// </remarks>
public static void backward(IList<Tensor> tensors, IList<Tensor> grad_tensors = null, bool? retain_graph = null, bool create_graph = false, IList<Tensor> inputs = null)
{
bool rt = retain_graph.HasValue ? retain_graph.Value : create_graph;

using (var ts = new PinnedArray<IntPtr>())
using (var gts = new PinnedArray<IntPtr>())
using (var ins = new PinnedArray<IntPtr>()) {

IntPtr tensRef = ts.CreateArray(tensors.Select(p => p.Handle).ToArray());
IntPtr gradsRef = grad_tensors == null ? IntPtr.Zero : gts.CreateArray(grad_tensors.Select(p => p.Handle).ToArray());
IntPtr insRef = inputs == null ? IntPtr.Zero : ins.CreateArray(inputs.Select(p => p.Handle).ToArray());
long insLength = inputs == null ? 0 : ins.Array.Length;
long gradsLength = grad_tensors == null ? 0 : gts.Array.Length;

THSAutograd_backward(tensRef, ts.Array.Length, gradsRef, gradsLength, rt, create_graph, insRef, insLength);
torch.CheckForErrors();
}
}

/// <summary>
/// Computes the sum of gradients of given tensors with respect to graph leaves.
/// </summary>
/// <param name="tensor">Tensor of which the derivative will be computed.</param>
/// <param name="grad_tensors">
/// The “vector” in the Jacobian-vector product, usually gradients w.r.t. each element of corresponding tensors.
/// Null values can be specified for scalar Tensors or ones that don’t require grad.
/// If a null value would be acceptable for all grad_tensors, then this argument is optional.
/// </param>
/// <param name="retain_graph">If false, the graph used to compute the grad will be freed.
/// Note that in nearly all cases setting this option to true is not needed and often can be worked around in a much more efficient way.
/// Defaults to the value of create_graph.</param>
/// <param name="create_graph">If true, graph of the derivative will be constructed, allowing to compute higher order derivative products. Defaults to false.</param>
/// <param name="inputs">
/// Inputs w.r.t. which the gradient be will accumulated into .grad. All other Tensors will be ignored.
/// If not provided, the gradient is accumulated into all the leaf Tensors that were used to compute the attr::tensors.
/// </param>
/// <remarks>
/// The graph is differentiated using the chain rule. If any of tensors are non-scalar (i.e. their data has more than one element) and require gradient,
/// then the Jacobian-vector product would be computed, in this case the function additionally requires specifying grad_tensors.
///
/// It should be a sequence of matching length, that contains the “vector” in the Jacobian-vector product, usually the gradient of the differentiated
/// function w.r.t. corresponding tensors (null is an acceptable value for all tensors that don’t need gradient tensors).
///
/// This function accumulates gradients in the leaves - you might need to zero the .grad properties or set them to null before calling it.
/// </remarks>
public static void backward(Tensor tensor, IList<Tensor> grad_tensors = null, bool? retain_graph = null, bool create_graph = false, IList<Tensor> inputs = null)
{
backward(new[] { tensor }, grad_tensors, retain_graph, create_graph, inputs);
}

/// <summary>
/// Computes the sum of gradients of given tensors with respect to graph leaves.
/// </summary>
/// <param name="tensor">Tensor of which the derivative will be computed.</param>
/// <param name="grad_tensor">
/// The “vector” in the Jacobian-vector product, usually gradients w.r.t. each element of corresponding tensors.
/// Null values can be specified for scalar Tensors or ones that don’t require grad.
/// If a null value would be acceptable for all grad_tensors, then this argument is optional.
/// </param>
/// <param name="retain_graph">If false, the graph used to compute the grad will be freed.
/// Note that in nearly all cases setting this option to true is not needed and often can be worked around in a much more efficient way.
/// Defaults to the value of create_graph.</param>
/// <param name="create_graph">If true, graph of the derivative will be constructed, allowing to compute higher order derivative products. Defaults to false.</param>
/// <param name="inputs">
/// Inputs w.r.t. which the gradient be will accumulated into .grad. All other Tensors will be ignored.
/// If not provided, the gradient is accumulated into all the leaf Tensors that were used to compute the attr::tensors.
/// </param>
/// <remarks>
/// The graph is differentiated using the chain rule. If any of tensors are non-scalar (i.e. their data has more than one element) and require gradient,
/// then the Jacobian-vector product would be computed, in this case the function additionally requires specifying grad_tensors.
///
/// It should be a sequence of matching length, that contains the “vector” in the Jacobian-vector product, usually the gradient of the differentiated
/// function w.r.t. corresponding tensors (null is an acceptable value for all tensors that don’t need gradient tensors).
///
/// This function accumulates gradients in the leaves - you might need to zero the .grad properties or set them to null before calling it.
/// </remarks>
public static void backward(Tensor tensor, Tensor grad_tensor, bool? retain_graph = null, bool create_graph = false, IList<Tensor> inputs = null)
{
backward(new[] { tensor }, new[] { grad_tensor }, retain_graph, create_graph, inputs);
}

}
}
}
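For reference, a minimal usage sketch of the new torch.autograd.backward overload, mirroring the unit tests added in this PR; it is not part of the diff, and the tensor names and values are illustrative only.

// Minimal sketch: backward over a list of outputs, with explicit grad_tensors
// (the "vector" in the Jacobian-vector product).
using TorchSharp;

var x1 = torch.rand(1, requiresGrad: true);
var x2 = torch.rand(1, requiresGrad: true);
var y = x1.pow(2) + 5 * x2;

torch.autograd.backward(new[] { y }, new[] { torch.ones_like(y) });

// Gradients are accumulated into the leaf tensors:
var g1 = x1.grad();   // equals 2 * x1
var g2 = x2.grad();   // equals 5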
3 changes: 3 additions & 0 deletions src/TorchSharp/Tensor/Tensor.cs
@@ -620,6 +620,9 @@ public bool is_sparse {
}
}

public void backward(IList<Tensor>? grad_tensors = null, bool create_graph = false, bool retain_graph = false, IList<Tensor>? inputs = null) =>
torch.autograd.backward(new[] { this }, grad_tensors, retain_graph: retain_graph, create_graph: create_graph, inputs: inputs);

[DllImport("LibTorchSharp")]
static extern IntPtr THSTensor_load([MarshalAs(UnmanagedType.LPStr)] string location);

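Similarly, a hedged sketch of the new Tensor.backward instance method added above, again modeled on the tests in this PR (see TestAutoGradBackward2 below); the names are hypothetical.

// Minimal sketch: calling backward directly on a tensor.
using TorchSharp;

var x = torch.rand(1, requiresGrad: true);
var y = x.pow(2) + 3 * x;

// Explicit grad_tensors, as in the tests added below.
y.backward(new[] { torch.ones_like(y) });

var dx = x.grad();   // equals 2 * x + 3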
48 changes: 48 additions & 0 deletions test/TorchSharpTest/TestNNUtils.cs
@@ -50,5 +50,53 @@ public void TestPackSequence()
packed_sequence.Dispose();
Assert.True(torch.max(torch.square(inverted_sequences - padded_sequences)).item<long>() == 0);
}

[Fact]
public void TestAutoGradGrad()
{
using var _ = torch.NewDisposeScope();
var x1 = torch.rand(1, requiresGrad: true);
var x2 = torch.rand(1, requiresGrad: true);

var y = x1.pow(2) + 5 * x2;

var grad = torch.autograd.grad(new[] { y }, new[] { x1, x2 }, new[] { torch.ones_like(y) });
Assert.Equal(x1.shape, grad[0].shape);
Assert.Equal(x2.shape, grad[1].shape);
Assert.Equal(2.0f * x1.item<float>(), grad[0].item<float>());
Assert.Equal(5.0f, grad[1].item<float>());
}

[Fact]
public void TestAutoGradBackward1()
{
using var _ = torch.NewDisposeScope();
var x1 = torch.rand(1, requiresGrad: true);
var x2 = torch.rand(1, requiresGrad: true);

var y = x1.pow(2) + 5 * x2;

torch.autograd.backward(new[] { y }, new[] { torch.ones_like(y) });
Assert.Equal(x1.shape, x1.grad().shape);
Assert.Equal(x2.shape, x2.grad().shape);
Assert.Equal(2.0f*x1.item<float>(), x1.grad().item<float>());
Assert.Equal(5.0f, x2.grad().item<float>());
}

[Fact]
public void TestAutoGradBackward2()
{
using var _ = torch.NewDisposeScope();
var x1 = torch.rand(1, requiresGrad: true);
var x2 = torch.rand(1, requiresGrad: true);

var y = x1.pow(2) + 5 * x2;

y.backward(new[] { torch.ones_like(y) });
Assert.Equal(x1.shape, x1.grad().shape);
Assert.Equal(x2.shape, x2.grad().shape);
Assert.Equal(2.0f * x1.item<float>(), x1.grad().item<float>());
Assert.Equal(5.0f, x2.grad().item<float>());
}
}
}
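One point from the XML remarks that the tests above do not exercise: backward accumulates into .grad, so repeated calls sum their results. A hypothetical sketch (not one of the tests in this PR) illustrating that, together with retain_graph:

// Hypothetical sketch: gradients accumulate across backward calls.
// retain_graph: true keeps the graph alive so a second backward pass is legal.
using TorchSharp;

var x = torch.rand(1, requiresGrad: true);
var y = x.pow(2);

torch.autograd.backward(new[] { y }, new[] { torch.ones_like(y) }, retain_graph: true);
torch.autograd.backward(new[] { y }, new[] { torch.ones_like(y) });

// x.grad() now holds 2 * (2 * x): the two passes were summed into .grad.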