"Fix" perturbation confusion, ushering in a new age of glorious dual numbers and prosperity #213

Merged · 26 commits · Apr 13, 2017
Changes from all commits
76ec61a
add method for making single seed and improve performance of multi-se…
jrevels Feb 16, 2017
dbc9081
implement multi-arg form of derivative
jrevels Feb 16, 2017
2e8d016
refactor Dual to include a tag type parameter
jrevels Feb 17, 2017
672d9aa
unroll config type definitions for readability
jrevels Feb 24, 2017
acd73c1
fix binary Dual ambiguities
jrevels Feb 24, 2017
f486b71
fix predicate definition order
jrevels Feb 24, 2017
6265392
fix type parameter switchup in Dual implementation
jrevels Mar 24, 2017
1502c5d
update configuration API with new tagging infrastructure and begin re…
jrevels Mar 24, 2017
188052c
get all tests passing except for deprecation layer + SIMD tests
jrevels Mar 24, 2017
1fd6981
attempt to fix method ambiguity on Julia v0.5
jrevels Mar 29, 2017
5224972
drop Julia v0.5
jrevels Apr 7, 2017
4662ccd
get ternary dual operation definition macro working (note that it is …
jrevels Apr 7, 2017
72b3b7d
re-enable SIMD tests
jrevels Apr 10, 2017
d01378a
remove non-v0.6 version compat code and replace test_approx_duals wit…
jrevels Apr 10, 2017
4fa1add
implement in-place tuple derivative function
jrevels Apr 10, 2017
a53de0f
fix typo
jrevels Apr 10, 2017
a93cc69
get FMA working
jrevels Apr 11, 2017
56a839e
use where syntax instead of to-be-deprecated prefix typevar syntax
jrevels Apr 11, 2017
799f2dc
test n-ary derivative API + perturbation confusion
jrevels Apr 11, 2017
c11eee2
implement ForwardDiff v0.4.x --> v0.5.x deprecation layer
jrevels Apr 11, 2017
ee11abe
update benchmarks
jrevels Apr 12, 2017
bfd5baf
fix tag pretty printing for symbols
jrevels Apr 12, 2017
e708edb
change T to V in Partials code to avoid confusion with tag parameter
jrevels Apr 12, 2017
84ae3f4
fix jacobian call in benchmarks
jrevels Apr 12, 2017
065f079
update docs
jrevels Apr 12, 2017
200846c
remove stack-allocated/low-dimensional gradient/Jacobian routines
jrevels Apr 13, 2017
1 change: 0 additions & 1 deletion .travis.yml
@@ -1,6 +1,5 @@
language: julia
julia:
- 0.5
- 0.6
- nightly
notifications:
3 changes: 1 addition & 2 deletions REQUIRE
@@ -1,6 +1,5 @@
julia 0.5
julia 0.6-
DiffBase 0.0.3
Compat 0.19.0
Calculus 0.2.0
NaNMath 0.2.2
SpecialFunctions 0.1.0
12 changes: 4 additions & 8 deletions benchmark/ForwardDiffBenchmarks.jl → benchmarks/benchmarks.jl
@@ -1,5 +1,3 @@
module ForwardDiffBenchmarks

using ForwardDiff, DiffBase
using BenchmarkTools

@@ -39,11 +37,11 @@ for f in (DiffBase.VECTOR_TO_NUMBER_FUNCS..., DiffBase.MATRIX_TO_NUMBER_FUNCS...
fval[length(x)] = @benchmarkable $(f)($x)

gout = DiffBase.DiffResult(y, similar(x, typeof(y)))
gcfg = ForwardDiff.Config(x)
gcfg = ForwardDiff.GradientConfig(nothing, x)
fgrad[length(x)] = @benchmarkable ForwardDiff.gradient!($gout, $f, $x, $gcfg)

hout = DiffBase.DiffResult(y, similar(x, typeof(y)), similar(x, typeof(y), length(x), length(x)))
hcfg = ForwardDiff.HessianConfig(hout, x)
hcfg = ForwardDiff.HessianConfig(nothing, hout, x)
fhess[length(x)] = @benchmarkable ForwardDiff.hessian!($hout, $f, $x, $hcfg)
end
end
@@ -56,9 +54,7 @@ for f in DiffBase.ARRAY_TO_ARRAY_FUNCS
fval[length(x)] = @benchmarkable $(f)($x)

out = DiffBase.JacobianResult(y, x)
cfg = ForwardDiff.Config(x)
fjac[length(x)] = @benchmarkable ForwardDiff.jacobian!($out, $f, $x, $cfg)
cfg = ForwardDiff.JacobianConfig(nothing, y, x)
fjac[length(x)] = @benchmarkable ForwardDiff.jacobian!($out, $f, $y, $x, $cfg)
end
end

end # module
10 files renamed without changes.
79 changes: 39 additions & 40 deletions docs/_rst/source/advanced_usage.rst
@@ -9,7 +9,7 @@ Accessing Lower-Order Results

Let's say you want to calculate the value, gradient, and Hessian of some function ``f`` at
an input ``x``. You could execute ``f(x)``, ``ForwardDiff.gradient(f, x)`` and
``ForwardDiff.hessian(f, x)``, but that would be a **horribly redundant way to accomplish
this task!**

In the course of calculating higher-order derivatives, ForwardDiff ends up calculating all
@@ -37,7 +37,7 @@ For example:

.. code-block:: julia

julia> import ForwardDiff
julia> using ForwardDiff: GradientConfig, Chunk, gradient!

# let's use a Rosenbrock function as our target function
julia> function rosenbrock(x)
@@ -58,25 +58,25 @@ For example:
julia> out = similar(x);

# construct GradientConfig with chunk size of 1
julia> cfg1 = ForwardDiff.GradientConfig{1}(x);
julia> cfg1 = GradientConfig(rosenbrock, x, Chunk{1}());

# construct GradientConfig with chunk size of 4
julia> cfg4 = ForwardDiff.GradientConfig{4}(x);
julia> cfg4 = GradientConfig(rosenbrock, x, Chunk{4}());

# construct GradientConfig with chunk size of 10
julia> cfg10 = ForwardDiff.GradientConfig{10}(x);
julia> cfg10 = GradientConfig(rosenbrock, x, Chunk{10}());

# (input length of 10000) / (chunk size of 1) = (10000 1-element chunks)
julia> @time ForwardDiff.gradient!(out, rosenbrock, x, cfg1);
0.408305 seconds (4 allocations: 160 bytes)
julia> @time gradient!(out, rosenbrock, x, cfg1);
0.775139 seconds (4 allocations: 160 bytes)

# (input length of 10000) / (chunk size of 4) = (2500 4-element chunks)
julia> @time ForwardDiff.gradient!(out, rosenbrock, x, cfg4);
0.295764 seconds (4 allocations: 160 bytes)
julia> @time gradient!(out, rosenbrock, x, cfg4);
0.386459 seconds (4 allocations: 160 bytes)

# (input length of 10000) / (chunk size of 10) = (1000 10-element chunks)
julia> @time ForwardDiff.gradient!(out, rosenbrock, x, cfg10);
0.267396 seconds (4 allocations: 160 bytes)
julia> @time gradient!(out, rosenbrock, x, cfg10);
0.282529 seconds (4 allocations: 160 bytes)

If you do not explicitly provide a chunk size, ForwardDiff will try to guess one for you
based on your input vector:
@@ -85,10 +85,10 @@ based on your input vector:

# The GradientConfig constructor will automatically select a
# chunk size if one is not explicitly provided
julia> cfg = ForwardDiff.GradientConfig(x);
julia> cfg = ForwardDiff.GradientConfig(rosenbrock, x);

julia> @time ForwardDiff.gradient!(out, rosenbrock, x, cfg);
0.266920 seconds (4 allocations: 160 bytes)
0.281853 seconds (4 allocations: 160 bytes)

If your input dimension is a constant, you should explicitly select a chunk size rather than
relying on ForwardDiff's heuristic. There are two reasons for this. The first is that
@@ -130,8 +130,8 @@ aren't sensitive to the input and thus cause ForwardDiff to incorrectly return `

# the dual number's perturbation component is zero, so this
# variable should not propagate derivative information
julia> log(ForwardDiff.Dual(0.0, 0.0))
Dual(-Inf,NaN) # oops, this NaN should be 0.0
julia> log(ForwardDiff.Dual{:tag}(0.0, 0.0))
Dual{:tag}(-Inf,NaN) # oops, this NaN should be 0.0

Here, ForwardDiff computes the derivative of ``log(0.0)`` as ``NaN`` and then propagates
this derivative by multiplying it by the perturbation component. Usually, ForwardDiff can
@@ -153,7 +153,6 @@ In the future, we plan on allowing users and downstream library authors to dynam
enable ``NaN``-safe mode via the ``AbstractConfig`` API (see `the relevant issue
<https://github.com/JuliaDiff/ForwardDiff.jl/issues/181>`_).


Hessian of a vector-valued function
-----------------------------------

@@ -163,17 +162,17 @@ For example:

.. code-block:: julia

julia> ForwardDiff.jacobian(x -> ForwardDiff.jacobian(sin, x), [1,2,3])
9×3 Array{Float64,2}:
-0.841471 0.0 0.0
-0.0 -0.0 -0.0
-0.0 -0.0 -0.0
0.0 0.0 0.0
-0.0 -0.909297 -0.0
-0.0 -0.0 -0.0
0.0 0.0 0.0
-0.0 -0.0 -0.0
-0.0 -0.0 -0.14112
julia> ForwardDiff.jacobian(x -> ForwardDiff.jacobian(cumprod, x), [1,2,3])
9×3 Array{Int64,2}:
0 0 0
0 1 0
0 3 2
0 0 0
1 0 0
3 0 1
0 0 0
0 0 0
2 1 0

Since this functionality is composed from ForwardDiff's existing API rather than built into
it, you're free to construct a ``vector_hessian`` function which suits your needs. For
@@ -190,22 +189,22 @@ expensive operation):
end
vector_hessian (generic function with 1 method)

julia> vector_hessian(sin, [1, 2, 3])
3×3×3 Array{Float64,3}:
julia> vector_hessian(cumprod, [1, 2, 3])
3×3×3 Array{Int64,3}:
[:, :, 1] =
-0.841471 0.0 0.0
-0.0 -0.0 -0.0
-0.0 -0.0 -0.0
0 0 0
0 1 0
0 3 2

[:, :, 2] =
0.0 0.0 0.0
-0.0 -0.909297 -0.0
-0.0 -0.0 -0.0
0 0 0
1 0 0
3 0 1

[:, :, 3] =
0.0 0.0 0.0
-0.0 -0.0 -0.0
-0.0 -0.0 -0.14112
0 0 0
0 0 0
2 1 0

Likewise, you could write a version of ``vector_hessian`` which supports functions of the
form ``f!(y, x)``, or perhaps an in-place Jacobian with ``ForwardDiff.jacobian!``.
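A minimal sketch of such an in-place variant (``vector_hessian!`` is hypothetical, not part of the package, and assumes ``out`` is a preallocated n^2-by-n matrix):

.. code-block:: julia

function vector_hessian!(out, f, x)
    n = length(x)
    # fill `out` with the Jacobian of the gradient-like inner Jacobian
    ForwardDiff.jacobian!(out, y -> ForwardDiff.jacobian(f, y), x)
    return reshape(out, n, n, n)
end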
@@ -232,10 +231,10 @@ SIMD instructions (i.e. not starting Julia with ``-O3``):
julia> using ForwardDiff: Dual

julia> a = Dual(1., 2., 3., 4.)
Dual(1.0,2.0,3.0,4.0)
Dual{Void}(1.0,2.0,3.0,4.0)

julia> b = Dual(5., 6., 7., 8.)
Dual(5.0,6.0,7.0,8.0)
Dual{Void}(5.0,6.0,7.0,8.0)

julia> @code_llvm a + b

99 changes: 50 additions & 49 deletions docs/_rst/source/basic_api.rst
@@ -4,22 +4,26 @@ Basic ForwardDiff API
Derivatives of :math:`f(x) : \mathbb{R} \to \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k}`
--------------------------------------------------------------------------------------------------

Use ``ForwardDiff.derivative`` to differentiate functions of the form ``f(::Real)::Real`` and ``f(::Real)::AbstractArray``.
Use ``ForwardDiff.derivative`` to differentiate functions of the form ``f(::Real...)::Real`` and ``f(::Real...)::AbstractArray``.

.. function:: ForwardDiff.derivative!(out, f, x)

Compute :math:`f'(x)`, storing the output in ``out``.
Compute :math:`f'(x)`, storing the output in ``out``. If ``x`` is a ``Tuple``,
then ``f`` will be called as ``f(x...)`` and the derivatives with respect to
each element in `x` will be stored in the respective element of ``out`` (which
should also be a ``Tuple``).

.. function:: ForwardDiff.derivative(f, x)

Compute and return :math:`f'(x)`.
Compute and return :math:`f'(x)`. If ``x`` is a ``Tuple``, ``f`` will be
called as ``f(x...)``, and a ``Tuple`` of derivatives will be returned.
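For instance, a sketch of the tuple form (this example assumes the n-ary behavior described above; ``f`` is an illustrative function, not part of ForwardDiff):

.. code-block:: julia

# f is called as f(x...); a Tuple of partial derivatives is returned,
# here (cos(a) * b, sin(a))
f(a, b) = sin(a) * b

dfda, dfdb = ForwardDiff.derivative(f, (1.0, 2.0))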

Gradients of :math:`f(x) : \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k} \to \mathbb{R}`
------------------------------------------------------------------------------------------------

Use ``ForwardDiff.gradient`` to differentiate functions of the form ``f(::AbstractArray)::Real``.

.. function:: ForwardDiff.gradient!(out, f, x, cfg = ForwardDiff.GradientConfig(x))
.. function:: ForwardDiff.gradient!(out, f, x, cfg = ForwardDiff.GradientConfig(f, x))

Compute :math:`\nabla f(\vec{x})`, storing the output in ``out``. It is highly advised
to preallocate ``cfg`` yourself (see the `AbstractConfig
@@ -34,23 +38,23 @@ Jacobians of :math:`f(x) : \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k}

Use ``ForwardDiff.jacobian`` to differentiate functions of the form ``f(::AbstractArray)::AbstractArray``.

.. function:: ForwardDiff.jacobian!(out, f, x, cfg = ForwardDiff.JacobianConfig(x))
.. function:: ForwardDiff.jacobian!(out, f, x, cfg = ForwardDiff.JacobianConfig(f, x))

Compute :math:`\mathbf{J}(f)(\vec{x})`, storing the output in ``out``. It is highly
advised to preallocate ``cfg`` yourself (see the `AbstractConfig
<basic_api.html#the-abstractconfig-types>`_ section below).

.. function:: ForwardDiff.jacobian!(out, f!, y, x, cfg = ForwardDiff.JacobianConfig(y, x))
.. function:: ForwardDiff.jacobian!(out, f!, y, x, cfg = ForwardDiff.JacobianConfig(f!, y, x))

Compute :math:`\mathbf{J}(f)(\vec{x})`, where :math:`f(\vec{x})` can be called as
``f!(y, x)`` such that the output of :math:`f(\vec{x})` is stored in ``y``. The output
matrix is stored in ``out``.

.. function:: ForwardDiff.jacobian(f, x, cfg = ForwardDiff.JacobianConfig(x))
.. function:: ForwardDiff.jacobian(f, x, cfg = ForwardDiff.JacobianConfig(f, x))

Compute and return :math:`\mathbf{J}(f)(\vec{x})`.

.. function:: ForwardDiff.jacobian(f!, y, x, cfg = ForwardDiff.JacobianConfig(y, x))
.. function:: ForwardDiff.jacobian(f!, y, x, cfg = ForwardDiff.JacobianConfig(f!, y, x))

Compute and return :math:`\mathbf{J}(f)(\vec{x})`, where :math:`f(\vec{x})` can be
called as ``f!(y, x)`` such that the output of :math:`f(\vec{x})` is stored in ``y``.
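For example, a sketch using an illustrative in-place target ``f!``:

.. code-block:: julia

# writes f(x) into y
f!(y, x) = (y[1] = x[1] * x[2]; y[2] = x[1] + x[2]; y)

y, x = zeros(2), [2.0, 3.0]
cfg = ForwardDiff.JacobianConfig(f!, y, x)
J = ForwardDiff.jacobian(f!, y, x, cfg)  # 2×2 Jacobian; y now holds f(x)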
@@ -60,13 +64,13 @@ Hessians of :math:`f(x) : \mathbb{R}^{n_1} \times \dots \times \mathbb{R}^{n_k}

Use ``ForwardDiff.hessian`` to perform second-order differentiation on functions of the form ``f(::AbstractArray)::Real``.

.. function:: ForwardDiff.hessian!(out, f, x, cfg = ForwardDiff.HessianConfig(x))
.. function:: ForwardDiff.hessian!(out, f, x, cfg = ForwardDiff.HessianConfig(f, x))

Compute :math:`\mathbf{H}(f)(\vec{x})`, storing the output in ``out``. It is highly
advised to preallocate ``cfg`` yourself (see the `AbstractConfig
<basic_api.html#the-abstractconfig-types>`_ section below).

.. function:: ForwardDiff.hessian(f, x, cfg = ForwardDiff.HessianConfig(x))
.. function:: ForwardDiff.hessian(f, x, cfg = ForwardDiff.HessianConfig(f, x))

Compute and return :math:`\mathbf{H}(f)(\vec{x})`.

@@ -77,66 +81,63 @@ For the sake of convenience and performance, all "extra" information used by For
API methods is bundled up in the ``ForwardDiff.AbstractConfig`` family of types. These
types allow the user to easily feed several different parameters to ForwardDiff's API
methods, such as `chunk size <advanced_usage.html#configuring-chunk-size>`_, work buffers,
multithreading configurations, and perturbation seed configurations.
and perturbation seed configurations.

ForwardDiff's basic API methods will allocate these types automatically by default,
but you can drastically reduce memory usage if you preallocate them yourself.

Note that for all constructors below, the chunk size ``N`` may be explicitly provided as a
type parameter, or omitted, in which case ForwardDiff will automatically select a chunk size
for you. However, it is highly recommended to `specify the chunk size manually when possible
Note that for all constructors below, the chunk size ``N`` may be explicitly provided,
or omitted, in which case ForwardDiff will automatically select a chunk size for you.
However, it is highly recommended to `specify the chunk size manually when possible
<advanced_usage.html#configuring-chunk-size>`_.

.. function:: ForwardDiff.GradientConfig{N}(x)
Note also that configurations constructed for a specific function ``f`` cannot
be reused to differentiate other functions (though they can be reused to differentiate
``f`` at different values). To construct a configuration which can be reused to
differentiate any function, you can pass ``nothing`` as the function argument.
While this is more flexible, it decreases ForwardDiff's ability to catch
and prevent `perturbation confusion`_.

Construct a ``GradientConfig`` instance based on the type and shape of the input vector
``x``. The returned ``GradientConfig`` instance contains all the work buffers required
by ForwardDiff's gradient/Jacobian methods. If taking the Jacobian of a target function
with the form ``f!(y, x)``, use the constructor ``ForwardDiff.GradientConfig{N}(y, x)``
instead.
.. function:: ForwardDiff.GradientConfig(f, x, chunk::ForwardDiff.Chunk{N} = Chunk(x))

Construct a ``GradientConfig`` instance based on the type of ``f`` and
type/shape of the input vector ``x``. The returned ``GradientConfig``
instance contains all the work buffers required by ForwardDiff's gradient
methods.

This constructor does not store/modify ``x``.
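As a sketch (``f`` below is a stand-in scalar-valued function, not part of ForwardDiff):

.. code-block:: julia

f(x) = sum(x .* x)
x = rand(100)

# tied to f, with an explicit chunk size of 10
cfg = ForwardDiff.GradientConfig(f, x, ForwardDiff.Chunk{10}())

# function-agnostic config, reusable with any target function
cfg_any = ForwardDiff.GradientConfig(nothing, x)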

.. function:: ForwardDiff.JacobianConfig{N}(x)
.. function:: ForwardDiff.JacobianConfig(f, x, chunk::ForwardDiff.Chunk{N} = Chunk(x))

Exactly like ``ForwardDiff.GradientConfig{N}(x)``, but returns a `JacobianConfig`
instead.
Exactly like the ``GradientConfig`` constructor, but returns a ``JacobianConfig`` instead.

.. function:: ForwardDiff.JacobianConfig{N}(y, x)
.. function:: ForwardDiff.JacobianConfig(f!, y, x, chunk::ForwardDiff.Chunk{N} = Chunk(x))

Construct a ``JacobianConfig`` instance based on the type and shape of the output vector
``y`` and the input vector ``x``. The returned ``JacobianConfig`` instance contains all
the work buffers required by ``ForwardDiff.jacobian``/``ForwardDiff.jacobian!`` with a
target function of the form ``f!(y, x)``.
Construct a ``JacobianConfig`` instance based on the type of ``f!``, and the
types/shapes of the output vector ``y`` and the input vector ``x``. The
returned ``JacobianConfig`` instance contains all the work buffers required
by ``ForwardDiff.jacobian``/``ForwardDiff.jacobian!`` when the target
function takes the form ``f!(y, x)``.

This constructor does not store/modify ``y`` or ``x``.

.. function:: ForwardDiff.HessianConfig{N}(x)
.. function:: ForwardDiff.HessianConfig(f, x, chunk::ForwardDiff.Chunk{N} = Chunk(x))

Construct a ``HessianConfig`` instance based on the type and shape of the input vector
``x``. The returned ``HessianConfig`` instance contains all the work buffers required
by ForwardDiff's Hessian methods. If using
``ForwardDiff.hessian!(out::DiffBase.DiffResult, args...)``, use the constructor
``ForwardDiff.HessianConfig{N}(out, x)`` instead.
Construct a ``HessianConfig`` instance based on the type of ``f`` and
type/shape of the input vector ``x``. The returned ``HessianConfig`` instance contains
all the work buffers required by ForwardDiff's Hessian methods. If using
``ForwardDiff.hessian!(out::DiffBase.DiffResult, f, x)``, use the constructor
``ForwardDiff.HessianConfig(f, out, x, chunk)`` instead.

This constructor does not store/modify ``x``.

.. function:: ForwardDiff.HessianConfig{N}(out::DiffBase.DiffResult, x)
.. function:: ForwardDiff.HessianConfig(f, out::DiffBase.DiffResult, x, chunk::ForwardDiff.Chunk{N} = Chunk(x))

Construct a ``HessianConfig`` instance based on the type and shape of the storage in
``out`` and the input vector ``x``. The returned ``HessianConfig`` instance contains
all the work buffers required by ``ForwardDiff.hessian!(out::DiffBase.DiffResult,
args...)``.
Construct a ``HessianConfig`` instance based on the type of ``f``, types/storage
in ``out``, and type/shape of the input vector ``x``. The returned ``HessianConfig``
instance contains all the work buffers required by
``ForwardDiff.hessian!(out::DiffBase.DiffResult, args...)``.

This constructor does not store/modify ``out`` or ``x``.
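For example (a sketch; ``f`` is a stand-in scalar-valued function):

.. code-block:: julia

using ForwardDiff, DiffBase

f(x) = sum(x .* x)
x = rand(5)

# preallocate storage for the value, gradient, and Hessian
out = DiffBase.DiffResult(0.0, similar(x), similar(x, 5, 5))
cfg = ForwardDiff.HessianConfig(f, out, x)
ForwardDiff.hessian!(out, f, x, cfg)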

.. function:: ForwardDiff.MultithreadConfig(cfg::AbstractConfig)

Wrap the given ``cfg`` in a ``MultithreadConfig`` instance, which can then be passed to
gradient or Hessian methods in order to enable experimental multithreading. Jacobian
methods do not yet support multithreading.

Note that multithreaded ForwardDiff API methods will attempt to use all available
threads. In the future, once Julia exposes more fine-grained threading primitives,
a ``MultithreadConfig`` constructor may be added which takes in a user-provided subset
of thread IDs instead of using all available threads.
.. _`perturbation confusion`: https://github.com/JuliaDiff/ForwardDiff.jl/issues/83
4 changes: 2 additions & 2 deletions docs/_rst/source/conf.py
@@ -57,9 +57,9 @@
# built documents.
#
# The short X.Y version.
version = '0.2'
version = '0.5'
# The full version, including alpha/beta/rc tags.
release = '0.2.3'
release = '0.5.0'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.