RimuQMC · mtsch · Nov 29, 2022 · Nov 23, 2022 · Nov 23, 2022 · Nov 23, 2022
diff --git a/Project.toml b/Project.toml
@@ -27,15 +27,13 @@ Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
-SplittablesBase = "171d559e-b47b-412a-8079-5efa626c420e"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 StrFormat = "b5087856-efa9-5a6d-8e6f-97303a7af894"
 StrLiterals = "68059f60-971f-57ff-a2d0-18e7de9ccc84"
 TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
 TerminalLoggers = "5d786b92-1e48-4d6f-9151-6b4477ca9bed"
-ThreadsX = "ac1d9e8a-700a-412c-b207-f0111f4b6c0d"
 
 [compat]
 Arrow = "1.5, 2"
@@ -56,13 +54,11 @@ ProgressLogging = "0.1.3"
 Reexport = "1"
 Setfield = "0.7, 0.8, 1"
 SpecialFunctions = "1, 2"
-SplittablesBase = "0.1"
 StaticArrays = "1"
 StatsBase = "0.33"
 StrFormat = "1"
 StrLiterals = "1"
 TerminalLoggers = "0.1.4"
-ThreadsX = "0.1"
 julia = "1.6"
 
 [extras]

diff --git a/docs/src/dictvectors.md b/docs/src/dictvectors.md
@@ -19,6 +19,9 @@ deposit!
 storage
 freeze
 localpart
+fciqmc_step!
+sort_into_targets!
+working_memory
 ```
 
 ## Supported operations
@@ -64,4 +67,4 @@ Rimu.DictVectors.CoherentInitiator
 ## Index
 ```@index
 Pages   = ["dictvectors.md"]
-```
+```
diff --git a/docs/src/randomnumbers.md b/docs/src/randomnumbers.md
@@ -6,7 +6,7 @@ Rimu uses Julia's built-in random number generator, which currently defaults to
 ## Reproducibility
 
 If you want FCIQMC runs to be reproducible, make sure to seed the RNG with
-[Random.seed!](https://docs.julialang.org/en/v1/stdlib/Random/#Random.seed!) and to use [`lomc!`](@ref) in single-threaded mode by passing it the `threading=false` keyword argument.
+[Random.seed!](https://docs.julialang.org/en/v1/stdlib/Random/#Random.seed!).
 
 MPI-distributed runs can also be made reproducible by seeding the RNG with
 [`Rimu.RMPI.mpi_seed!`](@ref).
diff --git a/scripts/BHM-example.jl b/scripts/BHM-example.jl
@@ -80,7 +80,6 @@ df, state = lomc!(Ĥ,svec;
             r_strat,
             τ_strat,
             post_step,
-            threading = false, # only for reproducible runs
 );
 
 # Here is how to save the output data stored in `df` into a `.arrow` file,
@@ -110,5 +109,5 @@ println("Energy from $steps_measure steps with $targetwalkers walkers:
 
 using Test                                      #hide
 @test isfile("fciqmcdata.arrow")                #hide
-@test se.mean ≈ -3.9 rtol=0.01                  #hide
+@test se.mean ≈ -4.0215 rtol=0.1                #hide
 rm("fciqmcdata.arrow", force=true)              #hide
diff --git a/scripts/G2-example.jl b/scripts/G2-example.jl
@@ -69,7 +69,6 @@ df, state = lomc!(H, svec;
             r_strat,
             τ_strat,
             replica,
-            threading = false, # only for reproducible runs
 );
 
 # The output `DataFrame` has FCIQMC statistics for each replica

diff --git a/src/DictVectors/DictVectors.jl b/src/DictVectors/DictVectors.jl
@@ -10,10 +10,9 @@ module DictVectors
 
 using Random
 using LinearAlgebra
-import SplittablesBase
 
 using ..Interfaces
-import ..Interfaces: deposit!, storage, StochasticStyle, default_style, freeze
+import ..Interfaces: deposit!, zero!, storage, StochasticStyle, default_style, freeze
 
 export zero!, add!, deposit!, storage, walkernumber
 export DVec, InitiatorDVec

diff --git a/src/DictVectors/abstractdvec.jl b/src/DictVectors/abstractdvec.jl
@@ -34,13 +34,6 @@ Base.ndims(::AbstractDVec) = 1
 ###
 ### copy*, zero*
 ###
-"""
-    zero!(v)
-
-Replace `v` by a zero vector as an inplace operation. For `AbstractDVec` types it means
-removing all non-zero elements. For `AbstractArrays`, it sets all of the values to zero.
-"""
-zero!(v::AbstractVector{T}) where {T} = v .= zero(T)
 zero!(v::AbstractDVec) = empty!(v)
 
 Base.zero(dv::AbstractDVec) = empty(dv)
@@ -146,15 +139,6 @@ function LinearAlgebra.dot(x::AbstractDVec, y::AbstractDVec)
 end
 # For MPI version see mpi_helpers.jl
 
-# threaded dot()
-function LinearAlgebra.dot(x::AbstractDVec{K,T1}, ys::NTuple{N, AbstractDVec{K,T2}}) where {N, K, T1, T2}
-    results = zeros(promote_type(T1,T2), N)
-    Threads.@threads for i in 1:N
-        results[i] = x⋅ys[i]
-    end
-    return sum(results)
-end
-
 Base.isequal(x::AbstractDVec{K1}, y::AbstractDVec{K2}) where {K1,K2} = false
 function Base.isequal(x::AbstractDVec{K}, y::AbstractDVec{K}) where {K}
     x === y && return true

diff --git a/src/DictVectors/initiators.jl b/src/DictVectors/initiators.jl
@@ -304,25 +304,18 @@ end
 ###
 ### Iterators
 ###
-# These are needed because `Iterators.map` does not infer `eltype` correctly and does not work
-# with SplittablesBase.jl.
+# These are needed because `Iterators.map` does not infer `eltype` correctly.
 """
     InitiatorIterator
 
-Iterator over pairs or values of an `InitiatorDVec`. Supports the `SplittablesBase`
-interface.
+Iterator over pairs or values of an `InitiatorDVec`.
 """
 struct InitiatorIterator{T,D,I}
     iter::D
     initiator::I
 
     InitiatorIterator{T}(iter::D, initiator::I) where {T,D,I} = new{T,D,I}(iter, initiator)
 end
-function SplittablesBase.halve(p::InitiatorIterator{T}) where {T}
-    left, right = SplittablesBase.halve(p.iter)
-    return InitiatorIterator{T}(left, p.initiator), InitiatorIterator{T}(right, p.initiator)
-end
-SplittablesBase.amount(p::InitiatorIterator) = SplittablesBase.amount(p.iter)
 
 Base.length(p::InitiatorIterator) = length(p.iter)
 Base.IteratorSize(::InitiatorIterator) = Base.HasLength()

diff --git a/src/Interfaces/Interfaces.jl b/src/Interfaces/Interfaces.jl
@@ -42,7 +42,8 @@ export
     StochasticStyle, default_style, StyleUnknown, fciqmc_col!, step_stats, update_dvec!,
     CompressionStrategy, NoCompression, compress!
 export
-    AbstractDVec, deposit!, storage, localpart, freeze
+    AbstractDVec, deposit!, storage, localpart, freeze, working_memory,
+    fciqmc_step!, sort_into_targets!
 export
     AbstractHamiltonian, diagonal_element, num_offdiagonals, get_offdiagonal, offdiagonals,
     random_offdiagonal, starting_address,

diff --git a/src/Interfaces/dictvectors.jl b/src/Interfaces/dictvectors.jl
@@ -2,7 +2,7 @@
     AbstractDVec{K,V}
 
 Abstract type for data structures that behave similar to sparse vectors, but are indexed
-by an arbitrary type `V` (could be non-integers) similarly to dictionaries. `AbstractDVec`s 
+by an arbitrary type `K` (could be non-integers) similarly to dictionaries. `AbstractDVec`s
 are  designed to work well with [`lomc!`](@ref Main.lomc!) and
 [KrylovKit](https://github.com/Jutho/KrylovKit.jl).
 
@@ -23,10 +23,11 @@ To iterate over an `AbstractDVec`, use `keys`, `pairs`, or `values`.
 
 # Interface
 
-The interface is similar to the `AbstractDict` interface, but with the changed behaviour
-as noted above.
-Implement what would be needed for the `AbstractDict` interface (`pairs`, `keys`, `values`,
-`setindex!, getindex, delete!, length, haskey, empty!, isempty`) and, in addition:
+The interface is similar to the `AbstractDict` interface, but with the changed behaviour as
+noted above.  Implement what would be needed for the `AbstractDict` interface (`pairs`,
+`keys`, `values`, `setindex!`, `getindex`, `delete!`, `length`, `haskey`, `empty!`,
+`isempty`) and, in addition:
+
 * [`StochasticStyle`](@ref)
 * [`storage`](@ref) returns an `AbstractDict` storing the raw data with possibly
   different `valtype` than `V`.
@@ -47,6 +48,14 @@ function deposit!(w, add, val, _)
     w[add] += convert(valtype(w), val)
 end
 
+"""
+    zero!(v)
+
+Replace `v` by a zero vector as an in-place operation. For `AbstractDVec` types it means
+removing all non-zero elements. For `AbstractArray`s, it sets all of the values to zero.
+"""
+zero!(v::AbstractVector{T}) where {T} = v .= zero(T)
+
 """
     localpart(dv) -> AbstractDVec
 
@@ -74,3 +83,58 @@ conventional manner, but supports faster dot products.
 If `dv` is an [`MPIData`](@ref Main.Rimu.RMPI.MPIData), synchronize its contents among the ranks first.
 """
 freeze(v::AbstractVector) = copy(v)
+
+"""
+    working_memory(dv::AbstractDVec)
+
+Create a working memory instance compatible with `dv`. The working memory must be
+compatible with [`sort_into_targets!`](@ref) and [`fciqmc_step!`](@ref).
+"""
+working_memory(dv) = similar(localpart(dv))
+
+"""
+    fciqmc_step!(working_memory, target, source, hamiltonian, shift, dτ) ->
+        stat_names, stats, working_memory, target
+
+Perform a single matrix(/operator)-vector multiplication:
+
+```math
+v^{(n + 1)} = [1 - dτ(\\hat{H} - S)]⋅v^{(n)} ,
+```
+
+where ``Ĥ`` is the `hamiltonian`, ``S`` is the `shift`, ``v^{(n+1)}`` is the `target` and
+``v^{(n)}`` is the `source`. The `working_memory` can be used as temporary storage.
+
+Whether the operation is performed in a stochastic, semistochastic, or deterministic way is
+controlled by the trait `StochasticStyle(target)`. See [`StochasticStyle`](@ref).
+
+Returns the step stats generated by the `StochasticStyle`, the working memory and the
+`target` vector.
+
+`target` and `working_memory` may be mutated.
+"""
+function fciqmc_step!(working_memory, target, source, ham, shift, dτ)
+    v = localpart(source)
+    @assert working_memory ≢ v "`w` and `v` must not be the same object"
+    @assert localpart(target) ≢ v "`pv` and `v` must not be the same object"
+    zero!(working_memory)
+
+    stat_names, stats = step_stats(v)
+    for (add, val) in pairs(v)
+        stats += fciqmc_col!(working_memory, ham, add, val, shift, dτ)
+    end
+    # Now, working_memory holds the new values - they need to be moved into the target.
+    target, working_memory, stats = sort_into_targets!(target, working_memory, stats)
+
+    return stat_names, stats, working_memory, target
+end
+
+"""
+    sort_into_targets!(target, source, stats) -> target, source, agg_stats
+
+Aggregate coefficients from `source` to `target` and from `stats` to `agg_stats`
+according to thread- or MPI-level parallelism.
+
+Returns the new `target` and `source`, as the sorting process may involve swapping them.
+"""
+sort_into_targets!(dv::T, wm::T, stats) where {T} = wm, dv, stats
diff --git a/src/Interfaces/stochasticstyles.jl b/src/Interfaces/stochasticstyles.jl
@@ -3,7 +3,7 @@
 
 Abstract type. When called as a function it returns the native style of the
 generalised vector `v` that determines how simulations are to proceed.
- 
+
 # Usage
 
 Concrete `StochasticStyle`s can be used for the `style` keyword argument of
@@ -52,7 +52,7 @@ default_style(::Type{T}) where T = StyleUnknown{T}()
 """
     CompressionStrategy
 
-The `CompressionStrategy` controls how a vector is compressed after a step. 
+The `CompressionStrategy` controls how a vector is compressed after a step.
 
 ## Default implementation:
 * [`NoCompression`](@ref): no vector compression
@@ -64,9 +64,9 @@ constructors for some [`StochasticStyle`](@ref)s. Calling
 default is [`NoCompression`](@ref).
 
 ## Interface
-When defining a new `CompressionStrategy`, subtype it as 
+When defining a new `CompressionStrategy`, subtype it as
 `MyCompressionStrategy <: CompressionStrategy` and define
-a method for 
+a method for
 * [`compress!(s::MyCompressionStrategy, v)`](@ref compress!)
 """
 abstract type CompressionStrategy end
@@ -116,15 +116,7 @@ end
 Return a tuple of names (`Symbol` or `String`) and a tuple of zeros of values of the same
 length. These will be reported as columns in the `DataFrame` returned by [`lomc!`](@ref Main.lomc!).
 """
-step_stats(v, n) = step_stats(StochasticStyle(v), n)
-function step_stats(s::StochasticStyle, ::Val{N}) where N
-    if N == 1
-        return step_stats(s)
-    else
-        names, stats = step_stats(s)
-        return names, MVector(ntuple(_ -> stats, Val(N)))
-    end
-end
+step_stats(v) = step_stats(StochasticStyle(v))
 
 """
     fciqmc_col!(w, ham, add, num, shift, dτ)

diff --git a/src/RMPI/RMPI.jl b/src/RMPI/RMPI.jl
@@ -13,7 +13,7 @@ using LinearAlgebra
 using Random
 using StaticArrays
 
-import Rimu: sort_into_targets!
+import ..Interfaces: sort_into_targets!
 
 export MPIData
 export mpi_rank, is_mpi_root, @mpi_root, mpi_barrier

diff --git a/src/RMPI/helpers.jl b/src/RMPI/helpers.jl
@@ -86,7 +86,7 @@ function mpi_combine_walkers!(dtarget::MPIData, source::AbstractDVec)
 end
 
 # This function is just a wrapper that makes allreduce treat a SVector as a scalar
-function Rimu.sort_into_targets!(dtarget::MPIData, ws::NTuple{NT,W}, statss) where {NT,W}
+function sort_into_targets!(dtarget::MPIData, ws::NTuple{NT,W}, statss) where {NT,W}
     # multi-threaded MPI version
     # should only ever run on thread 1
     @assert Threads.threadid() == 1 "`sort_into_targets!()` is running on `threadid()` == $(Threads.threadid()) instead of 1!"
@@ -99,7 +99,7 @@ function Rimu.sort_into_targets!(dtarget::MPIData, ws::NTuple{NT,W}, statss) whe
     res_stats = MPI.Allreduce(Rimu.MultiScalar(stats), +, dtarget.comm)
     return dtarget, ws, res_stats
 end
-function Rimu.sort_into_targets!(dtarget::MPIData, w::AbstractDVec, stats)
+function sort_into_targets!(dtarget::MPIData, w::AbstractDVec, stats)
     # single threaded MPI version
     mpi_combine_walkers!(dtarget,w) # combine walkers from different MPI ranks
     res_stats = MPI.Allreduce(Rimu.MultiScalar(stats), +, dtarget.comm)

diff --git a/src/RMPI/mpidata.jl b/src/RMPI/mpidata.jl
@@ -59,6 +59,10 @@ function Base.show(io::IO, md::MPIData)
     end
 end
 
+function Base.similar(md::MPIData)
+    return MPIData(similar(md.data), md.comm, md.root, md.s)
+end
+
 ###
 ### Iterators
 ###
@@ -340,6 +344,6 @@ function Rimu.all_overlaps(operators::Tuple, vecs::NTuple{N,MPIData}, ::Val{B})
         get_overlaps_nondiagonal!(names, values, operators, vecs, Val(B))
     end
 
-    num_reports = (N * (N - 1) ÷ 2) * (B + length(operators)) 
+    num_reports = (N * (N - 1) ÷ 2) * (B + length(operators))
     return Tuple(SVector{num_reports,String}(names)), Tuple(SVector{num_reports,T}(values))
 end
diff --git a/src/Rimu.jl b/src/Rimu.jl
@@ -8,10 +8,8 @@ using OrderedCollections # for LittleDict
 using Parameters
 using Reexport
 using Setfield
-using SplittablesBase
 using StaticArrays
 using StatsBase
-using ThreadsX
 using ProgressLogging
 using TerminalLoggers: TerminalLogger
 using Logging: ConsoleLogger

diff --git a/src/apply_memory_noise.jl b/src/apply_memory_noise.jl
@@ -12,21 +12,9 @@ error exception is thrown. See [`MemoryStrategy`](@ref).
 `w` is the walker array after fciqmc step, `v` the previous one, `pnorm` the
 norm of `v`, and `r` the instantaneously applied noise.
 """
-function apply_memory_noise!(w::Union{AbstractArray{T},AbstractDVec{K,T}},
-         v, shift, dτ, pnorm, m
-    ) where  {K,T<:Real}
+function apply_memory_noise!(w, v, shift, dτ, pnorm, m)
     apply_memory_noise!(StochasticStyle(w), w, v, real(shift), dτ, real(pnorm), m)
 end
-# only use real part of the shift and norm if the coefficients are real
-
-# otherwise, pass on complex shift in generic method
-function apply_memory_noise!(w::Union{AbstractArray,AbstractDVec}, args...)
-    apply_memory_noise!(StochasticStyle(w), w, args...)
-end
-
-function apply_memory_noise!(ws::NTuple, args...)
-    apply_memory_noise!(StochasticStyle(first(ws)), ws, args...)
-end
 
 function apply_memory_noise!(s::StochasticStyle, w, v, shift, dτ, pnorm, m::NoMemory)
     return 0.0 # does nothing