Skip to content
Merged
5 changes: 4 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
name = "PointProcesses"
uuid = "af0b7596-9bb0-472a-a012-63904f2b4c55"
authors = ["Guillaume Dalle", "José Kling"]
version = "0.6.0"
authors = ["Guillaume Dalle", "José Kling"]

[deps]
DensityInterface = "b429d917-457f-4dbc-8f4c-0cc954292b1d"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
HypothesisTests = "09f84164-cd44-5f33-b23f-e6b0d136a0d5"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Expand All @@ -14,6 +15,8 @@ StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
[compat]
DensityInterface = "0.4"
Distributions = "0.25"
HypothesisTests = "0.11.5"
JuliaFormatter = "2.2.0"
LinearAlgebra = "1.10"
Random = "1.10"
Statistics = "1.10"
Expand Down
5 changes: 4 additions & 1 deletion docs/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,8 @@ DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244"
PointProcesses = "af0b7596-9bb0-472a-a012-63904f2b4c55"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"

[sources]
PointProcesses = {path = ".."}

[compat]
Documenter = "1"
Documenter = "1"
1 change: 1 addition & 0 deletions docs/make.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using Documenter
using DocumenterCitations
using PointProcesses
using Random

bib = CitationBibliography(joinpath(@__DIR__, "src", "refs.bib"); style=:authoryear)

Expand Down
32 changes: 27 additions & 5 deletions docs/src/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,6 @@ simulate

```@docs
logdensityof
```

### Learning

```@docs
integrated_ground_intensity
ground_intensity_bound
fit
Expand Down Expand Up @@ -104,6 +99,33 @@ MultivariatePoissonProcessPrior
HawkesProcess
```

## Goodness-of-fit tests

```@docs
Statistic
statistic
PointProcessTest
pvalue
```
### Statistic

```@docs
KSDistance
```

### BootstrapTest

```@docs
BootstrapTest
BootstrapTest(S::Type{<:Statistic}, PP::Type{<:AbstractPointProcess}, h::History)
```

### NoBootstrapTest
```@docs
MonteCarloTest
MonteCarloTest(S::Type{<:Statistic}, pp::AbstractPointProcess, h::History)
```

## Index

```@index
Expand Down
104 changes: 104 additions & 0 deletions src/HypothesisTests/PPTests/bootstrap_test.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""
BootstrapTest <: PointProcessTest

An object containing the results of a bootstrap-based goodness-of-fit test.
The p-value of the test is calculated as
p = (count(sim_stats ≥ stat) + 1) / (n_sims + 1).

# Fields
- `n_sims::Int`: number of bootstrap simulations performed
- `stat::Float64`: observed test statistic value
- `sim_stats::Vector{Float64}`: test statistics from bootstrap simulations
"""
struct BootstrapTest <: PointProcessTest
n_sims::Int
stat::Float64
sim_stats::Vector{Float64}
end
#=
`stat` and `sim_stats` set to type `Float64`, because the `ksstats`
function from `HypothesisTests.jl` always returns a `Float64` value
If implementation changes, could define
`BootstrapTest{R} <: PointProcessTest where {R<:Real}`
=#

function StatsAPI.pvalue(bt::BootstrapTest)
return (count(>=(bt.stat), bt.sim_stats) + 1) / (bt.n_sims + 1)
end

"""
BootstrapTest(S::Type{<:Statistic}, pp::AbstractPointProcess, h::History; n_sims::Int=1000, rng::AbstractRNG=default_rng())

Perform a goodness-of-fit test using simulation with bootstrap resampling, comparing
the test statistic computed on the observed data against the distribution of the same
statistic computed on data simulated from the fitted model.

If λ₀(t) is the true intensity function of the process that generated the observed
history, and λ(t; θ) is a a parametrization of the intensity, then the null hypothesis is

H₀: There exists parameters θₒ such that λ₀(t) = λ(t; θ₀)

This procedure is specifically aimed for testing hypotheses where parameters need to
be estimated. Details are provided in [Kling and Vetter (2025)](https://doi.org/10.1111/sjos.70029).

# Arguments
- `S::Type{<:Statistic}`: the type of test statistic to use
- `pp::Type{<:AbstractPointProcess}`: the null hypothesis model family
- `h::History`: the observed event history
- `n_sims::Int=1000`: number of bootstrap simulations to perform
- `rng::AbstractRNG=default_rng()`: Random number generator

# Returns
- `BootstrapTest`: test result object containing the observed statistic, bootstrap statistics, and test metadata

# Example

```julia
# Bootstrap test for Hawkes process model adequacy
test = BootstrapTest(KSDistance(Exponential), HawkesProcess, history; n_sims=1000)
p = pvalue(test)
```
"""
function BootstrapTest(
S::Type{<:Statistic},
PP::Type{<:AbstractPointProcess},
h::History;
n_sims::Int=1000,
rng::AbstractRNG=default_rng(),
)
if isempty(h.times)
throw(ArgumentError("Test is not valid for empty event history."))
end

# Estimate process and calculate statistic from data
pp_est = fit(PP, h; rng=rng)
stat = statistic(S, pp_est, h)

# Initialize vector with test statistics from simulations
sim_stats = Vector{Float64}(undef, n_sims)

# Split sim_stats in one chunk per thread
chunk_size = max(1, length(sim_stats) ÷ Threads.nthreads())
chunks = Iterators.partition(sim_stats, chunk_size)

# Lock for accessing the master rng safely
l = ReentrantLock()

tasks = map(chunks) do chunk
Threads.@spawn begin

# Local rng seeded deterministically from the master rng
local_rng = lock(() -> Xoshiro(rand(rng, UInt)), l)

for i in eachindex(chunk)
# Simulates a process and uses the process estimated from
# the simulation to calculate the test statistic
sim = simulate(local_rng, pp_est, h.tmin, h.tmax)
sim_est = fit(PP, sim, rng=local_rng)
chunk[i] = statistic(S, sim_est, sim)
end
end
end
fetch.(tasks)
return BootstrapTest(n_sims, stat, sim_stats)
end
115 changes: 115 additions & 0 deletions src/HypothesisTests/PPTests/monte_carlo_test.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
"""
MonteCarloTest <: PointProcessTest

An object containing the results of a non-bootstrap based goodness-of-fit test.
The p-value of the test is calculated as
p = (count(sim_stats ≥ stat) + 1) / (n_sims + 1).

# Fields
- `n_sims::Int`: number of simulations performed
- `stat::Float64`: observed test statistic value
- `sim_stats::Vector{Float64}`: test statistics from simulated data
"""
struct MonteCarloTest <: PointProcessTest
n_sims::Int
stat::Float64
sim_stats::Vector{Float64}
end

function StatsAPI.pvalue(nbt::MonteCarloTest)
return (count(>=(nbt.stat), nbt.sim_stats) + 1) / (nbt.n_sims + 1)
end

"""
MonteCarloTest(S::Type{<:Statistic}, pp::AbstractPointProcess, h::History; n_sims::Int=1000, rng::AbstractRNG=default_rng())

Perform a goodness-of-fit test using simulation without bootstrap resampling, comparing
the test statistic computed on the observed data against the distribution of the same
statistic computed on data simulated from the fitted model.

If λ₀(t) is the true intensity function of the process that generated the observed
history, and λ(t; θ) is a a parametrization of the intensity, then there are two forms
for the null hypothesis:

1. H₀: λ₀(t) = λ(t; θ₀)
2. H₀: There exists parameters θₒ such that λ₀(t) = λ(t; θ₀)

If `pp` is an instance of an `AbstractPointProcess`, the null hypothesis 1 is considered, if
a `pp` is a `Type{<:AbstractPointProcess}`, the method uses null hypothesis 2.

Notice that this test is better suited when the parameter θ₀ is known (form 1), since this
procedure does not account for parameter estimation error. For more details on this, see
[Jogesh Babu and Rao (2004)](http://www.jstor.org/stable/25053332),
[Reynaud-Bouret et. al. (2014)](https://doi.org/10.1186/2190-8567-4-3),
[Kling and Vetter (2025)](https://doi.org/10.1111/sjos.70029).

# Arguments
- `S::Type{<:Statistic}`: the type of test statistic to use
- `pp::Union{AbstractPointProcess, Type{<:AbstractPointProcess}}`: the null hypothesis model family
- `h::History`: the observed event history
- `n_sims::Int=1000`: number of simulations to perform for the test
- `rng::AbstractRNG=default_rng()`: Random number generator

# Returns
- `MonteCarloTest`: test result object containing the observed statistic, simulated statistics, and test metadata

# Example
```julia
# Test null hypothesis of form 1. Known θ₀
test = MonteCarloTest(KSDistance(Exponential), HawkesProcess(1, 1, 2), history; n_sims=1000)
p = pvalue(test)
# Test null hypothesis of form 2. Unknown θ₀
test = MonteCarloTest(KSDistance(Exponential), HawkesProcess, history; n_sims=1000)
p = pvalue(test)
```
"""
function MonteCarloTest(
S::Type{<:Statistic},
pp::AbstractPointProcess,
h::History;
n_sims::Int=1000,
rng::AbstractRNG=default_rng(),
)
isempty(h.times) && @warn "Event history is empty."

# Calculate statistic from data
stat = statistic(S, pp, h)

# Initialize vector with test statistics from simulations
sim_stats = Vector{typeof(stat)}(undef, n_sims)

# Split sim_stats in one chunk per thread
chunk_size = max(1, length(sim_stats) ÷ Threads.nthreads())
chunks = Iterators.partition(sim_stats, chunk_size)

# Lock for accessing the master rng safely
l = ReentrantLock()

tasks = map(chunks) do chunk
Threads.@spawn begin

# Local rng seeded deterministically from the master rng
local_rng = lock(() -> Xoshiro(rand(rng, UInt)), l)

for i in eachindex(chunk)
# Simulates a process and uses the initial process
# to calculate the test statistic
sim = simulate(local_rng, pp, h.tmin, h.tmax)
chunk[i] = statistic(S, pp, sim)
end
end
end
fetch.(tasks)
return MonteCarloTest(n_sims, stat, sim_stats)
end

function MonteCarloTest(
S::Type{<:Statistic}, PP::Type{<:AbstractPointProcess}, h::History; kwargs...
)
if isempty(h.times)
throw(ArgumentError("Test is not valid for empty event history."))
end

pp_est = fit(PP, h)
return MonteCarloTest(S, pp_est, h; kwargs...)
end
29 changes: 29 additions & 0 deletions src/HypothesisTests/Statistics/KSDistance.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""
KSDistance{T<:UnivariateDistribution}

A Kolmogorov-Smirnov distance statistic for testing goodness-of-fit of point processes
against a specified distribution `D` after appropriate time rescaling.
This test statistic is suitable only for non-marked processes, as it ignores the marks.

# Type parameter
- `D<:UnivariateDistribution`: the target distribution to test against (e.g., `Exponential`, `Uniform`)

# Available test statistics
- KSDistance{Exponential}
Kolmogorov-Smirnov distance between the time-changed interevent times and a standard exponential

- KSDistance{Uniform}
Kolmogorov-Smirnov distance between the time-changed event times and a uniform distribution

# Example
```julia
BootstrapTest(KSDistance{Exponential}, HawkesProcess, history)
```
"""
struct KSDistance{D<:UnivariateDistribution} <: Statistic end

function statistic(::Type{KSDistance{D}}, pp::AbstractPointProcess, h::History) where {D}
X, d = transform(D, pp, h) # X → data computed from event history. d → Distribution to compare X to.
isempty(X) && return 1.0 # No samples ⇒ ecdf(t) = 0 ⇒ distance = 1
return ksstats(X, d)[2] # `ksstats` always returns a `Float64`
end
38 changes: 38 additions & 0 deletions src/HypothesisTests/point_process_tests.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""
PointProcessTest

Interface for all goodness-of-fit tests
"""
abstract type PointProcessTest <: StatsAPI.HypothesisTest end

"""
pvalue(test::PointProcessTest)

Calculate the p-value of a goodness-of-fit test on a process.

# Arguments
- `::PointProcessTest`: the test result object

# Returns
- `Float64`: p-value in [0, 1], where small values provide evidence against the null hypothesis
"""
function StatsAPI.pvalue(::PointProcessTest) end

function Base.show(io::IO, t::PointProcessTest)
print(io, "$(typeof(t)) - pvalue = $(pvalue(t))")
end

#=
Internal function for performing the appropriate transformation
on the event times according to the selected distribution.
=#
function transform(::Type{<:Uniform}, pp::AbstractPointProcess, h)
transf = time_change(h, pp) # transf → time re-scaled event times
return transf.times, Uniform(transf.tmin, transf.tmax)
end

function transform(::Type{<:Exponential}, pp::AbstractPointProcess, h)
inter_transf = diff(time_change(h, pp).times) # inter_transf → sorted time transformed inter event times
sort!(inter_transf)
return inter_transf, Exponential(1)
end
Loading