Hawkes cumulants - Tensorflow v1 #505

Merged
1 change: 1 addition & 0 deletions .github/workflows/build_nix.yml
@@ -41,4 +41,5 @@ jobs:
- run: |
python3 -m pip install wheel pip --upgrade
python3 -m pip install -r requirements.txt
+ python3 -m pip install tensorflow-cpu
python3 setup.py build_ext --inplace cpptest pytest
69 changes: 65 additions & 4 deletions lib/cpp/hawkes/inference/hawkes_cumulant.cpp
@@ -44,8 +44,7 @@ SArrayDoublePtr HawkesCumulant::compute_A_and_I_ij(ulong r, ulong i, ulong j,
if (abs_t_j_l_minus_t_i_k < width) {
sub_res += width - abs_t_j_l_minus_t_i_k;

- if (abs_t_j_l_minus_t_i_k < integration_support)
- timestamps_in_interval++;
+ if (abs_t_j_l_minus_t_i_k < integration_support) timestamps_in_interval++;
} else {
break;
}
@@ -64,8 +63,7 @@ SArrayDoublePtr HawkesCumulant::compute_A_and_I_ij(ulong r, ulong i, ulong j,
return return_array.as_sarray_ptr();
}

- double HawkesCumulant::compute_E_ijk(ulong r, ulong i, ulong j, ulong k,
- double mean_intensity_i,
+ double HawkesCumulant::compute_E_ijk(ulong r, ulong i, ulong j, ulong k, double mean_intensity_i,
double mean_intensity_j, double J_ij) {
auto timestamps_i = timestamps_list[r][i];
auto timestamps_j = timestamps_list[r][j];
@@ -126,3 +124,66 @@ double HawkesCumulant::compute_E_ijk(ulong r, ulong i, ulong j, ulong k,
res /= (*end_times)[r];
return res;
}

HawkesTheoreticalCumulant::HawkesTheoreticalCumulant(int dim) : d(dim) {
Lambda = SArrayDouble::new_ptr(dim);
Review comment (Member): can these be part of the constructor setup arguments, like : d(dim)?

Review comment (Member):
1. is Lambda the best name?
2. variable names don't typically start with upper case

Review comment (Contributor):
The variables Lambda, C, Kc, and R represent the first integrated cumulant, the second integrated cumulant, the (reduced) third integrated cumulant, and the inverse of $I - G$ (where $G$ is the matrix of $L^1$-norms of the kernel functions), respectively. Their names are taken directly from the math symbols used in the paper: $\Lambda$, $C$, $K$, and $R$.

I agree that the upper case is uncommon. I can change the names as follows:
Lambda -> first_cumulant
C -> second_cumulant
Kc -> third_cumulant
R -> g_geometric

Review comment (Contributor): Done here: 11353f4

C = SArrayDouble2d::new_ptr(dim, dim);
Kc = SArrayDouble2d::new_ptr(dim, dim);
R = SArrayDouble2d::new_ptr(dim, dim);
}

// Formulae from eq. 7 in the paper
void HawkesTheoreticalCumulant::compute_mean_intensity() {
for (int i = 0; i < d; ++i) {
double lambda_i = 0;
for (int m = 0; m < d; ++m) {
int _im = i * d + m;
double mu_m = (*mu)[m];
double R_im = (*R)[_im];
lambda_i += R_im * mu_m;
}
(*Lambda)[i] = lambda_i;
}
};

// Formulae from eq. 8 in the paper
void HawkesTheoreticalCumulant::compute_covariance() {
for (int i = 0; i < d; ++i) {
for (int j = 0; j < d; ++j) {
int _ij = i * d + j;
double C_ij = 0;
for (int m = 0; m < d; ++m) {
int _im = i * d + m;
int _jm = j * d + m;
C_ij += (*Lambda)[m] * (*R)[_im] * (*R)[_jm];
}
(*C)[_ij] = C_ij;
}
}
};

// Formulae from eq. 9 in the paper
void HawkesTheoreticalCumulant::compute_skewness() {
for (int i = 0; i < d; ++i) {
for (int k = 0; k < d; ++k) {
int _ik = i * d + k;
double Kc_ik = 0;
for (int m = 0; m < d; ++m) {
int _im = i * d + m;
int _km = k * d + m;
double R_im = (*R)[_im];
double R_km = (*R)[_km];
double C_km = (*C)[_km];
double C_im = (*C)[_im];
double Lambda_m = (*Lambda)[m];
Kc_ik += (R_im * R_im * C_km + 2 * R_im * R_km * C_im - 2 * Lambda_m * R_im * R_im * R_km);
}
(*Kc)[_ik] = Kc_ik;
}
}
};
void HawkesTheoreticalCumulant::compute_cumulants() {
compute_mean_intensity();
compute_covariance();
compute_skewness();
}
18 changes: 6 additions & 12 deletions lib/cpp/hawkes/simulation/simu_hawkes.cpp
@@ -13,8 +13,7 @@ Hawkes::Hawkes(unsigned int n_nodes, int seed)
}
}

- void Hawkes::init_intensity_(ArrayDouble &intensity,
- double *total_intensity_bound) {
+ void Hawkes::init_intensity_(ArrayDouble &intensity, double *total_intensity_bound) {
*total_intensity_bound = 0;
for (unsigned int i = 0; i < n_nodes; i++) {
intensity[i] = get_baseline(i, 0.);
@@ -30,16 +29,14 @@ bool Hawkes::update_time_shift_(double delay, ArrayDouble &intensity,
// We loop on the contributions
for (unsigned int i = 0; i < n_nodes; i++) {
intensity[i] = get_baseline(i, get_time());
- if (total_intensity_bound1)
- *total_intensity_bound1 += get_baseline_bound(i, get_time());
+ if (total_intensity_bound1) *total_intensity_bound1 += get_baseline_bound(i, get_time());

for (unsigned int j = 0; j < n_nodes; j++) {
HawkesKernelPtr &k = kernels[i * n_nodes + j];

if (k->get_support() == 0) continue;
double bound = 0;
- intensity[i] +=
- k->get_convolution(get_time() + delay, *timestamps[j], &bound);
+ intensity[i] += k->get_convolution(get_time() + delay, *timestamps[j], &bound);

if (total_intensity_bound1) {
*total_intensity_bound1 += bound;
@@ -56,15 +53,13 @@ bool Hawkes::update_time_shift_(double delay, ArrayDouble &intensity,
void Hawkes::reset() {
for (unsigned int i = 0; i < n_nodes; i++) {
for (unsigned int j = 0; j < n_nodes; j++) {
- if (kernels[i * n_nodes + j] != nullptr)
- kernels[i * n_nodes + j]->rewind();
+ if (kernels[i * n_nodes + j] != nullptr) kernels[i * n_nodes + j]->rewind();
Review comment (Member): I know it was unsigned int before; I would generally prefer std::uint32_t or similar.

Review comment (Contributor): Understood. I will open a new PR about that, because it is not related to HawkesCumulant or tensorflow.

}
}
PP::reset();
}

- void Hawkes::set_kernel(unsigned int i, unsigned int j,
- HawkesKernelPtr &kernel) {
+ void Hawkes::set_kernel(unsigned int i, unsigned int j, HawkesKernelPtr &kernel) {
if (i >= n_nodes) TICK_BAD_INDEX(0, n_nodes, i);
if (j >= n_nodes) TICK_BAD_INDEX(0, n_nodes, j);

@@ -100,8 +95,7 @@ void Hawkes::set_baseline(unsigned int i, TimeFunction time_function) {
set_baseline(i, std::make_shared<HawkesTimeFunctionBaseline>(time_function));
}

- void Hawkes::set_baseline(unsigned int i, ArrayDouble &times,
- ArrayDouble &values) {
+ void Hawkes::set_baseline(unsigned int i, ArrayDouble &times, ArrayDouble &values) {
set_baseline(i, std::make_shared<HawkesTimeFunctionBaseline>(times, values));
}

31 changes: 10 additions & 21 deletions lib/cpp/hawkes/simulation/simu_point_process.cpp
@@ -7,8 +7,7 @@
PP::PP(unsigned int n_nodes, int seed) : rand(seed), n_nodes(n_nodes) {
// Setting the process
timestamps.resize(n_nodes);
- for (unsigned int i = 0; i < n_nodes; i++)
- timestamps[i] = VArrayDouble::new_ptr();
+ for (unsigned int i = 0; i < n_nodes; i++) timestamps[i] = VArrayDouble::new_ptr();

// Init current time
time = 0;
@@ -54,8 +53,7 @@ void PP::reset() {

intensity.init_to_zero();

- for (unsigned int i = 0; i < n_nodes; ++i)
- timestamps[i] = VArrayDouble::new_ptr();
+ for (unsigned int i = 0; i < n_nodes; ++i) timestamps[i] = VArrayDouble::new_ptr();
activate_itr(itr_time_step);
}

@@ -81,28 +79,21 @@ void PP::itr_process() {
itr_times->append1(time);
}

- void PP::update_time_shift(double delay, bool flag_compute_intensity_bound,
- bool flag_itr) {
+ void PP::update_time_shift(double delay, bool flag_compute_intensity_bound, bool flag_itr) {
flag_negative_intensity = update_time_shift_(
- delay, intensity,
- (flag_compute_intensity_bound ? &total_intensity_bound : nullptr));
+ delay, intensity, (flag_compute_intensity_bound ? &total_intensity_bound : nullptr));

time += delay;

- if (flag_compute_intensity_bound &&
- max_total_intensity_bound < total_intensity_bound)
+ if (flag_compute_intensity_bound && max_total_intensity_bound < total_intensity_bound)
max_total_intensity_bound = total_intensity_bound;

if (flag_itr) itr_process();
}

- void PP::simulate(ulong n_points) {
- simulate(std::numeric_limits<double>::max(), n_points);
- }
+ void PP::simulate(ulong n_points) { simulate(std::numeric_limits<double>::max(), n_points); }

- void PP::simulate(double run_time) {
- simulate(run_time, std::numeric_limits<ulong>::max());
- }
+ void PP::simulate(double run_time) { simulate(run_time, std::numeric_limits<ulong>::max()); }

void PP::simulate(double end_time, ulong n_points) {
// This causes deadlock, see MLPP-334 - Investigate deadlock in PP
@@ -120,8 +111,7 @@ void PP::simulate(double end_time, ulong n_points) {
while (time < end_time && n_total_jumps < n_points &&
(!flag_negative_intensity || threshold_negative_intensity)) {
// We compute the time of the potential next random jump
- const double time_of_next_jump =
- time + rand.exponential(total_intensity_bound);
+ const double time_of_next_jump = time + rand.exponential(total_intensity_bound);

// If we must track record the intensities we perform a loop
if (itr_on()) {
@@ -209,9 +199,8 @@ void PP::set_timestamps(VArrayDoublePtrList1D &timestamps, double end_time) {

// Find where is next jump
for (ulong i = 0; i < n_nodes; ++i) {
- const double next_jump_node_i = current_index[i] < timestamps[i]->size()
- ? (*timestamps[i])[current_index[i]]
- : inf;
+ const double next_jump_node_i =
+ current_index[i] < timestamps[i]->size() ? (*timestamps[i])[current_index[i]] : inf;
if (next_jump_node_i < next_jump_time) {
next_jump_node = i;
next_jump_time = next_jump_node_i;
33 changes: 28 additions & 5 deletions lib/include/tick/hawkes/inference/hawkes_cumulant.h
@@ -13,12 +13,10 @@ class DLL_PUBLIC HawkesCumulant : public ModelHawkesList {
public:
explicit HawkesCumulant(double integration_support);

- SArrayDoublePtr compute_A_and_I_ij(ulong r, ulong i, ulong j,
- double mean_intensity_j);
+ SArrayDoublePtr compute_A_and_I_ij(ulong r, ulong i, ulong j, double mean_intensity_j);

- double compute_E_ijk(ulong r, ulong i, ulong j, ulong k,
- double mean_intensity_i, double mean_intensity_j,
- double J_ij);
+ double compute_E_ijk(ulong r, ulong i, ulong j, ulong k, double mean_intensity_i,
+ double mean_intensity_j, double J_ij);

double get_integration_support() const { return integration_support; }

@@ -35,4 +33,29 @@ class DLL_PUBLIC HawkesCumulant : public ModelHawkesList {
}
};

class DLL_PUBLIC HawkesTheoreticalCumulant {
Review comment (Collaborator, Author): @Mbompr @PhilipDeegan

This is a new C++ class that implements the theoretical formulae for the mean intensity, covariance, and skewness of Hawkes processes; these are formulae (7), (8), and (9) in the paper.
Currently, the class is only used to test the accuracy of the estimation in HawkesCumulantMatching. However, the formulae have intrinsic value beyond such testing, and we might include these calculations in a more general Hawkes class in the future.
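For reference, a sketch of the three relations as implemented by compute_mean_intensity, compute_covariance, and compute_skewness in hawkes_cumulant.cpp above, with the notation read off the code ($\mu$ the baseline, $R = (I - G)^{-1}$):

$$\Lambda_i = \sum_m R_{im}\,\mu_m, \qquad C_{ij} = \sum_m \Lambda_m R_{im} R_{jm}, \qquad K^c_{ik} = \sum_m \left( R_{im}^2\, C_{km} + 2\, R_{im} R_{km} C_{im} - 2\, \Lambda_m R_{im}^2 R_{km} \right)$$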

private:
int d;
SArrayDoublePtr mu;
SArrayDoublePtr Lambda;
SArrayDouble2dPtr C;
SArrayDouble2dPtr Kc;
SArrayDouble2dPtr R;

public:
HawkesTheoreticalCumulant(int);
int get_dimension() { return d; }
void set_baseline(const SArrayDoublePtr mu) { this->mu = mu; }
Review comment (Member, @PhilipDeegan, Jan 29, 2023): We might not have it everywhere, but T const[&] n is the preferred order.

Review comment (Contributor): Changed: fa43f6d

SArrayDoublePtr get_baseline() { return mu; }
void set_R(const SArrayDouble2dPtr R) { this->R = R; }
SArrayDouble2dPtr get_R() { return R; }
void compute_mean_intensity();
void compute_covariance();
void compute_skewness();
void compute_cumulants();
SArrayDoublePtr mean_intensity() { return Lambda; }
SArrayDouble2dPtr covariance() { return C; }
SArrayDouble2dPtr skewness() { return Kc; }
};

#endif // LIB_INCLUDE_TICK_HAWKES_INFERENCE_HAWKES_CUMULANT_H_
16 changes: 15 additions & 1 deletion lib/swig/tick/hawkes/inference/hawkes_cumulant.i
@@ -23,4 +23,18 @@ public:
void set_integration_support(const double integration_support);
bool get_are_cumulants_ready() const;
void set_are_cumulants_ready(const bool recompute_cumulants);
- };
+ };

class HawkesTheoreticalCumulant{
public:
HawkesTheoreticalCumulant(int);
int get_dimension();
void set_baseline(const SArrayDoublePtr mu);
SArrayDoublePtr get_baseline();
void set_R(const SArrayDouble2dPtr R);
SArrayDouble2dPtr get_R();
void compute_cumulants();
SArrayDoublePtr mean_intensity();
SArrayDouble2dPtr covariance();
SArrayDouble2dPtr skewness();
};
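A minimal sketch of how the SWIG-exposed class could be driven from Python. The import path of the compiled extension module is hypothetical (it depends on how the bindings are packaged), and the numpy-to-SArray conversion is assumed to be handled by tick's usual SWIG typemaps; the method names are the ones declared above:

```python
import numpy as np

# Hypothetical import path: the compiled SWIG module's location depends on the build layout.
from tick.hawkes.inference.build.hawkes_inference import HawkesTheoreticalCumulant

d = 2
cumulant = HawkesTheoreticalCumulant(d)
cumulant.set_baseline(np.array([0.1, 0.3]))          # baseline vector mu
cumulant.set_R(np.eye(d) + 0.2 * np.ones((d, d)))    # R = (I - G)^{-1}
cumulant.compute_cumulants()

first_cumulant = cumulant.mean_intensity()   # Lambda, length-d vector
second_cumulant = cumulant.covariance()      # C, d x d matrix
third_cumulant = cumulant.skewness()         # Kc, d x d matrix
```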
6 changes: 5 additions & 1 deletion tick/hawkes/__init__.py
@@ -13,7 +13,9 @@
HawkesKernelSumExp, HawkesKernelTimeFunc)
from .inference import (HawkesADM4, HawkesExpKern, HawkesSumExpKern,
HawkesBasisKernels, HawkesConditionalLaw, HawkesEM,
- HawkesSumGaussians, HawkesCumulantMatching)
+ HawkesSumGaussians, HawkesCumulantMatching,
+ HawkesCumulantMatchingTf, HawkesCumulantMatchingPyT
+ )

__all__ = [
"HawkesADM4",
@@ -39,4 +41,6 @@
"HawkesKernelSumExp",
"HawkesKernelTimeFunc",
"HawkesCumulantMatching",
"HawkesCumulantMatchingTf",
"HawkesCumulantMatchingPyT",
]
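With these exports, the TensorFlow-backed estimator is importable directly from tick.hawkes. A minimal usage sketch, assuming HawkesCumulantMatchingTf keeps the signature of the existing HawkesCumulantMatching (integration_support as the first constructor argument, fit() taking the realization's timestamps, adjacency holding the estimated kernel norms); the parameter values are illustrative only:

```python
import numpy as np
from tick.hawkes import SimuHawkesExpKernels, HawkesCumulantMatchingTf

# Simulate a small 2-node Hawkes process with exponential kernels to get data.
adjacency = np.array([[0.1, 0.3], [0.0, 0.2]])
baseline = np.array([0.5, 0.7])
simu = SimuHawkesExpKernels(adjacency, decays=2.0, baseline=baseline,
                            end_time=10000, verbose=False, seed=42)
simu.simulate()

# Fit by matching the first three integrated cumulants (NPHC).
nphc = HawkesCumulantMatchingTf(integration_support=5.0, C=1e3, max_iter=1000)
nphc.fit(simu.timestamps)

print(nphc.adjacency)  # estimated matrix of kernel L1 norms
```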
4 changes: 3 additions & 1 deletion tick/hawkes/inference/__init__.py
@@ -7,7 +7,7 @@
from .hawkes_expkern_fixeddecay import HawkesExpKern
from .hawkes_sumexpkern_fixeddecay import HawkesSumExpKern
from .hawkes_sumgaussians import HawkesSumGaussians
- from .hawkes_cumulant_matching import HawkesCumulantMatching
+ from .hawkes_cumulant_matching import HawkesCumulantMatching, HawkesCumulantMatchingTf, HawkesCumulantMatchingPyT

__all__ = [
"HawkesExpKern",
@@ -18,4 +18,6 @@
"HawkesBasisKernels",
"HawkesSumGaussians",
"HawkesCumulantMatching",
"HawkesCumulantMatchingTf",
"HawkesCumulantMatchingPyT",
]