Skip to content
This repository was archived by the owner on Dec 8, 2024. It is now read-only.

Kernel PCA #101

Merged
merged 4 commits into from
Jan 22, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 127 additions & 0 deletions qml/kernels/fkpca.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
! MIT License
!
! Copyright (c) 2018 Anders Steen Christensen
!
! Permission is hereby granted, free of charge, to any person obtaining a copy
! of this software and associated documentation files (the "Software"), to deal
! in the Software without restriction, including without limitation the rights
! to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
! copies of the Software, and to permit persons to whom the Software is
! furnished to do so, subject to the following conditions:
!
! The above copyright notice and this permission notice shall be included in all
! copies or substantial portions of the Software.
!
! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
! OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
! SOFTWARE.


subroutine fkpca(k, n, centering, kpca)

implicit none

double precision, dimension(:,:), intent(in) :: k
integer, intent(in) :: n
logical, intent(in) :: centering
double precision, dimension(n,n), intent(out) :: kpca

! Eigenvalues
double precision, dimension(n) :: eigenvals

double precision, allocatable, dimension(:) :: work

integer :: lwork
integer :: info

integer :: i

double precision :: inv_n
double precision, allocatable, dimension(:) :: temp
double precision :: temp_sum

kpca(:,:) = k(:,:)

! This first part centers the matrix,
! basically Kpca = K - G@K - K@G + G@K@G, with G = 1/n
! It is a bit hard to follow, sry, but it is very fast
! and requires very little memory overhead.

if (centering) then

inv_n = 1.0d0 / n

allocate(temp(n))
temp(:) = 0.0d0

!$OMP PARALLEL DO
do i = 1, n
temp(i) = sum(k(i,:)) * inv_n
enddo
!$OMP END PARALLEL DO

temp_sum = sum(temp(:)) * inv_n

!$OMP PARALLEL DO
do i = 1, n
kpca(i,:) = kpca(i,:) + temp_sum
enddo
!$OMP END PARALLEL DO

!$OMP PARALLEL DO
do i = 1, n
kpca(:,i) = kpca(:,i) - temp(i)
enddo
!$OMP END PARALLEL DO

!$OMP PARALLEL DO
do i = 1, n
kpca(i,:) = kpca(i,:) - temp(i)
enddo
!$OMP END PARALLEL DO

deallocate(temp)

endif

! This 2nd part solves the eigenvalue problem with the least
! memory intensive solver, namely DSYEV(). DSYEVD() is twice
! as fast, but requires a lot more memory, which quickly
! becomes prohibitive.

! Dry run which returns the optimal "lwork"
allocate(work(1))
call dsyev("V", "U", n, kpca, n, eigenvals, work, -1, info)
lwork = nint(work(1)) + 1
deallocate(work)

! Get eigenvectors
allocate(work(lwork))
call dsyev("V", "U", n, kpca, n, eigenvals, work, lwork, info)
deallocate(work)

if (info < 0) then

write (*,*) "ERROR: The ", -info, "-th argument to DSYEV() had an illegal value."

else if (info > 0) then

write (*,*) "ERROR: DSYEV() failed to compute an eigenvalue."

end if

! This 3rd part sorts the kernel PCA matrix such that the first PCA is kpca(1)
kpca = kpca(:,n:1:-1)
kpca = transpose(kpca)

!$OMP PARALLEL DO
do i = 1, n
kpca(i,:) = kpca(i,:) * sqrt(eigenvals(n - i + 1))
enddo
!$OMP END PARALLEL DO

end subroutine fkpca
31 changes: 31 additions & 0 deletions qml/kernels/kernels.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
from .fkernels import fget_local_kernels_laplacian
from .fkernels import fget_vector_kernels_gaussian, fget_vector_kernels_gaussian_symmetric

from .fkernels import fkpca

def laplacian_kernel(A, B, sigma):
""" Calculates the Laplacian kernel matrix K, where :math:`K_{ij}`:

Expand Down Expand Up @@ -351,3 +353,32 @@ def get_local_kernels_laplacian(A, B, na, nb, sigmas):
nsigmas = len(sigmas)

return fget_local_kernels_laplacian(A.T, B.T, na, nb, sigmas, nma, nmb, nsigmas)


def kpca(K, n=2, centering=True):
""" Calculates `n` first principal components for the kernel :math:`K`.

The PCA is calculated using an OpenMP parallel Fortran routine.

A square, symmetric kernel matrix is required. Centering of the kernel matrix
is enabled by default, although this isn't a strict requirement.

:param K: 2D kernel matrix
:type K: numpy array
:param n: Number of kernel PCAs to return (default=2)
:type n: integer
:param centering: Whether to center the kernel matrix (default=True)
:type centering: bool

:return: array containing the principal components
:rtype: numpy array
"""

assert K.shape[0] == K.shape[1], "ERROR: Square matrix required for Kernel PCA."
assert np.allclose(K, K.T, atol=1e-8), "ERROR: Symmetric matrix required for Kernel PCA."
assert n <= K.shape[0], "ERROR: Requested more principal components than matrix size."

size = K.shape[0]
pca = fkpca(K, size, centering)

return pca[:n]
7 changes: 5 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,14 @@


ext_fkernels = Extension(name = '.kernels.fkernels',
sources = ['qml/kernels/fkernels.f90'],
sources = [
'qml/kernels/fkernels.f90',
'qml/kernels/fkpca.f90',
],
extra_f90_compile_args = COMPILER_FLAGS,
extra_f77_compile_args = COMPILER_FLAGS,
extra_compile_args = COMPILER_FLAGS,
extra_link_args = LINKER_FLAGS,
extra_link_args = LINKER_FLAGS + MATH_LINKER_FLAGS,
language = FORTRAN,
f2py_options=['--quiet'])

Expand Down
71 changes: 70 additions & 1 deletion test/test_kernels.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,13 @@
from __future__ import print_function

import sys
import os
import numpy as np
import scipy

import sklearn
from sklearn.decomposition import KernelPCA

import qml
from qml.kernels import laplacian_kernel
from qml.kernels import gaussian_kernel
Expand All @@ -32,6 +38,27 @@
from qml.kernels import linear_kernel
from qml.kernels import matern_kernel
from qml.kernels import sargan_kernel
from qml.kernels import kpca

def get_energies(filename):
""" Returns a dictionary with heats of formation for each xyz-file.
"""

f = open(filename, "r")
lines = f.readlines()
f.close()

energies = dict()

for line in lines:
tokens = line.split()

xyz_name = tokens[0]
hof = float(tokens[1])

energies[xyz_name] = hof

return energies

def test_laplacian_kernel():

Expand Down Expand Up @@ -136,7 +163,6 @@ def test_matern_kernel():

for metric in ("l1", "l2"):
for order in (0, 1, 2):
print(metric,order)
matern(metric, order)

def matern(metric, order):
Expand Down Expand Up @@ -220,9 +246,52 @@ def sargan(ngamma):
# Check for symmetry:
assert np.allclose(Ksymm, Ksymm.T), "Error in Sargan kernel"


def array_nan_close(a, b):
# Compares arrays, ignoring nans

m = np.isfinite(a) & np.isfinite(b)
return np.allclose(a[m], b[m], atol=1e-8, rtol=0.0)


def test_kpca():

test_dir = os.path.dirname(os.path.realpath(__file__))

# Parse file containing PBE0/def2-TZVP heats of formation and xyz filenam
data = get_energies(test_dir + "/data/hof_qm7.txt")

# Generate a list of qml.data.Compound() objects
mols = []

keys = sorted(data.keys())

np.random.seed(666)
np.random.shuffle(keys)

n_mols = 100

for xyz_file in keys[:n_mols]:

mol = qml.data.Compound(xyz=test_dir + "/qm7/" + xyz_file)
mol.properties = data[xyz_file]
mol.generate_bob()
mols.append(mol)

X = np.array([mol.representation for mol in mols])
K = laplacian_kernel(X, X, 2e5)

pcas_qml = kpca(K, n=10)
pcas_sklearn = KernelPCA(10, eigen_solver="dense", kernel='precomputed').fit_transform(K)

assert array_nan_close(np.abs(pcas_sklearn.T), np.abs(pcas_qml)), "Error in Kernel PCA decomposition."


if __name__ == "__main__":

test_laplacian_kernel()
test_gaussian_kernel()
test_linear_kernel()
test_matern_kernel()
test_sargan_kernel()
test_kpca()