Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Generalised Linear Models to FSharpStats #334

Merged
merged 54 commits into from
Oct 24, 2024
Merged
Changes from 1 commit
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
6288d55
Implement IRLS solver for GLMs
LibraChris Feb 8, 2024
425d12b
Rename variables
LibraChris May 8, 2024
d1c9c41
add qr based GLM
LibraChris May 10, 2024
d637be7
add inital tests for the glm
LibraChris May 10, 2024
d0fc5ee
Update glm QR Solver
LibraChris May 14, 2024
d24955a
Add new Test for GLMs using Gamma Distribution
LibraChris May 14, 2024
ddcf09c
Add tests for the Poisson linker functions
LibraChris May 14, 2024
60c3ec1
Add tests for the Gamma linker functions
LibraChris May 14, 2024
0f0661c
Rename testcases to Reflect their log function
LibraChris May 14, 2024
fe83ba6
Add tests for the LogitLinkFunction
LibraChris May 14, 2024
ab44068
Add tests for the InverseSquaredLinkFunction
LibraChris May 15, 2024
c00980e
Add tests by example for glm IrLS solver
LibraChris May 15, 2024
1e6a524
Add tests for the IdentityLinkFunction
LibraChris May 15, 2024
ac416bf
Add tests groudwork for the BinomialLinkFunction
LibraChris May 15, 2024
8f88c1e
Add tests for the variance of Binominal Family
LibraChris May 16, 2024
148a933
Add tests for the variance of Poisson Family
LibraChris May 16, 2024
a8b5f00
Add tests for the variance of Gaussian/Normal Family
LibraChris May 16, 2024
2cbef3c
Fix test implemetation for familyVarianceFunctions
LibraChris May 16, 2024
a73a07e
Add tests for the variance of Gamma Family
LibraChris May 16, 2024
4d03d46
Add tests for the variance of Inv.Gaussian Family
LibraChris May 16, 2024
4465115
Rename test Cases based on their DistributionFamily
LibraChris May 16, 2024
a6e6568
Fix LogitLinkFunction
LibraChris May 16, 2024
de1fcd7
remove redundant BinomialLinkFunction
LibraChris May 16, 2024
3554a02
Remove redundant LinkFunction
LibraChris May 16, 2024
c1f38f1
Fix InverseSquaredLinkFunction
LibraChris May 16, 2024
2a3b096
Updated Gamma Distribution Variance function
LibraChris May 16, 2024
3f5a349
add Deriv Functions
LibraChris May 18, 2024
2787fbd
add Tests for Link and deriv
LibraChris May 18, 2024
3ee33e3
fix various Linkfunctions
LibraChris May 18, 2024
43cea23
Rework GLM QR Solver
LibraChris May 22, 2024
3e83833
Modify tests
LibraChris May 22, 2024
2816155
Add tests prototype for QR-Stepwise iteration
LibraChris May 22, 2024
c5ced84
Fix QR based solver for GLMs
LibraChris May 22, 2024
5029c3a
Modify Variance tests
LibraChris May 22, 2024
c3dddcb
Update statistics
LibraChris May 28, 2024
a7c5c1b
Update GeneralisedLinearModel.fs
LibraChris May 28, 2024
d8877b7
Update GeneralisedLinearModel.fs
LibraChris May 29, 2024
3cd68a8
Update GeneralisedLinearModel.fs
LibraChris May 30, 2024
253ac91
Rework GLMStatistics
LibraChris May 31, 2024
19cad0f
Remove deprecated GLM.Irls
LibraChris May 31, 2024
1b3336f
Fix minor testing issue
LibraChris May 31, 2024
a1d0ee4
add getFamilyReisualDeviance for more families
LibraChris Jun 2, 2024
37d03e0
Write code comments and documentation
LibraChris Jun 5, 2024
5e9a1b6
add Documentation for GLM Usage
LibraChris Jun 7, 2024
72bfb83
Update formating for documentation
LibraChris Jun 10, 2024
a8a0004
added data for Documentation
LibraChris Jun 10, 2024
f694340
remote tests for binominal family variance
LibraChris Jun 10, 2024
8dcd8ab
Adress changes requested in #344
LibraChris Jun 12, 2024
170519e
Adress changes requested in #334
LibraChris Jun 18, 2024
ba5ae9c
Update xml comments
LibraChris Jun 19, 2024
6c3a235
fix building error
LibraChris Jul 3, 2024
2e80081
Fix Typo
LibraChris Jul 3, 2024
13b3de9
Fix indentations
LibraChris Aug 26, 2024
df24c3f
Updated XML documentation
LibraChris Oct 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Rework GLMStatistics
LibraChris committed Jul 3, 2024
commit 253ac919335d325b2d8354ef3ea54114eeae7979
207 changes: 81 additions & 126 deletions src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs
Original file line number Diff line number Diff line change
@@ -91,7 +91,6 @@ module LinkFunctions =
getInvLinkDerivative = fun a -> 1.
}


type GlmDistributionFamily =
|Normal
|Exponential
@@ -188,6 +187,12 @@ module GlmDistributionFamily =
| _ ->
raise (System.NotImplementedException())

/// Result record produced by the GLM solver (constructed as `{mX=mX;mu=mu}` at the end of the QR solve).
type GLMReturn =
{
/// Solved parameter vector; each entry is reported as a `Coefficient` by the parameter statistics.
mX:Vector<float>
/// Vector passed on as `mu` to the model statistics, where it is used as the per-observation mean.
mu:Vector<float>
}

type GLMStatistics =
{
StandardErrors:Vector<float>
@@ -198,31 +203,55 @@ type GLMStatistics =
AdjustedR2:float
}

type GLMStatisticsPython =
/// Model-level summary statistics of a fitted GLM, as filled in by `GLMStatistics.getGLMStatisticsModel`.
type GLMStatisticsModel =
{
/// Log-likelihood value computed from the observations and `mu`.
LogLikelihood:float
/// Residual deviance obtained from `GlmDistributionFamily.getFamilyReisualDeviance`.
Deviance:float
/// Sum of squared residuals, each divided by the family variance at the fitted value.
PearsonChi2:float
/// NOTE(review): currently always set to 0. by `getGLMStatisticsModel`; no pseudo-R² is computed yet.
PseudoR2:float
}

/// Per-parameter statistics of a fitted GLM; one record is produced for every entry of the solved coefficient vector.
/// NOTE(review): the type name ("Prameter") and the field `PersonOfZ` look like typos of "Parameter" / "PearsonOfZ";
/// they are left unchanged here because they are part of the public interface.
type GLMStatisticsPrameter =
{
//Name:string
/// Estimated coefficient (entry of `GLMReturn.mX`).
Coefficient:float
/// Standard error of the coefficient.
StandardError:float
/// Coefficient divided by its standard error.
ZScore:float
/// Two-sided p-value of `ZScore` under the standard normal distribution.
PersonOfZ:float
}

module GLMStatistics =
/// Returns a new matrix with the same dimensions as `matrix` in which row i
/// has been multiplied by the i-th entry of `vector`. The input matrix is not modified.
let internal scalarMultiply (matrix:Matrix<float>) (vector:Vector<float>) =
    let rowCount = matrix.NumRows
    let colCount = matrix.NumCols

    // Target matrix that is filled row by row below.
    let scaled = Matrix.zero rowCount colCount
    for rowIdx in 0 .. rowCount - 1 do
        let factor = Vector.get vector rowIdx
        (Matrix.getRow matrix rowIdx) * factor
        |> RowVector.toArray
        |> Vector.ofArray
        |> Matrix.setRow scaled rowIdx
    scaled


let getStandardError (A: Matrix<float>) (b: Vector<float>) (W: Vector<float>) (mX:Vector<float>) (mDistributionFamily: GlmDistributionFamily) =
/// Sums `y * ln(mu) - mu - lnGamma(y + 1)` over all entries of `mu`, where `y` is the
/// entry of `b` at the same index.
/// NOTE(review): this expression has the form of the Poisson log-likelihood and no
/// distribution family is passed in — confirm it is only used for Poisson models.
let internal getLogLikelihood (b:Vector<float>) (mu: vector) =
    // Contribution of a single observation; `idx` selects the matching response in `b`.
    let contribution idx (fitted: float) =
        let observed = b.[idx]
        observed * System.Math.Log(fitted) - fitted - (SpecialFunctions.Gamma.gammaLn(observed+1.0))
    mu
    |> Vector.mapi contribution
    |> Vector.sum

/// Pearson chi-square statistic: the sum over all observations of the squared difference
/// between `b` and `mu`, divided by the family variance evaluated at the `mu` entry.
let internal getChi2 (b:Vector<float>) (mu:Vector<float>) (family: GlmDistributionFamily) =
    let scaledSquaredResidual (observed: float) (fitted: float) =
        let residual = observed - fitted
        (residual ** 2.) / (GlmDistributionFamily.getVariance family fitted)
    Vector.map2 scaledSquaredResidual b mu
    |> Vector.sum

/// Builds the model-level statistics record (log-likelihood, residual deviance and
/// Pearson chi-square) from the observations `b`, the vector `mu` and the distribution family.
/// `PseudoR2` is not computed and is always reported as 0.
let getGLMStatisticsModel (b:Vector<float>) (mu: vector) (family: GlmDistributionFamily) =
    {
        LogLikelihood = getLogLikelihood b mu
        Deviance = GlmDistributionFamily.getFamilyReisualDeviance family b mu
        PearsonChi2 = getChi2 b mu family
        PseudoR2 = 0.
    }


let internal getStandardError (A: Matrix<float>) (b: Vector<float>) (W: Vector<float>) =
let At :Matrix<float> = Matrix.transpose A
let WMatrix = Matrix.diag W
let AtW = At * WMatrix
@@ -238,104 +267,33 @@ module GLMStatistics =
)
stndErrors




let getStatisticsQR (A: Matrix<float>) (b: Vector<float>) (W: Vector<float>) (mX:Vector<float>) (mDistributionFamily: GlmDistributionFamily) =
let At :Matrix<float> = Matrix.transpose A
let AtW = scalarMultiply At W
let AtWA :Matrix<float> = AtW*A
let AtWAInv = Algebra.LinearAlgebra.Inverse AtWA

let n = AtWAInv.NumRows
let m = Vector.length b

let rec crossProdLoop crossProd i j =
if j=n then
crossProd
else
let elementA: float = (Matrix.get A i j)
let elementmX: float = mX[j]
let crossProdNew = crossProd + (elementA*elementmX)
crossProdLoop (crossProdNew) i (j+1)

let linkFunction = GlmDistributionFamily.getLinkFunction mDistributionFamily

let stndErrors: Vector<float> = Vector.init n (fun v -> Matrix.get AtWAInv v v)

let outcomes: Vector<float> = m |> Vector.zeroCreate
let residuals: Vector<float> = m |> Vector.zeroCreate

for count=0 to m-1 do
let crossProd = crossProdLoop 0. count 0
let elementB = b[count]
let link = linkFunction.getInvLink crossProd

residuals[count] <- (elementB-link)
outcomes[count] <- (elementB)

let getStdDev (vec:Vector<float>) (mean:float) =
Vector.fold (fun folder v ->
let a = v - mean
let valNew = System.Math.Pow(a,2)
folder + valNew
) 0. vec
|> fun x -> (System.Math.Sqrt((x)/float vec.Length))

let residualStdDev = getStdDev residuals 0.
let responseMean = Vector.mean(outcomes)
let responseVariance =
let v = getStdDev outcomes responseMean
System.Math.Pow(v, 2)
let r2 = 1. - residualStdDev * residualStdDev / responseVariance
let adjustedR2 = 1. - (residualStdDev * residualStdDev) / responseVariance * (float n) / ((float n) - (float mX.Length) - 1.)

{
StandardErrors=stndErrors
ResidualStandardDeviation=residualStdDev
ResponseMean=responseMean
ResponseVariance=responseVariance
R2=r2
AdjustedR2=adjustedR2
}

let getLogLikelihood (b:Vector<float>) (mu: vector) =
Vector.mapi(fun i v ->
let y = b.[i]
let meanDist = v
y * System.Math.Log(meanDist) - meanDist - (SpecialFunctions.Gamma.gammaLn(y+1.0))
) mu
|> Vector.sum

let getSumOfSquares (b:Vector<float>) (linPred: vector) =
Vector.mapi(fun i v ->
let y = b.[i]
let yi = v
let a = y - yi
a*a
) linPred
/// Element-wise quotient of `mx` and `stndError`: each coefficient divided by its standard error.
let internal getZStatistic (mx: Vector<float>) (stndError: Vector<float>) =
    Vector.map2 (/) mx stndError


/// Maps every z statistic to `2 * (1 - Phi(|z|))`, where `Phi` is the CDF of the
/// normal distribution with mean 0 and standard deviation 1 (a two-sided p-value).
let internal getPearsonOfZ (zStatistic: Vector<float>) =
    let twoSidedPValue (z: float) =
        let cumulative = Distributions.Continuous.Normal.CDF 0. 1. (abs z)
        2. * (1. - cumulative)
    zStatistic |> Vector.map twoSidedPValue

/// Produces one statistics record per entry of `solved.mX`, holding the coefficient,
/// its standard error, the z score and the two-sided p-value of that z score.
/// The standard errors are obtained from `getStandardError A b solved.mu`.
/// The result is a lazily generated sequence (`Seq.init`).
let getGLMParameterStatistics (A:Matrix<float>) (b:Vector<float> ) (solved:GLMReturn) =
    let coefficients = solved.mX
    let standardErrors = getStandardError A b solved.mu
    let zScores = getZStatistic coefficients standardErrors
    let pValues = getPearsonOfZ zScores

    // Assembles the record for the parameter at position `idx`.
    let describeParameter idx =
        {
            Coefficient = coefficients.[idx]
            StandardError = standardErrors.[idx]
            ZScore = zScores.[idx]
            PersonOfZ = pValues.[idx]
        }

    Seq.init (Vector.length coefficients) describeParameter

let getchi2 (b:Vector<float>) (linPred: vector) =
Vector.map2(fun y yi ->
let a = y - yi
let nominator = a**2.
nominator / yi
) b linPred
|> Vector.sum


let getchi2New _endog mu family =
// chisq = (self._endog - self.mu)**2 / self.family.variance(self.mu)
// chisq *= self._iweights * self._n_trials
// chisqsum = np.sum(chisq)
// return chisqsum
Vector.map2(fun y yi ->
let a = y - yi
let nominator = a**2.
nominator / (GlmDistributionFamily.getVariance family yi)
) _endog mu
|> Vector.sum

let getStatisticsIRLS (A: Matrix<float>) (b: Vector<float>) (mDistributionFamily: GlmDistributionFamily) (vcovmat: Matrix<float>) (mX: Vector<float>) =
let n = vcovmat.NumRows
let m = Vector.length b
@@ -565,7 +523,7 @@ module QR =

mX,r

let stepwiseGainQR
let internal stepwiseGainQR
(A: Matrix<float>)
(b: Vector<float>)
(mDistributionFamily: GlmDistributionFamily)
@@ -631,7 +589,7 @@ module QR =

cost,mu_new,linPred_new,wlsResults,wlsendog

let loopTilIterQR
let internal loopTilIterQR
(A: Matrix<float>)
(b: Vector<float>)
(mDistributionFamily: GlmDistributionFamily)
@@ -708,13 +666,10 @@ module QR =

let mX,R = wlsResult,wlsendog

let deviance = GlmDistributionFamily.getFamilyReisualDeviance mDistributionFamily b mu

let stndError = GLMStatistics.getStandardError A b mu mX mDistributionFamily

let zStatistic = Vector.map2 (fun x y -> x/y) mX stndError

printfn $"LogLikely: {(GLMStatistics.getLogLikelihood b mu)} \n Dev: {deviance} \n chi2: {GLMStatistics.getchi2New b mu mDistributionFamily} \n chi2_2: {GLMStatistics.getchi2New b linPred mDistributionFamily} \n stndError: {stndError} \n zStatistic: {zStatistic}"
//Update Stats
let statistics = GLMStatistics.getStatisticsQR A b mu mX mDistributionFamily
mX,(mu,linPred,stndError,zStatistic)
{mX=mX;mu=mu}

/// Convenience wrapper: computes the model-level statistics for a solved GLM by
/// forwarding the observations, the solver's `mu` vector and the family to
/// `GLMStatistics.getGLMStatisticsModel`.
let getGLMModelStatistics (b:Vector<float>) (solvedGLM:GLMReturn) (mDistributionFamily:GlmDistributionFamily) =
    let fittedMeans = solvedGLM.mu
    GLMStatistics.getGLMStatisticsModel b fittedMeans mDistributionFamily

/// Convenience wrapper: computes the per-parameter statistics (coefficient, standard
/// error, z score, p-value) for a solved GLM by forwarding to
/// `GLMStatistics.getGLMParameterStatistics`.
/// `A` is the design matrix, `b` the observation vector and `solved` the solver result.
let getGLMParameterStatistics (A:Matrix<float>) (b:Vector<float> ) (solved:GLMReturn) =
    // The arguments must be applied here; returning the bare function would ignore
    // A, b and solved and hand the caller an unapplied function instead of the statistics.
    GLMStatistics.getGLMParameterStatistics A b solved
13 changes: 7 additions & 6 deletions tests/FSharp.Stats.Tests/GeneralisedLinearModels.fs
Original file line number Diff line number Diff line change
@@ -945,9 +945,9 @@ let GLMTestsQR =

let cheeseMatrix,cheeseVector = HelperFunctions.generateBaseMatrixAndVector "Taste" [] cheeseframe

let actualResults,statistics =
let actualResultsRaw =
QR.solveQrNewton cheeseMatrix cheeseVector 200 GlmDistributionFamily.Poisson tolRef

let actualResults = actualResultsRaw.mX

Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "GLM Intecept wrong"
Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "GLM Acetic wrong"
@@ -971,9 +971,9 @@ let GLMTestsQR =

let energyMatrix,energyVector = HelperFunctions.generateBaseMatrixAndVector "Energy" [] energyframe

let actualResults,statistics =
let actualResultsRaw =
QR.solveQrNewton energyMatrix energyVector 200 GlmDistributionFamily.Poisson tolRef

let actualResults = actualResultsRaw.mX

Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] "GLM Intecept wrong"
Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "GLM Fat wrong"
@@ -998,9 +998,10 @@ let GLMTestsQR =

let lungcapMatrix,lungcapVector = HelperFunctions.generateBaseMatrixAndVector "FEV" [] lungcapframe

let actualResults,statistics =
let actualResultsRaw =
QR.solveQrNewton lungcapMatrix lungcapVector 200 GlmDistributionFamily.Gamma tolRef

let actualResults = actualResultsRaw.mX

let x = $"{actualResults.[0]} {actualResults.[1]} {actualResults.[2]} {actualResults.[3]} {actualResults.[4]}"
Expect.floatClose Accuracy.medium actualResults.[0] expected.[0] $"GLM Intecept wrong {x}"
Expect.floatClose Accuracy.medium actualResults.[1] expected.[1] "GLM Age wrong"