Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Generalised Linear Models to FSharpStats #334

Merged
merged 54 commits into from
Oct 24, 2024
Merged
Changes from 1 commit
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
6288d55
Implement IRLS solver for GLMs
LibraChris Feb 8, 2024
425d12b
Rename variables
LibraChris May 8, 2024
d1c9c41
add qr based GLM
LibraChris May 10, 2024
d637be7
add initial tests for the glm
LibraChris May 10, 2024
d0fc5ee
Update glm QR Solver
LibraChris May 14, 2024
d24955a
Add new Test for GLMs using Gamma Distribution
LibraChris May 14, 2024
ddcf09c
Add tests for the Poisson linker functions
LibraChris May 14, 2024
60c3ec1
Add tests for the Gamma linker functions
LibraChris May 14, 2024
0f0661c
Rename testcases to Reflect their log function
LibraChris May 14, 2024
fe83ba6
Add tests for the LogitLinkFunction
LibraChris May 14, 2024
ab44068
Add tests for the InverseSquaredLinkFunction
LibraChris May 15, 2024
c00980e
Add tests by example for glm IrLS solver
LibraChris May 15, 2024
1e6a524
Add tests for the IdentityLinkFunction
LibraChris May 15, 2024
ac416bf
Add tests groundwork for the BinomialLinkFunction
LibraChris May 15, 2024
8f88c1e
Add tests for the variance of Binominal Family
LibraChris May 16, 2024
148a933
Add tests for the variance of Poisson Family
LibraChris May 16, 2024
a8b5f00
Add tests for the variance of Gaussian/Normal Family
LibraChris May 16, 2024
2cbef3c
Fix test implementation for familyVarianceFunctions
LibraChris May 16, 2024
a73a07e
Add tests for the variance of Gamma Family
LibraChris May 16, 2024
4d03d46
Add tests for the variance of Inv.Gaussian Family
LibraChris May 16, 2024
4465115
Rename test Cases based on their DistributionFamily
LibraChris May 16, 2024
a6e6568
Fix LogitLinkFunction
LibraChris May 16, 2024
de1fcd7
remove redundant BinomialLinkFunction
LibraChris May 16, 2024
3554a02
Remove redundant LinkFunction
LibraChris May 16, 2024
c1f38f1
Fix InverseSquaredLinkFunction
LibraChris May 16, 2024
2a3b096
Updated Gamma Distribution Variance function
LibraChris May 16, 2024
3f5a349
add Deriv Functions
LibraChris May 18, 2024
2787fbd
add Tests for Link and deriv
LibraChris May 18, 2024
3ee33e3
fix various Linkfunctions
LibraChris May 18, 2024
43cea23
Rework GLM QR Solver
LibraChris May 22, 2024
3e83833
Modify tests
LibraChris May 22, 2024
2816155
Add tests prototype for QR-Stepwise iteration
LibraChris May 22, 2024
c5ced84
Fix QR based solver for GLMs
LibraChris May 22, 2024
5029c3a
Modify Variance tests
LibraChris May 22, 2024
c3dddcb
Update statistics
LibraChris May 28, 2024
a7c5c1b
Update GeneralisedLinearModel.fs
LibraChris May 28, 2024
d8877b7
Update GeneralisedLinearModel.fs
LibraChris May 29, 2024
3cd68a8
Update GeneralisedLinearModel.fs
LibraChris May 30, 2024
253ac91
Rework GLMStatistics
LibraChris May 31, 2024
19cad0f
Remove deprecated GLM.Irls
LibraChris May 31, 2024
1b3336f
Fix minor testing issue
LibraChris May 31, 2024
a1d0ee4
add getFamilyReisualDeviance for more families
LibraChris Jun 2, 2024
37d03e0
Write code comments and documentation
LibraChris Jun 5, 2024
5e9a1b6
add Documentation for GLM Usage
LibraChris Jun 7, 2024
72bfb83
Update formatting for documentation
LibraChris Jun 10, 2024
a8a0004
added data for Documentation
LibraChris Jun 10, 2024
f694340
remote tests for binominal family variance
LibraChris Jun 10, 2024
8dcd8ab
Adress changes requested in #344
LibraChris Jun 12, 2024
170519e
Address changes requested in #334
LibraChris Jun 18, 2024
ba5ae9c
Update xml comments
LibraChris Jun 19, 2024
6c3a235
fix building error
LibraChris Jul 3, 2024
2e80081
Fix Typo
LibraChris Jul 3, 2024
13b3de9
Fix indentations
LibraChris Aug 26, 2024
df24c3f
Updated XML documentation
LibraChris Oct 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Update GeneralisedLinearModel.fs
  • Loading branch information
LibraChris committed Jul 3, 2024
commit d8877b74f4525547c6d930f4edaf0f47d1cd177b
230 changes: 109 additions & 121 deletions src/FSharp.Stats/Fitting/GeneralisedLinearModel.fs
Original file line number Diff line number Diff line change
Expand Up @@ -183,20 +183,11 @@ module GlmDistributionFamily =
let d = endV * b - c
2.*d
) endog mu
|> Vector.sum

| _ ->
raise (System.NotImplementedException())

/// <summary>
/// Applies <c>func</c> to <c>endog</c> and <c>mu</c>, then combines the three vectors element-wise:
/// each element contributes signFunction(endog - mu) * sqrt(clean(term)), and the contributions are summed.
/// </summary>
/// <param name="endog">First vector passed to <c>func</c>; also the minuend of the sign argument.</param>
/// <param name="mu">Second vector passed to <c>func</c>; also the subtrahend of the sign argument.</param>
/// <param name="func">Function producing the vector whose cleaned square roots are taken.</param>
/// <returns>The sum of the signed square-root terms as a single float.</returns>
let resid_dev (endog: Vector<float>) (mu: Vector<float>) (func: Vector<float> -> Vector<float> -> Vector<float>) =
    // Sign is taken before the root so the helper calls happen in the same order as before.
    let signedRoot (observed: float) (fitted: float) (term: float) =
        let sign = signFunction (observed - fitted)
        let root = System.Math.Sqrt(clean term)
        sign * root
    func endog mu
    |> Vector.map3 signedRoot endog mu
    |> Vector.sum

type GLMStatistics =
{
StandardErrors:Vector<float>
Expand All @@ -206,6 +197,15 @@ type GLMStatistics =
R2:float
AdjustedR2:float
}

/// Record of four float fields: LogLikelihood, Deviance, PearsonChi2 and PseudoR2.
/// NOTE(review): nothing in the visible code constructs or reads this record; the name
/// suggests it is meant to mirror the summary statistics of a Python GLM implementation — confirm.
type GLMStatisticsPython =
{
LogLikelihood:float
Deviance:float
PearsonChi2:float
PseudoR2:float
}

module GLMStatistics =
let internal scalarMultiply (matrix:Matrix<float>) (vector:Vector<float>) =
let m = matrix.NumRows
Expand All @@ -221,6 +221,26 @@ module GLMStatistics =
Matrix.setRow results i scalarRow
results


/// <summary>
/// Computes one value per row of the inverse of (Aᵀ · diag(W) · A): the square root of
/// the corresponding diagonal entry of that inverse.
/// </summary>
/// <param name="A">Matrix that is transposed and multiplied around the diagonal weight matrix.</param>
/// <param name="b">Not used by the computation; kept so existing callers keep compiling.</param>
/// <param name="W">Vector placed on the diagonal of the weight matrix via <c>Matrix.diag</c>.</param>
/// <param name="mX">Not used by the computation; kept so existing callers keep compiling.</param>
/// <param name="mDistributionFamily">Not used by the computation; kept so existing callers keep compiling.</param>
/// <returns>Vector of square roots of the diagonal of (Aᵀ · diag(W) · A)⁻¹.</returns>
let getStandardError (A: Matrix<float>) (b: Vector<float>) (W: Vector<float>) (mX:Vector<float>) (mDistributionFamily: GlmDistributionFamily) =
    let At: Matrix<float> = Matrix.transpose A
    let AtWA: Matrix<float> = (At * Matrix.diag W) * A
    let AtWAInv = Algebra.LinearAlgebra.Inverse AtWA

    // Only the diagonal of the inverse is needed; a negative entry yields NaN from Sqrt.
    let stndErrors: Vector<float> =
        Vector.init AtWAInv.NumRows (fun i -> System.Math.Sqrt(Matrix.get AtWAInv i i))
    stndErrors




let getStatisticsQR (A: Matrix<float>) (b: Vector<float>) (W: Vector<float>) (mX:Vector<float>) (mDistributionFamily: GlmDistributionFamily) =
let At :Matrix<float> = Matrix.transpose A
let AtW = scalarMultiply At W
Expand Down Expand Up @@ -279,60 +299,86 @@ module GLMStatistics =
AdjustedR2=adjustedR2
}

/// <summary>
/// Sums, over every entry μ of <c>mu</c>, the term y·ln(μ) − μ − gammaLn(y + 1),
/// where y is the entry of <c>b</c> at the same index.
/// </summary>
/// <remarks>
/// No distribution family is consulted; the expression is fixed.
/// NOTE(review): the term has the form of a Poisson log-likelihood — confirm this is intended for all families.
/// </remarks>
/// <param name="b">Vector indexed by position for the y values.</param>
/// <param name="mu">Vector that is iterated; its length determines how many terms are summed.</param>
/// <returns>The summed terms as a single float.</returns>
let getLogLikelihood (b:Vector<float>) (mu: vector) =
    let term (index: int) (expected: float) =
        let observed = b.[index]
        observed * System.Math.Log(expected) - expected - (SpecialFunctions.Gamma.gammaLn(observed + 1.0))
    mu
    |> Vector.mapi term
    |> Vector.sum

/// <summary>
/// Builds a vector whose i-th entry is (b[i] − linPred[i])².
/// </summary>
/// <remarks>
/// Despite the name, the squared differences are NOT summed here; the caller receives the
/// element-wise vector and has to apply <c>Vector.sum</c> itself if a scalar is wanted.
/// </remarks>
/// <param name="b">Vector indexed by position for the minuend.</param>
/// <param name="linPred">Vector that is iterated; its length determines the length of the result.</param>
/// <returns>Vector of squared element-wise differences.</returns>
let getSumOfSquares (b:Vector<float>) (linPred: vector) =
    let squaredDifference (index: int) (predicted: float) =
        let difference = b.[index] - predicted
        difference * difference
    Vector.mapi squaredDifference linPred

/// <summary>
/// Sums (y − ŷ)² / ŷ over paired entries y of <c>b</c> and ŷ of <c>linPred</c>.
/// </summary>
/// <remarks>
/// The second argument is used both as the value subtracted from y and as the divisor,
/// so a zero entry in it produces an infinite or NaN term.
/// </remarks>
/// <param name="b">Vector supplying y.</param>
/// <param name="linPred">Vector supplying ŷ.</param>
/// <returns>The summed ratio as a single float.</returns>
let getchi2 (b:Vector<float>) (linPred: vector) =
    let scaledSquare (observed: float) (expected: float) =
        let deviation = observed - expected
        let squared = deviation * deviation
        squared / expected
    Vector.map2 scaledSquare b linPred
    |> Vector.sum

let getStatisticsIRLS (A: Matrix<float>) (b: Vector<float>) (mDistributionFamily: GlmDistributionFamily) (vcovmat: Matrix<float>) (mX: Vector<float>) =
let n = vcovmat.NumRows
let m = Vector.length b
let n = vcovmat.NumRows
let m = Vector.length b

let rec crossProdLoop crossProd i j =
if j=n then
crossProd
else
let elementA: float = (Matrix.get A i j)
let elementmX: float = mX[j]
let crossProdNew = crossProd + (elementA*elementmX)
crossProdLoop (crossProdNew) i (j+1)
let rec crossProdLoop crossProd i j =
if j=n then
crossProd
else
let elementA: float = (Matrix.get A i j)
let elementmX: float = mX[j]
let crossProdNew = crossProd + (elementA*elementmX)
crossProdLoop (crossProdNew) i (j+1)

let linkFunction = GlmDistributionFamily.getLinkFunction mDistributionFamily
let linkFunction = GlmDistributionFamily.getLinkFunction mDistributionFamily

let stndErrors: Vector<float> = Vector.init n (fun v -> Matrix.get vcovmat v v)
let stndErrors: Vector<float> = Vector.init n (fun v -> Matrix.get vcovmat v v)

let outcomes: Vector<float> = m |> Vector.zeroCreate
let residuals: Vector<float> = m |> Vector.zeroCreate
let outcomes: Vector<float> = m |> Vector.zeroCreate
let residuals: Vector<float> = m |> Vector.zeroCreate

for count=0 to m-1 do
let crossProd = crossProdLoop 0. count 0
let elementB = b[count]
let link = linkFunction.getInvLink crossProd

residuals[count] <- (elementB-link)
outcomes[count] <- (elementB)

let getStdDev (vec:Vector<float>) (mean:float) =
Vector.fold (fun folder v ->
let a = v - mean
let valNew = System.Math.Pow(a,2)
folder + valNew
) 0. vec
|> fun x -> (System.Math.Sqrt((x)/float vec.Length))

let residualStdDev = getStdDev residuals 0.
let responseMean = Vector.mean(outcomes)
let responseVariance =
let v = getStdDev outcomes responseMean
System.Math.Pow(v, 2)

let r2 = 1. - residualStdDev * residualStdDev / responseVariance
let adjustedR2 = 1. - (residualStdDev * residualStdDev) / responseVariance * (float n) / ((float n) - (float mX.Length) - 1.)
for count=0 to m-1 do
let crossProd = crossProdLoop 0. count 0
let elementB = b[count]
let link = linkFunction.getInvLink crossProd

{
StandardErrors=stndErrors
ResidualStandardDeviation=residualStdDev
ResponseMean=responseMean
ResponseVariance=responseVariance
R2=r2
AdjustedR2=adjustedR2
}
residuals[count] <- (elementB-link)
outcomes[count] <- (elementB)

let getStdDev (vec:Vector<float>) (mean:float) =
Vector.fold (fun folder v ->
let a = v - mean
let valNew = System.Math.Pow(a,2)
folder + valNew
) 0. vec
|> fun x -> (System.Math.Sqrt((x)/float vec.Length))

let residualStdDev = getStdDev residuals 0.
let responseMean = Vector.mean(outcomes)
let responseVariance =
let v = getStdDev outcomes responseMean
System.Math.Pow(v, 2)

let r2 = 1. - residualStdDev * residualStdDev / responseVariance
let adjustedR2 = 1. - (residualStdDev * residualStdDev) / responseVariance * (float n) / ((float n) - (float mX.Length) - 1.)

{
StandardErrors=stndErrors
ResidualStandardDeviation=residualStdDev
ResponseMean=responseMean
ResponseVariance=responseVariance
R2=r2
AdjustedR2=adjustedR2
}




module IrLS =

Expand Down Expand Up @@ -622,40 +668,7 @@ module QR =
////printfn $" {loopCount}"

if cost < mTol then
let meanOfDist = mu_new
let meanOfDistTotal = mu_new |> Vector.mean

let logLikely =
Vector.init (n) (fun i ->
let y = b.[i]
let meanDist = mu_new.[i]
y * System.Math.Log(meanDist) - meanDist - (SpecialFunctions.Gamma.gammaLn(y+1.0))
)
|> Vector.sum
let logLikel2y =
Vector.init (n-1) (fun i0 ->
let i = i0+1
let y = b.[i]
let meanDist = mu_new.[i]
y * System.Math.Log(meanDist) - meanDist - (SpecialFunctions.Gamma.gammaLn(y+1.0))
)
|> Vector.sum
let sumOfSquares =
Vector.init (n) (fun i ->
let y = b.[i]
let yi = linPred_new.[i]
let a = y - yi
a*a
)
|> Vector.sum

let deviance = GlmDistributionFamily.resid_dev wlsendogNew mu_new (GlmDistributionFamily.getFamilyReisualDeviance mDistributionFamily)
let deviance2 = GlmDistributionFamily.resid_dev b mu_new (GlmDistributionFamily.getFamilyReisualDeviance mDistributionFamily)


printfn $"LogLikely: {logLikely} \n LogLikely2: {logLikel2y} \n meanOfDist: {meanOfDist} \n meanOfDistTotal: {meanOfDistTotal} \n sumOfSquares: {sumOfSquares} \n Dev: {deviance} \n Dev2: {deviance2} \n"



t_original,mu,linPred,wlsResult,wlsendog

else
Expand All @@ -680,40 +693,15 @@ module QR =
let t,mu,linPred,wlsResult,wlsendog =
loopTilIterQR A b mDistributionFamily maxIter mTol stepwiseGainQR

let mX,R = wlsResult,wlsendog//solveLinearQR A wlsendog

let meanOfDist = mu
let meanOfDistTotal = mu |> Vector.mean

let logLikely =
Vector.init (n) (fun i ->
let y = b.[i]
let meanDist = mu.[i]
y * System.Math.Log(meanDist) - meanDist - (SpecialFunctions.Gamma.gammaLn(y+1.0))
)
|> Vector.sum
let logLikel2y =
Vector.init (n-1) (fun i0 ->
let i = i0+1
let y = b.[i]
let meanDist = mu.[i]
y * System.Math.Log(meanDist) - meanDist - (SpecialFunctions.Gamma.gammaLn(y+1.0))
)
|> Vector.sum
let sumOfSquares =
Vector.init (n) (fun i ->
let y = b.[i]
let yi = linPred.[i]
let a = y - yi
a*a
)
|> Vector.sum
let mX,R = wlsResult,wlsendog

let deviance = GlmDistributionFamily.getFamilyReisualDeviance mDistributionFamily b mu

let deviance = GlmDistributionFamily.resid_dev wlsendog mu (GlmDistributionFamily.getFamilyReisualDeviance mDistributionFamily)
let deviance2 = GlmDistributionFamily.resid_dev b mu (GlmDistributionFamily.getFamilyReisualDeviance mDistributionFamily)
let stndError = GLMStatistics.getStandardError A b mu mX mDistributionFamily

let zStatistic = Vector.map2 (fun x y -> x/y) mX stndError

printfn $"LogLikely: {logLikely} \n LogLikely2: {logLikel2y} \n meanOfDist: {meanOfDist} \n meanOfDistTotal: {meanOfDistTotal} \n sumOfSquares: {sumOfSquares} \n Dev: {deviance} \n Dev2: {deviance2} \n"
printfn $"LogLikely: {(GLMStatistics.getLogLikelihood b mu)} \n Dev: {deviance} \n chi2: {GLMStatistics.getchi2 b linPred} \n stndError: {stndError} \n zStatistic: {zStatistic}"
//Update Stats
let statistics = GLMStatistics.getStatisticsQR A b mu mX mDistributionFamily
mX,statistics