Skip to content

Commit

Permalink
Merge pull request #308 from s-weil/minkowski-distance
Browse files Browse the repository at this point in the history
  • Loading branch information
bvenn authored Oct 6, 2023
2 parents 2c0d3de + 9d38186 commit 4719e96
Show file tree
Hide file tree
Showing 2 changed files with 452 additions and 1 deletion.
234 changes: 233 additions & 1 deletion src/FSharp.Stats/DistanceMetrics.fs
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,82 @@ module DistanceMetrics =
for i in 0 .. (dim - 1) do
let x = v1.[i] - v2.[i]
if not (isNan x) then
dist <- dist + System.Math.Abs x
dist <- dist + abs x
dist

/// <summary>The [Minkowski distance](https://en.wikipedia.org/wiki/Minkowski_distance) of order `p` between two vectors.</summary>
/// <remarks>The two vectors need not have equal lengths: when one vector is exhausted, any remaining elements in the other vector are ignored.
/// For `p >= 1` the `1/p` root of the summed powers is returned; for `p` below 1 the sum of powers is returned without taking the root.</remarks>
/// <param name="v1">first vector</param>
/// <param name="v2">second vector</param>
/// <param name="p">order of the distance, a float constrained to `p > 0`</param>
/// <returns>`Some` Minkowski distance between elements of the given vectors, or `None` if `p` is not greater than 0 (this includes a NaN `p`). The distance is NaN if the compared elements contain NaN.</returns>
/// <example>
/// <code>
/// // e.g. v1 and v2 initialization
/// let v1 = vector [3.14; 2.0; 3.1]
/// let v2 = vector [9.1; 2.5; 3.7]
///
/// // Apply the minkowski distance to v1 and v2
/// Vector.minkowski v1 v2 3.0
/// </code>
/// </example>
let inline minkowski (v1: Vector<'a>) (v2: Vector<'a>) (p: float) : float option =
    // `not (p > 0.0)` instead of `p <= 0.0`: every comparison against NaN is false,
    // so only this form also rejects a NaN order.
    if not (p > 0.0) then
        None
    else
        // Only the overlapping prefix of both vectors is compared.
        let dim = min v1.Length v2.Length
        let mutable dist = 0.0

        for i in 0 .. (dim - 1) do
            // Absolute difference written with comparison and subtraction only,
            // so no `abs` constraint is put on the element type.
            let diff =
                if v1.[i] > v2.[i] then
                    v1.[i] - v2.[i]
                else
                    v2.[i] - v1.[i]

            let d = diff ** p
            dist <- dist + d

        // The root is only taken for p >= 1; below 1 the plain sum is returned.
        if p >= 1.0 then
            Some (dist ** (1.0 / p))
        else
            Some dist

/// <summary>The [Minkowski distance](https://en.wikipedia.org/wiki/Minkowski_distance) of order `p` between two vectors (ignores NaN).</summary>
/// <remarks>Element pairs whose powered difference is NaN are skipped and do not contribute to the sum.
/// The two vectors need not have equal lengths: when one vector is exhausted, any remaining elements in the other vector are ignored.
/// For `p >= 1` the `1/p` root of the summed powers is returned; for `p` below 1 the sum of powers is returned without taking the root.</remarks>
/// <param name="v1">first vector</param>
/// <param name="v2">second vector</param>
/// <param name="p">order of the distance, a float constrained to `p > 0`</param>
/// <returns>`Some` Minkowski distance between elements of the given vectors, or `None` if `p` is not greater than 0 (this includes a NaN `p`).</returns>
/// <example>
/// <code>
/// // e.g. v1 and v2 initialization
/// let v1 = vector [3.14; 2.0; 3.1]
/// let v2 = vector [9.1; 2.5; 3.7]
///
/// // Apply the minkowski distance to v1 and v2
/// Vector.minkowskiNaN v1 v2 3.0
/// </code>
/// </example>
let inline minkowskiNaN (v1: Vector<float>) (v2: Vector<float>) (p: float) : float option =
    // `not (p > 0.0)` instead of `p <= 0.0`: every comparison against NaN is false,
    // so only this form also rejects a NaN order.
    if not (p > 0.0) then
        None
    else
        // Only the overlapping prefix of both vectors is compared.
        let dim = min v1.Length v2.Length
        let mutable dist = 0.0

        for i in 0 .. (dim - 1) do
            let diff = abs (v1.[i] - v2.[i])
            let d = diff ** p

            // Skip NaN terms so a single missing value does not poison the sum.
            if not (isNan d) then
                dist <- dist + d

        // The root is only taken for p >= 1; below 1 the plain sum is returned.
        if p >= 1.0 then
            Some (dist ** (1.0 / p))
        else
            Some dist

module Array =

Expand Down Expand Up @@ -240,6 +314,81 @@ module DistanceMetrics =
dist <- dist + System.Math.Abs x
dist

/// <summary>The [Minkowski distance](https://en.wikipedia.org/wiki/Minkowski_distance) of order `p` between two arrays.</summary>
/// <remarks>The two arrays need not have equal lengths: when one array is exhausted, any remaining elements in the other array are ignored.
/// For `p >= 1` the `1/p` root of the summed powers is returned; for `p` below 1 the sum of powers is returned without taking the root.</remarks>
/// <param name="a1">first array</param>
/// <param name="a2">second array</param>
/// <param name="p">order of the distance, a float constrained to `p > 0`</param>
/// <returns>`Some` Minkowski distance between elements of the given arrays, or `None` if `p` is not greater than 0 (this includes a NaN `p`). The distance is NaN if the compared elements contain NaN.</returns>
/// <example>
/// <code>
/// // e.g. a1 and a2 initialization
/// let a1 = [|3.14; 2.0; 3.1|]
/// let a2 = [|9.1; 2.5; 3.7|]
///
/// // Apply the minkowski distance to a1 and a2
/// Array.minkowski a1 a2 3.0
/// </code>
/// </example>
let inline minkowski (a1: array<'a>) (a2: array<'a>) (p: float) : float option =
    // `not (p > 0.0)` instead of `p <= 0.0`: every comparison against NaN is false,
    // so only this form also rejects a NaN order.
    if not (p > 0.0) then
        None
    else
        // Only the overlapping prefix of both arrays is compared.
        let dim = min a1.Length a2.Length
        let mutable dist = 0.0

        for i in 0 .. (dim - 1) do
            // Absolute difference written with comparison and subtraction only,
            // so no `abs` constraint is put on the element type.
            let diff =
                if a1.[i] > a2.[i] then
                    a1.[i] - a2.[i]
                else
                    a2.[i] - a1.[i]

            let d = diff ** p
            dist <- dist + d

        // The root is only taken for p >= 1; below 1 the plain sum is returned.
        if p >= 1.0 then
            Some (dist ** (1.0 / p))
        else
            Some dist

/// <summary>The [Minkowski distance](https://en.wikipedia.org/wiki/Minkowski_distance) of order `p` between two arrays (ignores NaN).</summary>
/// <remarks>Element pairs whose powered difference is NaN are skipped and do not contribute to the sum.
/// The two arrays need not have equal lengths: when one array is exhausted, any remaining elements in the other array are ignored.
/// For `p >= 1` the `1/p` root of the summed powers is returned; for `p` below 1 the sum of powers is returned without taking the root.</remarks>
/// <param name="a1">first array</param>
/// <param name="a2">second array</param>
/// <param name="p">order of the distance, a float constrained to `p > 0`</param>
/// <returns>`Some` Minkowski distance between elements of the given arrays, or `None` if `p` is not greater than 0 (this includes a NaN `p`).</returns>
/// <example>
/// <code>
/// // e.g. a1 and a2 initialization
/// let a1 = [|3.14; 2.0; 3.1|]
/// let a2 = [|9.1; 2.5; 3.7|]
///
/// // Apply the minkowski distance to a1 and a2
/// Array.minkowskiNaN a1 a2 3.0
/// </code>
/// </example>
let inline minkowskiNaN (a1: array<float>) (a2: array<float>) (p: float) : float option =
    // `not (p > 0.0)` instead of `p <= 0.0`: every comparison against NaN is false,
    // so only this form also rejects a NaN order.
    if not (p > 0.0) then
        None
    else
        // Only the overlapping prefix of both arrays is compared.
        let dim = min a1.Length a2.Length
        let mutable dist = 0.0

        for i in 0 .. (dim - 1) do
            let diff = abs (a1.[i] - a2.[i])
            let d = diff ** p

            // Skip NaN terms so a single missing value does not poison the sum.
            if not (isNan d) then
                dist <- dist + d

        // The root is only taken for p >= 1; below 1 the plain sum is returned.
        if p >= 1.0 then
            Some (dist ** (1.0 / p))
        else
            Some dist


/// <summary>Calculates Hamming distance of two coordinate items</summary>
/// <remarks>Note, distance between Nan and Nan is equal to 1</remarks>
/// <param name="s1">first sequence</param>
Expand Down Expand Up @@ -395,6 +544,89 @@ module DistanceMetrics =
d.[i, j] <- dval; dval
dist (m, n)


/// <summary>The [Minkowski distance](https://en.wikipedia.org/wiki/Minkowski_distance) of order `p` between two sequences.</summary>
/// <remarks>The two sequences need not have equal lengths: when one sequence is exhausted, any remaining elements in the other sequence are ignored.
/// For `p >= 1` the `1/p` root of the summed powers is returned; for `p` below 1 the sum of powers is returned without taking the root.</remarks>
/// <param name="s1">first sequence</param>
/// <param name="s2">second sequence</param>
/// <param name="p">order of the distance, a float constrained to `p > 0`</param>
/// <returns>`Some` Minkowski distance between elements of the given sequences, or `None` if `p` is not greater than 0 (this includes a NaN `p`). The distance is NaN if the compared elements contain NaN.</returns>
/// <example>
/// <code>
/// // e.g. s1 and s2 initialization
/// let s1 = seq { 3.14; 2.0; 3.1 }
/// let s2 = seq { 9.1; 2.5; 3.7 }
///
/// // Apply the minkowski distance to s1 and s2
/// minkowski s1 s2 3.0
/// </code>
/// </example>
let inline minkowski (s1: seq<'a>) (s2: seq<'a>) (p: float) =
    // `not (p > 0.0)` instead of `p <= 0.0`: every comparison against NaN is false,
    // so only this form also rejects a NaN order.
    if not (p > 0.0) then
        None
    else
        // Seq.zip stops at the shorter sequence, so surplus elements are ignored.
        let dist =
            Seq.zip s1 s2
            |> Seq.fold
                (fun acc (x, y) ->
                    // Absolute difference written with comparison and subtraction only,
                    // so no `abs` constraint is put on the element type.
                    let diff =
                        if x > y then
                            x - y
                        else
                            y - x

                    let d = diff ** p
                    acc + d)
                0.0

        // The root is only taken for p >= 1; below 1 the plain sum is returned.
        if p >= 1.0 then
            Some (dist ** (1.0 / p))
        else
            Some dist


/// <summary>The [Minkowski distance](https://en.wikipedia.org/wiki/Minkowski_distance) of order `p` between two sequences (ignores NaN).</summary>
/// <remarks>Element pairs whose powered difference is NaN are skipped and do not contribute to the sum.
/// The two sequences need not have equal lengths: when one sequence is exhausted, any remaining elements in the other sequence are ignored.
/// For `p >= 1` the `1/p` root of the summed powers is returned; for `p` below 1 the sum of powers is returned without taking the root.</remarks>
/// <param name="s1">first sequence</param>
/// <param name="s2">second sequence</param>
/// <param name="p">order of the distance, a float constrained to `p > 0`</param>
/// <returns>`Some` Minkowski distance between elements of the given sequences, or `None` if `p` is not greater than 0 (this includes a NaN `p`).</returns>
/// <example>
/// <code>
/// // e.g. s1 and s2 initialization
/// let s1 = seq { 3.14; 2.0; 3.1 }
/// let s2 = seq { 9.1; 2.5; 3.7 }
///
/// // Apply the minkowski distance to s1 and s2
/// minkowskiNaN s1 s2 3.0
/// </code>
/// </example>
let inline minkowskiNaN (s1: seq<'a>) (s2: seq<'a>) (p: float) =
    // `not (p > 0.0)` instead of `p <= 0.0`: every comparison against NaN is false,
    // so only this form also rejects a NaN order.
    if not (p > 0.0) then
        None
    else
        // Seq.zip stops at the shorter sequence, so surplus elements are ignored.
        let dist =
            Seq.zip s1 s2
            |> Seq.fold
                (fun acc (x, y) ->
                    // Absolute difference written with comparison and subtraction only,
                    // so no `abs` constraint is put on the element type.
                    let diff =
                        if x > y then
                            x - y
                        else
                            y - x

                    let d = diff ** p

                    // Skip NaN terms so a single missing value does not poison the sum.
                    if not (nan.Equals d) then
                        acc + d
                    else
                        acc)
                0.0

        // The root is only taken for p >= 1; below 1 the plain sum is returned.
        if p >= 1.0 then
            Some (dist ** (1.0 / p))
        else
            Some dist




Expand Down
Loading

0 comments on commit 4719e96

Please sign in to comment.