Skip to content

Commit

Permalink
[fix] [Stats ns] [#67] Don't interpolate percentiles
Browse files Browse the repository at this point in the history
  • Loading branch information
ptaoussanis committed Aug 14, 2023
1 parent dce5e40 commit 4f6e05f
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 28 deletions.
38 changes: 16 additions & 22 deletions src/taoensso/tufte/stats.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,6 @@
;; "[J" is the JVM's internal class name for `long[]`; the class is looked up
;; once, outside the fn, and closed over by the predicate.
#?(:clj
   (let [long-array-class (Class/forName "[J")]
     (defn longs?
       "Returns true iff `x` is a primitive long array (`long[]`)."
       [x]
       (instance? long-array-class x))))
;; "[D" is the JVM's internal class name for `double[]`; the class is looked up
;; once, outside the fn, and closed over by the predicate.
#?(:clj
   (let [double-array-class (Class/forName "[D")]
     (defn doubles?
       "Returns true iff `x` is a primitive double array (`double[]`)."
       [x]
       (instance? double-array-class x))))

(defn is-p
  "Validation pass-through: returns `x` unchanged when `(enc/pnum? x)` is
  truthy, otherwise throws an `ex-info` whose data map carries the rejected
  value under `:value` and its type under `:type`."
  [x]
  ;; Guard clause: reject first, then fall through to return the input as-is.
  (when-not (enc/pnum? x)
    (throw
      (ex-info "Expected number between 0 and 1"
        {:value x :type (type x)})))
  x)

;;;; Sorted nums

(deftype SortedDoubles [^doubles a last]
Expand Down Expand Up @@ -122,7 +115,7 @@

;;;; Percentiles

(defn- double-nth
(defn- weighted-nth
^double [nums ^double idx]
(let [idx-floor (Math/floor idx)
idx-ceil (Math/ceil idx)]
Expand All @@ -138,34 +131,35 @@
(* weight-ceil (double (nth nums (int idx-ceil)))))))))

(defn percentile
"Returns ?double"
"Returns ?double element."
[nums p]
(let [snums (sorted-doubles nums)
max-idx (dec (count snums))]
(when (>= max-idx 0)
(let [idx (* max-idx (double (is-p p)))]
(double-nth snums idx)))))
(nth snums (Math/round (* max-idx (enc/as-pnum! p)))))))

(comment (percentile (range 101) 0.8))

(defn percentiles
"Returns ?[min p25 p50 p75 p90 p95 p99 max] doubles in:
"Returns ?[min p25 p50 p75 p90 p95 p99 max] double elements in:
- O(1) for Sorted types (SortedLongs, SortedDoubles),
- O(n.log_n) otherwise."
[nums]
(let [snums (sorted-doubles nums)
max-idx (dec (count nums))]
(when (>= max-idx 0)
[(double (nth snums 0))
(double-nth snums (* max-idx 0.25))
(double-nth snums (* max-idx 0.50))
(double-nth snums (* max-idx 0.75))
(double-nth snums (* max-idx 0.90))
(double-nth snums (* max-idx 0.95))
(double-nth snums (* max-idx 0.99))
(double (nth snums max-idx))])))

(comment (percentiles (range 101)))
[(nth snums 0)
(nth snums (Math/round (* max-idx 0.25)))
(nth snums (Math/round (* max-idx 0.50)))
(nth snums (Math/round (* max-idx 0.75)))
(nth snums (Math/round (* max-idx 0.90)))
(nth snums (Math/round (* max-idx 0.95)))
(nth snums (Math/round (* max-idx 0.99)))
(nth snums max-idx)])))

(comment
(percentiles (range 101))
(percentiles [1 2 3]))

;;;;

Expand Down
12 changes: 6 additions & 6 deletions test/taoensso/stats_tests.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@
(deftest multi-reduce
  ;; Calls `stats/multi-reduce` with two fn/init pairs (`+` 0 and `-` 0) over
  ;; (range 1e4) and expects a two-element vector back. The expected values are
  ;; the sum 0+1+...+9999 = 49995000 and the result of subtracting each element
  ;; from 0 in turn, i.e. -49995000.
[(is (= (stats/multi-reduce + 0 - 0 (range 1e4)) [49995000 -49995000]))])

(deftest double-nth
  ;; `#'stats/double-nth` goes through the var so the test can invoke the fn
  ;; even though it is declared private (`defn-`) in the stats ns.
  ;; Each case passes a fractional index and expects a value between the two
  ;; neighbouring elements, weighted by how close the index is to each one:
  ;;   [1 3]  @ 0.5  => 1*0.5  + 3*0.5   = 2.0
  ;;   [1 10] @ 0.5  => 1*0.5  + 10*0.5  = 5.5
  ;;   [1 10] @ 0.75 => 1*0.25 + 10*0.75 = 7.75
[(is (= (#'stats/double-nth [1 3] 0.5) 2.0))
(is (= (#'stats/double-nth [1 10] 0.5) 5.5))
(is (= (#'stats/double-nth [1 10] 0.75) 7.75))])
(deftest weighted-nth
  ;; `#'stats/weighted-nth` goes through the var so the test can invoke the fn
  ;; even though it is declared private (`defn-`) in the stats ns.
  ;; Each case passes a fractional index and expects a value between the two
  ;; neighbouring elements, weighted by how close the index is to each one:
  ;;   [1 3]  @ 0.5  => 1*0.5  + 3*0.5   = 2.0
  ;;   [1 10] @ 0.5  => 1*0.5  + 10*0.5  = 5.5
  ;;   [1 10] @ 0.75 => 1*0.25 + 10*0.75 = 7.75
[(is (= (#'stats/weighted-nth [1 3] 0.5) 2.0))
(is (= (#'stats/weighted-nth [1 10] 0.5) 5.5))
(is (= (#'stats/weighted-nth [1 10] 0.75) 7.75))])

(deftest percentiles
[(is (= (stats/percentiles [1 5 2 4 3]) [1.0 2.0 3.0 4.0 4.6 4.8 4.96 5.0]))
(is (= (stats/percentiles (stats/sorted-doubles [1 5 2 4 3])) [1.0 2.0 3.0 4.0 4.6 4.8 4.96 5.0]))])
[(is (= (stats/percentiles [1 5 2 4 3]) [1.0 2.0 3.0 4.0 5.0 5.0 5.0 5.0]))
(is (= (stats/percentiles (stats/sorted-doubles [1 5 2 4 3])) [1.0 2.0 3.0 4.0 5.0 5.0 5.0 5.0]))])

;;;;

Expand Down

0 comments on commit 4f6e05f

Please sign in to comment.