From ff07386850366c58162a5ebaa102b5ca4d5fb477 Mon Sep 17 00:00:00 2001
From: Sampsa Kiiskinen <tuplanolla@gmail.com>
Date: Sun, 16 Jul 2017 16:10:52 +0300
Subject: [PATCH] Power tower.

---
 cpp.h   |   4 ++
 size.c  |  12 ++---
 size.h  | 163 ++++++++++++++++++++++++++++++++------------------------
 tests.c |  18 +++++++
 4 files changed, 122 insertions(+), 75 deletions(-)

diff --git a/cpp.h b/cpp.h
index 2c450ed..52060fc 100644
--- a/cpp.h
+++ b/cpp.h
@@ -6,6 +6,10 @@
 /// expands to the version number string `"x.y.z"`.
 #define BMM_VERSION(x, y, z) #x "." #y "." #z
 
+/// The preprocessor directive `BMM_TESTBIT(x, n)` checks whether the bit `n`
+/// is set in `x`; it shifts `x` itself, so `n` may be any bit index of `x`.
+#define BMM_TESTBIT(x, n) ((((x) >> (n)) & 1) != 0)
+
 /// The preprocessor directive `BMM_MIN(x, y)`
 /// expands to the lesser of `x` and `y`.
 #define BMM_MIN(x, y) ((x) < (y) ? (x) : (y))
diff --git a/size.c b/size.c
index c684c9f..c2c3215 100644
--- a/size.c
+++ b/size.c
@@ -15,8 +15,6 @@ extern inline size_t bmm_size_min(size_t, size_t);
 
 extern inline size_t bmm_size_max(size_t, size_t);
 
-extern inline size_t bmm_size_pow(size_t, size_t);
-
 extern inline size_t bmm_size_identity(size_t);
 
 extern inline size_t bmm_size_constant(size_t, size_t);
@@ -31,14 +29,16 @@ extern inline size_t bmm_size_sq(size_t);
 
 extern inline size_t bmm_size_cb(size_t);
 
-extern inline size_t bmm_size_firt(size_t, size_t);
-
-extern inline size_t bmm_size_cirt(size_t, size_t);
-
 extern inline size_t bmm_size_flog(size_t, size_t);
 
 extern inline size_t bmm_size_clog(size_t, size_t);
 
+extern inline size_t bmm_size_pow(size_t, size_t);
+
+extern inline size_t bmm_size_firt(size_t, size_t);
+
+extern inline size_t bmm_size_cirt(size_t, size_t);
+
 extern inline size_t bmm_size_uclamp(size_t, size_t);
 
 extern inline size_t bmm_size_wrap(size_t, size_t, size_t);
diff --git a/size.h b/size.h
index e1ff794..e435160 100644
--- a/size.h
+++ b/size.h
@@ -6,6 +6,7 @@
 #include <stddef.h>
 #include <stdint.h>
 
+#include "cpp.h"
 #include "ext.h"
 
 /// This structure holds the quotient and remainder of a division
@@ -70,18 +71,6 @@ inline size_t bmm_size_max(size_t const n, size_t const k) {
   return n > k ? n : k;
 }
 
-/// The call `bmm_size_pow(n, k)` returns `n` raised to the power `k`.
-/// This is analogous to `pow`.
-__attribute__ ((__const__, __pure__))
-inline size_t bmm_size_pow(size_t const n, size_t const k) {
-  size_t m = 1;
-
-  for (size_t i = 0; i < k; ++i)
-    m *= n;
-
-  return m;
-}
-
 /// The call `bmm_size_identity(n)` returns `n`.
 /// This is analogous to `bmm_fp_identity`.
 __attribute__ ((__const__, __pure__))
@@ -135,6 +124,75 @@ inline size_t bmm_size_cb(size_t const n) {
   return n * n * n;
 }
 
+/// The call `bmm_size_flog(n, k)`
+/// returns the floor of the base `k` logarithm of `n`.
+/// If `n == 0` or `k <= 1`, the behavior is undefined.
+/// No overflow can occur, because `n` is only ever divided by `k`.
+/// This is analogous to `bmm_fp_log`.
+#ifndef DEBUG
+__attribute__ ((__const__, __pure__))
+#endif
+inline size_t bmm_size_flog(size_t n, size_t const k) {
+#ifdef DEBUG
+  // Debug builds only: these do not work together with the attributes.
+  dynamic_assert(n > 0, "Invalid argument");
+  dynamic_assert(k > 1, "Invalid base");
+#endif
+
+  size_t m = 0;
+  // Count how many times `n` can be divided by `k` before dropping below it.
+  while (n >= k) {
+    n /= k;
+    ++m;
+  }
+
+  return m;
+}
+
+/// The call `bmm_size_clog(n, k)`
+/// returns the ceiling of the base `k` logarithm of `n`.
+/// If `n == 0` or `k <= 1`, the behavior is undefined.
+/// The subtraction cannot wrap around, because it only happens for `n > 1`.
+/// This is analogous to `bmm_fp_log`.
+#ifndef DEBUG
+__attribute__ ((__const__, __pure__))
+#endif
+inline size_t bmm_size_clog(size_t const n, size_t const k) {
+#ifdef DEBUG
+  // Debug builds only: these do not work together with the attributes.
+  dynamic_assert(n > 0, "Invalid argument");
+  dynamic_assert(k > 1, "Invalid base");
+#endif
+  // For `n > 1` this is one more than the floor logarithm of `n - 1`.
+  return n <= 1 ? 0 : bmm_size_flog(n - 1, k) + 1;
+}
+
+/// The call `bmm_size_pow(n, k)` returns `n` raised to the power `k`,
+/// with `bmm_size_pow(n, 0) == 1` for every `n`; analogous to `bmm_fp_pow`.
+__attribute__ ((__const__, __pure__))
+inline size_t bmm_size_pow(size_t const n, size_t const k) {
+  if (k == 0) // The logarithm below is undefined for zero.
+    return 1;
+
+  size_t m = 1;
+  // Visit the bits of `k`; NOTE(review): assumes `BMM_POW(p, 2)` squares `p`.
+  size_t const r = bmm_size_flog(k, 2) + 1; // Significant bits in `k`.
+  for (size_t i = 0, p = n; i < r; ++i, p = BMM_POW(p, 2))
+    if (BMM_TESTBIT(k, i))
+      m *= p; // Bit `i` of `k` is set, so this factor contributes.
+
+  return m;
+
+  // The following implementation is less complicated,
+  // but slower for large powers.
+  // size_t m = 1;
+  //
+  // for (size_t i = 0; i < k; ++i)
+  //   m *= n;
+  //
+  // return m;
+}
+
 /// The call `bmm_size_firt(n, k)`
 /// returns the floor of the `k`th root of `n`.
 /// This is analogous to `bmm_fp_rt`.
@@ -166,37 +224,6 @@ inline size_t bmm_size_cirt(size_t const n, size_t const k) {
   return n <= 1 ? n : bmm_size_firt(n - 1, k) + 1;
 }
 
-/// The call `bmm_size_flog(n, k)`
-/// returns the floor of the base `k` logarithm of `n`.
-/// This is analogous to `bmm_fp_log`.
-__attribute__ ((__const__, __pure__))
-inline size_t bmm_size_flog(size_t n, size_t const k) {
-  // These do not work because of the attributes.
-  // dynamic_assert(n <= 0, "invalid argument");
-  // dynamic_assert(k <= 1, "invalid base");
-
-  size_t m = 0;
-
-  while (n >= k) {
-    n /= k;
-    ++m;
-  }
-
-  return m;
-}
-
-/// The call `bmm_size_clog(n, k)`
-/// returns the ceiling of the base `k` logarithm of `n`.
-/// This is analogous to `bmm_fp_log`.
-__attribute__ ((__const__, __pure__))
-inline size_t bmm_size_clog(size_t const n, size_t const k) {
-  // These do not work because of the attributes.
-  // dynamic_assert(n <= 0, "invalid argument");
-  // dynamic_assert(k <= 1, "invalid base");
-
-  return n <= 1 ? 0 : bmm_size_flog(n - 1, k) + 1;
-}
-
 /// The call `bmm_size_uclamp(n, b)` returns
 ///
 /// * `n` if `0 <= n < b` and
@@ -216,7 +243,11 @@ __attribute__ ((__const__, __pure__))
 inline size_t bmm_size_wrap(size_t const n, size_t const a, size_t const b) {
   size_t const c = b - a;
 
-  // This reference implementation is very slow.
+  return (n % c + c - a % c) % c + a;
+
+  // The following implementation is a lot slower, but easier to understand.
+  // size_t const c = b - a;
+  //
   // size_t k = n;
   //
   // if (k < a)
@@ -229,8 +260,6 @@ inline size_t bmm_size_wrap(size_t const n, size_t const a, size_t const b) {
   //   while (k >= b);
   //
   // return k;
-
-  return (n % c + c - a % c) % c + a;
 }
 
 /// The call `m = bmm_size_uwrap(n, b)`
@@ -388,32 +417,28 @@ inline void bmm_size_hcd(size_t* restrict const pij,
     pij[ndim - 1 - idim] = qr.rem;
   }
 
-  // The following implementation is slower, but suitable for loop fusion.
-  /*
-  for (size_t idim = 0; idim < ndim; ++idim) {
-    bmm_size_div_t qr = {.quot = i, .rem = 0};
-    for (size_t jdim = 0; jdim < ndim - idim; ++jdim)
-      qr = bmm_size_div(qr.quot, nper[ndim - 1 - jdim]);
-
-    pij[idim] = qr.rem;
-  }
-  */
-
   // The following implementation is less reliable,
   // but suitable for loop fusion.
-  /*
-  size_t* const buf = alloca(ndim * sizeof *pij);
-
-  bmm_size_div_t qr = {.quot = i, .rem = 0};
-  for (size_t idim = 0; idim < ndim; ++idim) {
-    qr = bmm_size_div(qr.quot, nper[ndim - 1 - idim]);
-
-    buf[ndim - 1 - idim] = qr.rem;
-  }
+  // size_t* const buf = alloca(ndim * sizeof *buf);
+  //
+  // bmm_size_div_t qr = {.quot = i, .rem = 0};
+  // for (size_t idim = 0; idim < ndim; ++idim) {
+  //   qr = bmm_size_div(qr.quot, nper[ndim - 1 - idim]);
+  //
+  //   buf[ndim - 1 - idim] = qr.rem;
+  // }
+  //
+  // for (size_t idim = 0; idim < ndim; ++idim)
+  //   pij[idim] = buf[idim];
 
-  for (size_t idim = 0; idim < ndim; ++idim)
-    pij[idim] = buf[idim];
-  */
+  // The following implementation is slower, but suitable for loop fusion.
+  // for (size_t idim = 0; idim < ndim; ++idim) {
+  //   bmm_size_div_t qr = {.quot = i, .rem = 0};
+  //   for (size_t jdim = 0; jdim < ndim - idim; ++jdim)
+  //     qr = bmm_size_div(qr.quot, nper[ndim - 1 - jdim]);
+  //
+  //   pij[idim] = qr.rem;
+  // }
 }
 
 /// The call `bmm_size_unhc(ij, ndim, nper)`
diff --git a/tests.c b/tests.c
index a70cbdd..b996920 100644
--- a/tests.c
+++ b/tests.c
@@ -37,6 +37,24 @@ CHEAT_TEST(size_fact2,
   cheat_assert_size(bmm_size_fact(5, 2), 15);
 )
 
+CHEAT_TEST(size_flog,
+  cheat_assert_size(bmm_size_flog(1, 2), 0); // Smallest valid argument.
+  cheat_assert_size(bmm_size_flog(2, 2), 1); // Exact power of the base.
+  cheat_assert_size(bmm_size_flog(3, 2), 1); // In between, rounds down.
+  cheat_assert_size(bmm_size_flog(4, 2), 2); // Exact power of the base.
+  cheat_assert_size(bmm_size_flog(5, 2), 2); // In between, rounds down.
+  cheat_assert_size(bmm_size_flog(6, 2), 2); // In between, rounds down.
+)
+
+CHEAT_TEST(size_clog,
+  cheat_assert_size(bmm_size_clog(1, 2), 0); // Smallest valid argument.
+  cheat_assert_size(bmm_size_clog(2, 2), 1); // Exact power of the base.
+  cheat_assert_size(bmm_size_clog(3, 2), 2); // In between, rounds up.
+  cheat_assert_size(bmm_size_clog(4, 2), 2); // Exact power of the base.
+  cheat_assert_size(bmm_size_clog(5, 2), 3); // In between, rounds up.
+  cheat_assert_size(bmm_size_clog(6, 2), 3); // In between, rounds up.
+)
+
 CHEAT_TEST(geom2d_shell_inside,
   double const x[] = {0.5, 0.5};
   double const r = 1.0 / sqrt(3.0);