From 199140e50b007a5fcd1bd2e7359464109d539952 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joakim=20Nohlg=C3=A5rd?= <joakim.nohlgard@eistec.se>
Date: Fri, 26 Feb 2016 10:03:55 +0100
Subject: [PATCH] sys/div: Add support for big 64 bit numbers

---
 Makefile.dep                               |  1 +
 sys/div/Makefile                           |  1 +
 sys/div/div.c                              | 46 ++++++++++++++
 sys/include/div.h                          | 71 ++++++++++++++--------
 tests/unittests/tests-div/Makefile.include |  1 +
 tests/unittests/tests-div/tests-div.c      | 51 +++++++++++++---
 6 files changed, 137 insertions(+), 34 deletions(-)
 create mode 100644 sys/div/Makefile
 create mode 100644 sys/div/div.c
 create mode 100644 tests/unittests/tests-div/Makefile.include

diff --git a/Makefile.dep b/Makefile.dep
index 1cf4125dea..65411f6db7 100644
--- a/Makefile.dep
+++ b/Makefile.dep
@@ -535,6 +535,7 @@ endif
 
 ifneq (,$(filter xtimer,$(USEMODULE)))
     FEATURES_REQUIRED += periph_timer
+    USEMODULE += div
 endif
 
 ifneq (,$(filter saul_reg,$(USEMODULE)))
diff --git a/sys/div/Makefile b/sys/div/Makefile
new file mode 100644
index 0000000000..48422e909a
--- /dev/null
+++ b/sys/div/Makefile
@@ -0,0 +1 @@
+include $(RIOTBASE)/Makefile.base
diff --git a/sys/div/div.c b/sys/div/div.c
new file mode 100644
index 0000000000..95b171ed16
--- /dev/null
+++ b/sys/div/div.c
@@ -0,0 +1,46 @@
+/**
+ * Copyright (C) 2016 Eistec AB
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License v2.1. See the file LICENSE in the top level
+ * directory for more details.
+ *
+ * @ingroup sys_util
+ * @{
+ * @file
+ * @brief    Integer division function implementations
+ *
+ * @author   Joakim Nohlgård <joakim.nohlgard@eistec.se>
+ *
+ * @}
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include "div.h"
+
+uint64_t _div_mulhi64(const uint64_t a, const uint64_t b)
+{
+    /* Handle overflow explicit because we don't have 128 bit integers on
+     * our platforms. */
+    const uint32_t a_lo = (const uint32_t)a;
+    const uint32_t a_hi = (const uint32_t)(a >> 32);
+    const uint32_t b_lo = (const uint32_t)b;
+    const uint32_t b_hi = (const uint32_t)(b >> 32);
+
+    const uint64_t a_x_b_mid = (const uint64_t)a_hi * b_lo;
+    const uint64_t b_x_a_mid = (const uint64_t)b_hi * a_lo;
+    const uint64_t a_x_b_lo =  (const uint64_t)a_lo * b_lo;
+    const uint64_t a_x_b_hi =  (const uint64_t)a_hi * b_hi;
+
+    /* We may get up to 2 carry bits from the lower part of the multiplication */
+    const uint32_t carry_bits = ((uint64_t)(uint32_t)a_x_b_mid +
+                          (uint64_t)(uint32_t)b_x_a_mid +
+                          (a_x_b_lo >> 32) ) >> 32;
+
+    const uint64_t multhi = a_x_b_hi +
+                      (a_x_b_mid >> 32) + (b_x_a_mid >> 32) +
+                      carry_bits;
+
+    return multhi;
+}
diff --git a/sys/include/div.h b/sys/include/div.h
index df8f81988e..1870e3e2c9 100644
--- a/sys/include/div.h
+++ b/sys/include/div.h
@@ -8,7 +8,7 @@
  */
 
 /**
- * @brief     Provides integer division functions
+ * @brief     Integer division functions
  *
  * This header provides some integer division functions that can be used
  * to prevent linking in compiler-generated ones, which are often larger.
@@ -31,35 +31,58 @@ extern "C" {
 #endif
 
 /**
- * @brief Integer divide val by 15625
+ * @brief Approximation of (2**l)/d for d=15625, l=12, 32 bits
+ */
+#define DIV_H_INV_15625_32    0x431bde83ul
+
+/**
+ * @brief Approximation of (2**l)/d for d=15625, l=12, 64 bits
+ */
+#define DIV_H_INV_15625_64    0x431bde82d7b634dbull
+
+/**
+ * @brief Required shifts for division by 15625, l above
+ */
+#define DIV_H_INV_15625_SHIFT 12
+
+/**
+ * @internal
+ * @brief Multiply two 64 bit integers into a 128 bit integer and return the upper half.
  *
- * @pre val <= 16383999997
+ * The implementation only uses 64 bit integers internally, no __int128 support
+ * is necessary.
+ *
+ * @see http://stackoverflow.com/questions/28868367/getting-the-high-part-of-64-bit-integer-multiplication
+
+ * @param[in]   a     operand a
+ * @param[in]   b     operand b
+ * @return      (((uint128_t)a * b) >> 64)
+ */
+uint64_t _div_mulhi64(const uint64_t a, const uint64_t b);
+
+/**
+ * @brief Integer divide val by 15625, 64 bit version
  *
  * @param[in]   val     dividend
  * @return      (val / 15625)
  */
 static inline uint64_t div_u64_by_15625(uint64_t val)
 {
-    /* a higher value would overflow 2^64 in the multiplication that follows */
-    assert(val <= 16383999997ull);
-
-    return (val * 0x431bde83UL) >> (12 + 32);
+    if (val > 16383999997ull) {
+        return (_div_mulhi64(DIV_H_INV_15625_64, val) >> DIV_H_INV_15625_SHIFT);
+    }
+    return (val * DIV_H_INV_15625_32) >> (DIV_H_INV_15625_SHIFT + 32);
 }
 
 /**
  * @brief Integer divide val by 1000000
  *
- * @pre val <= 1048575999808
- *
  * @param[in]   val     dividend
  * @return      (val / 1000000)
  */
 static inline uint64_t div_u64_by_1000000(uint64_t val)
 {
-    /* a higher value would overflow 2^64 in the multiplication that follows */
-    assert(val <= 1048575999808ull);
-
-    return div_u64_by_15625(val>>6);
+    return div_u64_by_15625(val) >> 6;
 }
 
 /**
@@ -68,15 +91,17 @@ static inline uint64_t div_u64_by_1000000(uint64_t val)
  * This is used to quantize a 1MHz value to the closest 32768Hz value,
  * e.g., for timers.
  *
- * The algorithm actually multiplies by 512 first, then divides by 15625,
- * keeping the result closer to a floored floating point division.
+ * The algorithm uses the modular multiplicative inverse of 15625 to use only
+ * multiplication and bit shifts to perform the division.
+ *
+ * The result will be equal to the mathematical expression: floor((val * 512) / 15625)
  *
  * @param[in]   val     dividend
  * @return      (val / (15625/512))
  */
 static inline uint32_t div_u32_by_15625div512(uint32_t val)
 {
-    return ((uint64_t)(val) * 0x431bde83ul) >> (12 + 32 - 9);
+    return ((uint64_t)(val) * DIV_H_INV_15625_32) >> (DIV_H_INV_15625_SHIFT + 32 - 9);
 }
 
 /**
@@ -85,23 +110,21 @@ static inline uint32_t div_u32_by_15625div512(uint32_t val)
  * This is used to quantize a 1MHz value to the closest 32768Hz value,
  * e.g., for timers.
  *
- * The algorithm actually multiplies by 512 first, then divides by 15625,
- * keeping the result closer to a floored floating point division.
- *
- * @pre val <= 16383999997
- *
  * @param[in]   val     dividend
  * @return      (val / (15625/512))
  */
 static inline uint64_t div_u64_by_15625div512(uint64_t val)
 {
-    /* a higher value would overflow 2^64 in the multiplication that follows */
-    assert(val <= 16383999997ull);
     /*
      * This saves around 1400 bytes of ROM on Cortex-M platforms (both ARMv6 and
      * ARMv7) from avoiding linking against __aeabi_uldivmod and related helpers
      */
-    return ((uint64_t)(val) * 0x431bde83ul) >> (12 + 32 - 9);
+    if (val > 16383999997ull) {
+        /* this would overflow 2^64 in the multiplication that follows, need to
+         * use the long version */
+        return (_div_mulhi64(DIV_H_INV_15625_64, val) >> (DIV_H_INV_15625_SHIFT - 9));
+    }
+    return (val * DIV_H_INV_15625_32) >> (DIV_H_INV_15625_SHIFT + 32 - 9);
 }
 
 /**
diff --git a/tests/unittests/tests-div/Makefile.include b/tests/unittests/tests-div/Makefile.include
new file mode 100644
index 0000000000..f0e4b6a573
--- /dev/null
+++ b/tests/unittests/tests-div/Makefile.include
@@ -0,0 +1 @@
+USEMODULE += div
diff --git a/tests/unittests/tests-div/tests-div.c b/tests/unittests/tests-div/tests-div.c
index b568263019..85010e0566 100644
--- a/tests/unittests/tests-div/tests-div.c
+++ b/tests/unittests/tests-div/tests-div.c
@@ -15,7 +15,7 @@
 #define ENABLE_DEBUG (0)
 #include "debug.h"
 
-static uint32_t u32_test_values[] = {
+static const uint32_t u32_test_values[] = {
     0,
     1,
     10,
@@ -28,26 +28,57 @@ static uint32_t u32_test_values[] = {
     0xffffffff,
 };
 
-static uint64_t u64_test_values[] = {
-    16383999997ull,
+static const uint64_t u64_test_values[] = {
     11111111111ull,
     0xffffffffull+1,
+    16383999997ull,
+    16383999998ull,
+    16383999999ull,
+    16384000000ull,
+    1048575999807ull,
+    1048575999808ull,
+    1048575999809ull,
+    0xffffffffffffeeull,
+    0x777777777777777ull,
+    0x1111111111111111ull,
+    0xffffffffffffffffull,
+    0x8000000000000000ull,
 };
 
-#define N_U32_VALS (sizeof(u32_test_values)/sizeof(uint32_t))
-#define N_U64_VALS (sizeof(u64_test_values)/sizeof(uint64_t))
+/* These expected values for test_div_u64_by_15625div512
+ * were computed from the expression (u64_test_values * 512) / 15625 using 128
+ * bit integers. */
+static const uint64_t u64_15625_512_expected_values[] = {
+    364088888,
+    140737488,
+    536870911,
+    536870911,
+    536870911,
+    536870912,
+    34359738361,
+    34359738361,
+    34359738361,
+    2361183241434822,
+    17630168202713342,
+    40297527320487639,
+    604462909807314587,
+    302231454903657293,
+};
+
+#define N_U32_VALS (sizeof(u32_test_values)/sizeof(u32_test_values[0]))
+#define N_U64_VALS (sizeof(u64_test_values)/sizeof(u64_test_values[0]))
 
 static void test_div_u64_by_15625(void)
 {
     for (unsigned i = 0; i < N_U32_VALS; i++) {
-        DEBUG("Dividing %"PRIu32" by 15625...\n", u32_test_values[i]);
+        DEBUG("Dividing %12"PRIu32" by 15625...\n", u32_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
-            u32_test_values[i] / 15625,
+            (uint64_t)u32_test_values[i] / 15625,
             div_u64_by_15625(u32_test_values[i]));
     }
 
     for (unsigned i = 0; i < N_U64_VALS; i++) {
-        DEBUG("Dividing %"PRIu64" by 15625...\n", u64_test_values[i]);
+        DEBUG("Dividing %12"PRIu64" by 15625...\n", u64_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
             (uint64_t)u64_test_values[i] / 15625,
             div_u64_by_15625(u64_test_values[i]));
@@ -69,7 +100,7 @@ static void test_div_u64_by_1000000(void)
     for (unsigned i = 0; i < N_U32_VALS; i++) {
         DEBUG("Dividing %"PRIu32" by 1000000...\n", u32_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
-            u32_test_values[i] / 1000000lu,
+            (uint64_t)u32_test_values[i] / 1000000lu,
             div_u64_by_1000000(u32_test_values[i]));
     }
 
@@ -93,7 +124,7 @@ static void test_div_u64_by_15625div512(void)
     for (unsigned i = 0; i < N_U64_VALS; i++) {
         DEBUG("Dividing %"PRIu64" by (15625/512)...\n", u64_test_values[i]);
         TEST_ASSERT_EQUAL_INT(
-            (uint64_t)u64_test_values[i] * 512lu / 15625,
+            u64_15625_512_expected_values[i],
             div_u64_by_15625div512(u64_test_values[i]));
     }
 }