From 692726097334fcccefb8760d5a7ac18d81a5cb5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20Nohlg=C3=A5rd?= Date: Mon, 5 Mar 2018 08:12:07 +0100 Subject: [PATCH] sys/matstat: Integer mathematical statistics library --- sys/include/matstat.h | 120 ++++++++++++++++++++++++++++++++++++++++++ sys/matstat/Makefile | 1 + sys/matstat/matstat.c | 96 +++++++++++++++++++++++++++++++++ 3 files changed, 217 insertions(+) create mode 100644 sys/include/matstat.h create mode 100644 sys/matstat/Makefile create mode 100644 sys/matstat/matstat.c diff --git a/sys/include/matstat.h b/sys/include/matstat.h new file mode 100644 index 0000000000..74c99df101 --- /dev/null +++ b/sys/include/matstat.h @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2018 Eistec AB + * + * This file is subject to the terms and conditions of the GNU Lesser + * General Public License v2.1. See the file LICENSE in the top level + * directory for more details. + */ + +/** + * @defgroup sys_matstat Matstat - Integer mathematical statistics library + * @ingroup sys + * @brief Library for computing 1-pass statistics + * + * The Matstat library uses single pass algorithms to compute statistic measures + * such as mean and variance over many values. The values can be immediately + * discarded after processing, keeping the memory requirement constant + * regardless of how many values need to be processed. + * + * The design goal is to provide basic mathematical statistics operations on + * constrained devices with a "good enough" accuracy to be able to provide some + * descriptive measures of data. For more accurate measures of statistics, use a + * fancier library, or copy the data to a PC. + * + * It is important to know that using integer operations will result in lower + * precision in the computed measures because of truncation. + * + * @{ + * @file + * @brief Matstat library declarations + * + * @author Joakim NohlgÄrd + */ + +#ifndef MATSTAT_H +#define MATSTAT_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @brief Internal state for computing running statistics + */ +typedef struct { + int64_t sum; /**< Sum of values added */ + uint64_t sum_sq; /**< Sum of squared differences */ + uint32_t count; /**< Number of values added */ + int32_t mean; /**< Mean value */ + int32_t min; /**< Minimum value seen */ + int32_t max; /**< Maximum value seen */ +} matstat_state_t; + +/** + * @brief Empty state initializer + */ +#define MATSTAT_STATE_INIT (const matstat_state_t) { \ + .sum = 0, \ + .sum_sq = 0, \ + .count = 0, \ + .mean = 0, \ + .min = INT32_MAX, \ + .max = INT32_MIN, \ + } + +/** + * @brief Reset state + * + * @param[in] state State struct to clear + */ +void matstat_clear(matstat_state_t *state); + +/** + * @brief Add a sample to state + * + * @param[in] state State struct to operate on + * @param[in] value Value to add to the state + */ +void matstat_add(matstat_state_t *state, int32_t value); + +/** + * @brief Return the computed mean value of all samples so far + * + * @param[in] state State struct to operate on + * + * @return arithmetic mean + */ +static inline int32_t matstat_mean(const matstat_state_t *state) +{ + return state->mean; +} + +/** + * @brief Compute the sample variance of all samples so far + * + * @param[in] state State struct to operate on + * + * @return sample variance + */ +uint64_t matstat_variance(const matstat_state_t *state); + +/** + * @brief Combine two states + * + * Add the sums and count of @p src and @p dest, take the maximum of the max + * values and minimum of the min values. The result is written to @p dest. + * + * @param[inout] dest destination state struct + * @param[out] src source state struct + */ +void matstat_merge(matstat_state_t *dest, const matstat_state_t *src); + +#ifdef __cplusplus +} +#endif + +#endif /* MATSTAT_H */ + +/** @} */ diff --git a/sys/matstat/Makefile b/sys/matstat/Makefile new file mode 100644 index 0000000000..48422e909a --- /dev/null +++ b/sys/matstat/Makefile @@ -0,0 +1 @@ +include $(RIOTBASE)/Makefile.base diff --git a/sys/matstat/matstat.c b/sys/matstat/matstat.c new file mode 100644 index 0000000000..35f549c188 --- /dev/null +++ b/sys/matstat/matstat.c @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2018 Eistec AB + * + * This file is subject to the terms and conditions of the GNU Lesser + * General Public License v2.1. See the file LICENSE in the top level + * directory for more details. + */ + +#include +#include "matstat.h" + +#define ENABLE_DEBUG (0) +#include "debug.h" + +void matstat_clear(matstat_state_t *state) +{ + *state = MATSTAT_STATE_INIT; +} + +void matstat_add(matstat_state_t *state, int32_t value) +{ + if (value > state->max) { + state->max = value; + } + if (value < state->min) { + state->min = value; + } + state->sum += value; + /* Using Welford's algorithm for variance */ + ++state->count; + if (state->count == 1) { + state->sum_sq = 0; + state->mean = value; + } + else { + int32_t new_mean = state->sum / state->count; + int64_t diff = (value - state->mean) * (value - new_mean); + if ((diff < 0) && ((uint64_t)(-diff) > state->sum_sq)) { + /* Handle corner cases where sum_sq becomes negative */ + state->sum_sq = 0; + } + else { + state->sum_sq += diff; + } + state->mean = new_mean; + } +} + +uint64_t matstat_variance(const matstat_state_t *state) +{ + if (state->count < 2) { + /* We don't have any way of returning an error */ + return 0; + } + uint64_t variance = state->sum_sq / (state->count - 1); + DEBUG("Var: (%" PRIu64 " / (%" PRId32 " - 1)) = %" PRIu64 "\n", + state->sum_sq, state->count, variance); + return variance; +} + +void matstat_merge(matstat_state_t *dest, const matstat_state_t *src) +{ + if (src->count == 0) { + /* src is empty, no-op */ + return; + } + if (dest->count == 0) { + /* dest is empty, straight copy */ + *dest = *src; + return; + } + /* Combining the variance of the two samples needs some extra + * handling if the means are different between the two states, + * source: https://stats.stackexchange.com/a/43183 + * (using sum_sq = sigma2 * n, instead of sum_sq = sigma2 * (n-1) to simplify algorithm) + */ + dest->sum_sq = (dest->sum_sq + dest->sum * dest->mean + src->sum_sq + src->sum * src->mean); + dest->count += src->count; + dest->sum += src->sum; + int32_t new_mean = dest->sum / dest->count; + int64_t diff = -new_mean * dest->sum; + if ((diff < 0) && ((uint64_t)(-diff) > dest->sum_sq)) { + /* Handle corner cases where sum_sq becomes negative */ + dest->sum_sq = 0; + } + else { + dest->sum_sq += diff; + } + dest->mean = new_mean; + if (src->max > dest->max) { + dest->max = src->max; + } + if (src->min < dest->min) { + dest->min = src->min; + } +}