1
0
mirror of https://github.com/RIOT-OS/RIOT.git synced 2024-12-29 04:50:03 +01:00

sys/matstat: Integer mathematical statistics library

This commit is contained in:
Joakim Nohlgård 2018-03-05 08:12:07 +01:00
parent ded2a44c77
commit 6927260973
3 changed files with 217 additions and 0 deletions

120
sys/include/matstat.h Normal file
View File

@ -0,0 +1,120 @@
/*
* Copyright (C) 2018 Eistec AB
*
* This file is subject to the terms and conditions of the GNU Lesser
* General Public License v2.1. See the file LICENSE in the top level
* directory for more details.
*/
/**
* @defgroup sys_matstat Matstat - Integer mathematical statistics library
* @ingroup sys
* @brief Library for computing 1-pass statistics
*
* The Matstat library uses single pass algorithms to compute statistic measures
* such as mean and variance over many values. The values can be immediately
* discarded after processing, keeping the memory requirement constant
* regardless of how many values need to be processed.
*
* The design goal is to provide basic mathematical statistics operations on
* constrained devices with a "good enough" accuracy to be able to provide some
* descriptive measures of data. For more accurate measures of statistics, use a
* fancier library, or copy the data to a PC.
*
* It is important to know that using integer operations will result in lower
* precision in the computed measures because of truncation.
*
* @{
* @file
* @brief Matstat library declarations
*
* @author Joakim Nohlgård <joakim.nohlgard@eistec.se>
*/
#ifndef MATSTAT_H
#define MATSTAT_H
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* @brief Internal state for computing running statistics
*/
typedef struct {
int64_t sum; /**< Sum of values added */
uint64_t sum_sq; /**< Sum of squared differences */
uint32_t count; /**< Number of values added */
int32_t mean; /**< Mean value */
int32_t min; /**< Minimum value seen */
int32_t max; /**< Maximum value seen */
} matstat_state_t;
/**
* @brief Empty state initializer
*/
#define MATSTAT_STATE_INIT (const matstat_state_t) { \
.sum = 0, \
.sum_sq = 0, \
.count = 0, \
.mean = 0, \
.min = INT32_MAX, \
.max = INT32_MIN, \
}
/**
* @brief Reset state
*
* @param[in] state State struct to clear
*/
void matstat_clear(matstat_state_t *state);
/**
* @brief Add a sample to state
*
* @param[in] state State struct to operate on
* @param[in] value Value to add to the state
*/
void matstat_add(matstat_state_t *state, int32_t value);
/**
* @brief Return the computed mean value of all samples so far
*
* @param[in] state State struct to operate on
*
* @return arithmetic mean
*/
static inline int32_t matstat_mean(const matstat_state_t *state)
{
return state->mean;
}
/**
* @brief Compute the sample variance of all samples so far
*
* @param[in] state State struct to operate on
*
* @return sample variance
*/
uint64_t matstat_variance(const matstat_state_t *state);
/**
* @brief Combine two states
*
* Add the sums and count of @p src and @p dest, take the maximum of the max
* values and minimum of the min values. The result is written to @p dest.
*
* @param[inout] dest destination state struct
* @param[out] src source state struct
*/
void matstat_merge(matstat_state_t *dest, const matstat_state_t *src);
#ifdef __cplusplus
}
#endif
#endif /* MATSTAT_H */
/** @} */

1
sys/matstat/Makefile Normal file
View File

@ -0,0 +1 @@
include $(RIOTBASE)/Makefile.base

96
sys/matstat/matstat.c Normal file
View File

@ -0,0 +1,96 @@
/*
* Copyright (C) 2018 Eistec AB
*
* This file is subject to the terms and conditions of the GNU Lesser
* General Public License v2.1. See the file LICENSE in the top level
* directory for more details.
*/
#include <stdint.h>
#include "matstat.h"
#define ENABLE_DEBUG (0)
#include "debug.h"
void matstat_clear(matstat_state_t *state)
{
*state = MATSTAT_STATE_INIT;
}
void matstat_add(matstat_state_t *state, int32_t value)
{
if (value > state->max) {
state->max = value;
}
if (value < state->min) {
state->min = value;
}
state->sum += value;
/* Using Welford's algorithm for variance */
++state->count;
if (state->count == 1) {
state->sum_sq = 0;
state->mean = value;
}
else {
int32_t new_mean = state->sum / state->count;
int64_t diff = (value - state->mean) * (value - new_mean);
if ((diff < 0) && ((uint64_t)(-diff) > state->sum_sq)) {
/* Handle corner cases where sum_sq becomes negative */
state->sum_sq = 0;
}
else {
state->sum_sq += diff;
}
state->mean = new_mean;
}
}
uint64_t matstat_variance(const matstat_state_t *state)
{
if (state->count < 2) {
/* We don't have any way of returning an error */
return 0;
}
uint64_t variance = state->sum_sq / (state->count - 1);
DEBUG("Var: (%" PRIu64 " / (%" PRId32 " - 1)) = %" PRIu64 "\n",
state->sum_sq, state->count, variance);
return variance;
}
void matstat_merge(matstat_state_t *dest, const matstat_state_t *src)
{
if (src->count == 0) {
/* src is empty, no-op */
return;
}
if (dest->count == 0) {
/* dest is empty, straight copy */
*dest = *src;
return;
}
/* Combining the variance of the two samples needs some extra
* handling if the means are different between the two states,
* source: https://stats.stackexchange.com/a/43183
* (using sum_sq = sigma2 * n, instead of sum_sq = sigma2 * (n-1) to simplify algorithm)
*/
dest->sum_sq = (dest->sum_sq + dest->sum * dest->mean + src->sum_sq + src->sum * src->mean);
dest->count += src->count;
dest->sum += src->sum;
int32_t new_mean = dest->sum / dest->count;
int64_t diff = -new_mean * dest->sum;
if ((diff < 0) && ((uint64_t)(-diff) > dest->sum_sq)) {
/* Handle corner cases where sum_sq becomes negative */
dest->sum_sq = 0;
}
else {
dest->sum_sq += diff;
}
dest->mean = new_mean;
if (src->max > dest->max) {
dest->max = src->max;
}
if (src->min < dest->min) {
dest->min = src->min;
}
}