mirror of
https://github.com/RIOT-OS/RIOT.git
synced 2024-12-29 04:50:03 +01:00
sys/matstat: Integer mathematical statistics library
This commit is contained in:
parent
ded2a44c77
commit
6927260973
120
sys/include/matstat.h
Normal file
120
sys/include/matstat.h
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2018 Eistec AB
|
||||||
|
*
|
||||||
|
* This file is subject to the terms and conditions of the GNU Lesser
|
||||||
|
* General Public License v2.1. See the file LICENSE in the top level
|
||||||
|
* directory for more details.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @defgroup sys_matstat Matstat - Integer mathematical statistics library
|
||||||
|
* @ingroup sys
|
||||||
|
* @brief Library for computing 1-pass statistics
|
||||||
|
*
|
||||||
|
* The Matstat library uses single pass algorithms to compute statistic measures
|
||||||
|
* such as mean and variance over many values. The values can be immediately
|
||||||
|
* discarded after processing, keeping the memory requirement constant
|
||||||
|
* regardless of how many values need to be processed.
|
||||||
|
*
|
||||||
|
* The design goal is to provide basic mathematical statistics operations on
|
||||||
|
* constrained devices with a "good enough" accuracy to be able to provide some
|
||||||
|
* descriptive measures of data. For more accurate measures of statistics, use a
|
||||||
|
* fancier library, or copy the data to a PC.
|
||||||
|
*
|
||||||
|
* It is important to know that using integer operations will result in lower
|
||||||
|
* precision in the computed measures because of truncation.
|
||||||
|
*
|
||||||
|
* @{
|
||||||
|
* @file
|
||||||
|
* @brief Matstat library declarations
|
||||||
|
*
|
||||||
|
* @author Joakim Nohlgård <joakim.nohlgard@eistec.se>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MATSTAT_H
|
||||||
|
#define MATSTAT_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Internal state for computing running statistics
|
||||||
|
*/
|
||||||
|
typedef struct {
|
||||||
|
int64_t sum; /**< Sum of values added */
|
||||||
|
uint64_t sum_sq; /**< Sum of squared differences */
|
||||||
|
uint32_t count; /**< Number of values added */
|
||||||
|
int32_t mean; /**< Mean value */
|
||||||
|
int32_t min; /**< Minimum value seen */
|
||||||
|
int32_t max; /**< Maximum value seen */
|
||||||
|
} matstat_state_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Empty state initializer
|
||||||
|
*/
|
||||||
|
#define MATSTAT_STATE_INIT (const matstat_state_t) { \
|
||||||
|
.sum = 0, \
|
||||||
|
.sum_sq = 0, \
|
||||||
|
.count = 0, \
|
||||||
|
.mean = 0, \
|
||||||
|
.min = INT32_MAX, \
|
||||||
|
.max = INT32_MIN, \
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Reset state
|
||||||
|
*
|
||||||
|
* @param[in] state State struct to clear
|
||||||
|
*/
|
||||||
|
void matstat_clear(matstat_state_t *state);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Add a sample to state
|
||||||
|
*
|
||||||
|
* @param[in] state State struct to operate on
|
||||||
|
* @param[in] value Value to add to the state
|
||||||
|
*/
|
||||||
|
void matstat_add(matstat_state_t *state, int32_t value);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Return the computed mean value of all samples so far
|
||||||
|
*
|
||||||
|
* @param[in] state State struct to operate on
|
||||||
|
*
|
||||||
|
* @return arithmetic mean
|
||||||
|
*/
|
||||||
|
static inline int32_t matstat_mean(const matstat_state_t *state)
|
||||||
|
{
|
||||||
|
return state->mean;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Compute the sample variance of all samples so far
|
||||||
|
*
|
||||||
|
* @param[in] state State struct to operate on
|
||||||
|
*
|
||||||
|
* @return sample variance
|
||||||
|
*/
|
||||||
|
uint64_t matstat_variance(const matstat_state_t *state);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Combine two states
|
||||||
|
*
|
||||||
|
* Add the sums and count of @p src and @p dest, take the maximum of the max
|
||||||
|
* values and minimum of the min values. The result is written to @p dest.
|
||||||
|
*
|
||||||
|
* @param[inout] dest destination state struct
|
||||||
|
* @param[out] src source state struct
|
||||||
|
*/
|
||||||
|
void matstat_merge(matstat_state_t *dest, const matstat_state_t *src);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* MATSTAT_H */
|
||||||
|
|
||||||
|
/** @} */
|
1
sys/matstat/Makefile
Normal file
1
sys/matstat/Makefile
Normal file
@ -0,0 +1 @@
|
|||||||
|
include $(RIOTBASE)/Makefile.base
|
96
sys/matstat/matstat.c
Normal file
96
sys/matstat/matstat.c
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2018 Eistec AB
|
||||||
|
*
|
||||||
|
* This file is subject to the terms and conditions of the GNU Lesser
|
||||||
|
* General Public License v2.1. See the file LICENSE in the top level
|
||||||
|
* directory for more details.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include "matstat.h"
|
||||||
|
|
||||||
|
#define ENABLE_DEBUG (0)
|
||||||
|
#include "debug.h"
|
||||||
|
|
||||||
|
void matstat_clear(matstat_state_t *state)
|
||||||
|
{
|
||||||
|
*state = MATSTAT_STATE_INIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
void matstat_add(matstat_state_t *state, int32_t value)
|
||||||
|
{
|
||||||
|
if (value > state->max) {
|
||||||
|
state->max = value;
|
||||||
|
}
|
||||||
|
if (value < state->min) {
|
||||||
|
state->min = value;
|
||||||
|
}
|
||||||
|
state->sum += value;
|
||||||
|
/* Using Welford's algorithm for variance */
|
||||||
|
++state->count;
|
||||||
|
if (state->count == 1) {
|
||||||
|
state->sum_sq = 0;
|
||||||
|
state->mean = value;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
int32_t new_mean = state->sum / state->count;
|
||||||
|
int64_t diff = (value - state->mean) * (value - new_mean);
|
||||||
|
if ((diff < 0) && ((uint64_t)(-diff) > state->sum_sq)) {
|
||||||
|
/* Handle corner cases where sum_sq becomes negative */
|
||||||
|
state->sum_sq = 0;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
state->sum_sq += diff;
|
||||||
|
}
|
||||||
|
state->mean = new_mean;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t matstat_variance(const matstat_state_t *state)
|
||||||
|
{
|
||||||
|
if (state->count < 2) {
|
||||||
|
/* We don't have any way of returning an error */
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
uint64_t variance = state->sum_sq / (state->count - 1);
|
||||||
|
DEBUG("Var: (%" PRIu64 " / (%" PRId32 " - 1)) = %" PRIu64 "\n",
|
||||||
|
state->sum_sq, state->count, variance);
|
||||||
|
return variance;
|
||||||
|
}
|
||||||
|
|
||||||
|
void matstat_merge(matstat_state_t *dest, const matstat_state_t *src)
|
||||||
|
{
|
||||||
|
if (src->count == 0) {
|
||||||
|
/* src is empty, no-op */
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (dest->count == 0) {
|
||||||
|
/* dest is empty, straight copy */
|
||||||
|
*dest = *src;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
/* Combining the variance of the two samples needs some extra
|
||||||
|
* handling if the means are different between the two states,
|
||||||
|
* source: https://stats.stackexchange.com/a/43183
|
||||||
|
* (using sum_sq = sigma2 * n, instead of sum_sq = sigma2 * (n-1) to simplify algorithm)
|
||||||
|
*/
|
||||||
|
dest->sum_sq = (dest->sum_sq + dest->sum * dest->mean + src->sum_sq + src->sum * src->mean);
|
||||||
|
dest->count += src->count;
|
||||||
|
dest->sum += src->sum;
|
||||||
|
int32_t new_mean = dest->sum / dest->count;
|
||||||
|
int64_t diff = -new_mean * dest->sum;
|
||||||
|
if ((diff < 0) && ((uint64_t)(-diff) > dest->sum_sq)) {
|
||||||
|
/* Handle corner cases where sum_sq becomes negative */
|
||||||
|
dest->sum_sq = 0;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
dest->sum_sq += diff;
|
||||||
|
}
|
||||||
|
dest->mean = new_mean;
|
||||||
|
if (src->max > dest->max) {
|
||||||
|
dest->max = src->max;
|
||||||
|
}
|
||||||
|
if (src->min < dest->min) {
|
||||||
|
dest->min = src->min;
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user