mirror of
https://github.com/RIOT-OS/RIOT.git
synced 2024-12-29 04:50:03 +01:00
change the bloom filter to filter arrays of bytes
The current implementation of the bloom filter only handles C strings; this commit changes the hash functions to work on byte arrays. Additionally, I added two more hashes and moved the hashes into their own sys folder.
This commit is contained in:
parent
c1fe799487
commit
2e44523b51
@ -82,6 +82,9 @@ endif
|
||||
ifneq (,$(findstring random,$(USEMODULE)))
|
||||
DIRS += random
|
||||
endif
|
||||
ifneq (,$(findstring hashes,$(USEMODULE)))
|
||||
DIRS += hashes
|
||||
endif
|
||||
|
||||
all: $(BINDIR)$(MODULE).a
|
||||
@for i in $(DIRS) ; do $(MAKE) -C $$i ; done ;
|
||||
|
@ -24,10 +24,11 @@
|
||||
#define GETBIT(a,n) (a[n/CHAR_BIT] & (1<<(n%CHAR_BIT)))
|
||||
#define ROUND(size) ((size + CHAR_BIT - 1) / CHAR_BIT)
|
||||
|
||||
struct bloom_t *bloom_new(size_t size, size_t num_hashes, ...) {
|
||||
struct bloom_t *bloom_new(size_t size, size_t num_hashes, ...)
|
||||
{
|
||||
struct bloom_t *bloom;
|
||||
va_list hashes;
|
||||
int n;
|
||||
size_t n;
|
||||
|
||||
/* Allocate Bloom filter container */
|
||||
if (!(bloom = malloc(sizeof(struct bloom_t)))) {
|
||||
@ -41,7 +42,7 @@ struct bloom_t *bloom_new(size_t size, size_t num_hashes, ...) {
|
||||
}
|
||||
|
||||
/* Allocate Bloom filter hash function pointers */
|
||||
if (!(bloom->hash = (hashfp_t *)malloc(num_hashes *sizeof(hashfp_t)))) {
|
||||
if (!(bloom->hash = (hashfp_t *)malloc(num_hashes * sizeof(hashfp_t)))) {
|
||||
free(bloom->a);
|
||||
free(bloom);
|
||||
return NULL;
|
||||
@ -73,24 +74,24 @@ void bloom_del(struct bloom_t *bloom)
|
||||
free(bloom);
|
||||
}
|
||||
|
||||
void bloom_add(struct bloom_t *bloom, const char *s)
|
||||
void bloom_add(struct bloom_t *bloom, const uint8_t *buf, size_t len)
|
||||
{
|
||||
unsigned int hash;
|
||||
int n;
|
||||
uint32_t hash;
|
||||
size_t n;
|
||||
|
||||
for (n = 0; n < bloom->k; n++) {
|
||||
hash = (unsigned int)bloom->hash[n](s);
|
||||
hash = bloom->hash[n](buf, len);
|
||||
SETBIT(bloom->a, (hash % bloom->m));
|
||||
}
|
||||
}
|
||||
|
||||
bool bloom_check(struct bloom_t *bloom, const char *s)
|
||||
bool bloom_check(struct bloom_t *bloom, const uint8_t *buf, size_t len)
|
||||
{
|
||||
unsigned int hash;
|
||||
int n;
|
||||
uint32_t hash;
|
||||
size_t n;
|
||||
|
||||
for (n = 0; n < bloom->k; n++) {
|
||||
hash = (unsigned int)bloom->hash[n](s);
|
||||
hash = bloom->hash[n](buf, len);
|
||||
|
||||
if (!(GETBIT(bloom->a, (hash % bloom->m)))) {
|
||||
return false;
|
||||
|
4
sys/hashes/Makefile
Normal file
4
sys/hashes/Makefile
Normal file
@ -0,0 +1,4 @@
|
||||
INCLUDES = -I../include
|
||||
MODULE = hashes
|
||||
|
||||
include $(RIOTBASE)/Makefile.base
|
112
sys/hashes/hashes.c
Normal file
112
sys/hashes/hashes.c
Normal file
@ -0,0 +1,112 @@
|
||||
/**
|
||||
* This file contains some simple hash functions
|
||||
*
|
||||
* Copyright (C) 2013 Freie Universität Berlin
|
||||
*
|
||||
* This file is subject to the terms and conditions of the GNU Lesser General
|
||||
* Public License. See the file LICENSE in the top level directory for more
|
||||
* details.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file
|
||||
* @author Jason Linehan <patientulysses@gmail.com>
|
||||
* @author Freie Universität Berlin, Computer Systems & Telematics
|
||||
* @author Christian Mehlis <mehlis@inf.fu-berlin.de>
|
||||
*/
|
||||
|
||||
#include "hashes.h"
|
||||
|
||||
/**
 * @brief Compute the djb2 hash (seed 5381, multiplier 33) of a byte buffer.
 *
 * @param buf  bytes to hash; not read when @p len is 0
 * @param len  number of bytes in @p buf
 * @return     32 bit sized hash; 5381 for an empty buffer
 */
uint32_t djb2_hash(const uint8_t *buf, size_t len)
{
    uint32_t hash = 5381;

    for (size_t i = 0; i < len; i++) {
        /* unsigned arithmetic: wraps modulo 2^32 on purpose */
        hash = hash * 33 + buf[i];
    }

    return hash;
}
|
||||
|
||||
/**
 * @brief Compute the sdbm hash of a byte buffer.
 *
 * Each step evaluates hash * 65599 + byte, written with shifts:
 * (hash << 6) + (hash << 16) - hash.
 *
 * @param buf  bytes to hash; not read when @p len is 0
 * @param len  number of bytes in @p buf
 * @return     32 bit sized hash; 0 for an empty buffer
 */
uint32_t sdbm_hash(const uint8_t *buf, size_t len)
{
    uint32_t hash = 0;

    for (size_t i = 0; i < len; i++) {
        hash = buf[i] + (hash << 6) + (hash << 16) - hash;
    }

    return hash;
}
|
||||
|
||||
/**
 * @brief Compute the K&R "lose lose" hash: the plain sum of all bytes.
 *
 * Byte order does not influence the result, so permutations of the same
 * bytes collide.
 *
 * @param buf  bytes to hash; not read when @p len is 0
 * @param len  number of bytes in @p buf
 * @return     32 bit sized hash; 0 for an empty buffer
 */
uint32_t kr_hash(const uint8_t *buf, size_t len)
{
    uint32_t hash = 0;

    for (size_t i = 0; i < len; i++) {
        hash += buf[i];
    }

    return hash;
}
|
||||
|
||||
/**
 * @brief Compute the Shift-Add-XOR (SAX) hash of a byte buffer.
 *
 * @param buf  bytes to hash; not read when @p len is 0
 * @param len  number of bytes in @p buf
 * @return     32 bit sized hash; 0 for an empty buffer
 */
uint32_t sax_hash(const uint8_t *buf, size_t len)
{
    uint32_t hash = 0;

    for (size_t i = 0; i < len; i++) {
        /* the sum is formed first, then XORed into the running hash */
        hash ^= (hash << 5) + (hash >> 2) + buf[i];
    }

    return hash;
}
|
||||
|
||||
/**
 * @brief Compute the DEK (Knuth) hash of a byte buffer.
 *
 * Each step XORs the hash shifted left by 5, the hash shifted right by 27
 * and the next byte. The seed is the prime 7919.
 *
 * @param buf  bytes to hash; not read when @p len is 0
 * @param len  number of bytes in @p buf
 * @return     32 bit sized hash; 7919 for an empty buffer
 */
uint32_t dek_hash(const uint8_t *buf, size_t len)
{
    uint32_t hash = 7919; /* prime */

    for (size_t i = 0; i < len; i++) {
        hash = (hash << 5) ^ (hash >> 27) ^ buf[i];
    }

    return hash;
}
|
||||
|
||||
/**
 * @brief Compute a simplified FNV-style multiply-then-XOR hash.
 *
 * NOTE(review): the multiplier 0x811C9DC5 is the 32-bit FNV *offset basis*,
 * not the FNV prime (0x01000193), and the hash is seeded with 0. Standard
 * FNV-1 seeds with the offset basis and multiplies by the prime. The values
 * are kept as they are so that existing hash results do not change.
 *
 * Because the seed is 0, leading zero bytes leave the hash at 0.
 *
 * @param buf  bytes to hash; not read when @p len is 0
 * @param len  number of bytes in @p buf
 * @return     32 bit sized hash; 0 for an empty buffer
 */
uint32_t fnv_hash(const uint8_t *buf, size_t len)
{
    const uint32_t multiplier = 0x811C9DC5;
    uint32_t hash = 0;

    for (size_t i = 0; i < len; i++) {
        hash *= multiplier;
        hash ^= buf[i];
    }

    return hash;
}
|
||||
|
||||
/**
 * @brief Compute the rotating hash of a byte buffer.
 *
 * Each step rotates the 32-bit hash left by 4 bits (the two shifted halves
 * do not overlap, so XOR combines them like OR) and XORs in the next byte.
 *
 * @param buf  bytes to hash; not read when @p len is 0
 * @param len  number of bytes in @p buf
 * @return     32 bit sized hash; 0 for an empty buffer
 */
uint32_t rotating_hash(const uint8_t *buf, size_t len)
{
    uint32_t hash = 0;

    for (size_t i = 0; i < len; i++) {
        hash = (hash << 4) ^ (hash >> 28) ^ buf[i];
    }

    return hash;
}
|
||||
|
||||
/**
 * @brief Compute the one-at-a-time hash of a byte buffer.
 *
 * Every byte is added and mixed with a shift-add / shift-XOR pair; three
 * final mixing steps are applied after the last byte. The seed is the
 * prime 786431, so an empty buffer still yields a non-zero value.
 *
 * @param buf  bytes to hash; not read when @p len is 0
 * @param len  number of bytes in @p buf
 * @return     32 bit sized hash
 */
uint32_t one_at_a_time_hash(const uint8_t *buf, size_t len)
{
    uint32_t hash = 786431; /* prime */

    for (size_t i = 0; i < len; i++) {
        hash += buf[i];
        hash += hash << 10;
        hash ^= hash >> 6;
    }

    /* final avalanche */
    hash += hash << 3;
    hash ^= hash >> 11;
    hash += hash << 15;

    return hash;
}
|
@ -116,7 +116,7 @@
|
||||
/**
|
||||
* hashfp_t hash function to use in the filter
|
||||
*/
|
||||
typedef unsigned int (*hashfp_t)(const char *);
|
||||
/* len is size_t (not int) so that the hash functions declared in hashes.h,
 * which take (const uint8_t *, size_t), have exactly this pointer type. */
typedef uint32_t (*hashfp_t)(const uint8_t *buf, size_t len);
|
||||
|
||||
/**
|
||||
* struct bloom_t bloom filter object
|
||||
@ -124,7 +124,7 @@ typedef unsigned int (*hashfp_t)(const char *);
|
||||
struct bloom_t {
|
||||
size_t m;
|
||||
size_t k;
|
||||
unsigned char *a;
|
||||
uint8_t *a;
|
||||
hashfp_t *hash;
|
||||
};
|
||||
|
||||
@ -162,7 +162,7 @@ void bloom_del(struct bloom_t *bloom);
|
||||
* @return nothing
|
||||
*
|
||||
*/
|
||||
void bloom_add(struct bloom_t *bloom, const char *s);
|
||||
void bloom_add(struct bloom_t *bloom, const uint8_t *buf, size_t len);
|
||||
|
||||
/**
|
||||
* bloom_check Determine if a string is in the Bloom filter.
|
||||
@ -199,6 +199,6 @@ void bloom_add(struct bloom_t *bloom, const char *s);
|
||||
* @return true if the string may be in the filter
|
||||
*
|
||||
*/
|
||||
bool bloom_check(struct bloom_t *bloom, const char *s);
|
||||
bool bloom_check(struct bloom_t *bloom, const uint8_t *buf, size_t len);
|
||||
|
||||
#endif
|
||||
|
@ -15,8 +15,11 @@
|
||||
* @author Christian Mehlis <mehlis@inf.fu-berlin.de>
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
/**
|
||||
* djb2_hash
|
||||
* @brief djb2_hash
|
||||
*
|
||||
* HISTORY
|
||||
* This algorithm (k=33) was first reported by Dan Bernstein many years
|
||||
@ -27,23 +30,15 @@
|
||||
*
|
||||
* The magic of number 33 (why it works better than many other constants,
|
||||
* prime or not) has never been adequately explained.
|
||||
*
|
||||
* @param buf input buffer to hash
|
||||
* @param len length of buffer
|
||||
* @return 32 bit sized hash
|
||||
*/
|
||||
static inline unsigned long djb2_hash(const char *str)
|
||||
{
|
||||
unsigned long hash;
|
||||
int c;
|
||||
|
||||
hash = 5381;
|
||||
|
||||
while ((c = (unsigned char) * str++)) {
|
||||
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
uint32_t djb2_hash(const uint8_t *buf, size_t len);
|
||||
|
||||
/**
|
||||
* sdbm_hash
|
||||
* @brief sdbm_hash
|
||||
*
|
||||
* HISTORY
|
||||
* This algorithm was created for sdbm (a public-domain reimplementation
|
||||
@ -61,23 +56,14 @@ static inline unsigned long djb2_hash(const char *str)
|
||||
* out to be a prime. this is one of the algorithms used in berkeley db
|
||||
* (see sleepycat) and elsewhere.
|
||||
*
|
||||
* @param buf input buffer to hash
|
||||
* @param len length of buffer
|
||||
* @return 32 bit sized hash
|
||||
*/
|
||||
static inline unsigned long sdbm_hash(const char *str)
|
||||
{
|
||||
unsigned long hash;
|
||||
int c;
|
||||
|
||||
hash = 0;
|
||||
|
||||
while ((c = (unsigned char) * str++)) {
|
||||
hash = c + (hash << 6) + (hash << 16) - hash;
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
uint32_t sdbm_hash(const uint8_t *buf, size_t len);
|
||||
|
||||
/**
|
||||
* lose lose
|
||||
* @brief lose lose
|
||||
*
|
||||
* HISTORY
|
||||
* This hash function appeared in K&R (1st ed) but at least the reader
|
||||
@ -94,83 +80,70 @@ static inline unsigned long sdbm_hash(const char *str)
|
||||
* checking something like Knuth's Sorting and Searching, so it stuck.
|
||||
* It is now found mixed with otherwise respectable code, eg. cnews. sigh.
|
||||
* [see also: tpop]
|
||||
*
|
||||
* @param buf input buffer to hash
|
||||
* @param len length of buffer
|
||||
* @return 32 bit sized hash
|
||||
*/
|
||||
static inline unsigned long kr_hash(const char *str)
|
||||
{
|
||||
unsigned int hash;
|
||||
unsigned int c;
|
||||
|
||||
hash = 0;
|
||||
|
||||
while ((c = (unsigned char) * str++)) {
|
||||
hash += c;
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
uint32_t kr_hash(const uint8_t *buf, size_t len);
|
||||
|
||||
/**
|
||||
* sax_hash
|
||||
* @brief sax_hash
|
||||
*
|
||||
* Shift, Add, XOR
|
||||
*
|
||||
* @param buf input buffer to hash
|
||||
* @param len length of buffer
|
||||
* @return 32 bit sized hash
|
||||
*/
|
||||
static inline unsigned int sax_hash(const char *key)
|
||||
{
|
||||
unsigned int h;
|
||||
|
||||
h = 0;
|
||||
|
||||
while (*key) {
|
||||
h ^= (h << 5) + (h >> 2) + (unsigned char) * key++;
|
||||
}
|
||||
|
||||
return h;
|
||||
}
|
||||
|
||||
uint32_t sax_hash(const uint8_t *buf, size_t len);
|
||||
|
||||
/**
|
||||
* dek_hash
|
||||
* @brief dek_hash
|
||||
*
|
||||
* HISTORY
|
||||
* Proposed by Donald E. Knuth in The Art Of Computer Programming Vol. 3,
|
||||
* under the topic of "Sorting and Search", Chapter 6.4.
|
||||
*
|
||||
* @param buf input buffer to hash
|
||||
* @param len length of buffer
|
||||
* @return 32 bit sized hash
|
||||
*/
|
||||
static inline unsigned int dek_hash(const char *str, unsigned int len)
|
||||
{
|
||||
unsigned int hash;
|
||||
unsigned int c;
|
||||
|
||||
hash = len;
|
||||
c = 0;
|
||||
|
||||
while ((c = (unsigned int) * str++)) {
|
||||
hash = ((hash << 5) ^ (hash >> 27)) ^ (c);
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
uint32_t dek_hash(const uint8_t *buf, size_t len);
|
||||
|
||||
/**
|
||||
* fnv_hash
|
||||
* @brief fnv_hash
|
||||
*
|
||||
* NOTE
|
||||
* For a more fully featured and modern version of this hash, see fnv32.c
|
||||
*
|
||||
* @param buf input buffer to hash
|
||||
* @param len length of buffer
|
||||
* @return 32 bit sized hash
|
||||
*/
|
||||
static inline unsigned int fnv_hash(const char *str)
|
||||
{
|
||||
#define FNV_PRIME 0x811C9DC5
|
||||
unsigned int hash;
|
||||
unsigned int c;
|
||||
uint32_t fnv_hash(const uint8_t *buf, size_t len);
|
||||
|
||||
hash = 0;
|
||||
c = 0;
|
||||
|
||||
while ((c = (unsigned int) * str++)) {
|
||||
hash *= FNV_PRIME;
|
||||
hash ^= (c);
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
/**
|
||||
* @brief rotating_hash
|
||||
*
|
||||
* found on
|
||||
* http://burtleburtle.net/bob/hash/doobs.html
|
||||
*
|
||||
* @param buf input buffer to hash
|
||||
* @param len length of buffer
|
||||
* @return 32 bit sized hash
|
||||
*/
|
||||
uint32_t rotating_hash(const uint8_t *buf, size_t len);
|
||||
|
||||
/**
|
||||
* @brief one_at_a_time_hash
|
||||
*
|
||||
* found on
|
||||
* http://burtleburtle.net/bob/hash/doobs.html
|
||||
*
|
||||
* @param buf input buffer to hash
|
||||
* @param len length of buffer
|
||||
* @return 32 bit sized hash
|
||||
*/
|
||||
uint32_t one_at_a_time_hash(const uint8_t *buf, size_t len);
|
||||
|
Loading…
Reference in New Issue
Block a user