1
0
mirror of https://github.com/RIOT-OS/RIOT.git synced 2024-12-29 04:50:03 +01:00

change the bloom filter to filter arrays of bytes

current implementation of the bloom filter only
handles C strings; this commit changes the hash
functions to work on byte arrays.

additionally I did:
	added two more hashes
	moved hashes in its own sys folder
This commit is contained in:
Christian Mehlis 2013-08-20 09:05:07 +02:00
parent c1fe799487
commit 2e44523b51
6 changed files with 195 additions and 102 deletions

View File

@ -82,6 +82,9 @@ endif
ifneq (,$(findstring random,$(USEMODULE)))
DIRS += random
endif
ifneq (,$(findstring hashes,$(USEMODULE)))
DIRS += hashes
endif
all: $(BINDIR)$(MODULE).a
@for i in $(DIRS) ; do $(MAKE) -C $$i ; done ;

View File

@ -24,10 +24,11 @@
#define GETBIT(a,n) (a[n/CHAR_BIT] & (1<<(n%CHAR_BIT)))
#define ROUND(size) ((size + CHAR_BIT - 1) / CHAR_BIT)
struct bloom_t *bloom_new(size_t size, size_t num_hashes, ...) {
struct bloom_t *bloom_new(size_t size, size_t num_hashes, ...)
{
struct bloom_t *bloom;
va_list hashes;
int n;
size_t n;
/* Allocate Bloom filter container */
if (!(bloom = malloc(sizeof(struct bloom_t)))) {
@ -41,7 +42,7 @@ struct bloom_t *bloom_new(size_t size, size_t num_hashes, ...) {
}
/* Allocate Bloom filter hash function pointers */
if (!(bloom->hash = (hashfp_t *)malloc(num_hashes *sizeof(hashfp_t)))) {
if (!(bloom->hash = (hashfp_t *)malloc(num_hashes * sizeof(hashfp_t)))) {
free(bloom->a);
free(bloom);
return NULL;
@ -73,24 +74,24 @@ void bloom_del(struct bloom_t *bloom)
free(bloom);
}
void bloom_add(struct bloom_t *bloom, const char *s)
void bloom_add(struct bloom_t *bloom, const uint8_t *buf, size_t len)
{
unsigned int hash;
int n;
uint32_t hash;
size_t n;
for (n = 0; n < bloom->k; n++) {
hash = (unsigned int)bloom->hash[n](s);
hash = bloom->hash[n](buf, len);
SETBIT(bloom->a, (hash % bloom->m));
}
}
bool bloom_check(struct bloom_t *bloom, const char *s)
bool bloom_check(struct bloom_t *bloom, const uint8_t *buf, size_t len)
{
unsigned int hash;
int n;
uint32_t hash;
size_t n;
for (n = 0; n < bloom->k; n++) {
hash = (unsigned int)bloom->hash[n](s);
hash = bloom->hash[n](buf, len);
if (!(GETBIT(bloom->a, (hash % bloom->m)))) {
return false;

4
sys/hashes/Makefile Normal file
View File

@ -0,0 +1,4 @@
INCLUDES = -I../include
MODULE = hashes
include $(RIOTBASE)/Makefile.base

112
sys/hashes/hashes.c Normal file
View File

@ -0,0 +1,112 @@
/**
* This file contains some simple hash functions
*
* Copyright (C) 2013 Freie Universität Berlin
*
* This file is subject to the terms and conditions of the GNU Lesser General
* Public License. See the file LICENSE in the top level directory for more
* details.
*/
/**
* @file
* @author Jason Linehan <patientulysses@gmail.com>
* @author Freie Universität Berlin, Computer Systems & Telematics
* @author Christian Mehlis <mehlis@inf.fu-berlin.de>
*/
#include "hashes.h"
/**
 * @brief djb2 hash over a byte buffer
 *
 * Starts from 5381 and folds in each byte as h = h * 33 + byte; the
 * multiplication is spelled as a shift plus an add. All arithmetic wraps
 * modulo 2^32.
 *
 * @param buf   input buffer to hash
 * @param len   length of buffer
 * @return      32 bit sized hash (5381 for an empty buffer)
 */
uint32_t djb2_hash(const uint8_t *buf, size_t len)
{
    uint32_t h = 5381;

    while (len-- > 0) {
        /* (h << 5) + h == h * 33 */
        h = (h << 5) + h + *buf++;
    }

    return h;
}
/**
 * @brief sdbm hash over a byte buffer
 *
 * Starts from 0 and folds in each byte as h = h * 65599 + byte, where
 * 65599 == (1 << 6) + (1 << 16) - 1. All arithmetic wraps modulo 2^32.
 *
 * @param buf   input buffer to hash
 * @param len   length of buffer
 * @return      32 bit sized hash (0 for an empty buffer)
 */
uint32_t sdbm_hash(const uint8_t *buf, size_t len)
{
    uint32_t h = 0;

    for (const uint8_t *p = buf; len != 0; --len, ++p) {
        h = h * UINT32_C(65599) + *p;
    }

    return h;
}
/**
 * @brief K&R "lose lose" hash: the plain sum of all bytes
 *
 * The sum wraps modulo 2^32.
 *
 * @param buf   input buffer to hash
 * @param len   length of buffer
 * @return      32 bit sized hash (0 for an empty buffer)
 */
uint32_t kr_hash(const uint8_t *buf, size_t len)
{
    uint32_t sum = 0;
    const uint8_t *cursor = buf;

    for (size_t left = len; left != 0; left--) {
        sum += *cursor++;
    }

    return sum;
}
/**
 * @brief Shift-Add-XOR hash over a byte buffer
 *
 * For each byte the state is XORed with
 * (state << 5) + (state >> 2) + byte. The state starts at 0.
 *
 * @param buf   input buffer to hash
 * @param len   length of buffer
 * @return      32 bit sized hash (0 for an empty buffer)
 */
uint32_t sax_hash(const uint8_t *buf, size_t len)
{
    uint32_t state = 0;

    for (size_t pos = 0; pos != len; ++pos) {
        const uint32_t up = state << 5;
        const uint32_t down = state >> 2;

        state ^= up + down + buf[pos];
    }

    return state;
}
/**
 * @brief Knuth (DEK) style hash over a byte buffer
 *
 * The state is seeded with the prime 7919. For each byte the state is
 * rotated left by 5 bits and XORed with the byte.
 *
 * @param buf   input buffer to hash
 * @param len   length of buffer
 * @return      32 bit sized hash (7919 for an empty buffer)
 */
uint32_t dek_hash(const uint8_t *buf, size_t len)
{
    uint32_t h = 7919; /* prime seed */

    while (len-- > 0) {
        /* the two shifted halves never overlap, so OR equals XOR here */
        const uint32_t rotated = (h << 5) | (h >> 27);

        h = rotated ^ *buf++;
    }

    return h;
}
/**
 * @brief 32 bit FNV-1 hash over a byte buffer
 *
 * The state starts at the FNV offset basis (0x811C9DC5); for each byte it
 * is multiplied by the FNV prime (0x01000193) and then XORed with the
 * byte. The multiplication wraps modulo 2^32.
 *
 * The previous code used the offset basis as the multiplier and started
 * from 0, so the first multiplication had no effect and the result was
 * not an FNV hash. Hash values therefore differ from that version.
 *
 * @param buf   input buffer to hash
 * @param len   length of buffer
 * @return      32 bit sized hash (0x811C9DC5 for an empty buffer)
 */
uint32_t fnv_hash(const uint8_t *buf, size_t len)
{
    const uint32_t fnv_prime = UINT32_C(0x01000193);
    uint32_t hash = UINT32_C(0x811C9DC5); /* FNV-1 32 bit offset basis */

    for (size_t i = 0; i < len; i++) {
        hash *= fnv_prime;
        hash ^= buf[i];
    }

    return hash;
}
/**
 * @brief Rotating hash over a byte buffer
 *
 * The state starts at 0. For each byte the state is rotated left by
 * 4 bits and XORed with the byte.
 *
 * @param buf   input buffer to hash
 * @param len   length of buffer
 * @return      32 bit sized hash (0 for an empty buffer)
 */
uint32_t rotating_hash(const uint8_t *buf, size_t len)
{
    uint32_t h = 0;

    for (const uint8_t *p = buf; len != 0; --len, ++p) {
        /* the two shifted halves never overlap, so OR equals XOR here */
        const uint32_t rotated = (h << 4) | (h >> 28);

        h = rotated ^ *p;
    }

    return h;
}
/**
 * @brief One-at-a-time hash over a byte buffer
 *
 * The accumulator is seeded with the prime 786431. Each byte is added and
 * then mixed in with a shift-add and a shift-xor step; a final three-step
 * avalanche is applied after the last byte. All arithmetic wraps
 * modulo 2^32.
 *
 * @param buf   input buffer to hash
 * @param len   length of buffer
 * @return      32 bit sized hash
 */
uint32_t one_at_a_time_hash(const uint8_t *buf, size_t len)
{
    uint32_t acc = 786431; /* prime seed */

    /* per-byte mixing */
    for (const uint8_t *p = buf; len != 0; --len, ++p) {
        acc += *p;
        acc += acc << 10;
        acc ^= acc >> 6;
    }

    /* final avalanche */
    acc += acc << 3;
    acc ^= acc >> 11;
    acc += acc << 15;

    return acc;
}

View File

@ -116,7 +116,7 @@
/**
* hashfp_t hash function to use in the filter
*/
typedef unsigned int (*hashfp_t)(const char *);
/* len is size_t so the pointer type matches the hash functions' (buf, len) signature */
typedef uint32_t (*hashfp_t)(const uint8_t *buf, size_t len);
/**
* struct bloom_t bloom filter object
@ -124,7 +124,7 @@ typedef unsigned int (*hashfp_t)(const char *);
struct bloom_t {
size_t m;
size_t k;
unsigned char *a;
uint8_t *a;
hashfp_t *hash;
};
@ -162,7 +162,7 @@ void bloom_del(struct bloom_t *bloom);
* @return nothing
*
*/
void bloom_add(struct bloom_t *bloom, const char *s);
void bloom_add(struct bloom_t *bloom, const uint8_t *buf, size_t len);
/**
* bloom_check Determine if a string is in the Bloom filter.
@ -199,6 +199,6 @@ void bloom_add(struct bloom_t *bloom, const char *s);
* @return true if the string may be in the filter
*
*/
bool bloom_check(struct bloom_t *bloom, const char *s);
bool bloom_check(struct bloom_t *bloom, const uint8_t *buf, size_t len);
#endif

View File

@ -15,8 +15,11 @@
* @author Christian Mehlis <mehlis@inf.fu-berlin.de>
*/
#include <stddef.h>
#include <inttypes.h>
/**
* djb2_hash
* @brief djb2_hash
*
* HISTORY
* This algorithm (k=33) was first reported by Dan Bernstein many years
@ -27,23 +30,15 @@
*
* The magic of number 33 (why it works better than many other constants,
* prime or not) has never been adequately explained.
*
* @param buf input buffer to hash
* @param len length of buffer
* @return 32 bit sized hash
*/
static inline unsigned long djb2_hash(const char *str)
{
unsigned long hash;
int c;
hash = 5381;
while ((c = (unsigned char) * str++)) {
hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
}
return hash;
}
uint32_t djb2_hash(const uint8_t *buf, size_t len);
/**
* sdbm_hash
* @brief sdbm_hash
*
* HISTORY
* This algorithm was created for sdbm (a public-domain reimplementation
@ -61,23 +56,14 @@ static inline unsigned long djb2_hash(const char *str)
* out to be a prime. this is one of the algorithms used in berkeley db
* (see sleepycat) and elsewhere.
*
* @param buf input buffer to hash
* @param len length of buffer
* @return 32 bit sized hash
*/
static inline unsigned long sdbm_hash(const char *str)
{
unsigned long hash;
int c;
hash = 0;
while ((c = (unsigned char) * str++)) {
hash = c + (hash << 6) + (hash << 16) - hash;
}
return hash;
}
uint32_t sdbm_hash(const uint8_t *buf, size_t len);
/**
* lose lose
* @brief lose lose
*
* HISTORY
* This hash function appeared in K&R (1st ed) but at least the reader
@ -94,83 +80,70 @@ static inline unsigned long sdbm_hash(const char *str)
* checking something like Knuth's Sorting and Searching, so it stuck.
* It is now found mixed with otherwise respectable code, eg. cnews. sigh.
* [see also: tpop]
*
* @param buf input buffer to hash
* @param len length of buffer
* @return 32 bit sized hash
*/
static inline unsigned long kr_hash(const char *str)
{
unsigned int hash;
unsigned int c;
hash = 0;
while ((c = (unsigned char) * str++)) {
hash += c;
}
return hash;
}
uint32_t kr_hash(const uint8_t *buf, size_t len);
/**
* sax_hash
* @brief sax_hash
*
* Shift, Add, XOR
*
* @param buf input buffer to hash
* @param len length of buffer
* @return 32 bit sized hash
*/
static inline unsigned int sax_hash(const char *key)
{
unsigned int h;
h = 0;
while (*key) {
h ^= (h << 5) + (h >> 2) + (unsigned char) * key++;
}
return h;
}
uint32_t sax_hash(const uint8_t *buf, size_t len);
/**
* dek_hash
* @brief dek_hash
*
* HISTORY
* Proposed by Donald E. Knuth in The Art Of Computer Programming Vol. 3,
* under the topic of "Sorting and Search", Chapter 6.4.
*
* @param buf input buffer to hash
* @param len length of buffer
* @return 32 bit sized hash
*/
static inline unsigned int dek_hash(const char *str, unsigned int len)
{
unsigned int hash;
unsigned int c;
hash = len;
c = 0;
while ((c = (unsigned int) * str++)) {
hash = ((hash << 5) ^ (hash >> 27)) ^ (c);
}
return hash;
}
uint32_t dek_hash(const uint8_t *buf, size_t len);
/**
* fnv_hash
* @brief fnv_hash
*
* NOTE
* For a more fully featured and modern version of this hash, see fnv32.c
*
* @param buf input buffer to hash
* @param len length of buffer
* @return 32 bit sized hash
*/
static inline unsigned int fnv_hash(const char *str)
{
#define FNV_PRIME 0x811C9DC5
unsigned int hash;
unsigned int c;
uint32_t fnv_hash(const uint8_t *buf, size_t len);
hash = 0;
c = 0;
while ((c = (unsigned int) * str++)) {
hash *= FNV_PRIME;
hash ^= (c);
}
return hash;
}
/**
* @brief rotating_hash
*
* found on
* http://burtleburtle.net/bob/hash/doobs.html
*
* @param buf input buffer to hash
* @param len length of buffer
* @return 32 bit sized hash
*/
uint32_t rotating_hash(const uint8_t *buf, size_t len);
/**
* @brief one_at_a_time_hash
*
* found on
* http://burtleburtle.net/bob/hash/doobs.html
*
* @param buf input buffer to hash
* @param len length of buffer
* @return 32 bit sized hash
*/
uint32_t one_at_a_time_hash(const uint8_t *buf, size_t len);