From 2e44523b51c378e8a457ba3ab9d421f4966ddded Mon Sep 17 00:00:00 2001 From: Christian Mehlis Date: Tue, 20 Aug 2013 09:05:07 +0200 Subject: [PATCH] change the bloom filter to filter arrays of bytes current implementation of the bloom filter only handles c strings, this commit changes the hash functions to work on byte arrays. additionally I did: added two more hashes moved hashes into their own sys folder --- sys/Makefile | 3 + sys/bloom/bloom.c | 23 +++---- sys/hashes/Makefile | 4 ++ sys/hashes/hashes.c | 112 +++++++++++++++++++++++++++++++++ sys/include/bloom.h | 8 +-- sys/include/hashes.h | 147 ++++++++++++++++++------------------------- 6 files changed, 195 insertions(+), 102 deletions(-) create mode 100644 sys/hashes/Makefile create mode 100644 sys/hashes/hashes.c diff --git a/sys/Makefile b/sys/Makefile index 62a91bb0bd..823092558b 100644 --- a/sys/Makefile +++ b/sys/Makefile @@ -82,6 +82,9 @@ endif ifneq (,$(findstring random,$(USEMODULE))) DIRS += random endif +ifneq (,$(findstring hashes,$(USEMODULE))) + DIRS += hashes +endif all: $(BINDIR)$(MODULE).a @for i in $(DIRS) ; do $(MAKE) -C $$i ; done ; diff --git a/sys/bloom/bloom.c b/sys/bloom/bloom.c index 941a16e3e5..fea7cbda86 100644 --- a/sys/bloom/bloom.c +++ b/sys/bloom/bloom.c @@ -24,10 +24,11 @@ #define GETBIT(a,n) (a[n/CHAR_BIT] & (1<<(n%CHAR_BIT))) #define ROUND(size) ((size + CHAR_BIT - 1) / CHAR_BIT) -struct bloom_t *bloom_new(size_t size, size_t num_hashes, ...) { +struct bloom_t *bloom_new(size_t size, size_t num_hashes, ...) +{ struct bloom_t *bloom; va_list hashes; - int n; + size_t n; /* Allocate Bloom filter container */ if (!(bloom = malloc(sizeof(struct bloom_t)))) { @@ -41,7 +42,7 @@ struct bloom_t *bloom_new(size_t size, size_t num_hashes, ...)
{ } /* Allocate Bloom filter hash function pointers */ - if (!(bloom->hash = (hashfp_t *)malloc(num_hashes *sizeof(hashfp_t)))) { + if (!(bloom->hash = (hashfp_t *)malloc(num_hashes * sizeof(hashfp_t)))) { free(bloom->a); free(bloom); return NULL; @@ -73,24 +74,24 @@ void bloom_del(struct bloom_t *bloom) free(bloom); } -void bloom_add(struct bloom_t *bloom, const char *s) +void bloom_add(struct bloom_t *bloom, const uint8_t *buf, size_t len) { - unsigned int hash; - int n; + uint32_t hash; + size_t n; for (n = 0; n < bloom->k; n++) { - hash = (unsigned int)bloom->hash[n](s); + hash = bloom->hash[n](buf, len); SETBIT(bloom->a, (hash % bloom->m)); } } -bool bloom_check(struct bloom_t *bloom, const char *s) +bool bloom_check(struct bloom_t *bloom, const uint8_t *buf, size_t len) { - unsigned int hash; - int n; + uint32_t hash; + size_t n; for (n = 0; n < bloom->k; n++) { - hash = (unsigned int)bloom->hash[n](s); + hash = bloom->hash[n](buf, len); if (!(GETBIT(bloom->a, (hash % bloom->m)))) { return false; diff --git a/sys/hashes/Makefile b/sys/hashes/Makefile new file mode 100644 index 0000000000..c14e137b8b --- /dev/null +++ b/sys/hashes/Makefile @@ -0,0 +1,4 @@ +INCLUDES = -I../include +MODULE = hashes + +include $(RIOTBASE)/Makefile.base diff --git a/sys/hashes/hashes.c b/sys/hashes/hashes.c new file mode 100644 index 0000000000..719c067213 --- /dev/null +++ b/sys/hashes/hashes.c @@ -0,0 +1,112 @@ +/** + * This file contains some simple hash functions + * + * Copyright (C) 2013 Freie Universität Berlin + * + * This file is subject to the terms and conditions of the GNU Lesser General + * Public License. See the file LICENSE in the top level directory for more + * details.
+ */ + +/** + * @file + * @author Jason Linehan + * @author Freie Universität Berlin, Computer Systems & Telematics + * @author Christian Mehlis + */ + +#include "hashes.h" + +uint32_t djb2_hash(const uint8_t *buf, size_t len) +{ + uint32_t hash = 5381; + + for (size_t i = 0; i < len; i++) { + hash = hash * 33 + buf[i]; + } + + return hash; +} + +uint32_t sdbm_hash(const uint8_t *buf, size_t len) +{ + uint32_t hash = 0; + + for (size_t i = 0; i < len; i++) { + hash = buf[i] + (hash << 6) + (hash << 16) - hash; + } + + return hash; +} + +uint32_t kr_hash(const uint8_t *buf, size_t len) +{ + uint32_t hash = 0; + + for (size_t i = 0; i < len; i++) { + hash += buf[i]; + } + + return hash; +} + +uint32_t sax_hash(const uint8_t *buf, size_t len) +{ + uint32_t hash = 0; + + for (size_t i = 0; i < len; i++) { + hash ^= (hash << 5) + (hash >> 2) + buf[i]; + } + + return hash; +} + +uint32_t dek_hash(const uint8_t *buf, size_t len) +{ + uint32_t hash = 7919; /* prime */ + + for (size_t i = 0; i < len; i++) { + hash = (hash << 5) ^ (hash >> 27) ^ buf[i]; + } + + return hash; +} + +uint32_t fnv_hash(const uint8_t *buf, size_t len) +{ + uint32_t FNV_PRIME = 0x811C9DC5; + uint32_t hash = 0; + + for (size_t i = 0; i < len; i++) { + hash *= FNV_PRIME; + hash ^= buf[i]; + } + + return hash; +} + +uint32_t rotating_hash(const uint8_t *buf, size_t len) +{ + uint32_t hash = 0; + + for (size_t i = 0; i < len; i++) { + hash = (hash << 4) ^ (hash >> 28) ^ buf[i]; + } + + return hash; +} + +uint32_t one_at_a_time_hash(const uint8_t *buf, size_t len) +{ + uint32_t hash = 786431; /* prime */ + + for (size_t i = 0; i < len; i++) { + hash += buf[i]; + hash += hash << 10; + hash ^= hash >> 6; + } + hash += hash << 3; + hash ^= hash >> 11; + hash += hash << 15; + return hash; +} diff --git a/sys/include/bloom.h b/sys/include/bloom.h index 19b692a2c5..e149a625ec 100644 --- a/sys/include/bloom.h +++ b/sys/include/bloom.h @@ -116,7 +116,7 @@ /** * hashfp_t hash function to use in thee filter */
-typedef unsigned int (*hashfp_t)(const char *); +typedef uint32_t (*hashfp_t)(const uint8_t *, size_t len); /** * struct bloom_t bloom filter object @@ -124,7 +124,7 @@ typedef unsigned int (*hashfp_t)(const char *); struct bloom_t { size_t m; size_t k; - unsigned char *a; + uint8_t *a; hashfp_t *hash; }; @@ -162,7 +162,7 @@ void bloom_del(struct bloom_t *bloom); * @return nothing * */ -void bloom_add(struct bloom_t *bloom, const char *s); +void bloom_add(struct bloom_t *bloom, const uint8_t *buf, size_t len); /** * bloom_check Determine if a string is in the Bloom filter. @@ -199,6 +199,6 @@ void bloom_add(struct bloom_t *bloom, const char *s); * @return true if string is may be in the filter * */ -bool bloom_check(struct bloom_t *bloom, const char *s); +bool bloom_check(struct bloom_t *bloom, const uint8_t *buf, size_t len); #endif diff --git a/sys/include/hashes.h b/sys/include/hashes.h index e81aa196a4..0d2e1b8de6 100644 --- a/sys/include/hashes.h +++ b/sys/include/hashes.h @@ -15,8 +15,11 @@ * @author Christian Mehlis */ +#include <stddef.h> +#include <stdint.h> + /** - * djb2_hash + * @brief djb2_hash * * HISTORY * This algorithm (k=33) was first reported by Dan Bernstein many years @@ -27,23 +30,15 @@ * * The magic of number 33 (why it works better than many other constants, * prime or not) has never been adequately explained. + * + * @param buf input buffer to hash + * @param len length of buffer + * @return 32 bit sized hash */ -static inline unsigned long djb2_hash(const char *str) -{ - unsigned long hash; - int c; - - hash = 5381; - - while ((c = (unsigned char) * str++)) { - hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ - } - - return hash; -} +uint32_t djb2_hash(const uint8_t *buf, size_t len); /** - * sdbm_hash + * @brief sdbm_hash * * HISTORY * This algorithm was created for sdbm (a public-domain reimplementation @@ -61,23 +56,14 @@ static inline unsigned long djb2_hash(const char *str) * out to be a prime.
this is one of the algorithms used in berkeley db * (see sleepycat) and elsewhere. * + * @param buf input buffer to hash + * @param len length of buffer + * @return 32 bit sized hash */ -static inline unsigned long sdbm_hash(const char *str) -{ - unsigned long hash; - int c; - - hash = 0; - - while ((c = (unsigned char) * str++)) { - hash = c + (hash << 6) + (hash << 16) - hash; - } - - return hash; -} +uint32_t sdbm_hash(const uint8_t *buf, size_t len); /** - * lose lose + * @brief kr_hash (the "lose lose" hash) * * HISTORY * This hash function appeared in K&R (1st ed) but at least the reader @@ -94,83 +80,70 @@ static inline unsigned long sdbm_hash(const char *str) * checking something like Knuth's Sorting and Searching, so it stuck. * It is now found mixed with otherwise respectable code, eg. cnews. sigh. * [see also: tpop] + * + * @param buf input buffer to hash + * @param len length of buffer + * @return 32 bit sized hash */ -static inline unsigned long kr_hash(const char *str) -{ - unsigned int hash; - unsigned int c; - - hash = 0; - - while ((c = (unsigned char) * str++)) { - hash += c; - } - - return hash; -} +uint32_t kr_hash(const uint8_t *buf, size_t len); /** - * sax_hash + * @brief sax_hash * * Shift, Add, XOR + * + * @param buf input buffer to hash + * @param len length of buffer + * @return 32 bit sized hash */ -static inline unsigned int sax_hash(const char *key) -{ - unsigned int h; - - h = 0; - - while (*key) { - h ^= (h << 5) + (h >> 2) + (unsigned char) * key++; - } - - return h; -} - +uint32_t sax_hash(const uint8_t *buf, size_t len); /** - * dek_hash + * @brief dek_hash * * HISTORY * Proposed by Donald E. Knuth in The Art Of Computer Programming Vol. 3, * under the topic of "Sorting and Search", Chapter 6.4.
+ * + * @param buf input buffer to hash + * @param len length of buffer + * @return 32 bit sized hash */ -static inline unsigned int dek_hash(const char *str, unsigned int len) -{ - unsigned int hash; - unsigned int c; - - hash = len; - c = 0; - - while ((c = (unsigned int) * str++)) { - hash = ((hash << 5) ^ (hash >> 27)) ^ (c); - } - - return hash; -} - +uint32_t dek_hash(const uint8_t *buf, size_t len); /** - * fnv_hash + * @brief fnv_hash (multiply by a constant, then XOR in each byte) * * NOTE * For a more fully featured and modern version of this hash, see fnv32.c + * + * @param buf input buffer to hash + * @param len length of buffer + * @return 32 bit sized hash */ -static inline unsigned int fnv_hash(const char *str) -{ -#define FNV_PRIME 0x811C9DC5 - unsigned int hash; - unsigned int c; +uint32_t fnv_hash(const uint8_t *buf, size_t len); - hash = 0; - c = 0; - while ((c = (unsigned int) * str++)) { - hash *= FNV_PRIME; - hash ^= (c); - } - - return hash; -} +/** + * @brief rotating_hash (rotate left by 4 bits, then XOR in each byte) + * + * found on + * http://burtleburtle.net/bob/hash/doobs.html + * + * @param buf input buffer to hash + * @param len length of buffer + * @return 32 bit sized hash + */ +uint32_t rotating_hash(const uint8_t *buf, size_t len); +/** + * @brief one_at_a_time_hash (add each byte, mix with shifts, final mixing pass) + * + * found on + * http://burtleburtle.net/bob/hash/doobs.html + * + * @param buf input buffer to hash + * @param len length of buffer + * @return 32 bit sized hash + */ +uint32_t one_at_a_time_hash(const uint8_t *buf, size_t len);