From 5703dfc30692c400fef086c1492424d005c6b20c Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Fri, 31 Dec 2010 12:46:48 +0100 Subject: [PATCH] Import dictionary code from Redis repository (as of 2069d06a) --- dict.c | 590 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ dict.h | 136 +++++++++++++ 2 files changed, 726 insertions(+) create mode 100644 dict.c create mode 100644 dict.h diff --git a/dict.c b/dict.c new file mode 100644 index 0000000..23f7933 --- /dev/null +++ b/dict.c @@ -0,0 +1,590 @@ +/* Hash Tables Implementation. + * + * This file implements in memory hash tables with insert/del/replace/find/ + * get-random-element operations. Hash tables will auto resize if needed + * tables of power of two in size are used, collisions are handled by + * chaining. See the source code for more information... :) + * + * Copyright (c) 2006-2010, Salvatore Sanfilippo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "fmacros.h" + +#include +#include +#include +#include +#include +#include + +#include "dict.h" +#include "zmalloc.h" + +/* ---------------------------- Utility funcitons --------------------------- */ + +static void _dictPanic(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + fprintf(stderr, "\nDICT LIBRARY PANIC: "); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n\n"); + va_end(ap); +} + +/* ------------------------- Heap Management Wrappers------------------------ */ + +static void *_dictAlloc(size_t size) +{ + void *p = zmalloc(size); + if (p == NULL) + _dictPanic("Out of memory"); + return p; +} + +static void _dictFree(void *ptr) { + zfree(ptr); +} + +/* -------------------------- private prototypes ---------------------------- */ + +static int _dictExpandIfNeeded(dict *ht); +static unsigned long _dictNextPower(unsigned long size); +static int _dictKeyIndex(dict *ht, const void *key); +static int _dictInit(dict *ht, dictType *type, void *privDataPtr); + +/* -------------------------- hash functions -------------------------------- */ + +/* Thomas Wang's 32 bit Mix Function */ +unsigned int dictIntHashFunction(unsigned int key) +{ + key += ~(key << 15); + key ^= (key >> 10); + key += (key << 3); + key ^= (key >> 6); + key += ~(key << 11); + key ^= (key >> 16); + return key; +} + +/* Identity hash function for integer keys */ +unsigned int dictIdentityHashFunction(unsigned int key) +{ + return key; +} + +/* Generic hash function (a popular one from Bernstein). + * I tested a few and this was the best. */ +unsigned int dictGenHashFunction(const unsigned char *buf, int len) { + unsigned int hash = 5381; + + while (len--) + hash = ((hash << 5) + hash) + (*buf++); /* hash * 33 + c */ + return hash; +} + +/* ----------------------------- API implementation ------------------------- */ + +/* Reset an hashtable already initialized with ht_init(). + * NOTE: This function should only called by ht_destroy(). */ +static void _dictReset(dict *ht) +{ + ht->table = NULL; + ht->size = 0; + ht->sizemask = 0; + ht->used = 0; +} + +/* Create a new hash table */ +dict *dictCreate(dictType *type, + void *privDataPtr) +{ + dict *ht = _dictAlloc(sizeof(*ht)); + + _dictInit(ht,type,privDataPtr); + return ht; +} + +/* Initialize the hash table */ +int _dictInit(dict *ht, dictType *type, + void *privDataPtr) +{ + _dictReset(ht); + ht->type = type; + ht->privdata = privDataPtr; + return DICT_OK; +} + +/* Resize the table to the minimal size that contains all the elements, + * but with the invariant of a USER/BUCKETS ration near to <= 1 */ +int dictResize(dict *ht) +{ + int minimal = ht->used; + + if (minimal < DICT_HT_INITIAL_SIZE) + minimal = DICT_HT_INITIAL_SIZE; + return dictExpand(ht, minimal); +} + +/* Expand or create the hashtable */ +int dictExpand(dict *ht, unsigned long size) +{ + dict n; /* the new hashtable */ + unsigned long realsize = _dictNextPower(size), i; + + /* the size is invalid if it is smaller than the number of + * elements already inside the hashtable */ + if (ht->used > size) + return DICT_ERR; + + _dictInit(&n, ht->type, ht->privdata); + n.size = realsize; + n.sizemask = realsize-1; + n.table = _dictAlloc(realsize*sizeof(dictEntry*)); + + /* Initialize all the pointers to NULL */ + memset(n.table, 0, realsize*sizeof(dictEntry*)); + + /* Copy all the elements from the old to the new table: + * note that if the old hash table is empty ht->size is zero, + * so dictExpand just creates an hash table. */ + n.used = ht->used; + for (i = 0; i < ht->size && ht->used > 0; i++) { + dictEntry *he, *nextHe; + + if (ht->table[i] == NULL) continue; + + /* For each hash entry on this slot... */ + he = ht->table[i]; + while(he) { + unsigned int h; + + nextHe = he->next; + /* Get the new element index */ + h = dictHashKey(ht, he->key) & n.sizemask; + he->next = n.table[h]; + n.table[h] = he; + ht->used--; + /* Pass to the next element */ + he = nextHe; + } + } + assert(ht->used == 0); + _dictFree(ht->table); + + /* Remap the new hashtable in the old */ + *ht = n; + return DICT_OK; +} + +/* Add an element to the target hash table */ +int dictAdd(dict *ht, void *key, void *val) +{ + int index; + dictEntry *entry; + + /* Get the index of the new element, or -1 if + * the element already exists. */ + if ((index = _dictKeyIndex(ht, key)) == -1) + return DICT_ERR; + + /* Allocates the memory and stores key */ + entry = _dictAlloc(sizeof(*entry)); + entry->next = ht->table[index]; + ht->table[index] = entry; + + /* Set the hash entry fields. */ + dictSetHashKey(ht, entry, key); + dictSetHashVal(ht, entry, val); + ht->used++; + return DICT_OK; +} + +/* Add an element, discarding the old if the key already exists. + * Return 1 if the key was added from scratch, 0 if there was already an + * element with such key and dictReplace() just performed a value update + * operation. */ +int dictReplace(dict *ht, void *key, void *val) +{ + dictEntry *entry, auxentry; + + /* Try to add the element. If the key + * does not exists dictAdd will suceed. */ + if (dictAdd(ht, key, val) == DICT_OK) + return 1; + /* It already exists, get the entry */ + entry = dictFind(ht, key); + /* Free the old value and set the new one */ + /* Set the new value and free the old one. Note that it is important + * to do that in this order, as the value may just be exactly the same + * as the previous one. In this context, think to reference counting, + * you want to increment (set), and then decrement (free), and not the + * reverse. */ + auxentry = *entry; + dictSetHashVal(ht, entry, val); + dictFreeEntryVal(ht, &auxentry); + return 0; +} + +/* Search and remove an element */ +static int dictGenericDelete(dict *ht, const void *key, int nofree) +{ + unsigned int h; + dictEntry *he, *prevHe; + + if (ht->size == 0) + return DICT_ERR; + h = dictHashKey(ht, key) & ht->sizemask; + he = ht->table[h]; + + prevHe = NULL; + while(he) { + if (dictCompareHashKeys(ht, key, he->key)) { + /* Unlink the element from the list */ + if (prevHe) + prevHe->next = he->next; + else + ht->table[h] = he->next; + if (!nofree) { + dictFreeEntryKey(ht, he); + dictFreeEntryVal(ht, he); + } + _dictFree(he); + ht->used--; + return DICT_OK; + } + prevHe = he; + he = he->next; + } + return DICT_ERR; /* not found */ +} + +int dictDelete(dict *ht, const void *key) { + return dictGenericDelete(ht,key,0); +} + +int dictDeleteNoFree(dict *ht, const void *key) { + return dictGenericDelete(ht,key,1); +} + +/* Destroy an entire hash table */ +int _dictClear(dict *ht) +{ + unsigned long i; + + /* Free all the elements */ + for (i = 0; i < ht->size && ht->used > 0; i++) { + dictEntry *he, *nextHe; + + if ((he = ht->table[i]) == NULL) continue; + while(he) { + nextHe = he->next; + dictFreeEntryKey(ht, he); + dictFreeEntryVal(ht, he); + _dictFree(he); + ht->used--; + he = nextHe; + } + } + /* Free the table and the allocated cache structure */ + _dictFree(ht->table); + /* Re-initialize the table */ + _dictReset(ht); + return DICT_OK; /* never fails */ +} + +/* Clear & Release the hash table */ +void dictRelease(dict *ht) +{ + _dictClear(ht); + _dictFree(ht); +} + +dictEntry *dictFind(dict *ht, const void *key) +{ + dictEntry *he; + unsigned int h; + + if (ht->size == 0) return NULL; + h = dictHashKey(ht, key) & ht->sizemask; + he = ht->table[h]; + while(he) { + if (dictCompareHashKeys(ht, key, he->key)) + return he; + he = he->next; + } + return NULL; +} + +dictIterator *dictGetIterator(dict *ht) +{ + dictIterator *iter = _dictAlloc(sizeof(*iter)); + + iter->ht = ht; + iter->index = -1; + iter->entry = NULL; + iter->nextEntry = NULL; + return iter; +} + +dictEntry *dictNext(dictIterator *iter) +{ + while (1) { + if (iter->entry == NULL) { + iter->index++; + if (iter->index >= + (signed)iter->ht->size) break; + iter->entry = iter->ht->table[iter->index]; + } else { + iter->entry = iter->nextEntry; + } + if (iter->entry) { + /* We need to save the 'next' here, the iterator user + * may delete the entry we are returning. */ + iter->nextEntry = iter->entry->next; + return iter->entry; + } + } + return NULL; +} + +void dictReleaseIterator(dictIterator *iter) +{ + _dictFree(iter); +} + +/* Return a random entry from the hash table. Useful to + * implement randomized algorithms */ +dictEntry *dictGetRandomKey(dict *ht) +{ + dictEntry *he; + unsigned int h; + int listlen, listele; + + if (ht->used == 0) return NULL; + do { + h = random() & ht->sizemask; + he = ht->table[h]; + } while(he == NULL); + + /* Now we found a non empty bucket, but it is a linked + * list and we need to get a random element from the list. + * The only sane way to do so is to count the element and + * select a random index. */ + listlen = 0; + while(he) { + he = he->next; + listlen++; + } + listele = random() % listlen; + he = ht->table[h]; + while(listele--) he = he->next; + return he; +} + +/* ------------------------- private functions ------------------------------ */ + +/* Expand the hash table if needed */ +static int _dictExpandIfNeeded(dict *ht) +{ + /* If the hash table is empty expand it to the intial size, + * if the table is "full" dobule its size. */ + if (ht->size == 0) + return dictExpand(ht, DICT_HT_INITIAL_SIZE); + if (ht->used == ht->size) + return dictExpand(ht, ht->size*2); + return DICT_OK; +} + +/* Our hash table capability is a power of two */ +static unsigned long _dictNextPower(unsigned long size) +{ + unsigned long i = DICT_HT_INITIAL_SIZE; + + if (size >= LONG_MAX) return LONG_MAX; + while(1) { + if (i >= size) + return i; + i *= 2; + } +} + +/* Returns the index of a free slot that can be populated with + * an hash entry for the given 'key'. + * If the key already exists, -1 is returned. */ +static int _dictKeyIndex(dict *ht, const void *key) +{ + unsigned int h; + dictEntry *he; + + /* Expand the hashtable if needed */ + if (_dictExpandIfNeeded(ht) == DICT_ERR) + return -1; + /* Compute the key hash value */ + h = dictHashKey(ht, key) & ht->sizemask; + /* Search if this slot does not already contain the given key */ + he = ht->table[h]; + while(he) { + if (dictCompareHashKeys(ht, key, he->key)) + return -1; + he = he->next; + } + return h; +} + +void dictEmpty(dict *ht) { + _dictClear(ht); +} + +#define DICT_STATS_VECTLEN 50 +void dictPrintStats(dict *ht) { + unsigned long i, slots = 0, chainlen, maxchainlen = 0; + unsigned long totchainlen = 0; + unsigned long clvector[DICT_STATS_VECTLEN]; + + if (ht->used == 0) { + printf("No stats available for empty dictionaries\n"); + return; + } + + for (i = 0; i < DICT_STATS_VECTLEN; i++) clvector[i] = 0; + for (i = 0; i < ht->size; i++) { + dictEntry *he; + + if (ht->table[i] == NULL) { + clvector[0]++; + continue; + } + slots++; + /* For each hash entry on this slot... */ + chainlen = 0; + he = ht->table[i]; + while(he) { + chainlen++; + he = he->next; + } + clvector[(chainlen < DICT_STATS_VECTLEN) ? chainlen : (DICT_STATS_VECTLEN-1)]++; + if (chainlen > maxchainlen) maxchainlen = chainlen; + totchainlen += chainlen; + } + printf("Hash table stats:\n"); + printf(" table size: %ld\n", ht->size); + printf(" number of elements: %ld\n", ht->used); + printf(" different slots: %ld\n", slots); + printf(" max chain length: %ld\n", maxchainlen); + printf(" avg chain length (counted): %.02f\n", (float)totchainlen/slots); + printf(" avg chain length (computed): %.02f\n", (float)ht->used/slots); + printf(" Chain length distribution:\n"); + for (i = 0; i < DICT_STATS_VECTLEN-1; i++) { + if (clvector[i] == 0) continue; + printf(" %s%ld: %ld (%.02f%%)\n",(i == DICT_STATS_VECTLEN-1)?">= ":"", i, clvector[i], ((float)clvector[i]/ht->size)*100); + } +} + +/* ----------------------- StringCopy Hash Table Type ------------------------*/ + +static unsigned int _dictStringCopyHTHashFunction(const void *key) +{ + return dictGenHashFunction(key, strlen(key)); +} + +static void *_dictStringCopyHTKeyDup(void *privdata, const void *key) +{ + int len = strlen(key); + char *copy = _dictAlloc(len+1); + DICT_NOTUSED(privdata); + + memcpy(copy, key, len); + copy[len] = '\0'; + return copy; +} + +static void *_dictStringKeyValCopyHTValDup(void *privdata, const void *val) +{ + int len = strlen(val); + char *copy = _dictAlloc(len+1); + DICT_NOTUSED(privdata); + + memcpy(copy, val, len); + copy[len] = '\0'; + return copy; +} + +static int _dictStringCopyHTKeyCompare(void *privdata, const void *key1, + const void *key2) +{ + DICT_NOTUSED(privdata); + + return strcmp(key1, key2) == 0; +} + +static void _dictStringCopyHTKeyDestructor(void *privdata, void *key) +{ + DICT_NOTUSED(privdata); + + _dictFree((void*)key); /* ATTENTION: const cast */ +} + +static void _dictStringKeyValCopyHTValDestructor(void *privdata, void *val) +{ + DICT_NOTUSED(privdata); + + _dictFree((void*)val); /* ATTENTION: const cast */ +} + +dictType dictTypeHeapStringCopyKey = { + _dictStringCopyHTHashFunction, /* hash function */ + _dictStringCopyHTKeyDup, /* key dup */ + NULL, /* val dup */ + _dictStringCopyHTKeyCompare, /* key compare */ + _dictStringCopyHTKeyDestructor, /* key destructor */ + NULL /* val destructor */ +}; + +/* This is like StringCopy but does not auto-duplicate the key. + * It's used for intepreter's shared strings. */ +dictType dictTypeHeapStrings = { + _dictStringCopyHTHashFunction, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + _dictStringCopyHTKeyCompare, /* key compare */ + _dictStringCopyHTKeyDestructor, /* key destructor */ + NULL /* val destructor */ +}; + +/* This is like StringCopy but also automatically handle dynamic + * allocated C strings as values. */ +dictType dictTypeHeapStringCopyKeyValue = { + _dictStringCopyHTHashFunction, /* hash function */ + _dictStringCopyHTKeyDup, /* key dup */ + _dictStringKeyValCopyHTValDup, /* val dup */ + _dictStringCopyHTKeyCompare, /* key compare */ + _dictStringCopyHTKeyDestructor, /* key destructor */ + _dictStringKeyValCopyHTValDestructor, /* val destructor */ +}; diff --git a/dict.h b/dict.h new file mode 100644 index 0000000..6f9eaa5 --- /dev/null +++ b/dict.h @@ -0,0 +1,136 @@ +/* Hash Tables Implementation. + * + * This file implements in memory hash tables with insert/del/replace/find/ + * get-random-element operations. Hash tables will auto resize if needed + * tables of power of two in size are used, collisions are handled by + * chaining. See the source code for more information... :) + * + * Copyright (c) 2006-2010, Salvatore Sanfilippo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __DICT_H +#define __DICT_H + +#define DICT_OK 0 +#define DICT_ERR 1 + +/* Unused arguments generate annoying warnings... */ +#define DICT_NOTUSED(V) ((void) V) + +typedef struct dictEntry { + void *key; + void *val; + struct dictEntry *next; +} dictEntry; + +typedef struct dictType { + unsigned int (*hashFunction)(const void *key); + void *(*keyDup)(void *privdata, const void *key); + void *(*valDup)(void *privdata, const void *obj); + int (*keyCompare)(void *privdata, const void *key1, const void *key2); + void (*keyDestructor)(void *privdata, void *key); + void (*valDestructor)(void *privdata, void *obj); +} dictType; + +typedef struct dict { + dictEntry **table; + dictType *type; + unsigned long size; + unsigned long sizemask; + unsigned long used; + void *privdata; +} dict; + +typedef struct dictIterator { + dict *ht; + int index; + dictEntry *entry, *nextEntry; +} dictIterator; + +/* This is the initial size of every hash table */ +#define DICT_HT_INITIAL_SIZE 4 + +/* ------------------------------- Macros ------------------------------------*/ +#define dictFreeEntryVal(ht, entry) \ + if ((ht)->type->valDestructor) \ + (ht)->type->valDestructor((ht)->privdata, (entry)->val) + +#define dictSetHashVal(ht, entry, _val_) do { \ + if ((ht)->type->valDup) \ + entry->val = (ht)->type->valDup((ht)->privdata, _val_); \ + else \ + entry->val = (_val_); \ +} while(0) + +#define dictFreeEntryKey(ht, entry) \ + if ((ht)->type->keyDestructor) \ + (ht)->type->keyDestructor((ht)->privdata, (entry)->key) + +#define dictSetHashKey(ht, entry, _key_) do { \ + if ((ht)->type->keyDup) \ + entry->key = (ht)->type->keyDup((ht)->privdata, _key_); \ + else \ + entry->key = (_key_); \ +} while(0) + +#define dictCompareHashKeys(ht, key1, key2) \ + (((ht)->type->keyCompare) ? \ + (ht)->type->keyCompare((ht)->privdata, key1, key2) : \ + (key1) == (key2)) + +#define dictHashKey(ht, key) (ht)->type->hashFunction(key) + +#define dictGetEntryKey(he) ((he)->key) +#define dictGetEntryVal(he) ((he)->val) +#define dictSlots(ht) ((ht)->size) +#define dictSize(ht) ((ht)->used) + +/* API */ +dict *dictCreate(dictType *type, void *privDataPtr); +int dictExpand(dict *ht, unsigned long size); +int dictAdd(dict *ht, void *key, void *val); +int dictReplace(dict *ht, void *key, void *val); +int dictDelete(dict *ht, const void *key); +int dictDeleteNoFree(dict *ht, const void *key); +void dictRelease(dict *ht); +dictEntry * dictFind(dict *ht, const void *key); +int dictResize(dict *ht); +dictIterator *dictGetIterator(dict *ht); +dictEntry *dictNext(dictIterator *iter); +void dictReleaseIterator(dictIterator *iter); +dictEntry *dictGetRandomKey(dict *ht); +void dictPrintStats(dict *ht); +unsigned int dictGenHashFunction(const unsigned char *buf, int len); +void dictEmpty(dict *ht); + +/* Hash table types */ +extern dictType dictTypeHeapStringCopyKey; +extern dictType dictTypeHeapStrings; +extern dictType dictTypeHeapStringCopyKeyValue; + +#endif /* __DICT_H */