This implementation of cuckoo hashing gives guaranteed O(1) lookup complexity and amortized O(1) insert and remove complexity. By default, hash tables grow and shrink automatically. It is possible to preallocate entries and turn automatic shrinking off, which takes memory management out of the cost of insert and remove operations.
497 lines
11 KiB
C
497 lines
11 KiB
C
/*
|
|
A cuckoo hash implementation
|
|
|
|
Copyright (C)
|
|
2008, Christian Thaeter <ct@pipapo.org>
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License as
|
|
published by the Free Software Foundation; either version 2 of the
|
|
License, or (at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
*/
|
|
|
|
#include "cuckoo.h"
|
|
|
|
#include <string.h>
|
|
|
|
/*
 * Shrinking policy of a table, kept in cuckoo_struct.autocompact.
 */
enum compact_state
  {
    /* cuckoo_remove never shrinks the table; selected by cuckoo_reserve */
    COMPACTING_OFF = 0,
    /* cuckoo_remove may call cuckoo_compact; initial state, restored by cuckoo_grow */
    COMPACTING_AUTO = 1,
    /* set once cuckoo_compact has shrunk the table; further compact calls return at once */
    COMPACTED = 2
  };
|
|
|
|
struct cuckoo_struct
|
|
{
|
|
size_t size; /* t1 = 4*size; t2 = 2*size; t3 = size */
|
|
size_t itemsize;
|
|
|
|
cuckoo_hashfunc h1; /* hash function */
|
|
uint32_t r1; /* random, reset for each rehash */
|
|
cuckoo_hashfunc h2;
|
|
uint32_t r2;
|
|
cuckoo_hashfunc h3;
|
|
uint32_t r3;
|
|
|
|
cuckoo_cmpfunc cmp;
|
|
|
|
void* t1;
|
|
void* t2;
|
|
void* t3;
|
|
|
|
unsigned maxloops; /* sqrt (4 * size) */
|
|
|
|
enum compact_state autocompact;
|
|
size_t elements;
|
|
};
|
|
|
|
|
|
/*
 * Cheap pseudo random generator used for the per-table hash randoms.
 *
 * Shifts the 32 bit state left by one and xors bits 30 and 2 of the old
 * state into the vacated low bit.  The state is a function-local static,
 * so all tables share one sequence.
 *
 * Returns the new state.
 */
static inline uint32_t cuckoo_fast_prng ()
{
  static uint32_t state = 0xbabeface;

  uint32_t tap_high = (state >> 30) & 1;
  uint32_t tap_low = (state >> 2) & 1;

  state = (state << 1) ^ tap_high ^ tap_low;
  return state;
}
|
|
|
|
Cuckoo
|
|
cuckoo_init (Cuckoo self,
|
|
cuckoo_hashfunc h1,
|
|
cuckoo_hashfunc h2,
|
|
cuckoo_hashfunc h3,
|
|
cuckoo_cmpfunc cmp,
|
|
size_t itemsize,
|
|
unsigned startsize)
|
|
{
|
|
if (!self)
|
|
return NULL;
|
|
|
|
self->size = 1<<startsize;
|
|
self->itemsize = itemsize;
|
|
self->h1 = h1;
|
|
self->r1 = cuckoo_fast_prng ();
|
|
self->h2 = h2;
|
|
self->r2 = cuckoo_fast_prng ();
|
|
self->h3 = h3;
|
|
self->r3 = cuckoo_fast_prng ();
|
|
|
|
self->cmp = cmp;
|
|
|
|
self->t1 = calloc (self->size * 4, itemsize);
|
|
self->t2 = calloc (self->size * 2, itemsize);
|
|
self->t3 = calloc (self->size, itemsize);
|
|
if (!self->t1 || !self->t2 || !self->t3)
|
|
{
|
|
free (self->t1);
|
|
free (self->t2);
|
|
free (self->t3);
|
|
return NULL;
|
|
}
|
|
|
|
self->maxloops = 1;
|
|
while (self->maxloops * self->maxloops < self->size * 4)
|
|
++self->maxloops;
|
|
|
|
self->autocompact = COMPACTING_AUTO;
|
|
self->elements = 0;
|
|
return self;
|
|
}
|
|
|
|
Cuckoo
|
|
cuckoo_new (cuckoo_hashfunc h1,
|
|
cuckoo_hashfunc h2,
|
|
cuckoo_hashfunc h3,
|
|
cuckoo_cmpfunc cmp,
|
|
size_t itemsize,
|
|
unsigned startsize)
|
|
{
|
|
Cuckoo self = malloc (sizeof (struct cuckoo_struct));
|
|
if (!cuckoo_init (self, h1, h2, h3, cmp, itemsize, startsize))
|
|
{
|
|
free (self);
|
|
return NULL;
|
|
}
|
|
return self;
|
|
}
|
|
|
|
Cuckoo
|
|
cuckoo_destroy (Cuckoo self)
|
|
{
|
|
if (self)
|
|
{
|
|
free (self->t1);
|
|
free (self->t2);
|
|
free (self->t3);
|
|
}
|
|
return self;
|
|
}
|
|
|
|
|
|
void
|
|
cuckoo_free (Cuckoo self)
|
|
{
|
|
free (cuckoo_destroy (self));
|
|
}
|
|
|
|
|
|
/*
 * Test whether a memory region consists of zero bytes only.
 *
 * mem   start of the region
 * size  length of the region in bytes, any value including 0
 *
 * The region is inspected byte by byte.  Stepping through it in int
 * units would underflow size and read past the end whenever size is not
 * a multiple of sizeof (int), and would need int alignment that a char
 * buffer does not guarantee.
 *
 * Returns 1 when every byte is zero (or size is 0), 0 otherwise.
 */
static inline int
iszero (const void* mem, size_t size)
{
  const unsigned char* byte = mem;

  while (size && !*byte)
    {
      --size;
      ++byte;
    }
  return !size;
}
|
|
|
|
/*
 * Copy size bytes from src to dst.
 *
 * dst   destination, at least size bytes
 * src   source, at least size bytes
 * size  number of bytes, any value including 0
 *
 * Delegates to memmove.  A hand written int-wise copy would underflow
 * size and overrun both buffers when size is not a multiple of
 * sizeof (int), and would need int alignment.  memmove rather than
 * memcpy is used so that dst == src stays well defined.
 */
static inline void
xmemcpy (void* dst, const void* src, size_t size)
{
  memmove (dst, src, size);
}
|
|
|
|
|
|
static int
|
|
cuckoo_insert_internal_ (Cuckoo self, void* item)
|
|
{
|
|
void* pos;
|
|
char tmp[self->itemsize];
|
|
|
|
for (unsigned n = 0; n < self->maxloops; ++n)
|
|
{
|
|
/* find nest */
|
|
pos = self->t1 + self->itemsize * (self->h1 (item, self->r1) % (4*self->size));
|
|
/* kick old egg out */
|
|
xmemcpy (tmp, pos, self->itemsize);
|
|
/* lay egg */
|
|
xmemcpy (pos, item, self->itemsize);
|
|
|
|
if (iszero (tmp, self->itemsize))
|
|
return 1;
|
|
|
|
/* find nest */
|
|
pos = self->t2 + self->itemsize * (self->h2 (tmp, self->r2) % (2*self->size));
|
|
/* kick old egg out */
|
|
xmemcpy (item, pos, self->itemsize);
|
|
/* lay egg */
|
|
xmemcpy (pos, tmp, self->itemsize);
|
|
|
|
if (iszero (item, self->itemsize))
|
|
return 1;
|
|
|
|
/* find nest */
|
|
pos = self->t3 + self->itemsize * (self->h3 (item, self->r3) % self->size);
|
|
/* kick old egg out */
|
|
xmemcpy (tmp, pos, self->itemsize);
|
|
/* lay egg */
|
|
xmemcpy (pos, item, self->itemsize);
|
|
|
|
if (iszero (tmp, self->itemsize))
|
|
return 1;
|
|
|
|
/* copy tmp to item, which will be reinserted on next interation / after rehashing */
|
|
xmemcpy (item, tmp, self->itemsize);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
static void
|
|
cuckoo_rehash (Cuckoo self)
|
|
{
|
|
retry1:
|
|
|
|
self->r1 = cuckoo_fast_prng ();
|
|
|
|
for (size_t i = 0; i < 4*self->size; ++i)
|
|
{
|
|
unsigned n;
|
|
void* pos = self->t1 + self->itemsize * i;
|
|
if (!iszero (pos, self->itemsize))
|
|
{
|
|
for (n = 0; n < self->maxloops; ++n)
|
|
{
|
|
unsigned hash = self->h1 (pos, self->r1) % (4*self->size);
|
|
if (hash != i)
|
|
{
|
|
char t[self->itemsize];
|
|
void* hpos = self->t1 + self->itemsize * hash;
|
|
xmemcpy (t, hpos, self->itemsize);
|
|
xmemcpy (hpos, pos, self->itemsize);
|
|
xmemcpy (pos, t, self->itemsize);
|
|
if (iszero (t, self->itemsize))
|
|
break;
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
if (n == self->maxloops)
|
|
goto retry1;
|
|
}
|
|
}
|
|
|
|
retry2:
|
|
self->r2 = cuckoo_fast_prng ();
|
|
|
|
for (size_t i = 0; i < 2*self->size; ++i)
|
|
{
|
|
unsigned n;
|
|
void* pos = self->t2 + self->itemsize * i;
|
|
if (!iszero (pos, self->itemsize))
|
|
{
|
|
for (n = 0; n < self->maxloops; ++n)
|
|
{
|
|
unsigned hash = self->h2 (pos, self->r2) % (2*self->size);
|
|
if (hash != i)
|
|
{
|
|
char t[self->itemsize];
|
|
void* hpos = self->t2 + self->itemsize * hash;
|
|
xmemcpy (t, hpos, self->itemsize);
|
|
xmemcpy (hpos, pos, self->itemsize);
|
|
xmemcpy (pos, t, self->itemsize);
|
|
if (iszero (t, self->itemsize))
|
|
break;
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
if (n == self->maxloops)
|
|
goto retry2;
|
|
}
|
|
}
|
|
|
|
retry3:
|
|
self->r3 = cuckoo_fast_prng ();
|
|
|
|
for (size_t i = 0; i < self->size; ++i)
|
|
{
|
|
unsigned n;
|
|
void* pos = self->t3 + self->itemsize * i;
|
|
if (!iszero (pos, self->itemsize))
|
|
{
|
|
for (n = 0; n < self->maxloops; ++n)
|
|
{
|
|
unsigned hash = self->h3 (pos, self->r3) % self->size;
|
|
if (hash != i)
|
|
{
|
|
char t[self->itemsize];
|
|
void* hpos = self->t3 + self->itemsize * hash;
|
|
xmemcpy (t, hpos, self->itemsize);
|
|
xmemcpy (hpos, pos, self->itemsize);
|
|
xmemcpy (pos, t, self->itemsize);
|
|
if (iszero (t, self->itemsize))
|
|
break;
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
if (n == self->maxloops)
|
|
goto retry3;
|
|
}
|
|
}
|
|
}
|
|
|
|
static int
|
|
cuckoo_grow (Cuckoo self)
|
|
{
|
|
/* rotate hashfuncs, tables, randoms */
|
|
cuckoo_hashfunc th = self->h3;
|
|
self->h3 = self->h2;
|
|
self->h2 = self->h1;
|
|
self->h1 = th;
|
|
|
|
uint32_t tr = self->r3;
|
|
self->r3 = self->r2;
|
|
self->r2 = self->r1;
|
|
self->r1 = tr;
|
|
|
|
void* tt = self->t3;
|
|
self->t3 = self->t2;
|
|
self->t2 = self->t1;
|
|
|
|
/* double new base size */
|
|
self->size *= 2;
|
|
while (self->maxloops * self->maxloops < self->size * 4)
|
|
++self->maxloops;
|
|
|
|
/* alloc new t1 */
|
|
self->t1 = calloc (self->size * 4, self->itemsize);
|
|
if (!self->t1)
|
|
{
|
|
self->t1 = tt;
|
|
return 0;
|
|
}
|
|
|
|
/* reinsert tt */
|
|
size_t ttsize = self->size / 2;
|
|
for (size_t i = 0; i < ttsize; ++i)
|
|
{
|
|
void* pos = tt + i * self->itemsize;
|
|
if (!iszero (pos, self->itemsize))
|
|
{
|
|
while (!cuckoo_insert_internal_ (self, pos))
|
|
cuckoo_rehash (self);
|
|
}
|
|
}
|
|
free (tt);
|
|
|
|
self->autocompact = COMPACTING_AUTO;
|
|
return 1;
|
|
}
|
|
|
|
|
|
int
|
|
cuckoo_reserve (Cuckoo self, size_t more)
|
|
{
|
|
int ret = 1;
|
|
if (more)
|
|
while (self->elements+self->maxloops+more >= 6*self->size)
|
|
ret = cuckoo_grow (self);
|
|
|
|
self->autocompact = COMPACTING_OFF;
|
|
return ret;
|
|
}
|
|
|
|
|
|
int
|
|
cuckoo_compact (Cuckoo self)
|
|
{
|
|
if (self->autocompact == COMPACTED)
|
|
return 1;
|
|
|
|
if (self->size > 2 && self->elements < self->size * 3)
|
|
{
|
|
cuckoo_hashfunc th = self->h1;
|
|
self->h1 = self->h2;
|
|
self->h2 = self->h3;
|
|
self->h3 = th;
|
|
|
|
uint32_t tr = self->r1;
|
|
self->r1 = self->r2;
|
|
self->r2 = self->r3;
|
|
self->r3 = tr;
|
|
|
|
void* tt = self->t1;
|
|
self->t1 = self->t2;
|
|
self->t2 = self->t3;
|
|
|
|
/* halve base size */
|
|
self->size /= 2;
|
|
while (self->maxloops * self->maxloops >= self->size * 4)
|
|
--self->maxloops;
|
|
|
|
/* alloc new t3 */
|
|
self->t3 = calloc (self->size, self->itemsize);
|
|
if (!self->t3)
|
|
{
|
|
self->t3 = tt;
|
|
return 0;
|
|
}
|
|
|
|
/* reinsert tt */
|
|
size_t ttsize = self->size * 8;
|
|
for (size_t i = 0; i < ttsize; ++i)
|
|
{
|
|
void* pos = tt + i * self->itemsize;
|
|
if (!iszero (pos, self->itemsize))
|
|
{
|
|
--self->elements;
|
|
cuckoo_insert (self, pos);
|
|
}
|
|
}
|
|
free (tt);
|
|
self->autocompact = COMPACTED;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
|
|
int
|
|
cuckoo_insert (Cuckoo self, void* item)
|
|
{
|
|
char tmp[self->itemsize];
|
|
|
|
void* found;
|
|
if ((found = cuckoo_find (self, item)))
|
|
{
|
|
xmemcpy (found, item, self->itemsize);
|
|
return 1;
|
|
}
|
|
|
|
xmemcpy (tmp, item, self->itemsize);
|
|
|
|
for (unsigned n = 6; n; --n) /* rehash/grow loop */
|
|
{
|
|
if (cuckoo_insert_internal_ (self, tmp))
|
|
{
|
|
++self->elements;
|
|
return 1;
|
|
}
|
|
|
|
if (self->elements > n*self->size)
|
|
{
|
|
n = 6;
|
|
if (!cuckoo_grow (self))
|
|
return 0;
|
|
}
|
|
else
|
|
cuckoo_rehash (self);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
void*
|
|
cuckoo_find (Cuckoo self, void* item)
|
|
{
|
|
void* pos;
|
|
|
|
pos = self->t1 + self->itemsize * (self->h1 (item, self->r1) % (4*self->size));
|
|
if (!iszero (pos, self->itemsize) && self->cmp (item, pos))
|
|
return pos;
|
|
|
|
pos = self->t2 + self->itemsize * (self->h2 (item, self->r2) % (2*self->size));
|
|
if (!iszero (pos, self->itemsize) && self->cmp (item, pos))
|
|
return pos;
|
|
|
|
pos = self->t3 + self->itemsize * (self->h3 (item, self->r3) % self->size);
|
|
if (!iszero (pos, self->itemsize) && self->cmp (item, pos))
|
|
return pos;
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
void
|
|
cuckoo_remove (Cuckoo self, void* item)
|
|
{
|
|
if (item)
|
|
{
|
|
memset (item, 0, self->itemsize);
|
|
--self->elements;
|
|
|
|
if (self->autocompact == COMPACTING_AUTO && self->size > 2 && self->elements <= self->size*2)
|
|
cuckoo_compact (self);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
// Local Variables:
|
|
// mode: C
|
|
// c-file-style: "gnu"
|
|
// indent-tabs-mode: nil
|
|
// End:
|
|
*/
|