blob: c3b7ff4eb25b4ebb8bcc6a68d1f75141d6583617 [file] [log] [blame] [raw]
#ifndef H_FIO_HASH_TABLE_H
/**
The facil.io hash table imeplementation tries to balance performance with memory
footprint and collision risks.
It might be a good implementation and it might suck. Test it out and descide for
yourself.
*/
#define H_FIO_HASH_TABLE_H
#include "fio_list.h"
#include <stdint.h>
#include <stdlib.h>
/* *****************************************************************************
Data Types and API
***************************************************************************** */
#define FIO_FUNC static __attribute__((unused))
typedef struct {
fio_list_s items;
uint64_t count;
uint64_t bin_count;
uint64_t mask;
struct fio_list_s *bins;
} fio_ht_s;
/** Used to initialize an empty `fio_ht_s`. */
#define FIO_HASH_TABLE_INIT(name) \
(fio_ht_s) { .items = FIO_LIST_INIT_STATIC(name.items) }
/** Used to initialize an empty `fio_ht_s`. */
#define FIO_HASH_TABLE_STATIC(name) \
{ .items = FIO_LIST_INIT_STATIC(name.items) }
typedef struct {
fio_list_s items;
fio_list_s siblings;
fio_ht_s *parent;
uint64_t hash;
} fio_ht_node_s;
#ifndef fio_node2obj
/** Takes a node pointer (list/hash/dict, etc') and returns it's container. */
#define fio_node2obj(type, member, ptr) \
((type *)((uintptr_t)(ptr) - (uintptr_t)(&(((type *)0)->member))))
#endif
/** Takes a list pointer and returns a pointer to it's container. */
#define fio_ht_object(type, member, pht) fio_node2obj(type, member, (pht))
/** A simple SipHash2/4 function implementation that can be used for hashing. */
FIO_FUNC uint64_t fio_ht_hash(const void *data, size_t len);
/** A simple SipHash2/4 function for when a string's length is unknown. */
FIO_FUNC uint64_t fio_ht_hash_cstr(const void *data);
/**
* Adds a new item to the hash table.
*
* If a hash collision occurs, the old item is replaced and returned.
*/
inline FIO_FUNC fio_ht_node_s *fio_ht_add(fio_ht_s *table, fio_ht_node_s *item,
uint64_t hash_value);
/** Finds an item in the hash table. */
inline FIO_FUNC fio_ht_node_s *fio_ht_find(fio_ht_s *table,
uint64_t hash_value);
/** Removes the item from the hash table, returning the removed item. */
inline FIO_FUNC fio_ht_node_s *fio_ht_remove(fio_ht_node_s *item);
/** Finds, removes and returns the matching item from the hash table. */
inline FIO_FUNC fio_ht_node_s *fio_ht_pop(fio_ht_s *table, uint64_t hash);
/**
* Re-hashes the hash table. This is automatically when growth is required.
*
* `bin_count` MUST be a power of 2 or 0 (0,1,2,4,8,16,32,64,128...).
*
* If `bin_count`, the hash table's memory is released and it will behave like a
* list (until an item is added).
*/
inline FIO_FUNC void fio_ht_rehash(fio_ht_s *table, uint64_t bin_count);
/** iterates through all Hash Table members. */
#define fio_ht_for_each(type, member, var, table) \
fio_list_for_each(type, member.items, var, (table).items)
/**
* Frees any internal memory used by the hash table.
*
* This soes **NOT** free any items in the table or the table object itself.
*/
#define fio_ht_free(table) fio_ht_rehash(table, 0)
/* *****************************************************************************
Implementations
***************************************************************************** */
/** Adds a new item to the hash table. If a hash collision occurs, the old item
* is removed and replaced. */
inline FIO_FUNC fio_ht_node_s *fio_ht_add(fio_ht_s *table, fio_ht_node_s *item,
uint64_t hash) {
fio_ht_node_s *old = fio_ht_find(table, hash);
if (old == item)
return NULL;
/* initialize item */
item->parent = table;
item->hash = hash;
if (old)
goto found_old;
fio_list_add(table->items.prev, &item->items);
table->count++;
if (table->count >= ((table->bin_count >> 2) * 3)) {
fio_ht_rehash(table, (table->bin_count ? (table->bin_count << 1) : 32));
return NULL;
}
fio_list_add(table->bins[item->hash & table->mask].prev, &item->siblings);
return NULL;
found_old:
/* replace inplace. */
*item = *old;
old->parent = NULL;
old->items = FIO_LIST_INIT(old->items);
old->siblings = FIO_LIST_INIT(old->siblings);
item->items.prev->next = &item->items;
item->items.next->prev = &item->items;
item->siblings.prev->next = &item->siblings;
item->siblings.next->prev = &item->siblings;
return old;
}
/** Finds an item in the hash table. */
inline FIO_FUNC fio_ht_node_s *fio_ht_find(fio_ht_s *table,
uint64_t hash_value) {
fio_ht_node_s *item;
if (!table || !table->bins)
return NULL;
fio_list_for_each(fio_ht_node_s, siblings, item,
table->bins[hash_value & (table->mask)]) {
if (hash_value == item->hash) {
return item;
}
}
return NULL;
}
/** Removes an item to the hash table. */
inline FIO_FUNC fio_ht_node_s *fio_ht_remove(fio_ht_node_s *item) {
if (!item || !item->parent)
return item;
fio_list_remove(&item->siblings);
fio_list_remove(&item->items);
item->parent->count--;
/* ** memory shrinkage? not really... ** */
/*
if (item->parent->bin_count > 1024 &&
(item->parent->bin_count >> 3) >= item->parent->count)
fio_ht_rehash(item->parent, item->parent->bin_count >> 2);
*/
item->parent = NULL;
return item;
}
/** Finds, removes and returns the matching item from the hash table. */
inline FIO_FUNC fio_ht_node_s *fio_ht_pop(fio_ht_s *table, uint64_t hash) {
fio_ht_node_s *item = fio_ht_find(table, hash);
if (!item)
return NULL;
fio_ht_remove(item);
return item;
}
/** Re-hashes the hash table. This is usually automatically called. */
inline FIO_FUNC void fio_ht_rehash(fio_ht_s *table, uint64_t bin_count) {
if (!table)
return;
if (!bin_count) {
if (table->bins) {
free(table->bins);
table->bins = NULL;
}
return;
}
void *mem = realloc(table->bins, bin_count * sizeof(*table->bins));
if (!mem)
return;
table->bin_count = bin_count;
table->bins = mem;
table->mask = bin_count - 1;
while (bin_count) {
bin_count--;
table->bins[bin_count] = FIO_LIST_INIT(table->bins[bin_count]);
}
fio_ht_node_s *item;
fio_list_for_each(fio_ht_node_s, items, item, table->items) {
fio_list_add(table->bins[item->hash & table->mask].prev, &item->siblings);
}
}
/* *****************************************************************************
The Hash Function
***************************************************************************** */
#if !defined(__BIG_ENDIAN__) && !defined(__LITTLE_ENDIAN__) && \
__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* the algorithm was designed as little endian... so, byte swap 64 bit. */
#define sip_local64(i) \
(((i)&0xFFULL) << 56) | (((i)&0xFF00ULL) << 40) | \
(((i)&0xFF0000ULL) << 24) | (((i)&0xFF000000ULL) << 8) | \
(((i)&0xFF00000000ULL) >> 8) | (((i)&0xFF0000000000ULL) >> 24) | \
(((i)&0xFF000000000000ULL) >> 40) | (((i)&0xFF00000000000000ULL) >> 56)
#else
/* no need */
#define sip_local64(i) (i)
#endif
/* 64Bit left rotation, inlined. */
#define lrot64(i, bits) \
(((uint64_t)(i) << (bits)) | ((uint64_t)(i) >> (64 - (bits))))
FIO_FUNC uint64_t fio_ht_hash(const void *data, size_t len) {
/* initialize the 4 words */
uint64_t v0 = (0x0706050403020100ULL ^ 0x736f6d6570736575ULL);
uint64_t v1 = (0x0f0e0d0c0b0a0908ULL ^ 0x646f72616e646f6dULL);
uint64_t v2 = (0x0706050403020100ULL ^ 0x6c7967656e657261ULL);
uint64_t v3 = (0x0f0e0d0c0b0a0908ULL ^ 0x7465646279746573ULL);
const uint64_t *w64 = data;
uint8_t len_mod = len & 255;
union {
uint64_t i;
uint8_t str[8];
} word;
#define hash_map_SipRound \
do { \
v2 += v3; \
v3 = lrot64(v3, 16) ^ v2; \
v0 += v1; \
v1 = lrot64(v1, 13) ^ v0; \
v0 = lrot64(v0, 32); \
v2 += v1; \
v0 += v3; \
v1 = lrot64(v1, 17) ^ v2; \
v3 = lrot64(v3, 21) ^ v0; \
v2 = lrot64(v2, 32); \
} while (0);
while (len >= 8) {
word.i = sip_local64(*w64);
v3 ^= word.i;
/* Sip Rounds */
hash_map_SipRound;
hash_map_SipRound;
v0 ^= word.i;
w64 += 1;
len -= 8;
}
word.i = 0;
uint8_t *pos = word.str;
uint8_t *w8 = (void *)w64;
switch (len) { /* fallthrough is intentional */
case 7:
pos[6] = w8[6];
case 6:
pos[5] = w8[5];
case 5:
pos[4] = w8[4];
case 4:
pos[3] = w8[3];
case 3:
pos[2] = w8[2];
case 2:
pos[1] = w8[1];
case 1:
pos[0] = w8[0];
}
word.str[7] = len_mod;
/* last round */
v3 ^= word.i;
hash_map_SipRound;
hash_map_SipRound;
v0 ^= word.i;
/* Finalization */
v2 ^= 0xff;
/* d iterations of SipRound */
hash_map_SipRound;
hash_map_SipRound;
hash_map_SipRound;
hash_map_SipRound;
/* XOR it all together */
v0 ^= v1 ^ v2 ^ v3;
#undef hash_map_SipRound
return v0;
}
FIO_FUNC uint64_t fio_ht_hash_cstr(const void *data) {
/* initialize the 4 words */
uint64_t v0 = (0x0706050403020100ULL ^ 0x736f6d6570736575ULL);
uint64_t v1 = (0x0f0e0d0c0b0a0908ULL ^ 0x646f72616e646f6dULL);
uint64_t v2 = (0x0706050403020100ULL ^ 0x6c7967656e657261ULL);
uint64_t v3 = (0x0f0e0d0c0b0a0908ULL ^ 0x7465646279746573ULL);
const uint64_t *w64 = data;
uint8_t len = 0;
union {
uint64_t i;
uint8_t str[8];
} word;
#define hash_map_SipRound \
do { \
v2 += v3; \
v3 = lrot64(v3, 16) ^ v2; \
v0 += v1; \
v1 = lrot64(v1, 13) ^ v0; \
v0 = lrot64(v0, 32); \
v2 += v1; \
v0 += v3; \
v1 = lrot64(v1, 17) ^ v2; \
v3 = lrot64(v3, 21) ^ v0; \
v2 = lrot64(v2, 32); \
} while (0);
while ((*w64 & 0xFFULL) && (*w64 & 0xFF00ULL) && (*w64 & 0xFF0000ULL) &&
(*w64 & 0xFF000000ULL) && (*w64 & 0xFF00000000ULL) &&
(*w64 & 0xFF0000000000ULL) && (*w64 & 0xFF000000000000ULL) &&
(*w64 & 0xFF00000000000000ULL)) {
word.i = sip_local64(*w64);
v3 ^= word.i;
/* Sip Rounds */
hash_map_SipRound;
hash_map_SipRound;
v0 ^= word.i;
w64 += 1;
len += 8;
}
word.i = 0;
uint8_t *pos = word.str;
uint8_t *w8 = (void *)w64;
while (*w8) {
*(pos++) = *(w8++);
len++;
}
word.str[7] = len & 255;
/* last round */
v3 ^= word.i;
hash_map_SipRound;
hash_map_SipRound;
v0 ^= word.i;
/* Finalization */
v2 ^= 0xff;
/* d iterations of SipRound */
hash_map_SipRound;
hash_map_SipRound;
hash_map_SipRound;
hash_map_SipRound;
/* XOR it all together */
v0 ^= v1 ^ v2 ^ v3;
#undef hash_map_SipRound
return v0;
}
#undef sip_local64
#undef lrot64
#undef FIO_FUNC
#endif