From cc5d22993b653e6ce85c0b3d83702e2c2ed7163f Mon Sep 17 00:00:00 2001 From: Stephan Soller Date: Thu, 7 Apr 2016 20:36:15 +0200 Subject: [PATCH] Added first version of a simple hashmap with fixed types. --- .gitignore | 3 +- Makefile | 1 + slim_hash.h | 301 +++++++++++++++++++++++++++++++++++++++++ tests/slim_hash_test.c | 238 ++++++++++++++++++++++++++++++++ 4 files changed, 542 insertions(+), 1 deletion(-) create mode 100644 slim_hash.h create mode 100644 tests/slim_hash_test.c diff --git a/.gitignore b/.gitignore index c2388c9..1b6eb75 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ tests/slim_test_crashtest -tests/math_3d_test \ No newline at end of file +tests/math_3d_test +tests/slim_hash_test \ No newline at end of file diff --git a/Makefile b/Makefile index bbc1a5d..f8754c6 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,7 @@ all: $(TESTS) tests/slim_test_crashtest.c: slim_test.h tests/math_3d_test: math_3d.h slim_test.h tests/math_3d_test: LDLIBS += -lm +tests/slim_hash_test: slim_hash.h slim_test.h # Clean all files in the .gitignore list, ensures that the ignore file diff --git a/slim_hash.h b/slim_hash.h new file mode 100644 index 0000000..df49d12 --- /dev/null +++ b/slim_hash.h @@ -0,0 +1,301 @@ +#ifndef SLIM_HASH_HEADER +#define SLIM_HASH_HEADER + +#include +#include +#include +#include + +/** + +bool sh_del(hash, key); // ture if value was found and deleted, false if not found + +for(sh_it_p it = sh_start(hash); it != NULL; it = sh_next(it)) { + int64_t key = sh_key(it); + int value = sh_value(it); + sh_remove(it); +} + +int* sh_put_ptr(sh_hash_p hash, int64_t key); // reserve new slot, returns ptr to value +int* sh_get_ptr(sh_hash_p hash, int64_t key); // returns ptr to value or NULL if not in hash + +sh_it_p sh_start(sh_hash_p hash); // iterator +sh_it_p sh_next(sh_hash_p hash, sh_it_p it); // it of next set elem +void sh_remove(sh_hash_p hash, sh_it_p it); // removes current elem from hash (during iteration!) + + +**/ + +/* + +prefix sh_ dict_ +key_type int64_t const char* +value_type int void* +hash_expr sh_murmur3_32(&k, sizeof(k)) sh_murmur3_32(k, strlen(k)) +key_cmp_expr (a == b) strcmp(a, b) == 0 +key_add_expr k strdup(k) +key_del_expr 0 (free(k), NULL) + +SH_GEN_DECL("dict_", const char*, void*) +SH_GEN_DEF("dict_", const char*, void*, sh_murmur3_32(k, strlen(k)), strcmp(a, b) == 0, strdup(k), (free(k), NULL)) +SH_GEN_DICT_DEF("dict_", void*) +SH_GEN_HASH_DEF("con_list_", int32_t, void*) + +*/ + +// example hash(int64_t → int) + +// +// Data +// + +typedef struct { + uint32_t hash_and_flags; + int64_t key; + int value; +} sh_slot_t, *sh_slot_p, *sh_it_p; + +typedef struct { + uint32_t length, capacity, deleted; + sh_slot_p slots; +} sh_hash_t, *sh_hash_p; + +#define SH_SLOT_FREE 0x00000000 +#define SH_SLOT_DELETED 0x00000001 + + +// +// API +// + +void sh_new(sh_hash_p hash); +void sh_destroy(sh_hash_p hash); +void sh_optimize(sh_hash_p hash); + +void sh_put(sh_hash_p hash, int64_t key, int value); +int sh_get(sh_hash_p hash, int64_t key, int default_value); +bool sh_del(sh_hash_p hash, int64_t key); +bool sh_contains(sh_hash_p hash, int64_t key); + +int* sh_put_ptr(sh_hash_p hash, int64_t key); +int* sh_get_ptr(sh_hash_p hash, int64_t key); + +sh_it_p sh_start(sh_hash_p hash); +sh_it_p sh_next(sh_hash_p hash, sh_it_p it); +void sh_remove(sh_hash_p hash, sh_it_p it); + +#endif // SLIM_HASH_HEADER + + + +#ifdef SLIM_HASH_IMPLEMENTATION + + +/** + * Get 32-bit Murmur3 hash of a memory block. Taken from + * https://github.com/wolkykim/qlibc/blob/master/src/utilities/qhash.c + * + * MurmurHash3 was created by Austin Appleby in 2008. The initial implementation + * was published in C++ and placed in the public: https://sites.google.com/site/murmurhash/ + * + * Seungyoung Kim has ported its implementation into C language in 2012 and + * published it as a part of qLibc component. + */ +uint32_t sh_murmur3_32(const void *data, size_t size) { + if (data == NULL || size == 0) + return 0; + + const uint32_t c1 = 0xcc9e2d51; + const uint32_t c2 = 0x1b873593; + + const int nblocks = size / 4; + const uint32_t *blocks = (const uint32_t *) (data); + const uint8_t *tail = (const uint8_t *) (data + (nblocks * 4)); + + uint32_t h = 0; + + int i; + uint32_t k; + for (i = 0; i < nblocks; i++) { + k = blocks[i]; + + k *= c1; + k = (k << 15) | (k >> (32 - 15)); + k *= c2; + + h ^= k; + h = (h << 13) | (h >> (32 - 13)); + h = (h * 5) + 0xe6546b64; + } + + k = 0; + switch (size & 3) { + case 3: + k ^= tail[2] << 16; + case 2: + k ^= tail[1] << 8; + case 1: + k ^= tail[0]; + k *= c1; + k = (k << 15) | (k >> (32 - 15)); + k *= c2; + h ^= k; + }; + + h ^= size; + + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + + +bool sh_resize(sh_hash_p hashmap, size_t new_capacity) { + // on filling empty slot: if exceeding load factor, double capacity + // on deleting slot: if to empty, half capacity + + // Can't make hashmap smaller than it needs to be + if (new_capacity < hashmap->length) + return false; + + sh_hash_t new_hashmap; + new_hashmap.length = 0; + new_hashmap.capacity = new_capacity; + new_hashmap.deleted = 0; + new_hashmap.slots = calloc(new_hashmap.capacity, sizeof(new_hashmap.slots[0])); + + // Failed to allocate memory for new hash map, leave the original untouched + if (new_hashmap.slots == NULL) + return false; + + for(sh_it_p it = sh_start(hashmap); it != NULL; it = sh_next(hashmap, it)) { + sh_put(&new_hashmap, it->key, it->value); + } + + free(hashmap->slots); + *hashmap = new_hashmap; + return true; +} + +void sh_new(sh_hash_p hashmap) { + hashmap->length = 0; + hashmap->capacity = 0; + hashmap->deleted = 0; + hashmap->slots = NULL; + sh_resize(hashmap, 8); +} + +void sh_destroy(sh_hash_p hashmap) { + hashmap->length = 0; + hashmap->capacity = 0; + hashmap->deleted = 0; + free(hashmap->slots); + hashmap->slots = NULL; +} + + +int* sh_put_ptr(sh_hash_p hashmap, int64_t key) { + // add the +1 to the capacity doubling to avoid beeing stuck on a capacity of 0 + if (hashmap->length + hashmap->deleted + 1 > hashmap->capacity * 0.5) + sh_resize(hashmap, (hashmap->capacity + 1) * 2); + + uint32_t hash = sh_murmur3_32(&key, sizeof(key)) | 0x80000000; + size_t index = hash % hashmap->capacity; + while ( !(hashmap->slots[index].hash_and_flags == SH_SLOT_FREE || hashmap->slots[index].hash_and_flags == SH_SLOT_DELETED) ) { + index = (index + 1) % hashmap->capacity; + } + + if (hashmap->slots[index].hash_and_flags == SH_SLOT_DELETED) + hashmap->deleted--; + hashmap->length++; + hashmap->slots[index].hash_and_flags = hash; + hashmap->slots[index].key = key; + return &hashmap->slots[index].value; +} + +int* sh_get_ptr(sh_hash_p hashmap, int64_t key) { + uint32_t hash = sh_murmur3_32(&key, sizeof(key)) | 0x80000000; + size_t index = hash % hashmap->capacity; + while ( !(hashmap->slots[index].hash_and_flags == SH_SLOT_FREE) ) { + if (hashmap->slots[index].hash_and_flags == hash && hashmap->slots[index].key == key) + return &hashmap->slots[index].value; + + index = (index + 1) % hashmap->capacity; + } + + return NULL; +} + +bool sh_del(sh_hash_p hashmap, int64_t key) { + uint32_t hash = sh_murmur3_32(&key, sizeof(key)) | 0x80000000; + size_t index = hash % hashmap->capacity; + while ( !(hashmap->slots[index].hash_and_flags == SH_SLOT_FREE) ) { + if (hashmap->slots[index].hash_and_flags == hash && hashmap->slots[index].key == key) { + hashmap->slots[index].hash_and_flags = SH_SLOT_DELETED; + hashmap->length--; + hashmap->deleted++; + + if (hashmap->length < hashmap->capacity * 0.2) + sh_resize(hashmap, hashmap->capacity / 2); + + return true; + } + + index = (index + 1) % hashmap->capacity; + } + + return false; +} + +void sh_put(sh_hash_p hashmap, int64_t key, int value) { + *sh_put_ptr(hashmap, key) = value; +} + +int sh_get(sh_hash_p hashmap, int64_t key, int default_value) { + int* value_ptr = sh_get_ptr(hashmap, key); + return (value_ptr) ? *value_ptr : default_value; +} + +bool sh_contains(sh_hash_p hashmap, int64_t key) { + return (sh_get_ptr(hashmap, key) != NULL); +} + +// Search for the first not-empty slot +sh_it_p sh_start(sh_hash_p hashmap) { + // We need to start at an invalid slot address since sh_next() increments it + // before it looks at it (so it's safe). + return sh_next(hashmap, hashmap->slots - 1); +} + +sh_it_p sh_next(sh_hash_p hashmap, sh_it_p it) { + if (it == NULL) + return NULL; + + do { + it++; + // Check if we're past the last slot + if (it - hashmap->slots >= hashmap->capacity) + return NULL; + } while( it->hash_and_flags == SH_SLOT_FREE || it->hash_and_flags == SH_SLOT_DELETED ); + + return it; +} + +void sh_remove(sh_hash_p hashmap, sh_it_p it) { + if (it != NULL && it >= hashmap->slots && it - hashmap->slots < hashmap->capacity) { + it->hash_and_flags = SH_SLOT_DELETED; + + hashmap->length--; + hashmap->deleted++; + } +} + +void sh_optimize(sh_hash_p hashmap) { + sh_resize(hashmap, hashmap->capacity); +} + +#endif // SLIM_HASH_IMPLEMENTATION \ No newline at end of file diff --git a/tests/slim_hash_test.c b/tests/slim_hash_test.c new file mode 100644 index 0000000..45b21f4 --- /dev/null +++ b/tests/slim_hash_test.c @@ -0,0 +1,238 @@ +#define SLIM_HASH_IMPLEMENTATION +#include "../slim_hash.h" +#define SLIM_TEST_IMPLEMENTATION +#include "../slim_test.h" + + + + + +void test_new_and_destroy() { + sh_hash_t hash; + + sh_new(&hash); + st_check_int(hash.length, 0); + + sh_destroy(&hash); + st_check_int(hash.length, 0); + st_check_int(hash.capacity, 0); + st_check_null(hash.slots); +} + +void test_put_ptr() { + sh_hash_t hash; + sh_new(&hash); + + int* ptr = sh_put_ptr(&hash, 174); + st_check_not_null(ptr); + st_check_not_null(hash.slots); + st_check(ptr >= &hash.slots[0].value && ptr <= &hash.slots[hash.capacity-1].value); + st_check_int(hash.length, 1); + + sh_destroy(&hash); +} + +void test_get_ptr() { + sh_hash_t hash; + sh_new(&hash); + + int* put_ptr = sh_put_ptr(&hash, 174); + st_check_not_null(put_ptr); + int* get_ptr = sh_get_ptr(&hash, 174); + st_check_not_null(get_ptr); + st_check(put_ptr == get_ptr); + + sh_destroy(&hash); +} + +void test_get_ptr_not_found() { + sh_hash_t hash; + sh_new(&hash); + + int* get_ptr = sh_get_ptr(&hash, 12345); + st_check_null(get_ptr); + + int* put_ptr = sh_put_ptr(&hash, 174); + st_check_not_null(put_ptr); + + get_ptr = sh_get_ptr(&hash, 12345); + st_check_null(get_ptr); + get_ptr = sh_get_ptr(&hash, 175); + st_check_null(get_ptr); + + sh_destroy(&hash); +} + +void test_del() { + sh_hash_t hash; + sh_new(&hash); + + int* put_ptr = sh_put_ptr(&hash, 174); + int* get_ptr = sh_get_ptr(&hash, 174); + st_check(put_ptr == get_ptr); + + bool found = sh_del(&hash, 174); + st_check_int(found, 1); + get_ptr = sh_get_ptr(&hash, 174); + st_check_null(get_ptr); + + found = sh_del(&hash, 174); + st_check_int(found, 0); + + sh_destroy(&hash); +} + +void test_get_and_put() { + sh_hash_t hash; + sh_new(&hash); + + sh_put(&hash, 1, 10); + int value = sh_get(&hash, 1, 0); + st_check_int(value, 10); + + value = sh_get(&hash, 999, 7); + st_check_int(value, 7); + + sh_destroy(&hash); +} + +void test_contains() { + sh_hash_t hash; + sh_new(&hash); + + sh_put(&hash, 1, 10); + bool inside = sh_contains(&hash, 1); + st_check_int(inside, 1); + + inside = sh_contains(&hash, 999); + st_check_int(inside, 0); + + sh_destroy(&hash); +} + +void test_iteration() { + sh_hash_t hash; + sh_new(&hash); + + // empty hash + sh_it_p it = sh_start(&hash); + st_check_null(it); + + // Make sure st_next() doesn't break with NULL as input + it = sh_next(&hash, it); + st_check_null(it); + + // Add test data + sh_put(&hash, 0, 10); + sh_put(&hash, 1, 20); + sh_put(&hash, 2, 30); + + // Do a full iteration + bool visited[3] = {0}; + int loop_counter = 0; + for(sh_it_p it = sh_start(&hash); it != NULL; it = sh_next(&hash, it)) { + st_check(it->key >= 0 && it->key <= 2); + st_check_int(it->value, (it->key + 1) * 10); + visited[it->key] = true; + loop_counter++; + } + st_check_int(loop_counter, 3); + st_check_int(visited[0], 1); + st_check_int(visited[1], 1); + st_check_int(visited[2], 1); + + sh_destroy(&hash); +} + +void test_remove_during_iteration() { + sh_hash_t hash; + sh_new(&hash); + + sh_put(&hash, 0, 10); + sh_put(&hash, 1, 20); + sh_put(&hash, 2, 30); + st_check_int(hash.length, 3); + + for(sh_it_p it = sh_start(&hash); it != NULL; it = sh_next(&hash, it)) { + if (it->key == 1) + sh_remove(&hash, it); + } + st_check_int(hash.length, 2); + int* value_ptr = sh_get_ptr(&hash, 1); + st_check_null(value_ptr); + + sh_destroy(&hash); +} + +void test_growing() { + sh_hash_t hash; + sh_new(&hash); + st_check_int(hash.length, 0); + st_check(hash.capacity < 100); + + for(size_t i = 0; i < 100; i++) + sh_put(&hash, i, i*2); + st_check_int(hash.length, 100); + st_check(hash.capacity >= 100); + + for(size_t i = 0; i < 100; i++) { + int value = sh_get(&hash, i, -1); + st_check_int(value, (int)i*2); + } + + sh_destroy(&hash); +} + +void test_shrinking() { + sh_hash_t hash; + sh_new(&hash); + st_check(hash.capacity < 100); + + for(size_t i = 0; i < 100; i++) + sh_put(&hash, i, i*2); + st_check(hash.capacity >= 100); + + for(size_t i = 0; i < 100; i++) + sh_del(&hash, i); + st_check_int(hash.length, 0); + st_check(hash.capacity < 100); + + sh_destroy(&hash); +} + +void test_optimize() { + sh_hash_t hash; + sh_new(&hash); + + for(size_t i = 0; i < 100; i++) + sh_put(&hash, i, i*2); + st_check(hash.capacity >= 100); + + for(size_t i = 0; i < 30; i++) + sh_del(&hash, i); + st_check_int(hash.length, 70); + st_check(hash.deleted > 0); + + sh_optimize(&hash); + st_check_int(hash.length, 70); + st_check_int(hash.deleted, 0); + + sh_destroy(&hash); +} + + +int main() { + st_run(test_new_and_destroy); + st_run(test_put_ptr); + st_run(test_get_ptr); + st_run(test_get_ptr_not_found); + st_run(test_del); + st_run(test_get_and_put); + st_run(test_contains); + st_run(test_iteration); + st_run(test_remove_during_iteration); + st_run(test_growing); + st_run(test_shrinking); + st_run(test_optimize); + return st_show_report(); +} \ No newline at end of file -- 2.20.1