From 8e2ffc784e293ec4199138ed6f22961a8b8c14a9 Mon Sep 17 00:00:00 2001 From: Stephan Soller Date: Wed, 12 Jul 2017 21:36:46 +0200 Subject: [PATCH] Collected all changes back into slim_hash and cleaned it up. --- Makefile | 11 +- slim_hash.h | 1063 +++++++++++++++++++--------- tests/slim_gl_test.c | 146 ---- tests/slim_hash_no_typedefs_test.c | 43 ++ tests/slim_hash_test.c | 61 +- 5 files changed, 834 insertions(+), 490 deletions(-) create mode 100644 tests/slim_hash_no_typedefs_test.c diff --git a/Makefile b/Makefile index 446cd8c..b4be120 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ # Setup implicit rule to build object files CC = gcc -CFLAGS = -std=c99 -Werror -Wall -Wextra +CFLAGS = -std=c99 -Werror -Wall -Wextra -g -Og # Build and run all tests by default @@ -13,10 +13,13 @@ tests/slim_test_crashtest.c: slim_test.h tests/math_3d_test: math_3d.h slim_test.h tests/math_3d_test: LDLIBS += -lm tests/slim_hash_test: slim_hash.h slim_test.h +tests/slim_hash_no_typedefs_test: slim_hash.h slim_test.h + +tests/slim_gl_test.o: slim_gl.h slim_test.h tests/slim_gl_test: LDLIBS += -lGL -# Clean all files in the .gitignore list, ensures that the ignore file -# is properly maintained. +# Clean all files in the .gitignore list, ensures that the ignore file is +# properly maintained. Use bash to execute rm -rf so that wildcards get expanded. clean: - xargs --arg-file .gitignore --verbose rm -fr \ No newline at end of file + xargs --arg-file .gitignore --verbose -I % bash -c "rm -fr %" \ No newline at end of file diff --git a/slim_hash.h b/slim_hash.h index 870fe60..32c918f 100644 --- a/slim_hash.h +++ b/slim_hash.h @@ -1,126 +1,204 @@ /** -Slim Hash v1.0 +Slim Hash v1.1 By Stephan Soller Licensed under the MIT license -Slim Hash is a simple hash table implementation for C99. It's designed to be -simple to use and avoid surprises. To keep it typesafe you have to generate code -for each new hash type you want. +Slim Hash is a simple "drop it in and it just works" hashmap implementation for C99. It's designed +to be simple to use and to avoid surprises. To keep it typesafe you have to generate code for each +new type of hashmap you want. -It's an stb style single header file library. Define SLIM_HASH_IMPLEMENTATION -before you include this file in *one* C file to create the common code that is -used by all hash tables. +It uses closed hashing, meaning that all data is stored in one contiguous memory block. In case of +collisions linear probing is used as it's more cache friendly and there is no need to calculate +prime numbers for the hashmap size. -A hash type itself is declared using the SH_GEN_DECL() macro (usually in a -header file). Then in some C file the implementation is defined using the -SH_GEN_HASH_DEF(), SH_GEN_DICT_DEF() or SH_GEN_DEF() macro. - -SIMPLE EXAMPLE USAGE +EXAMPLE #define SLIM_HASH_IMPLEMENTATION #include "slim_hash.h" - SH_GEN_DECL(env, char*, int); - SH_GEN_DICT_DEF(env, char*, int); + // Generates the types (struct dict and struct dict_slot) and function prototypes for a + // char* -> int hashmap. All functions of the hashmap will start with "dict_". + // This could also go into a separate header file. + SH_GEN_DECL(dict, char*, int); + + // Generates the actual implementaion of all the dict_... functions. This could also go into a + // separate C file. + SH_GEN_DICT_IMPL(dict, char*, int); void main() { - env_t env; - env_new(&env); + struct dict dict; + dict_new(&dict); - env_put(&env, "foo", 3); - env_put(&env, "bar", 17); + dict_put(&dict, "foo", 3); + dict_put(&dict, "bar", 17); - env_get(&env, "foo", -1); // => 3 - env_get(&env, "bar", -1); // => 17 - env_get(&env, "hurdl", -1); // => -1 + dict_get(&dict, "foo", -1); // => 3 + dict_get(&dict, "bar", -1); // => 17 + dict_get(&dict, "hurdl", -1); // => -1 - env_put(&env, "foo", 5); - env_get(&env, "foo", -1); // => 5 + dict_put(&dict, "foo", 5); + dict_get(&dict, "foo", -1); // => 5 int* value_ptr = NULL; - // env_get_ptr() returns ptr to value or NULL if not in hash + // dict_get_ptr() returns a pointer to the value or NULL if not in hash // Pointer only stays valid as long as the hash isn't manipulated! - value_ptr = env_get_ptr(&env, "foo"); // *value_ptr => 5 - value_ptr = env_get_ptr(&env, "hurdl"); // value_ptr => NULL + value_ptr = dict_get_ptr(&dict, "foo"); // *value_ptr => 5 + value_ptr = dict_get_ptr(&dict, "hurdl"); // value_ptr => NULL - // env_put_ptr() reserves new slot, returns ptr to value (useful for struct values) - // Pointer only stays valid as long as the hash isn't manipulated! - value_ptr = env_put_ptr(&env, "grumpf"); + // dict_put_ptr() reserves new slot, returns ptr to value (useful for struct values) + // Pointer only stays valid as long as items are not added or deleted from the hashmap! + value_ptr = dict_put_ptr(&dict, "grumpf"); *value_ptr = 21; - env_get(&env, "grumpf", -1); // => 21 + dict_get(&dict, "grumpf", -1); // => 21 *value_ptr = 42; - env_get(&env, "grumpf", -1); // => 42 + dict_get(&dict, "grumpf", -1); // => 42 - env_contains(&env, "bar"); // => true - env_del(&env, "bar"); // => true (true if value was deleted, false if not found) - env_contains(&env, "bar"); // => false + dict_contains(&dict, "bar"); // => true + dict_del(&dict, "bar"); // => true (true if value was deleted, false if not found) + dict_contains(&dict, "bar"); // => false - // This output all slots in undefined order: + // This outputs all items (filled slots) of the hashmap in undefined order: // foo: 5 // grumpf: 42 - for(env_it_p it = env_start(&env); it != NULL; it = env_next(&env, it)) { + for(struct dict_slot* it = dict_start(&dict); it != NULL; it = dict_next(&dict, it)) { printf("%s: %d\n", it->key, it->value); - // You can remove a slot during iteration with - // env_remove(&env, it); + // You can remove a slot (clear the key and value inside it) during iteration with + // dict_remove(&dict, it); } - env_destroy(&env); + dict_destroy(&dict); } +USAGE + +In *one* of your C files define SLIM_HASH_IMPLEMENTATION before you include this library to create +the common code that is used by all hashmaps (that are acutally the murmur3 and fnv1a hash +functions). All other C files that include this library will then just reference these functions. + +Creating a hashmap with this library happens in 2 steps: + +- First, declare the hashmap with the SH_GEN_DECL() macro. +- Second, generate the implementation of the hashmap with the SH_GEN_HASH_IMPL(), SH_GEN_DICT_IMPL() + or SH_GEN_IMPL() macros. + +You can put the declaration in a header file or simply declare the same hashmap in multiple places. +But the implementation should be generated in only one file (otherwiese you would have multiple +implementation for the same functions). After that the hashmap is ready and can be used via the +generated functions. + +SH_GEN_DECL() will define a struct for the hashmap and for an iterator. struct dict and +struct dict_slot in the example above. By default it will also typedef these to dict_t, dict_p and +dict_it_p (it for iterator and _p because it's a pointer type). If you don't want that or if you're +concerned about possible conflicts with POSIX types you can define SLIM_HASH_NO_TYPEDEFS before +including the library. In that case no typedefs will be generated. + +For most cases you can use the SH_GEN_HASH_IMPL() or SH_GEN_DICT_IMPL() macros to generate the +implementation. They take the same 3 paramters as SH_GEN_DECL() but generate the actual functions +instead of just the prototypes. + +SH_GEN_DICT_IMPL() generates code for a hashmap that uses string keys. It'll use strlen() and +sh_murmur3() to hash the keys. strcmp() is used to compare keys and strdup() to duplicate the keys +when you insert a new key. + +SH_GEN_HASH_IMPL() on the other hand generates code that simply hashes any value type with murmur3. +it uses the == operator to compare keys. This works for key types like int, float, etc. + +SH_GEN_IMPL() is a all purpose most flexible way to generate a hash implementation. But it requires +some code snippets (expressions) to do so. These snippets specify how to calculate the hash, what to +do with keys, how to allocate and free memory, etc. So you can use it to generate all kinds of +hashmaps, for example one that uses structs as keys. + + + struct grid_cell { ... }; + struct grid_id { int x, y; }; + + SH_GEN_DECL(grid, struct grid_id, struct grid); + SH_GEN_IMPL(grid, struct grid_id, struct grid, + sh_murmur3(&key, sizeof(key), 0), // key_hash_expr(key_t key) + (memcmp(&a, &b, sizeof(a)) == 0), // key_cmp_expr(key_t a, key_t b) + key, // key_put_expr(key_t key) + memset(&key, 0, sizeof(key)), // key_del_expr(key_t key) + calloc(capacity, slot_size), // calloc_expr(size_t capacity, size_t slot_size) + free(ptr) // free_expr(void* ptr) + ) + +The difference to SH_GEN_HASH_IMPL() is that memcpy() instead of the == operator is used to compare +two keys. Also when a key is deleted it is zeroed out with memset(). This should work for all kinds +of structs. + +See the SH_GEN_IMPL() documentation below for more details on the different expressions. +SH_GEN_HASH_IMPL() and SH_GEN_DICT_IMPL() are simple shorthands for that macro with code snippes for +the two most common kinds of hashmaps. + + THE PUBLIC API -SH_GEN_DECL(prefix, key_t, value_t) generates: +This is the API declared by SH_GEN_DECL(dict, char*, int). Hashmaps with a different name will of +course have different type names and function prefixes. - typedef struct { + struct dict { uint32_t length, capacity; - // Some internal fields ... - } prefix_t, *prefix_p; + // Some internal fields + }; + struct dict_slot { + // Some internal fields + char* key; + int value; + }; - typedef struct { - // Some internal fields ... - key_t key; - value_t value; - } *prefix_it_p; + // You can disable these two by defining SLIM_HASH_NO_TYPEDEFS + typedef struct dict dict_t, *dict_p; + typedef struct dict_slot *dict_it_p; - void prefix_new(prefix_p hash); - void prefix_destroy(prefix_p hash); - void prefix_optimize(prefix_p hash); + void dict_new(struct dict* hashmap); + void dict_destroy(struct dict* hashmap); + void dict_optimize(struct dict* hashmap); - void prefix_put(prefix_p hash, key_t key, value_t value); - value_t prefix_get(prefix_p hash, key_t key, value_t default_value); - bool prefix_del(prefix_p hash, key_t key); - bool prefix_contains(prefix_p hash, key_t key); + int dict_get(struct dict* hashmap, char* key, int default_value); + void dict_put(struct dict* hashmap, char* key, int value); + bool dict_del(struct dict* hashmap, char* key); + bool dict_contains(struct dict* hashmap, char* key); - value_t* prefix_put_ptr(prefix_p hash, key_t key); - value_t* prefix_get_ptr(prefix_p hash, key_t key); + int* dict_get_ptr(struct dict* hashmap, char* key); + int* dict_put_ptr(struct dict* hashmap, char* key); - prefix_it_p prefix_start(prefix_p hash); - prefix_it_p prefix_next(prefix_p hash, prefix_it_p it); - void prefix_remove(prefix_p hash, prefix_it_p it); - - -HOW HASHES AND DICTS ARE GENERATED WITH SH_GEN_DEF() - -You can use SH_GEN_DEF() to define hashes with more complex keys. As examples -here the way SH_GEN_HASH_DEF() uses SH_GEN_DEF() to define hashes for value -types and SH_GEN_DICT_DEF() for zero-terminated strings. + struct dict_slot* dict_start(struct dict* hashmap); + struct dict_slot* dict_next(struct dict* hashmap, struct dict_slot* it); + void dict_remove(struct dict* hashmap, struct dict_slot* it); + bool dict_shrink_if_necessary(struct dict* hashmap); - SH_GEN_HASH_DEF SH_GEN_DICT_DEF -prefix prefix prefix -key_type key_t const char* or char* -value_type value_t value_t -hash_expr sh_murmur3_32(&k, sizeof(k)) sh_murmur3_32(k, strlen(k)) -key_cmp_expr (a == b) strcmp(a, b) == 0 -key_put_expr k sh_strdup(k) -key_del_expr 0 (free(k), NULL) +See the function implementations in the SH_GEN_IMPL() macro for detailed documentation. VERSION HISTORY v1.0 2016-06-22 Initial release +xxxx xxxx-xx-xx ADD: Added the fnv1a 32 bit hash function (see sh_fnv1a() documentation). + ADD: Manually ported the murmur3 hash function from the original source. Also + added the seed parameter (again, see sh_murmur3() docs). + ADD: Added calloc and free expressions to SH_GEN_IMPL() so memory can be allocated + from a garbage collector. + CHANGE: Cleaned up the code and added documentation. + CHANGE: The hashmap now uses power to two capacities. + CHANGE: Made typedefs optional. They can be removed by defining + SLIM_HASH_NO_TYPEDEFS before including the library. + FIX: Made sure that key_del_expr and free_expr of SH_GEN_IMPL() don't emit unused + variable warnings when they don't use the key argument. + FIX: key_del_expr is now also executed for each key when the hashmap is destroyed + (so each key that is duplicated is also freed). + FIX: key_put_expr is no longer executed when the hashmap is resized. + FIX: Renamed ..._DEF macros to ..._IMPL because it's easy to confuse declaration + and definition. IMPL (implementation) makes more clear what the macros + generate. + FIX: Added missing prototypes for functions shared by all implementations. + + +TODO + +- remove optimize function **/ #ifndef SLIM_HASH_HEADER @@ -133,224 +211,506 @@ v1.0 2016-06-22 Initial release #include -// Flags to mark slots -#define SH_SLOT_FREE 0x00000000 // Choosen so a calloc()ed hash is empty +// Flags for hashtable slots +#define SH_SLOT_FREE 0x00000000 // Choosen so a calloc()ed hash is considered free #define SH_SLOT_DELETED 0x00000001 -#define SH_SLOT_FILLED 0x80000000 // If this bit is set hash_and_flags contains a hash +#define SH_SLOT_FILLED 0x80000000 // If this bit is set hash_or_flags contains a hash + +uint32_t sh_murmur3(const void* key, int size, uint32_t seed); +uint32_t sh_fnv1a(const char* key); + +/** + * This macro declares a hash. It doesn't generate the implementation, just the types and function + * prototypes. You can use this macro at different places or use it once and include that whenever + * you need it. Use one of the SH_GEN_HASH_IMPL(), SH_GEN_DICT_IMPL() or SH_GEN_IMPL() macros with + * the same first three arguments to generate the implementation once. + * + * If SLIM_HASH_NO_TYPEDEFS is defined no typedefs for shorthand types ending with ..._t and ..._p + * are defined. + */ +#ifdef SLIM_HASH_NO_TYPEDEFS + #define SH_GEN_DECL(name, key_t, value_t) SH_GEN_TYPES_AND_PROTOTYPES(name, key_t, value_t) +#else + #define SH_GEN_DECL(name, key_t, value_t) SH_GEN_TYPES_AND_PROTOTYPES(name, key_t, value_t) \ + typedef struct name name##_t, *name##_p; \ + typedef struct name##_slot *name##_it_p; +#endif /** - * This macro declares a hash. It doesn't generate the implementation, just the - * types and function prototypes. Use the SH_GEN_HASH_DEF(), SH_GEN_DICT_DEF() - * or SH_GEN_DEF() macro to generate the implementation once. + * This macro declares a hash. It doesn't generate the implementation, just the types and function + * prototypes. You can use this macro at different places or use it once and include that whenever + * you need it. Use one of the SH_GEN_HASH_IMPL(), SH_GEN_DICT_IMPL() or SH_GEN_IMPL() macros with + * the same first three arguments to generate the implementation once. */ -#define SH_GEN_DECL(prefix, key_t, value_t) \ - typedef struct { \ - uint32_t hash_and_flags; \ - key_t key; \ - value_t value; \ - } prefix##_slot_t, *prefix##_slot_p, *prefix##_it_p; \ - \ - typedef struct { \ - uint32_t length, capacity, deleted; \ - prefix##_slot_p slots; \ - } prefix##_t, *prefix##_p; \ - \ - void prefix##_new(prefix##_p hash); \ - void prefix##_destroy(prefix##_p hash); \ - void prefix##_optimize(prefix##_p hash); \ - \ - void prefix##_put(prefix##_p hash, key_t key, value_t value); \ - value_t prefix##_get(prefix##_p hash, key_t key, value_t default_value); \ - bool prefix##_del(prefix##_p hash, key_t key); \ - bool prefix##_contains(prefix##_p hash, key_t key); \ - \ - value_t* prefix##_put_ptr(prefix##_p hash, key_t key); \ - value_t* prefix##_get_ptr(prefix##_p hash, key_t key); \ - \ - prefix##_it_p prefix##_start(prefix##_p hash); \ - prefix##_it_p prefix##_next(prefix##_p hash, prefix##_it_p it); \ - void prefix##_remove(prefix##_p hash, prefix##_it_p it); \ +#define SH_GEN_TYPES_AND_PROTOTYPES(name, key_t, value_t) \ + struct name##_slot { \ + uint32_t hash_or_flags; \ + key_t key; \ + value_t value; \ + }; \ + \ + struct name { \ + uint32_t length, capacity, deleted; \ + struct name##_slot* slots; \ + }; \ + \ + void name##_new(struct name* hashmap); \ + void name##_destroy(struct name* hashmap); \ + void name##_optimize(struct name* hashmap); \ + \ + value_t name##_get(struct name* hashmap, key_t key, value_t default_value); \ + void name##_put(struct name* hashmap, key_t key, value_t value); \ + bool name##_del(struct name* hashmap, key_t key); \ + bool name##_contains(struct name* hashmap, key_t key); \ + \ + value_t* name##_get_ptr(struct name* hashmap, key_t key); \ + value_t* name##_put_ptr(struct name* hashmap, key_t key); \ + \ + struct name##_slot* name##_start(struct name* hashmap); \ + struct name##_slot* name##_next(struct name* hashmap, struct name##_slot* it); \ + void name##_remove(struct name* hashmap, struct name##_slot* it); \ + bool name##_shrink_if_necessary(struct name* hashmap); \ +/** + * Shorthand macro to generate the implementation of an hash that uses value types as keys. That are + * types like int, float or even pointers (but in that case just the pointer, that is the memory + * address itself not the data it points to). + * + * All types that can be compared with the == operator and work with sizeof() can be used as keys. + * + * You need to generate the declarations first with SH_GEN_DECL() using the same arguments as for + * the implementation. + */ +#define SH_GEN_HASH_IMPL(name, key_t, value_t) \ + SH_GEN_IMPL(name, key_t, value_t, \ + sh_murmur3(&key, sizeof(key), 0), /* key_hash_expr(key_t key) */ \ + (a == b), /* key_cmp_expr(key_t a, key_t b) */ \ + key, /* key_put_expr(key_t key) */ \ + 0, /* key_del_expr(key_t key) */ \ + calloc(capacity, slot_size), /* calloc_expr(size_t capacity, size_t slot_size) */ \ + free(ptr) /* free_expr(void* ptr) */ \ + ) /** - * Macro to generate the definitions (implementation) of an hash. You need to - * generate the declarations first with SH_GEN_DECL(). + * Shorthand macro to generate the implementation of an hash that uses zero terminated strings as + * keys (a dictionary). The string keys are duplicated internally and are freed when an item is + * deleted or the dictionary destroyed. + * + * You need to generate the declarations first with SH_GEN_DECL() using the same arguments as for + * the implementation. */ -#define SH_GEN_DEF(prefix, key_t, value_t, hash_expr, key_cmp_expr, key_put_expr, key_del_expr) \ - bool prefix##_resize(prefix##_p hashmap, uint32_t new_capacity) { \ - /* on filling empty slot: if exceeding load factor, double capacity \ - // on deleting slot: if to empty, half capacity \ - \ - // Can't make hashmap smaller than it needs to be */ \ - if (new_capacity < hashmap->length) \ - return false; \ - \ - prefix##_t new_hashmap; \ - new_hashmap.length = 0; \ - new_hashmap.capacity = new_capacity; \ - new_hashmap.deleted = 0; \ - new_hashmap.slots = calloc(new_hashmap.capacity, sizeof(new_hashmap.slots[0])); \ - \ - /* Failed to allocate memory for new hash map, leave the original untouched */ \ - if (new_hashmap.slots == NULL) \ - return false; \ - \ - for(prefix##_it_p it = prefix##_start(hashmap); it != NULL; it = prefix##_next(hashmap, it)) { \ - prefix##_put(&new_hashmap, it->key, it->value); \ - } \ - \ - free(hashmap->slots); \ - *hashmap = new_hashmap; \ - return true; \ - } \ - \ - void prefix##_new(prefix##_p hashmap) { \ - hashmap->length = 0; \ - hashmap->capacity = 0; \ - hashmap->deleted = 0; \ - hashmap->slots = NULL; \ - prefix##_resize(hashmap, 8); \ - } \ - \ - void prefix##_destroy(prefix##_p hashmap) { \ - hashmap->length = 0; \ - hashmap->capacity = 0; \ - hashmap->deleted = 0; \ - free(hashmap->slots); \ - hashmap->slots = NULL; \ - } \ - \ - value_t* prefix##_put_ptr(prefix##_p hashmap, key_t key) { \ - /* add the +1 to the capacity doubling to avoid beeing stuck on a capacity of 0 */ \ - if (hashmap->length + hashmap->deleted + 1 > hashmap->capacity * 0.5) \ - prefix##_resize(hashmap, (hashmap->capacity + 1) * 2); \ - \ - uint32_t hash = (hash_expr) | SH_SLOT_FILLED; \ - size_t index = hash % hashmap->capacity; \ - while ( !(hashmap->slots[index].hash_and_flags == SH_SLOT_FREE || hashmap->slots[index].hash_and_flags == SH_SLOT_DELETED) ) { \ - if (hashmap->slots[index].hash_and_flags == hash) { \ - key_t a = hashmap->slots[index].key; \ - key_t b = key; \ - if (key_cmp_expr) \ - break; \ - } \ - index = (index + 1) % hashmap->capacity; \ - } \ - \ - if (hashmap->slots[index].hash_and_flags == SH_SLOT_DELETED) \ - hashmap->deleted--; \ - hashmap->length++; \ - hashmap->slots[index].hash_and_flags = hash; \ - hashmap->slots[index].key = (key_put_expr); \ - return &hashmap->slots[index].value; \ - } \ - \ - value_t* prefix##_get_ptr(prefix##_p hashmap, key_t key) { \ - uint32_t hash = (hash_expr) | SH_SLOT_FILLED; \ - size_t index = hash % hashmap->capacity; \ - while ( !(hashmap->slots[index].hash_and_flags == SH_SLOT_FREE) ) { \ - if (hashmap->slots[index].hash_and_flags == hash) { \ - key_t a = hashmap->slots[index].key; \ - key_t b = key; \ - if (key_cmp_expr) \ - return &hashmap->slots[index].value; \ - } \ - \ - index = (index + 1) % hashmap->capacity; \ - } \ - \ - return NULL; \ - } \ - \ - bool prefix##_del(prefix##_p hashmap, key_t key) { \ - uint32_t hash = (hash_expr) | SH_SLOT_FILLED; \ - size_t index = hash % hashmap->capacity; \ - while ( !(hashmap->slots[index].hash_and_flags == SH_SLOT_FREE) ) { \ - if (hashmap->slots[index].hash_and_flags == hash) { \ - key_t a = hashmap->slots[index].key; \ - key_t b = key; \ - if (key_cmp_expr) { \ - key_t key = hashmap->slots[index].key; \ - key = key; /* avoid unused variable warning */ \ - hashmap->slots[index].key = (key_del_expr); \ - hashmap->slots[index].hash_and_flags = SH_SLOT_DELETED; \ - hashmap->length--; \ - hashmap->deleted++; \ - \ - if (hashmap->length < hashmap->capacity * 0.2) \ - prefix##_resize(hashmap, hashmap->capacity / 2); \ - \ - return true; \ - } \ - } \ - \ - index = (index + 1) % hashmap->capacity; \ - } \ - \ - return false; \ - } \ - \ - void prefix##_put(prefix##_p hashmap, key_t key, value_t value) { \ - *prefix##_put_ptr(hashmap, key) = value; \ - } \ - \ - value_t prefix##_get(prefix##_p hashmap, key_t key, value_t default_value) { \ - value_t* value_ptr = prefix##_get_ptr(hashmap, key); \ - return (value_ptr) ? *value_ptr : default_value; \ - } \ - \ - bool prefix##_contains(prefix##_p hashmap, key_t key) { \ - return (prefix##_get_ptr(hashmap, key) != NULL); \ - } \ - \ - /* Search for the first not-empty slot */ \ - prefix##_it_p prefix##_start(prefix##_p hashmap) { \ - /* We need to start at an invalid slot address since sh_next() increments it \ - // before it looks at it (so it's safe). */ \ - return prefix##_next(hashmap, hashmap->slots - 1); \ - } \ - \ - prefix##_it_p prefix##_next(prefix##_p hashmap, prefix##_it_p it) { \ - if (it == NULL) \ - return NULL; \ - \ - do { \ - it++; \ - /* Check if we're past the last slot */ \ - if (it - hashmap->slots >= hashmap->capacity) \ - return NULL; \ - } while( it->hash_and_flags == SH_SLOT_FREE || it->hash_and_flags == SH_SLOT_DELETED ); \ - \ - return it; \ - } \ - \ - void prefix##_remove(prefix##_p hashmap, prefix##_it_p it) { \ - if (it != NULL && it >= hashmap->slots && it - hashmap->slots < hashmap->capacity) { \ - key_t key = it->key; \ - key = key; /* avoid unused variable warning */ \ - key = key; /* avoid unused variable warning */ \ - it->key = (key_del_expr); \ - it->hash_and_flags = SH_SLOT_DELETED; \ - \ - hashmap->length--; \ - hashmap->deleted++; \ - } \ - } \ - \ - void prefix##_optimize(prefix##_p hashmap) { \ - prefix##_resize(hashmap, hashmap->capacity); \ - } \ - - -// -// Shorthand macros to define hashes (keys are byte blocks like ints, floats, -// structs, etc.) and dictionaries (keys are zero-terminated strings). -// - -#define SH_GEN_HASH_DEF(prefix, key_t, value_t) \ - SH_GEN_DEF(prefix, key_t, value_t, sh_murmur3_32(&key, sizeof(key)), (a == b), key, 0) -#define SH_GEN_DICT_DEF(prefix, key_t, value_t) \ - SH_GEN_DEF(prefix, key_t, value_t, sh_murmur3_32(key, strlen(key)), (strcmp(a, b) == 0), sh_strdup(key), (free((void*)key), NULL)) +#define SH_GEN_DICT_IMPL(name, key_t, value_t) \ + SH_GEN_IMPL(name, key_t, value_t, \ + sh_murmur3(key, strlen(key), 0), /* key_hash_expr(key_t key) */ \ + (strcmp(a, b) == 0), /* key_cmp_expr(key_t a, key_t b) */ \ + sh_strdup(key), /* key_put_expr(key_t key) */ \ + (free((void*)key), NULL), /* key_del_expr(key_t key) */ \ + calloc(capacity, slot_size), /* calloc_expr(size_t capacity, size_t slot_size) */ \ + free(ptr) /* free_expr(void* ptr) */ \ + ) + +/** + * Macro to generate the implementation of an hash. This is the full features variant that needs a + * few code snippets (expressions) to generate the finished hash. Depending on the hash you want + * (value type keys, zero terminated string keys, structs as keys, etc.) you have to pass in + * different expressions. This makes this macro very flexible but requires a bit more initial RTFM + * than the SH_GEN_HASH_IMPL() or SH_GEN_DICT_IMPL(). See (TODO: insert ref to documentation header) + * + * You need to generate the declarations first with SH_GEN_DECL(). + * + * The name, key_t, value_t arguments have to be the same as used with SH_GEN_DECL(). All following + * arguments are expressions that are inserted into the generated functions. Each one defining a bit + * of behaviour of the generated hash. Set an expression to NULL if you don't need it. Each + * expression can use some variables from the generated code (here written like function + * prototypes): + * + * uint32_t key_hash_expr(key_t key) + * Executed whenever the hash of a key is needed. + * Examples: sh_murmur3(&key, sizeof(key), 0) or sh_murmur3(key, strlen(key), 0) + * + * bool key_cmp_expr(key_t a, key_t b) + * Expression used as if condition to compare two keys. + * Examples: (a == b) or (strcmp(a, b) == 0) or (memcmp(&a, &b, sizeof(a)) == 0) + * + * key_t key_put_expr(key_t key) + * This expression is executed for every new key that is inserted into the hashmap. The input is + * the key the user passed to the ..._put() function, the output is the key actually stored in + * the hashmap. Can be used to duplicate the keys for internal storage. + * Examples: key or sh_strdup(key) + * + * key_t key_del_expr(key_t key) + * Executed when a key is deleted from the hashmap, e.g. to free a key previously duplicated for + * internal storage. The key field of the deleted hashmap slot is set to the returned value, so + * this expression should return a kind of 0 or NULL value for the key type. + * Examples: (free(key), NULL) or 0 or (struct foo){ 0 } + * + * void* calloc_expr(size_t capacity, size_t slot_size) + * Expression used to allocate memory for the hashmap. Gets the same parameters as the calloc() + * function and is expected return a pointer to allocated and zeroed out memory just like + * calloc() does. This expression can be used to make the hashmap get its memory from somewhere + * else, e.g. a garbage collector or private memory pool. + * Examples: calloc(capacity, slot_size) + * + * void free_expr(void* ptr) + * This expression is used to free memory previously allocated by calloc_expr. + * Examples: free(ptr) + */ +#define SH_GEN_IMPL(name, key_t, value_t, key_hash_expr, key_cmp_expr, key_put_expr, key_del_expr, calloc_expr, free_expr) \ + \ + /* Some forward declarations for internal functions. */ \ + value_t* name##_put_ptr_internal(struct name* hashmap, key_t key); \ + bool name##_resize(struct name* hashmap, uint32_t new_capacity); \ + \ + /** */ \ + /* Initializes a new empty hashmap. A small capacity is allocated so the hashmap doesn't */ \ + /* need to be resized as soon as the first item is inserted. */ \ + void name##_new(struct name* hashmap) { \ + hashmap->length = 0; \ + hashmap->capacity = 0; \ + hashmap->deleted = 0; \ + hashmap->slots = NULL; \ + name##_resize(hashmap, 8); \ + } \ + \ + /** */ \ + /* Destroys the hashmap. Cleans up all the keys be executing the key_del_expr for each one */ \ + /* and frees all associated memory. */ \ + void name##_destroy(struct name* hashmap) { \ + /* Execute key_del_expr for all keys still in the hashmap. */ \ + for(struct name##_slot* it = name##_start(hashmap); it; it = name##_next(hashmap, it)) { \ + key_t key = it->key; \ + key = key; /* avoid unused variable warning */ \ + it->key = (key_del_expr); \ + it->hash_or_flags = SH_SLOT_DELETED; \ + } \ + \ + hashmap->length = 0; \ + hashmap->capacity = 0; \ + hashmap->deleted = 0; \ + \ + /* ptr is used as an argument to the free_expr which should free the memory of ptr */ \ + /* like free() does. */ \ + void* ptr = hashmap->slots; \ + ptr = ptr; /* avoid unused variable warning if free_expr doesn't use ptr */ \ + free_expr; \ + \ + hashmap->slots = NULL; \ + } \ + \ + /** */ \ + /* Resizes the hashmap to the specified new capacity. All existing itmes are rehashed into */ \ + /* the new hashmap. For zero terminated string keys this is slightly faster than using the */ \ + /* existing hashes, for integer keys this is slightly slower. */ \ + /* */ \ + /* Returns `true` if the resizing was successfull, `false` if the memory allocation for the */ \ + /* new capacity failed. In that case the hashmap remains unchanged and can still be used. */ \ + /* */ \ + /* This function is not part of the public API! Don't use it in your code unless you know */ \ + /* _exactly_ what you're doing! */ \ + bool name##_resize(struct name* hashmap, uint32_t new_capacity) { \ + /* Can't make hashmap smaller than it needs to be. */ \ + if (new_capacity < hashmap->length) \ + return false; \ + \ + struct name new_hashmap; \ + new_hashmap.length = 0; \ + new_hashmap.capacity = new_capacity; \ + new_hashmap.deleted = 0; \ + \ + /* capacity and slot_size variables are used to pass arguments to calloc_expr which in */ \ + /* turn should allocate memory like calloc() does. */ \ + size_t capacity = new_capacity; \ + size_t slot_size = sizeof(new_hashmap.slots[0]); \ + new_hashmap.slots = calloc_expr; \ + \ + /* Failed to allocate memory for new hashmap, leave the original untouched. */ \ + if (new_hashmap.slots == NULL) \ + return false; \ + \ + for(struct name##_slot* it = name##_start(hashmap); it; it = name##_next(hashmap, it)) { \ + *name##_put_ptr_internal(&new_hashmap, it->key) = it->value; \ + } \ + \ + /* ptr is used as an argument to the free_expr which should free the memory of ptr like */ \ + /* free() does. */ \ + void* ptr = hashmap->slots; \ + ptr = ptr; /* avoid unused variable warning if free_expr doesn't use ptr */ \ + free_expr; \ + *hashmap = new_hashmap; \ + return true; \ + } \ + \ + /** */ \ + /* Inserts a new key-value pair into the hashmap, growing the hashmap if necessary. Returns */ \ + /* a pointer to the storage for the value. That pointer can then be used to store the value */ \ + /* itself in the hashmap. */ \ + /* */ \ + /* The returned pointer stays valid as long as the hashmap isn't chaned (not adding or */ \ + /* deleting items or resizing the hashmap). These operations potentiall force a resizing of */ \ + /* the hashmap and thus probably invalidate the memory address. So make sure you don't */ \ + /* store the pointer somewhere or use it after a function that changes the hashmap. */ \ + /* */ \ + /* Don't use this function while iterating over the hashmap. This function might resize the */ \ + /* hashmap and thus invalidates the pointer used as iterator, possibly leading to segfaults */ \ + /* */ \ + /* Returns `NULL` if the hashmap needs to grow but failed to allocate more memory for that. */ \ + value_t* name##_put_ptr(struct name* hashmap, key_t key) { \ + if (hashmap->length + hashmap->deleted + 1 > hashmap->capacity * 0.5) { \ + uint32_t new_capacity = (hashmap->capacity == 0) ? 8 : hashmap->capacity * 2; \ + if ( name##_resize(hashmap, new_capacity) == false ) \ + return NULL; \ + } \ + \ + return name##_put_ptr_internal(hashmap, (key_put_expr)); \ + } \ + \ + /** */ \ + /* An internal function that reserves a new slot for the specified key and returns a */ \ + /* pointer to the value of that slot. It DOESN'T check if the hashmap as a free slot nore */ \ + /* executes the key_put_expr. It's only used by name##_put_ptr() and name##_resize() */ \ + /* that already take care of that. */ \ + /* */ \ + /* This function is not part of the public API! Don't use it in your code unless you know */ \ + /* _exactly_ what you're doing! */ \ + /* */ \ + /* Implementation detail: This function uses linear probing in case of hash collisions. */ \ + value_t* name##_put_ptr_internal(struct name* hashmap, key_t key) { \ + uint32_t hash = (key_hash_expr) | SH_SLOT_FILLED; \ + size_t index = hash % hashmap->capacity; \ + \ + while ( !( \ + hashmap->slots[index].hash_or_flags == SH_SLOT_FREE || \ + hashmap->slots[index].hash_or_flags == SH_SLOT_DELETED \ + ) ) { \ + if (hashmap->slots[index].hash_or_flags == hash) { \ + key_t a = hashmap->slots[index].key; \ + key_t b = key; \ + if (key_cmp_expr) \ + break; \ + } \ + index = (index + 1) % hashmap->capacity; \ + } \ + \ + if (hashmap->slots[index].hash_or_flags == SH_SLOT_DELETED) \ + hashmap->deleted--; \ + hashmap->length++; \ + hashmap->slots[index].hash_or_flags = hash; \ + hashmap->slots[index].key = key; \ + \ + return &hashmap->slots[index].value; \ + } \ + \ + /** */ \ + /* Looks up the specified key in the hashmap and returns a pointer to the value stored for */ \ + /* that key. Returns `NULL` if the key could not be found. */ \ + /* */ \ + /* The returned pointer stays valid as long as the hashmap isn't chaned (not adding or */ \ + /* deleting items or resizing the hashmap). These operations potentiall force a resizing of */ \ + /* the hashmap and thus probably invalidate the memory address. So make sure you don't */ \ + /* store the pointer somewhere or use it after a function that changes the hashmap. */ \ + value_t* name##_get_ptr(struct name* hashmap, key_t key) { \ + uint32_t hash = (key_hash_expr) | SH_SLOT_FILLED; \ + size_t index = hash % hashmap->capacity; \ + while ( !(hashmap->slots[index].hash_or_flags == SH_SLOT_FREE) ) { \ + if (hashmap->slots[index].hash_or_flags == hash) { \ + key_t a = hashmap->slots[index].key; \ + key_t b = key; \ + if (key_cmp_expr) \ + return &hashmap->slots[index].value; \ + } \ + \ + index = (index + 1) % hashmap->capacity; \ + } \ + \ + return NULL; \ + } \ + \ + /** */ \ + /* Deletes the key-value pair for the specified key from the hashmap. The hashmap is shrunk */ \ + /* if it is to sparse. Even if no key was deleted. This way you can use this function to */ \ + /* with an arbitary (non-existing) key to shrink the hashmap if necessary after deleting */ \ + /* some items while iterating over the hashmap. */ \ + /* */ \ + /* Returns `true` if the key-value pair was deleted, `false` if the key wasn't found in the */ \ + /* hashmap. */ \ + /* */ \ + /* Don't use this function while iterating over the hashmap. This function might resize the */ \ + /* hashmap and thus invalidates the pointer used as iterator, possibly leading to segfaults */ \ + bool name##_del(struct name* hashmap, key_t key) { \ + uint32_t hash = (key_hash_expr) | SH_SLOT_FILLED; \ + size_t index = hash % hashmap->capacity; \ + while ( !(hashmap->slots[index].hash_or_flags == SH_SLOT_FREE) ) { \ + if (hashmap->slots[index].hash_or_flags == hash) { \ + key_t a = hashmap->slots[index].key; \ + key_t b = key; \ + if (key_cmp_expr) { \ + /* Define a local variable named "key" that is used to pass the key of the */ \ + /* deleted item to key_del_expr. We can't use the function argument named */ \ + /* "key" because the key_put_expr might have duplicated a string key. In */ \ + /* that case the key_del_expr expects to get exactly the value returned by */ \ + /* key_put_expr... and that is the key stored in the deleted slot. */ \ + key_t key = hashmap->slots[index].key; \ + key = key; /* avoid unused variable warning */ \ + hashmap->slots[index].key = (key_del_expr); \ + hashmap->slots[index].hash_or_flags = SH_SLOT_DELETED; \ + hashmap->length--; \ + hashmap->deleted++; \ + \ + name##_shrink_if_necessary(hashmap); \ + return true; \ + } \ + } \ + \ + index = (index + 1) % hashmap->capacity; \ + } \ + \ + return false; \ + } \ + \ + /** */ \ + /* Inserts a value at the specified key into the hashmap. An existing value for the same */ \ + /* key is overwritten. The hashmap is grown if necessary. */ \ + /* */ \ + /* Causes a segmentation fault in case the hashmap needs to grow but couldn't allocate the */ \ + /* necessary memory. Use name##_put_ptr() if you want to handle that case. */ \ + /* */ \ + /* Don't use this function while iterating over the hashmap. This function might resize the */ \ + /* hashmap and thus invalidates the pointer used as iterator, possibly leading to segfaults */ \ + void name##_put(struct name* hashmap, key_t key, value_t value) { \ + *name##_put_ptr(hashmap, key) = value; \ + } \ + \ + /** */ \ + /* Fetches the value for the specified key from the hashmap. In case the key isn't found */ \ + /* the `default_value` is returned. */ \ + value_t name##_get(struct name* hashmap, key_t key, value_t default_value) { \ + value_t* value_ptr = name##_get_ptr(hashmap, key); \ + return (value_ptr) ? *value_ptr : default_value; \ + } \ + \ + /** */ \ + /* Checks if the specified key exists in the hashmap. Is just as fast as name##_get() or */ \ + /* name##_get_ptr() so use those if you want to actually get a value. This function is */ \ + /* useful if you only need to know that the key is there. */ \ + /* */ \ + /* Returns `true` if the key was found, `false` if not. */ \ + bool name##_contains(struct name* hashmap, key_t key) { \ + return (name##_get_ptr(hashmap, key) != NULL); \ + } \ + \ + /** */ \ + /* Returns an iterator pointing to the first item of the hashmap. Returns `NULL` if the */ \ + /* hashmap is empty. Meant to be used with name##_next() in a for loop to iterate over */ \ + /* all items in the hashmap: */ \ + /* */ \ + /* for(hash_it_p it = hash_start(&hash); it != NULL; it = hash_next(&hash, it)) { */ \ + /* it->key; // access the key */ \ + /* it->value; // access the value */ \ + /* hash_remove(&hash, it); // remove the current item */ \ + /* } */ \ + /* */ \ + /* // If you removed almost all items this will shrink the hashmap after the loop. */ \ + /* // This is only needed in extreme cases. */ \ + /* hash_shrink_if_necessary(&hash); */ \ + /* */ \ + /* You can't add new items to the hashmap during iteration (name##_put() or */ \ + /* name##_put_ptr()) since that invalidates the pointer of the iterator (and might lead to */ \ + /* seg faults). name##_del() also can't be used during iteration (same problem) but you can */ \ + /* use name##_remove() to mark the current item as deleted and call */ \ + /* name##_shrink_if_necessary() if chances are that you deleted most items. */ \ + /* */ \ + struct name##_slot* name##_start(struct name* hashmap) { \ + /* We need to start at an invalid slot address since sh_next() increments it before it */ \ + /* looks at it (so it's safe). */ \ + return name##_next(hashmap, hashmap->slots - 1); \ + } \ + \ + /** */ \ + /* Advances the iterator to the next item in the hashmap or returns `NULL` if there is no */ \ + /* next element. */ \ + /* */ \ + /* See name##_start() for how to use this function. */ \ + struct name##_slot* name##_next(struct name* hashmap, struct name##_slot* it) { \ + if (it == NULL) \ + return NULL; \ + \ + do { \ + it++; \ + /* Check if we're past the last slot. */ \ + if (it - hashmap->slots >= hashmap->capacity) \ + return NULL; \ + } while( it->hash_or_flags == SH_SLOT_FREE || it->hash_or_flags == SH_SLOT_DELETED ); \ + \ + return it; \ + } \ + \ + /** */ \ + /* Removes the item the iterator points to from the hashmap. This function is meant to be */ \ + /* used while iterating over all items of the hashmap. The hashmap is not resized, even if */ \ + /* all items are removed. Instead it is shrunk at the next call to name##_del() or */ \ + /* name##_shrink_if_necessary(). */ \ + /* */ \ + /* See name##_start() for how to use this function. */ \ + void name##_remove(struct name* hashmap, struct name##_slot* it) { \ + if (it != NULL && it >= hashmap->slots && it - hashmap->slots < hashmap->capacity) { \ + key_t key = it->key; \ + key = key; /* avoid unused variable warning */ \ + it->key = (key_del_expr); \ + it->hash_or_flags = SH_SLOT_DELETED; \ + \ + hashmap->length--; \ + hashmap->deleted++; \ + } \ + } \ + \ + /** */ \ + /* Shrinks the hashmap down if it became to sparse. Returns `true` if it was shrunk, */ \ + /* `false` if not. This function is meant to be called after you removed many items while */ \ + /* iterating over the hashmap and want to make sure you don't wast to much memory. */ \ + /* */ \ + /* Implementation detail: Other functions assume that the capacity is always > 0 or they'll */ \ + /* crash. Therefore this function only shrinks the capacity down to a minimal value but not */ \ + /* 0. */ \ + bool name##_shrink_if_necessary(struct name* hashmap) { \ + uint32_t new_capacity = hashmap->capacity; \ + while ( hashmap->length < new_capacity * 0.25 && new_capacity > 8 ) \ + new_capacity /= 2; \ + \ + if (new_capacity < hashmap->capacity) { \ + name##_resize(hashmap, new_capacity); \ + return true; \ + } \ + \ + return false; \ + } \ + \ + /** */ \ + /* Rehashes all keys in the hashmap. This is the same as inserting all key-value pairs one */ \ + /* after the other into a new hashmap. */ \ + /* */ \ + /* This function is only usfull if you degenerated the hashmap by _a lot_ of inserts and */ \ + /* deletions that caused collisions. If you plan _a lot_ of lookups after that optimizing */ \ + /* the hashmap might help performance. */ \ + void name##_optimize(struct name* hashmap) { \ + uint32_t new_capacity = hashmap->length; \ + \ + /* http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */ \ + new_capacity--; \ + new_capacity |= new_capacity >> 1; \ + new_capacity |= new_capacity >> 2; \ + new_capacity |= new_capacity >> 4; \ + new_capacity |= new_capacity >> 8; \ + new_capacity |= new_capacity >> 16; \ + new_capacity++; \ + \ + name##_resize(hashmap, new_capacity * 2); \ + } \ + +#if _SVID_SOURCE || _BSD_SOURCE || _XOPEN_SOURCE >= 500 || _XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED || _POSIX_C_SOURCE >= 200809L +#define sh_strdup strdup +#else +#define SH_NEED_STRDUP_IMPL +#endif #endif // SLIM_HASH_HEADER @@ -358,82 +718,111 @@ v1.0 2016-06-22 Initial release #ifdef SLIM_HASH_IMPLEMENTATION /** - * Get 32-bit Murmur3 hash of a memory block. Taken from - * https://github.com/wolkykim/qlibc/blob/master/src/utilities/qhash.c + * Calculate the 32 bit Murmur3 hash of a memory block. + * + * You can set the seed parameter to 0. You can change it if you want to defend agains denail of + * service attacks that try to send you strings that all hash to the same slot in the hashmap, thus + * destroying the performance of the hashmap. It would be optimal to use a random seed for each + * different hashmap but that would require a random number generator. If you don't need or want + * that complexity just use an arbitary static seed for your hashmap (e.g. 0 or 1337). * - * MurmurHash3 was created by Austin Appleby in 2008. The initial - * implementation was published in C++ and placed in the public: - * https://sites.google.com/site/murmurhash/ + * See https://stackoverflow.com/questions/9241230/what-is-murmurhash3-seed-parameter * - * Seungyoung Kim has ported its implementation into C language in 2012 and - * published it as a part of qLibc component. - **/ -uint32_t sh_murmur3_32(const void *data, size_t size) { - if (data == NULL || size == 0) + * MurmurHash3 was created by Austin Appleby in 2008. The initial implementation was published in + * C++ and placed in the public: https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp + * + * The C++ code has been manually ported to C. The body loop has been changed to count from 0 to + * nblocks-1 instead of -nblocks to 0 so that gcc -O3 doesn't get confused about starting the loop + * with an invalid pointer. + */ +uint32_t sh_murmur3(const void* key, int size, uint32_t seed) { + if (key == NULL || size == 0) return 0; - const uint32_t c1 = 0xcc9e2d51; - const uint32_t c2 = 0x1b873593; - + const uint32_t* data = (const uint32_t*)key; const int nblocks = size / 4; - const uint32_t *blocks = (const uint32_t *) (data); - const uint8_t *tail = (const uint8_t *) (data + (nblocks * 4)); - uint32_t h = 0; + uint32_t h1 = seed; + const uint32_t c1 = 0xcc9e2d51; + const uint32_t c2 = 0x1b873593; - int i; - uint32_t k; - for (i = 0; i < nblocks; i++) { - k = blocks[i]; + // Body + for(int i = 0; i < nblocks; i++) { + uint32_t k1 = data[i]; - k *= c1; - k = (k << 15) | (k >> (32 - 15)); - k *= c2; + k1 *= c1; + k1 = (k1 << 15) | (k1 >> (32 - 15)); + k1 *= c2; - h ^= k; - h = (h << 13) | (h >> (32 - 13)); - h = (h * 5) + 0xe6546b64; + h1 ^= k1; + h1 = (h1 << 13) | (h1 >> (32 - 13)); + h1 = h1 * 5 + 0xe6546b64; } - k = 0; - switch (size & 3) { + // Tail + const uint8_t* tail = (const uint8_t*)(data + nblocks); + uint32_t k1 = 0; + + switch(size & 3) { case 3: - k ^= tail[2] << 16; + k1 ^= tail[2] << 16; case 2: - k ^= tail[1] << 8; + k1 ^= tail[1] << 8; case 1: - k ^= tail[0]; - k *= c1; - k = (k << 15) | (k >> (32 - 15)); - k *= c2; - h ^= k; + k1 ^= tail[0]; + k1 *= c1; + k1 = (k1 << 15) | (k1 >> (32 - 15));; + k1 *= c2; + h1 ^= k1; }; - h ^= size; + // Finalization + h1 ^= size; - h ^= h >> 16; - h *= 0x85ebca6b; - h ^= h >> 13; - h *= 0xc2b2ae35; - h ^= h >> 16; + h1 ^= h1 >> 16; + h1 *= 0x85ebca6b; + h1 ^= h1 >> 13; + h1 *= 0xc2b2ae35; + h1 ^= h1 >> 16; - return h; + return h1; } /** - * Portable version of strdup that doesn't require feature test macros. But if - * got a POSIX strdup use it. + * Calculate the 32 bit fnv1a hash of a zero terminated string. This hash function can be used for + * identifiers, common field names or other strings. From the fnv1a page: "The high dispersion of + * the FNV hashes makes them well suited for hashing nearly identical strings such as URLs, + * hostnames, filenames, text, IP addresses, etc.". See http://www.isthe.com/chongo/tech/comp/fnv/ + * + * In performance tests on an old 64 bit x86 CPU the murmur3 hash function was faster than this + * function. + * + * The function is directly implemented from pseudocode on the website. */ -#if _SVID_SOURCE || _BSD_SOURCE || _XOPEN_SOURCE >= 500 || _XOPEN_SOURCE && _XOPEN_SOURCE_EXTENDED || _POSIX_C_SOURCE >= 200809L -#define sh_strdup strdup -#else -char *sh_strdup(const char *s) { - char *d = malloc(strlen(s) + 1); - if (d == NULL) +uint32_t sh_fnv1a(const char* key) { + uint32_t hash = 2166136261; + for(uint8_t c = *key; c != '\0'; c++) { + hash ^= c; + hash *= 16777619; + } + return hash; +} + +/** + * Portable version of strdup() that doesn't require feature test macros to be set. But if we got a + * POSIX strdup() use it. + * + * The feature test macro expression is taken from the strdup() man page. + */ +#ifdef SH_NEED_STRDUP_IMPL +char* sh_strdup(const char* src) { + char* dest = malloc(strlen(src) + 1); + if (dest == NULL) return NULL; - strcpy(d, s); - return d; + strcpy(dest, src); + return dest; } #endif + #endif // SLIM_HASH_IMPLEMENTATION \ No newline at end of file diff --git a/tests/slim_gl_test.c b/tests/slim_gl_test.c index d591506..84057e1 100644 --- a/tests/slim_gl_test.c +++ b/tests/slim_gl_test.c @@ -186,153 +186,7 @@ void test_next_argument_for_draw() { } -/* -void test_parse_name() { - const char* sample = "foo batz bar"; - sgl_option_t option; - - const char* next = sgl__parse_name(sample, &option); - st_check_str(option.name, "foo"); - st_check_int(option.name[3], '\0'); - st_check_str(next, " batz bar"); - - next = sgl__parse_name(next, &option); - st_check_str(option.name, "batz"); - st_check_int(option.name[4], '\0'); - st_check_str(next, " bar"); - - next = sgl__parse_name(next, &option); - st_check_str(option.name, "bar"); - st_check_int(option.name[3], '\0'); - st_check_str(next, ""); - - next = sgl__parse_name(next, &option); - st_check_null(next); -} - -void test_parse_directive() { - const char* sample = "%4f %unb %4x4tm %*rt"; - sgl_option_t option; - - const char* next = sgl__parse_directive(sample, &option); - st_check_str(option.modifiers, "4"); - st_check_int(option.modifiers[1], '\0'); - st_check_int(option.type, 'f'); - st_check_str(next, " %unb %4x4tm %*rt"); - - next = sgl__parse_directive(next, &option); - st_check_str(option.modifiers, "un"); - st_check_int(option.modifiers[2], '\0'); - st_check_int(option.type, 'b'); - st_check_str(next, " %4x4tm %*rt"); - - next = sgl__parse_directive(next, &option); - st_check_str(option.modifiers, "4x4t"); - st_check_int(option.modifiers[4], '\0'); - st_check_int(option.type, 'm'); - st_check_str(next, " %*rt"); - - next = sgl__parse_directive(next, &option); - st_check_str(option.modifiers, "*r"); - st_check_int(option.modifiers[2], '\0'); - st_check_int(option.type, 't'); - st_check_str(next, ""); - - next = sgl__parse_directive(next, &option); - st_check_null(next); -} - -void test_parse_name_and_directive() { - const char* sample = "pos %4f color %unb -index %s"; - const char* next = sample; - sgl_option_t option; - - next = sgl__parse_name(next, &option); - next = sgl__parse_directive(next, &option); - st_check_str(option.name, "pos"); - st_check_int(option.type, 'f'); - st_check_str(option.modifiers, "4"); - st_check_str(next, " color %unb -index %s"); - - next = sgl__parse_name(next, &option); - next = sgl__parse_directive(next, &option); - st_check_str(option.name, "color"); - st_check_int(option.type, 'b'); - st_check_str(option.modifiers, "un"); - st_check_str(next, " -index %s"); - - next = sgl__parse_name(next, &option); - next = sgl__parse_directive(next, &option); - st_check_str(option.name, "-index"); - st_check_int(option.type, 's'); - st_check_str(option.modifiers, ""); - st_check_str(next, ""); - - next = sgl__parse_name(next, &option); - next = sgl__parse_directive(next, &option); - st_check_null(next); -} - -void test_parse_name_and_directive_errors() { - sgl_option_t option = { 0 }; - - const char* sample = ""; - const char* next = sample; - next = sgl__parse_name(next, &option); - next = sgl__parse_directive(next, &option); - st_check_null(next); - - sample = "waaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaay_to_long %4f"; - next = sample; - next = sgl__parse_name(next, &option); - next = sgl__parse_directive(next, &option); - st_check_null(next); - st_check_not_null(option.error_message); - printf("error: %s\n%s\n%*s^\n", option.error_message, sample, (int)(option.error_at - sample), ""); - - sample = "foo+bar %4f"; - next = sample; - next = sgl__parse_name(next, &option); - next = sgl__parse_directive(next, &option); - st_check_null(next); - st_check_not_null(option.error_message); - printf("error: %s\n%s\n%*s^\n", option.error_message, sample, (int)(option.error_at - sample), ""); - - sample = "foo %"; - next = sample; - next = sgl__parse_name(next, &option); - next = sgl__parse_directive(next, &option); - st_check_null(next); - st_check_not_null(option.error_message); - printf("error: %s\n%s\n%*s^\n", option.error_message, sample, (int)(option.error_at - sample), ""); - - sample = "foo %to_looooooooooooooooongf"; - next = sample; - next = sgl__parse_name(next, &option); - next = sgl__parse_directive(next, &option); - st_check_null(next); - st_check_not_null(option.error_message); - printf("error: %s\n%s\n%*s^\n", option.error_message, sample, (int)(option.error_at - sample), ""); - - sample = "foo bar"; - next = sample; - next = sgl__parse_name(next, &option); - next = sgl__parse_directive(next, &option); - st_check_null(next); - st_check_not_null(option.error_message); - printf("error: %s\n%s\n%*s^\n", option.error_message, sample, (int)(option.error_at - sample), ""); -} -*/ - - int main() { - /* - st_run(test_parse_name); - st_run(test_parse_directive); - st_run(test_parse_name_and_directive); - st_run(test_parse_name_and_directive_errors); - */ - st_run(test_next_argument_basics); st_run(test_next_argument_for_program_new); st_run(test_next_argument_for_vao_new); diff --git a/tests/slim_hash_no_typedefs_test.c b/tests/slim_hash_no_typedefs_test.c new file mode 100644 index 0000000..4f7e4fa --- /dev/null +++ b/tests/slim_hash_no_typedefs_test.c @@ -0,0 +1,43 @@ +#define SLIM_HASH_NO_TYPEDEFS +#define SLIM_HASH_IMPLEMENTATION +#include "../slim_hash.h" +#define SLIM_TEST_IMPLEMENTATION +#include "../slim_test.h" + + +SH_GEN_DECL(dict, const char*, int); +SH_GEN_DICT_IMPL(dict, const char*, int); + +void test_struct_types() { + struct dict dict; + dict_new(&dict); + + dict_put(&dict, "a", 1); + st_check_int(dict_contains(&dict, "a"), 1); + st_check_int(dict_contains(&dict, "b"), 0); + st_check_int(dict_get(&dict, "a", 0), 1); + + dict_put(&dict, "b", 2); + dict_put(&dict, "c", 3); + st_check_int(dict_get(&dict, "b", 0), 2); + st_check_int(dict_get(&dict, "c", 0), 3); + + dict_del(&dict, "b"); + st_check_int(dict_get(&dict, "a", 0), 1); + st_check_int(dict_get(&dict, "b", 0), 0); + st_check_int(dict_get(&dict, "c", 0), 3); + + struct dict_slot* it = dict_start(&dict); + st_check(it->value == 1 || it->value == 3); + it = dict_next(&dict, it); + st_check(it->value == 1 || it->value == 3); + it = dict_next(&dict, it); + st_check_null(it); + + dict_destroy(&dict); +} + +int main() { + st_run(test_struct_types); + return st_show_report(); +} \ No newline at end of file diff --git a/tests/slim_hash_test.c b/tests/slim_hash_test.c index 442fa99..de57ff9 100644 --- a/tests/slim_hash_test.c +++ b/tests/slim_hash_test.c @@ -5,10 +5,10 @@ SH_GEN_DECL(sh, int64_t, int); -SH_GEN_HASH_DEF(sh, int64_t, int); +SH_GEN_HASH_IMPL(sh, int64_t, int); SH_GEN_DECL(dict, const char*, int); -SH_GEN_DICT_DEF(dict, const char*, int); +SH_GEN_DICT_IMPL(dict, const char*, int); void test_new_and_destroy() { @@ -263,8 +263,62 @@ void test_dict_update() { dict_destroy(&dict); } +// Key expression test, make sure key_put_expr and key_del_expr are called properly +int ket_put_counter = 0; +int ket_del_counter = 0; + +SH_GEN_DECL(ket_hash, int, int); +SH_GEN_IMPL(ket_hash, int, int, + sh_murmur3(&key, sizeof(key), 0), + a == b, + (ket_put_counter++, key), + (ket_del_counter++, key), + calloc(capacity, slot_size), + free(ptr) +); + + +void test_key_del_expr_on_hash_destroy() { + ket_hash_t hash; + ket_hash_new(&hash); + + ket_hash_put(&hash, 17, 5); + ket_hash_put(&hash, 13, 8); + ket_hash_put(&hash, 11, 7); + ket_hash_put(&hash, 7, 3); + ket_hash_put(&hash, 5, 9); + + st_check_int( ket_hash_get(&hash, 17, 0), 5 ); + st_check_int( ket_hash_get(&hash, 13, 0), 8 ); + st_check_int(ket_put_counter, 5); + st_check_int(ket_del_counter, 0); + + bool was_found = ket_hash_del(&hash, 17); + st_check_int(was_found, true); + st_check_int(ket_put_counter, 5); + st_check_int(ket_del_counter, 1); + + // Force a downsizing to make sure key_del_expr isn't called then + ket_hash_it_p it = ket_hash_start(&hash); + ket_hash_remove(&hash, it); + it = ket_hash_next(&hash, it); + ket_hash_remove(&hash, it); + it = ket_hash_next(&hash, it); + ket_hash_remove(&hash, it); + + st_check_int(ket_put_counter, 5); + st_check_int(ket_del_counter, 4); + + ket_hash_destroy(&hash); + st_check_int(ket_put_counter, 5); + st_check_int(ket_del_counter, 5); +} + + + + SH_GEN_DECL(env, char*, int); -SH_GEN_DICT_DEF(env, char*, int); +SH_GEN_DICT_IMPL(env, char*, int); void test_example() { env_t env; @@ -329,5 +383,6 @@ int main() { st_run(test_dict); st_run(test_dict_update); st_run(test_example); + st_run(test_key_del_expr_on_hash_destroy); return st_show_report(); } \ No newline at end of file -- 2.20.1