From: Daniel Carl Date: Fri, 6 Feb 2015 21:56:34 +0000 (+0100) Subject: Use hash table for duplicate check of history (#163). X-Git-Url: https://git.owens.tech///git?a=commitdiff_plain;h=640d3095a2e78466030c0d26fb6ed8c6255e95f9;p=vimb.git Use hash table for duplicate check of history (#163). The previous duplication check where don on the generated list of history items with callback function which is really slow for large history files. Now the histories url is put into a hash table for a faster duplicate check. Additional to this some unneeded memory allocation where removed. This makes the code a little harder to maintain, but hey we don't want to wast time and memory here. --- diff --git a/src/bookmark.c b/src/bookmark.c index f42ded4..cbe6a33 100644 --- a/src/bookmark.c +++ b/src/bookmark.c @@ -34,8 +34,7 @@ typedef struct { static GList *load(const char *file); static gboolean bookmark_contains_all_tags(Bookmark *bm, char **query, unsigned int qlen); -static Bookmark *line_to_bookmark(const char *line); -static int bookmark_comp(Bookmark *a, Bookmark *b); +static Bookmark *line_to_bookmark(char *uri, char *data); static void free_bookmark(Bookmark *bm); /** @@ -257,10 +256,7 @@ gboolean bookmark_queue_clear(void) static GList *load(const char *file) { - return util_file_to_unique_list( - file, (Util_Content_Func)line_to_bookmark, (GCompareFunc)bookmark_comp, - (GDestroyNotify)free_bookmark, vb.config.history_max - ); + return util_file_to_unique_list(file, (Util_Content_Func)line_to_bookmark, 0); } /** @@ -316,45 +312,28 @@ static gboolean bookmark_contains_all_tags(Bookmark *bm, char **query, return true; } -static Bookmark *line_to_bookmark(const char *line) +static Bookmark *line_to_bookmark(char *uri, char *data) { - char **parts; - int len; + char *p; Bookmark *bm; - while (g_ascii_isspace(*line)) { - line++; - } - if (!*line) { - return NULL; - } - parts = g_strsplit(line, "\t", 3); - len = g_strv_length(parts); - - bm = g_slice_new(Bookmark); - bm->uri = g_strdup(parts[0]); - bm->tags = NULL; - bm->title = NULL; - if (len == 3) { - bm->title = g_strdup(parts[1]); - bm->tags = g_strdup(parts[2]); - } else if (len == 2) { - bm->title = g_strdup(parts[1]); + /* data part may consist of title or titletags*/ + bm = g_slice_new(Bookmark); + bm->uri = uri; + if ((p = strchr(data, '\t'))) { + *p = '\0'; + bm->title = data; + bm->tags = p + 1; + } else { + bm->title = data; + bm->tags = NULL; } - g_strfreev(parts); return bm; } -static int bookmark_comp(Bookmark *a, Bookmark *b) -{ - return g_strcmp0(a->uri, b->uri); -} - static void free_bookmark(Bookmark *bm) { g_free(bm->uri); - g_free(bm->title); - g_free(bm->tags); g_slice_free(Bookmark, bm); } diff --git a/src/history.c b/src/history.c index d329417..9e389af 100644 --- a/src/history.c +++ b/src/history.c @@ -26,6 +26,7 @@ extern VbCore vb; +#define HIST_FILE(t) (vb.files[file_map[t]]) /* map history types to files */ static const VbFile file_map[HISTORY_LAST] = { FILES_COMMAND, @@ -38,13 +39,11 @@ typedef struct { char *second; } History; -static const char *get_file_by_type(HistoryType type); static GList *load(const char *file); static void write_to_file(GList *list, const char *file); -static History *line_to_history(const char *line); static gboolean history_item_contains_all_tags(History *item, char **query, unsigned int qlen); -static int history_comp(History *a, History *b); +static History *line_to_history(char *uri, char *title); static void free_history(History *item); @@ -63,7 +62,7 @@ void history_cleanup(void) } for (HistoryType i = HISTORY_FIRST; i < HISTORY_LAST; i++) { - file = get_file_by_type(i); + file = HIST_FILE(i); list = load(file); write_to_file(list, file); g_list_free_full(list, (GDestroyNotify)free_history); @@ -83,7 +82,7 @@ void history_add(HistoryType type, const char *value, const char *additional) return; } - file = get_file_by_type(type); + file = HIST_FILE(type); if (additional) { util_file_append(file, "%s\t%s\n", value, additional); } else { @@ -100,7 +99,7 @@ gboolean history_fill_completion(GtkListStore *store, HistoryType type, const ch GtkTreeIter iter; History *item; - src = load(get_file_by_type(type)); + src = load(HIST_FILE(type)); src = g_list_reverse(src); if (!input || !*input) { /* without any tags return all items */ @@ -169,12 +168,12 @@ GList *history_get_list(VbInputType type, const char *query) switch (type) { case VB_INPUT_COMMAND: - src = load(get_file_by_type(HISTORY_COMMAND)); + src = load(HIST_FILE(HISTORY_COMMAND)); break; case VB_INPUT_SEARCH_FORWARD: case VB_INPUT_SEARCH_BACKWARD: - src = load(get_file_by_type(HISTORY_SEARCH)); + src = load(HIST_FILE(HISTORY_SEARCH)); break; default: @@ -190,30 +189,23 @@ GList *history_get_list(VbInputType type, const char *query) } g_list_free_full(src, (GDestroyNotify)free_history); - /* prepend the original query as own item like done in vim to have the + /* Prepend the original query as own item like done in vim to have the * original input string in input box if we step before the first real - * item */ + * item. */ result = g_list_prepend(result, g_strdup(query)); return result; } -static const char *get_file_by_type(HistoryType type) -{ - return vb.files[file_map[type]]; -} - /** * Loads history items form file but eleminate duplicates in FIFO order. * - * Returned list must be freed with (GDestroyNotify) free_history. + * Returned list must be freed with (GDestroyNotify)free_history. */ static GList *load(const char *file) { - /* read the history items from file */ return util_file_to_unique_list( - file, (Util_Content_Func)line_to_history, (GCompareFunc)history_comp, - (GDestroyNotify)free_history, vb.config.history_max + file, (Util_Content_Func)line_to_history, vb.config.history_max ); } @@ -241,33 +233,6 @@ static void write_to_file(GList *list, const char *file) } } -static History *line_to_history(const char *line) -{ - char **parts; - int len; - - while (VB_IS_SPACE(*line)) { - line++; - } - if (!*line) { - return NULL; - } - - History *item = g_slice_new0(History); - - parts = g_strsplit(line, "\t", 2); - len = g_strv_length(parts); - if (len == 2) { - item->first = g_strdup(parts[0]); - item->second = g_strdup(parts[1]); - } else { - item->first = g_strdup(parts[0]); - } - g_strfreev(parts); - - return item; -} - /** * Checks if the given array of tags are all found in history item. */ @@ -291,15 +256,20 @@ static gboolean history_item_contains_all_tags(History *item, char **query, return true; } -static int history_comp(History *a, History *b) +static History *line_to_history(char *uri, char *title) { - /* compare only the first part */ - return g_strcmp0(a->first, b->first); + History *item = g_slice_new0(History); + + item->first = uri; + item->second = title; + + return item; } static void free_history(History *item) { + /* The first and second property are created from the same allocated + * string so we only need to free the first. */ g_free(item->first); - g_free(item->second); g_slice_free(History, item); } diff --git a/src/util.c b/src/util.c index cb2361e..d33b240 100644 --- a/src/util.c +++ b/src/util.c @@ -120,62 +120,79 @@ char **util_get_lines(const char *filename) } /** - * Retrieves a list with unique items from file. + * Retrieves a list with unique items from file. The uniqueness is calculated + * based on the lines comparing all chars until the next char or end of + * line. * * @filename: file to read items from - * @func: function to parse a single line to item - * @unique_func: function to decide if two items are equal - * @free_func: function to free already converted item if this isn't unique + * @func: Function to parse a single line to item. This is called by + * two strings of the same allocated memory chunk which isn't + * freed here. This allows to use the strings like they are. But + * in case the memory should be freed, free only that of the + * first string. * @max_items: maximum number of items that are returned, use 0 for * unlimited items */ GList *util_file_to_unique_list(const char *filename, Util_Content_Func func, - GCompareFunc unique_func, GDestroyNotify free_func, unsigned int max_items) + guint max_items) { - GList *gl = NULL; - /* yes, the whole file is read and wen possible don not need all the - * lines, but this is easier to implement compared to reading the file - * line wise from end to beginning */ char *line, **lines; - void *value; - int len, num_items = 0; - - /* return empty list if max items is 0 */ - if (!max_items) { - return gl; - } + int i, len; + GList *gl = NULL; + GHashTable *ht; lines = util_get_lines(filename); - len = g_strv_length(lines); - if (!len) { - return gl; + if (!lines) { + return NULL; } - /* begin with the last line of the file to make unique check easier - - * every already existing item in the list is the latest, so we don't need - * to remove items from the list which takes some time */ - for (int i = len - 1; i >= 0; i--) { + /* Use the hashtable to check for duplicates in a faster way than by + * iterating over the generated list itself. So it's enough to store the + * the keys only. */ + ht = g_hash_table_new(g_str_hash, g_str_equal); + + /* Begin with the last line of the file to make unique check easier - + * every already existing item in the table is the latest, so we don't need + * to do anything if an item already exists in the hash table. */ + len = g_strv_length(lines); + for (i = len - 1; i >= 0; i--) { + char *key, *data; + void *item; + line = lines[i]; g_strstrip(line); if (!*line) { continue; } - if ((value = func(line))) { - /* if the value is already in list, free it and don't put it onto - * the list */ - if (g_list_find_custom(gl, value, unique_func)) { - free_func(value); - } else { - gl = g_list_prepend(gl, value); - /* skip the loop if we precessed max_items unique items */ - if (++num_items >= max_items) { + /* if line contains tab char - separate the line at this */ + if ((data = strchr(line, '\t'))) { + *data = '\0'; + key = line; + data++; + } else { + key = line; + data = NULL; + } + + /* If the key part of file line is not in the hash table, insert it + * into the table and also in the list. */ + if (!g_hash_table_lookup_extended(ht, key, NULL, NULL)) { + if ((item = func(key, data))) { + g_hash_table_insert(ht, key, NULL); + gl = g_list_prepend(gl, item); + + /* Don't put more entries into the list than requested. */ + if (max_items && g_hash_table_size(ht) >= max_items) { break; } } } } - g_strfreev(lines); + + /* Free the memory for the string array but keep the strings untouched. */ + g_free(lines); + g_hash_table_destroy(ht); return gl; } diff --git a/src/util.h b/src/util.h index 694b68b..dcfa0e1 100644 --- a/src/util.h +++ b/src/util.h @@ -28,8 +28,7 @@ enum { UTIL_EXP_SPECIAL = 0x04, /* expand % to current URI */ }; -typedef gboolean (*Util_Comp_Func)(const char*, const char*); -typedef void *(*Util_Content_Func)(const char*); +typedef void *(*Util_Content_Func)(char*, char*); char* util_get_config_dir(void); char* util_get_cache_dir(void); @@ -39,7 +38,7 @@ void util_create_file_if_not_exists(const char* filename); char* util_get_file_contents(const char* filename, gsize* length); char** util_get_lines(const char* filename); GList *util_file_to_unique_list(const char *filename, Util_Content_Func func, - GCompareFunc unique_func, GDestroyNotify free_func, unsigned int max_items); + guint max_items); gboolean util_file_append(const char *file, const char *format, ...); gboolean util_file_prepend(const char *file, const char *format, ...); char* util_strcasestr(const char* haystack, const char* needle);