From 9dd67ade189d799ead56857c01ede3798aa23215 Mon Sep 17 00:00:00 2001 From: Daniel Carl Date: Sat, 6 Sep 2014 14:37:38 +0200 Subject: [PATCH] Added curly brace pattern matching for auto commands (#100). This allows a pattern containing {foo,bar} to match 'foo' or 'bar'. This is really useful for protocol matching, for example http{s,}://{www,mail,maps}.ugly-domain.com/*. --- src/util.c | 147 ++++++++++++++++++++++++++++++++++------------ tests/test-util.c | 59 +++++++++++++++++++ 2 files changed, 169 insertions(+), 37 deletions(-) diff --git a/src/util.c b/src/util.c index 1084482..7101bca 100644 --- a/src/util.c +++ b/src/util.c @@ -28,6 +28,9 @@ extern VbCore vb; +static int match_list(const char *pattern, const char *subject); + + char *util_get_config_dir(void) { char *path = g_build_filename(g_get_user_config_dir(), PROJECT, NULL); @@ -509,79 +512,149 @@ gboolean util_parse_expansion(const char **input, GString *str, int flags, /** * Compares given string against also given pattern. - * * matches any sequence of characters - * ? matches any single character except of / - * \? matches a ? + * + * * Matches any sequence of characters. + * ? Matches any single character except of '/'. + * {foo,bar} Matches foo or bar - '{', ',' and '}' within this pattern must be + * escaped by '\'. '*' and '?' have no special meaning within the + * curly braces. + * *?{} these chars must always be escaped by '\' to match them literally */ -gboolean util_wildmatch(const char *pattern, const char *string) +gboolean util_wildmatch(const char *pattern, const char *subject) { int i; - char ul, pl; - const char *p, *s; + char sl, pl; - p = pattern; - s = string; - - while (*p) { - switch (*p) { + while (*pattern) { + switch (*pattern) { case '?': - /* match single char except of / or end */ - if (*s == '/' || !*s) { + /* '?' matches a single char except of / and subject end */ + if (*subject == '/' || !*subject) { return false; } break; - case '\\': - /* \ escapes next * or ?
char */ - if (*(p + 1) == '*' || *(p + 1) == '?') { - p++; - if (*p != *s) { - return false; - } - } - break; - case '*': /* easiest case - the '*' ist the last char in pattern - this * will always match */ - if (*(p + 1) == '\0') { + if (!pattern[1]) { return true; } /* Try to match as much as possible. Try to match the complete * uri, if that fails move forward in uri and check for a * match. */ - i = strlen(s); - while (i >= 0 && !util_wildmatch(p + 1, s + i)) { + i = strlen(subject); + while (i >= 0 && !util_wildmatch(pattern + 1, subject + i)) { i--; } return i >= 0; + case '}': + /* spurious '}' in pattern */ + return false; + + case '{': + /* possible {foo,bar} pattern */ + return match_list(pattern, subject); + + case '\\': + /* '\' escapes next special char */ + if (strchr("*?{}", pattern[1])) { + pattern++; + if (*pattern != *subject) { + return false; + } + } + break; + default: - ul = *s; - if (VB_IS_UPPER(ul)) { - ul += 'a' - 'A'; + /* compare case insensitive */ + sl = *subject; + if (VB_IS_UPPER(sl)) { + sl += 'a' - 'A'; } - pl = *p; + pl = *pattern; if (VB_IS_UPPER(pl)) { pl += 'a' - 'A'; } - if (ul != pl) { + if (sl != pl) { return false; } break; } - p++; - s++; + /* do another loop run with next pattern and subject char */ + pattern++; + subject++; + } + + /* on end of pattern only a also ended subject is a match */ + return !*subject; +} + +static int match_list(const char *pattern, const char *subject) +{ + const char *end, *s; + + /* finde the next none escaped '}' */ + for (end = pattern; *end && *end != '}'; end++) { + /* if escape char - move pointer one additional step */ + if (*end == '\\') { + end++; + } } - /* if there is uri left on pattern end - this is no match */ - if (!*p) { - return !*s; + if (!*end) { + /* unterminated '{' in pattern */ + return false; } - return false; + s = subject; + end++; /* skip over } */ + pattern++; /* skip over { */ + while (true) { + switch (*pattern) { + case ',': + if (util_wildmatch(end, s)) { + 
return true; + } + s = subject; + pattern++; + break; + + case '}': + return util_wildmatch(end, s); + + case '\\': + if (pattern[1] == ',' || pattern[1] == '}' || pattern[1] == '{') { + pattern += 1; + } + /* fall through */ + + default: + if (*pattern == *s) { + pattern++; + s++; + } else { + /* this item of the list does not match - move forward to + * the next none escaped ',' or '}' */ + s = subject; + while (*pattern != ',' && *pattern != '}') { + /* if escape char is found - skip next char */ + if (*pattern == '\\') { + pattern++; + } + pattern++; + } + /* found ',' skip over it to check the next list item */ + if (*pattern == ',') { + pattern++; + } + } + } + } } + /** * Fills the given list store by matching data of also given src list. */ diff --git a/tests/test-util.c b/tests/test-util.c index a8d42f7..38b7bf2 100644 --- a/tests/test-util.c +++ b/tests/test-util.c @@ -170,6 +170,8 @@ static void test_wildmatch_questionmark(void) g_assert_false(util_wildmatch("foo\\?bar", "foorbar")); g_assert_false(util_wildmatch("?", "")); g_assert_false(util_wildmatch("b??r", "bar")); + /* ? 
does not match / in contrast to * which does */ + g_assert_false(util_wildmatch("user?share", "user/share")); } static void test_wildmatch_wildcard(void) @@ -180,7 +182,10 @@ static void test_wildmatch_wildcard(void) g_assert_true(util_wildmatch("match*", "match suffix")); g_assert_true(util_wildmatch("match*", "match*")); g_assert_true(util_wildmatch("match\\*", "match*")); + g_assert_true(util_wildmatch("match\\\\*", "match\\*")); g_assert_true(util_wildmatch("do * match", "do a infix match")); + /* '*' matches also / in contrast to other implementations */ + g_assert_true(util_wildmatch("start*end", "start/something/end")); g_assert_true(util_wildmatch("*://*.io/*", "http://fanglingsu.github.io/vimb/")); /* multiple * should act like a single one */ g_assert_true(util_wildmatch("**", "")); @@ -191,6 +196,58 @@ static void test_wildmatch_wildcard(void) g_assert_false(util_wildmatch("f***u", "full")); } +static void test_wildmatch_curlybraces(void) +{ + g_assert_true(util_wildmatch("{foo}", "foo")); + g_assert_true(util_wildmatch("{foo,bar}", "foo")); + g_assert_true(util_wildmatch("{foo,bar}", "bar")); + g_assert_true(util_wildmatch("foo{lish,t}bar", "foolishbar")); + g_assert_true(util_wildmatch("foo{lish,t}bar", "footbar")); + /* esacped special chars */ + g_assert_true(util_wildmatch("foo\\{l\\}bar", "foo{l}bar")); + g_assert_true(util_wildmatch("ba{r,z\\{\\}}", "bar")); + g_assert_true(util_wildmatch("ba{r,z\\{\\}}", "baz{}")); + g_assert_true(util_wildmatch("test{one\\,two,three}", "testone,two")); + g_assert_true(util_wildmatch("test{one\\,two,three}", "testthree")); + /* backslash before none special char is a normal char */ + g_assert_true(util_wildmatch("back{\\slash,}", "back\\slash")); + g_assert_true(util_wildmatch("one\\two", "one\\two")); + g_assert_true(util_wildmatch("\\}match", "}match")); + g_assert_true(util_wildmatch("\\{", "{")); + /* empty list parts */ + g_assert_true(util_wildmatch("{}", "")); + g_assert_true(util_wildmatch("{,}", "")); + 
g_assert_true(util_wildmatch("{,foo}", "")); + g_assert_true(util_wildmatch("{,foo}", "foo")); + g_assert_true(util_wildmatch("{bar,}", "")); + g_assert_true(util_wildmatch("{bar,}", "bar")); + /* no special meaning of ? and * in curly braces */ + g_assert_true(util_wildmatch("ab{*,cd}ef", "ab*ef")); + g_assert_true(util_wildmatch("ab{d,?}ef", "ab?ef")); + + g_assert_false(util_wildmatch("{foo,bar}", "foo,bar")); + g_assert_false(util_wildmatch("}match{ it", "}match{ anything")); + /* don't match single parts that are seperated by escaped ',' */ + g_assert_false(util_wildmatch("{a,b\\,c,d}", "b")); + g_assert_false(util_wildmatch("{a,b\\,c,d}", "c")); + /* lonesome braces - this is a syntax error and will always be false */ + g_assert_false(util_wildmatch("}", "}")); + g_assert_false(util_wildmatch("}", "")); + g_assert_false(util_wildmatch("}suffix", "}suffux")); + g_assert_false(util_wildmatch("}suffix", "suffux")); + g_assert_false(util_wildmatch("{", "{")); + g_assert_false(util_wildmatch("{", "")); + g_assert_false(util_wildmatch("{foo", "{foo")); + g_assert_false(util_wildmatch("{foo", "foo")); + g_assert_false(util_wildmatch("foo{bar", "foo{bar")); +} + +static void test_wildmatch_complete(void) +{ + g_assert_true(util_wildmatch("http{s,}://{fanglingsu.,}github.{io,com}/*vimb/", "http://fanglingsu.github.io/vimb/")); + g_assert_true(util_wildmatch("http{s,}://{fanglingsu.,}github.{io,com}/*vimb/", "https://github.com/fanglingsu/vimb/")); +} + int main(int argc, char *argv[]) { g_test_init(&argc, &argv, NULL); @@ -205,6 +262,8 @@ int main(int argc, char *argv[]) g_test_add_func("/test-util/wildmatch-simple", test_wildmatch_simple); g_test_add_func("/test-util/wildmatch-questionmark", test_wildmatch_questionmark); g_test_add_func("/test-util/wildmatch-wildcard", test_wildmatch_wildcard); + g_test_add_func("/test-util/wildmatch-curlybraces", test_wildmatch_curlybraces); + g_test_add_func("/test-util/wildmatch-complete", test_wildmatch_complete); return 
g_test_run(); } -- 2.20.1