Added curly brace pattern matching for auto commands (#100).
author Daniel Carl <danielcarl@gmx.de>
Sat, 6 Sep 2014 12:37:38 +0000 (14:37 +0200)
committer Daniel Carl <danielcarl@gmx.de>
Sat, 6 Sep 2014 20:14:20 +0000 (22:14 +0200)
This allows patterns such as {foo,bar} to match either 'foo' or 'bar'. This is
really useful for protocol matching, for example
http{s,}://{www,mail,maps}.ugly-domain.com/*.

src/util.c
tests/test-util.c

index 1084482..7101bca 100644 (file)
@@ -28,6 +28,9 @@
 
 extern VbCore vb;
 
+static int match_list(const char *pattern, const char *subject);
+
+
 char *util_get_config_dir(void)
 {
     char *path = g_build_filename(g_get_user_config_dir(), PROJECT, NULL);
@@ -509,79 +512,149 @@ gboolean util_parse_expansion(const char **input, GString *str, int flags,
 
 /**
  * Compares given string against also given pattern.
- * *    matches any sequence of characters
- * ?    matches any single character except of /
- * \?   matches a ?
+ *
+ * *         Matches any sequence of characters.
+ * ?         Matches any single character except of '/'.
+ * {foo,bar} Matches foo or bar - '{', ',' and '}' within this pattern must be
+ *           escaped by '\'. '*' and '?' have no special meaning within the
+ *           curly braces.
+ * *?{}      these chars must always be escaped by '\' to match them literally
  */
-gboolean util_wildmatch(const char *pattern, const char *string)
+gboolean util_wildmatch(const char *pattern, const char *subject)
 {
     int i;
-    char ul, pl;
-    const char *p, *s;
+    char sl, pl;
 
-    p = pattern;
-    s = string;
-
-    while (*p) {
-        switch (*p) {
+    while (*pattern) {
+        switch (*pattern) {
             case '?':
-                /* match single char except of / or end */
-                if (*s == '/' || !*s) {
+                /* '?' matches a single char except of / and subject end */
+                if (*subject == '/' || !*subject) {
                     return false;
                 }
                 break;
 
-            case '\\':
-                /* \ escapes next * or ? char */
-                if (*(p + 1) == '*' || *(p + 1) == '?') {
-                    p++;
-                    if (*p != *s) {
-                        return false;
-                    }
-                }
-                break;
-
             case '*':
                 /* easiest case - the '*' ist the last char in pattern - this
                  * will always match */
-                if (*(p + 1) == '\0') {
+                if (!pattern[1]) {
                     return true;
                 }
                 /* Try to match as much as possible. Try to match the complete
                  * uri, if that fails move forward in uri and check for a
                  * match. */
-                i = strlen(s);
-                while (i >= 0 && !util_wildmatch(p + 1, s + i)) {
+                i = strlen(subject);
+                while (i >= 0 && !util_wildmatch(pattern + 1, subject + i)) {
                     i--;
                 }
                 return i >= 0;
 
+            case '}':
+                /* spurious '}' in pattern */
+                return false;
+
+            case '{':
+                /* possible {foo,bar} pattern */
+                return match_list(pattern, subject);
+
+            case '\\':
+                /* '\' escapes next special char */
+                if (strchr("*?{}", pattern[1])) {
+                    pattern++;
+                    if (*pattern != *subject) {
+                        return false;
+                    }
+                }
+                break;
+
             default:
-                ul = *s;
-                if (VB_IS_UPPER(ul)) {
-                    ul += 'a' - 'A';
+                /* compare case insensitive */
+                sl = *subject;
+                if (VB_IS_UPPER(sl)) {
+                    sl += 'a' - 'A';
                 }
-                pl = *p;
+                pl = *pattern;
                 if (VB_IS_UPPER(pl)) {
                     pl += 'a' - 'A';
                 }
-                if (ul != pl) {
+                if (sl != pl) {
                     return false;
                 }
                 break;
         }
-        p++;
-        s++;
+        /* do another loop run with next pattern and subject char */
+        pattern++;
+        subject++;
+    }
+
+    /* on end of pattern only a also ended subject is a match */
+    return !*subject;
+}
+
+static int match_list(const char *pattern, const char *subject)
+{
+    const char *end, *s;
+
+    /* finde the next none escaped '}' */
+    for (end = pattern; *end && *end != '}'; end++) {
+        /* if escape char - move pointer one additional step */
+        if (*end == '\\') {
+            end++;
+        }
     }
 
-    /* if there is uri left on pattern end - this is no match */
-    if (!*p) {
-        return !*s;
+    if (!*end) {
+        /* unterminated '{' in pattern */
+        return false;
     }
 
-    return false;
+    s = subject;
+    end++;      /* skip over } */
+    pattern++;  /* skip over { */
+    while (true) {
+        switch (*pattern) {
+            case ',':
+                if (util_wildmatch(end, s)) {
+                    return true;
+                }
+                s = subject;
+                pattern++;
+                break;
+
+            case '}':
+                return util_wildmatch(end, s);
+
+            case '\\':
+                if (pattern[1] == ',' || pattern[1] == '}' || pattern[1] == '{') {
+                    pattern += 1;
+                }
+                /* fall through */
+
+            default:
+                if (*pattern == *s) {
+                    pattern++;
+                    s++;
+                } else {
+                    /* this item of the list does not match - move forward to
+                     * the next none escaped ',' or '}' */
+                    s = subject;
+                    while (*pattern != ',' && *pattern != '}') {
+                        /* if escape char is found - skip next char */
+                        if (*pattern == '\\') {
+                            pattern++;
+                        }
+                        pattern++;
+                    }
+                    /* found ',' skip over it to check the next list item */
+                    if (*pattern == ',') {
+                        pattern++;
+                    }
+                }
+        }
+    }
 }
 
+
 /**
  * Fills the given list store by matching data of also given src list.
  */
index a8d42f7..38b7bf2 100644 (file)
@@ -170,6 +170,8 @@ static void test_wildmatch_questionmark(void)
     g_assert_false(util_wildmatch("foo\\?bar", "foorbar"));
     g_assert_false(util_wildmatch("?", ""));
     g_assert_false(util_wildmatch("b??r", "bar"));
+    /* ? does not match / in contrast to * which does */
+    g_assert_false(util_wildmatch("user?share", "user/share"));
 }
 
 static void test_wildmatch_wildcard(void)
@@ -180,7 +182,10 @@ static void test_wildmatch_wildcard(void)
     g_assert_true(util_wildmatch("match*", "match suffix"));
     g_assert_true(util_wildmatch("match*", "match*"));
     g_assert_true(util_wildmatch("match\\*", "match*"));
+    g_assert_true(util_wildmatch("match\\\\*", "match\\*"));
     g_assert_true(util_wildmatch("do * match", "do a infix match"));
+    /* '*' matches also / in contrast to other implementations */
+    g_assert_true(util_wildmatch("start*end", "start/something/end"));
     g_assert_true(util_wildmatch("*://*.io/*", "http://fanglingsu.github.io/vimb/"));
     /* multiple * should act like a single one */
     g_assert_true(util_wildmatch("**", ""));
@@ -191,6 +196,58 @@ static void test_wildmatch_wildcard(void)
     g_assert_false(util_wildmatch("f***u", "full"));
 }
 
+static void test_wildmatch_curlybraces(void)
+{
+    g_assert_true(util_wildmatch("{foo}", "foo"));
+    g_assert_true(util_wildmatch("{foo,bar}", "foo"));
+    g_assert_true(util_wildmatch("{foo,bar}", "bar"));
+    g_assert_true(util_wildmatch("foo{lish,t}bar", "foolishbar"));
+    g_assert_true(util_wildmatch("foo{lish,t}bar", "footbar"));
+    /* esacped special chars */
+    g_assert_true(util_wildmatch("foo\\{l\\}bar", "foo{l}bar"));
+    g_assert_true(util_wildmatch("ba{r,z\\{\\}}", "bar"));
+    g_assert_true(util_wildmatch("ba{r,z\\{\\}}", "baz{}"));
+    g_assert_true(util_wildmatch("test{one\\,two,three}", "testone,two"));
+    g_assert_true(util_wildmatch("test{one\\,two,three}", "testthree"));
+    /* backslash before none special char is a normal char */
+    g_assert_true(util_wildmatch("back{\\slash,}", "back\\slash"));
+    g_assert_true(util_wildmatch("one\\two", "one\\two"));
+    g_assert_true(util_wildmatch("\\}match", "}match"));
+    g_assert_true(util_wildmatch("\\{", "{"));
+    /* empty list parts */
+    g_assert_true(util_wildmatch("{}", ""));
+    g_assert_true(util_wildmatch("{,}", ""));
+    g_assert_true(util_wildmatch("{,foo}", ""));
+    g_assert_true(util_wildmatch("{,foo}", "foo"));
+    g_assert_true(util_wildmatch("{bar,}", ""));
+    g_assert_true(util_wildmatch("{bar,}", "bar"));
+    /* no special meaning of ? and * in curly braces */
+    g_assert_true(util_wildmatch("ab{*,cd}ef", "ab*ef"));
+    g_assert_true(util_wildmatch("ab{d,?}ef", "ab?ef"));
+
+    g_assert_false(util_wildmatch("{foo,bar}", "foo,bar"));
+    g_assert_false(util_wildmatch("}match{ it", "}match{ anything"));
+    /* don't match single parts that are seperated by escaped ',' */
+    g_assert_false(util_wildmatch("{a,b\\,c,d}", "b"));
+    g_assert_false(util_wildmatch("{a,b\\,c,d}", "c"));
+    /* lonesome braces - this is a syntax error and will always be false */
+    g_assert_false(util_wildmatch("}", "}"));
+    g_assert_false(util_wildmatch("}", ""));
+    g_assert_false(util_wildmatch("}suffix", "}suffux"));
+    g_assert_false(util_wildmatch("}suffix", "suffux"));
+    g_assert_false(util_wildmatch("{", "{"));
+    g_assert_false(util_wildmatch("{", ""));
+    g_assert_false(util_wildmatch("{foo", "{foo"));
+    g_assert_false(util_wildmatch("{foo", "foo"));
+    g_assert_false(util_wildmatch("foo{bar", "foo{bar"));
+}
+
+static void test_wildmatch_complete(void)
+{
+    g_assert_true(util_wildmatch("http{s,}://{fanglingsu.,}github.{io,com}/*vimb/", "http://fanglingsu.github.io/vimb/"));
+    g_assert_true(util_wildmatch("http{s,}://{fanglingsu.,}github.{io,com}/*vimb/", "https://github.com/fanglingsu/vimb/"));
+}
+
 int main(int argc, char *argv[])
 {
     g_test_init(&argc, &argv, NULL);
@@ -205,6 +262,8 @@ int main(int argc, char *argv[])
     g_test_add_func("/test-util/wildmatch-simple", test_wildmatch_simple);
     g_test_add_func("/test-util/wildmatch-questionmark", test_wildmatch_questionmark);
     g_test_add_func("/test-util/wildmatch-wildcard", test_wildmatch_wildcard);
+    g_test_add_func("/test-util/wildmatch-curlybraces", test_wildmatch_curlybraces);
+    g_test_add_func("/test-util/wildmatch-complete", test_wildmatch_complete);
 
     return g_test_run();
 }