[#23168] File.fnmatch のリファクタリング — "H.Yamamoto" <ocean@...2.ccsnet.ne.jp>

山本です。

13 messages 2004/03/08

[#23192] File.fnmatch と Dir.glob の非互換部分 — "H.Yamamoto" <ocean@...2.ccsnet.ne.jp>

山本です。

19 messages 2004/03/13
[#23194] Re: File.fnmatch と Dir.glob の非互換部分 — matz@... (Yukihiro Matsumoto) 2004/03/13

まつもと ゆきひろです

[#23195] Re: File.fnmatch とDir.glob の非互換部分 — "H.Yamamoto" <ocean@...2.ccsnet.ne.jp> 2004/03/14

山本です。

[#23196] Re: File.fnmatch とDir.glob の非互換部分 — "H.Yamamoto" <ocean@...2.ccsnet.ne.jp> 2004/03/14

山本です。

[#23260] Re: File.fnmatch とDir.glob の非互換部分 — "H.Yamamoto" <ocean@...2.ccsnet.ne.jp> 2004/03/30

山本です。

[#23261] Re: File.fnmatch とDir.glob の非互換部分 — matz@... (Yukihiro Matsumoto) 2004/03/30

まつもと ゆきひろです

[#23265] Re: File.fnmatch とDir.glob の非互換部分 — "H.Yamamoto" <ocean@...2.ccsnet.ne.jp> 2004/03/30

山本です。

[#23238] Re: [ruby-cvs] ruby, ruby/lib, ruby/lib/rss, ruby/sample/openssl: * lib/logger.rb: trim tail space of each line. no user visible change. — Kouhei Sutou <kou@...>

須藤です.

10 messages 2004/03/27

[ruby-dev:23113] Re: File.fnmatchとDir.glob

From: "H.Yamamoto" <ocean@...2.ccsnet.ne.jp>
Date: 2004-03-03 12:12:36 UTC
List: ruby-dev #23113
山本です。

[ruby-dev:23030] の fnmatch を、dir.c にマージしてみました。
一週間使ってみましたが、私の使用範囲では問題ありませんでした。

http://www.ccsnet.ne.jp/~ocean/23113/dir.c にも置いておきます。

cvs diff -u -wb -p dir.c (in directory E:\ruby-cvs\ruby\)
Index: dir.c
===================================================================
RCS file: /ruby/ruby/dir.c,v
retrieving revision 1.109
diff -u -w -b -p -r1.109 dir.c
--- dir.c	24 Feb 2004 09:49:59 -0000	1.109
+++ dir.c	3 Mar 2004 11:58:08 -0000
@@ -68,7 +68,7 @@ char *strchr _((char*,char));
 #endif
 
 #define FNM_NOESCAPE	0x01
-#define FNM_PATHNAME	0x02
+#define FNM_SEPMATCH	0x02
 #define FNM_DOTMATCH	0x04
 #define FNM_CASEFOLD	0x08
 
@@ -164,142 +164,174 @@ CompareImpl(p1, p2, nocase)
 }
 #endif /* environment */
 
-#if defined DOSISH
-#define isdirsep(c) ((c) == '/' || (c) == '\\')
-#else
-#define isdirsep(c) ((c) == '/')
-#endif
-
-static char *
-range(pat, test, flags)
-    char *pat;
-    char *test;
+static int
+bracket(p, pend, test, flags)
+    const char *p;
+    const char *pend;
+    const char *test;
     int flags;
 {
-    int not, ok = 0;
-    int nocase = flags & FNM_CASEFOLD;
-    int escape = !(flags & FNM_NOESCAPE);
+    const int nocase = flags & FNM_CASEFOLD;
 
-    not = *pat == '!' || *pat == '^';
-    if (not)
-	pat++;
-
-    while (*pat) {
-	char *pstart, *pend;
-	pstart = pend = pat;
-	if (*pstart == ']')
-	    return ok == not ? 0 : ++pat;
-	else if (escape && *pstart == '\\')
-	    pstart = pend = ++pat;
-	Inc(pat);
-	if (*pat == '-' && pat[1] != ']') {
-	    if (escape && pat[1] == '\\')
-		pat++;
-	    pend = pat+1;
-	    if (!*pend)
-		return 0;
-	    pat = Next(pend);
+    int ok = 1;
+
+    if (p < pend && (*p == '!' || *p == '^')) {
+	ok = 0;
+	p++;
 	}
-	if (Compare(pstart, test) <= 0 && Compare(test, pend) <= 0)
-	    ok = 1;
+
+    while (p < pend) {
+	const char *t1 = p;
+	Inc(p);
+	if (p < pend && *p == '-') {
+	    const char *t2 = p + 1;
+	    if (t2 < pend) /* range */
+		if (Compare(t1, test) <= 0 && Compare(test, t2) <= 0)
+		    return ok;
+		else {
+		    p = Next(t2);
+		    if (p < pend && *p == '-') /* undefined behavoir in SUSv3 */
+			p = t2;
+		    continue;
     }
-    return 0;
 }
+	if (Compare(t1, test) == 0)
+	    return ok;
+    }
+    return !ok;
+}
+
+/* If FNM_SEPMATCH is set, the entire string is matched.
+ * Otherwise, only a single path element is matched (up to '/' or '\0').
+ * The end marker itself is not compared. */
+
+#define ISEND(c) (!(c) || (pathname && (c) == '/'))
+#define RETURN(val) return *pcur = p, *scur = s, val;
 
-#define ISDIRSEP(c) (pathname && isdirsep(c))
-#define PERIOD_S() (period && *s == '.' && \
-    (!s_prev || ISDIRSEP(*s_prev)))
-#define INC_S() (s = Next(s_prev = s))
 static int
-fnmatch(pat, string, flags)
-    const char *pat;
-    const char *string;
+fnmatch_helper(p, pcur, s, scur, flags)
+    const char *p; /* pattern */
+    const char **pcur;
+    const char *s; /* string */
+    const char **scur;
     int flags;
 {
-    int c;
-    const char *test;
-    const char *s = string, *s_prev = 0;
-    int escape = !(flags & FNM_NOESCAPE);
-    int pathname = flags & FNM_PATHNAME;
-    int period = !(flags & FNM_DOTMATCH);
-    int nocase = flags & FNM_CASEFOLD;
+    const int period = !(flags & FNM_DOTMATCH);
+    const int escape = !(flags & FNM_NOESCAPE);
+    const int nocase = flags & FNM_CASEFOLD;
+    const int pathname = !(flags & FNM_SEPMATCH);
 
-    while (c = *pat) {
-	switch (c) {
-	  case '?':
-	    if (!*s || ISDIRSEP(*s) || PERIOD_S())
-		return FNM_NOMATCH;
-	    INC_S();
-	    ++pat;
-	    break;
+    const char *ptmp = 0;
+    const char *stmp = 0;
 
-	  case '*':
-	    while ((c = *++pat) == '*')
-		;
+    if (period && *s == '.' && *p != '.') /* leading period */
+	RETURN(FNM_NOMATCH);
 
-	    if (PERIOD_S())
-		return FNM_NOMATCH;
+    while (!ISEND(*p) || !ISEND(*s)) {
+	if (*p == '*') {
+	    do { p++; } while (*p == '*');
+	    if (ISEND(*p)) {
+		RETURN(0);
+	    }
+	    ptmp = p;
+	    stmp = s;
+	}
+	if (ISEND(*p)) {
+	    goto failed;
+	}
+	if (ISEND(*s)) {
+	    RETURN(FNM_NOMATCH);
+	}
+	switch (*p) {
+	  case '?':
+	    p++;
+	    Inc(s);
+	    continue;
 
-	    if (!c) {
-		if (pathname && *rb_path_next(s))
-		    return FNM_NOMATCH;
-		else
-		    return 0;
+	  case '[': {
+	    const char *t = ++p;
+	    if (*t == ']') /* bracket expression includes ']' */
+		t++;
+	    while (*t != ']' && !ISEND(*t))
+		Inc(t);
+	    if (*t == ']') { /* bracket expression */
+		if (bracket(p, t, s, flags)) {
+		    p = t + 1;
+		    Inc(s);
+		    continue;
 	    }
-	    else if (ISDIRSEP(c)) {
-		s = rb_path_next(s);
-		if (*s) {
-		    INC_S();
-		    ++pat;
-		    break;
+		else {
+		    p = t + 1;
+		/*  Inc(s); */ /* for performance */
+		    goto failed;
                 }
-		return FNM_NOMATCH;
+	    }
+	    p--; /* treat ']' as ordinary character */
+	    break; /* goto ordinary */
 	    }
 
-	    test = escape && c == '\\' ? pat+1 : pat;
-	    while (*s) {
-		if ((c == '?' || c == '[' || Compare(s, test) == 0) &&
-		    !fnmatch(pat, s, flags | FNM_DOTMATCH))
-		    return 0;
-		else if (ISDIRSEP(*s))
-		    break;
-		INC_S();
+	  case '\\':
+	    if (escape && p[1])
+		p++;
+	    break; /* goto ordinary */
 	    }
-	    return FNM_NOMATCH;
 
-	  case '[':
-	    if (!*s || ISDIRSEP(*s) || PERIOD_S())
-		return FNM_NOMATCH;
-	    pat = range(pat+1, s, flags);
-	    if (!pat)
-		return FNM_NOMATCH;
-	    INC_S();
-	    break;
+	/* ordinary */
+	if (Compare(p, s) != 0) {
+	    goto failed;
+	}
+	Inc(p);
+	Inc(s);
+	continue;
 
-	  case '\\':
-	    if (escape && pat[1]
-#if defined DOSISH
-		&& strchr("*?[]\\", pat[1])
-#endif
-		) {
-		c = *++pat;
+      failed: /* try next '*' position */
+	if (ptmp /* && stmp */) {
+	    p = ptmp;
+	    Inc(stmp);
+	    s = stmp;
+	    continue;
+	}
+	else {
+	    RETURN(FNM_NOMATCH);
+	}
+    }
+    RETURN(0);
 	    }
-	    /* FALLTHROUGH */
 
-	  default:
-#if defined DOSISH
-	    if (ISDIRSEP(c) && isdirsep(*s))
-		;
-	    else
-#endif
-	    if (Compare(pat, s) != 0)
-		return FNM_NOMATCH;
-	    INC_S();
-	    Inc(pat);
-	    break;
+static int
+fnmatch(p, s, flags)
+    const char *p; /* pattern */
+    const char *s; /* string */
+    int flags;
+{
+    const int pathname = !(flags & FNM_SEPMATCH);
+    const int period = !(flags & FNM_DOTMATCH);
+
+    const char *pcur, *scur;
+
+    if (pathname) {
+	int recursive = 0;
+	if (p[0] == '*' && p[1] == '*' && p[2] == '/') {
+	    do { p += 3; } while (p[0] == '*' && p[1] == '*' && p[2] == '/');
+	    recursive = 1;
+	}
+	if (fnmatch_helper(p, &pcur, s, &scur, flags) == 0) {
+	    while (*pcur && *pcur != '/') Inc(pcur);
+	    while (*scur && *scur != '/') Inc(scur);
+	    if (!*pcur && !*scur)
+		return 0;
+	    if (*pcur && *scur && fnmatch(pcur + 1, scur + 1, flags) == 0)
+		return 0;
+	}
+	if (recursive && !(period && *s == '.')) {
+	    while (*scur && *scur != '/') Inc(scur);
+	    if (*scur)
+		return fnmatch(p - 3, scur + 1, flags); /* recurse at next element */
 	}
+	return FNM_NOMATCH;
     }
-    return !*s ? 0 : FNM_NOMATCH;
+    else
+	return fnmatch_helper(p, &pcur, s, &scur, flags);
 }
 
 VALUE rb_cDir;
@@ -920,15 +952,17 @@ has_magic(s, flags)
 	    return 1;
 
 	  case '[':	/* Only accept an open brace if there is a close */
-	    open++;	/* brace to match it.  Bracket expressions must be */
-	    continue;	/* complete, according to Posix.2 */
+	    open = 1;	    /* brace to match it.  Bracket expressions must be */
+	    if (*p == ']')  /* complete, according to Posix.2 */
+		p++;
+	    continue;
 	  case ']':
 	    if (open)
 		return 1;
 	    continue;
 
 	  case '\\':
-	    if (escape && !(c = *p++))
+	    if (!open && escape && !(c = *p++))
 		return 0;
 	    continue;
 	}
@@ -979,13 +1013,13 @@ glob_make_pattern(p, flags)
     int flags;
 {
     char *buf;
-    int dirsep = 0; /* pattern terminates with '/' */
+    int dirsep = 0; /* pattern is terminated with '/' */
     struct glob_pattern *list, *tmp, **tail = &list;
 
     while (*p) {
 	tmp = ALLOC(struct glob_pattern);
 	if (p[0] == '*' && p[1] == '*' && p[2] == '/') {
-	    /* fold continuous RECURSIVEs */
+	    /* fold consecutive RECURSIVEs (needed in glob_helper) */
 	    do { p += 3; } while (p[0] == '*' && p[1] == '*' && p[2] == '/');
 	    tmp->type = RECURSIVE;
 	    tmp->str = 0;
@@ -1152,7 +1186,6 @@ glob_helper(path, dirsep, exist, isdir, 
 		isdir = NO;
 	    }
 	}
-
 	if (match_dir && isdir == UNKNOWN) {
 	    if (do_stat(path, &st) == 0) {
 		exist = YES;
@@ -1163,12 +1196,10 @@ glob_helper(path, dirsep, exist, isdir, 
 		isdir = NO;
 	    }
 	}
-
 	if (match_all && exist == YES) {
 	    status = glob_call_func(func, path, arg);
 	    if (status) return status;
 	}
-
 	if (match_dir && isdir == YES) {
 	    char *buf = join_path(path, dirsep, "");
 	    status = glob_call_func(func, buf, arg);
@@ -1232,7 +1263,7 @@ glob_helper(path, dirsep, exist, isdir, 
 
 	for (cur = copy_beg; cur < copy_end; ++cur) {
 	    if (*cur) {
-		char *buf, *name;
+		char *name, *buf;
 		name = ALLOC_N(char, strlen((*cur)->str) + 1);
 		strcpy(name, (*cur)->str);
 		if (escape) remove_backslashes(name);
@@ -1275,16 +1306,18 @@ rb_glob2(path, flags, func, arg)
     int status;
 
     if (flags & FNM_CASEFOLD) {
-	rb_warn("Dir.glob() ignores File::FNM_CASEFOLD");
+	rb_raise(rb_eArgError, "cannot use File::FNM_CASEFOLD");
+    }
+    if (flags & FNM_SEPMATCH) {
+	rb_raise(rb_eArgError, "cannot use File::FNM_SEPMATCH");
     }
 
 #if defined DOSISH
     flags |= FNM_CASEFOLD;
     root = rb_path_skip_prefix(root);
-#else
-    flags &= ~FNM_CASEFOLD;
 #endif
 
+    flags |= FNM_SEPMATCH; /* a bit faster */
     if (*root == '/') root++;
 
     n = root - path;
@@ -1654,7 +1687,7 @@ Init_Dir()
     rb_define_singleton_method(rb_cFile,"fnmatch?", file_s_fnmatch, -1);
 
     rb_file_const("FNM_NOESCAPE", INT2FIX(FNM_NOESCAPE));
-    rb_file_const("FNM_PATHNAME", INT2FIX(FNM_PATHNAME));
+    rb_file_const("FNM_SEPMATCH", INT2FIX(FNM_SEPMATCH));
     rb_file_const("FNM_DOTMATCH", INT2FIX(FNM_DOTMATCH));
     rb_file_const("FNM_CASEFOLD", INT2FIX(FNM_CASEFOLD));
 }


In This Thread