[ruby-dev:23113] Re: File.fnmatchとDir.glob
From:
"H.Yamamoto" <ocean@...2.ccsnet.ne.jp>
Date:
2004-03-03 12:12:36 UTC
List:
ruby-dev #23113
山本です。
[ruby-dev:23030] の fnmatch を、dir.c にマージしてみました。
一週間使ってみましたが、私の使用範囲では問題ありませんでした。
http://www.ccsnet.ne.jp/~ocean/23113/dir.c にも置いておきます。
cvs diff -u -wb -p dir.c (in directory E:\ruby-cvs\ruby\)
Index: dir.c
===================================================================
RCS file: /ruby/ruby/dir.c,v
retrieving revision 1.109
diff -u -w -b -p -r1.109 dir.c
--- dir.c 24 Feb 2004 09:49:59 -0000 1.109
+++ dir.c 3 Mar 2004 11:58:08 -0000
@@ -68,7 +68,7 @@ char *strchr _((char*,char));
#endif
#define FNM_NOESCAPE 0x01
-#define FNM_PATHNAME 0x02
+#define FNM_SEPMATCH 0x02
#define FNM_DOTMATCH 0x04
#define FNM_CASEFOLD 0x08
@@ -164,142 +164,174 @@ CompareImpl(p1, p2, nocase)
}
#endif /* environment */
-#if defined DOSISH
-#define isdirsep(c) ((c) == '/' || (c) == '\\')
-#else
-#define isdirsep(c) ((c) == '/')
-#endif
-
-static char *
-range(pat, test, flags)
- char *pat;
- char *test;
+static int
+bracket(p, pend, test, flags)
+ const char *p;
+ const char *pend;
+ const char *test;
int flags;
{
- int not, ok = 0;
- int nocase = flags & FNM_CASEFOLD;
- int escape = !(flags & FNM_NOESCAPE);
+ const int nocase = flags & FNM_CASEFOLD;
- not = *pat == '!' || *pat == '^';
- if (not)
- pat++;
-
- while (*pat) {
- char *pstart, *pend;
- pstart = pend = pat;
- if (*pstart == ']')
- return ok == not ? 0 : ++pat;
- else if (escape && *pstart == '\\')
- pstart = pend = ++pat;
- Inc(pat);
- if (*pat == '-' && pat[1] != ']') {
- if (escape && pat[1] == '\\')
- pat++;
- pend = pat+1;
- if (!*pend)
- return 0;
- pat = Next(pend);
+ int ok = 1;
+
+ if (p < pend && (*p == '!' || *p == '^')) {
+ ok = 0;
+ p++;
}
- if (Compare(pstart, test) <= 0 && Compare(test, pend) <= 0)
- ok = 1;
+
+ while (p < pend) {
+ const char *t1 = p;
+ Inc(p);
+ if (p < pend && *p == '-') {
+ const char *t2 = p + 1;
+ if (t2 < pend) /* range */
+ if (Compare(t1, test) <= 0 && Compare(test, t2) <= 0)
+ return ok;
+ else {
+ p = Next(t2);
+ if (p < pend && *p == '-') /* undefined behavior in SUSv3 */
+ p = t2;
+ continue;
}
- return 0;
}
+ if (Compare(t1, test) == 0)
+ return ok;
+ }
+ return !ok;
+}
+
+/* If FNM_SEPMATCH is set, the entire string will be matched.
+ * Otherwise, only one path element will be matched (up to '/' or '\0').
+ * The end marker itself won't be compared. */
+
+#define ISEND(c) (!(c) || (pathname && (c) == '/'))
+#define RETURN(val) return *pcur = p, *scur = s, val;
-#define ISDIRSEP(c) (pathname && isdirsep(c))
-#define PERIOD_S() (period && *s == '.' && \
- (!s_prev || ISDIRSEP(*s_prev)))
-#define INC_S() (s = Next(s_prev = s))
static int
-fnmatch(pat, string, flags)
- const char *pat;
- const char *string;
+fnmatch_helper(p, pcur, s, scur, flags)
+ const char *p; /* pattern */
+ const char **pcur;
+ const char *s; /* string */
+ const char **scur;
int flags;
{
- int c;
- const char *test;
- const char *s = string, *s_prev = 0;
- int escape = !(flags & FNM_NOESCAPE);
- int pathname = flags & FNM_PATHNAME;
- int period = !(flags & FNM_DOTMATCH);
- int nocase = flags & FNM_CASEFOLD;
+ const int period = !(flags & FNM_DOTMATCH);
+ const int escape = !(flags & FNM_NOESCAPE);
+ const int nocase = flags & FNM_CASEFOLD;
+ const int pathname = !(flags & FNM_SEPMATCH);
- while (c = *pat) {
- switch (c) {
- case '?':
- if (!*s || ISDIRSEP(*s) || PERIOD_S())
- return FNM_NOMATCH;
- INC_S();
- ++pat;
- break;
+ const char *ptmp = 0;
+ const char *stmp = 0;
- case '*':
- while ((c = *++pat) == '*')
- ;
+ if (period && *s == '.' && *p != '.') /* leading period */
+ RETURN(FNM_NOMATCH);
- if (PERIOD_S())
- return FNM_NOMATCH;
+ while (!ISEND(*p) || !ISEND(*s)) {
+ if (*p == '*') {
+ do { p++; } while (*p == '*');
+ if (ISEND(*p)) {
+ RETURN(0);
+ }
+ ptmp = p;
+ stmp = s;
+ }
+ if (ISEND(*p)) {
+ goto failed;
+ }
+ if (ISEND(*s)) {
+ RETURN(FNM_NOMATCH);
+ }
+ switch (*p) {
+ case '?':
+ p++;
+ Inc(s);
+ continue;
- if (!c) {
- if (pathname && *rb_path_next(s))
- return FNM_NOMATCH;
- else
- return 0;
+ case '[': {
+ const char *t = ++p;
+ if (*t == ']') /* bracket expression includes ']' */
+ t++;
+ while (*t != ']' && !ISEND(*t))
+ Inc(t);
+ if (*t == ']') { /* bracket expression */
+ if (bracket(p, t, s, flags)) {
+ p = t + 1;
+ Inc(s);
+ continue;
}
- else if (ISDIRSEP(c)) {
- s = rb_path_next(s);
- if (*s) {
- INC_S();
- ++pat;
- break;
+ else {
+ p = t + 1;
+ /* Inc(s); */ /* for performance */
+ goto failed;
}
- return FNM_NOMATCH;
+ }
+ p--; /* treat ']' as ordinary character */
+ break; /* goto ordinary */
}
- test = escape && c == '\\' ? pat+1 : pat;
- while (*s) {
- if ((c == '?' || c == '[' || Compare(s, test) == 0) &&
- !fnmatch(pat, s, flags | FNM_DOTMATCH))
- return 0;
- else if (ISDIRSEP(*s))
- break;
- INC_S();
+ case '\\':
+ if (escape && p[1])
+ p++;
+ break; /* goto ordinary */
}
- return FNM_NOMATCH;
- case '[':
- if (!*s || ISDIRSEP(*s) || PERIOD_S())
- return FNM_NOMATCH;
- pat = range(pat+1, s, flags);
- if (!pat)
- return FNM_NOMATCH;
- INC_S();
- break;
+ /* ordinary */
+ if (Compare(p, s) != 0) {
+ goto failed;
+ }
+ Inc(p);
+ Inc(s);
+ continue;
- case '\\':
- if (escape && pat[1]
-#if defined DOSISH
- && strchr("*?[]\\", pat[1])
-#endif
- ) {
- c = *++pat;
+ failed: /* try next '*' position */
+ if (ptmp /* && stmp */) {
+ p = ptmp;
+ Inc(stmp);
+ s = stmp;
+ continue;
+ }
+ else {
+ RETURN(FNM_NOMATCH);
+ }
+ }
+ RETURN(0);
}
- /* FALLTHROUGH */
- default:
-#if defined DOSISH
- if (ISDIRSEP(c) && isdirsep(*s))
- ;
- else
-#endif
- if (Compare(pat, s) != 0)
- return FNM_NOMATCH;
- INC_S();
- Inc(pat);
- break;
+static int
+fnmatch(p, s, flags)
+ const char *p; /* pattern */
+ const char *s; /* string */
+ int flags;
+{
+ const int pathname = !(flags & FNM_SEPMATCH);
+ const int period = !(flags & FNM_DOTMATCH);
+
+ const char *pcur, *scur;
+
+ if (pathname) {
+ int recursive = 0;
+ if (p[0] == '*' && p[1] == '*' && p[2] == '/') {
+ do { p += 3; } while (p[0] == '*' && p[1] == '*' && p[2] == '/');
+ recursive = 1;
+ }
+ if (fnmatch_helper(p, &pcur, s, &scur, flags) == 0) {
+ while (*pcur && *pcur != '/') Inc(pcur);
+ while (*scur && *scur != '/') Inc(scur);
+ if (!*pcur && !*scur)
+ return 0;
+ if (*pcur && *scur && fnmatch(pcur + 1, scur + 1, flags) == 0)
+ return 0;
+ }
+ if (recursive && !(period && *s == '.')) {
+ while (*scur && *scur != '/') Inc(scur);
+ if (*scur)
+ return fnmatch(p - 3, scur + 1, flags); /* recurse at next element */
}
+ return FNM_NOMATCH;
}
- return !*s ? 0 : FNM_NOMATCH;
+ else
+ return fnmatch_helper(p, &pcur, s, &scur, flags);
}
VALUE rb_cDir;
@@ -920,15 +952,17 @@ has_magic(s, flags)
return 1;
case '[': /* Only accept an open brace if there is a close */
- open++; /* brace to match it. Bracket expressions must be */
- continue; /* complete, according to Posix.2 */
+ open = 1; /* brace to match it. Bracket expressions must be */
+ if (*p == ']') /* complete, according to Posix.2 */
+ p++;
+ continue;
case ']':
if (open)
return 1;
continue;
case '\\':
- if (escape && !(c = *p++))
+ if (!open && escape && !(c = *p++))
return 0;
continue;
}
@@ -979,13 +1013,13 @@ glob_make_pattern(p, flags)
int flags;
{
char *buf;
- int dirsep = 0; /* pattern terminates with '/' */
+ int dirsep = 0; /* pattern is terminated with '/' */
struct glob_pattern *list, *tmp, **tail = &list;
while (*p) {
tmp = ALLOC(struct glob_pattern);
if (p[0] == '*' && p[1] == '*' && p[2] == '/') {
- /* fold continuous RECURSIVEs */
+ /* fold continuous RECURSIVEs (needed in glob_helper) */
do { p += 3; } while (p[0] == '*' && p[1] == '*' && p[2] == '/');
tmp->type = RECURSIVE;
tmp->str = 0;
@@ -1152,7 +1186,6 @@ glob_helper(path, dirsep, exist, isdir,
isdir = NO;
}
}
-
if (match_dir && isdir == UNKNOWN) {
if (do_stat(path, &st) == 0) {
exist = YES;
@@ -1163,12 +1196,10 @@ glob_helper(path, dirsep, exist, isdir,
isdir = NO;
}
}
-
if (match_all && exist == YES) {
status = glob_call_func(func, path, arg);
if (status) return status;
}
-
if (match_dir && isdir == YES) {
char *buf = join_path(path, dirsep, "");
status = glob_call_func(func, buf, arg);
@@ -1232,7 +1263,7 @@ glob_helper(path, dirsep, exist, isdir,
for (cur = copy_beg; cur < copy_end; ++cur) {
if (*cur) {
- char *buf, *name;
+ char *name, *buf;
name = ALLOC_N(char, strlen((*cur)->str) + 1);
strcpy(name, (*cur)->str);
if (escape) remove_backslashes(name);
@@ -1275,16 +1306,18 @@ rb_glob2(path, flags, func, arg)
int status;
if (flags & FNM_CASEFOLD) {
- rb_warn("Dir.glob() ignores File::FNM_CASEFOLD");
+ rb_raise(rb_eArgError, "cannot use File::FNM_CASEFOLD");
+ }
+ if (flags & FNM_SEPMATCH) {
+ rb_raise(rb_eArgError, "cannot use File::FNM_SEPMATCH");
}
#if defined DOSISH
flags |= FNM_CASEFOLD;
root = rb_path_skip_prefix(root);
-#else
- flags &= ~FNM_CASEFOLD;
#endif
+ flags |= FNM_SEPMATCH; /* a bit faster */
if (*root == '/') root++;
n = root - path;
@@ -1654,7 +1687,7 @@ Init_Dir()
rb_define_singleton_method(rb_cFile,"fnmatch?", file_s_fnmatch, -1);
rb_file_const("FNM_NOESCAPE", INT2FIX(FNM_NOESCAPE));
- rb_file_const("FNM_PATHNAME", INT2FIX(FNM_PATHNAME));
+ rb_file_const("FNM_SEPMATCH", INT2FIX(FNM_SEPMATCH));
rb_file_const("FNM_DOTMATCH", INT2FIX(FNM_DOTMATCH));
rb_file_const("FNM_CASEFOLD", INT2FIX(FNM_CASEFOLD));
}