[ruby-dev:22258] Re: Dir.glob と Shift_JIS について
From:
"H.Yamamoto" <ocean@...2.ccsnet.ne.jp>
Date:
2003-12-13 03:22:11 UTC
List:
ruby-dev #22258
山本です。glob_helperをリファクタリングしてみました。
次の点以外は、仕様は変化していないはずです。
# DOSISHな環境で、Dir.glob("e:*") が動作するようになった。(eドライブのカレントを列挙)
# 以前は、Dir.glob("e:./*") とする必要があった。
依存個所は、glob_helperの中の char *dir の初期化部分と、
rb_glob2の中のmagic探索開始ポインタ設定部分です。
今までと違って大きくいじったので、ちょっと怖いです。
いちおう自分のスクリプトは動作していますが・・・
--- dir.c Sat Nov 22 12:59:18 2003
+++ dir.c Sat Dec 13 12:02:10 2003
@@ -78,10 +78,66 @@ char *strchr _((char*,char));
#define downcase(c) (nocase && ISUPPER(c) ? tolower(c) : (c))
+#define compare(c1, c2) (((unsigned char)(c1)) - ((unsigned char)(c2)))
-#ifndef CharNext /* defined as CharNext[AW] on Windows. */
-# if defined(DJGPP)
-# define CharNext(p) ((p) + mblen(p, MB_CUR_MAX))
-# else
-# define CharNext(p) ((p) + 1)
-# endif
-#endif
+static char *
+greater(p1, p2)
+ const char *p1;
+ const char *p2;
+{
+ return p1 > p2 ? (char *)p1 : (char *)p2;
+}
+
+#if defined(_WIN32)
+# define Next(p) (greater((p) + 1, CharNext(p))) /* faster */
+#elif defined(DJGPP) || defined(__EMX__)
+# define Next(p) (greater((p) + 1, (p) + mblen(p, INT_MAX)))
+# endif
+
+#ifndef Next /* single byte environment */
+# define Next(p) ((p) + 1)
+# define Inc(p) (++(p))
+# define CopyAndInc(dst, src) (*(dst)++ = *(src)++)
+# define Compare(p1, p2) (compare(downcase(*(p1)), downcase(*(p2))))
+#else /* multi byte environment */
+# define Inc(p) ((p) = Next(p))
+# define CopyAndInc(dst, src) (CopyAndIncImpl(&(dst), &(src)))
+# define Compare(p1, p2) (CompareImpl(p1, p2, nocase))
+static void
+CopyAndIncImpl(pdst, psrc)
+ char **pdst;
+ const char **psrc;
+{
+ const int len = Next(*psrc) - *psrc;
+ memcpy(*pdst, *psrc, len);
+ (*pdst) += len;
+ (*psrc) += len;
+}
+
+static int
+CompareImpl(p1, p2, nocase)
+ const char *p1;
+ const char *p2;
+ int nocase;
+{
+ const int len1 = Next(p1) - p1;
+ const int len2 = Next(p2) - p2;
+
+ if (len1 == 1)
+ if (len2 == 1) {
+ return compare(downcase(*p1), downcase(*p2));
+ }
+ else {
+ const int ret = compare(downcase(*p1), *p2);
+ return ret ? ret : -1;
+ }
+ else
+ if (len2 == 1) {
+ const int ret = compare(*p1, downcase(*p2));
+ return ret ? ret : 1;
+ }
+ else {
+ const int ret = memcmp(p1, p2, len1 < len2 ? len1 : len2);
+ return ret ? ret : len1 - len2;
+ }
+}
+#endif /* environment */
@@ -89,3 +145,3 @@ char *strchr _((char*,char));
#define isdirsep(c) ((c) == '/' || (c) == '\\')
-static const char *
+static char *
find_dirsep(s)
@@ -95,4 +151,4 @@ find_dirsep(s)
if (isdirsep(*s))
- return s;
- s = CharNext(s);
+ return (char *)s;
+ Inc(s);
}
@@ -108,3 +164,3 @@ range(pat, test, flags)
char *pat;
- char test;
+ char *test;
int flags;
@@ -117,22 +173,20 @@ range(pat, test, flags)
if (not)
- pat++;
-
- test = downcase(test);
+ Inc(pat);
while (*pat) {
- int cstart, cend;
- cstart = cend = *pat++;
- if (cstart == ']')
+ char *pstart, *pend;
+ pstart = pend = pat; Inc(pat);
+ if (*pstart == ']')
return ok == not ? 0 : pat;
- else if (escape && cstart == '\\')
- cstart = cend = *pat++;
- if (*pat == '-' && pat[1] != ']') {
- if (escape && pat[1] == '\\')
- pat++;
- cend = pat[1];
- if (!cend)
+ else if (escape && *pstart == '\\')
+ pstart = pend = pat; Inc(pat);
+ if (*pat == '-' && *Next(pat) != ']') {
+ if (escape && *Next(pat) == '\\')
+ Inc(pat);
+ pend = Next(pat);
+ if (!*pend)
return 0;
- pat += 2;
+ Inc(pat); Inc(pat);
}
- if (downcase(cstart) <= test && test <= downcase(cend))
+ if (Compare(pstart, test) <= 0 && Compare(test, pend) <= 0)
ok = 1;
@@ -143,4 +197,5 @@ range(pat, test, flags)
#define ISDIRSEP(c) (pathname && isdirsep(c))
-#define PERIOD(s) (period && *(s) == '.' && \
- ((s) == string || ISDIRSEP((s)[-1])))
+#define PERIOD_S() (period && *s == '.' && \
+ (s == string || ISDIRSEP(*s_prev)))
+#define INC_S() (s = Next(s_prev = s))
static int
@@ -151,5 +206,5 @@ fnmatch(pat, string, flags)
{
- int c;
- int test;
- const char *s = string;
+ const char *p;
+ const char *test;
+ const char *s = string, *s_prev;
int escape = !(flags & FNM_NOESCAPE);
@@ -159,17 +214,17 @@ fnmatch(pat, string, flags)
- while (c = *pat++) {
- switch (c) {
+ while (p = pat, Inc(pat), *p) {
+ switch (*p) {
case '?':
- if (!*s || ISDIRSEP(*s) || PERIOD(s))
+ if (!*s || ISDIRSEP(*s) || PERIOD_S())
return FNM_NOMATCH;
- s++;
+ INC_S();
break;
case '*':
- while ((c = *pat++) == '*')
+ while (p = pat, Inc(pat), *p == '*')
;
- if (PERIOD(s))
+ if (PERIOD_S())
return FNM_NOMATCH;
- if (!c) {
+ if (!*p) {
if (pathname && find_dirsep(s))
@@ -179,6 +234,6 @@ fnmatch(pat, string, flags)
}
- else if (ISDIRSEP(c)) {
+ else if (ISDIRSEP(*p)) {
s = find_dirsep(s);
if (s) {
- s++;
+ INC_S();
break;
@@ -188,7 +243,6 @@ fnmatch(pat, string, flags)
- test = escape && c == '\\' ? *pat : c;
- test = downcase(test);
- pat--;
+ test = escape && *p == '\\' ? pat : p;
+ pat = p;
while (*s) {
- if ((c == '[' || downcase(*s) == test) &&
+ if ((*p == '[' || Compare(s, test) == 0) &&
!fnmatch(pat, s, flags | FNM_DOTMATCH))
@@ -197,3 +251,3 @@ fnmatch(pat, string, flags)
break;
- s++;
+ INC_S();
}
@@ -202,8 +256,8 @@ fnmatch(pat, string, flags)
case '[':
- if (!*s || ISDIRSEP(*s) || PERIOD(s))
+ if (!*s || ISDIRSEP(*s) || PERIOD_S())
return FNM_NOMATCH;
- pat = range(pat, *s, flags);
+ pat = range(pat, s, flags);
if (!pat)
return FNM_NOMATCH;
- s++;
+ INC_S();
break;
@@ -216,7 +270,7 @@ fnmatch(pat, string, flags)
) {
- c = *pat;
- if (!c)
- c = '\\';
+ p = pat;
+ if (!*p)
+ p = "\\"; /* point to embedded string */
else
- pat++;
+ Inc(pat);
}
@@ -226,3 +280,3 @@ fnmatch(pat, string, flags)
#if defined DOSISH
- if (ISDIRSEP(c) && isdirsep(*s))
+ if (ISDIRSEP(*p) && isdirsep(*s))
;
@@ -230,5 +284,5 @@ fnmatch(pat, string, flags)
#endif
- if(downcase(c) != downcase(*s))
+ if(Compare(p, s) != 0)
return FNM_NOMATCH;
- s++;
+ INC_S();
break;
@@ -570,10 +624,11 @@ dir_s_rmdir(obj, dir)
-/* Return nonzero if S has any special globbing chars in it. */
-static int
-has_magic(s, send, flags)
- char *s, *send;
+/* difference from find_dirsep: if not found, return pointer to '\0' */
+static char *
+find_magic(s, flags, pbeg, pend)
+ char *s;
int flags;
+ char **pbeg;
+ char **pend;
{
- register char *p = s;
- register char c;
+ char *p;
int open = 0;
@@ -581,7 +636,13 @@ has_magic(s, send, flags)
- while ((c = *p++) != '\0') {
- switch (c) {
+ *pbeg = s;
+
+ while (p = s, Inc(s), *p != '\0') {
+ switch (*p) {
+ case '/':
+ *pbeg = s;
+ continue;
+
case '?':
case '*':
- return Qtrue;
+ goto found;
@@ -592,3 +653,3 @@ has_magic(s, send, flags)
if (open)
- return Qtrue;
+ goto found;
continue;
@@ -596,45 +657,35 @@ has_magic(s, send, flags)
case '\\':
- if (escape && *p++ == '\0')
- return Qfalse;
+ if (escape && (p = s, Inc(s), *p == '\0'))
+ goto miss;
}
-
- if (send && p >= send) break;
- }
- return Qfalse;
}
-static char*
-extract_path(p, pend)
- char *p, *pend;
-{
- char *alloc;
- int len;
+ miss:
+ *pbeg = *pend = p;
+ return;
- len = pend - p;
- alloc = ALLOC_N(char, len+1);
- memcpy(alloc, p, len);
- if (len > 1 && pend[-1] == '/'
-#if defined DOSISH_DRIVE_LETTER
- && pend[-2] != ':'
-#endif
- ) {
- alloc[len-1] = 0;
- }
- else {
- alloc[len] = 0;
+ found:
+ while (*s) {
+ if (*s == '/')
+ break;
+ Inc(s);
}
-
- return alloc;
+ *pend = s;
}
-static char*
-extract_elem(path)
- char *path;
+static int
+do_fnmatch(beg, end, string, flags)
+ char *beg;
+ char *end;
+ const char *string;
+ int flags;
{
- char *pend;
+ int ret;
+ char c;
- pend = strchr(path, '/');
- if (!pend) pend = path + strlen(path);
-
- return extract_path(path, pend);
+ c = *end;
+ *end = '\0'; /* should I allocate new string? */
+ ret = fnmatch(beg, string, flags);
+ *end = c;
+ return ret;
}
@@ -652,3 +703,3 @@ remove_backslashes(p)
}
- *t++ = *p++;
+ CopyAndInc(t, p);
}
@@ -704,12 +755,16 @@ glob_helper(path, sub, flags, func, arg)
struct stat st;
- char *p, *m;
int status = 0;
+ char *beg, *end, *buf;
+ DIR *dirp;
+ struct dirent *dp;
+ int recursive = 0;
- p = sub ? sub : path;
- if (!has_magic(p, 0, flags)) {
-#if defined DOSISH
- remove_backslashes(path);
-#else
- if (!(flags & FNM_NOESCAPE)) remove_backslashes(p);
-#endif
+ struct d_link {
+ char *path;
+ struct d_link *next;
+ } *tmp, *link, **tail = &link;
+
+ find_magic(sub, flags, &beg, &end);
+
+ if (*beg == '\0') { /* magic not found */
if (lstat(path, &st) == 0) {
@@ -726,36 +781,38 @@ glob_helper(path, sub, flags, func, arg)
- while (p && !status) {
- if (*p == '/') p++;
- m = strchr(p, '/');
- if (has_magic(p, m, flags)) {
- char *dir, *base, *magic, *buf;
- DIR *dirp;
- struct dirent *dp;
- int recursive = 0;
-
- struct d_link {
- char *path;
- struct d_link *next;
- } *tmp, *link, **tail = &link;
-
- base = extract_path(path, p);
- if (path == p) dir = ".";
- else dir = base;
-
- magic = extract_elem(p);
+ {
+#if defined DOSISH_DRIVE_LETTER
+#define NEED_DOT ((beg-path==0) || (beg-path==2 && ISALPHA(*path) && path[1] == ':'))
+#define NEED_LAST ((NEED_DOT) || (beg-path==1 && *path=='/') || (beg-path==3 && ISALPHA(*path) && path[1]==':' && path[2]=='/'))
+#else
+#define NEED_DOT ((beg-path==0))
+#define NEED_LAST ((beg-path==0) || (beg-path==1 && *path=='/'))
+#endif
+ int n = (NEED_LAST) ? (beg - path) : (beg - path) - 1;
+ char *dir = ALLOC_N(char, n+1+1);
+ memcpy(dir, path, n);
+ if (NEED_DOT) {
+ dir[n] = '.';
+ dir[n+1] = '\0';
+ }
+ else {
+ dir[n] = '\0';
+ }
if (stat(dir, &st) < 0) {
if (errno != ENOENT) rb_sys_warning(dir);
- free(base);
- free(magic);
- break;
+ free(dir);
+ return 0;
}
if (S_ISDIR(st.st_mode)) {
- if (m && strcmp(magic, "**") == 0) {
- int n = strlen(base);
+ if (beg[0] == '*' && beg[1] == '*' && beg[2] == '/') {
+ int n = beg - path;
recursive = 1;
- buf = ALLOC_N(char, n+strlen(m)+3);
- sprintf(buf, "%s%s", base, *base ? m : m+1);
+ buf = ALLOC_N(char, n+strlen(beg+3)+1);
+ memcpy(buf, path, n);
+ strcpy(buf+n, beg+3);
status = glob_helper(buf, buf+n, flags, func, arg);
free(buf);
- if (status) goto finalize;
+ if (status) {
+ free(dir);
+ return status;
+ }
}
@@ -764,5 +821,4 @@ glob_helper(path, sub, flags, func, arg)
rb_sys_warning(dir);
- free(base);
- free(magic);
- break;
+ free(dir);
+ return 0;
}
@@ -770,14 +826,10 @@ glob_helper(path, sub, flags, func, arg)
else {
- free(base);
- free(magic);
- break;
+ free(dir);
+ return 0;
+ }
+ free(dir);
}
-
-#if defined DOSISH_DRIVE_LETTER
-#define BASE (*base && !((isdirsep(*base) && !base[1]) || (base[1] == ':' && isdirsep(base[2]) && !base[3])))
-#else
-#define BASE (*base && !(isdirsep(*base) && !base[1]))
-#endif
for (dp = readdir(dirp); dp != NULL; dp = readdir(dirp)) {
+ int n = beg - path;
if (recursive) {
@@ -785,4 +837,5 @@ glob_helper(path, sub, flags, func, arg)
continue;
- buf = ALLOC_N(char, strlen(base)+NAMLEN(dp)+strlen(m)+6);
- sprintf(buf, "%s%s%s", base, (BASE) ? "/" : "", dp->d_name);
+ buf = ALLOC_N(char, n+NAMLEN(dp)+strlen(end)+3+1);
+ memcpy(buf, path, n);
+ strcpy(buf+n, dp->d_name);
if (lstat(buf, &st) < 0) {
@@ -794,4 +847,4 @@ glob_helper(path, sub, flags, func, arg)
char *t = buf+strlen(buf);
- strcpy(t, "/**");
- strcpy(t+3, m);
+ memcpy(t, "/**", 3);
+ strcpy(t+3, end);
status = glob_helper(buf, t, flags, func, arg);
@@ -804,6 +857,7 @@ glob_helper(path, sub, flags, func, arg)
}
- if (fnmatch(magic, dp->d_name, flags) == 0) {
- buf = ALLOC_N(char, strlen(base)+NAMLEN(dp)+2);
- sprintf(buf, "%s%s%s", base, (BASE) ? "/" : "", dp->d_name);
- if (!m) {
+ if (do_fnmatch(beg, end, dp->d_name, flags) == 0) {
+ buf = ALLOC_N(char, n+NAMLEN(dp)+1);
+ memcpy(buf, path, n);
+ strcpy(buf+n, dp->d_name);
+ if (*end == '\0') {
status = glob_call_func(func, buf, arg);
@@ -822,4 +876,2 @@ glob_helper(path, sub, flags, func, arg)
*tail = 0;
- free(base);
- free(magic);
if (link) {
@@ -829,9 +881,8 @@ glob_helper(path, sub, flags, func, arg)
if (S_ISDIR(st.st_mode)) {
- int len = strlen(link->path);
- int mlen = strlen(m);
- char *t = ALLOC_N(char, len+mlen+1);
-
- sprintf(t, "%s%s", link->path, m);
- status = glob_helper(t, t+len, flags, func, arg);
- free(t);
+ int n = strlen(link->path);
+ buf = ALLOC_N(char, n+strlen(end)+1);
+ memcpy(buf, link->path, n);
+ strcpy(buf+n, end);
+ status = glob_helper(buf, buf+n, flags, func, arg);
+ free(buf);
}
@@ -847,6 +898,2 @@ glob_helper(path, sub, flags, func, arg)
}
- break;
- }
- }
- p = m;
}
@@ -862,3 +909,10 @@ rb_glob2(path, flags, func, arg)
{
- int status = glob_helper(path, 0, flags, func, arg);
+ int status;
+#if defined DOSISH
+ remove_backslashes(path);
+ status = glob_helper(path, (ISALPHA(path[0]) && path[1] == ':' ? path + 2 : path), flags, func, arg);
+#else
+ if (!(flags & FNM_NOESCAPE)) remove_backslashes(path);
+ status = glob_helper(path, path, flags, func, arg);
+#endif
if (status) rb_jump_tag(status);
@@ -926,3 +980,3 @@ push_braces(ary, s, flags)
}
- p++;
+ Inc(p);
}
@@ -934,3 +988,3 @@ push_braces(ary, s, flags)
}
- p++;
+ Inc(p);
}
@@ -944,9 +998,9 @@ push_braces(ary, s, flags)
while (*p != '}') {
- t = p + 1;
- for (p = t; *p!='}' && *p!=','; p++) {
+ t = Next(p);
+ for (p = t; *p!='}' && *p!=','; Inc(p)) {
/* skip inner braces */
- if (*p == '{') while (*p!='}') p++;
+ if (*p == '{') while (*p!='}') Inc(p);
}
memcpy(b, t, p-t);
- strcpy(b+(p-t), rbrace+1);
+ strcpy(b+(p-t), Next(rbrace));
push_braces(ary, buf, flags);
@@ -988,3 +1042,3 @@ rb_push_glob(str, flags)
nest = maxnest = 0;
- while (p < pend && isdelim(*p)) p++;
+ while (p < pend && isdelim(*p)) Inc(p);
while (p < pend && !isdelim(*p)) {
@@ -993,6 +1047,6 @@ rb_push_glob(str, flags)
if (!noescape && *p == '\\') {
- *t++ = *p++;
+ CopyAndInc(t, p);
if (p == pend) break;
}
- *t++ = *p++;
+ CopyAndInc(t, p);
}