From: matz@... Date: 2017-10-19T08:30:30+00:00 Subject: [ruby-core:83387] [Ruby trunk Feature#13873] Optimize Dir.glob with FNM_EXTGLOB Issue #13873 has been updated by matz (Yukihiro Matsumoto). Agreed. Matz ---------------------------------------- Feature #13873: Optimize Dir.glob with FNM_EXTGLOB https://bugs.ruby-lang.org/issues/13873#change-67348 * Author: naruse (Yui NARUSE) * Status: Open * Priority: Normal * Assignee: * Target version: ---------------------------------------- Rails often queries Dir.glob with complex pattern. On accessing Rails site, it calls glob as follows on querying templates. Note that Dir[] insists File::FNM_EXTGLOB. ```ruby def find_template_paths(query) Dir[query].uniq.reject do |filename| File.directory?(filename) || # deals with case-insensitive file systems. !File.fnmatch(query, filename, File::FNM_EXTGLOB) end end ``` https://github.com/rails/rails/blob/6f1c18308ebffc97d51440cdeed7be71de58f26a/actionview/lib/action_view/template/resolver.rb#L247 For example on accessing index page of Post Model (/posts), it calls following glob: Dir["/home/naruse/foo/app/views/posts/index{.en,}{.html,.text,.js,.css,.ics,.csv,.vcf,.png,.jpeg,.gif,.bmp,.tiff,.svg,.mpeg,.xml,.rss,.atom,.yaml,.multipart_form,.url_encoded_form,.json,.pdf,.zip,.gzip,}{}{.raw,.erb,.html,.builder,.ruby,.coffee,.jbuilder,}"]' It calls many stat as follows: ``` % sudo strace ./miniruby58882 -e'Dir["/home/naruse/foo/app/views/posts/index{.en,}{.html,.text,.js,.css,.ics,.csv,.vcf,.png,.jpeg,.gif,.bmp,.tiff,.svg,.mpeg,.xml,.rss,.atom,.yaml,.multipart_form,.url_encoded_form,.json,.pdf,.zip,.gzip,}{}{.raw,.erb,.html,.builder,.ruby,.coffee,.jbuilder,}"]'|&grep newfstatat|wc -l 400 ``` Because current glob implementation handles braces in early stage, then calls stat for each combination of brace patterns instead of using readdir results. Below is a patch to use readdir instead of hundreds of syscalls. Note that this patch changes the order of the result as follows: ``` 1) Failure: TestDir#test_glob [/home/naruse/ruby/test/ruby/test_dir.rb:157]: <["/tmp/test-all/__test_dir__20170906-8620-ka8yqu/}}{}", "/tmp/test-all/__test_dir__20170906-8620-ka8yqu/}}a"]> expected but was <["/tmp/test-all/__test_dir__20170906-8620-ka8yqu/}}a", "/tmp/test-all/__test_dir__20170906-8620-ka8yqu/}}{}"]>. ``` ``` diff --git a/dir.c b/dir.c index b7afaec4e0..7ca84fa8c5 100644 --- a/dir.c +++ b/dir.c @@ -290,6 +290,8 @@ bracket( #define UNESCAPE(p) (escape && *(p) == '\\' ? (p) + 1 : (p)) #define ISEND(p) (!*(p) || (pathname && *(p) == '/')) #define RETURN(val) return *pcur = p, *scur = s, (val); +#define FNMATCH_ALLOC_N(type, n) ((type *)malloc(sizeof(type) * (n))) +#define FNMATCH_FREE(ptr) free(ptr) static int fnmatch_helper( @@ -348,6 +350,56 @@ fnmatch_helper( } goto failed; } + + case '{': { + size_t len = pend - p; + char *buf = FNMATCH_ALLOC_N(char, len); + const char *rbrace = NULL; + while (p < pend) { + const char *t = ++p; + int nest = 0; + while (p < pend && !(*p == ',' && nest == 0)) { + if (*p == '{') nest++; + if (*p == '}') { + if (nest == 0) { + if (!rbrace) rbrace = p; + goto rest; + } + nest--; + } + if (*p == '\\' && escape) { + if (++p >= pend) break; + } + Inc(p, pend, enc); + } + if (!rbrace) { + rbrace = p; + while (rbrace < pend && !(*rbrace == '}' && nest == 0)) { + if (*rbrace == '{') nest++; + if (*rbrace == '}') nest--; + if (*rbrace == '\\' && escape) { + if (++p >= pend) break; + } + Inc(rbrace, pend, enc); + } + } +rest: + memcpy(buf, t, p-t); + buf[p-t]=0; + strlcpy(buf+(p-t), rbrace+1, len-(p-t)); + { + const char *pp = buf, *ss = s; + r = fnmatch_helper((const char **)&pp, &ss, flags|FNM_DOTMATCH, enc); + } + if (r == 0) { + p = buf; + FNMATCH_FREE(buf); + RETURN(0); + } + if (p >= rbrace) break; + } + FNMATCH_FREE(buf); + } } /* ordinary */ @@ -1426,6 +1478,12 @@ has_magic(const char *p, const char *pend, int flags, rb_encoding *enc) case '[': return MAGICAL; + case '{': + if (flags & FNM_EXTGLOB) { + return MAGICAL; + } + break; + case '\\': if (escape && p++ >= pend) continue; @@ -2272,6 +2330,13 @@ push_pattern(const char *path, VALUE ary, void *enc) rb_ary_push(ary, name); } +struct push_glob_args { + struct glob_args glob; + int flags; + int fd; +}; +static int push_caller(const char *path, VALUE val, void *enc); + static int ruby_brace_expand(const char *str, int flags, ruby_glob_func *func, VALUE arg, rb_encoding *enc, VALUE var) @@ -2280,7 +2345,7 @@ ruby_brace_expand(const char *str, int flags, ruby_glob_func *func, VALUE arg, const char *p = str; const char *pend = p + strlen(p); const char *s = p; - const char *lbrace = 0, *rbrace = 0; + const char *lbrace = NULL, *rbrace = NULL; int nest = 0, status = 0; while (*p) { @@ -2299,9 +2364,18 @@ ruby_brace_expand(const char *str, int flags, ruby_glob_func *func, VALUE arg, if (lbrace && rbrace) { size_t len = strlen(s) + 1; - char *buf = GLOB_ALLOC_N(char, len); + char *buf; long shift; + if (func == push_caller && !strchr(lbrace, '/')) { + /* Now it reaches file basename entry. */ + /* Handle braces in glob_helper */ + struct push_glob_args *a = (struct push_glob_args *)arg; + a->flags |= FNM_EXTGLOB; + return glob_call_func(func, s, arg, enc); + } + + buf = GLOB_ALLOC_N(char, len); if (!buf) return -1; memcpy(buf, s, lbrace-s); shift = (lbrace-s); @@ -2365,12 +2439,6 @@ ruby_brace_glob(const char *str, int flags, ruby_glob_func *func, VALUE arg) return ruby_brace_glob_with_enc(str, flags, func, arg, rb_ascii8bit_encoding()); } -struct push_glob_args { - struct glob_args glob; - int flags; - int fd; -}; - static int push_caller(const char *path, VALUE val, void *enc) { ``` -- https://bugs.ruby-lang.org/ Unsubscribe: