[ruby-dev:48194] [ruby-trunk - Feature #9816] 文字列内の数字を数値として比較するメソッド

From: nobu@...
Date: 2014-05-08 14:25:53 UTC
List: ruby-dev #48194
Issue #9816 has been updated by Nobuyoshi Nakada.


`versioncmp`は'-'と'.'だけ特別扱いって感じですかね。
あと、片方だけが数字で終わっているときの終端チェックが抜けているような。

~~~diff
diff --git i/string.c w/string.c
index 66f667f..855d74f 100644
--- i/string.c
+++ w/string.c
@@ -2639,6 +2639,11 @@ rb_str_numericcmp(VALUE str1, VALUE str2)
 		if (ISDIGIT(*p2)) {
 		    VALUE r =3D numerical_compare(&p1, p1end, &p2, p2end);
 		    if (!NIL_P(r)) return r;
+		    if (p1 >=3D p1end) {
+			if (p2 < p2end) return INT2FIX(-1);
+			break;
+		    }
+		    else if (p2 >=3D p2end) return INT2FIX(+1);
 		}
 		else {
 		    return INT2FIX(-1);
@@ -2647,7 +2652,13 @@ rb_str_numericcmp(VALUE str1, VALUE str2)
 	    else if (ISDIGIT(*p2)) {
 		return INT2FIX(1);
 	    }
-	    if (*p1 !=3D *p2) return INT2FIX(*p1 < *p2 ? -1 : 1);
+	    if (*p1 !=3D *p2) {
+		if (*p1 =3D=3D '-') return INT2FIX(-1);
+		if (*p2 =3D=3D '-') return INT2FIX(+1);
+		if (*p1 =3D=3D '.') return INT2FIX(-1);
+		if (*p2 =3D=3D '.') return INT2FIX(+1);
+		return INT2FIX(*p1 < *p2 ? -1 : 1);
+	    }
 	    p1++;
 	    p2++;
 	}
@@ -2662,6 +2673,11 @@ rb_str_numericcmp(VALUE str1, VALUE str2)
 		    if (ISDIGIT(*p2)) {
 			VALUE r =3D numerical_compare(&p1, p1end, &p2, p2end);
 			if (!NIL_P(r)) return r;
+			if (p1 >=3D p1end) {
+			    if (p2 < p2end) return INT2FIX(-1);
+			    break;
+			}
+			else if (p2 >=3D p2end) return INT2FIX(+1);
 		    }
 		    else {
 			return INT2FIX(-1);
@@ -2670,7 +2686,13 @@ rb_str_numericcmp(VALUE str1, VALUE str2)
 		else if (ISDIGIT(*p2)) {
 		    return INT2FIX(1);
 		}
-		if (*p1 !=3D *p2) return INT2FIX(*p1 < *p2 ? -1 : 1);
+		if (*p1 !=3D *p2) {
+		    if (*p1 =3D=3D '-') return INT2FIX(-1);
+		    if (*p2 =3D=3D '-') return INT2FIX(+1);
+		    if (*p1 =3D=3D '.') return INT2FIX(-1);
+		    if (*p2 =3D=3D '.') return INT2FIX(+1);
+		    return INT2FIX(*p1 < *p2 ? -1 : 1);
+		}
 		p1++;
 		p2++;
             }
diff --git i/test/ruby/test_string.rb w/test/ruby/test_string.rb
index f9c788b..313e9cf 100644
--- i/test/ruby/test_string.rb
+++ w/test/ruby/test_string.rb
@@ -2116,6 +2116,9 @@ class TestString < Test::Unit::TestCase
     assert_equal( 1, "a1b".numericcmp("a1a"))
     assert_equal(-1, "a9a".numericcmp("a10a"))
     assert_equal( 1, "b".numericcmp("a"))
+    assert_equal( 1, "a.1".numericcmp("a-1"))
+    assert_equal(-1, "a.1".numericcmp("a.1.a"))
+    assert_equal( 1, "a 1".numericcmp("a.x"))
     assert_equal( 0, "\u30421".numericcmp("\u30421"))
     assert_equal( 1, "\u3042\u3042".numericcmp("\u30421"))
     assert_equal(-1, "\u30421".numericcmp("\u3042\u3042"))
@@ -2125,6 +2128,9 @@ class TestString < Test::Unit::TestCase
     assert_equal( 1, "\u30421\u3044".numericcmp("\u30421\u3042"))
     assert_equal(-1, "\u30429\u3042".numericcmp("\u304210\u3042"))
     assert_equal( 1, "\u3044".numericcmp("\u3042"))
+    assert_equal( 1, "\u3042.1".numericcmp("\u3042-1"))
+    assert_equal(-1, "\u3042.1".numericcmp("\u3042.1.\u3042"))
+    assert_equal( 1, "\u3042 1".numericcmp("\u3042.x"))
   end
=20
   def test_upcase2
~~~

----------------------------------------
Feature #9816: 文字列内の数字を数値として比較するメソッド
https://bugs.ruby-lang.org/issues/9816#change-46620

* Author: Yui NARUSE
* Status: Assigned
* Priority: Normal
* Assignee: Yukihiro Matsumoto
* Category: core
* Target version:
----------------------------------------
文字列内の数字を数値として比較するメソッドを追加しませんか

そのような比較は一般的な用途としてはGUIシェルのファイラーが比較に用いており、
Windows では StrCmpLogicalW が、OS X では NSString:compare:options:へのNSNumericSearch定数が提供されています。
http://msdn.microsoft.com/en-us/library/windows/desktop/bb759947(v=vs.85).aspx
https://developer.apple.com/library/mac/documentation/Cocoa/Reference/Foundation/Classes/NSString_Class/Reference/NSString.html#//apple_ref/c/econst/NSNumericSearch

上記のような処理自体はさほど難しいものではありませんが、Rubyレベルで実装すると大量のオブジェクトを作ってしまいます。
例えば `Gem::Version.new("2.1.10".freeze)<=>Gem::Version.new("2.1.9".freeze)` は47個、
`"2.1.10".freeze.split('.').map(&:to_i)<=>"2.1.9".freeze.split('.').map(&:to_i)` だと16個のオブジェクトを作ります。
`"2.1.10".freeze.numericcmp"2.1.9".freeze` ならば、もちろんオブジェクトは一つも作りません。

なお、上記の例でも示唆していますが、本メソッドは Ruby のバージョン表記の TEENY が2桁になった場合の比較に用いることができます。

パッチは以下の通りです。
なお、メソッド名は String#numericcmp としています。
（String#casecmpを念頭に置いた）

```
diff --git a/string.c b/string.c
index c589c80..66f667f 100644
--- a/string.c
+++ b/string.c
@@ -2569,6 +2569,131 @@ rb_str_casecmp(VALUE str1, VALUE str2)
     return INT2FIX(-1);
 }
=20
+VALUE
+numerical_compare(const char **pp1, const char *p1end, const char **pp2, c=
onst char *p2end)
+{
+    const char *s1 =3D *pp1, *p1, *s2 =3D *pp2, *p2;
+    ptrdiff_t len1, len2;
+    int r;
+
+    while (s1 < p1end && *s1 =3D=3D '0') s1++;
+    p1 =3D s1;
+    while (p1 < p1end && ISDIGIT(*p1)) p1++;
+    len1 =3D p1 - s1;
+
+    while (s2 < p2end && *s2 =3D=3D '0') s2++;
+    p2 =3D s2;
+    while (p2 < p2end && ISDIGIT(*p2)) p2++;
+    len2 =3D p2 - s2;
+
+    if (len1 !=3D len2) {
+	return INT2FIX(len1 < len2 ? -1 : 1);
+    }
+
+    r =3D memcmp(s1, s2, len1);
+    if (r) return r < 0 ? INT2FIX(-1) : INT2FIX(1);
+
+    len1 =3D s1 - *pp1;
+    len2 =3D s2 - *pp2;
+    if (len1 !=3D len2) {
+	return INT2FIX(len1 < len2 ? -1 : 1);
+    }
+
+    *pp1 =3D p1;
+    *pp2 =3D p2;
+    return Qnil;
+}
+
+/*
+ *  call-seq:
+ *     str.numericcmp(other_str)   -> -1, 0, +1 or nil
+ *
+ *  Variant of <code>String#<=3D></code>, which considers digits in strings
+ *  are numeric value..
+ *
+ *     "a1".numericcmp("a1")            #=3D> 0
+ *     "aa".numericcmp("a1")            #=3D> 1
+ *     "a1".numericcmp("aa")            #=3D> -1
+ *     "a1".numericcmp("a01")           #=3D> -1
+ *     "2.1.2".numericcmp("2.1.10")     #=3D> 1
+ */
+
+static VALUE
+rb_str_numericcmp(VALUE str1, VALUE str2)
+{
+    long len;
+    rb_encoding *enc;
+    const char *p1, *p1end, *p2, *p2end;
+
+    StringValue(str2);
+    enc =3D rb_enc_compatible(str1, str2);
+    if (!enc) {
+	return Qnil;
+    }
+
+    p1 =3D RSTRING_PTR(str1); p1end =3D RSTRING_END(str1);
+    p2 =3D RSTRING_PTR(str2); p2end =3D RSTRING_END(str2);
+    if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
+	while (p1 < p1end && p2 < p2end) {
+	    if (ISDIGIT(*p1)) {
+		if (ISDIGIT(*p2)) {
+		    VALUE r =3D numerical_compare(&p1, p1end, &p2, p2end);
+		    if (!NIL_P(r)) return r;
+		}
+		else {
+		    return INT2FIX(-1);
+		}
+	    }
+	    else if (ISDIGIT(*p2)) {
+		return INT2FIX(1);
+	    }
+	    if (*p1 !=3D *p2) return INT2FIX(*p1 < *p2 ? -1 : 1);
+	    p1++;
+	    p2++;
+	}
+    }
+    else {
+	while (p1 < p1end && p2 < p2end) {
+            int l1, c1 =3D rb_enc_ascget(p1, p1end, &l1, enc);
+            int l2, c2 =3D rb_enc_ascget(p2, p2end, &l2, enc);
+
+            if (0 <=3D c1 && 0 <=3D c2) {
+		if (ISDIGIT(*p1)) {
+		    if (ISDIGIT(*p2)) {
+			VALUE r =3D numerical_compare(&p1, p1end, &p2, p2end);
+			if (!NIL_P(r)) return r;
+		    }
+		    else {
+			return INT2FIX(-1);
+		    }
+		}
+		else if (ISDIGIT(*p2)) {
+		    return INT2FIX(1);
+		}
+		if (*p1 !=3D *p2) return INT2FIX(*p1 < *p2 ? -1 : 1);
+		p1++;
+		p2++;
+            }
+            else {
+                int r;
+                l1 =3D rb_enc_mbclen(p1, p1end, enc);
+                l2 =3D rb_enc_mbclen(p2, p2end, enc);
+                len =3D l1 < l2 ? l1 : l2;
+                r =3D memcmp(p1, p2, len);
+                if (r !=3D 0)
+                    return INT2FIX(r < 0 ? -1 : 1);
+                if (l1 !=3D l2)
+                    return INT2FIX(l1 < l2 ? -1 : 1);
+            }
+	    p1 +=3D l1;
+	    p2 +=3D l2;
+	}
+    }
+    if (RSTRING_LEN(str1) =3D=3D RSTRING_LEN(str2)) return INT2FIX(0);
+    if (RSTRING_LEN(str1) > RSTRING_LEN(str2)) return INT2FIX(1);
+    return INT2FIX(-1);
+}
+
 static long
 rb_str_index(VALUE str, VALUE sub, long offset)
 {
@@ -8721,6 +8846,7 @@ Init_String(void)
     rb_define_method(rb_cString, "eql?", rb_str_eql, 1);
     rb_define_method(rb_cString, "hash", rb_str_hash_m, 0);
     rb_define_method(rb_cString, "casecmp", rb_str_casecmp, 1);
+    rb_define_method(rb_cString, "numericcmp", rb_str_numericcmp, 1);
     rb_define_method(rb_cString, "+", rb_str_plus, 1);
     rb_define_method(rb_cString, "*", rb_str_times, 1);
     rb_define_method(rb_cString, "%", rb_str_format_m, 1);
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index 8366424..f9c788b 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -2104,6 +2104,29 @@ class TestString < Test::Unit::TestCase
     assert_equal(1, "\u3042B".casecmp("\u3042a"))
   end
=20
+  def test_numericcmp
+    assert_equal(-1, "2.1.0".numericcmp("2.1.1"))
+    assert_equal(-1, "2.1.9".numericcmp("2.1.10"))
+    assert_equal( 0, "a1".numericcmp("a1"))
+    assert_equal( 1, "aa".numericcmp("a1"))
+    assert_equal(-1, "a1".numericcmp("aa"))
+    assert_equal(-1, "a1".numericcmp("a01"))
+    assert_equal(-1, "a0001".numericcmp("a00001"))
+    assert_equal( 0, "a1a".numericcmp("a1a"))
+    assert_equal( 1, "a1b".numericcmp("a1a"))
+    assert_equal(-1, "a9a".numericcmp("a10a"))
+    assert_equal( 1, "b".numericcmp("a"))
+    assert_equal( 0, "\u30421".numericcmp("\u30421"))
+    assert_equal( 1, "\u3042\u3042".numericcmp("\u30421"))
+    assert_equal(-1, "\u30421".numericcmp("\u3042\u3042"))
+    assert_equal(-1, "\u30421".numericcmp("\u304201"))
+    assert_equal(-1, "\u30420001".numericcmp("\u304200001"))
+    assert_equal( 0, "\u30421\u3042".numericcmp("\u30421\u3042"))
+    assert_equal( 1, "\u30421\u3044".numericcmp("\u30421\u3042"))
+    assert_equal(-1, "\u30429\u3042".numericcmp("\u304210\u3042"))
+    assert_equal( 1, "\u3044".numericcmp("\u3042"))
+  end
+
   def test_upcase2
     assert_equal("\u3042AB", "\u3042aB".upcase)
   end
```



--=20
https://bugs.ruby-lang.org/

In This Thread

Prev Next