[#38470] ruby-dev summary 21403-21530 (draft) — Minero Aoki <aamine@...>

青木です。

25 messages 2003/10/07
[#38475] Re: ruby-dev summary 21403-21530 (draft) — maili31s@... (SugHimsi==SUGIHARA Hiroshi) 2003/10/07

すぎむし。

[#38480] Re: ruby-dev summary 21403-21530 (draft) — Minero Aoki <aamine@...> 2003/10/08

青木です。

[#38481] marshal_dump (was Re: ) — m_seki@... 2003/10/08

[#38484] Re: marshal_dump (was Re: ) — matz@... (Yukihiro Matsumoto) 2003/10/09

まつもと ゆきひろです

[#38486] Re: marshal_dump (was Re: ) — Masatoshi Seki <m_seki@...> 2003/10/09

咳といいます

[#38489] exit status on exit! — YANAGAWA Kazuhisa <kjana@...4lab.to>

<http://www.unixuser.org/~ysjj/diary/?200310a&to=200310082#200310082>

29 messages 2003/10/09
[#38490] Re: exit status on exit! — Koji Arai <JCA02266@...> 2003/10/09

新井です。

[#38503] Re: exit status on exit! — YANAGAWA Kazuhisa <kjana@...4lab.to> 2003/10/10

In Message-Id: <20031010.082218.74733862.JCA02266@nifty.ne.jp>

[#38505] Re: exit status on exit! — Koji Arai <JCA02266@...> 2003/10/10

新井です。

[#38507] Re: exit status on exit! — matz@... (Yukihiro Matsumoto) 2003/10/11

まつもと ゆきひろです

[#38514] Re: exit status on exit! — YANAGAWA Kazuhisa <kjana@...4lab.to> 2003/10/11

In Message-Id: <1065883639.405037.23137.nullmailer@picachu.netlab.jp>

[#38515] Re: exit status on exit! — WATANABE Hirofumi <eban@...> 2003/10/11

わたなべです。

[ruby-list:38465] diff library

From: Koji Arai <JCA02266@...>
Date: 2003-10-05 14:46:31 UTC
List: ruby-list #38465
新井です。

comm コマンドのような

Diff.comm("aaabc".split(//), "bcaaad".split(//)) {|*a|
  puts "%5s %5s %5s" % a
}

=>

          b
          c
                a
                a
                a
    b
    c
          d

というメソッドを作ってみました。まず、RAA の algorithm/diff
から以下のようになりました。

require 'algorithm/diff'

module Diff
  def Diff.comm(a, b)
    mvector = Diff.lcs(a, b)    # Longuest Common Subsequence
    ai = bi = 0
    while ai < mvector.length
      bline = mvector[ai]
      if bline
        if bi == bline
          yield(nil,nil,b[bi])
        else
          while bi < bline
            yield(nil,b[bi],nil)
            bi += 1
          end

          while bi >= bline
            yield(nil,nil,b[bline])
            bline += 1
          end
        end
	bi += 1
      else
	yield(a[ai],nil,nil)
      end
      ai += 1
    end

    while ai < a.length
      yield(a[ai],nil,nil)
      ai += 1
    end

    while bi < b.length
      yield(nil,b[bi],nil)
      bi += 1
    end
  end
end

さらに

http://todo.is.os-omicron.org/tiki.cgi?p=diff

という O(ND) アルゴリズムの実装があったので(感謝)こちらも作っ
てみました。こちらは好みにあわせて全面修正してます(汚くなっ
てます)。

もともとは、Excel のシート同士を比較して、差分を色づけして表
したシートを作ろうとしてました。(CSVに落して diff 取ってが面
倒臭くなったので)

残念ながらまだそこまで行きついてないのですが(ライブラリ読む
のに時間を費してしまった comm の作成はそのついで)、ここまで
来てふと、他に便利なライブラリがあるのではと思ったのですがご
存知の方いましたら教えてください。

# 趣旨の分かりにくいメールだ

--
新井康司 (Koji Arai)

Attachments (1)

patch.rb (8.39 KB, text/x-ruby)
module Diff
  # all methods are module function
  module_function

  def diff(base, update)
    lcs = lcs(base, update)

    ret = []
    base_index = update_index = 0
    lcs.each {|match|
      if r = make_patch_item(base, update,
                             base_index, update_index,
                             match[0] - match[2], match[0])
        ret << r
      end

      base_index   = match[1] - match[2]
      update_index = match[1]
    }

    if r = make_patch_item(base, update,
                           base_index, update_index,
                           base.size, update.size)
      ret << r
    end
    # ret = [
    #          [base_index, update_index,
    #            [base_part, ...], [update_part, ...]
    #          ], ...
    #        ]
    return ret
  end

  def comm(base, update)
    lcs = lcs(base, update)
    lcs << [update.size, update.size, update.size-base.size]

    ret = []
    base_index = update_index = 0
    lcs.each {|match|
      base_to = match[0] - match[2]
      update_to = match[0]

      if update_index < update_to
        # right only
        yield(nil,update[update_index ... update_to],nil)
      end

      if base_index < base_to
        # left only
        yield(base[base_index ... base_to],nil,nil)
      end

      if match[0] < match[1]
        # identical
        yield(nil,nil,update[match[0] ... match[1]])
      end

      base_index   = match[1] - match[2]
      update_index = match[1]
    }
  end

  def make_break_dist(base, update)
    base_reverse = base.reverse
    vk_lastindex = [base.size]

    update.reverse_each {|item|
      temp_limit = base_reverse.index(item)
      temp_limit = vk_lastindex[-1] if temp_limit > vk_lastindex[-1]
      vk_lastindex << temp_limit
    }
    vk_lastindex[update.size] = -1
    break_dist = Hash.new(update.size + base.size)
    diff_size = (update.size - base.size)
    (-base.size..update.size).each {|k|
      i = diff_length = -k + diff_size
      i = 0 if i < 0
      i += 1 while (i - diff_length <= vk_lastindex[i])
      i -= 1
      break_dist[k] = i + i - diff_length
    }
    break_dist[-base.size] = update.size
    return break_dist
  end

  # O(ND)
  def lcs_ond(base, update)
    total_size = update.size + base.size
    diff_size  = update.size - base.size
    break_dist = make_break_dist(base, update)
    v = Hash.new(-1)
    v[1] = 0
    ret = Hash.new(Array.new)
    best_way = Array.new
    vk = d = k = flag = nil
    shortest_dist = total_size

    (0 .. total_size).each {|d|
      shortest_dist -= 1
      flag = false

      (-d .. d).step(2) {|k|
        next if (break_dist[k] >= shortest_dist)

        if (v[k-1] < v[k+1])
          vk = v[k+1];   ret[k] = ret[k+1]
        else
          vk = v[k-1]+1; ret[k] = ret[k-1]
        end
        flag = vk
        vk += 1 while (base[vk-k] == update[vk] && update[vk])
        dist = total_size - vk - vk + k
        if flag < vk
          ret[k] += [[flag, vk, k]]
          if dist < shortest_dist
            shortest_dist = dist
            best_way      = ret[k]
          end
        end
        v[k] = vk
      }
      return best_way unless flag
    }
    raise "LCS_ERROR"
  end

  def lcs(base, update)
    base, update = base.to_ary, update.to_ary

    mutual_set = Hash.new

    (base & update).each {|index|
      mutual_set[index] = true
    }

    m_base,   m_index_base   = purify(base  ,mutual_set)
    m_update, m_index_update = purify(update,mutual_set)

    resolve(lcs_ond(m_base, m_update), m_index_base, m_index_update)
  end

  # for speed
  # target = [elem0, elem1, ...]
  #        ->[[elem1, 1], [elem3, 3], ...]
  def purify(target, mutual_set)
    ret = []
    index = []

    target.each_with_index {|item, i|
      if mutual_set.has_key?(item)
        ret << item
        index << i
      end
    }

    return ret, index
  end

  def resolve(lcs, index_base, index_update)
    return lcs if !index_base || !index_update

    ret = Array.new

    lcs.each {|match|
      delta = match[2]; y1 = match[0] ; y2 = match[1] - 1
      while (y1 <= y2)
        i = 0
        i += 1 while index_base[y1-delta+i] - index_base[y1-delta] == i &&
                     index_update[y1+i] - index_update[y1] == i &&
                     index_update[y2] < y1 + i

        ret << [index_update[y1],
                index_update[y1+i]+1,
                index_update[y1]-index_base[y1-delta]]

        y1 += i + 1
      end
    }
    return ret
  end

  def make_patch_item(base, update, base_index, update_index,
                      base_to, update_to)

    if (base_index < base_to || update_index < update_to)

      [base_index, update_index,
       base  [  base_index ...  base_to],
       update[update_index ...update_to]]
    end
  end
end

class Patch
  def initialize(*v)
    if v.size > 0
      @ary = Diff.diff(*v)
    end
  end

  def self.read(log)
    new.read(log)
  end

  def ==(other)
    @ary == other.instance_variable_get(:@ary)
  end

  PATCH_HEADER_RE__ = %r{
        (\d+),?         # 0
        \d*
        (a|c|d)         # 1
        (\d+),?         # 2
        \d*\n

        ( (?:<\ .*?\n)* ) # 3
        (?: ---\n)?
        ( (?:>\ .*?\n)* ) # 4
        }x

  # read diff text
  def read(log)
    @ary = Array.new
    log.scan(PATCH_HEADER_RE__) {|item|
      item[1] = item[1].intern
      item[0] = item[0].to_i
      item[2] = item[2].to_i

      item[0] -= 1 if item[1] != :a
      item[2] -= 1 if item[1] != :d

      ret = [item[0], item[2]]
      out_item = item[3]
      in_item  = item[4]

      if out_item
#        ret << out_item.gsub(/^< /, '').split("\n")
        ret << out_item.scan(/^(?:< )(.*?)$/).flatten
      else
        ret << []
      end

      if in_item
#        ret << in_item.gsub(/^> /, '').split("\n")
        ret << in_item.scan(/^(?:> )(.*?)$/).flatten
      else
        ret << []
      end

      # @ary = [
      #          [base_index, update_index,
      #            [base_part, ...], [update_part, ...]
      #          ], ...
      #        ]
      @ary << ret
    }
    return self
  end

  def to_s
    @ary.collect {|item| make_diff_part(*item) }.join("\n")
  end

  def patch(target)
    ret = target.dup
    @ary.reverse_each {|i, _, len, repl|
      ret[i, len.size] = repl
    }
    return ret
  end

  def reverse_patch(target)
    ret = target.dup
    @ary.reverse_each {|_, i, repl, len|
      ret[i, len.size] = repl
    }
    return ret
  end

protected
  def make_diff_part(base_index, update_index, base_part, update_part)
    mode = check_diff_part_mode(base_part.size > 0,
                                update_part.size > 0)
    unless mode
      raise "LCS Error"
    end

    ret = ''
    ret << make_diff_part_header(mode == :a,
                                 base_index,
                                 base_index + base_part.size)
    ret << mode.to_s
    ret << make_diff_part_header(mode == :d,
                                 update_index,
                                 update_index + update_part.size)
    ret << "\n"
    base_part.each {|item|
      ret << '< ' << item.to_s << "\n"
    }

    ret << "---\n" if mode == :c

    update_part.each {|item|
      ret << '> ' << item.to_s << "\n"
    }

    return ret
  end

  def make_diff_part_header(flug, index, to)
    return to.to_s if flug
    index += 1
    index != to ? index.to_s << ',' << to.to_s : index.to_s
  end

  def check_diff_part_mode(base, update)
    return :c if base && update
    return :d if base
    return :a if update
    return nil
  end
end

if $0 == __FILE__
  def difftest(a, b)
    patch = Patch.new(a, b)

    patch2 = Patch.read(patch.to_s)
    raise "dif and dif2 are not identical" if patch != patch2

    c = patch.patch(a)
    raise unless (b == c)

    patch = Patch.new(b, a)

    c = patch.reverse_patch(a)
    raise unless (b == c)
  end

  a = "update".split(//)
  b = "deptauua".split(//)

  Diff.comm(b,a) {|l,c,r|
    p [l,c,r]
  }

  difftest(b, a)

  srand(0)
  a = []
  b = []
  50.times {
    a << rand(10000000000000000000).to_s.split(//)
    b << rand(10000000000000000000).to_s.split(//)
  }

  begin
    require 'benchmark'
  rescue LoadError
    module Benchmark
      CAPTION = nil
      def measure
        yield
      end
      module_function :measure
    end
  end

  puts Benchmark::CAPTION
  puts Benchmark.measure {
    a.size.times {|i|
      difftest(a[i], b[i])
    }
  }

  a = %w(a b c)
  difftest(a, [])
  difftest(a, %w(a a b b c c))
  difftest(a, %w(a c b b c a))
  difftest(a, %w(a x b y c z))
  difftest(a, %w(c b a))
  difftest(a, %w(c c b b a a))
end

In This Thread

Prev Next