module Levenshtein
def levenshtein(a, b)
if Inline::USABLE
Inline.levenshtein(a, b)
else
PureRuby.levenshtein(a, b)
end
end
module PureRuby
def levenshtein(a, b)
case
when a.empty?
b.length
when b.empty?
a.length
else
d = Array.new(a.length + 1) { |s|
Array.new(b.length + 1, 0)
}
(0..a.length).each do |i|
d[i][0] = i
end
(0..b.length).each do |j|
d[0][j] = j
end
(1..a.length).each do |i|
(1..b.length).each do |j|
cost = (a[i - 1] == b[j - 1]) ? 0 : 1
d[i][j] = [
d[i-1][j ] + 1,
d[i ][j-1] + 1,
d[i-1][j-1] + cost
].min
end
end
d[a.length][b.length]
end
end
module_function :levenshtein
end
module Inline
begin
require "rubygems"
require "inline" # sudo gem install RubyInline
inline do |builder|
builder.c <<-'EOS'
VALUE levenshtein (VALUE array1, VALUE array2) {
VALUE ret;
long len1 = RARRAY(array1)->len;
long len2 = RARRAY(array2)->len;
long i, j;
long** d = ALLOC_N(long*, len1 + 1);
for (i = 0; i <= len1; i++) {
d[i] = ALLOC_N(long, len2 + 1);
memset(d[i], 0, sizeof(d[i]));
}
for (i = 1; i <= len1; i++) d[i][0] = i;
for (j = 1; j <= len2; j++) d[0][j] = j;
for (i = 1; i <= len1; ++i) {
for (j = 1; j <= len2; ++j) {
int del = d[i-1][j ] + 1;
int ins = d[i ][j-1] + 1;
int sub = d[i-1][j-1] + (
rb_equal(
RARRAY(array1)->ptr[i-1],
RARRAY(array2)->ptr[j-1]
) ? 0 : 1
);
d[i][j] =
(del <= ins && del <= sub) ? del:
(ins <= del && ins <= sub) ? ins:
sub;
}
}
ret = LONG2FIX(d[len1][len2]);
for (i = 0; i < len1; i++) free(d[i]);
free(d);
return ret;
}
EOS
end
module_function :levenshtein
USABLE = true
rescue LoadError
USABLE = false
end
end
end
require 'rubygems'
require 'spec'
[Levenshtein::Inline, Levenshtein::PureRuby].each do |m|
describe m do
it "should return correct levenshtein distance" do
[
["kitten", "sitting", 3],
["foo", "foo", 0],
["", "", 0],
["foO", "foo", 1],
["", "foo", 3],
].each do |a, b, expected|
m.levenshtein(a.split(//), b.split(//)).should == expected
m.levenshtein(b.split(//), a.split(//)).should == expected
end
end
end
end
Spec::Runner.run
require "benchmark"
Benchmark.bm(10) do |x|
n = 10000
x.report("inline") do
n.times {
Levenshtein::Inline.levenshtein("kitten".split(//), "sitting".split(//))
}
end if Levenshtein::Inline::USABLE
x.report("pureruby") do
n.times {
Levenshtein::PureRuby.levenshtein("kitten".split(//), "sitting".split(//))
}
end
end
[[ruby]] [[algorithm]