Niro* by cho45

#4 Levenshtein distance implemented in Ruby and RubyInline

module Levenshtein
	def levenshtein(a, b)
		if Inline::USABLE
			Inline.levenshtein(a, b)
		else
			PureRuby.levenshtein(a, b)
		end
	end

	module PureRuby
		def levenshtein(a, b)
			case
			when a.empty?
				b.length
			when b.empty?
				a.length
			else
				d = Array.new(a.length + 1) { |s|
					Array.new(b.length + 1, 0)
				}

				(0..a.length).each do |i|
					d[i][0] = i
				end

				(0..b.length).each do |j|
					d[0][j] = j
				end

				(1..a.length).each do |i|
					(1..b.length).each do |j|
						cost = (a[i - 1] == b[j - 1]) ? 0 : 1
						d[i][j] = [
							d[i-1][j  ] + 1,
							d[i  ][j-1] + 1,
							d[i-1][j-1] + cost
						].min
					end
				end

				d[a.length][b.length]
			end
		end

		module_function :levenshtein
	end

	module Inline
		begin
			require "rubygems"
			require "inline" # sudo gem install RubyInline

			inline do |builder|
				builder.c <<-'EOS'
					VALUE levenshtein (VALUE array1, VALUE array2) {
						VALUE ret;
						long len1 = RARRAY(array1)->len;
						long len2 = RARRAY(array2)->len;
						long i, j;

						long** d = ALLOC_N(long*, len1 + 1);
						for (i = 0; i <= len1; i++) {
							d[i] = ALLOC_N(long, len2 + 1);
							memset(d[i], 0, sizeof(d[i]));
						}

						for (i = 1; i <= len1; i++) d[i][0] = i;
						for (j = 1; j <= len2; j++) d[0][j] = j;
						for (i = 1; i <= len1; ++i) {
							for (j = 1; j <= len2; ++j) {
								int del = d[i-1][j  ] + 1;
								int ins = d[i  ][j-1] + 1;
								int sub = d[i-1][j-1] + (
									rb_equal(
										RARRAY(array1)->ptr[i-1],
										RARRAY(array2)->ptr[j-1]
									) ? 0 : 1
								);

								d[i][j] =
									(del <= ins && del <= sub) ? del:
									(ins <= del && ins <= sub) ? ins:
									sub;
							}
						}

						ret = LONG2FIX(d[len1][len2]);

						for (i = 0; i < len1; i++) free(d[i]);
						free(d);

						return ret;
					}
				EOS
			end

			module_function :levenshtein

			USABLE = true
		rescue LoadError
			USABLE = false
		end
	end
end


require 'rubygems'
require 'spec'

[Levenshtein::Inline, Levenshtein::PureRuby].each do |m|
	describe m do
		it "should return correct levenshtein distance" do
			[
				["kitten", "sitting", 3],
				["foo", "foo", 0],
				["", "", 0],
				["foO", "foo", 1],
				["", "foo", 3],
			].each do |a, b, expected|
				m.levenshtein(a.split(//), b.split(//)).should == expected
				m.levenshtein(b.split(//), a.split(//)).should == expected
			end
		end
	end
end

Spec::Runner.run

require "benchmark"

Benchmark.bm(10) do |x|
	n = 10000

	x.report("inline") do
		n.times {
			Levenshtein::Inline.levenshtein("kitten".split(//), "sitting".split(//))
		}
	end if Levenshtein::Inline::USABLE

	x.report("pureruby") do
		n.times {
			Levenshtein::PureRuby.levenshtein("kitten".split(//), "sitting".split(//))
		}
	end
end

[[ruby]] [[algorithm]

blog comments powered by Disqus
login