# Canonical bottom-up editing distance calculation. function levenshtein(s1::String, s2::String) # Initialize a distance matrix. All distances are assumed to be infinite. δ = ones(length(s1) + 1, length(s2) + 1) * Inf δ[:,1] = 0:(length(s1)) # left column gets values 0 to the length of the first string δ[1,:] = 0:(length(s2)) # top row gets values 0 to the length of the second string # Now we use bottom-up dynamic programming to populate the table. foreach(row -> foreach(column -> δ[row, column] = min( # If the characters match then the distance adds 0, otherwise 1. δ[row - 1, column - 1] + Int(s1[row - 1] != s2[column - 1]), δ[row - 1, column] + 1, # Moving down means we inserted 1 character δ[row, column - 1] + 1 # Moving right means we deleted 1 character ), 2:(length(s2) + 1)), 2:(length(s1) + 1)) return(convert.(Int,floor.(δ))) end # You can also compute editing distance with top-down (recursive) dynamic programming. # This only works because the problem can be stated as a directed acyclic graph. function top_down(s1::String, s2::String) δ = ones(length(s1) + 1, length(s2) + 1) * Inf # You can get away with setting only δ[1,1] = 0, but you'll have to do more bounds checking # for (x > 1) and (y > 1) in the recurrence relation. δ[:,1] = 0:(length(s1)) δ[1,:] = 0:(length(s2)) function distance(x, y) if δ[x, y] == Inf δ[x, y] = min( 1 + distance(x - 1, y), 1 + distance(x, y - 1), distance(x - 1, y - 1) + Int(s1[x - 1] != s2[y - 1])) end return δ[x, y] end distance(length(s1) + 1, length(s2) + 1); return(convert.(Int,floor.(δ))) end using Memoize # Of course, you don't *have* to use memoization, but it will be *REALLY* O(n!) slow # for non-trivial inputs. Julia's built-in @memoize can handle all of this for us. # Personally, I like letting the library worry about memo tables. I also like having # the base cases very explicitly written out in one place. function editing_distance(s1::String, s2::String) @memoize function δ(x, y) if x == 1 && y == 1 return 0 elseif x == 1 return y elseif y == 1 return x else return min( 1 + δ(x - 1, y), 1 + δ(x, y - 1), δ(x - 1, y - 1) + Int(s1[x - 1] != s2[y - 1])) end end return(δ(length(s1) + 1, length(s2) + 1)) end println("Bottom-up:"); display(levenshtein(ARGS[1],ARGS[2])); println(); println("Top-down:"); display(top_down(ARGS[1], ARGS[2])); println(); println("Editing distance computed using top-down recursion with @memoize: ", editing_distance(ARGS[1], ARGS[2]));