From: Joe Wreschnig Date: Tue, 13 May 2014 15:12:44 +0000 (+0200) Subject: Switch Levenshtein calculation to a flat, typed array. X-Git-Tag: 1.0.0~29 X-Git-Url: https://git.yukkurigames.com/?a=commitdiff_plain;h=bbcf50caedbd6edea3fd502d7cb92ece795ec58f;p=string-lerp.git Switch Levenshtein calculation to a flat, typed array. --- diff --git a/string-lerp.js b/string-lerp.js index 4a2c80f..84cc9b9 100644 --- a/string-lerp.js +++ b/string-lerp.js @@ -3,54 +3,48 @@ var MAX_MATRIX_SIZE = 256 * 256; - function repeat(n, v) { - var r = []; - while (n--) - r.push(v); - return r; - } - function createMatrix(i, j) { - var m = []; - while (i--) - m.push(repeat(j, 0)); - return m; + return new Uint32Array(i * j); } function levenshteinMatrix(s, t) { /** Calculate the Levenshtein edit distance matrix for two strings + + The matrix is returned as a flat unsigned typed array. - // Following http://en.wikipedia.org/wiki/Levenshtein_distance + Following http://en.wikipedia.org/wiki/Levenshtein_distance */ var m = s.length + 1; var n = t.length + 1; var d = createMatrix(m, n); var i, j; for (i = 1; i < m; ++i) - d[i][0] = i; + d[n * i] = i; for (j = 1; j < n; ++j) - d[0][j] = j; + d[j] = j; for (j = 1; j < n; ++j) for (i = 1; i < m; ++i) if (s[i - 1] === t[j - 1]) - d[i][j] = d[i - 1][j - 1]; + d[n * i + j] = d[n * (i - 1) + j - 1]; else - d[i][j] = 1 + Math.min(d[i - 1][ j ], - d[ i ][j - 1], - d[i - 1][j - 1]); + d[n * i + j] = 1 + Math.min(d[n * (i - 1) + j ], + d[n * i + j - 1], + d[n * (i - 1) + j - 1]); return d; } - function editPath(m, s, t) { + function editPath(d, s, t) { + /** Given a Levenshtein result matrix, trace the shortest edit */ var path = [] var i = s.length; var j = t.length; + var n = j + 1; while (i || j) { - var sub = (i && j) ? m[i - 1][j - 1] : Infinity; - var del = i ? m[i - 1][j] : Infinity; - var ins = j ? m[i][j - 1] : Infinity; + var sub = (i && j) ? d[n * (i - 1) + j - 1] : Infinity; + var del = i ? d[n * (i - 1) + j] : Infinity; + var ins = j ? d[n * i + j - 1] : Infinity; if (sub <= ins && sub <= del) { - if (m[i][j] !== m[i - 1][j - 1]) + if (d[n * i + j] !== d[n * (i - 1) + j - 1]) path.push(["sub", i - 1, t[j - 1]]); --i; --j; } else if (ins <= del) { @@ -64,7 +58,8 @@ return path; } - function applyEdits(edits, s, t) { + function applyEdits(edits, s) { + /** Apply the list of edits to s */ var i; for (i = 0; i < edits.length; ++i) { var edit = edits[i]; @@ -84,13 +79,26 @@ } function slowLerp(a, b, p) { - var m = levenshteinMatrix(a, b); - var edits = editPath(m, a, b); + /** Interpolate between two strings slowly + + This interpolation algorithm applys a partial edit of one + string into the other. This produces nice looking results, + but can take a significant amount of time and memory to + compute the edits. It is not recommended for strings + longer than a few hundred characters. + */ + var edits = editPath(levenshteinMatrix(a, b), a, b); var partial = edits.slice(0, Math.round(p * edits.length)); - return applyEdits(partial, a, b); + return applyEdits(partial, a); } function fastLerp(a, b, p) { + /** Interpolate between two strings very quickly + + This interpolation algorithm progressively replaces the + front of one string with another. This approach is fast + but does not look good when the strings are similar. + */ var alen = Math.round(a.length * p); var blen = Math.round(b.length * p); return b.substring(0, blen) + a.substring(alen, a.length); @@ -100,6 +108,8 @@ a = a.toString(); b = b.toString(); var n = a.length * b.length; + // TODO: if both strings are integers, do an integer lerp. + // TODO: if both strings are numbers, do a numeric lerp. return (n && n < MAX_MATRIX_SIZE) ? slowLerp(a, b, p) : fastLerp(a, b, p);