From: Joe Wreschnig Date: Mon, 19 May 2014 00:30:27 +0000 (+0200) Subject: Rename levenshteinMatrix to costMatrix. Better argument names for costMatrix, editPat... X-Git-Tag: 1.0.0~15 X-Git-Url: https://git.yukkurigames.com/?a=commitdiff_plain;h=9436ccb743d9cf07a8607da8f75ac175237bccdf;p=string-lerp.git Rename levenshteinMatrix to costMatrix. Better argument names for costMatrix, editPath, diff. --- diff --git a/string-lerp.js b/string-lerp.js index df0d256..cff0bf7 100644 --- a/string-lerp.js +++ b/string-lerp.js @@ -14,15 +14,28 @@ var MAX_MATRIX_SIZE = 256 * 256; - function levenshteinMatrix(s, t, ins, del, sub) { - /** Calculate the Levenshtein edit distance matrix for two strings + function costMatrix(source, target, ins, del, sub) { + /** Calculate the Levenshtein cost matrix for source and target + + If source and target are strings, they cannot contain any + astral or combining codepoints. Such data must be passed + as arrays of strings with one element per glyph. + + ins, del, and sub are the costs for insertion, deletion, + and substition respectively. Their default value is 1. If + only ins is passed, del and sub are set to the same cost. + If ins and del are passed, sub is set to the more + expensive of the two. The matrix is returned as a flat typed array. - + Following http://en.wikipedia.org/wiki/Levenshtein_distance */ - var m = s.length + 1; - var n = t.length + 1; + ins = ins === undefined ? 1 : (ins | 0); + del = (del | 0) || ins; + sub = (sub | 0) || Math.max(ins, del); + var m = source.length + 1; + var n = target.length + 1; var d = new Uint32Array(m * n); var i, j; for (i = 1; i < m; ++i) @@ -31,7 +44,7 @@ d[j] = j; for (j = 1; j < n; ++j) for (i = 1; i < m; ++i) - if (s[i - 1] === t[j - 1]) + if (source[i - 1] === target[j - 1]) d[n * i + j] = d[n * (i - 1) + j - 1]; else d[n * i + j] = Math.min(del + d[n * (i - 1) + j ], @@ -40,22 +53,22 @@ return d; } - function editPath(d, t) { - /** Given a Levenshtein matrix and target, create an edit list */ + function editPath(costs, target) { + /** Given a cost matrix and a target, create an edit list */ var path = []; - var j = t.length; + var j = target.length; var n = j + 1; - var i = d.length / n - 1; + var i = costs.length / n - 1; while (i || j) { - var sub = (i && j) ? d[n * (i - 1) + j - 1] : Infinity; - var del = i ? d[n * (i - 1) + j] : Infinity; - var ins = j ? d[n * i + j - 1] : Infinity; + var sub = (i && j) ? costs[n * (i - 1) + j - 1] : Infinity; + var del = i ? costs[n * (i - 1) + j] : Infinity; + var ins = j ? costs[n * i + j - 1] : Infinity; if (sub <= ins && sub <= del) { - if (d[n * i + j] !== d[n * (i - 1) + j - 1]) - path.push(["sub", i - 1, t[j - 1]]); + if (costs[n * i + j] !== costs[n * (i - 1) + j - 1]) + path.push(["sub", i - 1, target[j - 1]]); --i; --j; } else if (ins <= del) { - path.push(["ins", i, t[j - 1]]); + path.push(["ins", i, target[j - 1]]); --j; } else { path.push(["del", i - 1]); @@ -65,9 +78,12 @@ return path; } - function diff(s, t) { - /** Create a diff between string s and t */ - return editPath(levenshteinMatrix(s, t, 2, 2, 3), t); + function diff(source, target, ins, del, sub) { + /** Create a diff between string source and target + + ins, del, and sub are as passed to levenshtein + */ + return editPath(costMatrix(source, target, ins, del, sub), target); } function patch(edits, s) { @@ -140,7 +156,7 @@ // that's how Levenshtein edits work. To match LTR reading // direction (and the behavior of fastLerp), swap the strings // and invert the parameter when editing. - var edits = diff(b, a); + var edits = diff(b, a, 2, 2, 3); var partial = edits.slice(0, Math.round((1 - p) * edits.length)); return patch(partial, b); } @@ -245,7 +261,7 @@ return ((n && n < MAX_MATRIX_SIZE) ? diffLerp : fastLerp)(a, b, p); } - exports.levenshteinMatrix = levenshteinMatrix; + exports.costMatrix = costMatrix; exports.patch = patch; exports.diff = diff; exports.fastLerp = fastLerp;