var MAX_MATRIX_SIZE = 256 * 256;
- function levenshteinMatrix(s, t, ins, del, sub) {
- /** Calculate the Levenshtein edit distance matrix for two strings
+ function costMatrix(source, target, ins, del, sub) {
+ /** Calculate the Levenshtein cost matrix for source and target
+
+ If source and target are strings, they cannot contain any
+ astral or combining codepoints. Such data must be passed
+ as arrays of strings with one element per glyph.
+
+ ins, del, and sub are the costs for insertion, deletion,
+ and substition respectively. Their default value is 1. If
+ only ins is passed, del and sub are set to the same cost.
+ If ins and del are passed, sub is set to the more
+ expensive of the two.
The matrix is returned as a flat typed array.
-
+
Following http://en.wikipedia.org/wiki/Levenshtein_distance
*/
- var m = s.length + 1;
- var n = t.length + 1;
+ ins = ins === undefined ? 1 : (ins | 0);
+ del = (del | 0) || ins;
+ sub = (sub | 0) || Math.max(ins, del);
+ var m = source.length + 1;
+ var n = target.length + 1;
var d = new Uint32Array(m * n);
var i, j;
for (i = 1; i < m; ++i)
d[j] = j;
for (j = 1; j < n; ++j)
for (i = 1; i < m; ++i)
- if (s[i - 1] === t[j - 1])
+ if (source[i - 1] === target[j - 1])
d[n * i + j] = d[n * (i - 1) + j - 1];
else
d[n * i + j] = Math.min(del + d[n * (i - 1) + j ],
return d;
}
- function editPath(d, t) {
- /** Given a Levenshtein matrix and target, create an edit list */
+ function editPath(costs, target) {
+ /** Given a cost matrix and a target, create an edit list */
var path = [];
- var j = t.length;
+ var j = target.length;
var n = j + 1;
- var i = d.length / n - 1;
+ var i = costs.length / n - 1;
while (i || j) {
- var sub = (i && j) ? d[n * (i - 1) + j - 1] : Infinity;
- var del = i ? d[n * (i - 1) + j] : Infinity;
- var ins = j ? d[n * i + j - 1] : Infinity;
+ var sub = (i && j) ? costs[n * (i - 1) + j - 1] : Infinity;
+ var del = i ? costs[n * (i - 1) + j] : Infinity;
+ var ins = j ? costs[n * i + j - 1] : Infinity;
if (sub <= ins && sub <= del) {
- if (d[n * i + j] !== d[n * (i - 1) + j - 1])
- path.push(["sub", i - 1, t[j - 1]]);
+ if (costs[n * i + j] !== costs[n * (i - 1) + j - 1])
+ path.push(["sub", i - 1, target[j - 1]]);
--i; --j;
} else if (ins <= del) {
- path.push(["ins", i, t[j - 1]]);
+ path.push(["ins", i, target[j - 1]]);
--j;
} else {
path.push(["del", i - 1]);
return path;
}
- function diff(s, t) {
- /** Create a diff between string s and t */
- return editPath(levenshteinMatrix(s, t, 2, 2, 3), t);
+ function diff(source, target, ins, del, sub) {
+ /** Create a diff between string source and target
+
+ ins, del, and sub are as passed to levenshtein
+ */
+ return editPath(costMatrix(source, target, ins, del, sub), target);
}
function patch(edits, s) {
// that's how Levenshtein edits work. To match LTR reading
// direction (and the behavior of fastLerp), swap the strings
// and invert the parameter when editing.
- var edits = diff(b, a);
+ var edits = diff(b, a, 2, 2, 3);
var partial = edits.slice(0, Math.round((1 - p) * edits.length));
return patch(partial, b);
}
return ((n && n < MAX_MATRIX_SIZE) ? diffLerp : fastLerp)(a, b, p);
}
- exports.levenshteinMatrix = levenshteinMatrix;
+ exports.costMatrix = costMatrix;
exports.patch = patch;
exports.diff = diff;
exports.fastLerp = fastLerp;