+/* string-lerp - progressively turn one string into another
+ Copyright 2014 Joe Wreschnig
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+*/
+
+/* @license Copyright 2014 Joe Wreschnig - GNU GPL v2 or later */
+
(function (exports) {
"use strict";
var MAX_MATRIX_SIZE = 256 * 256;
- function levenshteinMatrix(s, t, ins, del, sub) {
- /** Calculate the Levenshtein edit distance matrix for two strings
+ function costMatrix(source, target, ins, del, sub) {
+ /** Calculate the Levenshtein cost matrix for source and target
+
+ If source and target are strings, they cannot contain any
+ astral or combining codepoints. Such data must be passed
+ as arrays of strings with one element per glyph.
+
+ ins, del, and sub are the costs for insertion, deletion,
+ and substition respectively. Their default value is 1. If
+ only ins is passed, del and sub are set to the same cost.
+ If ins and del are passed, sub is set to the more
+ expensive of the two.
The matrix is returned as a flat typed array.
-
+
Following http://en.wikipedia.org/wiki/Levenshtein_distance
*/
- var m = s.length + 1;
- var n = t.length + 1;
+ ins = ins === undefined ? 1 : (ins | 0);
+ del = (del | 0) || ins;
+ sub = (sub | 0) || Math.max(ins, del);
+ var m = source.length + 1;
+ var n = target.length + 1;
var d = new Uint32Array(m * n);
var i, j;
for (i = 1; i < m; ++i)
d[j] = j;
for (j = 1; j < n; ++j)
for (i = 1; i < m; ++i)
- if (s[i - 1] === t[j - 1])
+ if (source[i - 1] === target[j - 1])
d[n * i + j] = d[n * (i - 1) + j - 1];
else
d[n * i + j] = Math.min(del + d[n * (i - 1) + j ],
return d;
}
- function editPath(d, t) {
- /** Given a Levenshtein matrix and target, create an edit list */
+ // First, note that deletion is just substition with nothing, so
+ // any DEL operation can be replaced by a SUB. Second, the
+ // operation code *is* the necessary slice offset for applying the
+ // diff.
+ var INS = 0, SUB = 1;
+
+ function editPath(costs, target) {
+ /** Given a cost matrix and a target, create an edit list */
var path = [];
- var j = t.length;
+ var j = target.length;
var n = j + 1;
- var i = d.length / n - 1;
+ var i = costs.length / n - 1;
while (i || j) {
- var sub = (i && j) ? d[n * (i - 1) + j - 1] : Infinity;
- var del = i ? d[n * (i - 1) + j] : Infinity;
- var ins = j ? d[n * i + j - 1] : Infinity;
+ var sub = (i && j) ? costs[n * (i - 1) + j - 1] : Infinity;
+ var del = i ? costs[n * (i - 1) + j] : Infinity;
+ var ins = j ? costs[n * i + j - 1] : Infinity;
if (sub <= ins && sub <= del) {
- if (d[n * i + j] !== d[n * (i - 1) + j - 1])
- path.push(["sub", i - 1, t[j - 1]]);
+ if (costs[n * i + j] !== costs[n * (i - 1) + j - 1])
+ path.push([SUB, i - 1, target[j - 1]]);
--i; --j;
} else if (ins <= del) {
- path.push(["ins", i, t[j - 1]]);
+ path.push([INS, i, target[j - 1]]);
--j;
} else {
- path.push(["del", i - 1]);
+ path.push([SUB, i - 1, ""]);
--i;
}
}
return path;
}
- function diff(s, t) {
- /** Create a diff between string s and t */
- return editPath(levenshteinMatrix(s, t, 2, 2, 3), t);
+ function diff(source, target, ins, del, sub) {
+ /** Create a list of edits to turn source into target
+
+ ins, del, and sub are as passed to costMatrix.
+ */
+ return editPath(costMatrix(source, target, ins, del, sub), target);
}
- function patch(edits, s) {
- /** Apply the list of edits to s */
- var edit;
- var i;
-
- if (Array.isArray(s)) {
- for (i = 0; i < edits.length; ++i) {
- edit = edits[i];
- switch (edit[0]) {
- case "sub":
- s[edit[1]] = edit[2];
- break;
- case "ins":
- s.splice(edit[1], 0, edit[2]);
- break;
- case "del":
- s.splice(edit[1], 1);
- break;
- }
- }
- } else {
- for (i = 0; i < edits.length; ++i) {
- edit = edits[i];
- switch (edit[0]) {
- case "sub":
- s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1] + 1);
- break;
- case "ins":
- s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1]);
- break;
- case "del":
- s = s.slice(0, edit[1]) + s.slice(edit[1] + 1);
- break;
- }
- }
+ function patchArray(diff, source) {
+ for (var i = 0; i < diff.length; ++i) {
+ var edit = diff[i];
+ source.splice(edit[1], edit[0], edit[2]);
+ }
+ return source;
+ }
+
+ function patchString(diff, source) {
+ for (var i = 0; i < diff.length; ++i) {
+ var edit = diff[i];
+ var head = source.slice(0, edit[1]);
+ var tail = source.slice(edit[1] + edit[0]);
+ source = head + edit[2] + tail;
}
- return s;
+ return source;
+ }
+
+ function patch(diff, source) {
+ /** Apply a list of edits to source */
+ var patcher = Array.isArray(source) ? patchArray : patchString;
+ return patcher(diff, source);
}
var MULTI = /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]/;
var GLYPH = /([\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uDC00-\uFE1F\uFE30-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF])([\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]*)/g;
- function diffLerp(a, b, p) {
- /** Interpolate between two strings based on edit operations
+ function diffLerp(source, target, amount ) {
+ /** Interpolate between two strings using edit operations
This interpolation algorithm applys a partial edit of one
string into the other. This produces nice looking results,
but can take a significant amount of time and memory to
compute the edits. It is not recommended for strings
longer than a few hundred characters.
- */
+ */
// If given strings with astral codepoints or combining
// characters, split them into arrays of "glyphs" first,
// This split is not perfect for all languages, but at least
// it won't create invalid surrogate pairs or orphaned
// combining characters.
- if (a.match && a.match(MULTI) || b.match && b.match(MULTI)) {
- var ca = a.match(GLYPH) || [];
- var cb = b.match(GLYPH) || [];
- return diffLerp(ca, cb, p).join("");
+ //
+ // TODO: The way this is called is a hack.
+ if (source.match && (source.match(MULTI) || target.match(MULTI))) {
+ var sourceGlyphs = source.match(GLYPH) || [];
+ var targetGlyphs = target.match(GLYPH) || [];
+ return diffLerp(sourceGlyphs, targetGlyphs, amount).join("");
}
// The edit path works from the string end, forwards, because
// that's how Levenshtein edits work. To match LTR reading
// direction (and the behavior of fastLerp), swap the strings
// and invert the parameter when editing.
- var edits = diff(b, a);
- var partial = edits.slice(0, Math.round((1 - p) * edits.length));
- return patch(partial, b);
+ var edits = diff(target, source, 2, 2, 3);
+ var partial = edits.slice(0, Math.round((1 - amount) * edits.length));
+ return patch(partial, target);
}
- var NUMBERS = /(-?\d+(?:\.\d+)?)/g;
+ var NUMBERS = /(-?\d{1,20}(?:\.\d{1,20})?)/g;
- function areNumericTwins(a, b) {
- /** Check if a and b differ only in numerals
-
- A leading "-" counts as part of numbers; a leading "+"
- does not. Numbers may contain a single ".", but no other
- floating point syntax.
- */
- return a.replace(NUMBERS, "0") === b.replace(NUMBERS, "0");
+ function areNumericTwins(source, target) {
+ /** Check if a and b differ only in numerals */
+ return source.replace(NUMBERS, "0") === target.replace(NUMBERS, "0");
}
- function nlerp(a, b, p) {
- return a + (b - a) * p;
+ function nlerp(source, target, amount) {
+ return source + (target - source) * amount;
}
- function numericLerp(a, b, p) {
- /** Interpolate numerically between two strings containing numbers
+ function numericLerp(source, target, amount) {
+ /** Interpolate numerically between strings containing numbers
Numbers may have a leading "-" and a single "." to mark
the decimal point, but something must be after the ".".
+ No other floating point syntax (e.g. 1e6) is supported.
If both of the numbers in a pair are integers, the result
is clamped to an integer.
Calling this functions on strings that differ in more than
numerals gives undefined results.
- */
- var aParts = a.split(NUMBERS);
- var bParts = b.split(NUMBERS);
- for (var i = 1; i < aParts.length; i += 2) {
- var part = nlerp(+aParts[i], +bParts[i], p);
- if (aParts[i].indexOf(".") === -1 && bParts[i].indexOf(".") === -1)
- part = Math.round(part);
- aParts[i] = part.toString();
+ */
+
+ var sourceParts = source.split(NUMBERS);
+ var targetParts = target.split(NUMBERS);
+ var destParts = targetParts;
+ for (var i = 1; i < sourceParts.length; i += 2) {
+ var sourcePart = sourceParts[i];
+ var targetPart = targetParts[i];
+ var part = nlerp(+sourcePart, +targetPart, amount);
+ var sourcePoint = sourcePart.indexOf(".");
+ var targetPoint = targetPart.indexOf(".");
+ var point = Math.round(nlerp(
+ sourcePoint >= 0 ? (sourcePart.length - 1) - sourcePoint : 0,
+ targetPoint >= 0 ? (targetPart.length - 1) - targetPoint : 0,
+ amount));
+ targetParts[i] = part.toFixed(point);
}
- return aParts.join("");
+ return targetParts.join("");
+ }
+
+ function fastLerpAstral(source, target, amount) {
+ var sourceGlyphs = source.match(GLYPH) || [];
+ var targetGlyphs = target.match(GLYPH) || [];
+ var sourceLength = Math.round(sourceGlyphs.length * amount);
+ var targetLength = Math.round(targetGlyphs.length * amount);
+ var head = targetGlyphs.slice(0, targetLength);
+ var tail = sourceGlyphs.slice(sourceLength, sourceGlyphs.length);
+ head.push.apply(head, tail);
+ return head.join("");
}
- function fastLerp(a, b, p) {
+ function fastLerpBasic(source, target, amount) {
+ var sourceLength = Math.round(source.length * amount);
+ var targetLength = Math.round(target.length * amount);
+ var head = target.substring(0, targetLength);
+ var tail = source.substring(sourceLength, source.length);
+ return head + tail;
+ }
+
+ function fastLerp(source, target, amount) {
/** Interpolate between two strings based on length
This interpolation algorithm progressively replaces the
// TODO: Consider fast-pathing this even more for very large
// strings, e.g. in the megabyte range. These are large enough
- // that
- if (a.match(MULTI) || b.match(MULTI)) {
- var ca = a.match(GLYPH) || [];
- var cb = b.match(GLYPH) || [];
- var calen = Math.round(ca.length * p);
- var cblen = Math.round(cb.length * p);
- var r = cb.slice(0, cblen);
- r.push.apply(r, ca.slice(calen, ca.length));
- return r.join("");
- } else {
- var alen = Math.round(a.length * p);
- var blen = Math.round(b.length * p);
- return b.substring(0, blen) + a.substring(alen, a.length);
- }
+ // that it should be fine to just pick a codepoint and search
+ // for the nearest glyph start.
+ if (source.match(MULTI) || target.match(MULTI))
+ return fastLerpAstral(source, target, amount);
+ else
+ return fastLerpBasic(source, target, amount);
}
- function lerp(a, b, p) {
+ function lerp(source, target, amount) {
/** Interpolate between two strings as best as possible
If the strings are identical aside from numbers in them,
Otherwise, they are passed through fastLerp.
*/
- a = a.toString();
- b = b.toString();
+ source = source.toString();
+ target = target.toString();
// Fast path for boundary cases.
- if (p === 0) return a;
- if (p === 1) return b;
+ if (amount === 0) return source;
+ if (amount === 1) return target;
- if (areNumericTwins(a, b))
- return numericLerp(a, b, p);
+ if (areNumericTwins(source, target))
+ return numericLerp(source, target, amount);
// Numeric lerps should over- and under-shoot when fed numbers
// outside 0 to 1, but other types cannot.
- if (p < 0) return a;
- if (p > 1) return b;
+ if (amount < 0) return source;
+ if (amount > 1) return target;
- var n = a.length * b.length;
- return ((n && n < MAX_MATRIX_SIZE) ? diffLerp : fastLerp)(a, b, p);
+ var n = source.length * target.length;
+ var appropriate = (n && n < MAX_MATRIX_SIZE) ? diffLerp : fastLerp;
+ return appropriate(source, target, amount);
}
- exports.levenshteinMatrix = levenshteinMatrix;
+ exports.costMatrix = costMatrix;
exports.patch = patch;
exports.diff = diff;
exports.fastLerp = fastLerp;