+/* string-lerp - progressively turn one string into another
+ Copyright 2014 Joe Wreschnig
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+*/
+
+/* @license Copyright 2014 Joe Wreschnig - GNU GPL v2 or later */
+
(function (exports) {
"use strict";
var MAX_MATRIX_SIZE = 256 * 256;
- function levenshteinMatrix(s, t) {
+ function levenshteinMatrix(s, t, ins, del, sub) {
/** Calculate the Levenshtein edit distance matrix for two strings
- The matrix is returned as a flat unsigned typed array.
+ The matrix is returned as a flat typed array.
Following http://en.wikipedia.org/wiki/Levenshtein_distance
*/
if (s[i - 1] === t[j - 1])
d[n * i + j] = d[n * (i - 1) + j - 1];
else
- d[n * i + j] = 1 + Math.min(d[n * (i - 1) + j ],
- d[n * i + j - 1],
- d[n * (i - 1) + j - 1]);
+ d[n * i + j] = Math.min(del + d[n * (i - 1) + j ],
+ ins + d[n * i + j - 1],
+ sub + d[n * (i - 1) + j - 1]);
return d;
}
function editPath(d, t) {
/** Given a Levenshtein matrix and target, create an edit list */
- var path = []
+ var path = [];
var j = t.length;
var n = j + 1;
var i = d.length / n - 1;
function diff(s, t) {
/** Create a diff between string s and t */
- return editPath(levenshteinMatrix(s, t), t);
+ return editPath(levenshteinMatrix(s, t, 2, 2, 3), t);
}
function patch(edits, s) {
/** Apply the list of edits to s */
+ var edit;
var i;
- for (i = 0; i < edits.length; ++i) {
- var edit = edits[i];
- switch (edit[0]) {
- case "sub":
- s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1] + 1);
- break;
- case "ins":
- s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1]);
- break;
- case "del":
- s = s.slice(0, edit[1]) + s.slice(edit[1] + 1);
- break;
+
+ if (Array.isArray(s)) {
+ for (i = 0; i < edits.length; ++i) {
+ edit = edits[i];
+ switch (edit[0]) {
+ case "sub":
+ s[edit[1]] = edit[2];
+ break;
+ case "ins":
+ s.splice(edit[1], 0, edit[2]);
+ break;
+ case "del":
+ s.splice(edit[1], 1);
+ break;
+ }
+ }
+ } else {
+ for (i = 0; i < edits.length; ++i) {
+ edit = edits[i];
+ switch (edit[0]) {
+ case "sub":
+ s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1] + 1);
+ break;
+ case "ins":
+ s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1]);
+ break;
+ case "del":
+ s = s.slice(0, edit[1]) + s.slice(edit[1] + 1);
+ break;
+ }
}
}
return s;
}
+ var MULTI = /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]/;
+
+ var GLYPH = /([\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uDC00-\uFE1F\uFE30-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF])([\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]*)/g;
+
function diffLerp(a, b, p) {
- /** Interpolate between two strings based on edit distance
+ /** Interpolate between two strings based on edit operations
This interpolation algorithm applys a partial edit of one
string into the other. This produces nice looking results,
compute the edits. It is not recommended for strings
longer than a few hundred characters.
*/
- var edits = diff(a, b);
- var partial = edits.slice(0, Math.round(p * edits.length));
- return patch(partial, a);
+
+ // If given strings with astral codepoints or combining
+ // characters, split them into arrays of "glyphs" first,
+ // do the edit on the list of "glyphs", and rejoin them.
+ //
+ // This split is not perfect for all languages, but at least
+ // it won't create invalid surrogate pairs or orphaned
+ // combining characters.
+ if (a.match && a.match(MULTI) || b.match && b.match(MULTI)) {
+ var ca = a.match(GLYPH) || [];
+ var cb = b.match(GLYPH) || [];
+ return diffLerp(ca, cb, p).join("");
+ }
+
+ // The edit path works from the string end, forwards, because
+ // that's how Levenshtein edits work. To match LTR reading
+ // direction (and the behavior of fastLerp), swap the strings
+ // and invert the parameter when editing.
+ var edits = diff(b, a);
+ var partial = edits.slice(0, Math.round((1 - p) * edits.length));
+ return patch(partial, b);
}
- var NUMBERS = /(-?\d+(?:\.\d+)?)/g
+ var NUMBERS = /(-?\d+(?:\.\d+)?)/g;
function areNumericTwins(a, b) {
/** Check if a and b differ only in numerals
is clamped to an integer.
For example, numericLerp("0.0", "100", 0.123) === "12.3"
- because the "." in "0.0" is intepreted as a decimal point.
- But numericLerp("0.", "100.", 0.123) === "12." because the
- strings are interpreted as integers followed by a full
- stop.
+ because the "." in "0.0" is interpreted as a decimal
+ point. But numericLerp("0.", "100.", 0.123) === "12."
+ because the strings are interpreted as integers followed
+ by a full stop.
Calling this functions on strings that differ in more than
numerals gives undefined results.
var aParts = a.split(NUMBERS);
var bParts = b.split(NUMBERS);
for (var i = 1; i < aParts.length; i += 2) {
- var part = nlerp(+aParts[i], +bParts[i], p)
+ var part = nlerp(+aParts[i], +bParts[i], p);
if (aParts[i].indexOf(".") === -1 && bParts[i].indexOf(".") === -1)
part = Math.round(part);
aParts[i] = part.toString();
front of one string with another. This approach is fast
but does not look good when the strings are similar.
*/
- var alen = Math.round(a.length * p);
- var blen = Math.round(b.length * p);
- return b.substring(0, blen) + a.substring(alen, a.length);
+
+ // TODO: Consider fast-pathing this even more for very large
+ // strings, e.g. in the megabyte range. These are large enough
+ // that
+ if (a.match(MULTI) || b.match(MULTI)) {
+ var ca = a.match(GLYPH) || [];
+ var cb = b.match(GLYPH) || [];
+ var calen = Math.round(ca.length * p);
+ var cblen = Math.round(cb.length * p);
+ var r = cb.slice(0, cblen);
+ r.push.apply(r, ca.slice(calen, ca.length));
+ return r.join("");
+ } else {
+ var alen = Math.round(a.length * p);
+ var blen = Math.round(b.length * p);
+ return b.substring(0, blen) + a.substring(alen, a.length);
+ }
}
function lerp(a, b, p) {
if (p === 1) return b;
if (areNumericTwins(a, b))
- return numericLerp(a, b, p)
+ return numericLerp(a, b, p);
// Numeric lerps should over- and under-shoot when fed numbers
// outside 0 to 1, but other types cannot.
if (p > 1) return b;
var n = a.length * b.length;
- return (n && n < MAX_MATRIX_SIZE)
- ? diffLerp(a, b, p)
- : fastLerp(a, b, p);
+ return ((n && n < MAX_MATRIX_SIZE) ? diffLerp : fastLerp)(a, b, p);
}
exports.levenshteinMatrix = levenshteinMatrix;