X-Git-Url: https://git.yukkurigames.com/?p=string-lerp.git;a=blobdiff_plain;f=string-lerp.js;h=df0d2562f29dfad40b8b29671470d40411824605;hp=4a2c80f1533cad299063a7bdcc644defa5e5fe8b;hb=0e8c216ff3039c994a86f423cf66b60f162d0298;hpb=ce8d91dd9711ad0216e44f8ae69d114bd4fc85c8 diff --git a/string-lerp.js b/string-lerp.js index 4a2c80f..df0d256 100644 --- a/string-lerp.js +++ b/string-lerp.js @@ -1,56 +1,57 @@ +/* string-lerp - progressively turn one string into another + Copyright 2014 Joe Wreschnig + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. +*/ + +/* @license Copyright 2014 Joe Wreschnig - GNU GPL v2 or later */ + (function (exports) { "use strict"; var MAX_MATRIX_SIZE = 256 * 256; - function repeat(n, v) { - var r = []; - while (n--) - r.push(v); - return r; - } - - function createMatrix(i, j) { - var m = []; - while (i--) - m.push(repeat(j, 0)); - return m; - } - - function levenshteinMatrix(s, t) { + function levenshteinMatrix(s, t, ins, del, sub) { /** Calculate the Levenshtein edit distance matrix for two strings + + The matrix is returned as a flat typed array. - // Following http://en.wikipedia.org/wiki/Levenshtein_distance + Following http://en.wikipedia.org/wiki/Levenshtein_distance */ var m = s.length + 1; var n = t.length + 1; - var d = createMatrix(m, n); + var d = new Uint32Array(m * n); var i, j; for (i = 1; i < m; ++i) - d[i][0] = i; + d[n * i] = i; for (j = 1; j < n; ++j) - d[0][j] = j; + d[j] = j; for (j = 1; j < n; ++j) for (i = 1; i < m; ++i) if (s[i - 1] === t[j - 1]) - d[i][j] = d[i - 1][j - 1]; + d[n * i + j] = d[n * (i - 1) + j - 1]; else - d[i][j] = 1 + Math.min(d[i - 1][ j ], - d[ i ][j - 1], - d[i - 1][j - 1]); + d[n * i + j] = Math.min(del + d[n * (i - 1) + j ], + ins + d[n * i + j - 1], + sub + d[n * (i - 1) + j - 1]); return d; } - function editPath(m, s, t) { - var path = [] - var i = s.length; + function editPath(d, t) { + /** Given a Levenshtein matrix and target, create an edit list */ + var path = []; var j = t.length; + var n = j + 1; + var i = d.length / n - 1; while (i || j) { - var sub = (i && j) ? m[i - 1][j - 1] : Infinity; - var del = i ? m[i - 1][j] : Infinity; - var ins = j ? m[i][j - 1] : Infinity; + var sub = (i && j) ? d[n * (i - 1) + j - 1] : Infinity; + var del = i ? d[n * (i - 1) + j] : Infinity; + var ins = j ? d[n * i + j - 1] : Infinity; if (sub <= ins && sub <= del) { - if (m[i][j] !== m[i - 1][j - 1]) + if (d[n * i + j] !== d[n * (i - 1) + j - 1]) path.push(["sub", i - 1, t[j - 1]]); --i; --j; } else if (ins <= del) { @@ -64,52 +65,192 @@ return path; } - function applyEdits(edits, s, t) { + function diff(s, t) { + /** Create a diff between string s and t */ + return editPath(levenshteinMatrix(s, t, 2, 2, 3), t); + } + + function patch(edits, s) { + /** Apply the list of edits to s */ + var edit; var i; - for (i = 0; i < edits.length; ++i) { - var edit = edits[i]; - switch (edit[0]) { - case "sub": - s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1] + 1); - break; - case "ins": - s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1]); - break; - case "del": - s = s.slice(0, edit[1]) + s.slice(edit[1] + 1); - break; + + if (Array.isArray(s)) { + for (i = 0; i < edits.length; ++i) { + edit = edits[i]; + switch (edit[0]) { + case "sub": + s[edit[1]] = edit[2]; + break; + case "ins": + s.splice(edit[1], 0, edit[2]); + break; + case "del": + s.splice(edit[1], 1); + break; + } + } + } else { + for (i = 0; i < edits.length; ++i) { + edit = edits[i]; + switch (edit[0]) { + case "sub": + s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1] + 1); + break; + case "ins": + s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1]); + break; + case "del": + s = s.slice(0, edit[1]) + s.slice(edit[1] + 1); + break; + } } } return s; } - function slowLerp(a, b, p) { - var m = levenshteinMatrix(a, b); - var edits = editPath(m, a, b); - var partial = edits.slice(0, Math.round(p * edits.length)); - return applyEdits(partial, a, b); + var MULTI = /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]/; + + var GLYPH = /([\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uDC00-\uFE1F\uFE30-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF])([\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]*)/g; + + function diffLerp(a, b, p) { + /** Interpolate between two strings based on edit operations + + This interpolation algorithm applys a partial edit of one + string into the other. This produces nice looking results, + but can take a significant amount of time and memory to + compute the edits. It is not recommended for strings + longer than a few hundred characters. + */ + + // If given strings with astral codepoints or combining + // characters, split them into arrays of "glyphs" first, + // do the edit on the list of "glyphs", and rejoin them. + // + // This split is not perfect for all languages, but at least + // it won't create invalid surrogate pairs or orphaned + // combining characters. + if (a.match && a.match(MULTI) || b.match && b.match(MULTI)) { + var ca = a.match(GLYPH) || []; + var cb = b.match(GLYPH) || []; + return diffLerp(ca, cb, p).join(""); + } + + // The edit path works from the string end, forwards, because + // that's how Levenshtein edits work. To match LTR reading + // direction (and the behavior of fastLerp), swap the strings + // and invert the parameter when editing. + var edits = diff(b, a); + var partial = edits.slice(0, Math.round((1 - p) * edits.length)); + return patch(partial, b); + } + + var NUMBERS = /(-?\d+(?:\.\d+)?)/g; + + function areNumericTwins(a, b) { + /** Check if a and b differ only in numerals + + A leading "-" counts as part of numbers; a leading "+" + does not. Numbers may contain a single ".", but no other + floating point syntax. + */ + return a.replace(NUMBERS, "0") === b.replace(NUMBERS, "0"); + } + + function nlerp(a, b, p) { + return a + (b - a) * p; + } + + function numericLerp(a, b, p) { + /** Interpolate numerically between two strings containing numbers + + Numbers may have a leading "-" and a single "." to mark + the decimal point, but something must be after the ".". + If both of the numbers in a pair are integers, the result + is clamped to an integer. + + For example, numericLerp("0.0", "100", 0.123) === "12.3" + because the "." in "0.0" is interpreted as a decimal + point. But numericLerp("0.", "100.", 0.123) === "12." + because the strings are interpreted as integers followed + by a full stop. + + Calling this functions on strings that differ in more than + numerals gives undefined results. + */ + var aParts = a.split(NUMBERS); + var bParts = b.split(NUMBERS); + for (var i = 1; i < aParts.length; i += 2) { + var part = nlerp(+aParts[i], +bParts[i], p); + if (aParts[i].indexOf(".") === -1 && bParts[i].indexOf(".") === -1) + part = Math.round(part); + aParts[i] = part.toString(); + } + return aParts.join(""); } function fastLerp(a, b, p) { - var alen = Math.round(a.length * p); - var blen = Math.round(b.length * p); - return b.substring(0, blen) + a.substring(alen, a.length); + /** Interpolate between two strings based on length + + This interpolation algorithm progressively replaces the + front of one string with another. This approach is fast + but does not look good when the strings are similar. + */ + + // TODO: Consider fast-pathing this even more for very large + // strings, e.g. in the megabyte range. These are large enough + // that + if (a.match(MULTI) || b.match(MULTI)) { + var ca = a.match(GLYPH) || []; + var cb = b.match(GLYPH) || []; + var calen = Math.round(ca.length * p); + var cblen = Math.round(cb.length * p); + var r = cb.slice(0, cblen); + r.push.apply(r, ca.slice(calen, ca.length)); + return r.join(""); + } else { + var alen = Math.round(a.length * p); + var blen = Math.round(b.length * p); + return b.substring(0, blen) + a.substring(alen, a.length); + } } function lerp(a, b, p) { + /** Interpolate between two strings as best as possible + + If the strings are identical aside from numbers in them, + they are passed through numericLerp. + + If the strings are not numbers and short, they are passed + through diffLerp. + + Otherwise, they are passed through fastLerp. + */ a = a.toString(); b = b.toString(); + + // Fast path for boundary cases. + if (p === 0) return a; + if (p === 1) return b; + + if (areNumericTwins(a, b)) + return numericLerp(a, b, p); + + // Numeric lerps should over- and under-shoot when fed numbers + // outside 0 to 1, but other types cannot. + if (p < 0) return a; + if (p > 1) return b; + var n = a.length * b.length; - return (n && n < MAX_MATRIX_SIZE) - ? slowLerp(a, b, p) - : fastLerp(a, b, p); + return ((n && n < MAX_MATRIX_SIZE) ? diffLerp : fastLerp)(a, b, p); } exports.levenshteinMatrix = levenshteinMatrix; - exports.editPath = editPath; + exports.patch = patch; + exports.diff = diff; exports.fastLerp = fastLerp; - exports.slowLerp = slowLerp; - exports.applyEdits = applyEdits; + exports.diffLerp = diffLerp; + exports.numericLerp = numericLerp; exports.lerp = lerp; })(typeof exports === "undefined" ? (this.stringLerp = {}) : exports);