X-Git-Url: https://git.yukkurigames.com/?p=string-lerp.git;a=blobdiff_plain;f=string-lerp.js;h=54731995783f405d7d0102d39556747cb2c0ea73;hp=6e0b109c821efdea53551a232cd600987c96de35;hb=39287ad87b41df055c4c7569b8e443051d182f1b;hpb=f037d43269bdb28acf28500ce24d2bef1e011a75 diff --git a/string-lerp.js b/string-lerp.js index 6e0b109..5473199 100644 --- a/string-lerp.js +++ b/string-lerp.js @@ -1,17 +1,54 @@ +/* string-lerp - progressively turn one string into another + Copyright 2014 Joe Wreschnig + Licensed under the terms of the GNU GPL v2 or later + @license http://www.gnu.org/licenses/gpl-2.0.html + @source: http://yukkurigames.com/string-lerp/ +*//* + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + As additional permission, you may distribute the program or works + based on it without the copy of the GNU GPL normally required, + provided you include this license notice and a URL through which + recipients can access the corresponding source code. +*/ + +/*globals exports, Uint32Array */ + (function (exports) { "use strict"; var MAX_MATRIX_SIZE = 256 * 256; - function levenshteinMatrix(s, t, ins, del, sub) { - /** Calculate the Levenshtein edit distance matrix for two strings + function costMatrix(source, target, ins, del, sub) { + /** Calculate the Levenshtein cost matrix for source and target + + If source and target are strings, they cannot contain any + astral or combining codepoints. Such data must be passed + as arrays of strings with one element per glyph. + + ins, del, and sub are the costs for insertion, deletion, + and substition respectively. Their default value is 1. If + only ins is passed, del and sub are set to the same cost. + If ins and del are passed, sub is set to the more + expensive of the two. The matrix is returned as a flat typed array. - + Following http://en.wikipedia.org/wiki/Levenshtein_distance */ - var m = s.length + 1; - var n = t.length + 1; + ins = ins === undefined ? 1 : (ins | 0); + del = (del | 0) || ins; + sub = (sub | 0) || Math.max(ins, del); + var m = source.length + 1; + var n = target.length + 1; var d = new Uint32Array(m * n); var i, j; for (i = 1; i < m; ++i) @@ -20,7 +57,7 @@ d[j] = j; for (j = 1; j < n; ++j) for (i = 1; i < m; ++i) - if (s[i - 1] === t[j - 1]) + if (source[i - 1] === target[j - 1]) d[n * i + j] = d[n * (i - 1) + j - 1]; else d[n * i + j] = Math.min(del + d[n * (i - 1) + j ], @@ -29,136 +66,140 @@ return d; } - function editPath(d, t) { - /** Given a Levenshtein matrix and target, create an edit list */ + // First, note that deletion is just substition with nothing, so + // any DEL operation can be replaced by a SUB. Second, the + // operation code *is* the necessary slice offset for applying the + // diff. + var INS = 0, SUB = 1; + + function editPath(costs, target) { + /** Given a cost matrix and a target, create an edit list */ var path = []; - var j = t.length; + var j = target.length; var n = j + 1; - var i = d.length / n - 1; + var i = costs.length / n - 1; while (i || j) { - var sub = (i && j) ? d[n * (i - 1) + j - 1] : Infinity; - var del = i ? d[n * (i - 1) + j] : Infinity; - var ins = j ? d[n * i + j - 1] : Infinity; + var sub = (i && j) ? costs[n * (i - 1) + j - 1] : Infinity; + var del = i ? costs[n * (i - 1) + j] : Infinity; + var ins = j ? costs[n * i + j - 1] : Infinity; if (sub <= ins && sub <= del) { - if (d[n * i + j] !== d[n * (i - 1) + j - 1]) - path.push(["sub", i - 1, t[j - 1]]); + if (costs[n * i + j] !== costs[n * (i - 1) + j - 1]) + path.push([SUB, i - 1, target[j - 1]]); --i; --j; } else if (ins <= del) { - path.push(["ins", i, t[j - 1]]); + path.push([INS, i, target[j - 1]]); --j; } else { - path.push(["del", i - 1]); + path.push([SUB, i - 1, ""]); --i; } } return path; } - function diff(s, t) { - /** Create a diff between string s and t */ - return editPath(levenshteinMatrix(s, t, 2, 2, 3), t); - } - - function patch(edits, s) { - /** Apply the list of edits to s */ - var edit; - var i; - - if (Array.isArray(s)) { - for (i = 0; i < edits.length; ++i) { - edit = edits[i]; - switch (edit[0]) { - case "sub": - s[edit[1]] = edit[2]; - break; - case "ins": - s.splice(edit[1], 0, edit[2]); - break; - case "del": - s.splice(edit[1], 1); - break; - } - } - } else { - for (i = 0; i < edits.length; ++i) { - edit = edits[i]; - switch (edit[0]) { - case "sub": - s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1] + 1); - break; - case "ins": - s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1]); - break; - case "del": - s = s.slice(0, edit[1]) + s.slice(edit[1] + 1); - break; - } - } + function diff(source, target, ins, del, sub) { + /** Create a list of edits to turn source into target + + ins, del, and sub are as passed to costMatrix. + */ + return editPath(costMatrix(source, target, ins, del, sub), target); + } + + function patchArray(diff, source) { + for (var i = 0; i < diff.length; ++i) { + var edit = diff[i]; + source.splice(edit[1], edit[0], edit[2]); } - return s; + return source; } - var MULTI = /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]/; + function patchString(diff, source) { + for (var i = 0; i < diff.length; ++i) { + var edit = diff[i]; + var head = source.slice(0, edit[1]); + var tail = source.slice(edit[1] + edit[0]); + source = head + edit[2] + tail; + } + return source; + } - var GLYPH = /([\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uDC00-\uFE1F\uFE30-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF])([\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]*)/g; + function patch(diff, source) { + /** Apply a list of edits to source */ + var patcher = Array.isArray(source) ? patchArray : patchString; + return patcher(diff, source); + } - function diffLerp(a, b, p) { - /** Interpolate between two strings based on edit operations + // Matches if a string contains combining characters or astral + // codepoints (technically, the first half surrogate of an astral + // codepoint). + var MULTI = /[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uD800-\uDBFF\uFE20-\uFE2F]/; - This interpolation algorithm applys a partial edit of one - string into the other. This produces nice looking results, - but can take a significant amount of time and memory to - compute the edits. It is not recommended for strings - longer than a few hundred characters. - */ + // Match an entire (potentially astral) codepoint and any + // combining characters following it. + var GLYPH = /[\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uD800-\uFE1F\uFE30-\uFFFF][\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uDC00-\uDFFF\uFE20-\uFE2F]*/g; - // If given strings with astral codepoints or combining - // characters, split them into arrays of "glyphs" first, - // do the edit on the list of "glyphs", and rejoin them. - // + function diffLerpAstral(source, target, amount) { // This split is not perfect for all languages, but at least // it won't create invalid surrogate pairs or orphaned // combining characters. - if (a.match && a.match(MULTI) || b.match && b.match(MULTI)) { - var ca = a.match(GLYPH) || []; - var cb = b.match(GLYPH) || []; - return diffLerp(ca, cb, p).join(""); - } - + var sourceGlyphs = source.match(GLYPH) || []; + var targetGlyphs = target.match(GLYPH) || []; + var edits = diff(targetGlyphs, sourceGlyphs, 2, 2, 3); // The edit path works from the string end, forwards, because // that's how Levenshtein edits work. To match LTR reading // direction (and the behavior of fastLerp), swap the strings // and invert the parameter when editing. - var edits = diff(b, a); - var partial = edits.slice(0, Math.round((1 - p) * edits.length)); - return patch(partial, b); + var partial = edits.slice(0, Math.round((1 - amount) * edits.length)); + return patchArray(partial, targetGlyphs).join(""); } - var NUMBERS = /(-?\d+(?:\.\d+)?)/g; + function diffLerpBasic(source, target, amount) { + var edits = diff(target, source, 2, 2, 3); + // The edit path works from the string end, forwards, because + // that's how Levenshtein edits work. To match LTR reading + // direction (and the behavior of fastLerp), swap the strings + // and invert the parameter when editing. + var partial = edits.slice(0, Math.round((1 - amount) * edits.length)); + return patchString(partial, target); + } - function areNumericTwins(a, b) { - /** Check if a and b differ only in numerals + function diffLerp(source, target, amount) { + /** Interpolate between two strings using edit operations - A leading "-" counts as part of numbers; a leading "+" - does not. Numbers may contain a single ".", but no other - floating point syntax. + This interpolation algorithm applys a partial edit of one + string into the other. This produces nice looking results, + but can take a significant amount of time and memory to + compute the edits. It is not recommended for strings + longer than a few hundred characters. */ - return a.replace(NUMBERS, "0") === b.replace(NUMBERS, "0"); + + if (source.match(MULTI) || target.match(MULTI)) + return diffLerpAstral(source, target, amount); + else + return diffLerpBasic(source, target, amount); } - function nlerp(a, b, p) { - return a + (b - a) * p; + var NUMBERS = /(-?\d{1,20}(?:\.\d{1,20})?)/g; + + function areNumericTwins(source, target) { + /** Check if a and b differ only in numerals */ + return source.replace(NUMBERS, "0") === target.replace(NUMBERS, "0"); + } + + function nlerp(source, target, amount) { + return source + (target - source) * amount; } - function numericLerp(a, b, p) { - /** Interpolate numerically between two strings containing numbers + function numericLerp(source, target, amount) { + /** Interpolate numerically between strings containing numbers Numbers may have a leading "-" and a single "." to mark the decimal point, but something must be after the ".". - If both of the numbers in a pair are integers, the result - is clamped to an integer. + No other floating point syntax (e.g. 1e6) is supported. + They are treated as fixed-point values, with the point's + position itself interpolating. - For example, numericLerp("0.0", "100", 0.123) === "12.3" + For example, numericLerp("0.0", "100".0, 0.123) === "12.3" because the "." in "0.0" is interpreted as a decimal point. But numericLerp("0.", "100.", 0.123) === "12." because the strings are interpreted as integers followed @@ -166,19 +207,47 @@ Calling this functions on strings that differ in more than numerals gives undefined results. - */ - var aParts = a.split(NUMBERS); - var bParts = b.split(NUMBERS); - for (var i = 1; i < aParts.length; i += 2) { - var part = nlerp(+aParts[i], +bParts[i], p); - if (aParts[i].indexOf(".") === -1 && bParts[i].indexOf(".") === -1) - part = Math.round(part); - aParts[i] = part.toString(); + */ + + var targetParts = target.split(NUMBERS); + var match; + var i = 1; + while ((match = NUMBERS.exec(source))) { + var sourcePart = match[0]; + var targetPart = targetParts[i]; + var part = nlerp(+sourcePart, +targetPart, amount); + var sourcePoint = sourcePart.indexOf("."); + var targetPoint = targetPart.indexOf("."); + var point = Math.round(nlerp( + sourcePoint >= 0 ? (sourcePart.length - 1) - sourcePoint : 0, + targetPoint >= 0 ? (targetPart.length - 1) - targetPoint : 0, + amount)); + targetParts[i] = part.toFixed(point); + i += 2; } - return aParts.join(""); + return targetParts.join(""); } - function fastLerp(a, b, p) { + function fastLerpAstral(source, target, amount) { + var sourceGlyphs = source.match(GLYPH) || []; + var targetGlyphs = target.match(GLYPH) || []; + var sourceLength = Math.round(sourceGlyphs.length * amount); + var targetLength = Math.round(targetGlyphs.length * amount); + var head = targetGlyphs.slice(0, targetLength); + var tail = sourceGlyphs.slice(sourceLength, sourceGlyphs.length); + head.push.apply(head, tail); + return head.join(""); + } + + function fastLerpBasic(source, target, amount) { + var sourceLength = Math.round(source.length * amount); + var targetLength = Math.round(target.length * amount); + var head = target.substring(0, targetLength); + var tail = source.substring(sourceLength, source.length); + return head + tail; + } + + function fastLerp(source, target, amount) { /** Interpolate between two strings based on length This interpolation algorithm progressively replaces the @@ -188,23 +257,15 @@ // TODO: Consider fast-pathing this even more for very large // strings, e.g. in the megabyte range. These are large enough - // that - if (a.match(MULTI) || b.match(MULTI)) { - var ca = a.match(GLYPH) || []; - var cb = b.match(GLYPH) || []; - var calen = Math.round(ca.length * p); - var cblen = Math.round(cb.length * p); - var r = cb.slice(0, cblen); - r.push.apply(r, ca.slice(calen, ca.length)); - return r.join(""); - } else { - var alen = Math.round(a.length * p); - var blen = Math.round(b.length * p); - return b.substring(0, blen) + a.substring(alen, a.length); - } + // that it should be fine to just pick a codepoint and search + // for the nearest glyph start. + if (source.match(MULTI) || target.match(MULTI)) + return fastLerpAstral(source, target, amount); + else + return fastLerpBasic(source, target, amount); } - function lerp(a, b, p) { + function lerp(source, target, amount) { /** Interpolate between two strings as best as possible If the strings are identical aside from numbers in them, @@ -215,26 +276,27 @@ Otherwise, they are passed through fastLerp. */ - a = a.toString(); - b = b.toString(); + source = source.toString(); + target = target.toString(); // Fast path for boundary cases. - if (p === 0) return a; - if (p === 1) return b; + if (amount === 0) return source; + if (amount === 1) return target; - if (areNumericTwins(a, b)) - return numericLerp(a, b, p); + if (areNumericTwins(source, target)) + return numericLerp(source, target, amount); // Numeric lerps should over- and under-shoot when fed numbers // outside 0 to 1, but other types cannot. - if (p < 0) return a; - if (p > 1) return b; + if (amount < 0) return source; + if (amount > 1) return target; - var n = a.length * b.length; - return ((n && n < MAX_MATRIX_SIZE) ? diffLerp : fastLerp)(a, b, p); + var n = source.length * target.length; + var appropriate = (n && n < MAX_MATRIX_SIZE) ? diffLerp : fastLerp; + return appropriate(source, target, amount); } - exports.levenshteinMatrix = levenshteinMatrix; + exports.costMatrix = costMatrix; exports.patch = patch; exports.diff = diff; exports.fastLerp = fastLerp;