X-Git-Url: https://git.yukkurigames.com/?p=string-lerp.git;a=blobdiff_plain;f=string-lerp.js;h=6e0b109c821efdea53551a232cd600987c96de35;hp=2096a07ab6892d47499b7ae705141a36040ff40d;hb=f037d43269bdb28acf28500ce24d2bef1e011a75;hpb=bdb8d784b8c1f08a2be6b67a148cb00619b512ec diff --git a/string-lerp.js b/string-lerp.js index 2096a07..6e0b109 100644 --- a/string-lerp.js +++ b/string-lerp.js @@ -3,10 +3,10 @@ var MAX_MATRIX_SIZE = 256 * 256; - function levenshteinMatrix(s, t) { + function levenshteinMatrix(s, t, ins, del, sub) { /** Calculate the Levenshtein edit distance matrix for two strings - The matrix is returned as a flat unsigned typed array. + The matrix is returned as a flat typed array. Following http://en.wikipedia.org/wiki/Levenshtein_distance */ @@ -23,9 +23,9 @@ if (s[i - 1] === t[j - 1]) d[n * i + j] = d[n * (i - 1) + j - 1]; else - d[n * i + j] = 1 + Math.min(d[n * (i - 1) + j ], - d[n * i + j - 1], - d[n * (i - 1) + j - 1]); + d[n * i + j] = Math.min(del + d[n * (i - 1) + j ], + ins + d[n * i + j - 1], + sub + d[n * (i - 1) + j - 1]); return d; } @@ -56,35 +56,54 @@ function diff(s, t) { /** Create a diff between string s and t */ - return editPath(levenshteinMatrix(s, t), t); + return editPath(levenshteinMatrix(s, t, 2, 2, 3), t); } function patch(edits, s) { /** Apply the list of edits to s */ + var edit; var i; - for (i = 0; i < edits.length; ++i) { - var edit = edits[i]; - switch (edit[0]) { - case "sub": - s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1] + 1); - break; - case "ins": - s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1]); - break; - case "del": - s = s.slice(0, edit[1]) + s.slice(edit[1] + 1); - break; + + if (Array.isArray(s)) { + for (i = 0; i < edits.length; ++i) { + edit = edits[i]; + switch (edit[0]) { + case "sub": + s[edit[1]] = edit[2]; + break; + case "ins": + s.splice(edit[1], 0, edit[2]); + break; + case "del": + s.splice(edit[1], 1); + break; + } + } + } else { + for (i = 0; i < edits.length; ++i) { + edit = edits[i]; + switch (edit[0]) { + case "sub": + s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1] + 1); + break; + case "ins": + s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1]); + break; + case "del": + s = s.slice(0, edit[1]) + s.slice(edit[1] + 1); + break; + } } } return s; } - function reverse (s) { - return s.split("").reverse().join(""); - } + var MULTI = /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]/; + + var GLYPH = /([\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uDC00-\uFE1F\uFE30-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF])([\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]*)/g; function diffLerp(a, b, p) { - /** Interpolate between two strings based on edit distance + /** Interpolate between two strings based on edit operations This interpolation algorithm applys a partial edit of one string into the other. This produces nice looking results, @@ -92,11 +111,27 @@ compute the edits. It is not recommended for strings longer than a few hundred characters. */ - a = reverse(a); - b = reverse(b); - var edits = diff(a, b); - var partial = edits.slice(0, Math.round(p * edits.length)); - return reverse(patch(partial, a)); + + // If given strings with astral codepoints or combining + // characters, split them into arrays of "glyphs" first, + // do the edit on the list of "glyphs", and rejoin them. + // + // This split is not perfect for all languages, but at least + // it won't create invalid surrogate pairs or orphaned + // combining characters. + if (a.match && a.match(MULTI) || b.match && b.match(MULTI)) { + var ca = a.match(GLYPH) || []; + var cb = b.match(GLYPH) || []; + return diffLerp(ca, cb, p).join(""); + } + + // The edit path works from the string end, forwards, because + // that's how Levenshtein edits work. To match LTR reading + // direction (and the behavior of fastLerp), swap the strings + // and invert the parameter when editing. + var edits = diff(b, a); + var partial = edits.slice(0, Math.round((1 - p) * edits.length)); + return patch(partial, b); } var NUMBERS = /(-?\d+(?:\.\d+)?)/g; @@ -124,10 +159,10 @@ is clamped to an integer. For example, numericLerp("0.0", "100", 0.123) === "12.3" - because the "." in "0.0" is intepreted as a decimal point. - But numericLerp("0.", "100.", 0.123) === "12." because the - strings are interpreted as integers followed by a full - stop. + because the "." in "0.0" is interpreted as a decimal + point. But numericLerp("0.", "100.", 0.123) === "12." + because the strings are interpreted as integers followed + by a full stop. Calling this functions on strings that differ in more than numerals gives undefined results. @@ -150,9 +185,23 @@ front of one string with another. This approach is fast but does not look good when the strings are similar. */ - var alen = Math.round(a.length * p); - var blen = Math.round(b.length * p); - return b.substring(0, blen) + a.substring(alen, a.length); + + // TODO: Consider fast-pathing this even more for very large + // strings, e.g. in the megabyte range. These are large enough + // that + if (a.match(MULTI) || b.match(MULTI)) { + var ca = a.match(GLYPH) || []; + var cb = b.match(GLYPH) || []; + var calen = Math.round(ca.length * p); + var cblen = Math.round(cb.length * p); + var r = cb.slice(0, cblen); + r.push.apply(r, ca.slice(calen, ca.length)); + return r.join(""); + } else { + var alen = Math.round(a.length * p); + var blen = Math.round(b.length * p); + return b.substring(0, blen) + a.substring(alen, a.length); + } } function lerp(a, b, p) {