X-Git-Url: https://git.yukkurigames.com/?p=string-lerp.git;a=blobdiff_plain;f=string-lerp.js;fp=string-lerp.js;h=6e0b109c821efdea53551a232cd600987c96de35;hp=40d9fd0c30655daaaac0e83a5fb648ae267cd8c1;hb=f037d43269bdb28acf28500ce24d2bef1e011a75;hpb=245005bdfa792a76d55b0fafd2255c4c8325d28c diff --git a/string-lerp.js b/string-lerp.js index 40d9fd0..6e0b109 100644 --- a/string-lerp.js +++ b/string-lerp.js @@ -3,10 +3,10 @@ var MAX_MATRIX_SIZE = 256 * 256; - function levenshteinMatrix(s, t) { + function levenshteinMatrix(s, t, ins, del, sub) { /** Calculate the Levenshtein edit distance matrix for two strings - The matrix is returned as a flat unsigned typed array. + The matrix is returned as a flat typed array. Following http://en.wikipedia.org/wiki/Levenshtein_distance */ @@ -23,9 +23,9 @@ if (s[i - 1] === t[j - 1]) d[n * i + j] = d[n * (i - 1) + j - 1]; else - d[n * i + j] = 1 + Math.min(d[n * (i - 1) + j ], - d[n * i + j - 1], - d[n * (i - 1) + j - 1]); + d[n * i + j] = Math.min(del + d[n * (i - 1) + j ], + ins + d[n * i + j - 1], + sub + d[n * (i - 1) + j - 1]); return d; } @@ -56,7 +56,7 @@ function diff(s, t) { /** Create a diff between string s and t */ - return editPath(levenshteinMatrix(s, t), t); + return editPath(levenshteinMatrix(s, t, 2, 2, 3), t); } function patch(edits, s) { @@ -103,7 +103,7 @@ var GLYPH = /([\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uDC00-\uFE1F\uFE30-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF])([\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]*)/g; function diffLerp(a, b, p) { - /** Interpolate between two strings based on edit distance + /** Interpolate between two strings based on edit operations This interpolation algorithm applys a partial edit of one string into the other. This produces nice looking results, @@ -159,10 +159,10 @@ is clamped to an integer. For example, numericLerp("0.0", "100", 0.123) === "12.3" - because the "." in "0.0" is intepreted as a decimal point. - But numericLerp("0.", "100.", 0.123) === "12." because the - strings are interpreted as integers followed by a full - stop. + because the "." in "0.0" is interpreted as a decimal + point. But numericLerp("0.", "100.", 0.123) === "12." + because the strings are interpreted as integers followed + by a full stop. Calling this functions on strings that differ in more than numerals gives undefined results. @@ -185,6 +185,10 @@ front of one string with another. This approach is fast but does not look good when the strings are similar. */ + + // TODO: Consider fast-pathing this even more for very large + // strings, e.g. in the megabyte range. These are large enough + // that if (a.match(MULTI) || b.match(MULTI)) { var ca = a.match(GLYPH) || []; var cb = b.match(GLYPH) || [];