X-Git-Url: https://git.yukkurigames.com/?p=string-lerp.git;a=blobdiff_plain;f=string-lerp.js;fp=string-lerp.js;h=40d9fd0c30655daaaac0e83a5fb648ae267cd8c1;hp=3a304886374373c3471aaf530a92f2615ea5544d;hb=245005bdfa792a76d55b0fafd2255c4c8325d28c;hpb=dd1c5bcbe8357a8bc2b83fc711ee16d0c7c8eaac diff --git a/string-lerp.js b/string-lerp.js index 3a30488..40d9fd0 100644 --- a/string-lerp.js +++ b/string-lerp.js @@ -61,24 +61,47 @@ function patch(edits, s) { /** Apply the list of edits to s */ + var edit; var i; - for (i = 0; i < edits.length; ++i) { - var edit = edits[i]; - switch (edit[0]) { - case "sub": - s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1] + 1); - break; - case "ins": - s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1]); - break; - case "del": - s = s.slice(0, edit[1]) + s.slice(edit[1] + 1); - break; + + if (Array.isArray(s)) { + for (i = 0; i < edits.length; ++i) { + edit = edits[i]; + switch (edit[0]) { + case "sub": + s[edit[1]] = edit[2]; + break; + case "ins": + s.splice(edit[1], 0, edit[2]); + break; + case "del": + s.splice(edit[1], 1); + break; + } + } + } else { + for (i = 0; i < edits.length; ++i) { + edit = edits[i]; + switch (edit[0]) { + case "sub": + s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1] + 1); + break; + case "ins": + s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1]); + break; + case "del": + s = s.slice(0, edit[1]) + s.slice(edit[1] + 1); + break; + } } } return s; } + var MULTI = /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]/; + + var GLYPH = /([\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uDC00-\uFE1F\uFE30-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF])([\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]*)/g; + function diffLerp(a, b, p) { /** Interpolate between two strings based on edit distance @@ -89,6 +112,19 @@ longer than a few hundred characters. */ + // If given strings with astral codepoints or combining + // characters, split them into arrays of "glyphs" first, + // do the edit on the list of "glyphs", and rejoin them. + // + // This split is not perfect for all languages, but at least + // it won't create invalid surrogate pairs or orphaned + // combining characters. + if (a.match && a.match(MULTI) || b.match && b.match(MULTI)) { + var ca = a.match(GLYPH) || []; + var cb = b.match(GLYPH) || []; + return diffLerp(ca, cb, p).join(""); + } + // The edit path works from the string end, forwards, because // that's how Levenshtein edits work. To match LTR reading // direction (and the behavior of fastLerp), swap the strings @@ -149,9 +185,19 @@ front of one string with another. This approach is fast but does not look good when the strings are similar. */ - var alen = Math.round(a.length * p); - var blen = Math.round(b.length * p); - return b.substring(0, blen) + a.substring(alen, a.length); + if (a.match(MULTI) || b.match(MULTI)) { + var ca = a.match(GLYPH) || []; + var cb = b.match(GLYPH) || []; + var calen = Math.round(ca.length * p); + var cblen = Math.round(cb.length * p); + var r = cb.slice(0, cblen); + r.push.apply(r, ca.slice(calen, ca.length)); + return r.join(""); + } else { + var alen = Math.round(a.length * p); + var blen = Math.round(b.length * p); + return b.substring(0, blen) + a.substring(alen, a.length); + } } function lerp(a, b, p) {