function patch(edits, s) {
/** Apply the list of edits to s */
+ var edit;
var i;
- for (i = 0; i < edits.length; ++i) {
- var edit = edits[i];
- switch (edit[0]) {
- case "sub":
- s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1] + 1);
- break;
- case "ins":
- s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1]);
- break;
- case "del":
- s = s.slice(0, edit[1]) + s.slice(edit[1] + 1);
- break;
+
+ if (Array.isArray(s)) {
+ for (i = 0; i < edits.length; ++i) {
+ edit = edits[i];
+ switch (edit[0]) {
+ case "sub":
+ s[edit[1]] = edit[2];
+ break;
+ case "ins":
+ s.splice(edit[1], 0, edit[2]);
+ break;
+ case "del":
+ s.splice(edit[1], 1);
+ break;
+ }
+ }
+ } else {
+ for (i = 0; i < edits.length; ++i) {
+ edit = edits[i];
+ switch (edit[0]) {
+ case "sub":
+ s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1] + 1);
+ break;
+ case "ins":
+ s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1]);
+ break;
+ case "del":
+ s = s.slice(0, edit[1]) + s.slice(edit[1] + 1);
+ break;
+ }
}
}
return s;
}
+ var MULTI = /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]/;
+
+ var GLYPH = /([\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uDC00-\uFE1F\uFE30-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF])([\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]*)/g;
+
function diffLerp(a, b, p) {
/** Interpolate between two strings based on edit distance
longer than a few hundred characters.
*/
+ // If given strings with astral codepoints or combining
+ // characters, split them into arrays of "glyphs" first,
+ // do the edit on the list of "glyphs", and rejoin them.
+ //
+ // This split is not perfect for all languages, but at least
+ // it won't create invalid surrogate pairs or orphaned
+ // combining characters.
+ if (a.match && a.match(MULTI) || b.match && b.match(MULTI)) {
+ var ca = a.match(GLYPH) || [];
+ var cb = b.match(GLYPH) || [];
+ return diffLerp(ca, cb, p).join("");
+ }
+
// The edit path works from the string end, forwards, because
// that's how Levenshtein edits work. To match LTR reading
// direction (and the behavior of fastLerp), swap the strings
front of one string with another. This approach is fast
but does not look good when the strings are similar.
*/
- var alen = Math.round(a.length * p);
- var blen = Math.round(b.length * p);
- return b.substring(0, blen) + a.substring(alen, a.length);
+ if (a.match(MULTI) || b.match(MULTI)) {
+ var ca = a.match(GLYPH) || [];
+ var cb = b.match(GLYPH) || [];
+ var calen = Math.round(ca.length * p);
+ var cblen = Math.round(cb.length * p);
+ var r = cb.slice(0, cblen);
+ r.push.apply(r, ca.slice(calen, ca.length));
+ return r.join("");
+ } else {
+ var alen = Math.round(a.length * p);
+ var blen = Math.round(b.length * p);
+ return b.substring(0, blen) + a.substring(alen, a.length);
+ }
}
function lerp(a, b, p) {
var JS = this.JS || require('jstest');
var m = require('../string-lerp');
+var HALF_POOS = /\uD83D\uD83D|\uDCA9\uDCA9|\uD83D$/;
+var UNUSUAL_Q = 'q\u0307\u0323';
+
JS.Test.describe('fast lerp', function () { with (this) {
var lerp = m.fastLerp;
var A = "Do you like green eggs and ham?";
assertEqual("I do not like tn eggs and ham?", lerp(A, B, 0.5));
assertEqual("I do not like them, Sam-I-am?", lerp(A, B, 0.98));
}});
+
+ it("doesn't make half a poo", function () { with (this) {
+ var poos = "\uD83D\uDCA9\uD83D\uDCA9\uD83D\uDCA9\uD83D\uDCA9";
+ assertEqual("\uD83D\uDCA9\uD83D\uDCA9", lerp("", poos, 0.5));
+ assertEqual("\uD83D\uDCA9", lerp("", poos, 0.35));
+
+ for (var i = 0; i <= 1; i += 1/256)
+ assertNot(lerp("", poos, i).match(HALF_POOS));
+ }});
+
+ it("doesn't misplace combining marks", function () { with (this) {
+ for (var i = 0; i <= 1; i += 1/256) {
+ var r = lerp("a", UNUSUAL_Q, i);
+ assert(r === "a" || r === UNUSUAL_Q);
+ }
+ }});
}});
JS.Test.describe('diff lerp', function () { with (this) {
assertEqual("I do not like them, Sam-Iham?", lerp(A, B, 0.9));
assertEqual(B, lerp(A, B, 0.99));
}});
+
+ it("doesn't make half a poo", function () { with (this) {
+ var poos = "\uD83D\uDCA9\uD83D\uDCA9\uD83D\uDCA9\uD83D\uDCA9";
+ assertEqual("\uD83D\uDCA9\uD83D\uDCA9", lerp("", poos, 0.5));
+ assertEqual("\uD83D\uDCA9", lerp("", poos, 0.35));
+
+ for (var i = 0; i <= 1; i += 1/256)
+ assertNot(lerp("", poos, i).match(HALF_POOS));
+ }});
+
+ it("doesn't misplace combining marks", function () { with (this) {
+ for (var i = 0; i <= 1; i += 1/256) {
+ var r = lerp("a", UNUSUAL_Q, i);
+ assert(r === "a" || r === UNUSUAL_Q);
+ }
+ }});
}});
JS.Test.describe('numeric lerp', function () { with (this) {