From 7ad63f72d70fb410e1364cdf99c0969f0865941e Mon Sep 17 00:00:00 2001
From: Joe Wreschnig
Date: Mon, 19 May 2014 13:05:43 +0200
Subject: [PATCH 1/1] Simplify glyph regexes.

---
 string-lerp.js | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git string-lerp.js string-lerp.js
index bb799a4..989ceec 100644
--- string-lerp.js
+++ string-lerp.js
@@ -116,15 +116,16 @@
         return patcher(diff, source);
     }
 
-    var MULTI = /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]/;
+    // Matches if a string contains combining characters or astral
+    // codepoints (technically, the first half surrogate of an astral
+    // codepoint).
+    var MULTI = /[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uD800-\uDBFF\uFE20-\uFE2F]/;
 
-    var GLYPH = /([\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uDC00-\uFE1F\uFE30-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF])([\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]*)/g;
+    // Match an entire (potentially astral) codepoint and any
+    // combining characters following it.
+    var GLYPH = /[\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uD800-\uFE1F\uFE30-\uFFFF][\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uDC00-\uDFFF\uFE20-\uFE2F]*/g;
 
     function diffLerpAstral(source, target, amount) {
-        // If given strings with astral codepoints or combining
-        // characters, split them into arrays of "glyphs" first,
-        // do the edit on the list of "glyphs", and rejoin them.
-        //
         // This split is not perfect for all languages, but at least
         // it won't create invalid surrogate pairs or orphaned
         // combining characters.
-- 
2.20.1