Simplify glyph regexes.
author Joe Wreschnig <joe.wreschnig@gmail.com>
Mon, 19 May 2014 11:05:43 +0000 (13:05 +0200)
committer Joe Wreschnig <joe.wreschnig@gmail.com>
Mon, 19 May 2014 11:05:43 +0000 (13:05 +0200)
string-lerp.js

index bb799a4743e828658c7eef1c3d3d5df8716b981d..989ceec9d5d19261522b794eb86caf9d5a3e1e3f 100644 (file)
         return patcher(diff, source);
     }
 
-    var MULTI = /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]/;
+    // Matches if a string contains combining characters or astral
+    // codepoints (technically, it matches the high, i.e. leading,
+    // surrogate of an astral codepoint's UTF-16 surrogate pair).
+    var MULTI = /[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uD800-\uDBFF\uFE20-\uFE2F]/;
 
-    var GLYPH = /([\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uDC00-\uFE1F\uFE30-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF])([\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]*)/g;
+    // Match an entire (potentially astral) codepoint and any
+    // combining characters following it.
+    var GLYPH = /[\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uD800-\uFE1F\uFE30-\uFFFF][\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uDC00-\uDFFF\uFE20-\uFE2F]*/g;
 
     function diffLerpAstral(source, target, amount) {
-        // If given strings with astral codepoints or combining
-        // characters, split them into arrays of "glyphs" first,
-        // do the edit on the list of "glyphs", and rejoin them.
-        //
         // This split is not perfect for all languages, but at least
         // it won't create invalid surrogate pairs or orphaned
         // combining characters.