Add documentation.
[string-lerp.git] / string-lerp.js
1 (function (exports) {
2 "use strict";
3
4 var MAX_MATRIX_SIZE = 256 * 256;
5
6 function levenshteinMatrix(s, t, ins, del, sub) {
7 /** Calculate the Levenshtein edit distance matrix for two strings
8
9 The matrix is returned as a flat typed array.
10
11 Following http://en.wikipedia.org/wiki/Levenshtein_distance
12 */
13 var m = s.length + 1;
14 var n = t.length + 1;
15 var d = new Uint32Array(m * n);
16 var i, j;
17 for (i = 1; i < m; ++i)
18 d[n * i] = i;
19 for (j = 1; j < n; ++j)
20 d[j] = j;
21 for (j = 1; j < n; ++j)
22 for (i = 1; i < m; ++i)
23 if (s[i - 1] === t[j - 1])
24 d[n * i + j] = d[n * (i - 1) + j - 1];
25 else
26 d[n * i + j] = Math.min(del + d[n * (i - 1) + j ],
27 ins + d[n * i + j - 1],
28 sub + d[n * (i - 1) + j - 1]);
29 return d;
30 }
31
32 function editPath(d, t) {
33 /** Given a Levenshtein matrix and target, create an edit list */
34 var path = [];
35 var j = t.length;
36 var n = j + 1;
37 var i = d.length / n - 1;
38 while (i || j) {
39 var sub = (i && j) ? d[n * (i - 1) + j - 1] : Infinity;
40 var del = i ? d[n * (i - 1) + j] : Infinity;
41 var ins = j ? d[n * i + j - 1] : Infinity;
42 if (sub <= ins && sub <= del) {
43 if (d[n * i + j] !== d[n * (i - 1) + j - 1])
44 path.push(["sub", i - 1, t[j - 1]]);
45 --i; --j;
46 } else if (ins <= del) {
47 path.push(["ins", i, t[j - 1]]);
48 --j;
49 } else {
50 path.push(["del", i - 1]);
51 --i;
52 }
53 }
54 return path;
55 }
56
57 function diff(s, t) {
58 /** Create a diff between string s and t */
59 return editPath(levenshteinMatrix(s, t, 2, 2, 3), t);
60 }
61
62 function patch(edits, s) {
63 /** Apply the list of edits to s */
64 var edit;
65 var i;
66
67 if (Array.isArray(s)) {
68 for (i = 0; i < edits.length; ++i) {
69 edit = edits[i];
70 switch (edit[0]) {
71 case "sub":
72 s[edit[1]] = edit[2];
73 break;
74 case "ins":
75 s.splice(edit[1], 0, edit[2]);
76 break;
77 case "del":
78 s.splice(edit[1], 1);
79 break;
80 }
81 }
82 } else {
83 for (i = 0; i < edits.length; ++i) {
84 edit = edits[i];
85 switch (edit[0]) {
86 case "sub":
87 s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1] + 1);
88 break;
89 case "ins":
90 s = s.slice(0, edit[1]) + edit[2] + s.slice(edit[1]);
91 break;
92 case "del":
93 s = s.slice(0, edit[1]) + s.slice(edit[1] + 1);
94 break;
95 }
96 }
97 }
98 return s;
99 }
100
101 var MULTI = /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]/;
102
103 var GLYPH = /([\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uDC00-\uFE1F\uFE30-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF])([\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]*)/g;
104
105 function diffLerp(a, b, p) {
106 /** Interpolate between two strings based on edit operations
107
108 This interpolation algorithm applys a partial edit of one
109 string into the other. This produces nice looking results,
110 but can take a significant amount of time and memory to
111 compute the edits. It is not recommended for strings
112 longer than a few hundred characters.
113 */
114
115 // If given strings with astral codepoints or combining
116 // characters, split them into arrays of "glyphs" first,
117 // do the edit on the list of "glyphs", and rejoin them.
118 //
119 // This split is not perfect for all languages, but at least
120 // it won't create invalid surrogate pairs or orphaned
121 // combining characters.
122 if (a.match && a.match(MULTI) || b.match && b.match(MULTI)) {
123 var ca = a.match(GLYPH) || [];
124 var cb = b.match(GLYPH) || [];
125 return diffLerp(ca, cb, p).join("");
126 }
127
128 // The edit path works from the string end, forwards, because
129 // that's how Levenshtein edits work. To match LTR reading
130 // direction (and the behavior of fastLerp), swap the strings
131 // and invert the parameter when editing.
132 var edits = diff(b, a);
133 var partial = edits.slice(0, Math.round((1 - p) * edits.length));
134 return patch(partial, b);
135 }
136
137 var NUMBERS = /(-?\d+(?:\.\d+)?)/g;
138
139 function areNumericTwins(a, b) {
140 /** Check if a and b differ only in numerals
141
142 A leading "-" counts as part of numbers; a leading "+"
143 does not. Numbers may contain a single ".", but no other
144 floating point syntax.
145 */
146 return a.replace(NUMBERS, "0") === b.replace(NUMBERS, "0");
147 }
148
149 function nlerp(a, b, p) {
150 return a + (b - a) * p;
151 }
152
153 function numericLerp(a, b, p) {
154 /** Interpolate numerically between two strings containing numbers
155
156 Numbers may have a leading "-" and a single "." to mark
157 the decimal point, but something must be after the ".".
158 If both of the numbers in a pair are integers, the result
159 is clamped to an integer.
160
161 For example, numericLerp("0.0", "100", 0.123) === "12.3"
162 because the "." in "0.0" is interpreted as a decimal
163 point. But numericLerp("0.", "100.", 0.123) === "12."
164 because the strings are interpreted as integers followed
165 by a full stop.
166
167 Calling this functions on strings that differ in more than
168 numerals gives undefined results.
169 */
170 var aParts = a.split(NUMBERS);
171 var bParts = b.split(NUMBERS);
172 for (var i = 1; i < aParts.length; i += 2) {
173 var part = nlerp(+aParts[i], +bParts[i], p);
174 if (aParts[i].indexOf(".") === -1 && bParts[i].indexOf(".") === -1)
175 part = Math.round(part);
176 aParts[i] = part.toString();
177 }
178 return aParts.join("");
179 }
180
181 function fastLerp(a, b, p) {
182 /** Interpolate between two strings based on length
183
184 This interpolation algorithm progressively replaces the
185 front of one string with another. This approach is fast
186 but does not look good when the strings are similar.
187 */
188
189 // TODO: Consider fast-pathing this even more for very large
190 // strings, e.g. in the megabyte range. These are large enough
191 // that
192 if (a.match(MULTI) || b.match(MULTI)) {
193 var ca = a.match(GLYPH) || [];
194 var cb = b.match(GLYPH) || [];
195 var calen = Math.round(ca.length * p);
196 var cblen = Math.round(cb.length * p);
197 var r = cb.slice(0, cblen);
198 r.push.apply(r, ca.slice(calen, ca.length));
199 return r.join("");
200 } else {
201 var alen = Math.round(a.length * p);
202 var blen = Math.round(b.length * p);
203 return b.substring(0, blen) + a.substring(alen, a.length);
204 }
205 }
206
207 function lerp(a, b, p) {
208 /** Interpolate between two strings as best as possible
209
210 If the strings are identical aside from numbers in them,
211 they are passed through numericLerp.
212
213 If the strings are not numbers and short, they are passed
214 through diffLerp.
215
216 Otherwise, they are passed through fastLerp.
217 */
218 a = a.toString();
219 b = b.toString();
220
221 // Fast path for boundary cases.
222 if (p === 0) return a;
223 if (p === 1) return b;
224
225 if (areNumericTwins(a, b))
226 return numericLerp(a, b, p);
227
228 // Numeric lerps should over- and under-shoot when fed numbers
229 // outside 0 to 1, but other types cannot.
230 if (p < 0) return a;
231 if (p > 1) return b;
232
233 var n = a.length * b.length;
234 return ((n && n < MAX_MATRIX_SIZE) ? diffLerp : fastLerp)(a, b, p);
235 }
236
237 exports.levenshteinMatrix = levenshteinMatrix;
238 exports.patch = patch;
239 exports.diff = diff;
240 exports.fastLerp = fastLerp;
241 exports.diffLerp = diffLerp;
242 exports.numericLerp = numericLerp;
243 exports.lerp = lerp;
244
245 })(typeof exports === "undefined" ? (this.stringLerp = {}) : exports);