bb799a4743e828658c7eef1c3d3d5df8716b981d
[string-lerp.git] / string-lerp.js
1 /* string-lerp - progressively turn one string into another
2 Copyright 2014 Joe Wreschnig
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8 */
9
10 /* @license Copyright 2014 Joe Wreschnig - GNU GPL v2 or later */
11
12 (function (exports) {
13 "use strict";
14
15 var MAX_MATRIX_SIZE = 256 * 256;
16
17 function costMatrix(source, target, ins, del, sub) {
18 /** Calculate the Levenshtein cost matrix for source and target
19
20 If source and target are strings, they cannot contain any
21 astral or combining codepoints. Such data must be passed
22 as arrays of strings with one element per glyph.
23
24 ins, del, and sub are the costs for insertion, deletion,
25 and substition respectively. Their default value is 1. If
26 only ins is passed, del and sub are set to the same cost.
27 If ins and del are passed, sub is set to the more
28 expensive of the two.
29
30 The matrix is returned as a flat typed array.
31
32 Following http://en.wikipedia.org/wiki/Levenshtein_distance
33 */
34 ins = ins === undefined ? 1 : (ins | 0);
35 del = (del | 0) || ins;
36 sub = (sub | 0) || Math.max(ins, del);
37 var m = source.length + 1;
38 var n = target.length + 1;
39 var d = new Uint32Array(m * n);
40 var i, j;
41 for (i = 1; i < m; ++i)
42 d[n * i] = i;
43 for (j = 1; j < n; ++j)
44 d[j] = j;
45 for (j = 1; j < n; ++j)
46 for (i = 1; i < m; ++i)
47 if (source[i - 1] === target[j - 1])
48 d[n * i + j] = d[n * (i - 1) + j - 1];
49 else
50 d[n * i + j] = Math.min(del + d[n * (i - 1) + j ],
51 ins + d[n * i + j - 1],
52 sub + d[n * (i - 1) + j - 1]);
53 return d;
54 }
55
56 // First, note that deletion is just substition with nothing, so
57 // any DEL operation can be replaced by a SUB. Second, the
58 // operation code *is* the necessary slice offset for applying the
59 // diff.
60 var INS = 0, SUB = 1;
61
62 function editPath(costs, target) {
63 /** Given a cost matrix and a target, create an edit list */
64 var path = [];
65 var j = target.length;
66 var n = j + 1;
67 var i = costs.length / n - 1;
68 while (i || j) {
69 var sub = (i && j) ? costs[n * (i - 1) + j - 1] : Infinity;
70 var del = i ? costs[n * (i - 1) + j] : Infinity;
71 var ins = j ? costs[n * i + j - 1] : Infinity;
72 if (sub <= ins && sub <= del) {
73 if (costs[n * i + j] !== costs[n * (i - 1) + j - 1])
74 path.push([SUB, i - 1, target[j - 1]]);
75 --i; --j;
76 } else if (ins <= del) {
77 path.push([INS, i, target[j - 1]]);
78 --j;
79 } else {
80 path.push([SUB, i - 1, ""]);
81 --i;
82 }
83 }
84 return path;
85 }
86
87 function diff(source, target, ins, del, sub) {
88 /** Create a list of edits to turn source into target
89
90 ins, del, and sub are as passed to costMatrix.
91 */
92 return editPath(costMatrix(source, target, ins, del, sub), target);
93 }
94
95 function patchArray(diff, source) {
96 for (var i = 0; i < diff.length; ++i) {
97 var edit = diff[i];
98 source.splice(edit[1], edit[0], edit[2]);
99 }
100 return source;
101 }
102
103 function patchString(diff, source) {
104 for (var i = 0; i < diff.length; ++i) {
105 var edit = diff[i];
106 var head = source.slice(0, edit[1]);
107 var tail = source.slice(edit[1] + edit[0]);
108 source = head + edit[2] + tail;
109 }
110 return source;
111 }
112
113 function patch(diff, source) {
114 /** Apply a list of edits to source */
115 var patcher = Array.isArray(source) ? patchArray : patchString;
116 return patcher(diff, source);
117 }
118
119 var MULTI = /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]/;
120
121 var GLYPH = /([\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uDC00-\uFE1F\uFE30-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF])([\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]*)/g;
122
123 function diffLerpAstral(source, target, amount) {
124 // If given strings with astral codepoints or combining
125 // characters, split them into arrays of "glyphs" first,
126 // do the edit on the list of "glyphs", and rejoin them.
127 //
128 // This split is not perfect for all languages, but at least
129 // it won't create invalid surrogate pairs or orphaned
130 // combining characters.
131 var sourceGlyphs = source.match(GLYPH) || [];
132 var targetGlyphs = target.match(GLYPH) || [];
133 var edits = diff(targetGlyphs, sourceGlyphs, 2, 2, 3);
134 // The edit path works from the string end, forwards, because
135 // that's how Levenshtein edits work. To match LTR reading
136 // direction (and the behavior of fastLerp), swap the strings
137 // and invert the parameter when editing.
138 var partial = edits.slice(0, Math.round((1 - amount) * edits.length));
139 return patchArray(partial, targetGlyphs).join("");
140 }
141
142 function diffLerpBasic(source, target, amount) {
143 var edits = diff(target, source, 2, 2, 3);
144 // The edit path works from the string end, forwards, because
145 // that's how Levenshtein edits work. To match LTR reading
146 // direction (and the behavior of fastLerp), swap the strings
147 // and invert the parameter when editing.
148 var partial = edits.slice(0, Math.round((1 - amount) * edits.length));
149 return patchString(partial, target);
150 }
151
152 function diffLerp(source, target, amount) {
153 /** Interpolate between two strings using edit operations
154
155 This interpolation algorithm applys a partial edit of one
156 string into the other. This produces nice looking results,
157 but can take a significant amount of time and memory to
158 compute the edits. It is not recommended for strings
159 longer than a few hundred characters.
160 */
161
162 if (source.match(MULTI) || target.match(MULTI))
163 return diffLerpAstral(source, target, amount);
164 else
165 return diffLerpBasic(source, target, amount);
166 }
167
168 var NUMBERS = /(-?\d{1,20}(?:\.\d{1,20})?)/g;
169
170 function areNumericTwins(source, target) {
171 /** Check if a and b differ only in numerals */
172 return source.replace(NUMBERS, "0") === target.replace(NUMBERS, "0");
173 }
174
175 function nlerp(source, target, amount) {
176 return source + (target - source) * amount;
177 }
178
179 function numericLerp(source, target, amount) {
180 /** Interpolate numerically between strings containing numbers
181
182 Numbers may have a leading "-" and a single "." to mark
183 the decimal point, but something must be after the ".".
184 No other floating point syntax (e.g. 1e6) is supported.
185 They are treated as fixed-point values, with the point's
186 position itself interpolating.
187
188 For example, numericLerp("0.0", "100".0, 0.123) === "12.3"
189 because the "." in "0.0" is interpreted as a decimal
190 point. But numericLerp("0.", "100.", 0.123) === "12."
191 because the strings are interpreted as integers followed
192 by a full stop.
193
194 Calling this functions on strings that differ in more than
195 numerals gives undefined results.
196 */
197
198 var targetParts = target.split(NUMBERS);
199 var match;
200 var i = 1;
201 while ((match = NUMBERS.exec(source))) {
202 var sourcePart = match[0];
203 var targetPart = targetParts[i];
204 var part = nlerp(+sourcePart, +targetPart, amount);
205 var sourcePoint = sourcePart.indexOf(".");
206 var targetPoint = targetPart.indexOf(".");
207 var point = Math.round(nlerp(
208 sourcePoint >= 0 ? (sourcePart.length - 1) - sourcePoint : 0,
209 targetPoint >= 0 ? (targetPart.length - 1) - targetPoint : 0,
210 amount));
211 targetParts[i] = part.toFixed(point);
212 i += 2;
213 }
214 return targetParts.join("");
215 }
216
217 function fastLerpAstral(source, target, amount) {
218 var sourceGlyphs = source.match(GLYPH) || [];
219 var targetGlyphs = target.match(GLYPH) || [];
220 var sourceLength = Math.round(sourceGlyphs.length * amount);
221 var targetLength = Math.round(targetGlyphs.length * amount);
222 var head = targetGlyphs.slice(0, targetLength);
223 var tail = sourceGlyphs.slice(sourceLength, sourceGlyphs.length);
224 head.push.apply(head, tail);
225 return head.join("");
226 }
227
228 function fastLerpBasic(source, target, amount) {
229 var sourceLength = Math.round(source.length * amount);
230 var targetLength = Math.round(target.length * amount);
231 var head = target.substring(0, targetLength);
232 var tail = source.substring(sourceLength, source.length);
233 return head + tail;
234 }
235
236 function fastLerp(source, target, amount) {
237 /** Interpolate between two strings based on length
238
239 This interpolation algorithm progressively replaces the
240 front of one string with another. This approach is fast
241 but does not look good when the strings are similar.
242 */
243
244 // TODO: Consider fast-pathing this even more for very large
245 // strings, e.g. in the megabyte range. These are large enough
246 // that it should be fine to just pick a codepoint and search
247 // for the nearest glyph start.
248 if (source.match(MULTI) || target.match(MULTI))
249 return fastLerpAstral(source, target, amount);
250 else
251 return fastLerpBasic(source, target, amount);
252 }
253
254 function lerp(source, target, amount) {
255 /** Interpolate between two strings as best as possible
256
257 If the strings are identical aside from numbers in them,
258 they are passed through numericLerp.
259
260 If the strings are not numbers and short, they are passed
261 through diffLerp.
262
263 Otherwise, they are passed through fastLerp.
264 */
265 source = source.toString();
266 target = target.toString();
267
268 // Fast path for boundary cases.
269 if (amount === 0) return source;
270 if (amount === 1) return target;
271
272 if (areNumericTwins(source, target))
273 return numericLerp(source, target, amount);
274
275 // Numeric lerps should over- and under-shoot when fed numbers
276 // outside 0 to 1, but other types cannot.
277 if (amount < 0) return source;
278 if (amount > 1) return target;
279
280 var n = source.length * target.length;
281 var appropriate = (n && n < MAX_MATRIX_SIZE) ? diffLerp : fastLerp;
282 return appropriate(source, target, amount);
283 }
284
285 exports.costMatrix = costMatrix;
286 exports.patch = patch;
287 exports.diff = diff;
288 exports.fastLerp = fastLerp;
289 exports.diffLerp = diffLerp;
290 exports.numericLerp = numericLerp;
291 exports.lerp = lerp;
292
293 })(typeof exports === "undefined" ? (this.stringLerp = {}) : exports);