989ceec9d5d19261522b794eb86caf9d5a3e1e3f
[string-lerp.git] / string-lerp.js
1 /* string-lerp - progressively turn one string into another
2 Copyright 2014 Joe Wreschnig
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8 */
9
10 /* @license Copyright 2014 Joe Wreschnig - GNU GPL v2 or later */
11
12 (function (exports) {
13 "use strict";
14
15 var MAX_MATRIX_SIZE = 256 * 256;
16
17 function costMatrix(source, target, ins, del, sub) {
18 /** Calculate the Levenshtein cost matrix for source and target
19
20 If source and target are strings, they cannot contain any
21 astral or combining codepoints. Such data must be passed
22 as arrays of strings with one element per glyph.
23
24 ins, del, and sub are the costs for insertion, deletion,
25 and substition respectively. Their default value is 1. If
26 only ins is passed, del and sub are set to the same cost.
27 If ins and del are passed, sub is set to the more
28 expensive of the two.
29
30 The matrix is returned as a flat typed array.
31
32 Following http://en.wikipedia.org/wiki/Levenshtein_distance
33 */
34 ins = ins === undefined ? 1 : (ins | 0);
35 del = (del | 0) || ins;
36 sub = (sub | 0) || Math.max(ins, del);
37 var m = source.length + 1;
38 var n = target.length + 1;
39 var d = new Uint32Array(m * n);
40 var i, j;
41 for (i = 1; i < m; ++i)
42 d[n * i] = i;
43 for (j = 1; j < n; ++j)
44 d[j] = j;
45 for (j = 1; j < n; ++j)
46 for (i = 1; i < m; ++i)
47 if (source[i - 1] === target[j - 1])
48 d[n * i + j] = d[n * (i - 1) + j - 1];
49 else
50 d[n * i + j] = Math.min(del + d[n * (i - 1) + j ],
51 ins + d[n * i + j - 1],
52 sub + d[n * (i - 1) + j - 1]);
53 return d;
54 }
55
56 // First, note that deletion is just substition with nothing, so
57 // any DEL operation can be replaced by a SUB. Second, the
58 // operation code *is* the necessary slice offset for applying the
59 // diff.
60 var INS = 0, SUB = 1;
61
62 function editPath(costs, target) {
63 /** Given a cost matrix and a target, create an edit list */
64 var path = [];
65 var j = target.length;
66 var n = j + 1;
67 var i = costs.length / n - 1;
68 while (i || j) {
69 var sub = (i && j) ? costs[n * (i - 1) + j - 1] : Infinity;
70 var del = i ? costs[n * (i - 1) + j] : Infinity;
71 var ins = j ? costs[n * i + j - 1] : Infinity;
72 if (sub <= ins && sub <= del) {
73 if (costs[n * i + j] !== costs[n * (i - 1) + j - 1])
74 path.push([SUB, i - 1, target[j - 1]]);
75 --i; --j;
76 } else if (ins <= del) {
77 path.push([INS, i, target[j - 1]]);
78 --j;
79 } else {
80 path.push([SUB, i - 1, ""]);
81 --i;
82 }
83 }
84 return path;
85 }
86
87 function diff(source, target, ins, del, sub) {
88 /** Create a list of edits to turn source into target
89
90 ins, del, and sub are as passed to costMatrix.
91 */
92 return editPath(costMatrix(source, target, ins, del, sub), target);
93 }
94
95 function patchArray(diff, source) {
96 for (var i = 0; i < diff.length; ++i) {
97 var edit = diff[i];
98 source.splice(edit[1], edit[0], edit[2]);
99 }
100 return source;
101 }
102
103 function patchString(diff, source) {
104 for (var i = 0; i < diff.length; ++i) {
105 var edit = diff[i];
106 var head = source.slice(0, edit[1]);
107 var tail = source.slice(edit[1] + edit[0]);
108 source = head + edit[2] + tail;
109 }
110 return source;
111 }
112
113 function patch(diff, source) {
114 /** Apply a list of edits to source */
115 var patcher = Array.isArray(source) ? patchArray : patchString;
116 return patcher(diff, source);
117 }
118
119 // Matches if a string contains combining characters or astral
120 // codepoints (technically, the first half surrogate of an astral
121 // codepoint).
122 var MULTI = /[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uD800-\uDBFF\uFE20-\uFE2F]/;
123
124 // Match an entire (potentially astral) codepoint and any
125 // combining characters following it.
126 var GLYPH = /[\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uD800-\uFE1F\uFE30-\uFFFF][\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uDC00-\uDFFF\uFE20-\uFE2F]*/g;
127
128 function diffLerpAstral(source, target, amount) {
129 // This split is not perfect for all languages, but at least
130 // it won't create invalid surrogate pairs or orphaned
131 // combining characters.
132 var sourceGlyphs = source.match(GLYPH) || [];
133 var targetGlyphs = target.match(GLYPH) || [];
134 var edits = diff(targetGlyphs, sourceGlyphs, 2, 2, 3);
135 // The edit path works from the string end, forwards, because
136 // that's how Levenshtein edits work. To match LTR reading
137 // direction (and the behavior of fastLerp), swap the strings
138 // and invert the parameter when editing.
139 var partial = edits.slice(0, Math.round((1 - amount) * edits.length));
140 return patchArray(partial, targetGlyphs).join("");
141 }
142
143 function diffLerpBasic(source, target, amount) {
144 var edits = diff(target, source, 2, 2, 3);
145 // The edit path works from the string end, forwards, because
146 // that's how Levenshtein edits work. To match LTR reading
147 // direction (and the behavior of fastLerp), swap the strings
148 // and invert the parameter when editing.
149 var partial = edits.slice(0, Math.round((1 - amount) * edits.length));
150 return patchString(partial, target);
151 }
152
153 function diffLerp(source, target, amount) {
154 /** Interpolate between two strings using edit operations
155
156 This interpolation algorithm applys a partial edit of one
157 string into the other. This produces nice looking results,
158 but can take a significant amount of time and memory to
159 compute the edits. It is not recommended for strings
160 longer than a few hundred characters.
161 */
162
163 if (source.match(MULTI) || target.match(MULTI))
164 return diffLerpAstral(source, target, amount);
165 else
166 return diffLerpBasic(source, target, amount);
167 }
168
169 var NUMBERS = /(-?\d{1,20}(?:\.\d{1,20})?)/g;
170
171 function areNumericTwins(source, target) {
172 /** Check if a and b differ only in numerals */
173 return source.replace(NUMBERS, "0") === target.replace(NUMBERS, "0");
174 }
175
176 function nlerp(source, target, amount) {
177 return source + (target - source) * amount;
178 }
179
180 function numericLerp(source, target, amount) {
181 /** Interpolate numerically between strings containing numbers
182
183 Numbers may have a leading "-" and a single "." to mark
184 the decimal point, but something must be after the ".".
185 No other floating point syntax (e.g. 1e6) is supported.
186 They are treated as fixed-point values, with the point's
187 position itself interpolating.
188
189 For example, numericLerp("0.0", "100".0, 0.123) === "12.3"
190 because the "." in "0.0" is interpreted as a decimal
191 point. But numericLerp("0.", "100.", 0.123) === "12."
192 because the strings are interpreted as integers followed
193 by a full stop.
194
195 Calling this functions on strings that differ in more than
196 numerals gives undefined results.
197 */
198
199 var targetParts = target.split(NUMBERS);
200 var match;
201 var i = 1;
202 while ((match = NUMBERS.exec(source))) {
203 var sourcePart = match[0];
204 var targetPart = targetParts[i];
205 var part = nlerp(+sourcePart, +targetPart, amount);
206 var sourcePoint = sourcePart.indexOf(".");
207 var targetPoint = targetPart.indexOf(".");
208 var point = Math.round(nlerp(
209 sourcePoint >= 0 ? (sourcePart.length - 1) - sourcePoint : 0,
210 targetPoint >= 0 ? (targetPart.length - 1) - targetPoint : 0,
211 amount));
212 targetParts[i] = part.toFixed(point);
213 i += 2;
214 }
215 return targetParts.join("");
216 }
217
218 function fastLerpAstral(source, target, amount) {
219 var sourceGlyphs = source.match(GLYPH) || [];
220 var targetGlyphs = target.match(GLYPH) || [];
221 var sourceLength = Math.round(sourceGlyphs.length * amount);
222 var targetLength = Math.round(targetGlyphs.length * amount);
223 var head = targetGlyphs.slice(0, targetLength);
224 var tail = sourceGlyphs.slice(sourceLength, sourceGlyphs.length);
225 head.push.apply(head, tail);
226 return head.join("");
227 }
228
229 function fastLerpBasic(source, target, amount) {
230 var sourceLength = Math.round(source.length * amount);
231 var targetLength = Math.round(target.length * amount);
232 var head = target.substring(0, targetLength);
233 var tail = source.substring(sourceLength, source.length);
234 return head + tail;
235 }
236
237 function fastLerp(source, target, amount) {
238 /** Interpolate between two strings based on length
239
240 This interpolation algorithm progressively replaces the
241 front of one string with another. This approach is fast
242 but does not look good when the strings are similar.
243 */
244
245 // TODO: Consider fast-pathing this even more for very large
246 // strings, e.g. in the megabyte range. These are large enough
247 // that it should be fine to just pick a codepoint and search
248 // for the nearest glyph start.
249 if (source.match(MULTI) || target.match(MULTI))
250 return fastLerpAstral(source, target, amount);
251 else
252 return fastLerpBasic(source, target, amount);
253 }
254
255 function lerp(source, target, amount) {
256 /** Interpolate between two strings as best as possible
257
258 If the strings are identical aside from numbers in them,
259 they are passed through numericLerp.
260
261 If the strings are not numbers and short, they are passed
262 through diffLerp.
263
264 Otherwise, they are passed through fastLerp.
265 */
266 source = source.toString();
267 target = target.toString();
268
269 // Fast path for boundary cases.
270 if (amount === 0) return source;
271 if (amount === 1) return target;
272
273 if (areNumericTwins(source, target))
274 return numericLerp(source, target, amount);
275
276 // Numeric lerps should over- and under-shoot when fed numbers
277 // outside 0 to 1, but other types cannot.
278 if (amount < 0) return source;
279 if (amount > 1) return target;
280
281 var n = source.length * target.length;
282 var appropriate = (n && n < MAX_MATRIX_SIZE) ? diffLerp : fastLerp;
283 return appropriate(source, target, amount);
284 }
285
286 exports.costMatrix = costMatrix;
287 exports.patch = patch;
288 exports.diff = diff;
289 exports.fastLerp = fastLerp;
290 exports.diffLerp = diffLerp;
291 exports.numericLerp = numericLerp;
292 exports.lerp = lerp;
293
294 })(typeof exports === "undefined" ? (this.stringLerp = {}) : exports);