114bebd67c6ba4097f18ba25b0627a81b686a938
[string-lerp.git] / string-lerp.js
1 /* string-lerp - progressively turn one string into another
2 Copyright 2014 Joe Wreschnig
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8 */
9
10 /* @license Copyright 2014 Joe Wreschnig - GNU GPL v2 or later */
11
12 (function (exports) {
13 "use strict";
14
15 var MAX_MATRIX_SIZE = 256 * 256;
16
17 function costMatrix(source, target, ins, del, sub) {
18 /** Calculate the Levenshtein cost matrix for source and target
19
20 If source and target are strings, they cannot contain any
21 astral or combining codepoints. Such data must be passed
22 as arrays of strings with one element per glyph.
23
24 ins, del, and sub are the costs for insertion, deletion,
25 and substition respectively. Their default value is 1. If
26 only ins is passed, del and sub are set to the same cost.
27 If ins and del are passed, sub is set to the more
28 expensive of the two.
29
30 The matrix is returned as a flat typed array.
31
32 Following http://en.wikipedia.org/wiki/Levenshtein_distance
33 */
34 ins = ins === undefined ? 1 : (ins | 0);
35 del = (del | 0) || ins;
36 sub = (sub | 0) || Math.max(ins, del);
37 var m = source.length + 1;
38 var n = target.length + 1;
39 var d = new Uint32Array(m * n);
40 var i, j;
41 for (i = 1; i < m; ++i)
42 d[n * i] = i;
43 for (j = 1; j < n; ++j)
44 d[j] = j;
45 for (j = 1; j < n; ++j)
46 for (i = 1; i < m; ++i)
47 if (source[i - 1] === target[j - 1])
48 d[n * i + j] = d[n * (i - 1) + j - 1];
49 else
50 d[n * i + j] = Math.min(del + d[n * (i - 1) + j ],
51 ins + d[n * i + j - 1],
52 sub + d[n * (i - 1) + j - 1]);
53 return d;
54 }
55
56 // First, note that deletion is just substition with nothing, so
57 // any DEL operation can be replaced by a SUB. Second, the
58 // operation code *is* the necessary slice offset for applying the
59 // diff.
60 var INS = 0, SUB = 1;
61
62 function editPath(costs, target) {
63 /** Given a cost matrix and a target, create an edit list */
64 var path = [];
65 var j = target.length;
66 var n = j + 1;
67 var i = costs.length / n - 1;
68 while (i || j) {
69 var sub = (i && j) ? costs[n * (i - 1) + j - 1] : Infinity;
70 var del = i ? costs[n * (i - 1) + j] : Infinity;
71 var ins = j ? costs[n * i + j - 1] : Infinity;
72 if (sub <= ins && sub <= del) {
73 if (costs[n * i + j] !== costs[n * (i - 1) + j - 1])
74 path.push([SUB, i - 1, target[j - 1]]);
75 --i; --j;
76 } else if (ins <= del) {
77 path.push([INS, i, target[j - 1]]);
78 --j;
79 } else {
80 path.push([SUB, i - 1, ""]);
81 --i;
82 }
83 }
84 return path;
85 }
86
87 function diff(source, target, ins, del, sub) {
88 /** Create a list of edits to turn source into target
89
90 ins, del, and sub are as passed to costMatrix.
91 */
92 return editPath(costMatrix(source, target, ins, del, sub), target);
93 }
94
95 function patchArray(diff, source) {
96 for (var i = 0; i < diff.length; ++i) {
97 var edit = diff[i];
98 source.splice(edit[1], edit[0], edit[2]);
99 }
100 return source;
101 }
102
103 function patchString(diff, source) {
104 for (var i = 0; i < diff.length; ++i) {
105 var edit = diff[i];
106 var head = source.slice(0, edit[1]);
107 var tail = source.slice(edit[1] + edit[0]);
108 source = head + edit[2] + tail;
109 }
110 return source;
111 }
112
113 function patch(diff, source) {
114 /** Apply a list of edits to source */
115 var patcher = Array.isArray(source) ? patchArray : patchString;
116 return patcher(diff, source);
117 }
118
119 var MULTI = /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]/;
120
121 var GLYPH = /([\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uDC00-\uFE1F\uFE30-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF])([\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]*)/g;
122
123 function diffLerpAstral(source, target, amount) {
124 // If given strings with astral codepoints or combining
125 // characters, split them into arrays of "glyphs" first,
126 // do the edit on the list of "glyphs", and rejoin them.
127 //
128 // This split is not perfect for all languages, but at least
129 // it won't create invalid surrogate pairs or orphaned
130 // combining characters.
131 var sourceGlyphs = source.match(GLYPH) || [];
132 var targetGlyphs = target.match(GLYPH) || [];
133 var edits = diff(targetGlyphs, sourceGlyphs, 2, 2, 3);
134 // The edit path works from the string end, forwards, because
135 // that's how Levenshtein edits work. To match LTR reading
136 // direction (and the behavior of fastLerp), swap the strings
137 // and invert the parameter when editing.
138 var partial = edits.slice(0, Math.round((1 - amount) * edits.length));
139 return patchArray(partial, targetGlyphs).join("");
140 }
141
142 function diffLerpBasic(source, target, amount) {
143 var edits = diff(target, source, 2, 2, 3);
144 // The edit path works from the string end, forwards, because
145 // that's how Levenshtein edits work. To match LTR reading
146 // direction (and the behavior of fastLerp), swap the strings
147 // and invert the parameter when editing.
148 var partial = edits.slice(0, Math.round((1 - amount) * edits.length));
149 return patchString(partial, target);
150 }
151
152 function diffLerp(source, target, amount) {
153 /** Interpolate between two strings using edit operations
154
155 This interpolation algorithm applys a partial edit of one
156 string into the other. This produces nice looking results,
157 but can take a significant amount of time and memory to
158 compute the edits. It is not recommended for strings
159 longer than a few hundred characters.
160 */
161
162 if (source.match(MULTI) || target.match(MULTI))
163 return diffLerpAstral(source, target, amount);
164 else
165 return diffLerpBasic(source, target, amount);
166 }
167
168 var NUMBERS = /(-?\d{1,20}(?:\.\d{1,20})?)/g;
169
170 function areNumericTwins(source, target) {
171 /** Check if a and b differ only in numerals */
172 return source.replace(NUMBERS, "0") === target.replace(NUMBERS, "0");
173 }
174
175 function nlerp(source, target, amount) {
176 return source + (target - source) * amount;
177 }
178
179 function numericLerp(source, target, amount) {
180 /** Interpolate numerically between strings containing numbers
181
182 Numbers may have a leading "-" and a single "." to mark
183 the decimal point, but something must be after the ".".
184 No other floating point syntax (e.g. 1e6) is supported.
185 If both of the numbers in a pair are integers, the result
186 is clamped to an integer.
187
188 For example, numericLerp("0.0", "100", 0.123) === "12.3"
189 because the "." in "0.0" is interpreted as a decimal
190 point. But numericLerp("0.", "100.", 0.123) === "12."
191 because the strings are interpreted as integers followed
192 by a full stop.
193
194 Calling this functions on strings that differ in more than
195 numerals gives undefined results.
196 */
197
198 var sourceParts = source.split(NUMBERS);
199 var targetParts = target.split(NUMBERS);
200 var destParts = targetParts;
201 for (var i = 1; i < sourceParts.length; i += 2) {
202 var sourcePart = sourceParts[i];
203 var targetPart = targetParts[i];
204 var part = nlerp(+sourcePart, +targetPart, amount);
205 var sourcePoint = sourcePart.indexOf(".");
206 var targetPoint = targetPart.indexOf(".");
207 var point = Math.round(nlerp(
208 sourcePoint >= 0 ? (sourcePart.length - 1) - sourcePoint : 0,
209 targetPoint >= 0 ? (targetPart.length - 1) - targetPoint : 0,
210 amount));
211 targetParts[i] = part.toFixed(point);
212 }
213 return targetParts.join("");
214 }
215
216 function fastLerpAstral(source, target, amount) {
217 var sourceGlyphs = source.match(GLYPH) || [];
218 var targetGlyphs = target.match(GLYPH) || [];
219 var sourceLength = Math.round(sourceGlyphs.length * amount);
220 var targetLength = Math.round(targetGlyphs.length * amount);
221 var head = targetGlyphs.slice(0, targetLength);
222 var tail = sourceGlyphs.slice(sourceLength, sourceGlyphs.length);
223 head.push.apply(head, tail);
224 return head.join("");
225 }
226
227 function fastLerpBasic(source, target, amount) {
228 var sourceLength = Math.round(source.length * amount);
229 var targetLength = Math.round(target.length * amount);
230 var head = target.substring(0, targetLength);
231 var tail = source.substring(sourceLength, source.length);
232 return head + tail;
233 }
234
235 function fastLerp(source, target, amount) {
236 /** Interpolate between two strings based on length
237
238 This interpolation algorithm progressively replaces the
239 front of one string with another. This approach is fast
240 but does not look good when the strings are similar.
241 */
242
243 // TODO: Consider fast-pathing this even more for very large
244 // strings, e.g. in the megabyte range. These are large enough
245 // that it should be fine to just pick a codepoint and search
246 // for the nearest glyph start.
247 if (source.match(MULTI) || target.match(MULTI))
248 return fastLerpAstral(source, target, amount);
249 else
250 return fastLerpBasic(source, target, amount);
251 }
252
253 function lerp(source, target, amount) {
254 /** Interpolate between two strings as best as possible
255
256 If the strings are identical aside from numbers in them,
257 they are passed through numericLerp.
258
259 If the strings are not numbers and short, they are passed
260 through diffLerp.
261
262 Otherwise, they are passed through fastLerp.
263 */
264 source = source.toString();
265 target = target.toString();
266
267 // Fast path for boundary cases.
268 if (amount === 0) return source;
269 if (amount === 1) return target;
270
271 if (areNumericTwins(source, target))
272 return numericLerp(source, target, amount);
273
274 // Numeric lerps should over- and under-shoot when fed numbers
275 // outside 0 to 1, but other types cannot.
276 if (amount < 0) return source;
277 if (amount > 1) return target;
278
279 var n = source.length * target.length;
280 var appropriate = (n && n < MAX_MATRIX_SIZE) ? diffLerp : fastLerp;
281 return appropriate(source, target, amount);
282 }
283
284 exports.costMatrix = costMatrix;
285 exports.patch = patch;
286 exports.diff = diff;
287 exports.fastLerp = fastLerp;
288 exports.diffLerp = diffLerp;
289 exports.numericLerp = numericLerp;
290 exports.lerp = lerp;
291
292 })(typeof exports === "undefined" ? (this.stringLerp = {}) : exports);