Custom jshint configuration.
[string-lerp.git] / string-lerp.js
1 /* string-lerp - progressively turn one string into another
2 Copyright 2014 Joe Wreschnig
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8 */
9
10 /* @license Copyright 2014 Joe Wreschnig - GNU GPL v2 or later */
11 /*globals exports, Uint32Array */
12
13 (function (exports) {
14 "use strict";
15
16 var MAX_MATRIX_SIZE = 256 * 256;
17
18 function costMatrix(source, target, ins, del, sub) {
19 /** Calculate the Levenshtein cost matrix for source and target
20
21 If source and target are strings, they cannot contain any
22 astral or combining codepoints. Such data must be passed
23 as arrays of strings with one element per glyph.
24
25 ins, del, and sub are the costs for insertion, deletion,
26 and substition respectively. Their default value is 1. If
27 only ins is passed, del and sub are set to the same cost.
28 If ins and del are passed, sub is set to the more
29 expensive of the two.
30
31 The matrix is returned as a flat typed array.
32
33 Following http://en.wikipedia.org/wiki/Levenshtein_distance
34 */
35 ins = ins === undefined ? 1 : (ins | 0);
36 del = (del | 0) || ins;
37 sub = (sub | 0) || Math.max(ins, del);
38 var m = source.length + 1;
39 var n = target.length + 1;
40 var d = new Uint32Array(m * n);
41 var i, j;
42 for (i = 1; i < m; ++i)
43 d[n * i] = i;
44 for (j = 1; j < n; ++j)
45 d[j] = j;
46 for (j = 1; j < n; ++j)
47 for (i = 1; i < m; ++i)
48 if (source[i - 1] === target[j - 1])
49 d[n * i + j] = d[n * (i - 1) + j - 1];
50 else
51 d[n * i + j] = Math.min(del + d[n * (i - 1) + j ],
52 ins + d[n * i + j - 1],
53 sub + d[n * (i - 1) + j - 1]);
54 return d;
55 }
56
57 // First, note that deletion is just substition with nothing, so
58 // any DEL operation can be replaced by a SUB. Second, the
59 // operation code *is* the necessary slice offset for applying the
60 // diff.
61 var INS = 0, SUB = 1;
62
63 function editPath(costs, target) {
64 /** Given a cost matrix and a target, create an edit list */
65 var path = [];
66 var j = target.length;
67 var n = j + 1;
68 var i = costs.length / n - 1;
69 while (i || j) {
70 var sub = (i && j) ? costs[n * (i - 1) + j - 1] : Infinity;
71 var del = i ? costs[n * (i - 1) + j] : Infinity;
72 var ins = j ? costs[n * i + j - 1] : Infinity;
73 if (sub <= ins && sub <= del) {
74 if (costs[n * i + j] !== costs[n * (i - 1) + j - 1])
75 path.push([SUB, i - 1, target[j - 1]]);
76 --i; --j;
77 } else if (ins <= del) {
78 path.push([INS, i, target[j - 1]]);
79 --j;
80 } else {
81 path.push([SUB, i - 1, ""]);
82 --i;
83 }
84 }
85 return path;
86 }
87
88 function diff(source, target, ins, del, sub) {
89 /** Create a list of edits to turn source into target
90
91 ins, del, and sub are as passed to costMatrix.
92 */
93 return editPath(costMatrix(source, target, ins, del, sub), target);
94 }
95
96 function patchArray(diff, source) {
97 for (var i = 0; i < diff.length; ++i) {
98 var edit = diff[i];
99 source.splice(edit[1], edit[0], edit[2]);
100 }
101 return source;
102 }
103
104 function patchString(diff, source) {
105 for (var i = 0; i < diff.length; ++i) {
106 var edit = diff[i];
107 var head = source.slice(0, edit[1]);
108 var tail = source.slice(edit[1] + edit[0]);
109 source = head + edit[2] + tail;
110 }
111 return source;
112 }
113
114 function patch(diff, source) {
115 /** Apply a list of edits to source */
116 var patcher = Array.isArray(source) ? patchArray : patchString;
117 return patcher(diff, source);
118 }
119
120 // Matches if a string contains combining characters or astral
121 // codepoints (technically, the first half surrogate of an astral
122 // codepoint).
123 var MULTI = /[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uD800-\uDBFF\uFE20-\uFE2F]/;
124
125 // Match an entire (potentially astral) codepoint and any
126 // combining characters following it.
127 var GLYPH = /[\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uD800-\uFE1F\uFE30-\uFFFF][\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uDC00-\uDFFF\uFE20-\uFE2F]*/g;
128
129 function diffLerpAstral(source, target, amount) {
130 // This split is not perfect for all languages, but at least
131 // it won't create invalid surrogate pairs or orphaned
132 // combining characters.
133 var sourceGlyphs = source.match(GLYPH) || [];
134 var targetGlyphs = target.match(GLYPH) || [];
135 var edits = diff(targetGlyphs, sourceGlyphs, 2, 2, 3);
136 // The edit path works from the string end, forwards, because
137 // that's how Levenshtein edits work. To match LTR reading
138 // direction (and the behavior of fastLerp), swap the strings
139 // and invert the parameter when editing.
140 var partial = edits.slice(0, Math.round((1 - amount) * edits.length));
141 return patchArray(partial, targetGlyphs).join("");
142 }
143
144 function diffLerpBasic(source, target, amount) {
145 var edits = diff(target, source, 2, 2, 3);
146 // The edit path works from the string end, forwards, because
147 // that's how Levenshtein edits work. To match LTR reading
148 // direction (and the behavior of fastLerp), swap the strings
149 // and invert the parameter when editing.
150 var partial = edits.slice(0, Math.round((1 - amount) * edits.length));
151 return patchString(partial, target);
152 }
153
154 function diffLerp(source, target, amount) {
155 /** Interpolate between two strings using edit operations
156
157 This interpolation algorithm applys a partial edit of one
158 string into the other. This produces nice looking results,
159 but can take a significant amount of time and memory to
160 compute the edits. It is not recommended for strings
161 longer than a few hundred characters.
162 */
163
164 if (source.match(MULTI) || target.match(MULTI))
165 return diffLerpAstral(source, target, amount);
166 else
167 return diffLerpBasic(source, target, amount);
168 }
169
170 var NUMBERS = /(-?\d{1,20}(?:\.\d{1,20})?)/g;
171
172 function areNumericTwins(source, target) {
173 /** Check if a and b differ only in numerals */
174 return source.replace(NUMBERS, "0") === target.replace(NUMBERS, "0");
175 }
176
177 function nlerp(source, target, amount) {
178 return source + (target - source) * amount;
179 }
180
181 function numericLerp(source, target, amount) {
182 /** Interpolate numerically between strings containing numbers
183
184 Numbers may have a leading "-" and a single "." to mark
185 the decimal point, but something must be after the ".".
186 No other floating point syntax (e.g. 1e6) is supported.
187 They are treated as fixed-point values, with the point's
188 position itself interpolating.
189
190 For example, numericLerp("0.0", "100".0, 0.123) === "12.3"
191 because the "." in "0.0" is interpreted as a decimal
192 point. But numericLerp("0.", "100.", 0.123) === "12."
193 because the strings are interpreted as integers followed
194 by a full stop.
195
196 Calling this functions on strings that differ in more than
197 numerals gives undefined results.
198 */
199
200 var targetParts = target.split(NUMBERS);
201 var match;
202 var i = 1;
203 while ((match = NUMBERS.exec(source))) {
204 var sourcePart = match[0];
205 var targetPart = targetParts[i];
206 var part = nlerp(+sourcePart, +targetPart, amount);
207 var sourcePoint = sourcePart.indexOf(".");
208 var targetPoint = targetPart.indexOf(".");
209 var point = Math.round(nlerp(
210 sourcePoint >= 0 ? (sourcePart.length - 1) - sourcePoint : 0,
211 targetPoint >= 0 ? (targetPart.length - 1) - targetPoint : 0,
212 amount));
213 targetParts[i] = part.toFixed(point);
214 i += 2;
215 }
216 return targetParts.join("");
217 }
218
219 function fastLerpAstral(source, target, amount) {
220 var sourceGlyphs = source.match(GLYPH) || [];
221 var targetGlyphs = target.match(GLYPH) || [];
222 var sourceLength = Math.round(sourceGlyphs.length * amount);
223 var targetLength = Math.round(targetGlyphs.length * amount);
224 var head = targetGlyphs.slice(0, targetLength);
225 var tail = sourceGlyphs.slice(sourceLength, sourceGlyphs.length);
226 head.push.apply(head, tail);
227 return head.join("");
228 }
229
230 function fastLerpBasic(source, target, amount) {
231 var sourceLength = Math.round(source.length * amount);
232 var targetLength = Math.round(target.length * amount);
233 var head = target.substring(0, targetLength);
234 var tail = source.substring(sourceLength, source.length);
235 return head + tail;
236 }
237
238 function fastLerp(source, target, amount) {
239 /** Interpolate between two strings based on length
240
241 This interpolation algorithm progressively replaces the
242 front of one string with another. This approach is fast
243 but does not look good when the strings are similar.
244 */
245
246 // TODO: Consider fast-pathing this even more for very large
247 // strings, e.g. in the megabyte range. These are large enough
248 // that it should be fine to just pick a codepoint and search
249 // for the nearest glyph start.
250 if (source.match(MULTI) || target.match(MULTI))
251 return fastLerpAstral(source, target, amount);
252 else
253 return fastLerpBasic(source, target, amount);
254 }
255
256 function lerp(source, target, amount) {
257 /** Interpolate between two strings as best as possible
258
259 If the strings are identical aside from numbers in them,
260 they are passed through numericLerp.
261
262 If the strings are not numbers and short, they are passed
263 through diffLerp.
264
265 Otherwise, they are passed through fastLerp.
266 */
267 source = source.toString();
268 target = target.toString();
269
270 // Fast path for boundary cases.
271 if (amount === 0) return source;
272 if (amount === 1) return target;
273
274 if (areNumericTwins(source, target))
275 return numericLerp(source, target, amount);
276
277 // Numeric lerps should over- and under-shoot when fed numbers
278 // outside 0 to 1, but other types cannot.
279 if (amount < 0) return source;
280 if (amount > 1) return target;
281
282 var n = source.length * target.length;
283 var appropriate = (n && n < MAX_MATRIX_SIZE) ? diffLerp : fastLerp;
284 return appropriate(source, target, amount);
285 }
286
287 exports.costMatrix = costMatrix;
288 exports.patch = patch;
289 exports.diff = diff;
290 exports.fastLerp = fastLerp;
291 exports.diffLerp = diffLerp;
292 exports.numericLerp = numericLerp;
293 exports.lerp = lerp;
294
295 })(typeof exports === "undefined" ? (this.stringLerp = {}) : exports);