6c240e44b61f9df46911224e1d51b6fd0aadca12
[string-lerp.git] / string-lerp.js
1 /* string-lerp - progressively turn one string into another
2 Copyright 2014 Joe Wreschnig
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8 */
9
10 /* @license Copyright 2014 Joe Wreschnig - GNU GPL v2 or later */
11
12 (function (exports) {
13 "use strict";
14
15 var MAX_MATRIX_SIZE = 256 * 256;
16
17 function costMatrix(source, target, ins, del, sub) {
18 /** Calculate the Levenshtein cost matrix for source and target
19
20 If source and target are strings, they cannot contain any
21 astral or combining codepoints. Such data must be passed
22 as arrays of strings with one element per glyph.
23
24 ins, del, and sub are the costs for insertion, deletion,
25 and substition respectively. Their default value is 1. If
26 only ins is passed, del and sub are set to the same cost.
27 If ins and del are passed, sub is set to the more
28 expensive of the two.
29
30 The matrix is returned as a flat typed array.
31
32 Following http://en.wikipedia.org/wiki/Levenshtein_distance
33 */
34 ins = ins === undefined ? 1 : (ins | 0);
35 del = (del | 0) || ins;
36 sub = (sub | 0) || Math.max(ins, del);
37 var m = source.length + 1;
38 var n = target.length + 1;
39 var d = new Uint32Array(m * n);
40 var i, j;
41 for (i = 1; i < m; ++i)
42 d[n * i] = i;
43 for (j = 1; j < n; ++j)
44 d[j] = j;
45 for (j = 1; j < n; ++j)
46 for (i = 1; i < m; ++i)
47 if (source[i - 1] === target[j - 1])
48 d[n * i + j] = d[n * (i - 1) + j - 1];
49 else
50 d[n * i + j] = Math.min(del + d[n * (i - 1) + j ],
51 ins + d[n * i + j - 1],
52 sub + d[n * (i - 1) + j - 1]);
53 return d;
54 }
55
56 // First, note that deletion is just substition with nothing, so
57 // any DEL operation can be replaced by a SUB. Second, the
58 // operation code *is* the necessary slice offset for applying the
59 // diff.
60 var INS = 0, SUB = 1;
61
62 function editPath(costs, target) {
63 /** Given a cost matrix and a target, create an edit list */
64 var path = [];
65 var j = target.length;
66 var n = j + 1;
67 var i = costs.length / n - 1;
68 while (i || j) {
69 var sub = (i && j) ? costs[n * (i - 1) + j - 1] : Infinity;
70 var del = i ? costs[n * (i - 1) + j] : Infinity;
71 var ins = j ? costs[n * i + j - 1] : Infinity;
72 if (sub <= ins && sub <= del) {
73 if (costs[n * i + j] !== costs[n * (i - 1) + j - 1])
74 path.push([SUB, i - 1, target[j - 1]]);
75 --i; --j;
76 } else if (ins <= del) {
77 path.push([INS, i, target[j - 1]]);
78 --j;
79 } else {
80 path.push([SUB, i - 1, ""]);
81 --i;
82 }
83 }
84 return path;
85 }
86
87 function diff(source, target, ins, del, sub) {
88 /** Create a list of edits to turn source into target
89
90 ins, del, and sub are as passed to costMatrix.
91 */
92 return editPath(costMatrix(source, target, ins, del, sub), target);
93 }
94
95 function patchArray(diff, source) {
96 for (var i = 0; i < diff.length; ++i) {
97 var edit = diff[i];
98 source.splice(edit[1], edit[0], edit[2]);
99 }
100 return source;
101 }
102
103 function patchString(diff, source) {
104 for (var i = 0; i < diff.length; ++i) {
105 var edit = diff[i];
106 var head = source.slice(0, edit[1]);
107 var tail = source.slice(edit[1] + edit[0]);
108 source = head + edit[2] + tail;
109 }
110 return source;
111 }
112
113 function patch(diff, source) {
114 /** Apply a list of edits to source */
115 var patcher = Array.isArray(source) ? patchArray : patchString;
116 return patcher(diff, source);
117 }
118
119 var MULTI = /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]/;
120
121 var GLYPH = /([\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uDC00-\uFE1F\uFE30-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF])([\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]*)/g;
122
123 function diffLerp(source, target, amount ) {
124 /** Interpolate between two strings using edit operations
125
126 This interpolation algorithm applys a partial edit of one
127 string into the other. This produces nice looking results,
128 but can take a significant amount of time and memory to
129 compute the edits. It is not recommended for strings
130 longer than a few hundred characters.
131 */
132
133 // If given strings with astral codepoints or combining
134 // characters, split them into arrays of "glyphs" first,
135 // do the edit on the list of "glyphs", and rejoin them.
136 //
137 // This split is not perfect for all languages, but at least
138 // it won't create invalid surrogate pairs or orphaned
139 // combining characters.
140 //
141 // TODO: The way this is called is a hack.
142 if (source.match && (source.match(MULTI) || target.match(MULTI))) {
143 var sourceGlyphs = source.match(GLYPH) || [];
144 var targetGlyphs = target.match(GLYPH) || [];
145 return diffLerp(sourceGlyphs, targetGlyphs, amount).join("");
146 }
147
148 // The edit path works from the string end, forwards, because
149 // that's how Levenshtein edits work. To match LTR reading
150 // direction (and the behavior of fastLerp), swap the strings
151 // and invert the parameter when editing.
152 var edits = diff(target, source, 2, 2, 3);
153 var partial = edits.slice(0, Math.round((1 - amount) * edits.length));
154 return patch(partial, target);
155 }
156
157 var NUMBERS = /(-?\d{1,20}(?:\.\d{1,20})?)/g;
158
159 function areNumericTwins(source, target) {
160 /** Check if a and b differ only in numerals */
161 return source.replace(NUMBERS, "0") === target.replace(NUMBERS, "0");
162 }
163
164 function nlerp(source, target, amount) {
165 return source + (target - source) * amount;
166 }
167
168 function numericLerp(source, target, amount) {
169 /** Interpolate numerically between strings containing numbers
170
171 Numbers may have a leading "-" and a single "." to mark
172 the decimal point, but something must be after the ".".
173 No other floating point syntax (e.g. 1e6) is supported.
174 If both of the numbers in a pair are integers, the result
175 is clamped to an integer.
176
177 For example, numericLerp("0.0", "100", 0.123) === "12.3"
178 because the "." in "0.0" is interpreted as a decimal
179 point. But numericLerp("0.", "100.", 0.123) === "12."
180 because the strings are interpreted as integers followed
181 by a full stop.
182
183 Calling this functions on strings that differ in more than
184 numerals gives undefined results.
185 */
186
187 var sourceParts = source.split(NUMBERS);
188 var targetParts = target.split(NUMBERS);
189 var destParts = targetParts;
190 for (var i = 1; i < sourceParts.length; i += 2) {
191 var sourcePart = sourceParts[i];
192 var targetPart = targetParts[i];
193 var part = nlerp(+sourcePart, +targetPart, amount);
194 var sourcePoint = sourcePart.indexOf(".");
195 var targetPoint = targetPart.indexOf(".");
196 var point = Math.round(nlerp(
197 sourcePoint >= 0 ? (sourcePart.length - 1) - sourcePoint : 0,
198 targetPoint >= 0 ? (targetPart.length - 1) - targetPoint : 0,
199 amount));
200 targetParts[i] = part.toFixed(point);
201 }
202 return targetParts.join("");
203 }
204
205 function fastLerpAstral(source, target, amount) {
206 var sourceGlyphs = source.match(GLYPH) || [];
207 var targetGlyphs = target.match(GLYPH) || [];
208 var sourceLength = Math.round(sourceGlyphs.length * amount);
209 var targetLength = Math.round(targetGlyphs.length * amount);
210 var head = targetGlyphs.slice(0, targetLength);
211 var tail = sourceGlyphs.slice(sourceLength, sourceGlyphs.length);
212 head.push.apply(head, tail);
213 return head.join("");
214 }
215
216 function fastLerpBasic(source, target, amount) {
217 var sourceLength = Math.round(source.length * amount);
218 var targetLength = Math.round(target.length * amount);
219 var head = target.substring(0, targetLength);
220 var tail = source.substring(sourceLength, source.length);
221 return head + tail;
222 }
223
224 function fastLerp(source, target, amount) {
225 /** Interpolate between two strings based on length
226
227 This interpolation algorithm progressively replaces the
228 front of one string with another. This approach is fast
229 but does not look good when the strings are similar.
230 */
231
232 // TODO: Consider fast-pathing this even more for very large
233 // strings, e.g. in the megabyte range. These are large enough
234 // that it should be fine to just pick a codepoint and search
235 // for the nearest glyph start.
236 if (source.match(MULTI) || target.match(MULTI))
237 return fastLerpAstral(source, target, amount);
238 else
239 return fastLerpBasic(source, target, amount);
240 }
241
242 function lerp(source, target, amount) {
243 /** Interpolate between two strings as best as possible
244
245 If the strings are identical aside from numbers in them,
246 they are passed through numericLerp.
247
248 If the strings are not numbers and short, they are passed
249 through diffLerp.
250
251 Otherwise, they are passed through fastLerp.
252 */
253 source = source.toString();
254 target = target.toString();
255
256 // Fast path for boundary cases.
257 if (amount === 0) return source;
258 if (amount === 1) return target;
259
260 if (areNumericTwins(source, target))
261 return numericLerp(source, target, amount);
262
263 // Numeric lerps should over- and under-shoot when fed numbers
264 // outside 0 to 1, but other types cannot.
265 if (amount < 0) return source;
266 if (amount > 1) return target;
267
268 var n = source.length * target.length;
269 var appropriate = (n && n < MAX_MATRIX_SIZE) ? diffLerp : fastLerp;
270 return appropriate(source, target, amount);
271 }
272
273 exports.costMatrix = costMatrix;
274 exports.patch = patch;
275 exports.diff = diff;
276 exports.fastLerp = fastLerp;
277 exports.diffLerp = diffLerp;
278 exports.numericLerp = numericLerp;
279 exports.lerp = lerp;
280
281 })(typeof exports === "undefined" ? (this.stringLerp = {}) : exports);