Fix some busted comments and argument names.
[string-lerp.git] / string-lerp.js
1 /* string-lerp - progressively turn one string into another
2 Copyright 2014 Joe Wreschnig
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8 */
9
10 /* @license Copyright 2014 Joe Wreschnig - GNU GPL v2 or later */
11
12 (function (exports) {
13 "use strict";
14
15 var MAX_MATRIX_SIZE = 256 * 256;
16
17 function costMatrix(source, target, ins, del, sub) {
18 /** Calculate the Levenshtein cost matrix for source and target
19
20 If source and target are strings, they cannot contain any
21 astral or combining codepoints. Such data must be passed
22 as arrays of strings with one element per glyph.
23
24 ins, del, and sub are the costs for insertion, deletion,
25 and substition respectively. Their default value is 1. If
26 only ins is passed, del and sub are set to the same cost.
27 If ins and del are passed, sub is set to the more
28 expensive of the two.
29
30 The matrix is returned as a flat typed array.
31
32 Following http://en.wikipedia.org/wiki/Levenshtein_distance
33 */
34 ins = ins === undefined ? 1 : (ins | 0);
35 del = (del | 0) || ins;
36 sub = (sub | 0) || Math.max(ins, del);
37 var m = source.length + 1;
38 var n = target.length + 1;
39 var d = new Uint32Array(m * n);
40 var i, j;
41 for (i = 1; i < m; ++i)
42 d[n * i] = i;
43 for (j = 1; j < n; ++j)
44 d[j] = j;
45 for (j = 1; j < n; ++j)
46 for (i = 1; i < m; ++i)
47 if (source[i - 1] === target[j - 1])
48 d[n * i + j] = d[n * (i - 1) + j - 1];
49 else
50 d[n * i + j] = Math.min(del + d[n * (i - 1) + j ],
51 ins + d[n * i + j - 1],
52 sub + d[n * (i - 1) + j - 1]);
53 return d;
54 }
55
56 // First, note that deletion is just substition with nothing, so
57 // any DEL operation can be replaced by a SUB. Second, the
58 // operation code *is* the necessary slice offset for applying the
59 // diff.
60 var INS = 0, SUB = 1;
61
62 function editPath(costs, target) {
63 /** Given a cost matrix and a target, create an edit list */
64 var path = [];
65 var j = target.length;
66 var n = j + 1;
67 var i = costs.length / n - 1;
68 while (i || j) {
69 var sub = (i && j) ? costs[n * (i - 1) + j - 1] : Infinity;
70 var del = i ? costs[n * (i - 1) + j] : Infinity;
71 var ins = j ? costs[n * i + j - 1] : Infinity;
72 if (sub <= ins && sub <= del) {
73 if (costs[n * i + j] !== costs[n * (i - 1) + j - 1])
74 path.push([SUB, i - 1, target[j - 1]]);
75 --i; --j;
76 } else if (ins <= del) {
77 path.push([INS, i, target[j - 1]]);
78 --j;
79 } else {
80 path.push([SUB, i - 1, ""]);
81 --i;
82 }
83 }
84 return path;
85 }
86
87 function diff(source, target, ins, del, sub) {
88 /** Create a list of edits to turn source into target
89
90 ins, del, and sub are as passed to costMatrix.
91 */
92 return editPath(costMatrix(source, target, ins, del, sub), target);
93 }
94
95 function patch(diff, source) {
96 /** Apply a list of edits to source */
97 var edit;
98 var i;
99
100 if (Array.isArray(source)) {
101 for (i = 0; i < diff.length; ++i) {
102 edit = diff[i];
103 source.splice(edit[1], edit[0], edit[2]);
104 }
105 } else {
106 for (i = 0; i < diff.length; ++i) {
107 edit = diff[i];
108 var head = source.slice(0, edit[1]);
109 var tail = source.slice(edit[1] + edit[0]);
110 source = head + edit[2] + tail;
111 }
112 }
113 return source;
114 }
115
116 var MULTI = /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]/;
117
118 var GLYPH = /([\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uDC00-\uFE1F\uFE30-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF])([\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]*)/g;
119
120 function diffLerp(source, target, amount ) {
121 /** Interpolate between two strings using edit operations
122
123 This interpolation algorithm applys a partial edit of one
124 string into the other. This produces nice looking results,
125 but can take a significant amount of time and memory to
126 compute the edits. It is not recommended for strings
127 longer than a few hundred characters.
128 */
129
130 // If given strings with astral codepoints or combining
131 // characters, split them into arrays of "glyphs" first,
132 // do the edit on the list of "glyphs", and rejoin them.
133 //
134 // This split is not perfect for all languages, but at least
135 // it won't create invalid surrogate pairs or orphaned
136 // combining characters.
137 if (source.match && (source.match(MULTI) || target.match(MULTI))) {
138 var sourceGlyphs = source.match(GLYPH) || [];
139 var targetGlyphs = target.match(GLYPH) || [];
140 return diffLerp(sourceGlyphs, targetGlyphs, amount).join("");
141 }
142
143 // The edit path works from the string end, forwards, because
144 // that's how Levenshtein edits work. To match LTR reading
145 // direction (and the behavior of fastLerp), swap the strings
146 // and invert the parameter when editing.
147 var edits = diff(target, source, 2, 2, 3);
148 var partial = edits.slice(0, Math.round((1 - amount) * edits.length));
149 return patch(partial, target);
150 }
151
152 var NUMBERS = /(-?\d+(?:\.\d+)?)/g;
153
154 function areNumericTwins(source, target) {
155 /** Check if a and b differ only in numerals */
156 return source.replace(NUMBERS, "0") === target.replace(NUMBERS, "0");
157 }
158
159 function nlerp(source, target, amount) {
160 return source + (target - source) * amount;
161 }
162
163 function numericLerp(source, target, amount) {
164 /** Interpolate numerically between strings containing numbers
165
166 Numbers may have a leading "-" and a single "." to mark
167 the decimal point, but something must be after the ".".
168 No other floating point syntax (e.g. 1e6) is supported.
169 If both of the numbers in a pair are integers, the result
170 is clamped to an integer.
171
172 For example, numericLerp("0.0", "100", 0.123) === "12.3"
173 because the "." in "0.0" is interpreted as a decimal
174 point. But numericLerp("0.", "100.", 0.123) === "12."
175 because the strings are interpreted as integers followed
176 by a full stop.
177
178 Calling this functions on strings that differ in more than
179 numerals gives undefined results.
180 */
181
182 // TODO: Try to preserve precision of the original numbers.
183 var sourceParts = source.split(NUMBERS);
184 var targetParts = target.split(NUMBERS);
185 var destParts = targetParts;
186 for (var i = 1; i < sourceParts.length; i += 2) {
187 var sourcePart = sourceParts[i];
188 var targetPart = targetParts[i];
189 var part = nlerp(+sourcePart, +targetPart, amount);
190 var sourcePoint = sourcePart.indexOf(".");
191 var targetPoint = targetPart.indexOf(".");
192 if (sourcePoint === -1 && targetPoint === -1)
193 part = Math.round(part);
194 targetParts[i] = part.toString();
195 }
196 return targetParts.join("");
197 }
198
199 function fastLerp(a, b, p) {
200 /** Interpolate between two strings based on length
201
202 This interpolation algorithm progressively replaces the
203 front of one string with another. This approach is fast
204 but does not look good when the strings are similar.
205 */
206
207 // TODO: Consider fast-pathing this even more for very large
208 // strings, e.g. in the megabyte range. These are large enough
209 // that it should be fine to just pick a codepoint and search
210 // for the nearest glyph start.
211 if (a.match(MULTI) || b.match(MULTI)) {
212 var ca = a.match(GLYPH) || [];
213 var cb = b.match(GLYPH) || [];
214 var calen = Math.round(ca.length * p);
215 var cblen = Math.round(cb.length * p);
216 var r = cb.slice(0, cblen);
217 r.push.apply(r, ca.slice(calen, ca.length));
218 return r.join("");
219 } else {
220 var alen = Math.round(a.length * p);
221 var blen = Math.round(b.length * p);
222 return b.substring(0, blen) + a.substring(alen, a.length);
223 }
224 }
225
226 function lerp(a, b, p) {
227 /** Interpolate between two strings as best as possible
228
229 If the strings are identical aside from numbers in them,
230 they are passed through numericLerp.
231
232 If the strings are not numbers and short, they are passed
233 through diffLerp.
234
235 Otherwise, they are passed through fastLerp.
236 */
237 a = a.toString();
238 b = b.toString();
239
240 // Fast path for boundary cases.
241 if (p === 0) return a;
242 if (p === 1) return b;
243
244 if (areNumericTwins(a, b))
245 return numericLerp(a, b, p);
246
247 // Numeric lerps should over- and under-shoot when fed numbers
248 // outside 0 to 1, but other types cannot.
249 if (p < 0) return a;
250 if (p > 1) return b;
251
252 var n = a.length * b.length;
253 return ((n && n < MAX_MATRIX_SIZE) ? diffLerp : fastLerp)(a, b, p);
254 }
255
256 exports.costMatrix = costMatrix;
257 exports.patch = patch;
258 exports.diff = diff;
259 exports.fastLerp = fastLerp;
260 exports.diffLerp = diffLerp;
261 exports.numericLerp = numericLerp;
262 exports.lerp = lerp;
263
264 })(typeof exports === "undefined" ? (this.stringLerp = {}) : exports);