Replace switch table with data-based slicing.
[string-lerp.git] / string-lerp.js
1 /* string-lerp - progressively turn one string into another
2 Copyright 2014 Joe Wreschnig
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8 */
9
10 /* @license Copyright 2014 Joe Wreschnig - GNU GPL v2 or later */
11
12 (function (exports) {
13 "use strict";
14
15 var MAX_MATRIX_SIZE = 256 * 256;
16
17 function costMatrix(source, target, ins, del, sub) {
18 /** Calculate the Levenshtein cost matrix for source and target
19
20 If source and target are strings, they cannot contain any
21 astral or combining codepoints. Such data must be passed
22 as arrays of strings with one element per glyph.
23
24 ins, del, and sub are the costs for insertion, deletion,
25 and substition respectively. Their default value is 1. If
26 only ins is passed, del and sub are set to the same cost.
27 If ins and del are passed, sub is set to the more
28 expensive of the two.
29
30 The matrix is returned as a flat typed array.
31
32 Following http://en.wikipedia.org/wiki/Levenshtein_distance
33 */
34 ins = ins === undefined ? 1 : (ins | 0);
35 del = (del | 0) || ins;
36 sub = (sub | 0) || Math.max(ins, del);
37 var m = source.length + 1;
38 var n = target.length + 1;
39 var d = new Uint32Array(m * n);
40 var i, j;
41 for (i = 1; i < m; ++i)
42 d[n * i] = i;
43 for (j = 1; j < n; ++j)
44 d[j] = j;
45 for (j = 1; j < n; ++j)
46 for (i = 1; i < m; ++i)
47 if (source[i - 1] === target[j - 1])
48 d[n * i + j] = d[n * (i - 1) + j - 1];
49 else
50 d[n * i + j] = Math.min(del + d[n * (i - 1) + j ],
51 ins + d[n * i + j - 1],
52 sub + d[n * (i - 1) + j - 1]);
53 return d;
54 }
55
56 // First, note that deletion is just substition with nothing, so
57 // any DEL operation can be replaced by a SUB. Second, the
58 // operation code *is* the necessary slice offset for applying the
59 // diff.
60 var INS = 0, SUB = 1;
61
62 function editPath(costs, target) {
63 /** Given a cost matrix and a target, create an edit list */
64 var path = [];
65 var j = target.length;
66 var n = j + 1;
67 var i = costs.length / n - 1;
68 while (i || j) {
69 var sub = (i && j) ? costs[n * (i - 1) + j - 1] : Infinity;
70 var del = i ? costs[n * (i - 1) + j] : Infinity;
71 var ins = j ? costs[n * i + j - 1] : Infinity;
72 if (sub <= ins && sub <= del) {
73 if (costs[n * i + j] !== costs[n * (i - 1) + j - 1])
74 path.push([SUB, i - 1, target[j - 1]]);
75 --i; --j;
76 } else if (ins <= del) {
77 path.push([INS, i, target[j - 1]]);
78 --j;
79 } else {
80 path.push([SUB, i - 1, ""]);
81 --i;
82 }
83 }
84 return path;
85 }
86
87 function diff(source, target, ins, del, sub) {
88 /** Create a diff between string source and target
89
90 ins, del, and sub are as passed to levenshtein
91 */
92 return editPath(costMatrix(source, target, ins, del, sub), target);
93 }
94
95 function patch(diff, source) {
96 /** Apply the list of edits to s */
97 var edit;
98 var i;
99
100 if (Array.isArray(source)) {
101 for (i = 0; i < diff.length; ++i) {
102 edit = diff[i];
103 source.splice(edit[1], edit[0], edit[2]);
104 }
105 } else {
106 for (i = 0; i < diff.length; ++i) {
107 edit = diff[i];
108 var head = source.slice(0, edit[1]);
109 var tail = source.slice(edit[1] + edit[0]);
110 source = head + edit[2] + tail;
111 }
112 }
113 return source;
114 }
115
116 var MULTI = /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]/;
117
118 var GLYPH = /([\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uDC00-\uFE1F\uFE30-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF])([\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]*)/g;
119
120 function diffLerp(a, b, p) {
121 /** Interpolate between two strings based on edit operations
122
123 This interpolation algorithm applys a partial edit of one
124 string into the other. This produces nice looking results,
125 but can take a significant amount of time and memory to
126 compute the edits. It is not recommended for strings
127 longer than a few hundred characters.
128 */
129
130 // If given strings with astral codepoints or combining
131 // characters, split them into arrays of "glyphs" first,
132 // do the edit on the list of "glyphs", and rejoin them.
133 //
134 // This split is not perfect for all languages, but at least
135 // it won't create invalid surrogate pairs or orphaned
136 // combining characters.
137 if (a.match && a.match(MULTI) || b.match && b.match(MULTI)) {
138 var ca = a.match(GLYPH) || [];
139 var cb = b.match(GLYPH) || [];
140 return diffLerp(ca, cb, p).join("");
141 }
142
143 // The edit path works from the string end, forwards, because
144 // that's how Levenshtein edits work. To match LTR reading
145 // direction (and the behavior of fastLerp), swap the strings
146 // and invert the parameter when editing.
147 var edits = diff(b, a, 2, 2, 3);
148 var partial = edits.slice(0, Math.round((1 - p) * edits.length));
149 return patch(partial, b);
150 }
151
152 var NUMBERS = /(-?\d+(?:\.\d+)?)/g;
153
154 function areNumericTwins(a, b) {
155 /** Check if a and b differ only in numerals
156
157 A leading "-" counts as part of numbers; a leading "+"
158 does not. Numbers may contain a single ".", but no other
159 floating point syntax.
160 */
161 return a.replace(NUMBERS, "0") === b.replace(NUMBERS, "0");
162 }
163
164 function nlerp(a, b, p) {
165 return a + (b - a) * p;
166 }
167
168 function numericLerp(a, b, p) {
169 /** Interpolate numerically between two strings containing numbers
170
171 Numbers may have a leading "-" and a single "." to mark
172 the decimal point, but something must be after the ".".
173 If both of the numbers in a pair are integers, the result
174 is clamped to an integer.
175
176 For example, numericLerp("0.0", "100", 0.123) === "12.3"
177 because the "." in "0.0" is interpreted as a decimal
178 point. But numericLerp("0.", "100.", 0.123) === "12."
179 because the strings are interpreted as integers followed
180 by a full stop.
181
182 Calling this functions on strings that differ in more than
183 numerals gives undefined results.
184 */
185 var aParts = a.split(NUMBERS);
186 var bParts = b.split(NUMBERS);
187 for (var i = 1; i < aParts.length; i += 2) {
188 var part = nlerp(+aParts[i], +bParts[i], p);
189 if (aParts[i].indexOf(".") === -1 && bParts[i].indexOf(".") === -1)
190 part = Math.round(part);
191 aParts[i] = part.toString();
192 }
193 return aParts.join("");
194 }
195
196 function fastLerp(a, b, p) {
197 /** Interpolate between two strings based on length
198
199 This interpolation algorithm progressively replaces the
200 front of one string with another. This approach is fast
201 but does not look good when the strings are similar.
202 */
203
204 // TODO: Consider fast-pathing this even more for very large
205 // strings, e.g. in the megabyte range. These are large enough
206 // that
207 if (a.match(MULTI) || b.match(MULTI)) {
208 var ca = a.match(GLYPH) || [];
209 var cb = b.match(GLYPH) || [];
210 var calen = Math.round(ca.length * p);
211 var cblen = Math.round(cb.length * p);
212 var r = cb.slice(0, cblen);
213 r.push.apply(r, ca.slice(calen, ca.length));
214 return r.join("");
215 } else {
216 var alen = Math.round(a.length * p);
217 var blen = Math.round(b.length * p);
218 return b.substring(0, blen) + a.substring(alen, a.length);
219 }
220 }
221
222 function lerp(a, b, p) {
223 /** Interpolate between two strings as best as possible
224
225 If the strings are identical aside from numbers in them,
226 they are passed through numericLerp.
227
228 If the strings are not numbers and short, they are passed
229 through diffLerp.
230
231 Otherwise, they are passed through fastLerp.
232 */
233 a = a.toString();
234 b = b.toString();
235
236 // Fast path for boundary cases.
237 if (p === 0) return a;
238 if (p === 1) return b;
239
240 if (areNumericTwins(a, b))
241 return numericLerp(a, b, p);
242
243 // Numeric lerps should over- and under-shoot when fed numbers
244 // outside 0 to 1, but other types cannot.
245 if (p < 0) return a;
246 if (p > 1) return b;
247
248 var n = a.length * b.length;
249 return ((n && n < MAX_MATRIX_SIZE) ? diffLerp : fastLerp)(a, b, p);
250 }
251
252 exports.costMatrix = costMatrix;
253 exports.patch = patch;
254 exports.diff = diff;
255 exports.fastLerp = fastLerp;
256 exports.diffLerp = diffLerp;
257 exports.numericLerp = numericLerp;
258 exports.lerp = lerp;
259
260 })(typeof exports === "undefined" ? (this.stringLerp = {}) : exports);