1 /* string-lerp - progressively turn one string into another
2 Copyright 2014 Joe Wreschnig
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
10 /* @license Copyright 2014 Joe Wreschnig - GNU GPL v2 or later */
15 var MAX_MATRIX_SIZE
= 256 * 256;
17 function costMatrix(source
, target
, ins
, del
, sub
) {
18 /** Calculate the Levenshtein cost matrix for source and target
20 If source and target are strings, they cannot contain any
21 astral or combining codepoints. Such data must be passed
22 as arrays of strings with one element per glyph.
24 ins, del, and sub are the costs for insertion, deletion,
25 and substition respectively. Their default value is 1. If
26 only ins is passed, del and sub are set to the same cost.
27 If ins and del are passed, sub is set to the more
30 The matrix is returned as a flat typed array.
32 Following http://en.wikipedia.org/wiki/Levenshtein_distance
34 ins
= ins
=== undefined ? 1 : (ins
| 0);
35 del
= (del
| 0) || ins
;
36 sub
= (sub
| 0) || Math
.max(ins
, del
);
37 var m
= source
.length
+ 1;
38 var n
= target
.length
+ 1;
39 var d
= new Uint32Array(m
* n
);
41 for (i
= 1; i
< m
; ++i
)
43 for (j
= 1; j
< n
; ++j
)
45 for (j
= 1; j
< n
; ++j
)
46 for (i
= 1; i
< m
; ++i
)
47 if (source
[i
- 1] === target
[j
- 1])
48 d
[n
* i
+ j
] = d
[n
* (i
- 1) + j
- 1];
50 d
[n
* i
+ j
] = Math
.min(del
+ d
[n
* (i
- 1) + j
],
51 ins
+ d
[n
* i
+ j
- 1],
52 sub
+ d
[n
* (i
- 1) + j
- 1]);
56 // First, note that deletion is just substition with nothing, so
57 // any DEL operation can be replaced by a SUB. Second, the
58 // operation code *is* the necessary slice offset for applying the
62 function editPath(costs
, target
) {
63 /** Given a cost matrix and a target, create an edit list */
65 var j
= target
.length
;
67 var i
= costs
.length
/ n
- 1;
69 var sub
= (i
&& j
) ? costs
[n
* (i
- 1) + j
- 1] : Infinity
;
70 var del
= i
? costs
[n
* (i
- 1) + j
] : Infinity
;
71 var ins
= j
? costs
[n
* i
+ j
- 1] : Infinity
;
72 if (sub
<= ins
&& sub
<= del
) {
73 if (costs
[n
* i
+ j
] !== costs
[n
* (i
- 1) + j
- 1])
74 path
.push([SUB
, i
- 1, target
[j
- 1]]);
76 } else if (ins
<= del
) {
77 path
.push([INS
, i
, target
[j
- 1]]);
80 path
.push([SUB
, i
- 1, ""]);
87 function diff(source
, target
, ins
, del
, sub
) {
88 /** Create a list of edits to turn source into target
90 ins, del, and sub are as passed to costMatrix.
92 return editPath(costMatrix(source
, target
, ins
, del
, sub
), target
);
95 function patchArray(diff
, source
) {
96 for (var i
= 0; i
< diff
.length
; ++i
) {
98 source
.splice(edit
[1], edit
[0], edit
[2]);
103 function patchString(diff
, source
) {
104 for (var i
= 0; i
< diff
.length
; ++i
) {
106 var head
= source
.slice(0, edit
[1]);
107 var tail
= source
.slice(edit
[1] + edit
[0]);
108 source
= head
+ edit
[2] + tail
;
113 function patch(diff
, source
) {
114 /** Apply a list of edits to source */
115 var patcher
= Array
.isArray(source
) ? patchArray
: patchString
;
116 return patcher(diff
, source
);
119 // Matches if a string contains combining characters or astral
120 // codepoints (technically, the first half surrogate of an astral
122 var MULTI
= /[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uD800-\uDBFF\uFE20-\uFE2F]/;
124 // Match an entire (potentially astral) codepoint and any
125 // combining characters following it.
126 var GLYPH
= /[\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uD800-\uFE1F\uFE30-\uFFFF][\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uDC00-\uDFFF\uFE20-\uFE2F]*/g;
128 function diffLerpAstral(source
, target
, amount
) {
129 // This split is not perfect for all languages, but at least
130 // it won't create invalid surrogate pairs or orphaned
131 // combining characters.
132 var sourceGlyphs
= source
.match(GLYPH
) || [];
133 var targetGlyphs
= target
.match(GLYPH
) || [];
134 var edits
= diff(targetGlyphs
, sourceGlyphs
, 2, 2, 3);
135 // The edit path works from the string end, forwards, because
136 // that's how Levenshtein edits work. To match LTR reading
137 // direction (and the behavior of fastLerp), swap the strings
138 // and invert the parameter when editing.
139 var partial
= edits
.slice(0, Math
.round((1 - amount
) * edits
.length
));
140 return patchArray(partial
, targetGlyphs
).join("");
143 function diffLerpBasic(source
, target
, amount
) {
144 var edits
= diff(target
, source
, 2, 2, 3);
145 // The edit path works from the string end, forwards, because
146 // that's how Levenshtein edits work. To match LTR reading
147 // direction (and the behavior of fastLerp), swap the strings
148 // and invert the parameter when editing.
149 var partial
= edits
.slice(0, Math
.round((1 - amount
) * edits
.length
));
150 return patchString(partial
, target
);
153 function diffLerp(source
, target
, amount
) {
154 /** Interpolate between two strings using edit operations
156 This interpolation algorithm applys a partial edit of one
157 string into the other. This produces nice looking results,
158 but can take a significant amount of time and memory to
159 compute the edits. It is not recommended for strings
160 longer than a few hundred characters.
163 if (source
.match(MULTI
) || target
.match(MULTI
))
164 return diffLerpAstral(source
, target
, amount
);
166 return diffLerpBasic(source
, target
, amount
);
169 var NUMBERS
= /(-?\d{1,20}(?:\.\d{1,20})?)/g;
171 function areNumericTwins(source
, target
) {
172 /** Check if a and b differ only in numerals */
173 return source
.replace(NUMBERS
, "0") === target
.replace(NUMBERS
, "0");
176 function nlerp(source
, target
, amount
) {
177 return source
+ (target
- source
) * amount
;
180 function numericLerp(source
, target
, amount
) {
181 /** Interpolate numerically between strings containing numbers
183 Numbers may have a leading "-" and a single "." to mark
184 the decimal point, but something must be after the ".".
185 No other floating point syntax (e.g. 1e6) is supported.
186 They are treated as fixed-point values, with the point's
187 position itself interpolating.
189 For example, numericLerp("0.0", "100".0, 0.123) === "12.3"
190 because the "." in "0.0" is interpreted as a decimal
191 point. But numericLerp("0.", "100.", 0.123) === "12."
192 because the strings are interpreted as integers followed
195 Calling this functions on strings that differ in more than
196 numerals gives undefined results.
199 var targetParts
= target
.split(NUMBERS
);
202 while ((match
= NUMBERS
.exec(source
))) {
203 var sourcePart
= match
[0];
204 var targetPart
= targetParts
[i
];
205 var part
= nlerp(+sourcePart
, +targetPart
, amount
);
206 var sourcePoint
= sourcePart
.indexOf(".");
207 var targetPoint
= targetPart
.indexOf(".");
208 var point
= Math
.round(nlerp(
209 sourcePoint
>= 0 ? (sourcePart
.length
- 1) - sourcePoint
: 0,
210 targetPoint
>= 0 ? (targetPart
.length
- 1) - targetPoint
: 0,
212 targetParts
[i
] = part
.toFixed(point
);
215 return targetParts
.join("");
218 function fastLerpAstral(source
, target
, amount
) {
219 var sourceGlyphs
= source
.match(GLYPH
) || [];
220 var targetGlyphs
= target
.match(GLYPH
) || [];
221 var sourceLength
= Math
.round(sourceGlyphs
.length
* amount
);
222 var targetLength
= Math
.round(targetGlyphs
.length
* amount
);
223 var head
= targetGlyphs
.slice(0, targetLength
);
224 var tail
= sourceGlyphs
.slice(sourceLength
, sourceGlyphs
.length
);
225 head
.push
.apply(head
, tail
);
226 return head
.join("");
229 function fastLerpBasic(source
, target
, amount
) {
230 var sourceLength
= Math
.round(source
.length
* amount
);
231 var targetLength
= Math
.round(target
.length
* amount
);
232 var head
= target
.substring(0, targetLength
);
233 var tail
= source
.substring(sourceLength
, source
.length
);
237 function fastLerp(source
, target
, amount
) {
238 /** Interpolate between two strings based on length
240 This interpolation algorithm progressively replaces the
241 front of one string with another. This approach is fast
242 but does not look good when the strings are similar.
245 // TODO: Consider fast-pathing this even more for very large
246 // strings, e.g. in the megabyte range. These are large enough
247 // that it should be fine to just pick a codepoint and search
248 // for the nearest glyph start.
249 if (source
.match(MULTI
) || target
.match(MULTI
))
250 return fastLerpAstral(source
, target
, amount
);
252 return fastLerpBasic(source
, target
, amount
);
255 function lerp(source
, target
, amount
) {
256 /** Interpolate between two strings as best as possible
258 If the strings are identical aside from numbers in them,
259 they are passed through numericLerp.
261 If the strings are not numbers and short, they are passed
264 Otherwise, they are passed through fastLerp.
266 source
= source
.toString();
267 target
= target
.toString();
269 // Fast path for boundary cases.
270 if (amount
=== 0) return source
;
271 if (amount
=== 1) return target
;
273 if (areNumericTwins(source
, target
))
274 return numericLerp(source
, target
, amount
);
276 // Numeric lerps should over- and under-shoot when fed numbers
277 // outside 0 to 1, but other types cannot.
278 if (amount
< 0) return source
;
279 if (amount
> 1) return target
;
281 var n
= source
.length
* target
.length
;
282 var appropriate
= (n
&& n
< MAX_MATRIX_SIZE
) ? diffLerp
: fastLerp
;
283 return appropriate(source
, target
, amount
);
286 exports
.costMatrix
= costMatrix
;
287 exports
.patch
= patch
;
289 exports
.fastLerp
= fastLerp
;
290 exports
.diffLerp
= diffLerp
;
291 exports
.numericLerp
= numericLerp
;
294 })(typeof exports
=== "undefined" ? (this.stringLerp
= {}) : exports
);