1 /* string-lerp - progressively turn one string into another
2 Copyright 2014 Joe Wreschnig
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
10 /* @license Copyright 2014 Joe Wreschnig - GNU GPL v2 or later */
15 var MAX_MATRIX_SIZE
= 256 * 256;
17 function costMatrix(source
, target
, ins
, del
, sub
) {
18 /** Calculate the Levenshtein cost matrix for source and target
20 If source and target are strings, they cannot contain any
21 astral or combining codepoints. Such data must be passed
22 as arrays of strings with one element per glyph.
24 ins, del, and sub are the costs for insertion, deletion,
25 and substition respectively. Their default value is 1. If
26 only ins is passed, del and sub are set to the same cost.
27 If ins and del are passed, sub is set to the more
30 The matrix is returned as a flat typed array.
32 Following http://en.wikipedia.org/wiki/Levenshtein_distance
34 ins
= ins
=== undefined ? 1 : (ins
| 0);
35 del
= (del
| 0) || ins
;
36 sub
= (sub
| 0) || Math
.max(ins
, del
);
37 var m
= source
.length
+ 1;
38 var n
= target
.length
+ 1;
39 var d
= new Uint32Array(m
* n
);
41 for (i
= 1; i
< m
; ++i
)
43 for (j
= 1; j
< n
; ++j
)
45 for (j
= 1; j
< n
; ++j
)
46 for (i
= 1; i
< m
; ++i
)
47 if (source
[i
- 1] === target
[j
- 1])
48 d
[n
* i
+ j
] = d
[n
* (i
- 1) + j
- 1];
50 d
[n
* i
+ j
] = Math
.min(del
+ d
[n
* (i
- 1) + j
],
51 ins
+ d
[n
* i
+ j
- 1],
52 sub
+ d
[n
* (i
- 1) + j
- 1]);
56 // First, note that deletion is just substition with nothing, so
57 // any DEL operation can be replaced by a SUB. Second, the
58 // operation code *is* the necessary slice offset for applying the
62 function editPath(costs
, target
) {
63 /** Given a cost matrix and a target, create an edit list */
65 var j
= target
.length
;
67 var i
= costs
.length
/ n
- 1;
69 var sub
= (i
&& j
) ? costs
[n
* (i
- 1) + j
- 1] : Infinity
;
70 var del
= i
? costs
[n
* (i
- 1) + j
] : Infinity
;
71 var ins
= j
? costs
[n
* i
+ j
- 1] : Infinity
;
72 if (sub
<= ins
&& sub
<= del
) {
73 if (costs
[n
* i
+ j
] !== costs
[n
* (i
- 1) + j
- 1])
74 path
.push([SUB
, i
- 1, target
[j
- 1]]);
76 } else if (ins
<= del
) {
77 path
.push([INS
, i
, target
[j
- 1]]);
80 path
.push([SUB
, i
- 1, ""]);
87 function diff(source
, target
, ins
, del
, sub
) {
88 /** Create a list of edits to turn source into target
90 ins, del, and sub are as passed to costMatrix.
92 return editPath(costMatrix(source
, target
, ins
, del
, sub
), target
);
95 function patchArray(diff
, source
) {
96 for (var i
= 0; i
< diff
.length
; ++i
) {
98 source
.splice(edit
[1], edit
[0], edit
[2]);
103 function patchString(diff
, source
) {
104 for (var i
= 0; i
< diff
.length
; ++i
) {
106 var head
= source
.slice(0, edit
[1]);
107 var tail
= source
.slice(edit
[1] + edit
[0]);
108 source
= head
+ edit
[2] + tail
;
113 function patch(diff
, source
) {
114 /** Apply a list of edits to source */
115 var patcher
= Array
.isArray(source
) ? patchArray
: patchString
;
116 return patcher(diff
, source
);
119 var MULTI
= /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]/;
121 var GLYPH
= /([\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uDC00-\uFE1F\uFE30-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF])([\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]*)/g;
123 function diffLerpAstral(source
, target
, amount
) {
124 // If given strings with astral codepoints or combining
125 // characters, split them into arrays of "glyphs" first,
126 // do the edit on the list of "glyphs", and rejoin them.
128 // This split is not perfect for all languages, but at least
129 // it won't create invalid surrogate pairs or orphaned
130 // combining characters.
131 var sourceGlyphs
= source
.match(GLYPH
) || [];
132 var targetGlyphs
= target
.match(GLYPH
) || [];
133 var edits
= diff(targetGlyphs
, sourceGlyphs
, 2, 2, 3);
134 // The edit path works from the string end, forwards, because
135 // that's how Levenshtein edits work. To match LTR reading
136 // direction (and the behavior of fastLerp), swap the strings
137 // and invert the parameter when editing.
138 var partial
= edits
.slice(0, Math
.round((1 - amount
) * edits
.length
));
139 return patchArray(partial
, targetGlyphs
).join("");
142 function diffLerpBasic(source
, target
, amount
) {
143 var edits
= diff(target
, source
, 2, 2, 3);
144 // The edit path works from the string end, forwards, because
145 // that's how Levenshtein edits work. To match LTR reading
146 // direction (and the behavior of fastLerp), swap the strings
147 // and invert the parameter when editing.
148 var partial
= edits
.slice(0, Math
.round((1 - amount
) * edits
.length
));
149 return patchString(partial
, target
);
152 function diffLerp(source
, target
, amount
) {
153 /** Interpolate between two strings using edit operations
155 This interpolation algorithm applys a partial edit of one
156 string into the other. This produces nice looking results,
157 but can take a significant amount of time and memory to
158 compute the edits. It is not recommended for strings
159 longer than a few hundred characters.
162 if (source
.match(MULTI
) || target
.match(MULTI
))
163 return diffLerpAstral(source
, target
, amount
);
165 return diffLerpBasic(source
, target
, amount
);
168 var NUMBERS
= /(-?\d{1,20}(?:\.\d{1,20})?)/g;
170 function areNumericTwins(source
, target
) {
171 /** Check if a and b differ only in numerals */
172 return source
.replace(NUMBERS
, "0") === target
.replace(NUMBERS
, "0");
175 function nlerp(source
, target
, amount
) {
176 return source
+ (target
- source
) * amount
;
179 function numericLerp(source
, target
, amount
) {
180 /** Interpolate numerically between strings containing numbers
182 Numbers may have a leading "-" and a single "." to mark
183 the decimal point, but something must be after the ".".
184 No other floating point syntax (e.g. 1e6) is supported.
185 They are treated as fixed-point values, with the point's
186 position itself interpolating.
188 For example, numericLerp("0.0", "100".0, 0.123) === "12.3"
189 because the "." in "0.0" is interpreted as a decimal
190 point. But numericLerp("0.", "100.", 0.123) === "12."
191 because the strings are interpreted as integers followed
194 Calling this functions on strings that differ in more than
195 numerals gives undefined results.
198 var targetParts
= target
.split(NUMBERS
);
201 while ((match
= NUMBERS
.exec(source
))) {
202 var sourcePart
= match
[0];
203 var targetPart
= targetParts
[i
];
204 var part
= nlerp(+sourcePart
, +targetPart
, amount
);
205 var sourcePoint
= sourcePart
.indexOf(".");
206 var targetPoint
= targetPart
.indexOf(".");
207 var point
= Math
.round(nlerp(
208 sourcePoint
>= 0 ? (sourcePart
.length
- 1) - sourcePoint
: 0,
209 targetPoint
>= 0 ? (targetPart
.length
- 1) - targetPoint
: 0,
211 targetParts
[i
] = part
.toFixed(point
);
214 return targetParts
.join("");
217 function fastLerpAstral(source
, target
, amount
) {
218 var sourceGlyphs
= source
.match(GLYPH
) || [];
219 var targetGlyphs
= target
.match(GLYPH
) || [];
220 var sourceLength
= Math
.round(sourceGlyphs
.length
* amount
);
221 var targetLength
= Math
.round(targetGlyphs
.length
* amount
);
222 var head
= targetGlyphs
.slice(0, targetLength
);
223 var tail
= sourceGlyphs
.slice(sourceLength
, sourceGlyphs
.length
);
224 head
.push
.apply(head
, tail
);
225 return head
.join("");
228 function fastLerpBasic(source
, target
, amount
) {
229 var sourceLength
= Math
.round(source
.length
* amount
);
230 var targetLength
= Math
.round(target
.length
* amount
);
231 var head
= target
.substring(0, targetLength
);
232 var tail
= source
.substring(sourceLength
, source
.length
);
236 function fastLerp(source
, target
, amount
) {
237 /** Interpolate between two strings based on length
239 This interpolation algorithm progressively replaces the
240 front of one string with another. This approach is fast
241 but does not look good when the strings are similar.
244 // TODO: Consider fast-pathing this even more for very large
245 // strings, e.g. in the megabyte range. These are large enough
246 // that it should be fine to just pick a codepoint and search
247 // for the nearest glyph start.
248 if (source
.match(MULTI
) || target
.match(MULTI
))
249 return fastLerpAstral(source
, target
, amount
);
251 return fastLerpBasic(source
, target
, amount
);
254 function lerp(source
, target
, amount
) {
255 /** Interpolate between two strings as best as possible
257 If the strings are identical aside from numbers in them,
258 they are passed through numericLerp.
260 If the strings are not numbers and short, they are passed
263 Otherwise, they are passed through fastLerp.
265 source
= source
.toString();
266 target
= target
.toString();
268 // Fast path for boundary cases.
269 if (amount
=== 0) return source
;
270 if (amount
=== 1) return target
;
272 if (areNumericTwins(source
, target
))
273 return numericLerp(source
, target
, amount
);
275 // Numeric lerps should over- and under-shoot when fed numbers
276 // outside 0 to 1, but other types cannot.
277 if (amount
< 0) return source
;
278 if (amount
> 1) return target
;
280 var n
= source
.length
* target
.length
;
281 var appropriate
= (n
&& n
< MAX_MATRIX_SIZE
) ? diffLerp
: fastLerp
;
282 return appropriate(source
, target
, amount
);
285 exports
.costMatrix
= costMatrix
;
286 exports
.patch
= patch
;
288 exports
.fastLerp
= fastLerp
;
289 exports
.diffLerp
= diffLerp
;
290 exports
.numericLerp
= numericLerp
;
293 })(typeof exports
=== "undefined" ? (this.stringLerp
= {}) : exports
);