1 /* string-lerp - progressively turn one string into another
2 Copyright 2014 Joe Wreschnig
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
10 /* @license Copyright 2014 Joe Wreschnig - GNU GPL v2 or later */
11 /*globals exports, Uint32Array */
16 var MAX_MATRIX_SIZE
= 256 * 256;
18 function costMatrix(source
, target
, ins
, del
, sub
) {
19 /** Calculate the Levenshtein cost matrix for source and target
21 If source and target are strings, they cannot contain any
22 astral or combining codepoints. Such data must be passed
23 as arrays of strings with one element per glyph.
25 ins, del, and sub are the costs for insertion, deletion,
26 and substition respectively. Their default value is 1. If
27 only ins is passed, del and sub are set to the same cost.
28 If ins and del are passed, sub is set to the more
31 The matrix is returned as a flat typed array.
33 Following http://en.wikipedia.org/wiki/Levenshtein_distance
35 ins
= ins
=== undefined ? 1 : (ins
| 0);
36 del
= (del
| 0) || ins
;
37 sub
= (sub
| 0) || Math
.max(ins
, del
);
38 var m
= source
.length
+ 1;
39 var n
= target
.length
+ 1;
40 var d
= new Uint32Array(m
* n
);
42 for (i
= 1; i
< m
; ++i
)
44 for (j
= 1; j
< n
; ++j
)
46 for (j
= 1; j
< n
; ++j
)
47 for (i
= 1; i
< m
; ++i
)
48 if (source
[i
- 1] === target
[j
- 1])
49 d
[n
* i
+ j
] = d
[n
* (i
- 1) + j
- 1];
51 d
[n
* i
+ j
] = Math
.min(del
+ d
[n
* (i
- 1) + j
],
52 ins
+ d
[n
* i
+ j
- 1],
53 sub
+ d
[n
* (i
- 1) + j
- 1]);
57 // First, note that deletion is just substition with nothing, so
58 // any DEL operation can be replaced by a SUB. Second, the
59 // operation code *is* the necessary slice offset for applying the
63 function editPath(costs
, target
) {
64 /** Given a cost matrix and a target, create an edit list */
66 var j
= target
.length
;
68 var i
= costs
.length
/ n
- 1;
70 var sub
= (i
&& j
) ? costs
[n
* (i
- 1) + j
- 1] : Infinity
;
71 var del
= i
? costs
[n
* (i
- 1) + j
] : Infinity
;
72 var ins
= j
? costs
[n
* i
+ j
- 1] : Infinity
;
73 if (sub
<= ins
&& sub
<= del
) {
74 if (costs
[n
* i
+ j
] !== costs
[n
* (i
- 1) + j
- 1])
75 path
.push([SUB
, i
- 1, target
[j
- 1]]);
77 } else if (ins
<= del
) {
78 path
.push([INS
, i
, target
[j
- 1]]);
81 path
.push([SUB
, i
- 1, ""]);
88 function diff(source
, target
, ins
, del
, sub
) {
89 /** Create a list of edits to turn source into target
91 ins, del, and sub are as passed to costMatrix.
93 return editPath(costMatrix(source
, target
, ins
, del
, sub
), target
);
96 function patchArray(diff
, source
) {
97 for (var i
= 0; i
< diff
.length
; ++i
) {
99 source
.splice(edit
[1], edit
[0], edit
[2]);
104 function patchString(diff
, source
) {
105 for (var i
= 0; i
< diff
.length
; ++i
) {
107 var head
= source
.slice(0, edit
[1]);
108 var tail
= source
.slice(edit
[1] + edit
[0]);
109 source
= head
+ edit
[2] + tail
;
114 function patch(diff
, source
) {
115 /** Apply a list of edits to source */
116 var patcher
= Array
.isArray(source
) ? patchArray
: patchString
;
117 return patcher(diff
, source
);
120 // Matches if a string contains combining characters or astral
121 // codepoints (technically, the first half surrogate of an astral
123 var MULTI
= /[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uD800-\uDBFF\uFE20-\uFE2F]/;
125 // Match an entire (potentially astral) codepoint and any
126 // combining characters following it.
127 var GLYPH
= /[\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uD800-\uFE1F\uFE30-\uFFFF][\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uDC00-\uDFFF\uFE20-\uFE2F]*/g;
129 function diffLerpAstral(source
, target
, amount
) {
130 // This split is not perfect for all languages, but at least
131 // it won't create invalid surrogate pairs or orphaned
132 // combining characters.
133 var sourceGlyphs
= source
.match(GLYPH
) || [];
134 var targetGlyphs
= target
.match(GLYPH
) || [];
135 var edits
= diff(targetGlyphs
, sourceGlyphs
, 2, 2, 3);
136 // The edit path works from the string end, forwards, because
137 // that's how Levenshtein edits work. To match LTR reading
138 // direction (and the behavior of fastLerp), swap the strings
139 // and invert the parameter when editing.
140 var partial
= edits
.slice(0, Math
.round((1 - amount
) * edits
.length
));
141 return patchArray(partial
, targetGlyphs
).join("");
144 function diffLerpBasic(source
, target
, amount
) {
145 var edits
= diff(target
, source
, 2, 2, 3);
146 // The edit path works from the string end, forwards, because
147 // that's how Levenshtein edits work. To match LTR reading
148 // direction (and the behavior of fastLerp), swap the strings
149 // and invert the parameter when editing.
150 var partial
= edits
.slice(0, Math
.round((1 - amount
) * edits
.length
));
151 return patchString(partial
, target
);
154 function diffLerp(source
, target
, amount
) {
155 /** Interpolate between two strings using edit operations
157 This interpolation algorithm applys a partial edit of one
158 string into the other. This produces nice looking results,
159 but can take a significant amount of time and memory to
160 compute the edits. It is not recommended for strings
161 longer than a few hundred characters.
164 if (source
.match(MULTI
) || target
.match(MULTI
))
165 return diffLerpAstral(source
, target
, amount
);
167 return diffLerpBasic(source
, target
, amount
);
170 var NUMBERS
= /(-?\d{1,20}(?:\.\d{1,20})?)/g;
172 function areNumericTwins(source
, target
) {
173 /** Check if a and b differ only in numerals */
174 return source
.replace(NUMBERS
, "0") === target
.replace(NUMBERS
, "0");
177 function nlerp(source
, target
, amount
) {
178 return source
+ (target
- source
) * amount
;
181 function numericLerp(source
, target
, amount
) {
182 /** Interpolate numerically between strings containing numbers
184 Numbers may have a leading "-" and a single "." to mark
185 the decimal point, but something must be after the ".".
186 No other floating point syntax (e.g. 1e6) is supported.
187 They are treated as fixed-point values, with the point's
188 position itself interpolating.
190 For example, numericLerp("0.0", "100".0, 0.123) === "12.3"
191 because the "." in "0.0" is interpreted as a decimal
192 point. But numericLerp("0.", "100.", 0.123) === "12."
193 because the strings are interpreted as integers followed
196 Calling this functions on strings that differ in more than
197 numerals gives undefined results.
200 var targetParts
= target
.split(NUMBERS
);
203 while ((match
= NUMBERS
.exec(source
))) {
204 var sourcePart
= match
[0];
205 var targetPart
= targetParts
[i
];
206 var part
= nlerp(+sourcePart
, +targetPart
, amount
);
207 var sourcePoint
= sourcePart
.indexOf(".");
208 var targetPoint
= targetPart
.indexOf(".");
209 var point
= Math
.round(nlerp(
210 sourcePoint
>= 0 ? (sourcePart
.length
- 1) - sourcePoint
: 0,
211 targetPoint
>= 0 ? (targetPart
.length
- 1) - targetPoint
: 0,
213 targetParts
[i
] = part
.toFixed(point
);
216 return targetParts
.join("");
219 function fastLerpAstral(source
, target
, amount
) {
220 var sourceGlyphs
= source
.match(GLYPH
) || [];
221 var targetGlyphs
= target
.match(GLYPH
) || [];
222 var sourceLength
= Math
.round(sourceGlyphs
.length
* amount
);
223 var targetLength
= Math
.round(targetGlyphs
.length
* amount
);
224 var head
= targetGlyphs
.slice(0, targetLength
);
225 var tail
= sourceGlyphs
.slice(sourceLength
, sourceGlyphs
.length
);
226 head
.push
.apply(head
, tail
);
227 return head
.join("");
230 function fastLerpBasic(source
, target
, amount
) {
231 var sourceLength
= Math
.round(source
.length
* amount
);
232 var targetLength
= Math
.round(target
.length
* amount
);
233 var head
= target
.substring(0, targetLength
);
234 var tail
= source
.substring(sourceLength
, source
.length
);
238 function fastLerp(source
, target
, amount
) {
239 /** Interpolate between two strings based on length
241 This interpolation algorithm progressively replaces the
242 front of one string with another. This approach is fast
243 but does not look good when the strings are similar.
246 // TODO: Consider fast-pathing this even more for very large
247 // strings, e.g. in the megabyte range. These are large enough
248 // that it should be fine to just pick a codepoint and search
249 // for the nearest glyph start.
250 if (source
.match(MULTI
) || target
.match(MULTI
))
251 return fastLerpAstral(source
, target
, amount
);
253 return fastLerpBasic(source
, target
, amount
);
256 function lerp(source
, target
, amount
) {
257 /** Interpolate between two strings as best as possible
259 If the strings are identical aside from numbers in them,
260 they are passed through numericLerp.
262 If the strings are not numbers and short, they are passed
265 Otherwise, they are passed through fastLerp.
267 source
= source
.toString();
268 target
= target
.toString();
270 // Fast path for boundary cases.
271 if (amount
=== 0) return source
;
272 if (amount
=== 1) return target
;
274 if (areNumericTwins(source
, target
))
275 return numericLerp(source
, target
, amount
);
277 // Numeric lerps should over- and under-shoot when fed numbers
278 // outside 0 to 1, but other types cannot.
279 if (amount
< 0) return source
;
280 if (amount
> 1) return target
;
282 var n
= source
.length
* target
.length
;
283 var appropriate
= (n
&& n
< MAX_MATRIX_SIZE
) ? diffLerp
: fastLerp
;
284 return appropriate(source
, target
, amount
);
287 exports
.costMatrix
= costMatrix
;
288 exports
.patch
= patch
;
290 exports
.fastLerp
= fastLerp
;
291 exports
.diffLerp
= diffLerp
;
292 exports
.numericLerp
= numericLerp
;
295 })(typeof exports
=== "undefined" ? (this.stringLerp
= {}) : exports
);