1 /* string-lerp - progressively turn one string into another
2 Copyright 2014 Joe Wreschnig
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
10 /* @license Copyright 2014 Joe Wreschnig - GNU GPL v2 or later */
15 var MAX_MATRIX_SIZE
= 256 * 256;
17 function costMatrix(source
, target
, ins
, del
, sub
) {
18 /** Calculate the Levenshtein cost matrix for source and target
20 If source and target are strings, they cannot contain any
21 astral or combining codepoints. Such data must be passed
22 as arrays of strings with one element per glyph.
24 ins, del, and sub are the costs for insertion, deletion,
25 and substition respectively. Their default value is 1. If
26 only ins is passed, del and sub are set to the same cost.
27 If ins and del are passed, sub is set to the more
30 The matrix is returned as a flat typed array.
32 Following http://en.wikipedia.org/wiki/Levenshtein_distance
34 ins
= ins
=== undefined ? 1 : (ins
| 0);
35 del
= (del
| 0) || ins
;
36 sub
= (sub
| 0) || Math
.max(ins
, del
);
37 var m
= source
.length
+ 1;
38 var n
= target
.length
+ 1;
39 var d
= new Uint32Array(m
* n
);
41 for (i
= 1; i
< m
; ++i
)
43 for (j
= 1; j
< n
; ++j
)
45 for (j
= 1; j
< n
; ++j
)
46 for (i
= 1; i
< m
; ++i
)
47 if (source
[i
- 1] === target
[j
- 1])
48 d
[n
* i
+ j
] = d
[n
* (i
- 1) + j
- 1];
50 d
[n
* i
+ j
] = Math
.min(del
+ d
[n
* (i
- 1) + j
],
51 ins
+ d
[n
* i
+ j
- 1],
52 sub
+ d
[n
* (i
- 1) + j
- 1]);
56 // First, note that deletion is just substition with nothing, so
57 // any DEL operation can be replaced by a SUB. Second, the
58 // operation code *is* the necessary slice offset for applying the
62 function editPath(costs
, target
) {
63 /** Given a cost matrix and a target, create an edit list */
65 var j
= target
.length
;
67 var i
= costs
.length
/ n
- 1;
69 var sub
= (i
&& j
) ? costs
[n
* (i
- 1) + j
- 1] : Infinity
;
70 var del
= i
? costs
[n
* (i
- 1) + j
] : Infinity
;
71 var ins
= j
? costs
[n
* i
+ j
- 1] : Infinity
;
72 if (sub
<= ins
&& sub
<= del
) {
73 if (costs
[n
* i
+ j
] !== costs
[n
* (i
- 1) + j
- 1])
74 path
.push([SUB
, i
- 1, target
[j
- 1]]);
76 } else if (ins
<= del
) {
77 path
.push([INS
, i
, target
[j
- 1]]);
80 path
.push([SUB
, i
- 1, ""]);
87 function diff(source
, target
, ins
, del
, sub
) {
88 /** Create a list of edits to turn source into target
90 ins, del, and sub are as passed to costMatrix.
92 return editPath(costMatrix(source
, target
, ins
, del
, sub
), target
);
95 function patchArray(diff
, source
) {
96 for (var i
= 0; i
< diff
.length
; ++i
) {
98 source
.splice(edit
[1], edit
[0], edit
[2]);
103 function patchString(diff
, source
) {
104 for (var i
= 0; i
< diff
.length
; ++i
) {
106 var head
= source
.slice(0, edit
[1]);
107 var tail
= source
.slice(edit
[1] + edit
[0]);
108 source
= head
+ edit
[2] + tail
;
113 function patch(diff
, source
) {
114 /** Apply a list of edits to source */
115 var patcher
= Array
.isArray(source
) ? patchArray
: patchString
;
116 return patcher(diff
, source
);
119 var MULTI
= /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]/;
121 var GLYPH
= /([\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uDC00-\uFE1F\uFE30-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF])([\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]*)/g;
123 function diffLerp(source
, target
, amount
) {
124 /** Interpolate between two strings using edit operations
126 This interpolation algorithm applys a partial edit of one
127 string into the other. This produces nice looking results,
128 but can take a significant amount of time and memory to
129 compute the edits. It is not recommended for strings
130 longer than a few hundred characters.
133 // If given strings with astral codepoints or combining
134 // characters, split them into arrays of "glyphs" first,
135 // do the edit on the list of "glyphs", and rejoin them.
137 // This split is not perfect for all languages, but at least
138 // it won't create invalid surrogate pairs or orphaned
139 // combining characters.
141 // TODO: The way this is called is a hack.
142 if (source
.match
&& (source
.match(MULTI
) || target
.match(MULTI
))) {
143 var sourceGlyphs
= source
.match(GLYPH
) || [];
144 var targetGlyphs
= target
.match(GLYPH
) || [];
145 return diffLerp(sourceGlyphs
, targetGlyphs
, amount
).join("");
148 // The edit path works from the string end, forwards, because
149 // that's how Levenshtein edits work. To match LTR reading
150 // direction (and the behavior of fastLerp), swap the strings
151 // and invert the parameter when editing.
152 var edits
= diff(target
, source
, 2, 2, 3);
153 var partial
= edits
.slice(0, Math
.round((1 - amount
) * edits
.length
));
154 return patch(partial
, target
);
157 var NUMBERS
= /(-?\d+(?:\.\d+)?)/g;
159 function areNumericTwins(source
, target
) {
160 /** Check if a and b differ only in numerals */
161 return source
.replace(NUMBERS
, "0") === target
.replace(NUMBERS
, "0");
164 function nlerp(source
, target
, amount
) {
165 return source
+ (target
- source
) * amount
;
168 function numericLerp(source
, target
, amount
) {
169 /** Interpolate numerically between strings containing numbers
171 Numbers may have a leading "-" and a single "." to mark
172 the decimal point, but something must be after the ".".
173 No other floating point syntax (e.g. 1e6) is supported.
174 If both of the numbers in a pair are integers, the result
175 is clamped to an integer.
177 For example, numericLerp("0.0", "100", 0.123) === "12.3"
178 because the "." in "0.0" is interpreted as a decimal
179 point. But numericLerp("0.", "100.", 0.123) === "12."
180 because the strings are interpreted as integers followed
183 Calling this functions on strings that differ in more than
184 numerals gives undefined results.
187 // TODO: Try to preserve precision of the original numbers.
188 var sourceParts
= source
.split(NUMBERS
);
189 var targetParts
= target
.split(NUMBERS
);
190 var destParts
= targetParts
;
191 for (var i
= 1; i
< sourceParts
.length
; i
+= 2) {
192 var sourcePart
= sourceParts
[i
];
193 var targetPart
= targetParts
[i
];
194 var part
= nlerp(+sourcePart
, +targetPart
, amount
);
195 var sourcePoint
= sourcePart
.indexOf(".");
196 var targetPoint
= targetPart
.indexOf(".");
197 if (sourcePoint
=== -1 && targetPoint
=== -1)
198 part
= Math
.round(part
);
199 targetParts
[i
] = part
.toString();
201 return targetParts
.join("");
204 function fastLerpAstral(source
, target
, amount
) {
205 var sourceGlyphs
= source
.match(GLYPH
) || [];
206 var targetGlyphs
= target
.match(GLYPH
) || [];
207 var sourceLength
= Math
.round(sourceGlyphs
.length
* amount
);
208 var targetLength
= Math
.round(targetGlyphs
.length
* amount
);
209 var head
= targetGlyphs
.slice(0, targetLength
);
210 var tail
= sourceGlyphs
.slice(sourceLength
, sourceGlyphs
.length
);
211 head
.push
.apply(head
, tail
);
212 return head
.join("");
215 function fastLerpBasic(source
, target
, amount
) {
216 var sourceLength
= Math
.round(source
.length
* amount
);
217 var targetLength
= Math
.round(target
.length
* amount
);
218 var head
= target
.substring(0, targetLength
);
219 var tail
= source
.substring(sourceLength
, source
.length
);
223 function fastLerp(source
, target
, amount
) {
224 /** Interpolate between two strings based on length
226 This interpolation algorithm progressively replaces the
227 front of one string with another. This approach is fast
228 but does not look good when the strings are similar.
231 // TODO: Consider fast-pathing this even more for very large
232 // strings, e.g. in the megabyte range. These are large enough
233 // that it should be fine to just pick a codepoint and search
234 // for the nearest glyph start.
235 if (source
.match(MULTI
) || target
.match(MULTI
))
236 return fastLerpAstral(source
, target
, amount
);
238 return fastLerpBasic(source
, target
, amount
);
241 function lerp(source
, target
, amount
) {
242 /** Interpolate between two strings as best as possible
244 If the strings are identical aside from numbers in them,
245 they are passed through numericLerp.
247 If the strings are not numbers and short, they are passed
250 Otherwise, they are passed through fastLerp.
252 source
= source
.toString();
253 target
= target
.toString();
255 // Fast path for boundary cases.
256 if (amount
=== 0) return source
;
257 if (amount
=== 1) return target
;
259 if (areNumericTwins(source
, target
))
260 return numericLerp(source
, target
, amount
);
262 // Numeric lerps should over- and under-shoot when fed numbers
263 // outside 0 to 1, but other types cannot.
264 if (amount
< 0) return source
;
265 if (amount
> 1) return target
;
267 var n
= source
.length
* target
.length
;
268 var appropriate
= (n
&& n
< MAX_MATRIX_SIZE
) ? diffLerp
: fastLerp
;
269 return appropriate(source
, target
, amount
);
272 exports
.costMatrix
= costMatrix
;
273 exports
.patch
= patch
;
275 exports
.fastLerp
= fastLerp
;
276 exports
.diffLerp
= diffLerp
;
277 exports
.numericLerp
= numericLerp
;
280 })(typeof exports
=== "undefined" ? (this.stringLerp
= {}) : exports
);