6c240e44b61f9df46911224e1d51b6fd0aadca12
1 /* string-lerp - progressively turn one string into another
2 Copyright 2014 Joe Wreschnig
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
10 /* @license Copyright 2014 Joe Wreschnig - GNU GPL v2 or later */
15 var MAX_MATRIX_SIZE
= 256 * 256;
17 function costMatrix(source
, target
, ins
, del
, sub
) {
18 /** Calculate the Levenshtein cost matrix for source and target
20 If source and target are strings, they cannot contain any
21 astral or combining codepoints. Such data must be passed
22 as arrays of strings with one element per glyph.
24 ins, del, and sub are the costs for insertion, deletion,
25 and substition respectively. Their default value is 1. If
26 only ins is passed, del and sub are set to the same cost.
27 If ins and del are passed, sub is set to the more
30 The matrix is returned as a flat typed array.
32 Following http://en.wikipedia.org/wiki/Levenshtein_distance
34 ins
= ins
=== undefined ? 1 : (ins
| 0);
35 del
= (del
| 0) || ins
;
36 sub
= (sub
| 0) || Math
.max(ins
, del
);
37 var m
= source
.length
+ 1;
38 var n
= target
.length
+ 1;
39 var d
= new Uint32Array(m
* n
);
41 for (i
= 1; i
< m
; ++i
)
43 for (j
= 1; j
< n
; ++j
)
45 for (j
= 1; j
< n
; ++j
)
46 for (i
= 1; i
< m
; ++i
)
47 if (source
[i
- 1] === target
[j
- 1])
48 d
[n
* i
+ j
] = d
[n
* (i
- 1) + j
- 1];
50 d
[n
* i
+ j
] = Math
.min(del
+ d
[n
* (i
- 1) + j
],
51 ins
+ d
[n
* i
+ j
- 1],
52 sub
+ d
[n
* (i
- 1) + j
- 1]);
56 // First, note that deletion is just substition with nothing, so
57 // any DEL operation can be replaced by a SUB. Second, the
58 // operation code *is* the necessary slice offset for applying the
62 function editPath(costs
, target
) {
63 /** Given a cost matrix and a target, create an edit list */
65 var j
= target
.length
;
67 var i
= costs
.length
/ n
- 1;
69 var sub
= (i
&& j
) ? costs
[n
* (i
- 1) + j
- 1] : Infinity
;
70 var del
= i
? costs
[n
* (i
- 1) + j
] : Infinity
;
71 var ins
= j
? costs
[n
* i
+ j
- 1] : Infinity
;
72 if (sub
<= ins
&& sub
<= del
) {
73 if (costs
[n
* i
+ j
] !== costs
[n
* (i
- 1) + j
- 1])
74 path
.push([SUB
, i
- 1, target
[j
- 1]]);
76 } else if (ins
<= del
) {
77 path
.push([INS
, i
, target
[j
- 1]]);
80 path
.push([SUB
, i
- 1, ""]);
87 function diff(source
, target
, ins
, del
, sub
) {
88 /** Create a list of edits to turn source into target
90 ins, del, and sub are as passed to costMatrix.
92 return editPath(costMatrix(source
, target
, ins
, del
, sub
), target
);
95 function patchArray(diff
, source
) {
96 for (var i
= 0; i
< diff
.length
; ++i
) {
98 source
.splice(edit
[1], edit
[0], edit
[2]);
103 function patchString(diff
, source
) {
104 for (var i
= 0; i
< diff
.length
; ++i
) {
106 var head
= source
.slice(0, edit
[1]);
107 var tail
= source
.slice(edit
[1] + edit
[0]);
108 source
= head
+ edit
[2] + tail
;
113 function patch(diff
, source
) {
114 /** Apply a list of edits to source */
115 var patcher
= Array
.isArray(source
) ? patchArray
: patchString
;
116 return patcher(diff
, source
);
119 var MULTI
= /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]/;
121 var GLYPH
= /([\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uDC00-\uFE1F\uFE30-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF])([\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]*)/g;
123 function diffLerp(source
, target
, amount
) {
124 /** Interpolate between two strings using edit operations
126 This interpolation algorithm applys a partial edit of one
127 string into the other. This produces nice looking results,
128 but can take a significant amount of time and memory to
129 compute the edits. It is not recommended for strings
130 longer than a few hundred characters.
133 // If given strings with astral codepoints or combining
134 // characters, split them into arrays of "glyphs" first,
135 // do the edit on the list of "glyphs", and rejoin them.
137 // This split is not perfect for all languages, but at least
138 // it won't create invalid surrogate pairs or orphaned
139 // combining characters.
141 // TODO: The way this is called is a hack.
142 if (source
.match
&& (source
.match(MULTI
) || target
.match(MULTI
))) {
143 var sourceGlyphs
= source
.match(GLYPH
) || [];
144 var targetGlyphs
= target
.match(GLYPH
) || [];
145 return diffLerp(sourceGlyphs
, targetGlyphs
, amount
).join("");
148 // The edit path works from the string end, forwards, because
149 // that's how Levenshtein edits work. To match LTR reading
150 // direction (and the behavior of fastLerp), swap the strings
151 // and invert the parameter when editing.
152 var edits
= diff(target
, source
, 2, 2, 3);
153 var partial
= edits
.slice(0, Math
.round((1 - amount
) * edits
.length
));
154 return patch(partial
, target
);
157 var NUMBERS
= /(-?\d{1,20}(?:\.\d{1,20})?)/g;
159 function areNumericTwins(source
, target
) {
160 /** Check if a and b differ only in numerals */
161 return source
.replace(NUMBERS
, "0") === target
.replace(NUMBERS
, "0");
164 function nlerp(source
, target
, amount
) {
165 return source
+ (target
- source
) * amount
;
168 function numericLerp(source
, target
, amount
) {
169 /** Interpolate numerically between strings containing numbers
171 Numbers may have a leading "-" and a single "." to mark
172 the decimal point, but something must be after the ".".
173 No other floating point syntax (e.g. 1e6) is supported.
174 If both of the numbers in a pair are integers, the result
175 is clamped to an integer.
177 For example, numericLerp("0.0", "100", 0.123) === "12.3"
178 because the "." in "0.0" is interpreted as a decimal
179 point. But numericLerp("0.", "100.", 0.123) === "12."
180 because the strings are interpreted as integers followed
183 Calling this functions on strings that differ in more than
184 numerals gives undefined results.
187 var sourceParts
= source
.split(NUMBERS
);
188 var targetParts
= target
.split(NUMBERS
);
189 var destParts
= targetParts
;
190 for (var i
= 1; i
< sourceParts
.length
; i
+= 2) {
191 var sourcePart
= sourceParts
[i
];
192 var targetPart
= targetParts
[i
];
193 var part
= nlerp(+sourcePart
, +targetPart
, amount
);
194 var sourcePoint
= sourcePart
.indexOf(".");
195 var targetPoint
= targetPart
.indexOf(".");
196 var point
= Math
.round(nlerp(
197 sourcePoint
>= 0 ? (sourcePart
.length
- 1) - sourcePoint
: 0,
198 targetPoint
>= 0 ? (targetPart
.length
- 1) - targetPoint
: 0,
200 targetParts
[i
] = part
.toFixed(point
);
202 return targetParts
.join("");
205 function fastLerpAstral(source
, target
, amount
) {
206 var sourceGlyphs
= source
.match(GLYPH
) || [];
207 var targetGlyphs
= target
.match(GLYPH
) || [];
208 var sourceLength
= Math
.round(sourceGlyphs
.length
* amount
);
209 var targetLength
= Math
.round(targetGlyphs
.length
* amount
);
210 var head
= targetGlyphs
.slice(0, targetLength
);
211 var tail
= sourceGlyphs
.slice(sourceLength
, sourceGlyphs
.length
);
212 head
.push
.apply(head
, tail
);
213 return head
.join("");
216 function fastLerpBasic(source
, target
, amount
) {
217 var sourceLength
= Math
.round(source
.length
* amount
);
218 var targetLength
= Math
.round(target
.length
* amount
);
219 var head
= target
.substring(0, targetLength
);
220 var tail
= source
.substring(sourceLength
, source
.length
);
224 function fastLerp(source
, target
, amount
) {
225 /** Interpolate between two strings based on length
227 This interpolation algorithm progressively replaces the
228 front of one string with another. This approach is fast
229 but does not look good when the strings are similar.
232 // TODO: Consider fast-pathing this even more for very large
233 // strings, e.g. in the megabyte range. These are large enough
234 // that it should be fine to just pick a codepoint and search
235 // for the nearest glyph start.
236 if (source
.match(MULTI
) || target
.match(MULTI
))
237 return fastLerpAstral(source
, target
, amount
);
239 return fastLerpBasic(source
, target
, amount
);
242 function lerp(source
, target
, amount
) {
243 /** Interpolate between two strings as best as possible
245 If the strings are identical aside from numbers in them,
246 they are passed through numericLerp.
248 If the strings are not numbers and short, they are passed
251 Otherwise, they are passed through fastLerp.
253 source
= source
.toString();
254 target
= target
.toString();
256 // Fast path for boundary cases.
257 if (amount
=== 0) return source
;
258 if (amount
=== 1) return target
;
260 if (areNumericTwins(source
, target
))
261 return numericLerp(source
, target
, amount
);
263 // Numeric lerps should over- and under-shoot when fed numbers
264 // outside 0 to 1, but other types cannot.
265 if (amount
< 0) return source
;
266 if (amount
> 1) return target
;
268 var n
= source
.length
* target
.length
;
269 var appropriate
= (n
&& n
< MAX_MATRIX_SIZE
) ? diffLerp
: fastLerp
;
270 return appropriate(source
, target
, amount
);
273 exports
.costMatrix
= costMatrix
;
274 exports
.patch
= patch
;
276 exports
.fastLerp
= fastLerp
;
277 exports
.diffLerp
= diffLerp
;
278 exports
.numericLerp
= numericLerp
;
281 })(typeof exports
=== "undefined" ? (this.stringLerp
= {}) : exports
);