fdf7df8765be728d4ba6c77bbaffd746a3dbe09b
1 /* string-lerp - progressively turn one string into another
2 Copyright 2014 Joe Wreschnig
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
10 /* @license Copyright 2014 Joe Wreschnig - GNU GPL v2 or later */
15 var MAX_MATRIX_SIZE
= 256 * 256;
17 function costMatrix(source
, target
, ins
, del
, sub
) {
18 /** Calculate the Levenshtein cost matrix for source and target
20 If source and target are strings, they cannot contain any
21 astral or combining codepoints. Such data must be passed
22 as arrays of strings with one element per glyph.
24 ins, del, and sub are the costs for insertion, deletion,
25 and substition respectively. Their default value is 1. If
26 only ins is passed, del and sub are set to the same cost.
27 If ins and del are passed, sub is set to the more
30 The matrix is returned as a flat typed array.
32 Following http://en.wikipedia.org/wiki/Levenshtein_distance
34 ins
= ins
=== undefined ? 1 : (ins
| 0);
35 del
= (del
| 0) || ins
;
36 sub
= (sub
| 0) || Math
.max(ins
, del
);
37 var m
= source
.length
+ 1;
38 var n
= target
.length
+ 1;
39 var d
= new Uint32Array(m
* n
);
41 for (i
= 1; i
< m
; ++i
)
43 for (j
= 1; j
< n
; ++j
)
45 for (j
= 1; j
< n
; ++j
)
46 for (i
= 1; i
< m
; ++i
)
47 if (source
[i
- 1] === target
[j
- 1])
48 d
[n
* i
+ j
] = d
[n
* (i
- 1) + j
- 1];
50 d
[n
* i
+ j
] = Math
.min(del
+ d
[n
* (i
- 1) + j
],
51 ins
+ d
[n
* i
+ j
- 1],
52 sub
+ d
[n
* (i
- 1) + j
- 1]);
56 // First, note that deletion is just substition with nothing, so
57 // any DEL operation can be replaced by a SUB. Second, the
58 // operation code *is* the necessary slice offset for applying the
62 function editPath(costs
, target
) {
63 /** Given a cost matrix and a target, create an edit list */
65 var j
= target
.length
;
67 var i
= costs
.length
/ n
- 1;
69 var sub
= (i
&& j
) ? costs
[n
* (i
- 1) + j
- 1] : Infinity
;
70 var del
= i
? costs
[n
* (i
- 1) + j
] : Infinity
;
71 var ins
= j
? costs
[n
* i
+ j
- 1] : Infinity
;
72 if (sub
<= ins
&& sub
<= del
) {
73 if (costs
[n
* i
+ j
] !== costs
[n
* (i
- 1) + j
- 1])
74 path
.push([SUB
, i
- 1, target
[j
- 1]]);
76 } else if (ins
<= del
) {
77 path
.push([INS
, i
, target
[j
- 1]]);
80 path
.push([SUB
, i
- 1, ""]);
87 function diff(source
, target
, ins
, del
, sub
) {
88 /** Create a list of edits to turn source into target
90 ins, del, and sub are as passed to costMatrix.
92 return editPath(costMatrix(source
, target
, ins
, del
, sub
), target
);
95 function patch(diff
, source
) {
96 /** Apply a list of edits to source */
100 if (Array
.isArray(source
)) {
101 for (i
= 0; i
< diff
.length
; ++i
) {
103 source
.splice(edit
[1], edit
[0], edit
[2]);
106 for (i
= 0; i
< diff
.length
; ++i
) {
108 var head
= source
.slice(0, edit
[1]);
109 var tail
= source
.slice(edit
[1] + edit
[0]);
110 source
= head
+ edit
[2] + tail
;
116 var MULTI
= /[\uD800-\uDBFF][\uDC00-\uDFFF]|[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]/;
118 var GLYPH
= /([\0-\u02FF\u0370-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uDC00-\uFE1F\uFE30-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF])([\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]*)/g;
120 function diffLerp(source
, target
, amount
) {
121 /** Interpolate between two strings using edit operations
123 This interpolation algorithm applys a partial edit of one
124 string into the other. This produces nice looking results,
125 but can take a significant amount of time and memory to
126 compute the edits. It is not recommended for strings
127 longer than a few hundred characters.
130 // If given strings with astral codepoints or combining
131 // characters, split them into arrays of "glyphs" first,
132 // do the edit on the list of "glyphs", and rejoin them.
134 // This split is not perfect for all languages, but at least
135 // it won't create invalid surrogate pairs or orphaned
136 // combining characters.
137 if (source
.match
&& (source
.match(MULTI
) || target
.match(MULTI
))) {
138 var sourceGlyphs
= source
.match(GLYPH
) || [];
139 var targetGlyphs
= target
.match(GLYPH
) || [];
140 return diffLerp(sourceGlyphs
, targetGlyphs
, amount
).join("");
143 // The edit path works from the string end, forwards, because
144 // that's how Levenshtein edits work. To match LTR reading
145 // direction (and the behavior of fastLerp), swap the strings
146 // and invert the parameter when editing.
147 var edits
= diff(target
, source
, 2, 2, 3);
148 var partial
= edits
.slice(0, Math
.round((1 - amount
) * edits
.length
));
149 return patch(partial
, target
);
152 var NUMBERS
= /(-?\d+(?:\.\d+)?)/g;
154 function areNumericTwins(source
, target
) {
155 /** Check if a and b differ only in numerals */
156 return source
.replace(NUMBERS
, "0") === target
.replace(NUMBERS
, "0");
159 function nlerp(source
, target
, amount
) {
160 return source
+ (target
- source
) * amount
;
163 function numericLerp(source
, target
, amount
) {
164 /** Interpolate numerically between strings containing numbers
166 Numbers may have a leading "-" and a single "." to mark
167 the decimal point, but something must be after the ".".
168 No other floating point syntax (e.g. 1e6) is supported.
169 If both of the numbers in a pair are integers, the result
170 is clamped to an integer.
172 For example, numericLerp("0.0", "100", 0.123) === "12.3"
173 because the "." in "0.0" is interpreted as a decimal
174 point. But numericLerp("0.", "100.", 0.123) === "12."
175 because the strings are interpreted as integers followed
178 Calling this functions on strings that differ in more than
179 numerals gives undefined results.
182 // TODO: Try to preserve precision of the original numbers.
183 var sourceParts
= source
.split(NUMBERS
);
184 var targetParts
= target
.split(NUMBERS
);
185 var destParts
= targetParts
;
186 for (var i
= 1; i
< sourceParts
.length
; i
+= 2) {
187 var sourcePart
= sourceParts
[i
];
188 var targetPart
= targetParts
[i
];
189 var part
= nlerp(+sourcePart
, +targetPart
, amount
);
190 var sourcePoint
= sourcePart
.indexOf(".");
191 var targetPoint
= targetPart
.indexOf(".");
192 if (sourcePoint
=== -1 && targetPoint
=== -1)
193 part
= Math
.round(part
);
194 targetParts
[i
] = part
.toString();
196 return targetParts
.join("");
199 function fastLerp(a
, b
, p
) {
200 /** Interpolate between two strings based on length
202 This interpolation algorithm progressively replaces the
203 front of one string with another. This approach is fast
204 but does not look good when the strings are similar.
207 // TODO: Consider fast-pathing this even more for very large
208 // strings, e.g. in the megabyte range. These are large enough
209 // that it should be fine to just pick a codepoint and search
210 // for the nearest glyph start.
211 if (a
.match(MULTI
) || b
.match(MULTI
)) {
212 var ca
= a
.match(GLYPH
) || [];
213 var cb
= b
.match(GLYPH
) || [];
214 var calen
= Math
.round(ca
.length
* p
);
215 var cblen
= Math
.round(cb
.length
* p
);
216 var r
= cb
.slice(0, cblen
);
217 r
.push
.apply(r
, ca
.slice(calen
, ca
.length
));
220 var alen
= Math
.round(a
.length
* p
);
221 var blen
= Math
.round(b
.length
* p
);
222 return b
.substring(0, blen
) + a
.substring(alen
, a
.length
);
226 function lerp(a
, b
, p
) {
227 /** Interpolate between two strings as best as possible
229 If the strings are identical aside from numbers in them,
230 they are passed through numericLerp.
232 If the strings are not numbers and short, they are passed
235 Otherwise, they are passed through fastLerp.
240 // Fast path for boundary cases.
241 if (p
=== 0) return a
;
242 if (p
=== 1) return b
;
244 if (areNumericTwins(a
, b
))
245 return numericLerp(a
, b
, p
);
247 // Numeric lerps should over- and under-shoot when fed numbers
248 // outside 0 to 1, but other types cannot.
252 var n
= a
.length
* b
.length
;
253 return ((n
&& n
< MAX_MATRIX_SIZE
) ? diffLerp
: fastLerp
)(a
, b
, p
);
256 exports
.costMatrix
= costMatrix
;
257 exports
.patch
= patch
;
259 exports
.fastLerp
= fastLerp
;
260 exports
.diffLerp
= diffLerp
;
261 exports
.numericLerp
= numericLerp
;
264 })(typeof exports
=== "undefined" ? (this.stringLerp
= {}) : exports
);