4 // (c) 2006 Douglas Crockford
6 // Produce an array of simple token objects from a string.
7 // A simple token object contains these members:
8 // type: 'name', 'string', 'number', 'operator'
9 // value: string or number value of the token
10 // from: index of first character of the token
11 // to: index of the last character + 1
13 // Comments of the // type are ignored.
15 // Operators are by default single characters. Multicharacter
16 // operators can be made by supplying strings of prefix and suffix
18 // characters. For example, the prefix '<>+-&' with the suffix '=>&:'
20 // will match any of these:
21 // <= >> >>> <> >= +: -: &: &&: &&
25 String.prototype.tokens = function (prefix, suffix) {
26 var c; // The current character.
27 var from; // The index of the start of the token.
28 var i = 0; // The index of the current character.
29 var length = this.length;
30 var n; // The number value.
31 var q; // The quote character.
32 var str; // The string value.
34 var result = []; // An array to hold the results.
36 var make = function (type, value) {
38 // Make a token object.
48 // Begin tokenization. If the source string is empty, return nothing.
54 // If prefix and suffix strings are not provided, supply defaults.
56 if (typeof prefix !== 'string') {
59 if (typeof suffix !== 'string') {
64 // Loop through this text, one character at a time.
78 } else if (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z') {
83 if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
84 (c >= '0' && c <= '9') || c === '_') {
91 result.push(make('name', str));
95 // A number cannot start with a decimal point. It must start with a digit.
98 } else if (c >= '0' && c <= '9') {
102 // Look for more digits.
106 if (c < '0' || c > '9') {
113 // Look for a decimal fraction part.
120 if (c < '0' || c > '9') {
128 // Look for an exponent part.
130 if (c === 'e' || c === 'E') {
134 if (c === '-' || c === '+') {
139 if (c < '0' || c > '9') {
140 make('number', str).error("Bad exponent");
146 } while (c >= '0' && c <= '9');
149 // Make sure the next character is not a letter.
151 if (c >= 'a' && c <= 'z') {
154 make('number', str).error("Bad number");
157 // Convert the string value to a number. If it is finite, then it is a good token.
162 result.push(make('number', n));
164 make('number', str).error("Bad number");
169 } else if (c === '\'' || c === '"') {
176 make('string', str).error(c === '\n' || c === '\r' || c === '' ?
177 "Unterminated string." :
178 "Control character in string.", make('', str));
181 // Look for the closing quote.
187 // Look for escapement.
192 make('string', str).error("Unterminated string");
213 make('string', str).error("Unterminated string");
215 c = parseInt(this.substr(i + 1, 4), 16);
216 if (!isFinite(c) || c < 0) {
217 make('string', str).error("Unterminated string");
219 c = String.fromCharCode(c);
228 result.push(make('string', str));
233 } else if (c === '/' && this.charAt(i + 1) === '/') {
237 if (c === '\n' || c === '\r' || c === '') {
245 } else if (prefix.indexOf(c) >= 0) {
250 if (suffix.indexOf(c) < 0) {
256 result.push(make('operator', str));
258 // single-character operator
262 result.push(make('operator', c));