1 /***********************************************************************
3 A JavaScript tokenizer / parser / beautifier / compressor.
5 This version is suitable for Node.js. With minimal changes (the
6 exports stuff) it should work on any JS platform.
8 This file contains the tokenizer/parser. It is a port to JavaScript
9 of parse-js [1], a JavaScript parser library written in Common Lisp
10 by Marijn Haverbeke. Thank you Marijn!
12 [1] http://marijn.haverbeke.nl/parse-js/
16 - tokenizer(code) -- returns a function. Call the returned
17 function to fetch the next token.
19 - parse(code) -- returns an AST of the given JavaScript code.
21 -------------------------------- (C) ---------------------------------
24 <mihai.bazon@gmail.com>
25 http://mihai.bazon.net/blog
27 Distributed under the BSD license:
29 Copyright 2010 (c) Mihai Bazon <mihai.bazon@gmail.com>
30 Based on parse-js (http://marijn.haverbeke.nl/parse-js/).
32 Redistribution and use in source and binary forms, with or without
33 modification, are permitted provided that the following conditions
36 * Redistributions of source code must retain the above
37 copyright notice, this list of conditions and the following
40 * Redistributions in binary form must reproduce the above
41 copyright notice, this list of conditions and the following
42 disclaimer in the documentation and/or other materials
43 provided with the distribution.
45 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER “AS IS” AND ANY
46 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE
49 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
50 OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
51 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
52 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
53 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
54 TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
55 THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 ***********************************************************************/
60 /* -----[ Tokenizer (constants) ]----- */
62 var KEYWORDS = array_to_hash([
91 var RESERVED_WORDS = array_to_hash([
123 var KEYWORDS_BEFORE_EXPRESSION = array_to_hash([
132 var KEYWORDS_ATOM = array_to_hash([
// Characters that may begin an operator token.  "/" is deliberately absent:
// next_token() tests for it first and routes it to handle_slash().
var OPERATOR_CHARS = array_to_hash(characters("+-*&%=<>!?|~^"));

// Shapes of numeric literals, tried in this order by parse_js_number().
// Note that RE_DEC_NUMBER also matches "" and ".", for which parseFloat()
// yields NaN.
var RE_HEX_NUMBER = /^0x[0-9a-f]+$/i;
var RE_OCT_NUMBER = /^0[0-7]+$/;
var RE_DEC_NUMBER = /^\d*\.?\d*(?:e[+-]?\d*(?:\d\.?|\.?\d)\d*)?$/i;
145 var OPERATORS = array_to_hash([
// Characters skipped between tokens.  U+FEFF (byte order mark) is included
// because ECMAScript treats it as white space anywhere in the source; the
// tokenizer only strips a BOM at the very start of the text, so without this
// entry a BOM in the middle of a file (e.g. after concatenating several
// files) would be reported as an unexpected character.
var WHITESPACE_CHARS = array_to_hash(characters(" \u00a0\n\r\t\f\u000b\u200b\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000\ufeff"));

// Punctuation after which an expression may start; token() uses this set to
// decide whether a following "/" can open a regular expression.
var PUNC_BEFORE_EXPRESSION = array_to_hash(characters("[{(,.;:"));

// Single-character punctuation tokens emitted directly by next_token().
// "." is not listed because it is handled separately by handle_dot().
var PUNC_CHARS = array_to_hash(characters("[]{}(),;:"));

// Regular expression flag letters.
// NOTE(review): no use of this table is visible in this part of the file.
var REGEXP_MODIFIERS = array_to_hash(characters("gmsiy"));
200 /* -----[ Tokenizer ]----- */
202 var UNICODE = { // Unicode 6.1
203 letter: new RegExp("[\\u0041-\\u005A\\u0061-\\u007A\\u00AA\\u00B5\\u00BA\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u02C1\\u02C6-\\u02D1\\u02E0-\\u02E4\\u02EC\\u02EE\\u0370-\\u0374\\u0376\\u0377\\u037A-\\u037D\\u0386\\u0388-\\u038A\\u038C\\u038E-\\u03A1\\u03A3-\\u03F5\\u03F7-\\u0481\\u048A-\\u0527\\u0531-\\u0556\\u0559\\u0561-\\u0587\\u05D0-\\u05EA\\u05F0-\\u05F2\\u0620-\\u064A\\u066E\\u066F\\u0671-\\u06D3\\u06D5\\u06E5\\u06E6\\u06EE\\u06EF\\u06FA-\\u06FC\\u06FF\\u0710\\u0712-\\u072F\\u074D-\\u07A5\\u07B1\\u07CA-\\u07EA\\u07F4\\u07F5\\u07FA\\u0800-\\u0815\\u081A\\u0824\\u0828\\u0840-\\u0858\\u08A0\\u08A2-\\u08AC\\u0904-\\u0939\\u093D\\u0950\\u0958-\\u0961\\u0971-\\u0977\\u0979-\\u097F\\u0985-\\u098C\\u098F\\u0990\\u0993-\\u09A8\\u09AA-\\u09B0\\u09B2\\u09B6-\\u09B9\\u09BD\\u09CE\\u09DC\\u09DD\\u09DF-\\u09E1\\u09F0\\u09F1\\u0A05-\\u0A0A\\u0A0F\\u0A10\\u0A13-\\u0A28\\u0A2A-\\u0A30\\u0A32\\u0A33\\u0A35\\u0A36\\u0A38\\u0A39\\u0A59-\\u0A5C\\u0A5E\\u0A72-\\u0A74\\u0A85-\\u0A8D\\u0A8F-\\u0A91\\u0A93-\\u0AA8\\u0AAA-\\u0AB0\\u0AB2\\u0AB3\\u0AB5-\\u0AB9\\u0ABD\\u0AD0\\u0AE0\\u0AE1\\u0B05-\\u0B0C\\u0B0F\\u0B10\\u0B13-\\u0B28\\u0B2A-\\u0B30\\u0B32\\u0B33\\u0B35-\\u0B39\\u0B3D\\u0B5C\\u0B5D\\u0B5F-\\u0B61\\u0B71\\u0B83\\u0B85-\\u0B8A\\u0B8E-\\u0B90\\u0B92-\\u0B95\\u0B99\\u0B9A\\u0B9C\\u0B9E\\u0B9F\\u0BA3\\u0BA4\\u0BA8-\\u0BAA\\u0BAE-\\u0BB9\\u0BD0\\u0C05-\\u0C0C\\u0C0E-\\u0C10\\u0C12-\\u0C28\\u0C2A-\\u0C33\\u0C35-\\u0C39\\u0C3D\\u0C58\\u0C59\\u0C60\\u0C61\\u0C85-\\u0C8C\\u0C8E-\\u0C90\\u0C92-\\u0CA8\\u0CAA-\\u0CB3\\u0CB5-\\u0CB9\\u0CBD\\u0CDE\\u0CE0\\u0CE1\\u0CF1\\u0CF2\\u0D05-\\u0D0C\\u0D0E-\\u0D10\\u0D12-\\u0D3A\\u0D3D\\u0D4E\\u0D60\\u0D61\\u0D7A-\\u0D7F\\u0D85-\\u0D96\\u0D9A-\\u0DB1\\u0DB3-\\u0DBB\\u0DBD\\u0DC0-\\u0DC6\\u0E01-\\u0E30\\u0E32\\u0E33\\u0E40-\\u0E46\\u0E81\\u0E82\\u0E84\\u0E87\\u0E88\\u0E8A\\u0E8D\\u0E94-\\u0E97\\u0E99-\\u0E9F\\u0EA1-\\u0EA3\\u0EA5\\u0EA7\\u0EAA\\u0EAB\\u0EAD-\\u0EB0\\u0EB2\\u0EB3\\u0EBD\\u0EC0-\\u0EC4\\u0EC6\\u0EDC-\\u0EDF\\u0F00\\u0F40-\\u0F47\\
u0F49-\\u0F6C\\u0F88-\\u0F8C\\u1000-\\u102A\\u103F\\u1050-\\u1055\\u105A-\\u105D\\u1061\\u1065\\u1066\\u106E-\\u1070\\u1075-\\u1081\\u108E\\u10A0-\\u10C5\\u10C7\\u10CD\\u10D0-\\u10FA\\u10FC-\\u1248\\u124A-\\u124D\\u1250-\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288\\u128A-\\u128D\\u1290-\\u12B0\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-\\u12C5\\u12C8-\\u12D6\\u12D8-\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u1380-\\u138F\\u13A0-\\u13F4\\u1401-\\u166C\\u166F-\\u167F\\u1681-\\u169A\\u16A0-\\u16EA\\u16EE-\\u16F0\\u1700-\\u170C\\u170E-\\u1711\\u1720-\\u1731\\u1740-\\u1751\\u1760-\\u176C\\u176E-\\u1770\\u1780-\\u17B3\\u17D7\\u17DC\\u1820-\\u1877\\u1880-\\u18A8\\u18AA\\u18B0-\\u18F5\\u1900-\\u191C\\u1950-\\u196D\\u1970-\\u1974\\u1980-\\u19AB\\u19C1-\\u19C7\\u1A00-\\u1A16\\u1A20-\\u1A54\\u1AA7\\u1B05-\\u1B33\\u1B45-\\u1B4B\\u1B83-\\u1BA0\\u1BAE\\u1BAF\\u1BBA-\\u1BE5\\u1C00-\\u1C23\\u1C4D-\\u1C4F\\u1C5A-\\u1C7D\\u1CE9-\\u1CEC\\u1CEE-\\u1CF1\\u1CF5\\u1CF6\\u1D00-\\u1DBF\\u1E00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D\\u1F80-\\u1FB4\\u1FB6-\\u1FBC\\u1FBE\\u1FC2-\\u1FC4\\u1FC6-\\u1FCC\\u1FD0-\\u1FD3\\u1FD6-\\u1FDB\\u1FE0-\\u1FEC\\u1FF2-\\u1FF4\\u1FF6-\\u1FFC\\u2071\\u207F\\u2090-\\u209C\\u2102\\u2107\\u210A-\\u2113\\u2115\\u2119-\\u211D\\u2124\\u2126\\u2128\\u212A-\\u212D\\u212F-\\u2139\\u213C-\\u213F\\u2145-\\u2149\\u214E\\u2160-\\u2188\\u2C00-\\u2C2E\\u2C30-\\u2C5E\\u2C60-\\u2CE4\\u2CEB-\\u2CEE\\u2CF2\\u2CF3\\u2D00-\\u2D25\\u2D27\\u2D2D\\u2D30-\\u2D67\\u2D6F\\u2D80-\\u2D96\\u2DA0-\\u2DA6\\u2DA8-\\u2DAE\\u2DB0-\\u2DB6\\u2DB8-\\u2DBE\\u2DC0-\\u2DC6\\u2DC8-\\u2DCE\\u2DD0-\\u2DD6\\u2DD8-\\u2DDE\\u2E2F\\u3005-\\u3007\\u3021-\\u3029\\u3031-\\u3035\\u3038-\\u303C\\u3041-\\u3096\\u309D-\\u309F\\u30A1-\\u30FA\\u30FC-\\u30FF\\u3105-\\u312D\\u3131-\\u318E\\u31A0-\\u31BA\\u31F0-\\u31FF\\u3400-\\u4DB5\\u4E00-\\u9FCC\\uA000-\\uA48C\\uA4D0-\\uA4FD\\uA500-\\uA60C\\uA610-\\uA61F\\uA62A\\uA62B\\uA640-\\uA66E\\uA67F-\\uA697\\uA6A0-\\u
A6EF\\uA717-\\uA71F\\uA722-\\uA788\\uA78B-\\uA78E\\uA790-\\uA793\\uA7A0-\\uA7AA\\uA7F8-\\uA801\\uA803-\\uA805\\uA807-\\uA80A\\uA80C-\\uA822\\uA840-\\uA873\\uA882-\\uA8B3\\uA8F2-\\uA8F7\\uA8FB\\uA90A-\\uA925\\uA930-\\uA946\\uA960-\\uA97C\\uA984-\\uA9B2\\uA9CF\\uAA00-\\uAA28\\uAA40-\\uAA42\\uAA44-\\uAA4B\\uAA60-\\uAA76\\uAA7A\\uAA80-\\uAAAF\\uAAB1\\uAAB5\\uAAB6\\uAAB9-\\uAABD\\uAAC0\\uAAC2\\uAADB-\\uAADD\\uAAE0-\\uAAEA\\uAAF2-\\uAAF4\\uAB01-\\uAB06\\uAB09-\\uAB0E\\uAB11-\\uAB16\\uAB20-\\uAB26\\uAB28-\\uAB2E\\uABC0-\\uABE2\\uAC00-\\uD7A3\\uD7B0-\\uD7C6\\uD7CB-\\uD7FB\\uF900-\\uFA6D\\uFA70-\\uFAD9\\uFB00-\\uFB06\\uFB13-\\uFB17\\uFB1D\\uFB1F-\\uFB28\\uFB2A-\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-\\uFBB1\\uFBD3-\\uFD3D\\uFD50-\\uFD8F\\uFD92-\\uFDC7\\uFDF0-\\uFDFB\\uFE70-\\uFE74\\uFE76-\\uFEFC\\uFF21-\\uFF3A\\uFF41-\\uFF5A\\uFF66-\\uFFBE\\uFFC2-\\uFFC7\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7\\uFFDA-\\uFFDC]"),
204 combining_mark: new RegExp("[\\u0300-\\u036F\\u0483-\\u0487\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5\\u05C7\\u0610-\\u061A\\u064B-\\u065F\\u0670\\u06D6-\\u06DC\\u06DF-\\u06E4\\u06E7\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07A6-\\u07B0\\u07EB-\\u07F3\\u0816-\\u0819\\u081B-\\u0823\\u0825-\\u0827\\u0829-\\u082D\\u0859-\\u085B\\u08E4-\\u08FE\\u0900-\\u0903\\u093A-\\u093C\\u093E-\\u094F\\u0951-\\u0957\\u0962\\u0963\\u0981-\\u0983\\u09BC\\u09BE-\\u09C4\\u09C7\\u09C8\\u09CB-\\u09CD\\u09D7\\u09E2\\u09E3\\u0A01-\\u0A03\\u0A3C\\u0A3E-\\u0A42\\u0A47\\u0A48\\u0A4B-\\u0A4D\\u0A51\\u0A70\\u0A71\\u0A75\\u0A81-\\u0A83\\u0ABC\\u0ABE-\\u0AC5\\u0AC7-\\u0AC9\\u0ACB-\\u0ACD\\u0AE2\\u0AE3\\u0B01-\\u0B03\\u0B3C\\u0B3E-\\u0B44\\u0B47\\u0B48\\u0B4B-\\u0B4D\\u0B56\\u0B57\\u0B62\\u0B63\\u0B82\\u0BBE-\\u0BC2\\u0BC6-\\u0BC8\\u0BCA-\\u0BCD\\u0BD7\\u0C01-\\u0C03\\u0C3E-\\u0C44\\u0C46-\\u0C48\\u0C4A-\\u0C4D\\u0C55\\u0C56\\u0C62\\u0C63\\u0C82\\u0C83\\u0CBC\\u0CBE-\\u0CC4\\u0CC6-\\u0CC8\\u0CCA-\\u0CCD\\u0CD5\\u0CD6\\u0CE2\\u0CE3\\u0D02\\u0D03\\u0D3E-\\u0D44\\u0D46-\\u0D48\\u0D4A-\\u0D4D\\u0D57\\u0D62\\u0D63\\u0D82\\u0D83\\u0DCA\\u0DCF-\\u0DD4\\u0DD6\\u0DD8-\\u0DDF\\u0DF2\\u0DF3\\u0E31\\u0E34-\\u0E3A\\u0E47-\\u0E4E\\u0EB1\\u0EB4-\\u0EB9\\u0EBB\\u0EBC\\u0EC8-\\u0ECD\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F3E\\u0F3F\\u0F71-\\u0F84\\u0F86\\u0F87\\u0F8D-\\u0F97\\u0F99-\\u0FBC\\u0FC6\\u102B-\\u103E\\u1056-\\u1059\\u105E-\\u1060\\u1062-\\u1064\\u1067-\\u106D\\u1071-\\u1074\\u1082-\\u108D\\u108F\\u109A-\\u109D\\u135D-\\u135F\\u1712-\\u1714\\u1732-\\u1734\\u1752\\u1753\\u1772\\u1773\\u17B4-\\u17D3\\u17DD\\u180B-\\u180D\\u18A9\\u1920-\\u192B\\u1930-\\u193B\\u19B0-\\u19C0\\u19C8\\u19C9\\u1A17-\\u1A1B\\u1A55-\\u1A5E\\u1A60-\\u1A7C\\u1A7F\\u1B00-\\u1B04\\u1B34-\\u1B44\\u1B6B-\\u1B73\\u1B80-\\u1B82\\u1BA1-\\u1BAD\\u1BE6-\\u1BF3\\u1C24-\\u1C37\\u1CD0-\\u1CD2\\u1CD4-\\u1CE8\\u1CED\\u1CF2-\\u1CF4\\u1DC0-\\u1DE6\\u1DFC-\\u1DFF\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20F0\\u2CEF-\\u2CF1\\u2D7F\\u2DE0-\\u2DFF
\\u302A-\\u302F\\u3099\\u309A\\uA66F\\uA674-\\uA67D\\uA69F\\uA6F0\\uA6F1\\uA802\\uA806\\uA80B\\uA823-\\uA827\\uA880\\uA881\\uA8B4-\\uA8C4\\uA8E0-\\uA8F1\\uA926-\\uA92D\\uA947-\\uA953\\uA980-\\uA983\\uA9B3-\\uA9C0\\uAA29-\\uAA36\\uAA43\\uAA4C\\uAA4D\\uAA7B\\uAAB0\\uAAB2-\\uAAB4\\uAAB7\\uAAB8\\uAABE\\uAABF\\uAAC1\\uAAEB-\\uAAEF\\uAAF5\\uAAF6\\uABE3-\\uABEA\\uABEC\\uABED\\uFB1E\\uFE00-\\uFE0F\\uFE20-\\uFE26]"),
205 connector_punctuation: new RegExp("[\\u005F\\u203F\\u2040\\u2054\\uFE33\\uFE34\\uFE4D-\\uFE4F\\uFF3F]"),
206 digit: new RegExp("[\\u0030-\\u0039\\u0660-\\u0669\\u06F0-\\u06F9\\u07C0-\\u07C9\\u0966-\\u096F\\u09E6-\\u09EF\\u0A66-\\u0A6F\\u0AE6-\\u0AEF\\u0B66-\\u0B6F\\u0BE6-\\u0BEF\\u0C66-\\u0C6F\\u0CE6-\\u0CEF\\u0D66-\\u0D6F\\u0E50-\\u0E59\\u0ED0-\\u0ED9\\u0F20-\\u0F29\\u1040-\\u1049\\u1090-\\u1099\\u17E0-\\u17E9\\u1810-\\u1819\\u1946-\\u194F\\u19D0-\\u19D9\\u1A80-\\u1A89\\u1A90-\\u1A99\\u1B50-\\u1B59\\u1BB0-\\u1BB9\\u1C40-\\u1C49\\u1C50-\\u1C59\\uA620-\\uA629\\uA8D0-\\uA8D9\\uA900-\\uA909\\uA9D0-\\uA9D9\\uAA50-\\uAA59\\uABF0-\\uABF9\\uFF10-\\uFF19]")
/**
 * Tests a character against the UNICODE.letter table.
 *
 * The underlying pattern is an unanchored character class, so for a longer
 * string the result is true as soon as any character in it is a letter.
 *
 * @param {string} ch  character to classify
 * @returns {boolean}
 */
function is_letter(ch) {
        return UNICODE.letter.test(ch);
}
/**
 * Tests whether the first character of `ch` is an ASCII decimal digit
 * ("0" through "9", char codes 48..57).
 *
 * An empty string yields false, because charCodeAt(0) is NaN and every
 * comparison with NaN fails.
 *
 * @param {string} ch  character to classify
 * @returns {boolean}
 */
function is_digit(ch) {
        var code = ch.charCodeAt(0);
        return code >= 48 && code <= 57;
}
/**
 * Tests a character against the UNICODE.digit table, which covers decimal
 * digits of many scripts in addition to ASCII "0".."9".
 *
 * @param {string} ch  character to classify
 * @returns {boolean}
 */
function is_unicode_digit(ch) {
        return UNICODE.digit.test(ch);
}
/**
 * Tests whether `ch` is an ASCII digit (is_digit) or a letter (is_letter).
 *
 * @param {string} ch  character to classify
 * @returns {boolean}
 */
function is_alphanumeric_char(ch) {
        return is_digit(ch) || is_letter(ch);
}
/**
 * Tests a character against the UNICODE.combining_mark table.
 *
 * @param {string} ch  character to classify
 * @returns {boolean}
 */
function is_unicode_combining_mark(ch) {
        return UNICODE.combining_mark.test(ch);
}
/**
 * Tests a character against the UNICODE.connector_punctuation table
 * (underscore and similar connector characters).
 *
 * @param {string} ch  character to classify
 * @returns {boolean}
 */
function is_unicode_connector_punctuation(ch) {
        return UNICODE.connector_punctuation.test(ch);
}
/**
 * Tests whether `ch` may begin an identifier: "$", "_" or any letter
 * accepted by is_letter().
 *
 * @param {string} ch  character to classify
 * @returns {boolean}
 */
function is_identifier_start(ch) {
        return ch == "$" || ch == "_" || is_letter(ch);
}
/**
 * Tests whether `ch` may appear inside an identifier after its first
 * character: anything that can start an identifier, plus combining marks,
 * Unicode digits, connector punctuation and the two zero-width joiners.
 *
 * @param {string} ch  character to classify
 * @returns {boolean}
 */
function is_identifier_char(ch) {
        return is_identifier_start(ch)
                || is_unicode_combining_mark(ch)
                || is_unicode_digit(ch)
                || is_unicode_connector_punctuation(ch)
                || ch == "\u200c" // zero-width non-joiner <ZWNJ>
                || ch == "\u200d" // zero-width joiner <ZWJ> (in my ECMA-262 PDF, this is also 200c)
        ;
}
/**
 * Converts the source text of a numeric literal into a number.
 *
 * The text is matched against RE_HEX_NUMBER, RE_OCT_NUMBER and
 * RE_DEC_NUMBER in that order, and the first pattern that matches decides
 * how it is parsed (base 16, base 8, or parseFloat).
 *
 * @param {string} num  literal text, e.g. "0x1F", "017" or "1.5e3"
 * @returns {number|undefined} the numeric value.  The result is undefined
 *          when no pattern matches, and may be NaN for text such as "."
 *          that RE_DEC_NUMBER accepts but parseFloat() cannot read.
 */
function parse_js_number(num) {
        if (RE_HEX_NUMBER.test(num)) {
                // drop the leading "0x"
                return parseInt(num.substr(2), 16);
        } else if (RE_OCT_NUMBER.test(num)) {
                // drop the leading "0"
                return parseInt(num.substr(1), 8);
        } else if (RE_DEC_NUMBER.test(num)) {
                return parseFloat(num);
        }
}
258 function JS_Parse_Error(message, line, col, pos) {
259 this.message = message;
260 this.line = line + 1;
263 this.stack = new Error().stack;
/**
 * Renders the error as its message, followed by the line/col/pos stored on
 * the instance, a blank line, and the captured stack trace.
 *
 * @returns {string}
 */
JS_Parse_Error.prototype.toString = function() {
        var where = " (line: " + this.line + ", col: " + this.col + ", pos: " + this.pos + ")";
        return this.message + where + "\n\n" + this.stack;
};
/**
 * Throws a JS_Parse_Error built from the given message and position.
 * This function never returns normally.
 *
 * @param {string} message  description of the problem
 * @param {number} line     line passed through to JS_Parse_Error
 * @param {number} col      column passed through to JS_Parse_Error
 * @param {number} pos      offset passed through to JS_Parse_Error
 * @throws {JS_Parse_Error} always
 */
function js_error(message, line, col, pos) {
        throw new JS_Parse_Error(message, line, col, pos);
}
/**
 * Tests whether a token has the given type and, optionally, the given value.
 *
 * @param {{type: string, value: *}} token  token to inspect
 * @param {string} type  required token type
 * @param {*} [val]      required token value; when null or undefined only
 *                       the type is compared.  Values are compared with
 *                       loose equality (==).
 * @returns {boolean}
 */
function is_token(token, type, val) {
        return token.type == type && (val == null || token.value == val);
}
280 function tokenizer($TEXT) {
283 text : $TEXT.replace(/\r\n?|[\n\u2028\u2029]/g, "\n").replace(/^\uFEFF/, ''),
290 newline_before : false,
291 regex_allowed : false,
295 function peek() { return S.text.charAt(S.pos); };
297 function next(signal_eof, in_string) {
298 var ch = S.text.charAt(S.pos++);
299 if (signal_eof && !ch)
302 S.newline_before = S.newline_before || !in_string;
315 function find(what, signal_eof) {
316 var pos = S.text.indexOf(what, S.pos);
317 if (signal_eof && pos == -1) throw EX_EOF;
321 function start_token() {
327 function token(type, value, is_comment) {
328 S.regex_allowed = ((type == "operator" && !HOP(UNARY_POSTFIX, value)) ||
329 (type == "keyword" && HOP(KEYWORDS_BEFORE_EXPRESSION, value)) ||
330 (type == "punc" && HOP(PUNC_BEFORE_EXPRESSION, value)));
338 nlb : S.newline_before
341 ret.comments_before = S.comments_before;
342 S.comments_before = [];
343 // make note of any newlines in the comments that came before
344 for (var i = 0, len = ret.comments_before.length; i < len; i++) {
345 ret.nlb = ret.nlb || ret.comments_before[i].nlb;
348 S.newline_before = false;
352 function skip_whitespace() {
353 while (HOP(WHITESPACE_CHARS, peek()))
357 function read_while(pred) {
358 var ret = "", ch = peek(), i = 0;
359 while (ch && pred(ch, i++)) {
366 function parse_error(err) {
367 js_error(err, S.tokline, S.tokcol, S.tokpos);
370 function read_num(prefix) {
371 var has_e = false, after_e = false, has_x = false, has_dot = prefix == ".";
372 var num = read_while(function(ch, i){
373 if (ch == "x" || ch == "X") {
374 if (has_x) return false;
377 if (!has_x && (ch == "E" || ch == "e")) {
378 if (has_e) return false;
379 return has_e = after_e = true;
382 if (after_e || (i == 0 && !prefix)) return true;
385 if (ch == "+") return after_e;
388 if (!has_dot && !has_x && !has_e)
389 return has_dot = true;
392 return is_alphanumeric_char(ch);
396 var valid = parse_js_number(num);
398 return token("num", valid);
400 parse_error("Invalid syntax: " + num);
404 function read_escaped_char(in_string) {
405 var ch = next(true, in_string);
407 case "n" : return "\n";
408 case "r" : return "\r";
409 case "t" : return "\t";
410 case "b" : return "\b";
411 case "v" : return "\u000b";
412 case "f" : return "\f";
413 case "0" : return "\0";
414 case "x" : return String.fromCharCode(hex_bytes(2));
415 case "u" : return String.fromCharCode(hex_bytes(4));
416 case "\n": return "";
421 function hex_bytes(n) {
424 var digit = parseInt(next(true), 16);
426 parse_error("Invalid hex-character pattern in string");
427 num = (num << 4) | digit;
432 function read_string() {
433 return with_eof_error("Unterminated string constant", function(){
434 var quote = next(), ret = "";
438 // read OctalEscapeSequence (XXX: deprecated if "strict mode")
439 // https://github.com/mishoo/UglifyJS/issues/178
440 var octal_len = 0, first = null;
441 ch = read_while(function(ch){
442 if (ch >= "0" && ch <= "7") {
447 else if (first <= "3" && octal_len <= 2) return ++octal_len;
448 else if (first >= "4" && octal_len <= 1) return ++octal_len;
452 if (octal_len > 0) ch = String.fromCharCode(parseInt(ch, 8));
453 else ch = read_escaped_char(true);
455 else if (ch == quote) break;
458 return token("string", ret);
462 function read_line_comment() {
464 var i = find("\n"), ret;
466 ret = S.text.substr(S.pos);
467 S.pos = S.text.length;
469 ret = S.text.substring(S.pos, i);
472 return token("comment1", ret, true);
475 function read_multiline_comment() {
477 return with_eof_error("Unterminated multiline comment", function(){
478 var i = find("*/", true),
479 text = S.text.substring(S.pos, i);
481 S.line += text.split("\n").length - 1;
482 S.newline_before = S.newline_before || text.indexOf("\n") >= 0;
484 // https://github.com/mishoo/UglifyJS/issues/#issue/100
485 if (/^@cc_on/i.test(text)) {
486 warn("WARNING: at line " + S.line);
487 warn("*** Found \"conditional comment\": " + text);
488 warn("*** UglifyJS DISCARDS ALL COMMENTS. This means your code might no longer work properly in Internet Explorer.");
491 return token("comment2", text, true);
495 function read_name() {
496 var backslash = false, name = "", ch, escaped = false, hex;
497 while ((ch = peek()) != null) {
499 if (ch == "\\") escaped = backslash = true, next();
500 else if (is_identifier_char(ch)) name += next();
504 if (ch != "u") parse_error("Expecting UnicodeEscapeSequence -- uXXXX");
505 ch = read_escaped_char();
506 if (!is_identifier_char(ch)) parse_error("Unicode char: " + ch.charCodeAt(0) + " is not valid in identifier");
511 if (HOP(KEYWORDS, name) && escaped) {
512 hex = name.charCodeAt(0).toString(16).toUpperCase();
513 name = "\\u" + "0000".substr(hex.length) + hex + name.slice(1);
518 function read_regexp(regexp) {
519 return with_eof_error("Unterminated regular expression", function(){
520 var prev_backslash = false, ch, in_class = false;
521 while ((ch = next(true))) if (prev_backslash) {
523 prev_backslash = false;
524 } else if (ch == "[") {
527 } else if (ch == "]" && in_class) {
530 } else if (ch == "/" && !in_class) {
532 } else if (ch == "\\") {
533 prev_backslash = true;
537 var mods = read_name();
538 return token("regexp", [ regexp, mods ]);
542 function read_operator(prefix) {
544 if (!peek()) return op;
545 var bigger = op + peek();
546 if (HOP(OPERATORS, bigger)) {
553 return token("operator", grow(prefix || next()));
556 function handle_slash() {
558 var regex_allowed = S.regex_allowed;
561 S.comments_before.push(read_line_comment());
562 S.regex_allowed = regex_allowed;
565 S.comments_before.push(read_multiline_comment());
566 S.regex_allowed = regex_allowed;
569 return S.regex_allowed ? read_regexp("") : read_operator("/");
572 function handle_dot() {
574 return is_digit(peek())
576 : token("punc", ".");
579 function read_word() {
580 var word = read_name();
581 return !HOP(KEYWORDS, word)
582 ? token("name", word)
583 : HOP(OPERATORS, word)
584 ? token("operator", word)
585 : HOP(KEYWORDS_ATOM, word)
586 ? token("atom", word)
587 : token("keyword", word);
590 function with_eof_error(eof_error, cont) {
594 if (ex === EX_EOF) parse_error(eof_error);
599 function next_token(force_regexp) {
600 if (force_regexp != null)
601 return read_regexp(force_regexp);
605 if (!ch) return token("eof");
606 if (is_digit(ch)) return read_num();
607 if (ch == '"' || ch == "'") return read_string();
608 if (HOP(PUNC_CHARS, ch)) return token("punc", next());
609 if (ch == ".") return handle_dot();
610 if (ch == "/") return handle_slash();
611 if (HOP(OPERATOR_CHARS, ch)) return read_operator();
612 if (ch == "\\" || is_identifier_start(ch)) return read_word();
613 parse_error("Unexpected character '" + ch + "'");
616 next_token.context = function(nc) {
625 /* -----[ Parser (constants) ]----- */
627 var UNARY_PREFIX = array_to_hash([
// Operators that may follow their operand.  The tokenizer consults this set
// so that a "/" after "++" or "--" is not taken as the start of a regexp,
// and maybe_unary() uses it to recognise postfix increments/decrements.
var UNARY_POSTFIX = array_to_hash([ "--", "++" ]);
641 var ASSIGNMENT = (function(a, ret, i){
642 while (i < a.length) {
643 ret[a[i]] = a[i].substr(0, a[i].length - 1);
648 ["+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "|=", "^=", "&="],
653 var PRECEDENCE = (function(a, ret){
654 for (var i = 0, n = 1; i < a.length; ++i, ++n) {
656 for (var j = 0; j < b.length; ++j) {
668 ["==", "===", "!=", "!=="],
669 ["<", ">", "<=", ">=", "in", "instanceof"],
// Statement kinds that labeled_statement() accepts as the target of a label
// when the parser runs in exigent mode.
var STATEMENTS_WITH_LABELS = array_to_hash([ "for", "do", "while", "switch" ]);

// Token types that expr_atom() turns directly into an atomic AST node.
var ATOMIC_START_TOKEN = array_to_hash([ "atom", "num", "string", "regexp", "name" ]);
681 /* -----[ Parser ]----- */
683 function NodeWithToken(str, start, end) {
// String conversion yields the instance's `name` property, so a
// NodeWithToken can be compared or concatenated like a plain tag string
// (e.g. `expr[0]+""` in is_assignable()).
NodeWithToken.prototype.toString = function() { return this.name; };
691 function parse($TEXT, exigent_mode, embed_tokens) {
694 input : typeof $TEXT == "string" ? tokenizer($TEXT, true) : $TEXT,
699 in_directives : true,
706 function is(type, value) {
707 return is_token(S.token, type, value);
710 function peek() { return S.peeked || (S.peeked = S.input()); };
720 S.in_directives = S.in_directives && (
721 S.token.type == "string" || is("punc", ";")
730 function croak(msg, line, col, pos) {
731 var ctx = S.input.context();
733 line != null ? line : ctx.tokline,
734 col != null ? col : ctx.tokcol,
735 pos != null ? pos : ctx.tokpos);
738 function token_error(token, msg) {
739 croak(msg, token.line, token.col);
742 function unexpected(token) {
745 token_error(token, "Unexpected token: " + token.type + " (" + token.value + ")");
748 function expect_token(type, val) {
752 token_error(S.token, "Unexpected token " + S.token.type + ", expected " + type);
755 function expect(punc) { return expect_token("punc", punc); };
757 function can_insert_semicolon() {
758 return !exigent_mode && (
759 S.token.nlb || is("eof") || is("punc", "}")
763 function semicolon() {
764 if (is("punc", ";")) next();
765 else if (!can_insert_semicolon()) unexpected();
769 return slice(arguments);
772 function parenthesised() {
774 var ex = expression();
779 function add_tokens(str, start, end) {
780 return str instanceof NodeWithToken ? str : new NodeWithToken(str, start, end);
783 function maybe_embed_tokens(parser) {
784 if (embed_tokens) return function() {
786 var ast = parser.apply(this, arguments);
787 ast[0] = add_tokens(ast[0], start, prev());
793 var statement = maybe_embed_tokens(function() {
794 if (is("operator", "/") || is("operator", "/=")) {
796 S.token = S.input(S.token.value.substr(1)); // force regexp
798 switch (S.token.type) {
800 var dir = S.in_directives, stat = simple_statement();
801 if (dir && stat[1][0] == "string" && !is("punc", ","))
802 return as("directive", stat[1][1]);
808 return simple_statement();
811 return is_token(peek(), "punc", ":")
812 ? labeled_statement(prog1(S.token.value, next, next))
813 : simple_statement();
816 switch (S.token.value) {
818 return as("block", block_());
821 return simple_statement();
830 switch (prog1(S.token.value, next)) {
832 return break_cont("break");
835 return break_cont("continue");
839 return as("debugger");
842 return (function(body){
843 expect_token("keyword", "while");
844 return as("do", prog1(parenthesised, semicolon), body);
845 })(in_loop(statement));
851 return function_(true);
857 if (S.in_function == 0)
858 croak("'return' outside of function");
862 : can_insert_semicolon()
864 : prog1(expression, semicolon));
867 return as("switch", parenthesised(), switch_block_());
871 croak("Illegal newline after 'throw'");
872 return as("throw", prog1(expression, semicolon));
878 return prog1(var_, semicolon);
881 return prog1(const_, semicolon);
884 return as("while", parenthesised(), in_loop(statement));
887 return as("with", parenthesised(), statement());
895 function labeled_statement(label) {
896 S.labels.push(label);
897 var start = S.token, stat = statement();
898 if (exigent_mode && !HOP(STATEMENTS_WITH_LABELS, stat[0]))
901 return as("label", label, stat);
904 function simple_statement() {
905 return as("stat", prog1(expression, semicolon));
908 function break_cont(type) {
910 if (!can_insert_semicolon()) {
911 name = is("name") ? S.token.value : null;
915 if (!member(name, S.labels))
916 croak("Label " + name + " without matching loop or statement");
918 else if (S.in_loop == 0)
919 croak(type + " not inside a loop or switch");
921 return as(type, name);
927 if (!is("punc", ";")) {
928 init = is("keyword", "var")
929 ? (next(), var_(true))
930 : expression(true, true);
931 if (is("operator", "in")) {
932 if (init[0] == "var" && init[1].length > 1)
933 croak("Only one variable declaration allowed in for..in loop");
937 return regular_for(init);
940 function regular_for(init) {
942 var test = is("punc", ";") ? null : expression();
944 var step = is("punc", ")") ? null : expression();
946 return as("for", init, test, step, in_loop(statement));
949 function for_in(init) {
950 var lhs = init[0] == "var" ? as("name", init[1][0]) : init;
952 var obj = expression();
954 return as("for-in", init, lhs, obj, in_loop(statement));
957 var function_ = function(in_statement) {
958 var name = is("name") ? prog1(S.token.value, next) : null;
959 if (in_statement && !name)
962 return as(in_statement ? "defun" : "function",
966 while (!is("punc", ")")) {
967 if (first) first = false; else expect(",");
968 if (!is("name")) unexpected();
969 a.push(S.token.value);
978 var loop = S.in_loop;
979 S.in_directives = true;
989 var cond = parenthesised(), body = statement(), belse;
990 if (is("keyword", "else")) {
994 return as("if", cond, body, belse);
1000 while (!is("punc", "}")) {
1001 if (is("eof")) unexpected();
1002 a.push(statement());
1008 var switch_block_ = curry(in_loop, function(){
1010 var a = [], cur = null;
1011 while (!is("punc", "}")) {
1012 if (is("eof")) unexpected();
1013 if (is("keyword", "case")) {
1016 a.push([ expression(), cur ]);
1019 else if (is("keyword", "default")) {
1023 a.push([ null, cur ]);
1026 if (!cur) unexpected();
1027 cur.push(statement());
1035 var body = block_(), bcatch, bfinally;
1036 if (is("keyword", "catch")) {
1040 croak("Name expected");
1041 var name = S.token.value;
1044 bcatch = [ name, block_() ];
1046 if (is("keyword", "finally")) {
1048 bfinally = block_();
1050 if (!bcatch && !bfinally)
1051 croak("Missing catch/finally blocks");
1052 return as("try", body, bcatch, bfinally);
1055 function vardefs(no_in) {
1060 var name = S.token.value;
1062 if (is("operator", "=")) {
1064 a.push([ name, expression(false, no_in) ]);
1068 if (!is("punc", ","))
1075 function var_(no_in) {
1076 return as("var", vardefs(no_in));
1080 return as("const", vardefs());
1084 var newexp = expr_atom(false), args;
1085 if (is("punc", "(")) {
1087 args = expr_list(")");
1091 return subscripts(as("new", newexp, args), true);
1094 var expr_atom = maybe_embed_tokens(function(allow_calls) {
1095 if (is("operator", "new")) {
1100 switch (S.token.value) {
1103 return subscripts(prog1(expression, curry(expect, ")")), allow_calls);
1106 return subscripts(array_(), allow_calls);
1109 return subscripts(object_(), allow_calls);
1113 if (is("keyword", "function")) {
1115 return subscripts(function_(false), allow_calls);
1117 if (HOP(ATOMIC_START_TOKEN, S.token.type)) {
1118 var atom = S.token.type == "regexp"
1119 ? as("regexp", S.token.value[0], S.token.value[1])
1120 : as(S.token.type, S.token.value);
1121 return subscripts(prog1(atom, next), allow_calls);
1126 function expr_list(closing, allow_trailing_comma, allow_empty) {
1127 var first = true, a = [];
1128 while (!is("punc", closing)) {
1129 if (first) first = false; else expect(",");
1130 if (allow_trailing_comma && is("punc", closing)) break;
1131 if (is("punc", ",") && allow_empty) {
1132 a.push([ "atom", "undefined" ]);
1134 a.push(expression(false));
1142 return as("array", expr_list("]", !exigent_mode, true));
1145 function object_() {
1146 var first = true, a = [];
1147 while (!is("punc", "}")) {
1148 if (first) first = false; else expect(",");
1149 if (!exigent_mode && is("punc", "}"))
1150 // allow trailing comma
1152 var type = S.token.type;
1153 var name = as_property_name();
1154 if (type == "name" && (name == "get" || name == "set") && !is("punc", ":")) {
1155 a.push([ as_name(), function_(false), name ]);
1158 a.push([ name, expression(false) ]);
1160 // FIXME [!!] Line not in original parse-js,
1161 // added to be able to warn about unquoted
1162 // keyword properties
1163 a[a.length - 1].type = type;
1166 return as("object", a);
1169 function as_property_name() {
1170 switch (S.token.type) {
1173 return prog1(S.token.value, next);
1178 function as_name() {
1179 switch (S.token.type) {
1184 return prog1(S.token.value, next);
1190 function subscripts(expr, allow_calls) {
1191 if (is("punc", ".")) {
1193 return subscripts(as("dot", expr, as_name()), allow_calls);
1195 if (is("punc", "[")) {
1197 return subscripts(as("sub", expr, prog1(expression, curry(expect, "]"))), allow_calls);
1199 if (allow_calls && is("punc", "(")) {
1201 return subscripts(as("call", expr, expr_list(")")), true);
1206 function maybe_unary(allow_calls) {
1207 if (is("operator") && HOP(UNARY_PREFIX, S.token.value)) {
1208 return make_unary("unary-prefix",
1209 prog1(S.token.value, next),
1210 maybe_unary(allow_calls));
1212 var val = expr_atom(allow_calls);
1213 while (is("operator") && HOP(UNARY_POSTFIX, S.token.value) && !S.token.nlb) {
1214 val = make_unary("unary-postfix", S.token.value, val);
1220 function make_unary(tag, op, expr) {
1221 if ((op == "++" || op == "--") && !is_assignable(expr))
1222 croak("Invalid use of " + op + " operator");
1223 return as(tag, op, expr);
1226 function expr_op(left, min_prec, no_in) {
1227 var op = is("operator") ? S.token.value : null;
1228 if (op && op == "in" && no_in) op = null;
1229 var prec = op != null ? PRECEDENCE[op] : null;
1230 if (prec != null && prec > min_prec) {
1232 var right = expr_op(maybe_unary(true), prec, no_in);
1233 return expr_op(as("binary", op, left, right), min_prec, no_in);
1238 function expr_ops(no_in) {
1239 return expr_op(maybe_unary(true), 0, no_in);
1242 function maybe_conditional(no_in) {
1243 var expr = expr_ops(no_in);
1244 if (is("operator", "?")) {
1246 var yes = expression(false);
1248 return as("conditional", expr, yes, expression(false, no_in));
1253 function is_assignable(expr) {
1254 if (!exigent_mode) return true;
1255 switch (expr[0]+"") {
1262 return expr[1] != "this";
1266 function maybe_assign(no_in) {
1267 var left = maybe_conditional(no_in), val = S.token.value;
1268 if (is("operator") && HOP(ASSIGNMENT, val)) {
1269 if (is_assignable(left)) {
1271 return as("assign", ASSIGNMENT[val], left, maybe_assign(no_in));
1273 croak("Invalid assignment");
1278 var expression = maybe_embed_tokens(function(commas, no_in) {
1279 if (arguments.length == 0)
1281 var expr = maybe_assign(no_in);
1282 if (commas && is("punc", ",")) {
1284 return as("seq", expr, expression(true, no_in));
1289 function in_loop(cont) {
1298 return as("toplevel", (function(a){
1300 a.push(statement());
1306 /* -----[ Utilities ]----- */
1309 var args = slice(arguments, 1);
1310 return function() { return f.apply(this, args.concat(slice(arguments))); };
1313 function prog1(ret) {
1314 if (ret instanceof Function)
1316 for (var i = 1, n = arguments.length; --n > 0; ++i)
1321 function array_to_hash(a) {
1323 for (var i = 0; i < a.length; ++i)
/**
 * Copies an array-like object (such as `arguments`) into a real array.
 *
 * @param {ArrayLike} a       source of the elements
 * @param {number} [start=0]  index of the first element to copy
 * @returns {Array} a new array holding a[start], a[start + 1], ...
 */
function slice(a, start) {
        return Array.prototype.slice.call(a, start || 0);
}
/**
 * Splits a string into an array of its individual UTF-16 code units.
 *
 * @param {string} str  text to split
 * @returns {string[]} one single-character string per code unit; an empty
 *          array for the empty string
 */
function characters(str) {
        return str.split("");
}
1336 function member(name, array) {
1337 for (var i = array.length; --i >= 0;)
1338 if (array[i] == name)
/**
 * "Has own property": reports whether `prop` is defined directly on `obj`,
 * ignoring anything inherited through the prototype chain.  The check goes
 * through Object.prototype so it also works for objects that shadow or lack
 * their own hasOwnProperty method.
 *
 * @param {Object} obj   object to inspect
 * @param {string} prop  property name
 * @returns {boolean}
 */
function HOP(obj, prop) {
        return Object.prototype.hasOwnProperty.call(obj, prop);
}
// Sink for diagnostic messages (e.g. the conditional-comment warnings issued
// by read_multiline_comment).  It is a no-op by default, so warnings are
// silently discarded.
var warn = function() {};
1349 /* -----[ Exports ]----- */
// Public API: the tokenizer and parser entry points, followed by the helper
// functions and lookup tables that are shared with other modules.
exports.tokenizer = tokenizer;
exports.parse = parse;
exports.slice = slice;
exports.curry = curry;
exports.member = member;
exports.array_to_hash = array_to_hash;
exports.PRECEDENCE = PRECEDENCE;
exports.KEYWORDS_ATOM = KEYWORDS_ATOM;
exports.RESERVED_WORDS = RESERVED_WORDS;
exports.KEYWORDS = KEYWORDS;
exports.ATOMIC_START_TOKEN = ATOMIC_START_TOKEN;
exports.OPERATORS = OPERATORS;
exports.is_alphanumeric_char = is_alphanumeric_char;
exports.is_identifier_start = is_identifier_start;
exports.is_identifier_char = is_identifier_char;
1366 exports.set_logger = function(logger) {
1371 // js-indent-level: 8