/*
 * Pig Latin Mode for CodeMirror 2
 * @author Prasanth Jayachandran
 * @link https://github.com/prasanthj/pig-codemirror-2
 * This implementation is adapted from PL/SQL mode in CodeMirror 2.
 */
/**
 * Registers the "pig" mode: a tokenizer for Pig Latin scripts.
 *
 * parserConfig supplies three optional lookup objects whose enumerable keys
 * are UPPER-CASE words (keywords, builtins, types) and an optional
 * multiLineStrings flag that lets a string continue past the end of a line.
 * The returned mode object exposes startState() and token(stream, state).
 */
CodeMirror.defineMode("pig", function(_config, parserConfig) {
  var keywords = parserConfig.keywords,
      builtins = parserConfig.builtins,
      types = parserConfig.types,
      multiLineStrings = parserConfig.multiLineStrings;

  var isOperatorChar = /[*+\-%<>=&?:\/!|]/;

  // Install f as the active tokenizer and immediately run it on the stream.
  function chain(stream, state, f) {
    state.tokenize = f;
    return f(stream, state);
  }

  // Category of the most recent token. Written by ret(); nothing in this
  // mode reads it back.
  var type;

  // Record the token category and hand back the style string for CodeMirror.
  function ret(tp, style) {
    type = tp;
    return style;
  }

  // True when table is present and lists word as an enumerable key.
  function isListed(table, word) {
    return !!table && table.propertyIsEnumerable(word);
  }

  // Consume a block comment body. Stays installed across lines until the
  // closing "*/" has been read, then switches back to tokenBase.
  function tokenComment(stream, state) {
    var sawStar = false;
    var ch;
    while ((ch = stream.next()) != null) {
      if (ch === "/" && sawStar) {
        state.tokenize = tokenBase;
        break;
      }
      sawStar = (ch === "*");
    }
    return ret("comment", "comment");
  }

  // Build a tokenizer that consumes a string literal opened by `quote`.
  function tokenString(quote) {
    return function(stream, state) {
      var escaped = false, next, end = false;
      while ((next = stream.next()) != null) {
        if (next === quote && !escaped) {
          end = true;
          break;
        }
        escaped = !escaped && next === "\\";
      }
      // Leave string mode once the closing quote is seen. An unterminated
      // string only carries over to the next line when the line ends in a
      // backslash or multiLineStrings is enabled.
      if (end || !(escaped || multiLineStrings))
        state.tokenize = tokenBase;
      // NOTE(review): strings are emitted with the "error" style; kept as-is.
      return ret("string", "error");
    };
  }

  // Default tokenizer: classify the token starting at the current position.
  function tokenBase(stream, state) {
    var ch = stream.next();

    // is a start of string?
    if (ch === '"' || ch === "'")
      return chain(stream, state, tokenString(ch));

    // is it one of the special chars (left unstyled)
    if (/[\[\]{}\(\),;\.]/.test(ch))
      return ret(ch);

    // is it a number?
    if (/\d/.test(ch)) {
      stream.eatWhile(/[\w\.]/);
      return ret("number", "number");
    }

    // multi line comment or operator
    if (ch === "/") {
      if (stream.eat("*"))
        return chain(stream, state, tokenComment);
      stream.eatWhile(isOperatorChar);
      return ret("operator", "operator");
    }

    // single line comment or operator
    if (ch === "-") {
      if (stream.eat("-")) {
        stream.skipToEnd();
        return ret("comment", "comment");
      }
      stream.eatWhile(isOperatorChar);
      return ret("operator", "operator");
    }

    // is it an operator
    if (isOperatorChar.test(ch)) {
      stream.eatWhile(isOperatorChar);
      return ret("operator", "operator");
    }

    // otherwise it is a word: keyword, builtin, type or plain variable
    stream.eatWhile(/[\w\$_]/);
    var word = stream.current().toUpperCase();

    // is it one of the listed keywords?
    if (isListed(keywords, word)) {
      // keywords can be used as variables like flatten(group), group.$0 etc.
      // Peek rather than eat so the delimiter stays a token of its own.
      var following = stream.peek();
      if (following === ")" || following === ".")
        return ret("variable", "pig-word");
      return ret("keyword", "keyword");
    }
    // is it one of the builtin functions?
    if (isListed(builtins, word))
      return ret("keyword", "variable-2");
    // is it one of the listed types?
    if (isListed(types, word))
      return ret("keyword", "variable-3");
    // default is a 'variable'
    return ret("variable", "pig-word");
  }

  // Interface
  return {
    startState: function() {
      return {
        tokenize: tokenBase
      };
    },

    token: function(stream, state) {
      if (stream.eatSpace()) return null;
      return state.tokenize(stream, state);
    }
  };
});
/**
 * Turn a space-separated word list into a lookup object.
 *
 * @param {string} str - words separated by single spaces
 * @returns {Object} plain object with one `true` entry per non-empty word
 */
function keywords(str) {
  var obj = {}, words = str.split(" ");
  for (var i = 0; i < words.length; ++i) {
    // A leading, trailing or doubled space makes split() yield "", which
    // must not become a key of the table.
    if (words[i]) obj[words[i]] = true;
  }
  return obj;
}
// Space-separated, UPPER-CASE word lists. Every segment except the last ends
// with a space so the concatenated string stays single-space separated; the
// final segment has no trailing space, so splitting yields no empty word.

// builtin funcs taken from trunk revision 1303237
var pBuiltins = "ABS ACOS ARITY ASIN ATAN AVG BAGSIZE BINSTORAGE BLOOM BUILDBLOOM CBRT CEIL "
  + "CONCAT COR COS COSH COUNT COUNT_STAR COV CONSTANTSIZE CUBEDIMENSIONS DIFF DISTINCT DOUBLEABS "
  + "DOUBLEAVG DOUBLEBASE DOUBLEMAX DOUBLEMIN DOUBLEROUND DOUBLESUM EXP FLOOR FLOATABS FLOATAVG "
  + "FLOATMAX FLOATMIN FLOATROUND FLOATSUM GENERICINVOKER INDEXOF INTABS INTAVG INTMAX INTMIN "
  + "INTSUM INVOKEFORDOUBLE INVOKEFORFLOAT INVOKEFORINT INVOKEFORLONG INVOKEFORSTRING INVOKER "
  + "ISEMPTY JSONLOADER JSONMETADATA JSONSTORAGE LAST_INDEX_OF LCFIRST LOG LOG10 LOWER LONGABS "
  + "LONGAVG LONGMAX LONGMIN LONGSUM MAX MIN MAPSIZE MONITOREDUDF NONDETERMINISTIC OUTPUTSCHEMA "
  + "PIGSTORAGE PIGSTREAMING RANDOM REGEX_EXTRACT REGEX_EXTRACT_ALL REPLACE ROUND SIN SINH SIZE "
  + "SQRT STRSPLIT SUBSTRING SUM STRINGCONCAT STRINGMAX STRINGMIN STRINGSIZE TAN TANH TOBAG "
  + "TOKENIZE TOMAP TOP TOTUPLE TRIM TEXTLOADER TUPLESIZE UCFIRST UPPER UTF8STORAGECONVERTER";

// taken from QueryLexer.g
var pKeywords = "VOID IMPORT RETURNS DEFINE LOAD FILTER FOREACH ORDER CUBE DISTINCT COGROUP "
  + "JOIN CROSS UNION SPLIT INTO IF OTHERWISE ALL AS BY USING INNER OUTER ONSCHEMA PARALLEL "
  + "PARTITION GROUP AND OR NOT GENERATE FLATTEN ASC DESC IS STREAM THROUGH STORE MAPREDUCE "
  + "SHIP CACHE INPUT OUTPUT STDERROR STDIN STDOUT LIMIT SAMPLE LEFT RIGHT FULL EQ GT LT GTE LTE "
  + "NEQ MATCHES TRUE FALSE";

// Pig data type names
var pTypes = "BOOLEAN INT LONG FLOAT DOUBLE CHARARRAY BYTEARRAY BAG TUPLE MAP";
// Register the "text/x-pig" MIME type. The spec names the "pig" mode and
// passes it the three lookup tables built from the word lists above.
CodeMirror.defineMIME("text/x-pig", {
  name: "pig",
  builtins: keywords(pBuiltins),
  keywords: keywords(pKeywords),
  types: keywords(pTypes)
});