2 Copyright (C) 2011 by MarkLogic Corporation
3 Author: Mike Brevoort <mike@brevoort.com>
5 Permission is hereby granted, free of charge, to any person obtaining a copy
6 of this software and associated documentation files (the "Software"), to deal
7 in the Software without restriction, including without limitation the rights
8 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 copies of the Software, and to permit persons to whom the Software is
10 furnished to do so, subject to the following conditions:
12 The above copyright notice and this permission notice shall be included in
13 all copies or substantial portions of the Software.
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 CodeMirror.defineMode("xquery", function() {
25 // The keywords object is set to the result of this self executing
26 // function. Each keyword is a property of the keywords object whose
27 // value is {type: atype, style: astyle}
28 var keywords = function(){
29 // convenience functions used to build keywords object
// Build the descriptor stored in the keywords table for one keyword.
// `type` becomes the descriptor's token type; the style is always "keyword".
// A fresh object is returned on every call.
function kw(type) {
  var descriptor = {};
  descriptor.type = type;
  descriptor.style = "keyword";
  return descriptor;
}
31 var A = kw("keyword a")
34 , operator = kw("operator")
35 , atom = {type: "atom", style: "atom"}
36 , punctuation = {type: "punctuation", style: null}
37 , qualifier = {type: "axis_specifier", style: "qualifier"};
39 // kwObj is what is returned from this function at the end
41 'if': A, 'switch': A, 'while': A, 'for': A,
42 'else': B, 'then': B, 'try': B, 'finally': B, 'catch': B,
43 'element': C, 'attribute': C, 'let': C, 'implements': C, 'import': C, 'module': C, 'namespace': C,
44 'return': C, 'super': C, 'this': C, 'throws': C, 'where': C, 'private': C,
46 'null': atom, 'fn:false()': atom, 'fn:true()': atom
49 // a list of 'basic' keywords. For each add a property to kwObj with the value of
50 // {type: basic[i], style: "keyword"} e.g. 'after' --> {type: "after", style: "keyword"}
51 var basic = ['after','ancestor','ancestor-or-self','and','as','ascending','assert','attribute','before',
52 'by','case','cast','child','comment','declare','default','define','descendant','descendant-or-self',
53 'descending','document','document-node','element','else','eq','every','except','external','following',
54 'following-sibling','follows','for','function','if','import','in','instance','intersect','item',
55 'let','module','namespace','node','node','of','only','or','order','parent','precedes','preceding',
56 'preceding-sibling','processing-instruction','ref','return','returns','satisfies','schema','schema-element',
57 'self','some','sortby','stable','text','then','to','treat','typeswitch','union','variable','version','where',
58 'xquery', 'empty-sequence'];
59 for(var i=0, l=basic.length; i < l; i++) { kwObj[basic[i]] = kw(basic[i]);};
61 // a list of types. For each add a property to kwObj with the value of
62 // {type: "atom", style: "atom"}
63 var types = ['xs:string', 'xs:float', 'xs:decimal', 'xs:double', 'xs:integer', 'xs:boolean', 'xs:date', 'xs:dateTime',
64 'xs:time', 'xs:duration', 'xs:dayTimeDuration', 'xs:time', 'xs:yearMonthDuration', 'numeric', 'xs:hexBinary',
65 'xs:base64Binary', 'xs:anyURI', 'xs:QName', 'xs:byte','xs:boolean','xs:anyURI','xf:yearMonthDuration'];
66 for(var i=0, l=types.length; i < l; i++) { kwObj[types[i]] = atom;};
68 // each operator will add a property to kwObj with value of {type: "operator", style: "keyword"}
69 var operators = ['eq', 'ne', 'lt', 'le', 'gt', 'ge', ':=', '=', '>', '>=', '<', '<=', '.', '|', '?', 'and', 'or', 'div', 'idiv', 'mod', '*', '/', '+', '-'];
70 for(var i=0, l=operators.length; i < l; i++) { kwObj[operators[i]] = operator;};
72 // each axis_specifiers will add a property to kwObj with value of {type: "axis_specifier", style: "qualifier"}
73 var axis_specifiers = ["self::", "attribute::", "child::", "descendant::", "descendant-or-self::", "parent::",
74 "ancestor::", "ancestor-or-self::", "following::", "preceding::", "following-sibling::", "preceding-sibling::"];
75 for(var i=0, l=axis_specifiers.length; i < l; i++) { kwObj[axis_specifiers[i]] = qualifier; };
80 // Used as scratch variables to communicate multiple values without
81 // consing up tons of objects.
84 function ret(tp, style, cont) {
85 type = tp; content = cont;
89 function chain(stream, state, f) {
91 return f(stream, state);
94 // the primary mode tokenizer
95 function tokenBase(stream, state) {
96 var ch = stream.next(),
97 mightBeFunction = false,
98 isEQName = isEQNameAhead(stream);
100 // an XML tag (if not in some sub, chained tokenizer)
102 if(stream.match("!--", true))
103 return chain(stream, state, tokenXMLComment);
105 if(stream.match("![CDATA", false)) {
106 state.tokenize = tokenCDATA;
107 return ret("tag", "tag");
110 if(stream.match("?", false)) {
111 return chain(stream, state, tokenPreProcessing);
114 var isclose = stream.eat("/");
117 while ((c = stream.eat(/[^\s\u00a0=<>\"\'\/?]/))) tagName += c;
119 return chain(stream, state, tokenTag(tagName, isclose));
123 pushStateStack(state,{ type: "codeblock"});
124 return ret("", null);
128 popStateStack(state);
129 return ret("", null);
131 // if we're in an XML block
132 else if(isInXmlBlock(state)) {
134 return ret("tag", "tag");
135 else if(ch == "/" && stream.eat(">")) {
136 popStateStack(state);
137 return ret("tag", "tag");
140 return ret("word", "variable");
143 else if (/\d/.test(ch)) {
144 stream.match(/^\d*(?:\.\d*)?(?:E[+\-]?\d+)?/);
145 return ret("number", "atom");
148 else if (ch === "(" && stream.eat(":")) {
149 pushStateStack(state, { type: "comment"});
150 return chain(stream, state, tokenComment);
153 else if ( !isEQName && (ch === '"' || ch === "'"))
154 return chain(stream, state, tokenString(ch));
156 else if(ch === "$") {
157 return chain(stream, state, tokenVariable);
160 else if(ch ===":" && stream.eat("=")) {
161 return ret("operator", "keyword");
164 else if(ch === "(") {
165 pushStateStack(state, { type: "paren"});
166 return ret("", null);
169 else if(ch === ")") {
170 popStateStack(state);
171 return ret("", null);
174 else if(ch === "[") {
175 pushStateStack(state, { type: "bracket"});
176 return ret("", null);
179 else if(ch === "]") {
180 popStateStack(state);
181 return ret("", null);
184 var known = keywords.propertyIsEnumerable(ch) && keywords[ch];
186 // if there's an EQName ahead, consume the rest of the string portion, it's likely a function
187 if(isEQName && ch === '\"') while(stream.next() !== '"'){}
188 if(isEQName && ch === '\'') while(stream.next() !== '\''){}
190 // gobble up a word if the character is not known
191 if(!known) stream.eatWhile(/[\w\$_-]/);
193 // gobble a colon in case this is a library function call such as fn:doc
194 var foundColon = stream.eat(":");
196 // if there's not a second colon, gobble another word. Otherwise, it's probably an axis specifier
197 // which should get matched as a keyword
198 if(!stream.eat(":") && foundColon) {
199 stream.eatWhile(/[\w\$_-]/);
201 // if the next non-whitespace character is an open paren, this is probably a function (if not a keyword of another sort)
202 if(stream.match(/^[ \t]*\(/, false)) {
203 mightBeFunction = true;
205 // is the word a keyword?
206 var word = stream.current();
207 known = keywords.propertyIsEnumerable(word) && keywords[word];
209 // if we think it's a function call but not yet known,
210 // set style to variable for now for lack of something better
211 if(mightBeFunction && !known) known = {type: "function_call", style: "variable def"};
213 // if the previous word was element, attribute, axis specifier, this word should be the name of that
214 if(isInXmlConstructor(state)) {
215 popStateStack(state);
216 return ret("word", "variable", word);
218 // as previously checked, if the word is element,attribute, axis specifier, call it an "xmlconstructor" and
219 // push the stack so we know to look for it on the next word
220 if(word == "element" || word == "attribute" || known.type == "axis_specifier") pushStateStack(state, {type: "xmlconstructor"});
222 // if the word is known, return the details of that else just call this a generic 'word'
223 return known ? ret(known.type, known.style, word) :
224 ret("word", "variable", word);
228 // handle comments, including nested
229 function tokenComment(stream, state) {
230 var maybeEnd = false, maybeNested = false, nestedCount = 0, ch;
231 while (ch = stream.next()) {
232 if (ch == ")" && maybeEnd) {
236 popStateStack(state);
240 else if(ch == ":" && maybeNested) {
243 maybeEnd = (ch == ":");
244 maybeNested = (ch == "(");
247 return ret("comment", "comment");
250 // tokenizer for string literals
251 // optionally pass a tokenizer function to set state.tokenize back to when finished
252 function tokenString(quote, f) {
253 return function(stream, state) {
256 if(isInString(state) && stream.current() == quote) {
257 popStateStack(state);
258 if(f) state.tokenize = f;
259 return ret("string", "string");
262 pushStateStack(state, { type: "string", name: quote, tokenize: tokenString(quote, f) });
264 // if we're in a string and in an XML block, allow an embedded code block
265 if(stream.match("{", false) && isInXmlAttributeBlock(state)) {
266 state.tokenize = tokenBase;
267 return ret("string", "string");
271 while (ch = stream.next()) {
273 popStateStack(state);
274 if(f) state.tokenize = f;
278 // if we're in a string and in an XML block, allow an embedded code block in an attribute
279 if(stream.match("{", false) && isInXmlAttributeBlock(state)) {
280 state.tokenize = tokenBase;
281 return ret("string", "string");
287 return ret("string", "string");
291 // tokenizer for variables
292 function tokenVariable(stream, state) {
293 var isVariableChar = /[\w\$_-]/;
295 // a variable may start with a quoted EQName, so if the next character is a quote, consume to the next quote
296 if(stream.eat("\"")) {
297 while(stream.next() !== '\"'){};
300 stream.eatWhile(isVariableChar);
301 if(!stream.match(":=", false)) stream.eat(":");
303 stream.eatWhile(isVariableChar);
304 state.tokenize = tokenBase;
305 return ret("variable", "variable");
308 // tokenizer for XML tags
309 function tokenTag(name, isclose) {
310 return function(stream, state) {
312 if(isclose && stream.eat(">")) {
313 popStateStack(state);
314 state.tokenize = tokenBase;
315 return ret("tag", "tag");
317 // self closing tag without attributes?
319 pushStateStack(state, { type: "tag", name: name, tokenize: tokenBase});
320 if(!stream.eat(">")) {
321 state.tokenize = tokenAttribute;
322 return ret("tag", "tag");
325 state.tokenize = tokenBase;
327 return ret("tag", "tag");
331 // tokenizer for XML attributes
332 function tokenAttribute(stream, state) {
333 var ch = stream.next();
335 if(ch == "/" && stream.eat(">")) {
336 if(isInXmlAttributeBlock(state)) popStateStack(state);
337 if(isInXmlBlock(state)) popStateStack(state);
338 return ret("tag", "tag");
341 if(isInXmlAttributeBlock(state)) popStateStack(state);
342 return ret("tag", "tag");
345 return ret("", null);
347 if (ch == '"' || ch == "'")
348 return chain(stream, state, tokenString(ch, tokenAttribute));
350 if(!isInXmlAttributeBlock(state))
351 pushStateStack(state, { type: "attribute", name: name, tokenize: tokenAttribute});
353 stream.eat(/[a-zA-Z_:]/);
354 stream.eatWhile(/[-a-zA-Z0-9_:.]/);
357 // the case where the attribute has no value and the tag was closed
358 if(stream.match(">", false) || stream.match("/", false)) {
359 popStateStack(state);
360 state.tokenize = tokenBase;
363 return ret("attribute", "attribute");
366 // handle XML comments: consume up to the closing "-->"
367 function tokenXMLComment(stream, state) {
369 while (ch = stream.next()) {
370 if (ch == "-" && stream.match("->", true)) {
371 state.tokenize = tokenBase;
372 return ret("comment", "comment");
379 function tokenCDATA(stream, state) {
381 while (ch = stream.next()) {
382 if (ch == "]" && stream.match("]", true)) {
383 state.tokenize = tokenBase;
384 return ret("comment", "comment");
389 // handle XML processing instructions: consume up to the closing "?>"
390 function tokenPreProcessing(stream, state) {
392 while (ch = stream.next()) {
393 if (ch == "?" && stream.match(">", true)) {
394 state.tokenize = tokenBase;
395 return ret("comment", "comment meta");
401 // functions to test the current context of the state
// Truthy when the innermost entry on the state stack has type "tag",
// i.e. the tokenizer is currently inside an XML tag block.
function isInXmlBlock(state) {
  var insideTag = isIn(state, "tag");
  return insideTag;
}
// Truthy when the innermost entry on the state stack has type "attribute".
function isInXmlAttributeBlock(state) {
  var insideAttribute = isIn(state, "attribute");
  return insideAttribute;
}
// Truthy when the innermost entry on the state stack has type "xmlconstructor".
function isInXmlConstructor(state) {
  var insideConstructor = isIn(state, "xmlconstructor");
  return insideConstructor;
}
// Truthy when the innermost entry on the state stack has type "string".
function isInString(state) {
  var insideString = isIn(state, "string");
  return insideString;
}
407 function isEQNameAhead(stream) {
408 // assume we've already eaten a quote (")
409 if(stream.current() === '"')
410 return stream.match(/^[^\"]+\"\:/, false);
411 else if(stream.current() === '\'')
412 return stream.match(/^[^\"]+\'\:/, false);
417 function isIn(state, type) {
418 return (state.stack.length && state.stack[state.stack.length - 1].type == type);
421 function pushStateStack(state, newState) {
422 state.stack.push(newState);
425 function popStateStack(state) {
427 var reinstateTokenize = state.stack.length && state.stack[state.stack.length-1].tokenize;
428 state.tokenize = reinstateTokenize || tokenBase;
431 // the interface for the mode API
433 startState: function() {
441 token: function(stream, state) {
442 if (stream.eatSpace()) return null;
443 var style = state.tokenize(stream, state);
// Calls CodeMirror.defineMIME with "application/xquery" and the mode name "xquery".
// NOTE(review): presumably this maps the MIME type to the mode defined above —
// confirm against the CodeMirror API documentation.
450 CodeMirror.defineMIME("application/xquery", "xquery");