#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Implements a clause: a unary or binary combination of predicates (or of
# other clauses) through a logical operator, together with a pyparsing-based
# parser that builds clauses from their string representation.
#
# Copyright (C) UPMC Paris Universitas
# Authors:
#   Jordan Augé
#   Marc-Olivier Buob

import pyparsing as pp
import operator
import re

from manifold.util.predicate import Predicate

try:
    from types import StringTypes
except ImportError:
    # Python 3 has no types.StringTypes; str is the only text type there.
    StringTypes = (str,)

# XXX When to use Keyword vs. Regex vs. CaselessLiteral
# XXX capitalization ?

# Instead of CaselessLiteral, try using CaselessKeyword. Keywords are better
# choice for grammar keywords, since they inherently avoid mistaking the leading
# 'in' of 'inside' as the keyword 'in' in your grammar.


class Clause(object):
    """A unary or binary expression whose operator is looked up in
    ``Predicate.operators``.

    Supported call forms:

    - ``Clause(string)``: the string is parsed by ``ClauseStringParser`` and
      the parser's result is returned *instead of* a ``Clause`` instance
      (``__init__`` is therefore not run in that case).
    - ``Clause(op, operand)``: unary clause.
    - ``Clause(left, op, right)``: binary clause.
    """

    def __new__(cls, *args, **kwargs):
        """Parse a single string argument; otherwise allocate a plain instance."""
        if len(args) == 1 and isinstance(args[0], StringTypes):
            return ClauseStringParser().parse(args[0])
        # object.__new__ must not receive the constructor arguments: doing so
        # is deprecated since Python 2.6 and raises TypeError on Python 3.
        return super(Clause, cls).__new__(cls)

    def __init__(self, *args, **kwargs):
        """Store the operator and its operand(s).

        :param args: ``(op, operand)`` or ``(left, op, right)`` where ``op``
            is a key of ``Predicate.operators``.
        :raises Exception: if the number of positional arguments is neither
            2 nor 3.
        :raises KeyError: if ``op`` is not a key of ``Predicate.operators``.
        """
        if len(args) == 2:
            # Unary: operator comes first.
            self.operator = Predicate.operators[args[0]]
            self.operands = [args[1]]
        elif len(args) == 3:
            # Binary: infix operator.
            self.operator = Predicate.operators[args[1]]
            self.operands = [args[0], args[2]]
        else:
            raise Exception("Clause can only be unary or binary")

    def opstr(self, operator):
        """Return the first string of ``Predicate.operators`` mapped to
        ``operator``, or ``''`` if there is none."""
        ops = [string for string, op in Predicate.operators.items() if op == operator]
        return ops[0] if ops else ''

    def __repr__(self):
        if len(self.operands) == 1:
            return "%s(%s)" % (self.operator, self.operands[0])
        else:
            return "(%s %s %s)" % (self.operands[0], self.opstr(self.operator), self.operands[1])


class ClauseStringParser(object):
    """Build ``Predicate`` / ``Clause`` objects from a textual clause."""

    def __init__(self):
        """Build the grammar and store it in ``self.bnf``.

        Grammar::

            field     ::= (alphanumeric | '_')+
            operator  ::= any key of Predicate.operators
            value     ::= '"' characters '"'
            predicate ::= field operator value
            clause    ::= predicates combined with, from highest to lowest
                          precedence: '!' (unary), 'and' / '&&', 'or' / '||'

        NOTE: only double-quoted values are accepted; unquoted numeric
        literals are not supported by this grammar.
        """
        # Regex matching any of the known operators, e.g. ">=|<=|!=|>|<".
        # Every operator is fully escaped, and longer operators come first so
        # that e.g. '<=' is never cut short by a match on '<' (regex
        # alternation picks the first alternative that matches, not the
        # longest one).
        operator_strings = sorted(Predicate.operators, key=lambda op: (-len(op), op))
        operator_rx = '|'.join(re.escape(op) for op in operator_strings)

        # predicate
        field = pp.Word(pp.alphanums + '_')
        operator_expr = pp.Regex(operator_rx).setName("operator")
        value = pp.QuotedString('"')

        predicate = (field + operator_expr + value).setParseAction(self.handlePredicate)

        # clause of predicates
        and_op = pp.CaselessLiteral("and") | pp.Keyword("&&")
        or_op = pp.CaselessLiteral("or") | pp.Keyword("||")
        not_op = pp.Keyword("!")

        # Each parse action receives a one-element token list holding the
        # matched group; '*x' unwraps that group for handleClause.
        predicate_precedence_list = [
            (not_op, 1, pp.opAssoc.RIGHT, lambda x: self.handleClause(*x)),
            (and_op, 2, pp.opAssoc.LEFT, lambda x: self.handleClause(*x)),
            (or_op, 2, pp.opAssoc.LEFT, lambda x: self.handleClause(*x)),
        ]
        clause = pp.operatorPrecedence(predicate, predicate_precedence_list)

        self.bnf = clause

    def handlePredicate(self, args):
        """Parse action: turn ``[field, operator, value]`` into a Predicate."""
        return Predicate(*args)

    def handleClause(self, args):
        """Parse action: turn a matched operator group into a Clause.

        :param args: the tokens of one group, either ``[op, operand]`` or
            ``[operand, op, operand, (op, operand)...]``.
        :returns: a ``Clause``; chains of a left-associative binary operator
            are folded left to right into nested binary clauses.
        """
        tokens = list(args)
        if len(tokens) < 4 or len(tokens) % 2 == 0:
            # Unary / binary case; any unsupported arity is rejected by Clause.
            return Clause(*tokens)
        # pyparsing reports 'a && b && c' as one flat group
        # [a, '&&', b, '&&', c]: combine it as ((a && b) && c).
        clause = Clause(*tokens[:3])
        for i in range(3, len(tokens), 2):
            clause = Clause(clause, tokens[i], tokens[i + 1])
        return clause

    def parse(self, string):
        """Parse ``string`` with the clause grammar.

        :param string: the clause text; it must match the grammar entirely
            (``parseAll=True``).
        :returns: the pyparsing result of ``parseString``.
        """
        return self.bnf.parseString(string, parseAll=True)


if __name__ == "__main__":
    print(ClauseStringParser().parse('country == "Europe" || ts > "01-01-2007" && country == "France"'))
    print(Clause('country == "Europe" || ts > "01-01-2007" && country == "France"'))