portal: added wip for PI validation page
[unfold.git] / manifold / util / clause.py
diff --git a/manifold/util/clause.py b/manifold/util/clause.py
new file mode 100644 (file)
index 0000000..670a689
--- /dev/null
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Implements a clause
+# - a "tree" (more precisely a predecessor map, typically computed thanks to a DFS) 
+# - a set of needed fields (those queried by the user)
+#
+# Copyright (C) UPMC Paris Universitas
+# Authors:
+#   Jordan AugĂ©       <jordan.auge@lip6.fr> 
+#   Marc-Olivier Buob <marc-olivier.buob@lip6.fr>
+
+import pyparsing as pp
+import operator, re
+
+from manifold.util.predicate import Predicate
+from types                 import StringTypes
+
+# XXX When to use Keyword vs. Regex vs. CaselessLiteral
+# XXX capitalization ?
+
+# Instead of CaselessLiteral, try using CaselessKeyword. Keywords are better
+# choice for grammar keywords, since they inherently avoid mistaking the leading
+# 'in' of 'inside' as the keyword 'in' in your grammar.
+
+
+class Clause(object):
+
+    def __new__(cls, *args, **kwargs):
+        if len(args) == 1 and isinstance(args[0], StringTypes):
+            return ClauseStringParser().parse(args[0])
+        return super(Clause, cls).__new__(cls, *args, **kwargs)
+
+    def __init__(self, *args, **kwargs):
+        if len(args) == 2:
+            # unary
+            self.operator = Predicate.operators[args[0]]
+            self.operands = [args[1]]
+        elif len(args) == 3:
+            self.operator = Predicate.operators[args[1]]
+            self.operands = [args[0], args[2]]
+        else:
+            raise Exception, "Clause can only be unary or binary"
+                
+    def opstr(self, operator):
+        ops = [string for string, op in Predicate.operators.items() if op == operator]
+        return ops[0] if ops else ''
+
+    def __repr__(self):
+        if len(self.operands) == 1:
+            return "%s(%s)" % (self.operator, self.operands[0])
+        else:
+            return "(%s %s %s)" % (self.operands[0], self.opstr(self.operator), self.operands[1])
+
+class ClauseStringParser(object):
+
+    def __init__(self):
+        """
+        BNF HERE
+        """
+
+        #integer = pp.Word(nums)
+        #floatNumber = pp.Regex(r'\d+(\.\d*)?([eE]\d+)?')
+        point = pp.Literal( "." )
+        e     = pp.CaselessLiteral( "E" )
+
+        # Regex string representing the set of possible operators
+        # Example : ">=|<=|!=|>|<|="
+        OPERATOR_RX = '|'.join([re.sub('\|', '\|', o) for o in Predicate.operators.keys()])
+
+        # predicate
+        field = pp.Word(pp.alphanums + '_')
+        operator = pp.Regex(OPERATOR_RX).setName("operator")
+        value = pp.QuotedString('"') #| pp.Combine( pp.Word( "+-"+ pp.nums, pp.nums) + pp.Optional( point + pp.Optional( pp.Word( pp.nums ) ) ) + pp.Optional( e + pp.Word( "+-"+pp.nums, pp.nums ) ) )
+
+        predicate = (field + operator + value).setParseAction(self.handlePredicate)
+
+        # clause of predicates
+        and_op = pp.CaselessLiteral("and") | pp.Keyword("&&")
+        or_op  = pp.CaselessLiteral("or")  | pp.Keyword("||")
+        not_op = pp.Keyword("!")
+
+        predicate_precedence_list = [
+            (not_op, 1, pp.opAssoc.RIGHT, lambda x: self.handleClause(*x)),
+            (and_op, 2, pp.opAssoc.LEFT,  lambda x: self.handleClause(*x)),
+            (or_op,  2, pp.opAssoc.LEFT,  lambda x: self.handleClause(*x))
+        ]
+        clause = pp.operatorPrecedence(predicate, predicate_precedence_list)
+
+        self.bnf = clause
+
+    def handlePredicate(self, args):
+        return Predicate(*args)
+
+    def handleClause(self, args):
+        return Clause(*args)
+
+    def parse(self, string):
+        return self.bnf.parseString(string,parseAll=True)
+
+if __name__ == "__main__":
+    print ClauseStringParser().parse('country == "Europe" || ts > "01-01-2007" && country == "France"')
+    print Clause('country == "Europe" || ts > "01-01-2007" && country == "France"')