check in hpc_wizard and analytics python source
[plstackapi.git] / planetstack / hpc_wizard / planetstack_analytics.py
1 from bigquery_analytics import BigQueryAnalytics
2 import json
3
4 class PlanetStackAnalytics(BigQueryAnalytics):
5     def __init__(self, tableName="demoevents"):
6         BigQueryAnalytics.__init__(self, tableName)
7
8     def compose_query(self, slice=None, site=None, node=None, timeField="MinuteTime", avg=[], sum=[], count=[], computed=[], groupBy=["MinuteTime"], orderBy=["MinuteTime"], tableName="demoevents"):
9         tablePart = "%s.%s@-3600000--1" % ("vicci", tableName)
10
11         fields = []
12         fieldNames = []
13
14         if (timeField=="MinuteTime"):
15             fields.append("INTEGER(TIMESTAMP_TO_SEC(time)/60)*60 as MinuteTime")
16         elif (timeField=="HourTime"):
17             fields.append("INTEGER(TIMESTAMP_TO_SEC(time)/60/60)*60*60 as HourTime")
18         elif (timeField=="DayTime"):
19             fields.append("INTEGER(TIMESTAMP_TO_SEC(time)/60/60/24)*60*60*24 as DayTime")
20
21         for fieldName in avg:
22             fields.append("AVG(%s) as avg_%s" % (fieldName, fieldName.replace("%","")))
23             fieldNames.append("avg_%s" % fieldName.replace("%",""))
24
25         for fieldName in sum:
26             fields.append("SUM(%s) as sum_%s" % (fieldName, fieldName.replace("%","")))
27             fieldNames.append("sum_%s" % fieldName.replace("%",""))
28
29         for fieldName in count:
30             fields.append("COUNT(distinct %s) as count_%s" % (fieldName, fieldName.replace("%","")))
31             fieldNames.append("count_%s" % fieldName.replace("%",""))
32
33         for fieldName in computed:
34             operator = "/"
35             parts = fieldName.split("/")
36             computedFieldName = "computed_" + parts[0].replace("%","")+"_div_"+parts[1].replace("%","")
37             if len(parts)==1:
38                 operator = "*"
39                 parts = computed.split("*")
40                 computedFieldName = "computed_" + parts[0].replace("%","")+"_mult_"+parts[1].replace("%","")
41             fields.append("SUM(%s)%sSUM(%s) as %s" % (parts[0], operator, parts[1], computedFieldName))
42             fieldNames.append(computedFieldName)
43
44         fields = ", ".join(fields)
45
46         where = []
47
48         if slice:
49             where.append("%%slice='%s'" % slice)
50         if site:
51             where.append("%%site='%s'" % site)
52         if node:
53             where.append("%%hostname='%s'" % node)
54
55         if where:
56             where = " WHERE " + " AND ".join(where)
57         else:
58             where =""
59
60         if groupBy:
61             groupBy = " GROUP BY " + ",".join(groupBy)
62         else:
63             groupBy = ""
64
65         if orderBy:
66             orderBy = " ORDER BY " + ",".join(orderBy)
67         else:
68             orderBy = ""
69
70         if computed:
71             subQuery = "SELECT %%hostname, %s FROM [%s]" % (fields, tablePart)
72             if where:
73                 subQuery = subQuery + where
74             subQuery = subQuery + " GROUP BY %s,%%hostname" % timeField
75
76             sumFields = []
77             for fieldName in fieldNames:
78                 if fieldName.startswith("avg"):
79                     sumFields.append("AVG(%s) as avg_%s"%(fieldName,fieldName))
80                 else:
81                     sumFields.append("SUM(%s) as sum_%s"%(fieldName,fieldName))
82
83             sumFields = ",".join(sumFields)
84
85             query = "SELECT %s, %s FROM (%s)" % (timeField, sumFields, subQuery)
86             if groupBy:
87                 query = query + groupBy
88             if orderBy:
89                 query = query + orderBy
90         else:
91             query = "SELECT %s FROM [%s]" % (fields, tablePart)
92             if where:
93                 query = query + " " + where
94             if groupBy:
95                 query = query + groupBy
96             if orderBy:
97                 query = query + orderBy
98
99         return query
100
101     def get_list_from_req(self, req, name, default=[]):
102         value = req.GET.get(name, None)
103         if not value:
104             return default
105         return value.split(",")
106
107     def format_result(self, format, result, query):
108         if (format == "json_dicts"):
109             result = {"query": query, "rows": result}
110             return ("application/javascript", json.dumps(result))
111
112         elif (format == "json_arrays"):
113             new_result = []
114             for row in result:
115                 new_row = []
116                 for key in sorted(row.keys()):
117                     new_row.append(row[key])
118                 new_result.append(new_row)
119                 new_result = {"query": query, "rows": new_result}
120             return ("application/javascript", json.dumps(new_result))
121
122         elif (format == "html_table"):
123             new_rows = []
124             for row in result:
125                 new_row = []
126                 for key in sorted(row.keys()):
127                     new_row.append("<TD>%s</TD>" % str(row[key]))
128                 new_rows.append("<TR>%s</TR>" % "".join(new_row))
129
130             new_result = "<TABLE>%s</TABLE>" % "\n".join(new_rows)
131
132             return ("text/html", new_result)
133
134     def process_request(self, req):
135         print req.GET
136
137         tqx = req.GET.get("reqId", None)
138
139         slice = req.GET.get("slice", None)
140         site = req.GET.get("site", None)
141         node = req.GET.get("node", None)
142
143         format = req.GET.get("format", "json_dicts")
144
145         timeField = req.GET.get("timeField", "MinuteTime")
146         avg = self.get_list_from_req(req, "avg")
147         sum = self.get_list_from_req(req, "sum")
148         count = self.get_list_from_req(req, "count")
149         computed = self.get_list_from_req(req, "computed")
150         groupBy = self.get_list_from_req(req, "groupBy", ["MinuteTime"])
151         orderBy = self.get_list_from_req(req, "orderBy", ["MinuteTime"])
152
153         maxRows = req.GET.get("maxRows", None)
154
155         q = self.compose_query(slice, site, node, timeField, avg, sum, count, computed, groupBy, orderBy)
156
157         print q
158
159         if (format=="raw"):
160             result = self.run_query_raw(q)
161             result["reqId"] = 0        # XXX FIXME
162             return ("application/javascript", json.dumps(result))
163         else:
164             result = self.run_query(q)
165
166             if maxRows:
167                 result = result[-int(maxRows):]
168
169             return self.format_result(format, result, q)
170
171
172 def DoPlanetStackAnalytics(request):
173     bq = PlanetStackAnalytics()
174     result = bq.process_request(request)
175
176     return result
177
178 def main():
179     bq = PlanetStackAnalytics()
180
181     q=bq.compose_query(avg=["%cpu"], count=["%hostname"], slice="HyperCache")
182     print q
183     bq.dump_table(bq.run_query(q))
184
185     q=bq.compose_query(computed=["%bytes_sent/%elapsed"])
186     print
187     print q
188     bq.dump_table(bq.run_query(q))
189     #print bq.run_query_raw(q)
190
191     q=bq.compose_query(timeField="HourTime", avg=["%cpu"], count=["%hostname"], computed=["%bytes_sent/%elapsed"], groupBy=["HourTime"], orderBy=["HourTime"])
192     print
193     print q
194     bq.dump_table(bq.run_query(q))
195
196 if __name__ == "__main__":
197     main()
198
199
200
201
202