git://git.onelab.eu
/
plstackapi.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
696e266
)
only do maxDeltaTime if there are rows, make count only count distinct items, some...
author Scott Baker <smbaker@gmail.com> Fri, 25 Apr 2014 00:04:55 +0000 (17:04 -0700)
committer Scott Baker <smbaker@gmail.com> Fri, 25 Apr 2014 00:04:55 +0000 (17:04 -0700)
planetstack/hpc_wizard/bigquery_analytics.py
patch
|
blob
|
history
diff --git a/planetstack/hpc_wizard/bigquery_analytics.py b/planetstack/hpc_wizard/bigquery_analytics.py
index 994e837..dafb55c 100644 (file)
--- a/planetstack/hpc_wizard/bigquery_analytics.py
+++ b/planetstack/hpc_wizard/bigquery_analytics.py
@@ -71,6 +71,11 @@ class BigQueryAnalytics:
raise Exception('Error accessing register allocations: %d'%resp.status_code)
def run_query_raw(self, query):
raise Exception('Error accessing register allocations: %d'%resp.status_code)
def run_query_raw(self, query):
+ try:
+ file("/tmp/query_log","a").write("query %s\n" % query)
+ except:
+ pass
+
p = re.compile('%[a-zA-z_]*')
try:
p = re.compile('%[a-zA-z_]*')
try:
@@ -79,6 +84,11 @@ class BigQueryAnalytics:
self.reload_mapping()
query = p.sub(self.remap, query)
self.reload_mapping()
query = p.sub(self.remap, query)
+ try:
+ file("/tmp/query_log","a").write("remapped query %s\n" % query)
+ except:
+ pass
+
storage = Storage('/opt/planetstack/hpc_wizard/bigquery_credentials.dat')
credentials = storage.get()
storage = Storage('/opt/planetstack/hpc_wizard/bigquery_credentials.dat')
credentials = storage.get()
@@ -155,13 +165,21 @@ class BigQueryAnalytics:
new_row["max_" + k] = max(new_row.get("max_" + k, 0), to_number(row.get(k,0)))
for k in count:
new_row["max_" + k] = max(new_row.get("max_" + k, 0), to_number(row.get(k,0)))
for k in count:
- new_row["count_" + k] = new_row.get("count_" + k, 0) + 1
+ v = row.get(k,None)
+ dl = new_row["distinct_" + k] = new_row.get("distinct_" + k, [])
+ if (v not in dl):
+ dl.append(v)
+
+ #new_row["count_" + k] = new_row.get("count_" + k, 0) + 1
for row in new_rows.values():
for k in avg:
row["avg_" + k] = float(row["avg_" + k]) / row["avg_base_" + k]
del row["avg_base_" + k]
for row in new_rows.values():
for k in avg:
row["avg_" + k] = float(row["avg_" + k]) / row["avg_base_" + k]
del row["avg_base_" + k]
+ for k in count:
+ new_row["count_" + k] = len(new_row.get("distinct_" + k, []))
+
return new_rows.values()
def do_computed_fields(self, rows, computed=[]):
return new_rows.values()
def do_computed_fields(self, rows, computed=[]):
@@ -190,9 +208,10 @@ class BigQueryAnalytics:
for (k,v) in filter.items():
rows = self.filter_results(rows, k, v)
for (k,v) in filter.items():
rows = self.filter_results(rows, k, v)
- if maxDeltaTime is not None:
- maxTime = max([float(row["time"]) for row in rows])
- rows = [row for row in rows if float(row["time"])>=maxTime-maxDeltaTime]
+ if rows:
+ if maxDeltaTime is not None:
+ maxTime = max([float(row["time"]) for row in rows])
+ rows = [row for row in rows if float(row["time"])>=maxTime-maxDeltaTime]
(computedFieldNames, rows) = self.do_computed_fields(rows, computed)
sum = sum + computedFieldNames
(computedFieldNames, rows) = self.do_computed_fields(rows, computed)
sum = sum + computedFieldNames