From 58c83964f454e8e1a0404cccd1011467c29f13ce Mon Sep 17 00:00:00 2001 From: Scott Baker Date: Thu, 24 Apr 2014 17:04:55 -0700 Subject: [PATCH] only do maxDeltaTime if there are rows, make count only count distinct items, some debugging --- planetstack/hpc_wizard/bigquery_analytics.py | 27 +++++++++++++++++--- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/planetstack/hpc_wizard/bigquery_analytics.py b/planetstack/hpc_wizard/bigquery_analytics.py index 994e837..dafb55c 100644 --- a/planetstack/hpc_wizard/bigquery_analytics.py +++ b/planetstack/hpc_wizard/bigquery_analytics.py @@ -71,6 +71,11 @@ class BigQueryAnalytics: raise Exception('Error accessing register allocations: %d'%resp.status_code) def run_query_raw(self, query): + try: + file("/tmp/query_log","a").write("query %s\n" % query) + except: + pass + p = re.compile('%[a-zA-z_]*') try: @@ -79,6 +84,11 @@ class BigQueryAnalytics: self.reload_mapping() query = p.sub(self.remap, query) + try: + file("/tmp/query_log","a").write("remapped query %s\n" % query) + except: + pass + storage = Storage('/opt/planetstack/hpc_wizard/bigquery_credentials.dat') credentials = storage.get() @@ -155,13 +165,21 @@ class BigQueryAnalytics: new_row["max_" + k] = max(new_row.get("max_" + k, 0), to_number(row.get(k,0))) for k in count: - new_row["count_" + k] = new_row.get("count_" + k, 0) + 1 + v = row.get(k,None) + dl = new_row["distinct_" + k] = new_row.get("distinct_" + k, []) + if (v not in dl): + dl.append(v) + + #new_row["count_" + k] = new_row.get("count_" + k, 0) + 1 for row in new_rows.values(): for k in avg: row["avg_" + k] = float(row["avg_" + k]) / row["avg_base_" + k] del row["avg_base_" + k] + for k in count: + new_row["count_" + k] = len(new_row.get("distinct_" + k, [])) + return new_rows.values() def do_computed_fields(self, rows, computed=[]): @@ -190,9 +208,10 @@ class BigQueryAnalytics: for (k,v) in filter.items(): rows = self.filter_results(rows, k, v) - if maxDeltaTime is not None: - maxTime = max([float(row["time"]) for row in rows]) - rows = [row for row in rows if float(row["time"])>=maxTime-maxDeltaTime] + if rows: + if maxDeltaTime is not None: + maxTime = max([float(row["time"]) for row in rows]) + rows = [row for row in rows if float(row["time"])>=maxTime-maxDeltaTime] (computedFieldNames, rows) = self.do_computed_fields(rows, computed) sum = sum + computedFieldNames -- 2.43.0