#!/usr/bin/python
#
# sqlfindbad.py -- scan PlanetLab nodes with a thread pool and record the
# observed state of each node as a FindbadNodeRecord in the monitor database.

import os
import sys
import string
import time
from datetime import datetime, timedelta
import threadpool
import threading

from monitor.util import file
from pcucontrol.util import command
from monitor import config
from monitor.database.info.model import FindbadNodeRecord, session
from monitor.sources import comon
from monitor.wrapper import plc, plccache
from monitor.scanapi import *
from monitor.query import verify, query_to_dict, node_select
from monitor.common import nmap_port_status
import traceback

#print "starting sqlfindbad.py"

# QUERY all nodes.
COMON_COTOPURL = "http://summer.cs.princeton.edu/status/tabulator.cgi?" + \
                 "table=table_nodeview&" + \
                 "dumpcols='name,resptime,sshstatus,uptime,lastcotop,cpuspeed,memsize,disksize'&" + \
                 "formatcsv"
                 #"formatcsv&" + \
                 #"select='lastcotop!=0'"

api = plc.getAuthAPI()
plc_lock = threading.Lock()
round = 1
global_round = round
count = 0

# This is called when an exception occurs within a worker thread.
def handle_exception(request, result):
    print "Exception occurred in request %s" % request.requestID
    for i in result:
        print "Result: %s" % i

def checkAndRecordState(l_nodes, cohash):
    global global_round
    global count

    tp = threadpool.ThreadPool(10)
    scannode = ScanNodeInternal(global_round)

    # CREATE all the work requests.
    for nodename in l_nodes:
        #fbnodesync = FindbadNodeRecordSync.findby_or_create(hostname=nodename, if_new_set={'round': 0})
        #node_round = fbnodesync.round
        node_round = global_round - 1
        #fbnodesync.flush()

        if node_round < global_round or config.force:
            # Recreate node stats when refreshed.
            #print "%s" % nodename
            req = threadpool.WorkRequest(scannode.collectInternal, [nodename, cohash], {},
                                         None, scannode.record, handle_exception)
            tp.putRequest(req)
        else:
            # We just skip it, since it's "up to date".
            count += 1
            #print "%d %s %s" % (count, nodename, externalState['nodes'][nodename]['values'])
            print "%d %s %s" % (count, nodename, node_round)

    # WAIT while all the work requests are processed.
    begin = time.time()
    while 1:
        try:
            time.sleep(1)
            tp.poll()
            # Give up if the scan has been running for more than 1.5 hours.
            if time.time() - begin > (60*60*1.5):
                print "findbad.py has run out of time!!!!!!"
                os._exit(1)
        except KeyboardInterrupt:
            print "Interrupted!"
            break
        except threadpool.NoResultsPending:
            print "All results collected."
            break

    #print FindbadNodeRecordSync.query.count()
    print FindbadNodeRecord.query.count()
    session.flush()

def main():
    global global_round

    #fbsync = FindbadNodeRecordSync.findby_or_create(hostname="global",
    #                                                if_new_set={'round': global_round})
    #global_round = fbsync.round

    if config.increment:
        # Update the global round number to force refreshes across all nodes.
        global_round += 1

    cotop = comon.Comon()
    # lastcotop measures whether cotop is actually running.  This is a better
    # metric than sshstatus, or other values from CoMon.
    cotop_url = COMON_COTOPURL

    # History information for all nodes.
    cohash = {}
    #cohash = cotop.coget(cotop_url)

    l_nodes = plccache.l_nodes
    if config.nodelist:
        f_nodes = file.getListFromFile(config.nodelist)
        l_nodes = filter(lambda x: x['hostname'] in f_nodes, l_nodes)
    elif config.node:
        f_nodes = [config.node]
        l_nodes = filter(lambda x: x['hostname'] in f_nodes, l_nodes)
    elif config.nodegroup:
        ng = api.GetNodeGroups({'name': config.nodegroup})
        l_nodes = plccache.GetNodesByIds(ng[0]['node_ids'])
    elif config.site:
        site = plccache.GetSitesByName([config.site])
        l_nodes = plccache.GetNodesByIds(site[0]['node_ids'])
    elif config.sitelist:
        site_list = config.sitelist.split(',')
        sites = plccache.GetSitesByName(site_list)
        node_ids = []
        for s in sites:
            node_ids += s['node_ids']
        l_nodes = plccache.GetNodesByIds(node_ids)

    l_nodes = [node['hostname'] for node in l_nodes]

    # Perform this query after the options above, so that the filter above
    # does not break.
    if config.nodeselect:
        plcnodes = plccache.l_nodes
        plcnodes = [node['hostname'] for node in plcnodes]
        l_nodes = node_select(config.nodeselect, plcnodes, None)

    print "fetching %s hosts" % len(l_nodes)

    checkAndRecordState(l_nodes, cohash)

    if config.increment:
        # Update the global round number to force refreshes across all nodes.
        #fbsync.round = global_round
        #fbsync.flush()
        pass

    return 0

if __name__ == '__main__':
    from monitor import parser as parsermodule

    parser = parsermodule.getParser(['nodesets'])
    parser.set_defaults(increment=False, dbname="findbad", cachenodes=False,
                        force=False,)
    parser.add_option("", "--cachenodes", action="store_true",
                      help="Cache node lookup from PLC")
    parser.add_option("", "--dbname", dest="dbname", metavar="FILE",
                      help="Specify the name of the database to which the information is saved")
    parser.add_option("-i", "--increment", action="store_true", dest="increment",
                      help="Increment round number to force refresh or retry")
    parser.add_option("", "--force", action="store_true", dest="force",
                      help="Force probe without incrementing global 'round'.")
    parser = parsermodule.getParser(['defaults'], parser)
    cfg = parsermodule.parse_args(parser)

    try:
        main()
    except Exception, err:
        traceback.print_exc()
        from monitor.common import email_exception
        email_exception()
        print "Exception: %s" % err
        print "Saving data... exiting."
        sys.exit(0)
    print "sleeping"
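
# Example invocations (a sketch only: options beyond those added above, such as
# --node, --nodelist, and --site, are assumed to be supplied by the 'nodesets'
# parser, and the hostname and file name below are hypothetical):
#
#   ./sqlfindbad.py --increment
#   ./sqlfindbad.py --node=planetlab1.example.edu --force
#   ./sqlfindbad.py --nodelist=nodes.txt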