add a conversion class for datetimes and timestamps, since I need this all the time.
author    Stephen Soltesz <soltesz@cs.princeton.edu>
          Thu, 21 Jan 2010 20:15:57 +0000 (20:15 +0000)
committer Stephen Soltesz <soltesz@cs.princeton.edu>
          Thu, 21 Jan 2010 20:15:57 +0000 (20:15 +0000)
stop parsing the 'Created' value in mailer.py, since it causes problems for PLE
move print statements to stderr in plccache.py and comment out noisy prints in comon.py (logger.debug already records them)
add an 'escapeName' routine in dbpickle so that names containing '/' still map to sensible file paths in output names
fix a bug in scanapi that missed debug nodes when there was no bootmanager.log
add checks for the yum config files

monitor/common.py
monitor/database/dbpickle.py
monitor/scanapi.py
monitor/sources/comon.py
monitor/wrapper/mailer.py
monitor/wrapper/plccache.py

diff --git a/monitor/common.py b/monitor/common.py
index 850d36b..08a6d99 100644
@@ -278,3 +278,14 @@ def found_within(recent_actions, action_type, within):
        print "%s NOT found_within %s in recent_actions" % (action_type, timedelta(within) )
        return False
        
+
+class Time:
+    @classmethod
+    def dt_to_ts(cls, dt):
+        t = time.mktime(dt.timetuple())
+        return t
+
+    @classmethod
+    def ts_to_dt(cls, ts):
+        d = datetime.fromtimestamp(ts)
+        return d
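
For reference, a round-trip through the new helper looks roughly like this (illustrative only; the import path monitor.common is assumed from the diffstat, and time/datetime are taken to be imported at module level, as the surrounding code suggests):

    from datetime import datetime
    from monitor.common import Time

    now = datetime.now()
    ts = Time.dt_to_ts(now)   # float seconds since the epoch, local time (time.mktime)
    dt = Time.ts_to_dt(ts)    # back to a datetime; sub-second precision is lost
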
diff --git a/monitor/database/dbpickle.py b/monitor/database/dbpickle.py
index 5afc733..f7db480 100644
@@ -22,11 +22,19 @@ def lastModified(name, type=None):
        t = SPickle().mtime("production.%s" % name, type)
        return t
 
+def escapeName(name):
+       """
+               automatically escape names passed to the db to make sensible-filenames without
+               exposing this to users.
+       """
+       return name.replace("/", "_")
+
 def cachedRecently(name, length=int(config.cachetime), type=None):
        """
                return true or false based on whether the modified time of the cached
                file is within 'length' minutes.
        """
+       name = name.replace("/", "_")
        if hasattr(config, 'cachecalls') and not config.cachecalls:
                # don't use cached calls if cachecalls is false
                return False
@@ -44,14 +52,15 @@ def cachedRecently(name, length=int(config.cachetime), type=None):
                return True
 
 def dbLoad(name, type=None):
+       name = escapeName(name)
        return SPickle().load(name, type)
 
 def dbExists(name, type=None):
-       #if self.config.debug:
-       #       name = "debug.%s" % name
+       name = escapeName(name)
        return SPickle().exists(name, type)
 
 def dbDump(name, obj=None, type=None):
+       name = escapeName(name)
        # depth of the dump is 2 now, since we're redirecting to '.dump'
        return SPickle().dump(name, obj, type, 2)
 
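
A minimal sketch of what the escaping buys callers (the record name is made up for illustration; the module path monitor.database.dbpickle is assumed from the diffstat):

    from monitor.database import dbpickle

    # names containing '/' no longer produce nested paths on disk;
    # dbLoad/dbExists/dbDump all pass through escapeName() first
    dbpickle.escapeName("findbad/planetlab-node")   # -> "findbad_planetlab-node"
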
diff --git a/monitor/scanapi.py b/monitor/scanapi.py
index 88bd513..f91ea55 100644
@@ -235,7 +235,7 @@ class ScanNodeInternal(ScanInterface):
                                (oval, errval) = ssh.run_noexcept2(""" <<\EOF
                                        echo "{"
                                        echo '  "kernel_version":"'`uname -a`'",'
-                                       echo '  "bmlog":"'`ls /tmp/bm.log`'",'
+                                       echo '  "bmlog":"'`ls /tmp/bm.log || ls /tmp/source/BootManager.py`'",'
                                        echo '  "bootcd_version":"'`cat /mnt/cdrom/bootme/ID || cat /usr/bootme/ID`'",'
                                        echo '  "boot_server":"'`cat /mnt/cdrom/bootme/BOOTSERVER`'",'
                                        echo '  "install_date":"'`python -c "import os,time,stat; print time.ctime(os.stat('/usr/boot/plnode.txt')[stat.ST_CTIME])" || python -c "import os,time,stat; print  time.ctime(os.stat('/usr/boot/cacert.pem')[stat.ST_CTIME])"`'",'
@@ -251,6 +251,9 @@ class ScanNodeInternal(ScanInterface):
                                        echo '  "fs_status":"'`grep proc /proc/mounts | grep ro, ; if [ -x /usr/bin/timeout.pl ] ; then timeout.pl 20 touch /var/log/monitor 2>&1 ; if [ -d /vservers/ ] ; then timeout.pl 20 touch /vservers/monitor.log 2>&1  ; fi ; fi`'",'
                                        echo '  "rpm_version":"'`if [ -x /usr/bin/timeout.pl ] ; then timeout.pl 30 rpm -q NodeManager ; fi`'",'
                                        echo '  "rpm_versions":"'`if [ -x /usr/bin/timeout.pl ] ; then timeout.pl 45 rpm -q -a ; fi`'",'
+                                       echo '  "md5sums":"'`md5sum /etc/yum.conf /etc/yum.myplc.d/myplc.repo /etc/yum.myplc.d/stock.repo  | awk '{print $1}'`'",'
+                                       echo '  "md5sum_yum":"'`grep -v -E "^#" /etc/yum.myplc.d/myplc.repo | md5sum`'",'
+                                       echo '  "nada":"'``'",'
                                        echo "}"
 EOF                    """)
 
@@ -268,6 +271,8 @@ EOF                 """)
                                                                        'fs_status' : '',
                                                                        'uptime' : '',
                                                                        'dns_status' : '',
+                                                                       'md5sums' : '',
+                                                                       'md5sum_yum' : '',
                                                                        'rpm_version' : '',
                                                                        'rpm_versions' : '',
                                                                        'princeton_comon_dir' : "", 
@@ -346,7 +351,7 @@ EOF                 """)
                values['ssh_status'] = True
                if "2.6.17" in oval or "2.6.2" in oval:
                        values['observed_category'] = 'PROD'
-                       if "bm.log" in values['bmlog']:
+                       if "bm.log" in values['bmlog'] or "BootManager" in values['bmlog']:
                                values['observed_status'] = 'DEBUG'
                        else:
                                values['observed_status'] = 'BOOT'
@@ -379,6 +384,7 @@ EOF                 """)
 
                values['firewall'] = False
 
+               #print "BEFORE:%s" % values
                # NOTE: A node is down if some of the public ports are not open
                if values['observed_status'] == "BOOT":
                        # verify that all ports are open.  Else, report node as down.
@@ -461,8 +467,11 @@ EOF                        """)
                        print "ALLVERSIONS: %s %s" % (nodename, values['rpm_versions'])
                        print "RPMVERSION: %s %s" % (nodename, values['rpm_version'])
                        print "UPTIME: %s %s" % (nodename, values['uptime'])
+                       print "MD5SUMS: %s %s" % (nodename, values['md5sums'])
+                       print "MD5SUM_YUM: %s %s" % (nodename, values['md5sum_yum'])
 
                        values = self.evaluate(nodename, values)
+                       #print "%s %s" % (nodename, values)
                        values['date_checked'] = datetime.now()
 
                except:
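
The new md5sums/md5sum_yum fields come from the shell probe above, run over ssh on the node; a rough local Python equivalent of that check, purely for illustration, would be:

    import hashlib

    def md5_of(path):
        # mirror `md5sum <path>`, returning '' when the file is missing
        try:
            return hashlib.md5(open(path, 'rb').read()).hexdigest()
        except IOError:
            return ''

    for f in ['/etc/yum.conf', '/etc/yum.myplc.d/myplc.repo', '/etc/yum.myplc.d/stock.repo']:
        print f, md5_of(f)
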
diff --git a/monitor/sources/comon.py b/monitor/sources/comon.py
index 8d96e16..fb17997 100755
@@ -135,8 +135,8 @@ class Comon(Thread):
                                else:
                                        i_ignored += 1
 
-                       print "Retrieved %s hosts" % len(hash.keys())
-                       print "Ignoring %d hosts" % i_ignored
+                       #print "Retrieved %s hosts" % len(hash.keys())
+                       #print "Ignoring %d hosts" % i_ignored
 
                        logger.debug("Retrieved %s hosts" % len(hash.keys()))
                        logger.debug("Ignoring %d hosts" % i_ignored)
@@ -161,7 +161,7 @@ class Comon(Thread):
 
        def coget(self,url):
                rawdata = None
-               print "Getting: %s" % url
+               #print "Getting: %s" % url
                try:
                        coserv = urllib2.Request(url)
                        coserv.add_header('User-Agent',
diff --git a/monitor/wrapper/mailer.py b/monitor/wrapper/mailer.py
index 9f22c96..34a8553 100755
@@ -65,10 +65,10 @@ def getTicketStatus(ticket_id):
                r_values[key] = ":".join(vals[1:])
                r_values[key] = r_values[key].strip()
 
-       if 'Created' in r_values:
-               r_values['Created'] = calendar.timegm(time.strptime(r_values['Created']))
-       else:
-               r_values['Created'] = calendar.timegm(time.localtime())
+       #if 'Created' in r_values:
+       #       r_values['Created'] = calendar.timegm(time.strptime(r_values['Created']))
+       #else:
+       r_values['Created'] = calendar.timegm(time.localtime())
                
        #r_values['Told'] = calendar.timegm(time.strptime(r_values['Told']))
        return r_values
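
For context on the 'Created' change (the exact PLE failure is not spelled out in the commit): time.strptime() called without a format string only accepts asctime()-style dates, so an RT 'Created' value in any other format raises ValueError; the code now simply stamps tickets with the current time. Illustrative only:

    import time, calendar

    calendar.timegm(time.strptime("Thu Jan 21 20:15:57 2010"))   # parses: default asctime() format
    # calendar.timegm(time.strptime("2010-01-21 20:15:57"))      # ValueError: does not match the default format
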
diff --git a/monitor/wrapper/plccache.py b/monitor/wrapper/plccache.py
index 78e0500..7b1d258 100755
@@ -31,7 +31,7 @@ def dsn_from_dsln(d_sites, id2lb, l_nodes):
                if node['site_id'] in id2lb.keys():
                        login_base = id2lb[node['site_id']]
                else:
-                       print "%s has a foreign site_id %s" % (node['hostname'], 
+                       print >>sys.stderr, "%s has a foreign site_id %s" % (node['hostname'], 
                                                                                                        node['site_id'])
                        continue
                        for i in id2lb:
@@ -71,17 +71,17 @@ def init():
        global plcdb_hn2lb
        global plcdb_lb2hn
        global plcdb_id2lb
-       print "initing plccache"
+       print >>sys.stderr, "initing plccache"
 
-       print "collecting plcsites"
+       print >>sys.stderr, "collecting plcsites"
        dbsites = PlcSite.query.all()
        l_sites = [ s.plc_site_stats for s in dbsites ]
 
-       print "collecting plcnodes"
+       print >>sys.stderr, "collecting plcnodes"
        dbnodes = PlcNode.query.all()
        l_nodes = [ s.plc_node_stats for s in dbnodes ]
 
-       print "collecting plcpcus"
+       print >>sys.stderr, "collecting plcpcus"
        dbpcus = PlcPCU2.query.all()
        l_pcus = []
        for s in dbpcus:
@@ -92,9 +92,9 @@ def init():
                        pcu[k] = getattr(s, k)
                l_pcus.append(pcu)
 
-       print "building id2lb"
+       print >>sys.stderr, "building id2lb"
        (d_sites,id2lb) = dsites_from_lsites(l_sites)
-       print "building lb2hn"
+       print >>sys.stderr, "building lb2hn"
        (plcdb, hn2lb, lb2hn) = dsn_from_dsln(d_sites, id2lb, l_nodes)
 
        plcdb_hn2lb = hn2lb
@@ -138,7 +138,7 @@ def deleteExtra(l_plc, objectClass=PlcSite, dbKey='loginbase', plcKey='login_bas
        plcobj_key = [ s[plcKey] for s in l_plc ]
        extra_key = set(dbobj_key) - set(plcobj_key)
        for obj in extra_key:
-               print "deleting %s" % obj
+               print >>sys.stderr, "deleting %s" % obj
                dbobj = objectClass.get_by(**{dbKey : obj})
                dbobj.delete()
 
@@ -153,7 +153,7 @@ def sync():
                                                 'last_contact', 'pcu_ids', 'interface_ids'])
        l_pcus = plc.api.GetPCUs()
 
-       print "sync sites"
+       print >>sys.stderr, "sync sites"
        for site in l_sites:
                dbsite = PlcSite.findby_or_create(site_id=site['site_id'])
                dbsite.loginbase = site['login_base']
@@ -163,12 +163,12 @@ def sync():
        deleteExtra(l_sites, HistorySiteRecord, 'loginbase', 'login_base')
        session.flush()
 
-       print "sync pcus"
+       print >>sys.stderr, "sync pcus"
        for pcu in l_pcus:
                dbpcu = PlcPCU2.findby_or_create(pcu_id=pcu['pcu_id'])
                dbpcu.date_checked = datetime.now()
                for key in pcu.keys():
-                       print "setting %s  = %s" % (key, pcu[key])
+                       print >>sys.stderr, "setting %s  = %s" % (key, pcu[key])
                        setattr(dbpcu, key, pcu[key])
 
        deleteExtra(l_pcus, PlcPCU2, 'pcu_id', 'pcu_id')
@@ -176,7 +176,7 @@ def sync():
        deleteExtra(l_pcus, FindbadPCURecord, 'plc_pcuid', 'pcu_id')
        session.flush()
 
-       print "sync nodes"
+       print >>sys.stderr, "sync nodes"
        for node in l_nodes:
                dbnode = PlcNode.findby_or_create(node_id=node['node_id'])
                dbnode.hostname = node['hostname']