adding legacy support scripts
author: Tony Mack <tmack@cs.princeton.edu>
Wed, 17 Feb 2010 18:08:04 +0000 (18:08 +0000)
committer: Tony Mack <tmack@cs.princeton.edu>
Wed, 17 Feb 2010 18:08:04 +0000 (18:08 +0000)
support-scripts/gen-sites-xml.py [new file with mode: 0755]
support-scripts/gen-static-content.py [new file with mode: 0755]

diff --git a/support-scripts/gen-sites-xml.py b/support-scripts/gen-sites-xml.py
new file mode 100755 (executable)
index 0000000..13de255
--- /dev/null
@@ -0,0 +1,269 @@
+#!/usr/bin/python
+#
+# Write out sites.xml
+#
+# Mark Huang <mlhuang@cs.princeton.edu>
+# Copyright (C) 2006 The Trustees of Princeton University
+#
+# $Id: gen-sites-xml.py,v 1.8 2007/09/14 20:08:28 tmack Exp $
+#
+
+import os, sys
+import getopt
+import time
+from xml.sax.saxutils import escape, quoteattr, XMLGenerator
+
# Lock file used to ensure only one instance of this script runs at a time
PID_FILE= "/var/run/all_planetlab_xml.pid"

#
# Web server document root
#
DOCROOT = '/var/www/html/xml'

#
# DTD and version number for site information
#
ENCODING= "utf-8"
SITE_VERSION="0.4"

# Debug
# When True, write to stdout instead of DOCROOT and skip the PID lock file
dryrun = False
+
+# Parse options
+def usage():
+    print "Usage: %s [OPTION]..." % sys.argv[0]
+    print "Options:"
+    print "     -n, --dryrun            Dry run, do not write files (default: %s)" % dryrun
+    print "     -d, --docroot=DIR       Document root (default: %s)" % DOCROOT
+    print "     -h, --help              This message"
+    sys.exit(1)
+
# Get options
try:
    (opts, argv) = getopt.getopt(sys.argv[1:], "nd:h", ["dryrun", "docroot=", "help"])
except getopt.GetoptError, err:
    print "Error: " + err.msg
    usage()

for (opt, optval) in opts:
    if opt == "-n" or opt == "--dryrun":
        dryrun = True
    elif opt == "-d" or opt == "--docroot":
        DOCROOT = optval
    else:
        usage()

# Write out lock file
# Single-instance guard: if a PID file exists and that process is still
# alive, bail out quietly and let the running instance finish.
if not dryrun:
    if os.access(PID_FILE, os.R_OK):
        pid= file(PID_FILE).readline().strip()
        if pid <> "":
            # kill -0 only tests for process existence, it sends no signal
            if os.system("/bin/kill -0 %s > /dev/null 2>&1" % pid) == 0:
                sys.exit(0)

    # write out our process id
    pidfile= file( PID_FILE, 'w' )
    pidfile.write( "%d\n" % os.getpid() )
    pidfile.close()

# Load shell with default configuration
# Shell(globals()) injects the PLC API calls (begin, commit, GetNodes, ...)
# used below directly into this module's global namespace.
sys.path.append('/usr/share/plc_api')
from PLC.Shell import Shell
plc = Shell(globals())
+
#
# Get information from API
#

# begin()/commit() batch the four queries into one API round trip;
# commit() returns the results in call order.
begin()
GetNodes(None, ['node_id', 'model', 'boot_state', 'hostname', 'version', 'ssh_rsa_key', 'nodenetwork_ids', 'slice_ids_whitelist'])
GetNodeNetworks({'is_primary': True}, ['nodenetwork_id', 'node_id', 'ip', 'mac', 'bwlimit'])
GetSites(None, ['name', 'latitude', 'longitude', 'url', 'site_id', 'login_base', 'abbreviated_name', 'node_ids'])
GetNodeGroups(['Alpha', 'Beta', 'Rollout', 'Production'], ['name', 'node_ids'])
(nodes, nodenetworks, sites, groups) = commit()

# remove whitelisted nodes
# Nodes restricted to a slice whitelist are excluded from the public listing.
remove_whitelisted = lambda node: not node['slice_ids_whitelist']
nodes = filter(remove_whitelisted, nodes)

# Index the surviving nodes by node_id for the merges and lookups below.
nodes = dict([(node['node_id'], node) for node in nodes])

# Fold each primary nodenetwork's fields (ip, mac, bwlimit, ...) into its
# owning node's dict so the HOST element can read them directly.
for nodenetwork in nodenetworks:
    if nodes.has_key(nodenetwork['node_id']):
        node = nodes[nodenetwork['node_id']]
        for key, value in nodenetwork.iteritems():
            node[key] = value

# Map node group name -> list of member node_ids (used for STATUS below).
group_node_ids = dict([(group['name'], group['node_ids']) for group in groups])
+
class PrettyXMLGenerator(XMLGenerator):
    """
    SAX XMLGenerator subclass that adds indentation to the beginning and
    newlines to the end of opening and closing tags.

    Indentation state is a stack in self.indents: one entry is pushed per
    open element and popped when the element is closed.
    """

    def __init__(self, out = sys.stdout, encoding = "utf-8", indent = "", addindent = "", newl = ""):
        """
        out       -- file-like object to write to
        encoding  -- character encoding declared in the XML prolog
        indent    -- initial indentation string
        addindent -- string appended to the indent at each nesting level
        newl      -- string written after each tag (usually a newline)
        """
        XMLGenerator.__init__(self, out, encoding)
        # XMLGenerator does not export _write(); route our raw writes
        # through ignorableWhitespace(), which emits content unescaped.
        self.write = self.ignorableWhitespace
        self.indents = [indent]
        self.addindent = addindent
        self.newl = newl

    def startDocument(self):
        # Emit the standard <?xml ...?> declaration.
        XMLGenerator.startDocument(self)

    def startElement(self, name, attrs, indent = True, newl = True):
        """Write an opening tag, indented and newline-terminated by default."""
        if indent:
            self.ignorableWhitespace("".join(self.indents))
        self.indents.append(self.addindent)

        XMLGenerator.startElement(self, name, attrs)

        if newl:
            self.ignorableWhitespace(self.newl)

    def characters(self, content):
        # Escape markup characters plus both quote styles:
        # " to &quot;
        # ' to &apos;
        self.write(escape(content, {
            '"': '&quot;',
            "'": '&apos;',
            }))

    def endElement(self, name, indent = True, newl = True):
        """Write a closing tag, dedented and newline-terminated by default."""
        self.indents.pop()
        if indent:
            self.ignorableWhitespace("".join(self.indents))

        XMLGenerator.endElement(self, name)

        if newl:
            self.ignorableWhitespace(self.newl)

    def simpleElement(self, name, attrs = {}, indent = True, newl = True):
        """
        Write a self-closing element: <name attr="value" .../>.

        attrs is only read, never mutated, so the shared default dict is
        safe here.
        """
        if indent:
            self.ignorableWhitespace("".join(self.indents))

        self.write('<' + name)
        # Distinct loop variables: the original iterated "(name, value)",
        # shadowing the element-name parameter inside the loop.
        for (attr, value) in attrs.items():
            self.write(' %s=%s' % (attr, quoteattr(value)))
        self.write('/>')

        if newl:
            self.ignorableWhitespace(self.newl)
+
#
# Write out sites.xml
#

# In dryrun mode the document goes to stdout; otherwise straight into the
# web server's document root.
if dryrun:
    sites_xml = sys.stdout
else:
    sites_xml = open(DOCROOT + "/sites.xml", mode = "w")

xml = PrettyXMLGenerator(out = sites_xml, encoding = ENCODING, indent = "", addindent = "  ", newl = "\n")
xml.startDocument()

# Write embedded DTD verbatim
# (ignorableWhitespace writes raw text without escaping, which is what an
# inline DOCTYPE needs)
xml.ignorableWhitespace("""
<!DOCTYPE PLANETLAB_SITES [
  <!ELEMENT PLANETLAB_SITES (SITE)*>
  <!ATTLIST PLANETLAB_SITES VERSION CDATA #REQUIRED
                            TIME    CDATA #REQUIRED>

  <!ELEMENT SITE (HOST)*>
  <!ATTLIST SITE NAME            CDATA #REQUIRED
                 LATITUDE        CDATA #REQUIRED
                 LONGITUDE       CDATA #REQUIRED
                 URL             CDATA #REQUIRED
                 SITE_ID         CDATA #REQUIRED
                 LOGIN_BASE      CDATA #REQUIRED
                 FULL_SITE_NAME  CDATA #REQUIRED
                 SHORT_SITE_NAME CDATA #REQUIRED
  >

  <!ELEMENT HOST EMPTY>
  <!ATTLIST HOST NAME         CDATA #REQUIRED
                 IP           CDATA #REQUIRED
                 MODEL        CDATA #REQUIRED
                 MAC          CDATA #IMPLIED
                 BOOTCD       (y|n) "n"
                 VERSION      CDATA #REQUIRED
                 NODE_ID      CDATA #REQUIRED
                 BOOT_VERSION CDATA ""
                 STATUS       CDATA ""
                 BOOT_STATE   CDATA #REQUIRED
                 RSA_KEY      CDATA ""
                 BWLIMIT      CDATA ""
  >
]>
""")
+
def format_tc_rate(rate):
    """
    Formats a bits/second rate into a tc rate string.

    Uses the largest unit (gbit, mbit) that divides the rate evenly,
    otherwise falls back to kbit for rates of at least 1000, and to a
    plain bit count below that.
    """

    gig = 1000000000
    meg = 1000000
    if rate >= gig and rate % gig == 0:
        return "%.0fgbit" % (rate / float(gig))
    if rate >= meg and rate % meg == 0:
        return "%.0fmbit" % (rate / float(meg))
    if rate >= 1000:
        return "%.0fkbit" % (rate / 1000.)
    return "%.0fbit" % rate
+
# <PLANETLAB_SITES VERSION="major.minor" TIME="seconds_since_epoch">
xml.startElement('PLANETLAB_SITES', {'VERSION': SITE_VERSION,
                                     'TIME': str(int(time.time()))})

for site in sites:
    # <SITE ...>
    attrs = {}
    for attr in ['name', 'latitude', 'longitude', 'url', 'site_id', 'login_base']:
        attrs[attr.upper()] = unicode(site[attr])
    attrs['FULL_SITE_NAME'] = unicode(site['name'])
    attrs['SHORT_SITE_NAME'] = unicode(site['abbreviated_name'])
    xml.startElement('SITE', attrs)

    for node_id in site['node_ids']:
        # only nodes that survived the whitelist filter above are listed
        if nodes.has_key(node_id):
            node = nodes[node_id]

            # <HOST ...>
            attrs = {}
            attrs['NAME'] = unicode(node['hostname'])
            attrs['VERSION'] = "2.0"
            for attr in ['model', 'node_id', 'boot_state']:
                attrs[attr.upper()] = unicode(node[attr]).strip()

            # If the node is in Alpha, Beta, or Rollout, otherwise Production
            # (relies on the loop variable keeping its last value when no
            # break fires, so unmatched nodes end up as 'Production')
            for group in ['Alpha', 'Beta', 'Rollout', 'Production']:
                if group_node_ids.has_key(group) and \
                   node_id in group_node_ids[group]:
                    break
            attrs['STATUS'] = group

            # version and ssh_rsa_key can be multi-line blobs; keep only
            # the first line of each
            if node['version']:
                attrs['BOOT_VERSION'] = unicode(node['version'].splitlines()[0])
            if node['ssh_rsa_key']:
                attrs['RSA_KEY'] = unicode(node['ssh_rsa_key'].splitlines()[0])

            # These keys are only present when a primary nodenetwork was
            # merged into the node dict earlier.
            if node.has_key('ip') and node['ip']:
                attrs['IP'] = unicode(node['ip'])
            if node.has_key('mac') and node['mac']:
                attrs['MAC'] = unicode(node['mac'])
            if node.has_key('bwlimit') and node['bwlimit']:
                attrs['BWLIMIT'] = unicode(format_tc_rate(node['bwlimit']))

            xml.simpleElement('HOST', attrs)

    # </SITE>
    xml.endElement('SITE')

xml.endElement('PLANETLAB_SITES')

if not dryrun:
    # remove the PID file
    os.unlink( PID_FILE )
diff --git a/support-scripts/gen-static-content.py b/support-scripts/gen-static-content.py
new file mode 100755 (executable)
index 0000000..3539cab
--- /dev/null
@@ -0,0 +1,341 @@
+#!/usr/bin/env /usr/bin/plcsh
+#
+# Generates static versions of expensive web pages
+#
+# Mark Huang <mlhuang@cs.princeton.edu>
+# Copyright (C) 2005 The Trustees of Princeton University
+#
+# $Id: gen-static-content.py,v 1.35.2.1 2007/02/07 03:27:50 mlhuang Exp $
+#
+
+import os, sys, shutil
+import time
+import string
+import codecs
+import socket
+import urllib2
+import csv
+
# Lock file used to ensure only one instance of this script runs at a time
SCRIPT_PID_FILE= "/var/run/gen-static-content.pid"

# where to store the generated files
GENERATED_OUTPUT_PATH= '/var/www/html/generated'

# this php block, if put at the top of the files,
# will enable them to be downloaded without the php
# engine parsing them
DISABLE_PHP_BLOCK= \
"""<?php
if( isset($_GET['disablephp']) )
  {
    readfile(__FILE__);
    exit();
  }
?>
"""

# Globals
# Populated in __main__ from the PLC API, read by the generator functions.
all_nodes = []
all_sites = []
node_group_nodes = {}
+
# return a php page that has node and site counts in it
def GetCountsFileContent(f):
    """Write a php page to f defining $node_count and $site_count."""
    f.write( DISABLE_PHP_BLOCK )
    f.write( "<?php\n" )

    f.write( "$node_count= %s;\n" % len(all_nodes) )
    f.write( "$site_count= %s;\n" % len(all_sites) )

    f.write( "?>" )
+
+
# generate a plain text file in ~/.ssh/known_hosts format
def GetHostKeys(f):
    """
    Write a ~/.ssh/known_hosts style listing of node SSH RSA keys to f,
    prefixed by a php stanza that serves the page as text/plain.

    For every node that has a key, one line is written per known name:
    its hostname and, when set, its primary IP.
    """
    time_generated= time.strftime("%a, %d %b %Y %H:%M:%S")

    f.write( DISABLE_PHP_BLOCK )

    f.write( "<?php\n" )
    f.write( "$node_list_generated_time= '%s';\n" % time_generated )
    f.write( "header('Content-type: text/plain');\n" )
    f.write( "?>\n" )

    # (an unused local alias "nodes = all_nodes" was removed here)
    for node in all_nodes:
        hostname = node['hostname']
        ssh_rsa_key = node['ssh_rsa_key']
        ip = node['ip']
        if ssh_rsa_key:
            if hostname:
                f.write( "%s %s\n" % (hostname, ssh_rsa_key) )
            if ip:
                f.write( "%s %s\n" % (ip, ssh_rsa_key) )
+
+
# return php content that includes all the node lists
def GetNodeListsContent(f):
    """
    Write to f a php page that outputs one node list selected by the php
    variable $which_node_list: all hostnames, all IPs, /etc/hosts
    entries, per-nodegroup (alpha/beta) host and IP lists, or the
    production (not alpha/beta) host and IP lists.
    """
    time_generated= time.strftime("%a, %d %b %Y %H:%M:%S")

    f.write( DISABLE_PHP_BLOCK )

    f.write( "<?php\n" )
    f.write( "$node_list_generated_time= '%s';\n" % time_generated )

    # Nodes with primary IP addresses in boot state
    # NOTE: iterated several times below; this relies on Python 2's
    # filter() returning a list rather than a one-shot iterator.
    nodes_in_boot = filter(lambda node: node['boot_state'] == "boot" and node['ip'],
                           all_nodes)

    # Hostnames
    all_hosts = [node['hostname'] for node in nodes_in_boot]
    f.write( "if( $which_node_list == 'all_hosts' )\n" )
    f.write( "{\n" )
    f.write( "?>\n" )
    f.write( "\n".join(all_hosts) + "\n" )
    f.write( "<?php\n" )
    f.write( "}\n" )

    # IPs
    all_ips = [node['ip'] for node in nodes_in_boot]
    f.write( "elseif( $which_node_list == 'all_ips' )\n" )
    f.write( "{\n" )
    f.write( "?>\n" )
    f.write( "\n".join(all_ips) + "\n" )
    f.write( "<?php\n" )
    f.write( "}\n" )

    # /etc/hosts entries
    etc_hosts = [node['ip'] + "\t" + node['hostname'] for node in nodes_in_boot]
    f.write( "elseif( $which_node_list == 'etc_hosts' )\n" )
    f.write( "{\n" )
    f.write( "?>\n" )
    # Create a localhost entry for convenience
    f.write( "127.0.0.1\tlocalhost.localdomain localhost\n" )
    f.write( "\n".join(etc_hosts) + "\n" )
    f.write( "<?php\n" )
    f.write( "}\n" )

    for group in ['Alpha', 'Beta']:
        # Ensure both keys exist so the production filter below cannot
        # raise KeyError even when a group is absent.
        if not node_group_nodes.has_key(group):
            node_group_nodes[group] = []

        # Group nodes with primary IP addresses in boot state
        group_nodes_in_boot = filter(lambda node: node['boot_state'] == "boot" and node['ip'],
                                     node_group_nodes[group])

        # Group hostnames
        group_hosts = [node['hostname'] for node in group_nodes_in_boot]
        f.write( "elseif( $which_node_list == '%s_hosts' )\n" % group.lower() )
        f.write( "{\n" )
        f.write( "?>\n" )
        f.write( "\n".join(group_hosts) + "\n" )
        f.write( "<?php\n" )
        f.write( "}\n" )

        # Group IPs
        group_ips = [node['ip'] for node in group_nodes_in_boot]
        f.write( "elseif( $which_node_list == '%s_ips' )\n" % group.lower() )
        f.write( "{\n" )
        f.write( "?>\n" )
        f.write( "\n".join(group_ips) + "\n" )
        f.write( "<?php\n" )
        f.write( "}\n" )

    # All production nodes (nodes not in Alpha or Beta)
    production_nodes_in_boot = filter(lambda node: node not in node_group_nodes['Alpha'] and \
                                                   node not in node_group_nodes['Beta'],
                                      nodes_in_boot)

    production_hosts = [node['hostname'] for node in production_nodes_in_boot]
    f.write( "elseif( $which_node_list == 'production_hosts' )\n" )
    f.write( "{\n" )
    f.write( "?>\n" )
    f.write( "\n".join(production_hosts) + "\n" )
    f.write( "<?php\n" )
    f.write( "}\n" )

    production_ips = [node['ip'] for node in production_nodes_in_boot]
    f.write( "elseif( $which_node_list == 'production_ips' )\n" )
    f.write( "{\n" )
    f.write( "?>\n" )
    f.write( "\n".join(production_ips) + "\n" )
    f.write( "<?php\n" )
    f.write( "}\n" )
    f.write( "?>" )
+
+
def GetPlanetFlowStats(f):
    """
    Fetch per-slice traffic statistics from the PlanetFlow web interface
    (CSV over HTTP) and write them to f as a php $planetflow array.

    Does nothing when config has no PLANETFLOW_BASE attribute.
    """
    # 'config' is injected into our globals by the plcsh interpreter
    # named on the shebang line.
    if hasattr(config, 'PLANETFLOW_BASE'):
        url = "http://" + config.PLANETFLOW_BASE
    else:
        return

    # Slices to calculate detailed statistics for
    slices = [
        'cmu_esm',
        'cornell_beehive',
        'cornell_cobweb',
        'cornell_codons',
        'michigan_tmesh',
        'nyu_d',
        'princeton_codeen',
        'princeton_coblitz',
        'princeton_comon',
        'rice_epost',
        'ucb_bamboo',
        'ucb_i3',
        'ucsd_sword',
        'upenn_dharma',
        'idsl_psepr',
        'ucb_ganglia',
        'cmu_irislog',
        'tennessee_hliu'
        ]

    # Seconds to wait
    # NOTE: setdefaulttimeout is process-wide, not just for this request.
    socket.setdefaulttimeout(3600)

    # Ask PlanetFlow for CSV covering the last two days, restricted to
    # the slices listed above.
    url = url + '/slice.php?csv=1&start_time=2+days+ago'
    if slices:
        url = url + '&slices[]=' + '&slices[]='.join(slices)
    stats = urllib2.urlopen(url)
    fields = ['slice', 'flows', 'packets', 'bytes', 'src_ips',
              'dst_ips', 'top_dst_ip', 'top_dst_ip_bytes']
    rows = csv.DictReader(stats, fields)
    f.write("<?php\n")
    f.write("$planetflow = array(\n")
    for row in rows:
        if row.has_key('slice'):
            f.write("'%s' => array(\n" % row['slice'])
            for field in fields:
                if row.has_key(field) and \
                   row[field] is not None and \
                   row[field] != "":
                    # NOTE(review): csv.DictReader yields string values, so
                    # the integer branch below looks unreachable -- confirm.
                    if type(row[field]) == type(0):
                        f.write("\t'%s' => %d,\n" % (field, int(row[field])))
                    else:
                        f.write("\t'%s' => '%s',\n" % (field, row[field]))
            f.write("),\n")
    f.write(");\n")
    f.write("?>")
+
+
+
def GenDistMap():
    """
    Regenerate the node distribution map by piping every site's
    latitude/longitude to the plot-latlong perl script and saving its
    PNG output under GENERATED_OUTPUT_PATH.

    Unlike the other generators, this writes its own output file and
    takes no file argument (see the None entry in STATIC_FILE_LIST).
    """
    # update the node distribution map
    datadir = '/var/www/html/plot-latlong'

    # plot-latlong looks for .mapinfo and .mapimages in $HOME
    os.environ['HOME'] = datadir

    if hasattr(config, 'PLC_WWW_MAPIMAGE'):
        image = config.PLC_WWW_MAPIMAGE
    else:
        image = "World50"

    # popen2 returns pipes to the child's stdin and stdout
    (child_stdin,
     child_stdout) = \
     os.popen2('perl ' + datadir + os.sep + 'plot-latlong -m "%s" -s 3' % image)

    for site in all_sites:
        if site['latitude'] and site['longitude']:
            child_stdin.write("%f %f\n" % \
                              (site['latitude'], site['longitude']))
    # closing stdin signals EOF so the child renders and exits
    child_stdin.close()

    map = file(GENERATED_OUTPUT_PATH + os.sep + image + '.png', 'w')
    map.write(child_stdout.read())
    child_stdout.close()
    map.close()
+
+
# which files to generate, and the functions in
# this script to call to get the content for
# Each entry is (output file name, generator function); a None file name
# means the function manages its own output (see GenDistMap).
STATIC_FILE_LIST= (
    ('_gen_counts.php',GetCountsFileContent),
    ('_gen_node_lists.php',GetNodeListsContent),
    ('_gen_known_hosts.php',GetHostKeys),
    ('_gen_planetflow.php',GetPlanetFlowStats),
    (None,GenDistMap)
    )
+
+
+if __name__ == '__main__':
+
+    # see if we are already running by checking the existance
+    # of a PID file, and if it exists, attempting a test kill
+    # to see if the process really does exist. If both of these
+    # tests pass, exit.
+        
+    if os.access(SCRIPT_PID_FILE, os.R_OK):
+        pid= string.strip(file(SCRIPT_PID_FILE).readline())
+        if pid <> "":
+            if os.system("/bin/kill -0 %s > /dev/null 2>&1" % pid) == 0:
+                sys.exit(0)
+            
+    # write out our process id
+    pidfile= file( SCRIPT_PID_FILE, 'w' )
+    pidfile.write( "%d\n" % os.getpid() )
+    pidfile.close()
+    pidfile= None
+
+    # Get all nodes and sites
+    begin()
+    GetNodes(None, ['node_id', 'hostname', 'boot_state', 'ssh_rsa_key', 'interface_ids'])
+    GetInterfaces(None, ['interface_id', 'ip', 'is_primary'])
+    GetSites(None, ['site_id', 'latitude', 'longitude'])
+    GetNodeGroups(None, ['nodegroup_id', 'tagname', 'node_ids'])
+    (all_nodes, all_nodenetworks, all_sites, all_groups) = commit()
+
+    all_nodenetworks = dict([(nodenetwork['interface_id'], nodenetwork) \
+                             for nodenetwork in all_nodenetworks])
+
+    # Set primary IP, if any
+    for node in all_nodes:
+        node['ip'] = None
+        for interface_id in node['interface_ids']:
+            try:
+                nodenetwork = all_nodenetworks[interface_id]
+                if nodenetwork['is_primary']:
+                    node['ip'] = nodenetwork['ip']
+                break
+            except IndexError, KeyError:
+                continue
+
+    # Get list of nodes in each node group
+    for group in all_groups:
+        nodes_in_group = filter(lambda node: node['node_id'] in group['node_ids'], all_nodes)
+        node_group_nodes[group['tagname']] = nodes_in_group
+
+    # generate the static content files
+    for (file_name,func) in STATIC_FILE_LIST:
+        if file_name is not None:
+            try:
+                output_file_path= "%s/%s" % (GENERATED_OUTPUT_PATH,file_name)
+                tmp_output_file_path= output_file_path + '.tmp'
+                tmp_output_file= codecs.open( tmp_output_file_path, encoding = 'utf-8', mode = "w" )
+            except IOError, err:
+                print( "Unable to open file %s for writing." % output_file_path )
+                continue
+
+            try:
+                func(tmp_output_file)
+                tmp_output_file.flush()
+                shutil.copyfile( tmp_output_file_path, output_file_path )
+            except Exception, e:
+                print "Unable to get content for file: %s" % file_name, e
+                import traceback
+                traceback.print_exc()
+
+            tmp_output_file.close()
+            tmp_output_file= None
+            os.unlink( tmp_output_file_path )
+        else:
+            func()
+
+    # remove the PID file
+    os.unlink( SCRIPT_PID_FILE )