# Node status probe list (format tag node-status-v3).
# Layout as visible in this file - an entry is a name, a colon, and the shell
# command that produces its value. The f entry uses colon-equals and its
# value is referenced later as a shell variable by the netflow entry.
# NOTE(review) - these comment lines assume the consumer passes non-entry
# lines through to the shell, as it must already do for the running() helper
# further down. Confirm before deploying.
# Doubled backslashes inside the commands are kept exactly as found.

# Identification and timestamps.
type : echo "node-status-v3"
date : date --rfc-3339='seconds' | tr ' ' 'T'
ts : date +%s
hostname : hostname
# BOOT when /vservers exists as a directory, DEBUG otherwise.
boot_state : if [ -d /vservers ] ; then echo 'BOOT' ; else echo 'DEBUG' ; fi
ip_internal : ifconfig eth0 | grep "inet addr:" | sed -e 's/addr://' | awk '{print $2}'

# Disk checks through os.statvfs (Python 2 print syntax). Tuple index 4 is
# f_bavail, 2 is f_blocks, 6 is f_ffree and 5 is f_files, so the values are
# the free fraction of blocks and of inodes. Python errors are discarded.
diskspace_root : python -c 'import sys, os; f="/"; v=os.statvfs(f); pct_free=v[4]/float(v[2]); print "%s" % "Correct" if ( pct_free > 0.1 ) else "Warning/low_10_pct" if ( pct_free > 0.02 ) else "Error/empty_2_pct" ; ' 2>/dev/null
diskspace_vservers : python -c 'import sys, os; f="/vservers"; v=os.statvfs(f); pct_free=v[4]/float(v[2]); print "%s" % "Correct" if ( pct_free > 0.1 ) else "Warning/low_10_pct" if ( pct_free > 0.02 ) else "Error/empty_2_pct" ; ' 2>/dev/null
free_disk_root : python -c 'import sys, os; f="/"; v=os.statvfs(f); sys.stdout.write("%.3f\\n" % (v[4]/float(v[2]) ));' 2>/dev/null
free_inodes_root : python -c 'import sys, os; f="/"; v=os.statvfs(f); sys.stdout.write("%.3f\\n" % (v[6]/float(v[5]) ));' 2>/dev/null
free_disk_vservers : python -c 'import sys, os; f="/vservers/"; v=os.statvfs(f); sys.stdout.write("%.3f\\n" % (v[4]/float(v[2]) ));' 2>/dev/null
free_inodes_vservers : python -c 'import sys, os; f="/vservers/"; v=os.statvfs(f); sys.stdout.write("%.3f\\n" % (v[6]/float(v[5]) ));' 2>/dev/null

# Path of the last file listed by ls -rt under /var/local/fprobe, used by
# the netflow entry.
f := echo "/var/local/fprobe/"`ls -rt /var/local/fprobe | tail -1`

# Prints any planetlab-vservers mount line carrying the ro flag, then tries
# to touch a file on / and on /vservers with error text folded into the value.
fs_status : grep "planetlab-vservers.*ro," /proc/mounts ; touch /var/log/myops 2>&1 ; if [ -d /vservers/ ] ; then touch /vservers/myops.log 2>&1 ; fi
# One grep decides between ko and ok, so exactly one word is always printed
# even if the mount table changes while the probe runs.
fs_status_ok : if grep -q "planetlab-vservers.*ro," /proc/mounts ; then echo "ko" ; else echo "ok" ; fi
install_date : python -c "import os,time,stat; print time.strftime('%s %Y-%m-%dT%H:%M:%S',time.localtime(os.stat('/usr/boot/cacert.pem')[stat.ST_CTIME]))"
iptables_status : iptables -t mangle -nL | awk '$1~/^[A-Z]+$/ {modules[$1]=1;}END{for (k in modules) {if (k) printf "%s\\n",k;}}' | sort
kernel_version : uname -r -v
# Ok when the file named by f was modified under 4 hours ago (stat field 9
# is the mtime), Bad otherwise. The variable is quoted so that a file name
# containing whitespace stays a single argument.
netflow : perl -e '@s=stat($ARGV[0]);$hours=(time()-$s[9])/3600;(($hours < 4) && print "Ok") || print("Bad");' "$f"
# Creates a marker file on the host, checks for /pf/.myopscheck from inside
# the pl_netflow vserver, then removes the marker.
netflow_live : touch /var/local/fprobe/.myopscheck;vserver pl_netflow exec bash -c 'if [ -f "/pf/.myopscheck" ]; then echo "OK"; else echo "KO"; fi;';rm -f /var/local/fprobe/.myopscheck
nm_status : ps ax | grep nodemanager.py | grep -v grep | tail -1
plc_config : wc -l /etc/planetlab/plc_config | awk '{if ($1<5) {print "KO";} else {print "OK";}}'
princeton_comon_dir : ls -d /vservers/princeton_comon
princeton_comon_procs : vps ax | grep `grep princeton_comon /etc/passwd | awk -F : '{if ( $3 > 500 ) { print $3}}'` | grep -v grep | wc -l
# NOTE(review) - when the passwd lookup yields nothing this lists the bare
# /proc/virtual/ directory. Left unchanged because the consumer of this value
# is not visible here. Verify that this is acceptable.
princeton_comon_running : ls -d /proc/virtual/`grep princeton_comon /etc/passwd | awk -F : '{if ( $3 > 500 ) { print $3}}'`
redundant_procs : sleep 4 && ps xo args | sort | uniq -c | sort -n | grep -v " [1-5] " | grep -vE "bash|flock|sshd|collect|grep" | tail -1 | tr ':' ';'
rpmprocess_count : pgrep "rpm|yum" | wc -l
running_slices : vps ax | awk '{print $3}' | grep -vE 'ALL_PROC|MAIN|TTY|\\?' | sort | uniq
# awk reads /proc/uptime directly, the first field and then the second.
uptime : awk '{print $1}' /proc/uptime
uptime_idle : awk '{print $2}' /proc/uptime
boot_server : cat /mnt/cdrom/bootme/BOOTSERVER
bootcd_version : cat /mnt/cdrom/bootme/ID || cat /usr/bootme/ID
real_bootcd_version : /home/pl_myops/get_bootcd_version.sh 2>&1
real_bootcd_kernel_version : /home/pl_myops/get_bootcd_version.sh -k 2>&1
# Uses the timeout3.sh wrapper with -t 60 when it is installed, plain rpm
# otherwise.
rpm_versions : sleep 6; if [ -f /home/pl_myops/timeout3.sh ] ; then /home/pl_myops/timeout3.sh -t 60 rpm -q -a ; else rpm -q -a ; fi
traceroute_from_host : traceroute -n 128.112.139.91 | tr '\\n' '|'
# NOTE(review) - --insecure disables TLS certificate verification.
traceroute_to_host : curl -s --insecure 'https://128.112.139.113/monitor/traceroute'

# Helper - prints yes when pgrep -f finds at least one process whose command
# line matches the first argument, no otherwise. The argument is quoted so a
# pattern with spaces or glob characters reaches pgrep intact.
running () { pgrep -f "$1" | wc -l | awk '{if ($1 > 0){ print "yes"} else { print "no" } }'; }
zabbix_running : running zabbix
nodemanager_running : running nodemanager
nm_running : running nm.py
func_running : running funcd
codemux_running : running codemux

fprobe_size : du -s /var/local/fprobe/ | awk '{print $1}'
fprobe_size_status : du -s /var/local/fprobe/ | awk '{if ($1 < 4900000) { print "Correct" } else { print "Warning/big_5gb" } }'
cpu_flags : grep flags /proc/cpuinfo | uniq | awk -F: '{print $2}'
lnprof_nodeid : if [ -f /etc/lnprof.node_id ] ; then cat /etc/lnprof.node_id ; fi
dns : /home/pl_myops/check_dns.py 2>&1
bw : /home/pl_myops/check_bw.py 2>&1
uptime_avg : /home/pl_myops/check_uptime.py 2>&1
# Self update, skipped once ./update_ok exists. The steps are chained so the
# script is only made executable and run after a successful download, and -f
# makes curl fail on an HTTP error instead of saving the error page.
# NOTE(review) - --insecure disables TLS certificate verification for a
# script that is then executed. Flagged, not changed.
update : if [ ! -f ./update_ok ] ; then curl -s -f -O --insecure https://128.112.139.3/PlanetLabConf/myops_update_sh && chmod 755 ./myops_update_sh && ./myops_update_sh ; fi