From bccc4741c3264bc2d8c47c7ab06eb691fb3a8582 Mon Sep 17 00:00:00 2001 From: Stephen Soltesz Date: Wed, 16 Nov 2011 21:42:33 +0000 Subject: [PATCH] Added simplified ConfFile entry using myops_setup_sh Fixed sh scripts to use https and node cacert.pem Updated setup.sh with new setup scripts --- README | 46 ++++++++++++++++++++ myops.spec | 55 ++++++++++++++++++++++-- web/collect/client/bootstrap.sh | 9 +++- web/collect/client/collect.sh | 10 ++++- web/collect/client/get_bootcd_version.sh | 7 ++- web/collect/client/update.sh | 13 ++++-- web/collect/client/upload.sh | 17 +++----- web/collect/sar2graphite.py | 1 + web/collect/server/build.sh | 3 +- web/collect/server/input.cfg | 20 ++++++--- web/db-config.d/030-conf_files_myops | 12 +++--- web/setup.sh | 8 +++- 12 files changed, 163 insertions(+), 38 deletions(-) diff --git a/README b/README index e69de29..3403a81 100644 --- a/README +++ b/README @@ -0,0 +1,46 @@ + +Install the RPM + +# make rpm +# yum install -y --nogpgcheck myops-getqueryview-1.0-0.i686.rpm + +Run a post install setup script that configures many of the scripts to run +within the local environment. + +# cd /usr/share/myops/web/ +# bash ./setup.sh + +Create the client collection bootstrap files in /var/www/html/PlanetLabConf + +# cd /usr/share/myops/web/collect/server +# ./build.sh + +If you wish to populate this service with data from a test PLC, then first +copy the scripts and ConfFile settings to that test PLC. + +# cd /var/www/html/PlanetLabConf/ +# scp sar2graphite.py sysstat.cron bootstrap.tar myplc.example.eu:/var/www/html/PlanetLabConf +# scp /usr/share/myops/web/db-config.d/030-conf_files_myops myplc.example.eu:/etc/planetlab/db-config.d + +To import the ConfFile() settings in 030-conf_files_myops +# ssh myplc.example.eu +# service plc restart db + +Once the service is running, you can visit: + + http://yourhost.planet-lab.eu/view + +After a few minutes, the LoadAvg graph should start to show local load on the +server. 
After a few hours, the MyOps graphs should begin to have data. + +To create new graphs from the data available, you can use the graphite +composer: + + http://yourhost.planet-lab.eu/graphite + +And, then copy the image url to /var/www/html/view/urllist.txt to have it +displayed as part of the default /view/index.php + +As well, to query some of the raw data you can look here: + + http://yourhost.planet-lab.eu:5984/myops/_design/myops/index.html diff --git a/myops.spec b/myops.spec index c25cb4c..3b4ebb7 100644 --- a/myops.spec +++ b/myops.spec @@ -7,7 +7,7 @@ %define name myops # keep this version in sync with monitor/monitor_version.py %define version 1.0 -%define taglevel 0 +%define taglevel 1 %define release %{taglevel}%{?pldistro:.%{pldistro}}%{?date:.%{date}} %global python_sitearch %( python -c "from distutils.sysconfig import get_python_lib; print get_python_lib(1)" ) @@ -91,6 +91,41 @@ Requires: php %description getqueryview The combination of collection, query and view servers. +######################################## NAGIOS + +%package policy +Summary: MyOps integration with Nagios +Group: Applications/System + +Requires: coreutils +Requires: passwd +Requires: gd +Requires: gd-devel +Requires: mysql +Requires: mysql-server +Requires: mysql-devel +Requires: mysql-libs +Requires: mailx +Requires: sendmail +Requires: php +Requires: httpd + +Requires: cronie +Requires: nagios +Requires: nagios-common +Requires: nagios-devel +Requires: nagios-plugins-all +Requires: ndoutils +Requires: ndoutils-mysql + +Requires: rt3 + + +%description policy +Scripts and setup necessary to integrate and monitor PLC with Nagios. +Best suited to F12 or above. 
+ + %prep %setup -q @@ -111,8 +146,9 @@ install -d $RPM_BUILD_ROOT/etc/planetlab/db-config.d/ install -d $RPM_BUILD_ROOT/var/www/html/PlanetLabConf/ install -d $RPM_BUILD_ROOT/var/www/html/view -rsync -a ./puppet $RPM_BUILD_ROOT/usr/share/myops/ -rsync -a ./web $RPM_BUILD_ROOT/usr/share/myops/ +rsync -a ./puppet $RPM_BUILD_ROOT/usr/share/myops/ +rsync -a ./web $RPM_BUILD_ROOT/usr/share/myops/ +rsync -a ./nagios $RPM_BUILD_ROOT/usr/share/myops/ # Generate an autosign list from plc node hostnames install -D -m 755 puppet/cron.d/autosign.plcsh $RPM_BUILD_ROOT/%{_sysconfdir}/cron.hourly/ @@ -128,6 +164,7 @@ ls $RPM_BUILD_ROOT install -D -m 644 web/db-config.d/030-conf_files_myops $RPM_BUILD_ROOT/etc/planetlab/db-config.d install -D -m 755 web/collect/sar2graphite.py $RPM_BUILD_ROOT/usr/bin/ +install -D -m 755 web/collect/client/update.sh $RPM_BUILD_ROOT/var/www/html/PlanetLabConf/myops_update_sh install -D -m 755 web/collect/sar2graphite.py $RPM_BUILD_ROOT/var/www/html/PlanetLabConf/ install -D -m 644 web/collect/cron.d/sysstat.cron $RPM_BUILD_ROOT/var/www/html/PlanetLabConf/ install -D -m 644 web/collect/cron.d/sysstat.cron $RPM_BUILD_ROOT/%{_sysconfdir}/cron.d/ @@ -137,6 +174,11 @@ install -D -m 755 web/collect/server/cron.hourly/load_all_couchdb.sh $RPM_BUILD_ install -D -m 644 web/view/urllist.txt $RPM_BUILD_ROOT/var/www/html/view/ install -D -m 644 web/view/index.php $RPM_BUILD_ROOT/var/www/html/view/ +install -D -m 755 nagios/myops-nagios.init $RPM_BUILD_ROOT/%{_sysconfdir}/plc.d/myops-nagios +install -D -m 644 nagios/myops-nagios.cron $RPM_BUILD_ROOT/%{_sysconfdir}/cron.d/myops-nagios.cron +install -D -m 644 nagios/plc.py $RPM_BUILD_ROOT/usr/share/%{name}/nagios/ +install -D -m 644 nagios/generic.py $RPM_BUILD_ROOT/usr/share/%{name}/nagios/ + %clean rm -rf $RPM_BUILD_ROOT @@ -144,6 +186,7 @@ rm -rf $RPM_BUILD_ROOT %defattr(-,root,root) /usr/share/%{name}/web /%{_sysconfdir}/cron.d/sysstat.cron +/var/www/html/PlanetLabConf/myops_update_sh 
/var/www/html/PlanetLabConf/sysstat.cron /var/www/html/PlanetLabConf/sar2graphite.py* /usr/bin/sar2graphite.py @@ -153,6 +196,12 @@ rm -rf $RPM_BUILD_ROOT /var/www/html/view/urllist.txt /var/www/html/view/index.php +%files policy +%defattr(-,root,root) +%{_sysconfdir}/plc.d/myops-nagios +/usr/share/%{name}/nagios +%{_sysconfdir}/cron.d/myops-nagios.cron + %files puppet-server %defattr(-,root,root) /%{_sysconfdir}/cron.hourly/autosign.plcsh diff --git a/web/collect/client/bootstrap.sh b/web/collect/client/bootstrap.sh index 50af68e..d62be33 100644 --- a/web/collect/client/bootstrap.sh +++ b/web/collect/client/bootstrap.sh @@ -4,7 +4,10 @@ if [[ -f /etc/planetlab/node_id ]] ; then V=`cat /etc/planetlab/node_id` else - V=$RANDOM + if [[ ! -f ./node_id ]] ; then + echo $RANDOM > ./node_id + fi + V=`cat node_id` fi if [[ -f /etc/planetlab/plc_config ]] ; then @@ -15,17 +18,19 @@ fi min=$(( $V % 60 )) min2=$(( ($min + 10) % 60 )) +min3=$(( ($min + 20) % 60 )) # Run every three hours using a fixed point in time. 
cat < collect_and_upload.cron PATH=/bin:/usr/bin:/sbin:/usr/sbin:/usr/local/bin:/home/${PLC_SLICE_PREFIX}_myops/ $min */1 * * * /home/${PLC_SLICE_PREFIX}_myops/collect.sh $min2 */1 * * * /home/${PLC_SLICE_PREFIX}_myops/upload.sh +$min3 */1 * * * /home/${PLC_SLICE_PREFIX}_myops/update.sh EOF yum install -y lshw chmod 755 /home/${PLC_SLICE_PREFIX}_myops/collect.sh chmod 755 /home/${PLC_SLICE_PREFIX}_myops/upload.sh +chmod 755 /home/${PLC_SLICE_PREFIX}_myops/update.sh crontab -u root collect_and_upload.cron - diff --git a/web/collect/client/collect.sh b/web/collect/client/collect.sh index 5bbc108..f8c49ce 100644 --- a/web/collect/client/collect.sh +++ b/web/collect/client/collect.sh @@ -9,6 +9,12 @@ LOGFILE=/home/${PLC_SLICE_PREFIX}_myops/myops.log cd /home/${PLC_SLICE_PREFIX}_myops +if [ -f /usr/boot/cacert.pem ] ; then + CURL_ARGS="$CURL_ARGS --cacert /usr/boot/cacert.pem" +else + CURL_ARGS="$CURL_ARGS --insecure" +fi + # TODO: add a timeout to eval, that kills children after X seconds function write_key_command () { @@ -22,7 +28,7 @@ function write_key_value () key=$1 value=$2 ( - flock -s 200 + flock -x -w 240 200 2> /dev/null echo $key : $value >> $LOGFILE ) 200>/var/lock/myops } @@ -42,7 +48,7 @@ while read key colon command ; do fi fi fi -done < <( curl --silent http://IPADDR/PlanetLabConf/input.cfg ) +done < <( curl $CURL_ARGS -s https://IPADDR/PlanetLabConf/input.cfg ) write_key_value "count" "$count" diff --git a/web/collect/client/get_bootcd_version.sh b/web/collect/client/get_bootcd_version.sh index 5ab3465..58a555f 100755 --- a/web/collect/client/get_bootcd_version.sh +++ b/web/collect/client/get_bootcd_version.sh @@ -1,6 +1,11 @@ #!/bin/bash kernel=$1 +if [ -f /etc/planetlab/plc_config ]; then + source /etc/planetlab/plc_config +else + PLC_SLICE_PREFIX='pl' +fi function get_version () { @@ -33,7 +38,7 @@ function mount_works () { } mkdir -p /mnt/boot -DL=`./sysinfo/systeminfo.py | sort -n | awk '{print $2}' | head -1` 
+DL=`/home/${PLC_SLICE_PREFIX}_myops/sysinfo/systeminfo.py | sort -n | awk '{print $2}' | head -1` if [ -z "$kernel" ] ; then V="No BootImage Found" else diff --git a/web/collect/client/update.sh b/web/collect/client/update.sh index 47016ff..59298b1 100644 --- a/web/collect/client/update.sh +++ b/web/collect/client/update.sh @@ -7,7 +7,7 @@ else fi IP=IPADDR -DIR=multiops +DIR=PlanetLabConf FILE=bootstrap.tar.gz HDIR=/home/${PLC_SLICE_PREFIX}_myops @@ -23,8 +23,14 @@ else CURL_ARGS="" fi +if [ -f /usr/boot/cacert.pem ] ; then + CURL_ARGS="$CURL_ARGS --cacert /usr/boot/cacert.pem" +else + CURL_ARGS="$CURL_ARGS --insecure" +fi + # if bootstrap file has been updated -curl $CURL_ARGS -s -O --insecure https://$IP/$DIR/$FILE +curl $CURL_ARGS -s -O https://$IP/$DIR/$FILE if [ -f $FILE ] ; then mod_time_after=`stat -c %Y $FILE` @@ -36,6 +42,7 @@ if [[ $mod_time_after -gt $mod_time_before ]] ; then # then an update occurred, and we need to unpack it. tar -xzf $FILE chmod 755 ./*.sh ./*.py - ./bootstrap.sh + ./bootstrap.sh || exit 1 + touch $HDIR/update_ok fi diff --git a/web/collect/client/upload.sh b/web/collect/client/upload.sh index 02b7c5e..064481c 100644 --- a/web/collect/client/upload.sh +++ b/web/collect/client/upload.sh @@ -1,20 +1,15 @@ #!/bin/bash -#if [ -f /tmp/source/configuration ] ; then -# source /tmp/source/configuration -#elif [ -f /etc/planetlab/plc_config ] ; then -# source /etc/planetlab/plc_config -#else -# PLC_MONITOR_HOST=IPADDR -#fi -#if [[ -z "$PLC_MONITOR_HOST" || "$PLC_MONITOR_HOST" = "localhost.localdomain" ]] ; then -# PLC_MONITOR_HOST=monitor.planet-lab.org -#fi if [ -f /etc/planetlab/plc_config ]; then source /etc/planetlab/plc_config else PLC_SLICE_PREFIX='pl' fi +if [ -f /usr/boot/cacert.pem ] ; then + CURL_ARGS="$CURL_ARGS --cacert /usr/boot/cacert.pem" +else + CURL_ARGS="$CURL_ARGS --insecure" +fi export MYOPS_SERVER=IPADDR @@ -38,7 +33,7 @@ function upload_log () #comm -1 -3 $old $new > $log cp $new $log if [ $( stat -c %s $log ) -ne 0 ] 
; then - curl --max-time 60 --silent http://${MYOPS_SERVER}/upload.php --form "log=@$log" + curl $CURL_ARGS --max-time 60 --silent https://${MYOPS_SERVER}/upload.php --form "log=@$log" if [ $? -ne 0 ] ; then # the upload has failed, so remove new file so no data is lost rm -f /tmp/$( basename $file ).new diff --git a/web/collect/sar2graphite.py b/web/collect/sar2graphite.py index cb679de..1efa910 100755 --- a/web/collect/sar2graphite.py +++ b/web/collect/sar2graphite.py @@ -48,6 +48,7 @@ def main(): ghost_input = os.popen("/bin/hostname", 'r') config.ghost = ghost_input.read().strip() + sysstat_check = os.popen("if ! rpm -q sysstat > /dev/null ; then yum install -y sysstat ; fi ", 'r').read() hostname_input = os.popen("/bin/hostname | sed -e 's/\./_/g' ", 'r') sar_input = os.popen("/usr/bin/sar %s %s 1" % (config.sarargs, config.seconds), 'r') nc_output = os.popen("/usr/bin/nc %s 2003" % config.ghost, 'w') diff --git a/web/collect/server/build.sh b/web/collect/server/build.sh index 18e3871..f7d4dda 100755 --- a/web/collect/server/build.sh +++ b/web/collect/server/build.sh @@ -1,6 +1,7 @@ #!/bin/bash -tar -cvf /var/www/html/PlanetLabConf/bootstrap.tar -C /usr/share/myops/web/collect/client/ . +FILELIST="sysinfo get_bootcd_version.sh bootstrap.sh collect.sh upload.sh timeout3.sh check_dns.py DNS check_bw.py check_uptime.py update.sh" +tar -zcf /var/www/html/PlanetLabConf/bootstrap.tar.gz -C /usr/share/myops/web/collect/client/ . if [ ! 
-f /var/www/html/PlanetLabConf/input.cfg ] ; then ln input.cfg /var/www/html/PlanetLabConf/input.cfg fi diff --git a/web/collect/server/input.cfg b/web/collect/server/input.cfg index dd98398..8ca4320 100644 --- a/web/collect/server/input.cfg +++ b/web/collect/server/input.cfg @@ -4,17 +4,17 @@ ts : date +%s hostname : hostname boot_state : if [ -d /vservers ] ; then echo 'BOOT' ; else echo 'DEBUG' ; fi ip_internal : ifconfig eth0 | grep "inet addr:" | sed -e 's/addr://' | awk '{print $2}' -diskspace_root : python -c 'import sys, os; f="/"; v=os.statvfs(f); sys.stdout.write("%s %.3f %.3f\\n" % (f, v[4]/float(v[2]), v[6]/float(v[5])));' 2>/dev/null -diskspace_vservers : python -c 'import sys, os; f="/vservers/"; v=os.statvfs(f); sys.stdout.write("%s %.3f %.3f\\n" % (f, v[4]/float(v[2]), v[6]/float(v[5])));' 2>/dev/null +diskspace_root : python -c 'import sys, os; f="/"; v=os.statvfs(f); pct_free=v[4]/float(v[2]); print "%s" % "Correct" if ( pct_free > 0.1 ) else "Warning/low_10_pct" if ( pct_free > 0.02 ) else "Error/empty_2_pct" ; ' 2>/dev/null +diskspace_vservers : python -c 'import sys, os; f="/vservers"; v=os.statvfs(f); pct_free=v[4]/float(v[2]); print "%s" % "Correct" if ( pct_free > 0.1 ) else "Warning/low_10_pct" if ( pct_free > 0.02 ) else "Error/empty_2_pct" ; ' 2>/dev/null free_disk_root : python -c 'import sys, os; f="/"; v=os.statvfs(f); sys.stdout.write("%.3f\\n" % (v[4]/float(v[2]) ));' 2>/dev/null free_inodes_root : python -c 'import sys, os; f="/"; v=os.statvfs(f); sys.stdout.write("%.3f\\n" % (v[6]/float(v[5]) ));' 2>/dev/null free_disk_vservers : python -c 'import sys, os; f="/vservers/"; v=os.statvfs(f); sys.stdout.write("%.3f\\n" % (v[4]/float(v[2]) ));' 2>/dev/null free_inodes_vservers : python -c 'import sys, os; f="/vservers/"; v=os.statvfs(f); sys.stdout.write("%.3f\\n" % (v[6]/float(v[5]) ));' 2>/dev/null f := echo "/var/local/fprobe/"`ls -rt /var/local/fprobe | tail -1` -fs_status : grep "planetlab-vservers.*ro," /proc/mounts ; touch 
/var/log/monitor 2>&1 ; if [ -d /vservers/ ] ; then touch /vservers/monitor.log 2>&1 ; fi +fs_status : grep "planetlab-vservers.*ro," /proc/mounts ; touch /var/log/myops 2>&1 ; if [ -d /vservers/ ] ; then touch /vservers/myops.log 2>&1 ; fi fs_status_ok : grep -q "planetlab-vservers.*ro," /proc/mounts || echo "ok" ; grep -q "planetlab-vservers.*ro," /proc/mounts && echo "ko" ; install_date : python -c "import os,time,stat; print time.strftime('%s %Y-%m-%dT%H:%M:%S',time.localtime(os.stat('/usr/boot/cacert.pem')[stat.ST_CTIME]))" -iptables_status : iptables -t mangle -nL | awk '$1~/^[A-Z]+$/ {modules[$1]=1;}END{for (k in modules) {if (k) printf "%s ",k;}}' +iptables_status : iptables -t mangle -nL | awk '$1~/^[A-Z]+$/ {modules[$1]=1;}END{for (k in modules) {if (k) printf "%s\\n",k;}}' | sort kernel_version : uname -r -v netflow : perl -e '@s=stat($ARGV[0]);$hours=(time()-$s[9])/3600;(($hours < 4) && print "Ok") || print("Bad");' $f netflow_live : touch /var/local/fprobe/.myopscheck;vserver pl_netflow exec bash -c 'if [ -f "/pf/.myopscheck" ]; then echo "OK"; else echo "KO"; fi;';rm -f /var/local/fprobe/.myopscheck @@ -30,9 +30,9 @@ uptime : cat /proc/uptime | awk '{print $1}' uptime_idle : cat /proc/uptime | awk '{print $2}' boot_server : cat /mnt/cdrom/bootme/BOOTSERVER bootcd_version : cat /mnt/cdrom/bootme/ID || cat /usr/bootme/ID -real_bootcd_version : ./get_bootcd_version.sh 2>&1 -real_bootcd_kernel_version : ./get_bootcd_version.sh -k 2>&1 -rpm_versions : sleep 6; if [ -f ./timeout3.sh ] ; then ./timeout3.sh -t 60 rpm -q -a ; else rpm -q -a ; fi +real_bootcd_version : /home/pl_myops/get_bootcd_version.sh 2>&1 +real_bootcd_kernel_version : /home/pl_myops/get_bootcd_version.sh -k 2>&1 +rpm_versions : sleep 6; if [ -f /home/pl_myops/timeout3.sh ] ; then /home/pl_myops/timeout3.sh -t 60 rpm -q -a ; else rpm -q -a ; fi traceroute_from_host : traceroute -n 128.112.139.91 | tr '\\n' '|' traceroute_to_host : curl -s --insecure 
'https://128.112.139.113/monitor/traceroute' running () { pgrep -f $1 | wc -l | awk '{if ($1 > 0){ print "yes"} else { print "no" } }'; } @@ -42,4 +42,10 @@ nm_running : running nm.py func_running : running funcd codemux_running : running codemux fprobe_size : du -s /var/local/fprobe/ | awk '{print $1}' +fprobe_size_status : du -s /var/local/fprobe/ | awk '{if ($1 < 4900000) { print "Correct" } else { print "Warning/big_5gb" } }' cpu_flags : grep flags /proc/cpuinfo | uniq | awk -F: '{print $2}' +lnprof_nodeid : if [ -f /etc/lnprof.node_id ] ; then cat /etc/lnprof.node_id ; fi +dns : /home/pl_myops/check_dns.py 2>&1 +bw : /home/pl_myops/check_bw.py 2>&1 +uptime_avg : /home/pl_myops/check_uptime.py 2>&1 +update : if [ ! -f ./bootstrap_ok ] ; then curl -s -O --insecure https://128.112.139.115/PlanetLabConf/myops_update_sh ; chmod 755 ./myops_update_sh ; ./myops_update_sh ; fi diff --git a/web/db-config.d/030-conf_files_myops b/web/db-config.d/030-conf_files_myops index b83eed7..17b7503 100644 --- a/web/db-config.d/030-conf_files_myops +++ b/web/db-config.d/030-conf_files_myops @@ -20,17 +20,17 @@ conf_files = [ 'file_permissions': u'644', 'source': 'PlanetLabConf/sysstat.cron'}, -{'dest': "/home/%s_myops/bootstrap.tar" % plc['slice_prefix'], +{'dest': "/usr/bin/myops_update.sh", 'always_update': False, 'enabled': True, 'file_group': u'root', 'file_owner': u'root', - 'file_permissions': u'644', - 'source': 'PlanetLabConf/bootstrap.tar', - 'preinstall_cmd': ' mkdir -p /home/%s_myops/ ' % plc['slice_prefix'], - 'postinstall_cmd': ' cd /home/%s_myops/ ; tar -xvf bootstrap.tar ; chmod 755 ./*.sh ./lshw ; ./bootstrap.sh ' % plc['slice_prefix']}, + 'file_permissions': u'755', + 'source': 'PlanetLabConf/myops_update_sh', + 'preinstall_cmd': '', + 'postinstall_cmd': 'if [ ! 
-f /home/%s_myops/update_ok ] ; then /usr/bin/myops_update.sh ; fi ' % plc['slice_prefix'] }, - ] +] for conf_file in conf_files: SetConfFile(conf_file) diff --git a/web/setup.sh b/web/setup.sh index 6dadeb4..8f8257d 100644 --- a/web/setup.sh +++ b/web/setup.sh @@ -126,6 +126,12 @@ couchapp push . http://admin:$PASSWORD@$HOST:5984/myops # COLLECT; probably need a better way to set IPADDR... sed -i -e 's/IPADDR/'$IP'/g' /usr/share/myops/web/collect/client/collect.sh sed -i -e 's/IPADDR/'$IP'/g' /usr/share/myops/web/collect/client/upload.sh +sed -i -e 's/IPADDR/'$IP'/g' /usr/share/myops/web/collect/client/update.sh +sed -i -e 's/IPADDR/'$IP'/g' /usr/share/myops/web/collect/server/load_couch.py + +cp /usr/share/myops/web/collect/client/update.sh /var/www/html/PlanetLabConf/myops_update_sh + +/usr/share/myops/web/collect/server/build.sh if [ ! -f /etc/httpd/conf.d/php.conf ] ; then # enable php scripts @@ -135,8 +141,6 @@ fi mkdir -p /var/www/html/uploadlogs/raw chown -R apache.apache /var/www/html/uploadlogs -sed -i -e 's/IPADDR/'$IP'/g' /usr/share/myops/web/collect/server/load_couch.py - chkconfig crond on service crond start -- 2.43.0