X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=pl_mom.pl;h=55da825421cfd05761bd8b84de74591491902991;hb=refs%2Fheads%2Fplanetlab-3_2-branch;hp=55f8c9ea02423b6ca849237ef3cd5ab56ef226d9;hpb=cffca65277f8d696f358e2b9e9f3fc77da07a007;p=mom.git diff --git a/pl_mom.pl b/pl_mom.pl index 55f8c9e..55da825 100755 --- a/pl_mom.pl +++ b/pl_mom.pl @@ -9,7 +9,6 @@ $debug = 0; $proc = "pl_mom"; $alias_addr = "pl-mom\@planet-lab.org"; $from_addr = "support\@planet-lab.org"; -$bwcap = "1.5Mbit"; if (! $debug) { $kill_thresh = 90; @@ -17,29 +16,37 @@ if (! $debug) { $log_thresh = 85; $change_thresh = 5; $min_thresh = 10; - #$byte_cutoff = 16000000000; # 16GB - $byte_cutoff = 5000000000; # 5GB + #$bwcap_default = "off"; + $bwcap_default = "1.5Mbit"; + $cutoff_default = "16200000000"; # 16GB, for 1.5Mbit cap $bwmon_sleep = 900; $sendmail = "/usr/sbin/sendmail -t -f$from_addr"; + $vservers = "/etc/vservers"; $pidfile = "/var/run/$proc.pid"; $rebootfile = "/var/lib/misc/pl_mom.reboot"; $daily_log = "/var/lib/misc/pl_mom.daily"; $daily_stamp = "/var/lib/misc/pl_mom.stamp"; + $configfile = "/etc/planetlab/pl_mom.conf"; + $capfile = "/var/lib/misc/pl_mom.oldcaps"; } else { $kill_thresh = 2; $reboot_thresh = 20; $log_thresh = 2; $change_thresh = 5; - $min_thresh = 10; - $byte_cutoff = 16000; + $min_thresh = 2; + $bwcap_default = "1Kbit"; + $cutoff_default = "10800"; $bwmon_sleep = 10; $sendmail = "cat"; + $vservers = "./debug"; $pidfile = "./$proc.pid"; - $rebootfile = "./pl_mom.reboot"; - $daily_log = "./pl_mom.daily"; - $daily_stamp = "./pl_mom.stamp"; + $rebootfile = "./debug/pl_mom.reboot"; + $daily_log = "./debug/pl_mom.daily"; + $daily_stamp = "./debug/pl_mom.stamp"; + $configfile = "./debug/pl_mom.conf"; + $capfile = "./debug/pl_mom.oldcaps"; } $sleep = 30; @@ -51,6 +58,8 @@ if (! $debug) { system("echo $$ > $pidfile"); +read_config_file(); + # Check to see whether pl_mom rebooted the node if (-e $rebootfile) { unlink($rebootfile); @@ -67,6 +76,10 @@ if (! $pid) { $pid = fork(); if (! $pid) { syslog ("info", "pl_mom: Launching bandwidth monitor"); + if ($bwcap_default =~ /off/) { + syslog("info", "pl_mom: Max rate unlimited by default"); + } + reset_bandwidth_caps(); bandwidth_monitor(); die (0); } @@ -98,7 +111,7 @@ while (1) { } else { my $id = `id -u $hog`; chomp($id); - my $top = `chcontext --ctx $id top -b -n 1`; + my $top = `/usr/sbin/chcontext --ctx $id /usr/bin/top -b -n 1`; syslog ("warning", "pl_mom: Resetting slice $hog"); if (! $debug) { slice_reset($hog); @@ -123,7 +136,8 @@ sub reboot_kicker { system("touch $rebootfile"); if (! $debug) { - system("shutdown -r now"); + #system("shutdown -r now"); + system("/bin/sync; /sbin/reboot -f"); } die (0); } @@ -149,39 +163,104 @@ sub bandwidth_monitor { # Could save the list of capped slices in a file in order to # avoid re-sending mails if the daemon restarts. # Also may want a list of slices that are exempt from capping. - if (defined(%Cap)) { - undef %Cap; - } + if (defined(%Start)) { undef %Start; } + if (defined(%Now)) { undef %Now; } + if (defined(%Cap)) { undef %Cap; } - # Need to reset bandwidth limits here + reset_bandwidth_caps(); syslog("info", "pl_mom: Beginning bandwidth monitoring for $now"); } - # Get baseline counts - `touch $daily_log`; - open (BASE, "+<$daily_log") || - print "Cannot open $daily_log; $!\n"; - while () { - my ($sliceid, $bytecount) = split(/ /); - $Start{$sliceid} = $bytecount; + get_slice_names(); + get_baseline_counts(); + get_slice_limits(); + + foreach $slice ( sort (keys %Start) ) { + if (defined $Now{$slice}) { + $today = $Now{$slice} - $Start{$slice}; + if (! (defined ($Cutoff{$slice})||$bwcap_default =~ /off/)) { + $Cutoff{$slice} = $cutoff_default; + $Maxrate{$slice} = $bwcap_default; + } + if ($debug) { + if ($today) { + $cutoff = defined($Cutoff{$slice}) + ? $Cutoff{$slice} : ""; + print "Slice $slice sent $today bytes; ". + "cutoff $cutoff\n"; + } + } + if (defined ($Cutoff{$slice}) && + $today >= $Cutoff{$slice} && + ! defined($Cap{$slice})) { + $Cap{$slice} = "sent"; + bw_cap_mail($slice); + if (! $debug) { + log_bandwidth_cap($slice, $Maxrate{$slice}); + cap_bandwidth($slice, $Maxrate{$slice}); + } + } + } else { + # Token bucket for this slice is gone! + } } - - $status = `tc -s -d qdisc show`; - @lines = split(/\n/, $status); - for ($i = 0; $i < @lines; $i++) { - if ($lines[$i] =~ /qdisc pfifo/) { - $lines[$i] =~ s/^ +//; - @fields = split(/ /, $lines[$i]); - $slice = $fields[2]; - $slice =~ s/://; - - if ($slice != 9999) { - $lines[$i+1] =~ s/^ +//; - @fields = split(/ /, $lines[$i+1]); - $bytes = $fields[1]; - #if ($bytes) {print "Slice $slice sent $bytes bytes\n";} - + + sleep($bwmon_sleep); + } +} + +sub read_config_file { + if (-e $configfile) { + open (CONFIG, "<$configfile") || + print "Cannot open $configfile; $!\n"; + while () { + if (m/^(.*)=(.*)$/) { + ${$1} = $2; + if ($debug) { + print "read_config_file: $1 = ${$1}\n"; + } + } + } + close CONFIG; + } +} + +sub get_slice_names { + # Read slice names from /etc/passwd + if (defined (%Name)) { undef %Name; } + open (PASSWD, ") { + my ($slicename, $passwd, $sliceid) = split(/:/); + $Name{$sliceid} = $slicename; + } + close PASSWD; +} + +sub get_baseline_counts { + `touch $daily_log`; + open (BASE, "+<$daily_log") || + print "Cannot open $daily_log; $!\n"; + while () { + my ($slice, $bytecount) = split(/ /); + $Start{$slice} = $bytecount; + } + + my $status = `tc -s -d qdisc show`; + my $sliceid = "9999"; + @Lines = split(/\n/, $status); + foreach $line ( @Lines ) { + if ($line =~ /qdisc pfifo (.*): dev/) { + $sliceid = $1; + } else { + if ($line =~ /Sent (.*) bytes/) { + my $bytes = $1; + if ($sliceid != 9999) { + my $slice = $Name{$sliceid}; + if ($debug && $bytes) { + print "Slice: $slice ($sliceid), bytes $bytes\n"; + } if (! defined($Start{$slice})) { print BASE "$slice $bytes\n"; $Start{$slice} = $bytes; @@ -190,31 +269,64 @@ sub bandwidth_monitor { } } } - close (BASE); + } + close (BASE); +} - foreach $slice ( sort (keys %Start) ) { - if (defined $Now{$slice}) { - $today = $Now{$slice} - $Start{$slice}; - if ($today >= $byte_cutoff && ! defined($Cap{$slice})) { - $Cap{$slice} = "sent"; - $slicename = get_slice_name($slice); - if ($slicename) { - bw_cap_mail($slicename); - } else { - syslog("warning", "pl_mom: Could not find slice name". - " for slice ID $slice"); - } - # Add bandwidth cap here - } +sub get_slice_limits { + if (defined %Maxrate) { undef %Maxrate; } + if (defined %Cutoff) { undef %Cutoff; } + if (-e $vservers) { + my $result = `grep -H "^BWMAXRATE" $vservers/*.conf`; + chomp ($result); + my @Lines = split(/\n/,$result); + foreach $line ( @Lines ) { + if ($line =~ /\/([^\/]*).conf:BWMAXRATE=(.*)[Mm]bit/) { + $slice = $1; + $limit = $2."Mbit"; + $cutoff = ($2 * 1000000 * 86400)/8; } else { - # The /proc/virtual/ directory is gone... + if ($line =~ /\/([^\/]*).conf:BWMAXRATE=(.*)[Kk]bit/) { + $slice = $1; + $limit = $2."Kbit"; + $cutoff = ($2 * 1000 * 86400)/8; + } else { + die "Could not parse line $line"; + } + } + $Maxrate{$slice} = $limit; + $Cutoff{$slice} = $cutoff; + if ($debug) { + print "Slice $slice, maxrate $Maxrate{$slice}, ". + "cutoff $Cutoff{$slice}\n"; } } + } +} - sleep($bwmon_sleep); +sub reset_bandwidth_caps { + if (-e $capfile) { + open(CAP, "<$capfile") or die "Cannot open $capfile: $!"; + while () { + chomp(); + ($slicename, $oldcap) = split(/ /); + syslog("info", "pl_mom: Restoring bandwidth cap of $oldcap ". + "to $slicename"); + cap_bandwidth ($slicename, $oldcap); + } + close CAP; + unlink($capfile); } } +sub log_bandwidth_cap { + ($slicename, $cap) = @_; + syslog("warning", "pl_mom: Capping bandwidth of slice ". + "$slicename at $cap until midnight GMT."); + # Save current cap to $capfile + system("echo $slicename `bwlimit getcap $slicename` >> $capfile"); +} + sub send_mail { # Arg 0: recipient addresses, comma-separated string # Arg 1: subject line @@ -233,6 +345,12 @@ sub send_mail { close(SENDMAIL); } +sub cap_bandwidth { + ($slicename, $cap) = @_; + system("bwlimit setcap $slicename $cap"); + system("bwlimit on $slicename"); +} + sub get_date { my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdat) = localtime(time); @@ -241,36 +359,16 @@ sub get_date { return $date; } -sub get_slice_name { - # Arg 0: slice ID - - # Need to map slice id to slice name; is there a sensor? - # For now, get it from /etc/passwd - my $name = ""; - open (PASSWD, ") { - my ($slicename, $passwd, $sliceid) = split(/:/); - if ($sliceid == $_[0]) { - $name = $slicename; - } - } - close PASSWD; - return $name; -} - sub bw_cap_mail { + my ($slicename) = @_; my $hostname = hostname(); my $date = get_date(); - my $sent = int($byte_cutoff/1000000000); - - # Put this here because this is where we have the - syslog("warning", "pl_mom: Capping bandwidth of slice ". - "$slicename at $bwcap until midnight GMT."); + my $sent = int($Cutoff{$slicename}/(1024*1024)); + my $bwcap = $Maxrate{$slicename}; - send_mail("acb\@planet-lab.org", + send_mail("$alias_addr, $slicename\@slices.planet-lab.org", "$proc capped bandwidth of slice $slicename on $hostname", - "Slice $slicename has transmitted more than ${sent}GB today". + "Slice $slicename has transmitted more than ${sent}MB today". " on $hostname. ". "Its bandwidth will be capped at $bwcap until midnight GMT.". "\n\n$date $hostname bwcap $slicename\n"); @@ -371,10 +469,10 @@ sub swap_used { sub get_slice_info { if (! $debug) { - #$content = get "http://127.0.0.1:3100/slicestat"; $content = `curl -s http://127.0.0.1:3100/slicestat`; } else { - $content = `cat ../pl_mom-deploy/slicestat` + #$content = `cat ../pl_mom-deploy/slicestat` + $content = `curl -s http://127.0.0.1:3100/slicestat`; } my @lines = split(/\n/, $content); %Slice = ();