[LON-CAPA-cvs] cvs: modules /raeburn monitor.pl
raeburn
lon-capa-cvs@mail.lon-capa.org
Thu, 15 Feb 2007 19:01:45 -0000
This is a MIME encoded message
--raeburn1171566105
Content-Type: text/plain
raeburn Thu Feb 15 14:01:45 2007 EDT
Modified files:
/modules/raeburn monitor.pl
Log:
Users can define time windows when they should receive alerts.
Users can indicate they should receive a message when a failure of which they were previously notified has been cleared.
--raeburn1171566105
Content-Type: text/plain
Content-Disposition: attachment; filename="raeburn-20070215140145.txt"
Index: modules/raeburn/monitor.pl
diff -u modules/raeburn/monitor.pl:1.5 modules/raeburn/monitor.pl:1.6
--- modules/raeburn/monitor.pl:1.5 Tue Jan 30 10:51:49 2007
+++ modules/raeburn/monitor.pl Thu Feb 15 14:01:43 2007
@@ -59,17 +59,31 @@
# An additional file - recipients -should be added to a server's subdirectory
# in $monitordir. The file will contain an e-mail address of a user to
# receive notification when a particular failure condition is encountered on
-# successive log-in attempts, and a list of failure types for which that
-# user should be notified.
+# successive log-in attempts, a list of failure types for which that
+# user should be notified, as well as the type - allclear - if the user
+# is to be notified when a failure of which the user was previously
+# notified has cleared, and the time window during which the user wishes
+# to receive alerts. If a failure occurs outside a user's window,
+# the user will receive an e-mail once the start of the window is reached
+# if the failure has not been cleared in the interim, and if it's a
+# failure type for which the user has selected notification.
+#
+# The format of the time window is: start hour, start minute, end hour,
+# end minute (e.g., 7,0,23,0 or 9,0,2,0). If this is left blank, e-mail
+# will be sent regardless of the time. Times are server times, so if you
+# are in a different time zone, you should determine your window based
+# on the server's time zone (EST/EDT).
#
# An example file would be:
# raeburn@msu.edu:conlost,unavailable,missingparam,unauthenticated,invalidcookie
-# ,nologin,uninitialized,rolesfailed,navmapfailed,logoutfailed
-# itds@msu.edu:conlost
+# ,nologin,uninitialized,rolesfailed,navmapfailed,logoutfailed,allclear:7,0,23,0
+# itds@msu.edu:conlost:
#
# In this case, raeburn@msu.edu would be contacted whenever consecutive failures
-# of any of the failure types occurred; whereas itds@msu.edu would only be
-# contacted if a conlost failure (no response from web server) occurred.
+# of any of the failure types occurred, and would also be contacted if
+# a failure was cleared, between 7 am and 11 pm; whereas itds@msu.edu
+# would only be contacted if a conlost failure (no response from web
+# server) occurred, any time day or night.
#
# The failure types for non-SSO servers are as follows:
# conlost - Request for login page times out ( > 20s) or returns error
@@ -202,16 +216,56 @@
$upass = $cipher->decrypt_hex($upass);
close($fh);
}
+
+my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); # server time; $hour and $min drive the window test
+my %recipients; # address => list of failure types, filled only for users inside their window
if (open (my $fh, "<$monitordir/$serveralias/recipients")) {
my @buffer = <$fh>;
close($fh);
foreach my $line (@buffer) {
- my ($email,$conditions) = split/:/,$line;
- @{$recipients{$email}} = ();
- if ($conditions =~ m/,/) {
- @{$recipients{$email}} = split/,/,$conditions;
+ my ($email,$conditions,$times) = split/:/,$line;
+ my ($start_hr,$start_min,$end_hr,$end_min);
+ if ($times) {
+ ($start_hr,$start_min,$end_hr,$end_min) = split(/,/,$times);
+ }
+ my $in_window = 0;
+ if (($start_hr eq '') || ($end_hr eq '')) { # no usable window given: always notify
+ $in_window = 1;
+ } elsif ($end_hr < $start_hr) { # window wraps past midnight (e.g., 9,0,2,0)
+ if (($hour > $start_hr) || (($hour == $start_hr) &&
+ ($min > $start_min))) {
+ $in_window = 1;
+ } elsif (($hour < $end_hr) || (($hour == $end_hr) &&
+ ($min < $end_min))) { # fixed: minutes are compared with the end minute, not the end hour
+ $in_window = 1;
+ }
+ } elsif ($end_hr == $start_hr) { # window starts and ends within the same hour
+ if ($hour == $start_hr) {
+ if ($end_min < $start_min) {
+ if (($min < $end_min) || ($min > $start_min)) {
+ $in_window = 1;
+ }
+ } else {
+ if ($min > $start_min && $min < $end_min) { # fixed: minutes are compared with the start minute, not the start hour
+ $in_window = 1;
+ }
+ }
+ }
} else {
- @{$recipients{$email}} = ("$conditions");
+ if (($hour > $start_hr) || (($hour == $start_hr) &&
+ ($min > $start_min))) {
+ if (($hour < $end_hr) || (($hour == $end_hr) &&
+ ($min < $end_min))) {
+ $in_window = 1;
+ }
+ }
+ }
+ if ($in_window) { # users outside their window get no entry in %recipients
+ if ($conditions =~ m/,/) {
+ @{$recipients{$email}} = split/,/,$conditions;
+ } else {
+ @{$recipients{$email}} = ("$conditions");
+ }
}
}
}
@@ -222,6 +276,7 @@
my %formvalues = ();
my %loadtimes = ();
+my %cleared;
my ($outcome,$loginpage,$lonid,$loadbalserver);
my @failures = ('conlost','unavailable','missingparam','unauthenticated','invalidcookie','nologin','uninitialized','rolesfailed','navmapfailed','logoutfailed');
@@ -238,14 +293,23 @@
my $logfile = $monitordir.'/'.$serveralias.'/log';
($outcome,$loadbalserver) = &attempt_access($outcome,$server,$loginpage,$lonid,$role,\@formitems,\%formvalues,\%loadtimes,$monitordir,$path_to_java,$loncookie_file,$loadbalance,$sso,\%ssoparam,\%ssourl);
-my $mailflag = &alertstatus($outcome,$server,$serveralias,\@failures,$logfile,$monitordir);
-if ($mailflag) {
- my $mailresult = &mailalert($server,$outcome,$contact_email,\%recipients);
+my %needmail = &alertstatus($outcome,$server,$serveralias,\@failures,$logfile,$monitordir,\%recipients,\%cleared); # %needmail: address => failure type to alert now; %cleared is filled in by alertstatus
+if (keys(%needmail) > 0) {
+ my $mailresult = &mailalert($server,$outcome,$contact_email,\%needmail,'fail'); # one failure message addressed to every key of %needmail
open (my $logfh,">>$logfile");
- print $logfh localtime(time)." - sending mail - result = $mailresult\n";
+ print $logfh localtime(time)." - sending alert mail - result = $mailresult\n";
close($logfh);
}
+if (keys(%cleared) > 0) { # %cleared: failure type => { address => 1 }
+ foreach my $failure (sort(keys(%cleared))) { # one all-clear message per cleared failure type
+ my $mailresult = &mailalert($server,$failure,$contact_email,$cleared{$failure},'allclear');
+ open (my $logfh,">>$logfile");
+ print $logfh localtime(time)." - sending allclear mail - result = $mailresult\n";
+ close($logfh);
+ }
+}
+
open(TIMES,">>$monitordir/$serveralias/times");
my $serverid;
if ($sso) {
@@ -459,7 +523,7 @@
my $fieldname = $items[0];
my $fieldvalue;
if ($formvalues->{$field} ne '') {
- $fieldvalue = $formvalues->{$field};
+ $fieldvalue = $formvalues->{$field};
}
push (@{$content},$fieldname => $fieldvalue);
}
@@ -632,61 +696,130 @@
}
sub alertstatus {
- my ($outcome,$server,$serveralias,$failures,$logfile,$monitordir) = @_;
- my $counter = 0;
- my $mailflag = 0;
+ my ($outcome,$server,$serveralias,$failures,$logfile,$monitordir,$recipients,$cleared) = @_;
+ my $timestamp = time; # used as the record time when a new alert record is written
+ my (%needmail,@notified); # %needmail (returned): address => failure type to mail now
open(my $logfh,">>$logfile");
- while ($counter<@{$failures}) {
- if ($outcome eq $failures[$counter]) {
- if (-e "$monitordir/$serveralias/detected/$$failures[$counter]") {
- if (-e "$monitordir/$serveralias/alert/$$failures[$counter]") {
- print $logfh localtime(time)." - $outcome - current alert\n";
+ foreach my $failure (@{$failures}) {
+ if ($outcome eq $failure) {
+ if (-e "$monitordir/$serveralias/detected/$outcome") {
+ if (-e "$monitordir/$serveralias/alert/$outcome") {
+ ($timestamp,@notified) = &read_alertrecord($outcome,
+ $serveralias,$monitordir);
+ my ($newmailstr,@allmailed,@newmail);
+ foreach my $user (keys(%{$recipients})) {
+ if (grep(/^\Q$outcome\E$/,@{$recipients->{$user}})) {
+ if (!grep(/^\Q$user\E$/,@notified)) { # fixed: exact match, so a prefix of a notified address is not treated as notified
+ push(@notified,$user);
+ push(@newmail,$user);
+ }
+ push(@allmailed,$user);
+ }
+ }
+ if ($timestamp =~ /^old/) { # record was marked old after the failure cleared; mail every matching user again
+ $timestamp = time.' '.$timestamp;
+ if (@allmailed > 0) {
+ foreach my $user (@allmailed) {
+ $needmail{$user} = $outcome;
+ }
+ $newmailstr = join(',',@allmailed);
+ print $logfh localtime(time)." - $outcome - new alerts sent to $newmailstr.\n";
+ }
+ } else {
+ print $logfh localtime(time)." - $outcome - current alert";
+ if (@newmail > 0) { # only users not yet listed in the record are mailed
+ foreach my $user (@newmail) {
+ $needmail{$user} = $outcome;
+ }
+ $newmailstr = join(',',@newmail);
+ print $logfh " - new alerts sent to $newmailstr";
+ }
+ print $logfh ".\n";
+ }
+ if (keys(%needmail) > 0) {
+ my $mailedstr = join(':',sort(@notified));
+ &write_alertrecord($outcome,$serveralias,$monitordir,
+ $timestamp,$mailedstr);
+ }
} else {
- $mailflag = 1;
- if (open(my $fh,">$monitordir/$serveralias/alert/$$failures[$counter]")) {
- print $fh time;
- close $fh;
- print $logfh localtime(time)." - $outcome - new alert sent\n";
+ my $newmailstr;
+ foreach my $user (keys(%{$recipients})) {
+ if (grep(/^\Q$outcome\E$/,@{$recipients->{$user}})) {
+ $needmail{$user} = $outcome;
+ }
+ }
+ if (keys(%needmail) > 0) {
+ $newmailstr = join(':',sort(keys(%needmail)));
+ &write_alertrecord($outcome,$serveralias,$monitordir,
+ $timestamp,$newmailstr);
+ print $logfh localtime(time)." - $outcome - new alerts sent to $newmailstr\n";
}
}
} else {
- if (open(my $fh,">$monitordir/$serveralias/detected/$$failures[$counter]")) {
+ if (open(my $fh,">$monitordir/$serveralias/detected/$outcome")) {
print $fh time;
close $fh;
print $logfh localtime(time)." - $outcome - first failure\n";
}
}
} else {
- if (-e "$monitordir/$serveralias/detected/$$failures[$counter]") {
- unlink("$monitordir/$serveralias/detected/$$failures[$counter]");
- print $logfh localtime(time)." - $$failures[$counter] - old failure cleared\n";
-
- }
- if (-e "$monitordir/$serveralias/alert/$$failures[$counter]") {
- unlink("$monitordir/$serveralias/alert/$$failures[$counter]");
- print $logfh localtime(time)." - $$failures[$counter] - old alert cleared\n";
+ if (-e "$monitordir/$serveralias/detected/$failure") {
+ unlink("$monitordir/$serveralias/detected/$failure");
+ print $logfh localtime(time)." - $failure - old failure cleared\n";
+ }
+ if (-e "$monitordir/$serveralias/alert/$failure") {
+ ($timestamp,@notified) = &read_alertrecord($failure,
+ $serveralias,$monitordir);
+ my (@remnants,@clearances);
+ foreach my $user (@notified) {
+ if (ref($recipients->{$user}) eq 'ARRAY') { # user has an entry in the recipients hash passed in
+ if (grep/^allclear$/,@{$recipients->{$user}}) {
+ $$cleared{$failure}{$user} = 1;
+ push(@clearances,$user);
+ }
+ } else {
+ push(@remnants,$user); # no entry in the recipients hash: keep the user in the record
+ }
+ }
+ if (@clearances > 0) {
+ my $allclearstr = join(':',@clearances);
+ print $logfh localtime(time)." - $failure - all clear sent to $allclearstr\n";
+ }
+ if (@remnants > 0) {
+ my $remnantstr = join(':',sort(@remnants));
+ if ($timestamp !~ /^old/) {
+ $timestamp = 'old: '.$timestamp;
+ print $logfh localtime(time)." - $failure - alert marked as old.\n";
+ }
+ &write_alertrecord($failure,$serveralias,
+ $monitordir,$timestamp,$remnantstr);
+ } else {
+ unlink("$monitordir/$serveralias/alert/$failure");
+ print $logfh localtime(time)." - $failure - old alert cleared.\n";
+ }
}
}
- $counter ++;
}
close($logfh);
- return $mailflag;
+ return %needmail;
}
sub mailalert {
- my ($server,$failure,$contact_email,$recipients) = @_;
- my $outcome;
+ my ($server,$failure,$contact_email,$needmail,$caller) = @_;
+ my ($outcome,$mailtext);
my $toline = "To: ";
- foreach my $email (keys %{$recipients}) {
- if (grep/^$failure$/,@{$$recipients{$email}}) {
- $toline .= $email.',';
- }
+ foreach my $email (keys(%{$needmail})) {
+ $toline .= $email.',';
}
$toline =~ s/,$//;
+ if ($caller eq 'allclear') {
+ $mailtext = "\n\nAll clear for failures of type: $failure on $server\n\n";
+ } else {
+ $mailtext = "\n\nTwo successive failures of type: $failure on $server\n\n";
+ }
my $alertmail = "$toline\n".
"From: $contact_email\n".
- "Subject: MSU LON-CAPA monitoring\n".
- "\n\nTwo successive failures of type: $failure on $server\n\n";
+ "Subject: MSU LON-CAPA monitoring\n".$mailtext;
if (open(MAIL, "|/usr/lib/sendmail -oi -t -odb")) {
print MAIL $alertmail;
close(MAIL);
@@ -696,3 +829,26 @@
}
return $outcome;
}
+
+sub write_alertrecord {
+ my ($outcome,$serveralias,$monitordir,$timestamp,$newmailstr) = @_;
+ if (open(my $fh,'>',"$monitordir/$serveralias/alert/$outcome")) { # three-argument open: the mode is never parsed out of the path
+ print $fh "$timestamp\n"; # line 1: timestamp string exactly as passed in
+ print $fh $newmailstr; # line 2: addresses string exactly as passed in (callers join with ':')
+ close $fh;
+ }
+}
+
+sub read_alertrecord {
+ my ($outcome,$serveralias,$monitordir) = @_;
+ my ($timestamp,@notified); # both stay empty when the record cannot be opened
+ if (open(my $fh,'<',"$monitordir/$serveralias/alert/$outcome")) { # three-argument open: the mode is never parsed out of the path
+ my @entries = <$fh>;
+ close($fh);
+ chomp(@entries);
+ $timestamp = $entries[0]; # line 1: timestamp string as stored by write_alertrecord
+ @notified = split(/:/,$entries[1]) if (defined($entries[1])); # line 2 is absent in records that hold only a time
+ }
+ return ($timestamp,@notified);
+}
+
--raeburn1171566105--