  Users can define time windows when they should receive alerts.
  Users can indicate they should receive a message when a failure of which they were previously notified has been cleared.
Index: modules/raeburn/monitor.pl
diff -u modules/raeburn/monitor.pl:1.5 modules/raeburn/monitor.pl:1.6
--- modules/raeburn/monitor.pl:1.5	Tue Jan 30 10:51:49 2007
+++ modules/raeburn/monitor.pl	Thu Feb 15 14:01:43 2007
@@ -59,17 +59,31 @@
 # An additional file - recipients -should be added to a server's subdirectory   
 # in $monitordir.  The file will contain an e-mail address of a user to
 # receive notification when a particular failure condition is encountered on
-# successive log-in attempts, and a list of failure types for which that 
-# user should be notified.
+# successive log-in attempts, a list of failure types for which that 
+# user should be notified, as well as the type - allclear - if the user
+# is to be notified when a failure of which the user was previously 
+# notified, has cleared, and the time window during which the user wishes
+# to receive alerts.  If a failure occurs outside a user's window,
+# the user will receive an e-mail once the start of the window is reached
+# if the failure has not been cleared in the interim, and if it's a 
+# failure type for which the user has selected notification.
+# The format of the time window is: start hour, start minute, end hour,
+# end minute (e.g., 7,0,23,0 or 9,0,2,0). If this is left blank, e-mail
+# will be sent regardless of the time. Times are server times, so if you
+# are in a different time zone, you should determine your window based
+# on the server's time zone (EST/EDT).
 # An example file would be:
 # raeburn@msu.edu:conlost,unavailable,missingparam,unauthenticated,invalidcookie
-# ,nologin,uninitialized,rolesfailed,navmapfailed,logoutfailed
-# itds@msu.edu:conlost
+# ,nologin,uninitialized,rolesfailed,navmapfailed,logoutfailed,allclear:7,0,23,0
+# itds@msu.edu:conlost:
 # In this case, raeburn@msu.edu would be contacted whenever consecutive failures
-# of any of the failure types occurred; whereas itds@msu.edu would only be
-# contacted if a conlost failure (no response from web server) occurred.
+# of any of the failure types occurred, and would also be contacted if 
+# a failure was cleared, between 7 am and 11 pm; whereas itds@msu.edu 
+# would only be contacted if a conlost failure (no response from web 
+# server) occurred, any time day or night.
 # The failure types for non-SSO servers are as follows:
 # conlost  - Request for login page times out ( > 20s) or returns error
@@ -202,16 +216,56 @@
     $upass = $cipher->decrypt_hex($upass);
+my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
+my %recipients;
 if (open (my $fh, "<$monitordir/$serveralias/recipients")) {
     my @buffer = <$fh>;
     foreach my $line (@buffer) {
-        my ($email,$conditions) = split/:/,$line;
-        @{$recipients{$email}} = ();
-        if ($conditions =~ m/,/) {
-            @{$recipients{$email}} = split/,/,$conditions;
+        my ($email,$conditions,$times) = split/:/,$line;
+        my ($start_hr,$start_min,$end_hr,$end_min);
+        if ($times) {
+            ($start_hr,$start_min,$end_hr,$end_min) = split(/,/,$times);
+        }
+        # Decide whether the current server time ($hour:$min, from localtime)
+        # falls inside this recipient's notification window.  A missing start
+        # or end hour means "no window": the recipient is always eligible.
+        my $in_window = 0;
+        if (($start_hr eq '') || ($end_hr eq '')) {
+           $in_window = 1;
+        } elsif ($end_hr < $start_hr) {
+            # Window wraps past midnight (e.g., 9,0,2,0): in the window if
+            # after the start time OR before the end time.
+            if (($hour > $start_hr) || (($hour == $start_hr) &&
+               ($min > $start_min))) {
+                $in_window = 1;
+            } elsif (($hour < $end_hr) || (($hour == $end_hr) &&
+               ($min < $end_min))) {
+                $in_window = 1;
+            }
+        } elsif ($end_hr == $start_hr) {
+            if ($hour == $start_hr) {
+                if ($end_min < $start_min) {
+                    if (($min < $end_min) || ($min > $start_min)) {
+                        $in_window = 1;
+                    }
+                } else {
+                    if ($min > $start_min && $min < $end_min) {
+                        $in_window = 1;
+                    }
+                }
+            } elsif ($end_min < $start_min) {
+                # Same hour but end minute precedes start minute: the window
+                # wraps around the clock, so every other hour is inside it.
+                $in_window = 1;
+            }
         } else {
-            @{$recipients{$email}} = ("$conditions");
+            # Ordinary same-day window: after the start AND before the end.
+            if (($hour > $start_hr) || (($hour == $start_hr) &&
+                ($min > $start_min))) {
+                if (($hour < $end_hr) || (($hour == $end_hr) &&
+                    ($min < $end_min))) {
+                    $in_window = 1;
+                }
+            }
+        }
+        if ($in_window) {
+            if ($conditions =~ m/,/) {
+                @{$recipients{$email}} = split/,/,$conditions;
+            } else {
+                @{$recipients{$email}} = ("$conditions");
+            }
@@ -222,6 +276,7 @@
 my %formvalues = ();
 my %loadtimes = ();
+my %cleared;
 my ($outcome,$loginpage,$lonid,$loadbalserver);
 my @failures = ('conlost','unavailable','missingparam','unauthenticated','invalidcookie','nologin','uninitialized','rolesfailed','navmapfailed','logoutfailed');
@@ -238,14 +293,23 @@
 my $logfile = $monitordir.'/'.$serveralias.'/log'; 
 ($outcome,$loadbalserver) = &attempt_access($outcome,$server,$loginpage,$lonid,$role,\@formitems,\%formvalues,\%loadtimes,$monitordir,$path_to_java,$loncookie_file,$loadbalance,$sso,\%ssoparam,\%ssourl);
-my $mailflag = &alertstatus($outcome,$server,$serveralias,\@failures,$logfile,$monitordir);
-if ($mailflag) {
-    my $mailresult = &mailalert($server,$outcome,$contact_email,\%recipients);
+my %needmail = &alertstatus($outcome,$server,$serveralias,\@failures,$logfile,$monitordir,\%recipients,\%cleared);
+if (keys(%needmail) > 0) {
+    my $mailresult = &mailalert($server,$outcome,$contact_email,\%needmail,'fail');
     open (my $logfh,">>$logfile");
-    print $logfh localtime(time)." - sending mail - result = $mailresult\n";   
+    print $logfh localtime(time)." - sending alert mail - result = $mailresult\n";
+if (keys(%cleared) > 0) {
+    foreach my $failure (sort(keys(%cleared))) {
+        my $mailresult = &mailalert($server,$failure,$contact_email,$cleared{$failure},'allclear');
+        open (my $logfh,">>$logfile");
+        print $logfh localtime(time)." - sending allclear mail - result = $mailresult\n";
+        close($logfh);
+    }
 my $serverid;
 if ($sso) {
@@ -459,7 +523,7 @@
                my $fieldname = $items[0];
                my $fieldvalue;
                if ($formvalues->{$field} ne '') {
-                   $fieldvalue = $formvalues->{$field};      
+                   $fieldvalue = $formvalues->{$field};
                push (@{$content},$fieldname => $fieldvalue);
@@ -632,61 +696,130 @@
 sub alertstatus {
-    my ($outcome,$server,$serveralias,$failures,$logfile,$monitordir) = @_;
-    my $counter = 0;
-    my $mailflag = 0;
+    my ($outcome,$server,$serveralias,$failures,$logfile,$monitordir,$recipients,$cleared) = @_;
+    my $timestamp = time;
+    my (%needmail,@notified);
     open(my $logfh,">>$logfile");
-    while ($counter<@{$failures}) {
-        if ($outcome eq $failures[$counter]) {
-            if (-e "$monitordir/$serveralias/detected/$$failures[$counter]") {
-                if  (-e "$monitordir/$serveralias/alert/$$failures[$counter]") {
-                    print $logfh localtime(time)." - $outcome - current alert\n";
+    foreach my $failure (@{$failures}) {
+        if ($outcome eq $failure) {
+            if (-e "$monitordir/$serveralias/detected/$outcome") {
+                if (-e "$monitordir/$serveralias/alert/$outcome") {
+                    ($timestamp,@notified) = &read_alertrecord($outcome,
+                                                   $serveralias,$monitordir);
+                    my ($newmailstr,@allmailed,@newmail);
+                    foreach my $user (keys(%{$recipients})) {
+                        if (grep(/^\Q$outcome\E$/,@{$recipients->{$user}})) {
+                            if (!grep(/^\Q$user\E/,@notified)) {
+                                push(@notified,$user);
+                                push(@newmail,$user);
+                            }
+                            push(@allmailed,$user);
+                        }
+                    }
+                    if ($timestamp =~ /^old/) {
+                        $timestamp = time.' '.$timestamp;
+                        if (@allmailed > 0) {
+                            foreach my $user (@allmailed) {
+                                $needmail{$user} = $outcome;
+                            }
+                            $newmailstr = join(',',@allmailed);
+                            print $logfh localtime(time)." - $outcome - new alerts sent to $newmailstr.\n";
+                        }
+                    } else {
+                        print $logfh localtime(time)." - $outcome - current alert";
+                        if (@newmail > 0) {
+                            foreach my $user (@newmail) {
+                                $needmail{$user} = $outcome;
+                            }
+                            $newmailstr = join(',',@newmail);
+                            print $logfh " - new alerts sent to $newmailstr";
+                        }
+                        print $logfh ".\n";
+                    }
+                    if (keys(%needmail) > 0) {
+                        my $mailedstr = join(':',sort(@notified));
+                        &write_alertrecord($outcome,$serveralias,$monitordir,
+                                           $timestamp,$mailedstr);
+                    }
                 } else {
-                    $mailflag = 1;
-                    if (open(my $fh,">$monitordir/$serveralias/alert/$$failures[$counter]")) {
-                        print $fh time;
-                        close $fh;
-                        print $logfh localtime(time)." - $outcome - new alert sent\n";
+                    my $newmailstr;
+                    foreach my $user (keys(%{$recipients})) {
+                        if (grep(/^\Q$outcome\E$/,@{$recipients->{$user}})) {
+                            $needmail{$user} = $outcome;
+                        }
+                    }
+                    if (keys(%needmail) > 0) {
+                        $newmailstr = join(':',sort(keys(%needmail)));
+                        &write_alertrecord($outcome,$serveralias,$monitordir,
+                                           $timestamp,$newmailstr);
+                        print $logfh localtime(time)." - $outcome - new alerts sent to $newmailstr\n";
             } else {
-                if (open(my $fh,">$monitordir/$serveralias/detected/$$failures[$counter]")) {
+                if (open(my $fh,">$monitordir/$serveralias/detected/$outcome")) {
                     print $fh time;
                     close $fh;
                     print $logfh localtime(time)." - $outcome - first failure\n";
         } else {
-            if (-e "$monitordir/$serveralias/detected/$$failures[$counter]") {
-                unlink("$monitordir/$serveralias/detected/$$failures[$counter]");
-                print $logfh localtime(time)." - $$failures[$counter] - old failure cleared\n";
-            }
-            if (-e "$monitordir/$serveralias/alert/$$failures[$counter]") {
-                unlink("$monitordir/$serveralias/alert/$$failures[$counter]");
-                print $logfh localtime(time)." - $$failures[$counter] - old alert cleared\n";
+            if (-e "$monitordir/$serveralias/detected/$failure") {
+                unlink("$monitordir/$serveralias/detected/$failure");
+                print $logfh localtime(time)." - $failure - old failure cleared\n";
+            }
+            if (-e "$monitordir/$serveralias/alert/$failure") {
+                ($timestamp,@notified) = &read_alertrecord($failure,
+                                                      $serveralias,$monitordir);
+                my (@remnants,@clearances);
+                foreach my $user (@notified) {
+                    if (ref($recipients->{$user}) eq 'ARRAY') {
+                        if (grep/^allclear$/,@{$recipients->{$user}}) {
+                            $$cleared{$failure}{$user} = 1;
+                            push(@clearances,$user);
+                        }
+                    } else {
+                        push(@remnants,$user);
+                    }
+                }
+                if (@clearances > 0) {
+                    my $allclearstr = join(':',@clearances);
+                    print $logfh localtime(time)." - $failure - all clear sent to $allclearstr\n";
+                }
+                if (@remnants > 0) {
+                    my $remnantstr = join(':',sort(@remnants));
+                    if ($timestamp !~ /^old/) {
+                        $timestamp = 'old: '.$timestamp;
+                        print $logfh localtime(time)." - $failure - alert marked as old.\n";
+                    }
+                    &write_alertrecord($failure,$serveralias,
+                                       $monitordir,$timestamp,$remnantstr);
+                } else {
+                    unlink("$monitordir/$serveralias/alert/$failure");
+                    print $logfh localtime(time)." - $failure - old alert cleared.\n";
+                }
-        $counter ++;
-    return $mailflag;
+    return %needmail;
 sub mailalert {
-    my ($server,$failure,$contact_email,$recipients) = @_;
-    my $outcome;
+    my ($server,$failure,$contact_email,$needmail,$caller) = @_;
+    my ($outcome,$mailtext);
     my $toline = "To: ";
-    foreach my $email (keys %{$recipients}) {
-        if (grep/^$failure$/,@{$$recipients{$email}}) {
-            $toline .= $email.',';
-        }
+    foreach my $email (keys(%{$needmail})) {
+        $toline .= $email.',';
     $toline =~ s/,$//;
+    if ($caller eq 'allclear') {
+        $mailtext =  "\n\nAll clear for failures of type: $failure on $server\n\n";
+    } else {
+        $mailtext = "\n\nTwo successive failures of type: $failure on $server\n\n";
+    }
     my $alertmail = "$toline\n".
                     "From: $contact_email\n".
-                    "Subject: MSU LON-CAPA monitoring\n".
-                    "\n\nTwo successive failures of type: $failure on $server\n\n";
+                    "Subject: MSU LON-CAPA monitoring\n".$mailtext;
     if (open(MAIL, "|/usr/lib/sendmail -oi -t -odb")) {
         print MAIL $alertmail;
@@ -696,3 +829,26 @@
     return $outcome;
+sub write_alertrecord {
+    my ($outcome,$serveralias,$monitordir,$timestamp,$newmailstr) = @_;
+    if (open(my $fh,">$monitordir/$serveralias/alert/$outcome")) {
+        print $fh "$timestamp\n";
+        print $fh $newmailstr;
+        close $fh;
+    }
+sub read_alertrecord {
+    my ($outcome,$serveralias,$monitordir) = @_;
+    my ($timestamp,@notified);
+    if (open(my $fh,"<$monitordir/$serveralias/alert/$outcome")) {
+        my @entries = <$fh>;
+        close($fh);
+        chomp(@entries);
+        $timestamp = $entries[0];
+        @notified = split(/:/,$entries[1]);
+    }
+    return ($timestamp,@notified);
