[LON-CAPA-cvs] cvs: loncom(version_2_11_X) / loncron

raeburn raeburn at source.lon-capa.org
Sat Jan 30 17:25:53 EST 2021


raeburn		Sat Jan 30 22:25:53 2021 EDT

  Modified files:              (Branch: version_2_11_X)
    /loncom	loncron 
  Log:
  - For 2.11
    Backport 1.113, 1.114, 1.116 (modified), 1.125
  
  
-------------- next part --------------
Index: loncom/loncron
diff -u loncom/loncron:1.103.2.8 loncom/loncron:1.103.2.9
--- loncom/loncron:1.103.2.8	Sat May  9 19:24:25 2020
+++ loncom/loncron	Sat Jan 30 22:25:53 2021
@@ -2,7 +2,7 @@
 
 # Housekeeping program, started by cron, loncontrol and loncron.pl
 #
-# $Id: loncron,v 1.103.2.8 2020/05/09 19:24:25 raeburn Exp $
+# $Id: loncron,v 1.103.2.9 2021/01/30 22:25:53 raeburn Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -37,10 +37,13 @@
 use Apache::lonnet;
 use Apache::loncommon;
 
+use LWP::UserAgent();
+use HTTP::Request();
 use IO::File;
 use IO::Socket;
 use HTML::Entities;
 use Getopt::Long;
+use File::ReadBackwards;
 use File::Copy;
 use Sys::Hostname::FQDN();
 
@@ -76,14 +79,14 @@
 	rename("$file.2","$file.3");
 	rename("$file.1","$file.2");
 	rename("$file","$file.1");
-    } 
+    }
 }
 
 sub start_daemon {
     my ($fh,$daemon,$pidfile,$args) = @_;
     my $progname=$daemon;
     if ($daemon eq 'lonc') {
-	$progname='loncnew'; 
+	$progname='loncnew';
     }
     my $error_fname="$perlvar{'lonDaemons'}/logs/${daemon}_errors";
     &rotate_logfile($error_fname,$fh,'error logs');
@@ -128,9 +131,9 @@
         }
         &log($fh,"</tt></p>");
     }
-   
+
     my $pidfile="$perlvar{'lonDaemons'}/logs/$daemon.pid";
-    
+ 
     my $restartflag=1;
     my $daemonpid;
     if (-e $pidfile) {
@@ -210,7 +213,7 @@
 	    }
         }
     }
-    
+
     my $fname="$perlvar{'lonDaemons'}/logs/$daemon.log";
     &rotate_logfile($fname,$fh,'logs');
 
@@ -223,13 +226,13 @@
     my ($fh)=@_;
     &log($fh,'<hr /><a name="machine" /><h2>Machine Information</h2>');
     &log($fh,"<h3>loadavg</h3>");
-	
+
     open (LOADAVGH,"/proc/loadavg");
     my $loadavg=<LOADAVGH>;
     close (LOADAVGH);
-    
+
     &log($fh,"<tt>$loadavg</tt>");
-    
+
     my @parts=split(/\s+/,$loadavg);
     if ($parts[1]>4.0) {
 	$errors++;
@@ -243,14 +246,14 @@
     &log($fh,"<pre>");
 
     open (DFH,"df|");
-    while (my $line=<DFH>) { 
-	&log($fh,&encode_entities($line,'<>&"')); 
+    while (my $line=<DFH>) {
+	&log($fh,&encode_entities($line,'<>&"'));
 	@parts=split(/\s+/,$line);
 	my $usage=$parts[4];
 	$usage=~s/\W//g;
-	if ($usage>90) { 
+	if ($usage>90) {
 	    $warnings++;
-	    $notices++; 
+	    $notices++;
 	} elsif ($usage>80) {
 	    $warnings++;
 	} elsif ($usage>60) {
@@ -267,8 +270,8 @@
     my $psproc=0;
 
     open (PSH,"ps aux --cols 140 |");
-    while (my $line=<PSH>) { 
-	&log($fh,&encode_entities($line,'<>&"')); 
+    while (my $line=<PSH>) {
+	&log($fh,&encode_entities($line,'<>&"'));
 	$psproc++;
     }
     close (PSH);
@@ -290,7 +293,7 @@
     my %simplestatus=();
     my $now=time;
     my $date=localtime($now);
-    
+
 
     &log($fh,(<<ENDHEADERS));
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
@@ -406,12 +409,12 @@
                 ($cleaned,$old,$removed) = 
                      &recursive_clean_tmp($innerdir,$cleaned,$old,$removed,$errors);
                 my @doms = &Apache::lonnet::current_machine_domains();
-                
+
                 if (open(my $dirhandle,$fname)) {
                     unless (($innerdir eq 'helprequests') ||
                             (($innerdir =~ /^addcourse/) && ($innerdir !~ m{/\d+$}))) {
                         my @contents = grep {!/^\.\.?$/} readdir($dirhandle);
-                                      join('&&',@contents)."\n";    
+                                      join('&&',@contents)."\n";
                         if (scalar(grep {!/^\.\.?$/} readdir($dirhandle)) == 0) {
                             closedir($dirhandle);
                             if ($fname =~ m{^\Q$perlvar{'lonDaemons'}\E/tmp/}) {
@@ -464,7 +467,7 @@
                                 }
                             }
                         } elsif (ref($errors->{failopen}) eq 'ARRAY') {
-                            push(@{$errors->{failopen}},$fname); 
+                            push(@{$errors->{failopen}},$fname);
                         }
                     } else {
                         if (unlink($fname)) {
@@ -618,16 +621,16 @@
     print "Checking logs.\n";
     if (-e "$perlvar{'lonDaemons'}/logs/lonnet.log"){
 	open (DFH,"tail -n50 $perlvar{'lonDaemons'}/logs/lonnet.log|");
-	while (my $line=<DFH>) { 
+	while (my $line=<DFH>) {
 	    &log($fh,&encode_entities($line,'<>&"'));
 	}
 	close (DFH);
     }
     &log($fh,"</pre><h3>Perm Log</h3><pre>");
-    
+
     if (-e "$perlvar{'lonDaemons'}/logs/lonnet.perm.log") {
 	open(DFH,"tail -n10 $perlvar{'lonDaemons'}/logs/lonnet.perm.log|");
-	while (my $line=<DFH>) { 
+	while (my $line=<DFH>) {
 	    &log($fh,&encode_entities($line,'<>&"'));
 	}
 	close (DFH);
@@ -691,35 +694,199 @@
 
 # ------------------------------------------------------------ Delayed messages
 sub check_delayed_msg {
-    my ($fh)=@_;
+    my ($fh,$weightsref,$exclusionsref)=@_;
     &log($fh,'<hr /><a name="delayed" /><h2>Delayed Messages</h2>');
     print "Checking buffers.\n";
     
     &log($fh,'<h3>Scanning Permanent Log</h3>');
 
     my $unsend=0;
+    my $ignored=0;
 
     my %hostname = &Apache::lonnet::all_hostnames();
     my $numhosts = scalar(keys(%hostname));
+    my $checkbackwards = 0;
+    my $checkfrom = 0;
+    my $checkexcluded = 0;
+    my (%bymachine,%weights,%exclusions,%serverhomes);
+    if (ref($weightsref) eq 'HASH') {
+        %weights = %{$weightsref};
+    }
+    if (ref($exclusionsref) eq 'HASH') {
+        %exclusions = %{$exclusionsref};
+        if (keys(%exclusions)) {
+            $checkexcluded = 1;
+            %serverhomes = &read_serverhomeIDs();
+        }
+    }
 
-    my $dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log");
-    while (my $line=<$dfh>) {
-	my ($time,$sdf,$dserv,$dcmd)=split(/:/,$line);
-        if ($numhosts) {
-            next unless ($hostname{$dserv});
-        }
-	if ($sdf eq 'F') { 
-	    my $local=localtime($time);
-	    &log($fh,"<b>Failed: $time, $dserv, $dcmd</b><br />");
-	    $warnings++;
-	}
-	if ($sdf eq 'S') { $unsend--; }
-	if ($sdf eq 'D') { $unsend++; }
+#
+# For LON-CAPA 1.2.0 to 2.1.3 (release dates: 8/31/2004 and 3/31/2006) any
+# entry logged in lonnet.perm.log for completion of a delayed (critical)
+# transaction lacked the hostID for the remote node to which the command
+# to be completed was sent.
+#
+# Because of this, exclusion of items in lonnet.perm.log for nodes which are
+# no longer part of the cluster from adding to the overall "unsend" count
+# needs additional effort besides the changes made in loncron rev. 1.105.
+#
+# For "S" (completion) events, logging in LON-CAPA 1.2.0 through 2.1.3 included
+# "LondTransaction=HASH(hexadecimal)->getClient() :$cmd", where the hexadecimal
+# is a memory location, and $cmd is the command sent to the remote node.
+#
+# Starting with 2.2.0 (released 8/21/2006) logging for "S" (completion) events
+# had sethost:$host_id:$cmd after LondTransaction=HASH(hexadecimal)->getClient()
+#
+# Starting with 2.4.1 (released 6/13/2007) logging for "S" replaced echoing the
+# getClient() call with the result of the Transaction->getClient() call itself,
+# which is undef for completion of delivery of a delayed message.
+#
+# The net effect of these changes is that lonnet.perm.log is now accessed three
+# times: (a) oldest record is checked, if earlier than release date for 2.5.0
+# then (b) file is read backwards, with timestamp recorded for most recent
+# instance of logged "S" event for "update" command without "sethost:$host_id:"
+# then (c) file is read forward with records ignored which predate the timestamp
+# recorded in (b), if one was found.
+#
+# In (c), when calculating the unsend total, i.e., the difference between delayed
+# transactions ("D") and sent transactions ("S"), transactions are ignored if the
+# target node is no longer in the cluster, and also (for "update" commands) if
+# the target node is in the list of nodes excluded from the count in the domain
+# configuration for this machine's default domain.  The idea here is to remove
+# delayed "update" commands for nodes for which inbound access to port 5663
+# is blocked, but which are still part of the LON-CAPA network (i.e., they can
+# still replicate content from other nodes).
+#
+
+    my $dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log","r");
+    if (defined($dfh)) {
+        while (my $line=<$dfh>) {
+	    my ($time,$sdf,$rest)=split(/:/,$line,3);
+            if ($time < 1541185772) {
+                $checkbackwards = 1;
+            }
+            last;
+        }
+        undef $dfh;
     }
 
-    &log($fh,"<p>Total unsend messages: <b>$unsend</b></p>\n");
-    if ($unsend > 0) {
-        $warnings=$warnings+5*$unsend;
+    if ($checkbackwards) {
+        if (tie *BW, 'File::ReadBackwards', "$perlvar{'lonDaemons'}/logs/lonnet.perm.log") {
+            while(my $line=<BW>) {
+                if ($line =~ /\QLondTransaction=HASH\E[^:]+:update:/) {
+                    ($checkfrom) = split(/:/,$line,2);
+                    last;
+                }
+            }
+            close(BW);
+        }
+    }
+    $dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log","r");
+    if (defined($dfh)) {
+        while (my $line=<$dfh>) {
+            my ($time,$sdf,$rest)=split(/:/,$line,3);
+            next unless (($sdf eq 'F') || ($sdf eq 'S') || ($sdf eq 'D'));
+            next if (($checkfrom) && ($time <= $checkfrom));
+            my ($dserv,$dcmd);
+            if ($sdf eq 'S') {
+                my ($serva,$cmda,$servb,$cmdb) = split(/:/,$rest);
+                if ($cmda eq 'sethost') {
+                    chomp($cmdb);
+                    $dcmd = $cmdb;
+                } else {
+                    $dcmd = $cmda;
+                }
+                if (($serva =~ /^LondTransaction/) || ($serva eq '')) {
+                    unless (($servb eq '') || ($servb =~ m{^/})) {
+                        $dserv = $servb;
+                    }
+                } else {
+                    $dserv = $serva;
+                }
+            } else {
+                ($dserv,$dcmd) = split(/:/,$rest);
+            }
+            if ($sdf eq 'F') {
+                my $local=localtime($time);
+                &log($fh,"<b>Failed: $time, $dserv, $dcmd</b><br />");
+                $warnings++;
+            }
+            next if ((($dserv eq '') || ($dcmd eq '')) && ($sdf ne 'F'));
+            if ($sdf eq 'S') {
+                if ($dcmd eq 'update') {
+                    if ($hostname{$dserv}) {
+                        if ($exclusions{$serverhomes{$hostname{$dserv}}}) {
+                            $ignored --;
+                        } else {
+                            $unsend --;
+                        }
+                    }
+                    if (exists($bymachine{$dserv})) {
+                        $bymachine{$dserv} --;
+                    } else {
+                        $bymachine{$dserv} = -1;
+                    }
+                } else {
+                    if ($hostname{$dserv}) {
+                        $unsend --;
+                    }
+                }
+            } elsif ($sdf eq 'D') {
+                if ($dcmd eq 'update') {
+                    if ($hostname{$dserv}) {
+                        if ($exclusions{$serverhomes{$hostname{$dserv}}}) {
+                            $ignored ++;
+                        } else {
+                            $unsend ++;
+                        }
+                    }
+                    if (exists($bymachine{$dserv})) {
+                        $bymachine{$dserv} ++;
+                    } else {
+                        $bymachine{$dserv} = 1;
+                    }
+                } else {
+                    if ($hostname{$dserv}) {
+                        $unsend ++;
+                    }
+                }
+            }
+        }
+        undef $dfh;
+        my $nodest = 0;
+        my $retired = 0;
+        my %active;
+        if (keys(%bymachine)) {
+            unless ($checkexcluded) {
+                %serverhomes = &read_serverhomeIDs();
+            }
+            foreach my $key (keys(%bymachine)) {
+                if ($bymachine{$key} > 0) {
+                    if ($hostname{$key}) {
+                        $active{$serverhomes{$hostname{$key}}} += $bymachine{$key};
+                    } else {
+                        $retired ++;
+                        $nodest += $bymachine{$key};
+                    }
+                }
+            }
+        }
+        if (keys(%active)) {
+            &log($fh,"<p>Unsend messages by node, active (undegraded) nodes in cluster</p>\n");
+            foreach my $key (sort(keys(%active))) {
+                &log($fh,&encode_entities("$key => $active{$key}",'<>&"')."\n");
+            }
+        }
+        &log($fh,"<p>Total unsend messages: <b>$unsend</b> for ".scalar(keys(%active))." active (undegraded) nodes in cluster.</p>\n");
+        if (keys(%exclusions) > 0) {
+            &log($fh,"<p>Total incomplete updates <b>$ignored</b> for ".scalar(keys(%exclusions))." degraded nodes in cluster.</p>\n");
+        }
+        if ($retired) {
+            &log($fh,"<p>Total unsent <b>$nodest</b> for $retired nodes no longer in cluster.</p>\n");
+        }
+        if ($unsend > 0) {
+            $warnings=$warnings+$weights{'U'}*$unsend;
+        }
     }
 
     if ($unsend) { $simplestatus{'unsend'}=$unsend; }
@@ -758,9 +925,13 @@
 }
 
 sub finish_logging {
-    my ($fh)=@_;
+    my ($fh,$weightsref)=@_;
+    my %weights;
+    if (ref($weightsref) eq 'HASH') {
+        %weights = %{$weightsref};
+    }
     &log($fh,"<a name='errcount' />\n");
-    $totalcount=$notices+4*$warnings+100*$errors;
+    $totalcount=($weights{'N'}*$notices)+($weights{'W'}*$warnings)+($weights{'E'}*$errors);
     &errout($fh);
     &log($fh,"<h1>Total Error Count: $totalcount</h1>");
     my $now=time;
@@ -829,7 +1000,7 @@
                 eval {
                     local $SIG{ ALRM } = sub { die "TIMEOUT" };
                     alarm(10);
-                    $serverhomeID = 
+                    $serverhomeID =
                         &Apache::lonnet::get_server_homeID($name,1,'loncron');
                     alarm(0);
                 };
@@ -962,12 +1133,126 @@
     return;
 }
 
+sub get_domain_config { # Collect the configuration of this machine's default domain; returns a hash ref (possibly empty).
+    my ($dom,$primlibserv,$isprimary,$url,%confhash);
+    $dom = $perlvar{'lonDefDomain'};
+    $primlibserv = &Apache::lonnet::domain($dom,'primary'); # Presumably the host ID of the domain's primary library server (compared with lonHostID below).
+    if ($primlibserv eq $perlvar{'lonHostID'}) {
+        $isprimary = 1;
+    } elsif ($primlibserv ne '') { # Another host is primary: build the URL used to request the configuration from it.
+        my $protocol = $Apache::lonnet::protocol{$primlibserv};
+        my $hostname = &Apache::lonnet::hostname($primlibserv);
+        unless ($protocol eq 'https') { # Anything other than https falls back to http.
+            $protocol = 'http';
+        }
+        $url = $protocol.'://'.$hostname.'/cgi-bin/listdomconfig.pl?primary='.$primlibserv.'&format=raw';
+    }
+    if ($isprimary) { # This host is primary: read the domain's configuration.db directly.
+        my $lonusersdir = $perlvar{'lonUsersDir'};
+        my $fname = $lonusersdir.'/'.$dom.'/configuration.db';
+        if (-e $fname) {
+            my $dbref=&LONCAPA::locking_hash_tie($fname,&GDBM_READER()); # Tied with the GDBM reader flag.
+            if (ref($dbref) eq 'HASH') {
+                foreach my $key (sort(keys(%{$dbref}))) {
+                    my $value = $dbref->{$key};
+                    if ($value =~ s/^__FROZEN__//) { # A __FROZEN__ prefix is stripped and the rest is unescaped, then thawed.
+                        $value = thaw(&LONCAPA::unescape($value)); # NOTE(review): thaw() is not imported in the visible part of the file - confirm.
+                    } else {
+                        $value = &LONCAPA::unescape($value);
+                    }
+                    $confhash{$key} = $value;
+                }
+                &LONCAPA::locking_hash_untie($dbref);
+            }
+        }
+    } else { # NOTE(review): also reached when $primlibserv is '' and $url was never set - confirm the request then fails cleanly.
+        my $ua=new LWP::UserAgent;
+        $ua->timeout(5); # Request timeout, in seconds.
+        my $request=new HTTP::Request('GET',$url);
+        my $response=$ua->request($request);
+        unless ($response->is_error()) { # Error responses leave %confhash empty.
+            my $content = $response->content;
+            if ($content) {
+                my @pairs=split(/\&/,$content); # Body is parsed as &-separated key=value items.
+                foreach my $item (@pairs) {
+                    my ($key,$value)=split(/=/,$item,2); # Limit 2: only the first '=' separates key from value.
+                    my $what = &LONCAPA::unescape($key);
+                    if ($value =~ s/^__FROZEN__//) { # Same __FROZEN__ handling as in the local-file branch.
+                        $value = thaw(&LONCAPA::unescape($value));
+                    } else {
+                        $value = &LONCAPA::unescape($value);
+                    }
+                    $confhash{$what}=$value;
+                }
+            }
+        }
+    }
+    return \%confhash;
+}
+
+sub get_permcount_settings { # Build status-count settings: defaults from lon_status_items(), overridden by $domconf->{'contacts'}.
+    my ($domconf) = @_; # Expected to be a hash ref; anything else leaves the defaults in place.
+    my ($defaults,$names) = &Apache::loncommon::lon_status_items(); # $names is not used below.
+    my (%weights,$threshold,$sysmail,$reportstatus,%exclusions);
+    foreach my $type ('E','W','N','U') { # Weights applied elsewhere in this file to errors, warnings, notices and unsend messages.
+        $weights{$type} = $defaults->{$type};
+    }
+    $threshold = $defaults->{'threshold'};
+    $sysmail = $defaults->{'sysmail'};
+    $reportstatus = 1; # Default is on; send_mail only adds lonSysEMail when this is true.
+    if (ref($domconf) eq 'HASH') {
+        if (ref($domconf->{'contacts'}) eq 'HASH') {
+            if ($domconf->{'contacts'}{'reportstatus'} == 0) { # NOTE(review): == 0 is also true for an undefined or empty value, so an unset 'reportstatus' turns this off - confirm intended.
+                $reportstatus = 0;
+            }
+            if (ref($domconf->{'contacts'}{'lonstatus'}) eq 'HASH') {
+                if (ref($domconf->{'contacts'}{'lonstatus'}{weights}) eq 'HASH') {
+                    foreach my $type ('E','W','N','U') {
+                        if (exists($domconf->{'contacts'}{'lonstatus'}{weights}{$type})) { # Only keys that are present override the default weight.
+                            $weights{$type} = $domconf->{'contacts'}{'lonstatus'}{weights}{$type};
+                        }
+                    }
+                }
+                if (ref($domconf->{'contacts'}{'lonstatus'}{'excluded'}) eq 'ARRAY') {
+                    my @excluded = @{$domconf->{'contacts'}{'lonstatus'}{'excluded'}};
+                    if (@excluded) {
+                        map { $exclusions{$_} = 1; } @excluded; # Turn the list into a lookup hash.
+                    }
+                }
+                if (exists($domconf->{'contacts'}{'lonstatus'}{'threshold'})) {
+                    $threshold = $domconf->{'contacts'}{'lonstatus'}{'threshold'};
+                }
+                if (exists($domconf->{'contacts'}{'lonstatus'}{'sysmail'})) {
+                    $sysmail = $domconf->{'contacts'}{'lonstatus'}{'sysmail'};
+                }
+            }
+        }
+    }
+    return ($threshold,$sysmail,$reportstatus,\%weights,\%exclusions); # Weights and exclusions are returned as hash references.
+}
+
+sub read_serverhomeIDs { # Load serverhomeIDs.tab from lonTabDir into a hash; takes no arguments.
+    my %server; # First colon-separated field => second field; stays empty if the file is missing or cannot be opened.
+    if (-e "$perlvar{'lonTabDir'}/serverhomeIDs.tab") {
+        if (open(my $fh,'<',"$perlvar{'lonTabDir'}/serverhomeIDs.tab")) {
+            while (<$fh>) {
+                my($host,$id) = split(/:/); # Splits $_; any fields after the second are discarded.
+                chomp($id); # Removes the newline that ends a two-field line.
+                $server{$host} = $id; # A later line with the same first field overwrites an earlier one.
+            }
+            close($fh);
+        }
+    }
+    return %server; # Returned as a flat key/value list, not a reference.
+}
+
 sub send_mail {
+    my ($sysmail,$reportstatus) = @_;
     my $defdom = $perlvar{'lonDefDomain'};
     my $origmail = $perlvar{'lonAdmEMail'};
     my $emailto = &Apache::loncommon::build_recipient_list(undef,
                                    'lonstatusmail',$defdom,$origmail);
-    if ($totalcount>2500) {
+    if (($totalcount>$sysmail) && ($reportstatus)) {
 	$emailto.=",$perlvar{'lonSysEMail'}";
     }
     my $from;
@@ -1093,7 +1378,7 @@
         &Apache::lonnet::get_iphost(1,$nomemcache);
     }
 
-# ----------------------------------------- Force firewall update for lond port  
+# ----------------------------------------- Force firewall update for lond port
 
     if ((!$justcheckdaemons) && (!$justreload)) {
         my $now = time;
@@ -1108,7 +1393,7 @@
             if (&LONCAPA::try_to_lock('/tmp/lock_lciptables')) {
                 my $execpath = $perlvar{'lonDaemons'}.'/lciptables';
                 system("$execpath $tmpfile");
-                unlink('/tmp/lock_lciptables');  # Remove the lock file. 
+                unlink('/tmp/lock_lciptables');  # Remove the lock file.
             }
             unlink($tmpfile);
         }
@@ -1120,7 +1405,7 @@
     $warnings=0;
     $notices=0;
 
-	
+
     my $fh;
     if (!$justcheckdaemons && !$justcheckconnections && !$justreload && !$justiptables) {
 	$fh=&start_logging();
@@ -1152,14 +1437,17 @@
 	&test_connections($fh);
     }
     if (!$justcheckdaemons && !$justcheckconnections && !$justreload && !$justiptables) {
-	&check_delayed_msg($fh);
-	&finish_logging($fh);
+        my $domconf = &get_domain_config();
+        my ($threshold,$sysmail,$reportstatus,$weightsref,$exclusionsref) =
+            &get_permcount_settings($domconf);
+	&check_delayed_msg($fh,$weightsref,$exclusionsref);
+	&finish_logging($fh,$weightsref);
 	&log_simplestatus();
         &write_loncaparevs();
         &write_serverhomeIDs();
 	&write_checksums();
         &write_hostips();
-	if ($totalcount>200 && !$noemail) { &send_mail(); }
+        if ($totalcount>$threshold && !$noemail) { &send_mail($sysmail,$reportstatus); }
     }
 }
 


More information about the LON-CAPA-cvs mailing list