[LON-CAPA-cvs] cvs: loncom / loncron /interface domainprefs.pm loncommon.pm
raeburn
raeburn at source.lon-capa.org
Sun Nov 18 17:50:54 EST 2018
raeburn Sun Nov 18 22:50:54 2018 EDT
Modified files:
/loncom loncron
/loncom/interface loncommon.pm domainprefs.pm
Log:
- Scanning of lonnet.perm.log by loncron
- Information about which nodes have unsent update transactions is logged.
- Specific nodes can be excluded from unsend count (for delayed updates)
via domain configuration.
- Weight for unsend count (before adding to "warnings") is configurable.
-------------- next part --------------
Index: loncom/loncron
diff -u loncom/loncron:1.113 loncom/loncron:1.114
--- loncom/loncron:1.113 Tue Nov 6 15:37:42 2018
+++ loncom/loncron Sun Nov 18 22:50:46 2018
@@ -2,7 +2,7 @@
# Housekeeping program, started by cron, loncontrol and loncron.pl
#
-# $Id: loncron,v 1.113 2018/11/06 15:37:42 raeburn Exp $
+# $Id: loncron,v 1.114 2018/11/18 22:50:46 raeburn Exp $
#
# Copyright Michigan State University Board of Trustees
#
@@ -43,6 +43,7 @@
use Getopt::Long;
use GDBM_File;
use Storable qw(thaw);
+use File::ReadBackwards;
#globals
use vars qw (%perlvar %simplestatus $errors $warnings $notices $totalcount);
@@ -646,35 +647,199 @@
# ------------------------------------------------------------ Delayed messages
sub check_delayed_msg {
- my ($fh)=@_;
+ my ($fh,$weightsref,$exclusionsref)=@_;
&log($fh,'<hr /><a name="delayed" /><h2>Delayed Messages</h2>');
print "Checking buffers.\n";
&log($fh,'<h3>Scanning Permanent Log</h3>');
my $unsend=0;
+ my $ignored=0;
my %hostname = &Apache::lonnet::all_hostnames();
my $numhosts = scalar(keys(%hostname));
-
- my $dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log");
- while (my $line=<$dfh>) {
- my ($time,$sdf,$dserv,$dcmd)=split(/:/,$line);
- if ($numhosts) {
- next unless ($hostname{$dserv});
- }
- if ($sdf eq 'F') {
- my $local=localtime($time);
- &log($fh,"<b>Failed: $time, $dserv, $dcmd</b><br />");
- $warnings++;
- }
- if ($sdf eq 'S') { $unsend--; }
- if ($sdf eq 'D') { $unsend++; }
+ my $checkbackwards = 0;
+ my $checkfrom = 0;
+ my $checkexcluded = 0;
+ my (%bymachine,%weights,%exclusions,%serverhomes);
+ if (ref($weightsref) eq 'HASH') {
+ %weights = %{$weightsref};
+ }
+ if (ref($exclusionsref) eq 'HASH') {
+ %exclusions = %{$exclusionsref};
+ if (keys(%exclusions)) {
+ $checkexcluded = 1;
+ %serverhomes = &read_serverhomeIDs();
+ }
}
- &log($fh,"<p>Total unsend messages: <b>$unsend</b></p>\n");
- if ($unsend > 0) {
- $warnings=$warnings+5*$unsend;
+#
+# For LON-CAPA 1.2.0 to 2.1.3 (release dates: 8/31/2004 and 3/31/2006) any
+# entry logged in lonnet.perm.log for completion of a delayed (critical)
+# transaction lacked the hostID for the remote node to which the command
+# to be completed was sent.
+#
+# Because of this, exclusion of items in lonnet.perm.log for nodes which are
+# no longer part of the cluster from adding to the overall "unsend" count
+# needs additional effort besides the changes made in loncron rev. 1.105.
+#
+# For "S" (completion) events logging in LON-CAPA 1.2.0 through 2.1.3 included
+# "LondTransaction=HASH(hexadecimal)->getClient() :$cmd, where the hexadecimal
+# is a memory location, and $cmd is the command sent to the remote node.
+#
+# Starting with 2.2.0 (released 8/21/2006) logging for "S" (completion) events
+# had sethost:$host_id:$cmd after LondTransaction=HASH(hexadecimal)->getClient()
+#
+# Starting with 2.4.1 (released 6/13/2007) logging for "S" replaced echoing the
+# getClient() call with the result of the Transaction->getClient() call itself
+# undef for completion of delivery of a delayed message.
+#
+# The net effect of these changes is that lonnet.perm.log is now accessed three
+# times: (a) oldest record is checked, if earlier than release date for 2.5.0
+# then (b) file is read backwards, with timestamp recorded for most recent
+# instance of logged "S" event for "update" command without "sethost:$host_id:"
+# then (c) file is read forward with records ignored which predate the timestamp
+# recorded in (b), if one was found.
+#
+# In (c), when calculating the unsend total, i.e., the difference between delayed
+# transactions ("D") and sent transactions ("S"), transactions are ignored if the
+# target node is no longer in the cluster, and also (for "update" commands), if
+# the target node is in the list of nodes excluded from the count, in the domain
+# configuration for this machine's default domain. The idea here is to remove
+# delayed "update" commands for nodes for which inbound access to port 5663,
+# is blocked, but are still part of the LON-CAPA network, (i.e., they can still
+# replicate content from other nodes).
+#
+
+ my $dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log","r");
+ if (defined($dfh)) {
+ while (my $line=<$dfh>) {
+ my ($time,$sdf,$rest)=split(/:/,$line,3);
+ if ($time < 1541185772) {
+ $checkbackwards = 1;
+ }
+ last;
+ }
+ undef $dfh;
+ }
+
+ if ($checkbackwards) {
+ if (tie *BW, 'File::ReadBackwards', "$perlvar{'lonDaemons'}/logs/lonnet.perm.log") {
+ while(my $line=<BW>) {
+ if ($line =~ /\QLondTransaction=HASH\E[^:]+:update:/) {
+ ($checkfrom) = split(/:/,$line,2);
+ last;
+ }
+ }
+ close(BW);
+ }
+ }
+ $dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log","r");
+ if (defined($dfh)) {
+ while (my $line=<$dfh>) {
+ my ($time,$sdf,$rest)=split(/:/,$line,3);
+ next unless (($sdf eq 'F') || ($sdf eq 'S') || ($sdf eq 'D'));
+ next if (($checkfrom) && ($time <= $checkfrom));
+ my ($dserv,$dcmd);
+ if ($sdf eq 'S') {
+ my ($serva,$cmda,$servb,$cmdb) = split(/:/,$rest);
+ if ($cmda eq 'sethost') {
+ chomp($cmdb);
+ $dcmd = $cmdb;
+ } else {
+ $dcmd = $cmda;
+ }
+ if (($serva =~ /^LondTransaction/) || ($serva eq '')) {
+ unless (($servb eq '') || ($servb =~ m{^/})) {
+ $dserv = $servb;
+ }
+ } else {
+ $dserv = $serva;
+ }
+ } else {
+ ($dserv,$dcmd) = split(/:/,$rest);
+ }
+ if ($sdf eq 'F') {
+ my $local=localtime($time);
+ &log($fh,"<b>Failed: $time, $dserv, $dcmd</b><br />");
+ $warnings++;
+ }
+ next if ((($dserv eq '') || ($dcmd eq '')) && ($sdf ne 'F'));
+ if ($sdf eq 'S') {
+ if ($dcmd eq 'update') {
+ if ($hostname{$dserv}) {
+ if ($exclusions{$serverhomes{$hostname{$dserv}}}) {
+ $ignored --;
+ } else {
+ $unsend --;
+ }
+ }
+ if (exists($bymachine{$dserv})) {
+ $bymachine{$dserv} --;
+ } else {
+ $bymachine{$dserv} = -1;
+ }
+ } else {
+ if ($hostname{$dserv}) {
+ $unsend --;
+ }
+ }
+ } elsif ($sdf eq 'D') {
+ if ($dcmd eq 'update') {
+ if ($hostname{$dserv}) {
+ if ($exclusions{$serverhomes{$hostname{$dserv}}}) {
+ $ignored ++;
+ } else {
+ $unsend ++;
+ }
+ }
+ if (exists($bymachine{$dserv})) {
+ $bymachine{$dserv} ++;
+ } else {
+ $bymachine{$dserv} = 1;
+ }
+ } else {
+ if ($hostname{$dserv}) {
+ $unsend ++;
+ }
+ }
+ }
+ }
+ undef $dfh;
+ my $nodest = 0;
+ my $retired = 0;
+ my %active;
+ if (keys(%bymachine)) {
+ unless ($checkexcluded) {
+ %serverhomes = &read_serverhomeIDs();
+ }
+ foreach my $key (keys(%bymachine)) {
+ if ($bymachine{$key} > 0) {
+ if ($hostname{$key}) {
+ $active{$serverhomes{$hostname{$key}}} += $bymachine{$key};
+ } else {
+ $retired ++;
+ $nodest += $bymachine{$key};
+ }
+ }
+ }
+ }
+ if (keys(%active)) {
+ &log($fh,"<p>Unsend messages by node, active (undegraded) nodes in cluster</p>\n");
+ foreach my $key (sort(keys(%active))) {
+ &log($fh,&encode_entities("$key => $active{$key}",'<>&"')."\n");
+ }
+ }
+ &log($fh,"<p>Total unsend messages: <b>$unsend</b> for ".scalar(keys(%active))." active (undegraded) nodes in cluster.</p>\n");
+ if (keys(%exclusions) > 0) {
+ &log($fh,"<p>Total incomplete updates <b>$ignored</b> for ".scalar(keys(%exclusions))." degraded nodes in cluster.</p>\n");
+ }
+ if ($retired) {
+ &log($fh,"<p>Total unsent <b>$nodest</b> for $retired nodes no longer in cluster.</p>\n");
+ }
+ if ($unsend > 0) {
+ $warnings=$warnings+$weights{'U'}*$unsend;
+ }
}
if ($unsend) { $simplestatus{'unsend'}=$unsend; }
@@ -713,7 +878,11 @@
}
sub finish_logging {
- my ($fh,%weights)=@_;
+ my ($fh,$weightsref)=@_;
+ my %weights;
+ if (ref($weightsref) eq 'HASH') {
+ %weights = %{$weightsref};
+ }
&log($fh,"<a name='errcount' />\n");
$totalcount=($weights{'N'}*$notices)+($weights{'W'}*$warnings)+($weights{'E'}*$errors);
&errout($fh);
@@ -1086,6 +1255,62 @@
return;
}
+sub get_permcount_settings {
+ my ($domconf) = @_;
+ my ($defaults,$names) = &Apache::loncommon::lon_status_items();
+ my (%weights,$threshold,$sysmail,$reportstatus,%exclusions);
+ foreach my $type ('E','W','N','U') {
+ $weights{$type} = $defaults->{$type};
+ }
+ $threshold = $defaults->{'threshold'};
+ $sysmail = $defaults->{'sysmail'};
+ $reportstatus = 1;
+ if (ref($domconf) eq 'HASH') {
+ if (ref($domconf->{'contacts'}) eq 'HASH') {
+ if ($domconf->{'contacts'}{'reportstatus'} == 0) {
+ $reportstatus = 0;
+ }
+ if (ref($domconf->{'contacts'}{'lonstatus'}) eq 'HASH') {
+ if (ref($domconf->{'contacts'}{'lonstatus'}{weights}) eq 'HASH') {
+ foreach my $type ('E','W','N','U') {
+ if (exists($domconf->{'contacts'}{'lonstatus'}{weights}{$type})) {
+ $weights{$type} = $domconf->{'contacts'}{'lonstatus'}{weights}{$type};
+ }
+ }
+ }
+ if (ref($domconf->{'contacts'}{'lonstatus'}{'excluded'}) eq 'ARRAY') {
+ my @excluded = @{$domconf->{'contacts'}{'lonstatus'}{'excluded'}};
+ if (@excluded) {
+ map { $exclusions{$_} = 1; } @excluded;
+ }
+ }
+ if (exists($domconf->{'contacts'}{'lonstatus'}{'threshold'})) {
+ $threshold = $domconf->{'contacts'}{'lonstatus'}{'threshold'};
+ }
+ if (exists($domconf->{'contacts'}{'lonstatus'}{'sysmail'})) {
+ $sysmail = $domconf->{'contacts'}{'lonstatus'}{'sysmail'};
+ }
+ }
+ }
+ }
+ return ($threshold,$sysmail,$reportstatus,\%weights,\%exclusions);
+}
+
+sub read_serverhomeIDs {
+ my %server;
+ if (-e "$perlvar{'lonTabDir'}/serverhomeIDs.tab") {
+ if (open(my $fh,'<',"$perlvar{'lonTabDir'}/serverhomeIDs.tab")) {
+ while (<$fh>) {
+ my($host,$id) = split(/:/);
+ chomp($id);
+ $server{$host} = $id;
+ }
+ close($fh);
+ }
+ }
+ return %server;
+}
+
sub send_mail {
my ($sysmail,$reportstatus) = @_;
my $defdom = $perlvar{'lonDefDomain'};
@@ -1273,8 +1498,10 @@
&test_connections($fh);
}
if (!$justcheckdaemons && !$justcheckconnections && !$justreload) {
- &check_delayed_msg($fh);
- &log_simplestatus();
+ my $domconf = &get_domain_config();
+ my ($threshold,$sysmail,$reportstatus,$weightsref,$exclusionsref) =
+ &get_permcount_settings($domconf);
+ &check_delayed_msg($fh,$weightsref,$exclusionsref);
&write_loncaparevs();
&write_serverhomeIDs();
&write_checksums();
@@ -1287,37 +1514,9 @@
&checkon_daemon($fh,'lond',40000,'USR2');
&reset_nosslverify_pids($fh,%sslrem);
}
- my $domconf = &get_domain_config();
- my ($defaults,$names) = &Apache::loncommon::lon_status_items();
- my (%weights,$threshold);
- foreach my $type ('E','W','N') {
- $weights{$type} = $defaults->{$type};
- }
- my $threshold = $defaults->{'threshold'};
- my $sysmail = $defaults->{'sysmail'};
- my $reportstatus = 1;
- if (ref($domconf->{'contacts'}) eq 'HASH') {
- if ($domconf->{'contacts'}{'reportstatus'} == 0) {
- $reportstatus = 0;
- }
- if (ref($domconf->{'contacts'}{'lonstatus'}) eq 'HASH') {
- if (ref($domconf->{'contacts'}{'lonstatus'}{weights}) eq 'HASH') {
- foreach my $type ('E','W','N') {
- if (exists($domconf->{'contacts'}{'lonstatus'}{weights}{$type})) {
- $weights{$type} = $domconf->{'contacts'}{'lonstatus'}{weights}{$type};
- }
- }
- }
- }
- if (exists($domconf->{'contacts'}{'lonstatus'}{'threshold'})) {
- $threshold = $domconf->{'contacts'}{'lonstatus'}{'threshold'};
- }
- if (exists($domconf->{'contacts'}{'lonstatus'}{'sysmail'})) {
- $sysmail = $domconf->{'contacts'}{'lonstatus'}{'sysmail'};
- }
- }
- &finish_logging($fh,%weights);
- if ($totalcount>$threshold && !$noemail) { &send_mail($sysmail,$reportstatus); }
+ &finish_logging($fh,$weightsref);
+ &log_simplestatus();
+ if ($totalcount>$threshold && !$noemail) { &send_mail($sysmail,$reportstatus); }
}
}
Index: loncom/interface/loncommon.pm
diff -u loncom/interface/loncommon.pm:1.1323 loncom/interface/loncommon.pm:1.1324
--- loncom/interface/loncommon.pm:1.1323 Wed Nov 7 19:23:45 2018
+++ loncom/interface/loncommon.pm Sun Nov 18 22:50:52 2018
@@ -1,7 +1,7 @@
# The LearningOnline Network with CAPA
# a pile of common routines
#
-# $Id: loncommon.pm,v 1.1323 2018/11/07 19:23:45 raeburn Exp $
+# $Id: loncommon.pm,v 1.1324 2018/11/18 22:50:52 raeburn Exp $
#
# Copyright Michigan State University Board of Trustees
#
@@ -16425,6 +16425,7 @@
E => 100,
W => 4,
N => 1,
+ U => 5,
threshold => 200,
sysmail => 2500,
);
@@ -16432,6 +16433,7 @@
E => 'Errors',
W => 'Warnings',
N => 'Notices',
+ U => 'Unsent',
);
return (\%defaults,\%names);
}
Index: loncom/interface/domainprefs.pm
diff -u loncom/interface/domainprefs.pm:1.340 loncom/interface/domainprefs.pm:1.341
--- loncom/interface/domainprefs.pm:1.340 Tue Nov 6 15:37:37 2018
+++ loncom/interface/domainprefs.pm Sun Nov 18 22:50:52 2018
@@ -1,7 +1,7 @@
# The LearningOnline Network with CAPA
# Handler to set domain-wide configuration settings
#
-# $Id: domainprefs.pm,v 1.340 2018/11/06 15:37:37 raeburn Exp $
+# $Id: domainprefs.pm,v 1.341 2018/11/18 22:50:52 raeburn Exp $
#
# Copyright Michigan State University Board of Trustees
#
@@ -3581,7 +3581,7 @@
$sysmail = $defaults->{'sysmail'};
}
if (ref($lonstatus{'weights'}) eq 'HASH') {
- foreach my $type ('E','W','N') {
+ foreach my $type ('E','W','N','U') {
if ($lonstatus{'weights'}{$type} =~ /^\d+$/) {
$weights{$type} = $lonstatus{'weights'}{$type};
} else {
@@ -3589,7 +3589,7 @@
}
}
} else {
- foreach my $type ('E','W','N') {
+ foreach my $type ('E','W','N','U') {
$weights{$type} = $defaults->{$type};
}
}
@@ -3610,7 +3610,7 @@
'<td class="LC_left_item">'.
'<span class="LC_nobreak">'.$titles->{'errorweights'}.
'</span></td><td class="LC_left_item"><table><tr>';
- foreach my $type ('E','W','N') {
+ foreach my $type ('E','W','N','U') {
$datatable .= '<td>'.$names->{$type}.'<br />'.
'<input type="text" name="errorweights_'.$type.'" value="'.
$weights{$type}.'" size="5" /></td>';
@@ -13019,7 +13019,7 @@
if ((ref($contacts_hash{contacts}{lonstatus}) eq 'HASH') &&
(ref($contacts_hash{contacts}{lonstatus}{$key}) eq 'HASH')) {
if (ref($currsetting{'lonstatus'}{$key}) eq 'HASH') {
- foreach my $type ('E','W','N') {
+ foreach my $type ('E','W','N','U') {
unless ($contacts_hash{contacts}{lonstatus}{$key}{$type} eq
$currsetting{'lonstatus'}{$key}{$type}) {
push(@{$changes{'lonstatus'}},$key);
@@ -13027,7 +13027,7 @@
}
}
} else {
- foreach my $type ('E','W','N') {
+ foreach my $type ('E','W','N','U') {
if ($contacts_hash{contacts}{lonstatus}{$key}{$type} ne '') {
push(@{$changes{'lonstatus'}},$key);
last;
@@ -13035,7 +13035,7 @@
}
}
} elsif (ref($currsetting{'lonstatus'}{$key}) eq 'HASH') {
- foreach my $type ('E','W','N') {
+ foreach my $type ('E','W','N','U') {
if ($currsetting{'lonstatus'}{$key}{$type} ne '') {
push(@{$changes{'lonstatus'}},$key);
last;
@@ -13270,7 +13270,7 @@
$defval{'threshold'} = $lonstatus_defs->{'threshold'};
$defval{'sysmail'} = $lonstatus_defs->{'sysmail'};
$defval{'weights'} =
- join(', ',map { $lonstatus_names->{$_}.'='.$lonstatus_defs->{$_}; } ('E','W','N'));
+ join(', ',map { $lonstatus_names->{$_}.'='.$lonstatus_defs->{$_}; } ('E','W','N','U'));
$defval{'excluded'} = &mt('None');
if (ref($contacts_hash{'contacts'}{'lonstatus'}) eq 'HASH') {
foreach my $item ('threshold','sysmail','weights','excluded') {
@@ -13279,7 +13279,7 @@
$shown{$item} = $contacts_hash{'contacts'}{'lonstatus'}{$item};
} elsif ($item eq 'weights') {
if (ref($contacts_hash{'contacts'}{'lonstatus'}{$item}) eq 'HASH') {
- foreach my $type ('E','W','N') {
+ foreach my $type ('E','W','N','U') {
$shown{$item} .= $lonstatus_names->{$type}.'=';
if (exists($contacts_hash{'contacts'}{'lonstatus'}{$item}{$type})) {
$shown{$item} .= $contacts_hash{'contacts'}{'lonstatus'}{$item}{$type};
More information about the LON-CAPA-cvs
mailing list