[LON-CAPA-cvs] cvs: loncom / lond

albertel lon-capa-cvs@mail.lon-capa.org
Mon, 02 Aug 2004 20:59:47 -0000


albertel		Mon Aug  2 16:59:47 2004 EDT

  Modified files:              
    /loncom	lond 
  Log:
  - the children hash is becoming polluted, and 'checkchildren' currently takes 1 second per entry in the children hash to check the existence of children. During this time lond refuses all connections (as it should); this is leading to hosts being marked dead when they really are 'checking their children'
  
  this patch:
  
     - eliminates the 1 second sleep; we now do locked access to the log file
     - switches the order of the logging to the common log file 
         and the child specific one, since the child specific one is
         used to detect that the child has gone off to never never land
     - deletes entries from the children hash when checkchildren can't find them
   
  - this should hopefully fix the problems being seen on s1-5,10,17
  
  - planning to backport, and will install on s17 today
  
  
Index: loncom/lond
diff -u loncom/lond:1.220 loncom/lond:1.221
--- loncom/lond:1.220	Mon Aug  2 07:02:02 2004
+++ loncom/lond	Mon Aug  2 16:59:46 2004
@@ -2,7 +2,7 @@
 # The LearningOnline Network
 # lond "LON Daemon" Server (port "LOND" 5663)
 #
-# $Id: lond,v 1.220 2004/08/02 11:02:02 foxr Exp $
+# $Id: lond,v 1.221 2004/08/02 20:59:46 albertel Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -50,13 +50,14 @@
 use LONCAPA::ConfigFileEdit;
 use LONCAPA::lonlocal;
 use LONCAPA::lonssl;
+use Fcntl qw(:flock);
 
 my $DEBUG = 0;		       # Non zero to enable debug log entries.
 
 my $status='';
 my $lastlog='';
 
-my $VERSION='$Revision: 1.220 $'; #' stupid emacs
+my $VERSION='$Revision: 1.221 $'; #' stupid emacs
 my $remoteVERSION;
 my $currenthostid="default";
 my $currentdomainid;
@@ -3251,10 +3252,11 @@
     &logthis('Going to check on the children');
     my $docdir=$perlvar{'lonDocRoot'};
     foreach (sort keys %children) {
-	sleep 1;
+	#sleep 1;
         unless (kill 'USR1' => $_) {
 	    &logthis ('Child '.$_.' is dead');
             &logstatus($$.' is dead');
+	    delete($children{$_});
         } 
     }
     sleep 5;
@@ -3273,6 +3275,7 @@
 	    #my $result=`echo 'Killed lond process $_.' | mailto $emailto -s '$subj' > /dev/null`;
 	    #$execdir=$perlvar{'lonDaemons'};
 	    #$result=`/bin/cp $execdir/logs/lond.log $execdir/logs/lond.log.$_`;
+	    delete($children{$_});
 	    alarm(0);
 	  }
         }
@@ -3280,6 +3283,7 @@
     $SIG{ALRM} = 'DEFAULT';
     $SIG{__DIE__} = \&catchexception;
     &status("Finished checking children");
+    &logthis('Finished Checking children');
 }
 
 # --------------------------------------------------------------------- Logging
@@ -3350,17 +3354,19 @@
     &status("Doing logging");
     my $docdir=$perlvar{'lonDocRoot'};
     {
-    my $fh=IO::File->new(">>$docdir/lon-status/londstatus.txt");
-    print $fh $$."\t".$clientname."\t".$currenthostid."\t"
-	.$status."\t".$lastlog."\t $keymode\n";
-    $fh->close();
-    }
-    &status("Finished londstatus.txt");
-    {
 	my $fh=IO::File->new(">$docdir/lon-status/londchld/$$.txt");
         print $fh $status."\n".$lastlog."\n".time."\n$keymode";
         $fh->close();
     }
+    &status("Finished $$.txt");
+    {
+	open(LOG,">>$docdir/lon-status/londstatus.txt");
+	flock(LOG,LOCK_EX);
+	print LOG $$."\t".$clientname."\t".$currenthostid."\t"
+	    .$status."\t".$lastlog."\t $keymode\n";
+	flock(DB,LOCK_UN);
+	close(LOG);
+    }
     &status("Finished logging");
 }