[LON-CAPA-cvs] cvs: loncom / loncnew

foxr lon-capa-cvs@mail.lon-capa.org
Mon, 30 Aug 2004 11:01:04 -0000


foxr		Mon Aug 30 07:01:04 2004 EDT

  Modified files:              
    /loncom	loncnew 
  Log:
  - Completely turn off ticking when there are no connections left.
  - Add a bit more forceful kill logic since there have been 'rumors' of cases where
    lonc's don't die appropriately.
  - Remove some of the less useful log spew.
  
  
Index: loncom/loncnew
diff -u loncom/loncnew:1.51 loncom/loncnew:1.52
--- loncom/loncnew:1.51	Thu Aug 26 08:35:10 2004
+++ loncom/loncnew	Mon Aug 30 07:01:04 2004
@@ -2,7 +2,7 @@
 # The LearningOnline Network with CAPA
 # lonc maintains the connections to remote computers
 #
-# $Id: loncnew,v 1.51 2004/08/26 12:35:10 albertel Exp $
+# $Id: loncnew,v 1.52 2004/08/30 11:01:04 foxr Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -105,10 +105,6 @@
 my $LondVersion     = "unknown"; # Version of lond we talk with.
 my $KeyMode         = "";       # e.g. ssl, local, insecure from last connect.
 
-my $LongTickLength  = 10000000; #Tick Frequency when Idle
-my $ShortTickLength = 1;        #Tick Frequency when Active (many places in
-                                # the code assume this is one)
-my $TickLength      = $ShortTickLength;#number of seconds to wait until ticking
 #
 #   The hash below gives the HTML format for log messages
 #   given a severity.
@@ -269,7 +265,7 @@
 sub SocketTimeout {
     my $Socket = shift;
     Log("WARNING", "A socket timeout was detected");
-    Debug(0, " SocketTimeout called: ");
+    Debug(5, " SocketTimeout called: ");
     $Socket->Dump(0);
     if(exists($ActiveTransactions{$Socket})) {
 	FailTransaction($ActiveTransactions{$Socket});
@@ -278,7 +274,7 @@
                                 # a connection failure:
     $ConnectionRetriesLeft--;
     if($ConnectionRetriesLeft <= 0) {
-	Log("CRITICAL", "Host marked dead: ".GetServerHost());
+	Log("CRITICAL", "Host marked DEAD: ".GetServerHost());
     }
 
 }
@@ -294,6 +290,9 @@
 
 
 sub Tick {
+    my ($Event)       = @_;
+    my $clock_watcher = $Event->w;
+
     my $client;
     if($ConnectionRetriesLeft > 0) {
 	ShowStatus(GetServerHost()." Connection count: ".$ConnectionCount
@@ -307,13 +306,10 @@
 
     if($IdleConnections->Count()  && 
        ($WorkQueue->Count() == 0)) { # Idle connections and nothing to do?
-	$IdleSeconds+=$TickLength;
+	$IdleSeconds++;
 	if($IdleSeconds > $IdleTimeout) { # Prune a connection...
 	    my $Socket = $IdleConnections->pop();
 	    KillSocket($Socket);
-	    if ($IdleConnections->Count() == 0) {
-		&SetupTimer($LongTickLength);
-	    }
 	}
     } else {
 	$IdleSeconds = 0;	# Reset idle count if not idle.
@@ -358,6 +354,7 @@
     }
     if ($ConnectionCount == 0) {
 	$KeyMode = ""; 
+	$clock_watcher->cancel();
     }
 }
 
@@ -377,13 +374,9 @@
 
 =cut
 
-my $timer;
 sub SetupTimer {
-    my ($newLength)=@_;
-    Debug(6, "SetupTimer $TickLength->$newLength");
-    $TickLength=$newLength;
-    if ($timer) { $timer->cancel; }
-    $timer=Event->timer(interval => $TickLength, cb => \&Tick );
+    Debug(6, "SetupTimer");
+    Event->timer(interval => 1, cb => \&Tick ); # Watcher handle not kept: Tick cancels it via $Event->w when $ConnectionCount is 0.
 }
 
 =pod
@@ -483,7 +476,7 @@
 	    } else {		# Partial string sent.
 		$Watcher->data(substr($Data, $result));
 		if($result == 0) {    # client hung up on us!!
-		    Log("INFO", "lonc pipe client hung up on us!");
+		    # Log("INFO", "lonc pipe client hung up on us!");
 		    $Watcher->cancel;
 		    $Socket->shutdown(2);
 		    $Socket->close();
@@ -611,7 +604,13 @@
 
 sub FailTransaction {
     my $transaction = shift;
-    Log("WARNING", "Failing transaction ".$transaction->getRequest());
+    
+    #  If the socket is dead, that's already logged.
+
+    if ($ConnectionRetriesLeft > 0) {
+	Log("WARNING", "Failing transaction "
+	    .$transaction->getRequest());
+    }
     Debug(1, "Failing transaction: ".$transaction->getRequest());
     if (!$transaction->isDeferred()) { # If the transaction is deferred we'll get to it.
 	my $client  = $transaction->getClient();
@@ -1145,7 +1144,9 @@
 			   data     => $Connection,
 			   desc => 'Connection to lond server');
 	$ActiveConnections{$Connection} = $event;
-	
+	if ($ConnectionCount == 0) {
+	    &SetupTimer;	# Need to handle timeouts with connections...
+	}
 	$ConnectionCount++;
 	Debug(4, "Connection count = ".$ConnectionCount);
 	if($ConnectionCount == 1) { # First Connection:
@@ -1243,7 +1244,6 @@
 		    EmptyQueue();	# Fail transactions, can't make connection.
 		    CloseAllLondConnections; # Should all be closed but...
 		}
-		&SetupTimer($ShortTickLength);
 	    } else {
 		ShowStatus(GetServerHost()." >>> DEAD !!!! <<<");
 		EmptyQueue();	# It's worse than that ... he's dead Jim.
@@ -1533,7 +1533,6 @@
 		  cb       => \&ToggleDebug,
 		  data     => "INT");
 
-    SetupTimer($LongTickLength);
     
     SetupLoncListener();
     
@@ -1556,9 +1555,10 @@
 #  Create a new child for host passed in:
 
 sub CreateChild {
+    my $host = shift;
+
     my $sigset = POSIX::SigSet->new(SIGINT);
     sigprocmask(SIG_BLOCK, $sigset);
-    my $host = shift;
     $RemoteHost = $host;
     Log("CRITICAL", "Forking server for ".$host);
     my $pid          = fork;
@@ -1829,17 +1829,33 @@
     local($SIG{CHLD}) = 'IGNORE';      # Our children >will< die.
     foreach my $pid (keys %ChildHash) {
 	my $serving = $ChildHash{$pid};
-	Debug(2, "Killing lonc for $serving pid = $pid");
-	ShowStatus("Killing lonc for $serving pid = $pid");
-	Log("CRITICAL", "Killing lonc for $serving pid = $pid");
+	ShowStatus("Nicely Killing lonc for $serving pid = $pid");
+	Log("CRITICAL", "Nicely Killing lonc for $serving pid = $pid");
 	kill 'QUIT' => $pid;
-	delete($ChildHash{$pid});
     }
-    my $execdir = $perlvar{'lonDaemons'};
-    unlink("$execdir/logs/lonc.pid");
+
 
 }
 
+
+#
+#  Kill all children via KILL.  Just in case the
+#  first shot didn't get them.
+
+sub really_kill_them_all_dammit
+{
+    Debug(2, "Kill them all Dammit");
+    local($SIG{CHLD}) = 'IGNORE'; # Localize the handler itself; old value returns at scope exit.
+    foreach my $pid (keys %ChildHash) { # keys() snapshots the list, so the delete below is safe.
+	my $serving = $ChildHash{$pid};
+	&ShowStatus("Nastily killing lonc for $serving pid = $pid");
+	Log("CRITICAL", "Nastily killing lonc for $serving pid = $pid");
+	kill 'KILL' => $pid;
+	delete($ChildHash{$pid});
+    }
+    my $execdir = $perlvar{'lonDaemons'};
+    unlink("$execdir/logs/lonc.pid"); # Once, and even when no children were left to kill.
+}
 =pod
 
 =head1 Terminate
@@ -1849,7 +1865,15 @@
 =cut
 
 sub Terminate {
-    KillThemAll;
+    &Log("CRITICAL", "Asked to kill children.. first be nice...");
+    &KillThemAll;
+    #
+    #  By now they really should all be dead.. but just in case 
+    #  send them all SIGKILL's after a bit of waiting:
+
+    sleep(4);
+    &Log("CRITICAL", "Now kill children nasty");
+    &really_kill_them_all_dammit;
     Log("CRITICAL","Master process exiting");
     exit 0;