[LON-CAPA-cvs] cvs: loncom / loncnew

foxr lon-capa-cvs@mail.lon-capa.org
Tue, 24 Jun 2003 02:46:04 -0000


foxr		Mon Jun 23 22:46:04 2003 EDT

  Modified files:              
    /loncom	loncnew 
  Log:
  Put a limit on the number of times we'll retry a connection.
  Start getting the signal stuff put in as well... note that we need to get signals
  going or else the client will permanently give up on dead servers.
  
  
Index: loncom/loncnew
diff -u loncom/loncnew:1.9 loncom/loncnew:1.10
--- loncom/loncnew:1.9	Thu Jun 12 22:38:43 2003
+++ loncom/loncnew	Mon Jun 23 22:46:04 2003
@@ -2,7 +2,7 @@
 # The LearningOnline Network with CAPA
 # lonc maintains the connections to remote computers
 #
-# $Id: loncnew,v 1.9 2003/06/13 02:38:43 foxr Exp $
+# $Id: loncnew,v 1.10 2003/06/24 02:46:04 foxr Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -46,6 +46,11 @@
 
 # Change log:
 #    $Log: loncnew,v $
+#    Revision 1.10  2003/06/24 02:46:04  foxr
+#    Put a limit on the number of times we'll retry a connection.
+#    Start getting the signal stuff put in as well... note that we need to get signals
+#    going or else the client will permanently give up on dead servers.
+#
 #    Revision 1.9  2003/06/13 02:38:43  foxr
 #    Add logging in 'expected format'
 #
@@ -120,7 +125,8 @@
 my $ConnectionCount = 0;
 my $IdleSeconds     = 0;	# Number of seconds idle.
 my $Status          = "";	# Current status string.
-
+my $ConnectionRetries=5;	# Number of connection retries allowed.
+my $ConnectionRetriesLeft=5;	# Number of connection retries remaining.
 
 #
 #   The hash below gives the HTML format for log messages
@@ -134,7 +140,23 @@
 $LogFormats{"WARNING"}  = "<font color=blue>WARNING: %s</font>";
 $LogFormats{"DEFAULT"}  = " %s ";
 
-my $lastlog = '';		# Used for status reporting.
+
+
+=pod
+
+=head2 LogPerm
+
+Makes an entry into the permanent log file.
+
+=cut
+sub LogPerm {
+    my $message=shift;
+    my $execdir=$perlvar{'lonDaemons'};
+    my $now=time;
+    my $local=localtime($now);
+    my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log");
+    print $fh "$now:$message:$local\n";
+}
 
 =pod
 
@@ -181,6 +203,7 @@
     my $msg = sprintf($finalformat, $message);
     print $fh $msg;
     
+    
 }
 
 
@@ -238,13 +261,15 @@
 =head2 ShowStatus
 
  Place some text as our pid status.
+ and as what we return in a SIGUSR1
 
 =cut
 sub ShowStatus {
-    my $status = shift;
-    $0 =  "lonc: ".$status;
-    $Status  = $status;		# Make available for logging.
-
+    my $state = shift;
+    my $now = time;
+    my $local = localtime($now);
+    $Status   = $local.": ".$state;
+    $0='lonc: '.$state.' '.$local;
 }
 
 =pod
@@ -283,12 +308,17 @@
     #
     
     my $Requests = $WorkQueue->Count();
-    if (($ConnectionCount == 0)  && ($Requests > 0)) {
-	my $Connections = ($Requests <= $MaxConnectionCount) ?
-	                           $Requests : $MaxConnectionCount;
-	Debug(1,"Work but no connections, starting ".$Connections." of them");
-	for ($i =0; $i < $Connections; $i++) {
-	    MakeLondConnection();
+    if (($ConnectionCount == 0)  && ($Requests > 0)) { 
+	if ($ConnectionRetriesLeft > 0) {
+	    my $Connections = ($Requests <= $MaxConnectionCount) ?
+		$Requests : $MaxConnectionCount;
+	    Debug(1,"Work but no connections, start ".$Connections." of them");
+	    for ($i =0; $i < $Connections; $i++) {
+		MakeLondConnection();
+	    }
+	} else {
+	    Debug(1,"Work in queue, but gave up on connections..flushing\n");
+	    EmptyQueue();	# Connections can't be established.
 	}
        
     }
@@ -466,6 +496,7 @@
 	StartClientReply($Transaction, $data);
     } else {			# Delete deferred transaction file.
 	Log("SUCCESS", "A delayed transaction was completed");
+	LogPerm("S:$Client:".$Transaction->getRequest());
 	unlink $Transaction->getFile();
     }
 }
@@ -525,13 +556,12 @@
 
 sub FailTransaction {
     my $transaction = shift;
-    my $Lond        = $transaction->getServer();
-    if (!$client->isDeferred()) { # If the transaction is deferred we'll get to it.
+    Debug(1, "Failing transaction: ".$transaction->getRequest());
+    if (!$transaction->isDeferred()) { # If the transaction is deferred we'll get to it.
 	my $client  = $transcation->getClient();
-	StartClientReply($client, "con_lost");
+	Debug(1," Replying con_lost to ".$transaction->getRequest());
+	StartClientReply($client, "con_lost\n");
     }
-# not needed, done elsewhere if active.
-#    delete $ActiveTransactions{$Lond};
 
 }
 
@@ -544,7 +574,7 @@
 =cut
 sub EmptyQueue {
     while($WorkQueue->Count()) {
-	my $request = $Workqueue->dequeue(); # This is a transaction
+	my $request = $WorkQueue->dequeue(); # This is a transaction
 	FailTransaction($request);
     }
 }
@@ -978,7 +1008,10 @@
 
     if($Connection == undef) {	# Needs to be more robust later.
 	Log("CRITICAL","Failed to make a connection with lond.");
+	$ConnectionRetriesLeft--;
+	return 0;		# Failure.
     }  else {
+	$ConnectionRetriesLeft = $ConnectionRetries; # success resets the count
 	# The connection needs to have writability 
 	# monitored in order to send the init sequence
 	# that starts the whole authentication/key
@@ -1006,6 +1039,7 @@
 	}
 	Log("SUCESS", "Created connection ".$ConnectionCount
 	    ." to host ".GetServerHost());
+	return 1;		# Return success.
     }
     
 }
@@ -1103,7 +1137,6 @@
 =pod
 
 =head2 ClientRequest
-
 Callback that is called when data can be read from the UNIX domain
 socket connecting us with an apache server process.
 
@@ -1128,12 +1161,13 @@
 	close($socket);
 	$watcher->cancel();
 	delete($ActiveClients{$socket});
+	return;
     }
     Debug(8,"Data: ".$data." this read: ".$thisread);
     $data = $data.$thisread;	# Append new data.
     $watcher->data($data);
     if($data =~ /(.*\n)/) {	# Request entirely read.
-	if($data == "close_connection_exit\n") {
+	if($data eq "close_connection_exit\n") {
 	    Log("CRITICAL",
 		"Request Close Connection ... exiting");
 	    CloseAllLondConnections();
@@ -1250,6 +1284,24 @@
 
 =pod
 
+=head2 SignalledToDeath
+
+Called in response to a signal that causes a child process to die.
+
+=cut
+
+=pod
+
+sub SignalledToDeath {
+    my ($signal) = @_;
+    chomp($signal);
+    Log("CRITICAL", "Abnormal exit.  Child $$ for $RemoteHost "
+	."died through "."\"$signal\"");
+    LogPerm("F:lonc: $$ on $RemoteHost signalled to death: "
+	    ."\"$signal\"");
+    die("Signal abnormal end");
+
+}
 =head2 ChildProcess
 
 This sub implements a child process for a single lonc daemon.
@@ -1261,12 +1313,12 @@
 
     # For now turn off signals.
     
-    $SIG{QUIT}  = IGNORE;
+    $SIG{QUIT}  = \&SignalledToDeath;
     $SIG{HUP}   = IGNORE;
     $SIG{USR1}  = IGNORE;
     $SIG{INT}   = IGNORE;
     $SIG{CHLD}  = IGNORE;
-    $SIG{__DIE__}  = IGNORE;
+    $SIG{__DIE__}  = \&SignalledToDeath;
 
     SetupTimer();
     
@@ -1278,12 +1330,9 @@
 
 # Setup the initial server connection:
     
-    &MakeLondConnection();
+     # &MakeLondConnection(); // let first work request do it.
+
 
-    if($ConnectionCount == 0) {
-	Debug(1,"Could not make initial connection..\n");
-	Debug(1,"Will retry when there's work to do\n");
-    }
     Debug(9,"Entering event loop");
     my $ret = Event::loop();		#  Start the main event loop.
     
@@ -1363,6 +1412,12 @@
 
 ShowStatus("Parent keeping the flock");
 
+#
+#   Set up parent signals:
+#
+$SIG{INT}  = &KillThemAll;
+$SIG{TERM} = &KillThemAll; 
+
 while(1) {
     $deadchild = wait();
     if(exists $ChildHash{$deadchild}) {	# need to restart.
@@ -1373,6 +1428,8 @@
 	Log("INFO", "Restarting child procesing ".$deadhost);
 	CreateChild($deadhost);
     }
+}
+sub KillThemAll {
 }
 
 =head1 Theory