[LON-CAPA-cvs] cvs: loncom / lonc

foxr lon-capa-cvs@mail.lon-capa.org
Wed, 10 Apr 2002 04:35:31 -0000


foxr		Wed Apr 10 00:35:31 2002 EDT

  Modified files:              
    /loncom	lonc 
  Log:
  Fixed USR1 handler to:
  1. Reset retry counters to zero rather than clearing counter hash.
  2. On all counters which were >= $childmaxattempts, restart the lonc
     server for the associated host.
  
  
  
  
Index: loncom/lonc
diff -u loncom/lonc:1.38 loncom/lonc:1.39
--- loncom/lonc:1.38	Thu Apr  4 17:04:54 2002
+++ loncom/lonc	Wed Apr 10 00:35:31 2002
@@ -5,7 +5,7 @@
 # provides persistent TCP connections to the other servers in the network
 # through multiplexed domain sockets
 #
-# $Id: lonc,v 1.38 2002/04/04 22:04:54 foxr Exp $
+# $Id: lonc,v 1.39 2002/04/10 04:35:31 foxr Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -175,7 +175,14 @@
 # And maintain the population.
 while (1) {
     my $deadpid = wait;		# Wait for the next child to die.
-                                    # See who died and start new one
+                                # See who died and start new one
+                                # or a signal (e.g. USR1 for restart).
+                                # if a signal, the wait will fail
+                                # This is ordinarily detected by
+                                # checking for the existence of the
+                                # pid index in the children hash since
+                                # the return value from a failed wait is -1
+                                # which is an impossible PID.
     &status("Woke up");
     my $skipping='';
 
@@ -870,8 +877,25 @@
 
 sub USRMAN {
     &logthis("USR1: Trying to establish connections again");
-    %childatt=();
-    &checkchildren();
+    #
+    #  It is really important not to just clear the childatt hash or we will
+    #  lose all memory of the children.  What we really want to do is this:
+    #  For each index where childatt is >= $childmaxattempts,
+    #  zero the associated counter and do a make_new_child for the host.
+    #  Regardless, the childatt entry is zeroed:
+    my $host;
+    foreach $host (keys %childatt) {
+	if ($childatt{$host} >= $childmaxattempts) {
+	    $childatt{$host} = 0;
+	    &logthis("<font color=green>INFO: Restarting child for server: "
+		     .$host."</font>\n");
+	    make_new_child($host);
+	}
+	else {
+	    $childatt{$host} = 0;
+	}
+    }
+    &checkchildren();		# See if any children are still dead...
 }
 
 # -------------------------------------------------- Non-critical communication