[LON-CAPA-cvs] cvs: loncom / loncron
albertel
lon-capa-cvs@mail.lon-capa.org
Wed, 10 Sep 2003 19:13:09 -0000
This is a MIME encoded message
--albertel1063221189
Content-Type: text/plain
albertel Wed Sep 10 15:13:09 2003 EDT
Modified files:
/loncom loncron
Log:
- made the restart of daemons look more obvious as to what is going on
- updated status messages to be less obtuse
--albertel1063221189
Content-Type: text/plain
Content-Disposition: attachment; filename="albertel-20030910151309.txt"
Index: loncom/loncron
diff -u loncom/loncron:1.41 loncom/loncron:1.42
--- loncom/loncron:1.41 Wed Jul 30 12:49:27 2003
+++ loncom/loncron Wed Sep 10 15:13:09 2003
@@ -49,6 +49,128 @@
ENDERROUT
}
+sub start_daemon {
+ my ($fh,$daemon,$pidfile) = @_;
+ system("$perlvar{'lonDaemons'}/$daemon 2>>$perlvar{'lonDaemons'}/logs/${daemon}_errors");
+ sleep 2;
+ if (-e $pidfile) {
+ print $fh "Seems like it started ...<p>";
+ my $lfh=IO::File->new("$pidfile");
+ my $daemonpid=<$lfh>;
+ chomp($daemonpid);
+ sleep 2;
+ if (kill 0 => $daemonpid) {
+ return 1;
+ } else {
+ return 0;
+ }
+ }
+ print $fh "Seems like that did not work!<p>";
+ $errors++;
+ return 0;
+}
+
+sub checkon_daemon {
+ my ($fh,$daemon,$maxsize,$sendusr1)=@_;
+
+ print $fh '<hr><a name="'.$daemon.'"><h2>'.$daemon.'</h2><h3>Log</h3><pre>';
+ printf("%-10s ",$daemon);
+ if (-e "$perlvar{'lonDaemons'}/logs/$daemon.log"){
+ open (DFH,"tail -n25 $perlvar{'lonDaemons'}/logs/$daemon.log|");
+ while ($line=<DFH>) {
+ print $fh "$line";
+ if ($line=~/INFO/) { $notices++; }
+ if ($line=~/WARNING/) { $notices++; }
+ if ($line=~/CRITICAL/) { $warnings++; }
+ };
+ close (DFH);
+ }
+ print $fh "</pre>";
+
+ my $pidfile="$perlvar{'lonDaemons'}/logs/$daemon.pid";
+
+ my $restartflag=1;
+
+ if (-e $pidfile) {
+ my $lfh=IO::File->new("$pidfile");
+ my $daemonpid=<$lfh>;
+ chomp($daemonpid);
+ if (kill 0 => $daemonpid) {
+ print $fh "<h3>$daemon at pid $daemonpid responding";
+ if ($sendusr1) { print $fh ", sending USR1"; }
+ print $fh "</h3>";
+ if ($sendusr1) { kill USR1 => $daemonpid; }
+ $restartflag=0;
+ print "running\n";
+ } else {
+ $errors++;
+ print $fh "<h3>$daemon at pid $daemonpid not responding</h3>";
+ $restartflag=1;
+ print $fh "<h3>Decided to clean up stale .pid file and restart $daemon</h3>";
+ }
+ }
+ if ($restartflag==1) {
+ $simplestatus{$daemon}='off';
+ $errors++;
+ print $fh '<br><font color="red">Killall '.$daemon.': '.
+ `killall $daemon 2>&1`.' - ';
+ sleep 2;
+ print $fh unlink($pidfile).' - '.
+ `killall -9 $daemon 2>&1`.
+ '</font><br>';
+ print $fh "<h3>$daemon not running, trying to start</h3>";
+
+ if (&start_daemon($fh,$daemon,$pidfile)) {
+ print $fh "<h3>$daemon at pid $daemonpid responding</h3>";
+ $simplestatus{$daemon}='restarted';
+ print "started\n";
+ } else {
+ $errors++;
+ print $fh "<h3>$daemon at pid $daemonpid not responding</h3>";
+ print $fh "Give it one more try ...<p>";
+ print " ";
+ if (&start_daemon($fh,$daemon,$pidfile)) {
+ print $fh "<h3>$daemon at pid $daemonpid responding</h3>";
+ $simplestatus{$daemon}='restarted';
+ print "started\n";
+ } else {
+ print " failed\n";
+ $simplestatus{$daemon}='failed';
+ $errors++; $errors++;
+ print $fh "<h3>$daemon at pid $daemonpid not responding</h3>";
+ print $fh "Unable to start $daemon<p>";
+ }
+ }
+
+ if (-e "$perlvar{'lonDaemons'}/logs/$daemon.log"){
+ print $fh "<p><pre>";
+ open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/$daemon.log|");
+ while ($line=<DFH>) {
+ print $fh "$line";
+ if ($line=~/WARNING/) { $notices++; }
+ if ($line=~/CRITICAL/) { $notices++; }
+ };
+ close (DFH);
+ print $fh "</pre>";
+ }
+ }
+
+ $fname="$perlvar{'lonDaemons'}/logs/$daemon.log";
+
+ my ($dev,$ino,$mode,$nlink,
+ $uid,$gid,$rdev,$size,
+ $atime,$mtime,$ctime,
+ $blksize,$blocks)=stat($fname);
+
+ if ($size>$maxsize) {
+ print $fh "Rotating logs ...<p>";
+ rename("$fname.2","$fname.3");
+ rename("$fname.1","$fname.2");
+ rename("$fname","$fname.1");
+ }
+
+ &errout($fh);
+}
# ================================================================ Main Program
# --------------------------------- Read loncapa_apache.conf and loncapa.conf
@@ -316,400 +438,24 @@
# ---------------------------------------------------------------------- lonsql
-my $restartflag=1;
- print $fh '<hr><a name="lonsql"><h2>lonsql</h2><h3>Log</h3><pre>';
- print "lonsql\n";
- if (-e "$perlvar{'lonDaemons'}/logs/lonsql.log"){
- open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonsql.log|");
- while ($line=<DFH>) {
- print $fh "$line";
- if ($line=~/INFO/) { $notices++; }
- if ($line=~/WARNING/) { $notices++; }
- if ($line=~/CRITICAL/) { $warnings++; }
- };
- close (DFH);
- }
- print $fh "</pre>";
-
- my $lonsqlfile="$perlvar{'lonDaemons'}/logs/lonsql.pid";
-
- $restartflag=1;
-
- if (-e $lonsqlfile) {
- my $lfh=IO::File->new("$lonsqlfile");
- my $lonsqlpid=<$lfh>;
- chomp($lonsqlpid);
- if (kill 0 => $lonsqlpid) {
- print $fh "<h3>lonsql at pid $lonsqlpid responding</h3>";
- $restartflag=0;
- } else {
- $errors++; $errors++;
- print $fh "<h3>lonsql at pid $lonsqlpid not responding</h3>";
- $restartflag=1;
- print $fh
- "<h3>Decided to clean up stale .pid file and restart lonsql</h3>";
- }
- }
- if ($restartflag==1) {
- $simplestatus{'lonsql'}='off';
- $errors++;
- print $fh '<br><font color="red">Killall lonsql: '.
- system('killall lonsql').' - ';
- sleep 2;
- print $fh unlink($lonsqlfile).' - '.
- system('killall -9 lonsql').
- '</font><br>';
- print $fh "<h3>lonsql not running, trying to start</h3>";
- system(
- "$perlvar{'lonDaemons'}/lonsql 2>>$perlvar{'lonDaemons'}/logs/lonsql_errors");
- sleep 2;
- if (-e $lonsqlfile) {
- print $fh "Seems like it started ...<p>";
- my $lfh=IO::File->new("$lonsqlfile");
- my $lonsqlpid=<$lfh>;
- chomp($lonsqlpid);
- sleep 2;
- if (kill 0 => $lonsqlpid) {
- print $fh "<h3>lonsql at pid $lonsqlpid responding</h3>";
- $simplestatus{'lonsql'}='restarted';
- } else {
- $errors++; $errors++;
- print $fh "<h3>lonsql at pid $lonsqlpid not responding</h3>";
- print $fh "Give it one more try ...<p>";
- system(
- "$perlvar{'lonDaemons'}/lonsql 2>>$perlvar{'lonDaemons'}/logs/lonsql_errors");
- sleep 2;
- }
- } else {
- print $fh "Seems like that did not work!<p>";
- $errors++;
- }
- if (-e "$perlvar{'lonDaemons'}/logs/lonsql.log"){
- print $fh "<p><pre>";
- open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonsql.log|");
- while ($line=<DFH>) {
- print $fh "$line";
- if ($line=~/WARNING/) { $notices++; }
- if ($line=~/CRITICAL/) { $notices++; }
- };
- close (DFH);
- print $fh "</pre>";
- }
- }
-
- $fname="$perlvar{'lonDaemons'}/logs/lonsql.log";
+&checkon_daemon($fh,'lonsql',200000);
- my ($dev,$ino,$mode,$nlink,
- $uid,$gid,$rdev,$size,
- $atime,$mtime,$ctime,
- $blksize,$blocks)=stat($fname);
-
- if ($size>200000) {
- print $fh "Rotating logs ...<p>";
- rename("$fname.2","$fname.3");
- rename("$fname.1","$fname.2");
- rename("$fname","$fname.1");
- }
-
- &errout($fh);
# ------------------------------------------------------------------------ lond
-print $fh '<hr><a name="lond"><h2>lond</h2><h3>Log</h3><pre>';
-print "lond\n";
+&checkon_daemon($fh,'lond',40000,1);
-if (-e "$perlvar{'lonDaemons'}/logs/lond.log"){
-open (DFH,"tail -n25 $perlvar{'lonDaemons'}/logs/lond.log|");
-while ($line=<DFH>) {
- print $fh "$line";
- if ($line=~/INFO/) { $notices++; }
- if ($line=~/WARNING/) { $notices++; }
- if ($line=~/CRITICAL/) { $warnings++; }
-};
-close (DFH);
-}
-print $fh "</pre>";
-
-my $londfile="$perlvar{'lonDaemons'}/logs/lond.pid";
-
-$restartflag=1;
-if (-e $londfile) {
- my $lfh=IO::File->new("$londfile");
- my $londpid=<$lfh>;
- chomp($londpid);
- if (kill 0 => $londpid) {
- print $fh "<h3>lond at pid $londpid responding, sending USR1</h3>";
- kill USR1 => $londpid;
- $restartflag=0;
- } else {
- $errors++;
- print $fh "<h3>lond at pid $londpid not responding</h3>";
- $restartflag=1;
- print $fh
- "<h3>Decided to clean up stale .pid file and restart lond</h3>";
- }
-}
-if ($restartflag==1) {
- $simplestatus{'lond'}='off';
- $errors++;
- print $fh '<br><font color="red">Killall lond: '.
- system('killall lond').' - ';
- sleep 2;
- print $fh unlink($londfile).' - '.system('killall -9 lond').
- '</font><br>';
- print $fh "<h3>lond not running, trying to start</h3>";
- system(
- "$perlvar{'lonDaemons'}/lond 2>>$perlvar{'lonDaemons'}/logs/lond_errors");
- sleep 2;
- if (-e $londfile) {
- print $fh "Seems like it started ...<p>";
- my $lfh=IO::File->new("$londfile");
- my $londpid=<$lfh>;
- chomp($londpid);
- sleep 2;
- if (kill 0 => $londpid) {
- print $fh "<h3>lond at pid $londpid responding</h3>";
- $simplestatus{'lond'}='restarted';
- } else {
- $errors++; $errors++;
- print $fh "<h3>lond at pid $londpid not responding</h3>";
- print $fh "Give it one more try ...<p>";
- system(
- "$perlvar{'lonDaemons'}/lond 2>>$perlvar{'lonDaemons'}/logs/lond_errors");
- sleep 2;
- }
- } else {
- print $fh "Seems like that did not work!<p>";
- $errors++;
- }
- if (-e "$perlvar{'lonDaemons'}/logs/lond.log"){
- print $fh "<p><pre>";
- open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lond.log|");
- while ($line=<DFH>) {
- print $fh "$line";
- if ($line=~/WARNING/) { $notices++; }
- if ($line=~/CRITICAL/) { $notices++; }
- };
- close (DFH);
- print $fh "</pre>";
- }
-}
-
-$fname="$perlvar{'lonDaemons'}/logs/lond.log";
-
- my ($dev,$ino,$mode,$nlink,
- $uid,$gid,$rdev,$size,
- $atime,$mtime,$ctime,
- $blksize,$blocks)=stat($fname);
-
-if ($size>40000) {
- print $fh "Rotating logs ...<p>";
- rename("$fname.2","$fname.3");
- rename("$fname.1","$fname.2");
- rename("$fname","$fname.1");
-}
-
-&errout($fh);
# ------------------------------------------------------------------------ lonc
-print $fh '<hr><a name="lonc"><h2>lonc</h2><h3>Log</h3><pre>';
-print "lonc\n";
+&checkon_daemon($fh,'lonc',40000,1);
-if (-e "$perlvar{'lonDaemons'}/logs/lonc.log"){
-open (DFH,"tail -n25 $perlvar{'lonDaemons'}/logs/lonc.log|");
-while ($line=<DFH>) {
- print $fh "$line";
- if ($line=~/INFO/) { $notices++; }
- if ($line=~/WARNING/) { $notices++; }
- if ($line=~/CRITICAL/) { $warnings++; }
-};
-close (DFH);
-}
-print $fh "</pre>";
-
-my $loncfile="$perlvar{'lonDaemons'}/logs/lonc.pid";
-
-$restartflag=1;
-if (-e $loncfile) {
- my $lfh=IO::File->new("$loncfile");
- my $loncpid=<$lfh>;
- chomp($loncpid);
- if (kill 0 => $loncpid) {
- print $fh "<h3>lonc at pid $loncpid responding, sending USR1</h3>";
- kill USR1 => $loncpid;
- $restartflag=0;
- } else {
- $errors++;
- print $fh "<h3>lonc at pid $loncpid not responding</h3>";
- # Solution: kill parent and children processes, remove .pid and restart
- $restartflag=1;
- print $fh
- "<h3>Decided to clean up stale .pid file and restart lonc</h3>";
- }
-}
-if ($restartflag==1) {
- $simplestatus{'lonc'}='off';
- $errors++;
- print $fh '<br><font color="red">Killall lonc: '.
- system('killall lonc').' - ';
- sleep 2;
- print $fh unlink($loncfile).' - '.system('killall -9 lonc').
- '</font><br>';
- print $fh "<h3>lonc not running, trying to start</h3>";
- system(
- "$perlvar{'lonDaemons'}/lonc 2>>$perlvar{'lonDaemons'}/logs/lonc_errors");
- sleep 2;
- if (-e $loncfile) {
- print $fh "Seems like it started ...<p>";
- my $lfh=IO::File->new("$loncfile");
- my $loncpid=<$lfh>;
- chomp($loncpid);
- sleep 2;
- if (kill 0 => $loncpid) {
- print $fh "<h3>lonc at pid $loncpid responding</h3>";
- $simplestatus{'lonc'}='restarted';
- } else {
- $errors++; $errors++;
- print $fh "<h3>lonc at pid $loncpid not responding</h3>";
- print $fh "Give it one more try ...<p>";
- system(
- "$perlvar{'lonDaemons'}/lonc 2>>$perlvar{'lonDaemons'}/logs/lonc_errors");
- sleep 2;
- }
- } else {
- print $fh "Seems like that did not work!<p>";
- $errors++;
- }
- if (-e "$perlvar{'lonDaemons'}/logs/lonc.log") {
- print $fh "<p><pre>";
- open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonc.log|");
- while ($line=<DFH>) {
- print $fh "$line";
- if ($line=~/WARNING/) { $notices++; }
- if ($line=~/CRITICAL/) { $notices++; }
- };
- close (DFH);
- print $fh "</pre>";
- }
-}
-
-$fname="$perlvar{'lonDaemons'}/logs/lonc.log";
-
- my ($dev,$ino,$mode,$nlink,
- $uid,$gid,$rdev,$size,
- $atime,$mtime,$ctime,
- $blksize,$blocks)=stat($fname);
-
-if ($size>40000) {
- print $fh "Rotating logs ...<p>";
- rename("$fname.2","$fname.3");
- rename("$fname.1","$fname.2");
- rename("$fname","$fname.1");
-}
-
-
-&errout($fh);
# -------------------------------------------------------------------- lonhttpd
-print $fh '<hr><a name="lonhttpd"><h2>lonhttpd</h2><h3>Log</h3><pre>';
-print "lonhttpd\n";
-
-if (-e "$perlvar{'lonDaemons'}/logs/lonhttpd.log"){
-open (DFH,"tail -n25 $perlvar{'lonDaemons'}/logs/lonhttpd.log|");
-while ($line=<DFH>) {
- print $fh "$line";
- if ($line=~/INFO/) { $notices++; }
- if ($line=~/WARNING/) { $notices++; }
- if ($line=~/CRITICAL/) { $warnings++; }
-};
-close (DFH);
-}
-print $fh "</pre>";
-
-my $lonhttpdfile="$perlvar{'lonDaemons'}/logs/lonhttpd.pid";
-
-$restartflag=1;
-if (-e $lonhttpdfile) {
- my $lfh=IO::File->new("$lonhttpdfile");
- my $lonhttpdpid=<$lfh>;
- chomp($lonhttpdpid);
- if (kill 0 => $lonhttpdpid) {
- print $fh "<h3>lonhttpd at pid $lonhttpdpid responding</h3>";
- $restartflag=0;
- } else {
- $errors++;
- print $fh "<h3>lonhttpd at pid $lonhttpdpid not responding</h3>";
- # Solution: kill parent and children processes, remove .pid and restart
- $restartflag=1;
- print $fh
- "<h3>Decided to clean up stale .pid file and restart lonhttpd</h3>";
- }
-}
-if ($restartflag==1) {
- $simplestatus{'lonhttpd'}='off';
- $errors++;
- print $fh '<br><font color="red">Killall lonhttpd: '.
- system('killall lonhttpd').' - ';
- sleep 2;
- print $fh unlink($lonhttpdfile).' - '.system('killall -9 lonhttpd').
- '</font><br>';
- print $fh "<h3>lonhttpd not running, trying to start</h3>";
- system(
- "$perlvar{'lonDaemons'}/lonhttpd 2>>$perlvar{'lonDaemons'}/logs/lonhttpd_errors");
- sleep 2;
- if (-e $lonhttpdfile) {
- print $fh "Seems like it started ...<p>";
- my $lfh=IO::File->new("$lonhttpdfile");
- my $lonhttpdpid=<$lfh>;
- chomp($lonhttpdpid);
- sleep 2;
- if (kill 0 => $lonhttpdpid) {
- print $fh "<h3>lonhttpd at pid $lonhttpdpid responding</h3>";
- $simplestatus{'lonhttpd'}='restarted';
- } else {
- $errors++; $errors++;
- print $fh "<h3>lonhttpd at pid $lonhttpdpid not responding</h3>";
- print $fh "Give it one more try ...<p>";
- system(
- "$perlvar{'lonDaemons'}/lonhttpd 2>>$perlvar{'lonDaemons'}/logs/lonhttpd_errors");
- sleep 2;
- }
- } else {
- print $fh "Seems like that did not work!<p>";
- $errors++;
- }
- if (-e "$perlvar{'lonDaemons'}/logs/lonhttpd.log") {
- print $fh "<p><pre>";
- open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonhttpd.log|");
- while ($line=<DFH>) {
- print $fh "$line";
- if ($line=~/WARNING/) { $notices++; }
- if ($line=~/CRITICAL/) { $notices++; }
- };
- close (DFH);
- print $fh "</pre>";
- }
-}
-
-$fname="$perlvar{'lonDaemons'}/logs/lonhttpd.log";
-
- my ($dev,$ino,$mode,$nlink,
- $uid,$gid,$rdev,$size,
- $atime,$mtime,$ctime,
- $blksize,$blocks)=stat($fname);
-
-if ($size>40000) {
- print $fh "Rotating logs ...<p>";
- rename("$fname.2","$fname.3");
- rename("$fname.1","$fname.2");
- rename("$fname","$fname.1");
-}
+&checkon_daemon($fh,'lonhttpd',40000);
-
-&errout($fh);
# ---------------------------------------------------------------------- lonnet
print $fh '<hr><a name="lonnet"><h2>lonnet</h2><h3>Temp Log</h3><pre>';
-print "lonnet\n";
+print "checking logs\n";
if (-e "$perlvar{'lonDaemons'}/logs/lonnet.log"){
open (DFH,"tail -n50 $perlvar{'lonDaemons'}/logs/lonnet.log|");
while ($line=<DFH>) {
@@ -746,10 +492,10 @@
# ----------------------------------------------------------------- Connections
print $fh '<hr><a name="connections"><h2>Connections</h2>';
-
+print "testing connections\n";
print $fh "<table border=2>";
foreach $tryserver (sort(keys(%hostname))) {
-
+ print(".");
$answer=reply("pong",$tryserver);
if ($answer eq "$tryserver:$perlvar{'lonHostID'}") {
$result="<b>ok</b>";
@@ -758,6 +504,7 @@
$warnings++;
if ($answer eq 'con_lost') { $warnings++; }
}
+ if ($answer =~ /con_lost/) { print(" $tryserver down\n"); }
print $fh "<tr><td>$tryserver</td><td>$result</td></tr>\n";
}
@@ -767,7 +514,7 @@
# ------------------------------------------------------------ Delayed messages
print $fh '<hr><a name="delayed"><h2>Delayed Messages</h2>';
-print "buffers\n";
+print "checking buffers\n";
print $fh '<h3>Scanning Permanent Log</h3>';
@@ -778,7 +525,7 @@
($time,$sdf,$dserv,$dcmd)=split(/:/,$line);
if ($sdf eq 'F') {
$local=localtime($time);
- print "<b>Failed: $time, $dserv, $dcmd</b><br>";
+ print $fh "<b>Failed: $time, $dserv, $dcmd</b><br>";
$warnings++;
}
if ($sdf eq 'S') { $unsend--; }
@@ -805,7 +552,7 @@
$now=time;
$date=localtime($now);
print $fh "<hr>$date ($now)</body></html>\n";
-print "writing done\n";
+print "lon-status webpage updated\n";
$fh->close();
}
if ($errors) { $simplestatus{'errors'}=$errors; }
@@ -823,7 +570,7 @@
$sfh->close();
}
if ($totalcount>200) {
- print "mailing\n";
+ print "sending mail\n";
$emailto="$perlvar{'lonAdmEMail'}";
if ($totalcount>1000) {
$emailto.=",$perlvar{'lonSysEMail'}";
--albertel1063221189--