#!/usr/bin/perl -w
#
# Returns a mon server list that failed services
#
# Usage : remote.monitor [options] [host1 host2 ...]
#
#  --port n       : the mon port (default 2583)
#
#  --timeout n    : the connection timeout (default 10 seconds)
#
#  --summary      : flag to extend the summary of this monitor
#                   return for each failed mon server the list of the
#                   failed. Like : host1([g1:s1|s3][g4:s5]) ...
#
#  --bigsummary   : flag to extend the summary of this monitor
#                   return for each failed mon server the list of the
#                   failed. Like : host1([g1:s1{sum}|s3{sum}][g4:s5{sum}]) ...
#
#  --debug        : some debug information (do not use this with mon)
#
#  --restrict watch[:service] : restrict test to specified watch
#                   [and service]
#
#  --help         : prints this message.
#
#  host1 host2    : list of remote MON servers to check
#
# Contributors :
#   Gilles LAMIRAL, lamiral@mail.dotcom.fr
#   Laurent COMBE, laurent.combe@free.fr
#   Thomas MORIN, thomas.morin@webmotion.com
#
# Copyright (C) 1999, Gilles LAMIRAL
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License.

# Variables:
# @failures         hosts that failed, in the order they were detected.
# %failuresDetails  host => human readable description of the failure(s).
# @extendsummary    per-host summary entries printed with --summary.
#
# NOTE(review): "use strict" is intentionally not enabled here: the rest of
# the file is not visible from this section and the debug dump below may be
# handed non-reference values. Confirm before tightening.

use Getopt::Long;
use Mon::Client;

GetOptions(
    "port|p:i"     => \$port,
    "timeout|t:i"  => \$timeout,
    "summary"      => \$summary,
    "bigsummary"   => \$bigsummary,
    "debug|d"      => \$debug,
    "help|h"       => \$help,
    "restrict|r:s" => \$restrict,
);

my $rcs = ' $Id: remote.monitor,v 1.7 2000/10/09 21:37:53 laurent Exp $ ' ;
# Only trust $1 when the match actually succeeded.
$VERSION = ($rcs =~ m/,v (\d+\.\d+)/) ? $1 : "0.1";

usage() and exit if ($help);

$port    = ($port)    ? $port    : "2583";
$timeout = ($timeout) ? $timeout : "10";
# --bigsummary implies --summary.
$summary = ($summary) ? $summary : $bigsummary;
($restrict) and ($only_watch, $only_service) = split( /:/, ($restrict) );

@failures        = ();
%failuresDetails = ();
@extendsummary   = ();

foreach my $host (@ARGV) {
    my $begin = time;

    eval {
        local $SIG{ALRM} = sub { die "Timeout Alarm" };
        alarm $timeout;
        getinfo($host, $port);
        alarm 0;    # Cancel the alarm
    };
    # Save the error immediately: $@ is a global that any later eval resets.
    my $error = $@;
    # If getinfo() died for a reason other than the alarm, the alarm is
    # still armed at this point; never leave it pending.
    alarm 0;

    my $timeResponse = time - $begin;

    if ($error =~ /^Timeout Alarm/) {
        ($debug) and print "Timeout connection\n";
        # The host may already be listed if the timeout hit late in getinfo().
        push(@failures, $host) unless (defined($failuresDetails{$host}));
        $failuresDetails{$host} = join("", "Timeout connection");
        push(@extendsummary, "${host}:TIMEOUT(${timeResponse})");
    }
    elsif ($error) {
        # Any other exception means the check itself did not complete;
        # report it instead of silently treating the host as healthy.
        ($debug) and print "check aborted: $error\n";
        push(@failures, $host) unless (defined($failuresDetails{$host}));
        $failuresDetails{$host} .= "Check aborted: $error";
        push(@extendsummary, "${host}:ERROR");
    }
}

if (@failures == 0) {
    exit 0;
}

if ($summary) {
    print "@extendsummary\n\n";
}
else {
    print "@failures\n\n";
}

foreach my $host (@failures) {
    print "Details for $host failure :\n",
          "$failuresDetails{$host}\n\n";
}

# exit with the error status on.
exit(1);

# getinfo($host, $port)
#
# Connects to the mon server at $host:$port, fetches its operational status
# and its list of disabled items, and records every service whose opstatus
# is 0 and which is not disabled.
#
# Results are reported through the file-level variables @failures,
# %failuresDetails and @extendsummary; the return value is not meaningful.
# Honors $only_watch / $only_service (from --restrict), $summary,
# $bigsummary and $debug.
sub getinfo {
    my ($host, $port) = @_;

    ($debug) and print "testing mon server $host :\n";

    my $cl = Mon::Client->new;
    $cl->host($host);
    $cl->port($port);

    unless (defined($cl->connect)) {
        ($debug) and print "connection failed: ", $cl->error, "\n";
        $failuresDetails{$host} = join("", "Connection failed: ", $cl->error);
        push(@failures, $host);
        push(@extendsummary, "${host}:CONNECT");
        return;
    }

    ($debug) and print "connection succeeded\n";
    ($debug) and print
        "host : ", $cl->host, "\n",
        "port : ", $cl->port, "\n",
        "error : ", $cl->error, "\n",
        ;

    my %opstatus_of = $cl->list_opstatus;
    ($debug) and print
        "list_opstatus: ", %opstatus_of, "\n",
        "error : ", $cl->error, "\n",
        ;

    if ($cl->error) {
        $failuresDetails{$host} = join("", "list_opstatus failed:", $cl->error);
        push(@failures, $host);
        push(@extendsummary, "${host}:list_opstatus");
        # Leave through a normal return: a loop-control statement here would
        # jump out of the subroutine and the caller's eval, skipping both
        # the disconnect and the caller's "alarm 0".
        $cl->disconnect;
        return;
    }

    my %disabled = $cl->list_disabled;
    ($debug) and print
        "list_disabled: ", %disabled, "\n",
        "error : ", $cl->error, "\n",
        ;

    # Debug dump of the disabled structure (category -> watch -> value).
    if ($debug) {
        print "===\nlist_disabled detail:\n";
        while (my ($category, $pwatch) = each %disabled) {
            # Only descend into real hash references.
            next unless (ref($pwatch) eq 'HASH');
            while (my ($watch, $pvalue) = each %$pwatch) {
                next unless (ref($pvalue) eq 'HASH');
                while (my ($value) = each %$pvalue) {
                    print "$category,$watch,$value\n";
                }
            }
        }
        print "end of list_disabled detail\n===\n";
    }

    my $hosterr   = 0;     # number of failed services on this host
    my $hostwatch = "";    # concatenated "[watch:svc|svc]" groups

    ($debug) and print "===\nlist_opstatus detail:\n";

    foreach my $watch (sort keys %opstatus_of) {
        next if ( ($only_watch) && !( $watch eq ($only_watch) ));

        my $watcherr    = 0;
        my $ext_service = "[$watch:";

        foreach my $service (sort keys %{ $opstatus_of{$watch} }) {
            next if ( ($only_service) && !( $service eq ($only_service) ));

            my $opstatus = $opstatus_of{$watch}{$service}{opstatus};
            ($debug) and print "$watch $service opstatus=$opstatus\n";

            # Only opstatus 0 is treated as a failure here.
            next if ($opstatus != 0);

            # A disabled service is not reported.
            next if (defined($disabled{services}{$watch}{$service}));

            # At this point we have a failure, so get its last summary.
            my $last_summary = $opstatus_of{$watch}{$service}{last_summary};

            $hosterr++;
            $watcherr++;
            ($debug) and print "Watch $watch service $service failed\n";

            # List the host once, however many of its services failed.
            push(@failures, $host) unless (defined($failuresDetails{$host}));
            $failuresDetails{$host} .=
                "Watch $watch, service $service, failed "
              . "with summary : ${last_summary}\n";

            if ($summary) {
                if ($bigsummary) {
                    # Explicit concatenation so the literal braces can never
                    # be read as a hash subscript on $service.
                    $ext_service .= $service . "{" . $last_summary . "}|";
                }
                else {
                    $ext_service .= "${service}|";
                }
            }
        }

        if ($watcherr) {
            # Drop the trailing separator before closing the group.
            chop($ext_service);
            $ext_service .= "]";
            $hostwatch   .= $ext_service;
        }
    }

    if ($hosterr) {
        push(@extendsummary, "$host($hostwatch)");
    }

    ($debug) and print "end of list_opstatus detail:\n===\n";

    $cl->disconnect;
}

sub usage { print <