#!/usr/bin/perl
#
# Filename: /usr/local/etc/reportdowntime.pl
#
# Purpose:  collect up the information recorded by pingsweep.pl
#           and calculate the percentage of uptime across the entire
#           corporate network for all network electronics that are
#           being monitored by the pingsweep.pl script.
#
# Usage:    reportdowntime.pl [datafile [month [year]]]
#
#           datafile  path to a pingsweeprecord.dat snapshot
#                     (default: the $datafile value set below)
#           month     month to report against, 1-12 (default: current month)
#           year      four digit year (default: current year)
#
# Author:   Michael McNamara (http://blog.michaelfmcnamara.com)
#
# Credits:  Stuart Kendrick (http://www.skendric.com/)
#           I've taken a lot of ideas (and some code) from Stuart's many
#           scripts. Without Stuart's help it would have taken me much
#           longer to develop some of the scripts I've come to rely on today.
#
# Date:     January 5, 2004
#
# Version:  1.2
#
# License:
#           Copyright (C) 2014 Michael McNamara (mfm@michaelfmcnamara.com)
#
#           This program is free software: you can redistribute it and/or
#           modify it under the terms of the GNU General Public License as
#           published by the Free Software Foundation, either version 3 of
#           the License, or (at your option) any later version.
#
#           This program is distributed in the hope that it will be useful,
#           but WITHOUT ANY WARRANTY; without even the implied warranty of
#           MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#           GNU General Public License for more details.
#
#           You should have received a copy of the GNU General Public
#           License along with this program.  If not, see
#           <http://www.gnu.org/licenses/>.
#
# Changes:
#
#           December 28, 2004 (M. McNamara) 1.2
#               added a column to the report to indicate the number of
#               downtime events that affected the device.
#           April 27, 2004 (M. McNamara) 1.1
#               added ability to accept command line arguments for data
#               file and month of year to report against
#           January 21, 2004 (M. McNamara) 1.0
#               cosmetic HTML changes to improve readability marking
#               devices with downtime in red
#           January 13, 2004 (M. McNamara) 1.0
#               added sub byKeys to sort DNS names before output to report
#           January 12, 2004 (M. McNamara)
#               added ALL flag for including all devices in report
#           January 11, 2004 (M. McNamara)
#               list of devices will now be sorted before outputting to email
#               Devices that were never down will not be reported with
#               100% uptime
#           January 6, 2004 (M. McNamara)
#               Added code to email status report to NetAlert@acme.org
#               Added perl script to monthly crontab
#               Added code to make the email message HTML based
#
# Notes:
#           I never did get the opportunity to completely automate this
#           script. I still had to make copies of the pingsweeprecord.dat
#           files and change the Month and Year value that I was running
#           the report for. Please keep that in mind if you are interested
#           in looking at actually using this code.
#
#           localtime reference
#           struct tm {
#               int tm_sec;     /* seconds */
#               int tm_min;     /* minutes */
#               int tm_hour;    /* hours */
#               int tm_mday;    /* day of the month */
#               int tm_mon;     /* month */
#               int tm_year;    /* year */
#               int tm_wday;    /* day of the week */
#               int tm_yday;    /* day in the year */
#               int tm_isdst;   /* daylight saving time */
#           };
#

# Load Modules
use strict;
use warnings;

# Declare Constants
use constant DEBUG => 1;    # DEBUG settings
use constant ALL   => 1;    # FLAG for including all devices in report

# Local Data Files
my $datafile    = "/usr/local/etc/ping/pingsweeprecord.dat-Dec2012";
my $electronics = "/usr/local/etc/ping/pingsweep.txt";

# Email Addresses and Subject Lines
my $MAILTO = 'NetworkAlert@acme.org';

# NOTE(review): the trailing space suggests an "<address>" part was lost from
# this value at some point -- confirm the intended From address.
my $MAILFROM    = 'Network Management ';
my $MAILSUBJECT = 'Network Infrastructure Monthly Network Uptime Report';

# Declare Global Variables

# Array of months in the year
my @months = (
    "January", "February", "March",     "April",   "May",      "June",
    "July",    "August",   "September", "October", "November", "December"
);

my $totaldowntime = 0;
my $devicecount   = 0;
my $index         = 0;
my $sdate         = scalar localtime;    # timestamp printed in the report

# Reporting period: defaults to the current month and year, both of which
# can be overridden from the command line (see Usage in the header).
my ( $month, $year ) = (localtime)[ 4, 5 ];
$year += 1900;

my ( $arg_datafile, $arg_month, $arg_year ) = @ARGV;

if ( defined $arg_datafile && length $arg_datafile ) {
    $datafile = $arg_datafile;
}

if ( defined $arg_month ) {
    die "Month must be a number from 1 to 12, got '$arg_month'\n"
        unless $arg_month =~ /\A\d{1,2}\z/
        && $arg_month >= 1
        && $arg_month <= 12;
    $month = $arg_month - 1;    # localtime style, zero based
}

if ( defined $arg_year ) {
    die "Year must be a four digit number, got '$arg_year'\n"
        unless $arg_year =~ /\A\d{4}\z/;
    $year = $arg_year;
}

print "DEBUG: year is $year\n"   if (DEBUG);
print "DEBUG: month is $month\n" if (DEBUG);

# Gregorian leap year rule: divisible by 4 and not by 100, or divisible by 400
my $is_leap
    = ( ( $year % 4 == 0 && $year % 100 != 0 ) || $year % 400 == 0 ) ? 1 : 0;

# Array of days in each month
my @daysinmonth = (
    31,                    # January
    $is_leap ? 29 : 28,    # February
    31,                    # March
    30,                    # April
    31,                    # May
    30,                    # June
    31,                    # July
    31,                    # August
    30,                    # September
    31,                    # October
    30,                    # November
    31                     # December
);

my $nod   = $daysinmonth[$month];    # Number of Days in Month from Array above
my $tutfm = 60 * 60 * 24 * $nod;     # SECS*MINS*HOURS*DAYS
                                     # TotalUpTimeForMonth TUTFM

my %disk;      # Hash Array for holding data elements
my %device;    # Hash Array for holding individual devices
my %down;      # Hash Array for holding devices with downtime
my %count;     # Hash Array for holding the number of events per device

###########################################################################
# BEGIN MAIN
###########################################################################
{
    # Load the saved data from the pingsweep.pl script
    load_data();

    # Load and count the devices that are being monitored
    count_devices();

    # Tally the downtime and percentage of uptime for each device
    comp_data();

    # Output the results via email in HTML format
    output_report();
}

exit 0;
###########################################################################
# END MAIN
###########################################################################

########################################################################
# Subroutine load_data
#
# Purpose: load the downtime records from $datafile into %disk
#
# Each usable record is stored as "name oTime rTime dTime" under a
# sequential numeric key ($index).  Blank lines and comment lines are
# skipped; records without four fields or with a non-numeric downtime
# are skipped with a warning.
#
# Returns: 1 (dies if the data file cannot be opened)
########################################################################
sub load_data {

    # Open data file
    open( my $fh, '<', $datafile ) or die "Can't open $datafile: $!\n";

    print "DEBUG: inside load_data and starting to load hash %disk\n"
        if (DEBUG);

    # Walk through data file
    while ( my $line = <$fh> ) {

        # Skip blank lines
        next if ( $line =~ /^\s*$/ );

        # Skip comments
        next if ( $line =~ /^#/ );

        # Read a line of data, throw away any remaining data elements.
        # The remaining elements are more for humans reading (debugging)
        # the datafile.
        my ( $name, $oTime, $rTime, $dTime ) = split( ' ', $line );

        # The downtime is summed later on, so it has to be a number
        unless ( defined $dTime && $dTime =~ /\A-?\d+(?:\.\d+)?\z/ ) {
            warn "Skipping malformed record at $datafile line $.: $line";
            next;
        }

        print "DEBUG: hash index $index = $name $oTime $rTime $dTime\n"
            if (DEBUG);

        # Build data structure
        $disk{$index} = "$name $oTime $rTime $dTime";
        $index++;

    }    #end while

    close($fh);

    return 1;

}    #end sub load_data

###########################################################
# Subroutine calc_down_time
#
# Purpose: format a number of seconds as D:HH:MM:SS
#
# Takes:   the downtime in seconds (undef is treated as 0; any
#          fractional part is dropped)
# Returns: string "days:hours:minutes:seconds" with hours, minutes
#          and seconds zero padded to two digits
###########################################################
sub calc_down_time {

    # Declare Local Variables
    my $cTime = shift;

    $cTime = 0 unless defined $cTime;

    # Work on whole seconds so every component stays an integer
    my $iTime = int($cTime);

    my $iSecs  = $iTime % 60;
    my $iMins  = int( $iTime / 60 ) % 60;
    my $iHours = int( $iTime / 3600 ) % 24;
    my $iDays  = int( $iTime / 86400 );

    # Put it all together
    return sprintf( '%d:%02d:%02d:%02d', $iDays, $iHours, $iMins, $iSecs );

}    #end sub calc_down_time

########################################################################
# Subroutine comp_data
#
# Purpose:
#    Walk the records loaded into %disk and accumulate, per device, the
#    total downtime (%down) and the number of downtime events (%count),
#    plus the overall downtime ($totaldowntime).
#
# Returns: 1
########################################################################
sub comp_data {

    # Process the records in the order they were read from the data file
    for my $idx ( sort { $a <=> $b } keys %disk ) {

        # Get the current data from the hash array
        my ( $name, $oTime, $rTime, $dTime ) = split( ' ', $disk{$idx} );

        $down{$name} += $dTime;
        $totaldowntime += $dTime;
        $count{$name}++;

        print "DEBUG: $name was down $dTime which brings total to $totaldowntime\n"
            if (DEBUG);

    }    #end for my $idx

    return 1;

}    #end sub comp_data

########################################################################
# Subroutine get_time
#
# Purpose: return the current local date and time as strings
#
# Returns: list ($date, $time) where $date is "M-D-YYYY" (month and day
#          are not zero padded) and $time is "HH:MM:SS"
########################################################################
sub get_time {

    # Declare Local Variables
    my ( $sec, $min, $hour, $day, $mon, $yr ) = (localtime)[ 0 .. 5 ];

    # localtime months are zero based and years are offset from 1900
    my $date = join( '-', $mon + 1, $day, $yr + 1900 );
    my $time = sprintf( '%02d:%02d:%02d', $hour, $min, $sec );

    return ( $date, $time );

}    #end sub get_time

########################################################################
# Subroutine count_devices
#
# Purpose: count the number of devices being ping'd from a flat file
#
# Reads $electronics one entry per line, skipping blank lines and
# comments, and increments $devicecount for each entry.  When the ALL
# flag is set every entry is also seeded into %down and %count with 0
# so devices without any downtime still show up in the report.
#
# Returns: 1 (dies if the device list cannot be opened)
########################################################################
sub count_devices {

    # Open data file
    open( my $fh, '<', $electronics )
        or die "Can't open $electronics: $!\n";

    # Walk through data file
    while ( my $line = <$fh> ) {

        # Skip blank lines
        next if ( $line =~ /^\s*$/ );

        # Skip comments
        next if ( $line =~ /^#/ );

        # Strip the CR/LF and any surrounding blanks from the input line so
        # the name matches the (whitespace split) names in the data file
        $line =~ s/^\s+//;
        $line =~ s/\s+\z//;

        # Count the number of devices being monitored
        $devicecount++;

        # If we are to include all devices in the uptime report the
        # flag ALL will need to be set, otherwise we will only report
        # on those devices that were down.
        if (ALL) {

            # Build data structure
            $down{$line}  = 0;
            $count{$line} = 0;

            print "DEBUG: hash down \$down{$line} = $down{$line}\n"
                if (DEBUG);

        }    #end if

        print "DEBUG: incrementing device count to $devicecount\n"
            if (DEBUG);

    }    #end while

    close($fh);

    return 1;

}    #end sub count_devices

########################################################################
# Subroutine output_report
#
# Purpose: email the uptime report in HTML format
#
# Pipes a message to /usr/lib/sendmail containing the overall network
# uptime percentage and a table with one row per device in %down
# (events, time down, uptime percentage), sorted with byKeys.
#
# Returns: 1 (dies if no devices were counted or sendmail fails)
########################################################################
sub output_report {

    # Without any devices there is nothing to divide the downtime by;
    # stop before a partial message is handed to sendmail.
    die "No monitored devices found in $electronics; nothing to report\n"
        unless $devicecount > 0;

    # Declare Local Variables
    my $alldevicetime = $tutfm * $devicecount;
    my $networkuptime
        = ( ( $alldevicetime - $totaldowntime ) / $alldevicetime ) * 100;

    # Open file handle to sendmail; the list form runs sendmail directly
    # so the recipient address is never interpreted by a shell
    open( my $mail, '|-', '/usr/lib/sendmail', $MAILTO )
        or die "Can't run /usr/lib/sendmail: $!\n";

    print {$mail} "From: $MAILFROM\nTo: $MAILTO\nSubject: $MAILSUBJECT\n";
    print {$mail} "MIME-Version: 1.0\n";
    print {$mail} "Content-Type: text/html; charset=us-ascii\n\n";

    print {$mail} <<"EOF";
<html>
<head>
<title>Acme Corporation Network Infrastructure Monthly Uptime Report</title>
</head>
<body>
<h2>Acme Corporation Network Infrastructure Monthly Uptime Report</h2>
<p>Date : $sdate</p>
<p>Report for the Month of $months[$month]</p>
<p>There were a total of $devicecount devices monitored during this month.</p>
EOF

    # print {$mail} "The total number of seconds in this month was $tutfm. ";
    # print {$mail} "Multiple that by the total number of devices = ",
    #     $alldevicetime, "<br>\n";
    # print {$mail} "We were down for a total of $totaldowntime seconds (",
    #     calc_down_time($totaldowntime), " DD:HH:MM:SS)<br>\n";

    print {$mail} "<p>The network uptime for this month was <b>";
    printf {$mail} " %0.3f%%\n", $networkuptime;
    print {$mail} "</b></p>\n";

    print {$mail} <<"EOF";
<p>Here is the itemized breakdown for each device;</p>
<table border="1" cellpadding="3" cellspacing="0">
<tr>
<th>Device Hostname or IP Address</th>
<th># Events</th>
<th>Time Down<br>DD:HH:MM:SS</th>
<th>% Uptime</th>
</tr>
EOF

    print "DEBUG: totaldowntime = $totaldowntime\n" if (DEBUG);
    print "DEBUG: that would translate into ",
        calc_down_time($totaldowntime), "(DD:HH:MM:SS)\n"
        if (DEBUG);
    print "DEBUG: Is it a leap year? $is_leap\n"                if (DEBUG);
    print "DEBUG: The number of days in this month is $nod\n"   if (DEBUG);
    print "DEBUG: The total up time for the month would have been $tutfm\n"
        if (DEBUG);
    print "DEBUG: The total number of devices is $devicecount\n" if (DEBUG);
    print "DEBUG: The total up time for all devices for the month would have been ",
        $alldevicetime, "\n"
        if (DEBUG);

    if (DEBUG) {
        print "The percentage of uptime for the month is ";
        printf " %0.3f%%\n", $networkuptime;
    }    #end if DEBUG

    # Walk through the devices in sorted order and emit one table row each
    for my $idx ( sort byKeys ( keys %down ) ) {

        # Calculate down time
        my $dTime         = calc_down_time( $down{$idx} );
        my $upTimePercent = ( ( $tutfm - $down{$idx} ) / $tutfm ) * 100;

        # Use HTML to mark devices with downtime less than 0.1%
        # in orange (FF9900) and the devices with more than 0.1%
        # in red (FF0000)
        my $fontopen;
        if ( $upTimePercent < 99.9 ) {
            $fontopen = '<font color="#FF0000">';
        }
        elsif ( $upTimePercent < 100 ) {
            $fontopen = '<font color="#FF9900">';
        }
        else {
            $fontopen = '<font color="#000000">';
        }

        print {$mail} "<tr>\n";
        print {$mail} "<td>$fontopen$idx</font></td>\n";
        print {$mail} "<td>$fontopen$count{$idx}</font></td>\n";
        print {$mail} "<td>$fontopen$dTime</font></td>\n";
        print {$mail} "<td>$fontopen";
        printf {$mail} " %0.3f%%\n", $upTimePercent;
        print {$mail} "</font></td>\n";
        print {$mail} "</tr>\n";

        print "$idx was down $count{$idx} times a total of ",
            $dTime, "(DD:HH:MM:SS)\n"
            if (DEBUG);

    }    #end for my $idx

    print {$mail} <<"EOF";
</table>
<p><b>Notes:</b> this message is being sent in HTML format.</p>
</body>
</html>
EOF

    # Closing a pipe also reports a failed exit status of the child
    close($mail)
        or die(
        $!
        ? "Error closing pipe to sendmail: $!\n"
        : "sendmail exited with status $?\n"
        );

    return 1;

}    #end sub output_report

########################################################################
# Subroutine byKeys
#
# Purpose: sort the DNS domain names from the Hash Array for output
#
# Comparison routine for sort(); reads the package variables $a and $b.
# The names are ordered by their domain labels compared from right to
# left (root domain first, then each subdomain) and finally by the
# hostname, which is the first label.  A missing label sorts as the
# empty string, so names with a different number of labels can be
# compared without warnings.
#
# Returns: -1, 0 or 1
########################################################################
sub byKeys {

    # Split the FQDN into separate labels
    my @a_labels = split( /\./, $a );
    my @b_labels = split( /\./, $b );

    # If DEBUG output the separate labels for troubleshooting
    print "DEBUG: ", join( ', ', @a_labels ), "\n" if (DEBUG);
    print "DEBUG: ", join( ', ', @b_labels ), "\n" if (DEBUG);
    print "DEBUG: a = $a\n" if (DEBUG);
    print "DEBUG: b = $b\n" if (DEBUG);

    # The first label is the hostname, whatever remains is the domain
    my $a_host = @a_labels ? shift @a_labels : '';
    my $b_host = @b_labels ? shift @b_labels : '';

    # Compare the domains starting at the root; the first difference decides
    while ( @a_labels || @b_labels ) {
        my $a_label = @a_labels ? pop @a_labels : '';
        my $b_label = @b_labels ? pop @b_labels : '';

        my $order = $a_label cmp $b_label;
        return $order if $order;
    }

    # If the domains equal each other evaluate the hostname
    return $a_host cmp $b_host;

}    #end sub byKeys