#!/usr/bin/perl

# Rowan Littell / 24-OCT-2008
#
# logtrawl - classify syslog lines into alerts, ignored lines, unmatched
# lines and junk, then print a plain-text report.  See the POD at the end
# of this file for the user-facing documentation.

use strict;
use warnings;
use Getopt::Std;
use Regexp::Assemble;

my $CONFIG_FILE;

# Settings read from the -c configuration file.  Keys used in this file:
# 'message_format' and 'include_directory'.
my %CONF;

# Message format used when the configuration does not supply one.  The first
# capture group is the host name, the second is the message text that the
# alert and ignore expressions are tested against.
my $DEFAULT_MESSAGE_FORMAT
    = '[A-Z][a-z][a-z]\s+\d+\s+\d\d:\d\d:\d\d\s+([^\s]+)\s+(.*)';

MAIN:
{
    my (%opts, @alerts, @unmatched, @junk);

    # Host name taken from the most recent line that matched the message
    # format; stays empty if no line ever matched.
    my $host = '';

    getopts('c:a:i:h', \%opts);
    usage() if $opts{'h'};

    read_config($opts{'c'}) if $opts{'c'};

    # Fall back to the built-in format both when no config file was given
    # and when the config file did not set message_format; otherwise the
    # pattern would degenerate to /^$/ and every line would become junk.
    if (!defined $CONF{'message_format'} || $CONF{'message_format'} eq '') {
        $CONF{'message_format'} = $DEFAULT_MESSAGE_FORMAT;
    }

    # Compile once up front; none of these change while reading input.
    my $format_re = qr/^$CONF{'message_format'}$/;
    my $alert_re  = read_regexes($opts{'a'});
    my $ignore_re = read_regexes($opts{'i'});

    while (my $line = <>) {
        chomp $line;

        if ($line =~ $format_re) {
            my ($line_host, $msg) = ($1, $2);

            # A custom message_format may lack one of the capture groups.
            $host = defined $line_host ? $line_host : '';
            $msg  = '' unless defined $msg;

            # Alerts take precedence over ignores; anything matching
            # neither set is reported as unmatched.
            if ($msg =~ /$alert_re/) {
                push @alerts, $line;
            }
            elsif ($msg !~ /$ignore_re/) {
                push @unmatched, $line;
            }
        }
        else {
            push @junk, $line;
        }
    }

    # Keep only the part of the host name before the first slash.
    if ($host =~ /([^\/]+)\/.*/) {
        $host = $1;
    }

    print "Log report for $host:\n";

    my $reported = 0;
    $reported += print_section('ALERTS',    @alerts);
    $reported += print_section('UNMATCHED', @unmatched);
    $reported += print_section('JUNK',      @junk);

    if (!$reported) {
        print "-" x 75, "\n";
        print "Nothing to report\n";
        print "-" x 75, "\n";
    }
}

# Print one report section (a rule, the title, the lines, a rule).
# Returns 1 if the section was printed, 0 if @lines was empty and nothing
# was written.
sub print_section {
    my ($title, @lines) = @_;

    return 0 unless @lines;

    print "-" x 75, "\n";
    print "$title\n\n";
    print join("\n", @lines), "\n\n";
    print "-" x 75, "\n";

    return 1;
}

# Print the usage summary to standard output and exit.
sub usage {
    print "Usage: logtrawl [-h] [-c logtrawl.conf] [-a alerts.conf] [-i ignores.conf]\n logfile logfile ...\n\n";
    print " -c logtrawl.conf path to main logtrawl configuration file\n";
    print " -a alerts.conf path to alerts match file\n";
    print " -i ignores.conf path to ignores match file\n";
    print " -h show this message\n";
    print "\nFor further information, perldoc logtrawl.\n";
    exit;
}

# Read "variable: value" settings from $file into %CONF.
#
# Recognised variables are message_format and include_directory.  Lines that
# are empty or begin with '#' or whitespace are skipped.  Whitespace between
# the colon and the value is stripped.
#
# Dies if $file is not a regular file or cannot be opened.
sub read_config {
    my ($file) = @_;

    die "Can't open config file $file: $!\n" unless -f $file;

    open(my $fh, '<', $file)
        or die "Can't open config file $file: $!\n";

    while (my $line = <$fh>) {
        chomp $line;
        next if $line =~ /^(?:#|\s)/ || $line eq '';

        if ($line =~ /^message_format:\s*(.*)$/) {
            $CONF{'message_format'} = $1;
        }
        elsif ($line =~ /^include_directory:\s*(.*)$/) {
            $CONF{'include_directory'} = $1;
        }
    }

    close($fh);
    return;
}

# Turn a file name from the command line or an #include statement into the
# path that will actually be opened.
#
# Names beginning with a slash are returned unchanged.  Other names are
# looked up under $CONF{'include_directory'}; when that variable is not set
# the name is returned unchanged (i.e. relative to the current directory)
# instead of being silently rooted at "/".
sub resolve_path {
    my ($file) = @_;

    return $file if $file =~ /^\//;

    my $dir = $CONF{'include_directory'};
    return $file unless defined $dir && $dir ne '';

    return $dir . '/' . $file;
}

# Build the single regular expression used to test messages against one
# match file.
#
# $file may be undef (option not given).  Returns the assembled expression,
# anchored at the beginning of the line, or a pattern that is not expected
# to match any real message when there are no expressions to assemble.
sub read_regexes {
    my ($file) = @_;

    my $null_re = '^### UNMATCHED ###';

    my @regexes;
    if (defined $file) {
        @regexes = plain_regexes(resolve_path($file));
    }

    return $null_re unless @regexes;

    return Regexp::Assemble->new
        ->add(@regexes)
        ->anchor_line_begin
        ->re;
}

# Return the list of regular expression strings found in $file, following
# "#include" statements recursively.
#
# $active is an optional hash reference used internally to remember which
# files are currently being expanded, so that a file that (directly or
# indirectly) includes itself is reported and skipped rather than recursing
# without end.
#
# Dies if $file itself cannot be opened; an include that is not a regular
# file only produces a message on STDERR.
sub plain_regexes {
    my ($file, $active) = @_;
    $active = {} unless defined $active;

    open(my $fh, '<', $file)
        or die("Can't open regex file $file: $!\n");
    my @lines = <$fh>;
    close($fh);

    $active->{$file} = 1;

    my @arr;
    foreach my $line (@lines) {
        chomp $line;

        if ($line =~ /^#include (.*)$/) {
            my $include_name = $1;
            my $include_file = resolve_path($include_name);

            if ($active->{$include_file}) {
                print STDERR
                    "Skipping circular include of $include_file in $file\n";
            }
            elsif (-f $include_file) {
                push @arr, plain_regexes($include_file, $active);
            }
            else {
                print STDERR "Can't open $include_file: $!\n";
            }
            next;
        }

        # Comments, indented lines and blank lines carry no expression.
        next if $line =~ /^(?:#|\s)/ || $line eq '';

        push @arr, $line;
    }

    delete $active->{$file};

    return @arr;
}

# Return a built-in alert expression as a string of the form "^(a|b|...)".
# NOTE(review): not called anywhere in the visible source; kept in case
# something else relies on it.
sub configure_alerts {
    my @alerts = ('.*to=');

    return '^(' . join('|', @alerts) . ')';
}

# Return a built-in ignore expression as a string of the form "^(a|b|...)".
# NOTE(review): not called anywhere in the visible source; kept in case
# something else relies on it.
sub configure_ignores {
    my @ignores = (
        'postfix\/yapp',
        'stunnel:',
        'ssh\[\d+\]:',
        'puppetd\[\d+\]:',
        'mimedefang(\.pl|-multiplexor)?\[\d+\]:',
        'spamd\[\d+\]:',
        'sendmail\[\d+\]:',
        'named\[\d+\]:',
        'clamd\[\d+\]:',
        'spamc\[\d+\]:',
        'nscd\[\d+\]:',
        'dcc\w+\[\d+\]:',
        'sudo: \[ID 702911 local2.notice\]\s+nobody\s',
    );

    return '^(' . join('|', @ignores) . ')';
}

############################################################
# perldoc
############################################################

=pod

=head1 NAME

logtrawl - simple syslog
file analyzer

=head1 SYNOPSIS

B<logtrawl> [-h] [-c logtrawl.conf] [-a alerts.conf] [-i ignores.conf]
F<logfile> F<logfile> ...

=head1 DESCRIPTION

B<logtrawl> is a simple log file analyzer and reporting tool based on the
concept of strings that will trigger alerts and strings that will be
ignored. Any line that is not matched in one of these classes will be
reported as "unmatched" after lines that match the alerts. All matching is
done using Perl compatible regular expressions.

To facilitate the processing of syslog files, the header portion of each
line is stripped off, typically including the timestamp and host name.
Matching then begins with the daemon name.

=head1 OPTIONS

=over 8

=item B<-a> F<alerts.conf>

The file containing regular expressions for lines that will trigger alerts.
See B<MATCHING> for a description of the file format. If the file name does
not begin with a slash, it is looked for relative to the
B<include_directory> configuration variable.

=item B<-c> F<logtrawl.conf>

The main B<logtrawl> configuration file. See B<CONFIGURATION> for a
description of the configuration file.

=item B<-h>

Exit after giving a short usage summary.

=item B<-i> F<ignores.conf>

The file containing regular expressions for lines that will be ignored. See
B<MATCHING> for a description of the file format. If the file name does not
begin with a slash, it is looked for relative to the B<include_directory>
configuration variable.

=back

=head1 CONFIGURATION

B<logtrawl> can be given a configuration file to set some of its internal
operations. The format of the configuration file is

    variable: value

Blank lines and lines beginning with # are ignored. Any spaces between the
colon and the value will be stripped. The following variables may be set:

=over 8

=item B<message_format>

A regular expression that describes the format of the log messages. The
default message format is

    [A-Z][a-z][a-z]\s+\d+\s+\d\d:\d\d:\d\d\s+([^\s]+)\s+(.*)

This matches, for example,

    Jan 5 14:32:16 loghost daemon[32]: message

The message format should include two sets of capturing parentheses. The
first set should grab the hostname and the second set should grab the
remainder of the message.
This portion is then used as the text against which alert and ignore
regular expressions are tested.

=item B<include_directory>

Sets the directory that will be searched when a matching file uses an
include statement that does not reference a full path name. This directory
is also used if an alert or ignore file name is not a full path name.

=back

=head1 MATCHING

The alert and ignore regular expression files determine which log messages
will be elevated to alerts and which will be ignored in the output. The
format of the files is identical. Blank lines and lines beginning with # are
ignored (except for include statements; see B<INCLUDES>). Everything else is
treated as a Perl compatible regular expression that is anchored at the
beginning of the line.

Pattern matching is only performed on that portion of the log message that
is returned after the log header is stripped off (see the B<message_format>
configuration variable). A simple match that would alert (or ignore) on any
log message from sendmail would be:

    sendmail\[\d+\]:

In order to fine tune the alerts and ignores, the pattern can be made more
complex. If the log message matches any of the patterns in the alert or
ignore file it is considered to have matched for that file. The following
patterns would only match for sendmail log messages corresponding to email
that has been permanently rejected or temporarily delayed:

    sendmail\[\d+\]:.* dsn=5\.\d\.\d, stat=.*
    sendmail\[\d+\]:.* dsn=4\.\d\.\d, stat=.*

These could be combined into one line as such:

    sendmail\[\d+\]:.* dsn=(4|5)\.\d\.\d, stat=.*

It is usually better for performance to follow the more explicit first form
rather than to try to combine regular expressions. Internally, B<logtrawl>
uses the F<Regexp::Assemble> module to produce optimized regular
expressions, which typically results in more efficient regular expressions
than can be produced by mere mortals.

=head1 INCLUDES

The alert and ignore regular expression files may also contain file include
statements in either of the following forms:

    #include /path/to/file
    #include file

In both cases, the contents of the referenced file are read and inserted
into the matching expression as if they were present in the original file.
Included files may, themselves, include other files. If the included file
name begins with a slash, it is treated as a full path, otherwise it must be
present in the B<include_directory>.

=head1 BUGS & LIMITATIONS

No checks are made on the regular expressions found in any of the files. It
is perfectly possible to specify a regular expression that will, when used
against standard log messages, consume all available resources and hasten
the eventual heat death of the universe.

=head1 AUTHOR

This software was written by John "Rowan" Littell and is hereby placed into
the public domain.

=cut