The following example event script monitors a specified log file.
The IDOL Server installer provides additional example scripts.
#!/usr/bin/perl use strict; use warnings; use File::Tail; use Encode; use URI::Escape; #--------------------------------------------------- # Script to monitor a specified log file ($ARGV[0]) generated by any IDOL # servers, constantly tailing it. When a query appears in the logs it # gathers the appropriate information and sends it to the Stats server # at the specified host ($ARGV[1]) and port ($ARGV[2]). # # The log file has the following format: # <date> <time> [<thread number>] <log level> <information> # # For example: # (...) # 12/12/2007 08:22:21 [7] 30-Normal: #/action=suggest&maxresults=6&reference=doc_123123 (127.0.0.1) # 12/12/2007 08:22:23 [7] 30-Normal: Returning 6 matches # 12/12/2007 08:22:23 [7] 30-Normal: Suggest complete # (...) # # Parameters: Four main parameters are required to run the script. # # - logfile: The full path and name of the log file. # - stathost: Host name of the stats server. # - statport: Event port of the Stats server to which the script can #send XML events. # - idolname: IDOL Server name generating the log file that you want to # monitor. The value must be the same one specified in the Stats server # configuration file when setting the "IDOLName" stat parameter. # # For example: idolname = "myIDOLServer" # If only one IDOL Server is used, you do not have to use idolname. # # Run the script as follows: # # perl stats.pl <logfile> <host> <port> <idolname> # #---------------------------------------------------------------- use constant XML_HEADER => "<?xml version='1.0' encoding='ISO-8859-1'?>\n<events>\n"; use constant START_FROM_LOGFILE_END => 0; use constant READ_ENTIRE_LOGFILE => -1; if (!defined $ARGV[2]) { print "Syntax: $0 logfile host port [idolname]\n"; exit(0); } my $name = $ARGV[0]; # The log file to monitor my $statshost = $ARGV[1]; # Host of the Stats Server my $statsport = $ARGV[2]; # Event port of the Stats Server my $idolname = $ARGV[3]; # IDOL Server name. Set an IDOL Server name if Stats Server is running in Multiple IDOL Server mode. my $BATCHSIZE=1; # Wait until we have this many events and send them all at once my $tailn = START_FROM_LOGFILE_END; # Start tailing from the end my $resetTail=0; # Start tailing after the file has been automatically closed and reopened my $tail; # Structure returned while tailing a file my %threadqueries; # The 'current query' on each thread my %threadips; # The 'current ip' on each thread my %threadtime; # The 'current time' on each thread my $total = 0; # Number of XML events generated by the script my %escapes = (); # Hash mapping characters to hex equivalent my $data = ""; # Data being sent to the StatsServer my $events = 0; my $bIsWindows = 0; sub buildQueryXML($$$$$@); # return event XML for a particular query with matches and terms sub postEventsData($$$); # Send XML events to the Stats server sub processLine($); #Processes a line returned from the log file sub releaseAndReopenHandle($); #Are we running on Windows? eval { require Win32::Process; }; if ($@) { $bIsWindows = 0; } else { $bIsWindows = 1; } #-------------------------------- # Main loop #-------------------------------- for (;;) { eval { # Tail the specified log file $tail = new File::Tail(name => $name, maxinterval => 2, interval => 1, tail => $tailn, adjustafter => 1); $tailn = START_FROM_LOGFILE_END; # Event Creating loop for (;;) { $data = XML_HEADER; $events = 0; # Event Writing loop while ($events < $BATCHSIZE) { # Wait for new input in the log file. my ($nfound, $timeleft, @pending) = File::Tail::select(undef, undef, undef, 1, $tail); # Exit if no new line is found last if $events > 0 && !$nfound; # Returns one line from the log file my $line = $tail->read(); processLine($line); } # End of the XML event $data .= "</events>\n"; print STDERR "DATA:\n$data\n"; # End of the current batch. Post the response to the statsserver. if ($events) { $total += $events; eval { postEventsData($statshost, $statsport, $data); }; print "Posting events failed: $@" if $@; if ($total % 1000 == 0) { print STDERR "Total events: $total\n"; } $data = ""; #The file handle used by File::Tail is used permanently until it #($tail) goes out of scope. On Windows, this means it keeps a lock #on the log file we are tailing: therefore, the log file has no #opportunity to roll over after it exceeds the maximum permitted #size. The following call closes the handle, sleeps, and reopens #the handle. This allows the rollover to occur correctly if logging #is made to the log file during the sleep, and the log file size #has overstepped the limit. if ($bIsWindows) { releaseAndReopenHandle($tail); } } } }; warn unless $tailn; $tailn = READ_ENTIRE_LOGFILE; sleep 1; } #end of the Main loop sub processLine($) { my $line = shift; # Check whether the line contains the following format (that is, 16/09/2008 14:20:00 [1] 30-Normal: ....) if ($line =~ m,^(\d\d/\d\d/\d{4} \d\d:\d\d:\d\d) \[(\d+)\] \d\d-(Full|Normal|Always|Warning|Error): (.*),) { # A log line (beginning with a time) my $timeLog = $1; # Get the time when the IDOL Server received the query my $thread = $2; # The thread number the query is on my $message = $4; # The rest of the log line; $3 captures the log level (Full, Normal, and so on) # Check whether the message contains a query (action=termgetbest&...) if ($message =~ m,^/?(a.*?=.+) \(([\d\.]+)\)$,) { # The initial 'action received' $threadqueries{$thread} = $1; # Get the query $threadips{$thread} = $2; # Get IP address of the IDOL Server that sent the query $threadtime{$thread} = $timeLog; # Get the query time } # Check whether the message contains the number of hits, returned matches or completed query elsif ($threadqueries{$thread} && ($message =~ /^Completed Action, returning (\d+) hits$/ || $message =~ /^Returning (\d+) match/ || $message =~ /^.* complete$/)) { # The end of the query. Form the event xml and save my $matches = $1 || 0; my $query = $threadqueries{$thread}; my $ip = $threadips{$thread}; # If the query format is correct, generate the data for the XML event if (defined $query && defined $ip && $query =~ m!/?a.*?=(\w+)([?&].*(?<=[?&])text=([^?&]*))?!) { $events++; my $terms = $3 || ""; $data .= buildQueryXML($idolname, $query, $1, $matches, $ip, split(/ /,uri_unescape($terms))); } print STDERR "$threadtime{$thread} $threadqueries{$thread}\n"; # Reset $threadqueries{$thread} = ""; $threadips{$thread} = ""; $threadtime{$thread} = ""; } # end of the action } # end of the line } #Windows-only: close and reopen the log file handle, with a suitable pause in #between, to allow the log file to be rolled over. sub releaseAndReopenHandle($) { my $tail = shift; my $logfile = $tail->input(); close($tail->{'handle'}); sleep(5); open($tail->{'handle'}, "<$logfile") or die "Cannot reopen logfile handle: $!\n"; } sub stripControlChars($) { my $x = shift; $x =~ tr/\x00-\x1F//d; return $x; } #-------------------------------------------------------------------- # Extract information from a query and build the XML to send #-------------------------------------------------------------------- sub buildQueryXML($$$$$@) { my $idolname = shift; my $query = shift; my $action = shift; my $matches = shift; my $ip = shift; my @terms = @_; my $xml = "<queryinfo>\n<ver>0.1</ver>\n<url><![CDATA[$query]]></url>\n"; $xml .= "<action>$action</action>\n"; $xml .= "<terms><term>" . uri_escape(stripControlChars($_)) . "</term></terms>\n" for @terms; $xml .= "<numhits>$matches</numhits>\n"; $xml .= "<ip>$ip</ip>\n"; if ($idolname) { $xml .= "<idolname>$idolname</idolname>\n"; } $xml .= "</queryinfo>\n"; return $xml; } #-------------------------------------------------------------------- # Post a batch of events data to the specified host and port #-------------------------------------------------------------------- sub postEventsData($$$) { my $host = shift; my $port = shift; my $data = shift; my $nConnectTry = 3; use Socket; socket(INDEXSOCK, Socket::PF_INET, Socket::SOCK_STREAM, getprotobyname('tcp')) || print STDERR "Socket Failure: $!"; my $inet_addr = Socket::inet_aton($host) || print STDERR "Internet_addr Failure: $1\n"; my $paddr= Socket::sockaddr_in($port, $inet_addr) || print STDERR "Sockaddr_in Failure: $!\n"; while (!connect(INDEXSOCK, $paddr) && $nConnectTry > 0) { $nConnectTry--; } $nConnectTry > 0 or die "Connect problem: $|\n"; select INDEXSOCK; $| =1; select STDOUT; print INDEXSOCK "POST "; print INDEXSOCK "/stats"; print INDEXSOCK " HTTP/1.0\r\n"; print INDEXSOCK "Content-Length: ".length($data)."\r\n\r\n"; print INDEXSOCK "$data"; my $buffer; read(INDEXSOCK, $buffer, 100); close INDEXSOCK; return(1); }
|