Platinum Partner

Convert Apache Http Combined Logs Into Sql (and Import It Into A Mysql Database Eventually)

you need to extract the data in your http server log files and put it in a database to query it with your usual tools using SQL. this perl script does just this.

it was hard to find it, that's why i put it here.


#!/usr/bin/perl -w
# Written by Aaron Jenson.
# Original source: http://www.visualprose.com/software.php
# Updated to work under Perl 5.6.1 by Edward Rudd
# Updated 24 march 2007 by Slim Amamou 
#  - output SQL with the option '--sql'
#  - added SQL create table script to the HELP
#
#  NOTE : you need the TimeDate library (http://search.cpan.org/dist/TimeDate/)
#
use strict;
use Getopt::Long qw(:config bundling);
use DBI;
use Date::Parse;

my %options = ();
my $i = 0;
my $sql = '';
my $valuesSql = '';
my $line = '';
my $dbh = 0;
my $sth = 0;
my @parts = ();
my $part;
my $TIMESTAMP = 3;
my $REQUEST_LINE = 4;
my @cols = (
	'remote_host',			## 0
	'remote_logname',		## 1
	'remote_user',			## 2
	'request_time',			## 3.string
	'time_stamp',			## 3.posix
	'request_line',			## 5
	'request_method',		## 6
	'request_uri',			## 7
	'request_args',			## 8
	'request_protocol',		## 9
	'status',				## 10
	'bytes_sent',			## 11
	'referer',				## 12
	'agent'					## 13
);
my $col = '';

GetOptions (\%options,
		"version" => sub { VERSION_MESSAGE(); exit 0; },
		"help|?" => sub { HELP_MESSAGE(); exit 0; },
		"host|h=s",
		"database|d=s",
		"table|t=s",
		"username|u=s",
		"password|p=s",
		"logfile|f=s",
		"sql");

$options{host} ||= 'localhost';
$options{database} ||= '';
$options{username} ||= '';
$options{password} ||= '';
$options{logfile} ||= '';
$options{sql} ||= '';

if( ! ($options{database} || $options{sql}))
{
	HELP_MESSAGE();
	print "Must supply a database to connect to.\n";
	exit 1;
}

if( ! $options{table} )
{
	HELP_MESSAGE();
	print "Must supply table name.\n";
	exit 1;
}

if( $options{logfile} )
{
	if( ! -e $options{logfile} )
	{
		print  "File '$options{logfile}' doesn't exist.\n";
		exit 1;
	}
	open(STDIN, "<$options{logfile}") || die "Can't open $options{logfile} for reading.";
}

if( $options{database} )
{
	$dbh = Connect();
	if (! $dbh) {
		exit 1;
	}
}

$sql = "INSERT INTO $options{table} (";
foreach $col (@cols)
{
	$sql .= "$col," if( $col );
}
chop($sql);
$sql .= ') VALUES (';
my ($linecount,$insertcount) = (0,0);
while($line = )
{
	$linecount++;
	@parts = SplitLogLine( $line );
	next if( $parts[$TIMESTAMP+1] == 0 );
	$valuesSql = '';
	for( $i = 0; $i < @cols; ++$i )
	{
		$parts[$i] =~ s/\\/\\\\/g;
		$parts[$i] =~ s/'/\\'/g;
		$valuesSql .= "'$parts[$i]'," if( $cols[$i] );
	}
	chop($valuesSql);

	if( $options{database} )
	{
		$sth  = $dbh->prepare("$sql$valuesSql)");
		if( ! $sth->execute() )
		{
			print "Unable to perform specified query.\n$sql$valuesSql\n" . $sth->errstr() . "\n";
		} else {
			$insertcount++;
		}
		$sth->finish();
	}
	if( $options{sql} )
	{
		print "$sql$valuesSql);\n";
	}
}
if( ! $options{sql} )
{
	print "Parsed $linecount Log lines\n";
	print "Inserted $insertcount records\n";
	print "to table '$options{table}' in database '$options{database}' on '$options{host}'\n";
}

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# Connects to a MySQL database and returns the connection.
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
sub Connect
{
	my $dsn = "DBI:mysql:$options{database};hostname=$options{host}";
	return DBI->connect( $dsn, $options{username}, $options{password} );
}


# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# Splits up a log line into its parts.
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
sub SplitLogLine
{
	my $line = shift;
	my $i = 0;
	my $inQuote = 0;
	my $char = '';
	my $part = '';
	my @parts = ();
	my $count = 0;
	chomp($line);
	for( $i = 0; $i < length($line); ++$i )
	{
		$char = substr($line, $i, 1);
		if( $char eq ' ' && ! $inQuote )
		{
			## print "Found part $part.\n";
			if( $count == $TIMESTAMP )
			{
				push(@parts, "[".$part."]");
				$part = str2time($part);
			}
			push(@parts, $part);
			if( $count == $REQUEST_LINE )
			{
				my @request = split(/[ ?]/, $part);
				push(@parts, $request[0]);
				push(@parts, $request[1]);
				if( $request[3] )
				{
					push(@parts, $request[2]);
					push(@parts, $request[3]);
				}
				else
				{
					push(@parts, '');
					push(@parts, $request[2]);
				}
				$count += 5;
			}
			else
			{
				++$count;
			}
			$part = '';
		}
		elsif( $char eq '"' || $char eq '[' || $char eq ']' )
		{
			$inQuote = !$inQuote;
		}
		else
		{
			$part .= $char;
		}
	}
	push(@parts,$part) if $part;

	return @parts;
}


# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# Prints the usage/help message for this program.
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
sub HELP_MESSAGE
{
	print< -t  [-h ] [-u ] [-p ] [-f          The host to connect to.  Default is localhost.
 --database|-d  The database to use.  Required.
 --username|-u       The user to connect as.
 --password|-p       The user's password.
 --table|-t 
The name of the table in which to insert data. --logfile|-f The file to read from. If not given, data is read from stdin. --sql Output SQL --help|-? Print out this help message. --version Print out the version of this software. ---------------------------------- -- SQL create statements for the table -- create table ( remote_host varchar(50) , remote_logname varchar(50) , remote_user varchar(50) , request_time char(28), time_stamp varchar(10) , request_line varchar(255), request_method varchar(10) , request_uri varchar(255), request_args varchar(255), request_protocol varchar(10) , status varchar(10) , bytes_sent varchar(10) , referer varchar(255) , agent varchar(255) ); EOF } # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # Prints the version information for this program # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # sub VERSION_MESSAGE { print "mysql_import_combined_log.pl version 1.2\n"; print "Version 1.0 Written by Aaron Jenson.\n"; print "Update to work with perl 5.6.1 by Edward Rudd\n"; } 1;
{{ tag }}, {{tag}},

{{ parent.title || parent.header.title}}

{{ parent.tldr }}

{{ parent.urlSource.name }}
{{ parent.authors[0].realName || parent.author}}

{{ parent.authors[0].tagline || parent.tagline }}

{{ parent.views }} ViewsClicks
Tweet

{{parent.nComments}}