#!/usr/bin/perl
# agent_id - returns the short agent ID

# Try to guess which browser and version matches this agent string.
#
# Argument: $_[0] - the raw User-Agent string.
# Returns:  a short "Name version" label, or "unknown" when the string is
#           missing, is one of the known phoney agents, or nothing usable
#           is left after the clean-up at the end.
sub which_browser{
	my $Browser = $_[0];
	# A missing agent string can never be identified.
	return "unknown" unless (defined $Browser);
	$Browser =~ s/\t/ /g;
	# weird Browsers ... phoney and/or suspicious agent strings
	return "unknown" if ($Browser eq "IE 5.5 Compatible Browser");
	return "unknown" if ($Browser eq "IE5");
	# End weird Browsers
	# Gecko build stamp such as "Gecko/20030624" (only 200x dates are matched).
	my $gecko = qr/Gecko\/200[0-9][01][0-9][0-3][0-9]/;
	if ( $Browser =~ /^Mozilla/){
		if ( $Browser =~ /(Netscape\S*)/){
			# "Netscape6/6.x" is turned into "Netscape 6.x"
			$Browser = $1;
			$Browser =~ s#6/6# 6#;
		}
		elsif ( $Browser =~ /(Galeon\S*)/){
			$Browser = $1;
			$Browser =~ s/\)//g;
		}
		elsif ( $Browser =~ /(Opera.*)\s*\[..\]$/){
			# Opera masquerading as Mozilla, with a trailing "[xx]" tag
			$Browser = $1;
		}
		elsif ( $Browser =~ /$gecko.*Debian/ ||
		        $Browser =~ /$gecko.*WebWasher/){
			# Keep only the leading "Mozilla/x.y" token
			$Browser =~ s/ .*$//;
		}
		elsif ( $Browser =~ /$gecko\s+(.*)/){
			# Whatever follows the Gecko stamp names the real browser
			$Browser = $1;
		}
		elsif ( $Browser =~ /rv:([0-9.]+).*\)\s+$gecko/){
			# Plain Mozilla: the rv: field carries the version
			$Browser = "Mozilla $1";
		}
		elsif ( $Browser =~ /$gecko/){
			$Browser =~ s/ .*$//;
		}
		elsif ( $Browser =~ /Crazy\s*Browser\s*(\S+)/){
			$Browser = "Crazy Browser $1";
		}
		elsif ( $Browser =~ /\(compatible;(.*)/s){
			# Capture the remainder instead of reading $', which has the same
			# content here.
			my $x = $1;
			$x =~ s/^\s*//;
			my @t = split(/;\s*/,$x);
			# Either token may be absent, e.g. "Mozilla/4.0 (compatible; MSIE 5.0)"
			my $first  = defined $t[0] ? $t[0] : "";
			my $second = defined $t[1] ? $t[1] : "";
			if ($first =~ /^MSIE /){
				if ( $second eq "" ||
				     $second =~ /^(?:Windows|Mac_PowerPC|MSNIA|MSN|AOL|CS 2000)/) {
					# Second token is a platform/provider tag, or missing:
					# the MSIE token is the browser. Falling back to it when
					# the second token is missing avoids reporting a plain
					# MSIE agent as "unknown".
					$Browser = $first;
				}
				elsif ( $second =~ /^Linux/){
					# MSIE on Linux: use what follows the last ")" instead
					$Browser =~ s/^.*\)//;
				}
				else {
					# Otherwise the second token is taken as the browser name
					$Browser = $second;
				}
			}
			else {
				$Browser = $first;
			}
		}
		elsif ( $Browser =~ /^Mozilla\/(4\.[1-9][0-9]*)/ ||
		        $Browser =~ /^Mozilla\/(4\.0[1-9]+)/){
			$Browser = "Netscape $1"; # Most probably Netscape?
		}
	}
	elsif ( $Browser =~ /^(\w+)\/([0-9]+\.[0-9]+\.[0-9]+)/ ||
	        $Browser =~ /^(\w+)\/([0-9]+\.[0-9]+)/ ||
	        $Browser =~ /^(\w+)\/([0-9]+)/ ||
	        $Browser =~ /^(\w+)\s*\(([0-9]+\.[0-9]+)/ ){
		# Generic "Name/version" or "Name (version" agents; the most specific
		# version pattern is tried first.
		$Browser = "$1 $2";
	}
	# Final clean-up; the order of these substitutions matters.
	$Browser =~ s/^\S+_Arachne/Arachne/;
	$Browser =~ s/\(.*$//;        # drop everything from the first "("
	$Browser =~ s/\).*$//;        # ... and from the first ")"
	$Browser =~ s/\s*$//;
	$Browser =~ s/\s*\[..\]$//;   # trailing two-character "[xx]" tag
	$Browser =~ s/-[0-9].*$//;    # "-<digit>..." suffix
	$Browser =~ s/;$//;
	$Browser =~ s#/# #;           # first "/" separates name and version
	$Browser =~ s/^\s*//;
	$Browser =~ s/'/ /g;
	$Browser =~ s/\+$//;
	$Browser = "unknown" unless ($Browser);
	return($Browser);
}

# ------------------------------------------------------------------------

# Try to guess which robot and version matches this agent string.
#
# Argument: $_[0] - the raw User-Agent string.
# Returns:  a short robot label (first letter upper-cased), or "unknown" when
#           the string is missing or what remains looks like an ordinary
#           browser or platform token rather than a robot name.
sub which_robot{
	my $Robot = $_[0];
	# A missing agent string can never be identified.
	return "unknown" unless (defined $Robot);
	$Robot =~ s/\t/ /g;
	# unorthodox agent strings
	$Robot = "MSIECrawler" if ($Robot =~ /MSIECrawler\)$/);
	$Robot = "Larbin" if ($Robot =~ /larbin\@unspecified\.mail/);
	$Robot = "Inktomi slurp" if ($Robot =~ /inktomi/i && $Robot =~ /slurp/i);
	$Robot = "$1 $2" if ($Robot =~ /^Openfind .* (Openbot)\/(\S+)/);
	# Non-greedy skip: take the FIRST "major.minor" pair after the name.
	# A greedy ".*" would grab the last pair in the string and could cut a
	# multi-digit major version down to its final digit.
	$Robot = "$1 $2.$3" if ($Robot =~ /^(Scooter).*?([0-9]+)\.([0-9]+)/);
	$Robot = "GulperBot $1" if ($Robot =~ /Gulper Web Bot (\S+)/);
	$Robot = "Organica $1" if ($Robot =~ /Mozilla.*Advanced Email Extractor v(\S+)/);
	$Robot = $1 if ($Robot =~ /(NutchCrawler)/);
	$Robot = "$1 $2" if ($Robot =~ /(VoilaBot). *([0-9.]+)/);
	$Robot =~ s#http://##;
	$Robot = $1 if ($Robot =~ /^www\.(\w+)\.com/);
	$Robot = $1 if ($Robot =~ /^Mozilla\/4\.0 \(compatible.*\s+(\S+[Bb]ot)\W/);
	# End unorthodox agent strings
	# Strip a Mozilla-compatible wrapper, then cut the string down to its
	# leading name (and version) token. The order of these steps matters.
	$Robot =~ s/^Mozilla\/\S+.*compatible.//;
	$Robot =~ s/^Mozilla\/\S+ \(//;
	$Robot =~ s/\// /;            # first "/" separates name and version
	$Robot =~ s/^\s*//;
	$Robot =~ s/\(.*//;
	$Robot =~ s/\).*//;
	$Robot =~ s/;.*//;
	$Robot =~ s/ - .*$//;
	$Robot =~ s/_$//;
	$Robot =~ s/\s*$//;
	$Robot = $1 if ($Robot =~ /^www\.(\w+)\.\w+\.com/);
	$Robot =~ s/\W+\S+\.com$//;   # trailing host name
	$Robot =~ s/\W+\S+\.org$//;
	# What is left is either too short or just a browser/platform token.
	$Robot = "unknown" if (length($Robot) < 2 || $Robot =~ /^Mozilla/ || $Robot =~ /^MSIE / ||
		$Robot =~ /^Konqueror / || $Robot =~ /^Windows/ || $Robot =~ /^Linux/ );
	$Robot = "$1$2" if ($Robot =~ /^(\S+)\.org(.*)/);
	$Robot = "$1$2" if ($Robot =~ /^(\S+)\.com(.*)/);
	# Normalise "name_1.2" / "name-1.2" to "name 1.2"
	$Robot = "$1 $2" if ($Robot =~ /^(\S+)[ _-]([0-9.x]+)/);
	$Robot = ( ucfirst $Robot) unless ($Robot eq "unknown");
	$Robot =~ s/'/ /g;
	return($Robot);
}

# ------------------------------------------------------------------------

# Try to guess which operating system this agent string was sent from.
#
# Argument: $_[0] - the raw User-Agent string.
# Returns:  a short OS label such as "Windows XP" or "Linux intel", or "-"
#           when no operating system can be recognised.
sub which_OS{
	my $agent = defined $_[0] ? $_[0] : "";
	# Lexical on purpose: an undeclared $OS would overwrite the caller's
	# package variable of the same name on every call.
	my $OS = $agent ? $agent : "-";
	if ( $OS =~ /^Mozilla\/\S+ \(compatible;(.*)/s){
		# "(compatible; <browser>; [provider tags;] <OS>; ...)": the OS is
		# the token after the browser, once provider tags are skipped.
		my @t = split(/;/,$1);
		my $i = 1;
		# Each tag is checked once, in this order.
		for my $tag (qr/MSN/, qr/AOL [0-9]/, qr/CS 2000/, qr/Wal-Mart/){
			$i++ if (defined $t[$i] && $t[$i] =~ $tag);
		}
		# No token left means no OS information at all.
		$OS = defined $t[$i] ? $t[$i] : "-";
	}
	else {
		if (	$OS =~ /(Linux .*)/ ||
			$OS =~ /(Windows .*)/ ||
			$OS =~ /(\S+bsd .*)/ ||
			$OS =~ /(\S+BSD .*)/ ||
			$OS =~ /\W+(Win98)/ ||
			$OS =~ /(Mac OS X)/ ||
			$OS =~ /(Mac OS)/ ||
			$OS =~ /(IRIX)/i
					) {
			# $1 comes from whichever pattern matched first
			$OS = $1;
		}
		else {
			$OS = "-";
		}
	}
	$OS =~ s/^\(//;
	# Normalise version tokens to marketing names. "Win 9x 4.90" in the raw
	# agent string marks Windows ME even when the OS token says Windows 98.
	$OS = "Windows ME" if (($OS =~ /Windows 98/ || $OS =~ /-/) && $agent =~ /Win 9x 4\.90/);
	$OS = "Windows 2000" if ($OS =~ /Windows NT 5\.0/);
	$OS = "Windows XP" if ($OS =~ /Windows NT 5\.1/);
	$OS = "Windows 98" if ($OS =~ /Win98/);
	$OS = "Windows NT" if ($OS =~ /Windows NT 4\.0/);
	$OS = "Irix" if ($OS =~ /IRIX/i);
	$OS = "$1 intel" if ($OS =~ /(.*BSD) .*[2-6]86/);
	$OS = "Linux intel" if ($OS =~ /Linux/ && $agent =~ /i[2-6]86/);
	# A token containing "www" is a URL, not an OS
	$OS = "-" if ($OS =~ /www/i);
	$OS =~ s/\).*//;
	$OS =~ s/\;.*//;
	$OS =~ s/\s*$//;
	$OS =~ s/^\s+//;
	$OS =~ s/'/ /g;
	# Nothing left after trimming: report "no OS" the same way as above.
	$OS = "-" unless (length $OS);
	return($OS);
}
1;
