package Lire::WWW::Domain;

use strict;

use vars qw/$REVISION $VERSION/;

use Lire::WWW::Domain::Robot;
use Lire::WWW::Domain::Country;

use Carp;

# Version bookkeeping; assigned inside BEGIN so the values are already set
# while the remainder of the file is being compiled.
BEGIN {
    ($REVISION, $VERSION) = (
        '$Id: Domain.pm,v 1.7 2003/10/29 06:19:19 wsourdeau Exp $',
        "0.0.1",
    );
}

# Custom lookup tables filled by addRobot() and addCountry().  They are
# file-scoped lexicals, so every object of this class shares them.
#   %custom*Hash   : lower-cased code => value handed back to the caller
#   %compiled*Hash : lower-cased code => the same code compiled with qr//
my (%customRobotHash,   %compiledRobotHash);
my (%customCountryHash, %compiledCountryHash);

# -----------------------------------------------------------------------------
# Functions
# -----------------------------------------------------------------------------

# Constructor.
#
# Optional named arguments:
#   robot_default   - value getRobot() falls back to when no robot is
#                     found (default: "Unknown/No Robot")
#   country_default - value getCountry() falls back to when no country is
#                     found (default: "Unknown")
#
# Returns the new object, blessed into $class.
sub new {
    my ($class, %args) = @_;

    my $self = {
        'robot_default'   => $args{'robot_default'}   || "Unknown/No Robot",
        # Must read 'country_default' here; reading 'robot_default' would
        # ignore the caller's country setting and leak the robot default
        # into the country default.
        'country_default' => $args{'country_default'} || "Unknown",
    };
    bless $self, $class;

    return $self;
}

# Remember the domain name that getRobot() and getCountry() will examine.
# The name is stored lower-cased in $self->{'domain'}.  An undefined
# argument is ignored, leaving any previously stored domain in place.
sub setDomain {
    my ($self, $hostname) = @_;

    $self->{'domain'} = lc($hostname) if defined $hostname;
}

# Look up the robot name for the domain stored by setDomain().
#
# Returns:
#   undef                     - no domain has been set
#   $self->{'robot_default'}  - the domain consists only of digits and
#                               dots (i.e. looks like an IP address), or
#                               no lookup produced a match
#   otherwise the defined value returned by
#   Lire::WWW::Domain::Robot::getRobot(), or, failing that, the value of
#   a custom entry (see addRobot()) whose pattern matches the end of the
#   domain.  Custom entries are tried in hash order, so which one wins is
#   unspecified when several match.
#
# Domains for which nothing matched are recorded in
# $self->{'notInRobotHash'} so that repeated lookups return early.
sub getRobot {
    my ($self) = @_;

    my $domain = $self->{'domain'};

    # Test for "no domain" before any pattern match, so an undefined value
    # is never fed to a regex (which would warn under -w).
    return undef unless defined $domain;

    return $self->{'robot_default'} if $domain =~ /^[\d.]+$/;

    # Negative cache: this domain was already looked up without success.
    return $self->{'robot_default'} if ($self->{'notInRobotHash'}{$domain});

    my $robot = Lire::WWW::Domain::Robot::getRobot($domain);
    return $robot if defined $robot;

    # check custom hash
    foreach my $key (keys %customRobotHash) {
        my $re = $compiledRobotHash{$key};
        if ($domain =~ /$re$/) {
            return $customRobotHash{$key};
        }
    }

    $self->{'notInRobotHash'}{$domain} = "1";
    return $self->{'robot_default'};
}

# Register a custom robot entry.
#
# Parameters:
#   $code  - pattern identifying the robot's domain.  It is lower-cased
#            and compiled with qr//, so regex metacharacters in it are
#            active; getRobot() matches it against the end of the domain.
#   $value - robot name to report for matching domains.
#
# Croaks if either $code or $value is undefined or empty.
#
# The entry goes into file-scoped tables and is therefore visible to every
# object of this class.
sub addRobot {
    my ($self, $code, $value) = @_;

    # The original test was inverted: it croaked exactly when both
    # arguments WERE supplied and accepted empty ones.
    croak "Must supply both code and value!\n"
      unless defined($code) && $code ne ""
          && defined($value) && $value ne "";

    # A domain this object already cached as "no robot" may match the new
    # entry, so forget the negative cache.  Skipped when invoked as a class
    # method, where $self is not a reference.
    delete $self->{'notInRobotHash'} if ref $self;

    $code = lc $code;
    $customRobotHash{$code} = $value;
    $compiledRobotHash{$code} = qr/$code/;
}

# Register several custom robot entries in one call.  The arguments after
# the invocant are code => value pairs; each pair is passed on to
# addRobot().
sub addRobotHash {
    my ($self, %robothash) = @_;

    while (my ($pattern, $name) = each %robothash) {
        $self->addRobot($pattern, $name);
    }
}

# Look up the country for the domain stored by setDomain().
#
# Returns:
#   undef                       - no domain has been set
#   $self->{'country_default'}  - the domain consists only of digits and
#                                 dots (i.e. looks like an IP address), or
#                                 no lookup produced a match
#   otherwise the defined value returned by
#   Lire::WWW::Domain::Country::getCountry(), or, failing that, the value
#   of a custom entry (see addCountry()) whose pattern matches the end of
#   the domain.  Custom entries are tried in hash order, so which one wins
#   is unspecified when several match.
#
# Domains for which nothing matched are recorded in
# $self->{'notInCountryHash'} so that repeated lookups return early.
sub getCountry {
    my ($self) = @_;

    my $domain = $self->{'domain'};

    # Test for "no domain" before any pattern match, so an undefined value
    # is never fed to a regex (which would warn under -w).
    return undef unless defined $domain;

    return $self->{'country_default'} if $domain =~ /^[\d.]+$/;

    # Negative cache: this domain was already looked up without success.
    return $self->{'country_default'}
      if ($self->{'notInCountryHash'}{$domain});

    my $country = Lire::WWW::Domain::Country::getCountry($domain);
    return $country if defined $country;

    # check custom hash
    foreach my $key (keys %customCountryHash) {
        my $re = $compiledCountryHash{$key};
        # Anchored at the end of the name, as getRobot() does: unanchored,
        # a code such as "nl" would also match "online.com".
        if ($domain =~ /$re$/) {
            return $customCountryHash{$key};
        }
    }

    $self->{'notInCountryHash'}{$domain} = "1";
    return $self->{'country_default'};
}

# Register a custom country entry.
#
# Parameters:
#   $code  - pattern identifying the country's domains.  It is lower-cased
#            and compiled with qr//, so regex metacharacters in it are
#            active; getCountry() matches it against the domain name.
#   $value - country name to report for matching domains.
#
# Croaks if either $code or $value is undefined or empty.
#
# The entry goes into file-scoped tables and is therefore visible to every
# object of this class.
sub addCountry {
    my ($self, $code, $value) = @_;

    # The original test was inverted: it croaked exactly when both
    # arguments WERE supplied and accepted empty ones.
    croak "Must supply both code and value!\n"
      unless defined($code) && $code ne ""
          && defined($value) && $value ne "";

    # A domain this object already cached as "no country" may match the
    # new entry, so forget the negative cache.  Skipped when invoked as a
    # class method, where $self is not a reference.
    delete $self->{'notInCountryHash'} if ref $self;

    $code = lc $code;
    $customCountryHash{$code} = $value;
    $compiledCountryHash{$code} = qr/$code/;
}

# Register several custom country entries in one call.  The arguments
# after the invocant are code => value pairs; each pair is passed on to
# addCountry().
sub addCountryHash {
    my ($self, %Countryhash) = @_;

    while (my ($pattern, $name) = each %Countryhash) {
        $self->addCountry($pattern, $name);
    }
}

1;

__END__

=pod

=head1 NAME

Lire::WWW::Domain - detect robots and countries from domain names

=head1 SYNOPSIS

 use Lire::WWW::Domain;

 my $test_string = "crawl4.googlebot.com";

 my $analyzer = Lire::WWW::Domain->new();
 $analyzer->setDomain($test_string);

 print "  Web Robot name: " . $analyzer->getRobot() . "\n";
 print "  Country       : " . $analyzer->getCountry() . "\n";

=head1 DESCRIPTION

This module offers an interface to databases of known information
useful for analyzing domain names as found in Apache logfiles. Note that
this only works if the domain names are host names and *not* IP addresses;
for a name consisting only of digits and dots the default values are
returned.

The default robot and country values (respectively "Unknown/No Robot"
and "Unknown") can be set by using the I<robot_default> and
I<country_default> parameters:

    my $analyzer = Lire::WWW::Domain->new( 'country_default' => "?",
                                           'robot_default'   => "No Robot" );

Optionally, the user can add custom information to the database with
the addRobot(), addRobotHash(), addCountry() and addCountryHash()
methods.

=head1 VERSION

$Id: Domain.pm,v 1.7 2003/10/29 06:19:19 wsourdeau Exp $

=head1 COPYRIGHT

Copyright (C) 2001  Stichting LogReport Foundation LogReport@LogReport.org

This file is part of Lire.

Lire is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html or write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.

=head1 AUTHORS

Egon Willighagen <egonw@logreport.org> and

=cut

