package Lire::DlfSchema;

use strict;

use vars qw/ %SCHEMA_CACHE /;

use XML::Parser;

use Lire::Config::Build qw/ ac_info /;
use Lire::Config;
use Lire::Field;
use Lire::DataTypes qw/ :xml :basic :special /;
use Lire::DerivedSchema;
use Lire::ExtendedSchema;
use Lire::I18N qw/ bindtextdomain dgettext dgettext_para /;
use Lire::Utils qw/ sql_quote_name check_param /;

use Carp;

BEGIN {
    %SCHEMA_CACHE = ();
};

=pod

=head1 NAME

Lire::DlfSchema - Interface to Lire DLF Schema XML specifications

=head1 SYNOPSIS

In DLF converters:

    use Lire::DlfSchema;

    my $schema = Lire::DlfSchema::load_schema( "email" );

    my $dlf_maker = $schema->make_hashref2asciidlf_func( qw/time msgid
	      from_user from_domain from_relay_host from_relay_ip
	      size delay to_user to_domain to_relay_host to_relay_ip
	      stat / );


=head1 DESCRIPTION

This module is the interface to the Lire DLF Schemas defined in XML
files. A schema defines the order of the fields along with their
names, descriptions, types and default values.

This module also includes one function which is mandatory to use for
old style DLF converters: make_hashref2asciidlf_func().

=head1 ACCESSING A SCHEMA OBJECT

The way to access a schema for a superservice is through the
load_schema() module function. You use it like this:

    my $schema = Lire::DlfSchema::load_schema( $superservice);

This function will return a schema object which can then be used to
query information about the schema. This function will die() on error.

=cut


sub load_schema {
    my ( $name ) = @_;

    check_param ( $name, 'name', \&check_xml_name,
                  'invalid schema identifier' );

    my $super = $name;
    if ($name =~ /^(\w+)-/ ) {
	$super = $1;
    }

    croak "invalid superservice: $super"
      unless Lire::DlfSchema->has_superservice( $super );

    my $file = Lire::DlfSchema->_schema_file( $name );
    croak "can't find XML schema definition for $name in ",
      join( ":", @{Lire::Config->get( 'lr_schemas_path' )} ),"\n"
        unless defined $file;

    # If the schema is in the cache, make sure
    # the in-memory version is up-to-date.
    if ( exists $SCHEMA_CACHE{$name} ) {
        my $cache_entry = $SCHEMA_CACHE{$name};

        return $cache_entry->{'schema'}
          if $file eq $cache_entry->{'file'} &&
            (stat $file)[9] <= $cache_entry->{'mtime'}
    }

    my $file_h;
    open ( $file_h, "$file")
      or croak "can't open XML schema $file for $name schema: $!";

    my $parser = new XML::Parser ( 'Handlers'	=> {
						    'Init'  => \&Init,
						    'Final' => \&Final,
						    'Start' => \&Start,
						    'End'	  => \&End,
						    'Char'  => \&Char,
						   },
				   'Namespaces' => 1,
				   'NoLWP'      => 1,
				 );
    my $dlf;

    eval {
	$dlf = $parser->parse( $file_h );
    };
    croak "error while parsing XML definition of $name: $@"
      if $@;

    close $file_h;

    # Sanity checks
    croak "$file has '", $dlf->superservice, "' as superservice attribute when it should have '", $super, "'\n"
	if $dlf->superservice ne $super;
    croak "$file has '", $dlf->id, "' as id attribute when it should have '", $name, "'\n"
      if $dlf->id ne $name;

    $SCHEMA_CACHE{$name} = { 'file'   => $file,
			     'schema' => $dlf,
			     'mtime'  => (stat $file)[9],
			   };

    $SCHEMA_CACHE{$name}{'schema'};
}

sub new {
    my $proto = shift;
    my $class = ref( $proto) || $proto;

    my %attr = @_;

    check_param( $attr{'superservice'}, 'superservice',
                 \&check_superservice );
    check_param( $attr{'timestamp'}, 'timestamp',
                 \&check_xml_name );

    my $self = bless { 'id'		 => $attr{'superservice'},
                       'superservice'    => $attr{'superservice'},
                       'timestamp_field' => $attr{'timestamp'},
                       'fields_by_pos'   => [],
                       'fields_by_name'  => {},
                       'title'	    => undef,
                       'description'	    => undef,
                     }, $class;

    bindtextdomain( "lire-" . $self->superservice(),
                    ac_info( 'LR_PERL5LIB' ) . "/LocaleData" );

    return $self;
}

sub check {
    my ( $self ) = @_;
    # Verify that the schema is valid

    # Check that the timestamp attribute is valid
    croak ( "field $self->{'timestamp_field'} doesn't exists" )
      unless $self->has_field( $self->{'timestamp_field'} );

    my $field = $self->timestamp_field;
    croak ( "field $self->{'timestamp_field'} isn't of type timestamp" )
      unless $field->type() eq "timestamp";

    return 1;
}

=pod

=head2 has_superservice( $superservice )

Returns true if there is superservice named $schema_name available. An
error will be thrown if the schema name isn't valid for a superservice.

=cut

sub has_superservice {
    my ($self, $superservice ) = @_;

    check_param( $superservice, 'superservice',
                 sub { return ( check_xml_name($_[0])
                                && index($_[0], "-") == -1 ) },
                 'invalid superservice schema name' );

    return defined $self->_schema_file( $superservice );
}

=pod

=head2 has_schema( $schema_name )

Returns true if there is $schema_name available. An error will be
thrown if the schema name isn't valid.

=cut

sub has_schema {
    my ( $self, $schema_name ) = @_;

    check_param( $schema_name, 'schema_name', \&check_xml_name,
                 'invalid schema name' );

    return defined $self->_schema_file( $schema_name );
}

sub _schema_file {
    my ( $self, $schema_name ) = @_;

    foreach my $dir ( @{Lire::Config->get( 'lr_schemas_path' )} ) {
        return "$dir/$schema_name.xml"
          if -f "$dir/$schema_name.xml";
    }

    return undef;
}

=pod

=head2 superservices()

Returns the name of the available superservices in an array.

=cut

sub superservices {
    my $self = $_[0];

    return grep { /^[^-]+$/ } $self->schemas();
}

=pod

=head2 schemas()

Returns the name of the available schemas in an array.

=cut

sub schemas {
    my $self = $_[0];

    my @schemas = ();
    foreach my $dir ( @{ Lire::Config->get( 'lr_schemas_path' ) } ) {
        next unless -d $dir && -r $dir;
        opendir my $dh, $dir
          or croak "opendir failed on '$dir': $!";
        foreach my $file ( readdir $dh ) {
            next unless $file =~ /^([a-zA-Z][-\w.:]+)\.xml$/;
            push @schemas, $1
              unless grep { $_ eq $1 } @schemas;
        }
        closedir $dh;
    }

    return @schemas;
}

=pod

=head1 SCHEMA OBJECT METHODS

=head2 id()

    my $id = $schema->id();

This method will return the id of the schema. This will be the
superservice's name for superservice's main schema. (There are other
types of schemas (derived and extended schemas) for which the id will be
different than the superservice's name.)

=cut

sub id {
    $_[0]->{'id'};
}

=pod

=head2 superservice()

    my $super = $schema->superservice();

This method will return the superservice's name of the schema.

=cut

sub superservice {
    return $_[0]->{'superservice'};
}

=pod

=head2 title()

This method will return the human readable title of the schema. (This
is the content of the title element in the XML specification.)

=cut

sub title {
    my $self = $_[0];

    return dgettext( "lire-$self->{'superservice'}", $self->{'title'} );
}

=pod

=head2 description()

This method will return the description of the schema. (This is the
content of the description element in the XML specification.) Be aware
that this will most likely contain DocBook markup.

=cut

sub description {
    my $self = $_[0];

    return dgettext_para( "lire-$self->{'superservice'}",
                          $self->{'description'} );
}

=pod

=head2 field_by_pos()

    my $field = $schema->field_by_pos( 0 );

This method takes an integer as parameter and return the field at that
position in the schema. Fields are indexed starting at 0. This method
will die() if an invalid position is passed as parameter.

The method returns a Lire::Field(3pm) object.

=cut

sub field_by_pos {
    my ( $self, $pos ) = @_;
    croak "invalid field number: $pos"
      unless $pos < @{$self->{'fields_by_pos'}} && $pos >= 0;

    $self->{'fields_by_pos'}[$pos];
}

=pod

=head2 has_field()

    if ( $schema->has_field( 'test ) ) { 
	print "schema has field 'test'\n"; 
    }

This method takes a string as parameter and returns a boolean value.
That value will be true if there is a field in the schema with that
name, it will be false otherwise.

=cut

sub has_field {
    my ($self, $name ) = @_;

    return exists $self->{'fields_by_name'}{$name};
}


=pod

=head2 field()

    my $field = $schema->field( 'from_email' );

This method takes a field's name as parameter and returns the
Lire::Field(3pm) object describing that field in the schema. The
method will die() if there is no field with that name in the schema.

=cut

sub field {
    my ( $self, $name ) = @_;

    croak "no field by that name: $name"
      unless $self->has_field( $name );

    $self->{'fields_by_name'}{$name};
}

=pod

=head2 fields()

    my $fields = $schema->fields;
    my @fields = $schema->fields;

In array context, this method will return an array containing all the
fields (as Lire::Field(3pm) objects) in the schema. The order of the
fields in the array is the order of the fields in the schema.

In scalar context, it will return an array reference. This method is
more efficient than creating an array. DO NOT MODIFY THE RETURNED
ARRAY.

=cut

sub fields {
    wantarray ? @{$_[0]{'fields_by_pos'}} : $_[0]{'fields_by_pos'};
}

=pod

=head2 field_count()

    my $number_of_field = $schema->field_count;

This method returns the number of fields in the schema.

=cut

sub field_count {
    scalar @{$_[0]->{'fields_by_pos'}};
}

=pod

=head2 timestamp_field()

    my $time_field = $schema->timestamp_field;

This method will return the Lire::Field(3pm) object representing the
timestamp field in the schema. The timestamp field is the one that
defines the sort order of the DLF records.

=cut

sub timestamp_field {
    my ($self) = @_;

    $self->field( $self->{'timestamp_field'} );
}

=pod

=head2 is_schema_compatible()

    if ( $schema->is_schema_compatible( $other_schema ) ) {

    }

This method takes a Lire::DlfSchema(3pm) object as parameter and returns a
boolean value. That value will be true if the schema passed as parameter is
compatible with the other, it will be false otherwise.

For a superservice's schema, the only compatible schema is an object
representing the same superservice's schema.

=cut

sub is_schema_compatible {
    my ( $self, $schema ) = @_;

    return $schema eq $self->{'id'};
}

sub ascii_dlf_escape_field {
    # Escape the value :
    #   replace space with _
    #   replace 8 bit chars with ?
    #   replace control chars with ?
    $_[0] =~ tr/ \200-\377\000-\037/_?/;
}

=pod

=head2 make_hashref2asciidlf_func()

This method is part of the old style, now deprecated, DLF converter API.

It takes as parameters a list of field's names that are available in
the DLF output by the converter. (Not all services in a given
superservice will support the whole or the same subset of the
superservice's fields. Unsupported fields should contain the value
C<LIRE_NOTAVAIL> in the output DLF.)

This method will return an anonymous subroutine that should be used by
the DLF converter to create the DLF records. The generated subroutine
takes as parameter a hash reference representing the DLF record to
create. It returns an array reference representing the DLF record's
fields. It will make sure that the field's values are in the correct
order, that the unavailable fields are marked correctly, that missing
fields are defaulted and that the field's values are escaped
appropriately.

The hash's keys should be the DLF record's field names with the value
of the field associated to the key. All the fields that are available
(as specified when the method is called) which are undefined or that
aren't present in the hash will be set in the output DLF record to the
field's default value specified in the schema. Extra keys in the hash
will be ignored. Fields that aren't supported (as specified when the
subroutine was created by the make_hashref2asciidlf_func() method)
will contain the C<LIRE_NOTAVAIL> value.

One can write an ASCII DLF by printing the returned array reference
using a space as the join delimiter:

    my $dlf = $dlf_maker->( $hash_dlf );
    print join( " ", @$dlf ), "\n";

See the SYNOPSIS section for an example.

Beware!  New DLF convertors should use the Lire::DlfConverterProcess
interface.

=cut

sub make_hashref2asciidlf_func {
    my ( $self, @fields ) = @_;

    my %avail = map { $_ => 1 } @fields;
    my @ascii_dlf_tmpl = ();
    foreach my $field ( @{$self->fields}) {
	push @ascii_dlf_tmpl, [ $field->name, $field->default ];
    }

    return sub {
	my ($hash) = @_;

	my $dlf = [];
	foreach my $field_tmpl ( @ascii_dlf_tmpl ) {
	    my $name	= $field_tmpl->[0];
	    my $value;
	    if ( $avail{$name} ) {
		if (defined $hash->{$name} && length $hash->{$name}) {
		    $value = $hash->{$name};
		} else {
		    $value = $field_tmpl->[1]; # Use default
		}
		ascii_dlf_escape_field( $value );
	    } else {
		$value = "LIRE_NOTAVAIL";
	    }
	    push @$dlf, $value;
	}

	return $dlf;
    };
}

sub make_key_access_func {
    my ( $self, @fields ) = @_;

    # Compile a key maker function
    my @pos = ();
    foreach my $f ( @fields ) {
	push @pos, $self->field( $f )->pos;
    }
    my $check_undef_code = join( ' || ', map { '! defined $_[0]->[' . $_ . ']' } @pos);
    my $dlf_access_code = join ", ", map { '$_[0]->[' . $_ . ']' } @pos;
    my $key_maker_code = <<EOC;
sub {
    return undef if $check_undef_code;
    return join( ",", , $dlf_access_code);
}
EOC
    my $sub = eval $key_maker_code;
    croak "error compiling key maker function ($key_maker_code): $@"
      if $@;

    return $sub;
}

=pod

=head2 make_dlf_sort_func( @fields )

Returns an anonymous subroutine that can be used to compare the
relative sort order (-1, 0, 1) of two DLF arrays. The @fields
parameter contains the name of the field which are part of the sort
key.

The returned comparison function can be used like this 

$func->( $dlf1, $dlf2 )

It will returns -1 if DLF1 should sorted before DLF2; 0 when DLF1 and
DLF2 should sort at the same place and 1 if DLF1 should sort after
DLF2.

=cut

sub make_dlf_sort_func {
    my ( $self, @fields ) = @_;

    my @sort_ops = ();
    foreach my $f ( @fields ) {
	my $field = $self->field( $f );
	my $i = $field->pos;
	my $cmp = is_numeric_type( $field->type ) ? '<=>' : 'cmp';
	push @sort_ops, '$_[0][' . $i . '] ' . $cmp . ' $_[1][' . $i . ']';
    }
    my $sort_code = "sub { " . join( " || ", @sort_ops ) . " }";
    my $sort_func = eval $sort_code;
    croak "error compiling sort comparison ($sort_code): $@" if $@;

    $sort_func;
}

=pod

=head2 sql_fields_def()

This method returns a string which can be used in a SQL CREATE TABLE
statement. IT only returns the field definitions like :

 (
     ts TIMESTAMP,
     int_field NUMBER(10,0)
 )

=cut

sub sql_fields_def {
    my $self = $_[0];

    my @defs = ();
    foreach my $f ( $self->fields() ) {
        push @defs, "    " . sql_quote_name( $f->name() ) . " " . 
          $f->sql_type();
    }

    return "(\n" . join( ",\n", @defs ) . "\n)\n";
}

use vars qw( $LDSML_NS %LDSML_ELEMENTS );

BEGIN {
    %LDSML_ELEMENTS = map { $_ => 1 } qw( dlf-schema derived-schema
					  extended-schema field
					  title description );
    $LDSML_NS = "http://www.logreport.org/LDSML/";
}

sub Init {
    my ($expat) = @_;

    $expat->{'lire_curr_schema'} = undef;
    $expat->{'lire_curr_field'}  = undef;
    $expat->{'lire_curr_desc'}   = undef;
}

sub Final {
    my ( $expat ) = @_;

    return $expat->{'lire_curr_schema'};
}

sub Start {
    my ( $expat, $name ) = @_;

    my $ns = $expat->namespace($name);
    $ns ||= ""; # Remove warnings
    if ( $ns eq $LDSML_NS ) {
	# This is one of our element
	error( $expat, "unknown element: $name" )
	  unless exists $LDSML_ELEMENTS{$name};

	{
	    no strict 'refs';

	    my $sub = $name . "_start";
	    $sub =~ s/-/_/g;	# Hyphen aren't allowed in element name

	    $sub->( @_ );
	};
    } else {
	# If we are in lire:description, this is probably a
	# DocBook element, append it to the current description.
	my $lire_desc = $expat->generate_ns_name( "description", $LDSML_NS );
	if ( $expat->within_element( $lire_desc ) ) {
	    $expat->{'lire_curr_desc'} .= $expat->original_string();
	} else {
	    error( $expat, "unknown element: $name" );
	}
    }
}

sub End {
    my ( $expat, $name ) = @_;

    my $ns = $expat->namespace($name);
    $ns ||= ""; # Remove warnings
    if ( $ns eq $LDSML_NS ) {
	# This is one of our element
	error( $expat, "unknown element: $name" )
	  unless exists $LDSML_ELEMENTS{$name};

	{
	    no strict 'refs';

	    my $sub = $name . "_end";
	    $sub =~ s/-/_/g;	# Hyphen aren't allowed in element name

	    $sub->( @_ );
	}
    } else {
	# If we are in lire:description, this is probably a
	# DocBook element, append it to the current description.
	my $lire_desc = $expat->generate_ns_name( "description", $LDSML_NS );
	if ( $expat->within_element( $lire_desc ) ) {
	    $expat->{'lire_curr_desc'} .= $expat->original_string();
	} else {
	    error( $expat, "unknown element: $name" );
	}
    }
}

sub Char {
    my ( $expat, $str ) = @_;

    # Character should only appear in title and description
    my $lire_title = $expat->generate_ns_name( "title", $LDSML_NS );
    my $lire_desc  = $expat->generate_ns_name( "description", $LDSML_NS );

    if ( $expat->in_element( $lire_title )) {
	$expat->{'lire_curr_title'} .= $str;
    } elsif ( $expat->within_element( $lire_desc )) {
	# Use original_string because we don't want parsed entities.
	$expat->{'lire_curr_desc'}  .= $expat->original_string();
    }
}

sub dlf_schema_start {
    my ( $expat, $name, %attr ) = @_;

    eval {
	$expat->{'lire_curr_schema'} = new Lire::DlfSchema( %attr );
    };
    error( $expat, $@ ) if $@;
}

sub dlf_schema_end {
    my ( $expat, $name ) = @_;
    eval {
	$expat->{'lire_curr_schema'}->check();
    };
    error( $expat, $@ ) if $@;
}

sub derived_schema_start {
    my ( $expat, $name, %attr ) = @_;

    eval {
	$expat->{'lire_curr_schema'} = new Lire::DerivedSchema( %attr );
    };
    error( $expat, $@ ) if $@;
}

sub derived_schema_end {
    my ( $expat, $name ) = @_;
    eval {
	$expat->{'lire_curr_schema'}->check();
    };
    error( $expat, $@ ) if $@;
}

sub extended_schema_start {
    my ( $expat, $name, %attr ) = @_;

    eval {
	$expat->{'lire_curr_schema'} = new Lire::ExtendedSchema( %attr );
    };
    error( $expat, $@ ) if $@;
}

sub extended_schema_end {
    my ( $expat, $name ) = @_;
    eval {
	$expat->{'lire_curr_schema'}->check();
    };
    error( $expat, $@ ) if $@;
}

sub error {
    my ( $expat, $msg ) = @_;

    my $line = $expat->current_line;

    croak $msg, " at line ", $line, "\n";
}

sub field_start {
    my ( $expat, $name, %attr ) = @_;

    check_param( $attr{'name'}, 'name', \&check_xml_name,
                 'invalid field name' );
    check_param( $attr{'type'}, 'type', \&check_type,
                 'invalid value for type attribute' );

    my $pos = @{$expat->{'lire_curr_schema'}->{'fields_by_pos'}};
    $expat->{'lire_curr_field'} = new Lire::Field( 'name'	    => $attr{'name'},
                                                   'i18n_domain'    => 'lire-'.$expat->{'lire_curr_schema'}->superservice(),
                                                   'type'	    => $attr{'type'},
                                                   'pos'	    => $pos,
                                                   'default'  => $attr{'default'},
                                                   'label'    => $attr{'label'},
                                               );
}

sub field_end {
    my ( $expat, $name ) = @_;

    my $field  = $expat->{'lire_curr_field'};
    my $schema = $expat->{'lire_curr_schema'};
    $schema->{'fields_by_pos'}[$field->pos()]   = $field;
    $schema->{'fields_by_name'}{$field->name()} = $field;

    delete $expat->{'lire_curr_field'};
}

sub title_start {
    my ( $expat, $name ) = @_;

    $expat->{'lire_curr_title'} = "";
}

sub in_schema_element {
    my ( $expat ) = @_;
    my $lire_dlf_schema = $expat->generate_ns_name( "dlf-schema", $LDSML_NS );
    my $lire_ext_schema = $expat->generate_ns_name( "extended-schema",
						    $LDSML_NS );
    my $lire_der_schema = $expat->generate_ns_name( "derived-schema",
						    $LDSML_NS );

    return $expat->in_element( $lire_dlf_schema ) ||
      $expat->in_element( $lire_ext_schema ) ||
      $expat->in_element( $lire_der_schema );

}

sub title_end {
    my ( $expat, $name ) = @_;

    my $lire_field = $expat->generate_ns_name( "field", $LDSML_NS );

    if ( $expat->in_element( $lire_field)) {
	$expat->{'lire_curr_field'}{'title'} = $expat->{'lire_curr_title'};
    } elsif ( in_schema_element( $expat ) ) {
	$expat->{'lire_curr_schema'}{'title'} = $expat->{'lire_curr_title'};
    } else {
	error( $expat, "encountered unexpected title" );
    }
}

sub description_start {
    my ( $expat, $name ) = @_;

    $expat->{'lire_curr_desc'} = "";
}

sub description_end {
    my ( $expat, $name ) = @_;

    my $lire_field = $expat->generate_ns_name( "field", $LDSML_NS );

    if ( $expat->in_element( $lire_field)) {
	$expat->{'lire_curr_field'}{'description'} = $expat->{'lire_curr_desc'};
    } elsif ( in_schema_element( $expat )) {
	$expat->{'lire_curr_schema'}{'description'} = $expat->{'lire_curr_desc'};
    } else {
	error( $expat, "encountered unexpected description" );
    }
}

# keep perl happy
1;

__END__

=pod

=head1 SEE ALSO

Lire::Program(3pm), Lire::Field(3pm)

=head1 VERSION

$Id: DlfSchema.pm,v 1.44 2004/03/26 00:27:34 wsourdeau Exp $

=head1 COPYRIGHT

Copyright (C) 2001, 2002 Stichting LogReport Foundation LogReport@LogReport.org

This file is part of Lire.

Lire is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html or write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.

=head1 AUTHOR

Francis J. Lacoste <flacoste@logreport.org>

=cut

