#
# -*- Perl -*-
# $Id: pdf.pl,v 1.22.4.11 2004/03/22 07:20:20 opengl2772 Exp $
# Copyright (C) 1997-2000 Satoru Takabayashi ,
#               1999 NOKUBI Takatsugu,
#               2000-2004 Namazu Project All rights reserved.
#     This is free software with ABSOLUTELY NO WARRANTY.
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2, or (at your option)
#  any later version.
# 
#  This program is distributed in the hope that it will be useful
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
#  02111-1307, USA
#
#  This file must be encoded in EUC-JP encoding
#

package pdf;
use strict;
require 'util.pl';
require 'gfilter.pl';

my $pdfconvpath = undef;
my $pdfinfopath = undef;
my $pdfconvver = 0;
my $pdfinfover = 0;
my $pdfconvarg = '';
my $pdfinfoarg = '';

sub mediatype() {
    return ('application/pdf');
}

sub status() {
    $pdfconvpath = util::checkcmd('pdftotext');
    $pdfinfopath = util::checkcmd('pdfinfo');
    if (defined $pdfconvpath) {
	my $ret = `$pdfconvpath 2>&1`;
	if ($ret =~ /^pdftotext\s+version\s+([0-9]+\.[0-9]+)/) {
	    $pdfconvver = $1;
	}
	if (util::islang("ja")) {
	    if ($pdfconvver >= 1.00) {
		$pdfconvarg = '-enc EUC-JP';
	    } else {
		$pdfconvarg = '-eucjp';
	    }
	}
        if (defined $pdfinfopath) {
	    my $ret = `$pdfinfopath 2>&1`;
	    if ($ret =~ /^pdfinfo\s+version\s+([0-9]+\.[0-9]+)/) {
	        $pdfinfover = $1;
	    }
	    if (util::islang("ja")) {
	        if ($pdfinfover >= 2.02) {
		    $pdfinfoarg = '-enc EUC-JP';
	        } else {
		    $pdfinfoarg = '';
	        }
	    }
        }
	return 'yes';
    }
    return 'no';
}

sub recursive() {
    return 0;
}

sub pre_codeconv() {
    return 0;
}

sub post_codeconv () {
    return 1;
}

sub add_magic ($) {
    return;
}

sub filter ($$$$$) {
    my ($orig_cfile, $cont, $weighted_str, $headings, $fields)
      = @_;
    my $cfile = defined $orig_cfile ? $$orig_cfile : '';

    my $tmpfile = util::tmpnam('NMZ.pdf');
    my $tmpfile2 = util::tmpnam('NMZ.pdf2');

    my $fh = util::efopen("> $tmpfile");
    print $fh $$cont;
    util::fclose($fh);
    undef $fh;

    if (util::islang("ja")) {
	util::vprint("Processing pdf file ... (using  '$pdfconvpath' in Japanese mode)\n");
	system("$pdfconvpath -q $pdfconvarg -raw $tmpfile $tmpfile2");
    } else {
	util::vprint("Processing pdf file ... (using  '$pdfconvpath')\n");
	system("$pdfconvpath -q -raw $tmpfile $tmpfile2");
    }
    unless (-e $tmpfile2) {
	unlink $tmpfile;
	unlink $tmpfile2;
	return 'Unable to convert pdf file (maybe copying protection)';
    }

    $fh = util::efopen("< $tmpfile2");
    $$cont = util::readfile($fh);
    util::fclose($fh);
    undef $fh;
    unlink $tmpfile2;

    gfilter::line_adjust_filter($cont);
    gfilter::line_adjust_filter($weighted_str);
    gfilter::white_space_adjust_filter($cont);
    $fields->{'title'} = gfilter::filename_to_title($cfile, $weighted_str)
	unless $fields->{'title'};
    gfilter::show_filter_debug_info($cont, $weighted_str,
			   $fields, $headings);

    if (defined $pdfinfopath) {
	my $tmpfile3 = util::tmpnam('NMZ.pdf3');
#	system("$pdfinfopath $tmpfile > $tmpfile3");
	open(SAVEOUT, ">&STDOUT");
	open(STDOUT, ">$tmpfile3");
	system("$pdfinfopath $pdfinfoarg $tmpfile");
	open(STDOUT, ">&SAVEOUT");
	my $fh = util::efopen("< $tmpfile3");
	my $cont2 = util::readfile($fh);
        util::fclose($fh);
	undef $fh;
	unlink($tmpfile3);
	if ($cont2 =~ /Title: (.*)/) { # or /Subject: (.*)/
	    $fields->{'title'} = $1;
            if ($fields->{'title'} =~ /<unicode>/) {
    	        delete $fields->{'title'};
            }
            elsif ($fields->{'title'} =~ /^\s*$/) {
    	        delete $fields->{'title'};
            }
	}
	if ($cont2 =~ /Author: (.*)/) {
	    $fields->{'author'} = $1;
            if ($fields->{'author'} =~ /<unicode>/) {
    	        delete $fields->{'author'};
            }
            elsif ($fields->{'author'} =~ /^\s*$/) {
    	        delete $fields->{'author'};
            }
	}
    }

    unlink $tmpfile;

    return undef;
}

1;
