#! /usr/bin/perl
# This script's documentation stands below.  Scroll down.
use warnings;
use strict;
use integer;
unshift @INC, '.';
use Bnm;
use Alpha;

our $file_maint    = '../maint.txt' ;
our $file_debram   = '../debram.txt';

our $width         = 132;
our $w_maint       =  18;
our $w_pri         =   6;
our $w_deb         =  29;
our $str_main_body = "THE RAMIFICATION\n";
our $Maintainer    = 'Maintainer';
our $Priority      = 'Priority';
our $Package       = 'Package';
our $Source        = 'Source';
our $Section       = 'Section';
our $Description   = 'Description';
our $Ramification  = 'Ramification';
our $contrib       = 'contrib';
our $n_digit       =   4;

our %pri = (
    required  => 'req',
    important => 'imp',
    standard  => 'std',
    optional  => 'opt',
    extra     => 'ext',
);

our $usage = <<END;
usage: $0 [-Pch] [Packages file]...
    -P instead of a new debram.txt body, output a modified Packages file
    -w within each ram in the output, separate the new pkgs from the old
    -c output contrib packages, too
    -h print this usage message
END

our $rule        = '-' x $width . "\n";
our $skip_to_deb = $w_maint + $w_pri;

# You do not need this optional helper script to install, use, modify,
# develop, build, package or distribute `debram'.  If you are developing
# the debram, however, you may find the script convenient.
#
# SYNOPSIS
#
# new-debram-body [-Pch] [Packages file]...
#
# DESCRIPTION
#
# This script builds a new debram.txt body from one or more new Packages
# files and from the old debram.txt.  It prints the new debram.txt body
# to stdout.
#
# OPTIONS
#
# -P instead of a new debram.txt body, output a sorted, modified
#    Packages file
#
# -w within each ram in the output, separate the new packages from the
#    old (an new package is defined to be one which appears only in a
#    Packages file, not in the old debram.txt)
#
# -c output contrib packages, too (contribs are filtered out by default)
#
# -h (or -?) print a usage message
#
# PACKAGES FILES
#
# A Packages file consists of a list of binary control paragraphs, each
# of which resembles the following example.
#
#   Package: apt-zip
#   Priority: extra
#   Section: admin
#   Installed-Size: 104
#   Maintainer: Giacomo Catenazzi <cate@debian.org>
#   Architecture: all
#   Version: 0.13.2
#   Depends: apt (>= 0.3.10)
#   Filename: pool/main/a/apt-zip/apt-zip_0.13.2_all.deb
#   Size: 14752
#   MD5sum: 78b979ac3ea325e1a2d4b154f14f5eb4
#   Description: Update a non-networked computer using apt and removable media
#    These scripts simplify the process of using dselect and apt on a
#    non-networked Debian box, using removable media like ZIP floppies.
#    One generates a `fetch' script (supporting backends such as wget and
#    lftp, in a modular, extensible way) to be run on a host with better
#    connectivity, check space constraints of your removable media, and
#    then install the package on your Debian box.
#    .
#    Note on current version: space-checking is not done and spanning
#    multiple disks is not yet supported.
#
# You probably have a Packages file on your system at
#
#   /var/lib/dpkg/available
#
# At the time of this writing (Jan 2004), the various current Packages
# files are found in Debian's on-line archive in places like
#
#   dists/testing/main/binary-i386/Packages.gz
#
# When multiple Packages files are given, the leftmost takes precedence.
#
# THE RAMIFICATION SPECIAL CONTROL FIELD
#
# You may find it useful to edit a copy of the Packages file to include
# the special control field `Ramification:' in some or all of the binary
# control paragraphs.  This is no standard control field, of course, but
# the script understands it to identify a package's ramification.  For
# example,
#
#   Ramification: 8212
#   Package: apt-zip
#   Priority: extra
#   Section: admin
#   Installed-Size: 104
#   Maintainer: Giacomo Catenazzi <cate@debian.org>
#   Architecture: all
#   Version: 0.13.2
#   Depends: apt (>= 0.3.10)
#   Filename: pool/main/a/apt-zip/apt-zip_0.13.2_all.deb
#   Size: 14752
#   MD5sum: 78b979ac3ea325e1a2d4b154f14f5eb4
#   Description: Update a non-networked computer using apt and removable media
#    These scripts simplify the process of using dselect and apt on a
#    non-networked Debian box, using removable media like ZIP floppies.
#    One generates a `fetch' script (supporting backends such as wget and
#    lftp, in a modular, extensible way) to be run on a host with better
#    connectivity, check space constraints of your removable media, and
#    then install the package on your Debian box.
#    .
#    Note on current version: space-checking is not done and spanning
#    multiple disks is not yet supported.
#
# Extra text may follow the ram number.  If so, the script ignores the
# extra text.  For example,
#
#   Ramification: 8212 APT
#
# has the same effect as does
#
#   Ramification: 8212
#
# When no Ramification control field is given, the script uses the
# ramification number from the old debram.txt.
#
# The script's -P option can help you to generate an initial
# Ramification-identifying copy of the Packages file.
#
# PACKAGE SORTING ORDER
#
# To group related packages together, this script sorts packages in a
# peculiar order:
#
#   1. by maintainer's name; then
#   2. by source package name; then
#   3. by binary package name with the source's flagship binary first.
#
# Maintainers are sorted last name first.  Refer to `Alpha.pm' for the
# precise name-sorting algorithm.
#
# Package names are sorted first without a leading "lib".  This means
# that "libncurses5" sorts near "ncurses-bin".  (It also means that
# "libtool" sorts near "tool", which is wrong.  In almost every other
# case, though, it is right to sort without the leading "lib"; so this
# is how the sort is done.)
#
# Experience shows such a sort to be surprisingly useful.
#
# PACKAGES NOT OUTPUT
#
# Packages not named in the Packages files are not output, even when
# they appear in the old debram.txt.  Packages whose ramifications
# cannot be determined are output only when the -P option has been
# given.  Contrib packages are output only when the -c option has been
# given.  (As to non-free packages, their control paragraphs do not
# occur in the standard Packages files.  If non-free are fed to the
# script nevertheless, the script's output is undefined.)
#
# THE UNKNOWN-MAINTAINER ERROR
#
# This script often exits with an "unknown maintainer" error.  Usually
# this error is no sign of trouble; it usually merely means that the
# script has seen a new Debian maintainer mentioned somewhere in a
# Packages file, a maintainer not yet in maint.txt.  Sometimes it means
# that the script has found a new spelling of an existing maintainer's
# name.  In either case, the error is usually simple to fix.
#
# If you are developing the debram, when you see an "unknown maintainer"
# error,
#
#   1. examine the Packages files to decide whether the unknown
#      maintainer is really a new maintainer or is an existing
#      maintainer with an alternate spelling.
#
#   2. update `maint.txt' accordingly, and
#
#   3. run this script again.
#
# Although it is not necessary that `maint.txt' always be kept in
# perfect alphabetical order, after making several updates to it you
# probably want to `helper/sort-maint' it for neatness' and correctness'
# sake.
#
# FILES
#
# (See the global parameters at this script's head.  The files include
# debram.txt and maint.txt.)
#
# BUGS
#
# This script uses a lot of memory at runtime: it stores a data
# structure duplicating piecewise almost the Packages files' entire
# combined text.  This is convenient for the script writer but is
# technically unnecessary.  With additional scripting effort, the memory
# usage could be dramatically reduced.
#
# On the other hand, this is just a helper script.  To make such
# improvements therefore has low priority.
#
#

# Read command-line arguments and options.
my @opt;
my @arg;
push @{ /^-/ ? \@opt : \@arg }, $_ for @ARGV;
my %opt = map {
    my $o = $_;
    map { substr( $o, $_, 1 ) => 1 } 1 .. length($_)-1
} @opt;
if ( @arg < 1 || $opt{'?'} || $opt{h} ) {
    print $usage;
    exit 0;
}

# Read the maintainer names.
my %mnstd; # debram-standard maintainer names
my %maint; # full maintainer names
{
    open F, '<', $file_maint;
    local $/ = '';
    while ( <F> ) {
        my @m  = grep { /\S/ } split '\n';
        my $ms = shift @m;
        $mnstd{$ms} and die "$0: std name $ms appears twice\n";
        $mnstd{$ms} = 1;
        for ( @m ) {
            defined $maint{$_}
                and die "$0: full maint name $_ appears twice\n";
            $maint{$_} = $ms;
        };
    }
    close F;
}

# Read the available packages from the Packages files.
my %deb; # Debian packages
for my $file_avail ( @arg ) {
    open F, '<', $file_avail;
    local $/ = '';
    while ( <F> ) {
        next if !$opt{c}
          && /^$Section:[ \t]+(?:$contrib\/|.*\/$contrib$)/om;
        my( $ram   ) = /^$Ramification:[ \t]+(\d{4})/om;
        my( $mnt0  ) = /^$Maintainer:[ \t]+(.+?)[ \t]+</om
          or die
          "$0: control paragraph lacks a proper Maintainer field\n$_\n";
        defined $maint{$mnt0}
          or die "$0: unknown maintainer $mnt0\n";
        my  $maint   = $maint{$mnt0};
        my( $pri0  ) = /^$Priority:[ \t]+(\S+)/om
          or die
          "$0: control paragraph lacks a proper Priority field\n$_\n";
        my  $pri     = $pri  {$pri0}
          or die "$0: unknown priority $pri0\n";
        my( $deb   ) = /^$Package:[ \t]+(\S+)/om
          or die
          "$0: control paragraph lacks a proper Package field\n$_\n";
        if ( $deb{$deb} ) {
            $deb{$deb}{ram} = $ram unless defined $deb{$deb}{ram};
            next;
        }
        my( $src   ) = /^$Source:[ \t]+(\S+)/om;
        my $lead;
        if ( defined( $src ) && $deb ne $src ) {
            $lead = 0;
        }
        else {
            $lead = 1;
            $src = $deb;
        }
        my( $desc  ) = /^$Description:[ \t]+(.+)/om
          or die
          "$0: control paragraph lacks a proper Description field\n$_\n";
        $deb{$deb} = {
            maint   => $maint,
            pri     => $pri  ,
            src     => $src  ,
            lead    => $lead , # is this a flagship package?
            desc    => $desc ,
            ram     => $ram  ,
            control => $_    ,
            old     => 0     ,
              # had this package been ramified in the old debram.txt?
        };
    }
    close F;
}

# Read the section titles.  Record package ramifications not already
# known.
my %title; # ram titles
{
    open F, '<', $file_debram;
    1 until <F> eq $str_main_body; <F>;
    local $/ = '';
    while ( <F> ) {
        my @c  = grep { /\S/ } split '\n';
        shift @c;
        my( $ram, $title ) =
            shift( @c ) =~ /^(\d{$n_digit}) (.+?) \(\d+\)$/o
            or last;
        defined $title{$ram} and die "$0: ram $ram is titled twice\n";
        $title{$ram} = $title;
        shift @c;
        for ( @c ) {
            my( $deb ) = /^.{$skip_to_deb}(\S+)/o
              or die "$0: badly formed old debram line\n$_\n";
            $deb{$deb} or next;
            $deb{$deb}{ram} = $ram unless defined $deb{$deb}{ram};
            $deb{$deb}{old} = 1;
        }
    }
    close F;
}

# Sort the packages in two ways: by ramification then
# maintainer (%debram); by maintainer (@pkg).
my @pkg = sort {
    Alpha::cmp_std_mn
        ( $deb{$a}{maint},   $deb{$b}{maint} )
    || Bnm::cmp_prefixless
        ( $deb{$a}{src  },   $deb{$b}{src  } )
    ||    $deb{$b}{lead} <=> $deb{$a}{lead}
    || Bnm::cmp_prefixless
        (      $a        ,        $b         )
} keys %deb;
my %debram = map { $_ => [] } keys %title;
for my $deb ( @pkg ) {
    my $ram = $deb{$deb}{ram};
    defined( $ram ) or next;
    $debram{$ram} or die "$0: unknown ram $ram\n";
    push @{ $debram{$ram} }, $deb;
}

# Subroutine: fill a field with dots.
sub dot ($$) {
    my( $c, $w ) = @_;
    $c .= ' ';
    if ( length( $c ) < $w - 1 )
      { $c .= "\267" x ( $w - length( $c ) - 1 ) }
    if ( length( $c ) < $w     ) { $c .= ' ' }
    return $c;
}

# Subroutine: output one line describing a single deb.
sub debout (;$) {
    local $_ = @_ ? $_[0] : $_;
    my $out;
    $out .= dot  $deb{$_}{maint}, $w_maint;
    $out .= dot  $deb{$_}{pri  }, $w_pri  ;
    $out .= dot       $_        , $w_deb  ;
    $out .=      $deb{$_}{desc }          ;
    $out =~ s/^(.{0,$width}).*$/$1/o;
    print "$out\n";
}

# Output the reformed ramification.
unless ( $opt{P} ) {
    for my $ram ( sort keys %debram ) {
        print $rule;
        print "$ram $title{$ram} ("
            . scalar( @{ $debram{$ram} } ) . ")\n";
        print $rule;
        if ( $opt{w} ) {
             debout for grep {  $deb{$_}{old} } @{ $debram{$ram} };
             print $rule;
             debout for grep { !$deb{$_}{old} } @{ $debram{$ram} };
        }
        else { debout for @{ $debram{$ram} } }
        print "\n";
    }
}

# Output a working Packages file.
else {
    for ( @pkg ) {
        my $control = $deb{$_}{control};
        $control =~ s/^$Ramification:[ \t]+.*\n?//omg;
        print "$Ramification: $deb{$_}{ram} $title{ $deb{$_}{ram} }\n"
            if defined $deb{$_}{ram};
        print $control;
    }
}

