#! /usr/bin/perl
#   flat2po.pl: compare 2 intermediate representations of debiandoc SGML
#               documents and print PO file
#
#   This script is part of po-debiandoc

use strict;
use IO::File;
use Getopt::Long;

my ($type1, $name1, $id1, $start1, $end1, $text1);
my ($type2, $name2, $id2, $start2, $end2, $text2);
my ($start, $end, $name);
my ($file, $cnt, $extracnt, @entry, @list, $extra, $text, %index);

my %sect = (
        chapt => -1,
        appendix => -1,
        sect  => 0,
        sect1 => 1,
        sect2 => 2,
        sect3 => 3,
        sect4 => 4,
);

use vars(qw($opt_h $opt_v $opt_O $opt_T));

sub usage {
        print STDERR "Usage: $0 [-h] [-v] [-O origname] [-T transname] origfile transfile\n";
        exit($_[0]);
}

$Getopt::Long::bundling = 1;
$Getopt::Long::getopt_compat = 0;
if (not Getopt::Long::GetOptions(qw(
                        h|help
                        v|verbose
                        O|original_file=s
                        T|translation_file=s
                ))) {
        warn "Try `$0 --help' for more information.\n";
        exit(1);
}

usage(0) if $opt_h;
usage(1) unless $#ARGV == 1;

$opt_O ||= $ARGV[0];
$opt_T ||= $ARGV[1];
my $orig  = new IO::File;
die "Unable to read from $ARGV[0]"
        unless $orig->open("< $ARGV[0]");
my $trans = new IO::File;
die "Unable to read from $ARGV[1]"
        unless $trans->open("< $ARGV[1]");

my $date = `date +'%Y-%m-%d %k:%M%z'`;
chomp $date;
print <<"EOT";
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\\n"
"POT-Revision-Date: $date\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL\@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL\@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=CHARSET\\n"
"Content-Transfer-Encoding: ENCODING\\n"

EOT

sub verbose {
        print STDERR "Verbose: $_[0]\n" if $opt_v;
}
sub message {
        print STDERR "Message: $opt_T:$start2: $_[0]\n";
}
sub warning {
        warn "Warning: $opt_O:$start1: $opt_T:$start2: $_[0]\n";
}
sub get_line_number {
        my $fh = shift;
        my ($start, $end);
        my $line = <$fh>;
        chomp $line;
        if ($line =~ s/LINES (\d+)\s?//) {
                $start = $1;
                $end = $line || $start;
        } else {
                die "Mismatch";
        }
        return ($start, $end);
}

sub get_next_entry {
        my ($fh, $msg) = @_;
        my ($type, $name, $id, $start, $end, $text);

        my $line = <$fh>;
        chomp $line;
        if ($line =~ s/START (\S+)\s?//) {
                $name = $1;
                $id = $line;
                ($start, $end) = get_line_number($fh);
                $text = <$fh>;
                chomp $text;
                $type = 'TEXT';
        } elsif ($line =~ s/SECTION (\S+)\s?//) {
                $name = $1;
                $id = $line;
                ($start, $end) = get_line_number($fh);
                $type = 'SECTION';
        } else {
                goto FINISH;
        }
        verbose("$msg l.$start--$end: <$name".
                ($id eq '' ? '' : " id=\"$id\"").
                ">");
        return ($type, $name, $id, $start, $end, $text);
}

sub register_po_extra {
        my ($start, $end, $name, $file, $pos) = @_;
        no strict 'refs';
        $text1 = "<extra: $extracnt";
        $extra->{$start} = {
                end     => $end,
                name    => $name,
                text    => '',
        };
        $text2 = \$extra->{$start}->{text};
        verbose("blah $end ".$extra->{$start}->{end});
        $extracnt++;
        $index{$text1} = $cnt;
        $list[$cnt] = [];
        $cnt++;
        push (@{$list[$index{$text1}]},
                $start, $end, 'extra:'.$name, $file, $pos, $text1, $text2);
}

<$trans>;
$file = <$orig>;
chomp $file;

sub escape_text {
        my $text = shift;
        $text =~ s/^\s+//s;
        $text =~ s/\s+$//s;
        $text =~ s/"/\\"/g;
        $text =~ s/\n/\\n/sg;
        return $text;
}

$cnt = 0;
$extracnt = 0;
MAIN: while (1) {
        ($type1, $name1, $id1, $start1, $end1, $text1) = get_next_entry($orig, 'original');
        ($type2, $name2, $id2, $start2, $end2, $text2) = get_next_entry($trans, 'translation');

        NEXT:
        last MAIN if $type1 eq '' or $type2 eq '';
        if ($type1 eq 'SECTION') {
                while ($id2 =~ m/extra-trans-/) {
                        message("Extra element \`<$name2 id=\"$id2\">' found, skipping onto next section...");
                        $start = $start2;
                        $name  = $name2;
                        do {
                                $end = $end2;
                                ($type2, $name2, $id2, $start2, $end2, $text2) = get_next_entry($trans, 'translation');
                        } while ($type2 ne 'SECTION');
                        register_po_extra($start, $end, $name, $file, $start1);
                }
                next if $type2 eq 'SECTION' && $name2 eq $name1 && $id2 eq $id1;
                if ($type2 eq 'SECTION') {
                        if ($name2 ne $name1) {
                                warning("section mismatch: \`<$name1>' expected but \`<$name2>' found");
                                if ($sect{$name1} > $sect{$name2}) {
                                        warning("untranslated section found in original, skipping onto next section...");
                                        do {
                                                ($type1, $name1, $id1, $start1, $end1, $text1) = get_next_entry($orig, 'original');
                                        } while ($type2 ne 'SECTION');
                                } elsif ($sect{$name1} < $sect{$name2}) {
                                        warning("extra section found in translation, skipping onto next section...");
                                        do {
                                                ($type2, $name2, $id2, $start2, $end2, $text2) = get_next_entry($trans, 'translation');
                                        } while ($type2 ne 'SECTION');
                                }
                        } elsif ($id2 ne $id1) {
                                if ($id1 eq '') {
                                        warning("section id mismatch: \`<$name1\">' expected but \`<$name2 id=\"$id2\">' found");
                                } elsif ($id2 eq '') {
                                        warning("section id mismatch: \`<$name1 id=\"$id1\">' expected but no id found");
                                } else {
                                        warning("section id mismatch: \`<$name1 id=\"$id1\">' expected but \`<$name2 id=\"$id2\">' found");
                                }
                                #   Section ids differ, it means either
                                #   that it has been translated or a
                                #   section is missing.
                                #   The former is assumed here.
                                next;
                        }
                } else {
                        warning("extra text found in translation, skipping onto next section...");
                        do {
                                ($type2, $name2, $id2, $start2, $end2, $text2) = get_next_entry($trans, 'translation');
                        } while ($type2 ne 'SECTION');
                }
                goto NEXT;
        }

        #   Let's now discuss START
        warn "Malformed format l. $start1\n" if $type1 ne 'TEXT';
        while ($id2 =~ m/extra-trans-/) {
                message("Extra element \`<$name2 id=\"$id2\">' found, skipping onto next section...");
                $start = $start2;
                $name  = $name2;
                do {
                        $end = $end2;
                        ($type2, $name2, $id2, $start2, $end2, $text2) = get_next_entry($trans, 'translation');
                } while ($type2 ne 'SECTION');
                register_po_extra($start, $end, $name, $file, $start1);
        }

        if ($type2 eq 'SECTION') {
                warning("untranslated text found in original, skipping onto next section...");
                do {
                        ($type1, $name1, $id1, $start1, $end1, $text1) = get_next_entry($orig, 'original');
                } while ($type1 ne 'SECTION');
                goto NEXT;
        }

        if ($type2 eq 'TEXT' && $name2 eq $name1) {
                $text1 = escape_text($text1);
                $text2 = escape_text($text2);
                next MAIN unless length($text1);
                next MAIN if $text1 =~ m/\[po-debiandoc-dummy\]/;
                if (!defined($index{$text1})) {
                        $index{$text1} = $cnt;
                        $list[$cnt] = [];
                        $cnt++;
                }
                push (@{$list[$index{$text1}]},
                        $start1, $end1, $name1, $file, $start1, $text1, $text2);
        } else {
                warning("element mismatch: \`<$name1>' expected but \`<$name2>' found, skipping translation onto next section...");
                do {
                        ($type2, $name2, $id2, $start2, $end2, $text2) = get_next_entry($trans, 'translation');
                } while ($type2 ne 'SECTION');
                goto NEXT;
        }
}

FINISH:
if ($extracnt > 0) {
        no strict "refs";
        verbose("Reading translated file to retrieve text added...");
        my $lineno = 0;
        open(TRANS, "< $opt_T") or die "Unable to read from $opt_T\n";
        while (<TRANS>) {
                $lineno++;
                next unless defined $extra->{$lineno};
                $text = $_;
                verbose("Reading from line $lineno to $extra->{$lineno}->{end}");
                for ($lineno .. ($extra->{$lineno}->{end} - 1)) {
                        $text .= <TRANS> or die "Error when reading opt_T\n";
                }
                chomp $text;
                if ($extra->{$lineno}->{end} > $lineno &&
                    defined ($extra->{$extra->{$lineno}->{end}})) {
                        if ($text =~ s/\n([^\n]+)$/\n/s) {
                                $_ = $1;
                        } else {
                                $_ = $text;
                        }
                        $extra->{$lineno}->{text} = escape_text($text);
                        $lineno = $extra->{$lineno}->{end} - 1;
                        redo;
                } else {
                        $extra->{$lineno}->{text} = escape_text($text);
                        $lineno = $extra->{$lineno}->{end};
                }
        }
        close(TRANS);
}

foreach (@list) {
        my $pos = '';
        while (@entry = splice (@$_, 0, 7)) {
                print "#. <".$entry[2]."> ".$entry[0]."--".$entry[1]."\n";
                $pos .= "#: ".$entry[3].":".$entry[4]."\n";
                $text1 = $entry[5];
                $text2 = (ref($entry[6]) ? ${$entry[6]} : $entry[6]);
        }
        print $pos;
        print "msgid \"".$text1."\"\n";
        print "msgstr \"".$text2."\"\n\n";
}
1;
