#!/usr/bin/perl
#
# splitnode.pl -- Split an HTML file produced by makeinfo by nodes.
# 
# This script generates two versions, both split up to one node per file.
# One is without frames,  the other uses a frameset at the Lexicon level. 
# (The other nodes are not duplicated.)
#
# There are a number of content assumptions in this script.  
#
# 1. The frame where the subnodes immediately following the lexicon node
#    are collected is called `letters_frame'
# 2. The frame for the second-level subnodes is called `headwords_frame'.
# 3. The frame for the third-level subnodes is called `entry_frame'.
# 4. The `canonicalize' function contains some hacks that avoid NT brain death.
#
# For more assumptions, see the FIXME comments.  This version works with
# makeinfo 4.0 only.
#
$| = 1;				# autoflush the selected output handle

use Getopt::Long;
use strict;
use vars '$title', '$graphdir', '$framenode', '$infile', '$dictionary'; 
use vars '$current_node', '$latch', '$lasthdr', '$version', '$lexicon_link';
use vars '$framenode_seen', '$framenode_depth', '@firstnode', '$topnode';
use vars '$level', '%title', '%next', '%prev', '%up', '%depth', '%expanded';

#
# Command line:
#
#   splitnode.pl --title=TITLE --framenode=NODE [--version=VERSION]
#                [--graphdir=DIR] INFILE
#
# GetOptions reports the offending option itself and returns false, so
# stop here rather than run with a half-understood command line.
#
GetOptions('title=s', \$title,
	   'framenode=s', \$framenode,
	   'version=s', \$version,
	   'graphdir=s', \$graphdir)
    or die("Usage: $0 --title=TITLE --framenode=NODE [--version=VERSION] [--graphdir=DIR] INFILE\n");
$infile = $ARGV[0];

die("No title specified\n") unless $title;
die("No input specified\n") unless $infile;

# The frame node names the node whose subnodes are shown in frames.
# Without it no node can match, so say so instead of failing silently.
warn("No frame node specified; the framed version will not be usable\n")
    unless $framenode;

# The version is only ever shown in parentheses after the frame node name.
$version = " ($version)" if $version;

#
# Clear the decks...
#
system("rm -fr html/*");

#
# First pass: build references.
#
# Reads the makeinfo output once and records, for every node, its title
# (%title), its next/previous/up neighbours (%next, %prev, %up) and its
# depth (%depth).  %expanded maps each anchor name to the URL it will
# have after splitting.  @firstnode remembers the first node met at each
# depth after the frame node has gone by.
#
$dictionary = &canonicalize($framenode);

open(INFILE, "<", $infile) or die("Cannot open $infile: $!\n");
$current_node = "index";
$title{"index"} = "Contents";
while (<INFILE>)
{
    # Build node-to-nodefile mapping
    if (/^Node:/)
    {
        die "Internal error while looking for node name on $_"
            unless /<a name="([^"]*)">([^<]*)<\/a>/;
        $current_node = &canonicalize($1);

        # index.html and letters.html are generated by this script.
        die("The node name $current_node is reserved.\n")
            if ($current_node eq "index" || $current_node eq "letters");

        $title{$current_node} = $2;
        $expanded{$current_node} = "${current_node}.html";
    } elsif (/^Next:<a rel=next href="#([^"]*)">/) {
        my($next);
        $next = $1;
        $next = "index" if $next eq "Top";
        $next{$current_node} = &canonicalize($next);
    } elsif (/^Previous:<a rel=previous href="#([^"]*)">/) {
        my($previous);
        $previous = $1;
        $previous = "index" if $previous eq "Top";
        $prev{$current_node} = &canonicalize($previous);
    } elsif (/<a rel=up href="#([^"]*)">/) {
        my($parent, $current_depth);
        $parent = $1;
        $parent = "index" if $parent eq "Top";
        $up{$current_node} = &canonicalize($parent);
        $topnode = $current_node unless $topnode;

        # A node sits one level below its parent.  A parent whose own
        # depth was never recorded counts as depth 0.
        $parent = $up{$current_node};
        if (!$parent || $parent eq "Top")
        {
            $current_depth = 0;
        }
        else
        {
            $current_depth = $depth{$parent} + 1;
        }
        $depth{$current_node} = $current_depth;

        # Only nodes after the frame node are candidates; the test comes
        # before the flag is set so the frame node never records itself.
        if ($framenode_seen) {
            unless (defined $firstnode[$current_depth]) {
                $firstnode[$current_depth] = $current_node;
            }
        }
        if ($current_node eq $dictionary) {
            $framenode_seen = 1;
            $framenode_depth = $depth{$current_node};
        }
    }
    else
    {
        my($target);

        # Note: This only handles one anchor per line.
        # Sufficient for our purposes.
        if (/<a name="([^"]*)">/) {
            # An anchor inside a node becomes a fragment of that node's file.
            $target = &canonicalize($1);
            $expanded{$target} = "${current_node}.html#$target" if $target ne $current_node;
        }
    }
}
close(INFILE);

# print "Firstnode: ", join(" ", @firstnode), "\n";

#
# Second pass: rewrite HREFs and split files
#
open(INFILE, "<", $infile) or die("Cannot open $infile: $!\n");

# The output tree has to exist before any file can be created in it.
foreach my $dir ("html", "html/entry") {
    next if -d $dir;
    mkdir($dir, 0777) or die("Cannot create directory $dir: $!\n");
}

# Everything that precedes the first split node goes to the index page.
open(SPLIT, ">", "html/index.html")
    or die("Cannot write html/index.html: $!\n");
&header($title, *SPLIT);

# letters.html collects one link per node directly below the frame node;
# its links open in the frame named headwords_frame.
open(LETTERS, ">", "html/letters.html")
    or die("Cannot write html/letters.html: $!\n");
print LETTERS <<EOF;
<HTML>
<HEAD>
<BASE TARGET="headwords_frame">
<TITLE>${framenode}${version}</TITLE>
<BODY>
<CENTER>
<B>${framenode}${version}</B>
<HR>
<A HREF="../index.html" TARGET="_parent">Up</A><P>
EOF

# The link to the frame node as it reads once its HREF has been rewritten.
# The main loop replaces it with a choice of framed and frameless versions.
$lexicon_link = "<a href=\"${dictionary}.html\">$framenode</a>";
while (<INFILE>)
{
    if (/^Node:<a name="([^"]*)">([^<]*)<\/a>/)
    {
        # A node header: finish the current output file and start the
        # file for this node.  Take the captures before anything else runs.
        my $node_title = $2;
        my $node = &canonicalize($1);
        my $prefix = "";	# subdirectory the node file is written to
        my $popup = "";		# path from that subdirectory back to html/

        next if ($node eq 'Top');

        # Level 1: directly below the frame node (a letter section).
        # Level 2: one level further down (an entry), kept in html/entry.
        # Level 0: everything else.
        $level = 0;
        if ($up{$node} eq $dictionary) {
            $level = 1;
        } elsif ($up{$up{$node}} eq $dictionary) {
            $level = 2;
            $prefix = "entry/";
            $popup = "../";
        }

        # Repeat the navigation bar at the foot of the file being closed.
        print SPLIT "<P>$lasthdr";
        &trailer(*SPLIT);
        close(*SPLIT);

        my $outfile = &nodefile($prefix . $node);
        open(*SPLIT, ">", $outfile) or die("Cannot write $outfile: $!\n");
        &header($node_title);
        $lasthdr = &navbar($prev{$node}, $up{$node}, $next{$node}, $popup);
        print SPLIT $lasthdr;

        # Lexicon letter sections
        if ($level == 1) {
            print LETTERS "<A HREF=\"${node}-frame.html\">" , &titlestrip($title{$node}), "</A>\n";

            # Close the previous letter's headword list; there is none
            # before the first letter section.
            if (defined fileno(DEFS)) {
                print DEFS "</body></html>\n";
                close(DEFS);
            }

            my $listfile = "html/${node}-frame.html";
            open(DEFS, ">", $listfile) or die("Cannot write $listfile: $!\n");
            print DEFS "<HTML>\n";
            print DEFS "<HEAD>\n";
            print DEFS "<TITLE>$node</TITLE>\n";
            print DEFS "<BASE TARGET=\"entry_frame\">\n";
            print DEFS "<BODY>\n";
        }

        # Lexicon entries
        if ($level == 2) {
            print DEFS "<A HREF=\"", $prefix . $node, ".html\">", &detexinfoize($title{$node}), "</A><BR>\n";
        }

        next;	# we generate our own nodes
    }
    elsif (!/^<p><hr>$/ && !/^Node:/ && !/^Previous:/ && !/^Next:/ && !/^Up:/)
    {
        my($realspaces);

        # Turn every in-page reference into a reference to the file (and
        # fragment) the first pass assigned to that anchor.
        while (/href="#([^"]*)">/)
        {
            my($before, $after, $target) = ($`, $', $1);
            my $expansion = "";

            if ($expanded{$target}) {
                $expansion = $expanded{$target};
            } elsif ($expanded{&canonicalize($target)}) {
                $expansion = $expanded{&canonicalize($target)};
            } elsif ($target ne '(dir)') {
                die("Location of $target is unknown\n");
            }
            # A reference to (dir) is left with an empty HREF.

            $_ = $before . "href=\"$expansion\">" . $after;
        }


        # Here's where we make both framed and frameless versions available
        # The link is matched literally; on a match the line continues
        # with the %20-decoded text.
        $realspaces = $_;
        $realspaces =~ s/%20/ /g;
        if ($realspaces =~ /\Q$lexicon_link\E/)
        {
            $_ = $` . "<a href=${dictionary}-framed.html>With frames</a> or <a href=${dictionary}.html>without frames</a>". $';
        }

        # Here's where we hack the indirection to the entry subdirectory
        # into what makeinfo handed us.
        if ($level != 2)	# Don't do it on entry nodes
        {
            my($done) = '';
            while (/<a href="([^"]*)\.html">/) {
                my($pre, $what, $post, $stem) = ($`, $&, $', $1);

                if ($up{$up{$stem}} eq $dictionary) {
                    $done .= $pre . "<a href=\"entry/${stem}.html\">";
                } else {
                    $done .= $pre . $what;
                }
                $_ = $post;	# keep scanning the rest of the line
            }
            $_ = $done . $_;
        }

        # Fix references from within entries to sections
        if ($level == 2)
        {
            # Exclude : in order to avoid nailing URLs to other hosts.
            my($done) = '';
            while (/<a href="([^.:][^:"]*)\.html">/) {
                my($pre, $what, $post, $stem) = ($`, $&, $', $1);

                if ($up{$up{$stem}} ne $dictionary) {
                    $done .= $pre . "<a href=\"../${stem}.html\">";
                } else {
                    $done .= $pre . $what;
                }
                $_ = $post;	# keep scanning the rest of the line
            }
            $_ = $done . $_;
        }

        # Hack external URLS so jumping to them will replace the frameset.
        # This way the viewer won't get frames inside frames.
        # The rewritten link no longer ends in `">', so it is not matched again.
        while (/<a href="([a-z]*):\/\/([^"]*)">/)
        {
            $_ = $` . "<a href=\"$1://$2\" target=\"_parent\">" . $';
        }

        print SPLIT $_;
    }
}
# Finish the last headword list, if a letter section was ever started.
if (defined fileno(DEFS)) {
    print DEFS "</body></html>\n";
    close(DEFS);
}

print LETTERS "</CENTER>\n</BODY>\n</HTML>\n";
close(*LETTERS);

# Close the last node file, repeating its navigation bar at the foot.
print SPLIT $lasthdr;
&trailer(*SPLIT);
close(*SPLIT);

close(INFILE);

# The frameset opens on the first letter section and the first entry
# after the frame node.  Without all three pieces the frameset would
# point at files that do not exist, so it is not written at all.
if ($framenode_seen
    && defined $firstnode[$framenode_depth+1]
    && defined $firstnode[$framenode_depth+2])
{
    &makeframe("$firstnode[$framenode_depth+1]-frame.html", 
	       "$firstnode[$framenode_depth+2].html", 
	       "letters.html", "html/${dictionary}-framed.html");
}
else
{
    warn("Frame node or its first subnodes not found; no framed version written\n");
}

#
# Subroutines
#

sub nodefile
# Turn a node file stem (which may carry a subdirectory prefix such as
# "entry/") into the path of its file under the html output directory.
{
    my $stem = shift;

    return "html/" . $stem . ".html";
}

sub titlestrip
# Reduce a node title to its ASCII letters and digits, so that the
# letter-section links in the letters page stay short.
# FIXME: Assumption about the node format
{
    my $label = shift;

    # Drop every character that is not an ASCII letter or digit.
    $label =~ s/[^A-Za-z0-9]//g;

    return $label;
}

sub detexinfoize
# This is used to resolve Texinfo escapes in letter indexes
# which do not get processed by Texinfo.
#
# Takes one line of text and returns a copy in which every `@TeX{}' has
# become `TeX' and every `@@' has become `@'.
{
    my ($line) = @_;

    # The braces are escaped because newer perls deprecate or reject an
    # unescaped literal `{' inside a pattern.
    $line =~ s/\@TeX\{\}/TeX/g;	# FIXME: Content dependencies here
    $line =~ s/\@\@/\@/g;

    return $line;
}

sub makeframe
# Make the top-level frame of the framed version.
#
#   $firstletter  page first shown in headwords_frame
#   $firstdef     page (relative to entry/) first shown in entry_frame
#   $noframes     page shown in letters_frame and offered to browsers
#                 without frame support
#   $outfile      path of the frameset file to write
#
# Dies if the frameset file cannot be written.
{
    my($firstletter, $firstdef, $noframes, $outfile) = @_;
    my $frame;

    open($frame, ">", $outfile) or die("Cannot write $outfile: $!\n");

    # The document is closed with </HTML> below, so open it as well.
    print $frame "<HTML>\n";
    print $frame " <FRAMESET COLS=\"32%,*\">\n";
    print $frame "  <FRAMESET ROWS=\"20%,*\">\n";
    print $frame "   <FRAME NAME=letters_frame SRC=\"$noframes\">\n";
    print $frame "   <FRAME NAME=headwords_frame SRC=\"" . $firstletter . "\">\n";
    print $frame "  </FRAMESET>\n";
    print $frame "  <FRAME NAME=entry_frame SRC=\"entry/" . $firstdef . "\">\n";
    print $frame " <NOFRAMES>\n";
    print $frame " <BODY>\n";
    print $frame "  <P>Click <A HREF=\"$noframes\">here</A> for a\n";
    print $frame "     non-frames version.<P>\n";
    print $frame " </BODY>\n";
    print $frame " </NOFRAMES>\n";
    print $frame " </FRAMESET>\n";
    print $frame "</HTML>\n";

    # Buffered write errors only show up when the file is closed.
    close($frame) or die("Cannot finish writing $outfile: $!\n");
}

sub canonicalize
# Turn a node or anchor name into the form used for file names and for
# the keys of the lookup tables.
{
    my $name = shift;

    # NT brain-death.
    #
    # "con" is a special file name there, so it gets a substitute.
    $name = "sf-con" if $name eq "con";

    for ($name) {
        tr/*|><//d;			# characters NT is unhappy about
        s/%20/-/g;			# escaped spaces become dashes
        s/ /-/g;			# and so do literal ones
        s/%[0-9a-f][0-9a-f]//g;		# URL escapes generated by makeinfo
        s{[/&?]}{}g;			# corresponding special characters
    }

    return $name;
}

sub header
# Write the opening of an HTML page, up to and including <body>.
#
#   $title  text for the page's <title> element
#   $fp     filehandle to write to; optional, defaults to SPLIT
{
    my($title, $fp) = @_;

    # Callers that pass no handle get the current split file.
    $fp = *SPLIT unless defined $fp;

# I used to generate this into the split files:
#
# <SCRIPT LANGUAGE="JavaScript">
#     <!-- if (window != window.top) top.location.href = location.href; -->
# </SCRIPT>
#
# Lloyd Wood <l.wood@eim.surrey.ac.uk> writes:
# 
# That line is intended to ensure that the current page (jargon entry)
# appears by itself in a single frameless page, even when a page is
# intended to be framed.
# 
# My advice: the intent makes no sense in a page intended to be framed -
# and you get different behaviour if javascript is disabled, anyway.
# Remove the line. (I could add stuff about the window/frame structure
# and how definitions of window.top vary... this line would have made
# more sense in the frameless version when you were maintaining
# different framed and frameless entries.)

    print {$fp} <<EOF;
<html lang="en"><head>
<title>$title</title>
<meta http-equiv="Content-Type" content="text/html">
</head><body>

EOF

}

sub trailer
# Write the closing </body> and </html> tags to the given filehandle.
{
    my $out = shift;

    print {$out} "\n</body></html>\n"
}

sub navbar
# Build the navigation bar for a node: a one-row table holding links to
# the previous, up and next nodes.
#
#   $prev, $up, $next  names of the neighbouring nodes; a false value
#                      leaves the corresponding cell without a link
#   $popup             prefix put in front of the up link and of the
#                      image paths
#
# A link gets an image when the matching file exists under $graphdir.
# Returns the HTML text of the table.
{
    my($prev, $up, $next, $popup) = @_;
    my $spacer = "<spacer type=horizontal size=5>";
    my($prevcell, $upcell, $nextcell) = ($prev, $up, $next);

    if ($prev)
    {
        my $icon = (-f "$graphdir/prev.png")
            ? "<img src=../$popup$graphdir/prev.png alt=Previous>" : "";
        $prevcell = "<a href=\"${prev}.html\">" . $icon . $spacer
            . $title{$prev} . "</a>";
    }

    if ($next)
    {
        my $icon = (-f "$graphdir/next.png")
            ? "<img src=../$popup$graphdir/next.png alt=Next>" : "";
        $nextcell = "<a href=\"${next}.html\">" . $icon . $spacer
            . $title{$next} . "</a>";
    }

    if ($up)
    {
        # Only the up link takes the $popup prefix and replaces the
        # whole frameset.
        my $icon = (-f "$graphdir/up.png")
            ? "<img src=../$popup$graphdir/up.png alt=Up>" : "";
        $upcell = "<a href=\"${popup}${up}.html\" target=_parent>" . $icon
            . $spacer . $title{$up} . "</a>";
    }

    return join("",
        "<table width=\"100%\" border=1>\n",
        "<tr>\n",
        (map { " <td width=\"33%\">$_</td>\n" } $prevcell, $upcell, $nextcell),
        "</tr></table>\n");
}
