package xm::pfe::grabwordset;
use strict;
use xm::o;

sub DESC
{"
  will look for those macro-pairs that form an WORDSET-export table
  to be used on pfe module load. Mark them.
"}

sub ARGS { return xm::o::args_stdin(@_,DESC); }

sub do_block_2item
{
    if ($_[0] eq "CO" or
	$_[0] eq "CI" or
	$_[0] eq "CS" or
	$_[0] eq "CX" )
    {
	return
	    "<TYPEWORDREF>"
		.$_[0]."</TYPEWORDREF>".$_[1]
		    ."<CSTRWORDREF>".$_[2]."</CSTRWORDREF>".$_[3] 
			."<LINKWORDREF to=\"".$_[4]."-fcode\" >"
				.$_[4]."</LINKWORDREF to >".$_[5] ;
    }else{
	return
	    "<TYPEWORDREF>".$_[0]."</TYPEWORDREF>".$_[1]
		."<CSTRWORDREF>".$_[2]."</CSTRWORDREF>".$_[3] 
		    ."<LINKWORDREF>".$_[4]."</LINKWORDREF>".$_[5] ;
    }
}

sub do_block_1item
{
    return
	"<TYPEWORDREF>".$_[0]."</TYPEWORDREF>".$_[1]
	    ."<CSTRWORDREF>".$_[2]."</CSTRWORDREF>".$_[3] ;
}

sub do_block1
{
    my $in = shift;
    my $ws = shift;
    my $n = 0;
    
    $in =~ s{ (\b\w+\b) (\s*\(\s*) 
                  <(CSTR\w*\b[^<>]*)> ((?:.(?!</?CSTR\w*\b))*.) 
		      </CSTR\w*\b[^<>]*> 
			  (\s*\)\s*(?:\,|$ ))  } 
    { # replacing $3
	++$n;
	"<ITEMWORDREF id=\"".$ws.$n."\" >"
	    .do_block_1item($1,$2,$4,$5)
		."</ITEMWORDREF>"
		}gsex;
    
    $in =~ s{ (\b\w+\b) (\s*\(\s*) 
                  <(CSTR\w*\b[^<>]*)> ((?:.(?!</?CSTR\w*\b))*.) 
		      </CSTR\w*\b[^<>]*> 
			  (\s*\,\s*)
			      ([^\(\)\{\}\s]+ (?: \( [^\(\)\{\}]+ \) )?
			       | (?: <CSTR\w*\b[^<>]*> 
				  (?:.(?!</?CSTR\w*\b))*.? 
				  </CSTR\w*\b[^<>]*> )
			       | (?: <CCHR\w*\b[^<>]*> 
				  (?:.(?!</?CCHR\w*\b))*.? 
				  </CCHR\w*\b[^<>]*> )
			       )
				  (\s*\)\s*(?:\,|$ ))  } 
    { # replacing $3
	++$n;
	"<ITEMWORDREF id=\"".$ws.$n."\" >"
	    .do_block_2item($1,$2,$4,$5,$6,$7)
		."</ITEMWORDREF>"
		}gsex; 
    
    $in =~ s{(</ITEMWORDREF>)([\ \t]*<CDOC>[^<>]*</CDOC>)}{$2$1}gm;
    
    return "<LISTWORDREF>".$in."</LISTWORDREF>";
} 

sub do_block
{
    my $in = shift;
    my $ws = shift;
    
    my $head = "";
    my $tail = "";
    $in =~ s{(^ \s* \{ )} {$head = $1; ""}sex;
    $in =~ s{( \} \s* $)} {$tail = $1; ""}sex;
    
    return $head.do_block1($in,$ws.".ext.").$tail;
} 

sub DO
{
    my $in = shift;

    # non-CDEFS style    
    $in =~ s{ (\b(?:P4_)?LISTWORDS) (\s*\(\s*) (\w+) (\s*\)\s*=\s*)
        	 <(CBLK\d*)> ((?:.(?!</?CBLK\w*>))*.) </\5> (\s*\;\s*)
                 (\b(?:P4_)?COUNTWORDS) (\s*\(\s*) 
                 (\w+) (\s*\,\s*)
                 <(CSTR\w*)> ((?:.(?!</?CSTR\w*>))*.) </CSTR\w*> 
		     (\s*\)\s*\;) } 
            { print STDERR "<wordset $3>";
              "<ITEMWORDSET>"
              .$1.$2
              ."<NAMEWORDSET>".$3."</NAMEWORDSET>".$4
              ."<CBLKWORDSET>".do_block($6,$3)."</CBLKWORDSET>".$7 
		  # \5 replaced
              .$8.$9
              ."<NAMEWORDCNT>".$10."</NAMEWORDCNT>".$11    
		  # NAMEWORDSET == NAMEWORDCNT ?? -> !!
              ."<CSTRWORDCNT>".$13."</CSTRWORDCNT>".$14    
		  # \12 replaced
              ."</ITEMWORDSET>" }gsex;

#    # old-CDEFS style    
#    $in =~ s{ (<CDCL>\b(?:P4_)?LISTWORDS) (\s*\(\s*) (\w+) ((?:.(?!</?CDCL>))*.? </CDCL> \s*)
#        	 <(CBLK\d*)> ((?:.(?!</?CBLK\w*>))*.) </\5> </ITEMCDEF> (\s*\;\s*)
#                 (\b(?:P4_)?COUNTWORDS) (\s*\(\s*) 
#                 (\w+) (\s*\,\s*)
#                 <(CSTR\w*)> ((?:.(?!</?CSTR\w*>))*.) </CSTR\w*> (\s*\)\s*\;) } 
#            { print STDERR "<wordset $3>";
#              "<ITEMWORDSET>"
#              .$1.$2
#              ."<NAMEWORDSET>".$3."</NAMEWORDSET>".$4
#              ."<CBLKWORDSET>".do_block($6)."</CBLKWORDSET>".$7 # \5 replaced
#              .$8.$9
#              ."<NAMEWORDCNT>".$10."</NAMEWORDCNT>".$11    # NAMEWORDSET == NAMEWORDCNT ?? -> !!
#              ."<CSTRWORDCNT>".$13."</CSTRWORDCNT>".$14    # \12 replaced
#              ."</ITEMWORDSET></ITEMCDEF>" }gsex;

    # new-CDEFS style    
    $in =~ s{ (<CDCL>\b(?:P4_)?LISTWORDS) 
		  (\s*\(\s*) (\w+) ((?:.(?!</?CDCL>))*.? </CDCL> \s*)
        	 <(CBLK\d*)> ((?:.(?!</?CBLK\w*>))*.) </\5> 
		     (\s*\;?\s*) </ITEMCDEF> (\s*\;?\s*)
                 (\b(?:P4_)?COUNTWORDS) (\s*\(\s*) 
                 (\w+) (\s*\,\s*)
                 <(CSTR\w*)> ((?:.(?!</?CSTR\w*>))*.) </CSTR\w*> 
		     (\s*\)\s*\;) } 
            { print STDERR "<wordset $3>";
              "<ITEMWORDSET>"
              .$1.$2
              ."<NAMEWORDSET>".$3."</NAMEWORDSET>".$4
              ."<CBLKWORDSET>".do_block($6,$3)."</CBLKWORDSET>".$7 
		  # \5 replaced
              .$8.$9.$10
              ."<NAMEWORDCNT>".$11."</NAMEWORDCNT>".$12    
		  # NAMEWORDSET == NAMEWORDCNT ?? -> !!
              ."<CSTRWORDCNT>".$14."</CSTRWORDCNT>".$15    
		  # \13 replaced
              ."</ITEMWORDSET></ITEMCDEF>" }gsex;
    
    return $in;
} 

1;

