#!/usr/bin/perl use Data::Dumper; print `cat header.java`; my $nomorerats=0; my $allmorph=0; my $morphfile=0; my $allspace=0; while() { my $norats=0; my $transient=0; my $type="normal"; if( /^; --- NORATS ---$/ ) { $nomorerats=1; } if( /^; --- MORPHOLOGY FILE ---$/ ) { $morphfile=1; } if( /^; --- MORPHOLOGY ---$/ ) { $allmorph=1; $allspace=0; } if( /^; --- SPACE ---$/ ) { $allspace=1; $allmorph=0; } if( /^; --- SELMAHO ---$/ ) { $allspace=0; $allmorph=0; } if( $allmorph ) { $type="morphology"; } if( $allspace ) { $type="whitespace"; } if( /NORATS/ ) { s/NORATS //; $norats=1; } if( /TRANSIENT/ ) { s/TRANSIENT //; $transient=1; } if( /MORPHOLOGY/ ) { s/^MORPHOLOGY //; $type="morphology"; } if( /SPACE/ ) { s/^SPACE //; $type="whitespace"; } if( /PUBLIC/ ) { s/^PUBLIC //; $type="public"; } if( /^;/ ) # If comment { s!^;!//!; print; next; } if( /^\s*$/ ) { print; next; } # Add type and change <- to = if( $morphfile == 1 ) { s/^(\S+) <- /String \1-morph = /; } else { if( $type eq "public" ) { s/^(\S+) <- /public String \1 = /; } else { s/^(\S+) <- /String \1 = /; } } # Fix - and _ s/[-_](.)/\u$1/g; # Add trailing ; s/(\S+.*)$/\1;/g; if( $morphfile == 1 ) { # Seperate into two halves before and after = $start = $_; $start =~ s/Morph = .*/Morph = /; chomp( $start ); $end = $_; $end =~ s/.*Morph = (.*)/\1/; # Add Morph to the second half. $end =~ s/(? [^()]+ ) # Non-parens without backtracking | (??{ $reparen }) # Group with matching parens )* \) }x; #print "realwork_pre: :$_:\n"; # If we have *nothing* but &FOO: if( /^\s*(([&!][a-zA-Z]+\s*)+)\s*$/ ) { return " $1 { yyValue = \"\"; } "; } # Recursion back-out. if( ! /[a-zA-Z]/ ) { return; } # Catch bare () productions if( /^\s*$\s*$\s*$/ || /^\s*$ \{ yyValue = \"\"; \} $\s*$/ ) { return "NULL"; } # Strip outermost parens if this production consists of only a paren statement if( /^$.*$$/ ) { s/$(.*)$/$1/g; $parens=1; } ## print "realwork: $_.\n"; # This section deals with semantic (curly-brace) expressions by # replacing the text of the semantic expression with "PARSERstuff" s/([&!^]?\{[^}]*\})/ my $first=$1; $j++; @stuff[$j] = $first; "SPECIALPARSERstuff$j"; /exg; # This section deals with parenthetical expressions within the current production # by seperating them out and calling realWork on them recursively. It also # replaces the text of the parenthetical expression with "PARSERstuff" # so that things don't get acted on too many times. s/([^(]*[^(-:]?)($reparen)(\S*)/ ## print "rw1: $1--rw2: $2--$3.\n"; my $first=$1; my $second=$2; my $third=$3; $j++; my $result=$first . "PARSERstuff$j" . $third; # If the paren production is empty. if( $second =~ m{^\s*$\s*$\s*$} ) { $j--; $result=$first . NULL . $third; # Else If the paren production is preceded by ! or &, do nothing. } elsif( $first =~ m{[!&]\s*$} ) { @stuff[$j] = $second; # Else If the paren prod is a repeater, special handling due to Rats! bug. } elsif( $third =~ m{^\s*[*+]} ) { @stuff[$j] = realWork($second, 0, $type); # Default case. } else { ## print "Before work: $result.\n"; @stuff[$j] = realWork($second, 0, $type ); ## print "PARSERstuff$j: @stuff[$j]\n"; ## print "After work: $result.\n"; } $result; /exg; ## print "After reparen1: $_.\n"; # Here we test for disjunctions (/) and pass each setion back to realWork. my $slashed=0; if( # Make sure there's something next to the slash, otherwise we're at the wrong level. ( m/^.*[a-zA-Z].*\/[^()]*[a-zA-Z][^()]*$/ ) || ( m/^[^()]*[a-zA-Z][^()]*\/.*[a-zA-Z].*$/ ) || ( m/^.*[a-zA-Z].*\/[^()]*$reparen[^()]*$/x ) || ( m/^[^()]*$reparen[^()]*\/.*[a-zA-Z].*$/x ) ) { $slashed=1; my $count=0; ## print "Doing slash on $_.\n"; # Handle all but the last section. s{([^/]+)/}{ ## print "slash: $1.\n"; realWork($1, 0, $type, \$count) . "/ $2"; }eg; ## print "After slash: $_.\n"; # Handle the last section. s{^(.*)/([^/]+)$}{ ## print "slash2: $2.\n"; "$1/ " . realWork($2, 0, $type, \$count); }eg; ## print "After slash2: $_.\n"; } ### # Naming (i.e. turning A <- B C into A <- b:B c:C) ### # # We don't want to name a sole element; that should just recurse normally. # my (@counter, $count); # $count = 0; # if( m/\b(? 0 ) { ## print "Stuff: " . Dumper(\@stuff) . ".\n"; s{\bPARSERstuff(\d+)}{$stuff[$1];}exg; s{\bSPECIALPARSERstuff(\d+)}{$stuff[$1];}exg; } ## print "After stuff: $_.\n"; # Build the yyValue construction at the end. if( @names ) { $_ .= " { yyValue = "; for my $elem (@names) { if( $elem && m/$elem/ ) { my $elem2; $elem2 = $elem; $elem2 =~ s/SEP\d+//; if( $elem =~ m/PARSERparen/ || $elem =~ m/SQUARE/ ) { $local_type = "parserParen"; } else { $local_type = $type; } #print "mscall: makeString( \" $elem2=(\", $elem, \") \", \"$local_type\" ) + \n"; $_ .= " makeString( \"$elem2\", $elem, \"$local_type\", false ) + "; } } s/\s*\+\s*$//; $_ .= " ; } "; } # Put parens back on. if( $parens ) { $_ = "($_)"; } s/!\./!\. { yyValue = "EOF"; }/; ## print "Setting countref to $i.\n"; $$countref = $i; ## print "Returning $_\n"; return $_; }