#!/usr/bin/awk -f
###############################################################################
#
# FILE: y2l
# DESCRIPTION: Yacc-to-LaTeX grammar processor
#
# Copyright (c) 1994-2001 by Kris Van Hees, Belgium. All rights reserved.
# See the "Artistic License" included in this package (file: "Artistic") for
# terms and conditions.
#
# $Id: y2l,v 1.1 2003/08/10 15:18:49 bgiroud Exp $
#
###############################################################################
#
# Reserved variables:
# _debug Debug level
# _exit Exiting due to an error
# _line Current line being processed
# _spclen Length of string holding padding spaces
# _spcstr String to hold padding spaces
#
###############################################################################
# Support functions
###############################################################################
#
# NAME: SPACES()
# DESCRIPTION: return a given number of spaces
#
function SPACES(snum) {
if (!_spclen) { # uninitialized (first use)
_spcstr = " ";
_spclen = 1;
}
while (_spclen < snum) { # extend the string of spaces
_spcstr = _spcstr _spcstr; # double the spaces
_spclen *= 2; # update the string length
}
return substr(_spcstr, 1, snum); # requested number of spaces
}
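#
# For example, SPACES(3) returns "   ". The pad string is grown by
# repeated doubling, so even a large request needs only a handful of
# concatenations.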
#
# NAME: DBG()
# DESCRIPTION: write debug output to standard error (if debugging is enabled)
#
function DBG(dlvl, dmsg) {
if (!_debug) # debugging is disabled
return;
print "DBG (" SPACES(5 - length(_line)) _line ")" SPACES(dlvl * 2 + 1) \
dmsg >"/dev/stderr";
}
#
# NAME: error()
# DESCRIPTION: write an error message to standard error
#
function error(emsg) {
print "ERROR: " emsg >"/dev/stderr";
if (_line)
print " Line is (on or about) " _line >"/dev/stderr";
_exit = 1; # mark program as exiting
exit;
}
#
# NAME: exiting()
# DESCRIPTION: return whether the program is exiting
#
function exiting() {
return _exit; # exit status
}
#
# NAME: exists()
# DESCRIPTION: return whether a file exists
#
function exists(fname) {
return !system("ls " fname " >/dev/null 2>&1");
}
###############################################################################
# Yacc-to-LaTeX initialization
###############################################################################
#
# NAME: init()
# DESCRIPTION: initialization
#
function init() {
PERC_PERC = 257;
PERC_LACC = 258;
PERC_RACC = 259;
PERC_LEFT = 260;
PERC_ASSC = 261;
PERC_RGHT = 262;
PERC_STRT = 263;
PERC_TOKN = 264;
PERC_TYPE = 265;
PERC_UNIO = 266;
PERC_PREC = 267;
CHAR = 268;
IDENT = 269;
STRING = 270;
TYPE = 271;
NUMBER = 272;
EOF = -1;
BAD = -2;
reserved["%%"] = PERC_PERC;
reserved["%{"] = PERC_LACC;
reserved["%}"] = PERC_RACC;
reserved["%left"] = PERC_LEFT;
reserved["%nonassoc"] = PERC_ASSC;
reserved["%prec"] = PERC_PREC;
reserved["%right"] = PERC_RGHT;
reserved["%start"] = PERC_STRT;
reserved["%token"] = PERC_TOKN;
reserved["%type"] = PERC_TYPE;
reserved["%union"] = PERC_UNIO;
}
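#
# The codes above mimic yacc's own convention: multi-character tokens
# get values past the single-character range, while lex() returns
# one-character punctuation as the character itself.
#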
###############################################################################
# Yacc-to-LaTeX processor functions
###############################################################################
#
# NAME: token()
# DESCRIPTION: verify whether something is a valid token
#
function token(tokname) {
return tokname ~ /^[_\.A-Za-z][_\.A-Za-z0-9]*$/;
}
#
# NAME: read()
# DESCRIPTION: read another (non-blank) line from the input file
#
function read() {
while ((getline < grammar_file) == 1) {
_line++;
if ($1 != "")
return;
}
return grammar_eof = 1;
}
#
# NAME: skip_ws()
# DESCRIPTION: skip a continuous block of whitespace
#
function skip_ws() {
in_comment = 0;
while (!grammar_eof) {
sub(/^[ \t]+/, ""); # remove leading whitespace
if ($1 != "") {
if (/^\/\//) { # C++ comment
$0 = "";
} else if (in_comment) { # in a comment
if (match($0, /\*\//)) { # end of a comment
$0 = substr($0, RSTART + RLENGTH);
in_comment = 0;
} else
$0 = "";
} else if (/^\/\*"[^"]+"\*\//) { # special marker
sub(/\/\*"/, "\"");
sub(/"\*\//, "\"");
return;
} else if (/^\/\*/) { # regular comment
$0 = substr($0, 3);
in_comment = 1;
} else
return;
sub(/[ \t]+$/, ""); # remove trailing whitespace
}
if ($1 == "")
read();
}
}
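#
# Note the special marker form: a comment written as
#     /*"KEYWORD"*/
# is not discarded but rewritten to the quoted literal "KEYWORD", which
# lets a grammar annotate the concrete text a token stands for.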
#
# NAME: lex()
# DESCRIPTION: read the next token from the input
#
function lex() {
if (tok_prev) {
tok = tok_prev;
tok_str = tok_pstr;
tok_prev = 0;
return tok;
}
skip_ws();
if (grammar_eof)
return EOF;
if (/^%/)
if (match($0, /^%(%|{|}|left|nonassoc|prec|right|start|token|type|union)/)) {
tok_str = substr($0, 1, RLENGTH);
$0 = substr($0, RLENGTH + 1);
return reserved[tok_str];
} else if (match($0, /^%[_A-Za-z][_A-Za-z0-9]*/)) {
tok_str = substr($0, 1, RLENGTH);
$0 = substr($0, RLENGTH + 1);
return BAD;
}
if (match($0, /^[_\.A-Za-z][_\.A-Za-z0-9]*/)) {
tok_str = substr($0, 1, RLENGTH);
$0 = substr($0, RLENGTH + 1);
return IDENT;
}
if (match($0, /^<[_\.A-Za-z][_\.A-Za-z0-9]*>/)) {
tok_str = substr($0, 1, RLENGTH);
$0 = substr($0, RLENGTH + 1);
return TYPE;
}
if (match($0, /^[0-9]+/)) { # numeric token value
tok_str = substr($0, 1, RLENGTH);
$0 = substr($0, RLENGTH + 1);
return NUMBER;
}
if (/^'/)
if (match($0, /^'(.|\\([tnarfbv\\'"]|[0-7][0-7]*))'/)) {
tok_str = "@C" (consts++);
transtab[tok_str] = "\"" substr($0, 2, RLENGTH - 2) "\"";
$0 = substr($0, RLENGTH + 1);
return CHAR;
} else
error("Excuse me, but this character constant is a bit weird.");
if (/^"/)
if (match($0, /([^\\]|[ \t]+)(\\\\)*"/)) {
tok_str = "@S" (strings++);
transtab[tok_str] = substr($0, 1, RSTART + RLENGTH - 1);
$0 = substr($0, RSTART + RLENGTH);
return STRING;
} else
error("Newlines in strings are interesting, but not allowed.");
tok_str = substr($0, 1, 1);
$0 = substr($0, 2);
return tok_str;
}
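#
# For example, the declaration line
#     %token FOO "foo" 'x'
# comes back from successive lex() calls as PERC_TOKN, IDENT ("FOO"),
# STRING (filed as @S0 with transtab["@S0"] = "\"foo\"") and CHAR
# (filed as @C0 with transtab["@C0"] = "\"x\"").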
#
# NAME: unlex()
# DESCRIPTION: return a token to the input stream
#
function unlex(tok) {
tok_prev = tok;
tok_pstr = tok_str;
}
#
# NAME: skip_definition()
# DESCRIPTION: skip the contents of a %{ ... %} block
#
function skip_definition() {
do {
skip = lex();
} while (skip != PERC_RACC && skip != EOF);
}
#
# NAME: decl_token()
# DESCRIPTION: read a token declaration
#
function decl_token() {
first = 1;
do {
tok = lex();
if (tok == ",") {
symbol = 0;
} else if (tok == CHAR) {
DBG(1, transtab[tok_str] ": No need to remember this token.");
} else if (tok == IDENT) {
if (_tkpat && tok_str !~ _tkpat) {
if (transtab[tok_str])
DBG(2, "WARNING: Redefining '" tok_str "'.");
transtab[tok_str] = "\"" tolower(tok_str) "\"";
DBG(1, tok_str ": Defined as " transtab[tok_str] ".");
}
symbol = tok_str;
} else if (tok == NUMBER) {
if (!symbol)
error("How about giving a token first?");
symbol = 0;
} else if (tok == STRING) {
if (!symbol)
error("How about giving a token first?");
str = transtab[tok_str];
transtab[symbol] = "\"" substr(str, 2, length(str) - 2) "\"";
DBG(1, SPACES(length(symbol) + 2) \
"Defined as " transtab[symbol] ".");
symbol = 0;
} else if (tok == TYPE) {
if (!first)
error("This is no place for a type name.");
} else {
unlex(tok);
return;
}
first = 0;
} while (tok != EOF);
}
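#
# This accepts the usual yacc forms, e.g.
#     %token <val> FOO 300 BAR "bar"
# where the <val> tag may only appear first, the number 300 merely sets
# FOO's token value, and the string records the literal text for BAR.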
#
# NAME: decl_start()
# DESCRIPTION: read a start rule declaration
#
function decl_start() {
if (grammar_start)
error("Hm, you want the grammar to start with two rules?");
else if (lex() == IDENT) {
grammar_start = tok_str;
DBG(2, "The grammar start rule is '" grammar_start "'.");
} else
error("How about a nice juicy identifier?");
}
#
# NAME: decl_type()
# DESCRIPTION: read a type declaration
#
function decl_type() {
if (lex() != TYPE)
error("So where is the typename?");
do {
tok = lex();
if (tok == ",") {
symbol = 0;
} else if (tok == CHAR) {
error("Bison etc may accept literals in a %type declaration, " \
"but the Unix 7th\n Ed manual clearly indicates " \
"that it is NOT legal. And I think that the\n Bell " \
"Labs guys know what they are talking about; but anyway, " \
"do you\n really spend the time reading this long " \
"error message?");
} else if (tok == IDENT) {
if (_tkpat && tok_str !~ _tkpat) {
if (transtab[tok_str])
DBG(2, "WARNING: Redefining '" tok_str "'.");
transtab[tok_str] = "\"" tolower(tok_str) "\"";
DBG(1, tok_str ": Defined as " transtab[tok_str] ".");
}
symbol = tok_str;
} else if (tok == NUMBER) {
if (!symbol)
error("How about giving a token first?");
symbol = 0;
} else if (tok == STRING) {
if (!symbol)
error("How about giving a token first?");
str = transtab[tok_str];
transtab[symbol] = "\"" substr(str, 2, length(str) - 2) "\"";
DBG(1, SPACES(length(symbol) + 2) \
"Defined as " transtab[symbol] ".");
symbol = 0;
} else {
unlex(tok);
return;
}
} while (tok != EOF);
}
#
# NAME: decl_union()
# DESCRIPTION: read a union declaration
#
function decl_union() {
if (grammar_union)
error("How about sticking to one single union declaration?");
grammar_union = 1;
DBG(2, "Extended types have been registered with a union declaration.");
do {
tok = lex();
if (tok == "{")
block++;
else if (tok == "}") {
if (!block)
error("Why close an unopened block?");
if (--block <= 0)
return;
} else if (tok == EOF)
error("The file ends before the union is finished. How rude!");
} while (1);
}
#
# NAME: read_declarations()
# DESCRIPTION: read the yacc declarations
#
function read_declarations() {
do {
tok = lex(); # next token
if (tok == PERC_PERC || tok == EOF) # end of the declarations
return;
if (tok == PERC_LACC) { # definition block
skip_definition();
} else if (tok == PERC_LEFT) { # left associative declaration
decl_token();
} else if (tok == PERC_ASSC) { # non-associative declaration
decl_token();
} else if (tok == PERC_RGHT) { # right associative declaration
decl_token();
} else if (tok == PERC_STRT) { # start rule declaration
decl_start();
} else if (tok == PERC_TOKN) { # token declaration(s)
decl_token();
} else if (tok == PERC_TYPE) { # type declaration
decl_type();
} else if (tok == PERC_UNIO) { # union declaration
decl_union();
} else
DBG(2, "WARNING: Ignoring the unknown token '" tok_str "'.");
} while (1);
}
#
# NAME: skip_action()
# DESCRIPTION: skip the contents of an action block
#
function skip_action() {
block = 1;
do {
tok = lex();
if (tok == "{")
block++;
else if (tok == "}") {
if (!block)
error("Why close an unopened block?");
if (--block <= 0)
return;
} else if (tok == EOF)
error("The file ends before the action is finished. How rude!");
} while (tok != EOF);
}
#
# NAME: read_grammar()
# DESCRIPTION: read the yacc grammar
#
function read_grammar() {
tok = lex();
do {
if (tok == PERC_PERC || tok == EOF) # end of the grammar
return;
if (tok == IDENT) { # rule begins here
if (!(rule_idx = rule_ref[tok_str])) {
rule_idx = ++rule_cnt; # new rule
rule_lhs[rule_idx] = tok_str;
rule_ref[tok_str] = rule_idx;
if (!grammar_start)
grammar_start = tok_str;
}
rule_sub = ++rule_len[rule_idx]; # next alternative for this rule
if (lex() != ":")
error("The LHS and RHS would like to be separated by a colon.");
} else if (tok == "|") { # alternative RHS for a rule
if (!rule_cnt)
error("The grammar shouldn't start with a |.");
rule_sub = ++rule_len[rule_idx];
} else
error("Why are you giving me '" tok "'?");
rule_rhs[rule_idx, rule_sub] = ""; # empty RHS
do { # read the RHS
tok = lex();
if (tok == PERC_PREC) { # %prec
tok = lex();
if (tok != IDENT && tok != CHAR)
error("What precedence are you talking about?");
tok = lex(); # continue with the rest
}
if (tok == IDENT) { # might be a new rule
old_str = tok_str;
nxt = lex(); # look ahead
unlex(nxt);
tok_str = old_str;
if (nxt == ":") # yup, a new rule started
break;
rule_rhs[rule_idx, rule_sub] = rule_rhs[rule_idx, rule_sub] \
" " tok_str;
} else if (tok == CHAR) {
if (tok_str ~ /^@/)
tok_str = transtab[tok_str];
rule_rhs[rule_idx, rule_sub] = rule_rhs[rule_idx, rule_sub] \
" " tok_str;
} else if (tok == "{") { # an action block
skip_action();
} else # can't be part of a rule
break;
} while (1);
sub(/^ /, "", rule_rhs[rule_idx, rule_sub]);
if (rule_rhs[rule_idx, rule_sub] ~ \
/(^|[^_\.A-Za-z0-9])error($|[^_\.A-Za-z0-9])/) {
DBG(1, rule_lhs[rule_idx] ": " rule_rhs[rule_idx, rule_sub] \
" [IGNORED]");
rule_rhs[rule_idx, rule_sub] = "";
rule_len[rule_idx]--;
} else
DBG(1, rule_lhs[rule_idx] ": " rule_rhs[rule_idx, rule_sub]);
if (tok == ";")
tok = lex();
} while (1);
}
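#
# For example,
#     expr : expr '+' expr { $$ = $1 + $3; } | NUM ;
# is stored as the two alternatives 'expr "+" expr' and 'NUM' for the
# non-terminal 'expr'; action blocks are skipped, and any alternative
# mentioning the special 'error' token is dropped altogether.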
#
# NAME: is_optional()
# DESCRIPTION: check whether the given non-terminal is optional
#
function is_optional(idx) {
if ((len = rule_len[idx]) <= 1)
return 0;
#
# One or more empty rules for a non-terminal indicate that the other rules
# are in fact optional. There shouldn't be multiple empty rules, but it
# is technically possible.
#
for (rule_sub = 1; rule_sub <= len; rule_sub++)
if (rule_rhs[idx, rule_sub] == "") {
while (++rule_sub <= len)
rule_rhs[idx, rule_sub - 1] = rule_rhs[idx, rule_sub];
rule_len[idx]--;
}
return rule_len[idx] < len;
}
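#
# For example, the yacc rule
#     opt_x : /* empty */ | 'x' ;
# yields the alternatives "" and "x"; the empty one is removed,
# rule_len[idx] drops back to 1, and the true return value tells the
# caller that the remaining rules are optional.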
#
# NAME: get_prefix()
# DESCRIPTION: check whether the given non-terminal has a common prefix
#
function get_prefix(idx) {
if ((len = rule_len[idx]) <= 1)
return 0;
#
# Split up the first rule into tokens. These will be compared with all the
# other rules for this non-terminal.
#
gp_last = split(rule_rhs[idx, 1], arr);
gp_pref = gp_last;
#
# Look for the longest common prefix.
#
for (rule_sub = 2; rule_sub <= len; rule_sub++) {
$0 = rule_rhs[idx, rule_sub];
gp_tokc = NF;
if (gp_tokc < gp_pref)
gp_pref = gp_tokc;
for (j = 1; j <= gp_pref; j++)
if (arr[j] != $j) {
if (!(gp_pref = j - 1))
return 0;
break;
}
}
#
# Construct the prefix string.
#
gp_pstr = arr[1];
for (j = 2; j <= gp_pref; j++)
gp_pstr = gp_pstr " " arr[j];
#
# Remove the common prefix from all rules for this non-terminal.
#
for (rule_sub = 1; rule_sub <= len; rule_sub++) {
$0 = rule_rhs[idx, rule_sub];
for (j = 1; j <= gp_pref; j++)
$j = "";
sub(/^ +/, "");
rule_rhs[idx, rule_sub] = $0;
}
return gp_pstr;
}
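#
# For example, with the rules
#     stmt : IF cond THEN body | IF cond THEN body ELSE body ;
# get_prefix() returns "IF cond THEN body" and strips it, leaving the
# alternatives "" and "ELSE body".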
#
# NAME: get_suffix()
# DESCRIPTION: check whether the given non-terminal has a common suffix
#
function get_suffix(idx) {
if ((len = rule_len[idx]) <= 1)
return 0;
#
# Split up the first rule into tokens. These will be compared with all the
# other rules for this non-terminal.
#
gs_last = split(rule_rhs[idx, 1], arr);
gs_suff = gs_last;
#
# Look for the longest common suffix.
#
for (rule_sub = 2; rule_sub <= len; rule_sub++) {
$0 = rule_rhs[idx, rule_sub];
gs_tokc = NF;
if (gs_tokc < gs_suff)
gs_suff = gs_tokc;
for (j = 0; j < gs_suff; j++)
if (arr[gs_last - j] != $(gs_tokc - j)) {
if (!(gs_suff = j))
return 0;
break;
}
}
#
# Construct the suffix string.
#
gs_sstr = arr[gs_last];
for (j = 1; j < gs_suff; j++)
gs_sstr = arr[gs_last - j] " " gs_sstr;
#
# Remove the common suffix from all rules for this non-terminal.
#
for (rule_sub = 1; rule_sub <= len; rule_sub++) {
$0 = rule_rhs[idx, rule_sub];
for (j = 0; j < gs_suff; j++)
$(NF - j) = "";
sub(/ +$/, "");
rule_rhs[idx, rule_sub] = $0;
}
return gs_sstr;
}
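#
# The mirror image of get_prefix(); e.g. the alternatives
#     count unit | size unit
# share the suffix "unit", which is returned and stripped, leaving the
# alternatives "count" and "size".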
#
# NAME: optimize1()
# DESCRIPTION: first pass of the optimizer
#
function optimize1() {
DBG(0, "Optimization pass 1...");
for (rule_idx = 1; rule_idx <= rule_cnt; rule_idx++) {
#
# Non-terminals with only a single rule can't be optimized here.
#
if ((len = rule_len[rule_idx]) <= 1)
continue;
#
# First record whether the entire non-terminal might be optional.
#
rule_opt[rule_idx] = is_optional(rule_idx);
#
# The actual optimization takes place in this endless loop. It will in
# fact end when an iteration does not yield an optional ruleset. This
# is a proper and correct stopping criterion, since a non-optional ruleset
# cannot have any common prefix or suffix. If it did, those would have
# been added to the already present prefix or suffix.
#
pref = "";
suff = "";
do {
if (pstr = get_prefix(rule_idx))
pref = pref " " pstr;
else if (sstr = get_suffix(rule_idx))
suff = sstr " " suff;
if (is_optional(rule_idx)) {
pref = pref " [";
suff = "] " suff;
} else {
if (pstr || sstr) {
pref = pref " (";
suff = ") " suff;
}
break;
}
} while (1);
#
# Compose the single composite rule for this non-terminal, if a common
# prefix or suffix was found.
#
if (pref != "" || suff != "") {
sub(/^ /, "", pref);
sub(/ $/, "", suff);
DBG(2, "Rules for '" rule_lhs[rule_idx] "' have:");
if (pref != "")
DBG(3, "Prefix '" pref "'");
if (suff != "")
DBG(3, "Suffix '" suff "'");
len = rule_len[rule_idx];
pref = pref " " rule_rhs[rule_idx, 1];
for (rule_sub = 2; rule_sub <= len; rule_sub++)
pref = pref " | " rule_rhs[rule_idx, rule_sub];
rule_rhs[rule_idx, 1] = pref " " suff;
rule_len[rule_idx] = 1;
DBG(3, "Combined rule '" rule_rhs[rule_idx, 1] "'");
if (rule_opt[rule_idx])
DBG(3, "(The non-terminal is optional.)");
}
}
}
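#
# Tying the helpers together: the 'stmt' example from get_prefix() above
# first loses its common prefix, leaving the alternatives "" and
# "ELSE body"; the empty alternative then makes the remainder optional,
# so the combined rule becomes "IF cond THEN body [ ELSE body ]".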
#
# NAME: optimize2()
# DESCRIPTION: second pass of the optimizer
#
function optimize2() {
DBG(0, "Optimization pass 2...");
for (rule_idx = 1; rule_idx <= rule_cnt; rule_idx++) {
$0 = rule_rhs[rule_idx, 1];
if ((len = rule_len[rule_idx]) > 1)
for (rule_sub = 2; rule_sub <= len; rule_sub++)
$0 = $0 " | " rule_rhs[rule_idx, rule_sub];
if ($1 != "[" && rule_opt[rule_idx]) {
$0 = "[ " $0 " ]";
rule_opt[rule_idx] = 0;
}
if ($1 == "[")
if ($2 == rule_lhs[rule_idx]) {
for (i = NF; i > 0; i--)
if ($i == "]")
break;
if ((NF - i) < 3) {
$1 = "";
subst = "{";
$i = "}";
while (++i <= NF)
subst = subst " " $i;
$2 = subst;
sub(/^ +/, "");
}
}
if ($NF == "]")
if ($(NF - 1) == rule_lhs[rule_idx]) {
for (i = 1; i <= NF; i++)
if ($i == "[")
break;
if (i < 3) {
$i = "{";
subst = "}";
$NF = "";
while (--i > 0)
subst = $i " " subst;
$(NF - 1) = subst;
sub(/ +$/, "");
}
}
if (rule_opt[rule_idx]) {
$0 = "[ " $0 " ]";
rule_opt[rule_idx] = 0;
}
rule_rhs[rule_idx, 1] = $0;
rule_len[rule_idx] = 1;
}
}
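#
# This pass turns immediate self-recursion inside an optional group into
# repetition. For example, 'list : /* empty */ | list item ;' leaves
# pass 1 as the optional rule "list item"; it is first bracketed to
# "[ list item ]" and then rewritten here as "{ item }" (zero or more).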
#
# NAME: latexify()
# DESCRIPTION: make substitutions to ensure that the string is LaTeX ok
#
function latexify(txt) {
gsub(/[{&%}]/, "$\\\\&$", txt);
gsub(/[!|[\]=+\-*\/<>]/, "$&$", txt);
gsub(/\$\$/, "\\!", txt);
gsub(/"[^"]+"/, "``{\\bf &}''", txt);
gsub(/_/, "\\_", txt);
gsub(/\^/, "\\verb+^+", txt);
gsub(/"/, "", txt);
return txt;
}
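#
# For example, latexify("\"foo\" | bar_baz") produces
#     ``{\bf foo}'' $|$ bar\_baz
# i.e. quoted literals are set in bold between LaTeX quotes, EBNF
# metacharacters go into math mode, and underscores are escaped.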
#
# NAME: break_string()
# DESCRIPTION: possibly break up a string in multiple lines
#
function break_string(str) {
match(str, /^([_A-Za-z][_A-Za-z0-9]* +| +)(::=|\|)/);
bs_len = RLENGTH;
bs_str = substr(str, 1, RLENGTH);
bs_pln = RLENGTH;
bs_pre = SPACES(RLENGTH);
$0 = substr(str, RLENGTH + 2);
for (i = 1; i <= NF; i++) {
if (bs_len + 1 + length($i) > 79) {
bs_str = bs_str "\n" bs_pre;
bs_len = bs_pln;
}
bs_str = bs_str " " $i;
bs_len += 1 + length($i);
}
return bs_str;
}
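#
# Output lines longer than 79 columns are broken at token boundaries;
# continuation lines are padded with spaces so the tokens line up just
# past the "::=" (or "|") that introduces the rule.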
#
# NAME: output()
# DESCRIPTION: write out the rewritten rules
#
function output(LaTeX) {
rule_use[grammar_start] = 1;
for (rule_idx = 1; rule_idx <= rule_cnt; rule_idx++) {
len = rule_len[rule_idx];
if (rule_opt[rule_idx]) {
rule_rhs[rule_idx, 1] = "[ " rule_rhs[rule_idx, 1];
rule_rhs[rule_idx, len] = rule_rhs[rule_idx, len] " ]";
}
str = LaTeX ? latexify(rule_lhs[rule_idx]) \
: rule_lhs[rule_idx];
if (length(str) > rule_max) {
rule_max = length(str);
rule_mls = str;
}
for (rule_sub = 1; rule_sub <= len; rule_sub++) {
$0 = rule_rhs[rule_idx, rule_sub];
for (j = 1; j <= NF; j++) {
#
# A couple of notes here... Non-terminals that merely have a
# single terminal as definition are substituted anywhere they
# are used. And some special casing is needed for situations
# where someone actually defines the non-terminals as tokens.
# Those should definitely not be quoted.
#
if ((idx = rule_ref[$j]) &&
rule_len[idx] == 1 && rule_rhs[idx, 1] ~ /^[^ ]+$/)
$j = rule_rhs[idx, 1];
else if ((str = transtab[$j]) && !rule_ref[$j])
$j = str;
rule_use[$j] = 1;
}
rule_rhs[rule_idx, rule_sub] = $0;
}
}
if (LaTeX)
#
# Some LaTeX magic... We calculate the available size for the RHS of
# the rules. This will be provided as argument to the minipages used
# for each independent rule.
#
# The formula is fairly easy:
# textwidth - width(LHS) - width("::=") - 6 * tabcolsep
#
print "\\newlength\\rulelhs\n" \
"\\newlength\\rulemid\n" \
"\\newlength\\rulerhs\n" \
"\\settowidth\\rulelhs{" rule_mls "}\n" \
"\\settowidth\\rulemid{::=}\n" \
"\\setlength\\rulerhs{\\textwidth}\n" \
"\\addtolength\\rulerhs{-\\rulelhs}\n" \
"\\addtolength\\rulerhs{-\\rulemid}\n" \
"\\addtolength\\rulerhs{-6\\tabcolsep}\n\n" \
"\\begin{longtable}{lrl}";
for (rule_idx = 1; rule_idx <= rule_cnt; rule_idx++) {
if (!rule_use[rule_lhs[rule_idx]])
continue;
len = rule_len[rule_idx];
for (rule_sub = 1; rule_sub <= len; rule_sub++) {
if (LaTeX) {
str = latexify(rule_lhs[rule_idx]);
out = str SPACES(rule_max - length(str)) " & ::= &\n" \
" \\begin{minipage}[t]{\\rulerhs}\n" \
" \\raggedright\n ";
} else {
str = rule_lhs[rule_idx];
out = str SPACES(rule_max - length(str)) " ::= ";
}
rhs = rule_rhs[rule_idx, rule_sub];
lvl = 0;
while (match(rhs, /(^[\(\{\[] | [\(\{\[\|\]\}\)] | [\]\}\)]$)/)) {
o_beg = RSTART;
o_len = RLENGTH;
if (substr(rhs, o_beg) ~ /(^[\(\{\[] | [\(\{\[] )/) {
lvl++;
str = LaTeX ? latexify(substr(rhs, 1, o_beg + o_len - 1)) \
: substr(rhs, 1, o_beg + o_len - 1);
out = out str;
rhs = substr(rhs, o_beg + o_len);
} else if (substr(rhs, o_beg) ~ /( [\]\}\)] | [\]\}\)]$)/) {
lvl--;
str = LaTeX ? latexify(substr(rhs, 1, o_beg + o_len - 1)) \
: substr(rhs, 1, o_beg + o_len - 1);
out = out str;
rhs = substr(rhs, o_beg + o_len);
} else if (substr(rhs, o_beg + 1, 1) == "|") {
if (lvl) {
out = out substr(rhs, 1, o_beg + o_len - 1);
rhs = substr(rhs, o_beg + o_len);
} else {
if (LaTeX) {
str = latexify(substr(rhs, 1, o_beg - 1));
print out str \
SPACES(64 - rule_max - length(str)) "\n" \
" \\end{minipage}" SPACES(60) " \\\\";
out = SPACES(rule_max) " & $|$ &\n" \
" \\begin{minipage}[t]{\\rulerhs}\n" \
" \\raggedright\n ";
} else {
print break_string(out substr(rhs, 1, o_beg - 1));
out = SPACES(rule_max + 1) " | ";
}
rhs = substr(rhs, o_beg + o_len);
}
}
}
if (LaTeX) {
str = latexify(rhs);
print out str SPACES(76 - length(out) - length(str)) "\n" \
" \\end{minipage}" SPACES(60) " \\\\";
} else
print break_string(out rhs);
if (!LaTeX)
print "\t\t\t.";
}
}
if (LaTeX)
print "\\end{longtable}";
}
#
# NAME: process()
# DESCRIPTION: process a given yacc grammar definition file
#
function process(grammar_file) {
if (!exists(grammar_file))
error("So where exactly is this file?");
DBG(0, "Reading grammar from '" grammar_file "'.");
read_declarations();
read_grammar();
if (_optim >= 1)
optimize1();
if (_optim >= 2)
optimize2();
output(!_plain);
}
#
# NAME: load_tokens()
# DESCRIPTION: load a list of literal tokens from a file
#
function load_tokens(file) {
while ((getline < file) == 1)
transtab[$1] = "\"" $2 "\"";
close(file);
}
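#
# Each line of the file names a token and the literal it stands for,
# e.g. (a hypothetical token file):
#     TOK_IF      if
#     TOK_PERIOD  .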
#
# NAME: give_help()
# DESCRIPTION: offer some help to the poor user
#
function give_help() {
print "Usage: y2l -- [options] yacc-file\n" \
"Options:\n" \
" -d\n" \
"\tWrite debugging output to the standard error stream. One can\n"\
"\tsupply a numeric argument to this option to set an indentation\n"\
"\tlevel for the messages.\n" \
" -h\n" \
"\tDisplay this help information.\n" \
" -O, -O[012]\n"u \
"\tOptimize the grammar to get more typical EBNF. If no argument\n"\
"\tis provided, the highest optimization level is selected.\n" \
" -p\n" \
"\tGive basic ASCII output rather than LaTeX output.\n" \
" -t/regexp/, -tfile\n" \
"\tIn the first form, the regular expression is used to decide\n" \
"\twhether a terminal in the grammar is a real token or rather a\n" \
"\tliteral. The second variant specifies a file which contains\n" \
"\tlines listing a token and the literal it represents.\n" \
" -v\n" \
"\tPrint out version information.";
exit;
}
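#
# For instance (hypothetical file names):
#     awk -f y2l -- -O2 -t/^TOK/ grammar.y >grammar.tex
# applies both optimizer passes, treats identifiers not matching ^TOK
# as literals, and writes LaTeX source to grammar.tex.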
#
# All processing is done from the BEGIN block. The one and only mandatory
# argument to the program is passed on to the grammar processor.
#
BEGIN {
_debug = 0; # no debugging
_optim = 0; # no optimization
_plain = 0; # no plain output
_tkpat = 0; # no token pattern
_bannr = "Yacc-to-LaTeX grammar processor v1.2";
for (i = 1; i < ARGC; i++) # loop over all arguments
if (ARGV[i] ~ /^-d[0-9]*$/) # debugging (optional level)
_debug = (length(ARGV[i]) > 2) ? substr(ARGV[i], 3) + 0 : 1;
else if (ARGV[i] ~ /^-h$/) # help the user
give_help();
else if (ARGV[i] ~ /^-O([012]|$)/) # optimization level
if (length(ARGV[i]) > 2)
_optim = substr(ARGV[i], 3, 1) + 0;
else
_optim = 2; # no argument: use the highest level
else if (ARGV[i] ~ /^-p$/) # non-LaTeX output
_plain = 1;
else if (ARGV[i] ~ /^-t/) # regexp or file for tokens
if (ARGV[i] ~ /^-t(\/\^|\/)[_\.A-Za-z][_\.A-Za-z0-9]*(\/|\$\/)$/) {
l = length(ARGV[i]);
_tkpat = substr(ARGV[i], 4, l - 4);
} else if (exists(file = substr(ARGV[i], 3)))
load_tokens(file);
else
error("So where is the token regexp pattern or file?");
else if (ARGV[i] ~ /^-v$/) # version
print _bannr >"/dev/stderr";
else if (ARGV[i] ~ /^-/) # unknown option
error("Am I supposed to do something with '" ARGV[i] "'?");
else if (grammar_file) # more than one grammar
error("How about just processing one grammar at a time?");
else
grammar_file = ARGV[i]; # name of the grammar file
if (!grammar_file) # no grammar file provided
error("Any particular grammar you have in mind?");
init(); # initialization
process(grammar_file); # process the given grammar
exit;
}