diff --git a/data/java.m4 b/data/java.m4 index b3dbd3b..798f5c9 100644 --- a/data/java.m4 +++ b/data/java.m4 @@ -117,6 +117,109 @@ m4_define([b4_int_type_for], m4_define([b4_null], [null]) +# b4_parser_tables_choose(SMALL, MEDIUM, LARGE) +# --------------------------------------------- +# Expand to SMALL, MEDIUM or LARGE according to %define parser_tables +# (compared case-insensitively; SMALL when the value is empty). +# Complain about any other value, then fall back to SMALL. +m4_case(m4_quote(m4_tolower(b4_percent_define_get([parser_tables]))), + [], [m4_define([b4_parser_tables_choose], [$1])], + [small], [m4_define([b4_parser_tables_choose], [$1])], + [medium], [m4_define([b4_parser_tables_choose], [$2])], + [large], [m4_define([b4_parser_tables_choose], [$3])], + [b4_complain([[%%define parser_tables "%s" not supported.]], + [b4_percent_define_get([parser_tables])])dnl +m4_define([b4_parser_tables_choose], [$1])]) + + +# b4_typed_parser_table(TYPE, NAME, DATA) +# --------------------------------------- +# Declare the parser table NAME as an array of TYPE holding DATA, in the +# form selected by b4_parser_tables_choose. +m4_define([b4_typed_parser_table], +[b4_parser_tables_choose( + [b4_small_parser_table([$1], [$2], [$3])], + [b4_medium_parser_table([$1], [$2], [$3])], + [b4_large_parser_table([$1], [$2], [$3])])]) + +# b4_small_parser_table(TYPE, NAME, DATA) +# --------------------------------------- +# Declare NAME with a plain array initializer. +m4_define([b4_small_parser_table], +[[private static final ]$1[ $2[] = + { + ]$3[ + };]]) + +# b4_medium_parser_table(TYPE, NAME, DATA) +# ---------------------------------------- +# Declare NAME initialized from a static method NAMEinit that returns +# the array. +m4_define([b4_medium_parser_table], +[[private static final ]$1[ $2[] = $2init(); + private static final ]$1[[] $2init() + { + return new ]$1[[] + { + ]$3[ + }; + }]]) + +# b4_large_parser_table(TYPE, NAME, DATA) +# --------------------------------------- +# Declare NAME initialized by yy_decode_TYPEs from the element count and +# a string literal encoding DATA, and define b4_need_yy_decode_TYPEs. +m4_define([b4_large_parser_table], +[[private static final ]$1[ $2[] = yy_decode_]$1s[(]dnl +m4_if([$1], [String], [b4_encode_Strings_count(m4_unquote($3))], + [m4_count(m4_unquote($3))])[, "]dnl +m4_apply(b4_encode_[]$1[]s, $3)[");]m4_define(b4_need_yy_decode_[]$1[]s)]) + +# Encode integral types in 1,2,4 bytes, big-endian, then stored as 2-byte +# characters, again big-endian. 
+ +m4_define([b4_encode_bytes], +[m4_if([$1], [], [], +[m4_if([$2], [], [b4_java_char(m4_eval([($1 & 0xFF) << 8], 16, 4))], +[b4_java_char(m4_eval([(($1 & 0xFF) << 8) | ($2 & 0xFF)], 16, 4))dnl +$0(m4_shift2($@))])])]) + +m4_define([b4_encode_shorts], +[m4_if([$1], [], [], [b4_java_char(m4_eval([$1 & 0xFFFF], 16, 4))dnl +$0(m4_shift($@))])]) + +m4_define([b4_encode_ints], +[m4_if([$1], [], [], [b4_java_char(m4_eval([($1 >> 16) & 0xFFFF], 16, 4))dnl +b4_java_char(m4_eval([$1 & 0xFFFF], 16, 4))[]$0(m4_shift($@))])]) + +m4_define([b4_extract_strings_re], +m4_format([[["\(\([^"\]\|\\.\)*\)",[ %c%c%c%c]*\(null\)?]]], 9, 10, 12, 13)) + +m4_define([b4_encode_Strings], +[m4_bpatsubst([$*], b4_extract_strings_re, [\1\\000])]) + +m4_define([b4_encode_Strings_count], +[m4_len(m4_quote(m4_bpatsubst([$*], b4_extract_strings_re, [1])))]) + +# b4_java_char(xxxx) +# ------------------ +# Expand to the char \uxxxx in a form usable inside a Java string literal. +# Java translates \u escapes before it scans string literals, so LF, CR, +# the double quote and the backslash are emitted as \n, \r, \" and \\. +m4_define([b4_java_char], +[m4_case($1, + 000a, [[\n]], + 000d, [[\r]], + 0022, [[\"]], + 005c, [[\\]], + [[\u$1]])]) + +# b4_integral_parser_table(NAME, DATA) +# ------------------------------------ +m4_define([b4_integral_parser_table], +[b4_typed_parser_table([b4_int_type_for([$2])], [$1], [$2])]) + + ## ------------------------- ## ## Assigning token numbers. ## ## ------------------------- ## diff --git a/data/lalr1.java b/data/lalr1.java index c855a75..4c7f74d 100644 --- a/data/lalr1.java +++ b/data/lalr1.java @@ -727,104 +727,55 @@ m4_popdef([b4_at_dollar])])dnl /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing STATE-NUM. 
*/ private static final ]b4_int_type_for([b4_pact])[ yypact_ninf_ = ]b4_pact_ninf[; - private static final ]b4_int_type_for([b4_pact])[ yypact_[] = - { - ]b4_pact[ - }; + ]b4_integral_parser_table([yypact_], [b4_pact])[ /* YYDEFACT[S] -- default rule to reduce with in state S when YYTABLE doesn't specify something else to do. Zero means the default is an error. */ - private static final ]b4_int_type_for([b4_defact])[ yydefact_[] = - { - ]b4_defact[ - }; + ]b4_integral_parser_table([yydefact_], [b4_defact])[ /* YYPGOTO[NTERM-NUM]. */ - private static final ]b4_int_type_for([b4_pgoto])[ yypgoto_[] = - { - ]b4_pgoto[ - }; + ]b4_integral_parser_table([yypgoto_], [b4_pgoto])[ /* YYDEFGOTO[NTERM-NUM]. */ - private static final ]b4_int_type_for([b4_defgoto])[ - yydefgoto_[] = - { - ]b4_defgoto[ - }; + ]b4_integral_parser_table([yydefgoto_], [b4_defgoto])[ /* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If positive, shift that token. If negative, reduce the rule which number is the opposite. If zero, do what YYDEFACT says. */ private static final ]b4_int_type_for([b4_table])[ yytable_ninf_ = ]b4_table_ninf[; - private static final ]b4_int_type_for([b4_table])[ - yytable_[] = - { - ]b4_table[ - }; + ]b4_integral_parser_table([yytable_], [b4_table])[ /* YYCHECK. */ - private static final ]b4_int_type_for([b4_check])[ - yycheck_[] = - { - ]b4_check[ - }; + ]b4_integral_parser_table([yycheck_], [b4_check])[ /* STOS_[STATE-NUM] -- The (internal number of the) accessing symbol of state STATE-NUM. */ - private static final ]b4_int_type_for([b4_stos])[ - yystos_[] = - { - ]b4_stos[ - }; + ]b4_integral_parser_table([yystos_], [b4_stos])[ /* TOKEN_NUMBER_[YYLEX-NUM] -- Internal symbol number corresponding to YYLEX-NUM. */ - private static final ]b4_int_type_for([b4_toknum])[ - yytoken_number_[] = - { - ]b4_toknum[ - }; + ]b4_integral_parser_table([yytoken_number_], [b4_toknum])[ /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. 
*/ - private static final ]b4_int_type_for([b4_r1])[ - yyr1_[] = - { - ]b4_r1[ - }; + ]b4_integral_parser_table([yyr1_], [b4_r1])[ /* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */ - private static final ]b4_int_type_for([b4_r2])[ - yyr2_[] = - { - ]b4_r2[ - }; + ]b4_integral_parser_table([yyr2_], [b4_r2])[ /* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. First, the terminals, then, starting at \a yyntokens_, nonterminals. */ - private static final String yytname_[] = - { - ]b4_tname[ - }; + ]b4_typed_parser_table([String], [yytname_], [b4_tname])[ /* YYRHS -- A `-1'-separated list of the rules' RHS. */ - private static final ]b4_int_type_for([b4_rhs])[ yyrhs_[] = - { - ]b4_rhs[ - }; + ]b4_integral_parser_table([yyrhs_], [b4_rhs])[ /* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in YYRHS. */ - private static final ]b4_int_type_for([b4_prhs])[ yyprhs_[] = - { - ]b4_prhs[ - }; + ]b4_integral_parser_table([yyprhs_], [b4_prhs])[ /* YYRLINE[YYN] -- Source line where rule number YYN was defined. */ - private static final ]b4_int_type_for([b4_rline])[ yyrline_[] = - { - ]b4_rline[ - }; + ]b4_integral_parser_table([yyrline_], [b4_rline])[ // Report on the debug stream that the rule yyrule is going to be reduced. private void yy_reduce_print (int yyrule, YYStack yystack) @@ -847,10 +798,7 @@ m4_popdef([b4_at_dollar])])dnl } /* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */ - private static final ]b4_int_type_for([b4_translate])[ yytranslate_table_[] = - { - ]b4_translate[ - }; + ]b4_integral_parser_table([yytranslate_table_], [b4_translate])[ private static final ]b4_int_type_for([b4_translate])[ yytranslate_ (int t) { @@ -871,7 +819,57 @@ m4_popdef([b4_at_dollar])])dnl private static final int yyuser_token_number_max_ = ]b4_user_token_number_max[; private static final int yyundef_token_ = ]b4_undef_token_number[; -]/* User implementation code. 
*/ +]m4_ifdef([b4_need_yy_decode_bytes], +[[ private static final byte[] yy_decode_bytes(int n, String s) + { + byte[] v = new byte[n]; + int i, j = 0; + for (i = 0, j = 0; j + 1 < n; i++, j += 2) + { + v[j] = (byte)(s.charAt(i) >> 8); + v[j+1] = (byte)(s.charAt(i)); + } + if (j < n) + v[j] = (byte)(s.charAt(i) >> 8); + return v; + } + +]])dnl +m4_ifdef([b4_need_yy_decode_shorts], +[[ private static final short[] yy_decode_shorts(int n, String s) + { + short[] v = new short[n]; + for (int i = 0; i < n; i++) + v[i] = (short)(s.charAt(i)); + return v; + } + +]])dnl +m4_ifdef([b4_need_yy_decode_ints], +[[ private static final int[] yy_decode_ints(int n, String s) + { + int[] v = new int[n]; + for (int i = 0, j = 0; i < n; i++, j += 2) + v[i] = (s.charAt(j) << 16) | s.charAt(j+1); + return v; + } + +]])dnl +m4_ifdef([b4_need_yy_decode_Strings], +[[ private static final String[] yy_decode_Strings(int n, String s) + { + // S holds every name followed by a '\0' terminator. Split on the + // terminators rather than trusting N, so that a miscounted N can + // neither drop a name nor index outside S or the result. N is + // accepted only so that every yy_decode_* helper is called alike. + String[] v = s.split("\0", -1); + // The empty piece after the last terminator is replaced by the + // null entry that ends the table. + v[v.length - 1] = null; + return v; + } + +]])/* User implementation code. */ b4_percent_code_get[]dnl } diff --git a/doc/bison.texinfo b/doc/bison.texinfo index 6570c0c..33ce0e4 100644 --- a/doc/bison.texinfo +++ b/doc/bison.texinfo @@ -8789,6 +8789,18 @@ unused code in the generated parser, so use @code{%debug} and @code{%token-table} directive might enable a public interface to access the token names and codes. +Getting a ``code too large'' error from the Java compiler means the code +hit the 64KB bytecode per method limitation of the Java class file. +If the error is on static data initialization, try using the +@code{%define parser_tables "medium"} and @code{%define parser_tables "large"} +directives, in that order. If @code{"large"} still gives the error on +static data initialization, the problem should be in the code you inserted. 
+If the ``code too large'' error is on the method containing user actions, +try reducing the amount of code in actions, and if that fails, the parser +skeleton needs to be changed to put actions in multiple functions. Any +other ``code too large'' errors should be in the code you inserted. + + @node Java Semantic Values @subsection Java Semantic Values @c - No %union, specify type in %type/%token. @@ -9272,6 +9284,15 @@ The name of the parser class. Default is @code{YYParser} or @xref{Java Bison Interface}. @end deffn +@deffn {Directive} {%define parser_tables} "@var{strategy}" +The strategy used to implement parser tables to work around Java's +``code too large'' limitation. The strategy can be @code{small}, +@code{medium}, or @code{large}. The @code{large} strategy is inefficient +but should not require much static initialization code. +Default is @code{small}. +@xref{Java Bison Interface}. +@end deffn + @deffn {Directive} {%define position_type} "@var{class}" The name of the class used for positions. This class must be supplied by the user. Default is @code{Position}. diff --git a/tests/java.at b/tests/java.at index a3e1a0e..97e0eff 100644 --- a/tests/java.at +++ b/tests/java.at @@ -398,6 +398,24 @@ AT_CHECK_JAVA_CALC([%lex-param { InputStream is } ], [[ } ]]) +AT_CHECK_JAVA_CALC([%define parser_tables "medium"], [[ + public static void main (String args[]) throws IOException + { + CalcLexer l = new CalcLexer (System.in); + Calc p = new Calc (l); + p.parse (); + } +]]) + +AT_CHECK_JAVA_CALC([%define parser_tables "large"], [[ + public static void main (String args[]) throws IOException + { + CalcLexer l = new CalcLexer (System.in); + Calc p = new Calc (l); + p.parse (); + } +]]) + # -----------------#