[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
26-gary-changeresyntax.patch
From: |
Gary V. Vaughan |
Subject: |
26-gary-changeresyntax.patch |
Date: |
Fri, 07 Jul 2006 12:49:52 +0100 |
User-agent: |
Thunderbird 1.5.0.4 (Macintosh/20060530) |
Well, it was easier than I thought, so I wrote the patch last night, and
the ChangeLog over lunch today :-)
Okay to commit to HEAD?
Cheers,
Gary.
--
Gary V. Vaughan ())_. address@hidden,gnu.org}
Research Scientist ( '/ http://blog.azazil.net
GNU Hacker / )= http://trac.azazil.net/projects/libtool
Technical Author `(_~)_ http://sources.redhat.com/autobook
Index: m4--devo--0/modules/gnu.c
===================================================================
--- m4--devo--0.orig/modules/gnu.c 2006-07-07 12:14:09.000000000 +0100
+++ m4--devo--0/modules/gnu.c 2006-07-07 12:28:41.000000000 +0100
@@ -41,34 +41,6 @@
# include "m4private.h"
#endif
-#define RE_SYNTAX_BRE RE_SYNTAX_EMACS
-
-#define RE_SYNTAX_ERE \
- (/* Allow char classes. */ \
- RE_CHAR_CLASSES \
- /* Anchors are OK in groups. */ \
- | RE_CONTEXT_INDEP_ANCHORS \
- /* Be picky, `/^?/', for instance, makes no sense. */ \
- | RE_CONTEXT_INVALID_OPS \
- /* Allow intervals with `{' and `}', forbid invalid ranges. */\
- | RE_INTERVALS | RE_NO_BK_BRACES | RE_NO_EMPTY_RANGES \
- /* `(' and `)' are the grouping operators. */ \
- | RE_NO_BK_PARENS \
- /* `|' is the alternation. */ \
- | RE_NO_BK_VBAR)
-
-#include "format.c"
-
-
-/* The regs_allocated field in an re_pattern_buffer refers to the
- state of the re_registers struct used in successive matches with
- the same compiled pattern: */
-typedef struct {
- struct re_pattern_buffer pat; /* compiled regular expression */
- struct re_registers regs; /* match registers */
-} m4_pattern_buffer;
-
-
/* Rename exported symbols for dlpreload()ing. */
#define m4_builtin_table gnu_LTX_m4_builtin_table
#define m4_macro_table gnu_LTX_m4_macro_table
@@ -78,22 +50,20 @@
with their details in a single table for easy maintenance.
function macros blind argmin argmax */
-#define builtin_functions \
+#define builtin_functions \
BUILTIN(__file__, false, false, 1, 1 ) \
BUILTIN(__line__, false, false, 1, 1 ) \
BUILTIN(builtin, false, true, 2, -1 ) \
+ BUILTIN(changeresyntax, false, true, 1, 2 ) \
BUILTIN(changesyntax, false, true, 1, -1 ) \
BUILTIN(debugmode, false, false, 1, 2 ) \
BUILTIN(debugfile, false, false, 1, 2 ) \
- BUILTIN(eregexp, false, true, 3, 4 ) \
- BUILTIN(epatsubst, false, true, 3, 4 ) \
- BUILTIN(erenamesyms, false, true, 3, 3 ) \
BUILTIN(esyscmd, false, true, 2, 2 ) \
BUILTIN(format, false, true, 2, -1 ) \
BUILTIN(indir, false, true, 2, -1 ) \
- BUILTIN(patsubst, false, true, 3, 4 ) \
- BUILTIN(regexp, false, true, 3, 4 ) \
- BUILTIN(renamesyms, false, true, 3, 3 ) \
+ BUILTIN(patsubst, false, true, 3, 5 ) \
+ BUILTIN(regexp, false, true, 3, 5 ) \
+ BUILTIN(renamesyms, false, true, 3, 4 ) \
BUILTIN(symbols, false, false, 0, -1 ) \
BUILTIN(syncoutput, false, true, 2, 2 ) \
@@ -131,21 +101,190 @@
{ 0, 0 },
};
-static bool regsub (m4 *context, m4_obstack *obs, const char *caller,
- const char *victim, const char *regexp,
- m4_pattern_buffer *buf, const char *replace,
- bool ignore_duplicates);
-static void substitute (m4 *context, m4_obstack *obs, const char *victim,
- const char *repl, m4_pattern_buffer *buf);
-
-static void m4_regexp_do (m4 *context, m4_obstack *obs, int argc,
- m4_symbol_value **argv, int syntax);
-static void m4_patsubst_do (m4 *context, m4_obstack *obs, int argc,
- m4_symbol_value **argv, int syntax);
-static void m4_renamesyms_do (m4 *context, m4_obstack *obs, int argc,
- m4_symbol_value **argv, int syntax);
+/* The regs_allocated field in an re_pattern_buffer refers to the
+ state of the re_registers struct used in successive matches with
+ the same compiled pattern: */
+typedef struct {
+ struct re_pattern_buffer pat; /* compiled regular expression */
+ struct re_registers regs; /* match registers */
+} m4_pattern_buffer;
+
+
+/* Compile REGEXP using the Regex RESYNTAX bits and return the buffer.
+ Report errors on behalf of CALLER. */
+
+static m4_pattern_buffer *
+m4_regexp_compile (m4 *context, const char *caller,
+ const char *regexp, int resyntax)
+{
+ static m4_pattern_buffer buf; /* compiled regular expression */
+ static bool buf_initialized = false;
+ const char *msg; /* error message from re_compile_pattern */
+
+ if (!buf_initialized)
+ {
+ buf_initialized = true;
+ buf.pat.buffer = NULL;
+ buf.pat.allocated = 0;
+ buf.pat.fastmap = NULL;
+ buf.pat.translate = NULL;
+ }
+
+ re_set_syntax (resyntax);
+ msg = re_compile_pattern (regexp, strlen (regexp), &buf.pat);
+
+ if (msg != NULL)
+ {
+ M4ERROR ((m4_get_warning_status_opt (context), 0,
+ _("%s: bad regular expression `%s': %s"),
+ caller, regexp, msg));
+ return NULL;
+ }
+
+ return &buf;
+}
+
+static int
+m4_regexp_search (m4_pattern_buffer *buf, const char *string,
+ const int size, const int start, const int range)
+{
+ return re_search (&(buf->pat), string, size, start, range, &(buf->regs));
+}
+
+
+/* Function to perform substitution by regular expressions. Used by the
+ builtins regexp, patsubst and renamesyms. The changed text is placed on
+ the obstack. The substitution is REPL, with \& substituted by the part
+ of VICTIM matched by the last whole regular expression, taken from
+ REGS[0], and \N substituted by the text matched by the Nth parenthesized
+ sub-expression, taken from REGS[N]. */
+static int substitute_warned = 0;
+
+static void
+substitute (m4 *context, m4_obstack *obs, const char *victim,
+ const char *repl, m4_pattern_buffer *buf)
+{
+ register unsigned int ch;
+
+ for (;;)
+ {
+ while ((ch = *repl++) != '\\')
+ {
+ if (ch == '\0')
+ return;
+ obstack_1grow (obs, ch);
+ }
+
+ switch ((ch = *repl++))
+ {
+ case '0':
+ if (!substitute_warned)
+ {
+ M4ERROR ((m4_get_warning_status_opt (context), 0, _("\
+WARNING: \\0 will disappear, use \\& instead in replacements")));
+ substitute_warned = 1;
+ }
+ /* Fall through. */
+
+ case '&':
+ obstack_grow (obs, victim + buf->regs.start[0],
+ buf->regs.end[0] - buf->regs.start[0]);
+ break;
+
+ case '1': case '2': case '3': case '4': case '5': case '6':
+ case '7': case '8': case '9':
+ ch -= '0';
+ if (buf->regs.end[ch] > 0)
+ obstack_grow (obs, victim + buf->regs.start[ch],
+ buf->regs.end[ch] - buf->regs.start[ch]);
+ break;
+
+ default:
+ obstack_1grow (obs, ch);
+ break;
+ }
+ }
+}
+
+
+static bool
+m4_regexp_substitute (m4 *context, m4_obstack *obs, const char *caller,
+ const char *victim, const char *regexp,
+ m4_pattern_buffer *buf, const char *replace,
+ bool ignore_duplicates)
+{
+ int matchpos = 0; /* start position of match */
+ int offset = 0; /* current match offset */
+ int length = strlen (victim);
+
+ while (offset < length)
+ {
+ matchpos = m4_regexp_search (buf, victim, length,
+ offset, length - offset);
+
+ if (matchpos < 0)
+ {
+
+ /* Match failed -- either error or there is no match in the
+ rest of the string, in which case the rest of the string is
+ copied verbatim. */
+
+ if (matchpos == -2)
+ M4ERROR ((m4_get_warning_status_opt (context), 0,
+ _("%s: error matching regular expression `%s'"),
+ caller, regexp));
+ else if (!ignore_duplicates && (offset < length))
+ obstack_grow (obs, victim + offset, length - offset);
+ break;
+ }
+
+ /* Copy the part of the string that was skipped by re_search (). */
+
+ if (matchpos > offset)
+ obstack_grow (obs, victim + offset, matchpos - offset);
+
+ /* Handle the part of the string that was covered by the match. */
+
+ substitute (context, obs, victim, replace, buf);
+
+ /* Update the offset to the end of the match. If the regexp
+ matched a null string, advance offset one more, to avoid
+ infinite loops. */
+
+ offset = buf->regs.end[0];
+ if (buf->regs.start[0] == buf->regs.end[0])
+ obstack_1grow (obs, victim[offset++]);
+ }
+
+ if (!ignore_duplicates || (matchpos >= 0))
+ obstack_1grow (obs, '\0');
+
+ return (matchpos >= 0);
+}
+
+
+
+
+/**
+ * __file__
+ **/
+M4BUILTIN_HANDLER (__file__)
+{
+ m4_shipout_string (context, obs, m4_current_file, 0, true);
+}
+
+
+/**
+ * __line__
+ **/
+M4BUILTIN_HANDLER (__line__)
+{
+ m4_shipout_int (obs, m4_current_line);
+}
+
+
/* The builtin "builtin" allows calls to builtin macros, even if their
definition has been overridden or shadowed. It is thus possible to
redefine builtins, and still access their original definition. */
@@ -170,8 +309,7 @@
/* The builtin "indir" allows indirect calls to macros, even if their name
is not a proper macro name. It is thus possible to define macros with
- ill-formed names for internal use in larger macro packages. This macro
- is not available in compatibility mode. */
+ ill-formed names for internal use in larger macro packages. */
/**
* indir(MACRO, [...])
@@ -188,9 +326,45 @@
m4_macro_call (context, symbol, obs, argc - 1, argv + 1);
}
-/* Change the current input syntax. The function set_syntax () lives
- in input.c. For compability reasons, this function is not called,
- if not followed by a` SYNTAX_OPEN. Also, any changes to comment
+
+/* Change the current regexp syntax. Currently this affects the
+ builtins: `patsubst', `regexp' and `renamesyms'. */
+
+static int
+m4_resyntax_encode_safe (m4 *context, const char *caller, const char *spec)
+{
+ int resyntax = -1;
+
+ if (spec)
+ {
+ resyntax = m4_regexp_syntax_encode (spec);
+
+ if (resyntax < 0)
+ {
+ M4ERROR ((m4_get_warning_status_opt (context), 0,
+ _("%s: bad syntax-spec: `%s'"),
+ caller, spec));
+ }
+ }
+
+ return resyntax;
+}
+
+/**
+ * changeresyntax([RESYNTAX-SPEC])
+ **/
+M4BUILTIN_HANDLER (changeresyntax)
+{
+ int resyntax = m4_resyntax_encode_safe (context, M4ARG (0), M4ARG (1));
+
+ if (resyntax >= 0)
+ m4_set_regexp_syntax_opt (context, resyntax);
+}
+
+
+/* Change the current input syntax. The function m4_set_syntax () lives
+ in syntax.c. For compatibility reasons, this function is not called,
+ if not followed by a SYNTAX_OPEN. Also, any changes to comment
delimiters and quotes made here will be overridden by a call to
`changecom' or `changequote'. */
@@ -212,12 +386,16 @@
&& (key != '\0'))
{
M4ERROR ((m4_get_warning_status_opt (context), 0,
- _("Undefined syntax code %c"), key));
+ _("%s: undefined syntax code: `%c'"),
+ M4ARG (0), key));
}
}
}
+ else
+ assert (!"Unable to import from m4 module");
}
+
/* On-the-fly control of the format of the tracing output. It takes one
argument, which is a character string like given to the -d option, or
none in which case the debug_level is zeroed. */
@@ -248,7 +426,7 @@
if (new_debug_level < 0)
M4ERROR ((m4_get_warning_status_opt (context), 0,
- _("Debugmode: bad debug flags: `%s'"), M4ARG (1)));
+ _("%s: bad debug flags: `%s'"), M4ARG(0), M4ARG (1)));
else
{
switch (change_flag)
@@ -269,6 +447,7 @@
}
}
+
/* Specify the destination of the debugging output. With one argument, the
argument is taken as a file name, with no arguments, revert to stderr. */
@@ -281,49 +460,90 @@
m4_debug_set_output (context, NULL);
else if (!m4_debug_set_output (context, M4ARG (1)))
M4ERROR ((m4_get_warning_status_opt (context), errno,
- _("Cannot set error file: %s"), M4ARG (1)));
+ _("%s: cannot set error file `%s'"), M4ARG (0), M4ARG (1)));
}
-/* Compile a REGEXP using the Regex SYNTAX bits return the buffer.
- Report errors on behalf of CALLER. */
+/**
+ * esyscmd(SHELL-COMMAND)
+ **/
-static m4_pattern_buffer *
-m4_regexp_compile (m4 *context, const char *caller,
- const char *regexp, int syntax)
+M4BUILTIN_HANDLER (esyscmd)
{
- static m4_pattern_buffer buf; /* compiled regular expression */
- static bool buf_initialized = false;
- const char *msg; /* error message from re_compile_pattern */
+ M4_MODULE_IMPORT (m4, m4_set_sysval);
+ M4_MODULE_IMPORT (m4, m4_sysval_flush);
- if (!buf_initialized)
+ if (m4_set_sysval && m4_sysval_flush)
{
- buf_initialized = true;
- buf.pat.buffer = NULL;
- buf.pat.allocated = 0;
- buf.pat.fastmap = NULL;
- buf.pat.translate = NULL;
+ FILE *pin;
+ int ch;
+
+ m4_sysval_flush (context);
+ errno = 0;
+ pin = popen (M4ARG (1), "r");
+ if (pin == NULL)
+ {
+ M4ERROR ((m4_get_warning_status_opt (context), errno,
+ _("%s: cannot open pipe to command `%s'"),
+ M4ARG (0), M4ARG (1)));
+ m4_set_sysval (0xffff);
+ }
+ else
+ {
+ while ((ch = getc (pin)) != EOF)
+ obstack_1grow (obs, (char) ch);
+ m4_set_sysval (pclose (pin));
+ }
}
+ else
+ assert (!"Unable to import from m4 module");
+}
- re_set_syntax (syntax);
- msg = re_compile_pattern (regexp, strlen (regexp), &buf.pat);
- if (msg != NULL)
- {
- M4ERROR ((m4_get_warning_status_opt (context), 0,
- _("%s: bad regular expression `%s': %s"),
- caller, regexp, msg));
- return NULL;
- }
+/* Frontend for printf like formatting. The function format () lives in
+ the file format.c. */
- return &buf;
+#include "format.c"
+
+/**
+ * format(FORMAT-STRING, [...])
+ **/
+M4BUILTIN_HANDLER (format)
+{
+ format (obs, argc - 1, argv + 1);
}
-static int
-m4_regexp_search (m4_pattern_buffer *buf, const char *string,
- const int size, const int start, const int range)
+
+/* Substitute all matches of a regexp occurring in a string. Each match of
+ the second argument (a regexp) in the first argument is changed to the
+ third argument, with \& substituted by the matched text, and \N
+ substituted by the text matched by the Nth parenthesized sub-expression. */
+
+/**
+ * patsubst(VICTIM, REGEXP, [REPLACEMENT], [RESYNTAX])
+ **/
+M4BUILTIN_HANDLER (patsubst)
{
- return re_search (&(buf->pat), string, size, start, range, &(buf->regs));
+ const char *me; /* name of this macro */
+ m4_pattern_buffer *buf; /* compiled regular expression */
+ int resyntax;
+
+ me = M4ARG (0);
+
+ resyntax = m4_get_regexp_syntax_opt (context);
+ if (argc == 5)
+ {
+ resyntax = m4_resyntax_encode_safe (context, me, M4ARG (4));
+ if (resyntax < 0)
+ return;
+ }
+
+ buf = m4_regexp_compile (context, me, M4ARG (2), resyntax);
+ if (!buf)
+ return;
+
+ m4_regexp_substitute (context, obs, me, M4ARG (1), M4ARG (2), buf,
+ M4ARG (3), false);
}
@@ -333,167 +553,117 @@
the expansion to this argument. */
/**
- * regexp(VICTIM, REGEXP, [REPLACEMENT])
- * eregexp(VICTIM, REGEXP, [REPLACEMENT])
+ * regexp(VICTIM, REGEXP, [REPLACEMENT], [RESYNTAX])
**/
-
-static void
-m4_regexp_do (m4 *context, m4_obstack *obs, int argc,
- m4_symbol_value **argv, int syntax)
+M4BUILTIN_HANDLER (regexp)
{
- const char *caller; /* calling macro name */
- const char *victim; /* first argument */
- const char *regexp; /* regular expression */
-
+ const char *me; /* name of this macro */
m4_pattern_buffer *buf; /* compiled regular expression */
int startpos; /* start position of match */
int length; /* length of first argument */
+ int resyntax;
- caller = M4ARG (0);
- victim = M4ARG (1);
- regexp = M4ARG (2);
+ me = M4ARG (0);
- buf = m4_regexp_compile (context, caller, regexp, syntax);
+ resyntax = m4_get_regexp_syntax_opt (context);
+ if (argc == 5)
+ {
+ resyntax = m4_resyntax_encode_safe (context, me, M4ARG (4));
+ if (resyntax < 0)
+ return;
+ }
+
+ buf = m4_regexp_compile (context, me, M4ARG (2), resyntax);
if (!buf)
return;
- length = strlen (victim);
- startpos = m4_regexp_search (buf, victim, length, 0, length);
+ length = strlen (M4ARG (1));
+ startpos = m4_regexp_search (buf, M4ARG (1), length, 0, length);
if (startpos == -2)
{
M4ERROR ((m4_get_warning_status_opt (context), 0,
_("%s: error matching regular expression `%s'"),
- caller, regexp));
+ me, M4ARG (2)));
return;
}
if (argc == 3)
m4_shipout_int (obs, startpos);
else if (startpos >= 0)
- substitute (context, obs, victim, M4ARG (3), buf);
+ substitute (context, obs, M4ARG (1), M4ARG (3), buf);
return;
}
-/**
- * regexp(VICTIM, REGEXP, [REPLACEMENT])
- **/
-M4BUILTIN_HANDLER (regexp)
-{
- m4_regexp_do (context, obs, argc, argv, RE_SYNTAX_BRE);
-}
+/* Rename all current symbols that match REGEXP according to the
+ REPLACEMENT specification. */
/**
- * eregexp(VICTIM, REGEXP, [REPLACEMENT])
+ * renamesyms(REGEXP, REPLACEMENT, [RESYNTAX])
**/
-M4BUILTIN_HANDLER (eregexp)
+M4BUILTIN_HANDLER (renamesyms)
{
- m4_regexp_do (context, obs, argc, argv, RE_SYNTAX_ERE);
-}
+ M4_MODULE_IMPORT (m4, m4_dump_symbols);
+ if (m4_dump_symbols)
+ {
+ const char *me; /* name of this macro */
+ const char *regexp; /* regular expression string */
+ const char *replace; /* replacement expression string */
+ m4_pattern_buffer *buf; /* compiled regular expression */
-/* Substitute all matches of a regexp occuring in a string. Each match of
- the second argument (a regexp) in the first argument is changed to the
- third argument, with \& substituted by the matched text, and \N
- substituted by the text matched by the Nth parenthesized sub-expression. */
+ m4_dump_symbol_data data;
+ m4_obstack data_obs;
+ m4_obstack rename_obs;
-/**
- * patsubst(VICTIM, REGEXP, [REPLACEMENT])
- * epatsubst(VICTIM, REGEXP, [REPLACEMENT])
- **/
-static void
-m4_patsubst_do (m4 *context, m4_obstack *obs, int argc,
- m4_symbol_value **argv, int syntax)
-{
- const char *caller; /* calling macro name */
- const char *victim; /* first argument */
- const char *regexp; /* regular expression */
- m4_pattern_buffer *buf; /* compiled regular expression */
+ int resyntax;
- caller = M4ARG (0);
- victim = M4ARG (1);
- regexp = M4ARG (2);
+ me = M4ARG (0);
+ regexp = M4ARG (1);
+ replace = M4ARG (2);
- buf = m4_regexp_compile (context, caller, regexp, syntax);
- if (!buf)
- return;
+ resyntax = m4_get_regexp_syntax_opt (context);
+ if (argc == 4)
+ {
+ resyntax = m4_resyntax_encode_safe (context, me, M4ARG (3));
+ if (resyntax < 0)
+ return;
+ }
- regsub (context, obs, caller, victim, regexp, buf, M4ARG (3), false);
-}
+ buf = m4_regexp_compile (context, me, regexp, resyntax);
+ if (!buf)
+ return;
-static bool
-regsub (m4 *context, m4_obstack *obs, const char *caller,
- const char *victim, const char *regexp, m4_pattern_buffer *buf,
- const char *replace, bool ignore_duplicates)
-{
- int matchpos = 0; /* start position of match */
- int offset = 0; /* current match offset */
- int length = strlen (victim);
+ obstack_init (&rename_obs);
+ obstack_init (&data_obs);
+ data.obs = &data_obs;
- while (offset < length)
- {
- matchpos = m4_regexp_search (buf, victim, length,
- offset, length - offset);
+ m4_dump_symbols (context, &data, 1, argv, false);
- if (matchpos < 0)
+ for (; data.size > 0; --data.size, data.base++)
{
+ const char * name = data.base[0];
+ int length = strlen (name);
- /* Match failed -- either error or there is no match in the
- rest of the string, in which case the rest of the string is
- copied verbatim. */
+ if (m4_regexp_substitute (context, &rename_obs, me, name, regexp,
+ buf, replace, true))
+ {
+ const char *renamed = obstack_finish (&rename_obs);
- if (matchpos == -2)
- M4ERROR ((m4_get_warning_status_opt (context), 0,
- _("%s: error matching regular expression `%s'"),
- caller, regexp));
- else if (!ignore_duplicates && (offset < length))
- obstack_grow (obs, victim + offset, length - offset);
- break;
+ m4_symbol_rename (M4SYMTAB, name, renamed);
+ }
}
- /* Copy the part of the string that was skipped by re_search (). */
-
- if (matchpos > offset)
- obstack_grow (obs, victim + offset, matchpos - offset);
-
- /* Handle the part of the string that was covered by the match. */
-
- substitute (context, obs, victim, replace, buf);
-
- /* Update the offset to the end of the match. If the regexp
- matched a null string, advance offset one more, to avoid
- infinite loops. */
-
- offset = buf->regs.end[0];
- if (buf->regs.start[0] == buf->regs.end[0])
- obstack_1grow (obs, victim[offset++]);
+ obstack_free (&data_obs, NULL);
+ obstack_free (&rename_obs, NULL);
}
-
- if (!ignore_duplicates || (matchpos >= 0))
- obstack_1grow (obs, '\0');
-
- return (matchpos >= 0);
-}
-
-
-/**
- * patsubst(STRING, REGEXP, [REPLACEMENT])
- **/
-M4BUILTIN_HANDLER (patsubst)
-{
- m4_patsubst_do (context, obs, argc, argv, RE_SYNTAX_BRE);
+ else
+ assert (!"Unable to import from m4 module");
}
-/**
- * epatsubst(STRING, REGEXP, [REPLACEMENT])
- **/
-M4BUILTIN_HANDLER (epatsubst)
-{
- m4_patsubst_do (context, obs, argc, argv, RE_SYNTAX_ERE);
-}
/* Implementation of "symbols". It builds up a table of pointers to
symbols, sorts it and ships out the symbol names. */
@@ -526,7 +696,7 @@
assert (!"Unable to import from m4 module");
}
-
+
/* This contains macro which implements syncoutput() which takes one arg
1, on, yes - turn on sync lines
0, off, no - turn off sync lines
@@ -549,196 +719,3 @@
m4_set_sync_output_opt (context, true);
}
}
-
-
-/**
- * esyscmd(SHELL-COMMAND)
- **/
-
-M4BUILTIN_HANDLER (esyscmd)
-{
- M4_MODULE_IMPORT (m4, m4_set_sysval);
- M4_MODULE_IMPORT (m4, m4_sysval_flush);
-
- if (m4_set_sysval && m4_sysval_flush)
- {
- FILE *pin;
- int ch;
-
- m4_sysval_flush (context);
- errno = 0;
- pin = popen (M4ARG (1), "r");
- if (pin == NULL)
- {
- M4ERROR ((m4_get_warning_status_opt (context), errno,
- _("Cannot open pipe to command `%s'"), M4ARG (1)));
- m4_set_sysval (0xffff);
- }
- else
- {
- while ((ch = getc (pin)) != EOF)
- obstack_1grow (obs, (char) ch);
- m4_set_sysval (pclose (pin));
- }
- }
-}
-
-
-
-/* Rename all current symbols that match REGEXP according to the
- REPLACEMENT specification. */
-
-/**
- * renamesyms(REGEXP, REPLACEMENT)
- * erenamesyms(REGEXP, REPLACEMENT)
- **/
-static void
-m4_renamesyms_do (m4 *context, m4_obstack *obs, int argc,
- m4_symbol_value **argv, int syntax)
-{
- const char *caller; /* calling macro name */
- const char *regexp; /* regular expression string */
- const char *replace; /* replacement expression string */
-
- m4_pattern_buffer *buf; /* compiled regular expression */
-
- m4_dump_symbol_data data;
- m4_obstack data_obs;
- m4_obstack rename_obs;
-
- M4_MODULE_IMPORT (m4, m4_dump_symbols);
-
- assert (m4_dump_symbols);
-
- caller = M4ARG (0);
- regexp = M4ARG (1);
- replace = M4ARG (2);
-
- buf = m4_regexp_compile (context, caller, regexp, syntax);
- if (!buf)
- return;
-
- obstack_init (&rename_obs);
- obstack_init (&data_obs);
- data.obs = &data_obs;
-
- m4_dump_symbols (context, &data, 1, argv, false);
-
- for (; data.size > 0; --data.size, data.base++)
- {
- const char * name = data.base[0];
- int length = strlen (name);
-
- if (regsub (context, &rename_obs, caller, name, regexp, buf,
- replace, true))
- {
- const char *renamed = obstack_finish (&rename_obs);
-
- m4_symbol_rename (M4SYMTAB, name, renamed);
- }
- }
-
- obstack_free (&data_obs, NULL);
- obstack_free (&rename_obs, NULL);
-}
-
-/**
- * renamesyms(REGEXP, REPLACEMENT)
- **/
-M4BUILTIN_HANDLER (renamesyms)
-{
- m4_renamesyms_do (context, obs, argc, argv, RE_SYNTAX_BRE);
-}
-
-/**
- * erenamesyms(REGEXP, REPLACEMENT)
- **/
-M4BUILTIN_HANDLER (erenamesyms)
-{
- m4_renamesyms_do (context, obs, argc, argv, RE_SYNTAX_ERE);
-}
-
-
-
-/* Frontend for printf like formatting. The function format () lives in
- the file format.c. */
-
-/**
- * format(FORMAT-STRING, [...])
- **/
-M4BUILTIN_HANDLER (format)
-{
- format (obs, argc - 1, argv + 1);
-}
-
-
-/**
- * __file__
- **/
-M4BUILTIN_HANDLER (__file__)
-{
- m4_shipout_string (context, obs, m4_current_file, 0, true);
-}
-
-
-/**
- * __line__
- **/
-M4BUILTIN_HANDLER (__line__)
-{
- m4_shipout_int (obs, m4_current_line);
-}
-
-/* Function to perform substitution by regular expressions. Used by the
- builtins regexp, patsubst and renamesyms. The changed text is placed on
- the obstack. The substitution is REPL, with \& substituted by this part
- of VICTIM matched by the last whole regular expression, taken from
- REGS[0], and \N substituted by the text matched by the Nth parenthesized
- sub-expression, taken from REGS[N]. */
-static int substitute_warned = 0;
-
-static void
-substitute (m4 *context, m4_obstack *obs, const char *victim,
- const char *repl, m4_pattern_buffer *buf)
-{
- register unsigned int ch;
-
- for (;;)
- {
- while ((ch = *repl++) != '\\')
- {
- if (ch == '\0')
- return;
- obstack_1grow (obs, ch);
- }
-
- switch ((ch = *repl++))
- {
- case '0':
- if (!substitute_warned)
- {
- M4ERROR ((m4_get_warning_status_opt (context), 0, _("\
-WARNING: \\0 will disappear, use \\& instead in replacements")));
- substitute_warned = 1;
- }
- /* Fall through. */
-
- case '&':
- obstack_grow (obs, victim + buf->regs.start[0],
- buf->regs.end[0] - buf->regs.start[0]);
- break;
-
- case '1': case '2': case '3': case '4': case '5': case '6':
- case '7': case '8': case '9':
- ch -= '0';
- if (buf->regs.end[ch] > 0)
- obstack_grow (obs, victim + buf->regs.start[ch],
- buf->regs.end[ch] - buf->regs.start[ch]);
- break;
-
- default:
- obstack_1grow (obs, ch);
- break;
- }
- }
-}
Index: m4--devo--0/src/main.c
===================================================================
--- m4--devo--0.orig/src/main.c 2006-07-07 12:14:09.000000000 +0100
+++ m4--devo--0/src/main.c 2006-07-07 12:14:52.000000000 +0100
@@ -103,7 +103,14 @@
-e, --interactive unbuffer output, ignore interrupts\n\
-E, --fatal-warnings stop execution after first warning\n\
-Q, --quiet, --silent suppress some warnings for builtins\n\
- -P, --prefix-builtins force a `m4_' prefix to all builtins\n"),
+ -P, --prefix-builtins force a `m4_' prefix to all builtins\n\
+ -r, --regexp-syntax=[SPEC] change the default regexp syntax\n"),
+ stdout);
+ fputs (_("\
+\n\
+SPEC is any one of:\n\
+ AWK, BASIC, BSD_M4, ED, EMACS, EXTENDED, GNU_AWK, GNU_EGREP, GNU_M4,\n\
+ GREP, POSIX_AWK, POSIX_EGREP, MINIMAL, MINIMAL_BASIC, SED.\n"),
stdout);
printf (_("\
\n\
@@ -183,6 +190,7 @@
{"nesting-limit", required_argument, NULL, 'L'},
{"prefix-builtins", no_argument, NULL, 'P'},
{"quiet", no_argument, NULL, 'Q'},
+ {"regexp-syntax", required_argument, NULL, 'r'},
{"reload-state", required_argument, NULL, 'R'},
{"silent", no_argument, NULL, 'Q'},
{"synclines", no_argument, NULL, 's'},
@@ -202,7 +210,7 @@
{ 0, 0, 0, 0 },
};
-#define OPTSTRING "B:D:EF:GH:I:L:M:N:PQR:S:T:U:bcd::el:m:o:st:"
+#define OPTSTRING "B:D:EF:GH:I:L:M:N:PQR:S:T:U:bcd::el:m:o:r:st:"
int
main (int argc, char *const *argv, char *const *envp)
@@ -271,6 +279,7 @@
case 'U':
case 't':
case 'm':
+ case 'r':
/* Arguments that cannot be handled until later are accumulated. */
new = xmalloc (sizeof *new);
@@ -432,6 +441,7 @@
{
macro_definition *next;
char *macro_value;
+ char *optarg = defines->macro;
switch (defines->code)
{
@@ -439,27 +449,38 @@
{
m4_symbol_value *value = m4_symbol_value_create ();
- macro_value = strchr (defines->macro, '=');
+ macro_value = strchr (optarg, '=');
if (macro_value == NULL)
macro_value = "";
else
*macro_value++ = '\0';
m4_set_symbol_value_text (value, xstrdup (macro_value));
- m4_symbol_pushdef (M4SYMTAB, defines->macro, value);
+ m4_symbol_pushdef (M4SYMTAB, optarg, value);
}
break;
case 'U':
- m4_symbol_delete (M4SYMTAB, defines->macro);
+ m4_symbol_delete (M4SYMTAB, optarg);
break;
case 't':
- m4_set_symbol_name_traced (M4SYMTAB, defines->macro);
+ m4_set_symbol_name_traced (M4SYMTAB, optarg);
break;
case 'm':
- m4_module_load (context, defines->macro, 0);
+ m4_module_load (context, optarg, 0);
+ break;
+
+ case 'r':
+ m4_set_regexp_syntax_opt (context,
+ m4_regexp_syntax_encode (optarg));
+ if (m4_get_regexp_syntax_opt (context) < 0)
+ {
+ M4ERROR ((m4_get_warning_status_opt (context), 0,
+ _("Bad regexp syntax option: `%s'"), optarg));
+ abort ();
+ }
break;
default:
Index: m4--devo--0/m4/m4module.h
===================================================================
--- m4--devo--0.orig/m4/m4module.h 2006-07-07 12:14:09.000000000 +0100
+++ m4--devo--0/m4/m4module.h 2006-07-07 12:14:52.000000000 +0100
@@ -1,7 +1,7 @@
/* GNU m4 -- A simple macro processor
Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 1999, 2000, 2003,
- 2004, 2005 Free Software Foundation, Inc.
+ 2004, 2005, 2006 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -113,11 +113,12 @@
M4FIELD(m4_syntax_table *, syntax_table, syntax) \
M4FIELD(FILE *, debug_file, debug_file) \
M4FIELD(m4_obstack, trace_messages, trace_messages) \
- M4FIELD(int, warning_status_opt, warning_status) \
- M4FIELD(bool, no_gnu_extensions_opt, no_gnu_extensions) \
- M4FIELD(int, nesting_limit_opt, nesting_limit) \
- M4FIELD(int, debug_level_opt, debug_level) \
- M4FIELD(int, max_debug_arg_length_opt, max_debug_arg_length)\
+ M4FIELD(int, warning_status_opt, warning_status) \
+ M4FIELD(bool, no_gnu_extensions_opt, no_gnu_extensions) \
+ M4FIELD(int, nesting_limit_opt, nesting_limit) \
+ M4FIELD(int, debug_level_opt, debug_level) \
+ M4FIELD(int, max_debug_arg_length_opt, max_debug_arg_length)\
+ M4FIELD(int, regexp_syntax_opt, regexp_syntax) \
#define m4_context_opt_bit_table \
@@ -274,6 +275,13 @@
+/* --- REGEXP SYNTAX --- */
+
+extern const char * m4_regexp_syntax_decode (int);
+extern int m4_regexp_syntax_encode (const char *);
+
+
+
/* --- SYNTAX TABLE DEFINITIONS --- */
extern m4_syntax_table *m4_syntax_create (void);
Index: m4--devo--0/m4/m4private.h
===================================================================
--- m4--devo--0.orig/m4/m4private.h 2006-07-07 12:14:09.000000000 +0100
+++ m4--devo--0/m4/m4private.h 2006-07-07 12:14:52.000000000 +0100
@@ -60,10 +60,11 @@
/* Option flags (set in src/main.c). */
int warning_status; /* -E */
- bool no_gnu_extensions; /* -G */
+ bool no_gnu_extensions; /* -G */
int nesting_limit; /* -L */
int debug_level; /* -d */
int max_debug_arg_length; /* -l */
+ int regexp_syntax; /* -r */
int opt_flags;
/* __PRIVATE__: */
@@ -98,6 +99,8 @@
# define m4_set_debug_level_opt(C, V) ((C)->debug_level = (V))
# define m4_get_max_debug_arg_length_opt(C) ((C)->max_debug_arg_length)
# define m4_set_max_debug_arg_length_opt(C, V) ((C)->max_debug_arg_length=(V))
+# define m4_get_regexp_syntax_opt(C) ((C)->regexp_syntax)
+# define m4_set_regexp_syntax_opt(C, V) ((C)->regexp_syntax = (V))
# define m4_get_prefix_builtins_opt(C) \
(BIT_TEST((C)->opt_flags, M4_OPT_PREFIX_BUILTINS_BIT))
Index: m4--devo--0/m4/resyntax.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ m4--devo--0/m4/resyntax.c 2006-07-07 12:14:52.000000000 +0100
@@ -0,0 +1,117 @@
+/* GNU m4 -- A simple macro processor
+ Copyright (C) 2006 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301 USA
+*/
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <regex.h>
+#include <string.h>
+
+#include "m4private.h"
+#include "strcase.h"
+
+typedef struct {
+ const char *spec;  /* syntax name; NULL in the table's terminating entry */
+ const int code;    /* RE_SYNTAX_* value for SPEC; -1 in the terminator */
+} m4_resyntax;
+
+/* The syntaxes named in this table are saved into frozen files.  Changing
+   the mappings will break programs that load a frozen file made before
+   such a change...  The table is private to this file and read-only;
+   m4_regexp_syntax_encode() and m4_regexp_syntax_decode() search it.  */
+static const m4_resyntax m4_resyntax_map[] =
+{
+  { "AWK",                  RE_SYNTAX_AWK },
+  { "BASIC",                RE_SYNTAX_POSIX_BASIC },
+  { "BSD_M4",               RE_SYNTAX_POSIX_EXTENDED },
+  { "ED",                   RE_SYNTAX_ED },
+  { "EGREP",                RE_SYNTAX_EGREP },
+  { "EMACS",                RE_SYNTAX_EMACS },
+  { "EXTENDED",             RE_SYNTAX_POSIX_EXTENDED },
+  { "GAWK",                 RE_SYNTAX_GNU_AWK },
+  { "GNU_AWK",              RE_SYNTAX_GNU_AWK },
+  { "GNU_EGREP",            RE_SYNTAX_EGREP },
+  { "GNU_EMACS",            RE_SYNTAX_EMACS },
+  { "GNU_M4",               RE_SYNTAX_EMACS },
+  { "GREP",                 RE_SYNTAX_GREP },
+  { "MINIMAL",              RE_SYNTAX_POSIX_MINIMAL_BASIC },
+  { "MINIMAL_BASIC",        RE_SYNTAX_POSIX_MINIMAL_BASIC },
+  { "POSIX_AWK",            RE_SYNTAX_POSIX_AWK },
+  { "POSIX_BASIC",          RE_SYNTAX_POSIX_BASIC },
+  { "POSIX_EGREP",          RE_SYNTAX_POSIX_EGREP },
+  { "POSIX_EXTENDED",       RE_SYNTAX_POSIX_EXTENDED },
+  { "POSIX_MINIMAL",        RE_SYNTAX_POSIX_MINIMAL_BASIC },
+  { "POSIX_MINIMAL_BASIC",  RE_SYNTAX_POSIX_MINIMAL_BASIC },
+  { "SED",                  RE_SYNTAX_SED },
+  /* Terminator: its fields double as the "no match" results.  */
+  { NULL,                   -1 }
+};
+
+
+/* Return the internal code representing the syntax SPEC, or -1 if
+   SPEC is invalid or cannot be copied.  `m4_resyntax_map' is searched
+   case insensitively, after replacing any spaces or dashes in a copy
+   of SPEC with underscore characters.  Possible matches for the
+   "GNU_M4" element then, are "gnu m4", "GNU-m4" or "Gnu_M4".  */
+int
+m4_regexp_syntax_encode (const char *spec)
+{
+  const m4_resyntax *resyntax;
+  char *canonical, *p;
+
+  assert (spec);
+  canonical = strdup (spec);
+  if (!canonical)
+    return -1;
+
+  /* Canonicalise the copy; SPEC itself is left untouched.  */
+  for (p = canonical; *p != '\0'; ++p)
+    {
+      if ((*p == ' ') || (*p == '-'))
+        *p = '_';
+    }
+
+  /* Match on CANONICAL, not SPEC, so "gnu m4" and "GNU-m4" are found.  */
+  for (resyntax = m4_resyntax_map; resyntax->spec != NULL; ++resyntax)
+    {
+      if (!strcasecmp (resyntax->spec, canonical))
+        break;
+    }
+
+  free (canonical);
+  return resyntax->code;  /* -1 from the terminator if nothing matched */
+}
+
+
+/* Return the first name listed in `m4_resyntax_map' for CODE, or NULL
+   if no entry in the table carries CODE.  */
+const char *
+m4_regexp_syntax_decode (int code)
+{
+  const m4_resyntax *entry = m4_resyntax_map;
+
+  /* Stop on CODE or on the NULL-named terminator, whichever is first.  */
+  while (entry->spec != NULL && entry->code != code)
+    {
+      ++entry;
+    }
+
+  return entry->spec;
+}
Index: m4--devo--0/Makefile.am
===================================================================
--- m4--devo--0.orig/Makefile.am 2006-07-07 12:14:09.000000000 +0100
+++ m4--devo--0/Makefile.am 2006-07-07 12:14:52.000000000 +0100
@@ -228,6 +228,7 @@
m4/output.c \
m4/path.c \
m4/pathconf.h \
+ m4/resyntax.c \
m4/symtab.c \
m4/syntax.c \
m4/utility.c
Index: m4--devo--0/doc/m4.texinfo
===================================================================
--- m4--devo--0.orig/doc/m4.texinfo 2006-07-07 12:14:09.000000000 +0100
+++ m4--devo--0/doc/m4.texinfo 2006-07-07 12:22:41.000000000 +0100
@@ -140,6 +140,7 @@
* Other tokens:: Other kinds of input tokens
* Comments:: Comments in m4 input
* Input processing:: How m4 copies input to output
+* Regular expression syntax:: How m4 interprets regular expressions
How to invoke macros
@@ -157,7 +158,7 @@
* Undefine:: Deleting a macro
* Defn:: Renaming macros
* Pushdef:: Temporarily redefining macros
-* Erenamesyms and Renamesyms:: Renaming macros with regular expressions
+* Renamesyms:: Renaming macros with regular expressions
* Indir:: Indirect call of macros
* Builtin:: Indirect call of builtins
@@ -182,6 +183,7 @@
* Dnl:: Deleting whitespace in input
* Changequote:: Changing the quote characters
* Changecom:: Changing the comment delimiters
+* Changeresyntax:: Changing the regular expression syntax
* Changesyntax:: Changing the lexical structure of the input
* M4wrap:: Saving input until end of input
@@ -208,10 +210,10 @@
* Len:: Calculating length of strings
* Index:: Searching for substrings
-* Eregexp and Regexp:: Searching for regular expressions
+* Regexp:: Searching for regular expressions
* Substr:: Extracting substrings
* Translit:: Translating characters
-* Epatsubst and Patsubst:: Substituting text by regular expression
+* Patsubst:: Substituting text by regular expression
* Format:: Formatting strings (printf-like)
Macros for doing arithmetic
@@ -392,6 +394,13 @@
@samp{m4_define} instead of @samp{define}, and @samp{m4___file__}
instead of @samp{__file__}.
address@hidden -r @var{RESYNTAX-SPEC}
address@hidden address@hidden
+Set the regular expression syntax according to @var{RESYNTAX-SPEC}.
+When this option is not given, @sc{gnu} M4 uses emacs compatible
+regular expressions. @xref{Changeresyntax}, for more details on the
+format and meaning of @var{RESYNTAX-SPEC}.
+
@item -M @var{DIRECTORY}
@itemx address@hidden
Specify an alternate @var{DIRECTORY} to search for modules. This option
@@ -603,7 +612,7 @@
call of the macro will be shown, giving descriptive names to the
arguments, e.g.,
address@hidden {Builtin (gnu)} regexp (@var{string}, @var{regexp}, @w{opt
@var{replacement})}
address@hidden {Builtin (gnu)} regexp (@var{string}, @var{regexp}, @w{opt
@var{replacement},} @w{opt @var{resyntax})}
Description of @samp{regexp}.
@end deffn
@@ -649,6 +658,7 @@
* Other tokens:: Other kinds of input tokens
* Comments:: Comments in m4 input
* Input processing:: How m4 copies input to output
+* Regular expression syntax:: How m4 interprets regular expressions
@end menu
@node Names
@@ -771,6 +781,11 @@
This process continues until there are no more macro calls to expand and
all the input has been consumed.
+@node Regular expression syntax
+@section Regular Expression Syntax
+
+@include regexprops-generic.texi
+
@node Macros
@chapter How to invoke macros
@@ -1024,7 +1039,7 @@
* Undefine:: Deleting a macro
* Defn:: Renaming macros
* Pushdef:: Temporarily redefining macros
-* Erenamesyms and Renamesyms:: Renaming macros with regular expressions
+* Renamesyms:: Renaming macros with regular expressions
* Indir:: Indirect call of macros
* Builtin:: Indirect call of builtins
@@ -1408,18 +1423,23 @@
It is possible to temporarily redefine a builtin with @code{pushdef}
and @code{defn}.
address@hidden Erenamesyms and Renamesyms
address@hidden Renamesyms
@section Renaming macros with regular expressions
@cindex regular expressions
@cindex macros, how to rename
@cindex renaming macros
@cindex GNU extensions
address@hidden {Builtin (gnu)} erenamesyms (@var{regexp}, @var{replacement})
-Global renaming of macros is done by @code{erenamesyms}, which selects
address@hidden {Builtin (gnu)} renamesyms (@var{regexp}, @var{replacement},
@w{opt @var{resyntax})}
+Global renaming of macros is done by @code{renamesyms}, which selects
all macros with names that match @var{regexp}, and renames each match
according to @var{replacement}.
+If @var{resyntax} is given, the particular flavor of regular
+expression understood with respect to @var{regexp} can be changed from
+the current default. @xref{Changeresyntax}, for details of the values
+that can be given for this argument.
+
A macro that does not have a name that matches @var{regexp} is left
with its original name. If only part of the name matches, any part of
the name that is not covered by @var{regexp} is copied to the
@@ -1435,37 +1455,35 @@
@var{regexp}, and @samp{\&} being the text matched by the entire
regular expression.
-The builtin macro @code{erenamesyms} is recognized only when given
+The builtin macro @code{renamesyms} is recognized only when given
arguments.
@end deffn
Here is an example that performs the same renaming as the
@option{--prefix-builtins} option. Where @option{--prefix-builtins}
-only renames M4 builtin macros, @code{erenamesyms} will rename any
+only renames M4 builtin macros, @code{renamesyms} will rename any
macros that match when it runs, including text macros.
@example
-erenamesyms(`^.*$', `m4_\&')
+renamesyms(`^.*$', `m4_\&')
@result{}
@end example
-Here is a more realistic example that performs a similar renaming on
-macros with lowercase names, except that it ignores macros with names
-that begin with @samp{_}, and avoids creating macros with names that
-begin with @samp{m4_m4}.
+If @var{resyntax} is given, @var{regexp} must be given according to
+the syntax chosen, though the default regular expression syntax
+remains unchanged for other invocations. Here is a more realistic
+example that performs a similar renaming on macros with lowercase
+names, except that it ignores macros with names that begin with
address@hidden, and avoids creating macros with names that begin with
address@hidden
@example
-erenamesyms(`^[^_]\w*$', `m4_\&')
+renamesyms(`^[^_]\w*$', `m4_\&')
@result{}
-m4_erenamesyms(`^m4_m4(\w*)$', `m4_\1')
+m4_renamesyms(`^m4_m4(\w*)$', `m4_\1', `POSIX_EXTENDED')
@result{}
@end example
address@hidden {Builtin (gnu)} renamesyms (@var{regexp}, @var{replacement})
-Same as @code{erenamesyms}, but using Basic Regular Expression syntax,
-see @xref{Eregexp and Regexp}, for more details.
address@hidden deffn
-
@node Indir
@section Indirect call of macros
@@ -1945,6 +1963,7 @@
* Dnl:: Deleting whitespace in input
* Changequote:: Changing the quote characters
* Changecom:: Changing the comment delimiters
+* Changeresyntax:: Changing the regular expression syntax
* Changesyntax:: Changing the lexical structure of the input
* M4wrap:: Saving input until end of input
@end menu
@@ -2088,6 +2107,79 @@
@end example
+@node Changeresyntax
+@section Changing the regular expression syntax
+
+@cindex regular expression syntax, changing
+@cindex GNU extensions
+@deffn {Builtin (gnu)} changeresyntax (@w{opt @var{resyntax}})
+By default, the @sc{gnu} extensions @code{patsubst}, @code{regexp} and,
+more recently, @code{renamesyms} continue to use Emacs-style regular
+expression syntax (@pxref{Regular expression syntax}).
+
+The @code{changeresyntax} macro expands to nothing, but changes the
+default regular expression syntax used by M4 according to the value of
address@hidden, equivalent to passing @var{resyntax} as the argument to
address@hidden when invoking @code{m4}. @xref{Invoking m4},
+for more details.
address@hidden deffn
+
+Any one of the values below will set the default regular expression
+syntax as described.  Case is not important, and @kbd{-} or @kbd{ }
+may optionally be substituted for @kbd{_} in the given names:
+
address@hidden @dfn
address@hidden AWK
address@hidden regular expression syntax}, for details.
+
address@hidden BASIC
address@hidden POSIX_BASIC
address@hidden regular expression syntax}, for details.
+
address@hidden BSD_M4
address@hidden regular expression syntax}, for details.
+
address@hidden ED
address@hidden regular expression syntax}, for details.
+
address@hidden EMACS
address@hidden GNU_EMACS
address@hidden regular expression syntax}, for details.
+
address@hidden EXTENDED
address@hidden POSIX_EXTENDED
address@hidden regular expression syntax}, for details.
+
address@hidden GNU_AWK
address@hidden GAWK
address@hidden regular expression syntax}, for details.
+
address@hidden GNU_EGREP
address@hidden EGREP
address@hidden regular expression syntax}, for details.
+
address@hidden GNU_M4
address@hidden regular expression syntax}, for details.
+
address@hidden GREP
address@hidden regular expression syntax}, for details.
+
address@hidden MINIMAL
address@hidden POSIX_MINIMAL
address@hidden POSIX_MINIMAL_BASIC
address@hidden regular expression syntax}, for details.
+
address@hidden POSIX_AWK
address@hidden regular expression syntax}, for details.
+
address@hidden POSIX_EGREP
address@hidden regular expression syntax}, for details.
+
address@hidden SED
address@hidden regular expression syntax}, for details.
address@hidden table
+
+
@node Changesyntax
@section Changing the lexical structure of the input
@@ -2952,10 +3044,10 @@
@menu
* Len:: Calculating length of strings
* Index:: Searching for substrings
-* Eregexp and Regexp:: Searching for regular expressions
+* Regexp:: Searching for regular expressions
* Substr:: Extracting substrings
* Translit:: Translating characters
-* Epatsubst and Patsubst:: Substituting text by regular expression
+* Patsubst:: Substituting text by regular expression
* Format:: Formatting strings (printf-like)
@end menu
@@ -2998,12 +3090,12 @@
@result{}-1
@end example
address@hidden Eregexp and Regexp
address@hidden Regexp
@section Searching for regular expressions
@cindex regular expressions
@cindex GNU extensions
address@hidden {Builtin (gnu)} eregexp (@var{string}, @var{regexp}, @w{opt
@var{replacement})}
address@hidden {Builtin (gnu)} regexp (@var{string}, @var{regexp}, @w{opt
@var{replacement},} @w{opt @var{resyntax})}
Searching for regular expressions is done with the builtin
@code{regexp}, which searches for @var{regexp} in @var{string}. The
syntax of regular expressions is similar to that of Perl, @sc{gnu} Awk
@@ -3014,13 +3106,18 @@
is specified and matches, then it expands into @var{replacement}. If
@var{regexp} does not match anywhere in @var{string}, it expands to -1.
-The builtin macro @code{eregexp} is recognized only when given arguments.
+If @var{resyntax} is given, the particular flavor of regular
+expression understood with respect to @var{regexp} can be changed from
+the current default. @xref{Changeresyntax}, for details of the values
+that can be given for this argument.
+
+The builtin macro @code{regexp} is recognized only when given arguments.
@end deffn
@example
-eregexp(`GNUs not Unix', `\<[a-z]\w+')
+regexp(`GNUs not Unix', `\<[a-z]\w+')
@result{}5
-eregexp(`GNUs not Unix', `\<Q\w*')
+regexp(`GNUs not Unix', `\<Q\w*')
@result{}-1
@end example
@@ -3030,27 +3127,21 @@
@samp{\&} being the text the entire regular expression matched.
@example
-eregexp(`GNUs not Unix', `\w(\w+)$', `*** \& *** \1 ***')
+regexp(`GNUs not Unix', `\w\(\w+\)$', `*** \& *** \1 ***')
@result{}*** Unix *** nix ***
@end example
-Originally, regular expressions were much less powerful (basically only
address@hidden was available), but to keep backward compatibility, new
-operators were implemented with previously invalid sequences, such as
address@hidden(}. The following macro is exactly equivalent to @code{eregexp},
-but using the old, clumsy syntax.
-
address@hidden {Builtin (gnu)} regexp (@var{string}, @var{regexp}, @w{opt
@var{replacement})}
-Same as @code{eregexp}, but using the old and clumsy ``Basic Regular
-Expression'' syntax, the same as in @sc{gnu} Emacs. @xref{Regexps, ,
-Syntax of Regular Expressions, emacs, The @sc{gnu} Emacs Manual}.
address@hidden deffn
+If @var{resyntax} is given, @var{regexp} must be given according to
+the syntax chosen, though the default regular expression syntax
+remains unchanged for other invocations:
@example
-regexp(`GNUs not Unix', `\w\(\w+\)$', `*** \& *** \1 ***')
+regexp(`GNUs not Unix', `\w(\w+)$', `*** \& *** \1 ***', `POSIX_EXTENDED')
@result{}*** Unix *** nix ***
@end example
+
+
@node Substr
@section Extracting substrings
@@ -3114,18 +3205,22 @@
while converting them to lowercase. The two first cases are by far the
most common.
address@hidden Epatsubst and Patsubst
address@hidden Patsubst
@section Substituting text by regular expression
@cindex regular expressions
@cindex pattern substitution
@cindex substitution by regular expression
@cindex GNU extensions
address@hidden {Builtin (gnu)} epatsubst (@var{string}, @var{regexp}, @w{opt
@var{replacement})}
-Global substitution in a string is done by @code{epatsubst}, which
address@hidden {Builtin (gnu)} patsubst (@var{string}, @var{regexp}, @w{opt
@var{replacement},} @w{opt @var{resyntax})}
+Global substitution in a string is done by @code{patsubst}, which
searches @var{string} for matches of @var{regexp}, and substitutes
address@hidden for each match. It uses Extended Regular Expressions
-syntax.
address@hidden for each match.
+
+If @var{resyntax} is given, the particular flavor of regular
+expression understood with respect to @var{regexp} can be changed from
+the current default. @xref{Changeresyntax}, for details of the values
+that can be given for this parameter.
The parts of @var{string} that are not covered by any match of
@var{regexp} are copied to the expansion. Whenever a match is found, the
@@ -3142,23 +3237,23 @@
The @var{replacement} argument can be omitted, in which case the text
matched by @var{regexp} is deleted.
-The builtin macro @code{epatsubst} is recognized only when given
+The builtin macro @code{patsubst} is recognized only when given
arguments.
@end deffn
-When used with two arguments, while @code{eregexp} returns the position
-of the match, @code{epatsusbt} deletes it:
+When used with two arguments, while @code{regexp} returns the position
+of the match, @code{patsubst} deletes it:
@example
-epatsubst(`GNUs not Unix', `^', `OBS: ')
+patsubst(`GNUs not Unix', `^', `OBS: ')
@result{}OBS: GNUs not Unix
-epatsubst(`GNUs not Unix', `\<', `OBS: ')
+patsubst(`GNUs not Unix', `\<', `OBS: ')
@result{}OBS: GNUs OBS: not OBS: Unix
-epatsubst(`GNUs not Unix', `\w*', `(\&)')
+patsubst(`GNUs not Unix', `\w*', `(\&)')
@result{}(GNUs)() (not)() (Unix)
-epatsubst(`GNUs not Unix', `\w+', `(\&)')
+patsubst(`GNUs not Unix', `\w+', `(\&)')
@result{}(GNUs) (not) (Unix)
-epatsubst(`GNUs not Unix', `[A-Z][a-z]+')
+patsubst(`GNUs not Unix', `[A-Z][a-z]+')
@result{}GN not @comment
@end example
@@ -3170,63 +3265,43 @@
define(`upcase', `translit(`$*', `a-z', `A-Z')')dnl
define(`downcase', `translit(`$*', `A-Z', `a-z')')dnl
define(`capitalize1',
- `eregexp(`$1', `^(\w)(\w*)', `upcase(`\1')`'downcase(`\2')')')dnl
+ `regexp(`$1', `^\(\w\)\(\w*\)', `upcase(`\1')`'downcase(`\2')')')dnl
define(`capitalize',
- `epatsubst(`$1', `\w+', `capitalize1(`\&')')')dnl
+ `patsubst(`$1', `\w+', `capitalize1(`\&')')')dnl
capitalize(`GNUs not Unix')
@result{}Gnus Not Unix
@end example
-While @code{eregexp} replaces the whole input with the replacement as
-soon as there is a match, @code{epatsubst} replaces each
+If @var{resyntax} is given, @var{regexp} must be given according to
+the syntax chosen, though the default regular expression syntax
+remains unchanged for other invocations:
+
address@hidden
+define(`epatsubst',
+ `builtin(`patsubst', `$1', `$2', `$3', `POSIX_EXTENDED')')dnl
+epatsubst(`bar foo baz Foo', `(\w*) (foo|Foo)', `_\1_')
address@hidden _baz_
+patsubst(`bar foo baz Foo', `\(\w*\) \(foo\|Foo\)', `_\1_')
address@hidden _baz_
address@hidden example
+
+While @code{regexp} replaces the whole input with the replacement as
+soon as there is a match, @code{patsubst} replaces each
@emph{occurrence} of a match and preserves non matching pieces:
@example
define(`patreg',
-`epatsubst($@@)
-eregexp($@@)')dnl
-patreg(`bar foo baz Foo', `foo|Foo', `FOO')
+`patsubst($@@)
+regexp($@@)')dnl
+patreg(`bar foo baz Foo', `foo\|Foo', `FOO')
@result{}bar FOO baz FOO
@result{}FOO
-patreg(`aba abb 121', `(.)(.)\1', `\2\1\2')
+patreg(`aba abb 121', `\(.\)\(.\)\1', `\2\1\2')
@result{}bab abb 212
@result{}bab
@end example
address@hidden {Builtin (gnu)} patsubst (@var{string}, @var{regexp}, @w{opt
@var{replacement})}
-Same as @code{epatsubst}, but using Basic Regular Expression syntax, see
address@hidden and Regexp}, for more details.
address@hidden deffn
-
address@hidden No longer interesting for the documentation per se, but good
address@hidden for testing.
address@hidden
address@hidden
-patsubst(`GNUs not Unix', `^', `OBS: ')
address@hidden: GNUs not Unix
-patsubst(`GNUs not Unix', `\<', `OBS: ')
address@hidden: GNUs OBS: not OBS: Unix
-patsubst(`GNUs not Unix', `\w*', `(\&)')
address@hidden(GNUs)() (not)() (Unix)
-patsubst(`GNUs not Unix', `\w+', `(\&)')
address@hidden(GNUs) (not) (Unix)
-patsubst(`GNUs not Unix', `[A-Z][a-z]+')
address@hidden not @comment
address@hidden example
-
address@hidden
-define(`upcase', `translit(`$*', `a-z', `A-Z')')dnl
-define(`downcase', `translit(`$*', `A-Z', `a-z')')dnl
-define(`capitalize1',
- `regexp(`$1', `^\(\w\)\(\w*\)', `upcase(`\1')`'downcase(`\2')')')dnl
-define(`capitalize',
- `patsubst(`$1', `\w+', `capitalize1(`\&')')')dnl
-capitalize(`GNUs not Unix')
address@hidden Not Unix
address@hidden example
address@hidden ignore
-
@node Format
@section Formatted output
@@ -3814,6 +3889,11 @@
Uses @var{string1} and @var{string2} as the beginning quote and end quote
strings.
address@hidden R @var{length} @key{NL} @var{string} @key{NL}
+Sets the default regexp syntax, where @var{string} encodes one of the
+regular expression syntaxes supported by @sc{gnu} M4.
address@hidden, for more details.
+
@item M @var{length} @key{NL} @var{string} @key{NL}
Names a module which will be searched for according to the module search path
and loaded. Modules loaded from a frozen file don't add their builtin entries
@@ -3913,16 +3993,15 @@
@item
Searches and text substitution through regular expressions are supported
-by the @code{eregexp}, @code{regexp} (@pxref{Eregexp and Regexp}) and
address@hidden, @code{patsubst} (@pxref{Epatsubst and Patsubst})
-builtins.
+by the @code{regexp} (@pxref{Regexp}) and @code{patsubst}
+(@pxref{Patsubst}) builtins.
@item
The syntax of regular expressions in M4 has never clearly formalized.
While Open BSD M4 uses extended regular expressions for @code{regexp}
and @code{patsubst}, @sc{gnu} M4 uses basic regular expression. Use
address@hidden (@pxref{Eregexp and Regexp}) and @code{epatsubst}
-(@pxref{Epatsubst and Patsubst}) for extended regular expressions.
address@hidden (@pxref{Changeresyntax}) to change the regular
+expression syntax used by @sc{gnu} M4.
@item
The output of shell commands can be read into @code{m4} with
@@ -3956,8 +4035,8 @@
In addition to the above extensions, GNU @code{m4} implements the
following command line options: @samp{-F}, @samp{-G}, @samp{-I},
@samp{-L}, @samp{-R}, @samp{-V}, @samp{-W}, @samp{-d},
address@hidden, @samp{-o} and @samp{-t}. @xref{Invoking m4}, for a
-description of these options.
address@hidden, @samp{-o}, @samp{-r} and @samp{-t}. @xref{Invoking m4},
+for a description of these options.
Also, the debugging and tracing facilities in GNU @code{m4} are much
more extensive than in most other versions of @code{m4}.
Index: m4--devo--0/tests/generate.awk
===================================================================
--- m4--devo--0.orig/tests/generate.awk 2006-07-07 12:14:09.000000000 +0100
+++ m4--devo--0/tests/generate.awk 2006-07-07 12:14:52.000000000 +0100
@@ -31,8 +31,8 @@
print ;
print "AT_BANNER([Documentation examples.])";
print ;
- # stop spurious warnings in the erenamesyms checks
- print "m4_pattern_allow([^m4_(m4|erenamesyms|)$])"
+ # stop spurious warnings in the renamesyms checks
+ print "m4_pattern_allow([^m4_(m4|renamesyms|)$])"
print ;
}
Index: m4--devo--0/src/freeze.c
===================================================================
--- m4--devo--0.orig/src/freeze.c 2006-07-07 12:14:09.000000000 +0100
+++ m4--devo--0/src/freeze.c 2006-07-07 12:14:52.000000000 +0100
@@ -24,20 +24,19 @@
#include "m4.h"
#include "m4private.h"
-static int decode_char (FILE *in);
-static void issue_expect_message (int expected);
-static int produce_char_dump (char *buf, int ch);
-static void produce_syntax_dump (FILE *file, m4_syntax_table *syntax,
- char ch);
-static void produce_module_dump (FILE *file, lt_dlhandle handle);
-static void produce_symbol_dump (m4 *context, FILE *file,
- m4_symbol_table *symtab);
-static void *dump_symbol_CB (m4_symbol_table *symtab,
- const char *symbol_name, m4_symbol *symbol,
- void *userdata);
+static int decode_char (FILE *);
+static void issue_expect_message (int);
+static int produce_char_dump (char *, int);
+static void produce_resyntax_dump (m4 *, FILE *);
+static void produce_syntax_dump (FILE *, m4_syntax_table *, char);
+static void produce_module_dump (FILE *, lt_dlhandle);
+static void produce_symbol_dump (m4 *, FILE *, m4_symbol_table *);
+static void *dump_symbol_CB (m4_symbol_table *, const char *,
+ m4_symbol *, void *);
/* Produce a frozen state to the given file NAME. */
+
static int
produce_char_dump (char *buf, int ch)
{
@@ -77,6 +76,32 @@
return strlen (buf);
}
+
+/* Produce the 'R14\nPOSIX_EXTENDED\n' frozen file dump of the current
+   default regular expression syntax of CONTEXT, written to FILE.  Note
+   that it would be a little faster to dump the numeric syntax code, but
+   the representation of RE_SYNTAX_POSIX_EXTENDED may change in future
+   (or alternative) implementations of the regex library, so we dump the
+   syntax name, preceded by its length, instead.  */
+
+static void
+produce_resyntax_dump (m4 *context, FILE *file)
+{
+  int code = m4_get_regexp_syntax_opt (context);
+
+  /* Don't dump default syntax code (`0' for GNU_EMACS).  */
+  if (code)
+    {
+      const char *resyntax = m4_regexp_syntax_decode (code);
+
+      /* NOTE(review): relies on M4ERROR not returning when given
+         EXIT_FAILURE; otherwise RESYNTAX is NULL below -- confirm.  */
+      if (!resyntax)
+        M4ERROR ((EXIT_FAILURE, 0,
+                  _("Invalid regexp syntax code `%d'"), code));
+
+      /* strlen() yields a size_t, which does not match `%d'; convert
+         explicitly so the argument agrees with the conversion.  */
+      fprintf (file, "R%lu\n%s\n", (unsigned long) strlen (resyntax),
+               resyntax);
+    }
+}
+
#define MAX_CHAR_LENGTH 4 /* '\377' -> 4 characters */
static void
@@ -238,6 +263,10 @@
fputc ('\n', file);
}
+ /* Dump regular expression syntax. */
+
+ produce_resyntax_dump (context, file);
+
/* Dump syntax table. */
produce_syntax_dump (file, M4SYNTAX, 'I');
@@ -515,6 +544,30 @@
break;
+ case 'R':
+
+ if (version < 2)
+ {
+ /* 'R' operator is not supported in format version 1. */
+ M4ERROR ((EXIT_FAILURE, 0, _("Ill-formed frozen file")));
+ }
+
+ GET_CHARACTER;
+ GET_NUMBER (number[0]);
+ VALIDATE ('\n');
+ GET_STRING (file, string[0], allocated[0], number[0]);
+ VALIDATE ('\n');
+
+ m4_set_regexp_syntax_opt (context,
+ m4_regexp_syntax_encode (string[0]));
+ if (m4_get_regexp_syntax_opt (context) < 0)
+ {
+ M4ERROR ((EXIT_FAILURE, 0,
+ _("Unknown regexp syntax code %s"), string[0]));
+ }
+
+ break;
+
case 'S':
if (version < 2)
Index: m4--devo--0/NEWS
===================================================================
--- m4--devo--0.orig/NEWS 2006-07-07 12:14:48.000000000 +0100
+++ m4--devo--0/NEWS 2006-07-07 11:55:54.000000000 +0100
@@ -14,6 +14,20 @@
* The '$' syntax class is now enabled. See the info docs for examples.
+* New builtin `renamesyms' allows programmatic renaming of all symbols
+ according to a regular expression.
+
+* New `-r' command-line option changes the default regular expression
+ syntax used by M4. Without this option, M4 continues to use
+ RE_SYNTAX_EMACS style expressions. A new section in the info docs
+ explains the differences between them, and what builtins are affected.
+
+* The experimental `epatsubst' and `eregexp' have been removed in favor
+ of a new `changeresyntax' builtin.
+
+* `patsubst' and `regexp' have a new optional 4th argument to use a
+ different regular expression syntax for the duration of that invocation.
+
Version beta 1.4q - August 2001, by Gary V. Vaughan
* Support for the experimental `changeword' has been dropped.
Index: m4--devo--0/ChangeLog
===================================================================
--- m4--devo--0.orig/ChangeLog 2006-07-06 00:30:27.000000000 +0100
+++ m4--devo--0/ChangeLog 2006-07-07 12:45:02.000000000 +0100
@@ -1,3 +1,36 @@
+2006-07-07 Gary V. Vaughan <address@hidden>
+
+ * m4/m4module.h (m4_regexp_syntax_decode, m4_regexp_syntax_encode)
+ (m4_get_regexp_syntax_opt, m4_set_regexp_syntax_opt): Declare
+ new functions for managing regexp syntax options.
+ * m4/m4private.h (m4): Add regexp_syntax field.
+ * m4/resyntax.c: New file implements the above.
+ * Makefile.am (m4_libm4_la_SOURCES): Add m4/resyntax.c.
+ * modules/gnu.c: Put builtin definitions in alphabetical order.
+ (RE_SYNTAX_BRE, RE_SYNTAX_ERE, builtin_eregexp, builtin_epatsubst)
+ (builtin_erenamesyms, m4_regexp_do, m4_patsubst_do)
+ (m4_renamesyms_do): Removed.
+ (builtin_changeresyntax): New builtin to change regular expression
+ syntax.
+ (m4_resyntax_encode_safe): Factor out diagnostics code.
+ * src/freeze.c (produce_resyntax_dump): New function to dump
+ default regexp syntax specifier to frozen file.
+ (reload_frozen_state): Updated to action 'R' directive.
+ * src/main.c (usage): Describe new -r option.
+ (long_options, OPTSTRING): Declare it.
+ (main): Encode and store cli regexp syntax option argument.
+ * tests/generate.awk (m4_pattern_allow): Updated for renamesyms.
+ * doc/m4.texinfo (Erenamesyms and Renamesyms, Eregexp and Regexp)
+ (Epatsubst and Patsubst): Renamed to...
+ (Renamesyms, Regexp, Patsubst): ...these respectively. Updated
+ documentation and added new examples.
+ (Changeresyntax): New section describing changeresyntax builtin,
+ and regexp syntax names.
+ (Regular expression syntax): New section describing differences
+ between various regular expression syntaxes.
+ (Frozen files): Document 'R' directive.
+ * NEWS: Updated.
+
2006-07-05 Eric Blake <address@hidden>
Fix all testsuite failures on cygwin.
signature.asc
Description: OpenPGP digital signature
- 26-gary-changeresyntax.patch,
Gary V. Vaughan <=