[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: branch-1_4 regexp coredump
From: |
Eric Blake |
Subject: |
Re: branch-1_4 regexp coredump |
Date: |
Fri, 18 Aug 2006 16:24:55 +0000 (UTC) |
User-agent: |
Loom/3.14 (http://gmane.org/) |
Eric Blake <ebb9 <at> byu.net> writes:
>
> I still need to work on this. We have a memory leak (and have done, since m4
> 0.75 when regexp was introduced), because re_search allocates memory in
> re_registers on success if we haven't done it ourselves.
>
With this patch, I no longer run out of memory on:
$ m4
divert(-1)include(examples/forloop.m4)
forloop(i,1,1000000,`regexp(abc,\(\(b\)\))')
m4exit
I imagine this will improve the performance of m4 as used by automake, which
does its fair share of regexp and patsubst, now that they don't leak memory,
although I haven't actually tried profiling it. There is also a minor
performance tweak for when changeword is enabled.
2006-08-18 Eric Blake <address@hidden>
Regular expressions were leaking memory.
* src/builtin.c (init_pattern_buffer, free_pattern_buffer): New
helper methods.
(m4_regexp, m4_patsubst): Avoid memory leak.
* src/input.c (init_pattern_buffer) [ENABLE_CHANGEWORD]: Make
static.
(set_word_regexp) [ENABLE_CHANGEWORD]: Avoid memory leak. Change
from O(n^2) to O(n) for calculating word_start.
(next_token, peek_token) [ENABLE_CHANGEWORD]: Treat word_start as
O(1) bitmap, not O(n) search string.
* NEWS: Document this fix.
Index: NEWS
===================================================================
RCS file: /sources/m4/m4/NEWS,v
retrieving revision 1.1.1.1.2.53
diff -u -r1.1.1.1.2.53 NEWS
--- NEWS 18 Aug 2006 04:00:59 -0000 1.1.1.1.2.53
+++ NEWS 18 Aug 2006 15:52:26 -0000
@@ -7,6 +7,7 @@
* Fix buffer overruns in regexp and patsubst macros when handed a trailing
backslash in the replacement text, or when handling \n substitutions
beyond the number of \(\) groups.
+* Fix memory leak in regexp, patsubst, and changeword macros.
* The format macro now understands %F, %g, and %G.
* When loading frozen files, m4 now exits with status 63 if version
mismatch is detected.
Index: src/builtin.c
===================================================================
RCS file: /sources/m4/m4/src/Attic/builtin.c,v
retrieving revision 1.1.1.1.2.34
diff -u -r1.1.1.1.2.34 builtin.c
--- src/builtin.c 18 Aug 2006 03:39:11 -0000 1.1.1.1.2.34
+++ src/builtin.c 18 Aug 2006 15:52:26 -0000
@@ -483,7 +483,6 @@
"INTERNAL ERROR: bad token data type in define_macro ()"));
abort ();
}
- return;
}
static void
@@ -1703,6 +1702,33 @@
}
}
+/*------------------------------------------.
+| Initialize regular expression variables. |
+`------------------------------------------*/
+
+static void
+init_pattern_buffer (struct re_pattern_buffer *buf, struct re_registers *regs)
+{
+ buf->translate = NULL;
+ buf->fastmap = NULL;
+ buf->buffer = NULL;
+ buf->allocated = 0;
+ regs->start = NULL;
+ regs->end = NULL;
+}
+
+/*----------------------------------------.
+| Clean up regular expression variables. |
+`----------------------------------------*/
+
+static void
+free_pattern_buffer (struct re_pattern_buffer *buf, struct re_registers *regs)
+{
+ regfree (buf);
+ free (regs->start);
+ free (regs->end);
+}
+
/*--------------------------------------------------------------------------.
| Regular expression version of index. Given two arguments, expand to the |
| index of the first match of the second argument (a regexp) in the first. |
@@ -1729,31 +1755,26 @@
victim = TOKEN_DATA_TEXT (argv[1]);
regexp = TOKEN_DATA_TEXT (argv[2]);
- buf.buffer = NULL;
- buf.allocated = 0;
- buf.fastmap = NULL;
- buf.translate = NULL;
+ init_pattern_buffer (&buf, ®s);
msg = re_compile_pattern (regexp, strlen (regexp), &buf);
if (msg != NULL)
{
M4ERROR ((warning_status, 0,
"bad regular expression: `%s': %s", regexp, msg));
+ free_pattern_buffer (&buf, ®s);
return;
}
length = strlen (victim);
- startpos = re_search (&buf, victim, length, 0, length, ®s);
- free (buf.buffer);
+ /* Avoid overhead of allocating regs if we won't use it. */
+ startpos = re_search (&buf, victim, length, 0, length,
+ argc == 3 ? NULL : ®s);
if (startpos == -2)
- {
- M4ERROR ((warning_status, 0,
- "error matching regular expression \"%s\"", regexp));
- return;
- }
-
- if (argc == 3)
+ M4ERROR ((warning_status, 0,
+ "error matching regular expression \"%s\"", regexp));
+ else if (argc == 3)
shipout_int (obs, startpos);
else if (startpos >= 0)
{
@@ -1761,7 +1782,7 @@
substitute (obs, victim, repl, ®s);
}
- return;
+ free_pattern_buffer (&buf, ®s);
}
/*--------------------------------------------------------------------------.
@@ -1789,10 +1810,7 @@
regexp = TOKEN_DATA_TEXT (argv[2]);
- buf.buffer = NULL;
- buf.allocated = 0;
- buf.fastmap = NULL;
- buf.translate = NULL;
+ init_pattern_buffer (&buf, ®s);
msg = re_compile_pattern (regexp, strlen (regexp), &buf);
if (msg != NULL)
@@ -1846,8 +1864,7 @@
}
obstack_1grow (obs, '\0');
- free (buf.buffer);
- return;
+ free_pattern_buffer (&buf, ®s);
}
/* Finally, a placeholder builtin. This builtin is not installed by
Index: src/input.c
===================================================================
RCS file: /sources/m4/m4/src/Attic/input.c,v
retrieving revision 1.1.1.1.2.19
diff -u -r1.1.1.1.2.19 input.c
--- src/input.c 8 Aug 2006 23:17:44 -0000 1.1.1.1.2.19
+++ src/input.c 18 Aug 2006 15:52:26 -0000
@@ -682,12 +682,12 @@
#ifdef ENABLE_CHANGEWORD
-void
+static void
init_pattern_buffer (struct re_pattern_buffer *buf)
{
- buf->translate = 0;
- buf->fastmap = 0;
- buf->buffer = 0;
+ buf->translate = NULL;
+ buf->fastmap = NULL;
+ buf->buffer = NULL;
buf->allocated = 0;
}
@@ -719,7 +719,9 @@
/* If compilation worked, retry using the word_regexp struct.
Can't rely on struct assigns working, so redo the compilation. */
+ regfree (&word_regexp);
msg = re_compile_pattern (regexp, strlen (regexp), &word_regexp);
+ re_set_registers (&word_regexp, ®s, regs.num_regs, regs.start, regs.end);
if (msg != NULL)
{
@@ -738,8 +740,7 @@
for (i = 1; i < 256; i++)
{
test[0] = i;
- if (re_search (&word_regexp, test, 1, 0, 0, ®s) >= 0)
- strcat (word_start, test);
+ word_start[i] = re_search (&word_regexp, test, 1, 0, 0, NULL) >= 0;
}
}
@@ -826,7 +827,7 @@
#ifdef ENABLE_CHANGEWORD
- else if (!default_word_regexp && strchr (word_start, ch))
+ else if (!default_word_regexp && word_start[ch])
{
obstack_1grow (&token_stack, ch);
while (1)
@@ -960,7 +961,7 @@
if ((default_word_regexp && (isalpha (ch) || ch == '_'))
#ifdef ENABLE_CHANGEWORD
- || (! default_word_regexp && strchr (word_start, ch))
+ || (! default_word_regexp && word_start[ch])
#endif /* ENABLE_CHANGEWORD */
)
{