[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
more buffer vs. byte speedups
From: |
Eric Blake |
Subject: |
more buffer vs. byte speedups |
Date: |
Wed, 18 Feb 2009 18:54:18 +0000 (UTC) |
User-agent: |
Loom/3.14 (http://gmane.org/) |
I'm applying these patches to all three active branches. The first fixes a
potential bug in undivert due to a stack allocation of a 16k buffer; on some
systems, a stack overflow that overshoots a 4k guard page can lead to silent
termination of the program without invoking the SIGSEGV fault handler, or other
equally bad behavior.
Next, I made esyscmd dump directly into the obstack via fread, rather than
repeatedly calling fgetc. I also added detection for (unlikely) read errors
during esyscmd.
Finally, I found more places that could use memchr instead of byte-wise
searches, although the relative frequency of regexp (\), patsubst (\), and
format (%) don't give as much noticeable speedup as today's earlier patch to
macro expansion ($).
>From 01f216c3c98d41db7878b18c93d23e8dc38d8643 Mon Sep 17 00:00:00 2001
From: Eric Blake <address@hidden>
Date: Wed, 18 Feb 2009 11:25:01 -0700
Subject: [PATCH 1/3] Avoid risk of stack overflow.
* src/output.c (insert_file): Avoid stack allocation of large
buffer.
(freeze_diversions): Avoid spurious semicolon.
Signed-off-by: Eric Blake <address@hidden>
---
ChangeLog | 7 +++++++
src/output.c | 14 ++++++--------
2 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 0176fe4..cb35601 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2009-02-18 Eric Blake <address@hidden>
+
+ Avoid risk of stack overflow.
+ * src/output.c (insert_file): Avoid stack allocation of large
+ buffer.
+ (freeze_diversions): Avoid spurious semicolon.
+
2009-02-16 Eric Blake <address@hidden>
Avoid test failure due to different errno.
diff --git a/src/output.c b/src/output.c
index 0586b89..a39d1e5 100644
--- a/src/output.c
+++ b/src/output.c
@@ -1,7 +1,7 @@
/* GNU m4 -- A simple macro processor
Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2004, 2005, 2006,
- 2007, 2008 Free Software Foundation, Inc.
+ 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of GNU M4.
@@ -812,21 +812,19 @@ make_diversion (int divnum)
void
insert_file (FILE *file)
{
- char buffer[COPY_BUFFER_SIZE];
+ static char buffer[COPY_BUFFER_SIZE];
size_t length;
/* Optimize out inserting into a sink. */
-
if (!output_diversion)
return;
/* Insert output by big chunks. */
-
- for (;;)
+ while (1)
{
- length = fread (buffer, 1, COPY_BUFFER_SIZE, file);
+ length = fread (buffer, 1, sizeof buffer, file);
if (ferror (file))
- M4ERROR ((EXIT_FAILURE, errno, "ERROR: reading inserted file"));
+ M4ERROR ((EXIT_FAILURE, errno, "error reading inserted file"));
if (length == 0)
break;
output_text (buffer, length);
@@ -982,7 +980,7 @@ freeze_diversions (FILE *file)
iter = gl_oset_iterator (diversion_table);
while (gl_oset_iterator_next (&iter, &elt))
{
- m4_diversion *diversion = (m4_diversion *) elt;;
+ m4_diversion *diversion = (m4_diversion *) elt;
if (diversion->size || diversion->used)
{
if (diversion->size)
--
1.6.1.2
>From 1974da270a84d8c0cf2b3e3e08ce5292fdb211f3 Mon Sep 17 00:00:00 2001
From: Eric Blake <address@hidden>
Date: Wed, 18 Feb 2009 11:38:54 -0700
Subject: [PATCH 2/3] Speed up esyscmd with buffer reads.
* src/builtin.c (m4_esyscmd): Read blocks directly into obstack,
rather than repeatedly reading bytes. Detect read errors.
Signed-off-by: Eric Blake <address@hidden>
---
ChangeLog | 4 ++++
src/builtin.c | 24 ++++++++++++++++++++----
2 files changed, 24 insertions(+), 4 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index cb35601..0a6c47e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
2009-02-18 Eric Blake <address@hidden>
+ Speed up esyscmd with buffer reads.
+ * src/builtin.c (m4_esyscmd): Read blocks directly into obstack,
+ rather than repeatedly reading bytes. Detect read errors.
+
Avoid risk of stack overflow.
* src/output.c (insert_file): Avoid stack allocation of large
buffer.
diff --git a/src/builtin.c b/src/builtin.c
index 50f10cd..4a89fcc 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -1,7 +1,7 @@
/* GNU m4 -- A simple macro processor
Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2000, 2004, 2006,
- 2007, 2008 Free Software Foundation, Inc.
+ 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of GNU M4.
@@ -999,7 +999,6 @@ static void
m4_esyscmd (struct obstack *obs, int argc, token_data **argv)
{
FILE *pin;
- int ch;
if (bad_argc (argv[0], argc, 2, 2))
{
@@ -1019,8 +1018,25 @@ m4_esyscmd (struct obstack *obs, int argc, token_data
**argv)
}
else
{
- while ((ch = getc (pin)) != EOF)
- obstack_1grow (obs, (char) ch);
+ while (1)
+ {
+ size_t avail = obstack_room (obs);
+ size_t len;
+ if (!avail)
+ {
+ int ch = getc (pin);
+ if (ch == EOF)
+ break;
+ obstack_1grow (obs, ch);
+ continue;
+ }
+ len = fread (obstack_next_free (obs), 1, avail, pin);
+ if (len <= 0)
+ break;
+ obstack_blank_fast (obs, len);
+ }
+ if (ferror (pin))
+ M4ERROR ((EXIT_FAILURE, errno, "cannot read pipe"));
sysval = pclose (pin);
}
}
--
1.6.1.2
>From 45a154e6d70517bf1b837715aae2fe366c9c1116 Mon Sep 17 00:00:00 2001
From: Eric Blake <address@hidden>
Date: Wed, 18 Feb 2009 11:03:55 -0700
Subject: [PATCH 3/3] Prefer buffer over byte operations.
* src/format.c (expand_format): Use strchr for speed.
* src/builtin.c (substitute, expand_user_macro): Likewise.
Signed-off-by: Eric Blake <address@hidden>
---
ChangeLog | 4 ++++
src/builtin.c | 38 +++++++++++++++++++++-----------------
src/format.c | 16 +++++++++-------
3 files changed, 34 insertions(+), 24 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 0a6c47e..3a80a51 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
2009-02-18 Eric Blake <address@hidden>
+ Prefer buffer over byte operations.
+ * src/format.c (expand_format): Use strchr for speed.
+ * src/builtin.c (substitute, expand_user_macro): Likewise.
+
Speed up esyscmd with buffer reads.
* src/builtin.c (m4_esyscmd): Read blocks directly into obstack,
rather than repeatedly reading bytes. Detect read errors.
diff --git a/src/builtin.c b/src/builtin.c
index 4a89fcc..a89b861 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -1902,17 +1902,18 @@ substitute (struct obstack *obs, const char *victim,
const char *repl,
struct re_registers *regs)
{
int ch;
-
- for (;;)
+ while (1)
{
- while ((ch = *repl++) != '\\')
+ const char *backslash = strchr (repl, '\\');
+ if (!backslash)
{
- if (ch == '\0')
- return;
- obstack_1grow (obs, ch);
+ obstack_grow (obs, repl, strlen (repl));
+ return;
}
-
- switch ((ch = *repl++))
+ obstack_grow (obs, repl, backslash - repl);
+ repl = backslash;
+ ch = *++repl;
+ switch (ch)
{
case '0':
if (!substitute_warned)
@@ -1926,6 +1927,7 @@ Warning: \\0 will disappear, use \\& instead in
replacements"));
case '&':
obstack_grow (obs, victim + regs->start[0],
regs->end[0] - regs->start[0]);
+ repl++;
break;
case '1': case '2': case '3': case '4': case '5': case '6':
@@ -1937,6 +1939,7 @@ Warning: \\0 will disappear, use \\& instead in
replacements"));
else if (regs->end[ch] > 0)
obstack_grow (obs, victim + regs->start[ch],
regs->end[ch] - regs->start[ch]);
+ repl++;
break;
case '\0':
@@ -1946,6 +1949,7 @@ Warning: \\0 will disappear, use \\& instead in
replacements"));
default:
obstack_1grow (obs, ch);
+ repl++;
break;
}
}
@@ -2151,19 +2155,19 @@ void
expand_user_macro (struct obstack *obs, symbol *sym,
int argc, token_data **argv)
{
- const char *text;
+ const char *text = SYMBOL_TEXT (sym);
int i;
-
- for (text = SYMBOL_TEXT (sym); *text != '\0';)
+ while (1)
{
- if (*text != '$')
+ const char *dollar = strchr (text, '$');
+ if (!dollar)
{
- obstack_1grow (obs, *text);
- text++;
- continue;
+ obstack_grow (obs, text, strlen (text));
+ return;
}
- text++;
- switch (*text)
+ obstack_grow (obs, text, dollar - text);
+ text = dollar;
+ switch (*++text)
{
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
diff --git a/src/format.c b/src/format.c
index 518394a..a0519a0 100644
--- a/src/format.c
+++ b/src/format.c
@@ -1,7 +1,7 @@
/* GNU m4 -- A simple macro processor
- Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2006, 2007, 2008
- Free Software Foundation, Inc.
+ Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2006, 2007, 2008,
+ 2009 Free Software Foundation, Inc.
This file is part of GNU M4.
@@ -89,14 +89,16 @@ expand_format (struct obstack *obs, int argc, token_data
**argv)
f = fmt = ARG_STR (argc, argv);
memset (ok, 0, sizeof ok);
- for (;;)
+ while (1)
{
- while ((c = *fmt++) != '%')
+ const char *percent = strchr (fmt, '%');
+ if (!percent)
{
- if (c == '\0')
- return;
- obstack_1grow (obs, c);
+ obstack_grow (obs, fmt, strlen (fmt));
+ return;
}
+ obstack_grow (obs, fmt, percent - fmt);
+ fmt = percent + 1;
if (*fmt == '%')
{
--
1.6.1.2
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- more buffer vs. byte speedups,
Eric Blake <=