[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: minor regex speedup
From: |
Eric Blake |
Subject: |
Re: minor regex speedup |
Date: |
Mon, 18 Feb 2008 06:28:58 -0700 |
User-agent: |
Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.9) Gecko/20071031 Thunderbird/2.0.0.9 Mnenhy/0.7.5.666 |
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
According to Ralf Wildenhues on 2/18/2008 5:23 AM:
|
| I haven't otherwise looked at this patch at all, but it contains several
| instances of the number 256. As it is unequal to 0, 1, or 2, please make
| that one or more #defines or consts, and use them throughout.
Thanks for the idea. In most cases, 256 was being used in the context of
(UCHAR_MAX + 1); M4 currently relies heavily on the POSIX
requirement that CHAR_BIT==8 (and I doubt that it would be very easy to
port M4 to a non-POSIX system with 9-bit or 32-bit char, even though I am
aware that such systems exist). I'm installing this (more than just the
patch you mentioned was affected), for both branch and head.
- --
Don't work too hard, make some time for fun as well!
Eric Blake address@hidden
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.5 (Cygwin)
Comment: Public key at home.comcast.net/~ericblake/eblake.gpg
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org
iD8DBQFHuYga84KuGfSFAYARAhlxAKCDI/QYL/LwyGjP+RxKXzyBeM3cCwCgo8r1
x5OVOe6LgQft60B2smOeBmo=
=K22h
-----END PGP SIGNATURE-----
>From 6e45bac29917da4289b841d1f339851e1def72d9 Mon Sep 17 00:00:00 2001
From: Eric Blake <address@hidden>
Date: Mon, 18 Feb 2008 06:09:45 -0700
Subject: [PATCH] Avoid some magic numbers.
* src/m4.h (DEBUG_TRACE_ARGS, DEBUG_TRACE_EXPANSION)
(DEBUG_TRACE_QUOTE, DEBUG_TRACE_ALL, DEBUG_TRACE_LINE)
(DEBUG_TRACE_FILE, DEBUG_TRACE_PATH, DEBUG_TRACE_CALL)
(DEBUG_TRACE_INPUT, DEBUG_TRACE_CALLID, DEBUG_TRACE_VERBOSE)
(DEBUG_TRACE_DEFAULT): Use hex constants, to make it obvious these
are bit fields.
* src/input.c (CHAR_EOF, CHAR_MACRO, CHAR_QUOTE, CHAR_ARGV):
Define in terms of UCHAR_MAX.
(set_word_regexp): Likewise.
* src/builtin.c (compile_pattern, m4_translit): Likewise.
Reported by Ralf Wildenhues.
Signed-off-by: Eric Blake <address@hidden>
---
ChangeLog | 15 +++++++++++++++
src/builtin.c | 6 +++---
src/input.c | 10 +++++-----
src/m4.h | 38 +++++++++++++++++++-------------------
4 files changed, 42 insertions(+), 27 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 76fcac3..86f8cb8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2008-02-18 Eric Blake <address@hidden>
+
+ Avoid some magic numbers.
+ * src/m4.h (DEBUG_TRACE_ARGS, DEBUG_TRACE_EXPANSION)
+ (DEBUG_TRACE_QUOTE, DEBUG_TRACE_ALL, DEBUG_TRACE_LINE)
+ (DEBUG_TRACE_FILE, DEBUG_TRACE_PATH, DEBUG_TRACE_CALL)
+ (DEBUG_TRACE_INPUT, DEBUG_TRACE_CALLID, DEBUG_TRACE_VERBOSE)
+ (DEBUG_TRACE_DEFAULT): Use hex constants, to make it obvious these
+ are bit fields.
+ * src/input.c (CHAR_EOF, CHAR_MACRO, CHAR_QUOTE, CHAR_ARGV):
+ Define in terms of UCHAR_MAX.
+ (set_word_regexp): Likewise.
+ * src/builtin.c (compile_pattern, m4_translit): Likewise.
+ Reported by Ralf Wildenhues.
+
2008-02-16 Eric Blake <address@hidden>
Add regression test for multi-character quote recursion.
diff --git a/src/builtin.c b/src/builtin.c
index a48e7a0..d4a0fee 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -299,7 +299,7 @@ compile_pattern (const char *str, size_t len, struct
re_pattern_buffer **buf,
return msg;
}
/* Use a fastmap for speed; it is freed by regfree. */
- new_buf->fastmap = xcharalloc (256);
+ new_buf->fastmap = xcharalloc (UCHAR_MAX + 1);
/* Now, find a victim slot. Decrease the count of all entries, then
prime the count of the victim slot at REGEX_CACHE_SIZE. This
@@ -1880,8 +1880,8 @@ m4_translit (struct obstack *obs, int argc,
macro_arguments *argv)
const char *data;
const char *from;
const char *to;
- char map[256] = {0};
- char found[256] = {0};
+ char map[UCHAR_MAX + 1] = {0};
+ char found[UCHAR_MAX + 1] = {0};
unsigned char ch;
if (bad_argc (ARG (0), argc, 2, 3))
diff --git a/src/input.c b/src/input.c
index e320c72..5c3b345 100644
--- a/src/input.c
+++ b/src/input.c
@@ -151,10 +151,10 @@ static bool start_of_input_line;
/* Flag for next_char () to recognize change in input block. */
static bool input_change;
-#define CHAR_EOF 256 /* Character return on EOF. */
-#define CHAR_MACRO 257 /* Character return for MACRO token. */
-#define CHAR_QUOTE 258 /* Character return for quoted string. */
-#define CHAR_ARGV 259 /* Character return for $@ reference. */
+#define CHAR_EOF (UCHAR_MAX + 1) /* Return on EOF. */
+#define CHAR_MACRO (UCHAR_MAX + 2) /* Return for MACRO token. */
+#define CHAR_QUOTE (UCHAR_MAX + 3) /* Return for quoted string. */
+#define CHAR_ARGV (UCHAR_MAX + 4) /* Return for $@ reference. */
/* Quote chars. */
string_pair curr_quote;
@@ -1303,7 +1303,7 @@ set_word_regexp (const char *caller, const char *regexp)
The fastmap can be reused between compilations, and will be freed
by the final regfree. */
if (!word_regexp.fastmap)
- word_regexp.fastmap = xcharalloc (256);
+ word_regexp.fastmap = xcharalloc (UCHAR_MAX + 1);
msg = re_compile_pattern (regexp, strlen (regexp), &word_regexp);
assert (!msg);
re_set_registers (&word_regexp, &regs, regs.num_regs, regs.start, regs.end);
diff --git a/src/m4.h b/src/m4.h
index 7df29b8..e1da7a7 100644
--- a/src/m4.h
+++ b/src/m4.h
@@ -161,32 +161,32 @@ extern FILE *debug;
/* The value of debug_level is a bitmask of the following. */
/* a: show arglist in trace output */
-#define DEBUG_TRACE_ARGS 1
+#define DEBUG_TRACE_ARGS 0x001
/* e: show expansion in trace output */
-#define DEBUG_TRACE_EXPANSION 2
+#define DEBUG_TRACE_EXPANSION 0x002
/* q: quote args and expansion in trace output */
-#define DEBUG_TRACE_QUOTE 4
+#define DEBUG_TRACE_QUOTE 0x004
/* t: trace all macros -- overrides trace{on,off} */
-#define DEBUG_TRACE_ALL 8
+#define DEBUG_TRACE_ALL 0x008
/* l: add line numbers to trace output */
-#define DEBUG_TRACE_LINE 16
+#define DEBUG_TRACE_LINE 0x010
/* f: add file name to trace output */
-#define DEBUG_TRACE_FILE 32
+#define DEBUG_TRACE_FILE 0x020
/* p: trace path search of include files */
-#define DEBUG_TRACE_PATH 64
+#define DEBUG_TRACE_PATH 0x040
/* c: show macro call before args collection */
-#define DEBUG_TRACE_CALL 128
+#define DEBUG_TRACE_CALL 0x080
/* i: trace changes of input files */
-#define DEBUG_TRACE_INPUT 256
+#define DEBUG_TRACE_INPUT 0x100
/* x: add call id to trace output */
-#define DEBUG_TRACE_CALLID 512
+#define DEBUG_TRACE_CALLID 0x200
/* V: very verbose -- print everything */
-#define DEBUG_TRACE_VERBOSE 1023
+#define DEBUG_TRACE_VERBOSE 0x3FF
/* default flags -- equiv: aeq */
-#define DEBUG_TRACE_DEFAULT 7
+#define DEBUG_TRACE_DEFAULT 0x007
-#define DEBUG_PRINT1(Fmt, Arg1) \
+#define DEBUG_PRINT1(Fmt, Arg1) \
do \
{ \
if (debug != NULL) \
--
1.5.4
>From 2e81b080376fcc4f3362a0c4810de084371c87d0 Mon Sep 17 00:00:00 2001
From: Eric Blake <address@hidden>
Date: Mon, 18 Feb 2008 06:24:08 -0700
Subject: [PATCH] Avoid some magic numbers.
* m4/m4private.h (CHAR_EOF, CHAR_BUILTIN, CHAR_QUOTE, CHAR_ARGV)
(CHAR_RETRY): Define in terms of UCHAR_MAX.
* m4/syntax.c (m4_syntax_create, set_syntax_set)
(reset_syntax_set, check_is_single_quotes)
(check_is_single_comments, check_is_macro_escaped)
(m4_set_quotes, m4_set_comment): Likewise.
* modules/gnu.c (regexp_compile): Likewise.
* modules/m4.c (translit): Likewise.
* src/freeze.c (produce_syntax_dump): Likewise.
Reported by Ralf Wildenhues.
Signed-off-by: Eric Blake <address@hidden>
---
ChangeLog | 14 ++++++++++++++
m4/m4private.h | 12 ++++++------
m4/syntax.c | 24 ++++++++++++------------
modules/gnu.c | 2 +-
modules/m4.c | 4 ++--
src/freeze.c | 4 ++--
6 files changed, 37 insertions(+), 23 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 6a89c56..ba56df5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2008-02-18 Eric Blake <address@hidden>
+
+ Avoid some magic numbers.
+ * m4/m4private.h (CHAR_EOF, CHAR_BUILTIN, CHAR_QUOTE, CHAR_ARGV)
+ (CHAR_RETRY): Define in terms of UCHAR_MAX.
+ * m4/syntax.c (m4_syntax_create, set_syntax_set)
+ (reset_syntax_set, check_is_single_quotes)
+ (check_is_single_comments, check_is_macro_escaped)
+ (m4_set_quotes, m4_set_comment): Likewise.
+ * modules/gnu.c (regexp_compile): Likewise.
+ * modules/m4.c (translit): Likewise.
+ * src/freeze.c (produce_syntax_dump): Likewise.
+ Reported by Ralf Wildenhues.
+
2008-02-16 Eric Blake <address@hidden>
Add regression test for multi-character quote recursion.
diff --git a/m4/m4private.h b/m4/m4private.h
index a2b78b8..2201703 100644
--- a/m4/m4private.h
+++ b/m4/m4private.h
@@ -395,11 +395,11 @@ extern void m4__symtab_remove_module_references
(m4_symbol_table*,
/* CHAR_RETRY must be last, because we size the syntax table to hold
all other characters and sentinels. */
-#define CHAR_EOF 256 /* Character return on EOF. */
-#define CHAR_BUILTIN 257 /* Character return for BUILTIN token. */
-#define CHAR_QUOTE 258 /* Character return for quoted string. */
-#define CHAR_ARGV 259 /* Character return for $@ reference. */
-#define CHAR_RETRY 260 /* Character return for end of input block. */
+#define CHAR_EOF (UCHAR_MAX + 1) /* Return on EOF. */
+#define CHAR_BUILTIN (UCHAR_MAX + 2) /* Return for BUILTIN token. */
+#define CHAR_QUOTE (UCHAR_MAX + 3) /* Return for quoted string. */
+#define CHAR_ARGV (UCHAR_MAX + 4) /* Return for $@ reference. */
+#define CHAR_RETRY (UCHAR_MAX + 5) /* Return for end of input block. */
#define DEF_LQUOTE "`" /* Default left quote delimiter. */
#define DEF_RQUOTE "\'" /* Default right quote delimiter. */
diff --git a/m4/syntax.c b/m4/syntax.c
index 8a7b0d1..115884e 100644
--- a/m4/syntax.c
+++ b/m4/syntax.c
@@ -116,7 +116,7 @@ m4_syntax_create (void)
int ch;
/* Set up default table. This table never changes during operation. */
- for (ch = 256; --ch >= 0;)
+ for (ch = UCHAR_MAX + 1; --ch >= 0; )
switch (ch)
{
case '(':
@@ -309,7 +309,7 @@ set_syntax_set (m4_syntax_table *syntax, const char *chars,
int code)
/* Explicit set of characters to install with this category; all
other characters that used to have the category get reset to
OTHER. */
- for (ch = 256; --ch >= 0; )
+ for (ch = UCHAR_MAX + 1; --ch >= 0; )
{
if (code == M4_SYNTAX_RQUOTE || code == M4_SYNTAX_ECOMM)
remove_syntax_attribute (syntax, ch, code);
@@ -329,7 +329,7 @@ static void
reset_syntax_set (m4_syntax_table *syntax, int code)
{
int ch;
- for (ch = 256; --ch >= 0; )
+ for (ch = UCHAR_MAX + 1; --ch >= 0; )
{
/* Reset the category back to its default state. All other
characters that used to have this category get reset to
@@ -443,7 +443,7 @@ check_is_single_quotes (m4_syntax_table *syntax)
on the syntax table, then update lquote/rquote accordingly.
Otherwise, keep lquote/rquote, but we no longer have single
quotes. */
- for (ch = 256; --ch >= 0; )
+ for (ch = UCHAR_MAX + 1; --ch >= 0; )
{
if (m4_has_syntax (syntax, ch, M4_SYNTAX_LQUOTE))
{
@@ -496,7 +496,7 @@ check_is_single_comments (m4_syntax_table *syntax)
on the syntax table, then update bcomm/ecomm accordingly.
Otherwise, keep bcomm/ecomm, but we no longer have single
comments. */
- for (ch = 256; --ch >= 0; )
+ for (ch = UCHAR_MAX + 1; --ch >= 0; )
{
if (m4_has_syntax (syntax, ch, M4_SYNTAX_BCOMM))
{
@@ -535,7 +535,7 @@ check_is_macro_escaped (m4_syntax_table *syntax)
int ch;
syntax->is_macro_escaped = false;
- for (ch = 256; --ch >= 0; )
+ for (ch = UCHAR_MAX + 1; --ch >= 0; )
if (m4_has_syntax (syntax, ch, M4_SYNTAX_ESCAPE))
{
syntax->is_macro_escaped = true;
@@ -593,7 +593,7 @@ m4_set_quotes (m4_syntax_table *syntax, const char *lq,
const char *rq)
(M4_SYNTAX_IGNORE | M4_SYNTAX_ESCAPE
| M4_SYNTAX_ALPHA | M4_SYNTAX_NUM)));
- for (ch = 256; --ch >= 0;)
+ for (ch = UCHAR_MAX + 1; --ch >= 0; )
{
if (m4_has_syntax (syntax, ch, M4_SYNTAX_LQUOTE))
add_syntax_attribute (syntax, ch,
@@ -656,7 +656,7 @@ m4_set_comment (m4_syntax_table *syntax, const char *bc,
const char *ec)
| M4_SYNTAX_ALPHA | M4_SYNTAX_NUM
| M4_SYNTAX_LQUOTE)));
- for (ch = 256; --ch >= 0;)
+ for (ch = UCHAR_MAX + 1; --ch >= 0; )
{
if (m4_has_syntax (syntax, ch, M4_SYNTAX_BCOMM))
add_syntax_attribute (syntax, ch,
diff --git a/modules/gnu.c b/modules/gnu.c
index f3b7fb7..97b263b 100644
--- a/modules/gnu.c
+++ b/modules/gnu.c
@@ -176,7 +176,7 @@ regexp_compile (m4 *context, const char *caller, const char
*regexp,
return NULL;
}
/* Use a fastmap for speed; it is freed by regfree. */
- pat->fastmap = xcharalloc (256);
+ pat->fastmap = xcharalloc (UCHAR_MAX + 1);
/* Now, find a victim slot. Decrease the count of all entries, then
prime the count of the victim slot at REGEX_CACHE_SIZE. This
diff --git a/modules/m4.c b/modules/m4.c
index ccc847c..afb9d0c 100644
--- a/modules/m4.c
+++ b/modules/m4.c
@@ -998,8 +998,8 @@ M4BUILTIN_HANDLER (translit)
const char *data;
const char *from;
const char *to;
- char map[256] = {0};
- char found[256] = {0};
+ char map[UCHAR_MAX + 1] = {0};
+ char found[UCHAR_MAX + 1] = {0};
unsigned char ch;
if (argc <= 2)
diff --git a/src/freeze.c b/src/freeze.c
index 941b761..7976bec 100644
--- a/src/freeze.c
+++ b/src/freeze.c
@@ -103,12 +103,12 @@ produce_resyntax_dump (m4 *context, FILE *file)
static void
produce_syntax_dump (FILE *file, m4_syntax_table *syntax, char ch)
{
- char buf[256];
+ char buf[UCHAR_MAX + 1];
int code = m4_syntax_code (ch);
int count = 0;
int i;
- for (i = 0; i < 256; ++i)
+ for (i = 0; i < UCHAR_MAX + 1; ++i)
if (m4_has_syntax (syntax, i, code) && code != syntax->orig[i])
buf[count++] = i;
--
1.5.4