[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: poor m4 hash performance
From: |
Eric Blake |
Subject: |
Re: poor m4 hash performance |
Date: |
Sun, 04 Jun 2006 17:33:35 -0600 |
User-agent: |
Thunderbird 1.5.0.4 (Windows/20060516) |
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
According to Eric Blake on 6/4/2006 4:09 PM:
>
> For CVS coreutils, with 83303 definitions and 2526066 lookups, using
> -H99991 reduced comparisons from 9364329 (36022688 bytes, 3.5 comparisons
> per lookup) to 2415937 (22883978 bytes, less than 1 compare per lookup),
> but never bought me much more than .5 sec out of 30 on my machine (1.5 %
> speedup, but that was with naive profiling turned on rather than an
> optimized strcmp). So having autom4te play with -H may not buy us much.
> I'll report separately about potential m4 1.4.5 improvements; this mail is
> just limited to the framework for testing the various ideas.
This patch stores string hashes, to avoid strcmp. With the default 509
buckets on CVS coreutils, it causes only 2362141 strcmp, with only 25
failed comparisons. There are still 22829893 bytes compared (ie. the 2.3M
successful lookups results in 22M of length, or about 9 characters per
average token); this shaved only 60k of comparisons from the earlier run
with -H99991 buckets without this patch. Comparing the time for m4 1.4.4
and my version of m4 with this patch, both compiled at -O2, still shows no
noticeable difference in timing of autoconf. But keep in mind that
autoconf is also invoking several other processes, perhaps the actual m4
invocations are noticeably faster. So I don't know whether it is worth
applying this patch.
2006-06-04 Eric Blake <address@hidden>
* src/m4.h (struct symbol, SYMBOL_HASH): Store symbol's hash.
* src/symtab.c (hash): Don't perform modulo here.
(lookup_symbol): Store hash, use it to reduce strcmp calls.
(symtab_print_list) [DEBUG_SYM]: Display hash.
- --
Life is short - so eat dessert first!
Eric Blake address@hidden
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.2.1 (Cygwin)
Comment: Public key at home.comcast.net/~ericblake/eblake.gpg
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org
iD8DBQFEg23O84KuGfSFAYARAoj/AJ9L5ujZrluhCIIp/lJ8s03XsGQyewCfal77
rAnmz/AeBwETqyymOzApHZ8=
=+S6M
-----END PGP SIGNATURE-----
Index: src/m4.h
===================================================================
RCS file: /sources/m4/m4/src/m4.h,v
retrieving revision 1.1.1.1.2.5
diff -u -p -r1.1.1.1.2.5 m4.h
--- src/m4.h 27 May 2006 18:11:23 -0000 1.1.1.1.2.5
+++ src/m4.h 4 Jun 2006 23:31:33 -0000
@@ -382,6 +382,7 @@ struct symbol
boolean blind_no_args;
char *name;
+ unsigned hash;
token_data data;
};
@@ -391,6 +392,7 @@ struct symbol
#define SYMBOL_MACRO_ARGS(S) ((S)->macro_args)
#define SYMBOL_BLIND_NO_ARGS(S) ((S)->blind_no_args)
#define SYMBOL_NAME(S) ((S)->name)
+#define SYMBOL_HASH(S) ((S)->hash)
#define SYMBOL_TYPE(S) (TOKEN_DATA_TYPE (&(S)->data))
#define SYMBOL_TEXT(S) (TOKEN_DATA_TEXT (&(S)->data))
#define SYMBOL_FUNC(S) (TOKEN_DATA_FUNC (&(S)->data))
Index: src/symtab.c
===================================================================
RCS file: /sources/m4/m4/src/Attic/symtab.c,v
retrieving revision 1.1.1.1.2.3
diff -u -p -r1.1.1.1.2.3 symtab.c
--- src/symtab.c 4 Jun 2006 22:09:30 -0000 1.1.1.1.2.3
+++ src/symtab.c 4 Jun 2006 23:31:33 -0000
@@ -117,10 +117,10 @@ symtab_init (void)
| Return a hashvalue for a string, from GNU-emacs. |
`--------------------------------------------------*/
-static int
+static unsigned
hash (const char *s)
{
- register int val = 0;
+ register unsigned val = 0;
register const char *ptr = s;
register char ch;
@@ -131,8 +131,7 @@ hash (const char *s)
ch -= 40;
val = ((val << 3) + (val >> 28) + ch);
};
- val = (val < 0) ? -val : val;
- return val % hash_table_size;
+ return val;
}
/*--------------------------------------------.
@@ -165,7 +164,8 @@ free_symbol (symbol *sym)
symbol *
lookup_symbol (const char *name, symbol_lookup mode)
{
- int h, cmp = 1;
+ unsigned h;
+ int cmp = 1;
symbol *sym, *prev;
symbol **spp;
@@ -175,11 +175,13 @@ lookup_symbol (const char *name, symbol_
#endif /* DEBUG_SYM */
h = hash (name);
- sym = symtab[h];
+ sym = symtab[h % hash_table_size];
for (prev = NULL; sym != NULL; prev = sym, sym = sym->next)
{
- cmp = strcmp (SYMBOL_NAME (sym), name);
+ cmp = (h < SYMBOL_HASH (sym) ? -1
+ : h > SYMBOL_HASH (sym) ? 1
+ : strcmp (SYMBOL_NAME (sym), name));
if (cmp >= 0)
break;
}
@@ -191,7 +193,7 @@ lookup_symbol (const char *name, symbol_
/* Symbol not found. */
- spp = (prev != NULL) ? &prev->next : &symtab[h];
+ spp = (prev != NULL) ? &prev->next : &symtab[h % hash_table_size];
switch (mode)
{
@@ -215,6 +217,7 @@ lookup_symbol (const char *name, symbol_
SYMBOL_TYPE (sym) = TOKEN_VOID;
SYMBOL_TRACED (sym) = SYMBOL_SHADOWED (sym) = FALSE;
SYMBOL_NAME (sym) = xstrdup (name);
+ SYMBOL_HASH (sym) = h;
SYMBOL_SHADOWED (sym) = FALSE;
SYMBOL_MACRO_ARGS (sym) = FALSE;
SYMBOL_BLIND_NO_ARGS (sym) = FALSE;
@@ -330,9 +333,9 @@ symtab_print_list (int i)
printf ("Symbol dump #%d:\n", i);
for (h = 0; h < hash_table_size; h++)
for (sym = symtab[h]; sym != NULL; sym = sym->next)
- printf ("\tname %s, bucket %d, addr %p, next %p, "
+ printf ("\tname %s, hash %u, bucket %d, addr %p, next %p, "
"flags%s%s\n",
- SYMBOL_NAME (sym),
+ SYMBOL_NAME (sym), SYMBOL_HASH (sym),
h, sym, SYMBOL_NEXT (sym),
SYMBOL_TRACED (sym) ? " traced" : "",
SYMBOL_SHADOWED (sym) ? " shadowed" : "");