[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
speed up translit
From: |
Eric Blake |
Subject: |
speed up translit |
Date: |
Thu, 19 Feb 2009 06:36:54 -0700 |
User-agent: |
Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.19) Gecko/20081209 Thunderbird/2.0.0.19 Mnenhy/0.7.6.666 |
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
This one surprised me, in that the performance gain was actually
noticeable. Using branch-1.4, and running autoconf on coreutils, I saw
the speed drop from 16.7s to 16.5, an improvement better than 1%. I
coupled it with an autoconf patch to use reduce some heavily-used 3-byte
translits down to 2 bytes, for another half a percent improvement. It
turns out that autoconf handles lots of large strings where only one or
two bytes need to be transformed (for example, m4_flatten converts just
newline to space). The trick is recognizing that for a small conversion
set, the overhead of creating a 256-byte map and processing one byte at a
time is large compared to searching a word at a time for just the bytes
that need processing.
- --
Don't work too hard, make some time for fun as well!
Eric Blake address@hidden
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.9 (Cygwin)
Comment: Public key at home.comcast.net/~ericblake/eblake.gpg
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org
iEYEARECAAYFAkmdYHYACgkQ84KuGfSFAYDCrgCfbQpwSZIPmfGP1IDF3g/kmttL
cKUAnA7Zl9gX8udfWdjPVQOvcsKBodzZ
=xffh
-----END PGP SIGNATURE-----
>From 29b625aa941718e43cc04dfc217f314518bbc6d1 Mon Sep 17 00:00:00 2001
From: Eric Blake <address@hidden>
Date: Wed, 18 Feb 2009 17:07:58 -0700
Subject: [PATCH] Speed up translit when from argument is short.
* m4/gnulib-cache.m4: Import memchr2 module.
* src/builtin.c (m4_translit): Use memchr2 when possible.
* doc/m4.texinfo (Translit): Add tests.
* NEWS: Document this.
Signed-off-by: Eric Blake <address@hidden>
---
ChangeLog | 6 +++++
NEWS | 3 ++
doc/m4.texinfo | 21 ++++++++++++++++++
m4/gnulib-cache.m4 | 3 +-
src/builtin.c | 58 ++++++++++++++++++++++++++++++++++++---------------
5 files changed, 73 insertions(+), 18 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 4f2f3d4..a80dfea 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,11 @@
2009-02-18 Eric Blake <address@hidden>
+ Speed up translit when from argument is short.
+ * m4/gnulib-cache.m4: Import memchr2 module.
+ * src/builtin.c (m4_translit): Use memchr2 when possible.
+ * doc/m4.texinfo (Translit): Add tests.
+ * NEWS: Document this.
+
Update copyright year.
* THANKS: Mention 2009 in copyright year.
diff --git a/NEWS b/NEWS
index 5a895bb..31821b7 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,9 @@ Software Foundation, Inc.
** The `divert' and `undivert' builtins have been made more efficient
when using temporary files for large diversions.
+** The `translit' builtin has been made more efficient when the second
+ argument is short.
+
** The command line option `--debugfile', introduced in 1.4.7, now
treats its argument as optional, in order to allow setting the debug
output back to stderr when used without an argument; and order is now
diff --git a/doc/m4.texinfo b/doc/m4.texinfo
index 08daafe..10e63ef 100644
--- a/doc/m4.texinfo
+++ b/doc/m4.texinfo
@@ -5780,6 +5780,27 @@ Translit
translit(`«abc~', `~-»')
@result{}abc
@end example
+
address@hidden Stress test short arguments, since they use a different code
address@hidden path.
address@hidden
+translit(`abcdeabcde', `a')
address@hidden
+translit(`abcdeabcde', `ab')
address@hidden
+translit(`abcdeabcde', `a', `f')
address@hidden
+translit(`abcdeabcde', `a', `f')
address@hidden
+translit(`abcdeabcde', `a', `fg')
address@hidden
+translit(`abcdeabcde', `ab', `f')
address@hidden
+translit(`abcdeabcde', `ab', `fg')
address@hidden
+translit(`abcdeabcde', `ab', `ba')
address@hidden
address@hidden example
@end ignore
Omitting @var{chars} evokes a warning, but still produces output.
diff --git a/m4/gnulib-cache.m4 b/m4/gnulib-cache.m4
index b439e8a..4ed53d0 100644
--- a/m4/gnulib-cache.m4
+++ b/m4/gnulib-cache.m4
@@ -15,7 +15,7 @@
# Specification in the form of a command-line invocation:
-# gnulib-tool --import --dir=. --local-dir=local --lib=libm4
--source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests
--aux-dir=build-aux --with-tests --avoid=lock-tests --avoid=tls-tests
--no-libtool --macro-prefix=M4 announce-gen assert autobuild avltree-oset
binary-io c-stack clean-temp cloexec close-stream closein config-h dirname
error fdl-1.3 fflush filenamecat fopen fopen-safer fseeko gendocs getopt
git-version-gen gnumakefile gnupload gpl-3.0 intprops mkstemp obstack progname
regex sigaction stdbool stdint stdlib-safer strsignal strstr strtod strtol
unlocked-io verror version-etc version-etc-fsf xalloc xprintf xvasprintf-posix
+# gnulib-tool --import --dir=. --local-dir=local --lib=libm4
--source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests
--aux-dir=build-aux --with-tests --avoid=lock-tests --avoid=tls-tests
--no-libtool --macro-prefix=M4 announce-gen assert autobuild avltree-oset
binary-io c-stack clean-temp cloexec close-stream closein config-h dirname
error fdl-1.3 fflush filenamecat fopen fopen-safer fseeko gendocs getopt
git-version-gen gnumakefile gnupload gpl-3.0 intprops memchr2 mkstemp obstack
progname regex sigaction stdbool stdint stdlib-safer strsignal strstr strtod
strtol unlocked-io verror version-etc version-etc-fsf xalloc xprintf
xvasprintf-posix
# Specification in the form of a few gnulib-tool.m4 macro invocations:
gl_LOCAL_DIR([local])
@@ -46,6 +46,7 @@ gl_MODULES([
gnupload
gpl-3.0
intprops
+ memchr2
mkstemp
obstack
progname
diff --git a/src/builtin.c b/src/builtin.c
index a89b861..504075a 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -26,6 +26,7 @@
extern FILE *popen ();
+#include "memchr2.h"
#include "regex.h"
#if HAVE_SYS_WAIT_H
@@ -1816,35 +1817,56 @@ expand_ranges (const char *s, struct obstack *obs)
static void
m4_translit (struct obstack *obs, int argc, token_data **argv)
{
- const char *data;
- const char *from;
+ const char *data = ARG (1);
+ const char *from = ARG (2);
const char *to;
- char map[256] = {0};
- char found[256] = {0};
+ char map[UCHAR_MAX + 1];
+ char found[UCHAR_MAX + 1];
unsigned char ch;
- if (bad_argc (argv[0], argc, 3, 4))
+ if (bad_argc (argv[0], argc, 3, 4) || !*from)
{
/* builtin(`translit') is blank, but translit(`abc') is abc. */
- if (argc == 2)
- obstack_grow (obs, ARG (1), strlen (ARG (1)));
+ if (argc <= 2)
+ obstack_grow (obs, data, strlen (data));
return;
}
- from = ARG (2);
- if (strchr (from, '-') != NULL)
- {
- from = expand_ranges (from, obs);
- if (from == NULL)
- return;
- }
-
to = ARG (3);
if (strchr (to, '-') != NULL)
{
to = expand_ranges (to, obs);
- if (to == NULL)
- return;
+ assert (to && *to);
+ }
+
+ /* If there are only one or two bytes to replace, it is faster to
+ use memchr2. Using expand_ranges does nothing unless there are
+ at least three bytes. */
+ if (!from[1] || !from[2])
+ {
+ const char *p;
+ size_t len = strlen (data);
+ while ((p = (char *) memchr2 (data, from[0], from[1], len)))
+ {
+ obstack_grow (obs, data, p - data);
+ len -= p - data;
+ if (!len)
+ return;
+ data = p + 1;
+ len--;
+ if (*p == from[0] && to[0])
+ obstack_1grow (obs, to[0]);
+ else if (*p == from[1] && to[0] && to[1])
+ obstack_1grow (obs, to[1]);
+ }
+ obstack_grow (obs, data, len);
+ return;
+ }
+
+ if (strchr (from, '-') != NULL)
+ {
+ from = expand_ranges (from, obs);
+ assert (from && *from);
}
/* Calling strchr(from) for each character in data is quadratic,
@@ -1853,6 +1875,8 @@ m4_translit (struct obstack *obs, int argc, token_data
**argv)
pass of data, for linear behavior. Traditional behavior is that
only the first instance of a character in from is consulted,
hence the found map. */
+ memset (map, 0, sizeof map);
+ memset (found, 0, sizeof found);
for ( ; (ch = *from) != '\0'; from++)
{
if (! found[ch])
--
1.6.1.2
- speed up translit,
Eric Blake <=