[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
bug#29654: Manual database index.db embeds timestamps
From: Ludovic Courtès
Subject: bug#29654: Manual database index.db embeds timestamps
Date: Sun, 17 Dec 2017 00:51:53 +0100
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/25.3 (gnu/linux)
Howdy Ricardo,
Ricardo Wurmus <address@hidden> skribis:
>> Unfortunately, this is not fully deterministic: when running --check
>> several times in a row, I occasionally get different results. I suspect
>> GDBM’s output is not fully deterministic.
>
> Hmm, I dumped the contents of the generated databases with gdbm_dump and
> couldn’t find any difference aside from the header (which is produced by
> gdbm_dump itself). Diffoscope shows a lot of differences, though.
>
> I thought that maybe the difference comes from the fact that upon adding
> new entries gdbm grows the hash table. After setting the initial size
> to a multiple of the number of entries I haven’t been able to generate a
> non-reproducible database.
>
> My only change is in “write-mandb-database”:
>
> (gdbm-open file GDBM_WRCREAT #:block-size (* 512 (length entries)))
>
> I tried this:
>
> ./pre-inst-env guix package -p foo -i coreutils guile
> for i in `seq 30`; do ./pre-inst-env guix build --check -K
> /gnu/store/pg3684khpj69py40v7p76b90r9q4j2lv-manual-database.drv; done
>
> Seems fine. Coincidence or did I get lucky?
I checked with the program below. Setting the block size helps, but it does
not entirely fix the non-determinism:
;; Reproducibility check: regenerate the man-db database over and over and
;; print its size and SHA256 hash each time, so that any run producing a
;; different file stands out in the output.
(use-modules (guix man-db)
             (guix hash)
             (guix base32))

(define %database "/tmp/index.db")

(define (rebuild-and-report)
  ;; Remove the previous database, if any; errors from 'delete-file' (e.g.,
  ;; on the first iteration) are deliberately ignored.
  (false-if-exception (delete-file %database))
  (write-mandb-database %database
                        (mandb-entries "/home/ludo/.guix-profile/share/man"))
  ;; 'pk' prints its arguments: the file size and its nix-base32 SHA256.
  (let ((size   (stat:size (stat %database)))
        (digest (bytevector->nix-base32-string (file-sha256 %database))))
    (pk size digest)))

;; Run until interrupted by hand.
(let again ()
  (rebuild-and-report)
  (again))
Valgrind reports this:
--8<---------------cut here---------------start------------->8---
==8395== Syscall param write(buf) points to uninitialised byte(s)
==8395== at 0x53E4A8D: ??? (in
/gnu/store/3h31zsqxjjg52da5gp3qmhkh4x8klhah-glibc-2.25/lib/libpthread-2.25.so)
==8395== by 0xACAF44D: _gdbm_full_write (in
/gnu/store/kg8ffb14msfnc9aivxj6djrl51g9b3zz-gdbm-1.13/lib/libgdbm.so.4.0.0)
==8395== by 0xACAC6AD: gdbm_fd_open (in
/gnu/store/kg8ffb14msfnc9aivxj6djrl51g9b3zz-gdbm-1.13/lib/libgdbm.so.4.0.0)
==8395== by 0x55FA0BF: ffi_call_unix64 (in
/gnu/store/kvi64k387hqdrn59gsgd09brxh65jxjj-libffi-3.2.1/lib/libffi.so.6.0.4)
==8395== by 0x55F8EE0: ffi_call (in
/gnu/store/kvi64k387hqdrn59gsgd09brxh65jxjj-libffi-3.2.1/lib/libffi.so.6.0.4)
==8395== by 0x4E8C23C: scm_i_foreign_call (in
/gnu/store/gwspk20b7fbrs4l5rzgaadf8896h12bq-guile-2.2.3/lib/libguile-2.2.so.1.3.0)
==8395== by 0x4EF9243: vm_regular_engine (in
/gnu/store/gwspk20b7fbrs4l5rzgaadf8896h12bq-guile-2.2.3/lib/libguile-2.2.so.1.3.0)
==8395== by 0x4EFC7B9: scm_call_n (in
/gnu/store/gwspk20b7fbrs4l5rzgaadf8896h12bq-guile-2.2.3/lib/libguile-2.2.so.1.3.0)
==8395== by 0x4E80A06: scm_primitive_eval (in
/gnu/store/gwspk20b7fbrs4l5rzgaadf8896h12bq-guile-2.2.3/lib/libguile-2.2.so.1.3.0)
==8395== by 0x4E80A62: scm_eval (in
/gnu/store/gwspk20b7fbrs4l5rzgaadf8896h12bq-guile-2.2.3/lib/libguile-2.2.so.1.3.0)
==8395== by 0x4ECBA6F: scm_shell (in
/gnu/store/gwspk20b7fbrs4l5rzgaadf8896h12bq-guile-2.2.3/lib/libguile-2.2.so.1.3.0)
==8395== by 0x4E974AC: invoke_main_func (in
/gnu/store/gwspk20b7fbrs4l5rzgaadf8896h12bq-guile-2.2.3/lib/libguile-2.2.so.1.3.0)
==8395== Address 0xced0044 is 4 bytes inside a block of size 8,388,608 alloc'd
==8395== at 0x4C2AAD6: malloc (in
/gnu/store/p2b1rzqlpdqbhn42g76xzgykbivwc063-valgrind-3.12.0/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==8395== by 0xACAC5E6: gdbm_fd_open (in
/gnu/store/kg8ffb14msfnc9aivxj6djrl51g9b3zz-gdbm-1.13/lib/libgdbm.so.4.0.0)
==8395== by 0x55FA0BF: ffi_call_unix64 (in
/gnu/store/kvi64k387hqdrn59gsgd09brxh65jxjj-libffi-3.2.1/lib/libffi.so.6.0.4)
==8395== by 0x55F8EE0: ffi_call (in
/gnu/store/kvi64k387hqdrn59gsgd09brxh65jxjj-libffi-3.2.1/lib/libffi.so.6.0.4)
==8395== by 0x4E8C23C: scm_i_foreign_call (in
/gnu/store/gwspk20b7fbrs4l5rzgaadf8896h12bq-guile-2.2.3/lib/libguile-2.2.so.1.3.0)
--8<---------------cut here---------------end--------------->8---
>> +(define (entry->string entry)
>> + "Return the wire format for ENTRY as a string."
>> + (match entry
>> + (($ <mandb-entry> file name section synopsis)
>> + (string-append (abbreviate-file-name file) "\t"
>> + (number->string section) "\t"
>> + (number->string section)
>> +
>> + ;; Timestamps, that we always set to the epoch.
>> + "\t0\t0"
>> +
>> + ;; XXX: Weird things.
>> + "\tB\t-\t-\tgz\t"
>
> What’s that?
In db_store.c it’s done like this:
--8<---------------cut here---------------start------------->8---
MYDBM_SET (cont, xasprintf (
"%s\t%s\t%s\t%ld\t%ld\t%c\t%s\t%s\t%s\t%s",
dash_if_unset (in->name),
in->ext,
in->sec,
(long) in->mtime.tv_sec,
in->mtime.tv_nsec,
in->id,
in->pointer,
in->filter,
in->comp,
in->whatis));
--8<---------------cut here---------------end--------------->8---
and db_storage.h says:
--8<---------------cut here---------------start------------->8---
/* One man-db index entry.  The db_store.c excerpt serializes these fields
 * as tab-separated text in the order: name (passed through dash_if_unset),
 * ext, sec, mtime.tv_sec, mtime.tv_nsec, id, pointer, filter, comp, whatis.
 * Note that this differs from the declaration order below: on the wire,
 * filter comes before comp.
 */
struct mandata {
struct mandata *next; /* ptr to next structure, if any */
char *addr; /* ptr to memory containing the fields */
char *name; /* Name of page, if != key */
/* The following are all const because they should be pointers to
 * parts of strings allocated elsewhere (often the addr field above)
 * and should not be written through or freed themselves.
 */
const char *ext; /* Filename ext w/o comp ext */
const char *sec; /* Section name/number */
char id; /* id for this entry: the kind of manual page (ULT_MAN, SO_MAN,
            WHATIS_MAN, STRAY_CAT, WHATIS_CAT); serialized with %c */
const char *pointer; /* id related file pointer */
const char *comp; /* Compression extension, e.g. "gz" */
const char *filter; /* filters needed for the page */
const char *whatis; /* whatis description for page */
struct timespec mtime; /* mod time for file; serialized as two fields,
                          tv_sec then tv_nsec */
};
--8<---------------cut here---------------end--------------->8---
The ‘B’ part gives the kind of manual page:
--8<---------------cut here---------------start------------->8---
/* These definitions give an inherent precedence to each particular type
of manual page:
ULT_MAN: ultimate manual page, the full source nroff file.
SO_MAN: source nroff file containing .so request to an ULT_MAN.
WHATIS_MAN: virtual `whatis referenced' page pointing to an ULT_MAN.
STRAY_CAT: pre-formatted manual page with no source.
WHATIS_CAT: virtual `whatis referenced' page pointing to a STRAY_CAT. */
--8<---------------cut here---------------end--------------->8---
I’ve updated man-db.scm to handle that better.
Thanks,
Ludo’.
diff --git a/guix/man-db.scm b/guix/man-db.scm
index b42558b06..3ce268547 100644
--- a/guix/man-db.scm
+++ b/guix/man-db.scm
@@ -29,6 +29,7 @@
mandb-entry-name
mandb-entry-section
mandb-entry-synopsis
+ mandb-entry-kind
mandb-entries
write-mandb-database))
@@ -47,12 +48,13 @@
(module-use! (current-module) (resolve-interface '(gdbm)))
(define-record-type <mandb-entry>
- (mandb-entry file-name name section synopsis)
+ (mandb-entry file-name name section synopsis kind)
mandb-entry?
(file-name mandb-entry-file-name) ;e.g., "../abiword.1.gz"
(name mandb-entry-name) ;e.g., "ABIWORD"
(section mandb-entry-section) ;number
- (synopsis mandb-entry-synopsis)) ;string
+ (synopsis mandb-entry-synopsis) ;string
+ (kind mandb-entry-kind)) ;'ultimate | 'link
(define (mandb-entry<? entry1 entry2)
(match entry1
@@ -74,16 +76,26 @@
(define (entry->string entry)
"Return the wire format for ENTRY as a string."
(match entry
- (($ <mandb-entry> file name section synopsis)
+ (($ <mandb-entry> file name section synopsis kind)
+ ;; See db_store.c:make_content in man-db for the format.
(string-append (abbreviate-file-name file) "\t"
(number->string section) "\t"
(number->string section)
- ;; Timestamps, that we always set to the epoch.
+ ;; Timestamp that we always set to the epoch.
"\t0\t0"
- ;; XXX: Weird things.
- "\tB\t-\t-\tgz\t"
+ ;; See "db_storage.h" in man-db for the different kinds.
+ "\t"
+ (case kind
+ ((ultimate) "A") ;ultimate man page
+ ((link) "B") ;".so" link to other man page
+ (else "A")) ;something that doesn't matter much
+
+ "\t-\t-\t"
+
+ (if (string-suffix? ".gz" file) "gz" "")
+ "\t"
synopsis "\x00"))))
@@ -94,7 +106,8 @@
(define (write-mandb-database file entries)
"Write ENTRIES to FILE as a man-db database. FILE is usually
\".../index.db\", and is a GDBM database."
- (let ((db (gdbm-open file GDBM_WRCREAT)))
+ (let ((db (gdbm-open file GDBM_WRCREAT
+ #:block-size (* 512 (length entries)))))
(gdbm-set! db %version-key %version-value)
;; Write ENTRIES in sorted order so we get deterministic output.
@@ -141,33 +154,37 @@
(string->number (string-drop (string-drop-right str 1) 1))
(string->number str)))
+ ;; Note: This works for both gzipped and uncompressed files.
(call-with-gzip-input-port (open-file file "r0")
(lambda (port)
(let loop ((name #f)
(section #f)
- (synopsis #f))
+ (synopsis #f)
+ (kind 'ultimate))
(if (and name section synopsis)
- (mandb-entry file name section synopsis)
+ (mandb-entry file name section synopsis kind)
(let ((line (read-line port)))
(if (eof-object? line)
- (mandb-entry file name (or section 0) (or synopsis ""))
+ (mandb-entry file name (or section 0) (or synopsis "")
+ kind)
(match (string-tokenize line)
((".TH" name (= string->number* section) _ ...)
- (loop name section synopsis))
+ (loop name section synopsis kind))
((".SH" (or "NAME" "\"NAME\""))
- (loop name section (read-synopsis port)))
+ (loop name section (read-synopsis port) kind))
((".so" link)
(match (and=> (resolve link)
(cut man-page->entry <> resolve))
(#f
- (loop name section synopsis))
+ (loop name section synopsis 'link))
(alias
(mandb-entry file
(mandb-entry-name alias)
(mandb-entry-section alias)
- (mandb-entry-synopsis alias)))))
+ (mandb-entry-synopsis alias)
+ 'link))))
(_
- (loop name section synopsis))))))))))
+ (loop name section synopsis kind))))))))))
(define (man-files directory)
"Return the list of man pages found under DIRECTORY, recursively."
- bug#29654: Manual database index.db embeds timestamps, Ruud van Asseldonk, 2017/12/10
- bug#29654: Manual database index.db embeds timestamps, Ludovic Courtès, 2017/12/15
- bug#29654: Manual database index.db embeds timestamps, Ludovic Courtès, 2017/12/15
- bug#29654: Manual database index.db embeds timestamps, Ricardo Wurmus, 2017/12/15
- bug#29654: Manual database index.db embeds timestamps, Ludovic Courtès, 2017/12/17
- bug#29654: Manual database index.db embeds timestamps, Ruud van Asseldonk, 2017/12/17
- bug#29654: Manual database index.db embeds timestamps, Ludovic Courtès, 2017/12/17