bug-guix
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#29654: Manual database index.db embeds timestamps


From: Ludovic Courtès
Subject: bug#29654: Manual database index.db embeds timestamps
Date: Sun, 17 Dec 2017 00:51:53 +0100
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/25.3 (gnu/linux)

Howdy Ricardo,

Ricardo Wurmus <address@hidden> skribis:

>> Unfortunately, this is not fully deterministic: when running --check
>> several times in a row, I occasionally get different results.  I suspect
>> GDBM’s output is not fully deterministic.
>
> Hmm, I dumped the contents of the generated databases with gdbm_dump and
> couldn’t find any difference aside from the header (which is produced by
> gdbm_dump itself).  Diffoscope shows a lot of differences, though.
>
> I thought that maybe the difference comes from the fact that upon adding
> new entries gdbm grows the hash table.  After setting the initial size
> to a multiple of the number of entries I haven’t been able to generate a
> non-reproducible database.
>
> My only change is in “write-mandb-database”:
>
>   (gdbm-open file GDBM_WRCREAT #:block-size (* 512 (length entries)))
>
> I tried this:
>
>     ./pre-inst-env guix package -p foo -i coreutils guile
>     for i in `seq 30`; do ./pre-inst-env guix build --check -K /gnu/store/pg3684khpj69py40v7p76b90r9q4j2lv-manual-database.drv; done
>
> Seems fine.  Coincidence or did I get lucky?

I checked with the program below.  It helps, but does not entirely fix
it:

(use-modules (guix man-db)
             (guix hash)
             (guix base32))

;; Scratch location of the database that is regenerated on every iteration.
(define %database "/tmp/index.db")

;; Endless loop: remove the previous database, rebuild it from the man pages
;; found under the given directory, and print the resulting file's size and
;; SHA256 (nix-base32 encoded) so that differences between iterations show up.
(let loop ()
  ;; Any failure to delete is ignored, e.g. when the file does not exist yet.
  (false-if-exception (delete-file %database))
  (write-mandb-database %database
                        (mandb-entries "/home/ludo/.guix-profile/share/man"))
  ;; 'pk' displays its arguments: the file size, then the hash.
  (pk (stat:size (stat %database))
      (bytevector->nix-base32-string (file-sha256 %database)))
  (loop))
Valgrind reports this:

--8<---------------cut here---------------start------------->8---
==8395== Syscall param write(buf) points to uninitialised byte(s)
==8395==    at 0x53E4A8D: ??? (in /gnu/store/3h31zsqxjjg52da5gp3qmhkh4x8klhah-glibc-2.25/lib/libpthread-2.25.so)
==8395==    by 0xACAF44D: _gdbm_full_write (in /gnu/store/kg8ffb14msfnc9aivxj6djrl51g9b3zz-gdbm-1.13/lib/libgdbm.so.4.0.0)
==8395==    by 0xACAC6AD: gdbm_fd_open (in /gnu/store/kg8ffb14msfnc9aivxj6djrl51g9b3zz-gdbm-1.13/lib/libgdbm.so.4.0.0)
==8395==    by 0x55FA0BF: ffi_call_unix64 (in /gnu/store/kvi64k387hqdrn59gsgd09brxh65jxjj-libffi-3.2.1/lib/libffi.so.6.0.4)
==8395==    by 0x55F8EE0: ffi_call (in /gnu/store/kvi64k387hqdrn59gsgd09brxh65jxjj-libffi-3.2.1/lib/libffi.so.6.0.4)
==8395==    by 0x4E8C23C: scm_i_foreign_call (in /gnu/store/gwspk20b7fbrs4l5rzgaadf8896h12bq-guile-2.2.3/lib/libguile-2.2.so.1.3.0)
==8395==    by 0x4EF9243: vm_regular_engine (in /gnu/store/gwspk20b7fbrs4l5rzgaadf8896h12bq-guile-2.2.3/lib/libguile-2.2.so.1.3.0)
==8395==    by 0x4EFC7B9: scm_call_n (in /gnu/store/gwspk20b7fbrs4l5rzgaadf8896h12bq-guile-2.2.3/lib/libguile-2.2.so.1.3.0)
==8395==    by 0x4E80A06: scm_primitive_eval (in /gnu/store/gwspk20b7fbrs4l5rzgaadf8896h12bq-guile-2.2.3/lib/libguile-2.2.so.1.3.0)
==8395==    by 0x4E80A62: scm_eval (in /gnu/store/gwspk20b7fbrs4l5rzgaadf8896h12bq-guile-2.2.3/lib/libguile-2.2.so.1.3.0)
==8395==    by 0x4ECBA6F: scm_shell (in /gnu/store/gwspk20b7fbrs4l5rzgaadf8896h12bq-guile-2.2.3/lib/libguile-2.2.so.1.3.0)
==8395==    by 0x4E974AC: invoke_main_func (in /gnu/store/gwspk20b7fbrs4l5rzgaadf8896h12bq-guile-2.2.3/lib/libguile-2.2.so.1.3.0)
==8395==  Address 0xced0044 is 4 bytes inside a block of size 8,388,608 alloc'd
==8395==    at 0x4C2AAD6: malloc (in /gnu/store/p2b1rzqlpdqbhn42g76xzgykbivwc063-valgrind-3.12.0/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==8395==    by 0xACAC5E6: gdbm_fd_open (in /gnu/store/kg8ffb14msfnc9aivxj6djrl51g9b3zz-gdbm-1.13/lib/libgdbm.so.4.0.0)
==8395==    by 0x55FA0BF: ffi_call_unix64 (in /gnu/store/kvi64k387hqdrn59gsgd09brxh65jxjj-libffi-3.2.1/lib/libffi.so.6.0.4)
==8395==    by 0x55F8EE0: ffi_call (in /gnu/store/kvi64k387hqdrn59gsgd09brxh65jxjj-libffi-3.2.1/lib/libffi.so.6.0.4)
==8395==    by 0x4E8C23C: scm_i_foreign_call (in /gnu/store/gwspk20b7fbrs4l5rzgaadf8896h12bq-guile-2.2.3/lib/libguile-2.2.so.1.3.0)
--8<---------------cut here---------------end--------------->8---


>> +(define (entry->string entry)
>> +  "Return the wire format for ENTRY as a string."
>> +  (match entry
>> +    (($ <mandb-entry> file name section synopsis)
>> +     (string-append (abbreviate-file-name file) "\t"
>> +                    (number->string section) "\t"
>> +                    (number->string section)
>> +
>> +                    ;; Timestamps, that we always set to the epoch.
>> +                    "\t0\t0"
>> +
>> +                    ;; XXX: Weird things.
>> +                    "\tB\t-\t-\tgz\t"
>
> What’s that?

In db_store.c it’s done like this:

--8<---------------cut here---------------start------------->8---
        MYDBM_SET (cont, xasprintf (
                "%s\t%s\t%s\t%ld\t%ld\t%c\t%s\t%s\t%s\t%s",
                dash_if_unset (in->name),
                in->ext,
                in->sec,
                (long) in->mtime.tv_sec,
                in->mtime.tv_nsec,
                in->id,
                in->pointer,
                in->filter,
                in->comp,
                in->whatis));
--8<---------------cut here---------------end--------------->8---

and db_storage.h says:

--8<---------------cut here---------------start------------->8---
struct mandata {
        struct mandata *next;           /* ptr to next structure, if any */
        char *addr;                     /* ptr to memory containing the fields */

        char *name;                     /* Name of page, if != key */

        /* The following are all const because they should be pointers to
         * parts of strings allocated elsewhere (often the addr field above)
         * and should not be written through or freed themselves.
         */
        const char *ext;                /* Filename ext w/o comp ext */
        const char *sec;                /* Section name/number */
        char id;                        /* id for this entry */
        const char *pointer;            /* id related file pointer */
        const char *comp;               /* Compression extension */
        const char *filter;             /* filters needed for the page */
        const char *whatis;             /* whatis description for page */
        struct timespec mtime;          /* mod time for file */
}; 
--8<---------------cut here---------------end--------------->8---

The ‘B’ part gives the kind of manual page:

--8<---------------cut here---------------start------------->8---
/* These definitions give an inherent precedence to each particular type
   of manual page:
   
   ULT_MAN:     ultimate manual page, the full source nroff file.
   SO_MAN:      source nroff file containing .so request to an ULT_MAN.
   WHATIS_MAN:  virtual `whatis referenced' page pointing to an ULT_MAN.
   STRAY_CAT:   pre-formatted manual page with no source.
   WHATIS_CAT:  virtual `whatis referenced' page pointing to a STRAY_CAT. */
--8<---------------cut here---------------end--------------->8---

I’ve updated man-db.scm to handle that better.

Thanks,
Ludo’.

diff --git a/guix/man-db.scm b/guix/man-db.scm
index b42558b06..3ce268547 100644
--- a/guix/man-db.scm
+++ b/guix/man-db.scm
@@ -29,6 +29,7 @@
             mandb-entry-name
             mandb-entry-section
             mandb-entry-synopsis
+            mandb-entry-kind
 
             mandb-entries
             write-mandb-database))
@@ -47,12 +48,13 @@
 (module-use! (current-module) (resolve-interface '(gdbm)))
 
 (define-record-type <mandb-entry>
-  (mandb-entry file-name name section synopsis)
+  (mandb-entry file-name name section synopsis kind)
   mandb-entry?
   (file-name mandb-entry-file-name)               ;e.g., "../abiword.1.gz"
   (name      mandb-entry-name)                    ;e.g., "ABIWORD"
   (section   mandb-entry-section)                 ;number
-  (synopsis  mandb-entry-synopsis))               ;string
+  (synopsis  mandb-entry-synopsis)                ;string
+  (kind      mandb-entry-kind))                   ;'ultimate | 'link
 
 (define (mandb-entry<? entry1 entry2)
   (match entry1
@@ -74,16 +76,26 @@
 (define (entry->string entry)
   "Return the wire format for ENTRY as a string."
   (match entry
-    (($ <mandb-entry> file name section synopsis)
+    (($ <mandb-entry> file name section synopsis kind)
+     ;; See db_store.c:make_content in man-db for the format.
      (string-append (abbreviate-file-name file) "\t"
                     (number->string section) "\t"
                     (number->string section)
 
-                    ;; Timestamps, that we always set to the epoch.
+                    ;; Timestamp that we always set to the epoch.
                     "\t0\t0"
 
-                    ;; XXX: Weird things.
-                    "\tB\t-\t-\tgz\t"
+                    ;; See "db_storage.h" in man-db for the different kinds.
+                    "\t"
+                    (case kind
+                      ((ultimate) "A")     ;ultimate man page
+                      ((link)     "B")     ;".so" link to other man page
+                      (else       "A"))    ;something that doesn't matter much
+
+                    "\t-\t-\t"
+
+                    (if (string-suffix? ".gz" file) "gz" "")
+                    "\t"
 
                     synopsis "\x00"))))
 
@@ -94,7 +106,8 @@
 (define (write-mandb-database file entries)
   "Write ENTRIES to FILE as a man-db database.  FILE is usually
 \".../index.db\", and is a GDBM database."
-  (let ((db (gdbm-open file GDBM_WRCREAT)))
+  (let ((db (gdbm-open file GDBM_WRCREAT
+                       #:block-size (* 512 (length entries)))))
     (gdbm-set! db %version-key %version-value)
 
     ;; Write ENTRIES in sorted order so we get deterministic output.
@@ -141,33 +154,37 @@
         (string->number (string-drop (string-drop-right str 1) 1))
         (string->number str)))
 
+  ;; Note: This works for both gzipped and uncompressed files.
   (call-with-gzip-input-port (open-file file "r0")
     (lambda (port)
       (let loop ((name     #f)
                  (section  #f)
-                 (synopsis #f))
+                 (synopsis #f)
+                 (kind     'ultimate))
         (if (and name section synopsis)
-            (mandb-entry file name section synopsis)
+            (mandb-entry file name section synopsis kind)
             (let ((line (read-line port)))
               (if (eof-object? line)
-                  (mandb-entry file name (or section 0) (or synopsis ""))
+                  (mandb-entry file name (or section 0) (or synopsis "")
+                               kind)
                   (match (string-tokenize line)
                     ((".TH" name (= string->number* section) _ ...)
-                     (loop name section synopsis))
+                     (loop name section synopsis kind))
                     ((".SH" (or "NAME" "\"NAME\""))
-                     (loop name section (read-synopsis port)))
+                     (loop name section (read-synopsis port) kind))
                     ((".so" link)
                      (match (and=> (resolve link)
                                    (cut man-page->entry <> resolve))
                        (#f
-                        (loop name section synopsis))
+                        (loop name section synopsis 'link))
                        (alias
                         (mandb-entry file
                                      (mandb-entry-name alias)
                                      (mandb-entry-section alias)
-                                     (mandb-entry-synopsis alias)))))
+                                     (mandb-entry-synopsis alias)
+                                     'link))))
                     (_
-                     (loop name section synopsis))))))))))
+                     (loop name section synopsis kind))))))))))
 
 (define (man-files directory)
   "Return the list of man pages found under DIRECTORY, recursively."

reply via email to

[Prev in Thread] Current Thread [Next in Thread]