emacs-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Some experience with the igc branch


From: Pip Cet
Subject: Re: Some experience with the igc branch
Date: Wed, 25 Dec 2024 17:40:42 +0000

Gerd Möllmann <gerd.moellmann@gmail.com> writes:
> DEFUN ("function-equal", Ffunction_equal, Sfunction_equal, 2, 2, 0,
>        doc: /* Return non-nil if F1 and F2 come from the same source.
> Used to determine if different closures are just different instances of
> the same lambda expression, or are really unrelated function.  */)
>      (Lisp_Object f1, Lisp_Object f2)
> {
>   bool res;
>   if (EQ (f1, f2))

This EQ can also trip.  Sorry to insist on that, but I think it's an
important point: if we change Lisp internals (such as the slow EQ
thing), the "we're not dereferencing it, just looking at the bit
representation of the pointer" approach will fail again, in unexpected
places.

I haven't seen a technical argument against using separate stacks for
MPS and signals (I don't consider "it's a change and we'd need to test
it" to be any more true for this change than for any other proposed
change, or for what's in scratch/igc now).  It would get us on par with
master.  (Both versions need to add the best memory barrier we have to
the specpdl_ptr++ code.)

(I don't think MPS works on multi-threaded systems if word stores aren't
atomic.  If thread A is in the middle of updating an mps_word
referencing another object, and thread B triggers a GC, thread A is
stopped and thread B might scan the segment in the inconsistent state.)

Miraculously, everything can be made to work out in the WIDE_EMACS_INT
case, even though 64-bit words are stored in two insns: we only look at
the LSB 32-bit word when fixing (because we USE_LSB_TAG), so that'll
just work.  Late exthdr creation needed to be changed a little, and now
assumes that changing a 64-bit value to another 64-bit value which
differs only in one 32-bit half is atomic.

Here's a snapshot of the current code.  It still assumes strong memory
ordering between threads because I'm pretty sure MPS needs that, too, so
it's just asm volatile ("" ::: "memory") for now.

diff --git a/src/igc.c b/src/igc.c
index 136667aefea..4342964382a 100644
--- a/src/igc.c
+++ b/src/igc.c
@@ -747,19 +747,42 @@ IGC_DEFINE_LIST (igc_root);
 
 /* Registry entry for an MPS thread mps_thr_t.  */
 
+/* FIXME */
+#include <stdatomic.h>
+
+struct emacs_ap
+{
+  mps_ap_t mps_ap;
+  struct igc *gc;
+  size_t requested_bytes;
+  void *usable_memory;
+  void *usable_memory_end;
+
+#ifdef ENABLE_CHECKING
+  atomic_uintptr_t waiting_threads; /* debug only */
+  sys_thread_t emacs_thread;
+#endif
+};
+
+typedef struct emacs_ap emacs_ap_t;
+
+#ifndef ATOMIC_POINTER_LOCK_FREE
+#error "this probably won't work"
+#endif
+
 struct igc_thread
 {
   struct igc *gc;
   mps_thr_t thr;
 
   /* Allocation points for the thread.  */
-  mps_ap_t dflt_ap;
-  mps_ap_t leaf_ap;
-  mps_ap_t weak_strong_ap;
-  mps_ap_t weak_weak_ap;
-  mps_ap_t weak_hash_strong_ap;
-  mps_ap_t weak_hash_weak_ap;
-  mps_ap_t immovable_ap;
+  emacs_ap_t dflt_ap;
+  emacs_ap_t leaf_ap;
+  emacs_ap_t weak_strong_ap;
+  emacs_ap_t weak_weak_ap;
+  emacs_ap_t weak_hash_strong_ap;
+  emacs_ap_t weak_hash_weak_ap;
+  emacs_ap_t immovable_ap;
 
   /* Quick access to the roots used for specpdl, bytecode stack and
      control stack.  */
@@ -814,6 +837,15 @@ IGC_DEFINE_LIST (igc_thread);
   /* The real signal mask we want to restore after handling pending
    * signals.  */
   sigset_t signal_mask;
+
+  sys_thread_t allocation_thread;
+  sys_mutex_t mutex;
+  sys_cond_t cond;
+  atomic_uintptr_t which_ap;
+  atomic_uintptr_t fault_address;
+  atomic_uintptr_t manual_collections;
+  atomic_uintptr_t idle_time;
+  atomic_uintptr_t idle_work;
 };
 
 static bool process_one_message (struct igc *gc);
@@ -2913,8 +2945,179 @@ igc_root_destroy_comp_unit_eph (struct 
Lisp_Native_Comp_Unit *u)
   maybe_destroy_root (&u->data_eph_relocs_root);
 }
 
+static mps_addr_t alloc_impl_raw (size_t size, mps_ap_t ap);
+static mps_addr_t alloc_impl (size_t size, enum igc_obj_type type, emacs_ap_t 
*ap);
+static void igc_collect_raw (void);
+
+static void *igc_allocation_thread (void *igc_v)
+{
+  uint64_t n_alloc = 0;
+  uint64_t bytes_alloc = 0;
+  struct igc *gc = igc_v;
+  sys_mutex_lock (&gc->mutex);
+  while (true)
+    {
+      uintptr_t fault_address = gc->fault_address;
+
+      if (fault_address)
+       {
+         volatile char *cp = (void *)fault_address;
+         volatile char c = *cp;
+         (void) c;
+         atomic_store (&gc->fault_address, 0);
+         continue;
+       }
+
+      uintptr_t which_ap = gc->which_ap;
+      if (which_ap)
+       {
+         atomic_store (&gc->which_ap, 0);
+         emacs_ap_t *ap = (void *)which_ap;
+         if (gc->allocation_thread != sys_thread_self ())
+           emacs_abort();
+         if (!ap->usable_memory)
+           {
+             igc_root_create_ambig ((char *)&ap->usable_memory,
+                                    (char *)(&ap->usable_memory+1),
+                                    "thread ap memory root");
+           }
+         while (ap->requested_bytes)
+           {
+             size_t size = ap->requested_bytes;
+             if (size < 1024 * 1024)
+               size = 1024 * 1024;
+             void *p = alloc_impl_raw (size, ap->mps_ap);
+             n_alloc++;
+             bytes_alloc += size;
+#if 0
+             if (0 == ((n_alloc-1)&n_alloc))
+               fprintf (stderr, "%ld %ld\n", n_alloc, bytes_alloc);
+#endif
+             ap->usable_memory = p;
+             ap->usable_memory_end = (char *)p + size;
+             ap->requested_bytes = 0;
+             sched_yield ();
+           }
+         continue;
+       }
+
+      uintptr_t manual_collections = gc->manual_collections;
+      if (manual_collections)
+       {
+         igc_collect_raw ();
+         atomic_store (&gc->manual_collections, 0);
+         continue;
+       }
+
+      uintptr_t idle_time = gc->idle_time;
+      if (idle_time)
+       {
+         double interval = idle_time * 1e-9;
+         atomic_store (&gc->idle_time, 0);
+         if (mps_arena_step (global_igc->arena, interval, 0))
+           atomic_store (&gc->idle_work, 1);
+         else
+           atomic_store (&gc->idle_work, 0);
+
+         continue;
+       }
+
+      sys_cond_wait (&gc->cond, &gc->mutex);
+    }
+  sys_mutex_unlock (&gc->mutex); /* in case the infloop returns */
+
+  return NULL;
+}
+
+static mps_addr_t alloc_impl (size_t size, enum igc_obj_type type, emacs_ap_t 
*ap)
+{
+  if (size == 0)
+    return 0;
+
+  while (size & 7)
+    size++;
+
+  mps_addr_t ret = 0;
+  while (!ret)
+    {
+#ifdef ENABLE_CHECKING
+      if (ap->emacs_thread != sys_thread_self ())
+       emacs_abort ();
+      uintptr_t other_threads = atomic_fetch_add (&ap->waiting_threads, 1);
+      if (other_threads != 0)
+       {
+         /* we know that the other "thread" is actually on top of us,
+          * and we're a signal handler, so we shouldn't be allocating
+          * memory. */
+         emacs_abort ();
+       }
+#endif
+
+      void *candidate = ap->usable_memory;
+      void *end = ap->usable_memory_end;
+      if ((char *)candidate + size <= (char *)end)
+       {
+         void *actual = ap->usable_memory;
+         void *actual_end = (char *)actual + size;
+         ap->usable_memory = actual_end;
+         if (actual_end < end)
+           {
+             set_header ((struct igc_header *)actual_end, IGC_OBJ_PAD, (char 
*)end - (char *)actual_end, 0);
+           }
+         asm volatile ("" : : : "memory");
+         set_header ((struct igc_header *)actual, type, size, alloc_hash ());
+         ret = actual;
+       }
+      else
+       {
+         ap->requested_bytes = (size_t) size;
+
+         while (ap->requested_bytes)
+           {
+             /* No MPS data can be accessed by the main thread while it holds 
the mutex! */
+             sys_mutex_lock (&ap->gc->mutex);
+             atomic_store (&ap->gc->which_ap, (uintptr_t) ap);
+             sys_cond_broadcast (&ap->gc->cond);
+             sys_mutex_unlock (&ap->gc->mutex);
+             sched_yield ();
+           }
+       }
+#ifdef ENABLE_CHECKING
+      atomic_fetch_add (&ap->waiting_threads, -1);
+#endif
+    }
+  return (mps_addr_t) ret;
+}
+
+static mps_res_t emacs_ap_create_k (emacs_ap_t *ap, mps_pool_t pool,
+                                   mps_arg_s *args)
+{
+  ap->gc = global_igc;
+  ap->usable_memory = NULL;
+  ap->usable_memory_end = NULL;
+#ifdef ENABLE_CHECKING
+  atomic_store(&ap->waiting_threads, 0);
+#endif
+  ap->requested_bytes = 0;
+
+  static int just_one;
+  if (!(just_one++))
+    if (!sys_thread_create(&ap->gc->allocation_thread, igc_allocation_thread, 
ap->gc))
+      emacs_abort ();
+
+#ifdef ENABLE_CHECKING
+  ap->emacs_thread = sys_thread_self ();
+#endif
+  return mps_ap_create_k (&ap->mps_ap, pool, args);
+}
+
+static void emacs_ap_destroy (emacs_ap_t *ap)
+{
+  return;
+}
+
 static mps_res_t
-create_weak_ap (mps_ap_t *ap, struct igc_thread *t, bool weak)
+create_weak_ap (emacs_ap_t *ap, struct igc_thread *t, bool weak)
 {
   struct igc *gc = t->gc;
   mps_res_t res;
@@ -2923,14 +3126,14 @@ create_weak_ap (mps_ap_t *ap, struct igc_thread *t, 
bool weak)
   {
     MPS_ARGS_ADD (args, MPS_KEY_RANK,
                  weak ? mps_rank_weak () : mps_rank_exact ());
-    res = mps_ap_create_k (ap, pool, args);
+    res = emacs_ap_create_k (ap, pool, args);
   }
   MPS_ARGS_END (args);
   return res;
 }
 
 static mps_res_t
-create_weak_hash_ap (mps_ap_t *ap, struct igc_thread *t, bool weak)
+create_weak_hash_ap (emacs_ap_t *ap, struct igc_thread *t, bool weak)
 {
   struct igc *gc = t->gc;
   mps_res_t res;
@@ -2939,7 +3142,7 @@ create_weak_hash_ap (mps_ap_t *ap, struct igc_thread *t, 
bool weak)
   {
     MPS_ARGS_ADD (args, MPS_KEY_RANK,
                  weak ? mps_rank_weak () : mps_rank_exact ());
-    res = mps_ap_create_k (ap, pool, args);
+    res = emacs_ap_create_k (ap, pool, args);
   }
   MPS_ARGS_END (args);
   return res;
@@ -2949,12 +3152,14 @@ create_weak_hash_ap (mps_ap_t *ap, struct igc_thread 
*t, bool weak)
 create_thread_aps (struct igc_thread *t)
 {
   struct igc *gc = t->gc;
+  sys_cond_init (&gc->cond);
+  sys_mutex_init (&gc->mutex);
   mps_res_t res;
-  res = mps_ap_create_k (&t->dflt_ap, gc->dflt_pool, mps_args_none);
+  res = emacs_ap_create_k (&t->dflt_ap, gc->dflt_pool, mps_args_none);
   IGC_CHECK_RES (res);
-  res = mps_ap_create_k (&t->leaf_ap, gc->leaf_pool, mps_args_none);
+  res = emacs_ap_create_k (&t->leaf_ap, gc->leaf_pool, mps_args_none);
   IGC_CHECK_RES (res);
-  res = mps_ap_create_k (&t->immovable_ap, gc->immovable_pool, mps_args_none);
+  res = emacs_ap_create_k (&t->immovable_ap, gc->immovable_pool, 
mps_args_none);
   IGC_CHECK_RES (res);
   res = create_weak_ap (&t->weak_strong_ap, t, false);
   res = create_weak_hash_ap (&t->weak_hash_strong_ap, t, false);
@@ -3016,13 +3221,13 @@ igc_thread_remove (void **pinfo)
   destroy_root (&t->d.stack_root);
   destroy_root (&t->d.specpdl_root);
   destroy_root (&t->d.bc_root);
-  mps_ap_destroy (t->d.dflt_ap);
-  mps_ap_destroy (t->d.leaf_ap);
-  mps_ap_destroy (t->d.weak_strong_ap);
-  mps_ap_destroy (t->d.weak_weak_ap);
-  mps_ap_destroy (t->d.weak_hash_strong_ap);
-  mps_ap_destroy (t->d.weak_hash_weak_ap);
-  mps_ap_destroy (t->d.immovable_ap);
+  emacs_ap_destroy (&t->d.dflt_ap);
+  emacs_ap_destroy (&t->d.leaf_ap);
+  emacs_ap_destroy (&t->d.weak_strong_ap);
+  emacs_ap_destroy (&t->d.weak_weak_ap);
+  emacs_ap_destroy (&t->d.weak_hash_strong_ap);
+  emacs_ap_destroy (&t->d.weak_hash_weak_ap);
+  emacs_ap_destroy (&t->d.immovable_ap);
   mps_thread_dereg (deregister_thread (t));
 }
 
@@ -3635,7 +3840,16 @@ arena_step (void)
            interval = 0.05;
        }
 
-      if (mps_arena_step (global_igc->arena, interval, 0))
+      atomic_store (&global_igc->idle_time, interval * 1e9);
+      sys_cond_broadcast (&global_igc->cond);
+      while (global_igc->idle_time)
+       {
+         sched_yield ();
+         sys_mutex_lock (&global_igc->mutex);
+         sched_yield ();
+         sys_mutex_unlock (&global_igc->mutex);
+       }
+      if (global_igc->idle_work)
        return true;
     }
 
@@ -3686,7 +3900,7 @@ igc_on_idle (void)
   }
 }
 
-static mps_ap_t
+static emacs_ap_t *
 thread_ap (enum igc_obj_type type)
 {
   struct igc_thread_list *t = current_thread->gc_info;
@@ -3707,13 +3921,13 @@ thread_ap (enum igc_obj_type type)
       emacs_abort ();
 
     case IGC_OBJ_MARKER_VECTOR:
-      return t->d.weak_weak_ap;
+      return &t->d.weak_weak_ap;
 
     case IGC_OBJ_WEAK_HASH_TABLE_WEAK_PART:
-      return t->d.weak_hash_weak_ap;
+      return &t->d.weak_hash_weak_ap;
 
     case IGC_OBJ_WEAK_HASH_TABLE_STRONG_PART:
-      return t->d.weak_hash_strong_ap;
+      return &t->d.weak_hash_strong_ap;
 
     case IGC_OBJ_VECTOR:
     case IGC_OBJ_CONS:
@@ -3728,12 +3942,12 @@ thread_ap (enum igc_obj_type type)
     case IGC_OBJ_FACE_CACHE:
     case IGC_OBJ_BLV:
     case IGC_OBJ_HANDLER:
-      return t->d.dflt_ap;
+      return &t->d.dflt_ap;
 
     case IGC_OBJ_STRING_DATA:
     case IGC_OBJ_FLOAT:
     case IGC_OBJ_BYTES:
-      return t->d.leaf_ap;
+      return &t->d.leaf_ap;
     }
   emacs_abort ();
 }
@@ -3746,8 +3960,8 @@ igc_break (void)
 {
 }
 
-void
-igc_collect (void)
+static void
+igc_collect_raw (void)
 {
   struct igc *gc = global_igc;
   if (gc->park_count == 0)
@@ -3758,6 +3972,26 @@ igc_collect (void)
     }
 }
 
+void
+igc_collect (void)
+{
+  struct igc *gc = global_igc;
+  if (gc->park_count == 0)
+    {
+      atomic_store (&gc->manual_collections, 1);
+      while (gc->manual_collections)
+       {
+         sys_cond_broadcast (&gc->cond);
+         sched_yield ();
+         sys_mutex_lock (&gc->mutex);
+         /* pthread_mutex_lock () directly followed by
+          * pthread_mutex_unlock () doesn't work, IIRC */
+         sched_yield ();
+         sys_mutex_unlock (&gc->mutex);
+       }
+    }
+}
+
 DEFUN ("igc--collect", Figc__collect, Sigc__collect, 0, 0, 0,
        doc: /* Force an immediate arena garbage collection.  */)
   (void)
@@ -3805,7 +4039,7 @@ igc_hash (Lisp_Object key)
    object.  */
 
 static mps_addr_t
-alloc_impl (size_t size, enum igc_obj_type type, mps_ap_t ap)
+alloc_impl_raw (size_t size, mps_ap_t ap)
 {
   mps_addr_t p UNINIT;
   size = alloc_size (size);
@@ -3820,14 +4054,14 @@ alloc_impl (size_t size, enum igc_obj_type type, 
mps_ap_t ap)
            memory_full (0);
          /* Object _must_ have valid contents before commit.  */
          memclear (p, size);
-         set_header (p, type, size, alloc_hash ());
+         set_header (p, IGC_OBJ_PAD, size, 0);
        }
       while (!mps_commit (ap, p, size));
       break;
 
     case IGC_STATE_DEAD:
       p = xzalloc (size);
-      set_header (p, type, size, alloc_hash ());
+      set_header (p, IGC_OBJ_PAD, size, alloc_hash ());
       break;
 
     case IGC_STATE_INITIAL:
@@ -3854,7 +4088,7 @@ alloc (size_t size, enum igc_obj_type type)
 alloc_immovable (size_t size, enum igc_obj_type type)
 {
   struct igc_thread_list *t = current_thread->gc_info;
-  return alloc_impl (size, type, t->d.immovable_ap);
+  return alloc_impl (size, type, &t->d.immovable_ap);
 }
 
 #ifdef HAVE_MODULES
@@ -4087,6 +4321,8 @@ weak_hash_find_dependent (mps_addr_t base)
        struct Lisp_Weak_Hash_Table_Strong_Part *w = client;
        return w->weak;
       }
+    case IGC_OBJ_PAD:
+      return 0;
     default:
       emacs_abort ();
     }
@@ -4448,8 +4684,12 @@ igc_external_header (struct igc_header *h)
       exthdr->hash = header_hash (h);
       exthdr->obj_type = header_type (h);
       exthdr->extra_dependency = Qnil;
-      /* On IA-32, the upper 32-bit word is 0 after this, which is okay.  */
-      h->v = (intptr_t)exthdr + IGC_TAG_EXTHDR;
+      asm volatile ("" : : : "memory");
+      uint64_t v = h->v;
+      /* maintain the upper 32-bit word for WIDE_EMACS_INT builds. */
+      v -= (uintptr_t) v;
+      v += (intptr_t)exthdr + IGC_TAG_EXTHDR;
+      h->v = v;
       mps_addr_t ref = (mps_addr_t) h;
       mps_res_t res = mps_finalize (global_igc->arena, &ref);
       IGC_CHECK_RES (res);
@@ -4893,17 +5133,17 @@ igc_on_pdump_loaded (void *dump_base, void *hot_start, 
void *hot_end,
 igc_alloc_dump (size_t nbytes)
 {
   igc_assert (global_igc->park_count > 0);
-  mps_ap_t ap = thread_ap (IGC_OBJ_CONS);
+  emacs_ap_t *ap = thread_ap (IGC_OBJ_CONS);
   size_t block_size = igc_header_size () + nbytes;
   mps_addr_t block;
   do
     {
-      mps_res_t res = mps_reserve (&block, ap, block_size);
+      mps_res_t res = mps_reserve (&block, ap->mps_ap, block_size);
       if (res != MPS_RES_OK)
        memory_full (0);
       set_header (block, IGC_OBJ_INVALID, block_size, 0);
     }
-  while (!mps_commit (ap, block, block_size));
+  while (!mps_commit (ap->mps_ap, block, block_size));
   return (char *) block + igc_header_size ();
 }
 
@@ -5050,6 +5290,40 @@ DEFUN ("igc--remove-extra-dependency", 
Figc__remove_extra_dependency,
   return Qt;
 }
 
+static struct sigaction mps_sigaction;
+
+static void sigsegv_sigaction (int sig, siginfo_t *info, void *uap)
+{
+  if (sys_thread_self () == main_thread.s.thread_id)
+    {
+      atomic_store (&global_igc->fault_address, (uintptr_t) info->si_addr);
+      sys_cond_broadcast (&global_igc->cond);
+      sched_yield ();
+      sys_mutex_lock (&global_igc->mutex);
+      /* IIRC, pthread_mutex_lock directly followed by
+        pthread_mutex_unlock causes problems somehow... */
+      sched_yield ();
+      sys_mutex_unlock (&global_igc->mutex);
+    }
+  else
+    {
+      /* Recipe for disaster here, I guess.  */
+      mps_sigaction.sa_sigaction (sig, info, uap);
+    }
+}
+
+#ifdef SIGSEGV
+static void steal_sigsegv (void)
+{
+  struct sigaction emacs_sigaction;
+  emacs_sigaction.sa_sigaction = sigsegv_sigaction;
+  sigemptyset (&emacs_sigaction.sa_mask);
+  emacs_sigaction.sa_flags = SA_SIGINFO | SA_RESTART;
+
+  sigaction (SIGSEGV, &emacs_sigaction, &mps_sigaction);
+}
+#endif
+
 /***********************************************************************
                                  Init
  ***********************************************************************/
@@ -5061,6 +5335,9 @@ init_igc (void)
   (void) mps_lib_assert_fail_install (igc_assert_fail);
   global_igc = make_igc ();
   add_main_thread ();
+#ifdef SIGSEGV
+  steal_sigsegv ();
+#endif
   set_state (IGC_STATE_USABLE_PARKED);
 }
 
diff --git a/src/lisp.h b/src/lisp.h
index 48585c2d8a1..37cc62052c1 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -4102,7 +4102,14 @@ backtrace_debug_on_exit (union specbinding *pdl)
 INLINE void
 grow_specpdl (void)
 {
+  /* we don't need a memory bus barrier here, but a logical memory
+     barrier so GCC doesn't reorder stores. */
+  asm volatile ("" : : : "memory");
+  /* This increment doesn't need to be atomic in its entirety, but it
+     can't expose an intermediate state of specpdl_ptr; IOW, the store
+     needs to be a single CPU instruction. */
   specpdl_ptr++;
+  asm volatile ("" : : : "memory");
   if (specpdl_ptr == specpdl_end)
     grow_specpdl_allocation ();
 }




reply via email to

[Prev in Thread] Current Thread [Next in Thread]