From df35d17559c67c7fc61ca683ea10fc80addb71bb Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 28 Aug 2024 00:44:09 +0000 Subject: [PATCH 01/38] Refactoring to be considered before adding MMTk --- src/gc-common.c | 156 +++++++++++++++++++++++++++++++++++++++++++ src/gc-common.h | 6 ++ src/gc-debug.c | 41 +----------- src/gc-interface.h | 12 ++++ src/gc-stacks.c | 4 +- src/gc-stock.c | 156 ++++++++++++------------------------------- src/gc-stock.h | 21 ------ src/julia.h | 2 +- src/julia_internal.h | 26 +------- src/scheduler.c | 11 +++ src/stackwalk.c | 4 +- src/staticdata.c | 2 + 12 files changed, 237 insertions(+), 204 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index ee461b576ea9e..2ec167caa667a 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -20,6 +20,11 @@ extern "C" { jl_gc_num_t gc_num = {0}; +JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) +{ + return gc_num.total_time; +} + // =========================================================================== // // GC Callbacks // =========================================================================== // @@ -489,6 +494,87 @@ jl_ptls_t* gc_all_tls_states; // MISC // =========================================================================== // +JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return jl_gc_new_weakref_th(ptls, value); +} + +JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) +{ + return jl_gc_alloc(ptls, sz, ty); +} + +JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return jl_gc_alloc(ptls, sz, NULL); +} + +// allocation wrappers that save the size of allocations, to allow using +// jl_gc_counted_* functions with a libc-compatible API. 
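+// Layout sketch (the wrappers below assume JL_SMALL_BYTE_ALIGNMENT == 16, i.e.
+// two int64_t header words in front of the payload):
+//   [ int64_t size | int64_t padding | payload ... ]
+//   ^ malloc'd block                 ^ pointer handed to the caller (p + 2)
+// jl_free and jl_realloc recover the stored size as ((int64_t *)payload)[-2].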
+ +JL_DLLEXPORT void *jl_malloc(size_t sz) +{ + int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT); + if (p == NULL) + return NULL; + p[0] = sz; + return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +//_unchecked_calloc does not check for potential overflow of nm*sz +STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) { + size_t nmsz = nm*sz; + int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1); + if (p == NULL) + return NULL; + p[0] = nmsz; + return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz) +{ + if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT) + return NULL; + return _unchecked_calloc(nm, sz); +} + +JL_DLLEXPORT void jl_free(void *p) +{ + if (p != NULL) { + int64_t *pp = (int64_t *)p - 2; + size_t sz = pp[0]; + jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT); + } +} + +JL_DLLEXPORT void *jl_realloc(void *p, size_t sz) +{ + int64_t *pp; + size_t szold; + if (p == NULL) { + pp = NULL; + szold = 0; + } + else { + pp = (int64_t *)p - 2; + szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT; + } + int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT); + if (pnew == NULL) + return NULL; + pnew[0] = sz; + return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +// allocator entry points + +JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) +{ + return jl_gc_alloc_(ptls, sz, ty); +} + const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT { @@ -501,6 +587,76 @@ JL_DLLEXPORT void jl_throw_out_of_memory_error(void) jl_throw(jl_memory_exception); } +size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT +{ + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; + size_t sz = layout->size * m->length; + if (layout->flags.arrayelem_isunion) + // account for isbits Union array selector bytes + sz += m->length; + return sz; +} + +// tracking Memorys with malloc'd storage +void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ + // This is **NOT** a GC safe point. 
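+    // Reuse a node from the per-thread freelist if one is available;
+    // otherwise allocate a fresh node with malloc_s.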
+ mallocmemory_t *ma; + if (ptls->gc_tls.heap.mafreelist == NULL) { + ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t)); + } + else { + ma = ptls->gc_tls.heap.mafreelist; + ptls->gc_tls.heap.mafreelist = ma->next; + } + ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned); + ma->next = ptls->gc_tls.heap.mallocarrays; + ptls->gc_tls.heap.mallocarrays = ma; +} + +int gc_logging_enabled = 0; + +JL_DLLEXPORT void jl_enable_gc_logging(int enable) { + gc_logging_enabled = enable; +} + +JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { + return gc_logging_enabled; +} + +// gc-debug common functions +// --- + +int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT +{ + int nf = (int)jl_datatype_nfields(vt); + for (int i = 1; i < nf; i++) { + if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) + return i - 1; + } + return nf - 1; +} + +int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT +{ + char *slot = (char*)_slot; + jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); + char *start = NULL; + size_t len = 0; + size_t elsize = sizeof(void*); + if (vt == jl_module_type) { + jl_module_t *m = (jl_module_t*)obj; + start = (char*)m->usings.items; + len = m->usings.len; + } + else if (vt == jl_simplevector_type) { + start = (char*)jl_svec_data(obj); + len = jl_svec_len(obj); + } + if (slot < start || slot >= start + elsize * len) + return -1; + return (slot - start) / elsize; +} + #ifdef __cplusplus } #endif diff --git a/src/gc-common.h b/src/gc-common.h index 4d53830442a7d..154b9659e9ccb 100644 --- a/src/gc-common.h +++ b/src/gc-common.h @@ -53,6 +53,12 @@ extern jl_gc_callback_list_t *gc_cblist_notify_gc_pressure; // malloc wrappers, aligned allocation // =========================================================================== // +// data structure for tracking malloc'd genericmemory. 
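+// `a` is a tagged pointer: the lowest bit records whether the buffer was
+// allocated aligned (set in jl_gc_track_malloced_genericmemory); recover with
+//   int aligned = (uintptr_t)ma->a & 1;
+//   jl_genericmemory_t *m = (jl_genericmemory_t*)((uintptr_t)ma->a & ~(uintptr_t)1);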
+typedef struct _mallocmemory_t { + jl_genericmemory_t *a; // lowest bit is tagged if this is aligned memory + struct _mallocmemory_t *next; +} mallocmemory_t; + #if defined(_OS_WINDOWS_) STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align) { diff --git a/src/gc-debug.c b/src/gc-debug.c index 19dd93af5f236..d05fb4b49e9f7 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1105,46 +1105,7 @@ void gc_count_pool(void) jl_safe_printf("************************\n"); } -int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT -{ - int nf = (int)jl_datatype_nfields(vt); - for (int i = 1; i < nf; i++) { - if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) - return i - 1; - } - return nf - 1; -} - -int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT -{ - char *slot = (char*)_slot; - jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); - char *start = NULL; - size_t len = 0; - size_t elsize = sizeof(void*); - if (vt == jl_module_type) { - jl_module_t *m = (jl_module_t*)obj; - start = (char*)m->usings.items; - len = m->usings.len; - } - else if (vt == jl_simplevector_type) { - start = (char*)jl_svec_data(obj); - len = jl_svec_len(obj); - } - if (slot < start || slot >= start + elsize * len) - return -1; - return (slot - start) / elsize; -} - -static int gc_logging_enabled = 0; - -JL_DLLEXPORT void jl_enable_gc_logging(int enable) { - gc_logging_enabled = enable; -} - -JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { - return gc_logging_enabled; -} +extern int gc_logging_enabled; void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t live_bytes) JL_NOTSAFEPOINT { if (!gc_logging_enabled) { diff --git a/src/gc-interface.h b/src/gc-interface.h index e543b4b5879f1..682f22344d69d 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -128,6 +128,13 @@ JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void); // Allocation // ========================================================================= // +// On GCC, this function is inlined when sz is constant (see julia_internal.h) +// In general, this function should implement allocation and should use the specific GC's logic +// to decide whether to allocate a small or a large object. Finally, note that this function +// **must** also set the type of the returning object to be `ty`. The type `ty` may also be used to record +// an allocation of that type in the allocation profiler. +struct _jl_value_t *jl_gc_alloc_(struct _jl_tls_states_t * ptls, size_t sz, void *ty); + // Allocates small objects and increments Julia allocation counterst. Size of the object // header must be included in the object size. The (possibly unused in some implementations) // offset to the arena in which we're allocating is passed in the second parameter, and the @@ -211,6 +218,11 @@ JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, // object being allocated and will be used to set the object header. struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT; +// This function notifies the GC about memory addresses that are set when loading the boot image. +// The GC may use that information to, for instance, determine that such objects should +// be treated as marked and belonged to the old generation in nursery collections. 
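+// (The stock GC stubs this out as a no-op; see the gc-stock.c hunk in this patch.)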
+void jl_gc_notify_image_load(const char* img_data, size_t len); + // ========================================================================= // // Runtime Write-Barriers // ========================================================================= // diff --git a/src/gc-stacks.c b/src/gc-stacks.c index 783129ea97693..8c44b65284386 100644 --- a/src/gc-stacks.c +++ b/src/gc-stacks.c @@ -46,7 +46,7 @@ static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT } -static void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT +void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT { VirtualFree(stkbuf, 0, MEM_RELEASE); jl_atomic_fetch_add_relaxed(&num_stack_mappings, -1); @@ -81,7 +81,7 @@ static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT return stk; } -static void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT +void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT { munmap(stkbuf, bufsz); jl_atomic_fetch_add_relaxed(&num_stack_mappings, -1); diff --git a/src/gc-stock.c b/src/gc-stock.c index d25f8917f302d..4a8c6fe7decc5 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -553,24 +553,6 @@ static void sweep_big(jl_ptls_t ptls) JL_NOTSAFEPOINT gc_time_big_end(); } -// tracking Memorys with malloc'd storage - -void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ - // This is **NOT** a GC safe point. - mallocmemory_t *ma; - if (ptls->gc_tls.heap.mafreelist == NULL) { - ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t)); - } - else { - ma = ptls->gc_tls.heap.mafreelist; - ptls->gc_tls.heap.mafreelist = ma->next; - } - ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned); - ma->next = ptls->gc_tls.heap.mallocarrays; - ptls->gc_tls.heap.mallocarrays = ma; -} - - void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT { jl_ptls_t ptls = jl_current_task->ptls; @@ -647,17 +629,6 @@ void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT reset_thread_gc_counts(); } -size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT -{ - const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; - size_t sz = layout->size * m->length; - if (layout->flags.arrayelem_isunion) - // account for isbits Union array selector bytes - sz += m->length; - return sz; -} - - static void jl_gc_free_memory(jl_value_t *v, int isaligned) JL_NOTSAFEPOINT { assert(jl_is_genericmemory(v)); @@ -816,6 +787,29 @@ jl_value_t *jl_gc_small_alloc_noinline(jl_ptls_t ptls, int offset, int osize) { return jl_gc_small_alloc_inner(ptls, offset, osize); } +// Size does NOT include the type tag!! +inline jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) +{ + jl_value_t *v; + const size_t allocsz = sz + sizeof(jl_taggedvalue_t); + if (sz <= GC_MAX_SZCLASS) { + int pool_id = jl_gc_szclass(allocsz); + jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id]; + int osize = jl_gc_sizeclasses[pool_id]; + // We call `jl_gc_small_alloc_noinline` instead of `jl_gc_small_alloc` to avoid double-counting in + // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) 
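+        // The offset (char*)p - (char*)ptls locates this size pool relative to
+        // the thread-local state, letting the allocator find the pool again.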
+ v = jl_gc_small_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); + } + else { + if (allocsz < sz) // overflow in adding offs, size was "negative" + jl_throw(jl_memory_exception); + v = jl_gc_big_alloc_noinline(ptls, allocsz); + } + jl_set_typeof(v, ty); + maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty); + return v; +} + int jl_gc_classify_pools(size_t sz, int *osize) { if (sz > GC_MAX_SZCLASS) @@ -2792,6 +2786,21 @@ static void sweep_finalizer_list(arraylist_t *list) list->len = j; } +int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT +{ + return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); +} + +int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT +{ + if (jl_n_sweepthreads == 0) { + return 0; + } + int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); + int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; + return tid == concurrent_collector_thread_id; +} + // collector entry point and control _Atomic(uint32_t) jl_gc_disable_counter = 1; @@ -2830,11 +2839,6 @@ JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT *bytes = (num.total_allocd + num.deferred_alloc + num.allocd); } -JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) -{ - return gc_num.total_time; -} - JL_DLLEXPORT jl_gc_num_t jl_gc_num(void) { jl_gc_num_t num = gc_num; @@ -3386,13 +3390,6 @@ void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq) gc_mark_roots(mq); } -// allocator entry points - -JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) -{ - return jl_gc_alloc_(ptls, sz, ty); -} - // Per-thread initialization void jl_init_thread_heap(jl_ptls_t ptls) { @@ -3674,63 +3671,6 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size return data; } -// allocation wrappers that save the size of allocations, to allow using -// jl_gc_counted_* functions with a libc-compatible API. 
- -JL_DLLEXPORT void *jl_malloc(size_t sz) -{ - int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT); - if (p == NULL) - return NULL; - p[0] = sz; - return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - -//_unchecked_calloc does not check for potential overflow of nm*sz -STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) { - size_t nmsz = nm*sz; - int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1); - if (p == NULL) - return NULL; - p[0] = nmsz; - return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - -JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz) -{ - if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT) - return NULL; - return _unchecked_calloc(nm, sz); -} - -JL_DLLEXPORT void jl_free(void *p) -{ - if (p != NULL) { - int64_t *pp = (int64_t *)p - 2; - size_t sz = pp[0]; - jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT); - } -} - -JL_DLLEXPORT void *jl_realloc(void *p, size_t sz) -{ - int64_t *pp; - size_t szold; - if (p == NULL) { - pp = NULL; - szold = 0; - } - else { - pp = (int64_t *)p - 2; - szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT; - } - int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT); - if (pnew == NULL) - return NULL; - pnew[0] = sz; - return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - // allocating blocks for Arrays and Strings JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) @@ -3864,18 +3804,6 @@ jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT return jl_valueof(o); } -JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_new_weakref_th(ptls, value); -} - -JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_alloc(ptls, sz, NULL); -} - JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void) { if (jl_is_initialized()) { @@ -4003,14 +3931,14 @@ JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) } -JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) +JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) { - return jl_gc_alloc(ptls, sz, ty); + arraylist_push(&ptls->gc_tls.sweep_objs, obj); } -JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) +void jl_gc_notify_image_load(const char* img_data, size_t len) { - arraylist_push(&ptls->gc_tls.sweep_objs, obj); + // Do nothing } #ifdef __cplusplus diff --git a/src/gc-stock.h b/src/gc-stock.h index 45c93bf4289ae..3f3900b349bcf 100644 --- a/src/gc-stock.h +++ b/src/gc-stock.h @@ -106,12 +106,6 @@ JL_EXTENSION typedef struct _bigval_t { // must be 64-byte aligned here, in 32 & 64 bit modes } bigval_t; -// data structure for tracking malloc'd genericmemory. 
-typedef struct _mallocmemory_t { - jl_genericmemory_t *a; // lowest bit is tagged if this is aligned memory - struct _mallocmemory_t *next; -} mallocmemory_t; - // pool page metadata typedef struct _jl_gc_pagemeta_t { // next metadata structure in per-thread list @@ -428,21 +422,6 @@ STATIC_INLINE int gc_ith_parallel_collector_thread_id(int i) JL_NOTSAFEPOINT return gc_first_tid + i; } -STATIC_INLINE int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT -{ - return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); -} - -STATIC_INLINE int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT -{ - if (jl_n_sweepthreads == 0) { - return 0; - } - int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); - int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; - return tid == concurrent_collector_thread_id; -} - STATIC_INLINE int gc_random_parallel_collector_thread_id(jl_ptls_t ptls) JL_NOTSAFEPOINT { assert(jl_n_markthreads > 0); diff --git a/src/julia.h b/src/julia.h index abb8a57ff13b0..db57db1fbeb38 100644 --- a/src/julia.h +++ b/src/julia.h @@ -850,7 +850,7 @@ static inline jl_value_t *jl_to_typeof(uintptr_t t) return (jl_value_t*)t; } #else -extern JL_HIDDEN jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; +extern JL_DLLEXPORT jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; static inline jl_value_t *jl_to_typeof(uintptr_t t) { if (t < (jl_max_tags << 4)) diff --git a/src/julia_internal.h b/src/julia_internal.h index f00667d016796..edddb68754fc3 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -513,30 +513,6 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz) JL_NOTSAFE #define GC_MAX_SZCLASS (2032-sizeof(void*)) static_assert(ARRAY_CACHE_ALIGN_THRESHOLD > GC_MAX_SZCLASS, ""); - -// Size does NOT include the type tag!! -STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) -{ - jl_value_t *v; - const size_t allocsz = sz + sizeof(jl_taggedvalue_t); - if (sz <= GC_MAX_SZCLASS) { - int pool_id = jl_gc_szclass(allocsz); - jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id]; - int osize = jl_gc_sizeclasses[pool_id]; - // We call `jl_gc_small_alloc_noinline` instead of `jl_gc_small_alloc` to avoid double-counting in - // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) - v = jl_gc_small_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); - } - else { - if (allocsz < sz) // overflow in adding offs, size was "negative" - jl_throw(jl_memory_exception); - v = jl_gc_big_alloc_noinline(ptls, allocsz); - } - jl_set_typeof(v, ty); - maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty); - return v; -} - /* Programming style note: When using jl_gc_alloc, do not JL_GC_PUSH it into a * gc frame, until it has been fully initialized. An uninitialized value in a * gc frame can crash upon encountering the first safepoint. By delaying use of @@ -1074,7 +1050,7 @@ STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr) return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 4; } extern _Atomic(uint32_t) jl_gc_running; -extern _Atomic(uint32_t) jl_gc_disable_counter; +extern JL_DLLEXPORT _Atomic(uint32_t) jl_gc_disable_counter; // All the functions are safe to be called from within a signal handler // provided that the thread will not be interrupted by another asynchronous // signal. 
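
A minimal usage sketch for the allocation entry point kept above (hypothetical
caller, not part of the patch; `dt` stands for some jl_datatype_t* with two
pointer fields). Per the programming-style note retained in julia_internal.h,
the object must be fully initialized before it is rooted:

    jl_ptls_t ptls = jl_current_task->ptls;
    jl_value_t *v = jl_gc_alloc(ptls, 2 * sizeof(void *), dt);
    ((jl_value_t **)v)[0] = jl_nothing; // initialize every field first; an
    ((jl_value_t **)v)[1] = jl_nothing; // uninitialized rooted value can crash at a safepoint
    JL_GC_PUSH1(&v);                    // only now is rooting safe
    /* ... use v ... */
    JL_GC_POP();
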
diff --git a/src/scheduler.c b/src/scheduler.c index bb2f85b52283f..b85a481588e4f 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -80,9 +80,20 @@ JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSA return 1; } +<<<<<<< HEAD // GC functions used extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT; +======= +// parallel task runtime +// --- + +JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max) // [0, n) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return cong(max, &ptls->rngseed); +} +>>>>>>> 4f39869d04 (Refactoring to be considered before adding MMTk) // initialize the threading infrastructure // (called only by the main thread) diff --git a/src/stackwalk.c b/src/stackwalk.c index 6aa36fa8b499c..5f28b61c4a8fe 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -5,7 +5,7 @@ utilities for walking the stack and looking up information about code addresses */ #include -#include "gc-stock.h" +#include "gc-common.h" #include "julia.h" #include "julia_internal.h" #include "threading.h" @@ -1294,6 +1294,8 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT } extern int gc_first_tid; +extern int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT; +extern int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT; // Print backtraces for all live tasks, for all threads, to jl_safe_printf stderr JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT diff --git a/src/staticdata.c b/src/staticdata.c index 363aa46b62221..e07a5365bf06f 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -654,6 +654,7 @@ static void jl_load_sysimg_so(void) plen = (size_t *)&jl_system_image_size; else jl_dlsym(jl_sysimg_handle, "jl_system_image_size", (void **)&plen, 1); + jl_gc_notify_image_load(sysimg_data, *plen); jl_restore_system_image_data(sysimg_data, *plen); } @@ -3899,6 +3900,7 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j jl_dlsym(pkgimg_handle, "jl_system_image_data", (void **)&pkgimg_data, 1); size_t *plen; jl_dlsym(pkgimg_handle, "jl_system_image_size", (void **)&plen, 1); + jl_gc_notify_image_load(pkgimg_data, *plen); jl_image_t pkgimage = jl_init_processor_pkgimg(pkgimg_handle); From d2f2b8d9c477514e93009d0b99e2ffe65bcc9831 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 29 Aug 2024 04:57:59 +0000 Subject: [PATCH 02/38] Removing jl_gc_notify_image_load, since it's a new function and not part of the refactoring --- src/gc-interface.h | 5 ----- src/gc-stock.c | 5 ----- src/staticdata.c | 2 -- 3 files changed, 12 deletions(-) diff --git a/src/gc-interface.h b/src/gc-interface.h index 682f22344d69d..25ffed4524f0c 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -218,11 +218,6 @@ JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, // object being allocated and will be used to set the object header. struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT; -// This function notifies the GC about memory addresses that are set when loading the boot image. -// The GC may use that information to, for instance, determine that such objects should -// be treated as marked and belonged to the old generation in nursery collections. 
-void jl_gc_notify_image_load(const char* img_data, size_t len); - // ========================================================================= // // Runtime Write-Barriers // ========================================================================= // diff --git a/src/gc-stock.c b/src/gc-stock.c index 4a8c6fe7decc5..9b633cacd7870 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -3936,11 +3936,6 @@ JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *o arraylist_push(&ptls->gc_tls.sweep_objs, obj); } -void jl_gc_notify_image_load(const char* img_data, size_t len) -{ - // Do nothing -} - #ifdef __cplusplus } #endif diff --git a/src/staticdata.c b/src/staticdata.c index e07a5365bf06f..363aa46b62221 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -654,7 +654,6 @@ static void jl_load_sysimg_so(void) plen = (size_t *)&jl_system_image_size; else jl_dlsym(jl_sysimg_handle, "jl_system_image_size", (void **)&plen, 1); - jl_gc_notify_image_load(sysimg_data, *plen); jl_restore_system_image_data(sysimg_data, *plen); } @@ -3900,7 +3899,6 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j jl_dlsym(pkgimg_handle, "jl_system_image_data", (void **)&pkgimg_data, 1); size_t *plen; jl_dlsym(pkgimg_handle, "jl_system_image_size", (void **)&plen, 1); - jl_gc_notify_image_load(pkgimg_data, *plen); jl_image_t pkgimage = jl_init_processor_pkgimg(pkgimg_handle); From a42cb6410cf4f3e1773b0e41ecb5c696bc9cf836 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Mon, 2 Sep 2024 01:27:08 +0000 Subject: [PATCH 03/38] Moving gc_enable code to gc-common.c --- src/gc-common.c | 30 ++++++++++++++++++++++++++++++ src/gc-stock.c | 30 ------------------------------ 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 2ec167caa667a..03c046bc300f2 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -613,6 +613,36 @@ void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, i ptls->gc_tls.heap.mallocarrays = ma; } +// collector entry point and control +_Atomic(uint32_t) jl_gc_disable_counter = 1; + +JL_DLLEXPORT int jl_gc_enable(int on) +{ + jl_ptls_t ptls = jl_current_task->ptls; + int prev = !ptls->disable_gc; + ptls->disable_gc = (on == 0); + if (on && !prev) { + // disable -> enable + if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) { + gc_num.allocd += gc_num.deferred_alloc; + gc_num.deferred_alloc = 0; + } + } + else if (prev && !on) { + // enable -> disable + jl_atomic_fetch_add(&jl_gc_disable_counter, 1); + // check if the GC is running and wait for it to finish + jl_gc_safepoint_(ptls); + } + return prev; +} + +JL_DLLEXPORT int jl_gc_is_enabled(void) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return !ptls->disable_gc; +} + int gc_logging_enabled = 0; JL_DLLEXPORT void jl_enable_gc_logging(int enable) { diff --git a/src/gc-stock.c b/src/gc-stock.c index 9b633cacd7870..61fc8d4e83a3a 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -2801,36 +2801,6 @@ int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT return tid == concurrent_collector_thread_id; } -// collector entry point and control -_Atomic(uint32_t) jl_gc_disable_counter = 1; - -JL_DLLEXPORT int jl_gc_enable(int on) -{ - jl_ptls_t ptls = jl_current_task->ptls; - int prev = !ptls->disable_gc; - ptls->disable_gc = (on == 0); - if (on && !prev) { - // disable -> enable - if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) { - gc_num.allocd += gc_num.deferred_alloc; - gc_num.deferred_alloc = 0; - } - 
} - else if (prev && !on) { - // enable -> disable - jl_atomic_fetch_add(&jl_gc_disable_counter, 1); - // check if the GC is running and wait for it to finish - jl_gc_safepoint_(ptls); - } - return prev; -} - -JL_DLLEXPORT int jl_gc_is_enabled(void) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return !ptls->disable_gc; -} - JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT { jl_gc_num_t num = gc_num; From 92563918292056178d6f6ed12c58a9f998ef2d54 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Mon, 16 Sep 2024 06:38:02 +0000 Subject: [PATCH 04/38] Addressing PR comments --- src/gc-common.c | 134 +++++++++++++++++++++++++------------------ src/gc-common.h | 6 ++ src/gc-debug.c | 2 - src/gc-interface.h | 30 +--------- src/gc-stock.c | 18 +----- src/gc-stock.h | 15 +++++ src/julia.h | 2 +- src/julia_internal.h | 4 +- src/stackwalk.c | 10 +--- 9 files changed, 110 insertions(+), 111 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 03c046bc300f2..046feae6aa4c5 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -491,15 +491,9 @@ int gc_n_threads; jl_ptls_t* gc_all_tls_states; // =========================================================================== // -// MISC +// Allocation // =========================================================================== // -JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_new_weakref_th(ptls, value); -} - JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) { return jl_gc_alloc(ptls, sz, ty); @@ -575,17 +569,9 @@ JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) return jl_gc_alloc_(ptls, sz, ty); } -const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 -JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT -{ - return jl_buff_tag; -} - -// callback for passing OOM errors from gmp -JL_DLLEXPORT void jl_throw_out_of_memory_error(void) -{ - jl_throw(jl_memory_exception); -} +// =========================================================================== // +// Generic Memory +// =========================================================================== // size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT { @@ -613,6 +599,66 @@ void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, i ptls->gc_tls.heap.mallocarrays = ma; } +// =========================================================================== // +// GC Debug +// =========================================================================== // + +int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT +{ + int nf = (int)jl_datatype_nfields(vt); + for (int i = 1; i < nf; i++) { + if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) + return i - 1; + } + return nf - 1; +} + +int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT +{ + char *slot = (char*)_slot; + jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); + char *start = NULL; + size_t len = 0; + size_t elsize = sizeof(void*); + if (vt == jl_module_type) { + jl_module_t *m = (jl_module_t*)obj; + start = (char*)m->usings.items; + len = m->usings.len; + } + else if (vt == jl_simplevector_type) { + start = (char*)jl_svec_data(obj); + len = jl_svec_len(obj); + } + if (slot < start || slot >= start + elsize * len) + return -1; + return (slot - start) / elsize; +} + +// 
=========================================================================== // +// GC Control +// =========================================================================== // + +JL_DLLEXPORT uint32_t jl_get_gc_disable_counter(void) { + return jl_atomic_load_acquire(&jl_gc_disable_counter); +} + +JL_DLLEXPORT int jl_gc_is_enabled(void) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return !ptls->disable_gc; +} + +int gc_logging_enabled = 0; + +JL_DLLEXPORT void jl_enable_gc_logging(int enable) { + gc_logging_enabled = enable; +} + +JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { + return gc_logging_enabled; +} + + // collector entry point and control _Atomic(uint32_t) jl_gc_disable_counter = 1; @@ -637,54 +683,30 @@ JL_DLLEXPORT int jl_gc_enable(int on) return prev; } -JL_DLLEXPORT int jl_gc_is_enabled(void) +// =========================================================================== // +// MISC +// =========================================================================== // + +JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) { jl_ptls_t ptls = jl_current_task->ptls; - return !ptls->disable_gc; -} - -int gc_logging_enabled = 0; - -JL_DLLEXPORT void jl_enable_gc_logging(int enable) { - gc_logging_enabled = enable; + return jl_gc_new_weakref_th(ptls, value); } -JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { - return gc_logging_enabled; +JL_DLLEXPORT jl_datatype_t **jl_get_ijl_small_typeof(void) { + return ijl_small_typeof; } -// gc-debug common functions -// --- - -int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT +const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 +JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT { - int nf = (int)jl_datatype_nfields(vt); - for (int i = 1; i < nf; i++) { - if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) - return i - 1; - } - return nf - 1; + return jl_buff_tag; } -int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT +// callback for passing OOM errors from gmp +JL_DLLEXPORT void jl_throw_out_of_memory_error(void) { - char *slot = (char*)_slot; - jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); - char *start = NULL; - size_t len = 0; - size_t elsize = sizeof(void*); - if (vt == jl_module_type) { - jl_module_t *m = (jl_module_t*)obj; - start = (char*)m->usings.items; - len = m->usings.len; - } - else if (vt == jl_simplevector_type) { - start = (char*)jl_svec_data(obj); - len = jl_svec_len(obj); - } - if (slot < start || slot >= start + elsize * len) - return -1; - return (slot - start) / elsize; + jl_throw(jl_memory_exception); } #ifdef __cplusplus diff --git a/src/gc-common.h b/src/gc-common.h index 154b9659e9ccb..32b7470b13a58 100644 --- a/src/gc-common.h +++ b/src/gc-common.h @@ -179,4 +179,10 @@ JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o); extern int gc_n_threads; extern jl_ptls_t* gc_all_tls_states; +// =========================================================================== // +// Logging +// =========================================================================== // + +extern int gc_logging_enabled; + #endif // JL_GC_COMMON_H diff --git a/src/gc-debug.c b/src/gc-debug.c index d05fb4b49e9f7..7c479484cde45 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1105,8 +1105,6 @@ void gc_count_pool(void) jl_safe_printf("************************\n"); } -extern int gc_logging_enabled; - void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t 
live_bytes) JL_NOTSAFEPOINT { if (!gc_logging_enabled) { return; diff --git a/src/gc-interface.h b/src/gc-interface.h index 25ffed4524f0c..0e9ce32697f35 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -94,6 +94,8 @@ JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem); // should run a collection cycle again (e.g. a full mark right after a full sweep to ensure // we do a full heap traversal). JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection); +// Returns whether the thread with `tid` is a collector thread +JL_DLLEXPORT int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT; // ========================================================================= // // Metrics @@ -162,26 +164,6 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz); JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz); // Wrapper around Libc realloc that updates Julia allocation counters. JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz); -// Wrapper around Libc malloc that allocates a memory region with a few additional machine -// words before the actual payload that are used to record the size of the requested -// allocation. Also updates Julia allocation counters. The function returns a pointer to the -// payload as a result of the allocation. -JL_DLLEXPORT void *jl_malloc(size_t sz); -// Wrapper around Libc calloc that allocates a memory region with a few additional machine -// words before the actual payload that are used to record the size of the requested -// allocation. Also updates Julia allocation counters. The function returns a pointer to the -// payload as a result of the allocation. -JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz); -// Wrapper around Libc free that takes a pointer to the payload of a memory region allocated -// with jl_malloc or jl_calloc, and uses the size information stored in the first machine -// words of the memory buffer update Julia allocation counters, and then frees the -// corresponding memory buffer. -JL_DLLEXPORT void jl_free(void *p); -// Wrapper around Libc realloc that takes a memory region allocated with jl_malloc or -// jl_calloc, and uses the size information stored in the first machine words of the memory -// buffer to update Julia allocation counters, reallocating the corresponding memory buffer -// in the end. -JL_DLLEXPORT void *jl_realloc(void *p, size_t sz); // Wrapper around Libc malloc that's used to dynamically allocate memory for Arrays and // Strings. It increments Julia allocation counters and should check whether we're close to // the Julia heap target, and therefore, whether we should run a collection. Note that this @@ -195,14 +177,6 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz); // thread-local allocator of the thread referenced by the first jl_ptls_t argument. JL_DLLEXPORT struct _jl_weakref_t *jl_gc_new_weakref_th(struct _jl_tls_states_t *ptls, struct _jl_value_t *value); -// Allocates a new weak-reference, assigns its value and increments Julia allocation -// counters. If thread-local allocators are used, then this function should allocate in the -// thread-local allocator of the current thread. -JL_DLLEXPORT struct _jl_weakref_t *jl_gc_new_weakref(struct _jl_value_t *value); -// Allocates an object whose size is specified by the function argument and increments Julia -// allocation counters. If thread-local allocators are used, then this function should -// allocate in the thread-local allocator of the current thread. 
-JL_DLLEXPORT struct _jl_value_t *jl_gc_allocobj(size_t sz); // Permanently allocates a memory slot of the size specified by the first parameter. This // block of memory is allocated in an immortal region that is never swept. The second // parameter specifies whether the memory should be filled with zeros. The third and fourth diff --git a/src/gc-stock.c b/src/gc-stock.c index 61fc8d4e83a3a..3ff37566dc6c7 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -2786,19 +2786,8 @@ static void sweep_finalizer_list(arraylist_t *list) list->len = j; } -int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT -{ - return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); -} - -int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT -{ - if (jl_n_sweepthreads == 0) { - return 0; - } - int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); - int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; - return tid == concurrent_collector_thread_id; +int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT { + return gc_is_parallel_collector_thread(tid) || gc_is_concurrent_collector_thread(tid); } JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT @@ -3182,8 +3171,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) // free empty GC state for threads that have exited if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { // GC threads should never exit - assert(!gc_is_parallel_collector_thread(t_i)); - assert(!gc_is_concurrent_collector_thread(t_i)); + assert(!gc_is_collector_thread(t_i)); jl_thread_heap_t *heap = &ptls2->gc_tls.heap; if (heap->weak_refs.len == 0) small_arraylist_free(&heap->weak_refs); diff --git a/src/gc-stock.h b/src/gc-stock.h index 3f3900b349bcf..50eca3aadbd86 100644 --- a/src/gc-stock.h +++ b/src/gc-stock.h @@ -422,6 +422,21 @@ STATIC_INLINE int gc_ith_parallel_collector_thread_id(int i) JL_NOTSAFEPOINT return gc_first_tid + i; } +STATIC_INLINE int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT +{ + return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); +} + +STATIC_INLINE int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT +{ + if (jl_n_sweepthreads == 0) { + return 0; + } + int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); + int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; + return tid == concurrent_collector_thread_id; +} + STATIC_INLINE int gc_random_parallel_collector_thread_id(jl_ptls_t ptls) JL_NOTSAFEPOINT { assert(jl_n_markthreads > 0); diff --git a/src/julia.h b/src/julia.h index db57db1fbeb38..abb8a57ff13b0 100644 --- a/src/julia.h +++ b/src/julia.h @@ -850,7 +850,7 @@ static inline jl_value_t *jl_to_typeof(uintptr_t t) return (jl_value_t*)t; } #else -extern JL_DLLEXPORT jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; +extern JL_HIDDEN jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; static inline jl_value_t *jl_to_typeof(uintptr_t t) { if (t < (jl_max_tags << 4)) diff --git a/src/julia_internal.h b/src/julia_internal.h index edddb68754fc3..e677f40907dfd 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -367,6 +367,8 @@ extern jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT size_t jl_typeinf_world; extern _Atomic(jl_typemap_entry_t*) call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED; +extern void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT; + JL_DLLEXPORT 
extern int jl_lineno; JL_DLLEXPORT extern const char *jl_filename; @@ -1050,7 +1052,7 @@ STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr) return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 4; } extern _Atomic(uint32_t) jl_gc_running; -extern JL_DLLEXPORT _Atomic(uint32_t) jl_gc_disable_counter; +extern _Atomic(uint32_t) jl_gc_disable_counter; // All the functions are safe to be called from within a signal handler // provided that the thread will not be interrupted by another asynchronous // signal. diff --git a/src/stackwalk.c b/src/stackwalk.c index 5f28b61c4a8fe..a1de3a6d61a07 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -1294,8 +1294,6 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT } extern int gc_first_tid; -extern int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT; -extern int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT; // Print backtraces for all live tasks, for all threads, to jl_safe_printf stderr JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT @@ -1304,12 +1302,8 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states); for (size_t i = 0; i < nthreads; i++) { jl_ptls_t ptls2 = allstates[i]; - if (gc_is_parallel_collector_thread(i)) { - jl_safe_printf("==== Skipping backtrace for parallel GC thread %zu\n", i + 1); - continue; - } - if (gc_is_concurrent_collector_thread(i)) { - jl_safe_printf("==== Skipping backtrace for concurrent GC thread %zu\n", i + 1); + if (gc_is_collector_thread(i)) { + jl_safe_printf("==== Skipping backtrace for parallel/concurrent GC thread %zu\n", i + 1); continue; } if (ptls2 == NULL) { From ec398e1a98cf713a77f908a459ed37fd4b25af27 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 19 Sep 2024 04:18:13 +0000 Subject: [PATCH 05/38] Push resolution of merge conflict --- src/scheduler.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/scheduler.c b/src/scheduler.c index b85a481588e4f..bb2f85b52283f 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -80,20 +80,9 @@ JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSA return 1; } -<<<<<<< HEAD // GC functions used extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT; -======= -// parallel task runtime -// --- - -JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max) // [0, n) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return cong(max, &ptls->rngseed); -} ->>>>>>> 4f39869d04 (Refactoring to be considered before adding MMTk) // initialize the threading infrastructure // (called only by the main thread) From 68e5e11a229f253ec6de966a321bd9d3de453a3b Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 25 Sep 2024 01:10:31 +0000 Subject: [PATCH 06/38] Removing jl_gc_mark_queue_obj_explicit extern definition from scheduler.c --- src/scheduler.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/scheduler.c b/src/scheduler.c index bb2f85b52283f..7e23f654c2566 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -80,10 +80,6 @@ JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSA return 1; } -// GC functions used -extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, - jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT; - // initialize the threading infrastructure // (called only by the main thread) void jl_init_threadinginfra(void) From 
c23f0db8347f475e1eb2b37261dd4816537210fa Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 25 Sep 2024 02:50:25 +0000 Subject: [PATCH 07/38] Don't need the getter function since it's possible to use jl_small_typeof directly --- src/gc-common.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 046feae6aa4c5..417f12f26d64d 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -693,10 +693,6 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) return jl_gc_new_weakref_th(ptls, value); } -JL_DLLEXPORT jl_datatype_t **jl_get_ijl_small_typeof(void) { - return ijl_small_typeof; -} - const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT { From 4bfcfe5df056bb5066a545e29c29463722678892 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Tue, 27 Aug 2024 06:47:41 +0000 Subject: [PATCH 08/38] WIP: Adding support for MMTk/Immix --- Make.inc | 47 ++ contrib/refresh_checksums.mk | 2 +- src/Makefile | 43 +- src/builtins.c | 1 + src/gc-common.c | 70 +++ src/gc-debug.c | 4 +- src/gc-heap-snapshot.cpp | 1 - src/gc-interface.h | 3 + src/gc-mmtk.c | 843 +++++++++++++++++++++++++++++++++++ src/gc-mmtk.h | 34 ++ src/gc-page-profiler.c | 4 +- src/gc-pages.c | 4 +- src/gc-stock.c | 14 +- src/gc-stock.h | 18 +- src/gc-tls-mmtk.h | 49 ++ src/gc-tls.h | 4 + src/julia.h | 2 +- src/julia_internal.h | 2 +- src/julia_threads.h | 4 + src/stackwalk.c | 2 + src/staticdata.c | 2 + src/threading.c | 4 + 22 files changed, 1123 insertions(+), 34 deletions(-) create mode 100644 src/gc-mmtk.c create mode 100644 src/gc-mmtk.h create mode 100644 src/gc-tls-mmtk.h diff --git a/Make.inc b/Make.inc index f078a0c84f806..039755ce34098 100644 --- a/Make.inc +++ b/Make.inc @@ -86,6 +86,9 @@ HAVE_SSP := 0 WITH_GC_VERIFY := 0 WITH_GC_DEBUG_ENV := 0 +# Use MMTk GC +WITH_MMTK ?= 0 + # Enable DTrace support WITH_DTRACE := 0 @@ -790,6 +793,44 @@ JCXXFLAGS += -DGC_DEBUG_ENV JCFLAGS += -DGC_DEBUG_ENV endif +ifeq ($(WITH_MMTK), 1) +ifeq (${MMTK_JULIA_DIR},) +$(error MMTK_JULIA_DIR must be set to use MMTk) +endif +JCXXFLAGS += -DMMTK_GC +JCFLAGS += -DMMTK_GC +ifeq (${MMTK_BUILD},) +ifeq (debug,$(findstring debug,$(MAKECMDGOALS))) +MMTK_BUILD = debug +else +MMTK_BUILD = release +endif +endif +ifeq (${MMTK_PLAN},Immix) +JCXXFLAGS += -DMMTK_PLAN_IMMIX +JCFLAGS += -DMMTK_PLAN_IMMIX +endif +ifeq (${MMTK_PLAN},StickyImmix) +JCXXFLAGS += -DMMTK_PLAN_STICKYIMMIX +JCFLAGS += -DMMTK_PLAN_STICKYIMMIX +endif +MMTK_DIR = ${MMTK_JULIA_DIR}/mmtk +MMTK_API_INC = $(MMTK_DIR)/api +MMTK_JULIA_INC = ${MMTK_JULIA_DIR}/julia +ifeq ($(OS),Linux) +MMTK_LIB_NAME := libmmtk_julia.so +else +$(error "Unsupported OS for MMTk") +endif +MMTK_LIB_SRC := $(MMTK_DIR)/target/$(MMTK_BUILD)/$(MMTK_LIB_NAME) +MMTK_LIB_DST := $(BUILDROOT)/usr/lib/$(MMTK_LIB_NAME) +MMTK_LIB := -lmmtk_julia +LDFLAGS += -Wl,-rpath=$(MMTK_DIR)/target/$(MMTK_BUILD)/ +else +MMTK_JULIA_INC := +MMTK_LIB := +endif + ifeq ($(WITH_DTRACE), 1) JCXXFLAGS += -DUSE_DTRACE JCFLAGS += -DUSE_DTRACE @@ -1777,6 +1818,9 @@ PRINT_PERL = printf ' %b %b\n' $(PERLCOLOR)PERL$(ENDCOLOR) $(BINCOLOR)$(GOAL) PRINT_FLISP = printf ' %b %b\n' $(FLISPCOLOR)FLISP$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1) PRINT_JULIA = printf ' %b %b\n' $(JULIACOLOR)JULIA$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1) PRINT_DTRACE = printf ' %b %b\n' $(DTRACECOLOR)DTRACE$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1) +ifeq ($(WITH_MMTK), 1) +PRINT_MMTK = printf ' 
%b %b\n' $(LINKCOLOR)MMTK$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1) +endif else QUIET_MAKE = @@ -1787,6 +1831,9 @@ PRINT_PERL = echo '$(subst ','\'',$(1))'; $(1) PRINT_FLISP = echo '$(subst ','\'',$(1))'; $(1) PRINT_JULIA = echo '$(subst ','\'',$(1))'; $(1) PRINT_DTRACE = echo '$(subst ','\'',$(1))'; $(1) +ifeq ($(WITH_MMTK), 1) +PRINT_MMTK = echo '$(subst ','\'',$(1))'; $(1) +endif endif diff --git a/contrib/refresh_checksums.mk b/contrib/refresh_checksums.mk index f67088141ccd4..bf99c0fad9da2 100644 --- a/contrib/refresh_checksums.mk +++ b/contrib/refresh_checksums.mk @@ -24,7 +24,7 @@ CLANG_TRIPLETS=$(filter %-darwin %-freebsd,$(TRIPLETS)) NON_CLANG_TRIPLETS=$(filter-out %-darwin %-freebsd,$(TRIPLETS)) # These are the projects currently using BinaryBuilder; both GCC-expanded and non-GCC-expanded: -BB_PROJECTS=mbedtls libssh2 nghttp2 mpfr curl libgit2 pcre libuv unwind llvmunwind dsfmt objconv p7zip zlib libsuitesparse openlibm blastrampoline libtracyclient +BB_PROJECTS=mbedtls libssh2 nghttp2 mpfr curl libgit2 pcre libuv unwind llvmunwind dsfmt objconv p7zip zlib libsuitesparse openlibm blastrampoline libtracyclient libmmtk_julia BB_GCC_EXPANDED_PROJECTS=openblas csl BB_CXX_EXPANDED_PROJECTS=gmp llvm clang llvm-tools lld # These are non-BB source-only deps diff --git a/src/Makefile b/src/Makefile index 52e673aa6cc1a..c01848c16adf7 100644 --- a/src/Makefile +++ b/src/Makefile @@ -29,6 +29,10 @@ ifeq ($(USECLANG),1) FLAGS += -Wno-return-type-c-linkage -Wno-atomic-alignment endif +ifeq ($(WITH_MMTK), 1) +FLAGS += -I$(MMTK_API_INC) -I$(MMTK_JULIA_INC) +endif + FLAGS += -DJL_BUILD_ARCH='"$(ARCH)"' ifeq ($(OS),WINNT) FLAGS += -DJL_BUILD_UNAME='"NT"' @@ -44,8 +48,8 @@ SRCS := \ jltypes gf typemap smallintset ast builtins module interpreter symbol \ dlload sys init task array genericmemory staticdata toplevel jl_uv datatype \ simplevector runtime_intrinsics precompile jloptions mtarraylist \ - threading scheduler stackwalk gc-common gc-stock gc-debug gc-pages gc-stacks gc-alloc-profiler gc-page-profiler method \ - jlapi signal-handling safepoint timing subtype rtutils gc-heap-snapshot \ + threading scheduler stackwalk gc-common gc-stock gc-mmtk gc-debug gc-pages gc-stacks gc-alloc-profiler gc-page-profiler \ + method jlapi signal-handling safepoint timing subtype rtutils gc-heap-snapshot \ crc32c APInt-C processor ircode opaque_closure codegen-stubs coverage runtime_ccall engine RT_LLVMLINK := @@ -103,7 +107,7 @@ ifeq ($(USE_SYSTEM_LIBUV),0) UV_HEADERS += uv.h UV_HEADERS += uv/*.h endif -PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-interface.h gc-tls.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h) +PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-interface.h gc-tls.h gc-tls-mmtk.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h) ifeq ($(OS),WINNT) PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,win32_ucontext.h) endif @@ -168,8 +172,8 @@ LIBJULIA_PATH_REL := libjulia endif COMMON_LIBPATHS := -L$(build_libdir) -L$(build_shlibdir) -RT_LIBS := $(WHOLE_ARCHIVE) $(LIBUV) $(WHOLE_ARCHIVE) $(LIBUTF8PROC) $(NO_WHOLE_ARCHIVE) $(LIBUNWIND) $(RT_LLVMLINK) $(OSLIBS) $(LIBTRACYCLIENT) $(LIBITTAPI) -CG_LIBS := $(LIBUNWIND) $(CG_LLVMLINK) $(OSLIBS) $(LIBTRACYCLIENT) $(LIBITTAPI) +RT_LIBS := $(WHOLE_ARCHIVE) $(LIBUV) $(WHOLE_ARCHIVE) $(LIBUTF8PROC) 
$(NO_WHOLE_ARCHIVE) $(LIBUNWIND) $(RT_LLVMLINK) $(OSLIBS) $(LIBTRACYCLIENT) $(LIBITTAPI) $(MMTK_LIB) +CG_LIBS := $(LIBUNWIND) $(CG_LLVMLINK) $(OSLIBS) $(LIBTRACYCLIENT) $(LIBITTAPI) $(MMTK_LIB) RT_DEBUG_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp-debug.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport-debug.a -ljulia-debug $(RT_LIBS) CG_DEBUG_LIBS := $(COMMON_LIBPATHS) $(CG_LIBS) -ljulia-debug -ljulia-internal-debug RT_RELEASE_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport.a -ljulia $(RT_LIBS) @@ -178,6 +182,15 @@ CG_RELEASE_LIBS := $(COMMON_LIBPATHS) $(CG_LIBS) -ljulia -ljulia-internal OBJS := $(SRCS:%=$(BUILDDIR)/%.o) DOBJS := $(SRCS:%=$(BUILDDIR)/%.dbg.obj) +ifeq ($(WITH_MMTK), 1) +MMTK_SRCS := mmtk_julia +MMTK_OBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_INC)/%.o) $(MMTK_LIB_DST) +MMTK_DOBJS := $(MMTK_SRCS:%=$(MMTK_JULIA_INC)/%.dbg.obj) $(MMTK_LIB_DST) +else +MMTK_OBJS := +MMTK_DOBJS := +endif + CODEGEN_OBJS := $(CODEGEN_SRCS:%=$(BUILDDIR)/%.o) CODEGEN_DOBJS := $(CODEGEN_SRCS:%=$(BUILDDIR)/%.dbg.obj) @@ -226,6 +239,16 @@ $(BUILDDIR)/%.h.gen : $(SRCDIR)/%.d sed 's/JULIA_/JL_PROBE_/' $@ > $@.tmp mv $@.tmp $@ +# Compile files from the binding side and copy so file into lib folder +ifeq ($(WITH_MMTK), 1) +$(MMTK_JULIA_INC)/%.o: $(MMTK_JULIA_INC)/%.c $(HEADERS) | $(MMTK_JULIA_INC) + @$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(SHIPFLAGS) $(DISABLE_ASSERTIONS) -c $< -o $@) +$(MMTK_JULIA_INC)/%.dbg.obj: $(MMTK_JULIA_INC)/%.c $(HEADERS) | $(MMTK_JULIA_INC) + @$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(DEBUGFLAGS) -c $< -o $@) +$(MMTK_LIB_DST): $(MMTK_LIB_SRC) + @$(call PRINT_MMTK, cp $< $@) +endif + $(BUILDDIR)/jl_internal_funcs.inc: $(SRCDIR)/jl_exported_funcs.inc # Generate `.inc` file that contains a list of `#define` macros to rename functions defined in `libjulia-internal` # to have a `ijl_` prefix instead of `jl_`, to denote that they are coming from `libjulia-internal`. 
This avoids @@ -318,6 +341,7 @@ $(BUILDDIR)/debuginfo.o $(BUILDDIR)/debuginfo.dbg.obj: $(addprefix $(SRCDIR)/,de $(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)/processor.h $(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h $(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h +$(BUILDDIR)/gc-mmtk.o $(BUILDDIR)/mmtk-gc.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h $(BUILDDIR)/gc-stock.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h $(SRCDIR)/gc-page-profiler.h $(BUILDDIR)/gc-heap-snapshot.o $(BUILDDIR)/gc-heap-snapshot.dbg.obj: $(SRCDIR)/gc-heap-snapshot.h $(BUILDDIR)/gc-alloc-profiler.o $(BUILDDIR)/gc-alloc-profiler.dbg.obj: $(SRCDIR)/gc-alloc-profiler.h @@ -389,14 +413,14 @@ $(BUILDDIR)/julia.expmap: $(SRCDIR)/julia.expmap.in $(JULIAHOME)/VERSION $(LLVM_ sed <'$<' >'$@' -e "s/@JULIA_SHLIB_SYMBOL_VERSION@/JL_LIBJULIA_$(SOMAJOR)/" \ -e "s/@LLVM_SHLIB_SYMBOL_VERSION@/$(LLVM_SHLIB_SYMBOL_VERSION)/" -$(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(OBJS) $(BUILDDIR)/flisp/libflisp.a $(BUILDDIR)/support/libsupport.a $(LIBUV) - @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(OBJS) $(RPATH_LIB) -o $@ \ +$(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(OBJS) $(MMTK_OBJS) $(BUILDDIR)/flisp/libflisp.a $(BUILDDIR)/support/libsupport.a $(LIBUV) + @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(OBJS) $(MMTK_OBJS) $(RPATH_LIB) -o $@ \ $(JLDFLAGS) $(BOLT_LDFLAGS) $(JLIBLDFLAGS) $(RT_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-internal.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia-internal.$(SHLIB_EXT) $@ $(DSYMUTIL) $@ -$(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(DOBJS) $(BUILDDIR)/flisp/libflisp-debug.a $(BUILDDIR)/support/libsupport-debug.a $(LIBUV) - @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(DEBUGFLAGS) $(DOBJS) $(RPATH_LIB) -o $@ \ +$(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(DOBJS) $(MMTK_DOBJS) $(BUILDDIR)/flisp/libflisp-debug.a $(BUILDDIR)/support/libsupport-debug.a $(LIBUV) + @$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(DEBUGFLAGS) $(DOBJS) $(MMTK_DOBJS) $(RPATH_LIB) -o $@ \ $(JLDFLAGS) $(JLIBLDFLAGS) $(RT_DEBUG_LIBS) $(call SONAME_FLAGS,libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT))) @$(INSTALL_NAME_CMD)libjulia-internal-debug.$(SHLIB_EXT) $@ $(DSYMUTIL) $@ @@ -455,6 +479,7 @@ clean: -rm -f $(BUILDDIR)/*.dbg.obj $(BUILDDIR)/*.o $(BUILDDIR)/*.dwo $(BUILDDIR)/*.$(SHLIB_EXT) $(BUILDDIR)/*.a $(BUILDDIR)/*.h.gen -rm -f $(BUILDDIR)/julia.expmap -rm -f $(BUILDDIR)/julia_version.h + -rm -f $(MMTK_OBJS) $(MMTK_DOBJS) clean-flisp: -$(MAKE) -C $(SRCDIR)/flisp clean BUILDDIR='$(abspath $(BUILDDIR)/flisp)' diff --git a/src/builtins.c b/src/builtins.c index 96c4cec0f5087..4a778035de405 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -22,6 +22,7 @@ #include #include "julia.h" #include "julia_internal.h" +#include "gc-interface.h" #include "builtin_proto.h" #include "intrinsics.h" #include "julia_assert.h" diff --git a/src/gc-common.c 
b/src/gc-common.c index 417f12f26d64d..17f6f1330743b 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -705,6 +705,76 @@ JL_DLLEXPORT void jl_throw_out_of_memory_error(void) jl_throw(jl_memory_exception); } +size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT +{ + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; + size_t sz = layout->size * m->length; + if (layout->flags.arrayelem_isunion) + // account for isbits Union array selector bytes + sz += m->length; + return sz; +} + +// tracking Memorys with malloc'd storage +void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ + // This is **NOT** a GC safe point. + mallocmemory_t *ma; + if (ptls->gc_tls.heap.mafreelist == NULL) { + ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t)); + } + else { + ma = ptls->gc_tls.heap.mafreelist; + ptls->gc_tls.heap.mafreelist = ma->next; + } + ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned); + ma->next = ptls->gc_tls.heap.mallocarrays; + ptls->gc_tls.heap.mallocarrays = ma; +} + +int gc_logging_enabled = 0; + +JL_DLLEXPORT void jl_enable_gc_logging(int enable) { + gc_logging_enabled = enable; +} + +JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { + return gc_logging_enabled; +} + +// gc-debug common functions +// --- + +int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT +{ + int nf = (int)jl_datatype_nfields(vt); + for (int i = 1; i < nf; i++) { + if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) + return i - 1; + } + return nf - 1; +} + +int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT +{ + char *slot = (char*)_slot; + jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); + char *start = NULL; + size_t len = 0; + size_t elsize = sizeof(void*); + if (vt == jl_module_type) { + jl_module_t *m = (jl_module_t*)obj; + start = (char*)m->usings.items; + len = m->usings.len; + } + else if (vt == jl_simplevector_type) { + start = (char*)jl_svec_data(obj); + len = jl_svec_len(obj); + } + if (slot < start || slot >= start + elsize * len) + return -1; + return (slot - start) / elsize; +} + #ifdef __cplusplus } #endif diff --git a/src/gc-debug.c b/src/gc-debug.c index 7c479484cde45..ecd7f2328cada 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1,5 +1,5 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license - +#ifndef MMTK_GC #include "gc-common.h" #include "gc-stock.h" #include "julia.h" @@ -1129,3 +1129,5 @@ void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect #ifdef __cplusplus } #endif + +#endif // !MMTK_GC diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index fcda11dad4f8a..d3cb1e98d84a4 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -1,5 +1,4 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license - #include "gc-heap-snapshot.h" #include "julia.h" diff --git a/src/gc-interface.h b/src/gc-interface.h index 0e9ce32697f35..72a57f4944156 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -192,6 +192,9 @@ JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, // object being allocated and will be used to set the object header. 
struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT; +// FIXME: add description here +void jl_gc_notify_image_load(const char* img_data, size_t len); + // ========================================================================= // // Runtime Write-Barriers // ========================================================================= // diff --git a/src/gc-mmtk.c b/src/gc-mmtk.c new file mode 100644 index 0000000000000..e459b0f12c41d --- /dev/null +++ b/src/gc-mmtk.c @@ -0,0 +1,843 @@ +#ifdef MMTK_GC + +#include "mmtk_julia.h" +#include "gc-common.h" +#include "mmtkMutator.h" +#include "gc-mmtk.h" +#include "threading.h" + +#ifdef __cplusplus extern "C" { +#endif + +// For now we're using the same values as stock-gc. However +// for the heap size we use 70% of the free memory available +// since that is actually a hard limit in MMTk. + +// max_total_memory is a suggestion. We try very hard to stay +// under this limit, but we will go above it rather than halting. +#ifdef _P64 +typedef uint64_t memsize_t; +static const size_t default_collect_interval = 5600 * 1024 * sizeof(void*); +// We expose this to the user/ci as jl_gc_set_max_memory +static memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024 * 1024 * 1024; +#else +typedef uint32_t memsize_t; +static const size_t default_collect_interval = 3200 * 1024 * sizeof(void*); +// Work really hard to stay within 2GB +// Alternative is to risk running out of address space +// on 32 bit architectures. +#define MAX32HEAP 1536 * 1024 * 1024 +static memsize_t max_total_memory = (memsize_t) MAX32HEAP; +#endif + +void jl_gc_init(void) { + // TODO: use jl_options.heap_size_hint to set MMTk's fixed heap size? (see issue: https://github.com/mmtk/mmtk-julia/issues/167) + + JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock"); + + arraylist_new(&to_finalize, 0); + arraylist_new(&finalizer_list_marked, 0); + + gc_num.allocd = 0; + gc_num.max_pause = 0; + gc_num.max_memory = 0; + + long long min_heap_size; + long long max_heap_size; + char* min_size_def = getenv("MMTK_MIN_HSIZE"); + char* min_size_gb = getenv("MMTK_MIN_HSIZE_G"); + + char* max_size_def = getenv("MMTK_MAX_HSIZE"); + char* max_size_gb = getenv("MMTK_MAX_HSIZE_G"); + + // default min heap currently set as Julia's default_collect_interval + if (min_size_def != NULL) { + char *p; + double min_size = strtod(min_size_def, &p); + min_heap_size = (long) 1024 * 1024 * min_size; + } else if (min_size_gb != NULL) { + char *p; + double min_size = strtod(min_size_gb, &p); + min_heap_size = (long) 1024 * 1024 * 1024 * min_size; + } else { + min_heap_size = default_collect_interval; + } + + // default max heap currently set as 70% of the free memory in the system + if (max_size_def != NULL) { + char *p; + double max_size = strtod(max_size_def, &p); + max_heap_size = (long) 1024 * 1024 * max_size; + } else if (max_size_gb != NULL) { + char *p; + double max_size = strtod(max_size_gb, &p); + max_heap_size = (long) 1024 * 1024 * 1024 * max_size; + } else { + max_heap_size = uv_get_free_memory() * 70 / 100; + } + + // Assert that the number of stock GC threads is 0; MMTk uses the number of threads in jl_options.ngcthreads + assert(jl_n_gcthreads == 0); + + // Check that the julia_copy_stack Rust feature has been defined when COPY_STACKS has been defined + int copy_stacks; + +#ifdef COPY_STACKS + copy_stacks = 1; +#else + copy_stacks = 0; +#endif + + mmtk_julia_copy_stack_check(copy_stacks); + + // if only max size is specified initialize MMTk with a fixed size heap + // TODO: We just
assume mark threads means GC threads, and ignore the number of concurrent sweep threads. + // If the two values are the same, we can use either. Otherwise, we need to be careful. + uintptr_t gcthreads = jl_options.nmarkthreads; + if (max_size_def != NULL || (max_size_gb != NULL && (min_size_def == NULL && min_size_gb == NULL))) { + mmtk_gc_init(0, max_heap_size, gcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)), jl_buff_tag); + } else { + mmtk_gc_init(min_heap_size, max_heap_size, gcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)), jl_buff_tag); + } +} + +void jl_start_gc_threads(void) { + jl_ptls_t ptls = jl_current_task->ptls; + mmtk_initialize_collection((void *)ptls); + // int nthreads = jl_atomic_load_relaxed(&jl_n_threads); + // int ngcthreads = jl_n_gcthreads; + // int nmutator_threads = nthreads - ngcthreads; + // printf("nthreads = %d, ngcthreads = %d, nmutator_threads = %d\n", nthreads, ngcthreads, nmutator_threads); +} + +void jl_init_thread_heap(struct _jl_tls_states_t *ptls) JL_NOTSAFEPOINT { + jl_thread_heap_t *heap = &ptls->gc_tls.heap; + small_arraylist_new(&heap->weak_refs, 0); + small_arraylist_new(&heap->live_tasks, 0); + for (int i = 0; i < JL_N_STACK_POOLS; i++) + small_arraylist_new(&heap->free_stacks[i], 0); + heap->mallocarrays = NULL; + heap->mafreelist = NULL; + arraylist_new(&ptls->finalizers, 0); + // Clear the malloc sz count + jl_atomic_store_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll, 0); + // Create mutator + MMTk_Mutator mmtk_mutator = mmtk_bind_mutator((void *)ptls, ptls->tid); + // Copy the mutator to the thread local storage + memcpy(&ptls->gc_tls.mmtk_mutator, mmtk_mutator, sizeof(MMTkMutatorContext)); + // Call post_bind to maintain a list of active mutators and to reclaim the old mutator (which is no longer needed) + mmtk_post_bind_mutator(&ptls->gc_tls.mmtk_mutator, mmtk_mutator); + memset(&ptls->gc_tls.gc_num, 0, sizeof(ptls->gc_tls.gc_num)); +} + +void jl_free_thread_gc_state(struct _jl_tls_states_t *ptls) { + mmtk_destroy_mutator(&ptls->gc_tls.mmtk_mutator); +} + +// FIXME: mmtk uses the same code as stock to enable/disable the GC +// Should this be moved to gc-common.c? 
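/*
 * Usage sketch for the environment variables parsed in jl_gc_init above
 * (illustrative only, inferred from the strtod-based parsing; not part of
 * the patch). MMTK_MIN_HSIZE/MMTK_MAX_HSIZE take sizes in MB and
 * MMTK_MIN_HSIZE_G/MMTK_MAX_HSIZE_G in GB; fractional values are accepted:
 *
 *   MMTK_MAX_HSIZE_G=4 ./julia                       # only max set: MMTk runs with a fixed 4 GB heap
 *   MMTK_MIN_HSIZE_G=0.5 MMTK_MAX_HSIZE_G=2 ./julia  # dynamic heap between 0.5 GB and 2 GB
 *
 * With none of them set, the minimum falls back to default_collect_interval
 * and the maximum to 70% of the free memory reported by uv_get_free_memory().
 */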
+ +_Atomic(uint32_t) jl_gc_disable_counter = 1; + +JL_DLLEXPORT int jl_gc_enable(int on) { + jl_ptls_t ptls = jl_current_task->ptls; + int prev = !ptls->disable_gc; + ptls->disable_gc = (on == 0); + if (on && !prev) { + // disable -> enable + if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) { + gc_num.allocd += gc_num.deferred_alloc; + gc_num.deferred_alloc = 0; + } + } + else if (prev && !on) { + // enable -> disable + jl_atomic_fetch_add(&jl_gc_disable_counter, 1); + // check if the GC is running and wait for it to finish + jl_gc_safepoint_(ptls); + } + return prev; +} + +JL_DLLEXPORT int jl_gc_is_enabled(void) { + jl_ptls_t ptls = jl_current_task->ptls; + return !ptls->disable_gc; +} + +JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem) { + // MMTk currently does not allow setting the heap size at runtime +} + +JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) { + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + if (jl_atomic_load_acquire(&jl_gc_disable_counter)) { + size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval; + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); + static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), ""); + jl_atomic_fetch_add_relaxed((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); + return; + } + mmtk_handle_user_collection_request(ptls, collection); +} + +// same as above, some of these are identical to the implementation in gc stock +static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTSAFEPOINT +{ + int gc_n_threads; + jl_ptls_t* gc_all_tls_states; + gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); + gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls = gc_all_tls_states[i]; + if (ptls) { + dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval); + dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc); + dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.realloc); + dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.poolalloc); + dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.bigalloc); + dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc); + if (update_heap) { + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, 0); + } + } + } +} + + +void reset_thread_gc_counts(void) JL_NOTSAFEPOINT +{ + int gc_n_threads; + jl_ptls_t* gc_all_tls_states; + gc_n_threads = jl_atomic_load_acquire(&jl_n_threads); + gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states); + for (int i = 0; i < gc_n_threads; i++) { + jl_ptls_t ptls = gc_all_tls_states[i]; + if (ptls != NULL) { + // don't reset `pool_live_bytes` here + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.realloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.poolalloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.bigalloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, 0); + } + } +} + +// weak references +// --- +JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value) +{ + jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type); + 
wr->value = value; // NOTE: wb not needed here + mmtk_add_weak_candidate(wr); + return wr; +} + + +// allocation +int jl_gc_classify_pools(size_t sz, int *osize) +{ + if (sz > GC_MAX_SZCLASS) + return -1; // call big alloc function + size_t allocsz = sz + sizeof(jl_taggedvalue_t); + *osize = LLT_ALIGN(allocsz, 16); + return 0; // use MMTk's fastpath logic +} + +int64_t last_gc_total_bytes = 0; +int64_t last_live_bytes = 0; // live_bytes at last collection +int64_t live_bytes = 0; + +// Retrieves Julia's `GC_Num` (structure that stores GC statistics). +JL_DLLEXPORT jl_gc_num_t jl_gc_num(void) { + jl_gc_num_t num = gc_num; + combine_thread_gc_counts(&num, 0); + return num; +} + +JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT { + int64_t oldtb = last_gc_total_bytes; + int64_t newtb; + jl_gc_get_total_bytes(&newtb); + last_gc_total_bytes = newtb; + return newtb - oldtb; +} + +JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT +{ + int64_t oldtb = last_gc_total_bytes; + int64_t newtb; + jl_gc_get_total_bytes(&newtb); + last_gc_total_bytes = newtb - offset; + return newtb - oldtb; +} + +JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void) { + return 0; +} + +void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT +{ + jl_ptls_t ptls = jl_current_task->ptls; + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + sz); +} + +void jl_gc_count_freed(size_t sz) JL_NOTSAFEPOINT +{ +} + +int64_t inc_live_bytes(int64_t inc) JL_NOTSAFEPOINT +{ + jl_timing_counter_inc(JL_TIMING_COUNTER_HeapSize, inc); + return live_bytes += inc; +} + +void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT +{ + combine_thread_gc_counts(&gc_num, 0); + inc_live_bytes(gc_num.deferred_alloc + gc_num.allocd); + gc_num.allocd = 0; + gc_num.deferred_alloc = 0; + reset_thread_gc_counts(); +} + +JL_DLLEXPORT int64_t jl_gc_live_bytes(void) { + return last_live_bytes; +} + +JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT +{ + jl_gc_num_t num = gc_num; + combine_thread_gc_counts(&num, 0); + // Sync this logic with `base/util.jl:GC_Diff` + *bytes = (num.total_allocd + num.deferred_alloc + num.allocd); +} + +JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void) +{ + // FIXME: should probably return MMTk's heap size + return max_total_memory; +} + +extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr); +extern void mmtk_object_reference_write_slow(void* mutator, const void* parent, const void* ptr); +extern void* mmtk_alloc(void* mutator, size_t size, size_t align, size_t offset, int allocator); +extern void mmtk_post_alloc(void* mutator, void* refer, size_t bytes, int allocator); + + +extern const void* MMTK_SIDE_LOG_BIT_BASE_ADDRESS; +extern const void* MMTK_SIDE_VO_BIT_BASE_ADDRESS; + +// These need to be constants. + +#define MMTK_OBJECT_BARRIER (1) +// Stickyimmix needs write barrier. Immix does not need write barrier. 
+#ifdef MMTK_PLAN_IMMIX +#define MMTK_NEEDS_WRITE_BARRIER (0) +#endif +#ifdef MMTK_PLAN_STICKYIMMIX +#define MMTK_NEEDS_WRITE_BARRIER (1) +#endif + +#ifdef MMTK_CONSERVATIVE_SCAN +#define MMTK_NEEDS_VO_BIT (1) +#else +#define MMTK_NEEDS_VO_BIT (0) +#endif + +#define MMTK_DEFAULT_IMMIX_ALLOCATOR (0) +#define MMTK_IMMORTAL_BUMP_ALLOCATOR (0) + +// Directly call into MMTk for write barrier (debugging only) +inline void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + mmtk_object_reference_write_post(&ptls->gc_tls.mmtk_mutator, parent, ptr); +} + +// Fastpath. Return 1 if we should go to slowpath +inline int mmtk_gc_wb_fast_check(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ + if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) { + intptr_t addr = (intptr_t) (void*) parent; + uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6); + intptr_t shift = (addr >> 3) & 0b111; + uint8_t byte_val = *meta_addr; + return ((byte_val >> shift) & 1) == 1; + } else { + return 0; + } +} + +// Slowpath. +inline void mmtk_gc_wb_slow(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ + if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) { + jl_task_t *ct = jl_current_task; + jl_ptls_t ptls = ct->ptls; + mmtk_object_reference_write_slow(&ptls->gc_tls.mmtk_mutator, parent, ptr); + } +} + +inline void mmtk_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT +{ + if (mmtk_gc_wb_fast_check(parent, ptr)) { + mmtk_gc_wb_slow(parent, ptr); + } +} + +inline void mmtk_gc_wb_binding(const void *bnd, const void *val) JL_NOTSAFEPOINT +{ + if (mmtk_gc_wb_fast_check(bnd, val)) { + jl_astaggedvalue(bnd)->bits.gc = 2; // to indicate that the buffer is a binding + mmtk_gc_wb_slow(bnd, val); + } +} + +#define MMTK_MIN_ALIGNMENT 4 +// MMTk assumes allocation size is aligned to min alignment. 
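/*
 * Worked example for the helper below (a sketch, not part of the patch):
 * with MMTK_MIN_ALIGNMENT == 4, mmtk_align_alloc_sz computes
 * (sz + 3) & ~3, i.e. it rounds a request up to the next multiple of 4:
 *
 *   mmtk_align_alloc_sz(13) == 16
 *   mmtk_align_alloc_sz(16) == 16
 *   mmtk_align_alloc_sz(17) == 20
 */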
+inline size_t mmtk_align_alloc_sz(size_t sz) JL_NOTSAFEPOINT +{ + return (sz + MMTK_MIN_ALIGNMENT - 1) & ~(MMTK_MIN_ALIGNMENT - 1); +} + +inline void* bump_alloc_fast(MMTkMutatorContext* mutator, uintptr_t* cursor, uintptr_t limit, size_t size, size_t align, size_t offset, int allocator) { + intptr_t delta = (-offset - *cursor) & (align - 1); + uintptr_t result = *cursor + (uintptr_t)delta; + + if (__unlikely(result + size > limit)) { + return (void*) mmtk_alloc(mutator, size, align, offset, allocator); + } else { + *cursor = result + size; + return (void*)result; + } +} + +inline void* mmtk_immix_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) { + ImmixAllocator* allocator = &mutator->allocators.immix[MMTK_DEFAULT_IMMIX_ALLOCATOR]; + return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (uintptr_t)allocator->limit, size, align, offset, 0); +} + +inline void mmtk_immix_post_alloc_slow(MMTkMutatorContext* mutator, void* obj, size_t size) { + mmtk_post_alloc(mutator, obj, size, 0); +} + +inline void mmtk_set_vo_bit(void* obj) { + intptr_t addr = (intptr_t) obj; + intptr_t shift = (addr >> 3) & 0b111; + uint8_t* vo_meta_addr = (uint8_t*) (MMTK_SIDE_VO_BIT_BASE_ADDRESS) + (addr >> 6); + uint8_t new_val = (*vo_meta_addr) | (1 << shift); + (*vo_meta_addr) = new_val; +} + +inline void mmtk_immix_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) { + if (MMTK_NEEDS_VO_BIT) { + // set VO bit + mmtk_set_vo_bit(obj); + } +} + +inline void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) { + BumpAllocator* allocator = &mutator->allocators.bump_pointer[MMTK_IMMORTAL_BUMP_ALLOCATOR]; + return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (uintptr_t)allocator->limit, size, align, offset, 1); +} + +inline void mmtk_immortal_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) { + if (MMTK_NEEDS_VO_BIT) { + // set VO bit + mmtk_set_vo_bit(obj); + } + + if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) { + intptr_t addr = (intptr_t) obj; + intptr_t shift = (addr >> 3) & 0b111; + uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6); + while(1) { + uint8_t old_val = *meta_addr; + uint8_t new_val = old_val | (1 << shift); + if (jl_atomic_cmpswap((_Atomic(uint8_t)*)meta_addr, &old_val, new_val)) { + break; + } + } + } +} + +// mutex for page profile +uv_mutex_t page_profile_lock; + +JL_DLLEXPORT void jl_gc_take_page_profile(ios_t *stream) +{ + uv_mutex_lock(&page_profile_lock); + const char *str = "Page profiler is unsupported in MMTk."; + ios_write(stream, str, strlen(str)); + uv_mutex_unlock(&page_profile_lock); +} + +// this seems to be needed by the gc tests +#define JL_GC_N_MAX_POOLS 51 +JL_DLLEXPORT double jl_gc_page_utilization_stats[JL_GC_N_MAX_POOLS]; + +STATIC_INLINE void gc_dump_page_utilization_data(void) JL_NOTSAFEPOINT +{ + // FIXME: MMTk would have to provide its own stats +} + +#define MMTK_GC_PAGE_SZ (1 << 12) // MMTk's page size is defined in mmtk-core constants + +JL_DLLEXPORT uint64_t jl_get_pg_size(void) +{ + return MMTK_GC_PAGE_SZ; +} + + +extern void mmtk_store_obj_size_c(void* obj, size_t size); + +inline void maybe_collect(jl_ptls_t ptls) +{ + // Just do a safe point for general maybe_collect + jl_gc_safepoint_(ptls); +} + +// This is only used for malloc. We need to know if we need to do GC. However, keeping checking with MMTk (mmtk_gc_poll), +// is expensive. So we only check for every few allocations.
+static inline void malloc_maybe_collect(jl_ptls_t ptls, size_t sz) +{ + // We do not need to carefully maintain malloc_sz_since_last_poll. We just need to + // avoid using mmtk_gc_poll too frequently, and try to be precise on our heap usage + // as much as we can. + if (ptls->gc_tls.malloc_sz_since_last_poll > 4096) { + jl_atomic_store_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll, 0); + mmtk_gc_poll(ptls); + } else { + jl_atomic_fetch_add_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll, sz); + jl_gc_safepoint_(ptls); + } +} + +// allocation wrappers that track allocation and let collection run + +JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) +{ + jl_gcframe_t **pgcstack = jl_get_pgcstack(); + jl_task_t *ct = jl_current_task; + void *data = malloc(sz); + if (data != NULL && pgcstack != NULL && ct->world_age) { + jl_ptls_t ptls = ct->ptls; + malloc_maybe_collect(ptls, sz); + jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, sz); + } + return data; +} + +JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) +{ + jl_gcframe_t **pgcstack = jl_get_pgcstack(); + jl_task_t *ct = jl_current_task; + void *data = calloc(nm, sz); + if (data != NULL && pgcstack != NULL && ct->world_age) { + jl_ptls_t ptls = ct->ptls; + malloc_maybe_collect(ptls, nm * sz); + jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, nm * sz); + } + return data; +} + +JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz) +{ + jl_gcframe_t **pgcstack = jl_get_pgcstack(); + jl_task_t *ct = jl_current_task; + free(p); + if (pgcstack != NULL && ct->world_age) { + jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, -sz); + } +} + +JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz) +{ + jl_gcframe_t **pgcstack = jl_get_pgcstack(); + jl_task_t *ct = jl_current_task; + if (pgcstack && ct->world_age) { + jl_ptls_t ptls = ct->ptls; + malloc_maybe_collect(ptls, sz); + if (sz < old) + jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, -(int64_t)(old - sz)); // shrinking frees bytes, so the counter must decrease + else + jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, sz - old); + } + return realloc(p, sz); +} + +void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) +{ + jl_ptls_t ptls = jl_current_task->ptls; + size_t allocsz = mmtk_align_alloc_sz(sz); + void* addr = mmtk_immortal_alloc_fast(&ptls->gc_tls.mmtk_mutator, allocsz, align, offset); + return addr; +} + +void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset) +{ + return jl_gc_perm_alloc_nolock(sz, zero, align, offset); +} + +jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT +{ + const size_t allocsz = sz + sizeof(jl_taggedvalue_t); + unsigned align = (sz == 0 ? sizeof(void*) : (allocsz <= sizeof(void*) * 2 ?
+ sizeof(void*) * 2 : 16)); + jl_taggedvalue_t *o = (jl_taggedvalue_t*)jl_gc_perm_alloc(allocsz, 0, align, + sizeof(void*) % align); + + jl_ptls_t ptls = jl_current_task->ptls; + mmtk_immortal_post_alloc_fast(&ptls->gc_tls.mmtk_mutator, jl_valueof(o), allocsz); + o->header = (uintptr_t)ty; + return jl_valueof(o); +} + + +JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int osize, size_t align, void *ty) +{ + // safepoint + jl_gc_safepoint_(ptls); + + jl_value_t *v; + if ((uintptr_t)ty != jl_buff_tag) { + // v needs to be 16 byte aligned, therefore v_tagged needs to be offset accordingly to consider the size of header + jl_taggedvalue_t *v_tagged = (jl_taggedvalue_t *)mmtk_immix_alloc_fast(&ptls->gc_tls.mmtk_mutator, LLT_ALIGN(osize, align), align, sizeof(jl_taggedvalue_t)); + v = jl_valueof(v_tagged); + mmtk_immix_post_alloc_fast(&ptls->gc_tls.mmtk_mutator, v, LLT_ALIGN(osize, align)); + } else { + // allocating an extra word to store the size of buffer objects + jl_taggedvalue_t *v_tagged = (jl_taggedvalue_t *)mmtk_immix_alloc_fast(&ptls->gc_tls.mmtk_mutator, LLT_ALIGN(osize+sizeof(jl_taggedvalue_t), align), align, 0); + jl_value_t* v_tagged_aligned = ((jl_value_t*)((char*)(v_tagged) + sizeof(jl_taggedvalue_t))); + v = jl_valueof(v_tagged_aligned); + mmtk_store_obj_size_c(v, LLT_ALIGN(osize+sizeof(jl_taggedvalue_t), align)); + mmtk_immix_post_alloc_fast(&ptls->gc_tls.mmtk_mutator, v, LLT_ALIGN(osize+sizeof(jl_taggedvalue_t), align)); + } + + ptls->gc_tls.gc_num.allocd += osize; + ptls->gc_tls.gc_num.poolalloc++; + + return v; +} + +void jl_gc_notify_image_load(const char* img_data, size_t len) +{ + mmtk_set_vm_space((void*)img_data, len); +} + +JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t sz) +{ + // safepoint + jl_gc_safepoint_(ptls); + + size_t offs = offsetof(bigval_t, header); + assert(sz >= sizeof(jl_taggedvalue_t) && "sz must include tag"); + static_assert(offsetof(bigval_t, header) >= sizeof(void*), "Empty bigval header?"); + static_assert(sizeof(bigval_t) % JL_HEAP_ALIGNMENT == 0, ""); + size_t allocsz = LLT_ALIGN(sz + offs, JL_CACHE_BYTE_ALIGNMENT); + if (allocsz < sz) { // overflow in adding offs, size was "negative" + assert(0 && "Error when allocating big object"); + jl_throw(jl_memory_exception); + } + + bigval_t *v = (bigval_t*)mmtk_alloc_large(&ptls->gc_tls.mmtk_mutator, allocsz, JL_CACHE_BYTE_ALIGNMENT, 0, 2); + + if (v == NULL) { + assert(0 && "Allocation failed"); + jl_throw(jl_memory_exception); + } + v->sz = allocsz; + + ptls->gc_tls.gc_num.allocd += allocsz; + ptls->gc_tls.gc_num.bigalloc++; + + jl_value_t *result = jl_valueof(&v->header); + mmtk_post_alloc(&ptls->gc_tls.mmtk_mutator, result, allocsz, 2); + + return result; +} + +// Instrumented version of jl_gc_small_alloc_inner, called into by LLVM-generated code. +JL_DLLEXPORT jl_value_t *jl_gc_small_alloc(jl_ptls_t ptls, int offset, int osize, jl_value_t* type) +{ + assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0); + + jl_value_t *val = jl_mmtk_gc_alloc_default(ptls, osize, 16, NULL); + maybe_record_alloc_to_profile(val, osize, (jl_datatype_t*)type); + return val; +} + +// Instrumented version of jl_gc_big_alloc_inner, called into by LLVM-generated code. +JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz, jl_value_t *type) +{ + // TODO: assertion needed here? 
+ assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0); + + jl_value_t *val = jl_mmtk_gc_alloc_big(ptls, sz); + maybe_record_alloc_to_profile(val, sz, (jl_datatype_t*)type); + return val; +} + +inline jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) +{ + jl_value_t *v; + const size_t allocsz = sz + sizeof(jl_taggedvalue_t); + if (sz <= GC_MAX_SZCLASS) { + v = jl_mmtk_gc_alloc_default(ptls, allocsz, 16, ty); + } + else { + if (allocsz < sz) // overflow in adding offs, size was "negative" + jl_throw(jl_memory_exception); + v = jl_mmtk_gc_alloc_big(ptls, allocsz); + } + jl_set_typeof(v, ty); + maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty); + return v; +} + +JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) +{ + jl_ptls_t ptls = jl_current_task->ptls; + maybe_collect(ptls); + size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT); + if (allocsz < sz) // overflow in adding offs, size was "negative" + jl_throw(jl_memory_exception); + + int last_errno = errno; +#ifdef _OS_WINDOWS_ + DWORD last_error = GetLastError(); +#endif + void *b = malloc_cache_align(allocsz); + if (b == NULL) + jl_throw(jl_memory_exception); + + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + allocsz); + jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1); + // FIXME: Should these be part of mmtk's heap? + // malloc_maybe_collect(ptls, sz); + // jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, allocsz); +#ifdef _OS_WINDOWS_ + SetLastError(last_error); +#endif + errno = last_errno; + // jl_gc_managed_malloc is currently always used for allocating array buffers. + maybe_record_alloc_to_profile((jl_value_t*)b, sz, (jl_datatype_t*)jl_buff_tag); + return b; +} + +// Not used by mmtk +// Number of GC threads that may run parallel marking +int jl_n_markthreads; +// Number of GC threads that may run concurrent sweeping (0 or 1) +int jl_n_sweepthreads; +// `tid` of first GC thread +int gc_first_tid; + +JL_DLLEXPORT void jl_gc_queue_root(const struct _jl_value_t *ptr) JL_NOTSAFEPOINT +{ + mmtk_unreachable(); +} + +JL_DLLEXPORT void jl_gc_queue_multiroot(const struct _jl_value_t *root, const void *stored, + struct _jl_datatype_t *dt) JL_NOTSAFEPOINT +{ + mmtk_unreachable(); +} + +// marking +// --- + +JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj) +{ + mmtk_unreachable(); + return 0; +} +JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent, + jl_value_t **objs, size_t nobjs) +{ + mmtk_unreachable(); +} + +JL_DLLEXPORT size_t jl_gc_max_internal_obj_size(void) +{ + // TODO: meaningful for MMTk? + return GC_MAX_SZCLASS; +} + +JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) +{ + // FIXME: do we need to implement this? 
+} + +// gc-debug functions +// --- + +JL_DLLEXPORT jl_taggedvalue_t *jl_gc_find_taggedvalue_pool(char *p, size_t *osize_p) +{ + return NULL; +} + +void jl_gc_debug_critical_error(void) JL_NOTSAFEPOINT +{ +} + +int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT +{ + return 0; +} + +int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT +{ + return 0; +} + +void jl_gc_debug_print_status(void) JL_NOTSAFEPOINT +{ + // May not be accurate but should be helpful enough + uint64_t pool_count = gc_num.poolalloc; + uint64_t big_count = gc_num.bigalloc; + jl_safe_printf("Allocations: %" PRIu64 " " + "(Pool: %" PRIu64 "; Big: %" PRIu64 "); GC: %d\n", + pool_count + big_count, pool_count, big_count, gc_num.pause); +} + +JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) +{ + return sizeof(bigval_t); +} + +void jl_print_gc_stats(JL_STREAM *s) +{ +} + +JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void) +{ + return 0; +} + +JL_DLLEXPORT int jl_gc_conservative_gc_support_enabled(void) +{ + return 0; +} + +// TODO: if this is needed, it can be added in MMTk +JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) +{ + return NULL; +} + +#ifdef __cplusplus +} +#endif + +#endif // MMTK_GC diff --git a/src/gc-mmtk.h b/src/gc-mmtk.h new file mode 100644 index 0000000000000..6c2c7a40bc81f --- /dev/null +++ b/src/gc-mmtk.h @@ -0,0 +1,34 @@ +#ifdef MMTK_GC + +#ifdef __cplusplus +extern "C" { +#endif + +extern jl_mutex_t finalizers_lock; +extern arraylist_t to_finalize; +extern arraylist_t finalizer_list_marked; + +JL_EXTENSION typedef struct _bigval_t { + size_t sz; +#ifdef _P64 // Add padding so that the value is 64-byte aligned + // (8 pointers of 8 bytes each) - (2 other pointers in struct) + void *_padding[8 - 2]; +#else + // (16 pointers of 4 bytes each) - (2 other pointers in struct) + void *_padding[16 - 2]; +#endif + //struct jl_taggedvalue_t <>; + union { + uintptr_t header; + struct { + uintptr_t gc:2; + } bits; + }; + // must be 64-byte aligned here, in 32 & 64 bit modes +} bigval_t; + +#ifdef __cplusplus +} +#endif + +#endif // MMTK_GC diff --git a/src/gc-page-profiler.c b/src/gc-page-profiler.c index 2625fa812781a..bfd1c74247df8 100644 --- a/src/gc-page-profiler.c +++ b/src/gc-page-profiler.c @@ -1,5 +1,5 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license - +#ifndef MMTK_GC #include "gc-page-profiler.h" #include "julia.h" @@ -178,3 +178,5 @@ JL_DLLEXPORT void jl_gc_take_page_profile(ios_t *stream) #ifdef __cplusplus } #endif + +#endif // !MMTK_GC diff --git a/src/gc-pages.c b/src/gc-pages.c index 71d59de29166f..ed6e0ed20ba1c 100644 --- a/src/gc-pages.c +++ b/src/gc-pages.c @@ -1,5 +1,5 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license - +#ifndef MMTK_GC #include "gc-common.h" #include "gc-stock.h" #ifndef _OS_WINDOWS_ @@ -205,3 +205,5 @@ void jl_gc_free_page(jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT #ifdef __cplusplus } #endif + +#endif // !MMTK_GC diff --git a/src/gc-stock.c b/src/gc-stock.c index 3ff37566dc6c7..164d3067a31de 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -1,5 +1,5 @@ // This file is a part of Julia. 
License is MIT: https://julialang.org/license - +#ifndef MMTK_GC #include "gc-common.h" #include "gc-stock.h" #include "gc-alloc-profiler.h" @@ -405,7 +405,6 @@ static void sweep_weak_refs(void) } } - STATIC_INLINE void jl_batch_accum_heap_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT { uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.alloc_acc) + sz; @@ -453,7 +452,6 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) return jl_valueof(&v->header); } - // Instrumented version of jl_gc_big_alloc_inner, called into by LLVM-generated code. JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz, jl_value_t *type) { @@ -3888,12 +3886,22 @@ JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) return sizeof(bigval_t); } +<<<<<<< HEAD +======= +>>>>>>> c48a701f54 (WIP: Adding support for MMTk/Immix) JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) { arraylist_push(&ptls->gc_tls.sweep_objs, obj); } +void jl_gc_notify_image_load(const char* img_data, size_t len) +{ + // Do nothing +} + #ifdef __cplusplus } #endif + +#endif // !MMTK_GC diff --git a/src/gc-stock.h b/src/gc-stock.h index 50eca3aadbd86..8e563f32ab9d3 100644 --- a/src/gc-stock.h +++ b/src/gc-stock.h @@ -5,6 +5,7 @@ . non-moving, precise mark and sweep collector . pool-allocates small objects, keeps big objects on a simple list */ +#ifndef MMTK_GC #ifndef JL_GC_H #define JL_GC_H @@ -422,21 +423,6 @@ STATIC_INLINE int gc_ith_parallel_collector_thread_id(int i) JL_NOTSAFEPOINT return gc_first_tid + i; } -STATIC_INLINE int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT -{ - return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); -} - -STATIC_INLINE int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT -{ - if (jl_n_sweepthreads == 0) { - return 0; - } - int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); - int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; - return tid == concurrent_collector_thread_id; -} - STATIC_INLINE int gc_random_parallel_collector_thread_id(jl_ptls_t ptls) JL_NOTSAFEPOINT { assert(jl_n_markthreads > 0); @@ -712,3 +698,5 @@ void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect #endif #endif + +#endif // !MMTK_GC diff --git a/src/gc-tls-mmtk.h b/src/gc-tls-mmtk.h new file mode 100644 index 0000000000000..2eb5f2a6a44d9 --- /dev/null +++ b/src/gc-tls-mmtk.h @@ -0,0 +1,49 @@ +// This file is a part of Julia. 
License is MIT: https://julialang.org/license + +#ifdef MMTK_GC + +#include +#include "mmtkMutator.h" + +#ifdef __cplusplus extern "C" { +#endif + +typedef struct { + // variable for tracking weak references + small_arraylist_t weak_refs; + // live tasks started on this thread + // that are holding onto a stack from the pool + small_arraylist_t live_tasks; + + // variables for tracking malloc'd arrays + struct _mallocmemory_t *mallocarrays; + struct _mallocmemory_t *mafreelist; + +#define JL_N_STACK_POOLS 16 + small_arraylist_t free_stacks[JL_N_STACK_POOLS]; +} jl_thread_heap_t; + +typedef struct { + _Atomic(int64_t) allocd; + _Atomic(int64_t) pool_live_bytes; + _Atomic(uint64_t) malloc; + _Atomic(uint64_t) realloc; + _Atomic(uint64_t) poolalloc; + _Atomic(uint64_t) bigalloc; + _Atomic(int64_t) free_acc; + _Atomic(uint64_t) alloc_acc; +} jl_thread_gc_num_t; + +typedef struct { + jl_thread_heap_t heap; + jl_thread_gc_num_t gc_num; + MMTkMutatorContext mmtk_mutator; + _Atomic(size_t) malloc_sz_since_last_poll; // accessed with jl_atomic_* (relaxed) from the malloc wrappers +} jl_gc_tls_states_t; + +#ifdef __cplusplus } +#endif + +#endif // MMTK_GC diff --git a/src/gc-tls.h b/src/gc-tls.h index 9e4b09404db84..43adfb8a7ff2a 100644 --- a/src/gc-tls.h +++ b/src/gc-tls.h @@ -1,5 +1,7 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license +#ifndef MMTK_GC + // Meant to be included in "julia_threads.h" #ifndef JL_GC_TLS_H #define JL_GC_TLS_H @@ -90,3 +92,5 @@ typedef struct { #endif #endif // JL_GC_TLS_H + +#endif // MMTK_GC diff --git a/src/julia.h b/src/julia.h index abb8a57ff13b0..db57db1fbeb38 100644 --- a/src/julia.h +++ b/src/julia.h @@ -850,7 +850,7 @@ static inline jl_value_t *jl_to_typeof(uintptr_t t) return (jl_value_t*)t; } #else -extern JL_HIDDEN jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; +extern JL_DLLEXPORT jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; static inline jl_value_t *jl_to_typeof(uintptr_t t) { if (t < (jl_max_tags << 4)) diff --git a/src/julia_internal.h b/src/julia_internal.h index e677f40907dfd..d5013601a9124 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -1052,7 +1052,7 @@ STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr) return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 4; } extern _Atomic(uint32_t) jl_gc_running; -extern _Atomic(uint32_t) jl_gc_disable_counter; +extern JL_DLLEXPORT _Atomic(uint32_t) jl_gc_disable_counter; // All the functions are safe to be called from within a signal handler // provided that the thread will not be interrupted by another asynchronous // signal.
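/*
 * A minimal sketch (hypothetical helper, not part of the patch) showing how
 * the TLS layout above is consumed: allocation fastpaths reach the embedded
 * MMTkMutatorContext directly through ptls, mirroring jl_mmtk_gc_alloc_default
 * in gc-mmtk.c.
 */
static inline void *example_pool_alloc(jl_ptls_t ptls, size_t sz)
{
    // assumes mmtk_align_alloc_sz and mmtk_immix_alloc_fast from gc-mmtk.c
    size_t allocsz = mmtk_align_alloc_sz(sz + sizeof(jl_taggedvalue_t));
    jl_taggedvalue_t *v_tagged = (jl_taggedvalue_t *)mmtk_immix_alloc_fast(
        &ptls->gc_tls.mmtk_mutator, allocsz, 16, sizeof(jl_taggedvalue_t));
    return jl_valueof(v_tagged);
}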
diff --git a/src/julia_threads.h b/src/julia_threads.h index b697a0bf030ed..641c50386c555 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -4,7 +4,11 @@ #ifndef JL_THREADS_H #define JL_THREADS_H +#ifndef MMTK_GC #include "gc-tls.h" +#else +#include "gc-tls-mmtk.h" +#endif #include "julia_atomics.h" #ifndef _OS_WINDOWS_ #include "pthread.h" diff --git a/src/stackwalk.c b/src/stackwalk.c index a1de3a6d61a07..e6fc2c7bbf56a 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -1294,6 +1294,8 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT } extern int gc_first_tid; +extern int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT; +extern int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT; // Print backtraces for all live tasks, for all threads, to jl_safe_printf stderr JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT diff --git a/src/staticdata.c b/src/staticdata.c index 363aa46b62221..e07a5365bf06f 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -654,6 +654,7 @@ static void jl_load_sysimg_so(void) plen = (size_t *)&jl_system_image_size; else jl_dlsym(jl_sysimg_handle, "jl_system_image_size", (void **)&plen, 1); + jl_gc_notify_image_load(sysimg_data, *plen); jl_restore_system_image_data(sysimg_data, *plen); } @@ -3899,6 +3900,7 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j jl_dlsym(pkgimg_handle, "jl_system_image_data", (void **)&pkgimg_data, 1); size_t *plen; jl_dlsym(pkgimg_handle, "jl_system_image_size", (void **)&plen, 1); + jl_gc_notify_image_load(pkgimg_data, *plen); jl_image_t pkgimage = jl_init_processor_pkgimg(pkgimg_handle); diff --git a/src/threading.c b/src/threading.c index 44b1192528531..df62ea107bf04 100644 --- a/src/threading.c +++ b/src/threading.c @@ -743,6 +743,10 @@ void jl_init_threading(void) } int16_t ngcthreads = jl_n_markthreads + jl_n_sweepthreads; +#ifdef MMTK_GC + ngcthreads = 0; +#endif + jl_all_tls_states_size = nthreads + nthreadsi + ngcthreads; jl_n_threads_per_pool = (int*)malloc_s(2 * sizeof(int)); jl_n_threads_per_pool[0] = nthreadsi; From b488bbeb22847c3740459d015878368587ecb847 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 28 Aug 2024 00:44:09 +0000 Subject: [PATCH 09/38] Refactoring to be considered before adding MMTk --- src/gc-interface.h | 4 +++- src/gc-stock.c | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/gc-interface.h b/src/gc-interface.h index 72a57f4944156..b1f3ab9d6908d 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -192,7 +192,9 @@ JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, // object being allocated and will be used to set the object header. struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT; -// FIXME: add description here +// This function notifies the GC about memory addresses that are set when loading the boot image. +// The GC may use that information to, for instance, determine that such objects should +// be treated as marked and belong to the old generation in nursery collections.
void jl_gc_notify_image_load(const char* img_data, size_t len); + // ========================================================================= // // Runtime Write-Barriers // ========================================================================= // diff --git a/src/gc-stock.c b/src/gc-stock.c index 164d3067a31de..019ae481ce189 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -3888,8 +3888,11 @@ JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) <<<<<<< HEAD +<<<<<<< HEAD ======= >>>>>>> c48a701f54 (WIP: Adding support for MMTk/Immix) +======= +>>>>>>> 0aee3ba32a (Refactoring to be considered before adding MMTk) JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) { arraylist_push(&ptls->gc_tls.sweep_objs, obj); From a4cf8e7c754fc72c9612750ccce65b87eaeb720b Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 29 Aug 2024 05:37:53 +0000 Subject: [PATCH 10/38] Adding fastpath allocation --- src/llvm-gc-interface-passes.h | 5 ++ src/llvm-late-gc-lowering.cpp | 139 +++++++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+) diff --git a/src/llvm-gc-interface-passes.h b/src/llvm-gc-interface-passes.h index d33567e887118..ed6b94dcdc3fc 100644 --- a/src/llvm-gc-interface-passes.h +++ b/src/llvm-gc-interface-passes.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -328,6 +329,7 @@ struct LateLowerGCFrame: private JuliaPassContext { private: CallInst *pgcstack; + Function *smallAllocFunc; void MaybeNoteDef(State &S, BBState &BBS, Value *Def, const ArrayRef &SafepointsSoFar, SmallVector &&RefinedPtr = SmallVector()); @@ -365,6 +367,9 @@ struct LateLowerGCFrame: private JuliaPassContext { void RefineLiveSet(LargeSparseBitVector &LS, State &S, ArrayRef CalleeRoots); Value *EmitTagPtr(IRBuilder<> &builder, Type *T, Type *T_size, Value *V); Value *EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value *V); +#ifdef MMTK_GC + Value* lowerGCAllocBytesLate(CallInst *target, Function &F); +#endif }; // The final GC lowering pass. This pass lowers platform-agnostic GC diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 1d390a5115207..d395771f6df0c 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -2452,8 +2452,122 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(SmallVectorImpl &Colors, St } } +#ifdef MMTK_GC +Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F) +{ + assert(target->arg_size() == 3); + + IRBuilder<> builder(target); + auto ptls = target->getArgOperand(0); + auto type = target->getArgOperand(2); + if (auto CI = dyn_cast<ConstantInt>(target->getArgOperand(1))) { + size_t sz = (size_t)CI->getZExtValue(); + // This is strongly architecture and OS dependent + int osize; + int offset = jl_gc_classify_pools(sz, &osize); + if (offset >= 0) { + // In this case instead of lowering julia.gc_alloc_bytes to jl_gc_small_alloc, + // we do a slowpath/fastpath check and lower it only on the slowpath, returning + // the cursor and updating it in the fastpath. + auto pool_osize_i32 = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize); + auto pool_osize = ConstantInt::get(Type::getInt64Ty(F.getContext()), osize); + + // Should we generate fastpath allocation sequence here? We should always generate fastpath here for MMTk. + // Setting this to false will increase allocation overhead a lot, and should only be used for debugging. + const bool INLINE_FASTPATH_ALLOCATION = true; + + if (INLINE_FASTPATH_ALLOCATION) { + // Assuming we use the first immix allocator. + // FIXME: We should get the allocator index and type from MMTk.
+ auto allocator_offset = offsetof(jl_tls_states_t, gc_tls) + offsetof(jl_gc_tls_states_t, mmtk_mutator) + offsetof(MMTkMutatorContext, allocators) + offsetof(Allocators, immix); + + auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, cursor)); + auto limit_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, limit)); + + auto cursor_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, cursor_pos); + auto cursor_ptr = builder.CreateBitCast(cursor_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "cursor_ptr"); + auto cursor = builder.CreateLoad(Type::getInt64Ty(target->getContext()), cursor_ptr, "cursor"); + + // offset = 8 + auto delta_offset = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), ConstantInt::get(Type::getInt64Ty(target->getContext()), 8)); + auto delta_cursor = builder.CreateNSWSub(ConstantInt::get(Type::getInt64Ty(target->getContext()), 0), cursor); + auto delta_op = builder.CreateNSWAdd(delta_offset, delta_cursor); + // alignment 16 (15 = 16 - 1) + auto delta = builder.CreateAnd(delta_op, ConstantInt::get(Type::getInt64Ty(target->getContext()), 15), "delta"); + auto result = builder.CreateNSWAdd(cursor, delta, "result"); + + auto new_cursor = builder.CreateNSWAdd(result, pool_osize); + + auto limit_tls_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, limit_pos); + auto limit_ptr = builder.CreateBitCast(limit_tls_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "limit_ptr"); + auto limit = builder.CreateLoad(Type::getInt64Ty(target->getContext()), limit_ptr, "limit"); + + auto gt_limit = builder.CreateICmpSGT(new_cursor, limit); + + auto slowpath = BasicBlock::Create(target->getContext(), "slowpath", target->getFunction()); + auto fastpath = BasicBlock::Create(target->getContext(), "fastpath", target->getFunction()); + + auto next_instr = target->getNextNode(); + SmallVector<uint32_t, 2> Weights{1, 9}; + + MDBuilder MDB(F.getContext()); + SplitBlockAndInsertIfThenElse(gt_limit, next_instr, &slowpath, &fastpath, false, false, MDB.createBranchWeights(Weights)); + + builder.SetInsertPoint(next_instr); + auto phiNode = builder.CreatePHI(target->getCalledFunction()->getReturnType(), 2, "phi_fast_slow"); + + // slowpath + builder.SetInsertPoint(slowpath); + auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1); + auto new_call = builder.CreateCall(smallAllocFunc, { ptls, pool_offs, pool_osize_i32, type }); + new_call->setAttributes(new_call->getCalledFunction()->getAttributes()); + builder.CreateBr(next_instr->getParent()); + + // fastpath + builder.SetInsertPoint(fastpath); + builder.CreateStore(new_cursor, cursor_ptr); + + // ptls->gc_tls.gc_num.allocd += osize; + auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_tls) + offsetof(jl_gc_tls_states_t, gc_num)); + auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos); + auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc"); + auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls); + auto pool_allocd_total = builder.CreateAdd(pool_allocd, pool_osize); + builder.CreateStore(pool_allocd_total, pool_alloc_tls); + + auto v_raw = builder.CreateNSWAdd(result,
ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t))); + auto v_as_ptr = builder.CreateIntToPtr(v_raw, smallAllocFunc->getReturnType()); + builder.CreateBr(next_instr->getParent()); + + phiNode->addIncoming(new_call, slowpath); + phiNode->addIncoming(v_as_ptr, fastpath); + phiNode->takeName(target); + return phiNode; + } + } + } + return target; +} + +template <typename TIterator> +static void replaceInstruction( + Instruction *oldInstruction, + Value *newInstruction, + TIterator &it) +{ + if (newInstruction != oldInstruction) { + oldInstruction->replaceAllUsesWith(newInstruction); + it = oldInstruction->eraseFromParent(); + } + else { + ++it; + } +} +#endif + bool LateLowerGCFrame::runOnFunction(Function &F, bool *CFGModified) { initAll(*F.getParent()); + smallAllocFunc = getOrDeclare(jl_well_known::GCSmallAlloc); LLVM_DEBUG(dbgs() << "GC ROOT PLACEMENT: Processing function " << F.getName() << "\n"); if (!pgcstack_getter && !adoptthread_func) return CleanupIR(F, nullptr, CFGModified); @@ -2468,6 +2582,31 @@ bool LateLowerGCFrame::runOnFunction(Function &F, bool *CFGModified) { std::map> CallFrames; // = OptimizeCallFrames(S, Ordering); PlaceRootsAndUpdateCalls(Colors, S, CallFrames); CleanupIR(F, &S, CFGModified); + +#ifdef MMTK_GC + // We lower the julia.gc_alloc_bytes intrinsic in this pass to insert slowpath/fastpath blocks for MMTk + for (BasicBlock &BB : F) { + for (auto it = BB.begin(); it != BB.end();) { + auto *CI = dyn_cast<CallInst>(&*it); + if (!CI) { + ++it; + continue; + } + + Value *callee = CI->getCalledOperand(); + assert(callee); + + auto GCAllocBytes = getOrNull(jl_intrinsics::GCAllocBytes); + if (GCAllocBytes == callee) { + *CFGModified = true; + replaceInstruction(CI, lowerGCAllocBytesLate(CI, F), it); + continue; + } + ++it; + } + } +#endif + return true; } From ecb675a597ab3dcd57fc053c995252618b6b0edd Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 29 Aug 2024 05:51:26 +0000 Subject: [PATCH 11/38] Fixing removed newlines --- src/gc-debug.c | 1 + src/gc-heap-snapshot.cpp | 1 + src/gc-page-profiler.c | 1 + src/gc-pages.c | 1 + src/gc-stock.c | 7 +++++++ 5 files changed, 11 insertions(+) diff --git a/src/gc-debug.c b/src/gc-debug.c index ecd7f2328cada..2c8e1c6055414 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1,4 +1,5 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license + #ifndef MMTK_GC #include "gc-common.h" #include "gc-stock.h" diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index d3cb1e98d84a4..fcda11dad4f8a 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -1,4 +1,5 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license + #include "gc-heap-snapshot.h" #include "julia.h" diff --git a/src/gc-page-profiler.c b/src/gc-page-profiler.c index bfd1c74247df8..e5c6b91978731 100644 --- a/src/gc-page-profiler.c +++ b/src/gc-page-profiler.c @@ -1,4 +1,5 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license + #ifndef MMTK_GC #include "gc-page-profiler.h" #include "julia.h" diff --git a/src/gc-pages.c b/src/gc-pages.c index ed6e0ed20ba1c..976fc461d5b95 100644 --- a/src/gc-pages.c +++ b/src/gc-pages.c @@ -1,4 +1,5 @@ // This file is a part of Julia. License is MIT: https://julialang.org/license + #ifndef MMTK_GC #include "gc-common.h" #include "gc-stock.h" diff --git a/src/gc-stock.c b/src/gc-stock.c index 019ae481ce189..05f2f5930448c 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -1,4 +1,5 @@ // This file is a part of Julia.
License is MIT: https://julialang.org/license + #ifndef MMTK_GC #include "gc-common.h" #include "gc-stock.h" @@ -405,6 +406,7 @@ static void sweep_weak_refs(void) } } + STATIC_INLINE void jl_batch_accum_heap_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT { uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.alloc_acc) + sz; @@ -452,6 +454,7 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) return jl_valueof(&v->header); } + // Instrumented version of jl_gc_big_alloc_inner, called into by LLVM-generated code. JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz, jl_value_t *type) { @@ -3886,6 +3889,7 @@ JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) return sizeof(bigval_t); } +<<<<<<< HEAD <<<<<<< HEAD <<<<<<< HEAD @@ -3893,6 +3897,9 @@ JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) >>>>>>> c48a701f54 (WIP: Adding support for MMTk/Immix) ======= >>>>>>> 0aee3ba32a (Refactoring to be considered before adding MMTk) +======= + +>>>>>>> 30ac6f081d (Fixing removed newlines) JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) { arraylist_push(&ptls->gc_tls.sweep_objs, obj); From 77db2039905d73c9d6a30bef583d7ad15aea9ca1 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 28 Aug 2024 00:44:09 +0000 Subject: [PATCH 12/38] Refactoring to be considered before adding MMTk --- src/gc-stock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gc-stock.c b/src/gc-stock.c index 05f2f5930448c..5fd3b7efafead 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -3892,6 +3892,7 @@ JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) <<<<<<< HEAD <<<<<<< HEAD +<<<<<<< HEAD <<<<<<< HEAD ======= >>>>>>> c48a701f54 (WIP: Adding support for MMTk/Immix) @@ -3900,6 +3901,8 @@ JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) ======= >>>>>>> 30ac6f081d (Fixing removed newlines) +======= +>>>>>>> 2efcdf8335 (Refactoring to be considered before adding MMTk) JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) { arraylist_push(&ptls->gc_tls.sweep_objs, obj); From c5d3a40880cc08014ec6347372ea35c3249f8709 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Mon, 2 Sep 2024 06:07:02 +0000 Subject: [PATCH 13/38] Adding a few comments; Moving some functions to be closer together --- src/gc-common.c | 70 ----------- src/gc-mmtk.c | 311 ++++++++++++++-------------------------------- src/gc-tls-mmtk.h | 2 + 3 files changed, 94 insertions(+), 289 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 17f6f1330743b..417f12f26d64d 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -705,76 +705,6 @@ JL_DLLEXPORT void jl_throw_out_of_memory_error(void) jl_throw(jl_memory_exception); } -size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT -{ - const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; - size_t sz = layout->size * m->length; - if (layout->flags.arrayelem_isunion) - // account for isbits Union array selector bytes - sz += m->length; - return sz; -} - -// tracking Memorys with malloc'd storage -void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ - // This is **NOT** a GC safe point. 
- mallocmemory_t *ma; - if (ptls->gc_tls.heap.mafreelist == NULL) { - ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t)); - } - else { - ma = ptls->gc_tls.heap.mafreelist; - ptls->gc_tls.heap.mafreelist = ma->next; - } - ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned); - ma->next = ptls->gc_tls.heap.mallocarrays; - ptls->gc_tls.heap.mallocarrays = ma; -} - -int gc_logging_enabled = 0; - -JL_DLLEXPORT void jl_enable_gc_logging(int enable) { - gc_logging_enabled = enable; -} - -JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { - return gc_logging_enabled; -} - -// gc-debug common functions -// --- - -int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT -{ - int nf = (int)jl_datatype_nfields(vt); - for (int i = 1; i < nf; i++) { - if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) - return i - 1; - } - return nf - 1; -} - -int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT -{ - char *slot = (char*)_slot; - jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); - char *start = NULL; - size_t len = 0; - size_t elsize = sizeof(void*); - if (vt == jl_module_type) { - jl_module_t *m = (jl_module_t*)obj; - start = (char*)m->usings.items; - len = m->usings.len; - } - else if (vt == jl_simplevector_type) { - start = (char*)jl_svec_data(obj); - len = jl_svec_len(obj); - } - if (slot < start || slot >= start + elsize * len) - return -1; - return (slot - start) / elsize; -} - #ifdef __cplusplus } #endif diff --git a/src/gc-mmtk.c b/src/gc-mmtk.c index e459b0f12c41d..98a5612871be0 100644 --- a/src/gc-mmtk.c +++ b/src/gc-mmtk.c @@ -10,9 +10,10 @@ extern "C" { #endif -// For now we're using the same values as stock-gc. However -// for the heap size we use 70% of the free memory available -// since that is actually a hard limit in MMTk. +// FIXME: Should the values below be shared between both GC's? +// Note that MMTk uses a hard max heap limit, which is set by default +// as 70% of the free available memory. The min heap is set as the +// default_collect_interval variable below. // max_total_memory is a suggestion. We try very hard to stay // under this limit, but we will go above it rather than halting. @@ -33,7 +34,6 @@ static memsize_t max_total_memory = (memsize_t) MAX32HEAP; void jl_gc_init(void) { // TODO: use jl_options.heap_size_hint to set MMTk's fixed heap size? (see issue: https://github.com/mmtk/mmtk-julia/issues/167) - JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock"); arraylist_new(&to_finalize, 0); @@ -105,10 +105,6 @@ void jl_gc_init(void) { void jl_start_gc_threads(void) { jl_ptls_t ptls = jl_current_task->ptls; mmtk_initialize_collection((void *)ptls); - // int nthreads = jl_atomic_load_relaxed(&jl_n_threads); - // int ngcthreads = jl_n_gcthreads; - // int nmutator_threads = nthreads - ngcthreads; - // printf("nthreads = %d, ngcthreads = %d, nmutator_threads = %d\n", nthreads, ngcthreads, nmutator_threads); } void jl_init_thread_heap(struct _jl_tls_states_t *ptls) JL_NOTSAFEPOINT { @@ -135,38 +131,31 @@ void jl_free_thread_gc_state(struct _jl_tls_states_t *ptls) { mmtk_destroy_mutator(&ptls->gc_tls.mmtk_mutator); } -// FIXME: mmtk uses the same code as stock to enable/disable the GC -// Should this be moved to gc-common.c? 
-
-_Atomic(uint32_t) jl_gc_disable_counter = 1;
-
-JL_DLLEXPORT int jl_gc_enable(int on) {
-    jl_ptls_t ptls = jl_current_task->ptls;
-    int prev = !ptls->disable_gc;
-    ptls->disable_gc = (on == 0);
-    if (on && !prev) {
-        // disable -> enable
-        if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) {
-            gc_num.allocd += gc_num.deferred_alloc;
-            gc_num.deferred_alloc = 0;
-        }
-    }
-    else if (prev && !on) {
-        // enable -> disable
-        jl_atomic_fetch_add(&jl_gc_disable_counter, 1);
-        // check if the GC is running and wait for it to finish
-        jl_gc_safepoint_(ptls);
-    }
-    return prev;
+JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem) {
+    // MMTk currently does not allow setting the heap size at runtime
 }
 
-JL_DLLEXPORT int jl_gc_is_enabled(void) {
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return !ptls->disable_gc;
+
+inline void maybe_collect(jl_ptls_t ptls)
+{
+    // Just do a safe point for general maybe_collect
+    jl_gc_safepoint_(ptls);
 }
 
-JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem) {
-    // MMTk currently does not allow setting the heap size at runtime
+// This is only used for malloc. We need to know if we need to do GC. However, repeatedly
+// checking with MMTk (mmtk_gc_poll) is expensive, so we only check once every few allocations.
+static inline void malloc_maybe_collect(jl_ptls_t ptls, size_t sz)
+{
+    // We do not need to maintain malloc_sz_since_last_poll precisely. We just need to
+    // avoid calling mmtk_gc_poll too frequently while keeping our heap usage estimate
+    // as accurate as we can.
+    if (ptls->gc_tls.malloc_sz_since_last_poll > 4096) {
+        jl_atomic_store_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll, 0);
+        mmtk_gc_poll(ptls);
+    } else {
+        jl_atomic_fetch_add_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll, sz);
+        jl_gc_safepoint_(ptls);
+    }
 }
 
 JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) {
@@ -182,7 +171,12 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) {
     mmtk_handle_user_collection_request(ptls, collection);
 }
 
-// same as above, some of these are identical to the implementation in gc stock
+// FIXME: The functions combine_thread_gc_counts and reset_thread_gc_counts
+// are currently nearly identical for MMTk and for stock. However, the stats
+// are likely different (e.g., MMTk doesn't track the bytes allocated in the fastpath,
+// but only when the slowpath is called). We might need to adapt these later so that
+// the statistics are the same or as close as possible for each GC.
+
 static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTSAFEPOINT
 {
     int gc_n_threads;
@@ -228,31 +222,6 @@ void reset_thread_gc_counts(void) JL_NOTSAFEPOINT
     }
 }
 
-// weak references
-// ---
-JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value)
-{
-    jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type);
-    wr->value = value; // NOTE: wb not needed here
-    mmtk_add_weak_candidate(wr);
-    return wr;
-}
-
-
-// allocation
-int jl_gc_classify_pools(size_t sz, int *osize)
-{
-    if (sz > GC_MAX_SZCLASS)
-        return -1; // call big alloc function
-    size_t allocsz = sz + sizeof(jl_taggedvalue_t);
-    *osize = LLT_ALIGN(allocsz, 16);
-    return 0; // use MMTk's fastpath logic
-}
-
-int64_t last_gc_total_bytes = 0;
-int64_t last_live_bytes = 0; // live_bytes at last collection
-int64_t live_bytes = 0;
-
 // Retrieves Julia's `GC_Num` (structure that stores GC statistics).
JL_DLLEXPORT jl_gc_num_t jl_gc_num(void) { jl_gc_num_t num = gc_num; @@ -260,6 +229,10 @@ JL_DLLEXPORT jl_gc_num_t jl_gc_num(void) { return num; } +int64_t last_gc_total_bytes = 0; +int64_t last_live_bytes = 0; // live_bytes at last collection +int64_t live_bytes = 0; + JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT { int64_t oldtb = last_gc_total_bytes; int64_t newtb; @@ -325,82 +298,38 @@ JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void) return max_total_memory; } +// weak references +// --- +JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value) +{ + jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type); + wr->value = value; // NOTE: wb not needed here + mmtk_add_weak_candidate(wr); + return wr; +} + +// allocation + extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr); extern void mmtk_object_reference_write_slow(void* mutator, const void* parent, const void* ptr); extern void* mmtk_alloc(void* mutator, size_t size, size_t align, size_t offset, int allocator); extern void mmtk_post_alloc(void* mutator, void* refer, size_t bytes, int allocator); - - extern const void* MMTK_SIDE_LOG_BIT_BASE_ADDRESS; extern const void* MMTK_SIDE_VO_BIT_BASE_ADDRESS; - -// These need to be constants. - -#define MMTK_OBJECT_BARRIER (1) -// Stickyimmix needs write barrier. Immix does not need write barrier. -#ifdef MMTK_PLAN_IMMIX -#define MMTK_NEEDS_WRITE_BARRIER (0) -#endif -#ifdef MMTK_PLAN_STICKYIMMIX -#define MMTK_NEEDS_WRITE_BARRIER (1) -#endif - -#ifdef MMTK_CONSERVATIVE_SCAN -#define MMTK_NEEDS_VO_BIT (1) -#else -#define MMTK_NEEDS_VO_BIT (0) -#endif +extern void mmtk_store_obj_size_c(void* obj, size_t size); #define MMTK_DEFAULT_IMMIX_ALLOCATOR (0) #define MMTK_IMMORTAL_BUMP_ALLOCATOR (0) -// Directly call into MMTk for write barrier (debugging only) -inline void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSAFEPOINT -{ - jl_task_t *ct = jl_current_task; - jl_ptls_t ptls = ct->ptls; - mmtk_object_reference_write_post(&ptls->gc_tls.mmtk_mutator, parent, ptr); -} - -// Fastpath. Return 1 if we should go to slowpath -inline int mmtk_gc_wb_fast_check(const void *parent, const void *ptr) JL_NOTSAFEPOINT -{ - if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) { - intptr_t addr = (intptr_t) (void*) parent; - uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6); - intptr_t shift = (addr >> 3) & 0b111; - uint8_t byte_val = *meta_addr; - return ((byte_val >> shift) & 1) == 1; - } else { - return 0; - } -} - -// Slowpath. 
-inline void mmtk_gc_wb_slow(const void *parent, const void *ptr) JL_NOTSAFEPOINT -{ - if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) { - jl_task_t *ct = jl_current_task; - jl_ptls_t ptls = ct->ptls; - mmtk_object_reference_write_slow(&ptls->gc_tls.mmtk_mutator, parent, ptr); - } -} -inline void mmtk_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT -{ - if (mmtk_gc_wb_fast_check(parent, ptr)) { - mmtk_gc_wb_slow(parent, ptr); - } -} - -inline void mmtk_gc_wb_binding(const void *bnd, const void *val) JL_NOTSAFEPOINT +int jl_gc_classify_pools(size_t sz, int *osize) { - if (mmtk_gc_wb_fast_check(bnd, val)) { - jl_astaggedvalue(bnd)->bits.gc = 2; // to indicate that the buffer is a binding - mmtk_gc_wb_slow(bnd, val); - } + if (sz > GC_MAX_SZCLASS) + return -1; // call big alloc function + size_t allocsz = sz + sizeof(jl_taggedvalue_t); + *osize = LLT_ALIGN(allocsz, 16); + return 0; // use MMTk's fastpath logic } - #define MMTK_MIN_ALIGNMENT 4 // MMTk assumes allocation size is aligned to min alignment. inline size_t mmtk_align_alloc_sz(size_t sz) JL_NOTSAFEPOINT @@ -429,19 +358,9 @@ inline void mmtk_immix_post_alloc_slow(MMTkMutatorContext* mutator, void* obj, s mmtk_post_alloc(mutator, obj, size, 0); } -inline void mmtk_set_vo_bit(void* obj) { - intptr_t addr = (intptr_t) obj; - intptr_t shift = (addr >> 3) & 0b111; - uint8_t* vo_meta_addr = (uint8_t*) (MMTK_SIDE_VO_BIT_BASE_ADDRESS) + (addr >> 6); - uint8_t new_val = (*vo_meta_addr) | (1 << shift); - (*vo_meta_addr) = new_val; -} - inline void mmtk_immix_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) { - if (MMTK_NEEDS_VO_BIT) { - // set VO bit - mmtk_set_vo_bit(obj); - } + // FIXME: for now, we do nothing + // but when supporting moving, this is where we set the valid object (VO) bit } inline void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) { @@ -450,79 +369,12 @@ inline void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t size, } inline void mmtk_immortal_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) { - if (MMTK_NEEDS_VO_BIT) { - // set VO bit - mmtk_set_vo_bit(obj); - } - - if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) { - intptr_t addr = (intptr_t) obj; - intptr_t shift = (addr >> 3) & 0b111; - uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6); - while(1) { - uint8_t old_val = *meta_addr; - uint8_t new_val = old_val | (1 << shift); - if (jl_atomic_cmpswap((_Atomic(uint8_t)*)meta_addr, &old_val, new_val)) { - break; - } - } - } -} - -// mutex for page profile -uv_mutex_t page_profile_lock; - -JL_DLLEXPORT void jl_gc_take_page_profile(ios_t *stream) -{ - uv_mutex_lock(&page_profile_lock); - const char *str = "Page profiler in unsupported in MMTk."; - ios_write(stream, str, strlen(str)); - uv_mutex_unlock(&page_profile_lock); -} - -// this seems to be needed by the gc tests -#define JL_GC_N_MAX_POOLS 51 -JL_DLLEXPORT double jl_gc_page_utilization_stats[JL_GC_N_MAX_POOLS]; - -STATIC_INLINE void gc_dump_page_utilization_data(void) JL_NOTSAFEPOINT -{ - // FIXME: MMTk would have to provide its own stats -} - -#define MMTK_GC_PAGE_SZ (1 << 12) // MMTk's page size is defined in mmtk-core constants - -JL_DLLEXPORT uint64_t jl_get_pg_size(void) -{ - return MMTK_GC_PAGE_SZ; -} - - -extern void mmtk_store_obj_size_c(void* obj, size_t size); - -inline void maybe_collect(jl_ptls_t ptls) -{ - // Just do a safe point for general maybe_collect - jl_gc_safepoint_(ptls); -} - -// This is only 
used for malloc. We need to know if we need to do GC. However, keeping checking with MMTk (mmtk_gc_poll), -// is expensive. So we only check for every few allocations. -static inline void malloc_maybe_collect(jl_ptls_t ptls, size_t sz) -{ - // We do not need to carefully maintain malloc_sz_since_last_poll. We just need to - // avoid using mmtk_gc_poll too frequently, and try to be precise on our heap usage - // as much as we can. - if (ptls->gc_tls.malloc_sz_since_last_poll > 4096) { - jl_atomic_store_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll, 0); - mmtk_gc_poll(ptls); - } else { - jl_atomic_fetch_add_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll, sz); - jl_gc_safepoint_(ptls); - } + // FIXME: Similarly, for now, we do nothing + // but when supporting moving, this is where we set the valid object (VO) bit + // and log (old gen) bit } // allocation wrappers that track allocation and let collection run - JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) { jl_gcframe_t **pgcstack = jl_get_pgcstack(); @@ -601,7 +453,6 @@ jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT return jl_valueof(o); } - JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int osize, size_t align, void *ty) { // safepoint @@ -628,11 +479,6 @@ JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int osize, siz return v; } -void jl_gc_notify_image_load(const char* img_data, size_t len) -{ - mmtk_set_vm_space((void*)img_data, len); -} - JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t sz) { // safepoint @@ -735,6 +581,38 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) return b; } +void jl_gc_notify_image_load(const char* img_data, size_t len) +{ + mmtk_set_vm_space((void*)img_data, len); +} + +// mutex for page profile +uv_mutex_t page_profile_lock; + +JL_DLLEXPORT void jl_gc_take_page_profile(ios_t *stream) +{ + uv_mutex_lock(&page_profile_lock); + const char *str = "Page profiler in unsupported in MMTk."; + ios_write(stream, str, strlen(str)); + uv_mutex_unlock(&page_profile_lock); +} + +// this seems to be needed by the gc tests +#define JL_GC_N_MAX_POOLS 51 +JL_DLLEXPORT double jl_gc_page_utilization_stats[JL_GC_N_MAX_POOLS]; + +STATIC_INLINE void gc_dump_page_utilization_data(void) JL_NOTSAFEPOINT +{ + // FIXME: MMTk would have to provide its own stats +} + +#define MMTK_GC_PAGE_SZ (1 << 12) // MMTk's page size is defined in mmtk-core constants + +JL_DLLEXPORT uint64_t jl_get_pg_size(void) +{ + return MMTK_GC_PAGE_SZ; +} + // Not used by mmtk // Number of GC threads that may run parallel marking int jl_n_markthreads; @@ -791,12 +669,7 @@ void jl_gc_debug_critical_error(void) JL_NOTSAFEPOINT { } -int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT -{ - return 0; -} - -int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT +int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT { return 0; } diff --git a/src/gc-tls-mmtk.h b/src/gc-tls-mmtk.h index 2eb5f2a6a44d9..64a1bae192445 100644 --- a/src/gc-tls-mmtk.h +++ b/src/gc-tls-mmtk.h @@ -9,6 +9,8 @@ extern "C" { #endif +// This mostly remove some fields that are not used by MMTk + typedef struct { // variable for tracking weak references small_arraylist_t weak_refs; From c26632ed5d2be1effebe86bfa5ca844195933095 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 25 Sep 2024 01:20:29 +0000 Subject: [PATCH 14/38] Fixing merge conflicts --- src/gc-stock.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/gc-stock.c b/src/gc-stock.c index 5fd3b7efafead..078635f18e3ce 
100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -3889,20 +3889,6 @@ JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) return sizeof(bigval_t); } -<<<<<<< HEAD -<<<<<<< HEAD - -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> c48a701f54 (WIP: Adding support for MMTk/Immix) -======= ->>>>>>> 0aee3ba32a (Refactoring to be considered before adding MMTk) -======= - ->>>>>>> 30ac6f081d (Fixing removed newlines) -======= ->>>>>>> 2efcdf8335 (Refactoring to be considered before adding MMTk) JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) { arraylist_push(&ptls->gc_tls.sweep_objs, obj); From c283442edf340d882b13c9ec887a6d9bd44b2527 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 25 Sep 2024 01:24:44 +0000 Subject: [PATCH 15/38] Applying changes from refactoring before adding MMTk --- src/gc-stock.h | 16 ++++++++++++++++ src/julia.h | 2 +- src/julia_internal.h | 2 +- src/stackwalk.c | 2 -- 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/gc-stock.h b/src/gc-stock.h index 8e563f32ab9d3..6f75dcd014176 100644 --- a/src/gc-stock.h +++ b/src/gc-stock.h @@ -423,6 +423,21 @@ STATIC_INLINE int gc_ith_parallel_collector_thread_id(int i) JL_NOTSAFEPOINT return gc_first_tid + i; } +STATIC_INLINE int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT +{ + return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); +} + +STATIC_INLINE int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT +{ + if (jl_n_sweepthreads == 0) { + return 0; + } + int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); + int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; + return tid == concurrent_collector_thread_id; +} + STATIC_INLINE int gc_random_parallel_collector_thread_id(jl_ptls_t ptls) JL_NOTSAFEPOINT { assert(jl_n_markthreads > 0); @@ -699,4 +714,5 @@ void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect #endif + #endif // !MMTK_GC diff --git a/src/julia.h b/src/julia.h index db57db1fbeb38..abb8a57ff13b0 100644 --- a/src/julia.h +++ b/src/julia.h @@ -850,7 +850,7 @@ static inline jl_value_t *jl_to_typeof(uintptr_t t) return (jl_value_t*)t; } #else -extern JL_DLLEXPORT jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; +extern JL_HIDDEN jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; static inline jl_value_t *jl_to_typeof(uintptr_t t) { if (t < (jl_max_tags << 4)) diff --git a/src/julia_internal.h b/src/julia_internal.h index d5013601a9124..e677f40907dfd 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -1052,7 +1052,7 @@ STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr) return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 4; } extern _Atomic(uint32_t) jl_gc_running; -extern JL_DLLEXPORT _Atomic(uint32_t) jl_gc_disable_counter; +extern _Atomic(uint32_t) jl_gc_disable_counter; // All the functions are safe to be called from within a signal handler // provided that the thread will not be interrupted by another asynchronous // signal. 
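[Editor's note: the gc-stock.h hunk above turns gc_is_parallel_collector_thread and gc_is_concurrent_collector_thread into static inlines. As a reading aid, here is a minimal, self-contained sketch of the thread-id partitioning those predicates encode. The three globals mirror gc_first_tid, jl_n_markthreads and jl_n_sweepthreads from the patch; the concrete values and the main() harness are invented for illustration, and the body of gc_last_parallel_collector_thread_id is an assumption, since it is referenced but not shown in this hunk.]

    #include <assert.h>

    static int gc_first_tid = 4;      // id of the first parallel GC thread (illustrative)
    static int jl_n_markthreads = 2;  // number of parallel mark threads (illustrative)
    static int jl_n_sweepthreads = 1; // 0 or 1 concurrent sweep thread (illustrative)

    // assumed definition: the parallel markers occupy a contiguous id range
    static int gc_last_parallel_collector_thread_id(void)
    {
        return gc_first_tid + jl_n_markthreads - 1;
    }

    // ids [0, gc_first_tid) are mutators; [gc_first_tid, last] are parallel
    // markers; last+1 (only if jl_n_sweepthreads != 0) is the concurrent sweeper
    static int is_parallel(int tid)
    {
        return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id();
    }

    static int is_concurrent(int tid)
    {
        return jl_n_sweepthreads != 0 &&
               tid == gc_last_parallel_collector_thread_id() + 1;
    }

    int main(void)
    {
        assert(!is_parallel(3) && is_parallel(4) && is_parallel(5) && !is_parallel(6));
        assert(!is_concurrent(5) && is_concurrent(6));
        return 0;
    }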
diff --git a/src/stackwalk.c b/src/stackwalk.c index e6fc2c7bbf56a..a1de3a6d61a07 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -1294,8 +1294,6 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT } extern int gc_first_tid; -extern int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT; -extern int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT; // Print backtraces for all live tasks, for all threads, to jl_safe_printf stderr JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT From 01aa62331858a7810efbcf5857edfda990a93e72 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 28 Aug 2024 00:44:09 +0000 Subject: [PATCH 16/38] Refactoring to be considered before adding MMTk --- src/gc-common.c | 156 +++++++++++++++++++++++++++++++++++++++++++ src/gc-common.h | 6 ++ src/gc-debug.c | 41 +----------- src/gc-interface.h | 12 ++++ src/gc-stacks.c | 4 +- src/gc-stock.c | 156 ++++++++++++------------------------------- src/gc-stock.h | 21 ------ src/julia.h | 2 +- src/julia_internal.h | 26 +------- src/scheduler.c | 11 +++ src/stackwalk.c | 4 +- src/staticdata.c | 2 + 12 files changed, 237 insertions(+), 204 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index ee461b576ea9e..2ec167caa667a 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -20,6 +20,11 @@ extern "C" { jl_gc_num_t gc_num = {0}; +JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) +{ + return gc_num.total_time; +} + // =========================================================================== // // GC Callbacks // =========================================================================== // @@ -489,6 +494,87 @@ jl_ptls_t* gc_all_tls_states; // MISC // =========================================================================== // +JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return jl_gc_new_weakref_th(ptls, value); +} + +JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) +{ + return jl_gc_alloc(ptls, sz, ty); +} + +JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return jl_gc_alloc(ptls, sz, NULL); +} + +// allocation wrappers that save the size of allocations, to allow using +// jl_gc_counted_* functions with a libc-compatible API. 
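+// (editor's note, added comment) Layout used by all the wrappers below:
+//     [int64 size][int64 padding][payload ...]
+// i.e. the payload is offset by JL_SMALL_BYTE_ALIGNMENT (16) bytes from the
+// raw allocation, which is why `p + 2` / `p - 2` (two 8-byte words) appear
+// throughout.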
+ +JL_DLLEXPORT void *jl_malloc(size_t sz) +{ + int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT); + if (p == NULL) + return NULL; + p[0] = sz; + return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +//_unchecked_calloc does not check for potential overflow of nm*sz +STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) { + size_t nmsz = nm*sz; + int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1); + if (p == NULL) + return NULL; + p[0] = nmsz; + return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz) +{ + if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT) + return NULL; + return _unchecked_calloc(nm, sz); +} + +JL_DLLEXPORT void jl_free(void *p) +{ + if (p != NULL) { + int64_t *pp = (int64_t *)p - 2; + size_t sz = pp[0]; + jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT); + } +} + +JL_DLLEXPORT void *jl_realloc(void *p, size_t sz) +{ + int64_t *pp; + size_t szold; + if (p == NULL) { + pp = NULL; + szold = 0; + } + else { + pp = (int64_t *)p - 2; + szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT; + } + int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT); + if (pnew == NULL) + return NULL; + pnew[0] = sz; + return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +// allocator entry points + +JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) +{ + return jl_gc_alloc_(ptls, sz, ty); +} + const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT { @@ -501,6 +587,76 @@ JL_DLLEXPORT void jl_throw_out_of_memory_error(void) jl_throw(jl_memory_exception); } +size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT +{ + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; + size_t sz = layout->size * m->length; + if (layout->flags.arrayelem_isunion) + // account for isbits Union array selector bytes + sz += m->length; + return sz; +} + +// tracking Memorys with malloc'd storage +void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ + // This is **NOT** a GC safe point. 
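+    // (editor's note, added comment) Tracking nodes are plain malloc'd cells:
+    // reuse one from the per-thread freelist when available, otherwise allocate
+    // a fresh one. The low bit of the stored pointer (set below from `isaligned`)
+    // records whether `m` points to aligned memory.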
+ mallocmemory_t *ma; + if (ptls->gc_tls.heap.mafreelist == NULL) { + ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t)); + } + else { + ma = ptls->gc_tls.heap.mafreelist; + ptls->gc_tls.heap.mafreelist = ma->next; + } + ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned); + ma->next = ptls->gc_tls.heap.mallocarrays; + ptls->gc_tls.heap.mallocarrays = ma; +} + +int gc_logging_enabled = 0; + +JL_DLLEXPORT void jl_enable_gc_logging(int enable) { + gc_logging_enabled = enable; +} + +JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { + return gc_logging_enabled; +} + +// gc-debug common functions +// --- + +int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT +{ + int nf = (int)jl_datatype_nfields(vt); + for (int i = 1; i < nf; i++) { + if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) + return i - 1; + } + return nf - 1; +} + +int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT +{ + char *slot = (char*)_slot; + jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); + char *start = NULL; + size_t len = 0; + size_t elsize = sizeof(void*); + if (vt == jl_module_type) { + jl_module_t *m = (jl_module_t*)obj; + start = (char*)m->usings.items; + len = m->usings.len; + } + else if (vt == jl_simplevector_type) { + start = (char*)jl_svec_data(obj); + len = jl_svec_len(obj); + } + if (slot < start || slot >= start + elsize * len) + return -1; + return (slot - start) / elsize; +} + #ifdef __cplusplus } #endif diff --git a/src/gc-common.h b/src/gc-common.h index 4d53830442a7d..154b9659e9ccb 100644 --- a/src/gc-common.h +++ b/src/gc-common.h @@ -53,6 +53,12 @@ extern jl_gc_callback_list_t *gc_cblist_notify_gc_pressure; // malloc wrappers, aligned allocation // =========================================================================== // +// data structure for tracking malloc'd genericmemory. 
+typedef struct _mallocmemory_t {
+    jl_genericmemory_t *a; // lowest bit is tagged if this is aligned memory
+    struct _mallocmemory_t *next;
+} mallocmemory_t;
+
 #if defined(_OS_WINDOWS_)
 STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align)
 {
diff --git a/src/gc-debug.c b/src/gc-debug.c
index 19dd93af5f236..d05fb4b49e9f7 100644
--- a/src/gc-debug.c
+++ b/src/gc-debug.c
@@ -1105,46 +1105,7 @@ void gc_count_pool(void)
     jl_safe_printf("************************\n");
 }
 
-int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT
-{
-    int nf = (int)jl_datatype_nfields(vt);
-    for (int i = 1; i < nf; i++) {
-        if (slot < (void*)((char*)obj + jl_field_offset(vt, i)))
-            return i - 1;
-    }
-    return nf - 1;
-}
-
-int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT
-{
-    char *slot = (char*)_slot;
-    jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj);
-    char *start = NULL;
-    size_t len = 0;
-    size_t elsize = sizeof(void*);
-    if (vt == jl_module_type) {
-        jl_module_t *m = (jl_module_t*)obj;
-        start = (char*)m->usings.items;
-        len = m->usings.len;
-    }
-    else if (vt == jl_simplevector_type) {
-        start = (char*)jl_svec_data(obj);
-        len = jl_svec_len(obj);
-    }
-    if (slot < start || slot >= start + elsize * len)
-        return -1;
-    return (slot - start) / elsize;
-}
-
-static int gc_logging_enabled = 0;
-
-JL_DLLEXPORT void jl_enable_gc_logging(int enable) {
-    gc_logging_enabled = enable;
-}
-
-JL_DLLEXPORT int jl_is_gc_logging_enabled(void) {
-    return gc_logging_enabled;
-}
+extern int gc_logging_enabled;
 
 void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t live_bytes) JL_NOTSAFEPOINT {
     if (!gc_logging_enabled) {
         return;
diff --git a/src/gc-interface.h b/src/gc-interface.h
index e543b4b5879f1..682f22344d69d 100644
--- a/src/gc-interface.h
+++ b/src/gc-interface.h
@@ -128,6 +128,13 @@ JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void);
 // Allocation
 // ========================================================================= //
 
+// On GCC, this function is inlined when sz is constant (see julia_internal.h)
+// In general, this function should implement allocation and should use the specific GC's logic
+// to decide whether to allocate a small or a large object. Finally, note that this function
+// **must** also set the type of the returned object to be `ty`. The type `ty` may also be used to record
+// an allocation of that type in the allocation profiler.
+struct _jl_value_t *jl_gc_alloc_(struct _jl_tls_states_t * ptls, size_t sz, void *ty);
+
 // Allocates small objects and increments Julia allocation counters. Size of the object
 // header must be included in the object size. The (possibly unused in some implementations)
 // offset to the arena in which we're allocating is passed in the second parameter, and the
@@ -211,6 +218,11 @@ JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align,
 // object being allocated and will be used to set the object header.
 struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT;
 
+// This function notifies the GC about memory addresses that are set when loading the boot image.
+// The GC may use that information to, for instance, determine that such objects should
+// be treated as marked and as belonging to the old generation in nursery collections.
+void jl_gc_notify_image_load(const char* img_data, size_t len); + // ========================================================================= // // Runtime Write-Barriers // ========================================================================= // diff --git a/src/gc-stacks.c b/src/gc-stacks.c index 783129ea97693..8c44b65284386 100644 --- a/src/gc-stacks.c +++ b/src/gc-stacks.c @@ -46,7 +46,7 @@ static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT } -static void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT +void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT { VirtualFree(stkbuf, 0, MEM_RELEASE); jl_atomic_fetch_add_relaxed(&num_stack_mappings, -1); @@ -81,7 +81,7 @@ static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT return stk; } -static void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT +void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT { munmap(stkbuf, bufsz); jl_atomic_fetch_add_relaxed(&num_stack_mappings, -1); diff --git a/src/gc-stock.c b/src/gc-stock.c index 6b97881909bbd..6ebac8a0c079e 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -555,24 +555,6 @@ static void sweep_big(jl_ptls_t ptls) JL_NOTSAFEPOINT gc_time_big_end(); } -// tracking Memorys with malloc'd storage - -void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ - // This is **NOT** a GC safe point. - mallocmemory_t *ma; - if (ptls->gc_tls.heap.mafreelist == NULL) { - ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t)); - } - else { - ma = ptls->gc_tls.heap.mafreelist; - ptls->gc_tls.heap.mafreelist = ma->next; - } - ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned); - ma->next = ptls->gc_tls.heap.mallocarrays; - ptls->gc_tls.heap.mallocarrays = ma; -} - - void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT { jl_ptls_t ptls = jl_current_task->ptls; @@ -649,17 +631,6 @@ void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT reset_thread_gc_counts(); } -size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT -{ - const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; - size_t sz = layout->size * m->length; - if (layout->flags.arrayelem_isunion) - // account for isbits Union array selector bytes - sz += m->length; - return sz; -} - - static void jl_gc_free_memory(jl_value_t *v, int isaligned) JL_NOTSAFEPOINT { assert(jl_is_genericmemory(v)); @@ -818,6 +789,29 @@ jl_value_t *jl_gc_small_alloc_noinline(jl_ptls_t ptls, int offset, int osize) { return jl_gc_small_alloc_inner(ptls, offset, osize); } +// Size does NOT include the type tag!! +inline jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) +{ + jl_value_t *v; + const size_t allocsz = sz + sizeof(jl_taggedvalue_t); + if (sz <= GC_MAX_SZCLASS) { + int pool_id = jl_gc_szclass(allocsz); + jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id]; + int osize = jl_gc_sizeclasses[pool_id]; + // We call `jl_gc_small_alloc_noinline` instead of `jl_gc_small_alloc` to avoid double-counting in + // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) 
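+    // (editor's note, added comment) The second argument is the pool's byte
+    // offset inside the thread-local state, which lets the callee recover the
+    // pool pointer from `ptls` without an extra parameter.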
+ v = jl_gc_small_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); + } + else { + if (allocsz < sz) // overflow in adding offs, size was "negative" + jl_throw(jl_memory_exception); + v = jl_gc_big_alloc_noinline(ptls, allocsz); + } + jl_set_typeof(v, ty); + maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty); + return v; +} + int jl_gc_classify_pools(size_t sz, int *osize) { if (sz > GC_MAX_SZCLASS) @@ -2794,6 +2788,21 @@ static void sweep_finalizer_list(arraylist_t *list) list->len = j; } +int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT +{ + return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); +} + +int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT +{ + if (jl_n_sweepthreads == 0) { + return 0; + } + int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); + int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; + return tid == concurrent_collector_thread_id; +} + // collector entry point and control _Atomic(uint32_t) jl_gc_disable_counter = 1; @@ -2832,11 +2841,6 @@ JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT *bytes = (num.total_allocd + num.deferred_alloc + num.allocd); } -JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) -{ - return gc_num.total_time; -} - JL_DLLEXPORT jl_gc_num_t jl_gc_num(void) { jl_gc_num_t num = gc_num; @@ -3397,13 +3401,6 @@ void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq) gc_mark_roots(mq); } -// allocator entry points - -JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) -{ - return jl_gc_alloc_(ptls, sz, ty); -} - // Per-thread initialization void jl_init_thread_heap(jl_ptls_t ptls) { @@ -3685,63 +3682,6 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size return data; } -// allocation wrappers that save the size of allocations, to allow using -// jl_gc_counted_* functions with a libc-compatible API. 
- -JL_DLLEXPORT void *jl_malloc(size_t sz) -{ - int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT); - if (p == NULL) - return NULL; - p[0] = sz; - return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - -//_unchecked_calloc does not check for potential overflow of nm*sz -STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) { - size_t nmsz = nm*sz; - int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1); - if (p == NULL) - return NULL; - p[0] = nmsz; - return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - -JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz) -{ - if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT) - return NULL; - return _unchecked_calloc(nm, sz); -} - -JL_DLLEXPORT void jl_free(void *p) -{ - if (p != NULL) { - int64_t *pp = (int64_t *)p - 2; - size_t sz = pp[0]; - jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT); - } -} - -JL_DLLEXPORT void *jl_realloc(void *p, size_t sz) -{ - int64_t *pp; - size_t szold; - if (p == NULL) { - pp = NULL; - szold = 0; - } - else { - pp = (int64_t *)p - 2; - szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT; - } - int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT); - if (pnew == NULL) - return NULL; - pnew[0] = sz; - return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - // allocating blocks for Arrays and Strings JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) @@ -3875,18 +3815,6 @@ jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT return jl_valueof(o); } -JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_new_weakref_th(ptls, value); -} - -JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_alloc(ptls, sz, NULL); -} - JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void) { if (jl_is_initialized()) { @@ -4014,14 +3942,14 @@ JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) } -JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) +JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) { - return jl_gc_alloc(ptls, sz, ty); + arraylist_push(&ptls->gc_tls.sweep_objs, obj); } -JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) +void jl_gc_notify_image_load(const char* img_data, size_t len) { - arraylist_push(&ptls->gc_tls.sweep_objs, obj); + // Do nothing } #ifdef __cplusplus diff --git a/src/gc-stock.h b/src/gc-stock.h index 46f7d3e11e105..cc661ce6e1600 100644 --- a/src/gc-stock.h +++ b/src/gc-stock.h @@ -106,12 +106,6 @@ JL_EXTENSION typedef struct _bigval_t { // must be 64-byte aligned here, in 32 & 64 bit modes } bigval_t; -// data structure for tracking malloc'd genericmemory. 
-typedef struct _mallocmemory_t { - jl_genericmemory_t *a; // lowest bit is tagged if this is aligned memory - struct _mallocmemory_t *next; -} mallocmemory_t; - // pool page metadata typedef struct _jl_gc_pagemeta_t { // next metadata structure in per-thread list @@ -428,21 +422,6 @@ STATIC_INLINE int gc_ith_parallel_collector_thread_id(int i) JL_NOTSAFEPOINT return gc_first_tid + i; } -STATIC_INLINE int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT -{ - return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); -} - -STATIC_INLINE int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT -{ - if (jl_n_sweepthreads == 0) { - return 0; - } - int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); - int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; - return tid == concurrent_collector_thread_id; -} - STATIC_INLINE int gc_random_parallel_collector_thread_id(jl_ptls_t ptls) JL_NOTSAFEPOINT { assert(jl_n_markthreads > 0); diff --git a/src/julia.h b/src/julia.h index ed3d9bf825658..b74de3060d26a 100644 --- a/src/julia.h +++ b/src/julia.h @@ -858,7 +858,7 @@ static inline jl_value_t *jl_to_typeof(uintptr_t t) return (jl_value_t*)t; } #else -extern JL_HIDDEN jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; +extern JL_DLLEXPORT jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; static inline jl_value_t *jl_to_typeof(uintptr_t t) { if (t < (jl_max_tags << 4)) diff --git a/src/julia_internal.h b/src/julia_internal.h index 20d90fede3d5e..04857d440b643 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -513,30 +513,6 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz) JL_NOTSAFE #define GC_MAX_SZCLASS (2032-sizeof(void*)) static_assert(ARRAY_CACHE_ALIGN_THRESHOLD > GC_MAX_SZCLASS, ""); - -// Size does NOT include the type tag!! -STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) -{ - jl_value_t *v; - const size_t allocsz = sz + sizeof(jl_taggedvalue_t); - if (sz <= GC_MAX_SZCLASS) { - int pool_id = jl_gc_szclass(allocsz); - jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id]; - int osize = jl_gc_sizeclasses[pool_id]; - // We call `jl_gc_small_alloc_noinline` instead of `jl_gc_small_alloc` to avoid double-counting in - // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) - v = jl_gc_small_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); - } - else { - if (allocsz < sz) // overflow in adding offs, size was "negative" - jl_throw(jl_memory_exception); - v = jl_gc_big_alloc_noinline(ptls, allocsz); - } - jl_set_typeof(v, ty); - maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty); - return v; -} - /* Programming style note: When using jl_gc_alloc, do not JL_GC_PUSH it into a * gc frame, until it has been fully initialized. An uninitialized value in a * gc frame can crash upon encountering the first safepoint. By delaying use of @@ -1077,7 +1053,7 @@ STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr) return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 4; } extern _Atomic(uint32_t) jl_gc_running; -extern _Atomic(uint32_t) jl_gc_disable_counter; +extern JL_DLLEXPORT _Atomic(uint32_t) jl_gc_disable_counter; // All the functions are safe to be called from within a signal handler // provided that the thread will not be interrupted by another asynchronous // signal. 
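[Editor's note: the julia_internal.h hunk above exports jl_gc_disable_counter because the counter is now shared by both GC implementations; patch 18 later moves jl_gc_enable itself into gc-common.c. As shown there, jl_gc_enable(0) adds a veto to the global counter and jl_gc_enable(1) removes one, with the previous per-thread state returned to the caller, so the idiomatic (illustrative, not from this series) usage is save/restore:]

    int prev = jl_gc_enable(0);   // veto collection around a GC-sensitive region
    // ... work that must not be interrupted by a collection ...
    jl_gc_enable(prev);           // restore the caller's setting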
diff --git a/src/scheduler.c b/src/scheduler.c index bb2f85b52283f..b85a481588e4f 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -80,9 +80,20 @@ JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSA return 1; } +<<<<<<< HEAD // GC functions used extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT; +======= +// parallel task runtime +// --- + +JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max) // [0, n) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return cong(max, &ptls->rngseed); +} +>>>>>>> 4f39869d04 (Refactoring to be considered before adding MMTk) // initialize the threading infrastructure // (called only by the main thread) diff --git a/src/stackwalk.c b/src/stackwalk.c index 6aa36fa8b499c..5f28b61c4a8fe 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -5,7 +5,7 @@ utilities for walking the stack and looking up information about code addresses */ #include -#include "gc-stock.h" +#include "gc-common.h" #include "julia.h" #include "julia_internal.h" #include "threading.h" @@ -1294,6 +1294,8 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT } extern int gc_first_tid; +extern int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT; +extern int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT; // Print backtraces for all live tasks, for all threads, to jl_safe_printf stderr JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT diff --git a/src/staticdata.c b/src/staticdata.c index 0a8cbe6db7c67..bba35e6dcb5f9 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -657,6 +657,7 @@ static void jl_load_sysimg_so(void) plen = (size_t *)&jl_system_image_size; else jl_dlsym(jl_sysimg_handle, "jl_system_image_size", (void **)&plen, 1); + jl_gc_notify_image_load(sysimg_data, *plen); jl_restore_system_image_data(sysimg_data, *plen); } @@ -4054,6 +4055,7 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j jl_dlsym(pkgimg_handle, "jl_system_image_data", (void **)&pkgimg_data, 1); size_t *plen; jl_dlsym(pkgimg_handle, "jl_system_image_size", (void **)&plen, 1); + jl_gc_notify_image_load(pkgimg_data, *plen); jl_image_t pkgimage = jl_init_processor_pkgimg(pkgimg_handle); From e10e3caef963bd1086deb3fb7d42f014ca2a3771 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 29 Aug 2024 04:57:59 +0000 Subject: [PATCH 17/38] Removing jl_gc_notify_image_load, since it's a new function and not part of the refactoring --- src/gc-interface.h | 5 ----- src/gc-stock.c | 5 ----- src/staticdata.c | 2 -- 3 files changed, 12 deletions(-) diff --git a/src/gc-interface.h b/src/gc-interface.h index 682f22344d69d..25ffed4524f0c 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -218,11 +218,6 @@ JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, // object being allocated and will be used to set the object header. struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT; -// This function notifies the GC about memory addresses that are set when loading the boot image. -// The GC may use that information to, for instance, determine that such objects should -// be treated as marked and belonged to the old generation in nursery collections. 
-void jl_gc_notify_image_load(const char* img_data, size_t len); - // ========================================================================= // // Runtime Write-Barriers // ========================================================================= // diff --git a/src/gc-stock.c b/src/gc-stock.c index 6ebac8a0c079e..88b201a687eba 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -3947,11 +3947,6 @@ JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *o arraylist_push(&ptls->gc_tls.sweep_objs, obj); } -void jl_gc_notify_image_load(const char* img_data, size_t len) -{ - // Do nothing -} - #ifdef __cplusplus } #endif diff --git a/src/staticdata.c b/src/staticdata.c index bba35e6dcb5f9..0a8cbe6db7c67 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -657,7 +657,6 @@ static void jl_load_sysimg_so(void) plen = (size_t *)&jl_system_image_size; else jl_dlsym(jl_sysimg_handle, "jl_system_image_size", (void **)&plen, 1); - jl_gc_notify_image_load(sysimg_data, *plen); jl_restore_system_image_data(sysimg_data, *plen); } @@ -4055,7 +4054,6 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j jl_dlsym(pkgimg_handle, "jl_system_image_data", (void **)&pkgimg_data, 1); size_t *plen; jl_dlsym(pkgimg_handle, "jl_system_image_size", (void **)&plen, 1); - jl_gc_notify_image_load(pkgimg_data, *plen); jl_image_t pkgimage = jl_init_processor_pkgimg(pkgimg_handle); From d4c4360ab89dc9052cd87933b1f4b9e3581f4daa Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Mon, 2 Sep 2024 01:27:08 +0000 Subject: [PATCH 18/38] Moving gc_enable code to gc-common.c --- src/gc-common.c | 30 ++++++++++++++++++++++++++++++ src/gc-stock.c | 30 ------------------------------ 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 2ec167caa667a..03c046bc300f2 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -613,6 +613,36 @@ void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, i ptls->gc_tls.heap.mallocarrays = ma; } +// collector entry point and control +_Atomic(uint32_t) jl_gc_disable_counter = 1; + +JL_DLLEXPORT int jl_gc_enable(int on) +{ + jl_ptls_t ptls = jl_current_task->ptls; + int prev = !ptls->disable_gc; + ptls->disable_gc = (on == 0); + if (on && !prev) { + // disable -> enable + if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) { + gc_num.allocd += gc_num.deferred_alloc; + gc_num.deferred_alloc = 0; + } + } + else if (prev && !on) { + // enable -> disable + jl_atomic_fetch_add(&jl_gc_disable_counter, 1); + // check if the GC is running and wait for it to finish + jl_gc_safepoint_(ptls); + } + return prev; +} + +JL_DLLEXPORT int jl_gc_is_enabled(void) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return !ptls->disable_gc; +} + int gc_logging_enabled = 0; JL_DLLEXPORT void jl_enable_gc_logging(int enable) { diff --git a/src/gc-stock.c b/src/gc-stock.c index 88b201a687eba..55499bce61182 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -2803,36 +2803,6 @@ int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT return tid == concurrent_collector_thread_id; } -// collector entry point and control -_Atomic(uint32_t) jl_gc_disable_counter = 1; - -JL_DLLEXPORT int jl_gc_enable(int on) -{ - jl_ptls_t ptls = jl_current_task->ptls; - int prev = !ptls->disable_gc; - ptls->disable_gc = (on == 0); - if (on && !prev) { - // disable -> enable - if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) { - gc_num.allocd += gc_num.deferred_alloc; - gc_num.deferred_alloc = 0; - } - 
} - else if (prev && !on) { - // enable -> disable - jl_atomic_fetch_add(&jl_gc_disable_counter, 1); - // check if the GC is running and wait for it to finish - jl_gc_safepoint_(ptls); - } - return prev; -} - -JL_DLLEXPORT int jl_gc_is_enabled(void) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return !ptls->disable_gc; -} - JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT { jl_gc_num_t num = gc_num; From d07cae75b0b36b34a1b5150feab2b52d62a0c1ad Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Mon, 16 Sep 2024 06:38:02 +0000 Subject: [PATCH 19/38] Addressing PR comments --- src/gc-common.c | 134 +++++++++++++++++++++++++------------------ src/gc-common.h | 6 ++ src/gc-debug.c | 2 - src/gc-interface.h | 30 +--------- src/gc-stock.c | 18 +----- src/gc-stock.h | 15 +++++ src/julia.h | 2 +- src/julia_internal.h | 4 +- src/stackwalk.c | 10 +--- 9 files changed, 110 insertions(+), 111 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 03c046bc300f2..046feae6aa4c5 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -491,15 +491,9 @@ int gc_n_threads; jl_ptls_t* gc_all_tls_states; // =========================================================================== // -// MISC +// Allocation // =========================================================================== // -JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_new_weakref_th(ptls, value); -} - JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) { return jl_gc_alloc(ptls, sz, ty); @@ -575,17 +569,9 @@ JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) return jl_gc_alloc_(ptls, sz, ty); } -const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 -JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT -{ - return jl_buff_tag; -} - -// callback for passing OOM errors from gmp -JL_DLLEXPORT void jl_throw_out_of_memory_error(void) -{ - jl_throw(jl_memory_exception); -} +// =========================================================================== // +// Generic Memory +// =========================================================================== // size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT { @@ -613,6 +599,66 @@ void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, i ptls->gc_tls.heap.mallocarrays = ma; } +// =========================================================================== // +// GC Debug +// =========================================================================== // + +int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT +{ + int nf = (int)jl_datatype_nfields(vt); + for (int i = 1; i < nf; i++) { + if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) + return i - 1; + } + return nf - 1; +} + +int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT +{ + char *slot = (char*)_slot; + jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); + char *start = NULL; + size_t len = 0; + size_t elsize = sizeof(void*); + if (vt == jl_module_type) { + jl_module_t *m = (jl_module_t*)obj; + start = (char*)m->usings.items; + len = m->usings.len; + } + else if (vt == jl_simplevector_type) { + start = (char*)jl_svec_data(obj); + len = jl_svec_len(obj); + } + if (slot < start || slot >= start + elsize * len) + return -1; + return (slot - start) / elsize; +} + +// 
=========================================================================== // +// GC Control +// =========================================================================== // + +JL_DLLEXPORT uint32_t jl_get_gc_disable_counter(void) { + return jl_atomic_load_acquire(&jl_gc_disable_counter); +} + +JL_DLLEXPORT int jl_gc_is_enabled(void) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return !ptls->disable_gc; +} + +int gc_logging_enabled = 0; + +JL_DLLEXPORT void jl_enable_gc_logging(int enable) { + gc_logging_enabled = enable; +} + +JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { + return gc_logging_enabled; +} + + // collector entry point and control _Atomic(uint32_t) jl_gc_disable_counter = 1; @@ -637,54 +683,30 @@ JL_DLLEXPORT int jl_gc_enable(int on) return prev; } -JL_DLLEXPORT int jl_gc_is_enabled(void) +// =========================================================================== // +// MISC +// =========================================================================== // + +JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) { jl_ptls_t ptls = jl_current_task->ptls; - return !ptls->disable_gc; -} - -int gc_logging_enabled = 0; - -JL_DLLEXPORT void jl_enable_gc_logging(int enable) { - gc_logging_enabled = enable; + return jl_gc_new_weakref_th(ptls, value); } -JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { - return gc_logging_enabled; +JL_DLLEXPORT jl_datatype_t **jl_get_ijl_small_typeof(void) { + return ijl_small_typeof; } -// gc-debug common functions -// --- - -int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT +const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 +JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT { - int nf = (int)jl_datatype_nfields(vt); - for (int i = 1; i < nf; i++) { - if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) - return i - 1; - } - return nf - 1; + return jl_buff_tag; } -int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT +// callback for passing OOM errors from gmp +JL_DLLEXPORT void jl_throw_out_of_memory_error(void) { - char *slot = (char*)_slot; - jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); - char *start = NULL; - size_t len = 0; - size_t elsize = sizeof(void*); - if (vt == jl_module_type) { - jl_module_t *m = (jl_module_t*)obj; - start = (char*)m->usings.items; - len = m->usings.len; - } - else if (vt == jl_simplevector_type) { - start = (char*)jl_svec_data(obj); - len = jl_svec_len(obj); - } - if (slot < start || slot >= start + elsize * len) - return -1; - return (slot - start) / elsize; + jl_throw(jl_memory_exception); } #ifdef __cplusplus diff --git a/src/gc-common.h b/src/gc-common.h index 154b9659e9ccb..32b7470b13a58 100644 --- a/src/gc-common.h +++ b/src/gc-common.h @@ -179,4 +179,10 @@ JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o); extern int gc_n_threads; extern jl_ptls_t* gc_all_tls_states; +// =========================================================================== // +// Logging +// =========================================================================== // + +extern int gc_logging_enabled; + #endif // JL_GC_COMMON_H diff --git a/src/gc-debug.c b/src/gc-debug.c index d05fb4b49e9f7..7c479484cde45 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1105,8 +1105,6 @@ void gc_count_pool(void) jl_safe_printf("************************\n"); } -extern int gc_logging_enabled; - void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t 
live_bytes) JL_NOTSAFEPOINT { if (!gc_logging_enabled) { return; diff --git a/src/gc-interface.h b/src/gc-interface.h index 25ffed4524f0c..0e9ce32697f35 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -94,6 +94,8 @@ JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem); // should run a collection cycle again (e.g. a full mark right after a full sweep to ensure // we do a full heap traversal). JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection); +// Returns whether the thread with `tid` is a collector thread +JL_DLLEXPORT int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT; // ========================================================================= // // Metrics @@ -162,26 +164,6 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz); JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz); // Wrapper around Libc realloc that updates Julia allocation counters. JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz); -// Wrapper around Libc malloc that allocates a memory region with a few additional machine -// words before the actual payload that are used to record the size of the requested -// allocation. Also updates Julia allocation counters. The function returns a pointer to the -// payload as a result of the allocation. -JL_DLLEXPORT void *jl_malloc(size_t sz); -// Wrapper around Libc calloc that allocates a memory region with a few additional machine -// words before the actual payload that are used to record the size of the requested -// allocation. Also updates Julia allocation counters. The function returns a pointer to the -// payload as a result of the allocation. -JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz); -// Wrapper around Libc free that takes a pointer to the payload of a memory region allocated -// with jl_malloc or jl_calloc, and uses the size information stored in the first machine -// words of the memory buffer update Julia allocation counters, and then frees the -// corresponding memory buffer. -JL_DLLEXPORT void jl_free(void *p); -// Wrapper around Libc realloc that takes a memory region allocated with jl_malloc or -// jl_calloc, and uses the size information stored in the first machine words of the memory -// buffer to update Julia allocation counters, reallocating the corresponding memory buffer -// in the end. -JL_DLLEXPORT void *jl_realloc(void *p, size_t sz); // Wrapper around Libc malloc that's used to dynamically allocate memory for Arrays and // Strings. It increments Julia allocation counters and should check whether we're close to // the Julia heap target, and therefore, whether we should run a collection. Note that this @@ -195,14 +177,6 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz); // thread-local allocator of the thread referenced by the first jl_ptls_t argument. JL_DLLEXPORT struct _jl_weakref_t *jl_gc_new_weakref_th(struct _jl_tls_states_t *ptls, struct _jl_value_t *value); -// Allocates a new weak-reference, assigns its value and increments Julia allocation -// counters. If thread-local allocators are used, then this function should allocate in the -// thread-local allocator of the current thread. -JL_DLLEXPORT struct _jl_weakref_t *jl_gc_new_weakref(struct _jl_value_t *value); -// Allocates an object whose size is specified by the function argument and increments Julia -// allocation counters. If thread-local allocators are used, then this function should -// allocate in the thread-local allocator of the current thread. 
-JL_DLLEXPORT struct _jl_value_t *jl_gc_allocobj(size_t sz); // Permanently allocates a memory slot of the size specified by the first parameter. This // block of memory is allocated in an immortal region that is never swept. The second // parameter specifies whether the memory should be filled with zeros. The third and fourth diff --git a/src/gc-stock.c b/src/gc-stock.c index 55499bce61182..b345fe08ff69c 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -2788,19 +2788,8 @@ static void sweep_finalizer_list(arraylist_t *list) list->len = j; } -int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT -{ - return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); -} - -int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT -{ - if (jl_n_sweepthreads == 0) { - return 0; - } - int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); - int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; - return tid == concurrent_collector_thread_id; +int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT { + return gc_is_parallel_collector_thread(tid) || gc_is_concurrent_collector_thread(tid); } JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT @@ -3193,8 +3182,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) // free empty GC state for threads that have exited if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { // GC threads should never exit - assert(!gc_is_parallel_collector_thread(t_i)); - assert(!gc_is_concurrent_collector_thread(t_i)); + assert(!gc_is_collector_thread(t_i)); jl_thread_heap_t *heap = &ptls2->gc_tls.heap; if (heap->weak_refs.len == 0) small_arraylist_free(&heap->weak_refs); diff --git a/src/gc-stock.h b/src/gc-stock.h index cc661ce6e1600..0f8d1eee67581 100644 --- a/src/gc-stock.h +++ b/src/gc-stock.h @@ -422,6 +422,21 @@ STATIC_INLINE int gc_ith_parallel_collector_thread_id(int i) JL_NOTSAFEPOINT return gc_first_tid + i; } +STATIC_INLINE int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT +{ + return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); +} + +STATIC_INLINE int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT +{ + if (jl_n_sweepthreads == 0) { + return 0; + } + int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); + int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; + return tid == concurrent_collector_thread_id; +} + STATIC_INLINE int gc_random_parallel_collector_thread_id(jl_ptls_t ptls) JL_NOTSAFEPOINT { assert(jl_n_markthreads > 0); diff --git a/src/julia.h b/src/julia.h index b74de3060d26a..ed3d9bf825658 100644 --- a/src/julia.h +++ b/src/julia.h @@ -858,7 +858,7 @@ static inline jl_value_t *jl_to_typeof(uintptr_t t) return (jl_value_t*)t; } #else -extern JL_DLLEXPORT jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; +extern JL_HIDDEN jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; static inline jl_value_t *jl_to_typeof(uintptr_t t) { if (t < (jl_max_tags << 4)) diff --git a/src/julia_internal.h b/src/julia_internal.h index 04857d440b643..c079c06f0189a 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -367,6 +367,8 @@ extern jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT size_t jl_typeinf_world; extern _Atomic(jl_typemap_entry_t*) call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED; +extern void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT; + JL_DLLEXPORT 
extern int jl_lineno; JL_DLLEXPORT extern const char *jl_filename; @@ -1053,7 +1055,7 @@ STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr) return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 4; } extern _Atomic(uint32_t) jl_gc_running; -extern JL_DLLEXPORT _Atomic(uint32_t) jl_gc_disable_counter; +extern _Atomic(uint32_t) jl_gc_disable_counter; // All the functions are safe to be called from within a signal handler // provided that the thread will not be interrupted by another asynchronous // signal. diff --git a/src/stackwalk.c b/src/stackwalk.c index 5f28b61c4a8fe..a1de3a6d61a07 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -1294,8 +1294,6 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT } extern int gc_first_tid; -extern int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT; -extern int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT; // Print backtraces for all live tasks, for all threads, to jl_safe_printf stderr JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT @@ -1304,12 +1302,8 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states); for (size_t i = 0; i < nthreads; i++) { jl_ptls_t ptls2 = allstates[i]; - if (gc_is_parallel_collector_thread(i)) { - jl_safe_printf("==== Skipping backtrace for parallel GC thread %zu\n", i + 1); - continue; - } - if (gc_is_concurrent_collector_thread(i)) { - jl_safe_printf("==== Skipping backtrace for concurrent GC thread %zu\n", i + 1); + if (gc_is_collector_thread(i)) { + jl_safe_printf("==== Skipping backtrace for parallel/concurrent GC thread %zu\n", i + 1); continue; } if (ptls2 == NULL) { From 8e15217b8a5eaea51335f6b7577ba929905a4a54 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 19 Sep 2024 04:18:13 +0000 Subject: [PATCH 20/38] Push resolution of merge conflict --- src/scheduler.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/scheduler.c b/src/scheduler.c index b85a481588e4f..bb2f85b52283f 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -80,20 +80,9 @@ JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSA return 1; } -<<<<<<< HEAD // GC functions used extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT; -======= -// parallel task runtime -// --- - -JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max) // [0, n) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return cong(max, &ptls->rngseed); -} ->>>>>>> 4f39869d04 (Refactoring to be considered before adding MMTk) // initialize the threading infrastructure // (called only by the main thread) From 0cb0784a43aa01803b73407c90bd5ee44d09531f Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 25 Sep 2024 01:10:31 +0000 Subject: [PATCH 21/38] Removing jl_gc_mark_queue_obj_explicit extern definition from scheduler.c --- src/scheduler.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/scheduler.c b/src/scheduler.c index bb2f85b52283f..7e23f654c2566 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -80,10 +80,6 @@ JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSA return 1; } -// GC functions used -extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, - jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT; - // initialize the threading infrastructure // (called only by the main thread) void jl_init_threadinginfra(void) From 
12634f36d67bd9c8275feda1e2729b0910ca2664 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 25 Sep 2024 02:50:25 +0000 Subject: [PATCH 22/38] Don't need the getter function since it's possible to use jl_small_typeof directly --- src/gc-common.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 046feae6aa4c5..417f12f26d64d 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -693,10 +693,6 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) return jl_gc_new_weakref_th(ptls, value); } -JL_DLLEXPORT jl_datatype_t **jl_get_ijl_small_typeof(void) { - return ijl_small_typeof; -} - const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT { From aa8093328cf5f70d9df78fda2315b077a76e4d8b Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Fri, 27 Sep 2024 00:49:07 +0000 Subject: [PATCH 23/38] Remove extern from free_stack declaration in julia_internal.h --- src/julia_internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/julia_internal.h b/src/julia_internal.h index c079c06f0189a..6fd537ed6baf8 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -367,7 +367,7 @@ extern jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT size_t jl_typeinf_world; extern _Atomic(jl_typemap_entry_t*) call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED; -extern void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT; +void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT; JL_DLLEXPORT extern int jl_lineno; JL_DLLEXPORT extern const char *jl_filename; From 7ce3fe392616d4da1035de6b02a21056f05072b6 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Tue, 8 Oct 2024 09:12:49 +0000 Subject: [PATCH 24/38] Putting everything that is common GC tls into gc-tls-common.h --- src/gc-common.c | 10 +-- src/gc-stacks.c | 18 +++--- src/gc-stock.c | 154 ++++++++++++++++++++++---------------------- src/gc-tls-common.h | 52 +++++++++++++++ src/gc-tls.h | 25 ------- src/julia_threads.h | 2 + src/stackwalk.c | 2 +- 7 files changed, 147 insertions(+), 116 deletions(-) create mode 100644 src/gc-tls-common.h diff --git a/src/gc-common.c b/src/gc-common.c index 417f12f26d64d..6ce455d3923ad 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -587,16 +587,16 @@ size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ // This is **NOT** a GC safe point. 
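// (An aside, sketching the recycling pattern in the hunk below: tracking
// nodes are popped from the per-thread `mafreelist` when one is available,
// malloc'd otherwise, and every tracked allocation is pushed onto
// `mallocarrays` so that sweep_malloced_memory can find it later. In
// pseudocode:
//   ma = mafreelist ? pop(mafreelist) : malloc_s(sizeof(mallocmemory_t));
//   ma->a = m;                  /* tagged with the alignment bit */
//   push(ma, mallocarrays);
// The patch itself merely moves the two list heads from gc_tls to
// gc_tls_common.)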
mallocmemory_t *ma; - if (ptls->gc_tls.heap.mafreelist == NULL) { + if (ptls->gc_tls_common.heap.mafreelist == NULL) { ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t)); } else { - ma = ptls->gc_tls.heap.mafreelist; - ptls->gc_tls.heap.mafreelist = ma->next; + ma = ptls->gc_tls_common.heap.mafreelist; + ptls->gc_tls_common.heap.mafreelist = ma->next; } ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned); - ma->next = ptls->gc_tls.heap.mallocarrays; - ptls->gc_tls.heap.mallocarrays = ma; + ma->next = ptls->gc_tls_common.heap.mallocarrays; + ptls->gc_tls_common.heap.mallocarrays = ma; } // =========================================================================== // diff --git a/src/gc-stacks.c b/src/gc-stacks.c index 8c44b65284386..a8fec938456a3 100644 --- a/src/gc-stacks.c +++ b/src/gc-stacks.c @@ -131,7 +131,7 @@ void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT if (bufsz <= pool_sizes[JL_N_STACK_POOLS - 1]) { unsigned pool_id = select_pool(bufsz); if (pool_sizes[pool_id] == bufsz) { - small_arraylist_push(&ptls->gc_tls.heap.free_stacks[pool_id], stkbuf); + small_arraylist_push(&ptls->gc_tls_common.heap.free_stacks[pool_id], stkbuf); return; } } @@ -160,7 +160,7 @@ void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task) #ifdef _COMPILER_ASAN_ENABLED_ __asan_unpoison_stack_memory((uintptr_t)stkbuf, bufsz); #endif - small_arraylist_push(&ptls->gc_tls.heap.free_stacks[pool_id], stkbuf); + small_arraylist_push(&ptls->gc_tls_common.heap.free_stacks[pool_id], stkbuf); } } } @@ -175,7 +175,7 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO if (ssize <= pool_sizes[JL_N_STACK_POOLS - 1]) { unsigned pool_id = select_pool(ssize); ssize = pool_sizes[pool_id]; - small_arraylist_t *pool = &ptls->gc_tls.heap.free_stacks[pool_id]; + small_arraylist_t *pool = &ptls->gc_tls_common.heap.free_stacks[pool_id]; if (pool->len > 0) { stk = small_arraylist_pop(pool); } @@ -196,7 +196,7 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO } *bufsz = ssize; if (owner) { - small_arraylist_t *live_tasks = &ptls->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls->gc_tls_common.heap.live_tasks; mtarraylist_push(live_tasks, owner); } return stk; @@ -223,7 +223,7 @@ void sweep_stack_pools(void) JL_NOTSAFEPOINT // free half of stacks that remain unused since last sweep for (int p = 0; p < JL_N_STACK_POOLS; p++) { - small_arraylist_t *al = &ptls2->gc_tls.heap.free_stacks[p]; + small_arraylist_t *al = &ptls2->gc_tls_common.heap.free_stacks[p]; size_t n_to_free; if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { n_to_free = al->len; // not alive yet or dead, so it does not need these anymore @@ -245,10 +245,10 @@ void sweep_stack_pools(void) JL_NOTSAFEPOINT } } if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { - small_arraylist_free(ptls2->gc_tls.heap.free_stacks); + small_arraylist_free(ptls2->gc_tls_common.heap.free_stacks); } - small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; size_t n = 0; size_t ndel = 0; size_t l = live_tasks->len; @@ -299,7 +299,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void) jl_ptls_t ptls2 = allstates[i]; if (ptls2 == NULL) continue; - small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; size_t n = mtarraylist_length(live_tasks); l += n + (ptls2->root_task->ctx.stkbuf != NULL); } @@ 
-318,7 +318,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void) goto restart; jl_array_data(a,void*)[j++] = t; } - small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; size_t n = mtarraylist_length(live_tasks); for (size_t i = 0; i < n; i++) { jl_task_t *t = (jl_task_t*)mtarraylist_get(live_tasks, i); diff --git a/src/gc-stock.c b/src/gc-stock.c index b345fe08ff69c..8e040c9b25dcf 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -357,7 +357,7 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *valu jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type); wr->value = value; // NOTE: wb not needed here - small_arraylist_push(&ptls->gc_tls.heap.weak_refs, wr); + small_arraylist_push(&ptls->gc_tls_common.heap.weak_refs, wr); return wr; } @@ -367,8 +367,8 @@ static void clear_weak_refs(void) for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; if (ptls2 != NULL) { - size_t n, l = ptls2->gc_tls.heap.weak_refs.len; - void **lst = ptls2->gc_tls.heap.weak_refs.items; + size_t n, l = ptls2->gc_tls_common.heap.weak_refs.len; + void **lst = ptls2->gc_tls_common.heap.weak_refs.items; for (n = 0; n < l; n++) { jl_weakref_t *wr = (jl_weakref_t*)lst[n]; if (!gc_marked(jl_astaggedvalue(wr->value)->bits.gc)) @@ -386,8 +386,8 @@ static void sweep_weak_refs(void) if (ptls2 != NULL) { size_t n = 0; size_t ndel = 0; - size_t l = ptls2->gc_tls.heap.weak_refs.len; - void **lst = ptls2->gc_tls.heap.weak_refs.items; + size_t l = ptls2->gc_tls_common.heap.weak_refs.len; + void **lst = ptls2->gc_tls_common.heap.weak_refs.items; if (l == 0) continue; while (1) { @@ -402,7 +402,7 @@ static void sweep_weak_refs(void) lst[n] = lst[n + ndel]; lst[n + ndel] = tmp; } - ptls2->gc_tls.heap.weak_refs.len -= ndel; + ptls2->gc_tls_common.heap.weak_refs.len -= ndel; } } } @@ -410,18 +410,18 @@ static void sweep_weak_refs(void) STATIC_INLINE void jl_batch_accum_heap_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT { - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.alloc_acc) + sz; + uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc) + sz; if (alloc_acc < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, alloc_acc); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, alloc_acc); else { jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); } } STATIC_INLINE void jl_batch_accum_free_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT { - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc) + sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc) + sz); } // big value list @@ -442,10 +442,10 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) jl_throw(jl_memory_exception); gc_invoke_callbacks(jl_gc_cb_notify_external_alloc_t, gc_cblist_notify_external_alloc, (v, allocsz)); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + allocsz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.bigalloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.bigalloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + 
jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + allocsz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.bigalloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.bigalloc) + 1); jl_batch_accum_heap_size(ptls, allocsz); #ifdef MEMDEBUG memset(v, 0xee, allocsz); @@ -558,8 +558,8 @@ static void sweep_big(jl_ptls_t ptls) JL_NOTSAFEPOINT void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT { jl_ptls_t ptls = jl_current_task->ptls; - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz); jl_batch_accum_heap_size(ptls, sz); } @@ -578,18 +578,18 @@ static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTS for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls = gc_all_tls_states[i]; if (ptls) { - dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval); - dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc); - dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.realloc); - dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.poolalloc); - dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.bigalloc); - dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc); + dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval); + dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc); + dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.realloc); + dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.poolalloc); + dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.bigalloc); + dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc); if (update_heap) { - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.alloc_acc); - freed_in_runtime += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc); + uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc); + freed_in_runtime += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc); jl_atomic_store_relaxed(&gc_heap_stats.heap_size, alloc_acc + jl_atomic_load_relaxed(&gc_heap_stats.heap_size)); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0); } } } @@ -605,13 +605,13 @@ static void reset_thread_gc_counts(void) JL_NOTSAFEPOINT jl_ptls_t ptls = gc_all_tls_states[i]; if (ptls != NULL) { // don't reset `pool_live_bytes` here - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.realloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.poolalloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.bigalloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.realloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.poolalloc, 0); + 
jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.bigalloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0); } } } @@ -654,8 +654,8 @@ static void sweep_malloced_memory(void) JL_NOTSAFEPOINT for (int t_i = 0; t_i < gc_n_threads; t_i++) { jl_ptls_t ptls2 = gc_all_tls_states[t_i]; if (ptls2 != NULL) { - mallocmemory_t *ma = ptls2->gc_tls.heap.mallocarrays; - mallocmemory_t **pma = &ptls2->gc_tls.heap.mallocarrays; + mallocmemory_t *ma = ptls2->gc_tls_common.heap.mallocarrays; + mallocmemory_t **pma = &ptls2->gc_tls_common.heap.mallocarrays; while (ma != NULL) { mallocmemory_t *nxt = ma->next; jl_value_t *a = (jl_value_t*)((uintptr_t)ma->a & ~1); @@ -667,8 +667,8 @@ static void sweep_malloced_memory(void) JL_NOTSAFEPOINT *pma = nxt; int isaligned = (uintptr_t)ma->a & 1; jl_gc_free_memory(a, isaligned); - ma->next = ptls2->gc_tls.heap.mafreelist; - ptls2->gc_tls.heap.mafreelist = ma; + ma->next = ptls2->gc_tls_common.heap.mafreelist; + ptls2->gc_tls_common.heap.mafreelist = ma; } gc_time_count_mallocd_memory(bits); ma = nxt; @@ -729,12 +729,12 @@ STATIC_INLINE jl_value_t *jl_gc_small_alloc_inner(jl_ptls_t ptls, int offset, return jl_gc_big_alloc(ptls, osize, NULL); #endif maybe_collect(ptls); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + osize); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes) + osize); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.poolalloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.poolalloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + osize); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes) + osize); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.poolalloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.poolalloc) + 1); // first try to use the freelist jl_taggedvalue_t *v = p->freelist; if (v != NULL) { @@ -971,8 +971,8 @@ static void gc_sweep_page(gc_page_profiler_serializer_t *s, jl_gc_pool_t *p, jl_ // instead of adding it to the thread that originally allocated the page, so we can avoid // an atomic-fetch-add here. 
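// (To spell out the accounting below: a page contributes GC_PAGE_SZ -
// GC_PAGE_OFFSET usable bytes, and each of the nfree freelist slots holds
// osize dead bytes, so the surviving payload is
//   delta = (GC_PAGE_SZ - GC_PAGE_OFFSET) - nfree * osize;
// and, per the comment above, it is credited to the sweeping thread's
// pool_live_bytes counter rather than the allocating thread's.)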
size_t delta = (GC_PAGE_SZ - GC_PAGE_OFFSET - nfree * osize); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes) + delta); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes) + delta); jl_atomic_fetch_add_relaxed((_Atomic(int64_t) *)&gc_num.freed, (nfree - old_nfree) * osize); } @@ -1228,7 +1228,7 @@ static void gc_sweep_pool(void) } continue; } - jl_atomic_store_relaxed(&ptls2->gc_tls.gc_num.pool_live_bytes, 0); + jl_atomic_store_relaxed(&ptls2->gc_tls_common.gc_num.pool_live_bytes, 0); for (int i = 0; i < JL_GC_N_POOLS; i++) { jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i]; jl_taggedvalue_t *last = p->freelist; @@ -2834,7 +2834,7 @@ JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void) for (int i = 0; i < n_threads; i++) { jl_ptls_t ptls2 = all_tls_states[i]; if (ptls2 != NULL) { - pool_live_bytes += jl_atomic_load_relaxed(&ptls2->gc_tls.gc_num.pool_live_bytes); + pool_live_bytes += jl_atomic_load_relaxed(&ptls2->gc_tls_common.gc_num.pool_live_bytes); } } return pool_live_bytes; @@ -3183,11 +3183,12 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { // GC threads should never exit assert(!gc_is_collector_thread(t_i)); + jl_thread_heap_common_t *common_heap = &ptls2->gc_tls_common.heap; jl_thread_heap_t *heap = &ptls2->gc_tls.heap; - if (heap->weak_refs.len == 0) - small_arraylist_free(&heap->weak_refs); - if (heap->live_tasks.len == 0) - small_arraylist_free(&heap->live_tasks); + if (common_heap->weak_refs.len == 0) + small_arraylist_free(&common_heap->weak_refs); + if (common_heap->live_tasks.len == 0) + small_arraylist_free(&common_heap->live_tasks); if (heap->remset.len == 0) arraylist_free(&heap->remset); if (ptls2->finalizers.len == 0) @@ -3256,8 +3257,8 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; if (jl_atomic_load_acquire(&jl_gc_disable_counter)) { - size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval; - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); + size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval; + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), ""); jl_atomic_fetch_add_relaxed((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); return; @@ -3362,6 +3363,7 @@ void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq) // Per-thread initialization void jl_init_thread_heap(jl_ptls_t ptls) { + jl_thread_heap_common_t *common_heap = &ptls->gc_tls_common.heap; jl_thread_heap_t *heap = &ptls->gc_tls.heap; jl_gc_pool_t *p = heap->norm_pools; for (int i = 0; i < JL_GC_N_POOLS; i++) { @@ -3369,12 +3371,12 @@ void jl_init_thread_heap(jl_ptls_t ptls) p[i].freelist = NULL; p[i].newpages = NULL; } - small_arraylist_new(&heap->weak_refs, 0); - small_arraylist_new(&heap->live_tasks, 0); + small_arraylist_new(&common_heap->weak_refs, 0); + small_arraylist_new(&common_heap->live_tasks, 0); for (int i = 0; i < JL_N_STACK_POOLS; i++) - small_arraylist_new(&heap->free_stacks[i], 0); - heap->mallocarrays = NULL; - heap->mafreelist = NULL; + small_arraylist_new(&common_heap->free_stacks[i], 0); + common_heap->mallocarrays = NULL; 
+ common_heap->mafreelist = NULL; heap->young_generation_of_bigvals = (bigval_t*)calloc_s(sizeof(bigval_t)); // sentinel assert(gc_bigval_sentinel_tag != 0); // make sure the sentinel is initialized heap->young_generation_of_bigvals->header = gc_bigval_sentinel_tag; @@ -3400,8 +3402,8 @@ void jl_init_thread_heap(jl_ptls_t ptls) jl_atomic_store_relaxed(&q->array, wsa2); arraylist_new(&mq->reclaim_set, 32); - memset(&ptls->gc_tls.gc_num, 0, sizeof(ptls->gc_tls.gc_num)); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); + memset(&ptls->gc_tls_common.gc_num, 0, sizeof(ptls->gc_tls_common.gc_num)); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); } void jl_free_thread_gc_state(jl_ptls_t ptls) @@ -3579,10 +3581,10 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) if (data != NULL && pgcstack != NULL && ct->world_age) { jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + sz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1); jl_batch_accum_heap_size(ptls, sz); } return data; @@ -3596,10 +3598,10 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) if (data != NULL && pgcstack != NULL && ct->world_age) { jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + nm*sz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + nm*sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1); jl_batch_accum_heap_size(ptls, sz * nm); } return data; @@ -3624,10 +3626,10 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); if (!(sz < old)) - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + (sz - old)); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.realloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.realloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + (sz - old)); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.realloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.realloc) + 1); int64_t diff = sz - old; if (diff < 0) { @@ -3658,10 +3660,10 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) if (b == NULL) jl_throw(jl_memory_exception); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + allocsz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + allocsz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, + 
jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1); jl_batch_accum_heap_size(ptls, allocsz); #ifdef _OS_WINDOWS_ SetLastError(last_error); diff --git a/src/gc-tls-common.h b/src/gc-tls-common.h new file mode 100644 index 0000000000000..28fbf2d0c448e --- /dev/null +++ b/src/gc-tls-common.h @@ -0,0 +1,52 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +// Meant to be included in "julia_threads.h" +#ifndef JL_GC_TLS_COMMON_H +#define JL_GC_TLS_COMMON_H + +#include "julia_atomics.h" + +// GC threading ------------------------------------------------------------------ + +#include "arraylist.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + // variable for tracking weak references + small_arraylist_t weak_refs; + // live tasks started on this thread + // that are holding onto a stack from the pool + small_arraylist_t live_tasks; + + // variables for tracking malloc'd arrays + struct _mallocmemory_t *mallocarrays; + struct _mallocmemory_t *mafreelist; + +#define JL_N_STACK_POOLS 16 + small_arraylist_t free_stacks[JL_N_STACK_POOLS]; +} jl_thread_heap_common_t; + +typedef struct { + _Atomic(int64_t) allocd; + _Atomic(int64_t) pool_live_bytes; + _Atomic(uint64_t) malloc; + _Atomic(uint64_t) realloc; + _Atomic(uint64_t) poolalloc; + _Atomic(uint64_t) bigalloc; + _Atomic(int64_t) free_acc; + _Atomic(uint64_t) alloc_acc; +} jl_thread_gc_num_common_t; + +typedef struct { + jl_thread_heap_common_t heap; + jl_thread_gc_num_common_t gc_num; +} jl_gc_tls_states_common_t; + +#ifdef __cplusplus +} +#endif + +#endif // JL_GC_TLS_H diff --git a/src/gc-tls.h b/src/gc-tls.h index 9e4b09404db84..ecc815805a98b 100644 --- a/src/gc-tls.h +++ b/src/gc-tls.h @@ -21,16 +21,6 @@ typedef struct { } jl_gc_pool_t; typedef struct { - // variable for tracking weak references - small_arraylist_t weak_refs; - // live tasks started on this thread - // that are holding onto a stack from the pool - small_arraylist_t live_tasks; - - // variables for tracking malloc'd arrays - struct _mallocmemory_t *mallocarrays; - struct _mallocmemory_t *mafreelist; - // variable for tracking young (i.e. not in `GC_OLD_MARKED`/last generation) large objects struct _bigval_t *young_generation_of_bigvals; @@ -42,22 +32,8 @@ typedef struct { // variables for allocating objects from pools #define JL_GC_N_MAX_POOLS 51 // conservative. 
must be kept in sync with `src/julia_internal.h` jl_gc_pool_t norm_pools[JL_GC_N_MAX_POOLS]; - -#define JL_N_STACK_POOLS 16 - small_arraylist_t free_stacks[JL_N_STACK_POOLS]; } jl_thread_heap_t; -typedef struct { - _Atomic(int64_t) allocd; - _Atomic(int64_t) pool_live_bytes; - _Atomic(uint64_t) malloc; - _Atomic(uint64_t) realloc; - _Atomic(uint64_t) poolalloc; - _Atomic(uint64_t) bigalloc; - _Atomic(int64_t) free_acc; - _Atomic(uint64_t) alloc_acc; -} jl_thread_gc_num_t; - typedef struct { ws_queue_t chunk_queue; ws_queue_t ptr_queue; @@ -78,7 +54,6 @@ typedef struct { typedef struct { jl_thread_heap_t heap; jl_gc_page_stack_t page_metadata_allocd; - jl_thread_gc_num_t gc_num; jl_gc_markqueue_t mark_queue; jl_gc_mark_cache_t gc_cache; _Atomic(size_t) gc_sweeps_requested; diff --git a/src/julia_threads.h b/src/julia_threads.h index b697a0bf030ed..fcc28591658cb 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -5,6 +5,7 @@ #define JL_THREADS_H #include "gc-tls.h" +#include "gc-tls-common.h" #include "julia_atomics.h" #ifndef _OS_WINDOWS_ #include "pthread.h" @@ -155,6 +156,7 @@ typedef struct _jl_tls_states_t { // Counter to disable finalizer **on the current thread** int finalizers_inhibited; jl_gc_tls_states_t gc_tls; // this is very large, and the offset of the first member is baked into codegen + jl_gc_tls_states_common_t gc_tls_common; // common tls for both GCs volatile sig_atomic_t defer_signal; _Atomic(struct _jl_task_t*) current_task; struct _jl_task_t *next_task; diff --git a/src/stackwalk.c b/src/stackwalk.c index a1de3a6d61a07..0988d7a833c94 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -1309,7 +1309,7 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT if (ptls2 == NULL) { continue; } - small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; size_t n = mtarraylist_length(live_tasks); int t_state = JL_TASK_STATE_DONE; jl_task_t *t = ptls2->root_task; From 048af72dee003a3ded89c3bf6c6572f97cb2678a Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Tue, 8 Oct 2024 09:14:24 +0000 Subject: [PATCH 25/38] Typo --- src/gc-tls-common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gc-tls-common.h b/src/gc-tls-common.h index 28fbf2d0c448e..ba36f5c1c238e 100644 --- a/src/gc-tls-common.h +++ b/src/gc-tls-common.h @@ -49,4 +49,4 @@ typedef struct { } #endif -#endif // JL_GC_TLS_H +#endif // JL_GC_TLS_COMMON_H From fe61c2232d997da0ebd3b936a469024acff7afbb Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Tue, 8 Oct 2024 22:46:39 +0000 Subject: [PATCH 26/38] Adding gc-tls-common.h to Makefile as a public header --- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index a6b1f433b73ce..80bbdbcff67fc 100644 --- a/src/Makefile +++ b/src/Makefile @@ -103,7 +103,7 @@ ifeq ($(USE_SYSTEM_LIBUV),0) UV_HEADERS += uv.h UV_HEADERS += uv/*.h endif -PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-interface.h gc-tls.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h) +PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-interface.h gc-tls.h gc-tls-common.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h) ifeq ($(OS),WINNT) PUBLIC_HEADERS += $(addprefix 
$(SRCDIR)/,win32_ucontext.h) endif From 380fd833efba491cb167ad9c61909199e14098d8 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Tue, 8 Oct 2024 23:26:33 +0000 Subject: [PATCH 27/38] Removing gc-tls-common fields from gc-tls-mmtk.h --- src/gc-mmtk.c | 58 +++++++++++++++++------------------ src/gc-tls-mmtk.h | 30 ------------------ src/llvm-late-gc-lowering.cpp | 2 +- 3 files changed, 30 insertions(+), 60 deletions(-) diff --git a/src/gc-mmtk.c b/src/gc-mmtk.c index 98a5612871be0..aa010c73b27d2 100644 --- a/src/gc-mmtk.c +++ b/src/gc-mmtk.c @@ -108,7 +108,7 @@ void jl_start_gc_threads(void) { } void jl_init_thread_heap(struct _jl_tls_states_t *ptls) JL_NOTSAFEPOINT { - jl_thread_heap_t *heap = &ptls->gc_tls.heap; + jl_thread_heap_common_t *heap = &ptls->gc_tls_common.heap; small_arraylist_new(&heap->weak_refs, 0); small_arraylist_new(&heap->live_tasks, 0); for (int i = 0; i < JL_N_STACK_POOLS; i++) @@ -124,7 +124,7 @@ void jl_init_thread_heap(struct _jl_tls_states_t *ptls) JL_NOTSAFEPOINT { memcpy(&ptls->gc_tls.mmtk_mutator, mmtk_mutator, sizeof(MMTkMutatorContext)); // Call post_bind to maintain a list of active mutators and to reclaim the old mutator (which is no longer needed) mmtk_post_bind_mutator(&ptls->gc_tls.mmtk_mutator, mmtk_mutator); - memset(&ptls->gc_tls.gc_num, 0, sizeof(ptls->gc_tls.gc_num)); + memset(&ptls->gc_tls_common.gc_num, 0, sizeof(ptls->gc_tls_common.gc_num)); } void jl_free_thread_gc_state(struct _jl_tls_states_t *ptls) { @@ -162,8 +162,8 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) { jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; if (jl_atomic_load_acquire(&jl_gc_disable_counter)) { - size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval; - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); + size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval; + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), ""); jl_atomic_fetch_add_relaxed((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); return; @@ -186,15 +186,15 @@ static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTS for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls = gc_all_tls_states[i]; if (ptls) { - dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval); - dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc); - dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.realloc); - dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.poolalloc); - dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.bigalloc); - dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc); + dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval); + dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc); + dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.realloc); + dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.poolalloc); + dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.bigalloc); + dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc); if (update_heap) { - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, 0); + 
jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0); } } } @@ -211,13 +211,13 @@ void reset_thread_gc_counts(void) JL_NOTSAFEPOINT jl_ptls_t ptls = gc_all_tls_states[i]; if (ptls != NULL) { // don't reset `pool_live_bytes` here - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.realloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.poolalloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.bigalloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.realloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.poolalloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.bigalloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0); } } } @@ -257,8 +257,8 @@ JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void) { void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT { jl_ptls_t ptls = jl_current_task->ptls; - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz); } void jl_gc_count_freed(size_t sz) JL_NOTSAFEPOINT @@ -473,8 +473,8 @@ JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int osize, siz mmtk_immix_post_alloc_fast(&ptls->gc_tls.mmtk_mutator, v, LLT_ALIGN(osize+sizeof(jl_taggedvalue_t), align)); } - ptls->gc_tls.gc_num.allocd += osize; - ptls->gc_tls.gc_num.poolalloc++; + ptls->gc_tls_common.gc_num.allocd += osize; + ptls->gc_tls_common.gc_num.poolalloc++; return v; } @@ -502,8 +502,8 @@ JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t sz) } v->sz = allocsz; - ptls->gc_tls.gc_num.allocd += allocsz; - ptls->gc_tls.gc_num.bigalloc++; + ptls->gc_tls_common.gc_num.allocd += allocsz; + ptls->gc_tls_common.gc_num.bigalloc++; jl_value_t *result = jl_valueof(&v->header); mmtk_post_alloc(&ptls->gc_tls.mmtk_mutator, result, allocsz, 2); @@ -565,10 +565,10 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) if (b == NULL) jl_throw(jl_memory_exception); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + allocsz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + allocsz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1); // FIXME: Should these be part of mmtk's heap? 
// malloc_maybe_collect(ptls, sz); // jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, allocsz); diff --git a/src/gc-tls-mmtk.h b/src/gc-tls-mmtk.h index 64a1bae192445..7b1b249cd8ae3 100644 --- a/src/gc-tls-mmtk.h +++ b/src/gc-tls-mmtk.h @@ -9,37 +9,7 @@ extern "C" { #endif -// This mostly remove some fields that are not used by MMTk - -typedef struct { - // variable for tracking weak references - small_arraylist_t weak_refs; - // live tasks started on this thread - // that are holding onto a stack from the pool - small_arraylist_t live_tasks; - - // variables for tracking malloc'd arrays - struct _mallocmemory_t *mallocarrays; - struct _mallocmemory_t *mafreelist; - -#define JL_N_STACK_POOLS 16 - small_arraylist_t free_stacks[JL_N_STACK_POOLS]; -} jl_thread_heap_t; - -typedef struct { - _Atomic(int64_t) allocd; - _Atomic(int64_t) pool_live_bytes; - _Atomic(uint64_t) malloc; - _Atomic(uint64_t) realloc; - _Atomic(uint64_t) poolalloc; - _Atomic(uint64_t) bigalloc; - _Atomic(int64_t) free_acc; - _Atomic(uint64_t) alloc_acc; -} jl_thread_gc_num_t; - typedef struct { - jl_thread_heap_t heap; - jl_thread_gc_num_t gc_num; MMTkMutatorContext mmtk_mutator; size_t malloc_sz_since_last_poll; } jl_gc_tls_states_t; diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index d395771f6df0c..4b7dc0ec855a7 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -2528,7 +2528,7 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F) builder.CreateStore(new_cursor, cursor_ptr); // ptls->gc_tls.gc_num.allocd += osize; - auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_tls) + offsetof(jl_gc_tls_states_t, gc_num)); + auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_tls_common) + offsetof(jl_gc_tls_states_common_t, gc_num)); auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos); auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc"); auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls); From ebf478ad2783571684e64fa41c7868d40b105985 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 28 Aug 2024 00:44:09 +0000 Subject: [PATCH 28/38] Refactoring to be considered before adding MMTk --- src/gc-common.c | 156 +++++++++++++++++++++++++++++++++++++++++++ src/gc-common.h | 6 ++ src/gc-debug.c | 41 +----------- src/gc-interface.h | 12 ++++ src/gc-stacks.c | 4 +- src/gc-stock.c | 156 ++++++++++++------------------------------- src/gc-stock.h | 21 ------ src/julia.h | 2 +- src/julia_internal.h | 26 +------- src/scheduler.c | 11 +++ src/stackwalk.c | 4 +- src/staticdata.c | 2 + 12 files changed, 237 insertions(+), 204 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index ee461b576ea9e..2ec167caa667a 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -20,6 +20,11 @@ extern "C" { jl_gc_num_t gc_num = {0}; +JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) +{ + return gc_num.total_time; +} + // =========================================================================== // // GC Callbacks // =========================================================================== // @@ -489,6 +494,87 @@ jl_ptls_t* gc_all_tls_states; // MISC // =========================================================================== // +JL_DLLEXPORT jl_weakref_t 
*jl_gc_new_weakref(jl_value_t *value) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return jl_gc_new_weakref_th(ptls, value); +} + +JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) +{ + return jl_gc_alloc(ptls, sz, ty); +} + +JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return jl_gc_alloc(ptls, sz, NULL); +} + +// allocation wrappers that save the size of allocations, to allow using +// jl_gc_counted_* functions with a libc-compatible API. + +JL_DLLEXPORT void *jl_malloc(size_t sz) +{ + int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT); + if (p == NULL) + return NULL; + p[0] = sz; + return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +//_unchecked_calloc does not check for potential overflow of nm*sz +STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) { + size_t nmsz = nm*sz; + int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1); + if (p == NULL) + return NULL; + p[0] = nmsz; + return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz) +{ + if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT) + return NULL; + return _unchecked_calloc(nm, sz); +} + +JL_DLLEXPORT void jl_free(void *p) +{ + if (p != NULL) { + int64_t *pp = (int64_t *)p - 2; + size_t sz = pp[0]; + jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT); + } +} + +JL_DLLEXPORT void *jl_realloc(void *p, size_t sz) +{ + int64_t *pp; + size_t szold; + if (p == NULL) { + pp = NULL; + szold = 0; + } + else { + pp = (int64_t *)p - 2; + szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT; + } + int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT); + if (pnew == NULL) + return NULL; + pnew[0] = sz; + return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 +} + +// allocator entry points + +JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) +{ + return jl_gc_alloc_(ptls, sz, ty); +} + const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT { @@ -501,6 +587,76 @@ JL_DLLEXPORT void jl_throw_out_of_memory_error(void) jl_throw(jl_memory_exception); } +size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT +{ + const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; + size_t sz = layout->size * m->length; + if (layout->flags.arrayelem_isunion) + // account for isbits Union array selector bytes + sz += m->length; + return sz; +} + +// tracking Memorys with malloc'd storage +void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ + // This is **NOT** a GC safe point. 
+ mallocmemory_t *ma; + if (ptls->gc_tls.heap.mafreelist == NULL) { + ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t)); + } + else { + ma = ptls->gc_tls.heap.mafreelist; + ptls->gc_tls.heap.mafreelist = ma->next; + } + ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned); + ma->next = ptls->gc_tls.heap.mallocarrays; + ptls->gc_tls.heap.mallocarrays = ma; +} + +int gc_logging_enabled = 0; + +JL_DLLEXPORT void jl_enable_gc_logging(int enable) { + gc_logging_enabled = enable; +} + +JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { + return gc_logging_enabled; +} + +// gc-debug common functions +// --- + +int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT +{ + int nf = (int)jl_datatype_nfields(vt); + for (int i = 1; i < nf; i++) { + if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) + return i - 1; + } + return nf - 1; +} + +int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT +{ + char *slot = (char*)_slot; + jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); + char *start = NULL; + size_t len = 0; + size_t elsize = sizeof(void*); + if (vt == jl_module_type) { + jl_module_t *m = (jl_module_t*)obj; + start = (char*)m->usings.items; + len = m->usings.len; + } + else if (vt == jl_simplevector_type) { + start = (char*)jl_svec_data(obj); + len = jl_svec_len(obj); + } + if (slot < start || slot >= start + elsize * len) + return -1; + return (slot - start) / elsize; +} + #ifdef __cplusplus } #endif diff --git a/src/gc-common.h b/src/gc-common.h index 4d53830442a7d..154b9659e9ccb 100644 --- a/src/gc-common.h +++ b/src/gc-common.h @@ -53,6 +53,12 @@ extern jl_gc_callback_list_t *gc_cblist_notify_gc_pressure; // malloc wrappers, aligned allocation // =========================================================================== // +// data structure for tracking malloc'd genericmemory. 
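// (A sketch of the tagging convention, assuming only that
// jl_genericmemory_t pointers are at least 2-byte aligned; the expressions
// mirror the track/sweep code elsewhere in this series:
//   ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned);  /* store + tag */
//   jl_value_t *a = (jl_value_t*)((uintptr_t)ma->a & ~1);       /* untag */
//   int isaligned = (uintptr_t)ma->a & 1;                       /* read flag */
// )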
+typedef struct _mallocmemory_t {
+    jl_genericmemory_t *a; // lowest bit is tagged if this is aligned memory
+    struct _mallocmemory_t *next;
+} mallocmemory_t;
+
 #if defined(_OS_WINDOWS_)
 STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align)
 {
diff --git a/src/gc-debug.c b/src/gc-debug.c
index 19dd93af5f236..d05fb4b49e9f7 100644
--- a/src/gc-debug.c
+++ b/src/gc-debug.c
@@ -1105,46 +1105,7 @@ void gc_count_pool(void)
     jl_safe_printf("************************\n");
 }
 
-int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT
-{
-    int nf = (int)jl_datatype_nfields(vt);
-    for (int i = 1; i < nf; i++) {
-        if (slot < (void*)((char*)obj + jl_field_offset(vt, i)))
-            return i - 1;
-    }
-    return nf - 1;
-}
-
-int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT
-{
-    char *slot = (char*)_slot;
-    jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj);
-    char *start = NULL;
-    size_t len = 0;
-    size_t elsize = sizeof(void*);
-    if (vt == jl_module_type) {
-        jl_module_t *m = (jl_module_t*)obj;
-        start = (char*)m->usings.items;
-        len = m->usings.len;
-    }
-    else if (vt == jl_simplevector_type) {
-        start = (char*)jl_svec_data(obj);
-        len = jl_svec_len(obj);
-    }
-    if (slot < start || slot >= start + elsize * len)
-        return -1;
-    return (slot - start) / elsize;
-}
-
-static int gc_logging_enabled = 0;
-
-JL_DLLEXPORT void jl_enable_gc_logging(int enable) {
-    gc_logging_enabled = enable;
-}
-
-JL_DLLEXPORT int jl_is_gc_logging_enabled(void) {
-    return gc_logging_enabled;
-}
+extern int gc_logging_enabled;
 
 void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t live_bytes) JL_NOTSAFEPOINT {
     if (!gc_logging_enabled) {
         return;
diff --git a/src/gc-interface.h b/src/gc-interface.h
index e543b4b5879f1..682f22344d69d 100644
--- a/src/gc-interface.h
+++ b/src/gc-interface.h
@@ -128,6 +128,13 @@ JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void);
 // Allocation
 // ========================================================================= //
 
+// On GCC, this function is inlined when sz is constant (see julia_internal.h).
+// In general, this function should implement allocation and should use the specific GC's logic
+// to decide whether to allocate a small or a large object. Finally, note that this function
+// **must** also set the type of the returned object to be `ty`. The type `ty` may also be used to record
+// an allocation of that type in the allocation profiler.
+struct _jl_value_t *jl_gc_alloc_(struct _jl_tls_states_t * ptls, size_t sz, void *ty);
+
 // Allocates small objects and increments Julia allocation counters. Size of the object
 // header must be included in the object size. The (possibly unused in some implementations)
 // offset to the arena in which we're allocating is passed in the second parameter, and the
@@ -211,6 +218,11 @@ JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align,
 // object being allocated and will be used to set the object header.
 struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT;
 
+// This function notifies the GC about memory addresses that are set when loading the boot image.
+// The GC may use that information to, for instance, determine that such objects should
+// be treated as marked and belong to the old generation in nursery collections.
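// (A sketch of the intended call sites, which appear verbatim in the
// staticdata.c hunk further down; the GC is notified before the image data
// is restored:
//   jl_gc_notify_image_load(sysimg_data, *plen);
//   jl_restore_system_image_data(sysimg_data, *plen);
// The stock collector's implementation is a no-op.)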
+void jl_gc_notify_image_load(const char* img_data, size_t len); + // ========================================================================= // // Runtime Write-Barriers // ========================================================================= // diff --git a/src/gc-stacks.c b/src/gc-stacks.c index 783129ea97693..8c44b65284386 100644 --- a/src/gc-stacks.c +++ b/src/gc-stacks.c @@ -46,7 +46,7 @@ static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT } -static void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT +void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT { VirtualFree(stkbuf, 0, MEM_RELEASE); jl_atomic_fetch_add_relaxed(&num_stack_mappings, -1); @@ -81,7 +81,7 @@ static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT return stk; } -static void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT +void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT { munmap(stkbuf, bufsz); jl_atomic_fetch_add_relaxed(&num_stack_mappings, -1); diff --git a/src/gc-stock.c b/src/gc-stock.c index 6b97881909bbd..6ebac8a0c079e 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -555,24 +555,6 @@ static void sweep_big(jl_ptls_t ptls) JL_NOTSAFEPOINT gc_time_big_end(); } -// tracking Memorys with malloc'd storage - -void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ - // This is **NOT** a GC safe point. - mallocmemory_t *ma; - if (ptls->gc_tls.heap.mafreelist == NULL) { - ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t)); - } - else { - ma = ptls->gc_tls.heap.mafreelist; - ptls->gc_tls.heap.mafreelist = ma->next; - } - ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned); - ma->next = ptls->gc_tls.heap.mallocarrays; - ptls->gc_tls.heap.mallocarrays = ma; -} - - void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT { jl_ptls_t ptls = jl_current_task->ptls; @@ -649,17 +631,6 @@ void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT reset_thread_gc_counts(); } -size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT -{ - const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout; - size_t sz = layout->size * m->length; - if (layout->flags.arrayelem_isunion) - // account for isbits Union array selector bytes - sz += m->length; - return sz; -} - - static void jl_gc_free_memory(jl_value_t *v, int isaligned) JL_NOTSAFEPOINT { assert(jl_is_genericmemory(v)); @@ -818,6 +789,29 @@ jl_value_t *jl_gc_small_alloc_noinline(jl_ptls_t ptls, int offset, int osize) { return jl_gc_small_alloc_inner(ptls, offset, osize); } +// Size does NOT include the type tag!! +inline jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) +{ + jl_value_t *v; + const size_t allocsz = sz + sizeof(jl_taggedvalue_t); + if (sz <= GC_MAX_SZCLASS) { + int pool_id = jl_gc_szclass(allocsz); + jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id]; + int osize = jl_gc_sizeclasses[pool_id]; + // We call `jl_gc_small_alloc_noinline` instead of `jl_gc_small_alloc` to avoid double-counting in + // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) 
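// (Dispatch recap for the function being moved here: allocsz is sz plus the
// 8-byte type tag on 64-bit builds; requests with sz <= GC_MAX_SZCLASS stay
// on the per-thread pool path, while larger ones, or an allocsz that
// overflowed, fall through to the big-object allocator. For example, a
// hypothetical jl_gc_alloc(ptls, 16, ty) carries an allocsz of 24 bytes and
// is served from a size-class pool.)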
+ v = jl_gc_small_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); + } + else { + if (allocsz < sz) // overflow in adding offs, size was "negative" + jl_throw(jl_memory_exception); + v = jl_gc_big_alloc_noinline(ptls, allocsz); + } + jl_set_typeof(v, ty); + maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty); + return v; +} + int jl_gc_classify_pools(size_t sz, int *osize) { if (sz > GC_MAX_SZCLASS) @@ -2794,6 +2788,21 @@ static void sweep_finalizer_list(arraylist_t *list) list->len = j; } +int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT +{ + return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); +} + +int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT +{ + if (jl_n_sweepthreads == 0) { + return 0; + } + int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); + int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; + return tid == concurrent_collector_thread_id; +} + // collector entry point and control _Atomic(uint32_t) jl_gc_disable_counter = 1; @@ -2832,11 +2841,6 @@ JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT *bytes = (num.total_allocd + num.deferred_alloc + num.allocd); } -JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void) -{ - return gc_num.total_time; -} - JL_DLLEXPORT jl_gc_num_t jl_gc_num(void) { jl_gc_num_t num = gc_num; @@ -3397,13 +3401,6 @@ void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq) gc_mark_roots(mq); } -// allocator entry points - -JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) -{ - return jl_gc_alloc_(ptls, sz, ty); -} - // Per-thread initialization void jl_init_thread_heap(jl_ptls_t ptls) { @@ -3685,63 +3682,6 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size return data; } -// allocation wrappers that save the size of allocations, to allow using -// jl_gc_counted_* functions with a libc-compatible API. 
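// (A sketch of the header layout these wrappers share, assuming
// JL_SMALL_BYTE_ALIGNMENT == 16 as the code's own comments state:
//
//   [ int64_t size | int64_t padding | payload ... ]
//   ^ jl_gc_counted_malloc result      ^ pointer handed to the caller
//
// so jl_free can recover the original request size via ((int64_t*)p)[-2].
// Note the block below is moved to gc-common.c, not deleted.)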
- -JL_DLLEXPORT void *jl_malloc(size_t sz) -{ - int64_t *p = (int64_t *)jl_gc_counted_malloc(sz + JL_SMALL_BYTE_ALIGNMENT); - if (p == NULL) - return NULL; - p[0] = sz; - return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - -//_unchecked_calloc does not check for potential overflow of nm*sz -STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) { - size_t nmsz = nm*sz; - int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1); - if (p == NULL) - return NULL; - p[0] = nmsz; - return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - -JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz) -{ - if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT) - return NULL; - return _unchecked_calloc(nm, sz); -} - -JL_DLLEXPORT void jl_free(void *p) -{ - if (p != NULL) { - int64_t *pp = (int64_t *)p - 2; - size_t sz = pp[0]; - jl_gc_counted_free_with_size(pp, sz + JL_SMALL_BYTE_ALIGNMENT); - } -} - -JL_DLLEXPORT void *jl_realloc(void *p, size_t sz) -{ - int64_t *pp; - size_t szold; - if (p == NULL) { - pp = NULL; - szold = 0; - } - else { - pp = (int64_t *)p - 2; - szold = pp[0] + JL_SMALL_BYTE_ALIGNMENT; - } - int64_t *pnew = (int64_t *)jl_gc_counted_realloc_with_old_size(pp, szold, sz + JL_SMALL_BYTE_ALIGNMENT); - if (pnew == NULL) - return NULL; - pnew[0] = sz; - return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16 -} - // allocating blocks for Arrays and Strings JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) @@ -3875,18 +3815,6 @@ jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT return jl_valueof(o); } -JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_new_weakref_th(ptls, value); -} - -JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_alloc(ptls, sz, NULL); -} - JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void) { if (jl_is_initialized()) { @@ -4014,14 +3942,14 @@ JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void) } -JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) +JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) { - return jl_gc_alloc(ptls, sz, ty); + arraylist_push(&ptls->gc_tls.sweep_objs, obj); } -JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj) +void jl_gc_notify_image_load(const char* img_data, size_t len) { - arraylist_push(&ptls->gc_tls.sweep_objs, obj); + // Do nothing } #ifdef __cplusplus diff --git a/src/gc-stock.h b/src/gc-stock.h index 46f7d3e11e105..cc661ce6e1600 100644 --- a/src/gc-stock.h +++ b/src/gc-stock.h @@ -106,12 +106,6 @@ JL_EXTENSION typedef struct _bigval_t { // must be 64-byte aligned here, in 32 & 64 bit modes } bigval_t; -// data structure for tracking malloc'd genericmemory. 
-typedef struct _mallocmemory_t { - jl_genericmemory_t *a; // lowest bit is tagged if this is aligned memory - struct _mallocmemory_t *next; -} mallocmemory_t; - // pool page metadata typedef struct _jl_gc_pagemeta_t { // next metadata structure in per-thread list @@ -428,21 +422,6 @@ STATIC_INLINE int gc_ith_parallel_collector_thread_id(int i) JL_NOTSAFEPOINT return gc_first_tid + i; } -STATIC_INLINE int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT -{ - return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); -} - -STATIC_INLINE int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT -{ - if (jl_n_sweepthreads == 0) { - return 0; - } - int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); - int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; - return tid == concurrent_collector_thread_id; -} - STATIC_INLINE int gc_random_parallel_collector_thread_id(jl_ptls_t ptls) JL_NOTSAFEPOINT { assert(jl_n_markthreads > 0); diff --git a/src/julia.h b/src/julia.h index ed3d9bf825658..b74de3060d26a 100644 --- a/src/julia.h +++ b/src/julia.h @@ -858,7 +858,7 @@ static inline jl_value_t *jl_to_typeof(uintptr_t t) return (jl_value_t*)t; } #else -extern JL_HIDDEN jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; +extern JL_DLLEXPORT jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; static inline jl_value_t *jl_to_typeof(uintptr_t t) { if (t < (jl_max_tags << 4)) diff --git a/src/julia_internal.h b/src/julia_internal.h index 20d90fede3d5e..04857d440b643 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -513,30 +513,6 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz) JL_NOTSAFE #define GC_MAX_SZCLASS (2032-sizeof(void*)) static_assert(ARRAY_CACHE_ALIGN_THRESHOLD > GC_MAX_SZCLASS, ""); - -// Size does NOT include the type tag!! -STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) -{ - jl_value_t *v; - const size_t allocsz = sz + sizeof(jl_taggedvalue_t); - if (sz <= GC_MAX_SZCLASS) { - int pool_id = jl_gc_szclass(allocsz); - jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id]; - int osize = jl_gc_sizeclasses[pool_id]; - // We call `jl_gc_small_alloc_noinline` instead of `jl_gc_small_alloc` to avoid double-counting in - // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.) - v = jl_gc_small_alloc_noinline(ptls, (char*)p - (char*)ptls, osize); - } - else { - if (allocsz < sz) // overflow in adding offs, size was "negative" - jl_throw(jl_memory_exception); - v = jl_gc_big_alloc_noinline(ptls, allocsz); - } - jl_set_typeof(v, ty); - maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty); - return v; -} - /* Programming style note: When using jl_gc_alloc, do not JL_GC_PUSH it into a * gc frame, until it has been fully initialized. An uninitialized value in a * gc frame can crash upon encountering the first safepoint. By delaying use of @@ -1077,7 +1053,7 @@ STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr) return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 4; } extern _Atomic(uint32_t) jl_gc_running; -extern _Atomic(uint32_t) jl_gc_disable_counter; +extern JL_DLLEXPORT _Atomic(uint32_t) jl_gc_disable_counter; // All the functions are safe to be called from within a signal handler // provided that the thread will not be interrupted by another asynchronous // signal. 
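A note on the julia_internal.h hunk above: jl_gc_alloc_ is the allocation fast path, and moving its body out of the internal header means alternative collectors no longer get the stock GC's pool dispatch inlined into every caller. The function picks a size-classed per-thread pool for small objects and falls back to a separately tracked big-object allocation, guarding the header addition against overflow. Below is a minimal, self-contained sketch of that dispatch; GC_MAX_SZCLASS matches the header, but TAG_SZ, pool_alloc, big_alloc, and alloc_dispatch are illustrative stand-ins, not names from the patch.

#include <stddef.h>
#include <stdlib.h>

#define GC_MAX_SZCLASS (2032 - sizeof(void*)) /* same bound as julia_internal.h */
#define TAG_SZ sizeof(void*)                  /* stand-in for sizeof(jl_taggedvalue_t) */

/* Illustrative stand-ins for jl_gc_small_alloc_noinline / jl_gc_big_alloc_noinline. */
static void *pool_alloc(size_t osize)  { return malloc(osize); }
static void *big_alloc(size_t allocsz) { return malloc(allocsz); }

static void *alloc_dispatch(size_t sz)
{
    size_t allocsz = sz + TAG_SZ;     /* room for the type-tag word before the object */
    if (sz <= GC_MAX_SZCLASS)
        return pool_alloc(allocsz);   /* small object: size-classed per-thread pool */
    if (allocsz < sz)                 /* addition wrapped; size was effectively "negative" */
        return NULL;                  /* the real code throws jl_memory_exception here */
    return big_alloc(allocsz);        /* large object: tracked on the big-value list */
}

int main(void)
{
    void *p = alloc_dispatch(64);
    free(p);  /* valid only in this mock, where pool_alloc is plain malloc */
    return 0;
}

The overflow check sits only on the big path because the small path is bounded by GC_MAX_SZCLASS, so sz + TAG_SZ cannot wrap there.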
diff --git a/src/scheduler.c b/src/scheduler.c index bb2f85b52283f..b85a481588e4f 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -80,9 +80,20 @@ JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSA return 1; } +<<<<<<< HEAD // GC functions used extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT; +======= +// parallel task runtime +// --- + +JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max) // [0, n) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return cong(max, &ptls->rngseed); +} +>>>>>>> 4f39869d04 (Refactoring to be considered before adding MMTk) // initialize the threading infrastructure // (called only by the main thread) diff --git a/src/stackwalk.c b/src/stackwalk.c index 6aa36fa8b499c..5f28b61c4a8fe 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -5,7 +5,7 @@ utilities for walking the stack and looking up information about code addresses */ #include -#include "gc-stock.h" +#include "gc-common.h" #include "julia.h" #include "julia_internal.h" #include "threading.h" @@ -1294,6 +1294,8 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT } extern int gc_first_tid; +extern int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT; +extern int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT; // Print backtraces for all live tasks, for all threads, to jl_safe_printf stderr JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT diff --git a/src/staticdata.c b/src/staticdata.c index 0a8cbe6db7c67..bba35e6dcb5f9 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -657,6 +657,7 @@ static void jl_load_sysimg_so(void) plen = (size_t *)&jl_system_image_size; else jl_dlsym(jl_sysimg_handle, "jl_system_image_size", (void **)&plen, 1); + jl_gc_notify_image_load(sysimg_data, *plen); jl_restore_system_image_data(sysimg_data, *plen); } @@ -4054,6 +4055,7 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j jl_dlsym(pkgimg_handle, "jl_system_image_data", (void **)&pkgimg_data, 1); size_t *plen; jl_dlsym(pkgimg_handle, "jl_system_image_size", (void **)&plen, 1); + jl_gc_notify_image_load(pkgimg_data, *plen); jl_image_t pkgimage = jl_init_processor_pkgimg(pkgimg_handle); From 0a8444ea6f539cdb63481f45411f42629c1c97e1 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 29 Aug 2024 04:57:59 +0000 Subject: [PATCH 29/38] Removing jl_gc_notify_image_load, since it's a new function and not part of the refactoring --- src/gc-interface.h | 5 ----- src/gc-stock.c | 5 ----- src/staticdata.c | 2 -- 3 files changed, 12 deletions(-) diff --git a/src/gc-interface.h b/src/gc-interface.h index 682f22344d69d..25ffed4524f0c 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -218,11 +218,6 @@ JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, // object being allocated and will be used to set the object header. struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT; -// This function notifies the GC about memory addresses that are set when loading the boot image. -// The GC may use that information to, for instance, determine that such objects should -// be treated as marked and belonged to the old generation in nursery collections. 
-void jl_gc_notify_image_load(const char* img_data, size_t len); - // ========================================================================= // // Runtime Write-Barriers // ========================================================================= // diff --git a/src/gc-stock.c b/src/gc-stock.c index 6ebac8a0c079e..88b201a687eba 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -3947,11 +3947,6 @@ JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *o arraylist_push(&ptls->gc_tls.sweep_objs, obj); } -void jl_gc_notify_image_load(const char* img_data, size_t len) -{ - // Do nothing -} - #ifdef __cplusplus } #endif diff --git a/src/staticdata.c b/src/staticdata.c index bba35e6dcb5f9..0a8cbe6db7c67 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -657,7 +657,6 @@ static void jl_load_sysimg_so(void) plen = (size_t *)&jl_system_image_size; else jl_dlsym(jl_sysimg_handle, "jl_system_image_size", (void **)&plen, 1); - jl_gc_notify_image_load(sysimg_data, *plen); jl_restore_system_image_data(sysimg_data, *plen); } @@ -4055,7 +4054,6 @@ JL_DLLEXPORT jl_value_t *jl_restore_package_image_from_file(const char *fname, j jl_dlsym(pkgimg_handle, "jl_system_image_data", (void **)&pkgimg_data, 1); size_t *plen; jl_dlsym(pkgimg_handle, "jl_system_image_size", (void **)&plen, 1); - jl_gc_notify_image_load(pkgimg_data, *plen); jl_image_t pkgimage = jl_init_processor_pkgimg(pkgimg_handle); From c8818eab4ec04a248121bef73e1dd5e3b29a3ceb Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Mon, 2 Sep 2024 01:27:08 +0000 Subject: [PATCH 30/38] Moving gc_enable code to gc-common.c --- src/gc-common.c | 30 ++++++++++++++++++++++++++++++ src/gc-stock.c | 30 ------------------------------ 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 2ec167caa667a..03c046bc300f2 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -613,6 +613,36 @@ void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, i ptls->gc_tls.heap.mallocarrays = ma; } +// collector entry point and control +_Atomic(uint32_t) jl_gc_disable_counter = 1; + +JL_DLLEXPORT int jl_gc_enable(int on) +{ + jl_ptls_t ptls = jl_current_task->ptls; + int prev = !ptls->disable_gc; + ptls->disable_gc = (on == 0); + if (on && !prev) { + // disable -> enable + if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) { + gc_num.allocd += gc_num.deferred_alloc; + gc_num.deferred_alloc = 0; + } + } + else if (prev && !on) { + // enable -> disable + jl_atomic_fetch_add(&jl_gc_disable_counter, 1); + // check if the GC is running and wait for it to finish + jl_gc_safepoint_(ptls); + } + return prev; +} + +JL_DLLEXPORT int jl_gc_is_enabled(void) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return !ptls->disable_gc; +} + int gc_logging_enabled = 0; JL_DLLEXPORT void jl_enable_gc_logging(int enable) { diff --git a/src/gc-stock.c b/src/gc-stock.c index 88b201a687eba..55499bce61182 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -2803,36 +2803,6 @@ int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT return tid == concurrent_collector_thread_id; } -// collector entry point and control -_Atomic(uint32_t) jl_gc_disable_counter = 1; - -JL_DLLEXPORT int jl_gc_enable(int on) -{ - jl_ptls_t ptls = jl_current_task->ptls; - int prev = !ptls->disable_gc; - ptls->disable_gc = (on == 0); - if (on && !prev) { - // disable -> enable - if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) { - gc_num.allocd += gc_num.deferred_alloc; - gc_num.deferred_alloc = 0; - } - 
} - else if (prev && !on) { - // enable -> disable - jl_atomic_fetch_add(&jl_gc_disable_counter, 1); - // check if the GC is running and wait for it to finish - jl_gc_safepoint_(ptls); - } - return prev; -} - -JL_DLLEXPORT int jl_gc_is_enabled(void) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return !ptls->disable_gc; -} - JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT { jl_gc_num_t num = gc_num; From e721e0c121ee911c29e736668b5e20766844d85e Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Mon, 16 Sep 2024 06:38:02 +0000 Subject: [PATCH 31/38] Addressing PR comments --- src/gc-common.c | 134 +++++++++++++++++++++++++------------------ src/gc-common.h | 6 ++ src/gc-debug.c | 2 - src/gc-interface.h | 30 +--------- src/gc-stock.c | 18 +----- src/gc-stock.h | 15 +++++ src/julia.h | 2 +- src/julia_internal.h | 4 +- src/stackwalk.c | 10 +--- 9 files changed, 110 insertions(+), 111 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 03c046bc300f2..046feae6aa4c5 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -491,15 +491,9 @@ int gc_n_threads; jl_ptls_t* gc_all_tls_states; // =========================================================================== // -// MISC +// Allocation // =========================================================================== // -JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return jl_gc_new_weakref_th(ptls, value); -} - JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty) { return jl_gc_alloc(ptls, sz, ty); @@ -575,17 +569,9 @@ JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) return jl_gc_alloc_(ptls, sz, ty); } -const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 -JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT -{ - return jl_buff_tag; -} - -// callback for passing OOM errors from gmp -JL_DLLEXPORT void jl_throw_out_of_memory_error(void) -{ - jl_throw(jl_memory_exception); -} +// =========================================================================== // +// Generic Memory +// =========================================================================== // size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT { @@ -613,6 +599,66 @@ void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, i ptls->gc_tls.heap.mallocarrays = ma; } +// =========================================================================== // +// GC Debug +// =========================================================================== // + +int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT +{ + int nf = (int)jl_datatype_nfields(vt); + for (int i = 1; i < nf; i++) { + if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) + return i - 1; + } + return nf - 1; +} + +int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT +{ + char *slot = (char*)_slot; + jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); + char *start = NULL; + size_t len = 0; + size_t elsize = sizeof(void*); + if (vt == jl_module_type) { + jl_module_t *m = (jl_module_t*)obj; + start = (char*)m->usings.items; + len = m->usings.len; + } + else if (vt == jl_simplevector_type) { + start = (char*)jl_svec_data(obj); + len = jl_svec_len(obj); + } + if (slot < start || slot >= start + elsize * len) + return -1; + return (slot - start) / elsize; +} + +// 
=========================================================================== // +// GC Control +// =========================================================================== // + +JL_DLLEXPORT uint32_t jl_get_gc_disable_counter(void) { + return jl_atomic_load_acquire(&jl_gc_disable_counter); +} + +JL_DLLEXPORT int jl_gc_is_enabled(void) +{ + jl_ptls_t ptls = jl_current_task->ptls; + return !ptls->disable_gc; +} + +int gc_logging_enabled = 0; + +JL_DLLEXPORT void jl_enable_gc_logging(int enable) { + gc_logging_enabled = enable; +} + +JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { + return gc_logging_enabled; +} + + // collector entry point and control _Atomic(uint32_t) jl_gc_disable_counter = 1; @@ -637,54 +683,30 @@ JL_DLLEXPORT int jl_gc_enable(int on) return prev; } -JL_DLLEXPORT int jl_gc_is_enabled(void) +// =========================================================================== // +// MISC +// =========================================================================== // + +JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) { jl_ptls_t ptls = jl_current_task->ptls; - return !ptls->disable_gc; -} - -int gc_logging_enabled = 0; - -JL_DLLEXPORT void jl_enable_gc_logging(int enable) { - gc_logging_enabled = enable; + return jl_gc_new_weakref_th(ptls, value); } -JL_DLLEXPORT int jl_is_gc_logging_enabled(void) { - return gc_logging_enabled; +JL_DLLEXPORT jl_datatype_t **jl_get_ijl_small_typeof(void) { + return ijl_small_typeof; } -// gc-debug common functions -// --- - -int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT +const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 +JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT { - int nf = (int)jl_datatype_nfields(vt); - for (int i = 1; i < nf; i++) { - if (slot < (void*)((char*)obj + jl_field_offset(vt, i))) - return i - 1; - } - return nf - 1; + return jl_buff_tag; } -int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT +// callback for passing OOM errors from gmp +JL_DLLEXPORT void jl_throw_out_of_memory_error(void) { - char *slot = (char*)_slot; - jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); - char *start = NULL; - size_t len = 0; - size_t elsize = sizeof(void*); - if (vt == jl_module_type) { - jl_module_t *m = (jl_module_t*)obj; - start = (char*)m->usings.items; - len = m->usings.len; - } - else if (vt == jl_simplevector_type) { - start = (char*)jl_svec_data(obj); - len = jl_svec_len(obj); - } - if (slot < start || slot >= start + elsize * len) - return -1; - return (slot - start) / elsize; + jl_throw(jl_memory_exception); } #ifdef __cplusplus diff --git a/src/gc-common.h b/src/gc-common.h index 154b9659e9ccb..32b7470b13a58 100644 --- a/src/gc-common.h +++ b/src/gc-common.h @@ -179,4 +179,10 @@ JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o); extern int gc_n_threads; extern jl_ptls_t* gc_all_tls_states; +// =========================================================================== // +// Logging +// =========================================================================== // + +extern int gc_logging_enabled; + #endif // JL_GC_COMMON_H diff --git a/src/gc-debug.c b/src/gc-debug.c index d05fb4b49e9f7..7c479484cde45 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1105,8 +1105,6 @@ void gc_count_pool(void) jl_safe_printf("************************\n"); } -extern int gc_logging_enabled; - void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t 
live_bytes) JL_NOTSAFEPOINT { if (!gc_logging_enabled) { return; diff --git a/src/gc-interface.h b/src/gc-interface.h index 25ffed4524f0c..0e9ce32697f35 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -94,6 +94,8 @@ JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem); // should run a collection cycle again (e.g. a full mark right after a full sweep to ensure // we do a full heap traversal). JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection); +// Returns whether the thread with `tid` is a collector thread +JL_DLLEXPORT int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT; // ========================================================================= // // Metrics @@ -162,26 +164,6 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz); JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz); // Wrapper around Libc realloc that updates Julia allocation counters. JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz); -// Wrapper around Libc malloc that allocates a memory region with a few additional machine -// words before the actual payload that are used to record the size of the requested -// allocation. Also updates Julia allocation counters. The function returns a pointer to the -// payload as a result of the allocation. -JL_DLLEXPORT void *jl_malloc(size_t sz); -// Wrapper around Libc calloc that allocates a memory region with a few additional machine -// words before the actual payload that are used to record the size of the requested -// allocation. Also updates Julia allocation counters. The function returns a pointer to the -// payload as a result of the allocation. -JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz); -// Wrapper around Libc free that takes a pointer to the payload of a memory region allocated -// with jl_malloc or jl_calloc, and uses the size information stored in the first machine -// words of the memory buffer update Julia allocation counters, and then frees the -// corresponding memory buffer. -JL_DLLEXPORT void jl_free(void *p); -// Wrapper around Libc realloc that takes a memory region allocated with jl_malloc or -// jl_calloc, and uses the size information stored in the first machine words of the memory -// buffer to update Julia allocation counters, reallocating the corresponding memory buffer -// in the end. -JL_DLLEXPORT void *jl_realloc(void *p, size_t sz); // Wrapper around Libc malloc that's used to dynamically allocate memory for Arrays and // Strings. It increments Julia allocation counters and should check whether we're close to // the Julia heap target, and therefore, whether we should run a collection. Note that this @@ -195,14 +177,6 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz); // thread-local allocator of the thread referenced by the first jl_ptls_t argument. JL_DLLEXPORT struct _jl_weakref_t *jl_gc_new_weakref_th(struct _jl_tls_states_t *ptls, struct _jl_value_t *value); -// Allocates a new weak-reference, assigns its value and increments Julia allocation -// counters. If thread-local allocators are used, then this function should allocate in the -// thread-local allocator of the current thread. -JL_DLLEXPORT struct _jl_weakref_t *jl_gc_new_weakref(struct _jl_value_t *value); -// Allocates an object whose size is specified by the function argument and increments Julia -// allocation counters. If thread-local allocators are used, then this function should -// allocate in the thread-local allocator of the current thread. 
-JL_DLLEXPORT struct _jl_value_t *jl_gc_allocobj(size_t sz); // Permanently allocates a memory slot of the size specified by the first parameter. This // block of memory is allocated in an immortal region that is never swept. The second // parameter specifies whether the memory should be filled with zeros. The third and fourth diff --git a/src/gc-stock.c b/src/gc-stock.c index 55499bce61182..b345fe08ff69c 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -2788,19 +2788,8 @@ static void sweep_finalizer_list(arraylist_t *list) list->len = j; } -int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT -{ - return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); -} - -int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT -{ - if (jl_n_sweepthreads == 0) { - return 0; - } - int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); - int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; - return tid == concurrent_collector_thread_id; +int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT { + return gc_is_parallel_collector_thread(tid) || gc_is_concurrent_collector_thread(tid); } JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT @@ -3193,8 +3182,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) // free empty GC state for threads that have exited if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { // GC threads should never exit - assert(!gc_is_parallel_collector_thread(t_i)); - assert(!gc_is_concurrent_collector_thread(t_i)); + assert(!gc_is_collector_thread(t_i)); jl_thread_heap_t *heap = &ptls2->gc_tls.heap; if (heap->weak_refs.len == 0) small_arraylist_free(&heap->weak_refs); diff --git a/src/gc-stock.h b/src/gc-stock.h index cc661ce6e1600..0f8d1eee67581 100644 --- a/src/gc-stock.h +++ b/src/gc-stock.h @@ -422,6 +422,21 @@ STATIC_INLINE int gc_ith_parallel_collector_thread_id(int i) JL_NOTSAFEPOINT return gc_first_tid + i; } +STATIC_INLINE int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT +{ + return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id(); +} + +STATIC_INLINE int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT +{ + if (jl_n_sweepthreads == 0) { + return 0; + } + int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id(); + int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1; + return tid == concurrent_collector_thread_id; +} + STATIC_INLINE int gc_random_parallel_collector_thread_id(jl_ptls_t ptls) JL_NOTSAFEPOINT { assert(jl_n_markthreads > 0); diff --git a/src/julia.h b/src/julia.h index b74de3060d26a..ed3d9bf825658 100644 --- a/src/julia.h +++ b/src/julia.h @@ -858,7 +858,7 @@ static inline jl_value_t *jl_to_typeof(uintptr_t t) return (jl_value_t*)t; } #else -extern JL_DLLEXPORT jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; +extern JL_HIDDEN jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)]; static inline jl_value_t *jl_to_typeof(uintptr_t t) { if (t < (jl_max_tags << 4)) diff --git a/src/julia_internal.h b/src/julia_internal.h index 04857d440b643..c079c06f0189a 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -367,6 +367,8 @@ extern jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT size_t jl_typeinf_world; extern _Atomic(jl_typemap_entry_t*) call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED; +extern void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT; + JL_DLLEXPORT 
extern int jl_lineno; JL_DLLEXPORT extern const char *jl_filename; @@ -1053,7 +1055,7 @@ STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr) return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 4; } extern _Atomic(uint32_t) jl_gc_running; -extern JL_DLLEXPORT _Atomic(uint32_t) jl_gc_disable_counter; +extern _Atomic(uint32_t) jl_gc_disable_counter; // All the functions are safe to be called from within a signal handler // provided that the thread will not be interrupted by another asynchronous // signal. diff --git a/src/stackwalk.c b/src/stackwalk.c index 5f28b61c4a8fe..a1de3a6d61a07 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -1294,8 +1294,6 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT } extern int gc_first_tid; -extern int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT; -extern int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT; // Print backtraces for all live tasks, for all threads, to jl_safe_printf stderr JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT @@ -1304,12 +1302,8 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states); for (size_t i = 0; i < nthreads; i++) { jl_ptls_t ptls2 = allstates[i]; - if (gc_is_parallel_collector_thread(i)) { - jl_safe_printf("==== Skipping backtrace for parallel GC thread %zu\n", i + 1); - continue; - } - if (gc_is_concurrent_collector_thread(i)) { - jl_safe_printf("==== Skipping backtrace for concurrent GC thread %zu\n", i + 1); + if (gc_is_collector_thread(i)) { + jl_safe_printf("==== Skipping backtrace for parallel/concurrent GC thread %zu\n", i + 1); continue; } if (ptls2 == NULL) { From 6c0eb93fccbd77a338c6a6e2ddae8888fa6bc1b2 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 19 Sep 2024 04:18:13 +0000 Subject: [PATCH 32/38] Push resolution of merge conflict --- src/scheduler.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/scheduler.c b/src/scheduler.c index b85a481588e4f..bb2f85b52283f 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -80,20 +80,9 @@ JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSA return 1; } -<<<<<<< HEAD // GC functions used extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT; -======= -// parallel task runtime -// --- - -JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max) // [0, n) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return cong(max, &ptls->rngseed); -} ->>>>>>> 4f39869d04 (Refactoring to be considered before adding MMTk) // initialize the threading infrastructure // (called only by the main thread) From fb0ec76ecc52efae85ad65c34b1a3f49f24475e7 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 25 Sep 2024 01:10:31 +0000 Subject: [PATCH 33/38] Removing jl_gc_mark_queue_obj_explicit extern definition from scheduler.c --- src/scheduler.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/scheduler.c b/src/scheduler.c index bb2f85b52283f..7e23f654c2566 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -80,10 +80,6 @@ JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSA return 1; } -// GC functions used -extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, - jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT; - // initialize the threading infrastructure // (called only by the main thread) void jl_init_threadinginfra(void) From 
3eea0790d832eba1d17b1a1564447f51986c7118 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 25 Sep 2024 02:50:25 +0000 Subject: [PATCH 34/38] Don't need the getter function since it's possible to use jl_small_typeof directly --- src/gc-common.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/gc-common.c b/src/gc-common.c index 046feae6aa4c5..417f12f26d64d 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -693,10 +693,6 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value) return jl_gc_new_weakref_th(ptls, value); } -JL_DLLEXPORT jl_datatype_t **jl_get_ijl_small_typeof(void) { - return ijl_small_typeof; -} - const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT { From ef6c79823306f2556951d6f8a70b165aceda2c76 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Fri, 27 Sep 2024 00:49:07 +0000 Subject: [PATCH 35/38] Remove extern from free_stack declaration in julia_internal.h --- src/julia_internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/julia_internal.h b/src/julia_internal.h index c079c06f0189a..6fd537ed6baf8 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -367,7 +367,7 @@ extern jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED; extern JL_DLLEXPORT size_t jl_typeinf_world; extern _Atomic(jl_typemap_entry_t*) call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED; -extern void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT; +void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT; JL_DLLEXPORT extern int jl_lineno; JL_DLLEXPORT extern const char *jl_filename; From 63ca362bfaeed147887da242a6721de014ca5535 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Tue, 8 Oct 2024 09:12:49 +0000 Subject: [PATCH 36/38] Putting everything that is common GC tls into gc-tls-common.h --- src/gc-common.c | 10 +-- src/gc-stacks.c | 18 +++--- src/gc-stock.c | 154 ++++++++++++++++++++++---------------------- src/gc-tls-common.h | 52 +++++++++++++++ src/gc-tls.h | 25 ------- src/julia_threads.h | 2 + src/stackwalk.c | 2 +- 7 files changed, 147 insertions(+), 116 deletions(-) create mode 100644 src/gc-tls-common.h diff --git a/src/gc-common.c b/src/gc-common.c index 417f12f26d64d..6ce455d3923ad 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -587,16 +587,16 @@ size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){ // This is **NOT** a GC safe point. 
mallocmemory_t *ma; - if (ptls->gc_tls.heap.mafreelist == NULL) { + if (ptls->gc_tls_common.heap.mafreelist == NULL) { ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t)); } else { - ma = ptls->gc_tls.heap.mafreelist; - ptls->gc_tls.heap.mafreelist = ma->next; + ma = ptls->gc_tls_common.heap.mafreelist; + ptls->gc_tls_common.heap.mafreelist = ma->next; } ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned); - ma->next = ptls->gc_tls.heap.mallocarrays; - ptls->gc_tls.heap.mallocarrays = ma; + ma->next = ptls->gc_tls_common.heap.mallocarrays; + ptls->gc_tls_common.heap.mallocarrays = ma; } // =========================================================================== // diff --git a/src/gc-stacks.c b/src/gc-stacks.c index 8c44b65284386..a8fec938456a3 100644 --- a/src/gc-stacks.c +++ b/src/gc-stacks.c @@ -131,7 +131,7 @@ void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT if (bufsz <= pool_sizes[JL_N_STACK_POOLS - 1]) { unsigned pool_id = select_pool(bufsz); if (pool_sizes[pool_id] == bufsz) { - small_arraylist_push(&ptls->gc_tls.heap.free_stacks[pool_id], stkbuf); + small_arraylist_push(&ptls->gc_tls_common.heap.free_stacks[pool_id], stkbuf); return; } } @@ -160,7 +160,7 @@ void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task) #ifdef _COMPILER_ASAN_ENABLED_ __asan_unpoison_stack_memory((uintptr_t)stkbuf, bufsz); #endif - small_arraylist_push(&ptls->gc_tls.heap.free_stacks[pool_id], stkbuf); + small_arraylist_push(&ptls->gc_tls_common.heap.free_stacks[pool_id], stkbuf); } } } @@ -175,7 +175,7 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO if (ssize <= pool_sizes[JL_N_STACK_POOLS - 1]) { unsigned pool_id = select_pool(ssize); ssize = pool_sizes[pool_id]; - small_arraylist_t *pool = &ptls->gc_tls.heap.free_stacks[pool_id]; + small_arraylist_t *pool = &ptls->gc_tls_common.heap.free_stacks[pool_id]; if (pool->len > 0) { stk = small_arraylist_pop(pool); } @@ -196,7 +196,7 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO } *bufsz = ssize; if (owner) { - small_arraylist_t *live_tasks = &ptls->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls->gc_tls_common.heap.live_tasks; mtarraylist_push(live_tasks, owner); } return stk; @@ -223,7 +223,7 @@ void sweep_stack_pools(void) JL_NOTSAFEPOINT // free half of stacks that remain unused since last sweep for (int p = 0; p < JL_N_STACK_POOLS; p++) { - small_arraylist_t *al = &ptls2->gc_tls.heap.free_stacks[p]; + small_arraylist_t *al = &ptls2->gc_tls_common.heap.free_stacks[p]; size_t n_to_free; if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { n_to_free = al->len; // not alive yet or dead, so it does not need these anymore @@ -245,10 +245,10 @@ void sweep_stack_pools(void) JL_NOTSAFEPOINT } } if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { - small_arraylist_free(ptls2->gc_tls.heap.free_stacks); + small_arraylist_free(ptls2->gc_tls_common.heap.free_stacks); } - small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; size_t n = 0; size_t ndel = 0; size_t l = live_tasks->len; @@ -299,7 +299,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void) jl_ptls_t ptls2 = allstates[i]; if (ptls2 == NULL) continue; - small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; size_t n = mtarraylist_length(live_tasks); l += n + (ptls2->root_task->ctx.stkbuf != NULL); } @@ 
-318,7 +318,7 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void) goto restart; jl_array_data(a,void*)[j++] = t; } - small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; size_t n = mtarraylist_length(live_tasks); for (size_t i = 0; i < n; i++) { jl_task_t *t = (jl_task_t*)mtarraylist_get(live_tasks, i); diff --git a/src/gc-stock.c b/src/gc-stock.c index b345fe08ff69c..8e040c9b25dcf 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -357,7 +357,7 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *valu jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type); wr->value = value; // NOTE: wb not needed here - small_arraylist_push(&ptls->gc_tls.heap.weak_refs, wr); + small_arraylist_push(&ptls->gc_tls_common.heap.weak_refs, wr); return wr; } @@ -367,8 +367,8 @@ static void clear_weak_refs(void) for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls2 = gc_all_tls_states[i]; if (ptls2 != NULL) { - size_t n, l = ptls2->gc_tls.heap.weak_refs.len; - void **lst = ptls2->gc_tls.heap.weak_refs.items; + size_t n, l = ptls2->gc_tls_common.heap.weak_refs.len; + void **lst = ptls2->gc_tls_common.heap.weak_refs.items; for (n = 0; n < l; n++) { jl_weakref_t *wr = (jl_weakref_t*)lst[n]; if (!gc_marked(jl_astaggedvalue(wr->value)->bits.gc)) @@ -386,8 +386,8 @@ static void sweep_weak_refs(void) if (ptls2 != NULL) { size_t n = 0; size_t ndel = 0; - size_t l = ptls2->gc_tls.heap.weak_refs.len; - void **lst = ptls2->gc_tls.heap.weak_refs.items; + size_t l = ptls2->gc_tls_common.heap.weak_refs.len; + void **lst = ptls2->gc_tls_common.heap.weak_refs.items; if (l == 0) continue; while (1) { @@ -402,7 +402,7 @@ static void sweep_weak_refs(void) lst[n] = lst[n + ndel]; lst[n + ndel] = tmp; } - ptls2->gc_tls.heap.weak_refs.len -= ndel; + ptls2->gc_tls_common.heap.weak_refs.len -= ndel; } } } @@ -410,18 +410,18 @@ static void sweep_weak_refs(void) STATIC_INLINE void jl_batch_accum_heap_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT { - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.alloc_acc) + sz; + uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc) + sz; if (alloc_acc < 16*1024) - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, alloc_acc); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, alloc_acc); else { jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); } } STATIC_INLINE void jl_batch_accum_free_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT { - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc) + sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc) + sz); } // big value list @@ -442,10 +442,10 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz) jl_throw(jl_memory_exception); gc_invoke_callbacks(jl_gc_cb_notify_external_alloc_t, gc_cblist_notify_external_alloc, (v, allocsz)); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + allocsz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.bigalloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.bigalloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + 
jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + allocsz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.bigalloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.bigalloc) + 1); jl_batch_accum_heap_size(ptls, allocsz); #ifdef MEMDEBUG memset(v, 0xee, allocsz); @@ -558,8 +558,8 @@ static void sweep_big(jl_ptls_t ptls) JL_NOTSAFEPOINT void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT { jl_ptls_t ptls = jl_current_task->ptls; - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz); jl_batch_accum_heap_size(ptls, sz); } @@ -578,18 +578,18 @@ static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTS for (int i = 0; i < gc_n_threads; i++) { jl_ptls_t ptls = gc_all_tls_states[i]; if (ptls) { - dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval); - dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc); - dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.realloc); - dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.poolalloc); - dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.bigalloc); - dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc); + dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval); + dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc); + dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.realloc); + dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.poolalloc); + dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.bigalloc); + dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc); if (update_heap) { - uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.alloc_acc); - freed_in_runtime += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc); + uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc); + freed_in_runtime += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc); jl_atomic_store_relaxed(&gc_heap_stats.heap_size, alloc_acc + jl_atomic_load_relaxed(&gc_heap_stats.heap_size)); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0); } } } @@ -605,13 +605,13 @@ static void reset_thread_gc_counts(void) JL_NOTSAFEPOINT jl_ptls_t ptls = gc_all_tls_states[i]; if (ptls != NULL) { // don't reset `pool_live_bytes` here - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.realloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.poolalloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.bigalloc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.realloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.poolalloc, 0); + 
jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.bigalloc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0); } } } @@ -654,8 +654,8 @@ static void sweep_malloced_memory(void) JL_NOTSAFEPOINT for (int t_i = 0; t_i < gc_n_threads; t_i++) { jl_ptls_t ptls2 = gc_all_tls_states[t_i]; if (ptls2 != NULL) { - mallocmemory_t *ma = ptls2->gc_tls.heap.mallocarrays; - mallocmemory_t **pma = &ptls2->gc_tls.heap.mallocarrays; + mallocmemory_t *ma = ptls2->gc_tls_common.heap.mallocarrays; + mallocmemory_t **pma = &ptls2->gc_tls_common.heap.mallocarrays; while (ma != NULL) { mallocmemory_t *nxt = ma->next; jl_value_t *a = (jl_value_t*)((uintptr_t)ma->a & ~1); @@ -667,8 +667,8 @@ static void sweep_malloced_memory(void) JL_NOTSAFEPOINT *pma = nxt; int isaligned = (uintptr_t)ma->a & 1; jl_gc_free_memory(a, isaligned); - ma->next = ptls2->gc_tls.heap.mafreelist; - ptls2->gc_tls.heap.mafreelist = ma; + ma->next = ptls2->gc_tls_common.heap.mafreelist; + ptls2->gc_tls_common.heap.mafreelist = ma; } gc_time_count_mallocd_memory(bits); ma = nxt; @@ -729,12 +729,12 @@ STATIC_INLINE jl_value_t *jl_gc_small_alloc_inner(jl_ptls_t ptls, int offset, return jl_gc_big_alloc(ptls, osize, NULL); #endif maybe_collect(ptls); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + osize); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes) + osize); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.poolalloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.poolalloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + osize); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes) + osize); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.poolalloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.poolalloc) + 1); // first try to use the freelist jl_taggedvalue_t *v = p->freelist; if (v != NULL) { @@ -971,8 +971,8 @@ static void gc_sweep_page(gc_page_profiler_serializer_t *s, jl_gc_pool_t *p, jl_ // instead of adding it to the thread that originally allocated the page, so we can avoid // an atomic-fetch-add here. 
size_t delta = (GC_PAGE_SZ - GC_PAGE_OFFSET - nfree * osize); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes) + delta); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes) + delta); jl_atomic_fetch_add_relaxed((_Atomic(int64_t) *)&gc_num.freed, (nfree - old_nfree) * osize); } @@ -1228,7 +1228,7 @@ static void gc_sweep_pool(void) } continue; } - jl_atomic_store_relaxed(&ptls2->gc_tls.gc_num.pool_live_bytes, 0); + jl_atomic_store_relaxed(&ptls2->gc_tls_common.gc_num.pool_live_bytes, 0); for (int i = 0; i < JL_GC_N_POOLS; i++) { jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i]; jl_taggedvalue_t *last = p->freelist; @@ -2834,7 +2834,7 @@ JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void) for (int i = 0; i < n_threads; i++) { jl_ptls_t ptls2 = all_tls_states[i]; if (ptls2 != NULL) { - pool_live_bytes += jl_atomic_load_relaxed(&ptls2->gc_tls.gc_num.pool_live_bytes); + pool_live_bytes += jl_atomic_load_relaxed(&ptls2->gc_tls_common.gc_num.pool_live_bytes); } } return pool_live_bytes; @@ -3183,11 +3183,12 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) { // GC threads should never exit assert(!gc_is_collector_thread(t_i)); + jl_thread_heap_common_t *common_heap = &ptls2->gc_tls_common.heap; jl_thread_heap_t *heap = &ptls2->gc_tls.heap; - if (heap->weak_refs.len == 0) - small_arraylist_free(&heap->weak_refs); - if (heap->live_tasks.len == 0) - small_arraylist_free(&heap->live_tasks); + if (common_heap->weak_refs.len == 0) + small_arraylist_free(&common_heap->weak_refs); + if (common_heap->live_tasks.len == 0) + small_arraylist_free(&common_heap->live_tasks); if (heap->remset.len == 0) arraylist_free(&heap->remset); if (ptls2->finalizers.len == 0) @@ -3256,8 +3257,8 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; if (jl_atomic_load_acquire(&jl_gc_disable_counter)) { - size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval; - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); + size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval; + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), ""); jl_atomic_fetch_add_relaxed((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); return; @@ -3362,6 +3363,7 @@ void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq) // Per-thread initialization void jl_init_thread_heap(jl_ptls_t ptls) { + jl_thread_heap_common_t *common_heap = &ptls->gc_tls_common.heap; jl_thread_heap_t *heap = &ptls->gc_tls.heap; jl_gc_pool_t *p = heap->norm_pools; for (int i = 0; i < JL_GC_N_POOLS; i++) { @@ -3369,12 +3371,12 @@ void jl_init_thread_heap(jl_ptls_t ptls) p[i].freelist = NULL; p[i].newpages = NULL; } - small_arraylist_new(&heap->weak_refs, 0); - small_arraylist_new(&heap->live_tasks, 0); + small_arraylist_new(&common_heap->weak_refs, 0); + small_arraylist_new(&common_heap->live_tasks, 0); for (int i = 0; i < JL_N_STACK_POOLS; i++) - small_arraylist_new(&heap->free_stacks[i], 0); - heap->mallocarrays = NULL; - heap->mafreelist = NULL; + small_arraylist_new(&common_heap->free_stacks[i], 0); + common_heap->mallocarrays = NULL; 
+ common_heap->mafreelist = NULL; heap->young_generation_of_bigvals = (bigval_t*)calloc_s(sizeof(bigval_t)); // sentinel assert(gc_bigval_sentinel_tag != 0); // make sure the sentinel is initialized heap->young_generation_of_bigvals->header = gc_bigval_sentinel_tag; @@ -3400,8 +3402,8 @@ void jl_init_thread_heap(jl_ptls_t ptls) jl_atomic_store_relaxed(&q->array, wsa2); arraylist_new(&mq->reclaim_set, 32); - memset(&ptls->gc_tls.gc_num, 0, sizeof(ptls->gc_tls.gc_num)); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval); + memset(&ptls->gc_tls_common.gc_num, 0, sizeof(ptls->gc_tls_common.gc_num)); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval); } void jl_free_thread_gc_state(jl_ptls_t ptls) @@ -3579,10 +3581,10 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) if (data != NULL && pgcstack != NULL && ct->world_age) { jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + sz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1); jl_batch_accum_heap_size(ptls, sz); } return data; @@ -3596,10 +3598,10 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) if (data != NULL && pgcstack != NULL && ct->world_age) { jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + nm*sz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + nm*sz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1); jl_batch_accum_heap_size(ptls, sz * nm); } return data; @@ -3624,10 +3626,10 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); if (!(sz < old)) - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + (sz - old)); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.realloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.realloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + (sz - old)); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.realloc, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.realloc) + 1); int64_t diff = sz - old; if (diff < 0) { @@ -3658,10 +3660,10 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) if (b == NULL) jl_throw(jl_memory_exception); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + allocsz); - jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, - jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + allocsz); + jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, + 
jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1); jl_batch_accum_heap_size(ptls, allocsz); #ifdef _OS_WINDOWS_ SetLastError(last_error); diff --git a/src/gc-tls-common.h b/src/gc-tls-common.h new file mode 100644 index 0000000000000..28fbf2d0c448e --- /dev/null +++ b/src/gc-tls-common.h @@ -0,0 +1,52 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +// Meant to be included in "julia_threads.h" +#ifndef JL_GC_TLS_COMMON_H +#define JL_GC_TLS_COMMON_H + +#include "julia_atomics.h" + +// GC threading ------------------------------------------------------------------ + +#include "arraylist.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + // variable for tracking weak references + small_arraylist_t weak_refs; + // live tasks started on this thread + // that are holding onto a stack from the pool + small_arraylist_t live_tasks; + + // variables for tracking malloc'd arrays + struct _mallocmemory_t *mallocarrays; + struct _mallocmemory_t *mafreelist; + +#define JL_N_STACK_POOLS 16 + small_arraylist_t free_stacks[JL_N_STACK_POOLS]; +} jl_thread_heap_common_t; + +typedef struct { + _Atomic(int64_t) allocd; + _Atomic(int64_t) pool_live_bytes; + _Atomic(uint64_t) malloc; + _Atomic(uint64_t) realloc; + _Atomic(uint64_t) poolalloc; + _Atomic(uint64_t) bigalloc; + _Atomic(int64_t) free_acc; + _Atomic(uint64_t) alloc_acc; +} jl_thread_gc_num_common_t; + +typedef struct { + jl_thread_heap_common_t heap; + jl_thread_gc_num_common_t gc_num; +} jl_gc_tls_states_common_t; + +#ifdef __cplusplus +} +#endif + +#endif // JL_GC_TLS_H diff --git a/src/gc-tls.h b/src/gc-tls.h index 9e4b09404db84..ecc815805a98b 100644 --- a/src/gc-tls.h +++ b/src/gc-tls.h @@ -21,16 +21,6 @@ typedef struct { } jl_gc_pool_t; typedef struct { - // variable for tracking weak references - small_arraylist_t weak_refs; - // live tasks started on this thread - // that are holding onto a stack from the pool - small_arraylist_t live_tasks; - - // variables for tracking malloc'd arrays - struct _mallocmemory_t *mallocarrays; - struct _mallocmemory_t *mafreelist; - // variable for tracking young (i.e. not in `GC_OLD_MARKED`/last generation) large objects struct _bigval_t *young_generation_of_bigvals; @@ -42,22 +32,8 @@ typedef struct { // variables for allocating objects from pools #define JL_GC_N_MAX_POOLS 51 // conservative. 
must be kept in sync with `src/julia_internal.h` jl_gc_pool_t norm_pools[JL_GC_N_MAX_POOLS]; - -#define JL_N_STACK_POOLS 16 - small_arraylist_t free_stacks[JL_N_STACK_POOLS]; } jl_thread_heap_t; -typedef struct { - _Atomic(int64_t) allocd; - _Atomic(int64_t) pool_live_bytes; - _Atomic(uint64_t) malloc; - _Atomic(uint64_t) realloc; - _Atomic(uint64_t) poolalloc; - _Atomic(uint64_t) bigalloc; - _Atomic(int64_t) free_acc; - _Atomic(uint64_t) alloc_acc; -} jl_thread_gc_num_t; - typedef struct { ws_queue_t chunk_queue; ws_queue_t ptr_queue; @@ -78,7 +54,6 @@ typedef struct { typedef struct { jl_thread_heap_t heap; jl_gc_page_stack_t page_metadata_allocd; - jl_thread_gc_num_t gc_num; jl_gc_markqueue_t mark_queue; jl_gc_mark_cache_t gc_cache; _Atomic(size_t) gc_sweeps_requested; diff --git a/src/julia_threads.h b/src/julia_threads.h index b697a0bf030ed..fcc28591658cb 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -5,6 +5,7 @@ #define JL_THREADS_H #include "gc-tls.h" +#include "gc-tls-common.h" #include "julia_atomics.h" #ifndef _OS_WINDOWS_ #include "pthread.h" @@ -155,6 +156,7 @@ typedef struct _jl_tls_states_t { // Counter to disable finalizer **on the current thread** int finalizers_inhibited; jl_gc_tls_states_t gc_tls; // this is very large, and the offset of the first member is baked into codegen + jl_gc_tls_states_common_t gc_tls_common; // common tls for both GCs volatile sig_atomic_t defer_signal; _Atomic(struct _jl_task_t*) current_task; struct _jl_task_t *next_task; diff --git a/src/stackwalk.c b/src/stackwalk.c index a1de3a6d61a07..0988d7a833c94 100644 --- a/src/stackwalk.c +++ b/src/stackwalk.c @@ -1309,7 +1309,7 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT if (ptls2 == NULL) { continue; } - small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks; + small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks; size_t n = mtarraylist_length(live_tasks); int t_state = JL_TASK_STATE_DONE; jl_task_t *t = ptls2->root_task; From 3271996a9eb45899e330a274420a53d45c6b4079 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Tue, 8 Oct 2024 09:14:24 +0000 Subject: [PATCH 37/38] Typo --- src/gc-tls-common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gc-tls-common.h b/src/gc-tls-common.h index 28fbf2d0c448e..ba36f5c1c238e 100644 --- a/src/gc-tls-common.h +++ b/src/gc-tls-common.h @@ -49,4 +49,4 @@ typedef struct { } #endif -#endif // JL_GC_TLS_H +#endif // JL_GC_TLS_COMMON_H From cd4f5a177f0c0c7d9e0fb59bf830f2d914c46727 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Tue, 8 Oct 2024 22:46:39 +0000 Subject: [PATCH 38/38] Adding gc-tls-common.h to Makefile as a public header --- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index a6b1f433b73ce..80bbdbcff67fc 100644 --- a/src/Makefile +++ b/src/Makefile @@ -103,7 +103,7 @@ ifeq ($(USE_SYSTEM_LIBUV),0) UV_HEADERS += uv.h UV_HEADERS += uv/*.h endif -PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-interface.h gc-tls.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h) +PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-interface.h gc-tls.h gc-tls-common.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h) ifeq ($(OS),WINNT) PUBLIC_HEADERS += $(addprefix 
$(SRCDIR)/,win32_ucontext.h) endif
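A closing note on the gc-tls-common.h split: the fields gathered into jl_gc_tls_states_common_t are the pieces of per-thread GC state that any collector needs (weak-reference lists, live-task lists, malloc'd-memory tracking, and allocation counters), while collector-specific state such as the stock GC's pools stays behind in gc-tls.h. The counters are only written by their owning thread and are summed centrally (see combine_thread_gc_counts in the patch), which is consistent with the relaxed atomic load/store pairs used throughout. Below is a standalone C11 sketch of that accounting idiom, using a reduced mock struct rather than the real jl_ptls_t layout.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Reduced mock of jl_thread_gc_num_common_t from gc-tls-common.h. */
typedef struct {
    _Atomic(int64_t)  allocd;
    _Atomic(uint64_t) malloc_count;  /* named "malloc" in the real struct */
} gc_num_common_mock;

/* Owner-thread-only accounting: a relaxed load/store pair suffices because
 * no other thread writes these fields between safepoints. */
static void count_malloc(gc_num_common_mock *num, int64_t sz)
{
    atomic_store_explicit(&num->allocd,
        atomic_load_explicit(&num->allocd, memory_order_relaxed) + sz,
        memory_order_relaxed);
    atomic_store_explicit(&num->malloc_count,
        atomic_load_explicit(&num->malloc_count, memory_order_relaxed) + 1,
        memory_order_relaxed);
}

int main(void)
{
    gc_num_common_mock num;
    atomic_init(&num.allocd, 0);
    atomic_init(&num.malloc_count, 0);
    count_malloc(&num, 128);
    printf("allocd=%lld mallocs=%llu\n",
           (long long)atomic_load_explicit(&num.allocd, memory_order_relaxed),
           (unsigned long long)atomic_load_explicit(&num.malloc_count,
                                                    memory_order_relaxed));
    return 0;
}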