77#include "pycore_pyerrors.h" // _Py_FatalErrorFormat()
88#include "pycore_pymem.h"
99#include "pycore_pystate.h" // _PyInterpreterState_GET
10+ #include "pycore_obmalloc_init.h"
1011
1112#include <stdlib.h> // malloc()
1213#include <stdbool.h>
@@ -1016,6 +1017,13 @@ static int running_on_valgrind = -1;
10161017
10171018typedef struct _obmalloc_state OMState ;
10181019
1020+ /* obmalloc state for main interpreter and shared by all interpreters without
1021+ * their own obmalloc state. By not explicitly initializing this structure, it
1022+ * will be allocated in the BSS which is a small performance win. The radix
1023+ * tree arrays are fairly large but are sparsely used. */
1024+ static struct _obmalloc_state obmalloc_state_main ;
1025+ static bool obmalloc_state_initialized ;
1026+
10191027static inline int
10201028has_own_state (PyInterpreterState * interp )
10211029{
@@ -1028,10 +1036,8 @@ static inline OMState *
10281036get_state (void )
10291037{
10301038 PyInterpreterState * interp = _PyInterpreterState_GET ();
1031- if (!has_own_state (interp )) {
1032- interp = _PyInterpreterState_Main ();
1033- }
1034- return & interp -> obmalloc ;
1039+ assert (interp -> obmalloc != NULL ); // otherwise not initialized or freed
1040+ return interp -> obmalloc ;
10351041}
10361042
10371043// These macros all rely on a local "state" variable.
@@ -1094,7 +1100,11 @@ _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
10941100 "the interpreter doesn't have its own allocator" );
10951101 }
10961102#endif
1097- OMState * state = & interp -> obmalloc ;
1103+ OMState * state = interp -> obmalloc ;
1104+
1105+ if (state == NULL ) {
1106+ return 0 ;
1107+ }
10981108
10991109 Py_ssize_t n = raw_allocated_blocks ;
11001110 /* add up allocated blocks for used pools */
@@ -1116,6 +1126,8 @@ _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
11161126 return n ;
11171127}
11181128
1129+ static void free_obmalloc_arenas (PyInterpreterState * interp );
1130+
11191131void
11201132_PyInterpreterState_FinalizeAllocatedBlocks (PyInterpreterState * interp )
11211133{
@@ -1124,10 +1136,20 @@ _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
11241136 return ;
11251137 }
11261138#endif
1127- if (has_own_state (interp )) {
1139+ if (has_own_state (interp ) && interp -> obmalloc != NULL ) {
11281140 Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks (interp );
11291141 assert (has_own_state (interp ) || leaked == 0 );
11301142 interp -> runtime -> obmalloc .interpreter_leaks += leaked ;
1143+ if (_PyMem_obmalloc_state_on_heap (interp ) && leaked == 0 ) {
1144+ // free the obmalloc arenas and radix tree nodes. If leaked > 0
1145+ // then some of the memory allocated by obmalloc has not been
1146+ // freed. It might be safe to free the arenas in that case but
1147+ // it's possible that extension modules are still using that
1148+ // memory. So, it is safer to not free and to leak. Perhaps there
1149+ // should be warning when this happens. It should be possible to
1150+ // use a tool like "-fsanitize=address" to track down these leaks.
1151+ free_obmalloc_arenas (interp );
1152+ }
11311153 }
11321154}
11331155
@@ -2717,9 +2739,96 @@ _PyDebugAllocatorStats(FILE *out,
27172739 (void )printone (out , buf2 , num_blocks * sizeof_block );
27182740}
27192741
2742+ // Return true if the obmalloc state structure is heap allocated,
2743+ // by PyMem_RawCalloc(). For the main interpreter, this structure
2744+ // allocated in the BSS. Allocating that way gives some memory savings
2745+ // and a small performance win (at least on a demand paged OS). On
2746+ // 64-bit platforms, the obmalloc structure is 256 kB. Most of that
2747+ // memory is for the arena_map_top array. Since normally only one entry
2748+ // of that array is used, only one page of resident memory is actually
2749+ // used, rather than the full 256 kB.
2750+ bool _PyMem_obmalloc_state_on_heap (PyInterpreterState * interp )
2751+ {
2752+ #if WITH_PYMALLOC
2753+ return interp -> obmalloc && interp -> obmalloc != & obmalloc_state_main ;
2754+ #else
2755+ return false;
2756+ #endif
2757+ }
2758+
#ifdef WITH_PYMALLOC
/* Populate obmalloc->pools for this interpreter's state.  Must run
 * before any obmalloc alloc/free call is made on that state.  The
 * _obmalloc_pools_INIT macro expands to the initializer for the
 * "used" pool-header table. */
static void
init_obmalloc_pools(PyInterpreterState *interp)
{
    poolp initial_used[OBMALLOC_USED_POOLS_SIZE] =
        _obmalloc_pools_INIT(interp->obmalloc->pools);
    memcpy(&interp->obmalloc->pools.used, initial_used, sizeof(initial_used));
}
#endif /* WITH_PYMALLOC */
2770+
2771+ int _PyMem_init_obmalloc (PyInterpreterState * interp )
2772+ {
2773+ #ifdef WITH_PYMALLOC
2774+ /* Initialize obmalloc, but only for subinterpreters,
2775+ since the main interpreter is initialized statically. */
2776+ if (_Py_IsMainInterpreter (interp )
2777+ || _PyInterpreterState_HasFeature (interp ,
2778+ Py_RTFLAGS_USE_MAIN_OBMALLOC )) {
2779+ interp -> obmalloc = & obmalloc_state_main ;
2780+ if (!obmalloc_state_initialized ) {
2781+ init_obmalloc_pools (interp );
2782+ obmalloc_state_initialized = true;
2783+ }
2784+ } else {
2785+ interp -> obmalloc = PyMem_RawCalloc (1 , sizeof (struct _obmalloc_state ));
2786+ if (interp -> obmalloc == NULL ) {
2787+ return -1 ;
2788+ }
2789+ init_obmalloc_pools (interp );
2790+ }
2791+ #endif /* WITH_PYMALLOC */
2792+ return 0 ; // success
2793+ }
2794+
27202795
27212796#ifdef WITH_PYMALLOC
27222797
2798+ static void
2799+ free_obmalloc_arenas (PyInterpreterState * interp )
2800+ {
2801+ OMState * state = interp -> obmalloc ;
2802+ for (uint i = 0 ; i < maxarenas ; ++ i ) {
2803+ // free each obmalloc memory arena
2804+ struct arena_object * ao = & allarenas [i ];
2805+ _PyObject_Arena .free (_PyObject_Arena .ctx ,
2806+ (void * )ao -> address , ARENA_SIZE );
2807+ }
2808+ // free the array containing pointers to all arenas
2809+ PyMem_RawFree (allarenas );
2810+ #if WITH_PYMALLOC_RADIX_TREE
2811+ #ifdef USE_INTERIOR_NODES
2812+ // Free the middle and bottom nodes of the radix tree. These are allocated
2813+ // by arena_map_mark_used() but not freed when arenas are freed.
2814+ for (int i1 = 0 ; i1 < MAP_TOP_LENGTH ; i1 ++ ) {
2815+ arena_map_mid_t * mid = arena_map_root .ptrs [i1 ];
2816+ if (mid == NULL ) {
2817+ continue ;
2818+ }
2819+ for (int i2 = 0 ; i2 < MAP_MID_LENGTH ; i2 ++ ) {
2820+ arena_map_bot_t * bot = arena_map_root .ptrs [i1 ]-> ptrs [i2 ];
2821+ if (bot == NULL ) {
2822+ continue ;
2823+ }
2824+ PyMem_RawFree (bot );
2825+ }
2826+ PyMem_RawFree (mid );
2827+ }
2828+ #endif
2829+ #endif
2830+ }
2831+
27232832#ifdef Py_DEBUG
27242833/* Is target in the list? The list is traversed via the nextpool pointers.
27252834 * The list may be NULL-terminated, or circular. Return 1 if target is in
0 commit comments