@@ -214,6 +214,22 @@ extern "C" {
214214# define OVERALLOCATE_FACTOR 4
215215#endif
216216
217+ /* bpo-40521: Interned strings are shared by all interpreters. */
218+ #ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
219+ # define INTERNED_STRINGS
220+ #endif
221+
222+ /* This dictionary holds all interned unicode strings. Note that references
223+ to strings in this dictionary are *not* counted in the string's ob_refcnt.
224+ When the interned string reaches a refcnt of 0 the string deallocation
225+ function will delete the reference from this dictionary.
226+
227+ Another way to look at this is to say that the actual reference
228+ count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
229+ */
230+ #ifdef INTERNED_STRINGS
231+ static PyObject * interned = NULL ;
232+ #endif
217233
218234/* Forward declaration */
219235static inline int
@@ -1950,20 +1966,21 @@ unicode_dealloc(PyObject *unicode)
19501966
19511967 case SSTATE_INTERNED_MORTAL :
19521968 {
1953- struct _Py_unicode_state * state = get_unicode_state ();
1969+ #ifdef INTERNED_STRINGS
19541970 /* Revive the dead object temporarily. PyDict_DelItem() removes two
19551971 references (key and value) which were ignored by
19561972 PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
19571973 to prevent calling unicode_dealloc() again. Adjust refcnt after
19581974 PyDict_DelItem(). */
19591975 assert (Py_REFCNT (unicode ) == 0 );
19601976 Py_SET_REFCNT (unicode , 3 );
1961- if (PyDict_DelItem (state -> interned , unicode ) != 0 ) {
1977+ if (PyDict_DelItem (interned , unicode ) != 0 ) {
19621978 _PyErr_WriteUnraisableMsg ("deletion of interned string failed" ,
19631979 NULL );
19641980 }
19651981 assert (Py_REFCNT (unicode ) == 1 );
19661982 Py_SET_REFCNT (unicode , 0 );
1983+ #endif
19671984 break ;
19681985 }
19691986
@@ -11342,11 +11359,13 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
1134211359 if (PyUnicode_CHECK_INTERNED (left ))
1134311360 return 0 ;
1134411361
11362+ #ifdef INTERNED_STRINGS
1134511363 assert (_PyUnicode_HASH (right_uni ) != -1 );
1134611364 Py_hash_t hash = _PyUnicode_HASH (left );
1134711365 if (hash != -1 && hash != _PyUnicode_HASH (right_uni )) {
1134811366 return 0 ;
1134911367 }
11368+ #endif
1135011369
1135111370 return unicode_compare_eq (left , right_uni );
1135211371}
@@ -15591,21 +15610,21 @@ PyUnicode_InternInPlace(PyObject **p)
1559115610 return ;
1559215611 }
1559315612
15613+ #ifdef INTERNED_STRINGS
1559415614 if (PyUnicode_READY (s ) == -1 ) {
1559515615 PyErr_Clear ();
1559615616 return ;
1559715617 }
1559815618
15599- struct _Py_unicode_state * state = get_unicode_state ();
15600- if (state -> interned == NULL ) {
15601- state -> interned = PyDict_New ();
15602- if (state -> interned == NULL ) {
15619+ if (interned == NULL ) {
15620+ interned = PyDict_New ();
15621+ if (interned == NULL ) {
1560315622 PyErr_Clear (); /* Don't leave an exception */
1560415623 return ;
1560515624 }
1560615625 }
1560715626
15608- PyObject * t = PyDict_SetDefault (state -> interned , s , s );
15627+ PyObject * t = PyDict_SetDefault (interned , s , s );
1560915628 if (t == NULL ) {
1561015629 PyErr_Clear ();
1561115630 return ;
@@ -15622,9 +15641,13 @@ PyUnicode_InternInPlace(PyObject **p)
1562215641 this. */
1562315642 Py_SET_REFCNT (s , Py_REFCNT (s ) - 2 );
1562415643 _PyUnicode_STATE (s ).interned = SSTATE_INTERNED_MORTAL ;
15644+ #else
15645+ // PyDict expects that interned strings have their hash
15646+ // (PyASCIIObject.hash) already computed.
15647+ (void )unicode_hash (s );
15648+ #endif
1562515649}
1562615650
15627-
1562815651void
1562915652PyUnicode_InternImmortal (PyObject * * p )
1563015653{
@@ -15658,25 +15681,29 @@ PyUnicode_InternFromString(const char *cp)
1565815681void
1565915682_PyUnicode_ClearInterned (PyInterpreterState * interp )
1566015683{
15661- struct _Py_unicode_state * state = & interp -> unicode ;
15662- if ( state -> interned == NULL ) {
15684+ if (! _Py_IsMainInterpreter ( interp )) {
15685+ // interned dict is shared by all interpreters
1566315686 return ;
1566415687 }
15665- assert (PyDict_CheckExact (state -> interned ));
15688+
15689+ if (interned == NULL ) {
15690+ return ;
15691+ }
15692+ assert (PyDict_CheckExact (interned ));
1566615693
1566715694 /* Interned unicode strings are not forcibly deallocated; rather, we give
1566815695 them their stolen references back, and then clear and DECREF the
1566915696 interned dict. */
1567015697
1567115698#ifdef INTERNED_STATS
1567215699 fprintf (stderr , "releasing %zd interned strings\n" ,
15673- PyDict_GET_SIZE (state -> interned ));
15700+ PyDict_GET_SIZE (interned ));
1567415701
1567515702 Py_ssize_t immortal_size = 0 , mortal_size = 0 ;
1567615703#endif
1567715704 Py_ssize_t pos = 0 ;
1567815705 PyObject * s , * ignored_value ;
15679- while (PyDict_Next (state -> interned , & pos , & s , & ignored_value )) {
15706+ while (PyDict_Next (interned , & pos , & s , & ignored_value )) {
1568015707 assert (PyUnicode_IS_READY (s ));
1568115708
1568215709 switch (PyUnicode_CHECK_INTERNED (s )) {
@@ -15707,8 +15734,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
1570715734 mortal_size , immortal_size );
1570815735#endif
1570915736
15710- PyDict_Clear (state -> interned );
15711- Py_CLEAR (state -> interned );
15737+ PyDict_Clear (interned );
15738+ Py_CLEAR (interned );
1571215739}
1571315740
1571415741
@@ -16079,8 +16106,7 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)
1607916106static inline int
1608016107unicode_is_finalizing (void )
1608116108{
16082- struct _Py_unicode_state * state = get_unicode_state ();
16083- return (state -> interned == NULL );
16109+ return (interned == NULL );
1608416110}
1608516111#endif
1608616112
@@ -16090,8 +16116,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)
1609016116{
1609116117 struct _Py_unicode_state * state = & interp -> unicode ;
1609216118
16093- // _PyUnicode_ClearInterned() must be called before
16094- assert (state -> interned == NULL );
16119+ if (_Py_IsMainInterpreter (interp )) {
16120+ // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
16121+ assert (interned == NULL );
16122+ }
1609516123
1609616124 _PyUnicode_FiniEncodings (& state -> fs_codec );
1609716125
0 commit comments