@@ -236,15 +236,54 @@ static inline PyObject *get_interned_dict(PyInterpreterState *interp)
236236 return _Py_INTERP_CACHED_OBJECT (interp , interned_strings );
237237}
238238
/* Shorthand for the interned_strings member of _PyRuntime.cached_objects.
   It expands to a plain member access, so it can be read and assigned. */
#define INTERNED_STRINGS _PyRuntime.cached_objects.interned_strings
240+
239241Py_ssize_t
240242_PyUnicode_InternedSize (void )
241243{
242- return PyObject_Length (get_interned_dict (_PyInterpreterState_GET ()));
244+ PyObject * dict = get_interned_dict (_PyInterpreterState_GET ());
245+ return _Py_hashtable_len (INTERNED_STRINGS ) + PyDict_GET_SIZE (dict );
246+ }
247+
/* Forward declarations needed by hashtable_unicode_hash() and
   hashtable_unicode_compare() below. */
static Py_hash_t unicode_hash (PyObject * );
static int unicode_compare_eq (PyObject * , PyObject * );
250+
251+ static Py_uhash_t
252+ hashtable_unicode_hash (const void * key )
253+ {
254+ return unicode_hash ((PyObject * )key );
255+ }
256+
257+ static int
258+ hashtable_unicode_compare (const void * key1 , const void * key2 )
259+ {
260+ PyObject * obj1 = (PyObject * )key1 ;
261+ PyObject * obj2 = (PyObject * )key2 ;
262+ if (obj1 != NULL && obj2 != NULL ) {
263+ return unicode_compare_eq (obj1 , obj2 );
264+ }
265+ else {
266+ return obj1 == obj2 ;
267+ }
243268}
244269
245270static int
246271init_interned_dict (PyInterpreterState * interp )
247272{
273+ if (_Py_IsMainInterpreter (interp )) {
274+ assert (INTERNED_STRINGS == NULL );
275+ _Py_hashtable_allocator_t hashtable_alloc = {PyMem_RawMalloc , PyMem_RawFree };
276+ INTERNED_STRINGS = _Py_hashtable_new_full (
277+ hashtable_unicode_hash ,
278+ hashtable_unicode_compare ,
279+ NULL ,
280+ NULL ,
281+ & hashtable_alloc
282+ );
283+ if (INTERNED_STRINGS == NULL ) {
284+ return -1 ;
285+ }
286+ }
248287 assert (get_interned_dict (interp ) == NULL );
249288 PyObject * interned = interned = PyDict_New ();
250289 if (interned == NULL ) {
@@ -263,6 +302,10 @@ clear_interned_dict(PyInterpreterState *interp)
263302 Py_DECREF (interned );
264303 _Py_INTERP_CACHED_OBJECT (interp , interned_strings ) = NULL ;
265304 }
305+ if (_Py_IsMainInterpreter (interp ) && INTERNED_STRINGS != NULL ) {
306+ _Py_hashtable_destroy (INTERNED_STRINGS );
307+ INTERNED_STRINGS = NULL ;
308+ }
266309}
267310
268311#define _Py_RETURN_UNICODE_EMPTY () \
@@ -1223,6 +1266,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
12231266 _PyUnicode_STATE (unicode ).kind = kind ;
12241267 _PyUnicode_STATE (unicode ).compact = 1 ;
12251268 _PyUnicode_STATE (unicode ).ascii = is_ascii ;
1269+ _PyUnicode_STATE (unicode ).statically_allocated = 0 ;
12261270 if (is_ascii ) {
12271271 ((char * )data )[size ] = 0 ;
12281272 }
@@ -1553,7 +1597,9 @@ unicode_dealloc(PyObject *unicode)
15531597 * we accidentally decref an immortal string out of existence. Since
15541598 * the string is an immortal object, just re-set the reference count.
15551599 */
1556- if (PyUnicode_CHECK_INTERNED (unicode )) {
1600+ if (PyUnicode_CHECK_INTERNED (unicode )
1601+ || _PyUnicode_STATE (unicode ).statically_allocated )
1602+ {
15571603 _Py_SetImmortal (unicode );
15581604 return ;
15591605 }
@@ -14503,6 +14549,7 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode)
1450314549 _PyUnicode_STATE (self ).kind = kind ;
1450414550 _PyUnicode_STATE (self ).compact = 0 ;
1450514551 _PyUnicode_STATE (self ).ascii = _PyUnicode_STATE (unicode ).ascii ;
14552+ _PyUnicode_STATE (self ).statically_allocated = 0 ;
1450614553 _PyUnicode_UTF8_LENGTH (self ) = 0 ;
1450714554 _PyUnicode_UTF8 (self ) = NULL ;
1450814555 _PyUnicode_DATA_ANY (self ) = NULL ;
@@ -14726,6 +14773,23 @@ _PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p)
1472614773 return ;
1472714774 }
1472814775
14776+ /* Look in the global cache first. */
14777+ PyObject * r = (PyObject * )_Py_hashtable_get (INTERNED_STRINGS , s );
14778+ if (r != NULL && r != s ) {
14779+ Py_SETREF (* p , Py_NewRef (r ));
14780+ return ;
14781+ }
14782+
14783+ /* Handle statically allocated strings. */
14784+ if (_PyUnicode_STATE (s ).statically_allocated ) {
14785+ assert (_Py_IsImmortal (s ));
14786+ if (_Py_hashtable_set (INTERNED_STRINGS , s , s ) == 0 ) {
14787+ _PyUnicode_STATE (* p ).interned = SSTATE_INTERNED_IMMORTAL_STATIC ;
14788+ }
14789+ return ;
14790+ }
14791+
14792+ /* Look in the per-interpreter cache. */
1472914793 PyObject * interned = get_interned_dict (interp );
1473014794 assert (interned != NULL );
1473114795
@@ -14741,9 +14805,11 @@ _PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p)
1474114805 }
1474214806
1474314807 if (_Py_IsImmortal (s )) {
14808+ // XXX Restrict this to the main interpreter?
1474414809 _PyUnicode_STATE (* p ).interned = SSTATE_INTERNED_IMMORTAL_STATIC ;
14745- return ;
14810+ return ;
1474614811 }
14812+
1474714813#ifdef Py_REF_DEBUG
1474814814 /* The reference count value excluding the 2 references from the
1474914815 interned dictionary should be excluded from the RefTotal. The
0 commit comments