@@ -220,8 +220,7 @@ static inline PyObject* unicode_get_empty(void)
220220// Return a strong reference to the empty string singleton.
// NOTE(review): with this change the body returns the result of
// unicode_get_empty() directly and no longer calls Py_NewRef(). The
// strong-reference wording above presumably relies on the empty string
// singleton being immortal -- confirm, or reword the comment.
221221static inline PyObject * unicode_new_empty (void )
222222{
223- PyObject * empty = unicode_get_empty ();
224- return Py_NewRef (empty );
223+ return unicode_get_empty ();
225224}
226225
227226/* This dictionary holds all interned unicode strings. Note that references
@@ -1706,7 +1705,7 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
// Return the object that LATIN1(ch) yields for the Latin-1 character ch.
// NOTE(review): the added line drops the Py_NewRef() that the removed line
// applied, so no reference count is taken here any more. Presumably the
// LATIN1() objects are immortal -- confirm against the LATIN1 definition.
17061705static PyObject *
17071706get_latin1_char (Py_UCS1 ch )
17081707{
1709- return Py_NewRef ( LATIN1 (ch ) );
1708+ return LATIN1 (ch );
17101709}
17111710
17121711static PyObject *
@@ -1863,67 +1862,41 @@ resize_array(PyObject **array, Py_ssize_t *capacity)
18631862 return new_array ;
18641863}
18651864
// Diff note: the removed lines below are the old, index/array based body of
// _PyUnicode_FromId(); the added lines turn this definition into the static
// helper initialize_identifier().
//
// initialize_identifier(id):
//   - decodes id->string (length taken with strlen) as UTF-8 and returns
//     NULL if the decode fails;
//   - interns the result with PyUnicode_InternInPlace() and asserts that the
//     interned object is immortal;
//   - stores the object in id->obj with a compare-exchange against NULL;
//   - on a successful store, links id onto the list headed by
//     _PyRuntime.unicode_state.head and returns the object.
// No reference count is taken on the returned object in the added code.
1866- PyObject *
1867- _PyUnicode_FromId (_Py_Identifier * id )
1865+ static PyObject *
1866+ initialize_identifier (_Py_Identifier * id )
18681867{
1869- PyInterpreterState * interp = _PyInterpreterState_GET ();
1870- struct _Py_unicode_ids * ids = & interp -> unicode .ids ;
1871-
1872- Py_ssize_t index = _Py_atomic_size_get (& id -> index );
1873- if (index < 0 ) {
1874- struct _Py_unicode_runtime_ids * rt_ids = & interp -> runtime -> unicode_state .ids ;
1875-
1876- _PyMutex_lock (& rt_ids -> mutex );
1877- // Check again to detect concurrent access. Another thread can have
1878- // initialized the index while this thread waited for the lock.
1879- index = _Py_atomic_size_get (& id -> index );
1880- if (index < 0 ) {
1881- assert (rt_ids -> next_index < PY_SSIZE_T_MAX );
1882- index = rt_ids -> next_index ;
1883- rt_ids -> next_index ++ ;
1884- _Py_atomic_size_set (& id -> index , index );
1885- }
1886- _PyMutex_unlock (& rt_ids -> mutex );
1887- }
1888- assert (index >= 0 );
1889-
1890- PyObject * obj ;
1891- if (index < ids -> size ) {
1892- obj = ids -> array [index ];
1893- if (obj ) {
1894- // Return a borrowed reference
1895- return obj ;
1896- }
1897- }
1898-
1899- obj = PyUnicode_DecodeUTF8Stateful (id -> string , strlen (id -> string ),
1900- NULL , NULL );
1868+ PyObject * obj = PyUnicode_DecodeUTF8Stateful (id -> string , strlen (id -> string ),
1869+ NULL , NULL );
19011870 if (!obj ) {
19021871 return NULL ;
19031872 }
19041873 PyUnicode_InternInPlace (& obj );
19051874
1906- if (index >= ids -> size ) {
1907- // Overallocate to reduce the number of realloc
1908- Py_ssize_t new_size = Py_MAX (index * 2 , 16 );
1909- Py_ssize_t item_size = sizeof (ids -> array [0 ]);
1910- PyObject * * new_array = PyMem_Realloc (ids -> array , new_size * item_size );
1911- if (new_array == NULL ) {
1912- PyErr_NoMemory ();
1913- return NULL ;
// The code below never increments the refcount of obj and the clearing
// code never decrements it, so obj is expected to be immortal after
// interning; this is only checked in builds where assert() is active.
1875+ assert (_PyObject_IS_IMMORTAL (obj ));
1876+
// Publish obj in id->obj only while id->obj is still NULL. The exchange is
// presumably reported as nonzero on success (TODO confirm against the
// helper definition). When it fails, id->obj was already set, presumably
// by a concurrent caller: drop this obj and return the stored pointer.
1877+ if (!_Py_atomic_compare_exchange_ptr (& id -> obj , NULL , obj )) {
1878+ Py_DECREF (obj );
1879+ return _Py_atomic_load_ptr (& id -> obj );
1880+ }
// Link id at the front of the list headed by _PyRuntime.unicode_state.head.
// The loop reloads head into id->next and retries until the
// compare-exchange of head from id->next to id succeeds.
1881+ for (;;) {
1882+ id -> next = _Py_atomic_load_ptr (& _PyRuntime .unicode_state .head );
1883+ if (_Py_atomic_compare_exchange_ptr (& _PyRuntime .unicode_state .head , id -> next , id )) {
1884+ break ;
19141885 }
1915- memset (& new_array [ids -> size ], 0 , (new_size - ids -> size ) * item_size );
1916- ids -> array = new_array ;
1917- ids -> size = new_size ;
19181886 }
1919-
1920- // The array stores a strong reference
1921- ids -> array [index ] = obj ;
1922-
1923- // Return a borrowed reference
19241887 return obj ;
19251888}
19261889
// Return the string object associated with the identifier id.
// Fast path: atomically load id->obj and return it when it is non-NULL.
// Otherwise defer to initialize_identifier(), which creates the object and
// can return NULL on failure. No reference count is taken on the returned
// object in this function.
1890+ PyObject *
1891+ _PyUnicode_FromId (_Py_Identifier * id )
1892+ {
1893+ PyObject * obj = _Py_atomic_load_ptr (& id -> obj );
1894+ if (obj ) {
1895+ return obj ;
1896+ }
1897+ return initialize_identifier (id );
1898+ }
1899+
1899+
19271900static void
19281901_PyUnicode_Immortalize (PyObject * obj )
19291902{
@@ -1956,17 +1929,16 @@ _PyUnicode_Immortalize(PyObject *obj)
19561929
19571930
// Reset every identifier reachable from state->head.
// The added version walks the linked list through the next pointers, sets
// next and obj of each identifier to NULL, and finally sets state->head to
// NULL. Unlike the removed version, which called Py_XDECREF on each array
// entry, the added version does not decrement any reference count.
// NOTE(review): head, next and obj are read and written here without the
// atomic helpers -- presumably no other thread can use identifiers at this
// point; confirm against the caller.
19581931static void
1959- unicode_clear_identifiers (struct _Py_unicode_state * state )
1932+ unicode_clear_identifiers (struct _Py_unicode_runtime_state * state )
19601933{
1961- struct _Py_unicode_ids * ids = & state -> ids ;
1962- for (Py_ssize_t i = 0 ; i < ids -> size ; i ++ ) {
1963- Py_XDECREF (ids -> array [i ]);
1934+ _Py_Identifier * id = state -> head ;
1935+ while (id ) {
// Save the successor before unlinking this identifier.
1936+ _Py_Identifier * next = id -> next ;
1937+ id -> next = NULL ;
1938+ id -> obj = NULL ;
1939+ id = next ;
19641940 }
1965- ids -> size = 0 ;
1966- PyMem_Free (ids -> array );
1967- ids -> array = NULL ;
1968- // Don't reset _PyRuntime next_index: _Py_Identifier.id remains valid
1969- // after Py_Finalize().
1941+ state -> head = NULL ;
19701942}
19711943
19721944static void
@@ -14593,6 +14565,16 @@ _PyUnicode_InitGlobalObjects(PyInterpreterState *interp)
1459314565 return _PyStatus_OK ();
1459414566 }
1459514567
14568+ /* Create the interned dictionary. This must be done before creating static
14569+ * strings.
14570+ */
14571+ assert (get_interned_dict () == NULL );
14572+ PyObject * dict = PyDict_New ();
14573+ if (!dict ) {
14574+ return _PyStatus_NO_MEMORY ();
14575+ }
14576+ set_interned_dict (dict );
14577+
1459614578 /* Intern statically allocated string identifiers and deepfreeze strings.
1459714579 * This must be done before any module initialization so that statically
1459814580 * allocated string identifiers are used instead of heap allocated strings.
@@ -14660,14 +14642,6 @@ PyUnicode_InternInPlace(PyObject **p)
1466014642 }
1466114643
1466214644 PyObject * interned = get_interned_dict ();
14663- if (interned == NULL ) {
14664- interned = PyDict_New ();
14665- if (interned == NULL ) {
14666- PyErr_Clear (); /* Don't leave an exception */
14667- return ;
14668- }
14669- set_interned_dict (interned );
14670- }
1467114645
1467214646 if (!_Py_ThreadLocal (s ) && !_PyObject_IS_IMMORTAL (s )) {
1467314647 /* Make a copy so that we can safely immortalize the string. */
@@ -15189,21 +15163,18 @@ _PyUnicode_FiniTypes(PyInterpreterState *interp)
// Finalize the unicode state of the interpreter interp.
// For every interpreter: finalizes interp->unicode.fs_codec and resets
// interp->unicode.ucnhash_capi to NULL.
// For the main interpreter only: asserts that the interned dict is already
// gone, then (after this change) clears the identifiers registered in
// _PyRuntime.unicode_state and calls unicode_free_immortalized().
1518915163void
1519015164_PyUnicode_Fini (PyInterpreterState * interp )
1519115165{
15192- struct _Py_unicode_state * state = & interp -> unicode ;
15193-
1519415166 if (_Py_IsMainInterpreter (interp )) {
1519515167 // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
1519615168 assert (get_interned_dict () == NULL );
1519715169 // bpo-47182: force a unicodedata CAPI capsule re-import on
1519815170 // subsequent initialization of main interpreter.
1519915171 }
1520015172
15201- _PyUnicode_FiniEncodings (& state -> fs_codec );
15173+ _PyUnicode_FiniEncodings (& interp -> unicode . fs_codec );
1520215174 interp -> unicode .ucnhash_capi = NULL ;
1520315175
15204- unicode_clear_identifiers (state );
15205-
1520615176 if (_Py_IsMainInterpreter (interp )) {
// The removed code cleared identifiers for every interpreter; the added
// call runs in the main-interpreter branch only, on the runtime-wide state,
// and precedes unicode_free_immortalized().
15177+ unicode_clear_identifiers (& _PyRuntime .unicode_state );
1520715178 unicode_free_immortalized (& _PyRuntime );
1520815179 }
1520915180}
0 commit comments