Skip to content

Commit d929f18

Browse files
authored
bpo-36443: Disable C locale coercion and UTF-8 Mode by default (GH-12589)
bpo-36443, bpo-36202: Since Python 3.7.0, calling Py_DecodeLocale() before Py_Initialize() produces mojibake if the LC_CTYPE locale is coerced and/or if the UTF-8 Mode is enabled by the user configuration. This change fixes the issue by disabling LC_CTYPE coercion and UTF-8 Mode by default. They must now be enabled explicitly (opt-in) using the new _Py_PreInitialize() API with _PyPreConfig. When embedding Python, set the coerce_c_locale and utf8_mode attributes of _PyPreConfig to -1 to enable these parameters automatically depending on the LC_CTYPE locale, environment variables and command line arguments. Alternative: setting Py_UTF8Mode to 1 always explicitly enables the UTF-8 Mode. Changes: * _PyPreConfig_INIT now sets coerce_c_locale and utf8_mode to 0 by default. * _Py_InitializeFromArgs() and _Py_InitializeFromWideArgs() can now be called with config=NULL.
1 parent 4a9a505 commit d929f18

File tree

7 files changed

+58
-46
lines changed

7 files changed

+58
-46
lines changed

‎Include/cpython/coreconfig.h‎

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -63,13 +63,20 @@ typedef struct {
6363
set to !Py_IgnoreEnvironmentFlag. */
6464
int use_environment;
6565

66-
/* PYTHONCOERCECLOCALE, -1 means unknown.
66+
/* Coerce the LC_CTYPE locale if it's equal to "C"? (PEP 538)
67+
68+
Set to 0 by PYTHONCOERCECLOCALE=0. Set to 1 by PYTHONCOERCECLOCALE=1.
69+
Set to 2 if the user preferred LC_CTYPE locale is "C".
6770
6871
If it is equal to 1, LC_CTYPE locale is read to decide if it should be
6972
coerced or not (ex: PYTHONCOERCECLOCALE=1). Internally, it is set to 2
7073
if the LC_CTYPE locale must be coerced. */
7174
int coerce_c_locale;
72-
int coerce_c_locale_warn; /* PYTHONCOERCECLOCALE=warn */
75+
76+
/* Emit a warning if the LC_CTYPE locale is coerced?
77+
78+
Disabled by default. Set to 1 by PYTHONCOERCECLOCALE=warn. */
79+
int coerce_c_locale_warn;
7380

7481
#ifdef MS_WINDOWS
7582
/* If greater than 1, use the "mbcs" encoding instead of the UTF-8
@@ -83,9 +90,17 @@ typedef struct {
8390
int legacy_windows_fs_encoding;
8491
#endif
8592

86-
/* Enable UTF-8 mode?
87-
Set by -X utf8 command line option and PYTHONUTF8 environment variable.
88-
If set to -1 (default), inherit Py_UTF8Mode value. */
93+
/* Enable UTF-8 mode? (PEP 540)
94+
95+
Disabled by default (equal to 0).
96+
97+
Set to 1 by "-X utf8" and "-X utf8=1" command line options.
98+
Set to 1 by PYTHONUTF8=1 environment variable.
99+
100+
Set to 0 by "-X utf8=0" and PYTHONUTF8=0.
101+
102+
If equal to -1, it is set to 1 if the LC_CTYPE locale is "C" or
103+
"POSIX", otherwise inherit Py_UTF8Mode value. */
89104
int utf8_mode;
90105

91106
int dev_mode; /* Development mode. PYTHONDEVMODE, -X dev */
@@ -104,8 +119,6 @@ typedef struct {
104119
_PyPreConfig_WINDOWS_INIT \
105120
.isolated = -1, \
106121
.use_environment = -1, \
107-
.coerce_c_locale = -1, \
108-
.utf8_mode = -1, \
109122
.dev_mode = -1, \
110123
.allocator = NULL}
111124

‎Lib/test/test_embed.py‎

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -494,8 +494,8 @@ def check_config(self, testname, expected_config, expected_preconfig):
494494
if key not in expected_preconfig:
495495
expected_preconfig[key] = expected_config[key]
496496

497-
self.check_core_config(config, expected_config)
498497
self.check_pre_config(config, expected_preconfig)
498+
self.check_core_config(config, expected_config)
499499
self.check_global_config(config)
500500

501501
def test_init_default_config(self):
@@ -573,16 +573,13 @@ def test_init_from_config(self):
573573

574574
INIT_ENV_PRECONFIG = {
575575
'allocator': 'malloc',
576-
'utf8_mode': 1,
577576
}
578577
INIT_ENV_CONFIG = {
579578
'use_hash_seed': 1,
580579
'hash_seed': 42,
581580
'tracemalloc': 2,
582581
'import_time': 1,
583582
'malloc_stats': 1,
584-
'filesystem_encoding': 'utf-8',
585-
'filesystem_errors': UTF8_MODE_ERRORS,
586583
'inspect': 1,
587584
'optimization_level': 2,
588585
'pycache_prefix': 'env_pycache_prefix',
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Since Python 3.7.0, calling :c:func:`Py_DecodeLocale` before
2+
:c:func:`Py_Initialize` produces mojibake if the ``LC_CTYPE`` locale is coerced
3+
and/or if the UTF-8 Mode is enabled by the user configuration. The LC_CTYPE
4+
coercion and UTF-8 Mode are now disabled by default to fix the mojibake issue.
5+
They must now be enabled explicitly (opt-in) using the new
6+
:c:func:`_Py_PreInitialize` API with ``_PyPreConfig``.

‎Modules/main.c‎

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,23 +52,30 @@ pymain_init(const _PyArgv *args)
5252
fedisableexcept(FE_OVERFLOW);
5353
#endif
5454

55-
_PyCoreConfig config = _PyCoreConfig_INIT;
56-
55+
_PyPreConfig preconfig = _PyPreConfig_INIT;
56+
/* Set to -1 to enable them depending on the LC_CTYPE locale and the
57+
environment variables (PYTHONUTF8 and PYTHONCOERCECLOCALE) */
58+
preconfig.coerce_c_locale = -1;
59+
preconfig.utf8_mode = -1;
5760
if (args->use_bytes_argv) {
58-
err = _Py_PreInitializeFromArgs(NULL, args->argc, args->bytes_argv);
61+
err = _Py_PreInitializeFromArgs(&preconfig,
62+
args->argc, args->bytes_argv);
5963
}
6064
else {
61-
err = _Py_PreInitializeFromWideArgs(NULL, args->argc, args->wchar_argv);
65+
err = _Py_PreInitializeFromWideArgs(&preconfig,
66+
args->argc, args->wchar_argv);
6267
}
6368
if (_Py_INIT_FAILED(err)) {
6469
return err;
6570
}
6671

72+
/* pass NULL as the config: config is read from command line arguments,
73+
environment variables, configuration files */
6774
if (args->use_bytes_argv) {
68-
return _Py_InitializeFromArgs(&config, args->argc, args->bytes_argv);
75+
return _Py_InitializeFromArgs(NULL, args->argc, args->bytes_argv);
6976
}
7077
else {
71-
return _Py_InitializeFromWideArgs(&config, args->argc, args->wchar_argv);
78+
return _Py_InitializeFromWideArgs(NULL, args->argc, args->wchar_argv);
7279
}
7380
}
7481

‎Programs/_testembed.c‎

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -441,8 +441,6 @@ static int test_init_from_config(void)
441441
putenv("PYTHONMALLOCSTATS=0");
442442
config.malloc_stats = 1;
443443

444-
/* FIXME: test coerce_c_locale and coerce_c_locale_warn */
445-
446444
putenv("PYTHONPYCACHEPREFIX=env_pycache_prefix");
447445
config.pycache_prefix = L"conf_pycache_prefix";
448446

@@ -617,17 +615,6 @@ static int test_init_isolated(void)
617615
{
618616
_PyInitError err;
619617

620-
_PyPreConfig preconfig = _PyPreConfig_INIT;
621-
622-
/* Set coerce_c_locale and utf8_mode to not depend on the locale */
623-
preconfig.coerce_c_locale = 0;
624-
preconfig.utf8_mode = 0;
625-
626-
err = _Py_PreInitialize(&preconfig);
627-
if (_Py_INIT_FAILED(err)) {
628-
_Py_ExitInitError(err);
629-
}
630-
631618
/* Test _PyCoreConfig.isolated=1 */
632619
_PyCoreConfig config = _PyCoreConfig_INIT;
633620

@@ -654,10 +641,6 @@ static int test_preinit_isolated1(void)
654641
_PyInitError err;
655642

656643
_PyPreConfig preconfig = _PyPreConfig_INIT;
657-
658-
/* Set coerce_c_locale and utf8_mode to not depend on the locale */
659-
preconfig.coerce_c_locale = 0;
660-
preconfig.utf8_mode = 0;
661644
preconfig.isolated = 1;
662645

663646
err = _Py_PreInitialize(&preconfig);
@@ -685,10 +668,6 @@ static int test_preinit_isolated2(void)
685668
_PyInitError err;
686669

687670
_PyPreConfig preconfig = _PyPreConfig_INIT;
688-
689-
/* Set coerce_c_locale and utf8_mode to not depend on the locale */
690-
preconfig.coerce_c_locale = 0;
691-
preconfig.utf8_mode = 0;
692671
preconfig.isolated = 0;
693672

694673
err = _Py_PreInitialize(&preconfig);

‎Python/preconfig.c‎

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,9 @@ _PyPreConfig_GetGlobalConfig(_PyPreConfig *config)
386386
#ifdef MS_WINDOWS
387387
COPY_FLAG(legacy_windows_fs_encoding, Py_LegacyWindowsFSEncodingFlag);
388388
#endif
389-
COPY_FLAG(utf8_mode, Py_UTF8Mode);
389+
if (Py_UTF8Mode > 0) {
390+
config->utf8_mode = 1;
391+
}
390392

391393
#undef COPY_FLAG
392394
#undef COPY_NOT_FLAG

‎Python/pylifecycle.c‎

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -485,7 +485,7 @@ _Py_Initialize_ReconfigureCore(PyInterpreterState **interp_p,
485485
_PyCoreConfig_Write(core_config);
486486

487487
if (_PyCoreConfig_Copy(&interp->core_config, core_config) < 0) {
488-
return _Py_INIT_ERR("failed to copy core config");
488+
return _Py_INIT_NO_MEMORY();
489489
}
490490
core_config = &interp->core_config;
491491

@@ -548,7 +548,7 @@ pycore_create_interpreter(const _PyCoreConfig *core_config,
548548
*interp_p = interp;
549549

550550
if (_PyCoreConfig_Copy(&interp->core_config, core_config) < 0) {
551-
return _Py_INIT_ERR("failed to copy core config");
551+
return _Py_INIT_NO_MEMORY();
552552
}
553553
core_config = &interp->core_config;
554554

@@ -785,6 +785,7 @@ _Py_PreInitialize(const _PyPreConfig *src_config)
785785
_PyInitError
786786
_Py_PreInitializeFromCoreConfig(const _PyCoreConfig *coreconfig)
787787
{
788+
assert(coreconfig != NULL);
788789
_PyPreConfig config = _PyPreConfig_INIT;
789790
_PyCoreConfig_GetCoreConfig(&config, coreconfig);
790791
return _Py_PreInitialize(&config);
@@ -799,8 +800,10 @@ pyinit_coreconfig(_PyCoreConfig *config,
799800
const _PyArgv *args,
800801
PyInterpreterState **interp_p)
801802
{
802-
if (_PyCoreConfig_Copy(config, src_config) < 0) {
803-
return _Py_INIT_ERR("failed to copy core config");
803+
if (src_config) {
804+
if (_PyCoreConfig_Copy(config, src_config) < 0) {
805+
return _Py_INIT_NO_MEMORY();
806+
}
804807
}
805808

806809
_PyInitError err = _PyCoreConfig_Read(config, args);
@@ -839,9 +842,14 @@ _Py_InitializeCore(const _PyCoreConfig *src_config,
839842
const _PyArgv *args,
840843
PyInterpreterState **interp_p)
841844
{
842-
assert(src_config != NULL);
845+
_PyInitError err;
843846

844-
_PyInitError err = _Py_PreInitializeFromCoreConfig(src_config);
847+
if (src_config) {
848+
err = _Py_PreInitializeFromCoreConfig(src_config);
849+
}
850+
else {
851+
err = _Py_PreInitialize(NULL);
852+
}
845853
if (_Py_INIT_FAILED(err)) {
846854
return err;
847855
}
@@ -1395,7 +1403,7 @@ new_interpreter(PyThreadState **tstate_p)
13951403
}
13961404

13971405
if (_PyCoreConfig_Copy(&interp->core_config, core_config) < 0) {
1398-
return _Py_INIT_ERR("failed to copy core config");
1406+
return _Py_INIT_NO_MEMORY();
13991407
}
14001408
core_config = &interp->core_config;
14011409

0 commit comments

Comments
 (0)