Skip to content

Commit 94908bb

Browse files
author
Victor Stinner
committed
Issue #8622: Add PYTHONFSENCODING environment variable to override the
filesystem encoding. initfsencoding() also displays a better error message if get_codeset() fails.
1 parent 56ab01b commit 94908bb

File tree

7 files changed

+93
-34
lines changed

7 files changed

+93
-34
lines changed

‎Doc/using/cmdline.rst‎

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -442,11 +442,20 @@ These environment variables influence Python's behavior.
442442
import of source modules.
443443

444444

445+
.. envvar:: PYTHONFSENCODING
446+
447+
If this is set before running the interpreter, it overrides the encoding used
448+
for the filesystem encoding (see :func:`sys.getfilesystemencoding`).
449+
450+
.. versionadded:: 3.2
451+
452+
445453
.. envvar:: PYTHONIOENCODING
446454

447-
Overrides the encoding used for stdin/stdout/stderr, in the syntax
448-
``encodingname:errorhandler``. The ``:errorhandler`` part is optional and
449-
has the same meaning as in :func:`str.encode`.
455+
If this is set before running the interpreter, it overrides the encoding used
456+
for stdin/stdout/stderr, in the syntax ``encodingname:errorhandler``. The
457+
``:errorhandler`` part is optional and has the same meaning as in
458+
:func:`str.encode`.
450459

451460
For stderr, the ``:errorhandler`` part is ignored; the handler will always be
452461
``'backslashreplace'``.

‎Doc/whatsnew/3.2.rst‎

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,15 @@ Major performance enhancements have been added:
232232

233233
* Stub
234234

235+
236+
Unicode
237+
=======
238+
239+
The filesystem encoding can be specified by setting the
240+
:envvar:`PYTHONFSENCODING` environment variable before running the interpreter.
241+
The value should be a string in the form ``<encoding>``, e.g. ``utf-8``.
242+
243+
235244
IDLE
236245
====
237246

‎Lib/test/test_pep277.py‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343

4444
# Is it Unicode-friendly?
4545
if not os.path.supports_unicode_filenames:
46-
fsencoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
46+
fsencoding = sys.getfilesystemencoding()
4747
try:
4848
for name in filenames:
4949
name.encode(fsencoding)

‎Lib/test/test_sys.py‎

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -863,31 +863,47 @@ def test_pythontypes(self):
863863
def test_getfilesystemencoding(self):
864864
import codecs
865865

866-
def check_fsencoding(fs_encoding):
866+
def check_fsencoding(fs_encoding, expected=None):
867867
self.assertIsNotNone(fs_encoding)
868868
if sys.platform == 'darwin':
869869
self.assertEqual(fs_encoding, 'utf-8')
870870
codecs.lookup(fs_encoding)
871+
if expected:
872+
self.assertEqual(fs_encoding, expected)
871873

872874
fs_encoding = sys.getfilesystemencoding()
873875
check_fsencoding(fs_encoding)
874876

875-
# Even in C locale
877+
def get_fsencoding(env):
878+
output = subprocess.check_output(
879+
[sys.executable, "-c",
880+
"import sys; print(sys.getfilesystemencoding())"],
881+
env=env)
882+
return output.rstrip().decode('ascii')
883+
876884
try:
877885
sys.executable.encode('ascii')
878886
except UnicodeEncodeError:
879887
# Python doesn't start with ASCII locale if its path is not ASCII,
880888
# see issue #8611
881889
pass
882890
else:
891+
# Even in C locale
883892
env = os.environ.copy()
884893
env['LANG'] = 'C'
885-
output = subprocess.check_output(
886-
[sys.executable, "-c",
887-
"import sys; print(sys.getfilesystemencoding())"],
888-
env=env)
889-
fs_encoding = output.rstrip().decode('ascii')
890-
check_fsencoding(fs_encoding)
894+
try:
895+
del env['PYTHONFSENCODING']
896+
except KeyError:
897+
pass
898+
check_fsencoding(get_fsencoding(env), 'ascii')
899+
900+
# Filesystem encoding is hardcoded on Windows and Mac OS X
901+
if sys.platform not in ('win32', 'darwin'):
902+
for encoding in ('ascii', 'cp850', 'iso8859-1', 'utf-8'):
903+
env = os.environ.copy()
904+
env['PYTHONFSENCODING'] = encoding
905+
check_fsencoding(get_fsencoding(env), encoding)
906+
891907

892908
def test_setfilesystemencoding(self):
893909
old = sys.getfilesystemencoding()

‎Misc/NEWS‎

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ What's New in Python 3.2 Alpha 2?
1212
Core and Builtins
1313
-----------------
1414

15+
- Issue #8622: Add PYTHONFSENCODING environment variable to override the
16+
filesystem encoding.
17+
1518
- Issue #5127: The C functions that access the Unicode Database now accept and
1619
return characters from the full Unicode range, even on narrow unicode builds
1720
(Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others). A visible difference

‎Modules/main.c‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ PYTHONHOME : alternate <prefix> directory (or <prefix>%c<exec_prefix>).\n\
9999
The default module search path uses %s.\n\
100100
PYTHONCASEOK : ignore case in 'import' statements (Windows).\n\
101101
PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n\
102+
PYTHONFSENCODING: Encoding used for the filesystem.\n\
102103
";
103104

104105
FILE *

‎Python/pythonrun.c‎

Lines changed: 43 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -134,18 +134,13 @@ add_flag(int flag, const char *envs)
134134
return flag;
135135
}
136136

137-
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
138137
static char*
139-
get_codeset(void)
138+
get_codec_name(const char *encoding)
140139
{
141-
char* codeset, *name_str;
140+
char *name_utf8, *name_str;
142141
PyObject *codec, *name = NULL;
143142

144-
codeset = nl_langinfo(CODESET);
145-
if (!codeset || codeset[0] == '\0')
146-
return NULL;
147-
148-
codec = _PyCodec_Lookup(codeset);
143+
codec = _PyCodec_Lookup(encoding);
149144
if (!codec)
150145
goto error;
151146

@@ -154,18 +149,34 @@ get_codeset(void)
154149
if (!name)
155150
goto error;
156151

157-
name_str = _PyUnicode_AsString(name);
152+
name_utf8 = _PyUnicode_AsString(name);
158153
if (name == NULL)
159154
goto error;
160-
codeset = strdup(name_str);
155+
name_str = strdup(name_utf8);
161156
Py_DECREF(name);
162-
return codeset;
157+
if (name_str == NULL) {
158+
PyErr_NoMemory();
159+
return NULL;
160+
}
161+
return name_str;
163162

164163
error:
165164
Py_XDECREF(codec);
166165
Py_XDECREF(name);
167166
return NULL;
168167
}
168+
169+
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
170+
static char*
171+
get_codeset(void)
172+
{
173+
char* codeset = nl_langinfo(CODESET);
174+
if (!codeset || codeset[0] == '\0') {
175+
PyErr_SetString(PyExc_ValueError, "CODESET is not set or empty");
176+
return NULL;
177+
}
178+
return get_codec_name(codeset);
179+
}
169180
#endif
170181

171182
void
@@ -706,25 +717,35 @@ initfsencoding(void)
706717
{
707718
PyObject *codec;
708719
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
709-
char *codeset;
720+
char *codeset = NULL;
710721

711722
if (Py_FileSystemDefaultEncoding == NULL) {
712-
/* On Unix, set the file system encoding according to the
713-
user's preference, if the CODESET names a well-known
714-
Python codec, and Py_FileSystemDefaultEncoding isn't
715-
initialized by other means. Also set the encoding of
716-
stdin and stdout if these are terminals. */
717-
codeset = get_codeset();
723+
const char *env_encoding = Py_GETENV("PYTHONFSENCODING");
724+
if (env_encoding != NULL) {
725+
codeset = get_codec_name(env_encoding);
726+
if (!codeset) {
727+
fprintf(stderr, "PYTHONFSENCODING is not a valid encoding:\n");
728+
PyErr_Print();
729+
}
730+
}
731+
if (!codeset) {
732+
/* On Unix, set the file system encoding according to the
733+
user's preference, if the CODESET names a well-known
734+
Python codec, and Py_FileSystemDefaultEncoding isn't
735+
initialized by other means. Also set the encoding of
736+
stdin and stdout if these are terminals. */
737+
codeset = get_codeset();
738+
}
718739
if (codeset != NULL) {
719740
Py_FileSystemDefaultEncoding = codeset;
720741
Py_HasFileSystemDefaultEncoding = 0;
721742
return;
743+
} else {
744+
fprintf(stderr, "Unable to get the locale encoding:\n");
745+
PyErr_Print();
722746
}
723747

724-
PyErr_Clear();
725-
fprintf(stderr,
726-
"Unable to get the locale encoding: "
727-
"fallback to utf-8\n");
748+
fprintf(stderr, "Unable to get the filesystem encoding: fallback to utf-8\n");
728749
Py_FileSystemDefaultEncoding = "utf-8";
729750
Py_HasFileSystemDefaultEncoding = 1;
730751
}

0 commit comments

Comments
 (0)