Skip to content

Commit 9dd7620

Browse files
authored
bpo-32030: Add _Py_EncodeLocaleRaw() (#4961)
Replace Py_EncodeLocale() with _Py_EncodeLocaleRaw() in:
* _Py_wfopen()
* _Py_wreadlink()
* _Py_wrealpath()
* _Py_wstat()
* pymain_open_filename()
These functions are called early during Python initialization, so only the RAW memory allocator must be used.
1 parent 4a02543 commit 9dd7620

File tree

5 files changed

+101
-41
lines changed

5 files changed

+101
-41
lines changed

‎Include/fileutils.h‎

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,13 @@ PyAPI_FUNC(wchar_t *) Py_DecodeLocale(
1313
PyAPI_FUNC(char*) Py_EncodeLocale(
1414
const wchar_t *text,
1515
size_t *error_pos);
16+
17+
PyAPI_FUNC(char*) _Py_EncodeLocaleRaw(
18+
const wchar_t *text,
19+
size_t *error_pos);
1620
#endif
1721

1822
#ifndef Py_LIMITED_API
19-
2023
PyAPI_FUNC(PyObject *) _Py_device_encoding(int);
2124

2225
#ifdef MS_WINDOWS

‎Modules/getpath.c‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,13 +140,13 @@ _Py_wstat(const wchar_t* path, struct stat *buf)
140140
{
141141
int err;
142142
char *fname;
143-
fname = Py_EncodeLocale(path, NULL);
143+
fname = _Py_EncodeLocaleRaw(path, NULL);
144144
if (fname == NULL) {
145145
errno = EINVAL;
146146
return -1;
147147
}
148148
err = stat(fname, buf);
149-
PyMem_Free(fname);
149+
PyMem_RawFree(fname);
150150
return err;
151151
}
152152

‎Modules/main.c‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1490,14 +1490,14 @@ pymain_open_filename(_PyMain *pymain)
14901490
char *cfilename_buffer;
14911491
const char *cfilename;
14921492
int err = errno;
1493-
cfilename_buffer = Py_EncodeLocale(pymain->filename, NULL);
1493+
cfilename_buffer = _Py_EncodeLocaleRaw(pymain->filename, NULL);
14941494
if (cfilename_buffer != NULL)
14951495
cfilename = cfilename_buffer;
14961496
else
14971497
cfilename = "<unprintable file name>";
14981498
fprintf(stderr, "%ls: can't open file '%s': [Errno %d] %s\n",
14991499
pymain->config.program, cfilename, err, strerror(err));
1500-
PyMem_Free(cfilename_buffer);
1500+
PyMem_RawFree(cfilename_buffer);
15011501
pymain->status = 2;
15021502
return NULL;
15031503
}

‎Objects/unicodeobject.c‎

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5158,7 +5158,8 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size, size_t *p_wlen)
51585158
On memory allocation failure, return NULL and write (size_t)-1 into
51595159
*error_pos (if error_pos is set). */
51605160
char*
5161-
_Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos)
5161+
_Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos,
5162+
int raw_malloc)
51625163
{
51635164
const Py_ssize_t max_char_size = 4;
51645165
Py_ssize_t len = wcslen(text);
@@ -5167,7 +5168,12 @@ _Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos)
51675168

51685169
char *bytes;
51695170
if (len <= PY_SSIZE_T_MAX / max_char_size - 1) {
5170-
bytes = PyMem_Malloc((len + 1) * max_char_size);
5171+
if (raw_malloc) {
5172+
bytes = PyMem_RawMalloc((len + 1) * max_char_size);
5173+
}
5174+
else {
5175+
bytes = PyMem_Malloc((len + 1) * max_char_size);
5176+
}
51715177
}
51725178
else {
51735179
bytes = NULL;
@@ -5221,7 +5227,13 @@ _Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos)
52215227
*p++ = '\0';
52225228

52235229
size_t final_size = (p - bytes);
5224-
char *bytes2 = PyMem_Realloc(bytes, final_size);
5230+
char *bytes2;
5231+
if (raw_malloc) {
5232+
bytes2 = PyMem_RawRealloc(bytes, final_size);
5233+
}
5234+
else {
5235+
bytes2 = PyMem_Realloc(bytes, final_size);
5236+
}
52255237
if (bytes2 == NULL) {
52265238
if (error_pos != NULL) {
52275239
*error_pos = (size_t)-1;
@@ -5231,7 +5243,12 @@ _Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos)
52315243
return bytes2;
52325244

52335245
error:
5234-
PyMem_Free(bytes);
5246+
if (raw_malloc) {
5247+
PyMem_RawFree(bytes);
5248+
}
5249+
else {
5250+
PyMem_Free(bytes);
5251+
}
52355252
return NULL;
52365253
}
52375254

‎Python/fileutils.c‎

Lines changed: 72 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ extern int winerror_to_errno(int);
2323
extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size,
2424
size_t *p_wlen);
2525
extern char* _Py_EncodeUTF8_surrogateescape(const wchar_t *text,
26-
size_t *error_pos);
26+
size_t *error_pos, int raw_malloc);
2727

2828
#ifdef O_CLOEXEC
2929
/* Does open() support the O_CLOEXEC flag? Possible values:
@@ -183,7 +183,7 @@ check_force_ascii(void)
183183
}
184184

185185
static char*
186-
encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos)
186+
encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos, int raw_malloc)
187187
{
188188
char *result = NULL, *out;
189189
size_t len, i;
@@ -194,7 +194,13 @@ encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos)
194194

195195
len = wcslen(text);
196196

197-
result = PyMem_Malloc(len + 1); /* +1 for NUL byte */
197+
/* +1 for NUL byte */
198+
if (raw_malloc) {
199+
result = PyMem_RawMalloc(len + 1);
200+
}
201+
else {
202+
result = PyMem_Malloc(len + 1);
203+
}
198204
if (result == NULL)
199205
return NULL;
200206

@@ -211,9 +217,15 @@ encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos)
211217
*out++ = (char)(ch - 0xdc00);
212218
}
213219
else {
214-
if (error_pos != NULL)
220+
if (error_pos != NULL) {
215221
*error_pos = i;
216-
PyMem_Free(result);
222+
}
223+
if (raw_malloc) {
224+
PyMem_RawFree(result);
225+
}
226+
else {
227+
PyMem_Free(result);
228+
}
217229
return NULL;
218230
}
219231
}
@@ -423,7 +435,7 @@ Py_DecodeLocale(const char* arg, size_t *size)
423435

424436
#if !defined(__APPLE__) && !defined(__ANDROID__)
425437
static char*
426-
encode_locale(const wchar_t *text, size_t *error_pos)
438+
encode_current_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
427439
{
428440
const size_t len = wcslen(text);
429441
char *result = NULL, *bytes = NULL;
@@ -455,8 +467,14 @@ encode_locale(const wchar_t *text, size_t *error_pos)
455467
else
456468
converted = wcstombs(NULL, buf, 0);
457469
if (converted == (size_t)-1) {
458-
if (result != NULL)
459-
PyMem_Free(result);
470+
if (result != NULL) {
471+
if (raw_malloc) {
472+
PyMem_RawFree(result);
473+
}
474+
else {
475+
PyMem_Free(result);
476+
}
477+
}
460478
if (error_pos != NULL)
461479
*error_pos = i;
462480
return NULL;
@@ -475,10 +493,16 @@ encode_locale(const wchar_t *text, size_t *error_pos)
475493
}
476494

477495
size += 1; /* nul byte at the end */
478-
result = PyMem_Malloc(size);
496+
if (raw_malloc) {
497+
result = PyMem_RawMalloc(size);
498+
}
499+
else {
500+
result = PyMem_Malloc(size);
501+
}
479502
if (result == NULL) {
480-
if (error_pos != NULL)
503+
if (error_pos != NULL) {
481504
*error_pos = (size_t)-1;
505+
}
482506
return NULL;
483507
}
484508
bytes = result;
@@ -487,6 +511,28 @@ encode_locale(const wchar_t *text, size_t *error_pos)
487511
}
488512
#endif
489513

514+
static char*
515+
encode_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
516+
{
517+
#if defined(__APPLE__) || defined(__ANDROID__)
518+
return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc);
519+
#else /* __APPLE__ */
520+
if (Py_UTF8Mode == 1) {
521+
return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc);
522+
}
523+
524+
#ifndef MS_WINDOWS
525+
if (force_ascii == -1)
526+
force_ascii = check_force_ascii();
527+
528+
if (force_ascii)
529+
return encode_ascii_surrogateescape(text, error_pos, raw_malloc);
530+
#endif
531+
532+
return encode_current_locale(text, error_pos, raw_malloc);
533+
#endif /* __APPLE__ or __ANDROID__ */
534+
}
535+
490536
/* Encode a wide character string to the locale encoding with the
491537
surrogateescape error handler: surrogate characters in the range
492538
U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
@@ -502,23 +548,16 @@ encode_locale(const wchar_t *text, size_t *error_pos)
502548
char*
503549
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
504550
{
505-
#if defined(__APPLE__) || defined(__ANDROID__)
506-
return _Py_EncodeUTF8_surrogateescape(text, error_pos);
507-
#else /* __APPLE__ */
508-
if (Py_UTF8Mode == 1) {
509-
return _Py_EncodeUTF8_surrogateescape(text, error_pos);
510-
}
511-
512-
#ifndef MS_WINDOWS
513-
if (force_ascii == -1)
514-
force_ascii = check_force_ascii();
551+
return encode_locale(text, error_pos, 0);
552+
}
515553

516-
if (force_ascii)
517-
return encode_ascii_surrogateescape(text, error_pos);
518-
#endif
519554

520-
return encode_locale(text, error_pos);
521-
#endif /* __APPLE__ or __ANDROID__ */
555+
/* Similar to Py_EncodeLocale(), but result must be freed by PyMem_RawFree()
556+
instead of PyMem_Free(). */
557+
char*
558+
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
559+
{
560+
return encode_locale(text, error_pos, 1);
522561
}
523562

524563

@@ -1029,11 +1068,12 @@ _Py_wfopen(const wchar_t *path, const wchar_t *mode)
10291068
errno = EINVAL;
10301069
return NULL;
10311070
}
1032-
cpath = Py_EncodeLocale(path, NULL);
1033-
if (cpath == NULL)
1071+
cpath = _Py_EncodeLocaleRaw(path, NULL);
1072+
if (cpath == NULL) {
10341073
return NULL;
1074+
}
10351075
f = fopen(cpath, cmode);
1036-
PyMem_Free(cpath);
1076+
PyMem_RawFree(cpath);
10371077
#else
10381078
f = _wfopen(path, mode);
10391079
#endif
@@ -1341,13 +1381,13 @@ _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
13411381
int res;
13421382
size_t r1;
13431383

1344-
cpath = Py_EncodeLocale(path, NULL);
1384+
cpath = _Py_EncodeLocaleRaw(path, NULL);
13451385
if (cpath == NULL) {
13461386
errno = EINVAL;
13471387
return -1;
13481388
}
13491389
res = (int)readlink(cpath, cbuf, Py_ARRAY_LENGTH(cbuf));
1350-
PyMem_Free(cpath);
1390+
PyMem_RawFree(cpath);
13511391
if (res == -1)
13521392
return -1;
13531393
if (res == Py_ARRAY_LENGTH(cbuf)) {
@@ -1386,13 +1426,13 @@ _Py_wrealpath(const wchar_t *path,
13861426
wchar_t *wresolved_path;
13871427
char *res;
13881428
size_t r;
1389-
cpath = Py_EncodeLocale(path, NULL);
1429+
cpath = _Py_EncodeLocaleRaw(path, NULL);
13901430
if (cpath == NULL) {
13911431
errno = EINVAL;
13921432
return NULL;
13931433
}
13941434
res = realpath(cpath, cresolved_path);
1395-
PyMem_Free(cpath);
1435+
PyMem_RawFree(cpath);
13961436
if (res == NULL)
13971437
return NULL;
13981438

0 commit comments

Comments
 (0)