bpo-45211: Move helpers from getpath.c to internal API. (gh-28550)

This accomplishes 2 things:

* consolidates some common code between getpath.c and getpathp.c
* makes the helpers available to code in other files

FWIW, the signature of the join_relfile() function (in fileutils.c) intentionally mirrors that of Windows' PathCchCombineEx().

Note that this change is mostly moving code around. No behavior is meant to change.

https://bugs.python.org/issue45211
This commit is contained in:
Eric Snow 2021-09-27 10:00:32 -06:00 committed by GitHub
parent e5f13ce5b4
commit ae7839bbe8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 164 additions and 100 deletions

View file

@ -10,6 +10,12 @@ extern "C" {
#include <locale.h> /* struct lconv */ #include <locale.h> /* struct lconv */
// This is used after getting NULL back from Py_DecodeLocale().
// LEN is the size value reported by that call: (size_t)-2 selects the
// "cannot decode" error, any other value selects the no-memory error.
// The whole expansion is parenthesized so that the macro behaves as a
// single expression wherever it is used.
#define DECODE_LOCALE_ERR(NAME, LEN) \
    (((LEN) == (size_t)-2) \
     ? _PyStatus_ERR("cannot decode " NAME) \
     : _PyStatus_NO_MEMORY())
PyAPI_DATA(int) _Py_HasFileSystemDefaultEncodeErrors; PyAPI_DATA(int) _Py_HasFileSystemDefaultEncodeErrors;
PyAPI_FUNC(int) _Py_DecodeUTF8Ex( PyAPI_FUNC(int) _Py_DecodeUTF8Ex(
@ -33,6 +39,9 @@ PyAPI_FUNC(wchar_t*) _Py_DecodeUTF8_surrogateescape(
Py_ssize_t arglen, Py_ssize_t arglen,
size_t *wlen); size_t *wlen);
/* stat() for a wide-character path.  Returns the result of the
   underlying stat call, or -1 with errno set to EINVAL when the path
   cannot be encoded (non-Windows).  On Windows only st_mode is filled. */
extern int
_Py_wstat(const wchar_t *, struct stat *);
PyAPI_FUNC(int) _Py_GetForceASCII(void); PyAPI_FUNC(int) _Py_GetForceASCII(void);
/* Reset "force ASCII" mode (if it was initialized). /* Reset "force ASCII" mode (if it was initialized).
@ -65,6 +74,12 @@ extern int _Py_EncodeNonUnicodeWchar_InPlace(
Py_ssize_t size); Py_ssize_t size);
#endif #endif
/* Join dirname and the relative path relfile into a newly allocated
   string.  The caller must release the result with PyMem_RawFree(). */
extern wchar_t * _Py_join_relfile(const wchar_t *dirname,
const wchar_t *relfile);
/* Append the relative path relfile to dirname in place.  bufsize is the
   total size of the dirname buffer; returns -1 if anything is wrong
   with the path lengths. */
extern int _Py_add_relfile(wchar_t *dirname,
const wchar_t *relfile,
size_t bufsize);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View file

@ -30,6 +30,17 @@ _Py_IsMainInterpreter(PyInterpreterState *interp)
} }
/* Return the configuration of the runtime's main interpreter, or NULL
   when no main interpreter is set. */
static inline const PyConfig *
_Py_GetMainConfig(void)
{
    PyInterpreterState *main_interp = _PyRuntime.interpreters.main;
    return (main_interp != NULL)
        ? _PyInterpreterState_GetConfig(main_interp)
        : NULL;
}
/* Only handle signals on the main thread of the main interpreter. */ /* Only handle signals on the main thread of the main interpreter. */
static inline int static inline int
_Py_ThreadCanHandleSignals(PyInterpreterState *interp) _Py_ThreadCanHandleSignals(PyInterpreterState *interp)

View file

@ -434,7 +434,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase):
'pathconfig_warnings': 1, 'pathconfig_warnings': 1,
'_init_main': 1, '_init_main': 1,
'_isolated_interpreter': 0, '_isolated_interpreter': 0,
'use_frozen_modules': False, 'use_frozen_modules': 0,
} }
if MS_WINDOWS: if MS_WINDOWS:
CONFIG_COMPAT.update({ CONFIG_COMPAT.update({

View file

@ -115,11 +115,6 @@ extern "C" {
#define BUILD_LANDMARK L"Modules/Setup.local" #define BUILD_LANDMARK L"Modules/Setup.local"
#define DECODE_LOCALE_ERR(NAME, LEN) \
((LEN) == (size_t)-2) \
? _PyStatus_ERR("cannot decode " NAME) \
: _PyStatus_NO_MEMORY()
#define PATHLEN_ERR() _PyStatus_ERR("path configuration: path too long") #define PATHLEN_ERR() _PyStatus_ERR("path configuration: path too long")
typedef struct { typedef struct {
@ -149,23 +144,6 @@ static const wchar_t delimiter[2] = {DELIM, '\0'};
static const wchar_t separator[2] = {SEP, '\0'}; static const wchar_t separator[2] = {SEP, '\0'};
/* Get file status. Encode the path to the locale encoding. */
static int
_Py_wstat(const wchar_t* path, struct stat *buf)
{
int err;
char *fname;
fname = _Py_EncodeLocaleRaw(path, NULL);
if (fname == NULL) {
errno = EINVAL;
return -1;
}
err = stat(fname, buf);
PyMem_RawFree(fname);
return err;
}
static void static void
reduce(wchar_t *dir) reduce(wchar_t *dir)
{ {
@ -235,28 +213,18 @@ isdir(const wchar_t *filename)
static PyStatus static PyStatus
joinpath(wchar_t *path, const wchar_t *path2, size_t path_len) joinpath(wchar_t *path, const wchar_t *path2, size_t path_len)
{ {
size_t n; if (_Py_isabs(path2)) {
if (!_Py_isabs(path2)) { if (wcslen(path2) >= path_len) {
n = wcslen(path);
if (n >= path_len) {
return PATHLEN_ERR(); return PATHLEN_ERR();
} }
wcscpy(path, path2);
if (n > 0 && path[n-1] != SEP) {
path[n++] = SEP;
}
} }
else { else {
n = 0; if (_Py_add_relfile(path, path2, path_len) < 0) {
return PATHLEN_ERR();
}
return _PyStatus_OK();
} }
size_t k = wcslen(path2);
if (n + k >= path_len) {
return PATHLEN_ERR();
}
wcsncpy(path + n, path2, k);
path[n + k] = '\0';
return _PyStatus_OK(); return _PyStatus_OK();
} }
@ -283,23 +251,7 @@ joinpath2(const wchar_t *path, const wchar_t *path2)
if (_Py_isabs(path2)) { if (_Py_isabs(path2)) {
return _PyMem_RawWcsdup(path2); return _PyMem_RawWcsdup(path2);
} }
return _Py_join_relfile(path, path2);
size_t len = wcslen(path);
int add_sep = (len > 0 && path[len - 1] != SEP);
len += add_sep;
len += wcslen(path2);
wchar_t *new_path = PyMem_RawMalloc((len + 1) * sizeof(wchar_t));
if (new_path == NULL) {
return NULL;
}
wcscpy(new_path, path);
if (add_sep) {
wcscat(new_path, separator);
}
wcscat(new_path, path2);
return new_path;
} }

View file

@ -82,6 +82,7 @@
#include "Python.h" #include "Python.h"
#include "pycore_initconfig.h" // PyStatus #include "pycore_initconfig.h" // PyStatus
#include "pycore_pathconfig.h" // _PyPathConfig #include "pycore_pathconfig.h" // _PyPathConfig
#include "pycore_fileutils.h" // _Py_add_relfile()
#include "osdefs.h" // SEP, ALTSEP #include "osdefs.h" // SEP, ALTSEP
#include <wchar.h> #include <wchar.h>
@ -115,10 +116,6 @@
* with a semicolon separated path prior to calling Py_Initialize. * with a semicolon separated path prior to calling Py_Initialize.
*/ */
#ifndef LANDMARK
# define LANDMARK L"lib\\os.py"
#endif
#define INIT_ERR_BUFFER_OVERFLOW() _PyStatus_ERR("buffer overflow") #define INIT_ERR_BUFFER_OVERFLOW() _PyStatus_ERR("buffer overflow")
@ -216,7 +213,7 @@ exists(const wchar_t *filename)
Assumes 'filename' MAXPATHLEN+1 bytes long - Assumes 'filename' MAXPATHLEN+1 bytes long -
may extend 'filename' by one character. */ may extend 'filename' by one character. */
static int static int
ismodule(wchar_t *filename, int update_filename) ismodule(wchar_t *filename)
{ {
size_t n; size_t n;
@ -231,9 +228,8 @@ ismodule(wchar_t *filename, int update_filename)
filename[n] = L'c'; filename[n] = L'c';
filename[n + 1] = L'\0'; filename[n + 1] = L'\0';
exist = exists(filename); exist = exists(filename);
if (!update_filename) { // Drop the 'c' we just added.
filename[n] = L'\0'; filename[n] = L'\0';
}
return exist; return exist;
} }
return 0; return 0;
@ -253,7 +249,7 @@ ismodule(wchar_t *filename, int update_filename)
static void static void
join(wchar_t *buffer, const wchar_t *stuff) join(wchar_t *buffer, const wchar_t *stuff)
{ {
if (FAILED(PathCchCombineEx(buffer, MAXPATHLEN+1, buffer, stuff, 0))) { if (_Py_add_relfile(buffer, stuff, MAXPATHLEN+1) < 0) {
Py_FatalError("buffer overflow in getpathp.c's join()"); Py_FatalError("buffer overflow in getpathp.c's join()");
} }
} }
@ -273,30 +269,37 @@ canonicalize(wchar_t *buffer, const wchar_t *path)
return _PyStatus_OK(); return _PyStatus_OK();
} }
/* gotlandmark only called by search_for_prefix, which ensures
'prefix' is null terminated in bounds. join() ensures
'landmark' can not overflow prefix if too long. */
static int static int
gotlandmark(const wchar_t *prefix, const wchar_t *landmark) is_stdlibdir(wchar_t *stdlibdir)
{ {
wchar_t filename[MAXPATHLEN+1]; wchar_t *filename = stdlibdir;
memset(filename, 0, sizeof(filename)); #ifndef LANDMARK
wcscpy_s(filename, Py_ARRAY_LENGTH(filename), prefix); # define LANDMARK L"os.py"
join(filename, landmark); #endif
return ismodule(filename, FALSE); /* join() ensures 'landmark' can not overflow prefix if too long. */
join(filename, LANDMARK);
return ismodule(filename);
} }
/* assumes argv0_path is MAXPATHLEN+1 bytes long, already \0 term'd. /* assumes argv0_path is MAXPATHLEN+1 bytes long, already \0 term'd.
assumption provided by only caller, calculate_path() */ assumption provided by only caller, calculate_path() */
static int static int
search_for_prefix(wchar_t *prefix, const wchar_t *argv0_path, const wchar_t *landmark) search_for_prefix(wchar_t *prefix, const wchar_t *argv0_path)
{ {
/* Search from argv0_path, until landmark is found */ /* Search from argv0_path, until LANDMARK is found.
wcscpy_s(prefix, MAXPATHLEN + 1, argv0_path); We guarantee 'prefix' is null terminated in bounds. */
wcscpy_s(prefix, MAXPATHLEN+1, argv0_path);
wchar_t stdlibdir[MAXPATHLEN+1];
wcscpy_s(stdlibdir, Py_ARRAY_LENGTH(stdlibdir), prefix);
/* We initialize with the longest possible path, in case it doesn't fit.
This also gives us an initial SEP at stdlibdir[wcslen(prefix)]. */
join(stdlibdir, L"lib");
do { do {
if (gotlandmark(prefix, landmark)) { assert(stdlibdir[wcslen(prefix)] == SEP);
/* Due to reduce() and our initial value, this result
is guaranteed to fit. */
wcscpy(&stdlibdir[wcslen(prefix) + 1], L"lib");
if (is_stdlibdir(stdlibdir)) {
return 1; return 1;
} }
reduce(prefix); reduce(prefix);
@ -758,7 +761,7 @@ calculate_home_prefix(PyCalculatePath *calculate,
reduce(prefix); reduce(prefix);
calculate->home = prefix; calculate->home = prefix;
} }
else if (search_for_prefix(prefix, argv0_path, LANDMARK)) { else if (search_for_prefix(prefix, argv0_path)) {
calculate->home = prefix; calculate->home = prefix;
} }
else { else {
@ -936,7 +939,7 @@ calculate_module_search_path(PyCalculatePath *calculate,
lookBuf[nchars] = L'\0'; lookBuf[nchars] = L'\0';
/* Up one level to the parent */ /* Up one level to the parent */
reduce(lookBuf); reduce(lookBuf);
if (search_for_prefix(prefix, lookBuf, LANDMARK)) { if (search_for_prefix(prefix, lookBuf)) {
break; break;
} }
/* If we are out of paths to search - give up */ /* If we are out of paths to search - give up */

View file

@ -7,6 +7,7 @@
#ifdef MS_WINDOWS #ifdef MS_WINDOWS
# include <malloc.h> # include <malloc.h>
# include <windows.h> # include <windows.h>
# include <pathcch.h> // PathCchCombineEx
extern int winerror_to_errno(int); extern int winerror_to_errno(int);
#endif #endif
@ -1205,6 +1206,31 @@ _Py_fstat(int fd, struct _Py_stat_struct *status)
return 0; return 0;
} }
/* Like _Py_stat() but with a raw (wchar_t) filename.

   On Windows this calls _wstat() and copies only st_mode into *buf.
   Elsewhere the path is encoded with _Py_EncodeLocaleRaw() and handed
   to stat(); if encoding fails, errno is set to EINVAL and -1 is
   returned.  Otherwise the return value is that of the stat call. */
int
_Py_wstat(const wchar_t* path, struct stat *buf)
{
#ifdef MS_WINDOWS
    struct _stat native;
    int result = _wstat(path, &native);
    if (result == 0) {
        buf->st_mode = native.st_mode;
    }
    return result;
#else
    char *encoded = _Py_EncodeLocaleRaw(path, NULL);
    if (encoded == NULL) {
        errno = EINVAL;
        return -1;
    }
    int result = stat(encoded, buf);
    PyMem_RawFree(encoded);
    return result;
#endif
}
/* Call _wstat() on Windows, or encode the path to the filesystem encoding and /* Call _wstat() on Windows, or encode the path to the filesystem encoding and
call stat() otherwise. Only fill st_mode attribute on Windows. call stat() otherwise. Only fill st_mode attribute on Windows.
@ -1216,7 +1242,6 @@ _Py_stat(PyObject *path, struct stat *statbuf)
{ {
#ifdef MS_WINDOWS #ifdef MS_WINDOWS
int err; int err;
struct _stat wstatbuf;
#if USE_UNICODE_WCHAR_CACHE #if USE_UNICODE_WCHAR_CACHE
const wchar_t *wpath = _PyUnicode_AsUnicode(path); const wchar_t *wpath = _PyUnicode_AsUnicode(path);
@ -1226,9 +1251,7 @@ _Py_stat(PyObject *path, struct stat *statbuf)
if (wpath == NULL) if (wpath == NULL)
return -2; return -2;
err = _wstat(wpath, &wstatbuf); err = _Py_wstat(wpath, statbuf);
if (!err)
statbuf->st_mode = wstatbuf.st_mode;
#if !USE_UNICODE_WCHAR_CACHE #if !USE_UNICODE_WCHAR_CACHE
PyMem_Free(wpath); PyMem_Free(wpath);
#endif /* USE_UNICODE_WCHAR_CACHE */ #endif /* USE_UNICODE_WCHAR_CACHE */
@ -2072,6 +2095,77 @@ _Py_abspath(const wchar_t *path, wchar_t **abspath_p)
} }
/* Join "dirname" and "relfile" into "buffer", like os.path.join().

   buffer:  destination; it may be the same pointer as dirname, in which
            case relfile is appended in place
   bufsize: size of buffer in wide characters, including the NUL; the
            caller must ensure the buffer really is that big
   dirname: NUL-terminated directory part (may be empty)
   relfile: NUL-terminated path to append; must be relative

   Return 0 on success and -1 if the lengths do not pass the size check
   (or, on Windows, if PathCchCombineEx() fails). */
static int
join_relfile(wchar_t *buffer, size_t bufsize,
             const wchar_t *dirname, const wchar_t *relfile)
{
#ifdef MS_WINDOWS
    if (FAILED(PathCchCombineEx(buffer, bufsize, dirname, relfile, 0))) {
        return -1;
    }
#else
    assert(!_Py_isabs(relfile));
    size_t dirlen = wcslen(dirname);
    size_t rellen = wcslen(relfile);
    size_t maxlen = bufsize - 1;
    // Requiring dirlen + rellen < maxlen leaves room for one separator
    // and the terminating NUL.
    if (maxlen > MAXPATHLEN || dirlen >= maxlen || rellen >= maxlen - dirlen) {
        return -1;
    }
    if (dirlen == 0) {
        // We do not add a leading separator.
        wcscpy(buffer, relfile);
    }
    else {
        if (dirname != buffer) {
            wcscpy(buffer, dirname);
        }
        size_t relstart = dirlen;
        // dirlen > 0 here, so any dirname that does not already end
        // with SEP gets one -- including a single-character dirname.
        if (dirname[dirlen - 1] != SEP) {
            buffer[dirlen] = SEP;
            relstart += 1;
        }
        wcscpy(&buffer[relstart], relfile);
    }
#endif
    return 0;
}
/* Join the two paths together, like os.path.join().

   dirname: the directory part
   relfile: this must be a relative path

   Return a newly allocated string, or NULL if memory could not be
   allocated or the paths could not be joined.  The caller is
   responsible for calling PyMem_RawFree() on the result. */
wchar_t *
_Py_join_relfile(const wchar_t *dirname, const wchar_t *relfile)
{
    assert(dirname != NULL && relfile != NULL);
    assert(!_Py_isabs(relfile));
    // Room for dirname, one separator, relfile and the trailing NUL.
    size_t maxlen = wcslen(dirname) + 1 + wcslen(relfile);
    size_t bufsize = maxlen + 1;
    wchar_t *filename = PyMem_RawMalloc(bufsize * sizeof(wchar_t));
    if (filename == NULL) {
        return NULL;
    }
    assert(wcslen(dirname) < MAXPATHLEN);
    assert(wcslen(relfile) < MAXPATHLEN - wcslen(dirname));
    // The asserts above vanish in release builds, so a failed join must
    // not hand an uninitialized buffer back to the caller.
    if (join_relfile(filename, bufsize, dirname, relfile) < 0) {
        PyMem_RawFree(filename);
        return NULL;
    }
    return filename;
}
/* In-place variant of the path join, like os.path.join().

   dirname: the target buffer, already holding the NUL-terminated
            directory name; relfile is appended to it
   relfile: this must be a relative path
   bufsize: total allocated size of the dirname buffer

   Return -1 if anything is wrong with the path lengths. */
int
_Py_add_relfile(wchar_t *dirname, const wchar_t *relfile, size_t bufsize)
{
    assert(dirname != NULL && relfile != NULL);
    assert(bufsize > 0);
    // Source and destination are the same buffer: append in place.
    int status = join_relfile(dirname, bufsize, dirname, relfile);
    return status;
}
/* Get the current directory. buflen is the buffer size in wide characters /* Get the current directory. buflen is the buffer size in wide characters
including the null character. Decode the path from the locale encoding. including the null character. Decode the path from the locale encoding.

View file

@ -587,11 +587,6 @@ Py_GetArgcArgv(int *argc, wchar_t ***argv)
/* --- PyConfig ---------------------------------------------- */ /* --- PyConfig ---------------------------------------------- */
#define DECODE_LOCALE_ERR(NAME, LEN) \
(((LEN) == -2) \
? _PyStatus_ERR("cannot decode " NAME) \
: _PyStatus_NO_MEMORY())
#define MAX_HASH_SEED 4294967295UL #define MAX_HASH_SEED 4294967295UL

View file

@ -1,4 +1,5 @@
#include "Python.h" #include "Python.h"
#include "pycore_fileutils.h" // DECODE_LOCALE_ERR
#include "pycore_getopt.h" // _PyOS_GetOpt() #include "pycore_getopt.h" // _PyOS_GetOpt()
#include "pycore_initconfig.h" // _PyArgv #include "pycore_initconfig.h" // _PyArgv
#include "pycore_pymem.h" // _PyMem_GetAllocatorName() #include "pycore_pymem.h" // _PyMem_GetAllocatorName()
@ -6,12 +7,6 @@
#include <locale.h> // setlocale() #include <locale.h> // setlocale()
#define DECODE_LOCALE_ERR(NAME, LEN) \
(((LEN) == -2) \
? _PyStatus_ERR("cannot decode " NAME) \
: _PyStatus_NO_MEMORY())
/* Forward declarations */ /* Forward declarations */
static void static void
preconfig_copy(PyPreConfig *config, const PyPreConfig *config2); preconfig_copy(PyPreConfig *config, const PyPreConfig *config2);
@ -87,8 +82,7 @@ _PyArgv_AsWstrList(const _PyArgv *args, PyWideStringList *list)
wchar_t *arg = Py_DecodeLocale(args->bytes_argv[i], &len); wchar_t *arg = Py_DecodeLocale(args->bytes_argv[i], &len);
if (arg == NULL) { if (arg == NULL) {
_PyWideStringList_Clear(&wargv); _PyWideStringList_Clear(&wargv);
return DECODE_LOCALE_ERR("command line arguments", return DECODE_LOCALE_ERR("command line arguments", len);
(Py_ssize_t)len);
} }
wargv.items[i] = arg; wargv.items[i] = arg;
wargv.length++; wargv.length++;