gh-102511: Speed up os.path.splitroot() with native helpers (GH-118089)

This commit is contained in:
Nice Zombies 2024-04-25 11:07:38 +02:00 committed by GitHub
parent e38b43c213
commit 10bb90ed49
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 337 additions and 108 deletions

View file

@ -2295,6 +2295,99 @@ PathCchCombineEx(wchar_t *buffer, size_t bufsize, const wchar_t *dirname,
#endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */
/* Measure the drive and root prefix at the start of "path".

   path:     the path to inspect.
   size:     number of characters of "path" to consider; if negative, "path"
             is scanned up to its null terminator instead.
   drvsize:  receives the length of the drive part (always 0 when
             MS_WINDOWS is not defined).
   rootsize: receives the length of the root part that follows the drive.

   Both output pointers must be non-NULL.  When "size" is non-negative, no
   character at or beyond path[size] is examined, so the reported prefix
   never extends past the end of the input and the buffer does not need to
   be null-terminated. */
void
_Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize,
             Py_ssize_t *rootsize)
{
    assert(drvsize);
    assert(rootsize);
    const wchar_t *pEnd = size >= 0 ? &path[size] : NULL;
    // Every character test checks for the end of the input first, so a
    // position is only dereferenced when it is known to be inside the input.
    // Each test below is reached only after the previous position was found
    // not to be the end, hence no pointer ever moves beyond pEnd.
#define IS_END(x) (pEnd ? (x) == pEnd : !*(x))
#define IS_CHAR(x, c) (!IS_END(x) && *(x) == (c))
#ifndef MS_WINDOWS
#define IS_SEP(x) IS_CHAR(x, SEP)
    *drvsize = 0;
    if (!IS_SEP(&path[0])) {
        // Relative path, e.g.: 'foo'
        *rootsize = 0;
    }
    else if (!IS_SEP(&path[1]) || IS_SEP(&path[2])) {
        // Absolute path, e.g.: '/foo', '///foo', '////foo', etc.
        *rootsize = 1;
    }
    else {
        // Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see
        // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
        *rootsize = 2;
    }
#undef IS_SEP
#else
#define IS_SEP(x) (IS_CHAR(x, SEP) || IS_CHAR(x, ALTSEP))
#define SEP_OR_END(x) (IS_END(x) || IS_SEP(x))
    if (IS_SEP(&path[0])) {
        if (IS_SEP(&path[1])) {
            // Device drives, e.g. \\.\device or \\?\device
            // UNC drives, e.g. \\server\share or \\?\UNC\server\share
            Py_ssize_t idx;
            if (IS_CHAR(&path[2], L'?') && IS_SEP(&path[3]) &&
                (IS_CHAR(&path[4], L'U') || IS_CHAR(&path[4], L'u')) &&
                (IS_CHAR(&path[5], L'N') || IS_CHAR(&path[5], L'n')) &&
                (IS_CHAR(&path[6], L'C') || IS_CHAR(&path[6], L'c')) &&
                IS_SEP(&path[7]))
            {
                // Skip the whole '\\?\UNC\' prefix before the server name
                idx = 8;
            }
            else {
                // Skip only the two leading separators
                idx = 2;
            }
            // First component after the prefix (server or device namespace)
            while (!SEP_OR_END(&path[idx])) {
                idx++;
            }
            if (IS_END(&path[idx])) {
                *drvsize = idx;
                *rootsize = 0;
            }
            else {
                // Second component (share or device name); the separator
                // between the two components belongs to the drive
                idx++;
                while (!SEP_OR_END(&path[idx])) {
                    idx++;
                }
                *drvsize = idx;
                if (IS_END(&path[idx])) {
                    *rootsize = 0;
                }
                else {
                    *rootsize = 1;
                }
            }
        }
        else {
            // Relative path with root, e.g. \Windows
            *drvsize = 0;
            *rootsize = 1;
        }
    }
    else if (!IS_END(&path[0]) && IS_CHAR(&path[1], L':')) {
        *drvsize = 2;
        if (IS_SEP(&path[2])) {
            // Absolute drive-letter path, e.g. X:\Windows
            *rootsize = 1;
        }
        else {
            // Relative path with drive, e.g. X:Windows
            *rootsize = 0;
        }
    }
    else {
        // Relative path, e.g. Windows
        *drvsize = 0;
        *rootsize = 0;
    }
#undef SEP_OR_END
#undef IS_SEP
#endif
#undef IS_CHAR
#undef IS_END
}
// The caller must ensure "buffer" is big enough.
static int
join_relfile(wchar_t *buffer, size_t bufsize,
@ -2411,49 +2504,39 @@ _Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *normsize)
#endif
#define SEP_OR_END(x) (IS_SEP(x) || IS_END(x))
// Skip leading '.\'
if (p1[0] == L'.' && IS_SEP(&p1[1])) {
// Skip leading '.\'
path = &path[2];
while (IS_SEP(path) && !IS_END(path)) {
while (IS_SEP(path)) {
path++;
}
p1 = p2 = minP2 = path;
lastC = SEP;
}
#ifdef MS_WINDOWS
// Skip past drive segment and update minP2
else if (p1[0] && p1[1] == L':') {
*p2++ = *p1++;
*p2++ = *p1++;
minP2 = p2;
lastC = L':';
}
// Skip past all \\-prefixed paths, including \\?\, \\.\,
// and network paths, including the first segment.
else if (IS_SEP(&p1[0]) && IS_SEP(&p1[1])) {
int sepCount = 2;
*p2++ = SEP;
*p2++ = SEP;
p1 += 2;
for (; !IS_END(p1) && sepCount; ++p1) {
if (IS_SEP(p1)) {
--sepCount;
*p2++ = lastC = SEP;
} else {
*p2++ = lastC = *p1;
}
}
minP2 = p2 - 1;
}
else {
Py_ssize_t drvsize, rootsize;
_Py_skiproot(path, size, &drvsize, &rootsize);
if (drvsize || rootsize) {
// Skip past root and update minP2
p1 = &path[drvsize + rootsize];
#ifndef ALTSEP
p2 = p1;
#else
// Skip past two leading SEPs
else if (IS_SEP(&p1[0]) && IS_SEP(&p1[1]) && !IS_SEP(&p1[2])) {
*p2++ = *p1++;
*p2++ = *p1++;
minP2 = p2 - 1; // Absolute path has SEP at minP2
lastC = SEP;
for (; p2 < p1; ++p2) {
if (*p2 == ALTSEP) {
*p2 = SEP;
}
}
#endif
minP2 = p2 - 1;
lastC = *minP2;
#ifdef MS_WINDOWS
if (lastC != SEP) {
minP2++;
}
#endif
}
}
#endif /* MS_WINDOWS */
/* if pEnd is specified, check that. Else, check for null terminator */
for (; !IS_END(p1); ++p1) {