Issue #23152: Implement _Py_fstat() to support files larger than 2 GB on Windows.

fstat() may fail with EOVERFLOW on files larger than 2 GB because the file size type is a signed 32-bit integer.
This commit is contained in:
Steve Dower 2015-02-21 08:44:05 -08:00
parent 18d1924987
commit f2f373f593
12 changed files with 242 additions and 180 deletions

View file

@ -71,8 +71,8 @@ dl_funcptr _PyImport_GetDynLoadFunc(const char *shortname,
if (fp != NULL) {
int i;
struct stat statb;
if (fstat(fileno(fp), &statb) == -1) {
struct _Py_stat_struct statb;
if (_Py_fstat(fileno(fp), &statb) == -1) {
PyErr_SetFromErrno(PyExc_IOError);
return NULL;
}

View file

@ -544,8 +544,145 @@ _Py_wstat(const wchar_t* path, struct stat *buf)
}
#endif
#ifdef HAVE_STAT
#if defined(HAVE_FSTAT) || defined(MS_WINDOWS)
#ifdef MS_WINDOWS
/* Seconds between 1.1.1601 (FILETIME epoch) and 1.1.1970 (time_t epoch) */
static __int64 secs_between_epochs = 11644473600;

/* Split a FILETIME (a count of 100-nanosecond ticks) into whole seconds
   relative to 1.1.1970, stored in *time_out, and the sub-second remainder
   expressed in nanoseconds, stored in *nsec_out. */
static void
FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
{
    /* XXX endianness. Shouldn't matter, as all Windows implementations
       are little-endian */
    __int64 ticks;
    __int64 whole_secs;
    int tick_remainder;

    /* Copy the raw bytes instead of casting and dereferencing in_ptr,
       since it might not be aligned properly. */
    memcpy(&ticks, in_ptr, sizeof(ticks));

    /* FILETIME is in units of 100 nsec: 10000000 ticks per second. */
    tick_remainder = (int)(ticks % 10000000);
    whole_secs = ticks / 10000000;

    *nsec_out = tick_remainder * 100;
    *time_out = Py_SAFE_DOWNCAST(whole_secs - secs_between_epochs, __int64, time_t);
}
/* Build a FILETIME from a time_t (seconds relative to 1.1.1970) plus a
   nanosecond part. The result is written to *out_ptr as a count of
   100-nanosecond ticks, so nsec_in is truncated to a multiple of 100. */
void
time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
{
    /* XXX endianness */
    __int64 shifted_secs;
    __int64 ticks;

    /* Move from the 1970 epoch to the 1601 epoch, then scale to ticks. */
    shifted_secs = time_in + secs_between_epochs;
    ticks = shifted_secs * 10000000 + nsec_in / 100;

    memcpy(out_ptr, &ticks, sizeof(ticks));
}
/* Below, we *know* that ugo+r is 0444 */
#if _S_IREAD != 0400
#error Unsupported C library
#endif

/* Translate Windows file attribute flags into a stat-style mode value:
   directories get the directory type plus execute permission for
   user, group and other; everything else is a regular file. Read-only
   files get 0444, all others 0666. */
static int
attributes_to_mode(DWORD attr)
{
    int type_bits;
    int perm_bits;

    if (attr & FILE_ATTRIBUTE_DIRECTORY) {
        type_bits = _S_IFDIR | 0111; /* IFEXEC for user,group,other */
    }
    else {
        type_bits = _S_IFREG;
    }

    perm_bits = (attr & FILE_ATTRIBUTE_READONLY) ? 0444 : 0666;

    return type_bits | perm_bits;
}
/* Fill *result from a BY_HANDLE_FILE_INFORMATION record.
   The structure is zeroed first, then mode, size, device, timestamps,
   link count, file index and raw file attributes are copied over.
   If reparse_tag is IO_REPARSE_TAG_SYMLINK the file type in st_mode is
   replaced by S_IFLNK. Always returns 0. */
int
attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag, struct _Py_stat_struct *result)
{
    __int64 size_high;
    __int64 index_high;

    memset(result, 0, sizeof(*result));

    /* File type and permission bits. */
    result->st_mode = attributes_to_mode(info->dwFileAttributes);
    if (reparse_tag == IO_REPARSE_TAG_SYMLINK) {
        /* drop the S_IFMT bits, then mark the entry as a symlink */
        result->st_mode &= ~S_IFMT;
        result->st_mode |= S_IFLNK;
    }
    result->st_file_attributes = info->dwFileAttributes;

    /* 64-bit values are delivered as two 32-bit halves. */
    size_high = info->nFileSizeHigh;
    result->st_size = (size_high << 32) + info->nFileSizeLow;
    index_high = info->nFileIndexHigh;
    result->st_ino = (index_high << 32) + info->nFileIndexLow;

    /* The volume serial number serves as both st_dev and st_rdev. */
    result->st_dev = info->dwVolumeSerialNumber;
    result->st_rdev = result->st_dev;
    result->st_nlink = info->nNumberOfLinks;

    /* Timestamps: st_ctime is taken from the creation time. */
    FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
    FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
    FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);

    return 0;
}
#endif
/* Return information about the file referred to by the descriptor fd.

   On POSIX, this simply calls fstat().

   On Windows, GetFileType() and GetFileInformationByHandle() are used
   instead, because they support files larger than 2 GB. fstat() may fail
   with EOVERFLOW on such files because the file size type is a signed
   32-bit integer: see issue #23152.

   Returns 0 on success and -1 on failure. On Windows errno is cleared
   and the error has to be retrieved with GetLastError(). For handles that
   are not disk files, *result is zeroed and only st_mode is set (for
   character devices and pipes).
*/
int
_Py_fstat(int fd, struct _Py_stat_struct *result)
{
#ifdef MS_WINDOWS
    BY_HANDLE_FILE_INFORMATION info;
    HANDLE h;
    int type;

    h = _PyVerify_fd(fd) ? (HANDLE)_get_osfhandle(fd) : INVALID_HANDLE_VALUE;

    /* Protocol violation: we explicitly clear errno, instead of
       setting it to a POSIX error. Callers should use GetLastError. */
    errno = 0;

    if (h == INVALID_HANDLE_VALUE) {
        /* This is really a C library error (invalid file handle).
           We set the Win32 error to the closest one matching. */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(result, 0, sizeof(*result));

    type = GetFileType(h);
    /* FILE_TYPE_UNKNOWN with no pending error means: valid but unknown
       file. With an error set, the call itself failed. */
    if (type == FILE_TYPE_UNKNOWN && GetLastError() != 0) {
        return -1;
    }

    switch (type) {
    case FILE_TYPE_DISK:
        /* handled below */
        break;
    case FILE_TYPE_CHAR:
        result->st_mode = _S_IFCHR;
        return 0;
    case FILE_TYPE_PIPE:
        result->st_mode = _S_IFIFO;
        return 0;
    default:
        /* nothing more to report for other handle types */
        return 0;
    }

    if (!GetFileInformationByHandle(h, &info)) {
        return -1;
    }

    attribute_data_to_stat(&info, 0, result);
    /* specific to fstat() */
    result->st_ino = (((__int64)info.nFileIndexHigh)<<32) + info.nFileIndexLow;
    return 0;
#else
    return fstat(fd, result);
#endif
}
#endif /* HAVE_FSTAT || MS_WINDOWS */
#ifdef HAVE_STAT
/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
call stat() otherwise. Only fill st_mode attribute on Windows.
@ -578,7 +715,8 @@ _Py_stat(PyObject *path, struct stat *statbuf)
#endif
}
#endif
#endif /* HAVE_STAT */
static int
get_inheritable(int fd, int raise)

View file

@ -1481,16 +1481,20 @@ PyMarshal_ReadLongFromFile(FILE *fp)
return res;
}
#ifdef HAVE_FSTAT
/* Return size of file in bytes; < 0 if unknown. */
#if defined(HAVE_FSTAT) || defined(MS_WINDOWS)
/* Return size of file in bytes; < 0 if unknown or INT_MAX if too big */
static off_t
getfilesize(FILE *fp)
{
struct stat st;
if (fstat(fileno(fp), &st) != 0)
struct _Py_stat_struct st;
if (_Py_fstat(fileno(fp), &st) != 0)
return -1;
#if SIZEOF_OFF_T == 4
else if (st.st_size >= INT_MAX)
return (off_t)INT_MAX;
#endif
else
return st.st_size;
return (off_t)st.st_size;
}
#endif
@ -1505,7 +1509,7 @@ PyMarshal_ReadLastObjectFromFile(FILE *fp)
{
/* REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */
#define REASONABLE_FILE_LIMIT (1L << 18)
#ifdef HAVE_FSTAT
#if defined(HAVE_FSTAT) || defined(MS_WINDOWS)
off_t filesize;
filesize = getfilesize(fp);
if (filesize > 0 && filesize <= REASONABLE_FILE_LIMIT) {

View file

@ -139,14 +139,14 @@ dev_urandom_python(char *buffer, Py_ssize_t size)
{
int fd;
Py_ssize_t n;
struct stat st;
struct _Py_stat_struct st;
if (size <= 0)
return 0;
if (urandom_cache.fd >= 0) {
/* Does the fd point to the same thing as before? (issue #21207) */
if (fstat(urandom_cache.fd, &st)
if (_Py_fstat(urandom_cache.fd, &st)
|| st.st_dev != urandom_cache.st_dev
|| st.st_ino != urandom_cache.st_ino) {
/* Something changed: forget the cached fd (but don't close it,
@ -178,7 +178,7 @@ dev_urandom_python(char *buffer, Py_ssize_t size)
fd = urandom_cache.fd;
}
else {
if (fstat(fd, &st)) {
if (_Py_fstat(fd, &st)) {
PyErr_SetFromErrno(PyExc_OSError);
close(fd);
return -1;

View file

@ -1681,8 +1681,8 @@ _PySys_Init(void)
the shell already prevents that. */
#if !defined(MS_WINDOWS)
{
struct stat sb;
if (fstat(fileno(stdin), &sb) == 0 &&
struct _Py_stat_struct sb;
if (_Py_fstat(fileno(stdin), &sb) == 0 &&
S_ISDIR(sb.st_mode)) {
/* There's nothing more we can do. */
/* Py_FatalError() will core dump, so just exit. */