Issue #23152: Implement _Py_fstat() to support files larger than 2 GB on Windows.

fstat() may fail with EOVERFLOW on files larger than 2 GB because the file size type is a signed 32-bit integer.
This commit is contained in:
Steve Dower 2015-02-21 08:44:05 -08:00
parent 18d1924987
commit f2f373f593
12 changed files with 242 additions and 180 deletions

View file

@ -180,9 +180,9 @@ fileio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
static int
check_fd(int fd)
{
#if defined(HAVE_FSTAT)
struct stat buf;
if (!_PyVerify_fd(fd) || (fstat(fd, &buf) < 0 && errno == EBADF)) {
#if defined(HAVE_FSTAT) || defined(MS_WINDOWS)
struct _Py_stat_struct buf;
if (!_PyVerify_fd(fd) || (_Py_fstat(fd, &buf) < 0 && errno == EBADF)) {
PyObject *exc;
char *msg = strerror(EBADF);
exc = PyObject_CallFunction(PyExc_OSError, "(is)",
@ -222,8 +222,8 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds)
#elif !defined(MS_WINDOWS)
int *atomic_flag_works = NULL;
#endif
#ifdef HAVE_FSTAT
struct stat fdfstat;
#if defined(HAVE_FSTAT) || defined(MS_WINDOWS)
struct _Py_stat_struct fdfstat;
#endif
int async_err = 0;
@ -420,9 +420,11 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds)
}
self->blksize = DEFAULT_BUFFER_SIZE;
#ifdef HAVE_FSTAT
if (fstat(self->fd, &fdfstat) < 0)
#if defined(HAVE_FSTAT) || defined(MS_WINDOWS)
if (_Py_fstat(self->fd, &fdfstat) < 0) {
PyErr_SetFromErrno(PyExc_OSError);
goto error;
}
#if defined(S_ISDIR) && defined(EISDIR)
/* On Unix, open will succeed for directories.
In Python, there should be no file objects referring to
@ -437,7 +439,7 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds)
if (fdfstat.st_blksize > 1)
self->blksize = fdfstat.st_blksize;
#endif /* HAVE_STRUCT_STAT_ST_BLKSIZE */
#endif /* HAVE_FSTAT */
#endif /* HAVE_FSTAT || MS_WINDOWS */
#if defined(MS_WINDOWS) || defined(__CYGWIN__)
/* don't translate newlines (\r\n <=> \n) */
@ -603,17 +605,7 @@ fileio_readinto(fileio *self, PyObject *args)
return PyLong_FromSsize_t(n);
}
#ifndef HAVE_FSTAT
/* Fallback readall(): invoke the "readall" method of PyRawIOBase_Type,
   passing this object as the single argument, and return whatever that
   call returns. */
static PyObject *
fileio_readall(fileio *self)
{
    _Py_IDENTIFIER(readall);
    PyObject *raw_base = (PyObject *)&PyRawIOBase_Type;

    return _PyObject_CallMethodId(raw_base, &PyId_readall, "O", self);
}
#else
#if defined(HAVE_FSTAT) || defined(MS_WINDOWS)
static size_t
new_buffersize(fileio *self, size_t currentsize)
@ -637,7 +629,7 @@ new_buffersize(fileio *self, size_t currentsize)
static PyObject *
fileio_readall(fileio *self)
{
struct stat st;
struct _Py_stat_struct st;
Py_off_t pos, end;
PyObject *result;
Py_ssize_t bytes_read = 0;
@ -655,7 +647,7 @@ fileio_readall(fileio *self)
#else
pos = lseek(self->fd, 0L, SEEK_CUR);
#endif
if (fstat(self->fd, &st) == 0)
if (_Py_fstat(self->fd, &st) == 0)
end = st.st_size;
else
end = (Py_off_t)-1;
@ -729,7 +721,17 @@ fileio_readall(fileio *self)
return result;
}
#endif /* HAVE_FSTAT */
#else
/* Fallback readall(): invoke the "readall" method of PyRawIOBase_Type,
   passing this object as the single argument, and return whatever that
   call returns. */
static PyObject *
fileio_readall(fileio *self)
{
    _Py_IDENTIFIER(readall);
    PyObject *raw_base = (PyObject *)&PyRawIOBase_Type;

    return _PyObject_CallMethodId(raw_base, &PyId_readall, "O", self);
}
#endif /* HAVE_FSTAT || MS_WINDOWS */
static PyObject *
fileio_read(fileio *self, PyObject *args)

View file

@ -752,9 +752,8 @@ Py_Main(int argc, wchar_t **argv)
}
}
{
/* XXX: does this work on Win/Win64? (see posix_fstat) */
struct stat sb;
if (fstat(fileno(fp), &sb) == 0 &&
struct _Py_stat_struct sb;
if (_Py_fstat(fileno(fp), &sb) == 0 &&
S_ISDIR(sb.st_mode)) {
fprintf(stderr, "%ls: '%ls' is a directory, cannot continue\n", argv[0], filename);
fclose(fp);

View file

@ -459,8 +459,8 @@ mmap_size_method(mmap_object *self,
#ifdef UNIX
{
struct stat buf;
if (-1 == fstat(self->fd, &buf)) {
struct _Py_stat_struct buf;
if (-1 == _Py_fstat(self->fd, &buf)) {
PyErr_SetFromErrno(PyExc_OSError);
return NULL;
}
@ -1107,7 +1107,7 @@ static PyObject *
new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict)
{
#ifdef HAVE_FSTAT
struct stat st;
struct _Py_stat_struct st;
#endif
mmap_object *m_obj;
PyObject *map_size_obj = NULL;
@ -1174,7 +1174,7 @@ new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict)
(void)fcntl(fd, F_FULLFSYNC);
#endif
#ifdef HAVE_FSTAT
if (fd != -1 && fstat(fd, &st) == 0 && S_ISREG(st.st_mode)) {
if (fd != -1 && _Py_fstat(fd, &st) == 0 && S_ISREG(st.st_mode)) {
if (map_size == 0) {
if (st.st_size == 0) {
PyErr_SetString(PyExc_ValueError,

View file

@ -350,8 +350,8 @@ static int win32_can_symlink = 0;
#ifdef MS_WINDOWS
# define STAT win32_stat
# define LSTAT win32_lstat
# define FSTAT win32_fstat
# define STRUCT_STAT struct win32_stat
# define FSTAT _Py_fstat
# define STRUCT_STAT struct _Py_stat_struct
#else
# define STAT stat
# define LSTAT lstat
@ -1469,73 +1469,6 @@ struct win32_stat{
unsigned long st_file_attributes;
};
static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
/* Split a FILETIME (a count of 100-nanosecond units) into whole seconds,
   shifted by secs_between_epochs so they are relative to 1.1.1970, and a
   nanosecond remainder.
   The seconds are stored in *time_out and the nanoseconds in *nsec_out. */
static void
FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
{
    /* XXX endianness. Shouldn't matter, as all Windows implementations
       are little-endian */
    __int64 ticks;
    __int64 unix_secs;

    /* Copy the bytes instead of casting and dereferencing in_ptr,
       since it might not be aligned properly */
    memcpy(&ticks, in_ptr, sizeof(ticks));

    unix_secs = (ticks / 10000000) - secs_between_epochs;

    /* FILETIME is in units of 100 nsec. */
    *nsec_out = (int)(ticks % 10000000) * 100;
    *time_out = Py_SAFE_DOWNCAST(unix_secs, __int64, time_t);
}
/* Pack a time given as seconds (time_in) plus nanoseconds (nsec_in) into
   *out_ptr as a 64-bit count of 100-nanosecond units, after shifting the
   seconds by secs_between_epochs. */
static void
time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
{
    /* XXX endianness */
    __int64 ticks = (time_in + secs_between_epochs) * 10000000 + nsec_in / 100;

    memcpy(out_ptr, &ticks, sizeof(ticks));
}
/* Below, we *know* that ugo+r is 0444 */
#if _S_IREAD != 0400
#error Unsupported C library
#endif
/* Translate Windows file attribute flags into a stat-style mode value.
   Directories get _S_IFDIR plus execute permission for user, group and
   other; everything else gets _S_IFREG.  Read-only files get 0444, all
   others 0666. */
static int
attributes_to_mode(DWORD attr)
{
    /* file type; 0111 is IFEXEC for user,group,other */
    int mode = (attr & FILE_ATTRIBUTE_DIRECTORY) ? (_S_IFDIR | 0111)
                                                 : _S_IFREG;

    /* permission bits */
    mode |= (attr & FILE_ATTRIBUTE_READONLY) ? 0444 : 0666;

    return mode;
}
/* Populate *result from the by-handle file information in *info.
   *result is zeroed first, so any field not assigned below stays 0.
   When reparse_tag is IO_REPARSE_TAG_SYMLINK the file-type bits of
   st_mode are replaced with S_IFLNK.  Always returns 0. */
static int
attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag, struct win32_stat *result)
{
    memset(result, 0, sizeof(*result));

    /* device, inode and link count */
    result->st_dev = info->dwVolumeSerialNumber;
    result->st_rdev = result->st_dev;
    result->st_ino = (((__int64)info->nFileIndexHigh)<<32) + info->nFileIndexLow;
    result->st_nlink = info->nNumberOfLinks;

    /* size: combine the high and low 32-bit halves */
    result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;

    /* timestamps: st_ctime is filled from the creation time */
    FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec);
    FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
    FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);

    /* mode and raw attributes */
    result->st_mode = attributes_to_mode(info->dwFileAttributes);
    if (reparse_tag == IO_REPARSE_TAG_SYMLINK) {
        /* clear the S_IFMT bits, then set the bits that make this a
           symlink */
        result->st_mode = (result->st_mode & ~S_IFMT) | S_IFLNK;
    }
    result->st_file_attributes = info->dwFileAttributes;

    return 0;
}
static BOOL
attributes_from_dir(LPCSTR pszFile, BY_HANDLE_FILE_INFORMATION *info, ULONG *reparse_tag)
{
@ -1645,11 +1578,15 @@ get_target_path(HANDLE hdl, wchar_t **target_path)
return TRUE;
}
/* defined in fileutils.c */
int
attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag, struct _Py_stat_struct *result);
static int
win32_xstat_impl_w(const wchar_t *path, struct win32_stat *result,
win32_xstat_impl_w(const wchar_t *path, struct _Py_stat_struct *result,
BOOL traverse);
static int
win32_xstat_impl(const char *path, struct win32_stat *result,
win32_xstat_impl(const char *path, struct _Py_stat_struct *result,
BOOL traverse)
{
int code;
@ -1745,7 +1682,7 @@ win32_xstat_impl(const char *path, struct win32_stat *result,
}
static int
win32_xstat_impl_w(const wchar_t *path, struct win32_stat *result,
win32_xstat_impl_w(const wchar_t *path, struct _Py_stat_struct *result,
BOOL traverse)
{
int code;
@ -1841,7 +1778,7 @@ win32_xstat_impl_w(const wchar_t *path, struct win32_stat *result,
}
static int
win32_xstat(const char *path, struct win32_stat *result, BOOL traverse)
win32_xstat(const char *path, struct _Py_stat_struct *result, BOOL traverse)
{
/* Protocol violation: we explicitly clear errno, instead of
setting it to a POSIX error. Callers should use GetLastError. */
@ -1851,7 +1788,7 @@ win32_xstat(const char *path, struct win32_stat *result, BOOL traverse)
}
static int
win32_xstat_w(const wchar_t *path, struct win32_stat *result, BOOL traverse)
win32_xstat_w(const wchar_t *path, struct _Py_stat_struct *result, BOOL traverse)
{
/* Protocol violation: we explicitly clear errno, instead of
setting it to a POSIX error. Callers should use GetLastError. */
@ -1873,80 +1810,29 @@ win32_xstat_w(const wchar_t *path, struct win32_stat *result, BOOL traverse)
The _w represent Unicode equivalents of the aforementioned ANSI functions. */
static int
win32_lstat(const char* path, struct win32_stat *result)
win32_lstat(const char* path, struct _Py_stat_struct *result)
{
return win32_xstat(path, result, FALSE);
}
static int
win32_lstat_w(const wchar_t* path, struct win32_stat *result)
win32_lstat_w(const wchar_t* path, struct _Py_stat_struct *result)
{
return win32_xstat_w(path, result, FALSE);
}
static int
win32_stat(const char* path, struct win32_stat *result)
win32_stat(const char* path, struct _Py_stat_struct *result)
{
return win32_xstat(path, result, TRUE);
}
static int
win32_stat_w(const wchar_t* path, struct win32_stat *result)
win32_stat_w(const wchar_t* path, struct _Py_stat_struct *result)
{
return win32_xstat_w(path, result, TRUE);
}
/* fstat()-like query for the file descriptor file_number.
   Returns 0 on success and -1 on failure.  errno is always reset to 0;
   per the note below, callers are expected to consult GetLastError().
   *result is zeroed before it is filled in.  For handles that are not
   disk files only st_mode is set (_S_IFCHR for character devices,
   _S_IFIFO for pipes, otherwise left at 0). */
static int
win32_fstat(int file_number, struct win32_stat *result)
{
BY_HANDLE_FILE_INFORMATION info;
HANDLE h;
int type;
/* Only ask the C runtime for the OS handle if the descriptor passes
   _PyVerify_fd(); otherwise treat it as an invalid handle. */
if (!_PyVerify_fd(file_number))
h = INVALID_HANDLE_VALUE;
else
h = (HANDLE)_get_osfhandle(file_number);
/* Protocol violation: we explicitly clear errno, instead of
setting it to a POSIX error. Callers should use GetLastError. */
errno = 0;
if (h == INVALID_HANDLE_VALUE) {
/* This is really a C library error (invalid file handle).
We set the Win32 error to the closest one matching. */
SetLastError(ERROR_INVALID_HANDLE);
return -1;
}
memset(result, 0, sizeof(*result));
type = GetFileType(h);
if (type == FILE_TYPE_UNKNOWN) {
/* FILE_TYPE_UNKNOWN is treated as a failure only when the last-error
   value is non-zero. */
DWORD error = GetLastError();
if (error != 0) {
return -1;
}
/* else: valid but unknown file */
}
if (type != FILE_TYPE_DISK) {
/* Not a disk file: report just the file type and succeed without
   querying any further information. */
if (type == FILE_TYPE_CHAR)
result->st_mode = _S_IFCHR;
else if (type == FILE_TYPE_PIPE)
result->st_mode = _S_IFIFO;
return 0;
}
if (!GetFileInformationByHandle(h, &info)) {
return -1;
}
/* Reparse tag 0: the result is never marked as a symlink here. */
attribute_data_to_stat(&info, 0, result);
/* specific to fstat() */
/* NOTE(review): the attribute_data_to_stat() visible in this file already
   assigns st_ino from the same two fields, so this looks redundant --
   confirm before removing. */
result->st_ino = (((__int64)info.nFileIndexHigh)<<32) + info.nFileIndexLow;
return 0;
}
#endif /* MS_WINDOWS */
PyDoc_STRVAR(stat_result__doc__,
@ -6333,6 +6219,11 @@ exit:
return return_value;
}
#ifdef MS_WINDOWS
void
time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr);
#endif
static PyObject *
os_utime_impl(PyModuleDef *module, path_t *path, PyObject *times, PyObject *ns, int dir_fd, int follow_symlinks)
/*[clinic end generated code: output=891489c35cc68c5d input=1f18c17d5941aa82]*/

View file

@ -503,13 +503,13 @@ signal_siginterrupt(PyObject *self, PyObject *args)
static PyObject *
signal_set_wakeup_fd(PyObject *self, PyObject *args)
{
struct _Py_stat_struct st;
#ifdef MS_WINDOWS
PyObject *fdobj;
SOCKET_T sockfd, old_sockfd;
int res;
int res_size = sizeof res;
PyObject *mod;
struct stat st;
int is_socket;
if (!PyArg_ParseTuple(args, "O:set_wakeup_fd", &fdobj))
@ -520,7 +520,6 @@ signal_set_wakeup_fd(PyObject *self, PyObject *args)
return NULL;
#else
int fd, old_fd;
struct stat st;
if (!PyArg_ParseTuple(args, "i:set_wakeup_fd", &fd))
return NULL;
@ -560,7 +559,7 @@ signal_set_wakeup_fd(PyObject *self, PyObject *args)
return NULL;
}
if (fstat(fd, &st) != 0) {
if (_Py_fstat(fd, &st) != 0) {
PyErr_SetFromErrno(PyExc_OSError);
return NULL;
}
@ -592,7 +591,7 @@ signal_set_wakeup_fd(PyObject *self, PyObject *args)
return NULL;
}
if (fstat(fd, &st) != 0) {
if (_Py_fstat(fd, &st) != 0) {
PyErr_SetFromErrno(PyExc_OSError);
return NULL;
}