mirror of
https://github.com/python/cpython.git
synced 2025-08-03 16:39:00 +00:00
Create fileutils.c/.h
* _Py_fopen() and _Py_stat() come from Python/import.c
* (_Py)_wrealpath() comes from Python/sysmodule.c
* _Py_char2wchar(), _Py_wchar2char() and _Py_wfopen() come from Modules/main.c
* (_Py)_wstat(), (_Py)_wgetcwd(), _Py_wreadlink() come from Modules/getpath.c
This commit is contained in:
parent
7ae7c87b05
commit
4e31443c4d
9 changed files with 832 additions and 392 deletions
208
Modules/main.c
208
Modules/main.c
|
@ -104,31 +104,6 @@ static char *usage_5 =
|
|||
#endif
|
||||
;
|
||||
|
||||
/* Open a file whose path and mode are given as wide character strings.

   On Windows the call is forwarded to _wfopen(). Elsewhere the mode is
   converted with wcstombs() and the path with _Py_wchar2char(), and the
   file is opened with fopen().

   Return the FILE* from the underlying open call, or NULL on error. If the
   mode cannot be converted or does not fit in the internal buffer, errno is
   set to EINVAL. If fopen() fails, errno is left as fopen() set it. */
FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
#ifndef MS_WINDOWS
    FILE *f;
    char *cpath;
    char cmode[10];
    size_t r;
    int saved_errno;

    r = wcstombs(cmode, mode, sizeof(cmode));
    /* r == sizeof(cmode) means the buffer was filled without room for the
       terminating nul byte, so the mode is too long to be used. */
    if (r == (size_t)-1 || r >= sizeof(cmode)) {
        errno = EINVAL;
        return NULL;
    }
    cpath = _Py_wchar2char(path);
    if (cpath == NULL)
        return NULL;
    f = fopen(cpath, cmode);
    /* Releasing the temporary path must not hide the reason why fopen()
       failed: keep errno intact across the deallocation. */
    saved_errno = errno;
    PyMem_Free(cpath);
    errno = saved_errno;
    return f;
#else
    return _wfopen(path, mode);
#endif
}
|
||||
|
||||
|
||||
static int
|
||||
usage(int exitcode, wchar_t* program)
|
||||
{
|
||||
|
@ -756,189 +731,6 @@ Py_GetArgcArgv(int *argc, wchar_t ***argv)
|
|||
*argv = orig_argv;
|
||||
}
|
||||
|
||||
|
||||
/* Encode a wide character string to the locale encoding, applying the
   surrogateescape error handler: each character in U+DC80..U+DCFF is
   written back as the single byte 0x80..0xFF it stands for, every other
   character is converted with wcstombs().

   This is the inverse operation of _Py_char2wchar().

   The returned byte string is nul-terminated and allocated with
   PyMem_Malloc(); release it with PyMem_Free(). NULL is returned when a
   character cannot be converted or when the allocation fails. */
char*
_Py_wchar2char(const wchar_t *text)
{
    const size_t nchars = wcslen(text);
    wchar_t one[2];          /* one character plus its terminator */
    size_t needed = 0;       /* output size in bytes, computed by pass 1 */
    size_t remaining;        /* free bytes left in the output during pass 2 */
    size_t n;
    size_t k;
    char *encoded;
    char *dst;

    one[1] = 0;

    /* Pass 1: compute how many bytes the encoded string needs. */
    for (k = 0; k < nchars; k++) {
        const wchar_t ch = text[k];

        if (ch >= 0xdc80 && ch <= 0xdcff) {
            /* UTF-8b surrogate: always exactly one output byte */
            needed++;
            continue;
        }
        one[0] = ch;
        n = wcstombs(NULL, one, 0);
        if (n == (size_t)-1)
            return NULL;
        needed += n;
    }
    needed += 1; /* nul byte at the end */

    encoded = PyMem_Malloc(needed);
    if (encoded == NULL)
        return NULL;

    /* Pass 2: write the bytes. */
    dst = encoded;
    remaining = needed;
    for (k = 0; k < nchars; k++) {
        const wchar_t ch = text[k];

        if (ch >= 0xdc80 && ch <= 0xdcff) {
            *dst++ = ch - 0xdc00;
            remaining--;
            continue;
        }
        one[0] = ch;
        n = wcstombs(dst, one, remaining);
        if (n == (size_t)-1) {
            PyMem_Free(encoded);
            return NULL;
        }
        dst += n;
        remaining -= n;
    }
    *dst = 0;
    return encoded;
}
|
||||
|
||||
|
||||
/* Decode a byte string from the locale encoding with the
   surrogateescape error handler (undecodable bytes are decoded as characters
   in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
   character, escape the bytes using the surrogateescape error handler instead
   of decoding them.

   Use _Py_wchar2char() to encode the character string back to a byte string.

   Return a pointer to a newly allocated (wide) character string (use
   PyMem_Free() to free the memory), or NULL on error (conversion error or
   memory error). A message is written to stderr on memory error and on an
   unexpected mbrtowc() result. */
wchar_t*
_Py_char2wchar(char* arg)
{
    wchar_t *res;
#ifdef HAVE_BROKEN_MBSTOWCS
    /* Some platforms have a broken implementation of
     * mbstowcs which does not count the characters that
     * would result from conversion.  Use an upper bound.
     */
    size_t argsize = strlen(arg);
#else
    size_t argsize = mbstowcs(NULL, arg, 0);
#endif
    size_t count;
    unsigned char *in;
    wchar_t *out;
#ifdef HAVE_MBRTOWC
    mbstate_t mbs;
#endif
    /* Fast path: try to convert the whole string in one call. */
    if (argsize != (size_t)-1) {
        res = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
        if (!res)
            goto oom;
        count = mbstowcs(res, arg, argsize+1);
        if (count != (size_t)-1) {
            wchar_t *tmp;
            /* Only use the result if it contains no
               surrogate characters. */
            for (tmp = res; *tmp != 0 &&
                         (*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
                ;
            if (*tmp == 0)
                return res;
        }
        PyMem_Free(res);
    }
    /* Conversion failed. Fall back to escaping with surrogateescape. */
#ifdef HAVE_MBRTOWC
    /* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */

    /* Overallocate; as multi-byte characters are in the argument, the
       actual output could use less memory. */
    argsize = strlen(arg) + 1;
    res = (wchar_t*)PyMem_Malloc(argsize*sizeof(wchar_t));
    if (!res) goto oom;
    in = (unsigned char*)arg;
    out = res;
    memset(&mbs, 0, sizeof mbs);
    while (argsize) {
        size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
        if (converted == 0)
            /* Reached end of string; null char stored. */
            break;
        if (converted == (size_t)-2) {
            /* Incomplete character. This should never happen,
               since we provide everything that we have -
               unless there is a bug in the C library, or I
               misunderstood how mbrtowc works. */
            fprintf(stderr, "unexpected mbrtowc result -2\n");
            /* Do not leak the partially filled output buffer. */
            PyMem_Free(res);
            return NULL;
        }
        if (converted == (size_t)-1) {
            /* Conversion error. Escape as UTF-8b, and start over
               in the initial shift state. */
            *out++ = 0xdc00 + *in++;
            argsize--;
            memset(&mbs, 0, sizeof mbs);
            continue;
        }
        if (*out >= 0xd800 && *out <= 0xdfff) {
            /* Surrogate character.  Escape the original
               byte sequence with surrogateescape. */
            argsize -= converted;
            while (converted--)
                *out++ = 0xdc00 + *in++;
            continue;
        }
        /* successfully converted some bytes */
        in += converted;
        argsize -= converted;
        out++;
    }
#else
    /* Cannot use C locale for escaping; manually escape as if charset
       is ASCII (i.e. escape all bytes >= 128). This will still roundtrip
       correctly in the locale's charset, which must be an ASCII superset. */
    res = PyMem_Malloc((strlen(arg)+1)*sizeof(wchar_t));
    if (!res) goto oom;
    in = (unsigned char*)arg;
    out = res;
    while(*in)
        if(*in < 128)
            *out++ = *in++;
        else
            *out++ = 0xdc00 + *in++;
    *out = 0;
#endif
    return res;
oom:
    fprintf(stderr, "out of memory\n");
    return NULL;
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue