Issue 1267, continued.

Additional patch by Christian Heimes to deal more cleanly with the
FILE* vs file-descriptor issues.
I cleaned up his code a bit, and moved the lseek() call into import.c.
This commit is contained in:
Guido van Rossum 2007-10-22 00:09:51 +00:00
parent c2954e5273
commit 40d20bcf1f
12 changed files with 94 additions and 64 deletions

View file

@ -1602,40 +1602,44 @@ PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
}
#endif
/* Get -*- encoding -*- from a Python file
/* Get -*- encoding -*- from a Python file.
PyTokenizer_FindEncoding returns NULL when it can't find the encoding in
the first or second line of the file (in which case the encoding
should be assumed to be PyUnicode_GetDefaultEncoding()).
The char * returned was malloc'ed from PyMem_MALLOC() and thus must be freed
when no longer needed.
The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed
by the caller.
*/
char *
PyTokenizer_FindEncoding(FILE *fp) {
PyTokenizer_FindEncoding(int fd)
{
struct tok_state *tok;
char *p_start=NULL, *p_end=NULL, *encoding=NULL;
FILE *fp;
char *p_start =NULL , *p_end =NULL , *encoding = NULL;
if ((tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL)) == NULL) {
/* lseek() usage is on purpose; see note later in code. */
lseek(fileno(fp), 0, 0);
fd = dup(fd);
if (fd < 0) {
return NULL;
}
while(((tok->lineno < 2) && (tok->done == E_OK))) {
fp = fdopen(fd, "r");
if (fp == NULL) {
return NULL;
}
tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL);
if (tok == NULL) {
fclose(fp);
return NULL;
}
while (tok->lineno < 2 && tok->done == E_OK) {
PyTokenizer_Get(tok, &p_start, &p_end);
}
/* lseek() must be used instead of fseek()/rewind() as those fail on
OS X 10.4 to properly seek back to the beginning when reading from
the file descriptor instead of the file pointer. */
lseek(fileno(fp), 0, 0);
fclose(fp);
if (tok->encoding) {
encoding = (char *)PyMem_MALLOC(strlen(tok->encoding) + 1);
strcpy(encoding, tok->encoding);
}
PyTokenizer_Free(tok);
return encoding;
}