Issue 1267, continued.

Additional patch by Christian Heimes to deal more cleanly with the FILE* vs file-descriptor issues. I cleaned up his code a bit, and moved the lseek() call into import.c.
2025-11-02 03:01:58 +00:00 · 2007-10-22 00:09:51 +00:00 · 2007-10-22 00:09:51 +00:00 · 40d20bcf1f
commit 40d20bcf1f
parent c2954e5273
12 changed files with 94 additions and 64 deletions
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@ -1602,40 +1602,44 @@ PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
 }
 #endif

-/* Get -*- encoding -*- from a Python file
+/* Get -*- encoding -*- from a Python file.

   PyTokenizer_FindEncoding returns NULL when it can't find the encoding in
   the first or second line of the file (in which case the encoding 
   should be assumed to be PyUnicode_GetDefaultEncoding()).

-   The char * returned was malloc'ed from PyMem_MALLOC() and thus must be freed
-   when no longer needed.
+   The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed
+   by the caller.
 */
 char *
-PyTokenizer_FindEncoding(FILE *fp) {
+PyTokenizer_FindEncoding(int fd)
+{
 	struct tok_state *tok;
-	char *p_start=NULL, *p_end=NULL, *encoding=NULL;
+	FILE *fp;
+	char *p_start =NULL , *p_end =NULL , *encoding = NULL;

-	if ((tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL)) == NULL) {
-		/* lseek() usage is on purpose; see note later in code. */
-		lseek(fileno(fp), 0, 0);
+	fd = dup(fd);
+	if (fd < 0) {
 		return NULL;
 	}
-	while(((tok->lineno < 2) && (tok->done == E_OK))) {
+	fp = fdopen(fd, "r");
+	if (fp == NULL) {
+		return NULL;
+	}
+	tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL);
+	if (tok == NULL) {
+		fclose(fp);
+		return NULL;
+	}
+	while (tok->lineno < 2 && tok->done == E_OK) {
 		PyTokenizer_Get(tok, &p_start, &p_end);
 	}
-
-	/* lseek() must be used instead of fseek()/rewind() as those fail on
-	   OS X 10.4 to properly seek back to the beginning when reading from
-	   the file descriptor instead of the file pointer.  */
-	lseek(fileno(fp), 0, 0);
-
+	fclose(fp);
 	if (tok->encoding) {
            encoding = (char *)PyMem_MALLOC(strlen(tok->encoding) + 1);
            strcpy(encoding, tok->encoding);
        }
 	PyTokenizer_Free(tok);
-
 	return encoding;
 }