Issue #10419, issue #6011: port 6ad356525381 fix from distutils to packaging

The build_scripts command of packaging now correctly handles a non-ASCII path (the path to the Python executable). The script is opened and written in binary mode, while ensuring that the shebang is decodable both from UTF-8 and from the encoding of the script.
2025-10-28 17:13:08 +00:00 · 2011-05-19 15:18:36 +02:00 · 2011-05-19 15:18:36 +02:00 · cfd365b937
commit cfd365b937
parent 35de5ac44d
2 changed files with 38 additions and 17 deletions
--- a/Lib/distutils/command/build_scripts.py
+++ b/Lib/distutils/command/build_scripts.py
@ -128,10 +128,9 @@ class build_scripts(Command):
                            "The shebang ({!r}) is not decodable "
                            "from the script encoding ({})"
                            .format(shebang, encoding))
-                    outf = open(outfile, "wb")
+                    with open(outfile, "wb") as outf:
                        outf.write(shebang)
                        outf.writelines(f.readlines())
                    outf.close()
                if f:
                    f.close()
            else:
--- a/Lib/packaging/command/build_scripts.py
+++ b/Lib/packaging/command/build_scripts.py
@ -3,6 +3,7 @@
 import os
 import re
 import sysconfig
 import tokenize
 from packaging.command.cmd import Command
 from packaging.util import convert_path, newer
@ -11,7 +12,7 @@ from packaging.compat import Mixin2to3
 # check if Python is called on the first line with this expression
-first_line_re = re.compile('^#!.*python[0-9.]*([ \t].*)?$')
+first_line_re = re.compile(b'^#!.*python[0-9.]*([ \t].*)?$')
 class build_scripts(Command, Mixin2to3):
@ -76,12 +77,14 @@ class build_scripts(Command, Mixin2to3):
            # that way, we'll get accurate feedback if we can read the
            # script.
            try:
-                f = open(script, "r")
+                f = open(script, "rb")
            except IOError:
                if not self.dry_run:
                    raise
                f = None
            else:
                encoding, lines = tokenize.detect_encoding(f.readline)
                f.seek(0)
                first_line = f.readline()
                if not first_line:
                    logger.warning('%s: %s is an empty file (skipping)',
@ -91,26 +94,45 @@ class build_scripts(Command, Mixin2to3):
                match = first_line_re.match(first_line)
                if match:
                    adjust = True
-                    post_interp = match.group(1) or ''
+                    post_interp = match.group(1) or b''
            if adjust:
                logger.info("copying and adjusting %s -> %s", script,
                         self.build_dir)
                if not self.dry_run:
                    outf = open(outfile, "w")
                    if not sysconfig.is_python_build():
-                        outf.write("#!%s%s\n" %
+                        executable = self.executable
                                   (self.executable,
                                    post_interp))
                    else:
-                        outf.write("#!%s%s\n" %
+                        executable = os.path.join(
                                   (os.path.join(
                            sysconfig.get_config_var("BINDIR"),
                           "python%s%s" % (sysconfig.get_config_var("VERSION"),
-                                           sysconfig.get_config_var("EXE"))),
+                                           sysconfig.get_config_var("EXE")))
-                                    post_interp))
+                    executable = os.fsencode(executable)
                    shebang = b"#!" + executable + post_interp + b"\n"
                    # Python parser starts to read a script using UTF-8 until
                    # it gets a #coding:xxx cookie. The shebang has to be the
                    # first line of a file, the #coding:xxx cookie cannot be
                    # written before. So the shebang has to be decodable from
                    # UTF-8.
                    try:
                        shebang.decode('utf-8')
                    except UnicodeDecodeError:
                        raise ValueError(
                            "The shebang ({!r}) is not decodable "
                            "from utf-8".format(shebang))
                    # If the script is encoded to a custom encoding (use a
                    # #coding:xxx cookie), the shebang has to be decodable from
                    # the script encoding too.
                    try:
                        shebang.decode(encoding)
                    except UnicodeDecodeError:
                        raise ValueError(
                            "The shebang ({!r}) is not decodable "
                            "from the script encoding ({})"
                            .format(shebang, encoding))
                    with open(outfile, "wb") as outf:
                        outf.write(shebang)
                        outf.writelines(f.readlines())
                    outf.close()
                if f:
                    f.close()
            else: