Issue #17429: platform.linux_distribution() now decodes files from the UTF-8
encoding with the surrogateescape error handler, instead of decoding from the
locale encoding in strict mode. This fixes the function on Fedora 19, which is
probably the first major distribution release with a non-ASCII name. Patch
written by Toshio Kuratomi.
This commit is contained in:
Victor Stinner 2013-12-09 00:01:27 +01:00
parent 589ecda56e
commit 620c48b7ea
4 changed files with 30 additions and 2 deletions

View file

@ -129,6 +129,10 @@ except AttributeError:
# Standard Unix uses /dev/null # Standard Unix uses /dev/null
DEV_NULL = '/dev/null' DEV_NULL = '/dev/null'
# Directory to search for configuration information on Unix.
# Constant used by test_platform to test linux_distribution().
_UNIXCONFDIR = '/etc'
### Platform specific APIs ### Platform specific APIs
_libc_search = re.compile(b'(__libc_init)' _libc_search = re.compile(b'(__libc_init)'
@ -315,7 +319,7 @@ def linux_distribution(distname='', version='', id='',
""" """
try: try:
etc = os.listdir('/etc') etc = os.listdir(_UNIXCONFDIR)
except os.error: except os.error:
# Probably not a Unix system # Probably not a Unix system
return distname,version,id return distname,version,id
@ -331,7 +335,8 @@ def linux_distribution(distname='', version='', id='',
return _dist_try_harder(distname,version,id) return _dist_try_harder(distname,version,id)
# Read the first line # Read the first line
with open('/etc/'+file, 'r') as f: with open(os.path.join(_UNIXCONFDIR, file), 'r',
encoding='utf-8', errors='surrogateescape') as f:
firstline = f.readline() firstline = f.readline()
_distname, _version, _id = _parse_release_file(firstline) _distname, _version, _id = _parse_release_file(firstline)

View file

@ -1,7 +1,10 @@
from unittest import mock
import contextlib
import os import os
import platform import platform
import subprocess import subprocess
import sys import sys
import tempfile
import unittest import unittest
import warnings import warnings
@ -295,6 +298,19 @@ class PlatformTest(unittest.TestCase):
returncode = ret >> 8 returncode = ret >> 8
self.assertEqual(returncode, len(data)) self.assertEqual(returncode, len(data))
def test_linux_distribution_encoding(self):
# Issue #17429
with tempfile.TemporaryDirectory() as tempdir:
filename = os.path.join(tempdir, 'fedora-release')
with open(filename, 'w', encoding='utf-8') as f:
f.write('Fedora release 19 (Schr\xf6dinger\u2019s Cat)\n')
with mock.patch('platform._UNIXCONFDIR', tempdir):
distname, version, distid = platform.linux_distribution()
self.assertEqual(distname, 'Fedora')
self.assertEqual(version, '19')
self.assertEqual(distid, 'Schr\xf6dinger\u2019s Cat')
def test_main(): def test_main():
support.run_unittest( support.run_unittest(

View file

@ -689,6 +689,7 @@ Steven Kryskalla
Andrew Kuchling Andrew Kuchling
Dave Kuhlman Dave Kuhlman
Jon Kuhn Jon Kuhn
Toshio Kuratomi
Vladimir Kushnir Vladimir Kushnir
Erno Kuusela Erno Kuusela
Ross Lagerwall Ross Lagerwall

View file

@ -18,6 +18,12 @@ Core and Builtins
Library Library
------- -------
- Issue #17429: platform.linux_distribution() now decodes files from the UTF-8
encoding with the surrogateescape error handler, instead of decoding from the
locale encoding in strict mode. It fixes the function on Fedora 19 which is
probably the first major distribution release with a non-ASCII name. Patch
written by Toshio Kuratomi.
- Issue #19929: Call os.read with 32768 within subprocess.Popen.communicate - Issue #19929: Call os.read with 32768 within subprocess.Popen.communicate
rather than 4096 for efficiency. A microbenchmark shows Linux and OS X rather than 4096 for efficiency. A microbenchmark shows Linux and OS X
both using ~50% less cpu time this way. both using ~50% less cpu time this way.