[3.13] gh-116622: Android logging fixes (GH-122698) (#122719)

gh-116622: Android logging fixes (GH-122698) Modifies the handling of stdout/stderr redirection on Android to accomodate the rate and buffer size limits imposed by Android's logging infrastructure. (cherry picked from commit b0c48b8fd8) Co-authored-by: Malcolm Smith <smith@chaquo.com>
2025-09-14 12:46:49 +00:00 · 2024-08-16 10:07:42 +02:00 · 2024-08-16 10:07:42 +02:00 · 0dd89a7f40
commit 0dd89a7f40
parent 69cf92fbca
3 changed files with 213 additions and 48 deletions
--- a/Lib/_android_support.py
+++ b/Lib/_android_support.py
@ -1,19 +1,20 @@
 import io
 import sys
-
+from threading import RLock
 from time import sleep, time
 # The maximum length of a log message in bytes, including the level marker and
-# tag, is defined as LOGGER_ENTRY_MAX_PAYLOAD in
+# tag, is defined as LOGGER_ENTRY_MAX_PAYLOAD at
-# platform/system/logging/liblog/include/log/log.h. As of API level 30, messages
+# https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log.h;l=71.
-# longer than this will be be truncated by logcat. This limit has already been
+# Messages longer than this will be be truncated by logcat. This limit has already
-# reduced at least once in the history of Android (from 4076 to 4068 between API
+# been reduced at least once in the history of Android (from 4076 to 4068 between
-# level 23 and 26), so leave some headroom.
+# API level 23 and 26), so leave some headroom.
 MAX_BYTES_PER_WRITE = 4000
 # UTF-8 uses a maximum of 4 bytes per character, so limiting text writes to this
-# size ensures that TextIOWrapper can always avoid exceeding MAX_BYTES_PER_WRITE.
+# size ensures that we can always avoid exceeding MAX_BYTES_PER_WRITE.
 # However, if the actual number of bytes per character is smaller than that,
-# then TextIOWrapper may still join multiple consecutive text writes into binary
+# then we may still join multiple consecutive text writes into binary
 # writes containing a larger number of characters.
 MAX_CHARS_PER_WRITE = MAX_BYTES_PER_WRITE // 4
@ -26,18 +27,22 @@ def init_streams(android_log_write, stdout_prio, stderr_prio):
    if sys.executable:
        return  # Not embedded in an app.
    global logcat
    logcat = Logcat(android_log_write)
    sys.stdout = TextLogStream(
-        android_log_write, stdout_prio, "python.stdout", errors=sys.stdout.errors)
+        stdout_prio, "python.stdout", errors=sys.stdout.errors)
    sys.stderr = TextLogStream(
-        android_log_write, stderr_prio, "python.stderr", errors=sys.stderr.errors)
+        stderr_prio, "python.stderr", errors=sys.stderr.errors)
 class TextLogStream(io.TextIOWrapper):
-    def __init__(self, android_log_write, prio, tag, **kwargs):
+    def __init__(self, prio, tag, **kwargs):
        kwargs.setdefault("encoding", "UTF-8")
-        kwargs.setdefault("line_buffering", True)
+        super().__init__(BinaryLogStream(prio, tag), **kwargs)
-        super().__init__(BinaryLogStream(android_log_write, prio, tag), **kwargs)
+        self._lock = RLock()
-        self._CHUNK_SIZE = MAX_BYTES_PER_WRITE
+        self._pending_bytes = []
        self._pending_bytes_count = 0
    def __repr__(self):
        return f"<TextLogStream {self.buffer.tag!r}>"
@ -52,19 +57,48 @@ class TextLogStream(io.TextIOWrapper):
        s = str.__str__(s)
        # We want to emit one log message per line wherever possible, so split
-        # the string before sending it to the superclass. Note that
+        # the string into lines first. Note that "".splitlines() == [], so
-        # "".splitlines() == [], so nothing will be logged for an empty string.
+        # nothing will be logged for an empty string.
-        for line in s.splitlines(keepends=True):
+        with self._lock:
-            while line:
+            for line in s.splitlines(keepends=True):
-                super().write(line[:MAX_CHARS_PER_WRITE])
+                while line:
-                line = line[MAX_CHARS_PER_WRITE:]
+                    chunk = line[:MAX_CHARS_PER_WRITE]
                    line = line[MAX_CHARS_PER_WRITE:]
                    self._write_chunk(chunk)
        return len(s)
    # The size and behavior of TextIOWrapper's buffer is not part of its public
    # API, so we handle buffering ourselves to avoid truncation.
    def _write_chunk(self, s):
        b = s.encode(self.encoding, self.errors)
        if self._pending_bytes_count + len(b) > MAX_BYTES_PER_WRITE:
            self.flush()
        self._pending_bytes.append(b)
        self._pending_bytes_count += len(b)
        if (
            self.write_through
            or b.endswith(b"\n")
            or self._pending_bytes_count > MAX_BYTES_PER_WRITE
        ):
            self.flush()
    def flush(self):
        with self._lock:
            self.buffer.write(b"".join(self._pending_bytes))
            self._pending_bytes.clear()
            self._pending_bytes_count = 0
    # Since this is a line-based logging system, line buffering cannot be turned
    # off, i.e. a newline always causes a flush.
    @property
    def line_buffering(self):
        return True
 class BinaryLogStream(io.RawIOBase):
-    def __init__(self, android_log_write, prio, tag):
+    def __init__(self, prio, tag):
        self.android_log_write = android_log_write
        self.prio = prio
        self.tag = tag
@ -85,10 +119,48 @@ class BinaryLogStream(io.RawIOBase):
        # Writing an empty string to the stream should have no effect.
        if b:
-            # Encode null bytes using "modified UTF-8" to avoid truncating the
+            logcat.write(self.prio, self.tag, b)
            # message. This should not affect the return value, as the caller
            # may be expecting it to match the length of the input.
            self.android_log_write(self.prio, self.tag,
                                   b.replace(b"\x00", b"\xc0\x80"))
        return len(b)
 # When a large volume of data is written to logcat at once, e.g. when a test
 # module fails in --verbose3 mode, there's a risk of overflowing logcat's own
 # buffer and losing messages. We avoid this by imposing a rate limit using the
 # token bucket algorithm, based on a conservative estimate of how fast `adb
 # logcat` can consume data.
 MAX_BYTES_PER_SECOND = 1024 * 1024
 # The logcat buffer size of a device can be determined by running `logcat -g`.
 # We set the token bucket size to half of the buffer size of our current minimum
 # API level, because other things on the system will be producing messages as
 # well.
 BUCKET_SIZE = 128 * 1024
 # https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log_read.h;l=39
 PER_MESSAGE_OVERHEAD = 28
 class Logcat:
    def __init__(self, android_log_write):
        self.android_log_write = android_log_write
        self._lock = RLock()
        self._bucket_level = 0
        self._prev_write_time = time()
    def write(self, prio, tag, message):
        # Encode null bytes using "modified UTF-8" to avoid them truncating the
        # message.
        message = message.replace(b"\x00", b"\xc0\x80")
        with self._lock:
            now = time()
            self._bucket_level += (
                (now - self._prev_write_time) * MAX_BYTES_PER_SECOND)
            self._bucket_level = min(self._bucket_level, BUCKET_SIZE)
            self._prev_write_time = now
            self._bucket_level -= PER_MESSAGE_OVERHEAD + len(tag) + len(message)
            if self._bucket_level < 0:
                sleep(-self._bucket_level / MAX_BYTES_PER_SECOND)
            self.android_log_write(prio, tag, message)
--- a/Lib/test/test_android.py
+++ b/Lib/test/test_android.py
@ -1,14 +1,17 @@
 import io
 import platform
 import queue
 import re
 import subprocess
 import sys
 import unittest
 from _android_support import TextLogStream
 from array import array
-from contextlib import contextmanager
+from contextlib import ExitStack, contextmanager
 from threading import Thread
 from test.support import LOOPBACK_TIMEOUT
-from time import time
+from time import sleep, time
 from unittest.mock import patch
 if sys.platform != "android":
@ -81,18 +84,39 @@ class TestAndroidOutput(unittest.TestCase):
        finally:
            stream.reconfigure(write_through=False)
    # In --verbose3 mode, sys.stdout and sys.stderr are captured, so we can't
    # test them directly. Detect this mode and use some temporary streams with
    # the same properties.
    def stream_context(self, stream_name, level):
        # https://developer.android.com/ndk/reference/group/logging
        prio = {"I": 4, "W": 5}[level]
        stack = ExitStack()
        stack.enter_context(self.subTest(stream_name))
        stream = getattr(sys, stream_name)
        if isinstance(stream, io.StringIO):
            stack.enter_context(
                patch(
                    f"sys.{stream_name}",
                    TextLogStream(
                        prio, f"python.{stream_name}", errors="backslashreplace"
                    ),
                )
            )
        return stack
    def test_str(self):
        for stream_name, level in [("stdout", "I"), ("stderr", "W")]:
-            with self.subTest(stream=stream_name):
+            with self.stream_context(stream_name, level):
                stream = getattr(sys, stream_name)
                tag = f"python.{stream_name}"
                self.assertEqual(f"<TextLogStream '{tag}'>", repr(stream))
-                self.assertTrue(stream.writable())
+                self.assertIs(stream.writable(), True)
-                self.assertFalse(stream.readable())
+                self.assertIs(stream.readable(), False)
                self.assertEqual("UTF-8", stream.encoding)
-                self.assertTrue(stream.line_buffering)
+                self.assertIs(stream.line_buffering, True)
-                self.assertFalse(stream.write_through)
+                self.assertIs(stream.write_through, False)
                # stderr is backslashreplace by default; stdout is configured
                # that way by libregrtest.main.
@ -147,6 +171,13 @@ class TestAndroidOutput(unittest.TestCase):
                write("f\n\ng", ["exxf", ""])
                write("\n", ["g"])
                # Since this is a line-based logging system, line buffering
                # cannot be turned off, i.e. a newline always causes a flush.
                stream.reconfigure(line_buffering=False)
                self.assertIs(stream.line_buffering, True)
                # However, buffering can be turned off completely if you want a
                # flush after every write.
                with self.unbuffered(stream):
                    write("\nx", ["", "x"])
                    write("\na\n", ["", "a"])
@ -209,30 +240,30 @@ class TestAndroidOutput(unittest.TestCase):
                # (MAX_BYTES_PER_WRITE).
                #
                # ASCII (1 byte per character)
-                write(("foobar" * 700) + "\n",
+                write(("foobar" * 700) + "\n",  # 4200 bytes in
-                      [("foobar" * 666) + "foob",  # 4000 bytes
+                      [("foobar" * 666) + "foob",  # 4000 bytes out
-                       "ar" + ("foobar" * 33)])  # 200 bytes
+                       "ar" + ("foobar" * 33)])  # 200 bytes out
                # "Full-width" digits 0-9 (3 bytes per character)
                s = "\uff10\uff11\uff12\uff13\uff14\uff15\uff16\uff17\uff18\uff19"
-                write((s * 150) + "\n",
+                write((s * 150) + "\n",  # 4500 bytes in
-                      [s * 100,  # 3000 bytes
+                      [s * 100,  # 3000 bytes out
-                       s * 50])  # 1500 bytes
+                       s * 50])  # 1500 bytes out
                s = "0123456789"
-                write(s * 200, [])
+                write(s * 200, [])  # 2000 bytes in
-                write(s * 150, [])
+                write(s * 150, [])  # 1500 bytes in
-                write(s * 51, [s * 350])  # 3500 bytes
+                write(s * 51, [s * 350])  # 510 bytes in, 3500 bytes out
-                write("\n", [s * 51])  # 510 bytes
+                write("\n", [s * 51])  # 0 bytes in, 510 bytes out
    def test_bytes(self):
        for stream_name, level in [("stdout", "I"), ("stderr", "W")]:
-            with self.subTest(stream=stream_name):
+            with self.stream_context(stream_name, level):
                stream = getattr(sys, stream_name).buffer
                tag = f"python.{stream_name}"
                self.assertEqual(f"<BinaryLogStream '{tag}'>", repr(stream))
-                self.assertTrue(stream.writable())
+                self.assertIs(stream.writable(), True)
-                self.assertFalse(stream.readable())
+                self.assertIs(stream.readable(), False)
                def write(b, lines=None, *, write_len=None):
                    if write_len is None:
@ -330,3 +361,64 @@ class TestAndroidOutput(unittest.TestCase):
                            fr"{type(obj).__name__}"
                        ):
                            stream.write(obj)
    def test_rate_limit(self):
        # https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log_read.h;l=39
        PER_MESSAGE_OVERHEAD = 28
        # https://developer.android.com/ndk/reference/group/logging
        ANDROID_LOG_DEBUG = 3
        # To avoid flooding the test script output, use a different tag rather
        # than stdout or stderr.
        tag = "python.rate_limit"
        stream = TextLogStream(ANDROID_LOG_DEBUG, tag)
        # Make a test message which consumes 1 KB of the logcat buffer.
        message = "Line {:03d} "
        message += "." * (
            1024 - PER_MESSAGE_OVERHEAD - len(tag) - len(message.format(0))
        ) + "\n"
        # See _android_support.py. The default values of these parameters work
        # well across a wide range of devices, but we'll use smaller values to
        # ensure a quick and reliable test that doesn't flood the log too much.
        MAX_KB_PER_SECOND = 100
        BUCKET_KB = 10
        with (
            patch("_android_support.MAX_BYTES_PER_SECOND", MAX_KB_PER_SECOND * 1024),
            patch("_android_support.BUCKET_SIZE", BUCKET_KB * 1024),
        ):
            # Make sure the token bucket is full.
            sleep(BUCKET_KB / MAX_KB_PER_SECOND)
            line_num = 0
            # Write BUCKET_KB messages, and return the rate at which they were
            # accepted in KB per second.
            def write_bucketful():
                nonlocal line_num
                start = time()
                max_line_num = line_num + BUCKET_KB
                while line_num < max_line_num:
                    stream.write(message.format(line_num))
                    line_num += 1
                return BUCKET_KB / (time() - start)
            # The first bucketful should be written with minimal delay. The
            # factor of 2 here is not arbitrary: it verifies that the system can
            # write fast enough to empty the bucket within two bucketfuls, which
            # the next part of the test depends on.
            self.assertGreater(write_bucketful(), MAX_KB_PER_SECOND * 2)
            # Write another bucketful to empty the token bucket completely.
            write_bucketful()
            # The next bucketful should be written at the rate limit.
            self.assertAlmostEqual(
                write_bucketful(), MAX_KB_PER_SECOND,
                delta=MAX_KB_PER_SECOND * 0.1
            )
            # Once the token bucket refills, we should go back to full speed.
            sleep(BUCKET_KB / MAX_KB_PER_SECOND)
            self.assertGreater(write_bucketful(), MAX_KB_PER_SECOND * 2)
--- a/Builtins/2024-08-05-19-04-06.gh-issue-116622.3LWUzE.rst
+++ b/Builtins/2024-08-05-19-04-06.gh-issue-116622.3LWUzE.rst
@ -0,0 +1 @@
 Fix Android stdout and stderr messages being truncated or lost.
		`@ -0,0 +1 @@`
							`Fix Android stdout and stderr messages being truncated or lost.`