Fix "stream did not contain valid UTF-8" error (#8120)

## Summary

Related issues: #8009 #7549

Although `PYTHONIOENCODING=utf-8` forces Python to use UTF-8 for
`stdout`/`stderr`, it can't prevent code like
`sys.stdout.buffer.write()` or `subprocess.call(["cl.exe", ...])` from
bypassing the encoder. This PR uses lossy UTF-8 conversion to avoid
decoding errors.

## Alternative

Using the `bstr` crate might be better, since it can preserve the original
bytes. Alternatively, we could follow the Windows convention: unset
`PYTHONIOENCODING` and decode with the system default encoding.

## Test Plan

Running locally with a non-ASCII character in `UV_CACHE_DIR` works fine,
but I have no unit test plan. Testing locale problems is hard :(
This commit is contained in:
Jiahao Yuan 2024-10-11 21:10:06 +08:00 committed by GitHub
parent 7bd0d97ce5
commit fce7a838e9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -921,13 +921,15 @@ impl PythonRunner {
) -> Result<PythonRunnerOutput, Error> {
/// Read lines from a reader and store them in a buffer.
async fn read_from(
mut reader: tokio::io::Lines<tokio::io::BufReader<impl tokio::io::AsyncRead + Unpin>>,
mut reader: tokio::io::Split<tokio::io::BufReader<impl tokio::io::AsyncRead + Unpin>>,
mut printer: Printer,
buffer: &mut Vec<String>,
) -> io::Result<()> {
loop {
match reader.next_line().await? {
Some(line) => {
match reader.next_segment().await? {
Some(line_buf) => {
let line_buf = line_buf.strip_suffix(b"\r").unwrap_or(&line_buf);
let line = String::from_utf8_lossy(line_buf).into();
let _ = write!(printer, "{line}");
buffer.push(line);
}
@ -945,7 +947,7 @@ impl PythonRunner {
.env("PATH", modified_path)
.env("VIRTUAL_ENV", venv.root())
.env("CLICOLOR_FORCE", "1")
.env("PYTHONIOENCODING", "utf-8")
.env("PYTHONIOENCODING", "utf-8:backslashreplace")
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.spawn()
@ -956,8 +958,8 @@ impl PythonRunner {
let mut stderr_buf = Vec::with_capacity(1024);
// Create separate readers for `stdout` and `stderr`.
let stdout_reader = tokio::io::BufReader::new(child.stdout.take().unwrap()).lines();
let stderr_reader = tokio::io::BufReader::new(child.stderr.take().unwrap()).lines();
let stdout_reader = tokio::io::BufReader::new(child.stdout.take().unwrap()).split(b'\n');
let stderr_reader = tokio::io::BufReader::new(child.stderr.take().unwrap()).split(b'\n');
// Asynchronously read from the in-memory pipes.
let printer = Printer::from(self.level);