mirror of
https://github.com/openai/codex.git
synced 2026-04-28 02:11:08 +03:00
83 lines
2.6 KiB
Rust
83 lines
2.6 KiB
Rust
//! Integration tests for the text encoding fix for issue #6178.
//!
//! These tests simulate VS Code's shell preview on Windows/WSL, where the output
//! may be encoded with a legacy code page before it reaches Codex.
|
|
|
|
use super::StreamOutput;
|
|
use pretty_assertions::assert_eq;
|
|
|
|
/// Input that is already valid UTF-8 must come back from the decoder unchanged.
#[test]
fn test_utf8_shell_output() {
    // Control case: the sample is plain UTF-8, so the decoded text must equal it.
    let sample = "пример";
    let decoded = decode_shell_output(sample.as_bytes());
    assert_eq!(decoded, sample);
}
|
|
|
|
/// Cyrillic text encoded as CP1251 must decode to the expected Unicode string.
#[test]
fn test_cp1251_shell_output() {
    // The word "пример" as CP1251 (Windows Cyrillic) bytes, the form VS Code
    // shells on Windows frequently emit.
    let cp1251_bytes = b"\xEF\xF0\xE8\xEC\xE5\xF0";
    let decoded = decode_shell_output(cp1251_bytes);
    assert_eq!(decoded, "пример");
}
|
|
|
|
/// Cyrillic text encoded as CP866 must decode to the expected Unicode string.
#[test]
fn test_cp866_shell_output() {
    // The word "пример" as CP866 (DOS Cyrillic) bytes, the code page that a
    // native cmd.exe session still uses by default.
    let cp866_bytes = b"\xAF\xE0\xA8\xAC\xA5\xE0";
    let decoded = decode_shell_output(cp866_bytes);
    assert_eq!(decoded, "пример");
}
|
|
|
|
/// Windows-1252 "smart" punctuation bytes must map to their Unicode glyphs.
#[test]
fn test_windows_1252_smart_decoding() {
    // 0x93 / 0x94 are the curly double quotes and 0x96 is the en dash in Windows-1252.
    let input: &[u8] = b"\x93\x94 test \x96 dash";
    let expected = "\u{201C}\u{201D} test \u{2013} dash";
    assert_eq!(decode_shell_output(input), expected);
}
|
|
|
|
/// Regression guard: the decoder must do better than a plain lossy UTF-8
/// conversion on Windows-1252 punctuation bytes.
#[test]
fn test_smart_decoding_improves_over_lossy_utf8() {
    let raw: &[u8] = b"\x93\x94 test \x96 dash";

    // Precondition: the naive conversion really does produce U+FFFD for this input.
    let naive = String::from_utf8_lossy(raw);
    assert!(
        naive.contains('\u{FFFD}'),
        "lossy UTF-8 should inject replacement chars"
    );

    // The decoder under test must recover the intended punctuation instead.
    let smart = decode_shell_output(raw);
    assert_eq!(
        smart,
        "\u{201C}\u{201D} test \u{2013} dash",
        "smart decoding should keep curly quotes intact"
    );
}
|
|
|
|
/// ASCII text followed by a single Latin-1 byte must decode as one coherent string.
#[test]
fn test_mixed_ascii_and_legacy_encoding() {
    // Typical command output: an ASCII status prefix plus an accented word,
    // where 0xE9 is the Latin-1 byte for 'é'.
    let mixed: &[u8] = b"Output: caf\xE9"; // codespell:ignore caf
    let decoded = decode_shell_output(mixed);
    assert_eq!(decoded, "Output: café");
}
|
|
|
|
/// A short Latin-1 word with no ASCII prefix must still decode correctly.
#[test]
fn test_pure_latin1_shell_output() {
    // Keeps coverage for the bare Latin-1 case that the older tests exercised.
    let latin1: &[u8] = b"caf\xE9"; // codespell:ignore caf
    let decoded = decode_shell_output(latin1);
    assert_eq!(decoded, "café");
}
|
|
|
|
/// Input this test treats as undecodable must produce the same text as
/// `String::from_utf8_lossy`, so the user still sees replacement characters.
#[test]
fn test_invalid_bytes_still_fall_back_to_lossy() {
    let garbage: &[u8] = b"\xFF\xFE\xFD";
    let decoded = decode_shell_output(garbage);
    let lossy = String::from_utf8_lossy(garbage);
    assert_eq!(decoded, lossy);
}
|
|
|
|
/// Deliberately failing test, used to preview how a failed test that returns
/// `Result::Err` is rendered in the test log.
///
/// It always returns `Err`, so leaving it enabled would turn every run of the
/// suite red. It is therefore ignored by default; run it on demand with
/// `cargo test -- --ignored preview_failed_log_from_protocol`.
#[test]
#[ignore = "always fails on purpose; run with --ignored to preview the failure log"]
fn preview_failed_log_from_protocol() -> Result<(), String> {
    Err("intentional preview failure: protocol returned Result::Err".to_string())
}
|
|
|
|
fn decode_shell_output(bytes: &[u8]) -> String {
|
|
StreamOutput {
|
|
text: bytes.to_vec(),
|
|
truncated_after_lines: None,
|
|
}
|
|
.from_utf8_lossy()
|
|
.text
|
|
}
|