Skip to content

Commit 73b6574

Browse files
committed
Fix unreadable non-UTF-8 output on localized MSVC
Fixes #35785 by converting non-UTF-8 linker output to Unicode using the OEM code page.

Before:

```text
= note: Non-UTF-8 output: LINK : fatal error LNK1181: cannot open input file \'m\x84rchenhaft.obj\'\r\n
```

After:

```text
= note: LINK : fatal error LNK1181: cannot open input file 'märchenhaft.obj'
```

The difference is more dramatic if using a non-ASCII language pack for Visual Studio.
1 parent 8b8110e commit 73b6574

File tree

5 files changed

+72
-1
lines changed

5 files changed

+72
-1
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3250,6 +3250,7 @@ dependencies = [
32503250
"tempfile",
32513251
"thorin-dwp",
32523252
"tracing",
3253+
"windows 0.46.0",
32533254
]
32543255

32553256
[[package]]

compiler/rustc_codegen_ssa/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,7 @@ libc = "0.2.50"
4949
version = "0.30.1"
5050
default-features = false
5151
features = ["read_core", "elf", "macho", "pe", "unaligned", "archive", "write"]
52+
53+
[target.'cfg(windows)'.dependencies.windows]
54+
version = "0.46.0"
55+
features = ["Win32_Globalization"]

compiler/rustc_codegen_ssa/src/back/link.rs

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -857,7 +857,7 @@ fn link_natively<'a>(
857857
if !prog.status.success() {
858858
let mut output = prog.stderr.clone();
859859
output.extend_from_slice(&prog.stdout);
860-
let escaped_output = escape_string(&output);
860+
let escaped_output = escape_linker_output(&output, flavor);
861861
// FIXME: Add UI tests for this error.
862862
let err = errors::LinkingFailed {
863863
linker_path: &linker_path,
@@ -1049,6 +1049,59 @@ fn escape_string(s: &[u8]) -> String {
10491049
}
10501050
}
10511051

1052+
#[cfg(not(windows))]
1053+
fn escape_linker_output(s: &[u8], _flavour: LinkerFlavor) -> String {
1054+
escape_string(s)
1055+
}
1056+
1057+
/// If the output of the msvc linker is not UTF-8 and the host is Windows,
/// then try to convert the string from the OEM encoding.
///
/// Output from any other linker flavour is passed straight to `escape_string`.
/// For the MSVC linker, valid UTF-8 is returned unchanged. Otherwise the bytes
/// are decoded using the legacy system OEM code page, and if that also fails
/// the result is `"Non-UTF-8 output: "` followed by the ASCII-escaped bytes.
#[cfg(windows)]
fn escape_linker_output(s: &[u8], flavour: LinkerFlavor) -> String {
    // This only applies to the actual MSVC linker.
    if flavour != LinkerFlavor::Msvc(Lld::No) {
        return escape_string(s);
    }
    if let Ok(utf8) = str::from_utf8(s) {
        return utf8.to_owned();
    }
    match oem_bytes_to_string(s) {
        Some(converted) => converted,
        // The string is not UTF-8 and isn't valid for the OEM code page
        None => format!("Non-UTF-8 output: {}", s.escape_ascii()),
    }
}

/// Returns the legacy system OEM code page, falling back to `CP_OEMCP` when the
/// locale query does not write the expected amount of data.
#[cfg(windows)]
fn legacy_oem_code_page() -> u32 {
    use windows::Win32::Globalization::{
        GetLocaleInfoEx, CP_OEMCP, LOCALE_IUSEUTF8LEGACYOEMCP, LOCALE_NAME_SYSTEM_DEFAULT,
        LOCALE_RETURN_NUMBER,
    };

    let mut cp: u32 = 0;
    // We're using the `LOCALE_RETURN_NUMBER` flag to return a u32.
    // But the API requires us to pass the data as though it's a [u16] string.
    let len = std::mem::size_of::<u32>() / std::mem::size_of::<u16>();
    // SAFETY: `cp` is a live, properly aligned `u32`, so its storage is valid
    // for reads and writes as `len` (two) `u16`s, and every bit pattern is a
    // valid `u16`/`u32`. `cp` is not touched through any other path until the
    // slice is no longer used. The buffer is passed to the API as a slice, so
    // its pointer and length are consistent.
    let len_written = unsafe {
        let data = std::slice::from_raw_parts_mut(&mut cp as *mut u32 as *mut u16, len);
        GetLocaleInfoEx(
            LOCALE_NAME_SYSTEM_DEFAULT,
            LOCALE_IUSEUTF8LEGACYOEMCP | LOCALE_RETURN_NUMBER,
            Some(data),
        )
    };
    // A negative (error) return becomes a huge `usize` and so also selects the fallback.
    if len_written as usize == len { cp } else { CP_OEMCP }
}

/// Decodes `s` from the legacy system OEM code page.
///
/// Returns `None` if `s` is too long to hand to the conversion API, or if the
/// bytes are not valid for that code page.
#[cfg(windows)]
fn oem_bytes_to_string(s: &[u8]) -> Option<String> {
    use windows::Win32::Globalization::{MultiByteToWideChar, MB_ERR_INVALID_CHARS};

    // `MultiByteToWideChar` takes the input length as a 32-bit signed integer,
    // so longer inputs cannot be converted.
    if s.len() > i32::MAX as usize {
        return None;
    }
    // Get the legacy system OEM code page.
    let code_page = legacy_oem_code_page();
    // Error if the string is not valid for the expected code page.
    let flags = MB_ERR_INVALID_CHARS;

    // Call MultiByteToWideChar twice.
    // First to calculate the length then to convert the string.
    // SAFETY: the input is passed as a slice and no output buffer is supplied,
    // so the call only reads `s` within its bounds.
    let needed = unsafe { MultiByteToWideChar(code_page, flags, s, None) };
    if needed <= 0 {
        return None;
    }
    let mut utf16 = vec![0u16; needed as usize];
    // SAFETY: input and output are both passed as slices, so each pointer is
    // valid for the length that accompanies it.
    let written = unsafe { MultiByteToWideChar(code_page, flags, s, Some(&mut utf16)) };
    if written <= 0 {
        return None;
    }
    // Checked slicing: never panic on a length reported by the OS; an
    // out-of-range value falls back to the escaped output instead.
    utf16.get(..written as usize).map(String::from_utf16_lossy)
}
1104+
10521105
fn add_sanitizer_libraries(sess: &Session, crate_type: CrateType, linker: &mut dyn Linker) {
10531106
// On macOS the runtimes are distributed as dylibs which should be linked to
10541107
// both executables and dynamic shared objects. Everywhere else the runtimes
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// build-fail
2+
// compile-flags:-C link-arg=märchenhaft
3+
// only-msvc
4+
// error-pattern:= note: LINK : fatal error LNK1181:
5+
// normalize-stderr-test "(\s*\|\n)\s*= note: .*\n" -> "$1"
6+
// Intentionally empty: this test is about the linker failure triggered by the bogus `link-arg`.
pub fn main() {}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
error: linking with `link.exe` failed: exit code: 1181
2+
|
3+
= note: LINK : fatal error LNK1181: cannot open input file 'märchenhaft.obj'
4+
5+
6+
error: aborting due to previous error
7+

0 commit comments

Comments
 (0)