Skip to content

Commit 581462d

Browse files
committed
The embedded bitcode should always be prepared for LTO/ThinLTO
1 parent d53f0b1 commit 581462d

File tree

13 files changed

+260
-71
lines changed

13 files changed

+260
-71
lines changed

compiler/rustc_codegen_cranelift/src/driver/aot.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ fn produce_final_output_artifacts(
204204
// to get rid of it.
205205
for output_type in crate_output.outputs.keys() {
206206
match *output_type {
207-
OutputType::Bitcode | OutputType::ThinLinkBitcode => {
207+
OutputType::Bitcode | OutputType::ThinLinkBitcode | OutputType::ThinBitcode => {
208208
// Cranelift doesn't have bitcode
209209
// user_wants_bitcode = true;
210210
// // Copy to .bc, but always keep the .0.bc. There is a later

compiler/rustc_codegen_llvm/src/back/lto.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use std::ffi::{CStr, CString};
33
use std::fs::File;
44
use std::mem::ManuallyDrop;
55
use std::path::Path;
6+
use std::ptr::NonNull;
67
use std::sync::Arc;
78
use std::{io, iter, slice};
89

@@ -604,7 +605,7 @@ pub(crate) fn run_pass_manager(
604605
debug!("running the pass manager");
605606
let opt_stage = if thin { llvm::OptStage::ThinLTO } else { llvm::OptStage::FatLTO };
606607
let opt_level = config.opt_level.unwrap_or(config::OptLevel::No);
607-
unsafe { write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage) }?;
608+
unsafe { write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage) }?;
608609
debug!("lto done");
609610
Ok(())
610611
}
@@ -663,6 +664,11 @@ impl ThinBuffer {
663664
ThinBuffer(buffer)
664665
}
665666
}
667+
668+
/// Wraps a raw `llvm::ThinLTOBuffer` pointer in a [`ThinBuffer`].
///
/// # Panics
///
/// Panics if `ptr` is null.
///
/// # Safety
///
/// `ptr` is turned into a mutable reference, so a non-null `ptr` must point to a valid
/// `llvm::ThinLTOBuffer` that nothing else is accessing.
/// NOTE(review): presumably the returned `ThinBuffer` takes ownership of the buffer and
/// frees it on drop; confirm against `ThinBuffer`'s `Drop` impl.
pub unsafe fn from_raw_ptr(ptr: *mut llvm::ThinLTOBuffer) -> ThinBuffer {
669+
// Reject null up front: `NonNull::new` yields `None` for a null pointer and `unwrap` panics.
let mut ptr = NonNull::new(ptr).unwrap();
670+
ThinBuffer(unsafe { ptr.as_mut() })
671+
}
666672
}
667673

668674
impl ThinBufferMethods for ThinBuffer {

compiler/rustc_codegen_llvm/src/back/write.rs

Lines changed: 85 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use std::ffi::{CStr, CString};
22
use std::io::{self, Write};
33
use std::path::{Path, PathBuf};
4+
use std::ptr::null_mut;
45
use std::sync::Arc;
56
use std::{fs, slice, str};
67

@@ -15,7 +16,7 @@ use rustc_codegen_ssa::back::write::{
1516
TargetMachineFactoryFn,
1617
};
1718
use rustc_codegen_ssa::traits::*;
18-
use rustc_codegen_ssa::{CompiledModule, ModuleCodegen};
19+
use rustc_codegen_ssa::{CompiledModule, ModuleCodegen, ModuleKind};
1920
use rustc_data_structures::profiling::SelfProfilerRef;
2021
use rustc_data_structures::small_c_str::SmallCStr;
2122
use rustc_errors::{DiagCtxtHandle, FatalError, Level};
@@ -41,7 +42,7 @@ use crate::errors::{
4142
WithLlvmError, WriteBytecode,
4243
};
4344
use crate::llvm::diagnostic::OptimizationDiagnosticKind::*;
44-
use crate::llvm::{self, DiagnosticInfo, PassManager};
45+
use crate::llvm::{self, DiagnosticInfo, PassManager, ThinLTOBuffer};
4546
use crate::type_::Type;
4647
use crate::{LlvmCodegenBackend, ModuleLlvm, base, common, llvm_util};
4748

@@ -514,13 +515,24 @@ pub(crate) unsafe fn llvm_optimize(
514515
cgcx: &CodegenContext<LlvmCodegenBackend>,
515516
dcx: DiagCtxtHandle<'_>,
516517
module: &ModuleCodegen<ModuleLlvm>,
518+
thin_lto_buffer: Option<&mut *mut ThinLTOBuffer>,
517519
config: &ModuleConfig,
518520
opt_level: config::OptLevel,
519521
opt_stage: llvm::OptStage,
520522
) -> Result<(), FatalError> {
523+
if thin_lto_buffer.is_some() {
524+
assert!(
525+
matches!(
526+
opt_stage,
527+
llvm::OptStage::PreLinkNoLTO
528+
| llvm::OptStage::PreLinkFatLTO
529+
| llvm::OptStage::PreLinkThinLTO
530+
),
531+
"the bitcode for LTO can only be obtained at the pre-link stage"
532+
);
533+
}
521534
let unroll_loops =
522535
opt_level != config::OptLevel::Size && opt_level != config::OptLevel::SizeMin;
523-
let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
524536
let pgo_gen_path = get_pgo_gen_path(config);
525537
let pgo_use_path = get_pgo_use_path(config);
526538
let pgo_sample_use_path = get_pgo_sample_use_path(config);
@@ -580,7 +592,9 @@ pub(crate) unsafe fn llvm_optimize(
580592
config.no_prepopulate_passes,
581593
config.verify_llvm_ir,
582594
config.lint_llvm_ir,
583-
using_thin_buffers,
595+
thin_lto_buffer,
596+
config.emit_thin_lto,
597+
config.emit_thin_lto_summary,
584598
config.merge_functions,
585599
unroll_loops,
586600
config.vectorize_slp,
@@ -635,7 +649,47 @@ pub(crate) unsafe fn optimize(
635649
_ if cgcx.opts.cg.linker_plugin_lto.enabled() => llvm::OptStage::PreLinkThinLTO,
636650
_ => llvm::OptStage::PreLinkNoLTO,
637651
};
638-
return unsafe { llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage) };
652+
// The embedded bitcode is used to run LTO/ThinLTO.
653+
// The bitcode obtained during the `codegen` phase is no longer suitable for performing LTO.
654+
// It may have undergone LTO due to ThinLocal, so we need to obtain the embedded bitcode at
655+
// this point.
656+
let mut thin_lto_buffer = if (module.kind == ModuleKind::Regular
657+
&& config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full))
658+
|| config.emit_thin_lto_summary
659+
{
660+
Some(null_mut())
661+
} else {
662+
None
663+
};
664+
unsafe {
665+
llvm_optimize(cgcx, dcx, module, thin_lto_buffer.as_mut(), config, opt_level, opt_stage)
666+
}?;
667+
if let Some(thin_lto_buffer) = thin_lto_buffer {
668+
let thin_lto_buffer = unsafe { ThinBuffer::from_raw_ptr(thin_lto_buffer) };
669+
let thin_bc_out = cgcx.output_filenames.temp_path(OutputType::ThinBitcode, module_name);
670+
if let Err(err) = fs::write(&thin_bc_out, thin_lto_buffer.data()) {
671+
dcx.emit_err(WriteBytecode { path: &thin_bc_out, err });
672+
}
673+
let bc_summary_out =
674+
cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
675+
if config.emit_thin_lto_summary
676+
&& let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
677+
{
678+
let summary_data = thin_lto_buffer.thin_link_data();
679+
cgcx.prof.artifact_size(
680+
"llvm_bitcode_summary",
681+
thin_link_bitcode_filename.to_string_lossy(),
682+
summary_data.len() as u64,
683+
);
684+
let _timer = cgcx.prof.generic_activity_with_arg(
685+
"LLVM_module_codegen_emit_bitcode_summary",
686+
&*module.name,
687+
);
688+
if let Err(err) = fs::write(&bc_summary_out, summary_data) {
689+
dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
690+
}
691+
}
692+
}
639693
}
640694
Ok(())
641695
}
@@ -714,61 +768,49 @@ pub(crate) unsafe fn codegen(
714768
// requested.
715769
// - If we don't have the integrated assembler then we need to emit
716770
// asm from LLVM and use `gcc` to create the object file.
717-
718771
let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
719-
let bc_summary_out =
720-
cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
721772
let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name);
722773

723774
if config.bitcode_needed() {
724-
let _timer = cgcx
725-
.prof
726-
.generic_activity_with_arg("LLVM_module_codegen_make_bitcode", &*module.name);
727-
let thin = ThinBuffer::new(llmod, config.emit_thin_lto, config.emit_thin_lto_summary);
728-
let data = thin.data();
729-
730-
if let Some(bitcode_filename) = bc_out.file_name() {
731-
cgcx.prof.artifact_size(
732-
"llvm_bitcode",
733-
bitcode_filename.to_string_lossy(),
734-
data.len() as u64,
735-
);
736-
}
737-
738-
if config.emit_thin_lto_summary
739-
&& let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
740-
{
741-
let summary_data = thin.thin_link_data();
742-
cgcx.prof.artifact_size(
743-
"llvm_bitcode_summary",
744-
thin_link_bitcode_filename.to_string_lossy(),
745-
summary_data.len() as u64,
746-
);
747-
748-
let _timer = cgcx.prof.generic_activity_with_arg(
749-
"LLVM_module_codegen_emit_bitcode_summary",
750-
&*module.name,
751-
);
752-
if let Err(err) = fs::write(&bc_summary_out, summary_data) {
753-
dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
754-
}
755-
}
756-
775+
// Open question: if the target spec's object files are themselves bitcode, how does that interact with LTO performed by rustc?
757776
if config.emit_bc || config.emit_obj == EmitObj::Bitcode {
777+
let thin = {
778+
let _timer = cgcx.prof.generic_activity_with_arg(
779+
"LLVM_module_codegen_make_bitcode",
780+
&*module.name,
781+
);
782+
ThinBuffer::new(llmod, config.emit_thin_lto, false)
783+
};
784+
let data = thin.data();
758785
let _timer = cgcx
759786
.prof
760787
.generic_activity_with_arg("LLVM_module_codegen_emit_bitcode", &*module.name);
788+
if let Some(bitcode_filename) = bc_out.file_name() {
789+
cgcx.prof.artifact_size(
790+
"llvm_bitcode",
791+
bitcode_filename.to_string_lossy(),
792+
data.len() as u64,
793+
);
794+
}
761795
if let Err(err) = fs::write(&bc_out, data) {
762796
dcx.emit_err(WriteBytecode { path: &bc_out, err });
763797
}
764798
}
765799

766-
if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full) {
800+
if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full)
801+
&& module.kind == ModuleKind::Regular
802+
{
767803
let _timer = cgcx
768804
.prof
769805
.generic_activity_with_arg("LLVM_module_codegen_embed_bitcode", &*module.name);
806+
let thin_bc_out =
807+
cgcx.output_filenames.temp_path(OutputType::ThinBitcode, module_name);
808+
assert!(thin_bc_out.exists(), "cannot find {:?} as embedded bitcode", thin_bc_out);
809+
let data = fs::read(&thin_bc_out).unwrap();
810+
debug!("removing embed bitcode file {:?}", thin_bc_out);
811+
ensure_removed(dcx, &thin_bc_out);
770812
unsafe {
771-
embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, data);
813+
embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, &data);
772814
}
773815
}
774816
}

compiler/rustc_codegen_llvm/src/llvm/ffi.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2250,7 +2250,9 @@ unsafe extern "C" {
22502250
NoPrepopulatePasses: bool,
22512251
VerifyIR: bool,
22522252
LintIR: bool,
2253-
UseThinLTOBuffers: bool,
2253+
ThinLTOBuffer: Option<&mut *mut ThinLTOBuffer>,
2254+
EmitThinLTO: bool,
2255+
EmitThinLTOSummary: bool,
22542256
MergeFunctions: bool,
22552257
UnrollLoops: bool,
22562258
SLPVectorize: bool,

compiler/rustc_codegen_ssa/src/back/write.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -616,6 +616,9 @@ fn produce_final_output_artifacts(
616616
// them for making an rlib.
617617
copy_if_one_unit(OutputType::Bitcode, true);
618618
}
619+
OutputType::ThinBitcode => {
620+
copy_if_one_unit(OutputType::ThinBitcode, true);
621+
}
619622
OutputType::ThinLinkBitcode => {
620623
copy_if_one_unit(OutputType::ThinLinkBitcode, false);
621624
}

compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp

Lines changed: 50 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "llvm/Analysis/Lint.h"
88
#include "llvm/Analysis/TargetLibraryInfo.h"
99
#include "llvm/Bitcode/BitcodeWriter.h"
10+
#include "llvm/Bitcode/BitcodeWriterPass.h"
1011
#include "llvm/CodeGen/CommandFlags.h"
1112
#include "llvm/IR/AssemblyAnnotationWriter.h"
1213
#include "llvm/IR/AutoUpgrade.h"
@@ -36,6 +37,7 @@
3637
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
3738
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
3839
#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
40+
#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
3941
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
4042
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
4143
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
@@ -194,6 +196,19 @@ extern "C" void LLVMRustTimeTraceProfilerFinish(const char *FileName) {
194196
GEN_SUBTARGETS
195197
#undef SUBTARGET
196198

199+
// This struct and various functions are sort of a hack right now, but the
200+
// problem is that we've got in-memory LLVM modules after we generate and
201+
// optimize all codegen-units for one compilation in rustc. To be compatible
202+
// with the LTO support above we need to serialize the modules plus their
203+
// ThinLTO summary into memory.
204+
//
205+
// This structure is basically an owned version of a serialized module, with
206+
// a ThinLTO summary attached.
207+
struct LLVMRustThinLTOBuffer {
208+
// Backing storage for the module bitcode; LLVMRustOptimize points a
// raw_string_ostream at it and passes that stream to the bitcode writer pass.
std::string data;
209+
// Backing storage for the thin-link summary; LLVMRustOptimize only passes a
// stream over it to ThinLTOBitcodeWriterPass when EmitThinLTOSummary is set.
std::string thin_link_data;
210+
};
211+
197212
extern "C" bool LLVMRustHasFeature(LLVMTargetMachineRef TM,
198213
const char *Feature) {
199214
TargetMachine *Target = unwrap(TM);
@@ -676,7 +691,8 @@ extern "C" LLVMRustResult LLVMRustOptimize(
676691
LLVMModuleRef ModuleRef, LLVMTargetMachineRef TMRef,
677692
LLVMRustPassBuilderOptLevel OptLevelRust, LLVMRustOptStage OptStage,
678693
bool IsLinkerPluginLTO, bool NoPrepopulatePasses, bool VerifyIR,
679-
bool LintIR, bool UseThinLTOBuffers, bool MergeFunctions, bool UnrollLoops,
694+
bool LintIR, LLVMRustThinLTOBuffer **ThinLTOBufferRef, bool EmitThinLTO,
695+
bool EmitThinLTOSummary, bool MergeFunctions, bool UnrollLoops,
680696
bool SLPVectorize, bool LoopVectorize, bool DisableSimplifyLibCalls,
681697
bool EmitLifetimeMarkers, LLVMRustSanitizerOptions *SanitizerOptions,
682698
const char *PGOGenPath, const char *PGOUsePath, bool InstrumentCoverage,
@@ -923,7 +939,10 @@ extern "C" LLVMRustResult LLVMRustOptimize(
923939
}
924940

925941
ModulePassManager MPM;
926-
bool NeedThinLTOBufferPasses = UseThinLTOBuffers;
942+
bool NeedThinLTOBufferPasses = EmitThinLTO;
943+
auto ThinLTOBuffer = std::make_unique<LLVMRustThinLTOBuffer>();
944+
auto ThinLTODataOS = raw_string_ostream(ThinLTOBuffer->data);
945+
auto ThinLinkDataOS = raw_string_ostream(ThinLTOBuffer->thin_link_data);
927946
if (!NoPrepopulatePasses) {
928947
// The pre-link pipelines don't support O0 and require using
929948
// buildO0DefaultPipeline() instead. At the same time, the LTO pipelines do
@@ -947,7 +966,25 @@ extern "C" LLVMRustResult LLVMRustOptimize(
947966

948967
switch (OptStage) {
949968
case LLVMRustOptStage::PreLinkNoLTO:
950-
MPM = PB.buildPerModuleDefaultPipeline(OptLevel);
969+
if (ThinLTOBufferRef) {
970+
// This is similar to LLVM's `buildFatLTODefaultPipeline`, where the
971+
// bitcode for embedding is obtained after performing
972+
// `ThinLTOPreLinkDefaultPipeline`.
973+
MPM.addPass(PB.buildThinLTOPreLinkDefaultPipeline(OptLevel));
974+
if (EmitThinLTO) {
975+
MPM.addPass(ThinLTOBitcodeWriterPass(
976+
ThinLTODataOS, EmitThinLTOSummary ? &ThinLinkDataOS : nullptr));
977+
} else {
978+
MPM.addPass(BitcodeWriterPass(ThinLTODataOS));
979+
}
980+
*ThinLTOBufferRef = ThinLTOBuffer.release();
981+
MPM.addPass(PB.buildModuleOptimizationPipeline(
982+
OptLevel, ThinOrFullLTOPhase::None));
983+
MPM.addPass(
984+
createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
985+
} else {
986+
MPM = PB.buildPerModuleDefaultPipeline(OptLevel);
987+
}
951988
break;
952989
case LLVMRustOptStage::PreLinkThinLTO:
953990
MPM = PB.buildThinLTOPreLinkDefaultPipeline(OptLevel);
@@ -993,6 +1030,16 @@ extern "C" LLVMRustResult LLVMRustOptimize(
9931030
MPM.addPass(CanonicalizeAliasesPass());
9941031
MPM.addPass(NameAnonGlobalPass());
9951032
}
1033+
// For `-Copt-level=0`, ThinLTO, or LTO: a buffer was requested but has not been handed out yet, so append the bitcode writer pass here.
1034+
if (ThinLTOBufferRef && *ThinLTOBufferRef == nullptr) {
1035+
if (EmitThinLTO) {
1036+
MPM.addPass(ThinLTOBitcodeWriterPass(
1037+
ThinLTODataOS, EmitThinLTOSummary ? &ThinLinkDataOS : nullptr));
1038+
} else {
1039+
MPM.addPass(BitcodeWriterPass(ThinLTODataOS));
1040+
}
1041+
*ThinLTOBufferRef = ThinLTOBuffer.release();
1042+
}
9961043

9971044
// Upgrade all calls to old intrinsics first.
9981045
for (Module::iterator I = TheModule->begin(), E = TheModule->end(); I != E;)
@@ -1465,19 +1512,6 @@ extern "C" bool LLVMRustPrepareThinLTOImport(const LLVMRustThinLTOData *Data,
14651512
return true;
14661513
}
14671514

1468-
// This struct and various functions are sort of a hack right now, but the
1469-
// problem is that we've got in-memory LLVM modules after we generate and
1470-
// optimize all codegen-units for one compilation in rustc. To be compatible
1471-
// with the LTO support above we need to serialize the modules plus their
1472-
// ThinLTO summary into memory.
1473-
//
1474-
// This structure is basically an owned version of a serialized module, with
1475-
// a ThinLTO summary attached.
1476-
struct LLVMRustThinLTOBuffer {
1477-
std::string data;
1478-
std::string thin_link_data;
1479-
};
1480-
14811515
extern "C" LLVMRustThinLTOBuffer *
14821516
LLVMRustThinLTOBufferCreate(LLVMModuleRef M, bool is_thin, bool emit_summary) {
14831517
auto Ret = std::make_unique<LLVMRustThinLTOBuffer>();

0 commit comments

Comments
 (0)