Skip to content

Commit 7d49ae9

Browse files
committed
Auto merge of #136410 - saethlin:clean-up-cgu-internal-copy, r=compiler-errors
Remove InstanceKind::generates_cgu_internal_copy This PR should not contain any behavior changes. Before this PR, the logic for selecting instantiation mode is spread across all of * `instantiation_mode` * `cross_crate_inlinable` * `generates_cgu_internal_copy` * `requires_inline` The last two of those functions are not well-designed. The function that actually decides if we generate a CGU-internal copy is `instantiation_mode`, _not_ `generates_cgu_internal_copy`. The function `requires_inline` documents that it is about the LLVM `inline` attribute and that it is a hint. The LLVM attribute is called `inlinehint`, this function is also used by other codegen backends, and since it is part of instantiation mode selection it is *not* a hint. The goal of this PR is to start cleaning up the logic into a sequence of checks that have a more logical flow and are easier to customize in the future (to do things like improve incrementality or improve optimizations without causing obscure linker errors because you forgot to update another part of the compiler).
2 parents e61403a + 817e2c5 commit 7d49ae9

File tree

3 files changed

+68
-64
lines changed

3 files changed

+68
-64
lines changed

compiler/rustc_codegen_ssa/src/back/symbol_export.rs

+4-9
Original file line numberDiff line numberDiff line change
@@ -95,16 +95,11 @@ fn reachable_non_generics_provider(tcx: TyCtxt<'_>, _: LocalCrate) -> DefIdMap<S
9595
return None;
9696
}
9797

98-
// Functions marked with #[inline] are codegened with "internal"
99-
// linkage and are not exported unless marked with an extern
100-
// indicator
101-
if !Instance::mono(tcx, def_id.to_def_id()).def.generates_cgu_internal_copy(tcx)
102-
|| tcx.codegen_fn_attrs(def_id.to_def_id()).contains_extern_indicator()
103-
{
104-
Some(def_id)
105-
} else {
106-
None
98+
if Instance::mono(tcx, def_id.into()).def.requires_inline(tcx) {
99+
return None;
107100
}
101+
102+
if tcx.cross_crate_inlinable(def_id) { None } else { Some(def_id) }
108103
})
109104
.map(|def_id| {
110105
// We won't link right if this symbol is stripped during LTO.

compiler/rustc_middle/src/mir/mono.rs

+64-11
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use tracing::debug;
2121

2222
use crate::dep_graph::{DepNode, WorkProduct, WorkProductId};
2323
use crate::middle::codegen_fn_attrs::CodegenFnAttrFlags;
24-
use crate::ty::{GenericArgs, Instance, InstanceKind, SymbolName, TyCtxt};
24+
use crate::ty::{self, GenericArgs, Instance, InstanceKind, SymbolName, Ty, TyCtxt};
2525

2626
/// Describes how a monomorphization will be instantiated in object files.
2727
#[derive(PartialEq)]
@@ -55,6 +55,39 @@ pub enum MonoItem<'tcx> {
5555
GlobalAsm(ItemId),
5656
}
5757

58+
fn opt_incr_drop_glue_mode<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> InstantiationMode {
59+
// Non-ADTs can't have a Drop impl. This case is mostly hit by closures whose captures require
60+
// dropping.
61+
let ty::Adt(adt_def, _) = ty.kind() else {
62+
return InstantiationMode::LocalCopy;
63+
};
64+
65+
// Types that don't have a direct Drop impl, but have fields that require dropping.
66+
let Some(dtor) = adt_def.destructor(tcx) else {
67+
// We use LocalCopy for drops of enums only; this code is inherited from
68+
// https://github.com/rust-lang/rust/pull/67332 and the theory is that we get to optimize
69+
// out code like drop_in_place(Option::None) before crate-local ThinLTO, which improves
70+
// compile time. At the time of writing, simply removing this entire check does seem to
71+
// regress incr-opt compile times. But it sure seems like a more sophisticated check could
72+
// do better here.
73+
if adt_def.is_enum() {
74+
return InstantiationMode::LocalCopy;
75+
} else {
76+
return InstantiationMode::GloballyShared { may_conflict: true };
77+
}
78+
};
79+
80+
// We've gotten to a drop_in_place for a type that directly implements Drop.
81+
// The drop glue is a wrapper for the Drop::drop impl, and we are an optimized build, so in an
82+
// effort to coordinate with the mode that the actual impl will get, we make the glue also
83+
// LocalCopy.
84+
if tcx.cross_crate_inlinable(dtor.did) {
85+
InstantiationMode::LocalCopy
86+
} else {
87+
InstantiationMode::GloballyShared { may_conflict: true }
88+
}
89+
}
90+
5891
impl<'tcx> MonoItem<'tcx> {
5992
/// Returns `true` if the mono item is user-defined (i.e. not compiler-generated, like shims).
6093
pub fn is_user_defined(&self) -> bool {
@@ -124,16 +157,36 @@ impl<'tcx> MonoItem<'tcx> {
124157
return InstantiationMode::GloballyShared { may_conflict: false };
125158
}
126159

127-
// FIXME: The logic for which functions are permitted to get LocalCopy is actually spread
128-
// across 4 functions:
129-
// * cross_crate_inlinable(def_id)
130-
// * InstanceKind::requires_inline
131-
// * InstanceKind::generate_cgu_internal_copy
132-
// * MonoItem::instantiation_mode
133-
// Since reachable_non_generics calls InstanceKind::generates_cgu_internal_copy to decide
134-
// which symbols this crate exports, we are obligated to only generate LocalCopy when
135-
// generates_cgu_internal_copy returns true.
136-
if !instance.def.generates_cgu_internal_copy(tcx) {
160+
// This is technically a heuristic even though it's in the "not a heuristic" part of
161+
// instantiation mode selection.
162+
// It is surely possible to untangle this; the root problem is that the way we instantiate
163+
// InstanceKind other than Item is very complicated.
164+
//
165+
// The fallback case is to give everything else GloballyShared at OptLevel::No and
166+
// LocalCopy at all other opt levels. This is a good default, except for one specific build
167+
// configuration: Optimized incremental builds.
168+
// In the current compiler architecture there is a fundamental tension between
169+
// optimizations (which want big CGUs with as many things LocalCopy as possible) and
170+
// incrementality (which wants small CGUs with as many things GloballyShared as possible).
171+
// The heuristics implemented here do better than a completely naive approach in the
172+
// compiler benchmark suite, but there is no reason to believe they are optimal.
173+
if let InstanceKind::DropGlue(_, Some(ty)) = instance.def {
174+
if tcx.sess.opts.optimize == OptLevel::No {
175+
return InstantiationMode::GloballyShared { may_conflict: false };
176+
}
177+
if tcx.sess.opts.incremental.is_none() {
178+
return InstantiationMode::LocalCopy;
179+
}
180+
return opt_incr_drop_glue_mode(tcx, ty);
181+
}
182+
183+
// We need to ensure that we do not decide the InstantiationMode of an exported symbol is
184+
// LocalCopy. Since exported symbols are computed based on the output of
185+
// cross_crate_inlinable, we are beholden to our previous decisions.
186+
//
187+
// Note that just like above, this check for requires_inline is technically a heuristic
188+
// even though it's in the "not a heuristic" part of instantiation mode selection.
189+
if !tcx.cross_crate_inlinable(instance.def_id()) && !instance.def.requires_inline(tcx) {
137190
return InstantiationMode::GloballyShared { may_conflict: false };
138191
}
139192

compiler/rustc_middle/src/ty/instance.rs

-44
Original file line numberDiff line numberDiff line change
@@ -302,50 +302,6 @@ impl<'tcx> InstanceKind<'tcx> {
302302
)
303303
}
304304

305-
/// Returns `true` if the machine code for this instance is instantiated in
306-
/// each codegen unit that references it.
307-
/// Note that this is only a hint! The compiler can globally decide to *not*
308-
/// do this in order to speed up compilation. CGU-internal copies are
309-
/// only exist to enable inlining. If inlining is not performed (e.g. at
310-
/// `-Copt-level=0`) then the time for generating them is wasted and it's
311-
/// better to create a single copy with external linkage.
312-
pub fn generates_cgu_internal_copy(&self, tcx: TyCtxt<'tcx>) -> bool {
313-
if self.requires_inline(tcx) {
314-
return true;
315-
}
316-
if let ty::InstanceKind::DropGlue(.., Some(ty))
317-
| ty::InstanceKind::AsyncDropGlueCtorShim(.., Some(ty)) = *self
318-
{
319-
// Drop glue generally wants to be instantiated at every codegen
320-
// unit, but without an #[inline] hint. We should make this
321-
// available to normal end-users.
322-
if tcx.sess.opts.incremental.is_none() {
323-
return true;
324-
}
325-
// When compiling with incremental, we can generate a *lot* of
326-
// codegen units. Including drop glue into all of them has a
327-
// considerable compile time cost.
328-
//
329-
// We include enums without destructors to allow, say, optimizing
330-
// drops of `Option::None` before LTO. We also respect the intent of
331-
// `#[inline]` on `Drop::drop` implementations.
332-
return ty.ty_adt_def().is_none_or(|adt_def| {
333-
match *self {
334-
ty::InstanceKind::DropGlue(..) => adt_def.destructor(tcx).map(|dtor| dtor.did),
335-
ty::InstanceKind::AsyncDropGlueCtorShim(..) => {
336-
adt_def.async_destructor(tcx).map(|dtor| dtor.ctor)
337-
}
338-
_ => unreachable!(),
339-
}
340-
.map_or_else(|| adt_def.is_enum(), |did| tcx.cross_crate_inlinable(did))
341-
});
342-
}
343-
if let ty::InstanceKind::ThreadLocalShim(..) = *self {
344-
return false;
345-
}
346-
tcx.cross_crate_inlinable(self.def_id())
347-
}
348-
349305
pub fn requires_caller_location(&self, tcx: TyCtxt<'_>) -> bool {
350306
match *self {
351307
InstanceKind::Item(def_id) | InstanceKind::Virtual(def_id, _) => {

0 commit comments

Comments
 (0)