Skip to content

Commit

Permalink
WIP: apply method optimizations in parallel
Browse files Browse the repository at this point in the history
  • Loading branch information
yorickpeterse committed Jan 16, 2025
1 parent 1f69cbf commit ec8d1f4
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 100 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions compiler/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ blake3 = "^1.5"
inkwell = { version = "^0.5", features = ["llvm17-0"] }
llvm-sys-170 = { package = "llvm-sys", version = "^170.2", features = ["prefer-static"] }
indexmap = "^2.6"
crossbeam-queue = "^0.3"

[dev-dependencies]
similar-asserts = "^1.1"
40 changes: 11 additions & 29 deletions compiler/src/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,7 @@ struct OptimizationTimings {
prepare: Duration,
inline: Duration,
remove_methods: Duration,
remove_instructions: Duration,
simplify_graph: Duration,
method_local: Duration,
total: Duration,
}

Expand Down Expand Up @@ -202,8 +201,7 @@ impl Timings {
prepare: Duration::from_secs(0),
inline: Duration::from_secs(0),
remove_methods: Duration::from_secs(0),
remove_instructions: Duration::from_secs(0),
simplify_graph: Duration::from_secs(0),
method_local: Duration::from_secs(0),
total: Duration::from_secs(0),
},
llvm: Duration::from_secs(0),
Expand Down Expand Up @@ -367,12 +365,11 @@ Frontend:
Specialize {specialize}
Optimizations:
Prepare {opt_prep}
Inline {opt_inline}
Remove unused methods {opt_unused_methods}
Remove unused instructions {opt_unused_instr}
Simplify graph {opt_simplify}
Total {opt_total}
Prepare {opt_prep}
Inline {opt_inline}
Remove unused methods {opt_unused_methods}
Method local {opt_method_local}
Total {opt_total}
Backend:
LLVM {llvm}
Expand All @@ -394,14 +391,8 @@ Total: {total}\
self.timings.optimize.remove_methods,
Some(total)
),
opt_unused_instr = format_timing(
self.timings.optimize.remove_instructions,
Some(total)
),
opt_simplify = format_timing(
self.timings.optimize.simplify_graph,
Some(total)
),
opt_method_local =
format_timing(self.timings.optimize.method_local, Some(total)),
opt_total = format_timing(self.timings.optimize.total, Some(total)),
llvm = format_timing(self.timings.llvm, Some(total)),
link = format_timing(self.timings.link, Some(total)),
Expand Down Expand Up @@ -604,17 +595,8 @@ LLVM module timings:
mir.remove_unused_methods(&self.state.db);
});

// Optimization passes may remove instructions or mutate blocks in
// such a way that they are a bit messy. By simplifying the graph we
// reduce the amount of LLVM IR we need to generate.
measure(&mut self.timings.optimize.simplify_graph, || {
mir.simplify_graph();
});

// Inlining and other optimizations may result in unused
// instructions, so let's get rid of those.
measure(&mut self.timings.optimize.remove_instructions, || {
mir.remove_unused_instructions();
measure(&mut self.timings.optimize.method_local, || {
mir.apply_method_local_optimizations(self.state.config.threads);
});
}

Expand Down
163 changes: 92 additions & 71 deletions compiler/src/mir/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@ pub(crate) mod specialize;

use crate::state::State;
use crate::symbol_names::{qualified_type_name, SymbolNames};
use crossbeam_queue::ArrayQueue;
use indexmap::IndexMap;
use location::Location;
use std::collections::{HashMap, HashSet};
use std::fmt;
use std::hash::{Hash, Hasher};
use std::mem::swap;
use std::ops::{Add, AddAssign, Sub, SubAssign};
use std::thread;
use types::module_name::ModuleName;
use types::{
Database, ForeignType, Intrinsic, MethodId, Module as ModuleType, Shape,
Expand Down Expand Up @@ -1956,6 +1958,19 @@ impl Method {
self.body.start_id -= shift_map[self.body.start_id.0];
}

/// Applies the optimizations that are local to this method, i.e. those that
/// don't depend on any other method.
///
/// The passes run in a fixed order: the graph is simplified first, then
/// unreachable blocks are removed, and finally unused instructions are
/// removed.
fn apply_local_optimizations(&mut self) {
self.simplify_graph();

// Simplifying the graph is likely to produce many unreachable basic
// blocks, so we need to remove those.
self.remove_unreachable_blocks();
self.remove_unused_instructions();
}

/// Simplify the CFG of the method, such as by merging redundant basic
/// blocks.
fn simplify_graph(&mut self) {
let mut idx = 0;

Expand Down Expand Up @@ -2004,6 +2019,67 @@ impl Method {
}
}
}

/// Drops instructions that have no side effects and whose output register
/// is never used.
///
/// Instructions such as `Int` and `String` only produce a value. If nothing
/// uses the register they write to, the whole instruction can be removed.
///
/// On its own this pass doesn't achieve much, but combined with passes such
/// as copy propagation it can remove many redundant instructions.
pub(crate) fn remove_unused_instructions(&mut self) {
    let mut use_counts = self.register_use_counts();

    // Dropping an instruction releases the register it reads from, which may
    // leave the instruction producing that register without any users. We
    // therefore keep sweeping until a sweep no longer releases a register.
    loop {
        let mut released = false;

        for block in &mut self.body.blocks {
            block.instructions.retain(|instruction| {
                // The register written to, and the register read from (if
                // any), for the instructions that are candidates for removal.
                let (output, input) = match instruction {
                    Instruction::Float(v) => (v.register, None),
                    Instruction::Int(v) => (v.register, None),
                    Instruction::Nil(v) => (v.register, None),
                    Instruction::String(v) => (v.register, None),
                    Instruction::Bool(v) => (v.register, None),
                    Instruction::Allocate(v) => (v.register, None),
                    Instruction::Spawn(v) => (v.register, None),
                    Instruction::GetConstant(v) => (v.register, None),
                    Instruction::MethodPointer(v) => (v.register, None),
                    Instruction::SizeOf(v) => (v.register, None),
                    Instruction::MoveRegister(v) => {
                        (v.target, Some(v.source))
                    }
                    Instruction::GetField(v) => {
                        (v.register, Some(v.receiver))
                    }
                    Instruction::FieldPointer(v) => {
                        (v.register, Some(v.receiver))
                    }
                    Instruction::Cast(v) => (v.register, Some(v.source)),
                    // Any other instruction is kept unconditionally.
                    _ => return true,
                };

                if use_counts[output.0] > 0 {
                    return true;
                }

                if let Some(register) = input {
                    use_counts[register.0] -= 1;
                    released = true;
                }

                false
            });
        }

        if !released {
            break;
        }
    }
}
}

/// An Inko program in its MIR form.
Expand Down Expand Up @@ -2331,81 +2407,26 @@ impl Mir {
}
}

/// Simplify the CFG of each method, such as by merging redundant basic
/// blocks.
pub(crate) fn simplify_graph(&mut self) {
for method in self.methods.values_mut() {
method.simplify_graph();

// The above code is likely to produce many unreachable basic
// blocks, so we need to remove those.
method.remove_unreachable_blocks();
}
}

/// Removes instructions that write to an unused register without side
/// effects.
/// Applies method-local optimizations to all methods.
///
/// Instructions such as `Int` and `String` don't produce side effects,
/// meaning that if the register they write to isn't used, the entire
/// instruction can be removed.
///
/// This method isn't terribly useful on its own, but when combined with
/// e.g. copy propagation it can result in the removal of many redundant
/// instructions.
pub(crate) fn remove_unused_instructions(&mut self) {
for method in self.methods.values_mut() {
let mut uses = method.register_use_counts();
let mut repeat = true;

// Removing an instruction may result in other instructions becoming
// unused, so we repeat this until we run out of instructions to
// remove.
while repeat {
repeat = false;

for block in &mut method.body.blocks {
block.instructions.retain(|ins| {
let (reg, src) = match ins {
Instruction::Float(i) => (i.register, None),
Instruction::Int(i) => (i.register, None),
Instruction::Nil(i) => (i.register, None),
Instruction::String(i) => (i.register, None),
Instruction::Bool(i) => (i.register, None),
Instruction::Allocate(i) => (i.register, None),
Instruction::Spawn(i) => (i.register, None),
Instruction::GetConstant(i) => (i.register, None),
Instruction::MethodPointer(i) => (i.register, None),
Instruction::SizeOf(i) => (i.register, None),
Instruction::MoveRegister(i) => {
(i.target, Some(i.source))
}
Instruction::GetField(i) => {
(i.register, Some(i.receiver))
}
Instruction::FieldPointer(i) => {
(i.register, Some(i.receiver))
}
Instruction::Cast(i) => {
(i.register, Some(i.source))
}
_ => return true,
};

if uses[reg.0] > 0 {
return true;
}
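/// The optimizations are applied in parallel as they don't rely on any
/// shared (mutable) state. The methods are placed in a queue from which
/// `threads` worker threads pop and optimize them, so `threads` must be
/// greater than zero or no method is optimized.
///
/// NOTE(review): `ArrayQueue::new` is documented to panic when given a
/// capacity of zero, so this presumably assumes at least one method exists;
/// confirm or guard against an empty set of methods.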
pub(crate) fn apply_method_local_optimizations(&mut self, threads: usize) {
let queue = ArrayQueue::new(self.methods.len());

if let Some(src) = src {
uses[src.0] -= 1;
repeat = true;
}
for method in self.methods.values_mut() {
let _ = queue.push(method);
}

false
});
}
thread::scope(|s| {
for _ in 0..threads {
s.spawn(|| {
while let Some(m) = queue.pop() {
m.apply_local_optimizations();
}
});
}
}
});
}
}

Expand Down

0 comments on commit ec8d1f4

Please sign in to comment.