Skip to content

Commit 4b3bf91

Browse files
neildharcortinico
authored and committed Aug 15, 2024
Recycle block local registers in fast pass (#1448)
Summary: Original Author: [email protected] Original Git: 6b69a06 Original Reviewed By: avp Original Revision: D59072005 The register allocator has the ability to honour a memory limit that is proportional to the product of the number of instructions and basic blocks in the function being allocated. Unfortunately, functions that hit this limit by definition have a lot of instructions. Even in the most degenerate case where every block has one instruction, you need 4000 instructions to hit the 10M limit. This diff tries to improve the quality of generated code in cases where most values are used within the basic block they are defined in. In such cases, we currently make the register available after the end of the block. With this diff, the registers become available after their last use in the block. This is useful for functions with extremely large basic blocks, where the current approach would end up allocating a huge number of registers since the registers cannot be reused within the same block. Closes #1448 Reviewed By: avp Differential Revision: D60241766 fbshipit-source-id: 5196333862517cd546d675cf8fe005eb1ed5a790
1 parent 1edbe36 commit 4b3bf91

12 files changed

+600
-579
lines changed
 

‎include/hermes/BCGen/RegAlloc.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -384,9 +384,9 @@ class RegisterAllocator {
384384
/// predecessor blocks.
385385
void lowerPhis(ArrayRef<BasicBlock *> order);
386386

387-
/// Allocate the registers for the instructions in the function. The order of
388-
/// the block needs to match the order which we'll use for instruction
389-
/// selection.
387+
/// Allocate the registers for the instructions in the function. The blocks
388+
/// must be in reverse-post-order, and must match the order which we'll use
389+
/// for instruction selection.
390390
void allocate(ArrayRef<BasicBlock *> order);
391391

392392
/// Reserves consecutive registers that will be manually managed by the user.

‎lib/BCGen/RegAlloc.cpp

+60-29
Original file line numberDiff line numberDiff line change
@@ -535,19 +535,6 @@ void RegisterAllocator::coalesce(
535535
}
536536
}
537537

538-
namespace {
539-
/// Determines whether the Instruction is ever used outside its BasicBlock.
540-
bool isBlockLocal(Instruction *inst) {
541-
BasicBlock *parent = inst->getParent();
542-
for (auto user : inst->getUsers()) {
543-
if (parent != user->getParent()) {
544-
return false;
545-
}
546-
}
547-
return true;
548-
}
549-
} // namespace
550-
551538
void RegisterAllocator::allocateFastPass(ArrayRef<BasicBlock *> order) {
552539
// Make sure Phis and related Movs get the same register
553540
for (auto *bb : order) {
@@ -563,26 +550,70 @@ void RegisterAllocator::allocateFastPass(ArrayRef<BasicBlock *> order) {
563550
}
564551
}
565552

566-
llvh::SmallVector<Register, 16> blockLocals;
553+
// Bit vector indicating whether a register with a given index is being used
554+
// as a block local register.
555+
llvh::BitVector blockLocals;
556+
557+
// List of free block local registers. We have to maintain this outside the
558+
// file because we cannot determine interference between local and global
559+
// registers. So we have to ensure that the local registers are only reused
560+
// for other block-local instructions.
561+
llvh::SmallVector<Register, 8> freeBlockLocals;
562+
563+
// A dummy register used for all instructions that have no users.
564+
Register deadReg = file.allocateRegister();
565+
566+
// Iterate in reverse, so we can cheaply determine whether an instruction
567+
// is local, and assign it a register accordingly.
568+
for (auto *bb : llvh::reverse(order)) {
569+
for (auto &inst : llvh::reverse(*bb)) {
570+
if (isAllocated(&inst)) {
571+
// If this is using a local register, we know the register is free after
572+
// we visit the definition.
573+
auto reg = getRegister(&inst);
574+
auto idx = reg.getIndex();
575+
if (idx < blockLocals.size() && blockLocals.test(idx))
576+
freeBlockLocals.push_back(reg);
577+
} else {
578+
// Unallocated instruction means the result is dead, because all users
579+
// are visited first. Allocate a temporary register.
580+
// Note that we cannot assert that the instruction has no users, because
581+
// there may be users in dead blocks.
582+
updateRegister(&inst, deadReg);
583+
}
567584

568-
// Then just allocate the rest sequentially, while optimizing the case
569-
// where an inst is only ever used in its own block.
570-
for (auto *bb : order) {
571-
for (auto &inst : *bb) {
572-
if (!isAllocated(&inst)) {
573-
Register R = file.allocateRegister();
574-
updateRegister(&inst, R);
575-
if (inst.getNumUsers() == 0) {
576-
file.killRegister(R);
577-
} else if (isBlockLocal(&inst)) {
578-
blockLocals.push_back(R);
585+
// Allocate a register to unallocated operands.
586+
for (size_t i = 0, e = inst.getNumOperands(); i < e; ++i) {
587+
auto *op = llvh::dyn_cast<Instruction>(inst.getOperand(i));
588+
589+
// Skip if op is not an instruction or already has a register.
590+
if (!op || isAllocated(op))
591+
continue;
592+
593+
if (op->getParent() != bb) {
594+
// Live across blocks, allocate a global register.
595+
updateRegister(op, file.allocateRegister());
596+
continue;
579597
}
598+
599+
// We know this operand is local because:
600+
// 1. The operand is in the same block as this one.
601+
// 2. All blocks dominated by this block have been visited.
602+
// 3. All users must be dominated by their def, since Phis are
603+
// allocated beforehand.
604+
if (!freeBlockLocals.empty()) {
605+
updateRegister(op, freeBlockLocals.pop_back_val());
606+
continue;
607+
}
608+
609+
// No free local register, allocate another one.
610+
Register reg = file.allocateRegister();
611+
if (blockLocals.size() <= reg.getIndex())
612+
blockLocals.resize(reg.getIndex() + 1);
613+
blockLocals.set(reg.getIndex());
614+
updateRegister(op, reg);
580615
}
581616
}
582-
for (auto &reg : blockLocals) {
583-
file.killRegister(reg);
584-
}
585-
blockLocals.clear();
586617
}
587618
}
588619

0 commit comments

Comments
 (0)
Please sign in to comment.