Skip to content

Commit 22fa83d

Browse files
[LV][VPlan] Add initial support for CSA vectorization
This patch adds initial support for conditional scalar assignment (CSA) vectorization to LLVM. This new class of vectorization can be characterized by vectorization of an assignment to a scalar in a loop, such that the assignment is conditional from the perspective of its use. An assignment is conditional in a loop if a value may or may not be assigned in the loop body. For example:

```
int t = init_val;
for (int i = 0; i < N; i++) {
  if (cond[i])
    t = a[i];
}
s = t; // use t
```

Using pseudo-LLVM code this can be vectorized as

```
vector.ph:
  ...
  %t = %init_val
  %init.mask = <all-false-vec>
  %init.data = <poison-vec> ; uninitialized
vector.body:
  ...
  %mask.phi = phi [%init.mask, %vector.ph], [%new.mask, %vector.body]
  %data.phi = phi [%init.data, %vector.ph], [%new.data, %vector.body]
  %cond.vec = <widened-cmp> ...
  %a.vec = <widened-load> %a, %i
  %b = <any-lane-active> %cond.vec
  %new.mask = select %b, %cond.vec, %mask.phi
  %new.data = select %b, %a.vec, %data.phi
  ...
middle.block:
  %s = <extract-last-active-lane> %new.mask, %new.data
```

On each iteration, we track whether any lane in the widened condition was active, and if it was, we take the current mask and data as the new mask and data vectors. Then at the end of the loop, the scalar can be extracted only once. This transformation works the same way for integer, pointer, and floating point conditional assignment, since the transformation does not require inspection of the data being assigned.

In the vectorization of a CSA, we will be introducing recipes into the vector preheader, the vector body, and the middle block. Recipes that are introduced into the preheader and middle block are executed only one time, and recipes that are in the vector body will possibly be executed multiple times. The more times that the vector body is executed, the less of an impact the preheader and middle block costs have on the overall cost of a CSA.

A detailed explanation of the concept can be found [here](https://discourse.llvm.org/t/vectorization-of-conditional-scalar-assignment-csa/80964).
This patch is further tested in llvm/llvm-test-suite#155. This patch contains only the non-EVL related code. It is based on the larger patch llvm#106560, which contained both the EVL and non-EVL related parts.
1 parent 7c165f7 commit 22fa83d

19 files changed

+3907
-21
lines changed

llvm/include/llvm/Analysis/IVDescriptors.h

+65-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
//
77
//===----------------------------------------------------------------------===//
88
//
9-
// This file "describes" induction and recurrence variables.
9+
// This file "describes" induction, recurrence, and conditional scalar
10+
// assignment variables.
1011
//
1112
//===----------------------------------------------------------------------===//
1213

@@ -423,6 +424,69 @@ class InductionDescriptor {
423424
SmallVector<Instruction *, 2> RedundantCasts;
424425
};
425426

427+
/// A Conditional Scalar Assignment is an assignment from an initial
428+
/// scalar that may or may not occur.
429+
class ConditionalScalarAssignmentDescriptor {
430+
/// If the conditional assignment occurs inside a loop, then Phi chooses
431+
/// the value of the assignment from the entry block or the loop body block.
432+
PHINode *Phi = nullptr;
433+
434+
/// The initial value of the ConditionalScalarAssignment. If the condition
435+
/// guarding the assignment is not met, then the assignment retains this
436+
/// value.
437+
Value *InitScalar = nullptr;
438+
439+
/// The Instruction that conditionally assigned to inside the loop.
440+
SelectInst *Assignment = nullptr;
441+
442+
/// Create a ConditionalScalarAssignmentDescriptor that models a valid
443+
/// conditional scalar assignment with its members initialized correctly.
444+
ConditionalScalarAssignmentDescriptor(PHINode *Phi, SelectInst *Assignment,
445+
Value *InitScalar)
446+
: Phi(Phi), InitScalar(InitScalar), Assignment(Assignment) {}
447+
448+
public:
449+
/// Create a ConditionalScalarAssignmentDescriptor that models an invalid
450+
/// ConditionalScalarAssignment.
451+
ConditionalScalarAssignmentDescriptor() = default;
452+
453+
/// If Phi is the root of a ConditionalScalarAssignment, set
454+
/// ConditionalScalarAssignmentDesc as the ConditionalScalarAssignment rooted
455+
/// by Phi. Otherwise, return a false, leaving ConditionalScalarAssignmentDesc
456+
/// unmodified.
457+
static bool
458+
isConditionalScalarAssignmentPhi(PHINode *Phi, Loop *TheLoop,
459+
ConditionalScalarAssignmentDescriptor &Desc);
460+
461+
operator bool() const { return isValid(); }
462+
463+
/// Returns whether SI is the Assignment in ConditionalScalarAssignment
464+
static bool isConditionalScalarAssignmentSelect(
465+
ConditionalScalarAssignmentDescriptor Desc, SelectInst *SI) {
466+
return Desc.getAssignment() == SI;
467+
}
468+
469+
/// Return whether this ConditionalScalarAssignmentDescriptor models a valid
470+
/// ConditionalScalarAssignment.
471+
bool isValid() const { return Phi && InitScalar && Assignment; }
472+
473+
/// Return the PHI that roots this ConditionalScalarAssignment.
474+
PHINode *getPhi() const { return Phi; }
475+
476+
/// Return the initial value of the ConditionalScalarAssignment. This is the
477+
/// value if the conditional assignment does not occur.
478+
Value *getInitScalar() const { return InitScalar; }
479+
480+
/// The Instruction that is used after the loop
481+
SelectInst *getAssignment() const { return Assignment; }
482+
483+
/// Return the condition that this ConditionalScalarAssignment is conditional
484+
/// upon.
485+
Value *getCond() const {
486+
return Assignment ? Assignment->getCondition() : nullptr;
487+
}
488+
};
489+
426490
} // end namespace llvm
427491

428492
#endif // LLVM_ANALYSIS_IVDESCRIPTORS_H

llvm/include/llvm/Analysis/TargetTransformInfo.h

+9
Original file line numberDiff line numberDiff line change
@@ -1854,6 +1854,10 @@ class TargetTransformInfo {
18541854
: EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {}
18551855
};
18561856

1857+
/// \returns true if the loop vectorizer should vectorize conditional
1858+
/// scalar assignments for the target.
1859+
bool enableConditionalScalarAssignmentVectorization() const;
1860+
18571861
/// \returns How the target needs this vector-predicated operation to be
18581862
/// transformed.
18591863
VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
@@ -2309,6 +2313,7 @@ class TargetTransformInfo::Concept {
23092313
SmallVectorImpl<Use *> &OpsToSink) const = 0;
23102314

23112315
virtual bool isVectorShiftByScalarCheap(Type *Ty) const = 0;
2316+
virtual bool enableConditionalScalarAssignmentVectorization() const = 0;
23122317
virtual VPLegalization
23132318
getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
23142319
virtual bool hasArmWideBranch(bool Thumb) const = 0;
@@ -3135,6 +3140,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
31353140
return Impl.isVectorShiftByScalarCheap(Ty);
31363141
}
31373142

3143+
/// Forwards the query to \c Impl and returns its answer.
bool enableConditionalScalarAssignmentVectorization() const override {
3144+
return Impl.enableConditionalScalarAssignmentVectorization();
3145+
}
3146+
31383147
VPLegalization
31393148
getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
31403149
return Impl.getVPLegalizationStrategy(PI);

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

+2
Original file line numberDiff line numberDiff line change
@@ -1030,6 +1030,8 @@ class TargetTransformInfoImplBase {
10301030

10311031
bool isVectorShiftByScalarCheap(Type *Ty) const { return false; }
10321032

1033+
/// Base implementation: reports that conditional scalar assignment
/// vectorization is not enabled.
bool enableConditionalScalarAssignmentVectorization() const { return false; }
1034+
10331035
TargetTransformInfo::VPLegalization
10341036
getVPLegalizationStrategy(const VPIntrinsic &PI) const {
10351037
return TargetTransformInfo::VPLegalization(

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

+27
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,12 @@ class LoopVectorizationLegality {
269269
/// induction descriptor.
270270
using InductionList = MapVector<PHINode *, InductionDescriptor>;
271271

272+
/// ConditionalScalarAssignmentList contains the
273+
/// ConditionalScalarAssignmentDescriptors for all the conditional scalar
274+
/// assignments that were found in the loop, rooted by their phis.
275+
using ConditionalScalarAssignmentList =
276+
MapVector<PHINode *, ConditionalScalarAssignmentDescriptor>;
277+
272278
/// RecurrenceSet contains the phi nodes that are recurrences other than
273279
/// inductions and reductions.
274280
using RecurrenceSet = SmallPtrSet<const PHINode *, 8>;
@@ -321,6 +327,18 @@ class LoopVectorizationLegality {
321327
/// Returns True if V is a Phi node of an induction variable in this loop.
322328
bool isInductionPhi(const Value *V) const;
323329

330+
/// Returns the conditional scalar assignments found in the loop, keyed by the
/// PHI that roots each of them.
331+
const ConditionalScalarAssignmentList &
332+
getConditionalScalarAssignments() const {
333+
return ConditionalScalarAssignments;
334+
}
335+
336+
/// Returns true if Phi is the root of a conditional scalar assignments in the
337+
/// loop.
338+
bool isConditionalScalarAssignmentPhi(PHINode *Phi) const {
339+
return ConditionalScalarAssignments.count(Phi) != 0;
340+
}
341+
324342
/// Returns a pointer to the induction descriptor, if \p Phi is an integer or
325343
/// floating point induction.
326344
const InductionDescriptor *getIntOrFpInductionDescriptor(PHINode *Phi) const;
@@ -554,6 +572,12 @@ class LoopVectorizationLegality {
554572
void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID,
555573
SmallPtrSetImpl<Value *> &AllowedExit);
556574

575+
/// Updates the vectorization state by adding \p Phi to the
576+
/// ConditionalScalarAssignment list.
577+
void addConditionalScalarAssignmentPhi(
578+
PHINode *Phi, const ConditionalScalarAssignmentDescriptor &Desc,
579+
SmallPtrSetImpl<Value *> &AllowedExit);
580+
557581
/// The loop that we evaluate.
558582
Loop *TheLoop;
559583

@@ -598,6 +622,9 @@ class LoopVectorizationLegality {
598622
/// variables can be pointers.
599623
InductionList Inductions;
600624

625+
/// Holds the conditional scalar assignments
626+
ConditionalScalarAssignmentList ConditionalScalarAssignments;
627+
601628
/// Holds all the casts that participate in the update chain of the induction
602629
/// variables, and that have been proven to be redundant (possibly under a
603630
/// runtime guard). These casts can be ignored when creating the vectorized

llvm/lib/Analysis/IVDescriptors.cpp

+59-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
//
77
//===----------------------------------------------------------------------===//
88
//
9-
// This file "describes" induction and recurrence variables.
9+
// This file "describes" induction, recurrence, and conditional scalar
10+
// assignment variables.
1011
//
1112
//===----------------------------------------------------------------------===//
1213

@@ -1570,3 +1571,60 @@ bool InductionDescriptor::isInductionPHI(
15701571
D = InductionDescriptor(StartValue, IK_PtrInduction, Step);
15711572
return true;
15721573
}
1574+
1575+
/// Return ConditionalScalarAssignmentDescriptor that describes a
1576+
/// ConditionalScalarAssignment that matches one of these patterns:
1577+
/// phi loop_inv, (select cmp, value, phi)
1578+
/// phi loop_inv, (select cmp, phi, value)
1579+
/// phi (select cmp, value, phi), loop_inv
1580+
/// phi (select cmp, phi, value), loop_inv
1581+
/// If the ConditionalScalarAssignment does not match any of these paterns,
1582+
/// return a ConditionalScalarAssignmentDescriptor that describes an
1583+
/// InvalidConditionalScalarAssignment.
1584+
bool ConditionalScalarAssignmentDescriptor::isConditionalScalarAssignmentPhi(
1585+
PHINode *Phi, Loop *TheLoop, ConditionalScalarAssignmentDescriptor &Desc) {
1586+
1587+
// Must be a scalar.
1588+
Type *Type = Phi->getType();
1589+
if (!Type->isIntegerTy() && !Type->isFloatingPointTy() &&
1590+
!Type->isPointerTy())
1591+
return false;
1592+
1593+
// Match phi loop_inv, (select cmp, value, phi)
1594+
// or phi loop_inv, (select cmp, phi, value)
1595+
// or phi (select cmp, value, phi), loop_inv
1596+
// or phi (select cmp, phi, value), loop_inv
1597+
if (Phi->getNumIncomingValues() != 2)
1598+
return false;
1599+
auto SelectInstIt = find_if(Phi->incoming_values(), [&Phi](const Use &U) {
1600+
return match(U.get(), m_Select(m_Value(), m_Specific(Phi), m_Value())) ||
1601+
match(U.get(), m_Select(m_Value(), m_Value(), m_Specific(Phi)));
1602+
});
1603+
if (SelectInstIt == Phi->incoming_values().end())
1604+
return false;
1605+
auto LoopInvIt = find_if(Phi->incoming_values(), [&](Use &U) {
1606+
return U.get() != *SelectInstIt && TheLoop->isLoopInvariant(U.get());
1607+
});
1608+
if (LoopInvIt == Phi->incoming_values().end())
1609+
return false;
1610+
1611+
// Phi or Sel must be used only outside the loop,
1612+
// excluding if Phi use Sel or Sel use Phi
1613+
auto IsOnlyUsedOutsideLoop = [&](Value *V, Value *Ignore) {
1614+
return all_of(V->users(), [Ignore, TheLoop](User *U) {
1615+
if (U == Ignore)
1616+
return true;
1617+
if (auto *I = dyn_cast<Instruction>(U))
1618+
return !TheLoop->contains(I);
1619+
return true;
1620+
});
1621+
};
1622+
SelectInst *Select = cast<SelectInst>(SelectInstIt->get());
1623+
Value *LoopInv = LoopInvIt->get();
1624+
if (!IsOnlyUsedOutsideLoop(Phi, Select) ||
1625+
!IsOnlyUsedOutsideLoop(Select, Phi))
1626+
return false;
1627+
1628+
Desc = ConditionalScalarAssignmentDescriptor(Phi, Select, LoopInv);
1629+
return true;
1630+
}

llvm/lib/Analysis/TargetTransformInfo.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -1374,6 +1374,11 @@ bool TargetTransformInfo::preferEpilogueVectorization() const {
13741374
return TTIImpl->preferEpilogueVectorization();
13751375
}
13761376

1377+
// Delegates the query to TTIImpl and returns its answer.
bool TargetTransformInfo::enableConditionalScalarAssignmentVectorization()
1378+
const {
1379+
return TTIImpl->enableConditionalScalarAssignmentVectorization();
1380+
}
1381+
13771382
TargetTransformInfo::VPLegalization
13781383
TargetTransformInfo::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
13791384
return TTIImpl->getVPLegalizationStrategy(VPI);

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -2433,6 +2433,11 @@ bool RISCVTTIImpl::isLegalMaskedExpandLoad(Type *DataTy, Align Alignment) {
24332433
return true;
24342434
}
24352435

2436+
bool RISCVTTIImpl::enableConditionalScalarAssignmentVectorization() const {
2437+
return ST->hasVInstructions() &&
2438+
ST->getProcFamily() == RISCVSubtarget::SiFive7;
2439+
}
2440+
24362441
bool RISCVTTIImpl::isLegalMaskedCompressStore(Type *DataTy, Align Alignment) {
24372442
auto *VTy = dyn_cast<VectorType>(DataTy);
24382443
if (!VTy || VTy->isScalableTy())

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

+4
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,10 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
306306
return TLI->isVScaleKnownToBeAPowerOfTwo();
307307
}
308308

309+
/// \returns true if the loop vectorizer should vectorize conditional
310+
/// scalar assignments for the target.
311+
bool enableConditionalScalarAssignmentVectorization() const;
312+
309313
/// \returns How the target needs this vector-predicated operation to be
310314
/// transformed.
311315
TargetTransformInfo::VPLegalization

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

+37-4
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,10 @@ static cl::opt<bool> EnableHistogramVectorization(
8383
"enable-histogram-loop-vectorization", cl::init(false), cl::Hidden,
8484
cl::desc("Enables autovectorization of some loops containing histograms"));
8585

86+
/// Command-line control for conditional scalar assignment (CSA)
/// vectorization. Defaults to false; when the flag is not given on the command
/// line, canVectorizeInstrs() falls back to the target's
/// enableConditionalScalarAssignmentVectorization() hook.
static cl::opt<bool> EnableConditionalScalarAssignment(
    "enable-csa-vectorization", cl::init(false), cl::Hidden,
    cl::desc("Control whether vectorization of conditional scalar "
             "assignments (CSA) is enabled"));
89+
8690
/// Maximum vectorization interleave count.
8791
static const unsigned MaxInterleaveFactor = 16;
8892

@@ -749,6 +753,18 @@ bool LoopVectorizationLegality::setupOuterLoopInductions() {
749753
return llvm::all_of(Header->phis(), IsSupportedPhi);
750754
}
751755

756+
/// Record \p Phi as the root of the conditional scalar assignment described by
/// \p Desc and add \p Phi to \p AllowedExit. \p Desc must be valid.
void LoopVectorizationLegality::addConditionalScalarAssignmentPhi(
    PHINode *Phi, const ConditionalScalarAssignmentDescriptor &Desc,
    SmallPtrSetImpl<Value *> &AllowedExit) {
  assert(Desc.isValid() &&
         "Expected Valid ConditionalScalarAssignmentDescriptor");
  LLVM_DEBUG(
      dbgs() << "LV: found legal conditional scalar assignment opportunity"
             << *Phi << "\n");

  // The two containers are independent: remember the descriptor under its
  // root PHI, and register the PHI in the allowed-exit set.
  ConditionalScalarAssignments.insert({Phi, Desc});
  AllowedExit.insert(Phi);
}
767+
752768
/// Checks if a function is scalarizable according to the TLI, in
753769
/// the sense that it should be vectorized and then expanded in
754770
/// multiple scalar calls. This is represented in the
@@ -878,14 +894,27 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
878894
continue;
879895
}
880896

881-
// As a last resort, coerce the PHI to a AddRec expression
882-
// and re-try classifying it a an induction PHI.
897+
// Try to coerce the PHI to an AddRec expression and re-try classifying
898+
// it as an induction PHI.
883899
if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
884900
!IsDisallowedStridedPointerInduction(ID)) {
885901
addInductionPhi(Phi, ID, AllowedExit);
886902
continue;
887903
}
888904

905+
// Check if the PHI can be classified as a conditional scalar assignment
906+
// PHI.
907+
if (EnableConditionalScalarAssignment ||
908+
(TTI->enableConditionalScalarAssignmentVectorization() &&
909+
EnableConditionalScalarAssignment.getNumOccurrences() == 0)) {
910+
ConditionalScalarAssignmentDescriptor Desc;
911+
if (ConditionalScalarAssignmentDescriptor::
912+
isConditionalScalarAssignmentPhi(Phi, TheLoop, Desc)) {
913+
addConditionalScalarAssignmentPhi(Phi, Desc, AllowedExit);
914+
continue;
915+
}
916+
}
917+
889918
reportVectorizationFailure("Found an unidentified PHI",
890919
"value that could not be identified as "
891920
"reduction is used outside the loop",
@@ -1883,11 +1912,15 @@ bool LoopVectorizationLegality::canFoldTailByMasking() const {
18831912
for (const auto &Reduction : getReductionVars())
18841913
ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
18851914

1915+
SmallPtrSet<const Value *, 8> CSALiveOuts;
1916+
for (const auto &CSA : getConditionalScalarAssignments())
1917+
CSALiveOuts.insert(CSA.second.getAssignment());
1918+
18861919
// TODO: handle non-reduction outside users when tail is folded by masking.
18871920
for (auto *AE : AllowedExit) {
18881921
// Check that all users of allowed exit values are inside the loop or
1889-
// are the live-out of a reduction.
1890-
if (ReductionLiveOuts.count(AE))
1922+
// are the live-out of a reduction or conditional scalar assignment.
1923+
if (ReductionLiveOuts.count(AE) || CSALiveOuts.count(AE))
18911924
continue;
18921925
for (User *U : AE->users()) {
18931926
Instruction *UI = cast<Instruction>(U);

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

+22-2
Original file line numberDiff line numberDiff line change
@@ -174,8 +174,8 @@ class VPBuilder {
174174
new VPInstruction(Opcode, Operands, WrapFlags, DL, Name));
175175
}
176176

177-
VPValue *createNot(VPValue *Operand, DebugLoc DL = {},
178-
const Twine &Name = "") {
177+
/// Create a VPInstruction::Not with \p Operand as its only operand, passing
/// \p DL and \p Name through to createInstruction, and return the result.
VPInstruction *createNot(VPValue *Operand, DebugLoc DL = {},
178+
const Twine &Name = "") {
179179
return createInstruction(VPInstruction::Not, {Operand}, DL, Name);
180180
}
181181

@@ -261,6 +261,26 @@ class VPBuilder {
261261
FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags()));
262262
}
263263

264+
/// Create a VPInstruction::ConditionalScalarAssignmentMaskPhi with
/// \p InitMask as its only operand, passing \p DL and \p Name through to
/// createInstruction, and return the result.
VPInstruction *createConditionalScalarAssignmentMaskPhi(VPValue *InitMask,
265+
DebugLoc DL,
266+
const Twine &Name) {
267+
return createInstruction(VPInstruction::ConditionalScalarAssignmentMaskPhi,
268+
{InitMask}, DL, Name);
269+
}
270+
271+
/// Create a VPInstruction::AnyOf with \p Cond as its only operand, passing
/// \p DL and \p Name through to createInstruction, and return the result.
VPInstruction *createAnyOf(VPValue *Cond, DebugLoc DL, const Twine &Name) {
272+
return createInstruction(VPInstruction::AnyOf, {Cond}, DL, Name);
273+
}
274+
275+
/// Create a VPInstruction::ConditionalScalarAssignmentMaskSel whose operands
/// are, in order, \p Cond, \p MaskPhi and \p AnyOf, passing \p DL and \p Name
/// through to createInstruction, and return the result.
VPInstruction *createConditionalScalarAssignmentMaskSel(VPValue *Cond,
276+
VPValue *MaskPhi,
277+
VPValue *AnyOf,
278+
DebugLoc DL,
279+
const Twine &Name) {
280+
return createInstruction(VPInstruction::ConditionalScalarAssignmentMaskSel,
281+
{Cond, MaskPhi, AnyOf}, DL, Name);
282+
}
283+
264284
//===--------------------------------------------------------------------===//
265285
// RAII helpers.
266286
//===--------------------------------------------------------------------===//

0 commit comments

Comments
 (0)