Skip to content

Commit d5b6b52

Browse files
authoredOct 11, 2024··
Merge pull request #47 from GPUEngineering/f/46-givens
Givens rotations and Givens annihilation
2 parents 40b83a1 + eec8641 commit d5b6b52

File tree

4 files changed

+376
-24
lines changed

4 files changed

+376
-24
lines changed
 

‎CHANGELOG.md

+11
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,17 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

88

9+
<!-- ---------------------
10+
v1.3.0
11+
--------------------- -->
12+
## v1.3.0 - 11-10-2024
13+
14+
### Added
15+
16+
- Left/right Givens rotations
17+
- `GivensAnnihilator` implemented
18+
19+
920
<!-- ---------------------
1021
v1.2.1
1122
--------------------- -->

‎include/tensor.cuh

+203-6
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ inline std::vector<int> generateIntRandomVector(size_t n, int low, int hi) {
7474
* @param threads_per_block threads per block (defaults to THREADS_PER_BLOCK)
7575
* @return number of blocks
7676
*/
77-
constexpr size_t numBlocks(size_t n, size_t threads_per_block=THREADS_PER_BLOCK) {
77+
constexpr size_t numBlocks(size_t n, size_t threads_per_block = THREADS_PER_BLOCK) {
    /* Integer ceiling of n / threads_per_block: one extra block when there is a remainder */
    const size_t fullBlocks = n / threads_per_block;
    const size_t remainder = n % threads_per_block;
    return fullBlocks + (remainder != 0);
}
8080

@@ -390,6 +390,51 @@ public:
390390
*/
391391
T minAbs() const;
392392

393+
/**
394+
* Applies the right Givens rotation G(i, j, c, s) to columns i and j,
395+
* with cos θ = c, and sin θ = s. The rotation is applied in-place
396+
* to all slices of the tensor. Recall that the Givens rotation is
397+
*
398+
* i j
399+
* |1 0 ... 0 |
400+
* |0 1 ... 0 |
401+
* | |
402+
* | 1 |
403+
* i | c s |
404+
* G' = ' '
405+
* j | -s c |
406+
* | 1 |
407+
* | |
408+
* | 0 |
409+
*
410+
* The right Givens rotation consists in multiplying from the right
411+
* by G.
412+
*
413+
* Equivalently, the right Givens transformation performs the following
414+
* operation on the i and j columns of this matrix:
415+
*
416+
* A[:, i] <-- c * A[:, i] - s * A[:, j]
417+
* A[:, j] <-- s * A[:, i] + c * A[:, j]
418+
*
419+
* @param i first column index
420+
* @param j second column index
421+
* @param c cos θ
422+
* @param minus_s minus sin θ
423+
* @throws std::invalid_argument if either i or j is greater than or equal to the number of columns
424+
*/
425+
void applyRightGivensRotation(size_t i, size_t j, const T *c, const T *minus_s);
426+
427+
/**
428+
* Performs a Givens rotation (left multiplication by G')
429+
*
430+
* @param i first row index
431+
* @param j second row index
432+
* @param c cos θ
433+
* @param minus_s minus sin θ
434+
* @throws std::invalid_argument if either i or j is greater than or equal to the number of rows
435+
*/
436+
void applyLeftGivensRotation(size_t i, size_t j, const T *c, const T *minus_s);
437+
393438
/**
394439
* Batch solves `A \ b`.
395440
* Solves `bi <- Ai \ bi` for each k-index `i`.
@@ -449,7 +494,6 @@ public:
449494
}
450495

451496
friend DTensor<T> operator*(T a, DTensor &B) {
452-
size_t nrA = B.m_numRows, ncB = B.m_numCols, nmB = B.m_numMats;
453497
DTensor<T> result(B);
454498
result *= a;
455499
return result;
@@ -471,6 +515,8 @@ DTensor<T> DTensor<T>::createRandomTensor(size_t numRows, size_t numCols, size_t
471515
auto randVec = generateIntRandomVector(numRows * numCols * numMats, low, hi);
472516
DTensor<T> a(randVec, numRows, numCols, numMats);
473517
return a;
518+
} else {
519+
throw std::invalid_argument("[createRandomTensor] unsupported type T");
474520
}
475521
}
476522

@@ -675,6 +721,38 @@ inline double DTensor<double>::minAbs() const {
675721
return std::signbit(hostDst) ? -hostDst : hostDst;
676722
}
677723

724+
template<typename T>
725+
void DTensor<T>::applyRightGivensRotation(size_t i, size_t j, const T *c, const T *minus_s) {
726+
if (m_numMats > 1) throw std::invalid_argument("[applyRightGivensRotation] tensors (nMat>1) not supported");
727+
T *col_i = m_d_data + i * m_numRows;
728+
T *col_j = m_d_data + j * m_numRows;
729+
if constexpr (std::is_same_v<T, double>) {
730+
gpuErrChk(cublasDrot(Session::getInstance().cuBlasHandle(), m_numRows,
731+
col_i, 1, col_j, 1, c, minus_s));
732+
} else if constexpr (std::is_same_v<T, float>) {
733+
gpuErrChk(cublasSrot(Session::getInstance().cuBlasHandle(), m_numRows,
734+
col_i, 1, col_j, 1, c, minus_s));
735+
}
736+
}
737+
738+
template<typename T>
739+
void DTensor<T>::applyLeftGivensRotation(size_t i, size_t j, const T *c, const T *minus_s) {
740+
if (m_numMats > 1) throw std::invalid_argument("[applyLeftGivensRotation] tensors (nMat>1) not supported");
741+
if constexpr (std::is_same_v<T, double>) {
742+
gpuErrChk(cublasDrot(Session::getInstance().cuBlasHandle(), m_numCols,
743+
m_d_data + i, m_numRows,
744+
m_d_data + j, m_numRows,
745+
c, minus_s));
746+
} else if constexpr (std::is_same_v<T, float>) {
747+
gpuErrChk(cublasSrot(Session::getInstance().cuBlasHandle(), m_numCols,
748+
m_d_data + i, m_numRows,
749+
m_d_data + j, m_numRows,
750+
c, minus_s));
751+
} else {
752+
throw std::invalid_argument("[applyLeftGivensRotation] Unsupported type T");
753+
}
754+
}
755+
678756
template<typename T>
679757
inline bool DTensor<T>::allocateOnDevice(size_t size, bool zero) {
680758
if (size <= 0) return false;
@@ -1132,7 +1210,7 @@ public:
11321210
DTensor<T> Si(*m_S, 2, i, i);
11331211
DTensor<unsigned int> rankI(*m_rank, 2, i, i);
11341212
k_countNonzeroSingularValues<T><<<numBlocks(numElS), THREADS_PER_BLOCK>>>(Si.raw(), numElS,
1135-
rankI.raw(), epsilon);
1213+
rankI.raw(), epsilon);
11361214
}
11371215
return *m_rank;
11381216
}
@@ -1380,14 +1458,19 @@ public:
13801458
* @param b provided matrix
13811459
* @return status code of computation
13821460
*/
1383-
int leastSquares(DTensor<T> &);
1461+
int leastSquares(DTensor<T> &b);
13841462

13851463
/**
13861464
* Populate the given tensors with Q and R.
13871465
* Caution! This is an inefficient method: only to be used for debugging.
1388-
* @return resulting Q and R from factorisation
1466+
*
1467+
* @param Q matrix Q (preallocated)
1468+
* @param R matrix R (preallocated)
1469+
* @return status code
1470+
*
1471+
* @throws std::invalid_argument if Q or R have invalid dimensions
13891472
*/
1390-
int getQR(DTensor<T> &, DTensor<T> &);
1473+
int getQR(DTensor<T> &Q, DTensor<T> &R);
13911474

13921475
};
13931476

@@ -1759,4 +1842,118 @@ inline void CholeskyBatchFactoriser<float>::solve(DTensor<float> &b) {
17591842
m_numMats));
17601843
}
17611844

1845+
1846+
1847+
/* ================================================================================================
1848+
* GIVENS ANNIHILATOR
1849+
* ================================================================================================ */
1850+
1851+
/**
1852+
* GivensAnnihilator is used to apply a left Givens rotation that
1853+
* makes a particular element (k, j) of a matrix zero by applying
1854+
* an appropriate Givens rotation G(i, k, c, s).
1855+
*
1856+
* @tparam T data type of tensor (must be float or double)
1857+
*/
1858+
TEMPLATE_WITH_TYPE_T TEMPLATE_CONSTRAINT_REQUIRES_FPX
class GivensAnnihilator {

private:
    /**
     * Non-owning pointer to the matrix being operated on. It is null
     * until a matrix is supplied (via the constructor or setMatrix);
     * the referenced tensor must outlive its use by this object.
     */
    DTensor<T> *m_matrix = nullptr;
    /**
     * Auxiliary memory on the device of length 3 used to store
     * rhypot(xij, xkj), cos θ, and sin θ.
     */
    std::unique_ptr<DTensor<T>> m_d_rhyp_cos_sin;

    /** Allocates the three-element auxiliary device buffer. */
    void init() {
        m_d_rhyp_cos_sin = std::make_unique<DTensor<T>>(3);
    }

public:

    /**
     * Creates an annihilator without a matrix; setMatrix must be
     * called before annihilate is used.
     */
    GivensAnnihilator() {
        init();
    }

    /**
     * Constructor of GivensAnnihilator
     *
     * Only the address of the given tensor is stored; no copy is made.
     *
     * @param a matrix
     * @throws std::invalid_argument if a.numMats() > 1
     */
    GivensAnnihilator(DTensor<T> &a) {
        if (a.numMats() > 1) {
            throw std::invalid_argument("[GivensAnnihilator] tensors (numMats > 1) not supported");
        }
        m_matrix = &a;
        init();
    }

    /**
     * Set the reference to a matrix; this way the current
     * object can be reused
     *
     * Only the address of the given tensor is stored; no copy is made.
     *
     * @param a matrix
     * @throws std::invalid_argument if a.numMats() > 1
     */
    void setMatrix(DTensor<T> &a) {
        if (a.numMats() > 1) {
            throw std::invalid_argument("[GivensAnnihilator] tensors (numMats > 1) not supported");
        }
        m_matrix = &a;
    }

    /**
     * Applies a left Givens rotation G(i, k, c, s) that eliminates
     * the (k, j) element of the given matrix.
     *
     * @param i row index i
     * @param k row index k
     * @param j column index j
     *
     * @throws std::invalid_argument if i, k, or j are out of bounds
     */
    void annihilate(size_t i, size_t k, size_t j);

};
1918+
1919+
/**
 * Computes the coefficients of the Givens rotation that eliminates the
 * element in position (k, j) using the element in position (i, j).
 *
 * Elements are read as data[row + col * nRows]. The kernel does not use
 * any thread index, so it is meant to be launched with a single thread.
 *
 * @param data matrix data (device memory)
 * @param res output of length 3 (device memory):
 *            res[0] = rhypot(xij, xkj), res[1] = xij * res[0] (cos),
 *            res[2] = xkj * res[0] (-sin)
 * @param i row index of the first element
 * @param k row index of the element to be eliminated
 * @param j column index
 * @param nRows number of rows of the matrix
 */
TEMPLATE_WITH_TYPE_T TEMPLATE_CONSTRAINT_REQUIRES_FPX
__global__ void k_givensAnnihilateRHypot(const T *data,
                                         T *res,
                                         size_t i, size_t k, size_t j,
                                         size_t nRows) {
    const T xij = data[i + j * nRows];
    const T xkj = data[k + j * nRows];
    const T reciprocalHypot = rhypot(xij, xkj);
    res[0] = reciprocalHypot;
    if (xij == T(0) && xkj == T(0)) {
        /*
         * Nothing to eliminate. The reciprocal hypotenuse is infinite here and
         * 0 * inf would produce NaN, so fall back to the identity rotation.
         */
        res[1] = T(1); // cos
        res[2] = T(0); // -sin
        return;
    }
    res[1] = xij * reciprocalHypot; // cos
    res[2] = xkj * reciprocalHypot; // -sin
}
1930+
1931+
template<typename T>
1932+
inline void GivensAnnihilator<T>::annihilate(size_t i, size_t k, size_t j) {
1933+
/* A few checks */
1934+
size_t nR = m_matrix->numRows(), nC = m_matrix->numCols();
1935+
if (i >= nR or k >= nR) throw std::invalid_argument("[GivensAnnihilator::annihilate] invalid row index");
1936+
if (j >= nC) std::invalid_argument("[GivensAnnihilator::annihilate] invalid column index j");
1937+
1938+
/*
1939+
* Pass cosine and sine as device pointers
1940+
* (Avoid having to download first)
1941+
*/
1942+
gpuErrChk(cublasSetPointerMode(Session::getInstance().cuBlasHandle(), CUBLAS_POINTER_MODE_DEVICE));
1943+
1944+
/* Useful definitions */
1945+
T *aux = m_d_rhyp_cos_sin->raw();
1946+
T *matData = m_matrix->raw();
1947+
1948+
/* Call kernel to determine 1/sqrt(Ai^2 + Ak^2) */
1949+
k_givensAnnihilateRHypot<<<1, 1>>>(m_matrix->raw(), aux, i, k, j, nR);
1950+
1951+
/* Apply Givens rotation */
1952+
m_matrix->applyLeftGivensRotation(i, k, aux + 1, aux + 2);
1953+
1954+
/* Change back to default behaviour */
1955+
gpuErrChk(cublasSetPointerMode(Session::getInstance().cuBlasHandle(), CUBLAS_POINTER_MODE_HOST));
1956+
}
1957+
1958+
17621959
#endif

‎main.cu

+9-9
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,18 @@
1212

1313

1414
int main() {
15-
// cudaStream_t stream1;
16-
// cudaStreamCreate(&stream1);
17-
// cublasSetStream(Session::getInstance().cuBlasHandle(), stream1);
1815

19-
cudaStream_t s1;
20-
cudaStreamCreate(&s1);
16+
size_t m = 10;
17+
size_t n = 6;
18+
std::vector<double> v(m*n);
19+
v.reserve(m*n);
20+
std::iota(v.begin(), v.end(), 1);
21+
DTensor<double> a = DTensor<double>(v, m, n);
2122

22-
auto a = DTensor<float>::createRandomTensor(2000, 200, 1, -2, 2);
23-
Svd svd(a);
24-
svd.factorise();
23+
auto ga = GivensAnnihilator<double>(a);
24+
ga.annihilate(0, 1, 2);
2525

26-
std::cout << svd.singularValues();
26+
std::cout << a;
2727

2828

2929
return 0;

‎test/testTensor.cu

+153-9
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,78 @@ TEST_F(TensorTest, tensorMin) {
391391
tensorMin<double>();
392392
}
393393

394+
/* ---------------------------------------
395+
* Tensor: right Givens rotation
396+
* --------------------------------------- */
397+
398+
TEMPLATE_WITH_TYPE_T
399+
void tensorRightGivens(T epsilon) {
400+
// Construct matrix A
401+
size_t m = 10;
402+
size_t n = 6;
403+
std::vector<T> v(m * n);
404+
v.reserve(m * n);
405+
std::iota(v.begin(), v.end(), 1);
406+
auto a = DTensor<T>(v, m, n, 1);
407+
408+
// Apply right Givens rotation G
409+
size_t i_givens = 1, j_givens = 4;
410+
T c = 0.1;
411+
T minus_s = sqrt(1 - c * c);
412+
a.applyRightGivensRotation(i_givens, j_givens, &c, &minus_s);
413+
414+
// Check the result
415+
for (size_t i = 0; i < m; i++) {
416+
EXPECT_NEAR(1 + i, a(i, 0), epsilon);
417+
EXPECT_NEAR(21 + i, a(i, 2), epsilon);
418+
EXPECT_NEAR(31 + i, a(i, 3), epsilon);
419+
EXPECT_NEAR((11 + i) * c - (41 + i) * (-minus_s), a(i, i_givens), epsilon);
420+
EXPECT_NEAR((11 + i) * (-minus_s) + (41 + i) * c, a(i, j_givens), epsilon);
421+
}
422+
}
423+
424+
TEST_F(TensorTest, tensorRightGivens) {
425+
tensorRightGivens<float>(PRECISION_LOW);
426+
tensorRightGivens<double>(PRECISION_HIGH);
427+
}
428+
429+
/* ---------------------------------------
430+
* Tensor: left Givens rotation
431+
* --------------------------------------- */
432+
433+
TEMPLATE_WITH_TYPE_T
434+
void tensorLeftGivens(T epsilon) {
435+
// Construct matrix A
436+
size_t m = 10;
437+
size_t n = 6;
438+
std::vector<double> v(m * n);
439+
v.reserve(m * n);
440+
std::iota(v.begin(), v.end(), 1);
441+
auto a = DTensor<double>(v, m, n, 1);
442+
443+
// Apply right Givens rotation G
444+
size_t i_givens = 1, j_givens = 9;
445+
double c = 0.1;
446+
double minus_s = -sqrt(1 - c * c);
447+
a.applyLeftGivensRotation(i_givens, j_givens, &c, &minus_s);
448+
449+
450+
// Check the result
451+
for (size_t j = 0; j < n; j++) {
452+
EXPECT_NEAR(1 + 10 * j, a(0, j), epsilon);
453+
for (size_t i = 2; i < m - 1; i++) {
454+
EXPECT_NEAR(1 + i + 10 * j, a(i, j), epsilon);
455+
}
456+
EXPECT_NEAR((2 + 10 * j) * c + (10 + 10 * j) * minus_s, a(i_givens, j), epsilon);
457+
EXPECT_NEAR((2 + 10 * j) * (-minus_s) + (10 + 10 * j) * c, a(j_givens, j), epsilon);
458+
}
459+
}
460+
461+
TEST_F(TensorTest, tensorLeftGivens) {
462+
tensorLeftGivens<float>(1e-10);
463+
tensorLeftGivens<double>(1e-14);
464+
}
465+
394466
/* ---------------------------------------
395467
* Tensor operator() to access element
396468
* e.g., t(2, 3, 4)
@@ -1119,14 +1191,14 @@ void qrLeastSquares(T epsilon) {
11191191
size_t nR = 4;
11201192
size_t nC = 3;
11211193
DTensor<T> temp(nR, nC);
1122-
std::vector<T> vecA = { 85.5638, -59.4001, -80.1992,
1123-
99.9464, 5.51393, 5.17935,
1124-
6.87488, -26.7536, 36.0914,
1125-
-44.3857, -32.1268, 54.8915 }; // Random matrix
1126-
std::vector<T> vecB = { -23.3585,
1127-
-48.5744,
1128-
43.4229,
1129-
-56.5081 }; // Random vector
1194+
std::vector<T> vecA = {85.5638, -59.4001, -80.1992,
1195+
99.9464, 5.51393, 5.17935,
1196+
6.87488, -26.7536, 36.0914,
1197+
-44.3857, -32.1268, 54.8915}; // Random matrix
1198+
std::vector<T> vecB = {-23.3585,
1199+
-48.5744,
1200+
43.4229,
1201+
-56.5081}; // Random vector
11301202
DTensor<T> A(vecA, nR, nC, 1, rowMajor);
11311203
DTensor<T> b(vecB, nR);
11321204
DTensor<T> xFull(nR);
@@ -1265,4 +1337,76 @@ void projectOnNullspaceTensor(T epsilon) {
12651337
TEST_F(NullspaceTest, projectOnNullspaceTensor) {
12661338
projectOnNullspaceTensor<float>(PRECISION_LOW);
12671339
projectOnNullspaceTensor<double>(PRECISION_HIGH);
1268-
}
1340+
}
1341+
1342+
1343+
/* ================================================================================================
1344+
* GIVENSANNIHILATOR TESTS
1345+
* ================================================================================================ */
1346+
class GivensAnnihilatorTest : public testing::Test {
1347+
protected:
1348+
GivensAnnihilatorTest() {}
1349+
1350+
virtual ~GivensAnnihilatorTest() {}
1351+
};
1352+
1353+
1354+
/* ---------------------------------------
1355+
* GivensAnnihilator works
1356+
* --------------------------------------- */
1357+
1358+
TEMPLATE_WITH_TYPE_T TEMPLATE_CONSTRAINT_REQUIRES_FPX
1359+
void givensAnnihilateElement(T epsilon) {
1360+
size_t m = 10;
1361+
size_t n = 6;
1362+
std::vector<T> v(m * n);
1363+
v.reserve(m * n);
1364+
std::iota(v.begin(), v.end(), 1);
1365+
1366+
auto a = DTensor<T>(v, m, n, 1);
1367+
auto ga = GivensAnnihilator<T>(a);
1368+
size_t i = 0;
1369+
for (size_t k = 1; k < m; k++) {
1370+
for (size_t j = 0; j < n; j++) {
1371+
ga.annihilate(i, k, j);
1372+
EXPECT_NEAR(0.0, a(k, j), epsilon);
1373+
}
1374+
}
1375+
}
1376+
1377+
TEST_F(GivensAnnihilatorTest, givensAnnihilateElement) {
1378+
givensAnnihilateElement<float>(PRECISION_LOW);
1379+
givensAnnihilateElement<double>(PRECISION_HIGH);
1380+
}
1381+
1382+
1383+
1384+
/* ---------------------------------------
1385+
* GivensAnnihilator: correctness
1386+
* --------------------------------------- */
1387+
1388+
TEMPLATE_WITH_TYPE_T TEMPLATE_CONSTRAINT_REQUIRES_FPX
1389+
void givensAnnihilateCorrectness(T epsilon) {
1390+
size_t m = 10, n = 6;
1391+
std::vector<double> v(m * n);
1392+
v.reserve(m * n);
1393+
std::iota(v.begin(), v.end(), 1);
1394+
DTensor<double> a = DTensor<double>(v, m, n);
1395+
1396+
auto ga = GivensAnnihilator<double>(a);
1397+
ga.annihilate(0, 1, 2);
1398+
1399+
EXPECT_NEAR(0.0, a(1, 2), epsilon);
1400+
EXPECT_NEAR(2.137186834969645, a(0, 0), epsilon);
1401+
EXPECT_NEAR(44.552125559751822, a(0, 3), epsilon);
1402+
EXPECT_NEAR(-0.328797974610715, a(1, 3), epsilon);
1403+
1404+
}
1405+
1406+
TEST_F(GivensAnnihilatorTest, givensAnnihilateCorrectness) {
1407+
givensAnnihilateCorrectness<double>(1e-14);
1408+
givensAnnihilateCorrectness<float>(1e-12);
1409+
}
1410+
1411+
1412+

0 commit comments

Comments
 (0)
Please sign in to comment.