-
Notifications
You must be signed in to change notification settings - Fork 64
/
Copy pathmatrix3x4SIMD.h
263 lines (210 loc) · 7.26 KB
/
matrix3x4SIMD.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O.
// This file is part of the "Nabla Engine".
// For conditions of distribution and use, see copyright notice in nabla.h
#ifndef __NBL_MATRIX3X4SIMD_H_INCLUDED__
#define __NBL_MATRIX3X4SIMD_H_INCLUDED__
#include "vectorSIMD.h"
#include "quaternion.h"
namespace nbl::core
{
class matrix4x3;
#define _NBL_MATRIX_ALIGNMENT _NBL_SIMD_ALIGNMENT
static_assert(_NBL_MATRIX_ALIGNMENT>=_NBL_VECTOR_ALIGNMENT,"Matrix must be equally or more aligned than vector!");
//! Equivalent of GLSL's mat4x3
class matrix3x4SIMD// : private AllocationOverrideBase<_NBL_MATRIX_ALIGNMENT> EBO inheritance problem w.r.t `rows[3]`
{
public:
_NBL_STATIC_INLINE_CONSTEXPR uint32_t VectorCount = 3u;
vectorSIMDf rows[VectorCount];
explicit matrix3x4SIMD( const vectorSIMDf& _r0 = vectorSIMDf(1.f, 0.f, 0.f, 0.f),
const vectorSIMDf& _r1 = vectorSIMDf(0.f, 1.f, 0.f, 0.f),
const vectorSIMDf& _r2 = vectorSIMDf(0.f, 0.f, 1.f, 0.f)) : rows{_r0, _r1, _r2}
{
}
matrix3x4SIMD( float _a00, float _a01, float _a02, float _a03,
float _a10, float _a11, float _a12, float _a13,
float _a20, float _a21, float _a22, float _a23)
: matrix3x4SIMD(vectorSIMDf(_a00, _a01, _a02, _a03),
vectorSIMDf(_a10, _a11, _a12, _a13),
vectorSIMDf(_a20, _a21, _a22, _a23))
{
}
explicit matrix3x4SIMD(const float* const _data)
{
if (!_data)
return;
for (size_t i = 0u; i < VectorCount; ++i)
rows[i] = vectorSIMDf(_data + 4*i);
}
matrix3x4SIMD(const float* const _data, bool ALIGNED)
{
if (!_data)
return;
for (size_t i = 0u; i < VectorCount; ++i)
rows[i] = vectorSIMDf(_data + 4*i, ALIGNED);
}
float* pointer() { return rows[0].pointer; }
const float* pointer() const { return rows[0].pointer; }
inline matrix3x4SIMD& set(const matrix4x3& _retarded);
inline matrix4x3 getAsRetardedIrrlichtMatrix() const;
static inline matrix3x4SIMD concatenateBFollowedByA(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b);
static inline matrix3x4SIMD concatenateBFollowedByAPrecisely(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b);
inline matrix3x4SIMD& concatenateAfter(const matrix3x4SIMD& _other)
{
return *this = concatenateBFollowedByA(*this, _other);
}
inline matrix3x4SIMD& concatenateBefore(const matrix3x4SIMD& _other)
{
return *this = concatenateBFollowedByA(_other, *this);
}
inline matrix3x4SIMD& concatenateAfterPrecisely(const matrix3x4SIMD& _other)
{
return *this = concatenateBFollowedByAPrecisely(*this, _other);
}
inline matrix3x4SIMD& concatenateBeforePrecisely(const matrix3x4SIMD& _other)
{
return *this = concatenateBFollowedByAPrecisely(_other, *this);
}
inline bool operator==(const matrix3x4SIMD& _other)
{
return !(*this != _other);
}
inline bool operator!=(const matrix3x4SIMD& _other);
inline matrix3x4SIMD operator-() const
{
matrix3x4SIMD retval;
retval.rows[0] = -rows[0];
retval.rows[1] = -rows[1];
retval.rows[2] = -rows[2];
return retval;
}
inline matrix3x4SIMD& operator+=(const matrix3x4SIMD& _other);
inline matrix3x4SIMD operator+(const matrix3x4SIMD& _other) const
{
matrix3x4SIMD retval(*this);
return retval += _other;
}
inline matrix3x4SIMD& operator-=(const matrix3x4SIMD& _other);
inline matrix3x4SIMD operator-(const matrix3x4SIMD& _other) const
{
matrix3x4SIMD retval(*this);
return retval -= _other;
}
inline matrix3x4SIMD& operator*=(float _scalar);
inline matrix3x4SIMD operator*(float _scalar) const
{
matrix3x4SIMD retval(*this);
return retval *= _scalar;
}
inline matrix3x4SIMD& setTranslation(const vectorSIMDf& _translation)
{
// no faster way of doing it?
rows[0].w = _translation.x;
rows[1].w = _translation.y;
rows[2].w = _translation.z;
return *this;
}
inline vectorSIMDf getTranslation() const;
inline vectorSIMDf getTranslation3D() const;
inline matrix3x4SIMD& setScale(const vectorSIMDf& _scale);
inline vectorSIMDf getScale() const;
inline void transformVect(vectorSIMDf& _out, const vectorSIMDf& _in) const;
inline void transformVect(vectorSIMDf& _in_out) const
{
transformVect(_in_out, _in_out);
}
inline void pseudoMulWith4x1(vectorSIMDf& _out, const vectorSIMDf& _in) const;
inline void pseudoMulWith4x1(vectorSIMDf& _in_out) const
{
pseudoMulWith4x1(_in_out,_in_out);
}
inline void mulSub3x3WithNx1(vectorSIMDf& _out, const vectorSIMDf& _in) const;
inline void mulSub3x3WithNx1(vectorSIMDf& _in_out) const
{
mulSub3x3WithNx1(_in_out, _in_out);
}
inline static matrix3x4SIMD buildCameraLookAtMatrixLH(
const vectorSIMDf& position,
const vectorSIMDf& target,
const vectorSIMDf& upVector);
inline static matrix3x4SIMD buildCameraLookAtMatrixRH(
const vectorSIMDf& position,
const vectorSIMDf& target,
const vectorSIMDf& upVector);
inline matrix3x4SIMD& setRotation(const quaternion& _quat);
inline matrix3x4SIMD& setScaleRotationAndTranslation( const vectorSIMDf& _scale,
const quaternion& _quat,
const vectorSIMDf& _translation);
inline vectorSIMDf getPseudoDeterminant() const
{
vectorSIMDf tmp;
return determinant_helper(tmp);
}
inline bool getInverse(matrix3x4SIMD& _out) const;
bool makeInverse()
{
matrix3x4SIMD tmp;
if (getInverse(tmp))
{
*this = tmp;
return true;
}
return false;
}
//
inline bool getSub3x3InverseTranspose(matrix3x4SIMD& _out) const;
//
inline bool getSub3x3InverseTransposePacked(float outRows[9]) const
{
matrix3x4SIMD tmp;
if (!getSub3x3InverseTranspose(tmp))
return false;
float* _out = outRows;
for (auto i=0; i<3; i++)
{
const auto& row = tmp.rows[i];
for (auto j=0; j<3; j++)
*(_out++) = row[j];
}
return true;
}
//
inline core::matrix3x4SIMD getSub3x3TransposeCofactors() const;
//
inline void setTransformationCenter(const vectorSIMDf& _center, const vectorSIMDf& _translation);
//
static inline matrix3x4SIMD buildAxisAlignedBillboard(
const vectorSIMDf& camPos,
const vectorSIMDf& center,
const vectorSIMDf& translation,
const vectorSIMDf& axis,
const vectorSIMDf& from);
//
float& operator()(size_t _i, size_t _j) { return rows[_i].pointer[_j]; }
const float& operator()(size_t _i, size_t _j) const { return rows[_i].pointer[_j]; }
//
inline const vectorSIMDf& operator[](size_t _rown) const { return rows[_rown]; }
inline vectorSIMDf& operator[](size_t _rown) { return rows[_rown]; }
private:
static inline vectorSIMDf doJob(const __m128& a, const matrix3x4SIMD& _mtx);
// really need that dvec<2> or wider
inline __m128d halfRowAsDouble(size_t _n, bool _0) const;
static inline __m128d doJob_d(const __m128d& _a0, const __m128d& _a1, const matrix3x4SIMD& _mtx, bool _xyHalf);
vectorSIMDf determinant_helper(vectorSIMDf& r1crossr2) const
{
r1crossr2 = core::cross(rows[1], rows[2]);
return core::dot(rows[0], r1crossr2);
}
};
inline matrix3x4SIMD concatenateBFollowedByA(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b)
{
return matrix3x4SIMD::concatenateBFollowedByA(_a, _b);
}
/*
inline matrix3x4SIMD concatenateBFollowedByAPrecisely(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b)
{
return matrix3x4SIMD::concatenateBFollowedByAPrecisely(_a, _b);
}
*/
}
#endif