Skip to content

Commit

Permalink
Don't apply the UHJ all-pass's first segment in the time domain
Browse files Browse the repository at this point in the history
Increases the delay by 128 samples, but replaces a time-domain convolution with
a frequency-domain one.
  • Loading branch information
kcat committed Oct 28, 2023
1 parent 6ac36a9 commit 6420bc8
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 57 deletions.
83 changes: 30 additions & 53 deletions core/uhjfilter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,25 +48,19 @@ using PFFFTSetupPtr = std::unique_ptr<PFFFT_Setup,PFFFTSetupDeleter>;
* signal of length N*2 - 1, and this holds true regardless of the convolution
* being applied in the frequency domain, so these "overflow" samples need to
* be accounted for.
*
* To avoid a delay with gathering enough input samples to apply an FFT with,
* the first segment is applied directly in the time-domain as the samples come
* in. Once enough have been retrieved, the FFT is applied on the input and
* it's paired with the remaining (FFT'd) filter segments for processing.
*/
template<size_t N>
struct SplitFilter {
struct SegmentedFilter {
static constexpr size_t sFftLength{256};
static constexpr size_t sSampleLength{sFftLength / 2};
static constexpr size_t sNumSegments{N/sSampleLength - 1};
static constexpr size_t sNumSegments{N/sSampleLength};
static_assert(N >= sFftLength);
static_assert((N % sSampleLength) == 0);

PhaseShifterT<sSampleLength> mPShift{NoInit{}};
PFFFTSetupPtr mFft;
alignas(16) std::array<float,sFftLength*sNumSegments> mFilterData;

SplitFilter()
SegmentedFilter()
{
mFft = PFFFTSetupPtr{pffft_new_setup(sFftLength, PFFFT_REAL)};

Expand Down Expand Up @@ -97,36 +91,27 @@ struct SplitFilter {
fftBuffer[i] = std::conj(fftBuffer[fft_size - i]);
inverse_fft(al::span{fftBuffer.get(), fft_size});

/* Store the first segment of the filter to apply in the time-domain
* (backwards for more efficient processing).
*/
auto fftiter = fftBuffer.get() + sSampleLength - 1;
for(float &coeff : mPShift.mCoeffs)
{
coeff = static_cast<float>(fftiter->real() / double{fft_size});
fftiter -= 2;
}

/* The remaining segments of the filter are converted back to the
* frequency domain, each on their own (0 stuffed).
/* The segments of the filter are converted back to the frequency
* domain, each on their own (0 stuffed).
*/
auto fftBuffer2 = std::make_unique<complex_d[]>(sFftLength);
auto fftTmp = al::vector<float,16>(sFftLength);
float *filter{mFilterData.data()};
for(size_t s{0};s < sNumSegments;++s)
{
for(size_t i{0};i < sSampleLength;++i)
fftBuffer[i] = fftBuffer[sSampleLength*(s+1) + i].real() / double{fft_size};
std::fill_n(fftBuffer.get()+sSampleLength, sSampleLength, complex_d{});
forward_fft(al::span{fftBuffer.get(), sFftLength});
fftBuffer2[i] = fftBuffer[sSampleLength*s + i].real() / double{fft_size};
std::fill_n(fftBuffer2.get()+sSampleLength, sSampleLength, complex_d{});
forward_fft(al::span{fftBuffer2.get(), sFftLength});

/* Convert to zdomain data for PFFFT, scaled by the FFT length so
* the iFFT result will be normalized.
*/
for(size_t i{0};i < sSampleLength;++i)
{
fftTmp[i*2 + 0] = static_cast<float>(fftBuffer[i].real()) / float{sFftLength};
fftTmp[i*2 + 1] = static_cast<float>((i == 0) ? fftBuffer[sSampleLength].real()
: fftBuffer[i].imag()) / float{sFftLength};
fftTmp[i*2 + 0] = static_cast<float>(fftBuffer2[i].real()) / float{sFftLength};
fftTmp[i*2 + 1] = static_cast<float>((i == 0) ? fftBuffer2[sSampleLength].real()
: fftBuffer2[i].imag()) / float{sFftLength};
}
pffft_zreorder(mFft.get(), fftTmp.data(), filter, PFFFT_BACKWARD);
filter += sFftLength;
Expand All @@ -135,7 +120,7 @@ struct SplitFilter {
};

template<size_t N>
const SplitFilter<N> gSplitFilter;
const SegmentedFilter<N> gSegmentedFilter;

template<size_t N>
const PhaseShifterT<N> PShifter;
Expand Down Expand Up @@ -212,7 +197,7 @@ template<size_t N>
void UhjEncoder<N>::encode(float *LeftOut, float *RightOut,
const al::span<const float*const,3> InSamples, const size_t SamplesToDo)
{
static constexpr auto &Filter = gSplitFilter<N>;
static constexpr auto &Filter = gSegmentedFilter<N>;
static_assert(sFftLength == Filter.sFftLength);
static_assert(sSegmentSize == Filter.sSampleLength);
static_assert(sNumSegments == Filter.sNumSegments);
Expand All @@ -237,45 +222,37 @@ void UhjEncoder<N>::encode(float *LeftOut, float *RightOut,
{
const size_t todo{minz(sSegmentSize-mFifoPos, SamplesToDo-base)};

/* Transform the non-delayed input and store in the later half of the
/* Copy out the samples that were previously processed by the FFT. */
std::copy_n(mWXInOut.begin()+mFifoPos, todo, mD.begin()+base);

/* Transform the non-delayed input and store in the front half of the
* filter input.
*/
std::transform(winput+base, winput+base+todo, xinput+base,
mWXIn.begin()+sSegmentSize + mFifoPos,
std::transform(winput+base, winput+base+todo, xinput+base, mWXInOut.begin()+mFifoPos,
[](const float w, const float x) noexcept -> float
{ return -0.3420201f*w + 0.5098604f*x; });

/* Apply the first segment of the filter in the time domain. */
auto buf_iter = mD.begin() + base;
Filter.mPShift.process({buf_iter, todo}, mWXIn.data()+1 + mFifoPos);

/* Add the other segments of the filter that were previously processed
* by the FFT.
*/
auto fifo_iter = mWXOut.begin() + mFifoPos;
std::transform(fifo_iter, fifo_iter+todo, buf_iter, buf_iter, std::plus<>{});

mFifoPos += todo;
base += todo;

/* Check whether the input buffer is filled with new samples. */
if(mFifoPos < sSegmentSize) break;
mFifoPos = 0;

/* Move the newest input to the front for the next iteration's history. */
std::copy(mWXIn.cbegin()+sSegmentSize, mWXIn.cend(), mWXIn.begin());
std::fill(mWXIn.begin()+sSegmentSize, mWXIn.end(), 0.0f);
/* Copy the new input to the next history segment, clearing the back
* half of the segment, and convert to the frequency domain.
*/
float *input{mWXHistory.data() + curseg*sFftLength};
std::copy_n(mWXInOut.begin(), sSegmentSize, input);
std::fill_n(input+sSegmentSize, sSegmentSize, 0.0f);

/* Overwrite the stale/oldest FFT'd segment with the newest input. */
pffft_transform(Filter.mFft.get(), mWXIn.data(), mWXHistory.data() + curseg*sFftLength,
mWorkData.data(), PFFFT_FORWARD);
pffft_transform(Filter.mFft.get(), input, input, mWorkData.data(), PFFFT_FORWARD);

/* Convolve each input segment with its IR filter counterpart (aligned
* in time).
* in time, from newest to oldest).
*/
mFftBuffer.fill(0.0f);
const float *filter{Filter.mFilterData.data()};
const float *input{mWXHistory.data() + curseg*sFftLength};
for(size_t s{curseg};s < sNumSegments;++s)
{
pffft_zconvolve_accumulate(Filter.mFft.get(), input, filter, mFftBuffer.data());
Expand All @@ -297,9 +274,9 @@ void UhjEncoder<N>::encode(float *LeftOut, float *RightOut,
mWorkData.data(), PFFFT_BACKWARD);

for(size_t i{0};i < sSegmentSize;++i)
mWXOut[i] = mFftBuffer[i] + mWXOut[sSegmentSize+i];
mWXInOut[i] = mFftBuffer[i] + mWXInOut[sSegmentSize+i];
for(size_t i{0};i < sSegmentSize;++i)
mWXOut[sSegmentSize+i] = mFftBuffer[sSegmentSize+i];
mWXInOut[sSegmentSize+i] = mFftBuffer[sSegmentSize+i];

/* Shift the input history. */
curseg = curseg ? (curseg-1) : (sNumSegments-1);
Expand All @@ -324,7 +301,7 @@ void UhjEncoder<N>::encode(float *LeftOut, float *RightOut,

float *inout{al::assume_aligned<16>(buffer)};
auto inout_end = inout + SamplesToDo;
if(SamplesToDo >= sFilterDelay) LIKELY
if(SamplesToDo >= sFilterDelay)
{
auto delay_end = std::rotate(inout, inout_end - sFilterDelay, inout_end);
std::swap_ranges(inout, delay_end, distbuf);
Expand Down
7 changes: 3 additions & 4 deletions core/uhjfilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,10 @@ struct UhjEncoderBase {

template<size_t N>
struct UhjEncoder final : public UhjEncoderBase {
static constexpr size_t sFilterDelay{N/2};
static constexpr size_t sFftLength{256};
static constexpr size_t sSegmentSize{sFftLength/2};
static constexpr size_t sNumSegments{N/sSegmentSize - 1};
static constexpr size_t sNumSegments{N/sSegmentSize};
static constexpr size_t sFilterDelay{N/2 + sSegmentSize};

/* Delays and processing storage for the input signal. */
alignas(16) std::array<float,BufferLineSize+sFilterDelay> mW{};
Expand All @@ -66,8 +66,7 @@ struct UhjEncoder final : public UhjEncoderBase {

/* History and temp storage for the convolution filter. */
size_t mFifoPos{}, mCurrentSegment{};
alignas(16) std::array<float,sFftLength> mWXIn{};
alignas(16) std::array<float,sFftLength> mWXOut{};
alignas(16) std::array<float,sFftLength> mWXInOut{};
alignas(16) std::array<float,sFftLength> mFftBuffer{};
alignas(16) std::array<float,sFftLength> mWorkData{};
alignas(16) std::array<float,sFftLength*sNumSegments> mWXHistory{};
Expand Down

0 comments on commit 6420bc8

Please sign in to comment.