Skip to content


New bloom filter: multistep downsampling / upsampling, to better preserve the smaller details.
Browse files Browse the repository at this point in the history

See merge request lightspeedrtx/dxvk-remix-nv!839
  • Loading branch information
sultim-t-nv committed Jun 10, 2024
1 parent 1ecad7a commit ac797fc
Show file tree
Hide file tree
Showing 9 changed files with 314 additions and 255 deletions.
6 changes: 3 additions & 3 deletions
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ Tables below enumerate all the options and their defaults set by RTX Remix. Note
|rtx.automation.disableUpdateUpscaleFromDlssPreset|bool|False|Disables updating upscaler from DLSS preset\.<br>This option is typically meant for automation of tests for which we don't want upscaler to be updated based on a DLSS preset\.|
|rtx.automation.suppressAssetLoadingErrors|bool|False|Suppresses asset loading errors by turning them into warnings\.<br>This option is typically meant for automation of tests for which acceptable asset loading issues are known\.|
|rtx.bloom.burnIntensity|float|1|Amount of bloom to add to the final image\.|
|rtx.bloom.enable|bool|True|Enable bloom \- glowing halos around intense, bright areas\.|
|rtx.bloom.luminanceThreshold|float|0.25|Adjust the bloom threshold to suppress blooming of the dim areas\. Pixels with luminance lower than the threshold are multiplied by the weight value that smoothly transitions from 1\.0 \(at luminance=threshold\) to 0\.0 \(at luminance=0\)\.|
|rtx.calculateLightIntensityUsingLeastSquares|bool|True|Enable usage of least squares for approximating a light's falloff curve rather than a more basic single point approach\. This will generally result in more accurate matching of the original application's custom light attenuation curves, especially with non physically based linear\-style attenuation\.|
|rtx.camera.enableFreeCamera|bool|False|Enables free camera\.|
|rtx.camera.freeCameraPitch|float|0|Free camera's pitch\.|
Expand Down
169 changes: 94 additions & 75 deletions src/dxvk/rtx_render/rtx_bloom.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,42 +26,41 @@
#include "rtx_render/rtx_shader_manager.h"
#include "rtx/pass/bloom/bloom.h"

#include <rtx_shaders/bloom_downscale.h>
#include <rtx_shaders/bloom_blur.h>
#include <rtx_shaders/bloom_downsample.h>
#include <rtx_shaders/bloom_upsample.h>
#include <rtx_shaders/bloom_composite.h>
#include <pxr/base/arch/math.h>
#include "rtx_imgui.h"

namespace dxvk {
// Defined within an unnamed namespace to ensure a unique definition across the binary
namespace {
class DownscaleShader : public ManagedShader
class BloomDownsampleShader : public ManagedShader
SHADER_SOURCE(DownscaleShader, VK_SHADER_STAGE_COMPUTE_BIT, bloom_downscale)
SHADER_SOURCE(BloomDownsampleShader, VK_SHADER_STAGE_COMPUTE_BIT, bloom_downsample)




class BlurShader : public ManagedShader
class BloomUpsampleShader : public ManagedShader
SHADER_SOURCE(BloomUpsampleShader, VK_SHADER_STAGE_COMPUTE_BIT, bloom_upsample)




class CompositeShader : public ManagedShader
Expand All @@ -84,94 +83,102 @@ namespace dxvk {
DxvkBloom::~DxvkBloom() {

void DxvkBloom::showImguiSettings()
void DxvkBloom::showImguiSettings() {
ImGui::Checkbox("Bloom Enabled", &enableObject());
ImGui::DragFloat("Bloom Sigma", &sigmaObject(), 0.001f, 0.f, 1.f, "%.3f", ImGuiSliderFlags_AlwaysClamp);
ImGui::DragFloat("Bloom Intensity", &intensityObject(), 0.001f, 0.f, 1.f, "%.3f", ImGuiSliderFlags_AlwaysClamp);
ImGui::DragFloat("Intensity##bloom", &burnIntensityObject(), 0.05f, 0.f, 5.f, "%.2f");
ImGui::DragFloat("Threshold##bloom", &luminanceThresholdObject(), 0.05f, 0.f, 100.f, "%.2f");

void DxvkBloom::dispatch(
Rc<RtxContext> ctx,
Rc<DxvkSampler> linearSampler,
const Resources::Resource& inOutColorBuffer)
void DxvkBloom::dispatch(Rc<RtxContext> ctx,
Rc<DxvkSampler> linearSampler,
const Resources::Resource& inOutColorBuffer) {
ScopedGpuProfileZone(ctx, "Bloom");


dispatchDownscale(ctx, inOutColorBuffer, m_bloomBuffer0);
dispatchBlur<false>(ctx, linearSampler, m_bloomBuffer0, m_bloomBuffer1);
dispatchBlur<true>(ctx, linearSampler, m_bloomBuffer1, m_bloomBuffer0);
dispatchComposite(ctx, linearSampler, inOutColorBuffer, m_bloomBuffer0);
const Resources::Resource* res[] = {
assert(std::size(m_bloomBuffer) == std::size(res) - 1);

for (uint32_t i = 0; i < std::size(res) - 1; i++) {
dispatchDownsampleStep(ctx, linearSampler, *res[i], *res[i + 1], i == 0);

for (uint32_t i = std::size(res) - 1; i > 1; i--) {
dispatchUpsampleStep(ctx, linearSampler, *res[i], *res[i - 1]);

dispatchComposite(ctx, linearSampler, inOutColorBuffer, m_bloomBuffer[0]);

void DxvkBloom::dispatchDownscale(
void DxvkBloom::dispatchDownsampleStep(
Rc<DxvkContext> ctx,
const Rc<DxvkSampler>& linearSampler,
const Resources::Resource& inputBuffer,
const Resources::Resource& outputBuffer)
ScopedGpuProfileZone(ctx, "Downscale");
const Resources::Resource& outputBuffer,
bool initial) {
ScopedGpuProfileZone(ctx, "Bloom Downsample");

VkExtent3D inputSize = inputBuffer.image->info().extent;
const VkExtent3D inputSize = inputBuffer.image->info().extent;
const VkExtent3D outputSize = outputBuffer.image->info().extent;

// Prepare shader arguments
BloomDownscaleArgs pushArgs = {};
pushArgs.inputSize = { (int)inputSize.width, (int)inputSize.height };
BloomDownsampleArgs pushArgs = {};
pushArgs.inputSizeInverse = { 1.0f / float(inputSize.width), 1.0f / float(inputSize.height) };
pushArgs.downsampledOutputSize = { outputSize.width, outputSize.height };
pushArgs.downsampledOutputSizeInverse = { 1.0f / float(outputSize.width), 1.0f / float(outputSize.height) };
pushArgs.threshold = initial ? std::max(0.01f, luminanceThreshold()) : -1;
ctx->pushConstants(0, sizeof(pushArgs), &pushArgs);

VkExtent3D workgroups = util::computeBlockCount(inputSize, VkExtent3D{ 16 , 16, 1 });
const VkExtent3D workgroups = util::computeBlockCount(outputSize, VkExtent3D{ 16, 16, 1 });

ctx->bindResourceView(BLOOM_DOWNSCALE_INPUT, inputBuffer.view, nullptr);
ctx->bindResourceView(BLOOM_DOWNSCALE_OUTPUT, outputBuffer.view, nullptr);
ctx->bindShader(VK_SHADER_STAGE_COMPUTE_BIT, DownscaleShader::getShader());
ctx->bindResourceView(BLOOM_DOWNSAMPLE_INPUT, inputBuffer.view, nullptr);
ctx->bindResourceSampler(BLOOM_DOWNSAMPLE_INPUT, linearSampler);
ctx->bindResourceView(BLOOM_DOWNSAMPLE_OUTPUT, outputBuffer.view, nullptr);
ctx->bindShader(VK_SHADER_STAGE_COMPUTE_BIT, BloomDownsampleShader::getShader());
ctx->dispatch(workgroups.width, workgroups.height, workgroups.depth);

template<bool isVertical>
void DxvkBloom::dispatchBlur(
void DxvkBloom::dispatchUpsampleStep(
Rc<DxvkContext> ctx,
Rc<DxvkSampler> linearSampler,
const Rc<DxvkSampler>& linearSampler,
const Resources::Resource& inputBuffer,
const Resources::Resource& outputBuffer)
ScopedGpuProfileZone(ctx, isVertical ? "Vertical Blur" : "Horizontal Blur");
const Resources::Resource& outputBuffer) {
ScopedGpuProfileZone(ctx, "Bloom Upsample");

VkExtent3D inputSize = inputBuffer.image->info().extent;
VkExtent3D outputSize = outputBuffer.image->info().extent;

// Prepare shader arguments
BloomBlurArgs pushArgs = {};
pushArgs.imageSize = { (int)inputSize.width, (int)inputSize.height };
pushArgs.invImageSize = { 1.f / (float)inputSize.width, 1.f / (float)inputSize.height };

float bloomSigmaInPixels = sigma() * (float)inputSize.height;

float effectiveSigma = bloomSigmaInPixels * 0.25f;
effectiveSigma = std::min(effectiveSigma, 100.f);
effectiveSigma = std::max(effectiveSigma, 1.f);

if (isVertical)
pushArgs.pixstep = { 0.f, 1.f };
pushArgs.pixstep = { 1.f, 0.f };
BloomUpsampleArgs pushArgs = {};
pushArgs.inputSizeInverse = { 1.f / float(inputSize.width), 1.f / float(inputSize.height) };
pushArgs.upsampledOutputSize = { outputSize.width, outputSize.height };
pushArgs.upsampledOutputSizeInverse = { 1.f / float(outputSize.width), 1.f / float(outputSize.height) };

pushArgs.argumentScale = -1.f / (2.0f * effectiveSigma * effectiveSigma);
pushArgs.normalizationScale = 1.f / (sqrtf(2.f * (float)M_PI) * effectiveSigma);
pushArgs.numSamples = (int)roundf(effectiveSigma * 4.f);
ctx->pushConstants(0, sizeof(pushArgs), &pushArgs);

VkExtent3D workgroups = util::computeBlockCount(inputSize, VkExtent3D{ 16 , 16, 1 });
VkExtent3D workgroups = util::computeBlockCount(outputSize, VkExtent3D{ 16, 16, 1 });

ctx->bindResourceView(BLOOM_BLUR_INPUT, inputBuffer.view, nullptr);
ctx->bindResourceSampler(BLOOM_BLUR_INPUT, linearSampler);
ctx->bindResourceView(BLOOM_BLUR_OUTPUT, outputBuffer.view, nullptr);
ctx->bindShader(VK_SHADER_STAGE_COMPUTE_BIT, BlurShader::getShader());
ctx->bindResourceView(BLOOM_UPSAMPLE_INPUT, inputBuffer.view, nullptr);
ctx->bindResourceSampler(BLOOM_UPSAMPLE_INPUT, linearSampler);
ctx->bindResourceView(BLOOM_UPSAMPLE_OUTPUT, outputBuffer.view, nullptr);
ctx->bindShader(VK_SHADER_STAGE_COMPUTE_BIT, BloomUpsampleShader::getShader());
ctx->dispatch(workgroups.width, workgroups.height, workgroups.depth);

void DxvkBloom::dispatchComposite(
Rc<DxvkContext> ctx,
Rc<DxvkSampler> linearSampler,
const Rc<DxvkSampler> &linearSampler,
const Resources::Resource& inOutColorBuffer,
const Resources::Resource& bloomBuffer)
Expand All @@ -181,9 +188,9 @@ namespace dxvk {

// Prepare shader arguments
BloomCompositeArgs pushArgs = {};
pushArgs.imageSize = { (int)outputSize.width, (int)outputSize.height };
pushArgs.invImageSize = { 1.f / (float)outputSize.width, 1.f / (float)outputSize.height };
pushArgs.blendFactor = std::max(0.f, std::min(1.f, intensity()));
pushArgs.imageSize = { outputSize.width, outputSize.height };
pushArgs.imageSizeInverse = { 1.f / float(outputSize.width), 1.f / float(outputSize.height) };
pushArgs.intensity = 0.01f * std::max(burnIntensity(), 0.0f);
ctx->pushConstants(0, sizeof(pushArgs), &pushArgs);

VkExtent3D workgroups = util::computeBlockCount(outputSize, VkExtent3D{ 16 , 16, 1 });
Expand All @@ -196,13 +203,25 @@ namespace dxvk {

void DxvkBloom::createTargetResource(Rc<DxvkContext>& ctx, const VkExtent3D& targetExtent) {
m_bloomBuffer0 = Resources::createImageResource(ctx, "bloom buffer 0", { util::ceilDivide(targetExtent.width, 4), util::ceilDivide(targetExtent.height, 4), 1 }, VK_FORMAT_R16G16B16A16_SFLOAT);
m_bloomBuffer1 = Resources::createImageResource(ctx, "bloom buffer 1", { util::ceilDivide(targetExtent.width, 4), util::ceilDivide(targetExtent.height, 4), 1 }, VK_FORMAT_R16G16B16A16_SFLOAT);
for (uint32_t i = 0; i < std::size(m_bloomBuffer); i++) {
const uint32_t divisor = (1U << (i + 1));

m_bloomBuffer[i] = Resources::createImageResource(
"bloom buffer",
util::ceilDivide(targetExtent.width, divisor),
util::ceilDivide(targetExtent.height, divisor),

void DxvkBloom::releaseTargetResource() {
for (auto& i : m_bloomBuffer) {

bool DxvkBloom::isActive() {
Expand Down
47 changes: 26 additions & 21 deletions src/dxvk/rtx_render/rtx_bloom.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,10 @@
#pragma once

#include "dxvk_format.h"
#include "dxvk_include.h"
#include "dxvk_context.h"
#include "rtx_resources.h"

#include "../spirv/spirv_code_buffer.h"
#include "../util/util_matrix.h"
#include "rtx_options.h"

namespace dxvk {

class DxvkDevice;
Expand All @@ -40,31 +35,37 @@ namespace dxvk {
explicit DxvkBloom(DxvkDevice* device);

DxvkBloom(const DxvkBloom&) = delete;
DxvkBloom(DxvkBloom&&) noexcept = delete;
DxvkBloom& operator=(const DxvkBloom&) = delete;
DxvkBloom& operator=(DxvkBloom&&) noexcept = delete;

void dispatch(
Rc<RtxContext> ctx,
Rc<DxvkSampler> linearSampler,
const Resources::Resource& inOutColorBuffer);

inline bool isEnabled() const { return enable() && intensity() > 0.f; }
bool isEnabled() const { return enable() && burnIntensity() > 0.f; }

void showImguiSettings();

void dispatchDownscale(
void dispatchDownsampleStep(
Rc<DxvkContext> ctx,
const Rc<DxvkSampler>& linearSampler,
const Resources::Resource& inputBuffer,
const Resources::Resource& outputBuffer);
const Resources::Resource& outputBuffer,
bool initial);

template<bool isVertical>
void dispatchBlur(
void dispatchUpsampleStep(
Rc<DxvkContext> ctx,
Rc<DxvkSampler> linearSampler,
const Rc<DxvkSampler>& linearSampler,
const Resources::Resource& inputBuffer,
const Resources::Resource& outputBuffer);

void dispatchComposite(
Rc<DxvkContext> ctx,
Rc<DxvkSampler> linearSampler,
const Rc<DxvkSampler> &linearSampler,
const Resources::Resource& inOutColorBuffer,
const Resources::Resource& bloomBuffer);

Expand All @@ -75,14 +76,18 @@ namespace dxvk {

Rc<vk::DeviceFn> m_vkd;

Resources::Resource m_bloomBuffer0;
Resources::Resource m_bloomBuffer1;

RTX_OPTION("rtx.bloom", bool, enable, true, "");
RTX_OPTION("rtx.bloom", float, sigma, 0.1f, "");
RTX_OPTION("rtx.bloom", float, intensity, 0.06f, "");

void initSettings(const dxvk::Config& config);
// Each image is 1/2 the resolution of the previous one.
// 5 steps are chosen, so the last image is 1/(2^5) = 1/32 of the target resolution;
// at 4K resolution that is ~67 pixels in height, which is fine enough, while
// keeping the number of steps as small as possible.
Resources::Resource m_bloomBuffer[5] = {};

RTX_OPTION("rtx.bloom", bool, enable, true, "Enable bloom - glowing halos around intense, bright areas.");
RTX_OPTION("rtx.bloom", float, burnIntensity, 1.0f, "Amount of bloom to add to the final image.");
RTX_OPTION("rtx.bloom", float, luminanceThreshold, 0.25f,
"Adjust the bloom threshold to suppress blooming of the dim areas. "
"Pixels with luminance lower than the threshold are multiplied by "
"the weight value that smoothly transitions from 1.0 (at luminance=threshold) to 0.0 (at luminance=0).");


0 comments on commit ac797fc

Please sign in to comment.