From 5e4faa59716e408c65d064a65cc73b9f76e0b6df Mon Sep 17 00:00:00 2001 From: Kay Chang Date: Fri, 5 Apr 2024 17:01:49 -0400 Subject: [PATCH 01/13] First pass DoF optimisation. --- .../Runtime/Effects/DepthOfField.cs | 40 ++++- .../PostProcessing/Runtime/Utils/ShaderIDs.cs | 4 + .../Shaders/Builtins/DepthOfField.hlsl | 100 ++++++++++- .../Shaders/Builtins/DepthOfField.shader | 64 ++++++- .../Shaders/Builtins/DiskKernels.hlsl | 159 ++++++++++++++++++ 5 files changed, 357 insertions(+), 10 deletions(-) diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs index 396107ce3cd..7fe39b6ce96 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs @@ -89,11 +89,15 @@ enum Pass { CoCCalculation, CoCTemporalFilter, + downsampleInitialMaxCoC, + downsampleMaxCoC, + extendMaxCoC, DownsampleAndPrefilter, BokehSmallKernel, BokehMediumKernel, BokehLargeKernel, BokehVeryLargeKernel, + BokehUnified, PostFilter, Combine, DebugOverlay @@ -146,6 +150,12 @@ float CalculateMaxCoCRadius(int screenHeight) return Mathf.Min(0.05f, radiusInPixels / screenHeight); } + void CalculateCoCKernelLimits(int screenHeight, out Vector4 cocKernelLimitsA, out Vector4 cocKernelLimitsB) + { + cocKernelLimitsA = new Vector4(2-0.5f, 6- 0.5f, 10- 0.5f, 14- 0.5f) / screenHeight; + cocKernelLimitsB = new Vector4(18, 22, 26, 30) / screenHeight; + } + RenderTexture CheckHistory(int eye, int id, PostProcessRenderContext context, RenderTextureFormat format) { var rt = m_CoCHistoryTextures[eye][id]; @@ -166,6 +176,8 @@ RenderTexture CheckHistory(int eye, int id, PostProcessRenderContext context, Re public override void Render(PostProcessRenderContext context) { + bool useUnified = true; // (kc) + // The coc is stored in alpha so we need a 4 channels target. 
Note that using ARGB32 // will result in a very weak near-blur. var colorFormat = context.camera.allowHDR ? RenderTextureFormat.ARGBHalf : RenderTextureFormat.ARGB32; @@ -179,10 +191,18 @@ public override void Render(PostProcessRenderContext context) var coeff = f * f / (settings.aperture.value * (s1 - f) * scaledFilmHeight * 2f); var maxCoC = CalculateMaxCoCRadius(context.screenHeight); + Vector4 cocKernelLimitsA; + Vector4 cocKernelLimitsB; + CalculateCoCKernelLimits(context.screenHeight, out cocKernelLimitsA, out cocKernelLimitsB); + cocKernelLimitsA /= maxCoC; + cocKernelLimitsB /= maxCoC; + var sheet = context.propertySheets.Get(context.resources.shaders.depthOfField); sheet.properties.Clear(); sheet.properties.SetFloat(ShaderIDs.Distance, s1); sheet.properties.SetFloat(ShaderIDs.LensCoeff, coeff); + sheet.properties.SetVector(ShaderIDs.CoCKernelLimitsA, cocKernelLimitsA); + sheet.properties.SetVector(ShaderIDs.CoCKernelLimitsB, cocKernelLimitsB); sheet.properties.SetFloat(ShaderIDs.MaxCoC, maxCoC); sheet.properties.SetFloat(ShaderIDs.RcpMaxCoC, 1f / maxCoC); sheet.properties.SetFloat(ShaderIDs.RcpAspect, 1f / aspect); @@ -213,13 +233,31 @@ public override void Render(PostProcessRenderContext context) cmd.SetGlobalTexture(ShaderIDs.CoCTex, historyWrite); } + int macCoCIndex = 4; + + // Downsampling CoC + context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCMips[1], 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, context.width >> 1, context.height >> 1); + cmd.BlitFullscreenTriangle(ShaderIDs.CoCTex, ShaderIDs.MaxCoCMips[1], sheet, (int)Pass.downsampleInitialMaxCoC); + for (int i = 2; i <= macCoCIndex; ++i) + { + context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCMips[i], 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, context.width >> i, context.height >> i); + cmd.BlitFullscreenTriangle(ShaderIDs.MaxCoCMips[i-1], ShaderIDs.MaxCoCMips[i], sheet, (int)Pass.downsampleMaxCoC); + } + + if (useUnified) + { + // Extend CoC + 
context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCTex, 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, context.width >> macCoCIndex, context.height >> macCoCIndex); + cmd.BlitFullscreenTriangle(ShaderIDs.MaxCoCMips[macCoCIndex], ShaderIDs.MaxCoCTex, sheet, (int)Pass.extendMaxCoC); + } + // Downsampling and prefiltering pass context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.DepthOfFieldTex, 0, colorFormat, RenderTextureReadWrite.Default, FilterMode.Bilinear, context.width / 2, context.height / 2); cmd.BlitFullscreenTriangle(context.source, ShaderIDs.DepthOfFieldTex, sheet, (int)Pass.DownsampleAndPrefilter); // Bokeh simulation pass context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.DepthOfFieldTemp, 0, colorFormat, RenderTextureReadWrite.Default, FilterMode.Bilinear, context.width / 2, context.height / 2); - cmd.BlitFullscreenTriangle(ShaderIDs.DepthOfFieldTex, ShaderIDs.DepthOfFieldTemp, sheet, (int)Pass.BokehSmallKernel + (int)settings.kernelSize.value); + cmd.BlitFullscreenTriangle(ShaderIDs.DepthOfFieldTex, ShaderIDs.DepthOfFieldTemp, sheet, useUnified ? 
(int)Pass.BokehUnified : (int)Pass.BokehSmallKernel + (int)settings.kernelSize.value); // Postfilter pass cmd.BlitFullscreenTriangle(ShaderIDs.DepthOfFieldTemp, ShaderIDs.DepthOfFieldTex, sheet, (int)Pass.PostFilter); diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs b/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs index f555f7b8560..22551387300 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs @@ -67,8 +67,12 @@ static class ShaderIDs internal static readonly int DepthOfFieldTemp = Shader.PropertyToID("_DepthOfFieldTemp"); internal static readonly int DepthOfFieldTex = Shader.PropertyToID("_DepthOfFieldTex"); + internal static readonly int[] MaxCoCMips = new int[] { Shader.PropertyToID("_CoCMip0"), Shader.PropertyToID("_CoCMip1"), Shader.PropertyToID("_CoCMip2"), Shader.PropertyToID("_CoCMip3"), Shader.PropertyToID("_CoCMip4"), Shader.PropertyToID("_CoCMip5") }; + internal static readonly int MaxCoCTex = Shader.PropertyToID("_MaxCoCTex"); internal static readonly int Distance = Shader.PropertyToID("_Distance"); internal static readonly int LensCoeff = Shader.PropertyToID("_LensCoeff"); + internal static readonly int CoCKernelLimitsA = Shader.PropertyToID("_CoCKernelLimitsA"); + internal static readonly int CoCKernelLimitsB = Shader.PropertyToID("_CoCKernelLimitsB"); internal static readonly int MaxCoC = Shader.PropertyToID("_MaxCoC"); internal static readonly int RcpMaxCoC = Shader.PropertyToID("_RcpMaxCoC"); internal static readonly int RcpAspect = Shader.PropertyToID("_RcpAspect"); diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl index 693304ae0f0..c23669d0472 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl +++ 
b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl @@ -12,6 +12,7 @@ TEXTURE2D_SAMPLER2D(_CameraDepthTexture, sampler_CameraDepthTexture); TEXTURE2D_SAMPLER2D(_CameraMotionVectorsTexture, sampler_CameraMotionVectorsTexture); TEXTURE2D_SAMPLER2D(_CoCTex, sampler_CoCTex); +TEXTURE2D_SAMPLER2D(_MaxCoCTex, sampler_MaxCoCTex); TEXTURE2D_SAMPLER2D(_DepthOfFieldTex, sampler_DepthOfFieldTex); float4 _DepthOfFieldTex_TexelSize; @@ -19,6 +20,8 @@ float4 _DepthOfFieldTex_TexelSize; // Camera parameters float _Distance; float _LensCoeff; // f^2 / (N * (S1 - f) * film_width * 2) +half4 _CoCKernelLimitsA; +half4 _CoCKernelLimitsB; float _MaxCoC; float _RcpMaxCoC; float _RcpAspect; @@ -147,18 +150,95 @@ half4 FragPrefilter(VaryingsDefault i) : SV_Target return half4(avg, coc); } +half4 FragDownsampleCoC(VaryingsDefault i) : SV_Target +{ + // TODO gather version + + float3 duv = _MainTex_TexelSize.xyx * float3(0.5, 0.5, -0.5); + float2 uv0 = UnityStereoTransformScreenSpaceTex(i.texcoord - duv.xy); + float2 uv1 = UnityStereoTransformScreenSpaceTex(i.texcoord - duv.zy); + float2 uv2 = UnityStereoTransformScreenSpaceTex(i.texcoord + duv.zy); + float2 uv3 = UnityStereoTransformScreenSpaceTex(i.texcoord + duv.xy); + + // Sample CoCs + half4 cocs; + cocs.x = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv0).r; + cocs.y = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv1).r; + cocs.z = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv2).r; + cocs.w = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv3).r; + +#if defined(INITIAL_COC) + cocs = cocs * 2.0 - 1.0; +#endif + cocs = abs(cocs); + + half maxCoC = max(cocs.x, Max3(cocs.y, cocs.z, cocs.w)); + return half4(maxCoC, 0.0, 0.0, 0.0); +} + +half4 FragExtendCoC(VaryingsDefault i) : SV_Target +{ + float tx = _MainTex_TexelSize.x; + float ty = _MainTex_TexelSize.y; + + float2 uv0 = UnityStereoTransformScreenSpaceTex(i.texcoord); + float2 uv1 = UnityStereoTransformScreenSpaceTex(i.texcoord + float2( tx, 0)); + float2 uv2 = 
UnityStereoTransformScreenSpaceTex(i.texcoord + float2( tx, ty)); + float2 uv3 = UnityStereoTransformScreenSpaceTex(i.texcoord + float2( 0, ty)); + float2 uv4 = UnityStereoTransformScreenSpaceTex(i.texcoord + float2(-tx, ty)); + float2 uv5 = UnityStereoTransformScreenSpaceTex(i.texcoord + float2(-tx, 0)); + float2 uv6 = UnityStereoTransformScreenSpaceTex(i.texcoord + float2(-tx,-ty)); + float2 uv7 = UnityStereoTransformScreenSpaceTex(i.texcoord + float2( 0,-ty)); + float2 uv8 = UnityStereoTransformScreenSpaceTex(i.texcoord + float2( tx,-ty)); + + half coc0 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv0).r; + half coc1 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv1).r; + half coc2 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv2).r; + half coc3 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv3).r; + half coc4 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv4).r; + half coc5 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv5).r; + half coc6 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv6).r; + half coc7 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv7).r; + half coc8 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv8).r; + + half maxCoC = Max3(Max3(coc0, coc1, coc2), Max3(coc3, coc4, coc5), Max3(coc6, coc7, coc8)); + return half4(maxCoC, 0.0, 0.0, 0.0); +} + + // Bokeh filter with disk-shaped kernels half4 FragBlur(VaryingsDefault i) : SV_Target { half4 samp0 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, i.texcoordStereo); + // normalized value in range [0, 1] + half maxCoC = SAMPLE_TEXTURE2D(_MaxCoCTex, sampler_MaxCoCTex, i.texcoordStereo).r; + + int sampleCount; + +#if defined(KERNEL_UNIFIED) + UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[0]) + sampleCount = kDiskKernelSizes[0]; + else UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[1]) + sampleCount = kDiskKernelSizes[1]; + else UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[2]) + sampleCount = kDiskKernelSizes[2]; + else UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[3]) + sampleCount = kDiskKernelSizes[3]; + else + 
sampleCount = kDiskKernelSizes[4]; + //(kc) sampleCount = kDiskKernelSizes[4]; +#else + sampleCount = kSampleCount; +#endif half4 bgAcc = 0.0; // Background: far field bokeh half4 fgAcc = 0.0; // Foreground: near field bokeh UNITY_LOOP - for (int si = 0; si < kSampleCount; si++) + for (int si = 0; si < sampleCount; si++) { - float2 disp = kDiskKernel[si] * _MaxCoC; + //float2 disp = kDiskKernel[si] * _MaxCoC; + float2 disp = kDiskAllKernels[si] * _MaxCoC * (12.0/8.0); float dist = length(disp); float2 duv = float2(disp.x * _RcpAspect, disp.y); @@ -198,6 +278,22 @@ half4 FragBlur(VaryingsDefault i) : SV_Target half alpha = saturate(fgAcc.a); half3 rgb = lerp(bgAcc.rgb, fgAcc.rgb, alpha); +#if defined(KERNEL_UNIFIED) + if (i.texcoord.x < 0.1) + rgb.r += 0.5; // (kc) + + /* + if (sampleCount == 8) + rgb.r += 0.5; + if (sampleCount == 22) + rgb.g += 0.5; + if (sampleCount == 43) + rgb.b += 0.5; + if (sampleCount == 1) + rgb.rg += 0.5; + */ +#endif + return half4(rgb, alpha); } diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader index 69c363f6540..9d852ee5be1 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader @@ -32,6 +32,43 @@ Shader "Hidden/PostProcessing/DepthOfField" } Pass // 2 + { + Name "Downsample initial MaxCoC" + + HLSLPROGRAM + #pragma target 3.5 + #pragma vertex VertDefault + #pragma fragment FragDownsampleCoC + #define INITIAL_COC + #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" + ENDHLSL + } + + Pass // 3 + { + Name "Downsample MaxCoC" + + HLSLPROGRAM + #pragma target 3.5 + #pragma vertex VertDefault + #pragma fragment FragDownsampleCoC + #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" + ENDHLSL + } + + Pass // 4 + { + Name 
"Extend MaxCoC" + + HLSLPROGRAM + #pragma target 3.5 + #pragma vertex VertDefault + #pragma fragment FragExtendCoC + #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" + ENDHLSL + } + + Pass // 5 { Name "Downsample and Prefilter" @@ -43,7 +80,7 @@ Shader "Hidden/PostProcessing/DepthOfField" ENDHLSL } - Pass // 3 + Pass // 6 { Name "Bokeh Filter (small)" @@ -56,7 +93,7 @@ Shader "Hidden/PostProcessing/DepthOfField" ENDHLSL } - Pass // 4 + Pass // 7 { Name "Bokeh Filter (medium)" @@ -69,7 +106,7 @@ Shader "Hidden/PostProcessing/DepthOfField" ENDHLSL } - Pass // 5 + Pass // 8 { Name "Bokeh Filter (large)" @@ -82,7 +119,7 @@ Shader "Hidden/PostProcessing/DepthOfField" ENDHLSL } - Pass // 6 + Pass // 9 { Name "Bokeh Filter (very large)" @@ -95,7 +132,20 @@ Shader "Hidden/PostProcessing/DepthOfField" ENDHLSL } - Pass // 7 + Pass // 10 + { + Name "Bokeh Filter (unified)" + + HLSLPROGRAM + #pragma target 3.5 + #pragma vertex VertDefault + #pragma fragment FragBlur + #define KERNEL_UNIFIED + #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" + ENDHLSL + } + + Pass // 11 { Name "Postfilter" @@ -107,7 +157,7 @@ Shader "Hidden/PostProcessing/DepthOfField" ENDHLSL } - Pass // 8 + Pass // 12 { Name "Combine" @@ -119,7 +169,7 @@ Shader "Hidden/PostProcessing/DepthOfField" ENDHLSL } - Pass // 9 + Pass // 13 { Name "Debug Overlay" diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl index b817ce852ed..31f68f90387 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl @@ -201,4 +201,163 @@ static const float2 kDiskKernel[kSampleCount] = { #endif + +static const int kDiskKernelSizes[7] = { 1, 8, 22, 43, 71, 106, 148 }; +static const float2 kDiskAllKernels[148] = { +float2(0, 0), 
+// ring 1 index=1 +float2(0.186046511627907, 0), +float2(0.115998102671392, 0.145457019994052), +float2(-0.0413992435267562, 0.181381937150107), +float2(-0.16762211495859, 0.0807225561148946), +float2(-0.16762211495859, -0.0807225561148945), +float2(-0.0413992435267562, -0.181381937150107), +float2(0.115998102671392, -0.145457019994052), +// ring 2 index=8 +float2(0.348837209302326, 0), +float2(0.314291465547356, 0.151354792715427), +float2(0.217496442508861, 0.272731912488848), +float2(0.0776235816126678, 0.34009113215645), +float2(-0.0776235816126678, 0.34009113215645), +float2(-0.217496442508861, 0.272731912488848), +float2(-0.314291465547355, 0.151354792715427), +float2(-0.348837209302326, 4.27202371795588E-17), +float2(-0.314291465547356, -0.151354792715427), +float2(-0.217496442508861, -0.272731912488848), +float2(-0.0776235816126679, -0.34009113215645), +float2(0.0776235816126674, -0.34009113215645), +float2(0.21749644250886, -0.272731912488848), +float2(0.314291465547356, -0.151354792715427), +// ring 3 index=22 +float2(0.511627906976744, 0), +float2(0.488897714588258, 0.150804972954416), +float2(0.422726814766323, 0.288210262265109), +float2(0.318994782346329, 0.400006804983643), +float2(0.186918663629318, 0.47626098767843), +float2(0.0382340013697985, 0.510197291581069), +float2(-0.113847919698579, 0.498800327162793), +float2(-0.255813953488372, 0.443082764726922), +float2(-0.375049794889679, 0.347995354208377), +float2(-0.460960816136121, 0.22198702931596), +float2(-0.505913445975647, 0.0762541826947871), +float2(-0.505913445975647, -0.0762541826947867), +float2(-0.460960816136121, -0.22198702931596), +float2(-0.375049794889679, -0.347995354208377), +float2(-0.255813953488372, -0.443082764726922), +float2(-0.11384791969858, -0.498800327162793), +float2(0.0382340013697985, -0.510197291581069), +float2(0.186918663629319, -0.47626098767843), +float2(0.318994782346329, -0.400006804983643), +float2(0.422726814766323, -0.288210262265109), 
+float2(0.488897714588258, -0.150804972954416), +// ring 4 index=43 +float2(0.674418604651163, 0), +float2(0.657509522169137, 0.150072257784491), +float2(0.607630166724887, 0.292619265916493), +float2(0.527281697478439, 0.420493122183797), +float2(0.420493122183797, 0.527281697478439), +float2(0.292619265916493, 0.607630166724887), +float2(0.150072257784491, 0.657509522169137), +float2(4.12962292735735E-17, 0.674418604651163), +float2(-0.150072257784491, 0.657509522169137), +float2(-0.292619265916493, 0.607630166724887), +float2(-0.420493122183797, 0.527281697478439), +float2(-0.527281697478438, 0.420493122183797), +float2(-0.607630166724887, 0.292619265916493), +float2(-0.657509522169137, 0.150072257784491), +float2(-0.674418604651163, 8.25924585471471E-17), +float2(-0.657509522169137, -0.150072257784491), +float2(-0.607630166724887, -0.292619265916493), +float2(-0.527281697478439, -0.420493122183797), +float2(-0.420493122183797, -0.527281697478439), +float2(-0.292619265916493, -0.607630166724887), +float2(-0.150072257784491, -0.657509522169137), +float2(-1.23888687820721E-16, -0.674418604651163), +float2(0.15007225778449, -0.657509522169137), +float2(0.292619265916493, -0.607630166724887), +float2(0.420493122183797, -0.527281697478439), +float2(0.527281697478439, -0.420493122183797), +float2(0.607630166724887, -0.292619265916492), +float2(0.657509522169137, -0.150072257784491), +// ring 5 index=71 +float2(0.837209302325581, 0), +float2(0.823755004408155, 0.149489493319789), +float2(0.783824542861175, 0.294174271323915), +float2(0.718701315573655, 0.429404046200294), +float2(0.630478436654186, 0.550832421716969), +float2(0.521991462021265, 0.654556589973234), +float2(0.396727252302976, 0.737242770856804), +float2(0.258711902267398, 0.796233362479663), +float2(0.112381338824084, 0.829632358689434), +float2(-0.0375612533167101, 0.836366288267147), +float2(-0.186296595870403, 0.81621871717548), +float2(-0.329044212547471, 0.769837204926796), 
+float2(-0.461216077494783, 0.698712491487602), +float2(-0.578564078221561, 0.605130583669444), +float2(-0.677316553430188, 0.492099280989047), +float2(-0.754299517313653, 0.363251502517026), +float2(-0.807038674070947, 0.222728521869775), +float2(-0.833838943809968, 0.0750468632679909), +float2(-0.833838943809968, -0.0750468632679907), +float2(-0.807038674070947, -0.222728521869774), +float2(-0.754299517313653, -0.363251502517025), +float2(-0.677316553430189, -0.492099280989047), +float2(-0.578564078221562, -0.605130583669444), +float2(-0.461216077494784, -0.698712491487602), +float2(-0.329044212547471, -0.769837204926796), +float2(-0.186296595870403, -0.81621871717548), +float2(-0.0375612533167103, -0.836366288267147), +float2(0.112381338824084, -0.829632358689434), +float2(0.258711902267398, -0.796233362479664), +float2(0.396727252302976, -0.737242770856804), +float2(0.521991462021265, -0.654556589973234), +float2(0.630478436654186, -0.550832421716969), +float2(0.718701315573655, -0.429404046200294), +float2(0.783824542861175, -0.294174271323915), +float2(0.823755004408155, -0.149489493319789), +// ring 6 index=106 +float2(1, 0), +float2(0.988830826225129, 0.149042266176174), +float2(0.955572805786141, 0.294755174410904), +float2(0.900968867902419, 0.433883739117558), +float2(0.826238774315995, 0.563320058063622), +float2(0.733051871829826, 0.680172737770919), +float2(0.623489801858734, 0.78183148246803), +float2(0.5, 0.866025403784439), +float2(0.365341024366395, 0.930873748644204), +float2(0.222520933956314, 0.974927912181824), +float2(0.0747300935864244, 0.99720379718118), +float2(-0.074730093586424, 0.99720379718118), +float2(-0.222520933956314, 0.974927912181824), +float2(-0.365341024366395, 0.930873748644204), +float2(-0.5, 0.866025403784439), +float2(-0.623489801858733, 0.78183148246803), +float2(-0.733051871829826, 0.680172737770919), +float2(-0.826238774315995, 0.563320058063622), +float2(-0.900968867902419, 0.433883739117558), 
+float2(-0.955572805786141, 0.294755174410905), +float2(-0.988830826225129, 0.149042266176175), +float2(-1, 1.22464679914735E-16), +float2(-0.988830826225129, -0.149042266176174), +float2(-0.955572805786141, -0.294755174410904), +float2(-0.900968867902419, -0.433883739117558), +float2(-0.826238774315995, -0.563320058063622), +float2(-0.733051871829826, -0.680172737770919), +float2(-0.623489801858734, -0.78183148246803), +float2(-0.5, -0.866025403784438), +float2(-0.365341024366395, -0.930873748644204), +float2(-0.222520933956315, -0.974927912181824), +float2(-0.0747300935864247, -0.99720379718118), +float2(0.0747300935864244, -0.99720379718118), +float2(0.222520933956314, -0.974927912181824), +float2(0.365341024366395, -0.930873748644204), +float2(0.499999999999999, -0.866025403784439), +float2(0.623489801858733, -0.78183148246803), +float2(0.733051871829827, -0.680172737770919), +float2(0.826238774315994, -0.563320058063623), +float2(0.900968867902419, -0.433883739117558), +float2(0.955572805786141, -0.294755174410905), +float2(0.988830826225128, -0.149042266176175), +}; + #endif // UNITY_POSTFX_DISK_KERNELS From 8a0758fc4074e584591f293e435a0f0e80a06367 Mon Sep 17 00:00:00 2001 From: Kay Chang Date: Sun, 7 Apr 2024 20:52:44 -0400 Subject: [PATCH 02/13] Update to get same result with unified and original brute-force approach. 
--- .../Runtime/Effects/DepthOfField.cs | 38 +++---- .../PostProcessing/Runtime/Utils/ShaderIDs.cs | 6 +- .../Shaders/Builtins/DepthOfField.hlsl | 98 +++++++++++++++---- .../Shaders/Builtins/DepthOfField.shader | 2 +- .../Shaders/Builtins/DiskKernels.hlsl | 2 +- 5 files changed, 105 insertions(+), 41 deletions(-) diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs index 7fe39b6ce96..8a52d436ac1 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs @@ -139,12 +139,14 @@ RenderTextureFormat SelectFormat(RenderTextureFormat primary, RenderTextureForma return RenderTextureFormat.Default; } - float CalculateMaxCoCRadius(int screenHeight) + float CalculateMaxCoCRadius(int screenHeight, out int mipLevel) { // Estimate the allowable maximum radius of CoC from the kernel // size (the equation below was empirically derived). float radiusInPixels = (float)settings.kernelSize.value * 4f + 6f; - + // Find the miplevel encasing the bokeh radius. + mipLevel = (int)(Mathf.Log(radiusInPixels * 2 - 1) / Mathf.Log(2)); + // Applying a 5% limit to the CoC radius to keep the size of // TileMax/NeighborMax small enough. return Mathf.Min(0.05f, radiusInPixels / screenHeight); @@ -176,7 +178,7 @@ RenderTexture CheckHistory(int eye, int id, PostProcessRenderContext context, Re public override void Render(PostProcessRenderContext context) { - bool useUnified = true; // (kc) + bool useUnified = (Time.time % 2f) < 1f; // (kc) // The coc is stored in alpha so we need a 4 channels target. Note that using ARGB32 // will result in a very weak near-blur. 
@@ -189,7 +191,8 @@ public override void Render(PostProcessRenderContext context) var s1 = Mathf.Max(settings.focusDistance.value, f); var aspect = (float)context.screenWidth / (float)context.screenHeight; var coeff = f * f / (settings.aperture.value * (s1 - f) * scaledFilmHeight * 2f); - var maxCoC = CalculateMaxCoCRadius(context.screenHeight); + int maxCoCMipLevel; + var maxCoC = CalculateMaxCoCRadius(context.screenHeight, out maxCoCMipLevel); Vector4 cocKernelLimitsA; Vector4 cocKernelLimitsB; @@ -233,22 +236,21 @@ public override void Render(PostProcessRenderContext context) cmd.SetGlobalTexture(ShaderIDs.CoCTex, historyWrite); } - int macCoCIndex = 4; - - // Downsampling CoC - context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCMips[1], 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, context.width >> 1, context.height >> 1); - cmd.BlitFullscreenTriangle(ShaderIDs.CoCTex, ShaderIDs.MaxCoCMips[1], sheet, (int)Pass.downsampleInitialMaxCoC); - for (int i = 2; i <= macCoCIndex; ++i) - { - context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCMips[i], 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, context.width >> i, context.height >> i); - cmd.BlitFullscreenTriangle(ShaderIDs.MaxCoCMips[i-1], ShaderIDs.MaxCoCMips[i], sheet, (int)Pass.downsampleMaxCoC); - } - if (useUnified) { + // Downsampling CoC + context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCMips[1], 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, context.width >> 1, context.height >> 1); + cmd.BlitFullscreenTriangle(ShaderIDs.CoCTex, ShaderIDs.MaxCoCMips[1], sheet, (int)Pass.downsampleInitialMaxCoC); + + for (int i = 2; i <= maxCoCMipLevel; ++i) + { + context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCMips[i], 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, context.width >> i, context.height >> i); + cmd.BlitFullscreenTriangle(ShaderIDs.MaxCoCMips[i - 1], ShaderIDs.MaxCoCMips[i], sheet, (int)Pass.downsampleMaxCoC); + } + // Extend CoC 
- context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCTex, 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, context.width >> macCoCIndex, context.height >> macCoCIndex); - cmd.BlitFullscreenTriangle(ShaderIDs.MaxCoCMips[macCoCIndex], ShaderIDs.MaxCoCTex, sheet, (int)Pass.extendMaxCoC); + context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCTex, 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, context.width >> maxCoCMipLevel, context.height >> maxCoCMipLevel); + cmd.BlitFullscreenTriangle(ShaderIDs.MaxCoCMips[maxCoCMipLevel], ShaderIDs.MaxCoCTex, sheet, (int)Pass.extendMaxCoC); } // Downsampling and prefiltering pass @@ -257,7 +259,7 @@ public override void Render(PostProcessRenderContext context) // Bokeh simulation pass context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.DepthOfFieldTemp, 0, colorFormat, RenderTextureReadWrite.Default, FilterMode.Bilinear, context.width / 2, context.height / 2); - cmd.BlitFullscreenTriangle(ShaderIDs.DepthOfFieldTex, ShaderIDs.DepthOfFieldTemp, sheet, useUnified ? (int)Pass.BokehUnified : (int)Pass.BokehSmallKernel + (int)settings.kernelSize.value); + cmd.BlitFullscreenTriangle(ShaderIDs.DepthOfFieldTex, ShaderIDs.DepthOfFieldTemp, sheet, useUnified ? 
(int)Pass.BokehUnified : (int)Pass.BokehSmallKernel + (int)settings.kernelSize.value); // Postfilter pass cmd.BlitFullscreenTriangle(ShaderIDs.DepthOfFieldTemp, ShaderIDs.DepthOfFieldTex, sheet, (int)Pass.PostFilter); diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs b/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs index 22551387300..802fe4ed8e0 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs @@ -67,7 +67,11 @@ static class ShaderIDs internal static readonly int DepthOfFieldTemp = Shader.PropertyToID("_DepthOfFieldTemp"); internal static readonly int DepthOfFieldTex = Shader.PropertyToID("_DepthOfFieldTex"); - internal static readonly int[] MaxCoCMips = new int[] { Shader.PropertyToID("_CoCMip0"), Shader.PropertyToID("_CoCMip1"), Shader.PropertyToID("_CoCMip2"), Shader.PropertyToID("_CoCMip3"), Shader.PropertyToID("_CoCMip4"), Shader.PropertyToID("_CoCMip5") }; + internal static readonly int[] MaxCoCMips = new int[] { + Shader.PropertyToID("_CoCMip0"), Shader.PropertyToID("_CoCMip1"), Shader.PropertyToID("_CoCMip2"), Shader.PropertyToID("_CoCMip3"), + Shader.PropertyToID("_CoCMip4"), Shader.PropertyToID("_CoCMip5"), Shader.PropertyToID("_CoCMip6"), Shader.PropertyToID("_CoCMip7"), + Shader.PropertyToID("_CoCMip8"), Shader.PropertyToID("_CoCMip9"), Shader.PropertyToID("_CoCMip10"), Shader.PropertyToID("_CoCMip11") + }; internal static readonly int MaxCoCTex = Shader.PropertyToID("_MaxCoCTex"); internal static readonly int Distance = Shader.PropertyToID("_Distance"); internal static readonly int LensCoeff = Shader.PropertyToID("_LensCoeff"); diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl index c23669d0472..68b279533d5 100644 --- 
a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl @@ -213,23 +213,84 @@ half4 FragBlur(VaryingsDefault i) : SV_Target // normalized value in range [0, 1] half maxCoC = SAMPLE_TEXTURE2D(_MaxCoCTex, sampler_MaxCoCTex, i.texcoordStereo).r; - int sampleCount; + int sampleCount = kSampleCount; + + half4 bgAcc = 0.0; // Background: far field bokeh + half4 fgAcc = 0.0; // Foreground: near field bokeh + + const half margin = _MainTex_TexelSize.y * 2; + UNITY_LOOP + for (int si = 0; si < sampleCount; si++) + { #if defined(KERNEL_UNIFIED) + float2 disp = kDiskAllKernels[si] * _MaxCoC * (12.0 / 8.0); +#else + float2 disp = kDiskKernel[si] * _MaxCoC; +#endif + float dist = length(disp); + + float2 duv = float2(disp.x * _RcpAspect, disp.y); + half4 samp = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, UnityStereoTransformScreenSpaceTex(i.texcoord + duv)); + + // BG: Compare CoC of the current sample and the center sample + // and select smaller one. + half bgCoC = max(min(samp0.a, samp.a), 0.0); + + // Compare the CoC to the sample distance. + // Add a small margin to smooth out. + half bgWeight = saturate((bgCoC - dist + margin) / margin); + half fgWeight = saturate((-samp.a - dist + margin) / margin); + + // Cut influence from focused areas because they're darkened by CoC + // premultiplying. This is only needed for near field. + fgWeight *= step(_MainTex_TexelSize.y, -samp.a); + + // Accumulation + bgAcc += half4(samp.rgb, 1.0) * bgWeight; + fgAcc += half4(samp.rgb, 1.0) * fgWeight; + } + + // Get the weighted average. + bgAcc.rgb /= bgAcc.a + (bgAcc.a == 0.0); // zero-div guard + fgAcc.rgb /= fgAcc.a + (fgAcc.a == 0.0); + + // BG: Calculate the alpha value only based on the center CoC. + // This is a rather aggressive approximation but provides stable results. 
+ bgAcc.a = smoothstep(_MainTex_TexelSize.y, _MainTex_TexelSize.y * 2.0, samp0.a); + + // FG: Normalize the total of the weights. + fgAcc.a *= PI / sampleCount; + + // Alpha premultiplying + half alpha = saturate(fgAcc.a); + half3 rgb = lerp(bgAcc.rgb, fgAcc.rgb, alpha); + + return half4(rgb, alpha); +} + +// Bokeh filter with disk-shaped kernels +half4 FragBlurUnified(VaryingsDefault i) : SV_Target +{ + half4 samp0 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, i.texcoordStereo); + // normalized value in range [0, 1] + half maxCoC = SAMPLE_TEXTURE2D(_MaxCoCTex, sampler_MaxCoCTex, i.texcoordStereo).r; + + int sampleCount; + UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[0]) - sampleCount = kDiskKernelSizes[0]; + sampleCount = kDiskAllKernelSizes[0]; + // margin adjustment later in the shader code artifically expand bokeh by 4px in fullscreen units (1 extra ring), we cannot have small bokeh as a result! else UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[1]) - sampleCount = kDiskKernelSizes[1]; + sampleCount = kDiskAllKernelSizes[1+1]; else UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[2]) - sampleCount = kDiskKernelSizes[2]; + sampleCount = kDiskAllKernelSizes[2+1]; else UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[3]) - sampleCount = kDiskKernelSizes[3]; + sampleCount = kDiskAllKernelSizes[3+1]; else - sampleCount = kDiskKernelSizes[4]; - //(kc) sampleCount = kDiskKernelSizes[4]; -#else - sampleCount = kSampleCount; -#endif + sampleCount = kDiskAllKernelSizes[4]; + + const half margin = _MainTex_TexelSize.y * 2; half4 bgAcc = 0.0; // Background: far field bokeh half4 fgAcc = 0.0; // Foreground: near field bokeh @@ -237,8 +298,8 @@ half4 FragBlur(VaryingsDefault i) : SV_Target UNITY_LOOP for (int si = 0; si < sampleCount; si++) { - //float2 disp = kDiskKernel[si] * _MaxCoC; - float2 disp = kDiskAllKernels[si] * _MaxCoC * (12.0/8.0); + float2 disp = kDiskAllKernels[si] * _MaxCoC * (12.0 / 8.0); + float dist = length(disp); float2 duv = float2(disp.x * _RcpAspect, disp.y); @@ 
-250,8 +311,7 @@ half4 FragBlur(VaryingsDefault i) : SV_Target // Compare the CoC to the sample distance. // Add a small margin to smooth out. - const half margin = _MainTex_TexelSize.y * 2; - half bgWeight = saturate((bgCoC - dist + margin) / margin); + half bgWeight = saturate((bgCoC - dist + margin) / margin); half fgWeight = saturate((-samp.a - dist + margin) / margin); // Cut influence from focused areas because they're darkened by CoC @@ -272,27 +332,25 @@ half4 FragBlur(VaryingsDefault i) : SV_Target bgAcc.a = smoothstep(_MainTex_TexelSize.y, _MainTex_TexelSize.y * 2.0, samp0.a); // FG: Normalize the total of the weights. - fgAcc.a *= PI / kSampleCount; + fgAcc.a *= PI / sampleCount; // Alpha premultiplying half alpha = saturate(fgAcc.a); half3 rgb = lerp(bgAcc.rgb, fgAcc.rgb, alpha); -#if defined(KERNEL_UNIFIED) - if (i.texcoord.x < 0.1) + /* + if (i.texcoord.x < 0.05) rgb.r += 0.5; // (kc) - /* if (sampleCount == 8) rgb.r += 0.5; if (sampleCount == 22) rgb.g += 0.5; if (sampleCount == 43) rgb.b += 0.5; - if (sampleCount == 1) + if (sampleCount == 71) rgb.rg += 0.5; */ -#endif return half4(rgb, alpha); } diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader index 9d852ee5be1..fc7c55822bb 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader @@ -139,7 +139,7 @@ Shader "Hidden/PostProcessing/DepthOfField" HLSLPROGRAM #pragma target 3.5 #pragma vertex VertDefault - #pragma fragment FragBlur + #pragma fragment FragBlurUnified #define KERNEL_UNIFIED #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" ENDHLSL diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl index 
31f68f90387..37f6a96693d 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl @@ -202,7 +202,7 @@ static const float2 kDiskKernel[kSampleCount] = { #endif -static const int kDiskKernelSizes[7] = { 1, 8, 22, 43, 71, 106, 148 }; +static const int kDiskAllKernelSizes[7] = { 1, 8, 22, 43, 71, 106, 148 }; static const float2 kDiskAllKernels[148] = { float2(0, 0), // ring 1 index=1 From 418ad5391637ef258b9f03d0d85a8a1fd3272ccd Mon Sep 17 00:00:00 2001 From: Kay Chang Date: Thu, 11 Apr 2024 00:40:43 -0400 Subject: [PATCH 03/13] Added UNITY_NEAR_CLIP_VALUE. --- com.unity.postprocessing/PostProcessing/Shaders/API/D3D11.hlsl | 1 + com.unity.postprocessing/PostProcessing/Shaders/API/D3D12.hlsl | 1 + com.unity.postprocessing/PostProcessing/Shaders/API/D3D9.hlsl | 1 + com.unity.postprocessing/PostProcessing/Shaders/API/Metal.hlsl | 1 + com.unity.postprocessing/PostProcessing/Shaders/API/OpenGL.hlsl | 1 + com.unity.postprocessing/PostProcessing/Shaders/API/PSP2.hlsl | 1 + com.unity.postprocessing/PostProcessing/Shaders/API/PSSL.hlsl | 1 + com.unity.postprocessing/PostProcessing/Shaders/API/Switch.hlsl | 1 + com.unity.postprocessing/PostProcessing/Shaders/API/Vulkan.hlsl | 1 + com.unity.postprocessing/PostProcessing/Shaders/API/WebGPU.hlsl | 1 + com.unity.postprocessing/PostProcessing/Shaders/API/XboxOne.hlsl | 1 + 11 files changed, 11 insertions(+) diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/D3D11.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/D3D11.hlsl index 1982bcb3616..bcf2eb91f4c 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/D3D11.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/D3D11.hlsl @@ -1,5 +1,6 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 1 +#define UNITY_NEAR_CLIP_VALUE (1.0) #define UNITY_GATHER_SUPPORTED (SHADER_TARGET >= 50) #define 
UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/D3D12.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/D3D12.hlsl index 1982bcb3616..bcf2eb91f4c 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/D3D12.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/D3D12.hlsl @@ -1,5 +1,6 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 1 +#define UNITY_NEAR_CLIP_VALUE (1.0) #define UNITY_GATHER_SUPPORTED (SHADER_TARGET >= 50) #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/D3D9.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/D3D9.hlsl index 8427b9c8ae3..0bb74573a4a 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/D3D9.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/D3D9.hlsl @@ -1,6 +1,7 @@ // ALso used for Direct3D 11 "feature level 9.x" target for Windows Store and Windows Phone #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 0 +#define UNITY_NEAR_CLIP_VALUE (0.0) #define UNITY_GATHER_SUPPORTED 0 #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/Metal.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/Metal.hlsl index 50916982d13..5ac8a00ed59 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/Metal.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/Metal.hlsl @@ -1,5 +1,6 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 1 +#define UNITY_NEAR_CLIP_VALUE (1.0) #define UNITY_GATHER_SUPPORTED 0 // Currently broken on Metal for some reason (May 2017) #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/OpenGL.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/OpenGL.hlsl index 9c7cd11f11d..0eb657ff46e 100644 --- 
a/com.unity.postprocessing/PostProcessing/Shaders/API/OpenGL.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/OpenGL.hlsl @@ -1,6 +1,7 @@ // For now OpenGL is considered at GLES2 level #define UNITY_UV_STARTS_AT_TOP 0 #define UNITY_REVERSED_Z 0 +#define UNITY_NEAR_CLIP_VALUE (-1.0) #define UNITY_GATHER_SUPPORTED 0 #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/PSP2.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/PSP2.hlsl index dcb0c7e37c7..25b3356f58f 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/PSP2.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/PSP2.hlsl @@ -1,6 +1,7 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 0 +#define UNITY_NEAR_CLIP_VALUE (0.0) #define UNITY_GATHER_SUPPORTED 0 #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 0 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/PSSL.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/PSSL.hlsl index 1982bcb3616..bcf2eb91f4c 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/PSSL.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/PSSL.hlsl @@ -1,5 +1,6 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 1 +#define UNITY_NEAR_CLIP_VALUE (1.0) #define UNITY_GATHER_SUPPORTED (SHADER_TARGET >= 50) #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/Switch.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/Switch.hlsl index bf2d2333360..046df14d7a6 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/Switch.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/Switch.hlsl @@ -1,5 +1,6 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 1 +#define UNITY_NEAR_CLIP_VALUE (1.0) #define UNITY_GATHER_SUPPORTED (SHADER_TARGET >= 50) #define TEXTURE2D_SAMPLER2D(textureName, samplerName) Texture2D textureName; 
SamplerState samplerName diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/Vulkan.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/Vulkan.hlsl index 1982bcb3616..bcf2eb91f4c 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/Vulkan.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/Vulkan.hlsl @@ -1,5 +1,6 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 1 +#define UNITY_NEAR_CLIP_VALUE (1.0) #define UNITY_GATHER_SUPPORTED (SHADER_TARGET >= 50) #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/WebGPU.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/WebGPU.hlsl index 1982bcb3616..bcf2eb91f4c 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/WebGPU.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/WebGPU.hlsl @@ -1,5 +1,6 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 1 +#define UNITY_NEAR_CLIP_VALUE (1.0) #define UNITY_GATHER_SUPPORTED (SHADER_TARGET >= 50) #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/XboxOne.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/XboxOne.hlsl index 1982bcb3616..bcf2eb91f4c 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/XboxOne.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/XboxOne.hlsl @@ -1,5 +1,6 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 1 +#define UNITY_NEAR_CLIP_VALUE (1.0) #define UNITY_GATHER_SUPPORTED (SHADER_TARGET >= 50) #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 From 69f1e7c90af3ce34b41010085802de1d51d1271d Mon Sep 17 00:00:00 2001 From: Kay Chang Date: Thu, 11 Apr 2024 22:05:44 -0400 Subject: [PATCH 04/13] Added static tile version. 
--- .../Runtime/Effects/DepthOfField.cs | 53 ++++++- .../Runtime/Utils/RuntimeUtilities.cs | 28 ++++ .../PostProcessing/Runtime/Utils/ShaderIDs.cs | 7 + .../Shaders/Builtins/DepthOfField.hlsl | 136 +++++++++++++++--- .../Shaders/Builtins/DepthOfField.shader | 66 ++++++++- 5 files changed, 254 insertions(+), 36 deletions(-) diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs index 8a52d436ac1..32a118e0438 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs @@ -98,6 +98,10 @@ enum Pass BokehLargeKernel, BokehVeryLargeKernel, BokehUnified, + BokehKernel1, + BokehKernel2, + BokehKernel3, + BokehKernel4, PostFilter, Combine, DebugOverlay @@ -178,7 +182,8 @@ RenderTexture CheckHistory(int eye, int id, PostProcessRenderContext context, Re public override void Render(PostProcessRenderContext context) { - bool useUnified = (Time.time % 2f) < 1f; // (kc) + bool useUnified = true;// (Time.time % 2f) < 1f; // (kc) + bool useStaticTiles = false; // The coc is stored in alpha so we need a 4 channels target. Note that using ARGB32 // will result in a very weak near-blur. @@ -194,6 +199,11 @@ public override void Render(PostProcessRenderContext context) int maxCoCMipLevel; var maxCoC = CalculateMaxCoCRadius(context.screenHeight, out maxCoCMipLevel); + // pad full-resolution screen so that the number of mips required by maxCoCMipLevel does not cause the downsampling chain to skip row or colums of pixels. 
+ int tileSize = 1 << maxCoCMipLevel; + int paddedWidth = ((context.width + tileSize - 1) >> maxCoCMipLevel) << maxCoCMipLevel; + int paddedHeight = ((context.height + tileSize - 1) >> maxCoCMipLevel) << maxCoCMipLevel; + Vector4 cocKernelLimitsA; Vector4 cocKernelLimitsB; CalculateCoCKernelLimits(context.screenHeight, out cocKernelLimitsA, out cocKernelLimitsB); @@ -206,7 +216,13 @@ public override void Render(PostProcessRenderContext context) sheet.properties.SetFloat(ShaderIDs.LensCoeff, coeff); sheet.properties.SetVector(ShaderIDs.CoCKernelLimitsA, cocKernelLimitsA); sheet.properties.SetVector(ShaderIDs.CoCKernelLimitsB, cocKernelLimitsB); + sheet.properties.SetVector(ShaderIDs.MaxCoCTexUvScale, new Vector4(paddedWidth / (float)context.width, paddedHeight / (float)context.height, context.width / (float)paddedWidth, context.height / (float)paddedHeight)); sheet.properties.SetFloat(ShaderIDs.MaxCoC, maxCoC); + sheet.properties.SetVector(ShaderIDs.CoCScreen, new Vector4(context.width, context.height, 1f / context.width, 1f / context.height)); + sheet.properties.SetFloat(ShaderIDs.CoCTileXCount, paddedWidth >> maxCoCMipLevel); + sheet.properties.SetFloat(ShaderIDs.CoCTileYCount, paddedHeight >> maxCoCMipLevel); + sheet.properties.SetFloat(ShaderIDs.CoCTilePixelWidth, 1 << maxCoCMipLevel); + sheet.properties.SetFloat(ShaderIDs.CoCTilePixelHeight, 1 << maxCoCMipLevel); sheet.properties.SetFloat(ShaderIDs.RcpMaxCoC, 1f / maxCoC); sheet.properties.SetFloat(ShaderIDs.RcpAspect, 1f / aspect); @@ -236,20 +252,20 @@ public override void Render(PostProcessRenderContext context) cmd.SetGlobalTexture(ShaderIDs.CoCTex, historyWrite); } - if (useUnified) + if (useUnified || useStaticTiles) { // Downsampling CoC - context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCMips[1], 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, context.width >> 1, context.height >> 1); + context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCMips[1], 0, cocFormat, 
RenderTextureReadWrite.Linear, FilterMode.Point, paddedWidth >> 1, paddedHeight >> 1); cmd.BlitFullscreenTriangle(ShaderIDs.CoCTex, ShaderIDs.MaxCoCMips[1], sheet, (int)Pass.downsampleInitialMaxCoC); for (int i = 2; i <= maxCoCMipLevel; ++i) { - context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCMips[i], 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, context.width >> i, context.height >> i); + context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCMips[i], 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, paddedWidth >> i, paddedHeight >> i); cmd.BlitFullscreenTriangle(ShaderIDs.MaxCoCMips[i - 1], ShaderIDs.MaxCoCMips[i], sheet, (int)Pass.downsampleMaxCoC); } // Extend CoC - context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCTex, 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, context.width >> maxCoCMipLevel, context.height >> maxCoCMipLevel); + context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCTex, 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, paddedWidth >> maxCoCMipLevel, paddedHeight >> maxCoCMipLevel); cmd.BlitFullscreenTriangle(ShaderIDs.MaxCoCMips[maxCoCMipLevel], ShaderIDs.MaxCoCTex, sheet, (int)Pass.extendMaxCoC); } @@ -259,7 +275,32 @@ public override void Render(PostProcessRenderContext context) // Bokeh simulation pass context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.DepthOfFieldTemp, 0, colorFormat, RenderTextureReadWrite.Default, FilterMode.Bilinear, context.width / 2, context.height / 2); - cmd.BlitFullscreenTriangle(ShaderIDs.DepthOfFieldTex, ShaderIDs.DepthOfFieldTemp, sheet, useUnified ? 
(int)Pass.BokehUnified : (int)Pass.BokehSmallKernel + (int)settings.kernelSize.value); + if (useUnified) + { + /* + int tileXCount = paddedWidth >> maxCoCMipLevel; + int tileYCount = paddedHeight >> maxCoCMipLevel; + int tileCount = tileXCount * tileYCount; + cmd.SetGlobalFloat(ShaderIDs.CoCRingCount, 2.0f); + cmd.BlitProcedural(ShaderIDs.DepthOfFieldTex, ShaderIDs.DepthOfFieldTemp, sheet, (int)Pass.BokehUnified, 6, tileCount); + */ + cmd.BlitFullscreenTriangle(ShaderIDs.DepthOfFieldTex, ShaderIDs.DepthOfFieldTemp, sheet, (int)Pass.BokehUnified); + } + else if (useStaticTiles) + { + int tileXCount = paddedWidth >> maxCoCMipLevel; + int tileYCount = paddedHeight >> maxCoCMipLevel; + int tileCount = tileXCount * tileYCount; + for (int i = 0; i < 4; ++i) + { + cmd.SetGlobalFloat(ShaderIDs.CoCRingCount, i + 1); + cmd.BlitProcedural(ShaderIDs.DepthOfFieldTex, ShaderIDs.DepthOfFieldTemp, sheet, (int)Pass.BokehKernel1 + i, 6, tileCount); + } + } + else + { + cmd.BlitFullscreenTriangle(ShaderIDs.DepthOfFieldTex, ShaderIDs.DepthOfFieldTemp, sheet, (int)Pass.BokehSmallKernel + (int)settings.kernelSize.value); + } // Postfilter pass cmd.BlitFullscreenTriangle(ShaderIDs.DepthOfFieldTemp, ShaderIDs.DepthOfFieldTex, sheet, (int)Pass.PostFilter); diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Utils/RuntimeUtilities.cs b/com.unity.postprocessing/PostProcessing/Runtime/Utils/RuntimeUtilities.cs index 459cca0306b..246e384abb7 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Utils/RuntimeUtilities.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Utils/RuntimeUtilities.cs @@ -556,6 +556,34 @@ public static void BlitFullscreenTriangle(this CommandBuffer cmd, RenderTargetId #endif } + /// + /// Blits procedural geometry using a given material. 
+ /// + /// The command buffer to use + /// The source render target + /// The destination render target + /// The property sheet to use + /// The pass from the material to use + /// The number of instances to render + /// Should the destination target be cleared? + /// An optional viewport to consider for the blit + /// Should the depth buffer be preserved? + public static void BlitProcedural(this CommandBuffer cmd, RenderTargetIdentifier source, RenderTargetIdentifier destination, PropertySheet propertySheet, int pass, int vertexCount, int instanceCount, bool clear = false, Rect? viewport = null, bool preserveDepth = false) + { + cmd.SetGlobalTexture(ShaderIDs.MainTex, source); + var loadAction = viewport == null ? LoadAction.DontCare : LoadAction.Load; + cmd.SetRenderTargetWithLoadStoreAction(destination, loadAction, StoreAction.Store, preserveDepth ? LoadAction.Load : loadAction, StoreAction.Store); + + if (viewport != null) + cmd.SetViewport(viewport.Value); + + if (clear) + cmd.ClearRenderTarget(true, true, Color.clear); + + // TODO: detect which platforms support quads + cmd.DrawProcedural(Matrix4x4.identity, propertySheet.material, pass, MeshTopology.Triangles, vertexCount, instanceCount, propertySheet.properties); + } + /// /// Blits a fullscreen triangle from a double-wide source. 
/// diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs b/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs index 802fe4ed8e0..0a91a1c6b95 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs @@ -77,6 +77,13 @@ static class ShaderIDs internal static readonly int LensCoeff = Shader.PropertyToID("_LensCoeff"); internal static readonly int CoCKernelLimitsA = Shader.PropertyToID("_CoCKernelLimitsA"); internal static readonly int CoCKernelLimitsB = Shader.PropertyToID("_CoCKernelLimitsB"); + internal static readonly int MaxCoCTexUvScale = Shader.PropertyToID("_MaxCoCTexUvScale"); + internal static readonly int CoCRingCount = Shader.PropertyToID("_CoCRingCount"); + internal static readonly int CoCScreen = Shader.PropertyToID("_CoCScreen"); + internal static readonly int CoCTileXCount = Shader.PropertyToID("_CoCTileXCount"); + internal static readonly int CoCTileYCount = Shader.PropertyToID("_CoCTileYCount"); + internal static readonly int CoCTilePixelWidth = Shader.PropertyToID("_CoCTilePixelWidth"); + internal static readonly int CoCTilePixelHeight = Shader.PropertyToID("_CoCTilePixelHeight"); internal static readonly int MaxCoC = Shader.PropertyToID("_MaxCoC"); internal static readonly int RcpMaxCoC = Shader.PropertyToID("_RcpMaxCoC"); internal static readonly int RcpAspect = Shader.PropertyToID("_RcpAspect"); diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl index 68b279533d5..2d75e7cedd5 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl @@ -22,6 +22,13 @@ float _Distance; float _LensCoeff; // f^2 / (N * (S1 - f) * film_width * 2) half4 _CoCKernelLimitsA; half4 _CoCKernelLimitsB; +float4 
_MaxCoCTexUvScale; // (kc)rename + move more variables to half +float _CoCRingCount; +float4 _CoCScreen; +float _CoCTileXCount; +float _CoCTileYCount; +float _CoCTilePixelWidth; +float _CoCTilePixelHeight; float _MaxCoC; float _RcpMaxCoC; float _RcpAspect; @@ -150,9 +157,27 @@ half4 FragPrefilter(VaryingsDefault i) : SV_Target return half4(avg, coc); } +VaryingsDefault VertDownsampleCoC(AttributesDefault v) +{ + VaryingsDefault o; + o.vertex = float4(v.vertex.xy, 0.0, 1.0); + o.texcoord = TransformTriangleVertexToUV(v.vertex.xy); +#if defined(INITIAL_COC) + o.texcoord *= _MaxCoCTexUvScale.xy; +#endif + +#if UNITY_UV_STARTS_AT_TOP + o.texcoord = o.texcoord * float2(1.0, -1.0) + float2(0.0, 1.0); +#endif + + o.texcoordStereo = TransformStereoScreenSpaceTex(o.texcoord, 1.0); + + return o; +} + half4 FragDownsampleCoC(VaryingsDefault i) : SV_Target { - // TODO gather version + // TODO implement gather version float3 duv = _MainTex_TexelSize.xyx * float3(0.5, 0.5, -0.5); float2 uv0 = UnityStereoTransformScreenSpaceTex(i.texcoord - duv.xy); @@ -168,6 +193,7 @@ half4 FragDownsampleCoC(VaryingsDefault i) : SV_Target cocs.w = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv3).r; #if defined(INITIAL_COC) + // Storing the absolute normalized CoC is enough. 
cocs = cocs * 2.0 - 1.0; #endif cocs = abs(cocs); @@ -209,22 +235,24 @@ half4 FragExtendCoC(VaryingsDefault i) : SV_Target // Bokeh filter with disk-shaped kernels half4 FragBlur(VaryingsDefault i) : SV_Target { - half4 samp0 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, i.texcoordStereo); - // normalized value in range [0, 1] - half maxCoC = SAMPLE_TEXTURE2D(_MaxCoCTex, sampler_MaxCoCTex, i.texcoordStereo).r; + const half margin = _MainTex_TexelSize.y * 2; - int sampleCount = kSampleCount; + half4 samp0 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, i.texcoordStereo); half4 bgAcc = 0.0; // Background: far field bokeh half4 fgAcc = 0.0; // Foreground: near field bokeh - const half margin = _MainTex_TexelSize.y * 2; +#if defined(KERNEL_UNIFIED) + int sampleCount = kDiskAllKernelSizes[KERNEL_UNIFIED]; +#else + int sampleCount = kSampleCount; +#endif UNITY_LOOP for (int si = 0; si < sampleCount; si++) { #if defined(KERNEL_UNIFIED) - float2 disp = kDiskAllKernels[si] * _MaxCoC * (12.0 / 8.0); + float2 disp = kDiskAllKernels[si] * (_MaxCoC * (12.0 / 8.0)); #else float2 disp = kDiskKernel[si] * _MaxCoC; #endif @@ -270,11 +298,11 @@ half4 FragBlur(VaryingsDefault i) : SV_Target } // Bokeh filter with disk-shaped kernels -half4 FragBlurUnified(VaryingsDefault i) : SV_Target +half4 FragBlurDynamic(VaryingsDefault i) : SV_Target { half4 samp0 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, i.texcoordStereo); // normalized value in range [0, 1] - half maxCoC = SAMPLE_TEXTURE2D(_MaxCoCTex, sampler_MaxCoCTex, i.texcoordStereo).r; + half maxCoC = SAMPLE_TEXTURE2D(_MaxCoCTex, sampler_MaxCoCTex, i.texcoordStereo * _MaxCoCTexUvScale.zw).r; int sampleCount; @@ -298,7 +326,7 @@ half4 FragBlurUnified(VaryingsDefault i) : SV_Target UNITY_LOOP for (int si = 0; si < sampleCount; si++) { - float2 disp = kDiskAllKernels[si] * _MaxCoC * (12.0 / 8.0); + float2 disp = kDiskAllKernels[si] * (_MaxCoC * (12.0 / 8.0)); float dist = length(disp); @@ -338,21 +366,83 @@ half4 
FragBlurUnified(VaryingsDefault i) : SV_Target half alpha = saturate(fgAcc.a); half3 rgb = lerp(bgAcc.rgb, fgAcc.rgb, alpha); - /* - if (i.texcoord.x < 0.05) - rgb.r += 0.5; // (kc) + return half4(rgb, alpha); +} - if (sampleCount == 8) - rgb.r += 0.5; - if (sampleCount == 22) - rgb.g += 0.5; - if (sampleCount == 43) - rgb.b += 0.5; - if (sampleCount == 71) - rgb.rg += 0.5; - */ +struct Attributes +{ + uint vertexID : SV_VertexID; + uint instanceID : SV_InstanceID; +}; - return half4(rgb, alpha); +uint2 UnpackTileID(uint tileID) +{ + return uint2(tileID & 0xFFFF, (tileID >> 16) & 0xFFFF); +} + +// 0 - 0,1 +// 1 - 0,0 +// 2 - 1,0 +// 3 - 1,1 +float4 GetQuadVertexPosition(uint vertexID, float z = UNITY_NEAR_CLIP_VALUE) +{ + uint topBit = vertexID >> 1; + uint botBit = (vertexID & 1); + float x = topBit; + float y = 1 - (topBit + botBit) & 1; // produces 1 for indices 0,3 and 0 for 1,2 + float4 pos = float4(x, y, z, 1.0); + return pos; +} + +VaryingsDefault VertexTiling(Attributes input) +{ + uint2 tileCoord = uint2(input.instanceID % (uint)_CoCTileXCount, input.instanceID / (uint)_CoCTileXCount); // (kc) stereo mode? + // normalized value in range [0, 1] + half maxCoC = LOAD_TEXTURE2D(_MaxCoCTex, _MaxCoCTex_TexelSize, tileCoord).x; + + bool shouldDiscard; + + UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[0]) + shouldDiscard = _CoCRingCount != 0; + // margin adjustment later in the shader code artifically expand bokeh by 4px in fullscreen units (1 extra ring), we cannot have small bokeh as a result! 
+ else UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[1]) + shouldDiscard = _CoCRingCount != 1+1; + else UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[2]) + shouldDiscard = _CoCRingCount != 2+1; + else UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[3]) + shouldDiscard = _CoCRingCount != 3+1; + else + shouldDiscard = _CoCRingCount != 4; + + VaryingsDefault output; + + [branch] if (shouldDiscard) + { + output.vertex = float4(-2, -2, -2, 1); + output.texcoord = 0.0.xx; + output.texcoordStereo = 0.0.xx; +#if STEREO_INSTANCING_ENABLED + output.stereoTargetEyeIndex = 0; +#endif + return output; + } + + // This handles both "real quad" and "2 triangles" cases: remaps {0, 1, 2, 3, 4, 5} into {0, 1, 2, 3, 0, 2}. + uint quadIndex = (input.vertexID & 0x03) + (input.vertexID >> 2) * (input.vertexID & 0x01); + float2 pp = GetQuadVertexPosition(quadIndex).xy; + uint2 pixelCoord = tileCoord * uint2(_CoCTilePixelWidth, _CoCTilePixelHeight); + pixelCoord += uint2(pp.xy * uint2(_CoCTilePixelWidth, _CoCTilePixelHeight)); + pixelCoord.y = _CoCScreen.y - pixelCoord.y; + float2 clipCoord = (pixelCoord * _CoCScreen.zw) * 2.0 - 1.0; + + output.vertex = float4(clipCoord, 0, 1); + output.texcoord = clipCoord * 0.5 + 0.5; + #if UNITY_UV_STARTS_AT_TOP + output.texcoord = output.texcoord * float2(1.0, -1.0) + float2(0.0, 1.0); + #endif + output.texcoordStereo = TransformStereoScreenSpaceTex(output.texcoord, 1.0); + + return output; } // Postfilter blur diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader index fc7c55822bb..e31838aff22 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader @@ -50,7 +50,7 @@ Shader "Hidden/PostProcessing/DepthOfField" HLSLPROGRAM #pragma target 3.5 - #pragma vertex VertDefault + #pragma vertex VertDownsampleCoC #pragma fragment 
FragDownsampleCoC #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" ENDHLSL @@ -134,18 +134,70 @@ Shader "Hidden/PostProcessing/DepthOfField" Pass // 10 { - Name "Bokeh Filter (unified)" + Name "Bokeh Filter (dynamic)" HLSLPROGRAM #pragma target 3.5 #pragma vertex VertDefault - #pragma fragment FragBlurUnified - #define KERNEL_UNIFIED + #pragma fragment FragBlurDynamic + #define KERNEL_UNIFIED 4 + #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" + ENDHLSL + } + + Pass // 11 + { + Name "Bokeh Filter (1 ring)" + + HLSLPROGRAM + #pragma target 3.5 + #pragma vertex VertexTiling + #pragma fragment FragBlur + #define KERNEL_UNIFIED 1 + #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" + ENDHLSL + } + + Pass // 12 + { + Name "Bokeh Filter (2 rings)" + + HLSLPROGRAM + #pragma target 3.5 + #pragma vertex VertexTiling + #pragma fragment FragBlur + #define KERNEL_UNIFIED 2 + #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" + ENDHLSL + } + + Pass // 13 + { + Name "Bokeh Filter (3 rings)" + + HLSLPROGRAM + #pragma target 3.5 + #pragma vertex VertexTiling + #pragma fragment FragBlur + #define KERNEL_UNIFIED 3 + #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" + ENDHLSL + } + + Pass // 14 + { + Name "Bokeh Filter (4 rings)" + + HLSLPROGRAM + #pragma target 3.5 + #pragma vertex VertexTiling + #pragma fragment FragBlur + #define KERNEL_UNIFIED 4 #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" ENDHLSL } - Pass // 11 + Pass // 15 { Name "Postfilter" @@ -157,7 +209,7 @@ Shader "Hidden/PostProcessing/DepthOfField" ENDHLSL } - Pass // 12 + Pass // 16 { Name "Combine" @@ -169,7 +221,7 @@ Shader "Hidden/PostProcessing/DepthOfField" ENDHLSL } - Pass // 13 + Pass // 17 { Name "Debug Overlay" From 
d9da953f997470f00cff2df5dbfcaf585962dbab Mon Sep 17 00:00:00 2001 From: Kay Chang Date: Mon, 15 Apr 2024 23:02:14 -0400 Subject: [PATCH 05/13] Shader code optimisation (7% faster), manual loop unrolling (15% faster). --- .../Runtime/Effects/DepthOfField.cs | 2 + .../PostProcessing/Runtime/Utils/ShaderIDs.cs | 2 + .../Shaders/Builtins/DepthOfField.hlsl | 157 +++++++-- .../Shaders/Builtins/DiskKernels.hlsl | 311 +++++++++--------- 4 files changed, 282 insertions(+), 190 deletions(-) diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs index 32a118e0438..93d5f0d19c1 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs @@ -217,6 +217,8 @@ public override void Render(PostProcessRenderContext context) sheet.properties.SetVector(ShaderIDs.CoCKernelLimitsA, cocKernelLimitsA); sheet.properties.SetVector(ShaderIDs.CoCKernelLimitsB, cocKernelLimitsB); sheet.properties.SetVector(ShaderIDs.MaxCoCTexUvScale, new Vector4(paddedWidth / (float)context.width, paddedHeight / (float)context.height, context.width / (float)paddedWidth, context.height / (float)paddedHeight)); + sheet.properties.SetVector(ShaderIDs.KernelScale, new Vector4(maxCoC * (12f / 8f) / aspect, maxCoC * (12f / 8f), maxCoC * (12f / 8f), 0f)); // (kc) hardcoded for 4 rings + sheet.properties.SetVector(ShaderIDs.MarginFactors, new Vector4(2f / (context.height >> 1), (context.height >> 1) / 2f, 0f, 0f)); sheet.properties.SetFloat(ShaderIDs.MaxCoC, maxCoC); sheet.properties.SetVector(ShaderIDs.CoCScreen, new Vector4(context.width, context.height, 1f / context.width, 1f / context.height)); sheet.properties.SetFloat(ShaderIDs.CoCTileXCount, paddedWidth >> maxCoCMipLevel); diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs 
b/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs index 0a91a1c6b95..37a3486f248 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs @@ -84,6 +84,8 @@ static class ShaderIDs internal static readonly int CoCTileYCount = Shader.PropertyToID("_CoCTileYCount"); internal static readonly int CoCTilePixelWidth = Shader.PropertyToID("_CoCTilePixelWidth"); internal static readonly int CoCTilePixelHeight = Shader.PropertyToID("_CoCTilePixelHeight"); + internal static readonly int KernelScale = Shader.PropertyToID("_KernelScale"); + internal static readonly int MarginFactors = Shader.PropertyToID("_MarginFactors"); internal static readonly int MaxCoC = Shader.PropertyToID("_MaxCoC"); internal static readonly int RcpMaxCoC = Shader.PropertyToID("_RcpMaxCoC"); internal static readonly int RcpAspect = Shader.PropertyToID("_RcpAspect"); diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl index 2d75e7cedd5..0b5c6a85241 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl @@ -29,6 +29,8 @@ float _CoCTileXCount; float _CoCTileYCount; float _CoCTilePixelWidth; float _CoCTilePixelHeight; +half3 _KernelScale; +half2 _MarginFactors; float _MaxCoC; float _RcpMaxCoC; float _RcpAspect; @@ -231,12 +233,35 @@ half4 FragExtendCoC(VaryingsDefault i) : SV_Target return half4(maxCoC, 0.0, 0.0, 0.0); } +void AccumSample(int si, half4 samp0, float2 texcoord, inout half4 bgAcc, inout half4 fgAcc) +{ + half2 disp = kDiskAllKernels[si].xy * _KernelScale.xy; + half dist = kDiskAllKernels[si].z * _KernelScale.z; + half2 duv = disp; + + half4 samp = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, UnityStereoTransformScreenSpaceTex(texcoord + duv)); + + // BG: 
Compare CoC of the current sample and the center sample + // and select smaller one. + half bgCoC = max(min(samp0.a, samp.a), 0.0); + + // Compare the CoC to the sample distance. + // Add a small margin to smooth out. + half bgWeight = saturate((bgCoC - dist + _MarginFactors.x) * _MarginFactors.y); + half fgWeight = saturate((-samp.a - dist + _MarginFactors.x) * _MarginFactors.y); + + // Cut influence from focused areas because they're darkened by CoC + // premultiplying. This is only needed for near field. + fgWeight *= step(_MainTex_TexelSize.y, -samp.a); + + // Accumulation + bgAcc += half4(samp.rgb, 1.0) * bgWeight; + fgAcc += half4(samp.rgb, 1.0) * fgWeight; +} // Bokeh filter with disk-shaped kernels half4 FragBlur(VaryingsDefault i) : SV_Target { - const half margin = _MainTex_TexelSize.y * 2; - half4 samp0 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, i.texcoordStereo); half4 bgAcc = 0.0; // Background: far field bokeh @@ -244,21 +269,25 @@ half4 FragBlur(VaryingsDefault i) : SV_Target #if defined(KERNEL_UNIFIED) int sampleCount = kDiskAllKernelSizes[KERNEL_UNIFIED]; + half rcpSampleCount = kDiskAllKernelRcpSizes[KERNEL_UNIFIED]; #else int sampleCount = kSampleCount; + half rcpSampleCount = 1.0 / kSampleCount; #endif - UNITY_LOOP + UNITY_FLATTEN for (int si = 0; si < sampleCount; si++) { #if defined(KERNEL_UNIFIED) - float2 disp = kDiskAllKernels[si] * (_MaxCoC * (12.0 / 8.0)); + half2 disp = kDiskAllKernels[si].xy * _KernelScale.xy; + half dist = kDiskAllKernels[si].z * _KernelScale.z; + half2 duv = disp; #else - float2 disp = kDiskKernel[si] * _MaxCoC; + half2 disp = kDiskKernel[si] * _MaxCoC; + half dist = length(disp); + half2 duv = half2(disp.x * _RcpAspect, disp.y); #endif - float dist = length(disp); - float2 duv = float2(disp.x * _RcpAspect, disp.y); half4 samp = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, UnityStereoTransformScreenSpaceTex(i.texcoord + duv)); // BG: Compare CoC of the current sample and the center sample @@ -267,8 +296,8 @@ half4 
FragBlur(VaryingsDefault i) : SV_Target // Compare the CoC to the sample distance. // Add a small margin to smooth out. - half bgWeight = saturate((bgCoC - dist + margin) / margin); - half fgWeight = saturate((-samp.a - dist + margin) / margin); + half bgWeight = saturate((bgCoC - dist + _MarginFactors.x) * _MarginFactors.y); + half fgWeight = saturate((-samp.a - dist + _MarginFactors.x) * _MarginFactors.y); // Cut influence from focused areas because they're darkened by CoC // premultiplying. This is only needed for near field. @@ -288,7 +317,7 @@ half4 FragBlur(VaryingsDefault i) : SV_Target bgAcc.a = smoothstep(_MainTex_TexelSize.y, _MainTex_TexelSize.y * 2.0, samp0.a); // FG: Normalize the total of the weights. - fgAcc.a *= PI / sampleCount; + fgAcc.a *= PI * rcpSampleCount; // Alpha premultiplying half alpha = saturate(fgAcc.a); @@ -318,37 +347,95 @@ half4 FragBlurDynamic(VaryingsDefault i) : SV_Target else sampleCount = kDiskAllKernelSizes[4]; - const half margin = _MainTex_TexelSize.y * 2; - half4 bgAcc = 0.0; // Background: far field bokeh half4 fgAcc = 0.0; // Foreground: near field bokeh - UNITY_LOOP - for (int si = 0; si < sampleCount; si++) - { - float2 disp = kDiskAllKernels[si] * (_MaxCoC * (12.0 / 8.0)); - - float dist = length(disp); - - float2 duv = float2(disp.x * _RcpAspect, disp.y); - half4 samp = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, UnityStereoTransformScreenSpaceTex(i.texcoord + duv)); - - // BG: Compare CoC of the current sample and the center sample - // and select smaller one. - half bgCoC = max(min(samp0.a, samp.a), 0.0); + AccumSample(0, samp0, i.texcoord, bgAcc, fgAcc); - // Compare the CoC to the sample distance. - // Add a small margin to smooth out. 
- half bgWeight = saturate((bgCoC - dist + margin) / margin); - half fgWeight = saturate((-samp.a - dist + margin) / margin); + UNITY_BRANCH if (sampleCount >= 8) + { + AccumSample( 1, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample( 2, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample( 3, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample( 4, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample( 5, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample( 6, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample( 7, samp0, i.texcoord, bgAcc, fgAcc); + } + UNITY_BRANCH if (sampleCount >= 22) + { + AccumSample( 8, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample( 9, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(10, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(11, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(12, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(13, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(14, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(15, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(16, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(17, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(18, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(19, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(20, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(21, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(22, samp0, i.texcoord, bgAcc, fgAcc); + } - // Cut influence from focused areas because they're darkened by CoC - // premultiplying. This is only needed for near field. 
- fgWeight *= step(_MainTex_TexelSize.y, -samp.a); + UNITY_BRANCH if (sampleCount >= 43) + { + AccumSample(23, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(24, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(25, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(26, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(27, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(28, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(29, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(30, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(31, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(32, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(33, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(34, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(35, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(36, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(37, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(38, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(39, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(40, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(41, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(42, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(43, samp0, i.texcoord, bgAcc, fgAcc); + } - // Accumulation - bgAcc += half4(samp.rgb, 1.0) * bgWeight; - fgAcc += half4(samp.rgb, 1.0) * fgWeight; + UNITY_BRANCH if (sampleCount >= 71) + { + AccumSample(44, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(45, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(46, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(47, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(48, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(49, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(50, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(51, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(52, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(53, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(54, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(55, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(56, samp0, i.texcoord, 
bgAcc, fgAcc); + AccumSample(57, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(58, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(59, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(60, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(61, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(62, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(63, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(64, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(65, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(66, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(67, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(68, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(69, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(70, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(71, samp0, i.texcoord, bgAcc, fgAcc); } // Get the weighted average. diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl index 37f6a96693d..6956c3d9eb8 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl @@ -203,161 +203,162 @@ static const float2 kDiskKernel[kSampleCount] = { static const int kDiskAllKernelSizes[7] = { 1, 8, 22, 43, 71, 106, 148 }; -static const float2 kDiskAllKernels[148] = { -float2(0, 0), -// ring 1 index=1 -float2(0.186046511627907, 0), -float2(0.115998102671392, 0.145457019994052), -float2(-0.0413992435267562, 0.181381937150107), -float2(-0.16762211495859, 0.0807225561148946), -float2(-0.16762211495859, -0.0807225561148945), -float2(-0.0413992435267562, -0.181381937150107), -float2(0.115998102671392, -0.145457019994052), -// ring 2 index=8 -float2(0.348837209302326, 0), -float2(0.314291465547356, 0.151354792715427), -float2(0.217496442508861, 0.272731912488848), -float2(0.0776235816126678, 0.34009113215645), -float2(-0.0776235816126678, 0.34009113215645), 
-float2(-0.217496442508861, 0.272731912488848), -float2(-0.314291465547355, 0.151354792715427), -float2(-0.348837209302326, 4.27202371795588E-17), -float2(-0.314291465547356, -0.151354792715427), -float2(-0.217496442508861, -0.272731912488848), -float2(-0.0776235816126679, -0.34009113215645), -float2(0.0776235816126674, -0.34009113215645), -float2(0.21749644250886, -0.272731912488848), -float2(0.314291465547356, -0.151354792715427), -// ring 3 index=22 -float2(0.511627906976744, 0), -float2(0.488897714588258, 0.150804972954416), -float2(0.422726814766323, 0.288210262265109), -float2(0.318994782346329, 0.400006804983643), -float2(0.186918663629318, 0.47626098767843), -float2(0.0382340013697985, 0.510197291581069), -float2(-0.113847919698579, 0.498800327162793), -float2(-0.255813953488372, 0.443082764726922), -float2(-0.375049794889679, 0.347995354208377), -float2(-0.460960816136121, 0.22198702931596), -float2(-0.505913445975647, 0.0762541826947871), -float2(-0.505913445975647, -0.0762541826947867), -float2(-0.460960816136121, -0.22198702931596), -float2(-0.375049794889679, -0.347995354208377), -float2(-0.255813953488372, -0.443082764726922), -float2(-0.11384791969858, -0.498800327162793), -float2(0.0382340013697985, -0.510197291581069), -float2(0.186918663629319, -0.47626098767843), -float2(0.318994782346329, -0.400006804983643), -float2(0.422726814766323, -0.288210262265109), -float2(0.488897714588258, -0.150804972954416), -// ring 4 index=43 -float2(0.674418604651163, 0), -float2(0.657509522169137, 0.150072257784491), -float2(0.607630166724887, 0.292619265916493), -float2(0.527281697478439, 0.420493122183797), -float2(0.420493122183797, 0.527281697478439), -float2(0.292619265916493, 0.607630166724887), -float2(0.150072257784491, 0.657509522169137), -float2(4.12962292735735E-17, 0.674418604651163), -float2(-0.150072257784491, 0.657509522169137), -float2(-0.292619265916493, 0.607630166724887), -float2(-0.420493122183797, 0.527281697478439), 
-float2(-0.527281697478438, 0.420493122183797), -float2(-0.607630166724887, 0.292619265916493), -float2(-0.657509522169137, 0.150072257784491), -float2(-0.674418604651163, 8.25924585471471E-17), -float2(-0.657509522169137, -0.150072257784491), -float2(-0.607630166724887, -0.292619265916493), -float2(-0.527281697478439, -0.420493122183797), -float2(-0.420493122183797, -0.527281697478439), -float2(-0.292619265916493, -0.607630166724887), -float2(-0.150072257784491, -0.657509522169137), -float2(-1.23888687820721E-16, -0.674418604651163), -float2(0.15007225778449, -0.657509522169137), -float2(0.292619265916493, -0.607630166724887), -float2(0.420493122183797, -0.527281697478439), -float2(0.527281697478439, -0.420493122183797), -float2(0.607630166724887, -0.292619265916492), -float2(0.657509522169137, -0.150072257784491), -// ring 5 index=71 -float2(0.837209302325581, 0), -float2(0.823755004408155, 0.149489493319789), -float2(0.783824542861175, 0.294174271323915), -float2(0.718701315573655, 0.429404046200294), -float2(0.630478436654186, 0.550832421716969), -float2(0.521991462021265, 0.654556589973234), -float2(0.396727252302976, 0.737242770856804), -float2(0.258711902267398, 0.796233362479663), -float2(0.112381338824084, 0.829632358689434), -float2(-0.0375612533167101, 0.836366288267147), -float2(-0.186296595870403, 0.81621871717548), -float2(-0.329044212547471, 0.769837204926796), -float2(-0.461216077494783, 0.698712491487602), -float2(-0.578564078221561, 0.605130583669444), -float2(-0.677316553430188, 0.492099280989047), -float2(-0.754299517313653, 0.363251502517026), -float2(-0.807038674070947, 0.222728521869775), -float2(-0.833838943809968, 0.0750468632679909), -float2(-0.833838943809968, -0.0750468632679907), -float2(-0.807038674070947, -0.222728521869774), -float2(-0.754299517313653, -0.363251502517025), -float2(-0.677316553430189, -0.492099280989047), -float2(-0.578564078221562, -0.605130583669444), -float2(-0.461216077494784, -0.698712491487602), 
-float2(-0.329044212547471, -0.769837204926796), -float2(-0.186296595870403, -0.81621871717548), -float2(-0.0375612533167103, -0.836366288267147), -float2(0.112381338824084, -0.829632358689434), -float2(0.258711902267398, -0.796233362479664), -float2(0.396727252302976, -0.737242770856804), -float2(0.521991462021265, -0.654556589973234), -float2(0.630478436654186, -0.550832421716969), -float2(0.718701315573655, -0.429404046200294), -float2(0.783824542861175, -0.294174271323915), -float2(0.823755004408155, -0.149489493319789), -// ring 6 index=106 -float2(1, 0), -float2(0.988830826225129, 0.149042266176174), -float2(0.955572805786141, 0.294755174410904), -float2(0.900968867902419, 0.433883739117558), -float2(0.826238774315995, 0.563320058063622), -float2(0.733051871829826, 0.680172737770919), -float2(0.623489801858734, 0.78183148246803), -float2(0.5, 0.866025403784439), -float2(0.365341024366395, 0.930873748644204), -float2(0.222520933956314, 0.974927912181824), -float2(0.0747300935864244, 0.99720379718118), -float2(-0.074730093586424, 0.99720379718118), -float2(-0.222520933956314, 0.974927912181824), -float2(-0.365341024366395, 0.930873748644204), -float2(-0.5, 0.866025403784439), -float2(-0.623489801858733, 0.78183148246803), -float2(-0.733051871829826, 0.680172737770919), -float2(-0.826238774315995, 0.563320058063622), -float2(-0.900968867902419, 0.433883739117558), -float2(-0.955572805786141, 0.294755174410905), -float2(-0.988830826225129, 0.149042266176175), -float2(-1, 1.22464679914735E-16), -float2(-0.988830826225129, -0.149042266176174), -float2(-0.955572805786141, -0.294755174410904), -float2(-0.900968867902419, -0.433883739117558), -float2(-0.826238774315995, -0.563320058063622), -float2(-0.733051871829826, -0.680172737770919), -float2(-0.623489801858734, -0.78183148246803), -float2(-0.5, -0.866025403784438), -float2(-0.365341024366395, -0.930873748644204), -float2(-0.222520933956315, -0.974927912181824), -float2(-0.0747300935864247, -0.99720379718118), 
-float2(0.0747300935864244, -0.99720379718118), -float2(0.222520933956314, -0.974927912181824), -float2(0.365341024366395, -0.930873748644204), -float2(0.499999999999999, -0.866025403784439), -float2(0.623489801858733, -0.78183148246803), -float2(0.733051871829827, -0.680172737770919), -float2(0.826238774315994, -0.563320058063623), -float2(0.900968867902419, -0.433883739117558), -float2(0.955572805786141, -0.294755174410905), -float2(0.988830826225128, -0.149042266176175), +static const half kDiskAllKernelRcpSizes[7] = { 1, 1.0/8, 1.0/22, 1.0/43, 1.0/71, 1.0/106, 1.0/148 }; +static const half3 kDiskAllKernels[148] = { + half3(0, 0, 0), + // ring 1 index=1 + half3(0.186046511627907, 0, 0.186046511627907), + half3(0.115998102671392, 0.145457019994052, 0.186046511627907), + half3(-0.0413992435267562, 0.181381937150107, 0.186046511627907), + half3(-0.16762211495859, 0.0807225561148946, 0.186046511627907), + half3(-0.16762211495859, -0.0807225561148945, 0.186046511627907), + half3(-0.0413992435267562, -0.181381937150107, 0.186046511627907), + half3(0.115998102671392, -0.145457019994052, 0.186046511627907), + // ring 2 index=8 + half3(0.348837209302326, 0, 0.348837209302326), + half3(0.314291465547356, 0.151354792715427, 0.348837209302326), + half3(0.217496442508861, 0.272731912488848, 0.348837209302326), + half3(0.0776235816126678, 0.34009113215645, 0.348837209302326), + half3(-0.0776235816126678, 0.34009113215645, 0.348837209302326), + half3(-0.217496442508861, 0.272731912488848, 0.348837209302326), + half3(-0.314291465547355, 0.151354792715427, 0.348837209302326), + half3(-0.348837209302326, 4.27202371795588E-17, 0.348837209302326), + half3(-0.314291465547356, -0.151354792715427, 0.348837209302326), + half3(-0.217496442508861, -0.272731912488848, 0.348837209302326), + half3(-0.0776235816126679, -0.34009113215645, 0.348837209302326), + half3(0.0776235816126674, -0.34009113215645, 0.348837209302326), + half3(0.21749644250886, -0.272731912488848, 0.348837209302326), + 
half3(0.314291465547356, -0.151354792715427, 0.348837209302326), + // ring 3 index=22 + half3(0.511627906976744, 0, 0.511627906976744), + half3(0.488897714588258, 0.150804972954416, 0.511627906976744), + half3(0.422726814766323, 0.288210262265109, 0.511627906976744), + half3(0.318994782346329, 0.400006804983643, 0.511627906976744), + half3(0.186918663629318, 0.47626098767843, 0.511627906976744), + half3(0.0382340013697985, 0.510197291581069, 0.511627906976744), + half3(-0.113847919698579, 0.498800327162793, 0.511627906976744), + half3(-0.255813953488372, 0.443082764726922, 0.511627906976744), + half3(-0.375049794889679, 0.347995354208377, 0.511627906976744), + half3(-0.460960816136121, 0.22198702931596, 0.511627906976744), + half3(-0.505913445975647, 0.0762541826947871, 0.511627906976744), + half3(-0.505913445975647, -0.0762541826947867, 0.511627906976744), + half3(-0.460960816136121, -0.22198702931596, 0.511627906976744), + half3(-0.375049794889679, -0.347995354208377, 0.511627906976744), + half3(-0.255813953488372, -0.443082764726922, 0.511627906976744), + half3(-0.11384791969858, -0.498800327162793, 0.511627906976744), + half3(0.0382340013697985, -0.510197291581069, 0.511627906976744), + half3(0.186918663629319, -0.47626098767843, 0.511627906976744), + half3(0.318994782346329, -0.400006804983643, 0.511627906976744), + half3(0.422726814766323, -0.288210262265109, 0.511627906976744), + half3(0.488897714588258, -0.150804972954416, 0.511627906976744), + // ring 4 index=43 + half3(0.674418604651163, 0, 0.674418604651163), + half3(0.657509522169137, 0.150072257784491, 0.674418604651163), + half3(0.607630166724887, 0.292619265916493, 0.674418604651163), + half3(0.527281697478439, 0.420493122183797, 0.674418604651163), + half3(0.420493122183797, 0.527281697478439, 0.674418604651163), + half3(0.292619265916493, 0.607630166724887, 0.674418604651163), + half3(0.150072257784491, 0.657509522169137, 0.674418604651163), + half3(4.12962292735735E-17, 0.674418604651163, 
0.674418604651163), + half3(-0.150072257784491, 0.657509522169137, 0.674418604651163), + half3(-0.292619265916493, 0.607630166724887, 0.674418604651163), + half3(-0.420493122183797, 0.527281697478439, 0.674418604651163), + half3(-0.527281697478438, 0.420493122183797, 0.674418604651163), + half3(-0.607630166724887, 0.292619265916493, 0.674418604651163), + half3(-0.657509522169137, 0.150072257784491, 0.674418604651163), + half3(-0.674418604651163, 8.25924585471471E-17, 0.674418604651163), + half3(-0.657509522169137, -0.150072257784491, 0.674418604651163), + half3(-0.607630166724887, -0.292619265916493, 0.674418604651163), + half3(-0.527281697478439, -0.420493122183797, 0.674418604651163), + half3(-0.420493122183797, -0.527281697478439, 0.674418604651163), + half3(-0.292619265916493, -0.607630166724887, 0.674418604651163), + half3(-0.150072257784491, -0.657509522169137, 0.674418604651163), + half3(-1.23888687820721E-16, -0.674418604651163, 0.674418604651163), + half3(0.15007225778449, -0.657509522169137, 0.674418604651163), + half3(0.292619265916493, -0.607630166724887, 0.674418604651163), + half3(0.420493122183797, -0.527281697478439, 0.674418604651163), + half3(0.527281697478439, -0.420493122183797, 0.674418604651163), + half3(0.607630166724887, -0.292619265916492, 0.674418604651163), + half3(0.657509522169137, -0.150072257784491, 0.674418604651163), + // ring 5 index=71 + half3(0.837209302325581, 0, 0.837209302325581), + half3(0.823755004408155, 0.149489493319789, 0.837209302325581), + half3(0.783824542861175, 0.294174271323915, 0.837209302325581), + half3(0.718701315573655, 0.429404046200294, 0.837209302325581), + half3(0.630478436654186, 0.550832421716969, 0.837209302325581), + half3(0.521991462021265, 0.654556589973234, 0.837209302325581), + half3(0.396727252302976, 0.737242770856804, 0.837209302325581), + half3(0.258711902267398, 0.796233362479663, 0.837209302325581), + half3(0.112381338824084, 0.829632358689434, 0.837209302325581), + half3(-0.0375612533167101, 
0.836366288267147, 0.837209302325581), + half3(-0.186296595870403, 0.81621871717548, 0.837209302325581), + half3(-0.329044212547471, 0.769837204926796, 0.837209302325581), + half3(-0.461216077494783, 0.698712491487602, 0.837209302325581), + half3(-0.578564078221561, 0.605130583669444, 0.837209302325581), + half3(-0.677316553430188, 0.492099280989047, 0.837209302325581), + half3(-0.754299517313653, 0.363251502517026, 0.837209302325581), + half3(-0.807038674070947, 0.222728521869775, 0.837209302325581), + half3(-0.833838943809968, 0.0750468632679909, 0.837209302325581), + half3(-0.833838943809968, -0.0750468632679907, 0.837209302325581), + half3(-0.807038674070947, -0.222728521869774, 0.837209302325581), + half3(-0.754299517313653, -0.363251502517025, 0.837209302325581), + half3(-0.677316553430189, -0.492099280989047, 0.837209302325581), + half3(-0.578564078221562, -0.605130583669444, 0.837209302325581), + half3(-0.461216077494784, -0.698712491487602, 0.837209302325581), + half3(-0.329044212547471, -0.769837204926796, 0.837209302325581), + half3(-0.186296595870403, -0.81621871717548, 0.837209302325582), + half3(-0.0375612533167103, -0.836366288267147, 0.837209302325581), + half3(0.112381338824084, -0.829632358689434, 0.837209302325581), + half3(0.258711902267398, -0.796233362479664, 0.837209302325581), + half3(0.396727252302976, -0.737242770856804, 0.837209302325581), + half3(0.521991462021265, -0.654556589973234, 0.837209302325581), + half3(0.630478436654186, -0.550832421716969, 0.837209302325581), + half3(0.718701315573655, -0.429404046200294, 0.837209302325581), + half3(0.783824542861175, -0.294174271323915, 0.837209302325581), + half3(0.823755004408155, -0.149489493319789, 0.837209302325581), + // ring 6 index=106 + half3(1, 0, 1), + half3(0.988830826225129, 0.149042266176174, 1), + half3(0.955572805786141, 0.294755174410904, 1), + half3(0.900968867902419, 0.433883739117558, 1), + half3(0.826238774315995, 0.563320058063622, 1), + half3(0.733051871829826, 
0.680172737770919, 1), + half3(0.623489801858734, 0.78183148246803, 1), + half3(0.5, 0.866025403784439, 1), + half3(0.365341024366395, 0.930873748644204, 1), + half3(0.222520933956314, 0.974927912181824, 1), + half3(0.0747300935864244, 0.99720379718118, 1), + half3(-0.074730093586424, 0.99720379718118, 1), + half3(-0.222520933956314, 0.974927912181824, 1), + half3(-0.365341024366395, 0.930873748644204, 1), + half3(-0.5, 0.866025403784439, 1), + half3(-0.623489801858733, 0.78183148246803, 1), + half3(-0.733051871829826, 0.680172737770919, 1), + half3(-0.826238774315995, 0.563320058063622, 1), + half3(-0.900968867902419, 0.433883739117558, 1), + half3(-0.955572805786141, 0.294755174410905, 1), + half3(-0.988830826225129, 0.149042266176175, 1), + half3(-1, 1.22464679914735E-16, 1), + half3(-0.988830826225129, -0.149042266176174, 1), + half3(-0.955572805786141, -0.294755174410904, 1), + half3(-0.900968867902419, -0.433883739117558, 1), + half3(-0.826238774315995, -0.563320058063622, 1), + half3(-0.733051871829826, -0.680172737770919, 1), + half3(-0.623489801858734, -0.78183148246803, 1), + half3(-0.5, -0.866025403784438, 1), + half3(-0.365341024366395, -0.930873748644204, 1), + half3(-0.222520933956315, -0.974927912181824, 1), + half3(-0.0747300935864247, -0.99720379718118, 1), + half3(0.0747300935864244, -0.99720379718118, 1), + half3(0.222520933956314, -0.974927912181824, 1), + half3(0.365341024366395, -0.930873748644204, 1), + half3(0.499999999999999, -0.866025403784439, 1), + half3(0.623489801858733, -0.78183148246803, 1), + half3(0.733051871829827, -0.680172737770919, 1), + half3(0.826238774315994, -0.563320058063623, 1), + half3(0.900968867902419, -0.433883739117558, 1), + half3(0.955572805786141, -0.294755174410905, 1), + half3(0.988830826225128, -0.149042266176175, 1), }; #endif // UNITY_POSTFX_DISK_KERNELS From 870726355b6689679f3058263e978937d7a7385e Mon Sep 17 00:00:00 2001 From: Kay Chang Date: Wed, 17 Apr 2024 23:23:34 -0400 Subject: [PATCH 06/13] Removed 
tiling shaders, cleaned up code. --- .../Runtime/Effects/DepthOfField.cs | 95 ++-- .../PostProcessing/Runtime/Utils/ShaderIDs.cs | 11 +- .../Shaders/Builtins/DepthOfField.hlsl | 167 ++----- .../Shaders/Builtins/DepthOfField.shader | 146 ++----- .../Shaders/Builtins/DiskKernels.hlsl | 409 ++++-------------- 5 files changed, 182 insertions(+), 646 deletions(-) diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs index 93d5f0d19c1..b8b9f019ba7 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs @@ -91,17 +91,10 @@ enum Pass CoCTemporalFilter, downsampleInitialMaxCoC, downsampleMaxCoC, - extendMaxCoC, + neighborMaxCoC, DownsampleAndPrefilter, BokehSmallKernel, - BokehMediumKernel, - BokehLargeKernel, - BokehVeryLargeKernel, - BokehUnified, - BokehKernel1, - BokehKernel2, - BokehKernel3, - BokehKernel4, + BokehDynamic, PostFilter, Combine, DebugOverlay @@ -156,10 +149,9 @@ float CalculateMaxCoCRadius(int screenHeight, out int mipLevel) return Mathf.Min(0.05f, radiusInPixels / screenHeight); } - void CalculateCoCKernelLimits(int screenHeight, out Vector4 cocKernelLimitsA, out Vector4 cocKernelLimitsB) + void CalculateCoCKernelLimits(int screenHeight, out Vector4 cocKernelLimits) { - cocKernelLimitsA = new Vector4(2-0.5f, 6- 0.5f, 10- 0.5f, 14- 0.5f) / screenHeight; - cocKernelLimitsB = new Vector4(18, 22, 26, 30) / screenHeight; + cocKernelLimits = new Vector4(2 - 0.5f, 6 - 0.5f, 10 - 0.5f, 14 - 0.5f) / screenHeight; } RenderTexture CheckHistory(int eye, int id, PostProcessRenderContext context, RenderTextureFormat format) @@ -182,8 +174,8 @@ RenderTexture CheckHistory(int eye, int id, PostProcessRenderContext context, Re public override void Render(PostProcessRenderContext context) { - bool useUnified = true;// (Time.time % 2f) < 1f; // (kc) - bool 
useStaticTiles = false; + // Legacy: if KERNEL_SMALL was selected, then run different sample pattern from KERNEL_MEDIUM, KERNEL_LARGE and KERNEL_VERY_LARGE + bool useDynamicBokeh = settings.kernelSize.value != KernelSize.Small; // The coc is stored in alpha so we need a 4 channels target. Note that using ARGB32 // will result in a very weak near-blur. @@ -204,27 +196,36 @@ public override void Render(PostProcessRenderContext context) int paddedWidth = ((context.width + tileSize - 1) >> maxCoCMipLevel) << maxCoCMipLevel; int paddedHeight = ((context.height + tileSize - 1) >> maxCoCMipLevel) << maxCoCMipLevel; - Vector4 cocKernelLimitsA; - Vector4 cocKernelLimitsB; - CalculateCoCKernelLimits(context.screenHeight, out cocKernelLimitsA, out cocKernelLimitsB); - cocKernelLimitsA /= maxCoC; - cocKernelLimitsB /= maxCoC; + Vector4 cocKernelLimits; + CalculateCoCKernelLimits(context.screenHeight, out cocKernelLimits); + cocKernelLimits /= maxCoC; + + // The sample coordinates in kDiskAllKernels in the shader code are normalized to 4 rings (coordinates with length 1 lie on the 4th ring). + // The ring placement is not uniform: + // 1st ring: 8/29 + // 2nd ring: 15/29 + // 3rd ring: 22/29 + // 4th ring: 29/29 + // When the user clamps the bokeh size, the sample coordinates must be renormalized to the number of rings requested. + float kernelScaleReNormalization = 1f; + if (settings.kernelSize.value == KernelSize.Small) + kernelScaleReNormalization = 1f; // custom sampling pattern, does not use kDiskAllKernels array.
+ else if (settings.kernelSize.value == KernelSize.Medium) + kernelScaleReNormalization = 29f / 15f; + else if (settings.kernelSize.value == KernelSize.Large) + kernelScaleReNormalization = 29f / 22f; + else if (settings.kernelSize.value == KernelSize.VeryLarge) + kernelScaleReNormalization = 29f / 29f; var sheet = context.propertySheets.Get(context.resources.shaders.depthOfField); sheet.properties.Clear(); sheet.properties.SetFloat(ShaderIDs.Distance, s1); sheet.properties.SetFloat(ShaderIDs.LensCoeff, coeff); - sheet.properties.SetVector(ShaderIDs.CoCKernelLimitsA, cocKernelLimitsA); - sheet.properties.SetVector(ShaderIDs.CoCKernelLimitsB, cocKernelLimitsB); - sheet.properties.SetVector(ShaderIDs.MaxCoCTexUvScale, new Vector4(paddedWidth / (float)context.width, paddedHeight / (float)context.height, context.width / (float)paddedWidth, context.height / (float)paddedHeight)); - sheet.properties.SetVector(ShaderIDs.KernelScale, new Vector4(maxCoC * (12f / 8f) / aspect, maxCoC * (12f / 8f), maxCoC * (12f / 8f), 0f)); // (kc) hardcoded for 4 rings + sheet.properties.SetVector(ShaderIDs.CoCKernelLimits, cocKernelLimits); + sheet.properties.SetVector(ShaderIDs.MaxCoCTexScale, new Vector4(paddedWidth / (float)context.width, paddedHeight / (float)context.height, context.width / (float)paddedWidth, context.height / (float)paddedHeight)); + sheet.properties.SetVector(ShaderIDs.KernelScale, new Vector4(maxCoC * kernelScaleReNormalization / aspect, maxCoC * kernelScaleReNormalization, maxCoC * kernelScaleReNormalization, 0f)); sheet.properties.SetVector(ShaderIDs.MarginFactors, new Vector4(2f / (context.height >> 1), (context.height >> 1) / 2f, 0f, 0f)); sheet.properties.SetFloat(ShaderIDs.MaxCoC, maxCoC); - sheet.properties.SetVector(ShaderIDs.CoCScreen, new Vector4(context.width, context.height, 1f / context.width, 1f / context.height)); - sheet.properties.SetFloat(ShaderIDs.CoCTileXCount, paddedWidth >> maxCoCMipLevel); - sheet.properties.SetFloat(ShaderIDs.CoCTileYCount, 
paddedHeight >> maxCoCMipLevel); - sheet.properties.SetFloat(ShaderIDs.CoCTilePixelWidth, 1 << maxCoCMipLevel); - sheet.properties.SetFloat(ShaderIDs.CoCTilePixelHeight, 1 << maxCoCMipLevel); sheet.properties.SetFloat(ShaderIDs.RcpMaxCoC, 1f / maxCoC); sheet.properties.SetFloat(ShaderIDs.RcpAspect, 1f / aspect); @@ -254,21 +255,23 @@ public override void Render(PostProcessRenderContext context) cmd.SetGlobalTexture(ShaderIDs.CoCTex, historyWrite); } - if (useUnified || useStaticTiles) + if (useDynamicBokeh) { - // Downsampling CoC + // Downsample MaxCoC. context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCMips[1], 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, paddedWidth >> 1, paddedHeight >> 1); cmd.BlitFullscreenTriangle(ShaderIDs.CoCTex, ShaderIDs.MaxCoCMips[1], sheet, (int)Pass.downsampleInitialMaxCoC); + // Downsample until tile-size reaches CoC max radius. for (int i = 2; i <= maxCoCMipLevel; ++i) { context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCMips[i], 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, paddedWidth >> i, paddedHeight >> i); cmd.BlitFullscreenTriangle(ShaderIDs.MaxCoCMips[i - 1], ShaderIDs.MaxCoCMips[i], sheet, (int)Pass.downsampleMaxCoC); } - // Extend CoC + // Neighbor MaxCoC. + // We can then sample it during Bokeh simulation pass and dynamically adjust the number of samples (== number of rings) to generate the bokeh. 
context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.MaxCoCTex, 0, cocFormat, RenderTextureReadWrite.Linear, FilterMode.Point, paddedWidth >> maxCoCMipLevel, paddedHeight >> maxCoCMipLevel); - cmd.BlitFullscreenTriangle(ShaderIDs.MaxCoCMips[maxCoCMipLevel], ShaderIDs.MaxCoCTex, sheet, (int)Pass.extendMaxCoC); + cmd.BlitFullscreenTriangle(ShaderIDs.MaxCoCMips[maxCoCMipLevel], ShaderIDs.MaxCoCTex, sheet, (int)Pass.neighborMaxCoC); } // Downsampling and prefiltering pass @@ -277,32 +280,10 @@ public override void Render(PostProcessRenderContext context) // Bokeh simulation pass context.GetScreenSpaceTemporaryRT(cmd, ShaderIDs.DepthOfFieldTemp, 0, colorFormat, RenderTextureReadWrite.Default, FilterMode.Bilinear, context.width / 2, context.height / 2); - if (useUnified) - { - /* - int tileXCount = paddedWidth >> maxCoCMipLevel; - int tileYCount = paddedHeight >> maxCoCMipLevel; - int tileCount = tileXCount * tileYCount; - cmd.SetGlobalFloat(ShaderIDs.CoCRingCount, 2.0f); - cmd.BlitProcedural(ShaderIDs.DepthOfFieldTex, ShaderIDs.DepthOfFieldTemp, sheet, (int)Pass.BokehUnified, 6, tileCount); - */ - cmd.BlitFullscreenTriangle(ShaderIDs.DepthOfFieldTex, ShaderIDs.DepthOfFieldTemp, sheet, (int)Pass.BokehUnified); - } - else if (useStaticTiles) - { - int tileXCount = paddedWidth >> maxCoCMipLevel; - int tileYCount = paddedHeight >> maxCoCMipLevel; - int tileCount = tileXCount * tileYCount; - for (int i = 0; i < 4; ++i) - { - cmd.SetGlobalFloat(ShaderIDs.CoCRingCount, i + 1); - cmd.BlitProcedural(ShaderIDs.DepthOfFieldTex, ShaderIDs.DepthOfFieldTemp, sheet, (int)Pass.BokehKernel1 + i, 6, tileCount); - } - } + if (useDynamicBokeh) + cmd.BlitFullscreenTriangle(ShaderIDs.DepthOfFieldTex, ShaderIDs.DepthOfFieldTemp, sheet, (int)Pass.BokehDynamic); else - { - cmd.BlitFullscreenTriangle(ShaderIDs.DepthOfFieldTex, ShaderIDs.DepthOfFieldTemp, sheet, (int)Pass.BokehSmallKernel + (int)settings.kernelSize.value); - } + cmd.BlitFullscreenTriangle(ShaderIDs.DepthOfFieldTex, 
ShaderIDs.DepthOfFieldTemp, sheet, (int)Pass.BokehSmallKernel); // Postfilter pass cmd.BlitFullscreenTriangle(ShaderIDs.DepthOfFieldTemp, ShaderIDs.DepthOfFieldTex, sheet, (int)Pass.PostFilter); diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs b/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs index 37a3486f248..62a6f2fb921 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs @@ -75,15 +75,8 @@ static class ShaderIDs internal static readonly int MaxCoCTex = Shader.PropertyToID("_MaxCoCTex"); internal static readonly int Distance = Shader.PropertyToID("_Distance"); internal static readonly int LensCoeff = Shader.PropertyToID("_LensCoeff"); - internal static readonly int CoCKernelLimitsA = Shader.PropertyToID("_CoCKernelLimitsA"); - internal static readonly int CoCKernelLimitsB = Shader.PropertyToID("_CoCKernelLimitsB"); - internal static readonly int MaxCoCTexUvScale = Shader.PropertyToID("_MaxCoCTexUvScale"); - internal static readonly int CoCRingCount = Shader.PropertyToID("_CoCRingCount"); - internal static readonly int CoCScreen = Shader.PropertyToID("_CoCScreen"); - internal static readonly int CoCTileXCount = Shader.PropertyToID("_CoCTileXCount"); - internal static readonly int CoCTileYCount = Shader.PropertyToID("_CoCTileYCount"); - internal static readonly int CoCTilePixelWidth = Shader.PropertyToID("_CoCTilePixelWidth"); - internal static readonly int CoCTilePixelHeight = Shader.PropertyToID("_CoCTilePixelHeight"); + internal static readonly int CoCKernelLimits = Shader.PropertyToID("_CoCKernelLimits"); + internal static readonly int MaxCoCTexScale = Shader.PropertyToID("_MaxCoCTexScale"); internal static readonly int KernelScale = Shader.PropertyToID("_KernelScale"); internal static readonly int MarginFactors = Shader.PropertyToID("_MarginFactors"); internal static readonly int MaxCoC = 
Shader.PropertyToID("_MaxCoC"); diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl index 0b5c6a85241..d644e17b12b 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl @@ -20,15 +20,8 @@ float4 _DepthOfFieldTex_TexelSize; // Camera parameters float _Distance; float _LensCoeff; // f^2 / (N * (S1 - f) * film_width * 2) -half4 _CoCKernelLimitsA; -half4 _CoCKernelLimitsB; -float4 _MaxCoCTexUvScale; // (kc)rename + move more variables to half -float _CoCRingCount; -float4 _CoCScreen; -float _CoCTileXCount; -float _CoCTileYCount; -float _CoCTilePixelWidth; -float _CoCTilePixelHeight; +half4 _CoCKernelLimits; +float4 _MaxCoCTexScale; // MaxCoC scale is padded with a bit of empty space (right-bottom edge) so we need some uv scale to sample it half3 _KernelScale; half2 _MarginFactors; float _MaxCoC; @@ -165,7 +158,7 @@ VaryingsDefault VertDownsampleCoC(AttributesDefault v) o.vertex = float4(v.vertex.xy, 0.0, 1.0); o.texcoord = TransformTriangleVertexToUV(v.vertex.xy); #if defined(INITIAL_COC) - o.texcoord *= _MaxCoCTexUvScale.xy; + o.texcoord *= _MaxCoCTexScale.xy; #endif #if UNITY_UV_STARTS_AT_TOP @@ -259,54 +252,30 @@ void AccumSample(int si, half4 samp0, float2 texcoord, inout half4 bgAcc, inout fgAcc += half4(samp.rgb, 1.0) * fgWeight; } -// Bokeh filter with disk-shaped kernels -half4 FragBlur(VaryingsDefault i) : SV_Target +#if defined(KERNEL_SMALL) +half4 FragBlurSmallBokeh (VaryingsDefault i) : SV_Target { half4 samp0 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, i.texcoordStereo); half4 bgAcc = 0.0; // Background: far field bokeh half4 fgAcc = 0.0; // Foreground: near field bokeh -#if defined(KERNEL_UNIFIED) - int sampleCount = kDiskAllKernelSizes[KERNEL_UNIFIED]; - half rcpSampleCount = kDiskAllKernelRcpSizes[KERNEL_UNIFIED]; -#else - int 
sampleCount = kSampleCount; - half rcpSampleCount = 1.0 / kSampleCount; -#endif - - UNITY_FLATTEN - for (int si = 0; si < sampleCount; si++) - { -#if defined(KERNEL_UNIFIED) - half2 disp = kDiskAllKernels[si].xy * _KernelScale.xy; - half dist = kDiskAllKernels[si].z * _KernelScale.z; - half2 duv = disp; -#else - half2 disp = kDiskKernel[si] * _MaxCoC; - half dist = length(disp); - half2 duv = half2(disp.x * _RcpAspect, disp.y); -#endif - - half4 samp = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, UnityStereoTransformScreenSpaceTex(i.texcoord + duv)); - - // BG: Compare CoC of the current sample and the center sample - // and select smaller one. - half bgCoC = max(min(samp0.a, samp.a), 0.0); - - // Compare the CoC to the sample distance. - // Add a small margin to smooth out. - half bgWeight = saturate((bgCoC - dist + _MarginFactors.x) * _MarginFactors.y); - half fgWeight = saturate((-samp.a - dist + _MarginFactors.x) * _MarginFactors.y); - - // Cut influence from focused areas because they're darkened by CoC - // premultiplying. This is only needed for near field. 
- fgWeight *= step(_MainTex_TexelSize.y, -samp.a); - - // Accumulation - bgAcc += half4(samp.rgb, 1.0) * bgWeight; - fgAcc += half4(samp.rgb, 1.0) * fgWeight; - } + AccumSample(0, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(1, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(2, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(3, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(4, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(5, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(6, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(7, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(8, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(9, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(10, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(11, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(12, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(13, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(14, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(15, samp0, i.texcoord, bgAcc, fgAcc); // Get the weighted average. bgAcc.rgb /= bgAcc.a + (bgAcc.a == 0.0); // zero-div guard @@ -317,7 +286,7 @@ half4 FragBlur(VaryingsDefault i) : SV_Target bgAcc.a = smoothstep(_MainTex_TexelSize.y, _MainTex_TexelSize.y * 2.0, samp0.a); // FG: Normalize the total of the weights. 
- fgAcc.a *= PI * rcpSampleCount; + fgAcc.a *= PI / kSampleCount; // Alpha premultiplying half alpha = saturate(fgAcc.a); @@ -325,24 +294,25 @@ half4 FragBlur(VaryingsDefault i) : SV_Target return half4(rgb, alpha); } +#endif // Bokeh filter with disk-shaped kernels half4 FragBlurDynamic(VaryingsDefault i) : SV_Target { half4 samp0 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, i.texcoordStereo); // normalized value in range [0, 1] - half maxCoC = SAMPLE_TEXTURE2D(_MaxCoCTex, sampler_MaxCoCTex, i.texcoordStereo * _MaxCoCTexUvScale.zw).r; + half maxCoC = SAMPLE_TEXTURE2D(_MaxCoCTex, sampler_MaxCoCTex, i.texcoordStereo * _MaxCoCTexScale.zw).r; int sampleCount; - UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[0]) + UNITY_BRANCH if (maxCoC < _CoCKernelLimits[0]) sampleCount = kDiskAllKernelSizes[0]; // margin adjustment later in the shader code artifically expand bokeh by 4px in fullscreen units (1 extra ring), we cannot have small bokeh as a result! - else UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[1]) + else UNITY_BRANCH if (maxCoC < _CoCKernelLimits[1]) sampleCount = kDiskAllKernelSizes[1+1]; - else UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[2]) + else UNITY_BRANCH if (maxCoC < _CoCKernelLimits[2]) sampleCount = kDiskAllKernelSizes[2+1]; - else UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[3]) + else UNITY_BRANCH if (maxCoC < _CoCKernelLimits[3]) sampleCount = kDiskAllKernelSizes[3+1]; else sampleCount = kDiskAllKernelSizes[4]; @@ -378,11 +348,11 @@ half4 FragBlurDynamic(VaryingsDefault i) : SV_Target AccumSample(19, samp0, i.texcoord, bgAcc, fgAcc); AccumSample(20, samp0, i.texcoord, bgAcc, fgAcc); AccumSample(21, samp0, i.texcoord, bgAcc, fgAcc); - AccumSample(22, samp0, i.texcoord, bgAcc, fgAcc); } UNITY_BRANCH if (sampleCount >= 43) { + AccumSample(22, samp0, i.texcoord, bgAcc, fgAcc); AccumSample(23, samp0, i.texcoord, bgAcc, fgAcc); AccumSample(24, samp0, i.texcoord, bgAcc, fgAcc); AccumSample(25, samp0, i.texcoord, bgAcc, fgAcc); @@ -403,11 +373,11 @@ half4 
FragBlurDynamic(VaryingsDefault i) : SV_Target AccumSample(40, samp0, i.texcoord, bgAcc, fgAcc); AccumSample(41, samp0, i.texcoord, bgAcc, fgAcc); AccumSample(42, samp0, i.texcoord, bgAcc, fgAcc); - AccumSample(43, samp0, i.texcoord, bgAcc, fgAcc); } UNITY_BRANCH if (sampleCount >= 71) { + AccumSample(43, samp0, i.texcoord, bgAcc, fgAcc); AccumSample(44, samp0, i.texcoord, bgAcc, fgAcc); AccumSample(45, samp0, i.texcoord, bgAcc, fgAcc); AccumSample(46, samp0, i.texcoord, bgAcc, fgAcc); @@ -435,7 +405,6 @@ half4 FragBlurDynamic(VaryingsDefault i) : SV_Target AccumSample(68, samp0, i.texcoord, bgAcc, fgAcc); AccumSample(69, samp0, i.texcoord, bgAcc, fgAcc); AccumSample(70, samp0, i.texcoord, bgAcc, fgAcc); - AccumSample(71, samp0, i.texcoord, bgAcc, fgAcc); } // Get the weighted average. @@ -456,82 +425,6 @@ half4 FragBlurDynamic(VaryingsDefault i) : SV_Target return half4(rgb, alpha); } -struct Attributes -{ - uint vertexID : SV_VertexID; - uint instanceID : SV_InstanceID; -}; - -uint2 UnpackTileID(uint tileID) -{ - return uint2(tileID & 0xFFFF, (tileID >> 16) & 0xFFFF); -} - -// 0 - 0,1 -// 1 - 0,0 -// 2 - 1,0 -// 3 - 1,1 -float4 GetQuadVertexPosition(uint vertexID, float z = UNITY_NEAR_CLIP_VALUE) -{ - uint topBit = vertexID >> 1; - uint botBit = (vertexID & 1); - float x = topBit; - float y = 1 - (topBit + botBit) & 1; // produces 1 for indices 0,3 and 0 for 1,2 - float4 pos = float4(x, y, z, 1.0); - return pos; -} - -VaryingsDefault VertexTiling(Attributes input) -{ - uint2 tileCoord = uint2(input.instanceID % (uint)_CoCTileXCount, input.instanceID / (uint)_CoCTileXCount); // (kc) stereo mode? 
- // normalized value in range [0, 1] - half maxCoC = LOAD_TEXTURE2D(_MaxCoCTex, _MaxCoCTex_TexelSize, tileCoord).x; - - bool shouldDiscard; - - UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[0]) - shouldDiscard = _CoCRingCount != 0; - // margin adjustment later in the shader code artifically expand bokeh by 4px in fullscreen units (1 extra ring), we cannot have small bokeh as a result! - else UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[1]) - shouldDiscard = _CoCRingCount != 1+1; - else UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[2]) - shouldDiscard = _CoCRingCount != 2+1; - else UNITY_BRANCH if (maxCoC < _CoCKernelLimitsA[3]) - shouldDiscard = _CoCRingCount != 3+1; - else - shouldDiscard = _CoCRingCount != 4; - - VaryingsDefault output; - - [branch] if (shouldDiscard) - { - output.vertex = float4(-2, -2, -2, 1); - output.texcoord = 0.0.xx; - output.texcoordStereo = 0.0.xx; -#if STEREO_INSTANCING_ENABLED - output.stereoTargetEyeIndex = 0; -#endif - return output; - } - - // This handles both "real quad" and "2 triangles" cases: remaps {0, 1, 2, 3, 4, 5} into {0, 1, 2, 3, 0, 2}. 
- uint quadIndex = (input.vertexID & 0x03) + (input.vertexID >> 2) * (input.vertexID & 0x01); - float2 pp = GetQuadVertexPosition(quadIndex).xy; - uint2 pixelCoord = tileCoord * uint2(_CoCTilePixelWidth, _CoCTilePixelHeight); - pixelCoord += uint2(pp.xy * uint2(_CoCTilePixelWidth, _CoCTilePixelHeight)); - pixelCoord.y = _CoCScreen.y - pixelCoord.y; - float2 clipCoord = (pixelCoord * _CoCScreen.zw) * 2.0 - 1.0; - - output.vertex = float4(clipCoord, 0, 1); - output.texcoord = clipCoord * 0.5 + 0.5; - #if UNITY_UV_STARTS_AT_TOP - output.texcoord = output.texcoord * float2(1.0, -1.0) + float2(0.0, 1.0); - #endif - output.texcoordStereo = TransformStereoScreenSpaceTex(output.texcoord, 1.0); - - return output; -} - // Postfilter blur half4 FragPostBlur(VaryingsDefault i) : SV_Target { diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader index e31838aff22..6f135cf7378 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader @@ -58,7 +58,7 @@ Shader "Hidden/PostProcessing/DepthOfField" Pass // 4 { - Name "Extend MaxCoC" + Name "Neighbor MaxCoC" HLSLPROGRAM #pragma target 3.5 @@ -87,52 +87,13 @@ Shader "Hidden/PostProcessing/DepthOfField" HLSLPROGRAM #pragma target 3.5 #pragma vertex VertDefault - #pragma fragment FragBlur + #pragma fragment FragBlurSmallBokeh #define KERNEL_SMALL #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" ENDHLSL } Pass // 7 - { - Name "Bokeh Filter (medium)" - - HLSLPROGRAM - #pragma target 3.5 - #pragma vertex VertDefault - #pragma fragment FragBlur - #define KERNEL_MEDIUM - #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" - ENDHLSL - } - - Pass // 8 - { - Name "Bokeh Filter (large)" - - HLSLPROGRAM - #pragma target 3.5 - #pragma 
vertex VertDefault - #pragma fragment FragBlur - #define KERNEL_LARGE - #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" - ENDHLSL - } - - Pass // 9 - { - Name "Bokeh Filter (very large)" - - HLSLPROGRAM - #pragma target 3.5 - #pragma vertex VertDefault - #pragma fragment FragBlur - #define KERNEL_VERYLARGE - #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" - ENDHLSL - } - - Pass // 10 { Name "Bokeh Filter (dynamic)" @@ -145,59 +106,7 @@ Shader "Hidden/PostProcessing/DepthOfField" ENDHLSL } - Pass // 11 - { - Name "Bokeh Filter (1 ring)" - - HLSLPROGRAM - #pragma target 3.5 - #pragma vertex VertexTiling - #pragma fragment FragBlur - #define KERNEL_UNIFIED 1 - #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" - ENDHLSL - } - - Pass // 12 - { - Name "Bokeh Filter (2 rings)" - - HLSLPROGRAM - #pragma target 3.5 - #pragma vertex VertexTiling - #pragma fragment FragBlur - #define KERNEL_UNIFIED 2 - #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" - ENDHLSL - } - - Pass // 13 - { - Name "Bokeh Filter (3 rings)" - - HLSLPROGRAM - #pragma target 3.5 - #pragma vertex VertexTiling - #pragma fragment FragBlur - #define KERNEL_UNIFIED 3 - #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" - ENDHLSL - } - - Pass // 14 - { - Name "Bokeh Filter (4 rings)" - - HLSLPROGRAM - #pragma target 3.5 - #pragma vertex VertexTiling - #pragma fragment FragBlur - #define KERNEL_UNIFIED 4 - #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" - ENDHLSL - } - - Pass // 15 + Pass // 8 { Name "Postfilter" @@ -209,7 +118,7 @@ Shader "Hidden/PostProcessing/DepthOfField" ENDHLSL } - Pass // 16 + Pass // 9 { Name "Combine" @@ -221,7 +130,7 @@ Shader "Hidden/PostProcessing/DepthOfField" ENDHLSL } - Pass // 17 + Pass // 10 { Name "Debug 
Overlay" @@ -266,69 +175,80 @@ Shader "Hidden/PostProcessing/DepthOfField" Pass // 2 { - Name "Downsample and Prefilter" + Name "Downsample initial MaxCoC" HLSLPROGRAM #pragma target 3.5 #pragma vertex VertDefault - #pragma fragment FragPrefilter + #pragma fragment FragDownsampleCoC + #define INITIAL_COC #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" ENDHLSL } Pass // 3 { - Name "Bokeh Filter (small)" + Name "Downsample MaxCoC" HLSLPROGRAM #pragma target 3.5 - #pragma vertex VertDefault - #pragma fragment FragBlur - #define KERNEL_SMALL + #pragma vertex VertDownsampleCoC + #pragma fragment FragDownsampleCoC #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" ENDHLSL } Pass // 4 { - Name "Bokeh Filter (medium)" + Name "Neighbor MaxCoC" HLSLPROGRAM #pragma target 3.5 #pragma vertex VertDefault - #pragma fragment FragBlur - #define KERNEL_MEDIUM + #pragma fragment FragExtendCoC #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" ENDHLSL } Pass // 5 { - Name "Bokeh Filter (large)" + Name "Downsample and Prefilter" HLSLPROGRAM #pragma target 3.5 #pragma vertex VertDefault - #pragma fragment FragBlur - #define KERNEL_LARGE + #pragma fragment FragPrefilter #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" ENDHLSL } Pass // 6 { - Name "Bokeh Filter (very large)" + Name "Bokeh Filter (small)" HLSLPROGRAM #pragma target 3.5 #pragma vertex VertDefault - #pragma fragment FragBlur - #define KERNEL_VERYLARGE + #pragma fragment FragBlurSmallBokeh + #define KERNEL_SMALL #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" ENDHLSL } Pass // 7 + { + Name "Bokeh Filter (dynamic)" + + HLSLPROGRAM + #pragma target 3.5 + #pragma vertex VertDefault + #pragma fragment FragBlurDynamic + #define KERNEL_UNIFIED 4 + #include 
"Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" + ENDHLSL + } + + Pass // 8 { Name "Postfilter" @@ -340,7 +260,7 @@ Shader "Hidden/PostProcessing/DepthOfField" ENDHLSL } - Pass // 8 + Pass // 9 { Name "Combine" @@ -352,7 +272,7 @@ Shader "Hidden/PostProcessing/DepthOfField" ENDHLSL } - Pass // 9 + Pass // 10 { Name "Debug Overlay" diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl index 6956c3d9eb8..f4e239d1d6d 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl @@ -1,14 +1,6 @@ #ifndef UNITY_POSTFX_DISK_KERNELS #define UNITY_POSTFX_DISK_KERNELS -#if !defined(KERNEL_SMALL) && !defined(KERNEL_MEDIUM) && \ - !defined(KERNEL_LARGE) && !defined(KERNEL_VERYLARGE) - -static const int kSampleCount = 1; -static const float2 kDiskKernel[1] = { float2(0, 0) }; - -#endif - #if defined(KERNEL_SMALL) // rings = 2 @@ -35,330 +27,87 @@ static const float2 kDiskKernel[kSampleCount] = { #endif -#if defined(KERNEL_MEDIUM) - -// rings = 3 -// points per ring = 7 -static const int kSampleCount = 22; -static const float2 kDiskKernel[kSampleCount] = { - float2(0,0), - float2(0.53333336,0), - float2(0.3325279,0.4169768), - float2(-0.11867785,0.5199616), - float2(-0.48051673,0.2314047), - float2(-0.48051673,-0.23140468), - float2(-0.11867763,-0.51996166), - float2(0.33252785,-0.4169769), - float2(1,0), - float2(0.90096885,0.43388376), - float2(0.6234898,0.7818315), - float2(0.22252098,0.9749279), - float2(-0.22252095,0.9749279), - float2(-0.62349,0.7818314), - float2(-0.90096885,0.43388382), - float2(-1,0), - float2(-0.90096885,-0.43388376), - float2(-0.6234896,-0.7818316), - float2(-0.22252055,-0.974928), - float2(0.2225215,-0.9749278), - float2(0.6234897,-0.7818316), - float2(0.90096885,-0.43388376), -}; - -#endif - -#if 
defined(KERNEL_LARGE) - -// rings = 4 -// points per ring = 7 -static const int kSampleCount = 43; -static const float2 kDiskKernel[kSampleCount] = { - float2(0,0), - float2(0.36363637,0), - float2(0.22672357,0.28430238), - float2(-0.08091671,0.35451925), - float2(-0.32762504,0.15777594), - float2(-0.32762504,-0.15777591), - float2(-0.08091656,-0.35451928), - float2(0.22672352,-0.2843024), - float2(0.6818182,0), - float2(0.614297,0.29582983), - float2(0.42510667,0.5330669), - float2(0.15171885,0.6647236), - float2(-0.15171883,0.6647236), - float2(-0.4251068,0.53306687), - float2(-0.614297,0.29582986), - float2(-0.6818182,0), - float2(-0.614297,-0.29582983), - float2(-0.42510656,-0.53306705), - float2(-0.15171856,-0.66472363), - float2(0.1517192,-0.6647235), - float2(0.4251066,-0.53306705), - float2(0.614297,-0.29582983), - float2(1,0), - float2(0.9555728,0.2947552), - float2(0.82623875,0.5633201), - float2(0.6234898,0.7818315), - float2(0.36534098,0.93087375), - float2(0.07473,0.9972038), - float2(-0.22252095,0.9749279), - float2(-0.50000006,0.8660254), - float2(-0.73305196,0.6801727), - float2(-0.90096885,0.43388382), - float2(-0.98883086,0.14904208), - float2(-0.9888308,-0.14904249), - float2(-0.90096885,-0.43388376), - float2(-0.73305184,-0.6801728), - float2(-0.4999999,-0.86602545), - float2(-0.222521,-0.9749279), - float2(0.07473029,-0.99720377), - float2(0.36534148,-0.9308736), - float2(0.6234897,-0.7818316), - float2(0.8262388,-0.56332), - float2(0.9555729,-0.29475483), -}; - -#endif - -#if defined(KERNEL_VERYLARGE) - -// rings = 5 -// points per ring = 7 -static const int kSampleCount = 71; -static const float2 kDiskKernel[kSampleCount] = { - float2(0,0), - float2(0.2758621,0), - float2(0.1719972,0.21567768), - float2(-0.061385095,0.26894566), - float2(-0.24854316,0.1196921), - float2(-0.24854316,-0.11969208), - float2(-0.061384983,-0.2689457), - float2(0.17199717,-0.21567771), - float2(0.51724136,0), - float2(0.46601835,0.22442262), - 
float2(0.32249472,0.40439558), - float2(0.11509705,0.50427306), - float2(-0.11509704,0.50427306), - float2(-0.3224948,0.40439552), - float2(-0.46601835,0.22442265), - float2(-0.51724136,0), - float2(-0.46601835,-0.22442262), - float2(-0.32249463,-0.40439564), - float2(-0.11509683,-0.5042731), - float2(0.11509732,-0.504273), - float2(0.32249466,-0.40439564), - float2(0.46601835,-0.22442262), - float2(0.7586207,0), - float2(0.7249173,0.22360738), - float2(0.6268018,0.4273463), - float2(0.47299224,0.59311354), - float2(0.27715522,0.7061801), - float2(0.056691725,0.75649947), - float2(-0.168809,0.7396005), - float2(-0.3793104,0.65698475), - float2(-0.55610836,0.51599306), - float2(-0.6834936,0.32915324), - float2(-0.7501475,0.113066405), - float2(-0.7501475,-0.11306671), - float2(-0.6834936,-0.32915318), - float2(-0.5561083,-0.5159932), - float2(-0.37931028,-0.6569848), - float2(-0.16880904,-0.7396005), - float2(0.056691945,-0.7564994), - float2(0.2771556,-0.7061799), - float2(0.47299215,-0.59311366), - float2(0.62680185,-0.4273462), - float2(0.72491735,-0.22360711), - float2(1,0), - float2(0.9749279,0.22252093), - float2(0.90096885,0.43388376), - float2(0.7818315,0.6234898), - float2(0.6234898,0.7818315), - float2(0.43388364,0.9009689), - float2(0.22252098,0.9749279), - float2(0,1), - float2(-0.22252095,0.9749279), - float2(-0.43388385,0.90096885), - float2(-0.62349,0.7818314), - float2(-0.7818317,0.62348956), - float2(-0.90096885,0.43388382), - float2(-0.9749279,0.22252093), - float2(-1,0), - float2(-0.9749279,-0.22252087), - float2(-0.90096885,-0.43388376), - float2(-0.7818314,-0.6234899), - float2(-0.6234896,-0.7818316), - float2(-0.43388346,-0.900969), - float2(-0.22252055,-0.974928), - float2(0,-1), - float2(0.2225215,-0.9749278), - float2(0.4338835,-0.90096897), - float2(0.6234897,-0.7818316), - float2(0.78183144,-0.62348986), - float2(0.90096885,-0.43388376), - float2(0.9749279,-0.22252086), -}; - -#endif -static const int kDiskAllKernelSizes[7] = { 1, 8, 22, 
43, 71, 106, 148 }; -static const half kDiskAllKernelRcpSizes[7] = { 1, 1.0/8, 1.0/22, 1.0/43, 1.0/71, 1.0/106, 1.0/148 }; -static const half3 kDiskAllKernels[148] = { - half3(0, 0, 0), - // ring 1 index=1 - half3(0.186046511627907, 0, 0.186046511627907), - half3(0.115998102671392, 0.145457019994052, 0.186046511627907), - half3(-0.0413992435267562, 0.181381937150107, 0.186046511627907), - half3(-0.16762211495859, 0.0807225561148946, 0.186046511627907), - half3(-0.16762211495859, -0.0807225561148945, 0.186046511627907), - half3(-0.0413992435267562, -0.181381937150107, 0.186046511627907), - half3(0.115998102671392, -0.145457019994052, 0.186046511627907), - // ring 2 index=8 - half3(0.348837209302326, 0, 0.348837209302326), - half3(0.314291465547356, 0.151354792715427, 0.348837209302326), - half3(0.217496442508861, 0.272731912488848, 0.348837209302326), - half3(0.0776235816126678, 0.34009113215645, 0.348837209302326), - half3(-0.0776235816126678, 0.34009113215645, 0.348837209302326), - half3(-0.217496442508861, 0.272731912488848, 0.348837209302326), - half3(-0.314291465547355, 0.151354792715427, 0.348837209302326), - half3(-0.348837209302326, 4.27202371795588E-17, 0.348837209302326), - half3(-0.314291465547356, -0.151354792715427, 0.348837209302326), - half3(-0.217496442508861, -0.272731912488848, 0.348837209302326), - half3(-0.0776235816126679, -0.34009113215645, 0.348837209302326), - half3(0.0776235816126674, -0.34009113215645, 0.348837209302326), - half3(0.21749644250886, -0.272731912488848, 0.348837209302326), - half3(0.314291465547356, -0.151354792715427, 0.348837209302326), - // ring 3 index=22 - half3(0.511627906976744, 0, 0.511627906976744), - half3(0.488897714588258, 0.150804972954416, 0.511627906976744), - half3(0.422726814766323, 0.288210262265109, 0.511627906976744), - half3(0.318994782346329, 0.400006804983643, 0.511627906976744), - half3(0.186918663629318, 0.47626098767843, 0.511627906976744), - half3(0.0382340013697985, 0.510197291581069, 
0.511627906976744), - half3(-0.113847919698579, 0.498800327162793, 0.511627906976744), - half3(-0.255813953488372, 0.443082764726922, 0.511627906976744), - half3(-0.375049794889679, 0.347995354208377, 0.511627906976744), - half3(-0.460960816136121, 0.22198702931596, 0.511627906976744), - half3(-0.505913445975647, 0.0762541826947871, 0.511627906976744), - half3(-0.505913445975647, -0.0762541826947867, 0.511627906976744), - half3(-0.460960816136121, -0.22198702931596, 0.511627906976744), - half3(-0.375049794889679, -0.347995354208377, 0.511627906976744), - half3(-0.255813953488372, -0.443082764726922, 0.511627906976744), - half3(-0.11384791969858, -0.498800327162793, 0.511627906976744), - half3(0.0382340013697985, -0.510197291581069, 0.511627906976744), - half3(0.186918663629319, -0.47626098767843, 0.511627906976744), - half3(0.318994782346329, -0.400006804983643, 0.511627906976744), - half3(0.422726814766323, -0.288210262265109, 0.511627906976744), - half3(0.488897714588258, -0.150804972954416, 0.511627906976744), - // ring 4 index=43 - half3(0.674418604651163, 0, 0.674418604651163), - half3(0.657509522169137, 0.150072257784491, 0.674418604651163), - half3(0.607630166724887, 0.292619265916493, 0.674418604651163), - half3(0.527281697478439, 0.420493122183797, 0.674418604651163), - half3(0.420493122183797, 0.527281697478439, 0.674418604651163), - half3(0.292619265916493, 0.607630166724887, 0.674418604651163), - half3(0.150072257784491, 0.657509522169137, 0.674418604651163), - half3(4.12962292735735E-17, 0.674418604651163, 0.674418604651163), - half3(-0.150072257784491, 0.657509522169137, 0.674418604651163), - half3(-0.292619265916493, 0.607630166724887, 0.674418604651163), - half3(-0.420493122183797, 0.527281697478439, 0.674418604651163), - half3(-0.527281697478438, 0.420493122183797, 0.674418604651163), - half3(-0.607630166724887, 0.292619265916493, 0.674418604651163), - half3(-0.657509522169137, 0.150072257784491, 0.674418604651163), - half3(-0.674418604651163, 
8.25924585471471E-17, 0.674418604651163), - half3(-0.657509522169137, -0.150072257784491, 0.674418604651163), - half3(-0.607630166724887, -0.292619265916493, 0.674418604651163), - half3(-0.527281697478439, -0.420493122183797, 0.674418604651163), - half3(-0.420493122183797, -0.527281697478439, 0.674418604651163), - half3(-0.292619265916493, -0.607630166724887, 0.674418604651163), - half3(-0.150072257784491, -0.657509522169137, 0.674418604651163), - half3(-1.23888687820721E-16, -0.674418604651163, 0.674418604651163), - half3(0.15007225778449, -0.657509522169137, 0.674418604651163), - half3(0.292619265916493, -0.607630166724887, 0.674418604651163), - half3(0.420493122183797, -0.527281697478439, 0.674418604651163), - half3(0.527281697478439, -0.420493122183797, 0.674418604651163), - half3(0.607630166724887, -0.292619265916492, 0.674418604651163), - half3(0.657509522169137, -0.150072257784491, 0.674418604651163), - // ring 5 index=71 - half3(0.837209302325581, 0, 0.837209302325581), - half3(0.823755004408155, 0.149489493319789, 0.837209302325581), - half3(0.783824542861175, 0.294174271323915, 0.837209302325581), - half3(0.718701315573655, 0.429404046200294, 0.837209302325581), - half3(0.630478436654186, 0.550832421716969, 0.837209302325581), - half3(0.521991462021265, 0.654556589973234, 0.837209302325581), - half3(0.396727252302976, 0.737242770856804, 0.837209302325581), - half3(0.258711902267398, 0.796233362479663, 0.837209302325581), - half3(0.112381338824084, 0.829632358689434, 0.837209302325581), - half3(-0.0375612533167101, 0.836366288267147, 0.837209302325581), - half3(-0.186296595870403, 0.81621871717548, 0.837209302325581), - half3(-0.329044212547471, 0.769837204926796, 0.837209302325581), - half3(-0.461216077494783, 0.698712491487602, 0.837209302325581), - half3(-0.578564078221561, 0.605130583669444, 0.837209302325581), - half3(-0.677316553430188, 0.492099280989047, 0.837209302325581), - half3(-0.754299517313653, 0.363251502517026, 0.837209302325581), - 
half3(-0.807038674070947, 0.222728521869775, 0.837209302325581), - half3(-0.833838943809968, 0.0750468632679909, 0.837209302325581), - half3(-0.833838943809968, -0.0750468632679907, 0.837209302325581), - half3(-0.807038674070947, -0.222728521869774, 0.837209302325581), - half3(-0.754299517313653, -0.363251502517025, 0.837209302325581), - half3(-0.677316553430189, -0.492099280989047, 0.837209302325581), - half3(-0.578564078221562, -0.605130583669444, 0.837209302325581), - half3(-0.461216077494784, -0.698712491487602, 0.837209302325581), - half3(-0.329044212547471, -0.769837204926796, 0.837209302325581), - half3(-0.186296595870403, -0.81621871717548, 0.837209302325582), - half3(-0.0375612533167103, -0.836366288267147, 0.837209302325581), - half3(0.112381338824084, -0.829632358689434, 0.837209302325581), - half3(0.258711902267398, -0.796233362479664, 0.837209302325581), - half3(0.396727252302976, -0.737242770856804, 0.837209302325581), - half3(0.521991462021265, -0.654556589973234, 0.837209302325581), - half3(0.630478436654186, -0.550832421716969, 0.837209302325581), - half3(0.718701315573655, -0.429404046200294, 0.837209302325581), - half3(0.783824542861175, -0.294174271323915, 0.837209302325581), - half3(0.823755004408155, -0.149489493319789, 0.837209302325581), - // ring 6 index=106 - half3(1, 0, 1), - half3(0.988830826225129, 0.149042266176174, 1), - half3(0.955572805786141, 0.294755174410904, 1), - half3(0.900968867902419, 0.433883739117558, 1), - half3(0.826238774315995, 0.563320058063622, 1), - half3(0.733051871829826, 0.680172737770919, 1), - half3(0.623489801858734, 0.78183148246803, 1), - half3(0.5, 0.866025403784439, 1), - half3(0.365341024366395, 0.930873748644204, 1), - half3(0.222520933956314, 0.974927912181824, 1), - half3(0.0747300935864244, 0.99720379718118, 1), - half3(-0.074730093586424, 0.99720379718118, 1), - half3(-0.222520933956314, 0.974927912181824, 1), - half3(-0.365341024366395, 0.930873748644204, 1), - half3(-0.5, 0.866025403784439, 1), - 
half3(-0.623489801858733, 0.78183148246803, 1), - half3(-0.733051871829826, 0.680172737770919, 1), - half3(-0.826238774315995, 0.563320058063622, 1), - half3(-0.900968867902419, 0.433883739117558, 1), - half3(-0.955572805786141, 0.294755174410905, 1), - half3(-0.988830826225129, 0.149042266176175, 1), - half3(-1, 1.22464679914735E-16, 1), - half3(-0.988830826225129, -0.149042266176174, 1), - half3(-0.955572805786141, -0.294755174410904, 1), - half3(-0.900968867902419, -0.433883739117558, 1), - half3(-0.826238774315995, -0.563320058063622, 1), - half3(-0.733051871829826, -0.680172737770919, 1), - half3(-0.623489801858734, -0.78183148246803, 1), - half3(-0.5, -0.866025403784438, 1), - half3(-0.365341024366395, -0.930873748644204, 1), - half3(-0.222520933956315, -0.974927912181824, 1), - half3(-0.0747300935864247, -0.99720379718118, 1), - half3(0.0747300935864244, -0.99720379718118, 1), - half3(0.222520933956314, -0.974927912181824, 1), - half3(0.365341024366395, -0.930873748644204, 1), - half3(0.499999999999999, -0.866025403784439, 1), - half3(0.623489801858733, -0.78183148246803, 1), - half3(0.733051871829827, -0.680172737770919, 1), - half3(0.826238774315994, -0.563320058063623, 1), - half3(0.900968867902419, -0.433883739117558, 1), - half3(0.955572805786141, -0.294755174410905, 1), - half3(0.988830826225128, -0.149042266176175, 1), +static const int kDiskAllKernelSizes[5] = { 1, 8, 22, 43, 71 }; +static const half kDiskAllKernelRcpSizes[5] = { 1, 1.0/8, 1.0/22, 1.0/43, 1.0/71 }; +static const half3 kDiskAllKernels[71] = { +half3(0, 0, 0), +// ring 1 index=1 +half3(0.275862068965517, 0, 0.275862068965517), +half3(0.171997186719651, 0.215677650336008, 0.275862068965517), +half3(-0.0613850852293281, 0.26894563094671, 0.275862068965517), +half3(-0.248543135973081, 0.119692065963464, 0.275862068965517), +half3(-0.248543135973081, -0.119692065963464, 0.275862068965517), +half3(-0.0613850852293282, -0.26894563094671, 0.275862068965517), +half3(0.171997186719651, 
-0.215677650336008, 0.275862068965517), +// ring 2 index=8 +half3(0.517241379310345, 0, 0.517241379310345), +half3(0.466018379949527, 0.224422623681496, 0.517241379310345), +half3(0.322494725099345, 0.404395594380015, 0.517241379310345), +half3(0.11509703480499, 0.504273058025081, 0.517241379310345), +half3(-0.11509703480499, 0.504273058025081, 0.517241379310345), +half3(-0.322494725099345, 0.404395594380015, 0.517241379310345), +half3(-0.466018379949527, 0.224422623681496, 0.517241379310345), +half3(-0.517241379310345, 6.33437999558976E-17, 0.517241379310345), +half3(-0.466018379949527, -0.224422623681496, 0.517241379310345), +half3(-0.322494725099345, -0.404395594380015, 0.517241379310345), +half3(-0.11509703480499, -0.504273058025081, 0.517241379310345), +half3(0.11509703480499, -0.504273058025081, 0.517241379310345), +half3(0.322494725099345, -0.404395594380015, 0.517241379310345), +half3(0.466018379949527, -0.224422623681495, 0.517241379310345), +// ring 3 index=22 +half3(0.758620689655172, 0, 0.758620689655172), +half3(0.72491730094121, 0.223607373691031, 0.758620689655172), +half3(0.626801828791444, 0.427346250944817, 0.758620689655172), +half3(0.472992263479039, 0.593113538424023, 0.758620689655172), +half3(0.277155259864162, 0.706180085178362, 0.758620689655172), +half3(0.0566917951345288, 0.756499432344343, 0.758620689655172), +half3(-0.168808984380652, 0.739600485103452, 0.758620689655172), +half3(-0.379310344827586, 0.65698478907785, 0.758620689655172), +half3(-0.556108316560558, 0.515993111412422, 0.758620689655172), +half3(-0.683493623925973, 0.329153181399527, 0.758620689655172), +half3(-0.750147523343201, 0.113066546754339, 0.758620689655172), +half3(-0.750147523343201, -0.113066546754339, 0.758620689655172), +half3(-0.683493623925973, -0.329153181399527, 0.758620689655172), +half3(-0.556108316560558, -0.515993111412422, 0.758620689655172), +half3(-0.379310344827587, -0.65698478907785, 0.758620689655172), +half3(-0.168808984380652, 
-0.739600485103452, 0.758620689655172), +half3(0.0566917951345288, -0.756499432344343, 0.758620689655172), +half3(0.277155259864162, -0.706180085178362, 0.758620689655172), +half3(0.472992263479039, -0.593113538424023, 0.758620689655172), +half3(0.626801828791444, -0.427346250944817, 0.758620689655172), +half3(0.72491730094121, -0.223607373691031, 0.758620689655172), +// ring 4 index=43 +half3(1, 0, 1), +half3(0.974927912181824, 0.222520933956314, 1), +half3(0.900968867902419, 0.433883739117558, 1), +half3(0.78183148246803, 0.623489801858733, 1), +half3(0.623489801858734, 0.78183148246803, 1), +half3(0.433883739117558, 0.900968867902419, 1), +half3(0.222520933956314, 0.974927912181824, 1), +half3(6.12323399573677E-17, 1, 1), +half3(-0.222520933956314, 0.974927912181824, 1), +half3(-0.433883739117558, 0.900968867902419, 1), +half3(-0.623489801858733, 0.78183148246803, 1), +half3(-0.781831482468029, 0.623489801858734, 1), +half3(-0.900968867902419, 0.433883739117558, 1), +half3(-0.974927912181824, 0.222520933956314, 1), +half3(-1, 1.22464679914735E-16, 1), +half3(-0.974927912181824, -0.222520933956314, 1), +half3(-0.900968867902419, -0.433883739117558, 1), +half3(-0.78183148246803, -0.623489801858734, 1), +half3(-0.623489801858734, -0.78183148246803, 1), +half3(-0.433883739117558, -0.900968867902419, 1), +half3(-0.222520933956315, -0.974927912181824, 1), +half3(-1.83697019872103E-16, -1, 1), +half3(0.222520933956313, -0.974927912181824, 1), +half3(0.433883739117558, -0.900968867902419, 1), +half3(0.623489801858733, -0.78183148246803, 1), +half3(0.78183148246803, -0.623489801858734, 1), +half3(0.900968867902419, -0.433883739117558, 1), +half3(0.974927912181824, -0.222520933956315, 1), +// totalSampleCount=71 }; #endif // UNITY_POSTFX_DISK_KERNELS From bafc28d5445c9076036d44a78bf5fc8bb94514a0 Mon Sep 17 00:00:00 2001 From: Kay Chang Date: Wed, 17 Apr 2024 23:49:35 -0400 Subject: [PATCH 07/13] Removed unecessary code. 
--- .../Runtime/Effects/DepthOfField.cs | 2 +- .../Runtime/Utils/RuntimeUtilities.cs | 28 ------------------- .../PostProcessing/Shaders/API/D3D11.hlsl | 1 - .../PostProcessing/Shaders/API/D3D12.hlsl | 1 - .../PostProcessing/Shaders/API/Metal.hlsl | 1 - .../PostProcessing/Shaders/API/OpenGL.hlsl | 1 - .../PostProcessing/Shaders/API/PSP2.hlsl | 1 - .../PostProcessing/Shaders/API/PSSL.hlsl | 1 - .../PostProcessing/Shaders/API/Switch.hlsl | 1 - .../PostProcessing/Shaders/API/Vulkan.hlsl | 1 - .../PostProcessing/Shaders/API/WebGPU.hlsl | 1 - .../PostProcessing/Shaders/API/XboxOne.hlsl | 1 - 12 files changed, 1 insertion(+), 39 deletions(-) diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs index b8b9f019ba7..b06dc4b8d04 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs @@ -174,7 +174,7 @@ RenderTexture CheckHistory(int eye, int id, PostProcessRenderContext context, Re public override void Render(PostProcessRenderContext context) { - // Legacy: if KERNEL_SMALL was selected, then run different sample pattern from KERNEL_MEDIUM, KERNEL_LARGE and KERNEL_VERY_LARGE + // Legacy: if KERNEL_SMALL is selected, then run a different sample pattern from KERNEL_MEDIUM, KERNEL_LARGE and KERNEL_VERY_LARGE (no dynamic branching). bool useDynamicBokeh = settings.kernelSize.value != KernelSize.Small; // The coc is stored in alpha so we need a 4 channels target. 
Note that using ARGB32 diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Utils/RuntimeUtilities.cs b/com.unity.postprocessing/PostProcessing/Runtime/Utils/RuntimeUtilities.cs index 246e384abb7..459cca0306b 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Utils/RuntimeUtilities.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Utils/RuntimeUtilities.cs @@ -556,34 +556,6 @@ public static void BlitFullscreenTriangle(this CommandBuffer cmd, RenderTargetId #endif } - /// - /// Blits procedural geometry using a given material. - /// - /// The command buffer to use - /// The source render target - /// The destination render target - /// The property sheet to use - /// The pass from the material to use - /// The number of instances to render - /// Should the destination target be cleared? - /// An optional viewport to consider for the blit - /// Should the depth buffer be preserved? - public static void BlitProcedural(this CommandBuffer cmd, RenderTargetIdentifier source, RenderTargetIdentifier destination, PropertySheet propertySheet, int pass, int vertexCount, int instanceCount, bool clear = false, Rect? viewport = null, bool preserveDepth = false) - { - cmd.SetGlobalTexture(ShaderIDs.MainTex, source); - var loadAction = viewport == null ? LoadAction.DontCare : LoadAction.Load; - cmd.SetRenderTargetWithLoadStoreAction(destination, loadAction, StoreAction.Store, preserveDepth ? LoadAction.Load : loadAction, StoreAction.Store); - - if (viewport != null) - cmd.SetViewport(viewport.Value); - - if (clear) - cmd.ClearRenderTarget(true, true, Color.clear); - - // TODO: detect which platforms support quads - cmd.DrawProcedural(Matrix4x4.identity, propertySheet.material, pass, MeshTopology.Triangles, vertexCount, instanceCount, propertySheet.properties); - } - /// /// Blits a fullscreen triangle from a double-wide source. 
/// diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/D3D11.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/D3D11.hlsl index bcf2eb91f4c..1982bcb3616 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/D3D11.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/D3D11.hlsl @@ -1,6 +1,5 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 1 -#define UNITY_NEAR_CLIP_VALUE (1.0) #define UNITY_GATHER_SUPPORTED (SHADER_TARGET >= 50) #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/D3D12.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/D3D12.hlsl index bcf2eb91f4c..1982bcb3616 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/D3D12.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/D3D12.hlsl @@ -1,6 +1,5 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 1 -#define UNITY_NEAR_CLIP_VALUE (1.0) #define UNITY_GATHER_SUPPORTED (SHADER_TARGET >= 50) #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/Metal.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/Metal.hlsl index 5ac8a00ed59..50916982d13 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/Metal.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/Metal.hlsl @@ -1,6 +1,5 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 1 -#define UNITY_NEAR_CLIP_VALUE (1.0) #define UNITY_GATHER_SUPPORTED 0 // Currently broken on Metal for some reason (May 2017) #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/OpenGL.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/OpenGL.hlsl index 0eb657ff46e..9c7cd11f11d 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/OpenGL.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/OpenGL.hlsl @@ -1,7 +1,6 @@ // For now 
OpenGL is considered at GLES2 level #define UNITY_UV_STARTS_AT_TOP 0 #define UNITY_REVERSED_Z 0 -#define UNITY_NEAR_CLIP_VALUE (-1.0) #define UNITY_GATHER_SUPPORTED 0 #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/PSP2.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/PSP2.hlsl index 25b3356f58f..dcb0c7e37c7 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/PSP2.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/PSP2.hlsl @@ -1,7 +1,6 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 0 -#define UNITY_NEAR_CLIP_VALUE (0.0) #define UNITY_GATHER_SUPPORTED 0 #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 0 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/PSSL.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/PSSL.hlsl index bcf2eb91f4c..1982bcb3616 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/PSSL.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/PSSL.hlsl @@ -1,6 +1,5 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 1 -#define UNITY_NEAR_CLIP_VALUE (1.0) #define UNITY_GATHER_SUPPORTED (SHADER_TARGET >= 50) #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/Switch.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/Switch.hlsl index 046df14d7a6..bf2d2333360 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/Switch.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/Switch.hlsl @@ -1,6 +1,5 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 1 -#define UNITY_NEAR_CLIP_VALUE (1.0) #define UNITY_GATHER_SUPPORTED (SHADER_TARGET >= 50) #define TEXTURE2D_SAMPLER2D(textureName, samplerName) Texture2D textureName; SamplerState samplerName diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/Vulkan.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/Vulkan.hlsl 
index bcf2eb91f4c..1982bcb3616 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/Vulkan.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/Vulkan.hlsl @@ -1,6 +1,5 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 1 -#define UNITY_NEAR_CLIP_VALUE (1.0) #define UNITY_GATHER_SUPPORTED (SHADER_TARGET >= 50) #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/WebGPU.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/WebGPU.hlsl index bcf2eb91f4c..1982bcb3616 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/WebGPU.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/WebGPU.hlsl @@ -1,6 +1,5 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 1 -#define UNITY_NEAR_CLIP_VALUE (1.0) #define UNITY_GATHER_SUPPORTED (SHADER_TARGET >= 50) #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/XboxOne.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/XboxOne.hlsl index bcf2eb91f4c..1982bcb3616 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/XboxOne.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/XboxOne.hlsl @@ -1,6 +1,5 @@ #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 1 -#define UNITY_NEAR_CLIP_VALUE (1.0) #define UNITY_GATHER_SUPPORTED (SHADER_TARGET >= 50) #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 From e44d18b95445830aa47f1c522f443ef693e374cc Mon Sep 17 00:00:00 2001 From: Kay Chang Date: Wed, 17 Apr 2024 23:52:18 -0400 Subject: [PATCH 08/13] Renaming. 
--- .../PostProcessing/Shaders/Builtins/DepthOfField.hlsl | 2 +- .../PostProcessing/Shaders/Builtins/DepthOfField.shader | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl index d644e17b12b..bc177d74841 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl @@ -197,7 +197,7 @@ half4 FragDownsampleCoC(VaryingsDefault i) : SV_Target return half4(maxCoC, 0.0, 0.0, 0.0); } -half4 FragExtendCoC(VaryingsDefault i) : SV_Target +half4 FragNeighborCoC(VaryingsDefault i) : SV_Target { float tx = _MainTex_TexelSize.x; float ty = _MainTex_TexelSize.y; diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader index 6f135cf7378..a0984cfde2c 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader @@ -63,7 +63,7 @@ Shader "Hidden/PostProcessing/DepthOfField" HLSLPROGRAM #pragma target 3.5 #pragma vertex VertDefault - #pragma fragment FragExtendCoC + #pragma fragment FragNeighborCoC #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" ENDHLSL } From 158887353988cd04fbbbcc4c4bf9aa2c56c83afe Mon Sep 17 00:00:00 2001 From: Kay Chang Date: Thu, 18 Apr 2024 21:50:25 -0400 Subject: [PATCH 09/13] More clean up. 
--- .../Runtime/Effects/DepthOfField.cs | 5 +- .../PostProcessing/Shaders/API/D3D9.hlsl | 1 - .../Shaders/Builtins/DepthOfField.hlsl | 9 +- .../Shaders/Builtins/DepthOfField.shader | 4 +- .../Shaders/Builtins/DiskKernels.hlsl | 204 +++++++++--------- 5 files changed, 113 insertions(+), 110 deletions(-) diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs index b06dc4b8d04..06d2292289a 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs @@ -174,7 +174,7 @@ RenderTexture CheckHistory(int eye, int id, PostProcessRenderContext context, Re public override void Render(PostProcessRenderContext context) { - // Legacy: if KERNEL_SMALL is selected, then run a different sample pattern from KERNEL_MEDIUM, KERNEL_LARGE and KERNEL_VERY_LARGE (no dynamic branching). + // Legacy: if KERNEL_SMALL is selected, then run a coarser fixed sample pattern (no dynamic branching). bool useDynamicBokeh = settings.kernelSize.value != KernelSize.Small; // The coc is stored in alpha so we need a 4 channels target. Note that using ARGB32 @@ -200,7 +200,7 @@ public override void Render(PostProcessRenderContext context) CalculateCoCKernelLimits(context.screenHeight, out cocKernelLimits); cocKernelLimits /= maxCoC; - // The samples coordinates in kDiskAllKernels in the shader code are normalized to 4 rings (coordinates with length 1 lies on the 4th ring). + // The samples coordinates for kDiskAllKernels in DiskKernels.hlsl are normalized to 4 rings (coordinates with length 1 lie on the 4th ring). 
// The ring placement are not uniform but: // 1st ring: 8/29 // 2nd ring: 15/29 @@ -255,6 +255,7 @@ public override void Render(PostProcessRenderContext context) cmd.SetGlobalTexture(ShaderIDs.CoCTex, historyWrite); } + // Generate a low-res maxCoC texture later used to infer how many samples are needed around any pixels to generate the bokeh effect. if (useDynamicBokeh) { // Downsample MaxCoC. diff --git a/com.unity.postprocessing/PostProcessing/Shaders/API/D3D9.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/API/D3D9.hlsl index 0bb74573a4a..8427b9c8ae3 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/API/D3D9.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/API/D3D9.hlsl @@ -1,7 +1,6 @@ // ALso used for Direct3D 11 "feature level 9.x" target for Windows Store and Windows Phone #define UNITY_UV_STARTS_AT_TOP 1 #define UNITY_REVERSED_Z 0 -#define UNITY_NEAR_CLIP_VALUE (0.0) #define UNITY_GATHER_SUPPORTED 0 #define UNITY_CAN_READ_POSITION_IN_FRAGMENT_PROGRAM 1 diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl index bc177d74841..17c0e3346a3 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl @@ -228,8 +228,13 @@ half4 FragNeighborCoC(VaryingsDefault i) : SV_Target void AccumSample(int si, half4 samp0, float2 texcoord, inout half4 bgAcc, inout half4 fgAcc) { +#if defined(KERNEL_SMALL) + half2 disp = kSmallDiskKernel[si].xy * _KernelScale.xy; + half dist = kSmallDiskKernel[si].z * _KernelScale.z; +#else half2 disp = kDiskAllKernels[si].xy * _KernelScale.xy; half dist = kDiskAllKernels[si].z * _KernelScale.z; +#endif half2 duv = disp; half4 samp = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, UnityStereoTransformScreenSpaceTex(texcoord + duv)); @@ -252,7 +257,6 @@ void AccumSample(int si, half4 samp0, float2 
texcoord, inout half4 bgAcc, inout fgAcc += half4(samp.rgb, 1.0) * fgWeight; } -#if defined(KERNEL_SMALL) half4 FragBlurSmallBokeh (VaryingsDefault i) : SV_Target { half4 samp0 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, i.texcoordStereo); @@ -286,7 +290,7 @@ half4 FragBlurSmallBokeh (VaryingsDefault i) : SV_Target bgAcc.a = smoothstep(_MainTex_TexelSize.y, _MainTex_TexelSize.y * 2.0, samp0.a); // FG: Normalize the total of the weights. - fgAcc.a *= PI / kSampleCount; + fgAcc.a *= PI / kSmallSampleCount; // Alpha premultiplying half alpha = saturate(fgAcc.a); @@ -294,7 +298,6 @@ half4 FragBlurSmallBokeh (VaryingsDefault i) : SV_Target return half4(rgb, alpha); } -#endif // Bokeh filter with disk-shaped kernels half4 FragBlurDynamic(VaryingsDefault i) : SV_Target diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader index a0984cfde2c..6942c1d5b8a 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader @@ -101,7 +101,6 @@ Shader "Hidden/PostProcessing/DepthOfField" #pragma target 3.5 #pragma vertex VertDefault #pragma fragment FragBlurDynamic - #define KERNEL_UNIFIED 4 #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" ENDHLSL } @@ -205,7 +204,7 @@ Shader "Hidden/PostProcessing/DepthOfField" HLSLPROGRAM #pragma target 3.5 #pragma vertex VertDefault - #pragma fragment FragExtendCoC + #pragma fragment FragNeighborCoC #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" ENDHLSL } @@ -243,7 +242,6 @@ Shader "Hidden/PostProcessing/DepthOfField" #pragma target 3.5 #pragma vertex VertDefault #pragma fragment FragBlurDynamic - #define KERNEL_UNIFIED 4 #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" ENDHLSL } diff 
--git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl index f4e239d1d6d..32b1e24d1db 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DiskKernels.hlsl @@ -1,113 +1,115 @@ #ifndef UNITY_POSTFX_DISK_KERNELS #define UNITY_POSTFX_DISK_KERNELS -#if defined(KERNEL_SMALL) - // rings = 2 -// points per ring = 5 -static const int kSampleCount = 16; -static const float2 kDiskKernel[kSampleCount] = { - float2(0,0), - float2(0.54545456,0), - float2(0.16855472,0.5187581), - float2(-0.44128203,0.3206101), - float2(-0.44128197,-0.3206102), - float2(0.1685548,-0.5187581), - float2(1,0), - float2(0.809017,0.58778524), - float2(0.30901697,0.95105654), - float2(-0.30901703,0.9510565), - float2(-0.80901706,0.5877852), - float2(-1,0), - float2(-0.80901694,-0.58778536), - float2(-0.30901664,-0.9510566), - float2(0.30901712,-0.9510565), - float2(0.80901694,-0.5877853), +// points per ring = 5 (+5 per ring) +// 1st ring spacing: 8/15 +// 2nd ring spacing: 15/15 +static const int kSmallSampleCount = 16; +static const half3 kSmallDiskKernel[kSmallSampleCount] = { + half3(0,0,0), + half3(0.54545456,0,0.5454546), + half3(0.16855472,0.5187581,0.5454546), + half3(-0.44128203,0.3206101,0.5454546), + half3(-0.44128197,-0.3206102,0.5454546), + half3(0.1685548,-0.5187581,0.5454546), + half3(1,0,1), + half3(0.809017,0.58778524,1), + half3(0.30901697,0.95105654,1), + half3(-0.30901703,0.9510565,1), + half3(-0.80901706,0.5877852,1), + half3(-1,0,1), + half3(-0.80901694,-0.58778536,1), + half3(-0.30901664,-0.9510566,1), + half3(0.30901712,-0.9510565,1), + half3(0.80901694,-0.5877853,1), }; -#endif - - - +// rings = 4 +// points per ring = 7 (+7 per ring) +// 1st ring spacing : 8/29 +// 2nd ring spacing : 15/29 +// 3rd ring spacing : 22/29 +// 4th ring spacing : 29/29 static const int 
kDiskAllKernelSizes[5] = { 1, 8, 22, 43, 71 }; static const half kDiskAllKernelRcpSizes[5] = { 1, 1.0/8, 1.0/22, 1.0/43, 1.0/71 }; static const half3 kDiskAllKernels[71] = { -half3(0, 0, 0), -// ring 1 index=1 -half3(0.275862068965517, 0, 0.275862068965517), -half3(0.171997186719651, 0.215677650336008, 0.275862068965517), -half3(-0.0613850852293281, 0.26894563094671, 0.275862068965517), -half3(-0.248543135973081, 0.119692065963464, 0.275862068965517), -half3(-0.248543135973081, -0.119692065963464, 0.275862068965517), -half3(-0.0613850852293282, -0.26894563094671, 0.275862068965517), -half3(0.171997186719651, -0.215677650336008, 0.275862068965517), -// ring 2 index=8 -half3(0.517241379310345, 0, 0.517241379310345), -half3(0.466018379949527, 0.224422623681496, 0.517241379310345), -half3(0.322494725099345, 0.404395594380015, 0.517241379310345), -half3(0.11509703480499, 0.504273058025081, 0.517241379310345), -half3(-0.11509703480499, 0.504273058025081, 0.517241379310345), -half3(-0.322494725099345, 0.404395594380015, 0.517241379310345), -half3(-0.466018379949527, 0.224422623681496, 0.517241379310345), -half3(-0.517241379310345, 6.33437999558976E-17, 0.517241379310345), -half3(-0.466018379949527, -0.224422623681496, 0.517241379310345), -half3(-0.322494725099345, -0.404395594380015, 0.517241379310345), -half3(-0.11509703480499, -0.504273058025081, 0.517241379310345), -half3(0.11509703480499, -0.504273058025081, 0.517241379310345), -half3(0.322494725099345, -0.404395594380015, 0.517241379310345), -half3(0.466018379949527, -0.224422623681495, 0.517241379310345), -// ring 3 index=22 -half3(0.758620689655172, 0, 0.758620689655172), -half3(0.72491730094121, 0.223607373691031, 0.758620689655172), -half3(0.626801828791444, 0.427346250944817, 0.758620689655172), -half3(0.472992263479039, 0.593113538424023, 0.758620689655172), -half3(0.277155259864162, 0.706180085178362, 0.758620689655172), -half3(0.0566917951345288, 0.756499432344343, 0.758620689655172), 
-half3(-0.168808984380652, 0.739600485103452, 0.758620689655172), -half3(-0.379310344827586, 0.65698478907785, 0.758620689655172), -half3(-0.556108316560558, 0.515993111412422, 0.758620689655172), -half3(-0.683493623925973, 0.329153181399527, 0.758620689655172), -half3(-0.750147523343201, 0.113066546754339, 0.758620689655172), -half3(-0.750147523343201, -0.113066546754339, 0.758620689655172), -half3(-0.683493623925973, -0.329153181399527, 0.758620689655172), -half3(-0.556108316560558, -0.515993111412422, 0.758620689655172), -half3(-0.379310344827587, -0.65698478907785, 0.758620689655172), -half3(-0.168808984380652, -0.739600485103452, 0.758620689655172), -half3(0.0566917951345288, -0.756499432344343, 0.758620689655172), -half3(0.277155259864162, -0.706180085178362, 0.758620689655172), -half3(0.472992263479039, -0.593113538424023, 0.758620689655172), -half3(0.626801828791444, -0.427346250944817, 0.758620689655172), -half3(0.72491730094121, -0.223607373691031, 0.758620689655172), -// ring 4 index=43 -half3(1, 0, 1), -half3(0.974927912181824, 0.222520933956314, 1), -half3(0.900968867902419, 0.433883739117558, 1), -half3(0.78183148246803, 0.623489801858733, 1), -half3(0.623489801858734, 0.78183148246803, 1), -half3(0.433883739117558, 0.900968867902419, 1), -half3(0.222520933956314, 0.974927912181824, 1), -half3(6.12323399573677E-17, 1, 1), -half3(-0.222520933956314, 0.974927912181824, 1), -half3(-0.433883739117558, 0.900968867902419, 1), -half3(-0.623489801858733, 0.78183148246803, 1), -half3(-0.781831482468029, 0.623489801858734, 1), -half3(-0.900968867902419, 0.433883739117558, 1), -half3(-0.974927912181824, 0.222520933956314, 1), -half3(-1, 1.22464679914735E-16, 1), -half3(-0.974927912181824, -0.222520933956314, 1), -half3(-0.900968867902419, -0.433883739117558, 1), -half3(-0.78183148246803, -0.623489801858734, 1), -half3(-0.623489801858734, -0.78183148246803, 1), -half3(-0.433883739117558, -0.900968867902419, 1), -half3(-0.222520933956315, -0.974927912181824, 1), 
-half3(-1.83697019872103E-16, -1, 1), -half3(0.222520933956313, -0.974927912181824, 1), -half3(0.433883739117558, -0.900968867902419, 1), -half3(0.623489801858733, -0.78183148246803, 1), -half3(0.78183148246803, -0.623489801858734, 1), -half3(0.900968867902419, -0.433883739117558, 1), -half3(0.974927912181824, -0.222520933956315, 1), -// totalSampleCount=71 + half3(0, 0, 0), + // ring 1 index=1 + half3(0.275862068965517, 0, 0.275862068965517), + half3(0.171997186719651, 0.215677650336008, 0.275862068965517), + half3(-0.0613850852293281, 0.26894563094671, 0.275862068965517), + half3(-0.248543135973081, 0.119692065963464, 0.275862068965517), + half3(-0.248543135973081, -0.119692065963464, 0.275862068965517), + half3(-0.0613850852293282, -0.26894563094671, 0.275862068965517), + half3(0.171997186719651, -0.215677650336008, 0.275862068965517), + // ring 2 index=8 + half3(0.517241379310345, 0, 0.517241379310345), + half3(0.466018379949527, 0.224422623681496, 0.517241379310345), + half3(0.322494725099345, 0.404395594380015, 0.517241379310345), + half3(0.11509703480499, 0.504273058025081, 0.517241379310345), + half3(-0.11509703480499, 0.504273058025081, 0.517241379310345), + half3(-0.322494725099345, 0.404395594380015, 0.517241379310345), + half3(-0.466018379949527, 0.224422623681496, 0.517241379310345), + half3(-0.517241379310345, 6.33437999558976E-17, 0.517241379310345), + half3(-0.466018379949527, -0.224422623681496, 0.517241379310345), + half3(-0.322494725099345, -0.404395594380015, 0.517241379310345), + half3(-0.11509703480499, -0.504273058025081, 0.517241379310345), + half3(0.11509703480499, -0.504273058025081, 0.517241379310345), + half3(0.322494725099345, -0.404395594380015, 0.517241379310345), + half3(0.466018379949527, -0.224422623681495, 0.517241379310345), + // ring 3 index=22 + half3(0.758620689655172, 0, 0.758620689655172), + half3(0.72491730094121, 0.223607373691031, 0.758620689655172), + half3(0.626801828791444, 0.427346250944817, 0.758620689655172), + 
half3(0.472992263479039, 0.593113538424023, 0.758620689655172), + half3(0.277155259864162, 0.706180085178362, 0.758620689655172), + half3(0.0566917951345288, 0.756499432344343, 0.758620689655172), + half3(-0.168808984380652, 0.739600485103452, 0.758620689655172), + half3(-0.379310344827586, 0.65698478907785, 0.758620689655172), + half3(-0.556108316560558, 0.515993111412422, 0.758620689655172), + half3(-0.683493623925973, 0.329153181399527, 0.758620689655172), + half3(-0.750147523343201, 0.113066546754339, 0.758620689655172), + half3(-0.750147523343201, -0.113066546754339, 0.758620689655172), + half3(-0.683493623925973, -0.329153181399527, 0.758620689655172), + half3(-0.556108316560558, -0.515993111412422, 0.758620689655172), + half3(-0.379310344827587, -0.65698478907785, 0.758620689655172), + half3(-0.168808984380652, -0.739600485103452, 0.758620689655172), + half3(0.0566917951345288, -0.756499432344343, 0.758620689655172), + half3(0.277155259864162, -0.706180085178362, 0.758620689655172), + half3(0.472992263479039, -0.593113538424023, 0.758620689655172), + half3(0.626801828791444, -0.427346250944817, 0.758620689655172), + half3(0.72491730094121, -0.223607373691031, 0.758620689655172), + // ring 4 index=43 + half3(1, 0, 1), + half3(0.974927912181824, 0.222520933956314, 1), + half3(0.900968867902419, 0.433883739117558, 1), + half3(0.78183148246803, 0.623489801858733, 1), + half3(0.623489801858734, 0.78183148246803, 1), + half3(0.433883739117558, 0.900968867902419, 1), + half3(0.222520933956314, 0.974927912181824, 1), + half3(6.12323399573677E-17, 1, 1), + half3(-0.222520933956314, 0.974927912181824, 1), + half3(-0.433883739117558, 0.900968867902419, 1), + half3(-0.623489801858733, 0.78183148246803, 1), + half3(-0.781831482468029, 0.623489801858734, 1), + half3(-0.900968867902419, 0.433883739117558, 1), + half3(-0.974927912181824, 0.222520933956314, 1), + half3(-1, 1.22464679914735E-16, 1), + half3(-0.974927912181824, -0.222520933956314, 1), + 
half3(-0.900968867902419, -0.433883739117558, 1), + half3(-0.78183148246803, -0.623489801858734, 1), + half3(-0.623489801858734, -0.78183148246803, 1), + half3(-0.433883739117558, -0.900968867902419, 1), + half3(-0.222520933956315, -0.974927912181824, 1), + half3(-1.83697019872103E-16, -1, 1), + half3(0.222520933956313, -0.974927912181824, 1), + half3(0.433883739117558, -0.900968867902419, 1), + half3(0.623489801858733, -0.78183148246803, 1), + half3(0.78183148246803, -0.623489801858734, 1), + half3(0.900968867902419, -0.433883739117558, 1), + half3(0.974927912181824, -0.222520933956315, 1), + // totalSampleCount=71 }; #endif // UNITY_POSTFX_DISK_KERNELS From b0084563f749ee060e6b21f7d298dbb684a1fdf7 Mon Sep 17 00:00:00 2001 From: Kay-Leng Chang Date: Sun, 26 May 2024 02:53:56 -0400 Subject: [PATCH 10/13] Fix for artifact. The original blending formula between DoF texture and source image is flawed but this fix tries to keep the new results as similar as possible. --- .../Runtime/Effects/DepthOfField.cs | 60 +++++++++++++++---- .../PostProcessing/Runtime/Utils/ShaderIDs.cs | 1 + .../Shaders/Builtins/DepthOfField.hlsl | 49 ++++++++------- 3 files changed, 77 insertions(+), 33 deletions(-) diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs index 06d2292289a..cd8540567ff 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs @@ -107,6 +107,15 @@ enum Pass readonly RenderTexture[][] m_CoCHistoryTextures = new RenderTexture[k_NumEyes][]; int[] m_HistoryPingPong = new int[k_NumEyes]; + // The samples coordinates for kDiskAllKernels in DiskKernels.hlsl are normalized to 4 rings (coordinates with length 1 lie on the 4th ring). 
+ // The rings are not evenly spaced; they are placed at: + // 1st ring: 8/29 + // 2nd ring: 15/29 + // 3rd ring: 22/29 + // 4th ring: 29/29 + static readonly float[] k_DisAllKernelRingOffsets = { 8f/29, 15f/29, 22f/29, 29f/29 }; + static readonly int[] k_DiskAllKernelSizes = { 1, 8, 22, 43, 71 }; + // Height of the 35mm full-frame format (36mm x 24mm) // TODO: Should be set by a physical camera const float k_FilmHeight = 0.024f; @@ -151,7 +160,29 @@ float CalculateMaxCoCRadius(int screenHeight, out int mipLevel) void CalculateCoCKernelLimits(int screenHeight, out Vector4 cocKernelLimits) { - cocKernelLimits = new Vector4(2 - 0.5f, 6 - 0.5f, 10 - 0.5f, 14 - 0.5f) / screenHeight; + // The sample points are grouped in 4 rings, but the distance between + // each ring is not even. + // Depending on a max CoC "distance", we can conservatively guarantee + // that only some rings need to be sampled. + // For instance, for a pixel C being processed, if the max CoC distance + // in the neighbouring pixels is less than ~14 pixels (at source image resolution), + // then the 4th ring does not need to be sampled. + // When sampling the half-resolution color texture, we sample the equivalent of + // 2 pixels radius from the full-resolution source image, thus the "spread" of + // each ring is 2 pixels wide in this diagram. 
+ // + // Center pixel 1st ring 2nd ring 3rd ring 4th ring + // at 0 spread spread spread spread + // <-------> <-------> <-------> <-------> <-------> + // +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---> pixel offset at full-resolution + // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 + // ~a ~b ~c ~d + + float a = k_DisAllKernelRingOffsets[0] * 16 + 2; + float b = k_DisAllKernelRingOffsets[1] * 16 + 2; + float c = k_DisAllKernelRingOffsets[2] * 16 + 2; + //float d = k_DisAllKernelRingOffsets[3] * 16 + 2; + cocKernelLimits = new Vector4(2 - 0.5f, a - 0.5f, b - 0.5f, c - 0.5f) / screenHeight; } RenderTexture CheckHistory(int eye, int id, PostProcessRenderContext context, RenderTextureFormat format) @@ -200,22 +231,30 @@ public override void Render(PostProcessRenderContext context) CalculateCoCKernelLimits(context.screenHeight, out cocKernelLimits); cocKernelLimits /= maxCoC; - // The samples coordinates for kDiskAllKernels in DiskKernels.hlsl are normalized to 4 rings (coordinates with length 1 lie on the 4th ring). - // The ring placement are not uniform but: - // 1st ring: 8/29 - // 2nd ring: 15/29 - // 3rd ring: 22/29 - // 4th ring: 19/29 // When the user clamps the bokeh size, the sample coordinates must be renormalized to the number of rings requested. float kernelScaleReNormalization = 1f; + float fgAlphaFactor = 0f; + if (settings.kernelSize.value == KernelSize.Small) + { kernelScaleReNormalization = 1f; // custom sampling pattern, does not use kDiskAllKernels array. 
+ fgAlphaFactor = 0; // unused by shader + } else if (settings.kernelSize.value == KernelSize.Medium) - kernelScaleReNormalization = 29f / 15f; + { + kernelScaleReNormalization = 1f / k_DisAllKernelRingOffsets[1]; + fgAlphaFactor = 1f / k_DiskAllKernelSizes[1]; + } else if (settings.kernelSize.value == KernelSize.Large) - kernelScaleReNormalization = 29f / 22f; + { + kernelScaleReNormalization = 1f / k_DisAllKernelRingOffsets[2]; + fgAlphaFactor = 1f / k_DiskAllKernelSizes[2]; + } else if (settings.kernelSize.value == KernelSize.VeryLarge) - kernelScaleReNormalization = 29f / 29f; + { + kernelScaleReNormalization = 1f / k_DisAllKernelRingOffsets[3]; + fgAlphaFactor = 1f / k_DiskAllKernelSizes[2]; + } var sheet = context.propertySheets.Get(context.resources.shaders.depthOfField); sheet.properties.Clear(); @@ -228,6 +267,7 @@ public override void Render(PostProcessRenderContext context) sheet.properties.SetFloat(ShaderIDs.MaxCoC, maxCoC); sheet.properties.SetFloat(ShaderIDs.RcpMaxCoC, 1f / maxCoC); sheet.properties.SetFloat(ShaderIDs.RcpAspect, 1f / aspect); + sheet.properties.SetFloat(ShaderIDs.FgAlphaFactor, fgAlphaFactor); var cmd = context.command; cmd.BeginSample("DepthOfField"); diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs b/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs index 62a6f2fb921..7d4a9519886 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs @@ -82,6 +82,7 @@ static class ShaderIDs internal static readonly int MaxCoC = Shader.PropertyToID("_MaxCoC"); internal static readonly int RcpMaxCoC = Shader.PropertyToID("_RcpMaxCoC"); internal static readonly int RcpAspect = Shader.PropertyToID("_RcpAspect"); + internal static readonly int FgAlphaFactor = Shader.PropertyToID("_FgAlphaFactor"); internal static readonly int CoCTex = Shader.PropertyToID("_CoCTex"); internal static readonly int TaaParams = 
Shader.PropertyToID("_TaaParams"); diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl index 17c0e3346a3..312f1d465b6 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl @@ -21,12 +21,14 @@ float4 _DepthOfFieldTex_TexelSize; float _Distance; float _LensCoeff; // f^2 / (N * (S1 - f) * film_width * 2) half4 _CoCKernelLimits; -float4 _MaxCoCTexScale; // MaxCoC scale is padded with a bit of empty space (right-bottom edge) so we need some uv scale to sample it +// MaxCoC texture is padded with a bit of empty space for alignment reasons (right&bottom sides) so we need some uv scale to sample it +float4 _MaxCoCTexScale; half3 _KernelScale; half2 _MarginFactors; float _MaxCoC; float _RcpMaxCoC; float _RcpAspect; +float _FgAlphaFactor; // 1 / sampleCount of either the first and/or second ring half3 _TaaParams; // Jitter.x, Jitter.y, Blending // CoC calculation @@ -285,10 +287,6 @@ half4 FragBlurSmallBokeh (VaryingsDefault i) : SV_Target bgAcc.rgb /= bgAcc.a + (bgAcc.a == 0.0); // zero-div guard fgAcc.rgb /= fgAcc.a + (fgAcc.a == 0.0); - // BG: Calculate the alpha value only based on the center CoC. - // This is a rather aggressive approximation but provides stable results. - bgAcc.a = smoothstep(_MainTex_TexelSize.y, _MainTex_TexelSize.y * 2.0, samp0.a); - // FG: Normalize the total of the weights. 
fgAcc.a *= PI / kSmallSampleCount; @@ -306,19 +304,22 @@ half4 FragBlurDynamic(VaryingsDefault i) : SV_Target // normalized value in range [0, 1] half maxCoC = SAMPLE_TEXTURE2D(_MaxCoCTex, sampler_MaxCoCTex, i.texcoordStereo * _MaxCoCTexScale.zw).r; - int sampleCount; - - UNITY_BRANCH if (maxCoC < _CoCKernelLimits[0]) - sampleCount = kDiskAllKernelSizes[0]; - // margin adjustment later in the shader code artifically expand bokeh by 4px in fullscreen units (1 extra ring), we cannot have small bokeh as a result! - else UNITY_BRANCH if (maxCoC < _CoCKernelLimits[1]) - sampleCount = kDiskAllKernelSizes[1+1]; - else UNITY_BRANCH if (maxCoC < _CoCKernelLimits[2]) - sampleCount = kDiskAllKernelSizes[2+1]; - else UNITY_BRANCH if (maxCoC < _CoCKernelLimits[3]) - sampleCount = kDiskAllKernelSizes[3+1]; + int kernelRingIndex; + + // margin adjustment +1 in the shader code artificially expands bokeh by 4px in fullscreen units (1 extra ring), we cannot have small bokeh as a result! + if (maxCoC < _CoCKernelLimits[0]) + kernelRingIndex = 0; + else if (maxCoC < _CoCKernelLimits[1]) + kernelRingIndex = 1+1; + else if (maxCoC < _CoCKernelLimits[2]) + kernelRingIndex = 2+1; + else if (maxCoC < _CoCKernelLimits[3]) + kernelRingIndex = 3+1; else - sampleCount = kDiskAllKernelSizes[4]; + kernelRingIndex = 4; + + int sampleCount = kDiskAllKernelSizes[kernelRingIndex]; + half sampleCountRcp = kDiskAllKernelRcpSizes[kernelRingIndex]; half4 bgAcc = 0.0; // Background: far field bokeh half4 fgAcc = 0.0; // Foreground: near field bokeh @@ -414,12 +415,14 @@ half4 FragBlurDynamic(VaryingsDefault i) : SV_Target bgAcc.rgb /= bgAcc.a + (bgAcc.a == 0.0); // zero-div guard fgAcc.rgb /= fgAcc.a + (fgAcc.a == 0.0); - // BG: Calculate the alpha value only based on the center CoC. - // This is a rather aggressive approximation but provides stable results. - bgAcc.a = smoothstep(_MainTex_TexelSize.y, _MainTex_TexelSize.y * 2.0, samp0.a); - - // FG: Normalize the total of the weights. 
- fgAcc.a *= PI / sampleCount; + // FG: fgAcc roughly represents the number of samples in the foreground which bleed into the pixel being processed. + // We can use this value to gradually blend-in the DoF texture into the original source image. We use the number + // of samples on the first or second ring as threshold to full-blend the DoF texture (when other outer rings are sampled, + // we are already at full-blend). + // The choice of first or second ring is arbitrary and decided to closely reproduce the original algorithm result. + // The original algorithm produces unnatural (physically inaccurate) blending that varies with "MaxBlurSize" parameter, + // so there is no good fix for it. + fgAcc.a *= max(sampleCountRcp, _FgAlphaFactor); // Alpha premultiplying half alpha = saturate(fgAcc.a); From b3c068992b3f935125a40930d06f455037d626cc Mon Sep 17 00:00:00 2001 From: Kay Chang Date: Mon, 27 May 2024 12:45:36 -0400 Subject: [PATCH 11/13] Small improvement. --- .../Runtime/Effects/DepthOfField.cs | 2 +- .../Shaders/Builtins/DepthOfField.hlsl | 33 ++++++++++--------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs index cd8540567ff..b2c0c99171e 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs @@ -248,7 +248,7 @@ public override void Render(PostProcessRenderContext context) else if (settings.kernelSize.value == KernelSize.Large) { kernelScaleReNormalization = 1f / k_DisAllKernelRingOffsets[2]; - fgAlphaFactor = 1f / k_DiskAllKernelSizes[2]; + fgAlphaFactor = 1f / ((k_DiskAllKernelSizes[1] + k_DiskAllKernelSizes[2]) * 0.5f); } else if (settings.kernelSize.value == KernelSize.VeryLarge) { diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl 
b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl index 312f1d465b6..293ae368433 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl @@ -310,11 +310,11 @@ half4 FragBlurDynamic(VaryingsDefault i) : SV_Target if (maxCoC < _CoCKernelLimits[0]) kernelRingIndex = 0; else if (maxCoC < _CoCKernelLimits[1]) - kernelRingIndex = 1+1; + kernelRingIndex = 1 + 1; else if (maxCoC < _CoCKernelLimits[2]) - kernelRingIndex = 2+1; + kernelRingIndex = 2 + 1; else if (maxCoC < _CoCKernelLimits[3]) - kernelRingIndex = 3+1; + kernelRingIndex = 3 + 1; else kernelRingIndex = 4; @@ -326,20 +326,21 @@ half4 FragBlurDynamic(VaryingsDefault i) : SV_Target AccumSample(0, samp0, i.texcoord, bgAcc, fgAcc); - UNITY_BRANCH if (sampleCount >= 8) + UNITY_BRANCH if (kernelRingIndex >= 1) { - AccumSample( 1, samp0, i.texcoord, bgAcc, fgAcc); - AccumSample( 2, samp0, i.texcoord, bgAcc, fgAcc); - AccumSample( 3, samp0, i.texcoord, bgAcc, fgAcc); - AccumSample( 4, samp0, i.texcoord, bgAcc, fgAcc); - AccumSample( 5, samp0, i.texcoord, bgAcc, fgAcc); - AccumSample( 6, samp0, i.texcoord, bgAcc, fgAcc); - AccumSample( 7, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(1, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(2, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(3, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(4, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(5, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(6, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(7, samp0, i.texcoord, bgAcc, fgAcc); } - UNITY_BRANCH if (sampleCount >= 22) + + UNITY_BRANCH if (kernelRingIndex >= 2) { - AccumSample( 8, samp0, i.texcoord, bgAcc, fgAcc); - AccumSample( 9, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(8, samp0, i.texcoord, bgAcc, fgAcc); + AccumSample(9, samp0, i.texcoord, bgAcc, fgAcc); AccumSample(10, samp0, i.texcoord, bgAcc, fgAcc); AccumSample(11, samp0, 
i.texcoord, bgAcc, fgAcc); AccumSample(12, samp0, i.texcoord, bgAcc, fgAcc); @@ -354,7 +355,7 @@ half4 FragBlurDynamic(VaryingsDefault i) : SV_Target AccumSample(21, samp0, i.texcoord, bgAcc, fgAcc); } - UNITY_BRANCH if (sampleCount >= 43) + UNITY_BRANCH if (kernelRingIndex >= 3) { AccumSample(22, samp0, i.texcoord, bgAcc, fgAcc); AccumSample(23, samp0, i.texcoord, bgAcc, fgAcc); @@ -379,7 +380,7 @@ half4 FragBlurDynamic(VaryingsDefault i) : SV_Target AccumSample(42, samp0, i.texcoord, bgAcc, fgAcc); } - UNITY_BRANCH if (sampleCount >= 71) + UNITY_BRANCH if (kernelRingIndex >= 4) { AccumSample(43, samp0, i.texcoord, bgAcc, fgAcc); AccumSample(44, samp0, i.texcoord, bgAcc, fgAcc); From 6e5120796eec5415e1b7cbc7ded52c8366b31b0e Mon Sep 17 00:00:00 2001 From: Kay Chang Date: Mon, 27 May 2024 13:12:49 -0400 Subject: [PATCH 12/13] Clamp max kernel size. --- .../PostProcessing/Runtime/Effects/DepthOfField.cs | 1 + .../PostProcessing/Runtime/Utils/ShaderIDs.cs | 1 + .../PostProcessing/Shaders/Builtins/DepthOfField.hlsl | 3 +++ 3 files changed, 5 insertions(+) diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs index b2c0c99171e..0bba60a792b 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Effects/DepthOfField.cs @@ -268,6 +268,7 @@ public override void Render(PostProcessRenderContext context) sheet.properties.SetFloat(ShaderIDs.RcpMaxCoC, 1f / maxCoC); sheet.properties.SetFloat(ShaderIDs.RcpAspect, 1f / aspect); sheet.properties.SetFloat(ShaderIDs.FgAlphaFactor, fgAlphaFactor); + sheet.properties.SetInteger(ShaderIDs.MaxRingIndex, (int)settings.kernelSize.value + 1); var cmd = context.command; cmd.BeginSample("DepthOfField"); diff --git a/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs b/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs index 
7d4a9519886..d855da297fc 100644 --- a/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs +++ b/com.unity.postprocessing/PostProcessing/Runtime/Utils/ShaderIDs.cs @@ -83,6 +83,7 @@ static class ShaderIDs internal static readonly int RcpMaxCoC = Shader.PropertyToID("_RcpMaxCoC"); internal static readonly int RcpAspect = Shader.PropertyToID("_RcpAspect"); internal static readonly int FgAlphaFactor = Shader.PropertyToID("_FgAlphaFactor"); + internal static readonly int MaxRingIndex = Shader.PropertyToID("_MaxRingIndex"); internal static readonly int CoCTex = Shader.PropertyToID("_CoCTex"); internal static readonly int TaaParams = Shader.PropertyToID("_TaaParams"); diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl index 293ae368433..6edd7d02238 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl @@ -29,6 +29,7 @@ float _MaxCoC; float _RcpMaxCoC; float _RcpAspect; float _FgAlphaFactor; // 1 / sampleCount of either the first and/or second ring +int _MaxRingIndex; half3 _TaaParams; // Jitter.x, Jitter.y, Blending // CoC calculation @@ -318,6 +319,8 @@ half4 FragBlurDynamic(VaryingsDefault i) : SV_Target else kernelRingIndex = 4; + kernelRingIndex = min(_MaxRingIndex, kernelRingIndex); + int sampleCount = kDiskAllKernelSizes[kernelRingIndex]; half sampleCountRcp = kDiskAllKernelRcpSizes[kernelRingIndex]; From 79e0a2fdea0ad9e68963e4f2ef921904c5f5dbda Mon Sep 17 00:00:00 2001 From: Kay Chang Date: Mon, 27 May 2024 14:40:48 -0400 Subject: [PATCH 13/13] Added GatherRed support. 
--- .../Shaders/Builtins/DepthOfField.hlsl | 32 +++++++++++++++++-- .../Shaders/Builtins/DepthOfField.shader | 22 ++++++------- 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl index 6edd7d02238..6fb89426f1c 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl @@ -155,7 +155,7 @@ half4 FragPrefilter(VaryingsDefault i) : SV_Target return half4(avg, coc); } -VaryingsDefault VertDownsampleCoC(AttributesDefault v) +VaryingsDefault VertDownsampleMaxCoC(AttributesDefault v) { VaryingsDefault o; o.vertex = float4(v.vertex.xy, 0.0, 1.0); @@ -173,8 +173,12 @@ VaryingsDefault VertDownsampleCoC(AttributesDefault v) return o; } -half4 FragDownsampleCoC(VaryingsDefault i) : SV_Target +half4 FragDownsampleMaxCoC(VaryingsDefault i) : SV_Target { +#if UNITY_GATHER_SUPPORTED + // Sample source colors + half4 cocs = GATHER_RED_TEXTURE2D(_MainTex, sampler_MainTex, i.texcoordStereo); +#else // TODO implement gather version float3 duv = _MainTex_TexelSize.xyx * float3(0.5, 0.5, -0.5); @@ -189,6 +193,7 @@ half4 FragDownsampleCoC(VaryingsDefault i) : SV_Target cocs.y = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv1).r; cocs.z = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv2).r; cocs.w = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv3).r; +#endif #if defined(INITIAL_COC) // Storing the absolute normalized CoC is enough. 
@@ -200,11 +205,31 @@ half4 FragDownsampleCoC(VaryingsDefault i) : SV_Target return half4(maxCoC, 0.0, 0.0, 0.0); } -half4 FragNeighborCoC(VaryingsDefault i) : SV_Target +half4 FragNeighborMaxCoC(VaryingsDefault i) : SV_Target { float tx = _MainTex_TexelSize.x; float ty = _MainTex_TexelSize.y; +#if UNITY_GATHER_SUPPORTED + float2 uvA = UnityStereoTransformScreenSpaceTex(i.texcoord + _MainTex_TexelSize.xy * float2(-0.5, -0.5)); + float2 uvB = UnityStereoTransformScreenSpaceTex(i.texcoord + _MainTex_TexelSize.xy * float2( 0.5, -0.5)); + float2 uvC = UnityStereoTransformScreenSpaceTex(i.texcoord + _MainTex_TexelSize.xy * float2(-0.5, 0.5)); + float2 uvD = UnityStereoTransformScreenSpaceTex(i.texcoord + _MainTex_TexelSize.xy * float2( 0.5, 0.5)); + + half4 cocsA = GATHER_RED_TEXTURE2D(_MainTex, sampler_MainTex, uvA); + half4 cocsB = GATHER_RED_TEXTURE2D(_MainTex, sampler_MainTex, uvB); + half4 cocsC = GATHER_RED_TEXTURE2D(_MainTex, sampler_MainTex, uvC); + half4 cocsD = GATHER_RED_TEXTURE2D(_MainTex, sampler_MainTex, uvD); /* Gather footprint order: x=(u0,v1) y=(u1,v1) z=(u1,v0) w=(u0,v0); the four footprints overlap on the center texel, so pick only the components that yield the 9 distinct texels of the 3x3 neighbourhood. */ + half coc0 = cocsA.x; + half coc1 = cocsA.y; + half coc2 = cocsB.y; /* texel (+1, 0); cocsB.x would re-read the center texel */ + half coc3 = cocsA.z; + half coc4 = cocsA.w; + half coc5 = cocsB.z; /* texel (+1, -1) */ + half coc6 = cocsC.x; + half coc7 = cocsC.y; + half coc8 = cocsD.y; /* texel (+1, +1); cocsD.w would re-read the center texel */ +#else float2 uv0 = UnityStereoTransformScreenSpaceTex(i.texcoord); float2 uv1 = UnityStereoTransformScreenSpaceTex(i.texcoord + float2( tx, 0)); float2 uv2 = UnityStereoTransformScreenSpaceTex(i.texcoord + float2( tx, ty)); @@ -224,6 +249,7 @@ half4 FragNeighborCoC(VaryingsDefault i) : SV_Target half coc6 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv6).r; half coc7 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv7).r; half coc8 = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, uv8).r; +#endif half maxCoC = Max3(Max3(coc0, coc1, coc2), Max3(coc3, coc4, coc5), Max3(coc6, coc7, coc8)); return half4(maxCoC, 0.0, 0.0, 0.0); diff --git a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader 
b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader index 6942c1d5b8a..57ec927631f 100644 --- a/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader +++ b/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.shader @@ -36,9 +36,9 @@ Shader "Hidden/PostProcessing/DepthOfField" Name "Downsample initial MaxCoC" HLSLPROGRAM - #pragma target 3.5 + #pragma target 5.0 #pragma vertex VertDefault - #pragma fragment FragDownsampleCoC + #pragma fragment FragDownsampleMaxCoC #define INITIAL_COC #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" ENDHLSL @@ -49,9 +49,9 @@ Shader "Hidden/PostProcessing/DepthOfField" Name "Downsample MaxCoC" HLSLPROGRAM - #pragma target 3.5 - #pragma vertex VertDownsampleCoC - #pragma fragment FragDownsampleCoC + #pragma target 5.0 + #pragma vertex VertDownsampleMaxCoC + #pragma fragment FragDownsampleMaxCoC #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" ENDHLSL } @@ -61,9 +61,9 @@ Shader "Hidden/PostProcessing/DepthOfField" Name "Neighbor MaxCoC" HLSLPROGRAM - #pragma target 3.5 + #pragma target 5.0 #pragma vertex VertDefault - #pragma fragment FragNeighborCoC + #pragma fragment FragNeighborMaxCoC #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" ENDHLSL } @@ -179,7 +179,7 @@ Shader "Hidden/PostProcessing/DepthOfField" HLSLPROGRAM #pragma target 3.5 #pragma vertex VertDefault - #pragma fragment FragDownsampleCoC + #pragma fragment FragDownsampleMaxCoC #define INITIAL_COC #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" ENDHLSL @@ -191,8 +191,8 @@ Shader "Hidden/PostProcessing/DepthOfField" HLSLPROGRAM #pragma target 3.5 - #pragma vertex VertDownsampleCoC - #pragma fragment FragDownsampleCoC + #pragma vertex VertDownsampleMaxCoC + #pragma fragment FragDownsampleMaxCoC #include 
"Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" ENDHLSL } @@ -204,7 +204,7 @@ Shader "Hidden/PostProcessing/DepthOfField" HLSLPROGRAM #pragma target 3.5 #pragma vertex VertDefault - #pragma fragment FragNeighborCoC + #pragma fragment FragNeighborMaxCoC #include "Packages/com.unity.postprocessing/PostProcessing/Shaders/Builtins/DepthOfField.hlsl" ENDHLSL }