renderer: optimise shader usage further, split shaders and add more caching (#12992)

* shader: split CM rgba/rgbx into discard ones

make it branchless if we have no discards.

* shader: ensure we don't stall on vbo uv buffer

if we render a new texture before the GPU has finished with the previous
one, the upload is going to stall until the GPU is done. call glBufferData
to orphan the data first: this allows the driver to return a new memory
block immediately if the GPU is still reading from the previous one.

* protocols: ensure we reset GL_PACK_ALIGNMENT

reset GL_PACK_ALIGNMENT back to the default initial value of 4

* shader: use unsigned short in VAO

lose a tiny bit of precision but gain massive bandwidth reductions.
use GL_UNSIGNED_SHORT and set it as normalized. clamp and round the UV
for uint16_t in customUv.

* shader: interleave vertex buffers

use std::array for fullverts and a single interleaved buffer for
position and uv, which should in theory improve cache locality and also
removes the need to have two buffers around.

* shader: revert precision drop

we need the float precision because we might have 1.01 or similar
floats entering the CM shader maths, and rounding/clamping those means the
maths turns out wrong. so revert back to float, sadly with higher bandwidth
usage.

* update doColorManagement api

* convert primaries to XYZ on cpu

* remove unused primaries uniform

---------

Co-authored-by: UjinT34 <ujint34@mail.ru>
This commit is contained in:
Tom Englund 2026-01-17 15:31:19 +01:00 committed by GitHub
parent 92a3b91999
commit c99eb23869
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 185 additions and 95 deletions

View file

@ -401,13 +401,12 @@ vec4 tonemap(vec4 color, mat3 dstXYZ) {
return vec4(fromLMS * toLinear(vec4(ICtCpPQInv * ICtCp, 1.0), CM_TRANSFER_FUNCTION_ST2084_PQ).rgb * HDR_MAX_LUMINANCE * refScale, color[3]);
}
vec4 doColorManagement(vec4 pixColor, int srcTF, int dstTF, mat4x2 dstPrimaries) {
vec4 doColorManagement(vec4 pixColor, int srcTF, int dstTF, mat3 dstxyz) {
pixColor.rgb /= max(pixColor.a, 0.001);
pixColor.rgb = toLinearRGB(pixColor.rgb, srcTF);
pixColor.rgb = convertMatrix * pixColor.rgb;
pixColor = toNit(pixColor, srcTFRange);
pixColor.rgb *= pixColor.a;
mat3 dstxyz = primaries2xyz(dstPrimaries);
pixColor = tonemap(pixColor, dstxyz);
pixColor = fromLinearNit(pixColor, dstTF, dstTFRange);
if ((srcTF == CM_TRANSFER_FUNCTION_SRGB || srcTF == CM_TRANSFER_FUNCTION_GAMMA22) && dstTF == CM_TRANSFER_FUNCTION_ST2084_PQ) {

View file

@ -6,7 +6,7 @@ in vec2 v_texcoord;
uniform int sourceTF; // eTransferFunction
uniform int targetTF; // eTransferFunction
uniform mat4x2 targetPrimaries;
uniform mat3 targetPrimariesXYZ;
uniform vec2 fullSizeUntransformed;
uniform float radiusOuter;
@ -90,7 +90,7 @@ void main() {
pixColor = getColorForCoord(v_texcoord);
pixColor.rgb *= pixColor[3];
pixColor = doColorManagement(pixColor, sourceTF, targetTF, targetPrimaries);
pixColor = doColorManagement(pixColor, sourceTF, targetTF, targetPrimariesXYZ);
pixColor *= alpha * additionalAlpha;

View file

@ -7,14 +7,9 @@ uniform sampler2D tex;
uniform int sourceTF; // eTransferFunction
uniform int targetTF; // eTransferFunction
uniform mat4x2 targetPrimaries;
uniform mat3 targetPrimariesXYZ;
uniform float alpha;
uniform bool discardOpaque;
uniform bool discardAlpha;
uniform float discardAlphaValue;
uniform bool applyTint;
uniform vec3 tint;
@ -25,14 +20,8 @@ layout(location = 0) out vec4 fragColor;
void main() {
vec4 pixColor = texture(tex, v_texcoord);
if (discardOpaque && pixColor.a * alpha == 1.0)
discard;
if (discardAlpha && pixColor.a <= discardAlphaValue)
discard;
// this shader shouldn't be used when skipCM == 1
pixColor = doColorManagement(pixColor, sourceTF, targetTF, targetPrimaries);
pixColor = doColorManagement(pixColor, sourceTF, targetTF, targetPrimariesXYZ);
if (applyTint)
pixColor.rgb *= tint;

View file

@ -0,0 +1,44 @@
#version 300 es
#extension GL_ARB_shading_language_include : enable
// Fragment shader: samples `tex` (including its alpha channel), optionally
// discards the fragment, then applies color management, an optional tint and
// optional rounding before writing the result scaled by `alpha`.
precision highp float;
in vec2 v_texcoord;
uniform sampler2D tex;
uniform int sourceTF; // eTransferFunction
uniform int targetTF; // eTransferFunction
// Passed unchanged to doColorManagement() as its mat3 argument.
uniform mat3 targetPrimariesXYZ;
// Global opacity: used by the discardOpaque test and multiplied into the output.
uniform float alpha;
// Discard toggles and threshold, evaluated at the top of main().
uniform bool discardOpaque;
uniform bool discardAlpha;
uniform float discardAlphaValue;
// When applyTint is set, the color-managed rgb is multiplied by `tint`.
uniform bool applyTint;
uniform vec3 tint;
// NOTE(review): `radius` and rounding() are not declared in this file;
// presumably they come from rounding.glsl, and doColorManagement() from
// CM.glsl -- confirm against those includes.
#include "rounding.glsl"
#include "CM.glsl"
layout(location = 0) out vec4 fragColor;
void main() {
vec4 pixColor = texture(tex, v_texcoord);
// Drop the fragment when the sampled alpha times `alpha` is exactly 1.0.
if (discardOpaque && pixColor.a * alpha == 1.0)
discard;
// Drop the fragment when the sampled alpha is at or below the threshold.
if (discardAlpha && pixColor.a <= discardAlphaValue)
discard;
// this shader shouldn't be used when skipCM == 1
pixColor = doColorManagement(pixColor, sourceTF, targetTF, targetPrimariesXYZ);
// The tint only scales rgb; alpha is left untouched.
if (applyTint)
pixColor.rgb *= tint;
// rounding() is only invoked for a positive radius.
if (radius > 0.0)
pixColor = rounding(pixColor);
// All four channels are scaled by the global opacity.
fragColor = pixColor * alpha;
}

View file

@ -7,14 +7,9 @@ uniform sampler2D tex;
uniform int sourceTF; // eTransferFunction
uniform int targetTF; // eTransferFunction
uniform mat4x2 targetPrimaries;
uniform mat3 targetPrimariesXYZ;
uniform float alpha;
uniform bool discardOpaque;
uniform bool discardAlpha;
uniform float discardAlphaValue;
uniform bool applyTint;
uniform vec3 tint;
@ -25,14 +20,8 @@ layout(location = 0) out vec4 fragColor;
void main() {
vec4 pixColor = vec4(texture(tex, v_texcoord).rgb, 1.0);
if (discardOpaque && pixColor.a * alpha == 1.0)
discard;
if (discardAlpha && pixColor.a <= discardAlphaValue)
discard;
// this shader shouldn't be used when skipCM == 1
pixColor = doColorManagement(pixColor, sourceTF, targetTF, targetPrimaries);
pixColor = doColorManagement(pixColor, sourceTF, targetTF, targetPrimariesXYZ);
if (applyTint)
pixColor.rgb *= tint;

View file

@ -0,0 +1,44 @@
#version 300 es
#extension GL_ARB_shading_language_include : enable
// Fragment shader: samples only the rgb channels of `tex` and forces alpha to
// 1.0, optionally discards the fragment, then applies color management, an
// optional tint and optional rounding before writing the result scaled by
// `alpha`.
precision highp float;
in vec2 v_texcoord;
uniform sampler2D tex;
uniform int sourceTF; // eTransferFunction
uniform int targetTF; // eTransferFunction
// Passed unchanged to doColorManagement() as its mat3 argument.
uniform mat3 targetPrimariesXYZ;
// Global opacity: used by the discardOpaque test and multiplied into the output.
uniform float alpha;
// Discard toggles and threshold, evaluated at the top of main().
uniform bool discardOpaque;
uniform bool discardAlpha;
uniform float discardAlphaValue;
// When applyTint is set, the color-managed rgb is multiplied by `tint`.
uniform bool applyTint;
uniform vec3 tint;
// NOTE(review): `radius` and rounding() are not declared in this file;
// presumably they come from rounding.glsl, and doColorManagement() from
// CM.glsl -- confirm against those includes.
#include "rounding.glsl"
#include "CM.glsl"
layout(location = 0) out vec4 fragColor;
void main() {
// The texture's own alpha channel is ignored; pixColor.a is always 1.0 here.
vec4 pixColor = vec4(texture(tex, v_texcoord).rgb, 1.0);
// With pixColor.a fixed at 1.0 this reduces to `alpha == 1.0`.
if (discardOpaque && pixColor.a * alpha == 1.0)
discard;
// With pixColor.a fixed at 1.0 this reduces to `1.0 <= discardAlphaValue`.
if (discardAlpha && pixColor.a <= discardAlphaValue)
discard;
// this shader shouldn't be used when skipCM == 1
pixColor = doColorManagement(pixColor, sourceTF, targetTF, targetPrimariesXYZ);
// The tint only scales rgb; alpha is left untouched.
if (applyTint)
pixColor.rgb *= tint;
// rounding() is only invoked for a positive radius.
if (radius > 0.0)
pixColor = rounding(pixColor);
// All four channels are scaled by the global opacity.
fragColor = pixColor * alpha;
}

View file

@ -8,7 +8,7 @@ in vec2 v_texcoord;
uniform int skipCM;
uniform int sourceTF; // eTransferFunction
uniform int targetTF; // eTransferFunction
uniform mat4x2 targetPrimaries;
uniform mat3 targetPrimariesXYZ;
uniform vec2 topLeft;
uniform vec2 bottomRight;
@ -93,7 +93,7 @@ void main() {
pixColor.rgb *= pixColor[3];
if (skipCM == 0)
pixColor = doColorManagement(pixColor, sourceTF, targetTF, targetPrimaries);
pixColor = doColorManagement(pixColor, sourceTF, targetTF, targetPrimariesXYZ);
fragColor = pixColor;
}