renderer: optimise shader usage further, split shaders and add more caching (#12992)

* shader: split CM rgba/rgbx into discard ones

make it branchless if we have no discards.

* shader: ensure we don't stall on vbo uv buffer

if we render a new texture before the GPU is done with the previous one,
the upload is going to stall until it is done. call glBufferData to orphan
the data; this allows the driver to return a new memory block immediately
if the GPU is still reading from the previous one.

* protocols: ensure we reset GL_PACK_ALIGNMENT

reset GL_PACK_ALIGNMENT back to the default initial value of 4

* shader: use unsigned short in VAO

lose a tiny bit of precision but gain massive bandwidth reductions.
use GL_UNSIGNED_SHORT and set it as normalized. clamp and round the UV
for uint16_t in customUv.

* shader: interleave vertex buffers

use std::array for fullverts and a single interleaved buffer for
position and uv, which should in theory improve cache locality and also
removes the need to have two buffers around.

* shader: revert precision drop

we need the float precision because we might have 1.01 or similar
floats entering the CM shader maths, and rounding/clamping those means the
maths turns out wrong. so revert back to float, at the cost of higher
bandwidth usage.

* update doColorManagement api

* convert primaries to XYZ on cpu

* remove unused primaries uniform

---------

Co-authored-by: UjinT34 <ujint34@mail.ru>
This commit is contained in:
Tom Englund 2026-01-17 15:31:19 +01:00 committed by GitHub
parent 92a3b91999
commit c99eb23869
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 185 additions and 95 deletions

View file

@ -43,6 +43,7 @@
#include <fcntl.h>
#include <gbm.h>
#include <filesystem>
#include <cstring>
#include "./shaders/Shaders.hpp"
using namespace Hyprutils::OS;
@ -896,7 +897,9 @@ bool CHyprOpenGLImpl::initShaders() {
else {
std::vector<SFragShaderDesc> CM_SHADERS = {{
{SH_FRAG_CM_RGBA, "CMrgba.frag"},
{SH_FRAG_CM_RGBA_DISCARD, "CMrgbadiscard.frag"},
{SH_FRAG_CM_RGBX, "CMrgbx.frag"},
{SH_FRAG_CM_RGBX_DISCARD, "CMrgbadiscard.frag"},
{SH_FRAG_CM_BLURPREPARE, "CMblurprepare.frag"},
{SH_FRAG_CM_BORDER1, "CMborder.frag"},
}};
@ -1228,13 +1231,14 @@ void CHyprOpenGLImpl::passCMUniforms(WP<CShader> shader, const NColorManagement:
shader->setUniformInt(SHADER_TARGET_TF, targetImageDescription->value().transferFunction);
const auto targetPrimaries = targetImageDescription->getPrimaries();
const std::array<GLfloat, 8> glTargetPrimaries = {
targetPrimaries->value().red.x, targetPrimaries->value().red.y, targetPrimaries->value().green.x, targetPrimaries->value().green.y,
targetPrimaries->value().blue.x, targetPrimaries->value().blue.y, targetPrimaries->value().white.x, targetPrimaries->value().white.y,
const auto targetPrimaries = targetImageDescription->getPrimaries();
const auto mat = targetPrimaries->value().toXYZ().mat();
const std::array<GLfloat, 9> glTargetPrimariesXYZ = {
mat[0][0], mat[1][0], mat[2][0], //
mat[0][1], mat[1][1], mat[2][1], //
mat[0][2], mat[1][2], mat[2][2], //
};
shader->setUniformMatrix4x2fv(SHADER_TARGET_PRIMARIES, 1, false, glTargetPrimaries);
shader->setUniformMatrix3fv(SHADER_TARGET_PRIMARIES_XYZ, 1, false, glTargetPrimariesXYZ);
const bool needsSDRmod = modifySDR && isSDR2HDR(imageDescription->value(), targetImageDescription->value());
const bool needsHDRmod = !needsSDRmod && isHDR2SDR(imageDescription->value(), targetImageDescription->value());
@ -1364,10 +1368,17 @@ void CHyprOpenGLImpl::renderTextureInternal(SP<CTexture> tex, const CBox& box, c
m_renderData.pMonitor->inFullscreenMode()) /* Fullscreen window with pass cm enabled */;
if (!skipCM && !usingFinalShader) {
if (texType == TEXTURE_RGBA)
shader = m_shaders->frag[SH_FRAG_CM_RGBA];
else if (texType == TEXTURE_RGBX)
shader = m_shaders->frag[SH_FRAG_CM_RGBX];
if (!data.discardActive) {
if (texType == TEXTURE_RGBA)
shader = m_shaders->frag[SH_FRAG_CM_RGBA];
else if (texType == TEXTURE_RGBX)
shader = m_shaders->frag[SH_FRAG_CM_RGBX];
} else {
if (texType == TEXTURE_RGBA)
shader = m_shaders->frag[SH_FRAG_CM_RGBA_DISCARD];
else if (texType == TEXTURE_RGBX)
shader = m_shaders->frag[SH_FRAG_CM_RGBA_DISCARD];
}
shader = useShader(shader);
@ -1487,20 +1498,33 @@ void CHyprOpenGLImpl::renderTextureInternal(SP<CTexture> tex, const CBox& box, c
}
glBindVertexArray(shader->getUniformLocation(SHADER_SHADER_VAO));
if (data.allowCustomUV && m_renderData.primarySurfaceUVTopLeft != Vector2D(-1, -1)) {
const float customUVs[] = {
m_renderData.primarySurfaceUVBottomRight.x, m_renderData.primarySurfaceUVTopLeft.y, m_renderData.primarySurfaceUVTopLeft.x,
m_renderData.primarySurfaceUVTopLeft.y, m_renderData.primarySurfaceUVBottomRight.x, m_renderData.primarySurfaceUVBottomRight.y,
m_renderData.primarySurfaceUVTopLeft.x, m_renderData.primarySurfaceUVBottomRight.y,
};
glBindBuffer(GL_ARRAY_BUFFER, shader->getUniformLocation(SHADER_SHADER_VBO));
glBindBuffer(GL_ARRAY_BUFFER, shader->getUniformLocation(SHADER_SHADER_VBO_UV));
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(customUVs), customUVs);
} else {
glBindBuffer(GL_ARRAY_BUFFER, shader->getUniformLocation(SHADER_SHADER_VBO_UV));
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(fullVerts), fullVerts);
// this tells the driver that the GPU can keep reading the old block for previous draws while the CPU writes to a new one,
// to avoid stalls if renderTextureInternal is called multiple times in the same render pass,
// at the cost of some temporary vram usage.
glBufferData(GL_ARRAY_BUFFER, sizeof(fullVerts), nullptr, GL_DYNAMIC_DRAW);
auto verts = fullVerts;
if (data.allowCustomUV && m_renderData.primarySurfaceUVTopLeft != Vector2D(-1, -1)) {
const float u0 = m_renderData.primarySurfaceUVTopLeft.x;
const float v0 = m_renderData.primarySurfaceUVTopLeft.y;
const float u1 = m_renderData.primarySurfaceUVBottomRight.x;
const float v1 = m_renderData.primarySurfaceUVBottomRight.y;
verts[0].u = u0;
verts[0].v = v0;
verts[1].u = u0;
verts[1].v = v1;
verts[2].u = u1;
verts[2].v = v0;
verts[3].u = u1;
verts[3].v = v1;
}
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(verts), verts.data());
if (!m_renderData.clipBox.empty() || !m_renderData.clipRegion.empty()) {
CRegion damageClip = m_renderData.clipBox;