Update mpv shaders

2022-10-31 14:09:57 +01:00
parent cd9f63f03a
commit ef8c481915
7 changed files with 563 additions and 44 deletions
--- a/config/mpv/shaders/FSR.glsl
+++ b/config/mpv/shaders/FSR.glsl
@ -0,0 +1,453 @@
+// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// FidelityFX FSR v1.0.2 by AMD
+// ported to mpv by agyild
+
+// Changelog
+// Made it compatible with pre-OpenGL 4.0 renderers
+// Made it directly operate on LUMA plane, since the original shader was operating on LUMA by deriving it from RGB. This should cause a major increase in performance, especially on OpenGL 4.0+ renderers (4+2 texture lookups vs. 12+5)
+// Removed transparency preservation mechanism since the alpha channel is a separate source plane than LUMA
+// Added optional performance-saving lossy optimizations to EASU (Credit: atyuwen, https://atyuwen.github.io/posts/optimizing-fsr/)
+//
+// Notes
+// Per AMD's guidelines, FSR only upscales content by up to 4x in area, i.e. 2x per dimension (e.g., 1080p -> 2160p, 720p -> 1440p), or by any smaller factor in between.
+// Any scaling beyond that limit is handled by mpv's own scalers.
+
+//!HOOK LUMA
+//!BIND HOOKED
+//!SAVE EASUTEX
+//!DESC FidelityFX Super Resolution v1.0.2 (EASU)
+//!WHEN OUTPUT.w OUTPUT.h * LUMA.w LUMA.h * / 1.0 >
+//!WIDTH OUTPUT.w OUTPUT.w LUMA.w 2 * < * LUMA.w 2 * OUTPUT.w LUMA.w 2 * > * + OUTPUT.w OUTPUT.w LUMA.w 2 * = * +
+//!HEIGHT OUTPUT.h OUTPUT.h LUMA.h 2 * < * LUMA.h 2 * OUTPUT.h LUMA.h 2 * > * + OUTPUT.h OUTPUT.h LUMA.h 2 * = * +
+//!COMPONENTS 1
+
+// User variables - EASU
+#define FSR_PQ 0 // Whether the source content has PQ gamma or not. Needs to be set to the same value for both passes. 0 or 1.
+#define FSR_EASU_DERING 1 // If set to 0, disables deringing for a small increase in performance. 0 or 1.
+#define FSR_EASU_SIMPLE_ANALYSIS 0 // If set to 1, uses a simpler single-pass direction and length analysis for an increase in performance. 0 or 1.
+#define FSR_EASU_QUIT_EARLY 0 // If set to 1, uses bilinear filtering for non-edge pixels and skips EASU on those regions for an increase in performance. 0 or 1.
+
+// Shader code
+
+// Threshold for the "no usable edge direction" test in hook(): a pixel whose squared
+// direction magnitude is below 1.0 / FSR_EASU_DIR_THRESHOLD is flagged as zero-direction.
+// Quit-early mode uses the smaller value (64.0 instead of 32768.0), which raises that
+// cutoff so more pixels are flagged and take the bilinear shortcut.
+// Define FSR_EASU_DIR_THRESHOLD beforehand to override. Note that no value is defined
+// here if FSR_EASU_QUIT_EARLY is neither 0 nor 1.
+#ifndef FSR_EASU_DIR_THRESHOLD
+    #if (FSR_EASU_QUIT_EARLY == 1)
+        #define FSR_EASU_DIR_THRESHOLD 64.0
+    #elif (FSR_EASU_QUIT_EARLY == 0)
+        #define FSR_EASU_DIR_THRESHOLD 32768.0
+    #endif
+#endif
+
+// Low-precision approximation of 1.0 / a.
+// Works on the raw bit pattern: the bits of 'a' are subtracted from a magic constant
+// and the result is reinterpreted as a float, so no division is performed.
+float APrxLoRcpF1(float a) {
+    uint bits = floatBitsToUint(a);
+    return uintBitsToFloat(uint(0x7ef07ebb) - bits);
+}
+
+// Low-precision approximation of 1.0 / sqrt(a).
+// The bit pattern of 'a' is shifted right by one, subtracted from a magic constant,
+// and the result is reinterpreted as a float.
+float APrxLoRsqF1(float a) {
+    uint halvedBits = floatBitsToUint(a) >> uint(1);
+    return uintBitsToFloat(uint(0x5f347d74) - halvedBits);
+}
+
+// Returns the smallest of the three arguments.
+float AMin3F1(float x, float y, float z) {
+    float yz = min(y, z);
+    return min(x, yz);
+}
+
+// Returns the largest of the three arguments.
+float AMax3F1(float x, float y, float z) {
+    float yz = max(y, z);
+    return max(x, yz);
+}
+
+#if (FSR_PQ == 1)
+
+// Raises 'a' to the 4th power. Applied to every tap when FSR_PQ is 1.
+// Implemented as two squarings instead of pow(a, 4.0): pow() is undefined for
+// negative inputs in GLSL and is a transcendental evaluated 12 times per pixel,
+// whereas the squarings are plain multiplies that are defined for every input.
+float ToGamma2(float a) {
+    float a2 = a * a;
+    return a2 * a2;
+}
+
+#endif
+
+ // Filtering for a given tap for the scalar.
+ // Rotates the tap offset into the gradient-aligned frame, scales it anisotropically,
+ // evaluates the windowed kernel weight at the resulting squared distance, then adds
+ // the weighted tap value to 'aC' and the weight itself to 'aW'.
+ void FsrEasuTap(
+    inout float aC,     // Accumulated color, with negative lobe.
+    inout float aW, // Accumulated weight.
+    vec2 off,       // Pixel offset from resolve position to tap.
+    vec2 dir,       // Gradient direction.
+    vec2 len,       // Length.
+    float lob,      // Negative lobe strength.
+    float clp,      // Clipping point.
+    float c){       // Tap color.
+    // Rotate offset by direction.
+    vec2 v;
+    v.x = (off.x * ( dir.x)) + (off.y * dir.y);
+    v.y = (off.x * (-dir.y)) + (off.y * dir.x);
+    // Anisotropy.
+    v *= len;
+    // Compute distance^2.
+    float d2 = v.x * v.x + v.y * v.y;
+    // Limit to the window as at corner, 2 taps can easily be outside.
+    d2 = min(d2, clp);
+    // Approximation of lanczos2 without sin() or rcp(), or sqrt() to get x.
+    //  (25/16 * (2/5 * x^2 - 1)^2 - (25/16 - 1)) * (1/4 * x^2 - 1)^2
+    //  |_______________________________________|   |_______________|
+    //                   base                             window
+    // The general form of the 'base' is,
+    //  (a*(b*x^2-1)^2-(a-1))
+    // Where 'a=1/(2*b-b^2)' and 'b' moves around the negative lobe.
+    // In the code below 'wB' is the base term and 'wA' is the window term; 'lob'
+    // takes the place of the fixed 1/4 factor shown in the window formula above.
+    float wB = float(2.0 / 5.0) * d2 + -1.0;
+    float wA = lob * d2 + -1.0;
+    wB *= wB;
+    wA *= wA;
+    wB = float(25.0 / 16.0) * wB + float(-(25.0 / 16.0 - 1.0));
+    float w = wB * wA;
+    // Do weighted average.
+    aC += c * w;
+    aW += w;
+}
+
+// Accumulate direction and length.
+// Two compile-time variants share this body:
+//  - FSR_EASU_SIMPLE_ANALYSIS == 1: called once with all 12 taps. The five '+' samples
+//    (lA..lE) are first bilinearly interpolated from the taps, and 'dir'/'len' are
+//    overwritten with the result.
+//  - FSR_EASU_SIMPLE_ANALYSIS == 0: called once per corner (s, t, u, v) with that
+//    corner's '+' samples. The result is scaled by the corner's bilinear weight and
+//    added to 'dir'/'len'.
+// 'pp' is the fractional position used to form the bilinear weights.
+void FsrEasuSet(
+    inout vec2 dir,
+    inout float len,
+    vec2 pp,
+#if (FSR_EASU_SIMPLE_ANALYSIS == 1)
+    float b, float c,
+    float i, float j, float f, float e,
+    float k, float l, float h, float g,
+    float o, float n
+#elif (FSR_EASU_SIMPLE_ANALYSIS == 0)
+    bool biS, bool biT, bool biU, bool biV,
+    float lA, float lB, float lC, float lD, float lE
+#endif
+    ){
+    // Compute bilinear weight, branches factor out as predicates are compile-time immediates.
+    //  s t
+    //  u v
+#if (FSR_EASU_SIMPLE_ANALYSIS == 1)
+    // All four corner weights at once: x = s, y = t, z = u, w = v.
+    vec4 w = vec4(0.0);
+    w.x = (1.0 - pp.x) * (1.0 - pp.y);
+    w.y =        pp.x  * (1.0 - pp.y);
+    w.z = (1.0 - pp.x) *        pp.y;
+    w.w =        pp.x  *        pp.y;
+
+    // Interpolate each arm of the '+' pattern from the four corners' corresponding taps.
+    float lA = dot(w, vec4(b, c, f, g));
+    float lB = dot(w, vec4(e, f, i, j));
+    float lC = dot(w, vec4(f, g, j, k));
+    float lD = dot(w, vec4(g, h, k, l));
+    float lE = dot(w, vec4(j, k, n, o));
+#elif (FSR_EASU_SIMPLE_ANALYSIS == 0)
+    // Only the weight of the corner selected by the caller is kept.
+    float w = 0.0;
+    if (biS)
+        w = (1.0 - pp.x) * (1.0 - pp.y);
+    if (biT)
+        w =        pp.x  * (1.0 - pp.y);
+    if (biU)
+        w = (1.0 - pp.x) *        pp.y;
+    if (biV)
+        w =        pp.x  *        pp.y;
+#endif
+    // Direction is the '+' diff.
+    //    a
+    //  b c d
+    //    e
+    // Then takes magnitude from abs average of both sides of 'c'.
+    // Length converts gradient reversal to 0, smoothly to non-reversal at 1, shaped, then adding horz and vert terms.
+    float dc = lD - lC;
+    float cb = lC - lB;
+    float lenX = max(abs(dc), abs(cb));
+    lenX = APrxLoRcpF1(lenX);
+    float dirX = lD - lB;
+    lenX = clamp(abs(dirX) * lenX, 0.0, 1.0);
+    lenX *= lenX;
+    // Repeat for the y axis.
+    float ec = lE - lC;
+    float ca = lC - lA;
+    float lenY = max(abs(ec), abs(ca));
+    lenY = APrxLoRcpF1(lenY);
+    float dirY = lE - lA;
+    lenY = clamp(abs(dirY) * lenY, 0.0, 1.0);
+    lenY *= lenY;
+#if (FSR_EASU_SIMPLE_ANALYSIS == 1)
+    // Single call: assign the outputs directly.
+    len = lenX + lenY;
+    dir = vec2(dirX, dirY);
+#elif (FSR_EASU_SIMPLE_ANALYSIS == 0)
+    // One of four calls: add this corner's weighted contribution.
+    dir += vec2(dirX, dirY) * w;
+    len += dot(vec2(w), vec2(lenX, lenY));
+#endif
+}
+
+// EASU pass entry point.
+// Reconstructs one output sample from a 12-tap neighborhood of the hooked plane:
+// the taps are fetched, an edge direction and length are estimated, and the taps are
+// then blended with a kernel rotated and stretched along that direction.
+// Returns vec4(value, 0.0, 0.0, 1.0); only the red component carries the result.
+vec4 hook() {
+    // Result
+    vec4 pix = vec4(0.0, 0.0, 0.0, 1.0);
+
+    //------------------------------------------------------------------------------------------------------------------------------
+    //      +---+---+
+    //      |   |   |
+    //      +--(0)--+
+    //      | b | c |
+    //  +---F---+---+---+
+    //  | e | f | g | h |
+    //  +--(1)--+--(2)--+
+    //  | i | j | k | l |
+    //  +---+---+---+---+
+    //      | n | o |
+    //      +--(3)--+
+    //      |   |   |
+    //      +---+---+
+    // Get position of 'F'.
+    // After these three lines 'fp' is the integer texel coordinate of 'F' and 'pp' is
+    // the fractional offset from it, used below for the bilinear weights and tap offsets.
+    vec2 pp = HOOKED_pos * HOOKED_size - vec2(0.5);
+    vec2 fp = floor(pp);
+    pp -= fp;
+    //------------------------------------------------------------------------------------------------------------------------------
+    // 12-tap kernel.
+    //    b c
+    //  e f g h
+    //  i j k l
+    //    n o
+    // Gather 4 ordering.
+    //  a b
+    //  r g
+    // Allowing dead-code removal to remove the 'z's.
+#if (defined(HOOKED_gather) && (__VERSION__ >= 400 || (GL_ES && __VERSION__ >= 310)))
+    // Four gathers, one per 2x2 quad marked (0)..(3) in the diagram above.
+    vec4 bczzL = HOOKED_gather(vec2((fp + vec2(1.0, -1.0)) * HOOKED_pt), 0);
+    vec4 ijfeL = HOOKED_gather(vec2((fp + vec2(0.0,  1.0)) * HOOKED_pt), 0);
+    vec4 klhgL = HOOKED_gather(vec2((fp + vec2(2.0,  1.0)) * HOOKED_pt), 0);
+    vec4 zzonL = HOOKED_gather(vec2((fp + vec2(1.0,  3.0)) * HOOKED_pt), 0);
+#else
+    // pre-OpenGL 4.0 compatibility
+    // Each tap is sampled individually at its texel center (hence the +0.5 offsets),
+    // then packed into the same four-vector layout the gather path produces.
+    float b = HOOKED_tex(vec2((fp + vec2(0.5, -0.5)) * HOOKED_pt)).r;
+    float c = HOOKED_tex(vec2((fp + vec2(1.5, -0.5)) * HOOKED_pt)).r;
+
+    float e = HOOKED_tex(vec2((fp + vec2(-0.5, 0.5)) * HOOKED_pt)).r;
+    float f = HOOKED_tex(vec2((fp + vec2( 0.5, 0.5)) * HOOKED_pt)).r;
+    float g = HOOKED_tex(vec2((fp + vec2( 1.5, 0.5)) * HOOKED_pt)).r;
+    float h = HOOKED_tex(vec2((fp + vec2( 2.5, 0.5)) * HOOKED_pt)).r;
+
+    float i = HOOKED_tex(vec2((fp + vec2(-0.5, 1.5)) * HOOKED_pt)).r;
+    float j = HOOKED_tex(vec2((fp + vec2( 0.5, 1.5)) * HOOKED_pt)).r;
+    float k = HOOKED_tex(vec2((fp + vec2( 1.5, 1.5)) * HOOKED_pt)).r;
+    float l = HOOKED_tex(vec2((fp + vec2( 2.5, 1.5)) * HOOKED_pt)).r;
+
+    float n = HOOKED_tex(vec2((fp + vec2(0.5, 2.5) ) * HOOKED_pt)).r;
+    float o = HOOKED_tex(vec2((fp + vec2(1.5, 2.5) ) * HOOKED_pt)).r;
+
+    vec4 bczzL = vec4(b, c, 0.0, 0.0);
+    vec4 ijfeL = vec4(i, j, f, e);
+    vec4 klhgL = vec4(k, l, h, g);
+    vec4 zzonL = vec4(0.0, 0.0, o, n);
+#endif
+    //------------------------------------------------------------------------------------------------------------------------------
+    // Rename.
+    float bL = bczzL.x;
+    float cL = bczzL.y;
+    float iL = ijfeL.x;
+    float jL = ijfeL.y;
+    float fL = ijfeL.z;
+    float eL = ijfeL.w;
+    float kL = klhgL.x;
+    float lL = klhgL.y;
+    float hL = klhgL.z;
+    float gL = klhgL.w;
+    float oL = zzonL.z;
+    float nL = zzonL.w;
+
+#if (FSR_PQ == 1)
+    // Not the most performance-friendly solution, but should work until mpv adds proper gamma transformation functions for shaders
+    bL = ToGamma2(bL);
+    cL = ToGamma2(cL);
+    iL = ToGamma2(iL);
+    jL = ToGamma2(jL);
+    fL = ToGamma2(fL);
+    eL = ToGamma2(eL);
+    kL = ToGamma2(kL);
+    lL = ToGamma2(lL);
+    hL = ToGamma2(hL);
+    gL = ToGamma2(gL);
+    oL = ToGamma2(oL);
+    nL = ToGamma2(nL);
+#endif
+
+    // Accumulate for bilinear interpolation.
+    vec2 dir = vec2(0.0);
+    float len = 0.0;
+#if (FSR_EASU_SIMPLE_ANALYSIS == 1)
+    // Single analysis over all 12 taps.
+    FsrEasuSet(dir, len, pp, bL, cL, iL, jL, fL, eL, kL, lL, hL, gL, oL, nL);
+#elif (FSR_EASU_SIMPLE_ANALYSIS == 0)
+    // One analysis per corner (f, g, j, k), each passed its own '+' neighborhood.
+    FsrEasuSet(dir, len, pp, true, false, false, false, bL, eL, fL, gL, jL);
+    FsrEasuSet(dir, len, pp, false, true, false, false, cL, fL, gL, hL, kL);
+    FsrEasuSet(dir, len, pp, false, false, true, false, fL, iL, jL, kL, nL);
+    FsrEasuSet(dir, len, pp, false, false, false, true, gL, jL, kL, lL, oL);
+#endif
+    //------------------------------------------------------------------------------------------------------------------------------
+    // Normalize with approximation, and cleanup close to zero.
+    vec2 dir2 = dir * dir;
+    float dirR = dir2.x + dir2.y;
+    // 'zro' flags pixels whose squared direction magnitude is below the threshold.
+    bool zro = dirR < float(1.0 / FSR_EASU_DIR_THRESHOLD);
+    dirR = APrxLoRsqF1(dirR);
+#if (FSR_EASU_QUIT_EARLY == 1)
+    if (zro) {
+        // No usable direction: return a plain bilinear blend of the four nearest taps.
+        vec4 w = vec4(0.0);
+        w.x = (1.0 - pp.x) * (1.0 - pp.y);
+        w.y =        pp.x  * (1.0 - pp.y);
+        w.z = (1.0 - pp.x) *        pp.y;
+        w.w =        pp.x  *        pp.y;
+
+        pix.r = clamp(dot(w, vec4(fL, gL, jL, kL)), 0.0, 1.0);
+        return pix;
+    }
+#elif (FSR_EASU_QUIT_EARLY == 0)
+    // No usable direction: replace the scale and the x component with 1.0.
+    dirR = zro ? 1.0 : dirR;
+    dir.x = zro ? 1.0 : dir.x;
+#endif
+    dir *= vec2(dirR);
+    // Transform from {0 to 2} to {0 to 1} range, and shape with square.
+    len = len * 0.5;
+    len *= len;
+    // Stretch kernel {1.0 vert|horz, to sqrt(2.0) on diagonal}.
+    float stretch = (dir.x * dir.x + dir.y * dir.y) * APrxLoRcpF1(max(abs(dir.x), abs(dir.y)));
+    // Anisotropic length after rotation,
+    //  x := 1.0 lerp to 'stretch' on edges
+    //  y := 1.0 lerp to 2x on edges
+    vec2 len2 = vec2(1.0 + (stretch - 1.0) * len, 1.0 + -0.5 * len);
+    // Based on the amount of 'edge',
+    // the window shifts from +/-{sqrt(2.0) to slightly beyond 2.0}.
+    float lob = 0.5 + float((1.0 / 4.0 - 0.04) - 0.5) * len;
+    // Set distance^2 clipping point to the end of the adjustable window.
+    float clp = APrxLoRcpF1(lob);
+    //------------------------------------------------------------------------------------------------------------------------------
+    // Accumulation
+    //    b c
+    //  e f g h
+    //  i j k l
+    //    n o
+    // Each offset below is the tap's position relative to 'f', minus 'pp'.
+    float aC = 0.0;
+    float aW = 0.0;
+    FsrEasuTap(aC, aW, vec2( 0.0,-1.0) - pp, dir, len2, lob, clp, bL); // b
+    FsrEasuTap(aC, aW, vec2( 1.0,-1.0) - pp, dir, len2, lob, clp, cL); // c
+    FsrEasuTap(aC, aW, vec2(-1.0, 1.0) - pp, dir, len2, lob, clp, iL); // i
+    FsrEasuTap(aC, aW, vec2( 0.0, 1.0) - pp, dir, len2, lob, clp, jL); // j
+    FsrEasuTap(aC, aW, vec2( 0.0, 0.0) - pp, dir, len2, lob, clp, fL); // f
+    FsrEasuTap(aC, aW, vec2(-1.0, 0.0) - pp, dir, len2, lob, clp, eL); // e
+    FsrEasuTap(aC, aW, vec2( 1.0, 1.0) - pp, dir, len2, lob, clp, kL); // k
+    FsrEasuTap(aC, aW, vec2( 2.0, 1.0) - pp, dir, len2, lob, clp, lL); // l
+    FsrEasuTap(aC, aW, vec2( 2.0, 0.0) - pp, dir, len2, lob, clp, hL); // h
+    FsrEasuTap(aC, aW, vec2( 1.0, 0.0) - pp, dir, len2, lob, clp, gL); // g
+    FsrEasuTap(aC, aW, vec2( 1.0, 2.0) - pp, dir, len2, lob, clp, oL); // o
+    FsrEasuTap(aC, aW, vec2( 0.0, 2.0) - pp, dir, len2, lob, clp, nL); // n
+    //------------------------------------------------------------------------------------------------------------------------------
+    // Normalize and dering.
+    pix.r = aC / aW;
+#if (FSR_EASU_DERING == 1)
+    // Clamp to the value range of the four nearest taps (f, g, j, k).
+    float min1 = min(AMin3F1(fL, gL, jL), kL);
+    float max1 = max(AMax3F1(fL, gL, jL), kL);
+    pix.r = clamp(pix.r, min1, max1);
+#endif
+    pix.r = clamp(pix.r, 0.0, 1.0);
+
+    return pix;
+}
+
+//!HOOK LUMA
+//!BIND EASUTEX
+//!DESC FidelityFX Super Resolution v1.0.2 (RCAS)
+//!WIDTH EASUTEX.w
+//!HEIGHT EASUTEX.h
+//!COMPONENTS 1
+
+// User variables - RCAS
+#define SHARPNESS 0.2 // Controls the amount of sharpening. The scale is {0.0 := maximum, to N>0, where N is the number of stops (halving) of the reduction of sharpness}. 0.0 to 2.0.
+#define FSR_RCAS_DENOISE 1 // If set to 1, lessens the sharpening on noisy areas. Can be disabled for better performance. 0 or 1.
+#define FSR_PQ 0 // Whether the source content has PQ gamma or not. Needs to be set to the same value for both passes. 0 or 1.
+
+// Shader code
+
+// Bound on the sharpening lobe magnitude (0.25 - 1/16 = 0.1875): hook() clamps the lobe to [-FSR_RCAS_LIMIT, 0.0] before scaling it by the sharpness factor.
+#define FSR_RCAS_LIMIT (0.25 - (1.0 / 16.0)) // This is set at the limit of providing unnatural results for sharpening.
+
+// Medium-precision approximation of 1.0 / a.
+// Starts from a bit-pattern estimate (magic constant minus the raw bits of 'a') and
+// refines it once with est * (2 - est * a).
+float APrxMedRcpF1(float a) {
+    float est = uintBitsToFloat(uint(0x7ef19fff) - floatBitsToUint(a));
+    float correction = -est * a + 2.0;
+    return est * correction;
+}
+
+// Returns the largest of the three arguments.
+float AMax3F1(float x, float y, float z) {
+    float yz = max(y, z);
+    return max(x, yz);
+}
+
+// Returns the smallest of the three arguments.
+float AMin3F1(float x, float y, float z) {
+    float yz = min(y, z);
+    return min(x, yz);
+}
+
+#if (FSR_PQ == 1)
+
+// Takes the 4th root of 'a' via two successive square roots.
+// Applied to the final result when FSR_PQ is 1.
+float FromGamma2(float a) {
+    float root = sqrt(a);
+    return sqrt(root);
+}
+
+#endif
+
+// RCAS pass entry point.
+// Sharpens the EASU output using the center texel 'e' and its four direct neighbors.
+// A negative lobe weight is derived from the local min/max so the result stays within
+// range, optionally reduced on noisy areas, and applied as a normalized 5-tap filter.
+// Returns vec4(value, 0.0, 0.0, 1.0); only the red component carries the result.
+vec4 hook() {
+    // Algorithm uses minimal 3x3 pixel neighborhood.
+    //    b
+    //  d e f
+    //    h
+#if (defined(EASUTEX_gather) && (__VERSION__ >= 400 || (GL_ES && __VERSION__ >= 310)))
+    // Two gathers, offset half a texel up-left and down-right of the current position,
+    // supply all five samples: b, d, e from the first and f, h from the second.
+    vec3 bde = EASUTEX_gather(EASUTEX_pos + EASUTEX_pt * vec2(-0.5), 0).xyz;
+    float b = bde.z;
+    float d = bde.x;
+    float e = bde.y;
+
+    vec2 fh = EASUTEX_gather(EASUTEX_pos + EASUTEX_pt * vec2(0.5), 0).zx;
+    float f = fh.x;
+    float h = fh.y;
+#else
+    // Fallback: one fetch per sample.
+    float b = EASUTEX_texOff(vec2( 0.0, -1.0)).r;
+    float d = EASUTEX_texOff(vec2(-1.0,  0.0)).r;
+    float e = EASUTEX_tex(EASUTEX_pos).r;
+    float f = EASUTEX_texOff(vec2(1.0, 0.0)).r;
+    float h = EASUTEX_texOff(vec2(0.0, 1.0)).r;
+#endif
+
+    // Min and max of ring.
+    float mn1L = min(AMin3F1(b, d, f), h);
+    float mx1L = max(AMax3F1(b, d, f), h);
+
+    // Immediate constants for peak range.
+    vec2 peakC = vec2(1.0, -1.0 * 4.0);
+
+    // Limiters, these need to be high precision RCPs.
+    float hitMinL = min(mn1L, e) / (4.0 * mx1L);
+    float hitMaxL = (peakC.x - max(mx1L, e)) / (4.0 * mn1L + peakC.y);
+    float lobeL = max(-hitMinL, hitMaxL);
+    // Clamp the lobe to [-FSR_RCAS_LIMIT, 0.0], then scale by 2^(-SHARPNESS) with
+    // SHARPNESS itself clamped to [0.0, 2.0].
+    float lobe = max(float(-FSR_RCAS_LIMIT), min(lobeL, 0.0)) * exp2(-clamp(float(SHARPNESS), 0.0, 2.0));
+
+    // Apply noise removal.
+#if (FSR_RCAS_DENOISE == 1)
+    // Noise detection.
+    // 'nz' starts as the difference between the neighbor average and the center, is
+    // normalized by the min-to-max range of all five samples, and finally scales the
+    // lobe by a factor between 0.5 and 1.0.
+    float nz = 0.25 * b + 0.25 * d + 0.25 * f + 0.25 * h - e;
+    nz = clamp(abs(nz) * APrxMedRcpF1(AMax3F1(AMax3F1(b, d, e), f, h) - AMin3F1(AMin3F1(b, d, e), f, h)), 0.0, 1.0);
+    nz = -0.5 * nz + 1.0;
+    lobe *= nz;
+#endif
+
+    // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+    // The filter weights are 'lobe' for each neighbor and 1.0 for the center; 'rcpL'
+    // divides by their sum (4 * lobe + 1).
+    float rcpL = APrxMedRcpF1(4.0 * lobe + 1.0);
+    vec4 pix = vec4(0.0, 0.0, 0.0, 1.0);
+    pix.r = float((lobe * b + lobe * d + lobe * h + lobe * f + e) * rcpL);
+#if (FSR_PQ == 1)
+    pix.r = FromGamma2(pix.r);
+#endif
+
+    return pix;
+}
--- a/config/mpv/shaders/FSRCNNX_x2_8-0-4-1.glsl
+++ b/config/mpv/shaders/FSRCNNX_x2_8-0-4-1.glsl
@ -1,4 +1,22 @@
-// Source: https://github.com/xzpyth/mpv-config/blob/main/shaders/FSRCNNX_x2_8-0-4-1.glsl
+// Revised 03/05/21
+// https://github.com/deus0ww/mpv-conf/tree/master/shaders/igv
+//
+// FSRCNNX by igv
+//
+// Copyright (C) 2017-2021 igv
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3.0 of the License, or (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// Lesser General Public License for more details.
+// 
+// You should have received a copy of the GNU Lesser General Public
+// License along with this program.  If not, see <https://www.gnu.org/licenses/>.

 //!HOOK LUMA
 //!WHEN OUTPUT.w LUMA.w / 1.300 > OUTPUT.h LUMA.h / 1.300 > *
@ -410,4 +428,4 @@ vec2 base = SUBCONV1_pos + (vec2(0.5) - fcoord) * SUBCONV1_pt;
 ivec2 index = ivec2(fcoord * vec2(2));
 vec4 res = SUBCONV1_tex(base);
 return vec4(res[index.x * 2 + index.y], 0, 0, 1);
-}
+}
--- a/config/mpv/shaders/KrigBilateral.glsl
+++ b/config/mpv/shaders/KrigBilateral.glsl
@ -1,17 +1,18 @@
-// Source: https://gist.github.com/igv/a015fc885d5c22e6891820ad89555637
-
+// Revised 05/30/22
+// https://gist.github.com/igv/a015fc885d5c22e6891820ad89555637
+//
 // KrigBilateral by Shiandow
 //
 // This library is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
 // License as published by the Free Software Foundation; either
 // version 3.0 of the License, or (at your option) any later version.
-//
+// 
 // This library is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 // Lesser General Public License for more details.
-//
+// 
 // You should have received a copy of the GNU Lesser General Public
 // License along with this library.

@ -213,4 +214,4 @@ vec4 hook() {
    interp += b[0] * (X[0] - X[N]).zw;

    return interp.xyxy;
-}
+}
--- a/config/mpv/shaders/SSimDownscaler.glsl
+++ b/config/mpv/shaders/SSimDownscaler.glsl
@ -1,19 +1,18 @@
-// Source: https://gist.github.com/igv/36508af3ffc84410fe39761d6969be10
-
-// Requires linear-downscaling=no. Can be used with sharp scalers now (finally able to suppress ringing artifacts).
-
+// Revised 05/15/22
+// https://gist.github.com/igv/36508af3ffc84410fe39761d6969be10
+//
 // SSimDownscaler by Shiandow
 //
 // This library is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
 // License as published by the Free Software Foundation; either
 // version 3.0 of the License, or (at your option) any later version.
-//
+// 
 // This library is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 // Lesser General Public License for more details.
-//
+// 
 // You should have received a copy of the GNU Lesser General Public
 // License along with this library.

@ -217,4 +216,4 @@ vec4 hook() {
    avg /= W;
    vec4 L = POSTKERNEL_texOff(0);
    return vec4(avg[1] + avg[2] * L.rgb - avg[0], L.a);
-}
+}
--- a/config/mpv/shaders/SSimSuperRes.glsl
+++ b/config/mpv/shaders/SSimSuperRes.glsl
@ -1,17 +1,18 @@
-// Source: https://gist.github.com/igv/2364ffa6e81540f29cb7ab4c9bc05b6b
-
+// Revised 02/07/22
+// https://gist.github.com/igv/2364ffa6e81540f29cb7ab4c9bc05b6b
+//
 // SSimSuperRes by Shiandow
 //
 // This library is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
 // License as published by the Free Software Foundation; either
 // version 3.0 of the License, or (at your option) any later version.
-//
+// 
 // This library is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 // Lesser General Public License for more details.
-//
+// 
 // You should have received a copy of the GNU Lesser General Public
 // License along with this library.

@ -202,4 +203,4 @@ vec4 hook() {
    c0.rgb = ((c0.rgb) + diff);

    return c0;
-}
+}
--- a/config/mpv/shaders/adaptive-sharpen.glsl
+++ b/config/mpv/shaders/adaptive-sharpen.glsl
@ -1,8 +1,6 @@
-// Source: https://gist.github.com/igv/8a77e4eb8276753b54bb94c1c50c317e
-
-// Requires sigmoid-upscaling=no. Best quality setting (according to objective metrics): curve_height 0.5, overshoot_ctrl true.
-// To use it on-demand add the following line to input.conf: n change-list glsl-shaders toggle "~~/adaptive-sharpen.glsl"
-
+// Revised 06/18/22
+// https://gist.github.com/igv/8a77e4eb8276753b54bb94c1c50c317e
+//
 // Copyright (c) 2015-2021, bacondither
 // All rights reserved.
 //
@ -74,9 +72,13 @@
 #define sat(x)         ( clamp(x, 0.0, 1.0) )
 #define dxdy(val)      ( length(fwidth(val)) ) // =~1/2.5 hq edge without c_comp

-#define CtL(RGB)       ( sat(dot(RGB, vec3(0.2126, 0.7152, 0.0722))) )
+#ifdef LUMA_tex
+#define CtL(RGB)       RGB.x
+#else
+#define CtL(RGB)       ( sqrt(dot(sat(RGB)*sat(RGB), vec3(0.2126, 0.7152, 0.0722))) )
+#endif

-#define b_diff(pix)    ( (blur-c[pix])*(blur-c[pix]) )
+#define b_diff(pix)    ( (blur-luma[pix])*(blur-luma[pix]) )

 vec4 hook() {

@ -97,11 +99,19 @@ vec4 hook() {
                          dxdy(c[5]),  dxdy(c[6]),  dxdy(c[7]),  dxdy(c[8]),  dxdy(c[9]),
                          dxdy(c[10]), dxdy(c[11]), dxdy(c[12]));

-    // Blur, gauss 3x3
-    vec3  blur   = sat((2.0 * (c[2]+c[4]+c[5]+c[7]) + (c[1]+c[3]+c[6]+c[8]) + 4.0 * c[0]) / 16.0);
+    // RGB to luma
+    float luma[25] = float[](CtL(c[0]), CtL(c[1]), CtL(c[2]), CtL(c[3]), CtL(c[4]), CtL(c[5]), CtL(c[6]),
+                             CtL(c[7]),  CtL(c[8]),  CtL(c[9]),  CtL(c[10]), CtL(c[11]), CtL(c[12]),
+                             CtL(c[13]), CtL(c[14]), CtL(c[15]), CtL(c[16]), CtL(c[17]), CtL(c[18]),
+                             CtL(c[19]), CtL(c[20]), CtL(c[21]), CtL(c[22]), CtL(c[23]), CtL(c[24]));

-    // Contrast compression, center = 0.5, scaled to 1/3
-    float c_comp = sat(0.266666681f + 0.9*exp2(dot(blur, vec3(-7.4/3.0))));
+    float c0_Y = luma[0];
+
+    // Blur, gauss 3x3
+    float  blur   = (2.0 * (luma[2]+luma[4]+luma[5]+luma[7]) + (luma[1]+luma[3]+luma[6]+luma[8]) + 4.0 * luma[0]) / 16.0;
+
+    // Contrast compression, center = 0.5
+    float c_comp = sat(0.266666681f + 0.9*exp2(blur * blur * -7.4));

    // Edge detection
    // Relative matrix weights
@ -110,10 +120,10 @@ vec4 hook() {
    // [  1,  5,  6,  5,  1  ]
    // [      4,  5,  4      ]
    // [          1          ]
-    float edge = length( 1.38*b_diff(0)
-                       + 1.15*(b_diff(2) + b_diff(4) + b_diff(5) + b_diff(7))
-                       + 0.92*(b_diff(1) + b_diff(3) + b_diff(6) + b_diff(8))
-                       + 0.23*(b_diff(9) + b_diff(10) + b_diff(11) + b_diff(12)) ) * c_comp;
+    float edge = ( 1.38*b_diff(0)
+                 + 1.15*(b_diff(2) + b_diff(4) + b_diff(5) + b_diff(7))
+                 + 0.92*(b_diff(1) + b_diff(3) + b_diff(6) + b_diff(8))
+                 + 0.23*(b_diff(9) + b_diff(10) + b_diff(11) + b_diff(12)) ) * c_comp;

    vec2 cs = vec2(L_compr_low,  D_compr_low);

@ -136,14 +146,6 @@ vec4 hook() {
        cs = mix(cs, vec2(L_compr_high, D_compr_high), sat(2.4002*sbe - 2.282));
    }

-    // RGB to luma
-    float luma[25] = float[](CtL(c[0]), CtL(c[1]), CtL(c[2]), CtL(c[3]), CtL(c[4]), CtL(c[5]), CtL(c[6]),
-                             CtL(c[7]),  CtL(c[8]),  CtL(c[9]),  CtL(c[10]), CtL(c[11]), CtL(c[12]),
-                             CtL(c[13]), CtL(c[14]), CtL(c[15]), CtL(c[16]), CtL(c[17]), CtL(c[18]),
-                             CtL(c[19]), CtL(c[20]), CtL(c[21]), CtL(c[22]), CtL(c[23]), CtL(c[24]));
-
-    float c0_Y = luma[0];
-
    // Precalculated default squared kernel weights
    const vec3 w1 = vec3(0.5,           1.0, 1.41421356237); // 0.25, 1.0, 2.0
    const vec3 w2 = vec3(0.86602540378, 1.0, 0.54772255751); // 0.75, 1.0, 0.3
@ -222,10 +224,10 @@ vec4 hook() {
    // Soft limited anti-ringing with tanh, wpmean to control compression slope
    sharpdiff = wpmean(max(sharpdiff, 0.0), soft_lim( max(sharpdiff, 0.0), min_dist ), cs.x )
              - wpmean(min(sharpdiff, 0.0), soft_lim( min(sharpdiff, 0.0), min_dist ), cs.y );
-
+    
    float sharpdiff_lim = sat(c0_Y + sharpdiff) - c0_Y;
    /*float satmul = (c0_Y + max(sharpdiff_lim*0.9, sharpdiff_lim)*0.3 + 0.03)/(c0_Y + 0.03);
    vec3 res = c0_Y + sharpdiff_lim + (c[0] - c0_Y)*satmul;
    */
    return vec4(sharpdiff_lim + c[0], HOOKED_texOff(0).a);
-}
+}
--- a/config/mpv/shaders/filmgrain.glsl
+++ b/config/mpv/shaders/filmgrain.glsl
@ -0,0 +1,45 @@
+// Revised (unknown)
+// https://raw.githubusercontent.com/haasn/gentoo-conf/xor/home/nand/.mpv/shaders/filmgrain.glsl
+//
+// Film Grain by haasn
+//
+//!HOOK LUMA
+//!BIND HOOKED
+//!DESC gaussian film grain
+
+// Grain strength: the normalized grain value is multiplied by this before being added to the sampled color.
+#define INTENSITY 0.05
+
+// Scrambles x with the polynomial (34*x + 1) * x and wraps the result into [0, 289).
+float permute(float x)
+{
+    float poly = (34.0 * x + 1.0) * x;
+    return fract(poly * 1.0/289.0) * 289.0;
+}
+
+// Advances the generator: 'state' is replaced in place by permute(state), and a value
+// in [0, 1) derived from the new state is returned.
+float rand(inout float state)
+{
+    float next = permute(state);
+    state = next;
+    return fract(next * 1.0/41.0);
+}
+
+// Film grain pass entry point.
+// Adds a pseudo-random grain value, scaled by INTENSITY, to the rgb components of the
+// sampled color. The generator is seeded from the pixel position and the 'random'
+// value (not defined in this file; supplied by mpv), each offset by 1.0.
+vec4 hook()
+{
+    // Coefficients of the rational function that shapes the uniform sample.
+    // NOTE(review): presumably chosen so the grain is Gaussian-like, as the pass
+    // description says - not verified here.
+    const float a0 = 0.151015505647689;
+    const float a1 = -0.5303572634357367;
+    const float a2 = 1.365020122861334;
+    const float b0 = 0.132089632343748;
+    const float b1 = -0.7607324991323768;
+
+    vec4 color = HOOKED_tex(HOOKED_pos);
+
+    // Seed the generator.
+    vec3 seed = vec3(HOOKED_pos, random) + vec3(1.0);
+    float state = permute(permute(seed.x) + seed.y) + seed.z;
+
+    // Uniform sample squeezed into [0.025, 0.975), then centered on zero.
+    float uniformSample = 0.95 * rand(state) + 0.025;
+    float centered = uniformSample - 0.5;
+    float sq = centered * centered;
+
+    float numer = a1 * sq + a0;
+    float denom = sq*sq + b1*sq + b0;
+    float grain = centered * (a2 + numer / denom);
+    grain *= 0.255121822830526; // normalize to [-1,1)
+
+    color.rgb += vec3(INTENSITY * grain);
+    return color;
+}