-
Notifications
You must be signed in to change notification settings - Fork 52
/
Copy pathFFNx.common.sh
322 lines (271 loc) · 13.4 KB
/
FFNx.common.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
/****************************************************************************/
// Copyright (C) 2023 Cosmos //
// //
// This file is part of FFNx //
// //
// FFNx is free software: you can redistribute it and/or modify //
// it under the terms of the GNU General Public License as published by //
// the Free Software Foundation, either version 3 of the License //
// //
// FFNx is distributed in the hope that it will be useful, //
// but WITHOUT ANY WARRANTY; without even the implied warranty of //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
// GNU General Public License for more details. //
/****************************************************************************/
// Gamut LUT
SAMPLER2D(tex_10, 10);
// YUV to RGB ---------------------------------------------------------
// tv-range functions include implicit range expansion
vec3 toRGB_bt601_fullrange(vec3 yuv_input)
{
    // BT.601 full-range (JPEG) YUV -> RGB.
    // Column-major: R = Y + 1.402*V; G = Y - 0.344136*U - 0.714136*V; B = Y + 1.772*U.
    const mat3 yuvToRgbFull601 = mat3(
        vec3(+1.000, +1.000, +1.000),
        vec3(+0.000, -0.202008 / 0.587, +1.772),
        vec3(+1.402, -0.419198 / 0.587, +0.000)
    );
    return saturate(instMul(yuvToRgbFull601, yuv_input));
}
vec3 toRGB_bt601_tvrange(vec3 yuv_input)
{
    // BT.601 TV-range (MPEG) YUV -> RGB.
    // The 16-235 luma / 16-240 chroma range expansion is folded into the
    // matrix coefficients (hence the /219 luma and /112 chroma divisors).
    const mat3 yuvToRgbTv601 = mat3(
        vec3(+255.0 / 219.0, +255.0 / 219.0, +255.0 / 219.0),
        vec3(+0.000, -25.75602 / 65.744 , +225.93 / 112.0),
        vec3(+178.755 / 112.0, -53.447745 / 65.744 , +0.000)
    );
    return saturate(instMul(yuvToRgbTv601, yuv_input));
}
vec3 toRGB_bt709_fullrange(vec3 yuv_input)
{
    // BT.709 full-range YUV -> RGB.
    // Column-major: R = Y + 1.5748*V; G = Y - 0.187324*U - 0.468124*V; B = Y + 1.8556*U.
    const mat3 yuvToRgbFull709 = mat3(
        vec3(+1.000, +1.000, +1.000),
        vec3(+0.000, -0.13397432 / 0.7152, +1.8556),
        vec3(+1.5748, -0.33480248 / 0.7152 , +0.000)
    );
    return saturate(instMul(yuvToRgbFull709, yuv_input));
}
vec3 toRGB_bt709_tvrange(vec3 yuv_input)
{
    // BT.709 TV-range YUV -> RGB, with the TV -> full range expansion
    // folded into the matrix coefficients as in toRGB_bt601_tvrange.
    const mat3 yuvToRgbTv709 = mat3(
        vec3(+255.0 / 219.0, +255.0 / 219.0, +255.0 / 219.0),
        vec3(+0.000, -17.0817258 / 80.1024 , +236.589 / 112.0),
        vec3(+200.787 / 112.0, -42.6873162 / 80.1024 , +0.000)
    );
    return saturate(instMul(yuvToRgbTv709, yuv_input));
}
// Gamma functions ------------------------------------------------
// gamma encoded --> linear:
// sRGB
vec3 toLinear(vec3 _rgb)
{
    // sRGB EOTF (gamma-encoded -> linear): a linear toe segment below the
    // 0.04045 cutoff, a 2.4 power segment above it.
    vec3 linearSegment = _rgb.rgb / vec3_splat(12.92);
    vec3 powerSegment = pow((_rgb.rgb + vec3_splat(0.055)) / vec3_splat(1.055), vec3_splat(2.4));
    bvec3 belowCutoff = lessThan(_rgb.rgb, vec3_splat(0.04045));
    // mix with a bvec selects the linear segment where belowCutoff is true.
    return saturate(mix(powerSegment, linearSegment, belowCutoff));
}
vec3 toLinearSMPTE170M(vec3 _rgb)
{
    // SMPTE 170M / BT.601 transfer function (gamma-encoded -> linear):
    // linear toe below 0.0812, 1/0.45 power segment above it.
    vec3 linearSegment = _rgb.rgb / vec3_splat(4.5);
    vec3 powerSegment = pow((_rgb.rgb + vec3_splat(0.099)) / vec3_splat(1.099), (vec3_splat(1.0) / vec3_splat(0.45)));
    bvec3 belowCutoff = lessThan(_rgb.rgb, vec3_splat(0.0812));
    return saturate(mix(powerSegment, linearSegment, belowCutoff));
}
vec3 toLinear2pt2(vec3 _rgb)
{
    // Pure power-law 2.2 gamma decode (no linear toe segment).
    vec3 decoded = pow(_rgb.rgb, vec3_splat(2.2));
    return saturate(decoded);
}
// Microsoft says PAL uses a pure 2.8 gamma curve. See: https://learn.microsoft.com/en-us/windows/win32/api/mfobjects/ne-mfobjects-mfvideotransferfunction
// ffmpeg thinks there *should* be a linear toe slope, but uses a pure curve since they cannot find any documentation for it. See: https://github.com/FFmpeg/FFmpeg/blob/master/libavfilter/vf_colorspace.c#L162
// In any event, Poynton says 2.8 is "unrealistically high" and PAL CRT units did not really behave like that.
// PAL switched to the SMPTE170M function in 2005 (see BT1700)
vec3 toLinear2pt8(vec3 _rgb)
{
    // Pure power-law 2.8 gamma decode (historical PAL; see the notes above).
    vec3 decoded = pow(_rgb.rgb, vec3_splat(2.8));
    return saturate(decoded);
}
// This is an unprincipled, bespoke gamma function that "looks good" with FF7 videos, while all other options are problematic:
// - Functions with toe slopes cause banding near black in these videos.
// - A pure 2.2 power function loses details in shadow to darkness.
// - A pure 2.0 power function blows out highlights.
// While unmoored from any theoretical or mathematical justification, this function avoids all those problems.
vec3 toLinearToelessSRGB(vec3 _rgb)
{
    // Bespoke decode for FF7 videos (see rationale comments above): blends
    // between a pure 2.2 power curve and the sRGB curve, except where sRGB
    // is already the darker of the two, in which case sRGB wins outright.
    vec3 pureCurve = toLinear2pt2(_rgb);
    vec3 srgbCurve = toLinear(_rgb);
    bvec3 srgbIsDarker = lessThan(srgbCurve, pureCurve);
    // Blend weight ramps up with input brightness (>1 extrapolates via mix).
    vec3 blendWeight = pow(_rgb / vec3_splat(0.389223), vec3_splat(1.0 / 2.2));
    vec3 blended = mix(pureCurve, srgbCurve, blendWeight);
    return saturate(mix(blended, srgbCurve, srgbIsDarker));
}
// linear --> gamma encoded:
// sRGB
vec3 toGamma(vec3 _rgb)
{
    // sRGB OETF (linear -> gamma-encoded); exact inverse of toLinear.
    vec3 linearSegment = _rgb.rgb * vec3_splat(12.92);
    vec3 powerSegment = vec3_splat(1.055) * pow(_rgb.rgb, vec3_splat(1.0/2.4)) - vec3_splat(0.055);
    bvec3 belowCutoff = lessThan(_rgb.rgb, vec3_splat(0.0031308));
    return saturate(mix(powerSegment, linearSegment, belowCutoff));
}
// See https://github.com/Microsoft/DirectX-Graphics-Samples/blob/master/MiniEngine/Core/Shaders/ColorSpaceUtility.hlsli#L75
// max_nits should be "the brightness level that SDR 'white' is rendered at within an HDR monitor" (probably 100-200ish)
// Google Chrome uses a default of 200 if autodetection fails.
vec3 ApplyREC2084Curve(vec3 _color, float max_nits)
{
    // Encodes linear RGB with the SMPTE ST 2084 (PQ) curve for HDR output.
    // _color: linear RGB where 1.0 represents SDR white.
    // max_nits: the luminance SDR white maps to; _color is rescaled so that
    //           PQ code value 1.0 corresponds to 10000 nits.
    // reference PQ OETF will yield reference OOTF when
    // displayed on a reference monitor employing EOTF
    // ST 2084 constants, written in the spec's rational form. Kept verbatim
    // (including the integer factors) to match the Microsoft sample linked above.
    float m1 = 2610.0 / 4096.0 * 1.0 / 4;
    float m2 = 2523.0 / 4096.0 * 128;
    float c1 = 3424.0 / 4096.0;
    float c2 = 2413.0 / 4096.0 * 32;
    float c3 = 2392.0 / 4096.0 * 32;
    // Normalize to the 0-10000 nit domain, then apply the PQ transfer.
    vec3 Lp = pow(_color * (vec3_splat(max_nits)/vec3_splat(10000.0)), vec3_splat(m1));
    return saturate(pow((c1 + c2 * Lp) / (vec3_splat(1.0) + c3 * Lp), vec3_splat(m2)));
}
// Gamut conversions ---------------------------------------------
// These functions all take a linear RGB input and produce a linear RGB output.
// Mathematically, they are equivalent to:
// 1. Convert linear RGB to XYZ using the source gamut's red/green/blue points
// 2. Do a gamut conversion from the source gamut to the destination gamut
// 3. Convert XYZ to linear RGB using the destination gamut's red/green/blue points
// But all of that has been pre-computed into a single matrix multiply operation.
// Note: sRGB is the same gamut as rec709 video.
// Note: High precision values are used for the "D65" whitepoint. (x=0.312713, y=0.329016)
// Note: There are (at least) three different whitepoints that are all referred to as "D93"/"9300K."
// The one used here is 9300K+27mpcd (x=0.281, y=0.311), which is what NTSC-J television sets used.
// Most of the gamut conversion matrices have been replaced with LUTs.
// We will want to bring them back for HDR *if* we can find a way to let potentially out-of-bounds values linger until post processing.
// To rec2020:
// See https://github.com/Microsoft/DirectX-Graphics-Samples/blob/master/MiniEngine/Core/Shaders/ColorSpaceUtility.hlsli#L120
vec3 convertGamut_SRGBtoREC2020(vec3 rgb_input)
{
    // Linear sRGB/rec709 -> linear rec2020, precomputed into one
    // column-major matrix (RGB -> XYZ -> RGB collapsed).
    mat3 srgbToRec2020 = mat3(
        vec3(+0.628252390228217, +0.069018748494509, +0.016358741846493),
        vec3(+0.329243684863216, +0.9191169021082, +0.087837787397663),
        vec3(+0.042503924908568, +0.011864349397292, +0.895803470755843)
    );
    return saturate(instMul(srgbToRec2020, rgb_input));
}
vec3 convertGamut_NTSCJtoREC2020(vec3 rgb_input)
{
    // Linear NTSC-J (9300K+27mpcd whitepoint; see notes above) -> linear
    // rec2020, as a single precomputed column-major matrix.
    mat3 ntscjToRec2020 = mat3(
        vec3(+0.835314787642499, +0.064086581191406, -0.00258827855966),
        vec3(+0.139190018780176, +0.859494098117681, +0.036362217824334),
        vec3(+0.025495200617381, +0.076419362630435, +0.966226011255509)
    );
    return saturate(instMul(ntscjToRec2020, rgb_input));
}
// This is a generic 3D LUT function.
// We're using it to do gamut conversions when a gamut compression mapping algorithm is necessary to avoid losing detail to clipping.
// Since that's waaaay too compute heavy, we precompute it, then use a LUT.
// Renderer::AssignGamutLUT() in renderer.cpp is in charge of making sure the correct LUT is bound.
// Expects:
// - coords 0,0 in the upper left corner
// - 4096x64 dimensions
// - linear rgb (unlike most other textures BGFX is NOT doing a linearize for us; we expect the image is linear to start with)
// - black in the upper left corner
// - green on the vertical axis
// - red on the small horizontal axis
// - blue on the large horizontal axis
vec3 GamutLUT(vec3 rgb_input)
{
    // Manual trilinear interpolation over a 64x64x64 LUT unrolled into the
    // 4096x64 texture bound to tex_10 (see layout expectations above):
    // u = (blue_slice * 64 + red) / 4095, v = green / 63.
    vec3 temp = saturate(rgb_input) * vec3_splat(63.0);
    vec3 floors = floor(temp);
    vec3 ceils = ceil(temp);
    // Per-channel fractional position between the floor and ceil lattice points.
    vec3 ceilweights = temp - floors;
    // Fetch the 8 surrounding lattice points. Naming: one letter pair per
    // axis in R, G, B order; f = floor index, c = ceil index.
    vec3 RfGfBf = texture2D(tex_10, vec2(((floors.b * 64.0) + floors.r) / 4095.0, floors.g / 63.0)).xyz;
    vec3 RfGfBc = texture2D(tex_10, vec2(((ceils.b * 64.0) + floors.r) / 4095.0, floors.g / 63.0)).xyz;
    vec3 RfGcBf = texture2D(tex_10, vec2(((floors.b * 64.0) + floors.r) / 4095.0, ceils.g / 63.0)).xyz;
    vec3 RfGcBc = texture2D(tex_10, vec2(((ceils.b * 64.0) + floors.r) / 4095.0, ceils.g / 63.0)).xyz;
    vec3 RcGfBf = texture2D(tex_10, vec2(((floors.b * 64.0) + ceils.r) / 4095.0, floors.g / 63.0)).xyz;
    vec3 RcGfBc = texture2D(tex_10, vec2(((ceils.b * 64.0) + ceils.r) / 4095.0, floors.g / 63.0)).xyz;
    vec3 RcGcBf = texture2D(tex_10, vec2(((floors.b * 64.0) + ceils.r) / 4095.0, ceils.g / 63.0)).xyz;
    vec3 RcGcBc = texture2D(tex_10, vec2(((ceils.b * 64.0) + ceils.r) / 4095.0, ceils.g / 63.0)).xyz;
    // Collapse one axis at a time: blue first...
    vec3 RfGf = mix(RfGfBf, RfGfBc, vec3_splat(ceilweights.b));
    vec3 RfGc = mix(RfGcBf, RfGcBc, vec3_splat(ceilweights.b));
    vec3 RcGf = mix(RcGfBf, RcGfBc, vec3_splat(ceilweights.b));
    vec3 RcGc = mix(RcGcBf, RcGcBc, vec3_splat(ceilweights.b));
    // ...then green...
    vec3 Rf = mix(RfGf, RfGc, vec3_splat(ceilweights.g));
    vec3 Rc = mix(RcGf, RcGc, vec3_splat(ceilweights.g));
    // ...then red, leaving the final interpolated color.
    vec3 outcolor = mix(Rf, Rc, vec3_splat(ceilweights.r));
    return outcolor;
}
// Dithering ---------------------------------------------
// Apply Martin Roberts' quasirandom dithering scaled below a specified level of precision.
// See https://extremelearning.com.au/unreasonable-effectiveness-of-quasirandom-sequences/
// pixelval: float (range 0-1) pixel color trio.
// coords: float (range 0-1) pixel coordinates, i.e., v_texcoord0.xy
// ydims: integer dimensions of first channel's texture
// udims & vdims: integer dimensions of the second and third channels' textures (may differ from ydims for various yuv formats)
// scale_divisor: step size divisor to scale the dithering to fit within. E.g., use 255.0 for dithering 8-bit values.
// xyoffset: value to add to x & y coords. Should be at least 1 to avoid x=0 and y=0. Should be different if the same input is dithered twice.
// (This function will be used twice if TV-range video is dithered for range expansion, then again for HDR bit depth increase.)
vec3 QuasirandomDither(vec3 pixelval, vec2 coords, ivec2 ydims, ivec2 udims, ivec2 vdims, float scale_divisor, float xyoffset)
{
    // Per-channel integer-ish pixel coordinates. One axis is mirrored for U
    // and the other for V so the three channels' dither patterns are
    // decorrelated (see https://blog.kaetemi.be/2015/04/01/practical-bayer-dithering/).
    // xyoffset keeps us away from x=0 / y=0.
    vec3 px = vec3(
        round(float(ydims.x) * coords.x) + xyoffset,
        round(float(udims.x) * (1.0 - coords.x)) + xyoffset,
        round(float(vdims.x) * coords.x) + xyoffset
    );
    vec3 py = vec3(
        round(float(ydims.y) * coords.y) + xyoffset,
        round(float(udims.y) * coords.y) + xyoffset,
        round(float(vdims.y) * (1.0 - coords.y)) + xyoffset
    );
    // Martin Roberts' R-series quasirandom sequence.
    vec3 noise = fract((px * vec3_splat(0.7548776662)) + (py * vec3_splat(0.56984029)));
    // Fold into a triangular distribution. A value of exactly 0.5 is passed
    // through unchanged so we never produce 1.0.
    bvec3 below = lessThan(noise, vec3_splat(0.5));
    bvec3 above = greaterThan(noise, vec3_splat(0.5));
    noise = mix(noise, noise * vec3_splat(2.0), below);
    noise = mix(noise, vec3_splat(2.0) - (noise * vec3_splat(2.0)), above);
    // Center on zero, then scale to fit under one quantization step.
    noise = noise - vec3_splat(0.5);
    noise = noise / vec3_splat(scale_divisor);
    vec3 dithered = saturate(pixelval + noise);
    // So close to 0.0 or 1.0 that clamping would make the dither asymmetric:
    // keep the original value instead.
    bvec3 nearWhite = greaterThan(pixelval, vec3_splat(1.0 - (0.5 / scale_divisor)));
    bvec3 nearBlack = lessThan(pixelval, vec3_splat(0.5 / scale_divisor));
    vec3 outcolor = mix(dithered, pixelval, nearWhite);
    outcolor = mix(outcolor, pixelval, nearBlack);
    return outcolor;
}
// Fog ---------------------------------------------
vec3 ApplyWorldFog(vec3 color, vec3 viewPosition)
{
    // Exponential distance fog tinted towards fogColor0, followed by a
    // linear fade to black between e0 and e0 + e1 view-space units.
    // Fixes vs. original: removed unused local `s0`; made all numeric
    // literals floats (strict GLSL ES rejects implicit int->float here);
    // sqrt(dot(v, v)) replaced with the equivalent length(v).
    float d = length(viewPosition);
    float e0 = 10000.0;
    float e1 = 15000.0;
    float density = 0.00025;
    // exp(-x) == 1/exp(x): fog factor, 1.0 at the camera, falling with distance.
    float t = exp(-d * density);
    vec3 fogColor0 = vec3(0.1, 0.1, 0.2);
    vec3 outColor = mix(color * fogColor0, color, t);
    // Everything beyond e0 + e1 fades out completely.
    float t2 = 1.0 - saturate((d - e0) / e1);
    outColor *= t2;
    return outColor;
}
// Spherical world ---------------------------------------------
// Minimal complex-number helpers over vec2 (x = real part, y = imaginary part).
// Arguments and expansions are fully parenthesized so compound expressions
// (e.g. cplx_scale(z, a + b)) expand correctly — the originals were unhygienic.
#define cplx vec2
#define cplx_new(re, im) vec2((re), (im))
#define cplx_re(z) ((z).x)
#define cplx_im(z) ((z).y)
#define cplx_exp(z) (exp((z).x) * cplx_new(cos((z).y), sin((z).y)))
#define cplx_scale(z, scalar) ((z) * (scalar))
#define cplx_abs(z) (sqrt((z).x * (z).x + (z).y * (z).y))
vec3 ApplySphericalWorld(vec3 viewPosition, float radiusScale)
{
    // NOTE(review): appears to curve the flat world plane onto a sphere by
    // mapping (height, horizontal distance) through the conformal complex
    // exponential — confirm intent against the world-map renderer.
    // Fix vs. original: integer literals -250000 and 0 made explicit floats
    // (strict GLSL ES rejects the implicit int->float conversions).
    vec3 outResult = vec3(0.0, 0.0, 0.0);
    float rp = -250000.0 * radiusScale;
    // Direction of this point in the horizontal (x,z) plane.
    vec2 planedir = normalize(vec2(viewPosition.x, viewPosition.z));
    // Complex plane: real axis = height (y), imaginary axis = horizontal distance.
    cplx plane = cplx_new(viewPosition.y, sqrt(viewPosition.x * viewPosition.x + viewPosition.z * viewPosition.z));
    // exp() wraps the plane around a circle of radius |rp|; subtracting rp
    // re-centers so the origin stays fixed.
    cplx circle = rp * cplx_exp(cplx_scale(plane, 1.0 / rp)) - cplx_new(rp, 0.0);
    // Redistribute the wrapped horizontal distance along the original direction.
    outResult.x = cplx_im(circle) * planedir.x;
    outResult.z = cplx_im(circle) * planedir.y;
    outResult.y = cplx_re(circle);
    return outResult;
}