Skip to content

Commit 761271c

Browse files
committed
Optimizations to filter effects, blend targets, and batching.
* Switch all filter effects to run through the single source blend shader, instead of the composite shader.
* Each render target uses a single alpha batcher now that the batching algorithm is reasonably efficient.
* Change tile size to 256x256. This seems to be a better tradeoff for GPU vs CPU time on all sites I tried.
* Switch blend targets to only allocate the used rect, rather than the entire tile size rect.

Closes #423.
1 parent 12af87a commit 761271c

9 files changed

+399
-437
lines changed

webrender/res/prim_shared.glsl

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -120,17 +120,17 @@ Layer fetch_layer(int index) {
120120
}
121121

122122
struct Tile {
123-
vec4 actual_rect;
124-
vec4 target_rect;
123+
vec4 screen_origin_task_origin;
124+
vec4 size;
125125
};
126126

127127
Tile fetch_tile(int index) {
128128
Tile tile;
129129

130130
ivec2 uv = get_fetch_uv(index, VECS_PER_TILE);
131131

132-
tile.actual_rect = texelFetchOffset(sRenderTasks, uv, 0, ivec2(0, 0));
133-
tile.target_rect = texelFetchOffset(sRenderTasks, uv, 0, ivec2(1, 0));
132+
tile.screen_origin_task_origin = texelFetchOffset(sRenderTasks, uv, 0, ivec2(0, 0));
133+
tile.size = texelFetchOffset(sRenderTasks, uv, 0, ivec2(1, 0));
134134

135135
return tile;
136136
}
@@ -406,13 +406,13 @@ VertexInfo write_vertex(vec4 instance_rect,
406406
vec2 device_pos = world_pos.xy * uDevicePixelRatio;
407407

408408
vec2 clamped_pos = clamp(device_pos,
409-
vec2(tile.actual_rect.xy),
410-
vec2(tile.actual_rect.xy + tile.actual_rect.zw));
409+
vec2(tile.screen_origin_task_origin.xy),
410+
vec2(tile.screen_origin_task_origin.xy + tile.size.xy));
411411

412412
vec4 local_clamped_pos = layer.inv_transform * vec4(clamped_pos / uDevicePixelRatio, world_pos.z, 1);
413413
local_clamped_pos.xyz /= local_clamped_pos.w;
414414

415-
vec2 final_pos = clamped_pos + vec2(tile.target_rect.xy) - vec2(tile.actual_rect.xy);
415+
vec2 final_pos = clamped_pos + vec2(tile.screen_origin_task_origin.zw) - vec2(tile.screen_origin_task_origin.xy);
416416

417417
gl_Position = uTransform * vec4(final_pos, 0, 1);
418418

@@ -460,20 +460,20 @@ TransformVertexInfo write_transform_vertex(vec4 instance_rect,
460460
vec2 max_pos = max(tp0.xy, max(tp1.xy, max(tp2.xy, tp3.xy)));
461461

462462
vec2 min_pos_clamped = clamp(min_pos * uDevicePixelRatio,
463-
vec2(tile.actual_rect.xy),
464-
vec2(tile.actual_rect.xy + tile.actual_rect.zw));
463+
vec2(tile.screen_origin_task_origin.xy),
464+
vec2(tile.screen_origin_task_origin.xy + tile.size.xy));
465465

466466
vec2 max_pos_clamped = clamp(max_pos * uDevicePixelRatio,
467-
vec2(tile.actual_rect.xy),
468-
vec2(tile.actual_rect.xy + tile.actual_rect.zw));
467+
vec2(tile.screen_origin_task_origin.xy),
468+
vec2(tile.screen_origin_task_origin.xy + tile.size.xy));
469469

470470
vec2 clamped_pos = mix(min_pos_clamped,
471471
max_pos_clamped,
472472
aPosition.xy);
473473

474474
vec3 layer_pos = get_layer_pos(clamped_pos / uDevicePixelRatio, layer);
475475

476-
vec2 final_pos = clamped_pos + vec2(tile.target_rect.xy) - vec2(tile.actual_rect.xy);
476+
vec2 final_pos = clamped_pos + vec2(tile.screen_origin_task_origin.zw) - vec2(tile.screen_origin_task_origin.xy);
477477

478478
gl_Position = uTransform * vec4(final_pos, 0, 1);
479479

@@ -550,30 +550,28 @@ BoxShadow fetch_boxshadow(int index) {
550550
}
551551

552552
struct Blend {
553-
ivec4 src_id_target_id_opacity;
553+
ivec4 src_id_target_id_op_amount;
554554
};
555555

556556
Blend fetch_blend(int index) {
557557
Blend blend;
558558

559559
int offset = index * 1;
560-
blend.src_id_target_id_opacity = int_data[offset + 0];
560+
blend.src_id_target_id_op_amount = int_data[offset + 0];
561561

562562
return blend;
563563
}
564564

565565
struct Composite {
566-
ivec4 src0_src1_target_id;
567-
ivec4 info_amount;
566+
ivec4 src0_src1_target_id_op;
568567
};
569568

570569
Composite fetch_composite(int index) {
571570
Composite composite;
572571

573-
int offset = index * 2;
572+
int offset = index * 1;
574573

575-
composite.src0_src1_target_id = int_data[offset + 0];
576-
composite.info_amount = int_data[offset + 1];
574+
composite.src0_src1_target_id_op = int_data[offset + 0];
577575

578576
return composite;
579577
}

webrender/res/ps_blend.fs.glsl

Lines changed: 129 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,134 @@
44

55
uniform sampler2D sCache;
66

7+
// Converts an RGB color to HSV.
//
// Returns vec3(hue, saturation, value). The hue is expressed as a fraction
// of a full turn (sixths of the color wheel scaled by 1/6) and is wrapped
// so that it is never negative.
vec3 rgbToHsv(vec3 c) {
    float value = max(max(c.r, c.g), c.b);

    float chroma = value - min(min(c.r, c.g), c.b);
    if (chroma == 0.0) {
        // Achromatic (gray) input: hue and saturation are zero, but the
        // value has to be preserved. Returning all zeros here would turn
        // every gray pixel black once it is converted back to RGB.
        return vec3(0.0, 0.0, value);
    }
    float saturation = chroma / value;

    // Pick the hue sector from whichever channel is the maximum.
    float hue;
    if (c.r == value) {
        hue = (c.g - c.b) / chroma;
    } else if (c.g == value) {
        hue = 2.0 + (c.b - c.r) / chroma;
    } else { // c.b == value
        hue = 4.0 + (c.r - c.g) / chroma;
    }

    // Scale from sixths of a turn to a fraction of a turn, wrapping the
    // negative results of the red sector around to the top of the range.
    hue *= 1.0/6.0;
    if (hue < 0.0) {
        hue += 1.0;
    }
    return vec3(hue, saturation, value);
}
29+
30+
// Converts an HSV triple back to RGB.
//
// c.x is the hue as a fraction of a full turn, c.y the saturation and
// c.z the value.
vec3 hsvToRgb(vec3 c) {
    // Zero saturation means an achromatic gray. The test must read c.y:
    // the `.s` swizzle belongs to the (s, t, p, q) set and aliases the
    // FIRST component, i.e. the hue, which would wrongly turn every
    // color with hue 0 (such as pure red) into gray.
    if (c.y == 0.0) {
        return vec3(c.z);
    }

    // Wrap the hue into [0, 1) so that a hue of exactly 1.0, or any value
    // outside the range, still selects a valid sector below.
    float hue = fract(c.x) * 6.0;
    int sector = int(hue);
    float residualHue = hue - float(sector);

    // p, q and t are the three intermediate channel levels of the usual
    // HSV -> RGB sector construction.
    vec3 pqt = c.z * vec3(1.0 - c.y, 1.0 - c.y * residualHue, 1.0 - c.y * (1.0 - residualHue));
    if (sector == 0) {
        return vec3(c.z, pqt.z, pqt.x);
    }
    if (sector == 1) {
        return vec3(pqt.y, c.z, pqt.x);
    }
    if (sector == 2) {
        return vec3(pqt.x, c.z, pqt.z);
    }
    if (sector == 3) {
        return vec3(pqt.x, pqt.y, c.z);
    }
    if (sector == 4) {
        return vec3(pqt.z, pqt.x, c.z);
    }
    return vec3(c.z, pqt.x, pqt.y);
}
52+
53+
// Placeholder for the blur filter: both arguments are ignored and opaque
// white is returned.
vec4 Blur(float radius, vec2 direction) {
    // TODO(gw): Support blur in WR2!
    return vec4(1.0);
}
57+
58+
// Applies a contrast adjustment to the color channels of Cs.
//
// Each channel is scaled by `amount` around the midpoint 0.5, so an amount
// of 1.0 leaves the color unchanged and 0.0 yields uniform mid-gray.
// The source alpha is passed through untouched, as in the other filters.
vec4 Contrast(vec4 Cs, float amount) {
    // Previously the alpha was forced to 1.0, which made translucent
    // content fully opaque whenever a contrast filter was applied.
    return vec4(Cs.rgb * amount - 0.5 * amount + 0.5, Cs.a);
}
61+
62+
// Blends Cs toward its luminance-weighted gray.
//
// With amount == 1.0 every output color channel becomes
// 0.2126 * r + 0.7152 * g + 0.0722 * b; with amount == 0.0 the matrix
// reduces to the identity. Alpha is passed through unchanged.
vec4 Grayscale(vec4 Cs, float amount) {
    float keep = 1.0 - amount;

    // Each column holds the contribution of one input channel to the
    // output (r, g, b, a).
    vec4 fromRed   = vec4(0.2126 + 0.7874 * keep, 0.2126 - 0.2126 * keep, 0.2126 - 0.2126 * keep, 0.0);
    vec4 fromGreen = vec4(0.7152 - 0.7152 * keep, 0.7152 + 0.2848 * keep, 0.7152 - 0.7152 * keep, 0.0);
    vec4 fromBlue  = vec4(0.0722 - 0.0722 * keep, 0.0722 - 0.0722 * keep, 0.0722 + 0.9278 * keep, 0.0);
    vec4 fromAlpha = vec4(0.0, 0.0, 0.0, 1.0);

    mat4 weights = mat4(fromRed, fromGreen, fromBlue, fromAlpha);
    return weights * Cs;
}
69+
70+
// Rotates the hue of Cs by `amount`, which is divided by 2*pi to convert
// it into a fraction of a full turn. Alpha is passed through unchanged.
vec4 HueRotate(vec4 Cs, float amount) {
    const float TWO_PI = 6.283185307179586;

    vec3 hsv = rgbToHsv(Cs.rgb);
    float turned = hsv.x + amount / TWO_PI;
    // Keep the rotated hue inside [0, 1).
    hsv.x = mod(turned, 1.0);

    vec3 rgb = hsvToRgb(hsv);
    return vec4(rgb, Cs.a);
}
75+
76+
// Interpolates between Cs (amount == 0.0) and its color-inverted version
// (amount == 1.0). Only the color channels are inverted; alpha is kept.
vec4 Invert(vec4 Cs, float amount) {
    vec4 inverted = vec4(1.0 - Cs.rgb, Cs.a);
    return mix(Cs, inverted, amount);
}
79+
80+
// Scales the HSV saturation of Cs by `amount`, caps every HSV component
// at 1.0, and converts back to RGB. Alpha is passed through unchanged.
vec4 Saturate(vec4 Cs, float amount) {
    vec3 hsv = rgbToHsv(Cs.rgb);
    vec3 scaled = vec3(1.0, amount, 1.0) * hsv;
    vec3 capped = min(scaled, vec3(1.0));
    return vec4(hsvToRgb(capped), Cs.a);
}
83+
84+
// Blends Cs toward a sepia tone.
//
// With amount == 1.0 the color is multiplied by the plain sepia matrix
// (0.393/0.769/0.189, 0.349/0.686/0.168, 0.272/0.534/0.131); with
// amount == 0.0 the matrix reduces to the identity. Alpha is passed
// through unchanged.
vec4 Sepia(vec4 Cs, float amount) {
    float keep = 1.0 - amount;

    // Each column holds the contribution of one input channel to the
    // output (r, g, b, a).
    vec4 fromRed   = vec4(0.393 + 0.607 * keep, 0.349 - 0.349 * keep, 0.272 - 0.272 * keep, 0.0);
    vec4 fromGreen = vec4(0.769 - 0.769 * keep, 0.686 + 0.314 * keep, 0.534 - 0.534 * keep, 0.0);
    vec4 fromBlue  = vec4(0.189 - 0.189 * keep, 0.168 - 0.168 * keep, 0.131 + 0.869 * keep, 0.0);
    vec4 fromAlpha = vec4(0.0, 0.0, 0.0, 1.0);

    mat4 weights = mat4(fromRed, fromGreen, fromBlue, fromAlpha);
    return weights * Cs;
}
91+
92+
// Multiplies the color channels of Cs by `amount`; alpha is left as-is.
vec4 Brightness(vec4 Cs, float amount) {
    vec4 result = Cs;
    result.rgb *= amount;
    return result;
}
95+
96+
// Multiplies the alpha of Cs by `amount`; the color channels are left as-is.
vec4 Opacity(vec4 Cs, float amount) {
    vec4 result = Cs;
    result.a *= amount;
    return result;
}
99+
7100
void main(void) {
8-
vec4 color = texture(sCache, vUv);
9-
oFragColor = vec4(color.rgb * vBrightnessOpacity.x, color.a * vBrightnessOpacity.y);
101+
vec4 Cs = texture(sCache, vUv);
102+
103+
if (Cs.a == 0.0) {
104+
discard;
105+
}
106+
107+
switch (vOp) {
108+
case 0:
109+
// Gaussian blur is specially handled:
110+
oFragColor = Cs;// Blur(vAmount, vec2(0,0));
111+
break;
112+
case 1:
113+
oFragColor = Contrast(Cs, vAmount);
114+
break;
115+
case 2:
116+
oFragColor = Grayscale(Cs, vAmount);
117+
break;
118+
case 3:
119+
oFragColor = HueRotate(Cs, vAmount);
120+
break;
121+
case 4:
122+
oFragColor = Invert(Cs, vAmount);
123+
break;
124+
case 5:
125+
oFragColor = Saturate(Cs, vAmount);
126+
break;
127+
case 6:
128+
oFragColor = Sepia(Cs, vAmount);
129+
break;
130+
case 7:
131+
oFragColor = Brightness(Cs, vAmount);
132+
break;
133+
case 8:
134+
oFragColor = Opacity(Cs, vAmount);
135+
break;
136+
}
10137
}

webrender/res/ps_blend.glsl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@
33
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
44

55
varying vec2 vUv;
6-
varying vec2 vBrightnessOpacity;
6+
flat varying float vAmount;
7+
flat varying int vOp;

webrender/res/ps_blend.vs.glsl

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,23 @@
55

66
void main(void) {
77
Blend blend = fetch_blend(gl_InstanceID);
8-
Tile src = fetch_tile(blend.src_id_target_id_opacity.x);
9-
Tile dest = fetch_tile(blend.src_id_target_id_opacity.y);
8+
Tile src = fetch_tile(blend.src_id_target_id_op_amount.x);
9+
Tile dest = fetch_tile(blend.src_id_target_id_op_amount.y);
1010

11-
vec2 local_pos = mix(vec2(dest.target_rect.xy),
12-
vec2(dest.target_rect.xy + dest.target_rect.zw),
11+
vec2 dest_origin = dest.screen_origin_task_origin.zw -
12+
dest.screen_origin_task_origin.xy +
13+
src.screen_origin_task_origin.xy;
14+
15+
vec2 local_pos = mix(dest_origin,
16+
dest_origin + src.size.xy,
1317
aPosition.xy);
1418

15-
vec2 st0 = vec2(src.target_rect.xy) / 2048.0;
16-
vec2 st1 = vec2(src.target_rect.xy + src.target_rect.zw) / 2048.0;
19+
vec2 st0 = vec2(src.screen_origin_task_origin.zw) / 2048.0;
20+
vec2 st1 = vec2(src.screen_origin_task_origin.zw + src.size.xy) / 2048.0;
1721
vUv = mix(st0, st1, aPosition.xy);
18-
vBrightnessOpacity = blend.src_id_target_id_opacity.zw / 65535.0;
22+
23+
vOp = blend.src_id_target_id_op_amount.z;
24+
vAmount = blend.src_id_target_id_op_amount.w / 65535.0;
1925

2026
gl_Position = uTransform * vec4(local_pos, 0, 1);
2127
}

0 commit comments

Comments
 (0)