diff --git a/build-system/compile/bundles.config.extensions.json b/build-system/compile/bundles.config.extensions.json
index 07fdd317f0a3..d2a46c8df27d 100644
--- a/build-system/compile/bundles.config.extensions.json
+++ b/build-system/compile/bundles.config.extensions.json
@@ -998,6 +998,14 @@
"version": "0.1",
"latestVersion": "0.1"
},
+ {
+ "name": "amp-story-captions",
+ "version": "0.1",
+ "latestVersion": "0.1",
+ "options": {
+ "hasCss": true
+ }
+ },
{
"name": "amp-story-dev-tools",
"version": "0.1",
diff --git a/examples/amp-story/amp-story-captions.html b/examples/amp-story/amp-story-captions.html
new file mode 100644
index 000000000000..40ccbd266252
--- /dev/null
+++ b/examples/amp-story/amp-story-captions.html
@@ -0,0 +1,61 @@
+
+
+
+
+ amp-story with amp-story-captions
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
This element is always below captions and never overlaps.
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/examples/amp-story/captions.vtt b/examples/amp-story/captions.vtt
new file mode 100644
index 000000000000..1c809b94284a
--- /dev/null
+++ b/examples/amp-story/captions.vtt
@@ -0,0 +1,447 @@
+WEBVTT
+Kind: captions
+Language: en
+
+00:00:14.330 --> 00:00:17.890 align:start position:0%
+
+at<00:00:15.290> the<00:00:15.470> left<00:00:15.710> we<00:00:15.889> can<00:00:16.190> see<00:00:16.250> though<00:00:17.210> we<00:00:17.450> can<00:00:17.660> see
+
+00:00:17.890 --> 00:00:17.900 align:start position:0%
+at the left we can see though we can see
+
+
+00:00:17.900 --> 00:00:21.040 align:start position:0%
+at the left we can see though we can see
+the<00:00:18.080> at<00:00:18.800> the<00:00:19.040> right<00:00:19.250> we<00:00:19.610> can<00:00:19.790> see<00:00:19.850> the<00:00:20.150> hats
+
+00:00:21.040 --> 00:00:21.050 align:start position:0%
+the at the right we can see the hats
+
+
+00:00:21.050 --> 00:00:23.350 align:start position:0%
+the at the right we can see the hats
+knowledge<00:00:21.830> and<00:00:22.130> everything<00:00:22.880> is<00:00:23.060> safe
+
+00:00:23.350 --> 00:00:23.360 align:start position:0%
+knowledge and everything is safe
+
+
+00:00:23.360 --> 00:00:51.539 align:start position:0%
+knowledge and everything is safe
+perfectly<00:00:24.170> safe<00:00:24.500> emo<00:00:25.779> emo<00:00:46.000> are<00:00:47.000> you<00:00:47.239> heard<00:00:47.510> I
+
+00:00:51.539 --> 00:00:51.549 align:start position:0%
+
+
+
+00:00:51.549 --> 00:00:58.840 align:start position:0%
+
+don't<00:00:52.549> think<00:00:52.729> so<00:00:52.970> you<00:00:54.879> I'm<00:00:55.879> okay<00:00:56.949> get<00:00:57.949> up<00:00:58.100> emo
+
+00:00:58.840 --> 00:00:58.850 align:start position:0%
+don't think so you I'm okay get up emo
+
+
+00:00:58.850 --> 00:01:03.850 align:start position:0%
+don't think so you I'm okay get up emo
+but<00:00:59.299> it's<00:00:59.629> not<00:00:59.960> safe<00:01:00.379> here<00:01:01.479> let's<00:01:02.479> go<00:01:02.860> what's
+
+00:01:03.850 --> 00:01:03.860 align:start position:0%
+but it's not safe here let's go what's
+
+
+00:01:03.860 --> 00:01:15.660 align:start position:0%
+but it's not safe here let's go what's
+next<00:01:04.780> you<00:01:05.780> you'll<00:01:06.320> see<00:01:07.180> you'll<00:01:08.180> see
+
+00:01:15.660 --> 00:01:15.670 align:start position:0%
+
+
+
+00:01:15.670 --> 00:01:33.870 align:start position:0%
+
+emo<00:01:16.479> this<00:01:17.380> way
+
+00:01:33.870 --> 00:01:33.880 align:start position:0%
+
+
+
+00:01:33.880 --> 00:02:10.500 align:start position:0%
+
+follow<00:01:34.880> me
+
+00:02:10.500 --> 00:02:10.510 align:start position:0%
+
+
+
+00:02:10.510 --> 00:02:47.870 align:start position:0%
+
+hurry
+
+00:02:47.870 --> 00:02:47.880 align:start position:0%
+
+
+
+00:02:47.880 --> 00:02:50.130 align:start position:0%
+
+you're<00:02:48.880> not<00:02:49.060> paying<00:02:49.270> attention
+
+00:02:50.130 --> 00:02:50.140 align:start position:0%
+you're not paying attention
+
+
+00:02:50.140 --> 00:02:55.710 align:start position:0%
+you're not paying attention
+I<00:02:50.940> just<00:02:51.940> want<00:02:52.180> to<00:02:52.330> enter<00:02:52.810> plain<00:02:54.240> email<00:02:55.240> look
+
+00:02:55.710 --> 00:02:55.720 align:start position:0%
+I just want to enter plain email look
+
+
+00:02:55.720 --> 00:03:01.380 align:start position:0%
+I just want to enter plain email look
+but<00:02:56.020> the<00:02:56.140> only<00:02:56.470> lesson<00:02:59.100> you<00:03:00.100> have<00:03:00.130> to<00:03:00.430> learn<00:03:00.790> to
+
+00:03:01.380 --> 00:03:01.390 align:start position:0%
+but the only lesson you have to learn to
+
+
+00:03:01.390 --> 00:03:06.030 align:start position:0%
+but the only lesson you have to learn to
+this<00:03:02.640> this<00:03:03.640> is<00:03:03.760> not<00:03:03.940> some<00:03:04.209> game<00:03:04.500> you<00:03:05.500> are<00:03:05.530> I
+
+00:03:06.030 --> 00:03:06.040 align:start position:0%
+this this is not some game you are I
+
+
+00:03:06.040 --> 00:03:09.560 align:start position:0%
+this this is not some game you are I
+mean<00:03:06.490> we<00:03:06.730> we<00:03:07.390> can<00:03:07.570> easily<00:03:08.050> die<00:03:08.320> out<00:03:08.350> here
+
+00:03:09.560 --> 00:03:09.570 align:start position:0%
+mean we we can easily die out here
+
+
+00:03:09.570 --> 00:03:13.229 align:start position:0%
+mean we we can easily die out here
+listen<00:03:10.950> listen<00:03:11.950> to<00:03:12.370> the<00:03:12.489> sounds<00:03:12.520> of<00:03:13.180> the
+
+00:03:13.229 --> 00:03:13.239 align:start position:0%
+listen listen to the sounds of the
+
+
+00:03:13.239 --> 00:04:08.900 align:start position:0%
+listen listen to the sounds of the
+machine<00:03:17.640> listen<00:03:18.640> to<00:03:18.820> your<00:03:19.000> breathing
+
+00:04:08.900 --> 00:04:08.910 align:start position:0%
+
+
+
+00:04:08.910 --> 00:04:26.160 align:start position:0%
+
+you
+
+00:04:26.160 --> 00:04:26.170 align:start position:0%
+
+
+
+00:04:26.170 --> 00:04:29.250 align:start position:0%
+
+well<00:04:27.170> don't<00:04:27.500> you<00:04:27.650> ever<00:04:27.680> get<00:04:28.040> tired<00:04:28.100> of<00:04:28.580> his
+
+00:04:29.250 --> 00:04:29.260 align:start position:0%
+well don't you ever get tired of his
+
+
+00:04:29.260 --> 00:04:34.000 align:start position:0%
+well don't you ever get tired of his
+tired<00:04:30.790> emo<00:04:31.790> the<00:04:32.090> machine<00:04:32.270> is<00:04:32.600> like<00:04:32.810> of<00:04:33.590> my
+
+00:04:34.000 --> 00:04:34.010 align:start position:0%
+tired emo the machine is like of my
+
+
+00:04:34.010 --> 00:04:37.270 align:start position:0%
+tired emo the machine is like of my
+clockwork<00:04:34.810> one<00:04:35.810> move<00:04:36.170> out<00:04:36.470> of<00:04:36.530> place<00:04:36.680> and
+
+00:04:37.270 --> 00:04:37.280 align:start position:0%
+clockwork one move out of place and
+
+
+00:04:37.280 --> 00:04:42.400 align:start position:0%
+clockwork one move out of place and
+you're<00:04:37.700> ground<00:04:37.850> too<00:04:40.840> isn't<00:04:41.840> it
+
+00:04:42.400 --> 00:04:42.410 align:start position:0%
+you're ground too isn't it
+
+
+00:04:42.410 --> 00:04:47.040 align:start position:0%
+you're ground too isn't it
+I<00:04:42.440> pulled<00:04:42.920> me<00:04:43.220> home<00:04:43.520> is<00:04:44.420> that<00:04:44.660> what<00:04:44.870> you<00:04:45.020> want
+
+00:04:47.040 --> 00:04:47.050 align:start position:0%
+I pulled me home is that what you want
+
+
+00:04:47.050 --> 00:05:40.860 align:start position:0%
+I pulled me home is that what you want
+emo<00:04:48.050> your<00:04:48.740> goal<00:04:48.800> in<00:04:49.280> life<00:04:49.310> pulp
+
+00:05:40.860 --> 00:05:40.870 align:start position:0%
+
+
+
+00:05:40.870 --> 00:05:52.949 align:start position:0%
+
+you<00:05:41.710> know<00:05:41.949> close<00:05:42.580> your<00:05:42.820> eyes<00:05:43.889> why<00:05:44.889> now
+
+00:05:52.949 --> 00:05:52.959 align:start position:0%
+
+
+
+00:05:52.959 --> 00:06:00.939 align:start position:0%
+
+good<00:05:58.959> what<00:05:59.959> do<00:06:00.079> you<00:06:00.199> see<00:06:00.379> at<00:06:00.499> your<00:06:00.529> left<00:06:00.799> side
+
+00:06:00.939 --> 00:06:00.949 align:start position:0%
+good what do you see at your left side
+
+
+00:06:00.949 --> 00:06:07.329 align:start position:0%
+good what do you see at your left side
+emo<00:06:01.129> well<00:06:03.789> nothing<00:06:04.789> really<00:06:05.859> no<00:06:06.859> nothing<00:06:07.219> at
+
+00:06:07.329 --> 00:06:07.339 align:start position:0%
+emo well nothing really no nothing at
+
+
+00:06:07.339 --> 00:06:10.239 align:start position:0%
+emo well nothing really no nothing at
+all<00:06:07.459> really<00:06:08.209> and<00:06:08.569> and<00:06:08.959> that<00:06:09.289> you<00:06:09.439> ride<00:06:09.679> what<00:06:10.099> do
+
+00:06:10.239 --> 00:06:10.249 align:start position:0%
+all really and and that you ride what do
+
+
+00:06:10.249 --> 00:06:12.899 align:start position:0%
+all really and and that you ride what do
+you<00:06:10.339> see<00:06:10.519> at<00:06:10.609> your<00:06:10.639> right<00:06:10.789> side<00:06:11.209> emo
+
+00:06:12.899 --> 00:06:12.909 align:start position:0%
+you see at your right side emo
+
+
+00:06:12.909 --> 00:06:17.699 align:start position:0%
+you see at your right side emo
+the<00:06:13.909> same<00:06:14.209> proog<00:06:14.539> exactly<00:06:15.369> the<00:06:16.369> same<00:06:16.489> nothing
+
+00:06:17.699 --> 00:06:17.709 align:start position:0%
+the same proog exactly the same nothing
+
+
+00:06:17.709 --> 00:06:39.980 align:start position:0%
+the same proog exactly the same nothing
+great
+
+00:06:39.980 --> 00:06:39.990 align:start position:0%
+
+
+
+00:06:39.990 --> 00:06:44.130 align:start position:0%
+
+listen<00:06:40.990> broom<00:06:41.260> do<00:06:41.860> you<00:06:42.010> hear<00:06:42.220> that<00:06:42.870> can<00:06:43.870> we<00:06:43.960> go
+
+00:06:44.130 --> 00:06:44.140 align:start position:0%
+listen broom do you hear that can we go
+
+
+00:06:44.140 --> 00:06:46.500 align:start position:0%
+listen broom do you hear that can we go
+here<00:06:44.790> there<00:06:45.790> yeah
+
+00:06:46.500 --> 00:06:46.510 align:start position:0%
+here there yeah
+
+
+00:06:46.510 --> 00:06:52.430 align:start position:0%
+here there yeah
+it<00:06:46.780> isn't<00:06:47.200> saving<00:06:49.020> what<00:06:50.020> trust<00:06:50.980> me<00:06:51.280> it's<00:06:51.580> not
+
+00:06:52.430 --> 00:06:52.440 align:start position:0%
+it isn't saving what trust me it's not
+
+
+00:06:52.440 --> 00:07:01.260 align:start position:0%
+it isn't saving what trust me it's not
+well<00:06:53.440> maybe<00:06:53.680> I<00:06:54.040> could<00:06:54.100> get<00:06:54.640> no<00:06:57.240> no<00:06:58.620> no<00:07:00.270> any
+
+00:07:01.260 --> 00:07:01.270 align:start position:0%
+well maybe I could get no no no any
+
+
+00:07:01.270 --> 00:07:11.970 align:start position:0%
+well maybe I could get no no no any
+further<00:07:01.840> questions<00:07:02.650> emo<00:07:03.780> no<00:07:09.300> emo<00:07:10.300> yeah<00:07:11.080> Eva
+
+00:07:11.970 --> 00:07:11.980 align:start position:0%
+further questions emo no emo yeah Eva
+
+
+00:07:11.980 --> 00:07:17.370 align:start position:0%
+further questions emo no emo yeah Eva
+why<00:07:13.200> emo<00:07:14.200> why<00:07:14.980> why<00:07:15.760> can't<00:07:16.120> you<00:07:16.300> see<00:07:16.600> the<00:07:16.810> beauty
+
+00:07:17.370 --> 00:07:17.380 align:start position:0%
+why emo why why can't you see the beauty
+
+
+00:07:17.380 --> 00:07:21.990 align:start position:0%
+why emo why why can't you see the beauty
+of<00:07:17.410> this<00:07:17.680> place<00:07:18.270> the<00:07:19.270> way<00:07:19.480> it<00:07:19.510> works<00:07:20.220> how<00:07:21.220> how
+
+00:07:21.990 --> 00:07:22.000 align:start position:0%
+of this place the way it works how how
+
+
+00:07:22.000 --> 00:07:27.030 align:start position:0%
+of this place the way it works how how
+perfect<00:07:22.900> it<00:07:23.080> is<00:07:23.760> no<00:07:24.760> proof<00:07:25.120> I<00:07:25.450> don't<00:07:26.380> see<00:07:26.710> I
+
+00:07:27.030 --> 00:07:27.040 align:start position:0%
+perfect it is no proof I don't see I
+
+
+00:07:27.040 --> 00:07:30.950 align:start position:0%
+perfect it is no proof I don't see I
+don't<00:07:27.580> see<00:07:27.910> because<00:07:28.180> there's<00:07:28.870> nothing<00:07:29.350> there
+
+00:07:30.950 --> 00:07:30.960 align:start position:0%
+don't see because there's nothing there
+
+
+00:07:30.960 --> 00:07:33.409 align:start position:0%
+don't see because there's nothing there
+and<00:07:31.650> why<00:07:32.039> should<00:07:32.400> I<00:07:32.490> trust<00:07:32.789> my<00:07:32.970> life<00:07:33.210> to
+
+00:07:33.409 --> 00:07:33.419 align:start position:0%
+and why should I trust my life to
+
+
+00:07:33.419 --> 00:07:36.080 align:start position:0%
+and why should I trust my life to
+something<00:07:33.690> that<00:07:33.840> isn't<00:07:34.020> there<00:07:34.820> what<00:07:35.820> can<00:07:36.000> you
+
+00:07:36.080 --> 00:07:36.090 align:start position:0%
+something that isn't there what can you
+
+
+00:07:36.090 --> 00:07:43.029 align:start position:0%
+something that isn't there what can you
+tell<00:07:36.270> me<00:07:36.419> that<00:07:36.570> emo<00:07:37.380> answer<00:07:37.919> me
+
+00:07:43.029 --> 00:07:43.039 align:start position:0%
+
+
+
+00:07:43.039 --> 00:07:46.719 align:start position:0%
+
+Luke<00:07:44.720> you're<00:07:45.720> a<00:07:45.750> sick<00:07:46.110> man
+
+00:07:46.719 --> 00:07:46.729 align:start position:0%
+Luke you're a sick man
+
+
+00:07:46.729 --> 00:07:55.270 align:start position:0%
+Luke you're a sick man
+stay<00:07:47.729> away<00:07:48.000> from<00:07:48.060> me<00:07:51.770> no<00:07:52.770> key<00:07:53.250> no<00:07:53.280> it's<00:07:53.940> a<00:07:54.090> trap
+
+00:07:55.270 --> 00:07:55.280 align:start position:0%
+stay away from me no key no it's a trap
+
+
+00:07:55.280 --> 00:07:58.909 align:start position:0%
+stay away from me no key no it's a trap
+it's<00:07:56.280> a<00:07:56.430> trap<00:07:56.750> at<00:07:57.750> the<00:07:57.900> left<00:07:58.139> side<00:07:58.410> you<00:07:58.650> can<00:07:58.680> see
+
+00:07:58.909 --> 00:07:58.919 align:start position:0%
+it's a trap at the left side you can see
+
+
+00:07:58.919 --> 00:08:02.510 align:start position:0%
+it's a trap at the left side you can see
+yes<00:07:59.400> Hanging<00:08:00.180> Gardens<00:08:00.210> of<00:08:00.750> Babylon<00:08:01.520> how's
+
+00:08:02.510 --> 00:08:02.520 align:start position:0%
+yes Hanging Gardens of Babylon how's
+
+
+00:08:02.520 --> 00:08:09.770 align:start position:0%
+yes Hanging Gardens of Babylon how's
+that<00:08:02.729> for<00:08:02.940> a<00:08:03.000> trap
+
+00:08:09.770 --> 00:08:09.780 align:start position:0%
+
+
+
+00:08:09.780 --> 00:08:12.080 align:start position:0%
+
+the<00:08:09.870> right<00:08:10.139> side<00:08:10.440> you<00:08:10.710> can<00:08:10.740> see<00:08:11.190> but<00:08:11.880> guess
+
+00:08:12.080 --> 00:08:12.090 align:start position:0%
+the right side you can see but guess
+
+
+00:08:12.090 --> 00:08:16.730 align:start position:0%
+the right side you can see but guess
+what<00:08:12.330> the<00:08:13.139> Colossus<00:08:13.680> Papa<00:08:14.070> rodas<00:08:14.960> no<00:08:15.960> the
+
+00:08:16.730 --> 00:08:16.740 align:start position:0%
+what the Colossus Papa rodas no the
+
+
+00:08:16.740 --> 00:08:18.800 align:start position:0%
+what the Colossus Papa rodas no the
+Colossus<00:08:17.220> of<00:08:17.370> Rhoda's<00:08:17.820> and<00:08:18.180> it<00:08:18.360> is<00:08:18.510> here<00:08:18.780> just
+
+00:08:18.800 --> 00:08:18.810 align:start position:0%
+Colossus of Rhoda's and it is here just
+
+
+00:08:18.810 --> 00:08:50.139 align:start position:0%
+Colossus of Rhoda's and it is here just
+for<00:08:19.290> you<00:08:19.530> proog<00:08:20.060> for<00:08:21.060> you
+
+00:08:50.139 --> 00:08:50.149 align:start position:0%
+
+
+
+00:08:50.149 --> 00:08:54.860 align:start position:0%
+
+it<00:08:51.149> is<00:08:51.600> there<00:08:52.939> I'm<00:08:53.939> telling<00:08:54.449> you
+
+00:08:54.860 --> 00:08:54.870 align:start position:0%
+it is there I'm telling you
+
+
+00:08:54.870 --> 00:10:05.389 align:start position:0%
+it is there I'm telling you
+the<00:08:55.259> emo<00:08:55.709> it<00:08:56.610> it<00:08:57.360> is<00:08:57.860> it<00:08:58.860> is
+
+00:10:05.389 --> 00:10:05.399 align:start position:0%
+
+
+
+00:10:05.399 --> 00:10:51.150 align:start position:0%
+
+and
+
+00:10:51.150 --> 00:10:51.160 align:start position:0%
+
+
+
+00:10:51.160 --> 00:10:53.220 align:start position:0%
+
+you
diff --git a/extensions/amp-story-captions/0.1/amp-story-captions.css b/extensions/amp-story-captions/0.1/amp-story-captions.css
new file mode 100644
index 000000000000..80dd20bb863e
--- /dev/null
+++ b/extensions/amp-story-captions/0.1/amp-story-captions.css
@@ -0,0 +1,23 @@
+/**
+ * Copyright 2021 The AMP HTML Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS-IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Keep whitespace and line breaks of the caption text; wrap only on overflow. */
+amp-story-captions {
+ white-space: pre-wrap;
+}
+
+/*
+ * Caption sections whose timestamp is later than the current video time.
+ * The class is toggled by track-renderer.js.
+ */
+.amp-story-captions-future {
+ color: gray;
+}
diff --git a/extensions/amp-story-captions/0.1/amp-story-captions.js b/extensions/amp-story-captions/0.1/amp-story-captions.js
new file mode 100644
index 000000000000..329ac9edcbbd
--- /dev/null
+++ b/extensions/amp-story-captions/0.1/amp-story-captions.js
@@ -0,0 +1,81 @@
+import {applyFillContent, isLayoutSizeDefined} from '#core/dom/layout';
+import {toArray} from '#core/types/array';
+
+import {TrackRenderer} from './track-renderer';
+
+import {CSS} from '../../../build/amp-story-captions-0.1.css';
+import {listen} from '../../../src/event-helper';
+
+export class AmpStoryCaptions extends AMP.BaseElement {
+ /** @param {!AmpElement} element */
+ constructor(element) {
+ super(element);
+
+ /** @private {?Element} */
+ this.container_ = null;
+
+ /** @private {?HTMLVideoElement} */
+ this.video_ = null;
+
+ /** @private {?UnlistenDef} */
+ this.textTracksChangeUnlistener_ = null;
+
+ /** @private {!Array} */
+ this.trackRenderers_ = [];
+ }
+
+ /** @override */
+ buildCallback() {
+ this.container_ = this.element.ownerDocument.createElement('div');
+ this.element.appendChild(this.container_);
+ applyFillContent(this.container_, /* replacedContent */ true);
+ }
+
+ /** @override */
+ isLayoutSupported(layout) {
+ return isLayoutSizeDefined(layout);
+ }
+
+ /**
+ * Attaches caption rendering to a video element. Called from amp-video.
+ * @param {!HTMLVideoElement} video
+ */
+ setVideoElement(video) {
+ if (this.textTracksChangeUnlistener_) {
+ this.textTracksChangeUnlistener_();
+ }
+
+ this.video_ = video;
+
+ this.updateTracks_();
+ this.textTracksChangeUnlistener_ = listen(
+ video.textTracks,
+ 'change',
+ () => {
+ this.updateTracks_();
+ }
+ );
+ }
+
+ /** Creates new track renderers for current textTracks. */
+ updateTracks_() {
+ while (this.trackRenderers_.length) {
+ this.trackRenderers_.pop().dispose();
+ }
+
+ toArray(this.video_.textTracks).forEach((track) => {
+ // Render both showing and hidden, because otherwise we would need to remember when we set it to hidden.
+ // Disabled tracks are ignored.
+ if (track.mode === 'showing' || track.mode === 'hidden') {
+ track.mode = 'hidden';
+ this.trackRenderers_.push(
+ new TrackRenderer(this.video_, track, this.container_)
+ );
+ }
+ });
+ }
+}
+
+// Registers the amp-story-captions element (version 0.1) together with its
+// stylesheet.
+AMP.extension('amp-story-captions', '0.1', (AMP) => {
+ AMP.registerElement('amp-story-captions', AmpStoryCaptions, CSS);
+});
diff --git a/extensions/amp-story-captions/0.1/test/test-amp-story-captions.js b/extensions/amp-story-captions/0.1/test/test-amp-story-captions.js
new file mode 100644
index 000000000000..d5a49391f723
--- /dev/null
+++ b/extensions/amp-story-captions/0.1/test/test-amp-story-captions.js
@@ -0,0 +1,93 @@
+/**
+ * Copyright 2021 The AMP HTML Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS-IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import '../amp-story-captions';
+import {createElementWithAttributes} from '#core/dom';
+
+const BLANK_VIDEO =
+ 'data:video/mp4;base64,AAAAIGZ0eXBpc29tAAACAGlzb21pc28yYXZjMW1wNDEAAAAIZnJlZQAABDBtZGF0AAACsAYF//+s3EXpvebZSLeWLNgg2SPu73gyNjQgLSBjb3JlIDE1NSByMjkxNyAwYTg0ZDk4IC0gSC4yNjQvTVBFRy00IEFWQyBjb2RlYyAtIENvcHlsZWZ0IDIwMDMtMjAxOCAtIGh0dHA6Ly93d3cudmlkZW9sYW4ub3JnL3gyNjQuaHRtbCAtIG9wdGlvbnM6IGNhYmFjPTEgcmVmPTMgZGVibG9jaz0xOi0zOi0zIGFuYWx5c2U9MHgzOjB4MTEzIG1lPWhleCBzdWJtZT03IHBzeT0xIHBzeV9yZD0yLjAwOjAuNzAgbWl4ZWRfcmVmPTEgbWVfcmFuZ2U9MTYgY2hyb21hX21lPTEgdHJlbGxpcz0xIDh4OGRjdD0xIGNxbT0wIGRlYWR6b25lPTIxLDExIGZhc3RfcHNraXA9MSBjaHJvbWFfcXBfb2Zmc2V0PS00IHRocmVhZHM9MSBsb29rYWhlYWRfdGhyZWFkcz0xIHNsaWNlZF90aHJlYWRzPTAgbnI9MCBkZWNpbWF0ZT0xIGludGVybGFjZWQ9MCBibHVyYXlfY29tcGF0PTAgY29uc3RyYWluZWRfaW50cmE9MCBiZnJhbWVzPTMgYl9weXJhbWlkPTIgYl9hZGFwdD0xIGJfYmlhcz0wIGRpcmVjdD0xIHdlaWdodGI9MSBvcGVuX2dvcD0wIHdlaWdodHA9MiBrZXlpbnQ9MjUwIGtleWludF9taW49MjUgc2NlbmVjdXQ9NDAgaW50cmFfcmVmcmVzaD0wIHJjX2xvb2thaGVhZD00MCByYz1jcmYgbWJ0cmVlPTEgY3JmPTIzLjAgcWNvbXA9MC42MCBxcG1pbj0wIHFwbWF4PTY5IHFwc3RlcD00IGlwX3JhdGlvPTEuNDAgYXE9MToxLjIwAIAAAAAQZYiEABHOf/73iB8yy2+ceQAAAAlBmiRsQRzn/uAAAAAJQZ5CeIdnP7eBAAAACQGeYXRDc5+6gAAAAAkBnmNqQ3OfuoEAAAAPQZpoSahBaJlMCCOc//7hAAAAC0GehkURLDs5/7eBAAAACQGepXRDc5+6gQAAAAkBnqdqQ3OfuoAAAAAPQZqsSahBbJlMCCOc//7gAAAAC0GeykUVLDs5/7eBAAAACQGe6XRDc5+6gAAAAAkBnutqQ3OfuoAAAAAPQZrwSahBbJlMCCGc//7hAAAAC0GfDkUVLDs5/7eBAAAACQGfLXRDc5+6gQAAAAkBny9qQ3OfuoAAAAAPQZs0SahBbJlMCH5z//7gAAAAC0GfUkUVLDs5/7eBAAAACQGfcXRDc5+6gAAAAAkBn3NqQ3OfuoAAAAAPQZt4SahBbJlMCG5z//7hAAAAC0GflkUVLDs5/7eAAAAACQGftXRDc5+6gQAAAAkBn7dqQ3OfuoEAAARPbW9vdgAAAGxtdmhkAAAAAAAAAAAAAAAAAAAD6AAAA+gAAQAAAQAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgAAA3l0cmFrAAAAXHRraGQAAAADAAAAAAAAAAAAAAABAAAAAAAAA+gAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAABAAAAAAAIAAAACAAAAAAAkZWR0cwAAABxlbHN0AAAAAAAAAAEAAAPoAAAEAAABAAAAAALxbWRpYQAAACBtZGhkAAAAAAAAAAAAAAAAAAAyAAAAMgBVxAAAAAAALWhkbHIAAAAAAAAAAHZpZGUAAAAAAAAAAAAAAABWaWRlb0hhbmRsZXIAAAACnG1pbmYAAAAUdm1oZAAAAAEAAAAAAAAAAAAAACR
kaW5mAAAAHGRyZWYAAAAAAAAAAQAAAAx1cmwgAAAAAQAAAlxzdGJsAAAAqHN0c2QAAAAAAAAAAQAAAJhhdmMxAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAIAAgBIAAAASAAAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGP//AAAAMmF2Y0MBZAAK/+EAGWdkAAqs2V+IiMBEAAADAAQAAAMAyDxIllgBAAZo6+PEyEwAAAAQcGFzcAAAAAEAAAABAAAAGHN0dHMAAAAAAAAAAQAAABkAAAIAAAAAFHN0c3MAAAAAAAAAAQAAAAEAAADYY3R0cwAAAAAAAAAZAAAAAQAABAAAAAABAAAKAAAAAAEAAAQAAAAAAQAAAAAAAAABAAACAAAAAAEAAAoAAAAAAQAABAAAAAABAAAAAAAAAAEAAAIAAAAAAQAACgAAAAABAAAEAAAAAAEAAAAAAAAAAQAAAgAAAAABAAAKAAAAAAEAAAQAAAAAAQAAAAAAAAABAAACAAAAAAEAAAoAAAAAAQAABAAAAAABAAAAAAAAAAEAAAIAAAAAAQAACgAAAAABAAAEAAAAAAEAAAAAAAAAAQAAAgAAAAAcc3RzYwAAAAAAAAABAAAAAQAAABkAAAABAAAAeHN0c3oAAAAAAAAAAAAAABkAAALIAAAADQAAAA0AAAANAAAADQAAABMAAAAPAAAADQAAAA0AAAATAAAADwAAAA0AAAANAAAAEwAAAA8AAAANAAAADQAAABMAAAAPAAAADQAAAA0AAAATAAAADwAAAA0AAAANAAAAFHN0Y28AAAAAAAAAAQAAADAAAABidWR0YQAAAFptZXRhAAAAAAAAACFoZGxyAAAAAAAAAABtZGlyYXBwbAAAAAAAAAAAAAAAAC1pbHN0AAAAJal0b28AAAAdZGF0YQAAAAEAAAAATGF2ZjU4LjIwLjEwMA==';
+
+describes.realWin(
+ 'amp-story-captions',
+ {
+ amp: {
+ runtimeOn: true,
+ extensions: ['amp-story-captions'],
+ },
+ },
+ (env) => {
+ let win;
+ let element;
+
+ beforeEach(() => {
+ win = env.win;
+ element = createElementWithAttributes(
+ win.document,
+ 'amp-story-captions',
+ {
+ layout: 'fixed-height',
+ height: '100px',
+ }
+ );
+ win.document.body.appendChild(element);
+ });
+
+ it('should contain be empty when built', async () => {
+ await element.whenBuilt();
+ expect(element.querySelector('div').textContent).to.equal('');
+ });
+
+ it('update on cuechange', async () => {
+ await element.whenBuilt();
+ const impl = await element.getImpl();
+
+ const video = createElementWithAttributes(win.document, 'video', {
+ 'width': '100',
+ 'height': '100',
+ 'muted': '',
+ 'src': BLANK_VIDEO,
+ });
+ win.document.body.appendChild(video);
+
+ video.play();
+ video.pause();
+
+ // Wait for loadedmetadata event to fire.
+ await new Promise((resolve) => {
+ video.addEventListener('loadedmetadata', resolve);
+ });
+
+ // Add captions to the video.
+ const track = video.addTextTrack('captions', 'English', 'en');
+ track.mode = 'showing';
+ track.addCue(new VTTCue(0, 0.1, 'first caption'));
+ track.addCue(new VTTCue(0.4, 1, 'second caption'));
+
+ impl.setVideoElement(video);
+ expect(element.querySelector('div').textContent).to.equal(
+ 'first caption'
+ );
+
+ video.currentTime = 0.5;
+ await new Promise((resolve) => {
+ video.addEventListener('timeupdate', resolve);
+ });
+ expect(element.querySelector('div').textContent).to.equal(
+ 'second caption'
+ );
+ });
+ }
+);
diff --git a/extensions/amp-story-captions/0.1/test/validator-amp-story-captions.html b/extensions/amp-story-captions/0.1/test/validator-amp-story-captions.html
new file mode 100644
index 000000000000..f1bc765b7a19
--- /dev/null
+++ b/extensions/amp-story-captions/0.1/test/validator-amp-story-captions.html
@@ -0,0 +1,31 @@
+
+
+
+
+ amp-story-captions example
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/extensions/amp-story-captions/0.1/test/validator-amp-story-captions.out b/extensions/amp-story-captions/0.1/test/validator-amp-story-captions.out
new file mode 100644
index 000000000000..c08cb368b513
--- /dev/null
+++ b/extensions/amp-story-captions/0.1/test/validator-amp-story-captions.out
@@ -0,0 +1,32 @@
+PASS
+|
+|
+|
+|
+| amp-story-captions example
+|
+|
+|
+|
+|
+|
+|
+|
+|
+|
+|
+|
+|
+|
+|
+|
+|
+|
+|
+|
+|
\ No newline at end of file
diff --git a/extensions/amp-story-captions/0.1/track-renderer.js b/extensions/amp-story-captions/0.1/track-renderer.js
new file mode 100644
index 000000000000..f6045121aaa3
--- /dev/null
+++ b/extensions/amp-story-captions/0.1/track-renderer.js
@@ -0,0 +1,129 @@
+import {removeChildren, removeElement} from '#core/dom';
+import {setStyles} from '#core/dom/style';
+import {toArray} from '#core/types/array';
+
+import {listen} from '../../../src/event-helper';
+
+// CSS class toggled on caption sections whose timestamp is later than the
+// current video time (for ASR-style captions). Styled in
+// amp-story-captions.css.
+const FUTURE_CUE_SECTION_CLASS = 'amp-story-captions-future';
+
+/**
+ * Parses a WebVTT timestamp and returns the time in seconds from video start.
+ * https://www.w3.org/TR/webvtt1/#webvtt-timestamp
+ * @param {string} timestamp
+ * @return {?number}
+ */
+function parseTimestamp(timestamp) {
+ const match = /^(?:(\d{2,}):)?(\d{2}):(\d{2})\.(\d{3})$/.exec(timestamp);
+ if (!match) {
+ return null;
+ }
+ const hours = match[1] ? parseInt(match[1], 10) : 0;
+ const minutes = parseInt(match[2], 10);
+ const seconds = parseInt(match[3], 10);
+ const milliseconds = parseInt(match[4], 10);
+ return hours * 3600 + minutes * 60 + seconds + milliseconds / 1000;
+}
+
+export class TrackRenderer {
+ /**
+ *
+ * @param {!HTMLVideoElement} video
+ * @param {!TextTrack} track
+ * @param {!Element} container
+ */
+ constructor(video, track, container) {
+ /** @private {!HTMLVideoElement} */
+ this.video_ = video;
+
+ /** @private {?TextTrack} */
+ this.track_ = track;
+
+ /** @private {!Element} */
+ this.element_ = container.ownerDocument.createElement('div');
+ container.appendChild(this.element_);
+
+ /** @private {!Array} */
+ this.cueTimestamps_ = [];
+
+ this.render_();
+ this.cueChangeUnlistener_ = listen(track, 'cuechange', () => {
+ this.render_();
+ });
+ this.timeUpdateUnlistener_ = listen(video, 'timeupdate', () => {
+ this.updateTime_();
+ });
+ }
+
+ /**
+ * Cleans up listeners and DOM elements.
+ */
+ dispose() {
+ this.cueChangeUnlistener_();
+ this.timeUpdateUnlistener_();
+ removeElement(this.element_);
+ this.video_ = null;
+ this.track_ = null;
+ }
+
+ /**
+ * Render currently active cues.
+ * @private
+ */
+ render_() {
+ removeChildren(this.element_);
+ this.cueTimestamps_.length = 0;
+ toArray(this.track_.activeCues).forEach((cue) => {
+ const cueElement = this.element_.ownerDocument.createElement('div');
+ setStyles(cueElement, {
+ 'position': 'absolute',
+ 'bottom': 0,
+ 'left': 0,
+ 'right': 0,
+ });
+
+ const html = cue.getCueAsHTML();
+ let section = this.element_.ownerDocument.createElement('span');
+ cueElement.appendChild(section);
+ const timestamps = [];
+ toArray(html.childNodes).forEach((node) => {
+ if (node.target === 'timestamp') {
+ const timestamp = parseTimestamp(node.data);
+ if (timestamp !== null) {
+ timestamps.push(timestamp);
+ // Create a new section after each timestamp, so the style can
+ // easily be updated based on time.
+ section = this.element_.ownerDocument.createElement('span');
+ cueElement.appendChild(section);
+ }
+ } else {
+ section.appendChild(node);
+ }
+ });
+
+ this.cueTimestamps_.push(timestamps);
+ this.element_.appendChild(cueElement);
+ });
+ this.updateTime_();
+ }
+
+ /**
+ * Update cue style based on the current video time (for ASR-style captions).
+ * @private
+ */
+ updateTime_() {
+ const videoTime = this.video_.currentTime;
+ toArray(this.element_.childNodes).forEach((cue, i) => {
+ toArray(cue.childNodes).forEach((section, j) => {
+ // The first section always has implicit timestamp 0, so it's never in
+ // the future.
+ if (j > 0) {
+ section.classList.toggle(
+ FUTURE_CUE_SECTION_CLASS,
+ this.cueTimestamps_[i][j - 1] > videoTime
+ );
+ }
+ });
+ });
+ }
+}
diff --git a/extensions/amp-story-captions/validator-amp-story-captions.protoascii b/extensions/amp-story-captions/validator-amp-story-captions.protoascii
new file mode 100644
index 000000000000..93e3992f2f27
--- /dev/null
+++ b/extensions/amp-story-captions/validator-amp-story-captions.protoascii
@@ -0,0 +1,46 @@
+#
+# Copyright 2021 The AMP HTML Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS-IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+tags: { # amp-story-captions
+ html_format: AMP
+ tag_name: "SCRIPT"
+ extension_spec: {
+ name: "amp-story-captions"
+ version: "0.1"
+ version: "latest"
+ }
+ attr_lists: "common-extension-attrs"
+}
+tags: { # <amp-story-captions>
+ html_format: AMP
+ tag_name: "AMP-STORY-CAPTIONS"
+ requires_extension: "amp-story-captions"
+ attr_lists: "extended-amp-global"
+ spec_url: "https://amp.dev/documentation/components/amp-story-captions"
+ mandatory_ancestor: "AMP-STORY"
+ amp_layout: {
+ supported_layouts: FILL
+ supported_layouts: FIXED
+ supported_layouts: FIXED_HEIGHT
+ supported_layouts: FLEX_ITEM
+ supported_layouts: FLUID
+ supported_layouts: INTRINSIC
+ supported_layouts: RESPONSIVE
+ }
+ child_tags: {
+ mandatory_num_child_tags: 0
+ }
+}
diff --git a/extensions/amp-story/validator-amp-story.protoascii b/extensions/amp-story/validator-amp-story.protoascii
index 21406dea22b2..2869cf1d1ca4 100644
--- a/extensions/amp-story/validator-amp-story.protoascii
+++ b/extensions/amp-story/validator-amp-story.protoascii
@@ -704,6 +704,7 @@ descendant_tag_list: {
tag: "AMP-STATE"
tag: "AMP-STORY-360"
tag: "AMP-STORY-AUTO-ANALYTICS"
+ tag: "AMP-STORY-CAPTIONS"
tag: "AMP-STORY-INTERACTIVE-BINARY-POLL"
tag: "AMP-STORY-INTERACTIVE-IMG-POLL"
tag: "AMP-STORY-INTERACTIVE-IMG-QUIZ"
diff --git a/extensions/amp-video/0.1/amp-video.js b/extensions/amp-video/0.1/amp-video.js
index e8ba1067c710..b5f8d5d67f0b 100644
--- a/extensions/amp-video/0.1/amp-video.js
+++ b/extensions/amp-video/0.1/amp-video.js
@@ -20,6 +20,7 @@ import {
insertAfterOrAtStart,
removeElement,
} from '#core/dom';
+import {escapeCssSelectorIdent} from '#core/dom/css-selectors';
import {fetchCachedSources} from './video-cache';
import {
fullscreenEnter,
@@ -606,6 +607,7 @@ export class AmpVideo extends AMP.BaseElement {
tracks.forEach((track) => {
this.video_.appendChild(track);
});
+ this.setUpCaptions_();
if (this.video_.changedSources) {
this.video_.changedSources();
@@ -746,6 +748,29 @@ export class AmpVideo extends AMP.BaseElement {
listenOncePromise(this.video_, 'loadedmetadata').then(() =>
this.onVideoLoaded_()
);
+ this.setUpCaptions_();
+ }
+
+ /**
+ * Connects to amp-story-captions component.
+ * @private
+ */
+ setUpCaptions_() {
+ const captionsId = this.element.getAttribute('captions-id');
+ if (!captionsId) {
+ return;
+ }
+ const captionsElement = this.win.document.querySelector(
+ `amp-story-captions#${escapeCssSelectorIdent(captionsId)}`
+ );
+ if (!captionsElement) {
+ return;
+ }
+ captionsElement.getImpl().then((impl) => {
+ if (impl.setVideoElement) {
+ impl.setVideoElement(this.video_);
+ }
+ });
}
/** @private */
diff --git a/extensions/amp-video/validator-amp-video.protoascii b/extensions/amp-video/validator-amp-video.protoascii
index 46b23af3ea62..2327ea1be7f8 100644
--- a/extensions/amp-video/validator-amp-video.protoascii
+++ b/extensions/amp-video/validator-amp-video.protoascii
@@ -179,6 +179,10 @@ tags: { # in amp-story
name: "cache"
value: "google"
}
+ attrs: {
+ name: "captions-id"
+ requires_extension: "amp-story-captions"
+ }
attr_lists: "extended-amp-global"
attr_lists: "amp-video-common"
spec_url: "https://amp.dev/documentation/components/amp-video/"