77
88< p > < a class ="logo " href ="https://whatwg.org/ "> < img alt ="WHATWG " height ="100 " src ="https://resources.whatwg.org/logo-encoding.svg " width ="100 "> </ a > </ p >
99< h1 > Encoding</ h1 >
10- < h2 class ="no-num no-toc " id ="living-standard-—-last-updated-12-february -2016 "> Living Standard — Last Updated 12 February 2016</ h2 >
10+ < h2 class ="no-num no-toc " id ="living-standard-—-last-updated-18-march -2016 "> Living Standard — Last Updated 18 March 2016</ h2 >
1111
1212< dl >
1313 < dt > Participate:
@@ -101,16 +101,13 @@ <h2 class="no-num no-toc" id="table-of-contents">Table of Contents</h2>
101101 < li > < a href ="#replacement-decoder "> < span class ="secno "> 15.1.1 </ span > replacement decoder</ a > </ ol > </ li >
102102 < li > < a href ="#common-infrastructure-for-utf-16be-and-utf-16le "> < span class ="secno "> 15.2 </ span > Common infrastructure for < span > UTF-16BE</ span > and < span > UTF-16LE</ span > </ a >
103103 < ol >
104- < li > < a href ="#shared-utf-16-decoder "> < span class ="secno "> 15.2.1 </ span > shared UTF-16 decoder</ a > </ li >
105- < li > < a href ="#shared-utf-16-encoder "> < span class ="secno "> 15.2.2 </ span > shared UTF-16 encoder</ a > </ ol > </ li >
104+ < li > < a href ="#shared-utf-16-decoder "> < span class ="secno "> 15.2.1 </ span > shared UTF-16 decoder</ a > </ ol > </ li >
106105 < li > < a href ="#utf-16be "> < span class ="secno "> 15.3 </ span > UTF-16BE</ a >
107106 < ol >
108- < li > < a href ="#utf-16be-decoder "> < span class ="secno "> 15.3.1 </ span > UTF-16BE decoder</ a > </ li >
109- < li > < a href ="#utf-16be-encoder "> < span class ="secno "> 15.3.2 </ span > UTF-16BE encoder</ a > </ ol > </ li >
107+ < li > < a href ="#utf-16be-decoder "> < span class ="secno "> 15.3.1 </ span > UTF-16BE decoder</ a > </ ol > </ li >
110108 < li > < a href ="#utf-16le "> < span class ="secno "> 15.4 </ span > UTF-16LE</ a >
111109 < ol >
112- < li > < a href ="#utf-16le-decoder "> < span class ="secno "> 15.4.1 </ span > UTF-16LE decoder</ a > </ li >
113- < li > < a href ="#utf-16le-encoder "> < span class ="secno "> 15.4.2 </ span > UTF-16LE encoder</ a > </ ol > </ li >
110+ < li > < a href ="#utf-16le-decoder "> < span class ="secno "> 15.4.1 </ span > UTF-16LE decoder</ a > </ ol > </ li >
114111 < li > < a href ="#x-user-defined "> < span class ="secno "> 15.5 </ span > x-user-defined</ a >
115112 < ol >
116113 < li > < a href ="#x-user-defined-decoder "> < span class ="secno "> 15.5.1 </ span > x-user-defined decoder</ a > </ li >
@@ -299,12 +296,15 @@ <h2 id="encodings"><span class="secno">5 </span>Encodings</h2>
299296< h3 id ="encoders-and-decoders "> < span class ="secno "> 5.1 </ span > Encoders and decoders</ h3 >
300297
301298< p > Each < a href ="#encoding "> encoding</ a > has an associated < dfn id ="decoder "> decoder</ dfn > and most of them have an
302- associated < dfn id ="encoder "> encoder</ dfn > ( < a href ="#replacement " > replacement </ a > does not). Each < a href ="#decoder " > decoder </ a > and
303- < a href =" #encoder " > encoder </ a > have a < dfn id ="handler "> handler</ dfn > algorithm. A < a href ="#handler "> handler</ a > algorithm takes an
304- input < a href ="#concept-stream " title ="concept-stream "> stream</ a > and a < a href ="#concept-token " title ="concept-token "> token</ a > , and
305- returns < dfn id ="finished "> finished</ dfn > , one or more < a href ="#concept-token " title ="concept-token "> tokens</ a > , < dfn id ="error "> error</ dfn >
299+ associated < dfn id ="encoder "> encoder</ dfn > . Each < a href ="#decoder " > decoder </ a > and < a href ="#encoder " > encoder </ a > have a
300+ < dfn id ="handler "> handler</ dfn > algorithm. A < a href ="#handler "> handler</ a > algorithm takes an input
301+ < a href ="#concept-stream " title ="concept-stream "> stream</ a > and a < a href ="#concept-token " title ="concept-token "> token</ a > , and returns
302+ < dfn id ="finished "> finished</ dfn > , one or more < a href ="#concept-token " title ="concept-token "> tokens</ a > , < dfn id ="error "> error</ dfn >
306303optionally with a < a href ="#code-point "> code point</ a > , or < dfn id ="continue "> continue</ dfn > .
307304
305+ < p class ="note no-backref "> The < a href ="#replacement "> replacement</ a > , < a href ="#utf-16be "> UTF-16BE</ a > , and
306+ < a href ="#utf-16le "> UTF-16LE</ a > < a href ="#encoding " title ="encoding "> encodings</ a > have no < a href ="#encoder "> encoder</ a > .
307+
308308< p > An < dfn id ="error-mode "> error mode</ dfn > as used below is "< code title =""> replacement</ code > " (default) or
309309"< code > fatal</ code > " for a < a href ="#decoder "> decoder</ a > and "< code > fatal</ code > " (default) or
310310"< code > html</ code > " for an < a href ="#encoder "> encoder</ a > .
@@ -1080,10 +1080,10 @@ <h2 id="api"><span class="secno">8 </span>API</h2>
10801080 < a href ="#utf-8 "> UTF-8</ a > encoded string data, the length of the second string (as
10811081 a < code title =""> Uint32Array</ code > ), the string data,
10821082 and so on.
1083- < pre > < code > function encodeArrayOfStrings(strings, encoding ) {
1083+ < pre > < code > function encodeArrayOfStrings(strings) {
10841084 var encoder, encoded, len, bytes, view, offset;
10851085
1086- encoder = new TextEncoder(encoding );
1086+ encoder = new TextEncoder();
10871087 encoded = [];
10881088
10891089 len = Uint32Array.BYTES_PER_ELEMENT;
@@ -1109,10 +1109,10 @@ <h2 id="api"><span class="secno">8 </span>API</h2>
11091109 return bytes.buffer;
11101110}</ code > </ pre >
11111111
1112- < p > The following example decodes an
1113- < code title ="" > ArrayBuffer </ code > containing data
1114- encoded in the format produced by the previous example back into an array
1115- of strings.
1112+ < p > The following example decodes an < code title ="" > ArrayBuffer </ code > containing data encoded in the
1113+ format produced by the previous example, or an equivalent algorithm for encodings other than
1114+ < a href =" #utf-8 " > UTF-8 </ a > , back into an array of strings.
1115+
11161116 < pre > < code > function decodeArrayOfStrings(buffer, encoding) {
11171117 var decoder, view, offset, num_strings, strings, len;
11181118
@@ -1328,60 +1328,46 @@ <h3 id="interface-textdecoder"><span class="secno">8.1 </span>Interface <code ti
13281328
13291329< h3 id ="interface-textencoder "> < span class ="secno "> 8.2 </ span > Interface < code title =""> TextEncoder</ code > </ h3 >
13301330
1331- < pre class ="idl "> [< a href ="#dom-textencoder " title ="dom-TextEncoder "> Constructor</ a > (optional DOMString < var > utfLabel</ var > = "utf-8"),
1331+ < pre class ="idl "> [< a href ="#dom-textencoder " title ="dom-TextEncoder "> Constructor</ a > <!-- We cannot add an argument here
1332+ that is not the label argument it had previously. That would break content. --> ,
13321333 Exposed=Window,Worker]
13331334interface < dfn id ="textencoder "> TextEncoder</ dfn > {
13341335 readonly attribute DOMString < a href ="#dom-textencoder-encoding " title ="dom-TextEncoder-encoding "> encoding</ a > ;
13351336 [NewObject] Uint8Array < a href ="#dom-textencoder-encode " title ="dom-TextEncoder-encode "> encode</ a > (optional USVString < var > input</ var > = "");
13361337};</ pre >
13371338
1338- < p > A < a href ="#textencoder "> < code > TextEncoder</ code > </ a > object has an associated < b > encoding </ b > and < b > encoder</ b > .
1339+ < p > A < a href ="#textencoder "> < code > TextEncoder</ code > </ a > object has an associated < b > encoder</ b > .
13391340
1340- < p class ="note no-backref "> A < a href ="#textencoder "> < code > TextEncoder</ code > </ a > object offers no < code > stream</ code >
1341- option as no < a href ="#encoder "> encoder</ a > requires buffering of scalar values.
1341+ < p class ="note no-backref "> A < a href ="#textencoder "> < code > TextEncoder</ code > </ a > object offers no < var > label</ var > argument as
1342+ it only supports < a href ="#utf-8 "> UTF-8</ a > . It also offers no < code > stream</ code > option as no
1343+ < a href ="#encoder "> encoder</ a > requires buffering of scalar values.
13421344
13431345< hr >
13441346
13451347< dl class ="domintro ">
1346- < dt > < code > < var > encoder</ var > = new < a href ="#dom-textencoder " title ="dom-TextEncoder "> TextEncoder</ a > ([< var > utfLabel</ var > = "utf-8"])</ code >
1347- < dd >
1348- < p > Returns a new < a href ="#textencoder "> < code > TextEncoder</ code > </ a > object.
1349- < p > If < var > utfLabel</ var > is not a < a href ="#label "> label</ a > for
1350- < a href ="#utf-8 "> UTF-8</ a > , < a href ="#utf-16be "> UTF-16BE</ a > , or < a href ="#utf-16le "> UTF-16LE</ a > ,
1351- < a class ="external " data-anolis-spec ="webidl " href ="https://heycam.github.io/webidl/#dfn-throw " title ="throw "> throws</ a > a
1352- < code > RangeError</ code > .
1348+ < dt > < code > < var > encoder</ var > = new < a href ="#dom-textencoder " title ="dom-TextEncoder "> TextEncoder</ a > ()</ code >
1349+ < dd > < p > Returns a new < a href ="#textencoder "> < code > TextEncoder</ code > </ a > object.
13531350
13541351 < dt > < code > < var > encoder</ var > . < a href ="#dom-textencoder-encoding " title ="dom-TextEncoder-encoding "> encoding</ a > </ code >
1355- < dd > < p > Returns < b > encoding </ b > 's < a href =" #name " > name </ a > , lowercased .
1352+ < dd > < p > Returns " < code title ="" > utf-8 </ code > " .
13561353
13571354 < dt > < code > < var > encoder</ var > . < a href ="#dom-textencoder-encode " title ="dom-TextEncoder-encode "> encode</ a > ([< var > input</ var > = ""])</ code >
1358- < dd > < p > Returns the result of running < b > encoding </ b > 's < a href ="#encoder "> encoder</ a > .
1355+ < dd > < p > Returns the result of running < a href =" #utf-8 " > UTF-8 </ a > 's < a href ="#encoder "> encoder</ a > .
13591356</ dl >
13601357
1361- < p > The
1362- < dfn id ="dom-textencoder " title ="dom-TextEncoder "> < code > TextEncoder(< var > utfLabel</ var > )</ code > </ dfn >
1363- constructor, when invoked, must run these steps:
1358+ < p > The < dfn id ="dom-textencoder " title ="dom-TextEncoder "> < code > TextEncoder()</ code > </ dfn > constructor, when invoked, must
1359+ run these steps:
13641360
13651361< ol >
1366- < li > < p > Let < var > encoding</ var > be the result of
1367- < a href ="#concept-encoding-get " title ="concept-encoding-get "> getting an encoding</ a > from
1368- < var > utfLabel</ var > .
1369-
1370- < li > < p > If < var > encoding</ var > is failure, or is not < a href ="#utf-8 "> UTF-8</ a > , < a href ="#utf-16be "> UTF-16BE</ a > , or
1371- < a href ="#utf-16le "> UTF-16LE</ a > , < a class ="external " data-anolis-spec ="webidl " href ="https://heycam.github.io/webidl/#dfn-throw "> throw</ a > a < code > RangeError</ code > .
1372-
13731362 < li > < p > Let < var > enc</ var > be a new < a href ="#textencoder "> < code > TextEncoder</ code > </ a > object.
13741363
1375- < li > < p > Set < var > enc</ var > 's < b > encoding</ b > to < var > encoding</ var > .
1376-
1377- < li > < p > Set < var > enc</ var > 's < b > encoder</ b > to a new
1378- < var > enc</ var > 's < b > encoding</ b > 's < a href ="#encoder "> encoder</ a > .
1364+ < li > < p > Set < var > enc</ var > 's < b > encoder</ b > to < a href ="#utf-8 "> UTF-8</ a > 's < a href ="#encoder "> encoder</ a > .
13791365
13801366 < li > < p > Return < var > enc</ var > .
13811367</ ol >
13821368
13831369< p > The < dfn id ="dom-textencoder-encoding " title ="dom-TextEncoder-encoding "> < code > encoding</ code > </ dfn > attribute's getter must
1384- return < b > encoding </ b > 's < a href =" #name " > name </ a > in < a href =" #ascii-lowercase " > ASCII lowercase </ a > .
1370+ return " < code title ="" > utf-8 </ code > " .
13851371
13861372< p > The
13871373< dfn id ="dom-textencoder-encode " title ="dom-TextEncoder-encode "> < code > encode(< var > input</ var > )</ code > </ dfn >
@@ -2607,26 +2593,6 @@ <h4 id="replacement-decoder"><span class="secno">15.1.1 </span><dfn>replacement
26072593
26082594< h3 id ="common-infrastructure-for-utf-16be-and-utf-16le "> < span class ="secno "> 15.2 </ span > Common infrastructure for < a href ="#utf-16be "> UTF-16BE</ a > and < a href ="#utf-16le "> UTF-16LE</ a > </ h3 >
26092595
2610- < p > To < dfn id ="convert-a-code-unit-to-bytes "> convert a < var > code unit</ var > to bytes</ dfn > using a
2611- < var > utf-16be flag</ var > , run these steps:
2612-
2613- < ol >
2614- < li > < p > Let < var > byte1</ var > be < var > code unit</ var > >> 8.
2615-
2616- < li > < p > Let < var > byte2</ var > be
2617- < var > code unit</ var > & 0x00FF.
2618-
2619- < li >
2620- < p > Then return the bytes in order:
2621-
2622- < dl class ="switch ">
2623- < dt > < var > utf-16be flag</ var > is set
2624- < dd > < p > < var > byte1</ var > , then < var > byte2</ var > .
2625- < dt > < var > utf-16be flag</ var > is unset
2626- < dd > < p > < var > byte2</ var > , then < var > byte1</ var > .
2627- </ dl >
2628- </ ol >
2629-
26302596< h4 id ="shared-utf-16-decoder "> < span class ="secno "> 15.2.1 </ span > < dfn > shared UTF-16 decoder</ dfn > </ h4 >
26312597
26322598< p class ="note no-backref "> A byte order mark has priority over a < a href ="#label "> label</ a > as it
@@ -2675,10 +2641,27 @@ <h4 id="shared-utf-16-decoder"><span class="secno">15.2.1 </span><dfn>shared UTF
26752641 return a code point whose value is
26762642 0x10000 + ((< var > lead surrogate</ var > − 0xD800) &lt;&lt; 10) + (< var > code unit</ var > − 0xDC00).
26772643
2678- < li > < p > < a href ="#concept-stream-prepend " title ="concept-stream-prepend "> Prepend</ a > the sequence resulting of
2679- < a href ="#convert-a-code-unit-to-bytes " title ="convert a code unit to bytes "> converting < var > code unit</ var > to bytes</ a >
2680- using < a href ="#utf-16be-decoder-flag "> UTF-16BE decoder flag</ a > to < var > stream</ var > and return
2681- < a href ="#error "> error</ a > .
2644+ < li >
2645+ < p > Let < var > bytes</ var > be the return value of running these subsubsteps:
2646+
2647+ < ol >
2648+ < li > < p > Let < var > byte1</ var > be < var > code unit</ var > &gt;&gt; 8.
2649+
2650+ < li > < p > Let < var > byte2</ var > be < var > code unit</ var > &amp; 0x00FF.
2651+
2652+ < li >
2653+ < p > Then return the bytes in order, switching on < a href ="#utf-16be-decoder-flag "> UTF-16BE decoder flag</ a > :
2654+
2655+ < dl class ="switch ">
2656+ < dt > Set
2657+ < dd > < p > < var > byte1</ var > , then < var > byte2</ var > .
2658+ < dt > Unset
2659+ < dd > < p > < var > byte2</ var > , then < var > byte1</ var > .
2660+ </ dl >
2661+ </ ol >
2662+
2663+ < li > < p > < a href ="#concept-stream-prepend " title ="concept-stream-prepend "> Prepend</ a > < var > bytes</ var > to
2664+ < var > stream</ var > and return < a href ="#error "> error</ a > .
26822665 <!-- unpaired surrogates; IE/WebKit output them, Gecko/Opera FFFD them -->
26832666 </ ol >
26842667
@@ -2694,37 +2677,6 @@ <h4 id="shared-utf-16-decoder"><span class="secno">15.2.1 </span><dfn>shared UTF
26942677</ ol >
26952678
26962679
2697- < h4 id ="shared-utf-16-encoder "> < span class ="secno "> 15.2.2 </ span > < dfn > shared UTF-16 encoder</ dfn > </ h4 >
2698-
2699- < p > < a href ="#shared-utf-16-encoder "> shared UTF-16 encoder</ a > has an associated < dfn id ="utf-16be-encoder-flag "> UTF-16BE encoder flag</ dfn >
2700- (initially unset).
2701-
2702- < p > < a href ="#shared-utf-16-encoder "> shared UTF-16 encoder</ a > 's < a href ="#handler "> handler</ a > , given a < var > stream</ var >
2703- and < var > code point</ var > , runs these steps:
2704-
2705- < ol >
2706- < li > < p > If < var > code point</ var > is < a href ="#end-of-stream "> end-of-stream</ a > , return
2707- < a href ="#finished "> finished</ a > .
2708-
2709- < li > < p > If < var > code point</ var > is in the range U+0000 to U+FFFF, inclusive, return
2710- the sequence resulting of
2711- < a href ="#convert-a-code-unit-to-bytes " title ="convert a code unit to bytes "> converting < var > code point</ var > to bytes</ a >
2712- using < a href ="#utf-16be-encoder-flag "> UTF-16BE encoder flag</ a > .
2713-
2714- < li > < p > Let < var > lead</ var > be
2715- ((< var > code point</ var > − 0x10000) >> 10) + 0xD800,
2716- < a href ="#convert-a-code-unit-to-bytes " title ="convert a code unit to bytes "> converted to bytes</ a > using
2717- < a href ="#utf-16be-encoder-flag "> UTF-16BE encoder flag</ a > .
2718-
2719- < li > < p > Let < var > trail</ var > be
2720- ((< var > code point</ var > − 0x10000) & 0x3FF) + 0xDC00,
2721- < a href ="#convert-a-code-unit-to-bytes " title ="convert a code unit to bytes "> converted to bytes</ a > using
2722- < a href ="#utf-16be-encoder-flag "> UTF-16BE encoder flag</ a > .
2723-
2724- < li > < p > Return a byte sequence of < var > lead</ var > followed by < var > trail</ var > .
2725- </ ol >
2726-
2727-
27282680< h3 id ="utf-16be "> < span class ="secno "> 15.3 </ span > < dfn > UTF-16BE</ dfn > </ h3 >
27292681
27302682< h4 id ="utf-16be-decoder "> < span class ="secno "> 15.3.1 </ span > < dfn > UTF-16BE decoder</ dfn > </ h4 >
@@ -2733,15 +2685,6 @@ <h4 id="utf-16be-decoder"><span class="secno">15.3.1 </span><dfn>UTF-16BE decode
27332685its < a href ="#utf-16be-decoder-flag "> UTF-16BE decoder flag</ a > set.
27342686
27352687
2736- < h4 id ="utf-16be-encoder "> < span class ="secno "> 15.3.2 </ span > < dfn > UTF-16BE encoder</ dfn > </ h4 >
2737-
2738- < p > < a href ="#utf-16be "> UTF-16BE</ a > 's < a href ="#encoder "> encoder</ a > is < a href ="#shared-utf-16-encoder "> shared UTF-16 encoder</ a > with
2739- its < a href ="#utf-16be-encoder-flag "> UTF-16BE encoder flag</ a > set.
2740-
2741- < p class ="note "> This algorithm has identical results to the one described in the Unicode standard. It
2742- is included here for completeness. < a href ="#refsUNICODE "> [UNICODE]</ a >
2743-
2744-
27452688< h3 id ="utf-16le "> < span class ="secno "> 15.4 </ span > < dfn > UTF-16LE</ dfn > </ h3 >
27462689
27472690< p class ="note no-backref "> Both "< code title =""> utf-16</ code > " and
@@ -2754,14 +2697,6 @@ <h4 id="utf-16le-decoder"><span class="secno">15.4.1 </span><dfn>UTF-16LE decode
27542697< p > < a href ="#utf-16le "> UTF-16LE</ a > 's < a href ="#decoder "> decoder</ a > is < a href ="#shared-utf-16-decoder "> shared UTF-16 decoder</ a > .
27552698
27562699
2757- < h4 id ="utf-16le-encoder "> < span class ="secno "> 15.4.2 </ span > < dfn > UTF-16LE encoder</ dfn > </ h4 >
2758-
2759- < p > < a href ="#utf-16le "> UTF-16LE</ a > 's < a href ="#encoder "> encoder</ a > is < a href ="#shared-utf-16-encoder "> shared UTF-16 encoder</ a > .
2760-
2761- < p class ="note "> This algorithm has identical results to the one described in the Unicode standard. It
2762- is included here for completeness. < a href ="#refsUNICODE "> [UNICODE]</ a >
2763-
2764-
27652700< h3 id ="x-user-defined "> < span class ="secno "> 15.5 </ span > < dfn > x-user-defined</ dfn > </ h3 >
27662701
27672702< p class ="note "> While technically this is a < a href ="#single-byte-encoding "> single-byte encoding</ a > ,
0 commit comments