<!-- CINXE.COM -->
<!-- Encoding Standard Review Draft June 2023 -->
<!doctype html><html lang="en"> <head> <meta charset="utf-8"> <meta content="width=device-width, initial-scale=1, shrink-to-fit=no" name="viewport"> <meta content="#3c790a" name="theme-color"> <title>Encoding Standard Review Draft June 2023</title> <link crossorigin href="https://resources.whatwg.org/spec.css" rel="stylesheet"> <link crossorigin href="https://resources.whatwg.org/review-draft.css" rel="stylesheet"> <link crossorigin href="https://resources.whatwg.org/logo-encoding-snapshot.svg" rel="icon"> <meta content="Bikeshed version 5fcd28d6d, updated Tue May 30 13:12:11 2023 -0700" name="generator"> <link crossorigin href="visualization-colors.css" rel="stylesheet"> <style>/* Boilerplate: style-issues */ a[href].issue-return { float: right; float: inline-end; color: var(--issueheading-text); font-weight: bold; text-decoration: none; } </style> <body class="h-entry status-RD"> <div class="head"> <a class="logo" href="https://whatwg.org/"> <img alt="WHATWG" crossorigin height="100" src="https://resources.whatwg.org/logo-encoding-snapshot.svg"> </a> <hgroup> <h1 class="p-name no-ref" id="title">Encoding</h1> <p id="profile-and-date">Review Draft — Published <time class="dt-updated" datetime="2023-06-19">19 June 2023</time> </p> </hgroup> <details class="annoying-warning" open> <summary>This is a Review Draft of the standard</summary> <p> This is a Review Draft. It is published primarily for purposes of patent review by Workstream Participants; it mirrors the <a href="https://encoding.spec.whatwg.org/">Living Standard</a> closely, redacting only text that is identified as "Objection Pending" or "Confirmation Pending". Developers should refer to the <a href="https://encoding.spec.whatwg.org/">Living Standard</a> for the most current error corrections and other developments. 
</p> <p> For information regarding patent commitments, please see the <a href="https://whatwg.org/ipr-policy">IPR Policy</a> and <a href="https://github.com/whatwg/encoding/labels/exclusion%20notices">exclusion notices</a>. </p> <p> Do not attempt to implement this version of the standard. Do not reference this version as authoritative in any way. Instead, see <a href="https://encoding.spec.whatwg.org/">https://encoding.spec.whatwg.org/</a> for the Living Standard. </p> </details> </div> <div class="p-summary"> <h2 class="no-num no-toc no-ref heading settled" id="abstract"><span class="content">Abstract</span></h2> <p>The Encoding Standard defines encodings and their JavaScript API.</p> </div> <nav id="toc"> <h2 class="no-num no-toc no-ref" id="contents">Table of Contents</h2> <ol class="toc" role="directory"> <li><a href="#preface"><span class="secno">1</span> <span class="content">Preface</span></a> <li><a href="#security-background"><span class="secno">2</span> <span class="content">Security background</span></a> <li><a href="#terminology"><span class="secno">3</span> <span class="content">Terminology</span></a> <li> <a href="#encodings"><span class="secno">4</span> <span class="content">Encodings</span></a> <ol class="toc"> <li><a href="#encoders-and-decoders"><span class="secno">4.1</span> <span class="content">Encoders and decoders</span></a> <li><a href="#names-and-labels"><span class="secno">4.2</span> <span class="content">Names and labels</span></a> <li><a href="#output-encodings"><span class="secno">4.3</span> <span class="content">Output encodings</span></a> </ol> <li><a href="#indexes"><span class="secno">5</span> <span class="content">Indexes</span></a> <li> <a href="#specification-hooks"><span class="secno">6</span> <span class="content">Hooks for standards</span></a> <ol class="toc"> <li><a href="#legacy-hooks"><span class="secno">6.1</span> <span class="content">Legacy hooks for standards</span></a> </ol> <li> <a href="#api"><span 
class="secno">7</span> <span class="content">API</span></a> <ol class="toc"> <li><a href="#interface-mixin-textdecodercommon"><span class="secno">7.1</span> <span class="content">Interface mixin <code class="idl"><span>TextDecoderCommon</span></code></span></a> <li><a href="#interface-textdecoder"><span class="secno">7.2</span> <span class="content">Interface <code class="idl"><span>TextDecoder</span></code></span></a> <li><a href="#interface-mixin-textencodercommon"><span class="secno">7.3</span> <span class="content">Interface mixin <code class="idl"><span>TextEncoderCommon</span></code></span></a> <li><a href="#interface-textencoder"><span class="secno">7.4</span> <span class="content">Interface <code class="idl"><span>TextEncoder</span></code></span></a> <li><a href="#interface-textdecoderstream"><span class="secno">7.5</span> <span class="content">Interface <code class="idl"><span>TextDecoderStream</span></code></span></a> <li><a href="#interface-textencoderstream"><span class="secno">7.6</span> <span class="content">Interface <code class="idl"><span>TextEncoderStream</span></code></span></a> </ol> <li> <a href="#the-encoding"><span class="secno">8</span> <span class="content">The encoding</span></a> <ol class="toc"> <li> <a href="#utf-8"><span class="secno">8.1</span> <span class="content">UTF-8</span></a> <ol class="toc"> <li><a href="#utf-8-decoder"><span class="secno">8.1.1</span> <span class="content">UTF-8 decoder</span></a> <li><a href="#utf-8-encoder"><span class="secno">8.1.2</span> <span class="content">UTF-8 encoder</span></a> </ol> </ol> <li> <a href="#legacy-single-byte-encodings"><span class="secno">9</span> <span class="content">Legacy single-byte encodings</span></a> <ol class="toc"> <li><a href="#single-byte-decoder"><span class="secno">9.1</span> <span class="content">single-byte decoder</span></a> <li><a href="#single-byte-encoder"><span class="secno">9.2</span> <span class="content">single-byte encoder</span></a> </ol> <li> <a 
href="#legacy-multi-byte-chinese-(simplified)-encodings"><span class="secno">10</span> <span class="content">Legacy multi-byte Chinese (simplified) encodings</span></a> <ol class="toc"> <li> <a href="#gbk"><span class="secno">10.1</span> <span class="content">GBK</span></a> <ol class="toc"> <li><a href="#gbk-decoder"><span class="secno">10.1.1</span> <span class="content">GBK decoder</span></a> <li><a href="#gbk-encoder"><span class="secno">10.1.2</span> <span class="content">GBK encoder</span></a> </ol> <li> <a href="#gb18030"><span class="secno">10.2</span> <span class="content">gb18030</span></a> <ol class="toc"> <li><a href="#gb18030-decoder"><span class="secno">10.2.1</span> <span class="content">gb18030 decoder</span></a> <li><a href="#gb18030-encoder"><span class="secno">10.2.2</span> <span class="content">gb18030 encoder</span></a> </ol> </ol> <li> <a href="#legacy-multi-byte-chinese-(traditional)-encodings"><span class="secno">11</span> <span class="content">Legacy multi-byte Chinese (traditional) encodings</span></a> <ol class="toc"> <li> <a href="#big5"><span class="secno">11.1</span> <span class="content">Big5</span></a> <ol class="toc"> <li><a href="#big5-decoder"><span class="secno">11.1.1</span> <span class="content">Big5 decoder</span></a> <li><a href="#big5-encoder"><span class="secno">11.1.2</span> <span class="content">Big5 encoder</span></a> </ol> </ol> <li> <a href="#legacy-multi-byte-japanese-encodings"><span class="secno">12</span> <span class="content">Legacy multi-byte Japanese encodings</span></a> <ol class="toc"> <li> <a href="#euc-jp"><span class="secno">12.1</span> <span class="content">EUC-JP</span></a> <ol class="toc"> <li><a href="#euc-jp-decoder"><span class="secno">12.1.1</span> <span class="content">EUC-JP decoder</span></a> <li><a href="#euc-jp-encoder"><span class="secno">12.1.2</span> <span class="content">EUC-JP encoder</span></a> </ol> <li> <a href="#iso-2022-jp"><span class="secno">12.2</span> <span 
class="content">ISO-2022-JP</span></a> <ol class="toc"> <li><a href="#iso-2022-jp-decoder"><span class="secno">12.2.1</span> <span class="content">ISO-2022-JP decoder</span></a> <li><a href="#iso-2022-jp-encoder"><span class="secno">12.2.2</span> <span class="content">ISO-2022-JP encoder</span></a> </ol> <li> <a href="#shift_jis"><span class="secno">12.3</span> <span class="content">Shift_JIS</span></a> <ol class="toc"> <li><a href="#shift_jis-decoder"><span class="secno">12.3.1</span> <span class="content">Shift_JIS decoder</span></a> <li><a href="#shift_jis-encoder"><span class="secno">12.3.2</span> <span class="content">Shift_JIS encoder</span></a> </ol> </ol> <li> <a href="#legacy-multi-byte-korean-encodings"><span class="secno">13</span> <span class="content">Legacy multi-byte Korean encodings</span></a> <ol class="toc"> <li> <a href="#euc-kr"><span class="secno">13.1</span> <span class="content">EUC-KR</span></a> <ol class="toc"> <li><a href="#euc-kr-decoder"><span class="secno">13.1.1</span> <span class="content">EUC-KR decoder</span></a> <li><a href="#euc-kr-encoder"><span class="secno">13.1.2</span> <span class="content">EUC-KR encoder</span></a> </ol> </ol> <li> <a href="#legacy-miscellaneous-encodings"><span class="secno">14</span> <span class="content">Legacy miscellaneous encodings</span></a> <ol class="toc"> <li> <a href="#replacement"><span class="secno">14.1</span> <span class="content">replacement</span></a> <ol class="toc"> <li><a href="#replacement-decoder"><span class="secno">14.1.1</span> <span class="content">replacement decoder</span></a> </ol> <li> <a href="#common-infrastructure-for-utf-16be-and-utf-16le"><span class="secno">14.2</span> <span class="content">Common infrastructure for <span>UTF-16BE/LE</span></span></a> <ol class="toc"> <li><a href="#shared-utf-16-decoder"><span class="secno">14.2.1</span> <span class="content">shared UTF-16 decoder</span></a> </ol> <li> <a href="#utf-16be"><span class="secno">14.3</span> <span 
class="content">UTF-16BE</span></a> <ol class="toc"> <li><a href="#utf-16be-decoder"><span class="secno">14.3.1</span> <span class="content">UTF-16BE decoder</span></a> </ol> <li> <a href="#utf-16le"><span class="secno">14.4</span> <span class="content">UTF-16LE</span></a> <ol class="toc"> <li><a href="#utf-16le-decoder"><span class="secno">14.4.1</span> <span class="content">UTF-16LE decoder</span></a> </ol> <li> <a href="#x-user-defined"><span class="secno">14.5</span> <span class="content">x-user-defined</span></a> <ol class="toc"> <li><a href="#x-user-defined-decoder"><span class="secno">14.5.1</span> <span class="content">x-user-defined decoder</span></a> <li><a href="#x-user-defined-encoder"><span class="secno">14.5.2</span> <span class="content">x-user-defined encoder</span></a> </ol> </ol> <li><a href="#browser-ui"><span class="secno">15</span> <span class="content">Browser UI</span></a> <li><a href="#implementation-considerations"><span class="secno"></span> <span class="content">Implementation considerations</span></a> <li><a href="#acknowledgments"><span class="secno"></span> <span class="content">Acknowledgments</span></a> <li><a href="#ipr"><span class="secno"></span> <span class="content">Intellectual property rights</span></a> <li> <a href="#section-index"><span class="secno"></span> <span class="content">Index</span></a> <ol class="toc"> <li><a href="#index-defined-here"><span class="secno"></span> <span class="content">Terms defined by this specification</span></a> <li><a href="#index-defined-elsewhere"><span class="secno"></span> <span class="content">Terms defined by reference</span></a> </ol> <li> <a href="#references"><span class="secno"></span> <span class="content">References</span></a> <ol class="toc"> <li><a href="#normative"><span class="secno"></span> <span class="content">Normative References</span></a> <li><a href="#informative"><span class="secno"></span> <span class="content">Informative References</span></a> </ol> <li><a 
href="#idl-index"><span class="secno"></span> <span class="content">IDL Index</span></a> </ol> </nav> <main> <h2 class="heading settled" id="preface"><span class="secno">1. </span><span class="content">Preface</span></h2> <p>The UTF-8 encoding is the most appropriate encoding for interchange of Unicode, the universal coded character set. Therefore for new protocols and formats, as well as existing formats deployed in new contexts, this specification requires (and defines) the UTF-8 encoding. </p> <p>The other (legacy) encodings have been defined to some extent in the past. However, user agents have not always implemented them in the same way, have not always used the same labels, and often differ in dealing with undefined and former proprietary areas of encodings. This specification addresses those gaps so that new user agents do not have to reverse engineer encoding implementations and existing user agents can converge. </p> <p>In particular, this specification defines all those encodings, their algorithms to go from bytes to scalar values and back, and their canonical names and identifying labels. This specification also defines an API to expose part of the encoding algorithms to JavaScript. </p> <p>User agents have also significantly deviated from the labels listed in the <a href="https://www.iana.org/assignments/character-sets/character-sets.xhtml">IANA Character Sets registry</a>. To stop spreading legacy encodings further, this specification is exhaustive about the aforementioned details and therefore has no need for the registry. In particular, this specification does not provide a mechanism for extending any aspect of encodings. </p> <h2 class="heading settled" id="security-background"><span class="secno">2. </span><span class="content">Security background</span></h2> <p>There is a set of encoding security issues when the producer and consumer do not agree on the encoding in use, or on the way a given encoding is to be implemented. 
For instance, an attack was reported in 2011 where a <a href="#shift_jis">Shift_JIS</a> lead byte 0x82 was used to “mask” a 0x22 trail byte in a JSON resource of which an attacker could control some field. The producer did not see the problem even though this is an illegal byte combination. The consumer decoded it as a single U+FFFD and therefore changed the overall interpretation as U+0022 is an important delimiter. Decoders of encodings that use multiple bytes for scalar values now require that in case of an illegal byte combination, a scalar value in the range U+0000 to U+007F, inclusive, cannot be “masked”. For the aforementioned sequence the output would be U+FFFD U+0022. (As an unfortunate exception to this, the <a href="#gb18030-decoder">gb18030 decoder</a> will “mask” up to one such byte at <a href="#end-of-stream">end-of-queue</a>.) </p> <p>This is a larger issue for encodings that map anything that is an <a href="https://infra.spec.whatwg.org/#ascii-byte" id="ad0a76920">ASCII byte</a> to something that is not an <a href="https://infra.spec.whatwg.org/#ascii-code-point" id="48a42c940">ASCII code point</a>, when there is no lead byte present. These are “ASCII-incompatible” encodings and other than <a href="#iso-2022-jp">ISO-2022-JP</a> and <a href="#utf-16be-le">UTF-16BE/LE</a>, which are unfortunately required due to deployed content, they are not supported. (Investigation is <a href="https://github.com/whatwg/encoding/issues/8">ongoing</a> whether more labels of other such encodings can be mapped to the <a href="#replacement">replacement</a> encoding, rather than the unknown encoding fallback.) An example attack is injecting carefully crafted content into a resource and then encouraging the user to override the encoding, resulting in, e.g., script execution. </p> <p>Encoders used by URLs found in HTML and HTML’s form feature can also result in slight information loss when an encoding is used that cannot represent all scalar values. 
E.g., when a resource uses the <a href="#windows-1252">windows-1252</a> encoding a server will not be able to distinguish between an end user entering “💩” and “&amp;#128169;” into a form. </p> <p>The problems outlined here go away when exclusively using UTF-8, which is one of the many reasons that is now the mandatory encoding for all things. </p> <p class="note" role="note">See also the <a href="#browser-ui">Browser UI</a> chapter. </p> <h2 class="heading settled" id="terminology"><span class="secno">3. </span><span class="content">Terminology</span></h2> <p>This specification depends on the Infra Standard. <a href="#biblio-infra" title="Infra Standard">[INFRA]</a> </p> <p>Hexadecimal numbers are prefixed with "0x". </p> <p>In equations, all numbers are integers, addition is represented by "+", subtraction by "−", multiplication by "×", integer division by "/" (returns the quotient), modulo by "%" (returns the remainder of an integer division), logical left shifts by "&lt;&lt;", logical right shifts by "&gt;&gt;", bitwise AND by "&amp;", and bitwise OR by "|". </p> <p>For logical right shifts operands must have at least twenty-one bits precision. </p> <hr> <p>An <dfn id="concept-stream">I/O queue</dfn> is a type of <a href="https://infra.spec.whatwg.org/#list" id="afe3eb660">list</a> with <a href="https://infra.spec.whatwg.org/#list-item" id="2d67d96e0">items</a> of a particular type (i.e., <a href="https://infra.spec.whatwg.org/#byte" id="b69a47660">bytes</a> or <a href="https://infra.spec.whatwg.org/#scalar-value" id="61ea31220">scalar values</a>). <dfn id="end-of-stream">End-of-queue</dfn> is a special <a href="https://infra.spec.whatwg.org/#list-item" id="2d67d96e1">item</a> that can be present in <a href="#concept-stream">I/O queues</a> of any type and it signifies that there are no more <a href="https://infra.spec.whatwg.org/#list-item" id="2d67d96e2">items</a> in the queue.
</p> <div class="note" role="note"> <p>There are two ways to use an <a href="#concept-stream">I/O queue</a>: in immediate mode, to represent I/O data stored in memory, and in streaming mode, to represent data coming in from the network. Immediate queues have <a href="#end-of-stream">end-of-queue</a> as their last item, whereas streaming queues need not have it, and so their <a href="#concept-stream-read">read</a> operation might block. </p> <p>It is expected that streaming <a href="#concept-stream">I/O queues</a> will be created empty, and that new <a href="https://infra.spec.whatwg.org/#list-item" id="2d67d96e3">items</a> will be <a href="#concept-stream-push">pushed</a> to it as data comes in from the network. When the underlying network stream closes, an <a href="#end-of-stream">end-of-queue</a> item is to be <a href="#concept-stream-push">pushed</a> into the queue. </p> <p>Since reading from a streaming <a href="#concept-stream">I/O queue</a> might block, streaming <a href="#concept-stream">I/O queues</a> are not to be used from an <a href="https://html.spec.whatwg.org/multipage/webappapis.html#event-loop" id="c60e545a0">event loop</a>. They are to be used <a href="https://html.spec.whatwg.org/multipage/infrastructure.html#in-parallel" id="a459951e0">in parallel</a> instead. </p> </div> <p>To <dfn id="concept-stream-read">read</dfn> an <a href="https://infra.spec.whatwg.org/#list-item" id="2d67d96e4">item</a> from an <a href="#concept-stream">I/O queue</a> <var>ioQueue</var>, run these steps: </p> <ol> <li> <p>If <var>ioQueue</var> is <a href="https://infra.spec.whatwg.org/#list-empty" id="0a62c3180">empty</a>, then wait until its <a href="https://infra.spec.whatwg.org/#list-size" id="222107ce0">size</a> is at least 1. </p> <li> <p>If <var>ioQueue</var>[0] is <a href="#end-of-stream">end-of-queue</a>, then return <a href="#end-of-stream">end-of-queue</a>. 
</p> <li> <p><a href="https://infra.spec.whatwg.org/#list-remove" id="6e78fe0c0">Remove</a> <var>ioQueue</var>[0] and return it. </p> </ol> <p>To <a href="#concept-stream-read">read</a> a number <var>number</var> of <a href="https://infra.spec.whatwg.org/#list-item" id="2d67d96e5">items</a> from <var>ioQueue</var>, run these steps: </p> <ol> <li> <p>Let <var>readItems</var> be an empty list. </p> <li> <p>Perform the following step <var>number</var> times: </p> <ol> <li> <p><a href="https://infra.spec.whatwg.org/#list-append" id="6afa26dd0">Append</a> to <var>readItems</var> the result of <a href="#concept-stream-read">reading</a> an item from <var>ioQueue</var>. </p> </ol> <li> <p><a href="https://infra.spec.whatwg.org/#list-remove" id="6e78fe0c1">Remove</a> <a href="#end-of-stream">end-of-queue</a> from <var>readItems</var>. </p> <li> <p>Return <var>readItems</var>. </p> </ol> <p>To <dfn id="i-o-queue-peek">peek</dfn> a number <var>number</var> of <a href="https://infra.spec.whatwg.org/#list-item" id="2d67d96e6">items</a> from an <a href="#concept-stream">I/O queue</a> <var>ioQueue</var>, run these steps: </p> <ol> <li> <p>Wait until either <var>ioQueue</var>’s <a href="https://infra.spec.whatwg.org/#list-size" id="222107ce1">size</a> is equal to or greater than <var>number</var>, or <var>ioQueue</var> <a href="https://infra.spec.whatwg.org/#list-contain" id="6e0f99b10">contains</a> <a href="#end-of-stream">end-of-queue</a>, whichever comes first. </p> <li> <p>Let <var>prefix</var> be an empty list. </p> <li> <p><a href="https://infra.spec.whatwg.org/#list-iterate" id="3f0f895a0">For each</a> <var>n</var> in <a href="https://infra.spec.whatwg.org/#the-range" id="f8e0c3a50">the range</a> 0 to <var>number</var> − 1, inclusive: </p> <ol> <li> <p>If <var>ioQueue</var>[<var>n</var>] is <a href="#end-of-stream">end-of-queue</a>, <a href="https://infra.spec.whatwg.org/#iteration-break" id="7a438e490">break</a>.
</p> <li> <p>Otherwise, <a href="https://infra.spec.whatwg.org/#list-append" id="6afa26dd1">append</a> <var>ioQueue</var>[<var>n</var>] to <var>prefix</var>. </p> </ol> <li> <p>Return <var>prefix</var>. </p> </ol> <p>To <dfn id="concept-stream-push">push</dfn> an <a href="https://infra.spec.whatwg.org/#list-item" id="2d67d96e7">item</a> <var>item</var> to an <a href="#concept-stream">I/O queue</a> <var>ioQueue</var>, run these steps: </p> <ol> <li> <p>If the last <a href="https://infra.spec.whatwg.org/#list-item" id="2d67d96e8">item</a> in <var>ioQueue</var> is <a href="#end-of-stream">end-of-queue</a>, then: </p> <ol> <li> <p>If <var>item</var> is <a href="#end-of-stream">end-of-queue</a>, do nothing. </p> <li> <p>Otherwise, <a href="https://infra.spec.whatwg.org/#list-insert" id="4f0ded9e0">insert</a> <var>item</var> before the last <a href="https://infra.spec.whatwg.org/#list-item" id="2d67d96e9">item</a> in <var>ioQueue</var>. </p> </ol> <li> <p>Otherwise, <a href="https://infra.spec.whatwg.org/#list-append" id="6afa26dd2">append</a> <var>item</var> to <var>ioQueue</var>. </p> </ol> <p>To <a href="#concept-stream-push">push</a> a sequence of items to an <a href="#concept-stream">I/O queue</a> <var>ioQueue</var> is to push each item in the sequence to <var>ioQueue</var>, in the given order. </p> <p>To <dfn id="concept-stream-prepend">prepend</dfn> an <a href="https://infra.spec.whatwg.org/#list-item" id="2d67d96e10">item</a> other than <a href="#end-of-stream">end-of-queue</a> to an <a href="#concept-stream">I/O queue</a>, perform the normal <a href="https://infra.spec.whatwg.org/#list" id="afe3eb661">list</a> <a href="https://infra.spec.whatwg.org/#list-prepend" id="967137770">prepend</a> operation. To prepend a sequence of items not containing <a href="#end-of-stream">end-of-queue</a>, insert those items, in the given order, before the first item in the queue. 
</p> <p class="example" id="example-tokens">Inserting the sequence of scalar value items <code>&amp;#128169;</code> in an I/O queue of scalar values "<code> hello world</code>", results in an I/O queue "<code>&amp;#128169; hello world</code>". The next item to be read would be <code>&amp;</code>. </p> <p>To <dfn id="from-i-o-queue-convert">convert</dfn> an <a href="#concept-stream">I/O queue</a> <var>ioQueue</var> into a <a href="https://infra.spec.whatwg.org/#list" id="afe3eb662">list</a>, <a href="https://infra.spec.whatwg.org/#string" id="14014d7f0">string</a>, or <a href="https://infra.spec.whatwg.org/#byte-sequence" id="c2327ca40">byte sequence</a>, return the result of <a href="#concept-stream-read">reading</a> an indefinite number of <a href="https://infra.spec.whatwg.org/#list-item" id="2d67d96e11">items</a> from <var>ioQueue</var>. </p> <p>To <dfn id="to-i-o-queue-convert">convert</dfn> a <a href="https://infra.spec.whatwg.org/#list" id="afe3eb663">list</a>, <a href="https://infra.spec.whatwg.org/#string" id="14014d7f1">string</a>, or <a href="https://infra.spec.whatwg.org/#byte-sequence" id="c2327ca41">byte sequence</a> <var>input</var> into an <a href="#concept-stream">I/O queue</a>, run these steps: </p> <ol> <li> <p>Assert: if <var>input</var> is a <a href="https://infra.spec.whatwg.org/#list" id="afe3eb664">list</a>, then it does not <a href="https://infra.spec.whatwg.org/#list-contain" id="6e0f99b11">contain</a> <a href="#end-of-stream">end-of-queue</a>. </p> <li> <p>Return an <a href="#concept-stream">I/O queue</a> containing the <a href="https://infra.spec.whatwg.org/#list-item" id="2d67d96e12">items</a> in <var>input</var>, in order, followed by <a href="#end-of-stream">end-of-queue</a>. </p> </ol> <p class="XXX">The Infra standard is expected to define some infrastructure around type conversions. See <a href="https://github.com/whatwg/infra/issues/319">whatwg/infra issue #319</a>.
<a href="#biblio-infra" title="Infra Standard">[INFRA]</a> </p> <p class="note" role="note"><a href="#concept-stream">I/O queues</a> are defined as <a href="https://infra.spec.whatwg.org/#list" id="afe3eb665">lists</a>, not <a href="https://infra.spec.whatwg.org/#queue" id="9889b21e0">queues</a>, because they feature a <a href="#concept-stream-prepend">prepend</a> operation. However, this prepend operation is an internal detail of the algorithms in this specification, and is not to be used by other standards. Implementations are free to find alternative ways to implement such algorithms, as detailed in <a href="#implementation-considerations">Implementation considerations</a>. </p> <h2 class="heading settled" id="encodings"><span class="secno">4. </span><span class="content">Encodings</span></h2> <p>An <dfn id="encoding">encoding</dfn> defines a mapping from a <a href="https://infra.spec.whatwg.org/#scalar-value" id="61ea31221">scalar value</a> sequence to a <a href="https://infra.spec.whatwg.org/#byte" id="b69a47661">byte</a> sequence (and vice versa). Each <a href="#encoding">encoding</a> has a <dfn id="name">name</dfn>, and one or more <dfn id="label">labels</dfn>. </p> <p class="note no-backref" role="note">This specification defines three <a href="#encoding">encodings</a> with the same names as <i>encoding schemes</i> defined in the Unicode standard: <a href="#utf-8">UTF-8</a>, <a href="#utf-16le">UTF-16LE</a>, and <a href="#utf-16be">UTF-16BE</a>. The <a href="#encoding">encodings</a> differ from the <i>encoding schemes</i> by byte order mark (also known as BOM) handling not being part of the <a href="#encoding">encodings</a> themselves and instead being part of wrapper algorithms in this specification, whereas byte order mark handling is part of the definition of the <i>encoding schemes</i> in the Unicode Standard. 
<a href="#utf-8">UTF-8</a> used together with the <a href="#utf-8-decode">UTF-8 decode</a> algorithm matches the <i>encoding scheme</i> of the same name. This specification does not provide wrapper algorithms that would combine with <a href="#utf-16le">UTF-16LE</a> and <a href="#utf-16be">UTF-16BE</a> to match the similarly-named <i>encoding schemes</i>. <a href="#biblio-unicode" title="The Unicode Standard">[UNICODE]</a> </p> <h3 class="heading settled" id="encoders-and-decoders"><span class="secno">4.1. </span><span class="content">Encoders and decoders</span></h3> <p>Each <a href="#encoding">encoding</a> has an associated <dfn id="decoder">decoder</dfn> and most of them have an associated <dfn id="encoder">encoder</dfn>. Instances of <a href="#decoder">decoders</a> and <a href="#encoder">encoders</a> have a <dfn id="handler">handler</dfn> algorithm and might also have state. A <a href="#handler">handler</a> algorithm takes an input <a href="#concept-stream">I/O queue</a> and an <a href="https://infra.spec.whatwg.org/#list-item" id="2d67d96e13">item</a>, and returns <dfn id="finished">finished</dfn>, one or more <a href="https://infra.spec.whatwg.org/#list-item" id="2d67d96e14">items</a>, <dfn id="error">error</dfn> optionally with a <a href="https://infra.spec.whatwg.org/#code-point" id="b61de9b10">code point</a>, or <dfn id="continue">continue</dfn>. </p> <p class="note no-backref" role="note">The <a href="#replacement">replacement</a> and <a href="#utf-16be-le">UTF-16BE/LE</a> <a href="#encoding">encodings</a> have no <a href="#encoder">encoder</a>. </p> <p>An <dfn id="error-mode">error mode</dfn> as used below is "<code>replacement</code>" or "<code>fatal</code>" for a <a href="#decoder">decoder</a> and "<code>fatal</code>" or "<code>html</code>" for an <a href="#encoder">encoder</a>. </p> <p class="note" role="note">An XML processor would set <a href="#error-mode">error mode</a> to "<code>fatal</code>". 
<a href="#biblio-xml" title="Extensible Markup Language (XML) 1.0 (Fifth Edition)">[XML]</a> </p> <p class="note" role="note">"<code>html</code>" exists as <a href="#error-mode">error mode</a> due to HTML forms requiring a non-terminating legacy <a href="#encoder">encoder</a>. The "<code>html</code>" <a href="#error-mode">error mode</a> causes a sequence to be emitted that cannot be distinguished from legitimate input and can therefore lead to silent data loss. Developers are strongly encouraged to use the <a href="#utf-8">UTF-8</a> <a href="#encoding">encoding</a> to prevent this from happening. <a href="#biblio-html" title="HTML Standard">[HTML]</a> </p> <hr> <p>To <dfn id="concept-encoding-run">process a queue</dfn> given an <a href="#encoding">encoding</a>’s <a href="#decoder">decoder</a> or <a href="#encoder">encoder</a> instance <var>encoderDecoder</var>, <a href="#concept-stream">I/O queue</a> <var>input</var>, <a href="#concept-stream">I/O queue</a> <var>output</var>, and <a href="#error-mode">error mode</a> <var>mode</var>: </p> <ol> <li> <p>While true: </p> <ol> <li> <p>Let <var>result</var> be the result of <a href="#concept-encoding-process">processing an item</a> with the result of <a href="#concept-stream-read">reading</a> from <var>input</var>, <var>encoderDecoder</var>, <var>input</var>, <var>output</var>, and <var>mode</var>. </p> <li> <p>If <var>result</var> is not <a href="#continue">continue</a>, then return <var>result</var>. 
</p> </ol> </ol> <p>To <dfn id="concept-encoding-process">process an item</dfn> given an <a href="https://infra.spec.whatwg.org/#list-item" id="2d67d96e15">item</a> <var>item</var>, <a href="#encoding">encoding</a>’s <a href="#encoder">encoder</a> or <a href="#decoder">decoder</a> instance <var>encoderDecoder</var>, <a href="#concept-stream">I/O queue</a> <var>input</var>, <a href="#concept-stream">I/O queue</a> <var>output</var>, and <a href="#error-mode">error mode</a> <var>mode</var>: </p> <ol> <li> <p>Assert: if <var>encoderDecoder</var> is an <a href="#encoder">encoder</a> instance, <var>mode</var> is not "<code>replacement</code>". </p> <li> <p>Assert: if <var>encoderDecoder</var> is a <a href="#decoder">decoder</a> instance, <var>mode</var> is not "<code>html</code>". </p> <li> <p>Assert: if <var>encoderDecoder</var> is an <a href="#encoder">encoder</a> instance, <var>item</var> is not a <a href="https://infra.spec.whatwg.org/#surrogate" id="1be036c80">surrogate</a>. </p> <li> <p>Let <var>result</var> be the result of running <var>encoderDecoder</var>’s <a href="#handler">handler</a> on <var>input</var> and <var>item</var>. </p> <li> <p>If <var>result</var> is <a href="#finished">finished</a>: </p> <ol> <li> <p><a href="#concept-stream-push">Push</a> <a href="#end-of-stream">end-of-queue</a> to <var>output</var>. </p> <li> <p>Return <var>result</var>. </p> </ol> <li> <p>Otherwise, if <var>result</var> is one or more <a href="https://infra.spec.whatwg.org/#list-item" id="2d67d96e16">items</a>: </p> <ol> <li> <p>Assert: if <var>encoderDecoder</var> is a <a href="#decoder">decoder</a> instance, <var>result</var> does not contain any <a href="https://infra.spec.whatwg.org/#surrogate" id="1be036c81">surrogates</a>. </p> <li> <p><a href="#concept-stream-push">Push</a> <var>result</var> to <var>output</var>. 
</p> </ol> <li> <p>Otherwise, if <var>result</var> is an <a href="#error">error</a>, switch on <var>mode</var> and run the associated steps: </p> <dl class="switch"> <dt>"<code>replacement</code>" <dd><a href="#concept-stream-push">Push</a> U+FFFD (�) to <var>output</var>. <dt>"<code>html</code>" <dd><a href="#concept-stream-push">Push</a> 0x26 (&amp;), 0x23 (#), followed by the shortest sequence of 0x30 (0) to 0x39 (9), inclusive, representing <var>result</var>’s <a href="https://infra.spec.whatwg.org/#code-point" id="b61de9b11">code point</a>’s <a href="https://infra.spec.whatwg.org/#code-point-value" id="2bff95cb0">value</a> in base ten, followed by 0x3B (;) to <var>output</var>. <dt>"<code>fatal</code>" <dd>Return <var>result</var>. </dl> <li> <p>Return <a href="#continue">continue</a>. </p> </ol> <h3 class="heading settled" id="names-and-labels"><span class="secno">4.2. </span><span class="content">Names and labels</span></h3> <p>The table below lists all <a href="#encoding">encodings</a> and their <a href="#label">labels</a> user agents must support. User agents must not support any other <a href="#encoding">encodings</a> or <a href="#label">labels</a>. </p> <p class="note" role="note">For each encoding, <a href="https://infra.spec.whatwg.org/#ascii-lowercase" id="441210640">ASCII-lowercasing</a> its <a href="#name">name</a> yields one of its <a href="#label">labels</a>. </p> <p>Authors must use the <a href="#utf-8">UTF-8</a> <a href="#encoding">encoding</a> and must use its (<a href="https://infra.spec.whatwg.org/#ascii-case-insensitive" id="1274df4f0">ASCII case-insensitive</a>) "<code>utf-8</code>" <a href="#label">label</a> to identify it. </p> <p>New protocols and formats, as well as existing formats deployed in new contexts, must use the <a href="#utf-8">UTF-8</a> <a href="#encoding">encoding</a> exclusively. 
If these protocols and formats need to expose the <a href="#encoding">encoding</a>’s <a href="#name">name</a> or <a href="#label">label</a>, they must expose it as "<code>utf-8</code>". </p> <p>To <dfn id="concept-encoding-get">get an encoding</dfn> from a string <var>label</var>, run these steps: </p> <ol> <li> <p>Remove any leading and trailing <a href="https://infra.spec.whatwg.org/#ascii-whitespace" id="9b09dd760">ASCII whitespace</a> from <var>label</var>. </p> <li> <p>If <var>label</var> is an <a href="https://infra.spec.whatwg.org/#ascii-case-insensitive" id="1274df4f1">ASCII case-insensitive</a> match for any of the labels listed in the table below, then return the corresponding <a href="#encoding">encoding</a>; otherwise return failure. </p> </ol> <p class="note" role="note">This is a more basic and restrictive algorithm of mapping labels to <a href="#encoding">encodings</a> than <a href="https://www.unicode.org/reports/tr22/tr22-8.html#Charset_Alias_Matching">section 1.4 of Unicode Technical Standard #22</a> prescribes, as that is necessary to be compatible with deployed content. 
</p> <table> <thead> <tr> <th>Name <th>Labels <tbody> <tr> <th colspan="2"><a href="#the-encoding">The Encoding</a> <tr> <td rowspan="6"><a href="#utf-8">UTF-8</a> <td>"<code>unicode-1-1-utf-8</code>" <tr> <td>"<code>unicode11utf8</code>" <tr> <td>"<code>unicode20utf8</code>" <tr> <td>"<code>utf-8</code>" <tr> <td>"<code>utf8</code>" <tr> <td>"<code>x-unicode20utf8</code>" <tbody> <tr> <th colspan="2"><a href="#legacy-single-byte-encodings">Legacy single-byte encodings</a> <tr> <td rowspan="4"><a href="#ibm866">IBM866</a> <td>"<code>866</code>" <tr> <td>"<code>cp866</code>" <tr> <td>"<code>csibm866</code>" <tr> <td>"<code>ibm866</code>" <tr> <td rowspan="9"><a href="#iso-8859-2">ISO-8859-2</a> <td>"<code>csisolatin2</code>" <tr> <td>"<code>iso-8859-2</code>" <tr> <td>"<code>iso-ir-101</code>" <tr> <td>"<code>iso8859-2</code>" <tr> <td>"<code>iso88592</code>" <tr> <td>"<code>iso_8859-2</code>" <tr> <td>"<code>iso_8859-2:1987</code>" <tr> <td>"<code>l2</code>" <tr> <td>"<code>latin2</code>" <tr> <td rowspan="9"><a href="#iso-8859-3">ISO-8859-3</a> <td>"<code>csisolatin3</code>" <tr> <td>"<code>iso-8859-3</code>" <tr> <td>"<code>iso-ir-109</code>" <tr> <td>"<code>iso8859-3</code>" <tr> <td>"<code>iso88593</code>" <tr> <td>"<code>iso_8859-3</code>" <tr> <td>"<code>iso_8859-3:1988</code>" <tr> <td>"<code>l3</code>" <tr> <td>"<code>latin3</code>" <tr> <td rowspan="9"><a href="#iso-8859-4">ISO-8859-4</a> <td>"<code>csisolatin4</code>" <tr> <td>"<code>iso-8859-4</code>" <tr> <td>"<code>iso-ir-110</code>" <tr> <td>"<code>iso8859-4</code>" <tr> <td>"<code>iso88594</code>" <tr> <td>"<code>iso_8859-4</code>" <tr> <td>"<code>iso_8859-4:1988</code>" <tr> <td>"<code>l4</code>" <tr> <td>"<code>latin4</code>" <tr> <td rowspan="8"><a href="#iso-8859-5">ISO-8859-5</a> <td>"<code>csisolatincyrillic</code>" <tr> <td>"<code>cyrillic</code>" <tr> <td>"<code>iso-8859-5</code>" <tr> <td>"<code>iso-ir-144</code>" <tr> <td>"<code>iso8859-5</code>" <tr> <td>"<code>iso88595</code>" <tr> 
<td>"<code>iso_8859-5</code>" <tr> <td>"<code>iso_8859-5:1988</code>" <tr> <td rowspan="14"><a href="#iso-8859-6">ISO-8859-6</a> <td>"<code>arabic</code>" <tr> <td>"<code>asmo-708</code>" <tr> <td>"<code>csiso88596e</code>" <tr> <td>"<code>csiso88596i</code>" <tr> <td>"<code>csisolatinarabic</code>" <tr> <td>"<code>ecma-114</code>" <tr> <td>"<code>iso-8859-6</code>" <tr> <td>"<code>iso-8859-6-e</code>" <tr> <td>"<code>iso-8859-6-i</code>" <tr> <td>"<code>iso-ir-127</code>" <tr> <td>"<code>iso8859-6</code>" <tr> <td>"<code>iso88596</code>" <tr> <td>"<code>iso_8859-6</code>" <tr> <td>"<code>iso_8859-6:1987</code>" <tr> <td rowspan="12"><a href="#iso-8859-7">ISO-8859-7</a> <td>"<code>csisolatingreek</code>" <tr> <td>"<code>ecma-118</code>" <tr> <td>"<code>elot_928</code>" <tr> <td>"<code>greek</code>" <tr> <td>"<code>greek8</code>" <tr> <td>"<code>iso-8859-7</code>" <tr> <td>"<code>iso-ir-126</code>" <tr> <td>"<code>iso8859-7</code>" <tr> <td>"<code>iso88597</code>" <tr> <td>"<code>iso_8859-7</code>" <tr> <td>"<code>iso_8859-7:1987</code>" <tr> <td>"<code>sun_eu_greek</code>" <tr> <td rowspan="11"><a href="#iso-8859-8">ISO-8859-8</a> <td>"<code>csiso88598e</code>" <tr> <td>"<code>csisolatinhebrew</code>" <tr> <td>"<code>hebrew</code>" <tr> <td>"<code>iso-8859-8</code>" <tr> <td>"<code>iso-8859-8-e</code>" <tr> <td>"<code>iso-ir-138</code>" <tr> <td>"<code>iso8859-8</code>" <tr> <td>"<code>iso88598</code>" <tr> <td>"<code>iso_8859-8</code>" <tr> <td>"<code>iso_8859-8:1988</code>" <tr> <td>"<code>visual</code>" <tr> <td rowspan="3"><a href="#iso-8859-8-i">ISO-8859-8-I</a> <td>"<code>csiso88598i</code>" <tr> <td>"<code>iso-8859-8-i</code>" <tr> <td>"<code>logical</code>" <tr> <td rowspan="7"><a href="#iso-8859-10">ISO-8859-10</a> <td>"<code>csisolatin6</code>" <tr> <td>"<code>iso-8859-10</code>" <tr> <td>"<code>iso-ir-157</code>" <tr> <td>"<code>iso8859-10</code>" <tr> <td>"<code>iso885910</code>" <tr> <td>"<code>l6</code>" <tr> <td>"<code>latin6</code>" <tr> <td 
rowspan="3"><a href="#iso-8859-13">ISO-8859-13</a> <td>"<code>iso-8859-13</code>" <tr> <td>"<code>iso8859-13</code>" <tr> <td>"<code>iso885913</code>" <tr> <td rowspan="3"><a href="#iso-8859-14">ISO-8859-14</a> <td>"<code>iso-8859-14</code>" <tr> <td>"<code>iso8859-14</code>" <tr> <td>"<code>iso885914</code>" <tr> <td rowspan="6"><a href="#iso-8859-15">ISO-8859-15</a> <td>"<code>csisolatin9</code>" <tr> <td>"<code>iso-8859-15</code>" <tr> <td>"<code>iso8859-15</code>" <tr> <td>"<code>iso885915</code>" <tr> <td>"<code>iso_8859-15</code>" <tr> <td>"<code>l9</code>" <tr> <td><a href="#iso-8859-16">ISO-8859-16</a> <td>"<code>iso-8859-16</code>" <tr> <td rowspan="5"><a href="#koi8-r">KOI8-R</a> <td>"<code>cskoi8r</code>" <tr> <td>"<code>koi</code>" <tr> <td>"<code>koi8</code>" <tr> <td>"<code>koi8-r</code>" <tr> <td>"<code>koi8_r</code>" <tr> <td rowspan="2"><a href="#koi8-u">KOI8-U</a> <td>"<code>koi8-ru</code>" <tr> <td>"<code>koi8-u</code>" <tr> <td rowspan="4"><a href="#macintosh">macintosh</a> <td>"<code>csmacintosh</code>" <tr> <td>"<code>mac</code>" <tr> <td>"<code>macintosh</code>" <tr> <td>"<code>x-mac-roman</code>" <tr> <td rowspan="6"><a href="#windows-874">windows-874</a> <td>"<code>dos-874</code>" <tr> <td>"<code>iso-8859-11</code>" <tr> <td>"<code>iso8859-11</code>" <tr> <td>"<code>iso885911</code>" <tr> <td>"<code>tis-620</code>" <tr> <td>"<code>windows-874</code>" <tr> <td rowspan="3"><a href="#windows-1250">windows-1250</a> <td>"<code>cp1250</code>" <tr> <td>"<code>windows-1250</code>" <tr> <td>"<code>x-cp1250</code>" <tr> <td rowspan="3"><a href="#windows-1251">windows-1251</a> <td>"<code>cp1251</code>" <tr> <td>"<code>windows-1251</code>" <tr> <td>"<code>x-cp1251</code>" <tr> <td rowspan="17"><a href="#windows-1252">windows-1252</a> <td>"<code>ansi_x3.4-1968</code>" <tr> <td>"<code>ascii</code>" <tr> <td>"<code>cp1252</code>" <tr> <td>"<code>cp819</code>" <tr> <td>"<code>csisolatin1</code>" <tr> <td>"<code>ibm819</code>" <tr> 
<td>"<code>iso-8859-1</code>" <tr> <td>"<code>iso-ir-100</code>" <tr> <td>"<code>iso8859-1</code>" <tr> <td>"<code>iso88591</code>" <tr> <td>"<code>iso_8859-1</code>" <tr> <td>"<code>iso_8859-1:1987</code>" <tr> <td>"<code>l1</code>" <tr> <td>"<code>latin1</code>" <tr> <td>"<code>us-ascii</code>" <tr> <td>"<code>windows-1252</code>" <tr> <td>"<code>x-cp1252</code>" <tr> <td rowspan="3"><a href="#windows-1253">windows-1253</a> <td>"<code>cp1253</code>" <tr> <td>"<code>windows-1253</code>" <tr> <td>"<code>x-cp1253</code>" <tr> <td rowspan="12"><a href="#windows-1254">windows-1254</a> <td>"<code>cp1254</code>" <tr> <td>"<code>csisolatin5</code>" <tr> <td>"<code>iso-8859-9</code>" <tr> <td>"<code>iso-ir-148</code>" <tr> <td>"<code>iso8859-9</code>" <tr> <td>"<code>iso88599</code>" <tr> <td>"<code>iso_8859-9</code>" <tr> <td>"<code>iso_8859-9:1989</code>" <tr> <td>"<code>l5</code>" <tr> <td>"<code>latin5</code>" <tr> <td>"<code>windows-1254</code>" <tr> <td>"<code>x-cp1254</code>" <tr> <td rowspan="3"><a href="#windows-1255">windows-1255</a> <td>"<code>cp1255</code>" <tr> <td>"<code>windows-1255</code>" <tr> <td>"<code>x-cp1255</code>" <tr> <td rowspan="3"><a href="#windows-1256">windows-1256</a> <td>"<code>cp1256</code>" <tr> <td>"<code>windows-1256</code>" <tr> <td>"<code>x-cp1256</code>" <tr> <td rowspan="3"><a href="#windows-1257">windows-1257</a> <td>"<code>cp1257</code>" <tr> <td>"<code>windows-1257</code>" <tr> <td>"<code>x-cp1257</code>" <tr> <td rowspan="3"><a href="#windows-1258">windows-1258</a> <td>"<code>cp1258</code>" <tr> <td>"<code>windows-1258</code>" <tr> <td>"<code>x-cp1258</code>" <tr> <td rowspan="2"><a href="#x-mac-cyrillic">x-mac-cyrillic</a> <td>"<code>x-mac-cyrillic</code>" <tr> <td>"<code>x-mac-ukrainian</code>" <tbody> <tr> <th colspan="2"><a href="#legacy-multi-byte-chinese-(simplified)-encodings">Legacy multi-byte Chinese (simplified) encodings</a> <tr> <td rowspan="9"><a href="#gbk">GBK</a> <td>"<code>chinese</code>" <tr> 
<td>"<code>csgb2312</code>" <tr> <td>"<code>csiso58gb231280</code>" <tr> <td>"<code>gb2312</code>" <tr> <td>"<code>gb_2312</code>" <tr> <td>"<code>gb_2312-80</code>" <tr> <td>"<code>gbk</code>" <tr> <td>"<code>iso-ir-58</code>" <tr> <td>"<code>x-gbk</code>" <tr> <td><a href="#gb18030">gb18030</a> <td>"<code>gb18030</code>" <tbody> <tr> <th colspan="2"><a href="#legacy-multi-byte-chinese-(traditional)-encodings">Legacy multi-byte Chinese (traditional) encodings</a> <tr> <td rowspan="5"><a href="#big5">Big5</a> <td>"<code>big5</code>" <tr> <td>"<code>big5-hkscs</code>" <tr> <td>"<code>cn-big5</code>" <tr> <td>"<code>csbig5</code>" <tr> <td>"<code>x-x-big5</code>" <tbody> <tr> <th colspan="2"><a href="#legacy-multi-byte-japanese-encodings">Legacy multi-byte Japanese encodings</a> <tr> <td rowspan="3"><a href="#euc-jp">EUC-JP</a> <td>"<code>cseucpkdfmtjapanese</code>" <tr> <td>"<code>euc-jp</code>" <tr> <td>"<code>x-euc-jp</code>" <tr> <td rowspan="2"><a href="#iso-2022-jp">ISO-2022-JP</a> <td>"<code>csiso2022jp</code>" <tr> <td>"<code>iso-2022-jp</code>" <tr> <td rowspan="8"><a href="#shift_jis">Shift_JIS</a> <td>"<code>csshiftjis</code>" <tr> <td>"<code>ms932</code>" <tr> <td>"<code>ms_kanji</code>" <tr> <td>"<code>shift-jis</code>" <tr> <td>"<code>shift_jis</code>" <tr> <td>"<code>sjis</code>" <tr> <td>"<code>windows-31j</code>" <tr> <td>"<code>x-sjis</code>" <tbody> <tr> <th colspan="2"><a href="#legacy-multi-byte-korean-encodings">Legacy multi-byte Korean encodings</a> <tr> <td rowspan="10"><a href="#euc-kr">EUC-KR</a> <td>"<code>cseuckr</code>" <tr> <td>"<code>csksc56011987</code>" <tr> <td>"<code>euc-kr</code>" <tr> <td>"<code>iso-ir-149</code>" <tr> <td>"<code>korean</code>" <tr> <td>"<code>ks_c_5601-1987</code>" <tr> <td>"<code>ks_c_5601-1989</code>" <tr> <td>"<code>ksc5601</code>" <tr> <td>"<code>ksc_5601</code>" <tr> <td>"<code>windows-949</code>" <tbody> <tr> <th colspan="2"><a href="#legacy-miscellaneous-encodings">Legacy miscellaneous encodings</a> <tr> 
<td rowspan="6"><a href="#replacement">replacement</a> <td>"<code>csiso2022kr</code>" <tr> <td>"<code>hz-gb-2312</code>" <tr> <td>"<code>iso-2022-cn</code>" <tr> <td>"<code>iso-2022-cn-ext</code>" <tr> <td>"<code>iso-2022-kr</code>" <tr> <td>"<code>replacement</code>" <tr> <td rowspan="2"><a href="#utf-16be">UTF-16BE</a> <td>"<code>unicodefffe</code>" <tr> <td>"<code>utf-16be</code>" <tr> <td rowspan="7"><a href="#utf-16le">UTF-16LE</a> <td>"<code>csunicode</code>" <tr> <td>"<code>iso-10646-ucs-2</code>" <tr> <td>"<code>ucs-2</code>" <tr> <td>"<code>unicode</code>" <tr> <td>"<code>unicodefeff</code>" <tr> <td>"<code>utf-16</code>" <tr> <td>"<code>utf-16le</code>" <tr> <td><a href="#x-user-defined">x-user-defined</a> <td>"<code>x-user-defined</code>" </table> <p class="note" role="note">All <a href="#encoding">encodings</a> and their <a href="#label">labels</a> are also available as a non-normative <a href="encodings.json">encodings.json</a> resource. </p> <p class="note" id="supported-encodings" role="note">The set of supported <a href="#encoding">encodings</a> is primarily based on the intersection of the sets supported by major browser engines when the development of this standard started, while removing encodings that were rarely used legitimately but that could be used in attacks. The inclusion of some encodings is questionable in the light of anecdotal evidence of the level of use by existing Web content. That is, while they have been broadly supported by browsers, it is unclear if they are broadly used by Web content. However, an effort has not been made to eagerly remove <a href="#single-byte-encoding">single-byte encodings</a> that were broadly supported by browsers or are part of the ISO 8859 series. 
In particular, the necessity of the inclusion of <a href="#ibm866">IBM866</a>, <a href="#macintosh">macintosh</a>, <a href="#x-mac-cyrillic">x-mac-cyrillic</a>, <a href="#iso-8859-3">ISO-8859-3</a>, <a href="#iso-8859-10">ISO-8859-10</a>, <a href="#iso-8859-14">ISO-8859-14</a>, and <a href="#iso-8859-16">ISO-8859-16</a> is doubtful for the purpose of supporting existing content, but there are no plans to remove these.</p> <h3 class="heading settled" id="output-encodings"><span class="secno">4.3. </span><span class="content">Output encodings</span></h3> <p>To <dfn id="get-an-output-encoding">get an output encoding</dfn> from an <a href="#encoding">encoding</a> <var>encoding</var>, run these steps: </p> <ol> <li> <p>If <var>encoding</var> is <a href="#replacement">replacement</a> or <a href="#utf-16be-le">UTF-16BE/LE</a>, then return <a href="#utf-8">UTF-8</a>. </p> <li> <p>Return <var>encoding</var>. </p> </ol> <p class="note" role="note">The <a href="#get-an-output-encoding">get an output encoding</a> algorithm is useful for URL parsing and HTML form submission, which both need exactly this. </p> <h2 class="heading settled" id="indexes"><span class="secno">5. </span><span class="content">Indexes</span></h2> <p>Most legacy <a href="#encoding">encodings</a> make use of an <dfn id="index">index</dfn>. An <a href="#index">index</a> is an ordered list of entries, each entry consisting of a pointer and a corresponding code point. Within an <a href="#index">index</a> pointers are unique and code points can be duplicated. </p> <p class="note no-backref" role="note">An efficient implementation likely has two <a href="#index">indexes</a> per <a href="#encoding">encoding</a>. One optimized for its <a href="#decoder">decoder</a> and one for its <a href="#encoder">encoder</a>. </p> <p>To find the pointers and their corresponding code points in an <a href="#index">index</a>, let <var>lines</var> be the result of splitting the resource’s contents on U+000A. 
Then remove each item in <var>lines</var> that is the empty string or starts with U+0023. Then the pointers and their corresponding code points are found by splitting each item in <var>lines</var> on U+0009. The first subitem is the pointer (as a decimal number) and the second is the corresponding code point (as a hexadecimal number). Other subitems are not relevant. </p> <p class="note no-backref" role="note">To signify changes an <a href="#index">index</a> includes an <i>Identifier</i> and a <i>Date</i>. If an <i>Identifier</i> has changed, so has the <a href="#index">index</a>. </p> <p>The <dfn id="index-code-point">index code point</dfn> for <var>pointer</var> in <var>index</var> is the code point corresponding to <var>pointer</var> in <var>index</var>, or null if <var>pointer</var> is not in <var>index</var>. </p> <p>The <dfn id="index-pointer">index pointer</dfn> for <var>code point</var> in <var>index</var> is the <em>first</em> pointer corresponding to <var>code point</var> in <var>index</var>, or null if <var>code point</var> is not in <var>index</var>. </p> <div class="note" id="visualization" role="note"> <p>There is a non-normative visualization for each <a href="#index">index</a> other than <a href="#index-gb18030-ranges">index gb18030 ranges</a> and <a href="#index-iso-2022-jp-katakana">index ISO-2022-JP katakana</a>. <a href="#index-jis0208">index jis0208</a> also has an alternative <a href="#shift_jis">Shift_JIS</a> visualization. Additionally, there is a visualization of the Basic Multilingual Plane coverage of each index other than <a href="#index-gb18030-ranges">index gb18030 ranges</a> and <a href="#index-iso-2022-jp-katakana">index ISO-2022-JP katakana</a>. 
</p> <p>The legend for the visualizations is: </p> <ul class="visualizationlegend"> <li class="unmapped">Unmapped <li class="mid">Two bytes in UTF-8 <li class="mid contiguous">Two bytes in UTF-8, code point follows immediately the code point of previous pointer <li class="upper">Three bytes in UTF-8 (non-PUA) <li class="upper contiguous">Three bytes in UTF-8 (non-PUA), code point follows immediately the code point of previous pointer <li class="pua">Private Use <li class="pua contiguous">Private Use, code point follows immediately the code point of previous pointer <li class="astral">Four bytes in UTF-8 <li class="astral contiguous">Four bytes in UTF-8, code point follows immediately the code point of previous pointer <li class="duplicate">Duplicate code point already mapped at an earlier index <li class="compatibility">CJK Compatibility Ideograph <li class="ext">CJK Unified Ideographs Extension A </ul> </div> <p>These are the <a href="#index">indexes</a> defined by this specification, excluding <a href="#index-single-byte">index single-byte</a>, which have their own table: </p> <table> <tbody> <tr> <th colspan="4"><a href="#index">Index</a> <th>Notes <tr> <td><dfn id="index-big5">index Big5</dfn> <td><a href="index-big5.txt">index-big5.txt</a> <td><a href="big5.html">index Big5 visualization</a> <td><a href="big5-bmp.html">index Big5 BMP coverage</a> <td>This matches the Big5 standard in combination with the Hong Kong Supplementary Character Set and other common extensions. <tr> <td><dfn id="index-euc-kr">index EUC-KR</dfn> <td><a href="index-euc-kr.txt">index-euc-kr.txt</a> <td><a href="euc-kr.html">index EUC-KR visualization</a> <td><a href="euc-kr-bmp.html">index EUC-KR BMP coverage</a> <td>This matches the KS X 1001 standard and the Unified Hangul Code, more commonly known together as Windows Codepage 949. It covers the Hangul Syllables block of Unicode in its entirety. 
The Hangul block whose top left corner in the visualization is at pointer 9026 is in the Unicode order. Taken separately, the rest of the Hangul syllables in this index are in the Unicode order, too. <tr> <td><dfn id="index-gb18030">index gb18030</dfn> <td><a href="index-gb18030.txt">index-gb18030.txt</a> <td><a href="gb18030.html">index gb18030 visualization</a> <td><a href="gb18030-bmp.html">index gb18030 BMP coverage</a> <td>This matches the GB18030-2005 standard for code points encoded as two bytes, except for 0xA3 0xA0 which maps to U+3000 to be compatible with deployed content. This index covers the CJK Unified Ideographs block of Unicode in its entirety. Entries from that block that are above or to the left of (the first) U+3000 in the visualization are in the Unicode order. <tr> <td><dfn id="index-gb18030-ranges">index gb18030 ranges</dfn> <td colspan="3"><a href="index-gb18030-ranges.txt">index-gb18030-ranges.txt</a> <td>This <a href="#index">index</a> works differently from all others. Listing all code points would result in over a million items whereas they can be represented neatly in 207 ranges combined with trivial limit checks. It therefore only superficially matches the GB18030-2005 standard for code points encoded as four bytes. See also <a href="#index-gb18030-ranges-code-point">index gb18030 ranges code point</a> and <a href="#index-gb18030-ranges-pointer">index gb18030 ranges pointer</a> below. <tr> <td><dfn id="index-jis0208">index jis0208</dfn> <td><a href="index-jis0208.txt">index-jis0208.txt</a> <td><a href="jis0208.html">index jis0208 visualization</a>, <a href="shift_jis.html">Shift_JIS visualization</a> <td><a href="jis0208-bmp.html">index jis0208 BMP coverage</a> <td>This is the JIS X 0208 standard including formerly proprietary extensions from IBM and NEC. 
<tr> <td><dfn id="index-jis0212">index jis0212</dfn> <td><a href="index-jis0212.txt">index-jis0212.txt</a> <td><a href="jis0212.html">index jis0212 visualization</a> <td><a href="jis0212-bmp.html">index jis0212 BMP coverage</a> <td>This is the JIS X 0212 standard. It is only used by the <a href="#euc-jp-decoder">EUC-JP decoder</a> due to lack of widespread support elsewhere. <tr> <td><dfn id="index-iso-2022-jp-katakana">index ISO-2022-JP katakana</dfn> <td colspan="3"><a href="index-iso-2022-jp-katakana.txt">index-iso-2022-jp-katakana.txt</a> <td>This maps halfwidth to fullwidth katakana as per Unicode Normalization Form KC, except that U+FF9E and U+FF9F map to U+309B and U+309C rather than U+3099 and U+309A. It is only used by the <a href="#iso-2022-jp-encoder">ISO-2022-JP encoder</a>. <a href="#biblio-unicode" title="The Unicode Standard">[UNICODE]</a> </table> <p>The <dfn id="index-gb18030-ranges-code-point">index gb18030 ranges code point</dfn> for <var>pointer</var> is the return value of these steps: </p> <ol> <li> <p>If <var>pointer</var> is greater than 39419 and less than 189000, or <var>pointer</var> is greater than 1237575, return null. </p> <li> <p>If <var>pointer</var> is 7457, return code point U+E7C7. </p> <li> <p>Let <var>offset</var> be the last pointer in <a href="#index-gb18030-ranges">index gb18030 ranges</a> that is less than or equal to <var>pointer</var> and let <var>code point offset</var> be its corresponding code point. </p> <li> <p>Return a code point whose value is <var>code point offset</var> + <var>pointer</var> − <var>offset</var>. </p> </ol> <p>The <dfn id="index-gb18030-ranges-pointer">index gb18030 ranges pointer</dfn> for <var>code point</var> is the return value of these steps: </p> <ol> <li> <p>If <var>code point</var> is U+E7C7, return pointer 7457. 
</p> <li> <p>Let <var>offset</var> be the last code point in <a href="#index-gb18030-ranges">index gb18030 ranges</a> that is less than or equal to <var>code point</var> and let <var>pointer offset</var> be its corresponding pointer. </p> <li> <p>Return a pointer whose value is <var>pointer offset</var> + <var>code point</var> − <var>offset</var>. </p> </ol> <p>The <dfn id="index-shift_jis-pointer">index Shift_JIS pointer</dfn> for <var>code point</var> is the return value of these steps: </p> <ol> <li> <p>Let <var>index</var> be <a href="#index-jis0208">index jis0208</a> excluding all entries whose pointer is in the range 8272 to 8835, inclusive. </p> <p class="note" role="note">The <a href="#index-jis0208">index jis0208</a> contains duplicate code points so the exclusion of these entries causes later code points to be used. </p> <li> <p>Return the <a href="#index-pointer">index pointer</a> for <var>code point</var> in <var>index</var>. </p> </ol> <p>The <dfn id="index-big5-pointer">index Big5 pointer</dfn> for <var>code point</var> is the return value of these steps: </p> <ol> <li> <p>Let <var>index</var> be <a href="#index-big5">index Big5</a> excluding all entries whose pointer is less than (0xA1 - 0x81) × 157. </p> <p class="note" role="note">Avoid returning Hong Kong Supplementary Character Set extensions literally. </p> <li> <p>If <var>code point</var> is U+2550, U+255E, U+2561, U+256A, U+5341, or U+5345, return the <em>last</em> pointer corresponding to <var>code point</var> in <var>index</var>. </p> <p class="note" role="note">There are other duplicate code points, but for those the <em>first</em> pointer is to be used. </p> <li> <p>Return the <a href="#index-pointer">index pointer</a> for <var>code point</var> in <var>index</var>. </p> </ol> <hr> <p class="note no-backref" role="note">All <a href="#index">indexes</a> are also available as a non-normative <a href="indexes.json">indexes.json</a> resource. 
(<a href="#index-gb18030-ranges">Index gb18030 ranges</a> has a slightly different format here, to be able to represent ranges.) </p> <h2 class="heading settled" id="specification-hooks"><span class="secno">6. </span><span class="content">Hooks for standards</span></h2> <div class="note" role="note"> <p>The algorithms defined below (<a href="#utf-8-decode">UTF-8 decode</a>, <a href="#utf-8-decode-without-bom">UTF-8 decode without BOM</a>, <a href="#utf-8-decode-without-bom-or-fail">UTF-8 decode without BOM or fail</a>, and <a href="#utf-8-encode">UTF-8 encode</a>) are intended for usage by other standards. </p> <p>For decoding, <a href="#utf-8-decode">UTF-8 decode</a> is to be used by new formats. For identifiers or byte sequences within a format or protocol, use <a href="#utf-8-decode-without-bom">UTF-8 decode without BOM</a> or <a href="#utf-8-decode-without-bom-or-fail">UTF-8 decode without BOM or fail</a>. </p> <p>For encoding, <a href="#utf-8-encode">UTF-8 encode</a> is to be used. </p> <p>Standards are to ensure that the input I/O queues they pass to <a href="#utf-8-encode">UTF-8 encode</a> (as well as the legacy <a href="#encode">encode</a>) are effectively I/O queues of scalar values, i.e., they contain no <a href="https://infra.spec.whatwg.org/#surrogate" id="1be036c82">surrogates</a>. </p> <p>These hooks (as well as <a href="#decode">decode</a> and <a href="#encode">encode</a>) will block until the input I/O queue has been consumed in its entirety. In order to use the output tokens as they are pushed into the stream, callers are to invoke the hooks with an empty output I/O queue and read from it <a href="https://html.spec.whatwg.org/multipage/infrastructure.html#in-parallel" id="a459951e1">in parallel</a>. 
Note that some care is needed when using <a href="#utf-8-decode-without-bom-or-fail">UTF-8 decode without BOM or fail</a>, as any error found during decoding will prevent the <a href="#end-of-stream">end-of-queue</a> item from ever being pushed into the output I/O queue. </p> </div> <p>To <dfn id="utf-8-decode">UTF-8 decode</dfn> an I/O queue of bytes <var>ioQueue</var> given an optional I/O queue of scalar values <var>output</var> (default « »), run these steps: </p> <ol> <li> <p>Let <var>buffer</var> be the result of <a href="#i-o-queue-peek">peeking</a> three bytes from <var>ioQueue</var>, converted to a byte sequence. </p> <li> <p>If <var>buffer</var> is 0xEF 0xBB 0xBF, then <a href="#concept-stream-read">read</a> three bytes from <var>ioQueue</var>. (Do nothing with those bytes.) </p> <li> <p><a href="#concept-encoding-run">Process a queue</a> with an instance of <a href="#utf-8">UTF-8</a>’s <a href="#decoder">decoder</a>, <var>ioQueue</var>, <var>output</var>, and "<code>replacement</code>". </p> <li> <p>Return <var>output</var>. </p> </ol> <p>To <dfn id="utf-8-decode-without-bom">UTF-8 decode without BOM</dfn> an I/O queue of bytes <var>ioQueue</var> given an optional I/O queue of scalar values <var>output</var> (default « »), run these steps: </p> <ol> <li> <p><a href="#concept-encoding-run">Process a queue</a> with an instance of <a href="#utf-8">UTF-8</a>’s <a href="#decoder">decoder</a>, <var>ioQueue</var>, <var>output</var>, and "<code>replacement</code>". </p> <li> <p>Return <var>output</var>. 
</p> </ol> <p>To <dfn id="utf-8-decode-without-bom-or-fail">UTF-8 decode without BOM or fail</dfn> an I/O queue of bytes <var>ioQueue</var> given an optional I/O queue of scalar values <var>output</var> (default « »), run these steps: </p> <ol> <li> <p>Let <var>potentialError</var> be the result of <a href="#concept-encoding-run">processing a queue</a> with an instance of <a href="#utf-8">UTF-8</a>’s <a href="#decoder">decoder</a>, <var>ioQueue</var>, <var>output</var>, and "<code>fatal</code>". </p> <li> <p>If <var>potentialError</var> is an <a href="#error">error</a>, then return failure. </p> <li> <p>Return <var>output</var>. </p> </ol> <hr> <p>To <dfn id="utf-8-encode">UTF-8 encode</dfn> an I/O queue of scalar values <var>ioQueue</var> given an optional I/O queue of bytes <var>output</var> (default « »), return the result of <a href="#encode">encoding</a> <var>ioQueue</var> with encoding <a href="#utf-8">UTF-8</a> and <var>output</var>. </p> <h3 class="heading settled" id="legacy-hooks"><span class="secno">6.1. </span><span class="content">Legacy hooks for standards</span></h3> <div class="note" role="note"> <p>Standards are strongly discouraged from using <a href="#decode">decode</a>, <a href="#bom-sniff">BOM sniff</a>, and <a href="#encode">encode</a>, except as needed for compatibility. Standards needing these legacy hooks will most likely also need to use <a href="#concept-encoding-get">get an encoding</a> (to turn a label into an <a href="#encoding">encoding</a>) and <a href="#get-an-output-encoding">get an output encoding</a> (to turn an <a href="#encoding">encoding</a> into another <a href="#encoding">encoding</a> that is suitable to pass into <a href="#encode">encode</a>). </p> <p>For the extremely niche case of URL percent-encoding, custom encoder error handling is needed. The <a href="#get-an-encoder">get an encoder</a> and <a href="#encode-or-fail">encode or fail</a> algorithms are to be used for that. Other algorithms are not to be used directly. 
</p> </div> <p>To <dfn id="decode">decode</dfn> an I/O queue of bytes <var>ioQueue</var> given a fallback encoding <var>encoding</var> and an optional I/O queue of scalar values <var>output</var> (default « »), run these steps: </p> <ol> <li> <p>Let <var>BOMEncoding</var> be the result of <a href="#bom-sniff">BOM sniffing</a> <var>ioQueue</var>. </p> <li> <p>If <var>BOMEncoding</var> is non-null: </p> <ol> <li> <p>Set <var>encoding</var> to <var>BOMEncoding</var>. </p> <li> <p><a href="#concept-stream-read">Read</a> three bytes from <var>ioQueue</var>, if <var>BOMEncoding</var> is <a href="#utf-8">UTF-8</a>; otherwise <a href="#concept-stream-read">read</a> two bytes. (Do nothing with those bytes.) </p> </ol> <p class="note" role="note">For compatibility with deployed content, the byte order mark is more authoritative than anything else. In a context where HTTP is used this is in violation of the semantics of the `<code>Content-Type</code>` header. </p> <li> <p><a href="#concept-encoding-run">Process a queue</a> with an instance of <var>encoding</var>’s <a href="#decoder">decoder</a>, <var>ioQueue</var>, <var>output</var>, and "<code>replacement</code>". </p> <li> <p>Return <var>output</var>. </p> </ol> <p>To <dfn id="bom-sniff">BOM sniff</dfn> an I/O queue of bytes <var>ioQueue</var>, run these steps: </p> <ol> <li> <p>Let <var>BOM</var> be the result of <a href="#i-o-queue-peek">peeking</a> 3 bytes from <var>ioQueue</var>, converted to a byte sequence. </p> <li> <p>For each of the rows in the table below, starting with the first one and going down, if <var>BOM</var> <a href="https://infra.spec.whatwg.org/#byte-sequence-starts-with" id="49a960210">starts with</a> the bytes given in the first column, then return the <a href="#encoding">encoding</a> given in the cell in the second column of that row. Otherwise, return null. 
</p> <table> <tbody> <tr> <th>Byte order mark <th>Encoding <tr> <td>0xEF 0xBB 0xBF <td><a href="#utf-8">UTF-8</a> <tr> <td>0xFE 0xFF <td><a href="#utf-16be">UTF-16BE</a> <tr> <td>0xFF 0xFE <td><a href="#utf-16le">UTF-16LE</a> </table> </ol> <p class="note" role="note">This hook is a workaround for the fact that <a href="#decode">decode</a> has no way to communicate back to the caller that it has found a byte order mark and is therefore not using the provided encoding. The hook is to be invoked before <a href="#decode">decode</a>, and it will return an encoding corresponding to the byte order mark found, or null otherwise. </p> <hr> <p>To <dfn id="encode">encode</dfn> an I/O queue of scalar values <var>ioQueue</var> given an encoding <var>encoding</var> and an optional I/O queue of bytes <var>output</var> (default « »), run these steps: </p> <ol> <li> <p>Let <var>encoder</var> be the result of <a href="#get-an-encoder">getting an encoder</a> from <var>encoding</var>. </p> <li> <p><a href="#concept-encoding-run">Process a queue</a> with <var>encoder</var>, <var>ioQueue</var>, <var>output</var>, and "<code>html</code>". </p> <li> <p>Return <var>output</var>. </p> </ol> <p class="note no-backref" role="note">This is a legacy hook for HTML forms. Layering <a href="#utf-8-encode">UTF-8 encode</a> on top is safe as it never triggers <a href="#error">errors</a>. <a href="#biblio-html" title="HTML Standard">[HTML]</a> </p> <hr> <p>To <dfn id="get-an-encoder">get an encoder</dfn> from an <a href="#encoding">encoding</a> <var>encoding</var>: </p> <ol> <li> <p>Assert: <var>encoding</var> is not <a href="#replacement">replacement</a> or <a href="#utf-16be-le">UTF-16BE/LE</a>. </p> <li> <p>Return an instance of <var>encoding</var>’s <a href="#encoder">encoder</a>. 
</p> </ol> <p>To <dfn id="encode-or-fail">encode or fail</dfn> an I/O queue of scalar values <var>ioQueue</var> given an <a href="#encoder">encoder</a> instance <var>encoder</var> and an I/O queue of bytes <var>output</var>, run these steps: </p> <ol> <li> <p>Let <var>potentialError</var> be the result of <a href="#concept-encoding-run">processing a queue</a> with <var>encoder</var>, <var>ioQueue</var>, <var>output</var>, and "<code>fatal</code>". </p> <li> <p><a href="#concept-stream-push">Push</a> <a href="#end-of-stream">end-of-queue</a> to <var>output</var>. </p> <li> <p>If <var>potentialError</var> is an <a href="#error">error</a>, then return <var>potentialError</var>’s <a href="https://infra.spec.whatwg.org/#code-point" id="b61de9b12">code point</a>’s <a href="https://infra.spec.whatwg.org/#code-point-value" id="2bff95cb1">value</a>. </p> <li> <p>Return null. </p> </ol> <div class="note" id="pit-of-iso-2022-jp" role="note"> <p>This is a legacy hook for URL percent-encoding. The caller will have to keep an <a href="#encoder">encoder</a> instance alive as the <a href="#iso-2022-jp-encoder">ISO-2022-JP encoder</a> can be in two different states when returning an <a href="#error">error</a>. That also means that if the caller emits bytes to encode the error in some way, these have to be in the range 0x00 to 0x7F, inclusive, excluding 0x0E, 0x0F, 0x1B, 0x5C, and 0x7E. <a href="#biblio-url" title="URL Standard">[URL]</a> </p> <p>In particular, if upon returning an <a href="#error">error</a> the <a href="#iso-2022-jp-encoder">ISO-2022-JP encoder</a> is in the <a href="#iso-2022-jp-decoder-roman">Roman</a> state, the caller cannot output 0x5C (\) as it will not decode as U+005C (\). 
For this reason, applications using <a href="#encode-or-fail">encode or fail</a> for unintended purposes ought to take care to prevent the use of the <a href="#iso-2022-jp-encoder">ISO-2022-JP encoder</a> in combination with replacement schemes, such as those of JavaScript and CSS, that use U+005C (\) as part of the replacement syntax (e.g., <code>\u2603</code>) or make sure to pass the replacement syntax through the encoder (in contrast to URL percent-encoding). </p> <p>The return value is either the number representing the <a href="https://infra.spec.whatwg.org/#code-point" id="b61de9b13">code point</a> that could not be encoded or null, if there was no <a href="#error">error</a>. When it returns non-null the caller will have to invoke it again, supplying the same <a href="#encoder">encoder</a> instance and a new output I/O queue. </p> </div> <h2 class="heading settled" id="api"><span class="secno">7. </span><span class="content">API</span></h2> <p>This section uses terminology from Web IDL. Browser user agents must support this API. JavaScript implementations should support this API. Other user agents or programming languages are encouraged to use an API suitable to their needs, which might not be this one. <a href="#biblio-webidl" title="Web IDL Standard">[WEBIDL]</a> </p> <div class="example" id="example-textencoder"> <p>The following example uses the <code class="idl"><a href="#textencoder">TextEncoder</a></code> object to encode an array of strings into an <code class="idl"><a href="https://webidl.spec.whatwg.org/#idl-ArrayBuffer" id="6cfb0bb30">ArrayBuffer</a></code>. 
The result is a <code class="idl"><a href="https://webidl.spec.whatwg.org/#idl-Uint8Array" id="eddeeee90">Uint8Array</a></code> containing the number of strings (as a <code class="idl"><a href="https://webidl.spec.whatwg.org/#idl-Uint32Array" id="ba0cf2600">Uint32Array</a></code>), followed by the length of the first string (as a <code class="idl"><a href="https://webidl.spec.whatwg.org/#idl-Uint32Array" id="ba0cf2601">Uint32Array</a></code>), the <a href="#utf-8">UTF-8</a> encoded string data, the length of the second string (as a <code class="idl"><a href="https://webidl.spec.whatwg.org/#idl-Uint32Array" id="ba0cf2602">Uint32Array</a></code>), the string data, and so on. </p> <pre><code class="lang-javascript">function encodeArrayOfStrings(strings) {
  var encoder, encoded, len, bytes, view, offset;

  encoder = new TextEncoder();
  encoded = [];

  len = Uint32Array.BYTES_PER_ELEMENT;
  for (var i = 0; i &lt; strings.length; i++) {
    len += Uint32Array.BYTES_PER_ELEMENT;
    encoded[i] = encoder.encode(strings[i]);
    len += encoded[i].byteLength;
  }

  bytes = new Uint8Array(len);
  view = new DataView(bytes.buffer);
  offset = 0;

  view.setUint32(offset, strings.length);
  offset += Uint32Array.BYTES_PER_ELEMENT;
  for (var i = 0; i &lt; encoded.length; i += 1) {
    len = encoded[i].byteLength;
    view.setUint32(offset, len);
    offset += Uint32Array.BYTES_PER_ELEMENT;
    bytes.set(encoded[i], offset);
    offset += len;
  }
  return bytes.buffer;
}</code></pre> <p>The following example decodes an <code class="idl"><a href="https://webidl.spec.whatwg.org/#idl-ArrayBuffer" id="6cfb0bb31">ArrayBuffer</a></code> containing data encoded in the format produced by the previous example, or an equivalent algorithm for encodings other than <a href="#utf-8">UTF-8</a>, back into an array of strings. 
</p> <pre><code class="lang-javascript">function decodeArrayOfStrings(buffer, encoding) {
  var decoder, view, offset, num_strings, strings, len;

  decoder = new TextDecoder(encoding);
  view = new DataView(buffer);
  offset = 0;
  strings = [];

  num_strings = view.getUint32(offset);
  offset += Uint32Array.BYTES_PER_ELEMENT;
  for (var i = 0; i &lt; num_strings; i++) {
    len = view.getUint32(offset);
    offset += Uint32Array.BYTES_PER_ELEMENT;
    strings[i] = decoder.decode(
      new DataView(view.buffer, offset, len));
    offset += len;
  }
  return strings;
}</code></pre> </div> <h3 class="heading settled" id="interface-mixin-textdecodercommon"><span class="secno">7.1. </span><span class="content">Interface mixin <code class="idl"><a href="#textdecodercommon">TextDecoderCommon</a></code></span></h3> <pre class="idl highlight def">interface mixin <dfn class="idl-code" id="textdecodercommon"><code>TextDecoderCommon</code></dfn> { readonly attribute <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-DOMString" id="162027790">DOMString</a> <a class="idl-code" href="#dom-textdecoder-encoding">encoding</a>; readonly attribute <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-boolean" id="7631bead0">boolean</a> <a class="idl-code" href="#dom-textdecoder-fatal">fatal</a>; readonly attribute <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-boolean" id="7631bead1">boolean</a> <a class="idl-code" href="#dom-textdecoder-ignorebom">ignoreBOM</a>; }; </pre> <p>The <code class="idl"><a href="#textdecodercommon">TextDecoderCommon</a></code> interface mixin defines common getters that are shared between <code class="idl"><a href="#textdecoder">TextDecoder</a></code> and <code class="idl"><a href="#textdecoderstream">TextDecoderStream</a></code> objects. These objects have an associated: </p> <dl> <dt><dfn id="textdecoder-encoding">encoding</dfn> <dd>An <a href="#encoding">encoding</a>. 
<dt><dfn id="textdecodercommon-decoder">decoder<span id="textdecoder-decoder"></span><span id="textdecoderstream-decoder"></span></dfn> <dd>A <a href="#decoder">decoder</a> instance. <dt><dfn id="textdecodercommon-i-o-queue">I/O queue<span id="textdecoder-stream"></span><span id="textdecoderstream-stream"></span><span id="textdecodercommon-stream"></span></dfn> <dd>An <a href="#concept-stream">I/O queue</a> of bytes. <dt><dfn id="textdecoder-ignore-bom-flag">ignore BOM</dfn> <dd>A boolean, initially false. <dt><dfn id="textdecoder-bom-seen-flag">BOM seen</dfn> <dd>A boolean, initially false. <dt><dfn id="textdecoder-error-mode">error mode</dfn> <dd>An <a href="#error-mode">error mode</a>, initially "<code>replacement</code>". </dl> <p>The <dfn id="concept-td-serialize">serialize I/O queue</dfn> algorithm, given a <code class="idl"><a href="#textdecodercommon">TextDecoderCommon</a></code> <var>decoder</var> and an <a href="#concept-stream">I/O queue</a> of scalar values <var>ioQueue</var>, runs these steps: </p> <ol> <li> <p>Let <var>output</var> be the empty string. </p> <li> <p>While true: </p> <ol> <li> <p>Let <var>item</var> be the result of <a href="#concept-stream-read">reading</a> from <var>ioQueue</var>. </p> <li> <p>If <var>item</var> is <a href="#end-of-stream">end-of-queue</a>, then return <var>output</var>. </p> <li> <p>If <var>decoder</var>’s <a href="#textdecoder-encoding">encoding</a> is <a href="#utf-8">UTF-8</a> or <a href="#utf-16be-le">UTF-16BE/LE</a>, and <var>decoder</var>’s <a href="#textdecoder-ignore-bom-flag">ignore BOM</a> and <a href="#textdecoder-bom-seen-flag">BOM seen</a> are false, then: </p> <ol> <li> <p>Set <var>decoder</var>’s <a href="#textdecoder-bom-seen-flag">BOM seen</a> to true. </p> <li> <p>If <var>item</var> is U+FEFF, then <a href="https://infra.spec.whatwg.org/#iteration-continue" id="dfd6b7da0">continue</a>. </p> </ol> <li> <p>Append <var>item</var> to <var>output</var>. 
</p> </ol> </ol> <p class="note" role="note">This algorithm is intentionally different with respect to BOM handling from the <a href="#decode">decode</a> algorithm used by the rest of the platform to give API users more control. </p> <hr> <p>The <dfn class="idl-code" id="dom-textdecoder-encoding"><code>encoding</code></dfn> getter steps are to return <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef60">this</a>’s <a href="#textdecoder-encoding">encoding</a>’s <a href="#name">name</a>, <a href="https://infra.spec.whatwg.org/#ascii-lowercase" id="441210641">ASCII lowercased</a>. </p> <p>The <dfn class="idl-code" id="dom-textdecoder-fatal"><code>fatal</code></dfn> getter steps are to return true if <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef61">this</a>’s <a href="#textdecoder-error-mode">error mode</a> is "<code>fatal</code>", otherwise false. </p> <p>The <dfn class="idl-code" id="dom-textdecoder-ignorebom"><code>ignoreBOM</code></dfn> getter steps are to return <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef62">this</a>’s <a href="#textdecoder-ignore-bom-flag">ignore BOM</a>. </p> <h3 class="heading settled" id="interface-textdecoder"><span class="secno">7.2. 
</span><span class="content">Interface <code class="idl"><a href="#textdecoder">TextDecoder</a></code></span></h3> <pre class="idl highlight def">dictionary <dfn class="idl-code" id="textdecoderoptions"><code>TextDecoderOptions</code></dfn> { <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-boolean" id="7631bead2">boolean</a> <dfn class="idl-code" id="dom-textdecoderoptions-fatal"><code>fatal</code></dfn> = false; <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-boolean" id="7631bead3">boolean</a> <dfn class="idl-code" id="dom-textdecoderoptions-ignorebom"><code>ignoreBOM</code></dfn> = false; }; dictionary <dfn class="idl-code" id="textdecodeoptions"><code>TextDecodeOptions</code></dfn> { <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-boolean" id="7631bead4">boolean</a> <dfn class="idl-code" id="dom-textdecodeoptions-stream"><code>stream</code></dfn> = false; }; [Exposed=*] interface <dfn class="idl-code" id="textdecoder"><code>TextDecoder</code></dfn> { <a class="idl-code" href="#dom-textdecoder">constructor</a>(optional <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-DOMString" id="162027791">DOMString</a> <dfn class="idl-code" id="dom-textdecoder-textdecoder-label-options-label"><code>label</code></dfn> = "utf-8", optional <a href="#textdecoderoptions">TextDecoderOptions</a> <dfn class="idl-code" id="dom-textdecoder-textdecoder-label-options-options"><code>options</code></dfn> = {}); <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-USVString" id="63dcd5930">USVString</a> <a class="idl-code" href="#dom-textdecoder-decode">decode</a>(optional <a href="https://webidl.spec.whatwg.org/#AllowSharedBufferSource" id="8145e9370">AllowSharedBufferSource</a> <dfn class="idl-code" id="dom-textdecoder-decode-input-options-input"><code>input</code></dfn>, optional <a href="#textdecodeoptions">TextDecodeOptions</a> <dfn class="idl-code" 
id="dom-textdecoder-decode-input-options-options"><code>options</code></dfn> = {}); }; <a href="#textdecoder">TextDecoder</a> includes <a href="#textdecodercommon">TextDecoderCommon</a>; </pre> <p>A <code class="idl"><a href="#textdecoder">TextDecoder</a></code> object has an associated <dfn id="textdecoder-do-not-flush-flag">do not flush</dfn>, which is a boolean, initially false. </p> <dl class="domintro"> <dt><code><var>decoder</var> = new <a class="idl-code" href="#dom-textdecoder">TextDecoder([<var>label</var> = "utf-8" [, <var>options</var>]])</a></code> <dd> <p>Returns a new <code class="idl"><a href="#textdecoder">TextDecoder</a></code> object. </p> <p>If <var>label</var> is either not a label or is a <a href="#label">label</a> for <a href="#replacement">replacement</a>, <a href="https://webidl.spec.whatwg.org/#dfn-throw" id="807892d40">throws</a> a <code class="idl"><a href="https://webidl.spec.whatwg.org/#exceptiondef-rangeerror" id="a0324b580">RangeError</a></code>. </p> <dt><code><var>decoder</var> . <a class="idl-code" href="#dom-textdecoder-encoding">encoding</a></code> <dd> <p>Returns <a href="#textdecoder-encoding">encoding</a>’s <a href="#name">name</a>, lowercased. </p> <dt><code><var>decoder</var> . <a class="idl-code" href="#dom-textdecoder-fatal">fatal</a></code> <dd> <p>Returns true if <a href="#textdecoder-error-mode">error mode</a> is "<code>fatal</code>", otherwise false. </p> <dt><code><var>decoder</var> . <a class="idl-code" href="#dom-textdecoder-ignorebom">ignoreBOM</a></code> <dd> <p>Returns the value of <a href="#textdecoder-ignore-bom-flag">ignore BOM</a>. </p> <dt><code><var>decoder</var> . <a class="idl-code" href="#dom-textdecoder-decode">decode([<var>input</var> [, <var>options</var>]])</a></code> <dd> <p>Returns the result of running <a href="#textdecoder-encoding">encoding</a>’s <a href="#decoder">decoder</a>. 
The method can be invoked zero or more times with <var>options</var>’s <code>stream</code> set to true, and then once without <var>options</var>’s <code>stream</code> (or set to false), to process a fragmented input. If the invocation without <var>options</var>’s <code>stream</code> (or set to false) has no <var>input</var>, it’s clearest to omit both arguments. </p> <pre class="example" id="example-end-of-stream"><code class="lang-javascript">var string = "", decoder = new TextDecoder(encoding), buffer;
while(buffer = next_chunk()) {
  string += decoder.decode(buffer, {stream:true});
}
string += decoder.decode(); // end-of-queue</code></pre> <p>If the <a href="#textdecoder-error-mode">error mode</a> is "<code>fatal</code>" and <a href="#textdecoder-encoding">encoding</a>’s <a href="#decoder">decoder</a> returns <a href="#error">error</a>, <a href="https://webidl.spec.whatwg.org/#dfn-throw" id="807892d41">throws</a> a <code class="idl"><a href="https://webidl.spec.whatwg.org/#exceptiondef-typeerror" id="20fb259a0">TypeError</a></code>. </p> </dl> <p>The <dfn class="idl-code" id="dom-textdecoder"><code>new TextDecoder(<var>label</var>, <var>options</var>)</code></dfn> constructor steps are: </p> <ol> <li> <p>Let <var>encoding</var> be the result of <a href="#concept-encoding-get">getting an encoding</a> from <var>label</var>. </p> <li> <p>If <var>encoding</var> is failure or <a href="#replacement">replacement</a>, then <a href="https://webidl.spec.whatwg.org/#dfn-throw" id="807892d42">throw</a> a <code class="idl"><a href="https://webidl.spec.whatwg.org/#exceptiondef-rangeerror" id="a0324b581">RangeError</a></code>. </p> <li> <p>Set <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef63">this</a>’s <a href="#textdecoder-encoding">encoding</a> to <var>encoding</var>. 
</p> <li> <p>If <var>options</var>["<code class="idl"><a href="#dom-textdecoderoptions-fatal">fatal</a></code>"] is true, then set <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef64">this</a>’s <a href="#textdecoder-error-mode">error mode</a> to "<code>fatal</code>". </p> <li> <p>Set <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef65">this</a>’s <a href="#textdecoder-ignore-bom-flag">ignore BOM</a> to <var>options</var>["<code class="idl"><a href="#dom-textdecoderoptions-ignorebom">ignoreBOM</a></code>"]. </p> </ol> <p>The <dfn class="idl-code" id="dom-textdecoder-decode"><code>decode(<var>input</var>, <var>options</var>)</code></dfn> method steps are: </p> <ol> <li> <p>If <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef66">this</a>’s <a href="#textdecoder-do-not-flush-flag">do not flush</a> is false, then set <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef67">this</a>’s <a href="#textdecodercommon-decoder">decoder</a> to a new instance of <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef68">this</a>’s <a href="#textdecoder-encoding">encoding</a>’s <a href="#decoder">decoder</a>, <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef69">this</a>’s <a href="#textdecodercommon-i-o-queue">I/O queue</a> to the <a href="#concept-stream">I/O queue</a> of bytes « <a href="#end-of-stream">end-of-queue</a> », and <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef610">this</a>’s <a href="#textdecoder-bom-seen-flag">BOM seen</a> to false. </p> <li> <p>Set <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef611">this</a>’s <a href="#textdecoder-do-not-flush-flag">do not flush</a> to <var>options</var>["<code class="idl"><a href="#dom-textdecodeoptions-stream">stream</a></code>"]. 
</p> <li> <p>If <var>input</var> is given, then <a href="#concept-stream-push">push</a> a <a href="https://webidl.spec.whatwg.org/#dfn-get-buffer-source-copy" id="5270c6ac0">copy of</a> <var>input</var> to <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef612">this</a>’s <a href="#textdecodercommon-i-o-queue">I/O queue</a>. </p> <p class="note" role="note">Implementations are strongly encouraged to use an implementation strategy that avoids this copy. When doing so they will have to make sure that changes to <var>input</var> do not affect future calls to <a class="idl-code" href="#dom-textdecoder-decode"><code>decode()</code></a>. </p> <p class="warning" id="sharedarraybuffer-warning">The memory exposed by <code>SharedArrayBuffer</code> objects does not adhere to data race freedom properties required by the memory model of programming languages typically used for implementations. When implementing, take care to use the appropriate facilities when accessing memory exposed by <code>SharedArrayBuffer</code> objects. </p> <li> <p>Let <var>output</var> be the <a href="#concept-stream">I/O queue</a> of scalar values « <a href="#end-of-stream">end-of-queue</a> ». </p> <li> <p>While true: </p> <ol> <li> <p>Let <var>item</var> be the result of <a href="#concept-stream-read">reading</a> from <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef613">this</a>’s <a href="#textdecodercommon-i-o-queue">I/O queue</a>. </p> <li> <p>If <var>item</var> is <a href="#end-of-stream">end-of-queue</a> and <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef614">this</a>’s <a href="#textdecoder-do-not-flush-flag">do not flush</a> is true, then return the result of running <a href="#concept-td-serialize">serialize I/O queue</a> with <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef615">this</a> and <var>output</var>. 
</p> <p class="note" role="note">The way streaming works is to not handle <a href="#end-of-stream">end-of-queue</a> here when <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef616">this</a>’s <a href="#textdecoder-do-not-flush-flag">do not flush</a> is true and to not set it to false. That way in a subsequent invocation <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef617">this</a>’s <a href="#textdecodercommon-decoder">decoder</a> is not set anew in the first step of the algorithm and its state is preserved. </p> <li> <p>Otherwise: </p> <ol> <li> <p>Let <var>result</var> be the result of <a href="#concept-encoding-process">processing an item</a> with <var>item</var>, <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef618">this</a>’s <a href="#textdecodercommon-decoder">decoder</a>, <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef619">this</a>’s <a href="#textdecodercommon-i-o-queue">I/O queue</a>, <var>output</var>, and <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef620">this</a>’s <a href="#textdecoder-error-mode">error mode</a>. </p> <li> <p>If <var>result</var> is <a href="#finished">finished</a>, then return the result of running <a href="#concept-td-serialize">serialize I/O queue</a> with <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef621">this</a> and <var>output</var>. </p> <li> <p>Otherwise, if <var>result</var> is <a href="#error">error</a>, <a href="https://webidl.spec.whatwg.org/#dfn-throw" id="807892d43">throw</a> a <code class="idl"><a href="https://webidl.spec.whatwg.org/#exceptiondef-typeerror" id="20fb259a1">TypeError</a></code>. </p> </ol> </ol> </ol> <h3 class="heading settled" id="interface-mixin-textencodercommon"><span class="secno">7.3. 
</span><span class="content">Interface mixin <code class="idl"><a href="#textencodercommon">TextEncoderCommon</a></code></span></h3> <pre class="idl highlight def">interface mixin <dfn class="idl-code" id="textencodercommon"><code>TextEncoderCommon</code></dfn> { readonly attribute <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-DOMString" id="162027792">DOMString</a> <a class="idl-code" href="#dom-textencoder-encoding">encoding</a>; }; </pre> <p>The <code class="idl"><a href="#textencodercommon">TextEncoderCommon</a></code> interface mixin defines common getters that are shared between <code class="idl"><a href="#textencoder">TextEncoder</a></code> and <code class="idl"><a href="#textencoderstream">TextEncoderStream</a></code> objects. </p> <p>The <dfn class="idl-code" id="dom-textencoder-encoding"><code>encoding</code></dfn> getter steps are to return "<code>utf-8</code>". </p> <h3 class="heading settled" id="interface-textencoder"><span class="secno">7.4. </span><span class="content">Interface <code class="idl"><a href="#textencoder">TextEncoder</a></code></span></h3> <pre class="idl highlight def">dictionary <dfn class="idl-code" id="dictdef-textencoderencodeintoresult"><code>TextEncoderEncodeIntoResult</code></dfn> { <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-unsigned-long-long" id="607781f40">unsigned long long</a> <dfn class="idl-code" id="dom-textencoderencodeintoresult-read"><code>read</code></dfn>; <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-unsigned-long-long" id="607781f41">unsigned long long</a> <dfn class="idl-code" id="dom-textencoderencodeintoresult-written"><code>written</code></dfn>; }; [Exposed=*] interface <dfn class="idl-code" id="textencoder"><code>TextEncoder</code></dfn> { <a class="idl-code" href="#dom-textencoder">constructor</a>(); [<a class="idl-code" href="https://webidl.spec.whatwg.org/#NewObject" id="f8bab20f0">NewObject</a>] <a class="idl-code" 
href="https://webidl.spec.whatwg.org/#idl-Uint8Array" id="eddeeee91">Uint8Array</a> <a class="idl-code" href="#dom-textencoder-encode">encode</a>(optional <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-USVString" id="63dcd5931">USVString</a> <dfn class="idl-code" id="dom-textencoder-encode-input-input"><code>input</code></dfn> = ""); <a href="#dictdef-textencoderencodeintoresult">TextEncoderEncodeIntoResult</a> <a class="idl-code" href="#dom-textencoder-encodeinto">encodeInto</a>(<a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-USVString" id="63dcd5932">USVString</a> <dfn class="idl-code" id="dom-textencoder-encodeinto-source-destination-source"><code>source</code></dfn>, [<a class="idl-code" href="https://webidl.spec.whatwg.org/#AllowShared" id="ac6a6bf70">AllowShared</a>] <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-Uint8Array" id="eddeeee92">Uint8Array</a> <dfn class="idl-code" id="dom-textencoder-encodeinto-source-destination-destination"><code>destination</code></dfn>); }; <a href="#textencoder">TextEncoder</a> includes <a href="#textencodercommon">TextEncoderCommon</a>; </pre> <p class="note no-backref" role="note">A <code class="idl"><a href="#textencoder">TextEncoder</a></code> object offers no <var>label</var> argument as it only supports <a href="#utf-8">UTF-8</a>. It also offers no <code>stream</code> option as no <a href="#encoder">encoder</a> requires buffering of scalar values. </p> <hr> <dl class="domintro"> <dt><code><var>encoder</var> = new <a class="idl-code" href="#dom-textencoder">TextEncoder()</a></code> <dd> <p>Returns a new <code class="idl"><a href="#textencoder">TextEncoder</a></code> object. </p> <dt><code><var>encoder</var> . <a class="idl-code" href="#dom-textencoder-encoding">encoding</a></code> <dd> <p>Returns "<code>utf-8</code>". </p> <dt><code><var>encoder</var> . 
<a class="idl-code" href="#dom-textencoder-encode">encode([<var>input</var> = ""])</a></code> <dd> <p>Returns the result of running <a href="#utf-8">UTF-8</a>’s <a href="#encoder">encoder</a>. </p> <dt><code><var>encoder</var> . <a class="idl-code" href="#dom-textencoder-encodeinto">encodeInto(<var>source</var>, <var>destination</var>)</a></code> <dd> <p>Runs the <a href="#utf-8-encoder">UTF-8 encoder</a> on <var>source</var>, stores the result of that operation into <var>destination</var>, and returns the progress made as an object wherein <code class="idl"><a href="#dom-textencoderencodeintoresult-read">read</a></code> is the number of converted <a href="https://infra.spec.whatwg.org/#code-unit" id="d0892c6f0">code units</a> of <var>source</var> and <code class="idl"><a href="#dom-textencoderencodeintoresult-written">written</a></code> is the number of bytes modified in <var>destination</var>. </p> </dl> <p>The <dfn class="idl-code" id="dom-textencoder"><code>new TextEncoder()</code></dfn> constructor steps are to do nothing. </p> <p>The <dfn class="idl-code" id="dom-textencoder-encode"><code>encode(<var>input</var>)</code></dfn> method steps are: </p> <ol> <li> <p><a href="#to-i-o-queue-convert">Convert</a> <var>input</var> to an <a href="#concept-stream">I/O queue</a> of scalar values. </p> <li> <p>Let <var>output</var> be the <a href="#concept-stream">I/O queue</a> of bytes « <a href="#end-of-stream">end-of-queue</a> ». </p> <li> <p>While true: </p> <ol> <li> <p>Let <var>item</var> be the result of <a href="#concept-stream-read">reading</a> from <var>input</var>. </p> <li> <p>Let <var>result</var> be the result of <a href="#concept-encoding-process">processing an item</a> with <var>item</var>, an instance of the <a href="#utf-8-encoder">UTF-8 encoder</a>, <var>input</var>, <var>output</var>, and "<code>fatal</code>". </p> <li> <p>Assert: <var>result</var> is not an <a href="#error">error</a>. 
</p> <p class="note" role="note">The <a href="#utf-8-encoder">UTF-8 encoder</a> cannot return <a href="#error">error</a>. </p> <li> <p>If <var>result</var> is <a href="#finished">finished</a>, then <a href="#from-i-o-queue-convert">convert</a> <var>output</var> into a byte sequence and return a <code class="idl"><a href="https://webidl.spec.whatwg.org/#idl-Uint8Array" id="eddeeee93">Uint8Array</a></code> object wrapping an <code class="idl"><a href="https://webidl.spec.whatwg.org/#idl-ArrayBuffer" id="6cfb0bb32">ArrayBuffer</a></code> containing <var>output</var>. </p> </ol> </ol> <p>The <dfn class="idl-code" id="dom-textencoder-encodeinto"><code>encodeInto(<var>source</var>, <var>destination</var>)</code></dfn> method steps are: </p> <ol> <li> <p>Let <var>read</var> be 0. </p> <li> <p>Let <var>written</var> be 0. </p> <li> <p>Let <var>encoder</var> be an instance of the <a href="#utf-8-encoder">UTF-8 encoder</a>. </p> <li> <p>Let <var>unused</var> be the <a href="#concept-stream">I/O queue</a> of scalar values « <a href="#end-of-stream">end-of-queue</a> ». </p> <p class="note" role="note">The <a href="#handler">handler</a> algorithm invoked below requires this argument, but it is not used by the <a href="#utf-8-encoder">UTF-8 encoder</a>. </p> <li> <p><a href="#to-i-o-queue-convert">Convert</a> <var>source</var> to an <a href="#concept-stream">I/O queue</a> of scalar values. </p> <li> <p>While true: </p> <ol> <li> <p>Let <var>item</var> be the result of <a href="#concept-stream-read">reading</a> from <var>source</var>. </p> <li> <p>Let <var>result</var> be the result of running <var>encoder</var>’s <a href="#handler">handler</a> on <var>unused</var> and <var>item</var>. </p> <li> <p>If <var>result</var> is <a href="#finished">finished</a>, then <a href="https://infra.spec.whatwg.org/#iteration-break" id="7a438e491">break</a>. 
</p> <li> <p>Otherwise: </p> <ol> <li> <p>If <var>destination</var>’s <a href="https://webidl.spec.whatwg.org/#buffersource-byte-length" id="62ab8c180">byte length</a> − <var>written</var> is greater than or equal to the number of bytes in <var>result</var>, then: </p> <ol> <li> <p>If <var>item</var> is greater than U+FFFF, then increment <var>read</var> by 2. </p> <li> <p>Otherwise, increment <var>read</var> by 1. </p> <li> <p><a href="https://webidl.spec.whatwg.org/#arraybufferview-write" id="291f56c00">Write</a> the bytes in <var>result</var> into <var>destination</var>, with <a href="https://webidl.spec.whatwg.org/#arraybufferview-write-startingoffset" id="71867f7d0"><i>startingOffset</i></a> set to <var>written</var>. </p> <p class="warning">See the <a href="#sharedarraybuffer-warning">warning for <code>SharedArrayBuffer</code> objects</a> above. </p> <li> <p>Increment <var>written</var> by the number of bytes in <var>result</var>. </p> </ol> <li> <p>Otherwise, <a href="https://infra.spec.whatwg.org/#iteration-break" id="7a438e492">break</a>. </p> </ol> </ol> <li> <p>Return «[ "<code class="idl"><a href="#dom-textencoderencodeintoresult-read">read</a></code>" → <var>read</var>, "<code class="idl"><a href="#dom-textencoderencodeintoresult-written">written</a></code>" → <var>written</var> ]». </p> </ol> <div class="example" id="example-textencoder-encodeinto"> <p>The <a class="idl-code" href="#dom-textencoder-encodeinto">encodeInto()</a> method can be used to encode a string into an existing <code class="idl"><a href="https://webidl.spec.whatwg.org/#idl-ArrayBuffer" id="6cfb0bb33">ArrayBuffer</a></code> object. 
Various details below are left as an exercise for the reader, but this demonstrates an approach one could take to use this method: </p> <pre><code class="lang-javascript">function convertString(buffer, input, callback) { let bufferSize = 256, bufferStart = malloc(buffer, bufferSize), writeOffset = 0, readOffset = 0; while (true) { const view = new Uint8Array(buffer, bufferStart + writeOffset, bufferSize - writeOffset), {read, written} = cachedEncoder.encodeInto(input.substring(readOffset), view); readOffset += read; writeOffset += written; if (readOffset === input.length) { callback(bufferStart, writeOffset); free(buffer, bufferStart); return; } bufferSize *= 2; bufferStart = realloc(buffer, bufferStart, bufferSize); } } </code></pre> </div> <h3 class="heading settled" id="interface-textdecoderstream"><span class="secno">7.5. </span><span class="content">Interface <code class="idl"><a href="#textdecoderstream">TextDecoderStream</a></code></span></h3> <pre class="idl highlight def">[Exposed=*] interface <dfn class="idl-code" id="textdecoderstream"><code>TextDecoderStream</code></dfn> { <a class="idl-code" href="#dom-textdecoderstream">constructor</a>(optional <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-DOMString" id="162027793">DOMString</a> <dfn class="idl-code" id="dom-textdecoderstream-textdecoderstream-label-options-label"><code>label</code></dfn> = "utf-8", optional <a href="#textdecoderoptions">TextDecoderOptions</a> <dfn class="idl-code" id="dom-textdecoderstream-textdecoderstream-label-options-options"><code>options</code></dfn> = {}); }; <a href="#textdecoderstream">TextDecoderStream</a> includes <a href="#textdecodercommon">TextDecoderCommon</a>; <a href="#textdecoderstream">TextDecoderStream</a> includes <a href="https://streams.spec.whatwg.org/#generictransformstream" id="af5e2e850">GenericTransformStream</a>; </pre> <dl class="domintro"> <dt><code><var>decoder</var> = new <a class="idl-code" 
href="#dom-textdecoderstream">TextDecoderStream([<var>label</var> = "utf-8" [, <var>options</var>]])</a></code> <dd> <p>Returns a new <code class="idl"><a href="#textdecoderstream">TextDecoderStream</a></code> object. </p> <p>If <var>label</var> is either not a label or is a <a href="#label">label</a> for <a href="#replacement">replacement</a>, <a href="https://webidl.spec.whatwg.org/#dfn-throw" id="807892d44">throws</a> a <code class="idl"><a href="https://webidl.spec.whatwg.org/#exceptiondef-rangeerror" id="a0324b582">RangeError</a></code>. </p> <dt><code><var>decoder</var> . <a class="idl-code" href="#dom-textdecoder-encoding">encoding</a></code> <dd> <p>Returns <a href="#textdecoder-encoding">encoding</a>’s <a href="#name">name</a>, lowercased. </p> <dt><code><var>decoder</var> . <a class="idl-code" href="#dom-textdecoder-fatal">fatal</a></code> <dd> <p>Returns true if <a href="#textdecoder-error-mode">error mode</a> is "<code>fatal</code>", and false otherwise. </p> <dt><code><var>decoder</var> . <a class="idl-code" href="#dom-textdecoder-ignorebom">ignoreBOM</a></code> <dd> <p>Returns the value of <a href="#textdecoder-ignore-bom-flag">ignore BOM</a>. </p> <dt><code><var>decoder</var> . <a class="idl-code" href="https://streams.spec.whatwg.org/#dom-generictransformstream-readable" id="271adbee0">readable</a></code> <dd> <p>Returns a <a href="https://streams.spec.whatwg.org/#readable-stream" id="02cae24a0">readable stream</a> whose <a href="https://streams.spec.whatwg.org/#chunk" id="a63869ef0">chunks</a> are strings resulting from running <a href="#textdecoder-encoding">encoding</a>’s <a href="#decoder">decoder</a> on the chunks written to <code class="idl"><a href="https://streams.spec.whatwg.org/#dom-generictransformstream-writable" id="92533bb30">writable</a></code>. </p> <dt><code><var>decoder</var> . 
<a class="idl-code" href="https://streams.spec.whatwg.org/#dom-generictransformstream-writable" id="92533bb31">writable</a></code> <dd> <p>Returns a <a href="https://streams.spec.whatwg.org/#writable-stream" id="74cfb0750">writable stream</a> which accepts <code><a class="idl-code" href="https://webidl.spec.whatwg.org/#AllowSharedBufferSource" id="8145e9371">AllowSharedBufferSource</a></code> chunks and runs them through <a href="#textdecoder-encoding">encoding</a>’s <a href="#decoder">decoder</a> before making them available to <code class="idl"><a href="https://streams.spec.whatwg.org/#dom-generictransformstream-readable" id="271adbee1">readable</a></code>. </p> <p>Typically this will be used via the <code class="idl"><a href="https://streams.spec.whatwg.org/#rs-pipe-through" id="3bf29ae90">pipeThrough()</a></code> method on a <code class="idl"><a href="https://streams.spec.whatwg.org/#readablestream" id="e42e67350">ReadableStream</a></code> source. </p> <pre class="example" id="example-textdecoderstream-writable"><code class="lang-javascript">var decoder = new TextDecoderStream(encoding); byteReadable .pipeThrough(decoder) .pipeTo(textWritable);</code></pre> <p>If the <a href="#textdecoder-error-mode">error mode</a> is "<code>fatal</code>" and <a href="#textdecoder-encoding">encoding</a>’s <a href="#decoder">decoder</a> returns <a href="#error">error</a>, both <code class="idl"><a href="https://streams.spec.whatwg.org/#dom-generictransformstream-readable" id="271adbee2">readable</a></code> and <code class="idl"><a href="https://streams.spec.whatwg.org/#dom-generictransformstream-writable" id="92533bb32">writable</a></code> will be errored with a <code class="idl"><a href="https://webidl.spec.whatwg.org/#exceptiondef-typeerror" id="20fb259a2">TypeError</a></code>. 
</p> </dl> <p>The <dfn class="idl-code" id="dom-textdecoderstream"><code>new TextDecoderStream(<var>label</var>, <var>options</var>)</code></dfn> constructor steps are: </p> <ol> <li> <p>Let <var>encoding</var> be the result of <a href="#concept-encoding-get">getting an encoding</a> from <var>label</var>. </p> <li> <p>If <var>encoding</var> is failure or <a href="#replacement">replacement</a>, then <a href="https://webidl.spec.whatwg.org/#dfn-throw" id="807892d45">throw</a> a <code class="idl"><a href="https://webidl.spec.whatwg.org/#exceptiondef-rangeerror" id="a0324b583">RangeError</a></code>. </p> <li> <p>Set <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef622">this</a>’s <a href="#textdecoder-encoding">encoding</a> to <var>encoding</var>. </p> <li> <p>If <var>options</var>["<code class="idl"><a href="#dom-textdecoderoptions-fatal">fatal</a></code>"] is true, then set <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef623">this</a>’s <a href="#textdecoder-error-mode">error mode</a> to "<code>fatal</code>". </p> <li> <p>Set <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef624">this</a>’s <a href="#textdecoder-ignore-bom-flag">ignore BOM</a> to <var>options</var>["<code class="idl"><a href="#dom-textdecoderoptions-ignorebom">ignoreBOM</a></code>"]. </p> <li> <p>Set <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef625">this</a>’s <a href="#textdecodercommon-decoder">decoder</a> to a new instance of <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef626">this</a>’s <a href="#textdecoder-encoding">encoding</a>’s <a href="#decoder">decoder</a>, and set <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef627">this</a>’s <a href="#textdecodercommon-i-o-queue">I/O queue</a> to a new <a href="#concept-stream">I/O queue</a>. 
</p> <li> <p>Let <var>transformAlgorithm</var> be an algorithm which takes a <var>chunk</var> argument and runs the <a href="#decode-and-enqueue-a-chunk">decode and enqueue a chunk</a> algorithm with <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef628">this</a> and <var>chunk</var>. </p> <li> <p>Let <var>flushAlgorithm</var> be an algorithm which takes no arguments and runs the <a href="#flush-and-enqueue">flush and enqueue</a> algorithm with <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef629">this</a>. </p> <li> <p>Let <var>transformStream</var> be a <a href="https://webidl.spec.whatwg.org/#new" id="85cd0b720">new</a> <code class="idl"><a href="https://streams.spec.whatwg.org/#transformstream" id="a26122b10">TransformStream</a></code>. </p> <li> <p><a href="https://streams.spec.whatwg.org/#transformstream-set-up" id="4229e9a80">Set up</a> <var>transformStream</var> with <a href="https://streams.spec.whatwg.org/#transformstream-set-up-transformalgorithm" id="14e4f1490"><var>transformAlgorithm</var></a> set to <var>transformAlgorithm</var> and <a href="https://streams.spec.whatwg.org/#transformstream-set-up-flushalgorithm" id="a54978b30"><var>flushAlgorithm</var></a> set to <var>flushAlgorithm</var>. </p> <li> <p>Set <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef630">this</a>’s <a href="https://streams.spec.whatwg.org/#generictransformstream-transform" id="d582c3cd0">transform</a> to <var>transformStream</var>. 
</p> </ol> <p>The <dfn id="decode-and-enqueue-a-chunk">decode and enqueue a chunk</dfn> algorithm, given a <code class="idl"><a href="#textdecoderstream">TextDecoderStream</a></code> object <var>decoder</var> and a <var>chunk</var>, runs these steps: </p> <ol> <li> <p>Let <var>bufferSource</var> be the result of <a href="https://webidl.spec.whatwg.org/#dfn-convert-ecmascript-to-idl-value" id="ce0b23860">converting</a> <var>chunk</var> to an <code><a class="idl-code" href="https://webidl.spec.whatwg.org/#AllowSharedBufferSource" id="8145e9372">AllowSharedBufferSource</a></code>. </p> <li> <p><a href="#concept-stream-push">Push</a> a <a href="https://webidl.spec.whatwg.org/#dfn-get-buffer-source-copy" id="5270c6ac1">copy of</a> <var>bufferSource</var> to <var>decoder</var>’s <a href="#textdecodercommon-i-o-queue">I/O queue</a>. </p> <p class="warning">See the <a href="#sharedarraybuffer-warning">warning for <code>SharedArrayBuffer</code> objects</a> above. </p> <li> <p>Let <var>output</var> be the <a href="#concept-stream">I/O queue</a> of scalar values « <a href="#end-of-stream">end-of-queue</a> ». </p> <li> <p>While true: </p> <ol> <li> <p>Let <var>item</var> be the result of <a href="#concept-stream-read">reading</a> from <var>decoder</var>’s <a href="#textdecodercommon-i-o-queue">I/O queue</a>. </p> <li> <p>If <var>item</var> is <a href="#end-of-stream">end-of-queue</a>, then: </p> <ol> <li> <p>Let <var>outputChunk</var> be the result of running <a href="#concept-td-serialize">serialize I/O queue</a> with <var>decoder</var> and <var>output</var>. </p> <li> <p>If <var>outputChunk</var> is non-empty, then <a href="https://streams.spec.whatwg.org/#transformstream-enqueue" id="9c9f0e820">enqueue</a> <var>outputChunk</var> in <var>decoder</var>’s <a href="https://streams.spec.whatwg.org/#generictransformstream-transform" id="d582c3cd1">transform</a>. </p> <li> <p>Return. 
</p> </ol> <li> <p>Let <var>result</var> be the result of <a href="#concept-encoding-process">processing an item</a> with <var>item</var>, <var>decoder</var>’s <a href="#textdecodercommon-decoder">decoder</a>, <var>decoder</var>’s <a href="#textdecodercommon-i-o-queue">I/O queue</a>, <var>output</var>, and <var>decoder</var>’s <a href="#textdecoder-error-mode">error mode</a>. </p> <li> <p>If <var>result</var> is <a href="#error">error</a>, then <a href="https://webidl.spec.whatwg.org/#dfn-throw" id="807892d46">throw</a> a <code class="idl"><a href="https://webidl.spec.whatwg.org/#exceptiondef-typeerror" id="20fb259a3">TypeError</a></code>. </p> </ol> </ol> <p>The <dfn id="flush-and-enqueue">flush and enqueue</dfn> algorithm, which handles the end of data from the input <code class="idl"><a href="https://streams.spec.whatwg.org/#readablestream" id="e42e67351">ReadableStream</a></code> object, given a <code class="idl"><a href="#textdecoderstream">TextDecoderStream</a></code> object <var>decoder</var>, runs these steps: </p> <ol> <li> <p>Let <var>output</var> be the <a href="#concept-stream">I/O queue</a> of scalar values « <a href="#end-of-stream">end-of-queue</a> ». </p> <li> <p>While true: </p> <ol> <li> <p>Let <var>item</var> be the result of <a href="#concept-stream-read">reading</a> from <var>decoder</var>’s <a href="#textdecodercommon-i-o-queue">I/O queue</a>. </p> <li> <p>Let <var>result</var> be the result of <a href="#concept-encoding-process">processing an item</a> with <var>item</var>, <var>decoder</var>’s <a href="#textdecodercommon-decoder">decoder</a>, <var>decoder</var>’s <a href="#textdecodercommon-i-o-queue">I/O queue</a>, <var>output</var>, and <var>decoder</var>’s <a href="#textdecoder-error-mode">error mode</a>. 
</p> <li> <p>If <var>result</var> is <a href="#finished">finished</a>, then: </p> <ol> <li> <p>Let <var>outputChunk</var> be the result of running <a href="#concept-td-serialize">serialize I/O queue</a> with <var>decoder</var> and <var>output</var>. </p> <li> <p>If <var>outputChunk</var> is non-empty, then <a href="https://streams.spec.whatwg.org/#transformstream-enqueue" id="9c9f0e821">enqueue</a> <var>outputChunk</var> in <var>decoder</var>’s <a href="https://streams.spec.whatwg.org/#generictransformstream-transform" id="d582c3cd2">transform</a>. </p> <li> <p>Return. </p> </ol> <li> <p>Otherwise, if <var>result</var> is <a href="#error">error</a>, <a href="https://webidl.spec.whatwg.org/#dfn-throw" id="807892d47">throw</a> a <code class="idl"><a href="https://webidl.spec.whatwg.org/#exceptiondef-typeerror" id="20fb259a4">TypeError</a></code>. </p> </ol> </ol> <h3 class="heading settled" id="interface-textencoderstream"><span class="secno">7.6. </span><span class="content">Interface <code class="idl"><a href="#textencoderstream">TextEncoderStream</a></code></span></h3> <pre class="idl highlight def">[Exposed=*] interface <dfn class="idl-code" id="textencoderstream"><code>TextEncoderStream</code></dfn> { <a class="idl-code" href="#dom-textencoderstream">constructor</a>(); }; <a href="#textencoderstream">TextEncoderStream</a> includes <a href="#textencodercommon">TextEncoderCommon</a>; <a href="#textencoderstream">TextEncoderStream</a> includes <a href="https://streams.spec.whatwg.org/#generictransformstream" id="af5e2e851">GenericTransformStream</a>; </pre> <p>A <code class="idl"><a href="#textencoderstream">TextEncoderStream</a></code> object has an associated: </p> <dl> <dt><dfn id="textencoderstream-encoder">encoder</dfn> <dd>An <a href="#encoder">encoder</a> instance. <dt><dfn id="textencoderstream-pending-high-surrogate">pending high surrogate</dfn> <dd>Null or a <a href="https://infra.spec.whatwg.org/#surrogate" id="1be036c83">surrogate</a>, initially null. 
</dl> <p class="note no-backref" role="note">A <code class="idl"><a href="#textencoderstream">TextEncoderStream</a></code> object offers no <var>label</var> argument as it only supports <a href="#utf-8">UTF-8</a>. </p> <dl class="domintro"> <dt><code><var>encoder</var> = new <a class="idl-code" href="#dom-textencoderstream">TextEncoderStream()</a></code> <dd> <p>Returns a new <code class="idl"><a href="#textencoderstream">TextEncoderStream</a></code> object. </p> <dt><code><var>encoder</var> . <a class="idl-code" href="#dom-textencoder-encoding">encoding</a></code> <dd> <p>Returns "<code>utf-8</code>". </p> <dt><code><var>encoder</var> . <a class="idl-code" href="https://streams.spec.whatwg.org/#dom-generictransformstream-readable" id="271adbee3">readable</a></code> <dd> <p>Returns a <a href="https://streams.spec.whatwg.org/#readable-stream" id="02cae24a1">readable stream</a> whose <a href="https://streams.spec.whatwg.org/#chunk" id="a63869ef1">chunks</a> are <code class="idl"><a href="https://webidl.spec.whatwg.org/#idl-Uint8Array" id="eddeeee94">Uint8Array</a></code>s resulting from running <a href="#utf-8">UTF-8</a>’s <a href="#encoder">encoder</a> on the chunks written to <code class="idl"><a href="https://streams.spec.whatwg.org/#dom-generictransformstream-writable" id="92533bb33">writable</a></code>. </p> <dt><code><var>encoder</var> . <a class="idl-code" href="https://streams.spec.whatwg.org/#dom-generictransformstream-writable" id="92533bb34">writable</a></code> <dd> <p>Returns a <a href="https://streams.spec.whatwg.org/#writable-stream" id="74cfb0751">writable stream</a> which accepts string chunks and runs them through <a href="#utf-8">UTF-8</a>’s <a href="#encoder">encoder</a> before making them available to <code class="idl"><a href="https://streams.spec.whatwg.org/#dom-generictransformstream-readable" id="271adbee4">readable</a></code>. 
</p> <p>Typically this will be used via the <code class="idl"><a href="https://streams.spec.whatwg.org/#rs-pipe-through" id="3bf29ae91">pipeThrough()</a></code> method on a <code class="idl"><a href="https://streams.spec.whatwg.org/#readablestream" id="e42e67352">ReadableStream</a></code> source. </p> <pre class="example" id="example-textencoderstream-writable"><code class="lang-javascript">textReadable .pipeThrough(new TextEncoderStream()) .pipeTo(byteWritable);</code></pre> </dl> <p>The <dfn class="idl-code" id="dom-textencoderstream"><code>new TextEncoderStream()</code></dfn> constructor steps are: </p> <ol> <li> <p>Set <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef631">this</a>’s <a href="#textencoderstream-encoder">encoder</a> to an instance of the <a href="#utf-8-encoder">UTF-8 encoder</a>. </p> <li> <p>Let <var>transformAlgorithm</var> be an algorithm which takes a <var>chunk</var> argument and runs the <a href="#encode-and-enqueue-a-chunk">encode and enqueue a chunk</a> algorithm with <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef632">this</a> and <var>chunk</var>. </p> <li> <p>Let <var>flushAlgorithm</var> be an algorithm which runs the <a href="#encode-and-flush">encode and flush</a> algorithm with <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef633">this</a>. </p> <li> <p>Let <var>transformStream</var> be a <a href="https://webidl.spec.whatwg.org/#new" id="85cd0b721">new</a> <code class="idl"><a href="https://streams.spec.whatwg.org/#transformstream" id="a26122b11">TransformStream</a></code>. 
</p> <li> <p><a href="https://streams.spec.whatwg.org/#transformstream-set-up" id="4229e9a81">Set up</a> <var>transformStream</var> with <a href="https://streams.spec.whatwg.org/#transformstream-set-up-transformalgorithm" id="14e4f1491"><var>transformAlgorithm</var></a> set to <var>transformAlgorithm</var> and <a href="https://streams.spec.whatwg.org/#transformstream-set-up-flushalgorithm" id="a54978b31"><var>flushAlgorithm</var></a> set to <var>flushAlgorithm</var>. </p> <li> <p>Set <a href="https://webidl.spec.whatwg.org/#this" id="7bbe6ef634">this</a>’s <a href="https://streams.spec.whatwg.org/#generictransformstream-transform" id="d582c3cd3">transform</a> to <var>transformStream</var>. </p> </ol> <hr> <p>The <dfn id="encode-and-enqueue-a-chunk">encode and enqueue a chunk</dfn> algorithm, given a <code class="idl"><a href="#textencoderstream">TextEncoderStream</a></code> object <var>encoder</var> and <var>chunk</var>, runs these steps: </p> <ol> <li> <p>Let <var>input</var> be the result of <a href="https://webidl.spec.whatwg.org/#dfn-convert-ecmascript-to-idl-value" id="ce0b23861">converting</a> <var>chunk</var> to a <code class="idl"><a href="https://webidl.spec.whatwg.org/#idl-DOMString" id="162027794">DOMString</a></code>. </p> <li> <p><a href="#to-i-o-queue-convert">Convert</a> <var>input</var> to an <a href="#concept-stream">I/O queue</a> of <a href="https://infra.spec.whatwg.org/#code-unit" id="d0892c6f1">code units</a>. </p> <p class="note" role="note"><code class="idl"><a href="https://webidl.spec.whatwg.org/#idl-DOMString" id="162027795">DOMString</a></code>, as well as an <a href="#concept-stream">I/O queue</a> of code units rather than scalar values, are used here so that a surrogate pair that is split between chunks can be reassembled into the appropriate scalar value. The behavior is otherwise identical to <code class="idl"><a href="https://webidl.spec.whatwg.org/#idl-USVString" id="63dcd5933">USVString</a></code>. 
In particular, lone surrogates will be replaced with U+FFFD. </p> <li> <p>Let <var>output</var> be the <a href="#concept-stream">I/O queue</a> of bytes « <a href="#end-of-stream">end-of-queue</a> ». </p> <li> <p>While true: </p> <ol> <li> <p>Let <var>item</var> be the result of <a href="#concept-stream-read">reading</a> from <var>input</var>. </p> <li> <p>If <var>item</var> is <a href="#end-of-stream">end-of-queue</a>, then: </p> <ol> <li> <p><a href="#from-i-o-queue-convert">Convert</a> <var>output</var> into a byte sequence. </p> <li> <p>If <var>output</var> is non-empty, then: </p> <ol> <li> <p>Let <var>chunk</var> be a <code class="idl"><a href="https://webidl.spec.whatwg.org/#idl-Uint8Array" id="eddeeee95">Uint8Array</a></code> object wrapping an <code class="idl"><a href="https://webidl.spec.whatwg.org/#idl-ArrayBuffer" id="6cfb0bb34">ArrayBuffer</a></code> containing <var>output</var>. </p> <li> <p><a href="https://streams.spec.whatwg.org/#transformstream-enqueue" id="9c9f0e822">Enqueue</a> <var>chunk</var> into <var>encoder</var>’s <a href="https://streams.spec.whatwg.org/#generictransformstream-transform" id="d582c3cd4">transform</a>. </p> </ol> <li> <p>Return. </p> </ol> <li> <p>Let <var>result</var> be the result of executing the <a href="#convert-code-unit-to-scalar-value">convert code unit to scalar value</a> algorithm with <var>encoder</var>, <var>item</var> and <var>input</var>. </p> <li> <p>If <var>result</var> is not <a href="#continue">continue</a>, then <a href="#concept-encoding-process">process an item</a> with <var>result</var>, <var>encoder</var>’s <a href="#textencoderstream-encoder">encoder</a>, <var>input</var>, <var>output</var>, and "<code>fatal</code>". 
</p> </ol> </ol> <p>The <dfn id="convert-code-unit-to-scalar-value">convert code unit to scalar value</dfn> algorithm, given a <code class="idl"><a href="#textencoderstream">TextEncoderStream</a></code> object <var>encoder</var>, a <a href="https://infra.spec.whatwg.org/#code-unit" id="d0892c6f2">code unit</a> <var>item</var>, and an <a href="#concept-stream">I/O queue</a> of code units <var>input</var>, runs these steps: </p> <ol> <li> <p>If <var>encoder</var>’s <a href="#textencoderstream-pending-high-surrogate">pending high surrogate</a> is non-null, then: </p> <ol> <li> <p>Let <var>high surrogate</var> be <var>encoder</var>’s <a href="#textencoderstream-pending-high-surrogate">pending high surrogate</a>. </p> <li> <p>Set <var>encoder</var>’s <a href="#textencoderstream-pending-high-surrogate">pending high surrogate</a> to null. </p> <li> <p>If <var>item</var> is in the range U+DC00 to U+DFFF, inclusive, then return a scalar value whose value is 0x10000 + ((<var>high surrogate</var> − 0xD800) &lt;&lt; 10) + (<var>item</var> − 0xDC00). </p> <li> <p><a href="#concept-stream-prepend">Prepend</a> <var>item</var> to <var>input</var>. </p> <li> <p>Return U+FFFD. </p> </ol> <li> <p>If <var>item</var> is in the range U+D800 to U+DBFF, inclusive, then set <var>encoder</var>’s <a href="#textencoderstream-pending-high-surrogate">pending high surrogate</a> to <var>item</var> and return <a href="#continue">continue</a>. </p> <li> <p>If <var>item</var> is in the range U+DC00 to U+DFFF, inclusive, then return U+FFFD. </p> <li> <p>Return <var>item</var>. </p> </ol> <p class="note" role="note">This is equivalent to the "<a href="https://infra.spec.whatwg.org/#javascript-string-convert" id="5dcd6b6c0">convert</a> a <a href="https://infra.spec.whatwg.org/#string" id="14014d7f2">string</a> into a <a href="https://infra.spec.whatwg.org/#scalar-value-string" id="9739dc530">scalar value string</a>" algorithm from the Infra Standard, but allows for surrogate pairs that are split between strings. 
<a href="#biblio-infra" title="Infra Standard">[INFRA]</a> </p> <p>The <dfn id="encode-and-flush">encode and flush</dfn> algorithm, given a <code class="idl"><a href="#textencoderstream">TextEncoderStream</a></code> object <var>encoder</var>, runs these steps: </p> <ol> <li> <p>If <var>encoder</var>’s <a href="#textencoderstream-pending-high-surrogate">pending high surrogate</a> is non-null, then: </p> <ol> <li> <p>Let <var>chunk</var> be a <code class="idl"><a href="https://webidl.spec.whatwg.org/#idl-Uint8Array" id="eddeeee96">Uint8Array</a></code> object wrapping an <code class="idl"><a href="https://webidl.spec.whatwg.org/#idl-ArrayBuffer" id="6cfb0bb35">ArrayBuffer</a></code> containing 0xEF 0xBF 0xBD. </p> <p class="note" role="note">This is U+FFFD (�) in <a href="#utf-8">UTF-8</a> bytes. </p> <li> <p><a href="https://streams.spec.whatwg.org/#transformstream-enqueue" id="9c9f0e823">Enqueue</a> <var>chunk</var> into <var>encoder</var>’s <a href="https://streams.spec.whatwg.org/#generictransformstream-transform" id="d582c3cd5">transform</a>. </p> </ol> </ol> <h2 class="heading settled" id="the-encoding"><span class="secno">8. </span><span class="content">The encoding</span></h2> <h3 class="heading settled" id="utf-8"><span class="secno">8.1. </span><span class="content">UTF-8</span></h3> <h4 class="heading settled" id="utf-8-decoder"><span class="secno">8.1.1. </span><span class="content">UTF-8 decoder</span></h4> <p class="note" role="note">A byte order mark has priority over a label as it has been found to be more accurate in deployed content. Therefore it is not part of the <a href="#utf-8-decoder">UTF-8 decoder</a> algorithm, but rather the <a href="#decode">decode</a> and <a href="#utf-8-decode">UTF-8 decode</a> algorithms. 
</p> <p><a href="#utf-8">UTF-8</a>’s <a href="#decoder">decoder</a> has an associated <dfn id="utf-8-code-point">UTF-8 code point</dfn>, <dfn id="utf-8-bytes-seen">UTF-8 bytes seen</dfn>, and <dfn id="utf-8-bytes-needed">UTF-8 bytes needed</dfn> (all initially 0), a <dfn id="utf-8-lower-boundary">UTF-8 lower boundary</dfn> (initially 0x80), and a <dfn id="utf-8-upper-boundary">UTF-8 upper boundary</dfn> (initially 0xBF). </p> <p><a href="#utf-8">UTF-8</a>’s <a href="#decoder">decoder</a>’s <a href="#handler">handler</a>, given <var>ioQueue</var> and <var>byte</var>, runs these steps: </p> <ol> <li> <p>If <var>byte</var> is <a href="#end-of-stream">end-of-queue</a> and <a href="#utf-8-bytes-needed">UTF-8 bytes needed</a> is not 0, set <a href="#utf-8-bytes-needed">UTF-8 bytes needed</a> to 0 and return <a href="#error">error</a>. </p> <li> <p>If <var>byte</var> is <a href="#end-of-stream">end-of-queue</a>, return <a href="#finished">finished</a>. </p> <li> <p>If <a href="#utf-8-bytes-needed">UTF-8 bytes needed</a> is 0, based on <var>byte</var>: </p> <dl class="switch"> <dt>0x00 to 0x7F <dd> <p>Return a code point whose value is <var>byte</var>. </p> <dt>0xC2 to 0xDF <dd> <ol> <li> <p>Set <a href="#utf-8-bytes-needed">UTF-8 bytes needed</a> to 1. </p> <li> <p>Set <a href="#utf-8-code-point">UTF-8 code point</a> to <var>byte</var> & 0x1F. </p> <p class="note" role="note">The five least significant bits of <var>byte</var>. </p> </ol> <dt>0xE0 to 0xEF <dd> <ol> <li> <p>If <var>byte</var> is 0xE0, set <a href="#utf-8-lower-boundary">UTF-8 lower boundary</a> to 0xA0. </p> <li> <p>If <var>byte</var> is 0xED, set <a href="#utf-8-upper-boundary">UTF-8 upper boundary</a> to 0x9F. </p> <li> <p>Set <a href="#utf-8-bytes-needed">UTF-8 bytes needed</a> to 2. </p> <li> <p>Set <a href="#utf-8-code-point">UTF-8 code point</a> to <var>byte</var> & 0xF. </p> <p class="note" role="note">The four least significant bits of <var>byte</var>. 
</p> </ol> <dt>0xF0 to 0xF4 <dd> <ol> <li> <p>If <var>byte</var> is 0xF0, set <a href="#utf-8-lower-boundary">UTF-8 lower boundary</a> to 0x90. </p> <li> <p>If <var>byte</var> is 0xF4, set <a href="#utf-8-upper-boundary">UTF-8 upper boundary</a> to 0x8F. </p> <li> <p>Set <a href="#utf-8-bytes-needed">UTF-8 bytes needed</a> to 3. </p> <li> <p>Set <a href="#utf-8-code-point">UTF-8 code point</a> to <var>byte</var> & 0x7. </p> <p class="note" role="note">The three least significant bits of <var>byte</var>. </p> </ol> <dt>Otherwise <dd> <p>Return <a href="#error">error</a>. </p> </dl> <p>Return <a href="#continue">continue</a>. </p> <li> <p>If <var>byte</var> is not in the range <a href="#utf-8-lower-boundary">UTF-8 lower boundary</a> to <a href="#utf-8-upper-boundary">UTF-8 upper boundary</a>, inclusive, then: </p> <ol> <li> <p>Set <a href="#utf-8-code-point">UTF-8 code point</a>, <a href="#utf-8-bytes-needed">UTF-8 bytes needed</a>, and <a href="#utf-8-bytes-seen">UTF-8 bytes seen</a> to 0, set <a href="#utf-8-lower-boundary">UTF-8 lower boundary</a> to 0x80, and set <a href="#utf-8-upper-boundary">UTF-8 upper boundary</a> to 0xBF. </p> <li> <p><a href="#concept-stream-prepend">Prepend</a> <var>byte</var> to <var>ioQueue</var>. </p> <li> <p>Return <a href="#error">error</a>. </p> </ol> <li> <p>Set <a href="#utf-8-lower-boundary">UTF-8 lower boundary</a> to 0x80 and <a href="#utf-8-upper-boundary">UTF-8 upper boundary</a> to 0xBF. </p> <li> <p>Set <a href="#utf-8-code-point">UTF-8 code point</a> to (<a href="#utf-8-code-point">UTF-8 code point</a> &lt;&lt; 6) | (<var>byte</var> & 0x3F). </p> <p class="note no-backref" role="note">Shift the existing bits of <a href="#utf-8-code-point">UTF-8 code point</a> left by six places and set the newly-vacated six least significant bits to the six least significant bits of <var>byte</var>. </p> <li> <p>Increase <a href="#utf-8-bytes-seen">UTF-8 bytes seen</a> by one. 
</p> <li> <p>If <a href="#utf-8-bytes-seen">UTF-8 bytes seen</a> is not equal to <a href="#utf-8-bytes-needed">UTF-8 bytes needed</a>, return <a href="#continue">continue</a>. </p> <li> <p>Let <var>code point</var> be <a href="#utf-8-code-point">UTF-8 code point</a>. </p> <li> <p>Set <a href="#utf-8-code-point">UTF-8 code point</a>, <a href="#utf-8-bytes-needed">UTF-8 bytes needed</a>, and <a href="#utf-8-bytes-seen">UTF-8 bytes seen</a> to 0. </p> <li> <p>Return a code point whose value is <var>code point</var>. </p> </ol> <p class="note" role="note">The constraints in the <a href="#utf-8-decoder">UTF-8 decoder</a> above match “Best Practices for Using U+FFFD” from the Unicode standard. No other behavior is permitted per the Encoding Standard (other algorithms that achieve the same result are fine, even encouraged). <a href="#biblio-unicode" title="The Unicode Standard">[UNICODE]</a> </p> <h4 class="heading settled" id="utf-8-encoder"><span class="secno">8.1.2. </span><span class="content">UTF-8 encoder</span></h4> <p><a href="#utf-8">UTF-8</a>’s <a href="#encoder">encoder</a>’s <a href="#handler">handler</a>, given <var>ioQueue</var> and <var>code point</var>, runs these steps: </p> <ol> <li> <p>If <var>code point</var> is <a href="#end-of-stream">end-of-queue</a>, return <a href="#finished">finished</a>. </p> <li> <p>If <var>code point</var> is an <a href="https://infra.spec.whatwg.org/#ascii-code-point" id="48a42c941">ASCII code point</a>, return a byte whose value is <var>code point</var>. </p> <li> <p>Set <var>count</var> and <var>offset</var> based on the range <var>code point</var> is in: </p> <dl class="switch"> <dt>U+0080 to U+07FF, inclusive <dd>1 and 0xC0 <dt>U+0800 to U+FFFF, inclusive <dd>2 and 0xE0 <dt>U+10000 to U+10FFFF, inclusive <dd>3 and 0xF0 </dl> <li> <p>Let <var>bytes</var> be a byte sequence whose first byte is (<var>code point</var> >> (6 × <var>count</var>)) + <var>offset</var>. 
</p> <li> <p>While <var>count</var> is greater than 0: </p> <ol> <li> <p>Set <var>temp</var> to <var>code point</var> >> (6 × (<var>count</var> − 1)). </p> <li> <p>Append to <var>bytes</var> 0x80 | (<var>temp</var> & 0x3F). </p> <li> <p>Decrease <var>count</var> by one. </p> </ol> <li> <p>Return bytes <var>bytes</var>, in order. </p> </ol> <p class="note" role="note">This algorithm has identical results to the one described in the Unicode standard. It is included here for completeness. <a href="#biblio-unicode" title="The Unicode Standard">[UNICODE]</a> </p> <h2 class="heading settled" id="legacy-single-byte-encodings"><span class="secno">9. </span><span class="content">Legacy single-byte encodings</span></h2> <p>An <a href="#encoding">encoding</a> where each byte is either a single code point or nothing, is a <dfn id="single-byte-encoding">single-byte encoding</dfn>. <a href="#single-byte-encoding">Single-byte encodings</a> share the <a href="#decoder">decoder</a> and <a href="#encoder">encoder</a>. <dfn id="index-single-byte">Index single-byte</dfn>, as referenced by the <a href="#single-byte-decoder">single-byte decoder</a> and <a href="#single-byte-encoder">single-byte encoder</a>, is defined by the following table, and depends on the <a href="#single-byte-encoding">single-byte encoding</a> in use. All but two <a href="#single-byte-encoding">single-byte encodings</a> have a unique <a href="#index">index</a>. 
</p> <table> <tbody> <tr> <td><dfn id="ibm866">IBM866</dfn> <td><a href="index-ibm866.txt">index-ibm866.txt</a> <td><a href="ibm866.html">index IBM866 visualization</a> <td><a href="ibm866-bmp.html">index IBM866 BMP coverage</a> <tr> <td><dfn id="iso-8859-2">ISO-8859-2</dfn> <td><a href="index-iso-8859-2.txt">index-iso-8859-2.txt</a> <td><a href="iso-8859-2.html">index ISO-8859-2 visualization</a> <td><a href="iso-8859-2-bmp.html">index ISO-8859-2 BMP coverage</a> <tr> <td><dfn id="iso-8859-3">ISO-8859-3</dfn> <td><a href="index-iso-8859-3.txt">index-iso-8859-3.txt</a> <td><a href="iso-8859-3.html">index ISO-8859-3 visualization</a> <td><a href="iso-8859-3-bmp.html">index ISO-8859-3 BMP coverage</a> <tr> <td><dfn id="iso-8859-4">ISO-8859-4</dfn> <td><a href="index-iso-8859-4.txt">index-iso-8859-4.txt</a> <td><a href="iso-8859-4.html">index ISO-8859-4 visualization</a> <td><a href="iso-8859-4-bmp.html">index ISO-8859-4 BMP coverage</a> <tr> <td><dfn id="iso-8859-5">ISO-8859-5</dfn> <td><a href="index-iso-8859-5.txt">index-iso-8859-5.txt</a> <td><a href="iso-8859-5.html">index ISO-8859-5 visualization</a> <td><a href="iso-8859-5-bmp.html">index ISO-8859-5 BMP coverage</a> <tr> <td><dfn id="iso-8859-6">ISO-8859-6</dfn> <td><a href="index-iso-8859-6.txt">index-iso-8859-6.txt</a> <td><a href="iso-8859-6.html">index ISO-8859-6 visualization</a> <td><a href="iso-8859-6-bmp.html">index ISO-8859-6 BMP coverage</a> <tr> <td><dfn id="iso-8859-7">ISO-8859-7</dfn> <td><a href="index-iso-8859-7.txt">index-iso-8859-7.txt</a> <td><a href="iso-8859-7.html">index ISO-8859-7 visualization</a> <td><a href="iso-8859-7-bmp.html">index ISO-8859-7 BMP coverage</a> <tr> <td><dfn id="iso-8859-8">ISO-8859-8</dfn> <td rowspan="2"><a href="index-iso-8859-8.txt">index-iso-8859-8.txt</a> <td rowspan="2"><a href="iso-8859-8.html">index ISO-8859-8 visualization</a> <td rowspan="2"><a href="iso-8859-8-bmp.html">index ISO-8859-8 BMP coverage</a> <tr> <td><dfn id="iso-8859-8-i">ISO-8859-8-I</dfn> 
<tr> <td><dfn id="iso-8859-10">ISO-8859-10</dfn> <td><a href="index-iso-8859-10.txt">index-iso-8859-10.txt</a> <td><a href="iso-8859-10.html">index ISO-8859-10 visualization</a> <td><a href="iso-8859-10-bmp.html">index ISO-8859-10 BMP coverage</a> <tr> <td><dfn id="iso-8859-13">ISO-8859-13</dfn> <td><a href="index-iso-8859-13.txt">index-iso-8859-13.txt</a> <td><a href="iso-8859-13.html">index ISO-8859-13 visualization</a> <td><a href="iso-8859-13-bmp.html">index ISO-8859-13 BMP coverage</a> <tr> <td><dfn id="iso-8859-14">ISO-8859-14</dfn> <td><a href="index-iso-8859-14.txt">index-iso-8859-14.txt</a> <td><a href="iso-8859-14.html">index ISO-8859-14 visualization</a> <td><a href="iso-8859-14-bmp.html">index ISO-8859-14 BMP coverage</a> <tr> <td><dfn id="iso-8859-15">ISO-8859-15</dfn> <td><a href="index-iso-8859-15.txt">index-iso-8859-15.txt</a> <td><a href="iso-8859-15.html">index ISO-8859-15 visualization</a> <td><a href="iso-8859-15-bmp.html">index ISO-8859-15 BMP coverage</a> <tr> <td><dfn id="iso-8859-16">ISO-8859-16</dfn> <td><a href="index-iso-8859-16.txt">index-iso-8859-16.txt</a> <td><a href="iso-8859-16.html">index ISO-8859-16 visualization</a> <td><a href="iso-8859-16-bmp.html">index ISO-8859-16 BMP coverage</a> <tr> <td><dfn id="koi8-r">KOI8-R</dfn> <td><a href="index-koi8-r.txt">index-koi8-r.txt</a> <td><a href="koi8-r.html">index KOI8-R visualization</a> <td><a href="koi8-r-bmp.html">index KOI8-R BMP coverage</a> <tr> <td><dfn id="koi8-u">KOI8-U</dfn> <td><a href="index-koi8-u.txt">index-koi8-u.txt</a> <td><a href="koi8-u.html">index KOI8-U visualization</a> <td><a href="koi8-u-bmp.html">index KOI8-U BMP coverage</a> <tr> <td><dfn id="macintosh">macintosh</dfn> <td><a href="index-macintosh.txt">index-macintosh.txt</a> <td><a href="macintosh.html">index macintosh visualization</a> <td><a href="macintosh-bmp.html">index macintosh BMP coverage</a> <tr> <td><dfn id="windows-874">windows-874</dfn> <td><a href="index-windows-874.txt">index-windows-874.txt</a> 
<td><a href="windows-874.html">index windows-874 visualization</a> <td><a href="windows-874-bmp.html">index windows-874 BMP coverage</a> <tr> <td><dfn id="windows-1250">windows-1250</dfn> <td><a href="index-windows-1250.txt">index-windows-1250.txt</a> <td><a href="windows-1250.html">index windows-1250 visualization</a> <td><a href="windows-1250-bmp.html">index windows-1250 BMP coverage</a> <tr> <td><dfn id="windows-1251">windows-1251</dfn> <td><a href="index-windows-1251.txt">index-windows-1251.txt</a> <td><a href="windows-1251.html">index windows-1251 visualization</a> <td><a href="windows-1251-bmp.html">index windows-1251 BMP coverage</a> <tr> <td><dfn id="windows-1252">windows-1252</dfn> <td><a href="index-windows-1252.txt">index-windows-1252.txt</a> <td><a href="windows-1252.html">index windows-1252 visualization</a> <td><a href="windows-1252-bmp.html">index windows-1252 BMP coverage</a> <tr> <td><dfn id="windows-1253">windows-1253</dfn> <td><a href="index-windows-1253.txt">index-windows-1253.txt</a> <td><a href="windows-1253.html">index windows-1253 visualization</a> <td><a href="windows-1253-bmp.html">index windows-1253 BMP coverage</a> <tr> <td><dfn id="windows-1254">windows-1254</dfn> <td><a href="index-windows-1254.txt">index-windows-1254.txt</a> <td><a href="windows-1254.html">index windows-1254 visualization</a> <td><a href="windows-1254-bmp.html">index windows-1254 BMP coverage</a> <tr> <td><dfn id="windows-1255">windows-1255</dfn> <td><a href="index-windows-1255.txt">index-windows-1255.txt</a> <td><a href="windows-1255.html">index windows-1255 visualization</a> <td><a href="windows-1255-bmp.html">index windows-1255 BMP coverage</a> <tr> <td><dfn id="windows-1256">windows-1256</dfn> <td><a href="index-windows-1256.txt">index-windows-1256.txt</a> <td><a href="windows-1256.html">index windows-1256 visualization</a> <td><a href="windows-1256-bmp.html">index windows-1256 BMP coverage</a> <tr> <td><dfn id="windows-1257">windows-1257</dfn> <td><a 
href="index-windows-1257.txt">index-windows-1257.txt</a> <td><a href="windows-1257.html">index windows-1257 visualization</a> <td><a href="windows-1257-bmp.html">index windows-1257 BMP coverage</a> <tr> <td><dfn id="windows-1258">windows-1258</dfn> <td><a href="index-windows-1258.txt">index-windows-1258.txt</a> <td><a href="windows-1258.html">index windows-1258 visualization</a> <td><a href="windows-1258-bmp.html">index windows-1258 BMP coverage</a> <tr> <td><dfn id="x-mac-cyrillic">x-mac-cyrillic</dfn> <td><a href="index-x-mac-cyrillic.txt">index-x-mac-cyrillic.txt</a> <td><a href="x-mac-cyrillic.html">index x-mac-cyrillic visualization</a> <td><a href="x-mac-cyrillic-bmp.html">index x-mac-cyrillic BMP coverage</a> </table> <p class="note" role="note"><a href="#iso-8859-8">ISO-8859-8</a> and <a href="#iso-8859-8-i">ISO-8859-8-I</a> are distinct <a href="#encoding">encoding</a> <a href="#name">names</a>, because <a href="#iso-8859-8">ISO-8859-8</a> has influence on the layout direction. And although historically this might have been the case for <a href="#iso-8859-6">ISO-8859-6</a> and "ISO-8859-6-I" as well, that is no longer true. </p> <h3 class="heading settled" id="single-byte-decoder"><span class="secno">9.1. </span><span class="content">single-byte decoder</span></h3> <p><a href="#single-byte-encoding">Single-byte encodings</a>’ <a href="#decoder">decoder</a>’s <a href="#handler">handler</a>, given <var>ioQueue</var> and <var>byte</var>, runs these steps: </p> <ol> <li> <p>If <var>byte</var> is <a href="#end-of-stream">end-of-queue</a>, return <a href="#finished">finished</a>. </p> <li> <p>If <var>byte</var> is an <a href="https://infra.spec.whatwg.org/#ascii-byte" id="ad0a76921">ASCII byte</a>, return a code point whose value is <var>byte</var>. </p> <li> <p>Let <var>code point</var> be the <a href="#index-code-point">index code point</a> for <var>byte</var> − 0x80 in <a href="#index-single-byte">index single-byte</a>. 
</p> <li> <p>If <var>code point</var> is null, return <a href="#error">error</a>. </p> <li> <p>Return a code point whose value is <var>code point</var>. </p> </ol> <h3 class="heading settled" id="single-byte-encoder"><span class="secno">9.2. </span><span class="content">single-byte encoder</span></h3> <p><a href="#single-byte-encoding">Single-byte encodings</a>’ <a href="#encoder">encoder</a>’s <a href="#handler">handler</a>, given <var>ioQueue</var> and <var>code point</var>, runs these steps: </p> <ol> <li> <p>If <var>code point</var> is <a href="#end-of-stream">end-of-queue</a>, return <a href="#finished">finished</a>. </p> <li> <p>If <var>code point</var> is an <a href="https://infra.spec.whatwg.org/#ascii-code-point" id="48a42c942">ASCII code point</a>, return a byte whose value is <var>code point</var>. </p> <li> <p>Let <var>pointer</var> be the <a href="#index-pointer">index pointer</a> for <var>code point</var> in <a href="#index-single-byte">index single-byte</a>. </p> <li> <p>If <var>pointer</var> is null, return <a href="#error">error</a> with <var>code point</var>. </p> <li> <p>Return a byte whose value is <var>pointer</var> + 0x80. </p> </ol> <h2 class="heading settled" id="legacy-multi-byte-chinese-(simplified)-encodings"><span class="secno">10. </span><span class="content">Legacy multi-byte Chinese (simplified) encodings</span></h2> <h3 class="heading settled" id="gbk"><span class="secno">10.1. </span><span class="content">GBK</span></h3> <h4 class="heading settled" id="gbk-decoder"><span class="secno">10.1.1. </span><span class="content">GBK decoder</span></h4> <p><a href="#gbk">GBK</a>’s <a href="#decoder">decoder</a> is <a href="#gb18030">gb18030</a>’s <a href="#decoder">decoder</a>. </p> <h4 class="heading settled" id="gbk-encoder"><span class="secno">10.1.2. 
</span><span class="content">GBK encoder</span></h4> <p><a href="#gbk">GBK</a>’s <a href="#encoder">encoder</a> is <a href="#gb18030">gb18030</a>’s <a href="#encoder">encoder</a> with its <a href="#gbk-flag">is GBK</a> set to true. </p> <p class="note no-backref" role="note">Not fully aliasing <a href="#gbk">GBK</a> with <a href="#gb18030">gb18030</a> is a conservative move to decrease the chances of breaking legacy servers and other consumers of content generated with <a href="#gbk">GBK</a>’s <a href="#encoder">encoder</a>. </p> <h3 class="heading settled" id="gb18030"><span class="secno">10.2. </span><span class="content">gb18030</span></h3> <h4 class="heading settled" id="gb18030-decoder"><span class="secno">10.2.1. </span><span class="content">gb18030 decoder</span></h4> <p><a href="#gb18030">gb18030</a>’s <a href="#decoder">decoder</a> has an associated <dfn id="gb18030-first">gb18030 first</dfn>, <dfn id="gb18030-second">gb18030 second</dfn>, and <dfn id="gb18030-third">gb18030 third</dfn> (all initially 0x00). </p> <p><a href="#gb18030">gb18030</a>’s <a href="#decoder">decoder</a>’s <a href="#handler">handler</a>, given <var>ioQueue</var> and <var>byte</var>, runs these steps: </p> <ol> <li> <p>If <var>byte</var> is <a href="#end-of-stream">end-of-queue</a> and <a href="#gb18030-first">gb18030 first</a>, <a href="#gb18030-second">gb18030 second</a>, and <a href="#gb18030-third">gb18030 third</a> are 0x00, return <a href="#finished">finished</a>. </p> <li> <p>If <var>byte</var> is <a href="#end-of-stream">end-of-queue</a>, and <a href="#gb18030-first">gb18030 first</a>, <a href="#gb18030-second">gb18030 second</a>, or <a href="#gb18030-third">gb18030 third</a> is not 0x00, set <a href="#gb18030-first">gb18030 first</a>, <a href="#gb18030-second">gb18030 second</a>, and <a href="#gb18030-third">gb18030 third</a> to 0x00, and return <a href="#error">error</a>. 
</p> <li> <p>If <a href="#gb18030-third">gb18030 third</a> is not 0x00, then: </p> <ol> <li> <p>If <var>byte</var> is not in the range 0x30 to 0x39, inclusive, then: </p> <ol> <li> <p><a href="#concept-stream-prepend">Prepend</a> <a href="#gb18030-second">gb18030 second</a>, <a href="#gb18030-third">gb18030 third</a>, and <var>byte</var> to <var>ioQueue</var>. </p> <li> <p>Set <a href="#gb18030-first">gb18030 first</a>, <a href="#gb18030-second">gb18030 second</a>, and <a href="#gb18030-third">gb18030 third</a> to 0x00. </p> <li> <p>Return <a href="#error">error</a>. </p> </ol> <li> <p>Let <var>code point</var> be the <a href="#index-gb18030-ranges-code-point">index gb18030 ranges code point</a> for ((<a href="#gb18030-first">gb18030 first</a> − 0x81) × (10 × 126 × 10)) + ((<a href="#gb18030-second">gb18030 second</a> − 0x30) × (10 × 126)) + ((<a href="#gb18030-third">gb18030 third</a> − 0x81) × 10) + <var>byte</var> − 0x30. </p> <li> <p>Set <a href="#gb18030-first">gb18030 first</a>, <a href="#gb18030-second">gb18030 second</a>, and <a href="#gb18030-third">gb18030 third</a> to 0x00. </p> <li> <p>If <var>code point</var> is null, return <a href="#error">error</a>. </p> <li> <p>Return a code point whose value is <var>code point</var>. </p> </ol> <li> <p>If <a href="#gb18030-second">gb18030 second</a> is not 0x00, then: </p> <ol> <li> <p>If <var>byte</var> is in the range 0x81 to 0xFE, inclusive, set <a href="#gb18030-third">gb18030 third</a> to <var>byte</var> and return <a href="#continue">continue</a>. </p> <li> <p><a href="#concept-stream-prepend">Prepend</a> <a href="#gb18030-second">gb18030 second</a> followed by <var>byte</var> to <var>ioQueue</var>, set <a href="#gb18030-first">gb18030 first</a> and <a href="#gb18030-second">gb18030 second</a> to 0x00, and return <a href="#error">error</a>. 
</p> </ol> <li> <p>If <a href="#gb18030-first">gb18030 first</a> is not 0x00, then: </p> <ol> <li> <p>If <var>byte</var> is in the range 0x30 to 0x39, inclusive, set <a href="#gb18030-second">gb18030 second</a> to <var>byte</var> and return <a href="#continue">continue</a>. </p> <li> <p>Let <var>lead</var> be <a href="#gb18030-first">gb18030 first</a>, let <var>pointer</var> be null, and set <a href="#gb18030-first">gb18030 first</a> to 0x00. </p> <li> <p>Let <var>offset</var> be 0x40 if <var>byte</var> is less than 0x7F, otherwise 0x41. </p> <li> <p>If <var>byte</var> is in the range 0x40 to 0x7E, inclusive, or 0x80 to 0xFE, inclusive, set <var>pointer</var> to (<var>lead</var> − 0x81) × 190 + (<var>byte</var> − <var>offset</var>). </p> <li> <p>Let <var>code point</var> be null if <var>pointer</var> is null, otherwise the <a href="#index-code-point">index code point</a> for <var>pointer</var> in <a href="#index-gb18030">index gb18030</a>. </p> <li> <p>If <var>code point</var> is non-null, return a code point whose value is <var>code point</var>. </p> <li> <p>If <var>byte</var> is an <a href="https://infra.spec.whatwg.org/#ascii-byte" id="ad0a76922">ASCII byte</a>, <a href="#concept-stream-prepend">prepend</a> <var>byte</var> to <var>ioQueue</var>. </p> <li> <p>Return <a href="#error">error</a>. </p> </ol> <li> <p>If <var>byte</var> is an <a href="https://infra.spec.whatwg.org/#ascii-byte" id="ad0a76923">ASCII byte</a>, return a code point whose value is <var>byte</var>. </p> <li> <p>If <var>byte</var> is 0x80, return code point U+20AC. </p> <li> <p>If <var>byte</var> is in the range 0x81 to 0xFE, inclusive, set <a href="#gb18030-first">gb18030 first</a> to <var>byte</var> and return <a href="#continue">continue</a>. </p> <li> <p>Return <a href="#error">error</a>. </p> </ol> <h4 class="heading settled" id="gb18030-encoder"><span class="secno">10.2.2. 
</span><span class="content">gb18030 encoder</span></h4> <p><a href="#gb18030">gb18030</a>’s <a href="#encoder">encoder</a> has an associated <dfn id="gbk-flag">is GBK</dfn> (initially false). </p> <p><a href="#gb18030">gb18030</a>’s <a href="#encoder">encoder</a>’s <a href="#handler">handler</a>, given <var>ioQueue</var> and <var>code point</var>, runs these steps: </p> <ol> <li> <p>If <var>code point</var> is <a href="#end-of-stream">end-of-queue</a>, return <a href="#finished">finished</a>. </p> <li> <p>If <var>code point</var> is an <a href="https://infra.spec.whatwg.org/#ascii-code-point" id="48a42c943">ASCII code point</a>, return a byte whose value is <var>code point</var>. </p> <li> <p>If <var>code point</var> is U+E5E5, return <a href="#error">error</a> with <var>code point</var>. </p> <p class="note" role="note"><a href="#index-gb18030">Index gb18030</a> maps 0xA3 0xA0 to U+3000 rather than U+E5E5 for compatibility with deployed content. Therefore U+E5E5 cannot roundtrip. </p> <li> <p>If <a href="#gbk-flag">is GBK</a> is true and <var>code point</var> is U+20AC, return byte 0x80. </p> <li> <p>Let <var>pointer</var> be the <a href="#index-pointer">index pointer</a> for <var>code point</var> in <a href="#index-gb18030">index gb18030</a>. </p> <li> <p>If <var>pointer</var> is non-null, then: </p> <ol> <li> <p>Let <var>lead</var> be <var>pointer</var> / 190 + 0x81. </p> <li> <p>Let <var>trail</var> be <var>pointer</var> % 190. </p> <li> <p>Let <var>offset</var> be 0x40 if <var>trail</var> is less than 0x3F, otherwise 0x41. </p> <li> <p>Return two bytes whose values are <var>lead</var> and <var>trail</var> + <var>offset</var>. </p> </ol> <li> <p>If <a href="#gbk-flag">is GBK</a> is true, return <a href="#error">error</a> with <var>code point</var>. </p> <li> <p>Set <var>pointer</var> to the <a href="#index-gb18030-ranges-pointer">index gb18030 ranges pointer</a> for <var>code point</var>. </p> <li> <p>Let <var>byte1</var> be <var>pointer</var> / (10 × 126 × 10). 
</p> <li> <p>Set <var>pointer</var> to <var>pointer</var> % (10 × 126 × 10). </p> <li> <p>Let <var>byte2</var> be <var>pointer</var> / (10 × 126). </p> <li> <p>Set <var>pointer</var> to <var>pointer</var> % (10 × 126). </p> <li> <p>Let <var>byte3</var> be <var>pointer</var> / 10. </p> <li> <p>Let <var>byte4</var> be <var>pointer</var> % 10. </p> <li> <p>Return four bytes whose values are <var>byte1</var> + 0x81, <var>byte2</var> + 0x30, <var>byte3</var> + 0x81, <var>byte4</var> + 0x30. </p> </ol> <h2 class="heading settled" id="legacy-multi-byte-chinese-(traditional)-encodings"><span class="secno">11. </span><span class="content">Legacy multi-byte Chinese (traditional) encodings</span></h2> <h3 class="heading settled" id="big5"><span class="secno">11.1. </span><span class="content">Big5</span></h3> <h4 class="heading settled" id="big5-decoder"><span class="secno">11.1.1. </span><span class="content">Big5 decoder</span></h4> <p><a href="#big5">Big5</a>’s <a href="#decoder">decoder</a> has an associated <dfn id="big5-lead">Big5 lead</dfn> (initially 0x00). </p> <p><a href="#big5">Big5</a>’s <a href="#decoder">decoder</a>’s <a href="#handler">handler</a>, given <var>ioQueue</var> and <var>byte</var>, runs these steps:</p> <ol> <li> <p>If <var>byte</var> is <a href="#end-of-stream">end-of-queue</a> and <a href="#big5-lead">Big5 lead</a> is not 0x00, set <a href="#big5-lead">Big5 lead</a> to 0x00 and return <a href="#error">error</a>. </p> <li> <p>If <var>byte</var> is <a href="#end-of-stream">end-of-queue</a> and <a href="#big5-lead">Big5 lead</a> is 0x00, return <a href="#finished">finished</a>. </p> <li> <p>If <a href="#big5-lead">Big5 lead</a> is not 0x00, let <var>lead</var> be <a href="#big5-lead">Big5 lead</a>, let <var>pointer</var> be null, set <a href="#big5-lead">Big5 lead</a> to 0x00, and then: </p> <ol> <li> <p>Let <var>offset</var> be 0x40 if <var>byte</var> is less than 0x7F, otherwise 0x62. 
</p> <li> <p>If <var>byte</var> is in the range 0x40 to 0x7E, inclusive, or 0xA1 to 0xFE, inclusive, set <var>pointer</var> to (<var>lead</var> − 0x81) × 157 + (<var>byte</var> − <var>offset</var>). </p> <li> <p>If there is a row in the table below whose first column is <var>pointer</var>, return the <em>two</em> code points listed in its second column (the third column is irrelevant): </p> <table> <tbody> <tr> <th>Pointer <th>Code points <th>Notes <tr> <td>1133 <td>U+00CA U+0304 <td>Ê̄ (LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND MACRON) <tr> <td>1135 <td>U+00CA U+030C <td>Ê̌ (LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND CARON) <tr> <td>1164 <td>U+00EA U+0304 <td>ê̄ (LATIN SMALL LETTER E WITH CIRCUMFLEX AND MACRON) <tr> <td>1166 <td>U+00EA U+030C <td>ê̌ (LATIN SMALL LETTER E WITH CIRCUMFLEX AND CARON) </table> <p class="note" role="note">Since <a href="#index">indexes</a> are limited to single code points this table is used for these pointers. </p> <li> <p>Let <var>code point</var> be null if <var>pointer</var> is null, otherwise the <a href="#index-code-point">index code point</a> for <var>pointer</var> in <a href="#index-big5">index Big5</a>. </p> <li> <p>If <var>code point</var> is non-null, return a code point whose value is <var>code point</var>. </p> <li> <p>If <var>byte</var> is an <a href="https://infra.spec.whatwg.org/#ascii-byte" id="ad0a76924">ASCII byte</a>, <a href="#concept-stream-prepend">prepend</a> <var>byte</var> to <var>ioQueue</var>. </p> <li> <p>Return <a href="#error">error</a>. </p> </ol> <li> <p>If <var>byte</var> is an <a href="https://infra.spec.whatwg.org/#ascii-byte" id="ad0a76925">ASCII byte</a>, return a code point whose value is <var>byte</var>. </p> <li> <p>If <var>byte</var> is in the range 0x81 to 0xFE, inclusive, set <a href="#big5-lead">Big5 lead</a> to <var>byte</var> and return <a href="#continue">continue</a>. </p> <li> <p>Return <a href="#error">error</a>. 
</p> </ol> <h4 class="heading settled" id="big5-encoder"><span class="secno">11.1.2. </span><span class="content">Big5 encoder</span></h4> <p><a href="#big5">Big5</a>’s <a href="#encoder">encoder</a>’s <a href="#handler">handler</a>, given <var>ioQueue</var> and <var>code point</var>, runs these steps: </p> <ol> <li> <p>If <var>code point</var> is <a href="#end-of-stream">end-of-queue</a>, return <a href="#finished">finished</a>. </p> <li> <p>If <var>code point</var> is an <a href="https://infra.spec.whatwg.org/#ascii-code-point" id="48a42c944">ASCII code point</a>, return a byte whose value is <var>code point</var>. </p> <li> <p>Let <var>pointer</var> be the <a href="#index-big5-pointer">index Big5 pointer</a> for <var>code point</var>. </p> <li> <p>If <var>pointer</var> is null, return <a href="#error">error</a> with <var>code point</var>. </p> <li> <p>Let <var>lead</var> be <var>pointer</var> / 157 + 0x81. </p> <li> <p>Let <var>trail</var> be <var>pointer</var> % 157. </p> <li> <p>Let <var>offset</var> be 0x40 if <var>trail</var> is less than 0x3F, otherwise 0x62. </p> <li> <p>Return two bytes whose values are <var>lead</var> and <var>trail</var> + <var>offset</var>. </p> </ol> <h2 class="heading settled" id="legacy-multi-byte-japanese-encodings"><span class="secno">12. </span><span class="content">Legacy multi-byte Japanese encodings</span></h2> <h3 class="heading settled" id="euc-jp"><span class="secno">12.1. </span><span class="content">EUC-JP</span></h3> <h4 class="heading settled" id="euc-jp-decoder"><span class="secno">12.1.1. </span><span class="content">EUC-JP decoder</span></h4> <p><a href="#euc-jp">EUC-JP</a>’s <a href="#decoder">decoder</a> has an associated <dfn id="euc-jp-jis0212-flag">EUC-JP jis0212</dfn> (initially false) and <dfn id="euc-jp-lead">EUC-JP lead</dfn> (initially 0x00). 
</p> <p><a href="#euc-jp">EUC-JP</a>’s <a href="#decoder">decoder</a>’s <a href="#handler">handler</a>, given <var>ioQueue</var> and <var>byte</var>, runs these steps: </p> <ol> <li> <p>If <var>byte</var> is <a href="#end-of-stream">end-of-queue</a> and <a href="#euc-jp-lead">EUC-JP lead</a> is not 0x00, set <a href="#euc-jp-lead">EUC-JP lead</a> to 0x00, and return <a href="#error">error</a>. </p> <li> <p>If <var>byte</var> is <a href="#end-of-stream">end-of-queue</a> and <a href="#euc-jp-lead">EUC-JP lead</a> is 0x00, return <a href="#finished">finished</a>. </p> <li> <p>If <a href="#euc-jp-lead">EUC-JP lead</a> is 0x8E and <var>byte</var> is in the range 0xA1 to 0xDF, inclusive, set <a href="#euc-jp-lead">EUC-JP lead</a> to 0x00 and return a code point whose value is 0xFF61 − 0xA1 + <var>byte</var>. </p> <li> <p>If <a href="#euc-jp-lead">EUC-JP lead</a> is 0x8F and <var>byte</var> is in the range 0xA1 to 0xFE, inclusive, set <a href="#euc-jp-jis0212-flag">EUC-JP jis0212</a> to true, set <a href="#euc-jp-lead">EUC-JP lead</a> to <var>byte</var>, and return <a href="#continue">continue</a>. </p> <li> <p>If <a href="#euc-jp-lead">EUC-JP lead</a> is not 0x00, let <var>lead</var> be <a href="#euc-jp-lead">EUC-JP lead</a>, set <a href="#euc-jp-lead">EUC-JP lead</a> to 0x00, and then: </p> <ol> <li> <p>Let <var>code point</var> be null. </p> <li> <p>If <var>lead</var> and <var>byte</var> are both in the range 0xA1 to 0xFE, inclusive, then set <var>code point</var> to the <a href="#index-code-point">index code point</a> for (<var>lead</var> − 0xA1) × 94 + <var>byte</var> − 0xA1 in <a href="#index-jis0208">index jis0208</a> if <a href="#euc-jp-jis0212-flag">EUC-JP jis0212</a> is false and in <a href="#index-jis0212">index jis0212</a> otherwise. </p> <li> <p>Set <a href="#euc-jp-jis0212-flag">EUC-JP jis0212</a> to false. </p> <li> <p>If <var>code point</var> is non-null, return a code point whose value is <var>code point</var>. 
</p> <li> <p>If <var>byte</var> is an <a href="https://infra.spec.whatwg.org/#ascii-byte" id="ad0a76926">ASCII byte</a>, <a href="#concept-stream-prepend">prepend</a> <var>byte</var> to <var>ioQueue</var>. </p> <li> <p>Return <a href="#error">error</a>. </p> </ol> <li> <p>If <var>byte</var> is an <a href="https://infra.spec.whatwg.org/#ascii-byte" id="ad0a76927">ASCII byte</a>, return a code point whose value is <var>byte</var>. </p> <li> <p>If <var>byte</var> is 0x8E, 0x8F, or in the range 0xA1 to 0xFE, inclusive, set <a href="#euc-jp-lead">EUC-JP lead</a> to <var>byte</var> and return <a href="#continue">continue</a>. </p> <li> <p>Return <a href="#error">error</a>. </p> </ol> <h4 class="heading settled" id="euc-jp-encoder"><span class="secno">12.1.2. </span><span class="content">EUC-JP encoder</span></h4> <p><a href="#euc-jp">EUC-JP</a>’s <a href="#encoder">encoder</a>’s <a href="#handler">handler</a>, given <var>ioQueue</var> and <var>code point</var>, runs these steps: </p> <ol> <li> <p>If <var>code point</var> is <a href="#end-of-stream">end-of-queue</a>, return <a href="#finished">finished</a>. </p> <li> <p>If <var>code point</var> is an <a href="https://infra.spec.whatwg.org/#ascii-code-point" id="48a42c945">ASCII code point</a>, return a byte whose value is <var>code point</var>. </p> <li> <p>If <var>code point</var> is U+00A5, return byte 0x5C. </p> <li> <p>If <var>code point</var> is U+203E, return byte 0x7E. </p> <li> <p>If <var>code point</var> is in the range U+FF61 to U+FF9F, inclusive, return two bytes whose values are 0x8E and <var>code point</var> − 0xFF61 + 0xA1. </p> <li> <p>If <var>code point</var> is U+2212, set it to U+FF0D. </p> <li> <p>Let <var>pointer</var> be the <a href="#index-pointer">index pointer</a> for <var>code point</var> in <a href="#index-jis0208">index jis0208</a>. 
</p> <p class="note" role="note">If <var>pointer</var> is non-null, it is less than 8836 due to the nature of <a href="#index-jis0208">index jis0208</a> and the <a href="#index-pointer">index pointer</a> operation. </p> <li> <p>If <var>pointer</var> is null, return <a href="#error">error</a> with <var>code point</var>. </p> <li> <p>Let <var>lead</var> be <var>pointer</var> / 94 + 0xA1. </p> <li> <p>Let <var>trail</var> be <var>pointer</var> % 94 + 0xA1. </p> <li> <p>Return two bytes whose values are <var>lead</var> and <var>trail</var>. </p> </ol> <h3 class="heading settled" id="iso-2022-jp"><span class="secno">12.2. </span><span class="content">ISO-2022-JP</span></h3> <h4 class="heading settled" id="iso-2022-jp-decoder"><span class="secno">12.2.1. </span><span class="content">ISO-2022-JP decoder</span></h4> <p><a href="#iso-2022-jp">ISO-2022-JP</a>’s <a href="#decoder">decoder</a> has an associated <dfn id="iso-2022-jp-decoder-state">ISO-2022-JP decoder state</dfn> (initially <a href="#iso-2022-jp-decoder-ascii">ASCII</a>), <dfn id="iso-2022-jp-decoder-output-state">ISO-2022-JP decoder output state</dfn> (initially <a href="#iso-2022-jp-decoder-ascii">ASCII</a>), <dfn id="iso-2022-jp-lead">ISO-2022-JP lead</dfn> (initially 0x00), and <dfn id="iso-2022-jp-output-flag">ISO-2022-JP output</dfn> (initially false). </p> <p><a href="#iso-2022-jp">ISO-2022-JP</a>’s <a href="#decoder">decoder</a>’s <a href="#handler">handler</a>, given <var>ioQueue</var> and <var>byte</var>, runs these steps, switching on <a href="#iso-2022-jp-decoder-state">ISO-2022-JP decoder state</a>: </p> <dl class="switch"> <dt><dfn id="iso-2022-jp-decoder-ascii">ASCII</dfn> <dd> <p>Based on <var>byte</var>: </p> <dl class="switch"> <dt>0x1B <dd> <p>Set <a href="#iso-2022-jp-decoder-state">ISO-2022-JP decoder state</a> to <a href="#iso-2022-jp-decoder-escape-start">escape start</a> and return <a href="#continue">continue</a>. 
</p> <dt>0x00 to 0x7F, excluding 0x0E, 0x0F, and 0x1B <dd> <p>Set <a href="#iso-2022-jp-output-flag">ISO-2022-JP output</a> to false and return a code point whose value is <var>byte</var>. </p> <dt><a href="#end-of-stream">end-of-queue</a> <dd> <p>Return <a href="#finished">finished</a>. </p> <dt>Otherwise <dd> <p>Set <a href="#iso-2022-jp-output-flag">ISO-2022-JP output</a> to false and return <a href="#error">error</a>. </p> </dl> <dt><dfn id="iso-2022-jp-decoder-roman">Roman</dfn> <dd> <p>Based on <var>byte</var>: </p> <dl class="switch"> <dt>0x1B <dd> <p>Set <a href="#iso-2022-jp-decoder-state">ISO-2022-JP decoder state</a> to <a href="#iso-2022-jp-decoder-escape-start">escape start</a> and return <a href="#continue">continue</a>. </p> <dt>0x5C <dd> <p>Set <a href="#iso-2022-jp-output-flag">ISO-2022-JP output</a> to false and return code point U+00A5. </p> <dt>0x7E <dd> <p>Set <a href="#iso-2022-jp-output-flag">ISO-2022-JP output</a> to false and return code point U+203E. </p> <dt>0x00 to 0x7F, excluding 0x0E, 0x0F, 0x1B, 0x5C, and 0x7E <dd> <p>Set <a href="#iso-2022-jp-output-flag">ISO-2022-JP output</a> to false and return a code point whose value is <var>byte</var>. </p> <dt><a href="#end-of-stream">end-of-queue</a> <dd> <p>Return <a href="#finished">finished</a>. </p> <dt>Otherwise <dd> <p>Set <a href="#iso-2022-jp-output-flag">ISO-2022-JP output</a> to false and return <a href="#error">error</a>. </p> </dl> <dt><dfn id="iso-2022-jp-decoder-katakana">katakana</dfn> <dd> <p>Based on <var>byte</var>: </p> <dl class="switch"> <dt>0x1B <dd> <p>Set <a href="#iso-2022-jp-decoder-state">ISO-2022-JP decoder state</a> to <a href="#iso-2022-jp-decoder-escape-start">escape start</a> and return <a href="#continue">continue</a>. </p> <dt>0x21 to 0x5F <dd> <p>Set <a href="#iso-2022-jp-output-flag">ISO-2022-JP output</a> to false and return a code point whose value is 0xFF61 − 0x21 + <var>byte</var>. 
</p> <dt><a href="#end-of-stream">end-of-queue</a> <dd> <p>Return <a href="#finished">finished</a>. </p> <dt>Otherwise <dd> <p>Set <a href="#iso-2022-jp-output-flag">ISO-2022-JP output</a> to false and return <a href="#error">error</a>. </p> </dl> <dt><dfn id="iso-2022-jp-decoder-lead-byte">Lead byte</dfn> <dd> <p>Based on <var>byte</var>: </p> <dl class="switch"> <dt>0x1B <dd> <p>Set <a href="#iso-2022-jp-decoder-state">ISO-2022-JP decoder state</a> to <a href="#iso-2022-jp-decoder-escape-start">escape start</a> and return <a href="#continue">continue</a>. </p> <dt>0x21 to 0x7E <dd> <p>Set <a href="#iso-2022-jp-output-flag">ISO-2022-JP output</a> to false, <a href="#iso-2022-jp-lead">ISO-2022-JP lead</a> to <var>byte</var>, <a href="#iso-2022-jp-decoder-state">ISO-2022-JP decoder state</a> to <a href="#iso-2022-jp-decoder-trail-byte">trail byte</a>, and return <a href="#continue">continue</a>. </p> <dt><a href="#end-of-stream">end-of-queue</a> <dd> <p>Return <a href="#finished">finished</a>. </p> <dt>Otherwise <dd> <p>Set <a href="#iso-2022-jp-output-flag">ISO-2022-JP output</a> to false and return <a href="#error">error</a>. </p> </dl> <dt><dfn id="iso-2022-jp-decoder-trail-byte">Trail byte</dfn> <dd> <p>Based on <var>byte</var>: </p> <dl class="switch"> <dt>0x1B <dd> <p>Set <a href="#iso-2022-jp-decoder-state">ISO-2022-JP decoder state</a> to <a href="#iso-2022-jp-decoder-escape-start">escape start</a> and return <a href="#error">error</a>. </p> <dt>0x21 to 0x7E <dd> <ol> <li> <p>Set the <a href="#iso-2022-jp-decoder-state">ISO-2022-JP decoder state</a> to <a href="#iso-2022-jp-decoder-lead-byte">lead byte</a>. </p> <li> <p>Let <var>pointer</var> be (<a href="#iso-2022-jp-lead">ISO-2022-JP lead</a> − 0x21) × 94 + <var>byte</var> − 0x21. </p> <li> <p>Let <var>code point</var> be the <a href="#index-code-point">index code point</a> for <var>pointer</var> in <a href="#index-jis0208">index jis0208</a>. 
</p> <li> <p>If <var>code point</var> is null, return <a href="#error">error</a>. </p> <li> <p>Return a code point whose value is <var>code point</var>. </p> </ol> <dt><a href="#end-of-stream">end-of-queue</a> <dd> <p>Set the <a href="#iso-2022-jp-decoder-state">ISO-2022-JP decoder state</a> to <a href="#iso-2022-jp-decoder-lead-byte">lead byte</a> and return <a href="#error">error</a>. </p> <dt>Otherwise <dd> <p>Set <a href="#iso-2022-jp-decoder-state">ISO-2022-JP decoder state</a> to <a href="#iso-2022-jp-decoder-lead-byte">lead byte</a> and return <a href="#error">error</a>. </p> </dl> <dt><dfn id="iso-2022-jp-decoder-escape-start">Escape start</dfn> <dd> <ol> <li> <p>If <var>byte</var> is either 0x24 or 0x28, set <a href="#iso-2022-jp-lead">ISO-2022-JP lead</a> to <var>byte</var>, <a href="#iso-2022-jp-decoder-state">ISO-2022-JP decoder state</a> to <a href="#iso-2022-jp-decoder-escape">escape</a>, and return <a href="#continue">continue</a>. </p> <li> <p>If <var>byte</var> is not <a href="#end-of-stream">end-of-queue</a>, then <a href="#concept-stream-prepend">prepend</a> <var>byte</var> to <var>ioQueue</var>. </p> <li> <p>Set <a href="#iso-2022-jp-output-flag">ISO-2022-JP output</a> to false, <a href="#iso-2022-jp-decoder-state">ISO-2022-JP decoder state</a> to <a href="#iso-2022-jp-decoder-output-state">ISO-2022-JP decoder output state</a>, and return <a href="#error">error</a>. </p> </ol> <dt><dfn id="iso-2022-jp-decoder-escape">Escape</dfn> <dd> <ol> <li> <p>Let <var>lead</var> be <a href="#iso-2022-jp-lead">ISO-2022-JP lead</a> and set <a href="#iso-2022-jp-lead">ISO-2022-JP lead</a> to 0x00. </p> <li> <p>Let <var>state</var> be null. </p> <li> <p>If <var>lead</var> is 0x28 and <var>byte</var> is 0x42, set <var>state</var> to <a href="#iso-2022-jp-decoder-ascii">ASCII</a>. </p> <li> <p>If <var>lead</var> is 0x28 and <var>byte</var> is 0x4A, set <var>state</var> to <a href="#iso-2022-jp-decoder-roman">Roman</a>. 
</p> <li> <p>If <var>lead</var> is 0x28 and <var>byte</var> is 0x49, set <var>state</var> to <a href="#iso-2022-jp-decoder-katakana">katakana</a>. </p> <li> <p>If <var>lead</var> is 0x24 and <var>byte</var> is either 0x40 or 0x42, set <var>state</var> to <a href="#iso-2022-jp-decoder-lead-byte">lead byte</a>. </p> <li> <p>If <var>state</var> is non-null, then: </p> <ol> <li> <p>Set <a href="#iso-2022-jp-decoder-state">ISO-2022-JP decoder state</a> and <a href="#iso-2022-jp-decoder-output-state">ISO-2022-JP decoder output state</a> to <var>state</var>. </p> <li> <p>Let <var>output</var> be the value of <a href="#iso-2022-jp-output-flag">ISO-2022-JP output</a>. </p> <li> <p>Set <a href="#iso-2022-jp-output-flag">ISO-2022-JP output</a> to true. </p> <li> <p>Return <a href="#continue">continue</a>, if <var>output</var> is false, and <a href="#error">error</a> otherwise. </p> </ol> <li> <p>If <var>byte</var> is <a href="#end-of-stream">end-of-queue</a>, then <a href="#concept-stream-prepend">prepend</a> <var>lead</var> to <var>ioQueue</var>. Otherwise, <a href="#concept-stream-prepend">prepend</a> <var>lead</var> and <var>byte</var> to <var>ioQueue</var>. </p> <li> <p>Set <a href="#iso-2022-jp-output-flag">ISO-2022-JP output</a> to false, <a href="#iso-2022-jp-decoder-state">ISO-2022-JP decoder state</a> to <a href="#iso-2022-jp-decoder-output-state">ISO-2022-JP decoder output state</a> and return <a href="#error">error</a>. </p> </ol> </dl> <h4 class="heading settled" id="iso-2022-jp-encoder"><span class="secno">12.2.2. </span><span class="content">ISO-2022-JP encoder</span></h4> <div class="note no-backref" role="note"> <p>The <a href="#iso-2022-jp-encoder">ISO-2022-JP encoder</a> is the only <a href="#encoder">encoder</a> for which the concatenation of multiple outputs can result in an <a href="#error">error</a> when run through the corresponding <a href="#decoder">decoder</a>. 
</p> <p class="example" id="example-iso-2022-jp-encoder-oddity">Encoding U+00A5 gives 0x1B 0x28 0x4A 0x5C 0x1B 0x28 0x42. Doing that twice, concatenating the results, and then decoding yields U+00A5 U+FFFD U+00A5. </p> </div> <p><a href="#iso-2022-jp">ISO-2022-JP</a>’s <a href="#encoder">encoder</a> has an associated <dfn id="iso-2022-jp-encoder-state">ISO-2022-JP encoder state</dfn> which is <dfn id="iso-2022-jp-encoder-ascii">ASCII</dfn>, <dfn id="iso-2022-jp-encoder-roman">Roman</dfn>, or <dfn id="iso-2022-jp-encoder-jis0208">jis0208</dfn> (initially <a href="#iso-2022-jp-encoder-ascii">ASCII</a>). </p> <p><a href="#iso-2022-jp">ISO-2022-JP</a>’s <a href="#encoder">encoder</a>’s <a href="#handler">handler</a>, given <var>ioQueue</var> and <var>code point</var>, runs these steps: </p> <ol> <li> <p>If <var>code point</var> is <a href="#end-of-stream">end-of-queue</a> and <a href="#iso-2022-jp-encoder-state">ISO-2022-JP encoder state</a> is not <a href="#iso-2022-jp-encoder-ascii">ASCII</a>, set <a href="#iso-2022-jp-encoder-state">ISO-2022-JP encoder state</a> to <a href="#iso-2022-jp-encoder-ascii">ASCII</a>, and return three bytes 0x1B 0x28 0x42. </p> <li> <p>If <var>code point</var> is <a href="#end-of-stream">end-of-queue</a> and <a href="#iso-2022-jp-encoder-state">ISO-2022-JP encoder state</a> is <a href="#iso-2022-jp-encoder-ascii">ASCII</a>, return <a href="#finished">finished</a>. </p> <li> <p>If <a href="#iso-2022-jp-encoder-state">ISO-2022-JP encoder state</a> is <a href="#iso-2022-jp-encoder-ascii">ASCII</a> or <a href="#iso-2022-jp-encoder-roman">Roman</a>, and <var>code point</var> is U+000E, U+000F, or U+001B, return <a href="#error">error</a> with U+FFFD. </p> <p class="note" role="note">This returns U+FFFD rather than <var>code point</var> to prevent attacks. 
</p> <li> <p>If <a href="#iso-2022-jp-encoder-state">ISO-2022-JP encoder state</a> is <a href="#iso-2022-jp-encoder-ascii">ASCII</a> and <var>code point</var> is an <a href="https://infra.spec.whatwg.org/#ascii-code-point" id="48a42c946">ASCII code point</a>, return a byte whose value is <var>code point</var>. </p> <li> <p>If <a href="#iso-2022-jp-encoder-state">ISO-2022-JP encoder state</a> is <a href="#iso-2022-jp-encoder-roman">Roman</a> and <var>code point</var> is an <a href="https://infra.spec.whatwg.org/#ascii-code-point" id="48a42c947">ASCII code point</a>, excluding U+005C and U+007E, or is U+00A5 or U+203E, then: </p> <ol> <li> <p>If <var>code point</var> is an <a href="https://infra.spec.whatwg.org/#ascii-code-point" id="48a42c948">ASCII code point</a>, return a byte whose value is <var>code point</var>. </p> <li> <p>If <var>code point</var> is U+00A5, return byte 0x5C. </p> <li> <p>If <var>code point</var> is U+203E, return byte 0x7E. </p> </ol> <li> <p>If <var>code point</var> is an <a href="https://infra.spec.whatwg.org/#ascii-code-point" id="48a42c949">ASCII code point</a>, and <a href="#iso-2022-jp-encoder-state">ISO-2022-JP encoder state</a> is not <a href="#iso-2022-jp-encoder-ascii">ASCII</a>, <a href="#concept-stream-prepend">prepend</a> <var>code point</var> to <var>ioQueue</var>, set <a href="#iso-2022-jp-encoder-state">ISO-2022-JP encoder state</a> to <a href="#iso-2022-jp-encoder-ascii">ASCII</a>, and return three bytes 0x1B 0x28 0x42. </p> <li> <p>If <var>code point</var> is either U+00A5 or U+203E, and <a href="#iso-2022-jp-encoder-state">ISO-2022-JP encoder state</a> is not <a href="#iso-2022-jp-encoder-roman">Roman</a>, <a href="#concept-stream-prepend">prepend</a> <var>code point</var> to <var>ioQueue</var>, set <a href="#iso-2022-jp-encoder-state">ISO-2022-JP encoder state</a> to <a href="#iso-2022-jp-encoder-roman">Roman</a>, and return three bytes 0x1B 0x28 0x4A. </p> <li> <p>If <var>code point</var> is U+2212, set it to U+FF0D. 
</p> <li> <p>If <var>code point</var> is in the range U+FF61 to U+FF9F, inclusive, set it to the <a href="#index-code-point">index code point</a> for <var>code point</var> − 0xFF61 in <a href="#index-iso-2022-jp-katakana">index ISO-2022-JP katakana</a>. </p> <li> <p>Let <var>pointer</var> be the <a href="#index-pointer">index pointer</a> for <var>code point</var> in <a href="#index-jis0208">index jis0208</a>. </p> <p class="note" role="note">If <var>pointer</var> is non-null, it is less than 8836 due to the nature of <a href="#index-jis0208">index jis0208</a> and the <a href="#index-pointer">index pointer</a> operation. </p> <li> <p>If <var>pointer</var> is null, then: </p> <ol> <li> <p>If <a href="#iso-2022-jp-encoder-state">ISO-2022-JP encoder state</a> is <a href="#iso-2022-jp-encoder-jis0208">jis0208</a>, then <a href="#concept-stream-prepend">prepend</a> <var>code point</var> to <var>ioQueue</var>, set <a href="#iso-2022-jp-encoder-state">ISO-2022-JP encoder state</a> to <a href="#iso-2022-jp-encoder-ascii">ASCII</a>, and return three bytes 0x1B 0x28 0x42. </p> <li> <p>Return <a href="#error">error</a> with <var>code point</var>. </p> </ol> <li> <p>If <a href="#iso-2022-jp-encoder-state">ISO-2022-JP encoder state</a> is not <a href="#iso-2022-jp-encoder-jis0208">jis0208</a>, <a href="#concept-stream-prepend">prepend</a> <var>code point</var> to <var>ioQueue</var>, set <a href="#iso-2022-jp-encoder-state">ISO-2022-JP encoder state</a> to <a href="#iso-2022-jp-encoder-jis0208">jis0208</a>, and return three bytes 0x1B 0x24 0x42. </p> <li> <p>Let <var>lead</var> be <var>pointer</var> / 94 + 0x21. </p> <li> <p>Let <var>trail</var> be <var>pointer</var> % 94 + 0x21. </p> <li> <p>Return two bytes whose values are <var>lead</var> and <var>trail</var>. </p> </ol> <h3 class="heading settled" id="shift_jis"><span class="secno">12.3. </span><span class="content">Shift_JIS</span></h3> <h4 class="heading settled" id="shift_jis-decoder"><span class="secno">12.3.1. 
</span><span class="content">Shift_JIS decoder</span></h4> <p><a href="#shift_jis">Shift_JIS</a>’s <a href="#decoder">decoder</a> has an associated <dfn id="shift_jis-lead">Shift_JIS lead</dfn> (initially 0x00). </p> <p><a href="#shift_jis">Shift_JIS</a>’s <a href="#decoder">decoder</a>’s <a href="#handler">handler</a>, given <var>ioQueue</var> and <var>byte</var>, runs these steps: </p> <ol> <li> <p>If <var>byte</var> is <a href="#end-of-stream">end-of-queue</a> and <a href="#shift_jis-lead">Shift_JIS lead</a> is not 0x00, set <a href="#shift_jis-lead">Shift_JIS lead</a> to 0x00 and return <a href="#error">error</a>. </p> <li> <p>If <var>byte</var> is <a href="#end-of-stream">end-of-queue</a> and <a href="#shift_jis-lead">Shift_JIS lead</a> is 0x00, return <a href="#finished">finished</a>. </p> <li> <p>If <a href="#shift_jis-lead">Shift_JIS lead</a> is not 0x00, let <var>lead</var> be <a href="#shift_jis-lead">Shift_JIS lead</a>, let <var>pointer</var> be null, set <a href="#shift_jis-lead">Shift_JIS lead</a> to 0x00, and then: </p> <ol> <li> <p>Let <var>offset</var> be 0x40 if <var>byte</var> is less than 0x7F, otherwise 0x41. </p> <li> <p>Let <var>lead offset</var> be 0x81 if <var>lead</var> is less than 0xA0, otherwise 0xC1. </p> <li> <p>If <var>byte</var> is in the range 0x40 to 0x7E, inclusive, or 0x80 to 0xFC, inclusive, set <var>pointer</var> to (<var>lead</var> − <var>lead offset</var>) × 188 + <var>byte</var> − <var>offset</var>. </p> <li> <p>If <var>pointer</var> is in the range 8836 to 10715, inclusive, return a code point whose value is 0xE000 − 8836 + <var>pointer</var>. </p> <p class="note" role="note">This is interoperable legacy from Windows known as EUDC. </p> <li> <p>Let <var>code point</var> be null if <var>pointer</var> is null, otherwise the <a href="#index-code-point">index code point</a> for <var>pointer</var> in <a href="#index-jis0208">index jis0208</a>. 
</p> <li> <p>If <var>code point</var> is non-null, return a code point whose value is <var>code point</var>. </p> <li> <p>If <var>byte</var> is an <a href="https://infra.spec.whatwg.org/#ascii-byte" id="ad0a76928">ASCII byte</a>, <a href="#concept-stream-prepend">prepend</a> <var>byte</var> to <var>ioQueue</var>. </p> <li> <p>Return <a href="#error">error</a>. </p> </ol> <li> <p>If <var>byte</var> is an <a href="https://infra.spec.whatwg.org/#ascii-byte" id="ad0a76929">ASCII byte</a> or 0x80, return a code point whose value is <var>byte</var>. </p> <li> <p>If <var>byte</var> is in the range 0xA1 to 0xDF, inclusive, return a code point whose value is 0xFF61 − 0xA1 + <var>byte</var>. </p> <li> <p>If <var>byte</var> is in the range 0x81 to 0x9F, inclusive, or 0xE0 to 0xFC, inclusive, set <a href="#shift_jis-lead">Shift_JIS lead</a> to <var>byte</var> and return <a href="#continue">continue</a>. </p> <li> <p>Return <a href="#error">error</a>. </p> </ol> <h4 class="heading settled" id="shift_jis-encoder"><span class="secno">12.3.2. </span><span class="content">Shift_JIS encoder</span></h4> <p><a href="#shift_jis">Shift_JIS</a>’s <a href="#encoder">encoder</a>’s <a href="#handler">handler</a>, given <var>ioQueue</var> and <var>code point</var>, runs these steps: </p> <ol> <li> <p>If <var>code point</var> is <a href="#end-of-stream">end-of-queue</a>, return <a href="#finished">finished</a>. </p> <li> <p>If <var>code point</var> is an <a href="https://infra.spec.whatwg.org/#ascii-code-point" id="48a42c9410">ASCII code point</a> or U+0080, return a byte whose value is <var>code point</var>. </p> <li> <p>If <var>code point</var> is U+00A5, return byte 0x5C. </p> <li> <p>If <var>code point</var> is U+203E, return byte 0x7E. </p> <li> <p>If <var>code point</var> is in the range U+FF61 to U+FF9F, inclusive, return a byte whose value is <var>code point</var> − 0xFF61 + 0xA1. </p> <li> <p>If <var>code point</var> is U+2212, set it to U+FF0D. 
</p> <li> <p>Let <var>pointer</var> be the <a href="#index-shift_jis-pointer">index Shift_JIS pointer</a> for <var>code point</var>. </p> <li> <p>If <var>pointer</var> is null, return <a href="#error">error</a> with <var>code point</var>. </p> <li> <p>Let <var>lead</var> be <var>pointer</var> / 188. </p> <li> <p>Let <var>lead offset</var> be 0x81 if <var>lead</var> is less than 0x1F, otherwise 0xC1. </p> <li> <p>Let <var>trail</var> be <var>pointer</var> % 188. </p> <li> <p>Let <var>offset</var> be 0x40 if <var>trail</var> is less than 0x3F, otherwise 0x41. </p> <li> <p>Return two bytes whose values are <var>lead</var> + <var>lead offset</var> and <var>trail</var> + <var>offset</var>. </p> </ol> <h2 class="heading settled" id="legacy-multi-byte-korean-encodings"><span class="secno">13. </span><span class="content">Legacy multi-byte Korean encodings</span></h2> <h3 class="heading settled" id="euc-kr"><span class="secno">13.1. </span><span class="content">EUC-KR</span></h3> <h4 class="heading settled" id="euc-kr-decoder"><span class="secno">13.1.1. </span><span class="content">EUC-KR decoder</span></h4> <p><a href="#euc-kr">EUC-KR</a>’s <a href="#decoder">decoder</a> has an associated <dfn id="euc-kr-lead">EUC-KR lead</dfn> (initially 0x00). </p> <p><a href="#euc-kr">EUC-KR</a>’s <a href="#decoder">decoder</a>’s <a href="#handler">handler</a>, given <var>ioQueue</var> and <var>byte</var>, runs these steps: </p> <ol> <li> <p>If <var>byte</var> is <a href="#end-of-stream">end-of-queue</a> and <a href="#euc-kr-lead">EUC-KR lead</a> is not 0x00, set <a href="#euc-kr-lead">EUC-KR lead</a> to 0x00 and return <a href="#error">error</a>. </p> <li> <p>If <var>byte</var> is <a href="#end-of-stream">end-of-queue</a> and <a href="#euc-kr-lead">EUC-KR lead</a> is 0x00, return <a href="#finished">finished</a>. 
</p> <li> <p>If <a href="#euc-kr-lead">EUC-KR lead</a> is not 0x00, let <var>lead</var> be <a href="#euc-kr-lead">EUC-KR lead</a>, let <var>pointer</var> be null, set <a href="#euc-kr-lead">EUC-KR lead</a> to 0x00, and then: </p> <ol> <li> <p>If <var>byte</var> is in the range 0x41 to 0xFE, inclusive, set <var>pointer</var> to (<var>lead</var> − 0x81) × 190 + (<var>byte</var> − 0x41). </p> <li> <p>Let <var>code point</var> be null if <var>pointer</var> is null, otherwise the <a href="#index-code-point">index code point</a> for <var>pointer</var> in <a href="#index-euc-kr">index EUC-KR</a>. </p> <li> <p>If <var>code point</var> is non-null, return a code point whose value is <var>code point</var>. </p> <li> <p>If <var>byte</var> is an <a href="https://infra.spec.whatwg.org/#ascii-byte" id="ad0a769210">ASCII byte</a>, <a href="#concept-stream-prepend">prepend</a> <var>byte</var> to <var>ioQueue</var>. </p> <li> <p>Return <a href="#error">error</a>. </p> </ol> <li> <p>If <var>byte</var> is an <a href="https://infra.spec.whatwg.org/#ascii-byte" id="ad0a769211">ASCII byte</a>, return a code point whose value is <var>byte</var>. </p> <li> <p>If <var>byte</var> is in the range 0x81 to 0xFE, inclusive, set <a href="#euc-kr-lead">EUC-KR lead</a> to <var>byte</var> and return <a href="#continue">continue</a>. </p> <li> <p>Return <a href="#error">error</a>. </p> </ol> <h4 class="heading settled" id="euc-kr-encoder"><span class="secno">13.1.2. </span><span class="content">EUC-KR encoder</span></h4> <p><a href="#euc-kr">EUC-KR</a>’s <a href="#encoder">encoder</a>’s <a href="#handler">handler</a>, given <var>ioQueue</var> and <var>code point</var>, runs these steps: </p> <ol> <li> <p>If <var>code point</var> is <a href="#end-of-stream">end-of-queue</a>, return <a href="#finished">finished</a>. 
</p> <li> <p>If <var>code point</var> is an <a href="https://infra.spec.whatwg.org/#ascii-code-point" id="48a42c9411">ASCII code point</a>, return a byte whose value is <var>code point</var>. </p> <li> <p>Let <var>pointer</var> be the <a href="#index-pointer">index pointer</a> for <var>code point</var> in <a href="#index-euc-kr">index EUC-KR</a>. </p> <li> <p>If <var>pointer</var> is null, return <a href="#error">error</a> with <var>code point</var>. </p> <li> <p>Let <var>lead</var> be <var>pointer</var> / 190 + 0x81. </p> <li> <p>Let <var>trail</var> be <var>pointer</var> % 190 + 0x41. </p> <li> <p>Return two bytes whose values are <var>lead</var> and <var>trail</var>. </p> </ol> <h2 class="heading settled" id="legacy-miscellaneous-encodings"><span class="secno">14. </span><span class="content">Legacy miscellaneous encodings</span></h2> <h3 class="heading settled" id="replacement"><span class="secno">14.1. </span><span class="content">replacement</span></h3> <p class="note" role="note">The <a href="#replacement">replacement</a> <a href="#encoding">encoding</a> exists to prevent certain attacks that abuse a mismatch between <a href="#encoding">encodings</a> supported on the server and the client. </p> <h4 class="heading settled" id="replacement-decoder"><span class="secno">14.1.1. </span><span class="content">replacement decoder</span></h4> <p><a href="#replacement">replacement</a>’s <a href="#decoder">decoder</a> has an associated <dfn id="replacement-error-returned-flag">replacement error returned</dfn> (initially false). </p> <p><a href="#replacement">replacement</a>’s <a href="#decoder">decoder</a>’s <a href="#handler">handler</a>, given <var>ioQueue</var> and <var>byte</var>, runs these steps: </p> <ol> <li> <p>If <var>byte</var> is <a href="#end-of-stream">end-of-queue</a>, return <a href="#finished">finished</a>. 
</p> <li> <p>If <a href="#replacement-error-returned-flag">replacement error returned</a> is false, set <a href="#replacement-error-returned-flag">replacement error returned</a> to true and return <a href="#error">error</a>. </p> <li> <p>Return <a href="#finished">finished</a>. </p> </ol> <h3 class="heading settled" id="common-infrastructure-for-utf-16be-and-utf-16le"><span class="secno">14.2. </span><span class="content">Common infrastructure for <a href="#utf-16be-le">UTF-16BE/LE</a></span></h3> <p><dfn id="utf-16be-le">UTF-16BE/LE</dfn> is <a href="#utf-16be">UTF-16BE</a> or <a href="#utf-16le">UTF-16LE</a>. </p> <h4 class="heading settled" id="shared-utf-16-decoder"><span class="secno">14.2.1. </span><span class="content">shared UTF-16 decoder</span></h4> <p class="note" role="note">A byte order mark has priority over a label as it has been found to be more accurate in deployed content. Therefore it is not part of the <a href="#shared-utf-16-decoder">shared UTF-16 decoder</a> algorithm, but rather the <a href="#decode">decode</a> algorithm. </p> <p><a href="#shared-utf-16-decoder">shared UTF-16 decoder</a> has an associated <dfn id="utf-16-lead-byte">UTF-16 lead byte</dfn> and <dfn id="utf-16-lead-surrogate">UTF-16 lead surrogate</dfn> (both initially null), and <dfn id="utf-16be-decoder-flag">is UTF-16BE decoder</dfn> (initially false). </p> <p><a href="#shared-utf-16-decoder">shared UTF-16 decoder</a>’s <a href="#handler">handler</a>, given <var>ioQueue</var> and <var>byte</var>, runs these steps: </p> <ol> <li> <p>If <var>byte</var> is <a href="#end-of-stream">end-of-queue</a> and either <a href="#utf-16-lead-byte">UTF-16 lead byte</a> or <a href="#utf-16-lead-surrogate">UTF-16 lead surrogate</a> is non-null, set <a href="#utf-16-lead-byte">UTF-16 lead byte</a> and <a href="#utf-16-lead-surrogate">UTF-16 lead surrogate</a> to null, and return <a href="#error">error</a>. 
</p> <li> <p>If <var>byte</var> is <a href="#end-of-stream">end-of-queue</a> and <a href="#utf-16-lead-byte">UTF-16 lead byte</a> and <a href="#utf-16-lead-surrogate">UTF-16 lead surrogate</a> are null, return <a href="#finished">finished</a>. </p> <li> <p>If <a href="#utf-16-lead-byte">UTF-16 lead byte</a> is null, set <a href="#utf-16-lead-byte">UTF-16 lead byte</a> to <var>byte</var> and return <a href="#continue">continue</a>. </p> <li> <p>Let <var>code unit</var> be the result of: </p> <dl class="switch"> <dt><a href="#utf-16be-decoder-flag">is UTF-16BE decoder</a> is true <dd> <p>(<a href="#utf-16-lead-byte">UTF-16 lead byte</a> &lt;&lt; 8) + <var>byte</var>. </p> <dt><a href="#utf-16be-decoder-flag">is UTF-16BE decoder</a> is false <dd> <p>(<var>byte</var> &lt;&lt; 8) + <a href="#utf-16-lead-byte">UTF-16 lead byte</a>. </p> </dl> <p>Then set <a href="#utf-16-lead-byte">UTF-16 lead byte</a> to null. </p> <li> <p>If <a href="#utf-16-lead-surrogate">UTF-16 lead surrogate</a> is non-null, let <var>lead surrogate</var> be <a href="#utf-16-lead-surrogate">UTF-16 lead surrogate</a>, set <a href="#utf-16-lead-surrogate">UTF-16 lead surrogate</a> to null, and then: </p> <ol> <li> <p>If <var>code unit</var> is in the range U+DC00 to U+DFFF, inclusive, return a code point whose value is 0x10000 + ((<var>lead surrogate</var> − 0xD800) &lt;&lt; 10) + (<var>code unit</var> − 0xDC00). </p> <li> <p>Let <var>byte1</var> be <var>code unit</var> &gt;&gt; 8. </p> <li> <p>Let <var>byte2</var> be <var>code unit</var> &amp; 0x00FF. </p> <li> <p>Let <var>bytes</var> be two bytes whose values are <var>byte1</var> and <var>byte2</var>, if <a href="#utf-16be-decoder-flag">is UTF-16BE decoder</a> is true, and <var>byte2</var> and <var>byte1</var> otherwise. </p> <li> <p><a href="#concept-stream-prepend">Prepend</a> the <var>bytes</var> to <var>ioQueue</var> and return <a href="#error">error</a>. 
</p> </ol> <li> <p>If <var>code unit</var> is in the range U+D800 to U+DBFF, inclusive, set <a href="#utf-16-lead-surrogate">UTF-16 lead surrogate</a> to <var>code unit</var> and return <a href="#continue">continue</a>. </p> <li> <p>If <var>code unit</var> is in the range U+DC00 to U+DFFF, inclusive, return <a href="#error">error</a>. </p> <li> <p>Return code point <var>code unit</var>. </p> </ol> <h3 class="heading settled" id="utf-16be"><span class="secno">14.3. </span><span class="content">UTF-16BE</span></h3> <h4 class="heading settled" id="utf-16be-decoder"><span class="secno">14.3.1. </span><span class="content">UTF-16BE decoder</span></h4> <p><a href="#utf-16be">UTF-16BE</a>’s <a href="#decoder">decoder</a> is <a href="#shared-utf-16-decoder">shared UTF-16 decoder</a> with its <a href="#utf-16be-decoder-flag">is UTF-16BE decoder</a> set to true. </p> <h3 class="heading settled" id="utf-16le"><span class="secno">14.4. </span><span class="content">UTF-16LE</span></h3> <p class="note" role="note">"<code>utf-16</code>" is a <a href="#label">label</a> for <a href="#utf-16le">UTF-16LE</a> to deal with deployed content. </p> <h4 class="heading settled" id="utf-16le-decoder"><span class="secno">14.4.1. </span><span class="content">UTF-16LE decoder</span></h4> <p><a href="#utf-16le">UTF-16LE</a>’s <a href="#decoder">decoder</a> is <a href="#shared-utf-16-decoder">shared UTF-16 decoder</a>. </p> <h3 class="heading settled" id="x-user-defined"><span class="secno">14.5. </span><span class="content">x-user-defined</span></h3> <p class="note" role="note">While technically this is a <a href="#single-byte-encoding">single-byte encoding</a>, it is defined separately as it can be implemented algorithmically. </p> <h4 class="heading settled" id="x-user-defined-decoder"><span class="secno">14.5.1. 
</span><span class="content">x-user-defined decoder</span></h4> <p><a href="#x-user-defined">x-user-defined</a>’s <a href="#decoder">decoder</a>’s <a href="#handler">handler</a>, given <var>ioQueue</var> and <var>byte</var>, runs these steps: </p> <ol> <li> <p>If <var>byte</var> is <a href="#end-of-stream">end-of-queue</a>, return <a href="#finished">finished</a>. </p> <li> <p>If <var>byte</var> is an <a href="https://infra.spec.whatwg.org/#ascii-byte" id="ad0a769212">ASCII byte</a>, return a code point whose value is <var>byte</var>. </p> <li> <p>Return a code point whose value is 0xF780 + <var>byte</var> − 0x80. </p> </ol> <h4 class="heading settled" id="x-user-defined-encoder"><span class="secno">14.5.2. </span><span class="content">x-user-defined encoder</span></h4> <p><a href="#x-user-defined">x-user-defined</a>’s <a href="#encoder">encoder</a>’s <a href="#handler">handler</a>, given <var>ioQueue</var> and <var>code point</var>, runs these steps: </p> <ol> <li> <p>If <var>code point</var> is <a href="#end-of-stream">end-of-queue</a>, return <a href="#finished">finished</a>. </p> <li> <p>If <var>code point</var> is an <a href="https://infra.spec.whatwg.org/#ascii-code-point" id="48a42c9412">ASCII code point</a>, return a byte whose value is <var>code point</var>. </p> <li> <p>If <var>code point</var> is in the range U+F780 to U+F7FF, inclusive, return a byte whose value is <var>code point</var> − 0xF780 + 0x80. </p> <li> <p>Return <a href="#error">error</a> with <var>code point</var>. </p> </ol> <h2 class="heading settled" id="browser-ui"><span class="secno">15. </span><span class="content">Browser UI</span></h2> <p>Browsers are encouraged to not enable overriding the encoding of a resource. If such a feature is nonetheless present, browsers should not offer <a href="#utf-16be-le">UTF-16BE/LE</a> as an option, due to the aforementioned security issues. 
Browsers should also disable this feature if the resource was decoded using <a href="#utf-16be-le">UTF-16BE/LE</a>. </p> <h2 class="no-num heading settled" id="implementation-considerations"><span class="content">Implementation considerations</span></h2> <p>Instead of supporting <a href="#concept-stream">I/O queues</a> with arbitrary <a href="#concept-stream-prepend">prepend</a>, the <a href="#decoder">decoders</a> for <a href="#encoding">encodings</a> in this standard could be implemented with: </p> <ol> <li> <p>The ability to unread the current byte. </p> <li> <p>A single-byte buffer for <a href="#gb18030">gb18030</a> (an <a href="https://infra.spec.whatwg.org/#ascii-byte" id="ad0a769213">ASCII byte</a>) and <a href="#iso-2022-jp">ISO-2022-JP</a> (0x24 or 0x28). </p> <p class="example" id="example-gb18030-implementation-strategy">For <a href="#gb18030">gb18030</a> when hitting a bogus byte while <a href="#gb18030-third">gb18030 third</a> is not 0x00, <a href="#gb18030-second">gb18030 second</a> could be moved into the single-byte buffer to be returned next, and <a href="#gb18030-third">gb18030 third</a> would be the new <a href="#gb18030-first">gb18030 first</a>, checked for not being 0x00 after the single-byte buffer was returned and emptied. This is possible as the range for the first and third byte in <a href="#gb18030">gb18030</a> is identical. </p> </ol> <p>The <a href="#iso-2022-jp-encoder">ISO-2022-JP encoder</a> needs <a href="#iso-2022-jp-encoder-state">ISO-2022-JP encoder state</a> as additional state, but other than that, none of the <a href="#encoder">encoders</a> for <a href="#encoding">encodings</a> in this standard require additional state or buffers. </p> <h2 class="no-num heading settled" id="acknowledgments"><span class="content">Acknowledgments</span></h2> <p>There have been a lot of people that have helped make encodings more interoperable over the years and thereby furthered the goals of this standard. 
Likewise many people have helped make this standard what it is today. </p> <p>With that, many thanks to Adam Rice, Alan Chaney, Alexander Shtuchkin, Allen Wirfs-Brock, Andreu Botella, Aneesh Agrawal, Arkadiusz Michalski, Asmus Freytag, Ben Noordhuis, Bnaya Peretz, Boris Zbarsky, Bruno Haible, Cameron McCormack, Charles McCathieNeville, Christopher Foo, CodifierNL, David Carlisle, Domenic Denicola, Dominique Hazaël-Massieux, Doug Ewell, Erik van der Poel, 譚永鋒 (Frank Yung-Fong Tang), Glenn Maynard, Gordon P. Hemsley, Henri Sivonen, Ian Hickson, J. King, James Graham, Jeffrey Yasskin, John Tamplin, Joshua Bell, 村井純 (Jun Murai), 신정식 (Jungshik Shin), Jxck, 강 성훈 (Kang Seonghoon), 川幡太一 (Kawabata Taichi), Ken Lunde, Ken Whistler, Kenneth Russell, 田村健人 (Kent Tamura), Leif Halvard Silli, Luke Wagner, Maciej Hirsz, Makoto Kato, Mark Callow, Mark Crispin, Mark Davis, Martin Dürst, Masatoshi Kimura, Mattias Buelens, Ms2ger, Nigel Megitt, Nigel Tao, Norbert Lindenberg, Øistein E. Andersen, Peter Krefting, Philip Jägenstedt, Philip Taylor, Richard Ishida, Robbert Broersma, Robert Mustacchi, Ryan Dahl, Sam Sneddon, Shawn Steele, Simon Montagu, Simon Pieters, Simon Sapin, Stephen Checkoway, 寺田健 (Takeshi Terada), Vyacheslav Matva, Wolf Lammen, and 成瀬ゆい (Yui Naruse) for being awesome. </p> <p>This standard is written by <a href="https://annevankesteren.nl/" lang="nl">Anne van Kesteren</a> (<a href="https://www.apple.com/">Apple</a>, <a href="mailto:annevk@annevk.nl">annevk@annevk.nl</a>). The <a href="#api">API</a> chapter was initially written by Joshua Bell (<a href="https://www.google.com/">Google</a>). </p> <h2 class="no-num heading settled" id="ipr"><span class="content">Intellectual property rights</span></h2> <p>Copyright © WHATWG (Apple, Google, Mozilla, Microsoft). This work is licensed under a <a href="https://creativecommons.org/licenses/by/4.0/" rel="license">Creative Commons Attribution 4.0 International License</a>. 
To the extent portions of it are incorporated into source code, such portions in the source code are licensed under the <a href="https://opensource.org/licenses/BSD-3-Clause" rel="license">BSD 3-Clause License</a> instead.</p> </main> <h2 class="no-num no-ref heading settled" id="section-index"><span class="content">Index</span></h2> <h3 class="no-num no-ref heading settled" id="index-defined-here"><span class="content">Terms defined by this specification</span></h3> <ul class="index"> <li><a href="#big5">Big5</a><span>, in § 11</span> <li><a href="#big5-decoder">Big5 decoder</a><span>, in § 11.1</span> <li><a href="#big5-encoder">Big5 encoder</a><span>, in § 11.1.1</span> <li><a href="#big5-lead">Big5 lead</a><span>, in § 11.1.1</span> <li><a href="#textdecoder-bom-seen-flag">BOM seen</a><span>, in § 7.1</span> <li><a href="#bom-sniff">BOM sniff</a><span>, in § 6.1</span> <li> constructor() <ul> <li><a href="#dom-textdecoder">constructor for TextDecoder</a><span>, in § 7.2</span> <li><a href="#dom-textdecoderstream">constructor for TextDecoderStream</a><span>, in § 7.5</span> <li><a href="#dom-textencoder">constructor for TextEncoder</a><span>, in § 7.4</span> <li><a href="#dom-textencoderstream">constructor for TextEncoderStream</a><span>, in § 7.6</span> </ul> <li> constructor(label) <ul> <li><a href="#dom-textdecoder">constructor for TextDecoder</a><span>, in § 7.2</span> <li><a href="#dom-textdecoderstream">constructor for TextDecoderStream</a><span>, in § 7.5</span> </ul> <li> constructor(label, options) <ul> <li><a href="#dom-textdecoder">constructor for TextDecoder</a><span>, in § 7.2</span> <li><a href="#dom-textdecoderstream">constructor for TextDecoderStream</a><span>, in § 7.5</span> </ul> <li><a href="#continue">continue</a><span>, in § 4.1</span> <li> convert <ul> <li><a href="#from-i-o-queue-convert">dfn for from I/O queue</a><span>, in § 3</span> <li><a href="#to-i-o-queue-convert">dfn for to I/O queue</a><span>, in § 3</span> </ul> <li><a 
href="#convert-code-unit-to-scalar-value">convert code unit to scalar value</a><span>, in § 7.6</span> <li><a href="#decode">decode</a><span>, in § 6.1</span> <li><a href="#dom-textdecoder-decode">decode()</a><span>, in § 7.2</span> <li><a href="#decode-and-enqueue-a-chunk">decode and enqueue a chunk</a><span>, in § 7.5</span> <li><a href="#dom-textdecoder-decode">decode(input)</a><span>, in § 7.2</span> <li><a href="#dom-textdecoder-decode">decode(input, options)</a><span>, in § 7.2</span> <li> decoder <ul> <li><a href="#decoder">definition of</a><span>, in § 4.1</span> <li><a href="#textdecodercommon-decoder">dfn for TextDecoderCommon</a><span>, in § 7.1</span> </ul> <li><a href="#textdecoder-do-not-flush-flag">do not flush</a><span>, in § 7.2</span> <li><a href="#encode">encode</a><span>, in § 6.1</span> <li><a href="#dom-textencoder-encode">encode()</a><span>, in § 7.4</span> <li><a href="#encode-and-enqueue-a-chunk">encode and enqueue a chunk</a><span>, in § 7.6</span> <li><a href="#encode-and-flush">encode and flush</a><span>, in § 7.6</span> <li><a href="#dom-textencoder-encode">encode(input)</a><span>, in § 7.4</span> <li><a href="#dom-textencoder-encodeinto">encodeInto(source, destination)</a><span>, in § 7.4</span> <li><a href="#encode-or-fail">encode or fail</a><span>, in § 6.1</span> <li> encoder <ul> <li><a href="#encoder">definition of</a><span>, in § 4.1</span> <li><a href="#textencoderstream-encoder">dfn for TextEncoderStream</a><span>, in § 7.6</span> </ul> <li> encoding <ul> <li><a href="#dom-textdecoder-encoding">attribute for TextDecoderCommon</a><span>, in § 7.1</span> <li><a href="#dom-textencoder-encoding">attribute for TextEncoderCommon</a><span>, in § 7.3</span> <li><a href="#encoding">definition of</a><span>, in § 4</span> <li><a href="#textdecoder-encoding">dfn for TextDecoderCommon</a><span>, in § 7.1</span> </ul> <li><a href="#end-of-stream">End-of-queue</a><span>, in § 3</span> <li><a href="#error">error</a><span>, in § 4.1</span> <li> 
error mode <ul> <li><a href="#error-mode">definition of</a><span>, in § 4.1</span> <li><a href="#textdecoder-error-mode">dfn for TextDecoderCommon</a><span>, in § 7.1</span> </ul> <li><a href="#euc-jp">EUC-JP</a><span>, in § 12</span> <li><a href="#euc-jp-decoder">EUC-JP decoder</a><span>, in § 12.1</span> <li><a href="#euc-jp-encoder">EUC-JP encoder</a><span>, in § 12.1.1</span> <li><a href="#euc-jp-jis0212-flag">EUC-JP jis0212</a><span>, in § 12.1.1</span> <li><a href="#euc-jp-lead">EUC-JP lead</a><span>, in § 12.1.1</span> <li><a href="#euc-kr">EUC-KR</a><span>, in § 13</span> <li><a href="#euc-kr-decoder">EUC-KR decoder</a><span>, in § 13.1</span> <li><a href="#euc-kr-encoder">EUC-KR encoder</a><span>, in § 13.1.1</span> <li><a href="#euc-kr-lead">EUC-KR lead</a><span>, in § 13.1.1</span> <li> fatal <ul> <li><a href="#dom-textdecoder-fatal">attribute for TextDecoderCommon</a><span>, in § 7.1</span> <li><a href="#dom-textdecoderoptions-fatal">dict-member for TextDecoderOptions</a><span>, in § 7.2</span> </ul> <li><a href="#finished">finished</a><span>, in § 4.1</span> <li><a href="#flush-and-enqueue">flush and enqueue</a><span>, in § 7.5</span> <li><a href="#gb18030">gb18030</a><span>, in § 10.1.2</span> <li><a href="#gb18030-decoder">gb18030 decoder</a><span>, in § 10.2</span> <li><a href="#gb18030-encoder">gb18030 encoder</a><span>, in § 10.2.1</span> <li><a href="#gb18030-first">gb18030 first</a><span>, in § 10.2.1</span> <li><a href="#gb18030-second">gb18030 second</a><span>, in § 10.2.1</span> <li><a href="#gb18030-third">gb18030 third</a><span>, in § 10.2.1</span> <li><a href="#gbk">GBK</a><span>, in § 10</span> <li><a href="#gbk-decoder">GBK decoder</a><span>, in § 10.1</span> <li><a href="#gbk-encoder">GBK encoder</a><span>, in § 10.1.1</span> <li><a href="#get-an-encoder">get an encoder</a><span>, in § 6.1</span> <li><a href="#concept-encoding-get">get an encoding</a><span>, in § 4.2</span> <li><a href="#get-an-output-encoding">get an output 
encoding</a><span>, in § 4.3</span> <li><a href="#get-an-encoder">getting an encoder</a><span>, in § 6.1</span> <li><a href="#concept-encoding-get">getting an encoding</a><span>, in § 4.2</span> <li><a href="#handler">handler</a><span>, in § 4.1</span> <li><a href="#ibm866">IBM866</a><span>, in § 9</span> <li><a href="#textdecoder-ignore-bom-flag">ignore BOM</a><span>, in § 7.1</span> <li> ignoreBOM <ul> <li><a href="#dom-textdecoder-ignorebom">attribute for TextDecoderCommon</a><span>, in § 7.1</span> <li><a href="#dom-textdecoderoptions-ignorebom">dict-member for TextDecoderOptions</a><span>, in § 7.2</span> </ul> <li><a href="#index">index</a><span>, in § 5</span> <li><a href="#index-big5">index Big5</a><span>, in § 5</span> <li><a href="#index-big5-pointer">index Big5 pointer</a><span>, in § 5</span> <li><a href="#index-code-point">index code point</a><span>, in § 5</span> <li><a href="#index-euc-kr">index EUC-KR</a><span>, in § 5</span> <li><a href="#index-gb18030">index gb18030</a><span>, in § 5</span> <li><a href="#index-gb18030-ranges">index gb18030 ranges</a><span>, in § 5</span> <li><a href="#index-gb18030-ranges-code-point">index gb18030 ranges code point</a><span>, in § 5</span> <li><a href="#index-gb18030-ranges-pointer">index gb18030 ranges pointer</a><span>, in § 5</span> <li><a href="#index-iso-2022-jp-katakana">index ISO-2022-JP katakana</a><span>, in § 5</span> <li><a href="#index-jis0208">index jis0208</a><span>, in § 5</span> <li><a href="#index-jis0212">index jis0212</a><span>, in § 5</span> <li><a href="#index-pointer">index pointer</a><span>, in § 5</span> <li><a href="#index-shift_jis-pointer">index Shift_JIS pointer</a><span>, in § 5</span> <li><a href="#index-single-byte">Index single-byte</a><span>, in § 9</span> <li> I/O queue <ul> <li><a href="#concept-stream">definition of</a><span>, in § 3</span> <li><a href="#textdecodercommon-i-o-queue">dfn for TextDecoderCommon</a><span>, in § 7.1</span> </ul> <li><a href="#gbk-flag">is 
GBK</a><span>, in § 10.2.2</span> <li><a href="#iso-2022-jp">ISO-2022-JP</a><span>, in § 12.1.2</span> <li><a href="#iso-2022-jp-decoder">ISO-2022-JP decoder</a><span>, in § 12.2</span> <li><a href="#iso-2022-jp-decoder-ascii">ISO-2022-JP decoder ASCII</a><span>, in § 12.2.1</span> <li><a href="#iso-2022-jp-decoder-escape">ISO-2022-JP decoder escape</a><span>, in § 12.2.1</span> <li><a href="#iso-2022-jp-decoder-escape-start">ISO-2022-JP decoder escape start</a><span>, in § 12.2.1</span> <li><a href="#iso-2022-jp-decoder-katakana">ISO-2022-JP decoder katakana</a><span>, in § 12.2.1</span> <li><a href="#iso-2022-jp-decoder-lead-byte">ISO-2022-JP decoder lead byte</a><span>, in § 12.2.1</span> <li><a href="#iso-2022-jp-decoder-output-state">ISO-2022-JP decoder output state</a><span>, in § 12.2.1</span> <li><a href="#iso-2022-jp-decoder-roman">ISO-2022-JP decoder Roman</a><span>, in § 12.2.1</span> <li><a href="#iso-2022-jp-decoder-state">ISO-2022-JP decoder state</a><span>, in § 12.2.1</span> <li><a href="#iso-2022-jp-decoder-trail-byte">ISO-2022-JP decoder trail byte</a><span>, in § 12.2.1</span> <li><a href="#iso-2022-jp-encoder">ISO-2022-JP encoder</a><span>, in § 12.2.1</span> <li><a href="#iso-2022-jp-encoder-ascii">ISO-2022-JP encoder ASCII</a><span>, in § 12.2.2</span> <li><a href="#iso-2022-jp-encoder-jis0208">ISO-2022-JP encoder jis0208</a><span>, in § 12.2.2</span> <li><a href="#iso-2022-jp-encoder-roman">ISO-2022-JP encoder Roman</a><span>, in § 12.2.2</span> <li><a href="#iso-2022-jp-encoder-state">ISO-2022-JP encoder state</a><span>, in § 12.2.2</span> <li><a href="#iso-2022-jp-lead">ISO-2022-JP lead</a><span>, in § 12.2.1</span> <li><a href="#iso-2022-jp-output-flag">ISO-2022-JP output</a><span>, in § 12.2.1</span> <li><a href="#iso-8859-10">ISO-8859-10</a><span>, in § 9</span> <li><a href="#iso-8859-13">ISO-8859-13</a><span>, in § 9</span> <li><a href="#iso-8859-14">ISO-8859-14</a><span>, in § 9</span> <li><a href="#iso-8859-15">ISO-8859-15</a><span>, 
in § 9</span> <li><a href="#iso-8859-16">ISO-8859-16</a><span>, in § 9</span> <li><a href="#iso-8859-2">ISO-8859-2</a><span>, in § 9</span> <li><a href="#iso-8859-3">ISO-8859-3</a><span>, in § 9</span> <li><a href="#iso-8859-4">ISO-8859-4</a><span>, in § 9</span> <li><a href="#iso-8859-5">ISO-8859-5</a><span>, in § 9</span> <li><a href="#iso-8859-6">ISO-8859-6</a><span>, in § 9</span> <li><a href="#iso-8859-7">ISO-8859-7</a><span>, in § 9</span> <li><a href="#iso-8859-8">ISO-8859-8</a><span>, in § 9</span> <li><a href="#iso-8859-8-i">ISO-8859-8-I</a><span>, in § 9</span> <li><a href="#utf-16be-decoder-flag">is UTF-16BE decoder</a><span>, in § 14.2.1</span> <li><a href="#koi8-r">KOI8-R</a><span>, in § 9</span> <li><a href="#koi8-u">KOI8-U</a><span>, in § 9</span> <li><a href="#label">label</a><span>, in § 4</span> <li><a href="#macintosh">macintosh</a><span>, in § 9</span> <li><a href="#name">name</a><span>, in § 4</span> <li><a href="#i-o-queue-peek">peek</a><span>, in § 3</span> <li><a href="#textencoderstream-pending-high-surrogate">pending high surrogate</a><span>, in § 7.6</span> <li><a href="#concept-stream-prepend">prepend</a><span>, in § 3</span> <li><a href="#concept-encoding-process">process an item</a><span>, in § 4.1</span> <li><a href="#concept-encoding-run">process a queue</a><span>, in § 4.1</span> <li><a href="#concept-encoding-process">processing an item</a><span>, in § 4.1</span> <li><a href="#concept-encoding-run">processing a queue</a><span>, in § 4.1</span> <li><a href="#concept-stream-push">push</a><span>, in § 3</span> <li> read <ul> <li><a href="#concept-stream-read">dfn for I/O queue</a><span>, in § 3</span> <li><a href="#dom-textencoderencodeintoresult-read">dict-member for TextEncoderEncodeIntoResult</a><span>, in § 7.4</span> </ul> <li><a href="#replacement">replacement</a><span>, in § 14</span> <li><a href="#replacement-decoder">replacement decoder</a><span>, in § 14.1</span> <li><a href="#replacement-error-returned-flag">replacement 
error returned</a><span>, in § 14.1.1</span> <li><a href="#concept-td-serialize">serialize I/O queue</a><span>, in § 7.1</span> <li><a href="#shared-utf-16-decoder">shared UTF-16 decoder</a><span>, in § 14.2</span> <li><a href="#shift_jis">Shift_JIS</a><span>, in § 12.2.2</span> <li><a href="#shift_jis-decoder">Shift_JIS decoder</a><span>, in § 12.3</span> <li><a href="#shift_jis-encoder">Shift_JIS encoder</a><span>, in § 12.3.1</span> <li><a href="#shift_jis-lead">Shift_JIS lead</a><span>, in § 12.3.1</span> <li><a href="#single-byte-decoder">single-byte decoder</a><span>, in § 9</span> <li><a href="#single-byte-encoder">single-byte encoder</a><span>, in § 9.1</span> <li><a href="#single-byte-encoding">single-byte encoding</a><span>, in § 9</span> <li><a href="#dom-textdecodeoptions-stream">stream</a><span>, in § 7.2</span> <li><a href="#textdecodeoptions">TextDecodeOptions</a><span>, in § 7.2</span> <li><a href="#textdecoder">TextDecoder</a><span>, in § 7.2</span> <li><a href="#dom-textdecoder">TextDecoder()</a><span>, in § 7.2</span> <li><a href="#textdecodercommon">TextDecoderCommon</a><span>, in § 7.1</span> <li><a href="#dom-textdecoder">TextDecoder(label)</a><span>, in § 7.2</span> <li><a href="#dom-textdecoder">TextDecoder(label, options)</a><span>, in § 7.2</span> <li><a href="#textdecoderoptions">TextDecoderOptions</a><span>, in § 7.2</span> <li><a href="#textdecoderstream">TextDecoderStream</a><span>, in § 7.5</span> <li><a href="#dom-textdecoderstream">TextDecoderStream()</a><span>, in § 7.5</span> <li><a href="#dom-textdecoderstream">TextDecoderStream(label)</a><span>, in § 7.5</span> <li><a href="#dom-textdecoderstream">TextDecoderStream(label, options)</a><span>, in § 7.5</span> <li><a href="#textencoder">TextEncoder</a><span>, in § 7.4</span> <li><a href="#dom-textencoder">TextEncoder()</a><span>, in § 7.4</span> <li><a href="#textencodercommon">TextEncoderCommon</a><span>, in § 7.3</span> <li><a 
href="#dictdef-textencoderencodeintoresult">TextEncoderEncodeIntoResult</a><span>, in § 7.4</span> <li><a href="#textencoderstream">TextEncoderStream</a><span>, in § 7.6</span> <li><a href="#dom-textencoderstream">TextEncoderStream()</a><span>, in § 7.6</span> <li><a href="#utf-16be">UTF-16BE</a><span>, in § 14.2.1</span> <li><a href="#utf-16be-decoder">UTF-16BE decoder</a><span>, in § 14.3</span> <li><a href="#utf-16be-le">UTF-16BE/LE</a><span>, in § 14.2</span> <li><a href="#utf-16le">UTF-16LE</a><span>, in § 14.3.1</span> <li><a href="#utf-16-lead-byte">UTF-16 lead byte</a><span>, in § 14.2.1</span> <li><a href="#utf-16-lead-surrogate">UTF-16 lead surrogate</a><span>, in § 14.2.1</span> <li><a href="#utf-16le-decoder">UTF-16LE decoder</a><span>, in § 14.4</span> <li><a href="#utf-8">UTF-8</a><span>, in § 8</span> <li><a href="#utf-8-bytes-needed">UTF-8 bytes needed</a><span>, in § 8.1.1</span> <li><a href="#utf-8-bytes-seen">UTF-8 bytes seen</a><span>, in § 8.1.1</span> <li><a href="#utf-8-code-point">UTF-8 code point</a><span>, in § 8.1.1</span> <li><a href="#utf-8-decode">UTF-8 decode</a><span>, in § 6</span> <li><a href="#utf-8-decoder">UTF-8 decoder</a><span>, in § 8.1</span> <li><a href="#utf-8-decode-without-bom">UTF-8 decode without BOM</a><span>, in § 6</span> <li><a href="#utf-8-decode-without-bom-or-fail">UTF-8 decode without BOM or fail</a><span>, in § 6</span> <li><a href="#utf-8-encode">UTF-8 encode</a><span>, in § 6</span> <li><a href="#utf-8-encoder">UTF-8 encoder</a><span>, in § 8.1.1</span> <li><a href="#utf-8-lower-boundary">UTF-8 lower boundary</a><span>, in § 8.1.1</span> <li><a href="#utf-8-upper-boundary">UTF-8 upper boundary</a><span>, in § 8.1.1</span> <li><a href="#windows-1250">windows-1250</a><span>, in § 9</span> <li><a href="#windows-1251">windows-1251</a><span>, in § 9</span> <li><a href="#windows-1252">windows-1252</a><span>, in § 9</span> <li><a href="#windows-1253">windows-1253</a><span>, in § 9</span> <li><a 
href="#windows-1254">windows-1254</a><span>, in § 9</span> <li><a href="#windows-1255">windows-1255</a><span>, in § 9</span> <li><a href="#windows-1256">windows-1256</a><span>, in § 9</span> <li><a href="#windows-1257">windows-1257</a><span>, in § 9</span> <li><a href="#windows-1258">windows-1258</a><span>, in § 9</span> <li><a href="#windows-874">windows-874</a><span>, in § 9</span> <li><a href="#dom-textencoderencodeintoresult-written">written</a><span>, in § 7.4</span> <li><a href="#x-mac-cyrillic">x-mac-cyrillic</a><span>, in § 9</span> <li><a href="#x-user-defined">x-user-defined</a><span>, in § 14.4.1</span> <li><a href="#x-user-defined-decoder">x-user-defined decoder</a><span>, in § 14.5</span> <li><a href="#x-user-defined-encoder">x-user-defined encoder</a><span>, in § 14.5.1</span> </ul> <h3 class="no-num no-ref heading settled" id="index-defined-elsewhere"><span class="content">Terms defined by reference</span></h3> <ul class="index"> <li> <a>[HTML]</a> defines the following terms: <ul> <li><span class="dfn-paneled" id="c6d19e56">event loop</span> <li><span class="dfn-paneled" id="a72449dd">in parallel</span> </ul> <li> <a>[INFRA]</a> defines the following terms: <ul> <li><span class="dfn-paneled" id="53275e46">append</span> <li><span class="dfn-paneled" id="617d690e">ascii byte</span> <li><span class="dfn-paneled" id="7f9469b5">ascii case-insensitive</span> <li><span class="dfn-paneled" id="ddc587b1">ascii code point</span> <li><span class="dfn-paneled" id="6f2dfa22">ascii lowercase</span> <li><span class="dfn-paneled" id="5fb1ed8a">ascii whitespace</span> <li><span class="dfn-paneled" id="7b0d918d">break</span> <li><span class="dfn-paneled" id="f5354b9d">byte</span> <li><span class="dfn-paneled" id="3de9e659">byte sequence</span> <li><span class="dfn-paneled" id="915aff5e">code point</span> <li><span class="dfn-paneled" id="59912c93">code unit</span> <li><span class="dfn-paneled" id="ae8def21">contain</span> <li><span class="dfn-paneled" 
id="f937b7b6">continue</span> <li><span class="dfn-paneled" id="a088e610">convert</span> <li><span class="dfn-paneled" id="03afaf9c">empty</span> <li><span class="dfn-paneled" id="16d07e10">for each</span> <li><span class="dfn-paneled" id="56ad2aed">insert</span> <li><span class="dfn-paneled" id="5afbefcd">item</span> <li><span class="dfn-paneled" id="649608b9">list</span> <li><span class="dfn-paneled" id="e2fc6023">prepend</span> <li><span class="dfn-paneled" id="47dfca75">queue</span> <li><span class="dfn-paneled" id="99c988d6">remove</span> <li><span class="dfn-paneled" id="ecf251b4">scalar value</span> <li><span class="dfn-paneled" id="762869d3">scalar value string</span> <li><span class="dfn-paneled" id="0204d188">size</span> <li><span class="dfn-paneled" id="904cdc91">starts with</span> <li><span class="dfn-paneled" id="0698d556">string</span> <li><span class="dfn-paneled" id="a3fb968a">surrogate</span> <li><span class="dfn-paneled" id="febebe0e">the range</span> <li><span class="dfn-paneled" id="bb049306">value</span> </ul> <li> <a>[STREAMS]</a> defines the following terms: <ul> <li><span class="dfn-paneled" id="d61c5cab">GenericTransformStream</span> <li><span class="dfn-paneled" id="59ed4e57">ReadableStream</span> <li><span class="dfn-paneled" id="47e93e3e">TransformStream</span> <li><span class="dfn-paneled" id="2d10bf58">chunk</span> <li><span class="dfn-paneled" id="593deb55">enqueue</span> <li><span class="dfn-paneled" id="3af29886">flushalgorithm</span> <li><span class="dfn-paneled" id="c1bd634e">pipeThrough(transform)</span> <li><span class="dfn-paneled" id="2fb3c1e9">readable</span> <li><span class="dfn-paneled" id="4e9534f3">readable stream</span> <li><span class="dfn-paneled" id="2a7798d8">set up</span> <li><span class="dfn-paneled" id="eade7401">transform</span> <li><span class="dfn-paneled" id="c7574ff1">transformalgorithm</span> <li><span class="dfn-paneled" id="97ba5569">writable</span> <li><span class="dfn-paneled" id="14fa5a95">writable 
stream</span> </ul> <li> <a>[WEBIDL]</a> defines the following terms: <ul> <li><span class="dfn-paneled" id="b1e3ba8e">AllowShared</span> <li><span class="dfn-paneled" id="5e4b6157">AllowSharedBufferSource</span> <li><span class="dfn-paneled" id="2f8afbfe">ArrayBuffer</span> <li><span class="dfn-paneled" id="8855a9aa">DOMString</span> <li><span class="dfn-paneled" id="c807e273">NewObject</span> <li><span class="dfn-paneled" id="ec878a66">RangeError</span> <li><span class="dfn-paneled" id="82ca3efc">TypeError</span> <li><span class="dfn-paneled" id="b0d7f3c3">USVString</span> <li><span class="dfn-paneled" id="13d0450b">Uint32Array</span> <li><span class="dfn-paneled" id="95d7775a">Uint8Array</span> <li><span class="dfn-paneled" id="5372cca8">boolean</span> <li><span class="dfn-paneled" id="495737df">byte length</span> <li><span class="dfn-paneled" id="cadf5fe9">converted to an idl value</span> <li><span class="dfn-paneled" id="92d13070">get a copy of the buffer source</span> <li><span class="dfn-paneled" id="56f81a8e">new</span> <li><span class="dfn-paneled" id="df3c9a3c">startingoffset</span> <li><span class="dfn-paneled" id="4013a022">this</span> <li><span class="dfn-paneled" id="b4cfa5ce">throw</span> <li><span class="dfn-paneled" id="f14b47b8">unsigned long long</span> <li><span class="dfn-paneled" id="f3fd9d04">write</span> </ul> </ul> <h2 class="no-num no-ref heading settled" id="references"><span class="content">References</span></h2> <h3 class="no-num no-ref heading settled" id="normative"><span class="content">Normative References</span></h3> <dl> <dt id="biblio-infra">[INFRA] <dd>Anne van Kesteren; Domenic Denicola. <a href="https://infra.spec.whatwg.org/"><cite>Infra Standard</cite></a>. Living Standard. URL: <a href="https://infra.spec.whatwg.org/">https://infra.spec.whatwg.org/</a> <dt id="biblio-streams">[STREAMS] <dd>Adam Rice; et al. <a href="https://streams.spec.whatwg.org/"><cite>Streams Standard</cite></a>. Living Standard. 
URL: <a href="https://streams.spec.whatwg.org/">https://streams.spec.whatwg.org/</a> <dt id="biblio-unicode">[UNICODE] <dd><a href="https://www.unicode.org/versions/latest/"><cite>The Unicode Standard</cite></a>. URL: <a href="https://www.unicode.org/versions/latest/">https://www.unicode.org/versions/latest/</a> <dt id="biblio-webidl">[WEBIDL] <dd>Edgar Chen; Timothy Gu. <a href="https://webidl.spec.whatwg.org/"><cite>Web IDL Standard</cite></a>. Living Standard. URL: <a href="https://webidl.spec.whatwg.org/">https://webidl.spec.whatwg.org/</a> </dl> <h3 class="no-num no-ref heading settled" id="informative"><span class="content">Informative References</span></h3> <dl> <dt id="biblio-html">[HTML] <dd>Anne van Kesteren; et al. <a href="https://html.spec.whatwg.org/multipage/"><cite>HTML Standard</cite></a>. Living Standard. URL: <a href="https://html.spec.whatwg.org/multipage/">https://html.spec.whatwg.org/multipage/</a> <dt id="biblio-url">[URL] <dd>Anne van Kesteren. <a href="https://url.spec.whatwg.org/"><cite>URL Standard</cite></a>. Living Standard. URL: <a href="https://url.spec.whatwg.org/">https://url.spec.whatwg.org/</a> <dt id="biblio-xml">[XML] <dd>Tim Bray; et al. <a href="https://www.w3.org/TR/xml/"><cite>Extensible Markup Language (XML) 1.0 (Fifth Edition)</cite></a>. 26 November 2008. REC. 
URL: <a href="https://www.w3.org/TR/xml/">https://www.w3.org/TR/xml/</a> </dl> <h2 class="no-num no-ref heading settled" id="idl-index"><span class="content">IDL Index</span></h2> <pre class="idl highlight def">interface mixin <a href="#textdecodercommon"><code>TextDecoderCommon</code></a> { readonly attribute <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-DOMString">DOMString</a> <a class="idl-code" href="#dom-textdecoder-encoding">encoding</a>; readonly attribute <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-boolean">boolean</a> <a class="idl-code" href="#dom-textdecoder-fatal">fatal</a>; readonly attribute <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-boolean">boolean</a> <a class="idl-code" href="#dom-textdecoder-ignorebom">ignoreBOM</a>; }; dictionary <a href="#textdecoderoptions"><code>TextDecoderOptions</code></a> { <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-boolean">boolean</a> <a href="#dom-textdecoderoptions-fatal"><code>fatal</code></a> = false; <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-boolean">boolean</a> <a href="#dom-textdecoderoptions-ignorebom"><code>ignoreBOM</code></a> = false; }; dictionary <a href="#textdecodeoptions"><code>TextDecodeOptions</code></a> { <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-boolean">boolean</a> <a href="#dom-textdecodeoptions-stream"><code>stream</code></a> = false; }; [Exposed=*] interface <a href="#textdecoder"><code>TextDecoder</code></a> { <a class="idl-code" href="#dom-textdecoder">constructor</a>(optional <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-DOMString">DOMString</a> <a href="#dom-textdecoder-textdecoder-label-options-label"><code>label</code></a> = "utf-8", optional <a href="#textdecoderoptions">TextDecoderOptions</a> <a href="#dom-textdecoder-textdecoder-label-options-options"><code>options</code></a> = {}); <a class="idl-code" 
href="https://webidl.spec.whatwg.org/#idl-USVString">USVString</a> <a class="idl-code" href="#dom-textdecoder-decode">decode</a>(optional <a href="https://webidl.spec.whatwg.org/#AllowSharedBufferSource">AllowSharedBufferSource</a> <a href="#dom-textdecoder-decode-input-options-input"><code>input</code></a>, optional <a href="#textdecodeoptions">TextDecodeOptions</a> <a href="#dom-textdecoder-decode-input-options-options"><code>options</code></a> = {}); }; <a href="#textdecoder">TextDecoder</a> includes <a href="#textdecodercommon">TextDecoderCommon</a>; interface mixin <a href="#textencodercommon"><code>TextEncoderCommon</code></a> { readonly attribute <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-DOMString">DOMString</a> <a class="idl-code" href="#dom-textencoder-encoding">encoding</a>; }; dictionary <a href="#dictdef-textencoderencodeintoresult"><code>TextEncoderEncodeIntoResult</code></a> { <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-unsigned-long-long">unsigned long long</a> <a href="#dom-textencoderencodeintoresult-read"><code>read</code></a>; <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-unsigned-long-long">unsigned long long</a> <a href="#dom-textencoderencodeintoresult-written"><code>written</code></a>; }; [Exposed=*] interface <a href="#textencoder"><code>TextEncoder</code></a> { <a class="idl-code" href="#dom-textencoder">constructor</a>(); [<a class="idl-code" href="https://webidl.spec.whatwg.org/#NewObject">NewObject</a>] <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-Uint8Array">Uint8Array</a> <a class="idl-code" href="#dom-textencoder-encode">encode</a>(optional <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-USVString">USVString</a> <a href="#dom-textencoder-encode-input-input"><code>input</code></a> = ""); <a href="#dictdef-textencoderencodeintoresult">TextEncoderEncodeIntoResult</a> <a class="idl-code" href="#dom-textencoder-encodeinto">encodeInto</a>(<a 
class="idl-code" href="https://webidl.spec.whatwg.org/#idl-USVString">USVString</a> <a href="#dom-textencoder-encodeinto-source-destination-source"><code>source</code></a>, [<a class="idl-code" href="https://webidl.spec.whatwg.org/#AllowShared">AllowShared</a>] <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-Uint8Array">Uint8Array</a> <a href="#dom-textencoder-encodeinto-source-destination-destination"><code>destination</code></a>); }; <a href="#textencoder">TextEncoder</a> includes <a href="#textencodercommon">TextEncoderCommon</a>; [Exposed=*] interface <a href="#textdecoderstream"><code>TextDecoderStream</code></a> { <a class="idl-code" href="#dom-textdecoderstream">constructor</a>(optional <a class="idl-code" href="https://webidl.spec.whatwg.org/#idl-DOMString">DOMString</a> <a href="#dom-textdecoderstream-textdecoderstream-label-options-label"><code>label</code></a> = "utf-8", optional <a href="#textdecoderoptions">TextDecoderOptions</a> <a href="#dom-textdecoderstream-textdecoderstream-label-options-options"><code>options</code></a> = {}); }; <a href="#textdecoderstream">TextDecoderStream</a> includes <a href="#textdecodercommon">TextDecoderCommon</a>; <a href="#textdecoderstream">TextDecoderStream</a> includes <a href="https://streams.spec.whatwg.org/#generictransformstream">GenericTransformStream</a>; [Exposed=*] interface <a href="#textencoderstream"><code>TextEncoderStream</code></a> { <a class="idl-code" href="#dom-textencoderstream">constructor</a>(); }; <a href="#textencoderstream">TextEncoderStream</a> includes <a href="#textencodercommon">TextEncoderCommon</a>; <a href="#textencoderstream">TextEncoderStream</a> includes <a href="https://streams.spec.whatwg.org/#generictransformstream">GenericTransformStream</a>; </pre>
<script>/* Boilerplate: script-dfn-panel */
"use strict";
{
  /*
   * Definition-panel behaviour: for every entry in window.dfnpanelData an
   * info panel is generated, placed right after the matching definition
   * element, and shown/hidden on click or keyboard activation.
   *
   * NOTE: only block comments and explicit semicolons are used in this
   * script on purpose, so that it stays parseable even if the document's
   * whitespace is collapsed onto a single line.
   *
   * NOTE(review): mk.*, append() and queryAll() are not defined in this
   * script; presumably another boilerplate script of the page provides
   * them. Confirm it is loaded before DOMContentLoaded fires.
   */

  /**
   * Build the aside.dfn-panel element for one definition.
   *
   * @param {Object} panelData          One value of window.dfnpanelData.
   * @param {string} panelData.dfnID    id of the definition element.
   * @param {string} panelData.url      Link target shown at the top of the panel.
   * @param {string} panelData.dfnText  Text used in the hidden panel title.
   * @param {Array}  panelData.refSections  Items of the form
   *        {title, refs: [{id}, ...]}, one per referencing section.
   * @param {boolean} panelData.external  Adds the word "external" to the title.
   * @returns {Element} whatever mk.aside() returns (expected: the panel element).
   */
  function genDfnPanel({dfnID, url, dfnText, refSections, external}) {
    return mk.aside({
        class: "dfn-panel",
        id: `infopanel-for-${dfnID}`,
        "data-for": dfnID,
        /* The ARIA attribute is spelled "aria-labelledby" (no second hyphen). */
        "aria-labelledby": `infopaneltitle-for-${dfnID}`,
      },
      mk.span({id: `infopaneltitle-for-${dfnID}`, style: "display:none"},
        `Info about the '${dfnText}' ${external?"external":""} reference.`),
      mk.a({href: url}, url),
      mk.b({}, "Referenced in:"),
      mk.ul({},
        ...refSections.map(section =>
          mk.li({},
            /* First reference is labelled with the section title,
               later ones with "(2)", "(3)", ... */
            ...section.refs.map((ref, refI) => [
              mk.a({ href: `#${ref.id}` },
                (refI == 0) ? section.title : `(${refI + 1})`),
              " ",
            ]),
          ),
        ),
      ),
    );
  }

  /**
   * Generate and attach a panel for every entry of window.dfnpanelData.
   * Entries whose definition element cannot be found are logged and skipped.
   */
  function genAllDfnPanels() {
    for(const panelData of Object.values(window.dfnpanelData)) {
      const dfnID = panelData.dfnID;
      const dfn = document.getElementById(dfnID);
      if(!dfn) {
        console.log(`Can't find dfn#${dfnID}.`, panelData);
      } else {
        const panel = genDfnPanel(panelData);
        append(document.body, panel);
        /* insertDfnPopupAction() re-parents the panel next to its dfn. */
        insertDfnPopupAction(dfn, panel);
      }
    }
  }

  document.addEventListener("DOMContentLoaded", () => {
    genAllDfnPanels();

    document.body.addEventListener("click", () => {
      /* If not handled already, just hide all dfn panels. */
      hideAllDfnPanels();
    });
  });

  /** Hide every panel that is currently shown ("on") or pinned ("activated"). */
  function hideAllDfnPanels() {
    queryAll(".dfn-panel.on, .dfn-panel.activated").forEach(el => hideDfnPanel(el));
  }

  /**
   * Show dfnPanel for dfn, hiding any other open panel first.
   *
   * @param {Element} dfnPanel  The panel to show.
   * @param {Element} dfn       The definition element the panel belongs to.
   */
  function showDfnPanel(dfnPanel, dfn) {
    hideAllDfnPanels(); /* Only display one at this time. */
    dfn.setAttribute("aria-expanded", "true");
    dfnPanel.classList.add("on");
    dfnPanel.style.left = "5px";
    dfnPanel.style.top = "0px";
    const panelRect = dfnPanel.getBoundingClientRect();
    if (panelRect.right > document.body.scrollWidth) {
      /* Panel's overflowing the screen.
         Just drop it below the dfn and flip it rightward instead.
         This still won't fix things if the screen is *really* wide,
         but fixing that's a lot harder without 'anchor()'. */
      dfnPanel.style.top = "1.5em";
      dfnPanel.style.left = "auto";
      dfnPanel.style.right = "0px";
    }
  }

  /**
   * Switch the panel to the "activated" state, which pins it, and clear the
   * inline left/top offsets set by showDfnPanel().
   */
  function pinDfnPanel(dfnPanel) {
    dfnPanel.classList.add("activated");
    dfnPanel.style.left = null;
    dfnPanel.style.top = null;
  }

  /**
   * Hide dfnPanel and mark its definition as collapsed.
   *
   * @param {Element} dfnPanel  The panel to hide.
   * @param {Element} [dfn]     Its definition element; looked up through the
   *                            panel's data-for attribute when omitted.
   */
  function hideDfnPanel(dfnPanel, dfn) {
    if(!dfn) {
      dfn = document.getElementById(dfnPanel.getAttribute("data-for"));
    }
    /* Still hide the panel when the definition element cannot be found,
       instead of throwing and leaving the panel open. */
    if(dfn) {
      dfn.setAttribute("aria-expanded", "false");
    }
    dfnPanel.classList.remove("on");
    dfnPanel.classList.remove("activated");
  }

  /** Hide the panel if it is currently shown, otherwise show it. */
  function toggleDfnPanel(dfnPanel, dfn) {
    if(dfnPanel.classList.contains("on")) {
      hideDfnPanel(dfnPanel, dfn);
    } else {
      showDfnPanel(dfnPanel, dfn);
    }
  }

  /**
   * Place dfnPanel directly after dfn (inside a zero-height, relatively
   * positioned span) and wire up mouse and keyboard interaction.
   *
   * @param {Element} dfn       The definition element that opens the panel.
   * @param {Element} dfnPanel  The panel generated for that definition.
   */
  function insertDfnPopupAction(dfn, dfnPanel) {
    const panelWrapper = document.createElement('span');
    panelWrapper.appendChild(dfnPanel);
    panelWrapper.style.position = "relative";
    panelWrapper.style.height = "0px";
    dfn.insertAdjacentElement("afterend", panelWrapper);

    /* Expose the definition as a focusable, button-like control. */
    dfn.setAttribute('role', 'button');
    dfn.setAttribute('aria-expanded', 'false');
    dfn.tabIndex = 0;
    dfn.classList.add('has-dfn-panel');

    dfn.addEventListener('click', (event) => {
      showDfnPanel(dfnPanel, dfn);
      /* Keep the body click handler from hiding the panel again. */
      event.stopPropagation();
    });
    dfn.addEventListener('keypress', (event) => {
      const kc = event.keyCode;
      /* 32->Space, 13->Enter */
      if(kc == 32 || kc == 13) {
        toggleDfnPanel(dfnPanel, dfn);
        event.stopPropagation();
        event.preventDefault();
      }
    });

    dfnPanel.addEventListener('click', (event) => {
      /* Clicking a link inside the panel pins it. */
      if (event.target.nodeName == 'A') {
        pinDfnPanel(dfnPanel);
      }
      event.stopPropagation();
    });
    dfnPanel.addEventListener('keydown', (event) => {
      if(event.keyCode == 27) { /* Escape key */
        hideDfnPanel(dfnPanel, dfn);
        event.stopPropagation();
        event.preventDefault();
      }
    });
  }
}
</script>
<script>/* Boilerplate: script-dfn-panel-json */ window.dfnpanelData = {}; window.dfnpanelData['c6d19e56'] = {"dfnID": "c6d19e56", "url": "https://html.spec.whatwg.org/multipage/webappapis.html#event-loop", "dfnText": "event loop", "refSections": [{"refs": [{"id": "c60e545a0"}], "title": 
"3. Terminology"}], "external": true}; window.dfnpanelData['a72449dd'] = {"dfnID": "a72449dd", "url": "https://html.spec.whatwg.org/multipage/infrastructure.html#in-parallel", "dfnText": "in parallel", "refSections": [{"refs": [{"id": "a459951e0"}], "title": "3. Terminology"}, {"refs": [{"id": "a459951e1"}], "title": "6. Hooks for standards"}], "external": true}; window.dfnpanelData['53275e46'] = {"dfnID": "53275e46", "url": "https://infra.spec.whatwg.org/#list-append", "dfnText": "append", "refSections": [{"refs": [{"id": "6afa26dd0"}, {"id": "6afa26dd1"}, {"id": "6afa26dd2"}], "title": "3. Terminology"}], "external": true}; window.dfnpanelData['617d690e'] = {"dfnID": "617d690e", "url": "https://infra.spec.whatwg.org/#ascii-byte", "dfnText": "ascii byte", "refSections": [{"refs": [{"id": "ad0a76920"}], "title": "2. Security background"}, {"refs": [{"id": "ad0a76921"}], "title": "9.1. single-byte decoder"}, {"refs": [{"id": "ad0a76922"}, {"id": "ad0a76923"}], "title": "10.2.1. gb18030 decoder"}, {"refs": [{"id": "ad0a76924"}, {"id": "ad0a76925"}], "title": "11.1.1. Big5 decoder"}, {"refs": [{"id": "ad0a76926"}, {"id": "ad0a76927"}], "title": "12.1.1. EUC-JP decoder"}, {"refs": [{"id": "ad0a76928"}, {"id": "ad0a76929"}], "title": "12.3.1. Shift_JIS decoder"}, {"refs": [{"id": "ad0a769210"}, {"id": "ad0a769211"}], "title": "13.1.1. EUC-KR decoder"}, {"refs": [{"id": "ad0a769212"}], "title": "14.5.1. x-user-defined decoder"}, {"refs": [{"id": "ad0a769213"}], "title": "Implementation considerations"}], "external": true}; window.dfnpanelData['7f9469b5'] = {"dfnID": "7f9469b5", "url": "https://infra.spec.whatwg.org/#ascii-case-insensitive", "dfnText": "ascii case-insensitive", "refSections": [{"refs": [{"id": "1274df4f0"}, {"id": "1274df4f1"}], "title": "4.2. 
Names and labels"}], "external": true}; window.dfnpanelData['ddc587b1'] = {"dfnID": "ddc587b1", "url": "https://infra.spec.whatwg.org/#ascii-code-point", "dfnText": "ascii code point", "refSections": [{"refs": [{"id": "48a42c940"}], "title": "2. Security background"}, {"refs": [{"id": "48a42c941"}], "title": "8.1.2. UTF-8 encoder"}, {"refs": [{"id": "48a42c942"}], "title": "9.2. single-byte encoder"}, {"refs": [{"id": "48a42c943"}], "title": "10.2.2. gb18030 encoder"}, {"refs": [{"id": "48a42c944"}], "title": "11.1.2. Big5 encoder"}, {"refs": [{"id": "48a42c945"}], "title": "12.1.2. EUC-JP encoder"}, {"refs": [{"id": "48a42c946"}, {"id": "48a42c947"}, {"id": "48a42c948"}, {"id": "48a42c949"}], "title": "12.2.2. ISO-2022-JP encoder"}, {"refs": [{"id": "48a42c9410"}], "title": "12.3.2. Shift_JIS encoder"}, {"refs": [{"id": "48a42c9411"}], "title": "13.1.2. EUC-KR encoder"}, {"refs": [{"id": "48a42c9412"}], "title": "14.5.2. x-user-defined encoder"}], "external": true}; window.dfnpanelData['6f2dfa22'] = {"dfnID": "6f2dfa22", "url": "https://infra.spec.whatwg.org/#ascii-lowercase", "dfnText": "ascii lowercase", "refSections": [{"refs": [{"id": "441210640"}], "title": "4.2. Names and labels"}, {"refs": [{"id": "441210641"}], "title": "7.1. Interface mixin TextDecoderCommon"}], "external": true}; window.dfnpanelData['5fb1ed8a'] = {"dfnID": "5fb1ed8a", "url": "https://infra.spec.whatwg.org/#ascii-whitespace", "dfnText": "ascii whitespace", "refSections": [{"refs": [{"id": "9b09dd760"}], "title": "4.2. Names and labels"}], "external": true}; window.dfnpanelData['7b0d918d'] = {"dfnID": "7b0d918d", "url": "https://infra.spec.whatwg.org/#iteration-break", "dfnText": "break", "refSections": [{"refs": [{"id": "7a438e490"}], "title": "3. Terminology"}, {"refs": [{"id": "7a438e491"}, {"id": "7a438e492"}], "title": "7.4. 
Interface TextEncoder"}], "external": true}; window.dfnpanelData['f5354b9d'] = {"dfnID": "f5354b9d", "url": "https://infra.spec.whatwg.org/#byte", "dfnText": "byte", "refSections": [{"refs": [{"id": "b69a47660"}], "title": "3. Terminology"}, {"refs": [{"id": "b69a47661"}], "title": "4. Encodings"}], "external": true}; window.dfnpanelData['3de9e659'] = {"dfnID": "3de9e659", "url": "https://infra.spec.whatwg.org/#byte-sequence", "dfnText": "byte sequence", "refSections": [{"refs": [{"id": "c2327ca40"}, {"id": "c2327ca41"}], "title": "3. Terminology"}], "external": true}; window.dfnpanelData['915aff5e'] = {"dfnID": "915aff5e", "url": "https://infra.spec.whatwg.org/#code-point", "dfnText": "code point", "refSections": [{"refs": [{"id": "b61de9b10"}, {"id": "b61de9b11"}], "title": "4.1. Encoders and decoders"}, {"refs": [{"id": "b61de9b12"}, {"id": "b61de9b13"}], "title": "6.1. Legacy hooks for standards"}], "external": true}; window.dfnpanelData['59912c93'] = {"dfnID": "59912c93", "url": "https://infra.spec.whatwg.org/#code-unit", "dfnText": "code unit", "refSections": [{"refs": [{"id": "d0892c6f0"}], "title": "7.4. Interface TextEncoder"}, {"refs": [{"id": "d0892c6f1"}, {"id": "d0892c6f2"}], "title": "7.6. Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['ae8def21'] = {"dfnID": "ae8def21", "url": "https://infra.spec.whatwg.org/#list-contain", "dfnText": "contain", "refSections": [{"refs": [{"id": "6e0f99b10"}, {"id": "6e0f99b11"}], "title": "3. Terminology"}], "external": true}; window.dfnpanelData['f937b7b6'] = {"dfnID": "f937b7b6", "url": "https://infra.spec.whatwg.org/#iteration-continue", "dfnText": "continue", "refSections": [{"refs": [{"id": "dfd6b7da0"}], "title": "7.1. Interface mixin TextDecoderCommon"}], "external": true}; window.dfnpanelData['a088e610'] = {"dfnID": "a088e610", "url": "https://infra.spec.whatwg.org/#javascript-string-convert", "dfnText": "convert", "refSections": [{"refs": [{"id": "5dcd6b6c0"}], "title": "7.6. 
Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['03afaf9c'] = {"dfnID": "03afaf9c", "url": "https://infra.spec.whatwg.org/#list-empty", "dfnText": "empty", "refSections": [{"refs": [{"id": "0a62c3180"}], "title": "3. Terminology"}], "external": true}; window.dfnpanelData['16d07e10'] = {"dfnID": "16d07e10", "url": "https://infra.spec.whatwg.org/#list-iterate", "dfnText": "for each", "refSections": [{"refs": [{"id": "3f0f895a0"}], "title": "3. Terminology"}], "external": true}; window.dfnpanelData['56ad2aed'] = {"dfnID": "56ad2aed", "url": "https://infra.spec.whatwg.org/#list-insert", "dfnText": "insert", "refSections": [{"refs": [{"id": "4f0ded9e0"}], "title": "3. Terminology"}], "external": true}; window.dfnpanelData['5afbefcd'] = {"dfnID": "5afbefcd", "url": "https://infra.spec.whatwg.org/#list-item", "dfnText": "item", "refSections": [{"refs": [{"id": "2d67d96e0"}, {"id": "2d67d96e1"}, {"id": "2d67d96e2"}, {"id": "2d67d96e3"}, {"id": "2d67d96e4"}, {"id": "2d67d96e5"}, {"id": "2d67d96e6"}, {"id": "2d67d96e7"}, {"id": "2d67d96e8"}, {"id": "2d67d96e9"}, {"id": "2d67d96e10"}, {"id": "2d67d96e11"}, {"id": "2d67d96e12"}], "title": "3. Terminology"}, {"refs": [{"id": "2d67d96e13"}, {"id": "2d67d96e14"}, {"id": "2d67d96e15"}, {"id": "2d67d96e16"}], "title": "4.1. Encoders and decoders"}], "external": true}; window.dfnpanelData['649608b9'] = {"dfnID": "649608b9", "url": "https://infra.spec.whatwg.org/#list", "dfnText": "list", "refSections": [{"refs": [{"id": "afe3eb660"}, {"id": "afe3eb661"}, {"id": "afe3eb662"}, {"id": "afe3eb663"}, {"id": "afe3eb664"}, {"id": "afe3eb665"}], "title": "3. Terminology"}], "external": true}; window.dfnpanelData['e2fc6023'] = {"dfnID": "e2fc6023", "url": "https://infra.spec.whatwg.org/#list-prepend", "dfnText": "prepend", "refSections": [{"refs": [{"id": "967137770"}], "title": "3. 
Terminology"}], "external": true}; window.dfnpanelData['47dfca75'] = {"dfnID": "47dfca75", "url": "https://infra.spec.whatwg.org/#queue", "dfnText": "queue", "refSections": [{"refs": [{"id": "9889b21e0"}], "title": "3. Terminology"}], "external": true}; window.dfnpanelData['99c988d6'] = {"dfnID": "99c988d6", "url": "https://infra.spec.whatwg.org/#list-remove", "dfnText": "remove", "refSections": [{"refs": [{"id": "6e78fe0c0"}, {"id": "6e78fe0c1"}], "title": "3. Terminology"}], "external": true}; window.dfnpanelData['ecf251b4'] = {"dfnID": "ecf251b4", "url": "https://infra.spec.whatwg.org/#scalar-value", "dfnText": "scalar value", "refSections": [{"refs": [{"id": "61ea31220"}], "title": "3. Terminology"}, {"refs": [{"id": "61ea31221"}], "title": "4. Encodings"}], "external": true}; window.dfnpanelData['762869d3'] = {"dfnID": "762869d3", "url": "https://infra.spec.whatwg.org/#scalar-value-string", "dfnText": "scalar value string", "refSections": [{"refs": [{"id": "9739dc530"}], "title": "7.6. Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['0204d188'] = {"dfnID": "0204d188", "url": "https://infra.spec.whatwg.org/#list-size", "dfnText": "size", "refSections": [{"refs": [{"id": "222107ce0"}, {"id": "222107ce1"}], "title": "3. Terminology"}], "external": true}; window.dfnpanelData['904cdc91'] = {"dfnID": "904cdc91", "url": "https://infra.spec.whatwg.org/#byte-sequence-starts-with", "dfnText": "starts with", "refSections": [{"refs": [{"id": "49a960210"}], "title": "6.1. Legacy hooks for standards"}], "external": true}; window.dfnpanelData['0698d556'] = {"dfnID": "0698d556", "url": "https://infra.spec.whatwg.org/#string", "dfnText": "string", "refSections": [{"refs": [{"id": "14014d7f0"}, {"id": "14014d7f1"}], "title": "3. Terminology"}, {"refs": [{"id": "14014d7f2"}], "title": "7.6. 
Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['a3fb968a'] = {"dfnID": "a3fb968a", "url": "https://infra.spec.whatwg.org/#surrogate", "dfnText": "surrogate", "refSections": [{"refs": [{"id": "1be036c80"}, {"id": "1be036c81"}], "title": "4.1. Encoders and decoders"}, {"refs": [{"id": "1be036c82"}], "title": "6. Hooks for standards"}, {"refs": [{"id": "1be036c83"}], "title": "7.6. Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['febebe0e'] = {"dfnID": "febebe0e", "url": "https://infra.spec.whatwg.org/#the-range", "dfnText": "the range", "refSections": [{"refs": [{"id": "f8e0c3a50"}], "title": "3. Terminology"}], "external": true}; window.dfnpanelData['bb049306'] = {"dfnID": "bb049306", "url": "https://infra.spec.whatwg.org/#code-point-value", "dfnText": "value", "refSections": [{"refs": [{"id": "2bff95cb0"}], "title": "4.1. Encoders and decoders"}, {"refs": [{"id": "2bff95cb1"}], "title": "6.1. Legacy hooks for standards"}], "external": true}; window.dfnpanelData['d61c5cab'] = {"dfnID": "d61c5cab", "url": "https://streams.spec.whatwg.org/#generictransformstream", "dfnText": "GenericTransformStream", "refSections": [{"refs": [{"id": "af5e2e850"}], "title": "7.5. Interface TextDecoderStream"}, {"refs": [{"id": "af5e2e851"}], "title": "7.6. Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['59ed4e57'] = {"dfnID": "59ed4e57", "url": "https://streams.spec.whatwg.org/#readablestream", "dfnText": "ReadableStream", "refSections": [{"refs": [{"id": "e42e67350"}, {"id": "e42e67351"}], "title": "7.5. Interface TextDecoderStream"}, {"refs": [{"id": "e42e67352"}], "title": "7.6. Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['47e93e3e'] = {"dfnID": "47e93e3e", "url": "https://streams.spec.whatwg.org/#transformstream", "dfnText": "TransformStream", "refSections": [{"refs": [{"id": "a26122b10"}], "title": "7.5. Interface TextDecoderStream"}, {"refs": [{"id": "a26122b11"}], "title": "7.6. 
Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['2d10bf58'] = {"dfnID": "2d10bf58", "url": "https://streams.spec.whatwg.org/#chunk", "dfnText": "chunk", "refSections": [{"refs": [{"id": "a63869ef0"}], "title": "7.5. Interface TextDecoderStream"}, {"refs": [{"id": "a63869ef1"}], "title": "7.6. Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['593deb55'] = {"dfnID": "593deb55", "url": "https://streams.spec.whatwg.org/#transformstream-enqueue", "dfnText": "enqueue", "refSections": [{"refs": [{"id": "9c9f0e820"}, {"id": "9c9f0e821"}], "title": "7.5. Interface TextDecoderStream"}, {"refs": [{"id": "9c9f0e822"}, {"id": "9c9f0e823"}], "title": "7.6. Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['3af29886'] = {"dfnID": "3af29886", "url": "https://streams.spec.whatwg.org/#transformstream-set-up-flushalgorithm", "dfnText": "flushalgorithm", "refSections": [{"refs": [{"id": "a54978b30"}], "title": "7.5. Interface TextDecoderStream"}, {"refs": [{"id": "a54978b31"}], "title": "7.6. Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['c1bd634e'] = {"dfnID": "c1bd634e", "url": "https://streams.spec.whatwg.org/#rs-pipe-through", "dfnText": "pipeThrough(transform)", "refSections": [{"refs": [{"id": "3bf29ae90"}], "title": "7.5. Interface TextDecoderStream"}, {"refs": [{"id": "3bf29ae91"}], "title": "7.6. Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['2fb3c1e9'] = {"dfnID": "2fb3c1e9", "url": "https://streams.spec.whatwg.org/#dom-generictransformstream-readable", "dfnText": "readable", "refSections": [{"refs": [{"id": "271adbee0"}, {"id": "271adbee1"}, {"id": "271adbee2"}], "title": "7.5. Interface TextDecoderStream"}, {"refs": [{"id": "271adbee3"}, {"id": "271adbee4"}], "title": "7.6. 
Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['4e9534f3'] = {"dfnID": "4e9534f3", "url": "https://streams.spec.whatwg.org/#readable-stream", "dfnText": "readable stream", "refSections": [{"refs": [{"id": "02cae24a0"}], "title": "7.5. Interface TextDecoderStream"}, {"refs": [{"id": "02cae24a1"}], "title": "7.6. Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['2a7798d8'] = {"dfnID": "2a7798d8", "url": "https://streams.spec.whatwg.org/#transformstream-set-up", "dfnText": "set up", "refSections": [{"refs": [{"id": "4229e9a80"}], "title": "7.5. Interface TextDecoderStream"}, {"refs": [{"id": "4229e9a81"}], "title": "7.6. Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['eade7401'] = {"dfnID": "eade7401", "url": "https://streams.spec.whatwg.org/#generictransformstream-transform", "dfnText": "transform", "refSections": [{"refs": [{"id": "d582c3cd0"}, {"id": "d582c3cd1"}, {"id": "d582c3cd2"}], "title": "7.5. Interface TextDecoderStream"}, {"refs": [{"id": "d582c3cd3"}, {"id": "d582c3cd4"}, {"id": "d582c3cd5"}], "title": "7.6. Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['c7574ff1'] = {"dfnID": "c7574ff1", "url": "https://streams.spec.whatwg.org/#transformstream-set-up-transformalgorithm", "dfnText": "transformalgorithm", "refSections": [{"refs": [{"id": "14e4f1490"}], "title": "7.5. Interface TextDecoderStream"}, {"refs": [{"id": "14e4f1491"}], "title": "7.6. Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['97ba5569'] = {"dfnID": "97ba5569", "url": "https://streams.spec.whatwg.org/#dom-generictransformstream-writable", "dfnText": "writable", "refSections": [{"refs": [{"id": "92533bb30"}, {"id": "92533bb31"}, {"id": "92533bb32"}], "title": "7.5. Interface TextDecoderStream"}, {"refs": [{"id": "92533bb33"}, {"id": "92533bb34"}], "title": "7.6. 
Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['14fa5a95'] = {"dfnID": "14fa5a95", "url": "https://streams.spec.whatwg.org/#writable-stream", "dfnText": "writable stream", "refSections": [{"refs": [{"id": "74cfb0750"}], "title": "7.5. Interface TextDecoderStream"}, {"refs": [{"id": "74cfb0751"}], "title": "7.6. Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['b1e3ba8e'] = {"dfnID": "b1e3ba8e", "url": "https://webidl.spec.whatwg.org/#AllowShared", "dfnText": "AllowShared", "refSections": [{"refs": [{"id": "ac6a6bf70"}], "title": "7.4. Interface TextEncoder"}], "external": true}; window.dfnpanelData['5e4b6157'] = {"dfnID": "5e4b6157", "url": "https://webidl.spec.whatwg.org/#AllowSharedBufferSource", "dfnText": "AllowSharedBufferSource", "refSections": [{"refs": [{"id": "8145e9370"}], "title": "7.2. Interface TextDecoder"}, {"refs": [{"id": "8145e9371"}, {"id": "8145e9372"}], "title": "7.5. Interface TextDecoderStream"}], "external": true}; window.dfnpanelData['2f8afbfe'] = {"dfnID": "2f8afbfe", "url": "https://webidl.spec.whatwg.org/#idl-ArrayBuffer", "dfnText": "ArrayBuffer", "refSections": [{"refs": [{"id": "6cfb0bb30"}, {"id": "6cfb0bb31"}], "title": "7. API"}, {"refs": [{"id": "6cfb0bb32"}, {"id": "6cfb0bb33"}], "title": "7.4. Interface TextEncoder"}, {"refs": [{"id": "6cfb0bb34"}, {"id": "6cfb0bb35"}], "title": "7.6. Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['8855a9aa'] = {"dfnID": "8855a9aa", "url": "https://webidl.spec.whatwg.org/#idl-DOMString", "dfnText": "DOMString", "refSections": [{"refs": [{"id": "162027790"}], "title": "7.1. Interface mixin TextDecoderCommon"}, {"refs": [{"id": "162027791"}], "title": "7.2. Interface TextDecoder"}, {"refs": [{"id": "162027792"}], "title": "7.3. Interface mixin TextEncoderCommon"}, {"refs": [{"id": "162027793"}], "title": "7.5. Interface TextDecoderStream"}, {"refs": [{"id": "162027794"}, {"id": "162027795"}], "title": "7.6. 
Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['c807e273'] = {"dfnID": "c807e273", "url": "https://webidl.spec.whatwg.org/#NewObject", "dfnText": "NewObject", "refSections": [{"refs": [{"id": "f8bab20f0"}], "title": "7.4. Interface TextEncoder"}], "external": true}; window.dfnpanelData['ec878a66'] = {"dfnID": "ec878a66", "url": "https://webidl.spec.whatwg.org/#exceptiondef-rangeerror", "dfnText": "RangeError", "refSections": [{"refs": [{"id": "a0324b580"}, {"id": "a0324b581"}], "title": "7.2. Interface TextDecoder"}, {"refs": [{"id": "a0324b582"}, {"id": "a0324b583"}], "title": "7.5. Interface TextDecoderStream"}], "external": true}; window.dfnpanelData['82ca3efc'] = {"dfnID": "82ca3efc", "url": "https://webidl.spec.whatwg.org/#exceptiondef-typeerror", "dfnText": "TypeError", "refSections": [{"refs": [{"id": "20fb259a0"}, {"id": "20fb259a1"}], "title": "7.2. Interface TextDecoder"}, {"refs": [{"id": "20fb259a2"}, {"id": "20fb259a3"}, {"id": "20fb259a4"}], "title": "7.5. Interface TextDecoderStream"}], "external": true}; window.dfnpanelData['b0d7f3c3'] = {"dfnID": "b0d7f3c3", "url": "https://webidl.spec.whatwg.org/#idl-USVString", "dfnText": "USVString", "refSections": [{"refs": [{"id": "63dcd5930"}], "title": "7.2. Interface TextDecoder"}, {"refs": [{"id": "63dcd5931"}, {"id": "63dcd5932"}], "title": "7.4. Interface TextEncoder"}, {"refs": [{"id": "63dcd5933"}], "title": "7.6. Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['13d0450b'] = {"dfnID": "13d0450b", "url": "https://webidl.spec.whatwg.org/#idl-Uint32Array", "dfnText": "Uint32Array", "refSections": [{"refs": [{"id": "ba0cf2600"}, {"id": "ba0cf2601"}, {"id": "ba0cf2602"}], "title": "7. API"}], "external": true}; window.dfnpanelData['95d7775a'] = {"dfnID": "95d7775a", "url": "https://webidl.spec.whatwg.org/#idl-Uint8Array", "dfnText": "Uint8Array", "refSections": [{"refs": [{"id": "eddeeee90"}], "title": "7. 
API"}, {"refs": [{"id": "eddeeee91"}, {"id": "eddeeee92"}, {"id": "eddeeee93"}], "title": "7.4. Interface TextEncoder"}, {"refs": [{"id": "eddeeee94"}, {"id": "eddeeee95"}, {"id": "eddeeee96"}], "title": "7.6. Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['5372cca8'] = {"dfnID": "5372cca8", "url": "https://webidl.spec.whatwg.org/#idl-boolean", "dfnText": "boolean", "refSections": [{"refs": [{"id": "7631bead0"}, {"id": "7631bead1"}], "title": "7.1. Interface mixin TextDecoderCommon"}, {"refs": [{"id": "7631bead2"}, {"id": "7631bead3"}, {"id": "7631bead4"}], "title": "7.2. Interface TextDecoder"}], "external": true}; window.dfnpanelData['495737df'] = {"dfnID": "495737df", "url": "https://webidl.spec.whatwg.org/#buffersource-byte-length", "dfnText": "byte length", "refSections": [{"refs": [{"id": "62ab8c180"}], "title": "7.4. Interface TextEncoder"}], "external": true}; window.dfnpanelData['cadf5fe9'] = {"dfnID": "cadf5fe9", "url": "https://webidl.spec.whatwg.org/#dfn-convert-ecmascript-to-idl-value", "dfnText": "converted to an idl value", "refSections": [{"refs": [{"id": "ce0b23860"}], "title": "7.5. Interface TextDecoderStream"}, {"refs": [{"id": "ce0b23861"}], "title": "7.6. Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['92d13070'] = {"dfnID": "92d13070", "url": "https://webidl.spec.whatwg.org/#dfn-get-buffer-source-copy", "dfnText": "get a copy of the buffer source", "refSections": [{"refs": [{"id": "5270c6ac0"}], "title": "7.2. Interface TextDecoder"}, {"refs": [{"id": "5270c6ac1"}], "title": "7.5. Interface TextDecoderStream"}], "external": true}; window.dfnpanelData['56f81a8e'] = {"dfnID": "56f81a8e", "url": "https://webidl.spec.whatwg.org/#new", "dfnText": "new", "refSections": [{"refs": [{"id": "85cd0b720"}], "title": "7.5. Interface TextDecoderStream"}, {"refs": [{"id": "85cd0b721"}], "title": "7.6. 
Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['df3c9a3c'] = {"dfnID": "df3c9a3c", "url": "https://webidl.spec.whatwg.org/#arraybufferview-write-startingoffset", "dfnText": "startingoffset", "refSections": [{"refs": [{"id": "71867f7d0"}], "title": "7.4. Interface TextEncoder"}], "external": true}; window.dfnpanelData['4013a022'] = {"dfnID": "4013a022", "url": "https://webidl.spec.whatwg.org/#this", "dfnText": "this", "refSections": [{"refs": [{"id": "7bbe6ef60"}, {"id": "7bbe6ef61"}, {"id": "7bbe6ef62"}], "title": "7.1. Interface mixin TextDecoderCommon"}, {"refs": [{"id": "7bbe6ef63"}, {"id": "7bbe6ef64"}, {"id": "7bbe6ef65"}, {"id": "7bbe6ef66"}, {"id": "7bbe6ef67"}, {"id": "7bbe6ef68"}, {"id": "7bbe6ef69"}, {"id": "7bbe6ef610"}, {"id": "7bbe6ef611"}, {"id": "7bbe6ef612"}, {"id": "7bbe6ef613"}, {"id": "7bbe6ef614"}, {"id": "7bbe6ef615"}, {"id": "7bbe6ef616"}, {"id": "7bbe6ef617"}, {"id": "7bbe6ef618"}, {"id": "7bbe6ef619"}, {"id": "7bbe6ef620"}, {"id": "7bbe6ef621"}], "title": "7.2. Interface TextDecoder"}, {"refs": [{"id": "7bbe6ef622"}, {"id": "7bbe6ef623"}, {"id": "7bbe6ef624"}, {"id": "7bbe6ef625"}, {"id": "7bbe6ef626"}, {"id": "7bbe6ef627"}, {"id": "7bbe6ef628"}, {"id": "7bbe6ef629"}, {"id": "7bbe6ef630"}], "title": "7.5. Interface TextDecoderStream"}, {"refs": [{"id": "7bbe6ef631"}, {"id": "7bbe6ef632"}, {"id": "7bbe6ef633"}, {"id": "7bbe6ef634"}], "title": "7.6. Interface TextEncoderStream"}], "external": true}; window.dfnpanelData['b4cfa5ce'] = {"dfnID": "b4cfa5ce", "url": "https://webidl.spec.whatwg.org/#dfn-throw", "dfnText": "throw", "refSections": [{"refs": [{"id": "807892d40"}, {"id": "807892d41"}, {"id": "807892d42"}, {"id": "807892d43"}], "title": "7.2. Interface TextDecoder"}, {"refs": [{"id": "807892d44"}, {"id": "807892d45"}, {"id": "807892d46"}, {"id": "807892d47"}], "title": "7.5. 
Interface TextDecoderStream"}], "external": true}; window.dfnpanelData['f14b47b8'] = {"dfnID": "f14b47b8", "url": "https://webidl.spec.whatwg.org/#idl-unsigned-long-long", "dfnText": "unsigned long long", "refSections": [{"refs": [{"id": "607781f40"}, {"id": "607781f41"}], "title": "7.4. Interface TextEncoder"}], "external": true}; window.dfnpanelData['f3fd9d04'] = {"dfnID": "f3fd9d04", "url": "https://webidl.spec.whatwg.org/#arraybufferview-write", "dfnText": "write", "refSections": [{"refs": [{"id": "291f56c00"}], "title": "7.4. Interface TextEncoder"}], "external": true}; </script> <script>/* Boilerplate: script-dom-helper */
/** Return the first element matching the selector (document.querySelector). */
function query(sel) { return document.querySelector(sel); }

/** Return all elements matching the selector as a real Array. */
function queryAll(sel) { return [...document.querySelectorAll(sel)]; }

/**
 * Normalize `obj` into something usable in a for...of loop:
 *  - falsy values yield an empty list,
 *  - values that are already iterable are returned as-is,
 *  - plain objects are converted to their [key, value] entries.
 */
function iter(obj) {
    if(!obj) return [];
    // Return the iterable itself; returning the Symbol.iterator method
    // would hand for...of a bare function, which is not iterable.
    if(typeof obj[Symbol.iterator] == "function") return obj;
    return Object.entries(obj);
}

/**
 * Create an element.
 *
 * @param tagname  Tag name passed to document.createElement.
 * @param attrs    Object (or iterable of [key, value] pairs) describing the
 *                 element:
 *                   "_onfoo": fn   -> addEventListener("foo", fn)
 *                   "_prop": v     -> el.prop = v (property, not attribute)
 *                   "attr": true   -> attribute set to the empty string
 *                   "attr": false / null / undefined -> attribute skipped
 *                   "attr": v      -> setAttribute("attr", v)
 * @param children Nodes, non-node values (wrapped in Text nodes), or
 *                 arbitrarily nested arrays of them.
 * @returns The new element.
 */
function mk(tagname, attrs, ...children) {
    const el = document.createElement(tagname);
    for(const [k,v] of iter(attrs)) {
        if(k.slice(0,3) == "_on") {
            const eventName = k.slice(3);
            el.addEventListener(eventName, v);
        } else if(k[0] == "_") {
            // property, not attribute
            el[k.slice(1)] = v;
        } else {
            if(v === false || v == null) {
                continue;
            } else if(v === true) {
                el.setAttribute(k, "");
                continue;
            } else {
                el.setAttribute(k, v);
            }
        }
    }
    append(el, children);
    return el;
}

/* Create shortcuts for every known HTML element */
[
    "a", "abbr", "acronym", "address", "applet", "area", "article", "aside",
    "audio", "b", "base", "basefont", "bdo", "big", "blockquote", "body",
    "br", "button", "canvas", "caption", "center", "cite", "code", "col",
    "colgroup", "datalist", "dd", "del", "details", "dfn", "dialog", "div",
    "dl", "dt", "em", "embed", "fieldset", "figcaption", "figure", "font",
    "footer", "form", "frame", "frameset", "head", "header", "h1", "h2",
    "h3", "h4", "h5", "h6", "hr", "html", "i", "iframe", "img", "input",
    "ins", "kbd", "label", "legend", "li", "link", "main", "map", "mark",
    "meta", "meter", "nav", "nobr", "noscript", "object", "ol", "optgroup",
    "option", "output", "p", "param", "pre", "progress", "q", "s", "samp",
    "script", "section", "select", "small", "source", "span", "strike",
    "strong", "style", "sub", "summary", "sup", "table", "tbody", "td",
    "template", "textarea", "tfoot", "th", "thead", "time", "title", "tr",
    "u", "ul", "var", "video", "wbr", "xmp",
].forEach(tagname=>{
    mk[tagname] = (...args) => mk(tagname, ...args);
});

/**
 * Flatten an arbitrarily nested child list and yield DOM nodes:
 * Node instances are yielded unchanged, anything else is wrapped in a
 * Text node.
 */
function* nodesFromChildList(children) {
    for(const child of children.flat(Infinity)) {
        if(child instanceof Node) {
            yield child;
        } else {
            yield new Text(child);
        }
    }
}

/**
 * Append children to `el`. If `el` is a Node they are appended as DOM
 * children; otherwise `el` is treated as an array-like with push().
 * Returns `el`.
 */
function append(el, ...children) {
    for(const child of nodesFromChildList(children)) {
        if(el instanceof Node) el.appendChild(child);
        else el.push(child);
    }
    return el;
}

/**
 * Insert children immediately after `el`, in the order given.
 * Returns `el`.
 */
function insertAfter(el, ...children) {
    // Capture the insertion point once. Re-reading el.nextSibling on every
    // pass would put each new child directly after `el`, i.e. in front of
    // the previously inserted ones, reversing their order.
    const parent = el.parentNode;
    const ref = el.nextSibling;
    for(const child of nodesFromChildList(children)) {
        parent.insertBefore(child, ref);
    }
    return el;
}

/** Remove all content of `el` and return it. */
function clearContents(el) {
    el.innerHTML = "";
    return el;
}

/**
 * Parse an HTML string. Markup that starts with a doctype is parsed into a
 * fresh HTML document (returned); anything else is parsed inside a
 * <template> and its content fragment is returned.
 */
function parseHTML(markup) {
    if(markup.toLowerCase().trim().indexOf('<!doctype') === 0) {
        const doc = document.implementation.createHTMLDocument("");
        doc.documentElement.innerHTML = markup;
        return doc;
    } else {
        const el = mk.template({});
        el.innerHTML = markup;
        return el.content;
    }
}
</script>