Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions ext/mbstring/libmbfl/filters/mbfilter_jis.c
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,17 @@ static size_t mb_iso2022jp_to_wchar(unsigned char **in, size_t *in_len, uint32_t
} else if (c < 0x80) {
*out++ = c;
} else if (c >= 0xA1 && c <= 0xDF) {
/* GR-invoked Kana; "GR" stands for "graphics right" and refers to bytes
* with the MSB set (in the context of ISO-2022 encoding).
*
* In this regard, Wikipedia states:
* "Other, older variants known as JIS7 and JIS8 build directly on the 7-bit and 8-bit
* encodings defined by JIS X 0201 and allow use of JIS X 0201 kana from G1 without
* escape sequences, using Shift Out and Shift In or setting the eighth bit
* (GR-invoked), respectively."
*
* Note that we support both the 'JIS7' use of 0xE/0xF Shift Out/Shift In codes
* and the 'JIS8' use of GR-invoked Kana */
*out++ = 0xFEC0 + c;
} else {
*out++ = MBFL_BAD_INPUT;
Expand Down Expand Up @@ -731,6 +742,13 @@ static void mb_wchar_to_jis(uint32_t *in, size_t len, mb_convert_buf *buf, bool
buf->state = ASCII;
}
out = mb_convert_buf_add(out, s);
} else if (s >= 0xA1 && s <= 0xDF) {
if (buf->state != JISX_0201_KANA) {
MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
out = mb_convert_buf_add3(out, 0x1B, '(', 'I');
buf->state = JISX_0201_KANA;
}
out = mb_convert_buf_add(out, s & 0x7F);
} else if (s < 0x8080) { /* JIS X 0208 */
if (buf->state != JISX_0208) {
MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 5);
Expand Down
18 changes: 18 additions & 0 deletions ext/mbstring/tests/iso2022jp_encoding.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,23 @@ testValidString("\x20\x3E", "\x1B\$B!1\x1B(B", 'UTF-16BE', 'ISO-2022-JP', false)

echo "Other mappings from Unicode -> ISO-2022-JP are OK\n";

// Single bytes from 0xA1-0xDF can be used to encode kana in JIS8
$grInvoked = [
"\xA3" => "\x1B(I\x23\x1B(B",
"\xB1" => "\x1B(I\x31\x1B(B",
"\xC2" => "\x1B(I\x42\x1B(B",
"\xDF" => "\x1B(I\x5F\x1B(B"
];
foreach ($grInvoked as $gr => $jisx) {
// The JIS X 0201 kana escape sequence (ESC ( I) is the canonical form for outputting kana
testValidString($gr, $jisx, 'JIS', 'JIS', false);
if (mb_convert_encoding($gr, 'UTF-16BE', 'JIS') !== mb_convert_encoding($jisx, 'UTF-16BE', 'JIS'))
die("Equivalent GR byte and JISX 0201 sequence do not decode to the same codepoint");
}

echo "GR-invoked kana support OK\n";

// Check handling of BOM
convertInvalidString("\xFF\xFE", "%", "UTF-16BE", "JIS", false);
convertInvalidString("\xFF\xFE", "%", "UTF-16BE", "ISO-2022-JP", false);

Expand All @@ -239,4 +256,5 @@ JIS X 0208 support OK
JIS X 0212 support OK
All escape sequences work as expected
Other mappings from Unicode -> ISO-2022-JP are OK
GR-invoked kana support OK
Done!