Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion ext/mbstring/config.m4
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [
libmbfl/filters/mbfilter_sjis_mobile.c
libmbfl/filters/mbfilter_sjis_mac.c
libmbfl/filters/mbfilter_sjis_2004.c
libmbfl/filters/mbfilter_tl_jisx0201_jisx0208.c
libmbfl/filters/mbfilter_ucs2.c
libmbfl/filters/mbfilter_ucs4.c
libmbfl/filters/mbfilter_uhc.c
Expand Down
3 changes: 1 addition & 2 deletions ext/mbstring/config.w32
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ if (PHP_MBSTRING != "no") {
mbfilter_utf8_mobile.c mbfilter_uuencode.c \
mbfilter_cp5022x.c mbfilter_sjis_mobile.c \
mbfilter_sjis_mac.c \
mbfilter_iso2022jp_mobile.c mbfilter_singlebyte.c \
mbfilter_tl_jisx0201_jisx0208.c", "mbstring");
mbfilter_iso2022jp_mobile.c mbfilter_singlebyte.c", "mbstring");

ADD_SOURCES("ext/mbstring/libmbfl/mbfl", "mbfilter.c mbfilter_8bit.c \
mbfilter_pass.c mbfilter_wchar.c mbfl_convert.c mbfl_encoding.c \
Expand Down
11 changes: 7 additions & 4 deletions ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@
#include "mbfilter.h"
#include "mbfilter_cp5022x.h"
#include "mbfilter_jis.h"
#include "mbfilter_tl_jisx0201_jisx0208.h"

#include "unicode_table_cp932_ext.h"
#include "unicode_table_jis.h"
#include "cp932_table.h"
#include "translit_kana_jisx0201_jisx0208.h"

static int mbfl_filt_conv_cp5022x_wchar_flush(mbfl_convert_filter *filter);
static int mbfl_filt_conv_wchar_cp50220_flush(mbfl_convert_filter *filter);
Expand All @@ -40,6 +40,9 @@ static void mb_wchar_to_cp50220(uint32_t *in, size_t len, mb_convert_buf *buf, b
static void mb_wchar_to_cp50221(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
static void mb_wchar_to_cp50222(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);

/* See mbstring.c, where this function is defined; it is declared directly here.
 * The call sites in this file pass NULL for `second`, and the flush path also
 * passes NULL for `consumed` with 0 for `next`.
 * NOTE(review): exact parameter semantics are not visible here - confirm against mbstring.c */
uint32_t mb_convert_kana_codepoint(uint32_t c, uint32_t next, bool *consumed, uint32_t *second, int mode);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is introducing a reverse-dependency from libmbfl to mbstring. Probably the function should continue to live in libmbfl?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, indeed. @alexdowad, can you please address this? If mb_convert_kana_codepoint() cannot easily be moved to libmbfl, an option might be to establish a callback.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do (in my next PR, which is close to ready).


/* Previously, a dubious 'encoding' called 'cp50220raw' was supported
* This was just CP50220, but the implementation was less strict regarding
* invalid characters; it would silently pass some through
Expand Down Expand Up @@ -336,7 +339,7 @@ static int mbfl_filt_conv_wchar_cp50220(int c, mbfl_convert_filter *filter)
bool consumed = false;

if (filter->cache) {
int s = mbfl_convert_kana(filter->cache, c, &consumed, NULL, mode);
int s = mb_convert_kana_codepoint(filter->cache, c, &consumed, NULL, mode);
filter->cache = consumed ? 0 : c;
/* Terrible hack to get CP50220 to emit error markers in the proper
* position, not reordering them with subsequent characters */
Expand All @@ -359,7 +362,7 @@ static int mbfl_filt_conv_wchar_cp50220_flush(mbfl_convert_filter *filter)
int mode = MBFL_HAN2ZEN_KATAKANA | MBFL_HAN2ZEN_GLUE;

if (filter->cache) {
int s = mbfl_convert_kana(filter->cache, 0, NULL, NULL, mode);
int s = mb_convert_kana_codepoint(filter->cache, 0, NULL, NULL, mode);
mbfl_filt_conv_wchar_cp50221(s, filter);
filter->cache = 0;
}
Expand Down Expand Up @@ -866,7 +869,7 @@ static void mb_wchar_to_cp50220(uint32_t *in, size_t len, mb_convert_buf *buf, b
buf->state |= w << 8;
break;
} else {
w = mbfl_convert_kana(w, len ? *in : 0, &consumed, NULL, MBFL_HAN2ZEN_KATAKANA | MBFL_HAN2ZEN_GLUE);
w = mb_convert_kana_codepoint(w, len ? *in : 0, &consumed, NULL, MBFL_HAN2ZEN_KATAKANA | MBFL_HAN2ZEN_GLUE);
}

if (consumed) {
Expand Down
21 changes: 10 additions & 11 deletions ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,14 @@ int mbfl_filt_conv_wchar_2022kr(int c, mbfl_convert_filter *filter)
{
int c1, c2, s = 0;

if ((filter->status & 0x100) == 0) {
CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
CK((*filter->output_function)('$', filter->data));
CK((*filter->output_function)(')', filter->data));
CK((*filter->output_function)('C', filter->data));
filter->status |= 0x100;
}

if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) {
s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min];
} else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) {
Expand All @@ -211,9 +219,7 @@ int mbfl_filt_conv_wchar_2022kr(int c, mbfl_convert_filter *filter)
/* exclude UHC extension area */
if (c1 < 0xa1 || c2 < 0xa1) {
s = c;
}

if (s & 0x8000) {
} else if (s & 0x8000) {
s -= 0x8080;
}

Expand All @@ -235,13 +241,6 @@ int mbfl_filt_conv_wchar_2022kr(int c, mbfl_convert_filter *filter)
}
CK((*filter->output_function)(s, filter->data));
} else {
if ((filter->status & 0x100) == 0) {
CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
CK((*filter->output_function)('$', filter->data));
CK((*filter->output_function)(')', filter->data));
CK((*filter->output_function)('C', filter->data));
filter->status |= 0x100;
}
if ((filter->status & 0x10) == 0) {
CK((*filter->output_function)(0x0e, filter->data)); /* shift out */
filter->status |= 0x10;
Expand All @@ -259,7 +258,7 @@ int mbfl_filt_conv_wchar_2022kr(int c, mbfl_convert_filter *filter)
static int mbfl_filt_conv_any_2022kr_flush(mbfl_convert_filter *filter)
{
/* back to ascii */
if (filter->status & 0xff00) {
if (filter->status & 0x10) {
CK((*filter->output_function)(0x0f, filter->data)); /* shift in */
}

Expand Down
148 changes: 127 additions & 21 deletions ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@

#include "mbfilter.h"
#include "mbfilter_iso2022jp_mobile.h"
#include "mbfilter_sjis_mobile.h"

#include "unicode_table_cp932_ext.h"
#include "unicode_table_jis.h"
Expand All @@ -39,11 +38,27 @@
static size_t mb_iso2022jp_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
static void mb_wchar_to_iso2022jp_kddi(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);

static int mbfl_filt_conv_2022jp_mobile_wchar(int c, mbfl_convert_filter *filter);
static int mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter);
static int mbfl_filt_conv_2022jp_mobile_wchar_flush(mbfl_convert_filter *filter);
static int mbfl_filt_conv_wchar_2022jp_mobile_flush(mbfl_convert_filter *filter);

extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n);

/* Regional Indicator Unicode codepoints run from 0x1F1E6 to 0x1F1FF.
 * These correspond to the letters A-Z.
 * To display the flag emoji for a country, two Unicode codepoints are combined,
 * which correspond to the two-letter code for that country.
 * This macro converts uppercase ASCII values to Regional Indicator codepoints:
 * 0x1F1A5 + 'A' (0x41) == 0x1F1E6. */
#define NFLAGS(c) (0x1F1A5+((unsigned int)(c)))

/* Two-letter country codes for the ten flags handled here. Each entry is exactly
 * two chars wide, so the strings are stored WITHOUT a terminating NUL; index the
 * two letters directly and never treat an entry as a C string. */
static const char nflags_s[10][2] = {
"CN","DE","ES","FR","GB","IT","JP","KR","RU","US"
};
/* KDDI emoji codes for the flags, parallel to nflags_s:
 * nflags_code_kddi[i] is the code for the country nflags_s[i]. */
static const int nflags_code_kddi[10] = {
0x2549, 0x2546, 0x24C0, 0x2545, 0x2548, 0x2547, 0x2750, 0x254A, 0x24C1, 0x27F7
};

static const char *mbfl_encoding_2022jp_kddi_aliases[] = {"ISO-2022-JP-KDDI", NULL};

const mbfl_encoding mbfl_encoding_2022jp_kddi = {
Expand Down Expand Up @@ -125,7 +140,53 @@ const struct mbfl_convert_vtbl vtbl_wchar_2022jp_kddi = {
#define JISX0201_KANA 0x20
#define JISX0208_KANJI 0x80

int mbfl_filt_conv_2022jp_mobile_wchar(int c, mbfl_convert_filter *filter)
/* Helper macros for emoji which expand to TWO Unicode codepoints.
 * Both store the first codepoint through a pointer named `snd`, which must be
 * in scope at the point of use, and then RETURN the second codepoint from the
 * enclosing function. Control never continues past either macro.
 *
 * EMIT_KEYPAD_EMOJI: first codepoint is the key character `c`, second is
 * U+20E3 (COMBINING ENCLOSING KEYCAP). */
#define EMIT_KEYPAD_EMOJI(c) do { *snd = (c); return 0x20E3; } while(0)
/* EMIT_FLAG_EMOJI: `country` is a two-letter uppercase country code; each letter
 * is converted to a Regional Indicator codepoint via NFLAGS. */
#define EMIT_FLAG_EMOJI(country) do { *snd = NFLAGS((country)[0]); return NFLAGS((country)[1]); } while(0)

/* Country codes for the consecutive KDDI flag codes 0x2545..0x254A, indexed by
 * (code - 0x2545). Entries are two chars wide and carry no terminating NUL. */
static const char nflags_kddi[6][2] = {"FR", "DE", "IT", "GB", "CN", "KR"};

/* Expand an emoji table entry into its full codepoint value.
 *
 * Values above 0xF000 have 0x10000 added; values above 0xE000 (up to and
 * including 0xF000) have 0xF0000 added; anything else is returned unchanged. */
static inline int convert_emoji_cp(int cp)
{
	int offset = 0;

	if (cp > 0xF000) {
		offset = 0x10000;
	} else if (cp > 0xE000) {
		offset = 0xF0000;
	}

	return cp + offset;
}

/* Convert a KDDI emoji code `s` to Unicode.
 *
 * Returns the resulting codepoint, or 0 if `s` lies in neither KDDI table range.
 * For emoji which need two codepoints (national flags and keypad buttons), the
 * first codepoint is stored in `*snd` and the second one is returned; for all
 * other emoji found in the tables `*snd` is set to 0. When 0 is returned because
 * `s` is out of range, `*snd` is left untouched.
 *
 * Note that EMIT_FLAG_EMOJI and EMIT_KEYPAD_EMOJI return from this function. */
static int mbfilter_sjis_emoji_kddi2unicode(int s, int *snd)
{
	if (s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi1_max) {
		/* Special cases within the first table */
		if (s == 0x24C0) { /* Spain */
			EMIT_FLAG_EMOJI("ES");
		}
		if (s == 0x24C1) { /* Russia */
			EMIT_FLAG_EMOJI("RU");
		}
		if (s >= 0x2545 && s <= 0x254A) { /* Six consecutive flag codes */
			EMIT_FLAG_EMOJI(nflags_kddi[s - 0x2545]);
		}
		if (s == 0x25BC) { /* '#' keypad button */
			EMIT_KEYPAD_EMOJI('#');
		}

		/* Ordinary single-codepoint emoji */
		*snd = 0;
		return convert_emoji_cp(mb_tbl_code2uni_kddi1[s - mb_tbl_code2uni_kddi1_min]);
	}

	if (s >= mb_tbl_code2uni_kddi2_min && s <= mb_tbl_code2uni_kddi2_max) {
		/* Special cases within the second table */
		if (s == 0x2750) { /* Japan */
			EMIT_FLAG_EMOJI("JP");
		}
		if (s >= 0x27A6 && s <= 0x27AE) { /* Keypad buttons '1' through '9' */
			EMIT_KEYPAD_EMOJI(s - 0x27A6 + '1');
		}
		if (s == 0x27F7) { /* United States */
			EMIT_FLAG_EMOJI("US");
		}
		if (s == 0x2830) { /* '0' keypad button */
			EMIT_KEYPAD_EMOJI('0');
		}

		/* Ordinary single-codepoint emoji */
		*snd = 0;
		return convert_emoji_cp(mb_tbl_code2uni_kddi2[s - mb_tbl_code2uni_kddi2_min]);
	}

	return 0;
}

static int mbfl_filt_conv_2022jp_mobile_wchar(int c, mbfl_convert_filter *filter)
{
int c1, s, w, snd = 0;

Expand Down Expand Up @@ -260,7 +321,67 @@ static int mbfl_filt_conv_2022jp_mobile_wchar_flush(mbfl_convert_filter *filter)
return 0;
}

int mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter)
/* Try to convert Unicode codepoint `c` to a KDDI emoji code.
 *
 * Returns 1 if a KDDI emoji code was stored in `*s1`, otherwise 0 (`*s1` is then
 * left untouched). A return of 0 covers two situations: no emoji matched, or `c`
 * was '#' or a digit and has been held back in `filter->cache` (low status bit
 * set) to see whether U+20E3 follows and forms a keypad-button emoji.
 *
 * NOTE(review): CK is defined outside this block; presumably it returns early
 * from this function when the output function reports failure - confirm. */
static int mbfilter_unicode2sjis_emoji_kddi(int c, int *s1, mbfl_convert_filter *filter)
{
/* A '#' or digit was held back by the previous call */
if ((filter->status & 0xF) == 1) {
int c1 = filter->cache;
filter->cache = 0;
/* Clear the low status byte, including the "character held back" flag */
filter->status &= ~0xFF;
if (c == 0x20E3) {
/* Held-back character + U+20E3 form one keypad emoji */
if (c1 == '#') {
*s1 = 0x25BC;
} else if (c1 == '0') {
*s1 = 0x2830;
} else { /* Previous character was '1'-'9' */
*s1 = 0x27A6 + (c1 - '1');
}
return 1;
} else {
/* Not a keypad emoji after all: emit the held-back character as-is.
 * If any of the 0xFF00 status bits are set, first emit ESC ( B
 * (the ISO-2022-JP escape sequence which selects ASCII). */
if (filter->status & 0xFF00) {
CK((*filter->output_function)(0x1B, filter->data)); /* ESC */
CK((*filter->output_function)('(', filter->data));
CK((*filter->output_function)('B', filter->data));
}
CK((*filter->output_function)(c1, filter->data));
filter->status = 0;
/* Fall through and process `c` itself below */
}
}

/* Possible start of a keypad emoji: hold it back until the next codepoint is seen */
if (c == '#' || (c >= '0' && c <= '9')) {
filter->status |= 1;
filter->cache = c;
return 0;
}

if (c == 0xA9) { /* Copyright sign */
*s1 = 0x27DC;
return 1;
} else if (c == 0xAE) { /* Registered sign */
*s1 = 0x27DD;
return 1;
} else if (c >= mb_tbl_uni_kddi2code2_min && c <= mb_tbl_uni_kddi2code2_max) {
/* Look `c` up in the key table; a non-negative result indexes the value table */
int i = mbfl_bisec_srch2(c, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len);
if (i >= 0) {
*s1 = mb_tbl_uni_kddi2code2_value[i];
return 1;
}
} else if (c >= mb_tbl_uni_kddi2code3_min && c <= mb_tbl_uni_kddi2code3_max) {
/* Keys are `unsigned short`, so 0x10000 is subtracted before searching */
int i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len);
if (i >= 0) {
*s1 = mb_tbl_uni_kddi2code3_value[i];
return 1;
}
} else if (c >= mb_tbl_uni_kddi2code5_min && c <= mb_tbl_uni_kddi2code5_max) {
/* Likewise, 0xF0000 is subtracted so the key fits in an `unsigned short` */
int i = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len);
if (i >= 0) {
*s1 = mb_tbl_uni_kddi2code5_val[i];
return 1;
}
}
return 0;
}

static int mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter)
{
int c1, c2, s1 = 0, s2 = 0;

Expand Down Expand Up @@ -293,11 +414,10 @@ int mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter)
}

if (mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter)) {
/* A KDDI emoji was detected and stored in s1 */
CODE2JIS(c1,c2,s1,s2);
s1 -= 0x1600;
}

if (filter->status == 1 && filter->cache) {
} else if ((filter->status & 0xFF) == 1 && filter->cache) {
/* We are just processing one of KDDI's special emoji for a phone keypad button */
return 0;
}
Expand Down Expand Up @@ -360,7 +480,7 @@ static int mbfl_filt_conv_wchar_2022jp_mobile_flush(mbfl_convert_filter *filter)
}

int c1 = filter->cache;
if (filter->status == 1 && (c1 == '#' || (c1 >= '0' && c1 <= '9'))) {
if ((filter->status & 0xFF) == 1 && (c1 == '#' || (c1 >= '0' && c1 <= '9'))) {
(*filter->output_function)(c1, filter->data);
}

Expand Down Expand Up @@ -484,20 +604,6 @@ static size_t mb_iso2022jp_kddi_to_wchar(unsigned char **in, size_t *in_len, uin
return out - buf;
}

/* Regional Indicator Unicode codepoints run from 0x1F1E6 to 0x1F1FF.
 * These correspond to the letters A-Z.
 * To display the flag emoji for a country, two Unicode codepoints are combined,
 * which correspond to the two-letter code for that country.
 * This macro converts uppercase ASCII values to Regional Indicator codepoints:
 * 0x1F1A5 + 'A' (0x41) == 0x1F1E6. */
#define NFLAGS(c) (0x1F1A5+((unsigned int)(c)))

/* Two-letter country codes for the ten flags handled here. Each entry is exactly
 * two chars wide, so the strings are stored WITHOUT a terminating NUL. */
static const char nflags_s[10][2] = {
"CN","DE","ES","FR","GB","IT","JP","KR","RU","US"
};
/* KDDI emoji codes for the flags, in the same order as the country codes in nflags_s */
static const int nflags_code_kddi[10] = {
0x2549, 0x2546, 0x24C0, 0x2545, 0x2548, 0x2547, 0x2750, 0x254A, 0x24C1, 0x27F7
};

static void mb_wchar_to_iso2022jp_kddi(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
unsigned char *out, *limit;
Expand Down
3 changes: 0 additions & 3 deletions ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,4 @@ extern const mbfl_encoding mbfl_encoding_2022jp_kddi;
extern const struct mbfl_convert_vtbl vtbl_2022jp_kddi_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_2022jp_kddi;

int mbfl_filt_conv_2022jp_mobile_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter);

#endif /* MBFL_MBFILTER_ISO2022_JP_MOBILE_H */
6 changes: 4 additions & 2 deletions ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c
Original file line number Diff line number Diff line change
Expand Up @@ -630,10 +630,12 @@ int mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter)
filter->status = 0;
CK((*filter->output_function)(s1, filter->data));
} else if (s1 < 0x100) { /* latin or kana */
if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) {
if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) {
CK((*filter->output_function)(0x8e, filter->data));
CK((*filter->output_function)(s1, filter->data));
} else {
CK(mbfl_filt_conv_illegal_output(c, filter));
}
CK((*filter->output_function)(s1, filter->data));
} else if (s1 < 0x7f00) { /* X 0213 plane 1 */
if (filter->to->no_encoding == mbfl_no_encoding_sjis2004) {
c1 = (s1 >> 8) & 0xff;
Expand Down
Loading