Skip to content

Commit 217911e

Browse files
gh-103583: Add codecs and maps to _codecs_* module state (#103540)
1 parent ff3303e commit 217911e

File tree

9 files changed

+161
-96
lines changed

9 files changed

+161
-96
lines changed

Modules/cjkcodecs/_codecs_cn.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -453,14 +453,14 @@ DECODER(hz)
453453
}
454454

455455

456-
BEGIN_MAPPINGS_LIST
456+
/* Mapping tables registered by _codecs_cn.  The argument to
 * BEGIN_MAPPINGS_LIST must equal the number of MAPPING_* entries that follow
 * (four here); END_MAPPINGS_LIST asserts that the two agree. */
BEGIN_MAPPINGS_LIST(4)
457457
MAPPING_DECONLY(gb2312)
458458
MAPPING_DECONLY(gbkext)
459459
MAPPING_ENCONLY(gbcommon)
460460
MAPPING_ENCDEC(gb18030ext)
461461
END_MAPPINGS_LIST
462462

463-
BEGIN_CODECS_LIST
463+
BEGIN_CODECS_LIST(4)
464464
CODEC_STATELESS(gb2312)
465465
CODEC_STATELESS(gbk)
466466
CODEC_STATELESS(gb18030)

Modules/cjkcodecs/_codecs_hk.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -177,14 +177,13 @@ DECODER(big5hkscs)
177177
return 0;
178178
}
179179

180-
181-
BEGIN_MAPPINGS_LIST
180+
/* Mapping tables registered by _codecs_hk.  The argument to
 * BEGIN_MAPPINGS_LIST must equal the number of MAPPING_* entries that follow
 * (three here); END_MAPPINGS_LIST asserts that the two agree. */
BEGIN_MAPPINGS_LIST(3)
182181
MAPPING_DECONLY(big5hkscs)
183182
MAPPING_ENCONLY(big5hkscs_bmp)
184183
MAPPING_ENCONLY(big5hkscs_nonbmp)
185184
END_MAPPINGS_LIST
186185

187-
BEGIN_CODECS_LIST
186+
/* Codecs registered by _codecs_hk: a single stateless codec whose entry also
 * carries big5hkscs_codec_init (that is what the _WINIT variant adds).  The
 * argument to BEGIN_CODECS_LIST must equal the number of entries;
 * END_CODECS_LIST asserts it. */
BEGIN_CODECS_LIST(1)
188187
CODEC_STATELESS_WINIT(big5hkscs)
189188
END_CODECS_LIST
190189

Modules/cjkcodecs/_codecs_iso2022.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,18 +1119,19 @@ static const struct iso2022_designation iso2022_jp_ext_designations[] = {
11191119
CONFIGDEF(jp_ext, NO_SHIFT | USE_JISX0208_EXT)
11201120

11211121

1122-
BEGIN_MAPPINGS_LIST
1122+
/* _codecs_iso2022 registers no mapping tables: add_mappings() still gets
 * defined, records num_mappings = 0 and requests a zero-element allocation. */
BEGIN_MAPPINGS_LIST(0)
11231123
/* no mapping table here */
11241124
END_MAPPINGS_LIST
11251125

1126-
#define ISO2022_CODEC(variation) { \
1126+
/* New ('+') form of ISO2022_CODEC: instead of expanding to a static array
 * initializer `{ ... },` it expands to an assignment statement that stores a
 * MultibyteCodec compound literal into the next codec_list slot (NEXT_CODEC).
 * The entry is named "iso2022_<variation>", points at
 * iso2022_<variation>_config, and uses iso2022_codec_init together with the
 * stateful iso2022 method set.  It can only be used between
 * BEGIN_CODECS_LIST and END_CODECS_LIST, where `st` and `idx` are in scope. */
#define ISO2022_CODEC(variation) \
1127+
NEXT_CODEC = (MultibyteCodec){ \
11271128
"iso2022_" #variation, \
11281129
&iso2022_##variation##_config, \
11291130
iso2022_codec_init, \
11301131
_STATEFUL_METHODS(iso2022) \
1131-
},
1132+
};
11321133

1133-
BEGIN_CODECS_LIST
1134+
BEGIN_CODECS_LIST(7)
11341135
ISO2022_CODEC(kr)
11351136
ISO2022_CODEC(jp)
11361137
ISO2022_CODEC(jp_1)

Modules/cjkcodecs/_codecs_jp.c

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -733,7 +733,7 @@ DECODER(shift_jis_2004)
733733
}
734734

735735

736-
BEGIN_MAPPINGS_LIST
736+
BEGIN_MAPPINGS_LIST(11)
737737
MAPPING_DECONLY(jisx0208)
738738
MAPPING_DECONLY(jisx0212)
739739
MAPPING_ENCONLY(jisxcommon)
@@ -747,14 +747,19 @@ BEGIN_MAPPINGS_LIST
747747
MAPPING_ENCDEC(cp932ext)
748748
END_MAPPINGS_LIST
749749

750-
BEGIN_CODECS_LIST
750+
/* Local helper for codec entries whose name differs from the method prefix:
 * stores {NAME, (void *)N, NULL, stateless METH methods} into the next
 * codec_list slot.  N is passed through as an opaque pointer-sized value in
 * the second field.  The macro is #undef'd after the codec list. */
#define CODEC_CUSTOM(NAME, N, METH) \
751+
NEXT_CODEC = (MultibyteCodec){NAME, (void *)N, NULL, _STATELESS_METHODS(METH)};
752+
753+
/* Codecs registered by _codecs_jp: five plain stateless codecs plus the two
 * *_jisx0213 aliases, which reuse the *_jis_2004 methods with 2000 in the
 * second field.  That makes seven entries, matching BEGIN_CODECS_LIST(7);
 * END_CODECS_LIST asserts the count.  The '-' lines are the old static
 * initializers that CODEC_CUSTOM replaces. */
BEGIN_CODECS_LIST(7)
751754
CODEC_STATELESS(shift_jis)
752755
CODEC_STATELESS(cp932)
753756
CODEC_STATELESS(euc_jp)
754757
CODEC_STATELESS(shift_jis_2004)
755758
CODEC_STATELESS(euc_jis_2004)
756-
{ "euc_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(euc_jis_2004) },
757-
{ "shift_jisx0213", (void *)2000, NULL, _STATELESS_METHODS(shift_jis_2004) },
759+
CODEC_CUSTOM("euc_jisx0213", 2000, euc_jis_2004)
760+
CODEC_CUSTOM("shift_jisx0213", 2000, shift_jis_2004)
758761
END_CODECS_LIST
759762

763+
#undef CODEC_CUSTOM
764+
760765
I_AM_A_MODULE_FOR(jp)

Modules/cjkcodecs/_codecs_kr.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -453,13 +453,13 @@ DECODER(johab)
453453
#undef FILL
454454

455455

456-
BEGIN_MAPPINGS_LIST
456+
/* Mapping tables registered by _codecs_kr.  The argument to
 * BEGIN_MAPPINGS_LIST must equal the number of MAPPING_* entries that follow
 * (three here); END_MAPPINGS_LIST asserts that the two agree. */
BEGIN_MAPPINGS_LIST(3)
457457
MAPPING_DECONLY(ksx1001)
458458
MAPPING_ENCONLY(cp949)
459459
MAPPING_DECONLY(cp949ext)
460460
END_MAPPINGS_LIST
461461

462-
BEGIN_CODECS_LIST
462+
BEGIN_CODECS_LIST(3)
463463
CODEC_STATELESS(euc_kr)
464464
CODEC_STATELESS(cp949)
465465
CODEC_STATELESS(johab)

Modules/cjkcodecs/_codecs_tw.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,12 +130,12 @@ DECODER(cp950)
130130

131131

132132

133-
BEGIN_MAPPINGS_LIST
133+
/* Mapping tables registered by _codecs_tw.  The argument to
 * BEGIN_MAPPINGS_LIST must equal the number of MAPPING_* entries that follow
 * (two here); END_MAPPINGS_LIST asserts that the two agree. */
BEGIN_MAPPINGS_LIST(2)
134134
MAPPING_ENCDEC(big5)
135135
MAPPING_ENCDEC(cp950ext)
136136
END_MAPPINGS_LIST
137137

138-
BEGIN_CODECS_LIST
138+
/* Codecs registered by _codecs_tw: two stateless codecs.  The argument to
 * BEGIN_CODECS_LIST must equal the number of entries; END_CODECS_LIST
 * asserts it. */
BEGIN_CODECS_LIST(2)
139139
CODEC_STATELESS(big5)
140140
CODEC_STATELESS(cp950)
141141
END_CODECS_LIST

Modules/cjkcodecs/cjkcodecs.h

Lines changed: 118 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,20 @@ struct pair_encodemap {
6060
DBCHAR code;
6161
};
6262

63-
static const MultibyteCodec *codec_list;
64-
static const struct dbcs_map *mapping_list;
63+
/* Per-module state replacing the former file-static codec_list/mapping_list
 * pointers.  Both arrays are allocated with PyMem_Calloc by add_mappings() /
 * add_codecs() and released by _cjk_free(). */
typedef struct {
64+
int num_mappings;  /* number of elements in mapping_list */
65+
int num_codecs;  /* number of elements in codec_list */
66+
struct dbcs_map *mapping_list;  /* filled by the MAPPING_* macros */
67+
MultibyteCodec *codec_list;  /* filled by the CODEC_* macros */
68+
} cjkcodecs_module_state;
69+
70+
/* Return the cjkcodecs_module_state stored in module `mod`.
 * The pointer from PyModule_GetState() is only checked with assert(), so a
 * NULL state is returned unchecked when assertions are compiled out. */
static inline cjkcodecs_module_state *
71+
get_module_state(PyObject *mod)
72+
{
73+
void *state = PyModule_GetState(mod);
74+
assert(state != NULL);
75+
return (cjkcodecs_module_state *)state;
76+
}
6577

6678
#define CODEC_INIT(encoding) \
6779
static int encoding##_codec_init(const void *config)
@@ -202,16 +214,42 @@ static const struct dbcs_map *mapping_list;
202214
#define TRYMAP_DEC(charset, assi, c1, c2) \
203215
_TRYMAP_DEC(&charset##_decmap[c1], assi, c2)
204216

205-
#define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = {
206-
#define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL},
207-
#define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap},
208-
#define MAPPING_ENCDEC(enc) {#enc, (void*)enc##_encmap, (void*)enc##_decmap},
209-
#define END_MAPPINGS_LIST \
210-
{"", NULL, NULL} }; \
211-
static const struct dbcs_map *mapping_list = \
212-
(const struct dbcs_map *)_mapping_list;
217+
/* Open the definition of `static int add_mappings(cjkcodecs_module_state *st)`.
 * The generated prologue records NUM in st->num_mappings and allocates a
 * zeroed array of NUM struct dbcs_map in st->mapping_list, returning -1 if
 * the allocation fails.  The function body is deliberately left open: the
 * MAPPING_* macros append entries and END_MAPPINGS_LIST closes it.
 * `(void)idx;` presumably silences an unused-variable warning when the list
 * is empty and assertions are disabled. */
#define BEGIN_MAPPINGS_LIST(NUM) \
218+
static int \
219+
add_mappings(cjkcodecs_module_state *st) \
220+
{ \
221+
int idx = 0; \
222+
(void)idx; \
223+
st->num_mappings = NUM; \
224+
st->mapping_list = PyMem_Calloc(NUM, sizeof(struct dbcs_map)); \
225+
if (st->mapping_list == NULL) { \
226+
return -1; \
227+
}
228+
229+
/* Entry macros for use between BEGIN_MAPPINGS_LIST and END_MAPPINGS_LIST.
 * Each stores {"<enc>", encode map or NULL, decode map or NULL} into
 * st->mapping_list[idx] and post-increments idx:
 *   MAPPING_ENCONLY - <enc>_encmap only
 *   MAPPING_DECONLY - <enc>_decmap only
 *   MAPPING_ENCDEC  - both maps
 * NOTE: there is no bounds check on idx here; listing more entries than the
 * NUM given to BEGIN_MAPPINGS_LIST writes past the allocation before the
 * assert in END_MAPPINGS_LIST is reached. */
#define MAPPING_ENCONLY(enc) \
230+
st->mapping_list[idx++] = (struct dbcs_map){#enc, (void*)enc##_encmap, NULL};
231+
#define MAPPING_DECONLY(enc) \
232+
st->mapping_list[idx++] = (struct dbcs_map){#enc, NULL, (void*)enc##_decmap};
233+
#define MAPPING_ENCDEC(enc) \
234+
st->mapping_list[idx++] = (struct dbcs_map){#enc, (void*)enc##_encmap, (void*)enc##_decmap};
235+
236+
/* Close the add_mappings() body opened by BEGIN_MAPPINGS_LIST: assert that the
 * number of MAPPING_* entries emitted (idx) equals the declared count, then
 * return 0 for success. */
#define END_MAPPINGS_LIST \
237+
assert(st->num_mappings == idx); \
238+
return 0; \
239+
}
240+
241+
/* Open the definition of `static int add_codecs(cjkcodecs_module_state *st)`.
 * The generated prologue records NUM in st->num_codecs and allocates a zeroed
 * array of NUM MultibyteCodec in st->codec_list, returning -1 if the
 * allocation fails.  The body is left open for CODEC_* / NEXT_CODEC entries
 * and is closed by END_CODECS_LIST. */
#define BEGIN_CODECS_LIST(NUM) \
242+
static int \
243+
add_codecs(cjkcodecs_module_state *st) \
244+
{ \
245+
int idx = 0; \
246+
(void)idx; \
247+
st->num_codecs = NUM; \
248+
st->codec_list = PyMem_Calloc(NUM, sizeof(MultibyteCodec)); \
249+
if (st->codec_list == NULL) { \
250+
return -1; \
251+
}
213252

214-
#define BEGIN_CODECS_LIST static const MultibyteCodec _codec_list[] = {
215253
#define _STATEFUL_METHODS(enc) \
216254
enc##_encode, \
217255
enc##_encode_init, \
@@ -222,23 +260,21 @@ static const struct dbcs_map *mapping_list;
222260
/* Initializer fragment for the method fields of a stateless codec entry:
 * <enc>_encode and <enc>_decode, each followed by two NULLs (presumably the
 * init/reset hooks that _STATEFUL_METHODS fills in - confirm against the
 * MultibyteCodec definition).  Note the trailing comma: the expansion is meant
 * to sit at the end of an initializer list. */
#define _STATELESS_METHODS(enc) \
223261
enc##_encode, NULL, NULL, \
224262
enc##_decode, NULL, NULL,
225-
#define CODEC_STATEFUL(enc) { \
226-
#enc, NULL, NULL, \
227-
_STATEFUL_METHODS(enc) \
228-
},
229-
#define CODEC_STATELESS(enc) { \
230-
#enc, NULL, NULL, \
231-
_STATELESS_METHODS(enc) \
232-
},
233-
#define CODEC_STATELESS_WINIT(enc) { \
234-
#enc, NULL, \
235-
enc##_codec_init, \
236-
_STATELESS_METHODS(enc) \
237-
},
238-
#define END_CODECS_LIST \
239-
{"", NULL,} }; \
240-
static const MultibyteCodec *codec_list = \
241-
(const MultibyteCodec *)_codec_list;
263+
264+
/* Lvalue naming the next free slot of st->codec_list; evaluating it
 * post-increments idx.  Usable only inside the add_codecs() body generated by
 * BEGIN_CODECS_LIST, where `st` and `idx` are defined. */
#define NEXT_CODEC \
265+
st->codec_list[idx++]
266+
267+
/* Entry macros for use between BEGIN_CODECS_LIST and END_CODECS_LIST.  Each
 * assigns a MultibyteCodec compound literal named "<enc>" to NEXT_CODEC:
 *   CODEC_STATEFUL        - NULL second and third fields, stateful methods
 *   CODEC_STATELESS       - NULL second and third fields, stateless methods
 *   CODEC_STATELESS_WINIT - as CODEC_STATELESS, but the third field is
 *                           <enc>_codec_init
 * As with the mapping macros, idx is not bounds-checked until the assert in
 * END_CODECS_LIST. */
#define CODEC_STATEFUL(enc) \
268+
NEXT_CODEC = (MultibyteCodec){#enc, NULL, NULL, _STATEFUL_METHODS(enc)};
269+
#define CODEC_STATELESS(enc) \
270+
NEXT_CODEC = (MultibyteCodec){#enc, NULL, NULL, _STATELESS_METHODS(enc)};
271+
#define CODEC_STATELESS_WINIT(enc) \
272+
NEXT_CODEC = (MultibyteCodec){#enc, NULL, enc##_codec_init, _STATELESS_METHODS(enc)};
273+
274+
/* Close the add_codecs() body opened by BEGIN_CODECS_LIST: assert that the
 * number of codec entries emitted (idx) equals the declared count, then
 * return 0 for success. */
#define END_CODECS_LIST \
275+
assert(st->num_codecs == idx); \
276+
return 0; \
277+
}
242278

243279

244280

@@ -249,53 +285,70 @@ getmultibytecodec(void)
249285
}
250286

251287
/* New helper _getcodec() (the '+' lines; '-' lines are the old getcodec body).
 * Builds the Python-level codec object for `codec`:
 *   1. obtains the constructor callable from getmultibytecodec();
 *   2. wraps the `codec` pointer in a PyCapsule named
 *      PyMultibyteCodec_CAPSULE_NAME, with no destructor, so the capsule does
 *      not own the pointed-to MultibyteCodec;
 *   3. calls the constructor with that capsule.
 * Returns the result of the call, or NULL if any step fails.  The references
 * to the callable and the capsule are released on every path. */
static PyObject *
252-
getcodec(PyObject *self, PyObject *encoding)
288+
_getcodec(const MultibyteCodec *codec)
253289
{
254-
PyObject *codecobj, *r, *cofunc;
255-
const MultibyteCodec *codec;
256-
const char *enc;
257-
258-
if (!PyUnicode_Check(encoding)) {
259-
PyErr_SetString(PyExc_TypeError,
260-
"encoding name must be a string.");
290+
PyObject *cofunc = getmultibytecodec();
291+
if (cofunc == NULL) {
261292
return NULL;
262293
}
263-
enc = PyUnicode_AsUTF8(encoding);
264-
if (enc == NULL)
265-
return NULL;
266294

267-
cofunc = getmultibytecodec();
268-
if (cofunc == NULL)
295+
PyObject *codecobj = PyCapsule_New((void *)codec,
296+
PyMultibyteCodec_CAPSULE_NAME,
297+
NULL);
298+
if (codecobj == NULL) {
299+
Py_DECREF(cofunc);
269300
return NULL;
301+
}
270302

271-
for (codec = codec_list; codec->encoding[0]; codec++)
272-
if (strcmp(codec->encoding, enc) == 0)
273-
break;
303+
PyObject *res = PyObject_CallOneArg(cofunc, codecobj);
304+
Py_DECREF(codecobj);
305+
Py_DECREF(cofunc);
306+
return res;
307+
}
274308

275-
if (codec->encoding[0] == '\0') {
276-
PyErr_SetString(PyExc_LookupError,
277-
"no such codec is supported.");
309+
/* New getcodec() body (the '+' lines; '-' lines are leftovers of the old one).
 * Module function taking one argument, the encoding name:
 *   - raises TypeError if `encoding` is not a str;
 *   - returns NULL if the UTF-8 conversion of the name fails;
 *   - scans the num_codecs entries of this module's codec_list for an exact
 *     strcmp() match and returns _getcodec() for the first hit;
 *   - otherwise raises LookupError.
 * The lookup is now bounded by num_codecs instead of relying on an
 * empty-name sentinel entry.
 * NOTE(review): the pointer handed to _getcodec() points into
 * st->codec_list, which _cjk_free() releases - confirm that no codec object
 * can outlive the module. */
static PyObject *
310+
getcodec(PyObject *self, PyObject *encoding)
311+
{
312+
if (!PyUnicode_Check(encoding)) {
313+
PyErr_SetString(PyExc_TypeError,
314+
"encoding name must be a string.");
278315
return NULL;
279316
}
280-
281-
codecobj = PyCapsule_New((void *)codec, PyMultibyteCodec_CAPSULE_NAME, NULL);
282-
if (codecobj == NULL)
317+
const char *enc = PyUnicode_AsUTF8(encoding);
318+
if (enc == NULL) {
283319
return NULL;
320+
}
284321

285-
r = PyObject_CallOneArg(cofunc, codecobj);
286-
Py_DECREF(codecobj);
287-
Py_DECREF(cofunc);
322+
cjkcodecs_module_state *st = get_module_state(self);
323+
for (int i = 0; i < st->num_codecs; i++) {
324+
const MultibyteCodec *codec = &st->codec_list[i];
325+
if (strcmp(codec->encoding, enc) == 0) {
326+
return _getcodec(codec);
327+
}
328+
}
288329

289-
return r;
330+
PyErr_SetString(PyExc_LookupError,
331+
"no such codec is supported.");
332+
return NULL;
290333
}
291334

335+
/* Forward declarations: the bodies are generated later in each codec source
 * file by BEGIN_MAPPINGS_LIST/END_MAPPINGS_LIST and
 * BEGIN_CODECS_LIST/END_CODECS_LIST.  Both return 0 on success and -1 on
 * allocation failure. */
static int add_mappings(cjkcodecs_module_state *);
336+
static int add_codecs(cjkcodecs_module_state *);
292337

293338
static int
294339
register_maps(PyObject *module)
295340
{
296-
const struct dbcs_map *h;
341+
// Init module state.
342+
cjkcodecs_module_state *st = get_module_state(module);
343+
if (add_mappings(st) < 0) {
344+
return -1;
345+
}
346+
if (add_codecs(st) < 0) {
347+
return -1;
348+
}
297349

298-
for (h = mapping_list; h->charset[0] != '\0'; h++) {
350+
for (int i = 0; i < st->num_mappings; i++) {
351+
const struct dbcs_map *h = &st->mapping_list[i];
299352
char mhname[256] = "__map_";
300353
strcpy(mhname + sizeof("__map_") - 1, h->charset);
301354

@@ -394,6 +447,13 @@ _cjk_exec(PyObject *module)
394447
return register_maps(module);
395448
}
396449

450+
/* Module m_free hook (installed via `.m_free = _cjk_free` in the module
 * definition): releases the mapping and codec arrays held in the module state.
 * `mod` is the module object, passed as void * to match the hook's signature.
 * NOTE(review): if add_mappings()/add_codecs() failed part-way, a list pointer
 * may never have been assigned; this relies on the state starting zeroed and
 * on PyMem_Free() accepting NULL - confirm. */
static void
451+
_cjk_free(void *mod)
452+
{
453+
cjkcodecs_module_state *st = get_module_state((PyObject *)mod);
454+
PyMem_Free(st->mapping_list);
455+
PyMem_Free(st->codec_list);
456+
}
397457

398458
static struct PyMethodDef _cjk_methods[] = {
399459
{"getcodec", (PyCFunction)getcodec, METH_O, ""},
@@ -409,9 +469,10 @@ static PyModuleDef_Slot _cjk_slots[] = {
409469
static struct PyModuleDef _cjk_module = { \
410470
PyModuleDef_HEAD_INIT, \
411471
.m_name = "_codecs_"#loc, \
412-
.m_size = 0, \
472+
.m_size = sizeof(cjkcodecs_module_state), \
413473
.m_methods = _cjk_methods, \
414474
.m_slots = _cjk_slots, \
475+
.m_free = _cjk_free, \
415476
}; \
416477
\
417478
PyMODINIT_FUNC \

0 commit comments

Comments
 (0)