1/* 2 * cjkcodecs.h: common header for cjkcodecs 3 * 4 * Written by Hye-Shik Chang <perky@FreeBSD.org> 5 */ 6 7#ifndef _CJKCODECS_H_ 8#define _CJKCODECS_H_ 9 10#define PY_SSIZE_T_CLEAN 11#include "Python.h" 12#include "multibytecodec.h" 13 14 15/* a unicode "undefined" code point */ 16#define UNIINV 0xFFFE 17 18/* internal-use DBCS code points which aren't used by any charsets */ 19#define NOCHAR 0xFFFF 20#define MULTIC 0xFFFE 21#define DBCINV 0xFFFD 22 23/* shorter macros to save source size of mapping tables */ 24#define U UNIINV 25#define N NOCHAR 26#define M MULTIC 27#define D DBCINV 28 29struct dbcs_index { 30 const ucs2_t *map; 31 unsigned char bottom, top; 32}; 33typedef struct dbcs_index decode_map; 34 35struct widedbcs_index { 36 const Py_UCS4 *map; 37 unsigned char bottom, top; 38}; 39typedef struct widedbcs_index widedecode_map; 40 41struct unim_index { 42 const DBCHAR *map; 43 unsigned char bottom, top; 44}; 45typedef struct unim_index encode_map; 46 47struct unim_index_bytebased { 48 const unsigned char *map; 49 unsigned char bottom, top; 50}; 51 52struct dbcs_map { 53 const char *charset; 54 const struct unim_index *encmap; 55 const struct dbcs_index *decmap; 56}; 57 58struct pair_encodemap { 59 Py_UCS4 uniseq; 60 DBCHAR code; 61}; 62 63static const MultibyteCodec *codec_list; 64static const struct dbcs_map *mapping_list; 65 66#define CODEC_INIT(encoding) \ 67 static int encoding##_codec_init(const void *config) 68 69#define ENCODER_INIT(encoding) \ 70 static int encoding##_encode_init( \ 71 MultibyteCodec_State *state, const void *config) 72#define ENCODER(encoding) \ 73 static Py_ssize_t encoding##_encode( \ 74 MultibyteCodec_State *state, const void *config, \ 75 int kind, const void *data, \ 76 Py_ssize_t *inpos, Py_ssize_t inlen, \ 77 unsigned char **outbuf, Py_ssize_t outleft, int flags) 78#define ENCODER_RESET(encoding) \ 79 static Py_ssize_t encoding##_encode_reset( \ 80 MultibyteCodec_State *state, const void *config, \ 81 unsigned char **outbuf, Py_ssize_t outleft) 82 83#define DECODER_INIT(encoding) \ 84 static int encoding##_decode_init( \ 85 MultibyteCodec_State *state, const void *config) 86#define DECODER(encoding) \ 87 static Py_ssize_t encoding##_decode( \ 88 MultibyteCodec_State *state, const void *config, \ 89 const unsigned char **inbuf, Py_ssize_t inleft, \ 90 _PyUnicodeWriter *writer) 91#define DECODER_RESET(encoding) \ 92 static Py_ssize_t encoding##_decode_reset( \ 93 MultibyteCodec_State *state, const void *config) 94 95#define NEXT_IN(i) \ 96 do { \ 97 (*inbuf) += (i); \ 98 (inleft) -= (i); \ 99 } while (0) 100#define NEXT_INCHAR(i) \ 101 do { \ 102 (*inpos) += (i); \ 103 } while (0) 104#define NEXT_OUT(o) \ 105 do { \ 106 (*outbuf) += (o); \ 107 (outleft) -= (o); \ 108 } while (0) 109#define NEXT(i, o) \ 110 do { \ 111 NEXT_INCHAR(i); \ 112 NEXT_OUT(o); \ 113 } while (0) 114 115#define REQUIRE_INBUF(n) \ 116 do { \ 117 if (inleft < (n)) \ 118 return MBERR_TOOFEW; \ 119 } while (0) 120 121#define REQUIRE_OUTBUF(n) \ 122 do { \ 123 if (outleft < (n)) \ 124 return MBERR_TOOSMALL; \ 125 } while (0) 126 127#define INBYTE1 ((*inbuf)[0]) 128#define INBYTE2 ((*inbuf)[1]) 129#define INBYTE3 ((*inbuf)[2]) 130#define INBYTE4 ((*inbuf)[3]) 131 132#define INCHAR1 (PyUnicode_READ(kind, data, *inpos)) 133#define INCHAR2 (PyUnicode_READ(kind, data, *inpos + 1)) 134 135#define OUTCHAR(c) \ 136 do { \ 137 if (_PyUnicodeWriter_WriteChar(writer, (c)) < 0) \ 138 return MBERR_EXCEPTION; \ 139 } while (0) 140 141#define OUTCHAR2(c1, c2) \ 142 do { \ 143 Py_UCS4 _c1 = (c1); \ 144 Py_UCS4 _c2 = (c2); \ 145 if (_PyUnicodeWriter_Prepare(writer, 2, Py_MAX(_c1, c2)) < 0) \ 146 return MBERR_EXCEPTION; \ 147 PyUnicode_WRITE(writer->kind, writer->data, writer->pos, _c1); \ 148 PyUnicode_WRITE(writer->kind, writer->data, writer->pos + 1, _c2); \ 149 writer->pos += 2; \ 150 } while (0) 151 152#define OUTBYTEI(c, i) \ 153 do { \ 154 assert((unsigned char)(c) == (c)); \ 155 ((*outbuf)[i]) = (c); \ 156 } while (0) 157 158#define OUTBYTE1(c) OUTBYTEI(c, 0) 159#define OUTBYTE2(c) OUTBYTEI(c, 1) 160#define OUTBYTE3(c) OUTBYTEI(c, 2) 161#define OUTBYTE4(c) OUTBYTEI(c, 3) 162 163#define WRITEBYTE1(c1) \ 164 do { \ 165 REQUIRE_OUTBUF(1); \ 166 OUTBYTE1(c1); \ 167 } while (0) 168#define WRITEBYTE2(c1, c2) \ 169 do { \ 170 REQUIRE_OUTBUF(2); \ 171 OUTBYTE1(c1); \ 172 OUTBYTE2(c2); \ 173 } while (0) 174#define WRITEBYTE3(c1, c2, c3) \ 175 do { \ 176 REQUIRE_OUTBUF(3); \ 177 OUTBYTE1(c1); \ 178 OUTBYTE2(c2); \ 179 OUTBYTE3(c3); \ 180 } while (0) 181#define WRITEBYTE4(c1, c2, c3, c4) \ 182 do { \ 183 REQUIRE_OUTBUF(4); \ 184 OUTBYTE1(c1); \ 185 OUTBYTE2(c2); \ 186 OUTBYTE3(c3); \ 187 OUTBYTE4(c4); \ 188 } while (0) 189 190#define _TRYMAP_ENC(m, assi, val) \ 191 ((m)->map != NULL && (val) >= (m)->bottom && \ 192 (val)<= (m)->top && ((assi) = (m)->map[(val) - \ 193 (m)->bottom]) != NOCHAR) 194#define TRYMAP_ENC(charset, assi, uni) \ 195 _TRYMAP_ENC(&charset##_encmap[(uni) >> 8], assi, (uni) & 0xff) 196 197#define _TRYMAP_DEC(m, assi, val) \ 198 ((m)->map != NULL && \ 199 (val) >= (m)->bottom && \ 200 (val)<= (m)->top && \ 201 ((assi) = (m)->map[(val) - (m)->bottom]) != UNIINV) 202#define TRYMAP_DEC(charset, assi, c1, c2) \ 203 _TRYMAP_DEC(&charset##_decmap[c1], assi, c2) 204 205#define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = { 206#define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL}, 207#define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap}, 208#define MAPPING_ENCDEC(enc) {#enc, (void*)enc##_encmap, (void*)enc##_decmap}, 209#define END_MAPPINGS_LIST \ 210 {"", NULL, NULL} }; \ 211 static const struct dbcs_map *mapping_list = \ 212 (const struct dbcs_map *)_mapping_list; 213 214#define BEGIN_CODECS_LIST static const MultibyteCodec _codec_list[] = { 215#define _STATEFUL_METHODS(enc) \ 216 enc##_encode, \ 217 enc##_encode_init, \ 218 enc##_encode_reset, \ 219 enc##_decode, \ 220 enc##_decode_init, \ 221 enc##_decode_reset, 222#define _STATELESS_METHODS(enc) \ 223 enc##_encode, NULL, NULL, \ 224 enc##_decode, NULL, NULL, 225#define CODEC_STATEFUL(enc) { \ 226 #enc, NULL, NULL, \ 227 _STATEFUL_METHODS(enc) \ 228}, 229#define CODEC_STATELESS(enc) { \ 230 #enc, NULL, NULL, \ 231 _STATELESS_METHODS(enc) \ 232}, 233#define CODEC_STATELESS_WINIT(enc) { \ 234 #enc, NULL, \ 235 enc##_codec_init, \ 236 _STATELESS_METHODS(enc) \ 237}, 238#define END_CODECS_LIST \ 239 {"", NULL,} }; \ 240 static const MultibyteCodec *codec_list = \ 241 (const MultibyteCodec *)_codec_list; 242 243 244 245static PyObject * 246getmultibytecodec(void) 247{ 248 PyObject *mod = PyImport_ImportModuleNoBlock("_multibytecodec"); 249 if (mod == NULL) { 250 return NULL; 251 } 252 253 PyObject *cofunc = PyObject_GetAttrString(mod, "__create_codec"); 254 Py_DECREF(mod); 255 return cofunc; 256} 257 258static PyObject * 259getcodec(PyObject *self, PyObject *encoding) 260{ 261 PyObject *codecobj, *r, *cofunc; 262 const MultibyteCodec *codec; 263 const char *enc; 264 265 if (!PyUnicode_Check(encoding)) { 266 PyErr_SetString(PyExc_TypeError, 267 "encoding name must be a string."); 268 return NULL; 269 } 270 enc = PyUnicode_AsUTF8(encoding); 271 if (enc == NULL) 272 return NULL; 273 274 cofunc = getmultibytecodec(); 275 if (cofunc == NULL) 276 return NULL; 277 278 for (codec = codec_list; codec->encoding[0]; codec++) 279 if (strcmp(codec->encoding, enc) == 0) 280 break; 281 282 if (codec->encoding[0] == '\0') { 283 PyErr_SetString(PyExc_LookupError, 284 "no such codec is supported."); 285 return NULL; 286 } 287 288 codecobj = PyCapsule_New((void *)codec, PyMultibyteCodec_CAPSULE_NAME, NULL); 289 if (codecobj == NULL) 290 return NULL; 291 292 r = PyObject_CallOneArg(cofunc, codecobj); 293 Py_DECREF(codecobj); 294 Py_DECREF(cofunc); 295 296 return r; 297} 298 299 300static int 301register_maps(PyObject *module) 302{ 303 const struct dbcs_map *h; 304 305 for (h = mapping_list; h->charset[0] != '\0'; h++) { 306 char mhname[256] = "__map_"; 307 strcpy(mhname + sizeof("__map_") - 1, h->charset); 308 309 PyObject *capsule = PyCapsule_New((void *)h, 310 PyMultibyteCodec_CAPSULE_NAME, NULL); 311 if (capsule == NULL) { 312 return -1; 313 } 314 if (PyModule_AddObject(module, mhname, capsule) < 0) { 315 Py_DECREF(capsule); 316 return -1; 317 } 318 } 319 return 0; 320} 321 322#ifdef USING_BINARY_PAIR_SEARCH 323static DBCHAR 324find_pairencmap(ucs2_t body, ucs2_t modifier, 325 const struct pair_encodemap *haystack, int haystacksize) 326{ 327 int pos, min, max; 328 Py_UCS4 value = body << 16 | modifier; 329 330 min = 0; 331 max = haystacksize; 332 333 for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1) { 334 if (value < haystack[pos].uniseq) { 335 if (max != pos) { 336 max = pos; 337 continue; 338 } 339 } 340 else if (value > haystack[pos].uniseq) { 341 if (min != pos) { 342 min = pos; 343 continue; 344 } 345 } 346 break; 347 } 348 349 if (value == haystack[pos].uniseq) { 350 return haystack[pos].code; 351 } 352 return DBCINV; 353} 354#endif 355 356#ifdef USING_IMPORTED_MAPS 357#define IMPORT_MAP(locale, charset, encmap, decmap) \ 358 importmap("_codecs_" #locale, "__map_" #charset, \ 359 (const void**)encmap, (const void**)decmap) 360 361static int 362importmap(const char *modname, const char *symbol, 363 const void **encmap, const void **decmap) 364{ 365 PyObject *o, *mod; 366 367 mod = PyImport_ImportModule(modname); 368 if (mod == NULL) 369 return -1; 370 371 o = PyObject_GetAttrString(mod, symbol); 372 if (o == NULL) 373 goto errorexit; 374 else if (!PyCapsule_IsValid(o, PyMultibyteCodec_CAPSULE_NAME)) { 375 PyErr_SetString(PyExc_ValueError, 376 "map data must be a Capsule."); 377 goto errorexit; 378 } 379 else { 380 struct dbcs_map *map; 381 map = PyCapsule_GetPointer(o, PyMultibyteCodec_CAPSULE_NAME); 382 if (encmap != NULL) 383 *encmap = map->encmap; 384 if (decmap != NULL) 385 *decmap = map->decmap; 386 Py_DECREF(o); 387 } 388 389 Py_DECREF(mod); 390 return 0; 391 392errorexit: 393 Py_DECREF(mod); 394 return -1; 395} 396#endif 397 398static int 399_cjk_exec(PyObject *module) 400{ 401 return register_maps(module); 402} 403 404 405static struct PyMethodDef _cjk_methods[] = { 406 {"getcodec", (PyCFunction)getcodec, METH_O, ""}, 407 {NULL, NULL}, 408}; 409 410static PyModuleDef_Slot _cjk_slots[] = { 411 {Py_mod_exec, _cjk_exec}, 412 {0, NULL} 413}; 414 415#define I_AM_A_MODULE_FOR(loc) \ 416 static struct PyModuleDef _cjk_module = { \ 417 PyModuleDef_HEAD_INIT, \ 418 .m_name = "_codecs_"#loc, \ 419 .m_size = 0, \ 420 .m_methods = _cjk_methods, \ 421 .m_slots = _cjk_slots, \ 422 }; \ 423 \ 424 PyMODINIT_FUNC \ 425 PyInit__codecs_##loc(void) \ 426 { \ 427 return PyModuleDef_Init(&_cjk_module); \ 428 } 429 430#endif 431