mirror of
https://github.com/python/cpython.git
synced 2025-08-03 16:39:00 +00:00
Add CJK codecs support as discussed on python-dev. (SF #873597)
Several style fixes are suggested by Martin v. Loewis and Marc-Andre Lemburg. Thanks!
This commit is contained in:
parent
cd1f7430cb
commit
3e2a306920
88 changed files with 43278 additions and 34 deletions
87
Modules/cjkcodecs/README
Normal file
87
Modules/cjkcodecs/README
Normal file
|
@ -0,0 +1,87 @@
|
|||
Notes on cjkcodecs
|
||||
-------------------
|
||||
This directory contains source files for cjkcodecs extension modules.
|
||||
They are based on CJKCodecs (http://cjkpython.i18n.org/#CJKCodecs)
|
||||
as of Jan 17 2004 currently.
|
||||
|
||||
|
||||
|
||||
To generate or modify mapping headers
|
||||
-------------------------------------
|
||||
Mapping headers are imported from CJKCodecs in pre-generated form.
|
||||
If you need to tweak or add something to them, please look at the tools/
|
||||
subdirectory of CJKCodecs' distribution.
|
||||
|
||||
|
||||
|
||||
Notes on implementation characteristics of each codec
|
||||
-----------------------------------------------------
|
||||
|
||||
1) Big5 codec
|
||||
|
||||
The big5 codec maps the following characters as cp950 does rather
|
||||
than conforming to Unicode.org's mapping, which maps them to 0xFFFD.
|
||||
|
||||
BIG5 Unicode Description
|
||||
|
||||
0xA15A 0x2574 SPACING UNDERSCORE
|
||||
0xA1C3 0xFFE3 SPACING HEAVY OVERSCORE
|
||||
0xA1C5 0x02CD SPACING HEAVY UNDERSCORE
|
||||
0xA1FE 0xFF0F LT DIAG UP RIGHT TO LOW LEFT
|
||||
0xA240 0xFF3C LT DIAG UP LEFT TO LOW RIGHT
|
||||
0xA2CC 0x5341 HANGZHOU NUMERAL TEN
|
||||
0xA2CE 0x5345 HANGZHOU NUMERAL THIRTY
|
||||
|
||||
Because Unicode 0x5341, 0x5345, 0xFF0F and 0xFF3C are mapped to other
|
||||
big5 codes already, roundtrip compatibility is not guaranteed for
|
||||
them.
|
||||
|
||||
|
||||
2) cp932 codec
|
||||
|
||||
To conform to Windows's real mapping, cp932 codec maps the following
|
||||
codepoints in addition to the official cp932 mapping.
|
||||
|
||||
CP932 Unicode Description
|
||||
|
||||
0x80 0x80 UNDEFINED
|
||||
0xA0 0xF8F0 UNDEFINED
|
||||
0xFD 0xF8F1 UNDEFINED
|
||||
0xFE 0xF8F2 UNDEFINED
|
||||
0xFF 0xF8F3 UNDEFINED
|
||||
|
||||
|
||||
3) euc-jisx0213 codec
|
||||
|
||||
The euc-jisx0213 codec maps JIS X 0213 Plane 1 code 0x2140 into
|
||||
unicode U+FF3C instead of U+005C as on unicode.org's mapping.
|
||||
Because euc-jisx0213 has REVERSE SOLIDUS on 0x5c already and A1C0
|
||||
is shown as a full width character, mapping to U+FF3C can make
|
||||
more sense.
|
||||
|
||||
The euc-jisx0213 codec is enabled to decode JIS X 0212 codes on
|
||||
codeset 2. Because JIS X 0212 and JIS X 0213 Plane 2 do not
|
||||
overlap each other, this does not break standard conformance
|
||||
(and JIS X 0213 Plane 2 is intended to be used this way.) On encoding
|
||||
sessions, the codec will try to encode kanji characters in this
|
||||
order:
|
||||
|
||||
JIS X 0213 Plane 1 -> JIS X 0213 Plane 2 -> JIS X 0212
|
||||
|
||||
|
||||
4) euc-jp codec
|
||||
|
||||
The euc-jp codec is a compatibility instance on these points:
|
||||
- U+FF3C FULLWIDTH REVERSE SOLIDUS is mapped to EUC-JP A1C0 (vice versa)
|
||||
- U+00A5 YEN SIGN is mapped to EUC-JP 0x5c. (one way)
|
||||
- U+203E OVERLINE is mapped to EUC-JP 0x7e. (one way)
|
||||
|
||||
|
||||
5) shift-jis codec
|
||||
|
||||
The shift-jis codec maps the 0x20-0x7e area to U+20-U+7E directly
|
||||
instead of using JIS X 0201 for compatibility. The differences are:
|
||||
- U+005C REVERSE SOLIDUS is mapped to SHIFT-JIS 0x5c.
|
||||
- U+007E TILDE is mapped to SHIFT-JIS 0x7e.
|
||||
- U+FF3C FULL-WIDTH REVERSE SOLIDUS is mapped to SHIFT-JIS 815f.
|
||||
|
67
Modules/cjkcodecs/_big5.c
Normal file
67
Modules/cjkcodecs/_big5.c
Normal file
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
* _big5.c: the Big5 codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _big5.c,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "codeccommon.h"
|
||||
|
||||
ENCMAP(big5)
|
||||
DECMAP(big5)
|
||||
|
||||
ENCODER(big5)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = **inbuf;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
RESERVE_OUTBUF(1)
|
||||
**outbuf = c;
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
RESERVE_OUTBUF(2)
|
||||
|
||||
TRYMAP_ENC(big5, code, c);
|
||||
else return 1;
|
||||
|
||||
(*outbuf)[0] = code >> 8;
|
||||
(*outbuf)[1] = code & 0xFF;
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(big5)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
TRYMAP_DEC(big5, **outbuf, c, IN2) {
|
||||
NEXT(2, 1)
|
||||
} else return 2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(big5)
|
||||
MAPOPEN(zh_TW)
|
||||
IMPORTMAP_ENCDEC(big5)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(big5)
|
134
Modules/cjkcodecs/_cp932.c
Normal file
134
Modules/cjkcodecs/_cp932.c
Normal file
|
@ -0,0 +1,134 @@
|
|||
/*
|
||||
* _cp932.c: the CP932 codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _cp932.c,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "codeccommon.h"
|
||||
|
||||
ENCMAP(jisxcommon)
|
||||
ENCMAP(cp932ext)
|
||||
DECMAP(jisx0208)
|
||||
DECMAP(cp932ext)
|
||||
|
||||
ENCODER(cp932)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
unsigned char c1, c2;
|
||||
|
||||
if (c <= 0x80) {
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
} else if (c >= 0xff61 && c <= 0xff9f) {
|
||||
WRITE1(c - 0xfec0)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
} else if (c >= 0xf8f0 && c <= 0xf8f3) {
|
||||
/* Windows compatability */
|
||||
RESERVE_OUTBUF(1)
|
||||
if (c == 0xf8f0)
|
||||
OUT1(0xa0)
|
||||
else
|
||||
OUT1(c - 0xfef1 + 0xfd)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
UCS4INVALID(c)
|
||||
RESERVE_OUTBUF(2)
|
||||
|
||||
TRYMAP_ENC(cp932ext, code, c) {
|
||||
OUT1(code >> 8)
|
||||
OUT2(code & 0xff)
|
||||
} else TRYMAP_ENC(jisxcommon, code, c) {
|
||||
if (code & 0x8000) /* MSB set: JIS X 0212 */
|
||||
return 1;
|
||||
|
||||
/* JIS X 0208 */
|
||||
c1 = code >> 8;
|
||||
c2 = code & 0xff;
|
||||
c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
|
||||
c1 = (c1 - 0x21) >> 1;
|
||||
OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
|
||||
OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
|
||||
} else if (c >= 0xe000 && c < 0xe758) {
|
||||
/* User-defined area */
|
||||
c1 = (Py_UNICODE)(c - 0xe000) / 188;
|
||||
c2 = (Py_UNICODE)(c - 0xe000) % 188;
|
||||
OUT1(c1 + 0xf0)
|
||||
OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
|
||||
} else
|
||||
return 1;
|
||||
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(cp932)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1, c2;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
if (c <= 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
} else if (c >= 0xa0 && c <= 0xdf) {
|
||||
if (c == 0xa0)
|
||||
OUT1(0xf8f0) /* half-width katakana */
|
||||
else
|
||||
OUT1(0xfec0 + c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
} else if (c >= 0xfd/* && c <= 0xff*/) {
|
||||
/* Windows compatibility */
|
||||
OUT1(0xf8f1 - 0xfd + c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
c2 = IN2;
|
||||
|
||||
TRYMAP_DEC(cp932ext, **outbuf, c, c2);
|
||||
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)) {
|
||||
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
|
||||
return 2;
|
||||
|
||||
c = (c < 0xe0 ? c - 0x81 : c - 0xc1);
|
||||
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
|
||||
c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21);
|
||||
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
|
||||
|
||||
TRYMAP_DEC(jisx0208, **outbuf, c, c2);
|
||||
else return 2;
|
||||
} else if (c >= 0xf0 && c <= 0xf9) {
|
||||
if ((c2 >= 0x40 && c2 <= 0x7e) || (c2 >= 0x80 && c2 <= 0xfc))
|
||||
OUT1(0xe000 + 188 * (c - 0xf0) +
|
||||
(c2 < 0x80 ? c2 - 0x40 : c2 - 0x41))
|
||||
else
|
||||
return 2;
|
||||
} else
|
||||
return 2;
|
||||
|
||||
NEXT(2, 1)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(cp932)
|
||||
MAPOPEN(ja_JP)
|
||||
IMPORTMAP_DEC(jisx0208)
|
||||
IMPORTMAP_ENCDEC(cp932ext)
|
||||
IMPORTMAP_ENC(jisxcommon)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(cp932)
|
73
Modules/cjkcodecs/_cp949.c
Normal file
73
Modules/cjkcodecs/_cp949.c
Normal file
|
@ -0,0 +1,73 @@
|
|||
/*
|
||||
* _cp949.c: the CP949 codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _cp949.c,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "codeccommon.h"
|
||||
|
||||
ENCMAP(cp949)
|
||||
DECMAP(ksx1001)
|
||||
DECMAP(cp949ext)
|
||||
|
||||
ENCODER(cp949)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
RESERVE_OUTBUF(2)
|
||||
TRYMAP_ENC(cp949, code, c);
|
||||
else return 1;
|
||||
|
||||
OUT1((code >> 8) | 0x80)
|
||||
if (code & 0x8000)
|
||||
OUT2(code & 0xFF) /* MSB set: CP949 */
|
||||
else
|
||||
OUT2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(cp949)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);
|
||||
else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);
|
||||
else return 2;
|
||||
|
||||
NEXT(2, 1)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(cp949)
|
||||
MAPOPEN(ko_KR)
|
||||
IMPORTMAP_DEC(ksx1001)
|
||||
IMPORTMAP_DEC(cp949ext)
|
||||
IMPORTMAP_ENC(cp949)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(cp949)
|
72
Modules/cjkcodecs/_cp950.c
Normal file
72
Modules/cjkcodecs/_cp950.c
Normal file
|
@ -0,0 +1,72 @@
|
|||
/*
|
||||
* _cp950.c: the CP950 codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _cp950.c,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "codeccommon.h"
|
||||
|
||||
ENCMAP(big5)
|
||||
ENCMAP(cp950ext)
|
||||
DECMAP(big5)
|
||||
DECMAP(cp950ext)
|
||||
|
||||
ENCODER(cp950)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
RESERVE_OUTBUF(2)
|
||||
TRYMAP_ENC(cp950ext, code, c);
|
||||
else TRYMAP_ENC(big5, code, c);
|
||||
else return 1;
|
||||
|
||||
OUT1(code >> 8)
|
||||
OUT2(code & 0xFF)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(cp950)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
|
||||
TRYMAP_DEC(cp950ext, **outbuf, c, IN2);
|
||||
else TRYMAP_DEC(big5, **outbuf, c, IN2);
|
||||
else return 2;
|
||||
|
||||
NEXT(2, 1)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(cp950)
|
||||
MAPOPEN(zh_TW)
|
||||
IMPORTMAP_ENCDEC(big5)
|
||||
IMPORTMAP_ENCDEC(cp950ext)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(cp950)
|
180
Modules/cjkcodecs/_euc_jisx0213.c
Normal file
180
Modules/cjkcodecs/_euc_jisx0213.c
Normal file
|
@ -0,0 +1,180 @@
|
|||
/*
|
||||
* _euc_jisx0213.c: the EUC-JISX0213 codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _euc_jisx0213.c,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#define USING_BINARY_PAIR_SEARCH
|
||||
#include "codeccommon.h"
|
||||
#include "map_jisx0213_pairs.h"
|
||||
|
||||
ENCMAP(jisxcommon)
|
||||
DECMAP(jisx0208)
|
||||
DECMAP(jisx0212)
|
||||
ENCMAP(jisx0213_bmp)
|
||||
DECMAP(jisx0213_1_bmp)
|
||||
DECMAP(jisx0213_2_bmp)
|
||||
ENCMAP(jisx0213_emp)
|
||||
DECMAP(jisx0213_1_emp)
|
||||
DECMAP(jisx0213_2_emp)
|
||||
|
||||
#define EMPBASE 0x20000
|
||||
|
||||
ENCODER(euc_jisx0213)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
ucs4_t c = IN1;
|
||||
DBCHAR code;
|
||||
int insize;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
DECODE_SURROGATE(c)
|
||||
insize = GET_INSIZE(c);
|
||||
|
||||
if (c <= 0xFFFF) {
|
||||
/* try 0213 first because it might have MULTIC */
|
||||
TRYMAP_ENC(jisx0213_bmp, code, c) {
|
||||
if (code == MULTIC) {
|
||||
if (inleft < 2) {
|
||||
if (flags & MBENC_FLUSH) {
|
||||
code = find_pairencmap(c, 0, jisx0213_pairencmap,
|
||||
JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV)
|
||||
return 1;
|
||||
} else
|
||||
return MBERR_TOOFEW;
|
||||
} else {
|
||||
code = find_pairencmap(c, (*inbuf)[1],
|
||||
jisx0213_pairencmap, JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV) {
|
||||
code = find_pairencmap(c, 0, jisx0213_pairencmap,
|
||||
JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV)
|
||||
return 1;
|
||||
} else
|
||||
insize = 2;
|
||||
}
|
||||
}
|
||||
} else TRYMAP_ENC(jisxcommon, code, c);
|
||||
else if (c >= 0xff61 && c <= 0xff9f) {
|
||||
/* JIS X 0201 half-width katakana */
|
||||
WRITE2(0x8e, c - 0xfec0)
|
||||
NEXT(1, 2)
|
||||
continue;
|
||||
} else if (c == 0xff3c)
|
||||
/* F/W REVERSE SOLIDUS (see NOTES.euc-jisx0213) */
|
||||
code = 0x2140;
|
||||
else if (c == 0xff5e)
|
||||
/* F/W TILDE (see NOTES.euc-jisx0213) */
|
||||
code = 0x2232;
|
||||
else
|
||||
return 1;
|
||||
} else if (c >> 16 == EMPBASE >> 16) {
|
||||
TRYMAP_ENC(jisx0213_emp, code, c & 0xffff);
|
||||
else return insize;
|
||||
} else
|
||||
return insize;
|
||||
|
||||
if (code & 0x8000) {
|
||||
/* Codeset 2 */
|
||||
WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
|
||||
NEXT(insize, 3)
|
||||
} else {
|
||||
/* Codeset 1 */
|
||||
WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
|
||||
NEXT(insize, 2)
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(euc_jisx0213)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
ucs4_t code;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c == 0x8e) {
|
||||
/* JIS X 0201 half-width katakana */
|
||||
unsigned char c2;
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
c2 = IN2;
|
||||
if (c2 >= 0xa1 && c2 <= 0xdf) {
|
||||
OUT1(0xfec0 + c2)
|
||||
NEXT(2, 1)
|
||||
} else
|
||||
return 2;
|
||||
} else if (c == 0x8f) {
|
||||
unsigned char c2, c3;
|
||||
|
||||
RESERVE_INBUF(3)
|
||||
c2 = IN2 ^ 0x80;
|
||||
c3 = IN3 ^ 0x80;
|
||||
|
||||
/* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES.euc-jisx0213) */
|
||||
TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ;
|
||||
else TRYMAP_DEC(jisx0213_2_emp, code, c2, c3) {
|
||||
PUTUCS4(EMPBASE | code)
|
||||
NEXT_IN(3)
|
||||
continue;
|
||||
} else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ;
|
||||
else return 3;
|
||||
NEXT(3, 1)
|
||||
} else {
|
||||
unsigned char c2;
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
c ^= 0x80;
|
||||
c2 = IN2 ^ 0x80;
|
||||
|
||||
/* JIS X 0213 Plane 1 */
|
||||
if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c;
|
||||
else if (c == 0x22 && c2 == 0x32) **outbuf = 0xff5e;
|
||||
else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
|
||||
else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2);
|
||||
else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) {
|
||||
PUTUCS4(EMPBASE | code)
|
||||
NEXT_IN(2)
|
||||
continue;
|
||||
} else TRYMAP_DEC(jisx0213_pair, code, c, c2) {
|
||||
WRITE2(code >> 16, code & 0xffff)
|
||||
NEXT(2, 2)
|
||||
continue;
|
||||
} else return 2;
|
||||
NEXT(2, 1)
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(euc_jisx0213)
|
||||
MAPOPEN(ja_JP)
|
||||
IMPORTMAP_ENC(jisxcommon)
|
||||
IMPORTMAP_DEC(jisx0208)
|
||||
IMPORTMAP_DEC(jisx0212)
|
||||
IMPORTMAP_ENC(jisx0213_bmp)
|
||||
IMPORTMAP_DEC(jisx0213_1_bmp)
|
||||
IMPORTMAP_DEC(jisx0213_2_bmp)
|
||||
IMPORTMAP_ENC(jisx0213_emp)
|
||||
IMPORTMAP_DEC(jisx0213_1_emp)
|
||||
IMPORTMAP_DEC(jisx0213_2_emp)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(euc_jisx0213)
|
127
Modules/cjkcodecs/_euc_jp.c
Normal file
127
Modules/cjkcodecs/_euc_jp.c
Normal file
|
@ -0,0 +1,127 @@
|
|||
/*
|
||||
* _euc_jp.c: the EUC-JP codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _euc_jp.c,v 1.5 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "codeccommon.h"
|
||||
|
||||
ENCMAP(jisxcommon)
|
||||
DECMAP(jisx0208)
|
||||
DECMAP(jisx0212)
|
||||
|
||||
ENCODER(euc_jp)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
UCS4INVALID(c)
|
||||
|
||||
TRYMAP_ENC(jisxcommon, code, c);
|
||||
else if (c >= 0xff61 && c <= 0xff9f) {
|
||||
/* JIS X 0201 half-width katakana */
|
||||
WRITE2(0x8e, c - 0xfec0)
|
||||
NEXT(1, 2)
|
||||
continue;
|
||||
}
|
||||
#ifndef STRICT_BUILD
|
||||
else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
code = 0x2140;
|
||||
else if (c == 0xa5) { /* YEN SIGN */
|
||||
WRITE1(0x5c);
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
} else if (c == 0x203e) { /* OVERLINE */
|
||||
WRITE1(0x7e);
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
else
|
||||
return 1;
|
||||
|
||||
if (code & 0x8000) {
|
||||
/* JIS X 0212 */
|
||||
WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
|
||||
NEXT(1, 3)
|
||||
} else {
|
||||
/* JIS X 0208 */
|
||||
WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(euc_jp)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c == 0x8e) {
|
||||
/* JIS X 0201 half-width katakana */
|
||||
unsigned char c2;
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
c2 = IN2;
|
||||
if (c2 >= 0xa1 && c2 <= 0xdf) {
|
||||
OUT1(0xfec0 + c2)
|
||||
NEXT(2, 1)
|
||||
} else
|
||||
return 2;
|
||||
} else if (c == 0x8f) {
|
||||
unsigned char c2, c3;
|
||||
|
||||
RESERVE_INBUF(3)
|
||||
c2 = IN2;
|
||||
c3 = IN3;
|
||||
/* JIS X 0212 */
|
||||
TRYMAP_DEC(jisx0212, **outbuf, c2 ^ 0x80, c3 ^ 0x80) {
|
||||
NEXT(3, 1)
|
||||
} else
|
||||
return 3;
|
||||
} else {
|
||||
unsigned char c2;
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
c2 = IN2;
|
||||
/* JIS X 0208 */
|
||||
#ifndef STRICT_BUILD
|
||||
if (c == 0xa1 && c2 == 0xc0) /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
**outbuf = 0xff3c;
|
||||
else
|
||||
#endif
|
||||
TRYMAP_DEC(jisx0208, **outbuf, c ^ 0x80, c2 ^ 0x80) ;
|
||||
else return 2;
|
||||
NEXT(2, 1)
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(euc_jp)
|
||||
MAPOPEN(ja_JP)
|
||||
IMPORTMAP_DEC(jisx0208)
|
||||
IMPORTMAP_DEC(jisx0212)
|
||||
IMPORTMAP_ENC(jisxcommon)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(euc_jp)
|
71
Modules/cjkcodecs/_euc_kr.c
Normal file
71
Modules/cjkcodecs/_euc_kr.c
Normal file
|
@ -0,0 +1,71 @@
|
|||
/*
|
||||
* _euc_kr.c: the EUC-KR codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _euc_kr.c,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "codeccommon.h"
|
||||
|
||||
ENCMAP(cp949)
|
||||
DECMAP(ksx1001)
|
||||
|
||||
ENCODER(euc_kr)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
RESERVE_OUTBUF(2)
|
||||
TRYMAP_ENC(cp949, code, c);
|
||||
else return 1;
|
||||
|
||||
if (code & 0x8000) /* MSB set: CP949 */
|
||||
return 1;
|
||||
|
||||
OUT1((code >> 8) | 0x80)
|
||||
OUT2((code & 0xFF) | 0x80)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(euc_kr)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
|
||||
TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
|
||||
NEXT(2, 1)
|
||||
} else return 2;
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(euc_kr)
|
||||
MAPOPEN(ko_KR)
|
||||
IMPORTMAP_DEC(ksx1001)
|
||||
IMPORTMAP_ENC(cp949)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(euc_kr)
|
181
Modules/cjkcodecs/_gb18030.c
Normal file
181
Modules/cjkcodecs/_gb18030.c
Normal file
|
@ -0,0 +1,181 @@
|
|||
/*
|
||||
* _gb18030.c: the GB18030 codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _gb18030.c,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "codeccommon.h"
|
||||
#include "tweak_gbk.h"
|
||||
#include "map_gb18030uni.h"
|
||||
|
||||
ENCMAP(gbcommon)
|
||||
ENCMAP(gb18030ext)
|
||||
DECMAP(gb2312)
|
||||
DECMAP(gbkext)
|
||||
DECMAP(gb18030ext)
|
||||
|
||||
ENCODER(gb18030)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
ucs4_t c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
DECODE_SURROGATE(c)
|
||||
if (c > 0x10FFFF)
|
||||
#if Py_UNICODE_SIZE == 2
|
||||
return 2; /* surrogates pair */
|
||||
#else
|
||||
return 1;
|
||||
#endif
|
||||
else if (c >= 0x10000) {
|
||||
ucs4_t tc = c - 0x10000;
|
||||
|
||||
RESERVE_OUTBUF(4)
|
||||
|
||||
OUT4((unsigned char)(tc % 10) + 0x30)
|
||||
tc /= 10;
|
||||
OUT3((unsigned char)(tc % 126) + 0x81)
|
||||
tc /= 126;
|
||||
OUT2((unsigned char)(tc % 10) + 0x30)
|
||||
tc /= 10;
|
||||
OUT1((unsigned char)(tc + 0x90))
|
||||
|
||||
#if Py_UNICODE_SIZE == 2
|
||||
NEXT(2, 4) /* surrogates pair */
|
||||
#else
|
||||
NEXT(1, 4)
|
||||
#endif
|
||||
continue;
|
||||
}
|
||||
|
||||
RESERVE_OUTBUF(2)
|
||||
|
||||
GBK_PREENCODE(c, code)
|
||||
else TRYMAP_ENC(gbcommon, code, c);
|
||||
else TRYMAP_ENC(gb18030ext, code, c);
|
||||
else {
|
||||
const struct _gb18030_to_unibmp_ranges *utrrange;
|
||||
|
||||
RESERVE_OUTBUF(4)
|
||||
|
||||
for (utrrange = gb18030_to_unibmp_ranges;
|
||||
utrrange->first != 0;
|
||||
utrrange++)
|
||||
if (utrrange->first <= c && c <= utrrange->last) {
|
||||
Py_UNICODE tc;
|
||||
|
||||
tc = c - utrrange->first + utrrange->base;
|
||||
|
||||
OUT4((unsigned char)(tc % 10) + 0x30)
|
||||
tc /= 10;
|
||||
OUT3((unsigned char)(tc % 126) + 0x81)
|
||||
tc /= 126;
|
||||
OUT2((unsigned char)(tc % 10) + 0x30)
|
||||
tc /= 10;
|
||||
OUT1((unsigned char)tc + 0x81)
|
||||
|
||||
NEXT(1, 4)
|
||||
break;
|
||||
}
|
||||
|
||||
if (utrrange->first == 0) {
|
||||
PyErr_SetString(PyExc_RuntimeError,
|
||||
"unicode mapping invalid");
|
||||
return 1;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
OUT1((code >> 8) | 0x80)
|
||||
if (code & 0x8000)
|
||||
OUT2((code & 0xFF)) /* MSB set: GBK or GB18030ext */
|
||||
else
|
||||
OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
|
||||
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(gb18030)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1, c2;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
|
||||
c2 = IN2;
|
||||
if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */
|
||||
const struct _gb18030_to_unibmp_ranges *utr;
|
||||
unsigned char c3, c4;
|
||||
ucs4_t lseq;
|
||||
|
||||
RESERVE_INBUF(4)
|
||||
c3 = IN3;
|
||||
c4 = IN4;
|
||||
if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39)
|
||||
return 4;
|
||||
c -= 0x81; c2 -= 0x30;
|
||||
c3 -= 0x81; c4 -= 0x30;
|
||||
|
||||
if (c < 4) { /* U+0080 - U+FFFF */
|
||||
lseq = ((ucs4_t)c * 10 + c2) * 1260 +
|
||||
(ucs4_t)c3 * 10 + c4;
|
||||
if (lseq < 39420) {
|
||||
for (utr = gb18030_to_unibmp_ranges;
|
||||
lseq >= (utr + 1)->base;
|
||||
utr++) ;
|
||||
OUT1(utr->first - utr->base + lseq)
|
||||
NEXT(4, 1)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else if (c >= 15) { /* U+10000 - U+10FFFF */
|
||||
lseq = 0x10000 + (((ucs4_t)c-15) * 10 + c2) * 1260 +
|
||||
(ucs4_t)c3 * 10 + c4;
|
||||
if (lseq <= 0x10FFFF) {
|
||||
PUTUCS4(lseq);
|
||||
NEXT_IN(4)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return 4;
|
||||
}
|
||||
|
||||
GBK_PREDECODE(c, c2, **outbuf)
|
||||
else TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, c2 ^ 0x80);
|
||||
else TRYMAP_DEC(gbkext, **outbuf, c, c2);
|
||||
else TRYMAP_DEC(gb18030ext, **outbuf, c, c2);
|
||||
else return 2;
|
||||
|
||||
NEXT(2, 1)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(gb18030)
|
||||
MAPOPEN(zh_CN)
|
||||
IMPORTMAP_DEC(gb2312)
|
||||
IMPORTMAP_DEC(gbkext)
|
||||
IMPORTMAP_ENC(gbcommon)
|
||||
IMPORTMAP_ENCDEC(gb18030ext)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(gb18030)
|
69
Modules/cjkcodecs/_gb2312.c
Normal file
69
Modules/cjkcodecs/_gb2312.c
Normal file
|
@ -0,0 +1,69 @@
|
|||
/*
|
||||
* _gb2312.c: the GB2312 codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _gb2312.c,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "codeccommon.h"
|
||||
|
||||
ENCMAP(gbcommon)
|
||||
DECMAP(gb2312)
|
||||
|
||||
ENCODER(gb2312)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
RESERVE_OUTBUF(2)
|
||||
TRYMAP_ENC(gbcommon, code, c);
|
||||
else return 1;
|
||||
|
||||
if (code & 0x8000) /* MSB set: GBK */
|
||||
return 1;
|
||||
|
||||
OUT1((code >> 8) | 0x80)
|
||||
OUT2((code & 0xFF) | 0x80)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(gb2312)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = **inbuf;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
|
||||
NEXT(2, 1)
|
||||
} else return 2;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(gb2312)
|
||||
MAPOPEN(zh_CN)
|
||||
IMPORTMAP_DEC(gb2312)
|
||||
IMPORTMAP_ENC(gbcommon)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(gb2312)
|
78
Modules/cjkcodecs/_gbk.c
Normal file
78
Modules/cjkcodecs/_gbk.c
Normal file
|
@ -0,0 +1,78 @@
|
|||
/*
|
||||
* _gbk.c: the GBK codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _gbk.c,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "codeccommon.h"
|
||||
#include "tweak_gbk.h"
|
||||
|
||||
ENCMAP(gbcommon)
|
||||
DECMAP(gb2312)
|
||||
DECMAP(gbkext)
|
||||
|
||||
ENCODER(gbk)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
RESERVE_OUTBUF(2)
|
||||
|
||||
GBK_PREENCODE(c, code)
|
||||
else TRYMAP_ENC(gbcommon, code, c);
|
||||
else return 1;
|
||||
|
||||
OUT1((code >> 8) | 0x80)
|
||||
if (code & 0x8000)
|
||||
OUT2((code & 0xFF)) /* MSB set: GBK */
|
||||
else
|
||||
OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(gbk)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
|
||||
GBK_PREDECODE(c, IN2, **outbuf)
|
||||
else TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80);
|
||||
else TRYMAP_DEC(gbkext, **outbuf, c, IN2);
|
||||
else return 2;
|
||||
|
||||
NEXT(2, 1)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(gbk)
|
||||
MAPOPEN(zh_CN)
|
||||
IMPORTMAP_DEC(gb2312)
|
||||
IMPORTMAP_DEC(gbkext)
|
||||
IMPORTMAP_ENC(gbcommon)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(gbk)
|
134
Modules/cjkcodecs/_hz.c
Normal file
134
Modules/cjkcodecs/_hz.c
Normal file
|
@ -0,0 +1,134 @@
|
|||
/*
|
||||
* _hz.c: the HZ codec (RFC1843)
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _hz.c,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "codeccommon.h"
|
||||
|
||||
ENCMAP(gbcommon)
|
||||
DECMAP(gb2312)
|
||||
|
||||
#define HAVE_ENCODER_INIT
|
||||
ENCODER_INIT(hz)
|
||||
{
|
||||
state->i = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_ENCODER_RESET
|
||||
ENCODER_RESET(hz)
|
||||
{
|
||||
if (state->i != 0) {
|
||||
WRITE2('~', '}')
|
||||
state->i = 0;
|
||||
NEXT_OUT(2)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
ENCODER(hz)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
if (state->i == 0) {
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
} else {
|
||||
WRITE3('~', '}', c)
|
||||
NEXT(1, 3)
|
||||
state->i = 0;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
UCS4INVALID(c)
|
||||
|
||||
TRYMAP_ENC(gbcommon, code, c);
|
||||
else return 1;
|
||||
|
||||
if (code & 0x8000) /* MSB set: GBK */
|
||||
return 1;
|
||||
|
||||
if (state->i == 0) {
|
||||
WRITE4('~', '{', code >> 8, code & 0xff)
|
||||
NEXT(1, 4)
|
||||
state->i = 1;
|
||||
} else {
|
||||
WRITE2(code >> 8, code & 0xff)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_INIT
|
||||
DECODER_INIT(hz)
|
||||
{
|
||||
state->i = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_RESET
|
||||
DECODER_RESET(hz)
|
||||
{
|
||||
state->i = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(hz)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
if (c == '~') {
|
||||
unsigned char c2 = IN2;
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
if (c2 == '~') {
|
||||
WRITE1('~')
|
||||
NEXT(2, 1)
|
||||
continue;
|
||||
} else if (c2 == '{' && state->i == 0)
|
||||
state->i = 1; /* set GB */
|
||||
else if (c2 == '}' && state->i == 1)
|
||||
state->i = 0; /* set ASCII */
|
||||
else if (c2 == '\n')
|
||||
; /* line-continuation */
|
||||
else
|
||||
return 2;
|
||||
NEXT(2, 0);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c & 0x80)
|
||||
return 1;
|
||||
|
||||
if (state->i == 0) { /* ASCII mode */
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
} else { /* GB mode */
|
||||
RESERVE_INBUF(2)
|
||||
RESERVE_OUTBUF(1)
|
||||
TRYMAP_DEC(gb2312, **outbuf, c, IN2) {
|
||||
NEXT(2, 1)
|
||||
} else
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(hz)
|
||||
MAPOPEN(zh_CN)
|
||||
IMPORTMAP_DEC(gb2312)
|
||||
IMPORTMAP_ENC(gbcommon)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(hz)
|
173
Modules/cjkcodecs/_iso2022_jp.c
Normal file
173
Modules/cjkcodecs/_iso2022_jp.c
Normal file
|
@ -0,0 +1,173 @@
|
|||
/*
|
||||
* _iso2022_jp.c: the ISO-2022-JP codec (RFC1468)
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _iso2022_jp.c,v 1.7 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#define ISO2022_DESIGNATIONS \
|
||||
CHARSET_ASCII, CHARSET_JISX0201_R, CHARSET_JISX0208, CHARSET_JISX0208_O
|
||||
#define ISO2022_NO_SHIFT
|
||||
#define ISO2022_USE_JISX0208EXT
|
||||
|
||||
#include "codeccommon.h"
|
||||
#include "iso2022common.h"
|
||||
#include "alg_jisx0201.h"
|
||||
|
||||
ENCMAP(jisxcommon)
|
||||
DECMAP(jisx0208)
|
||||
|
||||
#define HAVE_ENCODER_INIT
|
||||
ENCODER_INIT(iso2022_jp)
|
||||
{
|
||||
STATE_CLEARFLAGS(state)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
STATE_SETG1(state, CHARSET_ASCII)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_ENCODER_RESET
|
||||
ENCODER_RESET(iso2022_jp)
|
||||
{
|
||||
if (STATE_GETG0(state) != CHARSET_ASCII) {
|
||||
RESERVE_OUTBUF(3)
|
||||
WRITE3(ESC, '(', 'B')
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
NEXT_OUT(3)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ISO-2022-JP changes designations instead of shifting-out */
|
||||
|
||||
ENCODER(iso2022_jp)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
switch (STATE_GETG0(state)) {
|
||||
case CHARSET_ASCII:
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
break;
|
||||
case CHARSET_JISX0201_R:
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
else { /* FALLTHROUGH (yay!) */
|
||||
default:
|
||||
WRITE3(ESC, '(', 'B')
|
||||
NEXT_OUT(3)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
code = c;
|
||||
}
|
||||
WRITE1(code)
|
||||
NEXT(1, 1)
|
||||
break;
|
||||
}
|
||||
if (c == '\n')
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
}
|
||||
else UCS4INVALID(c)
|
||||
else {
|
||||
unsigned char charset;
|
||||
|
||||
charset = STATE_GETG0(state);
|
||||
if (charset == CHARSET_JISX0201_R) {
|
||||
code = DBCINV;
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
if (code != DBCINV) {
|
||||
WRITE1(code)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
TRYMAP_ENC(jisxcommon, code, c) {
|
||||
if (code & 0x8000) /* MSB set: JIS X 0212 */
|
||||
return 1;
|
||||
jisx0208encode: if (charset != CHARSET_JISX0208) {
|
||||
WRITE3(ESC, '$', 'B')
|
||||
STATE_SETG0(state, CHARSET_JISX0208)
|
||||
NEXT_OUT(3)
|
||||
}
|
||||
WRITE2(code >> 8, code & 0xff)
|
||||
NEXT(1, 2)
|
||||
} else if (c == 0xff3c) { /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
code = 0x2140;
|
||||
goto jisx0208encode;
|
||||
} else {
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
else
|
||||
return 1;
|
||||
/* if (charset == CHARSET_JISX0201_R) : already checked */
|
||||
WRITE4(ESC, '(', 'J', code)
|
||||
STATE_SETG0(state, CHARSET_JISX0201_R)
|
||||
NEXT(1, 4)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_INIT
|
||||
DECODER_INIT(iso2022_jp)
|
||||
{
|
||||
STATE_CLEARFLAGS(state)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
STATE_SETG1(state, CHARSET_ASCII)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_RESET
|
||||
DECODER_RESET(iso2022_jp)
|
||||
{
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(iso2022_jp)
|
||||
{
|
||||
ISO2022_LOOP_BEGIN
|
||||
unsigned char charset, c2;
|
||||
|
||||
ISO2022_GETCHARSET(charset, c)
|
||||
|
||||
if (charset & CHARSET_DOUBLEBYTE) {
|
||||
/* all double byte character sets are in JIS X 0208 here.
|
||||
* this means that we don't distinguish :1978 from :1983. */
|
||||
RESERVE_INBUF(2)
|
||||
RESERVE_OUTBUF(1)
|
||||
c2 = IN2;
|
||||
if (c == 0x21 && c2 == 0x40) { /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
**outbuf = 0xff3c;
|
||||
NEXT(2, 1)
|
||||
} else TRYMAP_DEC(jisx0208, **outbuf, c, c2) {
|
||||
NEXT(2, 1)
|
||||
} else
|
||||
return 2;
|
||||
} else if (charset == CHARSET_ASCII) {
|
||||
RESERVE_OUTBUF(1)
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
} else if (charset == CHARSET_JISX0201_R) {
|
||||
RESERVE_OUTBUF(1)
|
||||
JISX0201_R_DECODE(c, **outbuf)
|
||||
else
|
||||
return 1;
|
||||
NEXT(1, 1)
|
||||
} else
|
||||
return MBERR_INTERNAL;
|
||||
ISO2022_LOOP_END
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(iso2022_jp)
|
||||
MAPOPEN(ja_JP)
|
||||
IMPORTMAP_DEC(jisx0208)
|
||||
IMPORTMAP_ENC(jisxcommon)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(iso2022_jp)
|
185
Modules/cjkcodecs/_iso2022_jp_1.c
Normal file
185
Modules/cjkcodecs/_iso2022_jp_1.c
Normal file
|
@ -0,0 +1,185 @@
|
|||
/*
|
||||
* _iso2022_jp_1.c: the ISO-2022-JP-1 codec (RFC2237)
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _iso2022_jp_1.c,v 1.8 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#define ISO2022_DESIGNATIONS \
|
||||
CHARSET_ASCII, CHARSET_JISX0201_R, CHARSET_JISX0208, \
|
||||
CHARSET_JISX0208_O, CHARSET_JISX0212
|
||||
#define ISO2022_NO_SHIFT
|
||||
#define ISO2022_USE_JISX0208EXT
|
||||
|
||||
#include "codeccommon.h"
|
||||
#include "iso2022common.h"
|
||||
#include "alg_jisx0201.h"
|
||||
|
||||
ENCMAP(jisxcommon)
|
||||
DECMAP(jisx0208)
|
||||
DECMAP(jisx0212)
|
||||
|
||||
#define HAVE_ENCODER_INIT
|
||||
ENCODER_INIT(iso2022_jp_1)
|
||||
{
|
||||
STATE_CLEARFLAGS(state)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
STATE_SETG1(state, CHARSET_ASCII)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_ENCODER_RESET
|
||||
ENCODER_RESET(iso2022_jp_1)
|
||||
{
|
||||
if (STATE_GETG0(state) != CHARSET_ASCII) {
|
||||
RESERVE_OUTBUF(3)
|
||||
WRITE3(ESC, '(', 'B')
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
NEXT_OUT(3)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ISO-2022-JP-1 changes designations instead of shifting-out */
|
||||
|
||||
ENCODER(iso2022_jp_1)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = **inbuf;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
switch (STATE_GETG0(state)) {
|
||||
case CHARSET_ASCII:
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
break;
|
||||
case CHARSET_JISX0201_R:
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
else { /* FALLTHROUGH (yay!) */
|
||||
default:
|
||||
WRITE3(ESC, '(', 'B')
|
||||
NEXT_OUT(3)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
code = c;
|
||||
}
|
||||
WRITE1(code)
|
||||
NEXT(1, 1)
|
||||
break;
|
||||
}
|
||||
if (c == '\n')
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
}
|
||||
else UCS4INVALID(c)
|
||||
else {
|
||||
unsigned char charset;
|
||||
|
||||
charset = STATE_GETG0(state);
|
||||
if (charset == CHARSET_JISX0201_R) {
|
||||
code = DBCINV;
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
if (code != DBCINV) {
|
||||
WRITE1(code)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
TRYMAP_ENC(jisxcommon, code, c) {
|
||||
if (code & 0x8000) { /* MSB set: JIS X 0212 */
|
||||
if (charset != CHARSET_JISX0212) {
|
||||
WRITE4(ESC, '$', '(', 'D')
|
||||
STATE_SETG0(state, CHARSET_JISX0212)
|
||||
NEXT_OUT(4)
|
||||
}
|
||||
WRITE2((code >> 8) & 0x7f, code & 0x7f)
|
||||
} else { /* MSB unset: JIS X 0208 */
|
||||
jisx0208encode: if (charset != CHARSET_JISX0208) {
|
||||
WRITE3(ESC, '$', 'B')
|
||||
STATE_SETG0(state, CHARSET_JISX0208)
|
||||
NEXT_OUT(3)
|
||||
}
|
||||
WRITE2(code >> 8, code & 0xff)
|
||||
}
|
||||
NEXT(1, 2)
|
||||
} else if (c == 0xff3c) { /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
code = 0x2140;
|
||||
goto jisx0208encode;
|
||||
} else {
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
else
|
||||
return 1;
|
||||
/* if (charset == CHARSET_JISX0201_R) : already checked */
|
||||
WRITE4(ESC, '(', 'J', code)
|
||||
STATE_SETG0(state, CHARSET_JISX0201_R)
|
||||
NEXT(1, 4)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_INIT
|
||||
DECODER_INIT(iso2022_jp_1)
|
||||
{
|
||||
STATE_CLEARFLAGS(state)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
STATE_SETG1(state, CHARSET_ASCII)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_RESET
|
||||
DECODER_RESET(iso2022_jp_1)
|
||||
{
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(iso2022_jp_1)
|
||||
{
|
||||
ISO2022_LOOP_BEGIN
|
||||
unsigned char charset, c2;
|
||||
|
||||
ISO2022_GETCHARSET(charset, c)
|
||||
|
||||
if (charset & CHARSET_DOUBLEBYTE) {
|
||||
RESERVE_INBUF(2)
|
||||
RESERVE_OUTBUF(1)
|
||||
c2 = IN2;
|
||||
if (charset == CHARSET_JISX0208 || charset == CHARSET_JISX0208_O) {
|
||||
if (c == 0x21 && c2 == 0x40) /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
**outbuf = 0xff3c;
|
||||
else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
|
||||
else return 2;
|
||||
} else if (charset == CHARSET_JISX0212) {
|
||||
TRYMAP_DEC(jisx0212, **outbuf, c, c2);
|
||||
else return 2;
|
||||
} else
|
||||
return MBERR_INTERNAL;
|
||||
NEXT(2, 1)
|
||||
} else if (charset == CHARSET_ASCII) {
|
||||
RESERVE_OUTBUF(1)
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
} else if (charset == CHARSET_JISX0201_R) {
|
||||
RESERVE_OUTBUF(1)
|
||||
JISX0201_R_DECODE(c, **outbuf)
|
||||
else
|
||||
return 1;
|
||||
NEXT(1, 1)
|
||||
} else
|
||||
return MBERR_INTERNAL;
|
||||
ISO2022_LOOP_END
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(iso2022_jp_1)
|
||||
MAPOPEN(ja_JP)
|
||||
IMPORTMAP_DEC(jisx0208)
|
||||
IMPORTMAP_DEC(jisx0212)
|
||||
IMPORTMAP_ENC(jisxcommon)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(iso2022_jp_1)
|
230
Modules/cjkcodecs/_iso2022_jp_2.c
Normal file
230
Modules/cjkcodecs/_iso2022_jp_2.c
Normal file
|
@ -0,0 +1,230 @@
|
|||
/*
|
||||
* _iso2022_jp_2.c: the ISO-2022-JP-2 codec (RFC1554)
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _iso2022_jp_2.c,v 1.8 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#define ISO2022_DESIGNATIONS \
|
||||
CHARSET_ASCII, CHARSET_JISX0201_R, CHARSET_JISX0208, \
|
||||
CHARSET_JISX0208_O, CHARSET_JISX0212, CHARSET_GB2312, \
|
||||
CHARSET_KSX1001, CHARSET_JISX0212, \
|
||||
CHARSET_ISO8859_1, CHARSET_ISO8859_7
|
||||
#define ISO2022_USE_G2_DESIGNATION yo!
|
||||
#define ISO2022_USE_JISX0208EXT
|
||||
|
||||
#include "codeccommon.h"
|
||||
#include "iso2022common.h"
|
||||
#include "alg_jisx0201.h"
|
||||
#include "alg_iso8859_1.h"
|
||||
#include "alg_iso8859_7.h"
|
||||
|
||||
ENCMAP(jisxcommon)
|
||||
DECMAP(jisx0208)
|
||||
DECMAP(jisx0212)
|
||||
ENCMAP(cp949)
|
||||
DECMAP(ksx1001)
|
||||
ENCMAP(gbcommon)
|
||||
DECMAP(gb2312)
|
||||
|
||||
#define HAVE_ENCODER_INIT
|
||||
ENCODER_INIT(iso2022_jp_2)
|
||||
{
|
||||
STATE_CLEARFLAGS(state)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
STATE_SETG1(state, CHARSET_ASCII)
|
||||
STATE_SETG2(state, CHARSET_ASCII)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_ENCODER_RESET
|
||||
ENCODER_RESET(iso2022_jp_2)
|
||||
{
|
||||
if (STATE_GETG0(state) != CHARSET_ASCII) {
|
||||
WRITE3(ESC, '(', 'B')
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
NEXT_OUT(3)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
ENCODER(iso2022_jp_2)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
switch (STATE_GETG0(state)) {
|
||||
case CHARSET_ASCII:
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
break;
|
||||
case CHARSET_JISX0201_R:
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
else { /* FALLTHROUGH (yay!) */
|
||||
default:
|
||||
WRITE3(ESC, '(', 'B')
|
||||
NEXT_OUT(3)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
code = c;
|
||||
}
|
||||
WRITE1(code)
|
||||
NEXT(1, 1)
|
||||
break;
|
||||
}
|
||||
if (c == '\n')
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
}
|
||||
else UCS4INVALID(c)
|
||||
else {
|
||||
unsigned char charset;
|
||||
|
||||
charset = STATE_GETG0(state);
|
||||
if (charset == CHARSET_JISX0201_R) {
|
||||
code = DBCINV;
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
if (code != DBCINV) {
|
||||
WRITE1(code)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
TRYMAP_ENC(jisxcommon, code, c) {
|
||||
if (code & 0x8000) { /* MSB set: JIS X 0212 */
|
||||
if (charset != CHARSET_JISX0212) {
|
||||
WRITE4(ESC, '$', '(', 'D')
|
||||
STATE_SETG0(state, CHARSET_JISX0212)
|
||||
NEXT_OUT(4)
|
||||
}
|
||||
WRITE2((code >> 8) & 0x7f, code & 0x7f)
|
||||
} else { /* MSB unset: JIS X 0208 */
|
||||
jisx0208encode: if (charset != CHARSET_JISX0208) {
|
||||
WRITE3(ESC, '$', 'B')
|
||||
STATE_SETG0(state, CHARSET_JISX0208)
|
||||
NEXT_OUT(3)
|
||||
}
|
||||
WRITE2(code >> 8, code & 0xff)
|
||||
}
|
||||
NEXT(1, 2)
|
||||
} else TRYMAP_ENC(cp949, code, c) {
|
||||
if (code & 0x8000) /* MSB set: CP949 */
|
||||
return 2;
|
||||
if (charset != CHARSET_KSX1001) {
|
||||
WRITE4(ESC, '$', '(', 'C')
|
||||
STATE_SETG0(state, CHARSET_KSX1001)
|
||||
NEXT_OUT(4)
|
||||
}
|
||||
WRITE2(code >> 8, code & 0xff)
|
||||
NEXT(1, 2)
|
||||
} else TRYMAP_ENC(gbcommon, code, c) {
|
||||
if (code & 0x8000) /* MSB set: GBK */
|
||||
return 2;
|
||||
if (charset != CHARSET_GB2312) {
|
||||
WRITE4(ESC, '$', '(', 'A')
|
||||
STATE_SETG0(state, CHARSET_GB2312)
|
||||
NEXT_OUT(4)
|
||||
}
|
||||
WRITE2(code >> 8, code & 0xff)
|
||||
NEXT(1, 2)
|
||||
} else if (c == 0xff3c) { /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
code = 0x2140;
|
||||
goto jisx0208encode;
|
||||
} else {
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
else {
|
||||
/* There's no need to try to encode as ISO-8859-1 or
|
||||
* ISO-8859-7 because JIS X 0212 includes them already.
|
||||
*/
|
||||
return 1;
|
||||
}
|
||||
/* if (charset == CHARSET_JISX0201_R) : already checked */
|
||||
WRITE4(ESC, '(', 'J', code)
|
||||
STATE_SETG0(state, CHARSET_JISX0201_R)
|
||||
NEXT(1, 4)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_INIT
|
||||
DECODER_INIT(iso2022_jp_2)
|
||||
{
|
||||
STATE_CLEARFLAGS(state)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
STATE_SETG1(state, CHARSET_ASCII)
|
||||
STATE_SETG2(state, CHARSET_ASCII)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_RESET
|
||||
DECODER_RESET(iso2022_jp_2)
|
||||
{
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(iso2022_jp_2)
|
||||
{
|
||||
ISO2022_LOOP_BEGIN
|
||||
unsigned char charset, c2;
|
||||
|
||||
ISO2022_GETCHARSET(charset, c)
|
||||
|
||||
if (charset & CHARSET_DOUBLEBYTE) {
|
||||
RESERVE_INBUF(2)
|
||||
RESERVE_OUTBUF(1)
|
||||
c2 = IN2;
|
||||
if (charset == CHARSET_JISX0208 || charset == CHARSET_JISX0208_O) {
|
||||
if (c == 0x21 && c2 == 0x40) /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
**outbuf = 0xff3c;
|
||||
else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
|
||||
else return 2;
|
||||
} else if (charset == CHARSET_JISX0212) {
|
||||
TRYMAP_DEC(jisx0212, **outbuf, c, c2);
|
||||
else return 2;
|
||||
} else if (charset == CHARSET_KSX1001) {
|
||||
TRYMAP_DEC(ksx1001, **outbuf, c, c2);
|
||||
else return 2;
|
||||
} else if (charset == CHARSET_GB2312) {
|
||||
TRYMAP_DEC(gb2312, **outbuf, c, c2);
|
||||
else return 2;
|
||||
} else
|
||||
return MBERR_INTERNAL;
|
||||
NEXT(2, 1)
|
||||
} else if (charset == CHARSET_ASCII) {
|
||||
RESERVE_OUTBUF(1)
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
} else if (charset == CHARSET_JISX0201_R) {
|
||||
RESERVE_OUTBUF(1)
|
||||
JISX0201_R_DECODE(c, **outbuf)
|
||||
else
|
||||
return 1;
|
||||
NEXT(1, 1)
|
||||
} else
|
||||
return MBERR_INTERNAL;
|
||||
ISO2022_LOOP_END
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(iso2022_jp_2)
|
||||
MAPOPEN(ja_JP)
|
||||
IMPORTMAP_DEC(jisx0208)
|
||||
IMPORTMAP_DEC(jisx0212)
|
||||
IMPORTMAP_ENC(jisxcommon)
|
||||
MAPCLOSE()
|
||||
MAPOPEN(ko_KR)
|
||||
IMPORTMAP_ENC(cp949)
|
||||
IMPORTMAP_DEC(ksx1001)
|
||||
MAPCLOSE()
|
||||
MAPOPEN(zh_CN)
|
||||
IMPORTMAP_ENC(gbcommon)
|
||||
IMPORTMAP_DEC(gb2312)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(iso2022_jp_2)
|
211
Modules/cjkcodecs/_iso2022_jp_3.c
Normal file
211
Modules/cjkcodecs/_iso2022_jp_3.c
Normal file
|
@ -0,0 +1,211 @@
|
|||
/*
|
||||
* _iso2022_jp_3.c: the ISO-2022-JP-3 codec (JIS X 0213)
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _iso2022_jp_3.c,v 1.7 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#define USING_BINARY_PAIR_SEARCH
|
||||
#define ISO2022_DESIGNATIONS \
|
||||
CHARSET_ASCII, CHARSET_JISX0208, CHARSET_JISX0213_1, CHARSET_JISX0213_2
|
||||
#define ISO2022_NO_SHIFT
|
||||
#define ISO2022_USE_JISX0208EXT
|
||||
|
||||
#include "codeccommon.h"
|
||||
#include "iso2022common.h"
|
||||
#include "map_jisx0213_pairs.h"
|
||||
|
||||
ENCMAP(jisxcommon)
|
||||
DECMAP(jisx0208)
|
||||
DECMAP(jisx0212)
|
||||
ENCMAP(jisx0213_bmp)
|
||||
DECMAP(jisx0213_1_bmp)
|
||||
DECMAP(jisx0213_2_bmp)
|
||||
ENCMAP(jisx0213_emp)
|
||||
DECMAP(jisx0213_1_emp)
|
||||
DECMAP(jisx0213_2_emp)
|
||||
|
||||
#define EMPBASE 0x20000
|
||||
|
||||
#define HAVE_ENCODER_INIT
|
||||
ENCODER_INIT(iso2022_jp_3)
|
||||
{
|
||||
STATE_CLEARFLAGS(state)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
STATE_SETG1(state, CHARSET_ASCII)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_ENCODER_RESET
|
||||
ENCODER_RESET(iso2022_jp_3)
|
||||
{
|
||||
if (STATE_GETG0(state) != CHARSET_ASCII) {
|
||||
WRITE3(ESC, '(', 'B')
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
NEXT_OUT(3)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
ENCODER(iso2022_jp_3)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char charset;
|
||||
ucs4_t c = IN1;
|
||||
DBCHAR code;
|
||||
size_t insize;
|
||||
|
||||
if (c < 0x80) {
|
||||
switch (STATE_GETG0(state)) {
|
||||
case CHARSET_ASCII:
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
break;
|
||||
default:
|
||||
WRITE4(ESC, '(', 'B', c)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
NEXT(1, 4)
|
||||
break;
|
||||
}
|
||||
if (c == '\n')
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
continue;
|
||||
}
|
||||
|
||||
DECODE_SURROGATE(c)
|
||||
insize = GET_INSIZE(c);
|
||||
|
||||
if (c <= 0xffff) {
|
||||
TRYMAP_ENC(jisx0213_bmp, code, c) {
|
||||
if (code == MULTIC) {
|
||||
if (inleft < 2) {
|
||||
if (flags & MBENC_FLUSH) {
|
||||
code = find_pairencmap(c, 0, jisx0213_pairencmap,
|
||||
JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV)
|
||||
return 1;
|
||||
} else
|
||||
return MBERR_TOOFEW;
|
||||
} else {
|
||||
code = find_pairencmap(c, IN2,
|
||||
jisx0213_pairencmap, JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV) {
|
||||
code = find_pairencmap(c, 0, jisx0213_pairencmap,
|
||||
JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV)
|
||||
return 1;
|
||||
} else
|
||||
insize = 2;
|
||||
}
|
||||
}
|
||||
} else TRYMAP_ENC(jisxcommon, code, c) {
|
||||
if (code & 0x8000)
|
||||
return 1; /* avoid JIS X 0212 codes */
|
||||
} else if (c == 0xff3c) /* F/W REVERSE SOLIDUS */
|
||||
code = 0x2140;
|
||||
else
|
||||
return 1;
|
||||
} else if (c >> 16 == EMPBASE >> 16) {
|
||||
TRYMAP_ENC(jisx0213_emp, code, c & 0xffff);
|
||||
else return insize;
|
||||
} else
|
||||
return insize;
|
||||
|
||||
charset = STATE_GETG0(state);
|
||||
if (code & 0x8000) { /* MSB set: Plane 2 */
|
||||
if (charset != CHARSET_JISX0213_2) {
|
||||
WRITE4(ESC, '$', '(', 'P')
|
||||
STATE_SETG0(state, CHARSET_JISX0213_2)
|
||||
NEXT_OUT(4)
|
||||
}
|
||||
WRITE2((code >> 8) & 0x7f, code & 0x7f)
|
||||
} else { /* MSB unset: Plane 1 */
|
||||
if (charset != CHARSET_JISX0213_1) {
|
||||
WRITE4(ESC, '$', '(', 'O')
|
||||
STATE_SETG0(state, CHARSET_JISX0213_1)
|
||||
NEXT_OUT(4)
|
||||
}
|
||||
WRITE2(code >> 8, code & 0xff)
|
||||
}
|
||||
NEXT(insize, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_INIT
|
||||
DECODER_INIT(iso2022_jp_3)
|
||||
{
|
||||
STATE_CLEARFLAGS(state)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
STATE_SETG1(state, CHARSET_ASCII)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_RESET
|
||||
DECODER_RESET(iso2022_jp_3)
|
||||
{
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(iso2022_jp_3)
|
||||
{
|
||||
ISO2022_LOOP_BEGIN
|
||||
unsigned char charset, c2;
|
||||
ucs4_t code;
|
||||
|
||||
ISO2022_GETCHARSET(charset, c)
|
||||
|
||||
if (charset & CHARSET_DOUBLEBYTE) {
|
||||
RESERVE_INBUF(2)
|
||||
RESERVE_OUTBUF(1)
|
||||
c2 = IN2;
|
||||
if (charset == CHARSET_JISX0213_1) {
|
||||
if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c;
|
||||
else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
|
||||
else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2);
|
||||
else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) {
|
||||
PUTUCS4(EMPBASE | code)
|
||||
NEXT_IN(2)
|
||||
continue;
|
||||
} else TRYMAP_DEC(jisx0213_pair, code, c, c2) {
|
||||
WRITE2(code >> 16, code & 0xffff)
|
||||
NEXT(2, 2)
|
||||
continue;
|
||||
} else return 2;
|
||||
} else if (charset == CHARSET_JISX0213_2) {
|
||||
TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c, c2);
|
||||
else TRYMAP_DEC(jisx0213_2_emp, code, c, c2) {
|
||||
PUTUCS4(EMPBASE | code)
|
||||
NEXT_IN(2)
|
||||
continue;
|
||||
} else return 2;
|
||||
} else
|
||||
return MBERR_INTERNAL;
|
||||
NEXT(2, 1)
|
||||
} else if (charset == CHARSET_ASCII) {
|
||||
RESERVE_OUTBUF(1)
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
} else
|
||||
return MBERR_INTERNAL;
|
||||
ISO2022_LOOP_END
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(iso2022_jp_3)
|
||||
MAPOPEN(ja_JP)
|
||||
IMPORTMAP_DEC(jisx0208)
|
||||
IMPORTMAP_DEC(jisx0212)
|
||||
IMPORTMAP_ENC(jisxcommon)
|
||||
IMPORTMAP_ENC(jisx0213_bmp)
|
||||
IMPORTMAP_DEC(jisx0213_1_bmp)
|
||||
IMPORTMAP_DEC(jisx0213_2_bmp)
|
||||
IMPORTMAP_ENC(jisx0213_emp)
|
||||
IMPORTMAP_DEC(jisx0213_1_emp)
|
||||
IMPORTMAP_DEC(jisx0213_2_emp)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(iso2022_jp_3)
|
200
Modules/cjkcodecs/_iso2022_jp_ext.c
Normal file
200
Modules/cjkcodecs/_iso2022_jp_ext.c
Normal file
|
@ -0,0 +1,200 @@
|
|||
/*
|
||||
* _iso2022_jp_ext.c: the ISO-2022-JP-EXT codec (RFC2237 + alpha)
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _iso2022_jp_ext.c,v 1.3 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#define ISO2022_DESIGNATIONS \
|
||||
CHARSET_ASCII, CHARSET_JISX0201_R, CHARSET_JISX0201_K, \
|
||||
CHARSET_JISX0208, CHARSET_JISX0208_O, CHARSET_JISX0212
|
||||
#define ISO2022_NO_SHIFT
|
||||
#define ISO2022_USE_JISX0208EXT
|
||||
|
||||
#include "codeccommon.h"
|
||||
#include "iso2022common.h"
|
||||
#include "alg_jisx0201.h"
|
||||
|
||||
ENCMAP(jisxcommon)
|
||||
DECMAP(jisx0208)
|
||||
DECMAP(jisx0212)
|
||||
|
||||
#define HAVE_ENCODER_INIT
|
||||
ENCODER_INIT(iso2022_jp_ext)
|
||||
{
|
||||
STATE_CLEARFLAGS(state)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
STATE_SETG1(state, CHARSET_ASCII)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_ENCODER_RESET
|
||||
ENCODER_RESET(iso2022_jp_ext)
|
||||
{
|
||||
if (STATE_GETG0(state) != CHARSET_ASCII) {
|
||||
RESERVE_OUTBUF(3)
|
||||
WRITE3(ESC, '(', 'B')
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
NEXT_OUT(3)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
ENCODER(iso2022_jp_ext)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = **inbuf;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
switch (STATE_GETG0(state)) {
|
||||
case CHARSET_ASCII:
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
break;
|
||||
case CHARSET_JISX0201_R:
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
else { /* FALLTHROUGH (yay!) */
|
||||
default:
|
||||
WRITE3(ESC, '(', 'B')
|
||||
NEXT_OUT(3)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
code = c;
|
||||
}
|
||||
WRITE1(code)
|
||||
NEXT(1, 1)
|
||||
break;
|
||||
}
|
||||
if (c == '\n')
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
}
|
||||
else UCS4INVALID(c)
|
||||
else {
|
||||
unsigned char charset;
|
||||
|
||||
charset = STATE_GETG0(state);
|
||||
if (charset == CHARSET_JISX0201_R) {
|
||||
code = DBCINV;
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
if (code != DBCINV) {
|
||||
WRITE1(code)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
TRYMAP_ENC(jisxcommon, code, c) {
|
||||
if (code & 0x8000) { /* MSB set: JIS X 0212 */
|
||||
if (charset != CHARSET_JISX0212) {
|
||||
WRITE4(ESC, '$', '(', 'D')
|
||||
STATE_SETG0(state, CHARSET_JISX0212)
|
||||
NEXT_OUT(4)
|
||||
}
|
||||
WRITE2((code >> 8) & 0x7f, code & 0x7f)
|
||||
} else { /* MSB unset: JIS X 0208 */
|
||||
jisx0208encode: if (charset != CHARSET_JISX0208) {
|
||||
WRITE3(ESC, '$', 'B')
|
||||
STATE_SETG0(state, CHARSET_JISX0208)
|
||||
NEXT_OUT(3)
|
||||
}
|
||||
WRITE2(code >> 8, code & 0xff)
|
||||
}
|
||||
NEXT(1, 2)
|
||||
} else if (c == 0xff3c) { /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
code = 0x2140;
|
||||
goto jisx0208encode;
|
||||
} else {
|
||||
JISX0201_ENCODE(c, code)
|
||||
else
|
||||
return 1;
|
||||
|
||||
if (code < 0x80) { /* JIS X 0201 Roman */
|
||||
/* if (charset == CHARSET_JISX0201_R) : already checked */
|
||||
WRITE4(ESC, '(', 'J', code)
|
||||
STATE_SETG0(state, CHARSET_JISX0201_R)
|
||||
NEXT(1, 4)
|
||||
} else { /* JIS X 0201 Katakana */
|
||||
if (charset != CHARSET_JISX0201_K) {
|
||||
WRITE3(ESC, '(', 'I')
|
||||
STATE_SETG0(state, CHARSET_JISX0201_K)
|
||||
NEXT_OUT(3)
|
||||
}
|
||||
WRITE1(code - 0x80)
|
||||
NEXT(1, 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_INIT
|
||||
DECODER_INIT(iso2022_jp_ext)
|
||||
{
|
||||
STATE_CLEARFLAGS(state)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
STATE_SETG1(state, CHARSET_ASCII)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_RESET
|
||||
DECODER_RESET(iso2022_jp_ext)
|
||||
{
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(iso2022_jp_ext)
|
||||
{
|
||||
ISO2022_LOOP_BEGIN
|
||||
unsigned char charset, c2;
|
||||
|
||||
ISO2022_GETCHARSET(charset, c)
|
||||
|
||||
if (charset & CHARSET_DOUBLEBYTE) {
|
||||
RESERVE_INBUF(2)
|
||||
RESERVE_OUTBUF(1)
|
||||
c2 = IN2;
|
||||
if (charset == CHARSET_JISX0208 || charset == CHARSET_JISX0208_O) {
|
||||
if (c == 0x21 && c2 == 0x40) /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
**outbuf = 0xff3c;
|
||||
else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
|
||||
else return 2;
|
||||
} else if (charset == CHARSET_JISX0212) {
|
||||
TRYMAP_DEC(jisx0212, **outbuf, c, c2);
|
||||
else return 2;
|
||||
} else
|
||||
return MBERR_INTERNAL;
|
||||
NEXT(2, 1)
|
||||
} else if (charset == CHARSET_ASCII) {
|
||||
RESERVE_OUTBUF(1)
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
} else if (charset == CHARSET_JISX0201_R) {
|
||||
RESERVE_OUTBUF(1)
|
||||
JISX0201_R_DECODE(c, **outbuf)
|
||||
else
|
||||
return 1;
|
||||
NEXT(1, 1)
|
||||
} else if (charset == CHARSET_JISX0201_K) {
|
||||
RESERVE_OUTBUF(1)
|
||||
JISX0201_K_DECODE(c ^ 0x80, **outbuf)
|
||||
else
|
||||
return 1;
|
||||
NEXT(1, 1)
|
||||
} else
|
||||
return MBERR_INTERNAL;
|
||||
ISO2022_LOOP_END
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(iso2022_jp_ext)
|
||||
MAPOPEN(ja_JP)
|
||||
IMPORTMAP_DEC(jisx0208)
|
||||
IMPORTMAP_DEC(jisx0212)
|
||||
IMPORTMAP_ENC(jisxcommon)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(iso2022_jp_ext)
|
131
Modules/cjkcodecs/_iso2022_kr.c
Normal file
131
Modules/cjkcodecs/_iso2022_kr.c
Normal file
|
@ -0,0 +1,131 @@
|
|||
/*
|
||||
* _iso2022_kr.c: the ISO-2022-KR codec (RFC1557)
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _iso2022_kr.c,v 1.3 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#define ISO2022_DESIGNATIONS \
|
||||
CHARSET_ASCII, CHARSET_KSX1001
|
||||
|
||||
#include "codeccommon.h"
|
||||
#include "iso2022common.h"
|
||||
|
||||
ENCMAP(cp949)
|
||||
DECMAP(ksx1001)
|
||||
|
||||
#define HAVE_ENCODER_INIT
|
||||
ENCODER_INIT(iso2022_kr)
|
||||
{
|
||||
STATE_CLEARFLAGS(state)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
STATE_SETG1(state, CHARSET_ASCII)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_ENCODER_RESET
|
||||
ENCODER_RESET(iso2022_kr)
|
||||
{
|
||||
if (STATE_GETFLAG(state, F_SHIFTED)) {
|
||||
RESERVE_OUTBUF(1)
|
||||
OUT1(SI)
|
||||
NEXT_OUT(1)
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
ENCODER(iso2022_kr)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = **inbuf;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
if (STATE_GETFLAG(state, F_SHIFTED)) {
|
||||
WRITE2(SI, c)
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
NEXT(1, 2)
|
||||
} else {
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
}
|
||||
if (c == '\n')
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
} else UCS4INVALID(c)
|
||||
else {
|
||||
if (STATE_GETG1(state) != CHARSET_KSX1001) {
|
||||
WRITE4(ESC, '$', ')', 'C')
|
||||
STATE_SETG1(state, CHARSET_KSX1001)
|
||||
NEXT_OUT(4)
|
||||
}
|
||||
|
||||
if (!STATE_GETFLAG(state, F_SHIFTED)) {
|
||||
WRITE1(SO)
|
||||
STATE_SETFLAG(state, F_SHIFTED)
|
||||
NEXT_OUT(1)
|
||||
}
|
||||
|
||||
TRYMAP_ENC(cp949, code, c) {
|
||||
if (code & 0x8000) /* MSB set: CP949 */
|
||||
return 1;
|
||||
WRITE2(code >> 8, code & 0xff)
|
||||
NEXT(1, 2)
|
||||
} else
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_INIT
|
||||
DECODER_INIT(iso2022_kr)
|
||||
{
|
||||
STATE_CLEARFLAGS(state)
|
||||
STATE_SETG0(state, CHARSET_ASCII)
|
||||
STATE_SETG1(state, CHARSET_ASCII)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define HAVE_DECODER_RESET
|
||||
DECODER_RESET(iso2022_kr)
|
||||
{
|
||||
STATE_CLEARFLAG(state, F_SHIFTED)
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(iso2022_kr)
|
||||
{
|
||||
ISO2022_LOOP_BEGIN
|
||||
unsigned char charset, c2;
|
||||
|
||||
ISO2022_GETCHARSET(charset, c)
|
||||
|
||||
if (charset & CHARSET_DOUBLEBYTE) {
|
||||
/* all double byte character sets are in KS X 1001 here */
|
||||
RESERVE_INBUF(2)
|
||||
RESERVE_OUTBUF(1)
|
||||
c2 = IN2;
|
||||
if (c2 >= 0x80)
|
||||
return 1;
|
||||
TRYMAP_DEC(ksx1001, **outbuf, c, c2) {
|
||||
NEXT(2, 1)
|
||||
} else
|
||||
return 2;
|
||||
} else {
|
||||
RESERVE_OUTBUF(1)
|
||||
OUT1(c);
|
||||
NEXT(1, 1)
|
||||
}
|
||||
ISO2022_LOOP_END
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(iso2022_kr)
|
||||
MAPOPEN(ko_KR)
|
||||
IMPORTMAP_DEC(ksx1001)
|
||||
IMPORTMAP_ENC(cp949)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(iso2022_kr)
|
223
Modules/cjkcodecs/_johab.c
Normal file
223
Modules/cjkcodecs/_johab.c
Normal file
|
@ -0,0 +1,223 @@
|
|||
/*
|
||||
* _johab.c: the Johab codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _johab.c,v 1.3 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "codeccommon.h"
|
||||
|
||||
ENCMAP(cp949)
|
||||
DECMAP(ksx1001)
|
||||
|
||||
/*
 * Encoder table for the leading part of a Hangul syllable.
 * The Johab encoder indexes it with (c - 0xac00) / 588 and places the
 * value in bits 10..14 of the output code.  Only the first 19 entries
 * are meaningful (index i holds i + 2); the rest of the 32 are zero.
 */
static const unsigned char u2johabidx_choseong[32] = {
    0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,   /*  0 ..  7 */
    0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11,   /*  8 .. 15 */
    0x12, 0x13, 0x14,                                 /* 16 .. 18 */
};
|
||||
/*
 * Encoder table for the middle part of a Hangul syllable.
 * The Johab encoder indexes it with ((c - 0xac00) / 28) % 21 and places
 * the value in bits 5..9 of the output code.  The 21 meaningful values
 * skip 0x08-0x09, 0x10-0x11 and 0x18-0x19; the remaining entries of the
 * 32 are zero.
 */
static const unsigned char u2johabidx_jungseong[32] = {
    0x03, 0x04, 0x05, 0x06, 0x07, 0x0a, 0x0b, 0x0c,   /*  0 ..  7 */
    0x0d, 0x0e, 0x0f, 0x12, 0x13, 0x14, 0x15, 0x16,   /*  8 .. 15 */
    0x17, 0x1a, 0x1b, 0x1c, 0x1d,                     /* 16 .. 20 */
};
|
||||
/*
 * Encoder table for the trailing part of a Hangul syllable.
 * The Johab encoder indexes it with (c - 0xac00) % 28 and places the
 * value in bits 0..4 of the output code.  The 28 meaningful values run
 * from 0x01 to 0x1d with 0x12 skipped; the remaining entries of the 32
 * are zero.
 */
static const unsigned char u2johabidx_jongseong[32] = {
    0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,   /*  0 ..  7 */
    0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,   /*  8 .. 15 */
    0x11, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,   /* 16 .. 23 */
    0x1a, 0x1b, 0x1c, 0x1d,                           /* 24 .. 27 */
};
|
||||
static const DBCHAR u2johabjamo[] = {
|
||||
0x8841, 0x8c41, 0x8444, 0x9041, 0x8446, 0x8447, 0x9441,
|
||||
0x9841, 0x9c41, 0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f,
|
||||
0x8450, 0xa041, 0xa441, 0xa841, 0x8454, 0xac41, 0xb041, 0xb441,
|
||||
0xb841, 0xbc41, 0xc041, 0xc441, 0xc841, 0xcc41, 0xd041, 0x8461,
|
||||
0x8481, 0x84a1, 0x84c1, 0x84e1, 0x8541, 0x8561, 0x8581, 0x85a1,
|
||||
0x85c1, 0x85e1, 0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
|
||||
0x8741, 0x8761, 0x8781, 0x87a1,
|
||||
};
|
||||
|
||||
ENCODER(johab)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
|
||||
if (c < 0x80) {
|
||||
WRITE1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
UCS4INVALID(c)
|
||||
|
||||
RESERVE_OUTBUF(2)
|
||||
|
||||
if (c >= 0xac00 && c <= 0xd7a3) {
|
||||
c -= 0xac00;
|
||||
code = 0x8000 |
|
||||
(u2johabidx_choseong[c / 588] << 10) |
|
||||
(u2johabidx_jungseong[(c / 28) % 21] << 5) |
|
||||
u2johabidx_jongseong[c % 28];
|
||||
} else if (c >= 0x3131 && c <= 0x3163)
|
||||
code = u2johabjamo[c - 0x3131];
|
||||
else TRYMAP_ENC(cp949, code, c) {
|
||||
unsigned char c1, c2, t2;
|
||||
unsigned short t1;
|
||||
|
||||
assert((code & 0x8000) == 0);
|
||||
c1 = code >> 8;
|
||||
c2 = code & 0xff;
|
||||
if (((c1 >= 0x21 && c1 <= 0x2c) || (c1 >= 0x4a && c1 <= 0x7d))
|
||||
&& (c2 >= 0x21 && c2 <= 0x7e)) {
|
||||
t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) : (c1 - 0x21 + 0x197));
|
||||
t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21);
|
||||
OUT1(t1 >> 1)
|
||||
OUT2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43)
|
||||
NEXT(1, 2)
|
||||
continue;
|
||||
} else
|
||||
return 1;
|
||||
} else
|
||||
return 1;
|
||||
|
||||
OUT1(code >> 8)
|
||||
OUT2(code & 0xff)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Table markers: FILL is the Johab "fill" (empty) position of a
 * component, NONE marks a 5-bit value that is not assigned at all. */
#define FILL 0xfd
#define NONE 0xff

/*
 * 5-bit Johab field -> component index used to compose a Unicode hangul
 * syllable (0xAC00 + cho * 588 + jung * 28 + jong).
 */
static const unsigned char johabidx_choseong[32] = {
    NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,     /* 0x00 */
    0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,     /* 0x08 */
    0x0e, 0x0f, 0x10, 0x11, 0x12, NONE, NONE, NONE,     /* 0x10 */
    NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,     /* 0x18 */
};
static const unsigned char johabidx_jungseong[32] = {
    NONE, NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04,     /* 0x00 */
    NONE, NONE, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,     /* 0x08 */
    NONE, NONE, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,     /* 0x10 */
    NONE, NONE, 0x11, 0x12, 0x13, 0x14, NONE, NONE,     /* 0x18 */
};
static const unsigned char johabidx_jongseong[32] = {
    NONE, FILL, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,     /* 0x00 */
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,     /* 0x08 */
    0x0f, 0x10, NONE, 0x11, 0x12, 0x13, 0x14, 0x15,     /* 0x10 */
    0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, NONE, NONE,     /* 0x18 */
};

/*
 * 5-bit Johab field -> low byte of the U+31xx jamo emitted when that
 * component appears on its own (the decoder ORs the value with 0x3100).
 */
static const unsigned char johabjamo_choseong[32] = {
    NONE, FILL, 0x31, 0x32, 0x34, 0x37, 0x38, 0x39,     /* 0x00 */
    0x41, 0x42, 0x43, 0x45, 0x46, 0x47, 0x48, 0x49,     /* 0x08 */
    0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE, NONE,     /* 0x10 */
    NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,     /* 0x18 */
};
static const unsigned char johabjamo_jungseong[32] = {
    NONE, NONE, FILL, 0x4f, 0x50, 0x51, 0x52, 0x53,     /* 0x00 */
    NONE, NONE, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,     /* 0x08 */
    NONE, NONE, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,     /* 0x10 */
    NONE, NONE, 0x60, 0x61, 0x62, 0x63, NONE, NONE,     /* 0x18 */
};
static const unsigned char johabjamo_jongseong[32] = {
    NONE, FILL, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,     /* 0x00 */
    0x37, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,     /* 0x08 */
    0x40, 0x41, NONE, 0x42, 0x44, 0x45, 0x46, 0x47,     /* 0x10 */
    0x48, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE,     /* 0x18 */
};
|
||||
|
||||
DECODER(johab)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1, c2;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
|
||||
if (c < 0x80) {
|
||||
OUT1(c)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
c2 = IN2;
|
||||
|
||||
if (c < 0xd8) {
|
||||
/* johab hangul */
|
||||
unsigned char c_cho, c_jung, c_jong;
|
||||
unsigned char i_cho, i_jung, i_jong;
|
||||
|
||||
c_cho = (c >> 2) & 0x1f;
|
||||
c_jung = ((c << 3) | c2 >> 5) & 0x1f;
|
||||
c_jong = c2 & 0x1f;
|
||||
|
||||
i_cho = johabidx_choseong[c_cho];
|
||||
i_jung = johabidx_jungseong[c_jung];
|
||||
i_jong = johabidx_jongseong[c_jong];
|
||||
|
||||
if (i_cho == NONE || i_jung == NONE || i_jong == NONE)
|
||||
return 2;
|
||||
|
||||
/* we don't use U+1100 hangul jamo yet. */
|
||||
if (i_cho == FILL) {
|
||||
if (i_jung == FILL) {
|
||||
if (i_jong == FILL)
|
||||
OUT1(0x3000)
|
||||
else
|
||||
OUT1(0x3100 | johabjamo_jongseong[c_jong])
|
||||
} else {
|
||||
if (i_jong == FILL)
|
||||
OUT1(0x3100 | johabjamo_jungseong[c_jung])
|
||||
else
|
||||
return 2;
|
||||
}
|
||||
} else {
|
||||
if (i_jung == FILL) {
|
||||
if (i_jong == FILL)
|
||||
OUT1(0x3100 | johabjamo_choseong[c_cho])
|
||||
else
|
||||
return 2;
|
||||
} else
|
||||
OUT1(0xac00 +
|
||||
i_cho * 588 +
|
||||
i_jung * 28 +
|
||||
(i_jong == FILL ? 0 : i_jong))
|
||||
}
|
||||
NEXT(2, 1)
|
||||
} else {
|
||||
/* KS X 1001 except hangul jamos and syllables */
|
||||
if (c == 0xdf || c > 0xf9 ||
|
||||
c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) ||
|
||||
(c2 & 0x7f) == 0x7f ||
|
||||
(c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3)))
|
||||
return 2;
|
||||
else {
|
||||
unsigned char t1, t2;
|
||||
|
||||
t1 = (c < 0xe0 ? 2 * (c - 0xd9) : 2 * c - 0x197);
|
||||
t2 = (c2 < 0x91 ? c2 - 0x31 : c2 - 0x43);
|
||||
t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
|
||||
t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
|
||||
|
||||
TRYMAP_DEC(ksx1001, **outbuf, t1, t2);
|
||||
else return 2;
|
||||
NEXT(2, 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#undef NONE
|
||||
#undef FILL
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(johab)
|
||||
MAPOPEN(ko_KR)
|
||||
IMPORTMAP_DEC(ksx1001)
|
||||
IMPORTMAP_ENC(cp949)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(johab)
|
121
Modules/cjkcodecs/_shift_jis.c
Normal file
121
Modules/cjkcodecs/_shift_jis.c
Normal file
|
@ -0,0 +1,121 @@
|
|||
/*
|
||||
* _shift_jis.c: the SHIFT-JIS codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _shift_jis.c,v 1.4 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "codeccommon.h"
|
||||
#include "alg_jisx0201.h"
|
||||
|
||||
ENCMAP(jisxcommon)
|
||||
DECMAP(jisx0208)
|
||||
|
||||
ENCODER(shift_jis)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
Py_UNICODE c = IN1;
|
||||
DBCHAR code;
|
||||
unsigned char c1, c2;
|
||||
|
||||
#ifdef STRICT_BUILD
|
||||
JISX0201_R_ENCODE(c, code)
|
||||
#else
|
||||
if (c < 0x80) code = c;
|
||||
else if (c == 0x00a5) code = 0x5c; /* YEN SIGN */
|
||||
else if (c == 0x203e) code = 0x7e; /* OVERLINE */
|
||||
#endif
|
||||
else JISX0201_K_ENCODE(c, code)
|
||||
else UCS4INVALID(c)
|
||||
else code = NOCHAR;
|
||||
|
||||
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
|
||||
RESERVE_OUTBUF(1)
|
||||
|
||||
OUT1(code)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
RESERVE_OUTBUF(2)
|
||||
|
||||
if (code == NOCHAR) {
|
||||
TRYMAP_ENC(jisxcommon, code, c);
|
||||
#ifndef STRICT_BUILD
|
||||
else if (c == 0xff3c)
|
||||
code = 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */
|
||||
#endif
|
||||
else
|
||||
return 1;
|
||||
|
||||
if (code & 0x8000) /* MSB set: JIS X 0212 */
|
||||
return 1;
|
||||
}
|
||||
|
||||
c1 = code >> 8;
|
||||
c2 = code & 0xff;
|
||||
c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
|
||||
c1 = (c1 - 0x21) >> 1;
|
||||
OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
|
||||
OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
|
||||
NEXT(1, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(shift_jis)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
|
||||
#ifdef STRICT_BUILD
|
||||
JISX0201_R_DECODE(c, **outbuf)
|
||||
#else
|
||||
if (c < 0x80) **outbuf = c;
|
||||
#endif
|
||||
else JISX0201_K_DECODE(c, **outbuf)
|
||||
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)) {
|
||||
unsigned char c1, c2;
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
c2 = IN2;
|
||||
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
|
||||
return 2;
|
||||
|
||||
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
|
||||
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
|
||||
c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21);
|
||||
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
|
||||
|
||||
#ifndef STRICT_BUILD
|
||||
if (c1 == 0x21 && c2 == 0x40) {
|
||||
/* FULL-WIDTH REVERSE SOLIDUS */
|
||||
OUT1(0xff3c)
|
||||
NEXT(2, 1)
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
|
||||
NEXT(2, 1)
|
||||
continue;
|
||||
} else
|
||||
return 2;
|
||||
} else
|
||||
return 2;
|
||||
|
||||
NEXT(1, 1) /* JIS X 0201 */
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(shift_jis)
|
||||
MAPOPEN(ja_JP)
|
||||
IMPORTMAP_DEC(jisx0208)
|
||||
IMPORTMAP_ENC(jisxcommon)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(shift_jis)
|
169
Modules/cjkcodecs/_shift_jisx0213.c
Normal file
169
Modules/cjkcodecs/_shift_jisx0213.c
Normal file
|
@ -0,0 +1,169 @@
|
|||
/*
|
||||
* _shift_jisx0213.c: the SHIFT-JISX0213 codec
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: _shift_jisx0213.c,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#define USING_BINARY_PAIR_SEARCH
|
||||
#include "codeccommon.h"
|
||||
#include "alg_jisx0201.h"
|
||||
#include "map_jisx0213_pairs.h"
|
||||
|
||||
ENCMAP(jisxcommon)
|
||||
DECMAP(jisx0208)
|
||||
ENCMAP(jisx0213_bmp)
|
||||
DECMAP(jisx0213_1_bmp)
|
||||
DECMAP(jisx0213_2_bmp)
|
||||
ENCMAP(jisx0213_emp)
|
||||
DECMAP(jisx0213_1_emp)
|
||||
DECMAP(jisx0213_2_emp)
|
||||
|
||||
#define EMPBASE 0x20000
|
||||
|
||||
ENCODER(shift_jisx0213)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
ucs4_t c = IN1;
|
||||
DBCHAR code = NOCHAR;
|
||||
int c1, c2;
|
||||
size_t insize;
|
||||
|
||||
JISX0201_ENCODE(c, code)
|
||||
else DECODE_SURROGATE(c)
|
||||
|
||||
if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {
|
||||
WRITE1(code)
|
||||
NEXT(1, 1)
|
||||
continue;
|
||||
}
|
||||
|
||||
RESERVE_OUTBUF(2)
|
||||
insize = GET_INSIZE(c);
|
||||
|
||||
if (code == NOCHAR) {
|
||||
if (c <= 0xffff) {
|
||||
TRYMAP_ENC(jisx0213_bmp, code, c) {
|
||||
if (code == MULTIC) {
|
||||
if (inleft < 2) {
|
||||
if (flags & MBENC_FLUSH) {
|
||||
code = find_pairencmap(c, 0, jisx0213_pairencmap,
|
||||
JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV)
|
||||
return 1;
|
||||
} else
|
||||
return MBERR_TOOFEW;
|
||||
} else {
|
||||
code = find_pairencmap(c, IN2,
|
||||
jisx0213_pairencmap, JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV) {
|
||||
code = find_pairencmap(c, 0, jisx0213_pairencmap,
|
||||
JISX0213_ENCPAIRS);
|
||||
if (code == DBCINV)
|
||||
return 1;
|
||||
} else
|
||||
insize = 2;
|
||||
}
|
||||
}
|
||||
} else TRYMAP_ENC(jisxcommon, code, c) {
|
||||
if (code & 0x8000)
|
||||
return 1; /* abandon JIS X 0212 codes */
|
||||
} else return 1;
|
||||
} else if (c >> 16 == EMPBASE >> 16) {
|
||||
TRYMAP_ENC(jisx0213_emp, code, c & 0xffff);
|
||||
else return insize;
|
||||
} else
|
||||
return insize;
|
||||
}
|
||||
|
||||
c1 = code >> 8;
|
||||
c2 = (code & 0xff) - 0x21;
|
||||
|
||||
if (c1 & 0x80) { /* Plane 2 */
|
||||
if (c1 >= 0xee) c1 -= 0x87;
|
||||
else if (c1 >= 0xac || c1 == 0xa8) c1 -= 0x49;
|
||||
else c1 -= 0x43;
|
||||
} else /* Plane 1 */
|
||||
c1 -= 0x21;
|
||||
|
||||
if (c1 & 1) c2 += 0x5e;
|
||||
c1 >>= 1;
|
||||
OUT1(c1 + (c1 < 0x1f ? 0x81 : 0xc1))
|
||||
OUT2(c2 + (c2 < 0x3f ? 0x40 : 0x41))
|
||||
|
||||
NEXT(insize, 2)
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
DECODER(shift_jisx0213)
|
||||
{
|
||||
while (inleft > 0) {
|
||||
unsigned char c = IN1;
|
||||
|
||||
RESERVE_OUTBUF(1)
|
||||
JISX0201_DECODE(c, **outbuf)
|
||||
else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)) {
|
||||
unsigned char c1, c2 = IN2;
|
||||
ucs4_t code;
|
||||
|
||||
RESERVE_INBUF(2)
|
||||
if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
|
||||
return 2;
|
||||
|
||||
c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
|
||||
c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
|
||||
c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1));
|
||||
c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
|
||||
|
||||
if (c1 < 0x5e) { /* Plane 1 */
|
||||
c1 += 0x21;
|
||||
TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
|
||||
NEXT_OUT(1)
|
||||
} else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c1, c2) {
|
||||
NEXT_OUT(1)
|
||||
} else TRYMAP_DEC(jisx0213_1_emp, code, c1, c2) {
|
||||
PUTUCS4(EMPBASE | code)
|
||||
} else TRYMAP_DEC(jisx0213_pair, code, c1, c2) {
|
||||
WRITE2(code >> 16, code & 0xffff)
|
||||
NEXT_OUT(2)
|
||||
} else
|
||||
return 2;
|
||||
NEXT_IN(2)
|
||||
} else { /* Plane 2 */
|
||||
if (c1 >= 0x67) c1 += 0x07;
|
||||
else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37;
|
||||
else c1 -= 0x3d;
|
||||
|
||||
TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c1, c2) {
|
||||
NEXT_OUT(1)
|
||||
} else TRYMAP_DEC(jisx0213_2_emp, code, c1, c2) {
|
||||
PUTUCS4(EMPBASE | code)
|
||||
} else
|
||||
return 2;
|
||||
NEXT_IN(2)
|
||||
}
|
||||
continue;
|
||||
} else
|
||||
return 2;
|
||||
|
||||
NEXT(1, 1) /* JIS X 0201 */
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include "codecentry.h"
|
||||
BEGIN_CODEC_REGISTRY(shift_jisx0213)
|
||||
MAPOPEN(ja_JP)
|
||||
IMPORTMAP_DEC(jisx0208)
|
||||
IMPORTMAP_ENC(jisxcommon)
|
||||
IMPORTMAP_ENC(jisx0213_bmp)
|
||||
IMPORTMAP_DEC(jisx0213_1_bmp)
|
||||
IMPORTMAP_DEC(jisx0213_2_bmp)
|
||||
IMPORTMAP_ENC(jisx0213_emp)
|
||||
IMPORTMAP_DEC(jisx0213_1_emp)
|
||||
IMPORTMAP_DEC(jisx0213_2_emp)
|
||||
MAPCLOSE()
|
||||
END_CODEC_REGISTRY(shift_jisx0213)
|
12
Modules/cjkcodecs/alg_iso8859_1.h
Normal file
12
Modules/cjkcodecs/alg_iso8859_1.h
Normal file
|
@ -0,0 +1,12 @@
|
|||
/*
|
||||
 * alg_iso8859_1.h: Encoder/Decoder macros for ISO8859-1
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: alg_iso8859_1.h,v 1.3 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
/*
 * ISO8859-1 maps 1:1 onto the first 256 Unicode code points.
 * Both macros expand to an `if' statement without a trailing `else', so a
 * caller can chain `else ...' after them to handle the unmapped case.
 */

/* Unicode -> ISO8859-1: succeeds only for code points up to 0xff. */
#define ISO8859_1_ENCODE(c, assi)       if ((c) <= 0xff) (assi) = (c);

/* ISO8859-1 -> Unicode: always succeeds (the range test is disabled). */
#define ISO8859_1_DECODE(c, assi)       if (1) (assi) = (c);
|
32
Modules/cjkcodecs/alg_iso8859_7.h
Normal file
32
Modules/cjkcodecs/alg_iso8859_7.h
Normal file
|
@ -0,0 +1,32 @@
|
|||
/*
|
||||
 * alg_iso8859_7.h: Encoder/Decoder macros for ISO8859-7
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: alg_iso8859_7.h,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
/*
|
||||
 * 0x288f3bc9 and 0xbffffd77 are magic numbers (bitmasks) that indicate availability
|
||||
* of mapping for each differences. (0 and 0x2d0)
|
||||
*/
|
||||
|
||||
/*
 * ISO8859-7 <-> Unicode conversion.
 *
 * Both macros expand to an if / else-if chain without a final `else', so
 * a caller can chain `else ...' after them to handle the unmapped case.
 *
 * 0x288f3bc9 is a bitmask over 0xa0..0xbf: bit n is set when byte
 * 0xa0 + n maps to the identical Unicode code point.  0xbffffd77 is a
 * bitmask over the first 32 positions of the Greek block: bit n is set
 * when byte 0xb4 + n maps to U+0384 + n (an offset of 0x2d0).
 *
 * The masks and the shifted 1 are unsigned long: the shift count can
 * reach 31, and `1L << 31' overflows a 32-bit signed long.
 */
#define ISO8859_7_ENCODE(c, assi)                                       \
    if ((c) <= 0xa0) (assi) = (c);                                      \
    else if ((c) < 0xc0 && (0x288f3bc9UL & (1UL << ((c)-0xa0))))        \
        (assi) = (c);                                                   \
    else if ((c) >= 0x0384 && (c) <= 0x03ce && ((c) >= 0x03a4 ||        \
             (0xbffffd77UL & (1UL << ((c)-0x0384)))))                   \
        (assi) = (c) - 0x02d0;                                          \
    else if ((c)>>1 == 0x2018>>1) (assi) = (c) - 0x1f77;                \
    else if ((c) == 0x2015) (assi) = 0xaf;

#define ISO8859_7_DECODE(c, assi)                                       \
    if ((c) < 0xa0) (assi) = (c);                                       \
    else if ((c) < 0xc0 && (0x288f3bc9UL & (1UL << ((c)-0xa0))))        \
        (assi) = (c);                                                   \
    else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 ||              \
             (0xbffffd77UL & (1UL << ((c)-0xb4)))))                     \
        (assi) = 0x02d0 + (c);                                          \
    else if ((c) == 0xa1) (assi) = 0x2018;                              \
    else if ((c) == 0xa2) (assi) = 0x2019;                              \
    else if ((c) == 0xaf) (assi) = 0x2015;
|
27
Modules/cjkcodecs/alg_jisx0201.h
Normal file
27
Modules/cjkcodecs/alg_jisx0201.h
Normal file
|
@ -0,0 +1,27 @@
|
|||
/* $CJKCodecs: alg_jisx0201.h,v 1.2 2003/11/27 16:42:20 perky Exp $ */
|
||||
|
||||
/*
 * JIS X 0201 conversion macros.
 *
 * "R" is the Roman half: ASCII except that 0x5c is YEN SIGN (U+00A5) and
 * 0x7e is OVERLINE (U+203E).  "K" is the katakana half: bytes 0xa1..0xdf
 * correspond to U+FF61..U+FF9F (a constant offset of 0xfec0).
 *
 * Every macro expands to an if / else-if chain without a final `else', so
 * a caller can chain `else ...' after it to handle the unmapped case.
 */

/* Unicode -> JIS X 0201 Roman */
#define JISX0201_R_ENCODE(c, assi)                                      \
    if ((c) == 0x00a5) (assi) = 0x5c;                                   \
    else if ((c) == 0x203e) (assi) = 0x7e;                              \
    else if ((c) < 0x80 && (c) != 0x5c && (c) != 0x7e)                  \
        (assi) = (c);

/* Unicode -> JIS X 0201 katakana */
#define JISX0201_K_ENCODE(c, assi)                                      \
    if ((c) >= 0xff61 && (c) <= 0xff9f)                                 \
        (assi) = (c) - 0xfec0;

/* Unicode -> JIS X 0201 (Roman first, then katakana) */
#define JISX0201_ENCODE(c, assi)                                        \
    JISX0201_R_ENCODE(c, assi)                                          \
    else JISX0201_K_ENCODE(c, assi)

/* JIS X 0201 Roman -> Unicode */
#define JISX0201_R_DECODE(c, assi)                                      \
    if ((c) == 0x5c) (assi) = 0x00a5;                                   \
    else if ((c) == 0x7e) (assi) = 0x203e;                              \
    else if ((c) < 0x80) (assi) = (c);

/* JIS X 0201 katakana -> Unicode */
#define JISX0201_K_DECODE(c, assi)                                      \
    if ((c) >= 0xa1 && (c) <= 0xdf)                                     \
        (assi) = 0xfec0 + (c);

/* JIS X 0201 -> Unicode (Roman first, then katakana) */
#define JISX0201_DECODE(c, assi)                                        \
    JISX0201_R_DECODE(c, assi)                                          \
    else JISX0201_K_DECODE(c, assi)
|
||||
|
57
Modules/cjkcodecs/cjkcommon.h
Normal file
57
Modules/cjkcodecs/cjkcommon.h
Normal file
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
 * cjkcommon.h: Common Constants and Macros for CJK Character Sets
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: cjkcommon.h,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#ifndef _CJKCOMMON_H_
|
||||
#define _CJKCOMMON_H_
|
||||
|
||||
/*
 * Basic code-unit types.
 *
 *   ucs4_t  - holds a full UCS-4 code point (at least 32 bits)
 *   ucs2_t  - holds a UCS-2 code unit (16 bits)
 *   DBCHAR  - holds a double-byte character code (16 bits)
 *
 * uint32_t / uint16_t are normally typedefs, which `#ifdef' cannot see,
 * so their presence is detected through the UINT32_MAX / UINT16_MAX
 * limit macros that <stdint.h> defines alongside them.  The original
 * macro test is kept for platforms that provide the names as macros.
 */
#if defined(uint32_t) || defined(UINT32_MAX)
typedef uint32_t ucs4_t;
#else
typedef unsigned int ucs4_t;
#endif

#if defined(uint16_t) || defined(UINT16_MAX)
typedef uint16_t ucs2_t, DBCHAR;
#else
typedef unsigned short ucs2_t, DBCHAR;
#endif
|
||||
|
||||
#define UNIINV Py_UNICODE_REPLACEMENT_CHARACTER
|
||||
#define NOCHAR 0xFFFF
|
||||
#define MULTIC 0xFFFE
|
||||
#define DBCINV 0xFFFD
|
||||
|
||||
struct dbcs_index {
|
||||
const ucs2_t *map;
|
||||
unsigned char bottom, top;
|
||||
};
|
||||
typedef struct dbcs_index decode_map;
|
||||
|
||||
struct widedbcs_index {
|
||||
const ucs4_t *map;
|
||||
unsigned char bottom, top;
|
||||
};
|
||||
typedef struct widedbcs_index widedecode_map;
|
||||
|
||||
struct unim_index {
|
||||
const DBCHAR *map;
|
||||
unsigned char bottom, top;
|
||||
};
|
||||
typedef struct unim_index encode_map;
|
||||
|
||||
struct dbcs_map {
|
||||
const char *charset;
|
||||
const struct unim_index *encmap;
|
||||
const struct dbcs_index *decmap;
|
||||
};
|
||||
|
||||
struct pair_encodemap {
|
||||
ucs4_t uniseq;
|
||||
DBCHAR code;
|
||||
};
|
||||
|
||||
#endif
|
144
Modules/cjkcodecs/codeccommon.h
Normal file
144
Modules/cjkcodecs/codeccommon.h
Normal file
|
@ -0,0 +1,144 @@
|
|||
/*
|
||||
* codeccommon.h: Common Codec Routines
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: codeccommon.h,v 1.4 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#include "Python.h"
|
||||
#include "multibytecodec.h"
|
||||
|
||||
|
||||
#ifdef STRICT_BUILD
|
||||
#define STRICT_SUFX "_strict"
|
||||
#else
|
||||
#define STRICT_SUFX
|
||||
#endif
|
||||
|
||||
/*
 * Declare the file-local pointer to an encode / decode table for
 * `encoding'.  The pointers are named <encoding>encmap / <encoding>decmap
 * and are what TRYMAP_ENC / TRYMAP_DEC index.  The expansion already ends
 * in a semicolon.
 *
 * The storage-class specifier is written first (`static const'); placing
 * it after a qualifier is an obsolescent form.
 */
#define ENCMAP(encoding)                                                \
    static const encode_map *encoding##encmap;
#define DECMAP(encoding)                                                \
    static const decode_map *encoding##decmap;
|
||||
|
||||
#define ENCODER_INIT(encoding) \
|
||||
static int encoding##_encode_init( \
|
||||
MultibyteCodec_State *state)
|
||||
#define ENCODER(encoding) \
|
||||
static int encoding##_encode( \
|
||||
MultibyteCodec_State *state, \
|
||||
const Py_UNICODE **inbuf, size_t inleft, \
|
||||
unsigned char **outbuf, size_t outleft, int flags)
|
||||
#define ENCODER_RESET(encoding) \
|
||||
static int encoding##_encode_reset( \
|
||||
MultibyteCodec_State *state, \
|
||||
unsigned char **outbuf, size_t outleft)
|
||||
|
||||
#define DECODER_INIT(encoding) \
|
||||
static int encoding##_decode_init( \
|
||||
MultibyteCodec_State *state)
|
||||
#define DECODER(encoding) \
|
||||
static int encoding##_decode( \
|
||||
MultibyteCodec_State *state, \
|
||||
const unsigned char **inbuf, size_t inleft, \
|
||||
Py_UNICODE **outbuf, size_t outleft)
|
||||
#define DECODER_RESET(encoding) \
|
||||
static int encoding##_decode_reset( \
|
||||
MultibyteCodec_State *state)
|
||||
|
||||
#if Py_UNICODE_SIZE == 4
|
||||
#define UCS4INVALID(code) \
|
||||
if ((code) > 0xFFFF) \
|
||||
return 1;
|
||||
#else
|
||||
#define UCS4INVALID(code) \
|
||||
if (0) ;
|
||||
#endif
|
||||
|
||||
#define NEXT_IN(i) \
|
||||
(*inbuf) += (i); \
|
||||
(inleft) -= (i);
|
||||
#define NEXT_OUT(o) \
|
||||
(*outbuf) += (o); \
|
||||
(outleft) -= (o);
|
||||
#define NEXT(i, o) NEXT_IN(i) NEXT_OUT(o)
|
||||
|
||||
#define RESERVE_INBUF(n) \
|
||||
if (inleft < (n)) \
|
||||
return MBERR_TOOFEW;
|
||||
#define RESERVE_OUTBUF(n) \
|
||||
if (outleft < (n)) \
|
||||
return MBERR_TOOSMALL;
|
||||
|
||||
#define IN1 ((*inbuf)[0])
|
||||
#define IN2 ((*inbuf)[1])
|
||||
#define IN3 ((*inbuf)[2])
|
||||
#define IN4 ((*inbuf)[3])
|
||||
|
||||
#define OUT1(c) ((*outbuf)[0]) = (c);
|
||||
#define OUT2(c) ((*outbuf)[1]) = (c);
|
||||
#define OUT3(c) ((*outbuf)[2]) = (c);
|
||||
#define OUT4(c) ((*outbuf)[3]) = (c);
|
||||
|
||||
#define WRITE1(c1) \
|
||||
RESERVE_OUTBUF(1) \
|
||||
(*outbuf)[0] = (c1);
|
||||
#define WRITE2(c1, c2) \
|
||||
RESERVE_OUTBUF(2) \
|
||||
(*outbuf)[0] = (c1); \
|
||||
(*outbuf)[1] = (c2);
|
||||
#define WRITE3(c1, c2, c3) \
|
||||
RESERVE_OUTBUF(3) \
|
||||
(*outbuf)[0] = (c1); \
|
||||
(*outbuf)[1] = (c2); \
|
||||
(*outbuf)[2] = (c3);
|
||||
#define WRITE4(c1, c2, c3, c4) \
|
||||
RESERVE_OUTBUF(4) \
|
||||
(*outbuf)[0] = (c1); \
|
||||
(*outbuf)[1] = (c2); \
|
||||
(*outbuf)[2] = (c3); \
|
||||
(*outbuf)[3] = (c4);
|
||||
|
||||
#if Py_UNICODE_SIZE == 2
|
||||
# define PUTUCS4(c) \
|
||||
RESERVE_OUTBUF(2) \
|
||||
(*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10); \
|
||||
(*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff); \
|
||||
NEXT_OUT(2)
|
||||
#else
|
||||
# define PUTUCS4(c) \
|
||||
RESERVE_OUTBUF(1) \
|
||||
**outbuf = (Py_UNICODE)(c); \
|
||||
NEXT_OUT(1)
|
||||
#endif
|
||||
|
||||
#define _TRYMAP_ENC(m, assi, val) \
|
||||
if ((m)->map != NULL && (val) >= (m)->bottom && \
|
||||
(val)<= (m)->top && ((assi) = (m)->map[(val) - \
|
||||
(m)->bottom]) != NOCHAR)
|
||||
#define TRYMAP_ENC(charset, assi, uni) \
|
||||
_TRYMAP_ENC(&charset##encmap[(uni) >> 8], assi, (uni) & 0xff)
|
||||
#define _TRYMAP_DEC(m, assi, val) \
|
||||
if ((m)->map != NULL && (val) >= (m)->bottom && \
|
||||
(val)<= (m)->top && ((assi) = (m)->map[(val) - \
|
||||
(m)->bottom]) != UNIINV)
|
||||
#define TRYMAP_DEC(charset, assi, c1, c2) \
|
||||
_TRYMAP_DEC(&charset##decmap[c1], assi, c2)
|
||||
|
||||
#if Py_UNICODE_SIZE == 2
|
||||
#define DECODE_SURROGATE(c) \
|
||||
if (c >> 10 == 0xd800 >> 10) { /* high surrogate */ \
|
||||
RESERVE_INBUF(2) \
|
||||
if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */ \
|
||||
c = 0x10000 + ((ucs4_t)(c - 0xd800) << 10) + \
|
||||
((ucs4_t)(IN2) - 0xdc00); \
|
||||
} \
|
||||
}
|
||||
#define GET_INSIZE(c) ((c) > 0xffff ? 2 : 1)
|
||||
#else
|
||||
#define DECODE_SURROGATE(c) {;}
|
||||
#define GET_INSIZE(c) 1
|
||||
#endif
|
||||
|
||||
#ifdef USING_BINARY_PAIR_SEARCH
|
||||
static DBCHAR find_pairencmap(ucs2_t , ucs2_t, struct pair_encodemap *, int);
|
||||
#endif
|
177
Modules/cjkcodecs/codecentry.h
Normal file
177
Modules/cjkcodecs/codecentry.h
Normal file
|
@ -0,0 +1,177 @@
|
|||
/*
|
||||
* codecentry.h: Common Codec Entry Routines
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: codecentry.h,v 1.5 2004/01/17 11:26:10 perky Exp $
|
||||
*/
|
||||
|
||||
#ifdef HAVE_ENCODER_INIT
|
||||
#define ENCODER_INIT_FUNC(encoding) encoding##_encode_init
|
||||
#else
|
||||
#define ENCODER_INIT_FUNC(encoding) NULL
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_ENCODER_RESET
|
||||
#define ENCODER_RESET_FUNC(encoding) encoding##_encode_reset
|
||||
#else
|
||||
#define ENCODER_RESET_FUNC(encoding) NULL
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_DECODER_INIT
|
||||
#define DECODER_INIT_FUNC(encoding) encoding##_decode_init
|
||||
#else
|
||||
#define DECODER_INIT_FUNC(encoding) NULL
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_DECODER_RESET
|
||||
#define DECODER_RESET_FUNC(encoding) encoding##_decode_reset
|
||||
#else
|
||||
#define DECODER_RESET_FUNC(encoding) NULL
|
||||
#endif
|
||||
|
||||
#ifdef STRICT_BUILD
|
||||
#define BEGIN_CODEC_REGISTRY(encoding) \
|
||||
__BEGIN_CODEC_REGISTRY(encoding, init_codecs_##encoding##_strict)
|
||||
#else
|
||||
#define BEGIN_CODEC_REGISTRY(encoding) \
|
||||
__BEGIN_CODEC_REGISTRY(encoding, init_codecs_##encoding)
|
||||
#endif
|
||||
|
||||
#define __BEGIN_CODEC_REGISTRY(encoding, initname) \
|
||||
static MultibyteCodec __codec = { \
|
||||
#encoding STRICT_SUFX, \
|
||||
encoding##_encode, \
|
||||
ENCODER_INIT_FUNC(encoding), \
|
||||
ENCODER_RESET_FUNC(encoding), \
|
||||
encoding##_decode, \
|
||||
DECODER_INIT_FUNC(encoding), \
|
||||
DECODER_RESET_FUNC(encoding), \
|
||||
}; \
|
||||
\
|
||||
static struct PyMethodDef __methods[] = { \
|
||||
{NULL, NULL}, \
|
||||
}; \
|
||||
\
|
||||
void \
|
||||
initname(void) \
|
||||
{ \
|
||||
PyObject *codec; \
|
||||
PyObject *m = NULL, *mod = NULL, *o = NULL; \
|
||||
\
|
||||
m = Py_InitModule("_codecs_" #encoding STRICT_SUFX, __methods);
|
||||
|
||||
#define MAPOPEN(locale) \
|
||||
mod = PyImport_ImportModule("_codecs_mapdata_" #locale);\
|
||||
if (mod == NULL) goto errorexit; \
|
||||
if (
|
||||
#define IMPORTMAP_ENCDEC(charset) \
|
||||
importmap(mod, "__map_" #charset, &charset##encmap, \
|
||||
&charset##decmap) ||
|
||||
#define IMPORTMAP_ENC(charset) \
|
||||
importmap(mod, "__map_" #charset, &charset##encmap, \
|
||||
NULL) ||
|
||||
#define IMPORTMAP_DEC(charset) \
|
||||
importmap(mod, "__map_" #charset, NULL, \
|
||||
&charset##decmap) ||
|
||||
#define MAPCLOSE() \
|
||||
0) goto errorexit; \
|
||||
Py_DECREF(mod);
|
||||
|
||||
/*
 * Closes the module init function opened by BEGIN_CODEC_REGISTRY.
 *
 * Imports _multibytecodec, fetches its __create_codec callable, wraps the
 * static __codec description with it and stores the result in the module
 * as "codec".  On failure, control jumps to `errorexit', which releases
 * the references this function owns (`mod' and `o').
 *
 * `m' is deliberately NOT released on the error path: Py_InitModule()
 * returns a borrowed reference, so dropping it here would steal a
 * reference from its real owner.
 */
#define END_CODEC_REGISTRY(encoding)                                    \
    mod = PyImport_ImportModule("_multibytecodec");                     \
    if (mod == NULL) goto errorexit;                                    \
    o = PyObject_GetAttrString(mod, "__create_codec");                  \
    if (o == NULL || !PyCallable_Check(o))                              \
        goto errorexit;                                                 \
                                                                        \
    codec = createcodec(o, &__codec);                                   \
    if (codec == NULL)                                                  \
        goto errorexit;                                                 \
    PyModule_AddObject(m, "codec", codec);                              \
    Py_DECREF(o); Py_DECREF(mod);                                       \
                                                                        \
    if (PyErr_Occurred())                                               \
        Py_FatalError("can't initialize the _" #encoding                \
                      STRICT_SUFX " module");                           \
                                                                        \
    return;                                                             \
                                                                        \
errorexit:                                                              \
    Py_XDECREF(mod);                                                    \
    Py_XDECREF(o);                                                      \
}
|
||||
|
||||
#define CODEC_REGISTRY(encoding) \
|
||||
BEGIN_CODEC_REGISTRY(encoding) \
|
||||
END_CODEC_REGISTRY(encoding)
|
||||
|
||||
#ifdef USING_BINARY_PAIR_SEARCH
|
||||
static DBCHAR
|
||||
find_pairencmap(ucs2_t body, ucs2_t modifier,
|
||||
struct pair_encodemap *haystack, int haystacksize)
|
||||
{
|
||||
int pos, min, max;
|
||||
ucs4_t value = body << 16 | modifier;
|
||||
|
||||
min = 0;
|
||||
max = haystacksize;
|
||||
|
||||
for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1)
|
||||
if (value < haystack[pos].uniseq) {
|
||||
if (max == pos) break;
|
||||
else max = pos;
|
||||
} else if (value > haystack[pos].uniseq) {
|
||||
if (min == pos) break;
|
||||
else min = pos;
|
||||
} else
|
||||
break;
|
||||
|
||||
if (value == haystack[pos].uniseq)
|
||||
return haystack[pos].code;
|
||||
else
|
||||
return DBCINV;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef CODEC_WITHOUT_MAPS
|
||||
static int
|
||||
importmap(PyObject *mod, const char *symbol,
|
||||
const struct unim_index **encmap, const struct dbcs_index **decmap)
|
||||
{
|
||||
PyObject *o;
|
||||
|
||||
o = PyObject_GetAttrString(mod, (char*)symbol);
|
||||
if (o == NULL)
|
||||
return -1;
|
||||
else if (!PyCObject_Check(o)) {
|
||||
PyErr_SetString(PyExc_ValueError, "map data must be a CObject.");
|
||||
return -1;
|
||||
} else {
|
||||
struct dbcs_map *map;
|
||||
map = PyCObject_AsVoidPtr(o);
|
||||
if (encmap != NULL)
|
||||
*encmap = map->encmap;
|
||||
if (decmap != NULL)
|
||||
*decmap = map->decmap;
|
||||
Py_DECREF(o);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static PyObject *
|
||||
createcodec(PyObject *cofunc, MultibyteCodec *codec)
|
||||
{
|
||||
PyObject *args, *r;
|
||||
|
||||
args = PyTuple_New(1);
|
||||
if (args == NULL) return NULL;
|
||||
PyTuple_SET_ITEM(args, 0, PyCObject_FromVoidPtr(codec, NULL));
|
||||
|
||||
r = PyObject_CallObject(cofunc, args);
|
||||
Py_DECREF(args);
|
||||
|
||||
return r;
|
||||
}
|
256
Modules/cjkcodecs/iso2022common.h
Normal file
256
Modules/cjkcodecs/iso2022common.h
Normal file
|
@ -0,0 +1,256 @@
|
|||
/*
|
||||
* iso2022common.h: Common Codec Routines for ISO-2022 codecs.
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: iso2022common.h,v 1.8 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
/* This ISO-2022 implementation is intended to comply with ECMA-43 Level 1
|
||||
 * rather than the RFCs themselves */
|
||||
|
||||
#define ESC 0x1b
|
||||
#define SO 0x0e
|
||||
#define SI 0x0f
|
||||
|
||||
#define MAX_ESCSEQLEN 16
|
||||
|
||||
#define IS_ESCEND(c) (((c) >= 'A' && (c) <= 'Z') || (c) == '@')
|
||||
#define IS_ISO2022ESC(c2) ((c2) == '(' || (c2) == ')' || (c2) == '$' || \
|
||||
(c2) == '.' || (c2) == '&')
|
||||
/* this is not a full list of ISO-2022 escape sequence headers.
|
||||
* but, it's enough to implement CJK instances of iso-2022. */
|
||||
|
||||
/* STATE
|
||||
|
||||
state->c[0-3]
|
||||
|
||||
00000000
|
||||
||^^^^^|
|
||||
|+-----+---- G0-3 Character Set
|
||||
+----------- Is G0-3 double byte?
|
||||
|
||||
state->c[4]
|
||||
|
||||
00000000
|
||||
||
|
||||
|+---- Locked-Shift?
|
||||
+----- ESC Throughout
|
||||
*/
|
||||
|
||||
#define CHARSET_DOUBLEBYTE 0x80
|
||||
|
||||
#define CHARSET_ASCII 'B'
|
||||
|
||||
#define CHARSET_ISO8859_1 'A'
|
||||
#define CHARSET_ISO8859_7 'F'
|
||||
|
||||
#define CHARSET_KSX1001 ('C'|CHARSET_DOUBLEBYTE)
|
||||
|
||||
#define CHARSET_JISX0201_R 'J'
|
||||
#define CHARSET_JISX0201_K 'I'
|
||||
#define CHARSET_JISX0208 ('B'|CHARSET_DOUBLEBYTE)
|
||||
#define CHARSET_JISX0208_O ('@'|CHARSET_DOUBLEBYTE)
|
||||
#define CHARSET_JISX0212 ('D'|CHARSET_DOUBLEBYTE)
|
||||
#define CHARSET_JISX0213_1 ('O'|CHARSET_DOUBLEBYTE)
|
||||
#define CHARSET_JISX0213_2 ('P'|CHARSET_DOUBLEBYTE)
|
||||
|
||||
#define CHARSET_GB2312 ('A'|CHARSET_DOUBLEBYTE)
|
||||
#define CHARSET_GB2312_8565 ('E'|CHARSET_DOUBLEBYTE)
|
||||
|
||||
#define CHARSET_DESIGN(c) ((c) & 0x7f)
|
||||
#define CHARSET_ISDBCS(c) ((c) & 0x80)
|
||||
|
||||
#define F_SHIFTED 0x01
|
||||
#define F_ESCTHROUGHOUT 0x02
|
||||
|
||||
#define STATE_SETG(dn, s, v) ((s)->c[dn]) = (v);
|
||||
#define STATE_GETG(dn, s) ((s)->c[dn])
|
||||
|
||||
#define STATE_SETG0(s, v) STATE_SETG(0, s, v)
|
||||
#define STATE_GETG0(s) STATE_GETG(0, s)
|
||||
#define STATE_SETG1(s, v) STATE_SETG(1, s, v)
|
||||
#define STATE_GETG1(s) STATE_GETG(1, s)
|
||||
#define STATE_SETG2(s, v) STATE_SETG(2, s, v)
|
||||
#define STATE_GETG2(s) STATE_GETG(2, s)
|
||||
#define STATE_SETG3(s, v) STATE_SETG(3, s, v)
|
||||
#define STATE_GETG3(s) STATE_GETG(3, s)
|
||||
|
||||
#define STATE_SETFLAG(s, f) ((s)->c[4]) |= (f);
|
||||
#define STATE_GETFLAG(s, f) ((s)->c[4] & (f))
|
||||
#define STATE_CLEARFLAG(s, f) ((s)->c[4]) &= ~(f);
|
||||
#define STATE_CLEARFLAGS(s) ((s)->c[4]) = 0;
|
||||
|
||||
/*
 * Store in `charset' the character set currently invoked for byte `c1':
 * G1 while the F_SHIFTED flag is set, G0 otherwise.  Makes the enclosing
 * function return 1 when `c1' has its high bit set.
 *
 * Relies on a variable named `state' in the caller's scope.  The byte is
 * taken from the macro argument rather than from an unrelated local that
 * happens to be called `c'.
 */
#define ISO2022_GETCHARSET(charset, c1)                                 \
    if ((c1) >= 0x80)                                                   \
        return 1;                                                       \
    if (STATE_GETFLAG(state, F_SHIFTED)) /* G1 */                       \
        (charset) = STATE_GETG1(state);                                 \
    else /* G0 */                                                       \
        (charset) = STATE_GETG0(state);
|
||||
|
||||
#ifdef ISO2022_USE_G2_DESIGNATION
|
||||
/* hardcoded for iso-2022-jp-2 for now. we'll need to generalize it
|
||||
when we have more G2 designating encodings */
|
||||
#define SS2_ROUTINE \
|
||||
if (IN2 == 'N') { /* SS2 */ \
|
||||
RESERVE_INBUF(3) \
|
||||
if (STATE_GETG2(state) == CHARSET_ISO8859_1) { \
|
||||
ISO8859_1_DECODE(IN3 ^ 0x80, **outbuf) \
|
||||
else return 3; \
|
||||
} else if (STATE_GETG2(state) == CHARSET_ISO8859_7) { \
|
||||
ISO8859_7_DECODE(IN3 ^ 0x80, **outbuf) \
|
||||
else return 3; \
|
||||
} else if (STATE_GETG2(state) == CHARSET_ASCII) { \
|
||||
if (IN3 & 0x80) return 3; \
|
||||
else **outbuf = IN3; \
|
||||
} else \
|
||||
return MBERR_INTERNAL; \
|
||||
NEXT(3, 1) \
|
||||
} else
|
||||
#else
|
||||
#define SS2_ROUTINE
|
||||
#endif
|
||||
|
||||
#ifndef ISO2022_NO_SHIFT
|
||||
#define SHIFT_CASES \
|
||||
case SI: \
|
||||
STATE_CLEARFLAG(state, F_SHIFTED) \
|
||||
NEXT_IN(1) \
|
||||
break; \
|
||||
case SO: \
|
||||
STATE_SETFLAG(state, F_SHIFTED) \
|
||||
NEXT_IN(1) \
|
||||
break;
|
||||
#else
|
||||
/* for compatibility with JapaneseCodecs */
|
||||
#define SHIFT_CASES
|
||||
#endif
|
||||
|
||||
#define ISO2022_BASECASES(c1) \
|
||||
case ESC: \
|
||||
RESERVE_INBUF(2) \
|
||||
if (IS_ISO2022ESC(IN2)) { \
|
||||
int err; \
|
||||
err = iso2022processesc(state, inbuf, &inleft); \
|
||||
if (err != 0) \
|
||||
return err; \
|
||||
} else SS2_ROUTINE { \
|
||||
STATE_SETFLAG(state, F_ESCTHROUGHOUT) \
|
||||
OUT1(ESC) \
|
||||
NEXT(1, 1) \
|
||||
} \
|
||||
break; \
|
||||
SHIFT_CASES \
|
||||
case '\n': \
|
||||
STATE_CLEARFLAG(state, F_SHIFTED) \
|
||||
WRITE1('\n') \
|
||||
NEXT(1, 1) \
|
||||
break;
|
||||
|
||||
/*
 * ISO2022_ESCTHROUGHOUT(c) -- pass-through for escape sequences that are
 * not ISO-2022 designations.
 *
 * While F_ESCTHROUGHOUT is set on `state`, byte `c` is written to the
 * output unchanged (one input byte consumed, one output unit produced) and
 * the enclosing loop is restarted with `continue`.  When `c` satisfies
 * IS_ESCEND the flag is cleared, so that byte is the last one passed
 * through.  When the flag is not set the macro does nothing.
 *
 * Must be expanded inside a loop (it uses `continue`), and `c` should be a
 * plain variable: it is read again after NEXT has advanced the buffers.
 */
#define ISO2022_ESCTHROUGHOUT(c) \
|
||||
if (STATE_GETFLAG(state, F_ESCTHROUGHOUT)) { \
|
||||
/* ESC throughout mode: for non-iso2022 escape sequences */ \
|
||||
RESERVE_OUTBUF(1) \
|
||||
OUT1(c) /* assume as ISO-8859-1 */ \
|
||||
NEXT(1, 1) \
|
||||
if (IS_ESCEND(c)) { \
|
||||
STATE_CLEARFLAG(state, F_ESCTHROUGHOUT) \
|
||||
} \
|
||||
continue; \
|
||||
}
|
||||
|
||||
/*
 * ISO2022_LOOP_BEGIN -- opening half of the decoder's main loop.
 *
 * Expands to `while (inleft > 0)`, loads the current input byte into a
 * local `c`, applies ISO2022_ESCTHROUGHOUT, then switches on `c`:
 *   - ESC, shift controls and '\n' are handled by ISO2022_BASECASES;
 *   - in `default:`, bytes below 0x20 (C0 controls) are copied to the
 *     output, bytes of 0x80 and above make the enclosing function
 *     return 1, and for every other byte an `else {` block is left OPEN.
 *
 * The codec-specific handling of printable bytes goes into that open
 * block; ISO2022_LOOP_END must follow to close the else, switch and while.
 */
#define ISO2022_LOOP_BEGIN \
|
||||
while (inleft > 0) { \
|
||||
unsigned char c = IN1; \
|
||||
ISO2022_ESCTHROUGHOUT(c) \
|
||||
switch(c) { \
|
||||
ISO2022_BASECASES(c) \
|
||||
default: \
|
||||
if (c < 0x20) { /* C0 */ \
|
||||
RESERVE_OUTBUF(1) \
|
||||
OUT1(c) \
|
||||
NEXT(1, 1) \
|
||||
} else if (c >= 0x80) \
|
||||
return 1; \
|
||||
else {
|
||||
/*
 * ISO2022_LOOP_END -- closing half of the decoder's main loop.  The three
 * braces close, in order, the `else {` block, the `switch` and the `while`
 * opened by ISO2022_LOOP_BEGIN.
 */
#define ISO2022_LOOP_END \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
static int
|
||||
iso2022processesc(MultibyteCodec_State *state,
|
||||
const unsigned char **inbuf, size_t *inleft)
|
||||
{
|
||||
unsigned char charset, designation;
|
||||
int i, esclen;
|
||||
|
||||
for (i = 1;i < MAX_ESCSEQLEN;i++) {
|
||||
if (i >= *inleft)
|
||||
return MBERR_TOOFEW;
|
||||
if (IS_ESCEND((*inbuf)[i])) {
|
||||
esclen = i + 1;
|
||||
break;
|
||||
}
|
||||
#ifdef ISO2022_USE_JISX0208EXT
|
||||
else if (i+1 < *inleft && (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@')
|
||||
i += 2;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (i >= MAX_ESCSEQLEN)
|
||||
return 1; /* unterminated escape sequence */
|
||||
|
||||
switch (esclen) {
|
||||
case 3:
|
||||
if (IN2 == '$') {
|
||||
charset = IN3 | CHARSET_DOUBLEBYTE;
|
||||
designation = 0;
|
||||
} else {
|
||||
charset = IN3;
|
||||
if (IN2 == '(') designation = 0;
|
||||
else if (IN2 == ')') designation = 1;
|
||||
#ifdef ISO2022_USE_G2_DESIGNATION
|
||||
else if (IN2 == '.') designation = 2;
|
||||
#endif
|
||||
else return 3;
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
if (IN2 != '$')
|
||||
return 4;
|
||||
|
||||
charset = IN4 | CHARSET_DOUBLEBYTE;
|
||||
if (IN3 == '(') designation = 0;
|
||||
else if (IN3 == ')') designation = 1;
|
||||
else return 4;
|
||||
break;
|
||||
#ifdef ISO2022_USE_JISX0208EXT
|
||||
case 6: /* designation with prefix */
|
||||
if ((*inbuf)[3] == ESC && (*inbuf)[4] == '$' && (*inbuf)[5] == 'B') {
|
||||
charset = 'B' | CHARSET_DOUBLEBYTE;
|
||||
designation = 0;
|
||||
} else
|
||||
return 6;
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
return esclen;
|
||||
}
|
||||
|
||||
{ /* raise error when the charset is not designated for this encoding */
|
||||
const unsigned char dsgs[] = {ISO2022_DESIGNATIONS, '\x00'};
|
||||
|
||||
for (i = 0; dsgs[i] != '\x00'; i++)
|
||||
if (dsgs[i] == charset)
|
||||
break;
|
||||
|
||||
if (dsgs[i] == '\x00')
|
||||
return esclen;
|
||||
}
|
||||
|
||||
STATE_SETG(designation, state, charset)
|
||||
*inleft -= esclen;
|
||||
(*inbuf) += esclen;
|
||||
return 0;
|
||||
}
|
5338
Modules/cjkcodecs/map_big5.h
Normal file
5338
Modules/cjkcodecs/map_big5.h
Normal file
File diff suppressed because it is too large
Load diff
1862
Modules/cjkcodecs/map_cp932ext.h
Normal file
1862
Modules/cjkcodecs/map_cp932ext.h
Normal file
File diff suppressed because it is too large
Load diff
4409
Modules/cjkcodecs/map_cp949.h
Normal file
4409
Modules/cjkcodecs/map_cp949.h
Normal file
File diff suppressed because it is too large
Load diff
1474
Modules/cjkcodecs/map_cp949ext.h
Normal file
1474
Modules/cjkcodecs/map_cp949ext.h
Normal file
File diff suppressed because it is too large
Load diff
630
Modules/cjkcodecs/map_cp950ext.h
Normal file
630
Modules/cjkcodecs/map_cp950ext.h
Normal file
|
@ -0,0 +1,630 @@
|
|||
/*
|
||||
* $CJKCodecs: map_cp950ext.h,v 1.1.1.1 2003/09/24 17:45:47 perky Exp $
|
||||
*/
|
||||
|
||||
/*
 * Flat storage for the cp950 extension decode map: 224 code units holding
 * the Unicode value for each covered cp950 double-byte code.  It is not
 * indexed directly; cp950ext_decmap (the per-lead-byte index that follows)
 * points at slices of it.  UNIINV fills the positions inside a slice that
 * carry no value (presumably "no mapping" -- confirm where UNIINV is
 * defined).
 */
static const ucs2_t __cp950ext_decmap[224] = {
|
||||
0x2027, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, 0xfe51, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, 0x00af, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, 0xff5e, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV, UNIINV, 0x2295, 0x2299, 0x2215,
|
||||
0xfe68, UNIINV, 0xffe5, UNIINV, 0xffe0, 0xffe1, 0x20ac, 0x7881,
|
||||
0x92b9, 0x88cf, 0x58bb, 0x6052, 0x7ca7, 0x5afa, 0x2554, 0x2566,
|
||||
0x2557, 0x2560, 0x256c, 0x2563, 0x255a, 0x2569, 0x255d, 0x2552,
|
||||
0x2564, 0x2555, 0x255e, 0x256a, 0x2561, 0x2558, 0x2567, 0x255b,
|
||||
0x2553, 0x2565, 0x2556, 0x255f, 0x256b, 0x2562, 0x2559, 0x2568,
|
||||
0x255c, 0x2551, 0x2550, 0x256d, 0x256e, 0x2570, 0x256f, 0x2593,
|
||||
};
|
||||
|
||||
/*
 * Decode index for the cp950 extension, one entry per lead byte (the
 * comment on each row gives the lead byte).  {0, 0, 0} means the lead byte
 * has no extension mappings.  A populated entry holds a pointer into
 * __cp950ext_decmap followed by two byte values that delimit the covered
 * trail bytes, inclusive: 0xA1 covers 0x45..0xf3 (175 slots), which is why
 * the 0xA2 slice starts at offset 175.  Field names are declared with
 * struct dbcs_index, outside this header.
 */
static const struct dbcs_index cp950ext_decmap[256] = {
|
||||
/* 0x00 */ {0, 0, 0},
|
||||
/* 0x01 */ {0, 0, 0},
|
||||
/* 0x02 */ {0, 0, 0},
|
||||
/* 0x03 */ {0, 0, 0},
|
||||
/* 0x04 */ {0, 0, 0},
|
||||
/* 0x05 */ {0, 0, 0},
|
||||
/* 0x06 */ {0, 0, 0},
|
||||
/* 0x07 */ {0, 0, 0},
|
||||
/* 0x08 */ {0, 0, 0},
|
||||
/* 0x09 */ {0, 0, 0},
|
||||
/* 0x0A */ {0, 0, 0},
|
||||
/* 0x0B */ {0, 0, 0},
|
||||
/* 0x0C */ {0, 0, 0},
|
||||
/* 0x0D */ {0, 0, 0},
|
||||
/* 0x0E */ {0, 0, 0},
|
||||
/* 0x0F */ {0, 0, 0},
|
||||
/* 0x10 */ {0, 0, 0},
|
||||
/* 0x11 */ {0, 0, 0},
|
||||
/* 0x12 */ {0, 0, 0},
|
||||
/* 0x13 */ {0, 0, 0},
|
||||
/* 0x14 */ {0, 0, 0},
|
||||
/* 0x15 */ {0, 0, 0},
|
||||
/* 0x16 */ {0, 0, 0},
|
||||
/* 0x17 */ {0, 0, 0},
|
||||
/* 0x18 */ {0, 0, 0},
|
||||
/* 0x19 */ {0, 0, 0},
|
||||
/* 0x1A */ {0, 0, 0},
|
||||
/* 0x1B */ {0, 0, 0},
|
||||
/* 0x1C */ {0, 0, 0},
|
||||
/* 0x1D */ {0, 0, 0},
|
||||
/* 0x1E */ {0, 0, 0},
|
||||
/* 0x1F */ {0, 0, 0},
|
||||
/* 0x20 */ {0, 0, 0},
|
||||
/* 0x21 */ {0, 0, 0},
|
||||
/* 0x22 */ {0, 0, 0},
|
||||
/* 0x23 */ {0, 0, 0},
|
||||
/* 0x24 */ {0, 0, 0},
|
||||
/* 0x25 */ {0, 0, 0},
|
||||
/* 0x26 */ {0, 0, 0},
|
||||
/* 0x27 */ {0, 0, 0},
|
||||
/* 0x28 */ {0, 0, 0},
|
||||
/* 0x29 */ {0, 0, 0},
|
||||
/* 0x2A */ {0, 0, 0},
|
||||
/* 0x2B */ {0, 0, 0},
|
||||
/* 0x2C */ {0, 0, 0},
|
||||
/* 0x2D */ {0, 0, 0},
|
||||
/* 0x2E */ {0, 0, 0},
|
||||
/* 0x2F */ {0, 0, 0},
|
||||
/* 0x30 */ {0, 0, 0},
|
||||
/* 0x31 */ {0, 0, 0},
|
||||
/* 0x32 */ {0, 0, 0},
|
||||
/* 0x33 */ {0, 0, 0},
|
||||
/* 0x34 */ {0, 0, 0},
|
||||
/* 0x35 */ {0, 0, 0},
|
||||
/* 0x36 */ {0, 0, 0},
|
||||
/* 0x37 */ {0, 0, 0},
|
||||
/* 0x38 */ {0, 0, 0},
|
||||
/* 0x39 */ {0, 0, 0},
|
||||
/* 0x3A */ {0, 0, 0},
|
||||
/* 0x3B */ {0, 0, 0},
|
||||
/* 0x3C */ {0, 0, 0},
|
||||
/* 0x3D */ {0, 0, 0},
|
||||
/* 0x3E */ {0, 0, 0},
|
||||
/* 0x3F */ {0, 0, 0},
|
||||
/* 0x40 */ {0, 0, 0},
|
||||
/* 0x41 */ {0, 0, 0},
|
||||
/* 0x42 */ {0, 0, 0},
|
||||
/* 0x43 */ {0, 0, 0},
|
||||
/* 0x44 */ {0, 0, 0},
|
||||
/* 0x45 */ {0, 0, 0},
|
||||
/* 0x46 */ {0, 0, 0},
|
||||
/* 0x47 */ {0, 0, 0},
|
||||
/* 0x48 */ {0, 0, 0},
|
||||
/* 0x49 */ {0, 0, 0},
|
||||
/* 0x4A */ {0, 0, 0},
|
||||
/* 0x4B */ {0, 0, 0},
|
||||
/* 0x4C */ {0, 0, 0},
|
||||
/* 0x4D */ {0, 0, 0},
|
||||
/* 0x4E */ {0, 0, 0},
|
||||
/* 0x4F */ {0, 0, 0},
|
||||
/* 0x50 */ {0, 0, 0},
|
||||
/* 0x51 */ {0, 0, 0},
|
||||
/* 0x52 */ {0, 0, 0},
|
||||
/* 0x53 */ {0, 0, 0},
|
||||
/* 0x54 */ {0, 0, 0},
|
||||
/* 0x55 */ {0, 0, 0},
|
||||
/* 0x56 */ {0, 0, 0},
|
||||
/* 0x57 */ {0, 0, 0},
|
||||
/* 0x58 */ {0, 0, 0},
|
||||
/* 0x59 */ {0, 0, 0},
|
||||
/* 0x5A */ {0, 0, 0},
|
||||
/* 0x5B */ {0, 0, 0},
|
||||
/* 0x5C */ {0, 0, 0},
|
||||
/* 0x5D */ {0, 0, 0},
|
||||
/* 0x5E */ {0, 0, 0},
|
||||
/* 0x5F */ {0, 0, 0},
|
||||
/* 0x60 */ {0, 0, 0},
|
||||
/* 0x61 */ {0, 0, 0},
|
||||
/* 0x62 */ {0, 0, 0},
|
||||
/* 0x63 */ {0, 0, 0},
|
||||
/* 0x64 */ {0, 0, 0},
|
||||
/* 0x65 */ {0, 0, 0},
|
||||
/* 0x66 */ {0, 0, 0},
|
||||
/* 0x67 */ {0, 0, 0},
|
||||
/* 0x68 */ {0, 0, 0},
|
||||
/* 0x69 */ {0, 0, 0},
|
||||
/* 0x6A */ {0, 0, 0},
|
||||
/* 0x6B */ {0, 0, 0},
|
||||
/* 0x6C */ {0, 0, 0},
|
||||
/* 0x6D */ {0, 0, 0},
|
||||
/* 0x6E */ {0, 0, 0},
|
||||
/* 0x6F */ {0, 0, 0},
|
||||
/* 0x70 */ {0, 0, 0},
|
||||
/* 0x71 */ {0, 0, 0},
|
||||
/* 0x72 */ {0, 0, 0},
|
||||
/* 0x73 */ {0, 0, 0},
|
||||
/* 0x74 */ {0, 0, 0},
|
||||
/* 0x75 */ {0, 0, 0},
|
||||
/* 0x76 */ {0, 0, 0},
|
||||
/* 0x77 */ {0, 0, 0},
|
||||
/* 0x78 */ {0, 0, 0},
|
||||
/* 0x79 */ {0, 0, 0},
|
||||
/* 0x7A */ {0, 0, 0},
|
||||
/* 0x7B */ {0, 0, 0},
|
||||
/* 0x7C */ {0, 0, 0},
|
||||
/* 0x7D */ {0, 0, 0},
|
||||
/* 0x7E */ {0, 0, 0},
|
||||
/* 0x7F */ {0, 0, 0},
|
||||
/* 0x80 */ {0, 0, 0},
|
||||
/* 0x81 */ {0, 0, 0},
|
||||
/* 0x82 */ {0, 0, 0},
|
||||
/* 0x83 */ {0, 0, 0},
|
||||
/* 0x84 */ {0, 0, 0},
|
||||
/* 0x85 */ {0, 0, 0},
|
||||
/* 0x86 */ {0, 0, 0},
|
||||
/* 0x87 */ {0, 0, 0},
|
||||
/* 0x88 */ {0, 0, 0},
|
||||
/* 0x89 */ {0, 0, 0},
|
||||
/* 0x8A */ {0, 0, 0},
|
||||
/* 0x8B */ {0, 0, 0},
|
||||
/* 0x8C */ {0, 0, 0},
|
||||
/* 0x8D */ {0, 0, 0},
|
||||
/* 0x8E */ {0, 0, 0},
|
||||
/* 0x8F */ {0, 0, 0},
|
||||
/* 0x90 */ {0, 0, 0},
|
||||
/* 0x91 */ {0, 0, 0},
|
||||
/* 0x92 */ {0, 0, 0},
|
||||
/* 0x93 */ {0, 0, 0},
|
||||
/* 0x94 */ {0, 0, 0},
|
||||
/* 0x95 */ {0, 0, 0},
|
||||
/* 0x96 */ {0, 0, 0},
|
||||
/* 0x97 */ {0, 0, 0},
|
||||
/* 0x98 */ {0, 0, 0},
|
||||
/* 0x99 */ {0, 0, 0},
|
||||
/* 0x9A */ {0, 0, 0},
|
||||
/* 0x9B */ {0, 0, 0},
|
||||
/* 0x9C */ {0, 0, 0},
|
||||
/* 0x9D */ {0, 0, 0},
|
||||
/* 0x9E */ {0, 0, 0},
|
||||
/* 0x9F */ {0, 0, 0},
|
||||
/* 0xA0 */ {0, 0, 0},
|
||||
/* 0xA1 */ {__cp950ext_decmap+0, 0x45, 0xf3},
|
||||
/* 0xA2 */ {__cp950ext_decmap+175, 0x41, 0x47},
|
||||
/* 0xA3 */ {__cp950ext_decmap+182, 0xe1, 0xe1},
|
||||
/* 0xA4 */ {0, 0, 0},
|
||||
/* 0xA5 */ {0, 0, 0},
|
||||
/* 0xA6 */ {0, 0, 0},
|
||||
/* 0xA7 */ {0, 0, 0},
|
||||
/* 0xA8 */ {0, 0, 0},
|
||||
/* 0xA9 */ {0, 0, 0},
|
||||
/* 0xAA */ {0, 0, 0},
|
||||
/* 0xAB */ {0, 0, 0},
|
||||
/* 0xAC */ {0, 0, 0},
|
||||
/* 0xAD */ {0, 0, 0},
|
||||
/* 0xAE */ {0, 0, 0},
|
||||
/* 0xAF */ {0, 0, 0},
|
||||
/* 0xB0 */ {0, 0, 0},
|
||||
/* 0xB1 */ {0, 0, 0},
|
||||
/* 0xB2 */ {0, 0, 0},
|
||||
/* 0xB3 */ {0, 0, 0},
|
||||
/* 0xB4 */ {0, 0, 0},
|
||||
/* 0xB5 */ {0, 0, 0},
|
||||
/* 0xB6 */ {0, 0, 0},
|
||||
/* 0xB7 */ {0, 0, 0},
|
||||
/* 0xB8 */ {0, 0, 0},
|
||||
/* 0xB9 */ {0, 0, 0},
|
||||
/* 0xBA */ {0, 0, 0},
|
||||
/* 0xBB */ {0, 0, 0},
|
||||
/* 0xBC */ {0, 0, 0},
|
||||
/* 0xBD */ {0, 0, 0},
|
||||
/* 0xBE */ {0, 0, 0},
|
||||
/* 0xBF */ {0, 0, 0},
|
||||
/* 0xC0 */ {0, 0, 0},
|
||||
/* 0xC1 */ {0, 0, 0},
|
||||
/* 0xC2 */ {0, 0, 0},
|
||||
/* 0xC3 */ {0, 0, 0},
|
||||
/* 0xC4 */ {0, 0, 0},
|
||||
/* 0xC5 */ {0, 0, 0},
|
||||
/* 0xC6 */ {0, 0, 0},
|
||||
/* 0xC7 */ {0, 0, 0},
|
||||
/* 0xC8 */ {0, 0, 0},
|
||||
/* 0xC9 */ {0, 0, 0},
|
||||
/* 0xCA */ {0, 0, 0},
|
||||
/* 0xCB */ {0, 0, 0},
|
||||
/* 0xCC */ {0, 0, 0},
|
||||
/* 0xCD */ {0, 0, 0},
|
||||
/* 0xCE */ {0, 0, 0},
|
||||
/* 0xCF */ {0, 0, 0},
|
||||
/* 0xD0 */ {0, 0, 0},
|
||||
/* 0xD1 */ {0, 0, 0},
|
||||
/* 0xD2 */ {0, 0, 0},
|
||||
/* 0xD3 */ {0, 0, 0},
|
||||
/* 0xD4 */ {0, 0, 0},
|
||||
/* 0xD5 */ {0, 0, 0},
|
||||
/* 0xD6 */ {0, 0, 0},
|
||||
/* 0xD7 */ {0, 0, 0},
|
||||
/* 0xD8 */ {0, 0, 0},
|
||||
/* 0xD9 */ {0, 0, 0},
|
||||
/* 0xDA */ {0, 0, 0},
|
||||
/* 0xDB */ {0, 0, 0},
|
||||
/* 0xDC */ {0, 0, 0},
|
||||
/* 0xDD */ {0, 0, 0},
|
||||
/* 0xDE */ {0, 0, 0},
|
||||
/* 0xDF */ {0, 0, 0},
|
||||
/* 0xE0 */ {0, 0, 0},
|
||||
/* 0xE1 */ {0, 0, 0},
|
||||
/* 0xE2 */ {0, 0, 0},
|
||||
/* 0xE3 */ {0, 0, 0},
|
||||
/* 0xE4 */ {0, 0, 0},
|
||||
/* 0xE5 */ {0, 0, 0},
|
||||
/* 0xE6 */ {0, 0, 0},
|
||||
/* 0xE7 */ {0, 0, 0},
|
||||
/* 0xE8 */ {0, 0, 0},
|
||||
/* 0xE9 */ {0, 0, 0},
|
||||
/* 0xEA */ {0, 0, 0},
|
||||
/* 0xEB */ {0, 0, 0},
|
||||
/* 0xEC */ {0, 0, 0},
|
||||
/* 0xED */ {0, 0, 0},
|
||||
/* 0xEE */ {0, 0, 0},
|
||||
/* 0xEF */ {0, 0, 0},
|
||||
/* 0xF0 */ {0, 0, 0},
|
||||
/* 0xF1 */ {0, 0, 0},
|
||||
/* 0xF2 */ {0, 0, 0},
|
||||
/* 0xF3 */ {0, 0, 0},
|
||||
/* 0xF4 */ {0, 0, 0},
|
||||
/* 0xF5 */ {0, 0, 0},
|
||||
/* 0xF6 */ {0, 0, 0},
|
||||
/* 0xF7 */ {0, 0, 0},
|
||||
/* 0xF8 */ {0, 0, 0},
|
||||
/* 0xF9 */ {__cp950ext_decmap+183, 0xd6, 0xfe},
|
||||
/* 0xFA */ {0, 0, 0},
|
||||
/* 0xFB */ {0, 0, 0},
|
||||
/* 0xFC */ {0, 0, 0},
|
||||
/* 0xFD */ {0, 0, 0},
|
||||
/* 0xFE */ {0, 0, 0},
|
||||
/* 0xFF */ {0, 0, 0},
|
||||
};
|
||||
|
||||
/*
 * Flat storage for the cp950 extension encode map: 581 cp950 double-byte
 * codes, addressed through the slices described by cp950ext_encmap (the
 * index table that follows).  NOCHAR fills positions inside a slice that
 * have no cp950 code (presumably "unencodable" -- confirm where NOCHAR is
 * defined).
 */
static const DBCHAR __cp950ext_encmap[581] = {
|
||||
0xa1c2, 0xa145, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa3e1, 0xa241,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa1f2,
|
||||
NOCHAR, NOCHAR, NOCHAR, 0xa1f3, 0xf9f8, 0xf9e6, 0xf9ef, 0xf9dd,
|
||||
0xf9e8, 0xf9f1, 0xf9df, 0xf9ec, 0xf9f5, 0xf9e3, 0xf9ee, 0xf9f7,
|
||||
0xf9e5, NOCHAR, 0xf9f2, 0xf9e0, NOCHAR, 0xf9f4, 0xf9e2, 0xf9e7,
|
||||
0xf9f0, 0xf9de, 0xf9ed, 0xf9f6, 0xf9e4, NOCHAR, 0xf9f3, 0xf9e1,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xf9fe, 0xf9d9,
|
||||
0xf9dc, 0xf9da, 0xf9d6, 0xf9db, 0xf9d8, 0xf9d7, 0xa14e, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa242, 0xa1fe, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, 0xa240, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa1e3, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR,
|
||||
NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, NOCHAR, 0xa246,
|
||||
0xa247, NOCHAR, NOCHAR, NOCHAR, 0xa244,
|
||||
};
|
||||
|
||||
/*
 * Encode index for the cp950 extension, one entry per high byte of the
 * Unicode code point (the comment on each row gives that byte).  {0, 0, 0}
 * means nothing in that 256-code-point row is covered.  A populated entry
 * holds a pointer into __cp950ext_encmap followed by the lowest and highest
 * covered low byte, inclusive: row 0x20 covers 0x27..0xac (134 slots)
 * starting at offset 1, so row 0x22 starts at offset 135.  Field names are
 * declared with struct unim_index, outside this header.
 */
static const struct unim_index cp950ext_encmap[256] = {
|
||||
/* 0x00 */ {__cp950ext_encmap+0, 0xaf, 0xaf},
|
||||
/* 0x01 */ {0, 0, 0},
|
||||
/* 0x02 */ {0, 0, 0},
|
||||
/* 0x03 */ {0, 0, 0},
|
||||
/* 0x04 */ {0, 0, 0},
|
||||
/* 0x05 */ {0, 0, 0},
|
||||
/* 0x06 */ {0, 0, 0},
|
||||
/* 0x07 */ {0, 0, 0},
|
||||
/* 0x08 */ {0, 0, 0},
|
||||
/* 0x09 */ {0, 0, 0},
|
||||
/* 0x0A */ {0, 0, 0},
|
||||
/* 0x0B */ {0, 0, 0},
|
||||
/* 0x0C */ {0, 0, 0},
|
||||
/* 0x0D */ {0, 0, 0},
|
||||
/* 0x0E */ {0, 0, 0},
|
||||
/* 0x0F */ {0, 0, 0},
|
||||
/* 0x10 */ {0, 0, 0},
|
||||
/* 0x11 */ {0, 0, 0},
|
||||
/* 0x12 */ {0, 0, 0},
|
||||
/* 0x13 */ {0, 0, 0},
|
||||
/* 0x14 */ {0, 0, 0},
|
||||
/* 0x15 */ {0, 0, 0},
|
||||
/* 0x16 */ {0, 0, 0},
|
||||
/* 0x17 */ {0, 0, 0},
|
||||
/* 0x18 */ {0, 0, 0},
|
||||
/* 0x19 */ {0, 0, 0},
|
||||
/* 0x1A */ {0, 0, 0},
|
||||
/* 0x1B */ {0, 0, 0},
|
||||
/* 0x1C */ {0, 0, 0},
|
||||
/* 0x1D */ {0, 0, 0},
|
||||
/* 0x1E */ {0, 0, 0},
|
||||
/* 0x1F */ {0, 0, 0},
|
||||
/* 0x20 */ {__cp950ext_encmap+1, 0x27, 0xac},
|
||||
/* 0x21 */ {0, 0, 0},
|
||||
/* 0x22 */ {__cp950ext_encmap+135, 0x15, 0x99},
|
||||
/* 0x23 */ {0, 0, 0},
|
||||
/* 0x24 */ {0, 0, 0},
|
||||
/* 0x25 */ {__cp950ext_encmap+268, 0x51, 0x93},
|
||||
/* 0x26 */ {0, 0, 0},
|
||||
/* 0x27 */ {0, 0, 0},
|
||||
/* 0x28 */ {0, 0, 0},
|
||||
/* 0x29 */ {0, 0, 0},
|
||||
/* 0x2A */ {0, 0, 0},
|
||||
/* 0x2B */ {0, 0, 0},
|
||||
/* 0x2C */ {0, 0, 0},
|
||||
/* 0x2D */ {0, 0, 0},
|
||||
/* 0x2E */ {0, 0, 0},
|
||||
/* 0x2F */ {0, 0, 0},
|
||||
/* 0x30 */ {0, 0, 0},
|
||||
/* 0x31 */ {0, 0, 0},
|
||||
/* 0x32 */ {0, 0, 0},
|
||||
/* 0x33 */ {0, 0, 0},
|
||||
/* 0x34 */ {0, 0, 0},
|
||||
/* 0x35 */ {0, 0, 0},
|
||||
/* 0x36 */ {0, 0, 0},
|
||||
/* 0x37 */ {0, 0, 0},
|
||||
/* 0x38 */ {0, 0, 0},
|
||||
/* 0x39 */ {0, 0, 0},
|
||||
/* 0x3A */ {0, 0, 0},
|
||||
/* 0x3B */ {0, 0, 0},
|
||||
/* 0x3C */ {0, 0, 0},
|
||||
/* 0x3D */ {0, 0, 0},
|
||||
/* 0x3E */ {0, 0, 0},
|
||||
/* 0x3F */ {0, 0, 0},
|
||||
/* 0x40 */ {0, 0, 0},
|
||||
/* 0x41 */ {0, 0, 0},
|
||||
/* 0x42 */ {0, 0, 0},
|
||||
/* 0x43 */ {0, 0, 0},
|
||||
/* 0x44 */ {0, 0, 0},
|
||||
/* 0x45 */ {0, 0, 0},
|
||||
/* 0x46 */ {0, 0, 0},
|
||||
/* 0x47 */ {0, 0, 0},
|
||||
/* 0x48 */ {0, 0, 0},
|
||||
/* 0x49 */ {0, 0, 0},
|
||||
/* 0x4A */ {0, 0, 0},
|
||||
/* 0x4B */ {0, 0, 0},
|
||||
/* 0x4C */ {0, 0, 0},
|
||||
/* 0x4D */ {0, 0, 0},
|
||||
/* 0x4E */ {0, 0, 0},
|
||||
/* 0x4F */ {0, 0, 0},
|
||||
/* 0x50 */ {0, 0, 0},
|
||||
/* 0x51 */ {0, 0, 0},
|
||||
/* 0x52 */ {0, 0, 0},
|
||||
/* 0x53 */ {0, 0, 0},
|
||||
/* 0x54 */ {0, 0, 0},
|
||||
/* 0x55 */ {0, 0, 0},
|
||||
/* 0x56 */ {0, 0, 0},
|
||||
/* 0x57 */ {0, 0, 0},
|
||||
/* 0x58 */ {__cp950ext_encmap+335, 0xbb, 0xbb},
|
||||
/* 0x59 */ {0, 0, 0},
|
||||
/* 0x5A */ {__cp950ext_encmap+336, 0xfa, 0xfa},
|
||||
/* 0x5B */ {0, 0, 0},
|
||||
/* 0x5C */ {0, 0, 0},
|
||||
/* 0x5D */ {0, 0, 0},
|
||||
/* 0x5E */ {0, 0, 0},
|
||||
/* 0x5F */ {0, 0, 0},
|
||||
/* 0x60 */ {__cp950ext_encmap+337, 0x52, 0x52},
|
||||
/* 0x61 */ {0, 0, 0},
|
||||
/* 0x62 */ {0, 0, 0},
|
||||
/* 0x63 */ {0, 0, 0},
|
||||
/* 0x64 */ {0, 0, 0},
|
||||
/* 0x65 */ {0, 0, 0},
|
||||
/* 0x66 */ {0, 0, 0},
|
||||
/* 0x67 */ {0, 0, 0},
|
||||
/* 0x68 */ {0, 0, 0},
|
||||
/* 0x69 */ {0, 0, 0},
|
||||
/* 0x6A */ {0, 0, 0},
|
||||
/* 0x6B */ {0, 0, 0},
|
||||
/* 0x6C */ {0, 0, 0},
|
||||
/* 0x6D */ {0, 0, 0},
|
||||
/* 0x6E */ {0, 0, 0},
|
||||
/* 0x6F */ {0, 0, 0},
|
||||
/* 0x70 */ {0, 0, 0},
|
||||
/* 0x71 */ {0, 0, 0},
|
||||
/* 0x72 */ {0, 0, 0},
|
||||
/* 0x73 */ {0, 0, 0},
|
||||
/* 0x74 */ {0, 0, 0},
|
||||
/* 0x75 */ {0, 0, 0},
|
||||
/* 0x76 */ {0, 0, 0},
|
||||
/* 0x77 */ {0, 0, 0},
|
||||
/* 0x78 */ {__cp950ext_encmap+338, 0x81, 0x81},
|
||||
/* 0x79 */ {0, 0, 0},
|
||||
/* 0x7A */ {0, 0, 0},
|
||||
/* 0x7B */ {0, 0, 0},
|
||||
/* 0x7C */ {__cp950ext_encmap+339, 0xa7, 0xa7},
|
||||
/* 0x7D */ {0, 0, 0},
|
||||
/* 0x7E */ {0, 0, 0},
|
||||
/* 0x7F */ {0, 0, 0},
|
||||
/* 0x80 */ {0, 0, 0},
|
||||
/* 0x81 */ {0, 0, 0},
|
||||
/* 0x82 */ {0, 0, 0},
|
||||
/* 0x83 */ {0, 0, 0},
|
||||
/* 0x84 */ {0, 0, 0},
|
||||
/* 0x85 */ {0, 0, 0},
|
||||
/* 0x86 */ {0, 0, 0},
|
||||
/* 0x87 */ {0, 0, 0},
|
||||
/* 0x88 */ {__cp950ext_encmap+340, 0xcf, 0xcf},
|
||||
/* 0x89 */ {0, 0, 0},
|
||||
/* 0x8A */ {0, 0, 0},
|
||||
/* 0x8B */ {0, 0, 0},
|
||||
/* 0x8C */ {0, 0, 0},
|
||||
/* 0x8D */ {0, 0, 0},
|
||||
/* 0x8E */ {0, 0, 0},
|
||||
/* 0x8F */ {0, 0, 0},
|
||||
/* 0x90 */ {0, 0, 0},
|
||||
/* 0x91 */ {0, 0, 0},
|
||||
/* 0x92 */ {__cp950ext_encmap+341, 0xb9, 0xb9},
|
||||
/* 0x93 */ {0, 0, 0},
|
||||
/* 0x94 */ {0, 0, 0},
|
||||
/* 0x95 */ {0, 0, 0},
|
||||
/* 0x96 */ {0, 0, 0},
|
||||
/* 0x97 */ {0, 0, 0},
|
||||
/* 0x98 */ {0, 0, 0},
|
||||
/* 0x99 */ {0, 0, 0},
|
||||
/* 0x9A */ {0, 0, 0},
|
||||
/* 0x9B */ {0, 0, 0},
|
||||
/* 0x9C */ {0, 0, 0},
|
||||
/* 0x9D */ {0, 0, 0},
|
||||
/* 0x9E */ {0, 0, 0},
|
||||
/* 0x9F */ {0, 0, 0},
|
||||
/* 0xA0 */ {0, 0, 0},
|
||||
/* 0xA1 */ {0, 0, 0},
|
||||
/* 0xA2 */ {0, 0, 0},
|
||||
/* 0xA3 */ {0, 0, 0},
|
||||
/* 0xA4 */ {0, 0, 0},
|
||||
/* 0xA5 */ {0, 0, 0},
|
||||
/* 0xA6 */ {0, 0, 0},
|
||||
/* 0xA7 */ {0, 0, 0},
|
||||
/* 0xA8 */ {0, 0, 0},
|
||||
/* 0xA9 */ {0, 0, 0},
|
||||
/* 0xAA */ {0, 0, 0},
|
||||
/* 0xAB */ {0, 0, 0},
|
||||
/* 0xAC */ {0, 0, 0},
|
||||
/* 0xAD */ {0, 0, 0},
|
||||
/* 0xAE */ {0, 0, 0},
|
||||
/* 0xAF */ {0, 0, 0},
|
||||
/* 0xB0 */ {0, 0, 0},
|
||||
/* 0xB1 */ {0, 0, 0},
|
||||
/* 0xB2 */ {0, 0, 0},
|
||||
/* 0xB3 */ {0, 0, 0},
|
||||
/* 0xB4 */ {0, 0, 0},
|
||||
/* 0xB5 */ {0, 0, 0},
|
||||
/* 0xB6 */ {0, 0, 0},
|
||||
/* 0xB7 */ {0, 0, 0},
|
||||
/* 0xB8 */ {0, 0, 0},
|
||||
/* 0xB9 */ {0, 0, 0},
|
||||
/* 0xBA */ {0, 0, 0},
|
||||
/* 0xBB */ {0, 0, 0},
|
||||
/* 0xBC */ {0, 0, 0},
|
||||
/* 0xBD */ {0, 0, 0},
|
||||
/* 0xBE */ {0, 0, 0},
|
||||
/* 0xBF */ {0, 0, 0},
|
||||
/* 0xC0 */ {0, 0, 0},
|
||||
/* 0xC1 */ {0, 0, 0},
|
||||
/* 0xC2 */ {0, 0, 0},
|
||||
/* 0xC3 */ {0, 0, 0},
|
||||
/* 0xC4 */ {0, 0, 0},
|
||||
/* 0xC5 */ {0, 0, 0},
|
||||
/* 0xC6 */ {0, 0, 0},
|
||||
/* 0xC7 */ {0, 0, 0},
|
||||
/* 0xC8 */ {0, 0, 0},
|
||||
/* 0xC9 */ {0, 0, 0},
|
||||
/* 0xCA */ {0, 0, 0},
|
||||
/* 0xCB */ {0, 0, 0},
|
||||
/* 0xCC */ {0, 0, 0},
|
||||
/* 0xCD */ {0, 0, 0},
|
||||
/* 0xCE */ {0, 0, 0},
|
||||
/* 0xCF */ {0, 0, 0},
|
||||
/* 0xD0 */ {0, 0, 0},
|
||||
/* 0xD1 */ {0, 0, 0},
|
||||
/* 0xD2 */ {0, 0, 0},
|
||||
/* 0xD3 */ {0, 0, 0},
|
||||
/* 0xD4 */ {0, 0, 0},
|
||||
/* 0xD5 */ {0, 0, 0},
|
||||
/* 0xD6 */ {0, 0, 0},
|
||||
/* 0xD7 */ {0, 0, 0},
|
||||
/* 0xD8 */ {0, 0, 0},
|
||||
/* 0xD9 */ {0, 0, 0},
|
||||
/* 0xDA */ {0, 0, 0},
|
||||
/* 0xDB */ {0, 0, 0},
|
||||
/* 0xDC */ {0, 0, 0},
|
||||
/* 0xDD */ {0, 0, 0},
|
||||
/* 0xDE */ {0, 0, 0},
|
||||
/* 0xDF */ {0, 0, 0},
|
||||
/* 0xE0 */ {0, 0, 0},
|
||||
/* 0xE1 */ {0, 0, 0},
|
||||
/* 0xE2 */ {0, 0, 0},
|
||||
/* 0xE3 */ {0, 0, 0},
|
||||
/* 0xE4 */ {0, 0, 0},
|
||||
/* 0xE5 */ {0, 0, 0},
|
||||
/* 0xE6 */ {0, 0, 0},
|
||||
/* 0xE7 */ {0, 0, 0},
|
||||
/* 0xE8 */ {0, 0, 0},
|
||||
/* 0xE9 */ {0, 0, 0},
|
||||
/* 0xEA */ {0, 0, 0},
|
||||
/* 0xEB */ {0, 0, 0},
|
||||
/* 0xEC */ {0, 0, 0},
|
||||
/* 0xED */ {0, 0, 0},
|
||||
/* 0xEE */ {0, 0, 0},
|
||||
/* 0xEF */ {0, 0, 0},
|
||||
/* 0xF0 */ {0, 0, 0},
|
||||
/* 0xF1 */ {0, 0, 0},
|
||||
/* 0xF2 */ {0, 0, 0},
|
||||
/* 0xF3 */ {0, 0, 0},
|
||||
/* 0xF4 */ {0, 0, 0},
|
||||
/* 0xF5 */ {0, 0, 0},
|
||||
/* 0xF6 */ {0, 0, 0},
|
||||
/* 0xF7 */ {0, 0, 0},
|
||||
/* 0xF8 */ {0, 0, 0},
|
||||
/* 0xF9 */ {0, 0, 0},
|
||||
/* 0xFA */ {0, 0, 0},
|
||||
/* 0xFB */ {0, 0, 0},
|
||||
/* 0xFC */ {0, 0, 0},
|
||||
/* 0xFD */ {0, 0, 0},
|
||||
/* 0xFE */ {__cp950ext_encmap+342, 0x51, 0x68},
|
||||
/* 0xFF */ {__cp950ext_encmap+366, 0x0f, 0xe5},
|
||||
};
|
||||
|
||||
|
1275
Modules/cjkcodecs/map_gb18030ext.h
Normal file
1275
Modules/cjkcodecs/map_gb18030ext.h
Normal file
File diff suppressed because it is too large
Load diff
218
Modules/cjkcodecs/map_gb18030uni.h
Normal file
218
Modules/cjkcodecs/map_gb18030uni.h
Normal file
|
@ -0,0 +1,218 @@
|
|||
/*
|
||||
* $CJKCodecs: map_gb18030uni.h,v 1.1.1.1 2003/09/24 17:45:51 perky Exp $
|
||||
*/
|
||||
|
||||
|
||||
/*
 * Range table relating runs of BMP code points to a linear index.
 *
 *   first, last  inclusive bounds of a run of consecutive code points;
 *                entries are sorted by ascending `first`.
 *   base         running total of the code points in all earlier runs:
 *                the first run 0x0080..0x00a3 holds 0x24 code points, so
 *                the second entry has base 0x0024.  Presumably this is the
 *                linear index of `first` within GB18030's four-byte
 *                BMP area -- confirm against the gb18030 codec.
 *
 * The table ends with a sentinel whose first and last are both 0 and whose
 * base (0x99fc) is the grand total.
 */
static const struct _gb18030_to_unibmp_ranges {
|
||||
Py_UNICODE first, last;
|
||||
DBCHAR base;
|
||||
} gb18030_to_unibmp_ranges[] = {
|
||||
{ 0x0080, 0x00a3, 0x0000 },
|
||||
{ 0x00a5, 0x00a6, 0x0024 },
|
||||
{ 0x00a9, 0x00af, 0x0026 },
|
||||
{ 0x00b2, 0x00b6, 0x002d },
|
||||
{ 0x00b8, 0x00d6, 0x0032 },
|
||||
{ 0x00d8, 0x00df, 0x0051 },
|
||||
{ 0x00e2, 0x00e7, 0x0059 },
|
||||
{ 0x00eb, 0x00eb, 0x005f },
|
||||
{ 0x00ee, 0x00f1, 0x0060 },
|
||||
{ 0x00f4, 0x00f6, 0x0064 },
|
||||
{ 0x00f8, 0x00f8, 0x0067 },
|
||||
{ 0x00fb, 0x00fb, 0x0068 },
|
||||
{ 0x00fd, 0x0100, 0x0069 },
|
||||
{ 0x0102, 0x0112, 0x006d },
|
||||
{ 0x0114, 0x011a, 0x007e },
|
||||
{ 0x011c, 0x012a, 0x0085 },
|
||||
{ 0x012c, 0x0143, 0x0094 },
|
||||
{ 0x0145, 0x0147, 0x00ac },
|
||||
{ 0x0149, 0x014c, 0x00af },
|
||||
{ 0x014e, 0x016a, 0x00b3 },
|
||||
{ 0x016c, 0x01cd, 0x00d0 },
|
||||
{ 0x01cf, 0x01cf, 0x0132 },
|
||||
{ 0x01d1, 0x01d1, 0x0133 },
|
||||
{ 0x01d3, 0x01d3, 0x0134 },
|
||||
{ 0x01d5, 0x01d5, 0x0135 },
|
||||
{ 0x01d7, 0x01d7, 0x0136 },
|
||||
{ 0x01d9, 0x01d9, 0x0137 },
|
||||
{ 0x01db, 0x01db, 0x0138 },
|
||||
{ 0x01dd, 0x01f8, 0x0139 },
|
||||
{ 0x01fa, 0x0250, 0x0155 },
|
||||
{ 0x0252, 0x0260, 0x01ac },
|
||||
{ 0x0262, 0x02c6, 0x01bb },
|
||||
{ 0x02c8, 0x02c8, 0x0220 },
|
||||
{ 0x02cc, 0x02d8, 0x0221 },
|
||||
{ 0x02da, 0x0390, 0x022e },
|
||||
{ 0x03a2, 0x03a2, 0x02e5 },
|
||||
{ 0x03aa, 0x03b0, 0x02e6 },
|
||||
{ 0x03c2, 0x03c2, 0x02ed },
|
||||
{ 0x03ca, 0x0400, 0x02ee },
|
||||
{ 0x0402, 0x040f, 0x0325 },
|
||||
{ 0x0450, 0x0450, 0x0333 },
|
||||
{ 0x0452, 0x200f, 0x0334 },
|
||||
{ 0x2011, 0x2012, 0x1ef2 },
|
||||
{ 0x2017, 0x2017, 0x1ef4 },
|
||||
{ 0x201a, 0x201b, 0x1ef5 },
|
||||
{ 0x201e, 0x2024, 0x1ef7 },
|
||||
{ 0x2027, 0x202f, 0x1efe },
|
||||
{ 0x2031, 0x2031, 0x1f07 },
|
||||
{ 0x2034, 0x2034, 0x1f08 },
|
||||
{ 0x2036, 0x203a, 0x1f09 },
|
||||
{ 0x203c, 0x20ab, 0x1f0e },
|
||||
{ 0x20ad, 0x2102, 0x1f7e },
|
||||
{ 0x2104, 0x2104, 0x1fd4 },
|
||||
{ 0x2106, 0x2108, 0x1fd5 },
|
||||
{ 0x210a, 0x2115, 0x1fd8 },
|
||||
{ 0x2117, 0x2120, 0x1fe4 },
|
||||
{ 0x2122, 0x215f, 0x1fee },
|
||||
{ 0x216c, 0x216f, 0x202c },
|
||||
{ 0x217a, 0x218f, 0x2030 },
|
||||
{ 0x2194, 0x2195, 0x2046 },
|
||||
{ 0x219a, 0x2207, 0x2048 },
|
||||
{ 0x2209, 0x220e, 0x20b6 },
|
||||
{ 0x2210, 0x2210, 0x20bc },
|
||||
{ 0x2212, 0x2214, 0x20bd },
|
||||
{ 0x2216, 0x2219, 0x20c0 },
|
||||
{ 0x221b, 0x221c, 0x20c4 },
|
||||
{ 0x2221, 0x2222, 0x20c6 },
|
||||
{ 0x2224, 0x2224, 0x20c8 },
|
||||
{ 0x2226, 0x2226, 0x20c9 },
|
||||
{ 0x222c, 0x222d, 0x20ca },
|
||||
{ 0x222f, 0x2233, 0x20cc },
|
||||
{ 0x2238, 0x223c, 0x20d1 },
|
||||
{ 0x223e, 0x2247, 0x20d6 },
|
||||
{ 0x2249, 0x224b, 0x20e0 },
|
||||
{ 0x224d, 0x2251, 0x20e3 },
|
||||
{ 0x2253, 0x225f, 0x20e8 },
|
||||
{ 0x2262, 0x2263, 0x20f5 },
|
||||
{ 0x2268, 0x226d, 0x20f7 },
|
||||
{ 0x2270, 0x2294, 0x20fd },
|
||||
{ 0x2296, 0x2298, 0x2122 },
|
||||
{ 0x229a, 0x22a4, 0x2125 },
|
||||
{ 0x22a6, 0x22be, 0x2130 },
|
||||
{ 0x22c0, 0x2311, 0x2149 },
|
||||
{ 0x2313, 0x245f, 0x219b },
|
||||
{ 0x246a, 0x2473, 0x22e8 },
|
||||
{ 0x249c, 0x24ff, 0x22f2 },
|
||||
{ 0x254c, 0x254f, 0x2356 },
|
||||
{ 0x2574, 0x2580, 0x235a },
|
||||
{ 0x2590, 0x2592, 0x2367 },
|
||||
{ 0x2596, 0x259f, 0x236a },
|
||||
{ 0x25a2, 0x25b1, 0x2374 },
|
||||
{ 0x25b4, 0x25bb, 0x2384 },
|
||||
{ 0x25be, 0x25c5, 0x238c },
|
||||
{ 0x25c8, 0x25ca, 0x2394 },
|
||||
{ 0x25cc, 0x25cd, 0x2397 },
|
||||
{ 0x25d0, 0x25e1, 0x2399 },
|
||||
{ 0x25e6, 0x2604, 0x23ab },
|
||||
{ 0x2607, 0x2608, 0x23ca },
|
||||
{ 0x260a, 0x263f, 0x23cc },
|
||||
{ 0x2641, 0x2641, 0x2402 },
|
||||
{ 0x2643, 0x2e80, 0x2403 },
|
||||
{ 0x2e82, 0x2e83, 0x2c41 },
|
||||
{ 0x2e85, 0x2e87, 0x2c43 },
|
||||
{ 0x2e89, 0x2e8a, 0x2c46 },
|
||||
{ 0x2e8d, 0x2e96, 0x2c48 },
|
||||
{ 0x2e98, 0x2ea6, 0x2c52 },
|
||||
{ 0x2ea8, 0x2ea9, 0x2c61 },
|
||||
{ 0x2eab, 0x2ead, 0x2c63 },
|
||||
{ 0x2eaf, 0x2eb2, 0x2c66 },
|
||||
{ 0x2eb4, 0x2eb5, 0x2c6a },
|
||||
{ 0x2eb8, 0x2eba, 0x2c6c },
|
||||
{ 0x2ebc, 0x2ec9, 0x2c6f },
|
||||
{ 0x2ecb, 0x2fef, 0x2c7d },
|
||||
{ 0x2ffc, 0x2fff, 0x2da2 },
|
||||
{ 0x3004, 0x3004, 0x2da6 },
|
||||
{ 0x3018, 0x301c, 0x2da7 },
|
||||
{ 0x301f, 0x3020, 0x2dac },
|
||||
{ 0x302a, 0x303d, 0x2dae },
|
||||
{ 0x303f, 0x3040, 0x2dc2 },
|
||||
{ 0x3094, 0x309a, 0x2dc4 },
|
||||
{ 0x309f, 0x30a0, 0x2dcb },
|
||||
{ 0x30f7, 0x30fb, 0x2dcd },
|
||||
{ 0x30ff, 0x3104, 0x2dd2 },
|
||||
{ 0x312a, 0x321f, 0x2dd8 },
|
||||
{ 0x322a, 0x3230, 0x2ece },
|
||||
{ 0x3232, 0x32a2, 0x2ed5 },
|
||||
{ 0x32a4, 0x338d, 0x2f46 },
|
||||
{ 0x3390, 0x339b, 0x3030 },
|
||||
{ 0x339f, 0x33a0, 0x303c },
|
||||
{ 0x33a2, 0x33c3, 0x303e },
|
||||
{ 0x33c5, 0x33cd, 0x3060 },
|
||||
{ 0x33cf, 0x33d0, 0x3069 },
|
||||
{ 0x33d3, 0x33d4, 0x306b },
|
||||
{ 0x33d6, 0x3446, 0x306d },
|
||||
{ 0x3448, 0x3472, 0x30de },
|
||||
{ 0x3474, 0x359d, 0x3109 },
|
||||
{ 0x359f, 0x360d, 0x3233 },
|
||||
{ 0x360f, 0x3619, 0x32a2 },
|
||||
{ 0x361b, 0x3917, 0x32ad },
|
||||
{ 0x3919, 0x396d, 0x35aa },
|
||||
{ 0x396f, 0x39ce, 0x35ff },
|
||||
{ 0x39d1, 0x39de, 0x365f },
|
||||
{ 0x39e0, 0x3a72, 0x366d },
|
||||
{ 0x3a74, 0x3b4d, 0x3700 },
|
||||
{ 0x3b4f, 0x3c6d, 0x37da },
|
||||
{ 0x3c6f, 0x3cdf, 0x38f9 },
|
||||
{ 0x3ce1, 0x4055, 0x396a },
|
||||
{ 0x4057, 0x415e, 0x3cdf },
|
||||
{ 0x4160, 0x4336, 0x3de7 },
|
||||
{ 0x4338, 0x43ab, 0x3fbe },
|
||||
{ 0x43ad, 0x43b0, 0x4032 },
|
||||
{ 0x43b2, 0x43dc, 0x4036 },
|
||||
{ 0x43de, 0x44d5, 0x4061 },
|
||||
{ 0x44d7, 0x464b, 0x4159 },
|
||||
{ 0x464d, 0x4660, 0x42ce },
|
||||
{ 0x4662, 0x4722, 0x42e2 },
|
||||
{ 0x4724, 0x4728, 0x43a3 },
|
||||
{ 0x472a, 0x477b, 0x43a8 },
|
||||
{ 0x477d, 0x478c, 0x43fa },
|
||||
{ 0x478e, 0x4946, 0x440a },
|
||||
{ 0x4948, 0x4979, 0x45c3 },
|
||||
{ 0x497b, 0x497c, 0x45f5 },
|
||||
{ 0x497e, 0x4981, 0x45f7 },
|
||||
{ 0x4984, 0x4984, 0x45fb },
|
||||
{ 0x4987, 0x499a, 0x45fc },
|
||||
{ 0x499c, 0x499e, 0x4610 },
|
||||
{ 0x49a0, 0x49b5, 0x4613 },
|
||||
{ 0x49b8, 0x4c76, 0x4629 },
|
||||
{ 0x4c78, 0x4c9e, 0x48e8 },
|
||||
{ 0x4ca4, 0x4d12, 0x490f },
|
||||
{ 0x4d1a, 0x4dad, 0x497e },
|
||||
{ 0x4daf, 0x4dff, 0x4a12 },
|
||||
{ 0x9fa6, 0xd7ff, 0x4a63 },
|
||||
{ 0xe76c, 0xe76c, 0x82bd },
|
||||
{ 0xe7c8, 0xe7c8, 0x82be },
|
||||
{ 0xe7e7, 0xe7f3, 0x82bf },
|
||||
{ 0xe815, 0xe815, 0x82cc },
|
||||
{ 0xe819, 0xe81d, 0x82cd },
|
||||
{ 0xe81f, 0xe825, 0x82d2 },
|
||||
{ 0xe827, 0xe82a, 0x82d9 },
|
||||
{ 0xe82d, 0xe830, 0x82dd },
|
||||
{ 0xe833, 0xe83a, 0x82e1 },
|
||||
{ 0xe83c, 0xe842, 0x82e9 },
|
||||
{ 0xe844, 0xe853, 0x82f0 },
|
||||
{ 0xe856, 0xe863, 0x8300 },
|
||||
{ 0xe865, 0xf92b, 0x830e },
|
||||
{ 0xf92d, 0xf978, 0x93d5 },
|
||||
{ 0xf97a, 0xf994, 0x9421 },
|
||||
{ 0xf996, 0xf9e6, 0x943c },
|
||||
{ 0xf9e8, 0xf9f0, 0x948d },
|
||||
{ 0xf9f2, 0xfa0b, 0x9496 },
|
||||
{ 0xfa10, 0xfa10, 0x94b0 },
|
||||
{ 0xfa12, 0xfa12, 0x94b1 },
|
||||
{ 0xfa15, 0xfa17, 0x94b2 },
|
||||
{ 0xfa19, 0xfa1e, 0x94b5 },
|
||||
{ 0xfa22, 0xfa22, 0x94bb },
|
||||
{ 0xfa25, 0xfa26, 0x94bc },
|
||||
{ 0xfa2a, 0xfe2f, 0x94be },
|
||||
{ 0xfe32, 0xfe32, 0x98c4 },
|
||||
{ 0xfe45, 0xfe48, 0x98c5 },
|
||||
{ 0xfe53, 0xfe53, 0x98c9 },
|
||||
{ 0xfe58, 0xfe58, 0x98ca },
|
||||
{ 0xfe67, 0xfe67, 0x98cb },
|
||||
{ 0xfe6c, 0xff00, 0x98cc },
|
||||
{ 0xff5f, 0xffdf, 0x9961 },
|
||||
{ 0xffe6, 0xffff, 0x99e2 },
|
||||
{ 0x0000, 0x0000, 0x99fc },
|
||||
};
|
||||
|
1203
Modules/cjkcodecs/map_gb2312.h
Normal file
1203
Modules/cjkcodecs/map_gb2312.h
Normal file
File diff suppressed because it is too large
Load diff
3171
Modules/cjkcodecs/map_gbcommon.h
Normal file
3171
Modules/cjkcodecs/map_gbcommon.h
Normal file
File diff suppressed because it is too large
Load diff
2084
Modules/cjkcodecs/map_gbkext.h
Normal file
2084
Modules/cjkcodecs/map_gbkext.h
Normal file
File diff suppressed because it is too large
Load diff
1137
Modules/cjkcodecs/map_jisx0208.h
Normal file
1137
Modules/cjkcodecs/map_jisx0208.h
Normal file
File diff suppressed because it is too large
Load diff
1040
Modules/cjkcodecs/map_jisx0212.h
Normal file
1040
Modules/cjkcodecs/map_jisx0212.h
Normal file
File diff suppressed because it is too large
Load diff
6946
Modules/cjkcodecs/map_jisx0213.h
Normal file
6946
Modules/cjkcodecs/map_jisx0213.h
Normal file
File diff suppressed because it is too large
Load diff
329
Modules/cjkcodecs/map_jisx0213_pairs.h
Normal file
329
Modules/cjkcodecs/map_jisx0213_pairs.h
Normal file
|
@ -0,0 +1,329 @@
|
|||
/*
|
||||
* $CJKCodecs: map_jisx0213_pairs.h,v 1.2 2003/11/27 13:29:01 perky Exp $
|
||||
*/
|
||||
|
||||
static const ucs4_t __jisx0213_pairdecmap[49] = {
|
||||
0x304b309a, 0x304d309a, 0x304f309a, 0x3051309a,
|
||||
0x3053309a, 0x30ab309a, 0x30ad309a, 0x30af309a,
|
||||
0x30b1309a, 0x30b3309a, 0x30bb309a, 0x30c4309a,
|
||||
0x30c8309a, 0x31f7309a, 0x00e60300, UNIINV,
|
||||
UNIINV, UNIINV, 0x02540300, 0x02540301,
|
||||
0x028c0300, 0x028c0301, 0x02590300, 0x02590301,
|
||||
0x025a0300, 0x025a0301, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, UNIINV,
|
||||
UNIINV, UNIINV, UNIINV, 0x02e902e5,
|
||||
0x02e502e9,
|
||||
};
|
||||
|
||||
static const struct widedbcs_index jisx0213_pairdecmap[256] = {
|
||||
/* 0x00 */ {0, 0, 0},
|
||||
/* 0x01 */ {0, 0, 0},
|
||||
/* 0x02 */ {0, 0, 0},
|
||||
/* 0x03 */ {0, 0, 0},
|
||||
/* 0x04 */ {0, 0, 0},
|
||||
/* 0x05 */ {0, 0, 0},
|
||||
/* 0x06 */ {0, 0, 0},
|
||||
/* 0x07 */ {0, 0, 0},
|
||||
/* 0x08 */ {0, 0, 0},
|
||||
/* 0x09 */ {0, 0, 0},
|
||||
/* 0x0A */ {0, 0, 0},
|
||||
/* 0x0B */ {0, 0, 0},
|
||||
/* 0x0C */ {0, 0, 0},
|
||||
/* 0x0D */ {0, 0, 0},
|
||||
/* 0x0E */ {0, 0, 0},
|
||||
/* 0x0F */ {0, 0, 0},
|
||||
/* 0x10 */ {0, 0, 0},
|
||||
/* 0x11 */ {0, 0, 0},
|
||||
/* 0x12 */ {0, 0, 0},
|
||||
/* 0x13 */ {0, 0, 0},
|
||||
/* 0x14 */ {0, 0, 0},
|
||||
/* 0x15 */ {0, 0, 0},
|
||||
/* 0x16 */ {0, 0, 0},
|
||||
/* 0x17 */ {0, 0, 0},
|
||||
/* 0x18 */ {0, 0, 0},
|
||||
/* 0x19 */ {0, 0, 0},
|
||||
/* 0x1A */ {0, 0, 0},
|
||||
/* 0x1B */ {0, 0, 0},
|
||||
/* 0x1C */ {0, 0, 0},
|
||||
/* 0x1D */ {0, 0, 0},
|
||||
/* 0x1E */ {0, 0, 0},
|
||||
/* 0x1F */ {0, 0, 0},
|
||||
/* 0x20 */ {0, 0, 0},
|
||||
/* 0x21 */ {0, 0, 0},
|
||||
/* 0x22 */ {0, 0, 0},
|
||||
/* 0x23 */ {0, 0, 0},
|
||||
/* 0x24 */ {__jisx0213_pairdecmap+0, 0x77, 0x7b},
|
||||
/* 0x25 */ {__jisx0213_pairdecmap+5, 0x77, 0x7e},
|
||||
/* 0x26 */ {__jisx0213_pairdecmap+13, 0x78, 0x78},
|
||||
/* 0x27 */ {0, 0, 0},
|
||||
/* 0x28 */ {0, 0, 0},
|
||||
/* 0x29 */ {0, 0, 0},
|
||||
/* 0x2A */ {0, 0, 0},
|
||||
/* 0x2B */ {__jisx0213_pairdecmap+14, 0x44, 0x66},
|
||||
/* 0x2C */ {0, 0, 0},
|
||||
/* 0x2D */ {0, 0, 0},
|
||||
/* 0x2E */ {0, 0, 0},
|
||||
/* 0x2F */ {0, 0, 0},
|
||||
/* 0x30 */ {0, 0, 0},
|
||||
/* 0x31 */ {0, 0, 0},
|
||||
/* 0x32 */ {0, 0, 0},
|
||||
/* 0x33 */ {0, 0, 0},
|
||||
/* 0x34 */ {0, 0, 0},
|
||||
/* 0x35 */ {0, 0, 0},
|
||||
/* 0x36 */ {0, 0, 0},
|
||||
/* 0x37 */ {0, 0, 0},
|
||||
/* 0x38 */ {0, 0, 0},
|
||||
/* 0x39 */ {0, 0, 0},
|
||||
/* 0x3A */ {0, 0, 0},
|
||||
/* 0x3B */ {0, 0, 0},
|
||||
/* 0x3C */ {0, 0, 0},
|
||||
/* 0x3D */ {0, 0, 0},
|
||||
/* 0x3E */ {0, 0, 0},
|
||||
/* 0x3F */ {0, 0, 0},
|
||||
/* 0x40 */ {0, 0, 0},
|
||||
/* 0x41 */ {0, 0, 0},
|
||||
/* 0x42 */ {0, 0, 0},
|
||||
/* 0x43 */ {0, 0, 0},
|
||||
/* 0x44 */ {0, 0, 0},
|
||||
/* 0x45 */ {0, 0, 0},
|
||||
/* 0x46 */ {0, 0, 0},
|
||||
/* 0x47 */ {0, 0, 0},
|
||||
/* 0x48 */ {0, 0, 0},
|
||||
/* 0x49 */ {0, 0, 0},
|
||||
/* 0x4A */ {0, 0, 0},
|
||||
/* 0x4B */ {0, 0, 0},
|
||||
/* 0x4C */ {0, 0, 0},
|
||||
/* 0x4D */ {0, 0, 0},
|
||||
/* 0x4E */ {0, 0, 0},
|
||||
/* 0x4F */ {0, 0, 0},
|
||||
/* 0x50 */ {0, 0, 0},
|
||||
/* 0x51 */ {0, 0, 0},
|
||||
/* 0x52 */ {0, 0, 0},
|
||||
/* 0x53 */ {0, 0, 0},
|
||||
/* 0x54 */ {0, 0, 0},
|
||||
/* 0x55 */ {0, 0, 0},
|
||||
/* 0x56 */ {0, 0, 0},
|
||||
/* 0x57 */ {0, 0, 0},
|
||||
/* 0x58 */ {0, 0, 0},
|
||||
/* 0x59 */ {0, 0, 0},
|
||||
/* 0x5A */ {0, 0, 0},
|
||||
/* 0x5B */ {0, 0, 0},
|
||||
/* 0x5C */ {0, 0, 0},
|
||||
/* 0x5D */ {0, 0, 0},
|
||||
/* 0x5E */ {0, 0, 0},
|
||||
/* 0x5F */ {0, 0, 0},
|
||||
/* 0x60 */ {0, 0, 0},
|
||||
/* 0x61 */ {0, 0, 0},
|
||||
/* 0x62 */ {0, 0, 0},
|
||||
/* 0x63 */ {0, 0, 0},
|
||||
/* 0x64 */ {0, 0, 0},
|
||||
/* 0x65 */ {0, 0, 0},
|
||||
/* 0x66 */ {0, 0, 0},
|
||||
/* 0x67 */ {0, 0, 0},
|
||||
/* 0x68 */ {0, 0, 0},
|
||||
/* 0x69 */ {0, 0, 0},
|
||||
/* 0x6A */ {0, 0, 0},
|
||||
/* 0x6B */ {0, 0, 0},
|
||||
/* 0x6C */ {0, 0, 0},
|
||||
/* 0x6D */ {0, 0, 0},
|
||||
/* 0x6E */ {0, 0, 0},
|
||||
/* 0x6F */ {0, 0, 0},
|
||||
/* 0x70 */ {0, 0, 0},
|
||||
/* 0x71 */ {0, 0, 0},
|
||||
/* 0x72 */ {0, 0, 0},
|
||||
/* 0x73 */ {0, 0, 0},
|
||||
/* 0x74 */ {0, 0, 0},
|
||||
/* 0x75 */ {0, 0, 0},
|
||||
/* 0x76 */ {0, 0, 0},
|
||||
/* 0x77 */ {0, 0, 0},
|
||||
/* 0x78 */ {0, 0, 0},
|
||||
/* 0x79 */ {0, 0, 0},
|
||||
/* 0x7A */ {0, 0, 0},
|
||||
/* 0x7B */ {0, 0, 0},
|
||||
/* 0x7C */ {0, 0, 0},
|
||||
/* 0x7D */ {0, 0, 0},
|
||||
/* 0x7E */ {0, 0, 0},
|
||||
/* 0x7F */ {0, 0, 0},
|
||||
/* 0x80 */ {0, 0, 0},
|
||||
/* 0x81 */ {0, 0, 0},
|
||||
/* 0x82 */ {0, 0, 0},
|
||||
/* 0x83 */ {0, 0, 0},
|
||||
/* 0x84 */ {0, 0, 0},
|
||||
/* 0x85 */ {0, 0, 0},
|
||||
/* 0x86 */ {0, 0, 0},
|
||||
/* 0x87 */ {0, 0, 0},
|
||||
/* 0x88 */ {0, 0, 0},
|
||||
/* 0x89 */ {0, 0, 0},
|
||||
/* 0x8A */ {0, 0, 0},
|
||||
/* 0x8B */ {0, 0, 0},
|
||||
/* 0x8C */ {0, 0, 0},
|
||||
/* 0x8D */ {0, 0, 0},
|
||||
/* 0x8E */ {0, 0, 0},
|
||||
/* 0x8F */ {0, 0, 0},
|
||||
/* 0x90 */ {0, 0, 0},
|
||||
/* 0x91 */ {0, 0, 0},
|
||||
/* 0x92 */ {0, 0, 0},
|
||||
/* 0x93 */ {0, 0, 0},
|
||||
/* 0x94 */ {0, 0, 0},
|
||||
/* 0x95 */ {0, 0, 0},
|
||||
/* 0x96 */ {0, 0, 0},
|
||||
/* 0x97 */ {0, 0, 0},
|
||||
/* 0x98 */ {0, 0, 0},
|
||||
/* 0x99 */ {0, 0, 0},
|
||||
/* 0x9A */ {0, 0, 0},
|
||||
/* 0x9B */ {0, 0, 0},
|
||||
/* 0x9C */ {0, 0, 0},
|
||||
/* 0x9D */ {0, 0, 0},
|
||||
/* 0x9E */ {0, 0, 0},
|
||||
/* 0x9F */ {0, 0, 0},
|
||||
/* 0xA0 */ {0, 0, 0},
|
||||
/* 0xA1 */ {0, 0, 0},
|
||||
/* 0xA2 */ {0, 0, 0},
|
||||
/* 0xA3 */ {0, 0, 0},
|
||||
/* 0xA4 */ {0, 0, 0},
|
||||
/* 0xA5 */ {0, 0, 0},
|
||||
/* 0xA6 */ {0, 0, 0},
|
||||
/* 0xA7 */ {0, 0, 0},
|
||||
/* 0xA8 */ {0, 0, 0},
|
||||
/* 0xA9 */ {0, 0, 0},
|
||||
/* 0xAA */ {0, 0, 0},
|
||||
/* 0xAB */ {0, 0, 0},
|
||||
/* 0xAC */ {0, 0, 0},
|
||||
/* 0xAD */ {0, 0, 0},
|
||||
/* 0xAE */ {0, 0, 0},
|
||||
/* 0xAF */ {0, 0, 0},
|
||||
/* 0xB0 */ {0, 0, 0},
|
||||
/* 0xB1 */ {0, 0, 0},
|
||||
/* 0xB2 */ {0, 0, 0},
|
||||
/* 0xB3 */ {0, 0, 0},
|
||||
/* 0xB4 */ {0, 0, 0},
|
||||
/* 0xB5 */ {0, 0, 0},
|
||||
/* 0xB6 */ {0, 0, 0},
|
||||
/* 0xB7 */ {0, 0, 0},
|
||||
/* 0xB8 */ {0, 0, 0},
|
||||
/* 0xB9 */ {0, 0, 0},
|
||||
/* 0xBA */ {0, 0, 0},
|
||||
/* 0xBB */ {0, 0, 0},
|
||||
/* 0xBC */ {0, 0, 0},
|
||||
/* 0xBD */ {0, 0, 0},
|
||||
/* 0xBE */ {0, 0, 0},
|
||||
/* 0xBF */ {0, 0, 0},
|
||||
/* 0xC0 */ {0, 0, 0},
|
||||
/* 0xC1 */ {0, 0, 0},
|
||||
/* 0xC2 */ {0, 0, 0},
|
||||
/* 0xC3 */ {0, 0, 0},
|
||||
/* 0xC4 */ {0, 0, 0},
|
||||
/* 0xC5 */ {0, 0, 0},
|
||||
/* 0xC6 */ {0, 0, 0},
|
||||
/* 0xC7 */ {0, 0, 0},
|
||||
/* 0xC8 */ {0, 0, 0},
|
||||
/* 0xC9 */ {0, 0, 0},
|
||||
/* 0xCA */ {0, 0, 0},
|
||||
/* 0xCB */ {0, 0, 0},
|
||||
/* 0xCC */ {0, 0, 0},
|
||||
/* 0xCD */ {0, 0, 0},
|
||||
/* 0xCE */ {0, 0, 0},
|
||||
/* 0xCF */ {0, 0, 0},
|
||||
/* 0xD0 */ {0, 0, 0},
|
||||
/* 0xD1 */ {0, 0, 0},
|
||||
/* 0xD2 */ {0, 0, 0},
|
||||
/* 0xD3 */ {0, 0, 0},
|
||||
/* 0xD4 */ {0, 0, 0},
|
||||
/* 0xD5 */ {0, 0, 0},
|
||||
/* 0xD6 */ {0, 0, 0},
|
||||
/* 0xD7 */ {0, 0, 0},
|
||||
/* 0xD8 */ {0, 0, 0},
|
||||
/* 0xD9 */ {0, 0, 0},
|
||||
/* 0xDA */ {0, 0, 0},
|
||||
/* 0xDB */ {0, 0, 0},
|
||||
/* 0xDC */ {0, 0, 0},
|
||||
/* 0xDD */ {0, 0, 0},
|
||||
/* 0xDE */ {0, 0, 0},
|
||||
/* 0xDF */ {0, 0, 0},
|
||||
/* 0xE0 */ {0, 0, 0},
|
||||
/* 0xE1 */ {0, 0, 0},
|
||||
/* 0xE2 */ {0, 0, 0},
|
||||
/* 0xE3 */ {0, 0, 0},
|
||||
/* 0xE4 */ {0, 0, 0},
|
||||
/* 0xE5 */ {0, 0, 0},
|
||||
/* 0xE6 */ {0, 0, 0},
|
||||
/* 0xE7 */ {0, 0, 0},
|
||||
/* 0xE8 */ {0, 0, 0},
|
||||
/* 0xE9 */ {0, 0, 0},
|
||||
/* 0xEA */ {0, 0, 0},
|
||||
/* 0xEB */ {0, 0, 0},
|
||||
/* 0xEC */ {0, 0, 0},
|
||||
/* 0xED */ {0, 0, 0},
|
||||
/* 0xEE */ {0, 0, 0},
|
||||
/* 0xEF */ {0, 0, 0},
|
||||
/* 0xF0 */ {0, 0, 0},
|
||||
/* 0xF1 */ {0, 0, 0},
|
||||
/* 0xF2 */ {0, 0, 0},
|
||||
/* 0xF3 */ {0, 0, 0},
|
||||
/* 0xF4 */ {0, 0, 0},
|
||||
/* 0xF5 */ {0, 0, 0},
|
||||
/* 0xF6 */ {0, 0, 0},
|
||||
/* 0xF7 */ {0, 0, 0},
|
||||
/* 0xF8 */ {0, 0, 0},
|
||||
/* 0xF9 */ {0, 0, 0},
|
||||
/* 0xFA */ {0, 0, 0},
|
||||
/* 0xFB */ {0, 0, 0},
|
||||
/* 0xFC */ {0, 0, 0},
|
||||
/* 0xFD */ {0, 0, 0},
|
||||
/* 0xFE */ {0, 0, 0},
|
||||
/* 0xFF */ {0, 0, 0},
|
||||
};
|
||||
|
||||
#define JISX0213_ENCPAIRS 46
|
||||
static struct pair_encodemap jisx0213_pairencmap[JISX0213_ENCPAIRS] = {
|
||||
{ 0x00e60000, 0x295c },
|
||||
{ 0x00e60300, 0x2b44 },
|
||||
{ 0x02540000, 0x2b38 },
|
||||
{ 0x02540300, 0x2b48 },
|
||||
{ 0x02540301, 0x2b49 },
|
||||
{ 0x02590000, 0x2b30 },
|
||||
{ 0x02590300, 0x2b4c },
|
||||
{ 0x02590301, 0x2b4d },
|
||||
{ 0x025a0000, 0x2b43 },
|
||||
{ 0x025a0300, 0x2b4e },
|
||||
{ 0x025a0301, 0x2b4f },
|
||||
{ 0x028c0000, 0x2b37 },
|
||||
{ 0x028c0300, 0x2b4a },
|
||||
{ 0x028c0301, 0x2b4b },
|
||||
{ 0x02e50000, 0x2b60 },
|
||||
{ 0x02e502e9, 0x2b66 },
|
||||
{ 0x02e90000, 0x2b64 },
|
||||
{ 0x02e902e5, 0x2b65 },
|
||||
{ 0x304b0000, 0x242b },
|
||||
{ 0x304b309a, 0x2477 },
|
||||
{ 0x304d0000, 0x242d },
|
||||
{ 0x304d309a, 0x2478 },
|
||||
{ 0x304f0000, 0x242f },
|
||||
{ 0x304f309a, 0x2479 },
|
||||
{ 0x30510000, 0x2431 },
|
||||
{ 0x3051309a, 0x247a },
|
||||
{ 0x30530000, 0x2433 },
|
||||
{ 0x3053309a, 0x247b },
|
||||
{ 0x30ab0000, 0x252b },
|
||||
{ 0x30ab309a, 0x2577 },
|
||||
{ 0x30ad0000, 0x252d },
|
||||
{ 0x30ad309a, 0x2578 },
|
||||
{ 0x30af0000, 0x252f },
|
||||
{ 0x30af309a, 0x2579 },
|
||||
{ 0x30b10000, 0x2531 },
|
||||
{ 0x30b1309a, 0x257a },
|
||||
{ 0x30b30000, 0x2533 },
|
||||
{ 0x30b3309a, 0x257b },
|
||||
{ 0x30bb0000, 0x253b },
|
||||
{ 0x30bb309a, 0x257c },
|
||||
{ 0x30c40000, 0x2544 },
|
||||
{ 0x30c4309a, 0x257d },
|
||||
{ 0x30c80000, 0x2548 },
|
||||
{ 0x30c8309a, 0x257e },
|
||||
{ 0x31f70000, 0x2675 },
|
||||
{ 0x31f7309a, 0x2678 },
|
||||
};
|
||||
|
3019
Modules/cjkcodecs/map_jisxcommon.h
Normal file
3019
Modules/cjkcodecs/map_jisxcommon.h
Normal file
File diff suppressed because it is too large
Load diff
1300
Modules/cjkcodecs/map_ksx1001.h
Normal file
1300
Modules/cjkcodecs/map_ksx1001.h
Normal file
File diff suppressed because it is too large
Load diff
51
Modules/cjkcodecs/mapdata_ja_JP.c
Normal file
51
Modules/cjkcodecs/mapdata_ja_JP.c
Normal file
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* mapdata_ja_JP.c: Map Provider for Japanese Encodings
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: mapdata_ja_JP.c,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
*/
|
||||
|
||||
#include "Python.h"
|
||||
#include "cjkcommon.h"
|
||||
#include "map_jisx0208.h"
|
||||
#include "map_jisx0212.h"
|
||||
#include "map_jisx0213.h"
|
||||
#include "map_jisxcommon.h"
|
||||
#include "map_cp932ext.h"
|
||||
|
||||
static struct dbcs_map mapholders[] = {
|
||||
{"jisx0208", NULL, jisx0208_decmap},
|
||||
{"jisx0212", NULL, jisx0212_decmap},
|
||||
{"jisxcommon", jisxcommon_encmap, NULL},
|
||||
{"jisx0213_1_bmp", NULL, jisx0213_1_bmp_decmap},
|
||||
{"jisx0213_2_bmp", NULL, jisx0213_2_bmp_decmap},
|
||||
{"jisx0213_bmp", jisx0213_bmp_encmap, NULL},
|
||||
{"jisx0213_1_emp", NULL, jisx0213_1_emp_decmap},
|
||||
{"jisx0213_2_emp", NULL, jisx0213_2_emp_decmap},
|
||||
{"jisx0213_emp", jisx0213_emp_encmap, NULL},
|
||||
{"cp932ext", cp932ext_encmap, cp932ext_decmap},
|
||||
{"", NULL, NULL},
|
||||
};
|
||||
|
||||
static struct PyMethodDef __methods[] = {
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
void
|
||||
init_codecs_mapdata_ja_JP(void)
|
||||
{
|
||||
struct dbcs_map *h;
|
||||
PyObject *m;
|
||||
|
||||
m = Py_InitModule("_codecs_mapdata_ja_JP", __methods);
|
||||
|
||||
for (h = mapholders; h->charset[0] != '\0'; h++) {
|
||||
char mhname[256] = "__map_";
|
||||
|
||||
strcpy(mhname + sizeof("__map_") - 1, h->charset);
|
||||
PyModule_AddObject(m, mhname, PyCObject_FromVoidPtr(h, NULL));
|
||||
}
|
||||
|
||||
if (PyErr_Occurred())
|
||||
Py_FatalError("can't initialize the _codecs_mapdata_ja_JP module");
|
||||
}
|
42
Modules/cjkcodecs/mapdata_ko_KR.c
Normal file
42
Modules/cjkcodecs/mapdata_ko_KR.c
Normal file
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* mapdata_ko_KR.c: Map Provider for Korean Encodings
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: mapdata_ko_KR.c,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
*/
|
||||
|
||||
#include "Python.h"
|
||||
#include "cjkcommon.h"
|
||||
#include "map_ksx1001.h"
|
||||
#include "map_cp949.h"
|
||||
#include "map_cp949ext.h"
|
||||
|
||||
static struct dbcs_map mapholders[] = {
|
||||
{"ksx1001", NULL, ksx1001_decmap},
|
||||
{"cp949", cp949_encmap, NULL},
|
||||
{"cp949ext", NULL, cp949ext_decmap},
|
||||
{"", NULL, NULL},
|
||||
};
|
||||
|
||||
static struct PyMethodDef __methods[] = {
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
void
|
||||
init_codecs_mapdata_ko_KR(void)
|
||||
{
|
||||
struct dbcs_map *h;
|
||||
PyObject *m;
|
||||
|
||||
m = Py_InitModule("_codecs_mapdata_ko_KR", __methods);
|
||||
|
||||
for (h = mapholders; h->charset[0] != '\0'; h++) {
|
||||
char mhname[256] = "__map_";
|
||||
|
||||
strcpy(mhname + sizeof("__map_") - 1, h->charset);
|
||||
PyModule_AddObject(m, mhname, PyCObject_FromVoidPtr(h, NULL));
|
||||
}
|
||||
|
||||
if (PyErr_Occurred())
|
||||
Py_FatalError("can't initialize the _codecs_mapdata_ko_KR module");
|
||||
}
|
44
Modules/cjkcodecs/mapdata_zh_CN.c
Normal file
44
Modules/cjkcodecs/mapdata_zh_CN.c
Normal file
|
@ -0,0 +1,44 @@
|
|||
/*
|
||||
* mapdata_zh_CN.c: Map Provider for Simplified Chinese Encodings
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: mapdata_zh_CN.c,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
*/
|
||||
|
||||
#include "Python.h"
|
||||
#include "cjkcommon.h"
|
||||
#include "map_gb2312.h"
|
||||
#include "map_gbkext.h"
|
||||
#include "map_gbcommon.h"
|
||||
#include "map_gb18030ext.h"
|
||||
|
||||
static struct dbcs_map mapholders[] = {
|
||||
{"gb2312", NULL, gb2312_decmap},
|
||||
{"gbkext", NULL, gbkext_decmap},
|
||||
{"gbcommon", gbcommon_encmap, NULL},
|
||||
{"gb18030ext", gb18030ext_encmap, gb18030ext_decmap},
|
||||
{"", NULL, NULL},
|
||||
};
|
||||
|
||||
static struct PyMethodDef __methods[] = {
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
void
|
||||
init_codecs_mapdata_zh_CN(void)
|
||||
{
|
||||
struct dbcs_map *h;
|
||||
PyObject *m;
|
||||
|
||||
m = Py_InitModule("_codecs_mapdata_zh_CN", __methods);
|
||||
|
||||
for (h = mapholders; h->charset[0] != '\0'; h++) {
|
||||
char mhname[256] = "__map_";
|
||||
|
||||
strcpy(mhname + sizeof("__map_") - 1, h->charset);
|
||||
PyModule_AddObject(m, mhname, PyCObject_FromVoidPtr(h, NULL));
|
||||
}
|
||||
|
||||
if (PyErr_Occurred())
|
||||
Py_FatalError("can't initialize the _codecs_mapdata_zh_CN module");
|
||||
}
|
40
Modules/cjkcodecs/mapdata_zh_TW.c
Normal file
40
Modules/cjkcodecs/mapdata_zh_TW.c
Normal file
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
* mapdata_zh_TW.c: Map Provider for Traditional Chinese Encodings
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: mapdata_zh_TW.c,v 1.3 2004/01/17 11:26:10 perky Exp $
|
||||
*/
|
||||
|
||||
#include "Python.h"
|
||||
#include "cjkcommon.h"
|
||||
#include "map_big5.h"
|
||||
#include "map_cp950ext.h"
|
||||
|
||||
static struct dbcs_map mapholders[] = {
|
||||
{"big5", big5_encmap, big5_decmap},
|
||||
{"cp950ext", cp950ext_encmap, cp950ext_decmap},
|
||||
{"", NULL, NULL},
|
||||
};
|
||||
|
||||
static struct PyMethodDef __methods[] = {
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
void
|
||||
init_codecs_mapdata_zh_TW(void)
|
||||
{
|
||||
struct dbcs_map *h;
|
||||
PyObject *m;
|
||||
|
||||
m = Py_InitModule("_codecs_mapdata_zh_TW", __methods);
|
||||
|
||||
for (h = mapholders; h->charset[0] != '\0'; h++) {
|
||||
char mhname[256] = "__map_";
|
||||
|
||||
strcpy(mhname + sizeof("__map_") - 1, h->charset);
|
||||
PyModule_AddObject(m, mhname, PyCObject_FromVoidPtr(h, NULL));
|
||||
}
|
||||
|
||||
if (PyErr_Occurred())
|
||||
Py_FatalError("can't initialize the _codecs_mapdata_zh_TW module");
|
||||
}
|
1210
Modules/cjkcodecs/multibytecodec.c
Normal file
1210
Modules/cjkcodecs/multibytecodec.c
Normal file
File diff suppressed because it is too large
Load diff
88
Modules/cjkcodecs/multibytecodec.h
Normal file
88
Modules/cjkcodecs/multibytecodec.h
Normal file
|
@ -0,0 +1,88 @@
|
|||
/*
|
||||
* multibytecodec.h: Common Multibyte Codec Implementation
|
||||
*
|
||||
* Written by Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* $CJKCodecs: multibytecodec.h,v 1.2 2003/12/31 05:46:55 perky Exp $
|
||||
*/
|
||||
|
||||
#ifndef _PYTHON_MULTIBYTECODEC_H_
|
||||
#define _PYTHON_MULTIBYTECODEC_H_
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "cjkcommon.h"
|
||||
|
||||
typedef union {
|
||||
void *p;
|
||||
int i;
|
||||
unsigned char c[8];
|
||||
ucs2_t u2[4];
|
||||
ucs4_t u4[2];
|
||||
} MultibyteCodec_State;
|
||||
|
||||
typedef int (*mbencode_func)(MultibyteCodec_State *state,
|
||||
const Py_UNICODE **inbuf, size_t inleft,
|
||||
unsigned char **outbuf, size_t outleft,
|
||||
int flags);
|
||||
typedef int (*mbencodeinit_func)(MultibyteCodec_State *state);
|
||||
typedef int (*mbencodereset_func)(MultibyteCodec_State *state,
|
||||
unsigned char **outbuf, size_t outleft);
|
||||
typedef int (*mbdecode_func)(MultibyteCodec_State *state,
|
||||
const unsigned char **inbuf, size_t inleft,
|
||||
Py_UNICODE **outbuf, size_t outleft);
|
||||
typedef int (*mbdecodeinit_func)(MultibyteCodec_State *state);
|
||||
typedef int (*mbdecodereset_func)(MultibyteCodec_State *state);
|
||||
|
||||
typedef struct {
|
||||
const char *encoding;
|
||||
mbencode_func encode;
|
||||
mbencodeinit_func encinit;
|
||||
mbencodereset_func encreset;
|
||||
mbdecode_func decode;
|
||||
mbdecodeinit_func decinit;
|
||||
mbdecodereset_func decreset;
|
||||
} MultibyteCodec;
|
||||
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
MultibyteCodec *codec;
|
||||
} MultibyteCodecObject;
|
||||
|
||||
#define MAXDECPENDING 8
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
MultibyteCodec *codec;
|
||||
MultibyteCodec_State state;
|
||||
unsigned char pending[MAXDECPENDING];
|
||||
int pendingsize;
|
||||
PyObject *stream, *errors;
|
||||
} MultibyteStreamReaderObject;
|
||||
|
||||
#define MAXENCPENDING 2
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
MultibyteCodec *codec;
|
||||
MultibyteCodec_State state;
|
||||
Py_UNICODE pending[MAXENCPENDING];
|
||||
int pendingsize;
|
||||
PyObject *stream, *errors;
|
||||
} MultibyteStreamWriterObject;
|
||||
|
||||
/*
 * Status codes.  Positive values are used for illegal sequences; the
 * negative values below name the remaining conditions.
 */
#define MBERR_TOOSMALL  (-1) /* insufficient output buffer space */
#define MBERR_TOOFEW    (-2) /* incomplete input buffer */
#define MBERR_INTERNAL  (-3) /* internal runtime error */

/*
 * Error-policy sentinels: the small integers 1..3 cast to PyObject *.
 * They are tags to compare against, not objects -- never dereference or
 * reference-count them.  Each expansion is wrapped in its own parentheses
 * so it behaves as a single operand wherever it is used (next to sizeof,
 * postfix operators, and so on).
 */
#define ERROR_STRICT    ((PyObject *)(1))
#define ERROR_IGNORE    ((PyObject *)(2))
#define ERROR_REPLACE   ((PyObject *)(3))
#define ERROR_MAX       ERROR_REPLACE   /* highest sentinel value */

/* Flag bits for the `flags` argument of mbencode_func. */
#define MBENC_FLUSH     0x0001 /* encode all characters encodable */
#define MBENC_MAX       MBENC_FLUSH
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
11
Modules/cjkcodecs/tweak_gbk.h
Normal file
11
Modules/cjkcodecs/tweak_gbk.h
Normal file
|
@ -0,0 +1,11 @@
|
|||
/* $CJKCodecs: tweak_gbk.h,v 1.1.1.1 2003/09/24 17:47:00 perky Exp $ */
|
||||
|
||||
/*
 * GBK_PREDECODE(dc1, dc2, assi)
 *
 * Decode-side overrides for three byte pairs:
 *   (0xa1, 0xaa) -> 0x2014
 *   (0xa8, 0x44) -> 0x2015
 *   (0xa1, 0xa4) -> 0x00b7
 * On a match the value is assigned to `assi`; otherwise `assi` is left
 * untouched.
 *
 * The expansion is an open if / else-if chain with no final else and no
 * do { } while (0) wrapper.  NOTE(review): presumably this is deliberate so
 * that a call site can continue the chain with its own `else` -- check the
 * users before changing the shape of this macro.  `dc1` and `dc2` may be
 * evaluated up to three times each, so pass expressions without side
 * effects.
 */
#define GBK_PREDECODE(dc1, dc2, assi) \
|
||||
if ((dc1) == 0xa1 && (dc2) == 0xaa) (assi) = 0x2014; \
|
||||
else if ((dc1) == 0xa8 && (dc2) == 0x44) (assi) = 0x2015; \
|
||||
else if ((dc1) == 0xa1 && (dc2) == 0xa4) (assi) = 0x00b7;
|
||||
/*
 * GBK_PREENCODE(code, assi)
 *
 * Encode-side counterpart of GBK_PREDECODE, with the same three pairs in
 * the opposite direction:
 *   0x2014 -> 0xa1aa
 *   0x2015 -> 0xa844
 *   0x00b7 -> 0xa1a4
 * On a match the value is assigned to `assi`; otherwise `assi` is left
 * untouched.
 *
 * The expansion is an open if / else-if chain with no final else and no
 * do { } while (0) wrapper.  NOTE(review): presumably this is deliberate so
 * that a call site can continue the chain with its own `else` -- check the
 * users before changing the shape of this macro.  `code` may be evaluated
 * up to three times, so pass an expression without side effects.
 */
#define GBK_PREENCODE(code, assi) \
|
||||
if ((code) == 0x2014) (assi) = 0xa1aa; \
|
||||
else if ((code) == 0x2015) (assi) = 0xa844; \
|
||||
else if ((code) == 0x00b7) (assi) = 0xa1a4;
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue