Removed the decoding_map from the codecs where possible.

Replaced the tis_620, cp1140 and koi8_u codecs with new ones
based on custom mapping files.
This commit is contained in:
Marc-André Lemburg 2005-10-24 12:07:49 +00:00
parent 921fa8595e
commit 3c72ded23d
45 changed files with 22705 additions and 25634 deletions

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,11 +1,8 @@
""" Python Character Mapping Codec for cp1140 """ Python Character Mapping Codec generated from 'python-mappings/CP1140.TXT' with gencodec.py.
Written by Brian Quinlan(brian@sweetapp.com). NO WARRANTY. """#"
"""
import codecs import codecs
import copy
import cp037
### Codec APIs ### Codec APIs
@ -17,7 +14,7 @@ class Codec(codecs.Codec):
def decode(self,input,errors='strict'): def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map) return codecs.charmap_decode(input,errors,decoding_table)
class StreamWriter(Codec,codecs.StreamWriter): class StreamWriter(Codec,codecs.StreamWriter):
pass pass
@ -31,14 +28,525 @@ def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter) return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = copy.copy(cp037.decoding_map) ### Decoding Table
decoding_map.update({ decoding_table = (
0x009f: 0x20ac # EURO SIGN u'\x00' # 0x00 -> NULL
}) u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x9c' # 0x04 -> CONTROL
u'\t' # 0x05 -> HORIZONTAL TABULATION
u'\x86' # 0x06 -> CONTROL
u'\x7f' # 0x07 -> DELETE
u'\x97' # 0x08 -> CONTROL
u'\x8d' # 0x09 -> CONTROL
u'\x8e' # 0x0a -> CONTROL
u'\x0b' # 0x0b -> VERTICAL TABULATION
u'\x0c' # 0x0c -> FORM FEED
u'\r' # 0x0d -> CARRIAGE RETURN
u'\x0e' # 0x0e -> SHIFT OUT
u'\x0f' # 0x0f -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x9d' # 0x14 -> CONTROL
u'\x85' # 0x15 -> CONTROL
u'\x08' # 0x16 -> BACKSPACE
u'\x87' # 0x17 -> CONTROL
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x92' # 0x1a -> CONTROL
u'\x8f' # 0x1b -> CONTROL
u'\x1c' # 0x1c -> FILE SEPARATOR
u'\x1d' # 0x1d -> GROUP SEPARATOR
u'\x1e' # 0x1e -> RECORD SEPARATOR
u'\x1f' # 0x1f -> UNIT SEPARATOR
u'\x80' # 0x20 -> CONTROL
u'\x81' # 0x21 -> CONTROL
u'\x82' # 0x22 -> CONTROL
u'\x83' # 0x23 -> CONTROL
u'\x84' # 0x24 -> CONTROL
u'\n' # 0x25 -> LINE FEED
u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK
u'\x1b' # 0x27 -> ESCAPE
u'\x88' # 0x28 -> CONTROL
u'\x89' # 0x29 -> CONTROL
u'\x8a' # 0x2a -> CONTROL
u'\x8b' # 0x2b -> CONTROL
u'\x8c' # 0x2c -> CONTROL
u'\x05' # 0x2d -> ENQUIRY
u'\x06' # 0x2e -> ACKNOWLEDGE
u'\x07' # 0x2f -> BELL
u'\x90' # 0x30 -> CONTROL
u'\x91' # 0x31 -> CONTROL
u'\x16' # 0x32 -> SYNCHRONOUS IDLE
u'\x93' # 0x33 -> CONTROL
u'\x94' # 0x34 -> CONTROL
u'\x95' # 0x35 -> CONTROL
u'\x96' # 0x36 -> CONTROL
u'\x04' # 0x37 -> END OF TRANSMISSION
u'\x98' # 0x38 -> CONTROL
u'\x99' # 0x39 -> CONTROL
u'\x9a' # 0x3a -> CONTROL
u'\x9b' # 0x3b -> CONTROL
u'\x14' # 0x3c -> DEVICE CONTROL FOUR
u'\x15' # 0x3d -> NEGATIVE ACKNOWLEDGE
u'\x9e' # 0x3e -> CONTROL
u'\x1a' # 0x3f -> SUBSTITUTE
u' ' # 0x40 -> SPACE
u'\xa0' # 0x41 -> NO-BREAK SPACE
u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS
u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE
u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE
u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE
u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE
u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA
u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE
u'\xa2' # 0x4a -> CENT SIGN
u'.' # 0x4b -> FULL STOP
u'<' # 0x4c -> LESS-THAN SIGN
u'(' # 0x4d -> LEFT PARENTHESIS
u'+' # 0x4e -> PLUS SIGN
u'|' # 0x4f -> VERTICAL LINE
u'&' # 0x50 -> AMPERSAND
u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE
u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS
u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE
u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE
u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS
u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE
u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN)
u'!' # 0x5a -> EXCLAMATION MARK
u'$' # 0x5b -> DOLLAR SIGN
u'*' # 0x5c -> ASTERISK
u')' # 0x5d -> RIGHT PARENTHESIS
u';' # 0x5e -> SEMICOLON
u'\xac' # 0x5f -> NOT SIGN
u'-' # 0x60 -> HYPHEN-MINUS
u'/' # 0x61 -> SOLIDUS
u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS
u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE
u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE
u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE
u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE
u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA
u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE
u'\xa6' # 0x6a -> BROKEN BAR
u',' # 0x6b -> COMMA
u'%' # 0x6c -> PERCENT SIGN
u'_' # 0x6d -> LOW LINE
u'>' # 0x6e -> GREATER-THAN SIGN
u'?' # 0x6f -> QUESTION MARK
u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE
u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE
u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS
u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE
u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE
u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS
u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE
u'`' # 0x79 -> GRAVE ACCENT
u':' # 0x7a -> COLON
u'#' # 0x7b -> NUMBER SIGN
u'@' # 0x7c -> COMMERCIAL AT
u"'" # 0x7d -> APOSTROPHE
u'=' # 0x7e -> EQUALS SIGN
u'"' # 0x7f -> QUOTATION MARK
u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE
u'a' # 0x81 -> LATIN SMALL LETTER A
u'b' # 0x82 -> LATIN SMALL LETTER B
u'c' # 0x83 -> LATIN SMALL LETTER C
u'd' # 0x84 -> LATIN SMALL LETTER D
u'e' # 0x85 -> LATIN SMALL LETTER E
u'f' # 0x86 -> LATIN SMALL LETTER F
u'g' # 0x87 -> LATIN SMALL LETTER G
u'h' # 0x88 -> LATIN SMALL LETTER H
u'i' # 0x89 -> LATIN SMALL LETTER I
u'\xab' # 0x8a -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbb' # 0x8b -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xf0' # 0x8c -> LATIN SMALL LETTER ETH (ICELANDIC)
u'\xfd' # 0x8d -> LATIN SMALL LETTER Y WITH ACUTE
u'\xfe' # 0x8e -> LATIN SMALL LETTER THORN (ICELANDIC)
u'\xb1' # 0x8f -> PLUS-MINUS SIGN
u'\xb0' # 0x90 -> DEGREE SIGN
u'j' # 0x91 -> LATIN SMALL LETTER J
u'k' # 0x92 -> LATIN SMALL LETTER K
u'l' # 0x93 -> LATIN SMALL LETTER L
u'm' # 0x94 -> LATIN SMALL LETTER M
u'n' # 0x95 -> LATIN SMALL LETTER N
u'o' # 0x96 -> LATIN SMALL LETTER O
u'p' # 0x97 -> LATIN SMALL LETTER P
u'q' # 0x98 -> LATIN SMALL LETTER Q
u'r' # 0x99 -> LATIN SMALL LETTER R
u'\xaa' # 0x9a -> FEMININE ORDINAL INDICATOR
u'\xba' # 0x9b -> MASCULINE ORDINAL INDICATOR
u'\xe6' # 0x9c -> LATIN SMALL LIGATURE AE
u'\xb8' # 0x9d -> CEDILLA
u'\xc6' # 0x9e -> LATIN CAPITAL LIGATURE AE
u'\u20ac' # 0x9f -> EURO SIGN
u'\xb5' # 0xa0 -> MICRO SIGN
u'~' # 0xa1 -> TILDE
u's' # 0xa2 -> LATIN SMALL LETTER S
u't' # 0xa3 -> LATIN SMALL LETTER T
u'u' # 0xa4 -> LATIN SMALL LETTER U
u'v' # 0xa5 -> LATIN SMALL LETTER V
u'w' # 0xa6 -> LATIN SMALL LETTER W
u'x' # 0xa7 -> LATIN SMALL LETTER X
u'y' # 0xa8 -> LATIN SMALL LETTER Y
u'z' # 0xa9 -> LATIN SMALL LETTER Z
u'\xa1' # 0xaa -> INVERTED EXCLAMATION MARK
u'\xbf' # 0xab -> INVERTED QUESTION MARK
u'\xd0' # 0xac -> LATIN CAPITAL LETTER ETH (ICELANDIC)
u'\xdd' # 0xad -> LATIN CAPITAL LETTER Y WITH ACUTE
u'\xde' # 0xae -> LATIN CAPITAL LETTER THORN (ICELANDIC)
u'\xae' # 0xaf -> REGISTERED SIGN
u'^' # 0xb0 -> CIRCUMFLEX ACCENT
u'\xa3' # 0xb1 -> POUND SIGN
u'\xa5' # 0xb2 -> YEN SIGN
u'\xb7' # 0xb3 -> MIDDLE DOT
u'\xa9' # 0xb4 -> COPYRIGHT SIGN
u'\xa7' # 0xb5 -> SECTION SIGN
u'\xb6' # 0xb6 -> PILCROW SIGN
u'\xbc' # 0xb7 -> VULGAR FRACTION ONE QUARTER
u'\xbd' # 0xb8 -> VULGAR FRACTION ONE HALF
u'\xbe' # 0xb9 -> VULGAR FRACTION THREE QUARTERS
u'[' # 0xba -> LEFT SQUARE BRACKET
u']' # 0xbb -> RIGHT SQUARE BRACKET
u'\xaf' # 0xbc -> MACRON
u'\xa8' # 0xbd -> DIAERESIS
u'\xb4' # 0xbe -> ACUTE ACCENT
u'\xd7' # 0xbf -> MULTIPLICATION SIGN
u'{' # 0xc0 -> LEFT CURLY BRACKET
u'A' # 0xc1 -> LATIN CAPITAL LETTER A
u'B' # 0xc2 -> LATIN CAPITAL LETTER B
u'C' # 0xc3 -> LATIN CAPITAL LETTER C
u'D' # 0xc4 -> LATIN CAPITAL LETTER D
u'E' # 0xc5 -> LATIN CAPITAL LETTER E
u'F' # 0xc6 -> LATIN CAPITAL LETTER F
u'G' # 0xc7 -> LATIN CAPITAL LETTER G
u'H' # 0xc8 -> LATIN CAPITAL LETTER H
u'I' # 0xc9 -> LATIN CAPITAL LETTER I
u'\xad' # 0xca -> SOFT HYPHEN
u'\xf4' # 0xcb -> LATIN SMALL LETTER O WITH CIRCUMFLEX
u'\xf6' # 0xcc -> LATIN SMALL LETTER O WITH DIAERESIS
u'\xf2' # 0xcd -> LATIN SMALL LETTER O WITH GRAVE
u'\xf3' # 0xce -> LATIN SMALL LETTER O WITH ACUTE
u'\xf5' # 0xcf -> LATIN SMALL LETTER O WITH TILDE
u'}' # 0xd0 -> RIGHT CURLY BRACKET
u'J' # 0xd1 -> LATIN CAPITAL LETTER J
u'K' # 0xd2 -> LATIN CAPITAL LETTER K
u'L' # 0xd3 -> LATIN CAPITAL LETTER L
u'M' # 0xd4 -> LATIN CAPITAL LETTER M
u'N' # 0xd5 -> LATIN CAPITAL LETTER N
u'O' # 0xd6 -> LATIN CAPITAL LETTER O
u'P' # 0xd7 -> LATIN CAPITAL LETTER P
u'Q' # 0xd8 -> LATIN CAPITAL LETTER Q
u'R' # 0xd9 -> LATIN CAPITAL LETTER R
u'\xb9' # 0xda -> SUPERSCRIPT ONE
u'\xfb' # 0xdb -> LATIN SMALL LETTER U WITH CIRCUMFLEX
u'\xfc' # 0xdc -> LATIN SMALL LETTER U WITH DIAERESIS
u'\xf9' # 0xdd -> LATIN SMALL LETTER U WITH GRAVE
u'\xfa' # 0xde -> LATIN SMALL LETTER U WITH ACUTE
u'\xff' # 0xdf -> LATIN SMALL LETTER Y WITH DIAERESIS
u'\\' # 0xe0 -> REVERSE SOLIDUS
u'\xf7' # 0xe1 -> DIVISION SIGN
u'S' # 0xe2 -> LATIN CAPITAL LETTER S
u'T' # 0xe3 -> LATIN CAPITAL LETTER T
u'U' # 0xe4 -> LATIN CAPITAL LETTER U
u'V' # 0xe5 -> LATIN CAPITAL LETTER V
u'W' # 0xe6 -> LATIN CAPITAL LETTER W
u'X' # 0xe7 -> LATIN CAPITAL LETTER X
u'Y' # 0xe8 -> LATIN CAPITAL LETTER Y
u'Z' # 0xe9 -> LATIN CAPITAL LETTER Z
u'\xb2' # 0xea -> SUPERSCRIPT TWO
u'\xd4' # 0xeb -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
u'\xd6' # 0xec -> LATIN CAPITAL LETTER O WITH DIAERESIS
u'\xd2' # 0xed -> LATIN CAPITAL LETTER O WITH GRAVE
u'\xd3' # 0xee -> LATIN CAPITAL LETTER O WITH ACUTE
u'\xd5' # 0xef -> LATIN CAPITAL LETTER O WITH TILDE
u'0' # 0xf0 -> DIGIT ZERO
u'1' # 0xf1 -> DIGIT ONE
u'2' # 0xf2 -> DIGIT TWO
u'3' # 0xf3 -> DIGIT THREE
u'4' # 0xf4 -> DIGIT FOUR
u'5' # 0xf5 -> DIGIT FIVE
u'6' # 0xf6 -> DIGIT SIX
u'7' # 0xf7 -> DIGIT SEVEN
u'8' # 0xf8 -> DIGIT EIGHT
u'9' # 0xf9 -> DIGIT NINE
u'\xb3' # 0xfa -> SUPERSCRIPT THREE
u'\xdb' # 0xfb -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
u'\xdc' # 0xfc -> LATIN CAPITAL LETTER U WITH DIAERESIS
u'\xd9' # 0xfd -> LATIN CAPITAL LETTER U WITH GRAVE
u'\xda' # 0xfe -> LATIN CAPITAL LETTER U WITH ACUTE
u'\x9f' # 0xff -> CONTROL
)
### Encoding Map ### Encoding Map
encoding_map = codecs.make_encoding_map(decoding_map) encoding_map = {
0x0000: 0x00, # NULL
0x0001: 0x01, # START OF HEADING
0x0002: 0x02, # START OF TEXT
0x0003: 0x03, # END OF TEXT
0x0004: 0x37, # END OF TRANSMISSION
0x0005: 0x2d, # ENQUIRY
0x0006: 0x2e, # ACKNOWLEDGE
0x0007: 0x2f, # BELL
0x0008: 0x16, # BACKSPACE
0x0009: 0x05, # HORIZONTAL TABULATION
0x000a: 0x25, # LINE FEED
0x000b: 0x0b, # VERTICAL TABULATION
0x000c: 0x0c, # FORM FEED
0x000d: 0x0d, # CARRIAGE RETURN
0x000e: 0x0e, # SHIFT OUT
0x000f: 0x0f, # SHIFT IN
0x0010: 0x10, # DATA LINK ESCAPE
0x0011: 0x11, # DEVICE CONTROL ONE
0x0012: 0x12, # DEVICE CONTROL TWO
0x0013: 0x13, # DEVICE CONTROL THREE
0x0014: 0x3c, # DEVICE CONTROL FOUR
0x0015: 0x3d, # NEGATIVE ACKNOWLEDGE
0x0016: 0x32, # SYNCHRONOUS IDLE
0x0017: 0x26, # END OF TRANSMISSION BLOCK
0x0018: 0x18, # CANCEL
0x0019: 0x19, # END OF MEDIUM
0x001a: 0x3f, # SUBSTITUTE
0x001b: 0x27, # ESCAPE
0x001c: 0x1c, # FILE SEPARATOR
0x001d: 0x1d, # GROUP SEPARATOR
0x001e: 0x1e, # RECORD SEPARATOR
0x001f: 0x1f, # UNIT SEPARATOR
0x0020: 0x40, # SPACE
0x0021: 0x5a, # EXCLAMATION MARK
0x0022: 0x7f, # QUOTATION MARK
0x0023: 0x7b, # NUMBER SIGN
0x0024: 0x5b, # DOLLAR SIGN
0x0025: 0x6c, # PERCENT SIGN
0x0026: 0x50, # AMPERSAND
0x0027: 0x7d, # APOSTROPHE
0x0028: 0x4d, # LEFT PARENTHESIS
0x0029: 0x5d, # RIGHT PARENTHESIS
0x002a: 0x5c, # ASTERISK
0x002b: 0x4e, # PLUS SIGN
0x002c: 0x6b, # COMMA
0x002d: 0x60, # HYPHEN-MINUS
0x002e: 0x4b, # FULL STOP
0x002f: 0x61, # SOLIDUS
0x0030: 0xf0, # DIGIT ZERO
0x0031: 0xf1, # DIGIT ONE
0x0032: 0xf2, # DIGIT TWO
0x0033: 0xf3, # DIGIT THREE
0x0034: 0xf4, # DIGIT FOUR
0x0035: 0xf5, # DIGIT FIVE
0x0036: 0xf6, # DIGIT SIX
0x0037: 0xf7, # DIGIT SEVEN
0x0038: 0xf8, # DIGIT EIGHT
0x0039: 0xf9, # DIGIT NINE
0x003a: 0x7a, # COLON
0x003b: 0x5e, # SEMICOLON
0x003c: 0x4c, # LESS-THAN SIGN
0x003d: 0x7e, # EQUALS SIGN
0x003e: 0x6e, # GREATER-THAN SIGN
0x003f: 0x6f, # QUESTION MARK
0x0040: 0x7c, # COMMERCIAL AT
0x0041: 0xc1, # LATIN CAPITAL LETTER A
0x0042: 0xc2, # LATIN CAPITAL LETTER B
0x0043: 0xc3, # LATIN CAPITAL LETTER C
0x0044: 0xc4, # LATIN CAPITAL LETTER D
0x0045: 0xc5, # LATIN CAPITAL LETTER E
0x0046: 0xc6, # LATIN CAPITAL LETTER F
0x0047: 0xc7, # LATIN CAPITAL LETTER G
0x0048: 0xc8, # LATIN CAPITAL LETTER H
0x0049: 0xc9, # LATIN CAPITAL LETTER I
0x004a: 0xd1, # LATIN CAPITAL LETTER J
0x004b: 0xd2, # LATIN CAPITAL LETTER K
0x004c: 0xd3, # LATIN CAPITAL LETTER L
0x004d: 0xd4, # LATIN CAPITAL LETTER M
0x004e: 0xd5, # LATIN CAPITAL LETTER N
0x004f: 0xd6, # LATIN CAPITAL LETTER O
0x0050: 0xd7, # LATIN CAPITAL LETTER P
0x0051: 0xd8, # LATIN CAPITAL LETTER Q
0x0052: 0xd9, # LATIN CAPITAL LETTER R
0x0053: 0xe2, # LATIN CAPITAL LETTER S
0x0054: 0xe3, # LATIN CAPITAL LETTER T
0x0055: 0xe4, # LATIN CAPITAL LETTER U
0x0056: 0xe5, # LATIN CAPITAL LETTER V
0x0057: 0xe6, # LATIN CAPITAL LETTER W
0x0058: 0xe7, # LATIN CAPITAL LETTER X
0x0059: 0xe8, # LATIN CAPITAL LETTER Y
0x005a: 0xe9, # LATIN CAPITAL LETTER Z
0x005b: 0xba, # LEFT SQUARE BRACKET
0x005c: 0xe0, # REVERSE SOLIDUS
0x005d: 0xbb, # RIGHT SQUARE BRACKET
0x005e: 0xb0, # CIRCUMFLEX ACCENT
0x005f: 0x6d, # LOW LINE
0x0060: 0x79, # GRAVE ACCENT
0x0061: 0x81, # LATIN SMALL LETTER A
0x0062: 0x82, # LATIN SMALL LETTER B
0x0063: 0x83, # LATIN SMALL LETTER C
0x0064: 0x84, # LATIN SMALL LETTER D
0x0065: 0x85, # LATIN SMALL LETTER E
0x0066: 0x86, # LATIN SMALL LETTER F
0x0067: 0x87, # LATIN SMALL LETTER G
0x0068: 0x88, # LATIN SMALL LETTER H
0x0069: 0x89, # LATIN SMALL LETTER I
0x006a: 0x91, # LATIN SMALL LETTER J
0x006b: 0x92, # LATIN SMALL LETTER K
0x006c: 0x93, # LATIN SMALL LETTER L
0x006d: 0x94, # LATIN SMALL LETTER M
0x006e: 0x95, # LATIN SMALL LETTER N
0x006f: 0x96, # LATIN SMALL LETTER O
0x0070: 0x97, # LATIN SMALL LETTER P
0x0071: 0x98, # LATIN SMALL LETTER Q
0x0072: 0x99, # LATIN SMALL LETTER R
0x0073: 0xa2, # LATIN SMALL LETTER S
0x0074: 0xa3, # LATIN SMALL LETTER T
0x0075: 0xa4, # LATIN SMALL LETTER U
0x0076: 0xa5, # LATIN SMALL LETTER V
0x0077: 0xa6, # LATIN SMALL LETTER W
0x0078: 0xa7, # LATIN SMALL LETTER X
0x0079: 0xa8, # LATIN SMALL LETTER Y
0x007a: 0xa9, # LATIN SMALL LETTER Z
0x007b: 0xc0, # LEFT CURLY BRACKET
0x007c: 0x4f, # VERTICAL LINE
0x007d: 0xd0, # RIGHT CURLY BRACKET
0x007e: 0xa1, # TILDE
0x007f: 0x07, # DELETE
0x0080: 0x20, # CONTROL
0x0081: 0x21, # CONTROL
0x0082: 0x22, # CONTROL
0x0083: 0x23, # CONTROL
0x0084: 0x24, # CONTROL
0x0085: 0x15, # CONTROL
0x0086: 0x06, # CONTROL
0x0087: 0x17, # CONTROL
0x0088: 0x28, # CONTROL
0x0089: 0x29, # CONTROL
0x008a: 0x2a, # CONTROL
0x008b: 0x2b, # CONTROL
0x008c: 0x2c, # CONTROL
0x008d: 0x09, # CONTROL
0x008e: 0x0a, # CONTROL
0x008f: 0x1b, # CONTROL
0x0090: 0x30, # CONTROL
0x0091: 0x31, # CONTROL
0x0092: 0x1a, # CONTROL
0x0093: 0x33, # CONTROL
0x0094: 0x34, # CONTROL
0x0095: 0x35, # CONTROL
0x0096: 0x36, # CONTROL
0x0097: 0x08, # CONTROL
0x0098: 0x38, # CONTROL
0x0099: 0x39, # CONTROL
0x009a: 0x3a, # CONTROL
0x009b: 0x3b, # CONTROL
0x009c: 0x04, # CONTROL
0x009d: 0x14, # CONTROL
0x009e: 0x3e, # CONTROL
0x009f: 0xff, # CONTROL
0x00a0: 0x41, # NO-BREAK SPACE
0x00a1: 0xaa, # INVERTED EXCLAMATION MARK
0x00a2: 0x4a, # CENT SIGN
0x00a3: 0xb1, # POUND SIGN
0x00a5: 0xb2, # YEN SIGN
0x00a6: 0x6a, # BROKEN BAR
0x00a7: 0xb5, # SECTION SIGN
0x00a8: 0xbd, # DIAERESIS
0x00a9: 0xb4, # COPYRIGHT SIGN
0x00aa: 0x9a, # FEMININE ORDINAL INDICATOR
0x00ab: 0x8a, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00ac: 0x5f, # NOT SIGN
0x00ad: 0xca, # SOFT HYPHEN
0x00ae: 0xaf, # REGISTERED SIGN
0x00af: 0xbc, # MACRON
0x00b0: 0x90, # DEGREE SIGN
0x00b1: 0x8f, # PLUS-MINUS SIGN
0x00b2: 0xea, # SUPERSCRIPT TWO
0x00b3: 0xfa, # SUPERSCRIPT THREE
0x00b4: 0xbe, # ACUTE ACCENT
0x00b5: 0xa0, # MICRO SIGN
0x00b6: 0xb6, # PILCROW SIGN
0x00b7: 0xb3, # MIDDLE DOT
0x00b8: 0x9d, # CEDILLA
0x00b9: 0xda, # SUPERSCRIPT ONE
0x00ba: 0x9b, # MASCULINE ORDINAL INDICATOR
0x00bb: 0x8b, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00bc: 0xb7, # VULGAR FRACTION ONE QUARTER
0x00bd: 0xb8, # VULGAR FRACTION ONE HALF
0x00be: 0xb9, # VULGAR FRACTION THREE QUARTERS
0x00bf: 0xab, # INVERTED QUESTION MARK
0x00c0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE
0x00c1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE
0x00c2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
0x00c3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE
0x00c4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS
0x00c5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE
0x00c6: 0x9e, # LATIN CAPITAL LIGATURE AE
0x00c7: 0x68, # LATIN CAPITAL LETTER C WITH CEDILLA
0x00c8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE
0x00c9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE
0x00ca: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
0x00cb: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS
0x00cc: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE
0x00cd: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE
0x00ce: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
0x00cf: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS
0x00d0: 0xac, # LATIN CAPITAL LETTER ETH (ICELANDIC)
0x00d1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE
0x00d2: 0xed, # LATIN CAPITAL LETTER O WITH GRAVE
0x00d3: 0xee, # LATIN CAPITAL LETTER O WITH ACUTE
0x00d4: 0xeb, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
0x00d5: 0xef, # LATIN CAPITAL LETTER O WITH TILDE
0x00d6: 0xec, # LATIN CAPITAL LETTER O WITH DIAERESIS
0x00d7: 0xbf, # MULTIPLICATION SIGN
0x00d8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE
0x00d9: 0xfd, # LATIN CAPITAL LETTER U WITH GRAVE
0x00da: 0xfe, # LATIN CAPITAL LETTER U WITH ACUTE
0x00db: 0xfb, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
0x00dc: 0xfc, # LATIN CAPITAL LETTER U WITH DIAERESIS
0x00dd: 0xad, # LATIN CAPITAL LETTER Y WITH ACUTE
0x00de: 0xae, # LATIN CAPITAL LETTER THORN (ICELANDIC)
0x00df: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN)
0x00e0: 0x44, # LATIN SMALL LETTER A WITH GRAVE
0x00e1: 0x45, # LATIN SMALL LETTER A WITH ACUTE
0x00e2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX
0x00e3: 0x46, # LATIN SMALL LETTER A WITH TILDE
0x00e4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS
0x00e5: 0x47, # LATIN SMALL LETTER A WITH RING ABOVE
0x00e6: 0x9c, # LATIN SMALL LIGATURE AE
0x00e7: 0x48, # LATIN SMALL LETTER C WITH CEDILLA
0x00e8: 0x54, # LATIN SMALL LETTER E WITH GRAVE
0x00e9: 0x51, # LATIN SMALL LETTER E WITH ACUTE
0x00ea: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX
0x00eb: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS
0x00ec: 0x58, # LATIN SMALL LETTER I WITH GRAVE
0x00ed: 0x55, # LATIN SMALL LETTER I WITH ACUTE
0x00ee: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX
0x00ef: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS
0x00f0: 0x8c, # LATIN SMALL LETTER ETH (ICELANDIC)
0x00f1: 0x49, # LATIN SMALL LETTER N WITH TILDE
0x00f2: 0xcd, # LATIN SMALL LETTER O WITH GRAVE
0x00f3: 0xce, # LATIN SMALL LETTER O WITH ACUTE
0x00f4: 0xcb, # LATIN SMALL LETTER O WITH CIRCUMFLEX
0x00f5: 0xcf, # LATIN SMALL LETTER O WITH TILDE
0x00f6: 0xcc, # LATIN SMALL LETTER O WITH DIAERESIS
0x00f7: 0xe1, # DIVISION SIGN
0x00f8: 0x70, # LATIN SMALL LETTER O WITH STROKE
0x00f9: 0xdd, # LATIN SMALL LETTER U WITH GRAVE
0x00fa: 0xde, # LATIN SMALL LETTER U WITH ACUTE
0x00fb: 0xdb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
0x00fc: 0xdc, # LATIN SMALL LETTER U WITH DIAERESIS
0x00fd: 0x8d, # LATIN SMALL LETTER Y WITH ACUTE
0x00fe: 0x8e, # LATIN SMALL LETTER THORN (ICELANDIC)
0x00ff: 0xdf, # LATIN SMALL LETTER Y WITH DIAERESIS
0x20ac: 0x9f, # EURO SIGN
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,4 +1,4 @@
""" Python Character Mapping Codec generated from 'ISO8859/8859-6.TXT' with gencodec.py. """ Python Character Mapping Codec generated from 'MAPPINGS/ISO8859/8859-6.TXT' with gencodec.py.
"""#" """#"
@ -28,273 +28,175 @@ def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter) return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x00a1: None,
0x00a2: None,
0x00a3: None,
0x00a5: None,
0x00a6: None,
0x00a7: None,
0x00a8: None,
0x00a9: None,
0x00aa: None,
0x00ab: None,
0x00ac: 0x060c, # ARABIC COMMA
0x00ae: None,
0x00af: None,
0x00b0: None,
0x00b1: None,
0x00b2: None,
0x00b3: None,
0x00b4: None,
0x00b5: None,
0x00b6: None,
0x00b7: None,
0x00b8: None,
0x00b9: None,
0x00ba: None,
0x00bb: 0x061b, # ARABIC SEMICOLON
0x00bc: None,
0x00bd: None,
0x00be: None,
0x00bf: 0x061f, # ARABIC QUESTION MARK
0x00c0: None,
0x00c1: 0x0621, # ARABIC LETTER HAMZA
0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE
0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE
0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE
0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW
0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE
0x00c7: 0x0627, # ARABIC LETTER ALEF
0x00c8: 0x0628, # ARABIC LETTER BEH
0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA
0x00ca: 0x062a, # ARABIC LETTER TEH
0x00cb: 0x062b, # ARABIC LETTER THEH
0x00cc: 0x062c, # ARABIC LETTER JEEM
0x00cd: 0x062d, # ARABIC LETTER HAH
0x00ce: 0x062e, # ARABIC LETTER KHAH
0x00cf: 0x062f, # ARABIC LETTER DAL
0x00d0: 0x0630, # ARABIC LETTER THAL
0x00d1: 0x0631, # ARABIC LETTER REH
0x00d2: 0x0632, # ARABIC LETTER ZAIN
0x00d3: 0x0633, # ARABIC LETTER SEEN
0x00d4: 0x0634, # ARABIC LETTER SHEEN
0x00d5: 0x0635, # ARABIC LETTER SAD
0x00d6: 0x0636, # ARABIC LETTER DAD
0x00d7: 0x0637, # ARABIC LETTER TAH
0x00d8: 0x0638, # ARABIC LETTER ZAH
0x00d9: 0x0639, # ARABIC LETTER AIN
0x00da: 0x063a, # ARABIC LETTER GHAIN
0x00db: None,
0x00dc: None,
0x00dd: None,
0x00de: None,
0x00df: None,
0x00e0: 0x0640, # ARABIC TATWEEL
0x00e1: 0x0641, # ARABIC LETTER FEH
0x00e2: 0x0642, # ARABIC LETTER QAF
0x00e3: 0x0643, # ARABIC LETTER KAF
0x00e4: 0x0644, # ARABIC LETTER LAM
0x00e5: 0x0645, # ARABIC LETTER MEEM
0x00e6: 0x0646, # ARABIC LETTER NOON
0x00e7: 0x0647, # ARABIC LETTER HEH
0x00e8: 0x0648, # ARABIC LETTER WAW
0x00e9: 0x0649, # ARABIC LETTER ALEF MAKSURA
0x00ea: 0x064a, # ARABIC LETTER YEH
0x00eb: 0x064b, # ARABIC FATHATAN
0x00ec: 0x064c, # ARABIC DAMMATAN
0x00ed: 0x064d, # ARABIC KASRATAN
0x00ee: 0x064e, # ARABIC FATHA
0x00ef: 0x064f, # ARABIC DAMMA
0x00f0: 0x0650, # ARABIC KASRA
0x00f1: 0x0651, # ARABIC SHADDA
0x00f2: 0x0652, # ARABIC SUKUN
0x00f3: None,
0x00f4: None,
0x00f5: None,
0x00f6: None,
0x00f7: None,
0x00f8: None,
0x00f9: None,
0x00fa: None,
0x00fb: None,
0x00fc: None,
0x00fd: None,
0x00fe: None,
0x00ff: None,
})
### Decoding Table ### Decoding Table
decoding_table = ( decoding_table = (
u'\x00' # 0x0000 -> NULL u'\x00' # 0x00 -> NULL
u'\x01' # 0x0001 -> START OF HEADING u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x0002 -> START OF TEXT u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x0003 -> END OF TEXT u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x0004 -> END OF TRANSMISSION u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x0005 -> ENQUIRY u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x0006 -> ACKNOWLEDGE u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x0007 -> BELL u'\x07' # 0x07 -> BELL
u'\x08' # 0x0008 -> BACKSPACE u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x0009 -> HORIZONTAL TABULATION u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x000a -> LINE FEED u'\n' # 0x0a -> LINE FEED
u'\x0b' # 0x000b -> VERTICAL TABULATION u'\x0b' # 0x0b -> VERTICAL TABULATION
u'\x0c' # 0x000c -> FORM FEED u'\x0c' # 0x0c -> FORM FEED
u'\r' # 0x000d -> CARRIAGE RETURN u'\r' # 0x0d -> CARRIAGE RETURN
u'\x0e' # 0x000e -> SHIFT OUT u'\x0e' # 0x0e -> SHIFT OUT
u'\x0f' # 0x000f -> SHIFT IN u'\x0f' # 0x0f -> SHIFT IN
u'\x10' # 0x0010 -> DATA LINK ESCAPE u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x0011 -> DEVICE CONTROL ONE u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x0012 -> DEVICE CONTROL TWO u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x0013 -> DEVICE CONTROL THREE u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x0014 -> DEVICE CONTROL FOUR u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x0016 -> SYNCHRONOUS IDLE u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x0018 -> CANCEL u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x0019 -> END OF MEDIUM u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x001a -> SUBSTITUTE u'\x1a' # 0x1a -> SUBSTITUTE
u'\x1b' # 0x001b -> ESCAPE u'\x1b' # 0x1b -> ESCAPE
u'\x1c' # 0x001c -> FILE SEPARATOR u'\x1c' # 0x1c -> FILE SEPARATOR
u'\x1d' # 0x001d -> GROUP SEPARATOR u'\x1d' # 0x1d -> GROUP SEPARATOR
u'\x1e' # 0x001e -> RECORD SEPARATOR u'\x1e' # 0x1e -> RECORD SEPARATOR
u'\x1f' # 0x001f -> UNIT SEPARATOR u'\x1f' # 0x1f -> UNIT SEPARATOR
u' ' # 0x0020 -> SPACE u' ' # 0x20 -> SPACE
u'!' # 0x0021 -> EXCLAMATION MARK u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x0022 -> QUOTATION MARK u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x0023 -> NUMBER SIGN u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x0024 -> DOLLAR SIGN u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x0025 -> PERCENT SIGN u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x0026 -> AMPERSAND u'&' # 0x26 -> AMPERSAND
u"'" # 0x0027 -> APOSTROPHE u"'" # 0x27 -> APOSTROPHE
u'(' # 0x0028 -> LEFT PARENTHESIS u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x0029 -> RIGHT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x002a -> ASTERISK u'*' # 0x2a -> ASTERISK
u'+' # 0x002b -> PLUS SIGN u'+' # 0x2b -> PLUS SIGN
u',' # 0x002c -> COMMA u',' # 0x2c -> COMMA
u'-' # 0x002d -> HYPHEN-MINUS u'-' # 0x2d -> HYPHEN-MINUS
u'.' # 0x002e -> FULL STOP u'.' # 0x2e -> FULL STOP
u'/' # 0x002f -> SOLIDUS u'/' # 0x2f -> SOLIDUS
u'0' # 0x0030 -> DIGIT ZERO u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x0031 -> DIGIT ONE u'1' # 0x31 -> DIGIT ONE
u'2' # 0x0032 -> DIGIT TWO u'2' # 0x32 -> DIGIT TWO
u'3' # 0x0033 -> DIGIT THREE u'3' # 0x33 -> DIGIT THREE
u'4' # 0x0034 -> DIGIT FOUR u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x0035 -> DIGIT FIVE u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x0036 -> DIGIT SIX u'6' # 0x36 -> DIGIT SIX
u'7' # 0x0037 -> DIGIT SEVEN u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x0038 -> DIGIT EIGHT u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x0039 -> DIGIT NINE u'9' # 0x39 -> DIGIT NINE
u':' # 0x003a -> COLON u':' # 0x3a -> COLON
u';' # 0x003b -> SEMICOLON u';' # 0x3b -> SEMICOLON
u'<' # 0x003c -> LESS-THAN SIGN u'<' # 0x3c -> LESS-THAN SIGN
u'=' # 0x003d -> EQUALS SIGN u'=' # 0x3d -> EQUALS SIGN
u'>' # 0x003e -> GREATER-THAN SIGN u'>' # 0x3e -> GREATER-THAN SIGN
u'?' # 0x003f -> QUESTION MARK u'?' # 0x3f -> QUESTION MARK
u'@' # 0x0040 -> COMMERCIAL AT u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x0041 -> LATIN CAPITAL LETTER A u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x0042 -> LATIN CAPITAL LETTER B u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x0043 -> LATIN CAPITAL LETTER C u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x0044 -> LATIN CAPITAL LETTER D u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x0045 -> LATIN CAPITAL LETTER E u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x0046 -> LATIN CAPITAL LETTER F u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x0047 -> LATIN CAPITAL LETTER G u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x0048 -> LATIN CAPITAL LETTER H u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x0049 -> LATIN CAPITAL LETTER I u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x004a -> LATIN CAPITAL LETTER J u'J' # 0x4a -> LATIN CAPITAL LETTER J
u'K' # 0x004b -> LATIN CAPITAL LETTER K u'K' # 0x4b -> LATIN CAPITAL LETTER K
u'L' # 0x004c -> LATIN CAPITAL LETTER L u'L' # 0x4c -> LATIN CAPITAL LETTER L
u'M' # 0x004d -> LATIN CAPITAL LETTER M u'M' # 0x4d -> LATIN CAPITAL LETTER M
u'N' # 0x004e -> LATIN CAPITAL LETTER N u'N' # 0x4e -> LATIN CAPITAL LETTER N
u'O' # 0x004f -> LATIN CAPITAL LETTER O u'O' # 0x4f -> LATIN CAPITAL LETTER O
u'P' # 0x0050 -> LATIN CAPITAL LETTER P u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x0052 -> LATIN CAPITAL LETTER R u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x0053 -> LATIN CAPITAL LETTER S u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x0054 -> LATIN CAPITAL LETTER T u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x0055 -> LATIN CAPITAL LETTER U u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x0056 -> LATIN CAPITAL LETTER V u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x0057 -> LATIN CAPITAL LETTER W u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x0058 -> LATIN CAPITAL LETTER X u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x005a -> LATIN CAPITAL LETTER Z u'Z' # 0x5a -> LATIN CAPITAL LETTER Z
u'[' # 0x005b -> LEFT SQUARE BRACKET u'[' # 0x5b -> LEFT SQUARE BRACKET
u'\\' # 0x005c -> REVERSE SOLIDUS u'\\' # 0x5c -> REVERSE SOLIDUS
u']' # 0x005d -> RIGHT SQUARE BRACKET u']' # 0x5d -> RIGHT SQUARE BRACKET
u'^' # 0x005e -> CIRCUMFLEX ACCENT u'^' # 0x5e -> CIRCUMFLEX ACCENT
u'_' # 0x005f -> LOW LINE u'_' # 0x5f -> LOW LINE
u'`' # 0x0060 -> GRAVE ACCENT u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x0061 -> LATIN SMALL LETTER A u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x0062 -> LATIN SMALL LETTER B u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x0063 -> LATIN SMALL LETTER C u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x0064 -> LATIN SMALL LETTER D u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x0065 -> LATIN SMALL LETTER E u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x0066 -> LATIN SMALL LETTER F u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x0067 -> LATIN SMALL LETTER G u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x0068 -> LATIN SMALL LETTER H u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x0069 -> LATIN SMALL LETTER I u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x006a -> LATIN SMALL LETTER J u'j' # 0x6a -> LATIN SMALL LETTER J
u'k' # 0x006b -> LATIN SMALL LETTER K u'k' # 0x6b -> LATIN SMALL LETTER K
u'l' # 0x006c -> LATIN SMALL LETTER L u'l' # 0x6c -> LATIN SMALL LETTER L
u'm' # 0x006d -> LATIN SMALL LETTER M u'm' # 0x6d -> LATIN SMALL LETTER M
u'n' # 0x006e -> LATIN SMALL LETTER N u'n' # 0x6e -> LATIN SMALL LETTER N
u'o' # 0x006f -> LATIN SMALL LETTER O u'o' # 0x6f -> LATIN SMALL LETTER O
u'p' # 0x0070 -> LATIN SMALL LETTER P u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x0071 -> LATIN SMALL LETTER Q u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x0072 -> LATIN SMALL LETTER R u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x0073 -> LATIN SMALL LETTER S u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x0074 -> LATIN SMALL LETTER T u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x0075 -> LATIN SMALL LETTER U u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x0076 -> LATIN SMALL LETTER V u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x0077 -> LATIN SMALL LETTER W u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x0078 -> LATIN SMALL LETTER X u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x0079 -> LATIN SMALL LETTER Y u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x007a -> LATIN SMALL LETTER Z u'z' # 0x7a -> LATIN SMALL LETTER Z
u'{' # 0x007b -> LEFT CURLY BRACKET u'{' # 0x7b -> LEFT CURLY BRACKET
u'|' # 0x007c -> VERTICAL LINE u'|' # 0x7c -> VERTICAL LINE
u'}' # 0x007d -> RIGHT CURLY BRACKET u'}' # 0x7d -> RIGHT CURLY BRACKET
u'~' # 0x007e -> TILDE u'~' # 0x7e -> TILDE
u'\x7f' # 0x007f -> DELETE u'\x7f' # 0x7f -> DELETE
u'\x80' # 0x0080 -> <control> u'\x80' # 0x80 -> <control>
u'\x81' # 0x0081 -> <control> u'\x81' # 0x81 -> <control>
u'\x82' # 0x0082 -> <control> u'\x82' # 0x82 -> <control>
u'\x83' # 0x0083 -> <control> u'\x83' # 0x83 -> <control>
u'\x84' # 0x0084 -> <control> u'\x84' # 0x84 -> <control>
u'\x85' # 0x0085 -> <control> u'\x85' # 0x85 -> <control>
u'\x86' # 0x0086 -> <control> u'\x86' # 0x86 -> <control>
u'\x87' # 0x0087 -> <control> u'\x87' # 0x87 -> <control>
u'\x88' # 0x0088 -> <control> u'\x88' # 0x88 -> <control>
u'\x89' # 0x0089 -> <control> u'\x89' # 0x89 -> <control>
u'\x8a' # 0x008a -> <control> u'\x8a' # 0x8a -> <control>
u'\x8b' # 0x008b -> <control> u'\x8b' # 0x8b -> <control>
u'\x8c' # 0x008c -> <control> u'\x8c' # 0x8c -> <control>
u'\x8d' # 0x008d -> <control> u'\x8d' # 0x8d -> <control>
u'\x8e' # 0x008e -> <control> u'\x8e' # 0x8e -> <control>
u'\x8f' # 0x008f -> <control> u'\x8f' # 0x8f -> <control>
u'\x90' # 0x0090 -> <control> u'\x90' # 0x90 -> <control>
u'\x91' # 0x0091 -> <control> u'\x91' # 0x91 -> <control>
u'\x92' # 0x0092 -> <control> u'\x92' # 0x92 -> <control>
u'\x93' # 0x0093 -> <control> u'\x93' # 0x93 -> <control>
u'\x94' # 0x0094 -> <control> u'\x94' # 0x94 -> <control>
u'\x95' # 0x0095 -> <control> u'\x95' # 0x95 -> <control>
u'\x96' # 0x0096 -> <control> u'\x96' # 0x96 -> <control>
u'\x97' # 0x0097 -> <control> u'\x97' # 0x97 -> <control>
u'\x98' # 0x0098 -> <control> u'\x98' # 0x98 -> <control>
u'\x99' # 0x0099 -> <control> u'\x99' # 0x99 -> <control>
u'\x9a' # 0x009a -> <control> u'\x9a' # 0x9a -> <control>
u'\x9b' # 0x009b -> <control> u'\x9b' # 0x9b -> <control>
u'\x9c' # 0x009c -> <control> u'\x9c' # 0x9c -> <control>
u'\x9d' # 0x009d -> <control> u'\x9d' # 0x9d -> <control>
u'\x9e' # 0x009e -> <control> u'\x9e' # 0x9e -> <control>
u'\x9f' # 0x009f -> <control> u'\x9f' # 0x9f -> <control>
u'\xa0' # 0x00a0 -> NO-BREAK SPACE u'\xa0' # 0xa0 -> NO-BREAK SPACE
u'\ufffe' u'\ufffe'
u'\ufffe' u'\ufffe'
u'\ufffe' u'\ufffe'
u'\xa4' # 0x00a4 -> CURRENCY SIGN u'\xa4' # 0xa4 -> CURRENCY SIGN
u'\ufffe' u'\ufffe'
u'\ufffe' u'\ufffe'
u'\ufffe' u'\ufffe'
@ -302,8 +204,8 @@ decoding_table = (
u'\ufffe' u'\ufffe'
u'\ufffe' u'\ufffe'
u'\ufffe' u'\ufffe'
u'\u060c' # 0x00ac -> ARABIC COMMA u'\u060c' # 0xac -> ARABIC COMMA
u'\xad' # 0x00ad -> SOFT HYPHEN u'\xad' # 0xad -> SOFT HYPHEN
u'\ufffe' u'\ufffe'
u'\ufffe' u'\ufffe'
u'\ufffe' u'\ufffe'
@ -317,62 +219,62 @@ decoding_table = (
u'\ufffe' u'\ufffe'
u'\ufffe' u'\ufffe'
u'\ufffe' u'\ufffe'
u'\u061b' # 0x00bb -> ARABIC SEMICOLON u'\u061b' # 0xbb -> ARABIC SEMICOLON
u'\ufffe' u'\ufffe'
u'\ufffe' u'\ufffe'
u'\ufffe' u'\ufffe'
u'\u061f' # 0x00bf -> ARABIC QUESTION MARK u'\u061f' # 0xbf -> ARABIC QUESTION MARK
u'\ufffe' u'\ufffe'
u'\u0621' # 0x00c1 -> ARABIC LETTER HAMZA u'\u0621' # 0xc1 -> ARABIC LETTER HAMZA
u'\u0622' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE u'\u0622' # 0xc2 -> ARABIC LETTER ALEF WITH MADDA ABOVE
u'\u0623' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE u'\u0623' # 0xc3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE
u'\u0624' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE u'\u0624' # 0xc4 -> ARABIC LETTER WAW WITH HAMZA ABOVE
u'\u0625' # 0x00c5 -> ARABIC LETTER ALEF WITH HAMZA BELOW u'\u0625' # 0xc5 -> ARABIC LETTER ALEF WITH HAMZA BELOW
u'\u0626' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE u'\u0626' # 0xc6 -> ARABIC LETTER YEH WITH HAMZA ABOVE
u'\u0627' # 0x00c7 -> ARABIC LETTER ALEF u'\u0627' # 0xc7 -> ARABIC LETTER ALEF
u'\u0628' # 0x00c8 -> ARABIC LETTER BEH u'\u0628' # 0xc8 -> ARABIC LETTER BEH
u'\u0629' # 0x00c9 -> ARABIC LETTER TEH MARBUTA u'\u0629' # 0xc9 -> ARABIC LETTER TEH MARBUTA
u'\u062a' # 0x00ca -> ARABIC LETTER TEH u'\u062a' # 0xca -> ARABIC LETTER TEH
u'\u062b' # 0x00cb -> ARABIC LETTER THEH u'\u062b' # 0xcb -> ARABIC LETTER THEH
u'\u062c' # 0x00cc -> ARABIC LETTER JEEM u'\u062c' # 0xcc -> ARABIC LETTER JEEM
u'\u062d' # 0x00cd -> ARABIC LETTER HAH u'\u062d' # 0xcd -> ARABIC LETTER HAH
u'\u062e' # 0x00ce -> ARABIC LETTER KHAH u'\u062e' # 0xce -> ARABIC LETTER KHAH
u'\u062f' # 0x00cf -> ARABIC LETTER DAL u'\u062f' # 0xcf -> ARABIC LETTER DAL
u'\u0630' # 0x00d0 -> ARABIC LETTER THAL u'\u0630' # 0xd0 -> ARABIC LETTER THAL
u'\u0631' # 0x00d1 -> ARABIC LETTER REH u'\u0631' # 0xd1 -> ARABIC LETTER REH
u'\u0632' # 0x00d2 -> ARABIC LETTER ZAIN u'\u0632' # 0xd2 -> ARABIC LETTER ZAIN
u'\u0633' # 0x00d3 -> ARABIC LETTER SEEN u'\u0633' # 0xd3 -> ARABIC LETTER SEEN
u'\u0634' # 0x00d4 -> ARABIC LETTER SHEEN u'\u0634' # 0xd4 -> ARABIC LETTER SHEEN
u'\u0635' # 0x00d5 -> ARABIC LETTER SAD u'\u0635' # 0xd5 -> ARABIC LETTER SAD
u'\u0636' # 0x00d6 -> ARABIC LETTER DAD u'\u0636' # 0xd6 -> ARABIC LETTER DAD
u'\u0637' # 0x00d7 -> ARABIC LETTER TAH u'\u0637' # 0xd7 -> ARABIC LETTER TAH
u'\u0638' # 0x00d8 -> ARABIC LETTER ZAH u'\u0638' # 0xd8 -> ARABIC LETTER ZAH
u'\u0639' # 0x00d9 -> ARABIC LETTER AIN u'\u0639' # 0xd9 -> ARABIC LETTER AIN
u'\u063a' # 0x00da -> ARABIC LETTER GHAIN u'\u063a' # 0xda -> ARABIC LETTER GHAIN
u'\ufffe' u'\ufffe'
u'\ufffe' u'\ufffe'
u'\ufffe' u'\ufffe'
u'\ufffe' u'\ufffe'
u'\ufffe' u'\ufffe'
u'\u0640' # 0x00e0 -> ARABIC TATWEEL u'\u0640' # 0xe0 -> ARABIC TATWEEL
u'\u0641' # 0x00e1 -> ARABIC LETTER FEH u'\u0641' # 0xe1 -> ARABIC LETTER FEH
u'\u0642' # 0x00e2 -> ARABIC LETTER QAF u'\u0642' # 0xe2 -> ARABIC LETTER QAF
u'\u0643' # 0x00e3 -> ARABIC LETTER KAF u'\u0643' # 0xe3 -> ARABIC LETTER KAF
u'\u0644' # 0x00e4 -> ARABIC LETTER LAM u'\u0644' # 0xe4 -> ARABIC LETTER LAM
u'\u0645' # 0x00e5 -> ARABIC LETTER MEEM u'\u0645' # 0xe5 -> ARABIC LETTER MEEM
u'\u0646' # 0x00e6 -> ARABIC LETTER NOON u'\u0646' # 0xe6 -> ARABIC LETTER NOON
u'\u0647' # 0x00e7 -> ARABIC LETTER HEH u'\u0647' # 0xe7 -> ARABIC LETTER HEH
u'\u0648' # 0x00e8 -> ARABIC LETTER WAW u'\u0648' # 0xe8 -> ARABIC LETTER WAW
u'\u0649' # 0x00e9 -> ARABIC LETTER ALEF MAKSURA u'\u0649' # 0xe9 -> ARABIC LETTER ALEF MAKSURA
u'\u064a' # 0x00ea -> ARABIC LETTER YEH u'\u064a' # 0xea -> ARABIC LETTER YEH
u'\u064b' # 0x00eb -> ARABIC FATHATAN u'\u064b' # 0xeb -> ARABIC FATHATAN
u'\u064c' # 0x00ec -> ARABIC DAMMATAN u'\u064c' # 0xec -> ARABIC DAMMATAN
u'\u064d' # 0x00ed -> ARABIC KASRATAN u'\u064d' # 0xed -> ARABIC KASRATAN
u'\u064e' # 0x00ee -> ARABIC FATHA u'\u064e' # 0xee -> ARABIC FATHA
u'\u064f' # 0x00ef -> ARABIC DAMMA u'\u064f' # 0xef -> ARABIC DAMMA
u'\u0650' # 0x00f0 -> ARABIC KASRA u'\u0650' # 0xf0 -> ARABIC KASRA
u'\u0651' # 0x00f1 -> ARABIC SHADDA u'\u0651' # 0xf1 -> ARABIC SHADDA
u'\u0652' # 0x00f2 -> ARABIC SUKUN u'\u0652' # 0xf2 -> ARABIC SUKUN
u'\ufffe' u'\ufffe'
u'\ufffe' u'\ufffe'
u'\ufffe' u'\ufffe'
@ -391,215 +293,215 @@ decoding_table = (
### Encoding Map ### Encoding Map
encoding_map = { encoding_map = {
0x0000: 0x0000, # NULL 0x0000: 0x00, # NULL
0x0001: 0x0001, # START OF HEADING 0x0001: 0x01, # START OF HEADING
0x0002: 0x0002, # START OF TEXT 0x0002: 0x02, # START OF TEXT
0x0003: 0x0003, # END OF TEXT 0x0003: 0x03, # END OF TEXT
0x0004: 0x0004, # END OF TRANSMISSION 0x0004: 0x04, # END OF TRANSMISSION
0x0005: 0x0005, # ENQUIRY 0x0005: 0x05, # ENQUIRY
0x0006: 0x0006, # ACKNOWLEDGE 0x0006: 0x06, # ACKNOWLEDGE
0x0007: 0x0007, # BELL 0x0007: 0x07, # BELL
0x0008: 0x0008, # BACKSPACE 0x0008: 0x08, # BACKSPACE
0x0009: 0x0009, # HORIZONTAL TABULATION 0x0009: 0x09, # HORIZONTAL TABULATION
0x000a: 0x000a, # LINE FEED 0x000a: 0x0a, # LINE FEED
0x000b: 0x000b, # VERTICAL TABULATION 0x000b: 0x0b, # VERTICAL TABULATION
0x000c: 0x000c, # FORM FEED 0x000c: 0x0c, # FORM FEED
0x000d: 0x000d, # CARRIAGE RETURN 0x000d: 0x0d, # CARRIAGE RETURN
0x000e: 0x000e, # SHIFT OUT 0x000e: 0x0e, # SHIFT OUT
0x000f: 0x000f, # SHIFT IN 0x000f: 0x0f, # SHIFT IN
0x0010: 0x0010, # DATA LINK ESCAPE 0x0010: 0x10, # DATA LINK ESCAPE
0x0011: 0x0011, # DEVICE CONTROL ONE 0x0011: 0x11, # DEVICE CONTROL ONE
0x0012: 0x0012, # DEVICE CONTROL TWO 0x0012: 0x12, # DEVICE CONTROL TWO
0x0013: 0x0013, # DEVICE CONTROL THREE 0x0013: 0x13, # DEVICE CONTROL THREE
0x0014: 0x0014, # DEVICE CONTROL FOUR 0x0014: 0x14, # DEVICE CONTROL FOUR
0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
0x0016: 0x0016, # SYNCHRONOUS IDLE 0x0016: 0x16, # SYNCHRONOUS IDLE
0x0017: 0x0017, # END OF TRANSMISSION BLOCK 0x0017: 0x17, # END OF TRANSMISSION BLOCK
0x0018: 0x0018, # CANCEL 0x0018: 0x18, # CANCEL
0x0019: 0x0019, # END OF MEDIUM 0x0019: 0x19, # END OF MEDIUM
0x001a: 0x001a, # SUBSTITUTE 0x001a: 0x1a, # SUBSTITUTE
0x001b: 0x001b, # ESCAPE 0x001b: 0x1b, # ESCAPE
0x001c: 0x001c, # FILE SEPARATOR 0x001c: 0x1c, # FILE SEPARATOR
0x001d: 0x001d, # GROUP SEPARATOR 0x001d: 0x1d, # GROUP SEPARATOR
0x001e: 0x001e, # RECORD SEPARATOR 0x001e: 0x1e, # RECORD SEPARATOR
0x001f: 0x001f, # UNIT SEPARATOR 0x001f: 0x1f, # UNIT SEPARATOR
0x0020: 0x0020, # SPACE 0x0020: 0x20, # SPACE
0x0021: 0x0021, # EXCLAMATION MARK 0x0021: 0x21, # EXCLAMATION MARK
0x0022: 0x0022, # QUOTATION MARK 0x0022: 0x22, # QUOTATION MARK
0x0023: 0x0023, # NUMBER SIGN 0x0023: 0x23, # NUMBER SIGN
0x0024: 0x0024, # DOLLAR SIGN 0x0024: 0x24, # DOLLAR SIGN
0x0025: 0x0025, # PERCENT SIGN 0x0025: 0x25, # PERCENT SIGN
0x0026: 0x0026, # AMPERSAND 0x0026: 0x26, # AMPERSAND
0x0027: 0x0027, # APOSTROPHE 0x0027: 0x27, # APOSTROPHE
0x0028: 0x0028, # LEFT PARENTHESIS 0x0028: 0x28, # LEFT PARENTHESIS
0x0029: 0x0029, # RIGHT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS
0x002a: 0x002a, # ASTERISK 0x002a: 0x2a, # ASTERISK
0x002b: 0x002b, # PLUS SIGN 0x002b: 0x2b, # PLUS SIGN
0x002c: 0x002c, # COMMA 0x002c: 0x2c, # COMMA
0x002d: 0x002d, # HYPHEN-MINUS 0x002d: 0x2d, # HYPHEN-MINUS
0x002e: 0x002e, # FULL STOP 0x002e: 0x2e, # FULL STOP
0x002f: 0x002f, # SOLIDUS 0x002f: 0x2f, # SOLIDUS
0x0030: 0x0030, # DIGIT ZERO 0x0030: 0x30, # DIGIT ZERO
0x0031: 0x0031, # DIGIT ONE 0x0031: 0x31, # DIGIT ONE
0x0032: 0x0032, # DIGIT TWO 0x0032: 0x32, # DIGIT TWO
0x0033: 0x0033, # DIGIT THREE 0x0033: 0x33, # DIGIT THREE
0x0034: 0x0034, # DIGIT FOUR 0x0034: 0x34, # DIGIT FOUR
0x0035: 0x0035, # DIGIT FIVE 0x0035: 0x35, # DIGIT FIVE
0x0036: 0x0036, # DIGIT SIX 0x0036: 0x36, # DIGIT SIX
0x0037: 0x0037, # DIGIT SEVEN 0x0037: 0x37, # DIGIT SEVEN
0x0038: 0x0038, # DIGIT EIGHT 0x0038: 0x38, # DIGIT EIGHT
0x0039: 0x0039, # DIGIT NINE 0x0039: 0x39, # DIGIT NINE
0x003a: 0x003a, # COLON 0x003a: 0x3a, # COLON
0x003b: 0x003b, # SEMICOLON 0x003b: 0x3b, # SEMICOLON
0x003c: 0x003c, # LESS-THAN SIGN 0x003c: 0x3c, # LESS-THAN SIGN
0x003d: 0x003d, # EQUALS SIGN 0x003d: 0x3d, # EQUALS SIGN
0x003e: 0x003e, # GREATER-THAN SIGN 0x003e: 0x3e, # GREATER-THAN SIGN
0x003f: 0x003f, # QUESTION MARK 0x003f: 0x3f, # QUESTION MARK
0x0040: 0x0040, # COMMERCIAL AT 0x0040: 0x40, # COMMERCIAL AT
0x0041: 0x0041, # LATIN CAPITAL LETTER A 0x0041: 0x41, # LATIN CAPITAL LETTER A
0x0042: 0x0042, # LATIN CAPITAL LETTER B 0x0042: 0x42, # LATIN CAPITAL LETTER B
0x0043: 0x0043, # LATIN CAPITAL LETTER C 0x0043: 0x43, # LATIN CAPITAL LETTER C
0x0044: 0x0044, # LATIN CAPITAL LETTER D 0x0044: 0x44, # LATIN CAPITAL LETTER D
0x0045: 0x0045, # LATIN CAPITAL LETTER E 0x0045: 0x45, # LATIN CAPITAL LETTER E
0x0046: 0x0046, # LATIN CAPITAL LETTER F 0x0046: 0x46, # LATIN CAPITAL LETTER F
0x0047: 0x0047, # LATIN CAPITAL LETTER G 0x0047: 0x47, # LATIN CAPITAL LETTER G
0x0048: 0x0048, # LATIN CAPITAL LETTER H 0x0048: 0x48, # LATIN CAPITAL LETTER H
0x0049: 0x0049, # LATIN CAPITAL LETTER I 0x0049: 0x49, # LATIN CAPITAL LETTER I
0x004a: 0x004a, # LATIN CAPITAL LETTER J 0x004a: 0x4a, # LATIN CAPITAL LETTER J
0x004b: 0x004b, # LATIN CAPITAL LETTER K 0x004b: 0x4b, # LATIN CAPITAL LETTER K
0x004c: 0x004c, # LATIN CAPITAL LETTER L 0x004c: 0x4c, # LATIN CAPITAL LETTER L
0x004d: 0x004d, # LATIN CAPITAL LETTER M 0x004d: 0x4d, # LATIN CAPITAL LETTER M
0x004e: 0x004e, # LATIN CAPITAL LETTER N 0x004e: 0x4e, # LATIN CAPITAL LETTER N
0x004f: 0x004f, # LATIN CAPITAL LETTER O 0x004f: 0x4f, # LATIN CAPITAL LETTER O
0x0050: 0x0050, # LATIN CAPITAL LETTER P 0x0050: 0x50, # LATIN CAPITAL LETTER P
0x0051: 0x0051, # LATIN CAPITAL LETTER Q 0x0051: 0x51, # LATIN CAPITAL LETTER Q
0x0052: 0x0052, # LATIN CAPITAL LETTER R 0x0052: 0x52, # LATIN CAPITAL LETTER R
0x0053: 0x0053, # LATIN CAPITAL LETTER S 0x0053: 0x53, # LATIN CAPITAL LETTER S
0x0054: 0x0054, # LATIN CAPITAL LETTER T 0x0054: 0x54, # LATIN CAPITAL LETTER T
0x0055: 0x0055, # LATIN CAPITAL LETTER U 0x0055: 0x55, # LATIN CAPITAL LETTER U
0x0056: 0x0056, # LATIN CAPITAL LETTER V 0x0056: 0x56, # LATIN CAPITAL LETTER V
0x0057: 0x0057, # LATIN CAPITAL LETTER W 0x0057: 0x57, # LATIN CAPITAL LETTER W
0x0058: 0x0058, # LATIN CAPITAL LETTER X 0x0058: 0x58, # LATIN CAPITAL LETTER X
0x0059: 0x0059, # LATIN CAPITAL LETTER Y 0x0059: 0x59, # LATIN CAPITAL LETTER Y
0x005a: 0x005a, # LATIN CAPITAL LETTER Z 0x005a: 0x5a, # LATIN CAPITAL LETTER Z
0x005b: 0x005b, # LEFT SQUARE BRACKET 0x005b: 0x5b, # LEFT SQUARE BRACKET
0x005c: 0x005c, # REVERSE SOLIDUS 0x005c: 0x5c, # REVERSE SOLIDUS
0x005d: 0x005d, # RIGHT SQUARE BRACKET 0x005d: 0x5d, # RIGHT SQUARE BRACKET
0x005e: 0x005e, # CIRCUMFLEX ACCENT 0x005e: 0x5e, # CIRCUMFLEX ACCENT
0x005f: 0x005f, # LOW LINE 0x005f: 0x5f, # LOW LINE
0x0060: 0x0060, # GRAVE ACCENT 0x0060: 0x60, # GRAVE ACCENT
0x0061: 0x0061, # LATIN SMALL LETTER A 0x0061: 0x61, # LATIN SMALL LETTER A
0x0062: 0x0062, # LATIN SMALL LETTER B 0x0062: 0x62, # LATIN SMALL LETTER B
0x0063: 0x0063, # LATIN SMALL LETTER C 0x0063: 0x63, # LATIN SMALL LETTER C
0x0064: 0x0064, # LATIN SMALL LETTER D 0x0064: 0x64, # LATIN SMALL LETTER D
0x0065: 0x0065, # LATIN SMALL LETTER E 0x0065: 0x65, # LATIN SMALL LETTER E
0x0066: 0x0066, # LATIN SMALL LETTER F 0x0066: 0x66, # LATIN SMALL LETTER F
0x0067: 0x0067, # LATIN SMALL LETTER G 0x0067: 0x67, # LATIN SMALL LETTER G
0x0068: 0x0068, # LATIN SMALL LETTER H 0x0068: 0x68, # LATIN SMALL LETTER H
0x0069: 0x0069, # LATIN SMALL LETTER I 0x0069: 0x69, # LATIN SMALL LETTER I
0x006a: 0x006a, # LATIN SMALL LETTER J 0x006a: 0x6a, # LATIN SMALL LETTER J
0x006b: 0x006b, # LATIN SMALL LETTER K 0x006b: 0x6b, # LATIN SMALL LETTER K
0x006c: 0x006c, # LATIN SMALL LETTER L 0x006c: 0x6c, # LATIN SMALL LETTER L
0x006d: 0x006d, # LATIN SMALL LETTER M 0x006d: 0x6d, # LATIN SMALL LETTER M
0x006e: 0x006e, # LATIN SMALL LETTER N 0x006e: 0x6e, # LATIN SMALL LETTER N
0x006f: 0x006f, # LATIN SMALL LETTER O 0x006f: 0x6f, # LATIN SMALL LETTER O
0x0070: 0x0070, # LATIN SMALL LETTER P 0x0070: 0x70, # LATIN SMALL LETTER P
0x0071: 0x0071, # LATIN SMALL LETTER Q 0x0071: 0x71, # LATIN SMALL LETTER Q
0x0072: 0x0072, # LATIN SMALL LETTER R 0x0072: 0x72, # LATIN SMALL LETTER R
0x0073: 0x0073, # LATIN SMALL LETTER S 0x0073: 0x73, # LATIN SMALL LETTER S
0x0074: 0x0074, # LATIN SMALL LETTER T 0x0074: 0x74, # LATIN SMALL LETTER T
0x0075: 0x0075, # LATIN SMALL LETTER U 0x0075: 0x75, # LATIN SMALL LETTER U
0x0076: 0x0076, # LATIN SMALL LETTER V 0x0076: 0x76, # LATIN SMALL LETTER V
0x0077: 0x0077, # LATIN SMALL LETTER W 0x0077: 0x77, # LATIN SMALL LETTER W
0x0078: 0x0078, # LATIN SMALL LETTER X 0x0078: 0x78, # LATIN SMALL LETTER X
0x0079: 0x0079, # LATIN SMALL LETTER Y 0x0079: 0x79, # LATIN SMALL LETTER Y
0x007a: 0x007a, # LATIN SMALL LETTER Z 0x007a: 0x7a, # LATIN SMALL LETTER Z
0x007b: 0x007b, # LEFT CURLY BRACKET 0x007b: 0x7b, # LEFT CURLY BRACKET
0x007c: 0x007c, # VERTICAL LINE 0x007c: 0x7c, # VERTICAL LINE
0x007d: 0x007d, # RIGHT CURLY BRACKET 0x007d: 0x7d, # RIGHT CURLY BRACKET
0x007e: 0x007e, # TILDE 0x007e: 0x7e, # TILDE
0x007f: 0x007f, # DELETE 0x007f: 0x7f, # DELETE
0x0080: 0x0080, # <control> 0x0080: 0x80, # <control>
0x0081: 0x0081, # <control> 0x0081: 0x81, # <control>
0x0082: 0x0082, # <control> 0x0082: 0x82, # <control>
0x0083: 0x0083, # <control> 0x0083: 0x83, # <control>
0x0084: 0x0084, # <control> 0x0084: 0x84, # <control>
0x0085: 0x0085, # <control> 0x0085: 0x85, # <control>
0x0086: 0x0086, # <control> 0x0086: 0x86, # <control>
0x0087: 0x0087, # <control> 0x0087: 0x87, # <control>
0x0088: 0x0088, # <control> 0x0088: 0x88, # <control>
0x0089: 0x0089, # <control> 0x0089: 0x89, # <control>
0x008a: 0x008a, # <control> 0x008a: 0x8a, # <control>
0x008b: 0x008b, # <control> 0x008b: 0x8b, # <control>
0x008c: 0x008c, # <control> 0x008c: 0x8c, # <control>
0x008d: 0x008d, # <control> 0x008d: 0x8d, # <control>
0x008e: 0x008e, # <control> 0x008e: 0x8e, # <control>
0x008f: 0x008f, # <control> 0x008f: 0x8f, # <control>
0x0090: 0x0090, # <control> 0x0090: 0x90, # <control>
0x0091: 0x0091, # <control> 0x0091: 0x91, # <control>
0x0092: 0x0092, # <control> 0x0092: 0x92, # <control>
0x0093: 0x0093, # <control> 0x0093: 0x93, # <control>
0x0094: 0x0094, # <control> 0x0094: 0x94, # <control>
0x0095: 0x0095, # <control> 0x0095: 0x95, # <control>
0x0096: 0x0096, # <control> 0x0096: 0x96, # <control>
0x0097: 0x0097, # <control> 0x0097: 0x97, # <control>
0x0098: 0x0098, # <control> 0x0098: 0x98, # <control>
0x0099: 0x0099, # <control> 0x0099: 0x99, # <control>
0x009a: 0x009a, # <control> 0x009a: 0x9a, # <control>
0x009b: 0x009b, # <control> 0x009b: 0x9b, # <control>
0x009c: 0x009c, # <control> 0x009c: 0x9c, # <control>
0x009d: 0x009d, # <control> 0x009d: 0x9d, # <control>
0x009e: 0x009e, # <control> 0x009e: 0x9e, # <control>
0x009f: 0x009f, # <control> 0x009f: 0x9f, # <control>
0x00a0: 0x00a0, # NO-BREAK SPACE 0x00a0: 0xa0, # NO-BREAK SPACE
0x00a4: 0x00a4, # CURRENCY SIGN 0x00a4: 0xa4, # CURRENCY SIGN
0x00ad: 0x00ad, # SOFT HYPHEN 0x00ad: 0xad, # SOFT HYPHEN
0x060c: 0x00ac, # ARABIC COMMA 0x060c: 0xac, # ARABIC COMMA
0x061b: 0x00bb, # ARABIC SEMICOLON 0x061b: 0xbb, # ARABIC SEMICOLON
0x061f: 0x00bf, # ARABIC QUESTION MARK 0x061f: 0xbf, # ARABIC QUESTION MARK
0x0621: 0x00c1, # ARABIC LETTER HAMZA 0x0621: 0xc1, # ARABIC LETTER HAMZA
0x0622: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE 0x0622: 0xc2, # ARABIC LETTER ALEF WITH MADDA ABOVE
0x0623: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE 0x0623: 0xc3, # ARABIC LETTER ALEF WITH HAMZA ABOVE
0x0624: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE 0x0624: 0xc4, # ARABIC LETTER WAW WITH HAMZA ABOVE
0x0625: 0x00c5, # ARABIC LETTER ALEF WITH HAMZA BELOW 0x0625: 0xc5, # ARABIC LETTER ALEF WITH HAMZA BELOW
0x0626: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE 0x0626: 0xc6, # ARABIC LETTER YEH WITH HAMZA ABOVE
0x0627: 0x00c7, # ARABIC LETTER ALEF 0x0627: 0xc7, # ARABIC LETTER ALEF
0x0628: 0x00c8, # ARABIC LETTER BEH 0x0628: 0xc8, # ARABIC LETTER BEH
0x0629: 0x00c9, # ARABIC LETTER TEH MARBUTA 0x0629: 0xc9, # ARABIC LETTER TEH MARBUTA
0x062a: 0x00ca, # ARABIC LETTER TEH 0x062a: 0xca, # ARABIC LETTER TEH
0x062b: 0x00cb, # ARABIC LETTER THEH 0x062b: 0xcb, # ARABIC LETTER THEH
0x062c: 0x00cc, # ARABIC LETTER JEEM 0x062c: 0xcc, # ARABIC LETTER JEEM
0x062d: 0x00cd, # ARABIC LETTER HAH 0x062d: 0xcd, # ARABIC LETTER HAH
0x062e: 0x00ce, # ARABIC LETTER KHAH 0x062e: 0xce, # ARABIC LETTER KHAH
0x062f: 0x00cf, # ARABIC LETTER DAL 0x062f: 0xcf, # ARABIC LETTER DAL
0x0630: 0x00d0, # ARABIC LETTER THAL 0x0630: 0xd0, # ARABIC LETTER THAL
0x0631: 0x00d1, # ARABIC LETTER REH 0x0631: 0xd1, # ARABIC LETTER REH
0x0632: 0x00d2, # ARABIC LETTER ZAIN 0x0632: 0xd2, # ARABIC LETTER ZAIN
0x0633: 0x00d3, # ARABIC LETTER SEEN 0x0633: 0xd3, # ARABIC LETTER SEEN
0x0634: 0x00d4, # ARABIC LETTER SHEEN 0x0634: 0xd4, # ARABIC LETTER SHEEN
0x0635: 0x00d5, # ARABIC LETTER SAD 0x0635: 0xd5, # ARABIC LETTER SAD
0x0636: 0x00d6, # ARABIC LETTER DAD 0x0636: 0xd6, # ARABIC LETTER DAD
0x0637: 0x00d7, # ARABIC LETTER TAH 0x0637: 0xd7, # ARABIC LETTER TAH
0x0638: 0x00d8, # ARABIC LETTER ZAH 0x0638: 0xd8, # ARABIC LETTER ZAH
0x0639: 0x00d9, # ARABIC LETTER AIN 0x0639: 0xd9, # ARABIC LETTER AIN
0x063a: 0x00da, # ARABIC LETTER GHAIN 0x063a: 0xda, # ARABIC LETTER GHAIN
0x0640: 0x00e0, # ARABIC TATWEEL 0x0640: 0xe0, # ARABIC TATWEEL
0x0641: 0x00e1, # ARABIC LETTER FEH 0x0641: 0xe1, # ARABIC LETTER FEH
0x0642: 0x00e2, # ARABIC LETTER QAF 0x0642: 0xe2, # ARABIC LETTER QAF
0x0643: 0x00e3, # ARABIC LETTER KAF 0x0643: 0xe3, # ARABIC LETTER KAF
0x0644: 0x00e4, # ARABIC LETTER LAM 0x0644: 0xe4, # ARABIC LETTER LAM
0x0645: 0x00e5, # ARABIC LETTER MEEM 0x0645: 0xe5, # ARABIC LETTER MEEM
0x0646: 0x00e6, # ARABIC LETTER NOON 0x0646: 0xe6, # ARABIC LETTER NOON
0x0647: 0x00e7, # ARABIC LETTER HEH 0x0647: 0xe7, # ARABIC LETTER HEH
0x0648: 0x00e8, # ARABIC LETTER WAW 0x0648: 0xe8, # ARABIC LETTER WAW
0x0649: 0x00e9, # ARABIC LETTER ALEF MAKSURA 0x0649: 0xe9, # ARABIC LETTER ALEF MAKSURA
0x064a: 0x00ea, # ARABIC LETTER YEH 0x064a: 0xea, # ARABIC LETTER YEH
0x064b: 0x00eb, # ARABIC FATHATAN 0x064b: 0xeb, # ARABIC FATHATAN
0x064c: 0x00ec, # ARABIC DAMMATAN 0x064c: 0xec, # ARABIC DAMMATAN
0x064d: 0x00ed, # ARABIC KASRATAN 0x064d: 0xed, # ARABIC KASRATAN
0x064e: 0x00ee, # ARABIC FATHA 0x064e: 0xee, # ARABIC FATHA
0x064f: 0x00ef, # ARABIC DAMMA 0x064f: 0xef, # ARABIC DAMMA
0x0650: 0x00f0, # ARABIC KASRA 0x0650: 0xf0, # ARABIC KASRA
0x0651: 0x00f1, # ARABIC SHADDA 0x0651: 0xf1, # ARABIC SHADDA
0x0652: 0x00f2, # ARABIC SUKUN 0x0652: 0xf2, # ARABIC SUKUN
} }

File diff suppressed because it is too large Load diff

View file

@ -1,4 +1,4 @@
""" Python Character Mapping Codec generated from 'ISO8859/8859-8.TXT' with gencodec.py. """ Python Character Mapping Codec generated from 'MAPPINGS/ISO8859/8859-8.TXT' with gencodec.py.
"""#" """#"
@ -28,274 +28,201 @@ def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter) return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = codecs.make_identity_dict(range(256))
decoding_map.update({
0x00a1: None,
0x00aa: 0x00d7, # MULTIPLICATION SIGN
0x00ba: 0x00f7, # DIVISION SIGN
0x00bf: None,
0x00c0: None,
0x00c1: None,
0x00c2: None,
0x00c3: None,
0x00c4: None,
0x00c5: None,
0x00c6: None,
0x00c7: None,
0x00c8: None,
0x00c9: None,
0x00ca: None,
0x00cb: None,
0x00cc: None,
0x00cd: None,
0x00ce: None,
0x00cf: None,
0x00d0: None,
0x00d1: None,
0x00d2: None,
0x00d3: None,
0x00d4: None,
0x00d5: None,
0x00d6: None,
0x00d7: None,
0x00d8: None,
0x00d9: None,
0x00da: None,
0x00db: None,
0x00dc: None,
0x00dd: None,
0x00de: None,
0x00df: 0x2017, # DOUBLE LOW LINE
0x00e0: 0x05d0, # HEBREW LETTER ALEF
0x00e1: 0x05d1, # HEBREW LETTER BET
0x00e2: 0x05d2, # HEBREW LETTER GIMEL
0x00e3: 0x05d3, # HEBREW LETTER DALET
0x00e4: 0x05d4, # HEBREW LETTER HE
0x00e5: 0x05d5, # HEBREW LETTER VAV
0x00e6: 0x05d6, # HEBREW LETTER ZAYIN
0x00e7: 0x05d7, # HEBREW LETTER HET
0x00e8: 0x05d8, # HEBREW LETTER TET
0x00e9: 0x05d9, # HEBREW LETTER YOD
0x00ea: 0x05da, # HEBREW LETTER FINAL KAF
0x00eb: 0x05db, # HEBREW LETTER KAF
0x00ec: 0x05dc, # HEBREW LETTER LAMED
0x00ed: 0x05dd, # HEBREW LETTER FINAL MEM
0x00ee: 0x05de, # HEBREW LETTER MEM
0x00ef: 0x05df, # HEBREW LETTER FINAL NUN
0x00f0: 0x05e0, # HEBREW LETTER NUN
0x00f1: 0x05e1, # HEBREW LETTER SAMEKH
0x00f2: 0x05e2, # HEBREW LETTER AYIN
0x00f3: 0x05e3, # HEBREW LETTER FINAL PE
0x00f4: 0x05e4, # HEBREW LETTER PE
0x00f5: 0x05e5, # HEBREW LETTER FINAL TSADI
0x00f6: 0x05e6, # HEBREW LETTER TSADI
0x00f7: 0x05e7, # HEBREW LETTER QOF
0x00f8: 0x05e8, # HEBREW LETTER RESH
0x00f9: 0x05e9, # HEBREW LETTER SHIN
0x00fa: 0x05ea, # HEBREW LETTER TAV
0x00fb: None,
0x00fc: None,
0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
0x00ff: None,
})
### Decoding Table ### Decoding Table
decoding_table = ( decoding_table = (
u'\x00' # 0x0000 -> NULL u'\x00' # 0x00 -> NULL
u'\x01' # 0x0001 -> START OF HEADING u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x0002 -> START OF TEXT u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x0003 -> END OF TEXT u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x0004 -> END OF TRANSMISSION u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x0005 -> ENQUIRY u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x0006 -> ACKNOWLEDGE u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x0007 -> BELL u'\x07' # 0x07 -> BELL
u'\x08' # 0x0008 -> BACKSPACE u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x0009 -> HORIZONTAL TABULATION u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x000a -> LINE FEED u'\n' # 0x0a -> LINE FEED
u'\x0b' # 0x000b -> VERTICAL TABULATION u'\x0b' # 0x0b -> VERTICAL TABULATION
u'\x0c' # 0x000c -> FORM FEED u'\x0c' # 0x0c -> FORM FEED
u'\r' # 0x000d -> CARRIAGE RETURN u'\r' # 0x0d -> CARRIAGE RETURN
u'\x0e' # 0x000e -> SHIFT OUT u'\x0e' # 0x0e -> SHIFT OUT
u'\x0f' # 0x000f -> SHIFT IN u'\x0f' # 0x0f -> SHIFT IN
u'\x10' # 0x0010 -> DATA LINK ESCAPE u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x0011 -> DEVICE CONTROL ONE u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x0012 -> DEVICE CONTROL TWO u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x0013 -> DEVICE CONTROL THREE u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x0014 -> DEVICE CONTROL FOUR u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x0016 -> SYNCHRONOUS IDLE u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x0018 -> CANCEL u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x0019 -> END OF MEDIUM u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x001a -> SUBSTITUTE u'\x1a' # 0x1a -> SUBSTITUTE
u'\x1b' # 0x001b -> ESCAPE u'\x1b' # 0x1b -> ESCAPE
u'\x1c' # 0x001c -> FILE SEPARATOR u'\x1c' # 0x1c -> FILE SEPARATOR
u'\x1d' # 0x001d -> GROUP SEPARATOR u'\x1d' # 0x1d -> GROUP SEPARATOR
u'\x1e' # 0x001e -> RECORD SEPARATOR u'\x1e' # 0x1e -> RECORD SEPARATOR
u'\x1f' # 0x001f -> UNIT SEPARATOR u'\x1f' # 0x1f -> UNIT SEPARATOR
u' ' # 0x0020 -> SPACE u' ' # 0x20 -> SPACE
u'!' # 0x0021 -> EXCLAMATION MARK u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x0022 -> QUOTATION MARK u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x0023 -> NUMBER SIGN u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x0024 -> DOLLAR SIGN u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x0025 -> PERCENT SIGN u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x0026 -> AMPERSAND u'&' # 0x26 -> AMPERSAND
u"'" # 0x0027 -> APOSTROPHE u"'" # 0x27 -> APOSTROPHE
u'(' # 0x0028 -> LEFT PARENTHESIS u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x0029 -> RIGHT PARENTHESIS u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x002a -> ASTERISK u'*' # 0x2a -> ASTERISK
u'+' # 0x002b -> PLUS SIGN u'+' # 0x2b -> PLUS SIGN
u',' # 0x002c -> COMMA u',' # 0x2c -> COMMA
u'-' # 0x002d -> HYPHEN-MINUS u'-' # 0x2d -> HYPHEN-MINUS
u'.' # 0x002e -> FULL STOP u'.' # 0x2e -> FULL STOP
u'/' # 0x002f -> SOLIDUS u'/' # 0x2f -> SOLIDUS
u'0' # 0x0030 -> DIGIT ZERO u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x0031 -> DIGIT ONE u'1' # 0x31 -> DIGIT ONE
u'2' # 0x0032 -> DIGIT TWO u'2' # 0x32 -> DIGIT TWO
u'3' # 0x0033 -> DIGIT THREE u'3' # 0x33 -> DIGIT THREE
u'4' # 0x0034 -> DIGIT FOUR u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x0035 -> DIGIT FIVE u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x0036 -> DIGIT SIX u'6' # 0x36 -> DIGIT SIX
u'7' # 0x0037 -> DIGIT SEVEN u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x0038 -> DIGIT EIGHT u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x0039 -> DIGIT NINE u'9' # 0x39 -> DIGIT NINE
u':' # 0x003a -> COLON u':' # 0x3a -> COLON
u';' # 0x003b -> SEMICOLON u';' # 0x3b -> SEMICOLON
u'<' # 0x003c -> LESS-THAN SIGN u'<' # 0x3c -> LESS-THAN SIGN
u'=' # 0x003d -> EQUALS SIGN u'=' # 0x3d -> EQUALS SIGN
u'>' # 0x003e -> GREATER-THAN SIGN u'>' # 0x3e -> GREATER-THAN SIGN
u'?' # 0x003f -> QUESTION MARK u'?' # 0x3f -> QUESTION MARK
u'@' # 0x0040 -> COMMERCIAL AT u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x0041 -> LATIN CAPITAL LETTER A u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x0042 -> LATIN CAPITAL LETTER B u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x0043 -> LATIN CAPITAL LETTER C u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x0044 -> LATIN CAPITAL LETTER D u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x0045 -> LATIN CAPITAL LETTER E u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x0046 -> LATIN CAPITAL LETTER F u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x0047 -> LATIN CAPITAL LETTER G u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x0048 -> LATIN CAPITAL LETTER H u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x0049 -> LATIN CAPITAL LETTER I u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x004a -> LATIN CAPITAL LETTER J u'J' # 0x4a -> LATIN CAPITAL LETTER J
u'K' # 0x004b -> LATIN CAPITAL LETTER K u'K' # 0x4b -> LATIN CAPITAL LETTER K
u'L' # 0x004c -> LATIN CAPITAL LETTER L u'L' # 0x4c -> LATIN CAPITAL LETTER L
u'M' # 0x004d -> LATIN CAPITAL LETTER M u'M' # 0x4d -> LATIN CAPITAL LETTER M
u'N' # 0x004e -> LATIN CAPITAL LETTER N u'N' # 0x4e -> LATIN CAPITAL LETTER N
u'O' # 0x004f -> LATIN CAPITAL LETTER O u'O' # 0x4f -> LATIN CAPITAL LETTER O
u'P' # 0x0050 -> LATIN CAPITAL LETTER P u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x0052 -> LATIN CAPITAL LETTER R u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x0053 -> LATIN CAPITAL LETTER S u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x0054 -> LATIN CAPITAL LETTER T u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x0055 -> LATIN CAPITAL LETTER U u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x0056 -> LATIN CAPITAL LETTER V u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x0057 -> LATIN CAPITAL LETTER W u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x0058 -> LATIN CAPITAL LETTER X u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x005a -> LATIN CAPITAL LETTER Z u'Z' # 0x5a -> LATIN CAPITAL LETTER Z
u'[' # 0x005b -> LEFT SQUARE BRACKET u'[' # 0x5b -> LEFT SQUARE BRACKET
u'\\' # 0x005c -> REVERSE SOLIDUS u'\\' # 0x5c -> REVERSE SOLIDUS
u']' # 0x005d -> RIGHT SQUARE BRACKET u']' # 0x5d -> RIGHT SQUARE BRACKET
u'^' # 0x005e -> CIRCUMFLEX ACCENT u'^' # 0x5e -> CIRCUMFLEX ACCENT
u'_' # 0x005f -> LOW LINE u'_' # 0x5f -> LOW LINE
u'`' # 0x0060 -> GRAVE ACCENT u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x0061 -> LATIN SMALL LETTER A u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x0062 -> LATIN SMALL LETTER B u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x0063 -> LATIN SMALL LETTER C u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x0064 -> LATIN SMALL LETTER D u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x0065 -> LATIN SMALL LETTER E u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x0066 -> LATIN SMALL LETTER F u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x0067 -> LATIN SMALL LETTER G u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x0068 -> LATIN SMALL LETTER H u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x0069 -> LATIN SMALL LETTER I u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x006a -> LATIN SMALL LETTER J u'j' # 0x6a -> LATIN SMALL LETTER J
u'k' # 0x006b -> LATIN SMALL LETTER K u'k' # 0x6b -> LATIN SMALL LETTER K
u'l' # 0x006c -> LATIN SMALL LETTER L u'l' # 0x6c -> LATIN SMALL LETTER L
u'm' # 0x006d -> LATIN SMALL LETTER M u'm' # 0x6d -> LATIN SMALL LETTER M
u'n' # 0x006e -> LATIN SMALL LETTER N u'n' # 0x6e -> LATIN SMALL LETTER N
u'o' # 0x006f -> LATIN SMALL LETTER O u'o' # 0x6f -> LATIN SMALL LETTER O
u'p' # 0x0070 -> LATIN SMALL LETTER P u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x0071 -> LATIN SMALL LETTER Q u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x0072 -> LATIN SMALL LETTER R u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x0073 -> LATIN SMALL LETTER S u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x0074 -> LATIN SMALL LETTER T u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x0075 -> LATIN SMALL LETTER U u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x0076 -> LATIN SMALL LETTER V u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x0077 -> LATIN SMALL LETTER W u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x0078 -> LATIN SMALL LETTER X u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x0079 -> LATIN SMALL LETTER Y u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x007a -> LATIN SMALL LETTER Z u'z' # 0x7a -> LATIN SMALL LETTER Z
u'{' # 0x007b -> LEFT CURLY BRACKET u'{' # 0x7b -> LEFT CURLY BRACKET
u'|' # 0x007c -> VERTICAL LINE u'|' # 0x7c -> VERTICAL LINE
u'}' # 0x007d -> RIGHT CURLY BRACKET u'}' # 0x7d -> RIGHT CURLY BRACKET
u'~' # 0x007e -> TILDE u'~' # 0x7e -> TILDE
u'\x7f' # 0x007f -> DELETE u'\x7f' # 0x7f -> DELETE
u'\x80' # 0x0080 -> <control> u'\x80' # 0x80 -> <control>
u'\x81' # 0x0081 -> <control> u'\x81' # 0x81 -> <control>
u'\x82' # 0x0082 -> <control> u'\x82' # 0x82 -> <control>
u'\x83' # 0x0083 -> <control> u'\x83' # 0x83 -> <control>
u'\x84' # 0x0084 -> <control> u'\x84' # 0x84 -> <control>
u'\x85' # 0x0085 -> <control> u'\x85' # 0x85 -> <control>
u'\x86' # 0x0086 -> <control> u'\x86' # 0x86 -> <control>
u'\x87' # 0x0087 -> <control> u'\x87' # 0x87 -> <control>
u'\x88' # 0x0088 -> <control> u'\x88' # 0x88 -> <control>
u'\x89' # 0x0089 -> <control> u'\x89' # 0x89 -> <control>
u'\x8a' # 0x008a -> <control> u'\x8a' # 0x8a -> <control>
u'\x8b' # 0x008b -> <control> u'\x8b' # 0x8b -> <control>
u'\x8c' # 0x008c -> <control> u'\x8c' # 0x8c -> <control>
u'\x8d' # 0x008d -> <control> u'\x8d' # 0x8d -> <control>
u'\x8e' # 0x008e -> <control> u'\x8e' # 0x8e -> <control>
u'\x8f' # 0x008f -> <control> u'\x8f' # 0x8f -> <control>
u'\x90' # 0x0090 -> <control> u'\x90' # 0x90 -> <control>
u'\x91' # 0x0091 -> <control> u'\x91' # 0x91 -> <control>
u'\x92' # 0x0092 -> <control> u'\x92' # 0x92 -> <control>
u'\x93' # 0x0093 -> <control> u'\x93' # 0x93 -> <control>
u'\x94' # 0x0094 -> <control> u'\x94' # 0x94 -> <control>
u'\x95' # 0x0095 -> <control> u'\x95' # 0x95 -> <control>
u'\x96' # 0x0096 -> <control> u'\x96' # 0x96 -> <control>
u'\x97' # 0x0097 -> <control> u'\x97' # 0x97 -> <control>
u'\x98' # 0x0098 -> <control> u'\x98' # 0x98 -> <control>
u'\x99' # 0x0099 -> <control> u'\x99' # 0x99 -> <control>
u'\x9a' # 0x009a -> <control> u'\x9a' # 0x9a -> <control>
u'\x9b' # 0x009b -> <control> u'\x9b' # 0x9b -> <control>
u'\x9c' # 0x009c -> <control> u'\x9c' # 0x9c -> <control>
u'\x9d' # 0x009d -> <control> u'\x9d' # 0x9d -> <control>
u'\x9e' # 0x009e -> <control> u'\x9e' # 0x9e -> <control>
u'\x9f' # 0x009f -> <control> u'\x9f' # 0x9f -> <control>
u'\xa0' # 0x00a0 -> NO-BREAK SPACE u'\xa0' # 0xa0 -> NO-BREAK SPACE
u'\ufffe' u'\ufffe'
u'\xa2' # 0x00a2 -> CENT SIGN u'\xa2' # 0xa2 -> CENT SIGN
u'\xa3' # 0x00a3 -> POUND SIGN u'\xa3' # 0xa3 -> POUND SIGN
u'\xa4' # 0x00a4 -> CURRENCY SIGN u'\xa4' # 0xa4 -> CURRENCY SIGN
u'\xa5' # 0x00a5 -> YEN SIGN u'\xa5' # 0xa5 -> YEN SIGN
u'\xa6' # 0x00a6 -> BROKEN BAR u'\xa6' # 0xa6 -> BROKEN BAR
u'\xa7' # 0x00a7 -> SECTION SIGN u'\xa7' # 0xa7 -> SECTION SIGN
u'\xa8' # 0x00a8 -> DIAERESIS u'\xa8' # 0xa8 -> DIAERESIS
u'\xa9' # 0x00a9 -> COPYRIGHT SIGN u'\xa9' # 0xa9 -> COPYRIGHT SIGN
u'\xd7' # 0x00aa -> MULTIPLICATION SIGN u'\xd7' # 0xaa -> MULTIPLICATION SIGN
u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xab' # 0xab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xac' # 0x00ac -> NOT SIGN u'\xac' # 0xac -> NOT SIGN
u'\xad' # 0x00ad -> SOFT HYPHEN u'\xad' # 0xad -> SOFT HYPHEN
u'\xae' # 0x00ae -> REGISTERED SIGN u'\xae' # 0xae -> REGISTERED SIGN
u'\xaf' # 0x00af -> MACRON u'\xaf' # 0xaf -> MACRON
u'\xb0' # 0x00b0 -> DEGREE SIGN u'\xb0' # 0xb0 -> DEGREE SIGN
u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN u'\xb1' # 0xb1 -> PLUS-MINUS SIGN
u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO u'\xb2' # 0xb2 -> SUPERSCRIPT TWO
u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE u'\xb3' # 0xb3 -> SUPERSCRIPT THREE
u'\xb4' # 0x00b4 -> ACUTE ACCENT u'\xb4' # 0xb4 -> ACUTE ACCENT
u'\xb5' # 0x00b5 -> MICRO SIGN u'\xb5' # 0xb5 -> MICRO SIGN
u'\xb6' # 0x00b6 -> PILCROW SIGN u'\xb6' # 0xb6 -> PILCROW SIGN
u'\xb7' # 0x00b7 -> MIDDLE DOT u'\xb7' # 0xb7 -> MIDDLE DOT
u'\xb8' # 0x00b8 -> CEDILLA u'\xb8' # 0xb8 -> CEDILLA
u'\xb9' # 0x00b9 -> SUPERSCRIPT ONE u'\xb9' # 0xb9 -> SUPERSCRIPT ONE
u'\xf7' # 0x00ba -> DIVISION SIGN u'\xf7' # 0xba -> DIVISION SIGN
u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK u'\xbb' # 0xbb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
u'\xbc' # 0x00bc -> VULGAR FRACTION ONE QUARTER u'\xbc' # 0xbc -> VULGAR FRACTION ONE QUARTER
u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF u'\xbd' # 0xbd -> VULGAR FRACTION ONE HALF
u'\xbe' # 0x00be -> VULGAR FRACTION THREE QUARTERS u'\xbe' # 0xbe -> VULGAR FRACTION THREE QUARTERS
u'\ufffe' u'\ufffe'
u'\ufffe' u'\ufffe'
u'\ufffe' u'\ufffe'
@ -328,262 +255,262 @@ decoding_table = (
u'\ufffe' u'\ufffe'
u'\ufffe' u'\ufffe'
u'\ufffe' u'\ufffe'
u'\u2017' # 0x00df -> DOUBLE LOW LINE u'\u2017' # 0xdf -> DOUBLE LOW LINE
u'\u05d0' # 0x00e0 -> HEBREW LETTER ALEF u'\u05d0' # 0xe0 -> HEBREW LETTER ALEF
u'\u05d1' # 0x00e1 -> HEBREW LETTER BET u'\u05d1' # 0xe1 -> HEBREW LETTER BET
u'\u05d2' # 0x00e2 -> HEBREW LETTER GIMEL u'\u05d2' # 0xe2 -> HEBREW LETTER GIMEL
u'\u05d3' # 0x00e3 -> HEBREW LETTER DALET u'\u05d3' # 0xe3 -> HEBREW LETTER DALET
u'\u05d4' # 0x00e4 -> HEBREW LETTER HE u'\u05d4' # 0xe4 -> HEBREW LETTER HE
u'\u05d5' # 0x00e5 -> HEBREW LETTER VAV u'\u05d5' # 0xe5 -> HEBREW LETTER VAV
u'\u05d6' # 0x00e6 -> HEBREW LETTER ZAYIN u'\u05d6' # 0xe6 -> HEBREW LETTER ZAYIN
u'\u05d7' # 0x00e7 -> HEBREW LETTER HET u'\u05d7' # 0xe7 -> HEBREW LETTER HET
u'\u05d8' # 0x00e8 -> HEBREW LETTER TET u'\u05d8' # 0xe8 -> HEBREW LETTER TET
u'\u05d9' # 0x00e9 -> HEBREW LETTER YOD u'\u05d9' # 0xe9 -> HEBREW LETTER YOD
u'\u05da' # 0x00ea -> HEBREW LETTER FINAL KAF u'\u05da' # 0xea -> HEBREW LETTER FINAL KAF
u'\u05db' # 0x00eb -> HEBREW LETTER KAF u'\u05db' # 0xeb -> HEBREW LETTER KAF
u'\u05dc' # 0x00ec -> HEBREW LETTER LAMED u'\u05dc' # 0xec -> HEBREW LETTER LAMED
u'\u05dd' # 0x00ed -> HEBREW LETTER FINAL MEM u'\u05dd' # 0xed -> HEBREW LETTER FINAL MEM
u'\u05de' # 0x00ee -> HEBREW LETTER MEM u'\u05de' # 0xee -> HEBREW LETTER MEM
u'\u05df' # 0x00ef -> HEBREW LETTER FINAL NUN u'\u05df' # 0xef -> HEBREW LETTER FINAL NUN
u'\u05e0' # 0x00f0 -> HEBREW LETTER NUN u'\u05e0' # 0xf0 -> HEBREW LETTER NUN
u'\u05e1' # 0x00f1 -> HEBREW LETTER SAMEKH u'\u05e1' # 0xf1 -> HEBREW LETTER SAMEKH
u'\u05e2' # 0x00f2 -> HEBREW LETTER AYIN u'\u05e2' # 0xf2 -> HEBREW LETTER AYIN
u'\u05e3' # 0x00f3 -> HEBREW LETTER FINAL PE u'\u05e3' # 0xf3 -> HEBREW LETTER FINAL PE
u'\u05e4' # 0x00f4 -> HEBREW LETTER PE u'\u05e4' # 0xf4 -> HEBREW LETTER PE
u'\u05e5' # 0x00f5 -> HEBREW LETTER FINAL TSADI u'\u05e5' # 0xf5 -> HEBREW LETTER FINAL TSADI
u'\u05e6' # 0x00f6 -> HEBREW LETTER TSADI u'\u05e6' # 0xf6 -> HEBREW LETTER TSADI
u'\u05e7' # 0x00f7 -> HEBREW LETTER QOF u'\u05e7' # 0xf7 -> HEBREW LETTER QOF
u'\u05e8' # 0x00f8 -> HEBREW LETTER RESH u'\u05e8' # 0xf8 -> HEBREW LETTER RESH
u'\u05e9' # 0x00f9 -> HEBREW LETTER SHIN u'\u05e9' # 0xf9 -> HEBREW LETTER SHIN
u'\u05ea' # 0x00fa -> HEBREW LETTER TAV u'\u05ea' # 0xfa -> HEBREW LETTER TAV
u'\ufffe' u'\ufffe'
u'\ufffe' u'\ufffe'
u'\u200e' # 0x00fd -> LEFT-TO-RIGHT MARK u'\u200e' # 0xfd -> LEFT-TO-RIGHT MARK
u'\u200f' # 0x00fe -> RIGHT-TO-LEFT MARK u'\u200f' # 0xfe -> RIGHT-TO-LEFT MARK
u'\ufffe' u'\ufffe'
) )
### Encoding Map ### Encoding Map
encoding_map = { encoding_map = {
0x0000: 0x0000, # NULL 0x0000: 0x00, # NULL
0x0001: 0x0001, # START OF HEADING 0x0001: 0x01, # START OF HEADING
0x0002: 0x0002, # START OF TEXT 0x0002: 0x02, # START OF TEXT
0x0003: 0x0003, # END OF TEXT 0x0003: 0x03, # END OF TEXT
0x0004: 0x0004, # END OF TRANSMISSION 0x0004: 0x04, # END OF TRANSMISSION
0x0005: 0x0005, # ENQUIRY 0x0005: 0x05, # ENQUIRY
0x0006: 0x0006, # ACKNOWLEDGE 0x0006: 0x06, # ACKNOWLEDGE
0x0007: 0x0007, # BELL 0x0007: 0x07, # BELL
0x0008: 0x0008, # BACKSPACE 0x0008: 0x08, # BACKSPACE
0x0009: 0x0009, # HORIZONTAL TABULATION 0x0009: 0x09, # HORIZONTAL TABULATION
0x000a: 0x000a, # LINE FEED 0x000a: 0x0a, # LINE FEED
0x000b: 0x000b, # VERTICAL TABULATION 0x000b: 0x0b, # VERTICAL TABULATION
0x000c: 0x000c, # FORM FEED 0x000c: 0x0c, # FORM FEED
0x000d: 0x000d, # CARRIAGE RETURN 0x000d: 0x0d, # CARRIAGE RETURN
0x000e: 0x000e, # SHIFT OUT 0x000e: 0x0e, # SHIFT OUT
0x000f: 0x000f, # SHIFT IN 0x000f: 0x0f, # SHIFT IN
0x0010: 0x0010, # DATA LINK ESCAPE 0x0010: 0x10, # DATA LINK ESCAPE
0x0011: 0x0011, # DEVICE CONTROL ONE 0x0011: 0x11, # DEVICE CONTROL ONE
0x0012: 0x0012, # DEVICE CONTROL TWO 0x0012: 0x12, # DEVICE CONTROL TWO
0x0013: 0x0013, # DEVICE CONTROL THREE 0x0013: 0x13, # DEVICE CONTROL THREE
0x0014: 0x0014, # DEVICE CONTROL FOUR 0x0014: 0x14, # DEVICE CONTROL FOUR
0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE 0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
0x0016: 0x0016, # SYNCHRONOUS IDLE 0x0016: 0x16, # SYNCHRONOUS IDLE
0x0017: 0x0017, # END OF TRANSMISSION BLOCK 0x0017: 0x17, # END OF TRANSMISSION BLOCK
0x0018: 0x0018, # CANCEL 0x0018: 0x18, # CANCEL
0x0019: 0x0019, # END OF MEDIUM 0x0019: 0x19, # END OF MEDIUM
0x001a: 0x001a, # SUBSTITUTE 0x001a: 0x1a, # SUBSTITUTE
0x001b: 0x001b, # ESCAPE 0x001b: 0x1b, # ESCAPE
0x001c: 0x001c, # FILE SEPARATOR 0x001c: 0x1c, # FILE SEPARATOR
0x001d: 0x001d, # GROUP SEPARATOR 0x001d: 0x1d, # GROUP SEPARATOR
0x001e: 0x001e, # RECORD SEPARATOR 0x001e: 0x1e, # RECORD SEPARATOR
0x001f: 0x001f, # UNIT SEPARATOR 0x001f: 0x1f, # UNIT SEPARATOR
0x0020: 0x0020, # SPACE 0x0020: 0x20, # SPACE
0x0021: 0x0021, # EXCLAMATION MARK 0x0021: 0x21, # EXCLAMATION MARK
0x0022: 0x0022, # QUOTATION MARK 0x0022: 0x22, # QUOTATION MARK
0x0023: 0x0023, # NUMBER SIGN 0x0023: 0x23, # NUMBER SIGN
0x0024: 0x0024, # DOLLAR SIGN 0x0024: 0x24, # DOLLAR SIGN
0x0025: 0x0025, # PERCENT SIGN 0x0025: 0x25, # PERCENT SIGN
0x0026: 0x0026, # AMPERSAND 0x0026: 0x26, # AMPERSAND
0x0027: 0x0027, # APOSTROPHE 0x0027: 0x27, # APOSTROPHE
0x0028: 0x0028, # LEFT PARENTHESIS 0x0028: 0x28, # LEFT PARENTHESIS
0x0029: 0x0029, # RIGHT PARENTHESIS 0x0029: 0x29, # RIGHT PARENTHESIS
0x002a: 0x002a, # ASTERISK 0x002a: 0x2a, # ASTERISK
0x002b: 0x002b, # PLUS SIGN 0x002b: 0x2b, # PLUS SIGN
0x002c: 0x002c, # COMMA 0x002c: 0x2c, # COMMA
0x002d: 0x002d, # HYPHEN-MINUS 0x002d: 0x2d, # HYPHEN-MINUS
0x002e: 0x002e, # FULL STOP 0x002e: 0x2e, # FULL STOP
0x002f: 0x002f, # SOLIDUS 0x002f: 0x2f, # SOLIDUS
0x0030: 0x0030, # DIGIT ZERO 0x0030: 0x30, # DIGIT ZERO
0x0031: 0x0031, # DIGIT ONE 0x0031: 0x31, # DIGIT ONE
0x0032: 0x0032, # DIGIT TWO 0x0032: 0x32, # DIGIT TWO
0x0033: 0x0033, # DIGIT THREE 0x0033: 0x33, # DIGIT THREE
0x0034: 0x0034, # DIGIT FOUR 0x0034: 0x34, # DIGIT FOUR
0x0035: 0x0035, # DIGIT FIVE 0x0035: 0x35, # DIGIT FIVE
0x0036: 0x0036, # DIGIT SIX 0x0036: 0x36, # DIGIT SIX
0x0037: 0x0037, # DIGIT SEVEN 0x0037: 0x37, # DIGIT SEVEN
0x0038: 0x0038, # DIGIT EIGHT 0x0038: 0x38, # DIGIT EIGHT
0x0039: 0x0039, # DIGIT NINE 0x0039: 0x39, # DIGIT NINE
0x003a: 0x003a, # COLON 0x003a: 0x3a, # COLON
0x003b: 0x003b, # SEMICOLON 0x003b: 0x3b, # SEMICOLON
0x003c: 0x003c, # LESS-THAN SIGN 0x003c: 0x3c, # LESS-THAN SIGN
0x003d: 0x003d, # EQUALS SIGN 0x003d: 0x3d, # EQUALS SIGN
0x003e: 0x003e, # GREATER-THAN SIGN 0x003e: 0x3e, # GREATER-THAN SIGN
0x003f: 0x003f, # QUESTION MARK 0x003f: 0x3f, # QUESTION MARK
0x0040: 0x0040, # COMMERCIAL AT 0x0040: 0x40, # COMMERCIAL AT
0x0041: 0x0041, # LATIN CAPITAL LETTER A 0x0041: 0x41, # LATIN CAPITAL LETTER A
0x0042: 0x0042, # LATIN CAPITAL LETTER B 0x0042: 0x42, # LATIN CAPITAL LETTER B
0x0043: 0x0043, # LATIN CAPITAL LETTER C 0x0043: 0x43, # LATIN CAPITAL LETTER C
0x0044: 0x0044, # LATIN CAPITAL LETTER D 0x0044: 0x44, # LATIN CAPITAL LETTER D
0x0045: 0x0045, # LATIN CAPITAL LETTER E 0x0045: 0x45, # LATIN CAPITAL LETTER E
0x0046: 0x0046, # LATIN CAPITAL LETTER F 0x0046: 0x46, # LATIN CAPITAL LETTER F
0x0047: 0x0047, # LATIN CAPITAL LETTER G 0x0047: 0x47, # LATIN CAPITAL LETTER G
0x0048: 0x0048, # LATIN CAPITAL LETTER H 0x0048: 0x48, # LATIN CAPITAL LETTER H
0x0049: 0x0049, # LATIN CAPITAL LETTER I 0x0049: 0x49, # LATIN CAPITAL LETTER I
0x004a: 0x004a, # LATIN CAPITAL LETTER J 0x004a: 0x4a, # LATIN CAPITAL LETTER J
0x004b: 0x004b, # LATIN CAPITAL LETTER K 0x004b: 0x4b, # LATIN CAPITAL LETTER K
0x004c: 0x004c, # LATIN CAPITAL LETTER L 0x004c: 0x4c, # LATIN CAPITAL LETTER L
0x004d: 0x004d, # LATIN CAPITAL LETTER M 0x004d: 0x4d, # LATIN CAPITAL LETTER M
0x004e: 0x004e, # LATIN CAPITAL LETTER N 0x004e: 0x4e, # LATIN CAPITAL LETTER N
0x004f: 0x004f, # LATIN CAPITAL LETTER O 0x004f: 0x4f, # LATIN CAPITAL LETTER O
0x0050: 0x0050, # LATIN CAPITAL LETTER P 0x0050: 0x50, # LATIN CAPITAL LETTER P
0x0051: 0x0051, # LATIN CAPITAL LETTER Q 0x0051: 0x51, # LATIN CAPITAL LETTER Q
0x0052: 0x0052, # LATIN CAPITAL LETTER R 0x0052: 0x52, # LATIN CAPITAL LETTER R
0x0053: 0x0053, # LATIN CAPITAL LETTER S 0x0053: 0x53, # LATIN CAPITAL LETTER S
0x0054: 0x0054, # LATIN CAPITAL LETTER T 0x0054: 0x54, # LATIN CAPITAL LETTER T
0x0055: 0x0055, # LATIN CAPITAL LETTER U 0x0055: 0x55, # LATIN CAPITAL LETTER U
0x0056: 0x0056, # LATIN CAPITAL LETTER V 0x0056: 0x56, # LATIN CAPITAL LETTER V
0x0057: 0x0057, # LATIN CAPITAL LETTER W 0x0057: 0x57, # LATIN CAPITAL LETTER W
0x0058: 0x0058, # LATIN CAPITAL LETTER X 0x0058: 0x58, # LATIN CAPITAL LETTER X
0x0059: 0x0059, # LATIN CAPITAL LETTER Y 0x0059: 0x59, # LATIN CAPITAL LETTER Y
0x005a: 0x005a, # LATIN CAPITAL LETTER Z 0x005a: 0x5a, # LATIN CAPITAL LETTER Z
0x005b: 0x005b, # LEFT SQUARE BRACKET 0x005b: 0x5b, # LEFT SQUARE BRACKET
0x005c: 0x005c, # REVERSE SOLIDUS 0x005c: 0x5c, # REVERSE SOLIDUS
0x005d: 0x005d, # RIGHT SQUARE BRACKET 0x005d: 0x5d, # RIGHT SQUARE BRACKET
0x005e: 0x005e, # CIRCUMFLEX ACCENT 0x005e: 0x5e, # CIRCUMFLEX ACCENT
0x005f: 0x005f, # LOW LINE 0x005f: 0x5f, # LOW LINE
0x0060: 0x0060, # GRAVE ACCENT 0x0060: 0x60, # GRAVE ACCENT
0x0061: 0x0061, # LATIN SMALL LETTER A 0x0061: 0x61, # LATIN SMALL LETTER A
0x0062: 0x0062, # LATIN SMALL LETTER B 0x0062: 0x62, # LATIN SMALL LETTER B
0x0063: 0x0063, # LATIN SMALL LETTER C 0x0063: 0x63, # LATIN SMALL LETTER C
0x0064: 0x0064, # LATIN SMALL LETTER D 0x0064: 0x64, # LATIN SMALL LETTER D
0x0065: 0x0065, # LATIN SMALL LETTER E 0x0065: 0x65, # LATIN SMALL LETTER E
0x0066: 0x0066, # LATIN SMALL LETTER F 0x0066: 0x66, # LATIN SMALL LETTER F
0x0067: 0x0067, # LATIN SMALL LETTER G 0x0067: 0x67, # LATIN SMALL LETTER G
0x0068: 0x0068, # LATIN SMALL LETTER H 0x0068: 0x68, # LATIN SMALL LETTER H
0x0069: 0x0069, # LATIN SMALL LETTER I 0x0069: 0x69, # LATIN SMALL LETTER I
0x006a: 0x006a, # LATIN SMALL LETTER J 0x006a: 0x6a, # LATIN SMALL LETTER J
0x006b: 0x006b, # LATIN SMALL LETTER K 0x006b: 0x6b, # LATIN SMALL LETTER K
0x006c: 0x006c, # LATIN SMALL LETTER L 0x006c: 0x6c, # LATIN SMALL LETTER L
0x006d: 0x006d, # LATIN SMALL LETTER M 0x006d: 0x6d, # LATIN SMALL LETTER M
0x006e: 0x006e, # LATIN SMALL LETTER N 0x006e: 0x6e, # LATIN SMALL LETTER N
0x006f: 0x006f, # LATIN SMALL LETTER O 0x006f: 0x6f, # LATIN SMALL LETTER O
0x0070: 0x0070, # LATIN SMALL LETTER P 0x0070: 0x70, # LATIN SMALL LETTER P
0x0071: 0x0071, # LATIN SMALL LETTER Q 0x0071: 0x71, # LATIN SMALL LETTER Q
0x0072: 0x0072, # LATIN SMALL LETTER R 0x0072: 0x72, # LATIN SMALL LETTER R
0x0073: 0x0073, # LATIN SMALL LETTER S 0x0073: 0x73, # LATIN SMALL LETTER S
0x0074: 0x0074, # LATIN SMALL LETTER T 0x0074: 0x74, # LATIN SMALL LETTER T
0x0075: 0x0075, # LATIN SMALL LETTER U 0x0075: 0x75, # LATIN SMALL LETTER U
0x0076: 0x0076, # LATIN SMALL LETTER V 0x0076: 0x76, # LATIN SMALL LETTER V
0x0077: 0x0077, # LATIN SMALL LETTER W 0x0077: 0x77, # LATIN SMALL LETTER W
0x0078: 0x0078, # LATIN SMALL LETTER X 0x0078: 0x78, # LATIN SMALL LETTER X
0x0079: 0x0079, # LATIN SMALL LETTER Y 0x0079: 0x79, # LATIN SMALL LETTER Y
0x007a: 0x007a, # LATIN SMALL LETTER Z 0x007a: 0x7a, # LATIN SMALL LETTER Z
0x007b: 0x007b, # LEFT CURLY BRACKET 0x007b: 0x7b, # LEFT CURLY BRACKET
0x007c: 0x007c, # VERTICAL LINE 0x007c: 0x7c, # VERTICAL LINE
0x007d: 0x007d, # RIGHT CURLY BRACKET 0x007d: 0x7d, # RIGHT CURLY BRACKET
0x007e: 0x007e, # TILDE 0x007e: 0x7e, # TILDE
0x007f: 0x007f, # DELETE 0x007f: 0x7f, # DELETE
0x0080: 0x0080, # <control> 0x0080: 0x80, # <control>
0x0081: 0x0081, # <control> 0x0081: 0x81, # <control>
0x0082: 0x0082, # <control> 0x0082: 0x82, # <control>
0x0083: 0x0083, # <control> 0x0083: 0x83, # <control>
0x0084: 0x0084, # <control> 0x0084: 0x84, # <control>
0x0085: 0x0085, # <control> 0x0085: 0x85, # <control>
0x0086: 0x0086, # <control> 0x0086: 0x86, # <control>
0x0087: 0x0087, # <control> 0x0087: 0x87, # <control>
0x0088: 0x0088, # <control> 0x0088: 0x88, # <control>
0x0089: 0x0089, # <control> 0x0089: 0x89, # <control>
0x008a: 0x008a, # <control> 0x008a: 0x8a, # <control>
0x008b: 0x008b, # <control> 0x008b: 0x8b, # <control>
0x008c: 0x008c, # <control> 0x008c: 0x8c, # <control>
0x008d: 0x008d, # <control> 0x008d: 0x8d, # <control>
0x008e: 0x008e, # <control> 0x008e: 0x8e, # <control>
0x008f: 0x008f, # <control> 0x008f: 0x8f, # <control>
0x0090: 0x0090, # <control> 0x0090: 0x90, # <control>
0x0091: 0x0091, # <control> 0x0091: 0x91, # <control>
0x0092: 0x0092, # <control> 0x0092: 0x92, # <control>
0x0093: 0x0093, # <control> 0x0093: 0x93, # <control>
0x0094: 0x0094, # <control> 0x0094: 0x94, # <control>
0x0095: 0x0095, # <control> 0x0095: 0x95, # <control>
0x0096: 0x0096, # <control> 0x0096: 0x96, # <control>
0x0097: 0x0097, # <control> 0x0097: 0x97, # <control>
0x0098: 0x0098, # <control> 0x0098: 0x98, # <control>
0x0099: 0x0099, # <control> 0x0099: 0x99, # <control>
0x009a: 0x009a, # <control> 0x009a: 0x9a, # <control>
0x009b: 0x009b, # <control> 0x009b: 0x9b, # <control>
0x009c: 0x009c, # <control> 0x009c: 0x9c, # <control>
0x009d: 0x009d, # <control> 0x009d: 0x9d, # <control>
0x009e: 0x009e, # <control> 0x009e: 0x9e, # <control>
0x009f: 0x009f, # <control> 0x009f: 0x9f, # <control>
0x00a0: 0x00a0, # NO-BREAK SPACE 0x00a0: 0xa0, # NO-BREAK SPACE
0x00a2: 0x00a2, # CENT SIGN 0x00a2: 0xa2, # CENT SIGN
0x00a3: 0x00a3, # POUND SIGN 0x00a3: 0xa3, # POUND SIGN
0x00a4: 0x00a4, # CURRENCY SIGN 0x00a4: 0xa4, # CURRENCY SIGN
0x00a5: 0x00a5, # YEN SIGN 0x00a5: 0xa5, # YEN SIGN
0x00a6: 0x00a6, # BROKEN BAR 0x00a6: 0xa6, # BROKEN BAR
0x00a7: 0x00a7, # SECTION SIGN 0x00a7: 0xa7, # SECTION SIGN
0x00a8: 0x00a8, # DIAERESIS 0x00a8: 0xa8, # DIAERESIS
0x00a9: 0x00a9, # COPYRIGHT SIGN 0x00a9: 0xa9, # COPYRIGHT SIGN
0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00ab: 0xab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00ac: 0x00ac, # NOT SIGN 0x00ac: 0xac, # NOT SIGN
0x00ad: 0x00ad, # SOFT HYPHEN 0x00ad: 0xad, # SOFT HYPHEN
0x00ae: 0x00ae, # REGISTERED SIGN 0x00ae: 0xae, # REGISTERED SIGN
0x00af: 0x00af, # MACRON 0x00af: 0xaf, # MACRON
0x00b0: 0x00b0, # DEGREE SIGN 0x00b0: 0xb0, # DEGREE SIGN
0x00b1: 0x00b1, # PLUS-MINUS SIGN 0x00b1: 0xb1, # PLUS-MINUS SIGN
0x00b2: 0x00b2, # SUPERSCRIPT TWO 0x00b2: 0xb2, # SUPERSCRIPT TWO
0x00b3: 0x00b3, # SUPERSCRIPT THREE 0x00b3: 0xb3, # SUPERSCRIPT THREE
0x00b4: 0x00b4, # ACUTE ACCENT 0x00b4: 0xb4, # ACUTE ACCENT
0x00b5: 0x00b5, # MICRO SIGN 0x00b5: 0xb5, # MICRO SIGN
0x00b6: 0x00b6, # PILCROW SIGN 0x00b6: 0xb6, # PILCROW SIGN
0x00b7: 0x00b7, # MIDDLE DOT 0x00b7: 0xb7, # MIDDLE DOT
0x00b8: 0x00b8, # CEDILLA 0x00b8: 0xb8, # CEDILLA
0x00b9: 0x00b9, # SUPERSCRIPT ONE 0x00b9: 0xb9, # SUPERSCRIPT ONE
0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x00bb: 0xbb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
0x00bc: 0x00bc, # VULGAR FRACTION ONE QUARTER 0x00bc: 0xbc, # VULGAR FRACTION ONE QUARTER
0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF 0x00bd: 0xbd, # VULGAR FRACTION ONE HALF
0x00be: 0x00be, # VULGAR FRACTION THREE QUARTERS 0x00be: 0xbe, # VULGAR FRACTION THREE QUARTERS
0x00d7: 0x00aa, # MULTIPLICATION SIGN 0x00d7: 0xaa, # MULTIPLICATION SIGN
0x00f7: 0x00ba, # DIVISION SIGN 0x00f7: 0xba, # DIVISION SIGN
0x05d0: 0x00e0, # HEBREW LETTER ALEF 0x05d0: 0xe0, # HEBREW LETTER ALEF
0x05d1: 0x00e1, # HEBREW LETTER BET 0x05d1: 0xe1, # HEBREW LETTER BET
0x05d2: 0x00e2, # HEBREW LETTER GIMEL 0x05d2: 0xe2, # HEBREW LETTER GIMEL
0x05d3: 0x00e3, # HEBREW LETTER DALET 0x05d3: 0xe3, # HEBREW LETTER DALET
0x05d4: 0x00e4, # HEBREW LETTER HE 0x05d4: 0xe4, # HEBREW LETTER HE
0x05d5: 0x00e5, # HEBREW LETTER VAV 0x05d5: 0xe5, # HEBREW LETTER VAV
0x05d6: 0x00e6, # HEBREW LETTER ZAYIN 0x05d6: 0xe6, # HEBREW LETTER ZAYIN
0x05d7: 0x00e7, # HEBREW LETTER HET 0x05d7: 0xe7, # HEBREW LETTER HET
0x05d8: 0x00e8, # HEBREW LETTER TET 0x05d8: 0xe8, # HEBREW LETTER TET
0x05d9: 0x00e9, # HEBREW LETTER YOD 0x05d9: 0xe9, # HEBREW LETTER YOD
0x05da: 0x00ea, # HEBREW LETTER FINAL KAF 0x05da: 0xea, # HEBREW LETTER FINAL KAF
0x05db: 0x00eb, # HEBREW LETTER KAF 0x05db: 0xeb, # HEBREW LETTER KAF
0x05dc: 0x00ec, # HEBREW LETTER LAMED 0x05dc: 0xec, # HEBREW LETTER LAMED
0x05dd: 0x00ed, # HEBREW LETTER FINAL MEM 0x05dd: 0xed, # HEBREW LETTER FINAL MEM
0x05de: 0x00ee, # HEBREW LETTER MEM 0x05de: 0xee, # HEBREW LETTER MEM
0x05df: 0x00ef, # HEBREW LETTER FINAL NUN 0x05df: 0xef, # HEBREW LETTER FINAL NUN
0x05e0: 0x00f0, # HEBREW LETTER NUN 0x05e0: 0xf0, # HEBREW LETTER NUN
0x05e1: 0x00f1, # HEBREW LETTER SAMEKH 0x05e1: 0xf1, # HEBREW LETTER SAMEKH
0x05e2: 0x00f2, # HEBREW LETTER AYIN 0x05e2: 0xf2, # HEBREW LETTER AYIN
0x05e3: 0x00f3, # HEBREW LETTER FINAL PE 0x05e3: 0xf3, # HEBREW LETTER FINAL PE
0x05e4: 0x00f4, # HEBREW LETTER PE 0x05e4: 0xf4, # HEBREW LETTER PE
0x05e5: 0x00f5, # HEBREW LETTER FINAL TSADI 0x05e5: 0xf5, # HEBREW LETTER FINAL TSADI
0x05e6: 0x00f6, # HEBREW LETTER TSADI 0x05e6: 0xf6, # HEBREW LETTER TSADI
0x05e7: 0x00f7, # HEBREW LETTER QOF 0x05e7: 0xf7, # HEBREW LETTER QOF
0x05e8: 0x00f8, # HEBREW LETTER RESH 0x05e8: 0xf8, # HEBREW LETTER RESH
0x05e9: 0x00f9, # HEBREW LETTER SHIN 0x05e9: 0xf9, # HEBREW LETTER SHIN
0x05ea: 0x00fa, # HEBREW LETTER TAV 0x05ea: 0xfa, # HEBREW LETTER TAV
0x200e: 0x00fd, # LEFT-TO-RIGHT MARK 0x200e: 0xfd, # LEFT-TO-RIGHT MARK
0x200f: 0x00fe, # RIGHT-TO-LEFT MARK 0x200f: 0xfe, # RIGHT-TO-LEFT MARK
0x2017: 0x00df, # DOUBLE LOW LINE 0x2017: 0xdf, # DOUBLE LOW LINE
} }

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,15 +1,8 @@
""" Python Character Mapping Codec for KOI8U. """ Python Character Mapping Codec generated from 'python-mappings/KOI8-U.TXT' with gencodec.py.
This character scheme is compliant to RFC2319
Written by Marc-Andre Lemburg (mal@lemburg.com).
Modified by Maxim Dzumanenko <mvd@mylinux.com.ua>.
(c) Copyright 2002, Python Software Foundation.
"""#" """#"
import codecs, koi8_r import codecs
### Codec APIs ### Codec APIs
@ -21,7 +14,7 @@ class Codec(codecs.Codec):
def decode(self,input,errors='strict'): def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map) return codecs.charmap_decode(input,errors,decoding_table)
class StreamWriter(Codec,codecs.StreamWriter): class StreamWriter(Codec,codecs.StreamWriter):
pass pass
@ -35,20 +28,525 @@ def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter) return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = koi8_r.decoding_map.copy() ### Decoding Table
decoding_map.update({
0x00a4: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE decoding_table = (
0x00a6: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I u'\x00' # 0x00 -> NULL
0x00a7: 0x0457, # CYRILLIC SMALL LETTER YI (UKRAINIAN) u'\x01' # 0x01 -> START OF HEADING
0x00ad: 0x0491, # CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN u'\x02' # 0x02 -> START OF TEXT
0x00b4: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE u'\x03' # 0x03 -> END OF TEXT
0x00b6: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I u'\x04' # 0x04 -> END OF TRANSMISSION
0x00b7: 0x0407, # CYRILLIC CAPITAL LETTER YI (UKRAINIAN) u'\x05' # 0x05 -> ENQUIRY
0x00bd: 0x0490, # CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN u'\x06' # 0x06 -> ACKNOWLEDGE
}) u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0a -> LINE FEED
u'\x0b' # 0x0b -> VERTICAL TABULATION
u'\x0c' # 0x0c -> FORM FEED
u'\r' # 0x0d -> CARRIAGE RETURN
u'\x0e' # 0x0e -> SHIFT OUT
u'\x0f' # 0x0f -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1a -> SUBSTITUTE
u'\x1b' # 0x1b -> ESCAPE
u'\x1c' # 0x1c -> FILE SEPARATOR
u'\x1d' # 0x1d -> GROUP SEPARATOR
u'\x1e' # 0x1e -> RECORD SEPARATOR
u'\x1f' # 0x1f -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2a -> ASTERISK
u'+' # 0x2b -> PLUS SIGN
u',' # 0x2c -> COMMA
u'-' # 0x2d -> HYPHEN-MINUS
u'.' # 0x2e -> FULL STOP
u'/' # 0x2f -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3a -> COLON
u';' # 0x3b -> SEMICOLON
u'<' # 0x3c -> LESS-THAN SIGN
u'=' # 0x3d -> EQUALS SIGN
u'>' # 0x3e -> GREATER-THAN SIGN
u'?' # 0x3f -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4a -> LATIN CAPITAL LETTER J
u'K' # 0x4b -> LATIN CAPITAL LETTER K
u'L' # 0x4c -> LATIN CAPITAL LETTER L
u'M' # 0x4d -> LATIN CAPITAL LETTER M
u'N' # 0x4e -> LATIN CAPITAL LETTER N
u'O' # 0x4f -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5a -> LATIN CAPITAL LETTER Z
u'[' # 0x5b -> LEFT SQUARE BRACKET
u'\\' # 0x5c -> REVERSE SOLIDUS
u']' # 0x5d -> RIGHT SQUARE BRACKET
u'^' # 0x5e -> CIRCUMFLEX ACCENT
u'_' # 0x5f -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6a -> LATIN SMALL LETTER J
u'k' # 0x6b -> LATIN SMALL LETTER K
u'l' # 0x6c -> LATIN SMALL LETTER L
u'm' # 0x6d -> LATIN SMALL LETTER M
u'n' # 0x6e -> LATIN SMALL LETTER N
u'o' # 0x6f -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7a -> LATIN SMALL LETTER Z
u'{' # 0x7b -> LEFT CURLY BRACKET
u'|' # 0x7c -> VERTICAL LINE
u'}' # 0x7d -> RIGHT CURLY BRACKET
u'~' # 0x7e -> TILDE
u'\x7f' # 0x7f -> DELETE
u'\u2500' # 0x80 -> BOX DRAWINGS LIGHT HORIZONTAL
u'\u2502' # 0x81 -> BOX DRAWINGS LIGHT VERTICAL
u'\u250c' # 0x82 -> BOX DRAWINGS LIGHT DOWN AND RIGHT
u'\u2510' # 0x83 -> BOX DRAWINGS LIGHT DOWN AND LEFT
u'\u2514' # 0x84 -> BOX DRAWINGS LIGHT UP AND RIGHT
u'\u2518' # 0x85 -> BOX DRAWINGS LIGHT UP AND LEFT
u'\u251c' # 0x86 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
u'\u2524' # 0x87 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
u'\u252c' # 0x88 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
u'\u2534' # 0x89 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
u'\u253c' # 0x8a -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
u'\u2580' # 0x8b -> UPPER HALF BLOCK
u'\u2584' # 0x8c -> LOWER HALF BLOCK
u'\u2588' # 0x8d -> FULL BLOCK
u'\u258c' # 0x8e -> LEFT HALF BLOCK
u'\u2590' # 0x8f -> RIGHT HALF BLOCK
u'\u2591' # 0x90 -> LIGHT SHADE
u'\u2592' # 0x91 -> MEDIUM SHADE
u'\u2593' # 0x92 -> DARK SHADE
u'\u2320' # 0x93 -> TOP HALF INTEGRAL
u'\u25a0' # 0x94 -> BLACK SQUARE
u'\u2219' # 0x95 -> BULLET OPERATOR
u'\u221a' # 0x96 -> SQUARE ROOT
u'\u2248' # 0x97 -> ALMOST EQUAL TO
u'\u2264' # 0x98 -> LESS-THAN OR EQUAL TO
u'\u2265' # 0x99 -> GREATER-THAN OR EQUAL TO
u'\xa0' # 0x9a -> NO-BREAK SPACE
u'\u2321' # 0x9b -> BOTTOM HALF INTEGRAL
u'\xb0' # 0x9c -> DEGREE SIGN
u'\xb2' # 0x9d -> SUPERSCRIPT TWO
u'\xb7' # 0x9e -> MIDDLE DOT
u'\xf7' # 0x9f -> DIVISION SIGN
u'\u2550' # 0xa0 -> BOX DRAWINGS DOUBLE HORIZONTAL
u'\u2551' # 0xa1 -> BOX DRAWINGS DOUBLE VERTICAL
u'\u2552' # 0xa2 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
u'\u0451' # 0xa3 -> CYRILLIC SMALL LETTER IO
u'\u0454' # 0xa4 -> CYRILLIC SMALL LETTER UKRAINIAN IE
u'\u2554' # 0xa5 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
u'\u0456' # 0xa6 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
u'\u0457' # 0xa7 -> CYRILLIC SMALL LETTER YI (UKRAINIAN)
u'\u2557' # 0xa8 -> BOX DRAWINGS DOUBLE DOWN AND LEFT
u'\u2558' # 0xa9 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
u'\u2559' # 0xaa -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
u'\u255a' # 0xab -> BOX DRAWINGS DOUBLE UP AND RIGHT
u'\u255b' # 0xac -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
u'\u0491' # 0xad -> CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN
u'\u255d' # 0xae -> BOX DRAWINGS DOUBLE UP AND LEFT
u'\u255e' # 0xaf -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
u'\u255f' # 0xb0 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
u'\u2560' # 0xb1 -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
u'\u2561' # 0xb2 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
u'\u0401' # 0xb3 -> CYRILLIC CAPITAL LETTER IO
u'\u0404' # 0xb4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
u'\u2563' # 0xb5 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
u'\u0406' # 0xb6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
u'\u0407' # 0xb7 -> CYRILLIC CAPITAL LETTER YI (UKRAINIAN)
u'\u2566' # 0xb8 -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
u'\u2567' # 0xb9 -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
u'\u2568' # 0xba -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
u'\u2569' # 0xbb -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
u'\u256a' # 0xbc -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
u'\u0490' # 0xbd -> CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN
u'\u256c' # 0xbe -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
u'\xa9' # 0xbf -> COPYRIGHT SIGN
u'\u044e' # 0xc0 -> CYRILLIC SMALL LETTER YU
u'\u0430' # 0xc1 -> CYRILLIC SMALL LETTER A
u'\u0431' # 0xc2 -> CYRILLIC SMALL LETTER BE
u'\u0446' # 0xc3 -> CYRILLIC SMALL LETTER TSE
u'\u0434' # 0xc4 -> CYRILLIC SMALL LETTER DE
u'\u0435' # 0xc5 -> CYRILLIC SMALL LETTER IE
u'\u0444' # 0xc6 -> CYRILLIC SMALL LETTER EF
u'\u0433' # 0xc7 -> CYRILLIC SMALL LETTER GHE
u'\u0445' # 0xc8 -> CYRILLIC SMALL LETTER HA
u'\u0438' # 0xc9 -> CYRILLIC SMALL LETTER I
u'\u0439' # 0xca -> CYRILLIC SMALL LETTER SHORT I
u'\u043a' # 0xcb -> CYRILLIC SMALL LETTER KA
u'\u043b' # 0xcc -> CYRILLIC SMALL LETTER EL
u'\u043c' # 0xcd -> CYRILLIC SMALL LETTER EM
u'\u043d' # 0xce -> CYRILLIC SMALL LETTER EN
u'\u043e' # 0xcf -> CYRILLIC SMALL LETTER O
u'\u043f' # 0xd0 -> CYRILLIC SMALL LETTER PE
u'\u044f' # 0xd1 -> CYRILLIC SMALL LETTER YA
u'\u0440' # 0xd2 -> CYRILLIC SMALL LETTER ER
u'\u0441' # 0xd3 -> CYRILLIC SMALL LETTER ES
u'\u0442' # 0xd4 -> CYRILLIC SMALL LETTER TE
u'\u0443' # 0xd5 -> CYRILLIC SMALL LETTER U
u'\u0436' # 0xd6 -> CYRILLIC SMALL LETTER ZHE
u'\u0432' # 0xd7 -> CYRILLIC SMALL LETTER VE
u'\u044c' # 0xd8 -> CYRILLIC SMALL LETTER SOFT SIGN
u'\u044b' # 0xd9 -> CYRILLIC SMALL LETTER YERU
u'\u0437' # 0xda -> CYRILLIC SMALL LETTER ZE
u'\u0448' # 0xdb -> CYRILLIC SMALL LETTER SHA
u'\u044d' # 0xdc -> CYRILLIC SMALL LETTER E
u'\u0449' # 0xdd -> CYRILLIC SMALL LETTER SHCHA
u'\u0447' # 0xde -> CYRILLIC SMALL LETTER CHE
u'\u044a' # 0xdf -> CYRILLIC SMALL LETTER HARD SIGN
u'\u042e' # 0xe0 -> CYRILLIC CAPITAL LETTER YU
u'\u0410' # 0xe1 -> CYRILLIC CAPITAL LETTER A
u'\u0411' # 0xe2 -> CYRILLIC CAPITAL LETTER BE
u'\u0426' # 0xe3 -> CYRILLIC CAPITAL LETTER TSE
u'\u0414' # 0xe4 -> CYRILLIC CAPITAL LETTER DE
u'\u0415' # 0xe5 -> CYRILLIC CAPITAL LETTER IE
u'\u0424' # 0xe6 -> CYRILLIC CAPITAL LETTER EF
u'\u0413' # 0xe7 -> CYRILLIC CAPITAL LETTER GHE
u'\u0425' # 0xe8 -> CYRILLIC CAPITAL LETTER HA
u'\u0418' # 0xe9 -> CYRILLIC CAPITAL LETTER I
u'\u0419' # 0xea -> CYRILLIC CAPITAL LETTER SHORT I
u'\u041a' # 0xeb -> CYRILLIC CAPITAL LETTER KA
u'\u041b' # 0xec -> CYRILLIC CAPITAL LETTER EL
u'\u041c' # 0xed -> CYRILLIC CAPITAL LETTER EM
u'\u041d' # 0xee -> CYRILLIC CAPITAL LETTER EN
u'\u041e' # 0xef -> CYRILLIC CAPITAL LETTER O
u'\u041f' # 0xf0 -> CYRILLIC CAPITAL LETTER PE
u'\u042f' # 0xf1 -> CYRILLIC CAPITAL LETTER YA
u'\u0420' # 0xf2 -> CYRILLIC CAPITAL LETTER ER
u'\u0421' # 0xf3 -> CYRILLIC CAPITAL LETTER ES
u'\u0422' # 0xf4 -> CYRILLIC CAPITAL LETTER TE
u'\u0423' # 0xf5 -> CYRILLIC CAPITAL LETTER U
u'\u0416' # 0xf6 -> CYRILLIC CAPITAL LETTER ZHE
u'\u0412' # 0xf7 -> CYRILLIC CAPITAL LETTER VE
u'\u042c' # 0xf8 -> CYRILLIC CAPITAL LETTER SOFT SIGN
u'\u042b' # 0xf9 -> CYRILLIC CAPITAL LETTER YERU
u'\u0417' # 0xfa -> CYRILLIC CAPITAL LETTER ZE
u'\u0428' # 0xfb -> CYRILLIC CAPITAL LETTER SHA
u'\u042d' # 0xfc -> CYRILLIC CAPITAL LETTER E
u'\u0429' # 0xfd -> CYRILLIC CAPITAL LETTER SHCHA
u'\u0427' # 0xfe -> CYRILLIC CAPITAL LETTER CHE
u'\u042a' # 0xff -> CYRILLIC CAPITAL LETTER HARD SIGN
)
### Encoding Map ### Encoding Map
encoding_map = codecs.make_encoding_map(decoding_map) encoding_map = {
0x0000: 0x00, # NULL
0x0001: 0x01, # START OF HEADING
0x0002: 0x02, # START OF TEXT
0x0003: 0x03, # END OF TEXT
0x0004: 0x04, # END OF TRANSMISSION
0x0005: 0x05, # ENQUIRY
0x0006: 0x06, # ACKNOWLEDGE
0x0007: 0x07, # BELL
0x0008: 0x08, # BACKSPACE
0x0009: 0x09, # HORIZONTAL TABULATION
0x000a: 0x0a, # LINE FEED
0x000b: 0x0b, # VERTICAL TABULATION
0x000c: 0x0c, # FORM FEED
0x000d: 0x0d, # CARRIAGE RETURN
0x000e: 0x0e, # SHIFT OUT
0x000f: 0x0f, # SHIFT IN
0x0010: 0x10, # DATA LINK ESCAPE
0x0011: 0x11, # DEVICE CONTROL ONE
0x0012: 0x12, # DEVICE CONTROL TWO
0x0013: 0x13, # DEVICE CONTROL THREE
0x0014: 0x14, # DEVICE CONTROL FOUR
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
0x0016: 0x16, # SYNCHRONOUS IDLE
0x0017: 0x17, # END OF TRANSMISSION BLOCK
0x0018: 0x18, # CANCEL
0x0019: 0x19, # END OF MEDIUM
0x001a: 0x1a, # SUBSTITUTE
0x001b: 0x1b, # ESCAPE
0x001c: 0x1c, # FILE SEPARATOR
0x001d: 0x1d, # GROUP SEPARATOR
0x001e: 0x1e, # RECORD SEPARATOR
0x001f: 0x1f, # UNIT SEPARATOR
0x0020: 0x20, # SPACE
0x0021: 0x21, # EXCLAMATION MARK
0x0022: 0x22, # QUOTATION MARK
0x0023: 0x23, # NUMBER SIGN
0x0024: 0x24, # DOLLAR SIGN
0x0025: 0x25, # PERCENT SIGN
0x0026: 0x26, # AMPERSAND
0x0027: 0x27, # APOSTROPHE
0x0028: 0x28, # LEFT PARENTHESIS
0x0029: 0x29, # RIGHT PARENTHESIS
0x002a: 0x2a, # ASTERISK
0x002b: 0x2b, # PLUS SIGN
0x002c: 0x2c, # COMMA
0x002d: 0x2d, # HYPHEN-MINUS
0x002e: 0x2e, # FULL STOP
0x002f: 0x2f, # SOLIDUS
0x0030: 0x30, # DIGIT ZERO
0x0031: 0x31, # DIGIT ONE
0x0032: 0x32, # DIGIT TWO
0x0033: 0x33, # DIGIT THREE
0x0034: 0x34, # DIGIT FOUR
0x0035: 0x35, # DIGIT FIVE
0x0036: 0x36, # DIGIT SIX
0x0037: 0x37, # DIGIT SEVEN
0x0038: 0x38, # DIGIT EIGHT
0x0039: 0x39, # DIGIT NINE
0x003a: 0x3a, # COLON
0x003b: 0x3b, # SEMICOLON
0x003c: 0x3c, # LESS-THAN SIGN
0x003d: 0x3d, # EQUALS SIGN
0x003e: 0x3e, # GREATER-THAN SIGN
0x003f: 0x3f, # QUESTION MARK
0x0040: 0x40, # COMMERCIAL AT
0x0041: 0x41, # LATIN CAPITAL LETTER A
0x0042: 0x42, # LATIN CAPITAL LETTER B
0x0043: 0x43, # LATIN CAPITAL LETTER C
0x0044: 0x44, # LATIN CAPITAL LETTER D
0x0045: 0x45, # LATIN CAPITAL LETTER E
0x0046: 0x46, # LATIN CAPITAL LETTER F
0x0047: 0x47, # LATIN CAPITAL LETTER G
0x0048: 0x48, # LATIN CAPITAL LETTER H
0x0049: 0x49, # LATIN CAPITAL LETTER I
0x004a: 0x4a, # LATIN CAPITAL LETTER J
0x004b: 0x4b, # LATIN CAPITAL LETTER K
0x004c: 0x4c, # LATIN CAPITAL LETTER L
0x004d: 0x4d, # LATIN CAPITAL LETTER M
0x004e: 0x4e, # LATIN CAPITAL LETTER N
0x004f: 0x4f, # LATIN CAPITAL LETTER O
0x0050: 0x50, # LATIN CAPITAL LETTER P
0x0051: 0x51, # LATIN CAPITAL LETTER Q
0x0052: 0x52, # LATIN CAPITAL LETTER R
0x0053: 0x53, # LATIN CAPITAL LETTER S
0x0054: 0x54, # LATIN CAPITAL LETTER T
0x0055: 0x55, # LATIN CAPITAL LETTER U
0x0056: 0x56, # LATIN CAPITAL LETTER V
0x0057: 0x57, # LATIN CAPITAL LETTER W
0x0058: 0x58, # LATIN CAPITAL LETTER X
0x0059: 0x59, # LATIN CAPITAL LETTER Y
0x005a: 0x5a, # LATIN CAPITAL LETTER Z
0x005b: 0x5b, # LEFT SQUARE BRACKET
0x005c: 0x5c, # REVERSE SOLIDUS
0x005d: 0x5d, # RIGHT SQUARE BRACKET
0x005e: 0x5e, # CIRCUMFLEX ACCENT
0x005f: 0x5f, # LOW LINE
0x0060: 0x60, # GRAVE ACCENT
0x0061: 0x61, # LATIN SMALL LETTER A
0x0062: 0x62, # LATIN SMALL LETTER B
0x0063: 0x63, # LATIN SMALL LETTER C
0x0064: 0x64, # LATIN SMALL LETTER D
0x0065: 0x65, # LATIN SMALL LETTER E
0x0066: 0x66, # LATIN SMALL LETTER F
0x0067: 0x67, # LATIN SMALL LETTER G
0x0068: 0x68, # LATIN SMALL LETTER H
0x0069: 0x69, # LATIN SMALL LETTER I
0x006a: 0x6a, # LATIN SMALL LETTER J
0x006b: 0x6b, # LATIN SMALL LETTER K
0x006c: 0x6c, # LATIN SMALL LETTER L
0x006d: 0x6d, # LATIN SMALL LETTER M
0x006e: 0x6e, # LATIN SMALL LETTER N
0x006f: 0x6f, # LATIN SMALL LETTER O
0x0070: 0x70, # LATIN SMALL LETTER P
0x0071: 0x71, # LATIN SMALL LETTER Q
0x0072: 0x72, # LATIN SMALL LETTER R
0x0073: 0x73, # LATIN SMALL LETTER S
0x0074: 0x74, # LATIN SMALL LETTER T
0x0075: 0x75, # LATIN SMALL LETTER U
0x0076: 0x76, # LATIN SMALL LETTER V
0x0077: 0x77, # LATIN SMALL LETTER W
0x0078: 0x78, # LATIN SMALL LETTER X
0x0079: 0x79, # LATIN SMALL LETTER Y
0x007a: 0x7a, # LATIN SMALL LETTER Z
0x007b: 0x7b, # LEFT CURLY BRACKET
0x007c: 0x7c, # VERTICAL LINE
0x007d: 0x7d, # RIGHT CURLY BRACKET
0x007e: 0x7e, # TILDE
0x007f: 0x7f, # DELETE
0x00a0: 0x9a, # NO-BREAK SPACE
0x00a9: 0xbf, # COPYRIGHT SIGN
0x00b0: 0x9c, # DEGREE SIGN
0x00b2: 0x9d, # SUPERSCRIPT TWO
0x00b7: 0x9e, # MIDDLE DOT
0x00f7: 0x9f, # DIVISION SIGN
0x0401: 0xb3, # CYRILLIC CAPITAL LETTER IO
0x0404: 0xb4, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
0x0406: 0xb6, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
0x0407: 0xb7, # CYRILLIC CAPITAL LETTER YI (UKRAINIAN)
0x0410: 0xe1, # CYRILLIC CAPITAL LETTER A
0x0411: 0xe2, # CYRILLIC CAPITAL LETTER BE
0x0412: 0xf7, # CYRILLIC CAPITAL LETTER VE
0x0413: 0xe7, # CYRILLIC CAPITAL LETTER GHE
0x0414: 0xe4, # CYRILLIC CAPITAL LETTER DE
0x0415: 0xe5, # CYRILLIC CAPITAL LETTER IE
0x0416: 0xf6, # CYRILLIC CAPITAL LETTER ZHE
0x0417: 0xfa, # CYRILLIC CAPITAL LETTER ZE
0x0418: 0xe9, # CYRILLIC CAPITAL LETTER I
0x0419: 0xea, # CYRILLIC CAPITAL LETTER SHORT I
0x041a: 0xeb, # CYRILLIC CAPITAL LETTER KA
0x041b: 0xec, # CYRILLIC CAPITAL LETTER EL
0x041c: 0xed, # CYRILLIC CAPITAL LETTER EM
0x041d: 0xee, # CYRILLIC CAPITAL LETTER EN
0x041e: 0xef, # CYRILLIC CAPITAL LETTER O
0x041f: 0xf0, # CYRILLIC CAPITAL LETTER PE
0x0420: 0xf2, # CYRILLIC CAPITAL LETTER ER
0x0421: 0xf3, # CYRILLIC CAPITAL LETTER ES
0x0422: 0xf4, # CYRILLIC CAPITAL LETTER TE
0x0423: 0xf5, # CYRILLIC CAPITAL LETTER U
0x0424: 0xe6, # CYRILLIC CAPITAL LETTER EF
0x0425: 0xe8, # CYRILLIC CAPITAL LETTER HA
0x0426: 0xe3, # CYRILLIC CAPITAL LETTER TSE
0x0427: 0xfe, # CYRILLIC CAPITAL LETTER CHE
0x0428: 0xfb, # CYRILLIC CAPITAL LETTER SHA
0x0429: 0xfd, # CYRILLIC CAPITAL LETTER SHCHA
0x042a: 0xff, # CYRILLIC CAPITAL LETTER HARD SIGN
0x042b: 0xf9, # CYRILLIC CAPITAL LETTER YERU
0x042c: 0xf8, # CYRILLIC CAPITAL LETTER SOFT SIGN
0x042d: 0xfc, # CYRILLIC CAPITAL LETTER E
0x042e: 0xe0, # CYRILLIC CAPITAL LETTER YU
0x042f: 0xf1, # CYRILLIC CAPITAL LETTER YA
0x0430: 0xc1, # CYRILLIC SMALL LETTER A
0x0431: 0xc2, # CYRILLIC SMALL LETTER BE
0x0432: 0xd7, # CYRILLIC SMALL LETTER VE
0x0433: 0xc7, # CYRILLIC SMALL LETTER GHE
0x0434: 0xc4, # CYRILLIC SMALL LETTER DE
0x0435: 0xc5, # CYRILLIC SMALL LETTER IE
0x0436: 0xd6, # CYRILLIC SMALL LETTER ZHE
0x0437: 0xda, # CYRILLIC SMALL LETTER ZE
0x0438: 0xc9, # CYRILLIC SMALL LETTER I
0x0439: 0xca, # CYRILLIC SMALL LETTER SHORT I
0x043a: 0xcb, # CYRILLIC SMALL LETTER KA
0x043b: 0xcc, # CYRILLIC SMALL LETTER EL
0x043c: 0xcd, # CYRILLIC SMALL LETTER EM
0x043d: 0xce, # CYRILLIC SMALL LETTER EN
0x043e: 0xcf, # CYRILLIC SMALL LETTER O
0x043f: 0xd0, # CYRILLIC SMALL LETTER PE
0x0440: 0xd2, # CYRILLIC SMALL LETTER ER
0x0441: 0xd3, # CYRILLIC SMALL LETTER ES
0x0442: 0xd4, # CYRILLIC SMALL LETTER TE
0x0443: 0xd5, # CYRILLIC SMALL LETTER U
0x0444: 0xc6, # CYRILLIC SMALL LETTER EF
0x0445: 0xc8, # CYRILLIC SMALL LETTER HA
0x0446: 0xc3, # CYRILLIC SMALL LETTER TSE
0x0447: 0xde, # CYRILLIC SMALL LETTER CHE
0x0448: 0xdb, # CYRILLIC SMALL LETTER SHA
0x0449: 0xdd, # CYRILLIC SMALL LETTER SHCHA
0x044a: 0xdf, # CYRILLIC SMALL LETTER HARD SIGN
0x044b: 0xd9, # CYRILLIC SMALL LETTER YERU
0x044c: 0xd8, # CYRILLIC SMALL LETTER SOFT SIGN
0x044d: 0xdc, # CYRILLIC SMALL LETTER E
0x044e: 0xc0, # CYRILLIC SMALL LETTER YU
0x044f: 0xd1, # CYRILLIC SMALL LETTER YA
0x0451: 0xa3, # CYRILLIC SMALL LETTER IO
0x0454: 0xa4, # CYRILLIC SMALL LETTER UKRAINIAN IE
0x0456: 0xa6, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
0x0457: 0xa7, # CYRILLIC SMALL LETTER YI (UKRAINIAN)
0x0490: 0xbd, # CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN
0x0491: 0xad, # CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN
0x2219: 0x95, # BULLET OPERATOR
0x221a: 0x96, # SQUARE ROOT
0x2248: 0x97, # ALMOST EQUAL TO
0x2264: 0x98, # LESS-THAN OR EQUAL TO
0x2265: 0x99, # GREATER-THAN OR EQUAL TO
0x2320: 0x93, # TOP HALF INTEGRAL
0x2321: 0x9b, # BOTTOM HALF INTEGRAL
0x2500: 0x80, # BOX DRAWINGS LIGHT HORIZONTAL
0x2502: 0x81, # BOX DRAWINGS LIGHT VERTICAL
0x250c: 0x82, # BOX DRAWINGS LIGHT DOWN AND RIGHT
0x2510: 0x83, # BOX DRAWINGS LIGHT DOWN AND LEFT
0x2514: 0x84, # BOX DRAWINGS LIGHT UP AND RIGHT
0x2518: 0x85, # BOX DRAWINGS LIGHT UP AND LEFT
0x251c: 0x86, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0x2524: 0x87, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
0x252c: 0x88, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0x2534: 0x89, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
0x253c: 0x8a, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0x2550: 0xa0, # BOX DRAWINGS DOUBLE HORIZONTAL
0x2551: 0xa1, # BOX DRAWINGS DOUBLE VERTICAL
0x2552: 0xa2, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0x2554: 0xa5, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
0x2557: 0xa8, # BOX DRAWINGS DOUBLE DOWN AND LEFT
0x2558: 0xa9, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0x2559: 0xaa, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0x255a: 0xab, # BOX DRAWINGS DOUBLE UP AND RIGHT
0x255b: 0xac, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0x255d: 0xae, # BOX DRAWINGS DOUBLE UP AND LEFT
0x255e: 0xaf, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0x255f: 0xb0, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0x2560: 0xb1, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0x2561: 0xb2, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0x2563: 0xb5, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0x2566: 0xb8, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0x2567: 0xb9, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0x2568: 0xba, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0x2569: 0xbb, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0x256a: 0xbc, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0x256c: 0xbe, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0x2580: 0x8b, # UPPER HALF BLOCK
0x2584: 0x8c, # LOWER HALF BLOCK
0x2588: 0x8d, # FULL BLOCK
0x258c: 0x8e, # LEFT HALF BLOCK
0x2590: 0x8f, # RIGHT HALF BLOCK
0x2591: 0x90, # LIGHT SHADE
0x2592: 0x91, # MEDIUM SHADE
0x2593: 0x92, # DARK SHADE
0x25a0: 0x94, # BLACK SQUARE
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,14 +1,8 @@
""" Python Character Mapping Codec for TIS-620. """ Python Character Mapping Codec generated from 'python-mappings/TIS-620.TXT' with gencodec.py.
According to
ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-11.TXT the
TIS-620 is the identical to ISO_8859-11 with the 0xA0 (no-break
space) mapping removed.
"""#" """#"
import codecs import codecs
from encodings.iso8859_11 import decoding_map
### Codec APIs ### Codec APIs
@ -20,7 +14,7 @@ class Codec(codecs.Codec):
def decode(self,input,errors='strict'): def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map) return codecs.charmap_decode(input,errors,decoding_table)
class StreamWriter(Codec,codecs.StreamWriter): class StreamWriter(Codec,codecs.StreamWriter):
pass pass
@ -34,13 +28,516 @@ def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter) return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
decoding_map = decoding_map.copy() ### Decoding Table
decoding_map.update({
0x00a0: None, decoding_table = (
}) u'\x00' # 0x00 -> NULL
u'\x01' # 0x01 -> START OF HEADING
u'\x02' # 0x02 -> START OF TEXT
u'\x03' # 0x03 -> END OF TEXT
u'\x04' # 0x04 -> END OF TRANSMISSION
u'\x05' # 0x05 -> ENQUIRY
u'\x06' # 0x06 -> ACKNOWLEDGE
u'\x07' # 0x07 -> BELL
u'\x08' # 0x08 -> BACKSPACE
u'\t' # 0x09 -> HORIZONTAL TABULATION
u'\n' # 0x0a -> LINE FEED
u'\x0b' # 0x0b -> VERTICAL TABULATION
u'\x0c' # 0x0c -> FORM FEED
u'\r' # 0x0d -> CARRIAGE RETURN
u'\x0e' # 0x0e -> SHIFT OUT
u'\x0f' # 0x0f -> SHIFT IN
u'\x10' # 0x10 -> DATA LINK ESCAPE
u'\x11' # 0x11 -> DEVICE CONTROL ONE
u'\x12' # 0x12 -> DEVICE CONTROL TWO
u'\x13' # 0x13 -> DEVICE CONTROL THREE
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
u'\x18' # 0x18 -> CANCEL
u'\x19' # 0x19 -> END OF MEDIUM
u'\x1a' # 0x1a -> SUBSTITUTE
u'\x1b' # 0x1b -> ESCAPE
u'\x1c' # 0x1c -> FILE SEPARATOR
u'\x1d' # 0x1d -> GROUP SEPARATOR
u'\x1e' # 0x1e -> RECORD SEPARATOR
u'\x1f' # 0x1f -> UNIT SEPARATOR
u' ' # 0x20 -> SPACE
u'!' # 0x21 -> EXCLAMATION MARK
u'"' # 0x22 -> QUOTATION MARK
u'#' # 0x23 -> NUMBER SIGN
u'$' # 0x24 -> DOLLAR SIGN
u'%' # 0x25 -> PERCENT SIGN
u'&' # 0x26 -> AMPERSAND
u"'" # 0x27 -> APOSTROPHE
u'(' # 0x28 -> LEFT PARENTHESIS
u')' # 0x29 -> RIGHT PARENTHESIS
u'*' # 0x2a -> ASTERISK
u'+' # 0x2b -> PLUS SIGN
u',' # 0x2c -> COMMA
u'-' # 0x2d -> HYPHEN-MINUS
u'.' # 0x2e -> FULL STOP
u'/' # 0x2f -> SOLIDUS
u'0' # 0x30 -> DIGIT ZERO
u'1' # 0x31 -> DIGIT ONE
u'2' # 0x32 -> DIGIT TWO
u'3' # 0x33 -> DIGIT THREE
u'4' # 0x34 -> DIGIT FOUR
u'5' # 0x35 -> DIGIT FIVE
u'6' # 0x36 -> DIGIT SIX
u'7' # 0x37 -> DIGIT SEVEN
u'8' # 0x38 -> DIGIT EIGHT
u'9' # 0x39 -> DIGIT NINE
u':' # 0x3a -> COLON
u';' # 0x3b -> SEMICOLON
u'<' # 0x3c -> LESS-THAN SIGN
u'=' # 0x3d -> EQUALS SIGN
u'>' # 0x3e -> GREATER-THAN SIGN
u'?' # 0x3f -> QUESTION MARK
u'@' # 0x40 -> COMMERCIAL AT
u'A' # 0x41 -> LATIN CAPITAL LETTER A
u'B' # 0x42 -> LATIN CAPITAL LETTER B
u'C' # 0x43 -> LATIN CAPITAL LETTER C
u'D' # 0x44 -> LATIN CAPITAL LETTER D
u'E' # 0x45 -> LATIN CAPITAL LETTER E
u'F' # 0x46 -> LATIN CAPITAL LETTER F
u'G' # 0x47 -> LATIN CAPITAL LETTER G
u'H' # 0x48 -> LATIN CAPITAL LETTER H
u'I' # 0x49 -> LATIN CAPITAL LETTER I
u'J' # 0x4a -> LATIN CAPITAL LETTER J
u'K' # 0x4b -> LATIN CAPITAL LETTER K
u'L' # 0x4c -> LATIN CAPITAL LETTER L
u'M' # 0x4d -> LATIN CAPITAL LETTER M
u'N' # 0x4e -> LATIN CAPITAL LETTER N
u'O' # 0x4f -> LATIN CAPITAL LETTER O
u'P' # 0x50 -> LATIN CAPITAL LETTER P
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
u'R' # 0x52 -> LATIN CAPITAL LETTER R
u'S' # 0x53 -> LATIN CAPITAL LETTER S
u'T' # 0x54 -> LATIN CAPITAL LETTER T
u'U' # 0x55 -> LATIN CAPITAL LETTER U
u'V' # 0x56 -> LATIN CAPITAL LETTER V
u'W' # 0x57 -> LATIN CAPITAL LETTER W
u'X' # 0x58 -> LATIN CAPITAL LETTER X
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
u'Z' # 0x5a -> LATIN CAPITAL LETTER Z
u'[' # 0x5b -> LEFT SQUARE BRACKET
u'\\' # 0x5c -> REVERSE SOLIDUS
u']' # 0x5d -> RIGHT SQUARE BRACKET
u'^' # 0x5e -> CIRCUMFLEX ACCENT
u'_' # 0x5f -> LOW LINE
u'`' # 0x60 -> GRAVE ACCENT
u'a' # 0x61 -> LATIN SMALL LETTER A
u'b' # 0x62 -> LATIN SMALL LETTER B
u'c' # 0x63 -> LATIN SMALL LETTER C
u'd' # 0x64 -> LATIN SMALL LETTER D
u'e' # 0x65 -> LATIN SMALL LETTER E
u'f' # 0x66 -> LATIN SMALL LETTER F
u'g' # 0x67 -> LATIN SMALL LETTER G
u'h' # 0x68 -> LATIN SMALL LETTER H
u'i' # 0x69 -> LATIN SMALL LETTER I
u'j' # 0x6a -> LATIN SMALL LETTER J
u'k' # 0x6b -> LATIN SMALL LETTER K
u'l' # 0x6c -> LATIN SMALL LETTER L
u'm' # 0x6d -> LATIN SMALL LETTER M
u'n' # 0x6e -> LATIN SMALL LETTER N
u'o' # 0x6f -> LATIN SMALL LETTER O
u'p' # 0x70 -> LATIN SMALL LETTER P
u'q' # 0x71 -> LATIN SMALL LETTER Q
u'r' # 0x72 -> LATIN SMALL LETTER R
u's' # 0x73 -> LATIN SMALL LETTER S
u't' # 0x74 -> LATIN SMALL LETTER T
u'u' # 0x75 -> LATIN SMALL LETTER U
u'v' # 0x76 -> LATIN SMALL LETTER V
u'w' # 0x77 -> LATIN SMALL LETTER W
u'x' # 0x78 -> LATIN SMALL LETTER X
u'y' # 0x79 -> LATIN SMALL LETTER Y
u'z' # 0x7a -> LATIN SMALL LETTER Z
u'{' # 0x7b -> LEFT CURLY BRACKET
u'|' # 0x7c -> VERTICAL LINE
u'}' # 0x7d -> RIGHT CURLY BRACKET
u'~' # 0x7e -> TILDE
u'\x7f' # 0x7f -> DELETE
u'\x80' # 0x80 -> <control>
u'\x81' # 0x81 -> <control>
u'\x82' # 0x82 -> <control>
u'\x83' # 0x83 -> <control>
u'\x84' # 0x84 -> <control>
u'\x85' # 0x85 -> <control>
u'\x86' # 0x86 -> <control>
u'\x87' # 0x87 -> <control>
u'\x88' # 0x88 -> <control>
u'\x89' # 0x89 -> <control>
u'\x8a' # 0x8a -> <control>
u'\x8b' # 0x8b -> <control>
u'\x8c' # 0x8c -> <control>
u'\x8d' # 0x8d -> <control>
u'\x8e' # 0x8e -> <control>
u'\x8f' # 0x8f -> <control>
u'\x90' # 0x90 -> <control>
u'\x91' # 0x91 -> <control>
u'\x92' # 0x92 -> <control>
u'\x93' # 0x93 -> <control>
u'\x94' # 0x94 -> <control>
u'\x95' # 0x95 -> <control>
u'\x96' # 0x96 -> <control>
u'\x97' # 0x97 -> <control>
u'\x98' # 0x98 -> <control>
u'\x99' # 0x99 -> <control>
u'\x9a' # 0x9a -> <control>
u'\x9b' # 0x9b -> <control>
u'\x9c' # 0x9c -> <control>
u'\x9d' # 0x9d -> <control>
u'\x9e' # 0x9e -> <control>
u'\x9f' # 0x9f -> <control>
u'\ufffe'
u'\u0e01' # 0xa1 -> THAI CHARACTER KO KAI
u'\u0e02' # 0xa2 -> THAI CHARACTER KHO KHAI
u'\u0e03' # 0xa3 -> THAI CHARACTER KHO KHUAT
u'\u0e04' # 0xa4 -> THAI CHARACTER KHO KHWAI
u'\u0e05' # 0xa5 -> THAI CHARACTER KHO KHON
u'\u0e06' # 0xa6 -> THAI CHARACTER KHO RAKHANG
u'\u0e07' # 0xa7 -> THAI CHARACTER NGO NGU
u'\u0e08' # 0xa8 -> THAI CHARACTER CHO CHAN
u'\u0e09' # 0xa9 -> THAI CHARACTER CHO CHING
u'\u0e0a' # 0xaa -> THAI CHARACTER CHO CHANG
u'\u0e0b' # 0xab -> THAI CHARACTER SO SO
u'\u0e0c' # 0xac -> THAI CHARACTER CHO CHOE
u'\u0e0d' # 0xad -> THAI CHARACTER YO YING
u'\u0e0e' # 0xae -> THAI CHARACTER DO CHADA
u'\u0e0f' # 0xaf -> THAI CHARACTER TO PATAK
u'\u0e10' # 0xb0 -> THAI CHARACTER THO THAN
u'\u0e11' # 0xb1 -> THAI CHARACTER THO NANGMONTHO
u'\u0e12' # 0xb2 -> THAI CHARACTER THO PHUTHAO
u'\u0e13' # 0xb3 -> THAI CHARACTER NO NEN
u'\u0e14' # 0xb4 -> THAI CHARACTER DO DEK
u'\u0e15' # 0xb5 -> THAI CHARACTER TO TAO
u'\u0e16' # 0xb6 -> THAI CHARACTER THO THUNG
u'\u0e17' # 0xb7 -> THAI CHARACTER THO THAHAN
u'\u0e18' # 0xb8 -> THAI CHARACTER THO THONG
u'\u0e19' # 0xb9 -> THAI CHARACTER NO NU
u'\u0e1a' # 0xba -> THAI CHARACTER BO BAIMAI
u'\u0e1b' # 0xbb -> THAI CHARACTER PO PLA
u'\u0e1c' # 0xbc -> THAI CHARACTER PHO PHUNG
u'\u0e1d' # 0xbd -> THAI CHARACTER FO FA
u'\u0e1e' # 0xbe -> THAI CHARACTER PHO PHAN
u'\u0e1f' # 0xbf -> THAI CHARACTER FO FAN
u'\u0e20' # 0xc0 -> THAI CHARACTER PHO SAMPHAO
u'\u0e21' # 0xc1 -> THAI CHARACTER MO MA
u'\u0e22' # 0xc2 -> THAI CHARACTER YO YAK
u'\u0e23' # 0xc3 -> THAI CHARACTER RO RUA
u'\u0e24' # 0xc4 -> THAI CHARACTER RU
u'\u0e25' # 0xc5 -> THAI CHARACTER LO LING
u'\u0e26' # 0xc6 -> THAI CHARACTER LU
u'\u0e27' # 0xc7 -> THAI CHARACTER WO WAEN
u'\u0e28' # 0xc8 -> THAI CHARACTER SO SALA
u'\u0e29' # 0xc9 -> THAI CHARACTER SO RUSI
u'\u0e2a' # 0xca -> THAI CHARACTER SO SUA
u'\u0e2b' # 0xcb -> THAI CHARACTER HO HIP
u'\u0e2c' # 0xcc -> THAI CHARACTER LO CHULA
u'\u0e2d' # 0xcd -> THAI CHARACTER O ANG
u'\u0e2e' # 0xce -> THAI CHARACTER HO NOKHUK
u'\u0e2f' # 0xcf -> THAI CHARACTER PAIYANNOI
u'\u0e30' # 0xd0 -> THAI CHARACTER SARA A
u'\u0e31' # 0xd1 -> THAI CHARACTER MAI HAN-AKAT
u'\u0e32' # 0xd2 -> THAI CHARACTER SARA AA
u'\u0e33' # 0xd3 -> THAI CHARACTER SARA AM
u'\u0e34' # 0xd4 -> THAI CHARACTER SARA I
u'\u0e35' # 0xd5 -> THAI CHARACTER SARA II
u'\u0e36' # 0xd6 -> THAI CHARACTER SARA UE
u'\u0e37' # 0xd7 -> THAI CHARACTER SARA UEE
u'\u0e38' # 0xd8 -> THAI CHARACTER SARA U
u'\u0e39' # 0xd9 -> THAI CHARACTER SARA UU
u'\u0e3a' # 0xda -> THAI CHARACTER PHINTHU
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\u0e3f' # 0xdf -> THAI CURRENCY SYMBOL BAHT
u'\u0e40' # 0xe0 -> THAI CHARACTER SARA E
u'\u0e41' # 0xe1 -> THAI CHARACTER SARA AE
u'\u0e42' # 0xe2 -> THAI CHARACTER SARA O
u'\u0e43' # 0xe3 -> THAI CHARACTER SARA AI MAIMUAN
u'\u0e44' # 0xe4 -> THAI CHARACTER SARA AI MAIMALAI
u'\u0e45' # 0xe5 -> THAI CHARACTER LAKKHANGYAO
u'\u0e46' # 0xe6 -> THAI CHARACTER MAIYAMOK
u'\u0e47' # 0xe7 -> THAI CHARACTER MAITAIKHU
u'\u0e48' # 0xe8 -> THAI CHARACTER MAI EK
u'\u0e49' # 0xe9 -> THAI CHARACTER MAI THO
u'\u0e4a' # 0xea -> THAI CHARACTER MAI TRI
u'\u0e4b' # 0xeb -> THAI CHARACTER MAI CHATTAWA
u'\u0e4c' # 0xec -> THAI CHARACTER THANTHAKHAT
u'\u0e4d' # 0xed -> THAI CHARACTER NIKHAHIT
u'\u0e4e' # 0xee -> THAI CHARACTER YAMAKKAN
u'\u0e4f' # 0xef -> THAI CHARACTER FONGMAN
u'\u0e50' # 0xf0 -> THAI DIGIT ZERO
u'\u0e51' # 0xf1 -> THAI DIGIT ONE
u'\u0e52' # 0xf2 -> THAI DIGIT TWO
u'\u0e53' # 0xf3 -> THAI DIGIT THREE
u'\u0e54' # 0xf4 -> THAI DIGIT FOUR
u'\u0e55' # 0xf5 -> THAI DIGIT FIVE
u'\u0e56' # 0xf6 -> THAI DIGIT SIX
u'\u0e57' # 0xf7 -> THAI DIGIT SEVEN
u'\u0e58' # 0xf8 -> THAI DIGIT EIGHT
u'\u0e59' # 0xf9 -> THAI DIGIT NINE
u'\u0e5a' # 0xfa -> THAI CHARACTER ANGKHANKHU
u'\u0e5b' # 0xfb -> THAI CHARACTER KHOMUT
u'\ufffe'
u'\ufffe'
u'\ufffe'
u'\ufffe'
)
### Encoding Map ### Encoding Map
encoding_map = codecs.make_encoding_map(decoding_map) encoding_map = {
0x0000: 0x00, # NULL
0x0001: 0x01, # START OF HEADING
0x0002: 0x02, # START OF TEXT
0x0003: 0x03, # END OF TEXT
0x0004: 0x04, # END OF TRANSMISSION
0x0005: 0x05, # ENQUIRY
0x0006: 0x06, # ACKNOWLEDGE
0x0007: 0x07, # BELL
0x0008: 0x08, # BACKSPACE
0x0009: 0x09, # HORIZONTAL TABULATION
0x000a: 0x0a, # LINE FEED
0x000b: 0x0b, # VERTICAL TABULATION
0x000c: 0x0c, # FORM FEED
0x000d: 0x0d, # CARRIAGE RETURN
0x000e: 0x0e, # SHIFT OUT
0x000f: 0x0f, # SHIFT IN
0x0010: 0x10, # DATA LINK ESCAPE
0x0011: 0x11, # DEVICE CONTROL ONE
0x0012: 0x12, # DEVICE CONTROL TWO
0x0013: 0x13, # DEVICE CONTROL THREE
0x0014: 0x14, # DEVICE CONTROL FOUR
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
0x0016: 0x16, # SYNCHRONOUS IDLE
0x0017: 0x17, # END OF TRANSMISSION BLOCK
0x0018: 0x18, # CANCEL
0x0019: 0x19, # END OF MEDIUM
0x001a: 0x1a, # SUBSTITUTE
0x001b: 0x1b, # ESCAPE
0x001c: 0x1c, # FILE SEPARATOR
0x001d: 0x1d, # GROUP SEPARATOR
0x001e: 0x1e, # RECORD SEPARATOR
0x001f: 0x1f, # UNIT SEPARATOR
0x0020: 0x20, # SPACE
0x0021: 0x21, # EXCLAMATION MARK
0x0022: 0x22, # QUOTATION MARK
0x0023: 0x23, # NUMBER SIGN
0x0024: 0x24, # DOLLAR SIGN
0x0025: 0x25, # PERCENT SIGN
0x0026: 0x26, # AMPERSAND
0x0027: 0x27, # APOSTROPHE
0x0028: 0x28, # LEFT PARENTHESIS
0x0029: 0x29, # RIGHT PARENTHESIS
0x002a: 0x2a, # ASTERISK
0x002b: 0x2b, # PLUS SIGN
0x002c: 0x2c, # COMMA
0x002d: 0x2d, # HYPHEN-MINUS
0x002e: 0x2e, # FULL STOP
0x002f: 0x2f, # SOLIDUS
0x0030: 0x30, # DIGIT ZERO
0x0031: 0x31, # DIGIT ONE
0x0032: 0x32, # DIGIT TWO
0x0033: 0x33, # DIGIT THREE
0x0034: 0x34, # DIGIT FOUR
0x0035: 0x35, # DIGIT FIVE
0x0036: 0x36, # DIGIT SIX
0x0037: 0x37, # DIGIT SEVEN
0x0038: 0x38, # DIGIT EIGHT
0x0039: 0x39, # DIGIT NINE
0x003a: 0x3a, # COLON
0x003b: 0x3b, # SEMICOLON
0x003c: 0x3c, # LESS-THAN SIGN
0x003d: 0x3d, # EQUALS SIGN
0x003e: 0x3e, # GREATER-THAN SIGN
0x003f: 0x3f, # QUESTION MARK
0x0040: 0x40, # COMMERCIAL AT
0x0041: 0x41, # LATIN CAPITAL LETTER A
0x0042: 0x42, # LATIN CAPITAL LETTER B
0x0043: 0x43, # LATIN CAPITAL LETTER C
0x0044: 0x44, # LATIN CAPITAL LETTER D
0x0045: 0x45, # LATIN CAPITAL LETTER E
0x0046: 0x46, # LATIN CAPITAL LETTER F
0x0047: 0x47, # LATIN CAPITAL LETTER G
0x0048: 0x48, # LATIN CAPITAL LETTER H
0x0049: 0x49, # LATIN CAPITAL LETTER I
0x004a: 0x4a, # LATIN CAPITAL LETTER J
0x004b: 0x4b, # LATIN CAPITAL LETTER K
0x004c: 0x4c, # LATIN CAPITAL LETTER L
0x004d: 0x4d, # LATIN CAPITAL LETTER M
0x004e: 0x4e, # LATIN CAPITAL LETTER N
0x004f: 0x4f, # LATIN CAPITAL LETTER O
0x0050: 0x50, # LATIN CAPITAL LETTER P
0x0051: 0x51, # LATIN CAPITAL LETTER Q
0x0052: 0x52, # LATIN CAPITAL LETTER R
0x0053: 0x53, # LATIN CAPITAL LETTER S
0x0054: 0x54, # LATIN CAPITAL LETTER T
0x0055: 0x55, # LATIN CAPITAL LETTER U
0x0056: 0x56, # LATIN CAPITAL LETTER V
0x0057: 0x57, # LATIN CAPITAL LETTER W
0x0058: 0x58, # LATIN CAPITAL LETTER X
0x0059: 0x59, # LATIN CAPITAL LETTER Y
0x005a: 0x5a, # LATIN CAPITAL LETTER Z
0x005b: 0x5b, # LEFT SQUARE BRACKET
0x005c: 0x5c, # REVERSE SOLIDUS
0x005d: 0x5d, # RIGHT SQUARE BRACKET
0x005e: 0x5e, # CIRCUMFLEX ACCENT
0x005f: 0x5f, # LOW LINE
0x0060: 0x60, # GRAVE ACCENT
0x0061: 0x61, # LATIN SMALL LETTER A
0x0062: 0x62, # LATIN SMALL LETTER B
0x0063: 0x63, # LATIN SMALL LETTER C
0x0064: 0x64, # LATIN SMALL LETTER D
0x0065: 0x65, # LATIN SMALL LETTER E
0x0066: 0x66, # LATIN SMALL LETTER F
0x0067: 0x67, # LATIN SMALL LETTER G
0x0068: 0x68, # LATIN SMALL LETTER H
0x0069: 0x69, # LATIN SMALL LETTER I
0x006a: 0x6a, # LATIN SMALL LETTER J
0x006b: 0x6b, # LATIN SMALL LETTER K
0x006c: 0x6c, # LATIN SMALL LETTER L
0x006d: 0x6d, # LATIN SMALL LETTER M
0x006e: 0x6e, # LATIN SMALL LETTER N
0x006f: 0x6f, # LATIN SMALL LETTER O
0x0070: 0x70, # LATIN SMALL LETTER P
0x0071: 0x71, # LATIN SMALL LETTER Q
0x0072: 0x72, # LATIN SMALL LETTER R
0x0073: 0x73, # LATIN SMALL LETTER S
0x0074: 0x74, # LATIN SMALL LETTER T
0x0075: 0x75, # LATIN SMALL LETTER U
0x0076: 0x76, # LATIN SMALL LETTER V
0x0077: 0x77, # LATIN SMALL LETTER W
0x0078: 0x78, # LATIN SMALL LETTER X
0x0079: 0x79, # LATIN SMALL LETTER Y
0x007a: 0x7a, # LATIN SMALL LETTER Z
0x007b: 0x7b, # LEFT CURLY BRACKET
0x007c: 0x7c, # VERTICAL LINE
0x007d: 0x7d, # RIGHT CURLY BRACKET
0x007e: 0x7e, # TILDE
0x007f: 0x7f, # DELETE
0x0080: 0x80, # <control>
0x0081: 0x81, # <control>
0x0082: 0x82, # <control>
0x0083: 0x83, # <control>
0x0084: 0x84, # <control>
0x0085: 0x85, # <control>
0x0086: 0x86, # <control>
0x0087: 0x87, # <control>
0x0088: 0x88, # <control>
0x0089: 0x89, # <control>
0x008a: 0x8a, # <control>
0x008b: 0x8b, # <control>
0x008c: 0x8c, # <control>
0x008d: 0x8d, # <control>
0x008e: 0x8e, # <control>
0x008f: 0x8f, # <control>
0x0090: 0x90, # <control>
0x0091: 0x91, # <control>
0x0092: 0x92, # <control>
0x0093: 0x93, # <control>
0x0094: 0x94, # <control>
0x0095: 0x95, # <control>
0x0096: 0x96, # <control>
0x0097: 0x97, # <control>
0x0098: 0x98, # <control>
0x0099: 0x99, # <control>
0x009a: 0x9a, # <control>
0x009b: 0x9b, # <control>
0x009c: 0x9c, # <control>
0x009d: 0x9d, # <control>
0x009e: 0x9e, # <control>
0x009f: 0x9f, # <control>
0x0e01: 0xa1, # THAI CHARACTER KO KAI
0x0e02: 0xa2, # THAI CHARACTER KHO KHAI
0x0e03: 0xa3, # THAI CHARACTER KHO KHUAT
0x0e04: 0xa4, # THAI CHARACTER KHO KHWAI
0x0e05: 0xa5, # THAI CHARACTER KHO KHON
0x0e06: 0xa6, # THAI CHARACTER KHO RAKHANG
0x0e07: 0xa7, # THAI CHARACTER NGO NGU
0x0e08: 0xa8, # THAI CHARACTER CHO CHAN
0x0e09: 0xa9, # THAI CHARACTER CHO CHING
0x0e0a: 0xaa, # THAI CHARACTER CHO CHANG
0x0e0b: 0xab, # THAI CHARACTER SO SO
0x0e0c: 0xac, # THAI CHARACTER CHO CHOE
0x0e0d: 0xad, # THAI CHARACTER YO YING
0x0e0e: 0xae, # THAI CHARACTER DO CHADA
0x0e0f: 0xaf, # THAI CHARACTER TO PATAK
0x0e10: 0xb0, # THAI CHARACTER THO THAN
0x0e11: 0xb1, # THAI CHARACTER THO NANGMONTHO
0x0e12: 0xb2, # THAI CHARACTER THO PHUTHAO
0x0e13: 0xb3, # THAI CHARACTER NO NEN
0x0e14: 0xb4, # THAI CHARACTER DO DEK
0x0e15: 0xb5, # THAI CHARACTER TO TAO
0x0e16: 0xb6, # THAI CHARACTER THO THUNG
0x0e17: 0xb7, # THAI CHARACTER THO THAHAN
0x0e18: 0xb8, # THAI CHARACTER THO THONG
0x0e19: 0xb9, # THAI CHARACTER NO NU
0x0e1a: 0xba, # THAI CHARACTER BO BAIMAI
0x0e1b: 0xbb, # THAI CHARACTER PO PLA
0x0e1c: 0xbc, # THAI CHARACTER PHO PHUNG
0x0e1d: 0xbd, # THAI CHARACTER FO FA
0x0e1e: 0xbe, # THAI CHARACTER PHO PHAN
0x0e1f: 0xbf, # THAI CHARACTER FO FAN
0x0e20: 0xc0, # THAI CHARACTER PHO SAMPHAO
0x0e21: 0xc1, # THAI CHARACTER MO MA
0x0e22: 0xc2, # THAI CHARACTER YO YAK
0x0e23: 0xc3, # THAI CHARACTER RO RUA
0x0e24: 0xc4, # THAI CHARACTER RU
0x0e25: 0xc5, # THAI CHARACTER LO LING
0x0e26: 0xc6, # THAI CHARACTER LU
0x0e27: 0xc7, # THAI CHARACTER WO WAEN
0x0e28: 0xc8, # THAI CHARACTER SO SALA
0x0e29: 0xc9, # THAI CHARACTER SO RUSI
0x0e2a: 0xca, # THAI CHARACTER SO SUA
0x0e2b: 0xcb, # THAI CHARACTER HO HIP
0x0e2c: 0xcc, # THAI CHARACTER LO CHULA
0x0e2d: 0xcd, # THAI CHARACTER O ANG
0x0e2e: 0xce, # THAI CHARACTER HO NOKHUK
0x0e2f: 0xcf, # THAI CHARACTER PAIYANNOI
0x0e30: 0xd0, # THAI CHARACTER SARA A
0x0e31: 0xd1, # THAI CHARACTER MAI HAN-AKAT
0x0e32: 0xd2, # THAI CHARACTER SARA AA
0x0e33: 0xd3, # THAI CHARACTER SARA AM
0x0e34: 0xd4, # THAI CHARACTER SARA I
0x0e35: 0xd5, # THAI CHARACTER SARA II
0x0e36: 0xd6, # THAI CHARACTER SARA UE
0x0e37: 0xd7, # THAI CHARACTER SARA UEE
0x0e38: 0xd8, # THAI CHARACTER SARA U
0x0e39: 0xd9, # THAI CHARACTER SARA UU
0x0e3a: 0xda, # THAI CHARACTER PHINTHU
0x0e3f: 0xdf, # THAI CURRENCY SYMBOL BAHT
0x0e40: 0xe0, # THAI CHARACTER SARA E
0x0e41: 0xe1, # THAI CHARACTER SARA AE
0x0e42: 0xe2, # THAI CHARACTER SARA O
0x0e43: 0xe3, # THAI CHARACTER SARA AI MAIMUAN
0x0e44: 0xe4, # THAI CHARACTER SARA AI MAIMALAI
0x0e45: 0xe5, # THAI CHARACTER LAKKHANGYAO
0x0e46: 0xe6, # THAI CHARACTER MAIYAMOK
0x0e47: 0xe7, # THAI CHARACTER MAITAIKHU
0x0e48: 0xe8, # THAI CHARACTER MAI EK
0x0e49: 0xe9, # THAI CHARACTER MAI THO
0x0e4a: 0xea, # THAI CHARACTER MAI TRI
0x0e4b: 0xeb, # THAI CHARACTER MAI CHATTAWA
0x0e4c: 0xec, # THAI CHARACTER THANTHAKHAT
0x0e4d: 0xed, # THAI CHARACTER NIKHAHIT
0x0e4e: 0xee, # THAI CHARACTER YAMAKKAN
0x0e4f: 0xef, # THAI CHARACTER FONGMAN
0x0e50: 0xf0, # THAI DIGIT ZERO
0x0e51: 0xf1, # THAI DIGIT ONE
0x0e52: 0xf2, # THAI DIGIT TWO
0x0e53: 0xf3, # THAI DIGIT THREE
0x0e54: 0xf4, # THAI DIGIT FOUR
0x0e55: 0xf5, # THAI DIGIT FIVE
0x0e56: 0xf6, # THAI DIGIT SIX
0x0e57: 0xf7, # THAI DIGIT SEVEN
0x0e58: 0xf8, # THAI DIGIT EIGHT
0x0e59: 0xf9, # THAI DIGIT NINE
0x0e5a: 0xfa, # THAI CHARACTER ANGKHANKHU
0x0e5b: 0xfb, # THAI CHARACTER KHOMUT
}