cjk support

This commit is contained in:
Will McGugan 2020-03-16 22:18:22 +00:00
parent 137b932392
commit 33d0c9796f
13 changed files with 687 additions and 88 deletions

View file

@ -1,62 +1,89 @@
from functools import partial
from typing import List
import os.path
from urllib.request import urlopen
import subprocess
from typing import List, Tuple
import sys
from rich.progress import Progress
def download(url: str) -> str:
    """Copy data from a url to a local file.

    The local file is named after the last path segment of ``url`` and is
    created in the current working directory. If a file with that name
    already exists, nothing is downloaded.

    Args:
        url: Address of the resource to fetch.

    Returns:
        The name of the local file.
    """
    # maxsplit=1: only the final path segment is needed.
    filename = url.rsplit("/", 1)[-1]
    if os.path.exists(filename):
        print(f"{filename} exists")
        return filename
    download_progress = Progress()
    task = download_progress.add_task(filename)
    # Opening the response in the ``with`` statement guarantees the
    # connection is closed even if reading or writing fails part-way.
    with download_progress, urlopen(url) as response:
        content_length = response.info()["Content-length"]
        # The header is optional; without it the task simply keeps the total
        # it was created with instead of crashing on int(None).
        if content_length is not None:
            download_progress.update(task, total=int(content_length))
        with open(filename, "wb") as dest_file:
            for data in iter(partial(response.read, 32768), b""):
                dest_file.write(data)
                download_progress.advance(task, len(data))
    return filename
from wcwidth import wcwidth
def get_data():
    """Download the Unicode data files and print the parsed East Asian data.

    Fetches ``EastAsianWidth.txt`` and ``DerivedGeneralCategory.txt`` via
    ``download`` and prints the codepoint list that ``parse_east_asian``
    extracts from the former. Nothing is returned.
    """
    east_asian_filename = download(
        "http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt"
    )
    download(
        "http://www.unicode.org/Public/UNIDATA/extracted/DerivedGeneralCategory.txt"
    )
    print(parse_east_asian(east_asian_filename))


# Shared progress display used by the table builder and by the callers that
# enter it as a context manager.
progress = Progress()


def make_widths_table() -> List[Tuple[int, int, int]]:
    """Build a run-length table of codepoints whose cell width is not 1.

    Every codepoint from 0 to ``sys.maxunicode`` is measured with ``wcwidth``.
    Codepoints of width 1 are dropped; the rest are merged into
    ``(start_codepoint, end_codepoint, width)`` entries, where each entry
    covers consecutive codepoints that share the same width.

    Returns:
        The list of ``(start, end, width)`` tuples in ascending codepoint
        order; empty if no codepoint has a width other than 1.
    """
    table: List[Tuple[int, int, int]] = []
    append = table.append

    make_table_task = progress.add_task("Calculating table...")

    all_widths = (
        (codepoint, wcwidth(chr(codepoint)))
        for codepoint in range(0, sys.maxunicode + 1)
    )
    widths = [(codepoint, width) for codepoint, width in all_widths if width != 1]
    iter_widths = iter(widths)

    first = next(iter_widths, None)
    if first is None:
        # Nothing deviates from width 1, so there are no ranges to record.
        return table

    endpoint, group_cell_size = first
    start_codepoint = end_codepoint = endpoint
    # The first entry was consumed above, hence the total of len - 1.
    for codepoint, cell_size in progress.track(
        iter_widths, task_id=make_table_task, total=len(widths) - 1
    ):
        if cell_size != group_cell_size or codepoint != end_codepoint + 1:
            # Width changed or there is a gap: close the current run.
            append((start_codepoint, end_codepoint, group_cell_size))
            start_codepoint = end_codepoint = codepoint
            group_cell_size = cell_size
        else:
            end_codepoint = codepoint
    # Flush the run that was still open when the input ended.
    append((start_codepoint, end_codepoint, group_cell_size))
    return table


def parse_east_asian(filename: str) -> List[int]:
    """Collect every codepoint listed in an ``EastAsianWidth.txt`` style file.

    Each data line has the form ``<codepoint or start..end>;<class>``
    optionally followed by a ``#`` comment. Lines that are blank, start with
    ``#``, or contain no ``;`` before the comment are skipped. The class
    field is not interpreted. Each data line is echoed to stdout.

    Args:
        filename: Path of the data file to read.

    Returns:
        All listed codepoints, with ranges expanded, in file order.
    """
    codepoints: List[int] = []
    # ``with`` closes the file even if a malformed line raises.
    with open(filename, "rt") as data_file:
        for line in data_file:
            if line.startswith("#") or not line.strip():
                continue
            print(line)
            # Strip the trailing comment, then split on the first ';'.
            # Partitioning the whole record (rather than its first
            # whitespace-separated token) also accepts "range ; class".
            record = line.split("#", 1)[0]
            codepoint_range, separator, _ = record.partition(";")
            if not separator:
                continue
            codepoint_range = codepoint_range.strip()
            if ".." in codepoint_range:
                start, end = codepoint_range.split("..")
                codepoints.extend(range(int(start, 16), int(end, 16) + 1))
            else:
                codepoints.append(int(codepoint_range, 16))
    return codepoints
def get_cell_size(table: List[Tuple[int, int, int]], character: str) -> int:
    """Look up the cell width of a single character in a range table.

    Args:
        table: ``(start, end, width)`` entries with inclusive codepoint
            bounds. The binary search requires the entries to be sorted in
            ascending order and not to overlap.
        character: A one-character string.

    Returns:
        The width of the entry whose range contains the character, or 1 when
        no entry contains it (including when the table is empty).
    """
    codepoint = ord(character)
    lower_bound = 0
    upper_bound = len(table) - 1
    # Checking the bounds before indexing means an empty table falls straight
    # through to the default instead of indexing table[-1].
    while lower_bound <= upper_bound:
        index = (lower_bound + upper_bound) // 2
        start, end, width = table[index]
        if codepoint < start:
            upper_bound = index - 1
        elif codepoint > end:
            lower_bound = index + 1
        else:
            return width
    return 1
def test(widths_table):
    """Compare table lookups against ``wcwidth`` for every codepoint.

    Walks all codepoints from 0 to ``sys.maxunicode`` under a progress bar
    and stops at the first one where ``get_cell_size`` and ``wcwidth``
    disagree, printing the two differing widths. Nothing is returned.

    Args:
        widths_table: The range table to check, as accepted by
            ``get_cell_size``.
    """
    every_codepoint = range(0, sys.maxunicode + 1)
    tracked = progress.track(every_codepoint, description="Testing...")
    for character in map(chr, tracked):
        width1 = get_cell_size(widths_table, character)
        width2 = wcwidth(character)
        if width1 == width2:
            continue
        # First disagreement: report it and abandon the scan.
        print(f"{width1} != {width2}")
        break
def run():
    """Generate ``../rich/_cell_widths.py`` from the computed width table.

    Steps: fetch the Unicode data files, build the table and check it against
    ``wcwidth`` under the shared progress display, write the table out as a
    Python module defining ``CELL_WIDTHS``, then format that module with
    ``black``. The output path is relative to the current working directory.
    A mismatch reported by ``test`` is only printed; the file is written
    regardless.
    """
    # NOTE(review): get_data() only downloads files and prints the parsed
    # codepoints; nothing below uses its output - confirm it is still needed.
    get_data()
    with progress:
        widths_table = make_widths_table()
        test(widths_table)
    output_path = "../rich/_cell_widths.py"
    table_file = f"""# Auto generated by make_terminal_widths.py
CELL_WIDTHS = {widths_table!r}
"""
    with open(output_path, "wt") as fh:
        fh.write(table_file)
    # Pass an argument list rather than a shell command line: no shell is
    # spawned and the path is never subject to shell parsing.
    subprocess.run(["black", output_path])
if __name__ == "__main__":