cjk support

2025-08-31 15:37:24 +00:00 · 2020-03-16 22:18:22 +00:00 · 2020-03-16 22:18:22 +00:00 · 33d0c9796f
commit 33d0c9796f
parent 137b932392
13 changed files with 687 additions and 88 deletions
--- a/tools/make_terminal_widths.py
+++ b/tools/make_terminal_widths.py
@@ -1,62 +1,89 @@
-from functools import partial
-from typing import List
-import os.path
-from urllib.request import urlopen
+import subprocess
+from typing import List, Tuple
+import sys

 from rich.progress import Progress

-
-def download(url: str) -> str:
-    """Copy data from a url to a local file."""
-
-    # This will break if the response doesn't contain content length
-    filename = url.rsplit("/")[-1]
-    if os.path.exists(filename):
-        print(f"{filename} exists")
-        return filename
-    progress = Progress()
-    task = progress.add_task(filename)
-    with progress:
-        response = urlopen(url)
-        progress.update(task, total=int(response.info()["Content-length"]))
-        with open(filename, "wb") as dest_file:
-            for data in iter(partial(response.read, 32768), b""):
-                dest_file.write(data)
-                progress.advance(task, len(data))
-    return filename
+from wcwidth import wcwidth


-def get_data():
-    east_asian_filename = download(
-        "http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt"
+progress = Progress()
+
+
+def make_widths_table():
+    table: List[Tuple[int, int, int]] = []
+    append = table.append
+
+    make_table_task = progress.add_task("Calculating table...")
+
+    widths = (
+        (codepoint, wcwidth(chr(codepoint)))
+        for codepoint in range(0, sys.maxunicode + 1)
    )
-    download(
-        "http://www.unicode.org/Public/UNIDATA/extracted/DerivedGeneralCategory.txt"
-    )
-    print(parse_east_asian(east_asian_filename))

+    widths = [(codepoint, width) for codepoint, width in widths if width != 1]
+    iter_widths = iter(widths)

-def parse_east_asian(filename: str) -> List[int]:
-    codepoints: List[int] = []
-    for line in open(filename, "rt"):
-        if line.startswith("#") or not line.strip():
-            continue
-        print(line)
-        first_field = line.split()[0]
-        if ";" not in first_field:
-            continue
-        codepoint_range, details = first_field.split(";", 1)
-        if ".." in codepoint_range:
-            start, end = codepoint_range.split("..")
-            codepoints.extend(range(int(start, 16), int(end, 16) + 1))
+    endpoint, group_cell_size = next(iter_widths)
+    start_codepoint = end_codepoint = endpoint
+    for codepoint, cell_size in progress.track(
+        iter_widths, task_id=make_table_task, total=len(widths) - 1
+    ):
+        if cell_size != group_cell_size or codepoint != end_codepoint + 1:
+            append((start_codepoint, end_codepoint, group_cell_size))
+            start_codepoint = end_codepoint = codepoint
+            group_cell_size = cell_size
        else:
-            codepoints.append(int(codepoint_range, 16))
+            end_codepoint = codepoint
+    append((start_codepoint, end_codepoint, group_cell_size))
+    return table

-    return codepoints
+
+def get_cell_size(table: List[Tuple[int, int, int]], character: str) -> int:
+
+    codepoint = ord(character)
+    lower_bound = 0
+    upper_bound = len(table) - 1
+    index = (lower_bound + upper_bound) // 2
+    while True:
+        start, end, width = table[index]
+        if codepoint < start:
+            upper_bound = index - 1
+        elif codepoint > end:
+            lower_bound = index + 1
+        else:
+            return width
+        if upper_bound < lower_bound:
+            break
+        index = (lower_bound + upper_bound) // 2
+    return 1
+
+
+def test(widths_table):
+    for codepoint in progress.track(
+        range(0, sys.maxunicode + 1), description="Testing..."
+    ):
+        character = chr(codepoint)
+        width1 = get_cell_size(widths_table, character)
+        width2 = wcwidth(character)
+        if width1 != width2:
+            print(f"{width1} != {width2}")
+            break


 def run():
-    get_data()
+    with progress:
+        widths_table = make_widths_table()
+        test(widths_table)
+    table_file = f"""# Auto generated by make_terminal_widths.py
+
+CELL_WIDTHS = {widths_table!r}
+
+"""
+    with open("../rich/_cell_widths.py", "wt") as fh:
+        fh.write(table_file)
+
+    subprocess.run("black ../rich/_cell_widths.py", shell=True)


 if __name__ == "__main__":