adjust docs processor
Some checks failed
test / test (macos-latest) (push) Has been cancelled
test / test (ubuntu-latest) (push) Has been cancelled
test / test (windows-latest) (push) Has been cancelled

This commit is contained in:
Josh Thomas 2024-12-16 21:36:28 -06:00
parent ed1a1c65d2
commit df30aafde5

View file

@ -13,16 +13,21 @@ from __future__ import annotations
import logging
import re
from dataclasses import dataclass
from difflib import Differ
from functools import reduce
from itertools import islice
from pathlib import Path
from typing import Callable
from typing import Dict
from typing import List
from typing import NamedTuple
from rich.console import Console
from rich.logging import RichHandler
from rich.panel import Panel
from rich.progress import track
from rich.rule import Rule
console = Console()
logging.basicConfig(
@ -66,103 +71,203 @@ def write_file(path: Path, content: str) -> bool:
return False
def preview_changes(original: str, processed: str, context_lines: int = 2) -> None:
"""Show a preview of the changes made."""
console.print("\n[yellow]Preview of changes:[/yellow]")
@dataclass
class DiffStats:
    """Line-count summary comparing an original text with its processed form."""

    # Number of lines counted in the original text.
    original_lines: int
    # Number of lines counted in the processed text.
    processed_lines: int
    # processed_lines minus original_lines (negative when lines were removed).
    difference: int
# Basic statistics
orig_lines = original.count("\n")
proc_lines = processed.count("\n")
diff_lines = proc_lines - orig_lines
stats_panel = Panel(
f"Original lines: {orig_lines}\n"
f"Processed lines: {proc_lines}\n"
f"Difference: {diff_lines:+d} lines",
title="Statistics",
border_style="blue",
class DiffLine(NamedTuple):
    """One line of diff output together with its position on each side."""

    # Line number in the original text; 0 when the line is absent from it.
    orig_line_no: int
    # Line number in the processed text; 0 when the line is absent from it.
    proc_line_no: int
    # Two-character prefix taken from the diff line (e.g. "- " or "+ ").
    change_type: str
    # The line text with the two-character prefix removed.
    content: str
def calculate_stats(original: str, processed: str) -> DiffStats:
    """Return line counts for both texts and the difference between them.

    Lines are counted with ``str.splitlines()`` rather than by counting
    newline characters, so a text whose last line has no trailing newline
    is not under-reported by one. This is also the same line splitting
    that the diff itself is built from.

    Args:
        original: The text before processing.
        processed: The text after processing.

    Returns:
        A ``DiffStats`` whose ``difference`` is ``processed_lines`` minus
        ``original_lines``.
    """
    original_lines = len(original.splitlines())
    processed_lines = len(processed.splitlines())
    return DiffStats(
        original_lines=original_lines,
        processed_lines=processed_lines,
        difference=processed_lines - original_lines,
    )
console.print(stats_panel)
# Create diff
differ = Differ()
diff = list(differ.compare(original.splitlines(), processed.splitlines()))
# Find changed line groups with context
changes = []
current_group = []
in_change = False
last_change_line = -1
def create_diff_lines(diff_output: List[str]) -> List[DiffLine]:
    """Convert raw diff output into structured DiffLine objects with line numbers.

    Each input line is expected to start with a two-character code as
    produced by ``difflib.Differ``: ``"  "`` (unchanged), ``"- "`` (only in
    the original), ``"+ "`` (only in the processed text) or ``"? "`` (hint
    line, skipped).

    Args:
        diff_output: Lines of diff text, each starting with a two-character code.

    Returns:
        One ``DiffLine`` per non-hint line. Line numbers are 1-based; a
        number of 0 means the line does not exist on that side.
    """
    diff_lines = []
    # Start at 1 so that real line numbers never collide with the 0 used
    # to mark "not present on this side".
    orig_line_no = proc_line_no = 1

    for line in diff_output:
        if line.startswith("? "):  # Skip hint lines
            continue

        change_type = line[:2]
        # The unchanged code is two spaces; a single space can never equal
        # a two-character slice.
        in_original = change_type in ("  ", "- ")
        in_processed = change_type in ("  ", "+ ")

        diff_lines.append(
            DiffLine(
                orig_line_no if in_original else 0,
                proc_line_no if in_processed else 0,
                change_type,
                line[2:],
            )
        )

        # Advance only the counters of the sides this line belongs to.
        if in_original:
            orig_line_no += 1
        if in_processed:
            proc_line_no += 1

    return diff_lines
def group_changes(
    diff_lines: List[DiffLine], context_lines: int = 5
) -> List[List[DiffLine]]:
    """Group changes with their context lines.

    Every changed line (``"- "`` or ``"+ "``) claims a window reaching
    ``context_lines`` entries before and after it. Windows that overlap or
    touch are merged, and each merged window becomes one group. Because a
    group is a contiguous slice of ``diff_lines``, no line appears twice
    and context is never taken from beyond a later change.

    Args:
        diff_lines: Structured diff lines in document order.
        context_lines: Number of neighbouring lines to keep on each side of
            a change. Negative values are treated as 0.

    Returns:
        A list of groups in document order; empty when nothing changed.
    """
    context = max(context_lines, 0)
    final_index = len(diff_lines) - 1
    windows: List[List[int]] = []  # inclusive [first, last] index pairs

    for index, line in enumerate(diff_lines):
        if line.change_type not in ("- ", "+ "):
            continue

        first = max(0, index - context)
        last = min(final_index, index + context)

        if windows and first <= windows[-1][1] + 1:
            # Overlaps or touches the previous window: extend it instead of
            # starting a new group with nothing skipped in between.
            windows[-1][1] = last
        else:
            windows.append([first, last])

    return [list(diff_lines[first : last + 1]) for first, last in windows]
def get_changes(
    original: str, processed: str
) -> tuple[Dict[str, int], List[List[DiffLine]]]:
    """Generate diff information and statistics.

    Args:
        original: The text before processing.
        processed: The text after processing.

    Returns:
        A pair of (statistics as a plain dict, grouped diff lines).
    """
    summary = calculate_stats(original, processed)

    # Line-by-line comparison, then number the lines and bundle them into groups.
    raw_diff = list(Differ().compare(original.splitlines(), processed.splitlines()))
    grouped = group_changes(create_diff_lines(raw_diff))

    return vars(summary), grouped
@dataclass
class ChangeGroup:
    """A single diff line prepared for display with Rich console markup."""

    # Line number in the original text (not shown for added lines).
    orig_no: int
    # Line number in the processed text (not shown for removed lines).
    proc_no: int
    # Diff prefix: "  " unchanged, "- " removed, anything else is shown as added.
    change_type: str
    # Line text without the diff prefix.
    content: str

    def format_line_info(self) -> str:
        """Format the line numbers and separator based on change type."""
        # Compare on the stripped marker so the two-character unchanged
        # code "  " is recognised (a bare " " is accepted as well).
        marker = self.change_type.strip()
        # NOTE(review): the three gutters below have different widths;
        # spacing may have been lost upstream - confirm the intended padding.
        if marker == "":
            return f"[bright_black]{self.orig_no:4d}{self.proc_no:4d}│[/bright_black]"
        if marker == "-":
            return f"[bright_black]{self.orig_no:4d}│ │[/bright_black]"
        # "+" case
        return f"[bright_black] │{self.proc_no:4d}│[/bright_black]"

    def format_content(self) -> str:
        """Format the content based on change type."""
        # NOTE(review): content is interpolated into console markup without
        # escaping; text containing square brackets may be read as markup.
        marker = self.change_type.strip()
        if marker == "":
            return f"[white]{self.content}[/white]"
        if marker == "-":
            return f"[red]- {self.content}[/red]"
        # "+" case
        return f"[green]+ {self.content}[/green]"
def create_stats_panel(stats: dict) -> Panel:
    """Create a formatted statistics panel.

    Args:
        stats: Mapping with the keys ``original_lines``, ``processed_lines``
            and ``difference``.

    Returns:
        A blue-bordered panel titled "Statistics" with one row per value.
    """
    rows = [
        f"Original lines: {stats['original_lines']}",
        f"Processed lines: {stats['processed_lines']}",
        # ":+d" always shows the sign, so growth and shrinkage are both explicit.
        f"Difference: {stats['difference']:+d} lines",
    ]
    return Panel("\n".join(rows), title="Statistics", border_style="blue")
def create_separator(prev_group: List[tuple], current_group: List[tuple]) -> Rule | None:
    """Create a separator between change groups with skip line information.

    The number of skipped lines is the gap between the highest original
    line number in ``prev_group`` and the lowest one in ``current_group``.
    Entries whose original line number is 0 are ignored.

    Args:
        prev_group: The group printed before the separator.
        current_group: The group printed after the separator.

    Returns:
        ``None`` when ``prev_group`` is empty. Otherwise a rule, labelled
        with the skipped-line count when that count is known and positive.
    """
    if not prev_group:
        return None

    # default=None avoids a ValueError when a group has no positive
    # original line number.
    last_orig = max((entry[0] for entry in prev_group if entry[0] > 0), default=None)
    next_orig = min(
        (entry[0] for entry in current_group if entry[0] > 0), default=None
    )

    if last_orig is not None and next_orig is not None:
        skipped_lines = next_orig - last_orig - 1
        if skipped_lines > 0:
            return Rule(
                f" {skipped_lines} lines skipped ",
                style="bright_black",
                # Rule rejects an empty ``characters`` string, so use the
                # library's default line character.
                characters="─",
            )

    return Rule(style="bright_black", characters="─")
def print_change_group(group: List[tuple]) -> None:
    """Print a group of changes with formatting.

    Args:
        group: Four-item entries of (original line number, processed line
            number, change type, content).
    """
    for entry in group:
        orig_no, proc_no, change_type, content = entry
        row = ChangeGroup(orig_no, proc_no, change_type, content)
        # Line-number gutter first, then the coloured content, separated by a space.
        console.print(f"{row.format_line_info()} {row.format_content()}")
def preview_changes(original: str, processed: str) -> None:
    """Show a preview of the changes made.

    Prints a heading, a statistics panel, and then every change group,
    with a separator between consecutive groups.

    Args:
        original: The text before processing.
        processed: The text after processing.
    """
    console.print("\n[yellow]Preview of changes:[/yellow]")

    # Get diff information and show statistics
    stats, changes = get_changes(original, processed)
    console.print(create_stats_panel(stats))

    # Print changes with separators between groups
    previous = None
    for group in changes:
        if previous is not None:
            separator = create_separator(previous, group)
            if separator:
                console.print(separator)
        print_change_group(group)
        previous = group
def process_readme(