mirror of
https://github.com/Instagram/LibCST.git
synced 2025-12-23 10:35:53 +00:00
111 lines
3.5 KiB
Python
111 lines
3.5 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
#
|
|
# This source code is licensed under the MIT license found in the
|
|
# LICENSE file in the root directory of this source tree.
|
|
|
|
|
|
from contextlib import contextmanager
|
|
from dataclasses import dataclass, field
|
|
from typing import Callable, Iterator, List, Optional
|
|
|
|
from libcst import CSTNode, Module
|
|
from libcst._nodes.internal import CodegenState
|
|
from libcst.metadata.base_provider import BaseMetadataProvider
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class CodeSpan:
|
|
"""
|
|
Represents the position of a piece of code by its starting position and length.
|
|
|
|
Note: This class does not specify the unit of distance - it can be bytes,
|
|
Unicode characters, or something else entirely.
|
|
"""
|
|
|
|
#: Offset of the code from the beginning of the file. Can be 0.
|
|
start: int
|
|
#: Length of the span
|
|
length: int
|
|
|
|
|
|
@dataclass(frozen=False)
|
|
class SpanProvidingCodegenState(CodegenState):
|
|
provider: BaseMetadataProvider[CodeSpan]
|
|
get_length: Optional[Callable[[str], int]] = None
|
|
position: int = 0
|
|
_stack: List[int] = field(default_factory=list)
|
|
|
|
def add_indent_tokens(self) -> None:
|
|
super().add_indent_tokens()
|
|
for token in self.indent_tokens:
|
|
self._update_position(token)
|
|
|
|
def add_token(self, value: str) -> None:
|
|
super().add_token(value)
|
|
self._update_position(value)
|
|
|
|
def _update_position(self, value: str) -> None:
|
|
get_length = self.get_length or len
|
|
self.position += get_length(value)
|
|
|
|
def before_codegen(self, node: CSTNode) -> None:
|
|
self._stack.append(self.position)
|
|
|
|
def after_codegen(self, node: CSTNode) -> None:
|
|
start = self._stack.pop()
|
|
|
|
if node not in self.provider._computed:
|
|
end = self.position
|
|
self.provider._computed[node] = CodeSpan(start, length=end - start)
|
|
|
|
@contextmanager
|
|
def record_syntactic_position(
|
|
self,
|
|
node: CSTNode,
|
|
*,
|
|
start_node: Optional[CSTNode] = None,
|
|
end_node: Optional[CSTNode] = None,
|
|
) -> Iterator[None]:
|
|
start = self.position
|
|
try:
|
|
yield
|
|
finally:
|
|
end = self.position
|
|
start = (
|
|
self.provider._computed[start_node].start
|
|
if start_node is not None
|
|
else start
|
|
)
|
|
if end_node is not None:
|
|
end_span = self.provider._computed[end_node]
|
|
length = (end_span.start + end_span.length) - start
|
|
else:
|
|
length = end - start
|
|
self.provider._computed[node] = CodeSpan(start, length=length)
|
|
|
|
|
|
def byte_length_in_utf8(value: str) -> int:
|
|
return len(value.encode("utf8"))
|
|
|
|
|
|
class ByteSpanPositionProvider(BaseMetadataProvider[CodeSpan]):
|
|
"""
|
|
Generates offset and length metadata for nodes' positions.
|
|
|
|
For each :class:`CSTNode` this provider generates a :class:`CodeSpan` that
|
|
contains the byte-offset of the node from the start of the file, and its
|
|
length (also in bytes). The whitespace owned by the node is not included in
|
|
this length.
|
|
|
|
Note: offset and length measure bytes, not characters (which is significant for
|
|
example in the case of Unicode characters encoded in more than one byte)
|
|
"""
|
|
|
|
def _gen_impl(self, module: Module) -> None:
|
|
state = SpanProvidingCodegenState(
|
|
default_indent=module.default_indent,
|
|
default_newline=module.default_newline,
|
|
provider=self,
|
|
get_length=byte_length_in_utf8,
|
|
)
|
|
module._codegen(state)
|