mirror of
https://github.com/Instagram/LibCST.git
synced 2025-12-23 10:35:53 +00:00
209 lines
8.2 KiB
Python
209 lines
8.2 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
#
|
|
# This source code is licensed under the MIT license found in the
|
|
# LICENSE file in the root directory of this source tree.
|
|
|
|
|
|
from dataclasses import dataclass, field
|
|
from typing import List, Optional, Sequence
|
|
|
|
from libcst import BaseStatement, CSTNode, Module
|
|
from libcst._add_slots import add_slots
|
|
from libcst._nodes.internal import CodegenState
|
|
from libcst.metadata import BaseMetadataProvider
|
|
|
|
|
|
class CodegenPartial:
|
|
"""
|
|
Provided by :class:`ExperimentalReentrantCodegenProvider`.
|
|
|
|
Stores enough information to generate either a small patch
|
|
(:meth:`get_modified_code_range`) or a new file (:meth:`get_modified_code`) by
|
|
replacing the old node at this position.
|
|
"""
|
|
|
|
__slots__ = [
|
|
"start_offset",
|
|
"end_offset",
|
|
"has_trailing_newline",
|
|
"_indent_tokens",
|
|
"_prev_codegen_state",
|
|
]
|
|
|
|
def __init__(self, state: "_ReentrantCodegenState") -> None:
|
|
# store a frozen copy of these values, since they change over time
|
|
self.start_offset: int = state.start_offset_stack[-1]
|
|
self.end_offset: int = state.char_offset
|
|
self.has_trailing_newline: bool = True # this may get updated to False later
|
|
self._indent_tokens: Sequence[str] = tuple(state.indent_tokens)
|
|
# everything else can be accessed from the codegen state object
|
|
self._prev_codegen_state: _ReentrantCodegenState = state
|
|
|
|
def get_original_module_code(self) -> str:
|
|
"""
|
|
Equivalent to :meth:`libcst.Module.bytes` on the top-level module that contains
|
|
this statement, except that it uses the cached result from our previous code
|
|
generation pass, so it's faster.
|
|
"""
|
|
return self._prev_codegen_state.get_code()
|
|
|
|
def get_original_module_bytes(self) -> bytes:
|
|
"""
|
|
Equivalent to :meth:`libcst.Module.bytes` on the top-level module that contains
|
|
this statement, except that it uses the cached result from our previous code
|
|
generation pass, so it's faster.
|
|
"""
|
|
return self.get_original_module_code().encode(self._prev_codegen_state.encoding)
|
|
|
|
def get_original_statement_code(self) -> str:
|
|
"""
|
|
Equivalent to :meth:`libcst.Module.code_for_node` on the current statement,
|
|
except that it uses the cached result from our previous code generation pass,
|
|
so it's faster.
|
|
"""
|
|
return self._prev_codegen_state.get_code()[self.start_offset : self.end_offset]
|
|
|
|
def get_modified_statement_code(self, node: BaseStatement) -> str:
|
|
"""
|
|
Gets the new code for ``node`` as if it were in same location as the old
|
|
statement being replaced. This means that it inherits details like the old
|
|
statement's indentation.
|
|
"""
|
|
new_codegen_state = CodegenState(
|
|
default_indent=self._prev_codegen_state.default_indent,
|
|
default_newline=self._prev_codegen_state.default_newline,
|
|
indent_tokens=list(self._indent_tokens),
|
|
)
|
|
node._codegen(new_codegen_state)
|
|
if not self.has_trailing_newline:
|
|
new_codegen_state.pop_trailing_newline()
|
|
return "".join(new_codegen_state.tokens)
|
|
|
|
def get_modified_module_code(self, node: BaseStatement) -> str:
|
|
"""
|
|
Gets the new code for the module at the root of this statement's tree, but with
|
|
the supplied replacement ``node`` in its place.
|
|
"""
|
|
original = self.get_original_module_code()
|
|
patch = self.get_modified_statement_code(node)
|
|
return f"{original[:self.start_offset]}{patch}{original[self.end_offset:]}"
|
|
|
|
def get_modified_module_bytes(self, node: BaseStatement) -> bytes:
|
|
"""
|
|
Gets the new bytes for the module at the root of this statement's tree, but with
|
|
the supplied replacement ``node`` in its place.
|
|
"""
|
|
return self.get_modified_module_code(node).encode(
|
|
self._prev_codegen_state.encoding
|
|
)
|
|
|
|
|
|
@add_slots
|
|
@dataclass(frozen=False)
|
|
class _ReentrantCodegenState(CodegenState):
|
|
provider: BaseMetadataProvider[CodegenPartial]
|
|
encoding: str = "utf-8"
|
|
indent_size: int = 0
|
|
char_offset: int = 0
|
|
start_offset_stack: List[int] = field(default_factory=list)
|
|
cached_code: Optional[str] = None
|
|
trailing_partials: List[CodegenPartial] = field(default_factory=list)
|
|
|
|
def increase_indent(self, value: str) -> None:
|
|
super(_ReentrantCodegenState, self).increase_indent(value)
|
|
self.indent_size += len(value)
|
|
|
|
def decrease_indent(self) -> None:
|
|
self.indent_size -= len(self.indent_tokens[-1])
|
|
super(_ReentrantCodegenState, self).decrease_indent()
|
|
|
|
def add_indent_tokens(self) -> None:
|
|
super(_ReentrantCodegenState, self).add_indent_tokens()
|
|
self.char_offset += self.indent_size
|
|
|
|
def add_token(self, value: str) -> None:
|
|
super(_ReentrantCodegenState, self).add_token(value)
|
|
self.char_offset += len(value)
|
|
self.trailing_partials.clear()
|
|
|
|
def before_codegen(self, node: CSTNode) -> None:
|
|
if not isinstance(node, BaseStatement):
|
|
return
|
|
|
|
self.start_offset_stack.append(self.char_offset)
|
|
|
|
def after_codegen(self, node: CSTNode) -> None:
|
|
if not isinstance(node, BaseStatement):
|
|
return
|
|
|
|
partial = CodegenPartial(self)
|
|
self.provider.set_metadata(node, partial)
|
|
self.start_offset_stack.pop()
|
|
self.trailing_partials.append(partial)
|
|
|
|
def pop_trailing_newline(self) -> None:
|
|
"""
|
|
:class:`libcst.Module` contains a hack where it removes the last token (a
|
|
newline) if the original file didn't have a newline.
|
|
|
|
If this happens, we need to go back through every node at the end of the file,
|
|
and fix their `end_offset`.
|
|
"""
|
|
for tp in self.trailing_partials:
|
|
tp.end_offset -= len(self.tokens[-1])
|
|
tp.has_trailing_newline = False
|
|
super(_ReentrantCodegenState, self).pop_trailing_newline()
|
|
|
|
def get_code(self) -> str:
|
|
# Ideally this would use functools.cached_property, but that's only in
|
|
# Python 3.8+.
|
|
#
|
|
# This is a little ugly to make pyre's attribute refinement checks happy.
|
|
cached_code = self.cached_code
|
|
if cached_code is not None:
|
|
return cached_code
|
|
cached_code = "".join(self.tokens)
|
|
self.cached_code = cached_code
|
|
return cached_code
|
|
|
|
|
|
class ExperimentalReentrantCodegenProvider(BaseMetadataProvider[CodegenPartial]):
|
|
"""
|
|
An experimental API that allows fast generation of modified code by recording an
|
|
initial code-generation pass, and incrementally applying updates. It is a
|
|
performance optimization for a few niche use-cases and is not user-friendly.
|
|
|
|
**This API may change at any time without warning (including in minor releases).**
|
|
|
|
This is rarely useful. Instead you should make multiple modifications to a single
|
|
syntax tree, and generate the code once. However, we can think of a few use-cases
|
|
for this API (hence, why it exists):
|
|
|
|
- When linting a file, you might generate multiple independent patches that a user
|
|
can accept or reject. Depending on your architecture, it may be advantageous to
|
|
avoid regenerating the file when computing each patch.
|
|
|
|
- You might want to call out to an external utility (e.g. a typechecker, such as
|
|
pyre or mypy) to validate a small change. You may need to generate and test lots
|
|
of these patches.
|
|
|
|
Restrictions:
|
|
|
|
- For safety and sanity reasons, the smallest/only level of granularity is a
|
|
statement. If you need to patch part of a statement, you regenerate the entire
|
|
statement. If you need to regenerate an entire module, just call
|
|
:meth:`libcst.Module.code`.
|
|
|
|
- This does not (currently) operate recursively. You can patch an unpatched piece
|
|
of code multiple times, but you can't layer additional patches on an already
|
|
patched piece of code.
|
|
"""
|
|
|
|
def _gen_impl(self, module: Module) -> None:
|
|
state = _ReentrantCodegenState(
|
|
default_indent=module.default_indent,
|
|
default_newline=module.default_newline,
|
|
provider=self,
|
|
encoding=module.encoding,
|
|
)
|
|
module._codegen(state)
|