mirror of
https://github.com/Instagram/LibCST.git
synced 2025-12-23 10:35:53 +00:00
260 lines
9.2 KiB
Python
260 lines
9.2 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
#
|
|
# This source code is licensed under the MIT license found in the
|
|
# LICENSE file in the root directory of this source tree.
|
|
|
|
from typing import List, Optional, Sequence, Tuple, Union
|
|
|
|
from libcst._nodes.whitespace import (
|
|
Comment,
|
|
COMMENT_RE,
|
|
EmptyLine,
|
|
Newline,
|
|
NEWLINE_RE,
|
|
ParenthesizedWhitespace,
|
|
SIMPLE_WHITESPACE_RE,
|
|
SimpleWhitespace,
|
|
TrailingWhitespace,
|
|
)
|
|
from libcst._parser.types.config import BaseWhitespaceParserConfig
|
|
from libcst._parser.types.whitespace_state import WhitespaceState as State
|
|
|
|
# BEGIN PARSER ENTRYPOINTS
|
|
|
|
|
|
def parse_simple_whitespace(
    config: BaseWhitespaceParserConfig, state: State
) -> SimpleWhitespace:
    """
    Consume simple whitespace beginning at the position recorded in ``state``.

    Whenever the consumed text contains a backslash (a line continuation),
    parsing resumes at column 0 of the following line. ``state.line`` and
    ``state.column`` are advanced past everything consumed, and the pieces from
    every visited line are concatenated into the returned ``SimpleWhitespace``.
    """
    source_lines = config.lines
    pieces = []
    # TODO: the common no-continuation case could skip the list and the join.
    while True:
        # The pattern can match an empty string, so the match never fails.
        # pyre-fixme[16]: Optional type has no attribute `group`.
        piece = SIMPLE_WHITESPACE_RE.match(
            source_lines[state.line - 1], state.column
        ).group(0)
        pieces.append(piece)
        if "\\" not in piece:
            break
        # A continuation character was consumed: keep going on the next line.
        state.line += 1
        state.column = 0

    # Only the last piece lives on the current line, so only it moves the column.
    state.column += len(piece)
    return SimpleWhitespace("".join(pieces))
|
|
|
|
|
|
def parse_empty_lines(
    config: BaseWhitespaceParserConfig,
    state: State,
    *,
    override_absolute_indent: Optional[str] = None,
) -> Sequence[EmptyLine]:
    """
    Parse as many consecutive empty lines as possible, starting at ``state``.

    When ``override_absolute_indent`` is given, only the lines up to and
    including the last one indented at that level are kept: those belong to the
    footer rather than to the next line's leading_lines. Any unindented lines
    after that point are dropped, and if no line is indented nothing is kept.

    ``state.line`` and ``state.column`` are moved to just after the last line
    that is kept; ``state`` is left untouched when nothing is kept.
    """
    parsed: List[Tuple[State, EmptyLine]] = []
    cursor = State(
        state.line, state.column, state.absolute_indent, state.is_parenthesized
    )
    while True:
        empty_line = _parse_empty_line(
            config, cursor, override_absolute_indent=override_absolute_indent
        )
        if empty_line is None:
            break
        # Remember the position reached after this line alongside the line itself,
        # then continue from a fresh clone so the stored state is never mutated.
        parsed.append((cursor, empty_line))
        cursor = State(
            cursor.line,
            cursor.column,
            state.absolute_indent,
            state.is_parenthesized,
        )

    if override_absolute_indent is not None:
        # Walk backwards past trailing unindented lines; whatever remains ends on
        # the last indented line (or is empty when no line was indented).
        keep = len(parsed)
        while keep > 0 and not parsed[keep - 1][1].indent:
            keep -= 1
        parsed = parsed[:keep]

    if parsed:
        # Sync the caller's position with the last line that was actually kept.
        last_state: State = parsed[-1][0]
        state.line = last_state.line
        state.column = last_state.column

    return [empty_line for _, empty_line in parsed]
|
|
|
|
|
|
def parse_trailing_whitespace(
    config: BaseWhitespaceParserConfig, state: State
) -> TrailingWhitespace:
    """
    Parse a mandatory ``TrailingWhitespace`` at the current position.

    Delegates to ``_parse_trailing_whitespace`` and raises ``Exception`` if that
    speculative parse yields nothing.
    """
    result = _parse_trailing_whitespace(config, state)
    if result is not None:
        return result
    raise Exception(
        "Internal Error: Failed to parse TrailingWhitespace. This should never "
        "happen because a TrailingWhitespace is never optional in the grammar, "
        "so this error should've been caught by parso first."
    )
|
|
|
|
|
|
def parse_parenthesizable_whitespace(
    config: BaseWhitespaceParserConfig, state: State
) -> Union[SimpleWhitespace, ParenthesizedWhitespace]:
    """
    Parse whitespace that may span lines when the parser is inside parentheses.

    If ``state.is_parenthesized`` is set, a ``ParenthesizedWhitespace`` is tried
    first; otherwise, or when that attempt yields nothing, a plain
    ``SimpleWhitespace`` is parsed and returned.
    """
    # No separate speculative state is needed here: the parenthesized parse
    # either succeeds or leaves ``state`` unmodified.
    parenthesized = (
        _parse_parenthesized_whitespace(config, state)
        if state.is_parenthesized
        else None
    )
    if parenthesized is None:
        return parse_simple_whitespace(config, state)
    return parenthesized
|
|
|
|
|
|
# END PARSER ENTRYPOINTS
|
|
# BEGIN PARSER INTERNAL PRODUCTIONS
|
|
|
|
|
|
def _parse_empty_line(
    config: BaseWhitespaceParserConfig,
    state: State,
    *,
    override_absolute_indent: Optional[str] = None,
) -> Optional[EmptyLine]:
    """
    Speculatively parse one empty line: indent, whitespace, comment, newline.

    Works on a private copy of ``state``. Returns ``None`` and leaves ``state``
    untouched if the indent parser raises or no newline is found; otherwise
    copies the reached line/column back into ``state`` and returns the
    ``EmptyLine``.
    """
    probe = State(
        state.line, state.column, state.absolute_indent, state.is_parenthesized
    )
    try:
        has_indent = _parse_indent(
            config, probe, override_absolute_indent=override_absolute_indent
        )
    except Exception:
        # Not positioned at the start of a line, so this cannot be an empty line.
        return None

    leading_ws = parse_simple_whitespace(config, probe)
    line_comment = _parse_comment(config, probe)
    line_end = _parse_newline(config, probe)
    if line_end is not None:
        # Commit the speculative position. absolute_indent and is_parenthesized
        # never change during the parse, so they need no copying.
        state.line = probe.line
        state.column = probe.column
        return EmptyLine(has_indent, leading_ws, line_comment, line_end)

    # Without a terminating newline the speculation is abandoned.
    return None
|
|
|
|
|
|
def _parse_indent(
    config: BaseWhitespaceParserConfig,
    state: State,
    *,
    override_absolute_indent: Optional[str] = None,
) -> bool:
    """
    Consume the expected indentation at the start of the current line.

    The expected indent is ``override_absolute_indent`` when provided, otherwise
    ``state.absolute_indent``. Returns True and advances ``state.column`` past
    the indent when the line starts with it, and False when it does not or when
    the position is already at the very end of the last line. Raises
    ``Exception`` for any other non-zero column.
    """
    expected_indent = (
        state.absolute_indent
        if override_absolute_indent is None
        else override_absolute_indent
    )
    current_line = config.lines[state.line - 1]

    if state.column == 0:
        if not current_line.startswith(expected_indent, state.column):
            return False
        state.column += len(expected_indent)
        return True

    on_last_line = state.line == len(config.lines)
    if state.column == len(current_line) and on_last_line:
        # End of file: report "no indent" instead of raising.
        return False
    raise Exception("Internal Error: Column should be 0 when parsing an indent.")
|
|
|
|
|
|
def _parse_comment(
    config: BaseWhitespaceParserConfig, state: State
) -> Optional[Comment]:
    """
    Parse a comment at the current position, if ``COMMENT_RE`` matches there.

    On a match, ``state.column`` is advanced by the length of the matched text
    and a ``Comment`` wrapping that text is returned. Otherwise returns ``None``
    and leaves ``state`` unchanged.
    """
    current_line = config.lines[state.line - 1]
    match = COMMENT_RE.match(current_line, state.column)
    if match is not None:
        text = match.group(0)
        state.column += len(text)
        return Comment(text)
    return None
|
|
|
|
|
|
def _parse_newline(
    config: BaseWhitespaceParserConfig, state: State
) -> Optional[Newline]:
    """
    Speculatively parse a newline at the current position.

    Returns ``None`` without touching ``state`` when ``NEWLINE_RE`` does not
    match. On a match the column is advanced past the newline text; if that does
    not land exactly on the end of the line an ``Exception`` is raised. When
    another line follows, ``state`` is moved to its column 0.
    """
    current_line = config.lines[state.line - 1]
    match = NEWLINE_RE.match(current_line, state.column)
    if match is None:
        # Nothing to consume here; the speculation failed.
        return None

    text = match.group(0)
    state.column += len(text)
    if state.column != len(current_line):
        raise Exception("Internal Error: Found a newline, but it wasn't the EOL.")

    has_following_line = state.line < len(config.lines)
    if has_following_line:
        # The newline ended this line and another exists, so step onto it.
        state.line += 1
        state.column = 0

    # A newline equal to the configured default is left implicit so the value is
    # inherited from the Module instead of being set explicitly.
    return Newline() if text == config.default_newline else Newline(text)
|
|
|
|
|
|
def _parse_trailing_whitespace(
    config: BaseWhitespaceParserConfig, state: State
) -> Optional[TrailingWhitespace]:
    """
    Speculatively parse trailing whitespace: whitespace, comment, newline.

    Works on a private copy of ``state``. Returns ``None`` and leaves ``state``
    untouched when no newline is found; otherwise copies the reached
    line/column back into ``state`` and returns the ``TrailingWhitespace``.
    """
    probe = State(
        state.line, state.column, state.absolute_indent, state.is_parenthesized
    )
    leading_ws = parse_simple_whitespace(config, probe)
    line_comment = _parse_comment(config, probe)
    line_end = _parse_newline(config, probe)
    if line_end is not None:
        # Commit the speculative position. absolute_indent and is_parenthesized
        # never change during the parse, so they need no copying.
        state.line = probe.line
        state.column = probe.column
        return TrailingWhitespace(leading_ws, line_comment, line_end)

    # Without a terminating newline the speculation is abandoned.
    return None
|
|
|
|
|
|
def _parse_parenthesized_whitespace(
    config: BaseWhitespaceParserConfig, state: State
) -> Optional[ParenthesizedWhitespace]:
    """
    Parse whitespace that spans multiple lines inside parentheses.

    The result is built from, in order: the trailing whitespace of the first
    line, any number of empty lines, an indent flag, and the simple whitespace
    on the last line. Returns ``None`` when the first line's trailing whitespace
    cannot be parsed.
    """
    head = _parse_trailing_whitespace(config, state)
    if head is None:
        # Speculative parsing failed: there is no newline to start from.
        return None

    # Gather empty lines until the next line turns out not to be empty; that
    # line is handled by the indent/whitespace parsing below.
    collected = []
    candidate = _parse_empty_line(config, state)
    while candidate is not None:
        collected.append(candidate)
        candidate = _parse_empty_line(config, state)

    has_indent = _parse_indent(config, state)
    tail = parse_simple_whitespace(config, state)
    return ParenthesizedWhitespace(head, tuple(collected), has_indent, tail)
|