LibCST/libcst/_parser/py_whitespace_parser.py
2022-02-01 11:13:17 +00:00

260 lines
9.2 KiB
Python

# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from typing import List, Optional, Sequence, Tuple, Union
from libcst._nodes.whitespace import (
Comment,
COMMENT_RE,
EmptyLine,
Newline,
NEWLINE_RE,
ParenthesizedWhitespace,
SIMPLE_WHITESPACE_RE,
SimpleWhitespace,
TrailingWhitespace,
)
from libcst._parser.types.config import BaseWhitespaceParserConfig
from libcst._parser.types.whitespace_state import WhitespaceState as State
# BEGIN PARSER ENTRYPOINTS
def parse_simple_whitespace(
    config: BaseWhitespaceParserConfig, state: State
) -> SimpleWhitespace:
    """
    Consume simple whitespace starting at the position held in ``state``.

    Whenever the matched segment contains a backslash (a line continuation),
    parsing jumps to column 0 of the following line and matches again; all
    segments are concatenated into the returned ``SimpleWhitespace``. On return,
    ``state.line``/``state.column`` point just past the consumed text.
    """
    source_lines = config.lines
    segments = []
    while True:
        # The match never fails because the pattern can match an empty string
        # pyre-fixme[16]: Optional type has no attribute `group`.
        segment = SIMPLE_WHITESPACE_RE.match(
            source_lines[state.line - 1], state.column
        ).group(0)
        segments.append(segment)
        if "\\" not in segment:
            break
        # continuation character: keep going from the start of the next line
        state.line += 1
        state.column = 0

    # TODO: we could special-case the common case where there's no continuation
    # character to avoid list construction and joining.

    # Only the final segment lives on the current line, so it alone advances
    # the column.
    state.column += len(segment)
    return SimpleWhitespace("".join(segments))
def parse_empty_lines(
    config: BaseWhitespaceParserConfig,
    state: State,
    *,
    override_absolute_indent: Optional[str] = None,
) -> Sequence[EmptyLine]:
    """
    Parse as many consecutive empty lines as possible starting at ``state``.

    When ``override_absolute_indent`` is given, only the lines up to and
    including the last one whose ``indent`` is truthy are kept. Those belong to
    the footer rather than to the next line's leading_lines; anything after that
    point is dropped and not consumed.

    ``state`` is advanced to the position reached after the last line that is
    actually returned; if nothing is returned, ``state`` is left unchanged.
    """
    # Work on a private copy so the caller's state only moves once we know
    # which lines are kept.
    cursor = State(
        state.line, state.column, state.absolute_indent, state.is_parenthesized
    )
    parsed: List[Tuple[State, EmptyLine]] = []
    while True:
        empty_line = _parse_empty_line(
            config, cursor, override_absolute_indent=override_absolute_indent
        )
        if empty_line is None:
            break
        # Remember the position reached right after this line, then continue
        # with a fresh clone so the stored state isn't mutated by later parsing.
        parsed.append((cursor, empty_line))
        cursor = State(
            cursor.line,
            cursor.column,
            state.absolute_indent,
            state.is_parenthesized,
        )

    if override_absolute_indent is not None:
        # Scan backwards for the last indented line and cut the list right
        # after it. If no line is indented, the cut point is 0 and every line
        # is thrown away.
        keep = next(
            (
                idx + 1
                for idx in range(len(parsed) - 1, -1, -1)
                if parsed[idx][1].indent
            ),
            0,
        )
        parsed = parsed[:keep]

    if parsed:
        # Move the caller's state to the end of the last line that was kept.
        last_state: State = parsed[-1][0]
        state.line = last_state.line
        state.column = last_state.column

    return [line for _, line in parsed]
def parse_trailing_whitespace(
    config: BaseWhitespaceParserConfig, state: State
) -> TrailingWhitespace:
    """
    Parse a mandatory ``TrailingWhitespace`` at the position held in ``state``.

    Delegates to ``_parse_trailing_whitespace`` and raises an ``Exception`` if
    that speculative parse yields nothing.
    """
    result = _parse_trailing_whitespace(config, state)
    if result is not None:
        return result
    raise Exception(
        "Internal Error: Failed to parse TrailingWhitespace. This should never "
        + "happen because a TrailingWhitespace is never optional in the grammar, "
        + "so this error should've been caught by parso first."
    )
def parse_parenthesizable_whitespace(
    config: BaseWhitespaceParserConfig, state: State
) -> Union[SimpleWhitespace, ParenthesizedWhitespace]:
    """
    Parse whitespace that may span lines when inside parentheses.

    If ``state.is_parenthesized`` is set, a ``ParenthesizedWhitespace`` is
    attempted first; otherwise, or if that attempt yields nothing, a
    ``SimpleWhitespace`` is parsed and returned.
    """
    if not state.is_parenthesized:
        return parse_simple_whitespace(config, state)

    # No speculation needed here: the parenthesized parse either succeeds or
    # doesn't modify state.
    multiline = _parse_parenthesized_whitespace(config, state)
    if multiline is None:
        # Fall back to plain simple whitespace.
        return parse_simple_whitespace(config, state)
    return multiline
# END PARSER ENTRYPOINTS
# BEGIN PARSER INTERNAL PRODUCTIONS
def _parse_empty_line(
    config: BaseWhitespaceParserConfig,
    state: State,
    *,
    override_absolute_indent: Optional[str] = None,
) -> Optional[EmptyLine]:
    """
    Speculatively parse one empty line (indent, whitespace, optional comment,
    newline) starting at ``state``.

    Returns the ``EmptyLine`` and advances ``state`` on success. Returns None
    and leaves ``state`` untouched if the indent parse raises or no newline is
    found.
    """
    # All parsing happens on a scratch copy; the caller's state is only
    # updated once the whole line has been recognised.
    scratch = State(
        state.line, state.column, state.absolute_indent, state.is_parenthesized
    )
    try:
        indent = _parse_indent(
            config, scratch, override_absolute_indent=override_absolute_indent
        )
    except Exception:
        # We aren't on a new line, so this can't be an empty line.
        return None

    whitespace = parse_simple_whitespace(config, scratch)
    comment = _parse_comment(config, scratch)
    newline = _parse_newline(config, scratch)
    if newline is None:
        # Something other than a newline follows: not an empty line.
        return None

    # Commit the speculative position. absolute_indent/is_parenthesized don't
    # change during parsing, so they don't need copying back.
    state.line, state.column = scratch.line, scratch.column
    return EmptyLine(indent, whitespace, comment, newline)
def _parse_indent(
    config: BaseWhitespaceParserConfig,
    state: State,
    *,
    override_absolute_indent: Optional[str] = None,
) -> bool:
    """
    Returns True if indentation was found, otherwise False.

    The expected indentation is ``override_absolute_indent`` when provided,
    else ``state.absolute_indent``. On success ``state.column`` is advanced
    past the indentation. Raises an ``Exception`` when called with a non-zero
    column, unless the position is the very end of the last line.
    """
    if override_absolute_indent is None:
        expected_indent = state.absolute_indent
    else:
        expected_indent = override_absolute_indent

    current_line = config.lines[state.line - 1]

    if state.column != 0:
        at_eof = state.column == len(current_line) and state.line == len(
            config.lines
        )
        if not at_eof:
            raise Exception(
                "Internal Error: Column should be 0 when parsing an indent."
            )
        # We're at EOF, treat this as a failed speculative parse
        return False

    if not current_line.startswith(expected_indent, state.column):
        return False

    state.column += len(expected_indent)
    return True
def _parse_comment(
    config: BaseWhitespaceParserConfig, state: State
) -> Optional[Comment]:
    """
    Parse a comment at the position held in ``state``.

    Returns a ``Comment`` and advances ``state.column`` past it when
    ``COMMENT_RE`` matches there; otherwise returns None and leaves ``state``
    unchanged.
    """
    current_line = config.lines[state.line - 1]
    found = COMMENT_RE.match(current_line, state.column)
    if found is not None:
        text = found.group(0)
        state.column += len(text)
        return Comment(text)
    return None
def _parse_newline(
    config: BaseWhitespaceParserConfig, state: State
) -> Optional[Newline]:
    """
    Parse a newline at the position held in ``state``.

    Returns None, with ``state`` unchanged, if ``NEWLINE_RE`` doesn't match. On
    a match, ``state`` moves to column 0 of the next line when there is one,
    otherwise it stays at the end of the last line. Raises an ``Exception`` if
    the matched newline doesn't end the current line.
    """
    current_line = config.lines[state.line - 1]
    found = NEWLINE_RE.match(current_line, state.column)
    if found is None:
        # no newline was found, speculative parsing failed
        return None

    text = found.group(0)
    state.column += len(text)
    if state.column != len(current_line):
        raise Exception("Internal Error: Found a newline, but it wasn't the EOL.")

    if state.line < len(config.lines):
        # this newline was the end of a line, and there's another line,
        # therefore we should move to the next line
        state.line += 1
        state.column = 0

    # When the newline equals the configured default, return a bare Newline()
    # so it is inherited from the Module instead of being set explicitly.
    return Newline() if text == config.default_newline else Newline(text)
def _parse_trailing_whitespace(
    config: BaseWhitespaceParserConfig, state: State
) -> Optional[TrailingWhitespace]:
    """
    Speculatively parse trailing whitespace (whitespace, optional comment,
    newline) starting at ``state``.

    Returns the ``TrailingWhitespace`` and advances ``state`` on success.
    Returns None and leaves ``state`` untouched if no newline is found.
    """
    # Parse on a scratch copy so a failed attempt doesn't move the caller's
    # position.
    scratch = State(
        state.line, state.column, state.absolute_indent, state.is_parenthesized
    )
    whitespace = parse_simple_whitespace(config, scratch)
    comment = _parse_comment(config, scratch)
    newline = _parse_newline(config, scratch)
    if newline is None:
        # Speculative parsing failed
        return None

    # Commit the speculative position. absolute_indent/is_parenthesized don't
    # change during parsing, so they don't need copying back.
    state.line, state.column = scratch.line, scratch.column
    return TrailingWhitespace(whitespace, comment, newline)
def _parse_parenthesized_whitespace(
    config: BaseWhitespaceParserConfig, state: State
) -> Optional[ParenthesizedWhitespace]:
    """
    Parse whitespace that spans multiple lines inside parentheses.

    The result is built from, in order: the trailing whitespace of the first
    line, any number of empty lines, the indent of the last line, and the
    simple whitespace that follows it.

    Returns None if the first line's trailing whitespace can't be parsed; that
    parse is speculative, so ``state`` is left unchanged in that case.
    Otherwise ``state`` is advanced past everything consumed.
    """
    first_line = _parse_trailing_whitespace(config, state)
    if first_line is None:
        # Speculative parsing failed
        return None

    # Collect into a list and convert once at the end: growing a tuple by
    # repeated concatenation copies it on every iteration (quadratic cost).
    collected: List[EmptyLine] = []
    while True:
        empty_line = _parse_empty_line(config, state)
        if empty_line is None:
            # This isn't an empty line, so parse it below
            break
        collected.append(empty_line)

    indent = _parse_indent(config, state)
    last_line = parse_simple_whitespace(config, state)
    return ParenthesizedWhitespace(first_line, tuple(collected), indent, last_line)