# Module 'parser'
#
# Parse S-expressions output by the Panel Editor
# (which is written in Scheme so it can't help writing S-expressions).
#
# See notes at end of file.


whitespace = ' \t\n'
operators = '()\''
separators = operators + whitespace + ';' + '"'


# Tokenize a string.
# Return a list of tokens (strings).
#
def tokenize_string(s):
	tokens = []
	while s:
		c = s[:1]
		if c in whitespace:
			s = s[1:]
		elif c == ';':
			# Comment: discard the rest of the line.
			s = ''
		elif c == '"':
			# String literal: scan to the closing quote,
			# skipping over backslash-escaped characters.
			n = len(s)
			i = 1
			while i < n:
				c = s[i]
				i = i+1
				if c == '"': break
				if c == '\\': i = i+1
			tokens.append(s[:i])
			s = s[i:]
		elif c in operators:
			tokens.append(c)
			s = s[1:]
		else:
			# Ordinary atom: scan up to the next separator.
			n = len(s)
			i = 1
			while i < n:
				if s[i] in separators: break
				i = i+1
			tokens.append(s[:i])
			s = s[i:]
	return tokens


# Tokenize a whole file (given as a file object, not as a file name).
# Return a list of tokens (strings).
#
def tokenize_file(fp):
	tokens = []
	while True:
		line = fp.readline()
		if not line: break
		tokens = tokens + tokenize_string(line)
	return tokens


# Exception raised by parse_expr().
#
class syntax_error(Exception):
	pass


# Parse an S-expression.
# Input is a list of tokens as returned by tokenize_*().
# Return a pair (expr, tokens)
# where expr is a list representing the S-expression,
# and tokens contains the remaining tokens.
# May raise syntax_error.
#
def parse_expr(tokens):
	if (not tokens) or tokens[0] != '(':
		raise syntax_error('expected "("')
	tokens = tokens[1:]
	expr = []
	while True:
		if not tokens:
			raise syntax_error('missing ")"')
		if tokens[0] == ')':
			return expr, tokens[1:]
		elif tokens[0] == '(':
			# Recurse for a nested expression.
			subexpr, tokens = parse_expr(tokens)
			expr.append(subexpr)
		else:
			expr.append(tokens[0])
			tokens = tokens[1:]


# Parse a file (given as a file object, not as a file name).
# Return a list of parsed S-expressions found at the top level.
#
def parse_file(fp):
	tokens = tokenize_file(fp)
	exprlist = []
	while tokens:
		expr, tokens = parse_expr(tokens)
		exprlist.append(expr)
	return exprlist


# EXAMPLE:
#
# The input
#	'(hip (hop hur-ray))'
#
# passed to tokenize_string() returns the token list
#	['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
#
# When this is passed to parse_expr() it returns the expression
#	['hip', ['hop', 'hur-ray']]
# plus an empty token list (because there are no tokens left).
#
# When a file containing the example is passed to parse_file() it returns
# a list whose only element is the output of parse_expr() above:
#	[['hip', ['hop', 'hur-ray']]]


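# The following helper is not part of the original module; it is a small
# sketch that turns the EXAMPLE above into runnable code.  It assumes only
# the functions defined in this file plus io.StringIO from the standard
# library to feed parse_file() a file object; the name _check_example is
# illustrative.
#
def _check_example():
	import io
	source = '(hip (hop hur-ray))'
	tokens = tokenize_string(source)
	assert tokens == ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
	expr, rest = parse_expr(tokens)
	assert expr == ['hip', ['hop', 'hur-ray']]
	assert rest == []
	assert parse_file(io.StringIO(source)) == [['hip', ['hop', 'hur-ray']]]

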
# TOKENIZING:
#
# Comments start with a semicolon (;) and continue until the end of the line.
#
# Tokens are separated by whitespace, except that the following characters
# always form a separate token (outside strings):
#	( ) '
# Strings are enclosed in double quotes (") and backslash (\) is used
# as the escape character in strings.
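

# Hypothetical demo, not in the original module: run the EXAMPLE check above
# and exercise the tokenizing rules just described -- a comment, the ( ) '
# operator tokens, and a quoted string with backslash escapes.  The sample
# input is illustrative only.
if __name__ == '__main__':
	_check_example()
	sample = '(define x \'("a \\"quoted\\" word" y)) ; trailing comment'
	print(tokenize_string(sample))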