# Module 'parser'
#
# Parse S-expressions output by the Panel Editor
# (which is written in Scheme so it can't help writing S-expressions).
#
# See notes at end of file.


whitespace = ' \t\n'
operators = '()\''
separators = operators + whitespace + ';' + '"'


# Tokenize a string.
# Return a list of tokens (strings).
#
def tokenize_string(s):
	tokens = []
	while s:
		c = s[:1]
		if c in whitespace:
			s = s[1:]
		elif c == ';':
			# Comment: discard the rest of the line.
			s = ''
		elif c == '"':
			# String: scan to the closing quote, honoring backslash escapes.
			n = len(s)
			i = 1
			while i < n:
				c = s[i]
				i = i+1
				if c == '"': break
				if c == '\\': i = i+1
			tokens.append(s[:i])
			s = s[i:]
		elif c in operators:
			tokens.append(c)
			s = s[1:]
		else:
			# Atom: scan to the next separator.
			n = len(s)
			i = 1
			while i < n:
				if s[i] in separators: break
				i = i+1
			tokens.append(s[:i])
			s = s[i:]
	return tokens


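# A small usage sketch (an added illustration, not part of the original
# module) showing how comments and strings are tokenized:
#
#	tokenize_string('(label "hi there") ; a comment')
# returns
#	['(', 'label', '"hi there"', ')']

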
# Tokenize a whole file (given as a file object, not as a file name).
# Return a list of tokens (strings).
#
def tokenize_file(fp):
	tokens = []
	for line in fp:
		tokens.extend(tokenize_string(line))
	return tokens


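# A usage sketch (an added illustration): tokenize_file() only needs
# line iteration, so an in-memory buffer works as the file object:
#
#	import io
#	tokenize_file(io.StringIO('(hip\n(hop hur-ray))\n'))
# returns
#	['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']

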
# Exception raised by parse_expr().
#
class syntax_error(Exception):
	pass


# Parse an S-expression.
# Input is a list of tokens as returned by tokenize_*().
# Return a pair (expr, tokens)
# where expr is a list representing the S-expression,
# and tokens contains the remaining tokens.
# May raise syntax_error.
#
def parse_expr(tokens):
	if not tokens or tokens[0] != '(':
		raise syntax_error('expected "("')
	tokens = tokens[1:]
	expr = []
	while True:
		if not tokens:
			raise syntax_error('missing ")"')
		if tokens[0] == ')':
			return expr, tokens[1:]
		elif tokens[0] == '(':
			subexpr, tokens = parse_expr(tokens)
			expr.append(subexpr)
		else:
			expr.append(tokens[0])
			tokens = tokens[1:]


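# A usage sketch (an added illustration): parse_expr() hands back the
# tokens it did not consume, so trailing input is preserved:
#
#	parse_expr(['(', 'a', ')', '(', 'b', ')'])
# returns
#	(['a'], ['(', 'b', ')'])

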
# Parse a file (given as a file object, not as a file name).
# Return a list of the parsed S-expressions found at the top level.
#
def parse_file(fp):
	tokens = tokenize_file(fp)
	exprlist = []
	while tokens:
		expr, tokens = parse_expr(tokens)
		exprlist.append(expr)
	return exprlist


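# A usage sketch (an added illustration) parsing two top-level
# expressions from an in-memory file:
#
#	import io
#	parse_file(io.StringIO('(a b) ; comment\n(c (d))\n'))
# returns
#	[['a', 'b'], ['c', ['d']]]

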
# EXAMPLE:
#
# The input
#	'(hip (hop hur-ray))'
#
# passed to tokenize_string() returns the token list
#	['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
#
# When this is passed to parse_expr() it returns the expression
#	['hip', ['hop', 'hur-ray']]
# plus an empty token list (because there are no tokens left).
#
# When a file containing the example is passed to parse_file() it returns
# a list whose only element is the output of parse_expr() above:
#	[['hip', ['hop', 'hur-ray']]]


# TOKENIZING:
#
# Comments start with a semicolon (;) and continue to the end of the line.
#
# Tokens are separated by whitespace, except that the following characters
# always form a separate token (outside strings):
#	( ) '
# Strings are enclosed in double quotes (") and backslash (\) is used
# as the escape character inside strings.
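

# A minimal self-test (an addition for illustration, not part of the
# original module); it checks the EXAMPLE and TOKENIZING notes above.

if __name__ == '__main__':
	import io
	source = '(hip (hop hur-ray))'
	tokens = tokenize_string(source)
	assert tokens == ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
	expr, rest = parse_expr(tokens)
	assert expr == ['hip', ['hop', 'hur-ray']] and rest == []
	# Comments are dropped; strings keep their quotes; ' is its own token.
	assert tokenize_string("'(\"a b\") ; comment") == ["'", '(', '"a b"', ')']
	assert parse_file(io.StringIO(source + '\n')) == [expr]
	print('parser: self-test passed')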
