mirror of
https://github.com/python/cpython.git
synced 2025-10-17 12:18:23 +00:00
New address parser by Ben Escoto replaces
Sjoerd Mullender's parseaddr()
This commit is contained in:
parent
e6c128f428
commit
be7c45eec4
1 changed files with 247 additions and 125 deletions
372
Lib/rfc822.py
372
Lib/rfc822.py
|
@ -221,27 +221,24 @@ class Message:
|
||||||
# ('Guido van Rossum', 'guido@cwi.nl').
|
# ('Guido van Rossum', 'guido@cwi.nl').
|
||||||
|
|
||||||
def getaddr(self, name):
    """Return the first address found in header *name*.

    The result is whatever tuple ``self.getaddrlist(name)`` yields first;
    when that list is empty the placeholder ``(None, None)`` is returned.
    """
    addresses = self.getaddrlist(name)
    if not addresses:
        return (None, None)
    return addresses[0]
|
||||||
|
|
||||||
# Retrieve a list of addresses from a header, where each
|
# Retrieve a list of addresses from a header, where each
|
||||||
# address is a tuple as returned by getaddr().
|
# address is a tuple as returned by getaddr().
|
||||||
|
|
||||||
def getaddrlist(self, name):
    """Return every address in header *name* as a list of tuples.

    The header value is looked up with ``self[name]``; a missing header
    (``KeyError``) gives an empty list.  Parsing is delegated to
    ``AddrlistClass``.
    """
    try:
        raw = self[name]
    except KeyError:
        return []
    return AddrlistClass(raw).getaddrlist()
|
|
||||||
|
|
||||||
# Retrieve a date field from a header as a tuple compatible
|
# Retrieve a date field from a header as a tuple compatible
|
||||||
# with time.mktime().
|
# with time.mktime().
|
||||||
|
@ -291,7 +288,7 @@ class Message:
|
||||||
# Utility functions
|
# Utility functions
|
||||||
# -----------------
|
# -----------------
|
||||||
|
|
||||||
# XXX Should fix these to be really conformant.
|
# XXX Should fix unquote() and quote() to be really conformant.
|
||||||
# XXX The inverses of the parse functions may also be useful.
|
# XXX The inverses of the parse functions may also be useful.
|
||||||
|
|
||||||
|
|
||||||
|
@ -306,12 +303,7 @@ def unquote(str):
|
||||||
return str
|
return str
|
||||||
|
|
||||||
|
|
||||||
# Parse an address into (name, address) tuple
|
# Add quotes around a string.
|
||||||
# (By Sjoerd Mullender)
|
|
||||||
|
|
||||||
error = 'parseaddr.error'
|
|
||||||
|
|
||||||
specials = re.compile(r'[][()<>,.;:@\" \000-\037\177-\377]')
|
|
||||||
|
|
||||||
def quote(str):
|
def quote(str):
|
||||||
return '"%s"' % string.join(
|
return '"%s"' % string.join(
|
||||||
|
@ -322,114 +314,244 @@ def quote(str):
|
||||||
'"'),
|
'"'),
|
||||||
'\\"')
|
'\\"')
|
||||||
|
|
||||||
|
|
||||||
|
# External interface to parse an address
|
||||||
|
|
||||||
def parseaddr(address):
    """Parse *address* and return the first address tuple found in it.

    Parsing is delegated to ``AddrlistClass``; when it finds no address
    the placeholder ``(None, None)`` is returned.
    """
    addresses = AddrlistClass(address).getaddrlist()
    if not addresses:
        return (None, None)
    return addresses[0]
|
||||||
paren = 0
|
|
||||||
for c in address:
|
|
||||||
if backslash:
|
# Address parser class by Ben Escoto
|
||||||
token.append(c)
|
|
||||||
backslash = 0
|
class AddrlistClass:
    """Hand-written parser for an RFC 822 style address-list header value.

    An instance wraps one header string (``field``) and walks it with a
    cursor (``pos``).  ``getaddrlist()`` is the entry point and returns a
    list of ``(name, address)`` tuples.  Comments met while parsing one
    address are collected in ``commentlist``.
    """

    def __init__(self, field):
        """Set up the character classes and point the cursor at the start
        of *field*."""
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r'
        # Any of these characters terminates an atom.
        self.atomends = self.specials + self.LWS + self.CR

        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Advance past white space and comments.

        Comment text is appended to ``self.commentlist``.
        """
        skippable = self.LWS + '\n\r'
        while self.pos < len(self.field):
            ch = self.field[self.pos]
            if ch in skippable:
                self.pos += 1
            elif ch == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse the rest of the field and return a list of
        ``(name, address)`` tuples.

        Implemented as a loop rather than by recursion so that long lists
        cannot hit the interpreter's recursion limit.  An empty slot (for
        example the doubled comma in ``a@b,, c@d``) is skipped; parsing
        stops as soon as ``getaddress()`` neither yields an address nor
        moves the cursor.
        """
        result = []
        while True:
            before = self.pos
            found = self.getaddress()
            if found:
                result.extend(found)
            elif self.pos == before:
                # No address and no progress: end of field or a character
                # we cannot get past.
                break
        return result

    def getaddress(self):
        """Parse one address (or one group) at the cursor.

        Returns a list of ``(name, address)`` tuples: empty when nothing
        could be parsed, one element for a single mailbox, and one element
        per member for a ``phrase: member, member;`` group.  A trailing
        comma is consumed.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Snapshot (copy) so that the restore below really discards the
        # comments collected while scanning the phrase; otherwise they are
        # collected a second time when the text is re-parsed.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []
        fieldlen = len(self.field)

        if self.pos >= fieldlen:
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # The address is just an addrspec.  This isn't very efficient
            # since we start over from the saved position.
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # The address is a group: collect members up to the ';'.
            self.pos += 1
            while self.pos < fieldlen:
                self.gotonext()
                if self.pos >= fieldlen:
                    # Unterminated group: nothing left to look at.
                    break
                if self.field[self.pos] == ';':
                    self.pos += 1
                    break
                before = self.pos
                members = self.getaddress()
                if not members and self.pos == before:
                    # Unparsable character inside the group; stop rather
                    # than loop forever on it.
                    break
                returnlist.extend(members)

        elif self.field[self.pos] == '<':
            # The address is a phrase followed by a route addr.
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                name = (' '.join(plist) + ' (' +
                        ' '.join(self.commentlist) + ')')
            else:
                name = ' '.join(plist)
            returnlist = [(name, routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a ``<...>`` route address and return its addrspec.

        The route part (``@domain:``) is skipped.  Returns ``None`` when
        the cursor is not on ``'<'`` and ``''`` when the brackets contain
        no addrspec (for example ``<>``).
        """
        if self.pos >= len(self.field) or self.field[self.pos] != '<':
            return None

        # Default so that '<>' or an unterminated '<' has something to
        # return instead of referencing an unbound local.
        adlist = ''
        expectroute = 0
        self.pos += 1
        self.gotonext()
        while self.pos < len(self.field):
            ch = self.field[self.pos]
            if expectroute:
                self.getdomain()
                expectroute = 0
            elif ch == '>':
                self.pos += 1
                break
            elif ch == '@':
                self.pos += 1
                expectroute = 1
            elif ch == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step over the character after the addrspec (expected
                # to be the closing '>').
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse ``local-part[@domain]`` at the cursor and return it as a
        string."""
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            ch = self.field[self.pos]
            if ch == '.':
                aslist.append('.')
                self.pos += 1
            elif ch == '"':
                aslist.append(self.getquote())
            elif ch in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return ''.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Parse the domain part of an addrspec and return it as a string.

        White space is skipped and comments are moved to
        ``self.commentlist``.
        """
        sdlist = []
        while self.pos < len(self.field):
            ch = self.field[self.pos]
            if ch in self.LWS:
                self.pos += 1
            elif ch == '(':
                self.commentlist.append(self.getcomment())
            elif ch == '[':
                sdlist.append(self.getdomainliteral())
            elif ch == '.':
                self.pos += 1
                sdlist.append('.')
            elif ch in self.atomends:
                break
            else:
                sdlist.append(self.getatom())

        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments=1):
        """Parse a delimited run starting at the cursor and return its
        contents.

        The cursor must be on *beginchar*, otherwise ``''`` is returned
        and nothing is consumed.  Parsing stops after the first character
        found in *endchars* (which is consumed) or at the end of the
        field.  A backslash makes the next character literal.  When
        *allowcomments* is true, a nested ``(...)`` is parsed recursively
        and its text (without the parentheses) is included.
        """
        if self.pos >= len(self.field) or self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = 0
        self.pos += 1
        while self.pos < len(self.field):
            ch = self.field[self.pos]
            if quote:
                slist.append(ch)
                quote = 0
            elif ch in endchars:
                self.pos += 1
                break
            elif allowcomments and ch == '(':
                slist.append(self.getcomment())
                # getcomment() has already moved the cursor past the
                # nested comment; do not advance again.
                continue
            elif ch == '\\':
                quote = 1
            else:
                slist.append(ch)
            self.pos += 1

        return ''.join(slist)

    def getquote(self):
        """Return the contents of the quoted string at the cursor, without
        the surrounding quotes."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Return the text of the comment at the cursor, without the outer
        parentheses."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Return the domain literal at the cursor, including its square
        brackets (dropping them would change the address)."""
        return '[%s]' % self.getdelimited('[', ']\r', 0)

    def getatom(self):
        """Return the run of characters from the cursor up to the next
        character in ``self.atomends`` (or the end of the field)."""
        start = self.pos
        while (self.pos < len(self.field) and
               self.field[self.pos] not in self.atomends):
            self.pos += 1
        return self.field[start:self.pos]

    def getphraselist(self):
        """Parse a sequence of words (atoms and quoted strings) and return
        them as a list of strings.

        White space is skipped and comments are moved to
        ``self.commentlist``; parsing stops at the first other atom
        terminator.
        """
        plist = []
        while self.pos < len(self.field):
            ch = self.field[self.pos]
            if ch in self.LWS:
                self.pos += 1
            elif ch == '"':
                plist.append(self.getquote())
            elif ch == '(':
                self.commentlist.append(self.getcomment())
            elif ch in self.atomends:
                break
            else:
                plist.append(self.getatom())

        return plist
|
||||||
|
|
||||||
|
|
||||||
# Parse a date field
|
# Parse a date field
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue