mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 03:44:55 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			511 lines
		
	
	
	
		
			16 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			511 lines
		
	
	
	
		
			16 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
# Copyright (C) 2002-2007 Python Software Foundation
# Contact: email-sig@python.org
 | 
						|
 | 
						|
"""Email address parsing code.
 | 
						|
 | 
						|
Lifted directly from rfc822.py.  This should eventually be rewritten.
 | 
						|
"""
 | 
						|
 | 
						|
# Names exported by ``from <module> import *``.
__all__ = [
    'mktime_tz',
    'parsedate',
    'parsedate_tz',
    'quote',
    ]
 | 
						|
 | 
						|
import calendar
import time
 | 
						|
 | 
						|
# Separator strings used with str.join() throughout this module.
SPACE = ' '
EMPTYSTRING = ''
COMMASPACE = ', '
 | 
						|
 | 
						|
# Parse a date field
# Abbreviated names occupy indexes 0-11 and full names indexes 12-23, so
# (index % 12) + 1 is the month number for either spelling.
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']

_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

# Values are offsets written as signed HHMM integers (e.g. -500 is -05:00).
_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a 10-tuple, or return None if parsing fails.

    The result is (year, month, day, hour, minute, second, 0, 1, -1,
    tzoffset).  The weekday, yearday and DST slots always hold the
    placeholder values 0, 1 and -1.  tzoffset is the zone offset in seconds
    (negative for zones written with a minus sign, such as -0500), or None
    when no usable zone was found.  Zone names are looked up in _timezones,
    which omits the military zones other than Z.

    Empty, whitespace-only and structurally incomplete input yields None
    rather than raising.
    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input.
        return None
    # The FWS after the comma after the day-of-week is optional, so search and
    # adjust for this.
    if data[0].endswith(',') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued onto the time ("01:08:47-0500"); split it off.
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    # Splitting an RFC 850 date on '-' can leave empty fields ("-jan-99");
    # reject them here so the indexing below cannot fail.
    if not (dd and mm and yy):
        return None
    mm = mm.lower()
    if mm not in _monthnames:
        # Tolerate "Month Day" ordering by swapping the two fields.
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        # Full month names sit in the second half of the table.
        mm -= 12
    if dd.endswith(','):
        dd = dd[:-1]
    # A ':' in the year slot means the year and time fields are transposed.
    i = yy.find(':')
    if i > 0:
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
        if not yy:
            # The year field was a bare comma.
            return None
    if not yy[0].isdigit():
        # A non-numeric "year" is really the zone; swap them.
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    # Check for a yy specified in two-digit format, then convert it to the
    # appropriate four-digit format, according to the POSIX standard. RFC 822
    # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
    # mandates a 4-digit yy. For more information, see the documentation for
    # the time module.
    if yy < 100:
        # The year is between 1969 and 1999 (inclusive).
        if yy > 68:
            yy += 1900
        # The year is between 2000 and 2068 (inclusive).
        else:
            yy += 2000
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            # Unknown zone name or the dummy '' zone: leave tzoffset as None.
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
    # Daylight Saving Time flag is set to -1, since DST is unknown.
    return yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset
 | 
						|
 | 
						|
 | 
						|
def parsedate(data):
    """Convert a time string to a 9-item time tuple.

    Delegates to parsedate_tz() and drops the trailing timezone offset.  If
    parsedate_tz() does not return a tuple (it returns None for input it
    cannot parse), that value is passed through unchanged.
    """
    t = parsedate_tz(data)
    if isinstance(t, tuple):
        return t[:9]
    else:
        return t
 | 
						|
 | 
						|
 | 
						|
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    data[9] is the zone offset in seconds, or None when the zone is unknown.
    With a known offset the result is an integer number of seconds since the
    epoch and does not depend on the host's local time zone.  With no offset
    the fields are interpreted as local time via time.mktime().
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    else:
        # Read the broken-down fields as if they were UTC, then remove the
        # zone offset.  Going through time.mktime() and time.timezone instead
        # would make the answer depend on the host's local zone rules.
        t = calendar.timegm(data)
        return t - data[9]
 | 
						|
 | 
						|
 | 
						|
def quote(str):
    """Prepare string to be used in a quoted string.

    Turns backslash and double quote characters into quoted pairs.  These
    are the only characters that need to be quoted inside a quoted string.
    Does not add the surrounding double quotes.
    """
    # Backslashes must be doubled first; otherwise the backslashes added in
    # front of double quotes would themselves be doubled.
    return str.replace('\\', '\\\\').replace('"', '\\"')
 | 
						|
 | 
						|
 | 
						|
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of RFC 2822 in
    front of you.

    The parser keeps a cursor (self.pos) into the header text (self.field);
    every get*() method consumes input starting at the cursor and leaves the
    cursor just past what it consumed.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.FWS = self.LWS + self.CR
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Skip white space and extract comments.

        Comments are appended to self.commentlist.  Returns the skipped
        spaces and tabs as a string; CR and LF are skipped but not returned.
        """
        wslist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                if self.field[self.pos] not in '\n\r':
                    wslist.append(self.field[self.pos])
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break
        return EMPTYSTRING.join(wslist)

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses.  Each entry is a
        (realname, address) pair; a position where nothing could be parsed
        contributes ('', '').
        """
        result = []
        while self.pos < len(self.field):
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                result.append(('', ''))
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, address) pairs: one pair for a single
        address, several for a group, and an empty list when nothing usable
        was found.  A trailing comma separator is consumed.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        oldcl = self.commentlist
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(SPACE.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(SPACE.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(SPACE.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Step over a stray special so the caller's loop advances.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the cursor is not on a `<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = False
        self.pos += 1
        self.gotonext()
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                # Discard the domain that follows an `@' in the route.
                self.getdomain()
                expectroute = False
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = True
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step past the character that ended the addr-spec.
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec.

        Returns the local part alone when no `@' follows it, and the empty
        string when an `@' is present but no valid domain follows.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            preserve_ws = True
            if self.field[self.pos] == '.':
                # Drop whitespace recorded just before the dot.
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                aslist.append('.')
                self.pos += 1
                preserve_ws = False
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % quote(self.getquote()))
            elif self.field[self.pos] in self.atomends:
                # Drop trailing whitespace before the terminator.
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                break
            else:
                aslist.append(self.getatom())
            ws = self.gotonext()
            if preserve_ws and ws:
                aslist.append(ws)

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return EMPTYSTRING.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        domain = self.getdomain()
        if not domain:
            # Invalid domain, return an empty address instead of returning a
            # local part to denote failed parsing.
            return EMPTYSTRING
        return EMPTYSTRING.join(aslist) + domain

    def getdomain(self):
        """Get the complete domain name from an address.

        Returns the empty string if a second `@' is found inside the domain.
        """
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] == '@':
                # Don't parse domains with two `@' like
                # `a@malicious.org@important.com'; accepting the first part
                # would silently report a different address.
                return EMPTYSTRING
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return EMPTYSTRING.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments=True):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        # True when the previous character was a backslash, so the current
        # character is taken literally.
        escaped = False
        self.pos += 1
        while self.pos < len(self.field):
            if escaped:
                slist.append(self.field[self.pos])
                escaped = False
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                continue        # have already advanced pos from getcomment
            elif self.field[self.pos] == '\\':
                escaped = True
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', False)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', True)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', False)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.

        Returns the list of words; comments met along the way are appended
        to self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.FWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
 | 
						|
 | 
						|
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses.

    The parsed (realname, address) pairs are kept in self.addresslist.  The
    +, +=, - and -= operators combine two instances by membership in that
    list.
    """

    def __init__(self, field):
        """Parse `field'; a false value (None or '') gives an empty list."""
        AddrlistClass.__init__(self, field)
        if field:
            self.addresslist = self.getaddrlist()
        else:
            self.addresslist = []

    def __len__(self):
        return len(self.addresslist)

    def __add__(self, other):
        # Set union
        newaddr = AddressList(None)
        newaddr.addresslist = self.addresslist[:]
        for x in other.addresslist:
            if x not in self.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __iadd__(self, other):
        # Set union, in-place
        for x in other.addresslist:
            if x not in self.addresslist:
                self.addresslist.append(x)
        return self

    def __sub__(self, other):
        # Set difference
        newaddr = AddressList(None)
        for x in self.addresslist:
            if x not in other.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __isub__(self, other):
        # Set difference, in-place
        for x in other.addresslist:
            if x in self.addresslist:
                self.addresslist.remove(x)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
 |