diff options
Diffstat (limited to 'lib-python/3/email')
-rw-r--r--lib-python/3/email/test/data/PyBanner048.gifbin0 -> 954 bytes
-rw-r--r--lib-python/3/email/test/data/audiotest.aubin0 -> 28144 bytes
76 files changed, 11511 insertions, 0 deletions
diff --git a/lib-python/3/email/__init__.py b/lib-python/3/email/__init__.py
new file mode 100644
index 0000000000..bd316fdaf3
--- /dev/null
+++ b/lib-python/3/email/__init__.py
@@ -0,0 +1,63 @@
+# Copyright (C) 2001-2007 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+"""A package for parsing, handling, and generating email messages."""
# Version string of the email package.
__version__ = '5.1.0'

# Public names of the package: the submodules plus the four message_from_*
# convenience parsers.
__all__ = [
    'base64mime',
    'charset',
    'encoders',
    'errors',
    'generator',
    'header',
    'iterators',
    'message',
    'message_from_file',
    'message_from_binary_file',
    'message_from_string',
    'message_from_bytes',
    'mime',
    'parser',
    'quoprimime',
    'utils',
    ]
+# Some convenience routines. Don't import Parser and Message as side-effects
+# of importing email since those cascadingly import most of the rest of the
+# email package.
def message_from_string(s, *args, **kws):
    """Build a Message object model from the text in `s`.

    Any extra positional and keyword arguments (such as the optional _class
    and strict) are forwarded unchanged to the Parser constructor.
    """
    # Imported lazily so that importing the package itself stays cheap.
    from email.parser import Parser
    text_parser = Parser(*args, **kws)
    return text_parser.parsestr(s)
def message_from_bytes(s, *args, **kws):
    """Build a Message object model from the bytes string `s`.

    Any extra positional and keyword arguments (such as the optional _class
    and strict) are forwarded unchanged to the BytesParser constructor.
    """
    # Imported lazily so that importing the package itself stays cheap.
    from email.parser import BytesParser
    bytes_parser = BytesParser(*args, **kws)
    return bytes_parser.parsebytes(s)
def message_from_file(fp, *args, **kws):
    """Read the text file object `fp` and parse it into a Message object model.

    Any extra positional and keyword arguments (such as the optional _class
    and strict) are forwarded unchanged to the Parser constructor.
    """
    # Imported lazily so that importing the package itself stays cheap.
    from email.parser import Parser
    text_parser = Parser(*args, **kws)
    return text_parser.parse(fp)
def message_from_binary_file(fp, *args, **kws):
    """Read the binary file object `fp` and parse it into a Message object model.

    Any extra positional and keyword arguments (such as the optional _class
    and strict) are forwarded unchanged to the BytesParser constructor.
    """
    # Imported lazily so that importing the package itself stays cheap.
    from email.parser import BytesParser
    bytes_parser = BytesParser(*args, **kws)
    return bytes_parser.parse(fp)
diff --git a/lib-python/3/email/_parseaddr.py b/lib-python/3/email/_parseaddr.py
new file mode 100644
index 0000000000..a295757281
--- /dev/null
+++ b/lib-python/3/email/_parseaddr.py
@@ -0,0 +1,511 @@
+# Copyright (C) 2002-2007 Python Software Foundation
+# Contact: email-sig@python.org
+"""Email address parsing code.
+Lifted directly from rfc822.py. This should eventually be rewritten.
+"""
# Public helpers of this module.
__all__ = [
    'mktime_tz',
    'parsedate',
    'parsedate_tz',
    'quote',
    ]
+import time
# Separators used with str.join() throughout this module.
SPACE = ' '
# The address parser joins fragments with EMPTYSTRING, so it must be defined
# alongside SPACE.
EMPTYSTRING = ''
# Parse a date field

# Accepted month spellings: the twelve three-letter abbreviations followed by
# the twelve full names.  parsedate_tz() maps an index past 12 back onto
# 1..12, so both spellings yield the same month number.
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']

# Lower-case day-of-week abbreviations that may lead a date string.
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

# Values are signed HHMM offsets (e.g. -500 means five hours behind UTC).
_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }
def parsedate_tz(data):
    """Convert a date string to a 10-item time tuple.

    The result is (year, month, day, hour, minute, second, 0, 1, -1, tzoffset).
    The weekday, yearday and DST slots are fixed placeholders, and tzoffset is
    the zone offset from UTC in seconds, or None when no usable zone was found.
    Named zones are limited to the entries of _timezones; numeric offsets such
    as -0500 are always accepted.

    Returns None when the string cannot be interpreted as a date, including
    empty or whitespace-only input.
    """
    data = data.split()
    if not data:
        # Empty or whitespace-only header value: nothing to parse.
        return None
    # The FWS after the comma after the day-of-week is optional, so search and
    # adjust for this.
    if data[0].endswith(',') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time ("12:00:00+0100"); split it off.
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if mm not in _monthnames:
        # Perhaps the day and month are in the other order ("Nov 16").
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        # Full month names live in the second half of the table.
        mm -= 12
    # endswith() and slicing are used instead of [-1] / [0] indexing so that
    # an empty token falls through to the int() conversion below and yields
    # None rather than an IndexError.
    if dd.endswith(','):
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # The time and year fields are swapped.
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
    if not yy[:1].isdigit():
        # The year and zone fields are swapped.
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    # Check for a yy specified in two-digit format, then convert it to the
    # appropriate four-digit format, according to the POSIX standard. RFC 822
    # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
    # mandates a 4-digit yy. For more information, see the documentation for
    # the time module.
    if yy < 100:
        # The year is between 1969 and 1999 (inclusive).
        if yy > 68:
            yy += 1900
        # The year is between 2000 and 2068 (inclusive).
        else:
            yy += 2000
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ((tzoffset//100)*3600 + (tzoffset % 100)*60)
    # Daylight Saving Time flag is set to -1, since DST is unknown.
    return yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset
def parsedate(data):
    """Convert a time string to a 9-item time tuple (no timezone offset)."""
    parsed = parsedate_tz(data)
    # A non-tuple result (parse failure) is handed back untouched.
    if not isinstance(parsed, tuple):
        return parsed
    # Drop the trailing timezone offset.
    return parsed[:9]
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    data[9] is the zone offset in seconds, or None when the zone is unknown.
    With a known offset the result is an integer number of seconds since the
    epoch; with an unknown offset the fields are interpreted as local time and
    the value of time.mktime() is returned.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    # The offset is known, so the local timezone must not influence the
    # result: convert the fields as if they were UTC and then remove the
    # offset.  (Going through time.mktime() and time.timezone depends on the
    # platform's local zone rules and can fail for dates mktime() rejects.)
    import calendar
    return calendar.timegm(data) - data[9]
def quote(str):
    """Escape `str` for use inside a quoted string.

    Backslashes and double quotes -- the only characters that need quoting
    inside a quoted string -- are each turned into a quoted pair.  The
    surrounding double quotes are not added.
    """
    # Backslashes go first; otherwise the backslashes introduced for the
    # double quotes would themselves get doubled.
    backslashes_escaped = str.replace('\\', '\\\\')
    return backslashes_escaped.replace('"', '\\"')
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of RFC 2822 in
    front of you.

    The parser is a cursor (self.pos) over the header text (self.field); every
    get*() method consumes input starting at the cursor and advances it.

    Note: this class interface is deprecated and may be removed in the future.
    Use email.utils.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.FWS = self.LWS + self.CR
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Skip white space and extract comments.

        Returns the skipped blanks and tabs as a string (line-break characters
        are skipped but not returned).  Comments are appended to
        self.commentlist.
        """
        wslist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                if self.field[self.pos] not in '\n\r':
                    wslist.append(self.field[self.pos])
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break
        return ''.join(wslist)

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, each as a
        (realname, address) pair.  A chunk that yields nothing contributes
        an ('', '') placeholder.
        """
        result = []
        while self.pos < len(self.field):
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                result.append(('', ''))
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, address) pairs: one pair for a plain
        address, several for a group, and an empty list if nothing usable
        was found.
        """
        self.commentlist = []
        self.gotonext()

        # Remember where we started so a bare addr-spec can be re-parsed.
        oldpos = self.pos
        oldcl = self.commentlist
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Skip a stray special so that the caller makes progress.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the cursor is not on a `<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = False
        self.pos += 1
        self.gotonext()
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                # The domain of an "@domain" route element is discarded.
                self.getdomain()
                expectroute = False
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = True
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step over the character that ended the addr-spec.
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec.

        Returns the local part alone when no `@' follows it, and the empty
        string when an `@' is present but no valid domain follows.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            preserve_ws = True
            if self.field[self.pos] == '.':
                # Whitespace around a dot is not significant.
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                aslist.append('.')
                self.pos += 1
                preserve_ws = False
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % quote(self.getquote()))
            elif self.field[self.pos] in self.atomends:
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                break
            else:
                aslist.append(self.getatom())
            ws = self.gotonext()
            if preserve_ws and ws:
                aslist.append(ws)

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return ''.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        domain = self.getdomain()
        if not domain:
            # Invalid domain: return an empty address rather than a bare
            # local part, so callers can tell that parsing failed.
            return ''
        return ''.join(aslist) + domain

    def getdomain(self):
        """Get the complete domain name from an address.

        Returns the empty string if a second `@' is found inside the domain.
        """
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] == '@':
                # Do not accept domains containing a second `@', such as
                # a@malicious.org@important.com: reporting a@malicious.org
                # for that input would let a sender spoof the address.
                return ''
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments=True):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        # True right after a backslash: the next character is taken literally.
        escaped = False
        self.pos += 1
        while self.pos < len(self.field):
            if escaped:
                slist.append(self.field[self.pos])
                escaped = False
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                continue        # have already advanced pos from getcomment
            elif self.field[self.pos] == '\\':
                escaped = True
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', False)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', True)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', False)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.

        Returns the list of words; comments met on the way are appended to
        self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.FWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
class AddressList(AddrlistClass):
    """A parsed list of RFC 2822 addresses supporting set-like operators.

    The parsed addresses are kept in self.addresslist.
    """

    def __init__(self, field):
        AddrlistClass.__init__(self, field)
        # A false `field' (None or empty) yields an empty list without parsing.
        self.addresslist = self.getaddrlist() if field else []

    def __len__(self):
        return len(self.addresslist)

    def __add__(self, other):
        # Set union: our addresses, then those of `other' that we lack.
        union = AddressList(None)
        additions = [addr for addr in other.addresslist
                     if addr not in self.addresslist]
        union.addresslist = self.addresslist[:] + additions
        return union

    def __iadd__(self, other):
        # Set union, in-place
        for addr in other.addresslist:
            if addr not in self.addresslist:
                self.addresslist.append(addr)
        return self

    def __sub__(self, other):
        # Set difference: our addresses that `other' does not contain.
        difference = AddressList(None)
        difference.addresslist = [addr for addr in self.addresslist
                                  if addr not in other.addresslist]
        return difference

    def __isub__(self, other):
        # Set difference, in-place
        for addr in other.addresslist:
            if addr in self.addresslist:
                self.addresslist.remove(addr)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
diff --git a/lib-python/3/email/base64mime.py b/lib-python/3/email/base64mime.py
new file mode 100644
index 0000000000..f3bbac1caf
--- /dev/null
+++ b/lib-python/3/email/base64mime.py
@@ -0,0 +1,119 @@
+# Copyright (C) 2002-2007 Python Software Foundation
+# Author: Ben Gertzfield
+# Contact: email-sig@python.org
+"""Base64 content transfer encoding per RFCs 2045-2047.
+This module handles the content transfer encoding method defined in RFC 2045
+to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
+characters encoding known as Base64.
+It is used in the MIME standards for email to attach images, audio, and text
+using some 8-bit character sets to messages.
+This module provides an interface to encode and decode both headers and bodies
+with Base64 encoding.
+RFC 2045 defines a method for including character set information in an
+`encoded-word' in a header. This method is commonly used for 8-bit real names
+in To:, From:, Cc:, etc. fields, as well as Subject: lines.
+This module does not do the line wrapping or end-of-line character conversion
+necessary for proper internationalized headers; it only does dumb encoding and
+decoding. To deal with the various line wrapping issues, use the email.header
+module.
+"""
# Public names of this module.
__all__ = [
    'body_decode',
    'body_encode',
    'decode',
    'decodestring',
    'header_encode',
    'header_length',
    ]
+from base64 import b64encode
+from binascii import b2a_base64, a2b_base64
# Line endings and the join separator used by the encoders below.
CRLF = '\r\n'
NL = '\n'
# body_encode() joins its encoded lines with EMPTYSTRING, so it must be
# defined here.
EMPTYSTRING = ''
+# See also Charset.py
+# Helpers
def header_length(bytearray):
    """Return the length of `bytearray` once it is encoded with base64."""
    full_groups, remainder = divmod(len(bytearray), 3)
    # Every group of 3 input bytes -- including a trailing partial group --
    # becomes 4 output characters.
    return 4 * (full_groups + (1 if remainder else 0))
def header_encode(header_bytes, charset='iso-8859-1'):
    """Base64-encode a single header line as an RFC 2047 encoded-word.

    header_bytes may be bytes, or a str which is first encoded with charset.
    charset (default iso-8859-1) is also the name placed in the encoded-word.
    An empty input yields the empty string.
    """
    if not header_bytes:
        return ""
    raw = (header_bytes.encode(charset)
           if isinstance(header_bytes, str) else header_bytes)
    payload = b64encode(raw).decode("ascii")
    return '=?%s?b?%s?=' % (charset, payload)
def body_encode(s, maxlinelen=76, eol=NL):
    r"""Base64-encode `s` into a str of wrapped lines.

    No output line is longer than maxlinelen characters (76 by default).

    Every line ends with eol, "\n" by default; pass "\r\n" when the result is
    going to be used directly in an email.

    A false `s` (empty) is returned unchanged.
    """
    if not s:
        return s

    # Number of input bytes whose encoding fills one output line.
    max_unencoded = maxlinelen * 3 // 4
    custom_eol = eol != NL
    encvec = []
    for start in range(0, len(s), max_unencoded):
        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
        # adding a newline to the encoded string?
        line = b2a_base64(s[start:start + max_unencoded]).decode("ascii")
        if custom_eol and line.endswith(NL):
            line = line[:-1] + eol
        encvec.append(line)
    return EMPTYSTRING.join(encvec)
def decode(string):
    """Decode a raw base64 string into a bytes object.

    A full MIME header value such as =?iso-8859-1?b?bmloISBuaWgh?= is not
    understood here -- use the high level email.header class for that.
    """
    if not string:
        return bytes()
    # str input is turned into bytes before it reaches binascii.
    raw = (string.encode('raw-unicode-escape')
           if isinstance(string, str) else string)
    return a2b_base64(raw)


# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode
diff --git a/lib-python/3/email/charset.py b/lib-python/3/email/charset.py
new file mode 100644
index 0000000000..f22be2c52c
--- /dev/null
+++ b/lib-python/3/email/charset.py
@@ -0,0 +1,399 @@
+# Copyright (C) 2001-2007 Python Software Foundation
+# Author: Ben Gertzfield, Barry Warsaw
+# Contact: email-sig@python.org
# Public names of this module.
__all__ = [
    'Charset',
    'add_alias',
    'add_charset',
    'add_codec',
    ]
+from functools import partial
+import email.base64mime
+import email.quoprimime
+from email import errors
+from email.encoders import encode_7or8bit
# Flags for types of header encodings
QP = 1          # Quoted-Printable
BASE64 = 2      # Base64
SHORTEST = 3    # the shorter of QP and base64, but only for headers

# In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7
RFC2047_CHROME_LEN = 7

DEFAULT_CHARSET = 'us-ascii'
UNKNOWN8BIT = 'unknown-8bit'
# Separator used by Charset.header_encode_lines() to join characters.
EMPTYSTRING = ''
# Defaults
# Registry read by Charset.__init__() and extended by add_charset(): maps a
# canonical charset name to (header encoding, body encoding, output charset).
CHARSETS = {
    # input        header enc  body enc output conv
    'iso-8859-1':  (QP,        QP,      None),
    'iso-8859-2':  (QP,        QP,      None),
    'iso-8859-3':  (QP,        QP,      None),
    'iso-8859-4':  (QP,        QP,      None),
    # iso-8859-5 is Cyrillic, and not especially used
    # iso-8859-6 is Arabic, also not particularly used
    # iso-8859-7 is Greek, QP will not make it readable
    # iso-8859-8 is Hebrew, QP will not make it readable
    'iso-8859-9':  (QP,        QP,      None),
    'iso-8859-10': (QP,        QP,      None),
    # iso-8859-11 is Thai, QP will not make it readable
    'iso-8859-13': (QP,        QP,      None),
    'iso-8859-14': (QP,        QP,      None),
    'iso-8859-15': (QP,        QP,      None),
    'iso-8859-16': (QP,        QP,      None),
    'windows-1252':(QP,        QP,      None),
    'viscii':      (QP,        QP,      None),
    'us-ascii':    (None,      None,    None),
    'big5':        (BASE64,    BASE64,  None),
    'gb2312':      (BASE64,    BASE64,  None),
    'euc-jp':      (BASE64,    None,    'iso-2022-jp'),
    'shift_jis':   (BASE64,    None,    'iso-2022-jp'),
    'iso-2022-jp': (BASE64,    None,    None),
    'koi8-r':      (BASE64,    BASE64,  None),
    'utf-8':       (SHORTEST,  BASE64, 'utf-8'),
    }
# Aliases for other commonly-used names for character sets.  Map
# them to the real ones used in email.
# Read by Charset.__init__() and extended by add_alias().
ALIASES = {
    'latin_1': 'iso-8859-1',
    'latin-1': 'iso-8859-1',
    'latin_2': 'iso-8859-2',
    'latin-2': 'iso-8859-2',
    'latin_3': 'iso-8859-3',
    'latin-3': 'iso-8859-3',
    'latin_4': 'iso-8859-4',
    'latin-4': 'iso-8859-4',
    'latin_5': 'iso-8859-9',
    'latin-5': 'iso-8859-9',
    'latin_6': 'iso-8859-10',
    'latin-6': 'iso-8859-10',
    'latin_7': 'iso-8859-13',
    'latin-7': 'iso-8859-13',
    'latin_8': 'iso-8859-14',
    'latin-8': 'iso-8859-14',
    'latin_9': 'iso-8859-15',
    'latin-9': 'iso-8859-15',
    'latin_10':'iso-8859-16',
    'latin-10':'iso-8859-16',
    'cp949':   'ks_c_5601-1987',
    'euc_jp':  'euc-jp',
    'euc_kr':  'euc-kr',
    'ascii':   'us-ascii',
    }
# Map charsets to their Unicode codec strings.
# Read by Charset.__init__() and extended by add_codec().
CODEC_MAP = {
    'gb2312':      'eucgb2312_cn',
    'big5':        'big5_tw',
    # Hack: We don't want *any* conversion for stuff marked us-ascii, as all
    # sorts of garbage might be sent to us in the guise of 7-bit us-ascii.
    # Let that stuff pass through without conversion to/from Unicode.
    'us-ascii':    None,
    }
+# Convenience functions for extending the above mappings
def add_charset(charset, header_enc=None, body_enc=None, output_charset=None):
    """Register the email properties of a character set.

    charset is the input character set and must be its canonical name.

    header_enc and body_enc say how headers and bodies in that charset are to
    be encoded: QP for quoted-printable, BASE64 for base64, SHORTEST for
    whichever of the two is shorter, or None (the default) for no encoding.
    SHORTEST is only accepted for header_enc; passing it as body_enc raises
    ValueError.

    output_charset names the character set the output should be in; the
    conversion goes from the input charset through Unicode to the output
    charset.  By default the output stays in the input charset.

    Both the input and the output charset need a Unicode codec entry in this
    module's charset-to-codec mapping; add_codec(charset, codecname) adds
    codecs the module does not know about.  See the codecs module's
    documentation for more information.
    """
    if body_enc == SHORTEST:
        raise ValueError('SHORTEST not allowed for body_enc')
    properties = (header_enc, body_enc, output_charset)
    CHARSETS[charset] = properties
def add_alias(alias, canonical):
    """Register `alias` as another name for the charset `canonical`.

    For example, alias 'latin-1' for the canonical name 'iso-8859-1'.
    An existing alias of the same name is overwritten.
    """
    ALIASES.update({alias: canonical})
def add_codec(charset, codecname):
    """Register the codec that maps `charset` to and from Unicode.

    charset is the canonical name of a character set and codecname is the
    name of a Python codec, as accepted by the encode() method of a string.
    An existing entry for the charset is overwritten.
    """
    CODEC_MAP.update({charset: codecname})
# Helper for turning a string into bytes.  Strings tagged unknown-8bit may
# carry surrogate-escaped bytes, which only the surrogateescape handler can
# turn back into the original bytes.
def _encode(string, codec):
    if codec != UNKNOWN8BIT:
        return string.encode(codec)
    return string.encode('ascii', 'surrogateescape')
class Charset:
    """Map character sets to their email properties.

    This class provides information about the requirements imposed on email
    for a specific character set.  It also provides convenience routines for
    converting between character sets, given the availability of the
    applicable codecs.  Given a character set, it will do its best to provide
    information on how to use that character set in an email in an
    RFC-compliant way.

    Certain character sets must be encoded with quoted-printable or base64
    when used in email headers or bodies.  Certain character sets must be
    converted outright, and are not allowed in email.  Instances of this
    class expose the following information about a character set:

    input_charset: The initial character set specified.  Common aliases
                   are converted to their `official' email names (e.g. latin_1
                   is converted to iso-8859-1).  Defaults to 7-bit us-ascii.

    header_encoding: If the character set must be encoded before it can be
                     used in an email header, this attribute will be set to
                     Charset.QP (for quoted-printable), Charset.BASE64 (for
                     base64 encoding), or Charset.SHORTEST for the shortest of
                     QP or BASE64 encoding.  Otherwise, it will be None.

    body_encoding: Same as header_encoding, but describes the encoding for the
                   mail message's body, which indeed may be different than the
                   header encoding.  Charset.SHORTEST is not allowed for
                   body_encoding.

    output_charset: Some character sets must be converted before they can be
                    used in email headers or bodies.  If the input_charset is
                    one of them, this attribute will contain the name of the
                    charset output will be converted to.  Otherwise, it is
                    the same as input_charset.

    input_codec: The name of the Python codec used to convert the
                 input_charset to Unicode.  If no conversion codec is
                 necessary, this attribute will be None.

    output_codec: The name of the Python codec used to convert Unicode
                  to the output_charset.  If no conversion codec is necessary,
                  this attribute will have the same value as the input_codec.
    """

    def __init__(self, input_charset=DEFAULT_CHARSET):
        """Look up the properties of `input_charset` (a str or ASCII bytes).

        Raises errors.CharsetError if the name is not pure ASCII.
        """
        # RFC 2046, $4.1.2 says charsets are not case sensitive.  The name is
        # coerced to str, whose .lower() is locale insensitive, and checked
        # to be ASCII.
        try:
            if isinstance(input_charset, str):
                input_charset.encode('ascii')
            else:
                input_charset = str(input_charset, 'ascii')
        except UnicodeError:
            raise errors.CharsetError(input_charset)
        input_charset = input_charset.lower()
        # Set the input charset after filtering through the aliases
        self.input_charset = ALIASES.get(input_charset, input_charset)
        # Look the charset up in the CHARSETS registry; unknown charsets get
        # the shortest header encoding and a base64 body.
        henc, benc, conv = CHARSETS.get(self.input_charset,
                                        (SHORTEST, BASE64, None))
        if not conv:
            conv = self.input_charset
        # These are plain attributes; callers may override them afterwards.
        self.header_encoding = henc
        self.body_encoding = benc
        self.output_charset = ALIASES.get(conv, conv)
        # Now set the codecs.  If one isn't defined for input_charset,
        # guess and try a Unicode codec with the same name as input_codec.
        self.input_codec = CODEC_MAP.get(self.input_charset,
                                         self.input_charset)
        self.output_codec = CODEC_MAP.get(self.output_charset,
                                          self.output_charset)

    def __str__(self):
        return self.input_charset.lower()

    __repr__ = __str__

    def __eq__(self, other):
        # Compare by lower-cased name so a Charset equals its name string.
        return str(self) == str(other).lower()

    def __ne__(self, other):
        return not self.__eq__(other)

    def get_body_encoding(self):
        """Return the content-transfer-encoding used for body encoding.

        This is either the string `quoted-printable' or `base64' depending on
        the encoding used, or it is a function in which case you should call
        the function with a single argument, the Message object being
        encoded.  The function should then set the Content-Transfer-Encoding
        header itself to whatever is appropriate.

        Returns "quoted-printable" if self.body_encoding is QP.
        Returns "base64" if self.body_encoding is BASE64.
        Returns conversion function otherwise.
        """
        assert self.body_encoding != SHORTEST
        if self.body_encoding == QP:
            return 'quoted-printable'
        elif self.body_encoding == BASE64:
            return 'base64'
        else:
            return encode_7or8bit

    def get_output_charset(self):
        """Return the output character set.

        This is self.output_charset if that is not None, otherwise it is
        self.input_charset.
        """
        return self.output_charset or self.input_charset

    def header_encode(self, string):
        """Header-encode a string by converting it first to bytes.

        The type of encoding (base64 or quoted-printable) will be based on
        this charset's `header_encoding`.

        :param string: A unicode string for the header.  It must be possible
            to encode this string to bytes using the character set's
            output codec.
        :return: The encoded string, with RFC 2047 chrome.
        """
        codec = self.output_codec or 'us-ascii'
        header_bytes = _encode(string, codec)
        # 7bit/8bit encodings return the string unchanged (modulo conversions)
        encoder_module = self._get_encoder(header_bytes)
        if encoder_module is None:
            return string
        return encoder_module.header_encode(header_bytes, codec)

    def header_encode_lines(self, string, maxlengths):
        """Header-encode a string by converting it first to bytes.

        This is similar to `header_encode()` except that the string is fit
        into maximum line lengths as given by the argument.

        :param string: A unicode string for the header.  It must be possible
            to encode this string to bytes using the character set's
            output codec.
        :param maxlengths: Maximum line length iterator.  Each element
            returned from this iterator will provide the next maximum line
            length.  This parameter is used as an argument to built-in next()
            and should never be exhausted.  The maximum line lengths should
            not count the RFC 2047 chrome.  These line lengths are only a
            hint; the splitter does the best it can.
        :return: Lines of encoded strings, each with RFC 2047 chrome.  The
            first element is None when not even one character fits on the
            first line.
        """
        # See which encoding we should use.
        codec = self.output_codec or 'us-ascii'
        header_bytes = _encode(string, codec)
        encoder_module = self._get_encoder(header_bytes)
        encoder = partial(encoder_module.header_encode, charset=codec)
        # Calculate the number of characters that the RFC 2047 chrome will
        # contribute to each line.
        charset = self.get_output_charset()
        extra = len(charset) + RFC2047_CHROME_LEN
        # Now comes the hard part.  We must encode bytes but we can't split on
        # bytes because some character sets are variable length and each
        # encoded word must stand on its own.  So the problem is you have to
        # encode to bytes to figure out this word's length, but you must split
        # on characters.  This causes two problems: first, we don't know how
        # many octets a specific substring of unicode characters will get
        # encoded to, and second, we don't know how many ASCII characters
        # those octets will get encoded to.  Unless we try it.  Which seems
        # inefficient.  In the interest of being correct rather than fast (and
        # in the hope that there will be few encoded headers in any such
        # message), brute force it. :(
        lines = []
        current_line = []
        maxlen = next(maxlengths) - extra
        for character in string:
            current_line.append(character)
            this_line = ''.join(current_line)
            length = encoder_module.header_length(_encode(this_line, charset))
            if length > maxlen:
                # This last character doesn't fit so pop it off.
                current_line.pop()
                # Does nothing fit on the first line?
                if not lines and not current_line:
                    lines.append(None)
                else:
                    joined_line = ''.join(current_line)
                    header_bytes = _encode(joined_line, codec)
                    lines.append(encoder(header_bytes))
                # Start the next line with the character that did not fit.
                current_line = [character]
                maxlen = next(maxlengths) - extra
        joined_line = ''.join(current_line)
        header_bytes = _encode(joined_line, codec)
        lines.append(encoder(header_bytes))
        return lines

    def _get_encoder(self, header_bytes):
        """Return the module used to header-encode header_bytes, or None.

        For SHORTEST, base64 is chosen only when it is strictly shorter than
        quoted-printable for these bytes.
        """
        if self.header_encoding == BASE64:
            return email.base64mime
        elif self.header_encoding == QP:
            return email.quoprimime
        elif self.header_encoding == SHORTEST:
            len64 = email.base64mime.header_length(header_bytes)
            lenqp = email.quoprimime.header_length(header_bytes)
            if len64 < lenqp:
                return email.base64mime
            else:
                return email.quoprimime
        else:
            return None

    def body_encode(self, string):
        """Body-encode a string by converting it first to bytes.

        The type of encoding (base64 or quoted-printable) will be based on
        self.body_encoding.  If body_encoding is None, we assume the
        output charset is a 7bit encoding, so re-encoding the decoded
        string using the ascii codec produces the correct string version
        of the content.
        """
        # 7bit/8bit encodings return the string unchanged (modulo conversions)
        # The flags are plain ints, so compare by value rather than identity.
        if self.body_encoding == BASE64:
            if isinstance(string, str):
                string = string.encode(self.output_charset)
            return email.base64mime.body_encode(string)
        elif self.body_encoding == QP:
            return email.quoprimime.body_encode(string)
        else:
            if isinstance(string, str):
                string = string.encode(self.output_charset).decode('ascii')
            return string
diff --git a/lib-python/3/email/encoders.py b/lib-python/3/email/encoders.py
new file mode 100644
index 0000000000..e5c099f35a
--- /dev/null
+++ b/lib-python/3/email/encoders.py
@@ -0,0 +1,78 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+"""Encodings and related functions."""
+__all__ = [
+ 'encode_7or8bit',
+ 'encode_base64',
+ 'encode_noop',
+ 'encode_quopri',
+ ]
+from base64 import encodebytes as _bencode
+from quopri import encodestring as _encodestring
+def _qencode(s):
+ enc = _encodestring(s, quotetabs=True)
+ # Must encode spaces, which quopri.encodestring() doesn't do
+ return enc.replace(' ', '=20')
def encode_base64(msg):
    """Replace the payload of *msg* with its Base64 encoding.

    The message also gets a Content-Transfer-Encoding header set to
    'base64'.
    """
    raw_payload = msg.get_payload()
    # encodebytes() returns bytes; the payload is stored as ASCII text.
    encoded = _bencode(raw_payload)
    msg.set_payload(str(encoded, 'ascii'))
    msg['Content-Transfer-Encoding'] = 'base64'
def encode_quopri(msg):
    """Replace the payload of *msg* with its quoted-printable encoding.

    The message also gets a Content-Transfer-Encoding header set to
    'quoted-printable'.
    """
    raw_payload = msg.get_payload()
    msg.set_payload(_qencode(raw_payload))
    msg['Content-Transfer-Encoding'] = 'quoted-printable'
def encode_7or8bit(msg):
    """Set the Content-Transfer-Encoding header to 7bit or 8bit."""
    header_name = 'Content-Transfer-Encoding'
    payload = msg.get_payload()
    if payload is None:
        # There's no payload.  For backwards compatibility we use 7bit
        msg[header_name] = '7bit'
        return
    # Fast check: data that converts to/from ASCII without error must be
    # 7bit; anything else is treated as 8bit unless the charset says
    # otherwise.
    try:
        if isinstance(payload, str):
            payload.encode('ascii')
        else:
            payload.decode('ascii')
    except UnicodeError:
        # iso-2022-* is non-ASCII but still 7-bit
        charset = msg.get_charset()
        output_cset = charset and charset.output_charset
        if output_cset and output_cset.lower().startswith('iso-2022-'):
            transfer_encoding = '7bit'
        else:
            transfer_encoding = '8bit'
    else:
        transfer_encoding = '7bit'
    msg[header_name] = transfer_encoding
def encode_noop(msg):
    """Leave *msg* exactly as it is; no payload or header is changed."""
    return None
diff --git a/lib-python/3/email/errors.py b/lib-python/3/email/errors.py
new file mode 100644
index 0000000000..d52a624601
--- /dev/null
+++ b/lib-python/3/email/errors.py
@@ -0,0 +1,57 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+"""email package exception classes."""
class MessageError(Exception):
    """Root of the exceptions raised by the email package."""


class MessageParseError(MessageError):
    """Common ancestor of the errors raised while parsing a message."""


class HeaderParseError(MessageParseError):
    """Raised when the headers of a message cannot be parsed."""


class BoundaryError(MessageParseError):
    """Raised when the terminating boundary could not be found."""


class MultipartConversionError(MessageError, TypeError):
    """Raised when converting a message to a multipart is not allowed."""


class CharsetError(MessageError):
    """Raised when an illegal charset was supplied."""
# Defects describe problems the parser noticed but managed to work around.
class MessageDefect:
    """Common ancestor of all message defects."""

    def __init__(self, line=None):
        # The offending input line, when the reporter supplied one.
        self.line = line


class NoBoundaryInMultipartDefect(MessageDefect):
    """A message said it was a multipart but gave no boundary parameter."""


class StartBoundaryNotFoundDefect(MessageDefect):
    """The start boundary announced by the message never showed up."""


class FirstHeaderLineIsContinuationDefect(MessageDefect):
    """The very first header line of a message was a continuation line."""


class MisplacedEnvelopeHeaderDefect(MessageDefect):
    """A 'Unix-from' line turned up in the middle of a header block."""


class MalformedHeaderDefect(MessageDefect):
    """A header had no colon or was malformed in some other way."""


class MultipartInvariantViolationDefect(MessageDefect):
    """A message said it was a multipart but contained no subparts."""
diff --git a/lib-python/3/email/feedparser.py b/lib-python/3/email/feedparser.py
new file mode 100644
index 0000000000..60a83255c0
--- /dev/null
+++ b/lib-python/3/email/feedparser.py
@@ -0,0 +1,491 @@
+# Copyright (C) 2004-2006 Python Software Foundation
+# Authors: Baxter, Wouters and Warsaw
+# Contact: email-sig@python.org
+"""FeedParser - An email feed parser.
+The feed parser implements an interface for incrementally parsing an email
+message, line by line. This has advantages for certain applications, such as
+those reading email messages off a socket.
+FeedParser.feed() is the primary interface for pushing new data into the
+parser. It returns when there's nothing more it can do with the available
+data. When you have no more data to push into the parser, call .close().
+This completes the parsing and returns the root message object.
+The other advantage of this parser is that it will never throw a parsing
+exception. Instead, when it finds something unexpected, it adds a 'defect' to
+the current message. Defects are just instances that live on the message
+object's .defects attribute.
+"""
+__all__ = ['FeedParser']
+import re
+from email import errors
+from email import message
# Line-ending patterns.  Raw strings are used throughout so that regex-only
# escapes such as \Z are not treated as (invalid) string escapes.
NLCRE = re.compile(r'\r\n|\r|\n')
NLCRE_bol = re.compile(r'(\r\n|\r|\n)')
NLCRE_eol = re.compile(r'(\r\n|\r|\n)\Z')
NLCRE_crack = re.compile(r'(\r\n|\r|\n)')
# RFC 2822 $3.6.8 Optional fields.  ftext is %d33-57 / %d59-126, Any character
# except controls, SP, and ":".
headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])')
# Joiner used by the parser when it glues collected lines back together.
EMPTYSTRING = ''
NL = '\n'
# Sentinel returned/yielded when no complete line is available yet.
NeedMoreData = object()
class BufferedSubFile(object):
    """A file-ish object that can have new data loaded into it.

    You can also push and pop line-matching predicates onto a stack.  When the
    current predicate matches the current line, a false EOF response
    (i.e. empty string) is returned instead.  This lets the parser adhere to a
    simple abstraction -- it parses until EOF closes the current message.
    """

    def __init__(self):
        # The last partial line pushed into this object.
        self._partial = ''
        # The list of full, pushed lines, in reverse order: the next line to
        # be read is the LAST element.
        self._lines = []
        # The stack of false-EOF checking predicates.
        self._eofstack = []
        # A flag indicating whether the file has been closed or not.
        self._closed = False

    def push_eof_matcher(self, pred):
        """Push a predicate that marks a line as a false EOF."""
        self._eofstack.append(pred)

    def pop_eof_matcher(self):
        """Remove and return the most recently pushed false-EOF predicate."""
        return self._eofstack.pop()

    def close(self):
        """Mark the input as finished, keeping any trailing partial line."""
        # Don't forget any trailing partial line.  It is the newest data, so
        # it has to be queued behind every line that is still pending;
        # appending it to the read end of the reversed list would hand it
        # out before older lines (or, when empty, fake an early EOF).
        if self._partial:
            self.pushlines([self._partial])
        self._partial = ''
        self._closed = True

    def readline(self):
        """Return the next line, '' at (false) EOF, or NeedMoreData."""
        if not self._lines:
            if self._closed:
                return ''
            return NeedMoreData
        # Pop the line off the stack and see if it matches the current
        # false-EOF predicate.
        line = self._lines.pop()
        # RFC 2046, section 5.1.2 requires us to recognize outer level
        # boundaries at any level of inner nesting.  Do this, but be sure it's
        # in the order of most to least nested.
        for ateof in self._eofstack[::-1]:
            if ateof(line):
                # We're at the false EOF.  But push the last line back first.
                self._lines.append(line)
                return ''
        return line

    def unreadline(self, line):
        """Push *line* back so that it is the next line read."""
        # Let the consumer push a line back into the buffer.
        assert line is not NeedMoreData
        self._lines.append(line)

    def push(self, data):
        """Push some new data into this object."""
        # Handle any previous leftovers
        data, self._partial = self._partial + data, ''
        # Crack into lines, but preserve the newlines on the end of each
        parts = NLCRE_crack.split(data)
        # The *ahem* interesting behaviour of re.split when supplied grouping
        # parentheses is that the last element of the resulting list is the
        # data after the final RE.  In the case of a NL/CR terminated string,
        # this is the empty string.
        self._partial = parts.pop()
        # GAN 29Mar09 bugs 1555570, 1721862: confusion at a chunk boundary
        # ending with \r -- is there a \n to follow later?  Hold the line
        # back as partial data until more input (or close) settles it.
        if not self._partial and parts and parts[-1].endswith('\r'):
            self._partial = parts.pop(-2)+parts.pop()
        # parts is a list of strings, alternating between the line contents
        # and the eol character(s).  Gather up a list of lines after
        # re-attaching the newlines.
        lines = [text + eol for text, eol in zip(parts[::2], parts[1::2])]
        self.pushlines(lines)

    def pushlines(self, lines):
        """Queue complete *lines* behind everything already pending."""
        # Reverse and insert at the front of the lines.
        self._lines[:0] = lines[::-1]

    def is_closed(self):
        """Return True once close() has been called."""
        return self._closed

    def __iter__(self):
        return self

    def __next__(self):
        line = self.readline()
        if line == '':
            raise StopIteration
        return line
class FeedParser:
    """A feed-style parser of email."""

    def __init__(self, _factory=message.Message):
        """_factory is called with no arguments to create a new message obj"""
        self._factory = _factory
        self._input = BufferedSubFile()
        # Stack of messages under construction; the root is at the bottom.
        self._msgstack = []
        # The parser is a generator; each call to _parse() resumes it.
        self._parse = self._parsegen().__next__
        self._cur = None
        self._last = None
        self._headersonly = False

    # Non-public interface for supporting Parser's headersonly flag
    def _set_headersonly(self):
        self._headersonly = True

    def feed(self, data):
        """Push more data into the parser."""
        self._input.push(data)
        self._call_parse()

    def _call_parse(self):
        # Resume the parsing generator; its exhaustion is not an error.
        try:
            self._parse()
        except StopIteration:
            pass

    def close(self):
        """Parse all remaining data and return the root message object."""
        self._input.close()
        self._call_parse()
        root = self._pop_message()
        assert not self._msgstack
        # Look for final set of defects
        claims_multipart = root.get_content_maintype() == 'multipart'
        if claims_multipart and not root.is_multipart():
            root.defects.append(errors.MultipartInvariantViolationDefect())
        return root

    def _new_message(self):
        # Create a message, attach it to the enclosing message (if any) and
        # make it the current one.
        msg = self._factory()
        if self._cur and self._cur.get_content_type() == 'multipart/digest':
            msg.set_default_type('message/rfc822')
        if self._msgstack:
            self._msgstack[-1].attach(msg)
        self._msgstack.append(msg)
        self._cur = msg
        self._last = msg

    def _pop_message(self):
        # Finish the current message and make its parent current again.
        finished = self._msgstack.pop()
        self._cur = self._msgstack[-1] if self._msgstack else None
        return finished

    def _parsegen(self):
        # Generator driving the parse.  It yields NeedMoreData whenever the
        # input runs dry and is resumed by the next feed()/close().
        # Create a new message and start by parsing headers.
        self._new_message()
        headers = []
        # Collect the headers, searching for a line that doesn't match the RFC
        # 2822 header or continuation pattern (including an empty line).
        for line in self._input:
            if line is NeedMoreData:
                yield NeedMoreData
                continue
            if not headerRE.match(line):
                # If we saw the RFC defined header/body separator
                # (i.e. newline), just throw it away.  Otherwise the line is
                # part of the body so push it back.
                if not NLCRE.match(line):
                    self._input.unreadline(line)
                break
            headers.append(line)
        # Done with the headers, so parse them and figure out what we're
        # supposed to see in the body of the message.
        self._parse_headers(headers)
        # Headers-only parsing is a backwards compatibility hack, which was
        # necessary in the older parser, which could throw errors.  All
        # remaining lines in the input are thrown into the message body.
        if self._headersonly:
            lines = []
            while True:
                line = self._input.readline()
                if line is NeedMoreData:
                    yield NeedMoreData
                    continue
                if line == '':
                    break
                lines.append(line)
            self._cur.set_payload(EMPTYSTRING.join(lines))
            return
        if self._cur.get_content_type() == 'message/delivery-status':
            # message/delivery-status contains blocks of headers separated by
            # a blank line.  We'll represent each header block as a separate
            # nested message object, but the processing is a bit different
            # than standard message/* types because there is no body for the
            # nested messages.  A blank line separates the subparts.
            while True:
                self._input.push_eof_matcher(NLCRE.match)
                for retval in self._parsegen():
                    if retval is NeedMoreData:
                        yield NeedMoreData
                        continue
                    break
                self._pop_message()
                # We need to pop the EOF matcher in order to tell if we're at
                # the end of the current file, not the end of the last block
                # of message headers.
                self._input.pop_eof_matcher()
                # The input stream must be sitting at the newline or at the
                # EOF.  We want to see if we're at the end of this subpart, so
                # first consume the blank line, then test the next line to see
                # if we're at this subpart's EOF.
                while True:
                    line = self._input.readline()
                    if line is NeedMoreData:
                        yield NeedMoreData
                        continue
                    break
                while True:
                    line = self._input.readline()
                    if line is NeedMoreData:
                        yield NeedMoreData
                        continue
                    break
                if line == '':
                    break
                # Not at EOF so this is a line we're going to need.
                self._input.unreadline(line)
            return
        if self._cur.get_content_maintype() == 'message':
            # The message claims to be a message/* type, then what follows is
            # another RFC 2822 message.
            for retval in self._parsegen():
                if retval is NeedMoreData:
                    yield NeedMoreData
                    continue
                break
            self._pop_message()
            return
        if self._cur.get_content_maintype() == 'multipart':
            boundary = self._cur.get_boundary()
            if boundary is None:
                # The message /claims/ to be a multipart but it has not
                # defined a boundary.  That's a problem which we'll handle by
                # reading everything until the EOF and marking the message as
                # defective.
                self._cur.defects.append(errors.NoBoundaryInMultipartDefect())
                lines = []
                for line in self._input:
                    if line is NeedMoreData:
                        yield NeedMoreData
                        continue
                    lines.append(line)
                self._cur.set_payload(EMPTYSTRING.join(lines))
                return
            # Create a line match predicate which matches the inter-part
            # boundary as well as the end-of-multipart boundary.  Don't push
            # this onto the input stream until we've scanned past the
            # preamble.
            separator = '--' + boundary
            boundaryre = re.compile(
                '(?P<sep>' + re.escape(separator) +
                r')(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$')
            capturing_preamble = True
            preamble = []
            linesep = False
            while True:
                line = self._input.readline()
                if line is NeedMoreData:
                    yield NeedMoreData
                    continue
                if line == '':
                    break
                mo = boundaryre.match(line)
                if mo:
                    # If we're looking at the end boundary, we're done with
                    # this multipart.  If there was a newline at the end of
                    # the closing boundary, then we need to initialize the
                    # epilogue with the empty string (see below).
                    if mo.group('end'):
                        linesep = mo.group('linesep')
                        break
                    # We saw an inter-part boundary.  Were we in the preamble?
                    if capturing_preamble:
                        if preamble:
                            # According to RFC 2046, the last newline belongs
                            # to the boundary.
                            lastline = preamble[-1]
                            eolmo = NLCRE_eol.search(lastline)
                            if eolmo:
                                preamble[-1] = lastline[:-len(eolmo.group(0))]
                            self._cur.preamble = EMPTYSTRING.join(preamble)
                        capturing_preamble = False
                        self._input.unreadline(line)
                        continue
                    # We saw a boundary separating two parts.  Consume any
                    # multiple boundary lines that may be following.  Our
                    # interpretation of RFC 2046 BNF grammar does not produce
                    # body parts within such double boundaries.
                    while True:
                        line = self._input.readline()
                        if line is NeedMoreData:
                            yield NeedMoreData
                            continue
                        mo = boundaryre.match(line)
                        if not mo:
                            self._input.unreadline(line)
                            break
                    # Recurse to parse this subpart; the input stream points
                    # at the subpart's first line.
                    self._input.push_eof_matcher(boundaryre.match)
                    for retval in self._parsegen():
                        if retval is NeedMoreData:
                            yield NeedMoreData
                            continue
                        break
                    # Because of RFC 2046, the newline preceding the boundary
                    # separator actually belongs to the boundary, not the
                    # previous subpart's payload (or epilogue if the previous
                    # part is a multipart).
                    if self._last.get_content_maintype() == 'multipart':
                        epilogue = self._last.epilogue
                        if epilogue == '':
                            self._last.epilogue = None
                        elif epilogue is not None:
                            mo = NLCRE_eol.search(epilogue)
                            if mo:
                                end = len(mo.group(0))
                                self._last.epilogue = epilogue[:-end]
                    else:
                        payload = self._last._payload
                        if isinstance(payload, str):
                            mo = NLCRE_eol.search(payload)
                            if mo:
                                payload = payload[:-len(mo.group(0))]
                                self._last._payload = payload
                    self._input.pop_eof_matcher()
                    self._pop_message()
                    # Set the multipart up for newline cleansing, which will
                    # happen if we're in a nested multipart.
                    self._last = self._cur
                else:
                    # I think we must be in the preamble
                    assert capturing_preamble
                    preamble.append(line)
            # We've seen either the EOF or the end boundary.  If we're still
            # capturing the preamble, we never saw the start boundary.  Note
            # that as a defect and store the captured text as the payload.
            # Everything from here to the EOF is epilogue.
            if capturing_preamble:
                self._cur.defects.append(errors.StartBoundaryNotFoundDefect())
                self._cur.set_payload(EMPTYSTRING.join(preamble))
                epilogue = []
                # The remaining input is drained; nothing is added to
                # epilogue here, so the stored epilogue is the empty string.
                for line in self._input:
                    if line is NeedMoreData:
                        yield NeedMoreData
                        continue
                self._cur.epilogue = EMPTYSTRING.join(epilogue)
                return
            # If the end boundary ended in a newline, we'll need to make sure
            # the epilogue isn't None
            epilogue = [''] if linesep else []
            for line in self._input:
                if line is NeedMoreData:
                    yield NeedMoreData
                    continue
                epilogue.append(line)
            # Any CRLF at the front of the epilogue is not technically part of
            # the epilogue.  Also, watch out for an empty string epilogue,
            # which means a single newline.
            if epilogue:
                firstline = epilogue[0]
                bolmo = NLCRE_bol.match(firstline)
                if bolmo:
                    epilogue[0] = firstline[len(bolmo.group(0)):]
            self._cur.epilogue = EMPTYSTRING.join(epilogue)
            return
        # Otherwise, it's some non-multipart type, so the entire rest of the
        # file contents becomes the payload.
        lines = []
        for line in self._input:
            if line is NeedMoreData:
                yield NeedMoreData
                continue
            lines.append(line)
        self._cur.set_payload(EMPTYSTRING.join(lines))

    def _parse_headers(self, lines):
        # Passed a list of lines that make up the headers for the current msg
        lastheader = ''
        lastvalue = []
        final_index = len(lines) - 1
        for lineno, line in enumerate(lines):
            # Check for continuation
            if line[0] in ' \t':
                if not lastheader:
                    # The first line of the headers was a continuation.  This
                    # is illegal, so let's note the defect, store the illegal
                    # line, and ignore it for purposes of headers.
                    defect = errors.FirstHeaderLineIsContinuationDefect(line)
                    self._cur.defects.append(defect)
                    continue
                lastvalue.append(line)
                continue
            if lastheader:
                # XXX reconsider the joining of folded lines
                lhdr = EMPTYSTRING.join(lastvalue)[:-1].rstrip('\r\n')
                self._cur[lastheader] = lhdr
                lastheader, lastvalue = '', []
            # Check for envelope header, i.e. unix-from
            if line.startswith('From '):
                if lineno == 0:
                    # Strip off the trailing newline
                    mo = NLCRE_eol.search(line)
                    if mo:
                        line = line[:-len(mo.group(0))]
                    self._cur.set_unixfrom(line)
                    continue
                elif lineno == final_index:
                    # Something looking like a unix-from at the end - it's
                    # probably the first line of the body, so push back the
                    # line and stop.
                    self._input.unreadline(line)
                    return
                else:
                    # Weirdly placed unix-from line.  Note this as a defect
                    # and ignore it.
                    defect = errors.MisplacedEnvelopeHeaderDefect(line)
                    self._cur.defects.append(defect)
                    continue
            # Split the line on the colon separating field name from value.
            i = line.find(':')
            if i < 0:
                defect = errors.MalformedHeaderDefect(line)
                self._cur.defects.append(defect)
                continue
            lastheader = line[:i]
            lastvalue = [line[i+1:].lstrip()]
        # Done with all the lines, so handle the last header.
        if lastheader:
            # XXX reconsider the joining of folded lines
            self._cur[lastheader] = EMPTYSTRING.join(lastvalue).rstrip('\r\n')
class BytesFeedParser(FeedParser):
    """Like FeedParser, but feed accepts bytes."""

    def feed(self, data):
        # Non-ASCII bytes are carried through as surrogate escapes so the
        # str-based parser can handle them.
        text = data.decode('ascii', 'surrogateescape')
        super().feed(text)
diff --git a/lib-python/3/email/generator.py b/lib-python/3/email/generator.py
new file mode 100644
index 0000000000..f0e7a95477
--- /dev/null
+++ b/lib-python/3/email/generator.py
@@ -0,0 +1,459 @@
+# Copyright (C) 2001-2010 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+"""Classes to generate plain text from a message object tree."""
+__all__ = ['Generator', 'DecodedGenerator']
+import re
+import sys
+import time
+import random
+import warnings
+from io import StringIO, BytesIO
+from email.header import Header
+from email.message import _has_surrogates
# Joiner for '<maintype>_<subtype>' handler names used by Generator._dispatch.
UNDERSCORE = '_'
NL = '\n'  # XXX: no longer used by the code below.
# Matches 'From ' at the start of any line of a payload.
fcre = re.compile(r'^From ', re.MULTILINE)
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a `>' in front of
        them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self._maxheaderlen = maxheaderlen

    def write(self, s):
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False, linesep='\n'):
        r"""Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.

        linesep specifies the characters used to indicate a new line in
        the output.  The default value is the most useful for typical
        Python applications, but it can be set to \r\n to produce RFC-compliant
        line separators when needed.
        """
        # We use the _XXX constants for operating on data that comes directly
        # from the msg, and _encoded_XXX constants for operating on data that
        # has already been converted (to bytes in the BytesGenerator) and
        # inserted into a temporary buffer.
        self._NL = linesep
        self._encoded_NL = self._encode(linesep)
        self._EMPTY = ''
        # This must be spelled exactly like the class-level _encoded_EMPTY
        # that _handle_message_delivery_status() reads.
        self._encoded_EMPTY = self._encode('')
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            self.write(ufrom + self._NL)
        self._write(msg)

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp, self._mangle_from_, self._maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    # Note that we use 'self.write' when what we are writing is coming from
    # the source, and self._fp.write when what we are writing is coming from a
    # buffer (because the Bytes subclass has already had a chance to transform
    # the data in its write method in that case).  This is an entirely
    # pragmatic split determined by experiment; we could be more general by
    # always using write and having the Bytes subclass write method detect when
    # it has already transformed the input; but, since this whole thing is a
    # hack anyway this seems good enough.

    # Similarly, we have _XXX and _encoded_XXX attributes that are used on
    # source and buffer data, respectively.
    _encoded_EMPTY = ''

    def _new_buffer(self):
        # BytesGenerator overrides this to return BytesIO.
        return StringIO()

    def _encode(self, s):
        # BytesGenerator overrides this to encode strings to bytes.
        return s

    def _write(self, msg):
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a buffer.  Then we write the
        # headers and the buffer contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = self._new_buffer()
            self._dispatch(msg)
        finally:
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        for h, v in msg.items():
            self.write('%s: ' % h)
            if isinstance(v, Header):
                self.write(v.encode(
                    maxlinelen=self._maxheaderlen, linesep=self._NL)+self._NL)
            else:
                # Header's got lots of smarts, so use it.
                header = Header(v, maxlinelen=self._maxheaderlen,
                                header_name=h)
                self.write(header.encode(linesep=self._NL)+self._NL)
        # A blank line always separates headers from body
        self.write(self._NL)

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, str):
            raise TypeError('string payload expected: %s' % type(payload))
        if _has_surrogates(msg._payload):
            # The payload carries surrogate-escaped bytes; when a charset
            # parameter is present, re-set the payload with that charset.
            charset = msg.get_param('charset')
            if charset is not None:
                del msg['content-transfer-encoding']
                msg.set_payload(payload, charset)
                payload = msg.get_payload()
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, str):
            # e.g. a non-strict parse of a message with no starting boundary.
            self.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = self._new_buffer()
            g = self.clone(s)
            g.flatten(part, unixfrom=False, linesep=self._NL)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if not boundary:
            # Create a boundary that doesn't appear in any of the
            # message texts.
            alltext = self._encoded_NL.join(msgtexts)
            boundary = self._make_boundary(alltext)
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing CRLF
        if msg.preamble is not None:
            self.write(msg.preamble + self._NL)
        # dash-boundary transport-padding CRLF
        self.write('--' + boundary + self._NL)
        # body-part
        if msgtexts:
            self._fp.write(msgtexts.pop(0))
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in msgtexts:
            # delimiter transport-padding CRLF
            self.write(self._NL + '--' + boundary + self._NL)
            # body-part
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self.write(self._NL + '--' + boundary + '--')
        if msg.epilogue is not None:
            self.write(self._NL)
            self.write(msg.epilogue)

    def _handle_multipart_signed(self, msg):
        # The contents of signed parts has to stay unmodified in order to keep
        # the signature intact per RFC1847 2.1, so we disable header wrapping.
        # RDM: This isn't enough to completely preserve the part, but it helps.
        old_maxheaderlen = self._maxheaderlen
        try:
            self._maxheaderlen = 0
            self._handle_multipart(msg)
        finally:
            self._maxheaderlen = old_maxheaderlen

    def _handle_message_delivery_status(self, msg):
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = self._new_buffer()
            g = self.clone(s)
            g.flatten(part, unixfrom=False, linesep=self._NL)
            text = s.getvalue()
            lines = text.split(self._encoded_NL)
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == self._encoded_EMPTY:
                blocks.append(self._encoded_NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(self._encoded_NL.join(blocks))

    def _handle_message(self, msg):
        s = self._new_buffer()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        # Except, it turns out, when it's a string instead, which happens when
        # and only when HeaderParser is used on a message of mime type
        # message/rfc822.  Such messages are generated by, for example,
        # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
        # in that case we just emit the string body.
        payload = msg.get_payload()
        if isinstance(payload, list):
            g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
            payload = s.getvalue()
        self._fp.write(payload)

    # This used to be a module level function; we use a classmethod for this
    # and _compile_re so we can continue to provide the module level function
    # for backward compatibility by doing
    #   _make_boundary = Generator._make_boundary
    # at the end of the module.  It *is* internal, so we could drop that...
    @classmethod
    def _make_boundary(cls, text=None):
        # Craft a random boundary.  If text is given, ensure that the chosen
        # boundary doesn't appear in the text.
        token = random.randrange(sys.maxsize)
        boundary = ('=' * 15) + (_fmt % token) + '=='
        if text is None:
            return boundary
        b = boundary
        counter = 0
        while True:
            cre = cls._compile_re('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
            if not cre.search(text):
                break
            # Collision: try again with a numeric suffix.
            b = boundary + '.' + str(counter)
            counter += 1
        return b

    @classmethod
    def _compile_re(cls, s, flags):
        # BytesGenerator overrides this to compile a bytes pattern.
        return re.compile(s, flags)
class BytesGenerator(Generator):
    """Generates a bytes version of a Message object tree.

    Functionally identical to the base Generator except that the output is
    bytes and not string.  When surrogates were used in the input to encode
    bytes, these are decoded back to bytes for output.

    The outfp object must accept bytes in its write method.
    """

    # Bytes versions of this constant for use in manipulating data from
    # the BytesIO buffer.
    _encoded_EMPTY = b''

    def write(self, s):
        # Surrogate-escaped characters turn back into the original bytes.
        self._fp.write(s.encode('ascii', 'surrogateescape'))

    def _new_buffer(self):
        return BytesIO()

    def _encode(self, s):
        return s.encode('ascii')

    def _write_headers(self, msg):
        # This is almost the same as the string version, except for handling
        # strings with 8bit bytes.  Every line is terminated with self._NL
        # (the linesep given to flatten()), as in Generator._write_headers.
        for h, v in msg._headers:
            self.write('%s: ' % h)
            if isinstance(v, Header):
                self.write(v.encode(
                    maxlinelen=self._maxheaderlen, linesep=self._NL)+self._NL)
            elif _has_surrogates(v):
                # If we have raw 8bit data in a byte string, we have no idea
                # what the encoding is.  There is no safe way to split this
                # string.  If it's ascii-subset, then we could do a normal
                # ascii split, but if it's multibyte then we could break the
                # string.  There's no way to know so the least harm seems to
                # be to not split the string and risk it being too long.
                self.write(v+self._NL)
            else:
                # Header's got lots of smarts and this string is safe...
                header = Header(v, maxlinelen=self._maxheaderlen,
                                header_name=h)
                self.write(header.encode(linesep=self._NL)+self._NL)
        # A blank line always separates headers from body
        self.write(self._NL)

    def _handle_text(self, msg):
        # If the string has surrogates the original source was bytes, so
        # just write it back out.
        if msg._payload is None:
            return
        if _has_surrogates(msg._payload):
            self.write(msg._payload)
        else:
            super(BytesGenerator, self)._handle_text(msg)

    @classmethod
    def _compile_re(cls, s, flags):
        # Buffers hold bytes here, so the boundary pattern must be bytes too.
        return re.compile(s.encode('ascii'), flags)
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'


class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Works like the Generator base class, but every non-text part is replaced
    by a line built from a format string that describes the part.
    """

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() with one extra optional argument, fmt.

        All subparts of a message are visited.  For a subpart of main type
        `text' the payload is printed (it is fetched with decode=False).
        `multipart' containers themselves are skipped.  For every other
        subpart fmt is printed instead of the payload, expanded with these
        keywords (in %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        When fmt is None (the default) this format is used:

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        self._fmt = _FMT if fmt is None else fmt

    def _dispatch(self, msg):
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'multipart':
                # Containers produce no output of their own.
                continue
            if maintype == 'text':
                print(part.get_payload(decode=False), file=self)
                continue
            fields = {
                'type'       : part.get_content_type(),
                'maintype'   : part.get_content_maintype(),
                'subtype'    : part.get_content_subtype(),
                'filename'   : part.get_filename('[no filename]'),
                'description': part.get('Content-Description',
                                        '[no description]'),
                'encoding'   : part.get('Content-Transfer-Encoding',
                                        '[no encoding]'),
                }
            print(self._fmt % fields, file=self)
+# Helper used by Generator._make_boundary
+_width = len(repr(sys.maxsize-1))
+_fmt = '%%0%dd' % _width
+# Backward compatibility
+_make_boundary = Generator._make_boundary
diff --git a/lib-python/3/email/header.py b/lib-python/3/email/header.py
new file mode 100644
index 0000000000..2e687b7a6f
--- /dev/null
+++ b/lib-python/3/email/header.py
@@ -0,0 +1,541 @@
+# Copyright (C) 2002-2007 Python Software Foundation
+# Author: Ben Gertzfield, Barry Warsaw
+# Contact: email-sig@python.org
+"""Header encoding and decoding functionality."""
+__all__ = [
+ 'Header',
+ 'decode_header',
+ 'make_header',
+ ]
+import re
+import binascii
+import email.quoprimime
+import email.base64mime
+from email.errors import HeaderParseError
+from email import charset as _charset
+Charset = _charset.Charset
+# Small string constants shared by the code below.
+NL = '\n'
+SPACE = ' '
+BSPACE = b' '
+SPACE8 = ' ' * 8
+# Joiner used by Header.__str__() and _Accumulator.__str__().
+EMPTYSTRING = ''
+# Default maximum header line length (78, as recommended by RFC 2822); used
+# by Header.__init__() when no maxlinelen is given.
+MAXLINELEN = 78
+# Characters treated as folding white space by _ValueFormatter._ascii_split.
+FWS = ' \t'
+USASCII = Charset('us-ascii')
+UTF8 = Charset('utf-8')
+# Match encoded-word strings in the form =?charset?q?Hello_World?=
+# The pattern is written in verbose mode (inline comments), matches the
+# encoding letter case-insensitively, and lets `$' match at the end of every
+# line so that an encoded word ending a continuation line is recognised when
+# a whole multi-line header value is searched.
+ecre = re.compile(r'''
+  =\?                   # literal =?
+  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
+  \?                    # literal ?
+  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
+  \?                    # literal ?
+  (?P<encoded>.*?)      # non-greedy up to the next ?= is the encoded string
+  \?=                   # literal ?=
+  (?=[ \t]|$)           # whitespace or the end of the string
+  ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)
+# Field name regexp, including trailing colon, but not separating whitespace,
+# according to RFC 2822.  Character range is from exclamation mark (octal
+# 041) to tilde (octal 176).
+# For use with .match()
+fcre = re.compile(r'[\041-\176]+:$')
+# Find a header embedded in a putative header value.  Used to check for
+# header injection attack: a newline followed by non-whitespace characters
+# and a colon.
+_embeded_header = re.compile(r'\n[^ \t]+:')
+# Helpers
+_max_append = email.quoprimime._max_append
+def decode_header(header):
+    """Decode a message header value without converting charset.
+
+    Returns a list of (string, charset) pairs, one per decoded part of the
+    header.  The charset is None for parts that were not encoded, otherwise
+    it is the lower-cased character set name taken from the encoded word.
+
+    header may be a string that may or may not contain RFC 2047 encoded
+    words, or it may be a Header object.
+
+    An email.errors.HeaderParseError is raised when a base64 encoded word
+    cannot be decoded.
+    """
+    # A Header object already carries its chunks; just encode them.
+    if hasattr(header, '_chunks'):
+        return [(_charset._encode(string, str(charset)), str(charset))
+                for string, charset in header._chunks]
+    # Without any encoded word the header is returned untouched.
+    if not ecre.search(header):
+        return [(header, None)]
+    # Pass 1: break each line into (text, encoding, charset) triplets.  The
+    # pattern has three groups, so split() yields one plain piece followed
+    # by any number of (charset, encoding, encoded, plain) quadruples.
+    words = []
+    for line in header.splitlines():
+        fields = ecre.split(line)
+        for start in range(0, len(fields), 4):
+            plain = fields[start].strip()
+            if plain:
+                words.append((plain, None, None))
+            if start + 1 < len(fields):
+                cs_name = fields[start + 1].lower()
+                enc_name = fields[start + 2].lower()
+                words.append((fields[start + 3], enc_name, cs_name))
+    # Pass 2: undo the quoted-printable or base64 transformation, producing
+    # (decoded_word, charset) pairs.
+    decoded_words = []
+    for encoded_string, encoding, charset in words:
+        if encoding is None:
+            # Plain text needs no decoding.
+            word = encoded_string
+        elif encoding == 'q':
+            word = email.quoprimime.header_decode(encoded_string)
+        elif encoding == 'b':
+            # Postel's law: add missing padding
+            paderr = len(encoded_string) % 4
+            if paderr:
+                encoded_string += '==='[:4 - paderr]
+            try:
+                word = email.base64mime.decode(encoded_string)
+            except binascii.Error:
+                raise HeaderParseError('Base64 decoding error')
+        else:
+            raise AssertionError('Unexpected encoding: ' + encoding)
+        decoded_words.append((word, charset))
+    # Pass 3: turn every word into bytes and merge neighbours that share a
+    # charset; unencoded neighbours are joined with a single space.
+    collapsed = []
+    run_bytes = run_charset = None
+    for word, charset in decoded_words:
+        if isinstance(word, str):
+            word = word.encode('raw-unicode-escape')
+        if run_bytes is None:
+            run_bytes, run_charset = word, charset
+            continue
+        if charset != run_charset:
+            collapsed.append((run_bytes, run_charset))
+            run_bytes, run_charset = word, charset
+        elif run_charset is None:
+            run_bytes = run_bytes + BSPACE + word
+        else:
+            run_bytes = run_bytes + word
+    collapsed.append((run_bytes, run_charset))
+    return collapsed
+def make_header(decoded_seq, maxlinelen=None, header_name=None,
+                continuation_ws=' '):
+    """Build a Header from a sequence of pairs as returned by decode_header().
+
+    decode_header() takes a header value string and returns a sequence of
+    (decoded_string, charset) pairs where charset is the string name of the
+    character set.
+
+    This function takes such a sequence and returns a Header instance.
+    Optional maxlinelen, header_name, and continuation_ws are as in the
+    Header constructor.
+    """
+    result = Header(maxlinelen=maxlinelen, header_name=header_name,
+                    continuation_ws=continuation_ws)
+    for text, cs in decoded_seq:
+        # None is handed to append() unchanged; anything that is not
+        # already a Charset is wrapped in one first.
+        if not (cs is None or isinstance(cs, Charset)):
+            cs = Charset(cs)
+        result.append(text, cs)
+    return result
+class Header:
+    """A header value stored as a list of (string, charset) chunks.
+
+    Chunks are added with append(); str() joins them into the unencoded
+    value and encode() produces the folded, encoded header value.
+    """
+    def __init__(self, s=None, charset=None,
+                 maxlinelen=None, header_name=None,
+                 continuation_ws=' ', errors='strict'):
+        """Create a MIME-compliant header that can contain many character sets.
+
+        Optional s is the initial header value.  If None, the initial header
+        value is not set.  You can later append to the header with .append()
+        method calls.  s may be a byte string or a Unicode string, but see the
+        .append() documentation for semantics.
+
+        Optional charset serves two purposes: it has the same meaning as the
+        charset argument to the .append() method.  It also sets the default
+        character set for all subsequent .append() calls that omit the charset
+        argument.  If charset is not provided in the constructor, the us-ascii
+        charset is used both as s's initial charset and as the default for
+        subsequent .append() calls.
+
+        The maximum line length can be specified explicitly via maxlinelen.  For
+        splitting the first line to a shorter value (to account for the field
+        header which isn't included in s, e.g. `Subject') pass in the name of
+        the field in header_name.  The default maxlinelen is 78 as recommended
+        by RFC 2822.
+
+        continuation_ws must be RFC 2822 compliant folding whitespace (usually
+        either a space or a hard tab) which will be prepended to continuation
+        lines.
+
+        errors is passed through to the .append() call.
+        """
+        if charset is None:
+            charset = USASCII
+        elif not isinstance(charset, Charset):
+            charset = Charset(charset)
+        self._charset = charset
+        self._continuation_ws = continuation_ws
+        self._chunks = []
+        if s is not None:
+            self.append(s, charset, errors)
+        if maxlinelen is None:
+            maxlinelen = MAXLINELEN
+        self._maxlinelen = maxlinelen
+        if header_name is None:
+            self._headerlen = 0
+        else:
+            # Take the separating colon and space into account.
+            self._headerlen = len(header_name) + 2
+    def __str__(self):
+        """Return the string value of the header."""
+        self._normalize()
+        uchunks = []
+        lastcs = None
+        for string, charset in self._chunks:
+            # We must preserve spaces between encoded and non-encoded word
+            # boundaries, which means for us we need to add a space when we go
+            # from a charset to None/us-ascii, or from None/us-ascii to a
+            # charset.  Only do this for the second and subsequent chunks.
+            nextcs = charset
+            if nextcs == _charset.UNKNOWN8BIT:
+                # Round-trip through bytes so that surrogate-escaped bytes
+                # are rendered with the 'replace' error handler.
+                original_bytes = string.encode('ascii', 'surrogateescape')
+                string = original_bytes.decode('ascii', 'replace')
+            if uchunks:
+                if lastcs not in (None, 'us-ascii'):
+                    if nextcs in (None, 'us-ascii'):
+                        uchunks.append(SPACE)
+                        nextcs = None
+                elif nextcs not in (None, 'us-ascii'):
+                    uchunks.append(SPACE)
+            lastcs = nextcs
+            uchunks.append(string)
+        return EMPTYSTRING.join(uchunks)
+    # Rich comparison operators for equality only.  BAW: does it make sense to
+    # have or explicitly disable <, <=, >, >= operators?
+    def __eq__(self, other):
+        """Compare the unencoded string value of this header with other."""
+        # other may be a Header or a string.  Both are fine so coerce
+        # ourselves to a unicode (of the unencoded header value), swap the
+        # args and do another comparison.
+        return other == str(self)
+    def __ne__(self, other):
+        """Return the negation of __eq__()."""
+        return not self == other
+    def append(self, s, charset=None, errors='strict'):
+        """Append a string to the MIME header.
+
+        Optional charset, if given, should be a Charset instance or the name
+        of a character set (which will be converted to a Charset instance).  A
+        value of None (the default) means that the charset given in the
+        constructor is used.
+
+        s may be a byte string or a Unicode string.  If it is a byte string
+        (i.e. isinstance(s, str) is false), then charset is the encoding of
+        that byte string, and a UnicodeError will be raised if the string
+        cannot be decoded with that charset.  If s is a Unicode string, then
+        charset is a hint specifying the character set of the characters in
+        the string.  In either case, when producing an RFC 2822 compliant
+        header using RFC 2047 rules, the string will be encoded using the
+        output codec of the charset.  If the string cannot be encoded to the
+        output codec, a UnicodeError will be raised.
+
+        Optional `errors' is passed as the errors argument to the decode
+        call if s is a byte string.
+        """
+        if charset is None:
+            charset = self._charset
+        elif not isinstance(charset, Charset):
+            charset = Charset(charset)
+        if not isinstance(s, str):
+            input_charset = charset.input_codec or 'us-ascii'
+            if input_charset == _charset.UNKNOWN8BIT:
+                # Unknown 8-bit data is kept as surrogate-escaped ASCII.
+                s = s.decode('us-ascii', 'surrogateescape')
+            else:
+                s = s.decode(input_charset, errors)
+        # Ensure that the bytes we're storing can be decoded to the output
+        # character set, otherwise an early error is thrown.
+        output_charset = charset.output_codec or 'us-ascii'
+        if output_charset != _charset.UNKNOWN8BIT:
+            s.encode(output_charset, errors)
+        self._chunks.append((s, charset))
+    def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
+        r"""Encode a message header into an RFC-compliant format.
+
+        There are many issues involved in converting a given string for use in
+        an email header.  Only certain character sets are readable in most
+        email clients, and as header strings can only contain a subset of
+        7-bit ASCII, care must be taken to properly convert and encode (with
+        Base64 or quoted-printable) header strings.  In addition, there is a
+        75-character length limit on any given encoded header field, so
+        line-wrapping must be performed, even with double-byte character sets.
+
+        Optional maxlinelen specifies the maximum length of each generated
+        line, exclusive of the linesep string.  Individual lines may be longer
+        than maxlinelen if a folding point cannot be found.  The first line
+        will be shorter by the length of the header name plus ": " if a header
+        name was specified at Header construction time.  The default value for
+        maxlinelen is determined at header construction time.
+
+        Optional splitchars is a string containing characters which should be
+        given extra weight by the splitting algorithm during normal header
+        wrapping.  This is in very rough support of RFC 2822's `higher level
+        syntactic breaks':  split points preceded by a splitchar are preferred
+        during line splitting, with the characters preferred in the order in
+        which they appear in the string.  Space and tab may be included in the
+        string to indicate whether preference should be given to one over the
+        other as a split point when other split chars do not appear in the line
+        being split.  Splitchars does not affect RFC 2047 encoded lines.
+
+        Optional linesep is a string to be used to separate the lines of
+        the value.  The default value is the most useful for typical
+        Python applications, but it can be set to \r\n to produce RFC-compliant
+        line separators when needed.
+
+        Raises HeaderParseError if the encoded value appears to contain an
+        embedded header.
+        """
+        self._normalize()
+        if maxlinelen is None:
+            maxlinelen = self._maxlinelen
+        # A maxlinelen of 0 means don't wrap.  For all practical purposes,
+        # choosing a huge number here accomplishes that and makes the
+        # _ValueFormatter algorithm much simpler.
+        if maxlinelen == 0:
+            maxlinelen = 1000000
+        formatter = _ValueFormatter(self._headerlen, maxlinelen,
+                                    self._continuation_ws, splitchars)
+        for string, charset in self._chunks:
+            lines = string.splitlines()
+            # The first line of a chunk continues the current output line.
+            if lines:
+                formatter.feed('', lines[0], charset)
+            else:
+                formatter.feed('', '', charset)
+            # Every further line of the chunk starts a new output line.
+            for line in lines[1:]:
+                formatter.newline()
+                if charset.header_encoding is not None:
+                    formatter.feed(self._continuation_ws, ' ' + line.lstrip(),
+                                   charset)
+                else:
+                    # Keep the line's own leading whitespace as its FWS.
+                    sline = line.lstrip()
+                    fws = line[:len(line)-len(sline)]
+                    formatter.feed(fws, sline, charset)
+            if len(lines) > 1:
+                formatter.newline()
+            formatter.add_transition()
+        value = formatter._str(linesep)
+        if _embeded_header.search(value):
+            raise HeaderParseError("header value appears to contain "
+                "an embedded header: {!r}".format(value))
+        return value
+    def _normalize(self):
+        """Merge adjacent chunks that share a charset (joined by a space)."""
+        # Step 1: Normalize the chunks so that all runs of identical charsets
+        # get collapsed into a single unicode string.
+        chunks = []
+        last_charset = None
+        last_chunk = []
+        for string, charset in self._chunks:
+            if charset == last_charset:
+                last_chunk.append(string)
+            else:
+                # Charset changed: flush the finished run, if there is one.
+                if last_charset is not None:
+                    chunks.append((SPACE.join(last_chunk), last_charset))
+                last_chunk = [string]
+                last_charset = charset
+        if last_chunk:
+            chunks.append((SPACE.join(last_chunk), last_charset))
+        self._chunks = chunks
+class _ValueFormatter:
+    """Accumulate header text into lines no longer than maxlen where possible.
+
+    Finished lines are collected in self._lines; the line being built is an
+    _Accumulator of (fws, string) parts in self._current_line.
+    """
+    def __init__(self, headerlen, maxlen, continuation_ws, splitchars):
+        self._maxlen = maxlen
+        self._continuation_ws = continuation_ws
+        self._continuation_ws_len = len(continuation_ws)
+        self._splitchars = splitchars
+        self._lines = []
+        # headerlen is counted against the length of the first line only
+        # (the accumulator's reset() sets its initial size back to 0).
+        self._current_line = _Accumulator(headerlen)
+    def _str(self, linesep):
+        """Flush the current line and return all lines joined by linesep."""
+        self.newline()
+        return linesep.join(self._lines)
+    def __str__(self):
+        return self._str(NL)
+    def newline(self):
+        """Move the current line, if not empty, to the finished lines."""
+        # Drop a trailing transition marker (see add_transition()); any
+        # other last part is put back.
+        end_of_line = self._current_line.pop()
+        if end_of_line != (' ', ''):
+            self._current_line.push(*end_of_line)
+        if len(self._current_line) > 0:
+            if self._current_line.is_onlyws():
+                # A whitespace-only line is glued onto the previous line.
+                self._lines[-1] += str(self._current_line)
+            else:
+                self._lines.append(str(self._current_line))
+        self._current_line.reset()
+    def add_transition(self):
+        """Push a (' ', '') marker part onto the current line."""
+        self._current_line.push(' ', '')
+    def feed(self, fws, string, charset):
+        """Add string, preceded by fws, encoding it as charset requires."""
+        # If the charset has no header encoding (i.e. it is an ASCII encoding)
+        # then we must split the header at the "highest level syntactic break"
+        # possible.  Note that we don't have a lot of smarts about field
+        # syntax; we just try to break on semi-colons, then commas, then
+        # whitespace.  Eventually, this should be pluggable.
+        if charset.header_encoding is None:
+            self._ascii_split(fws, string, self._splitchars)
+            return
+        # Otherwise, we're doing either a Base64 or a quoted-printable
+        # encoding which means we don't need to split the line on syntactic
+        # breaks.  We can basically just find enough characters to fit on the
+        # current line, minus the RFC 2047 chrome.  What makes this trickier
+        # though is that we have to split at octet boundaries, not character
+        # boundaries but it's only safe to split at character boundaries so at
+        # best we can only get close.
+        encoded_lines = charset.header_encode_lines(string, self._maxlengths())
+        # The first element extends the current line, but if it's None then
+        # nothing more fit on the current line so start a new line.
+        try:
+            first_line = encoded_lines.pop(0)
+        except IndexError:
+            # There are no encoded lines, so we're done.
+            return
+        if first_line is not None:
+            self._append_chunk(fws, first_line)
+        try:
+            last_line = encoded_lines.pop()
+        except IndexError:
+            # There was only one line.
+            return
+        self.newline()
+        # The last encoded line becomes the new current line so that later
+        # text can still be appended to it.
+        self._current_line.push(self._continuation_ws, last_line)
+        # Everything else are full lines in themselves.
+        for line in encoded_lines:
+            self._lines.append(self._continuation_ws + line)
+    def _maxlengths(self):
+        """Yield the room left on the current line, then on each new line."""
+        # The first line's length.
+        yield self._maxlen - len(self._current_line)
+        while True:
+            yield self._maxlen - self._continuation_ws_len
+    def _ascii_split(self, fws, string, splitchars):
+        """Feed fws+string to _append_chunk() as (whitespace, text) pairs."""
+        # The RFC 2822 header folding algorithm is simple in principle but
+        # complex in practice.  Lines may be folded any place where "folding
+        # white space" appears by inserting a linesep character in front of the
+        # FWS.  The complication is that not all spaces or tabs qualify as FWS,
+        # and we are also supposed to prefer to break at "higher level
+        # syntactic breaks".  We can't do either of these without intimate
+        # knowledge of the structure of structured headers, which we don't have
+        # here.  So the best we can do here is prefer to break at the specified
+        # splitchars, and hope that we don't choose any spaces or tabs that
+        # aren't legal FWS.  (This is at least better than the old algorithm,
+        # where we would sometimes *introduce* FWS after a splitchar, or the
+        # algorithm before that, where we would turn all white space runs into
+        # single spaces or tabs.)
+        parts = re.split("(["+FWS+"]+)", fws+string)
+        # Make the list start with a whitespace element so that it consists
+        # of (whitespace, text) pairs.
+        if parts[0]:
+            parts[:0] = ['']
+        else:
+            parts.pop(0)
+        for fws, part in zip(*[iter(parts)]*2):
+            self._append_chunk(fws, part)
+    def _append_chunk(self, fws, string):
+        """Push (fws, string) and fold the current line if it is too long."""
+        self._current_line.push(fws, string)
+        if len(self._current_line) > self._maxlen:
+            # Find the best split point, working backward from the end.
+            # There might be none, on a long first line.
+            for ch in self._splitchars:
+                for i in range(self._current_line.part_count()-1, 0, -1):
+                    if ch.isspace():
+                        fws = self._current_line[i][0]
+                        if fws and fws[0]==ch:
+                            break
+                    prevpart = self._current_line[i-1][1]
+                    if prevpart and prevpart[-1]==ch:
+                        break
+                else:
+                    # No split point for this character; try the next one.
+                    continue
+                break
+            else:
+                # No split point at all: move the part just added to a line
+                # of its own where possible.
+                fws, part = self._current_line.pop()
+                if self._current_line._initial_size > 0:
+                    # There will be a header, so leave it on a line by itself.
+                    self.newline()
+                    if not fws:
+                        # We don't use continuation_ws here because the whitespace
+                        # after a header should always be a space.
+                        fws = ' '
+                self._current_line.push(fws, part)
+                return
+            # Split at part i: everything before it is emitted as a finished
+            # line, the rest starts the next line.
+            remainder = self._current_line.pop_from(i)
+            self._lines.append(str(self._current_line))
+            self._current_line.reset(remainder)
+class _Accumulator(list):
+    """A list of (fws, string) parts that tracks its total text length.
+
+    len() reports the number of characters held (plus an initial size that
+    stands for text already on the line); part_count() reports the number
+    of parts.
+    """
+    def __init__(self, initial_size=0):
+        super().__init__()
+        self._initial_size = initial_size
+    def push(self, fws, string):
+        """Add one (fws, string) part at the end."""
+        part = (fws, string)
+        self.append(part)
+    def pop_from(self, i=0):
+        """Remove and return, as a list, the parts from index i onwards."""
+        tail = self[i:]
+        del self[i:]
+        return tail
+    def pop(self):
+        """Remove and return the last part, or ('', '') when there is none."""
+        if not self.part_count():
+            return ('', '')
+        return super().pop()
+    def __len__(self):
+        """Return the initial size plus the length of all parts' text."""
+        total = self._initial_size
+        for fws, part in self:
+            total += len(fws) + len(part)
+        return total
+    def __str__(self):
+        """Return the text of all parts concatenated in order."""
+        pieces = [EMPTYSTRING.join((fws, part)) for fws, part in self]
+        return EMPTYSTRING.join(pieces)
+    def reset(self, startval=None):
+        """Replace the contents with startval (default: nothing)."""
+        self[:] = [] if startval is None else startval
+        self._initial_size = 0
+    def is_onlyws(self):
+        """Tell whether the line holds nothing but whitespace."""
+        if self._initial_size != 0:
+            return False
+        return not self or str(self).isspace()
+    def part_count(self):
+        """Return the number of (fws, string) parts held."""
+        return list.__len__(self)
diff --git a/lib-python/3/email/iterators.py b/lib-python/3/email/iterators.py
new file mode 100644
index 0000000000..3adc4a04ba
--- /dev/null
+++ b/lib-python/3/email/iterators.py
@@ -0,0 +1,73 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+"""Various types of useful iterators and generators."""
+__all__ = [
+ 'body_line_iterator',
+ 'typed_subpart_iterator',
+ 'walk',
+ # Do not include _structure() since it's part of the debugging API.
+ ]
+import sys
+from io import StringIO
+# This function will become a method of the Message class
+def walk(self):
+    """Yield this message and then every subpart beneath it.
+
+    The message tree is traversed depth-first.  This method is a generator.
+    """
+    yield self
+    if not self.is_multipart():
+        return
+    for child in self.get_payload():
+        for descendant in child.walk():
+            yield descendant
+# These two functions are imported into the Iterators.py interface module.
+def body_line_iterator(msg, decode=False):
+    """Yield the string payloads of all parts, one line at a time.
+
+    Optional decode (default False) is passed through to .get_payload().
+    Parts whose payload is not a string are skipped.
+    """
+    for subpart in msg.walk():
+        payload = subpart.get_payload(decode=decode)
+        if not isinstance(payload, str):
+            continue
+        for line in StringIO(payload):
+            yield line
+def typed_subpart_iterator(msg, maintype='text', subtype=None):
+    """Yield the subparts whose MIME type matches.
+
+    `maintype' is the main MIME type to match against and defaults to
+    "text".  Optional `subtype' is the MIME subtype to match against; when
+    it is omitted only the main type is compared.
+    """
+    for subpart in msg.walk():
+        if subpart.get_content_maintype() != maintype:
+            continue
+        if subtype is not None and subpart.get_content_subtype() != subtype:
+            continue
+        yield subpart
+def _structure(msg, fp=None, level=0, include_default=False):
+    """A handy debugging aid: print the content-type tree of msg.
+
+    One line is written to fp (sys.stdout when None) for msg and for each
+    nested subpart, indented four spaces per nesting level.  When
+    include_default is true the part's default type follows in brackets.
+    """
+    if fp is None:
+        fp = sys.stdout
+    text = ' ' * (level * 4) + msg.get_content_type()
+    if include_default:
+        text += ' [%s]' % msg.get_default_type()
+    print(text, file=fp)
+    if not msg.is_multipart():
+        return
+    for subpart in msg.get_payload():
+        _structure(subpart, fp, level+1, include_default)
diff --git a/lib-python/3/email/message.py b/lib-python/3/email/message.py
new file mode 100644
index 0000000000..922617adbb
--- /dev/null
+++ b/lib-python/3/email/message.py
@@ -0,0 +1,863 @@
+# Copyright (C) 2001-2007 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+"""Basic message object for the email package object model."""
+__all__ = ['Message']
+import re
+import uu
+import base64
+import binascii
+import warnings
+from io import BytesIO, StringIO
+# Intrapackage imports
+from email import utils
+from email import errors
+from email import header
+from email import charset as _charset
+Charset = _charset.Charset
+# Regular expression that matches `special' characters in parameters, the
+# existence of which forces quoting of the parameter value.
+tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
+# How to figure out if we are processing strings that come from a byte
+# source with undecodable characters.
+# The pattern cannot be a raw string because Python itself must interpret
+# the \uXXXX escapes, so the regex anchors \A and \Z are written with doubled
+# backslashes instead of relying on unrecognised string escapes.
+_has_surrogates = re.compile(
+    '([^\ud800-\udbff]|\\A)[\udc00-\udfff]([^\udc00-\udfff]|\\Z)').search
+# Helper functions
+def _sanitize_header(name, value):
+    """Wrap a surrogate-carrying string value in an unknown-8bit Header.
+
+    A string value that contains surrogates is returned as a Header using
+    the unknown-8bit charset, so that the bytes are encoded as encoded
+    words.  Any other value is returned unchanged; a non-string value is
+    assumed to be a header object already.
+    """
+    if isinstance(value, str) and _has_surrogates(value):
+        return header.Header(value, charset=_charset.UNKNOWN8BIT,
+                             header_name=name)
+    return value
+def _splitparam(param):
+    """Split a header value at its first semicolon.
+
+    Returns (before, after), both stripped of surrounding whitespace; after
+    is None when there is no semicolon.
+    """
+    # BAW: this may be too simple.  It isn't strictly RFC 2045 (section 5.1)
+    # compliant, but it catches most headers found in the wild.  We may
+    # eventually need a full fledged parser.
+    # RDM: we might have a Header here; for now just stringify it.
+    pieces = str(param).split(';', 1)
+    main = pieces[0].strip()
+    if len(pieces) == 1:
+        return main, None
+    return main, pieces[1].strip()
+def _formatparam(param, value=None, quote=True):
+    """Convenience function to format and return a key=value pair.
+
+    The value is quoted if needed or if quote is true.  A three tuple value
+    (charset, language, value) is encoded according to RFC 2231 rules.  A
+    value containing non-ascii characters is likewise encoded according to
+    RFC 2231 rules, using the utf-8 charset and a null language.  With no
+    value (None or empty) just param is returned.
+    """
+    if value is None or len(value) == 0:
+        return param
+    # A tuple is used for RFC 2231 encoded parameter values where items
+    # are (charset, language, value).  charset is a string, not a Charset
+    # instance.  RFC 2231 encoded values are never quoted, per RFC.
+    if isinstance(value, tuple):
+        encoded = utils.encode_rfc2231(value[2], value[0], value[1])
+        return '%s=%s' % (param + '*', encoded)
+    try:
+        value.encode('ascii')
+    except UnicodeEncodeError:
+        encoded = utils.encode_rfc2231(value, 'utf-8', '')
+        return '%s=%s' % (param + '*', encoded)
+    # BAW: Please check this.  I think that if quote is set it should
+    # force quoting even if not necessary.
+    if quote or tspecials.search(value):
+        return '%s="%s"' % (param, utils.quote(value))
+    return '%s=%s' % (param, value)
+def _parseparam(s):
+    """Split a header value into its semicolon-separated items.
+
+    Semicolons inside a double-quoted section do not split.  For items of
+    the form key=value the key is lower-cased and whitespace around both
+    sides is removed.
+    """
+    # RDM This might be a Header, so for now stringify it.
+    rest = ';' + str(s)
+    items = []
+    while rest.startswith(';'):
+        rest = rest[1:]
+        end = rest.find(';')
+        # Skip semicolons preceded by an odd number of unescaped quotes.
+        while end > 0 and (rest.count('"', 0, end) -
+                           rest.count('\\"', 0, end)) % 2:
+            end = rest.find(';', end + 1)
+        if end < 0:
+            end = len(rest)
+        item = rest[:end]
+        key, equals, val = item.partition('=')
+        if equals:
+            item = key.strip().lower() + '=' + val.strip()
+        items.append(item.strip())
+        rest = rest[end:]
+    return items
+def _unquotevalue(value):
+    """Unquote a parameter value, keeping an RFC 2231 tuple a tuple."""
+    # This is different than utils.collapse_rfc2231_value() because it doesn't
+    # try to convert the value to a unicode.  Message.get_param() and
+    # Message.get_params() are both currently defined to return the tuple in
+    # the face of RFC 2231 parameters.
+    if not isinstance(value, tuple):
+        return utils.unquote(value)
+    charset, language, text = value[0], value[1], value[2]
+    return charset, language, utils.unquote(text)
+class Message:
+ """Basic message object.
+ A message object is defined as something that has a bunch of RFC 2822
+ headers and a payload. It may optionally have an envelope header
+ (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
+ multipart or a message/rfc822), then the payload is a list of Message
+ objects, otherwise it is a string.
+ Message objects implement part of the `mapping' interface, which assumes
+ there is exactly one occurrence of the header per message. Some headers
+ do in fact appear multiple times (e.g. Received) and for those headers,
+ you must use the explicit API to set or get all the headers. Not all of
+ the mapping methods are implemented.
+ """
+    def __init__(self):
+        """Create an empty message with no headers and no payload."""
+        # Headers are kept as a list of (name, value) pairs.
+        self._headers = []
+        self._payload = None
+        self._charset = None
+        self._unixfrom = None
+        self.defects = []
+        # Defaults for multipart messages
+        self.preamble = None
+        self.epilogue = None
+        # Default content type
+        self._default_type = 'text/plain'
+    def __str__(self):
+        """Return the entire formatted message as a string.
+
+        This includes the headers, body, and envelope header.  It simply
+        delegates to as_string() with its default arguments.
+        """
+        return self.as_string()
+    def as_string(self, unixfrom=False, maxheaderlen=0):
+        """Return the entire formatted message as a string.
+
+        Optional `unixfrom', when True, means include the Unix From_
+        envelope header.  maxheaderlen is handed to the Generator.
+
+        This is a convenience method and may not generate the message exactly
+        as you intend.  For more flexibility, use the flatten() method of a
+        Generator instance.
+        """
+        # Imported here rather than at module level.
+        from email.generator import Generator
+        buf = StringIO()
+        writer = Generator(buf, mangle_from_=False, maxheaderlen=maxheaderlen)
+        writer.flatten(self, unixfrom=unixfrom)
+        return buf.getvalue()
+    def is_multipart(self):
+        """Return True if the message consists of multiple parts.
+
+        That is the case exactly when the payload is a list.
+        """
+        return isinstance(self._payload, list)
+ #
+ # Unix From_ line
+ #
+    def set_unixfrom(self, unixfrom):
+        """Store unixfrom as the message's envelope (Unix From_) header."""
+        self._unixfrom = unixfrom
+    def get_unixfrom(self):
+        """Return the stored envelope (Unix From_) header, None by default."""
+        return self._unixfrom
+ #
+ # Payload manipulation.
+ #
+    def attach(self, payload):
+        """Add the given payload to the current payload.
+
+        A message without a payload gets a one-element list; otherwise the
+        payload is appended to the existing one.  If you want to set the
+        payload to a scalar object, use set_payload() instead.
+        """
+        if self._payload is not None:
+            self._payload.append(payload)
+            return
+        self._payload = [payload]
+    def get_payload(self, i=None, decode=False):
+        """Return a reference to the payload.
+
+        The payload will either be a list object or a string.  If you mutate
+        the list object, you modify the message's payload in place.  Optional
+        i returns that index into the payload.
+
+        Optional decode is a flag indicating whether the payload should be
+        decoded or not, according to the Content-Transfer-Encoding header
+        (default is False).
+
+        When True and the message is not a multipart, the payload will be
+        decoded if this header's value is `quoted-printable', `base64' or one
+        of the uuencode names.  If some other encoding is used, or the header
+        is missing, or if the payload has bogus data (i.e. bogus base64 or
+        uuencoded data), the payload is returned as-is.  A payload that is
+        already bytes is decoded directly, and a missing payload (None) is
+        returned as None.
+
+        If the message is a multipart and the decode flag is True, then None
+        is returned.
+
+        Raises TypeError when i is given but the payload is not a list.
+        """
+        # Here is the logic table for this code, based on the email5.0.0 code:
+        #   i     decode  is_multipart  result
+        # ------  ------  ------------  ------------------------------
+        #  None   True    True          None
+        #   i     True    True          None
+        #  None   False   True          _payload (a list)
+        #   i     False   True          _payload element i (a Message)
+        #   i     False   False         error (not a list)
+        #   i     True    False         error (not a list)
+        #  None   False   False         _payload
+        #  None   True    False         _payload decoded (bytes)
+        # Note that Barry planned to factor out the 'decode' case, but that
+        # isn't so easy now that we handle the 8 bit data, which needs to be
+        # converted in both the decode and non-decode path.
+        if self.is_multipart():
+            if decode:
+                return None
+            if i is None:
+                return self._payload
+            else:
+                return self._payload[i]
+        # For backward compatibility, Use isinstance and this error message
+        # instead of the more logical is_multipart test.
+        if i is not None and not isinstance(self._payload, list):
+            raise TypeError('Expected list, got %s' % type(self._payload))
+        payload = self._payload
+        # cte might be a Header, so for now stringify it.
+        cte = str(self.get('content-transfer-encoding', '')).lower()
+        # payload may be bytes here.
+        if isinstance(payload, str):
+            if _has_surrogates(payload):
+                bpayload = payload.encode('ascii', 'surrogateescape')
+                if not decode:
+                    try:
+                        payload = bpayload.decode(
+                            self.get_param('charset', 'ascii'), 'replace')
+                    except LookupError:
+                        payload = bpayload.decode('ascii', 'replace')
+            elif decode:
+                try:
+                    bpayload = payload.encode('ascii')
+                except UnicodeError:
+                    # This won't happen for RFC compliant messages (messages
+                    # containing only ASCII codepoints in the unicode input).
+                    # If it does happen, turn the string into bytes in a way
+                    # guaranteed not to fail.
+                    bpayload = payload.encode('raw-unicode-escape')
+        else:
+            # A non-string payload (e.g. bytes) is what the decoders below
+            # operate on; without this bpayload would be unbound for them.
+            bpayload = payload
+        if not decode:
+            return payload
+        if payload is None:
+            # Nothing to decode.
+            return None
+        if cte == 'quoted-printable':
+            return utils._qdecode(bpayload)
+        elif cte == 'base64':
+            try:
+                return base64.b64decode(bpayload)
+            except binascii.Error:
+                # Incorrect padding
+                return bpayload
+        elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
+            in_file = BytesIO(bpayload)
+            out_file = BytesIO()
+            try:
+                uu.decode(in_file, out_file, quiet=True)
+                return out_file.getvalue()
+            except uu.Error:
+                # Some decoding problem
+                return bpayload
+        if isinstance(payload, str):
+            return bpayload
+        return payload
+    def set_payload(self, payload, charset=None):
+        """Replace the payload with the given value.
+
+        Optional charset sets the message's default character set; when it
+        is not None it is passed to set_charset(), which see for details.
+        """
+        self._payload = payload
+        if charset is None:
+            return
+        self.set_charset(charset)
+    def set_charset(self, charset):
+        """Set the charset of the payload to a given character set.
+
+        charset can be a Charset instance, a string naming a character set, or
+        None.  If it is a string it will be converted to a Charset instance.
+        If charset is None, the charset parameter will be removed from the
+        Content-Type field.  Anything else will generate a TypeError.
+
+        The message will be assumed to be of type text/* encoded with
+        charset.input_charset.  It will be converted to charset.output_charset
+        and encoded properly, if needed, when generating the plain text
+        representation of the message.  MIME headers (MIME-Version,
+        Content-Type, Content-Transfer-Encoding) will be added as needed.
+        """
+        if charset is None:
+            self.del_param('charset')
+            self._charset = None
+            return
+        if not isinstance(charset, Charset):
+            charset = Charset(charset)
+        self._charset = charset
+        if 'MIME-Version' not in self:
+            self.add_header('MIME-Version', '1.0')
+        # Record the output charset on Content-Type, creating the header
+        # (as text/plain) when the message has none yet.
+        if 'Content-Type' not in self:
+            self.add_header('Content-Type', 'text/plain',
+                            charset=charset.get_output_charset())
+        else:
+            self.set_param('charset', charset.get_output_charset())
+        if charset != charset.get_output_charset():
+            self._payload = charset.body_encode(self._payload)
+        if 'Content-Transfer-Encoding' not in self:
+            cte = charset.get_body_encoding()
+            # cte is first tried as a callable taking this message.  If the
+            # call raises TypeError (presumably because cte is a plain
+            # encoding name rather than a callable -- confirm against
+            # Charset.get_body_encoding), the payload is body-encoded here
+            # and cte is used as the header value instead.
+            try:
+                cte(self)
+            except TypeError:
+                self._payload = charset.body_encode(self._payload)
+                self.add_header('Content-Transfer-Encoding', cte)
+    def get_charset(self):
+        """Return the Charset object recorded for this message's payload.
+        This is the current value of the _charset attribute, which
+        set_charset() assigns (a Charset instance, or None once cleared).
+        """
+        current = self._charset
+        return current
+ #
+ #
+    def __len__(self):
+        """Return how many header entries the message holds, duplicates included."""
+        header_count = len(self._headers)
+        return header_count
+    def __getitem__(self, name):
+        """Look up a header value by field name.
+        A missing header yields None rather than raising.  The lookup is
+        delegated to get(); when a field occurs several times, which
+        occurrence is returned is not guaranteed -- use get_all() to obtain
+        every value for a field name.
+        """
+        found = self.get(name)
+        return found
+    def __setitem__(self, name, val):
+        """Append a header with the given field name and value.
+        Existing headers with the same name are left in place; delete them
+        first with __delitem__() if a replacement is wanted.
+        """
+        entry = (name, val)
+        self._headers.append(entry)
+    def __delitem__(self, name):
+        """Remove every header whose field name matches name, ignoring case.
+        Nothing is raised when no header matches.  The header list is
+        replaced by a new list holding the surviving entries in their
+        original order.
+        """
+        target = name.lower()
+        self._headers = [(field, value) for field, value in self._headers
+                         if field.lower() != target]
+    def __contains__(self, name):
+        """Return True when a header named name exists (case-insensitive match)."""
+        target = name.lower()
+        return any(field.lower() == target for field, value in self._headers)
+    def __iter__(self):
+        """Yield the field name of each header, in stored order."""
+        for name, _value in self._headers:
+            yield name
+    def keys(self):
+        """Return the field names of all headers as a list.
+        Names come back in stored order -- the order they appeared in the
+        original message or were added -- and may repeat.  A field that was
+        deleted and re-inserted sits at the end of the list.
+        """
+        names = []
+        for name, _value in self._headers:
+            names.append(name)
+        return names
+    def values(self):
+        """Return the values of all headers as a list.
+        Values come back in stored order -- the order they appeared in the
+        original message or were added -- and duplicates are kept.  A field
+        that was deleted and re-inserted sits at the end of the list.  Each
+        value is passed through _sanitize_header() before being returned.
+        """
+        cleaned = []
+        for name, raw in self._headers:
+            cleaned.append(_sanitize_header(name, raw))
+        return cleaned
+    def items(self):
+        """Return all headers as a list of (name, value) pairs.
+        Pairs come back in stored order -- the order they appeared in the
+        original message or were added -- and duplicates are kept.  A field
+        that was deleted and re-inserted sits at the end of the list.  Each
+        value is passed through _sanitize_header() before being returned.
+        """
+        pairs = []
+        for name, raw in self._headers:
+            pairs.append((name, _sanitize_header(name, raw)))
+        return pairs
+    def get(self, name, failobj=None):
+        """Return the value of the first header named name, else failobj.
+        Field names are matched case-insensitively.  This behaves like
+        __getitem__() except that the caller chooses what is returned for a
+        missing field.  A found value is passed through _sanitize_header().
+        """
+        wanted = name.lower()
+        for field, raw in self._headers:
+            if field.lower() != wanted:
+                continue
+            return _sanitize_header(field, raw)
+        return failobj
+ #
+ # Additional useful stuff
+ #
+    def get_all(self, name, failobj=None):
+        """Return every value stored for the header named name, as a list.
+        Matching is case-insensitive and the values keep their stored order,
+        duplicates included.  When no header matches, failobj (None by
+        default) is returned instead of an empty list.  Each value is passed
+        through _sanitize_header().
+        """
+        wanted = name.lower()
+        matches = [_sanitize_header(field, raw)
+                   for field, raw in self._headers
+                   if field.lower() == wanted]
+        return matches if matches else failobj
+    def add_header(self, _name, _value, **_params):
+        """Append a header, optionally with parameters.
+        _name is the header field to add and _value its primary value (left
+        out of the header text when None).  Each keyword argument becomes a
+        parameter, with underscores in the keyword turned into dashes.  A
+        parameter whose value is None is added as a bare key; any other value
+        is formatted by _formatparam(), normally as key="value".  A value
+        containing non-ASCII characters can be given as a three-tuple of
+        (charset, language, value) to have it encoded according to RFC 2231;
+        otherwise it is encoded using the utf-8 charset and a language of ''.
+        The pieces are joined with SEMISPACE and appended to the header list.
+        Examples:
+        msg.add_header('content-disposition', 'attachment', filename='bud.gif')
+        msg.add_header('content-disposition', 'attachment',
+                       filename=('utf-8', '', 'Fußballer.ppt'))
+        msg.add_header('content-disposition', 'attachment',
+                       filename='Fußballer.ppt')
+        """
+        pieces = [] if _value is None else [_value]
+        for key, val in _params.items():
+            attribute = key.replace('_', '-')
+            if val is None:
+                pieces.append(attribute)
+            else:
+                pieces.append(_formatparam(attribute, val))
+        self._headers.append((_name, SEMISPACE.join(pieces)))
+    def replace_header(self, _name, _value):
+        """Replace the value of the first header named _name.
+        The match is case-insensitive.  The header keeps its position and the
+        spelling of its existing field name; only the value changes.  If no
+        header matches, KeyError is raised with the lower-cased name.
+        """
+        wanted = _name.lower()
+        for index, (field, _old) in enumerate(self._headers):
+            if field.lower() == wanted:
+                self._headers[index] = (field, _value)
+                return
+        raise KeyError(wanted)
+ #
+ # Use these three methods instead of the three above.
+ #
+    def get_content_type(self):
+        """Return the message's content type as a lower-case string.
+        The result has the form `maintype/subtype'.  Without a Content-Type
+        header the value of get_default_type() is returned; RFC 2045 gives
+        every message a default type, so a value is always available.  That
+        default is text/plain, except inside a multipart/digest container
+        where it is message/rfc822.  A Content-Type value that does not
+        contain exactly one `/' is treated as invalid and reported as
+        text/plain (RFC 2045, section 5.2).
+        """
+        sentinel = object()
+        header = self.get('content-type', sentinel)
+        if header is sentinel:
+            # No header at all: the default type carries no parameters.
+            return self.get_default_type()
+        ctype = _splitparam(header)[0].lower()
+        return ctype if ctype.count('/') == 1 else 'text/plain'
+    def get_content_maintype(self):
+        """Return the main type of the message's content type.
+        This is the part before the `/' in the string returned by
+        get_content_type().
+        """
+        pieces = self.get_content_type().split('/')
+        return pieces[0]
+    def get_content_subtype(self):
+        """Return the subtype of the message's content type.
+        This is the part after the `/' in the string returned by
+        get_content_type().
+        """
+        pieces = self.get_content_type().split('/')
+        return pieces[1]
+    def get_default_type(self):
+        """Return the content type assumed when no Content-Type header exists.
+        For most messages this is text/plain; subparts of a multipart/digest
+        container default to message/rfc822.  The value is read from the
+        _default_type attribute.
+        """
+        default = self._default_type
+        return default
+    def set_default_type(self, ctype):
+        """Record ctype as the content type to assume without a header.
+        ctype is expected to be "text/plain" or "message/rfc822", but no
+        check is made.  Only the _default_type attribute is updated; the
+        Content-Type header itself is not touched.
+        """
+        self._default_type = ctype
+    def _get_params_preserve(self, failobj, header):
+        # Variant of get_params() that leaves the quoting of values intact.
+        # Returns failobj when the header is absent; otherwise a list of
+        # (name, value) pairs run through utils.decode_params().  A piece
+        # without `=' is a bare attribute and gets the empty string as value.
+        # BAW: should this be part of the public interface?
+        sentinel = object()
+        raw = self.get(header, sentinel)
+        if raw is sentinel:
+            return failobj
+        pairs = []
+        for piece in _parseparam(raw):
+            if '=' in piece:
+                key, _sep, val = piece.partition('=')
+                key = key.strip()
+                val = val.strip()
+            else:
+                # Must have been a bare attribute
+                key = piece.strip()
+                val = ''
+            pairs.append((key, val))
+        return utils.decode_params(pairs)
+    def get_params(self, failobj=None, header='content-type', unquote=True):
+        """Return the parameters of a header as a list of (key, value) pairs.
+        Each parameter is split on its first `=' sign: the left side is the
+        key, the right side the value, and a parameter without `=' has the
+        empty string as value.  Values have the form described for
+        get_param().
+        failobj is returned when the header is missing.  header names the
+        header to inspect (Content-Type by default).  With unquote true each
+        value is passed through _unquotevalue(); otherwise the values are
+        returned with their quoting preserved.
+        """
+        sentinel = object()
+        pairs = self._get_params_preserve(sentinel, header)
+        if pairs is sentinel:
+            return failobj
+        if not unquote:
+            return pairs
+        return [(key, _unquotevalue(val)) for key, val in pairs]
+    def get_param(self, param, failobj=None, header='content-type',
+                  unquote=True):
+        """Return the value of one parameter of a header, else failobj.
+        failobj is returned when the header is missing or carries no such
+        parameter.  header names the header to inspect (Content-Type by
+        default).  Parameter keys are compared case-insensitively.
+        The value is either a string or, when the parameter was RFC 2231
+        encoded, a 3-tuple of the form (CHARSET, LANGUAGE, VALUE).  CHARSET
+        and LANGUAGE may both be None, in which case VALUE should be
+        considered us-ascii encoded; LANGUAGE can usually be ignored.
+        Callers should be prepared for the 3-tuple form; either form can be
+        turned into a plain string like so:
+            param = msg.get_param('foo')
+            param = email.utils.collapse_rfc2231_value(param)
+        The value (the returned string, or the VALUE item of the 3-tuple) is
+        unquoted unless unquote is set to False.
+        """
+        if header not in self:
+            return failobj
+        for key, val in self._get_params_preserve(failobj, header):
+            if key.lower() != param.lower():
+                continue
+            return _unquotevalue(val) if unquote else val
+        return failobj
+ def set_param(self, param, value, header='Content-Type', requote=True,
+ charset=None, language=''):
+ """Set a parameter in the Content-Type header.
+ If the parameter already exists in the header, its value will be
+ replaced with the new value.
+ If header is Content-Type and has not yet been defined for this
+ message, it will be set to "text/plain" and the new parameter and
+ value will be appended as per RFC 2045.
+ An alternate header can specified in the header argument, and all
+ parameters will be quoted as necessary unless requote is False.
+ If charset is specified, the parameter will be encoded according to RFC
+ 2231. Optional language specifies the RFC 2231 language, defaulting
+ to the empty string. Both charset and language should be strings.
+ """
+ # A plain value plus a charset becomes the (charset, language, value)
+ # 3-tuple form; a value that is already a tuple is left alone.
+ if not isinstance(value, tuple) and charset:
+ value = (charset, language, value)
+ # Starting text of the header: "text/plain" for a missing Content-Type,
+ # otherwise whatever the header currently holds (None if it is absent).
+ if header not in self and header.lower() == 'content-type':
+ ctype = 'text/plain'
+ else:
+ ctype = self.get(header)
+ # Truthiness test: a parameter that is absent, or present with an empty
+ # value, takes the "append" branch.
+ if not self.get_param(param, header=header):
+ if not ctype:
+ ctype = _formatparam(param, value, requote)
+ else:
+ ctype = SEMISPACE.join(
+ [ctype, _formatparam(param, value, requote)])
+ else:
+ # The parameter exists: rebuild the header text from every existing
+ # (name, value) pair, substituting the new value for the matching name.
+ ctype = ''
+ for old_param, old_value in self.get_params(header=header,
+ unquote=requote):
+ append_param = ''
+ if old_param.lower() == param.lower():
+ append_param = _formatparam(param, value, requote)
+ else:
+ append_param = _formatparam(old_param, old_value, requote)
+ if not ctype:
+ ctype = append_param
+ else:
+ ctype = SEMISPACE.join([ctype, append_param])
+ # Only touch the header list when the text actually changed.  The header
+ # is deleted and then set again, so it ends up appended at the end.
+ # NOTE(review): indentation is lost in this listing; both statements
+ # below presumably sit inside this if -- confirm against upstream.
+ if ctype != self.get(header):
+ del self[header]
+ self[header] = ctype
+    def del_param(self, param, header='content-type', requote=True):
+        """Remove a parameter from a header.
+        The header text is rebuilt from its remaining parameters, leaving out
+        every parameter whose name matches param (case-insensitively).
+        Values are quoted as necessary unless requote is False.  header names
+        the header to edit (Content-Type by default).  Nothing happens when
+        the header is absent.  When the rebuilt text differs from the current
+        value, the header is deleted and set again with the new text.
+        """
+        if header not in self:
+            return
+        new_ctype = ''
+        for key, val in self.get_params(header=header, unquote=requote):
+            if key.lower() == param.lower():
+                continue
+            piece = _formatparam(key, val, requote)
+            new_ctype = SEMISPACE.join([new_ctype, piece]) if new_ctype else piece
+        if new_ctype != self.get(header):
+            del self[header]
+            self[header] = new_ctype
+    def set_type(self, type, header='Content-Type', requote=True):
+        """Set the main type and subtype of the Content-Type header.
+        type must be a string of the form "maintype/subtype" (exactly one
+        `/'); otherwise ValueError is raised.
+        The header is replaced while its parameters are kept.  With requote
+        false the existing quoting of the parameters is left as is; otherwise
+        they are quoted (the default).
+        Another header can be named with the header argument.  Whenever the
+        Content-Type header is the one being set, any MIME-Version header is
+        replaced by "MIME-Version: 1.0".
+        """
+        # BAW: should we be strict?
+        if type.count('/') != 1:
+            raise ValueError
+        # Set the Content-Type, you get a MIME-Version
+        sets_content_type = header.lower() == 'content-type'
+        if sets_content_type:
+            del self['mime-version']
+            self['MIME-Version'] = '1.0'
+        if header not in self:
+            self[header] = type
+            return
+        old_params = self.get_params(header=header, unquote=requote)
+        del self[header]
+        self[header] = type
+        # Skip the first param; it's the old type.
+        for key, val in old_params[1:]:
+            self.set_param(key, val, header, requote)
+    def get_filename(self, failobj=None):
+        """Return the filename associated with the payload, else failobj.
+        The `filename' parameter of the Content-Disposition header is tried
+        first; when it is unavailable the `name' parameter of the
+        Content-Type header is used instead.  The value found is passed
+        through utils.collapse_rfc2231_value() and stripped of surrounding
+        whitespace.
+        """
+        sentinel = object()
+        found = self.get_param('filename', sentinel, 'content-disposition')
+        if found is sentinel:
+            found = self.get_param('name', sentinel, 'content-type')
+        if found is sentinel:
+            return failobj
+        collapsed = utils.collapse_rfc2231_value(found)
+        return collapsed.strip()
+    def get_boundary(self, failobj=None):
+        """Return the boundary associated with the payload, else failobj.
+        The value comes from the `boundary' parameter of the Content-Type
+        header, unquoted and passed through utils.collapse_rfc2231_value().
+        Only trailing whitespace is removed from it.
+        """
+        sentinel = object()
+        found = self.get_param('boundary', sentinel)
+        if found is sentinel:
+            return failobj
+        # RFC 2046 says that boundaries may begin but not end in w/s
+        collapsed = utils.collapse_rfc2231_value(found)
+        return collapsed.rstrip()
+ def set_boundary(self, boundary):
+ """Set the boundary parameter in Content-Type to 'boundary'.
+ This is subtly different than deleting the Content-Type header and
+ adding a new one with a new boundary parameter via add_header(). The
+ main difference is that using the set_boundary() method preserves the
+ order of the Content-Type header in the original message.
+ HeaderParseError is raised if the message has no Content-Type header.
+ """
+ # A unique sentinel tells "header absent" apart from any parameter list.
+ missing = object()
+ params = self._get_params_preserve(missing, 'content-type')
+ if params is missing:
+ # There was no Content-Type header, and we don't know what type
+ # to set it to, so raise an exception.
+ raise errors.HeaderParseError('No Content-Type header found')
+ # Copy the parameters, swapping the value of any existing boundary
+ # parameter for the new boundary wrapped in double quotes.
+ newparams = []
+ foundp = False
+ for pk, pv in params:
+ if pk.lower() == 'boundary':
+ newparams.append(('boundary', '"%s"' % boundary))
+ foundp = True
+ else:
+ newparams.append((pk, pv))
+ if not foundp:
+ # The original Content-Type header had no boundary attribute.
+ # Tack one on the end. BAW: should we raise an exception
+ # instead???
+ newparams.append(('boundary', '"%s"' % boundary))
+ # Replace the existing Content-Type header with the new value
+ # The header list is rebuilt entry by entry so that every header,
+ # including Content-Type, keeps its position.
+ newheaders = []
+ for h, v in self._headers:
+ if h.lower() == 'content-type':
+ parts = []
+ # NOTE: this inner loop rebinds v.  That is harmless for this entry
+ # because the rebuilt text, not v, is what gets appended for it.
+ for k, v in newparams:
+ # Bare attributes (empty value) are written without '='.
+ if v == '':
+ parts.append(k)
+ else:
+ parts.append('%s=%s' % (k, v))
+ newheaders.append((h, SEMISPACE.join(parts)))
+ else:
+ newheaders.append((h, v))
+ self._headers = newheaders
+ def get_content_charset(self, failobj=None):
+ """Return the charset parameter of the Content-Type header.
+ The returned string is always coerced to lower case. If there is no
+ Content-Type header, or if that header has no charset parameter,
+ failobj is returned.
+ """
+ # A unique sentinel tells "no charset parameter" apart from any real value.
+ missing = object()
+ charset = self.get_param('charset', missing)
+ if charset is missing:
+ return failobj
+ # A tuple here is the (charset, language, value) form that get_param()
+ # returns for RFC 2231 encoded parameters.
+ if isinstance(charset, tuple):
+ # RFC 2231 encoded, so decode it, and it better end up as ascii.
+ pcharset = charset[0] or 'us-ascii'
+ try:
+ # LookupError will be raised if the charset isn't known to
+ # Python. UnicodeError will be raised if the encoded text
+ # contains a character not in the charset.
+ as_bytes = charset[2].encode('raw-unicode-escape')
+ charset = str(as_bytes, pcharset)
+ except (LookupError, UnicodeError):
+ # Decoding failed: fall back to the raw value item of the tuple.
+ charset = charset[2]
+ # charset characters must be in us-ascii range
+ # NOTE(review): indentation is lost in this listing; this check presumably
+ # applies to plain-string charsets as well as decoded tuples -- confirm
+ # against upstream.
+ try:
+ charset.encode('us-ascii')
+ except UnicodeError:
+ return failobj
+ # RFC 2046, $4.1.2 says charsets are not case sensitive
+ return charset.lower()
+    def get_charsets(self, failobj=None):
+        """Return the charsets used in this message and its subparts.
+        The list holds one entry for every part produced by walk(), starting
+        with the container message itself, so a non-multipart message yields
+        a list of length 1.  Each entry is the result of that part's
+        get_content_charset(failobj): the value of the charset parameter of
+        its Content-Type header, or failobj (None by default) when the part
+        does not define one.
+        """
+        charsets = []
+        for part in self.walk():
+            charsets.append(part.get_content_charset(failobj))
+        return charsets
+ # I.e. def walk(self): ...
+ from email.iterators import walk
diff --git a/lib-python/3/email/mime/__init__.py b/lib-python/3/email/mime/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/lib-python/3/email/mime/__init__.py
diff --git a/lib-python/3/email/mime/application.py b/lib-python/3/email/mime/application.py
new file mode 100644
index 0000000000..f5c5905564
--- /dev/null
+++ b/lib-python/3/email/mime/application.py
@@ -0,0 +1,36 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Keith Dart
+# Contact: email-sig@python.org
+"""Class representing application/* type MIME documents."""
+__all__ = ["MIMEApplication"]
+from email import encoders
+from email.mime.nonmultipart import MIMENonMultipart
+class MIMEApplication(MIMENonMultipart):
+    """Message class for application/* MIME documents."""
+    def __init__(self, _data, _subtype='octet-stream',
+                 _encoder=encoders.encode_base64, **_params):
+        """Build an application/* MIME document around _data.
+        _data is the raw application data and becomes the payload.
+        _subtype is the MIME subtype ('octet-stream' by default); passing
+        None raises TypeError.
+        _encoder is called with this message once the payload is set and is
+        expected to encode it for transport; the default applies base64.
+        Remaining keyword arguments go to the base class constructor, which
+        turns them into parameters of the Content-Type header.
+        """
+        if _subtype is None:
+            raise TypeError('Invalid application MIME subtype')
+        maintype = 'application'
+        MIMENonMultipart.__init__(self, maintype, _subtype, **_params)
+        self.set_payload(_data)
+        _encoder(self)
diff --git a/lib-python/3/email/mime/audio.py b/lib-python/3/email/mime/audio.py
new file mode 100644
index 0000000000..fbc118951a
--- /dev/null
+++ b/lib-python/3/email/mime/audio.py
@@ -0,0 +1,73 @@
+# Copyright (C) 2001-2007 Python Software Foundation
+# Author: Anthony Baxter
+# Contact: email-sig@python.org
+"""Class representing audio/* type MIME documents."""
+__all__ = ['MIMEAudio']
+import sndhdr
+from io import BytesIO
+from email import encoders
+from email.mime.nonmultipart import MIMENonMultipart
+# Maps the type name found in the first element of a sndhdr test result to
+# the audio/* MIME subtype; _whatsnd() looks its results up in this table.
+_sndhdr_MIMEmap = {'au' : 'basic',
+ 'wav' :'x-wav',
+ 'aiff':'x-aiff',
+ 'aifc':'x-aiff',
+ }
+# There are others in sndhdr that don't have MIME types. :(
+# Additional ones to be added to sndhdr? midi, mp3, realaudio, wma??
+def _whatsnd(data):
+    """Guess the audio MIME subtype of some sound data.
+    Only the first 512 bytes of data are examined.  Each test function in
+    sndhdr.tests is tried in turn; the first one to return a result decides
+    the outcome, which is that result's type name looked up in
+    _sndhdr_MIMEmap (None when the name has no entry).  None is also returned
+    when no test recognises the data.  The tests are driven directly because
+    sndhdr.what() has an awkward interface.
+    """
+    header = data[:512]
+    stream = BytesIO(header)
+    for probe in sndhdr.tests:
+        result = probe(header, stream)
+        if result is None:
+            continue
+        return _sndhdr_MIMEmap.get(result[0])
+    return None
+class MIMEAudio(MIMENonMultipart):
+    """Message class for audio/* MIME documents."""
+    def __init__(self, _audiodata, _subtype=None,
+                 _encoder=encoders.encode_base64, **_params):
+        """Build an audio/* MIME document around _audiodata.
+        _audiodata is the raw audio data and becomes the payload.  When
+        _subtype is None the subtype is guessed from the data with
+        _whatsnd(); if that guess fails as well, TypeError is raised.  An
+        explicit _subtype is used as given.
+        _encoder is called with this audio message once the payload is set.
+        It should use get_payload() and set_payload() to change the payload
+        to its encoded form and add any Content-Transfer-Encoding or other
+        headers that are needed.  The default encoding is Base64.
+        Remaining keyword arguments go to the base class constructor, which
+        turns them into parameters of the Content-Type header.
+        """
+        subtype = _whatsnd(_audiodata) if _subtype is None else _subtype
+        if subtype is None:
+            raise TypeError('Could not find audio MIME subtype')
+        MIMENonMultipart.__init__(self, 'audio', subtype, **_params)
+        self.set_payload(_audiodata)
+        _encoder(self)
diff --git a/lib-python/3/email/mime/base.py b/lib-python/3/email/mime/base.py
new file mode 100644
index 0000000000..ac919258b1
--- /dev/null
+++ b/lib-python/3/email/mime/base.py
@@ -0,0 +1,26 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+"""Base class for MIME specializations."""
+__all__ = ['MIMEBase']
+from email import message
+class MIMEBase(message.Message):
+    """Common base class for the MIME message specializations."""
+    def __init__(self, _maintype, _subtype, **_params):
+        """Initialise the message with Content-Type and MIME-Version headers.
+        The Content-Type value is "<_maintype>/<_subtype>"; keyword arguments
+        become additional parameters of that header.  MIME-Version is set to
+        1.0.
+        """
+        message.Message.__init__(self)
+        content_type = '%s/%s' % (_maintype, _subtype)
+        self.add_header('Content-Type', content_type, **_params)
+        self['MIME-Version'] = '1.0'
diff --git a/lib-python/3/email/mime/image.py b/lib-python/3/email/mime/image.py
new file mode 100644
index 0000000000..5563823239
--- /dev/null
+++ b/lib-python/3/email/mime/image.py
@@ -0,0 +1,46 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+"""Class representing image/* type MIME documents."""
+__all__ = ['MIMEImage']
+import imghdr
+from email import encoders
+from email.mime.nonmultipart import MIMENonMultipart
+class MIMEImage(MIMENonMultipart):
+    """Message class for image/* MIME documents."""
+    def __init__(self, _imagedata, _subtype=None,
+                 _encoder=encoders.encode_base64, **_params):
+        """Build an image/* MIME document around _imagedata.
+        _imagedata is the raw image data and becomes the payload.  When
+        _subtype is None the subtype is guessed from the data with
+        imghdr.what(); if that guess fails as well, TypeError is raised.  An
+        explicit _subtype is used as given.
+        _encoder is called with this image message once the payload is set.
+        It should use get_payload() and set_payload() to change the payload
+        to its encoded form and add any Content-Transfer-Encoding or other
+        headers that are needed.  The default encoding is Base64.
+        Remaining keyword arguments go to the base class constructor, which
+        turns them into parameters of the Content-Type header.
+        """
+        subtype = imghdr.what(None, _imagedata) if _subtype is None else _subtype
+        if subtype is None:
+            raise TypeError('Could not guess image MIME subtype')
+        MIMENonMultipart.__init__(self, 'image', subtype, **_params)
+        self.set_payload(_imagedata)
+        _encoder(self)
diff --git a/lib-python/3/email/mime/message.py b/lib-python/3/email/mime/message.py
new file mode 100644
index 0000000000..275dbfd088
--- /dev/null
+++ b/lib-python/3/email/mime/message.py
@@ -0,0 +1,34 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+"""Class representing message/* MIME documents."""
+__all__ = ['MIMEMessage']
+from email import message
+from email.mime.nonmultipart import MIMENonMultipart
+class MIMEMessage(MIMENonMultipart):
+    """Message class for message/* MIME documents."""
+    def __init__(self, _msg, _subtype='rfc822'):
+        """Build a message/* MIME document that wraps _msg.
+        _msg must be an instance of Message or of a class derived from it;
+        anything else raises TypeError.
+        _subtype is the subtype of the contained message.  It defaults to
+        "rfc822", the name the MIME standard defines even though RFC 2822 has
+        since superseded RFC 822.
+        """
+        MIMENonMultipart.__init__(self, 'message', _subtype)
+        wrapped_ok = isinstance(_msg, message.Message)
+        if not wrapped_ok:
+            raise TypeError('Argument is not an instance of Message')
+        # Go through the Message base class directly: attach() as overridden
+        # by MIMENonMultipart always raises.
+        message.Message.attach(self, _msg)
+        # And be sure our default type is set correctly
+        self.set_default_type('message/rfc822')
diff --git a/lib-python/3/email/mime/multipart.py b/lib-python/3/email/mime/multipart.py
new file mode 100644
index 0000000000..96618650c5
--- /dev/null
+++ b/lib-python/3/email/mime/multipart.py
@@ -0,0 +1,47 @@
+# Copyright (C) 2002-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+"""Base class for MIME multipart/* type messages."""
+__all__ = ['MIMEMultipart']
+from email.mime.base import MIMEBase
+class MIMEMultipart(MIMEBase):
+    """Base class for multipart/* MIME messages."""
+    def __init__(self, _subtype='mixed', boundary=None, _subparts=None,
+                 **_params):
+        """Build a multipart/* message.
+        The default is multipart/mixed, with Content-Type and MIME-Version
+        headers added by the base class.
+        _subtype is the subtype of the multipart content type (`mixed' by
+        default).
+        boundary is the multipart boundary string; when it is left out no
+        boundary parameter is set here.
+        _subparts is an iterable of initial subparts, each of which is passed
+        to attach().  Further subparts can be attached later.
+        Remaining keyword arguments become parameters of the Content-Type
+        header.
+        """
+        MIMEBase.__init__(self, 'multipart', _subtype, **_params)
+        # The payload must be a list from the start: is_multipart() in the
+        # Message superclass assumes that for multipart messages.
+        self._payload = []
+        if _subparts:
+            for part in _subparts:
+                self.attach(part)
+        if boundary:
+            self.set_boundary(boundary)
diff --git a/lib-python/3/email/mime/nonmultipart.py b/lib-python/3/email/mime/nonmultipart.py
new file mode 100644
index 0000000000..fc3b9eb4dc
--- /dev/null
+++ b/lib-python/3/email/mime/nonmultipart.py
@@ -0,0 +1,22 @@
+# Copyright (C) 2002-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+"""Base class for MIME type messages that are not multipart."""
+__all__ = ['MIMENonMultipart']
+from email import errors
+from email.mime.base import MIMEBase
+class MIMENonMultipart(MIMEBase):
+ """Base class for MIME type messages that are not multipart."""
+ def attach(self, payload):
+ # Always raises MultipartConversionError; payload is never used.
+ # The public API prohibits attaching multiple subparts to MIMEBase
+ # derived subtypes since none of them are, by definition, of content
+ # type multipart/*
+ raise errors.MultipartConversionError(
+ 'Cannot attach additional subparts to non-multipart/*')
diff --git a/lib-python/3/email/mime/text.py b/lib-python/3/email/mime/text.py
new file mode 100644
index 0000000000..5747db5d67
--- /dev/null
+++ b/lib-python/3/email/mime/text.py
@@ -0,0 +1,30 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+"""Class representing text/* type MIME documents."""
+__all__ = ['MIMEText']
+from email.encoders import encode_7or8bit
+from email.mime.nonmultipart import MIMENonMultipart
+class MIMEText(MIMENonMultipart):
+    """Message class for text/* MIME documents."""
+    def __init__(self, _text, _subtype='plain', _charset='us-ascii'):
+        """Build a text/* MIME document around _text.
+        _text is the string that becomes the payload.
+        _subtype is the MIME subtype ("plain" by default).
+        _charset is added as the charset parameter of the Content-Type header
+        ("us-ascii" by default) and is also passed to set_payload(); as a
+        side effect the Content-Transfer-Encoding header is set as well.
+        """
+        MIMENonMultipart.__init__(self, 'text', _subtype, charset=_charset)
+        self.set_payload(_text, _charset)
diff --git a/lib-python/3/email/parser.py b/lib-python/3/email/parser.py
new file mode 100644
index 0000000000..6caaff53ad
--- /dev/null
+++ b/lib-python/3/email/parser.py
@@ -0,0 +1,136 @@
+# Copyright (C) 2001-2007 Python Software Foundation
+# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
+# Contact: email-sig@python.org
+"""A parser of RFC 2822 and MIME email messages."""
+# Public names of this module.  BytesParser is included because it is part of
+# the public interface: the email package imports it from here.
+__all__ = ['Parser', 'HeaderParser', 'BytesParser']
+import warnings
+from io import StringIO, TextIOWrapper
+from email.feedparser import FeedParser
+from email.message import Message
+class Parser:
+    def __init__(self, *args, **kws):
+        """Parser of RFC 2822 and MIME email messages.
+        Builds an in-memory object tree representing the email message, which
+        can then be manipulated and handed to a Generator to get the textual
+        representation of the message back.
+        The input must be formatted as a block of RFC 2822 headers and header
+        continuation lines, optionally preceded by a `Unix-from' header.  The
+        header block is terminated either by the end of the input or by a
+        blank line.
+        _class (first positional argument or keyword) is the class to
+        instantiate for new message objects; it must have a constructor that
+        can take zero arguments.  The default is email.message.Message.
+        strict (second positional argument or keyword) is deprecated and
+        ignored; passing it emits a DeprecationWarning.  Any other argument,
+        or an argument given both positionally and by keyword, raises
+        TypeError.
+        """
+        nargs = len(args)
+        if nargs >= 1:
+            if '_class' in kws:
+                raise TypeError("Multiple values for keyword arg '_class'")
+            kws['_class'] = args[0]
+        if nargs == 2:
+            if 'strict' in kws:
+                raise TypeError("Multiple values for keyword arg 'strict'")
+            kws['strict'] = args[1]
+        if nargs > 2:
+            raise TypeError('Too many arguments')
+        self._class = kws.pop('_class', Message)
+        if 'strict' in kws:
+            warnings.warn("'strict' argument is deprecated (and ignored)",
+                          DeprecationWarning, 2)
+            del kws['strict']
+        if kws:
+            raise TypeError('Unexpected keyword arguments')
+    def parse(self, fp, headersonly=False):
+        """Build a message structure from the data in a file.
+        The file is read to the end in chunks of 8192 and fed to a
+        FeedParser; the root of the resulting message structure is returned.
+        headersonly tells the parser to stop parsing after the headers; the
+        default False parses the entire contents of the file.
+        """
+        feedparser = FeedParser(self._class)
+        if headersonly:
+            feedparser._set_headersonly()
+        chunk = fp.read(8192)
+        while chunk:
+            feedparser.feed(chunk)
+            chunk = fp.read(8192)
+        return feedparser.close()
+    def parsestr(self, text, headersonly=False):
+        """Build a message structure from a string.
+        The string is wrapped in a StringIO and handed to parse(); the root
+        of the message structure is returned.  headersonly tells the parser
+        to stop parsing after the headers; the default False parses the
+        entire string.
+        """
+        source = StringIO(text)
+        return self.parse(source, headersonly=headersonly)
+class HeaderParser(Parser):
+    # A Parser that always stops after the headers: both methods ignore the
+    # headersonly argument they are given and pass True to Parser.
+    def parse(self, fp, headersonly=True):
+        always_headersonly = True
+        return Parser.parse(self, fp, always_headersonly)
+    def parsestr(self, text, headersonly=True):
+        always_headersonly = True
+        return Parser.parsestr(self, text, always_headersonly)
+class BytesParser:
+    def __init__(self, *args, **kw):
+        """Parser of binary RFC 2822 and MIME email messages.
+        Builds an in-memory object tree representing the email message, which
+        can then be manipulated and handed to a Generator to get the textual
+        representation of the message back.
+        The input must be formatted as a block of RFC 2822 headers and header
+        continuation lines, optionally preceded by a `Unix-from' header.  The
+        header block is terminated either by the end of the input or by a
+        blank line.
+        All arguments are passed unchanged to Parser, which does the actual
+        work.  _class is the class to instantiate for new message objects; it
+        must have a constructor that can take zero arguments.  The default is
+        email.message.Message.
+        """
+        self.parser = Parser(*args, **kw)
+    def parse(self, fp, headersonly=False):
+        """Build a message structure from the data in a binary file.
+        The file is read to the end and the root of the message structure is
+        returned.  headersonly tells the parser to stop parsing after the
+        headers; the default False parses the entire contents of the file.
+        The bytes are decoded as ASCII with the surrogateescape error
+        handler.  fp is left open: closing it remains the caller's job.
+        """
+        fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
+        try:
+            return self.parser.parse(fp, headersonly)
+        finally:
+            # Detach rather than close the wrapper: closing it would also
+            # close the caller's binary file, which this method does not own.
+            fp.detach()
+    def parsebytes(self, text, headersonly=False):
+        """Build a message structure from a byte string.
+        The root of the message structure is returned.  headersonly tells the
+        parser to stop parsing after the headers; the default False parses
+        the entire byte string.  The bytes are decoded as ASCII with the
+        surrogateescape error handler before parsing.
+        """
+        text = text.decode('ASCII', errors='surrogateescape')
+        return self.parser.parsestr(text, headersonly)
diff --git a/lib-python/3/email/quoprimime.py b/lib-python/3/email/quoprimime.py
new file mode 100644
index 0000000000..78638d5904
--- /dev/null
+++ b/lib-python/3/email/quoprimime.py
@@ -0,0 +1,322 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Ben Gertzfield
+# Contact: email-sig@python.org
+"""Quoted-printable content transfer encoding per RFCs 2045-2047.
+This module handles the content transfer encoding method defined in RFC 2045
+to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to
+safely encode text that is in a character set similar to the 7-bit US ASCII
+character set, but that includes some 8-bit characters that are normally not
+allowed in email bodies or headers.
+Quoted-printable is very space-inefficient for encoding binary files; use the
+email.base64mime module for that instead.
+This module provides an interface to encode and decode both headers and bodies
+with quoted-printable encoding.
+RFC 2047 defines a method for including character set information in an
+`encoded-word' in a header. This method is commonly used for 8-bit real names
+in To:/From:/Cc: etc. fields, as well as Subject: lines.
+This module does not do the line wrapping or end-of-line character
+conversion necessary for proper internationalized headers; it only
+does dumb encoding and decoding. To deal with the various line
+wrapping issues, use the email.header module.
+"""
+# Public names exported by a star-import of this module.
+__all__ = [
+ 'body_decode',
+ 'body_encode',
+ 'body_length',
+ 'decode',
+ 'decodestring',
+ 'header_decode',
+ 'header_encode',
+ 'header_length',
+ 'quote',
+ 'unquote',
+ ]
+import re
+import io
+from string import ascii_letters, digits, hexdigits
+CRLF = '\r\n'
+NL = '\n'
+EMPTYSTRING = ''
+# Build a mapping of octets to the expansion of that octet. Since we're only
+# going to have 256 of these things, this isn't terribly inefficient
+# space-wise. Remember that headers and bodies have different sets of safe
+# characters. Initialize both maps with the full expansion, and then override
+# the safe bytes with the more compact form.
+_QUOPRI_HEADER_MAP = dict((c, '=%02X' % c) for c in range(256))
+# The body map starts as an independent copy so that header-only overrides
+# below never leak into body encoding.
+_QUOPRI_BODY_MAP = _QUOPRI_HEADER_MAP.copy()
+# Safe header bytes which need no encoding.
+for c in b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii'):
+    _QUOPRI_HEADER_MAP[c] = chr(c)
+# Headers have one other special encoding; spaces become underscores.
+_QUOPRI_HEADER_MAP[ord(' ')] = '_'
+# Safe body bytes which need no encoding: space, tab and every printable
+# ASCII octet except '=' (RFC 2045 allows octets 33-60 and 62-126 literally).
+for c in (b' !"#$%&\'()*+,-./0123456789:;<>'
+          b'?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`'
+          b'abcdefghijklmnopqrstuvwxyz{|}~\t'):
+    _QUOPRI_BODY_MAP[c] = chr(c)
+# Helpers
+def header_check(octet):
+    """Tell whether header quoted-printable changes the given octet.
+    Returns True when the header map's entry for octet differs from the
+    plain character chr(octet).
+    """
+    literal = chr(octet)
+    mapped = _QUOPRI_HEADER_MAP[octet]
+    return mapped != literal
+def body_check(octet):
+    """Tell whether body quoted-printable changes the given octet.
+    Returns True when the body map's entry for octet differs from the
+    plain character chr(octet).
+    """
+    literal = chr(octet)
+    mapped = _QUOPRI_BODY_MAP[octet]
+    return mapped != literal
+def header_length(bytearray):
+    """Return the length of the header quoted-printable form of bytearray.
+    The RFC 2047 chrome added by `header_encode()` is not counted.
+    :param bytearray: An array of bytes (a.k.a. octets).
+    :return: The number of characters the octets occupy once each one is
+        replaced by its header map expansion.
+    """
+    total = 0
+    for octet in bytearray:
+        total += len(_QUOPRI_HEADER_MAP[octet])
+    return total
+def body_length(bytearray):
+    """Return the length of the body quoted-printable form of bytearray.
+    :param bytearray: An array of bytes (a.k.a. octets).
+    :return: The number of characters the octets occupy once each one is
+        replaced by its body map expansion.
+    """
+    total = 0
+    for octet in bytearray:
+        total += len(_QUOPRI_BODY_MAP[octet])
+    return total
+def _max_append(L, s, maxlen, extra=''):
+    """Add s to the list of chunks L, keeping chunks within maxlen.
+    A non-string s is converted with chr(). If L already has a last chunk
+    and that chunk plus s fits in maxlen, s is glued onto it (preceded by
+    extra); otherwise s, stripped of leading whitespace, starts a new chunk.
+    L is modified in place.
+    """
+    piece = s if isinstance(s, str) else chr(s)
+    if L and len(L[-1]) + len(piece) <= maxlen:
+        L[-1] += extra + piece
+        return
+    L.append(piece.lstrip())
+def unquote(s):
+    """Return the character whose code is the hex pair in s[1:3].
+    s is expected to look like =AB; only the two characters after the
+    first one are read.
+    """
+    hex_pair = s[1:3]
+    code_point = int(hex_pair, 16)
+    return chr(code_point)
+def quote(c):
+    """Return the =XX escape (upper-case hex) for the single character c."""
+    code_point = ord(c)
+    return '=%02X' % code_point
+def header_encode(header_bytes, charset='iso-8859-1'):
+    """Encode a single header line with quoted-printable (like) encoding.
+    Defined in RFC 2047, this `Q' encoding is similar to quoted-printable, but
+    used specifically for email header fields to allow charsets with mostly 7
+    bit characters (and some 8 bit) to remain more or less readable in non-RFC
+    2047 aware mail clients.
+    header_bytes is a sequence of octets (iterating it must yield ints).
+    charset names the character set to use in the RFC 2047 encoded-word. It
+    defaults to iso-8859-1.
+    Returns the encoded-word as a str, or '' when header_bytes is empty.
+    """
+    # Return empty headers as an empty string.
+    if not header_bytes:
+        return ''
+    # Map every octet to its header expansion and glue the pieces together
+    # with a literal '' so the function does not rely on a module constant.
+    encoded = ''.join(_QUOPRI_HEADER_MAP[octet] for octet in header_bytes)
+    # Now add the RFC chrome around the encoded text.
+    return '=?%s?q?%s?=' % (charset, encoded)
+class _body_accumulator(io.StringIO):
+    """StringIO that also tracks how much room is left on the current line.
+    room starts at maxlinelen, shrinks with every write_str() call and is
+    reset to maxlinelen by newline().
+    """
+    def __init__(self, maxlinelen, eol, *args, **kw):
+        super().__init__(*args, **kw)
+        self.eol = eol
+        self.maxlinelen = maxlinelen
+        self.room = maxlinelen
+    def write_str(self, s):
+        """Append s to the buffer and charge its length against room."""
+        self.write(s)
+        self.room -= len(s)
+    def newline(self):
+        """Emit eol and give the next line a full maxlinelen of room."""
+        self.write_str(self.eol)
+        self.room = self.maxlinelen
+    def write_soft_break(self):
+        """Emit a soft line break ('=' followed by eol)."""
+        self.write_str('=')
+        self.newline()
+    def write_wrapped(self, s, extra_room=0):
+        """Write s, first breaking the line softly if s plus extra_room
+        does not fit in the remaining room."""
+        needed = len(s) + extra_room
+        if needed > self.room:
+            self.write_soft_break()
+        self.write_str(s)
+    def write_char(self, c, is_last_char):
+        """Write one (possibly already quoted) character c of an input line.
+        is_last_char says whether c is the final character of its line,
+        which is the only position where whitespace has to be protected.
+        """
+        if not is_last_char:
+            # Something follows on this line: reserve one extra column for
+            # it (or for a soft break). Whitespace is safe here as is.
+            self.write_wrapped(c, extra_room=1)
+            return
+        if c not in ' \t':
+            # Last character and not whitespace: a hard break follows, so
+            # no extra column needs to be reserved.
+            self.write_wrapped(c)
+            return
+        # From here on c is whitespace at the end of the line.
+        if self.room >= 3:
+            # The three-character quoted form fits on this line.
+            self.write(quote(c))
+        elif self.room == 2:
+            # Exactly enough room for the raw whitespace plus a soft break.
+            self.write(c)
+            self.write_soft_break()
+        else:
+            # Only a soft break fits; the quoted whitespace goes alone on
+            # the following line.
+            self.write_soft_break()
+            self.write(quote(c))
+def body_encode(body, maxlinelen=76, eol=NL):
+    """Quoted-printable encode body, wrapping lines at maxlinelen characters.
+    Every encoded line ends with eol, which defaults to "\\n". Pass
+    "\\r\\n" when the result goes straight into an email.
+    Lines are wrapped so that at most maxlinelen characters precede the
+    eol string (the default of 76 is the maximum RFC 2045 permits). Lines
+    that are too long get the quoted-printable soft line break "=" appended,
+    so decoding gives back the original text.
+    maxlinelen must be at least 4, enough for one quoted character ("=XX")
+    plus a soft line break; smaller values raise ValueError. An empty body
+    is returned unchanged.
+    """
+    if maxlinelen < 4:
+        raise ValueError("maxlinelen must be at least 4")
+    if not body:
+        return body
+    # Only the final input line can lack a line ending.
+    ends_with_eol = body[-1] in '\r\n'
+    accumulator = _body_accumulator(maxlinelen, eol)
+    source_lines = body.splitlines()
+    final_index = len(source_lines) - 1
+    for index, text in enumerate(source_lines):
+        remaining = len(text)
+        for char in text:
+            remaining -= 1
+            encoded = quote(char) if body_check(ord(char)) else char
+            # remaining == 0 marks the last character of this input line.
+            accumulator.write_char(encoded, remaining == 0)
+        # Re-emit the line ending wherever the input line had one.
+        if ends_with_eol or index < final_index:
+            accumulator.newline()
+    return accumulator.getvalue()
+# BAW: I'm not sure if the intent was for the signature of this function to be
+# the same as base64MIME.decode() or not...
+def decode(encoded, eol=NL):
+    """Decode a quoted-printable string.
+    Lines are separated with eol, which defaults to \\n. Trailing whitespace
+    on each encoded line is dropped, a lone "=" at the end of a line is a
+    soft line break (the line is joined with the next one), "=AB" with two
+    hex digits becomes the character with that code, and any other "=" is
+    passed through literally.
+    An empty input is returned unchanged. If the input does not end with a
+    line ending, the result does not end with an added eol either.
+    """
+    if not encoded:
+        return encoded
+    # Collect the decoded pieces in a list and join once at the end; repeated
+    # string concatenation is quadratic in the worst case.
+    pieces = []
+    for line in encoded.splitlines():
+        line = line.rstrip()
+        if not line:
+            pieces.append(eol)
+            continue
+        i = 0
+        n = len(line)
+        soft_break = False
+        while i < n:
+            c = line[i]
+            if c != '=':
+                pieces.append(c)
+                i += 1
+            # Otherwise, c == "=". Are we at the end of the line? If so,
+            # it is a soft line break and no eol is emitted for this line.
+            elif i+1 == n:
+                soft_break = True
+                i += 1
+            # Decode if in form =AB
+            elif i+2 < n and line[i+1] in hexdigits and line[i+2] in hexdigits:
+                pieces.append(unquote(line[i:i+3]))
+                i += 3
+            # Otherwise, not in form =AB, pass literally
+            else:
+                pieces.append(c)
+                i += 1
+        if not soft_break:
+            pieces.append(eol)
+    decoded = ''.join(pieces)
+    # Special case if original string did not end with eol: remove the eol
+    # added for the last line. Strip the whole eol (it may be longer than one
+    # character) and do nothing for an empty eol, where there is nothing to
+    # remove.
+    if eol and encoded[-1] not in '\r\n' and decoded.endswith(eol):
+        decoded = decoded[:-len(eol)]
+    return decoded
+# For convenience and backwards compatibility w/ standard base64 module
+body_decode = decode
+decodestring = decode
+def _unquote_match(match):
+    """Return the character encoded by a regex match whose text looks like =AB.
+    Intended as the replacement callback for re.sub(); the whole matched
+    text is handed to unquote().
+    """
+    return unquote(match.group(0))
+# Header decoding is done a bit differently
+def header_decode(s):
+    """Decode a string encoded with RFC 2047 MIME header `Q' encoding.
+    Underscores become spaces and every =AB escape (two hex digits) becomes
+    the character with that code.
+    This function does not parse a full MIME header value encoded with
+    quoted-printable (like =?iso-8859-1?q?Hello_World?=) -- please use
+    the high level email.header class for that functionality.
+    """
+    s = s.replace('_', ' ')
+    # flags must be given by keyword: the fourth positional parameter of
+    # re.sub() is count, so a positional re.ASCII would silently cap the
+    # number of substitutions at 256 and never act as a flag.
+    return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, s, flags=re.ASCII)
diff --git a/lib-python/3/email/test/__init__.py b/lib-python/3/email/test/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/lib-python/3/email/test/__init__.py
diff --git a/lib-python/3/email/test/data/PyBanner048.gif b/lib-python/3/email/test/data/PyBanner048.gif
new file mode 100644
index 0000000000..1a5c87f647
--- /dev/null
+++ b/lib-python/3/email/test/data/PyBanner048.gif
Binary files differ
diff --git a/lib-python/3/email/test/data/audiotest.au b/lib-python/3/email/test/data/audiotest.au
new file mode 100644
index 0000000000..f76b0501b8
--- /dev/null
+++ b/lib-python/3/email/test/data/audiotest.au
Binary files differ
diff --git a/lib-python/3/email/test/data/msg_01.txt b/lib-python/3/email/test/data/msg_01.txt
new file mode 100644
index 0000000000..7e33bcf96a
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_01.txt
@@ -0,0 +1,19 @@
+Return-Path: <bbb@zzz.org>
+Delivered-To: bbb@zzz.org
+Received: by mail.zzz.org (Postfix, from userid 889)
+ id 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=us-ascii
+Content-Transfer-Encoding: 7bit
+Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
+From: bbb@ddd.com (John X. Doe)
+To: bbb@zzz.org
+Subject: This is a test message
+Date: Fri, 4 May 2001 14:05:44 -0400
+Do you like this message?
diff --git a/lib-python/3/email/test/data/msg_02.txt b/lib-python/3/email/test/data/msg_02.txt
new file mode 100644
index 0000000000..43f248038a
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_02.txt
@@ -0,0 +1,135 @@
+MIME-version: 1.0
+From: ppp-request@zzz.org
+Sender: ppp-admin@zzz.org
+To: ppp@zzz.org
+Subject: Ppp digest, Vol 1 #2 - 5 msgs
+Date: Fri, 20 Apr 2001 20:18:00 -0400 (EDT)
+X-Mailer: Mailman v2.0.4
+X-Mailman-Version: 2.0.4
+Content-Type: multipart/mixed; boundary=""
+Content-type: text/plain; charset=us-ascii
+Content-description: Masthead (Ppp digest, Vol 1 #2)
+Send Ppp mailing list submissions to
+ ppp@zzz.org
+To subscribe or unsubscribe via the World Wide Web, visit
+ http://www.zzz.org/mailman/listinfo/ppp
+or, via email, send a message with subject or body 'help' to
+ ppp-request@zzz.org
+You can reach the person managing the list at
+ ppp-admin@zzz.org
+When replying, please edit your Subject line so it is more specific
+than "Re: Contents of Ppp digest..."
+Content-type: text/plain; charset=us-ascii
+Content-description: Today's Topics (5 msgs)
+Today's Topics:
+ 1. testing #1 (Barry A. Warsaw)
+ 2. testing #2 (Barry A. Warsaw)
+ 3. testing #3 (Barry A. Warsaw)
+ 4. testing #4 (Barry A. Warsaw)
+ 5. testing #5 (Barry A. Warsaw)
+Content-Type: multipart/digest; boundary="__--__--"
+Message: 1
+Content-Type: text/plain; charset=us-ascii
+Content-Transfer-Encoding: 7bit
+Date: Fri, 20 Apr 2001 20:16:13 -0400
+To: ppp@zzz.org
+From: barry@digicool.com (Barry A. Warsaw)
+Subject: [Ppp] testing #1
+Precedence: bulk
+Message: 2
+Date: Fri, 20 Apr 2001 20:16:21 -0400
+Content-Type: text/plain; charset=us-ascii
+Content-Transfer-Encoding: 7bit
+To: ppp@zzz.org
+From: barry@digicool.com (Barry A. Warsaw)
+Precedence: bulk
+Message: 3
+Date: Fri, 20 Apr 2001 20:16:25 -0400
+Content-Type: text/plain; charset=us-ascii
+Content-Transfer-Encoding: 7bit
+To: ppp@zzz.org
+From: barry@digicool.com (Barry A. Warsaw)
+Subject: [Ppp] testing #3
+Precedence: bulk
+Message: 4
+Date: Fri, 20 Apr 2001 20:16:28 -0400
+Content-Type: text/plain; charset=us-ascii
+Content-Transfer-Encoding: 7bit
+To: ppp@zzz.org
+From: barry@digicool.com (Barry A. Warsaw)
+Subject: [Ppp] testing #4
+Precedence: bulk
+Message: 5
+Date: Fri, 20 Apr 2001 20:16:32 -0400
+Content-Type: text/plain; charset=us-ascii
+Content-Transfer-Encoding: 7bit
+To: ppp@zzz.org
+From: barry@digicool.com (Barry A. Warsaw)
+Subject: [Ppp] testing #5
+Precedence: bulk
+Content-type: text/plain; charset=us-ascii
+Content-description: Digest Footer
+Ppp mailing list
+End of Ppp Digest
diff --git a/lib-python/3/email/test/data/msg_03.txt b/lib-python/3/email/test/data/msg_03.txt
new file mode 100644
index 0000000000..c748ebf117
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_03.txt
@@ -0,0 +1,16 @@
+Return-Path: <bbb@zzz.org>
+Delivered-To: bbb@zzz.org
+Received: by mail.zzz.org (Postfix, from userid 889)
+ id 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
+Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
+From: bbb@ddd.com (John X. Doe)
+To: bbb@zzz.org
+Subject: This is a test message
+Date: Fri, 4 May 2001 14:05:44 -0400
+Do you like this message?
diff --git a/lib-python/3/email/test/data/msg_04.txt b/lib-python/3/email/test/data/msg_04.txt
new file mode 100644
index 0000000000..1f633c4496
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_04.txt
@@ -0,0 +1,37 @@
+Return-Path: <barry@python.org>
+Delivered-To: barry@python.org
+Received: by mail.python.org (Postfix, from userid 889)
+ id C2BF0D37C6; Tue, 11 Sep 2001 00:05:05 -0400 (EDT)
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="h90VIIIKmx"
+Content-Transfer-Encoding: 7bit
+Message-ID: <15261.36209.358846.118674@anthem.python.org>
+From: barry@python.org (Barry A. Warsaw)
+To: barry@python.org
+Subject: a simple multipart
+Date: Tue, 11 Sep 2001 00:05:05 -0400
+X-Mailer: VM 6.95 under 21.4 (patch 4) "Artificial Intelligence" XEmacs Lucid
+X-Attribution: BAW
+X-Oblique-Strategy: Make a door into a window
+Content-Type: text/plain
+Content-Disposition: inline;
+ filename="msg.txt"
+Content-Transfer-Encoding: 7bit
+a simple kind of mirror
+to reflect upon our own
+Content-Type: text/plain
+Content-Disposition: inline;
+ filename="msg.txt"
+Content-Transfer-Encoding: 7bit
+a simple kind of mirror
+to reflect upon our own
diff --git a/lib-python/3/email/test/data/msg_05.txt b/lib-python/3/email/test/data/msg_05.txt
new file mode 100644
index 0000000000..87d5e9cbf8
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_05.txt
@@ -0,0 +1,28 @@
+From: foo
+Subject: bar
+To: baz
+MIME-Version: 1.0
+Content-Type: multipart/report; report-type=delivery-status;
+ boundary="D1690A7AC1.996856090/mail.example.com"
+Message-Id: <20010803162810.0CA8AA7ACC@mail.example.com>
+This is a MIME-encapsulated message.
+Content-Type: text/plain
+Yadda yadda yadda
+Yadda yadda yadda
+Content-Type: message/rfc822
+From: nobody@python.org
+Yadda yadda yadda
diff --git a/lib-python/3/email/test/data/msg_06.txt b/lib-python/3/email/test/data/msg_06.txt
new file mode 100644
index 0000000000..69f3a47ff4
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_06.txt
@@ -0,0 +1,33 @@
+Return-Path: <barry@python.org>
+Delivered-To: barry@python.org
+MIME-Version: 1.0
+Content-Type: message/rfc822
+Content-Description: forwarded message
+Content-Transfer-Encoding: 7bit
+Message-ID: <15265.9482.641338.555352@python.org>
+From: barry@zope.com (Barry A. Warsaw)
+Sender: barry@python.org
+To: barry@python.org
+Subject: forwarded message from Barry A. Warsaw
+Date: Thu, 13 Sep 2001 17:28:42 -0400
+X-Mailer: VM 6.95 under 21.4 (patch 4) "Artificial Intelligence" XEmacs Lucid
+X-Attribution: BAW
+X-Oblique-Strategy: Be dirty
+X-Url: http://barry.wooz.org
+MIME-Version: 1.0
+Content-Type: text/plain; charset=us-ascii
+Return-Path: <barry@python.org>
+Delivered-To: barry@python.org
+Message-ID: <15265.9468.713530.98441@python.org>
+From: barry@zope.com (Barry A. Warsaw)
+Sender: barry@python.org
+To: barry@python.org
+Subject: testing
+Date: Thu, 13 Sep 2001 17:28:28 -0400
+X-Mailer: VM 6.95 under 21.4 (patch 4) "Artificial Intelligence" XEmacs Lucid
+X-Attribution: BAW
+X-Oblique-Strategy: Spectrum analysis
+X-Url: http://barry.wooz.org
diff --git a/lib-python/3/email/test/data/msg_07.txt b/lib-python/3/email/test/data/msg_07.txt
new file mode 100644
index 0000000000..721f3a0d31
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_07.txt
@@ -0,0 +1,83 @@
+MIME-Version: 1.0
+From: Barry <barry@digicool.com>
+To: Dingus Lovers <cravindogs@cravindogs.com>
+Subject: Here is your dingus fish
+Date: Fri, 20 Apr 2001 19:35:02 -0400
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+Content-Type: text/plain; charset="us-ascii"
+Hi there,
+This is the dingus fish.
+Content-Type: image/gif; name="dingusfish.gif"
+Content-Transfer-Encoding: base64
+content-disposition: attachment; filename="dingusfish.gif"
diff --git a/lib-python/3/email/test/data/msg_08.txt b/lib-python/3/email/test/data/msg_08.txt
new file mode 100644
index 0000000000..b5630836c5
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_08.txt
@@ -0,0 +1,24 @@
+MIME-Version: 1.0
+From: Barry Warsaw <barry@zope.com>
+To: Dingus Lovers <cravindogs@cravindogs.com>
+Subject: Lyrics
+Date: Fri, 20 Apr 2001 19:35:02 -0400
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+Content-Type: text/plain; charset="us-ascii"
+Content-Type: text/html; charset="iso-8859-1"
+Content-Type: text/plain; charset="iso-8859-2"
+Content-Type: text/plain; charset="koi8-r"
diff --git a/lib-python/3/email/test/data/msg_09.txt b/lib-python/3/email/test/data/msg_09.txt
new file mode 100644
index 0000000000..575c4c205a
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_09.txt
@@ -0,0 +1,24 @@
+MIME-Version: 1.0
+From: Barry Warsaw <barry@zope.com>
+To: Dingus Lovers <cravindogs@cravindogs.com>
+Subject: Lyrics
+Date: Fri, 20 Apr 2001 19:35:02 -0400
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+Content-Type: text/plain; charset="us-ascii"
+Content-Type: text/html; charset="iso-8859-1"
+Content-Type: text/plain
+Content-Type: text/plain; charset="koi8-r"
diff --git a/lib-python/3/email/test/data/msg_10.txt b/lib-python/3/email/test/data/msg_10.txt
new file mode 100644
index 0000000000..07903960f9
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_10.txt
@@ -0,0 +1,39 @@
+MIME-Version: 1.0
+From: Barry Warsaw <barry@zope.com>
+To: Dingus Lovers <cravindogs@cravindogs.com>
+Subject: Lyrics
+Date: Fri, 20 Apr 2001 19:35:02 -0400
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+Content-Type: text/plain; charset="us-ascii"
+Content-Transfer-Encoding: 7bit
+This is a 7bit encoded message.
+Content-Type: text/html; charset="iso-8859-1"
+Content-Transfer-Encoding: Quoted-Printable
+=A1This is a Quoted Printable encoded message!
+Content-Type: text/plain; charset="iso-8859-1"
+Content-Transfer-Encoding: Base64
+Content-Type: text/plain; charset="iso-8859-1"
+Content-Transfer-Encoding: Base64
+Content-Type: text/plain; charset="iso-8859-1"
+This has no Content-Transfer-Encoding: header.
diff --git a/lib-python/3/email/test/data/msg_11.txt b/lib-python/3/email/test/data/msg_11.txt
new file mode 100644
index 0000000000..8f7f1991cb
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_11.txt
@@ -0,0 +1,7 @@
+Content-Type: message/rfc822
+MIME-Version: 1.0
+Subject: The enclosing message
+Subject: An enclosed message
+Here is the body of the message.
diff --git a/lib-python/3/email/test/data/msg_12.txt b/lib-python/3/email/test/data/msg_12.txt
new file mode 100644
index 0000000000..4bec8d9444
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_12.txt
@@ -0,0 +1,36 @@
+MIME-Version: 1.0
+From: Barry Warsaw <barry@zope.com>
+To: Dingus Lovers <cravindogs@cravindogs.com>
+Subject: Lyrics
+Date: Fri, 20 Apr 2001 19:35:02 -0400
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+Content-Type: text/plain; charset="us-ascii"
+Content-Type: text/html; charset="iso-8859-1"
+Content-Type: multipart/mixed; boundary="ANOTHER"
+Content-Type: text/plain; charset="iso-8859-2"
+Content-Type: text/plain; charset="iso-8859-3"
+Content-Type: text/plain; charset="us-ascii"
+Content-Type: text/plain; charset="koi8-r"
diff --git a/lib-python/3/email/test/data/msg_12a.txt b/lib-python/3/email/test/data/msg_12a.txt
new file mode 100644
index 0000000000..e94224ecfe
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_12a.txt
@@ -0,0 +1,38 @@
+MIME-Version: 1.0
+From: Barry Warsaw <barry@zope.com>
+To: Dingus Lovers <cravindogs@cravindogs.com>
+Subject: Lyrics
+Date: Fri, 20 Apr 2001 19:35:02 -0400
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+Content-Type: text/plain; charset="us-ascii"
+Content-Type: text/html; charset="iso-8859-1"
+Content-Type: multipart/mixed; boundary="ANOTHER"
+Content-Type: text/plain; charset="iso-8859-2"
+Content-Type: text/plain; charset="iso-8859-3"
+Content-Type: text/plain; charset="us-ascii"
+Content-Type: text/plain; charset="koi8-r"
diff --git a/lib-python/3/email/test/data/msg_13.txt b/lib-python/3/email/test/data/msg_13.txt
new file mode 100644
index 0000000000..8e6d52d5be
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_13.txt
@@ -0,0 +1,94 @@
+MIME-Version: 1.0
+From: Barry <barry@digicool.com>
+To: Dingus Lovers <cravindogs@cravindogs.com>
+Subject: Here is your dingus fish
+Date: Fri, 20 Apr 2001 19:35:02 -0400
+Content-Type: multipart/mixed; boundary="OUTER"
+Content-Type: text/plain; charset="us-ascii"
+A text/plain part
+Content-Type: multipart/mixed; boundary=BOUNDARY
+Content-Type: text/plain; charset="us-ascii"
+Hi there,
+This is the dingus fish.
+Content-Type: image/gif; name="dingusfish.gif"
+Content-Transfer-Encoding: base64
+content-disposition: attachment; filename="dingusfish.gif"
diff --git a/lib-python/3/email/test/data/msg_14.txt b/lib-python/3/email/test/data/msg_14.txt
new file mode 100644
index 0000000000..5d98d2fd14
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_14.txt
@@ -0,0 +1,23 @@
+Return-Path: <bbb@zzz.org>
+Delivered-To: bbb@zzz.org
+Received: by mail.zzz.org (Postfix, from userid 889)
+ id 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
+MIME-Version: 1.0
+Content-Type: text; charset=us-ascii
+Content-Transfer-Encoding: 7bit
+Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
+From: bbb@ddd.com (John X. Doe)
+To: bbb@zzz.org
+Subject: This is a test message
+Date: Fri, 4 May 2001 14:05:44 -0400
+I'm sorry but I'm using a drainbread ISP, which although big and
+wealthy can't seem to generate standard compliant email. :(
+This message has a Content-Type: header with no subtype. I hope you
+can still read it.
diff --git a/lib-python/3/email/test/data/msg_15.txt b/lib-python/3/email/test/data/msg_15.txt
new file mode 100644
index 0000000000..0025624e75
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_15.txt
@@ -0,0 +1,52 @@
+Return-Path: <xx@xx.dk>
+Received: from fepD.post.tele.dk ( by mail.groupcare.dk (LSMTP for Windows NT v1.1b) with SMTP id <0.0014F8A2@mail.groupcare.dk>; Mon, 30 Apr 2001 12:17:50 +0200
+User-Agent: Microsoft-Outlook-Express-Macintosh-Edition/5.02.2106
+Subject: XX
+From: xx@xx.dk
+To: XX
+Message-ID: <xxxx>
+Mime-version: 1.0
+Content-type: multipart/mixed;
+ boundary="MS_Mac_OE_3071477847_720252_MIME_Part"
+> Denne meddelelse er i MIME-format. Da dit postl
+Content-type: multipart/alternative;
+ boundary="MS_Mac_OE_3071477847_720252_MIME_Part"
+Content-type: text/plain; charset="ISO-8859-1"
+Content-transfer-encoding: quoted-printable
+Some removed test.
+Content-type: text/html; charset="ISO-8859-1"
+Content-transfer-encoding: quoted-printable
+<TITLE>Some removed HTML</TITLE>
+Some removed text.
+Content-type: image/gif; name="xx.gif";
+ x-mac-creator="6F676C65";
+ x-mac-type="47494666"
+Content-disposition: attachment
+Content-transfer-encoding: base64
+Some removed base64 encoded chars.
diff --git a/lib-python/3/email/test/data/msg_16.txt b/lib-python/3/email/test/data/msg_16.txt
new file mode 100644
index 0000000000..56167e9f5b
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_16.txt
@@ -0,0 +1,123 @@
+Return-Path: <>
+Delivered-To: scr-admin@socal-raves.org
+Received: from cougar.noc.ucla.edu (cougar.noc.ucla.edu [])
+ by babylon.socal-raves.org (Postfix) with ESMTP id CCC2C51B84
+ for <scr-admin@socal-raves.org>; Sun, 23 Sep 2001 20:13:54 -0700 (PDT)
+Received: from sims-ms-daemon by cougar.noc.ucla.edu
+ (Sun Internet Mail Server sims.3.5.2000.
+ id <0GK500B01D0B8Y@cougar.noc.ucla.edu> for scr-admin@socal-raves.org; Sun,
+ 23 Sep 2001 20:14:35 -0700 (PDT)
+Received: from cougar.noc.ucla.edu
+ (Sun Internet Mail Server sims.3.5.2000.
+ id <0GK500B01D0B8X@cougar.noc.ucla.edu>; Sun, 23 Sep 2001 20:14:35 -0700 (PDT)
+Date: Sun, 23 Sep 2001 20:14:35 -0700 (PDT)
+From: Internet Mail Delivery <postmaster@ucla.edu>
+Subject: Delivery Notification: Delivery has failed
+To: scr-admin@socal-raves.org
+Message-id: <0GK500B04D0B8X@cougar.noc.ucla.edu>
+MIME-version: 1.0
+Sender: scr-owner@socal-raves.org
+Errors-To: scr-owner@socal-raves.org
+X-BeenThere: scr@socal-raves.org
+X-Mailman-Version: 2.1a3
+Precedence: bulk
+List-Help: <mailto:scr-request@socal-raves.org?subject=help>
+List-Post: <mailto:scr@socal-raves.org>
+List-Subscribe: <http://socal-raves.org/mailman/listinfo/scr>,
+ <mailto:scr-request@socal-raves.org?subject=subscribe>
+List-Id: SoCal-Raves <scr.socal-raves.org>
+List-Unsubscribe: <http://socal-raves.org/mailman/listinfo/scr>,
+ <mailto:scr-request@socal-raves.org?subject=unsubscribe>
+List-Archive: <http://socal-raves.org/mailman/private/scr/>
+Content-Type: multipart/report; boundary="Boundary_(ID_PGS2F2a+z+/jL7hupKgRhA)"
+Content-type: text/plain; charset=ISO-8859-1
+This report relates to a message you sent with the following header fields:
+ Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
+ Date: Sun, 23 Sep 2001 20:10:55 -0700
+ From: "Ian T. Henry" <henryi@oxy.edu>
+ To: SoCal Raves <scr@socal-raves.org>
+ Subject: [scr] yeah for Ians!!
+Your message cannot be delivered to the following recipients:
+ Recipient address: jangel1@cougar.noc.ucla.edu
+ Reason: recipient reached disk quota
+Content-type: message/DELIVERY-STATUS
+Original-envelope-id: 0GK500B4HD0888@cougar.noc.ucla.edu
+Reporting-MTA: dns; cougar.noc.ucla.edu
+Action: failed
+Status: 5.0.0 (recipient reached disk quota)
+Original-recipient: rfc822;jangel1@cougar.noc.ucla.edu
+Final-recipient: rfc822;jangel1@cougar.noc.ucla.edu
+Content-type: MESSAGE/RFC822
+Return-path: scr-admin@socal-raves.org
+Received: from sims-ms-daemon by cougar.noc.ucla.edu
+ (Sun Internet Mail Server sims.3.5.2000.
+ id <0GK500B01D0B8X@cougar.noc.ucla.edu>; Sun, 23 Sep 2001 20:14:35 -0700 (PDT)
+Received: from panther.noc.ucla.edu by cougar.noc.ucla.edu
+ (Sun Internet Mail Server sims.3.5.2000.
+ with ESMTP id <0GK500B4GD0888@cougar.noc.ucla.edu> for jangel1@sims-ms-daemon;
+ Sun, 23 Sep 2001 20:14:33 -0700 (PDT)
+Received: from babylon.socal-raves.org
+ (ip-209-85-222-117.dreamhost.com [])
+ by panther.noc.ucla.edu (8.9.1a/8.9.1) with ESMTP id UAA09793 for
+ <jangel1@ucla.edu>; Sun, 23 Sep 2001 20:14:32 -0700 (PDT)
+Received: from babylon (localhost []) by babylon.socal-raves.org
+ (Postfix) with ESMTP id D3B2951B70; Sun, 23 Sep 2001 20:13:47 -0700 (PDT)
+Received: by babylon.socal-raves.org (Postfix, from userid 60001)
+ id A611F51B82; Sun, 23 Sep 2001 20:13:46 -0700 (PDT)
+Received: from tiger.cc.oxy.edu (tiger.cc.oxy.edu [])
+ by babylon.socal-raves.org (Postfix) with ESMTP id ADA7351B70 for
+ <scr@socal-raves.org>; Sun, 23 Sep 2001 20:13:44 -0700 (PDT)
+Received: from ent (n16h86.dhcp.oxy.edu [])
+ by tiger.cc.oxy.edu (8.8.8/8.8.8) with SMTP id UAA08100 for
+ <scr@socal-raves.org>; Sun, 23 Sep 2001 20:14:24 -0700 (PDT)
+Date: Sun, 23 Sep 2001 20:10:55 -0700
+From: "Ian T. Henry" <henryi@oxy.edu>
+Subject: [scr] yeah for Ians!!
+Sender: scr-admin@socal-raves.org
+To: SoCal Raves <scr@socal-raves.org>
+Errors-to: scr-admin@socal-raves.org
+Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
+MIME-version: 1.0
+X-Mailer: Microsoft Outlook Express 5.50.4522.1200
+Content-type: text/plain; charset=us-ascii
+Precedence: bulk
+Delivered-to: scr-post@babylon.socal-raves.org
+Delivered-to: scr@socal-raves.org
+X-Converted-To-Plain-Text: from multipart/alternative by demime 0.98e
+X-Converted-To-Plain-Text: Alternative section used was text/plain
+X-BeenThere: scr@socal-raves.org
+X-Mailman-Version: 2.1a3
+List-Help: <mailto:scr-request@socal-raves.org?subject=help>
+List-Post: <mailto:scr@socal-raves.org>
+List-Subscribe: <http://socal-raves.org/mailman/listinfo/scr>,
+ <mailto:scr-request@socal-raves.org?subject=subscribe>
+List-Id: SoCal-Raves <scr.socal-raves.org>
+List-Unsubscribe: <http://socal-raves.org/mailman/listinfo/scr>,
+ <mailto:scr-request@socal-raves.org?subject=unsubscribe>
+List-Archive: <http://socal-raves.org/mailman/private/scr/>
+I always love to find more Ian's that are over 3 years old!!
+For event info, list questions, or to unsubscribe, see http://www.socal-raves.org/
diff --git a/lib-python/3/email/test/data/msg_17.txt b/lib-python/3/email/test/data/msg_17.txt
new file mode 100644
index 0000000000..8d86e4180d
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_17.txt
@@ -0,0 +1,12 @@
+MIME-Version: 1.0
+From: Barry <barry@digicool.com>
+To: Dingus Lovers <cravindogs@cravindogs.com>
+Subject: Here is your dingus fish
+Date: Fri, 20 Apr 2001 19:35:02 -0400
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+Hi there,
+This is the dingus fish.
+[Non-text (image/gif) part of message omitted, filename dingusfish.gif]
diff --git a/lib-python/3/email/test/data/msg_18.txt b/lib-python/3/email/test/data/msg_18.txt
new file mode 100644
index 0000000000..f9f4904d36
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_18.txt
@@ -0,0 +1,6 @@
+Content-Type: text/plain; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
+ spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
diff --git a/lib-python/3/email/test/data/msg_19.txt b/lib-python/3/email/test/data/msg_19.txt
new file mode 100644
index 0000000000..49bf7fccdd
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_19.txt
@@ -0,0 +1,43 @@
+Send Ppp mailing list submissions to
+ ppp@zzz.org
+To subscribe or unsubscribe via the World Wide Web, visit
+ http://www.zzz.org/mailman/listinfo/ppp
+or, via email, send a message with subject or body 'help' to
+ ppp-request@zzz.org
+You can reach the person managing the list at
+ ppp-admin@zzz.org
+When replying, please edit your Subject line so it is more specific
+than "Re: Contents of Ppp digest..."
+Today's Topics:
+ 1. testing #1 (Barry A. Warsaw)
+ 2. testing #2 (Barry A. Warsaw)
+ 3. testing #3 (Barry A. Warsaw)
+ 4. testing #4 (Barry A. Warsaw)
+ 5. testing #5 (Barry A. Warsaw)
+Ppp mailing list
diff --git a/lib-python/3/email/test/data/msg_20.txt b/lib-python/3/email/test/data/msg_20.txt
new file mode 100644
index 0000000000..1a6a88783e
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_20.txt
@@ -0,0 +1,22 @@
+Return-Path: <bbb@zzz.org>
+Delivered-To: bbb@zzz.org
+Received: by mail.zzz.org (Postfix, from userid 889)
+ id 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=us-ascii
+Content-Transfer-Encoding: 7bit
+Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
+From: bbb@ddd.com (John X. Doe)
+To: bbb@zzz.org
+Cc: ccc@zzz.org
+CC: ddd@zzz.org
+cc: eee@zzz.org
+Subject: This is a test message
+Date: Fri, 4 May 2001 14:05:44 -0400
+Do you like this message?
diff --git a/lib-python/3/email/test/data/msg_21.txt b/lib-python/3/email/test/data/msg_21.txt
new file mode 100644
index 0000000000..23590b255d
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_21.txt
@@ -0,0 +1,20 @@
+From: aperson@dom.ain
+To: bperson@dom.ain
+Subject: Test
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+MIME message
+Content-Type: text/plain; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Content-Type: text/plain; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+End of MIME message
diff --git a/lib-python/3/email/test/data/msg_22.txt b/lib-python/3/email/test/data/msg_22.txt
new file mode 100644
index 0000000000..af9de5fa27
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_22.txt
@@ -0,0 +1,46 @@
+Mime-Version: 1.0
+Message-Id: <a05001902b7f1c33773e9@[]>
+Date: Tue, 16 Oct 2001 13:59:25 +0300
+To: a@example.com
+From: b@example.com
+Content-Type: multipart/mixed; boundary="============_-1208892523==_============"
+Content-Type: text/plain; charset="us-ascii" ; format="flowed"
+Text text text.
+Content-Id: <a05001902b7f1c33773e9@[].0.0>
+Content-Type: image/jpeg; name="wibble.JPG"
+ ; x-mac-type="4A504547"
+ ; x-mac-creator="474B4F4E"
+Content-Disposition: attachment; filename="wibble.JPG"
+Content-Transfer-Encoding: base64
+Content-Id: <a05001902b7f1c33773e9@[].0.1>
+Content-Type: image/jpeg; name="wibble2.JPG"
+ ; x-mac-type="4A504547"
+ ; x-mac-creator="474B4F4E"
+Content-Disposition: attachment; filename="wibble2.JPG"
+Content-Transfer-Encoding: base64
+Content-Type: text/plain; charset="us-ascii" ; format="flowed"
+Text text text.
diff --git a/lib-python/3/email/test/data/msg_23.txt b/lib-python/3/email/test/data/msg_23.txt
new file mode 100644
index 0000000000..bb2e8ec36b
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_23.txt
@@ -0,0 +1,8 @@
+From: aperson@dom.ain
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+Content-Type: text/plain
+A message part
diff --git a/lib-python/3/email/test/data/msg_24.txt b/lib-python/3/email/test/data/msg_24.txt
new file mode 100644
index 0000000000..4e52339e86
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_24.txt
@@ -0,0 +1,10 @@
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+MIME-Version: 1.0
+Subject: A subject
+To: aperson@dom.ain
+From: bperson@dom.ain
diff --git a/lib-python/3/email/test/data/msg_25.txt b/lib-python/3/email/test/data/msg_25.txt
new file mode 100644
index 0000000000..9e35275fe0
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_25.txt
@@ -0,0 +1,117 @@
+From MAILER-DAEMON Fri Apr 06 16:46:09 2001
+Received: from [] (helo=zinfandel.lacita.com)
+ by www.linux.org.uk with esmtp (Exim 3.13 #1)
+ id 14lYR6-0008Iv-00
+ for linuxuser-admin@www.linux.org.uk; Fri, 06 Apr 2001 16:46:09 +0100
+Received: from localhost (localhost) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with internal id JAB03225; Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800)
+Date: Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800)
+From: Mail Delivery Subsystem <MAILER-DAEMON@zinfandel.lacita.com>
+Subject: Returned mail: Too many hops 19 (17 max): from <linuxuser-admin@www.linux.org.uk> via [], to <scoffman@wellpartner.com>
+Message-Id: <200104061723.JAB03225@zinfandel.lacita.com>
+To: <linuxuser-admin@www.linux.org.uk>
+To: postmaster@zinfandel.lacita.com
+MIME-Version: 1.0
+Content-Type: multipart/report; report-type=delivery-status;
+ bo
+Auto-Submitted: auto-generated (failure)
+This is a MIME-encapsulated message
+The original message was received at Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800)
+from []
+ ----- The following addresses have delivery notifications -----
+<scoffman@wellpartner.com> (unrecoverable error)
+ ----- Transcript of session follows -----
+554 Too many hops 19 (17 max): from <linuxuser-admin@www.linux.org.uk> via [], to <scoffman@wellpartner.com>
+Content-Type: message/delivery-status
+Reporting-MTA: dns; zinfandel.lacita.com
+Received-From-MTA: dns; []
+Arrival-Date: Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800)
+Final-Recipient: rfc822; scoffman@wellpartner.com
+Action: failed
+Status: 5.4.6
+Last-Attempt-Date: Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800)
+Content-Type: text/rfc822-headers
+Return-Path: linuxuser-admin@www.linux.org.uk
+Received: from ns1.wellpartner.net ([]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03225 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800)
+Received: from zinfandel.lacita.com ([])
+ by
+ fo
+Received: from ns1.wellpartner.net ([]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03221 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:22:18 -0800 (GMT-0800)
+Received: from zinfandel.lacita.com ([])
+ by
+ fo
+Received: from ns1.wellpartner.net ([]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03217 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:21:37 -0800 (GMT-0800)
+Received: from zinfandel.lacita.com ([])
+ by
+ fo
+Received: from ns1.wellpartner.net ([]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03213 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:20:56 -0800 (GMT-0800)
+Received: from zinfandel.lacita.com ([])
+ by
+ fo
+Received: from ns1.wellpartner.net ([]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03209 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:20:15 -0800 (GMT-0800)
+Received: from zinfandel.lacita.com ([])
+ by
+ fo
+Received: from ns1.wellpartner.net ([]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03205 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:19:33 -0800 (GMT-0800)
+Received: from zinfandel.lacita.com ([])
+ by
+ fo
+Received: from ns1.wellpartner.net ([]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03201 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:18:52 -0800 (GMT-0800)
+Received: from zinfandel.lacita.com ([])
+ by
+ fo
+Received: from ns1.wellpartner.net ([]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03197 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:17:54 -0800 (GMT-0800)
+Received: from www.linux.org.uk (parcelfarce.linux.theplanet.co.uk [])
+ by
+ fo
+Received: from localhost.localdomain
+ ([
+ by
+ id
+Received: from [] (helo=s1.uklinux.net ident=root)
+ by
+ id
+ fo
+Received: from server (ppp-2-22.cvx4.telinco.net [])
+ by
+ fo
+From: Daniel James <daniel@linuxuser.co.uk>
+Organization: LinuxUser
+To: linuxuser@www.linux.org.uk
+X-Mailer: KMail [version 1.1.99]
+Content-Type: text/plain;
+ c
+MIME-Version: 1.0
+Message-Id: <01040616033903.00962@server>
+Content-Transfer-Encoding: 8bit
+Subject: [LinuxUser] bulletin no. 45
+Sender: linuxuser-admin@www.linux.org.uk
+Errors-To: linuxuser-admin@www.linux.org.uk
+X-BeenThere: linuxuser@www.linux.org.uk
+X-Mailman-Version: 2.0.3
+Precedence: bulk
+List-Help: <mailto:linuxuser-request@www.linux.org.uk?subject=help>
+List-Post: <mailto:linuxuser@www.linux.org.uk>
+List-Subscribe: <http://www.linux.org.uk/mailman/listinfo/linuxuser>,
+ <m
+List-Id: bulletins from LinuxUser magazine <linuxuser.www.linux.org.uk>
+List-Unsubscribe: <http://www.linux.org.uk/mailman/listinfo/linuxuser>,
+ <m
+List-Archive: <http://www.linux.org.uk/pipermail/linuxuser/>
+Date: Fri, 6 Apr 2001 16:03:39 +0100
diff --git a/lib-python/3/email/test/data/msg_26.txt b/lib-python/3/email/test/data/msg_26.txt
new file mode 100644
index 0000000000..58efaa9c9a
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_26.txt
@@ -0,0 +1,46 @@
+Received: from xcar [] by jeeves.wooster.local
+ (SMTPD32-7.07 EVAL) id AFF92F0214; Sun, 12 May 2002 08:55:37 +0100
+Date: Sun, 12 May 2002 08:56:15 +0100
+From: Father Time <father.time@xcar.wooster.local>
+To: timbo@jeeves.wooster.local
+Subject: IMAP file test
+Message-ID: <6df65d354b.father.time@rpc.wooster.local>
+X-Organization: Home
+User-Agent: Messenger-Pro/2.50a (MsgServe/1.50) (RISC-OS/4.02) POPstar/2.03
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="1618492860--2051301190--113853680"
+Status: R
+X-UIDL: 319998302
+This message is in MIME format which your mailer apparently does not support.
+You either require a newer version of your software which supports MIME, or
+a separate MIME decoding utility. Alternatively, ask the sender of this
+message to resend it in a different format.
+Content-Type: text/plain; charset=us-ascii
+Simple email with attachment.
+Content-Type: application/riscos; name="clock.bmp,69c"; type=BMP;
+ load=&fff69c4b; exec=&355dd4d1; access=&03
+Content-Disposition: attachment; filename="clock.bmp"
+Content-Transfer-Encoding: base64
diff --git a/lib-python/3/email/test/data/msg_27.txt b/lib-python/3/email/test/data/msg_27.txt
new file mode 100644
index 0000000000..d0191769d7
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_27.txt
@@ -0,0 +1,15 @@
+Return-Path: <aperson@dom.ain>
+Received: by mail.dom.ain (Postfix, from userid 889)
+ id B9D0AD35DB; Tue, 4 Jun 2002 21:46:59 -0400 (EDT)
+Message-ID: <15613.28051.707126.569693@dom.ain>
+Date: Tue, 4 Jun 2002 21:46:59 -0400
+MIME-Version: 1.0
+Content-Type: text/plain; charset=us-ascii
+Content-Transfer-Encoding: 7bit
+Subject: bug demonstration
+ 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
+ more text
+From: aperson@dom.ain (Anne P. Erson)
+To: bperson@dom.ain (Barney P. Erson)
diff --git a/lib-python/3/email/test/data/msg_28.txt b/lib-python/3/email/test/data/msg_28.txt
new file mode 100644
index 0000000000..1e4824cabb
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_28.txt
@@ -0,0 +1,25 @@
+From: aperson@dom.ain
+MIME-Version: 1.0
+Content-Type: multipart/digest; boundary=BOUNDARY
+Content-Type: message/rfc822
+Content-Type: text/plain; charset=us-ascii
+To: aa@bb.org
+From: cc@dd.org
+Subject: ee
+message 1
+Content-Type: message/rfc822
+Content-Type: text/plain; charset=us-ascii
+To: aa@bb.org
+From: cc@dd.org
+Subject: ee
+message 2
diff --git a/lib-python/3/email/test/data/msg_29.txt b/lib-python/3/email/test/data/msg_29.txt
new file mode 100644
index 0000000000..1fab561617
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_29.txt
@@ -0,0 +1,22 @@
+Return-Path: <bbb@zzz.org>
+Delivered-To: bbb@zzz.org
+Received: by mail.zzz.org (Postfix, from userid 889)
+ id 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=us-ascii;
+ title*0*="us-ascii'en'This%20is%20even%20more%20";
+ title*1*="%2A%2A%2Afun%2A%2A%2A%20";
+ title*2="isn't it!"
+Content-Transfer-Encoding: 7bit
+Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
+From: bbb@ddd.com (John X. Doe)
+To: bbb@zzz.org
+Subject: This is a test message
+Date: Fri, 4 May 2001 14:05:44 -0400
+Do you like this message?
diff --git a/lib-python/3/email/test/data/msg_30.txt b/lib-python/3/email/test/data/msg_30.txt
new file mode 100644
index 0000000000..4334bb6e60
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_30.txt
@@ -0,0 +1,23 @@
+From: aperson@dom.ain
+MIME-Version: 1.0
+Content-Type: multipart/digest; boundary=BOUNDARY
+Content-Type: text/plain; charset=us-ascii
+To: aa@bb.org
+From: cc@dd.org
+Subject: ee
+message 1
+Content-Type: text/plain; charset=us-ascii
+To: aa@bb.org
+From: cc@dd.org
+Subject: ee
+message 2
diff --git a/lib-python/3/email/test/data/msg_31.txt b/lib-python/3/email/test/data/msg_31.txt
new file mode 100644
index 0000000000..1e58e56cf5
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_31.txt
@@ -0,0 +1,15 @@
+From: aperson@dom.ain
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary=BOUNDARY_
+Content-Type: text/plain
+message 1
+Content-Type: text/plain
+message 2
diff --git a/lib-python/3/email/test/data/msg_32.txt b/lib-python/3/email/test/data/msg_32.txt
new file mode 100644
index 0000000000..07ec5af9a3
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_32.txt
@@ -0,0 +1,14 @@
+Delivered-To: freebsd-isp@freebsd.org
+Date: Tue, 26 Sep 2000 12:23:03 -0500
+From: Anne Person <aperson@example.com>
+To: Barney Dude <bdude@example.com>
+Subject: Re: Limiting Perl CPU Utilization...
+Mime-Version: 1.0
+Content-Type: text/plain; charset*=ansi-x3.4-1968''us-ascii
+Content-Disposition: inline
+User-Agent: Mutt/1.3.8i
+Sender: owner-freebsd-isp@FreeBSD.ORG
+Precedence: bulk
+X-Loop: FreeBSD.org
+Some message.
diff --git a/lib-python/3/email/test/data/msg_33.txt b/lib-python/3/email/test/data/msg_33.txt
new file mode 100644
index 0000000000..042787a4fd
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_33.txt
@@ -0,0 +1,29 @@
+Delivered-To: freebsd-isp@freebsd.org
+Date: Wed, 27 Sep 2000 11:11:09 -0500
+From: Anne Person <aperson@example.com>
+To: Barney Dude <bdude@example.com>
+Subject: Re: Limiting Perl CPU Utilization...
+Mime-Version: 1.0
+Content-Type: multipart/signed; micalg*=ansi-x3.4-1968''pgp-md5;
+ protocol*=ansi-x3.4-1968''application%2Fpgp-signature;
+ boundary*="ansi-x3.4-1968''EeQfGwPcQSOJBaQU"
+Content-Disposition: inline
+Sender: owner-freebsd-isp@FreeBSD.ORG
+Precedence: bulk
+X-Loop: FreeBSD.org
+Content-Type: text/plain; charset*=ansi-x3.4-1968''us-ascii
+Content-Disposition: inline
+Content-Transfer-Encoding: quoted-printable
+part 1
+Content-Type: text/plain
+Content-Disposition: inline
+part 2
diff --git a/lib-python/3/email/test/data/msg_34.txt b/lib-python/3/email/test/data/msg_34.txt
new file mode 100644
index 0000000000..055dfea531
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_34.txt
@@ -0,0 +1,19 @@
+From: aperson@dom.ain
+To: bperson@dom.ain
+Content-Type: multipart/digest; boundary=XYZ
+Content-Type: text/plain
+This is a text plain part that is counter to recommended practice in
+RFC 2046, $5.1.5, but is not illegal
+From: cperson@dom.ain
+To: dperson@dom.ain
+A submessage
diff --git a/lib-python/3/email/test/data/msg_35.txt b/lib-python/3/email/test/data/msg_35.txt
new file mode 100644
index 0000000000..be7d5a2f7b
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_35.txt
@@ -0,0 +1,4 @@
+From: aperson@dom.ain
+To: bperson@dom.ain
+Subject: here's something interesting
+counter to RFC 2822, there's no separating newline here
diff --git a/lib-python/3/email/test/data/msg_36.txt b/lib-python/3/email/test/data/msg_36.txt
new file mode 100644
index 0000000000..5632c3062c
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_36.txt
@@ -0,0 +1,40 @@
+Mime-Version: 1.0
+Content-Type: Multipart/Mixed; Boundary="NextPart"
+To: IETF-Announce:;
+From: Internet-Drafts@ietf.org
+Subject: I-D ACTION:draft-ietf-mboned-mix-00.txt
+Date: Tue, 22 Dec 1998 16:55:06 -0500
+Blah blah blah
+Content-Type: Multipart/Alternative; Boundary="OtherAccess"
+Content-Type: Message/External-body;
+ access-type="mail-server";
+ server="mailserv@ietf.org"
+Content-Type: text/plain
+Content-ID: <19981222151406.I-D@ietf.org>
+FILE /internet-drafts/draft-ietf-mboned-mix-00.txt
+Content-Type: Message/External-body;
+ name="draft-ietf-mboned-mix-00.txt";
+ site="ftp.ietf.org";
+ access-type="anon-ftp";
+ directory="internet-drafts"
+Content-Type: text/plain
+Content-ID: <19981222151406.I-D@ietf.org>
diff --git a/lib-python/3/email/test/data/msg_37.txt b/lib-python/3/email/test/data/msg_37.txt
new file mode 100644
index 0000000000..038d34a1a4
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_37.txt
@@ -0,0 +1,22 @@
+Content-Type: multipart/mixed; boundary=ABCDE
+Content-Type: text/x-one
+Content-Type: text/x-two
+Content-Type: text/x-two
diff --git a/lib-python/3/email/test/data/msg_38.txt b/lib-python/3/email/test/data/msg_38.txt
new file mode 100644
index 0000000000..006df81cb5
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_38.txt
@@ -0,0 +1,101 @@
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0"
+------- =_aaaaaaaaaa0
+Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa1"
+Content-ID: <20592.1022586929.1@example.com>
+------- =_aaaaaaaaaa1
+Content-Type: multipart/alternative; boundary="----- =_aaaaaaaaaa2"
+Content-ID: <20592.1022586929.2@example.com>
+------- =_aaaaaaaaaa2
+Content-Type: text/plain
+Content-ID: <20592.1022586929.3@example.com>
+Content-Description: very tricky
+Content-Transfer-Encoding: 7bit
+Unlike the test test_nested-multiples-with-internal-boundary, this
+piece of text not only contains the outer boundary tags
+------- =_aaaaaaaaaa1
+------- =_aaaaaaaaaa0
+but puts them at the start of a line! And, to be even nastier, it
+even includes a couple of end tags, such as this one:
+------- =_aaaaaaaaaa1--
+and this one, which is from a multipart we haven't even seen yet!
+------- =_aaaaaaaaaa4--
+This will, I'm sure, cause much breakage of MIME parsers. But, as
+far as I can tell, it's perfectly legal. I have not yet ever seen
+a case of this in the wild, but I've seen *similar* things.
+------- =_aaaaaaaaaa2
+Content-Type: application/octet-stream
+Content-ID: <20592.1022586929.4@example.com>
+Content-Description: patch2
+Content-Transfer-Encoding: base64
+------- =_aaaaaaaaaa2--
+------- =_aaaaaaaaaa1
+Content-Type: multipart/alternative; boundary="----- =_aaaaaaaaaa3"
+Content-ID: <20592.1022586929.6@example.com>
+------- =_aaaaaaaaaa3
+Content-Type: application/octet-stream
+Content-ID: <20592.1022586929.7@example.com>
+Content-Description: patch3
+Content-Transfer-Encoding: base64
+------- =_aaaaaaaaaa3
+Content-Type: application/octet-stream
+Content-ID: <20592.1022586929.8@example.com>
+Content-Description: patch4
+Content-Transfer-Encoding: base64
+------- =_aaaaaaaaaa3--
+------- =_aaaaaaaaaa1
+Content-Type: multipart/alternative; boundary="----- =_aaaaaaaaaa4"
+Content-ID: <20592.1022586929.10@example.com>
+------- =_aaaaaaaaaa4
+Content-Type: application/octet-stream
+Content-ID: <20592.1022586929.11@example.com>
+Content-Description: patch5
+Content-Transfer-Encoding: base64
+------- =_aaaaaaaaaa4
+Content-Type: application/octet-stream
+Content-ID: <20592.1022586929.12@example.com>
+Content-Description: patch6
+Content-Transfer-Encoding: base64
+------- =_aaaaaaaaaa4--
+------- =_aaaaaaaaaa1--
+------- =_aaaaaaaaaa0
+Content-Type: text/plain; charset="us-ascii"
+Content-ID: <20592.1022586929.15@example.com>
+It's never too late to have a happy childhood.
+------- =_aaaaaaaaaa0--
diff --git a/lib-python/3/email/test/data/msg_39.txt b/lib-python/3/email/test/data/msg_39.txt
new file mode 100644
index 0000000000..124b269192
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_39.txt
@@ -0,0 +1,83 @@
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa0"
+------- =_aaaaaaaaaa0
+Content-Type: multipart/mixed; boundary="----- =_aaaaaaaaaa1"
+Content-ID: <20592.1022586929.1@example.com>
+------- =_aaaaaaaaaa1
+Content-Type: multipart/alternative; boundary="----- =_aaaaaaaaaa1"
+Content-ID: <20592.1022586929.2@example.com>
+------- =_aaaaaaaaaa1
+Content-Type: application/octet-stream
+Content-ID: <20592.1022586929.3@example.com>
+Content-Description: patch1
+Content-Transfer-Encoding: base64
+------- =_aaaaaaaaaa1
+Content-Type: application/octet-stream
+Content-ID: <20592.1022586929.4@example.com>
+Content-Description: patch2
+Content-Transfer-Encoding: base64
+------- =_aaaaaaaaaa1--
+------- =_aaaaaaaaaa1
+Content-Type: multipart/alternative; boundary="----- =_aaaaaaaaaa1"
+Content-ID: <20592.1022586929.6@example.com>
+------- =_aaaaaaaaaa1
+Content-Type: application/octet-stream
+Content-ID: <20592.1022586929.7@example.com>
+Content-Description: patch3
+Content-Transfer-Encoding: base64
+------- =_aaaaaaaaaa1
+Content-Type: application/octet-stream
+Content-ID: <20592.1022586929.8@example.com>
+Content-Description: patch4
+Content-Transfer-Encoding: base64
+------- =_aaaaaaaaaa1--
+------- =_aaaaaaaaaa1
+Content-Type: multipart/alternative; boundary="----- =_aaaaaaaaaa1"
+Content-ID: <20592.1022586929.10@example.com>
+------- =_aaaaaaaaaa1
+Content-Type: application/octet-stream
+Content-ID: <20592.1022586929.11@example.com>
+Content-Description: patch5
+Content-Transfer-Encoding: base64
+------- =_aaaaaaaaaa1
+Content-Type: application/octet-stream
+Content-ID: <20592.1022586929.12@example.com>
+Content-Description: patch6
+Content-Transfer-Encoding: base64
+------- =_aaaaaaaaaa1--
+------- =_aaaaaaaaaa1--
+------- =_aaaaaaaaaa0
+Content-Type: text/plain; charset="us-ascii"
+Content-ID: <20592.1022586929.15@example.com>
+It's never too late to have a happy childhood.
+------- =_aaaaaaaaaa0--
diff --git a/lib-python/3/email/test/data/msg_40.txt b/lib-python/3/email/test/data/msg_40.txt
new file mode 100644
index 0000000000..1435fa1e1a
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_40.txt
@@ -0,0 +1,10 @@
+MIME-Version: 1.0
+Content-Type: text/html; boundary="--961284236552522269"
+Content-Type: text/html;
+Content-Transfer-Encoding: 7Bit
diff --git a/lib-python/3/email/test/data/msg_41.txt b/lib-python/3/email/test/data/msg_41.txt
new file mode 100644
index 0000000000..76cdd1cb7f
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_41.txt
@@ -0,0 +1,8 @@
+From: "Allison Dunlap" <xxx@example.com>
+To: yyy@example.com
+Subject: 64423
+Date: Sun, 11 Jul 2004 16:09:27 -0300
+MIME-Version: 1.0
+Content-Type: multipart/alternative;
+Blah blah blah
diff --git a/lib-python/3/email/test/data/msg_42.txt b/lib-python/3/email/test/data/msg_42.txt
new file mode 100644
index 0000000000..a75f8f4a02
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_42.txt
@@ -0,0 +1,20 @@
+Content-Type: multipart/mixed; boundary="AAA"
+From: Mail Delivery Subsystem <xxx@example.com>
+To: yyy@example.com
+This is a MIME-encapsulated message
+Content-Type: message/rfc822
+From: webmaster@python.org
+To: zzz@example.com
+Content-Type: multipart/mixed; boundary="BBB"
diff --git a/lib-python/3/email/test/data/msg_43.txt b/lib-python/3/email/test/data/msg_43.txt
new file mode 100644
index 0000000000..797d12c568
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_43.txt
@@ -0,0 +1,217 @@
+From SRS0=aO/p=ON=bag.python.org=None@bounce2.pobox.com Fri Nov 26 21:40:36 2004
+X-VM-v5-Data: ([nil nil nil nil nil nil nil nil nil]
+ [nil nil nil nil nil nil nil "MAILER DAEMON <>" "MAILER DAEMON <>" nil nil "Banned file: auto__mail.python.bat in mail from you" "^From:" nil nil nil nil "Banned file: auto__mail.python.bat in mail from you" nil nil nil nil nil nil nil]
+ nil)
+MIME-Version: 1.0
+Message-Id: <edab.7804f5cb8070@python.org>
+Content-Type: multipart/report; report-type=delivery-status;
+ charset=utf-8;
+ boundary="----------=_1101526904-1956-5"
+X-Virus-Scanned: by XS4ALL Virus Scanner
+X-UIDL: 4\G!!!<c"!UV["!M7C!!
+To: <webmaster@python.org>
+Subject: Banned file: auto__mail.python.bat in mail from you
+Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+This is a multi-part message in MIME format...
+Content-Type: text/plain; charset="utf-8"
+Content-Disposition: inline
+Content-Transfer-Encoding: 7bit
+Your message to: xxxxxxx@dot.ca.gov, xxxxxxxxxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxx@dot.ca.gov, xxxxxx@dot.ca.gov, xxxxxxxxxxxxxxxx@dot.ca.gov, xxxxxxxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxxxxxxxx@dot.ca.gov, xxxxxxxxxxxx@dot.ca.gov, xxxxxxx@dot.ca.gov, xxxxxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxx@dot.ca.gov, xxx@dot.ca.gov, xxxxxxx@dot.ca.gov, xxxxxxx@dot.ca.gov, xxxxxxxxxxxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxxx@dot.ca.gov, xxx@dot.ca.gov, xxxxxxxx@dot.ca.gov, xxxxxxxxxxxxx@dot.ca.gov, xxxxxxxxxxxxx@dot.ca.gov, xxxxxxxxxxx@dot.ca.gov, xxxxxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxxxxxxxx@dot.ca.gov, xxxxxxx@dot.ca.gov, xxxxxxxxxxxxxxx@dot.ca.gov, xxxxxxxxxxxxx@dot.ca.gov, xxxx@dot.ca.gov, xxxxxxxx@dot.ca.gov, xxxxxxxxxx@dot.ca.gov, xxxxxxxxxxxxxxxxxx@dot.ca.gov
+was blocked by our Spam Firewall. The email you sent with the following subject has NOT BEEN DELIVERED:
+Subject: Delivery_failure_notice
+An attachment in that mail was of a file type that the Spam Firewall is set to block.
+Content-Type: message/delivery-status
+Content-Disposition: inline
+Content-Transfer-Encoding: 7bit
+Content-Description: Delivery error report
+Reporting-MTA: dns; sacspam01.dot.ca.gov
+Received-From-MTA: smtp; sacspam01.dot.ca.gov ([])
+Arrival-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Final-Recipient: rfc822; xxxxxxx@dot.ca.gov
+Action: failed
+Status: 5.7.1
+Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat
+Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Final-Recipient: rfc822; xxxxxxxxxxxxx@dot.ca.gov
+Action: failed
+Status: 5.7.1
+Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat
+Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Final-Recipient: rfc822; xxxxxxxxxx@dot.ca.gov
+Action: failed
+Status: 5.7.1
+Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat
+Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Final-Recipient: rfc822; xxxxxxxx@dot.ca.gov
+Action: failed
+Status: 5.7.1
+Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat
+Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Final-Recipient: rfc822; xxxxxxxxxx@dot.ca.gov
+Action: failed
+Status: 5.7.1
+Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat
+Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Final-Recipient: rfc822; xxxxxx@dot.ca.gov
+Action: failed
+Status: 5.7.1
+Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat
+Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Final-Recipient: rfc822; xxxxxxxxxx@dot.ca.gov
+Action: failed
+Status: 5.7.1
+Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat
+Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Final-Recipient: rfc822; xxxxxx@dot.ca.gov
+Action: failed
+Status: 5.7.1
+Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat
+Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Final-Recipient: rfc822; xxxxxx@dot.ca.gov
+Action: failed
+Status: 5.7.1
+Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat
+Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Final-Recipient: rfc822; xxxxxxxxxxxxxxxx@dot.ca.gov
+Action: failed
+Status: 5.7.1
+Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat
+Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Final-Recipient: rfc822; xxxxxxxxxxx@dot.ca.gov
+Action: failed
+Status: 5.7.1
+Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat
+Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Final-Recipient: rfc822; xxxxxxxxxx@dot.ca.gov
+Action: failed
+Status: 5.7.1
+Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat
+Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Final-Recipient: rfc822; xxxxxxxxxx@dot.ca.gov
+Action: failed
+Status: 5.7.1
+Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat
+Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Final-Recipient: rfc822; xxxxxxxxxxxx@dot.ca.gov
+Action: failed
+Status: 5.7.1
+Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat
+Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Final-Recipient: rfc822; xxxxxxxxxxxx@dot.ca.gov
+Action: failed
+Status: 5.7.1
+Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat
+Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Final-Recipient: rfc822; xxxxxxx@dot.ca.gov
+Action: failed
+Status: 5.7.1
+Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat
+Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Final-Recipient: rfc822; xxxxxxxxx@dot.ca.gov
+Action: failed
+Status: 5.7.1
+Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat
+Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Final-Recipient: rfc822; xxxxxxxxxx@dot.ca.gov
+Action: failed
+Status: 5.7.1
+Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat
+Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Final-Recipient: rfc822; xxxxxx@dot.ca.gov
+Action: failed
+Status: 5.7.1
+Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat
+Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Final-Recipient: rfc822; xxx@dot.ca.gov
+Action: failed
+Status: 5.7.1
+Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat
+Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Final-Recipient: rfc822; xxxxxxx@dot.ca.gov
+Action: failed
+Status: 5.7.1
+Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat
+Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Final-Recipient: rfc822; xxxxxxx@dot.ca.gov
+Action: failed
+Status: 5.7.1
+Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat
+Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Final-Recipient: rfc822; xxxxxxxxxxxxxxx@dot.ca.gov
+Action: failed
+Status: 5.7.1
+Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat
+Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Final-Recipient: rfc822; xxxxxxxxxx@dot.ca.gov
+Action: failed
+Status: 5.7.1
+Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat
+Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Final-Recipient: rfc822; xxxxxxx@dot.ca.gov
+Action: failed
+Status: 5.7.1
+Diagnostic-Code: smtp; 550 5.7.1 Message content rejected, id=01956-02-2 - BANNED: auto__mail.python.bat
+Last-Attempt-Date: Fri, 26 Nov 2004 19:41:44 -0800 (PST)
+Content-Type: text/rfc822-headers
+Content-Disposition: inline
+Content-Transfer-Encoding: 7bit
+Content-Description: Undelivered-message headers
+Received: from kgsav.org (ppp-70-242-162-63.dsl.spfdmo.swbell.net [])
+ by sacspam01.dot.ca.gov (Spam Firewall) with SMTP
+ id A232AD03DE3A; Fri, 26 Nov 2004 19:41:35 -0800 (PST)
+From: webmaster@python.org
+To: xxxxx@dot.ca.gov
+Date: Sat, 27 Nov 2004 03:35:30 UTC
+Subject: Delivery_failure_notice
+Importance: Normal
+X-Priority: 3 (Normal)
+X-MSMail-Priority: Normal
+Message-ID: <edab.7804f5cb8070@python.org>
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="====67bd2b7a5.f99f7"
+Content-Transfer-Encoding: 7bit
diff --git a/lib-python/3/email/test/data/msg_44.txt b/lib-python/3/email/test/data/msg_44.txt
new file mode 100644
index 0000000000..15a225287b
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_44.txt
@@ -0,0 +1,33 @@
+Return-Path: <barry@python.org>
+Delivered-To: barry@python.org
+Received: by mail.python.org (Postfix, from userid 889)
+ id C2BF0D37C6; Tue, 11 Sep 2001 00:05:05 -0400 (EDT)
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary="h90VIIIKmx"
+Content-Transfer-Encoding: 7bit
+Message-ID: <15261.36209.358846.118674@anthem.python.org>
+From: barry@python.org (Barry A. Warsaw)
+To: barry@python.org
+Subject: a simple multipart
+Date: Tue, 11 Sep 2001 00:05:05 -0400
+X-Mailer: VM 6.95 under 21.4 (patch 4) "Artificial Intelligence" XEmacs Lucid
+X-Attribution: BAW
+X-Oblique-Strategy: Make a door into a window
+Content-Type: text/plain; name="msg.txt"
+Content-Transfer-Encoding: 7bit
+a simple kind of mirror
+to reflect upon our own
+Content-Type: text/plain; name="msg.txt"
+Content-Transfer-Encoding: 7bit
+a simple kind of mirror
+to reflect upon our own
diff --git a/lib-python/3/email/test/data/msg_45.txt b/lib-python/3/email/test/data/msg_45.txt
new file mode 100644
index 0000000000..58fde956e7
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_45.txt
@@ -0,0 +1,33 @@
+From: <foo@bar.baz>
+To: <baz@bar.foo>
+Subject: test
+X-Long-Line: Some really long line contains a lot of text and thus has to be rewrapped because it is some
+ really long
+ line
+MIME-Version: 1.0
+Content-Type: multipart/signed; boundary="borderline";
+ protocol="application/pgp-signature"; micalg=pgp-sha1
+This is an OpenPGP/MIME signed message (RFC 2440 and 3156)
+Content-Type: text/plain
+X-Long-Line: Another really long line contains a lot of text and thus has to be rewrapped because it is another
+ really long
+ line
+This is the signed contents.
+Content-Type: application/pgp-signature; name="signature.asc"
+Content-Description: OpenPGP digital signature
+Content-Disposition: attachment; filename="signature.asc"
+Version: GnuPG v2.0.6 (GNU/Linux)
diff --git a/lib-python/3/email/test/data/msg_46.txt b/lib-python/3/email/test/data/msg_46.txt
new file mode 100644
index 0000000000..1e22c4f600
--- /dev/null
+++ b/lib-python/3/email/test/data/msg_46.txt
@@ -0,0 +1,23 @@
+Return-Path: <sender@example.net>
+Delivery-Date: Mon, 08 Feb 2010 14:05:16 +0100
+Received: from example.org (example.org [])
+ by example.net (node=mxbap2) with ESMTP (Nemesis)
+ id UNIQUE for someone@example.com; Mon, 08 Feb 2010 14:05:16 +0100
+Date: Mon, 01 Feb 2010 12:21:16 +0100
+From: "Sender" <sender@example.net>
+To: <someone@example.com>
+Subject: GroupwiseForwardingTest
+Mime-Version: 1.0
+Content-Type: message/rfc822
+Return-path: <sender@example.net>
+Message-ID: <4B66B890.4070408@teconcept.de>
+Date: Mon, 01 Feb 2010 12:18:40 +0100
+From: "Dr. Sender" <sender@example.net>
+MIME-Version: 1.0
+To: "Recipient" <recipient@example.com>
+Subject: GroupwiseForwardingTest
+Content-Type: text/plain; charset=ISO-8859-15
+Content-Transfer-Encoding: 7bit
+Testing email forwarding with Groupwise 1.2.2010
diff --git a/lib-python/3/email/test/test_email.py b/lib-python/3/email/test/test_email.py
new file mode 100644
index 0000000000..102e15b9ff
--- /dev/null
+++ b/lib-python/3/email/test/test_email.py
@@ -0,0 +1,4653 @@
+# Copyright (C) 2001-2010 Python Software Foundation
+# Contact: email-sig@python.org
+# email package unit tests
+import os
+import re
+import sys
+import time
+import base64
+import difflib
+import unittest
+import warnings
+import textwrap
+from io import StringIO, BytesIO
+from itertools import chain
+import email
+from email.charset import Charset
+from email.header import Header, decode_header, make_header
+from email.parser import Parser, HeaderParser
+from email.generator import Generator, DecodedGenerator
+from email.message import Message
+from email.mime.application import MIMEApplication
+from email.mime.audio import MIMEAudio
+from email.mime.text import MIMEText
+from email.mime.image import MIMEImage
+from email.mime.base import MIMEBase
+from email.mime.message import MIMEMessage
+from email.mime.multipart import MIMEMultipart
+from email import utils
+from email import errors
+from email import encoders
+from email import iterators
+from email import base64mime
+from email import quoprimime
+from test.support import findfile, run_unittest, unlink
+from email.test import __file__ as landmark
+NL = '\n'
+SPACE = ' '
+def openfile(filename, *args, **kws):
+ # Open `filename` from the 'data' directory located next to the
+ # email.test package file (imported above as `landmark`). Any extra
+ # positional/keyword arguments are forwarded unchanged to open().
+ path = os.path.join(os.path.dirname(landmark), 'data', filename)
+ return open(path, *args, **kws)
+# Base test class
+class TestEmailBase(unittest.TestCase):
+ # Helpers shared by the test classes in this file that subclass it.
+ def ndiffAssertEqual(self, first, second):
+ """Like assertEqual except use ndiff for readable output."""
+ # On mismatch both values are str()-ed, split into lines and repr()-ed
+ # line by line (so whitespace differences show up); the ndiff of the
+ # two line lists becomes the failure message.
+ if first != second:
+ sfirst = str(first)
+ ssecond = str(second)
+ rfirst = [repr(line) for line in sfirst.splitlines()]
+ rsecond = [repr(line) for line in ssecond.splitlines()]
+ diff = difflib.ndiff(rfirst, rsecond)
+ raise self.failureException(NL + NL.join(diff))
+ def _msgobj(self, filename):
+ # Parse the named data file with email.message_from_file(). The file
+ # is opened with open()'s default (text) mode and closed when the
+ # with-block exits.
+ # NOTE(review): the name goes through test.support.findfile() before
+ # openfile() joins it onto the data directory -- presumably findfile()
+ # returns the bare name when it cannot locate the file itself; confirm.
+ with openfile(findfile(filename)) as fp:
+ return email.message_from_file(fp)
+# Test various aspects of the Message class's API
+class TestMessageAPI(TestEmailBase):
+ # Exercises the Message object's accessors and mutators: header lookup,
+ # charsets, header parameters, content types, payload decoding and
+ # add_header()/replace_header().
+ def test_get_all(self):
+ # get_all() yields a list of values for 'cc' and returns the supplied
+ # default ('n/a') for 'xx'.
+ eq = self.assertEqual
+ msg = self._msgobj('msg_20.txt')
+ eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
+ eq(msg.get_all('xx', 'n/a'), 'n/a')
+ def test_getset_charset(self):
+ # set_charset() on a fresh Message is expected to add MIME-Version,
+ # Content-Type and Content-Transfer-Encoding; set_charset(None) drops
+ # the charset parameter again.
+ eq = self.assertEqual
+ msg = Message()
+ eq(msg.get_charset(), None)
+ charset = Charset('iso-8859-1')
+ msg.set_charset(charset)
+ eq(msg['mime-version'], '1.0')
+ eq(msg.get_content_type(), 'text/plain')
+ eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
+ eq(msg.get_param('charset'), 'iso-8859-1')
+ eq(msg['content-transfer-encoding'], 'quoted-printable')
+ eq(msg.get_charset().input_charset, 'iso-8859-1')
+ # Remove the charset
+ msg.set_charset(None)
+ eq(msg.get_charset(), None)
+ eq(msg['content-type'], 'text/plain')
+ # Try adding a charset when there's already MIME headers present
+ msg = Message()
+ msg['MIME-Version'] = '2.0'
+ msg['Content-Type'] = 'text/x-weird'
+ msg['Content-Transfer-Encoding'] = 'quinted-puntable'
+ msg.set_charset(charset)
+ eq(msg['mime-version'], '2.0')
+ eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
+ eq(msg['content-transfer-encoding'], 'quinted-puntable')
+ def test_set_charset_from_string(self):
+ # set_charset() also accepts the charset name as a plain string.
+ eq = self.assertEqual
+ msg = Message()
+ msg.set_charset('us-ascii')
+ eq(msg.get_charset().input_charset, 'us-ascii')
+ eq(msg['content-type'], 'text/plain; charset="us-ascii"')
+ def test_set_payload_with_charset(self):
+ # The charset given as second argument to set_payload() must be
+ # retrievable through get_charset().
+ msg = Message()
+ charset = Charset('iso-8859-1')
+ msg.set_payload('This is a string payload', charset)
+ self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')
+ def test_get_charsets(self):
+ # get_charsets() returns a list of charset names (None where there is
+ # none). When an argument is given ('dingbat'), the expected list
+ # contains that value and no None entries.
+ eq = self.assertEqual
+ msg = self._msgobj('msg_08.txt')
+ charsets = msg.get_charsets()
+ eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])
+ msg = self._msgobj('msg_09.txt')
+ charsets = msg.get_charsets('dingbat')
+ eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
+ 'koi8-r'])
+ msg = self._msgobj('msg_12.txt')
+ charsets = msg.get_charsets()
+ eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
+ 'iso-8859-3', 'us-ascii', 'koi8-r'])
+ def test_get_filename(self):
+ # get_filename() on every subpart of msg_04.txt and on the second
+ # subpart (index 1) of msg_07.txt.
+ eq = self.assertEqual
+ msg = self._msgobj('msg_04.txt')
+ filenames = [p.get_filename() for p in msg.get_payload()]
+ eq(filenames, ['msg.txt', 'msg.txt'])
+ msg = self._msgobj('msg_07.txt')
+ subpart = msg.get_payload(1)
+ eq(subpart.get_filename(), 'dingusfish.gif')
+ def test_get_filename_with_name_parameter(self):
+ # msg_44.txt carries the file name in the Content-Type name=
+ # parameter; get_filename() is still expected to return it.
+ eq = self.assertEqual
+ msg = self._msgobj('msg_44.txt')
+ filenames = [p.get_filename() for p in msg.get_payload()]
+ eq(filenames, ['msg.txt', 'msg.txt'])
+ def test_get_boundary(self):
+ eq = self.assertEqual
+ msg = self._msgobj('msg_07.txt')
+ # No quotes!
+ eq(msg.get_boundary(), 'BOUNDARY')
+ def test_set_boundary(self):
+ # set_boundary() must leave the Content-Type header at its original
+ # position (index 4 of items()) and raise HeaderParseError when the
+ # message has no Content-Type header.
+ eq = self.assertEqual
+ # This one has no existing boundary parameter, but the Content-Type:
+ # header appears fifth.
+ msg = self._msgobj('msg_01.txt')
+ msg.set_boundary('BOUNDARY')
+ header, value = msg.items()[4]
+ eq(header.lower(), 'content-type')
+ eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
+ # This one has a Content-Type: header, with a boundary, stuck in the
+ # middle of its headers. Make sure the order is preserved; it should
+ # be fifth.
+ msg = self._msgobj('msg_04.txt')
+ msg.set_boundary('BOUNDARY')
+ header, value = msg.items()[4]
+ eq(header.lower(), 'content-type')
+ eq(value, 'multipart/mixed; boundary="BOUNDARY"')
+ # And this one has no Content-Type: header at all.
+ msg = self._msgobj('msg_03.txt')
+ self.assertRaises(errors.HeaderParseError,
+ msg.set_boundary, 'BOUNDARY')
+ def test_make_boundary(self):
+ msg = MIMEMultipart('form-data')
+ # Note that when the boundary gets created is an implementation
+ # detail and might change.
+ self.assertEqual(msg.items()[0][1], 'multipart/form-data')
+ # Trigger creation of boundary
+ msg.as_string()
+ self.assertEqual(msg.items()[0][1][:33],
+ 'multipart/form-data; boundary="==')
+ # XXX: there ought to be tests of the uniqueness of the boundary, too.
+ def test_message_rfc822_only(self):
+ # Issue 7970: message/rfc822 not in multipart parsed by
+ # HeaderParser caused an exception when flattened.
+ with openfile(findfile('msg_46.txt')) as fp:
+ msgdata = fp.read()
+ parser = HeaderParser()
+ msg = parser.parsestr(msgdata)
+ out = StringIO()
+ # NOTE(review): positional arguments -- presumably mangle_from_=True,
+ # maxheaderlen=0 for Generator and unixfrom=False for flatten();
+ # confirm against the Generator signature.
+ gen = Generator(out, True, 0)
+ gen.flatten(msg, False)
+ self.assertEqual(out.getvalue(), msgdata)
+ def test_get_decoded_payload(self):
+ # get_payload(decode=True) always returns bytes for the leaf parts
+ # checked below and None for the multipart container.
+ eq = self.assertEqual
+ msg = self._msgobj('msg_10.txt')
+ # The outer message is a multipart
+ eq(msg.get_payload(decode=True), None)
+ # Subpart 1 is 7bit encoded
+ eq(msg.get_payload(0).get_payload(decode=True),
+ b'This is a 7bit encoded message.\n')
+ # Subpart 2 is quopri
+ eq(msg.get_payload(1).get_payload(decode=True),
+ b'\xa1This is a Quoted Printable encoded message!\n')
+ # Subpart 3 is base64
+ eq(msg.get_payload(2).get_payload(decode=True),
+ b'This is a Base64 encoded message.')
+ # Subpart 4 is base64 with a trailing newline, which
+ # used to be stripped (issue 7143).
+ eq(msg.get_payload(3).get_payload(decode=True),
+ b'This is a Base64 encoded message.\n')
+ # Subpart 5 has no Content-Transfer-Encoding: header.
+ eq(msg.get_payload(4).get_payload(decode=True),
+ b'This has no Content-Transfer-Encoding: header.\n')
+ def test_get_decoded_uu_payload(self):
+ # Each listed uuencode Content-Transfer-Encoding spelling must decode
+ # the payload; a payload that is not uu data ('foo') is expected back
+ # unchanged, as bytes.
+ eq = self.assertEqual
+ msg = Message()
+ msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
+ for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
+ msg['content-transfer-encoding'] = cte
+ eq(msg.get_payload(decode=True), b'hello world')
+ # Now try some bogus data
+ msg.set_payload('foo')
+ eq(msg.get_payload(decode=True), b'foo')
+ def test_get_payload_n_raises_on_non_multipart(self):
+ # Asking a freshly created Message for payload item 1 raises TypeError.
+ msg = Message()
+ self.assertRaises(TypeError, msg.get_payload, 1)
+ def test_decoded_generator(self):
+ # Flattening msg_07.txt with DecodedGenerator must produce exactly the
+ # text stored in msg_17.txt.
+ eq = self.assertEqual
+ msg = self._msgobj('msg_07.txt')
+ with openfile('msg_17.txt') as fp:
+ text = fp.read()
+ s = StringIO()
+ g = DecodedGenerator(s)
+ g.flatten(msg)
+ eq(s.getvalue(), text)
+ def test__contains__(self):
+ msg = Message()
+ msg['From'] = 'Me'
+ msg['to'] = 'You'
+ # Check for case insensitivity
+ self.assertTrue('from' in msg)
+ self.assertTrue('From' in msg)
+ self.assertTrue('FROM' in msg)
+ self.assertTrue('to' in msg)
+ self.assertTrue('To' in msg)
+ self.assertTrue('TO' in msg)
+ def test_as_string(self):
+ # str(msg) must reproduce msg_01.txt; as_string(unixfrom=True) adds a
+ # first line starting with 'From ' and is otherwise the same text.
+ eq = self.ndiffAssertEqual
+ msg = self._msgobj('msg_01.txt')
+ with openfile('msg_01.txt') as fp:
+ text = fp.read()
+ eq(text, str(msg))
+ fullrepr = msg.as_string(unixfrom=True)
+ lines = fullrepr.split('\n')
+ self.assertTrue(lines[0].startswith('From '))
+ eq(text, NL.join(lines[1:]))
+ def test_bad_param(self):
+ # A parameter written without '=value' ('baz') is reported as ''.
+ msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
+ self.assertEqual(msg.get_param('baz'), '')
+ def test_missing_filename(self):
+ # No Content-Disposition / Content-Type in the message: None.
+ msg = email.message_from_string("From: foo\n")
+ self.assertEqual(msg.get_filename(), None)
+ def test_bogus_filename(self):
+ # A 'filename' parameter without a value gives the empty string.
+ msg = email.message_from_string(
+ "Content-Disposition: blarg; filename\n")
+ self.assertEqual(msg.get_filename(), '')
+ def test_missing_boundary(self):
+ # No Content-Type header in the message: get_boundary() gives None.
+ msg = email.message_from_string("From: foo\n")
+ self.assertEqual(msg.get_boundary(), None)
+ def test_get_params(self):
+ # get_params(header=...) on a non-default header: a bare parameter has
+ # '' as value and quotes around a value are removed. Without header=
+ # the call returns None for this message, which only has X-Header.
+ eq = self.assertEqual
+ msg = email.message_from_string(
+ 'X-Header: foo=one; bar=two; baz=three\n')
+ eq(msg.get_params(header='x-header'),
+ [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
+ msg = email.message_from_string(
+ 'X-Header: foo; bar=one; baz=two\n')
+ eq(msg.get_params(header='x-header'),
+ [('foo', ''), ('bar', 'one'), ('baz', 'two')])
+ eq(msg.get_params(), None)
+ msg = email.message_from_string(
+ 'X-Header: foo; bar="one"; baz=two\n')
+ eq(msg.get_params(header='x-header'),
+ [('foo', ''), ('bar', 'one'), ('baz', 'two')])
+ def test_get_param_liberal(self):
+ # The value below has a stray 'Content-Type: ' prefix and spaces
+ # around '='; the boundary parameter must still be found.
+ msg = Message()
+ msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
+ self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')
+ def test_get_param(self):
+ # get_param() returns None for an unknown parameter and for a lookup
+ # without header= on a message that only has X-Header.
+ eq = self.assertEqual
+ msg = email.message_from_string(
+ "X-Header: foo=one; bar=two; baz=three\n")
+ eq(msg.get_param('bar', header='x-header'), 'two')
+ eq(msg.get_param('quuz', header='x-header'), None)
+ eq(msg.get_param('quuz'), None)
+ msg = email.message_from_string(
+ 'X-Header: foo; bar="one"; baz=two\n')
+ eq(msg.get_param('foo', header='x-header'), '')
+ eq(msg.get_param('bar', header='x-header'), 'one')
+ eq(msg.get_param('baz', header='x-header'), 'two')
+ # XXX: We are not RFC-2045 compliant! We cannot parse:
+ # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
+ # msg.get_param("weird")
+ # yet.
+ def test_get_param_funky_continuation_lines(self):
+ # Looks up the 'name' parameter on the second subpart of msg_22.txt.
+ msg = self._msgobj('msg_22.txt')
+ self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')
+ def test_get_param_with_semis_in_quotes(self):
+ # Semicolons inside a quoted value must not split the parameter;
+ # unquote=False keeps the surrounding double quotes.
+ msg = email.message_from_string(
+ 'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
+ self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
+ self.assertEqual(msg.get_param('name', unquote=False),
+ '"Jim&amp;&amp;Jill"')
+ def test_get_param_with_quotes(self):
+ # Numbered continuation parameters (bar*0, bar*1) containing escaped
+ # quotes are joined into one value. The second message is the same
+ # header text, written with a double-quoted Python literal.
+ msg = email.message_from_string(
+ 'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
+ self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
+ msg = email.message_from_string(
+ "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
+ self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
+ def test_field_containment(self):
+ # `in` on a parsed message ignores header-name case; a longer name
+ # ('headerx') is not considered present.
+ unless = self.assertTrue
+ msg = email.message_from_string('Header: exists')
+ unless('header' in msg)
+ unless('Header' in msg)
+ unless('HEADER' in msg)
+ self.assertFalse('headerx' in msg)
+ def test_set_param(self):
+ # set_param() on a Message without Content-Type: get_params() then
+ # lists ('text/plain', '') first. Values come back quoted when
+ # unquote=False. The last two lines target another header (X-Jimmy).
+ eq = self.assertEqual
+ msg = Message()
+ msg.set_param('charset', 'iso-2022-jp')
+ eq(msg.get_param('charset'), 'iso-2022-jp')
+ msg.set_param('importance', 'high value')
+ eq(msg.get_param('importance'), 'high value')
+ eq(msg.get_param('importance', unquote=False), '"high value"')
+ eq(msg.get_params(), [('text/plain', ''),
+ ('charset', 'iso-2022-jp'),
+ ('importance', 'high value')])
+ eq(msg.get_params(unquote=False), [('text/plain', ''),
+ ('charset', '"iso-2022-jp"'),
+ ('importance', '"high value"')])
+ msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
+ eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')
+ def test_del_param(self):
+ # del_param() removes just the named parameter; putting it back with
+ # set_param() places it after the parameters that remained.
+ eq = self.assertEqual
+ msg = self._msgobj('msg_05.txt')
+ eq(msg.get_params(),
+ [('multipart/report', ''), ('report-type', 'delivery-status'),
+ ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
+ old_val = msg.get_param("report-type")
+ msg.del_param("report-type")
+ eq(msg.get_params(),
+ [('multipart/report', ''),
+ ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
+ msg.set_param("report-type", old_val)
+ eq(msg.get_params(),
+ [('multipart/report', ''),
+ ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
+ ('report-type', old_val)])
+ def test_del_param_on_other_header(self):
+ # del_param() with an explicit header name (Content-Disposition).
+ msg = Message()
+ msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
+ msg.del_param('filename', 'content-disposition')
+ self.assertEqual(msg['content-disposition'], 'attachment')
+ def test_del_param_on_nonexistent_header(self):
+ # No assertion: the call just has to complete without raising.
+ msg = Message()
+ msg.del_param('filename', 'content-disposition')
+ def test_del_nonexistent_param(self):
+ # Deleting a parameter that is not there must leave the header as is.
+ # NOTE(review): `existing_header` is assigned but never read; the
+ # assertion compares against a literal instead.
+ msg = Message()
+ msg.add_header('Content-Type', 'text/plain', charset='utf-8')
+ existing_header = msg['Content-Type']
+ msg.del_param('foobar', header='Content-Type')
+ self.assertEqual(msg['Content-Type'], 'text/plain; charset="utf-8"')
+ def test_set_type(self):
+ # set_type() rejects a value without '/' (ValueError) and keeps the
+ # existing charset parameter when the type is changed afterwards.
+ eq = self.assertEqual
+ msg = Message()
+ self.assertRaises(ValueError, msg.set_type, 'text')
+ msg.set_type('text/plain')
+ eq(msg['content-type'], 'text/plain')
+ msg.set_param('charset', 'us-ascii')
+ eq(msg['content-type'], 'text/plain; charset="us-ascii"')
+ msg.set_type('text/html')
+ eq(msg['content-type'], 'text/html; charset="us-ascii"')
+ def test_set_type_on_other_header(self):
+ # set_type() with an explicit header name (X-Content-Type).
+ msg = Message()
+ msg['X-Content-Type'] = 'text/plain'
+ msg.set_type('application/octet-stream', 'X-Content-Type')
+ self.assertEqual(msg['x-content-type'], 'application/octet-stream')
+ # The next three groups check get_content_type(), get_content_maintype()
+ # and get_content_subtype() for: a fresh Message, a Message with
+ # set_default_type('message/rfc822'), and the sample messages msg_30,
+ # msg_28, msg_03 and msg_01.
+ def test_get_content_type_missing(self):
+ msg = Message()
+ self.assertEqual(msg.get_content_type(), 'text/plain')
+ def test_get_content_type_missing_with_default_type(self):
+ msg = Message()
+ msg.set_default_type('message/rfc822')
+ self.assertEqual(msg.get_content_type(), 'message/rfc822')
+ def test_get_content_type_from_message_implicit(self):
+ msg = self._msgobj('msg_30.txt')
+ self.assertEqual(msg.get_payload(0).get_content_type(),
+ 'message/rfc822')
+ def test_get_content_type_from_message_explicit(self):
+ msg = self._msgobj('msg_28.txt')
+ self.assertEqual(msg.get_payload(0).get_content_type(),
+ 'message/rfc822')
+ def test_get_content_type_from_message_text_plain_implicit(self):
+ msg = self._msgobj('msg_03.txt')
+ self.assertEqual(msg.get_content_type(), 'text/plain')
+ def test_get_content_type_from_message_text_plain_explicit(self):
+ msg = self._msgobj('msg_01.txt')
+ self.assertEqual(msg.get_content_type(), 'text/plain')
+ def test_get_content_maintype_missing(self):
+ msg = Message()
+ self.assertEqual(msg.get_content_maintype(), 'text')
+ def test_get_content_maintype_missing_with_default_type(self):
+ msg = Message()
+ msg.set_default_type('message/rfc822')
+ self.assertEqual(msg.get_content_maintype(), 'message')
+ def test_get_content_maintype_from_message_implicit(self):
+ msg = self._msgobj('msg_30.txt')
+ self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')
+ def test_get_content_maintype_from_message_explicit(self):
+ msg = self._msgobj('msg_28.txt')
+ self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')
+ def test_get_content_maintype_from_message_text_plain_implicit(self):
+ msg = self._msgobj('msg_03.txt')
+ self.assertEqual(msg.get_content_maintype(), 'text')
+ def test_get_content_maintype_from_message_text_plain_explicit(self):
+ msg = self._msgobj('msg_01.txt')
+ self.assertEqual(msg.get_content_maintype(), 'text')
+ def test_get_content_subtype_missing(self):
+ msg = Message()
+ self.assertEqual(msg.get_content_subtype(), 'plain')
+ def test_get_content_subtype_missing_with_default_type(self):
+ msg = Message()
+ msg.set_default_type('message/rfc822')
+ self.assertEqual(msg.get_content_subtype(), 'rfc822')
+ def test_get_content_subtype_from_message_implicit(self):
+ msg = self._msgobj('msg_30.txt')
+ self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')
+ def test_get_content_subtype_from_message_explicit(self):
+ msg = self._msgobj('msg_28.txt')
+ self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')
+ def test_get_content_subtype_from_message_text_plain_implicit(self):
+ msg = self._msgobj('msg_03.txt')
+ self.assertEqual(msg.get_content_subtype(), 'plain')
+ def test_get_content_subtype_from_message_text_plain_explicit(self):
+ msg = self._msgobj('msg_01.txt')
+ self.assertEqual(msg.get_content_subtype(), 'plain')
+ def test_get_content_maintype_error(self):
+ # A Content-Type value without '/' falls back to main type 'text'.
+ msg = Message()
+ msg['Content-Type'] = 'no-slash-in-this-string'
+ self.assertEqual(msg.get_content_maintype(), 'text')
+ def test_get_content_subtype_error(self):
+ # A Content-Type value without '/' falls back to subtype 'plain'.
+ msg = Message()
+ msg['Content-Type'] = 'no-slash-in-this-string'
+ self.assertEqual(msg.get_content_subtype(), 'plain')
+ def test_replace_header(self):
+ # replace_header() keeps header order and, with two 'First' headers
+ # present, changes only the earlier one; an unknown name raises
+ # KeyError.
+ eq = self.assertEqual
+ msg = Message()
+ msg.add_header('First', 'One')
+ msg.add_header('Second', 'Two')
+ msg.add_header('Third', 'Three')
+ eq(msg.keys(), ['First', 'Second', 'Third'])
+ eq(msg.values(), ['One', 'Two', 'Three'])
+ msg.replace_header('Second', 'Twenty')
+ eq(msg.keys(), ['First', 'Second', 'Third'])
+ eq(msg.values(), ['One', 'Twenty', 'Three'])
+ msg.add_header('First', 'Eleven')
+ msg.replace_header('First', 'One Hundred')
+ eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
+ eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
+ self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')
+ def test_broken_base64_payload(self):
+ # With a base64 CTE but a payload that does not decode, the payload
+ # text is expected back unchanged, as bytes.
+ x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
+ msg = Message()
+ msg['content-type'] = 'audio/x-midi'
+ msg['content-transfer-encoding'] = 'base64'
+ msg.set_payload(x)
+ self.assertEqual(msg.get_payload(decode=True),
+ bytes(x, 'raw-unicode-escape'))
+ def test_broken_unicode_payload(self):
+ # This test improves coverage but is not a compliance test.
+ # The behavior in this situation is currently undefined by the API.
+ x = 'this is a br\xf6ken thing to do'
+ msg = Message()
+ msg['content-type'] = 'text/plain'
+ msg['content-transfer-encoding'] = '8bit'
+ msg.set_payload(x)
+ self.assertEqual(msg.get_payload(decode=True),
+ bytes(x, 'raw-unicode-escape'))
+ def test_questionable_bytes_payload(self):
+ # This test improves coverage but is not a compliance test,
+ # since it involves poking inside the black box.
+ x = 'this is a quéstionable thing to do'.encode('utf-8')
+ msg = Message()
+ msg['content-type'] = 'text/plain; charset="utf-8"'
+ msg['content-transfer-encoding'] = '8bit'
+ msg._payload = x
+ self.assertEqual(msg.get_payload(decode=True), x)
+ # Issue 1078919
+ def test_ascii_add_header(self):
+ # A plain ASCII keyword value becomes a quoted parameter.
+ msg = Message()
+ msg.add_header('Content-Disposition', 'attachment',
+ filename='bud.gif')
+ self.assertEqual('attachment; filename="bud.gif"',
+ msg['Content-Disposition'])
+ def test_noascii_add_header(self):
+ # A non-ASCII value is expected in the filename*=utf-8''... form with
+ # percent-encoded bytes.
+ msg = Message()
+ msg.add_header('Content-Disposition', 'attachment',
+ filename="Fußballer.ppt")
+ self.assertEqual(
+ 'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
+ msg['Content-Disposition'])
+ def test_nonascii_add_header_via_triple(self):
+ # A 3-tuple value whose first item names the charset: the expected
+ # header uses that charset (iso-8859-1, hence %DF rather than %C3%9F).
+ msg = Message()
+ msg.add_header('Content-Disposition', 'attachment',
+ filename=('iso-8859-1', '', 'Fußballer.ppt'))
+ self.assertEqual(
+ 'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
+ msg['Content-Disposition'])
+ def test_ascii_add_header_with_tspecial(self):
+ # ASCII value containing spaces and brackets: stays a quoted string.
+ msg = Message()
+ msg.add_header('Content-Disposition', 'attachment',
+ filename="windows [filename].ppt")
+ self.assertEqual(
+ 'attachment; filename="windows [filename].ppt"',
+ msg['Content-Disposition'])
+ def test_nonascii_add_header_with_tspecial(self):
+ # Non-ASCII value with spaces and brackets: those characters are
+ # percent-encoded too (%20, %5B, %5D) in the expected header.
+ msg = Message()
+ msg.add_header('Content-Disposition', 'attachment',
+ filename="Fußballer [filename].ppt")
+ self.assertEqual(
+ "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
+ msg['Content-Disposition'])
+ def test_add_header_with_name_only_param(self):
+ # A keyword whose value is None gives a bare parameter; the
+ # underscore in the keyword name appears as a hyphen.
+ msg = Message()
+ msg.add_header('Content-Disposition', 'inline', foo_bar=None)
+ self.assertEqual("inline; foo-bar", msg['Content-Disposition'])
+ def test_add_header_with_no_value(self):
+ # None as the header value results in an empty string.
+ msg = Message()
+ msg.add_header('X-Status', None)
+ self.assertEqual('', msg['X-Status'])
+ # Issue 5871: reject an attempt to embed a header inside a header value
+ # (header injection attack).
+ def test_embeded_header_via_Header_rejected(self):
+ # Value supplied as a Header object; the error is expected from
+ # as_string(), not from the assignment.
+ msg = Message()
+ msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
+ self.assertRaises(errors.HeaderParseError, msg.as_string)
+ def test_embeded_header_via_string_rejected(self):
+ # Same check with the value supplied as a plain string.
+ msg = Message()
+ msg['Dummy'] = 'dummy\nX-Injected-Header: test'
+ self.assertRaises(errors.HeaderParseError, msg.as_string)
+# Test the email.encoders module
+class TestEncoders(unittest.TestCase):
+ # Checks which Content-Transfer-Encoding the MIME classes / set_charset()
+ # pick, and the line length of base64-encoded image payloads.
+ def test_EncodersEncode_base64(self):
+ # Builds a MIMEImage from the GIF fixture (read in binary mode) and
+ # inspects the text returned by get_payload().
+ with openfile('PyBanner048.gif', 'rb') as fp:
+ bindata = fp.read()
+ mimed = email.mime.image.MIMEImage(bindata)
+ base64ed = mimed.get_payload()
+ # the transfer-encoded body lines should all be <=76 characters
+ lines = base64ed.split('\n')
+ self.assertLessEqual(max([ len(x) for x in lines ]), 76)
+ def test_encode_empty_payload(self):
+ # set_charset('us-ascii') on a Message that has no payload set is
+ # expected to give a '7bit' Content-Transfer-Encoding.
+ eq = self.assertEqual
+ msg = Message()
+ msg.set_charset('us-ascii')
+ eq(msg['content-transfer-encoding'], '7bit')
+ def test_default_cte(self):
+ eq = self.assertEqual
+ # 7bit data and the default us-ascii _charset
+ msg = MIMEText('hello world')
+ eq(msg['content-transfer-encoding'], '7bit')
+ # Similar, but with 8bit data
+ msg = MIMEText('hello \xf8 world')
+ eq(msg['content-transfer-encoding'], '8bit')
+ # And now with a different charset
+ msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
+ eq(msg['content-transfer-encoding'], 'quoted-printable')
+ def test_encode7or8bit(self):
+ # Make sure a charset whose input character set is 8bit but
+ # whose output character set is 7bit gets a transfer-encoding
+ # of 7bit.
+ eq = self.assertEqual
+ msg = MIMEText('文', _charset='euc-jp')
+ eq(msg['content-transfer-encoding'], '7bit')
+# Test long header wrapping
+class TestLongHeaders(TestEmailBase):
+ maxDiff = None
+ def test_split_long_continuation(self):
+ # Parses a message whose Subject header has a tab-indented continuation
+ # line and flattens it again with Generator.
+ # NOTE(review): the triple-quoted literals in this method have no
+ # visible closing quotes in this copy, so the full input and expected
+ # text cannot be read off here.
+ eq = self.ndiffAssertEqual
+ msg = email.message_from_string("""\
+Subject: bug demonstration
+\tmore text
+ sfp = StringIO()
+ g = Generator(sfp)
+ g.flatten(msg)
+ eq(sfp.getvalue(), """\
+Subject: bug demonstration
+\tmore text
+ def test_another_long_almost_unsplittable_header(self):
+ # Encodes the same multi-line text twice: once with
+ # continuation_ws='\t', and once with every tab replaced by a space and
+ # no continuation_ws argument.
+ eq = self.ndiffAssertEqual
+ hstr = """\
+bug demonstration
+\tmore text"""
+ h = Header(hstr, continuation_ws='\t')
+ eq(h.encode(), """\
+bug demonstration
+\tmore text""")
+ h = Header(hstr.replace('\t', ' '))
+ eq(h.encode(), """\
+bug demonstration
+ 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
+ more text""")
+ def test_long_nonstring(self):
+ # Builds one Subject header from three chunks in different charsets
+ # (iso-8859-1 and iso-8859-2 given as bytes, utf-8 given as str). It
+ # then compares both the flattened message and
+ # h.encode(maxlinelen=76) against expected encoded-word text.
+ # NOTE(review): the first expected literal has no visible closing
+ # quotes in this copy.
+ eq = self.ndiffAssertEqual
+ g = Charset("iso-8859-1")
+ cz = Charset("iso-8859-2")
+ utf8 = Charset("utf-8")
+ g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
+ b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
+ b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
+ b'bef\xf6rdert. ')
+ cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
+ b'd\xf9vtipu.. ')
+ utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
+ '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
+ '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
+ '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
+ '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
+ 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
+ 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
+ '\u3044\u307e\u3059\u3002')
+ h = Header(g_head, g, header_name='Subject')
+ h.append(cz_head, cz)
+ h.append(utf8_head, utf8)
+ msg = Message()
+ msg['Subject'] = h
+ sfp = StringIO()
+ # `g` is rebound here: it held the iso-8859-1 Charset above and now
+ # names the Generator.
+ g = Generator(sfp)
+ g.flatten(msg)
+ eq(sfp.getvalue(), """\
+Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
+ =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
+ =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
+ =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
+ =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
+ =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
+ =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
+ =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
+ =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
+ =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
+ =?utf-8?b?44CC?=
+ eq(h.encode(maxlinelen=76), """\
+ =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
+ =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
+ =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
+ =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
+ =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
+ =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
+ =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
+ =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
+ =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
+ =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
+ def test_long_header_encode(self):
+ # A long, semicolon-separated value with header_name set: the expected
+ # text breaks after the second ';' and continues on a new line.
+ eq = self.ndiffAssertEqual
+ h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
+ 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
+ header_name='X-Foobar-Spoink-Defrobnit')
+ eq(h.encode(), '''\
+wasnipoop; giraffes="very-long-necked-animals";
+ spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
+ def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
+ # Same value as test_long_header_encode, but with continuation_ws='\t'.
+ # The input contains no tab, and the expected continuation line below
+ # does not start with '\t' either (hence "just a hint").
+ eq = self.ndiffAssertEqual
+ h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
+ 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
+ header_name='X-Foobar-Spoink-Defrobnit',
+ continuation_ws='\t')
+ eq(h.encode(), '''\
+wasnipoop; giraffes="very-long-necked-animals";
+ spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
+ def test_long_header_encode_with_tab_continuation(self):
+ # Here the input itself has a tab after the second ';'; the expected
+ # continuation line starts with that tab.
+ eq = self.ndiffAssertEqual
+ h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
+ 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
+ header_name='X-Foobar-Spoink-Defrobnit',
+ continuation_ws='\t')
+ eq(h.encode(), '''\
+wasnipoop; giraffes="very-long-necked-animals";
+\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
+ def test_header_encode_with_different_output_charset(self):
+ # A Header created with charset 'euc-jp' is expected to be emitted as
+ # an iso-2022-jp encoded word using the 'b' encoding.
+ h = Header('文', 'euc-jp')
+ self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
+ def test_long_header_encode_with_different_output_charset(self):
+ # Longer variant: the text is built by decoding euc-jp bytes, then
+ # handed to Header with charset 'euc-jp'; the expected output uses
+ # iso-2022-jp encoded words.
+ # NOTE(review): only one line of the expected literal is visible in
+ # this copy; earlier lines may be missing.
+ h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
+ b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
+ b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
+ b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
+ res = """\
+ =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
+ self.assertEqual(h.encode(), res)
+ def test_header_splitter(self):
+ # Sets a long, semicolon-separated header on MIMEText('') and checks
+ # the text produced by Generator.flatten().
+ # NOTE(review): the expected literal has no visible closing quotes in
+ # this copy.
+ eq = self.ndiffAssertEqual
+ msg = MIMEText('')
+ # It'd be great if we could use add_header() here, but that doesn't
+ # guarantee an order of the parameters.
+ msg['X-Foobar-Spoink-Defrobnit'] = (
+ 'wasnipoop; giraffes="very-long-necked-animals"; '
+ 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
+ sfp = StringIO()
+ g = Generator(sfp)
+ g.flatten(msg)
+ eq(sfp.getvalue(), '''\
+Content-Type: text/plain; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
+ spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
+ def test_no_semis_header_splitter(self):
+ # The References header is ten space-separated '<N@dom.ain>' ids (no
+ # semicolons); the expected text shows it wrapped after the fifth id.
+ # NOTE(review): the expected literal has no visible closing quotes in
+ # this copy.
+ eq = self.ndiffAssertEqual
+ msg = Message()
+ msg['From'] = 'test@dom.ain'
+ msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
+ msg.set_payload('Test')
+ sfp = StringIO()
+ g = Generator(sfp)
+ g.flatten(msg)
+ eq(sfp.getvalue(), """\
+From: test@dom.ain
+References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
+ <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
+    def test_last_split_chunk_does_not_fit(self):
+        # The long tail without split points is expected on a continuation
+        # line of its own, after the short leading part.
+        text = ('Subject: the first part of this is short, but_the_second'
+                '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
+                '_all_by_itself')
+        folded = Header(text).encode()
+        self.ndiffAssertEqual(folded, """\
+Subject: the first part of this is short,
+ but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
+ def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
+ # The value starts with a split character followed by a long run that
+ # offers no further split points.
+ # NOTE(review): the expected text begins with a continuation-style
+ # space, so a first line of the literal may be missing -- confirm.
+ eq = self.ndiffAssertEqual
+ h = Header(', but_the_second'
+ '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
+ '_all_by_itself')
+ eq(h.encode(), """\
+ but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
+    def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
+        # Both leading split characters are expected on the first line and
+        # the long remainder on a continuation line.
+        source = (', , but_the_second'
+                  '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
+                  '_all_by_itself')
+        expected = """\
+, ,
+ but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself"""
+        self.ndiffAssertEqual(Header(source).encode(), expected)
+    def test_trailing_splitable_on_overlong_unsplitable(self):
+        # A lone trailing ';' on an overlong run: the expected encoding is
+        # the input on a single line.
+        header = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
+                        'be_on_a_line_all_by_itself;')
+        expected = ("this_part_does_not_fit_within_maxlinelen_and_thus_should_"
+                    "be_on_a_line_all_by_itself;")
+        self.ndiffAssertEqual(header.encode(), expected)
+ def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
+ # Overlong run with a split character both before and after it.
+ # NOTE(review): the expected text begins with a continuation-style
+ # space, so a first line of the literal may be missing -- confirm.
+ eq = self.ndiffAssertEqual
+ h = Header('; '
+ 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
+ 'be_on_a_line_all_by_itself; ')
+ eq(h.encode(), """\
+ this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
+    def test_long_header_with_multiple_sequential_split_chars(self):
+        # Folding must keep the whole text; the input itself says this
+        # case used to truncate the header.
+        text = ('This is a long line that has two whitespaces in a row. '
+                'This used to cause truncation of the header when folded')
+        expected = """\
+This is a long line that has two whitespaces in a row. This used to cause
+ truncation of the header when folded"""
+        self.ndiffAssertEqual(Header(text).encode(), expected)
+    def test_splitter_split_on_punctuation_only_if_fws(self):
+        # Punctuation that is not followed by whitespace is not a fold
+        # point: the expected encoding is one unbroken line.
+        header = Header('thisverylongheaderhas;semicolons;and,commas,but'
+                        'they;arenotlegal;fold,points')
+        expected = ("thisverylongheaderhas;semicolons;and,commas,butthey;"
+                    "arenotlegal;fold,points")
+        self.ndiffAssertEqual(header.encode(), expected)
+    def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
+        # Expected: three lines, the last one holding the overlong part
+        # that cannot be split.
+        pieces = ('this is a test where we need to have more than one line '
+                  'before; our final line that is just too big to fit;; '
+                  'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
+                  'be_on_a_line_all_by_itself;')
+        encoded = Header(pieces).encode()
+        self.ndiffAssertEqual(encoded, """\
+this is a test where we need to have more than one line before;
+ our final line that is just too big to fit;;
+ this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
+    def test_overlong_last_part_followed_by_split_point(self):
+        # A trailing space after the overlong run is expected to be kept
+        # on the same, single line.
+        header = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
+                        'be_on_a_line_all_by_itself ')
+        expected = ("this_part_does_not_fit_within_maxlinelen_and_thus_"
+                    "should_be_on_a_line_all_by_itself ")
+        self.ndiffAssertEqual(header.encode(), expected)
+ def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
+ # Two overlong runs separated by '; ; '.
+ # NOTE(review): the expected text begins with a continuation-style
+ # line (' ;'), so a first line of the literal may be missing --
+ # confirm.
+ eq = self.ndiffAssertEqual
+ h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
+ 'before_our_final_line_; ; '
+ 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
+ 'be_on_a_line_all_by_itself; ')
+ eq(h.encode(), """\
+ ;
+ this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
+    def test_multiline_with_overlong_last_part_followed_by_split_point(self):
+        # Expected: the ordinary text on line one, the lone ';' on line
+        # two and the overlong part (with its trailing '; ') on line three.
+        source = ('this is a test where we need to have more than one line '
+                  'before our final line; ; '
+                  'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
+                  'be_on_a_line_all_by_itself; ')
+        encoded = Header(source).encode()
+        self.ndiffAssertEqual(encoded, """\
+this is a test where we need to have more than one line before our final line;
+ ;
+ this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
+ def test_long_header_with_whitespace_runs(self):
+ # Each joined element ends in a space, so the References value has
+ # runs of whitespace between ids and at its end.
+ # NOTE(review): the closing quotes of the expected literal are not
+ # visible in this chunk; verify against the complete file.
+ eq = self.ndiffAssertEqual
+ msg = Message()
+ msg['From'] = 'test@dom.ain'
+ msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10)
+ msg.set_payload('Test')
+ sfp = StringIO()
+ g = Generator(sfp)
+ g.flatten(msg)
+ eq(sfp.getvalue(), """\
+From: test@dom.ain
+References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
+ <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
+ <foo@dom.ain> <foo@dom.ain>\x20\x20
+ def test_long_run_with_semi_header_splitter(self):
+ # A long run of space-separated ids followed by '; abc'; the expected
+ # output folds between ids and keeps '; abc' with the last id.
+ # NOTE(review): the closing quotes of the expected literal are not
+ # visible in this chunk; verify against the complete file.
+ eq = self.ndiffAssertEqual
+ msg = Message()
+ msg['From'] = 'test@dom.ain'
+ msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
+ msg.set_payload('Test')
+ sfp = StringIO()
+ g = Generator(sfp)
+ g.flatten(msg)
+ eq(sfp.getvalue(), """\
+From: test@dom.ain
+References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
+ <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
+ <foo@dom.ain>; abc
+    def test_splitter_split_on_punctuation_only_if_fws_with_header(self):
+        # Message-level counterpart of the Header-only test
+        # test_splitter_split_on_punctuation_only_if_fws.  It needs a name
+        # of its own: when two methods in one class body share a name only
+        # the later definition survives, so the earlier test never runs.
+        # NOTE(review): the closing quotes of the expected literal are not
+        # visible in this chunk; verify against the complete file.
+        eq = self.ndiffAssertEqual
+        msg = Message()
+        msg['From'] = 'test@dom.ain'
+        msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
+                             'they;arenotlegal;fold,points')
+        msg.set_payload('Test')
+        sfp = StringIO()
+        g = Generator(sfp)
+        g.flatten(msg)
+        # XXX the space after the header should not be there.
+        eq(sfp.getvalue(), """\
+From: test@dom.ain
+ thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
+ def test_no_split_long_header(self):
+ # An 80-character run of 'x' has no split points, with or without a
+ # 'References: ' prefix inside the header value.
+ # NOTE(review): the comment below speaks of two lines, but only one
+ # line of the first expected literal is visible here -- confirm.
+ eq = self.ndiffAssertEqual
+ hstr = 'References: ' + 'x' * 80
+ h = Header(hstr)
+ # These come on two lines because Headers are really field value
+ # classes and don't really know about their field names.
+ eq(h.encode(), """\
+ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
+ h = Header('x' * 80)
+ eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
+ def test_splitting_multiple_long_lines(self):
+ # The input holds three long lines (the 2nd and 3rd start with a tab);
+ # the expected encoding folds each of them after its semicolons.
+ # NOTE(review): the closing quotes of the hstr literal are not visible
+ # in this chunk; verify against the complete file.
+ eq = self.ndiffAssertEqual
+ hstr = """\
+from babylon.socal-raves.org (localhost []); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
+\tfrom babylon.socal-raves.org (localhost []); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
+\tfrom babylon.socal-raves.org (localhost []); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
+ h = Header(hstr, continuation_ws='\t')
+ eq(h.encode(), """\
+from babylon.socal-raves.org (localhost []);
+ by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
+ for <mailman-admin@babylon.socal-raves.org>;
+ Sat, 2 Feb 2002 17:00:06 -0800 (PST)
+\tfrom babylon.socal-raves.org (localhost []);
+ by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
+ for <mailman-admin@babylon.socal-raves.org>;
+ Sat, 2 Feb 2002 17:00:06 -0800 (PST)
+\tfrom babylon.socal-raves.org (localhost []);
+ by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
+ for <mailman-admin@babylon.socal-raves.org>;
+ Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
+    def test_splitting_first_line_only_is_long(self):
+        # Only the first of the four input lines is refolded in the
+        # expected value; the tab-led lines after it are unchanged.
+        received = """\
+from modemcable093.139-201-24.que.mc.videotron.ca ([] helo=cthulhu.gerg.ca)
+\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
+\tid 17k4h5-00034i-00
+\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
+        header = Header(received, maxlinelen=78, header_name='Received',
+                        continuation_ws='\t')
+        expected = """\
+from modemcable093.139-201-24.que.mc.videotron.ca ([]
+ helo=cthulhu.gerg.ca)
+\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
+\tid 17k4h5-00034i-00
+\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
+        self.ndiffAssertEqual(header.encode(), expected)
+ def test_long_8bit_header(self):
+ # Build an iso-8859-1 Subject from two chunks and check its encoding
+ # directly and through Message.as_string() with maxheaderlen 76 and 0.
+ # NOTE(review): the first expected literal begins with a
+ # continuation-style space and the later literals show no closing
+ # quotes in this chunk; verify against the complete file.
+ eq = self.ndiffAssertEqual
+ msg = Message()
+ h = Header('Britische Regierung gibt', 'iso-8859-1',
+ header_name='Subject')
+ h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
+ eq(h.encode(maxlinelen=76), """\
+ =?iso-8859-1?q?hore-Windkraftprojekte?=""")
+ msg['Subject'] = h
+ eq(msg.as_string(maxheaderlen=76), """\
+Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
+ =?iso-8859-1?q?hore-Windkraftprojekte?=
+ eq(msg.as_string(maxheaderlen=0), """\
+Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
+ def test_long_8bit_header_no_charset(self):
+ # A non-ASCII value assigned as a plain string must make as_string()
+ # raise UnicodeEncodeError; wrapped in a utf-8 Header it is encoded.
+ # NOTE(review): the closing quotes of the expected literal are not
+ # visible in this chunk; verify against the complete file.
+ eq = self.ndiffAssertEqual
+ msg = Message()
+ header_string = ('Britische Regierung gibt gr\xfcnes Licht '
+ 'f\xfcr Offshore-Windkraftprojekte '
+ '<a-very-long-address@example.com>')
+ msg['Reply-To'] = header_string
+ self.assertRaises(UnicodeEncodeError, msg.as_string)
+ msg = Message()
+ msg['Reply-To'] = Header(header_string, 'utf-8',
+ header_name='Reply-To')
+ eq(msg.as_string(maxheaderlen=78), """\
+Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
+ =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
+ def test_long_to_header(self):
+ # A To header with five addresses; the first comma has no following
+ # space, and the expected output does not fold at that comma.
+ # NOTE(review): the closing quotes of the expected literal are not
+ # visible in this chunk; verify against the complete file.
+ eq = self.ndiffAssertEqual
+ to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
+ '<someone@eecs.umich.edu>, '
+ '"Someone Test #B" <someone@umich.edu>, '
+ '"Someone Test #C" <someone@eecs.umich.edu>, '
+ '"Someone Test #D" <someone@eecs.umich.edu>')
+ msg = Message()
+ msg['To'] = to
+ eq(msg.as_string(maxheaderlen=78), '''\
+To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
+ "Someone Test #B" <someone@umich.edu>,
+ "Someone Test #C" <someone@eecs.umich.edu>,
+ "Someone Test #D" <someone@eecs.umich.edu>
+    def test_long_line_after_append(self):
+        # The first chunk nearly fills a line, so the appended chunk is
+        # expected on a continuation line.
+        header = Header('This is an example of string which has almost the limit of header length.')
+        header.append('Add another line.')
+        expected = """\
+This is an example of string which has almost the limit of header length.
+ Add another line."""
+        self.ndiffAssertEqual(header.encode(maxlinelen=76), expected)
+    def test_shorter_line_with_append(self):
+        # Both chunks fit on one line and are expected joined by a space.
+        header = Header('This is a shorter line.')
+        header.append('Add another sentence. (Surprise?)')
+        expected = 'This is a shorter line. Add another sentence. (Surprise?)'
+        self.ndiffAssertEqual(header.encode(), expected)
+ def test_long_field_name(self):
+ # Encode a long iso-8859-1 value for a header with a long field name,
+ # limiting lines to 76 characters.
+ # NOTE(review): the expected text begins with a continuation-style
+ # space and lacks the start of the input text, so a first line of
+ # the literal may be missing -- confirm.
+ eq = self.ndiffAssertEqual
+ fn = 'X-Very-Very-Very-Long-Header-Name'
+ gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
+ 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
+ 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
+ 'bef\xf6rdert. ')
+ h = Header(gs, 'iso-8859-1', header_name=fn)
+ # BAW: this seems broken because the first line is too long
+ eq(h.encode(maxlinelen=76), """\
+ =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
+ =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
+ =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
+ def test_long_received_header(self):
+ # The same long value is stored once as a Header (tab continuation
+ # whitespace) and once as a plain string; the expected output shows
+ # the same folding for both.
+ # NOTE(review): the closing quotes of the expected literal are not
+ # visible in this chunk; verify against the complete file.
+ h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
+ 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
+ 'Wed, 05 Mar 2003 18:10:18 -0700')
+ msg = Message()
+ msg['Received-1'] = Header(h, continuation_ws='\t')
+ msg['Received-2'] = h
+ # This should be splitting on spaces not semicolons.
+ self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
+Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
+ hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
+ Wed, 05 Mar 2003 18:10:18 -0700
+Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
+ hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
+ Wed, 05 Mar 2003 18:10:18 -0700
+ def test_string_headerinst_eq(self):
+ # The same value is stored as a Header instance and as a plain
+ # string; the visible expected lines are identical for both.
+ # NOTE(review): the expected literal shows no field-name lines and no
+ # closing quotes in this chunk; verify against the complete file.
+ h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
+ 'tu-muenchen.de> (David Bremner\'s message of '
+ '"Thu, 6 Mar 2003 13:58:21 +0100")')
+ msg = Message()
+ msg['Received-1'] = Header(h, header_name='Received-1',
+ continuation_ws='\t')
+ msg['Received-2'] = h
+ # XXX The space after the ':' should not be there.
+ self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
+ <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
+ Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
+ <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
+ Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
+ def test_long_unbreakable_lines_with_continuation(self):
+ # One value without split points is stored three ways: as a plain
+ # string, as a Header, and as a plain string with a leading space.
+ # NOTE(review): the expected literal shows no field-name lines and no
+ # closing quotes in this chunk; verify against the complete file.
+ eq = self.ndiffAssertEqual
+ msg = Message()
+ t = """\
+ locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
+ msg['Face-1'] = t
+ msg['Face-2'] = Header(t, header_name='Face-2')
+ msg['Face-3'] = ' ' + t
+ # XXX This splitting is all wrong. The first value line should be
+ # snug against the field name or the space after the header not there.
+ eq(msg.as_string(maxheaderlen=78), """\
+ locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
+ locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
+ locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
+ def test_another_long_multiline_header(self):
+ # Parse a message consisting of one long Received header and
+ # regenerate it with a 78-character header limit.
+ # NOTE(review): the closing quotes of the expected literal are not
+ # visible in this chunk; verify against the complete file.
+ eq = self.ndiffAssertEqual
+ m = ('Received: from siimage.com '
+ '([]) by zima.siliconimage.com with '
+ 'Microsoft SMTPSVC(5.0.2195.4905); '
+ 'Wed, 16 Oct 2002 07:41:11 -0700')
+ msg = email.message_from_string(m)
+ eq(msg.as_string(maxheaderlen=78), '''\
+Received: from siimage.com ([]) by zima.siliconimage.com with
+ Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
+ def test_long_lines_with_different_header(self):
+ # The value itself looks like a 'List-Unsubscribe:' header but is
+ # stored twice under the field name 'List', once as a plain string
+ # and once as a Header; both are expected to fold the same way.
+ # NOTE(review): the closing quotes of the expected literal are not
+ # visible in this chunk; verify against the complete file.
+ eq = self.ndiffAssertEqual
+ h = ('List-Unsubscribe: '
+ '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
+ ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
+ '?subject=unsubscribe>')
+ msg = Message()
+ msg['List'] = h
+ msg['List'] = Header(h, header_name='List')
+ eq(msg.as_string(maxheaderlen=78), """\
+List: List-Unsubscribe:
+ <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
+ <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
+List: List-Unsubscribe:
+ <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
+ <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
+    def test_long_rfc2047_header_with_embedded_fws(self):
+        # A utf-8 header whose source text already contains line breaks;
+        # a newline is appended to both sides before comparing.
+        source = textwrap.dedent("""\
+ We're going to pretend this header is in a non-ascii character set
+ \tto see if line wrapping with encoded words and embedded
+ folding white space works""")
+        header = Header(source,
+                        charset='utf-8',
+                        header_name='Test')
+        actual = header.encode()+'\n'
+        expected = textwrap.dedent("""\
+ =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
+ =?utf-8?q?cter_set?=
+ =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
+ =?utf-8?q?_folding_white_space_works?=""")+'\n'
+        self.assertEqual(actual, expected)
+# Test mangling of "From " lines in the body of a message
+class TestFromMangling(unittest.TestCase):
+ # Flattens one message, whose body starts with "From ", through a
+ # Generator with mangle_from_ switched on and off; with it on, the
+ # expected body line is prefixed with '>'.
+ # NOTE(review): none of the triple-quoted literals in this class show
+ # their closing quotes in this chunk; verify against the complete file.
+ def setUp(self):
+ self.msg = Message()
+ self.msg['From'] = 'aaa@bbb.org'
+ self.msg.set_payload("""\
+From the desk of A.A.A.:
+Blah blah blah
+ def test_mangled_from(self):
+ s = StringIO()
+ g = Generator(s, mangle_from_=True)
+ g.flatten(self.msg)
+ self.assertEqual(s.getvalue(), """\
+From: aaa@bbb.org
+>From the desk of A.A.A.:
+Blah blah blah
+ def test_dont_mangle_from(self):
+ s = StringIO()
+ g = Generator(s, mangle_from_=False)
+ g.flatten(self.msg)
+ self.assertEqual(s.getvalue(), """\
+From: aaa@bbb.org
+From the desk of A.A.A.:
+Blah blah blah
+# Test the basic MIMEAudio class
+class TestMIMEAudio(unittest.TestCase):
+    # Covers MIMEAudio subtype detection, payload encoding and the
+    # Content-Disposition parameter accessors.
+    def setUp(self):
+        # Make sure we pick up the audiotest.au that lives in email/test/data.
+        # In Python, there's an audiotest.au living in Lib/test but that isn't
+        # included in some binary distros that don't include the test
+        # package. The trailing empty string on the .join() is significant
+        # since findfile() will do a dirname().
+        datadir = os.path.join(os.path.dirname(landmark), 'data', '')
+        with open(findfile('audiotest.au', datadir), 'rb') as fp:
+            self._audiodata = fp.read()
+        self._au = MIMEAudio(self._audiodata)
+    def test_guess_minor_type(self):
+        self.assertEqual(self._au.get_content_type(), 'audio/basic')
+    def test_encoding(self):
+        payload = self._au.get_payload()
+        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
+                         self._audiodata)
+    def test_checkSetMinor(self):
+        au = MIMEAudio(self._audiodata, 'fish')
+        self.assertEqual(au.get_content_type(), 'audio/fish')
+    def test_add_header(self):
+        eq = self.assertEqual
+        self._au.add_header('Content-Disposition', 'attachment',
+                            filename='audiotest.au')
+        eq(self._au['content-disposition'],
+           'attachment; filename="audiotest.au"')
+        eq(self._au.get_params(header='content-disposition'),
+           [('attachment', ''), ('filename', 'audiotest.au')])
+        eq(self._au.get_param('filename', header='content-disposition'),
+           'audiotest.au')
+        # A fresh list can never be a real parameter value, so getting the
+        # very same object back proves the failobj was returned.  assertIs
+        # reports both operands on failure, unlike assertTrue(a is b).
+        missing = []
+        eq(self._au.get_param('attachment', header='content-disposition'), '')
+        self.assertIs(self._au.get_param('foo', failobj=missing,
+                                         header='content-disposition'),
+                      missing)
+        # Try some missing stuff
+        self.assertIs(self._au.get_param('foobar', missing), missing)
+        self.assertIs(self._au.get_param('attachment', missing,
+                                         header='foobar'),
+                      missing)
+# Test the basic MIMEImage class
+class TestMIMEImage(unittest.TestCase):
+    # Covers MIMEImage subtype detection, payload encoding and the
+    # Content-Disposition parameter accessors.
+    def setUp(self):
+        with openfile('PyBanner048.gif', 'rb') as fp:
+            self._imgdata = fp.read()
+        self._im = MIMEImage(self._imgdata)
+    def test_guess_minor_type(self):
+        self.assertEqual(self._im.get_content_type(), 'image/gif')
+    def test_encoding(self):
+        payload = self._im.get_payload()
+        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
+                         self._imgdata)
+    def test_checkSetMinor(self):
+        im = MIMEImage(self._imgdata, 'fish')
+        self.assertEqual(im.get_content_type(), 'image/fish')
+    def test_add_header(self):
+        eq = self.assertEqual
+        self._im.add_header('Content-Disposition', 'attachment',
+                            filename='dingusfish.gif')
+        eq(self._im['content-disposition'],
+           'attachment; filename="dingusfish.gif"')
+        eq(self._im.get_params(header='content-disposition'),
+           [('attachment', ''), ('filename', 'dingusfish.gif')])
+        eq(self._im.get_param('filename', header='content-disposition'),
+           'dingusfish.gif')
+        # A fresh list can never be a real parameter value, so getting the
+        # very same object back proves the failobj was returned.  assertIs
+        # reports both operands on failure, unlike assertTrue(a is b).
+        missing = []
+        eq(self._im.get_param('attachment', header='content-disposition'), '')
+        self.assertIs(self._im.get_param('foo', failobj=missing,
+                                         header='content-disposition'),
+                      missing)
+        # Try some missing stuff
+        self.assertIs(self._im.get_param('foobar', missing), missing)
+        self.assertIs(self._im.get_param('attachment', missing,
+                                         header='foobar'),
+                      missing)
+# Test the basic MIMEApplication class
+class TestMIMEApplication(unittest.TestCase):
+    # Covers the default headers and the payload round trip of a
+    # MIMEApplication built from raw bytes.
+    def test_headers(self):
+        part = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
+        self.assertEqual(part.get_content_type(), 'application/octet-stream')
+        self.assertEqual(part['content-transfer-encoding'], 'base64')
+    def test_body(self):
+        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
+        part = MIMEApplication(raw)
+        # Whitespace around the encoded block carries no meaning, so it is
+        # stripped before the comparison.
+        encoded = part.get_payload().strip()
+        self.assertEqual(encoded, '+vv8/f7/')
+        self.assertEqual(part.get_payload(decode=True), raw)
+# Test the basic MIMEText class
+class TestMIMEText(unittest.TestCase):
+    # Covers content type, charset handling and payload access of MIMEText.
+    def setUp(self):
+        self._msg = MIMEText('hello there')
+    def test_types(self):
+        eq = self.assertEqual
+        eq(self._msg.get_content_type(), 'text/plain')
+        eq(self._msg.get_param('charset'), 'us-ascii')
+        # A fresh list can never be a real parameter value, so getting the
+        # very same object back proves the failobj was returned.
+        missing = []
+        self.assertIs(self._msg.get_param('foobar', missing), missing)
+        self.assertIs(
+            self._msg.get_param('charset', missing, header='foobar'),
+            missing)
+    def test_payload(self):
+        self.assertEqual(self._msg.get_payload(), 'hello there')
+        self.assertFalse(self._msg.is_multipart())
+    def test_charset(self):
+        eq = self.assertEqual
+        msg = MIMEText('hello there', _charset='us-ascii')
+        eq(msg.get_charset().input_charset, 'us-ascii')
+        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
+    def test_7bit_input(self):
+        eq = self.assertEqual
+        msg = MIMEText('hello there', _charset='us-ascii')
+        eq(msg.get_charset().input_charset, 'us-ascii')
+        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
+    def test_7bit_input_no_charset(self):
+        eq = self.assertEqual
+        msg = MIMEText('hello there')
+        eq(msg.get_charset(), 'us-ascii')
+        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
+        # assertIn shows the generated text when the substring is absent.
+        self.assertIn('hello there', msg.as_string())
+    def test_utf8_input(self):
+        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
+        eq = self.assertEqual
+        msg = MIMEText(teststr, _charset='utf-8')
+        eq(msg.get_charset().output_charset, 'utf-8')
+        eq(msg['content-type'], 'text/plain; charset="utf-8"')
+        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))
+    @unittest.skip("can't fix because of backward compat in email5, "
+                   "will fix in email6")
+    def test_utf8_input_no_charset(self):
+        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
+        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
+# Test complicated multipart/* messages
+class TestMultipart(TestEmailBase):
+    def setUp(self):
+        # Build the fixture shared by the tests: a multipart/mixed
+        # container (self._msg) holding a text intro (self._txt) and a GIF
+        # attachment (self._im), plus From/To/Subject/Date headers.
+        # NOTE(review): the closing quotes of the intro text literal are
+        # not visible in this chunk; verify against the complete file.
+        with openfile('PyBanner048.gif', 'rb') as fp:
+            data = fp.read()
+        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
+        image = MIMEImage(data, name='dingusfish.gif')
+        image.add_header('content-disposition', 'attachment',
+                         filename='dingusfish.gif')
+        intro = MIMEText('''\
+Hi there,
+This is the dingus fish.
+        container.attach(intro)
+        container.attach(image)
+        container['From'] = 'Barry <barry@digicool.com>'
+        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
+        container['Subject'] = 'Here is your dingus fish'
+        now = 987809702.54848599
+        timetuple = time.localtime(now)
+        # The last struct_time field is the DST flag; it selects between
+        # the standard and the DST offset.
+        if timetuple[-1] == 0:
+            tzsecs = time.timezone
+        else:
+            tzsecs = time.altzone
+        # time.timezone/altzone count seconds *west* of UTC, so a positive
+        # value corresponds to a negative zone in the Date header.
+        if tzsecs > 0:
+            sign = '-'
+        else:
+            sign = '+'
+        # The sign is emitted separately, so format the magnitude only
+        # (otherwise zones east of UTC produce e.g. "+-100").  divmod keeps
+        # the minutes right for zones that are not on a whole hour.
+        hours, minutes = divmod(abs(tzsecs) // 60, 60)
+        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
+        container['Date'] = time.strftime(
+            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
+        self._msg = container
+        self._im = image
+        self._txt = intro
+    def test_hierarchy(self):
+        # The container built in setUp() must expose exactly its two
+        # attached parts, in attachment order, and neither part may itself
+        # be multipart.
+        eq = self.assertEqual
+        m = self._msg
+        self.assertTrue(m.is_multipart())
+        eq(m.get_content_type(), 'multipart/mixed')
+        eq(len(m.get_payload()), 2)
+        self.assertRaises(IndexError, m.get_payload, 2)
+        m0 = m.get_payload(0)
+        m1 = m.get_payload(1)
+        # Identity, not equality: the payload must hold the very objects
+        # that were attached.
+        self.assertIs(m0, self._txt)
+        self.assertIs(m1, self._im)
+        eq(m.get_payload(), [m0, m1])
+        self.assertFalse(m0.is_multipart())
+        self.assertFalse(m1.is_multipart())
+ def test_empty_multipart_idempotent(self):
+ # Parsing the text and regenerating it must give the text back.
+ # NOTE(review): the closing quotes of the text literal are not
+ # visible in this chunk; verify against the complete file.
+ text = """\
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+MIME-Version: 1.0
+Subject: A subject
+To: aperson@dom.ain
+From: bperson@dom.ain
+ msg = Parser().parsestr(text)
+ self.ndiffAssertEqual(text, msg.as_string())
+ def test_no_parts_in_a_multipart_with_none_epilogue(self):
+ # Generate a multipart/mixed container with no parts attached and
+ # the preamble/epilogue left untouched.
+ # NOTE(review): the closing quotes of the expected literal are not
+ # visible in this chunk; verify against the complete file.
+ outer = MIMEBase('multipart', 'mixed')
+ outer['Subject'] = 'A subject'
+ outer['To'] = 'aperson@dom.ain'
+ outer['From'] = 'bperson@dom.ain'
+ outer.set_boundary('BOUNDARY')
+ self.ndiffAssertEqual(outer.as_string(), '''\
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+MIME-Version: 1.0
+Subject: A subject
+To: aperson@dom.ain
+From: bperson@dom.ain
+ def test_no_parts_in_a_multipart_with_empty_epilogue(self):
+ # Generate a multipart/mixed container with no parts attached and
+ # both preamble and epilogue set to the empty string.
+ # NOTE(review): the closing quotes of the expected literal are not
+ # visible in this chunk; verify against the complete file.
+ outer = MIMEBase('multipart', 'mixed')
+ outer['Subject'] = 'A subject'
+ outer['To'] = 'aperson@dom.ain'
+ outer['From'] = 'bperson@dom.ain'
+ outer.preamble = ''
+ outer.epilogue = ''
+ outer.set_boundary('BOUNDARY')
+ self.ndiffAssertEqual(outer.as_string(), '''\
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+MIME-Version: 1.0
+Subject: A subject
+To: aperson@dom.ain
+From: bperson@dom.ain
+ def test_one_part_in_a_multipart(self):
+ # Generate a multipart/mixed container holding a single text part.
+ # NOTE(review): the closing quotes of the expected literal are not
+ # visible in this chunk; verify against the complete file.
+ eq = self.ndiffAssertEqual
+ outer = MIMEBase('multipart', 'mixed')
+ outer['Subject'] = 'A subject'
+ outer['To'] = 'aperson@dom.ain'
+ outer['From'] = 'bperson@dom.ain'
+ outer.set_boundary('BOUNDARY')
+ msg = MIMEText('hello world')
+ outer.attach(msg)
+ eq(outer.as_string(), '''\
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+MIME-Version: 1.0
+Subject: A subject
+To: aperson@dom.ain
+From: bperson@dom.ain
+Content-Type: text/plain; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+hello world
+ def test_seq_parts_in_a_multipart_with_empty_preamble(self):
+ # One text part in a container whose preamble is the empty string.
+ # NOTE(review): the closing quotes of the expected literal are not
+ # visible in this chunk; verify against the complete file.
+ eq = self.ndiffAssertEqual
+ outer = MIMEBase('multipart', 'mixed')
+ outer['Subject'] = 'A subject'
+ outer['To'] = 'aperson@dom.ain'
+ outer['From'] = 'bperson@dom.ain'
+ outer.preamble = ''
+ msg = MIMEText('hello world')
+ outer.attach(msg)
+ outer.set_boundary('BOUNDARY')
+ eq(outer.as_string(), '''\
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+MIME-Version: 1.0
+Subject: A subject
+To: aperson@dom.ain
+From: bperson@dom.ain
+Content-Type: text/plain; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+hello world
+ def test_seq_parts_in_a_multipart_with_none_preamble(self):
+ # One text part in a container whose preamble is explicitly None.
+ # NOTE(review): the closing quotes of the expected literal are not
+ # visible in this chunk; verify against the complete file.
+ eq = self.ndiffAssertEqual
+ outer = MIMEBase('multipart', 'mixed')
+ outer['Subject'] = 'A subject'
+ outer['To'] = 'aperson@dom.ain'
+ outer['From'] = 'bperson@dom.ain'
+ outer.preamble = None
+ msg = MIMEText('hello world')
+ outer.attach(msg)
+ outer.set_boundary('BOUNDARY')
+ eq(outer.as_string(), '''\
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+MIME-Version: 1.0
+Subject: A subject
+To: aperson@dom.ain
+From: bperson@dom.ain
+Content-Type: text/plain; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+hello world
+ def test_seq_parts_in_a_multipart_with_none_epilogue(self):
+ # One text part in a container whose epilogue is explicitly None.
+ # NOTE(review): the closing quotes of the expected literal are not
+ # visible in this chunk; verify against the complete file.
+ eq = self.ndiffAssertEqual
+ outer = MIMEBase('multipart', 'mixed')
+ outer['Subject'] = 'A subject'
+ outer['To'] = 'aperson@dom.ain'
+ outer['From'] = 'bperson@dom.ain'
+ outer.epilogue = None
+ msg = MIMEText('hello world')
+ outer.attach(msg)
+ outer.set_boundary('BOUNDARY')
+ eq(outer.as_string(), '''\
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+MIME-Version: 1.0
+Subject: A subject
+To: aperson@dom.ain
+From: bperson@dom.ain
+Content-Type: text/plain; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+hello world
+ def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
+ # One text part in a container whose epilogue is the empty string.
+ # NOTE(review): the closing quotes of the expected literal are not
+ # visible in this chunk; verify against the complete file.
+ eq = self.ndiffAssertEqual
+ outer = MIMEBase('multipart', 'mixed')
+ outer['Subject'] = 'A subject'
+ outer['To'] = 'aperson@dom.ain'
+ outer['From'] = 'bperson@dom.ain'
+ outer.epilogue = ''
+ msg = MIMEText('hello world')
+ outer.attach(msg)
+ outer.set_boundary('BOUNDARY')
+ eq(outer.as_string(), '''\
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+MIME-Version: 1.0
+Subject: A subject
+To: aperson@dom.ain
+From: bperson@dom.ain
+Content-Type: text/plain; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+hello world
+ def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
+ # One text part in a container whose epilogue is a single newline.
+ # NOTE(review): the closing quotes of the expected literal are not
+ # visible in this chunk; verify against the complete file.
+ eq = self.ndiffAssertEqual
+ outer = MIMEBase('multipart', 'mixed')
+ outer['Subject'] = 'A subject'
+ outer['To'] = 'aperson@dom.ain'
+ outer['From'] = 'bperson@dom.ain'
+ outer.epilogue = '\n'
+ msg = MIMEText('hello world')
+ outer.attach(msg)
+ outer.set_boundary('BOUNDARY')
+ eq(outer.as_string(), '''\
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+MIME-Version: 1.0
+Subject: A subject
+To: aperson@dom.ain
+From: bperson@dom.ain
+Content-Type: text/plain; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+hello world
+    def test_message_external_body(self):
+        # In msg_36.txt the second top-level part must be a
+        # multipart/alternative whose two children are
+        # message/external-body parts, each wrapping one text/plain part.
+        check = self.assertEqual
+        top = self._msgobj('msg_36.txt')
+        check(len(top.get_payload()), 2)
+        alternative = top.get_payload(1)
+        check(alternative.get_content_type(), 'multipart/alternative')
+        check(len(alternative.get_payload()), 2)
+        for external in alternative.get_payload():
+            check(external.get_content_type(), 'message/external-body')
+            check(len(external.get_payload()), 1)
+            wrapped = external.get_payload(0)
+            check(wrapped.get_content_type(), 'text/plain')
+    def test_double_boundary(self):
+        # msg_37.txt is a multipart with two dash-boundary lines in a row.
+        # Our reading of RFC 2046 is that the second and any subsequent
+        # boundary is ignored, which leaves three parts.
+        parts = self._msgobj('msg_37.txt').get_payload()
+        self.assertEqual(len(parts), 3)
+ def test_nested_inner_contains_outer_boundary(self):
+ # Compare the structure dump written by iterators._structure() for
+ # msg_38.txt with the expected tree.
+ # NOTE(review): the closing quotes of the expected literal are not
+ # visible in this chunk, and its nesting indentation cannot be read
+ # from here; verify against the complete file.
+ eq = self.ndiffAssertEqual
+ # msg_38.txt has an inner part that contains outer boundaries. My
+ # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) says
+ # these are illegal and should be interpreted as unterminated inner
+ # parts.
+ msg = self._msgobj('msg_38.txt')
+ sfp = StringIO()
+ iterators._structure(msg, sfp)
+ eq(sfp.getvalue(), """\
+ multipart/mixed
+ multipart/alternative
+ text/plain
+ text/plain
+ text/plain
+ text/plain
+ def test_nested_with_same_boundary(self):
+ # Compare the structure dump written by iterators._structure() for
+ # msg_39.txt with the expected tree.
+ # NOTE(review): the closing quotes of the expected literal are not
+ # visible in this chunk, and its nesting indentation cannot be read
+ # from here; verify against the complete file.
+ eq = self.ndiffAssertEqual
+ # msg_39.txt is similarly evil in that it's got inner parts that use
+ # the same boundary as outer parts. Again, I believe the way this is
+ # parsed is closest to the spirit of RFC 2046
+ msg = self._msgobj('msg_39.txt')
+ sfp = StringIO()
+ iterators._structure(msg, sfp)
+ eq(sfp.getvalue(), """\
+ multipart/mixed
+ multipart/alternative
+ application/octet-stream
+ application/octet-stream
+ text/plain
+ def test_boundary_in_non_multipart(self):
+ # msg_40.txt declares a boundary parameter on a text/html message;
+ # the regenerated text is compared with the expected literal.
+ # NOTE(review): the closing quotes of the expected literal are not
+ # visible in this chunk; verify against the complete file.
+ msg = self._msgobj('msg_40.txt')
+ self.assertEqual(msg.as_string(), '''\
+MIME-Version: 1.0
+Content-Type: text/html; boundary="--961284236552522269"
+Content-Type: text/html;
+Content-Transfer-Encoding: 7Bit
+ def test_boundary_with_leading_space(self):
+ # The boundary parameter value starts with a space; the message must
+ # still parse as a multipart with two parts and report the boundary
+ # with that space.
+ # NOTE(review): the closing quotes of the message literal are not
+ # visible in this chunk; verify against the complete file.
+ eq = self.assertEqual
+ msg = email.message_from_string('''\
+MIME-Version: 1.0
+Content-Type: multipart/mixed; boundary=" XXXX"
+-- XXXX
+Content-Type: text/plain
+-- XXXX
+Content-Type: text/plain
+-- XXXX--
+ self.assertTrue(msg.is_multipart())
+ eq(msg.get_boundary(), ' XXXX')
+ eq(len(msg.get_payload()), 2)
+ def test_boundary_without_trailing_newline(self):
+ # Parse a multipart message and check the payload of its first part.
+ # NOTE(review): the boundary lines, the part body and the closing
+ # quotes of the message literal are not visible in this chunk;
+ # verify against the complete file.
+ m = Parser().parsestr("""\
+Content-Type: multipart/mixed; boundary="===============0012394164=="
+MIME-Version: 1.0
+Content-Type: image/file1.jpg
+MIME-Version: 1.0
+Content-Transfer-Encoding: base64
+ self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
+# Test some badly formatted messages
+class TestNonConformant(TestEmailBase):
+    def test_parse_missing_minor_type(self):
+        # msg_14.txt must be reported as text/plain by all three
+        # content-type accessors.
+        parsed = self._msgobj('msg_14.txt')
+        self.assertEqual(parsed.get_content_type(), 'text/plain')
+        self.assertEqual(parsed.get_content_maintype(), 'text')
+        self.assertEqual(parsed.get_content_subtype(), 'plain')
+    def test_same_boundary_inner_outer(self):
+        # The first part of msg_15.txt must carry exactly one defect, a
+        # StartBoundaryNotFoundDefect.
+        msg = self._msgobj('msg_15.txt')
+        # XXX We can probably eventually do better
+        inner = msg.get_payload(0)
+        self.assertTrue(hasattr(inner, 'defects'))
+        self.assertEqual(len(inner.defects), 1)
+        # assertIsInstance names the actual type when the check fails.
+        self.assertIsInstance(inner.defects[0],
+                              errors.StartBoundaryNotFoundDefect)
+    def test_multipart_no_boundary(self):
+        # msg_25.txt must come back with a plain string payload and two
+        # defects, in this order: NoBoundaryInMultipartDefect, then
+        # MultipartInvariantViolationDefect.
+        msg = self._msgobj('msg_25.txt')
+        # assertIsInstance names the actual type when a check fails.
+        self.assertIsInstance(msg.get_payload(), str)
+        self.assertEqual(len(msg.defects), 2)
+        self.assertIsInstance(msg.defects[0],
+                              errors.NoBoundaryInMultipartDefect)
+        self.assertIsInstance(msg.defects[1],
+                              errors.MultipartInvariantViolationDefect)
+    def test_invalid_content_type(self):
+        # An invalid Content-Type must read back as text/plain, while the
+        # generated text still carries the header value as it was set.
+        msg = Message()
+        def expect_text_plain():
+            self.assertEqual(msg.get_content_maintype(), 'text')
+            self.assertEqual(msg.get_content_subtype(), 'plain')
+            self.assertEqual(msg.get_content_type(), 'text/plain')
+        # RFC 2045, $5.2 says invalid yields text/plain
+        msg['Content-Type'] = 'text'
+        expect_text_plain()
+        # Clear the old value and try something /really/ invalid
+        del msg['content-type']
+        msg['Content-Type'] = 'foo'
+        expect_text_plain()
+        # Still, make sure that the message is idempotently generated
+        out = StringIO()
+        Generator(out).flatten(msg)
+        self.ndiffAssertEqual(out.getvalue(), 'Content-Type: foo\n\n')
+ def test_no_start_boundary(self):  # Compares the payload of msg_31.txt with an expected text. NOTE(review): the expected literal below looks truncated in this copy (no closing quotes) - confirm against upstream.
+ eq = self.ndiffAssertEqual
+ msg = self._msgobj('msg_31.txt')
+ eq(msg.get_payload(), """\
+Content-Type: text/plain
+message 1
+Content-Type: text/plain
+message 2
+ def test_no_separating_blank_line(self):  # Compares msg_35.txt rendered with as_string() against an expected text. NOTE(review): the expected literal below looks truncated in this copy (no closing quotes) - confirm against upstream.
+ eq = self.ndiffAssertEqual
+ msg = self._msgobj('msg_35.txt')
+ eq(msg.as_string(), """\
+From: aperson@dom.ain
+To: bperson@dom.ain
+Subject: here's something interesting
+counter to RFC 2822, there's no separating newline here
def test_lying_multipart(self):
    # msg_41.txt must record two defects, in this order.
    parsed = self._msgobj('msg_41.txt')
    self.assertTrue(hasattr(parsed, 'defects'))
    self.assertEqual(len(parsed.defects), 2)
    first, second = parsed.defects[0], parsed.defects[1]
    self.assertTrue(isinstance(first, errors.NoBoundaryInMultipartDefect))
    self.assertTrue(
        isinstance(second, errors.MultipartInvariantViolationDefect))
def test_missing_start_boundary(self):
    # The message structure is:
    #
    # multipart/mixed
    #    text/plain
    #    message/rfc822
    #        multipart/mixed [*]
    #
    # [*] This message is missing its start boundary
    container = self._msgobj('msg_42.txt')
    broken = container.get_payload(1).get_payload(0)
    self.assertEqual(len(broken.defects), 1)
    only_defect = broken.defects[0]
    self.assertTrue(
        isinstance(only_defect, errors.StartBoundaryNotFoundDefect))
def test_first_line_is_continuation_header(self):
    # The text starts with a continuation line: no header is stored, the
    # offending line is recorded on the defect, and the rest is the body.
    source = ' Line 1\nLine 2\nLine 3'
    parsed = email.message_from_string(source)
    self.assertEqual(parsed.keys(), [])
    self.assertEqual(parsed.get_payload(), 'Line 2\nLine 3')
    self.assertEqual(len(parsed.defects), 1)
    only_defect = parsed.defects[0]
    self.assertTrue(
        isinstance(only_defect, errors.FirstHeaderLineIsContinuationDefect))
    self.assertEqual(parsed.defects[0].line, ' Line 1\n')
+# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    # Checks for decode_header() / make_header() on RFC 2047 encoded words.

    def test_rfc2047_multiline(self):
        # Encoded words spread over a folded value decode to four chunks
        # and re-encode within a 76 column limit.
        folded = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        chunks = decode_header(folded)
        self.assertEqual(chunks, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        rebuilt = make_header(chunks)
        self.assertEqual(
            str(rebuilt),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(rebuilt.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [(b'Andr\xe9', 'iso-8859-1'),
                                  (b'Pirard <pirard@dom.ain>', None)])
        as_text = str(make_header(chunks))
        self.assertEqual(as_text, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        raw = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'The', None), (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')])
        as_text = str(make_header(chunks))
        self.assertEqual(as_text,
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words that are not whitespace-delimited come back as one
        # undecoded chunk.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        self.assertEqual(decode_header(raw), [
            (b'Sm', None), (b'\xf6', 'iso-8859-1'),
            (b'rg', None), (b'\xe5', 'iso-8859-1'),
            (b'sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        template = '=?iso-8859-1?B?%s?='
        # only test complete bytes
        cases = [
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi')
            ]
        for encoded, expected in cases:
            decoded = decode_header(template % encoded)
            self.assertEqual(decoded, [(expected, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(raw),
                         [(b'andr\xe9=zz', 'iso-8659-1')])
+# Test the MIMEMessage class
+class TestMIMEMessage(TestEmailBase):
def setUp(self):
    # Keep the raw text of msg_11.txt on the instance.
    with openfile('msg_11.txt') as source:
        self._text = source.read()
+ def test_type_error(self):
+ self.assertRaises(TypeError, MIMEMessage, 'a plain string')
+ def test_valid_argument(self):
+ eq = self.assertEqual
+ unless = self.assertTrue
+ subject = 'A sub-message'
+ m = Message()
+ m['Subject'] = subject
+ r = MIMEMessage(m)
+ eq(r.get_content_type(), 'message/rfc822')
+ payload = r.get_payload()
+ unless(isinstance(payload, list))
+ eq(len(payload), 1)
+ subpart = payload[0]
+ unless(subpart is m)
+ eq(subpart['subject'], subject)
+ def test_bad_multipart(self):
+ eq = self.assertEqual
+ msg1 = Message()
+ msg1['Subject'] = 'subpart 1'
+ msg2 = Message()
+ msg2['Subject'] = 'subpart 2'
+ r = MIMEMessage(msg1)
+ self.assertRaises(errors.MultipartConversionError, r.attach, msg2)
+ def test_generate(self):  # Flattens a MIMEMessage wrapping a small Message and compares the generated text. NOTE(review): the expected literal below looks truncated in this copy (no blank lines or closing quotes) - confirm against upstream.
+ # First craft the message to be encapsulated
+ m = Message()
+ m['Subject'] = 'An enclosed message'
+ m.set_payload('Here is the body of the message.\n')
+ r = MIMEMessage(m)  # r is the enclosing message/rfc822 container
+ r['Subject'] = 'The enclosing message'
+ s = StringIO()
+ g = Generator(s)
+ g.flatten(r)
+ self.assertEqual(s.getvalue(), """\
+Content-Type: message/rfc822
+MIME-Version: 1.0
+Subject: The enclosing message
+Subject: An enclosed message
+Here is the body of the message.
def test_parse_message_rfc822(self):
    # msg_11.txt parses to a message/rfc822 container whose payload is a
    # one-element list holding the enclosed Message.
    outer = self._msgobj('msg_11.txt')
    self.assertEqual(outer.get_content_type(), 'message/rfc822')
    parts = outer.get_payload()
    self.assertTrue(isinstance(parts, list))
    self.assertEqual(len(parts), 1)
    enclosed = parts[0]
    self.assertTrue(isinstance(enclosed, Message))
    self.assertEqual(enclosed['subject'], 'An enclosed message')
    self.assertEqual(enclosed.get_payload(),
                     'Here is the body of the message.\n')
+ def test_dsn(self):  # Walks the three subparts of the multipart/report in msg_16.txt. NOTE(review): the text literal opened further down looks truncated in this copy (no closing quotes) - confirm against upstream.
+ eq = self.assertEqual
+ unless = self.assertTrue
+ # msg 16 is a Delivery Status Notification, see RFC 1894
+ msg = self._msgobj('msg_16.txt')
+ eq(msg.get_content_type(), 'multipart/report')
+ unless(msg.is_multipart())
+ eq(len(msg.get_payload()), 3)
+ # Subpart 1 is a text/plain, human readable section
+ subpart = msg.get_payload(0)
+ eq(subpart.get_content_type(), 'text/plain')
+ eq(subpart.get_payload(), """\
+This report relates to a message you sent with the following header fields:
+ Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
+ Date: Sun, 23 Sep 2001 20:10:55 -0700
+ From: "Ian T. Henry" <henryi@oxy.edu>
+ To: SoCal Raves <scr@socal-raves.org>
+ Subject: [scr] yeah for Ians!!
+Your message cannot be delivered to the following recipients:
+ Recipient address: jangel1@cougar.noc.ucla.edu
+ Reason: recipient reached disk quota
+ # Subpart 2 contains the machine parsable DSN information. It
+ # consists of two blocks of headers, represented by two nested Message
+ # objects.
+ subpart = msg.get_payload(1)
+ eq(subpart.get_content_type(), 'message/delivery-status')
+ eq(len(subpart.get_payload()), 2)
+ # message/delivery-status should treat each block as a bunch of
+ # headers, i.e. a bunch of Message objects.
+ dsn1 = subpart.get_payload(0)
+ unless(isinstance(dsn1, Message))
+ eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
+ eq(dsn1.get_param('dns', header='reporting-mta'), '')
+ # Try a missing one <wink>
+ eq(dsn1.get_param('nsd', header='reporting-mta'), None)
+ dsn2 = subpart.get_payload(1)
+ unless(isinstance(dsn2, Message))
+ eq(dsn2['action'], 'failed')
+ eq(dsn2.get_params(header='original-recipient'),
+ [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
+ eq(dsn2.get_param('rfc822', header='final-recipient'), '')
+ # Subpart 3 is the original message
+ subpart = msg.get_payload(2)
+ eq(subpart.get_content_type(), 'message/rfc822')
+ payload = subpart.get_payload()
+ unless(isinstance(payload, list))
+ eq(len(payload), 1)
+ subsubpart = payload[0]
+ unless(isinstance(subsubpart, Message))
+ eq(subsubpart.get_content_type(), 'text/plain')
+ eq(subsubpart['message-id'],
+ '<002001c144a6$8752e060$56104586@oxy.edu>')
def test_epilogue(self):
    # A hand-built two-part message with a preamble and an epilogue must
    # flatten to the exact contents of msg_21.txt.
    with openfile('msg_21.txt') as fp:
        expected = fp.read()
    outer = Message()
    outer['From'] = 'aperson@dom.ain'
    outer['To'] = 'bperson@dom.ain'
    outer['Subject'] = 'Test'
    outer.preamble = 'MIME message'
    outer.epilogue = 'End of MIME message\n'
    parts = [MIMEText('One'), MIMEText('Two')]
    outer.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
    for part in parts:
        outer.attach(part)
    buf = StringIO()
    Generator(buf).flatten(outer)
    self.ndiffAssertEqual(buf.getvalue(), expected)
+ def test_no_nl_preamble(self):  # Builds a two-part message with a preamble and an empty epilogue, then compares as_string(). NOTE(review): the expected literal below looks truncated in this copy (no boundaries, bodies or closing quotes) - confirm against upstream.
+ eq = self.ndiffAssertEqual
+ msg = Message()
+ msg['From'] = 'aperson@dom.ain'
+ msg['To'] = 'bperson@dom.ain'
+ msg['Subject'] = 'Test'
+ msg.preamble = 'MIME message'  # the preamble has no trailing newline
+ msg.epilogue = ''
+ msg1 = MIMEText('One')
+ msg2 = MIMEText('Two')
+ msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
+ msg.attach(msg1)
+ msg.attach(msg2)
+ eq(msg.as_string(), """\
+From: aperson@dom.ain
+To: bperson@dom.ain
+Subject: Test
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+MIME message
+Content-Type: text/plain; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Content-Type: text/plain; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
def test_default_type(self):
    # In msg_30.txt both top-level parts report message/rfc822 and the
    # message each one encloses reports text/plain.
    with openfile('msg_30.txt') as fp:
        digest = email.message_from_file(fp)
    containers = []
    for index in (0, 1):
        container = digest.get_payload(index)
        self.assertEqual(container.get_default_type(), 'message/rfc822')
        self.assertEqual(container.get_content_type(), 'message/rfc822')
        containers.append(container)
    for container in containers:
        enclosed = container.get_payload(0)
        self.assertEqual(enclosed.get_default_type(), 'text/plain')
        self.assertEqual(enclosed.get_content_type(), 'text/plain')
def test_default_type_with_explicit_container_type(self):
    # In msg_28.txt both top-level parts report message/rfc822 and the
    # message each one encloses reports text/plain.
    with openfile('msg_28.txt') as fp:
        digest = email.message_from_file(fp)
    containers = []
    for index in (0, 1):
        container = digest.get_payload(index)
        self.assertEqual(container.get_default_type(), 'message/rfc822')
        self.assertEqual(container.get_content_type(), 'message/rfc822')
        containers.append(container)
    for container in containers:
        enclosed = container.get_payload(0)
        self.assertEqual(enclosed.get_default_type(), 'text/plain')
        self.assertEqual(enclosed.get_content_type(), 'text/plain')
+ def test_default_type_non_parsed(self):  # Builds a multipart/digest by hand and checks subpart types before and after deleting their headers. NOTE(review): both expected literals below look truncated in this copy (no boundaries, blank lines or closing quotes) - confirm against upstream.
+ eq = self.assertEqual
+ neq = self.ndiffAssertEqual
+ # Set up container
+ container = MIMEMultipart('digest', 'BOUNDARY')
+ container.epilogue = ''
+ # Set up subparts
+ subpart1a = MIMEText('message 1\n')
+ subpart2a = MIMEText('message 2\n')
+ subpart1 = MIMEMessage(subpart1a)
+ subpart2 = MIMEMessage(subpart2a)
+ container.attach(subpart1)
+ container.attach(subpart2)
+ eq(subpart1.get_content_type(), 'message/rfc822')
+ eq(subpart1.get_default_type(), 'message/rfc822')
+ eq(subpart2.get_content_type(), 'message/rfc822')
+ eq(subpart2.get_default_type(), 'message/rfc822')
+ neq(container.as_string(0), '''\
+Content-Type: multipart/digest; boundary="BOUNDARY"
+MIME-Version: 1.0
+Content-Type: message/rfc822
+MIME-Version: 1.0
+Content-Type: text/plain; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+message 1
+Content-Type: message/rfc822
+MIME-Version: 1.0
+Content-Type: text/plain; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+message 2
+ del subpart1['content-type']
+ del subpart1['mime-version']
+ del subpart2['content-type']
+ del subpart2['mime-version']
+ eq(subpart1.get_content_type(), 'message/rfc822')
+ eq(subpart1.get_default_type(), 'message/rfc822')
+ eq(subpart2.get_content_type(), 'message/rfc822')
+ eq(subpart2.get_default_type(), 'message/rfc822')
+ neq(container.as_string(0), '''\
+Content-Type: multipart/digest; boundary="BOUNDARY"
+MIME-Version: 1.0
+Content-Type: text/plain; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+message 1
+Content-Type: text/plain; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+message 2
+ def test_mime_attachments_in_constructor(self):
+ eq = self.assertEqual
+ text1 = MIMEText('')
+ text2 = MIMEText('')
+ msg = MIMEMultipart(_subparts=(text1, text2))
+ eq(len(msg.get_payload()), 2)
+ eq(msg.get_payload(0), text1)
+ eq(msg.get_payload(1), text2)
+ def test_default_multipart_constructor(self):
+ msg = MIMEMultipart()
+ self.assertTrue(msg.is_multipart())
+# A general test of parser->model->generator idempotency. IOW, read a message
+# in, parse it into a message object tree, then without touching the tree,
+# regenerate the plain text. The original text and the transformed text
+# should be identical. Note that we ignore the Unix-From line since it may
+# contain a changed date.
class TestIdempotent(TestEmailBase):
    # Each test parses a data file and, for most of them, regenerates the
    # text and requires it to equal the file's contents.

    linesep = '\n'

    def _msgobj(self, filename):
        # Return (parsed message, raw text) for the named data file.
        with openfile(filename) as fp:
            raw = fp.read()
        return email.message_from_string(raw), raw

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg without header wrapping and diff it against text.
        sink = StringIO()
        Generator(sink, maxheaderlen=0).flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, sink.getvalue())

    def test_parse_text_message(self):
        parsed, raw = self._msgobj('msg_01.txt')
        self.assertEqual(parsed.get_content_type(), 'text/plain')
        self.assertEqual(parsed.get_content_maintype(), 'text')
        self.assertEqual(parsed.get_content_subtype(), 'plain')
        self.assertEqual(parsed.get_params()[1], ('charset', 'us-ascii'))
        self.assertEqual(parsed.get_param('charset'), 'us-ascii')
        self.assertEqual(parsed.preamble, None)
        self.assertEqual(parsed.epilogue, None)
        self._idempotent(parsed, raw)

    def test_parse_untyped_message(self):
        parsed, raw = self._msgobj('msg_03.txt')
        self.assertEqual(parsed.get_content_type(), 'text/plain')
        self.assertEqual(parsed.get_params(), None)
        self.assertEqual(parsed.get_param('charset'), None)
        self._idempotent(parsed, raw)

    def test_simple_multipart(self):
        self._idempotent(*self._msgobj('msg_04.txt'))

    def test_MIME_digest(self):
        self._idempotent(*self._msgobj('msg_02.txt'))

    def test_long_header(self):
        self._idempotent(*self._msgobj('msg_27.txt'))

    def test_MIME_digest_with_part_headers(self):
        self._idempotent(*self._msgobj('msg_28.txt'))

    def test_mixed_with_image(self):
        self._idempotent(*self._msgobj('msg_06.txt'))

    def test_multipart_report(self):
        self._idempotent(*self._msgobj('msg_05.txt'))

    def test_dsn(self):
        self._idempotent(*self._msgobj('msg_16.txt'))

    def test_preamble_epilogue(self):
        self._idempotent(*self._msgobj('msg_21.txt'))

    def test_multipart_one_part(self):
        self._idempotent(*self._msgobj('msg_23.txt'))

    def test_multipart_no_parts(self):
        self._idempotent(*self._msgobj('msg_24.txt'))

    def test_no_start_boundary(self):
        self._idempotent(*self._msgobj('msg_31.txt'))

    def test_rfc2231_charset(self):
        self._idempotent(*self._msgobj('msg_32.txt'))

    def test_more_rfc2231_parameters(self):
        self._idempotent(*self._msgobj('msg_33.txt'))

    def test_text_plain_in_a_multipart_digest(self):
        self._idempotent(*self._msgobj('msg_34.txt'))

    def test_nested_multipart_mixeds(self):
        self._idempotent(*self._msgobj('msg_12a.txt'))

    def test_message_external_body_idempotent(self):
        self._idempotent(*self._msgobj('msg_36.txt'))

    def test_message_delivery_status(self):
        # This one is flattened with the Unix-From line included.
        self._idempotent(*self._msgobj('msg_43.txt'), unixfrom=True)

    def test_message_signed_idempotent(self):
        self._idempotent(*self._msgobj('msg_45.txt'))

    def test_content_type(self):
        body_text = 'Yadda yadda yadda' + self.linesep
        report, _ = self._msgobj('msg_05.txt')
        self.assertEqual(report.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(report.get_params())
        self.assertEqual(params['report-type'], 'delivery-status')
        self.assertEqual(params['boundary'],
                         'D1690A7AC1.996856090/mail.example.com')
        self.assertEqual(
            report.preamble,
            'This is a MIME-encapsulated message.' + self.linesep)
        self.assertEqual(report.epilogue, self.linesep)
        self.assertEqual(len(report.get_payload()), 3)
        # Make sure the subparts are what we expect
        for index in (0, 1):
            text_part = report.get_payload(index)
            self.assertEqual(text_part.get_content_type(), 'text/plain')
            self.assertEqual(text_part.get_payload(), body_text)
        wrapped = report.get_payload(2)
        self.assertEqual(wrapped.get_content_type(), 'message/rfc822')
        self.assertTrue(isinstance(wrapped, Message))
        inner_parts = wrapped.get_payload()
        self.assertTrue(isinstance(inner_parts, list))
        self.assertEqual(len(inner_parts), 1)
        innermost = inner_parts[0]
        self.assertTrue(isinstance(innermost, Message))
        self.assertEqual(innermost.get_payload(), body_text)

    def test_parser(self):
        outer, _ = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        self.assertEqual(outer.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and
        # that that submessage has a type of text/plain
        parts = outer.get_payload()
        self.assertTrue(isinstance(parts, list))
        self.assertEqual(len(parts), 1)
        enclosed = parts[0]
        self.assertTrue(isinstance(enclosed, Message))
        self.assertEqual(enclosed.get_content_type(), 'text/plain')
        self.assertTrue(isinstance(enclosed.get_payload(), str))
        self.assertEqual(enclosed.get_payload(), self.linesep)
+# Test various other bits of the package's functionality
+class TestMiscellaneous(TestEmailBase):
def test_message_from_string(self):
    with openfile('msg_01.txt') as fp:
        original = fp.read()
    parsed = email.message_from_string(original)
    buf = StringIO()
    # Don't wrap/continue long headers since we're trying to test
    # idempotency.
    Generator(buf, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, buf.getvalue())
def test_message_from_file(self):
    with openfile('msg_01.txt') as fp:
        original = fp.read()
        # Rewind so the parser sees the file from the beginning again.
        fp.seek(0)
        parsed = email.message_from_file(fp)
    buf = StringIO()
    # Don't wrap/continue long headers since we're trying to test
    # idempotency.
    Generator(buf, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, buf.getvalue())
def test_message_from_string_with_class(self):
    with openfile('msg_01.txt') as fp:
        simple_text = fp.read()

    # Create a subclass
    class MyMessage(Message):
        pass

    simple = email.message_from_string(simple_text, MyMessage)
    self.assertTrue(isinstance(simple, MyMessage))
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        nested_text = fp.read()
    nested = email.message_from_string(nested_text, MyMessage)
    for part in nested.walk():
        self.assertTrue(isinstance(part, MyMessage))
def test_message_from_file_with_class(self):
    # Create a subclass
    class MyMessage(Message):
        pass

    with openfile('msg_01.txt') as fp:
        simple = email.message_from_file(fp, MyMessage)
    self.assertTrue(isinstance(simple, MyMessage))
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        nested = email.message_from_file(fp, MyMessage)
    for part in nested.walk():
        self.assertTrue(isinstance(part, MyMessage))
def test__all__(self):
    # Compare the package's public names in sorted order, so the order in
    # which __all__ declares them does not matter.
    module = __import__('email')
    self.assertEqual(sorted(module.__all__), [
        'base64mime', 'charset', 'encoders', 'errors', 'generator',
        'header', 'iterators', 'message', 'message_from_binary_file',
        'message_from_bytes', 'message_from_file',
        'message_from_string', 'mime', 'parser',
        'quoprimime', 'utils',
        ])
+ def test_formatdate(self):
+ now = time.time()
+ self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
+ time.gmtime(now)[:6])
+ def test_formatdate_localtime(self):
+ now = time.time()
+ self.assertEqual(
+ utils.parsedate(utils.formatdate(now, localtime=True))[:6],
+ time.localtime(now)[:6])
+ def test_formatdate_usegmt(self):
+ now = time.time()
+ self.assertEqual(
+ utils.formatdate(now, localtime=False),
+ time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
+ self.assertEqual(
+ utils.formatdate(now, localtime=False, usegmt=True),
+ time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
+ def test_parsedate_none(self):
+ self.assertEqual(utils.parsedate(''), None)
+ def test_parsedate_compact(self):
+ # The FWS after the comma is optional
+ self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
+ utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
+ def test_parsedate_no_dayofweek(self):
+ eq = self.assertEqual
+ eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
+ (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
+ def test_parsedate_compact_no_dayofweek(self):
+ eq = self.assertEqual
+ eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
+ (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
+ def test_parsedate_no_space_before_positive_offset(self):
+ self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
+ (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
+ def test_parsedate_no_space_before_negative_offset(self):
+ # Issue 1155362: we already handled '+' for this case.
+ self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
+ (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
+ def test_parsedate_acceptable_to_time_functions(self):
+ eq = self.assertEqual
+ timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
+ t = int(time.mktime(timetup))
+ eq(time.localtime(t)[:6], timetup[:6])
+ eq(int(time.strftime('%Y', timetup)), 2003)
+ timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
+ t = int(time.mktime(timetup[:9]))
+ eq(time.localtime(t)[:6], timetup[:6])
+ eq(int(time.strftime('%Y', timetup[:9])), 2003)
+ def test_parsedate_y2k(self):
+ """Test for parsing a date with a two-digit year.
+ Parsing a date with a two-digit year should return the correct
+ four-digit year. RFC822 allows two-digit years, but RFC2822 (which
+ obsoletes RFC822) requires four-digit years.
+ """
+ self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
+ utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
+ self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
+ utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
+ def test_parseaddr_empty(self):
+ self.assertEqual(utils.parseaddr('<>'), ('', ''))
+ self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
+ def test_noquote_dump(self):
+ self.assertEqual(
+ utils.formataddr(('A Silly Person', 'person@dom.ain')),
+ 'A Silly Person <person@dom.ain>')
+ def test_escape_dump(self):
+ self.assertEqual(
+ utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
+ r'"A \(Very\) Silly Person" <person@dom.ain>')
+ a = r'A \(Special\) Person'
+ b = 'person@dom.ain'
+ self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
+ def test_escape_backslashes(self):
+ self.assertEqual(
+ utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
+ r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
+ a = r'Arthur \Backslash\ Foobar'
+ b = 'person@dom.ain'
+ self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
+ def test_name_with_dot(self):
+ x = 'John X. Doe <jxd@example.com>'
+ y = '"John X. Doe" <jxd@example.com>'
+ a, b = ('John X. Doe', 'jxd@example.com')
+ self.assertEqual(utils.parseaddr(x), (a, b))
+ self.assertEqual(utils.parseaddr(y), (a, b))
+ # formataddr() quotes the name if there's a dot in it
+ self.assertEqual(utils.formataddr((a, b)), y)
+ def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
+ # issue 10005. Note that in the third test the second pair of
+ # backslashes is not actually a quoted pair because it is not inside a
+ # comment or quoted string: the address being parsed has a quoted
+ # string containing a quoted backslash, followed by 'example' and two
+ # backslashes, followed by another quoted string containing a space and
+ # the word 'example'. parseaddr copies those two backslashes
+ # literally. Per rfc5322 this is not technically correct since a \ may
+ # not appear in an address outside of a quoted string. It is probably
+ # a sensible Postel interpretation, though.
+ eq = self.assertEqual
+ eq(utils.parseaddr('""example" example"@example.com'),
+ ('', '""example" example"@example.com'))
+ eq(utils.parseaddr('"\\"example\\" example"@example.com'),
+ ('', '"\\"example\\" example"@example.com'))
+ eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
+ ('', '"\\\\"example\\\\" example"@example.com'))
+ def test_parseaddr_preserves_spaces_in_local_part(self):
+ # issue 9286. A normal RFC5322 local part should not contain any
+ # folding white space, but legacy local parts can (they are a sequence
+ # of atoms, not dotatoms). On the other hand we strip whitespace from
+ # before the @ and around dots, on the assumption that the whitespace
+ # around the punctuation is a mistake in what would otherwise be
+ # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
+ self.assertEqual(('', "merwok wok@xample.com"),
+ utils.parseaddr("merwok wok@xample.com"))
+ self.assertEqual(('', "merwok wok@xample.com"),
+ utils.parseaddr("merwok wok@xample.com"))
+ self.assertEqual(('', "merwok wok@xample.com"),
+ utils.parseaddr(" merwok wok @xample.com"))
+ self.assertEqual(('', 'merwok"wok" wok@xample.com'),
+ utils.parseaddr('merwok"wok" wok@xample.com'))
+ self.assertEqual(('', 'merwok.wok.wok@xample.com'),
+ utils.parseaddr('merwok. wok . wok@xample.com'))
+ def test_multiline_from_comment(self):
+ x = """\
+\tBar <foo@example.com>"""
+ self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
+ def test_quote_dump(self):
+ self.assertEqual(
+ utils.formataddr(('A Silly; Person', 'person@dom.ain')),
+ r'"A Silly; Person" <person@dom.ain>')
+ def test_charset_richcomparisons(self):
+ eq = self.assertEqual
+ ne = self.assertNotEqual
+ cset1 = Charset()
+ cset2 = Charset()
+ eq(cset1, 'us-ascii')
+ eq(cset1, 'US-ASCII')
+ eq(cset1, 'Us-AsCiI')
+ eq('us-ascii', cset1)
+ eq('US-ASCII', cset1)
+ eq('Us-AsCiI', cset1)
+ ne(cset1, 'usascii')
+ ne(cset1, 'USASCII')
+ ne(cset1, 'UsAsCiI')
+ ne('usascii', cset1)
+ ne('USASCII', cset1)
+ ne('UsAsCiI', cset1)
+ eq(cset1, cset2)
+ eq(cset2, cset1)
+ def test_getaddresses(self):
+ eq = self.assertEqual
+ eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
+ 'Bud Person <bperson@dom.ain>']),
+ [('Al Person', 'aperson@dom.ain'),
+ ('Bud Person', 'bperson@dom.ain')])
def test_getaddresses_nasty(self):
    # Odd inputs, each paired with the exact result expected for it.
    cases = (
        (['foo: ;'], [('', '')]),
        (['[]*-- =~$'], [('', ''), ('', ''), ('', '*--')]),
        (['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>'],
         [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]),
    )
    for header_values, expected in cases:
        self.assertEqual(utils.getaddresses(header_values), expected)
+ def test_getaddresses_embedded_comment(self):
+ """Test proper handling of a nested comment"""
+ eq = self.assertEqual
+ addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
+ eq(addrs[0][1], 'foo@bar.com')
+ def test_utils_quote_unquote(self):
+ eq = self.assertEqual
+ msg = Message()
+ msg.add_header('content-disposition', 'attachment',
+ filename='foo\\wacky"name')
+ eq(msg.get_filename(), 'foo\\wacky"name')
+ def test_get_body_encoding_with_bogus_charset(self):
+ charset = Charset('not a charset')
+ self.assertEqual(charset.get_body_encoding(), 'base64')
+ def test_get_body_encoding_with_uppercase_charset(self):
+ eq = self.assertEqual
+ msg = Message()
+ msg['Content-Type'] = 'text/plain; charset=UTF-8'
+ eq(msg['content-type'], 'text/plain; charset=UTF-8')
+ charsets = msg.get_charsets()
+ eq(len(charsets), 1)
+ eq(charsets[0], 'utf-8')
+ charset = Charset(charsets[0])
+ eq(charset.get_body_encoding(), 'base64')
+ msg.set_payload(b'hello world', charset=charset)
+ eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
+ eq(msg.get_payload(decode=True), b'hello world')
+ eq(msg['content-transfer-encoding'], 'base64')
+ # Try another one
+ msg = Message()
+ msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
+ charsets = msg.get_charsets()
+ eq(len(charsets), 1)
+ eq(charsets[0], 'us-ascii')
+ charset = Charset(charsets[0])
+ eq(charset.get_body_encoding(), encoders.encode_7or8bit)
+ msg.set_payload('hello world', charset=charset)
+ eq(msg.get_payload(), 'hello world')
+ eq(msg['content-transfer-encoding'], '7bit')
+ def test_charsets_case_insensitive(self):
+ lc = Charset('us-ascii')
+ uc = Charset('US-ASCII')
+ self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
+ def test_partial_falls_inside_message_delivery_status(self):
+ eq = self.ndiffAssertEqual
+ # The Parser interface provides chunks of data to FeedParser in 8192
+ # byte gulps. SF bug #1076485 found one of those chunks inside
+ # message/delivery-status header block, which triggered an
+ # unreadline() of NeedMoreData.
+ msg = self._msgobj('msg_43.txt')
+ sfp = StringIO()
+ iterators._structure(msg, sfp)
+ eq(sfp.getvalue(), """\
+ text/plain
+ message/delivery-status
+ text/plain
+ text/plain
+ text/plain
+ text/plain
+ text/plain
+ text/plain
+ text/plain
+ text/plain
+ text/plain
+ text/plain
+ text/plain
+ text/plain
+ text/plain
+ text/plain
+ text/plain
+ text/plain
+ text/plain
+ text/plain
+ text/plain
+ text/plain
+ text/plain
+ text/plain
+ text/plain
+ text/plain
+ text/plain
+ text/plain
+ text/rfc822-headers
+ def test_make_msgid_domain(self):
+ self.assertEqual(
+ email.utils.make_msgid(domain='testdomain-string')[-19:],
+ '@testdomain-string>')
+# Test the iterator/generators
+class TestIterators(TestEmailBase):
def test_body_line_iterator(self):
    # First a simple non-multipart message
    simple = self._msgobj('msg_01.txt')
    body_lines = list(iterators.body_line_iterator(simple))
    self.assertEqual(len(body_lines), 6)
    self.ndiffAssertEqual(EMPTYSTRING.join(body_lines),
                          simple.get_payload())
    # Now a more complicated multipart
    nested = self._msgobj('msg_02.txt')
    body_lines = list(iterators.body_line_iterator(nested))
    self.assertEqual(len(body_lines), 43)
    with openfile('msg_19.txt') as fp:
        self.ndiffAssertEqual(EMPTYSTRING.join(body_lines), fp.read())
+ def test_typed_subpart_iterator(self):  # Collects the payloads of the 'text' subparts of msg_04.txt and expects two of them. NOTE(review): the expected literal below looks truncated in this copy (no closing quotes) - confirm against upstream.
+ eq = self.assertEqual
+ msg = self._msgobj('msg_04.txt')
+ it = iterators.typed_subpart_iterator(msg, 'text')
+ lines = []
+ subparts = 0  # counts the subparts the iterator yields
+ for subpart in it:
+ subparts += 1
+ lines.append(subpart.get_payload())
+ eq(subparts, 2)
+ eq(EMPTYSTRING.join(lines), """\
+a simple kind of mirror
+to reflect upon our own
+a simple kind of mirror
+to reflect upon our own
+ def test_typed_subpart_iterator_default_type(self):  # Iterates the text/plain subparts of msg_03.txt and expects exactly one. NOTE(review): the expected literal below looks truncated in this copy (no closing quotes) - confirm against upstream.
+ eq = self.assertEqual
+ msg = self._msgobj('msg_03.txt')
+ it = iterators.typed_subpart_iterator(msg, 'text', 'plain')
+ lines = []
+ subparts = 0  # counts the subparts the iterator yields
+ for subpart in it:
+ subparts += 1
+ lines.append(subpart.get_payload())
+ eq(subparts, 1)
+ eq(EMPTYSTRING.join(lines), """\
+Do you like this message?
+ def test_pushCR_LF(self):
+ '''FeedParser BufferedSubFile.push() assumed it received complete
+ line endings. A CR ending one push() followed by a LF starting
+ the next push() added an empty line.
+ '''
+ imt = [
+ ("a\r \n", 2),
+ ("b", 0),
+ ("c\n", 1),
+ ("", 0),
+ ("d\r\n", 1),
+ ("e\r", 0),
+ ("\nf", 1),
+ ("\r\n", 1),
+ ]
+ from email.feedparser import BufferedSubFile, NeedMoreData
+ bsf = BufferedSubFile()
+ om = []
+ nt = 0
+ for il, n in imt:
+ bsf.push(il)
+ nt += n
+ n1 = 0
+ while True:
+ ol = bsf.readline()
+ if ol == NeedMoreData:
+ break
+ om.append(ol)
+ n1 += 1
+ self.assertTrue(n == n1)
+ self.assertTrue(len(om) == nt)
+ self.assertTrue(''.join([il for il, n in imt]) == ''.join(om))
+class TestParsers(TestEmailBase):
+ def test_header_parser(self):
+ eq = self.assertEqual
+ # Parse only the headers of a complex multipart MIME document
+ with openfile('msg_02.txt') as fp:
+ msg = HeaderParser().parse(fp)
+ eq(msg['from'], 'ppp-request@zzz.org')
+ eq(msg['to'], 'ppp@zzz.org')
+ eq(msg.get_content_type(), 'multipart/mixed')
+ self.assertFalse(msg.is_multipart())
+ self.assertTrue(isinstance(msg.get_payload(), str))
+ def test_whitespace_continuation(self):
+ eq = self.assertEqual
+ # This message contains a line after the Subject: header that has only
+ # whitespace, but it is not empty!
+ msg = email.message_from_string("""\
+From: aperson@dom.ain
+To: bperson@dom.ain
+Subject: the next line has a space on it
+Date: Mon, 8 Apr 2002 15:09:19 -0400
+Message-ID: spam
+Here's the message body
+ eq(msg['subject'], 'the next line has a space on it\n ')
+ eq(msg['message-id'], 'spam')
+ eq(msg.get_payload(), "Here's the message body\n")
+ def test_whitespace_continuation_last_header(self):
+ eq = self.assertEqual
+ # Like the previous test, but the subject line is the last
+ # header.
+ msg = email.message_from_string("""\
+From: aperson@dom.ain
+To: bperson@dom.ain
+Date: Mon, 8 Apr 2002 15:09:19 -0400
+Message-ID: spam
+Subject: the next line has a space on it
+Here's the message body
+ eq(msg['subject'], 'the next line has a space on it\n ')
+ eq(msg['message-id'], 'spam')
+ eq(msg.get_payload(), "Here's the message body\n")
+ def test_crlf_separation(self):
+ eq = self.assertEqual
+ with openfile('msg_26.txt', newline='\n') as fp:
+ msg = Parser().parse(fp)
+ eq(len(msg.get_payload()), 2)
+ part1 = msg.get_payload(0)
+ eq(part1.get_content_type(), 'text/plain')
+ eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
+ part2 = msg.get_payload(1)
+ eq(part2.get_content_type(), 'application/riscos')
+ def test_crlf_flatten(self):
+ # Using newline='\n' preserves the crlfs in this input file.
+ with openfile('msg_26.txt', newline='\n') as fp:
+ text = fp.read()
+ msg = email.message_from_string(text)
+ s = StringIO()
+ g = Generator(s)
+ g.flatten(msg, linesep='\r\n')
+ self.assertEqual(s.getvalue(), text)
+ maxDiff = None
+ def test_multipart_digest_with_extra_mime_headers(self):
+ eq = self.assertEqual
+ neq = self.ndiffAssertEqual
+ with openfile('msg_28.txt') as fp:
+ msg = email.message_from_file(fp)
+ # Structure is:
+ # multipart/digest
+ # message/rfc822
+ # text/plain
+ # message/rfc822
+ # text/plain
+ eq(msg.is_multipart(), 1)
+ eq(len(msg.get_payload()), 2)
+ part1 = msg.get_payload(0)
+ eq(part1.get_content_type(), 'message/rfc822')
+ eq(part1.is_multipart(), 1)
+ eq(len(part1.get_payload()), 1)
+ part1a = part1.get_payload(0)
+ eq(part1a.is_multipart(), 0)
+ eq(part1a.get_content_type(), 'text/plain')
+ neq(part1a.get_payload(), 'message 1\n')
+ # next message/rfc822
+ part2 = msg.get_payload(1)
+ eq(part2.get_content_type(), 'message/rfc822')
+ eq(part2.is_multipart(), 1)
+ eq(len(part2.get_payload()), 1)
+ part2a = part2.get_payload(0)
+ eq(part2a.is_multipart(), 0)
+ eq(part2a.get_content_type(), 'text/plain')
+ neq(part2a.get_payload(), 'message 2\n')
+ def test_three_lines(self):
+ # A bug report by Andrew McNamara
+ lines = ['From: Andrew Person <aperson@dom.ain',
+ 'Subject: Test',
+ 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
+ msg = email.message_from_string(NL.join(lines))
+ self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')
+ def test_strip_line_feed_and_carriage_return_in_headers(self):
+ eq = self.assertEqual
+ # For [ 1002475 ] email message parser doesn't handle \r\n correctly
+ value1 = 'text'
+ value2 = 'more text'
+ m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
+ value1, value2)
+ msg = email.message_from_string(m)
+ eq(msg.get('Header'), value1)
+ eq(msg.get('Next-Header'), value2)
+ def test_rfc2822_header_syntax(self):
+ eq = self.assertEqual
+ m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
+ msg = email.message_from_string(m)
+ eq(len(msg), 3)
+ eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
+ eq(msg.get_payload(), 'body')
+ def test_rfc2822_space_not_allowed_in_header(self):
+ eq = self.assertEqual
+ m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
+ msg = email.message_from_string(m)
+ eq(len(msg.keys()), 0)
+ def test_rfc2822_one_character_header(self):
+ eq = self.assertEqual
+ m = 'A: first header\nB: second header\nCC: third header\n\nbody'
+ msg = email.message_from_string(m)
+ headers = msg.keys()
+ headers.sort()
+ eq(headers, ['A', 'B', 'CC'])
+ eq(msg.get_payload(), 'body')
+ def test_CRLFLF_at_end_of_part(self):
+ # issue 5610: feedparser should not eat two chars from body part ending
+ # with "\r\n\n".
+ m = (
+ "From: foo@bar.com\n"
+ "To: baz\n"
+ "Mime-Version: 1.0\n"
+ "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
+ "\n"
+ "--BOUNDARY\n"
+ "Content-Type: text/plain\n"
+ "\n"
+ "body ending with CRLF newline\r\n"
+ "\n"
+ "--BOUNDARY--\n"
+ )
+ msg = email.message_from_string(m)
+ self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
+class Test8BitBytesHandling(unittest.TestCase):
+ # In Python3 all input is string, but that doesn't work if the actual input
+ # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
+ # decode byte streams using the surrogateescape error handler, and
+ # reconvert to binary at appropriate places if we detect surrogates. This
+ # doesn't allow us to transform headers with 8bit bytes (they get munged),
+ # but it does allow us to parse and preserve them, and to decode body
+ # parts that use an 8bit CTE.
+ bodytest_msg = textwrap.dedent("""\
+ From: foo@bar.com
+ To: baz
+ Mime-Version: 1.0
+ Content-Type: text/plain; charset={charset}
+ Content-Transfer-Encoding: {cte}
+ {bodyline}
+ """)
+ def test_known_8bit_CTE(self):
+ m = self.bodytest_msg.format(charset='utf-8',
+ cte='8bit',
+ bodyline='pöstal').encode('utf-8')
+ msg = email.message_from_bytes(m)
+ self.assertEqual(msg.get_payload(), "pöstal\n")
+ self.assertEqual(msg.get_payload(decode=True),
+ "pöstal\n".encode('utf-8'))
+ def test_unknown_8bit_CTE(self):
+ m = self.bodytest_msg.format(charset='notavalidcharset',
+ cte='8bit',
+ bodyline='pöstal').encode('utf-8')
+ msg = email.message_from_bytes(m)
+ self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
+ self.assertEqual(msg.get_payload(decode=True),
+ "pöstal\n".encode('utf-8'))
+ def test_8bit_in_quopri_body(self):
+ # This is non-RFC compliant data...without 'decode' the library code
+ # decodes the body using the charset from the headers, and because the
+ # source byte really is utf-8 this works. This is likely to fail
+ # against real dirty data (ie: produce mojibake), but the data is
+ # invalid anyway so it is as good a guess as any. But this means that
+ # this test just confirms the current behavior; that behavior is not
+ # necessarily the best possible behavior. With 'decode' it is
+ # returning the raw bytes, so that test should be of correct behavior,
+ # or at least produce the same result that email4 did.
+ m = self.bodytest_msg.format(charset='utf-8',
+ cte='quoted-printable',
+ bodyline='p=C3=B6stál').encode('utf-8')
+ msg = email.message_from_bytes(m)
+ self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
+ self.assertEqual(msg.get_payload(decode=True),
+ 'pöstál\n'.encode('utf-8'))
+ def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
+ # This is similar to the previous test, but proves that if the 8bit
+ # byte is undecodeable in the specified charset, it gets replaced
+ # by the unicode 'unknown' character. Again, this may or may not
+ # be the ideal behavior. Note that if decode=False none of the
+ # decoders will get involved, so this is the only test we need
+ # for this behavior.
+ m = self.bodytest_msg.format(charset='ascii',
+ cte='quoted-printable',
+ bodyline='p=C3=B6stál').encode('utf-8')
+ msg = email.message_from_bytes(m)
+ self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
+ self.assertEqual(msg.get_payload(decode=True),
+ 'pöstál\n'.encode('utf-8'))
+ def test_8bit_in_base64_body(self):
+ # Sticking an 8bit byte in a base64 block makes it undecodable by
+ # normal means, so the block is returned undecoded, but as bytes.
+ m = self.bodytest_msg.format(charset='utf-8',
+ cte='base64',
+ bodyline='cMO2c3RhbAá=').encode('utf-8')
+ msg = email.message_from_bytes(m)
+ self.assertEqual(msg.get_payload(decode=True),
+ 'cMO2c3RhbAá=\n'.encode('utf-8'))
+ def test_8bit_in_uuencode_body(self):
+ # Sticking an 8bit byte in a uuencode block makes it undecodable by
+ # normal means, so the block is returned undecoded, but as bytes.
+ m = self.bodytest_msg.format(charset='utf-8',
+ cte='uuencode',
+ bodyline='<,.V<W1A; á ').encode('utf-8')
+ msg = email.message_from_bytes(m)
+ self.assertEqual(msg.get_payload(decode=True),
+ '<,.V<W1A; á \n'.encode('utf-8'))
+ headertest_headers = (
+ ('From: foo@bar.com', ('From', 'foo@bar.com')),
+ ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
+ ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
+ '\tJean de Baddie',
+ ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
+ 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
+ ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
+ ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
+ )
+ headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
+ '\nYes, they are flying.\n').encode('utf-8')
+ def test_get_8bit_header(self):
+ msg = email.message_from_bytes(self.headertest_msg)
+ self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
+ self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
+ def test_print_8bit_headers(self):
+ msg = email.message_from_bytes(self.headertest_msg)
+ self.assertEqual(str(msg),
+ textwrap.dedent("""\
+ From: {}
+ To: {}
+ Subject: {}
+ From: {}
+ Yes, they are flying.
+ """).format(*[expected[1] for (_, expected) in
+ self.headertest_headers]))
+ def test_values_with_8bit_headers(self):
+ msg = email.message_from_bytes(self.headertest_msg)
+ self.assertListEqual([str(x) for x in msg.values()],
+ ['foo@bar.com',
+ 'b\uFFFD\uFFFDz',
+ 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
+ 'coll\uFFFD\uFFFDgue, le pouf '
+ 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
+ '\tJean de Baddie',
+ "g\uFFFD\uFFFDst"])
+ def test_items_with_8bit_headers(self):
+ msg = email.message_from_bytes(self.headertest_msg)
+ self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
+ [('From', 'foo@bar.com'),
+ ('To', 'b\uFFFD\uFFFDz'),
+ ('Subject', 'Maintenant je vous '
+ 'pr\uFFFD\uFFFDsente '
+ 'mon coll\uFFFD\uFFFDgue, le pouf '
+ 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
+ '\tJean de Baddie'),
+ ('From', 'g\uFFFD\uFFFDst')])
+ def test_get_all_with_8bit_headers(self):
+ msg = email.message_from_bytes(self.headertest_msg)
+ self.assertListEqual([str(x) for x in msg.get_all('from')],
+ ['foo@bar.com',
+ 'g\uFFFD\uFFFDst'])
+ def test_get_content_type_with_8bit(self):
+ msg = email.message_from_bytes(textwrap.dedent("""\
+ Content-Type: text/pl\xA7in; charset=utf-8
+ """).encode('latin-1'))
+ self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
+ self.assertEqual(msg.get_content_maintype(), "text")
+ self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
+ def test_get_params_with_8bit(self):
+ msg = email.message_from_bytes(
+ 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
+ self.assertEqual(msg.get_params(header='x-header'),
+ [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
+ self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
+ # XXX: someday you might be able to get 'b\xa7r', for now you can't.
+ self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
+ def test_get_rfc2231_params_with_8bit(self):
+ msg = email.message_from_bytes(textwrap.dedent("""\
+ Content-Type: text/plain; charset=us-ascii;
+ title*=us-ascii'en'This%20is%20not%20f\xa7n"""
+ ).encode('latin-1'))
+ self.assertEqual(msg.get_param('title'),
+ ('us-ascii', 'en', 'This is not f\uFFFDn'))
+ def test_set_rfc2231_params_with_8bit(self):
+ msg = email.message_from_bytes(textwrap.dedent("""\
+ Content-Type: text/plain; charset=us-ascii;
+ title*=us-ascii'en'This%20is%20not%20f\xa7n"""
+ ).encode('latin-1'))
+ msg.set_param('title', 'test')
+ self.assertEqual(msg.get_param('title'), 'test')
+ def test_del_rfc2231_params_with_8bit(self):
+ msg = email.message_from_bytes(textwrap.dedent("""\
+ Content-Type: text/plain; charset=us-ascii;
+ title*=us-ascii'en'This%20is%20not%20f\xa7n"""
+ ).encode('latin-1'))
+ msg.del_param('title')
+ self.assertEqual(msg.get_param('title'), None)
+ self.assertEqual(msg.get_content_maintype(), 'text')
+ def test_get_payload_with_8bit_cte_header(self):
+ msg = email.message_from_bytes(textwrap.dedent("""\
+ Content-Transfer-Encoding: b\xa7se64
+ Content-Type: text/plain; charset=latin-1
+ payload
+ """).encode('latin-1'))
+ self.assertEqual(msg.get_payload(), 'payload\n')
+ self.assertEqual(msg.get_payload(decode=True), b'payload\n')
+ non_latin_bin_msg = textwrap.dedent("""\
+ From: foo@bar.com
+ To: báz
+ Subject: Maintenant je vous présente mon collègue, le pouf célèbre
+ \tJean de Baddie
+ Mime-Version: 1.0
+ Content-Type: text/plain; charset="utf-8"
+ Content-Transfer-Encoding: 8bit
+ Да, они летят.
+ """).encode('utf-8')
+ def test_bytes_generator(self):
+ msg = email.message_from_bytes(self.non_latin_bin_msg)
+ out = BytesIO()
+ email.generator.BytesGenerator(out).flatten(msg)
+ self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
+ def test_bytes_generator_handles_None_body(self):
+ #Issue 11019
+ msg = email.message.Message()
+ out = BytesIO()
+ email.generator.BytesGenerator(out).flatten(msg)
+ self.assertEqual(out.getvalue(), b"\n")
+ non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
+ From: foo@bar.com
+ To: =?unknown-8bit?q?b=C3=A1z?=
+ Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
+ =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
+ =?unknown-8bit?q?_Jean_de_Baddie?=
+ Mime-Version: 1.0
+ Content-Type: text/plain; charset="utf-8"
+ Content-Transfer-Encoding: base64
+ 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
+ """)
+ def test_generator_handles_8bit(self):
+ msg = email.message_from_bytes(self.non_latin_bin_msg)
+ out = StringIO()
+ email.generator.Generator(out).flatten(msg)
+ self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
+ def test_bytes_generator_with_unix_from(self):
+ # The unixfrom contains a current date, so we can't check it
+ # literally. Just make sure the first word is 'From' and the
+ # rest of the message matches the input.
+ msg = email.message_from_bytes(self.non_latin_bin_msg)
+ out = BytesIO()
+ email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
+ lines = out.getvalue().split(b'\n')
+ self.assertEqual(lines[0].split()[0], b'From')
+ self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
+ non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
+ non_latin_bin_msg_as7bit[2:4] = [
+ 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
+ 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
+ non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
+ def test_message_from_binary_file(self):
+ fn = 'test.msg'
+ self.addCleanup(unlink, fn)
+ with open(fn, 'wb') as testfile:
+ testfile.write(self.non_latin_bin_msg)
+ with open(fn, 'rb') as testfile:
+ m = email.parser.BytesParser().parse(testfile)
+ self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
+ latin_bin_msg = textwrap.dedent("""\
+ From: foo@bar.com
+ To: Dinsdale
+ Subject: Nudge nudge, wink, wink
+ Mime-Version: 1.0
+ Content-Type: text/plain; charset="latin-1"
+ Content-Transfer-Encoding: 8bit
+ oh là là, know what I mean, know what I mean?
+ """).encode('latin-1')
+ latin_bin_msg_as7bit = textwrap.dedent("""\
+ From: foo@bar.com
+ To: Dinsdale
+ Subject: Nudge nudge, wink, wink
+ Mime-Version: 1.0
+ Content-Type: text/plain; charset="iso-8859-1"
+ Content-Transfer-Encoding: quoted-printable
+ oh l=E0 l=E0, know what I mean, know what I mean?
+ """)
+ def test_string_generator_reencodes_to_quopri_when_appropriate(self):
+ m = email.message_from_bytes(self.latin_bin_msg)
+ self.assertEqual(str(m), self.latin_bin_msg_as7bit)
+ def test_decoded_generator_emits_unicode_body(self):
+ m = email.message_from_bytes(self.latin_bin_msg)
+ out = StringIO()
+ email.generator.DecodedGenerator(out).flatten(m)
+ #DecodedHeader output contains an extra blank line compared
+ #to the input message. RDM: not sure if this is a bug or not,
+ #but it is not specific to the 8bit->7bit conversion.
+ self.assertEqual(out.getvalue(),
+ self.latin_bin_msg.decode('latin-1')+'\n')
+ def test_bytes_feedparser(self):
+ bfp = email.feedparser.BytesFeedParser()
+ for i in range(0, len(self.latin_bin_msg), 10):
+ bfp.feed(self.latin_bin_msg[i:i+10])
+ m = bfp.close()
+ self.assertEqual(str(m), self.latin_bin_msg_as7bit)
+ def test_crlf_flatten(self):
+ with openfile('msg_26.txt', 'rb') as fp:
+ text = fp.read()
+ msg = email.message_from_bytes(text)
+ s = BytesIO()
+ g = email.generator.BytesGenerator(s)
+ g.flatten(msg, linesep='\r\n')
+ self.assertEqual(s.getvalue(), text)
+ def test_8bit_multipart(self):
+ # Issue 11605
+ source = textwrap.dedent("""\
+ Date: Fri, 18 Mar 2011 17:15:43 +0100
+ To: foo@example.com
+ From: foodwatch-Newsletter <bar@example.com>
+ Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
+ Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
+ MIME-Version: 1.0
+ Content-Type: multipart/alternative;
+ boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
+ --b1_76a486bee62b0d200f33dc2ca08220ad
+ Content-Type: text/plain; charset="utf-8"
+ Content-Transfer-Encoding: 8bit
+ Guten Tag, ,
+ mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
+ Nachrichten aus Japan.
+ --b1_76a486bee62b0d200f33dc2ca08220ad
+ Content-Type: text/html; charset="utf-8"
+ Content-Transfer-Encoding: 8bit
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
+ "http://www.w3.org/TR/html4/loose.dtd">
+ <html lang="de">
+ <head>
+ <title>foodwatch - Newsletter</title>
+ </head>
+ <body>
+ <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
+ die Nachrichten aus Japan.</p>
+ </body>
+ </html>
+ --b1_76a486bee62b0d200f33dc2ca08220ad--
+ """).encode('utf-8')
+ msg = email.message_from_bytes(source)
+ s = BytesIO()
+ g = email.generator.BytesGenerator(s)
+ g.flatten(msg)
+ self.assertEqual(s.getvalue(), source)
+ maxDiff = None
+class BaseTestBytesGeneratorIdempotent:
+ maxDiff = None
+ def _msgobj(self, filename):
+ with openfile(filename, 'rb') as fp:
+ data = fp.read()
+ data = self.normalize_linesep_regex.sub(self.blinesep, data)
+ msg = email.message_from_bytes(data)
+ return msg, data
+ def _idempotent(self, msg, data, unixfrom=False):
+ b = BytesIO()
+ g = email.generator.BytesGenerator(b, maxheaderlen=0)
+ g.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
+ self.assertByteStringsEqual(data, b.getvalue())
+ def assertByteStringsEqual(self, str1, str2):
+ # Not using self.blinesep here is intentional. This way the output
+ # is more useful when the failure results in mixed line endings.
+ self.assertListEqual(str1.split(b'\n'), str2.split(b'\n'))
+class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
+ TestIdempotent):
+ linesep = '\n'
+ blinesep = b'\n'
+ normalize_linesep_regex = re.compile(br'\r\n')
+class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
+ TestIdempotent):
+ linesep = '\r\n'
+ blinesep = b'\r\n'
+ normalize_linesep_regex = re.compile(br'(?<!\r)\n')
+class TestBase64(unittest.TestCase):
+ def test_len(self):
+ eq = self.assertEqual
+ eq(base64mime.header_length('hello'),
+ len(base64mime.body_encode(b'hello', eol='')))
+ for size in range(15):
+ if size == 0 : bsize = 0
+ elif size <= 3 : bsize = 4
+ elif size <= 6 : bsize = 8
+ elif size <= 9 : bsize = 12
+ elif size <= 12: bsize = 16
+ else : bsize = 20
+ eq(base64mime.header_length('x' * size), bsize)
+ def test_decode(self):
+ eq = self.assertEqual
+ eq(base64mime.decode(''), b'')
+ eq(base64mime.decode('aGVsbG8='), b'hello')
+ def test_encode(self):
+ eq = self.assertEqual
+ eq(base64mime.body_encode(b''), b'')
+ eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
+ # Test the binary flag
+ eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
+ # Test the maxlinelen arg
+ eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\
+ # Test the eol argument
+ eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
+ """\
+ def test_header_encode(self):
+ eq = self.assertEqual
+ he = base64mime.header_encode
+ eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
+ eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
+ eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
+ # Test the charset option
+ eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
+ eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
+class TestQuopri(unittest.TestCase):
+ def setUp(self):
+ # Set of characters (as byte integers) that don't need to be encoded
+ # in headers.
+ self.hlit = list(chain(
+ range(ord('a'), ord('z') + 1),
+ range(ord('A'), ord('Z') + 1),
+ range(ord('0'), ord('9') + 1),
+ (c for c in b'!*+-/')))
+ # Set of characters (as byte integers) that do need to be encoded in
+ # headers.
+ self.hnon = [c for c in range(256) if c not in self.hlit]
+ assert len(self.hlit) + len(self.hnon) == 256
+ # Set of characters (as byte integers) that don't need to be encoded
+ # in bodies.
+ self.blit = list(range(ord(' '), ord('~') + 1))
+ self.blit.append(ord('\t'))
+ self.blit.remove(ord('='))
+ # Set of characters (as byte integers) that do need to be encoded in
+ # bodies.
+ self.bnon = [c for c in range(256) if c not in self.blit]
+ assert len(self.blit) + len(self.bnon) == 256
+ def test_quopri_header_check(self):
+ for c in self.hlit:
+ self.assertFalse(quoprimime.header_check(c),
+ 'Should not be header quopri encoded: %s' % chr(c))
+ for c in self.hnon:
+ self.assertTrue(quoprimime.header_check(c),
+ 'Should be header quopri encoded: %s' % chr(c))
+ def test_quopri_body_check(self):
+ for c in self.blit:
+ self.assertFalse(quoprimime.body_check(c),
+ 'Should not be body quopri encoded: %s' % chr(c))
+ for c in self.bnon:
+ self.assertTrue(quoprimime.body_check(c),
+ 'Should be body quopri encoded: %s' % chr(c))
+ def test_header_quopri_len(self):
+ eq = self.assertEqual
+ eq(quoprimime.header_length(b'hello'), 5)
+ # RFC 2047 chrome is not included in header_length().
+ eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
+ quoprimime.header_length(b'hello') +
+ # =?xxx?q?...?= means 10 extra characters
+ 10)
+ eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
+ # RFC 2047 chrome is not included in header_length().
+ eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
+ quoprimime.header_length(b'h@e@l@l@o@') +
+ # =?xxx?q?...?= means 10 extra characters
+ 10)
+ for c in self.hlit:
+ eq(quoprimime.header_length(bytes([c])), 1,
+ 'expected length 1 for %r' % chr(c))
+ for c in self.hnon:
+ # Space is special; it's encoded to _
+ if c == ord(' '):
+ continue
+ eq(quoprimime.header_length(bytes([c])), 3,
+ 'expected length 3 for %r' % chr(c))
+ eq(quoprimime.header_length(b' '), 1)
+ def test_body_quopri_len(self):
+ eq = self.assertEqual
+ for c in self.blit:
+ eq(quoprimime.body_length(bytes([c])), 1)
+ for c in self.bnon:
+ eq(quoprimime.body_length(bytes([c])), 3)
+ def test_quote_unquote_idempotent(self):
+ for x in range(256):
+ c = chr(x)
+ self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
+ def _test_header_encode(self, header, expected_encoded_header, charset=None):
+ if charset is None:
+ encoded_header = quoprimime.header_encode(header)
+ else:
+ encoded_header = quoprimime.header_encode(header, charset)
+ self.assertEqual(encoded_header, expected_encoded_header)
+ def test_header_encode_null(self):
+ self._test_header_encode(b'', '')
+ def test_header_encode_one_word(self):
+ self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
+ def test_header_encode_two_lines(self):
+ self._test_header_encode(b'hello\nworld',
+ '=?iso-8859-1?q?hello=0Aworld?=')
+ def test_header_encode_non_ascii(self):
+ self._test_header_encode(b'hello\xc7there',
+ '=?iso-8859-1?q?hello=C7there?=')
+ def test_header_encode_alt_charset(self):
+ self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
+ charset='iso-8859-2')
+ def _test_header_decode(self, encoded_header, expected_decoded_header):
+ decoded_header = quoprimime.header_decode(encoded_header)
+ self.assertEqual(decoded_header, expected_decoded_header)
+ def test_header_decode_null(self):
+ self._test_header_decode('', '')
+ def test_header_decode_one_word(self):
+ self._test_header_decode('hello', 'hello')
+ def test_header_decode_two_lines(self):
+ self._test_header_decode('hello=0Aworld', 'hello\nworld')
+ def test_header_decode_non_ascii(self):
+ self._test_header_decode('hello=C7there', 'hello\xc7there')
+ def _test_decode(self, encoded, expected_decoded, eol=None):
+ if eol is None:
+ decoded = quoprimime.decode(encoded)
+ else:
+ decoded = quoprimime.decode(encoded, eol=eol)
+ self.assertEqual(decoded, expected_decoded)
+ def test_decode_null_word(self):
+ self._test_decode('', '')
+ def test_decode_null_line_null_word(self):
+ self._test_decode('\r\n', '\n')
+ def test_decode_one_word(self):
+ self._test_decode('hello', 'hello')
+ def test_decode_one_word_eol(self):
+ self._test_decode('hello', 'hello', eol='X')
+ def test_decode_one_line(self):
+ self._test_decode('hello\r\n', 'hello\n')
+ def test_decode_one_line_lf(self):
+ self._test_decode('hello\n', 'hello\n')
+ def test_decode_one_line_cr(self):
+ self._test_decode('hello\r', 'hello\n')
+ def test_decode_one_line_nl(self):
+ self._test_decode('hello\n', 'helloX', eol='X')
+ def test_decode_one_line_crnl(self):
+ self._test_decode('hello\r\n', 'helloX', eol='X')
+ def test_decode_one_line_one_word(self):
+ self._test_decode('hello\r\nworld', 'hello\nworld')
+ def test_decode_one_line_one_word_eol(self):
+ self._test_decode('hello\r\nworld', 'helloXworld', eol='X')
+ def test_decode_two_lines(self):
+ self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')
+ def test_decode_two_lines_eol(self):
+ self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')
+ def test_decode_one_long_line(self):
+ self._test_decode('Spam' * 250, 'Spam' * 250)
+ def test_decode_one_space(self):
+ self._test_decode(' ', '')
+ def test_decode_multiple_spaces(self):
+ self._test_decode(' ' * 5, '')
+ def test_decode_one_line_trailing_spaces(self):
+ self._test_decode('hello \r\n', 'hello\n')
+ def test_decode_two_lines_trailing_spaces(self):
+ self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')
+ def test_decode_quoted_word(self):
+ self._test_decode('=22quoted=20words=22', '"quoted words"')
+ def test_decode_uppercase_quoting(self):
+ self._test_decode('ab=CD=EF', 'ab\xcd\xef')
+ def test_decode_lowercase_quoting(self):
+ self._test_decode('ab=cd=ef', 'ab\xcd\xef')
+ def test_decode_soft_line_break(self):
+ self._test_decode('soft line=\r\nbreak', 'soft linebreak')
+ def test_decode_false_quoting(self):
+ self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')
+ def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
+ kwargs = {}
+ if maxlinelen is None:
+ # Use body_encode's default.
+ maxlinelen = 76
+ else:
+ kwargs['maxlinelen'] = maxlinelen
+ if eol is None:
+ # Use body_encode's default.
+ eol = '\n'
+ else:
+ kwargs['eol'] = eol
+ encoded_body = quoprimime.body_encode(body, **kwargs)
+ self.assertEqual(encoded_body, expected_encoded_body)
+ if eol == '\n' or eol == '\r\n':
+ # We know how to split the result back into lines, so maxlinelen
+ # can be checked.
+ for line in encoded_body.splitlines():
+ self.assertLessEqual(len(line), maxlinelen)
+ def test_encode_null(self):
+ self._test_encode('', '')
+ def test_encode_null_lines(self):
+ self._test_encode('\n\n', '\n\n')
+ def test_encode_one_line(self):
+ self._test_encode('hello\n', 'hello\n')
+ def test_encode_one_line_crlf(self):
+ self._test_encode('hello\r\n', 'hello\n')
+ def test_encode_one_line_eol(self):
+ self._test_encode('hello\n', 'hello\r\n', eol='\r\n')
+ def test_encode_one_space(self):
+ self._test_encode(' ', '=20')
+ def test_encode_one_line_one_space(self):
+ self._test_encode(' \n', '=20\n')
+# XXX: body_encode() expect strings, but uses ord(char) from these strings
+# to index into a 256-entry list. For code points above 255, this will fail.
+# Should there be a check for 8-bit only ord() values in body, or at least
+# a comment about the expected input?
+ def test_encode_two_lines_one_space(self):
+ self._test_encode(' \n \n', '=20\n=20\n')
+ def test_encode_one_word_trailing_spaces(self):
+ self._test_encode('hello ', 'hello =20')
+ def test_encode_one_line_trailing_spaces(self):
+ self._test_encode('hello \n', 'hello =20\n')
+ def test_encode_one_word_trailing_tab(self):
+ self._test_encode('hello \t', 'hello =09')
+ def test_encode_one_line_trailing_tab(self):
+ self._test_encode('hello \t\n', 'hello =09\n')
+ def test_encode_trailing_space_before_maxlinelen(self):
+ self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)
+ def test_encode_trailing_space_at_maxlinelen(self):
+ self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)
+ def test_encode_trailing_space_beyond_maxlinelen(self):
+ self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)
+ def test_encode_whitespace_lines(self):
+ self._test_encode(' \n' * 5, '=20\n' * 5)
+ def test_encode_quoted_equals(self):
+ self._test_encode('a = b', 'a =3D b')
+ def test_encode_one_long_string(self):
+ self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)
+ def test_encode_one_long_line(self):
+ self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
+ def test_encode_one_very_long_line(self):
+ self._test_encode('x' * 200 + '\n',
+ 2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')
+ def test_encode_one_long_line(self):
+ self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
+ def test_encode_shortest_maxlinelen(self):
+ self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)
+ def test_encode_maxlinelen_too_small(self):
+ self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)
+ def test_encode(self):
+ eq = self.assertEqual
+ eq(quoprimime.body_encode(''), '')
+ eq(quoprimime.body_encode('hello'), 'hello')
+ # Test the binary flag
+ eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
+ # Test the maxlinelen arg
+ eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
+xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
+ xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
+x xxxx xxxx xxxx xxxx=20""")
+ # Test the eol argument
+ eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
+ """\
+xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
+ xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
+x xxxx xxxx xxxx xxxx=20""")
+ eq(quoprimime.body_encode("""\
+one line
+two line"""), """\
+one line
+two line""")
+# Test the Charset class
+class TestCharset(unittest.TestCase):
+    # Exercises header and body encoding through email.charset.Charset.
+    def tearDown(self):
+        # Remove the 'fake' charset registered by test_body_encode, if any.
+        from email import charset as CharsetModule
+        CharsetModule.CHARSETS.pop('fake', None)
+    def test_codec_encodeable(self):
+        # us-ascii text goes through header_encode() untouched.
+        ascii_cs = Charset('us-ascii')
+        self.assertEqual(ascii_cs.header_encode('Hello World!'),
+                         'Hello World!')
+        # Non-ASCII text cannot be header-encoded with us-ascii ...
+        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
+        self.assertRaises(UnicodeError, ascii_cs.header_encode, eight_bit)
+        # ... but utf-8 is expected to produce a base64 encoded-word.
+        utf8_cs = Charset('utf-8')
+        self.assertEqual(utf8_cs.header_encode(eight_bit),
+                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')
+    def test_body_encode(self):
+        # A charset with quoted-printable body encoding.
+        self.assertEqual('hello w=F6rld',
+                         Charset('iso-8859-1').body_encode('hello w\xf6rld'))
+        # A charset with base64 body encoding.
+        self.assertEqual('aGVsbG8gd29ybGQ=\n',
+                         Charset('utf-8').body_encode(b'hello world'))
+        # A charset with no body encoding.
+        self.assertEqual('hello world',
+                         Charset('us-ascii').body_encode('hello world'))
+        # Only construction of the euc-jp charset is exercised here; the
+        # conversion assertions that once followed are disabled.
+        Charset('euc-jp')
+        # SF bug #625509 has to be faked, since there are no built-in
+        # encodings where the header encoding is QP but the body encoding
+        # is not.
+        from email import charset as CharsetModule
+        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
+        self.assertEqual('hello world',
+                         Charset('fake').body_encode('hello world'))
+    def test_unicode_charset_name(self):
+        # str() of a Charset gives its name; a non-ASCII name is rejected.
+        ascii_cs = Charset('us-ascii')
+        self.assertEqual(str(ascii_cs), 'us-ascii')
+        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
+# Test multilingual MIME headers.
+class TestHeader(TestEmailBase):
+ def test_simple(self):
+     # Encode a plain ASCII header before and after appending a second chunk.
+     check = self.ndiffAssertEqual
+     header = Header('Hello World!')
+     check(header.encode(), 'Hello World!')
+     header.append(' Goodbye World!')
+     check(header.encode(), 'Hello World! Goodbye World!')
+ def test_simple_surprise(self):
+     # Like test_simple, but the appended chunk has no leading space; the
+     # encoded result is expected to contain a separating space anyway.
+     check = self.ndiffAssertEqual
+     header = Header('Hello World!')
+     check(header.encode(), 'Hello World!')
+     header.append('Goodbye World!')
+     check(header.encode(), 'Hello World! Goodbye World!')
+ def test_header_needs_no_decoding(self):
+     # Plain text is expected to decode to one chunk with no charset.
+     text = 'no decoding needed'
+     self.assertEqual(decode_header(text), [(text, None)])
+ def test_long(self):
+     # Every folded line of a long ASCII header must fit within maxlinelen.
+     h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
+                maxlinelen=76)
+     # assertLessEqual reports the offending length when a line is too long.
+     for folded_line in h.encode(splitchars=' ').split('\n '):
+         self.assertLessEqual(len(folded_line), 76)
+ def test_multilingual(self):
+ eq = self.ndiffAssertEqual
+ g = Charset("iso-8859-1")
+ cz = Charset("iso-8859-2")
+ utf8 = Charset("utf-8")
+ g_head = (b'Die Mieter treten hier ein werden mit einem '
+ b'Foerderband komfortabel den Korridor entlang, '
+ b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
+ b'gegen die rotierenden Klingen bef\xf6rdert. ')
+ cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
+ b'd\xf9vtipu.. ')
+ utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
+ '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
+ '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
+ '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
+ '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
+ 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
+ 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
+ '\u3044\u307e\u3059\u3002')
+ h = Header(g_head, g)
+ h.append(cz_head, cz)
+ h.append(utf8_head, utf8)
+ enc = h.encode(maxlinelen=76)
+ eq(enc, """\
+ =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
+ =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
+ =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
+ =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
+ =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
+ =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
+ =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
+ =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
+ =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
+ =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
+ decoded = decode_header(enc)
+ eq(len(decoded), 3)
+ eq(decoded[0], (g_head, 'iso-8859-1'))
+ eq(decoded[1], (cz_head, 'iso-8859-2'))
+ eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
+ ustr = str(h)
+ eq(ustr,
+ (b'Die Mieter treten hier ein werden mit einem Foerderband '
+ b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
+ b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
+ b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
+ b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
+ b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
+ b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
+ b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
+ b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
+ b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
+ b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
+ b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
+ b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
+ b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
+ b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
+ b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
+ ).decode('utf-8'))
+ # Test make_header()
+ newh = make_header(decode_header(enc))
+ eq(newh, h)
+ def test_empty_header_encode(self):
+     # A Header built with no value is expected to encode to ''.
+     self.assertEqual(Header().encode(), '')
+ def test_header_ctor_default_args(self):
+     # A Header created with no arguments compares equal to '' and can be
+     # extended afterwards with append().
+     check = self.ndiffAssertEqual
+     header = Header()
+     check(header, '')
+     header.append('foo', Charset('iso-8859-1'))
+     check(header, 'foo')
+ def test_explicit_maxlinelen(self):
+     # Folding of a long ASCII header: with defaults, with a header name,
+     # and with a maxlinelen large enough that no folding is expected.
+     check = self.ndiffAssertEqual
+     text = ('A very long line that must get split to something other '
+             'than at the 76th character boundary to test the non-default '
+             'behavior')
+     header = Header(text)
+     check(header.encode(), '''\
+A very long line that must get split to something other than at the 76th
+ character boundary to test the non-default behavior''')
+     check(str(header), text)
+     header = Header(text, header_name='Subject')
+     check(header.encode(), '''\
+A very long line that must get split to something other than at the
+ 76th character boundary to test the non-default behavior''')
+     check(str(header), text)
+     header = Header(text, maxlinelen=1024, header_name='Subject')
+     check(header.encode(), text)
+     check(str(header), text)
+ def test_quopri_splittable(self):
+ eq = self.ndiffAssertEqual
+ h = Header(charset='iso-8859-1', maxlinelen=20)
+ x = 'xxxx ' * 20
+ h.append(x)
+ s = h.encode()
+ eq(s, """\
+ =?iso-8859-1?q?x_?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?_x?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?x_?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?_x?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?x_?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?_x?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?x_?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?_x?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?x_?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?_x?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?x_?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?_x?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?x_?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?_x?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?x_?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?_x?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?x_?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?_x?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?x_?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?xx?=
+ =?iso-8859-1?q?_?=""")
+ eq(x, str(make_header(decode_header(s))))
+ h = Header(charset='iso-8859-1', maxlinelen=40)
+ h.append('xxxx ' * 20)
+ s = h.encode()
+ eq(s, """\
+ =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
+ =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
+ =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
+ =?iso-8859-1?q?_xxxx_xxxx_?=""")
+ eq(x, str(make_header(decode_header(s))))
+ def test_base64_splittable(self):
+ eq = self.ndiffAssertEqual
+ h = Header(charset='koi8-r', maxlinelen=20)
+ x = 'xxxx ' * 20
+ h.append(x)
+ s = h.encode()
+ eq(s, """\
+ =?koi8-r?b?eCB4?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?IHh4?=
+ =?koi8-r?b?eHgg?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?eCB4?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?IHh4?=
+ =?koi8-r?b?eHgg?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?eCB4?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?IHh4?=
+ =?koi8-r?b?eHgg?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?eCB4?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?IHh4?=
+ =?koi8-r?b?eHgg?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?eCB4?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?IHh4?=
+ =?koi8-r?b?eHgg?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?eCB4?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?IHh4?=
+ =?koi8-r?b?eHgg?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?eCB4?=
+ =?koi8-r?b?eHh4?=
+ =?koi8-r?b?IA==?=""")
+ eq(x, str(make_header(decode_header(s))))
+ h = Header(charset='koi8-r', maxlinelen=40)
+ h.append(x)
+ s = h.encode()
+ eq(s, """\
+ =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
+ =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
+ =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
+ =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
+ =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
+ eq(x, str(make_header(decode_header(s))))
+ def test_us_ascii_header(self):
+     # Decode a plain ASCII value, rebuild a Header from the chunks, and
+     # check that it encodes back to the same text.
+     text = 'hello'
+     chunks = decode_header(text)
+     self.assertEqual(chunks, [('hello', None)])
+     rebuilt = make_header(chunks)
+     self.assertEqual(text, rebuilt.encode())
+ def test_string_charset(self):
+     # append() is given the charset by name instead of a Charset instance.
+     header = Header()
+     header.append('hello', 'iso-8859-1')
+     self.assertEqual(header, 'hello')
+## def test_unicode_error(self):
+## raises = self.assertRaises
+## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
+## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
+## h = Header()
+## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
+## raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
+## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
+ def test_utf8_shortest(self):
+     # The expected values use a 'q' encoded-word for the mostly-ASCII text
+     # and a 'b' encoded-word for the all-CJK text.
+     latin = Header('p\xf6stal', 'utf-8')
+     self.assertEqual(latin.encode(), '=?utf-8?q?p=C3=B6stal?=')
+     cjk = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
+     self.assertEqual(cjk.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
+ def test_bad_8bit_header(self):
+     # Undecodable bytes are expected to raise UnicodeError by default and
+     # to be accepted when errors='replace' is passed.
+     raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
+     with self.assertRaises(UnicodeError):
+         Header(raw)
+     header = Header()
+     with self.assertRaises(UnicodeError):
+         header.append(raw)
+     replaced = raw.decode('utf-8', 'replace')
+     self.assertEqual(str(Header(raw, errors='replace')), replaced)
+     header.append(raw, errors='replace')
+     self.assertEqual(str(header), replaced)
+ def test_escaped_8bit_header(self):
+     # A surrogate-escaped string with the unknown-8bit charset: str() shows
+     # U+FFFD for the bad byte, decode_header() gives the original bytes.
+     raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
+     escaped = raw.decode('ascii', 'surrogateescape')
+     header = Header(escaped, charset=email.charset.UNKNOWN8BIT)
+     self.assertEqual(
+         str(header),
+         'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
+     self.assertEqual(email.header.decode_header(header),
+                      [(raw, 'unknown-8bit')])
+ def test_header_handles_binary_unknown8bit(self):
+     # Same expectations as the surrogate-escaped case, but the Header is
+     # given the raw bytes directly.
+     raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
+     header = Header(raw, charset=email.charset.UNKNOWN8BIT)
+     self.assertEqual(
+         str(header),
+         'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
+     self.assertEqual(email.header.decode_header(header),
+                      [(raw, 'unknown-8bit')])
+ def test_make_header_handles_binary_unknown8bit(self):
+     # Round-trip an unknown-8bit Header through decode_header() and
+     # make_header(); the rebuilt header must behave like the original.
+     raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
+     first = Header(raw, charset=email.charset.UNKNOWN8BIT)
+     rebuilt = email.header.make_header(email.header.decode_header(first))
+     self.assertEqual(
+         str(rebuilt),
+         'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
+     self.assertEqual(email.header.decode_header(rebuilt),
+                      [(raw, 'unknown-8bit')])
+ def test_modify_returned_list_does_not_change_header(self):
+     # Mutating the list returned by decode_header() must leave the Header
+     # it was obtained from unchanged.
+     header = Header('test')
+     decoded = email.header.decode_header(header)
+     decoded.append(('ascii', 'test2'))
+     self.assertEqual(str(header), 'test')
+ def test_encoded_adjacent_nonencoded(self):
+     # An encoded chunk followed by a plain chunk, then a decode/rebuild
+     # round trip that must give the same encoded form.
+     header = Header()
+     header.append('hello', 'iso-8859-1')
+     header.append('world')
+     encoded = header.encode()
+     self.assertEqual(encoded, '=?iso-8859-1?q?hello?= world')
+     roundtrip = make_header(decode_header(encoded))
+     self.assertEqual(roundtrip.encode(), encoded)
+ def test_whitespace_eater(self):
+     # Two adjacent koi8-r encoded-words are expected to decode into one
+     # chunk and to re-encode as a single encoded-word.
+     raw = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
+     parts = decode_header(raw)
+     expected_parts = [
+         (b'Subject:', None),
+         (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'),
+         (b'zz.', None),
+     ]
+     self.assertEqual(parts, expected_parts)
+     rebuilt = make_header(parts)
+     self.assertEqual(
+         rebuilt.encode(),
+         'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
+ def test_broken_base64_header(self):
+     # decode_header() is expected to raise HeaderParseError for this value.
+     broken = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
+     with self.assertRaises(errors.HeaderParseError):
+         decode_header(broken)
+ def test_shift_jis_charset(self):
+     # A shift_jis header is expected to be emitted as an iso-2022-jp
+     # encoded-word.
+     header = Header('文', charset='shift_jis')
+     self.assertEqual(header.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
+ def test_flatten_header_with_no_value(self):
+     # Issue 11401 (regression from email 4.x).  The space after the header
+     # name does not reflect the input, but email 4.x behaved the same way;
+     # it would be nice to fix that at some point.
+     parsed = email.message_from_string("EmptyHeader:")
+     self.assertEqual(str(parsed), "EmptyHeader: \n\n")
+ def test_encode_preserves_leading_ws_on_value(self):
+ # Sets a header value that begins with whitespace and checks the flattened
+ # message text.
+ # NOTE(review): runs of spaces inside these two literals look collapsed in
+ # this copy of the patch -- confirm the exact whitespace against upstream.
+ msg = Message()
+ msg['SomeHeader'] = ' value with leading ws'
+ self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n")
+# Test RFC 2231 header parameters (en/de)coding
+class TestRFC2231(TestEmailBase):
+ def test_get_param(self):
+     # The 'title' parameter of msg_29.txt is expected to come back as a
+     # (charset, language, value) triple, with and without unquoting.
+     msg = self._msgobj('msg_29.txt')
+     unquoted = msg.get_param('title')
+     self.assertEqual(
+         unquoted,
+         ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
+     quoted = msg.get_param('title', unquote=False)
+     self.assertEqual(
+         quoted,
+         ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
+ def test_set_param(self):
+ eq = self.ndiffAssertEqual
+ msg = Message()
+ msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
+ charset='us-ascii')
+ eq(msg.get_param('title'),
+ ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
+ msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
+ charset='us-ascii', language='en')
+ eq(msg.get_param('title'),
+ ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
+ msg = self._msgobj('msg_01.txt')
+ msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
+ charset='us-ascii', language='en')
+ eq(msg.as_string(maxheaderlen=78), """\
+Return-Path: <bbb@zzz.org>
+Delivered-To: bbb@zzz.org
+Received: by mail.zzz.org (Postfix, from userid 889)
+\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
+From: bbb@ddd.com (John X. Doe)
+To: bbb@zzz.org
+Subject: This is a test message
+Date: Fri, 4 May 2001 14:05:44 -0400
+Content-Type: text/plain; charset=us-ascii;
+ title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
+Do you like this message?
+ def test_set_param_requote(self):
+     # Each case sets 'title' and checks the resulting Content-Type header.
+     msg = Message()
+     cases = (
+         ('foo', {}, 'text/plain; title="foo"'),
+         ('bar', {'requote': False}, 'text/plain; title=bar'),
+         # tspecial is still quoted.
+         ("(bar)bell", {'requote': False}, 'text/plain; title="(bar)bell"'),
+     )
+     for value, options, expected in cases:
+         msg.set_param('title', value, **options)
+         self.assertEqual(msg['content-type'], expected)
+ def test_del_param(self):
+ eq = self.ndiffAssertEqual
+ msg = self._msgobj('msg_01.txt')
+ msg.set_param('foo', 'bar', charset='us-ascii', language='en')
+ msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
+ charset='us-ascii', language='en')
+ msg.del_param('foo', header='Content-Type')
+ eq(msg.as_string(maxheaderlen=78), """\
+Return-Path: <bbb@zzz.org>
+Delivered-To: bbb@zzz.org
+Received: by mail.zzz.org (Postfix, from userid 889)
+\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
+From: bbb@ddd.com (John X. Doe)
+To: bbb@zzz.org
+Subject: This is a test message
+Date: Fri, 4 May 2001 14:05:44 -0400
+Content-Type: text/plain; charset="us-ascii";
+ title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
+Do you like this message?
+ def test_rfc2231_get_content_charset(self):
+     # msg_32.txt is expected to report 'us-ascii' as its content charset.
+     msg = self._msgobj('msg_32.txt')
+     self.assertEqual(msg.get_content_charset(), 'us-ascii')
+ def test_rfc2231_parse_rfc_quoting(self):
+ # Parses a Content-Disposition whose filename is split into numbered
+ # continuation segments, checks the reassembled filename, and checks that
+ # the message flattens back to the original text.
+ m = textwrap.dedent('''\
+ Content-Disposition: inline;
+ \tfilename*0*=''This%20is%20even%20more%20;
+ \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
+ \tfilename*2="is it not.pdf"
+ ''')
+ msg = email.message_from_string(m)
+ self.assertEqual(msg.get_filename(),
+ 'This is even more ***fun*** is it not.pdf')
+ self.assertEqual(m, msg.as_string())
+ def test_rfc2231_parse_extra_quoting(self):
+ # Same input shape as the unquoted variant, except that the encoded
+ # segments are additionally wrapped in double quotes; the reassembled
+ # filename and the flattened text are checked the same way.
+ m = textwrap.dedent('''\
+ Content-Disposition: inline;
+ \tfilename*0*="''This%20is%20even%20more%20";
+ \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
+ \tfilename*2="is it not.pdf"
+ ''')
+ msg = email.message_from_string(m)
+ self.assertEqual(msg.get_filename(),
+ 'This is even more ***fun*** is it not.pdf')
+ self.assertEqual(m, msg.as_string())
+ def test_rfc2231_no_language_or_charset(self):
+ m = '''\
+Content-Transfer-Encoding: 8bit
+Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
+Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
+ msg = email.message_from_string(m)
+ param = msg.get_param('NAME')
+ self.assertFalse(isinstance(param, tuple))
+ self.assertEqual(
+ param,
+ def test_rfc2231_no_language_or_charset_in_filename(self):
+ m = '''\
+Content-Disposition: inline;
+\tfilename*2="is it not.pdf"
+ msg = email.message_from_string(m)
+ self.assertEqual(msg.get_filename(),
+ 'This is even more ***fun*** is it not.pdf')
+ def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
+ m = '''\
+Content-Disposition: inline;
+\tfilename*2="is it not.pdf"
+ msg = email.message_from_string(m)
+ self.assertEqual(msg.get_filename(),
+ 'This is even more ***fun*** is it not.pdf')
+ def test_rfc2231_partly_encoded(self):
+ m = '''\
+Content-Disposition: inline;
+\tfilename*2="is it not.pdf"
+ msg = email.message_from_string(m)
+ self.assertEqual(
+ msg.get_filename(),
+ 'This%20is%20even%20more%20***fun*** is it not.pdf')
+ def test_rfc2231_partly_nonencoded(self):
+ m = '''\
+Content-Disposition: inline;
+\tfilename*2="is it not.pdf"
+ msg = email.message_from_string(m)
+ self.assertEqual(
+ msg.get_filename(),
+ 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
+ def test_rfc2231_no_language_or_charset_in_boundary(self):
+ m = '''\
+Content-Type: multipart/alternative;
+\tboundary*2="is it not.pdf"
+ msg = email.message_from_string(m)
+ self.assertEqual(msg.get_boundary(),
+ 'This is even more ***fun*** is it not.pdf')
+ def test_rfc2231_no_language_or_charset_in_charset(self):
+ # This is a nonsensical charset value, but tests the code anyway
+ m = '''\
+Content-Type: text/plain;
+\tcharset*2="is it not.pdf"
+ msg = email.message_from_string(m)
+ self.assertEqual(msg.get_content_charset(),
+ 'this is even more ***fun*** is it not.pdf')
+ def test_rfc2231_bad_encoding_in_filename(self):
+ m = '''\
+Content-Disposition: inline;
+\tfilename*2="is it not.pdf"
+ msg = email.message_from_string(m)
+ self.assertEqual(msg.get_filename(),
+ 'This is even more ***fun*** is it not.pdf')
+ def test_rfc2231_bad_encoding_in_charset(self):
+ m = """\
+Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
+ msg = email.message_from_string(m)
+ # This should return None because non-ascii characters in the charset
+ # are not allowed.
+ self.assertEqual(msg.get_content_charset(), None)
+ def test_rfc2231_bad_character_in_charset(self):
+ m = """\
+Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
+ msg = email.message_from_string(m)
+ # This should return None because non-ascii characters in the charset
+ # are not allowed.
+ self.assertEqual(msg.get_content_charset(), None)
+ def test_rfc2231_bad_character_in_filename(self):
+ m = '''\
+Content-Disposition: inline;
+\tfilename*2*="is it not.pdf%E2"
+ msg = email.message_from_string(m)
+ self.assertEqual(msg.get_filename(),
+ 'This is even more ***fun*** is it not.pdf\ufffd')
+ def test_rfc2231_unknown_encoding(self):
+ m = """\
+Content-Transfer-Encoding: 8bit
+Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
+ msg = email.message_from_string(m)
+ self.assertEqual(msg.get_filename(), 'myfile.txt')
+ def test_rfc2231_single_tick_in_filename_extended(self):
+ eq = self.assertEqual
+ m = """\
+Content-Type: application/x-foo;
+\tname*0*=\"Frank's\"; name*1*=\" Document\"
+ msg = email.message_from_string(m)
+ charset, language, s = msg.get_param('name')
+ eq(charset, None)
+ eq(language, None)
+ eq(s, "Frank's Document")
+ def test_rfc2231_single_tick_in_filename(self):
+ m = """\
+Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
+ msg = email.message_from_string(m)
+ param = msg.get_param('name')
+ self.assertFalse(isinstance(param, tuple))
+ self.assertEqual(param, "Frank's Document")
+ def test_rfc2231_tick_attack_extended(self):
+ eq = self.assertEqual
+ m = """\
+Content-Type: application/x-foo;
+\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
+ msg = email.message_from_string(m)
+ charset, language, s = msg.get_param('name')
+ eq(charset, 'us-ascii')
+ eq(language, 'en-us')
+ eq(s, "Frank's Document")
+ def test_rfc2231_tick_attack(self):
+ m = """\
+Content-Type: application/x-foo;
+\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
+ msg = email.message_from_string(m)
+ param = msg.get_param('name')
+ self.assertFalse(isinstance(param, tuple))
+ self.assertEqual(param, "us-ascii'en-us'Frank's Document")
+ def test_rfc2231_no_extended_values(self):
+ eq = self.assertEqual
+ m = """\
+Content-Type: application/x-foo; name=\"Frank's Document\"
+ msg = email.message_from_string(m)
+ eq(msg.get_param('name'), "Frank's Document")
+ def test_rfc2231_encoded_then_unencoded_segments(self):
+ eq = self.assertEqual
+ m = """\
+Content-Type: application/x-foo;
+\tname*1=\" Document\";
+\tname*2*=\" For You\"
+ msg = email.message_from_string(m)
+ charset, language, s = msg.get_param('name')
+ eq(charset, 'us-ascii')
+ eq(language, 'en-us')
+ eq(s, 'My Document For You')
+ def test_rfc2231_unencoded_then_encoded_segments(self):
+ eq = self.assertEqual
+ m = """\
+Content-Type: application/x-foo;
+\tname*1*=\" Document\";
+\tname*2*=\" For You\"
+ msg = email.message_from_string(m)
+ charset, language, s = msg.get_param('name')
+ eq(charset, 'us-ascii')
+ eq(language, 'en-us')
+ eq(s, 'My Document For You')
+# Tests to ensure that signed parts of an email are completely preserved, as
+# required by RFC1847 section 2.1. Note that these are incomplete, because the
+# email package does not currently always preserve the body. See issue 1670765.
+class TestSigned(TestEmailBase):
+    # Checks that the first MIME part of msg_45.txt survives re-generation
+    # unchanged through several output paths.
+    def _msg_and_obj(self, filename):
+        # Return the raw text of a test data file together with its parse.
+        with openfile(findfile(filename)) as source:
+            raw_text = source.read()
+        return raw_text, email.message_from_string(raw_text)
+    def _signed_parts_eq(self, original, result):
+        # Extract the first mime part of each message and compare them.
+        import re
+        first_part = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
+        expected = first_part.search(original).group(2)
+        actual = first_part.search(result).group(2)
+        self.assertEqual(actual, expected)
+    def test_long_headers_as_string(self):
+        original, msg = self._msg_and_obj('msg_45.txt')
+        self._signed_parts_eq(original, msg.as_string())
+    def test_long_headers_as_string_maxheaderlen(self):
+        original, msg = self._msg_and_obj('msg_45.txt')
+        self._signed_parts_eq(original, msg.as_string(maxheaderlen=60))
+    def test_long_headers_flatten(self):
+        original, msg = self._msg_and_obj('msg_45.txt')
+        sink = StringIO()
+        Generator(sink).flatten(msg)
+        self._signed_parts_eq(original, sink.getvalue())
+def _testclasses():
+    # Collect every attribute of this module whose name starts with 'Test'.
+    this_module = sys.modules[__name__]
+    found = []
+    for attr_name in dir(this_module):
+        if attr_name.startswith('Test'):
+            found.append(getattr(this_module, attr_name))
+    return found
+def suite():
+    # Build one TestSuite holding the tests of every class returned by
+    # _testclasses().  unittest.makeSuite() is deprecated, so the default
+    # loader's loadTestsFromTestCase() is used instead.
+    load = unittest.defaultTestLoader.loadTestsFromTestCase
+    all_tests = unittest.TestSuite()
+    for testclass in _testclasses():
+        all_tests.addTest(load(testclass))
+    return all_tests
+def test_main():
+    # Run each class returned by _testclasses() through run_unittest().
+    for case_class in _testclasses():
+        run_unittest(case_class)
+# When run as a script, let unittest build the tests from suite().
+if __name__ == '__main__':
+ unittest.main(defaultTest='suite')
diff --git a/lib-python/3/email/test/test_email_codecs.py b/lib-python/3/email/test/test_email_codecs.py
new file mode 100644
index 0000000000..ca85f5731e
--- /dev/null
+++ b/lib-python/3/email/test/test_email_codecs.py
@@ -0,0 +1,93 @@
+# Copyright (C) 2002-2006 Python Software Foundation
+# Contact: email-sig@python.org
+# email package unit tests for (optional) Asian codecs
+import unittest
+from test.support import run_unittest
+from email.test.test_email import TestEmailBase
+from email.charset import Charset
+from email.header import Header, decode_header
+from email.message import Message
+# We're compatible with Python 2.3, but it doesn't have the built-in Asian
+# codecs, so we have to skip all these tests.
+try:
+    # Probe for the euc-jp codec; a LookupError means it is not available.
+    str(b'foo', 'euc-jp')
+except LookupError:
+    raise unittest.SkipTest
+class TestEmailAsianCodecs(TestEmailBase):
+ def test_japanese_codecs(self):
+ eq = self.ndiffAssertEqual
+ jcode = "euc-jp"
+ gcode = "iso-8859-1"
+ j = Charset(jcode)
+ g = Charset(gcode)
+ h = Header("Hello World!")
+ jhello = str(b'\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc'
+ b'\xa5\xeb\xa5\xc9\xa1\xaa', jcode)
+ ghello = str(b'Gr\xfc\xdf Gott!', gcode)
+ h.append(jhello, j)
+ h.append(ghello, g)
+ # BAW: This used to -- and maybe should -- fold the two iso-8859-1
+ # chunks into a single encoded word. However it doesn't violate the
+ # standard to have them as two encoded chunks and maybe it's
+ # reasonable <wink> for each .append() call to result in a separate
+ # encoded word.
+ eq(h.encode(), """\
+Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=
+ =?iso-8859-1?q?Gr=FC=DF_Gott!?=""")
+ eq(decode_header(h.encode()),
+ [(b'Hello World!', None),
+ (b'\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'),
+ (b'Gr\xfc\xdf Gott!', gcode)])
+ subject_bytes = (b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5'
+ b'\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2'
+ b'\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3'
+ b'\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9')
+ subject = str(subject_bytes, jcode)
+ h = Header(subject, j, header_name="Subject")
+ # test a very long header
+ enc = h.encode()
+ # TK: splitting point may differ by codec design and/or Header encoding
+ eq(enc , """\
+ =?iso-2022-jp?b?GyRCMnE8VCROPjVHJyRyQlQkQyRGJCQkXiQ5GyhC?=""")
+ # TK: full decode comparison
+ eq(str(h).encode(jcode), subject_bytes)
+ def test_payload_encoding_utf8(self):
+     # Text decoded from euc-jp bytes is stored as a utf-8 payload and is
+     # expected to come back unchanged after decoding the payload.
+     text = str(b'\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc'
+                b'\xa5\xeb\xa5\xc9\xa1\xaa', 'euc-jp')
+     msg = Message()
+     msg.set_payload(text, 'utf-8')
+     payload_bytes = msg.get_payload(decode=True)
+     roundtrip = payload_bytes.decode(msg.get_content_charset())
+     self.assertEqual(text, roundtrip)
+ def test_payload_encoding(self):
+     # Same round trip as the utf-8 variant, but the payload charset is
+     # euc-jp itself.
+     codec = 'euc-jp'
+     text = str(b'\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc'
+                b'\xa5\xeb\xa5\xc9\xa1\xaa', codec)
+     msg = Message()
+     msg.set_payload(text, codec)
+     payload_bytes = msg.get_payload(decode=True)
+     roundtrip = payload_bytes.decode(msg.get_content_charset())
+     self.assertEqual(text, roundtrip)
+def suite():
+    # Build a TestSuite holding the TestEmailAsianCodecs tests.
+    # unittest.makeSuite() is deprecated, so the default loader's
+    # loadTestsFromTestCase() is used instead.
+    codec_tests = unittest.TestSuite()
+    codec_tests.addTest(
+        unittest.defaultTestLoader.loadTestsFromTestCase(TestEmailAsianCodecs))
+    return codec_tests
+def test_main():
+ # Run the TestEmailAsianCodecs class through run_unittest().
+ run_unittest(TestEmailAsianCodecs)
+# When run as a script, let unittest build the tests from suite().
+if __name__ == '__main__':
+ unittest.main(defaultTest='suite')
diff --git a/lib-python/3/email/test/test_email_torture.py b/lib-python/3/email/test/test_email_torture.py
new file mode 100644
index 0000000000..544b1bbb39
--- /dev/null
+++ b/lib-python/3/email/test/test_email_torture.py
@@ -0,0 +1,136 @@
+# Copyright (C) 2002-2004 Python Software Foundation
+# A torture test of the email package. This should not be run as part of the
+# standard Python test suite since it requires several meg of email messages
+# collected in the wild. These source messages are not checked into the
+# Python distro, but are available as part of the standalone email package at
+# http://sf.net/projects/mimelib
+import sys
+import os
+import unittest
+from io import StringIO
+from types import ListType
+from email.test.test_email import TestEmailBase
+from test.support import TestSkipped, run_unittest
+import email
+from email import __file__ as testfile
+from email.iterators import _structure
+def openfile(filename):
+    # Open, for reading in text mode, a file from the 'moredata' directory
+    # located one level above the directory that holds the email package.
+    package_dir = os.path.dirname(testfile)
+    location = os.path.abspath(
+        os.path.join(package_dir, os.pardir, 'moredata', filename))
+    return open(location, 'r')
+# Prevent this test from running in the Python distro
+try:
+    # Only the existence of the data file matters, so the probe handle is
+    # closed straight away instead of being left open.
+    openfile('crispin-torture.txt').close()
+except IOError:
+    raise TestSkipped
+class TortureBase(TestEmailBase):
+    def _msgobj(self, filename):
+        # Parse the named torture-test file into a message object; the file
+        # is closed whether or not parsing succeeds.
+        with openfile(filename) as source:
+            parsed = email.message_from_file(source)
+        return parsed
+class TestCrispinTorture(TortureBase):
+ # Mark Crispin's torture test from the SquirrelMail project
+ def test_mondo_message(self):
+ eq = self.assertEqual
+ neq = self.ndiffAssertEqual
+ msg = self._msgobj('crispin-torture.txt')
+ payload = msg.get_payload()
+ eq(type(payload), ListType)
+ eq(len(payload), 12)
+ eq(msg.preamble, None)
+ eq(msg.epilogue, '\n')
+ # Probably the best way to verify the message is parsed correctly is to
+ # dump its structure and compare it against the known structure.
+ fp = StringIO()
+ _structure(msg, fp=fp)
+ neq(fp.getvalue(), """\
+ text/plain
+ message/rfc822
+ multipart/alternative
+ text/plain
+ multipart/mixed
+ text/richtext
+ application/andrew-inset
+ message/rfc822
+ audio/basic
+ audio/basic
+ image/pbm
+ message/rfc822
+ multipart/mixed
+ multipart/mixed
+ text/plain
+ audio/x-sun
+ multipart/mixed
+ image/gif
+ image/gif
+ application/x-be2
+ application/atomicmail
+ audio/x-sun
+ message/rfc822
+ multipart/mixed
+ text/plain
+ image/pgm
+ text/plain
+ message/rfc822
+ multipart/mixed
+ text/plain
+ image/pbm
+ message/rfc822
+ application/postscript
+ image/gif
+ message/rfc822
+ multipart/mixed
+ audio/basic
+ audio/basic
+ message/rfc822
+ multipart/mixed
+ application/postscript
+ text/plain
+ message/rfc822
+ multipart/mixed
+ text/plain
+ multipart/parallel
+ image/gif
+ audio/basic
+ application/atomicmail
+ message/rfc822
+ audio/x-sun
+def _testclasses():
+    # Collect every attribute of this module whose name starts with 'Test'.
+    this_module = sys.modules[__name__]
+    found = []
+    for attr_name in dir(this_module):
+        if attr_name.startswith('Test'):
+            found.append(getattr(this_module, attr_name))
+    return found
+def suite():
+    # Build one TestSuite holding the tests of every class returned by
+    # _testclasses().  unittest.makeSuite() is deprecated, so the default
+    # loader's loadTestsFromTestCase() is used instead.
+    load = unittest.defaultTestLoader.loadTestsFromTestCase
+    all_tests = unittest.TestSuite()
+    for testclass in _testclasses():
+        all_tests.addTest(load(testclass))
+    return all_tests
+def test_main():
+    # Run each class returned by _testclasses() through run_unittest().
+    for case_class in _testclasses():
+        run_unittest(case_class)
+# When run as a script, let unittest build the tests from suite().
+if __name__ == '__main__':
+ unittest.main(defaultTest='suite')
diff --git a/lib-python/3/email/utils.py b/lib-python/3/email/utils.py
new file mode 100644
index 0000000000..ac4da3705f
--- /dev/null
+++ b/lib-python/3/email/utils.py
@@ -0,0 +1,306 @@
+# Copyright (C) 2001-2010 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+"""Miscellaneous utilities."""
+# Public names of this module, i.e. what "from email.utils import *" binds.
+__all__ = [
+ 'collapse_rfc2231_value',
+ 'decode_params',
+ 'decode_rfc2231',
+ 'encode_rfc2231',
+ 'formataddr',
+ 'formatdate',
+ 'getaddresses',
+ 'make_msgid',
+ 'mktime_tz',
+ 'parseaddr',
+ 'parsedate',
+ 'parsedate_tz',
+ 'unquote',
+ ]
+import os
+import re
+import time
+import base64
+import random
+import socket
+import urllib.parse
+import warnings
+from io import StringIO
+from email._parseaddr import quote
+from email._parseaddr import AddressList as _AddressList
+from email._parseaddr import mktime_tz
+# We need wormarounds for bugs in these methods in older Pythons (see below)
+from email._parseaddr import parsedate as _parsedate
+from email._parseaddr import parsedate_tz as _parsedate_tz
+from quopri import decodestring as _qdecode
+# Intrapackage imports
+from email.encoders import _bencode, _qencode
+# String constants shared by the helpers in this module.
+# COMMASPACE and EMPTYSTRING are the join separators referenced by
+# getaddresses() and decode_params(); without these definitions both
+# functions fail with NameError.
+COMMASPACE = ', '
+EMPTYSTRING = ''
+CRLF = '\r\n'
+TICK = "'"
+# A display name containing any of these characters gets wrapped in double
+# quotes by formataddr().
+specialsre = re.compile(r'[][\\()<>@,:;".]')
+# Each of these characters gets a backslash put in front of it when
+# formataddr() writes out a display name.
+escapesre = re.compile(r'[][\\()"]')
+# Helpers
+def formataddr(pair):
+    """Build an address header value from a (realname, email_address) pair.
+    This is the inverse of parseaddr(); the result is suitable for an
+    RFC 2822 From, To or Cc header. When the real name is false, the
+    address is returned unmodified. Otherwise the result has the form
+    'realname <address>', where the name has its escapable characters
+    backslash-escaped and is wrapped in double quotes if it contains any
+    special character.
+    """
+    realname, address = pair
+    if not realname:
+        return address
+    # The quoting decision is made on the raw name, before escaping.
+    wrapper = '"' if specialsre.search(realname) else ''
+    escaped = escapesre.sub(r'\\\g<0>', realname)
+    return '%s%s%s <%s>' % (wrapper, escaped, wrapper, address)
+def getaddresses(fieldvalues):
+    """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
+    # All field values are glued into one comma-separated string so that a
+    # single address-list parse covers them all.
+    joined = COMMASPACE.join(fieldvalues)
+    return _AddressList(joined).addresslist
+# Pattern for an encoded word of the form =?charset?encoding?atom?= , where
+# encoding is "q" or "b" in either case. The three pieces are available as
+# the named groups 'charset', 'encoding' and 'atom'.
+# NOTE(review): nothing else in this module, as shown, refers to ecre.
+ecre = re.compile(r'''
+ =\? # literal =?
+ (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
+ \? # literal ?
+ (?P<encoding>[qb]) # either a "q" or a "b", case insensitive
+ \? # literal ?
+ (?P<atom>.*?) # non-greedy up to the next ?= is the atom
+ \?= # literal ?=
+ ''', re.VERBOSE | re.IGNORECASE)
+def formatdate(timeval=None, localtime=False, usegmt=False):
+    """Return a date string in RFC 2822 form, e.g.:
+    Fri, 09 Nov 2001 01:08:47 -0000
+    timeval, when given, is a floating point time value as accepted by
+    time.gmtime() and time.localtime(); otherwise the current time is used.
+    localtime, when true, renders timeval in the local timezone (with the
+    daylight-saving offset when DST is in effect) instead of UTC.
+    usegmt, when true, writes the zone as the string "GMT" rather than the
+    numeric "-0000"; it is only looked at when localtime is false.
+    """
+    # strftime() is avoided on purpose: it honors the locale, whereas
+    # RFC 2822 requires the English day and month abbreviations.
+    day_names = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
+    month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
+    if timeval is None:
+        timeval = time.time()
+    if localtime:
+        parts = time.localtime(timeval)
+        # The DST offset applies only if the zone has DST and the DST flag
+        # (last field of the time tuple) is set for this moment.
+        if time.daylight and parts[-1]:
+            west_seconds = time.altzone
+        else:
+            west_seconds = time.timezone
+        hours, remainder = divmod(abs(west_seconds), 3600)
+        # The offset counts seconds west of UTC while the zone field is
+        # written east-positive, hence the flipped sign.
+        sign = '-' if west_seconds > 0 else '+'
+        zone = '%s%02d%02d' % (sign, hours, remainder // 60)
+    else:
+        parts = time.gmtime(timeval)
+        zone = 'GMT' if usegmt else '-0000'
+    year, month, day, hour, minute, second, weekday = parts[:7]
+    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
+        day_names[weekday], day, month_names[month - 1],
+        year, hour, minute, second, zone)
+def make_msgid(idstring=None, domain=None):
+    """Return a string usable as an RFC 2822 Message-ID, e.g:
+    <20020201195627.33539.96671@nightshade.la.mastaler.com>
+    The id is built from the current UTC timestamp, the process id and a
+    random number below 100000. idstring, when given, is appended after a
+    dot to strengthen uniqueness. domain, when given, is used as the part
+    after the '@'; otherwise socket.getfqdn() supplies it.
+    """
+    stamp = time.strftime('%Y%m%d%H%M%S', time.gmtime(time.time()))
+    pid = os.getpid()
+    nonce = random.randrange(100000)
+    suffix = '' if idstring is None else '.' + idstring
+    if domain is None:
+        domain = socket.getfqdn()
+    return '<%s.%s.%s%s@%s>' % (stamp, pid, nonce, suffix, domain)
+# These functions are in the standalone mimelib version only because they've
+# subsequently been fixed in the latest Python versions. We use this to worm
+# around broken older Pythons.
+def parsedate(data):
+    """Parse data with _parsedate(); a false value (e.g. '' or None) gives None."""
+    return _parsedate(data) if data else None
+def parsedate_tz(data):
+    """Parse data with _parsedate_tz(); a false value (e.g. '' or None) gives None."""
+    return _parsedate_tz(data) if data else None
+def parseaddr(addr):
+    """Return the first (realname, email_address) pair parsed from addr.
+    If the address list parsed from addr is empty, ('', '') is returned.
+    """
+    parsed = _AddressList(addr).addresslist
+    return parsed[0] if parsed else ('', '')
+# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
+def unquote(str):
+    """Remove quotes from a string.
+    A string wrapped in double quotes loses them and has its backslash
+    escapes undone; a string wrapped in angle brackets just loses the
+    brackets. Anything else, including strings shorter than two
+    characters, is returned unchanged.
+    """
+    if len(str) <= 1:
+        return str
+    if str.startswith('"') and str.endswith('"'):
+        inner = str[1:-1]
+        # Doubled backslashes are collapsed first, then \" becomes ".
+        inner = inner.replace('\\\\', '\\')
+        return inner.replace('\\"', '"')
+    if str.startswith('<') and str.endswith('>'):
+        return str[1:-1]
+    return str
+# RFC2231-related functions - parameter encoding and decoding
+def decode_rfc2231(s):
+    """Split an RFC 2231 value into charset, language and text.
+    s is split on the first two single quotes. If that yields three
+    pieces they are returned as a list; otherwise (None, None, s) is
+    returned.
+    """
+    pieces = s.split(TICK, 2)
+    if len(pieces) > 2:
+        return pieces
+    return None, None, s
+def encode_rfc2231(s, charset=None, language=None):
+    """Encode string according to RFC 2231.
+    s is always percent-encoded with urllib.parse.quote() (safe=''),
+    using charset, or 'ascii' when charset is not given. If neither
+    charset nor language is given, the percent-encoded string is returned
+    on its own. Otherwise the result is "charset'language'encoded", with
+    the empty string standing in for a missing language.
+    """
+    quoted = urllib.parse.quote(s, safe='', encoding=charset or 'ascii')
+    if charset is None and language is None:
+        return quoted
+    lang = '' if language is None else language
+    return "%s'%s'%s" % (charset, lang, quoted)
+# Matches a parameter name carrying an RFC 2231 marker -- "name*", "name*N"
+# or "name*N*" -- and captures the base name ('name') and the optional
+# section number ('num').
+rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$',
+ re.ASCII)
+def decode_params(params):
+    """Decode parameters list according to RFC 2231.
+    params is a sequence of 2-tuples containing (param name, string value).
+    The first pair is passed through untouched. Every other pair whose
+    name has no RFC 2231 marker is unquoted and re-quoted. Pairs with a
+    marker are grouped by base name, ordered by section number and
+    joined; if any segment was %-encoded the value becomes a
+    (charset, language, '"text"') tuple, otherwise a quoted string.
+    Returns a new list; params itself is not modified.
+    Raises IndexError if params is empty.
+    """
+    # Iterate instead of copying the list and repeatedly calling pop(0),
+    # which shifts the whole list on every call.
+    name, value = params[0]
+    new_params = [(name, value)]
+    # Map parameter's name to a list of continuations. The values are a
+    # 3-tuple of the continuation number, the string value, and a flag
+    # specifying whether a particular segment is %-encoded.
+    rfc2231_params = {}
+    for name, value in params[1:]:
+        encoded = name.endswith('*')
+        value = unquote(value)
+        mo = rfc2231_continuation.match(name)
+        if mo is None:
+            new_params.append((name, '"%s"' % quote(value)))
+            continue
+        name, num = mo.group('name', 'num')
+        if num is not None:
+            num = int(num)
+        rfc2231_params.setdefault(name, []).append((num, value, encoded))
+    for name, continuations in rfc2231_params.items():
+        # Sort by number
+        continuations.sort()
+        # Append all values in numerical order, converting %-encodings for
+        # the encoded segments. If any of the continuation names ends in
+        # a *, then the entire string, after decoding segments and
+        # concatenating, must have the charset and language specifiers at
+        # the beginning of the string.
+        segments = []
+        extended = False
+        for num, s, encoded in continuations:
+            if encoded:
+                # Decode as "latin-1", so the characters in s directly
+                # represent the percent-encoded octet values.
+                # collapse_rfc2231_value treats this as an octet sequence.
+                s = urllib.parse.unquote(s, encoding="latin-1")
+                extended = True
+            segments.append(s)
+        value = quote(EMPTYSTRING.join(segments))
+        if extended:
+            charset, language, value = decode_rfc2231(value)
+            new_params.append((name, (charset, language, '"%s"' % value)))
+        else:
+            new_params.append((name, '"%s"' % value))
+    return new_params
+def collapse_rfc2231_value(value, errors='replace',
+                           fallback_charset='us-ascii'):
+    """Collapse an RFC 2231 parameter value into a plain string.
+    value is either a plain string, which is returned unquoted, or a
+    (charset, language, text) 3-tuple. For a 3-tuple, text is decoded
+    with charset using the given errors handler. fallback_charset is used
+    when charset is None. If the charset names no known codec, the
+    unquoted text is returned instead.
+    """
+    if not isinstance(value, tuple) or len(value) != 3:
+        return unquote(value)
+    charset, language, text = value
+    if charset is None:
+        # decode_rfc2231() returns (None, None, s) when s has no
+        # charset'language' prefix, and str() cannot decode with a None
+        # encoding, so use the fallback instead of raising TypeError.
+        charset = fallback_charset
+    # While value comes to us as a unicode string, we need it to be a bytes
+    # object. We do not want bytes() normal utf-8 decoder, we want a
+    # straight interpretation of the string as character bytes.
+    rawbytes = bytes(text, 'raw-unicode-escape')
+    try:
+        return str(rawbytes, charset, errors)
+    except LookupError:
+        # charset is not a known codec.
+        return unquote(text)