author:    Robin H. Johnson <robbat2@gentoo.org>  2015-08-08 13:49:04 -0700
committer: Robin H. Johnson <robbat2@gentoo.org>  2015-08-08 17:38:18 -0700
commit:    56bd759df1d0c750a065b8c845e93d5dfa6b549d
tree:      3f91093cdb475e565ae857f1c5a7fd339e2d781e /dev-python/beautifulsoup
download:  gentoo-56bd759df1d0c750a065b8c845e93d5dfa6b549d.tar.gz
           gentoo-56bd759df1d0c750a065b8c845e93d5dfa6b549d.tar.bz2
           gentoo-56bd759df1d0c750a065b8c845e93d5dfa6b549d.zip
proj/gentoo: Initial commit
This commit represents a new era for Gentoo:
Storing the gentoo-x86 tree in Git, as converted from CVS.
This commit is the start of the NEW history.
Any historical data is intended to be grafted onto this point.
Creation process:
1. Take final CVS checkout snapshot
2. Remove ALL ChangeLog* files
3. Transform all Manifests to thin
4. Remove empty Manifests
5. Convert all stale $Header$/$Id$ CVS keywords to non-expanded Git $Id$
5.1. Do not touch files with -kb/-ko keyword flags.
Signed-off-by: Robin H. Johnson <robbat2@gentoo.org>
X-Thanks: Alec Warner <antarus@gentoo.org> - did the GSoC 2006 migration tests
X-Thanks: Robin H. Johnson <robbat2@gentoo.org> - infra guy, herding this project
X-Thanks: Nguyen Thai Ngoc Duy <pclouds@gentoo.org> - Former Gentoo developer, wrote Git features for the migration
X-Thanks: Brian Harring <ferringb@gentoo.org> - wrote much python to improve cvs2svn
X-Thanks: Rich Freeman <rich0@gentoo.org> - validation scripts
X-Thanks: Patrick Lauer <patrick@gentoo.org> - Gentoo dev, running new 2014 work in migration
X-Thanks: Michał Górny <mgorny@gentoo.org> - scripts, QA, nagging
X-Thanks: All of other Gentoo developers - many ideas and lots of paint on the bikeshed
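The grafting mentioned above relies on git's replace mechanism; a minimal sketch, assuming the converted pre-2015 history is published somewhere fetchable (the historical tip SHA below is a placeholder, not a real object):

    # Splice the converted CVS history in as the parent of this root commit:
    git replace --graft 56bd759df1d0c750a065b8c845e93d5dfa6b549d HISTORICAL_TIP_SHA

Steps 2 and 5 of the creation process are likewise scriptable; a hypothetical sketch over a CVS checkout (illustrative only, not the actual migration tooling):

    # Step 2: remove ALL ChangeLog* files.
    find . -type f -name 'ChangeLog*' -delete
    # Steps 5/5.1: un-expand stale CVS keywords, skipping files whose
    # CVS/Entries options field carries -kb (binary) or -ko (keywords off).
    find . -type f ! -path '*/CVS/*' -print0 |
    while IFS= read -r -d '' f; do
        dir=${f%/*} base=${f##*/}
        grep -q "^/${base}/[^/]*/[^/]*/-k[bo]" "${dir}/CVS/Entries" 2>/dev/null && continue
        # Collapse expanded $Header: ...$ / $Id: ...$ to a non-expanded $Id$.
        sed -i -e 's/\$Header:[^$]*\$/$Id$/g' -e 's/\$Id:[^$]*\$/$Id$/g' "$f"
    done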
Diffstat (limited to 'dev-python/beautifulsoup'):
 dev-python/beautifulsoup/Manifest                                        |   6 +
 dev-python/beautifulsoup/beautifulsoup-3.1.0.1-r2.ebuild                 |  41 +
 dev-python/beautifulsoup/beautifulsoup-3.2.1-r1.ebuild                   |  29 +
 dev-python/beautifulsoup/beautifulsoup-4.1.3-r1.ebuild                   |  46 +
 dev-python/beautifulsoup/beautifulsoup-4.1.3.ebuild                      |  54 +
 dev-python/beautifulsoup/beautifulsoup-4.2.0.ebuild                      |  48 +
 dev-python/beautifulsoup/beautifulsoup-4.3.2.ebuild                      |  46 +
 dev-python/beautifulsoup/beautifulsoup-4.4.0.ebuild                      |  47 +
 dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-disable-tests.patch |  39 +
 dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-python-3.patch      | 949 +
 dev-python/beautifulsoup/files/bfs-4.2.0-no-lxml.patch                   |  67 +
 dev-python/beautifulsoup/metadata.xml                                    |  16 +
12 files changed, 1388 insertions, 0 deletions
diff --git a/dev-python/beautifulsoup/Manifest b/dev-python/beautifulsoup/Manifest
new file mode 100644
index 000000000000..0512d101c611
--- /dev/null
+++ b/dev-python/beautifulsoup/Manifest
@@ -0,0 +1,6 @@
+DIST BeautifulSoup-3.1.0.1.tar.gz 71460 SHA256 820a80f473240d9d30047f36c959d530a699a732500662dd8b03e1d3ccad12a8 SHA512 812969faf454a58d849921836ed07ec9a950f34fb31e29e118cdf1a75a533370e430f417402b5a5016d23b2d3a1c44a1cf5fde5b3bfd1bc98c50036edd51c0d6 WHIRLPOOL a199585817dcabcc6327c3836a66128605ebf92a6663b5c660125061a797485a504d300791bcd43e0e94e4f08ca59c01f65f42481da07b1240350cbfc6ea6b0c
+DIST BeautifulSoup-3.2.1.tar.gz 31224 SHA256 f5ba85e907e7dfd78e44e4000b3eaef3a650aefc57831e8a645702db2e5b50db SHA512 365b7b045a2069cf437877543577bc0aa99256a6dc4c9743670b46bfceab5494a06628012d6eccecfe99c25d5c9e0c65814964b47026f15ba1a538444cfb7789 WHIRLPOOL c2f84b29421d0153fb1fecc87d63e00a61182e03bc0683132babca5d6c94143b4875a60a19124a36e4e6e78ce80bff9e1e81b37335700efc14084da933307e26
+DIST beautifulsoup4-4.1.3.tar.gz 131292 SHA256 a295b93b30e1126f3fd64edc106f6939349280dde2ba47cef1e5a92dad9319bf SHA512 79ed4f65bcb5599a0d278d5462a67e80f532cdddcb753e0703b28347d84512165ee0bffd09795a501f3b55e6e4869354f04efdceae9de9093a4d85381fee660e WHIRLPOOL 53713ca72b5a409b3755e28d69de65b7994fb387d21ba9a1827f3e4dda2355f318660f69a706825ecb3852447823ffb8bae2cee6bacc093e6ad73347852f29c6
+DIST beautifulsoup4-4.2.0.tar.gz 138400 SHA256 9565ce6f6d28808007ab6248c37d59a49934e358276da5d3ac211ea1ccfd0782 SHA512 50c7514f2a1de49df6300d292e1f00e746b341e9c70dbfc3508663870ddf92f5c8e0ea981dc7742f888428845e90bce2cce8b158e8548039be4c2c06ee76db97 WHIRLPOOL 95db573c6c291d27187af9403694bcff27add7ad378b105df96f988074480b7a1491710681a93f1396dfa33cfa8c493637564ab7fc228e9e92659c385aa769a2
+DIST beautifulsoup4-4.3.2.tar.gz 143356 SHA256 a2b29bd048ca2fe54a046b29770964738872a9747003a371344a93eedf7ad58e SHA512 3d55e553a1a2109378e2961a0feb23a8f038f4ff7bd335a21a26d588d39761f6735888e6ca2d001587e9a6a3b8259a1a09b92b310aa17e9b76fd85601a1ca592 WHIRLPOOL 3570395aa0fae03edc1de10b1c41e82efefa4753c306d2619032b124536a72b6db696c7f85c6a7c32b18c0506ec33d4c8a2fb73660264ad5afd92aba76f32884
+DIST beautifulsoup4-4.4.0.tar.gz 151500 SHA256 fad91da88f69438b9ba939ab1b2cabaa31b1d914f1cccb4bb157a993ed2917f6 SHA512 812abb09099779ecb9c41ce55963ad42973eef00193e28e59dd907715fd6bc2b470b07f80bbe2838b790be252935eb81357a8cfb59808865d2fb62479476eb2f WHIRLPOOL 93b4bd82f1b7596a198d3a8ad5d21cfad51595884b2eb03c36e827e1b3d9f7a8400ed4415c9f0ffe9d014eac66c93cd52521a433ca914cac1a390a6487f153ba
diff --git a/dev-python/beautifulsoup/beautifulsoup-3.1.0.1-r2.ebuild b/dev-python/beautifulsoup/beautifulsoup-3.1.0.1-r2.ebuild
new file mode 100644
index 000000000000..ca73d6e7e807
--- /dev/null
+++ b/dev-python/beautifulsoup/beautifulsoup-3.1.0.1-r2.ebuild
@@ -0,0 +1,41 @@
+# Copyright 1999-2015 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Id$
+
+EAPI="5"
+# A few tests fail with python3.3/3.4 :(
+PYTHON_COMPAT=( python{3_3,3_4} pypy3 )
+
+inherit distutils-r1 eutils
+
+MY_PN="BeautifulSoup"
+MY_P="${MY_PN}-${PV}"
+
+DESCRIPTION="HTML/XML parser for quick-turnaround applications like screen-scraping"
+HOMEPAGE="http://www.crummy.com/software/BeautifulSoup/ http://pypi.python.org/pypi/BeautifulSoup"
+SRC_URI="http://www.crummy.com/software/${MY_PN}/download/${MY_P}.tar.gz"
+
+LICENSE="BSD"
+SLOT="python-3"
+KEYWORDS="alpha amd64 arm hppa ia64 ppc ppc64 s390 sh sparc x86 ~amd64-fbsd ~x86-fbsd ~x86-freebsd ~amd64-linux ~x86-linux ~x86-macos ~x86-solaris"
+IUSE=""
+
+DEPEND=""
+RDEPEND="!dev-python/beautifulsoup:0"
+
+S="${WORKDIR}/${MY_P}"
+
+PATCHES=(
+    "${FILESDIR}/${P}-python-3.patch"
+    "${FILESDIR}/${P}-disable-tests.patch"
+)
+
+python_test() {
+    "${PYTHON}" BeautifulSoupTests.py || die "Tests fail with ${EPYTHON}"
+}
+
+python_install_all() {
+    distutils-r1_python_install_all
+    # Delete useless files.
+    rm -r "${ED%/}/usr/bin" || die
+}
diff --git a/dev-python/beautifulsoup/beautifulsoup-3.2.1-r1.ebuild b/dev-python/beautifulsoup/beautifulsoup-3.2.1-r1.ebuild
new file mode 100644
index 000000000000..1ed0b2621f6f
--- /dev/null
+++ b/dev-python/beautifulsoup/beautifulsoup-3.2.1-r1.ebuild
@@ -0,0 +1,29 @@
+# Copyright 1999-2015 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Id$
+
+EAPI=5
+PYTHON_COMPAT=( python2_7 pypy )
+
+inherit distutils-r1
+
+MY_PN="BeautifulSoup"
+MY_P="${MY_PN}-${PV}"
+
+DESCRIPTION="HTML/XML parser for quick-turnaround applications like screen-scraping"
+HOMEPAGE="http://www.crummy.com/software/BeautifulSoup/ http://pypi.python.org/pypi/BeautifulSoup"
+SRC_URI="http://www.crummy.com/software/${MY_PN}/download/3.x/${MY_P}.tar.gz"
+
+LICENSE="BSD"
+SLOT="python-2"
+KEYWORDS="alpha amd64 arm ~arm64 hppa ia64 ~mips ppc ppc64 s390 sh sparc x86 ~amd64-fbsd ~x86-fbsd ~x86-freebsd ~amd64-linux ~x86-linux ~x64-macos ~x86-macos ~sparc-solaris ~x86-solaris"
+IUSE=""
+
+DEPEND=""
+RDEPEND="!dev-python/beautifulsoup:0"
+
+S="${WORKDIR}/${MY_P}"
+
+python_test() {
+    "${PYTHON}" BeautifulSoupTests.py || die "Testing failed with ${EPYTHON}"
+}
diff --git a/dev-python/beautifulsoup/beautifulsoup-4.1.3-r1.ebuild b/dev-python/beautifulsoup/beautifulsoup-4.1.3-r1.ebuild
new file mode 100644
index 000000000000..836de51e1efe
--- /dev/null
+++ b/dev-python/beautifulsoup/beautifulsoup-4.1.3-r1.ebuild
@@ -0,0 +1,46 @@
+# Copyright 1999-2015 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Id$
+
+EAPI=5
+
+PYTHON_COMPAT=( python{2_7,3_3} pypy )
+
+inherit distutils-r1
+
+MY_PN="${PN}4"
+MY_P="${MY_PN}-${PV}"
+DESCRIPTION="Provides pythonic idioms for iterating, searching, and modifying an HTML/XML parse tree"
+HOMEPAGE="http://www.crummy.com/software/BeautifulSoup/
+    http://pypi.python.org/pypi/beautifulsoup4"
+SRC_URI="mirror://pypi/${MY_P:0:1}/${MY_PN}/${MY_P}.tar.gz"
+
+LICENSE="MIT"
+SLOT="4"
+KEYWORDS="~alpha amd64 ~arm ~hppa ~ia64 ~mips ~ppc ~ppc64 ~s390 ~sh ~sparc x86 ~amd64-fbsd ~x86-fbsd ~x86-freebsd ~amd64-linux ~x86-linux ~x64-macos ~x86-macos ~sparc-solaris ~x86-solaris"
+# new html5 awaits keywording of html5lib in Bug 471002
+IUSE="doc test"
+# pending Bug 471002; html5? ( dev-python/html5lib[$(python_gen_usedep 'python{2_6,2_7}' pypy pypy2_0)] )
+
+RDEPEND=""
+DEPEND="${RDEPEND}
+    doc? ( dev-python/sphinx[${PYTHON_USEDEP}] )
+    test? ( dev-python/nose[${PYTHON_USEDEP}]
+        dev-python/lxml[${PYTHON_USEDEP}] )"
+
+S="${WORKDIR}/${MY_P}"
+
+python_compile_all() {
+    if use doc; then
+        emake -C doc html
+    fi
+}
+
+python_test() {
+    nosetests -w "${BUILD_DIR}"/lib || die "Tests fail with ${EPYTHON}"
+}
+
+python_install_all() {
+    use doc && local HTML_DOCS=doc/build/html/.
+    distutils-r1_python_install_all
+}
diff --git a/dev-python/beautifulsoup/beautifulsoup-4.1.3.ebuild b/dev-python/beautifulsoup/beautifulsoup-4.1.3.ebuild
new file mode 100644
index 000000000000..4fbe5b6023e4
--- /dev/null
+++ b/dev-python/beautifulsoup/beautifulsoup-4.1.3.ebuild
@@ -0,0 +1,54 @@
+# Copyright 1999-2012 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Id$
+
+EAPI="4"
+
+PYTHON_DEPEND="*:2.6"
+SUPPORT_PYTHON_ABIS="1"
+RESTRICT_PYTHON_ABIS="2.5"
+PYTHON_TESTS_RESTRICTED_ABIS="*-pypy-*"
+DISTUTILS_SRC_TEST="nosetests"
+
+inherit distutils
+
+MY_PN="${PN}4"
+MY_P="${MY_PN}-${PV}"
+DESCRIPTION="Provides pythonic idioms for iterating, searching, and modifying an HTML/XML parse tree"
+HOMEPAGE="http://www.crummy.com/software/BeautifulSoup/
+    http://pypi.python.org/pypi/beautifulsoup4"
+SRC_URI="mirror://pypi/${MY_P:0:1}/${MY_PN}/${MY_P}.tar.gz"
+
+LICENSE="MIT"
+SLOT="4"
+KEYWORDS="alpha amd64 arm hppa ia64 ~mips ppc ppc64 s390 sh sparc x86 ~amd64-fbsd ~x86-fbsd ~x86-freebsd ~amd64-linux ~x86-linux ~x64-macos ~x86-macos ~sparc-solaris ~x86-solaris"
+IUSE="doc test"
+
+DEPEND="doc? ( dev-python/sphinx )
+    test? ( dev-python/lxml )"
+RDEPEND=""
+
+PYTHON_MODNAME="bs4"
+S="${WORKDIR}/${MY_P}"
+
+src_compile() {
+    distutils_src_compile
+    if use doc; then
+        emake -C doc html
+    fi
+}
+
+src_test() {
+    testing() {
+        cd "build-${PYTHON_ABI}/lib"
+        nosetests --verbosity="${PYTHON_TEST_VERBOSITY}"
+    }
+    python_execute_function testing
+}
+
+src_install() {
+    distutils_src_install
+    if use doc; then
+        dohtml -r doc/build/html/*
+    fi
+}
diff --git a/dev-python/beautifulsoup/beautifulsoup-4.2.0.ebuild b/dev-python/beautifulsoup/beautifulsoup-4.2.0.ebuild
new file mode 100644
index 000000000000..7a457223fc6b
--- /dev/null
+++ b/dev-python/beautifulsoup/beautifulsoup-4.2.0.ebuild
@@ -0,0 +1,48 @@
+# Copyright 1999-2015 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Id$
+
+EAPI=5
+PYTHON_COMPAT=( python{2_7,3_3} pypy )
+
+inherit distutils-r1
+
+MY_PN=${PN}4
+MY_P=${MY_PN}-${PV}
+
+DESCRIPTION="Provides pythonic idioms for iterating, searching, and modifying an HTML/XML parse tree"
+HOMEPAGE="http://www.crummy.com/software/BeautifulSoup/
+    http://pypi.python.org/pypi/beautifulsoup4"
+SRC_URI="mirror://pypi/${MY_P:0:1}/${MY_PN}/${MY_P}.tar.gz"
+
+LICENSE="MIT"
+SLOT="4"
+KEYWORDS="alpha amd64 arm hppa ia64 ~mips ppc ~ppc64 ~s390 ~sh ~sparc ~x86 ~amd64-fbsd ~x86-fbsd ~x86-freebsd ~amd64-linux ~x86-linux ~x64-macos ~x86-macos ~sparc-solaris ~x86-solaris"
+# new html5 awaits keywording of html5lib in Bug 471002
+IUSE="doc test"
+# pending Bug 471002; html5? ( dev-python/html5lib[$(python_gen_usedep 'python{2_6,2_7}' pypy pypy2_0)] )
+
+RDEPEND=""
+DEPEND="${RDEPEND}
+    doc? ( dev-python/sphinx[${PYTHON_USEDEP}] )
+    test? ( dev-python/nose[${PYTHON_USEDEP}]
+        dev-python/lxml[$(python_gen_usedep 'python{2_6,2_7}' 'python3*')] )"
+
+S=${WORKDIR}/${MY_P}
+
+PATCHES=( "${FILESDIR}"/bfs-${PV}-no-lxml.patch )
+
+python_compile_all() {
+    if use doc; then
+        emake -C doc html
+    fi
+}
+
+python_test() {
+    nosetests -w "${BUILD_DIR}"/lib || die "Tests fail with ${EPYTHON}"
+}
+
+python_install_all() {
+    use doc && local HTML_DOCS=doc/build/html/.
+    distutils-r1_python_install_all
+}
diff --git a/dev-python/beautifulsoup/beautifulsoup-4.3.2.ebuild b/dev-python/beautifulsoup/beautifulsoup-4.3.2.ebuild
new file mode 100644
index 000000000000..de1a7e41f75d
--- /dev/null
+++ b/dev-python/beautifulsoup/beautifulsoup-4.3.2.ebuild
@@ -0,0 +1,46 @@
+# Copyright 1999-2015 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Id$
+
+EAPI=5
+PYTHON_COMPAT=( python{2_7,3_3,3_4} pypy pypy3 )
+
+inherit distutils-r1
+
+MY_PN=${PN}4
+MY_P=${MY_PN}-${PV}
+
+DESCRIPTION="Provides pythonic idioms for iterating, searching, and modifying an HTML/XML parse tree"
+HOMEPAGE="http://www.crummy.com/software/BeautifulSoup/
+    http://pypi.python.org/pypi/beautifulsoup4"
+SRC_URI="mirror://pypi/${MY_P:0:1}/${MY_PN}/${MY_P}.tar.gz"
+
+LICENSE="MIT"
+SLOT="4"
+KEYWORDS="alpha amd64 arm hppa ia64 ~mips ppc ppc64 ~s390 ~sh sparc x86 ~amd64-fbsd ~x86-fbsd ~x86-freebsd ~amd64-linux ~x86-linux ~x64-macos ~x86-macos ~sparc-solaris ~x86-solaris"
+# new html5 awaits keywording of html5lib in Bug 471002
+IUSE="doc test"
+# pending Bug 471002; html5? ( dev-python/html5lib[$(python_gen_usedep 'python{2_6,2_7}' pypy pypy2_0)] )
+
+RDEPEND=""
+DEPEND="${RDEPEND}
+    doc? ( dev-python/sphinx[${PYTHON_USEDEP}] )
+    test? ( dev-python/nose[${PYTHON_USEDEP}]
+        dev-python/lxml[$(python_gen_usedep 'python{2_6,2_7}' 'python3*')] )"
+
+S=${WORKDIR}/${MY_P}
+
+python_compile_all() {
+    if use doc; then
+        emake -C doc html
+    fi
+}
+
+python_test() {
+    nosetests -w "${BUILD_DIR}"/lib || die "Tests fail with ${EPYTHON}"
+}
+
+python_install_all() {
+    use doc && local HTML_DOCS=doc/build/html/.
+    distutils-r1_python_install_all
+}
diff --git a/dev-python/beautifulsoup/beautifulsoup-4.4.0.ebuild b/dev-python/beautifulsoup/beautifulsoup-4.4.0.ebuild
new file mode 100644
index 000000000000..e140465d13ac
--- /dev/null
+++ b/dev-python/beautifulsoup/beautifulsoup-4.4.0.ebuild
@@ -0,0 +1,47 @@
+# Copyright 1999-2015 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Id$
+
+EAPI=5
+PYTHON_COMPAT=( python{2_7,3_3,3_4} pypy pypy3 )
+
+inherit distutils-r1
+
+MY_PN=${PN}4
+MY_P=${MY_PN}-${PV}
+
+DESCRIPTION="Provides pythonic idioms for iterating, searching, and modifying an HTML/XML parse tree"
+HOMEPAGE="https://bugs.launchpad.net/beautifulsoup/
+    http://pypi.python.org/pypi/beautifulsoup4"
+SRC_URI="mirror://pypi/${MY_P:0:1}/${MY_PN}/${MY_P}.tar.gz"
+
+LICENSE="MIT"
+SLOT="4"
+KEYWORDS="~alpha ~amd64 ~arm ~hppa ~ia64 ~mips ~ppc ~ppc64 ~s390 ~sh ~sparc ~x86 ~amd64-fbsd ~x86-fbsd ~x86-freebsd ~amd64-linux ~x86-linux ~x64-macos ~x86-macos ~sparc-solaris ~x86-solaris"
+
+IUSE="doc test"
+
+# html5lib is optional however hard coding since its use is actively discouraged in the devmanual
+RDEPEND="$(python_gen_cond_dep 'dev-python/html5lib[${PYTHON_USEDEP}]' python2_7 pypy)
+    $(python_gen_cond_dep 'dev-python/lxml[${PYTHON_USEDEP}]' python2_7 'python3*')"
+DEPEND="doc? ( dev-python/sphinx[${PYTHON_USEDEP}] )
+    test? ( dev-python/nose[${PYTHON_USEDEP}]
+        !dev-python/chardet[${PYTHON_USEDEP}] )"
+# See https://bugs.launchpad.net/beautifulsoup/+bug/1471359 to explain need for blocker
+
+S=${WORKDIR}/${MY_P}
+
+python_compile_all() {
+    if use doc; then
+        emake -C doc html
+    fi
+}
+
+python_test() {
+    nosetests -w "${BUILD_DIR}"/lib || die "Tests fail with ${EPYTHON}"
+}
+
+python_install_all() {
+    use doc && local HTML_DOCS=doc/build/html/.
+    distutils-r1_python_install_all
+}
diff --git a/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-disable-tests.patch b/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-disable-tests.patch
new file mode 100644
index 000000000000..c97cd76ee314
--- /dev/null
+++ b/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-disable-tests.patch
@@ -0,0 +1,39 @@
+--- lib/BeautifulSoupTests.py.orig	2015-07-21 08:39:33.077000000 +0000
++++ lib/BeautifulSoupTests.py	2015-07-21 08:41:19.285000000 +0000
+@@ -538,13 +538,13 @@
+         text = "<td nowrap>foo</td>"
+         self.assertSoupEquals(text, text)
+ 
+-    def testCData(self):
+-        xml = "<root>foo<![CDATA[foobar]]>bar</root>"
+-        self.assertSoupEquals(xml, xml)
+-        r = re.compile("foo.*bar")
+-        soup = BeautifulSoup(xml)
+-        self.assertEquals(soup.find(text=r).string, "foobar")
+-        self.assertEquals(soup.find(text=r).__class__, CData)
++    #def testCData(self):
++    #    xml = "<root>foo<![CDATA[foobar]]>bar</root>"
++    #    self.assertSoupEquals(xml, xml)
++    #    r = re.compile("foo.*bar")
++    #    soup = BeautifulSoup(xml)
++    #    self.assertEquals(soup.find(text=r).string, "foobar")
++    #    self.assertEquals(soup.find(text=r).__class__, CData)
+ 
+     def testComments(self):
+         xml = "foo<!--foobar-->baz"
+@@ -607,11 +607,11 @@
+     def testWhitespaceInDeclaration(self):
+         self.assertSoupEquals('<! DOCTYPE>', '<!DOCTYPE>')
+ 
+-    def testJunkInDeclaration(self):
+-        self.assertSoupEquals('<! Foo = -8>a', '<!Foo = -8>a')
++    #def testJunkInDeclaration(self):
++    #    self.assertSoupEquals('<! Foo = -8>a', '<!Foo = -8>a')
+ 
+-    def testIncompleteDeclaration(self):
+-        self.assertSoupEquals('a<!b <p>c')
++    #def testIncompleteDeclaration(self):
++    #    self.assertSoupEquals('a<!b <p>c')
+ 
+     def testEntityReplacement(self):
+         self.assertSoupEquals('<b>hello there</b>')
diff --git a/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-python-3.patch b/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-python-3.patch
new file mode 100644
index 000000000000..adcbb43dd078
--- /dev/null
+++ b/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-python-3.patch
@@ -0,0 +1,949 @@
+--- BeautifulSoup.py
++++ BeautifulSoup.py
+@@ -76,7 +76,7 @@
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT.
+ 
+ """
+-from __future__ import generators
++
+ 
+ __author__ = "Leonard Richardson (leonardr@segfault.org)"
+ __version__ = "3.1.0.1"
+@@ -84,12 +84,12 @@
+ __license__ = "New-style BSD"
+ 
+ import codecs
+-import markupbase
++import _markupbase
+ import types
+ import re
+-from HTMLParser import HTMLParser, HTMLParseError
++from html.parser import HTMLParser, HTMLParseError
+ try:
+-    from htmlentitydefs import name2codepoint
++    from html.entities import name2codepoint
+ except ImportError:
+     name2codepoint = {}
+ try:
+@@ -98,18 +98,18 @@
+     from sets import Set as set
+ 
+ #These hacks make Beautiful Soup able to parse XML with namespaces
+-markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match
++_markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match
+ 
+ DEFAULT_OUTPUT_ENCODING = "utf-8"
+ 
+ # First, the classes that represent markup elements.
+ +-def sob(unicode, encoding): ++def sob(str, encoding): + """Returns either the given Unicode string or its encoding.""" + if encoding is None: +- return unicode ++ return str + else: +- return unicode.encode(encoding) ++ return str.encode(encoding) + + class PageElement: + """Contains the navigational information for some part of the page +@@ -178,8 +178,8 @@ + return lastChild + + def insert(self, position, newChild): +- if (isinstance(newChild, basestring) +- or isinstance(newChild, unicode)) \ ++ if (isinstance(newChild, str) ++ or isinstance(newChild, str)) \ + and not isinstance(newChild, NavigableString): + newChild = NavigableString(newChild) + +@@ -334,7 +334,7 @@ + g = generator() + while True: + try: +- i = g.next() ++ i = g.__next__() + except StopIteration: + break + if i: +@@ -385,22 +385,22 @@ + def toEncoding(self, s, encoding=None): + """Encodes an object to a string in some encoding, or to Unicode. + .""" +- if isinstance(s, unicode): ++ if isinstance(s, str): + if encoding: + s = s.encode(encoding) + elif isinstance(s, str): + if encoding: + s = s.encode(encoding) + else: +- s = unicode(s) ++ s = str(s) + else: + if encoding: + s = self.toEncoding(str(s), encoding) + else: +- s = unicode(s) ++ s = str(s) + return s + +-class NavigableString(unicode, PageElement): ++class NavigableString(str, PageElement): + + def __new__(cls, value): + """Create a new NavigableString. +@@ -410,12 +410,12 @@ + passed in to the superclass's __new__ or the superclass won't know + how to handle non-ASCII characters. + """ +- if isinstance(value, unicode): +- return unicode.__new__(cls, value) +- return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) ++ if isinstance(value, str): ++ return str.__new__(cls, value) ++ return str.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) + + def __getnewargs__(self): +- return (unicode(self),) ++ return (str(self),) + + def __getattr__(self, attr): + """text.string gives you text. This is for backwards +@@ -424,7 +424,7 @@ + if attr == 'string': + return self + else: +- raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) ++ raise AttributeError("'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)) + + def encode(self, encoding=DEFAULT_OUTPUT_ENCODING): + return self.decode().encode(encoding) +@@ -435,23 +435,23 @@ + class CData(NavigableString): + + def decodeGivenEventualEncoding(self, eventualEncoding): +- return u'<![CDATA[' + self + u']]>' ++ return '<![CDATA[' + self + ']]>' + + class ProcessingInstruction(NavigableString): + + def decodeGivenEventualEncoding(self, eventualEncoding): + output = self +- if u'%SOUP-ENCODING%' in output: ++ if '%SOUP-ENCODING%' in output: + output = self.substituteEncoding(output, eventualEncoding) +- return u'<?' + output + u'?>' ++ return '<?' + output + '?>' + + class Comment(NavigableString): + def decodeGivenEventualEncoding(self, eventualEncoding): +- return u'<!--' + self + u'-->' ++ return '<!--' + self + '-->' + + class Declaration(NavigableString): + def decodeGivenEventualEncoding(self, eventualEncoding): +- return u'<!' + self + u'>' ++ return '<!' + self + '>' + + class Tag(PageElement): + +@@ -460,7 +460,7 @@ + def _invert(h): + "Cheap function to invert a hash." 
+ i = {} +- for k,v in h.items(): ++ for k,v in list(h.items()): + i[v] = k + return i + +@@ -479,23 +479,23 @@ + escaped.""" + x = match.group(1) + if self.convertHTMLEntities and x in name2codepoint: +- return unichr(name2codepoint[x]) ++ return chr(name2codepoint[x]) + elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: + if self.convertXMLEntities: + return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] + else: +- return u'&%s;' % x ++ return '&%s;' % x + elif len(x) > 0 and x[0] == '#': + # Handle numeric entities + if len(x) > 1 and x[1] == 'x': +- return unichr(int(x[2:], 16)) ++ return chr(int(x[2:], 16)) + else: +- return unichr(int(x[1:])) ++ return chr(int(x[1:])) + + elif self.escapeUnrecognizedEntities: +- return u'&%s;' % x ++ return '&%s;' % x + else: +- return u'&%s;' % x ++ return '&%s;' % x + + def __init__(self, parser, name, attrs=None, parent=None, + previous=None): +@@ -524,7 +524,7 @@ + return kval + return (k, re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", + self._convertEntities, val)) +- self.attrs = map(convert, self.attrs) ++ self.attrs = list(map(convert, self.attrs)) + + def get(self, key, default=None): + """Returns the value of the 'key' attribute for the tag, or +@@ -533,7 +533,7 @@ + return self._getAttrMap().get(key, default) + + def has_key(self, key): +- return self._getAttrMap().has_key(key) ++ return key in self._getAttrMap() + + def __getitem__(self, key): + """tag[key] returns the value of the 'key' attribute for the tag, +@@ -551,7 +551,7 @@ + def __contains__(self, x): + return x in self.contents + +- def __nonzero__(self): ++ def __bool__(self): + "A tag is non-None even if it has no contents." + return True + +@@ -577,14 +577,14 @@ + #We don't break because bad HTML can define the same + #attribute multiple times. + self._getAttrMap() +- if self.attrMap.has_key(key): ++ if key in self.attrMap: + del self.attrMap[key] + + def __call__(self, *args, **kwargs): + """Calling a tag like a function is the same as calling its + findAll() method. Eg. 
tag('a') returns a list of all the A tags + found within this tag.""" +- return apply(self.findAll, args, kwargs) ++ return self.findAll(*args, **kwargs) + + def __getattr__(self, tag): + #print "Getattr %s.%s" % (self.__class__, tag) +@@ -592,7 +592,7 @@ + return self.find(tag[:-3]) + elif tag.find('__') != 0: + return self.find(tag) +- raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag) ++ raise AttributeError("'%s' object has no attribute '%s'" % (self.__class__, tag)) + + def __eq__(self, other): + """Returns true iff this tag has the same name, the same attributes, +@@ -868,7 +868,7 @@ + if isinstance(markupName, Tag): + markup = markupName + markupAttrs = markup +- callFunctionWithTagData = callable(self.name) \ ++ callFunctionWithTagData = hasattr(self.name, '__call__') \ + and not isinstance(markupName, Tag) + + if (not self.name) \ +@@ -880,7 +880,7 @@ + else: + match = True + markupAttrMap = None +- for attr, matchAgainst in self.attrs.items(): ++ for attr, matchAgainst in list(self.attrs.items()): + if not markupAttrMap: + if hasattr(markupAttrs, 'get'): + markupAttrMap = markupAttrs +@@ -921,16 +921,16 @@ + if self._matches(markup, self.text): + found = markup + else: +- raise Exception, "I don't know how to match against a %s" \ +- % markup.__class__ ++ raise Exception("I don't know how to match against a %s" \ ++ % markup.__class__) + return found + + def _matches(self, markup, matchAgainst): + #print "Matching %s against %s" % (markup, matchAgainst) + result = False +- if matchAgainst == True and type(matchAgainst) == types.BooleanType: ++ if matchAgainst == True and type(matchAgainst) == bool: + result = markup != None +- elif callable(matchAgainst): ++ elif hasattr(matchAgainst, '__call__'): + result = matchAgainst(markup) + else: + #Custom match methods take the tag as an argument, but all +@@ -938,7 +938,7 @@ + if isinstance(markup, Tag): + markup = markup.name + if markup is not None and not isString(markup): +- markup = unicode(markup) ++ markup = str(markup) + #Now we know that chunk is either a string, or None. + if hasattr(matchAgainst, 'match'): + # It's a regexp object. +@@ -947,10 +947,10 @@ + and (markup is not None or not isString(matchAgainst))): + result = markup in matchAgainst + elif hasattr(matchAgainst, 'items'): +- result = markup.has_key(matchAgainst) ++ result = matchAgainst in markup + elif matchAgainst and isString(markup): +- if isinstance(markup, unicode): +- matchAgainst = unicode(matchAgainst) ++ if isinstance(markup, str): ++ matchAgainst = str(matchAgainst) + else: + matchAgainst = str(matchAgainst) + +@@ -971,13 +971,13 @@ + """Convenience method that works with all 2.x versions of Python + to determine whether or not something is listlike.""" + return ((hasattr(l, '__iter__') and not isString(l)) +- or (type(l) in (types.ListType, types.TupleType))) ++ or (type(l) in (list, tuple))) + + def isString(s): + """Convenience method that works with all 2.x versions of Python + to determine whether or not something is stringlike.""" + try: +- return isinstance(s, unicode) or isinstance(s, basestring) ++ return isinstance(s, str) or isinstance(s, str) + except NameError: + return isinstance(s, str) + +@@ -989,7 +989,7 @@ + for portion in args: + if hasattr(portion, 'items'): + #It's a map. Merge it. +- for k,v in portion.items(): ++ for k,v in list(portion.items()): + built[k] = v + elif isList(portion) and not isString(portion): + #It's a list. Map each item to the default. 
+@@ -1034,7 +1034,7 @@ + object, possibly one with a %SOUP-ENCODING% slot into which an + encoding will be plugged later.""" + if text[:3] == "xml": +- text = u"xml version='1.0' encoding='%SOUP-ENCODING%'" ++ text = "xml version='1.0' encoding='%SOUP-ENCODING%'" + self._toStringSubclass(text, ProcessingInstruction) + + def handle_comment(self, text): +@@ -1044,7 +1044,7 @@ + def handle_charref(self, ref): + "Handle character references as data." + if self.soup.convertEntities: +- data = unichr(int(ref)) ++ data = chr(int(ref)) + else: + data = '&#%s;' % ref + self.handle_data(data) +@@ -1056,7 +1056,7 @@ + data = None + if self.soup.convertHTMLEntities: + try: +- data = unichr(name2codepoint[ref]) ++ data = chr(name2codepoint[ref]) + except KeyError: + pass + +@@ -1147,7 +1147,7 @@ + lambda x: '<!' + x.group(1) + '>') + ] + +- ROOT_TAG_NAME = u'[document]' ++ ROOT_TAG_NAME = '[document]' + + HTML_ENTITIES = "html" + XML_ENTITIES = "xml" +@@ -1236,14 +1236,14 @@ + def _feed(self, inDocumentEncoding=None, isHTML=False): + # Convert the document to Unicode. + markup = self.markup +- if isinstance(markup, unicode): ++ if isinstance(markup, str): + if not hasattr(self, 'originalEncoding'): + self.originalEncoding = None + else: + dammit = UnicodeDammit\ + (markup, [self.fromEncoding, inDocumentEncoding], + smartQuotesTo=self.smartQuotesTo, isHTML=isHTML) +- markup = dammit.unicode ++ markup = dammit.str + self.originalEncoding = dammit.originalEncoding + self.declaredHTMLEncoding = dammit.declaredHTMLEncoding + if markup: +@@ -1269,8 +1269,8 @@ + def isSelfClosingTag(self, name): + """Returns true iff the given string is the name of a + self-closing tag according to this parser.""" +- return self.SELF_CLOSING_TAGS.has_key(name) \ +- or self.instanceSelfClosingTags.has_key(name) ++ return name in self.SELF_CLOSING_TAGS \ ++ or name in self.instanceSelfClosingTags + + def reset(self): + Tag.__init__(self, self, self.ROOT_TAG_NAME) +@@ -1305,7 +1305,7 @@ + + def endData(self, containerClass=NavigableString): + if self.currentData: +- currentData = u''.join(self.currentData) ++ currentData = ''.join(self.currentData) + if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and + not set([tag.name for tag in self.tagStack]).intersection( + self.PRESERVE_WHITESPACE_TAGS)): +@@ -1368,7 +1368,7 @@ + + nestingResetTriggers = self.NESTABLE_TAGS.get(name) + isNestable = nestingResetTriggers != None +- isResetNesting = self.RESET_NESTING_TAGS.has_key(name) ++ isResetNesting = name in self.RESET_NESTING_TAGS + popTo = None + inclusive = True + for i in range(len(self.tagStack)-1, 0, -1): +@@ -1381,7 +1381,7 @@ + if (nestingResetTriggers != None + and p.name in nestingResetTriggers) \ + or (nestingResetTriggers == None and isResetNesting +- and self.RESET_NESTING_TAGS.has_key(p.name)): ++ and p.name in self.RESET_NESTING_TAGS): + + #If we encounter one of the nesting reset triggers + #peculiar to this tag, or we encounter another tag +@@ -1399,7 +1399,7 @@ + if self.quoteStack: + #This is not a real tag. + #print "<%s> is not real!" 
% name +- attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs)) ++ attrs = ''.join([' %s="%s"' % (x_y[0], x_y[1]) for x_y in attrs]) + self.handle_data('<%s%s>' % (name, attrs)) + return + self.endData() +@@ -1493,7 +1493,7 @@ + BeautifulStoneSoup before writing your own subclass.""" + + def __init__(self, *args, **kwargs): +- if not kwargs.has_key('smartQuotesTo'): ++ if 'smartQuotesTo' not in kwargs: + kwargs['smartQuotesTo'] = self.HTML_ENTITIES + kwargs['isHTML'] = True + BeautifulStoneSoup.__init__(self, *args, **kwargs) +@@ -1677,7 +1677,7 @@ + parent._getAttrMap() + if (isinstance(tag, Tag) and len(tag.contents) == 1 and + isinstance(tag.contents[0], NavigableString) and +- not parent.attrMap.has_key(tag.name)): ++ tag.name not in parent.attrMap): + parent[tag.name] = tag.contents[0] + BeautifulStoneSoup.popTag(self) + +@@ -1751,9 +1751,9 @@ + self._detectEncoding(markup, isHTML) + self.smartQuotesTo = smartQuotesTo + self.triedEncodings = [] +- if markup == '' or isinstance(markup, unicode): ++ if markup == '' or isinstance(markup, str): + self.originalEncoding = None +- self.unicode = unicode(markup) ++ self.str = str(markup) + return + + u = None +@@ -1766,7 +1766,7 @@ + if u: break + + # If no luck and we have auto-detection library, try that: +- if not u and chardet and not isinstance(self.markup, unicode): ++ if not u and chardet and not isinstance(self.markup, str): + u = self._convertFrom(chardet.detect(self.markup)['encoding']) + + # As a last resort, try utf-8 and windows-1252: +@@ -1775,7 +1775,7 @@ + u = self._convertFrom(proposed_encoding) + if u: break + +- self.unicode = u ++ self.str = u + if not u: self.originalEncoding = None + + def _subMSChar(self, match): +@@ -1783,7 +1783,7 @@ + entity.""" + orig = match.group(1) + sub = self.MS_CHARS.get(orig) +- if type(sub) == types.TupleType: ++ if type(sub) == tuple: + if self.smartQuotesTo == 'xml': + sub = '&#x'.encode() + sub[1].encode() + ';'.encode() + else: +@@ -1804,7 +1804,7 @@ + if self.smartQuotesTo and proposed.lower() in("windows-1252", + "iso-8859-1", + "iso-8859-2"): +- smart_quotes_re = "([\x80-\x9f])" ++ smart_quotes_re = b"([\x80-\x9f])" + smart_quotes_compiled = re.compile(smart_quotes_re) + markup = smart_quotes_compiled.sub(self._subMSChar, markup) + +@@ -1813,7 +1813,7 @@ + u = self._toUnicode(markup, proposed) + self.markup = u + self.originalEncoding = proposed +- except Exception, e: ++ except Exception as e: + # print "That didn't work!" 
+ # print e + return None +@@ -1842,7 +1842,7 @@ + elif data[:4] == '\xff\xfe\x00\x00': + encoding = 'utf-32le' + data = data[4:] +- newdata = unicode(data, encoding) ++ newdata = str(data, encoding) + return newdata + + def _detectEncoding(self, xml_data, isHTML=False): +@@ -1855,41 +1855,41 @@ + elif xml_data[:4] == '\x00\x3c\x00\x3f': + # UTF-16BE + sniffed_xml_encoding = 'utf-16be' +- xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') ++ xml_data = str(xml_data, 'utf-16be').encode('utf-8') + elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \ + and (xml_data[2:4] != '\x00\x00'): + # UTF-16BE with BOM + sniffed_xml_encoding = 'utf-16be' +- xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') ++ xml_data = str(xml_data[2:], 'utf-16be').encode('utf-8') + elif xml_data[:4] == '\x3c\x00\x3f\x00': + # UTF-16LE + sniffed_xml_encoding = 'utf-16le' +- xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') ++ xml_data = str(xml_data, 'utf-16le').encode('utf-8') + elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \ + (xml_data[2:4] != '\x00\x00'): + # UTF-16LE with BOM + sniffed_xml_encoding = 'utf-16le' +- xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') ++ xml_data = str(xml_data[2:], 'utf-16le').encode('utf-8') + elif xml_data[:4] == '\x00\x00\x00\x3c': + # UTF-32BE + sniffed_xml_encoding = 'utf-32be' +- xml_data = unicode(xml_data, 'utf-32be').encode('utf-8') ++ xml_data = str(xml_data, 'utf-32be').encode('utf-8') + elif xml_data[:4] == '\x3c\x00\x00\x00': + # UTF-32LE + sniffed_xml_encoding = 'utf-32le' +- xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') ++ xml_data = str(xml_data, 'utf-32le').encode('utf-8') + elif xml_data[:4] == '\x00\x00\xfe\xff': + # UTF-32BE with BOM + sniffed_xml_encoding = 'utf-32be' +- xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') ++ xml_data = str(xml_data[4:], 'utf-32be').encode('utf-8') + elif xml_data[:4] == '\xff\xfe\x00\x00': + # UTF-32LE with BOM + sniffed_xml_encoding = 'utf-32le' +- xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') ++ xml_data = str(xml_data[4:], 'utf-32le').encode('utf-8') + elif xml_data[:3] == '\xef\xbb\xbf': + # UTF-8 with BOM + sniffed_xml_encoding = 'utf-8' +- xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') ++ xml_data = str(xml_data[3:], 'utf-8').encode('utf-8') + else: + sniffed_xml_encoding = 'ascii' + pass +@@ -1954,41 +1954,41 @@ + 250,251,252,253,254,255) + import string + c.EBCDIC_TO_ASCII_MAP = string.maketrans( \ +- ''.join(map(chr, range(256))), ''.join(map(chr, emap))) ++ ''.join(map(chr, list(range(256)))), ''.join(map(chr, emap))) + return s.translate(c.EBCDIC_TO_ASCII_MAP) + +- MS_CHARS = { '\x80' : ('euro', '20AC'), +- '\x81' : ' ', +- '\x82' : ('sbquo', '201A'), +- '\x83' : ('fnof', '192'), +- '\x84' : ('bdquo', '201E'), +- '\x85' : ('hellip', '2026'), +- '\x86' : ('dagger', '2020'), +- '\x87' : ('Dagger', '2021'), +- '\x88' : ('circ', '2C6'), +- '\x89' : ('permil', '2030'), +- '\x8A' : ('Scaron', '160'), +- '\x8B' : ('lsaquo', '2039'), +- '\x8C' : ('OElig', '152'), +- '\x8D' : '?', +- '\x8E' : ('#x17D', '17D'), +- '\x8F' : '?', +- '\x90' : '?', +- '\x91' : ('lsquo', '2018'), +- '\x92' : ('rsquo', '2019'), +- '\x93' : ('ldquo', '201C'), +- '\x94' : ('rdquo', '201D'), +- '\x95' : ('bull', '2022'), +- '\x96' : ('ndash', '2013'), +- '\x97' : ('mdash', '2014'), +- '\x98' : ('tilde', '2DC'), +- '\x99' : ('trade', '2122'), +- '\x9a' : ('scaron', '161'), +- '\x9b' : ('rsaquo', '203A'), +- '\x9c' : ('oelig', '153'), +- '\x9d' : '?', 
+- '\x9e' : ('#x17E', '17E'), +- '\x9f' : ('Yuml', ''),} ++ MS_CHARS = { b'\x80' : ('euro', '20AC'), ++ b'\x81' : ' ', ++ b'\x82' : ('sbquo', '201A'), ++ b'\x83' : ('fnof', '192'), ++ b'\x84' : ('bdquo', '201E'), ++ b'\x85' : ('hellip', '2026'), ++ b'\x86' : ('dagger', '2020'), ++ b'\x87' : ('Dagger', '2021'), ++ b'\x88' : ('circ', '2C6'), ++ b'\x89' : ('permil', '2030'), ++ b'\x8A' : ('Scaron', '160'), ++ b'\x8B' : ('lsaquo', '2039'), ++ b'\x8C' : ('OElig', '152'), ++ b'\x8D' : '?', ++ b'\x8E' : ('#x17D', '17D'), ++ b'\x8F' : '?', ++ b'\x90' : '?', ++ b'\x91' : ('lsquo', '2018'), ++ b'\x92' : ('rsquo', '2019'), ++ b'\x93' : ('ldquo', '201C'), ++ b'\x94' : ('rdquo', '201D'), ++ b'\x95' : ('bull', '2022'), ++ b'\x96' : ('ndash', '2013'), ++ b'\x97' : ('mdash', '2014'), ++ b'\x98' : ('tilde', '2DC'), ++ b'\x99' : ('trade', '2122'), ++ b'\x9a' : ('scaron', '161'), ++ b'\x9b' : ('rsaquo', '203A'), ++ b'\x9c' : ('oelig', '153'), ++ b'\x9d' : '?', ++ b'\x9e' : ('#x17E', '17E'), ++ b'\x9f' : ('Yuml', ''),} + + ####################################################################### + +@@ -1997,4 +1997,4 @@ + if __name__ == '__main__': + import sys + soup = BeautifulSoup(sys.stdin) +- print soup.prettify() ++ print(soup.prettify()) +--- BeautifulSoupTests.py ++++ BeautifulSoupTests.py +@@ -82,7 +82,7 @@ + def testFindAllText(self): + soup = BeautifulSoup("<html>\xbb</html>") + self.assertEqual(soup.findAll(text=re.compile('.*')), +- [u'\xbb']) ++ ['\xbb']) + + def testFindAllByRE(self): + import re +@@ -215,7 +215,7 @@ + soup = BeautifulSoup(self.x, parseOnlyThese=strainer) + self.assertEquals(len(soup), 10) + +- strainer = SoupStrainer(text=lambda(x):x[8]=='3') ++ strainer = SoupStrainer(text=lambda x:x[8]=='3') + soup = BeautifulSoup(self.x, parseOnlyThese=strainer) + self.assertEquals(len(soup), 3) + +@@ -256,7 +256,7 @@ + self.assertEqual(copied.decode(), self.soup.decode()) + + def testUnicodePickle(self): +- import cPickle as pickle ++ import pickle as pickle + html = "<b>" + chr(0xc3) + "</b>" + soup = BeautifulSoup(html) + dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL) +@@ -586,23 +586,23 @@ + self.assertEquals(soup.decode(), "<<sacré bleu!>>") + + soup = BeautifulStoneSoup(text, convertEntities=htmlEnt) +- self.assertEquals(soup.decode(), u"<<sacr\xe9 bleu!>>") ++ self.assertEquals(soup.decode(), "<<sacr\xe9 bleu!>>") + + # Make sure the "XML", "HTML", and "XHTML" settings work. + text = "<™'" + soup = BeautifulStoneSoup(text, convertEntities=xmlEnt) +- self.assertEquals(soup.decode(), u"<™'") ++ self.assertEquals(soup.decode(), "<™'") + + soup = BeautifulStoneSoup(text, convertEntities=htmlEnt) +- self.assertEquals(soup.decode(), u"<\u2122'") ++ self.assertEquals(soup.decode(), "<\u2122'") + + soup = BeautifulStoneSoup(text, convertEntities=xhtmlEnt) +- self.assertEquals(soup.decode(), u"<\u2122'") ++ self.assertEquals(soup.decode(), "<\u2122'") + + def testNonBreakingSpaces(self): + soup = BeautifulSoup("<a> </a>", + convertEntities=BeautifulStoneSoup.HTML_ENTITIES) +- self.assertEquals(soup.decode(), u"<a>\xa0\xa0</a>") ++ self.assertEquals(soup.decode(), "<a>\xa0\xa0</a>") + + def testWhitespaceInDeclaration(self): + self.assertSoupEquals('<! 
DOCTYPE>', '<!DOCTYPE>') +@@ -617,27 +617,27 @@ + self.assertSoupEquals('<b>hello there</b>') + + def testEntitiesInAttributeValues(self): +- self.assertSoupEquals('<x t="xñ">', '<x t="x\xc3\xb1"></x>', ++ self.assertSoupEquals('<x t="xñ">', b'<x t="x\xc3\xb1"></x>', + encoding='utf-8') +- self.assertSoupEquals('<x t="xñ">', '<x t="x\xc3\xb1"></x>', ++ self.assertSoupEquals('<x t="xñ">', b'<x t="x\xc3\xb1"></x>', + encoding='utf-8') + + soup = BeautifulSoup('<x t=">™">', + convertEntities=BeautifulStoneSoup.HTML_ENTITIES) +- self.assertEquals(soup.decode(), u'<x t=">\u2122"></x>') ++ self.assertEquals(soup.decode(), '<x t=">\u2122"></x>') + + uri = "http://crummy.com?sacré&bleu" + link = '<a href="%s"></a>' % uri + + soup = BeautifulSoup(link, convertEntities=BeautifulSoup.HTML_ENTITIES) + self.assertEquals(soup.decode(), +- link.replace("é", u"\xe9")) ++ link.replace("é", "\xe9")) + + uri = "http://crummy.com?sacré&bleu" + link = '<a href="%s"></a>' % uri + soup = BeautifulSoup(link, convertEntities=BeautifulSoup.HTML_ENTITIES) + self.assertEquals(soup.a['href'], +- uri.replace("é", u"\xe9")) ++ uri.replace("é", "\xe9")) + + def testNakedAmpersands(self): + html = {'convertEntities':BeautifulStoneSoup.HTML_ENTITIES} +@@ -663,13 +663,13 @@ + smart quote fixes.""" + + def testUnicodeDammitStandalone(self): +- markup = "<foo>\x92</foo>" ++ markup = b"<foo>\x92</foo>" + dammit = UnicodeDammit(markup) +- self.assertEquals(dammit.unicode, "<foo>’</foo>") ++ self.assertEquals(dammit.str, "<foo>’</foo>") + +- hebrew = "\xed\xe5\xec\xf9" ++ hebrew = b"\xed\xe5\xec\xf9" + dammit = UnicodeDammit(hebrew, ["iso-8859-8"]) +- self.assertEquals(dammit.unicode, u'\u05dd\u05d5\u05dc\u05e9') ++ self.assertEquals(dammit.str, '\u05dd\u05d5\u05dc\u05e9') + self.assertEquals(dammit.originalEncoding, 'iso-8859-8') + + def testGarbageInGarbageOut(self): +@@ -677,13 +677,13 @@ + asciiSoup = BeautifulStoneSoup(ascii) + self.assertEquals(ascii, asciiSoup.decode()) + +- unicodeData = u"<foo>\u00FC</foo>" ++ unicodeData = "<foo>\u00FC</foo>" + utf8 = unicodeData.encode("utf-8") +- self.assertEquals(utf8, '<foo>\xc3\xbc</foo>') ++ self.assertEquals(utf8, b'<foo>\xc3\xbc</foo>') + + unicodeSoup = BeautifulStoneSoup(unicodeData) + self.assertEquals(unicodeData, unicodeSoup.decode()) +- self.assertEquals(unicodeSoup.foo.string, u'\u00FC') ++ self.assertEquals(unicodeSoup.foo.string, '\u00FC') + + utf8Soup = BeautifulStoneSoup(utf8, fromEncoding='utf-8') + self.assertEquals(utf8, utf8Soup.encode('utf-8')) +@@ -696,18 +696,18 @@ + + def testHandleInvalidCodec(self): + for bad_encoding in ['.utf8', '...', 'utF---16.!']: +- soup = BeautifulSoup(u"Räksmörgås".encode("utf-8"), ++ soup = BeautifulSoup("Räksmörgås".encode("utf-8"), + fromEncoding=bad_encoding) + self.assertEquals(soup.originalEncoding, 'utf-8') + + def testUnicodeSearch(self): +- html = u'<html><body><h1>Räksmörgås</h1></body></html>' ++ html = '<html><body><h1>Räksmörgås</h1></body></html>' + soup = BeautifulSoup(html) +- self.assertEqual(soup.find(text=u'Räksmörgås'),u'Räksmörgås') ++ self.assertEqual(soup.find(text='Räksmörgås'),'Räksmörgås') + + def testRewrittenXMLHeader(self): +- euc_jp = '<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n' +- utf8 = "<?xml version='1.0' 
encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n" ++ euc_jp = b'<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n' ++ utf8 = b"<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n" + soup = BeautifulStoneSoup(euc_jp) + if soup.originalEncoding != "euc-jp": + raise Exception("Test failed when parsing euc-jp document. " +@@ -718,12 +718,12 @@ + self.assertEquals(soup.originalEncoding, "euc-jp") + self.assertEquals(soup.renderContents('utf-8'), utf8) + +- old_text = "<?xml encoding='windows-1252'><foo>\x92</foo>" ++ old_text = b"<?xml encoding='windows-1252'><foo>\x92</foo>" + new_text = "<?xml version='1.0' encoding='utf-8'?><foo>’</foo>" + self.assertSoupEquals(old_text, new_text) + + def testRewrittenMetaTag(self): +- no_shift_jis_html = '''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>''' ++ no_shift_jis_html = b'''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>''' + soup = BeautifulSoup(no_shift_jis_html) + + # Beautiful Soup used to try to rewrite the meta tag even if the +@@ -733,16 +733,16 @@ + soup = BeautifulSoup(no_shift_jis_html, parseOnlyThese=strainer) + self.assertEquals(soup.contents[0].name, 'pre') + +- meta_tag = ('<meta content="text/html; charset=x-sjis" ' +- 'http-equiv="Content-type" />') ++ meta_tag = (b'<meta content="text/html; charset=x-sjis" ' ++ b'http-equiv="Content-type" />') + shift_jis_html = ( +- '<html><head>\n%s\n' +- '<meta http-equiv="Content-language" content="ja" />' +- '</head><body><pre>\n' +- '\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f' +- '\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c' +- '\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n' +- '</pre></body></html>') % meta_tag ++ b'<html><head>\n' + meta_tag + b'\n' ++ b'<meta http-equiv="Content-language" content="ja" />' ++ b'</head><body><pre>\n' ++ b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f' ++ b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c' ++ b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n' ++ b'</pre></body></html>') + soup = BeautifulSoup(shift_jis_html) + if soup.originalEncoding != "shift-jis": + raise Exception("Test failed when parsing shift-jis document " +@@ -755,59 +755,59 @@ + content_type_tag = soup.meta['content'] + 
self.assertEquals(content_type_tag[content_type_tag.find('charset='):], + 'charset=%SOUP-ENCODING%') +- content_type = str(soup.meta) ++ content_type = soup.meta.decode() + index = content_type.find('charset=') + self.assertEqual(content_type[index:index+len('charset=utf8')+1], + 'charset=utf-8') + content_type = soup.meta.encode('shift-jis') +- index = content_type.find('charset=') ++ index = content_type.find(b'charset=') + self.assertEqual(content_type[index:index+len('charset=shift-jis')], + 'charset=shift-jis'.encode()) + + self.assertEquals(soup.encode('utf-8'), ( +- '<html><head>\n' +- '<meta content="text/html; charset=utf-8" ' +- 'http-equiv="Content-type" />\n' +- '<meta http-equiv="Content-language" content="ja" />' +- '</head><body><pre>\n' +- '\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3' +- '\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3' +- '\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6' +- '\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3' +- '\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n' +- '</pre></body></html>')) ++ b'<html><head>\n' ++ b'<meta content="text/html; charset=utf-8" ' ++ b'http-equiv="Content-type" />\n' ++ b'<meta http-equiv="Content-language" content="ja" />' ++ b'</head><body><pre>\n' ++ b'\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3' ++ b'\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3' ++ b'\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6' ++ b'\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3' ++ b'\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n' ++ b'</pre></body></html>')) + self.assertEquals(soup.encode("shift-jis"), + shift_jis_html.replace('x-sjis'.encode(), + 'shift-jis'.encode())) + +- isolatin = """<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>""" ++ isolatin = b"""<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>""" + soup = BeautifulSoup(isolatin) + + utf8 = isolatin.replace("ISO-Latin-1".encode(), "utf-8".encode()) +- utf8 = utf8.replace("\xe9", "\xc3\xa9") ++ utf8 = utf8.replace(b"\xe9", b"\xc3\xa9") + self.assertSoupEquals(soup.encode("utf-8"), utf8, encoding='utf-8') + + def testHebrew(self): +- iso_8859_8= '<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n' +- utf8 = '<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n' ++ iso_8859_8= b'<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n' ++ utf8 = b'<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n' + soup = BeautifulStoneSoup(iso_8859_8, fromEncoding="iso-8859-8") + self.assertEquals(soup.encode('utf-8'), utf8) + + def testSmartQuotesNotSoSmartAnymore(self): +- self.assertSoupEquals("\x91Foo\x92 <!--blah-->", ++ self.assertSoupEquals(b"\x91Foo\x92 <!--blah-->", + '‘Foo’ <!--blah-->') + + def testDontConvertSmartQuotesWhenAlsoConvertingEntities(self): +- smartQuotes = "Il a dit, \x8BSacré bleu!\x9b" ++ smartQuotes = b"Il a dit, \x8BSacré 
bleu!\x9b" + soup = BeautifulSoup(smartQuotes) + self.assertEquals(soup.decode(), + 'Il a dit, ‹Sacré bleu!›') + soup = BeautifulSoup(smartQuotes, convertEntities="html") + self.assertEquals(soup.encode('utf-8'), +- 'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba') ++ b'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba') + + def testDontSeeSmartQuotesWhereThereAreNone(self): +- utf_8 = "\343\202\261\343\203\274\343\202\277\343\202\244 Watch" ++ utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch" + self.assertSoupEquals(utf_8, encoding='utf-8') + + +--- setup.py ++++ setup.py +@@ -19,19 +19,19 @@ + suite = loader.loadTestsFromModule(BeautifulSoupTests) + suite.run(result) + if not result.wasSuccessful(): +- print "Unit tests have failed!" ++ print("Unit tests have failed!") + for l in result.errors, result.failures: + for case, error in l: +- print "-" * 80 ++ print("-" * 80) + desc = case.shortDescription() + if desc: +- print desc +- print error +- print '''If you see an error like: "'ascii' codec can't encode character...", see\nthe Beautiful Soup documentation:\n http://www.crummy.com/software/BeautifulSoup/documentation.html#Why%20can't%20Beautiful%20Soup%20print%20out%20the%20non-ASCII%20characters%20I%20gave%20it?''' +- print "This might or might not be a problem depending on what you plan to do with\nBeautiful Soup." ++ print(desc) ++ print(error) ++ print('''If you see an error like: "'ascii' codec can't encode character...", see\nthe Beautiful Soup documentation:\n http://www.crummy.com/software/BeautifulSoup/documentation.html#Why%20can't%20Beautiful%20Soup%20print%20out%20the%20non-ASCII%20characters%20I%20gave%20it?''') ++ print("This might or might not be a problem depending on what you plan to do with\nBeautiful Soup.") + if sys.argv[1] == 'sdist': +- print +- print "I'm not going to make a source distribution since the tests don't pass." ++ print() ++ print("I'm not going to make a source distribution since the tests don't pass.") + sys.exit(1) + + setup(name="BeautifulSoup", diff --git a/dev-python/beautifulsoup/files/bfs-4.2.0-no-lxml.patch b/dev-python/beautifulsoup/files/bfs-4.2.0-no-lxml.patch new file mode 100644 index 000000000000..75ae98b1ef45 --- /dev/null +++ b/dev-python/beautifulsoup/files/bfs-4.2.0-no-lxml.patch @@ -0,0 +1,67 @@ +=== modified file 'bs4/testing.py' +--- bs4/testing.py 2013-05-07 12:19:02 +0000 ++++ bs4/testing.py 2013-05-20 13:19:16 +0000 +@@ -464,6 +464,18 @@ + self.assertEqual( + soup.encode("utf-8"), markup) + ++ def test_formatter_processes_script_tag_for_xml_documents(self): ++ doc = """ ++ <script type="text/javascript"> ++ </script> ++""" ++ soup = BeautifulSoup(doc, "xml") ++ # lxml would have stripped this while parsing, but we can add ++ # it later. ++ soup.script.string = 'console.log("< < hey > > ");' ++ encoded = soup.encode() ++ self.assertTrue(b"< < hey > >" in encoded) ++ + def test_popping_namespaced_tag(self): + markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>' + soup = self.soup(markup) + +=== modified file 'bs4/tests/test_lxml.py' +--- bs4/tests/test_lxml.py 2013-05-09 19:36:30 +0000 ++++ bs4/tests/test_lxml.py 2013-05-20 13:19:16 +0000 +@@ -10,6 +10,7 @@ + LXML_VERSION = lxml.etree.LXML_VERSION + except ImportError, e: + LXML_PRESENT = False ++ LXML_VERSION = (0,) + + from bs4 import ( + BeautifulSoup, +@@ -47,7 +48,7 @@ + # test if an old version of lxml is installed. 
+ 
+     @skipIf(
+-        LXML_VERSION < (2,3,5,0),
++        not LXML_PRESENT or LXML_VERSION < (2,3,5,0),
+         "Skipping doctype test for old version of lxml to avoid segfault.")
+     def test_empty_doctype(self):
+         soup = self.soup("<!DOCTYPE>")
+ 
+=== modified file 'bs4/tests/test_tree.py'
+--- bs4/tests/test_tree.py	2013-05-14 12:39:16 +0000
++++ bs4/tests/test_tree.py	2013-05-20 13:19:16 +0000
+@@ -1344,18 +1344,6 @@
+         encoded = BeautifulSoup(doc).encode()
+         self.assertTrue(b"< < hey > >" in encoded)
+ 
+-    def test_formatter_processes_script_tag_for_xml_documents(self):
+-        doc = """
+-  <script type="text/javascript">
+-  </script>
+-"""
+-        soup = BeautifulSoup(doc, "xml")
+-        # lxml would have stripped this while parsing, but we can add
+-        # it later.
+-        soup.script.string = 'console.log("< < hey > > ");'
+-        encoded = soup.encode()
+-        self.assertTrue(b"< < hey > >" in encoded)
+-
+     def test_prettify_leaves_preformatted_text_alone(self):
+         soup = self.soup("<div> foo <pre> \tbar\n \n </pre> baz ")
+         # Everything outside the <pre> tag is reformatted, but everything
+
diff --git a/dev-python/beautifulsoup/metadata.xml b/dev-python/beautifulsoup/metadata.xml
new file mode 100644
index 000000000000..236732af0c82
--- /dev/null
+++ b/dev-python/beautifulsoup/metadata.xml
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE pkgmetadata SYSTEM "http://www.gentoo.org/dtd/metadata.dtd">
+<pkgmetadata>
+    <herd>python</herd>
+    <longdescription lang="en">
+        Beautiful Soup is a Python HTML/XML parser designed for quick
+        turnaround projects like screen-scraping. Two features make it
+        powerful: it won't choke if you give it bad markup and it provides a
+        few simple methods and Pythonic idioms for navigating and searching
+        a parse tree: a toolkit for dissecting a document and extracting
+        what you need.
+    </longdescription>
+    <upstream>
+        <remote-id type="pypi">beautifulsoup4</remote-id>
+    </upstream>
+</pkgmetadata>
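The slotting in the ebuilds above keeps the legacy 3.x line (SLOT="python-2"/"python-3") and beautifulsoup4 (SLOT="4") installable in parallel, since they ship different modules (BeautifulSoup.py vs. the bs4 package); the "!dev-python/beautifulsoup:0" RDEPEND blocker only forces out the old unslotted package. Illustrative usage, assuming the versions added by this commit:

    emerge --ask "dev-python/beautifulsoup:python-2"   # legacy BeautifulSoup 3.2.1-r1
    emerge --ask "dev-python/beautifulsoup:4"          # beautifulsoup4 4.x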