diff options
author | 2023-11-22 11:51:22 +0000 | |
---|---|---|
committer | 2023-11-22 11:51:39 +0000 | |
commit | 83819f3a38705b3ea390a7b3291315a02f854926 (patch) | |
tree | af55d24d8efd90539d91bf19e1872b2fa1ea8073 /net-misc/streamlink/files/streamlink-6.4.0-libxml2-2.12.0.patch | |
parent | dev-python/urllib3: Stabilize 1.26.18 ALLARCHES, #917745 (diff) | |
download | gentoo-83819f3a38705b3ea390a7b3291315a02f854926.tar.gz gentoo-83819f3a38705b3ea390a7b3291315a02f854926.tar.bz2 gentoo-83819f3a38705b3ea390a7b3291315a02f854926.zip |
net-misc/streamlink: add 6.4.0
Signed-off-by: Sam James <sam@gentoo.org>
Diffstat (limited to 'net-misc/streamlink/files/streamlink-6.4.0-libxml2-2.12.0.patch')
-rw-r--r-- | net-misc/streamlink/files/streamlink-6.4.0-libxml2-2.12.0.patch | 199 |
1 files changed, 199 insertions, 0 deletions
diff --git a/net-misc/streamlink/files/streamlink-6.4.0-libxml2-2.12.0.patch b/net-misc/streamlink/files/streamlink-6.4.0-libxml2-2.12.0.patch new file mode 100644 index 000000000000..ed5fd30366d2 --- /dev/null +++ b/net-misc/streamlink/files/streamlink-6.4.0-libxml2-2.12.0.patch @@ -0,0 +1,199 @@ +https://github.com/streamlink/streamlink/commit/9d8156dd794ee0919297cd90d85bcc11b8a28358 + +From 9d8156dd794ee0919297cd90d85bcc11b8a28358 Mon Sep 17 00:00:00 2001 +From: bastimeyer <mail@bastimeyer.de> +Date: Tue, 21 Nov 2023 20:10:47 +0100 +Subject: [PATCH] utils.parse: fix libxml2 2.12.0 compatibility + +--- + src/streamlink/compat.py | 11 ++++ + src/streamlink/utils/parse.py | 17 +++++- + tests/utils/test_parse.py | 112 ++++++++++++++++++++++++++-------- + 3 files changed, 114 insertions(+), 26 deletions(-) + +diff --git a/src/streamlink/compat.py b/src/streamlink/compat.py +index c75201544d3..993bce64cfd 100644 +--- a/src/streamlink/compat.py ++++ b/src/streamlink/compat.py +@@ -2,11 +2,22 @@ + import sys + + ++# compatibility import of charset_normalizer/chardet via requests<3.0 ++try: ++ from requests.compat import chardet as charset_normalizer # type: ignore ++except ImportError: # pragma: no cover ++ import charset_normalizer ++ ++ + is_darwin = sys.platform == "darwin" + is_win32 = os.name == "nt" + + ++detect_encoding = charset_normalizer.detect ++ ++ + __all__ = [ + "is_darwin", + "is_win32", ++ "detect_encoding", + ] +diff --git a/src/streamlink/utils/parse.py b/src/streamlink/utils/parse.py +index 8c9f79c8b51..17479b81f59 100644 +--- a/src/streamlink/utils/parse.py ++++ b/src/streamlink/utils/parse.py +@@ -4,6 +4,7 @@ + + from lxml.etree import HTML, XML + ++from streamlink.compat import detect_encoding + from streamlink.plugin import PluginError + + +@@ -51,7 +52,21 @@ def parse_html( + - Removes XML declarations of invalid XHTML5 documents + - Wraps errors in custom exception with a snippet of the data in the message + """ +- if isinstance(data, str) and data.lstrip().startswith("<?xml"): ++ # strip XML text declarations from XHTML5 documents which were incorrectly defined as HTML5 ++ is_bytes = isinstance(data, bytes) ++ if data and data.lstrip()[:5].lower() == (b"<?xml" if is_bytes else "<?xml"): ++ if is_bytes: ++ # get the document's encoding using the "encoding" attribute value of the XML text declaration ++ match = re.match(rb"^\s*<\?xml\s.*?encoding=(?P<q>[\'\"])(?P<encoding>.+?)(?P=q).*?\?>", data, re.IGNORECASE) ++ if match: ++ encoding_value = detect_encoding(match["encoding"])["encoding"] ++ encoding = match["encoding"].decode(encoding_value) ++ else: ++ # no "encoding" attribute: try to figure out encoding from the document's content ++ encoding = detect_encoding(data)["encoding"] ++ ++ data = data.decode(encoding) ++ + data = re.sub(r"^\s*<\?xml.+?\?>", "", data) + + return _parse(HTML, data, name, exception, schema, *args, **kwargs) +diff --git a/tests/utils/test_parse.py b/tests/utils/test_parse.py +index aedae7d4e8e..69c16f282b9 100644 +--- a/tests/utils/test_parse.py ++++ b/tests/utils/test_parse.py +@@ -74,31 +74,93 @@ def test_parse_xml_entities(self): + assert actual.tag == expected.tag + assert actual.attrib == expected.attrib + +- def test_parse_xml_encoding(self): +- tree = parse_xml("""<?xml version="1.0" encoding="UTF-8"?><test>ä</test>""") +- assert tree.xpath(".//text()") == ["ä"] +- tree = parse_xml("""<test>ä</test>""") +- assert tree.xpath(".//text()") == ["ä"] +- tree = parse_xml(b"""<?xml version="1.0" encoding="UTF-8"?><test>\xC3\xA4</test>""") +- assert tree.xpath(".//text()") == ["ä"] +- tree = parse_xml(b"""<test>\xC3\xA4</test>""") +- assert tree.xpath(".//text()") == ["ä"] +- +- def test_parse_html_encoding(self): +- tree = parse_html("""<!DOCTYPE html><html><head><meta charset="utf-8"/></head><body>ä</body></html>""") +- assert tree.xpath(".//body/text()") == ["ä"] +- tree = parse_html("""<!DOCTYPE html><html><body>ä</body></html>""") +- assert tree.xpath(".//body/text()") == ["ä"] +- tree = parse_html(b"""<!DOCTYPE html><html><meta charset="utf-8"/><body>\xC3\xA4</body></html>""") +- assert tree.xpath(".//body/text()") == ["ä"] +- tree = parse_html(b"""<!DOCTYPE html><html><body>\xC3\xA4</body></html>""") +- assert tree.xpath(".//body/text()") == ["ä"] +- +- def test_parse_html_xhtml5(self): +- tree = parse_html("""<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html><html><body>ä?></body></html>""") +- assert tree.xpath(".//body/text()") == ["ä?>"] +- tree = parse_html(b"""<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html><html><body>\xC3\xA4?></body></html>""") +- assert tree.xpath(".//body/text()") == ["ä?>"] ++ @pytest.mark.parametrize(("content", "expected"), [ ++ pytest.param( ++ """<?xml version="1.0" encoding="UTF-8"?><test>ä</test>""", ++ "ä", ++ id="string-utf-8", ++ ), ++ pytest.param( ++ """<test>ä</test>""", ++ "ä", ++ id="string-unknown", ++ ), ++ pytest.param( ++ b"""<?xml version="1.0" encoding="UTF-8"?><test>\xC3\xA4</test>""", ++ "ä", ++ id="bytes-utf-8", ++ ), ++ pytest.param( ++ b"""<?xml version="1.0" encoding="ISO-8859-1"?><test>\xE4</test>""", ++ "ä", ++ id="bytes-iso-8859-1", ++ ), ++ pytest.param( ++ b"""<test>\xC3\xA4</test>""", ++ "ä", ++ id="bytes-unknown", ++ ), ++ ]) ++ def test_parse_xml_encoding(self, content, expected): ++ tree = parse_xml(content) ++ assert tree.xpath(".//text()") == [expected] ++ ++ @pytest.mark.parametrize(("content", "expected"), [ ++ pytest.param( ++ """<!DOCTYPE html><html><head><meta charset="utf-8"/></head><body>ä</body></html>""", ++ "ä", ++ id="string-utf-8", ++ ), ++ pytest.param( ++ """<!DOCTYPE html><html><body>ä</body></html>""", ++ "ä", ++ id="string-unknown", ++ ), ++ pytest.param( ++ b"""<!DOCTYPE html><html><head><meta charset="utf-8"/></head><body>\xC3\xA4</body></html>""", ++ "ä", ++ id="bytes-utf-8", ++ ), ++ pytest.param( ++ b"""<!DOCTYPE html><html><head><meta charset="ISO-8859-1"/></head><body>\xE4</body></html>""", ++ "ä", ++ id="bytes-iso-8859-1", ++ ), ++ pytest.param( ++ b"""<!DOCTYPE html><html><body>\xC3\xA4</body></html>""", ++ "ä", ++ id="bytes-unknown", ++ ), ++ ]) ++ def test_parse_html_encoding(self, content, expected): ++ tree = parse_html(content) ++ assert tree.xpath(".//body/text()") == [expected] ++ ++ @pytest.mark.parametrize(("content", "expected"), [ ++ pytest.param( ++ """<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html><html><body>ä?></body></html>""", ++ "ä?>", ++ id="string", ++ ), ++ pytest.param( ++ b"""<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html><html><body>\xC3\xA4?></body></html>""", ++ "ä?>", ++ id="bytes-utf-8", ++ ), ++ pytest.param( ++ b"""<?xml version="1.0" encoding="ISO-8859-1"?><!DOCTYPE html><html><body>\xE4?></body></html>""", ++ "ä?>", ++ id="bytes-iso-8859-1", ++ ), ++ pytest.param( ++ b"""<?xml version="1.0"?><!DOCTYPE html><html><body>\xC3\xA4?></body></html>""", ++ "ä?>", ++ id="bytes-unknown", ++ ), ++ ]) ++ def test_parse_html_xhtml5(self, content, expected): ++ tree = parse_html(content) ++ assert tree.xpath(".//body/text()") == [expected] + + def test_parse_qsd(self): + assert parse_qsd("test=1&foo=bar", schema=validate.Schema({"test": str, "foo": "bar"})) == {"test": "1", "foo": "bar"} + |