Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:yarunachalam:branches:devel:languages:python
python-parsel
python-parsel-drop-python-2.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File python-parsel-drop-python-2.patch of Package python-parsel
diff --git a/README.rst b/README.rst index c1674f1..7bd8204 100644 --- a/README.rst +++ b/README.rst @@ -26,7 +26,7 @@ Example (`open online demo`_): .. code-block:: python >>> from parsel import Selector - >>> selector = Selector(text=u"""<html> + >>> selector = Selector(text="""<html> <body> <h1>Hello, Parsel!</h1> <ul> diff --git a/docs/conf.py b/docs/conf.py index 27eef0e..f3736de 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- import os import sys @@ -38,8 +37,8 @@ master_doc = 'index' # General information about the project. -project = u'Parsel' -copyright = u'2015, Scrapy Project' +project = 'Parsel' +copyright = '2015, Scrapy Project' # The version info for the project you're documenting, acts as replacement # for |version| and |release|, also used in various other places throughout @@ -83,8 +82,8 @@ # [howto/manual]). latex_documents = [ ('index', 'parsel.tex', - u'Parsel Documentation', - u'Scrapy Project', 'manual'), + 'Parsel Documentation', + 'Scrapy Project', 'manual'), ] @@ -94,8 +93,8 @@ # (source start file, name, description, authors, manual section). man_pages = [ ('index', 'parsel', - u'Parsel Documentation', - [u'Scrapy Project'], 1) + 'Parsel Documentation', + ['Scrapy Project'], 1) ] @@ -106,8 +105,8 @@ # dir menu entry, description, category) texinfo_documents = [ ('index', 'parsel', - u'Parsel Documentation', - u'Scrapy Project', + 'Parsel Documentation', + 'Scrapy Project', 'parsel', 'One line description of project.', 'Miscellaneous'), diff --git a/docs/usage.rst b/docs/usage.rst index f5950a8..55e6a31 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -8,11 +8,9 @@ Create a :class:`~parsel.selector.Selector` object for the HTML or XML text that you want to parse:: >>> from parsel import Selector - >>> text = u"<html><body><h1>Hello, Parsel!</h1></body></html>" + >>> text = "<html><body><h1>Hello, Parsel!</h1></body></html>" >>> selector = Selector(text=text) -.. note:: In Python 2, the ``text`` argument must be a ``unicode`` string. - Then use `CSS`_ or `XPath`_ expressions to select elements:: >>> selector.css('h1') @@ -412,7 +410,7 @@ classes. Example removing an ad from a blog post: >>> from parsel import Selector - >>> doc = u""" + >>> doc = """ ... <article> ... <div class="row">Content paragraph...</div> ... <div class="row"> @@ -455,7 +453,7 @@ The ``test()`` function, for example, can prove quite useful when XPath's Example selecting links in list item with a "class" attribute ending with a digit:: >>> from parsel import Selector - >>> doc = u""" + >>> doc = """ ... <div> ... <ul> ... <li class="item-0"><a href="link1.html">first item</a></li> @@ -487,7 +485,7 @@ extracting text elements for example. Example extracting microdata (sample content taken from http://schema.org/Product) with groups of itemscopes and corresponding itemprops:: - >>> doc = u""" + >>> doc = """ ... <div itemscope itemtype="http://schema.org/Product"> ... <span itemprop="name">Kenmore White 17" Microwave</span> ... <img src="kenmore-microwave-17in.jpg" alt='Kenmore 17" Microwave' /> @@ -591,7 +589,7 @@ returns ``True`` for nodes that have all of the specified HTML classes:: ... <p>Fourth</p> ... """) ... - >>> sel = Selector(u""" + >>> sel = Selector(""" ... <p class="foo bar-baz">First</p> ... <p class="foo">Second</p> ... <p class="bar">Third</p> @@ -1111,7 +1109,7 @@ Named variables can be useful when strings need to be escaped for single or double quotes characters. The example below would be a bit tricky to get right (or legible) without a variable reference:: - >>> html = u'''<html> + >>> html = '''<html> ... <body> ... <p>He said: "I don't know why, but I like mixing single and double quotes!"</p> ... </body> diff --git a/parsel/csstranslator.py b/parsel/csstranslator.py index 747e808..3881736 100644 --- a/parsel/csstranslator.py +++ b/parsel/csstranslator.py @@ -1,7 +1,4 @@ -try: - from functools import lru_cache -except ImportError: - from functools32 import lru_cache +from functools import lru_cache from cssselect import GenericTranslator as OriginalGenericTranslator from cssselect import HTMLTranslator as OriginalHTMLTranslator @@ -23,7 +20,7 @@ def from_xpath(cls, xpath, textnode=False, attribute=None): return x def __str__(self): - path = super(XPathExpr, self).__str__() + path = super().__str__() if self.textnode: if path == '*': path = 'text()' @@ -40,20 +37,20 @@ def __str__(self): return path def join(self, combiner, other): - super(XPathExpr, self).join(combiner, other) + super().join(combiner, other) self.textnode = other.textnode self.attribute = other.attribute return self -class TranslatorMixin(object): +class TranslatorMixin: """This mixin adds support to CSS pseudo elements via dynamic dispatch. Currently supported pseudo-elements are ``::text`` and ``::attr(ATTR_NAME)``. """ def xpath_element(self, selector): - xpath = super(TranslatorMixin, self).xpath_element(selector) + xpath = super().xpath_element(selector) return XPathExpr.from_xpath(xpath) def xpath_pseudo_element(self, xpath, pseudo_element): @@ -98,13 +95,13 @@ def xpath_text_simple_pseudo_element(self, xpath): class GenericTranslator(TranslatorMixin, OriginalGenericTranslator): @lru_cache(maxsize=256) def css_to_xpath(self, css, prefix='descendant-or-self::'): - return super(GenericTranslator, self).css_to_xpath(css, prefix) + return super().css_to_xpath(css, prefix) class HTMLTranslator(TranslatorMixin, OriginalHTMLTranslator): @lru_cache(maxsize=256) def css_to_xpath(self, css, prefix='descendant-or-self::'): - return super(HTMLTranslator, self).css_to_xpath(css, prefix) + return super().css_to_xpath(css, prefix) _translator = HTMLTranslator() diff --git a/parsel/selector.py b/parsel/selector.py index 504a4fe..b644e82 100644 --- a/parsel/selector.py +++ b/parsel/selector.py @@ -2,9 +2,6 @@ XPath selectors based on lxml """ -import sys - -import six from lxml import etree, html from .utils import flatten, iflatten, extract_regex, shorten @@ -22,7 +19,7 @@ class CannotRemoveElementWithoutParent(Exception): class SafeXMLParser(etree.XMLParser): def __init__(self, *args, **kwargs): kwargs.setdefault('resolve_entities', False) - super(SafeXMLParser, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) _ctgroup = { @@ -61,13 +58,8 @@ class SelectorList(list): class, which provides a few additional methods. """ - # __getslice__ is deprecated but `list` builtin implements it only in Py2 - def __getslice__(self, i, j): - o = super(SelectorList, self).__getslice__(i, j) - return self.__class__(o) - def __getitem__(self, pos): - o = super(SelectorList, self).__getitem__(pos) + o = super().__getitem__(pos) return self.__class__(o) if isinstance(pos, slice) else o def __getstate__(self): @@ -164,7 +156,7 @@ def remove(self): x.remove() -class Selector(object): +class Selector: """ :class:`Selector` allows you to select parts of an XML or HTML text using CSS or XPath expressions and extract data from it. @@ -204,9 +196,10 @@ def __init__(self, text=None, type=None, namespaces=None, root=None, self._tostring_method = _ctgroup[st]['_tostring_method'] if text is not None: - if not isinstance(text, six.text_type): - msg = "text argument should be of type %s, got %s" % ( - six.text_type, text.__class__) + if not isinstance(text, str): + msg = "text argument should be of type str, got %s" % ( + text.__class__ + ) raise TypeError(msg) root = self._get_root(text, base_url) elif root is None: @@ -255,9 +248,7 @@ def xpath(self, query, namespaces=None, **kwargs): smart_strings=self._lxml_smart_strings, **kwargs) except etree.XPathError as exc: - msg = u"XPath error: %s in %s" % (exc, query) - msg = msg if six.PY3 else msg.encode('unicode_escape') - six.reraise(ValueError, ValueError(msg), sys.exc_info()[2]) + raise ValueError("XPath error: %s in %s" % (exc, query)) if type(result) is not list: result = [result] @@ -324,11 +315,11 @@ def get(self): with_tail=False) except (AttributeError, TypeError): if self.root is True: - return u'1' + return '1' elif self.root is False: - return u'0' + return '0' else: - return six.text_type(self.root) + return str(self.root) extract = get def getall(self): @@ -354,7 +345,7 @@ def remove_namespaces(self): if el.tag.startswith('{'): el.tag = el.tag.split('}', 1)[1] # loop on element attributes also - for an in el.attrib.keys(): + for an in el.attrib: if an.startswith('{'): el.attrib[an.split('}', 1)[1]] = el.attrib.pop(an) # remove namespace declarations diff --git a/parsel/utils.py b/parsel/utils.py index 6914362..6aeff6f 100644 --- a/parsel/utils.py +++ b/parsel/utils.py @@ -1,5 +1,4 @@ import re -import six from w3lib.html import replace_entities as w3lib_replace_entities @@ -50,10 +49,10 @@ def _is_listlike(x): True >>> _is_listlike((x for x in range(3))) True - >>> _is_listlike(six.moves.xrange(5)) + >>> _is_listlike(range(5)) True """ - return hasattr(x, "__iter__") and not isinstance(x, (six.text_type, bytes)) + return hasattr(x, "__iter__") and not isinstance(x, (str, bytes)) def extract_regex(regex, text, replace_entities=True): @@ -62,7 +61,7 @@ def extract_regex(regex, text, replace_entities=True): * if the regex contains multiple numbered groups, all those will be returned (flattened) * if the regex doesn't contain any group the entire regex matching is returned """ - if isinstance(regex, six.string_types): + if isinstance(regex, str): regex = re.compile(regex, re.UNICODE) if 'extract' in regex.groupindex: diff --git a/parsel/xpathfuncs.py b/parsel/xpathfuncs.py index 95b07ba..ceb8eaf 100644 --- a/parsel/xpathfuncs.py +++ b/parsel/xpathfuncs.py @@ -1,8 +1,6 @@ import re from lxml import etree -from six import string_types - from w3lib.html import HTML5_WHITESPACE regex = '[{}]+'.format(HTML5_WHITESPACE) @@ -45,7 +43,7 @@ def has_class(context, *classes): raise ValueError( 'XPath error: has-class must have at least 1 argument') for c in classes: - if not isinstance(c, string_types): + if not isinstance(c, str): raise ValueError( 'XPath error: has-class arguments must be strings') context.eval_context['args_checked'] = True diff --git a/setup.py b/setup.py index d14ad0e..ade049f 100644 --- a/setup.py +++ b/setup.py @@ -1,9 +1,5 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- -import sys - -from pkg_resources import parse_version from setuptools import setup, __version__ as setuptools_version @@ -13,32 +9,6 @@ with open('NEWS') as history_file: history = history_file.read().replace('.. :changelog:', '') -test_requirements = [ -] - -def has_environment_marker_platform_impl_support(): - """Code extracted from 'pytest/setup.py' - https://github.com/pytest-dev/pytest/blob/7538680c/setup.py#L31 - The first known release to support environment marker with range operators - it is 18.5, see: - https://setuptools.readthedocs.io/en/latest/history.html#id235 - """ - return parse_version(setuptools_version) >= parse_version('18.5') - -install_requires = [ - 'w3lib>=1.19.0', - 'lxml', - 'six>=1.6.0', - 'cssselect>=0.9' -] -extras_require = {} - -if not has_environment_marker_platform_impl_support(): - if sys.version_info[0:2] < (3, 0): - install_requires.append("functools32") -else: - extras_require[":python_version<'3.0'"] = ["functools32"] - setup( name='parsel', version='1.6.0', @@ -50,11 +20,16 @@ def has_environment_marker_platform_impl_support(): packages=[ 'parsel', ], - package_dir={'parsel': - 'parsel'}, + package_dir={ + 'parsel': 'parsel', + }, include_package_data=True, - install_requires=install_requires, - extras_require=extras_require, + install_requires=[ + 'cssselect>=0.9', + 'lxml', + 'w3lib>=1.19.0', + ], + python_requires='>=3.6', license="BSD", zip_safe=False, keywords='parsel', @@ -66,13 +41,11 @@ def has_environment_marker_platform_impl_support(): 'Topic :: Text Processing :: Markup', 'Topic :: Text Processing :: Markup :: HTML', 'Topic :: Text Processing :: Markup :: XML', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: Implementation :: CPython', 'Programming Language :: Python :: Implementation :: PyPy', ], diff --git a/tests/test_selector.py b/tests/test_selector.py index a5c61f6..f5c60ae 100644 --- a/tests/test_selector.py +++ b/tests/test_selector.py @@ -1,7 +1,5 @@ -# -*- coding: utf-8 -*- import re import weakref -import six import unittest import pickle @@ -17,11 +15,11 @@ class SelectorTestCase(unittest.TestCase): sscls = Selector def test_pickle_selector(self): - sel = self.sscls(text=u'<html><body><p>some text</p></body></html>') + sel = self.sscls(text='<html><body><p>some text</p></body></html>') self.assertRaises(TypeError, lambda s: pickle.dumps(s, protocol=2), sel) def test_pickle_selector_list(self): - sel = self.sscls(text=u'<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>') + sel = self.sscls(text='<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>') sel_list = sel.css('li') empty_sel_list = sel.css('p') self.assertIsInstance(sel_list, self.sscls.selectorlist_cls) @@ -31,7 +29,7 @@ def test_pickle_selector_list(self): def test_simple_selection(self): """Simple selector tests""" - body = u"<p><input name='a'value='1'/><input name='b'value='2'/></p>" + body = "<p><input name='a'value='1'/><input name='b'value='2'/></p>" sel = self.sscls(text=body) xl = sel.xpath('//input') @@ -43,48 +41,48 @@ def test_simple_selection(self): [x.extract() for x in sel.xpath('//input')]) self.assertEqual([x.extract() for x in sel.xpath("//input[@name='a']/@name")], - [u'a']) + ['a']) self.assertEqual([x.extract() for x in sel.xpath("number(concat(//input[@name='a']/@value, //input[@name='b']/@value))")], - [u'12.0']) + ['12.0']) self.assertEqual(sel.xpath("concat('xpath', 'rules')").extract(), - [u'xpathrules']) + ['xpathrules']) self.assertEqual([x.extract() for x in sel.xpath("concat(//input[@name='a']/@value, //input[@name='b']/@value)")], - [u'12']) + ['12']) def test_simple_selection_with_variables(self): """Using XPath variables""" - body = u"<p><input name='a' value='1'/><input name='b' value='2'/></p>" + body = "<p><input name='a' value='1'/><input name='b' value='2'/></p>" sel = self.sscls(text=body) self.assertEqual([x.extract() for x in sel.xpath("//input[@value=$number]/@name", number=1)], - [u'a']) + ['a']) self.assertEqual([x.extract() for x in sel.xpath("//input[@name=$letter]/@value", letter='b')], - [u'2']) + ['2']) self.assertEqual(sel.xpath("count(//input[@value=$number or @name=$letter])", number=2, letter='a').extract(), - [u'2.0']) + ['2.0']) # you can also pass booleans self.assertEqual(sel.xpath("boolean(count(//input)=$cnt)=$test", cnt=2, test=True).extract(), - [u'1']) + ['1']) self.assertEqual(sel.xpath("boolean(count(//input)=$cnt)=$test", cnt=4, test=True).extract(), - [u'0']) + ['0']) self.assertEqual(sel.xpath("boolean(count(//input)=$cnt)=$test", cnt=4, test=False).extract(), - [u'1']) + ['1']) # for named nodes, you need to use "name()=node_name" self.assertEqual(sel.xpath("boolean(count(//*[name()=$tag])=$cnt)=$test", tag="input", cnt=2, test=True).extract(), - [u'1']) + ['1']) def test_simple_selection_with_variables_escape_friendly(self): """Using XPath variables with quotes that would need escaping with string formatting""" - body = u"""<p>I'm mixing single and <input name='a' value='I say "Yeah!"'/> + body = """<p>I'm mixing single and <input name='a' value='I say "Yeah!"'/> "double quotes" and I don't care :)</p>""" sel = self.sscls(text=body) @@ -95,7 +93,7 @@ def test_simple_selection_with_variables_escape_friendly(self): # with XPath variables, escaping is done for you self.assertEqual([x.extract() for x in sel.xpath("//input[@value=$text]/@name", text=t)], - [u'a']) + ['a']) lt = """I'm mixing single and "double quotes" and I don't care :)""" # the following gives you something like # ValueError: XPath error: Invalid predicate in //p[normalize-space()='I'm mixing single and "double quotes" and I don't care :)']//@name @@ -103,10 +101,10 @@ def test_simple_selection_with_variables_escape_friendly(self): self.assertEqual([x.extract() for x in sel.xpath("//p[normalize-space()=$lng]//@name", lng=lt)], - [u'a']) + ['a']) def test_accessing_attributes(self): - body = u""" + body = """ <html lang="en" version="1.0"> <body> <ul id="some-list" class="list-cls" class="list-cls"> @@ -134,12 +132,10 @@ def test_accessing_attributes(self): [e.attrib for e in sel.css('li')]) def test_representation_slice(self): - body = u"<p><input name='{}' value='\xa9'/></p>".format(50 * 'b') + body = "<p><input name='{}' value='\xa9'/></p>".format(50 * 'b') sel = self.sscls(text=body) representation = "<Selector xpath='//input/@name' data='{}...'>".format(37 * 'b') - if six.PY2: - representation = "<Selector xpath='//input/@name' data=u'{}...'>".format(37 * 'b') self.assertEqual( [repr(it) for it in sel.xpath('//input/@name')], @@ -147,25 +143,27 @@ def test_representation_slice(self): ) def test_representation_unicode_query(self): - body = u"<p><input name='{}' value='\xa9'/></p>".format(50 * 'b') + body = "<p><input name='{}' value='\xa9'/></p>".format(50 * 'b') representation = '<Selector xpath=\'//input[@value="©"]/@value\' data=\'©\'>' - if six.PY2: - representation = "<Selector xpath=u'//input[@value=\"\\xa9\"]/@value' data=u'\\xa9'>" sel = self.sscls(text=body) self.assertEqual( - [repr(it) for it in sel.xpath(u'//input[@value="\xa9"]/@value')], + [repr(it) for it in sel.xpath('//input[@value="\xa9"]/@value')], [representation] ) def test_check_text_argument_type(self): - self.assertRaisesRegexp(TypeError, 'text argument should be of type', - self.sscls, b'<html/>') + self.assertRaisesRegex( + TypeError, + 'text argument should be of type', + self.sscls, + b'<html/>', + ) def test_extract_first(self): """Test if extract_first() returns first element""" - body = u'<ul><li id="1">1</li><li id="2">2</li></ul>' + body = '<ul><li id="1">1</li><li id="2">2</li></ul>' sel = self.sscls(text=body) self.assertEqual(sel.xpath('//ul/li/text()').extract_first(), @@ -181,38 +179,38 @@ def test_extract_first(self): def test_extract_first_default(self): """Test if extract_first() returns default value when no results found""" - body = u'<ul><li id="1">1</li><li id="2">2</li></ul>' + body = '<ul><li id="1">1</li><li id="2">2</li></ul>' sel = self.sscls(text=body) self.assertEqual(sel.xpath('//div/text()').extract_first(default='missing'), 'missing') def test_selector_get_alias(self): """Test if get() returns extracted value on a Selector""" - body = u'<ul><li id="1">1</li><li id="2">2</li><li id="3">3</li></ul>' + body = '<ul><li id="1">1</li><li id="2">2</li><li id="3">3</li></ul>' sel = self.sscls(text=body) - self.assertEqual(sel.xpath('//ul/li[position()>1]')[0].get(), u'<li id="2">2</li>') - self.assertEqual(sel.xpath('//ul/li[position()>1]/text()')[0].get(), u'2') + self.assertEqual(sel.xpath('//ul/li[position()>1]')[0].get(), '<li id="2">2</li>') + self.assertEqual(sel.xpath('//ul/li[position()>1]/text()')[0].get(), '2') def test_selector_getall_alias(self): """Test if get() returns extracted value on a Selector""" - body = u'<ul><li id="1">1</li><li id="2">2</li><li id="3">3</li></ul>' + body = '<ul><li id="1">1</li><li id="2">2</li><li id="3">3</li></ul>' sel = self.sscls(text=body) - self.assertListEqual(sel.xpath('//ul/li[position()>1]')[0].getall(), [u'<li id="2">2</li>']) - self.assertListEqual(sel.xpath('//ul/li[position()>1]/text()')[0].getall(), [u'2']) + self.assertListEqual(sel.xpath('//ul/li[position()>1]')[0].getall(), ['<li id="2">2</li>']) + self.assertListEqual(sel.xpath('//ul/li[position()>1]/text()')[0].getall(), ['2']) def test_selectorlist_get_alias(self): """Test if get() returns first element for a selection call""" - body = u'<ul><li id="1">1</li><li id="2">2</li><li id="3">3</li></ul>' + body = '<ul><li id="1">1</li><li id="2">2</li><li id="3">3</li></ul>' sel = self.sscls(text=body) - self.assertEqual(sel.xpath('//ul/li').get(), u'<li id="1">1</li>') - self.assertEqual(sel.xpath('//ul/li/text()').get(), u'1') + self.assertEqual(sel.xpath('//ul/li').get(), '<li id="1">1</li>') + self.assertEqual(sel.xpath('//ul/li/text()').get(), '1') def test_re_first(self): """Test if re_first() returns first matched element""" - body = u'<ul><li id="1">1</li><li id="2">2</li></ul>' + body = '<ul><li id="1">1</li><li id="2">2</li></ul>' sel = self.sscls(text=body) self.assertEqual(sel.xpath('//ul/li/text()').re_first(r'\d'), @@ -233,71 +231,73 @@ def test_re_first(self): def test_extract_first_re_default(self): """Test if re_first() returns default value when no results found""" - body = u'<ul><li id="1">1</li><li id="2">2</li></ul>' + body = '<ul><li id="1">1</li><li id="2">2</li></ul>' sel = self.sscls(text=body) self.assertEqual(sel.xpath('//div/text()').re_first(r'\w+', default='missing'), 'missing') self.assertEqual(sel.xpath('/ul/li/text()').re_first(r'\w+', default='missing'), 'missing') def test_select_unicode_query(self): - body = u"<p><input name='\xa9' value='1'/></p>" + body = "<p><input name='\xa9' value='1'/></p>" sel = self.sscls(text=body) - self.assertEqual(sel.xpath(u'//input[@name="\xa9"]/@value').extract(), [u'1']) + self.assertEqual(sel.xpath('//input[@name="\xa9"]/@value').extract(), ['1']) def test_list_elements_type(self): """Test Selector returning the same type in selection methods""" - text = u'<p>test<p>' + text = '<p>test<p>' assert isinstance(self.sscls(text=text).xpath("//p")[0], self.sscls) assert isinstance(self.sscls(text=text).css("p")[0], self.sscls) def test_boolean_result(self): - body = u"<p><input name='a'value='1'/><input name='b'value='2'/></p>" + body = "<p><input name='a'value='1'/><input name='b'value='2'/></p>" xs = self.sscls(text=body) - self.assertEqual(xs.xpath("//input[@name='a']/@name='a'").extract(), [u'1']) - self.assertEqual(xs.xpath("//input[@name='a']/@name='n'").extract(), [u'0']) + self.assertEqual(xs.xpath("//input[@name='a']/@name='a'").extract(), ['1']) + self.assertEqual(xs.xpath("//input[@name='a']/@name='n'").extract(), ['0']) def test_differences_parsing_xml_vs_html(self): """Test that XML and HTML Selector's behave differently""" # some text which is parsed differently by XML and HTML flavors - text = u'<div><img src="a.jpg"><p>Hello</div>' + text = '<div><img src="a.jpg"><p>Hello</div>' hs = self.sscls(text=text, type='html') self.assertEqual(hs.xpath("//div").extract(), - [u'<div><img src="a.jpg"><p>Hello</p></div>']) + ['<div><img src="a.jpg"><p>Hello</p></div>']) xs = self.sscls(text=text, type='xml') self.assertEqual(xs.xpath("//div").extract(), - [u'<div><img src="a.jpg"><p>Hello</p></img></div>']) + ['<div><img src="a.jpg"><p>Hello</p></img></div>']) def test_error_for_unknown_selector_type(self): - self.assertRaises(ValueError, self.sscls, text=u'', type='_na_') + self.assertRaises(ValueError, self.sscls, text='', type='_na_') def test_text_or_root_is_required(self): - self.assertRaisesRegexp(ValueError, - 'Selector needs either text or root argument', - self.sscls) + self.assertRaisesRegex( + ValueError, + 'Selector needs either text or root argument', + self.sscls, + ) def test_bool(self): - text = u'<a href="" >false</a><a href="nonempty">true</a>' + text = '<a href="" >false</a><a href="nonempty">true</a>' hs = self.sscls(text=text, type='html') falsish = hs.xpath('//a/@href')[0] - self.assertEqual(falsish.extract(), u'') + self.assertEqual(falsish.extract(), '') self.assertFalse(falsish) trueish = hs.xpath('//a/@href')[1] - self.assertEqual(trueish.extract(), u'nonempty') + self.assertEqual(trueish.extract(), 'nonempty') self.assertTrue(trueish) def test_slicing(self): - text = u'<div><p>1</p><p>2</p><p>3</p></div>' + text = '<div><p>1</p><p>2</p><p>3</p></div>' hs = self.sscls(text=text, type='html') self.assertIsInstance(hs.css('p')[2], self.sscls) self.assertIsInstance(hs.css('p')[2:3], self.sscls.selectorlist_cls) self.assertIsInstance(hs.css('p')[:2], self.sscls.selectorlist_cls) - self.assertEqual(hs.css('p')[2:3].extract(), [u'<p>3</p>']) - self.assertEqual(hs.css('p')[1:3].extract(), [u'<p>2</p>', u'<p>3</p>']) + self.assertEqual(hs.css('p')[2:3].extract(), ['<p>3</p>']) + self.assertEqual(hs.css('p')[1:3].extract(), ['<p>2</p>', '<p>3</p>']) def test_nested_selectors(self): """Nested selector tests""" - body = u"""<body> + body = """<body> <div class='one'> <ul> <li>one</li><li>two</li> @@ -322,7 +322,7 @@ def test_nested_selectors(self): def test_selectorlist_getall_alias(self): """Nested selector tests using getall()""" - body = u"""<body> + body = """<body> <div class='one'> <ul> <li>one</li><li>two</li> @@ -346,20 +346,20 @@ def test_selectorlist_getall_alias(self): self.assertEqual(divtwo.xpath("./li").getall(), []) def test_mixed_nested_selectors(self): - body = u'''<body> + body = '''<body> <div id=1>not<span>me</span></div> <div class="dos"><p>text</p><a href='#'>foo</a></div> </body>''' sel = self.sscls(text=body) - self.assertEqual(sel.xpath('//div[@id="1"]').css('span::text').extract(), [u'me']) - self.assertEqual(sel.css('#1').xpath('./span/text()').extract(), [u'me']) + self.assertEqual(sel.xpath('//div[@id="1"]').css('span::text').extract(), ['me']) + self.assertEqual(sel.css('#1').xpath('./span/text()').extract(), ['me']) def test_dont_strip(self): - sel = self.sscls(text=u'<div>fff: <a href="#">zzz</a></div>') - self.assertEqual(sel.xpath("//text()").extract(), [u'fff: ', u'zzz']) + sel = self.sscls(text='<div>fff: <a href="#">zzz</a></div>') + self.assertEqual(sel.xpath("//text()").extract(), ['fff: ', 'zzz']) def test_namespaces_simple(self): - body = u""" + body = """ <test xmlns:somens="http://scrapy.org"> <somens:a id="foo">take this</a> <a id="bar">found</a> @@ -370,10 +370,10 @@ def test_namespaces_simple(self): x.register_namespace("somens", "http://scrapy.org") self.assertEqual(x.xpath("//somens:a/text()").extract(), - [u'take this']) + ['take this']) def test_namespaces_adhoc(self): - body = u""" + body = """ <test xmlns:somens="http://scrapy.org"> <somens:a id="foo">take this</a> <a id="bar">found</a> @@ -384,10 +384,10 @@ def test_namespaces_adhoc(self): self.assertEqual(x.xpath("//somens:a/text()", namespaces={"somens": "http://scrapy.org"}).extract(), - [u'take this']) + ['take this']) def test_namespaces_adhoc_variables(self): - body = u""" + body = """ <test xmlns:somens="http://scrapy.org"> <somens:a id="foo">take this</a> <a id="bar">found</a> @@ -399,10 +399,10 @@ def test_namespaces_adhoc_variables(self): self.assertEqual(x.xpath("//somens:a/following-sibling::a[@id=$identifier]/text()", namespaces={"somens": "http://scrapy.org"}, identifier="bar").extract(), - [u'found']) + ['found']) def test_namespaces_multiple(self): - body = u"""<?xml version="1.0" encoding="UTF-8"?> + body = """<?xml version="1.0" encoding="UTF-8"?> <BrowseNode xmlns="http://webservices.amazon.com/AWSECommerceService/2005-10-05" xmlns:b="http://somens.com" xmlns:p="http://www.scrapy.org/product" > @@ -423,7 +423,7 @@ def test_namespaces_multiple(self): self.assertEqual(x.xpath("//p:SecondTestTag/xmlns:material/text()").extract()[0], 'iron') def test_namespaces_multiple_adhoc(self): - body = u"""<?xml version="1.0" encoding="UTF-8"?> + body = """<?xml version="1.0" encoding="UTF-8"?> <BrowseNode xmlns="http://webservices.amazon.com/AWSECommerceService/2005-10-05" xmlns:b="http://somens.com" xmlns:p="http://www.scrapy.org/product" > @@ -495,13 +495,13 @@ def test_namespaces_multiple_adhoc(self): self.assertEqual(x.xpath("//p:SecondTestTag/xmlns:material/text()").extract()[0], 'iron') def test_make_links_absolute(self): - text = u'<a href="file.html">link to file</a>' + text = '<a href="file.html">link to file</a>' sel = Selector(text=text, base_url='http://example.com') sel.root.make_links_absolute() - self.assertEqual(u'http://example.com/file.html', sel.xpath('//a/@href').extract_first()) + self.assertEqual('http://example.com/file.html', sel.xpath('//a/@href').extract_first()) def test_re(self): - body = u"""<div>Name: Mary + body = """<div>Name: Mary <ul> <li>Name: John</li> <li>Age: 10</li> @@ -519,23 +519,23 @@ def test_re(self): ["10", "20"]) # Test named group, hit and miss - x = self.sscls(text=u'foobar') + x = self.sscls(text='foobar') self.assertEqual(x.re('(?P<extract>foo)'), ['foo']) self.assertEqual(x.re('(?P<extract>baz)'), []) # A purposely constructed test for an edge case - x = self.sscls(text=u'baz') + x = self.sscls(text='baz') self.assertEqual(x.re('(?P<extract>foo)|(?P<bar>baz)'), []) def test_re_replace_entities(self): - body = u"""<script>{"foo":"bar & "baz""}</script>""" + body = """<script>{"foo":"bar & "baz""}</script>""" x = self.sscls(text=body) name_re = re.compile('{"foo":(.*)}') # by default, only & and < are preserved ; # other entities are converted - expected = u'"bar & "baz""' + expected = '"bar & "baz""' self.assertEqual(x.xpath("//script/text()").re(name_re), [expected]) self.assertEqual(x.xpath("//script").re(name_re), [expected]) self.assertEqual(x.xpath("//script/text()")[0].re(name_re), [expected]) @@ -546,7 +546,7 @@ def test_re_replace_entities(self): self.assertEqual(x.xpath("//script")[0].re_first(name_re), expected) # switching off replace_entities will preserve " also - expected = u'"bar & "baz""' + expected = '"bar & "baz""' self.assertEqual(x.xpath("//script/text()").re(name_re, replace_entities=False), [expected]) self.assertEqual(x.xpath("//script")[0].re(name_re, replace_entities=False), [expected]) @@ -554,95 +554,94 @@ def test_re_replace_entities(self): self.assertEqual(x.xpath("//script")[0].re_first(name_re, replace_entities=False), expected) def test_re_intl(self): - body = u'<div>Evento: cumplea\xf1os</div>' + body = '<div>Evento: cumplea\xf1os</div>' x = self.sscls(text=body) - self.assertEqual(x.xpath("//div").re(r"Evento: (\w+)"), [u'cumplea\xf1os']) + self.assertEqual(x.xpath("//div").re(r"Evento: (\w+)"), ['cumplea\xf1os']) def test_selector_over_text(self): - hs = self.sscls(text=u'<root>lala</root>') - self.assertEqual(hs.extract(), u'<html><body><root>lala</root></body></html>') - xs = self.sscls(text=u'<root>lala</root>', type='xml') - self.assertEqual(xs.extract(), u'<root>lala</root>') - self.assertEqual(xs.xpath('.').extract(), [u'<root>lala</root>']) + hs = self.sscls(text='<root>lala</root>') + self.assertEqual(hs.extract(), '<html><body><root>lala</root></body></html>') + xs = self.sscls(text='<root>lala</root>', type='xml') + self.assertEqual(xs.extract(), '<root>lala</root>') + self.assertEqual(xs.xpath('.').extract(), ['<root>lala</root>']) def test_invalid_xpath(self): "Test invalid xpath raises ValueError with the invalid xpath" - x = self.sscls(text=u"<html></html>") + x = self.sscls(text="<html></html>") xpath = "//test[@foo='bar]" - self.assertRaisesRegexp(ValueError, re.escape(xpath), x.xpath, xpath) + self.assertRaisesRegex(ValueError, re.escape(xpath), x.xpath, xpath) def test_invalid_xpath_unicode(self): "Test *Unicode* invalid xpath raises ValueError with the invalid xpath" - x = self.sscls(text=u"<html></html>") - xpath = u"//test[@foo='\u0431ar]" - encoded = xpath if six.PY3 else xpath.encode('unicode_escape') - self.assertRaisesRegexp(ValueError, re.escape(encoded), x.xpath, xpath) + x = self.sscls(text="<html></html>") + xpath = "//test[@foo='\\u0431ar]" + self.assertRaisesRegex(ValueError, re.escape(xpath), x.xpath, xpath) def test_http_header_encoding_precedence(self): - # u'\xa3' = pound symbol in unicode - # u'\xc2\xa3' = pound symbol in utf-8 - # u'\xa3' = pound symbol in latin-1 (iso-8859-1) + # '\xa3' = pound symbol in unicode + # '\xc2\xa3' = pound symbol in utf-8 + # '\xa3' = pound symbol in latin-1 (iso-8859-1) - text = u'''<html> + text = '''<html> <head><meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"></head> <body><span id="blank">\xa3</span></body></html>''' x = self.sscls(text=text) self.assertEqual(x.xpath("//span[@id='blank']/text()").extract(), - [u'\xa3']) + ['\xa3']) def test_empty_bodies_shouldnt_raise_errors(self): - self.sscls(text=u'').xpath('//text()').extract() + self.sscls(text='').xpath('//text()').extract() def test_bodies_with_comments_only(self): - sel = self.sscls(text=u'<!-- hello world -->', base_url='http://example.com') - self.assertEqual(u'http://example.com', sel.root.base) + sel = self.sscls(text='<!-- hello world -->', base_url='http://example.com') + self.assertEqual('http://example.com', sel.root.base) def test_null_bytes_shouldnt_raise_errors(self): - text = u'<root>pre\x00post</root>' + text = '<root>pre\x00post</root>' self.sscls(text).xpath('//text()').extract() def test_replacement_char_from_badly_encoded_body(self): # \xe9 alone isn't valid utf8 sequence - text = u'<html><p>an Jos\ufffd de</p><html>' - self.assertEqual([u'an Jos\ufffd de'], + text = '<html><p>an Jos\\ufffd de</p><html>' + self.assertEqual(['an Jos\\ufffd de'], self.sscls(text).xpath('//text()').extract()) def test_select_on_unevaluable_nodes(self): - r = self.sscls(text=u'<span class="big">some text</span>') + r = self.sscls(text='<span class="big">some text</span>') # Text node x1 = r.xpath('//text()') - self.assertEqual(x1.extract(), [u'some text']) + self.assertEqual(x1.extract(), ['some text']) self.assertEqual(x1.xpath('.//b').extract(), []) # Tag attribute x1 = r.xpath('//span/@class') - self.assertEqual(x1.extract(), [u'big']) + self.assertEqual(x1.extract(), ['big']) self.assertEqual(x1.xpath('.//text()').extract(), []) def test_select_on_text_nodes(self): - r = self.sscls(text=u'<div><b>Options:</b>opt1</div><div><b>Other</b>opt2</div>') + r = self.sscls(text='<div><b>Options:</b>opt1</div><div><b>Other</b>opt2</div>') x1 = r.xpath("//div/descendant::text()[preceding-sibling::b[contains(text(), 'Options')]]") - self.assertEqual(x1.extract(), [u'opt1']) + self.assertEqual(x1.extract(), ['opt1']) x1 = r.xpath("//div/descendant::text()/preceding-sibling::b[contains(text(), 'Options')]") - self.assertEqual(x1.extract(), [u'<b>Options:</b>']) + self.assertEqual(x1.extract(), ['<b>Options:</b>']) @unittest.skip("Text nodes lost parent node reference in lxml") def test_nested_select_on_text_nodes(self): # FIXME: does not work with lxml backend [upstream] - r = self.sscls(text=u'<div><b>Options:</b>opt1</div><div><b>Other</b>opt2</div>') + r = self.sscls(text='<div><b>Options:</b>opt1</div><div><b>Other</b>opt2</div>') x1 = r.xpath("//div/descendant::text()") x2 = x1.xpath("./preceding-sibling::b[contains(text(), 'Options')]") - self.assertEqual(x2.extract(), [u'<b>Options:</b>']) + self.assertEqual(x2.extract(), ['<b>Options:</b>']) def test_weakref_slots(self): """Check that classes are using slots and are weak-referenceable""" - x = self.sscls(text=u'') + x = self.sscls(text='') weakref.ref(x) assert not hasattr(x, '__dict__'), "%s does not use __slots__" % \ x.__class__.__name__ def test_remove_namespaces(self): - xml = u"""<?xml version="1.0" encoding="UTF-8"?> + xml = """<?xml version="1.0" encoding="UTF-8"?> <feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-US" xmlns:media="http://search.yahoo.com/mrss/"> <link type="text/html"/> <entry> @@ -659,7 +658,7 @@ def test_remove_namespaces(self): self.assertEqual(len(sel.xpath("./namespace::*")), 1) def test_remove_namespaces_embedded(self): - xml = u""" + xml = """ <feed xmlns="http://www.w3.org/2005/Atom"> <link type="text/html"/> <entry> @@ -686,7 +685,7 @@ def test_remove_namespaces_embedded(self): self.assertEqual(len(sel.xpath("./namespace::*")), 1) def test_remove_attributes_namespaces(self): - xml = u"""<?xml version="1.0" encoding="UTF-8"?> + xml = """<?xml version="1.0" encoding="UTF-8"?> <feed xmlns:atom="http://www.w3.org/2005/Atom" xml:lang="en-US" xmlns:media="http://search.yahoo.com/mrss/"> <link atom:type="text/html"/> <entry> @@ -706,7 +705,7 @@ def test_smart_strings(self): class SmartStringsSelector(Selector): _lxml_smart_strings = True - body = u"""<body> + body = """<body> <div class='one'> <ul> <li>one</li><li>two</li> @@ -723,18 +722,18 @@ class SmartStringsSelector(Selector): # only when smart_strings are on x = self.sscls(text=body) li_text = x.xpath('//li/text()') - self.assertFalse(any(map(lambda e: hasattr(e.root, 'getparent'), li_text))) + self.assertFalse(any([hasattr(e.root, 'getparent') for e in li_text])) div_class = x.xpath('//div/@class') - self.assertFalse(any(map(lambda e: hasattr(e.root, 'getparent'), div_class))) + self.assertFalse(any([hasattr(e.root, 'getparent') for e in div_class])) x = SmartStringsSelector(text=body) li_text = x.xpath('//li/text()') - self.assertTrue(all(map(lambda e: hasattr(e.root, 'getparent'), li_text))) + self.assertTrue(all([hasattr(e.root, 'getparent') for e in li_text])) div_class = x.xpath('//div/@class') - self.assertTrue(all(map(lambda e: hasattr(e.root, 'getparent'), div_class))) + self.assertTrue(all([hasattr(e.root, 'getparent') for e in div_class])) def test_xml_entity_expansion(self): - malicious_xml = u'<?xml version="1.0" encoding="ISO-8859-1"?>'\ + malicious_xml = '<?xml version="1.0" encoding="ISO-8859-1"?>'\ '<!DOCTYPE foo [ <!ELEMENT foo ANY > <!ENTITY xxe SYSTEM '\ '"file:///etc/passwd" >]><foo>&xxe;</foo>' @@ -743,8 +742,8 @@ def test_xml_entity_expansion(self): self.assertEqual(sel.extract(), '<foo>&xxe;</foo>') def test_configure_base_url(self): - sel = self.sscls(text=u'nothing', base_url='http://example.com') - self.assertEqual(u'http://example.com', sel.root.base) + sel = self.sscls(text='nothing', base_url='http://example.com') + self.assertEqual('http://example.com', sel.root.base) def test_extending_selector(self): class MySelectorList(Selector.selectorlist_cls): @@ -753,33 +752,33 @@ class MySelectorList(Selector.selectorlist_cls): class MySelector(Selector): selectorlist_cls = MySelectorList - sel = MySelector(text=u'<html><div>foo</div></html>') + sel = MySelector(text='<html><div>foo</div></html>') self.assertIsInstance(sel.xpath('//div'), MySelectorList) self.assertIsInstance(sel.xpath('//div')[0], MySelector) self.assertIsInstance(sel.css('div'), MySelectorList) self.assertIsInstance(sel.css('div')[0], MySelector) def test_replacement_null_char_from_body(self): - text = u'<html>\x00<body><p>Grainy</p></body></html>' - self.assertEqual(u'<html><body><p>Grainy</p></body></html>', + text = '<html>\x00<body><p>Grainy</p></body></html>' + self.assertEqual('<html><body><p>Grainy</p></body></html>', self.sscls(text).extract()) def test_remove_selector_list(self): - sel = self.sscls(text=u'<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>') + sel = self.sscls(text='<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>') sel_list = sel.css('li') sel_list.remove() self.assertIsInstance(sel.css('li'), self.sscls.selectorlist_cls) self.assertEqual(sel.css('li'), []) def test_remove_selector(self): - sel = self.sscls(text=u'<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>') + sel = self.sscls(text='<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>') sel_list = sel.css('li') sel_list[0].remove() self.assertIsInstance(sel.css('li'), self.sscls.selectorlist_cls) self.assertEqual(sel.css('li::text').getall(), ['2', '3']) def test_remove_pseudo_element_selector_list(self): - sel = self.sscls(text=u'<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>') + sel = self.sscls(text='<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>') sel_list = sel.css('li::text') self.assertEqual(sel_list.getall(), ['1', '2', '3']) with self.assertRaises(CannotRemoveElementWithoutRoot): @@ -789,7 +788,7 @@ def test_remove_pseudo_element_selector_list(self): self.assertEqual(sel.css('li::text').getall(), ['1', '2', '3']) def test_remove_pseudo_element_selector(self): - sel = self.sscls(text=u'<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>') + sel = self.sscls(text='<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>') sel_list = sel.css('li::text') self.assertEqual(sel_list.getall(), ['1', '2', '3']) with self.assertRaises(CannotRemoveElementWithoutRoot): @@ -799,7 +798,7 @@ def test_remove_pseudo_element_selector(self): self.assertEqual(sel.css('li::text').getall(), ['1', '2', '3']) def test_remove_root_element_selector(self): - sel = self.sscls(text=u'<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>') + sel = self.sscls(text='<html><body><ul><li>1</li><li>2</li><li>3</li></ul></body></html>') sel_list = sel.css('li::text') self.assertEqual(sel_list.getall(), ['1', '2', '3']) with self.assertRaises(CannotRemoveElementWithoutParent): @@ -821,7 +820,7 @@ class ExsltTestCase(unittest.TestCase): def test_regexp(self): """EXSLT regular expression tests""" - body = u""" + body = """ <p><input name='a' value='1'/><input name='b' value='2'/></p> <div class="links"> <a href="/first.html">first link</a> @@ -840,43 +839,43 @@ def test_regexp(self): [x.extract() for x in sel.xpath( r'//a[re:test(@href, "\.html$")]/text()')], - [u'first link', u'second link']) + ['first link', 'second link']) self.assertEqual( [x.extract() for x in sel.xpath( '//a[re:test(@href, "first")]/text()')], - [u'first link']) + ['first link']) self.assertEqual( [x.extract() for x in sel.xpath( '//a[re:test(@href, "second")]/text()')], - [u'second link']) + ['second link']) # re:match() is rather special: it returns a node-set of <match> nodes - # [u'<match>http://www.bayes.co.uk/xml/index.xml?/xml/utils/rechecker.xml</match>', - # u'<match>http</match>', - # u'<match>www.bayes.co.uk</match>', - # u'<match></match>', - # u'<match>/xml/index.xml?/xml/utils/rechecker.xml</match>'] + # ['<match>http://www.bayes.co.uk/xml/index.xml?/xml/utils/rechecker.xml</match>', + # '<match>http</match>', + # '<match>www.bayes.co.uk</match>', + # '<match></match>', + # '<match>/xml/index.xml?/xml/utils/rechecker.xml</match>'] self.assertEqual( sel.xpath(r're:match(//a[re:test(@href, "\.xml$")]/@href,' r'"(\w+):\/\/([^/:]+)(:\d*)?([^# ]*)")/text()').extract(), - [u'http://www.bayes.co.uk/xml/index.xml?/xml/utils/rechecker.xml', - u'http', - u'www.bayes.co.uk', - u'', - u'/xml/index.xml?/xml/utils/rechecker.xml']) + ['http://www.bayes.co.uk/xml/index.xml?/xml/utils/rechecker.xml', + 'http', + 'www.bayes.co.uk', + '', + '/xml/index.xml?/xml/utils/rechecker.xml']) # re:replace() self.assertEqual( sel.xpath(r're:replace(//a[re:test(@href, "\.xml$")]/@href,' r'"(\w+)://(.+)(\.xml)", "","https://\2.html")').extract(), - [u'https://www.bayes.co.uk/xml/index.xml?/xml/utils/rechecker.html']) + ['https://www.bayes.co.uk/xml/index.xml?/xml/utils/rechecker.html']) def test_set(self): """EXSLT set manipulation tests""" # microdata example from http://schema.org/Event - body = u""" + body = """ <div itemscope itemtype="http://schema.org/Event"> <a itemprop="url" href="nba-miami-philidelphia-game3.html"> NBA Eastern Conference First Round Playoff Tickets: @@ -908,17 +907,17 @@ def test_set(self): self.assertEqual( sel.xpath('''//div[@itemtype="http://schema.org/Event"] //@itemprop''').extract(), - [u'url', - u'name', - u'startDate', - u'location', - u'url', - u'address', - u'addressLocality', - u'addressRegion', - u'offers', - u'lowPrice', - u'offerCount'] + ['url', + 'name', + 'startDate', + 'location', + 'url', + 'address', + 'addressLocality', + 'addressRegion', + 'offers', + 'lowPrice', + 'offerCount'] ) self.assertEqual(sel.xpath(''' @@ -926,4 +925,4 @@ def test_set(self): //@itemprop, //div[@itemtype="http://schema.org/Event"] //*[@itemscope]/*/@itemprop)''').extract(), - [u'url', u'name', u'startDate', u'location', u'offers']) + ['url', 'name', 'startDate', 'location', 'offers']) diff --git a/tests/test_selector_csstranslator.py b/tests/test_selector_csstranslator.py index 83ed066..ae9ffc0 100644 --- a/tests/test_selector_csstranslator.py +++ b/tests/test_selector_csstranslator.py @@ -8,7 +8,7 @@ from cssselect.xpath import ExpressionError -HTMLBODY = u''' +HTMLBODY = ''' <html> <body> <div> @@ -52,10 +52,10 @@ def setUp(self): def test_attr_function(self): cases = [ - ('::attr(name)', u'descendant-or-self::*/@name'), - ('a::attr(href)', u'descendant-or-self::a/@href'), - ('a ::attr(img)', u'descendant-or-self::a/descendant-or-self::*/@img'), - ('a > ::attr(class)', u'descendant-or-self::a/*/@class'), + ('::attr(name)', 'descendant-or-self::*/@name'), + ('a::attr(href)', 'descendant-or-self::a/@href'), + ('a ::attr(img)', 'descendant-or-self::a/descendant-or-self::*/@img'), + ('a > ::attr(class)', 'descendant-or-self::a/*/@class'), ] for css, xpath in cases: self.assertEqual(self.c2x(css), xpath, css) @@ -71,17 +71,17 @@ def test_attr_function_exception(self): def test_text_pseudo_element(self): cases = [ - ('::text', u'descendant-or-self::text()'), - ('p::text', u'descendant-or-self::p/text()'), - ('p ::text', u'descendant-or-self::p/descendant-or-self::text()'), - ('#id::text', u"descendant-or-self::*[@id = 'id']/text()"), - ('p#id::text', u"descendant-or-self::p[@id = 'id']/text()"), - ('p#id ::text', u"descendant-or-self::p[@id = 'id']/descendant-or-self::text()"), - ('p#id > ::text', u"descendant-or-self::p[@id = 'id']/*/text()"), - ('p#id ~ ::text', u"descendant-or-self::p[@id = 'id']/following-sibling::*/text()"), - ('a[href]::text', u'descendant-or-self::a[@href]/text()'), - ('a[href] ::text', u'descendant-or-self::a[@href]/descendant-or-self::text()'), - ('p::text, a::text', u"descendant-or-self::p/text() | descendant-or-self::a/text()"), + ('::text', 'descendant-or-self::text()'), + ('p::text', 'descendant-or-self::p/text()'), + ('p ::text', 'descendant-or-self::p/descendant-or-self::text()'), + ('#id::text', "descendant-or-self::*[@id = 'id']/text()"), + ('p#id::text', "descendant-or-self::p[@id = 'id']/text()"), + ('p#id ::text', "descendant-or-self::p[@id = 'id']/descendant-or-self::text()"), + ('p#id > ::text', "descendant-or-self::p[@id = 'id']/*/text()"), + ('p#id ~ ::text', "descendant-or-self::p[@id = 'id']/following-sibling::*/text()"), + ('a[href]::text', 'descendant-or-self::a[@href]/text()'), + ('a[href] ::text', 'descendant-or-self::a[@href]/descendant-or-self::text()'), + ('p::text, a::text', "descendant-or-self::p/text() | descendant-or-self::a/text()"), ] for css, xpath in cases: self.assertEqual(self.c2x(css), xpath, css) @@ -122,7 +122,7 @@ class GenericTranslatorTest(TranslatorTestMixin, unittest.TestCase): class UtilCss2XPathTest(unittest.TestCase): def test_css2xpath(self): from parsel import css2xpath - expected_xpath = (u"descendant-or-self::*[@class and contains(" + expected_xpath = ("descendant-or-self::*[@class and contains(" "concat(' ', normalize-space(@class), ' '), ' some-class ')]") self.assertEqual(css2xpath('.some-class'), expected_xpath) @@ -144,22 +144,22 @@ def test_selector_simple(self): [x.extract() for x in self.sel.css('input')]) def test_text_pseudo_element(self): - self.assertEqual(self.x('#p-b2'), [u'<b id="p-b2">guy</b>']) - self.assertEqual(self.x('#p-b2::text'), [u'guy']) - self.assertEqual(self.x('#p-b2 ::text'), [u'guy']) - self.assertEqual(self.x('#paragraph::text'), [u'lorem ipsum text']) - self.assertEqual(self.x('#paragraph ::text'), [u'lorem ipsum text', u'hi', u'there', u'guy']) - self.assertEqual(self.x('p::text'), [u'lorem ipsum text']) - self.assertEqual(self.x('p ::text'), [u'lorem ipsum text', u'hi', u'there', u'guy']) + self.assertEqual(self.x('#p-b2'), ['<b id="p-b2">guy</b>']) + self.assertEqual(self.x('#p-b2::text'), ['guy']) + self.assertEqual(self.x('#p-b2 ::text'), ['guy']) + self.assertEqual(self.x('#paragraph::text'), ['lorem ipsum text']) + self.assertEqual(self.x('#paragraph ::text'), ['lorem ipsum text', 'hi', 'there', 'guy']) + self.assertEqual(self.x('p::text'), ['lorem ipsum text']) + self.assertEqual(self.x('p ::text'), ['lorem ipsum text', 'hi', 'there', 'guy']) def test_attribute_function(self): - self.assertEqual(self.x('#p-b2::attr(id)'), [u'p-b2']) - self.assertEqual(self.x('.cool-footer::attr(class)'), [u'cool-footer']) - self.assertEqual(self.x('.cool-footer ::attr(id)'), [u'foobar-div', u'foobar-span']) - self.assertEqual(self.x('map[name="dummymap"] ::attr(shape)'), [u'circle', u'default']) + self.assertEqual(self.x('#p-b2::attr(id)'), ['p-b2']) + self.assertEqual(self.x('.cool-footer::attr(class)'), ['cool-footer']) + self.assertEqual(self.x('.cool-footer ::attr(id)'), ['foobar-div', 'foobar-span']) + self.assertEqual(self.x('map[name="dummymap"] ::attr(shape)'), ['circle', 'default']) def test_nested_selector(self): self.assertEqual(self.sel.css('p').css('b::text').extract(), - [u'hi', u'guy']) + ['hi', 'guy']) self.assertEqual(self.sel.css('div').css('area:last-child').extract(), - [u'<area shape="default" id="area-nohref">']) + ['<area shape="default" id="area-nohref">']) diff --git a/tests/test_utils.py b/tests/test_utils.py index 47d44f3..9eede53 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,29 +1,28 @@ from parsel.utils import shorten, extract_regex from pytest import mark, raises -import six @mark.parametrize( 'width,expected', ( (-1, ValueError), - (0, u''), - (1, u'.'), - (2, u'..'), - (3, u'...'), - (4, u'f...'), - (5, u'fo...'), - (6, u'foobar'), - (7, u'foobar'), + (0, ''), + (1, '.'), + (2, '..'), + (3, '...'), + (4, 'f...'), + (5, 'fo...'), + (6, 'foobar'), + (7, 'foobar'), ) ) def test_shorten(width, expected): - if isinstance(expected, six.string_types): - assert shorten(u'foobar', width) == expected + if isinstance(expected, str): + assert shorten('foobar', width) == expected else: with raises(expected): - shorten(u'foobar', width) + shorten('foobar', width) @mark.parametrize('regex, text, replace_entities, expected', ( diff --git a/tests/test_xpathfuncs.py b/tests/test_xpathfuncs.py index cfa2579..8bcabd0 100644 --- a/tests/test_xpathfuncs.py +++ b/tests/test_xpathfuncs.py @@ -1,5 +1,3 @@ -# coding: utf-8 - from parsel import Selector from parsel.xpathfuncs import set_xpathfunc import unittest @@ -7,7 +5,7 @@ class XPathFuncsTestCase(unittest.TestCase): def test_has_class_simple(self): - body = u""" + body = """ <p class="foo bar-baz">First</p> <p class="foo">Second</p> <p class="bar">Third</p> @@ -16,80 +14,80 @@ def test_has_class_simple(self): sel = Selector(text=body) self.assertEqual( [x.extract() for x in sel.xpath('//p[has-class("foo")]/text()')], - [u'First', u'Second']) + ['First', 'Second']) self.assertEqual( [x.extract() for x in sel.xpath('//p[has-class("bar")]/text()')], - [u'Third']) + ['Third']) self.assertEqual( [x.extract() for x in sel.xpath('//p[has-class("foo","bar")]/text()')], []) self.assertEqual( [x.extract() for x in sel.xpath('//p[has-class("foo","bar-baz")]/text()')], - [u'First']) + ['First']) def test_has_class_error_no_args(self): - body = u""" + body = """ <p CLASS="foo">First</p> """ sel = Selector(text=body) - self.assertRaisesRegexp( + self.assertRaisesRegex( ValueError, 'has-class must have at least 1 argument', sel.xpath, 'has-class()') def test_has_class_error_invalid_arg_type(self): - body = u""" + body = """ <p CLASS="foo">First</p> """ sel = Selector(text=body) - self.assertRaisesRegexp( + self.assertRaisesRegex( ValueError, 'has-class arguments must be strings', sel.xpath, 'has-class(.)') def test_has_class_error_invalid_unicode(self): - body = u""" + body = """ <p CLASS="foo">First</p> """ sel = Selector(text=body) - self.assertRaisesRegexp( + self.assertRaisesRegex( ValueError, 'All strings must be XML compatible', - sel.xpath, u'has-class("héllö")'.encode('utf-8')) + sel.xpath, 'has-class("héllö")'.encode('utf-8')) def test_has_class_unicode(self): - body = u""" + body = """ <p CLASS="fóó">First</p> """ sel = Selector(text=body) self.assertEqual( - [x.extract() for x in sel.xpath(u'//p[has-class("fóó")]/text()')], - [u'First']) + [x.extract() for x in sel.xpath('//p[has-class("fóó")]/text()')], + ['First']) def test_has_class_uppercase(self): - body = u""" + body = """ <p CLASS="foo">First</p> """ sel = Selector(text=body) self.assertEqual( [x.extract() for x in sel.xpath('//p[has-class("foo")]/text()')], - [u'First']) + ['First']) def test_has_class_newline(self): - body = u""" + body = """ <p CLASS="foo bar">First</p> """ sel = Selector(text=body) self.assertEqual( - [x.extract() for x in sel.xpath(u'//p[has-class("foo")]/text()')], - [u'First']) + [x.extract() for x in sel.xpath('//p[has-class("foo")]/text()')], + ['First']) def test_has_class_tab(self): - body = u""" + body = """ <p CLASS="foo\tbar">First</p> """ sel = Selector(text=body) self.assertEqual( - [x.extract() for x in sel.xpath(u'//p[has-class("foo")]/text()')], - [u'First']) + [x.extract() for x in sel.xpath('//p[has-class("foo")]/text()')], + ['First']) def test_set_xpathfunc(self): @@ -98,11 +96,11 @@ def myfunc(ctx): myfunc.call_count = 0 - body = u""" + body = """ <p CLASS="foo">First</p> """ sel = Selector(text=body) - self.assertRaisesRegexp( + self.assertRaisesRegex( ValueError, 'Unregistered function in myfunc', sel.xpath, 'myfunc()') @@ -111,6 +109,6 @@ def myfunc(ctx): self.assertEqual(myfunc.call_count, 1) set_xpathfunc('myfunc', None) - self.assertRaisesRegexp( + self.assertRaisesRegex( ValueError, 'Unregistered function in myfunc', sel.xpath, 'myfunc()')
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor