# -*- encoding: utf-8 -*-
import py
import sys
try:
    from hypothesis import given, strategies, settings, example
    HAS_HYPOTHESIS = True
except ImportError:
    HAS_HYPOTHESIS = False
    
from rpython.rlib import rutf8
from pypy.interpreter.error import OperationError


class TestUnicodeObject:
    def test_comparison_warning(self):
        warnings = []
        def my_warn(msg, warningscls):
            warnings.append(msg)
            prev_warn(msg, warningscls)
        space = self.space
        prev_warn = space.warn
        try:
            space.warn = my_warn
            space.appexec([], """():
                chr(128) == unichr(128)
                chr(128) != unichr(128)
                chr(127) == unichr(127) # no warnings
            """)
        finally:
            space.warn = prev_warn
        assert len(warnings) == 2

    def test_listview_ascii(self):
        w_str = self.space.newutf8('abcd', 4)
        assert self.space.listview_ascii(w_str) == list("abcd")

    def test_new_shortcut(self):
        space = self.space
        w_uni = self.space.newutf8('abcd', 4)
        w_new = space.call_method(
                space.w_unicode, "__new__", space.w_unicode, w_uni)
        assert w_new is w_uni

    def test_fast_iter(self):
        space = self.space
        w_uni = space.newutf8(u"aä".encode("utf-8"), 2)
        old_index_storage = w_uni._index_storage
        w_iter = space.iter(w_uni)
        w_char1 = w_iter.descr_next(space)
        w_char2 = w_iter.descr_next(space)
        py.test.raises(OperationError, w_iter.descr_next, space)
        assert w_uni._index_storage is old_index_storage
        assert space.eq_w(w_char1, w_uni._getitem_result(space, 0))
        assert space.eq_w(w_char2, w_uni._getitem_result(space, 1))


    if HAS_HYPOTHESIS:
        @given(strategies.text(), strategies.integers(min_value=0, max_value=10),
                                  strategies.integers(min_value=-1, max_value=10))
        def test_hypo_index_find(self, u, start, len1):
            if start + len1 < 0:
                return   # skip this case
            v = u[start : start + len1]
            space = self.space
            w_u = space.newutf8(u.encode('utf8'), len(u))
            w_v = space.newutf8(v.encode('utf8'), len(v))
            expected = u.find(v, start, start + len1)
            try:
                w_index = space.call_method(w_u, 'index', w_v,
                                            space.newint(start),
                                            space.newint(start + len1))
            except OperationError as e:
                if not e.match(space, space.w_ValueError):
                    raise
                assert expected == -1
            else:
                assert space.int_w(w_index) == expected >= 0

            w_index = space.call_method(w_u, 'find', w_v,
                                        space.newint(start),
                                        space.newint(start + len1))
            assert space.int_w(w_index) == expected

            rexpected = u.rfind(v, start, start + len1)
            try:
                w_index = space.call_method(w_u, 'rindex', w_v,
                                            space.newint(start),
                                            space.newint(start + len1))
            except OperationError as e:
                if not e.match(space, space.w_ValueError):
                    raise
                assert rexpected == -1
            else:
                assert space.int_w(w_index) == rexpected >= 0

            w_index = space.call_method(w_u, 'rfind', w_v,
                                        space.newint(start),
                                        space.newint(start + len1))
            assert space.int_w(w_index) == rexpected

            expected = u.startswith(v, start)
            w_res = space.call_method(w_u, 'startswith', w_v,
                                      space.newint(start))
            assert w_res is space.newbool(expected)

            expected = u.startswith(v, start, start + len1)
            w_res = space.call_method(w_u, 'startswith', w_v,
                                      space.newint(start),
                                      space.newint(start + len1))
            assert w_res is space.newbool(expected)

            expected = u.endswith(v, start)
            w_res = space.call_method(w_u, 'endswith', w_v,
                                      space.newint(start))
            assert w_res is space.newbool(expected)

            expected = u.endswith(v, start, start + len1)
            w_res = space.call_method(w_u, 'endswith', w_v,
                                      space.newint(start),
                                      space.newint(start + len1))
            assert w_res is space.newbool(expected)


        @given(u=strategies.text(),
               start=strategies.integers(min_value=0, max_value=10),
               len1=strategies.integers(min_value=-1, max_value=10))
        def test_hypo_index_find(self, u, start, len1):
            space = self.space
            if start + len1 < 0:
                return   # skip this case
            v = u[start : start + len1]
            w_u = space.wrap(u)
            w_v = space.wrap(v)
            expected = u.find(v, start, start + len1)
            try:
                w_index = space.call_method(w_u, 'index', w_v,
                                            space.newint(start),
                                            space.newint(start + len1))
            except OperationError as e:
                if not e.match(space, space.w_ValueError):
                    raise
                assert expected == -1
            else:
                assert space.int_w(w_index) == expected >= 0

            w_index = space.call_method(w_u, 'find', w_v,
                                        space.newint(start),
                                        space.newint(start + len1))
            assert space.int_w(w_index) == expected

            rexpected = u.rfind(v, start, start + len1)
            try:
                w_index = space.call_method(w_u, 'rindex', w_v,
                                            space.newint(start),
                                            space.newint(start + len1))
            except OperationError as e:
                if not e.match(space, space.w_ValueError):
                    raise
                assert rexpected == -1
            else:
                assert space.int_w(w_index) == rexpected >= 0

            w_index = space.call_method(w_u, 'rfind', w_v,
                                        space.newint(start),
                                        space.newint(start + len1))
            assert space.int_w(w_index) == rexpected

            expected = u.startswith(v, start)
            w_res = space.call_method(w_u, 'startswith', w_v,
                                      space.newint(start))
            assert w_res is space.newbool(expected)

            expected = u.startswith(v, start, start + len1)
            w_res = space.call_method(w_u, 'startswith', w_v,
                                      space.newint(start),
                                      space.newint(start + len1))
            assert w_res is space.newbool(expected)

            expected = u.endswith(v, start)
            w_res = space.call_method(w_u, 'endswith', w_v,
                                      space.newint(start))
            assert w_res is space.newbool(expected)

            expected = u.endswith(v, start, start + len1)
            w_res = space.call_method(w_u, 'endswith', w_v,
                                      space.newint(start),
                                      space.newint(start + len1))
            assert w_res is space.newbool(expected)

    def test_getitem_constant_index_jit(self):
        # test it directly, to prevent only seeing bugs in jitted code
        space = self.space
        u = u"äöabc"
        w_u = self.space.wrap(u)
        for i in range(-len(u), len(u)):
            assert w_u._getitem_result_constant_index_jit(space, i)._utf8 == u[i].encode("utf-8")
        with py.test.raises(OperationError):
            w_u._getitem_result_constant_index_jit(space, len(u))
        with py.test.raises(OperationError):
            w_u._getitem_result_constant_index_jit(space, -len(u) - 1)

    def test_getslice_constant_index_jit(self):
        space = self.space
        u = u"äöabcéééß"
        w_u = self.space.wrap(u)
        for start in range(0, 4):
            for end in range(start, len(u)):
                assert w_u._unicode_sliced_constant_index_jit(space, start, end)._utf8 == u[start: end].encode("utf-8")

    def test_lower_upper_ascii(self):
        from pypy.module.unicodedata.interp_ucd import unicodedb
        # check that ascii chars tolower/toupper still behave sensibly in the
        # unicodedb - unlikely to ever change, but well
        for ch in range(128):
            unilower, = unicodedb.tolower_full(ch)
            assert chr(unilower) == chr(ch).lower()
            uniupper, = unicodedb.toupper_full(ch)
            assert chr(uniupper) == chr(ch).upper()

class AppTestUnicodeStringStdOnly:
    def test_compares(self):
        assert u'a' == 'a'
        assert 'a' == u'a'
        assert not u'a' == 'b'
        assert not 'a'  == u'b'
        assert u'a' != 'b'
        assert 'a'  != u'b'
        assert not (u'a' == 5)
        assert u'a' != 5
        assert u'a' < 5 or u'a' > 5

        s = chr(128)
        u = unichr(128)
        assert not s == u # UnicodeWarning
        assert s != u
        assert not u == s
        assert u != s


class AppTestUnicodeString:
    spaceconfig = dict(usemodules=('unicodedata',))

    def test_addition(self):
        def check(a, b):
            assert a == b
            assert type(a) == type(b)
        check(u'a' + 'b', u'ab')
        check('a' + u'b', u'ab')

    def test_getitem(self):
        assert u'abc'[2] == 'c'
        raises(IndexError, u'abc'.__getitem__, 15)
        assert u'g\u0105\u015b\u0107'[2] == u'\u015b'

    def test_join(self):
        def check(a, b):
            assert a == b
            assert type(a) == type(b)
        check(', '.join([u'a']), u'a')
        check(', '.join(['a', u'b']), u'a, b')
        check(u', '.join(['a', 'b']), u'a, b')
        try:
            u''.join([u'a', 2, 3])
        except TypeError as e:
            assert 'sequence item 1' in str(e)
        else:
            raise Exception("DID NOT RAISE")

    if sys.version_info >= (2,3):
        def test_contains_ex(self):
            assert u'' in 'abc'
            assert u'bc' in 'abc'
            assert 'bc' in 'abc'
        pass   # workaround for inspect.py bug in some Python 2.4s

    def test_contains(self):
        assert u'a' in 'abc'
        assert 'a' in u'abc'
        raises(UnicodeDecodeError, "u'\xe2' in 'g\xe2teau'")

    def test_splitlines(self):
        assert u''.splitlines() == []
        assert u''.splitlines(1) == []
        assert u'\n'.splitlines() == [u'']
        assert u'a'.splitlines() == [u'a']
        assert u'one\ntwo'.splitlines() == [u'one', u'two']
        assert u'\ntwo\nthree'.splitlines() == [u'', u'two', u'three']
        assert u'\n\n'.splitlines() == [u'', u'']
        assert u'a\nb\nc'.splitlines(1) == [u'a\n', u'b\n', u'c']
        assert u'\na\nb\n'.splitlines(1) == [u'\n', u'a\n', u'b\n']
        assert ((u'a' + '\xc2\x85'.decode('utf8') + u'b\n').splitlines() ==
                ['a', 'b'])

    def test_zfill(self):
        assert u'123'.zfill(2) == u'123'
        assert u'123'.zfill(3) == u'123'
        assert u'123'.zfill(4) == u'0123'
        assert u'123'.zfill(6) == u'000123'
        assert u'+123'.zfill(2) == u'+123'
        assert u'+123'.zfill(3) == u'+123'
        assert u'+123'.zfill(4) == u'+123'
        assert u'+123'.zfill(5) == u'+0123'
        assert u'+123'.zfill(6) == u'+00123'
        assert u'-123'.zfill(3) == u'-123'
        assert u'-123'.zfill(4) == u'-123'
        assert u'-123'.zfill(5) == u'-0123'
        assert u''.zfill(3) == u'000'
        assert u'34'.zfill(1) == u'34'
        assert u'34'.zfill(4) == u'0034'

    def test_split(self):
        assert u"".split() == []
        assert u"".split(u'x') == ['']
        assert u" ".split() == []
        assert u"a".split() == [u'a']
        assert u"a".split(u"a", 1) == [u'', u'']
        assert u" ".split(u" ", 1) == [u'', u'']
        assert u"aa".split(u"a", 2) == [u'', u'', u'']
        assert u" a ".split() == [u'a']
        assert u"a b c".split() == [u'a',u'b',u'c']
        assert u'this is the split function'.split() == [u'this', u'is', u'the', u'split', u'function']
        assert u'a|b|c|d'.split(u'|') == [u'a', u'b', u'c', u'd']
        assert 'a|b|c|d'.split(u'|') == [u'a', u'b', u'c', u'd']
        assert u'a|b|c|d'.split('|') == [u'a', u'b', u'c', u'd']
        assert u'a|b|c|d'.split(u'|', 2) == [u'a', u'b', u'c|d']
        assert u'a b c d'.split(None, 1) == [u'a', u'b c d']
        assert u'a b c d'.split(None, 2) == [u'a', u'b', u'c d']
        assert u'a b c d'.split(None, 3) == [u'a', u'b', u'c', u'd']
        assert u'a b c d'.split(None, 4) == [u'a', u'b', u'c', u'd']
        assert u'a b c d'.split(None, 0) == [u'a b c d']
        assert u'a  b  c  d'.split(None, 2) == [u'a', u'b', u'c  d']
        assert u'a b c d '.split() == [u'a', u'b', u'c', u'd']
        assert u'a//b//c//d'.split(u'//') == [u'a', u'b', u'c', u'd']
        assert u'endcase test'.split(u'test') == [u'endcase ', u'']
        raises(ValueError, u'abc'.split, '')
        raises(ValueError, u'abc'.split, u'')
        raises(ValueError, 'abc'.split, u'')
        assert u'   a b c d'.split(None, 0) == [u'a b c d']
        assert u'a\nb\u1680c'.split() == [u'a', u'b', u'c']

    def test_rsplit(self):
        assert u"".rsplit() == []
        assert u" ".rsplit() == []
        assert u"a".rsplit() == [u'a']
        assert u"a".rsplit(u"a", 1) == [u'', u'']
        assert u" ".rsplit(u" ", 1) == [u'', u'']
        assert u"aa".rsplit(u"a", 2) == [u'', u'', u'']
        assert u" a ".rsplit() == [u'a']
        assert u"a b c".rsplit() == [u'a',u'b',u'c']
        assert u'this is the rsplit function'.rsplit() == [u'this', u'is', u'the', u'rsplit', u'function']
        assert u'a|b|c|d'.rsplit(u'|') == [u'a', u'b', u'c', u'd']
        assert u'a|b|c|d'.rsplit('|') == [u'a', u'b', u'c', u'd']
        assert 'a|b|c|d'.rsplit(u'|') == [u'a', u'b', u'c', u'd']
        assert u'a|b|c|d'.rsplit(u'|', 2) == [u'a|b', u'c', u'd']
        assert u'a b c d'.rsplit(None, 1) == [u'a b c', u'd']
        assert u'a b c d'.rsplit(None, 2) == [u'a b', u'c', u'd']
        assert u'a b c d'.rsplit(None, 3) == [u'a', u'b', u'c', u'd']
        assert u'a b c d'.rsplit(None, 4) == [u'a', u'b', u'c', u'd']
        assert u'a b c d'.rsplit(None, 0) == [u'a b c d']
        assert u'a  b  c  d'.rsplit(None, 2) == [u'a  b', u'c', u'd']
        assert u'a b c d '.rsplit() == [u'a', u'b', u'c', u'd']
        assert u'a//b//c//d'.rsplit(u'//') == [u'a', u'b', u'c', u'd']
        assert u'endcase test'.rsplit(u'test') == [u'endcase ', u'']
        raises(ValueError, u'abc'.rsplit, u'')
        raises(ValueError, u'abc'.rsplit, '')
        raises(ValueError, 'abc'.rsplit, u'')
        assert u'  a b c  '.rsplit(None, 0) == [u'  a b c']
        assert u''.rsplit('aaa') == [u'']
        assert u'a\nb\u1680c'.rsplit() == [u'a', u'b', u'c']

    def test_rsplit_bug(self):
        assert u'Vestur- og Mið'.rsplit() == [u'Vestur-', u'og', u'Mið']

    def test_split_rsplit_str_unicode(self):
        x = 'abc'.split(u'b')
        assert x == [u'a', u'c']
        assert map(type, x) == [unicode, unicode]
        x = 'abc'.rsplit(u'b')
        assert x == [u'a', u'c']
        assert map(type, x) == [unicode, unicode]
        x = 'abc'.split(u'\u4321')
        assert x == [u'abc']
        assert map(type, x) == [unicode]
        x = 'abc'.rsplit(u'\u4321')
        assert x == [u'abc']
        assert map(type, x) == [unicode]
        raises(UnicodeDecodeError, '\x80'.split, u'a')
        raises(UnicodeDecodeError, '\x80'.split, u'')
        raises(UnicodeDecodeError, '\x80'.rsplit, u'a')
        raises(UnicodeDecodeError, '\x80'.rsplit, u'')

    def test_center(self):
        s=u"a b"
        assert s.center(0) == u"a b"
        assert s.center(1) == u"a b"
        assert s.center(2) == u"a b"
        assert s.center(3) == u"a b"
        assert s.center(4) == u"a b "
        assert s.center(5) == u" a b "
        assert s.center(6) == u" a b  "
        assert s.center(7) == u"  a b  "
        assert s.center(8) == u"  a b   "
        assert s.center(9) == u"   a b   "
        assert u'abc'.center(10) == u'   abc    '
        assert u'abc'.center(6) == u' abc  '
        assert u'abc'.center(3) == u'abc'
        assert u'abc'.center(2) == u'abc'
        assert u'abc'.center(5, u'*') == u'*abc*'    # Python 2.4
        assert u'abc'.center(5, '*') == u'*abc*'     # Python 2.4
        raises(TypeError, u'abc'.center, 4, u'cba')

    def test_title(self):
        assert u"brown fox".title() == u"Brown Fox"
        assert u"!brown fox".title() == u"!Brown Fox"
        assert u"bROWN fOX".title() == u"Brown Fox"
        assert u"Brown Fox".title() == u"Brown Fox"
        assert u"bro!wn fox".title() == u"Bro!Wn Fox"
        assert u"brow\u4321n fox".title() == u"Brow\u4321N Fox"
        assert u'\ud800'.title() == u'\ud800'
        assert (unichr(0x345) + u'abc').title() == u'\u0399Abc'
        assert (unichr(0x345) + u'ABC').title() == u'\u0399Abc'

    def test_istitle(self):
        assert u"".istitle() == False
        assert u"!".istitle() == False
        assert u"!!".istitle() == False
        assert u"brown fox".istitle() == False
        assert u"!brown fox".istitle() == False
        assert u"bROWN fOX".istitle() == False
        assert u"Brown Fox".istitle() == True
        assert u"bro!wn fox".istitle() == False
        assert u"Bro!wn fox".istitle() == False
        assert u"!brown Fox".istitle() == False
        assert u"!Brown Fox".istitle() == True
        assert u"Brow&&&&N Fox".istitle() == True
        assert u"!Brow&&&&n Fox".istitle() == False
        assert u'\u1FFc'.istitle()
        assert u'Greek \u1FFcitlecases ...'.istitle()

    def test_islower_isupper_with_titlecase(self):
        # \u01c5 is a char which is neither lowercase nor uppercase, but
        # titlecase
        assert not u'\u01c5abc'.islower()
        assert not u'\u01c5ABC'.isupper()

    def test_lower_upper(self):
        assert u'a'.lower() == u'a'
        assert u'A'.lower() == u'a'
        assert u'\u0105'.lower() == u'\u0105'
        assert u'\u0104'.lower() == u'\u0105'
        assert u'\ud800'.lower() == u'\ud800'
        assert u'a'.upper() == u'A'
        assert u'A'.upper() == u'A'
        assert u'\u0105'.upper() == u'\u0104'
        assert u'\u0104'.upper() == u'\u0104'
        assert u'\ud800'.upper() == u'\ud800'

    def test_capitalize(self):
        assert u"brown fox".capitalize() == u"Brown fox"
        assert u' hello '.capitalize() == u' hello '
        assert u'Hello '.capitalize() == u'Hello '
        assert u'hello '.capitalize() == u'Hello '
        assert u'aaaa'.capitalize() == u'Aaaa'
        assert u'AaAa'.capitalize() == u'Aaaa'
        # check that titlecased chars are lowered correctly
        # \u1ffc is the titlecased char
        assert (u'\u1ff3\u1ff3\u1ffc\u1ffc'.capitalize() ==
                u'\u1ffc\u1ff3\u1ff3\u1ff3')
        # check with cased non-letter chars
        assert (u'\u24c5\u24ce\u24c9\u24bd\u24c4\u24c3'.capitalize() ==
                u'\u24c5\u24e8\u24e3\u24d7\u24de\u24dd')
        assert (u'\u24df\u24e8\u24e3\u24d7\u24de\u24dd'.capitalize() ==
                u'\u24c5\u24e8\u24e3\u24d7\u24de\u24dd')
        assert u'\u2160\u2161\u2162'.capitalize() == u'\u2160\u2171\u2172'
        assert u'\u2170\u2171\u2172'.capitalize() == u'\u2160\u2171\u2172'
        # check with Ll chars with no upper - nothing changes here
        assert (u'\u019b\u1d00\u1d86\u0221\u1fb7'.capitalize() ==
                u'\u019b\u1d00\u1d86\u0221\u1fb7')
        assert u'\ud800'.capitalize() == u'\ud800'
        assert u'xx\ud800'.capitalize() == u'Xx\ud800'

    def test_rjust(self):
        s = u"abc"
        assert s.rjust(2) == s
        assert s.rjust(3) == s
        assert s.rjust(4) == u" " + s
        assert s.rjust(5) == u"  " + s
        assert u'abc'.rjust(10) == u'       abc'
        assert u'abc'.rjust(6) == u'   abc'
        assert u'abc'.rjust(3) == u'abc'
        assert u'abc'.rjust(2) == u'abc'
        assert u'abc'.rjust(5, u'*') == u'**abc'    # Python 2.4
        assert u'abc'.rjust(5, '*') == u'**abc'     # Python 2.4
        raises(TypeError, u'abc'.rjust, 5, u'xx')

    def test_ljust(self):
        s = u"abc"
        assert s.ljust(2) == s
        assert s.ljust(3) == s
        assert s.ljust(4) == s + u" "
        assert s.ljust(5) == s + u"  "
        assert u'abc'.ljust(10) == u'abc       '
        assert u'abc'.ljust(6) == u'abc   '
        assert u'abc'.ljust(3) == u'abc'
        assert u'abc'.ljust(2) == u'abc'
        assert u'abc'.ljust(5, u'*') == u'abc**'    # Python 2.4
        assert u'abc'.ljust(5, '*') == u'abc**'     # Python 2.4
        raises(TypeError, u'abc'.ljust, 6, u'')

    def test_replace(self):
        assert u'one!two!three!'.replace(u'!', '@', 1) == u'one@two!three!'
        assert u'one!two!three!'.replace('!', u'') == u'onetwothree'
        assert u'one!two!three!'.replace(u'!', u'@', 2) == u'one@two@three!'
        assert u'one!two!three!'.replace('!', '@', 3) == u'one@two@three@'
        assert u'one!two!three!'.replace(u'!', '@', 4) == u'one@two@three@'
        assert u'one!two!three!'.replace('!', u'@', 0) == u'one!two!three!'
        assert u'one!two!three!'.replace(u'!', u'@') == u'one@two@three@'
        assert u'one!two!three!'.replace('x', '@') == u'one!two!three!'
        assert u'one!two!three!'.replace(u'x', '@', 2) == u'one!two!three!'
        assert u'abc'.replace('', u'-') == u'-a-b-c-'
        assert u'\u1234'.replace(u'', '-') == u'-\u1234-'
        assert u'\u0234\u5678'.replace('', u'-') == u'-\u0234-\u5678-'
        assert u'\u0234\u5678'.replace('', u'-', 0) == u'\u0234\u5678'
        assert u'\u0234\u5678'.replace('', u'-', 1) == u'-\u0234\u5678'
        assert u'\u0234\u5678'.replace('', u'-', 2) == u'-\u0234-\u5678'
        assert u'\u0234\u5678'.replace('', u'-', 3) == u'-\u0234-\u5678-'
        assert u'\u0234\u5678'.replace('', u'-', 4) == u'-\u0234-\u5678-'
        assert u'\u0234\u5678'.replace('', u'-', 700) == u'-\u0234-\u5678-'
        assert u'\u0234\u5678'.replace('', u'-', -1) == u'-\u0234-\u5678-'
        assert u'\u0234\u5678'.replace('', u'-', -42) == u'-\u0234-\u5678-'
        assert u'abc'.replace(u'', u'-', 3) == u'-a-b-c'
        assert u'abc'.replace('', '-', 0) == u'abc'
        assert u''.replace(u'', '') == u''
        assert u''.replace('', u'a') == u'a'
        assert u'abc'.replace(u'ab', u'--', 0) == u'abc'
        assert u'abc'.replace('xy', '--') == u'abc'
        assert u'123'.replace(u'123', '') == u''
        assert u'123123'.replace('123', u'') == u''
        assert u'123x123'.replace(u'123', u'') == u'x'

    def test_replace_buffer(self):
        assert u'one!two!three'.replace(buffer('!'), buffer('@')) == u'one@two@three'

    def test_replace_overflow(self):
        import sys
        if sys.maxint > 2**31-1:
            skip("Wrong platform")
        s = u"a" * (2**16)
        raises(OverflowError, s.replace, u"", s)

    def test_strip(self):
        s = u" a b "
        assert s.strip() == u"a b"
        assert s.rstrip() == u" a b"
        assert s.lstrip() == u"a b "
        assert u'xyzzyhelloxyzzy'.strip(u'xyz') == u'hello'
        assert u'xyzzyhelloxyzzy'.lstrip('xyz') == u'helloxyzzy'
        assert u'xyzzyhelloxyzzy'.rstrip(u'xyz') == u'xyzzyhello'
        exc = raises(TypeError, s.strip, buffer(' '))
        assert str(exc.value) == 'strip arg must be None, unicode or str'
        exc = raises(TypeError, s.rstrip, buffer(' '))
        assert str(exc.value) == 'rstrip arg must be None, unicode or str'
        exc = raises(TypeError, s.lstrip, buffer(' '))
        assert str(exc.value) == 'lstrip arg must be None, unicode or str'

    def test_strip_str_unicode(self):
        x = "--abc--".strip(u"-")
        assert (x, type(x)) == (u"abc", unicode)
        x = "--abc--".lstrip(u"-")
        assert (x, type(x)) == (u"abc--", unicode)
        x = "--abc--".rstrip(u"-")
        assert (x, type(x)) == (u"--abc", unicode)
        raises(UnicodeDecodeError, "\x80".strip, u"")
        raises(UnicodeDecodeError, "\x80".lstrip, u"")
        raises(UnicodeDecodeError, "\x80".rstrip, u"")

    def test_long_from_unicode(self):
        assert long(u'12345678901234567890') == 12345678901234567890
        assert int(u'12345678901234567890') == 12345678901234567890
        assert long(u'123', 7) == 66

    def test_int_from_unicode(self):
        assert int(u'12345') == 12345

    def test_float_from_unicode(self):
        assert float(u'123.456e89') == float('123.456e89')

    def test_repr_16bits(self):
        # this used to fail when run on a CPython host with 16-bit unicodes
        s = repr(u'\U00101234')
        assert s == "u'\\U00101234'"

    def test_repr(self):
        for ustr in [u"", u"a", u"'", u"\'", u"\"", u"\t", u"\\", u'',
                     u'a', u'"', u'\'', u'\"', u'\t', u'\\', u"'''\"",
                     unichr(19), unichr(2), u'\u1234', u'\U00101234']:
            assert eval(repr(ustr)) == ustr

    def test_getnewargs(self):
        class X(unicode):
            pass
        x = X(u"foo\u1234")
        a = x.__getnewargs__()
        assert a == (u"foo\u1234",)
        assert type(a[0]) is unicode

    def test_call_unicode(self):
        assert unicode() == u''
        assert unicode(None) == u'None'
        assert unicode(123) == u'123'
        assert unicode([2, 3]) == u'[2, 3]'
        class U(unicode):
            pass
        assert unicode(U()).__class__ is unicode
        assert U(u'test') == u'test'
        assert U(u'test').__class__ is U

    def test_call_unicode_2(self):
        class X(object):
            def __unicode__(self):
                return u'x'
        raises(TypeError, unicode, X(), 'ascii')

    def test_startswith(self):
        assert u'ab'.startswith(u'ab') is True
        assert u'ab'.startswith(u'a') is True
        assert u'ab'.startswith(u'') is True
        assert u'x'.startswith(u'a') is False
        assert u'x'.startswith(u'x') is True
        assert u''.startswith(u'') is True
        assert u''.startswith(u'a') is False
        assert u'x'.startswith(u'xx') is False
        assert u'y'.startswith(u'xx') is False
        assert u'\u1234\u5678\u4321'.startswith(u'\u1234') is True
        assert u'\u1234\u5678\u4321'.startswith(u'\u1234\u4321') is False
        assert u'\u1234'.startswith(u'', 1, 0) is True

    def test_startswith_more(self):
        assert u'ab'.startswith(u'a', 0) is True
        assert u'ab'.startswith(u'a', 1) is False
        assert u'ab'.startswith(u'b', 1) is True
        assert u'abc'.startswith(u'bc', 1, 2) is False
        assert u'abc'.startswith(u'c', -1, 4) is True

    def test_startswith_too_large(self):
        assert u'ab'.startswith(u'b', 1) is True
        assert u'ab'.startswith(u'', 2) is True
        assert u'ab'.startswith(u'', 3) is True   # not False
        assert u'ab'.endswith(u'b', 1) is True
        assert u'ab'.endswith(u'', 2) is True
        assert u'ab'.endswith(u'', 3) is True   # not False

    def test_startswith_tuples(self):
        assert u'hello'.startswith((u'he', u'ha'))
        assert not u'hello'.startswith((u'lo', u'llo'))
        assert u'hello'.startswith((u'hellox', u'hello'))
        assert not u'hello'.startswith(())
        assert u'helloworld'.startswith((u'hellowo', u'rld', u'lowo'), 3)
        assert not u'helloworld'.startswith((u'hellowo', u'ello', u'rld'), 3)
        assert u'hello'.startswith((u'lo', u'he'), 0, -1)
        assert not u'hello'.startswith((u'he', u'hel'), 0, 1)
        assert u'hello'.startswith((u'he', u'hel'), 0, 2)
        raises(TypeError, u'hello'.startswith, (42,))

    def test_startswith_endswith_convert(self):
        assert 'hello'.startswith((u'he\u1111', u'he'))
        assert not 'hello'.startswith((u'lo\u1111', u'llo'))
        assert 'hello'.startswith((u'hellox\u1111', u'hello'))
        assert not 'hello'.startswith((u'lo', u'he\u1111'), 0, -1)
        assert not 'hello'.endswith((u'he\u1111', u'he'))
        assert 'hello'.endswith((u'\u1111lo', u'llo'))
        assert 'hello'.endswith((u'\u1111hellox', u'hello'))

    def test_endswith(self):
        assert u'ab'.endswith(u'ab') is True
        assert u'ab'.endswith(u'b') is True
        assert u'ab'.endswith(u'') is True
        assert u'x'.endswith(u'a') is False
        assert u'x'.endswith(u'x') is True
        assert u''.endswith(u'') is True
        assert u''.endswith(u'a') is False
        assert u'x'.endswith(u'xx') is False
        assert u'y'.endswith(u'xx') is False

    def test_endswith_more(self):
        assert u'abc'.endswith(u'ab', 0, 2) is True
        assert u'abc'.endswith(u'bc', 1) is True
        assert u'abc'.endswith(u'bc', 2) is False
        assert u'abc'.endswith(u'b', -3, -1) is True

    def test_endswith_tuple(self):
        assert not u'hello'.endswith((u'he', u'ha'))
        assert u'hello'.endswith((u'lo', u'llo'))
        assert u'hello'.endswith((u'hellox', u'hello'))
        assert not u'hello'.endswith(())
        assert u'helloworld'.endswith((u'hellowo', u'rld', u'lowo'), 3)
        assert not u'helloworld'.endswith((u'hellowo', u'ello', u'rld'), 3, -1)
        assert u'hello'.endswith((u'hell', u'ell'), 0, -1)
        assert not u'hello'.endswith((u'he', u'hel'), 0, 1)
        assert u'hello'.endswith((u'he', u'hell'), 0, 4)
        raises(TypeError, u'hello'.endswith, (42,))

    def test_expandtabs(self):
        assert u'abc\rab\tdef\ng\thi'.expandtabs() ==    u'abc\rab      def\ng       hi'
        assert u'abc\rab\tdef\ng\thi'.expandtabs(8) ==   u'abc\rab      def\ng       hi'
        assert u'abc\rab\tdef\ng\thi'.expandtabs(4) ==   u'abc\rab  def\ng   hi'
        assert u'abc\r\nab\tdef\ng\thi'.expandtabs(4) == u'abc\r\nab  def\ng   hi'
        assert u'abc\rab\tdef\ng\thi'.expandtabs() ==    u'abc\rab      def\ng       hi'
        assert u'abc\rab\tdef\ng\thi'.expandtabs(8) ==   u'abc\rab      def\ng       hi'
        assert u'abc\r\nab\r\ndef\ng\r\nhi'.expandtabs(4) == u'abc\r\nab\r\ndef\ng\r\nhi'

        s = u'xy\t'
        assert s.expandtabs() =='xy      '

        s = u'\txy\t'
        assert s.expandtabs() =='        xy      '
        assert s.expandtabs(1) ==' xy '
        assert s.expandtabs(2) =='  xy  '
        assert s.expandtabs(3) =='   xy '

        assert u'xy'.expandtabs() =='xy'
        assert u''.expandtabs() ==''

    def test_expandtabs_overflows_gracefully(self):
        import sys
        if sys.maxint > (1 << 32):
            skip("Wrong platform")
        raises((OverflowError, MemoryError), u't\tt\t'.expandtabs, sys.maxint)

    def test_expandtabs_0(self):
        assert u'x\ty'.expandtabs(0) == u'xy'
        assert u'x\ty'.expandtabs(-42) == u'xy'

    def test_translate(self):
        assert u'bbbc' == u'abababc'.translate({ord('a'):None})
        assert u'iiic' == u'abababc'.translate({ord('a'):None, ord('b'):ord('i')})
        assert u'iiix' == u'abababc'.translate({ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
        assert u'<i><i><i>c' == u'abababc'.translate({ord('a'):None, ord('b'):u'<i>'})
        assert u'c' == u'abababc'.translate({ord('a'):None, ord('b'):u''})
        assert u'xyyx' == u'xzx'.translate({ord('z'):u'yy'})
        assert u'abcd' == u'ab\0d'.translate(u'c')
        assert u'abcd' == u'abcd'.translate(u'')

        raises(TypeError, u'hello'.translate)
        raises(TypeError, u'abababc'.translate, {ord('a'):''})
        raises(TypeError, u'x'.translate, {ord('x'):0x110000})

    def test_unicode_from_encoded_object(self):
        assert unicode('x', 'utf-8') == u'x'
        assert unicode('x', 'utf-8', 'strict') == u'x'

    def test_unicode_startswith_tuple(self):
        assert u'xxx'.startswith(('x', 'y', 'z'), 0)
        assert u'xxx'.endswith(('x', 'y', 'z'), 0)

    def test_missing_cases(self):
        # some random cases, which are discovered to not be tested during annotation
        assert u'xxx'[1:1] == u''

    # these tests test lots of encodings, so they really belong to the _codecs
    # module. however, they test useful unicode methods too
    # they are stolen from CPython's unit tests

    def test_codecs_utf7(self):
        utfTests = [
            (u'A\u2262\u0391.', 'A+ImIDkQ.'),             # RFC2152 example
            (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'),     # RFC2152 example
            (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'),        # RFC2152 example
            (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
            (u'+', '+-'),
            (u'+-', '+--'),
            (u'+?', '+-?'),
            (u'\?', '+AFw?'),
            (u'+?', '+-?'),
            (ur'\\?', '+AFwAXA?'),
            (ur'\\\?', '+AFwAXABc?'),
            (ur'++--', '+-+---'),
        ]

        for (x, y) in utfTests:
            assert x.encode('utf-7') == y

        # surrogates are supported
        assert unicode('+3ADYAA-', 'utf-7') == u'\udc00\ud800'

        assert unicode('+AB', 'utf-7', 'replace') == u'\ufffd'

    def test_codecs_utf8(self):
        assert u''.encode('utf-8') == ''
        assert u'\u20ac'.encode('utf-8') == '\xe2\x82\xac'
        assert u'\ud800\udc02'.encode('utf-8') == '\xf0\x90\x80\x82'
        assert u'\ud84d\udc56'.encode('utf-8') == '\xf0\xa3\x91\x96'
        assert u'\ud800\udc02'.encode('uTf-8') == '\xf0\x90\x80\x82'
        assert u'\ud84d\udc56'.encode('Utf8') == '\xf0\xa3\x91\x96'
        assert u'\ud800'.encode('utf-8') == '\xed\xa0\x80'
        assert u'\udc00'.encode('utf-8') == '\xed\xb0\x80'
        assert (u'\ud800\udc02'*1000).encode('utf-8') == '\xf0\x90\x80\x82'*1000
        assert (
            u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
            u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
            u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
            u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
            u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
            u' Nunstuck git und'.encode('utf-8') == 
            '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
            '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
            '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
            '\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8'
            '\xaa\x9e\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81'
            '\xe3\x81\x82\xe3\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81'
            '\x9f\xe3\x82\x89\xe3\x82\x81\xe3\x81\xa7\xe3\x81\x99\xe3'
            '\x80\x82\xe5\xae\x9f\xe9\x9a\x9b\xe3\x81\xab\xe3\x81\xaf'
            '\xe3\x80\x8cWenn ist das Nunstuck git und'
        )

        # UTF-8 specific decoding tests
        assert unicode('\xf0\xa3\x91\x96', 'utf-8') == u'\U00023456' 
        assert unicode('\xf0\x90\x80\x82', 'utf-8') == u'\U00010002' 
        assert unicode('\xe2\x82\xac', 'utf-8') == u'\u20ac' 

    def test_codecs_errors(self):
        # Error handling (encoding)
        raises(UnicodeError, u'Andr\202 x'.encode, 'ascii')
        raises(UnicodeError, u'Andr\202 x'.encode, 'ascii','strict')
        assert u'Andr\202 x'.encode('ascii','ignore') == "Andr x"
        assert u'Andr\202 x'.encode('ascii','replace') == "Andr? x"

        # Error handling (decoding)
        raises(UnicodeError, unicode, 'Andr\202 x', 'ascii')
        raises(UnicodeError, unicode, 'Andr\202 x', 'ascii','strict')
        assert unicode('Andr\202 x','ascii','ignore') == u"Andr x"
        assert unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x'

        # Error handling (unknown character names)
        assert "\\N{foo}xx".decode("unicode-escape", "ignore") == u"xx"

        # Error handling (truncated escape sequence)
        raises(UnicodeError, "\\".decode, "unicode-escape")

        raises(UnicodeError, "\xc2".decode, "utf-8")
        assert '\xe1\x80'.decode('utf-8', 'replace') == u"\ufffd"

    def test_repr_bug(self):
        assert (repr(u'\U00090418\u027d\U000582b9\u54c3\U000fcb6e') == 
                "u'\\U00090418\\u027d\\U000582b9\\u54c3\\U000fcb6e'")
        assert (repr(u'\n') == 
                "u'\\n'")


    def test_partition(self):
        assert (u'this is the par', u'ti', u'tion method') == \
            u'this is the partition method'.partition(u'ti')

        # from raymond's original specification
        S = u'http://www.python.org'
        assert (u'http', u'://', u'www.python.org') == S.partition(u'://')
        assert (u'http://www.python.org', u'', u'') == S.partition(u'?')
        assert (u'', u'http://', u'www.python.org') == S.partition(u'http://')
        assert (u'http://www.python.', u'org', u'') == S.partition(u'org')

        raises(ValueError, S.partition, u'')
        raises(TypeError, S.partition, None)

    def test_rpartition(self):
        assert (u'this is the rparti', u'ti', u'on method') == \
            u'this is the rpartition method'.rpartition(u'ti')

        # from raymond's original specification
        S = u'http://www.python.org'
        assert (u'http', u'://', u'www.python.org') == S.rpartition(u'://')
        assert (u'', u'', u'http://www.python.org') == S.rpartition(u'?')
        assert (u'', u'http://', u'www.python.org') == S.rpartition(u'http://')
        assert (u'http://www.python.', u'org', u'') == S.rpartition(u'org')

        raises(ValueError, S.rpartition, u'')
        raises(TypeError, S.rpartition, None)

    def test_partition_str_unicode(self):
        x = 'abbbd'.rpartition(u'bb')
        assert x == (u'ab', u'bb', u'd')
        assert map(type, x) == [unicode, unicode, unicode]
        raises(UnicodeDecodeError, '\x80'.partition, u'')
        raises(UnicodeDecodeError, '\x80'.rpartition, u'')

    def test_mul(self):
        zero = 0
        assert type(u'' * zero) == type(zero * u'') == unicode
        assert u'' * zero == zero * u'' == u''
        assert u'x' * zero == zero * u'x' == u''
        assert type(u'x' * zero) == type(zero * u'x') == unicode
        assert u'123' * zero == zero * u'123' == u''
        assert type(u'123' * zero) == type(zero * u'123') == unicode
        for i in range(10):
            u = u'123' * i
            assert len(u) == 3*i
            for j in range(0, i, 3):
                assert u[j+0] == u'1'
                assert u[j+1] == u'2'
                assert u[j+2] == u'3'
            assert u'123' * i == i * u'123'

    def test_index(self):
        assert u"rrarrrrrrrrra".index(u'a', 4, None) == 12
        assert u"rrarrrrrrrrra".index(u'a', None, 6) == 2
        assert u"\u1234\u4321\u5678".index(u'\u5678', 1) == 2

    def test_rindex(self):
        from sys import maxint
        assert u'abcdefghiabc'.rindex(u'') == 12
        assert u'abcdefghiabc'.rindex(u'def') == 3
        assert u'abcdefghiabc'.rindex(u'abc') == 9
        assert u'abcdefghiabc'.rindex(u'abc', 0, -1) == 0
        assert u'abcdefghiabc'.rindex(u'abc', -4*maxint, 4*maxint) == 9
        assert u'rrarrrrrrrrra'.rindex(u'a', 4, None) == 12
        assert u"\u1234\u5678".rindex(u'\u5678') == 1

        raises(ValueError, u'abcdefghiabc'.rindex, u'hib')
        raises(ValueError, u'defghiabc'.rindex, u'def', 1)
        raises(ValueError, u'defghiabc'.rindex, u'abc', 0, -1)
        raises(ValueError, u'abcdefghi'.rindex, u'ghi', 0, 8)
        raises(ValueError, u'abcdefghi'.rindex, u'ghi', 0, -1)
        raises(TypeError, u'abcdefghijklmn'.rindex, u'abc', 0, 0.0)
        raises(TypeError, u'abcdefghijklmn'.rindex, u'abc', -10.0, 30)

    def test_rfind(self):
        assert u'abcdefghiabc'.rfind(u'abc') == 9
        assert u'abcdefghiabc'.rfind(u'') == 12
        assert u'abcdefghiabc'.rfind(u'abcd') == 0
        assert u'abcdefghiabc'.rfind(u'abcz') == -1
        assert u"\u1234\u5678".rfind(u'\u5678') == 1

    def test_rfind_corner_case(self):
        assert u'abc'.rfind('', 4) == -1

    def test_find_index_str_unicode(self):
        assert u'abcdefghiabc'.find(u'bc') == 1
        assert u'ab\u0105b\u0107'.find('b', 2) == 3
        assert u'ab\u0105b\u0107'.find('b', 0, 1) == -1
        assert 'abcdefghiabc'.rfind(u'abc') == 9
        raises(UnicodeDecodeError, '\x80'.find, u'')
        raises(UnicodeDecodeError, '\x80'.rfind, u'')
        assert 'abcdefghiabc'.index(u'bc') == 1
        assert 'abcdefghiabc'.rindex(u'abc') == 9
        raises(UnicodeDecodeError, '\x80'.index, u'')
        raises(UnicodeDecodeError, '\x80'.rindex, u'')
        assert u"\u1234\u5678".find(u'\u5678') == 1

    def test_count_unicode(self):
        assert u'aaa'.count('', 10) == 0
        assert u'aaa'.count('', 3) == 1
        assert u"".count(u"x") ==0
        assert u"".count(u"") ==1
        assert u"Python".count(u"") ==7
        assert u"ab aaba".count(u"ab") ==2
        assert u'aaa'.count(u'a') == 3
        assert u'aaa'.count(u'b') == 0
        assert u'aaa'.count(u'a', -1) == 1
        assert u'aaa'.count(u'a', -10) == 3
        assert u'aaa'.count(u'a', 0, -1) == 2
        assert u'aaa'.count(u'a', 0, -10) == 0
        assert u'ababa'.count(u'aba') == 1

    def test_count_str_unicode(self):
        assert 'aaa'.count(u'a') == 3
        assert 'aaa'.count(u'b') == 0
        assert 'aaa'.count(u'a', -1) == 1
        assert 'aaa'.count(u'a', -10) == 3
        assert 'aaa'.count(u'a', 0, -1) == 2
        assert 'aaa'.count(u'a', 0, -10) == 0
        assert 'ababa'.count(u'aba') == 1
        raises(UnicodeDecodeError, '\x80'.count, u'')

    def test_swapcase(self):
        assert u'\xe4\xc4\xdf'.swapcase() == u'\xc4\xe4\xdf'
        assert u'\ud800'.swapcase() == u'\ud800'

    def test_buffer(self):
        buf = buffer(u'XY')
        assert str(buf) in ['X\x00Y\x00',
                            '\x00X\x00Y',
                            'X\x00\x00\x00Y\x00\x00\x00',
                            '\x00\x00\x00X\x00\x00\x00Y']

    def test_call_special_methods(self):
        # xxx not completely clear if these are implementation details or not
        assert 'abc'.__add__(u'def') == u'abcdef'
        assert u'abc'.__add__(u'def') == u'abcdef'
        assert u'abc'.__add__('def') == u'abcdef'
        assert u'abc'.__rmod__(u'%s') == u'abc'
        ret = u'abc'.__rmod__('%s')
        raises(AttributeError, "u'abc'.__radd__(u'def')")

    def test_str_unicode_concat_overrides(self):
        "Test from Jython about being bug-compatible with CPython."

        def check(value, expected):
            assert type(value) == type(expected)
            assert value == expected

        def _test_concat(t1, t2):
            tprecedent = str
            if issubclass(t1, unicode) or issubclass(t2, unicode):
                tprecedent = unicode

            class SubclassB(t2):
                def __add__(self, other):
                    return SubclassB(t2(self) + t2(other))
            check(SubclassB('py') + SubclassB('thon'), SubclassB('python'))
            check(t1('python') + SubclassB('3'), tprecedent('python3'))
            check(SubclassB('py') + t1('py'), SubclassB('pypy'))

            class SubclassC(t2):
                def __radd__(self, other):
                    return SubclassC(t2(other) + t2(self))
            check(SubclassC('stack') + SubclassC('less'), t2('stackless'))
            check(t1('iron') + SubclassC('python'), SubclassC('ironpython'))
            check(SubclassC('tiny') + t1('py'), tprecedent('tinypy'))

            class SubclassD(t2):
                def __add__(self, other):
                    return SubclassD(t2(self) + t2(other))

                def __radd__(self, other):
                    return SubclassD(t2(other) + t2(self))
            check(SubclassD('di') + SubclassD('ct'), SubclassD('dict'))
            check(t1('list') + SubclassD(' comp'), SubclassD('list comp'))
            check(SubclassD('dun') + t1('der'), SubclassD('dunder'))

        _test_concat(str, str)
        _test_concat(unicode, unicode)
        # the following two cases are really there to emulate a CPython bug.
        _test_concat(str, unicode)   # uses hack in add__String_Unicode()
        _test_concat(unicode, str)   # uses hack in descroperation.binop_impl()

    def test_returns_subclass(self):
        class X(unicode):
            pass

        class Y(object):
            def __unicode__(self):
                return X("stuff")

        assert unicode(Y()).__class__ is X

    def test_getslice(self):
        assert u'123456'.__getslice__(1, 5) == u'2345'
        s = u"\u0105b\u0107"
        assert s[:] == u"\u0105b\u0107"
        assert s[1:] == u"b\u0107"
        assert s[:2] == u"\u0105b"
        assert s[1:2] == u"b"
        assert s[-2:] == u"b\u0107"
        assert s[:-1] == u"\u0105b"
        assert s[-2:2] == u"b"
        assert s[1:-1] == u"b"
        assert s[-2:-1] == u"b"

    def test_getitem_slice(self):
        assert u'123456'.__getitem__(slice(1, 5)) == u'2345'
        s = u"\u0105b\u0107"
        assert s[slice(3)] == u"\u0105b\u0107"
        assert s[slice(1, 3)] == u"b\u0107"
        assert s[slice(2)] == u"\u0105b"
        assert s[slice(1,2)] == u"b"
        assert s[slice(-2,3)] == u"b\u0107"
        assert s[slice(-1)] == u"\u0105b"
        assert s[slice(-2,2)] == u"b"
        assert s[slice(1,-1)] == u"b"
        assert s[slice(-2,-1)] == u"b"
        assert u"abcde"[::2] == u"ace"
        assert u"\u0105\u0106\u0107abcd"[::2] == u"\u0105\u0107bd"

    def test_no_len_on_str_iter(self):
        iterable = u"hello"
        raises(TypeError, len, iter(iterable))

    def test_encode_raw_unicode_escape(self):
        u = unicode('\\', 'raw_unicode_escape')
        assert u == u'\\'

    def test_decode_from_buffer(self):
        buf = buffer('character buffers are decoded to unicode')
        u = unicode(buf, 'utf-8', 'strict')
        assert u == u'character buffers are decoded to unicode'

    def test_unicode_conversion_with__unicode__(self):
        class A(unicode):
            def __unicode__(self):
                return "foo"
        class B(unicode):
            pass
        a = A('bar')
        assert a == 'bar'
        assert unicode(a) == 'foo'
        b = B('bar')
        assert b == 'bar'
        assert unicode(b) == 'bar'

    def test_unicode_conversion_with__str__(self):
        # new-style classes
        class A(object):
            def __str__(self):
                return u'\u1234'
        s = unicode(A())
        assert type(s) is unicode
        assert s == u'\u1234'
        # with old-style classes, it's different, but it should work as well
        class A:
            def __str__(self):
                return u'\u1234'
        s = unicode(A())
        assert type(s) is unicode
        assert s == u'\u1234'

    def test_formatting_unicode__str__(self):
        class A:
            def __init__(self, num):
                self.num = num
            def __str__(self):
                return unichr(self.num)

        s = '%s' % A(111)    # this is ASCII
        assert type(s) is unicode
        assert s == chr(111)

        s = '%s' % A(0x1234)    # this is not ASCII
        assert type(s) is unicode
        assert s == u'\u1234'

        # now the same with a new-style class...
        class A(object):
            def __init__(self, num):
                self.num = num
            def __str__(self):
                return unichr(self.num)

        s = '%s' % A(111)    # this is ASCII
        assert type(s) is unicode
        assert s == chr(111)

        s = '%s' % A(0x1234)    # this is not ASCII
        assert type(s) is unicode
        assert s == u'\u1234'

    def test_formatting_unicode__str__2(self):
        class A:
            def __str__(self):
                return u'baz'

        class B:
            def __str__(self):
                return 'foo'

            def __unicode__(self):
                return u'bar'

        a = A()
        b = B()
        s = '%s %s' % (a, b)
        assert s == u'baz bar'

        skip("but this case here is completely insane")
        s = '%s %s' % (b, a)
        assert s == u'foo baz'

    def test_formatting_unicode__str__3(self):
        # "bah" is all I can say
        class X(object):
            def __repr__(self):
                return u'\u1234'
        '%s' % X()
        #
        class X(object):
            def __str__(self):
                return u'\u1234'
        '%s' % X()

    def test_format_repeat(self):
        assert format(u"abc", u"z<5") == u"abczz"
        assert format(u"abc", u"\u2007<5") == u"abc\u2007\u2007"
        # raises UnicodeEncodeError, like CPython does
        raises(UnicodeEncodeError, format, 123, u"\u2007<5")

    def test_formatting_char(self):
        for num in range(0x80,0x100):
            uchar = unichr(num)
            print num
            assert uchar == u"%c" % num   # works only with ints
            assert uchar == u"%c" % uchar # and unicode chars
            # the implicit decoding should fail for non-ascii chars
            raises(UnicodeDecodeError, u"%c".__mod__, chr(num))
            raises(UnicodeDecodeError, u"%s".__mod__, chr(num))

    def test_str_subclass(self):
        class Foo9(str):
            def __unicode__(self):
                return u"world"
        assert unicode(Foo9("hello")) == u"world"

    def test_class_with_both_str_and_unicode(self):
        class A(object):
            def __str__(self):
                return 'foo'

            def __unicode__(self):
                return u'bar'

        assert unicode(A()) == u'bar'

        class A:
            def __str__(self):
                return 'foo'

            def __unicode__(self):
                return u'bar'

        assert unicode(A()) == u'bar'

    def test_format_unicode_subclass(self):
        class U(unicode):
            def __unicode__(self):
                return u'__unicode__ overridden'
        u = U(u'xxx')
        assert repr("%s" % u) == "u'__unicode__ overridden'"
        assert repr("{}".format(u)) == "'__unicode__ overridden'"

    def test_format_c_overflow(self):
        import sys
        raises(OverflowError, u'{0:c}'.format, -1)
        raises(OverflowError, u'{0:c}'.format, sys.maxunicode + 1)

    def test_replace_with_buffer(self):
        assert u'abc'.replace(buffer('b'), buffer('e')) == u'aec'
        assert u'abc'.replace(buffer('b'), u'e') == u'aec'
        assert u'abc'.replace(u'b', buffer('e')) == u'aec'

    def test_unicode_subclass(self):
        class S(unicode):
            pass

        a = S(u'hello \u1234')
        b = unicode(a)
        assert type(b) is unicode
        assert b == u'hello \u1234'

        assert u'%s' % S(u'mar\xe7') == u'mar\xe7'

    def test_isdecimal(self):
        assert u'0'.isdecimal()
        assert not u''.isdecimal()
        assert not u'a'.isdecimal()
        assert not u'\u2460'.isdecimal() # CIRCLED DIGIT ONE

    def test_isnumeric(self):
        assert u'0'.isnumeric()
        assert not u''.isnumeric()
        assert not u'a'.isnumeric()
        assert u'\u2460'.isnumeric() # CIRCLED DIGIT ONE

    def test_replace_str_unicode(self):
        res = 'one!two!three!'.replace(u'!', u'@', 1)
        assert res == u'one@two!three!'
        assert type(res) == unicode
        raises(UnicodeDecodeError, '\x80'.replace, 'a', u'b')
        raises(UnicodeDecodeError, '\x80'.replace, u'a', 'b')

    def test_join_subclass(self):
        class UnicodeSubclass(unicode):
            pass
        class StrSubclass(str):
            pass

        s1 = UnicodeSubclass(u'a')
        assert u''.join([s1]) is not s1
        s2 = StrSubclass(u'a')
        assert u''.join([s2]) is not s2

    def test_encoding_and_errors_cant_be_none(self):
        raises(TypeError, "''.decode(None)")
        raises(TypeError, "u''.encode(None)")
        raises(TypeError, "unicode('', encoding=None)")
        raises(TypeError, 'u"".encode("utf-8", None)')

    def test_unicode_constructor_misc(self):
        x = u'foo'
        x += u'bar'
        assert unicode(x) is x
        #
        class U(unicode):
            def __unicode__(self):
                return u'BOK'
        u = U(x)
        assert unicode(u) == u'BOK'
        #
        class U2(unicode):
            pass
        z = U2(u'foobaz')
        assert type(unicode(z)) is unicode
        assert unicode(z) == u'foobaz'
        #
        assert unicode(encoding='supposedly_the_encoding') == u''
        assert unicode(errors='supposedly_the_error') == u''
        e = raises(TypeError, unicode, u'', 'supposedly_the_encoding')
        assert str(e.value) == 'decoding Unicode is not supported'
        e = raises(TypeError, unicode, u'', errors='supposedly_the_error')
        assert str(e.value) == 'decoding Unicode is not supported'
        e = raises(TypeError, unicode, u, 'supposedly_the_encoding')
        assert str(e.value) == 'decoding Unicode is not supported'
        e = raises(TypeError, unicode, z, 'supposedly_the_encoding')
        assert str(e.value) == 'decoding Unicode is not supported'

    def test_newlist_utf8_non_ascii(self):
        'ä'.split("\n")[0] # does not crash

    def test_replace_no_occurrence(self):
        x = u"xyz"
        assert x.replace(u"a", u"b") is x