diff options
author | 2019-08-25 23:42:30 +0200 | |
---|---|---|
committer | 2019-08-25 23:42:30 +0200 | |
commit | 544589a132e896942cf47765b11a50eecca6d53e (patch) | |
tree | 1bdd97b997c288594eb5fa25a04e43fdc439ddf7 /pypy/objspace/std/unicodeobject.py | |
parent | merge heads (diff) | |
download | pypy-544589a132e896942cf47765b11a50eecca6d53e.tar.gz pypy-544589a132e896942cf47765b11a50eecca6d53e.tar.bz2 pypy-544589a132e896942cf47765b11a50eecca6d53e.zip |
Optimization for the ASCII case in unicode.(r)find and .(r)index:
there is no need to convert the result index back from bytes to codepoints if
the string is ASCII.
Diffstat (limited to 'pypy/objspace/std/unicodeobject.py')
-rw-r--r-- | pypy/objspace/std/unicodeobject.py | 9 |
1 files changed, 7 insertions, 2 deletions
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
index b0a90942b5..485903dea7 100644
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -886,6 +886,11 @@ class W_UnicodeObject(W_Root):
         return rutf8.codepoint_position_at_index(
             self._utf8, self._get_index_storage(), index)
 
+    def _codepoints_in_utf8(self, start, end):
+        if self.is_ascii():
+            return end - start
+        return rutf8.codepoints_in_utf8(self._utf8, start, end)
+
     @always_inline
     def _unwrap_and_search(self, space, w_sub, w_start, w_end, forward=True):
         w_sub = self.convert_arg_to_w_unicode(space, w_sub)
@@ -907,7 +912,7 @@ class W_UnicodeObject(W_Root):
             res_index = self._utf8.find(w_sub._utf8, start_index, end_index)
             if res_index < 0:
                 return None
-            skip = rutf8.codepoints_in_utf8(self._utf8, start_index, res_index)
+            skip = self._codepoints_in_utf8(start_index, res_index)
             res = start + skip
             assert res >= 0
             return space.newint(res)
@@ -915,7 +920,7 @@ class W_UnicodeObject(W_Root):
             res_index = self._utf8.rfind(w_sub._utf8, start_index, end_index)
             if res_index < 0:
                 return None
-            skip = rutf8.codepoints_in_utf8(self._utf8, res_index, end_index)
+            skip = self._codepoints_in_utf8(res_index, end_index)
             res = end - skip
             assert res >= 0
             return space.newint(res)