From 158a579505440c8945891562eaa1aca973ca94cb Mon Sep 17 00:00:00 2001
From: Carl Friedrich Bolz-Tereick <cfbolz@gmx.de>
Date: Tue, 2 Mar 2021 13:23:40 +0100
Subject: add a fast path to unicode.upper/lower for ASCII-only strings

---
 pypy/objspace/std/test/test_unicodeobject.py |  9 +++++++++
 pypy/objspace/std/unicodeobject.py           | 26 ++++++++++++++++++++------
 2 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
index 6b1c7315da..e8763dc496 100644
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -207,6 +207,15 @@ class TestUnicodeObject:
             for end in range(start, len(u)):
                 assert w_u._unicode_sliced_constant_index_jit(space, start, end)._utf8 == u[start: end].encode("utf-8")
 
+    def test_lower_upper_ascii(self):
+        from pypy.module.unicodedata.interp_ucd import unicodedb
+        # the ascii fast path of descr_lower/descr_upper calls lower()/upper()
+        # on the utf-8 bytes, so check that unicodedb agrees for all of ascii
+        for ch in range(128):
+            unilower, = unicodedb.tolower_full(ch)
+            assert chr(unilower) == chr(ch).lower()
+            uniupper, = unicodedb.toupper_full(ch)
+            assert chr(uniupper) == chr(ch).upper()
 
 class AppTestUnicodeStringStdOnly:
     def test_compares(self):
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
index 4fa1a98437..0be4a9e55c 100644
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -503,11 +503,18 @@ class W_UnicodeObject(W_Root):
         return tformat.formatter_field_name_split()
 
     def descr_lower(self, space):
-        builder = rutf8.Utf8StringBuilder(len(self._utf8))
-        for ch in rutf8.Utf8StringIterator(self._utf8):
+        if self.is_ascii():
+            return space.newutf8(self._utf8.lower(), len(self._utf8))
+        return self._descr_lower(self._utf8)
+
+    @staticmethod
+    @jit.elidable
+    def _descr_lower(utf8):
+        builder = rutf8.Utf8StringBuilder(len(utf8))
+        for ch in rutf8.Utf8StringIterator(utf8):
             lower = unicodedb.tolower(ch)
             builder.append_code(lower)
-        return self.from_utf8builder(builder)
+        return W_UnicodeObject.from_utf8builder(builder)
 
     def descr_isdecimal(self, space):
         return self._is_generic(space, '_isdecimal')
@@ -650,11 +657,18 @@ class W_UnicodeObject(W_Root):
         return space.newlist(strs_w)
 
     def descr_upper(self, space):
-        builder = rutf8.Utf8StringBuilder(len(self._utf8))
-        for ch in rutf8.Utf8StringIterator(self._utf8):
+        if self.is_ascii():
+            return space.newutf8(self._utf8.upper(), len(self._utf8))
+        return self._descr_upper(self._utf8)
+
+    @staticmethod
+    @jit.elidable
+    def _descr_upper(utf8):
+        builder = rutf8.Utf8StringBuilder(len(utf8))
+        for ch in rutf8.Utf8StringIterator(utf8):
             ch = unicodedb.toupper(ch)
             builder.append_code(ch)
-        return self.from_utf8builder(builder)
+        return W_UnicodeObject.from_utf8builder(builder)
 
     @unwrap_spec(width=int)
     def descr_zfill(self, space, width):
-- 
cgit v1.2.3-65-gdbad