diff options
author | Carl Friedrich Bolz-Tereick <cfbolz@gmx.de> | 2021-03-02 14:35:35 +0100 |
---|---|---|
committer | Carl Friedrich Bolz-Tereick <cfbolz@gmx.de> | 2021-03-02 14:35:35 +0100 |
commit | 4ea9a479e31dced84c47bfa94abb87eb07fa9fcd (patch) | |
tree | 83acdcedb86853e1e1c970858ccf8332a1aa34e3 | |
parent | fast path for unicode.upper/lower for ascii (diff) | |
download | pypy-4ea9a479e31dced84c47bfa94abb87eb07fa9fcd.tar.gz pypy-4ea9a479e31dced84c47bfa94abb87eb07fa9fcd.tar.bz2 pypy-4ea9a479e31dced84c47bfa94abb87eb07fa9fcd.zip |
add ascii fast paths to the tolower/toupper functions of the unicode dbs too
(the part of the diff around special casing is due to c6650aed42e0 not
regenerating *all* unicodedb versions, only unicodedb_5_2_0)
-rw-r--r-- | rpython/rlib/unicodedata/generate_unicodedb.py | 28 | ||||
-rw-r--r-- | rpython/rlib/unicodedata/test/test_unicodedata.py | 7 | ||||
-rw-r--r-- | rpython/rlib/unicodedata/unicodedb_11_0_0.py | 18 | ||||
-rw-r--r-- | rpython/rlib/unicodedata/unicodedb_3_2_0.py | 16 | ||||
-rw-r--r-- | rpython/rlib/unicodedata/unicodedb_5_2_0.py | 68 | ||||
-rw-r--r-- | rpython/rlib/unicodedata/unicodedb_6_0_0.py | 119 | ||||
-rw-r--r-- | rpython/rlib/unicodedata/unicodedb_6_1_0.py | 119 | ||||
-rw-r--r-- | rpython/rlib/unicodedata/unicodedb_6_2_0.py | 119 | ||||
-rw-r--r-- | rpython/rlib/unicodedata/unicodedb_8_0_0.py | 119 | ||||
-rw-r--r-- | rpython/rlib/unicodedata/unicodedb_9_0_0.py | 119 |
10 files changed, 162 insertions, 570 deletions
diff --git a/rpython/rlib/unicodedata/generate_unicodedb.py b/rpython/rlib/unicodedata/generate_unicodedb.py index cacaad9dcd..0b872fdcb5 100644 --- a/rpython/rlib/unicodedata/generate_unicodedb.py +++ b/rpython/rlib/unicodedata/generate_unicodedb.py @@ -749,9 +749,17 @@ def numeric(code): totitle = {} for code, char in table.enum_chars(): if char.upper: - toupper[code] = char.upper + if code < 128: + assert ord('a') <= code <= ord('z') + assert char.upper == code - 32 + else: + toupper[code] = char.upper if char.lower: - tolower[code] = char.lower + if code < 128: + assert ord('A') <= code <= ord('Z') + assert char.lower == code + 32 + else: + tolower[code] = char.lower if char.title: totitle[code] = char.title writeDict(outfile, '_toupper', toupper, base_mod) @@ -760,6 +768,10 @@ def numeric(code): writeDict(outfile, '_special_casing', table.special_casing, base_mod) print >> outfile, ''' def toupper(code): + if code < 128: + if ord('a') <= code <= ord('z'): + return code - 32 + return code try: return _toupper[code] except KeyError: @@ -769,6 +781,10 @@ def toupper(code): return code def tolower(code): + if code < 128: + if ord('A') <= code <= ord('Z'): + return code + 32 + return code try: return _tolower[code] except KeyError: @@ -787,6 +803,10 @@ def totitle(code): return code def toupper_full(code): + if code < 128: + if ord('a') <= code <= ord('z'): + return [code - 32] + return [code] try: return _special_casing[code][2] except KeyError: @@ -798,6 +818,10 @@ def toupper_full(code): return [toupper(code)] def tolower_full(code): + if code < 128: + if ord('A') <= code <= ord('Z'): + return [code + 32] + return [code] try: return _special_casing[code][0] except KeyError: diff --git a/rpython/rlib/unicodedata/test/test_unicodedata.py b/rpython/rlib/unicodedata/test/test_unicodedata.py index c5065b7817..f7470de2dc 100644 --- a/rpython/rlib/unicodedata/test/test_unicodedata.py +++ b/rpython/rlib/unicodedata/test/test_unicodedata.py @@ -171,3 +171,10 @@ class TestUnicodeData1100(object): unicodedb_11_0_0]) def test_turkish_i(db): assert db.tolower_full(0x0130) == [0x69, 0x307] + +@pytest.mark.parametrize('db', [ + unicodedb_3_2_0, unicodedb_5_2_0, unicodedb_6_0_0, unicodedb_6_2_0, unicodedb_8_0_0, + unicodedb_11_0_0]) +def test_turkish_i(db): + assert db.tolower(ord('A')) == ord('a') + assert ord('A') not in db._toupper diff --git a/rpython/rlib/unicodedata/unicodedb_11_0_0.py b/rpython/rlib/unicodedata/unicodedb_11_0_0.py index 687d4db841..28d6da5ba9 100644 --- a/rpython/rlib/unicodedata/unicodedb_11_0_0.py +++ b/rpython/rlib/unicodedata/unicodedb_11_0_0.py @@ -1,6 +1,6 @@ # UNICODE CHARACTER DATABASE # This file was generated with the command: -# generate_unicodedb.py --base=unicodedb_5_2_0 --output=unicodedb_11_0_0 --unidata_version=11.0.0 +# generate_unicodedb.py --unidata_version=11.0.0 --output=unicodedb_11_0_0 --base=unicodedb_5_2_0 from rpython.rlib.rarithmetic import r_longlong @@ -64479,6 +64479,10 @@ _special_casing_corrected = { } def toupper(code): + if code < 128: + if ord('a') <= code <= ord('z'): + return code - 32 + return code try: return _toupper[code] except KeyError: @@ -64488,6 +64492,10 @@ def toupper(code): return code def tolower(code): + if code < 128: + if ord('A') <= code <= ord('Z'): + return code + 32 + return code try: return _tolower[code] except KeyError: @@ -64506,6 +64514,10 @@ def totitle(code): return code def toupper_full(code): + if code < 128: + if ord('a') <= code <= ord('z'): + return [code - 32] + return [code] try: return _special_casing[code][2] except KeyError: @@ -64517,6 +64529,10 @@ def toupper_full(code): return [toupper(code)] def tolower_full(code): + if code < 128: + if ord('A') <= code <= ord('Z'): + return [code + 32] + return [code] try: return _special_casing[code][0] except KeyError: diff --git a/rpython/rlib/unicodedata/unicodedb_3_2_0.py b/rpython/rlib/unicodedata/unicodedb_3_2_0.py index 337c0dde19..33827c6f35 100644 --- a/rpython/rlib/unicodedata/unicodedb_3_2_0.py +++ b/rpython/rlib/unicodedata/unicodedb_3_2_0.py @@ -19457,6 +19457,10 @@ _special_casing_corrected = { } def toupper(code): + if code < 128: + if ord('a') <= code <= ord('z'): + return code - 32 + return code try: return _toupper[code] except KeyError: @@ -19466,6 +19470,10 @@ def toupper(code): return code def tolower(code): + if code < 128: + if ord('A') <= code <= ord('Z'): + return code + 32 + return code try: return _tolower[code] except KeyError: @@ -19484,6 +19492,10 @@ def totitle(code): return code def toupper_full(code): + if code < 128: + if ord('a') <= code <= ord('z'): + return [code - 32] + return [code] try: return _special_casing[code][2] except KeyError: @@ -19495,6 +19507,10 @@ def toupper_full(code): return [toupper(code)] def tolower_full(code): + if code < 128: + if ord('A') <= code <= ord('Z'): + return [code + 32] + return [code] try: return _special_casing[code][0] except KeyError: diff --git a/rpython/rlib/unicodedata/unicodedb_5_2_0.py b/rpython/rlib/unicodedata/unicodedb_5_2_0.py index 9b0415f0f7..29028e004f 100644 --- a/rpython/rlib/unicodedata/unicodedb_5_2_0.py +++ b/rpython/rlib/unicodedata/unicodedb_5_2_0.py @@ -139831,32 +139831,6 @@ def numeric(code): raise _toupper = { -97: 65, -98: 66, -99: 67, -100: 68, -101: 69, -102: 70, -103: 71, -104: 72, -105: 73, -106: 74, -107: 75, -108: 76, -109: 77, -110: 78, -111: 79, -112: 80, -113: 81, -114: 82, -115: 83, -116: 84, -117: 85, -118: 86, -119: 87, -120: 88, -121: 89, -122: 90, 181: 924, 224: 192, 225: 193, @@ -140873,32 +140847,6 @@ _toupper = { _toupper_corrected = { } _tolower = { -65: 97, -66: 98, -67: 99, -68: 100, -69: 101, -70: 102, -71: 103, -72: 104, -73: 105, -74: 106, -75: 107, -76: 108, -77: 109, -78: 110, -79: 111, -80: 112, -81: 113, -82: 114, -83: 115, -84: 116, -85: 117, -86: 118, -87: 119, -88: 120, -89: 121, -90: 122, 192: 224, 193: 225, 194: 226, @@ -143062,6 +143010,10 @@ _special_casing_corrected = { } def toupper(code): + if code < 128: + if ord('a') <= code <= ord('z'): + return code - 32 + return code try: return _toupper[code] except KeyError: @@ -143071,6 +143023,10 @@ def toupper(code): return code def tolower(code): + if code < 128: + if ord('A') <= code <= ord('Z'): + return code + 32 + return code try: return _tolower[code] except KeyError: @@ -143089,6 +143045,10 @@ def totitle(code): return code def toupper_full(code): + if code < 128: + if ord('a') <= code <= ord('z'): + return [code - 32] + return [code] try: return _special_casing[code][2] except KeyError: @@ -143100,6 +143060,10 @@ def toupper_full(code): return [toupper(code)] def tolower_full(code): + if code < 128: + if ord('A') <= code <= ord('Z'): + return [code + 32] + return [code] try: return _special_casing[code][0] except KeyError: diff --git a/rpython/rlib/unicodedata/unicodedb_6_0_0.py b/rpython/rlib/unicodedata/unicodedb_6_0_0.py index 318828e287..fda7d153e4 100644 --- a/rpython/rlib/unicodedata/unicodedb_6_0_0.py +++ b/rpython/rlib/unicodedata/unicodedb_6_0_0.py @@ -14229,115 +14229,16 @@ _totitle = { _totitle_corrected = { } _special_casing = { -223: ([223], [83, 115], [83, 83]), -304: ([105, 775], [304], [304]), -329: ([329], [700, 78], [700, 78]), -496: ([496], [74, 780], [74, 780]), -912: ([912], [921, 776, 769], [921, 776, 769]), -944: ([944], [933, 776, 769], [933, 776, 769]), -1415: ([1415], [1333, 1410], [1333, 1362]), -7830: ([7830], [72, 817], [72, 817]), -7831: ([7831], [84, 776], [84, 776]), -7832: ([7832], [87, 778], [87, 778]), -7833: ([7833], [89, 778], [89, 778]), -7834: ([7834], [65, 702], [65, 702]), -8016: ([8016], [933, 787], [933, 787]), -8018: ([8018], [933, 787, 768], [933, 787, 768]), -8020: ([8020], [933, 787, 769], [933, 787, 769]), -8022: ([8022], [933, 787, 834], [933, 787, 834]), -8064: ([8064], [8072], [7944, 921]), -8065: ([8065], [8073], [7945, 921]), -8066: ([8066], [8074], [7946, 921]), -8067: ([8067], [8075], [7947, 921]), -8068: ([8068], [8076], [7948, 921]), -8069: ([8069], [8077], [7949, 921]), -8070: ([8070], [8078], [7950, 921]), -8071: ([8071], [8079], [7951, 921]), -8072: ([8064], [8072], [7944, 921]), -8073: ([8065], [8073], [7945, 921]), -8074: ([8066], [8074], [7946, 921]), -8075: ([8067], [8075], [7947, 921]), -8076: ([8068], [8076], [7948, 921]), -8077: ([8069], [8077], [7949, 921]), -8078: ([8070], [8078], [7950, 921]), -8079: ([8071], [8079], [7951, 921]), -8080: ([8080], [8088], [7976, 921]), -8081: ([8081], [8089], [7977, 921]), -8082: ([8082], [8090], [7978, 921]), -8083: ([8083], [8091], [7979, 921]), -8084: ([8084], [8092], [7980, 921]), -8085: ([8085], [8093], [7981, 921]), -8086: ([8086], [8094], [7982, 921]), -8087: ([8087], [8095], [7983, 921]), -8088: ([8080], [8088], [7976, 921]), -8089: ([8081], [8089], [7977, 921]), -8090: ([8082], [8090], [7978, 921]), -8091: ([8083], [8091], [7979, 921]), -8092: ([8084], [8092], [7980, 921]), -8093: ([8085], [8093], [7981, 921]), -8094: ([8086], [8094], [7982, 921]), -8095: ([8087], [8095], [7983, 921]), -8096: ([8096], [8104], [8040, 921]), -8097: ([8097], [8105], [8041, 921]), -8098: ([8098], [8106], [8042, 921]), -8099: ([8099], [8107], [8043, 921]), -8100: ([8100], [8108], [8044, 921]), -8101: ([8101], [8109], [8045, 921]), -8102: ([8102], [8110], [8046, 921]), -8103: ([8103], [8111], [8047, 921]), -8104: ([8096], [8104], [8040, 921]), -8105: ([8097], [8105], [8041, 921]), -8106: ([8098], [8106], [8042, 921]), -8107: ([8099], [8107], [8043, 921]), -8108: ([8100], [8108], [8044, 921]), -8109: ([8101], [8109], [8045, 921]), -8110: ([8102], [8110], [8046, 921]), -8111: ([8103], [8111], [8047, 921]), -8114: ([8114], [8122, 837], [8122, 921]), -8115: ([8115], [8124], [913, 921]), -8116: ([8116], [902, 837], [902, 921]), -8118: ([8118], [913, 834], [913, 834]), -8119: ([8119], [913, 834, 837], [913, 834, 921]), -8124: ([8115], [8124], [913, 921]), -8130: ([8130], [8138, 837], [8138, 921]), -8131: ([8131], [8140], [919, 921]), -8132: ([8132], [905, 837], [905, 921]), -8134: ([8134], [919, 834], [919, 834]), -8135: ([8135], [919, 834, 837], [919, 834, 921]), -8140: ([8131], [8140], [919, 921]), -8146: ([8146], [921, 776, 768], [921, 776, 768]), -8147: ([8147], [921, 776, 769], [921, 776, 769]), -8150: ([8150], [921, 834], [921, 834]), -8151: ([8151], [921, 776, 834], [921, 776, 834]), -8162: ([8162], [933, 776, 768], [933, 776, 768]), -8163: ([8163], [933, 776, 769], [933, 776, 769]), -8164: ([8164], [929, 787], [929, 787]), -8166: ([8166], [933, 834], [933, 834]), -8167: ([8167], [933, 776, 834], [933, 776, 834]), -8178: ([8178], [8186, 837], [8186, 921]), -8179: ([8179], [8188], [937, 921]), -8180: ([8180], [911, 837], [911, 921]), -8182: ([8182], [937, 834], [937, 834]), -8183: ([8183], [937, 834, 837], [937, 834, 921]), -8188: ([8179], [8188], [937, 921]), -64256: ([64256], [70, 102], [70, 70]), -64257: ([64257], [70, 105], [70, 73]), -64258: ([64258], [70, 108], [70, 76]), -64259: ([64259], [70, 102, 105], [70, 70, 73]), -64260: ([64260], [70, 102, 108], [70, 70, 76]), -64261: ([64261], [83, 116], [83, 84]), -64262: ([64262], [83, 116], [83, 84]), -64275: ([64275], [1348, 1398], [1348, 1350]), -64276: ([64276], [1348, 1381], [1348, 1333]), -64277: ([64277], [1348, 1387], [1348, 1339]), -64278: ([64278], [1358, 1398], [1358, 1350]), -64279: ([64279], [1348, 1389], [1348, 1341]), } _special_casing_corrected = { } def toupper(code): + if code < 128: + if ord('a') <= code <= ord('z'): + return code - 32 + return code try: return _toupper[code] except KeyError: @@ -14347,6 +14248,10 @@ def toupper(code): return code def tolower(code): + if code < 128: + if ord('A') <= code <= ord('Z'): + return code + 32 + return code try: return _tolower[code] except KeyError: @@ -14365,6 +14270,10 @@ def totitle(code): return code def toupper_full(code): + if code < 128: + if ord('a') <= code <= ord('z'): + return [code - 32] + return [code] try: return _special_casing[code][2] except KeyError: @@ -14376,6 +14285,10 @@ def toupper_full(code): return [toupper(code)] def tolower_full(code): + if code < 128: + if ord('A') <= code <= ord('Z'): + return [code + 32] + return [code] try: return _special_casing[code][0] except KeyError: diff --git a/rpython/rlib/unicodedata/unicodedb_6_1_0.py b/rpython/rlib/unicodedata/unicodedb_6_1_0.py index eea53fe649..410b6acd4f 100644 --- a/rpython/rlib/unicodedata/unicodedb_6_1_0.py +++ b/rpython/rlib/unicodedata/unicodedb_6_1_0.py @@ -20539,115 +20539,16 @@ _totitle = { _totitle_corrected = { } _special_casing = { -223: ([223], [83, 115], [83, 83]), -304: ([105, 775], [304], [304]), -329: ([329], [700, 78], [700, 78]), -496: ([496], [74, 780], [74, 780]), -912: ([912], [921, 776, 769], [921, 776, 769]), -944: ([944], [933, 776, 769], [933, 776, 769]), -1415: ([1415], [1333, 1410], [1333, 1362]), -7830: ([7830], [72, 817], [72, 817]), -7831: ([7831], [84, 776], [84, 776]), -7832: ([7832], [87, 778], [87, 778]), -7833: ([7833], [89, 778], [89, 778]), -7834: ([7834], [65, 702], [65, 702]), -8016: ([8016], [933, 787], [933, 787]), -8018: ([8018], [933, 787, 768], [933, 787, 768]), -8020: ([8020], [933, 787, 769], [933, 787, 769]), -8022: ([8022], [933, 787, 834], [933, 787, 834]), -8064: ([8064], [8072], [7944, 921]), -8065: ([8065], [8073], [7945, 921]), -8066: ([8066], [8074], [7946, 921]), -8067: ([8067], [8075], [7947, 921]), -8068: ([8068], [8076], [7948, 921]), -8069: ([8069], [8077], [7949, 921]), -8070: ([8070], [8078], [7950, 921]), -8071: ([8071], [8079], [7951, 921]), -8072: ([8064], [8072], [7944, 921]), -8073: ([8065], [8073], [7945, 921]), -8074: ([8066], [8074], [7946, 921]), -8075: ([8067], [8075], [7947, 921]), -8076: ([8068], [8076], [7948, 921]), -8077: ([8069], [8077], [7949, 921]), -8078: ([8070], [8078], [7950, 921]), -8079: ([8071], [8079], [7951, 921]), -8080: ([8080], [8088], [7976, 921]), -8081: ([8081], [8089], [7977, 921]), -8082: ([8082], [8090], [7978, 921]), -8083: ([8083], [8091], [7979, 921]), -8084: ([8084], [8092], [7980, 921]), -8085: ([8085], [8093], [7981, 921]), -8086: ([8086], [8094], [7982, 921]), -8087: ([8087], [8095], [7983, 921]), -8088: ([8080], [8088], [7976, 921]), -8089: ([8081], [8089], [7977, 921]), -8090: ([8082], [8090], [7978, 921]), -8091: ([8083], [8091], [7979, 921]), -8092: ([8084], [8092], [7980, 921]), -8093: ([8085], [8093], [7981, 921]), -8094: ([8086], [8094], [7982, 921]), -8095: ([8087], [8095], [7983, 921]), -8096: ([8096], [8104], [8040, 921]), -8097: ([8097], [8105], [8041, 921]), -8098: ([8098], [8106], [8042, 921]), -8099: ([8099], [8107], [8043, 921]), -8100: ([8100], [8108], [8044, 921]), -8101: ([8101], [8109], [8045, 921]), -8102: ([8102], [8110], [8046, 921]), -8103: ([8103], [8111], [8047, 921]), -8104: ([8096], [8104], [8040, 921]), -8105: ([8097], [8105], [8041, 921]), -8106: ([8098], [8106], [8042, 921]), -8107: ([8099], [8107], [8043, 921]), -8108: ([8100], [8108], [8044, 921]), -8109: ([8101], [8109], [8045, 921]), -8110: ([8102], [8110], [8046, 921]), -8111: ([8103], [8111], [8047, 921]), -8114: ([8114], [8122, 837], [8122, 921]), -8115: ([8115], [8124], [913, 921]), -8116: ([8116], [902, 837], [902, 921]), -8118: ([8118], [913, 834], [913, 834]), -8119: ([8119], [913, 834, 837], [913, 834, 921]), -8124: ([8115], [8124], [913, 921]), -8130: ([8130], [8138, 837], [8138, 921]), -8131: ([8131], [8140], [919, 921]), -8132: ([8132], [905, 837], [905, 921]), -8134: ([8134], [919, 834], [919, 834]), -8135: ([8135], [919, 834, 837], [919, 834, 921]), -8140: ([8131], [8140], [919, 921]), -8146: ([8146], [921, 776, 768], [921, 776, 768]), -8147: ([8147], [921, 776, 769], [921, 776, 769]), -8150: ([8150], [921, 834], [921, 834]), -8151: ([8151], [921, 776, 834], [921, 776, 834]), -8162: ([8162], [933, 776, 768], [933, 776, 768]), -8163: ([8163], [933, 776, 769], [933, 776, 769]), -8164: ([8164], [929, 787], [929, 787]), -8166: ([8166], [933, 834], [933, 834]), -8167: ([8167], [933, 776, 834], [933, 776, 834]), -8178: ([8178], [8186, 837], [8186, 921]), -8179: ([8179], [8188], [937, 921]), -8180: ([8180], [911, 837], [911, 921]), -8182: ([8182], [937, 834], [937, 834]), -8183: ([8183], [937, 834, 837], [937, 834, 921]), -8188: ([8179], [8188], [937, 921]), -64256: ([64256], [70, 102], [70, 70]), -64257: ([64257], [70, 105], [70, 73]), -64258: ([64258], [70, 108], [70, 76]), -64259: ([64259], [70, 102, 105], [70, 70, 73]), -64260: ([64260], [70, 102, 108], [70, 70, 76]), -64261: ([64261], [83, 116], [83, 84]), -64262: ([64262], [83, 116], [83, 84]), -64275: ([64275], [1348, 1398], [1348, 1350]), -64276: ([64276], [1348, 1381], [1348, 1333]), -64277: ([64277], [1348, 1387], [1348, 1339]), -64278: ([64278], [1358, 1398], [1358, 1350]), -64279: ([64279], [1348, 1389], [1348, 1341]), } _special_casing_corrected = { } def toupper(code): + if code < 128: + if ord('a') <= code <= ord('z'): + return code - 32 + return code try: return _toupper[code] except KeyError: @@ -20657,6 +20558,10 @@ def toupper(code): return code def tolower(code): + if code < 128: + if ord('A') <= code <= ord('Z'): + return code + 32 + return code try: return _tolower[code] except KeyError: @@ -20675,6 +20580,10 @@ def totitle(code): return code def toupper_full(code): + if code < 128: + if ord('a') <= code <= ord('z'): + return [code - 32] + return [code] try: return _special_casing[code][2] except KeyError: @@ -20686,6 +20595,10 @@ def toupper_full(code): return [toupper(code)] def tolower_full(code): + if code < 128: + if ord('A') <= code <= ord('Z'): + return [code + 32] + return [code] try: return _special_casing[code][0] except KeyError: diff --git a/rpython/rlib/unicodedata/unicodedb_6_2_0.py b/rpython/rlib/unicodedata/unicodedb_6_2_0.py index 13e07162bf..684a6f8ac1 100644 --- a/rpython/rlib/unicodedata/unicodedb_6_2_0.py +++ b/rpython/rlib/unicodedata/unicodedb_6_2_0.py @@ -20555,115 +20555,16 @@ _totitle = { _totitle_corrected = { } _special_casing = { -223: ([223], [83, 115], [83, 83]), -304: ([105, 775], [304], [304]), -329: ([329], [700, 78], [700, 78]), -496: ([496], [74, 780], [74, 780]), -912: ([912], [921, 776, 769], [921, 776, 769]), -944: ([944], [933, 776, 769], [933, 776, 769]), -1415: ([1415], [1333, 1410], [1333, 1362]), -7830: ([7830], [72, 817], [72, 817]), -7831: ([7831], [84, 776], [84, 776]), -7832: ([7832], [87, 778], [87, 778]), -7833: ([7833], [89, 778], [89, 778]), -7834: ([7834], [65, 702], [65, 702]), -8016: ([8016], [933, 787], [933, 787]), -8018: ([8018], [933, 787, 768], [933, 787, 768]), -8020: ([8020], [933, 787, 769], [933, 787, 769]), -8022: ([8022], [933, 787, 834], [933, 787, 834]), -8064: ([8064], [8072], [7944, 921]), -8065: ([8065], [8073], [7945, 921]), -8066: ([8066], [8074], [7946, 921]), -8067: ([8067], [8075], [7947, 921]), -8068: ([8068], [8076], [7948, 921]), -8069: ([8069], [8077], [7949, 921]), -8070: ([8070], [8078], [7950, 921]), -8071: ([8071], [8079], [7951, 921]), -8072: ([8064], [8072], [7944, 921]), -8073: ([8065], [8073], [7945, 921]), -8074: ([8066], [8074], [7946, 921]), -8075: ([8067], [8075], [7947, 921]), -8076: ([8068], [8076], [7948, 921]), -8077: ([8069], [8077], [7949, 921]), -8078: ([8070], [8078], [7950, 921]), -8079: ([8071], [8079], [7951, 921]), -8080: ([8080], [8088], [7976, 921]), -8081: ([8081], [8089], [7977, 921]), -8082: ([8082], [8090], [7978, 921]), -8083: ([8083], [8091], [7979, 921]), -8084: ([8084], [8092], [7980, 921]), -8085: ([8085], [8093], [7981, 921]), -8086: ([8086], [8094], [7982, 921]), -8087: ([8087], [8095], [7983, 921]), -8088: ([8080], [8088], [7976, 921]), -8089: ([8081], [8089], [7977, 921]), -8090: ([8082], [8090], [7978, 921]), -8091: ([8083], [8091], [7979, 921]), -8092: ([8084], [8092], [7980, 921]), -8093: ([8085], [8093], [7981, 921]), -8094: ([8086], [8094], [7982, 921]), -8095: ([8087], [8095], [7983, 921]), -8096: ([8096], [8104], [8040, 921]), -8097: ([8097], [8105], [8041, 921]), -8098: ([8098], [8106], [8042, 921]), -8099: ([8099], [8107], [8043, 921]), -8100: ([8100], [8108], [8044, 921]), -8101: ([8101], [8109], [8045, 921]), -8102: ([8102], [8110], [8046, 921]), -8103: ([8103], [8111], [8047, 921]), -8104: ([8096], [8104], [8040, 921]), -8105: ([8097], [8105], [8041, 921]), -8106: ([8098], [8106], [8042, 921]), -8107: ([8099], [8107], [8043, 921]), -8108: ([8100], [8108], [8044, 921]), -8109: ([8101], [8109], [8045, 921]), -8110: ([8102], [8110], [8046, 921]), -8111: ([8103], [8111], [8047, 921]), -8114: ([8114], [8122, 837], [8122, 921]), -8115: ([8115], [8124], [913, 921]), -8116: ([8116], [902, 837], [902, 921]), -8118: ([8118], [913, 834], [913, 834]), -8119: ([8119], [913, 834, 837], [913, 834, 921]), -8124: ([8115], [8124], [913, 921]), -8130: ([8130], [8138, 837], [8138, 921]), -8131: ([8131], [8140], [919, 921]), -8132: ([8132], [905, 837], [905, 921]), -8134: ([8134], [919, 834], [919, 834]), -8135: ([8135], [919, 834, 837], [919, 834, 921]), -8140: ([8131], [8140], [919, 921]), -8146: ([8146], [921, 776, 768], [921, 776, 768]), -8147: ([8147], [921, 776, 769], [921, 776, 769]), -8150: ([8150], [921, 834], [921, 834]), -8151: ([8151], [921, 776, 834], [921, 776, 834]), -8162: ([8162], [933, 776, 768], [933, 776, 768]), -8163: ([8163], [933, 776, 769], [933, 776, 769]), -8164: ([8164], [929, 787], [929, 787]), -8166: ([8166], [933, 834], [933, 834]), -8167: ([8167], [933, 776, 834], [933, 776, 834]), -8178: ([8178], [8186, 837], [8186, 921]), -8179: ([8179], [8188], [937, 921]), -8180: ([8180], [911, 837], [911, 921]), -8182: ([8182], [937, 834], [937, 834]), -8183: ([8183], [937, 834, 837], [937, 834, 921]), -8188: ([8179], [8188], [937, 921]), -64256: ([64256], [70, 102], [70, 70]), -64257: ([64257], [70, 105], [70, 73]), -64258: ([64258], [70, 108], [70, 76]), -64259: ([64259], [70, 102, 105], [70, 70, 73]), -64260: ([64260], [70, 102, 108], [70, 70, 76]), -64261: ([64261], [83, 116], [83, 84]), -64262: ([64262], [83, 116], [83, 84]), -64275: ([64275], [1348, 1398], [1348, 1350]), -64276: ([64276], [1348, 1381], [1348, 1333]), -64277: ([64277], [1348, 1387], [1348, 1339]), -64278: ([64278], [1358, 1398], [1358, 1350]), -64279: ([64279], [1348, 1389], [1348, 1341]), } _special_casing_corrected = { } def toupper(code): + if code < 128: + if ord('a') <= code <= ord('z'): + return code - 32 + return code try: return _toupper[code] except KeyError: @@ -20673,6 +20574,10 @@ def toupper(code): return code def tolower(code): + if code < 128: + if ord('A') <= code <= ord('Z'): + return code + 32 + return code try: return _tolower[code] except KeyError: @@ -20691,6 +20596,10 @@ def totitle(code): return code def toupper_full(code): + if code < 128: + if ord('a') <= code <= ord('z'): + return [code - 32] + return [code] try: return _special_casing[code][2] except KeyError: @@ -20702,6 +20611,10 @@ def toupper_full(code): return [toupper(code)] def tolower_full(code): + if code < 128: + if ord('A') <= code <= ord('Z'): + return [code + 32] + return [code] try: return _special_casing[code][0] except KeyError: diff --git a/rpython/rlib/unicodedata/unicodedb_8_0_0.py b/rpython/rlib/unicodedata/unicodedb_8_0_0.py index 0030d38d3c..82b965e6d5 100644 --- a/rpython/rlib/unicodedata/unicodedb_8_0_0.py +++ b/rpython/rlib/unicodedata/unicodedb_8_0_0.py @@ -47821,115 +47821,16 @@ _totitle = { _totitle_corrected = { } _special_casing = { -223: ([223], [83, 115], [83, 83]), -304: ([105, 775], [304], [304]), -329: ([329], [700, 78], [700, 78]), -496: ([496], [74, 780], [74, 780]), -912: ([912], [921, 776, 769], [921, 776, 769]), -944: ([944], [933, 776, 769], [933, 776, 769]), -1415: ([1415], [1333, 1410], [1333, 1362]), -7830: ([7830], [72, 817], [72, 817]), -7831: ([7831], [84, 776], [84, 776]), -7832: ([7832], [87, 778], [87, 778]), -7833: ([7833], [89, 778], [89, 778]), -7834: ([7834], [65, 702], [65, 702]), -8016: ([8016], [933, 787], [933, 787]), -8018: ([8018], [933, 787, 768], [933, 787, 768]), -8020: ([8020], [933, 787, 769], [933, 787, 769]), -8022: ([8022], [933, 787, 834], [933, 787, 834]), -8064: ([8064], [8072], [7944, 921]), -8065: ([8065], [8073], [7945, 921]), -8066: ([8066], [8074], [7946, 921]), -8067: ([8067], [8075], [7947, 921]), -8068: ([8068], [8076], [7948, 921]), -8069: ([8069], [8077], [7949, 921]), -8070: ([8070], [8078], [7950, 921]), -8071: ([8071], [8079], [7951, 921]), -8072: ([8064], [8072], [7944, 921]), -8073: ([8065], [8073], [7945, 921]), -8074: ([8066], [8074], [7946, 921]), -8075: ([8067], [8075], [7947, 921]), -8076: ([8068], [8076], [7948, 921]), -8077: ([8069], [8077], [7949, 921]), -8078: ([8070], [8078], [7950, 921]), -8079: ([8071], [8079], [7951, 921]), -8080: ([8080], [8088], [7976, 921]), -8081: ([8081], [8089], [7977, 921]), -8082: ([8082], [8090], [7978, 921]), -8083: ([8083], [8091], [7979, 921]), -8084: ([8084], [8092], [7980, 921]), -8085: ([8085], [8093], [7981, 921]), -8086: ([8086], [8094], [7982, 921]), -8087: ([8087], [8095], [7983, 921]), -8088: ([8080], [8088], [7976, 921]), -8089: ([8081], [8089], [7977, 921]), -8090: ([8082], [8090], [7978, 921]), -8091: ([8083], [8091], [7979, 921]), -8092: ([8084], [8092], [7980, 921]), -8093: ([8085], [8093], [7981, 921]), -8094: ([8086], [8094], [7982, 921]), -8095: ([8087], [8095], [7983, 921]), -8096: ([8096], [8104], [8040, 921]), -8097: ([8097], [8105], [8041, 921]), -8098: ([8098], [8106], [8042, 921]), -8099: ([8099], [8107], [8043, 921]), -8100: ([8100], [8108], [8044, 921]), -8101: ([8101], [8109], [8045, 921]), -8102: ([8102], [8110], [8046, 921]), -8103: ([8103], [8111], [8047, 921]), -8104: ([8096], [8104], [8040, 921]), -8105: ([8097], [8105], [8041, 921]), -8106: ([8098], [8106], [8042, 921]), -8107: ([8099], [8107], [8043, 921]), -8108: ([8100], [8108], [8044, 921]), -8109: ([8101], [8109], [8045, 921]), -8110: ([8102], [8110], [8046, 921]), -8111: ([8103], [8111], [8047, 921]), -8114: ([8114], [8122, 837], [8122, 921]), -8115: ([8115], [8124], [913, 921]), -8116: ([8116], [902, 837], [902, 921]), -8118: ([8118], [913, 834], [913, 834]), -8119: ([8119], [913, 834, 837], [913, 834, 921]), -8124: ([8115], [8124], [913, 921]), -8130: ([8130], [8138, 837], [8138, 921]), -8131: ([8131], [8140], [919, 921]), -8132: ([8132], [905, 837], [905, 921]), -8134: ([8134], [919, 834], [919, 834]), -8135: ([8135], [919, 834, 837], [919, 834, 921]), -8140: ([8131], [8140], [919, 921]), -8146: ([8146], [921, 776, 768], [921, 776, 768]), -8147: ([8147], [921, 776, 769], [921, 776, 769]), -8150: ([8150], [921, 834], [921, 834]), -8151: ([8151], [921, 776, 834], [921, 776, 834]), -8162: ([8162], [933, 776, 768], [933, 776, 768]), -8163: ([8163], [933, 776, 769], [933, 776, 769]), -8164: ([8164], [929, 787], [929, 787]), -8166: ([8166], [933, 834], [933, 834]), -8167: ([8167], [933, 776, 834], [933, 776, 834]), -8178: ([8178], [8186, 837], [8186, 921]), -8179: ([8179], [8188], [937, 921]), -8180: ([8180], [911, 837], [911, 921]), -8182: ([8182], [937, 834], [937, 834]), -8183: ([8183], [937, 834, 837], [937, 834, 921]), -8188: ([8179], [8188], [937, 921]), -64256: ([64256], [70, 102], [70, 70]), -64257: ([64257], [70, 105], [70, 73]), -64258: ([64258], [70, 108], [70, 76]), -64259: ([64259], [70, 102, 105], [70, 70, 73]), -64260: ([64260], [70, 102, 108], [70, 70, 76]), -64261: ([64261], [83, 116], [83, 84]), -64262: ([64262], [83, 116], [83, 84]), -64275: ([64275], [1348, 1398], [1348, 1350]), -64276: ([64276], [1348, 1381], [1348, 1333]), -64277: ([64277], [1348, 1387], [1348, 1339]), -64278: ([64278], [1358, 1398], [1358, 1350]), -64279: ([64279], [1348, 1389], [1348, 1341]), } _special_casing_corrected = { } def toupper(code): + if code < 128: + if ord('a') <= code <= ord('z'): + return code - 32 + return code try: return _toupper[code] except KeyError: @@ -47939,6 +47840,10 @@ def toupper(code): return code def tolower(code): + if code < 128: + if ord('A') <= code <= ord('Z'): + return code + 32 + return code try: return _tolower[code] except KeyError: @@ -47957,6 +47862,10 @@ def totitle(code): return code def toupper_full(code): + if code < 128: + if ord('a') <= code <= ord('z'): + return [code - 32] + return [code] try: return _special_casing[code][2] except KeyError: @@ -47968,6 +47877,10 @@ def toupper_full(code): return [toupper(code)] def tolower_full(code): + if code < 128: + if ord('A') <= code <= ord('Z'): + return [code + 32] + return [code] try: return _special_casing[code][0] except KeyError: diff --git a/rpython/rlib/unicodedata/unicodedb_9_0_0.py b/rpython/rlib/unicodedata/unicodedb_9_0_0.py index 1881c91c8f..3cf3574a6d 100644 --- a/rpython/rlib/unicodedata/unicodedb_9_0_0.py +++ b/rpython/rlib/unicodedata/unicodedb_9_0_0.py @@ -54920,115 +54920,16 @@ _totitle = { _totitle_corrected = { } _special_casing = { -223: ([223], [83, 115], [83, 83]), -304: ([105, 775], [304], [304]), -329: ([329], [700, 78], [700, 78]), -496: ([496], [74, 780], [74, 780]), -912: ([912], [921, 776, 769], [921, 776, 769]), -944: ([944], [933, 776, 769], [933, 776, 769]), -1415: ([1415], [1333, 1410], [1333, 1362]), -7830: ([7830], [72, 817], [72, 817]), -7831: ([7831], [84, 776], [84, 776]), -7832: ([7832], [87, 778], [87, 778]), -7833: ([7833], [89, 778], [89, 778]), -7834: ([7834], [65, 702], [65, 702]), -8016: ([8016], [933, 787], [933, 787]), -8018: ([8018], [933, 787, 768], [933, 787, 768]), -8020: ([8020], [933, 787, 769], [933, 787, 769]), -8022: ([8022], [933, 787, 834], [933, 787, 834]), -8064: ([8064], [8072], [7944, 921]), -8065: ([8065], [8073], [7945, 921]), -8066: ([8066], [8074], [7946, 921]), -8067: ([8067], [8075], [7947, 921]), -8068: ([8068], [8076], [7948, 921]), -8069: ([8069], [8077], [7949, 921]), -8070: ([8070], [8078], [7950, 921]), -8071: ([8071], [8079], [7951, 921]), -8072: ([8064], [8072], [7944, 921]), -8073: ([8065], [8073], [7945, 921]), -8074: ([8066], [8074], [7946, 921]), -8075: ([8067], [8075], [7947, 921]), -8076: ([8068], [8076], [7948, 921]), -8077: ([8069], [8077], [7949, 921]), -8078: ([8070], [8078], [7950, 921]), -8079: ([8071], [8079], [7951, 921]), -8080: ([8080], [8088], [7976, 921]), -8081: ([8081], [8089], [7977, 921]), -8082: ([8082], [8090], [7978, 921]), -8083: ([8083], [8091], [7979, 921]), -8084: ([8084], [8092], [7980, 921]), -8085: ([8085], [8093], [7981, 921]), -8086: ([8086], [8094], [7982, 921]), -8087: ([8087], [8095], [7983, 921]), -8088: ([8080], [8088], [7976, 921]), -8089: ([8081], [8089], [7977, 921]), -8090: ([8082], [8090], [7978, 921]), -8091: ([8083], [8091], [7979, 921]), -8092: ([8084], [8092], [7980, 921]), -8093: ([8085], [8093], [7981, 921]), -8094: ([8086], [8094], [7982, 921]), -8095: ([8087], [8095], [7983, 921]), -8096: ([8096], [8104], [8040, 921]), -8097: ([8097], [8105], [8041, 921]), -8098: ([8098], [8106], [8042, 921]), -8099: ([8099], [8107], [8043, 921]), -8100: ([8100], [8108], [8044, 921]), -8101: ([8101], [8109], [8045, 921]), -8102: ([8102], [8110], [8046, 921]), -8103: ([8103], [8111], [8047, 921]), -8104: ([8096], [8104], [8040, 921]), -8105: ([8097], [8105], [8041, 921]), -8106: ([8098], [8106], [8042, 921]), -8107: ([8099], [8107], [8043, 921]), -8108: ([8100], [8108], [8044, 921]), -8109: ([8101], [8109], [8045, 921]), -8110: ([8102], [8110], [8046, 921]), -8111: ([8103], [8111], [8047, 921]), -8114: ([8114], [8122, 837], [8122, 921]), -8115: ([8115], [8124], [913, 921]), -8116: ([8116], [902, 837], [902, 921]), -8118: ([8118], [913, 834], [913, 834]), -8119: ([8119], [913, 834, 837], [913, 834, 921]), -8124: ([8115], [8124], [913, 921]), -8130: ([8130], [8138, 837], [8138, 921]), -8131: ([8131], [8140], [919, 921]), -8132: ([8132], [905, 837], [905, 921]), -8134: ([8134], [919, 834], [919, 834]), -8135: ([8135], [919, 834, 837], [919, 834, 921]), -8140: ([8131], [8140], [919, 921]), -8146: ([8146], [921, 776, 768], [921, 776, 768]), -8147: ([8147], [921, 776, 769], [921, 776, 769]), -8150: ([8150], [921, 834], [921, 834]), -8151: ([8151], [921, 776, 834], [921, 776, 834]), -8162: ([8162], [933, 776, 768], [933, 776, 768]), -8163: ([8163], [933, 776, 769], [933, 776, 769]), -8164: ([8164], [929, 787], [929, 787]), -8166: ([8166], [933, 834], [933, 834]), -8167: ([8167], [933, 776, 834], [933, 776, 834]), -8178: ([8178], [8186, 837], [8186, 921]), -8179: ([8179], [8188], [937, 921]), -8180: ([8180], [911, 837], [911, 921]), -8182: ([8182], [937, 834], [937, 834]), -8183: ([8183], [937, 834, 837], [937, 834, 921]), -8188: ([8179], [8188], [937, 921]), -64256: ([64256], [70, 102], [70, 70]), -64257: ([64257], [70, 105], [70, 73]), -64258: ([64258], [70, 108], [70, 76]), -64259: ([64259], [70, 102, 105], [70, 70, 73]), -64260: ([64260], [70, 102, 108], [70, 70, 76]), -64261: ([64261], [83, 116], [83, 84]), -64262: ([64262], [83, 116], [83, 84]), -64275: ([64275], [1348, 1398], [1348, 1350]), -64276: ([64276], [1348, 1381], [1348, 1333]), -64277: ([64277], [1348, 1387], [1348, 1339]), -64278: ([64278], [1358, 1398], [1358, 1350]), -64279: ([64279], [1348, 1389], [1348, 1341]), } _special_casing_corrected = { } def toupper(code): + if code < 128: + if ord('a') <= code <= ord('z'): + return code - 32 + return code try: return _toupper[code] except KeyError: @@ -55038,6 +54939,10 @@ def toupper(code): return code def tolower(code): + if code < 128: + if ord('A') <= code <= ord('Z'): + return code + 32 + return code try: return _tolower[code] except KeyError: @@ -55056,6 +54961,10 @@ def totitle(code): return code def toupper_full(code): + if code < 128: + if ord('a') <= code <= ord('z'): + return [code - 32] + return [code] try: return _special_casing[code][2] except KeyError: @@ -55067,6 +54976,10 @@ def toupper_full(code): return [toupper(code)] def tolower_full(code): + if code < 128: + if ord('A') <= code <= ord('Z'): + return [code + 32] + return [code] try: return _special_casing[code][0] except KeyError: |