Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Lib/test/string_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -916,6 +916,13 @@ def test_isascii(self):
self.checkequal(True, '\x00\x7f', 'isascii')
self.checkequal(False, '\x80', 'isascii')
self.checkequal(False, '\xe9', 'isascii')
# bytes.isascii() and bytearray.isascii() have an optimization that
# checks 4 or 8 bytes at once, so check several alignments.
for p in range(8):
self.checkequal(True, ' '*p + '\x7f', 'isascii')
self.checkequal(False, ' '*p + '\x80', 'isascii')
self.checkequal(True, ' '*p + '\x7f' + ' '*8, 'isascii')
self.checkequal(False, ' '*p + '\x80' + ' '*8, 'isascii')

def test_isdigit(self):
self.checkequal(False, '', 'isdigit')
Expand Down
40 changes: 36 additions & 4 deletions Objects/bytes_methods.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,19 +98,51 @@ PyDoc_STRVAR_shared(_Py_isascii__doc__,
Return True if B is empty or all characters in B are ASCII,\n\
False otherwise.");

// Optimization is copied from ascii_decode in unicodeobject.c
/* Mask with the high bit (0x80) set in every byte of a C 'long'.
   ANDing a long-sized load with this mask gives a non-zero result iff
   at least one byte of that word is >= 0x80, i.e. is not ASCII.
   The constant's width follows SIZEOF_LONG; any size other than 4 or 8
   is rejected at compile time. */
#if (SIZEOF_LONG == 8)
# define ASCII_CHAR_MASK 0x8080808080808080UL
#elif (SIZEOF_LONG == 4)
# define ASCII_CHAR_MASK 0x80808080UL
#else
# error C 'long' size should be either 4 or 8!
#endif

/* Implementation of B.isascii().

   Scans the `len` bytes starting at `cptr` and returns True if none of
   them has its high bit (0x80) set, False otherwise.  An empty buffer
   (len == 0) never enters the loop and therefore yields True.

   Bytes are examined one at a time until the cursor is aligned to
   SIZEOF_LONG; from there whole 'long' words are tested against
   ASCII_CHAR_MASK, and any trailing bytes that do not fill a word are
   again examined one at a time. */
PyObject*
_Py_bytes_isascii(const char *cptr, Py_ssize_t len)
{
    const char *p = cptr;
    const char *end = p + len;
    /* Upper bound for word-sized loads: `end` aligned down to SIZEOF_LONG,
       so that a full word starting below this address stays inside the
       buffer. */
    const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);

    while (p < end) {
        /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
           for an explanation. */
        if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
            /* Separate cursor for the inner loop (help allocation). */
            const char *word_p = p;
            while (word_p < aligned_end) {
                /* The buffer is read-only here, so keep the const
                   qualifier on the word-sized load. */
                unsigned long value = *(const unsigned long *) word_p;
                if (value & ASCII_CHAR_MASK) {
                    Py_RETURN_FALSE;
                }
                word_p += SIZEOF_LONG;
            }
            p = word_p;
            /* The word loop may have consumed the buffer exactly; in that
               case there is no trailing byte left to dereference. */
            if (word_p == end) {
                break;
            }
        }
        /* Slow path: a single byte, either before the first aligned
           address or in the tail after the last full word. */
        if ((unsigned char)*p & 0x80) {
            Py_RETURN_FALSE;
        }
        p++;
    }
    Py_RETURN_TRUE;
}

#undef ASCII_CHAR_MASK


PyDoc_STRVAR_shared(_Py_isdigit__doc__,
"B.isdigit() -> bool\n\
Expand Down