@@ -1715,13 +1715,85 @@ PHP_FUNCTION(mb_str_split)
17151715 }
17161716}
17171717
1718+ #ifdef __SSE2__
/* Horizontal sum of the 16 unsigned bytes held in a 128-bit XMM register
 *
 * Thanks to StackOverflow user 'Paul R' (https://stackoverflow.com/users/253056/paul-r)
 * From: https://stackoverflow.com/questions/36998538/fastest-way-to-horizontally-sum-sse-unsigned-byte-vector
 *
 * Each byte of `v` is treated as an unsigned 8-bit integer; the total of all 16
 * is returned in an ordinary scalar register */
static inline uint32_t _mm_sum_epu8(const __m128i v)
{
	/* SSE2 has no instruction which directly adds up the bytes of one register
	 * _mm_sad_epu8 computes the absolute differences between corresponding bytes of two
	 * XMM registers and adds those differences up separately for the low 8 bytes and the
	 * high 8 bytes, leaving one 16-bit sum at the bottom of each 64-bit half of the result
	 * Against an all-zero operand, each "difference" is simply the byte value from `v` */
	const __m128i zero = _mm_setzero_si128();
	const __m128i half_sums = _mm_sad_epu8(v, zero);

	/* The total is split across the two halves, so pull each partial sum out into a
	 * scalar: _mm_cvtsi128_si32 reads the low 32 bits (sum of bytes 0-7) and
	 * _mm_extract_epi16 reads 16-bit lane 4 (sum of bytes 8-15) */
	const int low_sum = _mm_cvtsi128_si32(half_sums);
	const int high_sum = _mm_extract_epi16(half_sums, 4);

	return low_sum + high_sum;
}
1738+ #endif
1739+
/* Count the codepoints in the `len` bytes starting at `p`
 *
 * This assumes that the string is valid UTF-8; nothing is validated here
 * In UTF-8, the only bytes which do not start a new codepoint are 0x80-0xBF (continuation bytes)
 * Interpreted as signed integers, those are all byte values less than -64
 * A fast way to get the length of a UTF-8 string is to start with its byte length,
 * then subtract off the number of continuation bytes
 *
 * Returns `len` minus the number of continuation bytes found */
static size_t mb_fast_strlen_utf8(unsigned char *p, size_t len)
{
	unsigned char *e = p + len;

#ifdef __SSE2__
	if (len >= sizeof(__m128i)) {
		const __m128i threshold = _mm_set1_epi8(-64);
		const __m128i delta = _mm_set1_epi8(1);
		__m128i counter = _mm_setzero_si128(); /* Vector of 16 continuation-byte counters */

		int reset_counter = 255;
		do {
			__m128i operand = _mm_loadu_si128((const __m128i*)p); /* Load 16 bytes; no alignment required */
			__m128i lt = _mm_cmplt_epi8(operand, threshold); /* 0xFF in each lane holding a continuation byte */
			counter = _mm_add_epi8(counter, _mm_and_si128(lt, delta)); /* Add 1 to the counters for those lanes */

			/* The counters can only go up to 255, so every 255 iterations, fold them into `len`
			 * and reset them to zero */
			if (--reset_counter == 0) {
				len -= _mm_sum_epu8(counter);
				counter = _mm_setzero_si128();
				reset_counter = 255;
			}

			p += sizeof(__m128i);
			/* Test the number of bytes left instead of computing `p + 16`; with fewer than
			 * 16 bytes left, that pointer would lie beyond one-past-the-end of the string,
			 * which is undefined behavior */
		} while ((size_t)(e - p) >= sizeof(__m128i));

		len -= _mm_sum_epu8(counter); /* Fold in any remaining non-zero values in the 16 counters */
	}
#endif

	/* Check for continuation bytes in whatever is left at the end of the string (0-15 bytes
	 * after the SSE2 loop, or the whole string if that loop did not run)
	 * Continuation bytes have the bit pattern 10xxxxxx; testing the top two bits avoids
	 * depending on how out-of-range values convert to `signed char` */
	while (p < e) {
		if ((*p++ & 0xC0) == 0x80) {
			len--;
		}
	}

	return len;
}
1786+
17181787static size_t mb_get_strlen (zend_string * string , const mbfl_encoding * encoding )
17191788{
17201789 unsigned int char_len = encoding -> flag & (MBFL_ENCTYPE_SBCS | MBFL_ENCTYPE_WCS2 | MBFL_ENCTYPE_WCS4 );
17211790 if (char_len ) {
17221791 return ZSTR_LEN (string ) / char_len ;
1792+ } else if (php_mb_is_no_encoding_utf8 (encoding -> no_encoding ) && GC_FLAGS (string ) & IS_STR_VALID_UTF8 ) {
1793+ return mb_fast_strlen_utf8 ((unsigned char * )ZSTR_VAL (string ), ZSTR_LEN (string ));
17231794 }
17241795
1796+
17251797 uint32_t wchar_buf [128 ];
17261798 unsigned char * in = (unsigned char * )ZSTR_VAL (string );
17271799 size_t in_len = ZSTR_LEN (string );
@@ -1789,14 +1861,7 @@ static unsigned char* offset_to_pointer_utf8(unsigned char *str, unsigned char *
17891861}
17901862
/* Convert a pointer into a UTF-8 string to a codepoint offset
 *
 * Returns the number of codepoints which start in the byte range [start, pos)
 * The counting is delegated to mb_fast_strlen_utf8, so the range is expected to
 * hold valid UTF-8 */
static size_t pointer_to_offset_utf8(unsigned char *start, unsigned char *pos) {
	/* An empty or inverted range contains no codepoints; returning early also keeps a
	 * negative `pos - start` from wrapping around to a huge byte length */
	if (pos <= start) {
		return 0;
	}
	return mb_fast_strlen_utf8(start, (size_t)(pos - start));
}
18011866
18021867static size_t mb_find_strpos (zend_string * haystack , zend_string * needle , const mbfl_encoding * enc , ssize_t offset , bool reverse )
0 commit comments