glibc strlen delphi pascal

From: Will DeWitt Jr.

Subject: Fast strlen routine?

NewsGroup: borland.public.delphi.language.basm

Date Posted: -May- at :: PST

Download from Google

I've been tinkering with re-writing some of the standard C run-time

library routines and haven't really played much with MMX instructions,

or SSE for that matter.  But I thought what I came up with was

interesting and maybe worth sharing--

// Returns the length of the NUL-terminated string s, scanning 8 bytes per
// step with MMX.  A nil pointer yields 0 (EAX is returned untouched).
// Each MOVQ loads a full 8 bytes from the current position, so bytes beyond
// the terminator are read as well.
// NOTE(review): numeric literals and label numbers were reconstructed after
// being lost from the posted listing -- confirm against the original post.
function  strlenmmx(s: PAnsiChar): longword; register;
asm
          TEST      EAX, EAX
          JZ        @@Error     // nil pointer: leave with EAX = 0
          PXOR      MM1, MM1    // MM1 = eight zero bytes to compare against
          MOV       ECX, EAX    // save original pointer
@@1:
          MOVQ      MM0, [EAX]  // grab 8 chars
          PCMPEQB   MM0, MM1    // per byte: FF where the char is null/0, 00 where it is not
          PMOVMSKB  EDX, MM0    // move 8-bit mask of each char to DL
          ADD       EAX, 8      // move pointer forward 8 chars
          TEST      EDX, EDX    // check for any null/0 chars
          JNZ       @@2
          MOVQ      MM0, [EAX]  // unroll twice (#2)
          PCMPEQB   MM0, MM1
          PMOVMSKB  EDX, MM0
          ADD       EAX, 8
          TEST      EDX, EDX
          JNZ       @@2
          MOVQ      MM0, [EAX]  // (#3)
          PCMPEQB   MM0, MM1
          PMOVMSKB  EDX, MM0
          ADD       EAX, 8
          TEST      EDX, EDX
          JZ        @@1
@@2:
          EMMS
          BSF       EDX, EDX    // index (0..7) of the first null inside the group
          SUB       EAX, DWORD PTR [@@SubTable+EDX*4]  // EAX was already advanced by 8: step back onto the null
          SUB       EAX, ECX    // length = address of null - start of string
          RET
@@SubTable:
          DD 8
          DD 7
          DD 6
          DD 5
          DD 4
          DD 3
          DD 2
          DD 1
          DD 0                  // not reachable: BSF of a non-zero 8-bit mask is 0..7
@@Error:
end;

// Returns the number of characters before the #0 terminator of P,
// or 0 when P is nil.
function _PCharLen(P: _PAnsiChr): Longint;
{$IFNDEF LEGACY_PCHARLEN}
begin
  Result := 0;
  if P <> nil then
    while P[Result] <> #0 do
      Inc(Result);
end;
{$ELSE !LEGACY_PCHARLEN}
{$IFDEF CPUX86}
asm
        TEST    EAX,EAX
        JE      @@5             // nil: return EAX = 0
        PUSH    EAX             // remember the start of the string
        XOR     ECX,ECX         // CL = 0, the byte being searched for
@@0:    CMP     CL,[EAX+0]      // test four consecutive bytes per iteration
        JE      @@4
        CMP     CL,[EAX+1]
        JE      @@3
        CMP     CL,[EAX+2]
        JE      @@2
        CMP     CL,[EAX+3]
        JE      @@1
        ADD     EAX,4
        JMP     @@0
@@1:    INC     EAX             // fall through: terminator at +3 => three INCs
@@2:    INC     EAX             // terminator at +2 => two INCs
@@3:    INC     EAX             // terminator at +1 => one INC
@@4:    POP     ECX             // ECX = start of the string
        SUB     EAX,ECX         // length = terminator address - start
@@5:
end;
{$ENDIF CPUX86}
{$ENDIF !LEGACY_PCHARLEN}

http://www.verydemo.com/demo_c230_i66795.html

/* The C library's word-at-a-time strlen; more involved than one might expect.

   Returns the number of bytes before the terminating '\0' of STR.  */
size_t strlen (const char *str)
{
     const char *char_ptr;
     const unsigned long int *longword_ptr;
     unsigned long int longword, himagic, lomagic;

     /* Walk byte by byte until CHAR_PTR sits on a longword boundary.  */
     for (char_ptr = str;
          ((unsigned long int) char_ptr & (sizeof (longword) - 1)) != 0;
          ++char_ptr)
        if (*char_ptr == '\0')
          return char_ptr - str;

    longword_ptr = (const unsigned long int *) char_ptr;

    himagic = 0x80808080L;
    lomagic = 0x01010101L;
    if (sizeof (longword) > 4)
    {
        /* Widen the magic constants to 8 bytes; with 4-byte constants a
           zero in bytes 4..7 of an 8-byte longword would go unnoticed.
           The shift is done in two steps to avoid a warning when long
           has only 32 bits.  */
        himagic = ((himagic << 16) << 16) | himagic;
        lomagic = ((lomagic << 16) << 16) | lomagic;
    }

  /* Instead of the traditional loop which tests each character,
     we will test a longword at a time.  The tricky part is testing
     whether *any* byte in the longword in question is zero.  */
    for (;;)
    {
      longword = *longword_ptr++;

      if (((longword - lomagic) & ~longword & himagic) != 0)
      {
        /* What matters is that a zero byte is never missed; a false
           positive would merely cost the byte checks below.  */
        /* The word is only read, never written.  */
        const char *cp = (const char *) (longword_ptr - 1);  /* minus one: the pointer was already post-incremented */

        if (cp[0] == 0)
          return cp - str;
        if (cp[1] == 0)
          return cp - str + 1;
        if (cp[2] == 0)
          return cp - str + 2;
        if (cp[3] == 0)
          return cp - str + 3;
        if (sizeof (longword) > 4)
        {
            if (cp[4] == 0)
                return cp - str + 4;
            if (cp[5] == 0)
                return cp - str + 5;
            if (cp[6] == 0)
                return cp - str + 6;
            if (cp[7] == 0)
                return cp - str + 7;
        }
      }
    }
}

/* Naive byte-at-a-time count (function-body fragment; `str` is the
   string being measured).  The counter has to start at zero: reading an
   uninitialised automatic variable is undefined behaviour.  */
int i = 0;

while (*str++ != '\0') ++i;

return i;

http://www.strchr.com/optimized_strlen_function

http://www.strchr.com/sse2_optimised_strlen

/* Byte-at-a-time strlen: count characters until the terminating NUL. */
size_t strlen(const char * str)
{
    size_t count = 0;

    while (str[count] != '\0')
        ++count;

    return count;
}

/* Byte-at-a-time strlen: run a cursor up to the terminating NUL and
   return the distance it covered. */
size_t strlen(const char *s) {
    const char *cursor;

    for (cursor = s; *cursor != '\0'; ++cursor) {
        /* nothing: the loop header does all the work */
    }
    return cursor - s;
}

// for x86 only: the byte masks below assume little-endian order, and the
// 32-bit load is performed at whatever alignment s happens to have.
// Each iteration reads 4 bytes, so up to 3 bytes past the terminator are
// touched.
size_t my_strlen(const char *s) {
    size_t len = 0;
    for(;;) {
        unsigned x = *(const unsigned*)s;
        // lowest-addressed byte lives in the low 8 bits on little-endian
        if((x & 0xFF) == 0) return len;
        if((x & 0xFF00) == 0) return len + 1;
        if((x & 0xFF0000) == 0) return len + 2;
        if((x & 0xFF000000) == 0) return len + 3;
        s += 4, len += 4;
    }
}

#ifndef WORDS_BIGENDIAN

    #if 0

        static inline int count_bits_to_0(unsigned int x) // counting trailing zeroes
        {
            // Walk bits 0..15 from the least significant end and report the
            // position of the first set bit; 16 means none of the low 16
            // bits is set.
            int i;

            for (i = 0; i < 16; i++)
            {
                if (x & (1u << i))
                    return i;
            }
            return i;
        }

    #elif 0

        static inline int count_bits_to_0(unsigned int x) // counting trailing zeroes
        {
            // http://www.hackersdelight.org/: ntz3() shortened for 16-bit mask by Peter Kankowski
            // Binary search: skip 8, 4, then 2 zero bits at a time; n starts
            // at 1 so the final subtraction settles the last bit.
            // A mask with no bit set in the low 16 bits yields 15.
            int n = 1;

            if ((x & 0x000000FFU) == 0) {n += 8; x >>= 8;}
            if ((x & 0x0000000FU) == 0) {n += 4; x >>= 4;}
            if ((x & 0x00000003U) == 0) {n += 2; x >>= 2;}
            return n - (x & 1);
        }

    #else

        static inline int count_bits_to_0(unsigned int x) // counting trailing zeroes, by Nazo, post: 2009/07/20 03:40
        {                                                 // this is current winner for speed
            // table[b] = number of trailing zero bits of the byte b.
            // NOTE(review): the table contents were rebuilt from that rule.
            // Entry 0 is set to 8 so that a mask with no bit set in its low
            // 16 bits yields 16; confirm against the original listing.
            static const unsigned char table[256] =
            {
                8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
            };

            // Low byte non-zero: the answer is inside it.
            if ((unsigned char)x)
                return table[(unsigned char)x];
            // Otherwise look at the second byte; the & keeps the index
            // inside the table even if bits above 15 are set.
            return table[(x >> 8) & 0xFFu] + 8; // t[x / 256] + 8
        }

    #endif

#else

    #if 0

        static inline int count_bits_to_0(unsigned int x)  // big-endian variant: counts from bit 15 downwards
        {
            // NOTE(review): the bit indices were lost from the listing; the
            // 15-down-to-0 order is inferred from the nlz1()-based sibling
            // in the same #else branch -- confirm against the original.
            // Returns how many bits, starting at bit 15, are clear before
            // the first set bit; 16 when none of bits 15..0 is set.
            int i;

            for (i = 0; i < 16; i++)
            {
                if (x & (1u << (15 - i)))
                    return i;
            }
            return i;
        }

    #else

        static inline int count_bits_to_0(unsigned int x)  // big-endian variant: leading zeroes of a 16-bit mask
        {
            // http://www.hackersdelight.org/: nlz1() shortened for 16-bit mask
            // Binary search from the top: whenever the upper part is empty,
            // add its width to n and shift the rest up into its place.
            // A mask with no bit set in the low 16 bits yields 15.
            int n = 0;

            if (x <= 0x000000FFU) {n = n + 8; x = x << 8;}
            if (x <= 0x00000FFFU) {n = n + 4; x = x << 4;}
            if (x <= 0x00003FFFU) {n = n + 2; x = x << 2;}
            if (x <= 0x00007FFFU) {n = n + 1;}
            return n;
        }

    #endif

#endif

/*
 * SSE2 strlen: returns the number of bytes before the terminating 0.
 *
 * Bytes are checked one at a time until str is 16-byte aligned, then 16 at
 * a time with aligned loads.  The aligned load reads the whole 16-byte
 * block that holds the terminator, i.e. possibly bytes past the end of the
 * string.
 */
size_t strlen(const char *str)
{
    size_t len = 0;

    // align to 16 bytes
    while ((((intptr_t)str) & (sizeof(__m128i)-1)) != 0)
    {
        if (*str++ == 0)
            return len;
        ++ len;
    }

    // search for 0
    __m128i xmm0 = _mm_setzero_si128();
    __m128i xmm1;
    int mask = 0;

    for (;;)
    {
        xmm1 = _mm_load_si128((const __m128i *)str);
        xmm1 = _mm_cmpeq_epi8(xmm1, xmm0);   // 0xFF in every byte lane that held 0
        if ((mask = _mm_movemask_epi8(xmm1)) != 0)
        {
            // got 0 somewhere within 16 bytes in xmm1, or within 16 bits in mask
            // find index of first set bit
        #ifndef _DISABLE_ASM_BSF // define it to disable ASM
            #if (_MSC_VER >= 1300)   // make sure <intrin.h> is included
                unsigned long pos;
                _BitScanForward(&pos, mask);
                len += (size_t)pos;
            #elif defined(_MSC_VER)  // earlier MSVC's do not have _BitScanForward, use inline asm
                __asm bsf edx, mask ; edx = bsf(mask)
                __asm add edx, len  ; edx += len
                __asm mov len, edx  ; len = edx
            #elif ((__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))) // modern GCC has built-in __builtin_ctz
                len += __builtin_ctz(mask);
            #elif defined(__GNUC__) // older GCC shall use inline asm
                unsigned int pos;
                asm("bsf %1, %0" : "=r" (pos) : "rm" (mask));
                len += (size_t)pos;
            #else                    // none of choices exist, use local BSF implementation
                len += count_bits_to_0(mask);
            #endif
        #else
            len += count_bits_to_0(mask);
        #endif
            break;
        }
        str += sizeof(__m128i);
        len += sizeof(__m128i);
    }
    return len;
}

This implementation would gain a further performance boost if 'count_bits_to_0' were optimised to use fewer conditional branches.

We could use _mm_loadu_si128 to load unaligned data and thus skip our own alignment loop, but the performance

would still be worse because of the additional CPU cycles that _mm_loadu_si128 costs.

SSE2 SIMD instructions are present on all modern CPUs and thus this implementation

may bring real benefits to intensive database/text processing applications.

License: Public Domain.

http://stackoverflow.com/questions/2372315/how-to-implement-strlen-as-fast-as-possible

also do two micro-optimizations:

Since most strings we scan consist of ASCII chars in the range 0~127, the
high bit is (almost) never set, so we only check for it in a second test.
Increment an index rather than a pointer,
which is cheaper on some architectures (notably x86) and gives you the length for 'free'...

/*
 * Word-at-a-time strlen that reads str as an array of 32-bit words.
 *
 * The first test, (u - 0x01010101) & 0x80808080, is cheap but also fires
 * for bytes > 127; the second, "& ~u", discards those, leaving a non-zero
 * value exactly when the word contains a zero byte.
 *
 * The position of the zero byte is then read back from memory, byte by
 * byte.  This avoids decoding it from the bit pattern, which needs the
 * host byte order: a "#if BYTE_ORDER == BIG_ENDIAN" test silently selects
 * the big-endian path when neither macro is defined, and borrow
 * propagation can flag bytes above the real zero.
 *
 * str is accessed through a uint32_t pointer, so callers are expected to
 * pass a 4-byte aligned string; whole words are read, i.e. up to 3 bytes
 * past the terminator.
 */
uint32_t gatopeich_strlen32(const char* str)
{
    const uint32_t *u32 = (const uint32_t*)str;
    uint32_t u, abcd, i = 0;

    for (;;)
    {
        u = u32[i++];
        abcd = (u-0x01010101) & 0x80808080;
        if (abcd && // If abcd is not 0, we have NUL or a non-ASCII char > 127...
             (abcd &= ~u)) // ... Discard non-ASCII chars
        {
            // i was already incremented: the current word starts at 4*(i-1).
            const uint32_t base = 4 * (i - 1);
            const char *tail = str + base;

            if (tail[0] == '\0') return base;
            if (tail[1] == '\0') return base + 1;
            if (tail[2] == '\0') return base + 2;
            return base + 3; // the exact test above guarantees a zero byte in this word
        }
    }
}

http://www.opensource.apple.com/source/Libc/Libc-997.1.1/string/FreeBSD/strlen.c

strlen.c   [plain text]

/*-

 * Copyright (c) 2009 Xin LI <delphij@FreeBSD.org>

 * All rights reserved.

 *

 * Redistribution and use in source and binary forms, with or without

 * modification, are permitted provided that the following conditions

 * are met:

 * 1. Redistributions of source code must retain the above copyright

 *    notice, this list of conditions and the following disclaimer.

 * 2. Redistributions in binary form must reproduce the above copyright

 *    notice, this list of conditions and the following disclaimer in the

 *    documentation and/or other materials provided with the distribution.

 *

 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND

 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE

 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL

 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS

 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)

 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY

 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF

 * SUCH DAMAGE.

 */

#include <sys/cdefs.h>

__FBSDID("$FreeBSD: src/lib/libc/string/strlen.c,v 1.7 2009/01/26 07:31:28 delphij Exp $");

#include <limits.h>

#include <sys/types.h>

#include <string.h>

/*

 * Portable strlen() for 32-bit and 64-bit systems.

 *

 * Rationale: it is generally much more efficient to do word length

 * operations and avoid branches on modern computer systems, as

 * compared to byte-length operations with a lot of branches.

 *

 * The expression:

 *

 *    ((x - 0x01....01) & ~x & 0x80....80)

 *

 * would evaluate to a non-zero value iff any of the bytes in the

 * original word is zero.  However, we can further reduce ~1/3 of

 * time if we consider that strlen() usually operate on 7-bit ASCII

 * by employing the following expression, which allows false positive

 * when high bit of 1 and use the tail case to catch these case:

 *

 *    ((x - 0x01....01) & 0x80....80)

 *

 * This is more than 5.2 times as fast as the raw implementation on

 * Intel T7300 under long mode for strings longer than word length.

 */

/*
 * Magic numbers for the algorithm.  The word size is taken from ULONG_MAX
 * (ISO C <limits.h>) rather than LONG_BIT, which is an XSI extension and
 * is not defined by every <limits.h> in strict ISO mode.
 */
#if ULONG_MAX == 0xffffffffUL
static const unsigned long mask01 = 0x01010101UL;
static const unsigned long mask80 = 0x80808080UL;
#elif ULONG_MAX == 0xffffffffffffffffUL
static const unsigned long mask01 = 0x0101010101010101UL;
static const unsigned long mask80 = 0x8080808080808080UL;
#else
#error Unsupported word size
#endif

#define    LONGPTR_MASK (sizeof(long) - 1)

/*
 * Helper macro to return string length if we caught the zero
 * byte.  Expects `p` (start of the current word) and `str` (start of the
 * string) to be in scope and returns from the enclosing function.
 */
#define testbyte(x)                \
    do {                    \
        if (p[x] == '\0')        \
            return (p - str + x);    \
    } while (0)

/*
 * Returns the number of bytes before the terminating '\0' of str.
 */
size_t
strlen(const char *str)
{
    const char *p;
    const unsigned long *lp;

    /* Skip the first few bytes until we have an aligned p */
    for (p = str; (uintptr_t)p & LONGPTR_MASK; p++)
        if (*p == '\0')
            return (p - str);

    /*
     * Scan the rest of the string using word sized operation.  The test
     * may also fire for bytes with the high bit set; in that case no
     * testbyte() matches and the loop simply moves on to the next word.
     */
    for (lp = (const unsigned long *)p; ; lp++)
        if ((*lp - mask01) & mask80) {
            p = (const char *)(lp);
            testbyte(0);
            testbyte(1);
            testbyte(2);
            testbyte(3);
#if ULONG_MAX > 0xffffffffUL
            testbyte(4);
            testbyte(5);
            testbyte(6);
            testbyte(7);
#endif
        }

    /* NOTREACHED */
    return (0);
}

http://www.stdlib.net/~colmmacc/strlen.c.html

   /* Copyright (C) 1991, 1993, 1997, 2000, 2003 Free Software Foundation, Inc.

  2    This file is part of the GNU C Library.

  3    Written by Torbjorn Granlund (tege@sics.se),

  4    with help from Dan Sahlin (dan@sics.se);

  5    commentary by Jim Blandy (jimb@ai.mit.edu).

  6

  7    The GNU C Library is free software; you can redistribute it and/or

  8    modify it under the terms of the GNU Lesser General Public

  9    License as published by the Free Software Foundation; either

 10    version 2.1 of the License, or (at your option) any later version.

 11

 12    The GNU C Library is distributed in the hope that it will be useful,

 13    but WITHOUT ANY WARRANTY; without even the implied warranty of

 14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

 15    Lesser General Public License for more details.

 16

 17    You should have received a copy of the GNU Lesser General Public

 18    License along with the GNU C Library; if not, write to the Free

 19    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA

 20    02111-1307 USA.  */

  #include <string.h>

  #include <stdlib.h>

  #undef strlen

  /* Return the length of the null-terminated string STR.  Scan for
     the null terminator quickly by testing a longword (four or eight
     bytes) at a time.  */
  size_t
  strlen (const char *str)
  {
    const char *char_ptr;
    const unsigned long int *longword_ptr;
    unsigned long int longword, himagic, lomagic;

    /* Handle the first few characters by reading one character at a time.
       Do this until CHAR_PTR is aligned on a longword boundary.  */
    for (char_ptr = str; ((unsigned long int) char_ptr
                          & (sizeof (longword) - 1)) != 0;
         ++char_ptr)
      if (*char_ptr == '\0')
        return char_ptr - str;

    /* All these elucidatory comments refer to 4-byte longwords,
       but the theory applies equally well to 8-byte longwords.  */
    longword_ptr = (const unsigned long int *) char_ptr;

    /* HIMAGIC has the top bit of every byte set, LOMAGIC the bottom bit.
       Subtracting LOMAGIC borrows into the top bit of any byte that was
       zero, so (LONGWORD - LOMAGIC) & HIMAGIC is non-zero whenever the
       longword holds a zero byte.  */
    himagic = 0x80808080L;
    lomagic = 0x01010101L;
    if (sizeof (longword) > 4)
      {
        /* 64-bit version of the magic.  */
        /* Do the shift in two steps to avoid a warning if long has 32 bits.  */
        himagic = ((himagic << 16) << 16) | himagic;
        lomagic = ((lomagic << 16) << 16) | lomagic;
      }
    if (sizeof (longword) > 8)
      abort ();

    /* Instead of the traditional loop which tests each character,
       we will test a longword at a time.  The tricky part is testing
       if *any of the four* bytes in the longword in question are zero.  */
    for (;;)
      {
        longword = *longword_ptr++;

        /* The test never misses a zero byte, but it also fires for some
           bytes that have their top bit set (a "misfire"), which is why
           the individual bytes are inspected below.  */
        if (((longword - lomagic) & himagic) != 0)
          {
            /* Which of the bytes was the zero?  If none of them were, it was
               a misfire; continue the search.  */
            const char *cp = (const char *) (longword_ptr - 1);

            if (cp[0] == 0)
              return cp - str;
            if (cp[1] == 0)
              return cp - str + 1;
            if (cp[2] == 0)
              return cp - str + 2;
            if (cp[3] == 0)
              return cp - str + 3;
            if (sizeof (longword) > 4)
              {
                if (cp[4] == 0)
                  return cp - str + 4;
                if (cp[5] == 0)
                  return cp - str + 5;
                if (cp[6] == 0)
                  return cp - str + 6;
                if (cp[7] == 0)
                  return cp - str + 7;
              }
          }
      }
  }

 libc_hidden_builtin_def (strlen)

     /* Copyright (C) 2011-2014 Free Software Foundation, Inc.

    2    This file is part of the GNU C Library.

    3    Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.

    4

    5    The GNU C Library is free software; you can redistribute it and/or

    6    modify it under the terms of the GNU Lesser General Public

    7    License as published by the Free Software Foundation; either

    8    version 2.1 of the License, or (at your option) any later version.

    9

   10    The GNU C Library is distributed in the hope that it will be useful,

   11    but WITHOUT ANY WARRANTY; without even the implied warranty of

   12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

   13    Lesser General Public License for more details.

   14

   15    You should have received a copy of the GNU Lesser General Public

   16    License along with the GNU C Library.  If not, see

   17    <http://www.gnu.org/licenses/>.  */

    #include <string.h>

    #include <stdint.h>

    #include "string-endian.h"

    /* Return the length of the null-terminated string S, examining one
       aligned 64-bit word per iteration.  MASK and CFZ come from
       "string-endian.h"; __insn_v1cmpeqi is a compiler intrinsic.
       NOTE(review): the constants -8, 0 and 3 were restored after being
       lost from the listing -- confirm against the glibc tile sources.  */
    size_t
    strlen (const char *s)
    {
      /* Get an aligned pointer: clear the low three address bits.  */
      const uintptr_t s_int = (uintptr_t) s;
      const uint64_t *p = (const uint64_t *) (s_int & -8);

      /* Read and MASK the first word.  Presumably MASK sets the bytes that
         precede S inside the aligned word to non-zero so they cannot be
         taken for the terminator -- verify in string-endian.h.  */
      uint64_t v = *p | MASK (s_int);

      /* Per-byte compare of V against 0; loop while no byte matched.  */
      uint64_t bits;
      while ((bits = __insn_v1cmpeqi (v, 0)) == 0)
        v = *++p;

      /* CFZ yields a bit position; >> 3 converts it to a byte offset.  */
      return ((const char *) p) + (CFZ (bits) >> 3) - s;
    }

    libc_hidden_builtin_def (strlen)

http://tonybai.com/2009/04/11/glibc-strlen-source-analysis/

直接操作C标准库提供的字符串操作函数是有一定风险的，稍有不慎就会导致内存问题。

这周用业余时间写了一个小型的安全字符串操作库，但是测试之后才发现自己的实现有很大的性能缺陷。

在Solaris上初步做了一个简单的性能比对，以下是得到的性能数据(以strlen的数据为例)：
当传入的字符串长度为10时，执行100w次：
strlen 执行时间是：32762毫秒
my_strlen执行时间是：491836毫秒

当传入的字符串长度为20时，执行100w次：
strlen 执行时间是：35075毫秒
my_strlen执行时间是：770397毫秒

很显然，标准库中strlen的消耗仅是my_strlen的十分之一不到，且其性能消耗随着字符串长度的增加并未有近线性的增加，

而my_strlen则是变化明显。想必大家这时也能猜到my_strlen采用了传统的实现方式，即采用逐个字节判断是否为'\0'的方式，

这也与测试出的现象相符。本着刨根问底的精神，我在网上找到了GNU提供的C标准库中strlen实现的源码，

要看看GLIBC中strlen究竟采用何种技巧才达到了那么高的性能。

说实话在性能优化这方面自己一直还处于比较初级的位置，这也将是自己将来努力的一个方向。

下载了全部GLIBC的代码包，这个包还真不小。在string子目录下找到strlen.c，这就是大多数UNIX平台、

Linux平台以及绝大多数GNU软件使用的strlen的实现源码了。

这份代码由Torbjorn Granlund(还实现了memcpy)编写，Jim Blandy和Dan Sahlin提供了帮助和注释。

包括注释在内，GLIBC的strlen的代码足足有近130行，大致浏览一下，没有怎么看懂，可耐下心来细致阅读，

还是有些心得的。下面是strlen源码摘要版，后面我将针对这段代码写一些我的理解：

 /* Return the length of the null-terminated string STR.  Scan for
    the null terminator quickly by testing a longword at a time.

    Typographic dashes and empty quotes in the excerpt are restored to
    '-' and '\0'.  The 64-bit widening of the magic constants is kept so
    that the function is also correct where long is 8 bytes.  */
   size_t strlen (const char *str)
   {
           const char *char_ptr;
           const unsigned long int *longword_ptr;
           unsigned long int longword, himagic, lomagic;

           /* Handle the first few characters by reading one character at a time.
              Do this until CHAR_PTR is aligned on a longword boundary.  */
           for (char_ptr = str; ((unsigned long int) char_ptr
                & (sizeof (longword) - 1)) != 0;
                ++char_ptr)
                   if (*char_ptr == '\0')
                           return char_ptr - str;

           /* All these elucidatory comments refer to 4-byte longwords,
              but the theory applies equally well to 8-byte longwords.  */
           longword_ptr = (const unsigned long int *) char_ptr;

           himagic = 0x80808080L;
           lomagic = 0x01010101L;
           if (sizeof (longword) > 4)
           {
                   /* Two-step shift avoids a warning when long has 32 bits.  */
                   himagic = ((himagic << 16) << 16) | himagic;
                   lomagic = ((lomagic << 16) << 16) | lomagic;
           }
           if (sizeof (longword) > 8)
                   abort ();

           /* Instead of the traditional loop which tests each character,
              we will test a longword at a time.  The tricky part is testing
              if *any of the four* bytes in the longword in question are zero.  */
           for (;;)
           {
                   longword = *longword_ptr++;

                   if ( ((longword - lomagic) & himagic) != 0)
                   {
                           /* Which of the bytes was the zero?  If none of them were, it was
                              a misfire; continue the search.  */
                           const char *cp = (const char *) (longword_ptr - 1);

                           if (cp[0] == 0)
                                   return cp - str;
                           if (cp[1] == 0)
                                   return cp - str + 1;
                           if (cp[2] == 0)
                                   return cp - str + 2;
                           if (cp[3] == 0)
                                   return cp - str + 3;
                           if (sizeof (longword) > 4)
                           {
                                   if (cp[4] == 0)
                                           return cp - str + 4;
                                   if (cp[5] == 0)
                                           return cp - str + 5;
                                   if (cp[6] == 0)
                                           return cp - str + 6;
                                   if (cp[7] == 0)
                                           return cp - str + 7;
                           }
                   }
           }
   }

从这段代码开头作者的注释我们大致可以了解到该strlen实现的原理：

就是通过每次测试四个字节来代替传统实现中每次测试一个字节的方法。

知道这个原理了，那么还需要解决两个难题：
1) C标准库要求有很好的移植性，在绝大部分系统体系结构下都应该能正确运行。

那么每次拿出4个字节比较(unsigned long int)，就需要考虑内存对齐问题，

传入的字符串的首字符地址可不一定在4对齐的地址上；

2) 如何对四个字节进行测试，找出其中某个字节为全0，这是个技巧问题。

12～21行的代码解决的就是第一个问题：

          for (char_ptr = str; ((unsigned long int) char_ptr

               & (sizeof (longword) - 1)) != 0;

               ++char_ptr)

                  if (*char_ptr == '\0')

                          return char_ptr - str;

          /* All these elucidatory comments refer to 4-byte longwords,

 19            but the theory applies equally well to 8-byte longwords.  */

          longword_ptr = (unsigned long int *) char_ptr;

作者通过一个for-loop找到传入字符串中第一个地址对齐到4的字符的地址，由于该地址已经对齐到4，

所以最后一行那个强制转型是安全的。虽然可以通过圆整算式直接得到该对齐地址，但是考虑到这个区间可能存在的'\0'，

一个字符一个字符比对也是不可避免的。在很多严格对齐的架构上(比如SUN的SPARC平台)，

编译器一般会将字符串地址在编译期就放到对齐的地址上，这样一来，实际执行strlen时for-loop很少能执行一步。

第二个问题作者则是通过一个"带前提"的技巧来解决的。作者设定了两个掩码变量：

          himagic = 0x80808080L;

          lomagic = 0x01010101L;

并通过一个conditional expression完成了对四字节中全0字节的检测：

((longword - lomagic) & himagic) != 0

我们将himagic和lomagic按bit展开：
himagic 1000 0000 1000 0000 1000 0000 1000 0000
lomagic 0000 0001 0000 0001 0000 0001 0000 0001

对于这样的代码，似乎没有什么理论可以遵循，需要在实践中去理解。

起初我构造了一个不含全0字节的longword，比如：
longword 1000 0001 1000 0001 1000 0001 1000 0001，

然后按照那个条件表达式计算后，居然也满足!=0的条件，是不是作者的逻辑有问题呢？

后来转念一想，这种逻辑是有“前提条件”的。回顾一下strlen是做什么的，其输入参数是任意的么？

当然不是。这里先假设输入字符串中每个字符的值都在[0, 127]的ASCII码范围内（strlen本身并不要求这一点：对于最高位为1的字节，该表达式只会产生一次“误判”，随后的逐字节检查找不到0，循环便继续向后搜索），

也就是说每个字节最高位的bit都是0，这样longword就应该是如下这个样子了：

longword 0xxx xxxx 0xxx xxxx 0xxx xxxx 0xxx xxxx

基于这样的前提我们考虑两种情况：

当longword中没有全0字节时，比如：

longword 0000 0001 0000 0001 0000 0001 0000 0001

这样在做完计算后，值为0，不满足条件。

当longword中有全零字节时，比如：

longword 0000 0000 0000 0001 0000 0001 0000 0001

这样在做完计算后，最高字节最高bit的值肯定为1，满足!=0条件，全0字节被检测出来。

也就是说一旦有全0字节，在减去lomagic时势必会产生借位，全0的那个字节在减去lomagic后最高位bit肯定由0变1，

这样与himagic一与，肯定不为0，就是这么检测出来的。

这一方法在64位平台依然适用，上面的代码摘要中省略了对64bit平台的特殊处理，为的是使代码逻辑更清晰，更易读。

 function CStrLength( CString : PAnsiChar ) : NativeUInt;

 var

   AnsiCharPtr : PAnsiChar;

   NativePtr : PNativeUInt;

   Native : NativeUInt;

   HiMagic : NativeUInt;

   LoMagic : NativeUInt;

   MagicBits : NativeUInt;

 begin

   // Handle the first few characters by reading one character at a time.

   // Do this until CStr is aligned on a longword boundary.

   AnsiCharPtr := CString;

   while NativeUInt( AnsiCharPtr ) and ( sizeof( NativeUInt ) -  ) <>  do

   begin

     Inc( AnsiCharPtr );

     if AnsiCharPtr^ = # then

     begin

       Result := NativeUInt( AnsiCharPtr ) - NativeUInt( CString );

       Exit;

     end;

   end;

   // All these elucidatory comments refer to -byte longwords,

   // but the theory applies equally well to -byte longwords.

   NativePtr := PNativeUInt( AnsiCharPtr );

   (* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits

     the "holes."  Note that there is a hole just to the left of

     each byte, with an extra at the end:

     bits:  01111110 11111110 11111110 11111111

     bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD

     The 1-bits make sure that carries propagate to the next 0-bit.

     The 0-bits provide holes for carries to fall into. *)

   MagicBits := $7EFEFEFF;

   HiMagic := $;

   LoMagic := $;

   // -bit version of the magic.

   // Do the shift in two steps to avoid a warning if long has  bits.

   if sizeof( Native ) >  then

   begin

     MagicBits := ( ( $7EFEFEFE shl  ) shl  ) or $FEFEFEFF;

     HiMagic := ( ( HiMagic shl  ) shl  ) or HiMagic;

     LoMagic := ( ( LoMagic shl  ) shl  ) or LoMagic;

   end;

   (* We tentatively exit the loop if adding MAGIC_BITS to

     LONGWORD fails to change any of the hole bits of LONGWORD.

     1) Is this safe?  Will it catch all the zero bytes?

     Suppose there is a byte with all zeros.  Any carry bits

     propagating from its left will fall into the hole at its

     least significant bit and stop.  Since there will be no

     carry from its most significant bit, the LSB of the

     byte to the left will be unchanged, and the zero will be

     detected.

     2) Is this worthwhile?  Will it ignore everything except

     zero bytes?  Suppose every byte of LONGWORD has a bit set

     somewhere.  There will be a carry into bit 8.  If bit 8

     is set, this will carry into bit 16.  If bit 8 is clear,

     one of bits 9-15 must be set, so there will be a carry

     into bit 16.  Similarly, there will be a carry into bit

     24.  If one of bits 24-30 is set, there will be a carry

     into bit 31, so all of the hole bits will be changed.

     The one misfire occurs when bits 24-30 are clear and bit

     31 is set; in this case, the hole at bit 31 is not

     changed.  If we had access to the processor carry flag,

     we could close this loophole by putting the fourth hole

     at bit 32!

     So it ignores everything except 128's, when they're aligned

     properly. *)

   // Instead of the traditional loop which tests each character,

   // we will test a longword at a time.  The tricky part is testing

   // if *any of the four* bytes in the longword in question are zero.

   while True do

   begin

     Native := NativePtr^;

     Inc( NativePtr );

     // http://stackoverflow.com/questions/2372315/how-to-implement-strlen-as-fast-as-possible

     if ( ( Native - LoMagic ) and HiMagic                ) <> 0 then : NULL or Byte >= 0x80
        if ( ( Native - LoMagic ) and HiMagic and not Native ) <> 0 then : NULL only

     begin

       Dec( NativePtr );

       // Which of the bytes was the zero?

       AnsiCharPtr := PAnsiChar( NativePtr );

       if AnsiCharPtr[  ] = # then

       begin

         Result := NativeUInt( AnsiCharPtr ) - NativeUInt( CString );

         Exit;

       end;

       if AnsiCharPtr[  ] = # then

       begin

         Result := NativeUInt( AnsiCharPtr ) - NativeUInt( CString ) + ;

         Exit;

       end;

       if AnsiCharPtr[  ] = # then

       begin

         Result := NativeUInt( AnsiCharPtr ) - NativeUInt( CString ) + ;

         Exit;

       end;

       if AnsiCharPtr[  ] = # then

       begin

         Result := NativeUInt( AnsiCharPtr ) - NativeUInt( CString ) + ;

         Exit;

       end;

       if sizeof( Native ) >  then

       begin

         if AnsiCharPtr[  ] = # then

         begin

           Result := NativeUInt( AnsiCharPtr ) - NativeUInt( CString ) + ;

           Exit;

         end;

         if AnsiCharPtr[  ] = # then

         begin

           Result := NativeUInt( AnsiCharPtr ) - NativeUInt( CString ) + ;

           Exit;

         end;

         if AnsiCharPtr[  ] = # then

         begin

           Result := NativeUInt( AnsiCharPtr ) - NativeUInt( CString ) + ;

           Exit;

         end;

         if AnsiCharPtr[  ] = # then

         begin

           Result := NativeUInt( AnsiCharPtr ) - NativeUInt( CString ) + ;

           Exit;

         end;

       end;

     end;

   end;

 end;

glibc strlen delphi pascal的更多相关文章

Delphi / Pascal 语法知识干货
********************************************* Pascal.Delph干货 *************************************** ...
NotePad++ delphi/Pascal函数过程列表插件
从cnpack上爬下来的函数过程列表点击下载
Delphi的分配及释放---New/Dispose, GetMem/FreeMem及其它函数的区别与相同
转载自:http://www.cnblogs.com/qiusl/p/4028437.html?utm_source=tuicool 我估摸着内存分配+释放是个基础的函数,有些人可能没注意此类函数或细 ...
delphi.memory.分配及释放---New/Dispose, GetMem/FreeMem及其它函数的区别与相同
我估摸着内存分配+释放是个基础函数,有些人可能没注意此类函数或细究,但我觉得还是弄明白的好. 介绍下面内存函数前,先说一下MM的一些过程,如不关心可忽略: TMemoryManager = recor ...
delphi.位操作
位操作网上有很多介绍,请上网google/baidu,比如: 位操作技巧实例大全: http://blog.csdn.net/g_spider/article/details/5750665 位操作基 ...
[Delphi]Delphi开发的一些技巧
一.提高查询效率先进行准备查询操作: CustomerQuery.Close; if not (CustomerQuery.Prepared) then -->查询是否已准备好 Customer ...
delphi的tserversocket控件如何接收16进制数
http://bbs.csdn.net/topics/390473005 对方客户端发送数据如:68 00 00··········:接收完成后,数据长度没错(13),但是显示接收结果时,只显示一个字 ...
Delphi MDI程序父窗体如何调用当前活动子窗体的函数/过程
一个MDI文本文件编辑器打开了N个子窗体子窗体的.pas文件有一些public的过程和函数我想在父窗体调用当前活动的子窗体函数我用Self.ActiveChildForm无法调用直接frmEdit.x ...
Pascal编译器大全（非常难得）
http://www.pascaland.org/pascall.htm Some titles (french) : Compilateurs Pascal avec sources = compi ...

随机推荐

嵌入式 H264—MP4格式及在MP4文件中提取H264的SPS、PPS及码流
一.MP4格式基本概念 MP4格式对应标准MPEG-4标准(ISO/IEC14496) 二.MP4封装格式核心概念 1 MP4封装格式对应标准为 ISO/IEC 14496-12(信息技术视听对象 ...
Wiki动画回顾系列序&&目录
嘛,前前后后看了太多动画,我自己一直想做的事也是喜欢能做一款acg相关的应用,但一直没有好的点子,当然纠结到最后还是需要一个比较好的社区来让大家加入进来.一直有人让我给他们推番,而我也慢慢懂得“人心” ...
nohub命令
http://jingyan.baidu.com/article/335530daa4707f19cb41c3ef.html
设计模式工厂-Factory
在开始笔记之前先推荐一个网站:http://design-patterns.readthedocs.org/zh_CN/latest/index.html 网站对每一个Pattern都有详尽的解说.并 ...
adb device出现error：unknown host service
最近在windows下使用adb device时,提示“ADB server didn't ACK” & “error: unknown host service”错误. 网上搜索了下,是说a ...
java&c# dec 加密，通用
java /** * 解密DES * @param key 密钥,长度必须是8的倍数 * @param data 数据源 * @return 解密内容 */ public final static S ...
linux下设置SSH无密码登陆
SSH配置主机cloudgis22.edu.cn:192.168.3.21 主机cloudgis33.edu.cn:192.168.2.174 假设需要配置主机A无密码登录主机A,主机B,先确保所有 ...
html5 canvas图片翻转
<!doctype html> <html> <head> <meta charset="utf-8"> <title> ...
mediawiki 的使用 2
要想外部电脑能访问你的网站,网站部署好后,在LocalSettings.php 里将这句 $wgServer = "http://localhost"; 改成 $wgServer ...
Application_Error
//出现未捕捉的异常时,系统调用本方法,一般用于记录日志.错误页的重定向一般在web.config中设置. protected void Application_Error(object ...

glibc strlen delphi pascal

glibc strlen delphi pascal的更多相关文章

随机推荐

热门专题