





Hamming weight(汉明重量)是指一个字符串中非0符号的个数(The Hamming weight of a string is the number of symbols that are different from the zero-symbol of the alphabet used.)。
应用到二进制符号序列中,二进制串中1的个数就是该串的 Hamming weight。那么上述的问题即转换成求解字串的 Hamming weight 的问题。


[cpp] view plaincopy

    //types and constants used in the functions below  

    typedef unsigned long long uint64;  //unsigned long long is at least 64 bits wide
//Masks used by the popcount functions; each selects the low half of every
//2-, 4-, 8-, 16-, 32- or 64-bit group of the word.
const uint64 m1  = 0x5555555555555555ULL; //binary: 0101...
const uint64 m2  = 0x3333333333333333ULL; //binary: 00110011..
const uint64 m4  = 0x0f0f0f0f0f0f0f0fULL; //binary: 4 zeros, 4 ones ...
const uint64 m8  = 0x00ff00ff00ff00ffULL; //binary: 8 zeros, 8 ones ...
const uint64 m16 = 0x0000ffff0000ffffULL; //binary: 16 zeros, 16 ones ...
const uint64 m32 = 0x00000000ffffffffULL; //binary: 32 zeros, 32 ones
const uint64 hff = 0xffffffffffffffffULL; //binary: all ones
const uint64 h01 = 0x0101010101010101ULL; //the sum of 256 to the power of 0,1,2,3...

//This is a naive implementation, shown for comparison,
//and to help in understanding the better functions.
//It uses 24 arithmetic operations (shift, add, and).
//Returns the number of 1 bits in x (0..64).
//Each step adds neighbouring fields pairwise, so the shift amount is always
//the width of the fields being added: 1, 2, 4, 8, 16, 32.
int popcount_1(uint64 x) {
    x = (x & m1 ) + ((x >>  1) & m1 ); //put count of each 2 bits into those 2 bits
    x = (x & m2 ) + ((x >>  2) & m2 ); //put count of each 4 bits into those 4 bits
    x = (x & m4 ) + ((x >>  4) & m4 ); //put count of each 8 bits into those 8 bits
    x = (x & m8 ) + ((x >>  8) & m8 ); //put count of each 16 bits into those 16 bits
    x = (x & m16) + ((x >> 16) & m16); //put count of each 32 bits into those 32 bits
    x = (x & m32) + ((x >> 32) & m32); //put count of each 64 bits into those 64 bits
    return (int)x; //x is now at most 64, so the narrowing is lossless
}

对应到上面代码中的第一步来说,x = (x & m1 ) + ((x >> 1) & m1 ),以8bit的串 x = abcdefgh 为例:
x&m1 = 0b0d0f0h
(x>>1)&m1 = 0a0c0e0g
求和得到:[a+b]2[c+d]2[e+f]2[g+h]2,这里[x]2 表示2位的二进制,其值=x(x表示10进制的值)。如果对应到64bit的串,那么这里将有32个2-bit的组合,即将64bit两两一组,并使用其来表示自身包含的1的个数。

代码的第二步:x = (x & m2 ) + ((x >> 2) & m2 ),同样使用8bit串来简化描述。
x&m2 = 00[c+d]2 00[g+h]2
(x>>2)&m2 = 00[a+b]2 00[e+f]2

第三步: x = (x & m4 ) + ((x >> 4) & m4 );
x&m4 = 0000[e+f+g+h]4
(x>>4)&m4 = 0000[a+b+c+d]4

对于64位的字串来说,只使用了24次算术操作,比起前面的算法来说明显减少了。

[cpp] view plaincopy

    //This uses fewer arithmetic operations than any other known
//implementation on machines with slow multiplication.
//It uses 17 arithmetic operations.
//Returns the number of 1 bits in x (0..64).
int popcount_2(uint64 x) {
    x -= (x >> 1) & m1;             //put count of each 2 bits into those 2 bits
    x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits
    x = (x + (x >> 4)) & m4;        //put count of each 8 bits into those 8 bits
    //From here on the upper bytes are allowed to accumulate garbage; only the
    //lowest byte is read at the end, and it cannot overflow (max value 64).
    x += x >>  8; //put count of each 16 bits into their lowest 8 bits
    x += x >> 16; //put count of each 32 bits into their lowest 8 bits
    x += x >> 32; //put count of each 64 bits into their lowest 8 bits
    return (int)(x & 0x7f); //7 bits are enough to hold 0..64
}

若a为1,那么只有两种情况:10-01 = 01,11-01 = 10,都符合上述事实。
这样 x -= (x >> 1) & m1 和 x = (x & m1 ) + ((x >> 1) & m1 ) 的结果是相同的,但前者少用了一次算术操作。


第四步后x = [H8|a+b]16[H8|c+d]16[H8|e+f]16[H8|g+h]16,这里H8代表高8位,由于我们不关心高8位的值(当然H的值是明显知道的),这里就用H代替。由于使用低8位完全可以表示0~64范围内的值,因此不用担心低八位溢出。


[cpp] view plaincopy

    //This uses fewer arithmetic operations than any other known
//implementation on machines with fast multiplication.
//It uses 12 arithmetic operations, one of which is a multiply.
//Returns the number of 1 bits in x (0..64).
int popcount_3(uint64 x) {
    x -= (x >> 1) & m1;             //put count of each 2 bits into those 2 bits
    x = (x & m2) + ((x >> 2) & m2); //put count of each 4 bits into those 4 bits
    x = (x + (x >> 4)) & m4;        //put count of each 8 bits into those 8 bits
    //Multiplying by h01 sums all eight byte counts into the top byte;
    //shifting right by 56 moves that byte down to bits 0..7.
    return (int)((x * h01) >> 56);  //returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ...
}

popcount3进一步进行了优化,只看最后一步:return (x * h01)>>56;
x*h01 = x*0x0101010101010101 = x+(x<<8)+(x<<16)...+(x<<56)
即x=[a+b+c+d+e+f+g+h|L56], L56指低56位


[cpp] view plaincopy

    //This is better when most bits in x are 0
//It uses 3 arithmetic operations and one comparison/branch per "1" bit in x.
//Returns the number of 1 bits in x (0..64).
int popcount_4(uint64 x) {
    int count;
    for (count = 0; x; count++) {
        x &= x - 1; //clears the lowest set bit, so the loop runs once per 1 bit
    }
    return count;
}

此算法基于这样一个事实:x-1使得以二进制表示的x,从低向高位开始包括第一个1在内的值,都由0变成1,由1变成0。如 11-01 = 10,10-01 = 01,01-01 = 00,100-001 = 011。
因此 x &= x-1 每次恰好清除x中最低位的那个1,循环执行的次数就是x中1的个数。

[cpp] view plaincopy

    //This is better if most bits in x are 0.
//It uses 2 arithmetic operations and one comparison/branch per "1" bit in x.
//It is the same as the previous function, but with the loop unrolled.
//f(y) clears the lowest set bit of x and returns y from the enclosing
//function as soon as x becomes zero, i.e. after y bits have been cleared.
#define f(y) if ((x &= x-1) == 0) return y;
int popcount_5(uint64 x) {
    if (x == 0) return 0;
    f( 1) f( 2) f( 3) f( 4) f( 5) f( 6) f( 7) f( 8)
    f( 9) f(10) f(11) f(12) f(13) f(14) f(15) f(16)
    f(17) f(18) f(19) f(20) f(21) f(22) f(23) f(24)
    f(25) f(26) f(27) f(28) f(29) f(30) f(31) f(32)
    f(33) f(34) f(35) f(36) f(37) f(38) f(39) f(40)
    f(41) f(42) f(43) f(44) f(45) f(46) f(47) f(48)
    f(49) f(50) f(51) f(52) f(53) f(54) f(55) f(56)
    f(57) f(58) f(59) f(60) f(61) f(62) f(63)
    return 64; //63 bits were cleared and x is still non-zero: all 64 bits were set
}
#undef f //keep the one-letter helper macro from leaking into later code
//Use this instead if most bits in x are 1 instead of 0:
//  #define f(y) if ((x |= x+1) == hff) return 64-y;
//It is shown as a comment because defining f a second time with a different
//body is an incompatible macro redefinition. With this variant the bookends
//must change as well: start with "if (x == hff) return 64;" and end with
//"return 0;".

以4bit的串为例,可以构造一个数组int counts[16]={0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4}.
对于4bit的x,x的hamming weight即为:counts[x].

typedef unsigned int uint32; //assume this gives 32-bits

//wordbits[v] holds the number of 1 bits in v, for every v between 0 and 65535.
//The table is filled on the first call to popcount() instead of being
//spelled out as a 65536-entry initializer.
static unsigned char wordbits[65536];

//Fills wordbits[]: the count for v is the count for v>>1 plus v's lowest bit.
//wordbits[0] is already 0 because the array has static storage duration.
static void init_wordbits(void) {
    unsigned int v;
    for (v = 1; v < 65536; v++) {
        wordbits[v] = (unsigned char)(wordbits[v >> 1] + (v & 1));
    }
}

//Returns the number of 1 bits in the 32-bit value i (0..32) by looking up
//its low and high 16-bit halves in wordbits[].
//NOTE: the first-call initialization is not synchronized; in a
//multi-threaded program call popcount() once before starting other threads.
static int popcount(uint32 i) {
    static int table_ready = 0;
    if (!table_ready) {
        init_wordbits();
        table_ready = 1;
    }
    return (wordbits[i & 0xFFFF] + wordbits[i >> 16]);
}

Hamming Weight还有很多应用,这里只是简单记录一下它在求解popcount上的用法。

