织梦网站怎样做百度主动推送,wordpress pagelines,wordpress 收费阅读,网站建设与维护成本常用的字符串Hash函数还有ELFHash#xff0c;APHash等等#xff0c;都是十分简单有效的方法。这些函数使用位运算使得每一个字符都对最后的函数值产生影响。另外还有以MD5和SHA1为代表的杂凑函数#xff0c;这些函数几乎不可能找到碰撞。 常用字符串哈希函数有 BKDRHashAPHash等等都是十分简单有效的方法。这些函数使用位运算使得每一个字符都对最后的函数值产生影响。另外还有以MD5和SHA1为代表的杂凑函数这些函数几乎不可能找到碰撞。 常用字符串哈希函数有 BKDRHashAPHashDJBHashJSHashRSHashSDBMHashPJWHashELFHash等等。对于以上几种哈希函数我对其进行了一个小小的评测。 Hash函数数据1数据2数据3数据4数据1得分数据2得分数据3得分数据4得分平均分BKDRHash20477448196.5510090.9582.0592.64APHash23475449396.5588.4610051.2886.28DJBHash22497547496.5592.31010083.43JSHash14476150610084.6296.8317.9581.94RSHash10486150510010051.5820.5175.96SDBMHash32484950493.192.3157.0123.0872.41PJWHash302648785130043.89021.95ELFHash302648785130043.89021.95其中数据1为100000个字母和数字组成的随机串哈希冲突个数。数据2为100000个有意义的英文句子哈希冲突个数。数据3为数据1的哈希值与 1000003(大素数)求模后存储到线性表中冲突的个数。数据4为数据1的哈希值与10000019(更大素数)求模后存储到线性表中冲突的个数。 经过比较得出以上平均得分。平均数为平方平均数。可以发现BKDRHash无论是在实际效果还是编码实现中效果都是最突出的。APHash也是较为优秀的算法。DJBHash,JSHash,RSHash与SDBMHash各有千秋。PJWHash与ELFHash效果最差但得分相似其算法本质是相似的。 HashFun.h unsigned int SDBMHash(char *str){ unsigned int hash 0; while (*str) { // equivalent to: hash 65599*hash (*str); hash (*str) (hash 6) (hash 16) - hash; } return (hash 0x7FFFFFFF);}// RS Hash Functionunsigned int RSHash(char *str){ unsigned int b 378551; unsigned int a 63689; unsigned int hash 0; while (*str) { hash hash * a (*str); a * b; } return (hash 0x7FFFFFFF);}// JS Hash Functionunsigned int JSHash(char *str){ unsigned int hash 1315423911; while (*str) { hash ^ ((hash 5) (*str) (hash 2)); } return (hash 0x7FFFFFFF);}// P. J. Weinberger Hash Functionunsigned int PJWHash(char *str){ unsigned int BitsInUnignedInt (unsigned int)(sizeof(unsigned int) * 8); unsigned int ThreeQuarters (unsigned int)((BitsInUnignedInt * 3) / 4); unsigned int OneEighth (unsigned int)(BitsInUnignedInt / 8); unsigned int HighBits (unsigned int)(0xFFFFFFFF) (BitsInUnignedInt - OneEighth); unsigned int hash 0; unsigned int test 0; while (*str) { hash (hash OneEighth) (*str); if ((test hash HighBits) ! 0) { hash ((hash ^ (test ThreeQuarters)) (~HighBits)); } } return (hash 0x7FFFFFFF);}// ELF Hash Functionunsigned int ELFHash(char *str){ unsigned int hash 0; unsigned int x 0; while (*str) { hash (hash 4) (*str); if ((x hash 0xF0000000L) ! 0) { hash ^ (x 24); hash ~x; } } return (hash 0x7FFFFFFF);}// BKDR Hash Functionunsigned int BKDRHash(char *str){ unsigned int seed 131; // 31 131 1313 13131 131313 etc.. unsigned int hash 0; while (*str) { hash hash * seed (*str); } return (hash 0x7FFFFFFF);}// DJB Hash Functionunsigned int DJBHash(char *str){ unsigned int hash 5381; while (*str) { hash (hash 5) (*str); } return (hash 0x7FFFFFFF);}// AP Hash Functionunsigned int APHash(char *str){ unsigned int hash 0; int i; for (i0; *str; i) { if ((i 1) 0) { hash ^ ((hash 7) ^ (*str) ^ (hash 3)); } else { hash ^ (~((hash 11) ^ (*str) ^ (hash 5))); } } return (hash 0x7FFFFFFF);} 以上为转载内容 以下是自己测试Hash算法效果的函数。 main.cpp #includeiostream#includefstream#includecstdlib#includecstring#includectime#includeHashFun.husing namespace std;/* 从Dict.txt读入英文单词表总共24018个单词 测试哈希函数的分布情况*/void ReadDict(char ** dict,int len){ fstream dictFile; dictFile.open(Dict.txt,ios::in); if(!dictFile) { coutOpenFailed.endl; exit(-1);} char tmpString[50]; len0; int sl; while(!dictFile.eof()) { dictFile.getline(tmpString,50); slstrlen(tmpString); if(sl0 tmpString[sl-1]\r) { tmpString[sl-1]\0; --sl; } if(sl0) { dict[len]new char[sl]; if(dict[len]NULL) { coutMemory fail.endl;exit(-1);} strcpy(dict[len],tmpString); //coutdict[len]endl; len; } } dictFile.close();}const int MaxHashLen49999;void Analise(unsigned int HashHeight[]){ const unsigned int AreaNum20; unsigned int AreaHeight[AreaNum]; const unsigned int MaxRecordHeight10; //记录各自出现高度超过10的放在一起统计 unsigned int NumOfHeight[MaxRecordHeight1]; memset(AreaHeight,0,sizeof(AreaHeight)); memset(NumOfHeight,0, sizeof(NumOfHeight)); unsigned int step MaxHashLen/AreaNum1,wordNum0,MaxHeight0; for(int i0;iMaxHashLen;i) { wordNumHashHeight[i]; if( HashHeight[i]MaxRecordHeight ) NumOfHeight[MaxRecordHeight]; else NumOfHeight[HashHeight[i]]; if(HashHeight[i]MaxHeight) MaxHeightHashHeight[i]; AreaHeight[i/step]HashHeight[i]; } int searchtime0; for(unsigned int i1;iMaxRecordHeight;i) searchtime(i1)*i*NumOfHeight[i]; //有i*NumOfHeight[i]个的单词在高度为i的链表里 searchtime((MaxRecordHeightMaxHeight)/21)*(MaxRecordHeightMaxHeight)/2*NumOfHeight[MaxRecordHeight]; searchtime/2; cout--------Hash分布如下endl; for(unsigned int i0;iAreaNum;i) cout[ i*step , (i1)* step ]: AreaHeight[i]endl; cout--------链表高度情况endl; for(unsigned int i0;iMaxRecordHeight;i) cout高度为: i 的链表个数为 NumOfHeight[i]endl; cout高度大于 MaxRecordHeight 的链表个数为 NumOfHeight[MaxRecordHeight]endl; cout--------链表最高高度MaxHeightendl; cout--------单词表个数为wordNumendl; cout--------平均链表高度float(wordNum)/(MaxHashLen-NumOfHeight[0])endl; cout--------链表的利用率1-float(NumOfHeight[0])/MaxHashLenendl; cout--------平均查找次数1.0*searchtime/wordNumendl;}int main(){ char *dict[24100]; //存储单词表 int wordNum; unsigned int HashHeight[MaxHashLen],hashvalue; memset(HashHeight,0,sizeof(HashHeight)); ReadDict(dict,wordNum); unsigned int startTime,curTime; startTimeclock(); for(int i0;iwordNum;i) { hashvalue SDBMHash( dict[i] ) % MaxHashLen;// hashvalue RSHash( dict[i] ) % MaxHashLen;// hashvalue JSHash( dict[i] ) % MaxHashLen;// hashvalue PJWHash( dict[i] ) % MaxHashLen;// hashvalue ELFHash( dict[i] ) % MaxHashLen;// hashvalue BKDRHash( dict[i] ) % MaxHashLen;// hashvalue DJBHash( dict[i] ) % MaxHashLen;// hashvalue APHash( dict[i] ) % MaxHashLen; HashHeight[ hashvalue ]; //couthashvalueendl; } curTimeclock(); Analise(HashHeight); coutcost time(curTime-startTime)/1000 msendl; //for(int i0;iMaxHashLen;i) // coutHashHeight[i]endl; return 0;} 我的测试说明 测试的内容是从英汉词典Dict.txt读取24018个单词将哈希值对49999求余观察Hash算法的散列情况 英汉词典Dict.txt在这里下载http://wenku.baidu.com/view/4ff90a9851e79b8968022695.html 测试结果如下 方法最高高度平均查找次数BKDRHash61.2383SDBMHash51.2387JSHash61.2397DJBHash51.2403RSHash51.2417PJWHash51.2427ELFHash51.2427APHash61.2436转自 http://apps.hi.baidu.com/share/detail/39630021 http://www.cnblogs.com/atlantis13579/archive/2010/02/06/1664792.html 转载于:https://www.cnblogs.com/dongtian0359/archive/2011/07/10/2102192.html