实习日记:图像检索算法 LSH 的总结与分析
先贴上这两天刚出炉的C++代码。(利用 STL 偷了不少功夫,代码待优化)
Head.h
#ifndef HEAD_H
#define HEAD_H #include "D:\\LiYangGuang\\VSPRO\\MYLSH\\HashTable.h" #include <iostream>
#include <fstream>
#include <time.h>
#include <cstdlib>
#include <vector>
#include <map>
#include <set>
#include <string> using namespace std; void loadData(bool (*data)[], int n, char *filename);
void createTable(HashTable HTSet[], bool data[][], bool extDat[][n][k] );
void insert(HT HTSet[], bool (*extDat)[n][k]);
void standHash(HT HTSet[]);
void search(vector<int>& record, bool query[], HT HTSet[]);
/*int getPosition(int V[], std::string s, int N);*/ #endif
HashTable.h
#include <string>
// NOTE(review): the initialisers of k, l and n are empty in this listing, so the enum
// does not compile as shown; the original values must be restored before building.
// Within this header k sizes R and RNum, and M (defined as n) sizes Hash2.
#include <vector> enum{ k = , l = , n = , M = n}; typedef struct
{
// A bucket groups every item that produced the same key string.
std::string key;
std::vector<int> elem; // indices of the items stored in this bucket
} bucket; struct INT
{
// One slot/node of a Hash2 chain: `used` marks whether `val` holds a value,
// `next` links to the following node of the same slot (NULL at the end).
bool used;
int val;
struct INT * next;
// Starts as an empty, unlinked node; val() value-initialises val.
INT() : used(false), val(), next(NULL){}
}; typedef struct HashTable
{
// One hash table: its sampled dimensions, per-dimension weights, buckets, and
// the second-level array whose chains hold indices into BukSet.
int R[k]; // k random dimensions
int RNum[k]; // random numbers less than M
//string DC; // the contents of k dimensions
std::vector<bucket> BukSet;
INT Hash2[M];
} HT;
getPosition.h
#include <string>
/**
 * Maps a key string of digit characters to a slot index in [0, M).
 *
 * The result is the sum of V[col] * (digit value of s[col]) over the first N
 * characters, reduced modulo M after every step.
 *
 * @param V  per-position weights; must hold at least N entries
 * @param s  key made of digit characters
 * @param N  number of key positions to hash (callers pass k)
 * @return   slot index, always smaller than M
 */
inline int getPosition(int V[], std::string s, int N)
{
    // Honour N instead of the global k, and never read past the end of a short key.
    const int keyLen = static_cast<int>(s.size());
    const int count = N < keyLen ? N : keyLen;

    int position = 0;
    for (int col = 0; col < count; ++col)
    {
        position += V[col] * (s[col] - '0');
        position %= M;
    }
    return position;
}
computeDistance.h
// Hamming distance: counts the positions in [0, N) where v1 and v2 differ.
inline int distance(bool v1[], bool v2[], int N)
{
    int mismatches = 0;
    int idx = 0;
    while (idx < N)
    {
        if (v1[idx] != v2[idx])
            ++mismatches;
        ++idx;
    }
    return mismatches;
}
main.cpp
#include "Head.h"
#include "D:\\LiYangGuang\\VSPRO\\MYLSH\\computeDistance.h"
using namespace std;
// length of sub hashtable, as well the number of elements.
const int MAX_Q = 1000; HT HTSet[l]; bool data[n][128];
bool extDat[l][n][k]; bool query[MAX_Q][128]; // set the query item to 1000. int main(int argc, char *argv)
{
/************************************************************************/
/* Firstly, create the HashTables */
/************************************************************************/
char *filename = "D:\\LiYangGuang\\VSPRO\\MYLSH\\data.txt";
loadData(data, n, filename);
createTable(HTSet, data, extDat);
insert(HTSet,extDat);
standHash(HTSet); /************************************************************************/
/* Secondly, start the LSH search */
/************************************************************************/ char *queryFile = "D:\\LiYangGuang\\VSPRO\\MYLSH\\query.txt";
loadData(query, MAX_Q, queryFile);
clock_t time0 = clock();
for(int qId = 0; qId < MAX_Q; ++qId)
{
vector<int> record;
clock_t timeA = clock();
search(record, query[qId], HTSet);
set<int> Dis;
for(size_t i = 0; i < record.size(); ++i)
Dis.insert(distance(data[record[i]], query[qId]));
clock_t timeB = clock();
cout << "第 " << qId + 1 << " 次查询时间:" << timeB - timeA << endl;
}
clock_t time1 = clock();
cout << "总查询时间:" << time1 - time0 << endl; return 0; }
loadData.cpp
#include <string>
#include <fstream>

/**
 * Loads n rows of 128 bits each from a text file with one row per line.
 *
 * Each character c of a line is stored as (c - '0') & 1. Positions that cannot
 * be read (the file is missing, has fewer than n lines, or a line is shorter
 * than 128 characters) are stored as false instead of indexing past the line.
 *
 * @param data      destination with at least n rows of 128 bools
 * @param n         number of rows to fill
 * @param filename  path of the text file to read
 */
void loadData(bool (*data)[128], int n, char* filename)
{
    std::ifstream ifs(filename, std::ios::in);   // closed automatically on return
    std::string line;
    for (int row = 0; row < n; ++row)
    {
        // A failed getline leaves `line` untouched, so reset it explicitly.
        if (!std::getline(ifs, line))
            line.clear();

        for (int col = 0; col < 128; ++col)
        {
            const bool present = static_cast<std::size_t>(col) < line.size();
            data[row][col] = present ? (((line[col] - '0') & 1) != 0) : false;
        }
    }
}
creatTable.cpp
#include "HashTable.h"
#include <ctime> void createTable(HT HTSet[], bool data[][128], bool extDat[][n][k] )
{
srand((unsigned)time(NULL));
for(int tableNum = 0; tableNum < l; ++tableNum)
{ /* creat the ith Table;*/ for(int randNum = 0; randNum < k; ++randNum)
{
HTSet[tableNum].R[randNum] = rand() % 128;
HTSet[tableNum].RNum[randNum] = rand() % M; for(int item = 0; item < n; ++item)
{
extDat[tableNum][item][randNum] =
data[item][HTSet[tableNum].R[randNum]];
}
}
}
}
insertData.cpp
#include "HashTable.h"
#include <iostream>
#include <map>
using namespace std;

// Maps a bucket key to that bucket's index in BukSet for the table currently
// being filled; emptied after each table.
map<string, int> deRepeat;

/**
 * Returns true when the first n entries of V and V2 are identical.
 * (The index is advanced on every step; previously it never moved, so the loop
 * could not terminate once the first pair matched.)
 */
bool equal(bool V[], bool V2[], int n)
{
    for (int i = 0; i < n; ++i)
    {
        if (V[i] != V2[i])
            return false;
    }
    return true;
}

/**
 * Appends one character per entry of v ('0' + v[i]) to s and returns the result.
 *
 * @param v  bit values to encode
 * @param n  number of entries of v to encode
 * @param s  prefix the characters are appended to
 */
string itoa(bool *v, int n, string s)
{
    for (int i = 0; i < n; ++i)
        s.push_back(static_cast<char>(v[i] + '0'));
    return s;
}

/**
 * Fills the bucket set of every table.
 *
 * Each item's k sampled bits are turned into a key string; items with the same
 * key share a bucket. Item 0 always opens the first bucket silently; for every
 * later item the item number and "exist"/"creat" are printed to cout.
 */
void insert(HT HTSet[], bool (*extDat)[n][k])
{
    for (int t = 0; t < l; ++t)   /* t: table */
    {
        vector<bucket> &buckets = HTSet[t].BukSet;
        int bktNum = 0;

        // Seed the table with item 0 in bucket 0.
        bucket first;
        first.key = itoa(extDat[t][0], k, string(""));
        first.elem.push_back(0);
        buckets.push_back(first);
        deRepeat.insert(make_pair(first.key, bktNum++));

        for (int item = 1; item < n; ++item)
        {
            cout << item << endl;
            const string key = itoa(extDat[t][item], k, string(""));

            // Single lookup, reused for both the test and the access.
            map<string, int>::iterator hit = deRepeat.find(key);
            if (hit != deRepeat.end())
            {
                buckets[hit->second].elem.push_back(item);
                cout << "exist" << endl;
            }
            else
            {
                bucket fresh;
                fresh.key = key;
                fresh.elem.push_back(item);
                buckets.push_back(fresh);
                deRepeat.insert(make_pair(fresh.key, bktNum++));
                cout << "creat" << endl;
            }
        }
        deRepeat.clear();
    }
}
standHash.cpp
#include "HashTable.h"
#include <iostream>
#include "getPosition.h"

/**
 * Builds the second-level hash of every table.
 *
 * Each bucket's key is hashed with getPosition to a slot of Hash2, and the
 * bucket's index is stored there: in the slot itself while it is unused,
 * otherwise in a newly allocated node appended to the end of the slot's chain.
 * A progress line is printed after each table.
 */
void standHash(HT HTSet[])
{
    for (int t = 0; t < l; ++t)
    {
        HT &table = HTSet[t];
        const int bucketCount = table.BukSet.size();

        for (int b = 0; b < bucketCount; ++b)
        {
            const int slot = getPosition(table.RNum, table.BukSet[b].key, k);

            // Advance to the last node of the chain (or stay on an unused head).
            INT *node = &table.Hash2[slot];
            for (; node->used && node->next != NULL; node = node->next)
            {
            }

            INT *target = node;
            if (node->used)
            {
                node->next = new INT;
                target = node->next;
            }
            target->val = b;
            target->used = true;
        }
        std::cout << "the " << t << "th HashTable has been finished." << std::endl;
    }
}
search.cpp
#include "HashTable.h"
#include "getPosition.h"
#include <vector>
using namespace std; void search(vector<int>& record, bool query[128], HT HTSet[])
{
for(int t = 0; t < l; ++t)
{
string temKey;
int temPos = 0;
for(int c = 0; c < k; ++c)
temKey.push_back(query[HTSet[t].R[c]] + '0');
temPos = getPosition(HTSet[t].RNum, temKey, k);
vector<int> bktId;
INT *p = &HTSet[t].Hash2[temPos];
while(p != NULL && p->used)
{
bktId.push_back(p->val);
p = p->next;
}
for(size_t i = 0; i < bktId.size(); ++i)
{
bucket temB = HTSet[t].BukSet[bktId[i]];
if(temKey == temB.key)
{
for(size_t j = 0; j < temB.elem.size(); ++j)
record.push_back(temB.elem[j]);
}
}
}
}
稍后总结。
代码调整:
main.cpp
#include "Head.h"
#include "D:\\LiYangGuang\\VSPRO\\MYLSH\\MYLSH\\computeDistance.h"
using namespace std;
#pragma warning(disable: 4996)
// length of sub hashtable, as well the number of elements.
const int MAX_Q = 1000; HT HTSet[l]; bool data[n][128];
bool extDat[l][n][k]; bool query[MAX_Q][128]; // set the query item to 1000. void getFileName(int v, char *FileName)
{
itoa(v, FileName, 10);
strcat(FileName, ".txt");
} int main(int argc, char *argv)
{
/************************************************************************/
/* Firstly, create the HashTables */
/************************************************************************/
char *filename = "D:\\LiYangGuang\\VSPRO\\MYLSH\\data.txt";
loadData(data, n, filename);
createTable(HTSet, data, extDat);
insert(HTSet,extDat);
standHash(HTSet); char *queryFile = "D:\\LiYangGuang\\VSPRO\\MYLSH\\query.txt";
loadData(query, MAX_Q, queryFile);
/************************************************************************/
/* Secondly, start the linear Search */
// /************************************************************************/
//
// vector<RECORD> record2;
// clock_t LineTime1 = clock();
// for(int qId = 0; qId < MAX_Q; ++qId)
// {
// for(int i = 0; i < n; ++i)
// {
// RECORD tem;
// tem.Id = i;
// tem.Dis = distance(data[i], query[qId]);
// record2.push_back(tem);
// }
// record2.clear();
// }
// clock_t LineTime2 = clock();
// float LineTime = (float)(LineTime2 - LineTime1) / CLOCKS_PER_SEC;
// cout << "全部线性查询时间:" << LineTime << " s," << " 合"
// << LineTime / 60 << " minutes."<< endl;
//
// /************************************************************************/
// /* Thirdly, start the LSH search */
// /************************************************************************/
//
// clock_t time0 = clock();
// ofstream ofs;
// char outFileName[10] = { '\0'};
// int K = 1; /// define KNN
// getFileName(K, outFileName);
// ofs.out(outFileName);
//
// for(int qId = 0; qId < MAX_Q; ++qId)
// {
// vector<RECORD> record;
// clock_t timeA = clock();
// search(record, query[qId], HTSet, data);
// if(getkNN(record,K))
// clock_t timeB = clock();
// record.clear();
// cout << "第 " << qId + 1 << " 次查询时间:" <<
// (float)(timeB - timeA) / CLOCKS_PER_SEC << " s" << endl;
// }
// clock_t time1 = clock();
// cout << "总查询时间:" << (float)(time1 - time0) / CLOCKS_PER_SEC
// << " s." << endl;
/************************************************************************/
/* */
/************************************************************************/
ofstream ofs;
char outFileName[10] = { '\0'};
int K = 1; /// define KNN
getFileName(K, outFileName);
ofs.open(outFileName, ios::out);
//ofs.precision(3);
float TotalLinearTime, TotalLSHTime;
TotalLinearTime = TotalLSHTime = 0; float TotalError = 0;
int TotalMiss = 0; vector<RECORD> record2;
for(int qId = 0; qId < MAX_Q; ++qId)
{
cout << "第 " << qId << " 次查询" << endl;
clock_t LineTime1 = clock();
for(int i = 0; i < n; ++i)
{
RECORD tem;
tem.Id = i;
tem.Dis = computeDistance(data[i], query[qId], 128);
record2.push_back(tem);
}
getkNN(record2); // 利用其对距离排序
clock_t LineTime2 = clock();
float LineTime = (float)(LineTime2 - LineTime1) / CLOCKS_PER_SEC;
TotalLinearTime += LineTime; /************************************************************************/
/* Thirdly, start the LSH search */
/************************************************************************/ vector<RECORD> record;
clock_t timeA = clock();
search(record, query[qId], HTSet, data);
if(!getkNN(record, K))
{
float queryTime = (float)(clock() - timeA) / CLOCKS_PER_SEC;
TotalLSHTime += queryTime;
ofs << "Miss\t" << "LSH Time: " << queryTime
<< "s\tLinear time: " << LineTime << 's' << endl;
TotalMiss += 1;
}
else{
float queryTime = (float)(clock() - timeA) / CLOCKS_PER_SEC;
TotalLSHTime += queryTime;
float error = 0;
if(record[K-1].Dis == 0)
error = 1;
else
error = (float)record2[K-1].Dis / record[K-1].Dis;
ofs << "Error: " << error << "\tLSH Time: "
<< queryTime << "s\tLinear time: " << LineTime << 's' << endl;
TotalError += error; }
record.clear();
record2.clear();
}
ofs << "Average errror: " << TotalError / 817 << endl;//recitfy
ofs << "Miss ratio: " << TotalMiss / MAX_Q << endl;
ofs << "Total query time: " << "LSH, " << TotalLSHTime / 3600 << " h; "
<< "Linear, " << TotalLinearTime / 3600 << " h." << endl;
ofs.close(); return 0; }
computeDistance.h
// Hamming distance between two bit vectors: the number of indices below N at
// which v1 and v2 hold different values.
inline int computeDistance(bool v1[], bool v2[], int N)
{
    int differing = 0;
    for (int pos = 0; pos != N && pos < N; ++pos)
    {
        const bool same = (v1[pos] == v2[pos]);
        differing += same ? 0 : 1;
    }
    return differing;
}
Search.cpp
#include "HashTable.h"
#include "getPosition.h"
#include "computeDistance.h"
#include <vector>
using namespace std; /*** 加入 data 项是为了计算距离 ***/
void search(vector<RECORD>& record, bool query[128], HT HTSet[], bool data[][128])
{
for(int t = 0; t < l; ++t)
{
string temKey;
int temPos = 0;
for(int c = 0; c < k; ++c)
temKey.push_back(query[HTSet[t].R[c]] + '0');
temPos = getPosition(HTSet[t].RNum, temKey, k);
vector<int> bktId;
INT *p = &HTSet[t].Hash2[temPos];
while(p != NULL && p->used)
{
bktId.push_back(p->val);
p = p->next;
}
for(size_t i = 0; i < bktId.size(); ++i)
{
bucket temB = HTSet[t].BukSet[bktId[i]];
if(temKey == temB.key)
{
for(size_t j = 0; j < temB.elem.size(); ++j)
{
RECORD temp;
temp.Id = temB.elem[j];
temp.Dis = computeDistance(data[temp.Id], query, 128);
record.push_back(temp);
} }
}
}
}
相关截图:
实习日记:图像检索算法 LSH 的总结与分析的更多相关文章
- 实习日记:图像检索算法 LSH 的总结与分析(matlab)
最开始仿真和精度测试,基于 matlab 完成的. Demo_MakeTable.m (生成 Hash 表) %======================================== %** ...
- OpenCV学习笔记(27)KAZE 算法原理与源码分析(一)非线性扩散滤波
http://blog.csdn.net/chenyusiyuan/article/details/8710462 OpenCV学习笔记(27)KAZE 算法原理与源码分析(一)非线性扩散滤波 201 ...
- 第2章 rsync算法原理和工作流程分析
本文通过示例详细分析rsync算法原理和rsync的工作流程,是对rsync官方技术报告和官方推荐文章的解释. 以下是本文的姊妹篇: 1.rsync(一):基本命令和用法 2.rsync(二):ino ...
- rsync算法原理和工作流程分析
本文通过示例详细分析rsync算法原理和rsync的工作流程,是对rsync官方技术报告和官方推荐文章的解释.本文不会介绍如何使用rsync命令(见rsync基本用法),而是详细解释它如何实现高效的增 ...
- Python实现的选择排序算法原理与用法实例分析
Python实现的选择排序算法原理与用法实例分析 这篇文章主要介绍了Python实现的选择排序算法,简单描述了选择排序的原理,并结合实例形式分析了Python实现与应用选择排序的具体操作技巧,需要的朋 ...
- Bag of Features (BOF)图像检索算法
1.首先,我们用surf算法生成图像库中每幅图的特征点及描述符. 2.再用k-means算法对图像库中的特征点进行训练,生成类心. 3.生成每幅图像的BOF.具体方法为:判断图像的每一个特征点与哪 ...
- TW实习日记:前三天
今天是2018年7月20号,周五.从周一开始实习到现在,终于想起来要写日记这种东西了,可以记录一下自己这一天所学所做所知也是蛮不错的.先简单总结一下自己的大学生活吧,算是多姿多彩,体验了很多东西.在大 ...
- TW实习日记:第31-32天
不知不觉的,实习的净工作天数,已经都超过一个月了.因为对工作内容不是很满意,所以打算月底离职,也不知道是公司太缺人还是我真的能干活,领导竟然三番两次找我让我再考虑...明天又要找我了,哎...随机应变 ...
- $2015 武汉森果公司web后端开发实习日记----书写是为了更好的思考
找暑期实习,3月份分别投了百度和腾讯的实习简历,都止步于笔试,总结的主要原因有两点:基础知识不扎实,缺乏项目经验.后来到拉勾网等网站上寻找实习,看了很多家,都还是处于观望状态.后来参加了武汉实习吧在大 ...
随机推荐
- sC#进阶系列——WebApi 接口参数不再困惑:传参详解
原文:http://www.cnblogs.com/landeanfen/p/5337072.html 一.get请求 对于取数据,我们使用最多的应该就是get请求了吧.下面通过几个示例看看我们的ge ...
- JDK1.5新特性
静态导入 import static java.util.Collections.*; import static java.lang.System.out; 1.如果静态导入的成员与本类的成员存在同 ...
- CSU 1325 莫比乌斯反演
题目大意: 一.有多少个有序数对(x,y)满足1<=x<=A,1<=y<=B,并且gcd(x,y)为p的一个约数: 二.有多少个有序数对(x,y)满足1<=x<=A ...
- Mytophome Deal
using AnfleCrawler.Common; using System; using System.Collections.Generic; using System.Linq; using ...
- Notes of learning AutoLayout
在XCode5中,如果我们添加一个Button或者Label,或者其他的什么标准View,而不设置任何constraints,IB会自动生成constraints,而这些constraints是fix ...
- jsonp 实现sso
这几天用jsonp实现了公司的sso. 这里面最重要的是对cookie的理解. cookie 就是一个网站存于本地的数据,在下次请求同一个网站时,发送给服务器,服务器端可以进行AUD操作,这种操作后 ...
- Polymer.js
Polymer 1.0 教程 安装 bower install --save Polymer/polymer
- Centos Python2 升级到Python3
1. 从Python官网到获取Python3的包, 切换到目录/usr/local/src #wget https://www.python.org/ftp/python/3.5.1/Python-3 ...
- (实用篇)PHP中单引号与双引号的区别分析
在PHP中,我们可以使用单引号或者双引号来表示字符串.不过我们作为开发者,应该了解其中的区别.单引号与双引号对于定义字符一个是可以解析变量一个是会把变量直接输出来,同时单引号与双引号在字符处理上单引号 ...
- Linux版网易云音乐播放音乐时无限显示“网络错误”的解决办法
安装 gstreamer0.10-plugins-good debian类系统: -plugins-good