1 Caffe数据集txt文本制作

训练 Caffe 模型时，经常需要制作如下图所示的数据集 txt 文本（图片来自网络）。

生成代码如下：

"""

caffe数据集txt文本制作

"""

import os

# Build the label list file: one "<image file name> <label>" line per image.

# Each entry pairs a class directory with the label written for its images.
class_dirs = [
    ('D:/smoke_datasets/val/0', 0),
    ('D:/smoke_datasets/val/1', 1),
]

# The context manager closes (and flushes) the list file even when a class
# directory is missing or a write fails part-way through.
with open(r'd:/val.txt', "w") as f:
    for path, count in class_dirs:
        filename = None
        for filename in os.listdir(path):
            f.write(filename + " " + str(count) + "\n")
        # Report the last file written for this class. An empty directory has
        # nothing to report; previously this line raised NameError (or
        # repeated the previous class) in that case.
        if filename is not None:
            print('{} class: {}'.format(filename, count))

2 jpg图像完整性检测

有时爬虫所获得图像可能不完整，需要进行图像完整性检测。代码如下：

"""

jpg图像完整性检测

"""

from skimage import io

from PIL import Image

import numpy as np

import os

def is_valid_jpg(path):
    """Return True if *path* is a ``.jpg`` file whose last two bytes are FF D9.

    The check is meant to detect incompletely downloaded JPEG files: the
    file must carry the ``jpg`` extension (case-insensitive) and end with
    the JPEG end marker.

    Args:
        path: Path of the file to inspect.

    Returns:
        bool: True when the extension is ``jpg`` and the file ends with
        ``b'\\xff\\xd9'``; False otherwise, including for files shorter
        than two bytes.
    """
    if path.split('.')[-1].lower() != 'jpg':
        return False
    with open(path, 'rb') as fr:
        try:
            # Position two bytes before the end of the file.
            fr.seek(-2, 2)
        except (IOError, OSError):
            # The file is shorter than two bytes, so it cannot end with
            # the marker.
            return False
        # The file is opened in binary mode, so the comparison must be
        # against a bytes literal; a str literal never compares equal to
        # bytes on Python 3.
        return fr.read() == b'\xff\xd9'

# Header-based check.
def is_jpg(path):
    """Return True if the first 11 bytes of *path* look like a JPEG header.

    The file must start with ``FF D8 FF E0`` or ``FF D8 FF E1`` and bytes
    6-10 must spell ``JFIF\\0`` or ``Exif\\0``.

    Args:
        path: Path of the file to inspect.

    Returns:
        bool: True when both header conditions hold, False otherwise
        (including for files shorter than 11 bytes).
    """
    # Close the handle promptly instead of leaving it to the garbage
    # collector.
    with open(path, 'rb') as fh:
        data = fh.read(11)
    # Binary reads yield bytes, so every pattern is a bytes literal; str
    # literals never compare equal to bytes on Python 3.
    if data[:4] not in (b'\xff\xd8\xff\xe0', b'\xff\xd8\xff\xe1'):
        return False
    if data[6:] not in (b'JFIF\0', b'Exif\0'):
        return False
    return True

def check_pic_PIL(path):
    """Return True if PIL can load and verify *path* and it yields a 3-D array.

    The file is opened three times on purpose: once to ``load()`` the pixel
    data, once to ``verify()`` it, and once more to convert it to a
    ``float32`` NumPy array. A fresh handle is used after ``verify()``, as
    the original code did.

    Args:
        path: Path of the image file to check.

    Returns:
        bool: False if any PIL/NumPy step raises, or if the resulting array
        does not have exactly three dimensions; True otherwise.
        NOTE(review): the 3-D requirement presumably rejects single-channel
        images as well as broken ones - confirm that this is intended.
    """
    try:
        # ``with`` releases each file handle promptly.
        with Image.open(path) as img:
            img.load()
        with Image.open(path) as img:
            img.verify()
        with Image.open(path) as img:
            pixels = np.array(img, dtype=np.float32)
    except Exception:
        # Any decoding/conversion failure marks the image as bad, but
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        return False
    return len(pixels.shape) == 3

def check_pic_skimage(path):
    """Return True if skimage can read *path* and it yields a 3-D array.

    Args:
        path: Path of the image file to check.

    Returns:
        bool: False if ``io.imread`` or the ``float32`` conversion raises,
        or if the resulting array does not have exactly three dimensions;
        True otherwise.
    """
    try:
        pixels = np.array(io.imread(path), dtype=np.float32)
    except Exception:
        # Any read/conversion failure marks the image as bad, but
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        return False
    return len(pixels.shape) == 3

if __name__ == '__main__':
    # Directories to scan.
    paths = ["d:/train"]

    # Report file: the name of every file that fails a check, one per line.
    # The context manager closes it even if a directory is missing or a
    # check raises.
    with open(r'd:/state.txt', "w") as f:
        for path in paths:
            print('the current path is : {}\n'.format(path))

            for filename in os.listdir(path):
                filepath = path + "/" + filename

                # Stage 1: cheap byte-level checks (end marker and header).
                if not (is_valid_jpg(filepath) and is_jpg(filepath)):
                    f.write(filename + "\n")
                    print('{} \n'.format(filepath))
                    continue

                # Stage 2: full decoding with both image libraries.
                if not (check_pic_PIL(filepath)
                        and check_pic_skimage(filepath)):
                    f.write(filename + "\n")
                    print("=" * 50)
                    print('{} \n'.format(filepath))
                    print("=" * 50)

            # Separator between scanned directories.
            print("*" * 50)

        print("end!")

3 图像随机移动复制

在 Windows 系统下，手动移动或复制大量图像既慢又费时，Linux 下的情况也差不多。通过 Python 可以快速地随机移动/复制大量图像，代码如下：

"""

图像随机移动复制

"""

import os, random, shutil

def moveFile(fileDir, tarDir, picknumber):
    """Move up to *picknumber* randomly chosen entries of *fileDir* to *tarDir*.

    Args:
        fileDir: Source directory; a trailing path separator is optional.
        tarDir: Target directory; a trailing path separator is optional.
            It is created when something has to be moved and it does not
            exist yet.
        picknumber: Number of entries to move. Values larger than the
            number of entries in *fileDir* are clamped to that number.

    Returns:
        None. The selected entry names are printed before they are moved.
    """
    pathDir = os.listdir(fileDir)
    # Never ask for more entries than the directory holds.
    picknumber = min(picknumber, len(pathDir))
    sample = random.sample(pathDir, picknumber)
    print(sample)
    if sample and not os.path.exists(tarDir):
        os.makedirs(tarDir)
    for name in sample:
        # os.path.join inserts the separator only when it is missing, so
        # both 'dir' and 'dir/' work; plain concatenation produced
        # 'dirname' paths for the former.
        shutil.move(os.path.join(fileDir, name), os.path.join(tarDir, name))
    return

def copyFile(fileDir, tarDir, picknumber):
    """Copy up to *picknumber* randomly chosen files of *fileDir* to *tarDir*.

    Args:
        fileDir: Source directory; a trailing path separator is optional.
        tarDir: Target directory; a trailing path separator is optional.
            It is created when something has to be copied and it does not
            exist yet.
        picknumber: Number of files to copy. Values larger than the number
            of entries in *fileDir* are clamped to that number.

    Returns:
        None. The selected entry names are printed before they are copied.
    """
    pathDir = os.listdir(fileDir)
    # Never ask for more entries than the directory holds.
    picknumber = min(picknumber, len(pathDir))
    sample = random.sample(pathDir, picknumber)
    print(sample)
    if sample and not os.path.exists(tarDir):
        os.makedirs(tarDir)
    for name in sample:
        # os.path.join inserts the separator only when it is missing, so
        # both 'dir' and 'dir/' work; plain concatenation produced
        # 'dirname' paths for the former.
        shutil.copy(os.path.join(fileDir, name), os.path.join(tarDir, name))
    return

if __name__ == '__main__':
    # Directory the images are taken from.
    fileDir = 'D:/datasets/train/'
    # Directory the images are moved/copied into.
    tarDir = 'D:/datasets/move/'

    # First move 500 random images from fileDir to tarDir, then copy
    # another random 500 the same way.
    for transfer in (moveFile, copyFile):
        transfer(fileDir, tarDir, 500)

4 图像尺寸统计

主要是统计图像尺寸，可以添加过滤条件，滤掉尺寸过小或者过大的图像。代码如下：

"""

统计数据集下图像尺寸

"""

import os

from PIL import Image

import pandas as pd

# Dataset directory; the concatenation below relies on the trailing slash.
path = 'D:/test/'

# Names of the entries in the dataset directory.
f = os.listdir(path)

# One (width, height) tuple per image, in listing order.
rows = []
for name in f:
    oldname = path + name
    # Close each image as soon as its size has been read so that large
    # datasets do not exhaust file handles.
    with Image.open(oldname) as im:
        rows.append((im.width, im.height))
    print(oldname)

# Build the table in one step instead of growing it row by row.
df = pd.DataFrame(rows, columns=['width', 'height'])

# Save the result. The former trailing ``f.close()`` was removed: ``f`` is
# the directory listing (a list), not a file, so the call raised
# AttributeError.
df.to_csv('test.csv')

5 图像名字后缀重命名

对图像的名字以及后缀名重新命名，代码如下：

"""

图像名称后缀重命名

"""

import os

# Directory holding the images to rename (trailing slash required by the
# concatenation below).
path = 'D:/train/'

# Directory the renamed images end up in.
save_path = 'D:/result/'

# Names of the entries in the source directory.
f = os.listdir(path)

# Rename every entry to smoke.<running index>.jpg inside save_path.
for count, entry in enumerate(f):
    oldname = path + entry
    print(oldname)
    newname = save_path + 'smoke.' + str(count) + '.jpg'
    os.rename(oldname, newname)

6 两文件夹文件比对

判断两个文件夹里面文件的文件名是不是相互对应，并移动文件名不对应的文件

# -*- coding: utf-8 -*-

"""

判断两个文件夹里面文件的文件名是不是相互对应，并移动不对应的文件

"""

import os,shutil

# path: the folder with fewer files; list_name: the list that receives the names.
def listdir(path, list_name):
    """Append the base name of every entry in *path* to *list_name*.

    The base name is the part of the entry name before its first ``'.'``.
    *list_name* is modified in place; nothing is returned.
    """
    list_name.extend(entry.split('.')[0] for entry in os.listdir(path))

def del_dir(checkpath, input_name, savepath):
    """Move files of *checkpath* whose base name is not in *input_name*.

    Args:
        checkpath: Directory whose entries are checked.
        input_name: Collection of known base names (the part of a file
            name before its first ``'.'``).
        savepath: Directory that receives the unmatched files; created if
            it does not exist.

    Returns:
        None. Matching entries are reported on stdout and left in place.
    """
    if not os.path.exists(savepath):
        os.makedirs(savepath)

    # Build the lookup set once: membership tests on a list are linear, which
    # made the loop quadratic for large folders.
    known_names = set(input_name)

    for file in os.listdir(checkpath):
        check_name = file.split('.')[0]
        if check_name in known_names:
            print('{} 在列表中'.format(check_name))
        else:
            # Not in the list: move the file out of the checked folder.
            srcfile = os.path.join(checkpath, file)
            shutil.move(srcfile, savepath)

# Base names of the files found in the reference folder ./2.
name = []
listdir(path='./2', list_name=name)

# Move every file of ./1 that has no counterpart in ./2 into ./3.
del_dir(checkpath='./1', input_name=name, savepath='./3')

7 绘制caffe模型的ROC曲线(二分类)

主要原理是通过 OpenCV DNN 模块调用 Caffe 模型（其他框架的模型也是同样的套路），对每张图像进行检测，保存该图像的真实标签以及模型预测其为正类的概率；然后调用 sklearn 计算 ROC 曲线和 AUC 值，并通过约登指数（Youden Index）获得 ROC 曲线上的最佳阈值，最后通过 matplotlib 绘图。约登指数定义为 $J = \mathrm{TPR} - \mathrm{FPR}$，等价于 $J = \mathrm{TPR} + \mathrm{TNR} - 1$。具体 ROC 原理可参考：

https://www.jianshu.com/p/c61ae11cc5f6

https://www.jianshu.com/p/82903edb58dc

https://blog.csdn.net/u014264373/article/details/80487766

'''

opencv调用caffe并计算roc

'''

import numpy as np

import matplotlib.pyplot as plt

import cv2

import os

from sklearn import metrics

# Directories holding the images whose true label is 0.
imagePath_0 = ['0']

# Directories holding the images whose true label is 1.
imagePath_1 = ['1']

# Label treated as the positive class.
poslabel = 1

# Model files: network definition and trained weights.
prototxtFile = 'deploy_227.prototxt'

modelFile = 'model_227.caffemodel'

# True label of every image processed so far (appended to by imageDetect).
trueResult = []

# Positive-class probability recorded for every processed image
# (appended to by imageDetect, in the same order as trueResult).
detectProbs = []

# Networks already loaded from disk, keyed by (prototxt path, model path).
_netCache = {}


def _loadCaffeNet():
    """Return the network for the configured model files, loading it once."""
    key = (prototxtFile, modelFile)
    if key not in _netCache:
        _netCache[key] = cv2.dnn.readNetFromCaffe(prototxtFile, modelFile)
    return _netCache[key]


# Classify a single image.
def detectCaffe(srcImg):
    """Run the Caffe model on *srcImg* and return the winning class.

    The image is turned into a 227x227 blob with scale factor 1, the mean
    value (92.713, 106.446, 118.115) and no red/blue swap, then passed
    through the network.

    Args:
        srcImg: Image as returned by ``cv2.imread``.

    Returns:
        tuple: ``(order, prob)`` - the index of the highest score in the
        first output row and that highest score itself.
    """
    detectImg = srcImg.copy()
    blob = cv2.dnn.blobFromImage(
        detectImg, 1, (227, 227), (92.713, 106.446, 118.115), swapRB=False)

    # Reuse the cached network: parsing the model files again for every
    # image dominated the run time of a whole-directory evaluation.
    net = _loadCaffeNet()
    net.setInput(blob)
    detections = net.forward()

    # Classification result: best class index and its score.
    scores = detections[0]
    order = scores.argmax()
    prob = scores.max()
    return order, prob

# Classify every image of one directory.
def imageDetect(detectImagePath, trueLabel):
    """Classify all images in *detectImagePath* and record the results.

    For each readable image, *trueLabel* is appended to the module-level
    ``trueResult`` list and the positive-class probability to
    ``detectProbs``. Files that ``cv2.imread`` cannot read are reported
    and skipped.

    Args:
        detectImagePath: Directory containing the images.
        trueLabel: True label shared by all images in that directory.
    """
    for imageFileName in os.listdir(detectImagePath):
        imageFilePath = os.path.join(detectImagePath, imageFileName)
        print("current detect image is: ", imageFileName)

        srcImg = cv2.imread(imageFilePath)
        if srcImg is None:
            print("error image is: ", imageFilePath)
            continue

        predictedClass, confidence = detectCaffe(srcImg)
        trueResult.append(trueLabel)

        # Always store the probability of the positive class: the reported
        # score when the positive class won, its complement otherwise.
        if predictedClass == poslabel:
            positiveProb = confidence
        else:
            positiveProb = 1 - confidence
        detectProbs.append(positiveProb)

# Draw the ROC curve from true labels, positive-class probabilities and the
# positive label.
def drawROC(trueResult, detectProbs, poslabel):
    """Plot the ROC curve, mark the best Youden point and save the figure.

    Args:
        trueResult: True label of each sample.
        detectProbs: Positive-class probability of each sample.
        poslabel: Label treated as the positive class.

    Returns:
        tuple: ``(fpr, tpr, thresholds, bestIndex)`` where *bestIndex* is
        the position of the first maximum of ``tpr - fpr``.
    """
    fpr, tpr, thresholds = metrics.roc_curve(
        trueResult, detectProbs, pos_label=poslabel)
    roc_auc = metrics.auc(fpr, tpr)

    # Youden index (TPR - FPR, equivalently TPR + TNR - 1) at every
    # threshold; the best operating point maximises it.
    youden = list(tpr - fpr)
    bestYouden = max(youden)
    bestIndex = youden.index(bestYouden)
    print("约登指数为{}".format(bestYouden))
    print("最佳约登指数阈值为:", thresholds[bestIndex])

    # ROC curve: false positive rate on x, true positive rate on y.
    plt.plot(fpr, tpr, color='darkorange',
             label='ROC curve (area = %0.2f)' % roc_auc)
    # Diagonal reference line.
    plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic example')
    plt.legend(loc="lower right")

    # Mark the point with the maximum Youden index.
    plt.plot(fpr[bestIndex], tpr[bestIndex], "ro")

    plt.savefig("roc.png", dpi=300)
    plt.show()

    return fpr, tpr, thresholds, bestIndex

def main():
    """Classify all configured images, draw the ROC curve and save its data.

    Returns:
        tuple: ``(fpr, tpr, thresholds)`` as produced by ``drawROC``.
    """
    # Walk the label-0 directories first, then the label-1 directories.
    for label, folders in ((0, imagePath_0), (1, imagePath_1)):
        for folder in folders:
            imageDetect(folder, label)

    # poslabel is the positive-class label.
    fpr, tpr, thresholds, _ = drawROC(trueResult, detectProbs, poslabel)

    # NOTE(review): np.save is documented to append '.npy' when the name
    # lacks it, so the last entry presumably lands in thresholds.npy.
    outputs = (
        ('fpr.npy', fpr),
        ('tpr.npy', tpr),
        ('thresholds', thresholds),
    )
    for target, values in outputs:
        np.save(target, values)

    return fpr, tpr, thresholds

# Script entry point: run the evaluation and keep the ROC arrays as
# module-level names.
if __name__ == '__main__':

    fpr, tpr, thresholds = main()

结果如图所示：

[常用工具] 深度学习Caffe处理工具的更多相关文章

人工智能深度学习Caffe框架介绍，优秀的深度学习架构
人工智能深度学习Caffe框架介绍,优秀的深度学习架构在深度学习领域,Caffe框架是人们无法绕过的一座山.这不仅是因为它无论在结构.性能上,还是在代码质量上,都称得上一款十分出色的开源框架.更重要 ...
win7 配置微软的深度学习caffe
win7 配置微软的深度学习caffe 官方下载: https://github.com/Microsoft/caffe 然后直接修改caffe目录下的windows目录下的项目的props文件 ...
NLP+VS︱深度学习数据集标注工具、方法摘录，欢迎补充~~
~~因为不太会使用opencv.matlab工具,所以在找一些比较简单的工具. . . 一.NLP标注工具BRAT BRAT是一个基于web的文本标注工具,主要用于对文本的结构化标注,用BRAT生成的 ...
【机器学习PAI实践十】深度学习Caffe框架实现图像分类的模型训练
背景我们在之前的文章中介绍过如何通过PAI内置的TensorFlow框架实验基于Cifar10的图像分类,文章链接:https://yq.aliyun.com/articles/72841.使用Te ...
深度学习图像标注工具VGG Image Annotator (VIA)使用教程
VGG Image Annotator (VIA)是一款开源的图像标注工具,由Visual Geometry Group开发. 可以在线和离线使用,可标注矩形.圆.椭圆.多边形.点和线.标注完成后,可 ...
深度学习caffe:Ubuntu16.04安装指南(1)
caffe [CPU ONLY] 2017-01-15 最简单的安装配置方式: 不用GPU加速,使用OPENCV2.4图像库, 这是根据官方教程(链接如下)简化而得到. Ubuntu 16.04 or ...
Net-Snmp工具（学习SNMP的工具，开源项目）简单使用
https://blog.csdn.net/mrzhangzifu/article/details/77882371 Net-Snmp工具的安装与配置操作系统:Ubuntu16.4 软件版本:n ...
深度学习caffe测试代码c++
#include <caffe/caffe.hpp> #include <opencv2/core/core.hpp> #include <opencv2/highgui ...
深度学习-Caffe编译测试的小总结
1. 搭建的环境和代码:win7 64bit + vs2013+CUDA7.5 http://blog.csdn.net/thesby/article/details/50880802 2. 编译,制 ...
深度学习—caffe框架训练文档
转存:LMDB E:\机器学习2\caffe资料\caffe_root\caffe-master\Build\x64\Release>convert_imageset.exe E:/机器学习2/ ...

随机推荐

细聊.Net Core中IServiceScope的工作方式
前言自从.Net Core引入IOC相关的体系之后,关于它的讨论就从来没有停止过,因为它是.Net Core体系的底层框架,你只要使用了.Net Core的时候就必然会用到它.当然关于使用它的过程中 ...
15. MongoDB系列之选择片键
1. 片键类型 1.1 升序片键升序片键通常类似于date或ObjectId--随着时间稳步增长的字段. 这种模式通常会使MongoDB更难保持块的平衡,因为所有的块都是由一个分片创建的. 1.2 ...
java中的栈（利用数组实现栈）
java中的栈(利用数组实现栈) 常见的数据结构:https://blog.csdn.net/weixin_43304253/article/details/119764275 栈的介绍 1.栈的英文 ...
SpringCloud整合分布式事务Seata 1.4.1 支持微服务全局异常拦截
项目依赖 SpringBoot 2.5.5 SpringCloud 2020.0.4 Alibaba Spring Cloud 2021.1 Mybatis Plus 3.4.0 Seata 1.4. ...
42.JSON Web Token认证
JSON Web Token认证介绍简称JWT认证,一般用于用户认证 JWT是一种相当新的标准,可用于基于token的身份验证与内置的TokenAuthentication方案不同,JWT不需要使 ...
软件架构（六）MVC架构历史
一.引子一个系统可能由很多子系统组成.各子系统内部高度内聚,子系统之间低耦合.子系统关注自己的职责.实现: 职责分离,关注点分离.----MVC架构早期就是为了分离视图.模型而诞生的. 注:很多 ...
SpringBoot 阶段测试 1
SpringBoot 阶段测试 1 目录 SpringBoot 阶段测试 1 1.使用JDK8新语法完成下列集合练习: 1.1 List中有1,2,3,4,5,6,7,8,9几个元素要求; (1) 将 ...
【第5篇】AI语音简介
1.3 AI语音简介 AI语音既人工智能语音技术,以语音识别技术为开端,实现人机语言的通信,包括语音识别技术(ASR).自然语言处理技术(NLP)和语音合成技术(TTS).通俗点说就是通过语音这个媒 ...
Arch Linux + KDE 配置&美化（持续更新~）
Arch Linux + KDE 配置&美化(持续更新~) 这篇文章着重记录archlinux + KDE的一个基本的配置过程.不包括安装过程(使用archInstall.sh).内容大概有以 ...
关于ASP.NET Core WebSocket实现集群的思考
前言提到WebSocket相信大家都听说过,它的初衷是为了解决客户端浏览器与服务端进行双向通信,是在单个TCP连接上进行全双工通讯的协议.在没有WebSocket之前只能通过浏览器到服务端的请求应答 ...

[常用工具] 深度学习Caffe处理工具