python小练习：涉及print，json，numpy

枚举参考文件夹中的文件，并与待比较文件夹中的同名文件逐字节比较，检查内容是否一致。

#! /usr/bin/python3.6

# -*- coding:utf-8 -*-

import os

import sys

import json

import numpy as np

from sqlalchemy import false

def cmp_file(ref_file: str, dst_file: str) -> bool:
    """Compare two files byte for byte and print the verdict.

    Args:
        ref_file: Path of the reference file; it must exist.
        dst_file: Path of the file checked against the reference.

    Returns:
        True when both files hold identical bytes; False when they differ
        or when ``dst_file`` does not exist.

    Raises:
        AssertionError: If ``ref_file`` does not exist.
    """
    ref_base_name = os.path.basename(ref_file)
    dst_base_name = os.path.basename(dst_file)

    assert os.path.exists(ref_file), f"ref file not exist: {ref_base_name}"

    if not os.path.exists(dst_file):
        print(f'dst file not exist: {dst_base_name}')
        # A missing file counts as a mismatch. Return the builtin bool so
        # the result is really falsy and matches the annotated return type.
        return False

    # Read both files as raw unsigned bytes so any file type can be compared.
    ref_data = np.fromfile(ref_file, dtype=np.ubyte, count=-1)
    dst_data = np.fromfile(dst_file, dtype=np.ubyte, count=-1)

    is_equal = bool(np.array_equal(ref_data, dst_data))
    print(is_equal, ": ", ref_base_name)
    return is_equal

def cmp_dir(ref_dir: str, dst_dir: str) -> None:
    """Compare every entry of ``ref_dir`` with its namesake in ``dst_dir``.

    Each name returned by ``os.listdir(ref_dir)`` is joined onto both
    directories and the resulting pair of paths is passed to ``cmp_file``.
    The individual results are not collected here.

    Args:
        ref_dir: Directory whose entries drive the comparison.
        dst_dir: Directory expected to contain entries with the same names.
    """
    print(f'\n==========>>> Start compare {ref_dir} and {dst_dir}')

    for entry in os.listdir(ref_dir):
        cmp_file(os.path.join(ref_dir, entry), os.path.join(dst_dir, entry))

def main():
    """Run the directory comparison described by a JSON config file.

    The config path is taken from ``sys.argv[1]``. The JSON object must
    provide ``ref_dir`` (the reference directory) and ``dst_dirs`` (a list of
    directories to compare against it); ``cmp_dir`` is called once per entry
    of ``dst_dirs``. When no config path is given, a usage line is printed
    and the function returns without doing anything.
    """
    if len(sys.argv) < 2:
        print('usage: dump_dir_cmp.py dir_config.json')
        return

    json_file = sys.argv[1]

    # Read the config as bytes so json detects UTF-8/16/32 by itself instead
    # of relying on the platform's locale-dependent default text encoding.
    with open(json_file, 'rb') as fp:
        js_data = json.load(fp)

    # The config file is already closed here; only the parsed data is needed.
    for dst_dir in js_data['dst_dirs']:
        cmp_dir(js_data['ref_dir'], dst_dir)


if __name__ == '__main__':
    main()

配置样例：

{

    "ref_dir": "./dump_data/NPU_DUMPF001_P0/tensorflow_squeezenet_task0_loop0",

    "dst_dirs": [

        "./dump_data/NPU_DUMPF002_P0/tensorflow_squeezenet_task0_loop0",

        "./dump_data/NPU_DUMPF002_P0/tensorflow_squeezenet_task1_loop0",

        "./dump_data/NPU_DUMPF002_P0/tensorflow_squeezenet_task2_loop0",

        "./dump_data/NPU_DUMPF002_P0/tensorflow_squeezenet_task3_loop0",

        "./dump_data/NPU_DUMPF002_P0/tensorflow_squeezenet_task4_loop0",

        "./dump_data/NPU_DUMPF002_P0/tensorflow_squeezenet_task5_loop0"

    ]

}

样例2（re匹配）：

#! /usr/bin/python3.6

# -*- coding:utf-8 -*-

# cmp_dump_pickle_dir.py

import os

import re

import sys

import numpy as np

from numpy.linalg import norm

import pickle

import shutil

from sklearn.metrics.pairwise import cosine_similarity

def vec_similarity(v1: np.array, v2: np.array):
    """Return the cosine similarity of two arrays treated as flat vectors.

    Each input is reshaped to a single row holding all of its elements, the
    two rows are passed to ``cosine_similarity``, and the only cell of the
    resulting matrix is returned.

    Args:
        v1: First array.
        v2: Second array.

    Returns:
        The value at position ``[0][0]`` of the ``cosine_similarity`` result.
    """
    row_a = v1.reshape(1, v1.size)
    row_b = v2.reshape(1, v2.size)
    similarity_matrix = cosine_similarity(row_a, row_b)
    return similarity_matrix[0][0]

def re_find_file(dir: str, op_name: str) -> str:
    """Find the output dump file of an operator inside a directory.

    A file matches when its whole name is ``<op_name>_out_<text>.bin``
    (``<text>`` being any run of non-whitespace characters) or
    ``<op_name>_out<digit>.bin``.

    Args:
        dir: Directory to search.
        op_name: Operator name; it is matched literally, so characters such
            as ``.`` or ``+`` in the name have no regex meaning.

    Returns:
        The name of the matching directory entry (not a full path), or None
        when nothing matches. When several entries match, the first one in
        sorted order is returned so the result is deterministic.
    """
    # Grouped alternation: (...|...). The raw f-string keeps \S and \d as
    # regex escapes, op_name is escaped, and the dot before "bin" is literal.
    pattern = re.compile(rf"{re.escape(op_name)}_(out_\S*|out\d)\.bin")

    for fname in sorted(os.listdir(dir)):
        # fullmatch() requires the name to start with op_name, so a file of
        # another operator whose name merely ends with op_name is not picked.
        # Returning fname guarantees the result is a real directory entry.
        if pattern.fullmatch(fname) is not None:
            return fname

    return None

def cmp_file(ref_file: str, dst_file: str, dtype: str) -> bool:
    """Compare two raw binary dumps interpreted as arrays of ``dtype``.

    For ``'float32'`` and ``'float16'`` data the files pass when the value
    returned by ``vec_similarity`` is greater than 0.95. Any other dtype must
    match exactly. The verdict is printed together with the reference file's
    base name.

    Args:
        ref_file: Path of the reference dump; it must exist.
        dst_file: Path of the dump under test; it must exist.
        dtype: numpy dtype name used to decode both files.

    Returns:
        True when the files are considered equal, False otherwise. Files that
        decode to a different number of elements are always reported as
        different.

    Raises:
        AssertionError: If either file does not exist.
    """
    ref_base_name = os.path.basename(ref_file)
    dst_base_name = os.path.basename(dst_file)

    assert os.path.exists(ref_file), f"ref file not exist: {ref_base_name}"
    assert os.path.exists(dst_file), f"dst file not exist: {dst_base_name}"

    ref_data = np.fromfile(ref_file, dtype=dtype, count=-1)
    dst_data = np.fromfile(dst_file, dtype=dtype, count=-1)

    if ref_data.size != dst_data.size:
        # Dumps of different lengths can never match; report that directly
        # instead of handing incompatible vectors to the similarity helper.
        print(False, f", size mismatch ref={ref_data.size} dst={dst_data.size} : ", ref_base_name)
        return False

    # Empty arrays skip the similarity path and are compared for equality.
    if dtype in ('float32', 'float16') and ref_data.size > 0:
        sim = vec_similarity(ref_data, dst_data)
        passed = bool(sim > 0.95)
        print(passed, f", simularity={sim} : ", ref_base_name)
        return passed

    is_equal = bool(np.array_equal(ref_data, dst_data))
    print(is_equal, ": ", ref_base_name)
    return is_equal

def cmp_dir(ref_dir: str, dst_dir: str) -> None:
    """Compare every reference dump in ``ref_dir`` with its file in ``dst_dir``.

    Reference names must look like ``<op>_op_out_<...>.bin``. From each name
    the function derives the operator name (everything before the first
    ``_op_out_`` suffix match) and the dtype (the text between the last
    ``_`` and the last ``.``). It then asks ``re_find_file`` for the
    operator's file in ``dst_dir`` and passes both paths to ``cmp_file``.
    Operators without a destination file are collected and printed at the
    end.

    Args:
        ref_dir: Directory holding the reference dumps.
        dst_dir: Directory holding the dumps under test.

    Raises:
        AssertionError: If a name in ``ref_dir`` does not follow the
            expected naming pattern.
    """
    print(f'\n==========>>> Start compare {ref_dir} and {dst_dir}')

    suffix_re = re.compile(r"_op_out_[\S]*.bin$")
    missing_ops = []

    for ref_name in os.listdir(ref_dir):
        name_match = re.match(r"[\S]*_op_out_[\S]*.bin$", ref_name)
        assert name_match is not None, f"bad file name: {ref_name}"

        # dtype sits between the last underscore and the extension dot.
        last_underscore = ref_name.rfind('_')
        last_dot = ref_name.rfind('.')
        dtype = ref_name[last_underscore + 1:last_dot]

        # The operator name is whatever precedes the "_op_out_..." suffix.
        suffix_start = suffix_re.search(ref_name).start()
        op_name = ref_name[:suffix_start]

        dst_name = re_find_file(dst_dir, op_name)
        if dst_name is not None:
            cmp_file(
                os.path.join(ref_dir, ref_name),
                os.path.join(dst_dir, dst_name),
                dtype=dtype,
            )
        else:
            missing_ops.append(op_name)

    print(f'\nNot exist ops: {missing_ops}')

def dump_pickle_file(pickle_file: str, out_bin_dir: str, force_dtype_u8: bool) -> None:
    """Write every array of a pickled mapping to its own raw ``.bin`` file.

    The pickle is expected to hold a mapping from string keys to array
    values. Each value is flattened and written with ``tofile`` to
    ``<key with "/" replaced by "_">_op_out_<dtype>.bin`` inside
    ``out_bin_dir``. One summary line is printed per key.

    Args:
        pickle_file: Path of the pickle file to read.
        out_bin_dir: Existing directory that receives the ``.bin`` files.
        force_dtype_u8: When true, float16/float32 arrays get ``uint8`` as
            the dtype tag in their file name. The bytes written are not
            converted; only the name changes.
    """
    # NOTE: pickle.load can execute arbitrary code, so only feed it files
    # that come from a trusted source.
    with open(pickle_file, "rb") as f:
        op_ref = pickle.load(f)

    for key, value in op_ref.items():
        flat = value.flatten()

        if force_dtype_u8 and flat.dtype in (np.float16, np.float32):
            name_dtype = 'uint8'
        else:
            name_dtype = flat.dtype

        print("pickle key: %30s, size: %7d, dtype: %s" % (key, value.itemsize * value.size, flat.dtype))

        out_name = key.replace("/", "_") + f"_op_out_{name_dtype}.bin"
        flat.tofile(os.path.join(out_bin_dir, out_name))

def mkdir(dir: str) -> None:
    """Create ``dir`` as an empty directory, wiping any previous contents.

    If ``dir`` already exists, the whole tree below it is deleted first.
    Missing parent directories are created as well, so a nested output path
    works on a fresh checkout.

    Args:
        dir: Path of the directory to (re)create.
    """
    if os.path.exists(dir):
        shutil.rmtree(dir)

    # makedirs (not mkdir) so that a nested path with missing parents works.
    os.makedirs(dir)

def main():
    """Dump a pickle file to ``.bin`` files and compare them with a dump dir.

    Command line: ``pickle_file pickle_out_dir dst_dump_dir [force_dtype_u8]``.
    The output directory is recreated with ``mkdir``, filled by
    ``dump_pickle_file`` and then compared against ``dst_dump_dir`` with
    ``cmp_dir``. The optional fourth argument must be the literal string
    ``force_dtype_u8`` to switch that flag on.

    Raises:
        AssertionError: If fewer than three arguments are given.
    """
    assert len(sys.argv) >= 4, 'usage: dump_dir_cmp.py pickle_file pickle_out_dir dst_dump_dir [force_dtype_u8]'

    force_dtype_u8 = len(sys.argv) >= 5 and sys.argv[4] == 'force_dtype_u8'

    pickle_file = sys.argv[1]
    pickle_out_dir = sys.argv[2]
    dst_dump_dir = sys.argv[3]

    mkdir(pickle_out_dir)
    dump_pickle_file(pickle_file, pickle_out_dir, force_dtype_u8)
    cmp_dir(pickle_out_dir, dst_dump_dir)


if __name__ == '__main__':
    main()

python小练习：涉及print，json，numpy的更多相关文章

Python之数据序列化（json、pickle、shelve）
本节内容前言 json模块 pickle模块 shelve模块总结一.前言 1. 现实需求每种编程语言都有各自的数据类型,其中面向对象的编程语言还允许开发者自定义数据类型(如:自定义类),Py ...
【转】Python之数据序列化（json、pickle、shelve）
[转]Python之数据序列化(json.pickle.shelve) 本节内容前言 json模块 pickle模块 shelve模块总结一.前言 1. 现实需求每种编程语言都有各自的数据类型 ...
Python小数据保存，有多少中分类？不妨看看他们的类比与推荐方案...
小数据存储我们在编写代码的时候,经常会涉及到数据存储的情况,如果是爬虫得到的大数据,我们会选择使用数据库,或者excel存储.但如果只是一些小数据,或者说关联性较强且存在存储后复用的数据,我们该如何 ...
让你瞬间萌比的35个python小技巧
今天在看python算法的时候,看到一篇关于python的小技巧.瞬间萌比了,原来python也可以这样玩,太神奇了.萌比的是原来这么简单的东西自己都不知道,虽然会写.废话不多说了,开始上菜. 1.拆 ...
5个常常被大家忽略的Python小技巧
下面我挑选出的这几个技巧常常会被人们忽略,但它们在日常编程中能真正的给我们带来不少帮助. 1. 字典推导(Dictionary comprehensions)和集合推导(Set comprehensi ...
Python 小程序，对文件操作及其它
以下是自己写的几个对文件操作的小程序,里面涉及到文件操作,列表(集合,字典)的运用等.比方说,从文件里读取一行数据.分别存放于列表中,再对列表进行操作.如去掉里面的反复项.排序等操作. 常见对文件里行 ...
小学生都能学会的python(小数据池)
小学生都能学会的python(小数据池) 1. 小数据池. 目的:缓存我们字符串,整数,布尔值.在使用的时候不需要创建过多的对象缓存:int, str, bool. int: 缓存范围 -5~256 ...
Python 小案例实战 —— 简易银行存取款查询系统
Python 小案例实战 -- 简易银行存取款查询系统涉及知识点包的调用字典.列表的混合运用列表元素索引.追加基本的循环与分支结构源码 import sys import time ban ...
这42个Python小例子，太走心
告别枯燥,60秒学会一个Python小例子.奔着此出发点,我在过去1个月,将平时经常使用的代码段换为小例子,分享出来后受到大家的喜欢. 一.基本操作 1 链式比较 i = 3print(1 < ...
Python学习day18-常用模块之NumPy
figure:last-child { margin-bottom: 0.5rem; } #write ol, #write ul { position: relative; } img { max- ...

随机推荐

KFS replicator安装（KES-KES）
源端一.安装前置配置 1.创建安装用户 groupadd flysync useradd flysync -g flysync -G kingbase passwd flysync 2.上传安装文件 ...
【Android 逆向】ARM switch 逆向
#include <stdio.h> int switch1(int a, int b, int i){ switch (i){ case 1: return a + b; break; ...
02 uniapp/微信小程序项目day02
一.分类 1.1 页面布局首先创建cate的分支定义基本结构,因为是两个需要滚动的区域,所以这里要用到组件 scroll 这个组件如果是y scroll那就要固定高度,x scroll那就要固定宽 ...
ceph安装Dashboard并开启Object Gateway管理功能
安装配置dashboard Ceph-Dashboard 是用 Python 开发的一个 Ceph 的监控面板,用来监控 Ceph 的运行状态.同时提供 REST API 来访问状态数据. 低版本的 ...
第一个Django应用 - 第三部分：Django视图和模板
一.概述一个视图就是一个页面,通常提供特定的功能,使用特定的模板.例如:在一个博客应用中,你可能会看到下列视图: 博客主页:显示最新发布的一些内容每篇博客的详细页面:博客的永久链接基于年的博客页 ...
Prometheus 监控报警系统 AlertManager 之邮件告警
转载自:https://cloud.tencent.com/developer/article/1486483 文章目录1.Prometheus & AlertManager 介绍2.环境.软 ...
【Linux】指令学习
Linux学习记录生命不息,写作不止一个有梦有戏的人 @怒放吧德德分享学习心得,欢迎指正,大家一起学习成长! 1.虚拟机网卡配置服务器重启完成之后,我们可以通过linux的指令 ip addr ...
实时营销引擎在vivo营销自动化中的实践 | 引擎篇04
作者:vivo 互联网服务器团队本文是<vivo营销自动化技术解密>的第5篇文章,重点分析介绍在营销自动化业务中实时营销场景的背景价值.实时营销引擎架构以及项目开发过程中如何利用动态队列 ...
它让你1小时精通RabbitMQ消息队列(新增死信处理)
支持.NET/.NET Framework/.NET Core RabbitMQ作为一款主流的消息队列工具早已广受欢迎.相比于其它的MQ工具,RabbitMQ支持的语言更多.功能更完善. 本文提供一种 ...
检测 MySQL 服务是否存活 shell脚本
#!/bin/bash # 检测 MySQL 服务是否存活 # host 为你需要检测的 MySQL 主机的 IP 地址,user 为 MySQL 账户名,passwd 为密码 # 这些信息需要根据实 ...

python小练习：涉及print，json，numpy

python小练习：涉及print，json，numpy的更多相关文章

随机推荐

热门专题