pj_0001_compare_col_csv
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import pathlib
import pandas as pd
# Interactive set-up: ask for the two CSV files and, for each of them, the
# column whose values are compared. The answers are kept in module-level
# names because _compare_csv() reads them as globals.

# First file and its column.
print("please input the 1stfile to compare")
csv_from = input()
print("please input the compare column")
from_col = input()

# Second file and its column.
print("please input the 2ndfile to compare")
csv_to = input()
print("please input the compare column")
to_col = input()

# Echo the four answers, one per line, so the user can check them.
print(csv_from, from_col, csv_to, to_col, sep="\n")
#check csv
def _compare_csv():
    """Interactively compare one column of two CSV files.

    The file paths and column names are taken from the module-level names
    ``csv_from``/``from_col`` (first file) and ``csv_to``/``to_col``
    (second file).

    The function prints the number of rows in each compared column and the
    values that occur in only one of the two columns. It then optionally
    asks for a second column and prints, for every value that is missing
    from the other file, the second-column value found on the first row
    where that missing value occurs.

    Returns:
        None. All results are written to standard output.

    Raises:
        KeyError: if a requested column does not exist in the CSV file
            (raised by the pandas column lookup).
    """
    df_from = pd.read_csv(csv_from)
    df_to = pd.read_csv(csv_to)
    lcol_from = list(df_from[from_col])
    lcol_to = list(df_to[to_col])
    print("column size of 1st csv is:", len(lcol_from))
    print("column size of 2nd csv is:", len(lcol_to))

    # Values that appear in only one of the two columns.
    s1 = set(lcol_from)
    s2 = set(lcol_to)
    ss1 = s1 - s2
    ss2 = s2 - s1
    print("1st csv - 2nd csv is", ss1)
    print("2nd csv - 1st csv is", ss2)

    # Optional step: look up another column for the unmatched values.
    print("get other col item?")
    q1 = input()
    # Tolerate stray whitespace / capitalisation in the answer.
    if q1.strip().lower() in ("no", "n"):
        return
    print("get from col or to col?")
    q2 = input()
    print("give me a col name")
    col_name = input()
    if q2.strip().lower() == 'from':
        lcol_other = list(df_from[col_name])
        ss_other = ss1
        lcol_ref = lcol_from
    else:
        # Any answer other than "from" selects the second file.
        lcol_other = list(df_to[col_name])
        ss_other = ss2
        lcol_ref = lcol_to

    print("ex col value is:")
    # Single pass over the rows instead of one list.index() scan per missing
    # value. Only the first row of each missing value is kept, and the
    # result follows CSV row order rather than arbitrary set order.
    first_match = {}
    for ref_value, other_value in zip(lcol_ref, lcol_other):
        if ref_value in ss_other and ref_value not in first_match:
            first_match[ref_value] = other_value
    lout = list(first_match.values())
    print("other col items is:", lout)
    print("for un_repetition", set(lout))
# Script entry point: run the interactive comparison only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    _compare_csv()
pj_0001_compare_col_csv的更多相关文章
随机推荐
- LM算法详解
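1. 高斯牛顿法 残差函数f(x)为非线性函数,对其一阶泰勒近似有: $f(x+\Delta x) \approx f(x) + J\Delta x$ 这里的J是残差函数f的雅可比矩阵,代入损失函数得: $L(\Delta x) = \frac{1}{2}\lVert f(x) + J\Delta x \rVert^2$ 令其一阶导等于0,得: $J^\top J\,\Delta x = -J^\top f(x)$ 这就是论文里常看到的normal equation. ...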
- Ubuntu下的FTP Servers搭建与连接
1.安装FTP:vsftpd sudo apt-get install vsftpd sudo: 使用sudo(super user do)给普通用户赋予权限 不是所有命令都能用sudo执行的,比 ...
- Centos7作为VNCserver,本地使用VNCViewer连接
1.概念 VNC是一个远程连接工具 VNC is used to display an X windows session running on another computer. Unlike a ...
- Technique to Read Source Code
Technique to Read Source Code Excerpted from http://ruby-hacking-guide.github.io/intro.htm Any progr ...
- LOJ 数列分块入门 8
\(\text{Solution}\) 一看有区间赋值直接上 \(ODT\) \(\text{Code}\) #include <cstdio> #include <iostream ...
- 题解 [SCOI2007]压缩
好题. 显然区间 dp,令 \(f_{l, r}\) 为 \([l, r]\) 之间的最短的长度.如果我们要压缩,那么就要考虑 M 与 R 的位置.由于我们大体是从左往右来转移的,所以显然我们只需要记 ...
- 《爆肝整理》保姆级系列教程-玩转Charles抓包神器教程(8)-Charles如何进行断点调试
1.简介 Charles和Fiddler一样也有个强大的功能,可以修改发送到服务器的数据包,但是修改前需要拦截,即设置断点.设置断点后,开始拦截接下来所有网页,直到取消断点.这个功能可以在数据包发送之 ...
- el-dialog中使用echarts
1.在dialog中使用open方法 <el-dialog :title="diaTitle" :visible.sync="dialogVisible" ...
- PostgreSQL中的row_number() 与distinct用法说明
一.示例 这两个SQL执行所得到的数据是一样的! select count(s.*) from ( select *, row_number() over (partition by fee_dat ...
- 理论+实战,详解Sharding Sphere-jdbc
摘要:Apache ShardingSphere 是一款分布式的数据库生态系统,它包含两大产品:ShardingSphere-Proxy和ShardingSphere-JDBC. 本文分享自华为云社区 ...