#!/usr/bin/python
# -*- coding: UTF-8 -*-

import pathlib
import pandas as pd

# Gather the two CSV paths and the column to compare in each file.
# Every prompt is printed on its own line; the reply is read from stdin
# exactly as typed (no stripping or validation happens here).
print("please input the 1stfile to compare")
csv_from = input()

print("please input the compare column")
from_col = input()

print("please input the 2ndfile to compare")
csv_to = input()

print("please input the compare column")
to_col = input()

# Echo the four answers back, one per line, so the user can check them.
print(csv_from, from_col, csv_to, to_col, sep="\n")

#check csv

def _compare_csv():
    """Compare one column of two CSV files and report the differences.

    Reads the module-level names ``csv_from``/``from_col`` and
    ``csv_to``/``to_col`` (file path and column name for each side),
    loads both files with pandas and prints:

    * the number of rows in each compared column,
    * the values present only in the first column, and
    * the values present only in the second column.

    The user is then asked whether to look up another column for those
    differing values. Answering ``no`` (or ``n``; surrounding whitespace
    and letter case are ignored) ends the function. Otherwise the user
    picks a side (``from`` selects the first file, anything else the
    second) and a column name, and for every differing value of that side
    the chosen column's entry from the *first* row holding the value is
    printed, followed by the de-duplicated set of those entries.

    Raises:
        FileNotFoundError: if either CSV path does not exist
            (raised by ``pd.read_csv``).
        KeyError: if a requested column is missing from its CSV.
    """
    df_from = pd.read_csv(csv_from)
    df_to = pd.read_csv(csv_to)

    lcol_from = list(df_from[from_col])
    lcol_to = list(df_to[to_col])

    print("column size of 1st csv is:", len(lcol_from))
    print("column size of 2nd csv is:", len(lcol_to))

    s1 = set(lcol_from)
    s2 = set(lcol_to)

    ss1 = s1 - s2
    ss2 = s2 - s1
    print("1st csv - 2nd csv is", ss1)
    print("2nd csv - 1st csv is", ss2)

    # Optional follow-up: fetch another column for the differing values.
    print("get other col item?")
    # Normalise the reply so " no", "No" or "n" are not mistaken for "yes".
    if input().strip().lower() in ('no', 'n'):
        return

    print("get from col or to col?")
    side = input().strip().lower()

    print("give me a col name")
    col_name = input()

    if side == 'from':
        lcol_other = list(df_from[col_name])
        ss_other = ss1
        lcol_ref = lcol_from
    else:
        lcol_other = list(df_to[col_name])
        ss_other = ss2
        lcol_ref = lcol_to

    print("ex col value is:")
    # Map each value to the row of its first occurrence once, instead of
    # rescanning the whole column with list.index() for every value.
    first_row = {}
    for row, value in enumerate(lcol_ref):
        first_row.setdefault(value, row)
    lout = [lcol_other[first_row[value]] for value in ss_other]

    print("other col items is:", lout)
    print("for un_repetition", set(lout))

# Run the interactive comparison only when executed as a script.
if __name__ == '__main__':
    _compare_csv()

pj_0001_compare_col_csv的更多相关文章

随机推荐

  1. LM算法详解

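    1. 高斯牛顿法 残差函数f(x)为非线性函数,对其一阶泰勒近似有: \(f(x+\Delta x) \approx f(x) + J\Delta x\) 这里的J是残差函数f的雅可比矩阵,代入损失函数得: \(L(\Delta x) = \frac{1}{2}\|f(x) + J\Delta x\|^2\) 令其一阶导等于0,得: \(J^{T}J\,\Delta x = -J^{T}f(x)\) 这就是论文里常看到的normal equation. ...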

  2. Ubuntu下的FTP Servers搭建与连接

    1.安装FTP:vsftpd sudo apt-get install vsftpd   sudo: 使用sudo(super user do)给普通用户赋予权限 不是所有命令都能用sudo执行的,比 ...

  3. Centos7作为VNCserver,本地使用VNCViewer连接

    1.概念 VNC是一个远程连接工具 VNC is used to display an X windows session running on another computer. Unlike a ...

  4. Technique to Read Source Code

    Technique to Read Source Code Excerpted from http://ruby-hacking-guide.github.io/intro.htm Any progr ...

  5. LOJ 数列分块入门 8

    \(\text{Solution}\) 一看有区间赋值直接上 \(ODT\) \(\text{Code}\) #include <cstdio> #include <iostream ...

  6. 题解 [SCOI2007]压缩

    好题. 显然区间 dp,令 \(f_{l, r}\) 为 \([l, r]\) 之间的最短的长度.如果我们要压缩,那么就要考虑 M 与 R 的位置.由于我们大体是从左往右来转移的,所以显然我们只需要记 ...

  7. 《爆肝整理》保姆级系列教程-玩转Charles抓包神器教程(8)-Charles如何进行断点调试

    1.简介 Charles和Fiddler一样也有个强大的功能,可以修改发送到服务器的数据包,但是修改前需要拦截,即设置断点.设置断点后,开始拦截接下来所有网页,直到取消断点.这个功能可以在数据包发送之 ...

  8. el-dialog中使用echarts

    1.在dialog中使用open方法 <el-dialog :title="diaTitle" :visible.sync="dialogVisible" ...

  9. PostgreSQL中的row_number() 与distinct用法说明

    一.示例 这两个SQL执行所得到的数据是一样的! select count(s.*) from (  select *, row_number() over (partition by fee_dat ...

  10. 理论+实战,详解Sharding Sphere-jdbc

    摘要:Apache ShardingSphere 是一款分布式的数据库生态系统,它包含两大产品:ShardingSphere-Proxy和ShardingSphere-JDBC. 本文分享自华为云社区 ...