pandas的合并、连接、去重、替换

 import pandas as pd

 import numpy as np

 # merge: combine two DataFrames on key columns, similar to VLOOKUP in Excel.
 df1 = pd.DataFrame(dict(
     key=['K0', 'K1', 'K2', 'K3'],
     A=['A0', 'A1', 'A2', 'A3'],
     B=['B0', 'B1', 'B2', 'B3'],
 ))
 df2 = pd.DataFrame(dict(
     key=['K0', 'K1', 'K2', 'K3'],
     C=['C0', 'C1', 'C2', 'C3'],
     D=['D0', 'D1', 'D2', 'D3'],
 ))
 df3 = pd.DataFrame(dict(
     key1=['K0', 'K0', 'K2', 'K3'],
     key2=['K0', 'K1', 'K0', 'K1'],
     A=['A0', 'A1', 'A2', 'A3'],
     B=['B0', 'B1', 'B2', 'B3'],
 ))
 df4 = pd.DataFrame(dict(
     key1=['K0', 'K0', 'K2', 'K3'],
     key2=['K0', 'K0', 'K0', 'K0'],
     C=['C0', 'C1', 'C2', 'C3'],
     D=['D0', 'D1', 'D2', 'D3'],
 ))

 # The first DataFrame supplies the left-hand columns of the result and the
 # second one the right-hand columns; `on` names the key column to match on.
 print(df1.merge(df2, on='key'))
 # Output:
 #   key   A   B   C   D
 # 0  K0  A0  B0  C0  D0
 # 1  K1  A1  B1  C1  D1
 # 2  K2  A2  B2  C2  D2
 # 3  K3  A3  B3  C3  D3

 # Joining on several keys: rows are paired only when BOTH key1 and key2 are
 # equal in the two DataFrames; otherwise they are not joined.
 print(df3.merge(df4, on=['key1', 'key2']))
 # Output:
 #   key1 key2   A   B   C   D
 # 0   K0   K0  A0  B0  C0  D0
 # 1   K0   K0  A0  B0  C1  D1
 # 2   K2   K0  A2  B2  C2  D2

 # The `how` parameter selects the join strategy:
 #   'inner' - the default, intersection of the keys
 #   'outer' - union of the keys, missing data becomes NaN
 #   'left'  - keep the keys of df3 (left frame), missing data becomes NaN
 #   'right' - keep the keys of df4 (right frame), missing data becomes NaN
 for how in ('inner', 'outer', 'left', 'right'):
     print(df3.merge(df4, on=['key1', 'key2'], how=how))
     print('-' * 8)
 # Output for how='inner':
 #   key1 key2   A   B   C   D
 # 0   K0   K0  A0  B0  C0  D0
 # 1   K0   K0  A0  B0  C1  D1
 # 2   K2   K0  A2  B2  C2  D2
 # --------
 #
 # Output for how='outer':
 # NOTE(review): the row order of the outer result may depend on the pandas
 # version in use - confirm against the installed release.
 #   key1 key2    A    B    C    D
 # 0   K0   K0   A0   B0   C0   D0
 # 1   K0   K0   A0   B0   C1   D1
 # 2   K0   K1   A1   B1  NaN  NaN
 # 3   K2   K0   A2   B2   C2   D2
 # 4   K3   K1   A3   B3  NaN  NaN
 # 5   K3   K0  NaN  NaN   C3   D3
 # --------
 #
 # Output for how='left':
 #   key1 key2   A   B    C    D
 # 0   K0   K0  A0  B0   C0   D0
 # 1   K0   K0  A0  B0   C1   D1
 # 2   K0   K1  A1  B1  NaN  NaN
 # 3   K2   K0  A2  B2   C2   D2
 # 4   K3   K1  A3  B3  NaN  NaN
 # --------
 #
 # Output for how='right':
 #   key1 key2    A    B   C   D
 # 0   K0   K0   A0   B0  C0  D0
 # 1   K0   K0   A0   B0  C1  D1
 # 2   K2   K0   A2   B2  C2  D2
 # 3   K3   K0  NaN  NaN  C3  D3
 # --------

 # Parameters left_on, right_on, left_index, right_index: when the join key
 # is not one column shared by both frames, the left key and the right key
 # can be specified separately.
 df5 = pd.DataFrame(dict(lkey=list('bbacaab'), data1=range(7)))
 df6 = pd.DataFrame(dict(rkey=list('abd'), date2=range(3)))

 # Show both inputs, then the join of df5.lkey against df6.rkey.
 for frame in (df5, df6):
     print(frame)
 print(df5.merge(df6, left_on='lkey', right_on='rkey'))
 # Output (df5):
 #   lkey  data1
 # 0    b      0
 # 1    b      1
 # 2    a      2
 # 3    c      3
 # 4    a      4
 # 5    a      5
 # 6    b      6
 #
 # Output (df6):
 #   rkey  date2
 # 0    a      0
 # 1    b      1
 # 2    d      2
 #
 # Output (merged):
 # NOTE(review): row order of the merged result may depend on the pandas
 # version in use - confirm against the installed release.
 #   lkey  data1 rkey  date2
 # 0    b      0    b      1
 # 1    b      1    b      1
 # 2    b      6    b      1
 # 3    a      2    a      0
 # 4    a      4    a      0
 # 5    a      5    a      0

 # concat(): with the default axis=0 the inputs are stacked row after row;
 # with axis=1 they are placed column next to column, yielding a DataFrame.
 s1 = pd.Series([2, 3, 4])
 s2 = pd.Series([1, 2, 3])

 print(pd.concat([s1, s2]))
 # Output (original index labels are kept, hence 0-2 twice):
 # 0    2
 # 1    3
 # 2    4
 # 0    1
 # 1    2
 # 2    3
 # dtype: int64

 # Build the column-wise result once and derive the variants from it.
 wide = pd.concat([s1, s2], axis=1)
 print(wide)
 # Output:
 #    0  1
 # 0  2  1
 # 1  3  2
 # 2  4  3

 # reset_index() moves the old index into a regular column named 'index'.
 snew = wide.reset_index()
 print(snew)
 # Output:
 #    index  0  1
 # 0      0  2  1
 # 1      1  3  2
 # 2      2  4  3

 # drop=True discards the old index instead of keeping it as a column.
 snew2 = wide.reset_index(drop=True)
 print(snew2)
 # Output:
 #    0  1
 # 0  2  1
 # 1  3  2
 # 2  4  3

 # De-duplication: .duplicated() and .drop_duplicates()
 s3 = pd.Series([1, 2, 2, 4, 4, 6, 7, 6, 87])

 # Flag repeats: an element is True when the same value already appeared
 # earlier in the Series.
 print(s3.duplicated())
 # Output:
 # 0    False
 # 1    False
 # 2     True
 # 3    False
 # 4     True
 # 5    False
 # 6    False
 # 7     True
 # 8    False
 # dtype: bool

 # Select the repeated occurrences.
 s4 = s3[s3.duplicated()]
 print(s4)
 # Output:
 # 2    2
 # 4    4
 # 7    6
 # dtype: int64

 # Select the unique elements (first occurrence of each value) by inverting
 # the boolean mask.
 s5 = s3[~s3.duplicated()]
 print(s5)
 # Output:
 # 0     1
 # 1     2
 # 3     4
 # 5     6
 # 6     7
 # 8    87
 # dtype: int64

 # Same result in one call. The `inplace` parameter controls whether the
 # original object is modified instead of a new one being returned.
 s5 = s3.drop_duplicates()
 print(s5)
 # Output:
 # 0     1
 # 1     2
 # 3     4
 # 5     6
 # 6     7
 # 8    87
 # dtype: int64

 df7 = pd.DataFrame({'key1': ['a', 'a', 3, 4, 3],
                     'key2': ['a', 'a', 'b', 'b', 5]})

 # On a DataFrame the check is row-wise: a row is True when an identical row
 # (all columns equal) occurred before it.
 print(df7.duplicated())
 # Output:
 # 0    False
 # 1     True
 # 2    False
 # 3    False
 # 4    False
 # dtype: bool

 # Look at column key2 only.
 print(df7['key2'].duplicated())
 # Output:
 # 0    False
 # 1     True
 # 2    False
 # 3     True
 # 4    False
 # Name: key2, dtype: bool

 # Drop duplicated rows directly.
 print(df7.drop_duplicates())
 # Output:
 #   key1 key2
 # 0    a    a
 # 2    3    b
 # 3    4    b
 # 4    3    5

 print(df7['key2'].drop_duplicates())
 # Output:
 # 0    a
 # 2    b
 # 4    5
 # Name: key2, dtype: object

 # Replacement: .replace()
 s6 = pd.Series(list('askjdghs'))

 # Replace a single value.
 print(s6.replace('s', 'dsd'))
 # Output:
 # 0      a
 # 1    dsd
 # 2      k
 # 3      j
 # 4      d
 # 5      g
 # 6      h
 # 7    dsd
 # dtype: object

 # Replace several values at once: every listed value becomes NaN.
 print(s6.replace(['a', 's'], np.nan))
 # Output:
 # 0    NaN
 # 1    NaN
 # 2      k
 # 3      j
 # 4      d
 # 5      g
 # 6      h
 # 7    NaN
 # dtype: object

 # Replace via a dict mapping {old value: new value}.
 print(s6.replace({'a': np.nan}))
 # Output:
 # 0    NaN
 # 1      s
 # 2      k
 # 3      j
 # 4      d
 # 5      g
 # 6      h
 # 7      s
 # dtype: object

pandas的合并、连接、去重、替换的更多相关文章

04. Pandas 3| 数值计算与统计、合并连接去重分组透视表文件读取
1.数值计算和统计基础常用数学、统计方法数值计算和统计基础基本参数:axis、skipna df.mean(axis=1,skipna=False) -->> axis=1是按行来 ...
Pandas | 19 合并/连接
Pandas具有功能全面的高性能内存中连接操作,与SQL等关系数据库非常相似.Pandas提供了一个单独的merge()函数,作为DataFrame对象之间所有标准数据库连接操作的入口 - pd.me ...
SQL连接操作符介绍（循环嵌套, 哈希匹配和合并连接）
今天我将介绍在SQLServer 中的三种连接操作符类型,分别是:循环嵌套.哈希匹配和合并连接.主要对这三种连接的不同.复杂度用范例的形式一一介绍. 本文中使用了示例数据库AdventureWorks ...
排序合并连接(sort merge join)的原理
排序合并连接(sort merge join)的原理排序合并连接(sort merge join)的原理排序合并连接(sort merge join) 访问次数:两张表都只会访 ...
oracle表连接------>排序合并连接(Merge Sort Join)
排序合并连接 (Sort Merge Join)是一种两个表在做连接时用排序操作(Sort)和合并操作(Merge)来得到连接结果集的连接方法. 对于排序合并连接的优缺点及适用场景例如以下: a,通常 ...
pandas列合并为一行
将dataframe利用pandas列合并为一行,类似于sql的GROUP_CONCAT函数.例如如下dataframe id_part pred pred_class v_id 0 d 0 0.12 ...
JS 两个对象数组合并并去重
JS两个对象数组合并并去重 <!DOCTYPE html> <html> <head> <meta charset="utf-8"> ...
PHP数组合并和去重的函数有哪些
PHP数组合并和去重的函数有哪些一.总结一句话总结:合并:array_merge() array_merge_recursive() +号:去重:array_flip() array_unique ...
oracle 表连接 - sort merge joins 排序合并连接
https://blog.csdn.net/dataminer_2007/article/details/41907581一. sort merge joins连接(排序合并连接) 原理指的是两个表 ...
arcgis中的Join(合并连接)和Relate(关联连接)
arcgis中的Join(合并连接)和Relate(关联连接) 一.区别 1.连接关系不一样. Relate(关联连接)方式连接的两个表之间的记录可以是“一对一”.“多对一”.“一对多”的关系 Joi ...

随机推荐

Tensor Operation
Main operation categories that encompass the operations of tensors. Reshaping operations Element-wis ...
ssh 公钥登录远程主机并禁止密码登录
https://www.digitalocean.com/community/tutorials/how-to-set-up-ssh-keys-on-centos7 如果在新的机器上,得先用密码登录一 ...
《windows核心编程系列》七谈谈用户模式下的线程同步
用户模式下的线程同步系统中的线程必须访问系统资源,如堆.串口.文件.窗口以及其他资源.如果一个线程独占了对某个资源的访问,其他线程就无法完成工作.我们也必须限制线程在任何时刻都能访问任何资源.比如在 ...
BZOJ2159 Crash的文明世界
Description 传送门给你一个n个点的树,边权为1. 对于每个点u, 求:$\sum_{i = 1}^{n} distance(u, i)^{k}$ $ n \leq 50000, k ...
贪心/二分查找 BestCoder Round #43 1002 pog loves szh II
题目传送门 /* 贪心/二分查找:首先对ai%=p,然后sort,这样的话就有序能使用二分查找.贪心的思想是每次找到一个aj使得和为p-1(如果有的话) 当然有可能两个数和超过p,那么an的值最优,每 ...
题解报告：hdu 1114 Piggy-Bank（完全背包恰好装满）
Problem Description Before ACM can do anything, a budget must be prepared and the necessary financia ...
203 Remove Linked List Elements 删除链表中的元素
删除链表中等于给定值 val 的所有元素.示例给定: 1 --> 2 --> 6 --> 3 --> 4 --> 5 --> 6, val = 6返回: 1 --> ...
置换测试: Mock, Stub 和其他
简介在理想情况下,你所做的所有测试都是能应对你实际代码的高级测试.例如,UI 测试将模拟实际的用户输入(Klaas 在他的文章中有讨论)等等.但实际上,这并非永远都是个好主意.为每个测试用例都访问一 ...
AJPFX关于多态的应用
要求设计一个方法,要求此方法可以接受A类的任意子类对象,并调用方法,此时,如果不使用对象多态性,那代码肯定会类似如下 class A{ // 定义类A publi ...
学习笔记第十章使用CSS美化表单
第10章使用CSS美化表单 [学习重点] 正确使用各种表单控件熟悉HTML5新增的表单控件掌握表单属性的设置设计易用性表单页面 10.1 表单的基本结构表单包含多个标签,由很多控件组成 ...

pandas的合并、连接、去重、替换

pandas的合并、连接、去重、替换的更多相关文章

随机推荐

热门专题