数据分析之--Matplotlib入门

Matplotlib

Matplotlib

绘制统计图形
读取图片保存图片以及图片显示

Seaborn

1.辅助的库，可以被pyplot控制
2.辅助绘制更多的图形，更加好看，功能更加强大
3.添加了调色板
4.set_style:white(默认),dark,darkgrid,ticks
5.color_palette():调色板
6.palplot():显示颜色

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

import seaborn as sns

# Switch seaborn to its "darkgrid" theme.
sns.set_style('darkgrid')

plt.plot()   # called without data: just brings up an empty canvas

# Every entry of the palette is one RGB color.
pink_r = sns.color_palette(palette='pink_r',n_colors=7)

# Display the palette's colors as a strip of swatches.
sns.palplot(pink_r)

绘制线性图

需要X，Y

# np.linspace includes both endpoints (closed interval): 100 samples on [0, 14].
x = np.linspace(0, 14, 100)

# Draw six sine curves on the same axes; each iteration shifts the phase by
# 0.5 and scales the curve by (7 - i*pi).
for i in range(1, 7):
    y = np.sin(x + i * 0.5) * (7 - i * np.pi)
    plt.plot(x, y)

# When plot() receives a single sequence it is used as the y values and the
# element index (0..n-1) becomes x.
h = np.array([170, 171, 169, 181, 190, 162, 170])
plt.plot(h)

图片的标题

# Enlarge the figure; the figure has to be created before anything is drawn.
plt.figure(figsize=(10, 6))

x = np.linspace(-10, 10, 100)
y = np.sin(x)
z = np.cos(x)

# Successive plt.plot calls draw onto the same axes.
# `label` is the text shown for the line in the legend.
plt.plot(x, y, label='sin')
plt.plot(x, z, label='cos')

# Figure title.
#   fontsize  font size
#   color     font color
#   alpha     opacity
#   rotation  rotation angle of the text, in degrees
plt.title('sin&cos', fontsize=20, color='red', alpha=0.5, rotation=360)

# X-axis title.
plt.xlabel('X=range(-10,10)', fontsize=20, color='orange', alpha=0.5)

# Y-axis title.
plt.ylabel('f(x)=sin(x)&f(x)=cos(x)', fontsize=20, color='orange', alpha=0.5, rotation=90)

# Limit the visible X range.
plt.xlim([-10, 10])

# Limit the visible Y range.
plt.ylim(-1, 1)

# Legend: `loc` is the legend position (here an [x, y] pair in axes
# coordinates), `ncol` is the number of columns the entries are laid out in.
plt.legend(loc=[0, 0.1], ncol=1)

点和线的样式

# Enlarge the figure.
plt.figure(figsize=(10,6))

x = np.linspace(-10,10,100)
y = np.sin(x)
z = np.cos(x)

# Line style: linestyle / ls
#   '--'  dashed line
#   '-'   solid line
#   ':'   dotted line
# Line width: linewidth / lw
# Line color: color / c
#   b blue    g green    r red    y yellow    k black
#   c cyan    m magenta  w white
#   colors may also be given as RGB (hex string or tuple, as used below)
# Point marker: marker
#   o small circle    s square    d diamond    x cross
# markersize: size of the marker
plt.plot(x,y,label='sin',linestyle='--',lw=1,c='#FF0000',marker='o',markersize=10)
plt.plot(x,z,label='cos',ls=':',c=(0,1,0))

# Legend: `loc` is the legend position, `ncol` the number of columns used to
# lay out the legend entries.
plt.legend(loc=[0,1],ncol=2)

X和Y轴可读的映射

matplotlib支持LaTeX的语法

x = np.linspace(-np.pi, np.pi, 100)
y1 = np.sin(x)
y2 = np.cos(x)

# Create the axes object.
# subplot takes three arguments: rows, columns, index (the index starts at 1,
# not 0, and each subplot needs its own index).
axes = plt.subplot(1, 1, 1)

# Draw both curves onto this axes object.
axes.plot(x, y1, label='sin')
axes.plot(x, y2, label='cos')

axes.legend()
axes.set_title('A')
axes.set_xlabel('X')
axes.set_ylabel('Y')

pi = np.pi

# Map tick positions to readable labels: first fix where the ticks sit, then
# give each tick its text. The LaTeX labels are raw strings so that "\p" is
# not parsed as a (invalid) Python escape sequence.
axes.set_xticks([-pi, -pi / 2, 0, pi / 2, pi])
axes.set_xticklabels([r'$-\pi$', r'$-\pi/2$', 0, r'$\pi/2$', r'$\pi$'], fontsize=20)

axes.set_yticks([-1, 0, 1])
axes.set_yticklabels(['min', 0, 'max'], fontsize=20, rotation=20, color='orange')

直方图

1.统计元素出现的次数
2.可以描述分布的状态

# Array of single-character strings: one element per letter of 'abcdabcdaa'.
a = np.array(list('abcdabcdaa'))

# Histogram of the letters (how often each value occurs), drawn in red.
plt.hist(a,bins=20,color='r')

柱状图

from sklearn.datasets import load_iris

# Load the dataset once and reuse it, instead of calling load_iris() for every
# attribute that is needed.
iris_bunch = load_iris()
data = iris_bunch.data
# Reshape the targets into a single column so they can be concatenated.
target = iris_bunch.target.reshape(-1, 1)

# Join features and target column-wise; feature_names supplies the column
# names for the feature part.
iris = pd.DataFrame(
    np.concatenate([data, target], axis=1),
    columns=iris_bunch.feature_names + ['labels'],
)

# Store the labels as small integers.
iris.labels = iris.labels.astype('int8')

# Approach 1: map the integer codes to letters. The result is not assigned
# here, so `iris` itself is left unchanged (notebook-style display only).
iris.labels.map({0: 'A', 1: 'B', 2: 'C'})

b = np.array([0, 1, 2, 3, 0, 1, 2, 3, 0, 0])

# distplot does not accept string data, and by default it overlays a density
# curve on the histogram.
# NOTE(review): distplot is deprecated in recent seaborn releases in favour of
# histplot/displot - confirm against the installed seaborn version.
sns.distplot(b, bins=20, color='r')

# Approach 2: use the integer labels as indices into an array of names.
c = np.array(['A', 'B', 'C'])
iris.labels = c[iris.labels]

iris.columns

Out[.]:Index(['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',

       'petal width (cm)', 'labels'],

      dtype='object')

# Wide-form data: one bar per column. With seaborn's default estimator the bar
# height is the column mean, with an error bar drawn on top.
sns.barplot(data=iris)

# Titanic passenger data, read from a local CSV file.
titanic = pd.read_csv('./titanic.csv')

# x / y name the columns used for the categories and for the bar height;
# hue splits every category into one bar per value of the given column.
sns.barplot(x='Sex', y='Survived', hue='Pclass', data=titanic)

条件性柱状图

# Titanic passenger data, read from a local CSV file.
titanic = pd.read_csv('./titanic.csv')

# x and y name the columns used for the categories and for the bar height.
sns.barplot(x='Sex',y='Survived',data=titanic)

饼图

表示的是一个比例

titanic.head()

# Row counts: all men, men with Survived==0, all women, women with Survived==0.
male=titanic.query("Sex=='male'").shape[0]
male_d = titanic.query("Sex=='male' & Survived==0").shape[0]
female=titanic.query("Sex=='female'").shape[0]
female_d=titanic.query("Sex=='female' & Survived==0").shape[0]

# Preview the three two-color palettes used for the pies below.
sns.palplot(sns.color_palette('hls',2)),sns.palplot(sns.color_palette('rainbow',2)),sns.palplot(sns.color_palette('cool',2))

# One wide figure that holds three pies side by side.
plt.figure(figsize=(8*3,8))

# Drawing a pie chart:
#   x        the data values
#   labels   the label of each wedge
#   autopct  format of the percentage text ("%%" is a literal percent sign)
#   explode  how far each wedge is pulled away from the center
x = np.array([male,female])
labels=['male','female']

# Pie 1: share of men and women among all passengers.
axes1 = plt.subplot(1,3,1)
axes1.pie(x,labels=labels,autopct="%.2f%%",colors=sns.color_palette('hls',2))
axes1.axis('image')
axes1.legend(['male=%s'%(male),'female=%s'%(female)],loc=[0,1])

# Pie 2: men, rows with Survived!=0 (L) versus Survived==0 (D).
x1 = np.array([male-male_d,male_d])
labels=['L','D']
axes2 = plt.subplot(1,3,2)
explode=[.1,0]
axes2.pie(x1,labels=labels,autopct="%.2f%%",explode=explode,colors=sns.color_palette('rainbow',2))
axes2.axis('image')
axes2.legend(['L=%s'%(male-male_d),'D=%s'%(male_d)],loc=[0,1])

# Pie 3: women, rows with Survived!=0 (L) versus Survived==0 (D).
x2 = np.array([female-female_d,female_d])
labels=['L','D']
axes3 = plt.subplot(1,3,3)
explode=[.1,0]
axes3.pie(x2,labels=labels,autopct="%.2f%%",explode=explode,colors=sns.color_palette('cool',2))
axes3.axis('image')
axes3.legend(['L=%s'%(female-female_d),'D=%s'%(female_d)],loc=[0,1])

# Saving the figure:
#   fname      path the file is written to
#   dpi        pixel density (dots per inch)
#   facecolor  background color
plt.savefig('./pie.png',dpi=100)

箱图

类别型的离散值没有必要绘制箱图
查看范围,查看异常值

# Column overview of the frame.
titanic.info()

# First rows of the frame.
titanic.head()

# Distinct values that occur in the 'Parch' column.
titanic.loc[:,'Parch'].unique()

Out[.]:array([0, 1, 2, 5, 3, 4, 6])

# Keep only the numeric columns.
# select_dtypes takes `include` (default None) and/or `exclude`; here every
# column of dtype 'object' is excluded.
titanic.select_dtypes(exclude=['object']).columns

Out[.]:Index(['PassengerId', 'Survived', 'Pclass', 'Age', 'SibSp', 'Parch', 'Fare'], dtype='object')

# Box plot of the Age column via the pandas plotting interface.
titanic.Age.plot(kind='box')

# The same column drawn with seaborn.
sns.boxplot(data=titanic.Age)

散布图

观察量与量之间的分布关系的
二维的图形，一列代表x,一列代表y
c参数是class的意思，只支持数值类型

# Turn the labels back into integer category codes (the `c` argument of
# scatter below needs numeric values).
iris.labels = iris.labels.astype('category').cat.codes

# Scatter plots are used a lot for classification problems in machine learning.
# Columns 0 and 1, drawn as three row slices with their own marker and color
# (presumably one class per slice - verify against the label column).
plt.scatter(iris.iloc[:50,0],iris.iloc[:50,1],marker='o',color='r')
plt.scatter(iris.iloc[50:100,0],iris.iloc[50:100,1],marker='d',color='y')
plt.scatter(iris.iloc[100:,0],iris.iloc[100:,1],marker='x',color='g')

# Columns 2 and 3 in a single call; the numeric labels pick the color from
# the colormap.
plt.scatter(iris.iloc[:,2],iris.iloc[:,3],c=iris.labels,cmap='gray')

散布密度图

直方图

iris.columns

# Joint plot of two columns using the default kind.
sns.jointplot(x='sepal length (cm)',y='sepal width (cm)',data=iris)

# kind: "scatter" scatter | "reg" regression | "resid" residuals |
#       "kde" density | "hex" hexagonal bins
sns.jointplot(x='sepal length (cm)',y='sepal width (cm)',data=iris,kind='hex')

回归散布图

# Scatter plot of sepal length against sepal width with a fitted regression line.
sns.regplot(x='sepal length (cm)',y='sepal width (cm)',data=iris)

# The regression line is the one that minimizes the average distance between
# the points and the line.
sns.regplot(x='petal length (cm)',y='petal width (cm)',data=iris)

线性分类散布图

# Regression scatter plot split by the 'labels' column: one color and one
# marker ('o', 'd', 'x') per label value.
sns.lmplot(x='petal length (cm)',y='petal width (cm)',data=iris,hue='labels',markers=['o','d','x'])

散布图矩阵

检查量与量之间相关性
要求hue的值是str

# Replace the integer codes with letter names for use as `hue`.
iris.labels = iris.labels.replace({0:'A',1:'B',2:'C'},)

# Scatter-plot matrix colored by label, with density curves on the diagonal.
sns.pairplot(iris,hue='labels', diag_kind='kde')

# Observation from the plot: in the second column the three classes overlap
# heavily, so that column is a poor feature for telling them apart.
sns.boxplot(data=iris)

# Observation from the plot: the second column contains outliers.
# A correlation coefficient of 1 means the two columns move together exactly,
# -1 means they are negatively correlated, and 0 means no linear relationship.
# Plotting column 0 against itself shows the coefficient-1 case.
plt.scatter(iris.iloc[:,0],iris.iloc[:,0])

3D图

# Kept for older matplotlib releases, where importing Axes3D is what makes the
# '3d' projection available; it is harmless on newer ones.
from mpl_toolkits.mplot3d.axes3d import Axes3D

# --- 3D scatter plot ---
# Canvas with a single 3D axes.
plt.figure(figsize=(16, 12))
axes3d = plt.subplot(projection='3d')

# Integer category codes of the labels drive the colormap.
target = iris.labels.astype('category').cat.codes
axes3d.scatter3D(iris.iloc[:, 0], iris.iloc[:, 2], iris.iloc[:, 3], c=target, cmap='rainbow', s=50)

# --- 3D surface plot ---
x = np.linspace(-10, 10, 100)
y = np.linspace(-10, 10, 100)

# z has to be a function of x and y, evaluated on the full grid.
xx, yy = np.meshgrid(x, y)
z = xx**3 + yy**3

fig = plt.figure(figsize=(16, 10))
# Create the 3D axes through the figure so that it is actually attached to it;
# constructing Axes3D(fig) directly no longer adds the axes to the figure on
# current matplotlib releases and leaves the figure empty.
axes3d = fig.add_subplot(projection='3d')

# Surface plot: x, y and z must all be 2-D arrays.
pic = axes3d.plot_surface(xx, yy, z, cmap='rainbow')

# Color bar for the surface.
plt.colorbar(pic, shrink=0.8)

Excel数据导入数据库

import pandas as pd

from sqlalchemy import create_engine

# SQLAlchemy engine for the MySQL database "demo" on localhost, using the
# pymysql driver.
conn = create_engine("mysql+pymysql://hal:123456@localhost:3306/demo")

# Read the CSV; it has no header row, so the column names are given explicitly.
userinfo = pd.read_csv('./user_info_utf.csv',header=None,names=['userid','sex','birth'])

userinfo.info()

# to_sql arguments:
#   name                 name of the target table
#   index=False          do not write the row index as a column
#   if_exists='append'   add the rows if the table already exists
# SQL used to grant the account access:
#GRANT ALL PRIVILEGES ON *.* TO 'hal'@'%' IDENTIFIED BY '123456' WITH GRANT OPTION;
# Author's note: in MySQL a TEXT column can only get a FULLTEXT key, not a
# regular index.
# Write the first 50000 rows; if_exists='fail' raises if the table is already there.
userinfo.iloc[:50000].to_sql('user_info',conn,index=False,if_exists='fail')

# NOTE(review): this appends the complete frame, so running it after the
# statement above inserts the first 50000 rows a second time - confirm the two
# calls are meant as alternatives.
userinfo.to_sql('user_info',conn,index=False,if_exists='append')

数据分析之--Mataplotlib入门的更多相关文章

向大家介绍我的新书：《基于股票大数据分析的Python入门实战》
我在公司里做了一段时间Python数据分析和机器学习的工作后,就尝试着写一本Python数据分析方面的书.正好去年有段时间股票题材比较火,就在清华出版社夏老师指导下构思了这本书.在这段特殊时期内,夏老 ...
基于股票大数据分析的Python入门实战（视频教学版）的精彩插图汇总
在我写的这本书,<基于股票大数据分析的Python入门实战(视频教学版)>里,用能吸引人的股票案例,带领大家入门Python的语法,数据分析和机器学习. 京东链接是这个:https://i ...
利用python进行数据分析之pandas入门
转自https://zhuanlan.zhihu.com/p/26100976 目录: 5.1 pandas 的数据结构介绍5.1.1 Series5.1.2 DataFrame5.1.3索引对象5. ...
Python数据分析之pandas入门
一.pandas库简介 pandas是一个专门用于数据分析的开源Python库,目前很多使用Python分析数据的专业人员都将pandas作为基础工具来使用.pandas是以Numpy作为基础来设计开 ...
[学习笔记] [数据分析] 01.Python入门
1.安装Python与环境配置 ① ② 安装pip以及利用pip安装Python库 2.Anaconda安装 conda list 要在root环境下 3.常用数据分析库 ① Numpy 安装:con ...
pyhton中pandas数据分析模块快速入门（非常容易懂）
//2019.07.16python中pandas模块应用1.pandas是python进行数据分析的数据分析库,它提供了对于大量数据进行分析的函数库和各种方法,它的官网是http://pandas. ...
数据分析之pandas入门
一.数据结构 1. Series 1.1 序列构造和调用 Series是一种类似于一维数组的对象,它由一组数据和索引共同组成,可以通过索引的方式来选取Series中的单个或一组值,常用的构造函数为ob ...
数据分析-pandas基础入门（一）
最近在学习python,所以了解了一下Pandas,Pandas是基于NumPy的一个开源Python库,它被广泛用于快速分析数据,以及数据清洗和准备等工作. 首先是安装numpy以及pandas, ...
3个月零基础入门Python+数据分析，详细时间表+计划表分享
大家好,我是白云. 今天想给大家分享的是三个月零基础入门数据分析学习计划.有小伙伴可能会说,英语好像有点不太好,要怎么办?所以今天我给大家分享的资源呢就是对国内的小伙伴很友好,还附赠大家一份三个月学 ...

随机推荐

python 使用PyKDL 四元数转欧拉角
安装: sudo apt-get install ros-indigo-kdl-parser-py 使用: import PyKDLimport math def quat_to_angle(quat ...
Git git rm和git rm --cached
git rm 和 git rm --cached 的区别 git rm file git commit -m "xxx" git push origin master 删除本地及仓 ...
解决GitHub上传大于100M文件失败
目录问题解决参考问题 push的时候遇到以下问题: remote: error: GH001: Large files detected. You may want to try Git La ...
用Fiddler模拟低速网络环境（弱网）
原文链接:http://caibaojian.com/fiddler.html 有时候宽频网路用习惯了… 在开发的过程就比较少去考虑最佳化的问题… 但当有人反应说「你的网页好慢」甚至当网路速度慢,会 ...
Java-编程规范与代码风格
阿里巴巴 Java 开发手册 https://github.com/alibaba/p3c https://developer.aliyun.com/special/tech-java 唯品会规范 J ...
CPU | 物理 CPU vs 逻辑 CPU vs 核心 vs 线程 vs Socket
当我们试着通过 Linux 命令 nproc 和 lscpu 了解一台计算机 CPU 级的架构和性能时,我们总会发现无法正确地理解相应的结果,因为我们会被好几个术语搞混淆:物理 CPU.逻辑 CPU. ...
DisplayAttribute应用——根据PropertyName自动获取对应的UI显示名
model定义,使用DisplayAttribute public class AddressSetInfo { /// <summary> /// invoiceAddress.Id / ...
使用wsimport生成webservice客户端代码
服务端 package com.xc.webservice; import javax.jws.WebService; import javax.xml.ws.Endpoint; @WebServic ...
MySQL函数使用
1.mysql开启函数功能 MySQL函数不能创建的解决方法在使用MySQL数据库时,有时会遇到mysql函数不能创建的情况. 出错信息大致类似: ERROR 1418 (HY000): This ...
Spring Boot中mybatis insert 如何获得自增id
https://www.cnblogs.com/quan-coder/p/8728410.html 注意要显式设置主键,通过: @Options(useGeneratedKeys = true, ke ...

数据分析之--Mataplotlib入门

Mataplotlib

Seaborn

绘制线性图

图片的标题

点和线的样式

X和Y轴可读的映射

直方图

柱状图

条件性柱状图

饼图

箱图

散步图

散步密度图

回归散布图

线性分类散步图

散布图矩阵

3D图

Excel数据导入数据库

数据分析之--Mataplotlib入门的更多相关文章

随机推荐

热门专题