Univariate plotting with pandas

import pandas as pd

# Load the wine reviews, using the first CSV column as the row index
# (same call as the other sections that read this file).
reviews = pd.read_csv("../input/wine-reviews/winemag-data_first150k.csv", index_col=0)

reviews.head()

# --- Bar charts ---

# Review counts for the five most frequent provinces.
reviews['province'].value_counts().head().plot.bar()

# Same five provinces, expressed as a fraction of all reviews.
(reviews['province'].value_counts().head() / len(reviews)).plot.bar()

# Review counts per score, ordered by score instead of by frequency.
reviews['points'].value_counts().sort_index().plot.bar()

# --- Line chart ---

reviews['points'].value_counts().sort_index().plot.line()

# --- Area chart ---

reviews['points'].value_counts().sort_index().plot.area()

# --- Histograms ---

# Histogram of prices below a cutoff.
# NOTE(review): the cutoff literal was missing in the original text; 200 is
# assumed here -- confirm the intended value.
reviews[reviews['price'] < 200]['price'].plot.hist()

# Histogram over the full, unfiltered price column for comparison.
reviews['price'].plot.hist()

# Inspect the rows with very high prices.
# NOTE(review): the threshold literal was missing in the original text; 1500
# is assumed here -- confirm the intended value.
reviews[reviews['price'] > 1500]

# --- Pie chart ---

reviews['province'].value_counts().head().plot.pie()

Bivariate plotting with pandas

import pandas as pd

# Load the wine reviews, using the first CSV column as the row index.
reviews = pd.read_csv("../input/wine-reviews/winemag-data_first150k.csv", index_col=0)

reviews.head()

# --- Scatter plot ---

# Plot a random sample of 100 reviews priced under 100 to limit overplotting.
reviews[reviews['price'] < 100].sample(100).plot.scatter(x='price', y='points')

# --- Hexplot: shows how the two variables relate ---

reviews[reviews['price'] < 100].plot.hexbin(x='price', y='points', gridsize=15)

# --- Stacked plots: data stacking ---

# `wine_counts` was used without ever being defined in the original snippet.
# NOTE(review): the path below is an assumption (a per-score count table with
# one column per wine variety) -- confirm the dataset location before running.
wine_counts = pd.read_csv(
    "../input/most-common-wine-scores/top-five-wine-score-counts.csv",
    index_col=0,
)

wine_counts.plot.bar(stacked=True)

wine_counts.plot.area()

# --- Bivariate line chart: several lines on one set of axes ---

wine_counts.plot.line()

Plotting with seaborn

import pandas as pd

# Load the wine reviews, using the first CSV column as the row index.
reviews = pd.read_csv("../input/wine-reviews/winemag-data_first150k.csv", index_col=0)

import seaborn as sns

# --- Countplot ---

# Pass the column explicitly as `x`; a bare positional Series is not
# interpreted as the x variable by current seaborn releases.
sns.countplot(x=reviews['points'])

# --- KDE plot: smooths out noise in the distribution ---

sns.kdeplot(reviews.query('price < 200').price)

# --- Line chart of the same data, for comparison with the KDE ---

reviews[reviews['price'] < 200]['price'].value_counts().sort_index().plot.line()

# --- Two-dimensional KDE ---

# Sample 5000 complete (price, points) pairs below the price cutoff.
price_points = (
    reviews[reviews['price'] < 200]
    .loc[:, ['price', 'points']]
    .dropna()
    .sample(5000)
)
# Name both axes explicitly so a bivariate density is drawn; passing the
# two-column frame on its own is treated as wide-form data by current seaborn.
sns.kdeplot(data=price_points, x='price', y='points')

# --- Distplot ---

# NOTE(review): `distplot` is deprecated in recent seaborn releases in favour
# of `histplot`/`displot`; kept here to match the section title.
sns.distplot(reviews['points'], bins=10, kde=False)

# --- Jointplot ---

sns.jointplot(x='price', y='points', data=reviews[reviews['price'] < 100])

sns.jointplot(x='price', y='points', data=reviews[reviews['price'] < 100], kind='hex', gridsize=20)

# --- Boxplot and violin plot: the box spans the 25%-75% range, with a median line ---

# Keep only reviews of the five most frequently reviewed varieties.
df = reviews[reviews.variety.isin(reviews.variety.value_counts().head(5).index)]

sns.boxplot(
    x='variety',
    y='points',
    data=df
)

Faceting with seaborn

import pandas as pd

# Show every column when a DataFrame is displayed. The fully qualified option
# name is used because the bare 'max_columns' pattern can match more than one
# option on recent pandas versions.
pd.set_option('display.max_columns', None)

df = pd.read_csv("../input/fifa-18-demo-player-dataset/CompleteDataset.csv", index_col=0)

import re

import numpy as np

import seaborn as sns

footballers = df.copy()

# The last character of 'Value' is taken as the unit suffix.
# presumably 'M' = millions and 'K' = thousands -- verify against the data.
footballers['Unit'] = df['Value'].str[-1]

# Strip the first character and the unit suffix to get the numeric text.
# A value with no unit suffix ends in the digit '0'; stripping it would leave
# an empty string that cannot be cast to float, so those rows are set to 0.
footballers['Value (M)'] = np.where(
    footballers['Unit'] == '0',
    0,
    footballers['Value'].str[1:-1].str.replace(r'[a-zA-Z]', '', regex=True),
)

footballers['Value (M)'] = footballers['Value (M)'].astype(float)

# Rows whose unit is 'M' are kept as-is; everything else is divided by 1000.
footballers['Value (M)'] = np.where(footballers['Unit'] == 'M',
                                    footballers['Value (M)'],
                                    footballers['Value (M)'] / 1000)

# Overwrite 'Value' with the numeric figure and keep only the first listed
# preferred position as 'Position'.
footballers = footballers.assign(Value=footballers['Value (M)'],
                                 Position=footballers['Preferred Positions'].str.split().str[0])

# --- The FacetGrid ---

# One KDE of 'Overall' per position, for strikers and goalkeepers only.
df = footballers[footballers['Position'].isin(['ST', 'GK'])]

g = sns.FacetGrid(df, col="Position")

g.map(sns.kdeplot, "Overall")

# One facet per position across all players.
df = footballers

g = sns.FacetGrid(df, col="Position", col_wrap=6)  # six columns per row

g.map(sns.kdeplot, "Overall")

# Two-way facet: position on the rows, club on the columns.
df = footballers[footballers['Position'].isin(['ST', 'GK'])]

df = df[df['Club'].isin(['Real Madrid CF', 'FC Barcelona', 'Atlético Madrid'])]

g = sns.FacetGrid(df, row="Position", col="Club",
                  row_order=['GK', 'ST'],
                  col_order=['Atlético Madrid', 'FC Barcelona', 'Real Madrid CF'])

g.map(sns.violinplot, "Overall")  # violin plot

# --- Pairplot: a good first step in data analysis ---

sns.pairplot(footballers[['Overall', 'Potential', 'Value']])

Multivariate plotting

import pandas as pd

# Show every column when a DataFrame is displayed. The fully qualified option
# name is used because the bare 'max_columns' pattern can match more than one
# option on recent pandas versions.
pd.set_option('display.max_columns', None)

df = pd.read_csv("../input/fifa-18-demo-player-dataset/CompleteDataset.csv", index_col=0)

import re

import numpy as np

footballers = df.copy()

# The last character of 'Value' is taken as the unit suffix.
# presumably 'M' = millions and 'K' = thousands -- verify against the data.
footballers['Unit'] = df['Value'].str[-1]

# Strip the first character and the unit suffix to get the numeric text.
# A value with no unit suffix ends in the digit '0'; stripping it would leave
# an empty string that cannot be cast to float, so those rows are set to 0.
footballers['Value (M)'] = np.where(
    footballers['Unit'] == '0',
    0,
    footballers['Value'].str[1:-1].str.replace(r'[a-zA-Z]', '', regex=True),
)

footballers['Value (M)'] = footballers['Value (M)'].astype(float)

# Rows whose unit is 'M' are kept as-is; everything else is divided by 1000.
footballers['Value (M)'] = np.where(footballers['Unit'] == 'M',
                                    footballers['Value (M)'],
                                    footballers['Value (M)'] / 1000)

# Overwrite 'Value' with the numeric figure and keep only the first listed
# preferred position as 'Position'.
footballers = footballers.assign(Value=footballers['Value (M)'],
                                 Position=footballers['Preferred Positions'].str.split().str[0])

# --- Multivariate scatter plots ---

import seaborn as sns

# Colour encodes the position; the regression line is switched off.
sns.lmplot(x='Value', y='Overall', hue='Position',
           data=footballers.loc[footballers['Position'].isin(['ST', 'RW', 'LW'])],
           fit_reg=False)

# Same plot, additionally using a distinct marker per position.
sns.lmplot(x='Value', y='Overall', markers=['o', 'x', '*'], hue='Position',
           data=footballers.loc[footballers['Position'].isin(['ST', 'RW', 'LW'])],
           fit_reg=False
          )

# --- Grouped box plot: shows the advantage of grouping ---

f = (footballers
         .loc[footballers['Position'].isin(['ST', 'GK'])]
         .loc[:, ['Value', 'Overall', 'Aggression', 'Position']]
    )

# Restrict to players with 80 <= Overall < 85.
f = f[f["Overall"] >= 80]

f = f[f["Overall"] < 85]

f['Aggression'] = f['Aggression'].astype(float)

sns.boxplot(x="Overall", y="Aggression", hue='Position', data=f)

# --- Heatmap ---

# Convert cells that are purely decimal digits to int and blank out the rest,
# then correlate the remaining complete rows. `str(v)` lets the check also
# accept cells that are not already strings.
f = (
    footballers.loc[:, ['Acceleration', 'Aggression', 'Agility', 'Balance', 'Ball control']]
        .applymap(lambda v: int(v) if str(v).isdecimal() else np.nan)
        .dropna()
).corr()

sns.heatmap(f, annot=True)

# --- Parallel Coordinates ---

from pandas.plotting import parallel_coordinates

# Columns 12..16 by position, for strikers and goalkeepers, cleaned the same
# way as for the heatmap.
f = (
    footballers.iloc[:, 12:17]
        .loc[footballers['Position'].isin(['ST', 'GK'])]
        .applymap(lambda v: int(v) if str(v).isdecimal() else np.nan)
        .dropna()
)

# Re-attach the class column (aligned on the index) used to colour the lines.
f['Position'] = footballers['Position']

f = f.sample(200)

parallel_coordinates(f, 'Position')

plotly

import pandas as pd

# Load the wine reviews, using the first CSV column as the row index.
reviews = pd.read_csv("../input/wine-reviews/winemag-data-130k-v2.csv", index_col=0)

reviews.head()

from plotly.offline import init_notebook_mode, iplot

init_notebook_mode(connected=True)  # enable offline plotting inside the notebook

import plotly.graph_objs as go

# Scatter plot of the first 1000 reviews.
iplot([go.Scatter(x=reviews.head(1000)['points'], y=reviews.head(1000)['price'], mode='markers')])

# Density contours (first 500 reviews) overlaid with the scatter points.
# `contours` is given as a plain dict rather than the deprecated top-level
# `go.Contours` class.
iplot([go.Histogram2dContour(x=reviews.head(500)['points'],
                             y=reviews.head(500)['price'],
                             contours=dict(coloring='heatmap')),
       go.Scatter(x=reviews.head(1000)['points'], y=reviews.head(1000)['price'], mode='markers')])

# --- Surface plot ---

# Group by points and then price, count rows via the helper column 'n', and
# turn the group keys back into ordinary columns.
df = reviews.assign(n=0).groupby(['points', 'price'])['n'].count().reset_index()

df = df[df["price"] < 100]

# Reshape into a price x points grid, fill missing cells with 0, and convert
# the values to a nested list.
v = df.pivot(index='price', columns='points', values='n').fillna(0).values.tolist()

iplot([go.Surface(z=v)])

# --- Geographic map (choropleth) ---

# Review counts per country; "US" is renamed to "United States" before the
# labels are used with locationmode='country names'.
df = reviews['country'].replace("US", "United States").value_counts()

iplot([go.Choropleth(
    locationmode='country names',
    locations=df.index.values,
    text=df.index,
    z=df.values
)])

Data Visualisation Cheat Sheet的更多相关文章

Object对象方法 cheet sheet
defineProperty create Object.create(prototype [, propertiesObject ]) prototype:没什么可说的,指定对象的原型 proper ...
使用Python对Twitter进行数据挖掘(Mining Twitter Data with Python)
目录 1.Collecting data 1.1 Register Your App 1.2 Accessing the Data 1.3 Streaming 2.Text Pre-processin ...
学习笔记之Data Visualization
Data visualization - Wikipedia https://en.wikipedia.org/wiki/Data_visualization Data visualization o ...
Mining Twitter Data with Python
目录 1.Collecting data 1.1 Register Your App 1.2 Accessing the Data 1.3 Streaming 2.Text Pre-processin ...
Import Data from *.xlsx file to DB Table through OAF page(转)
Use Poi.jar Import Data from *.xlsx file to DB Table through OAF page Use Jxl.jar Import Data from ...
【翻译】Awesome R资源大全中文版来了，全球最火的R工具包一网打尽，超过300+工具，还在等什么？
0.前言虽然很早就知道R被微软收购,也很早知道R在统计分析处理方面很强大,开始一直没有行动过...直到直到12月初在微软技术大会,看到我软的工程师演示R的使用,我就震惊了,然后最近在网上到处了解和 ...
R统计分析处理
[翻译]Awesome R资源大全中文版来了,全球最火的R工具包一网打尽,超过300+工具,还在等什么? 阅读目录 0.前言 1.集成开发环境 2.语法 3.数据操作 4.图形显示 5.HTML部件 ...
Sed&awk笔记之awk篇
http://blog.csdn.net/a81895898/article/details/8482333 Awk是什么 Awk.sed与grep,俗称Linux下的三剑客,它们之间有很多相似点,但 ...
R工具包一网打尽
这里有很多非常不错的R包和工具. 该想法来自于awesome-machine-learning. 这里是包的导航清单,看起来更方便 >>>导航清单通过这些翻译了解这些工具包,以后干 ...

随机推荐

2019牛客暑期多校赛（第三场）B-求01串中的最长01数量相等的子串和子序列
https://ac.nowcoder.com/acm/contest/883/B 首先先把0所在的位置变-1,1所在位置变1,然后统计一个前缀和,用sum[i]表示. 那么如果从起点开始的话只要满足 ...
牛客NOIP暑期七天营-TG3 赛后题解
目录牛客NOIP暑期七天营-提高组3 A-破碎的矩阵题目描述 link 题解代码 B-点与面题目描述 link 题解代码 C-信息传递题目描述 link 题解牛客NOIP暑期七天营-提高 ...
html常用标签详解2-图片标签详解
<img /> 1.图片标签的属性图片标签属于行内块元素,它自身的属性有一下几个,听我娓娓道来: src:图片资源的路径(resourse),可以使绝对路径,也可以是相对路径绝对路径: ...
常见的5个runtime exception
NullPointException(空指针异常),ArrIndexOutOfBoundsException(数组越界异常),ClassCastException(类型转换异常),ClassNotFo ...
leyou_05_文件上传
1.搭建一个新的微服务Ly-upload用来上传文件 2.导入文件上传到额依赖 <dependencies> <dependency> <groupId>org.s ...
计算机程序是怎么通过cpu，内存，硬盘运行起来的？
虽然以前知道计算机里有CPU,内存,硬盘,显卡这么些东西,我还真不知道这些东西是怎么协作起来完成一段程序的,能写出程序却不懂程序,也不会向别人解释他们的关系,所以特意总结了一下,写的比较浅显,和我一样 ...
封装原生JavaScript的ajax
function obj2str(data) { data = data || {}; // 如果没有传参, 为了添加随机因子,必须自己创建一个对象 data.t = new Date().getTi ...
IO流19（完） --- RandomAccessFile实现数据的插入 --- 技术搬运工（尚硅谷）
原hello.txt文件中的内容:abcdefghijklmn 想要实现的效果是,将xyz插入到abc后面,将文件内容变成:abcxyzdefghijklmn @Test public void te ...
ssm整合：搭建环境
解决配置中文过滤器后,存入数据库时依旧乱码问题:在web.xml中修改数据库url如下: <property name="jdbcUrl" value="jdbc: ...
专访阿里云资深技术专家黄省江：中国SaaS公司的成功之路
笔者采访中国SaaS厂商10多年,深感面对获客成本巨大.产品技术与功能成熟度不足.项目经营模式难以大规模复制.客户观念有待转变等诸多挑战,很多中国SaaS公司的经营状况都不容乐观. 7月26日,阿里云 ...

Data Visualisation Cheat Sheet

Univariate plotting with pandas

Bivariate plotting with pandas

Plotting with seaborn

Faceting with seaborn

Multivariate plotting

plotly

Data Visualisation Cheat Sheet的更多相关文章

随机推荐

热门专题