吴裕雄--天生自然 R语言开发学习：基本统计分析（续三）

#---------------------------------------------------------------------#

# R in Action (2nd ed): Chapter 7                                     #

# Basic statistics                                                    #

# requires packages ggm, gmodels, vcd, Hmisc, pastecs, psych,         #

#                   doBy, reshape to be installed                     #

# install.packages(c("ggm", "gmodels", "vcd", "Hmisc",                #

#                    "pastecs", "psych", "doBy", "reshape"))          #

#---------------------------------------------------------------------#

# Keep only the mpg, hp, wt and am columns of mtcars and preview the result.
mt <- mtcars[, c("mpg", "hp", "wt", "am")]
head(x = mt)

# Listing 7.1 - Descriptive stats via summary
# The subset is rebuilt here so the listing can be run on its own.
mt <- mtcars[, c("mpg", "hp", "wt", "am")]
summary(object = mt)

# Listing 7.2 - descriptive stats via sapply

# Descriptive statistics for a numeric vector.
#
# Args:
#   x:       numeric vector.
#   na.omit: if TRUE, missing values are removed from x before anything is
#            computed (default FALSE, in which case NAs propagate through
#            mean() and sd()).
#
# Returns:
#   A named numeric vector with elements
#     n        - length of x (after the optional NA removal)
#     mean     - mean(x)
#     stdev    - sd(x)
#     skew     - sum of cubed deviations from the mean, scaled by stdev^3,
#                divided by n
#     kurtosis - sum of fourth-power deviations from the mean, scaled by
#                stdev^4, divided by n, minus 3
mystats <- function(x, na.omit = FALSE) {
  if (na.omit) {
    x <- x[!is.na(x)]
  }

  n <- length(x)
  m <- mean(x)
  s <- sd(x)

  # Deviations from the mean, shared by both moment calculations.
  dev <- x - m
  third_moment <- sum(dev^3 / s^3) / n
  fourth_moment <- sum(dev^4 / s^4) / n - 3

  c(n = n, mean = m, stdev = s, skew = third_moment, kurtosis = fourth_moment)
}

# Run mystats on each of the three selected mtcars columns.
myvars <- c("mpg", "hp", "wt")
sapply(X = mtcars[myvars], FUN = mystats)

# Listing 7.3 - Descriptive stats via describe (Hmisc)
# Hmisc and psych both export a function named describe(); the unqualified
# name resolves to whichever package was attached last. The two describe()
# calls in this section are namespace-qualified so that each listing runs the
# intended implementation even when the script is re-run in a session where
# both packages are already attached.
library(Hmisc)
myvars <- c("mpg", "hp", "wt")
Hmisc::describe(mtcars[myvars])

# Listing 7.4 - Descriptive stats via stat.desc (pastecs)
library(pastecs)
myvars <- c("mpg", "hp", "wt")
stat.desc(mtcars[myvars])

# Listing 7.5 - Descriptive stats via describe (psych)
library(psych)
myvars <- c("mpg", "hp", "wt")
psych::describe(mtcars[myvars])

# Listing 7.6 - Descriptive stats by group with aggregate
# Group the three selected columns by mtcars$am (labelled "am" in the output)
# and report the per-group mean, then the per-group standard deviation.
myvars <- c("mpg", "hp", "wt")
aggregate(x = mtcars[myvars], by = list(am = mtcars$am), FUN = mean)
aggregate(x = mtcars[myvars], by = list(am = mtcars$am), FUN = sd)

# Listing 7.7 - Descriptive stats by group via by
# dstats applies mystats to every column of the data frame it receives.
dstats <- function(x) {
  sapply(x, mystats)
}

# Split the selected columns by mtcars$am and run dstats on each piece.
myvars <- c("mpg", "hp", "wt")
by(data = mtcars[myvars], INDICES = mtcars$am, FUN = dstats)

# Listing 7.8 - Descriptive stats by group via summaryBy
# Left of ~ : the variables to summarise; right of ~ : the grouping variable.
library(doBy)
summaryBy(formula = mpg + hp + wt ~ am, data = mtcars, FUN = mystats)

# Listing 7.9 - Descriptive stats by group via describeBy (psych)
library(psych)
myvars <- c("mpg", "hp", "wt")
describeBy(x = mtcars[myvars], group = list(am = mtcars$am))

# summary statistics by group via the reshape package
library(reshape)

# Summary applied to each group of values: count, mean, standard deviation.
# Note: this replaces the dstats helper defined for Listing 7.7.
dstats <- function(x) {
  c(n = length(x), mean = mean(x), sd = sd(x))
}

# Melt mtcars with am and cyl as identifiers and mpg, hp, wt as measures.
dfm <- melt(
  mtcars,
  id.vars = c("am", "cyl"),
  measure.vars = c("mpg", "hp", "wt")
)

# Aggregate the melted values with dstats for every am/cyl/variable cell.
cast(dfm, am + cyl + variable ~ ., dstats)

# frequency tables
library(vcd)
head(Arthritis)

# one way table
mytable <- with(Arthritis, table(Improved))
mytable                      # frequencies
prop.table(mytable)          # proportions
100 * prop.table(mytable)    # percentages

# two way table
# Rows are Treatment (margin 1), columns are Improved (margin 2).
mytable <- xtabs(~ Treatment + Improved, data = Arthritis)
mytable                               # frequencies
margin.table(mytable, margin = 1)     # row sums
margin.table(mytable, margin = 2)     # column sums
prop.table(mytable)                   # cell proportions
prop.table(mytable, margin = 1)       # row proportions
prop.table(mytable, margin = 2)       # column proportions
addmargins(mytable)                   # add row and column sums to table

# more complex tables
addmargins(prop.table(mytable))                        # cell proportions plus all sums
addmargins(prop.table(mytable, margin = 1), margin = 2) # row proportions plus a sum column
addmargins(prop.table(mytable, margin = 2), margin = 1) # column proportions plus a sum row

# Listing 7.10 - Two way table using CrossTable
# Treatment supplies the rows (x) and Improved the columns (y).
library(gmodels)
CrossTable(x = Arthritis$Treatment, y = Arthritis$Improved)

# Listing 7.11 - Three way table
# Cross-classify Treatment (dimension 1), Sex (dimension 2) and
# Improved (dimension 3).
mytable <- xtabs(~ Treatment+Sex+Improved, data=Arthritis)
mytable
ftable(mytable)                # flat print of the three-way table

# Marginal frequencies for each single dimension in turn.
margin.table(mytable, 1)       # Treatment
margin.table(mytable, 2)       # Sex
margin.table(mytable, 3)       # Improved (previously a repeat of dimension 2)

# Marginal frequencies for Treatment x Improved.
margin.table(mytable, c(1,3))

# Proportions of Improved within each Treatment x Sex combination,
# then the same table with a sum added over the Improved dimension.
ftable(prop.table(mytable, c(1,2)))
ftable(addmargins(prop.table(mytable, c(1, 2)), 3))

# Listing 7.12 - Chi-square test of independence
# The table object is kept under the name mytable throughout, because each
# test is called on that name.
library(vcd)
mytable <- xtabs(~ Treatment + Improved, data = Arthritis)
chisq.test(x = mytable)
mytable <- xtabs(~ Improved + Sex, data = Arthritis)
chisq.test(x = mytable)

# Fisher's exact test
mytable <- xtabs(~ Treatment + Improved, data = Arthritis)
fisher.test(x = mytable)

# Cochran-Mantel-Haenszel test
# Treatment x Improved, stratified by Sex (the third dimension).
mytable <- xtabs(~ Treatment + Improved + Sex, data = Arthritis)
mantelhaen.test(x = mytable)

# Listing 7.13 - Measures of association for a two-way table
library(vcd)
mytable <- xtabs(~ Treatment + Improved, data = Arthritis)
assocstats(x = mytable)

# Listing 7.14 Covariances and correlations
# Work with the first six columns of the state.x77 matrix.
states <- state.x77[, 1:6]
cov(x = states)
cor(x = states)
cor(x = states, method = "spearman")

# Correlate one set of columns (x) with a second set (y).
x <- states[, c("Population", "Income", "Illiteracy", "HS Grad")]
y <- states[, c("Life Exp", "Murder")]
cor(x = x, y = y)

# partial correlations
library(ggm)

# partial correlation of population and murder rate, controlling
# for income, illiteracy rate, and HS graduation rate.
# The indices refer to columns of states: 1 and 5 are the pair being
# correlated; 2, 3 and 6 are the variables being controlled for.
pcor(u = c(1, 5, 2, 3, 6), S = cov(states))

# Listing 7.15 - Testing a correlation coefficient for significance
# Tests the correlation between columns 3 and 5 of states.
cor.test(x = states[,3], y = states[,5])

# Listing 7.16 - Correlation matrix and tests of significance via corr.test
library(psych)
corr.test(x = states, use = "complete")

# t test
# Compare Prob between the two groups defined by So (UScrime from MASS).
library(MASS)
t.test(Prob ~ So, data = UScrime)

# dependent t test
# Mean and standard deviation of U1 and U2, then the paired comparison.
sapply(UScrime[c("U1", "U2")], function(x) {
  c(mean = mean(x), sd = sd(x))
})
with(UScrime, t.test(U1, U2, paired = TRUE))

# Wilcoxon two group comparison
# Group medians of Prob by So, followed by the two-sample test.
with(UScrime, by(Prob, So, median))
wilcox.test(Prob ~ So, data = UScrime)

# Paired version: medians of U1 and U2, followed by the paired test.
sapply(X = UScrime[c("U1", "U2")], FUN = median)
with(UScrime, wilcox.test(U1, U2, paired = TRUE))

# Kruskal Wallis test
# Combine the state.region factor with the state.x77 columns in one data
# frame (this replaces the earlier matrix also called states), then compare
# Illiteracy across regions.
states <- data.frame(state.region, state.x77)
kruskal.test(Illiteracy ~ state.region, data = states)

# Listing 7.17 - Nonparametric multiple comparisons

# NOTE(review): the next line downloads R code over plain HTTP and executes it
# at run time, so the script needs network access and trusts whatever the
# server returns. Consider keeping a reviewed local copy of the file instead.
# wmc() is not defined anywhere in this script; presumably it is defined by
# the sourced file - confirm.
source("http://www.statmethods.net/RiA/wmc.txt")

# Rebuild states with the same expression used in the Kruskal Wallis section,
# so this listing can be run on its own.
states <- data.frame(state.region, state.x77)

# Compare Illiteracy between regions, passing method="holm" to wmc().
wmc(Illiteracy ~ state.region, data=states, method="holm")

吴裕雄--天生自然 R语言开发学习：基本统计分析（续三）的更多相关文章

吴裕雄--天生自然 R语言开发学习：R语言的安装与配置
下载R语言和开发工具RStudio安装包先安装R
吴裕雄--天生自然 R语言开发学习：数据集和数据结构
数据集的概念数据集通常是由数据构成的一个矩形数组,行表示观测,列表示变量.表2-1提供了一个假想的病例数据集. 不同的行业对于数据集的行和列叫法不同.统计学家称它们为观测(observation)和 ...
吴裕雄--天生自然 R语言开发学习：导入数据
2.3.6 导入 SPSS 数据 IBM SPSS数据集可以通过foreign包中的函数read.spss()导入到R中,也可以使用Hmisc 包中的spss.get()函数.函数spss.get() ...
吴裕雄--天生自然 R语言开发学习：使用键盘、带分隔符的文本文件输入数据
R可从键盘.文本文件.Microsoft Excel和Access.流行的统计软件.特殊格式的文件.多种关系型数据库管理系统.专业数据库.网站和在线服务中导入数据. 使用键盘了.有两种常见的方式:用 ...
吴裕雄--天生自然 R语言开发学习：R语言的简单介绍和使用
假设我们正在研究生理发育问题,并收集了10名婴儿在出生后一年内的月龄和体重数据(见表1-).我们感兴趣的是体重的分布及体重和月龄的关系. 可以使用函数c()以向量的形式输入月龄和体重数据,此函数 ...
吴裕雄--天生自然 R语言开发学习：基础知识
1.基础数据结构 1.1 向量 # 创建向量a a <- c(1,2,3) print(a) 1.2 矩阵 #创建矩阵 mymat <- matrix(c(1:10), nrow=2, n ...
吴裕雄--天生自然 R语言开发学习：图形初阶（续二）
# ----------------------------------------------------# # R in Action (2nd ed): Chapter 3 # # Gettin ...
吴裕雄--天生自然 R语言开发学习：图形初阶（续一）
# ----------------------------------------------------# # R in Action (2nd ed): Chapter 3 # # Gettin ...
吴裕雄--天生自然 R语言开发学习：图形初阶
# ----------------------------------------------------# # R in Action (2nd ed): Chapter 3 # # Gettin ...
吴裕雄--天生自然 R语言开发学习：基本图形（续二）
#---------------------------------------------------------------# # R in Action (2nd ed): Chapter 6 ...

随机推荐

python all（）函数
1.描述all() 函数——用于判断给定的可迭代参数 iterable 中的所有元素是否都为TRUE,如果是返回 True,否则返回 False.元素除了是 0.空.FALSE 外都算 TRUE.2. ...
ZJNU 2212 - Turn-based game
Mr.Lee每隔1/x s攻击一次,cpu每隔1/y s攻击一次因为时间与答案无关,最后只看boss受到了多少次攻击所以可以在每个人的频率上同时乘以xy 即Mr.Lee每隔y s攻击一次,cpu每 ...
15 docker 网络 docker 容器之间的关系 docker link
1.案例:使用 link 关联后台与数据库创建 test1 容器 docker run -d --name test1 busybox /bin/sh -c "while true; do ...
B-树与B+树两者的区别
一个m阶的B树具有如下几个特征: 根结点至少有两个子女. 每个中间节点都包含k-1个元素和k个孩子,其中 m/2 <= k <= m 每一个叶子节点都包含k-1个元素,其中 m/2 < ...
通过Dockerfile 文件为linux images 添加新用户
要求: (1)增加一个新用户,名为mynewuser (2)让这个用户有root权限 (3)设置其密码为mynewpassword (4)Container启动后以mynewuser登录,并且直接到m ...
E - Ingredients 拓扑排序+01背包
题源:https://codeforces.com/gym/101635/attachments 题意: n行,每行给定字符串s1,s2,s3代表一些菜谱名.s2和s3是煮成是的必要条件,然后给出c和 ...
SaltStack事件驱动 – event reactor
Event是SaltStack里面的对每个事件的一个记录,它相比job更加底层,Event能记录更加详细的SaltStack事件,比如Minion服务启动后请求Master签发证书或者证书校验的过程, ...
Java Web实现用户登录界面
一.学习Java Web需要的技术: Java语言基础:算法基础.常用数据结构.编程规范. 掌握常见的数据结构和实用算法:培养良好的编程习惯. Java面向对象:封装.继承.多态等,面向对象程序设计, ...
PCoA|NMDS|STRESS|RDA |RA|Unimodal|CCA|Generalized Joint Attribute Modeling
PCoA:主坐标轴分析数值型变量使用各种距离公式,而分类变量看是否相同,比如, Aabbcc || Aaffff 其中,两个相同,4个不同,一组6个,则(6+6-2*2)=8. PC0A与PCA区别 ...
Nginx_安全2
Nginx与安全有关的配置隐藏版本号 http { server_tokens off;} 经常会有针对某个版本的nginx安全漏洞出现,隐藏nginx版本号就成了主要的安全优化手段之一,当然 ...

吴裕雄--天生自然 R语言开发学习：基本统计分析（续三）

吴裕雄--天生自然 R语言开发学习：基本统计分析（续三）的更多相关文章

随机推荐

热门专题