R语言 推荐算法 recommenderlab包
recommend
li_volleyball
2016年3月20日
library(recommenderlab)
library(ggplot2)
#
data(MovieLense)
dim(MovieLense)
## [1] 943 1664
MovieLense
## 943 x 1664 rating matrix of class 'realRatingMatrix' with 99392 ratings.
image(sample(MovieLense,500),main="Raw ratings")
# Histogram of the raw rating values. These ratings are NOT normalized
# (normalization is applied in the Z-score plot further down), so the title
# must not say "normalized".
qplot(getRatings(MovieLense),binwidth=1,main="histogram of raw ratings",xlab = "Ratings")
summary(getRatings(MovieLense))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 3.00 4.00 3.53 4.00 5.00
#normalized ratings
qplot(getRatings(normalize(MovieLense,method="Z-score")),main="hist of normalized ratings",xlab="rating")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
summary(getRatings(normalize(MovieLense,method="Z-score")))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -4.8520 -0.6466 0.1084 0.0000 0.7506 4.1280
# rowCounts() returns, for each row (user) of the rating matrix, how many
# movies that user rated. The histogram therefore has "movies rated" on the
# x axis and the number of users in each bin on the y axis.
qplot(rowCounts(MovieLense), binwidth=10, main="Movies rated on Average", xlab="# of movies rated", ylab="# of users")
qplot(colMeans(MovieLense), binwidth=0.1, main="Mean ratings of Movies", xlab="Rating", ylab="# of movies")
recommenderRegistry$get_entries(dataType="realRatingMatrix")
## $IBCF_realRatingMatrix
## Recommender method: IBCF
## Description: Recommender based on item-based collaborative filtering (real data).
## Parameters:
## k method normalize normalize_sim_matrix alpha na_as_zero minRating
## 1 30 Cosine center FALSE 0.5 FALSE NA
##
## $PCA_realRatingMatrix
## Recommender method: PCA
## Description: Recommender based on PCA approximation (real data).
## Parameters:
## categories method normalize normalize_sim_matrix alpha na_as_zero
## 1 20 Cosine center FALSE 0.5 FALSE
## minRating
## 1 NA
##
## $POPULAR_realRatingMatrix
## Recommender method: POPULAR
## Description: Recommender based on item popularity (real data).
## Parameters: None
##
## $RANDOM_realRatingMatrix
## Recommender method: RANDOM
## Description: Produce random recommendations (real ratings).
## Parameters: None
##
## $SVD_realRatingMatrix
## Recommender method: SVD
## Description: Recommender based on EM-based SVD approximation from package bcv (real data).
## Parameters:
## approxRank maxiter normalize minRating
## 1 NA 100 center NA
##
## $UBCF_realRatingMatrix
## Recommender method: UBCF
## Description: Recommender based on user-based collaborative filtering (real data).
## Parameters:
## method nn sample normalize minRating
## 1 cosine 25 FALSE center NA
scheme <- evaluationScheme(MovieLense, method="split", train=0.9, k=1, given=10, goodRating=4)
scheme
## Evaluation scheme with 10 items given
## Method: 'split' with 1 run(s).
## Training set proportion: 0.900
## Good ratings: >=4.000000
## Data set: 943 x 1664 rating matrix of class 'realRatingMatrix' with 99392 ratings.
# Named list of recommender configurations to compare with evaluate().
# Each entry's name is the display label used in results/plots; `name` is the
# method name from recommenderRegistry and `param` is that method's parameter list.
# NOTE(review): the registry listing printed above shows "Parameters: None" for
# RANDOM and POPULAR -- confirm that `normalize` is actually honoured for them.
algorithms <- list(
"random items" = list(name="RANDOM", param=list(normalize = "Z-score")),
"popular items" = list(name="POPULAR", param=list(normalize = "Z-score")),
"user-based CF" = list(name="UBCF", param=list(normalize = "Z-score", method="Cosine", nn=50, minRating=3)),
"item-based CF" = list(name="IBCF", param=list(normalize = "Z-score", method="Cosine"))
)
# run algorithms, predict next n movies
results <- evaluate(scheme, algorithms, n=c(1, 3, 5, 10, 15, 20))
## RANDOM run fold/sample [model time/prediction time]
## 1 [0.02sec/1.13sec]
## POPULAR run fold/sample [model time/prediction time]
## 1 [0.14sec/0.2sec]
## UBCF run fold/sample [model time/prediction time]
## 1 [0.11sec/52.33sec]
## IBCF run fold/sample [model time/prediction time]
## 1 [348.01sec/0.66sec]
plot(results, annotate = 1:4, legend="topleft")
# See precision / recall
plot(results, "prec/rec", annotate=3)
summary(results)
## Length Class Mode
## random items 1 evaluationResults S4
## popular items 1 evaluationResults S4
## user-based CF 1 evaluationResults S4
## item-based CF 1 evaluationResults S4
print(results)
## List of evaluation results for 4 recommenders:
## Evaluation results for 1 folds/samples using method 'RANDOM'.
## Evaluation results for 1 folds/samples using method 'POPULAR'.
## Evaluation results for 1 folds/samples using method 'UBCF'.
## Evaluation results for 1 folds/samples using method 'IBCF'.
library(plyr)
result1<-ldply(avg(results))
head(result1)
## .id TP FP FN TN precision
## 1 random items 0.00000000 1.000000 47.75789 1605.242 0.00000000
## 2 random items 0.05263158 2.947368 47.70526 1603.295 0.01754386
## 3 random items 0.09473684 4.905263 47.66316 1601.337 0.01894737
## 4 random items 0.23157895 9.768421 47.52632 1596.474 0.02315789
## 5 random items 0.32631579 14.673684 47.43158 1591.568 0.02175439
## 6 random items 0.48421053 19.515789 47.27368 1586.726 0.02421053
## recall TPR FPR
## 1 0.000000000 0.000000000 0.0006231881
## 2 0.000420633 0.000420633 0.0018345901
## 3 0.001343461 0.001343461 0.0030535159
## 4 0.002965187 0.002965187 0.0060813035
## 5 0.004276282 0.004276282 0.0091353054
## 6 0.007966717 0.007966717 0.0121507535
# Append the top-N list size to each row label (column 1 is `.id`).
# The 6-element vector mirrors the `n` values passed to evaluate() and is
# recycled across the rows, so it relies on every algorithm contributing
# exactly these six rows in this order.
result1[,1]<-paste(result1[,1],c(1, 3, 5, 10, 15, 20))
# Keep only the label, precision and recall columns (positions 1, 6, 7 in the
# column order printed by head(result1)).
temp_result1<-result1[,c(1,6,7)]
# F1 score: the harmonic mean of precision and recall, 2*p*r / (p + r).
#
# Args:
#   p: numeric vector of precision values.
#   r: numeric vector of recall values (same length as p, or recyclable).
# Returns:
#   Numeric vector of F1 scores. Where precision and recall are both 0 the
#   score is defined as 0 instead of NaN (0/0); NA inputs yield NA.
#
# The previous version was written as `return(2*p*r)/(p+r)`. Because return()
# exits the function immediately, the division never ran and the function
# returned 2*p*r. Any `f` values printed by that version are not F1 scores.
f <- function(p, r) {
  denom <- p + r
  score <- 2 * p * r / denom
  # No true positives at all: precision = recall = 0, so report 0, not NaN.
  score[!is.na(denom) & denom == 0] <- 0
  score
}
result1_f<-cbind(result1,f=f(temp_result1[,2],temp_result1[,3]))
head(result1_f)
## .id TP FP FN TN precision
## 1 random items 1 0.00000000 1.000000 47.75789 1605.242 0.00000000
## 2 random items 3 0.05263158 2.947368 47.70526 1603.295 0.01754386
## 3 random items 5 0.09473684 4.905263 47.66316 1601.337 0.01894737
## 4 random items 10 0.23157895 9.768421 47.52632 1596.474 0.02315789
## 5 random items 15 0.32631579 14.673684 47.43158 1591.568 0.02175439
## 6 random items 20 0.48421053 19.515789 47.27368 1586.726 0.02421053
## recall TPR FPR f
## 1 0.000000000 0.000000000 0.0006231881 0.000000e+00
## 2 0.000420633 0.000420633 0.0018345901 1.475905e-05
## 3 0.001343461 0.001343461 0.0030535159 5.091011e-05
## 4 0.002965187 0.002965187 0.0060813035 1.373350e-04
## 5 0.004276282 0.004276282 0.0091353054 1.860558e-04
## 6 0.007966717 0.007966717 0.0121507535 3.857568e-04
head(result1_f[order(-result1_f$f),])
## .id TP FP FN TN precision
## 18 user-based CF 20 6.094737 12.273684 41.66316 1593.968 0.3381538
## 17 user-based CF 15 4.978947 8.915789 42.77895 1597.326 0.3629917
## 16 user-based CF 10 3.684211 5.684211 44.07368 1600.558 0.3948758
## 12 popular items 20 5.368421 14.631579 42.38947 1591.611 0.2684211
## 11 popular items 15 4.421053 10.578947 43.33684 1595.663 0.2947368
## 15 user-based CF 5 2.157895 2.610526 45.60000 1603.632 0.4532609
## recall TPR FPR f
## 18 0.16566075 0.16566075 0.007591384 0.11203762
## 17 0.13829264 0.13829264 0.005510458 0.10039817
## 16 0.10324963 0.10324963 0.003508480 0.08154156
## 12 0.12821289 0.12821289 0.009037106 0.06883008
## 11 0.11281484 0.11281484 0.006530564 0.06650138
## 15 0.06796729 0.06796729 0.001609646 0.06161383
# Apply the chosen model.
# Train a user-based collaborative filtering (UBCF) recommender on the full
# MovieLense rating matrix, using the method's default parameters.
moive_re<-Recommender(MovieLense,method="UBCF")
# Produce a top-20 recommendation list for every user in MovieLense.
moives_pr<-predict(moive_re,MovieLense,n=20)
# Coerce the predictions to a plain list (one vector of movie titles per user)
# and print it.
print(as(moives_pr,"list"))
## [[1]]
## [1] "Glory (1989)"
## [2] "Schindler's List (1993)"
## [3] "Casablanca (1942)"
## [4] "Close Shave, A (1995)"
## [5] "Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1963)"
## [6] "Leaving Las Vegas (1995)"
## [7] "One Flew Over the Cuckoo's Nest (1975)"
## [8] "Rear Window (1954)"
## [9] "Heathers (1989)"
## [10] "L.A. Confidential (1997)"
## [11] "City of Lost Children, The (1995)"
## [12] "Butch Cassidy and the Sundance Kid (1969)"
## [13] "Titanic (1997)"
## [14] "Lawrence of Arabia (1962)"
## [15] "Shine (1996)"
## [16] "Stand by Me (1986)"
## [17] "Gandhi (1982)"
## [18] "To Kill a Mockingbird (1962)"
## [19] "In the Name of the Father (1993)"
## [20] "Harold and Maude (1971)"
##
## [[2]]
## [1] "Boot, Das (1981)"
## [2] "Dead Man Walking (1995)"
## [3] "Lone Star (1996)"
## [4] "Return of the Jedi (1983)"
## [5] "Celluloid Closet, The (1995)"
## [6] "Casablanca (1942)"
## [7] "Citizen Kane (1941)"
## [8] "Godfather: Part II, The (1974)"
## [9] "2001: A Space Odyssey (1968)"
## [10] "When We Were Kings (1996)"
## [11] "Diva (1981)"
## [12] "Close Shave, A (1995)"
## [13] "Tango Lesson, The (1997)"
## [14] "Beautiful Thing (1996)"
## [15] "Empire Strikes Back, The (1980)"
## [16] "Mrs. Dalloway (1997)"
## [17] "Butch Cassidy and the Sundance Kid (1969)"
## [18] "My Fair Lady (1964)"
## [19] "Bonnie and Clyde (1967)"
## [20] "Annie Hall (1977)"
##
## [[3]]
## [1] "Mrs. Brown (Her Majesty, Mrs. Brown) (1997)"
## [2] "Star Wars (1977)"
## [3] "Pulp Fiction (1994)"
## [4] "English Patient, The (1996)"
## [5] "Full Monty, The (1997)"
## [6] "Lone Star (1996)"
## [7] "Titanic (1997)"
## [8] "Sweet Hereafter, The (1997)"
## [9] "In the Company of Men (1997)"
## [10] "Willy Wonka and the Chocolate Factory (1971)"
## [11] "In & Out (1997)"
## [12] "Vertigo (1958)"
## [13] "As Good As It Gets (1997)"
## [14] "Apt Pupil (1998)"
## [15] "Dazed and Confused (1993)"
## [16] "Ice Storm, The (1997)"
## [17] "This Is Spinal Tap (1984)"
## [18] "Trainspotting (1996)"
## [19] "Heat (1995)"
## [20] "Fargo (1996)"
##
## [[4]]
## [1] "Titanic (1997)" "English Patient, The (1996)"
## [3] "L.A. Confidential (1997)" "Game, The (1997)"
## [5] "Good Will Hunting (1997)" "Kiss the Girls (1997)"
## [7] "Full Monty, The (1997)" "Usual Suspects, The (1995)"
## [9] "Rosewood (1997)" "Boogie Nights (1997)"
## [11] "Raise the Red Lantern (1991)" "Pulp Fiction (1994)"
## [13] "Toy Story (1995)" "Love Jones (1997)"
## [15] "Eve's Bayou (1997)" "Edge, The (1997)"
## [17] "Sting, The (1973)" "Some Like It Hot (1959)"
## [19] "Strictly Ballroom (1992)" "Soul Food (1997)"
##
## [[5]]
## [1] "Terminator 2: Judgment Day (1991)"
## [2] "Terminator, The (1984)"
## [3] "Usual Suspects, The (1995)"
## [4] "Contact (1997)"
## [5] "Braveheart (1995)"
## [6] "Casablanca (1942)"
## [7] "Twelve Monkeys (1995)"
## [8] "Godfather, The (1972)"
## [9] "Shawshank Redemption, The (1994)"
## [10] "Raising Arizona (1987)"
## [11] "Amadeus (1984)"
## [12] "Nikita (La Femme Nikita) (1990)"
## [13] "Reservoir Dogs (1992)"
## [14] "Citizen Kane (1941)"
## [15] "Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1963)"
## [16] "Schindler's List (1993)"
## [17] "Titanic (1997)"
## [18] "Leaving Las Vegas (1995)"
## [19] "North by Northwest (1959)"
## [20] "Army of Darkness (1993)"
#S4 class 是一种标准的R语言面向对象实现方式,s4对象有明确的类定义,参数定义,参数检查,继承关系,实例化等面向对象系统的特征。
R语言 推荐算法 recommenderlab包的更多相关文章
- R语言中的机器学习包
R语言中的机器学习包 Machine Learning & Statistical Learning (机器学习 & 统计学习) 网址:http://cran.r-project ...
- R语言中的数据处理包dplyr、tidyr笔记
R语言中的数据处理包dplyr.tidyr笔记 dplyr包是Hadley Wickham的新作,主要用于数据清洗和整理,该包专注dataframe数据格式,从而大幅提高了数据处理速度,并且提供了 ...
- r语言,安装外部包 警告: 无法将临时安装
安装R语言中的外部包时,出现错误提示 试开URL’https://mirrors.tuna.tsinghua.edu.cn/CRAN/bin/windows/contrib/3.3/ggplot2_2 ...
- R语言分类算法之随机森林
R语言分类算法之随机森林 1.原理分析: 随机森林是通过自助法(boot-strap)重采样技术,从原始训练样本集N中有放回地重复随机抽取k个样本生成新的训练集样本集合,然后根据自助样本集生成k个决策 ...
- R语言—如何安装Github包的解决方法,亲测有效
R语言—如何安装Github包的解决方法,亲测有效 准备安装材料: R包-REmap GitHub下载地址:https://github.com/lchiffon/REmap R包-baidumap ...
- R语言:关于rJava包的安装
R语言:关于rJava包的安装 盐池里的萝卜 2014-09-14 00:53:33 在做文本挖掘的时候,会发现分词时候rJava是必须要迈过去的坎儿,所以进行了总结: 第一步:安装rJava和jd ...
- R语言机器学习之caret包运用
在大数据如火如荼的时候,机器学习无疑成为了炙手可热的工具,机器学习是计算机科学和统计学的交叉学科, 旨在通过收集和分析数据的基础上,建立一系列的算法,模型对实际问题进行预测或分类. R语言无疑为我们提 ...
- R语言之数据处理常用包
dplyr包是Hadley Wickham的新作,主要用于数据清洗和整理,该包专注dataframe数据格式,从而大幅提高了数据处理速度,并且提供了与其它数据库的接口:tidyr包的作者是Hadley ...
- R语言 神经网络算法
人工神经网络(ANN),简称神经网络,是一种模仿生物神经网络的结构和功能的数学模型或计算模型.神经网络由大量的人工神经元联结进行计算.大多数情况下人工神经网络能在外界信息的基础上改变内部结构,是一种自 ...
随机推荐
- Android知识体系图
网上看到,不知哪位大神总结的,存个档(需要放大网页才能看清)
- AngularJs $http 请求服务
$http $http是Angular的一个核心服务,它有利于浏览器通过XMLHttpRequest 对象或者 JSONP和远程HTTP服务器交互. $HTTP API 是基于 $q服务暴露的defe ...
- [Eclipse]解决: Eclipse Maven “Add Dependency”搜索无结果
转载: http://www.educity.cn/wenda/469389.html eclipse插件Maven添加依赖查询无结果的解决方法(Select Dependency doesn't w ...
- Parallel Computing–Cannon算法 (MPI 实现)
原理不解释,直接上代码 代码中被注释的源程序可用于打印中间结果,检查运算是否正确. #include "mpi.h" #include <math.h> #includ ...
- JavaWeb---总结(三)Tomcat服务器学习和使用(一)
一.Tomcat服务器端口的配置 Tomcat的所有配置都放在conf文件夹之中,里面的server.xml文件是配置的核心文件. 如果想修改Tomcat服务器的启动端口,则可以在server.xml ...
- redis理解
1. Redis是什么 redis是nosql的一种. 这个问题的结果影响了我们怎么用Redis.如果你认为Redis是一个key value store, 那可能会用它来代替MySQL:如果认为它是 ...
- Nginx个人简单理解
首先我们来补充下一些基本知识: 什么是代理服务器? 先举个简单的例子,现在我们在百度访问谷歌的网站,发现现在进不去,这个时候我们可以FQ(关于FQ,可以借鉴下这个博文:http://zhangge.n ...
- BigDecimal数据加法返回值接收
1.相加 两个BigDecimal变量a,b. 如果想进行相加,即a加b的话,返回值需要使用a进行接收,如下: a = a.add(b); BigDecimal为不可变类, 所以执行运算的结果需要再返 ...
- asp.net中的窗口弹出实现,包括分支窗口 . ASP.NET返回上一页面实现方法总结 .
返回上一页的这个东东在我们做项目的时候一般是用于填写完表单后确认的时候,有对原来输入的数据进行修改或者更新时用的,或者是因为网站为了方便浏览者而有心添加的一个东东,一般这种功能的实现在ASP.NET中 ...
- Java关键字——super
使用super关键字可以从子类中调用父类中的构造方法.普通方法和属性 与this调用构造方法的要求一样,语句必须放在子类构造方法的首行 this和super都可以调用构造方法,但是两者不能同时出现,调 ...