recommend

li_volleyball

2016年3月20日

library(recommenderlab)
library(ggplot2)
#
# Load the MovieLense rating data set (made available by recommenderlab).
data(MovieLense)
dim(MovieLense)
## [1] 943 1664
MovieLense
## 943 x 1664 rating matrix of class 'realRatingMatrix' with 99392 ratings.
# Visualize the rating matrix for a random sample of 500 rows.
image(sample(MovieLense, 500), main = "Raw ratings")
# Histogram of the raw ratings. The title previously said "normalized", but
# no normalization has been applied at this point.
qplot(getRatings(MovieLense), binwidth = 1,
      main = "histogram of raw ratings", xlab = "Ratings")
summary(getRatings(MovieLense))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 3.00 4.00 3.53 4.00 5.00
# Z-score normalized ratings: histogram followed by summary statistics.
qplot(
  getRatings(normalize(MovieLense, method = "Z-score")),
  main = "hist of normalized ratings",
  xlab = "rating"
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
summary(getRatings(normalize(MovieLense, method = "Z-score")))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -4.8520 -0.6466 0.1084 0.0000 0.7506 4.1280
# The two plots were fused onto a single line, which R cannot parse; they are
# separate statements.
#
# Histogram of how many movies each user rated. The x axis carries the
# per-user rating count and the y axis the number of users, so the axis labels
# are set accordingly (they were swapped before).
qplot(rowCounts(MovieLense), binwidth = 10,
      main = "Movies rated on Average",
      xlab = "# of movies rated", ylab = "# of users")
# Histogram of the mean rating received by each movie.
qplot(colMeans(MovieLense), binwidth = 0.1,
      main = "Mean ratings of Movies",
      xlab = "Rating", ylab = "# of movies")
# List the recommender methods registered for real-valued rating matrices,
# together with their descriptions and default parameters.
recommenderRegistry$get_entries(dataType="realRatingMatrix")
## $IBCF_realRatingMatrix
## Recommender method: IBCF
## Description: Recommender based on item-based collaborative filtering (real data).
## Parameters:
## k method normalize normalize_sim_matrix alpha na_as_zero minRating
## 1 30 Cosine center FALSE 0.5 FALSE NA
##
## $PCA_realRatingMatrix
## Recommender method: PCA
## Description: Recommender based on PCA approximation (real data).
## Parameters:
## categories method normalize normalize_sim_matrix alpha na_as_zero
## 1 20 Cosine center FALSE 0.5 FALSE
## minRating
## 1 NA
##
## $POPULAR_realRatingMatrix
## Recommender method: POPULAR
## Description: Recommender based on item popularity (real data).
## Parameters: None
##
## $RANDOM_realRatingMatrix
## Recommender method: RANDOM
## Description: Produce random recommendations (real ratings).
## Parameters: None
##
## $SVD_realRatingMatrix
## Recommender method: SVD
## Description: Recommender based on EM-based SVD approximation from package bcv (real data).
## Parameters:
## approxRank maxiter normalize minRating
## 1 NA 100 center NA
##
## $UBCF_realRatingMatrix
## Recommender method: UBCF
## Description: Recommender based on user-based collaborative filtering (real data).
## Parameters:
## method nn sample normalize minRating
## 1 cosine 25 FALSE center NA
# Evaluation scheme: one 90/10 train/test split, 10 items given per test
# user, and ratings >= 4 treated as "good".
scheme <- evaluationScheme(
  MovieLense,
  method = "split",
  train = 0.9,
  k = 1,
  given = 10,
  goodRating = 4
)

scheme
## Evaluation scheme with 10 items given
## Method: 'split' with 1 run(s).
## Training set proportion: 0.900
## Good ratings: >=4.000000
## Data set: 943 x 1664 rating matrix of class 'realRatingMatrix' with 99392 ratings.
# Recommenders to compare; every one normalizes ratings with Z-scores.
algorithms <- list(
  "random items" = list(
    name = "RANDOM",
    param = list(normalize = "Z-score")
  ),
  "popular items" = list(
    name = "POPULAR",
    param = list(normalize = "Z-score")
  ),
  "user-based CF" = list(
    name = "UBCF",
    param = list(
      normalize = "Z-score",
      method = "Cosine",
      nn = 50,
      minRating = 3
    )
  ),
  "item-based CF" = list(
    name = "IBCF",
    param = list(normalize = "Z-score", method = "Cosine")
  )
)
# Run every algorithm under the scheme for top-N lists of increasing length.
results <- evaluate(scheme, algorithms, n = c(1, 3, 5, 10, 15, 20))
## RANDOM run fold/sample [model time/prediction time]
## 1 [0.02sec/1.13sec]
## POPULAR run fold/sample [model time/prediction time]
## 1 [0.14sec/0.2sec]
## UBCF run fold/sample [model time/prediction time]
## 1 [0.11sec/52.33sec]
## IBCF run fold/sample [model time/prediction time]
## 1 [348.01sec/0.66sec]
# Default comparison plot of all four recommenders, each curve annotated.
# NOTE(review): the default plot type is presumably ROC (TPR vs FPR) -- confirm
# against the recommenderlab docs.
plot(results, annotate = seq_len(4), legend = "topleft")
# Precision/recall curves; only the third recommender is annotated.
plot(results, "prec/rec", annotate = 3)
summary(results)
## Length Class Mode
## random items 1 evaluationResults S4
## popular items 1 evaluationResults S4
## user-based CF 1 evaluationResults S4
## item-based CF 1 evaluationResults S4
print(results)
## List of evaluation results for 4 recommenders:
## Evaluation results for 1 folds/samples using method 'RANDOM'.
## Evaluation results for 1 folds/samples using method 'POPULAR'.
## Evaluation results for 1 folds/samples using method 'UBCF'.
## Evaluation results for 1 folds/samples using method 'IBCF'.
library(plyr)
# Average the evaluation metrics per recommender and stack the per-recommender
# tables into one data frame; the recommender name ends up in column `.id`.
result1 <- ldply(.data = avg(results))
head(result1, n = 6L)
## .id TP FP FN TN precision
## 1 random items 0.00000000 1.000000 47.75789 1605.242 0.00000000
## 2 random items 0.05263158 2.947368 47.70526 1603.295 0.01754386
## 3 random items 0.09473684 4.905263 47.66316 1601.337 0.01894737
## 4 random items 0.23157895 9.768421 47.52632 1596.474 0.02315789
## 5 random items 0.32631579 14.673684 47.43158 1591.568 0.02175439
## 6 random items 0.48421053 19.515789 47.27368 1586.726 0.02421053
## recall TPR FPR
## 1 0.000000000 0.000000000 0.0006231881
## 2 0.000420633 0.000420633 0.0018345901
## 3 0.001343461 0.001343461 0.0030535159
## 4 0.002965187 0.002965187 0.0060813035
## 5 0.004276282 0.004276282 0.0091353054
## 6 0.007966717 0.007966717 0.0121507535
# Append the top-N list length to each recommender label. The six lengths are
# recycled across the recommenders, matching the `n` passed to evaluate().
result1[, 1] <- paste(result1[, 1], c(1, 3, 5, 10, 15, 20))
# Select the label, precision and recall columns by name. Positional indices
# (1, 6, 7) silently pick the wrong metrics if avg() returns its columns in a
# different layout.
temp_result1 <- result1[, c(".id", "precision", "recall")]
# F1 score: the harmonic mean of precision and recall, 2 * p * r / (p + r).
#
# p: numeric vector of precision values.
# r: numeric vector of recall values (same length as `p`, or recyclable).
# Returns a numeric vector of F1 scores; entries where p + r == 0 are 0.
#
# The previous body, `return(2*p*r)/(p+r)`, returned 2 * p * r immediately, so
# the division by (p + r) was never evaluated.
f <- function(p, r) {
  total <- p + r
  score <- 2 * p * r / total
  # 0 / 0 yields NaN when precision and recall are both zero; by convention
  # the F1 score is 0 in that case.
  score[!is.na(total) & total == 0] <- 0
  score
}
# Add column `f`, computed by f() from the second and third columns of
# temp_result1, to the full metrics table.
result1_f <- cbind(
  result1,
  f = f(temp_result1[[2]], temp_result1[[3]])
)
head(result1_f, n = 6L)
## .id TP FP FN TN precision
## 1 random items 1 0.00000000 1.000000 47.75789 1605.242 0.00000000
## 2 random items 3 0.05263158 2.947368 47.70526 1603.295 0.01754386
## 3 random items 5 0.09473684 4.905263 47.66316 1601.337 0.01894737
## 4 random items 10 0.23157895 9.768421 47.52632 1596.474 0.02315789
## 5 random items 15 0.32631579 14.673684 47.43158 1591.568 0.02175439
## 6 random items 20 0.48421053 19.515789 47.27368 1586.726 0.02421053
## recall TPR FPR f
## 1 0.000000000 0.000000000 0.0006231881 0.000000e+00
## 2 0.000420633 0.000420633 0.0018345901 1.475905e-05
## 3 0.001343461 0.001343461 0.0030535159 5.091011e-05
## 4 0.002965187 0.002965187 0.0060813035 1.373350e-04
## 5 0.004276282 0.004276282 0.0091353054 1.860558e-04
## 6 0.007966717 0.007966717 0.0121507535 3.857568e-04
# Show the six rows with the highest value in column `f`.
head(result1_f[order(-result1_f[["f"]]), ], n = 6L)
## .id TP FP FN TN precision
## 18 user-based CF 20 6.094737 12.273684 41.66316 1593.968 0.3381538
## 17 user-based CF 15 4.978947 8.915789 42.77895 1597.326 0.3629917
## 16 user-based CF 10 3.684211 5.684211 44.07368 1600.558 0.3948758
## 12 popular items 20 5.368421 14.631579 42.38947 1591.611 0.2684211
## 11 popular items 15 4.421053 10.578947 43.33684 1595.663 0.2947368
## 15 user-based CF 5 2.157895 2.610526 45.60000 1603.632 0.4532609
## recall TPR FPR f
## 18 0.16566075 0.16566075 0.007591384 0.11203762
## 17 0.13829264 0.13829264 0.005510458 0.10039817
## 16 0.10324963 0.10324963 0.003508480 0.08154156
## 12 0.12821289 0.12821289 0.009037106 0.06883008
## 11 0.11281484 0.11281484 0.006530564 0.06650138
## 15 0.06796729 0.06796729 0.001609646 0.06161383
# Fit the model: train a user-based CF recommender on the full data set, then
# produce a top-20 list for every user in it and print the lists.
moive_re <- Recommender(data = MovieLense, method = "UBCF")
moives_pr <- predict(moive_re, newdata = MovieLense, n = 20)
print(as(moives_pr, "list"))
## [[1]]
## [1] "Glory (1989)"
## [2] "Schindler's List (1993)"
## [3] "Casablanca (1942)"
## [4] "Close Shave, A (1995)"
## [5] "Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1963)"
## [6] "Leaving Las Vegas (1995)"
## [7] "One Flew Over the Cuckoo's Nest (1975)"
## [8] "Rear Window (1954)"
## [9] "Heathers (1989)"
## [10] "L.A. Confidential (1997)"
## [11] "City of Lost Children, The (1995)"
## [12] "Butch Cassidy and the Sundance Kid (1969)"
## [13] "Titanic (1997)"
## [14] "Lawrence of Arabia (1962)"
## [15] "Shine (1996)"
## [16] "Stand by Me (1986)"
## [17] "Gandhi (1982)"
## [18] "To Kill a Mockingbird (1962)"
## [19] "In the Name of the Father (1993)"
## [20] "Harold and Maude (1971)"
##
## [[2]]
## [1] "Boot, Das (1981)"
## [2] "Dead Man Walking (1995)"
## [3] "Lone Star (1996)"
## [4] "Return of the Jedi (1983)"
## [5] "Celluloid Closet, The (1995)"
## [6] "Casablanca (1942)"
## [7] "Citizen Kane (1941)"
## [8] "Godfather: Part II, The (1974)"
## [9] "2001: A Space Odyssey (1968)"
## [10] "When We Were Kings (1996)"
## [11] "Diva (1981)"
## [12] "Close Shave, A (1995)"
## [13] "Tango Lesson, The (1997)"
## [14] "Beautiful Thing (1996)"
## [15] "Empire Strikes Back, The (1980)"
## [16] "Mrs. Dalloway (1997)"
## [17] "Butch Cassidy and the Sundance Kid (1969)"
## [18] "My Fair Lady (1964)"
## [19] "Bonnie and Clyde (1967)"
## [20] "Annie Hall (1977)"
##
## [[3]]
## [1] "Mrs. Brown (Her Majesty, Mrs. Brown) (1997)"
## [2] "Star Wars (1977)"
## [3] "Pulp Fiction (1994)"
## [4] "English Patient, The (1996)"
## [5] "Full Monty, The (1997)"
## [6] "Lone Star (1996)"
## [7] "Titanic (1997)"
## [8] "Sweet Hereafter, The (1997)"
## [9] "In the Company of Men (1997)"
## [10] "Willy Wonka and the Chocolate Factory (1971)"
## [11] "In & Out (1997)"
## [12] "Vertigo (1958)"
## [13] "As Good As It Gets (1997)"
## [14] "Apt Pupil (1998)"
## [15] "Dazed and Confused (1993)"
## [16] "Ice Storm, The (1997)"
## [17] "This Is Spinal Tap (1984)"
## [18] "Trainspotting (1996)"
## [19] "Heat (1995)"
## [20] "Fargo (1996)"
##
## [[4]]
## [1] "Titanic (1997)" "English Patient, The (1996)"
## [3] "L.A. Confidential (1997)" "Game, The (1997)"
## [5] "Good Will Hunting (1997)" "Kiss the Girls (1997)"
## [7] "Full Monty, The (1997)" "Usual Suspects, The (1995)"
## [9] "Rosewood (1997)" "Boogie Nights (1997)"
## [11] "Raise the Red Lantern (1991)" "Pulp Fiction (1994)"
## [13] "Toy Story (1995)" "Love Jones (1997)"
## [15] "Eve's Bayou (1997)" "Edge, The (1997)"
## [17] "Sting, The (1973)" "Some Like It Hot (1959)"
## [19] "Strictly Ballroom (1992)" "Soul Food (1997)"
##
## [[5]]
## [1] "Terminator 2: Judgment Day (1991)"
## [2] "Terminator, The (1984)"
## [3] "Usual Suspects, The (1995)"
## [4] "Contact (1997)"
## [5] "Braveheart (1995)"
## [6] "Casablanca (1942)"
## [7] "Twelve Monkeys (1995)"
## [8] "Godfather, The (1972)"
## [9] "Shawshank Redemption, The (1994)"
## [10] "Raising Arizona (1987)"
## [11] "Amadeus (1984)"
## [12] "Nikita (La Femme Nikita) (1990)"
## [13] "Reservoir Dogs (1992)"
## [14] "Citizen Kane (1941)"
## [15] "Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1963)"
## [16] "Schindler's List (1993)"
## [17] "Titanic (1997)"
## [18] "Leaving Las Vegas (1995)"
## [19] "North by Northwest (1959)"
## [20] "Army of Darkness (1993)"
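# S4 classes are one of R's standard object-oriented systems: S4 objects have explicit class definitions, slot (parameter) definitions, validity checking, inheritance, and instantiation.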

R语言 推荐算法 recommenderlab包的更多相关文章

  1. R语言中的机器学习包

    R语言中的机器学习包   Machine Learning & Statistical Learning (机器学习 & 统计学习)  网址:http://cran.r-project ...

  2. R语言中的数据处理包dplyr、tidyr笔记

    R语言中的数据处理包dplyr.tidyr笔记   dplyr包是Hadley Wickham的新作,主要用于数据清洗和整理,该包专注dataframe数据格式,从而大幅提高了数据处理速度,并且提供了 ...

  3. r语言,安装外部包 警告: 无法将临时安装

    安装R语言中的外部包时,出现错误提示 试开URL’https://mirrors.tuna.tsinghua.edu.cn/CRAN/bin/windows/contrib/3.3/ggplot2_2 ...

  4. R语言分类算法之随机森林

    R语言分类算法之随机森林 1.原理分析: 随机森林是通过自助法(boot-strap)重采样技术,从原始训练样本集N中有放回地重复随机抽取k个样本生成新的训练集样本集合,然后根据自助样本集生成k个决策 ...

  5. R语言—如何安装Github包的解决方法,亲测有效

    R语言—如何安装Github包的解决方法,亲测有效 准备安装材料: R包-REmap GitHub下载地址:https://github.com/lchiffon/REmap R包-baidumap ...

  6. R语言:关于rJava包的安装

    R语言:关于rJava包的安装  盐池里的萝卜 2014-09-14 00:53:33 在做文本挖掘的时候,会发现分词时候rJava是必须要迈过去的坎儿,所以进行了总结: 第一步:安装rJava和jd ...

  7. R语言机器学习之caret包运用

    在大数据如火如荼的时候,机器学习无疑成为了炙手可热的工具,机器学习是计算机科学和统计学的交叉学科, 旨在通过收集和分析数据的基础上,建立一系列的算法,模型对实际问题进行预测或分类. R语言无疑为我们提 ...

  8. R语言之数据处理常用包

    dplyr包是Hadley Wickham的新作,主要用于数据清洗和整理,该包专注dataframe数据格式,从而大幅提高了数据处理速度,并且提供了与其它数据库的接口:tidyr包的作者是Hadley ...

  9. R语言 神经网络算法

    人工神经网络(ANN),简称神经网络,是一种模仿生物神经网络的结构和功能的数学模型或计算模型.神经网络由大量的人工神经元联结进行计算.大多数情况下人工神经网络能在外界信息的基础上改变内部结构,是一种自 ...

随机推荐

  1. A.Kaw矩阵代数初步学习笔记 5. System of Equations

    “矩阵代数初步”(Introduction to MATRIX ALGEBRA)课程由Prof. A.K.Kaw(University of South Florida)设计并讲授. PDF格式学习笔 ...

  2. HDU 5907 Find Q(简单字符串)

    传送门 Description Byteasar is addicted to the English letter 'q'. Now he comes across a string S consi ...

  3. 软件产品案例分析--K米

    软件产品案例分析--K米 第一部分 调研,评测 评测 个人第一次上手体验 使用的第一款点歌软件,以为就是个遥控而已,使用后发现功能还挺多,能点挺久.觉得很方便,不用挤成一堆点歌了.K米的脸蛋(UI)好 ...

  4. Linux 吃掉我的内存

    在Windows下资源管理器查看内存使用的情况,如果使用率达到80%以上,再运行大程序就能感觉到系统不流畅了,因为在内存紧缺的情况下使用交换分区,频繁地从磁盘上换入换出页会极大地影响系统的性能.而当我 ...

  5. DNS(一)之禁用权威域名服务器递归解析

    DNS dns是互联网中最核心的带层级的分布式系统,负责把域名解析成ip,把IP解析出域名,以及宣告邮件路由信息等等,使得使用域名访问网站,收发邮件成了可能. bind(berkeley Intern ...

  6. JavaScript Ajax之美~

    JavaScript Ajax之美~ 曾经有一段时期,因为开发人员对JavaScript的滥用导致其遭受了一段时间的冷门时期,不被大家看好,后来,到了2005年,Google公司的很多技术都是用了aj ...

  7. SVM支持向量机的高维映射与核函数-记录毕业论文2

    上一篇博客将了在数据集线性可分的情况下的支持向量机,这篇主要记录如何通过映射到高维解决线性不可分的数据集和如何通过核函数减少内积计算量的理论思想. [5]径向基函数的核函数:https://www.q ...

  8. easyUI-combobox 后台导入Json数据的方法

    一.前台页面: <input id="List" class="easyui-combobox" data-options="valueFiel ...

  9. pyqt2_官网教程

    https://pythonspot.com/en/pyqt4/ Articles You can find a collection of PyQT articles below. Applicat ...

  10. IntelliJ IDEA 远程调试

    远程调试服务器是一个比较实用的技巧,以便我们能够迅速定位线上问题.本文会介绍如何在IntelliJ IDEA中进行远程调试. 配置IntelliJ IDEA 选择Edit Configurations ...