R语言推荐算法 recommenderlab包

recommend

li_volleyball

2016年3月20日

library(recommenderlab)

library(ggplot2)

#

# Load the MovieLense rating data set used throughout this script.
data(MovieLense)

# Dimensions of the rating matrix.
dim(MovieLense)

## [1]  943 1664

MovieLense

## 943 x 1664 rating matrix of class 'realRatingMatrix' with 99392 ratings.

# Image of the ratings for a random sample of 500 rows of the matrix.
image(sample(MovieLense, 500), main = "Raw ratings")

# Histogram of the raw ratings. The title previously said "normalized",
# but no normalization is applied to the data plotted here.
qplot(
  getRatings(MovieLense),
  binwidth = 1,
  main = "histogram of raw ratings",
  xlab = "Ratings"
)

summary(getRatings(MovieLense))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.

##    1.00    3.00    4.00    3.53    4.00    5.00

#normalized ratings

# Z-score-normalize the ratings once and reuse the values for both the
# histogram and the numeric summary.
z_ratings <- getRatings(normalize(MovieLense, method = "Z-score"))

qplot(z_ratings, main = "hist of normalized ratings", xlab = "rating")

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

summary(z_ratings)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.

## -4.8520 -0.6466  0.1084  0.0000  0.7506  4.1280

# rowCounts() is the number of ratings in each row (one row per user), so the
# x-axis is "movies rated" and the bar heights count users. The two axis
# labels were previously swapped.
qplot(
  rowCounts(MovieLense),
  binwidth = 10,
  main = "Movies rated on Average",
  xlab = "# of movies rated",
  ylab = "# of users"
)

# Distribution of the mean rating of each movie (one column per movie).
qplot(
  colMeans(MovieLense),
  binwidth = 0.1,
  main = "Mean ratings of Movies",
  xlab = "Rating",
  ylab = "# of movies"
)

# List the recommender methods registered for 'realRatingMatrix' data,
# together with their default parameters.
recommenderRegistry$get_entries(dataType="realRatingMatrix")

## $IBCF_realRatingMatrix

## Recommender method: IBCF

## Description: Recommender based on item-based collaborative filtering (real data).

## Parameters:

##    k method normalize normalize_sim_matrix alpha na_as_zero minRating

## 1 30 Cosine    center                FALSE   0.5      FALSE        NA

##

## $PCA_realRatingMatrix

## Recommender method: PCA

## Description: Recommender based on PCA approximation (real data).

## Parameters:

##   categories method normalize normalize_sim_matrix alpha na_as_zero

## 1         20 Cosine    center                FALSE   0.5      FALSE

##   minRating

## 1        NA

##

## $POPULAR_realRatingMatrix

## Recommender method: POPULAR

## Description: Recommender based on item popularity (real data).

## Parameters: None

##

## $RANDOM_realRatingMatrix

## Recommender method: RANDOM

## Description: Produce random recommendations (real ratings).

## Parameters: None

##

## $SVD_realRatingMatrix

## Recommender method: SVD

## Description: Recommender based on EM-based SVD approximation from package bcv (real data).

## Parameters:

##   approxRank maxiter normalize minRating

## 1         NA     100    center        NA

##

## $UBCF_realRatingMatrix

## Recommender method: UBCF

## Description: Recommender based on user-based collaborative filtering (real data).

## Parameters:

##   method nn sample normalize minRating

## 1 cosine 25  FALSE    center        NA

# Evaluation scheme: one 90/10 train/test split, 10 items given per test
# user, and ratings of 4 or more counted as "good".
scheme <- evaluationScheme(
  MovieLense,
  method = "split",
  train = 0.9,
  k = 1,
  given = 10,
  goodRating = 4
)

scheme

## Evaluation scheme with 10 items given

## Method: 'split' with 1 run(s).

## Training set proportion: 0.900

## Good ratings: >=4.000000

## Data set: 943 x 1664 rating matrix of class 'realRatingMatrix' with 99392 ratings.

# Recommenders to compare, keyed by display name. Each entry gives the
# registered method name and the parameters passed to it; all of them request
# Z-score normalization.
algorithms <- list(
  "random items" = list(
    name = "RANDOM",
    param = list(normalize = "Z-score")
  ),
  "popular items" = list(
    name = "POPULAR",
    param = list(normalize = "Z-score")
  ),
  "user-based CF" = list(
    name = "UBCF",
    param = list(
      normalize = "Z-score",
      method = "Cosine",
      nn = 50,
      minRating = 3
    )
  ),
  "item-based CF" = list(
    name = "IBCF",
    param = list(normalize = "Z-score", method = "Cosine")
  )
)

# Run every algorithm on the scheme and evaluate top-n lists for each n.
results <- evaluate(scheme, algorithms, n = c(1, 3, 5, 10, 15, 20))

## RANDOM run fold/sample [model time/prediction time]

##   1  [0.02sec/1.13sec]

## POPULAR run fold/sample [model time/prediction time]

##   1  [0.14sec/0.2sec]

## UBCF run fold/sample [model time/prediction time]

##   1  [0.11sec/52.33sec]

## IBCF run fold/sample [model time/prediction time]

##   1  [348.01sec/0.66sec]

# Default comparison plot of the four recommenders, annotating all curves.
plot(results, annotate = seq_len(4), legend = "topleft")

# Precision / recall plot, annotating only the third recommender's curve.
plot(results, "prec/rec", annotate = 3)

summary(results)

##               Length Class             Mode

## random items  1      evaluationResults S4

## popular items 1      evaluationResults S4

## user-based CF 1      evaluationResults S4

## item-based CF 1      evaluationResults S4

print(results)

## List of evaluation results for 4 recommenders:

## Evaluation results for 1 folds/samples using method 'RANDOM'.

## Evaluation results for 1 folds/samples using method 'POPULAR'.

## Evaluation results for 1 folds/samples using method 'UBCF'.

## Evaluation results for 1 folds/samples using method 'IBCF'.

library(plyr)

# Average the evaluation results of each algorithm, then stack the per-algorithm
# tables into one data frame; ldply() puts the list names in the `.id` column.
averaged_results <- avg(results)
result1 <- ldply(averaged_results)

head(result1)

##            .id         TP        FP       FN       TN  precision

## 1 random items 0.00000000  1.000000 47.75789 1605.242 0.00000000

## 2 random items 0.05263158  2.947368 47.70526 1603.295 0.01754386

## 3 random items 0.09473684  4.905263 47.66316 1601.337 0.01894737

## 4 random items 0.23157895  9.768421 47.52632 1596.474 0.02315789

## 5 random items 0.32631579 14.673684 47.43158 1591.568 0.02175439

## 6 random items 0.48421053 19.515789 47.27368 1586.726 0.02421053

##        recall         TPR          FPR

## 1 0.000000000 0.000000000 0.0006231881

## 2 0.000420633 0.000420633 0.0018345901

## 3 0.001343461 0.001343461 0.0030535159

## 4 0.002965187 0.002965187 0.0060813035

## 5 0.004276282 0.004276282 0.0091353054

## 6 0.007966717 0.007966717 0.0121507535

# Append the list length n to each algorithm label ("random items 1", ...).
# The six n values are recycled down the rows, which assumes each algorithm
# contributes its rows in the same n order that was passed to evaluate().
result1[, 1] <- paste(result1[, 1], c(1, 3, 5, 10, 15, 20))

# Pick the label, precision and recall columns by name rather than by position
# (previously c(1, 6, 7)), so the F-measure inputs remain precision and recall
# even if the column layout of the averaged results differs.
temp_result1 <- result1[, c(".id", "precision", "recall")]

# F1 measure: the harmonic mean of precision `p` and recall `r`,
# 2 * p * r / (p + r). Vectorized over `p` and `r`.
#
# Returns 0 where p + r is 0 (both inputs are 0) instead of NaN.
#
# The previous body was `return(2*p*r)/(p+r)`: return() exits the function
# with 2*p*r before the division is evaluated, so the result was never divided
# by (p + r). The `f` values printed further down in this document equal
# 2 * precision * recall and therefore come from that earlier version.
f <- function(p, r) {
  total <- p + r
  ifelse(total == 0, 0, 2 * p * r / total)
}

# Compute f from the second and third columns of temp_result1 and attach it
# to the full results table as column `f`.
f_score <- f(temp_result1[, 2], temp_result1[, 3])
result1_f <- cbind(result1, f = f_score)

head(result1_f)

##               .id         TP        FP       FN       TN  precision

## 1  random items 1 0.00000000  1.000000 47.75789 1605.242 0.00000000

## 2  random items 3 0.05263158  2.947368 47.70526 1603.295 0.01754386

## 3  random items 5 0.09473684  4.905263 47.66316 1601.337 0.01894737

## 4 random items 10 0.23157895  9.768421 47.52632 1596.474 0.02315789

## 5 random items 15 0.32631579 14.673684 47.43158 1591.568 0.02175439

## 6 random items 20 0.48421053 19.515789 47.27368 1586.726 0.02421053

##        recall         TPR          FPR            f

## 1 0.000000000 0.000000000 0.0006231881 0.000000e+00

## 2 0.000420633 0.000420633 0.0018345901 1.475905e-05

## 3 0.001343461 0.001343461 0.0030535159 5.091011e-05

## 4 0.002965187 0.002965187 0.0060813035 1.373350e-04

## 5 0.004276282 0.004276282 0.0091353054 1.860558e-04

## 6 0.007966717 0.007966717 0.0121507535 3.857568e-04

# Show the rows with the largest f values first.
by_f_desc <- order(result1_f$f, decreasing = TRUE)
head(result1_f[by_f_desc, ])

##                 .id       TP        FP       FN       TN precision

## 18 user-based CF 20 6.094737 12.273684 41.66316 1593.968 0.3381538

## 17 user-based CF 15 4.978947  8.915789 42.77895 1597.326 0.3629917

## 16 user-based CF 10 3.684211  5.684211 44.07368 1600.558 0.3948758

## 12 popular items 20 5.368421 14.631579 42.38947 1591.611 0.2684211

## 11 popular items 15 4.421053 10.578947 43.33684 1595.663 0.2947368

## 15  user-based CF 5 2.157895  2.610526 45.60000 1603.632 0.4532609

##        recall        TPR         FPR          f

## 18 0.16566075 0.16566075 0.007591384 0.11203762

## 17 0.13829264 0.13829264 0.005510458 0.10039817

## 16 0.10324963 0.10324963 0.003508480 0.08154156

## 12 0.12821289 0.12821289 0.009037106 0.06883008

## 11 0.11281484 0.11281484 0.006530564 0.06650138

## 15 0.06796729 0.06796729 0.001609646 0.06161383

# Apply the model: train a UBCF recommender on the complete data set.
moive_re <- Recommender(MovieLense, method = "UBCF")

# Top-20 recommendations for every user in MovieLense.
moives_pr <- predict(moive_re, MovieLense, n = 20)

# Convert the predictions to a plain list and print it.
recommended_titles <- as(moives_pr, "list")
print(recommended_titles)

## [[1]]

##  [1] "Glory (1989)"

##  [2] "Schindler's List (1993)"

##  [3] "Casablanca (1942)"

##  [4] "Close Shave, A (1995)"

##  [5] "Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1963)"

##  [6] "Leaving Las Vegas (1995)"

##  [7] "One Flew Over the Cuckoo's Nest (1975)"

##  [8] "Rear Window (1954)"

##  [9] "Heathers (1989)"

## [10] "L.A. Confidential (1997)"

## [11] "City of Lost Children, The (1995)"

## [12] "Butch Cassidy and the Sundance Kid (1969)"

## [13] "Titanic (1997)"

## [14] "Lawrence of Arabia (1962)"

## [15] "Shine (1996)"

## [16] "Stand by Me (1986)"

## [17] "Gandhi (1982)"

## [18] "To Kill a Mockingbird (1962)"

## [19] "In the Name of the Father (1993)"

## [20] "Harold and Maude (1971)"

##

## [[2]]

##  [1] "Boot, Das (1981)"

##  [2] "Dead Man Walking (1995)"

##  [3] "Lone Star (1996)"

##  [4] "Return of the Jedi (1983)"

##  [5] "Celluloid Closet, The (1995)"

##  [6] "Casablanca (1942)"

##  [7] "Citizen Kane (1941)"

##  [8] "Godfather: Part II, The (1974)"

##  [9] "2001: A Space Odyssey (1968)"

## [10] "When We Were Kings (1996)"

## [11] "Diva (1981)"

## [12] "Close Shave, A (1995)"

## [13] "Tango Lesson, The (1997)"

## [14] "Beautiful Thing (1996)"

## [15] "Empire Strikes Back, The (1980)"

## [16] "Mrs. Dalloway (1997)"

## [17] "Butch Cassidy and the Sundance Kid (1969)"

## [18] "My Fair Lady (1964)"

## [19] "Bonnie and Clyde (1967)"

## [20] "Annie Hall (1977)"

##

## [[3]]

##  [1] "Mrs. Brown (Her Majesty, Mrs. Brown) (1997)"

##  [2] "Star Wars (1977)"

##  [3] "Pulp Fiction (1994)"

##  [4] "English Patient, The (1996)"

##  [5] "Full Monty, The (1997)"

##  [6] "Lone Star (1996)"

##  [7] "Titanic (1997)"

##  [8] "Sweet Hereafter, The (1997)"

##  [9] "In the Company of Men (1997)"

## [10] "Willy Wonka and the Chocolate Factory (1971)"

## [11] "In & Out (1997)"

## [12] "Vertigo (1958)"

## [13] "As Good As It Gets (1997)"

## [14] "Apt Pupil (1998)"

## [15] "Dazed and Confused (1993)"

## [16] "Ice Storm, The (1997)"

## [17] "This Is Spinal Tap (1984)"

## [18] "Trainspotting (1996)"

## [19] "Heat (1995)"

## [20] "Fargo (1996)"

##

## [[4]]

##  [1] "Titanic (1997)"               "English Patient, The (1996)"

##  [3] "L.A. Confidential (1997)"     "Game, The (1997)"

##  [5] "Good Will Hunting (1997)"     "Kiss the Girls (1997)"

##  [7] "Full Monty, The (1997)"       "Usual Suspects, The (1995)"

##  [9] "Rosewood (1997)"              "Boogie Nights (1997)"

## [11] "Raise the Red Lantern (1991)" "Pulp Fiction (1994)"

## [13] "Toy Story (1995)"             "Love Jones (1997)"

## [15] "Eve's Bayou (1997)"           "Edge, The (1997)"

## [17] "Sting, The (1973)"            "Some Like It Hot (1959)"

## [19] "Strictly Ballroom (1992)"     "Soul Food (1997)"

##

## [[5]]

##  [1] "Terminator 2: Judgment Day (1991)"

##  [2] "Terminator, The (1984)"

##  [3] "Usual Suspects, The (1995)"

##  [4] "Contact (1997)"

##  [5] "Braveheart (1995)"

##  [6] "Casablanca (1942)"

##  [7] "Twelve Monkeys (1995)"

##  [8] "Godfather, The (1972)"

##  [9] "Shawshank Redemption, The (1994)"

## [10] "Raising Arizona (1987)"

## [11] "Amadeus (1984)"

## [12] "Nikita (La Femme Nikita) (1990)"

## [13] "Reservoir Dogs (1992)"

## [14] "Citizen Kane (1941)"

## [15] "Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1963)"

## [16] "Schindler's List (1993)"

## [17] "Titanic (1997)"

## [18] "Leaving Las Vegas (1995)"

## [19] "North by Northwest (1959)"

## [20] "Army of Darkness (1993)"

#S4 class 是一种标准的R语言面向对象实现方式，S4 对象有明确的类定义、参数定义、参数检查、继承关系、实例化等面向对象系统的特征。

R语言推荐算法 recommenderlab包的更多相关文章

R语言中的机器学习包
R语言中的机器学习包 Machine Learning & Statistical Learning (机器学习 & 统计学习) 网址:http://cran.r-project ...
R语言中的数据处理包dplyr、tidyr笔记
R语言中的数据处理包dplyr.tidyr笔记 dplyr包是Hadley Wickham的新作,主要用于数据清洗和整理,该包专注dataframe数据格式,从而大幅提高了数据处理速度,并且提供了 ...
r语言，安装外部包警告: 无法将临时安装
安装R语言中的外部包时,出现错误提示试开URL’https://mirrors.tuna.tsinghua.edu.cn/CRAN/bin/windows/contrib/3.3/ggplot2_2 ...
R语言分类算法之随机森林
R语言分类算法之随机森林 1.原理分析: 随机森林是通过自助法(boot-strap)重采样技术,从原始训练样本集N中有放回地重复随机抽取k个样本生成新的训练集样本集合,然后根据自助样本集生成k个决策 ...
R语言—如何安装Github包的解决方法，亲测有效
R语言—如何安装Github包的解决方法,亲测有效准备安装材料: R包-REmap GitHub下载地址:https://github.com/lchiffon/REmap R包-baidumap ...
R语言：关于rJava包的安装
R语言:关于rJava包的安装盐池里的萝卜 2014-09-14 00:53:33 在做文本挖掘的时候,会发现分词时候rJava是必须要迈过去的坎儿,所以进行了总结: 第一步:安装rJava和jd ...
R语言机器学习之caret包运用
在大数据如火如荼的时候,机器学习无疑成为了炙手可热的工具,机器学习是计算机科学和统计学的交叉学科, 旨在通过收集和分析数据的基础上,建立一系列的算法,模型对实际问题进行预测或分类. R语言无疑为我们提 ...
R语言之数据处理常用包
dplyr包是Hadley Wickham的新作,主要用于数据清洗和整理,该包专注dataframe数据格式,从而大幅提高了数据处理速度,并且提供了与其它数据库的接口:tidyr包的作者是Hadley ...
R语言神经网络算法
人工神经网络(ANN),简称神经网络,是一种模仿生物神经网络的结构和功能的数学模型或计算模型.神经网络由大量的人工神经元联结进行计算.大多数情况下人工神经网络能在外界信息的基础上改变内部结构,是一种自 ...

随机推荐

Capture
1.导出Logical symbol 单个元件导出放入指定库:左键选中元件→右键“Edit Parts”→View“Package”→file“Save As”→找到要存放的库. 从某个已经设计好的原 ...
SSH使用密钥登录并禁止口令登录实践
生成PublicKey Linux:ssh-keygen -t rsa[私钥 (id_rsa) 与公钥 (id_rsa.pub)]Windows:SecurCRT/Xshell/PuTTY[SSH-2 ...
soapUI使用-DataSource获取oracle库中的参数
soapUI使用-DataSource获取oracle库中的参数下载mysql和oracle驱动包:http://pan.baidu.com/s/1i3sy1MH 放在Program Files\S ...
java 图像灰度化与二值化
转载:http://www.chinasb.org/archives/2013/01/5053.shtml 1: package org.chinasb.client; 2: 3: import ja ...
高性能JavaScript笔记二（算法和流程控制、快速响应用户界面、Ajax）
循环在javaScript中的四种循环中(for.for-in.while.do-while),只有for-in循环比其它几种明显要慢,另外三种速度区别不大有一点需要注意的是,javascript ...
什么是QName
看代码时经常碰到QName,当时对这个东东具体什么意思也是不太明白:今天在看SOAP消息的时候,想到这个东东,就去仔细看了下.QName其实就是Qualified Name的简称,在"Nam ...
Setting up Django and your web server with uWSGI and nginx
https://uwsgi.readthedocs.io/en/latest/tutorials/Django_and_nginx.html Setting up Django and your we ...
css007 margin padding border
css007 margin padding border 1.理解盒模型(盒模型:就是把一些东西,包括html各种标签都包含在一个看不见的盒子里) 1/在web浏览器中任何标签都是一个盒子,内容的周 ...
CentOS编译安装NodeJS+Express
NodeJS是基于Chrome’s Javascript runtime,也就是Google V8引擎执行Javascript的快速构建网络服务及应用的平台,其优点有: 在CentOS编译安装Node ...
Win8.1微软官方最终正式版ISO镜像文件
Win8.1微软官方最终正式版ISO镜像文件经过预览版,测试版.开发版本等几个乱七八糟的版本后,2013年10月17日,微软终于如约的发布了Win8.1最终正式版. Win8.1和win8的区别 1 ...

R语言 推荐算法 recommenderlab包

R语言 推荐算法 recommenderlab包的更多相关文章

随机推荐

热门专题

R语言推荐算法 recommenderlab包

R语言推荐算法 recommenderlab包的更多相关文章