# 给大厨写的R数据分析代码
### ---- New vs. returning customers per month ----
# Reads one row per order (buyer nickname 买家昵称, order date 下单日期),
# counts new and returning buyers for every calendar month, writes both
# series to CSV and plots them together with their sum.
dachu <- read.csv(
  "D:\\Dasktop\\bigdata_game\\天池\\大厨\\qijiandiankehu.csv",
  header = TRUE, encoding = "utf-8",
  colClasses = c("character", "Date")
)
str(dachu)
head(dachu, 20)

# Distribution of the number of orders per buyer.
temp <- table(dachu$买家昵称)
plot(table(sort(temp)) / length(temp))

# library(data.table)
# month(dachu$下单日期[nrow(dachu)])
min(dachu$下单日期)
max(dachu$下单日期)

# "YYYY-MM" key of every order; ss is the sorted list of months with data.
dachu$ym <- substr(dachu$下单日期, 1, 7)
head(dachu)
ss <- sort(unique(dachu$ym))

# Definitions used below:
#   new customer       : exactly one order in the month AND no order in any
#                        earlier month.
#   returning customer : two or more orders in the month, OR one order in
#                        the month plus at least one order in an earlier month.
# Every buyer active in a month is therefore either new or returning.
n_months <- length(ss)
newcusts <- integer(n_months)
oldcusts <- integer(n_months)
uniq <- character(0) # buyers seen in any earlier month
for (i in seq_len(n_months)) {
  # Selecting by month key is the same as a [month start, next month start)
  # date window because ss holds every month that occurs in the data; it
  # also needs no special case for the first and the last month.
  now <- dachu$买家昵称[which(dachu$ym == ss[i])]
  temp <- table(now)
  new_now <- names(temp)[temp == 1] # bought exactly once this month
  newcusts[i] <- sum(!(new_now %in% uniq))
  oldcusts[i] <- length(temp) - newcusts[i]
  uniq <- union(uniq, names(temp))
}
newcusts
oldcusts

newcusts1 <- cbind(date = ss, newcusts)
newcusts1
oldcusts1 <- cbind(date = ss, oldcusts)
oldcusts1
write.csv(newcusts1, "C:\\Users\\hasee\\Desktop\\newcusts.csv", quote = FALSE)
write.csv(oldcusts1, "C:\\Users\\hasee\\Desktop\\oldcusts.csv", quote = FALSE)

# library(timeSeries)
win.graph()
opar <- par(no.readonly = TRUE)
par(lty = 1, pch = 1) # par("cex") shows the default value
# Alternative kept from the author (assumes the series starts in 2014-03):
# plot.ts(ts(newcusts+oldcusts, start = c(2014, 3), frequency = 12),main="薏凡特月度新老客户购买数量变化趋势", col=1)
# lines(ts(newcusts, start = c(2014, 3), frequency = 12), col=2)
# lines(ts(oldcusts, start = c(2014, 3), frequency = 12), col=3)

# One x position per month actually present in the data, so the axis stays
# correct whatever the first month is and even if a month is missing.
time <- as.Date(paste0(ss, "-01"))
plot(time, newcusts + oldcusts,
  xlab = "月份", ylab = "客户数",
  main = "薏凡特月度新老客户购买数量变化趋势",
  type = "o", col = 1
)
# Quick reference for the graphical parameters used here:
#   type: "p" points, "l" lines, "b" both, "c" lines with gaps for points,
#         "o" overplotted points and lines, "s"/"S" stair steps,
#         "h" histogram-like vertical lines, "n" nothing
#   pch: point symbol; cex: point size; lwd: line width
#   lty: 0 blank, 1 solid (default), 2 dashed, 3 dotted, 4 dotdash,
#        5 longdash, 6 twodash
lines(time, newcusts, type = "o", col = 2)
lines(time, oldcusts, type = "o", col = 3)
legend("topright", c("总体客户", "新客户", "老客户"), col = 1:3, lty = 1, pch = 1)
# legend positions: "bottomright", "bottom", "bottomleft", "left", "topleft",
# "top", "topright", "right", "center"
par(opar)
# par(new = TRUE)
### ---- Monthly repurchase rate ----
# Author's notes:
# - At the start of a month count the buyers that have bought exactly once;
#   at the end of the month count how many of them bought again.
# - Buyers that first appear in a month and order twice or more in it are
#   not counted as new customers.
# - New customers are simply sum(table == 1); the hard part is counting the
#   ones that repurchase within the month (an inner join on names does it).

# Data import
dachu <- read.csv(
  "D:\\Dasktop\\bigdata_game\\天池\\大厨\\qijiandiankehu.csv",
  header = TRUE, encoding = "utf-8",
  colClasses = c("character", "Date")
)
str(dachu)
# NOTE(review): this section reads column 下单时间, while the new/returning
# customer section reads 下单日期 from the same file -- confirm the CSV header.
min(dachu$下单时间)
max(dachu$下单时间)

order_month <- substr(dachu$下单时间, 1, 7)
ss <- sort(unique(order_month))

# One result row per month, starting with the second month (the first month
# has no history, so it has neither candidates nor repurchases).
monthly_rows <- lapply(seq_along(ss)[-1], function(i) {
  # Look-back window: the up-to-12 months with data that precede month i.
  lookback <- ss[max(i - 12, 1):(i - 1)]
  temp <- table(dachu$买家昵称[order_month %in% lookback])
  once_before <- names(temp)[temp == 1] # exactly one order in the window
  buyers_now <- unique(dachu$买家昵称[order_month %in% ss[i]])
  data.frame(
    date = ss[i],
    counts = length(once_before),
    # %in% replaces the original merge(): same count, and it also works
    # when one side is empty.
    repurchase = sum(once_before %in% buyers_now),
    stringsAsFactors = FALSE
  )
})
new_customer <- if (length(monthly_rows) > 0) {
  do.call(rbind, monthly_rows)
} else {
  data.frame(
    date = character(0), counts = integer(0), repurchase = integer(0),
    stringsAsFactors = FALSE
  )
}

# Repurchase rate (NaN when a month has no candidate buyers).
new_customer$rate <- new_customer$repurchase / new_customer$counts
# colnames(new_customer) = c('date','counts','repurchase','rate')

win.graph()
opar <- par(mfrow = c(2, 2))
# The month keys are character; plot against real dates so plot() gets a
# numeric x axis.
month_start <- as.Date(paste0(new_customer$date, "-01"))
plot(month_start, new_customer$counts)
plot(month_start, new_customer$repurchase)
plot(month_start, new_customer$rate)
par(opar)
write.csv(new_customer, "C:\\Users\\hasee\\Desktop\\new_customer.csv")

### ---- Quarterly conversion rate ----
# Data import
dachu <- read.csv(
  "C:\\Users\\hasee\\Desktop\\qijiandiankehu.csv",
  header = TRUE, encoding = "utf-8",
  colClasses = c("character", "Date")
)
str(dachu)
min(dachu$下单时间)
max(dachu$下单时间)

order_month <- substr(dachu$下单时间, 1, 7)
ss <- sort(unique(order_month))

# One result row per 3-month window. Windows start at the 2nd month with
# data and the last one starts at the 3rd-from-last month, so every window
# spans exactly three months of ss. Fewer than 4 months yields no rows.
window_starts <- seq_len(max(length(ss) - 2, 0))[-1]
quarter_rows <- lapply(window_starts, function(i) {
  # All history before the window opens.
  temp <- table(dachu$买家昵称[order_month %in% ss[seq_len(i - 1)]])
  once_before <- names(temp)[temp == 1] # exactly one earlier order
  buyers_now <- unique(dachu$买家昵称[order_month %in% ss[i:(i + 2)]])
  data.frame(
    date = ss[i],
    counts = length(once_before),
    # %in% replaces the original merge(): same count, and it also works
    # when one side is empty.
    repurchase = sum(once_before %in% buyers_now),
    stringsAsFactors = FALSE
  )
})
new_customer <- if (length(quarter_rows) > 0) {
  do.call(rbind, quarter_rows)
} else {
  data.frame(
    date = character(0), counts = integer(0), repurchase = integer(0),
    stringsAsFactors = FALSE
  )
}

# Conversion rate (NaN when a window has no candidate buyers).
new_customer$rate <- new_customer$repurchase / new_customer$counts
# colnames(new_customer) = c('date','counts','repurchase','rate')

win.graph()
opar <- par(mfrow = c(2, 2))
# The month keys are character; plot against real dates so plot() gets a
# numeric x axis.
month_start <- as.Date(paste0(new_customer$date, "-01"))
plot(month_start, new_customer$counts)
plot(month_start, new_customer$repurchase)
plot(month_start, new_customer$rate)
par(opar)
# NOTE(review): this is the same output path the monthly repurchase section
# writes to, so running both sections overwrites the monthly result.
write.csv(new_customer, "C:\\Users\\hasee\\Desktop\\new_customer.csv")

### ---- Customer attach rate (author's note: this section may be faulty) ----
# Author's definitions:
# - Only one-time customers are considered.
# - monthly attach rate = buyers with an attached purchase in the month /
#                         all buyers in the month
# - product attach rate = buyers of the product with an attached purchase /
#                         all buyers of the product
# - all buyers = one-order multi-item buyers + one-order single-item buyers

# Data import
library(readxl)
# dachu <- read.csv("C:\\Users\\hasee\\Desktop\\liandailv.xlsx", header = T, encoding = "utf-8", colClasses = c("character", "Date", "character"))
# read_excel(path, sheet = 1, col_names = TRUE, col_types = NULL, na = "", skip = 0)
dachu <- read_excel(
  "C:\\Users\\hasee\\Desktop\\liandailv.xlsx",
  sheet = 1, col_names = TRUE,
  col_types = c("text", "text", "text"), na = "", skip = 0
)
dachu$下单日期 <- as.Date(dachu$下单日期)
str(dachu)
unique(dachu$商品ID)
min(dachu$下单日期)
max(dachu$下单日期)

# "YYYY-MM" key of every row and the sorted list of months with data.
order_month <- substr(dachu$下单日期, 1, 7)
month_set <- sort(unique(order_month))

# ---- Monthly attach rate ----
# count  = distinct buyers in the month
# count2 = buyers with more than one row in the month
month_associate_rate <- do.call(rbind, lapply(month_set, function(m) {
  temp <- table(dachu$买家昵称[order_month %in% m])
  data.frame(
    month = m,
    count = length(temp),
    count2 = sum(temp > 1),
    rate = sum(temp > 1) / length(temp),
    stringsAsFactors = FALSE
  )
}))
month_associate_rate

# ---- Product attach rate ----
dachu$flag <- 0
head(dachu)
# flag = 1 on every row of a buyer that has more than one row overall.
temp <- table(dachu$买家昵称)
temp2 <- as.data.frame(dachu)
temp2$flag <- as.numeric(temp2$买家昵称 %in% names(temp)[temp > 1])
temp2

prod_set <- unique(dachu$商品ID)
product_associate_rate <- do.call(rbind, lapply(prod_set, function(prod) {
  flags <- temp2$flag[temp2$商品ID %in% prod]
  data.frame(
    product = prod,
    count = length(flags),
    count2 = sum(flags == 1),
    rate = sum(flags == 1) / length(flags),
    stringsAsFactors = FALSE
  )
}))
product_associate_rate <- product_associate_rate[
  order(product_associate_rate$count, decreasing = TRUE),
]
product_associate_rate$product <- as.character(product_associate_rate$product)
head(product_associate_rate)

# Spot check: every row of the buyers that bought one specific product.
dachu[dachu$买家昵称 %in% dachu$买家昵称[dachu$商品ID %in% "42303520877"], ]

# ---- Monthly attach rate of the (up to) five most-bought products ----
# Uses temp2 (rows carrying the multi-row-buyer flag) from above.
prod_set <- head(product_associate_rate$product, 5)
product_associate_rate_top5 <- do.call(rbind, lapply(month_set, function(m) {
  in_month <- temp2[order_month %in% m, ]
  row <- data.frame(month = m, stringsAsFactors = FALSE)
  for (k in seq_along(prod_set)) {
    flags <- in_month$flag[in_month$商品ID %in% prod_set[k]]
    n_rows <- length(flags)
    n_attached <- sum(flags == 1)
    row[[paste0("top", k, "count")]] <- n_rows
    row[[paste0("top", k, "count2")]] <- n_attached
    # A product with no sales in the month gets rate 0 rather than NaN.
    row[[paste0("top", k, "rate")]] <- if (n_rows == 0) 0 else n_attached / n_rows
  }
  row
}))
product_associate_rate_top5

# ---- Plots ----
win.graph()
opar <- par(mfrow = c(1, 2))
# Month keys are character; plot against real dates so plot() gets a numeric
# x axis.
plot(as.Date(paste0(month_associate_rate$month, "-01")),
  month_associate_rate$rate,
  type = "l", col = "blue",
  main = "月度连带率", xlab = "月份", ylab = "连带率"
)
plot(product_associate_rate$rate, main = "产品连带率", xlab = "产品", ylab = "连带率")
par(opar)

write.csv(month_associate_rate, "C:\\Users\\hasee\\Desktop\\month_associate_rate.csv")
write.csv(product_associate_rate, "C:\\Users\\hasee\\Desktop\\product_associate_rate.csv") # , quote = TRUE
write.csv(product_associate_rate_top5, "C:\\Users\\hasee\\Desktop\\product_associate_rate_top5.csv") # , quote = TRUE

# dplyr ships the whole join family: inner_join, left_join, full_join,
# right_join, ...
library(dplyr)
library("nycflights13")
# Keep only year through day, the tail number and the carrier code so the
# join output stays easy to read.
flights2 <- select(flights, year:day, tailnum, carrier)
# Attach the airlines table to every flight by carrier code.
left_join(flights2, airlines, by = "carrier")
# merge(data.frame(x=1:3,y=0,z=2),data.frame(x=2:3,y=1:2),by=c("x"),all.x = T)

### ---- Repurchase rate vs. amount of the first order ----
dachu <- read.csv(
  "D:\\Dasktop\\bigdata_game\\天池\\大厨\\suoyoukehushuju.csv",
  header = TRUE, encoding = "utf-8",
  colClasses = c("character", "Date", "numeric")
)
str(dachu)
head(dachu, 20)
library(dplyr)

# Quick look: first 100 rows when sorted by buyer, newest order first.
temp <- head(arrange(dachu, 买家昵称, desc(下单时间)), 100)
temp
# flights[order(flights$year, flights$month, flights$day), ]
# flights[order(desc(flights$arr_delay)), ]
# filter(group_by(temp, 买家昵称))

# One row per buyer: the earliest order, plus count = that buyer's total
# number of rows. Sorting by time first is what makes slice(1) the first
# order.
temp <- dachu %>%
  arrange(买家昵称, 下单时间) %>%
  group_by(买家昵称) %>%
  mutate(count = n()) %>%
  slice(1) %>%
  ungroup()

win.graph()
opar <- par(mfrow = c(1, 2))
# First-order amount vs. number of orders
plot(temp$实付金额, temp$count)
# Frequency of each first-order amount
plot(table(temp$实付金额))
par(opar)

# Amount bands derived from the first plot:
#   below 1000 -> band = amount %/% 100
#   1000-1999  -> band 10
#   2000+      -> band 11
# NOTE(review): the author's comment announced a width of 200 below 1000,
# but the code has always used 100 -- confirm which was intended.
temp$group <- ifelse(
  temp$实付金额 < 1000,
  temp$实付金额 %/% 100,
  ifelse(temp$实付金额 < 2000, 10, 11)
)
head(temp, 20)

# Per band: n1 = buyers with more than one order, n2 = all buyers.
temp2 <- temp %>%
  group_by(group) %>%
  summarise(n1 = sum(count > 1), n2 = n(), rate = n1 / n2)

win.graph()
# Repurchase rate per amount band
plot(temp2$group, temp2$rate)

# i <- c("gamma","a")
# switch(i,
# beta = "You typed beta",
# alpha = "You typed alpha",
# gamma = "You typed gamma",
# delta = "You typed delta"
# )

### ---- Effect of attached purchases on repurchase ----
t0 <- Sys.time()
dachu <- read.csv(
  "D:\\Dasktop\\bigdata_game\\天池\\大厨\\AnalysisOrderDownLoad-订单信息-子订单(全量)-10027396-8025-107.csv",
  header = TRUE, encoding = "utf-8",
  colClasses = c(
    rep("character", 4), rep("Date", 3), rep("character", 5),
    "integer", "numeric", "character", rep("numeric", 2)
  )
)
str(dachu)
# Keep only columns 4 and 5, used below as 买家昵称 and 下单时间.
dachu <- dachu[, 4:5]
head(dachu)
# Drop the first two characters and the last character of every nickname.
# NOTE(review): presumably an export wrapper around the value -- confirm.
dachu$买家昵称 <- substr(dachu$买家昵称, 3, nchar(dachu$买家昵称) - 1)
head(dachu, 20)
library(dplyr)

# One row per (buyer, order date); count = number of rows in that order,
# i.e. how many items were bought together.
orders <- dachu %>%
  group_by(买家昵称, 下单时间) %>%
  summarise(count = n()) %>%
  ungroup()

# One row per buyer: the earliest order, with count2 = number of distinct
# order dates of that buyer. The sort by time must come before slice(1).
first_orders <- orders %>%
  arrange(买家昵称, 下单时间) %>%
  group_by(买家昵称) %>%
  mutate(count2 = n()) %>%
  slice(1) %>%
  ungroup()

# ---- Repurchase rate by number of items in the first order ----
# n1 = buyers whose first order had `count` items, n2 = those who ordered
# again on another date.
temp <- first_orders %>%
  group_by(count) %>%
  summarise(n1 = n(), n2 = sum(count2 > 1), rate = n2 / n1)
temp
win.graph()
plot(temp$count, temp$rate,
  main = "首单购买件数与回购率", xlab = "首单购买件数",
  ylab = "回购客户占比", col = "red"
)

# ---- Per month: share of new buyers whose first order had several items ----
temp <- first_orders %>%
  mutate(
    year = as.integer(substr(下单时间, 1, 4)),
    month = as.integer(substr(下单时间, 6, 7))
  ) %>%
  group_by(year, month) %>%
  summarise(
    下单时间 = first(下单时间),
    n1 = n(),
    n2 = sum(count > 1),
    rate = n2 / n1
  ) %>%
  ungroup() %>%
  select(下单时间, year, month, n1, n2, rate)
temp
win.graph()
# Build the x axis from each row's own year/month so a month without new
# buyers cannot shift the later points.
time <- as.Date(sprintf("%04d-%02d-01", temp$year, temp$month))
plot(time, temp$rate,
  main = "各月新客中连带客户占比", xlab = "月份",
  ylab = "首单购买多件客户占比", type = "l"
)

# ---- Attach rate over orders: multi-item orders / all orders ----
temp <- orders
sum(temp$count > 1) / nrow(temp)
Sys.time() - t0

###############################################################################
# setwd("H:/数据分析/内部数据/薏凡特旗舰店数据/旗舰店客户数据分析/0803")
library(dplyr) # loaded first: arrange() below needs it
# NOTE(review): setwd() makes the script depend on this machine's layout;
# kept because the relative read.csv() path below relies on it.
setwd("D:\\Dasktop\\bigdata_game\\天池\\大厨")
dat <- read.csv("kehushuju.csv",
  header = TRUE, encoding = "utf-8",
  colClasses = c("character", "Date", "integer", "numeric", "integer"),
  stringsAsFactors = FALSE
)
dat <- arrange(dat, 买家昵称, 下单日期)
head(dat)
# The author skipped unique(dat): costly on large data, and duplicate rows
# are said to be practically absent.

## ---- Purchase count vs. probability of buying once more ----
# temp: one row per buyer with count = number of orders.
temp <- dat %>%
  group_by(买家昵称) %>%
  summarise(count = n())
head(temp)

# For k = 1 .. max_count - 1:
#   rr1[k]  = buyers with exactly k + 1 orders
#   rr2[k]  = buyers with at least k orders
#   rate[k] = rr1[k] / rr2[k]
# rr2 is never 0 in this range because some buyer has max_count orders.
# seq_len() yields an empty range when every buyer has a single order
# (1:(max_count - 1) would have iterated over 1 and 0).
max_count <- max(temp$count)
steps <- seq_len(max(max_count - 1, 0))
rr1 <- vapply(steps, function(k) sum(temp$count == k + 1), integer(1))
rr2 <- vapply(steps, function(k) sum(temp$count >= k), integer(1))
rate <- rr1 / rr2

temp2 <- filter(temp, count >= 2)
head(temp2)
rrr <- cbind(rr1, rr2, rate)
rrr
# write.csv(rrr,"H:/数据分析/内部数据/薏凡特旗舰店数据/旗舰店客户数据分析/0803/rrr.csv")

## ---- Repurchase interval ----
# Attach every buyer's total order count to each order row.
# (An earlier, much slower version that filled the count per buyer in nested
# loops and computed the day gaps row by row was commented out here.)
new_dat2 <- select(dat, 买家昵称, 下单日期, 下单时点)
new_dat2 <- merge(new_dat2, temp, by = c("买家昵称"))
head(new_dat2)

# Share of rows with count > 1 for every value of 下单时点 that occurs in dat.
share_by_time_point <- function(df) {
  time_points <- sort(unique(dat$下单时点))
  share <- vapply(time_points, function(tp) {
    picked <- df$count[df$下单时点 %in% tp]
    sum(picked > 1) / length(picked)
  }, numeric(1))
  out <- data.frame(time_points, share)
  colnames(out) <- c("下单时点", "rate")
  out
}

### Share of repeat buyers per order time point
# Ignoring order sequence: count is the buyer's total number of orders.
rate <- share_by_time_point(new_dat2)
rate

# Taking order sequence into account: replace count by the running order
# number (1, 2, 3, ...) within each buyer. Rows are sorted by buyer and date
# first, so the number follows chronological order.
# (The author's commented-out repeat/while variants did the same thing row
# block by row block.)
new_dat3 <- arrange(new_dat2, 买家昵称, 下单日期)
head(new_dat3, 50)
t0 <- Sys.time()
new_dat3$count <- ave(seq_len(nrow(new_dat3)), new_dat3$买家昵称, FUN = seq_along)
Sys.time() - t0
head(new_dat3, 50)
tail(new_dat3, 50)

# Share of non-first orders per order time point.
rate2 <- share_by_time_point(new_dat3)
rate2

# Days since the same buyer's previous order: difference to the previous
# row, reset to 0 on each buyer's first order.
previous_date <- c(new_dat3$下单日期[1], new_dat3$下单日期[-nrow(new_dat3)])
new_dat3$t_diff <- as.integer(new_dat3$下单日期 - previous_date)
head(new_dat3)
new_dat3$t_diff[new_dat3$count == 1] <- 0
mydata <- new_dat3 %>%
  select(count, t_diff) %>%
  filter(count > 1) %>%
  rename(rebuy = count, redays = t_diff)
head(mydata)
plot(mydata)

# m5 = 1 when an order follows the buyer's previous order by fewer than
# 5 days; first orders are always 0.
new_dat3$m5 <- (new_dat3$t_diff < 5)
new_dat3$m5[new_dat3$count == 1] <- 0

# Working directory for the next section (its read.csv() path is relative).
setwd("H:/数据分析/内部数据/薏凡特旗舰店数据/旗舰店客户数据分析/0803/自我研究")
dat <- read.csv("kehushuju.csv",
  header = TRUE, encoding = "utf-8",
  colClasses = c("character", "Date", "integer", "numeric", "integer")
)
head(dat)
library(dplyr)
dat1 <- arrange(dat, 下单日期)
head(dat1)

m <- 5 # window length in days

# counts[i] = 1 when the buyer of order i has at least two orders dated on
# or before (date of order i + m days); order i itself is one of them.
# NOTE(review): earlier orders count too, so any order that is not the
# buyer's first is flagged regardless of m -- confirm this matches the
# intended "repurchase within m days".
#
# Computed per buyer with findInterval() instead of filtering the whole
# table once per row: for sorted dates d, findInterval(x, d) is the number
# of dates <= x.
t0 <- Sys.time()
order_day <- as.numeric(dat1$下单日期)
flag_for_buyer <- function(days) {
  as.numeric(findInterval(days + m, sort(days)) >= 2)
}
counts <- ave(order_day, dat1$买家昵称, FUN = flag_for_buyer)
# Rows without a buyer or a date match nothing and stay 0.
counts[is.na(dat1$买家昵称) | is.na(order_day)] <- 0
tt <- Sys.time() - t0
head(counts)

end_dat5 <- cbind(dat1, counts)
write.csv(end_dat5, "H:/数据分析/内部数据/薏凡特旗舰店数据/旗舰店客户数据分析/0803/自我研究/end_dat5.csv")
给大厨写的R数据分析代码的更多相关文章
- 机器学习十大算法总览(含Python3.X和R语言代码)
引言 一监督学习 二无监督学习 三强化学习 四通用机器学习算法列表 线性回归Linear Regression 逻辑回归Logistic Regression 决策树Decision Tree 支持向 ...
- R数据分析:跟随top期刊手把手教你做一个临床预测模型
临床预测模型也是大家比较感兴趣的,今天就带着大家看一篇临床预测模型的文章,并且用一个例子给大家过一遍做法. 这篇文章来自护理领域顶级期刊的文章,文章名在下面 Ballesta-Castillejos ...
- R数据分析:潜类别轨迹模型LCTM的做法,实例解析
最近看了好多潜类别轨迹latent class trajectory models的文章,发现这个方法和我之前常用的横断面数据的潜类别和潜剖面分析完全不是一个东西,做纵向轨迹的正宗流派还是这个方法,当 ...
- R数据分析:二分类因变量的混合效应,多水平logistics模型介绍
今天给大家写广义混合效应模型Generalised Linear Random Intercept Model的第一部分 ,混合效应logistics回归模型,这个和线性混合效应模型一样也有好几个叫法 ...
- R数据分析:如何简洁高效地展示统计结果
之前给大家写过一篇数据清洗的文章,解决的问题是你拿到原始数据后如何快速地对数据进行处理,处理到你基本上可以拿来分析的地步,其中介绍了如何选变量如何筛选个案,变量重新编码,如何去重,如何替换缺失值,如何 ...
- R数据分析:临床预测模型中校准曲线和DCA曲线的意义与做法
之前给大家写过一个临床预测模型:R数据分析:跟随top期刊手把手教你做一个临床预测模型,里面其实都是比较基础的模型判别能力discrimination的一些指标,那么今天就再进一步,给大家分享一些和临 ...
- ASP.NET 大文件下载的实现思路及代码
文件下载是一个网站最基本的功能,ASP.NET网站的文件下载功能实现也很简单,但是如果遇到大文件的下载而不做特殊处理的话,那将会出现不可预料的后果.本文就基于ASP.NET提供大文件下载的实现思路及代 ...
- python实现统计你一共写了多少行代码
程序员要保证一定的代码量就必须勤奋的敲代码,但怎么知道自己一共写了多少代码呢,笔者用python写了个简单的脚本,遍历所有的.java,.cpp,.c文件的行数,但是正如大家所知,java生成了许多代 ...
- 如何写出无法维护的代码(JAVA版)
程序命名(针对那些不能混淆的代码) 容易输入的名字.比如:Fred,asdf 单字母的变量名.比如:a,b,c, x,y,z,或者干脆上中文比如(阿隆索肯德基) 有创意地拼写错误.比如:SetPint ...
随机推荐
- inline-block间隙问题总结, ,style一个样式后面 多加了一个 分号; 导致 样式失效
1--- 样式最后的{}后面, 不能有分号 ; 2---- display:inline-block 后, 元素间会有间隙 原因: 由换行或者回车导致的. 解决一: 只要把标签写成一行或者标签 ...
- App自动更新(DownloadManager下载器)
一.开门见山 代码: object AppUpdateManager { const val APP_UPDATE_APK = "update.apk" private var b ...
- 雷林鹏分享:jQuery EasyUI 数据网格 - 创建自定义视图
jQuery EasyUI 数据网格 - 创建自定义视图 在不同的情况下,您可能需要为数据网格(datagrid)运用更灵活的布局.对于用户来说,卡片视图(Card View)是个不错的选择.这个工具 ...
- Lab 6-3
In this lab, we'll analyze the malware found in the file Lab06-03.exe. Questions and Short Answers C ...
- Django-2.1基础操作
创建项目 安装django pip3 install django #查看django版本 django-admin --version python -m django --version 2.1. ...
- 快速高效实现微信小程序图片上传与腾讯免费5G存储空间的使用
本文介绍了如何在微信小程序开发中使用腾讯官方提供的云开发功能快速实现图片的上传与存储,以及介绍云开发的 5G 存储空间的基本使用方法,这将大大提高微信小程序的开发效率 对于一般的图片上传功能开发,我们 ...
- 最全的测试用例(UI)
一.文本框为字符型 必填项非空校验: 1.必填项未输入--程序应提示错误: 2.必填项只输入若干个空格,未输入其它字符--程序应提示错误: 字段唯一性校验:(不是所有字段都作此项校 ...
- gps相关的知识
百度地图开放平台 高德开放平台
- DOM获取元素的方法
DOM:document object module 文档对象模型 DOM就是描述整个html页面中节点关系的图谱,如下图. 1,通过ID,获取页面中元素的方法:(上下文必须是document) do ...
- 运维自动化 第一章 git
一.git简单操作 4个地方: 工作区: 当前编辑的区域 缓存区: add 之后的区域 本地仓库: commit之后的区域 远程仓库 :远程的区域 简单操作: git init 初始化操作 比如我选定 ...