install.packages("lubridate", repos = "http://cran.us.r-project.org")
library(dplyr)
library(lubridate)
library(ggplot2)
cs <- read.delim("H(TSV)/HDS_Customers.tab",stringsAsFactors = F)
tr <- read.delim("H(TSV)/HDS_Transactions_MG.tab", stringsAsFactors = F)
str(tr)
cs.v1 <- tr %>% filter(net_amt < 0) %>% group_by(custid) %>% summarize(rf_amt = sum(net_amt),rf_cnt=n())
head(cs.v1[order(cs.v1$rf_amt),])
cs.v2 <- tr %>% distinct(custid,brd_nm) %>% group_by(custid) %>% summarize(buy_brd=n())
head(cs.v2)
start_date = ymd(ymd_hms(min(tr$sales_date)))
end_date = ymd(ymd_hms(max(tr$sales_date)))
start_date
end_date
cs.v3 <- tr %>% distinct(custid,sales_date) %>% group_by(custid) %>% summarize(visits=n()) %>% mutate(API = as.integer(end_date - start_date)/visits)
head(cs.v3)
tmp <- tr %>% group_by(custid) %>% summarize(n=n())
cs.v4 <- inner_join(cs.v3, tmp) %>% mutate(NPPV = n/visits) %>% select(custid,NPPV)
head(cs.v4)
tmp <- tr %>% mutate(wk_amt = ifelse(wday(sales_date) %in% 2:6 ,net_amt,0), we_amt = ifelse(wday(sales_date) %in% c(1,7),net_amt,0)) %>% group_by(custid)
tmp <- tmp %>% summarise_each(funs(sum),wk_amt,we_amt)
cs.v5 <- tmp %>% mutate(wk_pat = ifelse(wk_amt>=we_amt*1.5,"주중형",ifelse(we_amt>=wk_amt*1.5,"주말형","유형없음")))
head(cs.v5)
cs.v5_2 <- tmp %>% mutate(wk_pat = ifelse(wk_amt>=we_amt*1.5,"Weekdays",ifelse(we_amt>=wk_amt*1.5,"Weekends","No Type")))
options(repr.plot.width = 5, repr.plot.height=4)
ggplot(cs.v5_2, aes(wk_pat)) + geom_bar(aes(fill=wk_pat))
cs.v6<-cs %>%
mutate(age=year('2001-05-01') -year(ymd_hms(birth))) %>%
mutate(age=ifelse(age < 10 | age > 100, NA, age)) %>%
mutate(age=ifelse(is.na(age),round(mean(age,na.rm=T)),age)) %>%
mutate(agegrp=cut(age, c(0,19,29,39,49,59,69,100), labels=F)*10) %>%
select(custid, age, agegrp)
head(cs.v6)
cs.v6_2 <-cs%>% # 위와동일한결과를얻는다른표현
mutate(age=year('2001-05-01') -year(ymd_hms(birth)),
age=ifelse(age < 10 | age > 100, NA, age),
age=ifelse(is.na(age),round(mean(age,na.rm=T)),age),
agegrp=cut(age, c(0,19,29,39,49,59,69,100), labels=F)*10) %>%
select(custid, age, agegrp)
head(cs.v6_2)
end_date <- ymd(ymd_hms(max(tr$sales_date)))
start_date <- ymd('20010501') - months(12)
end_date
start_date
cs.v7.12 <-tr%>%
filter(start_date<=sales_date& sales_date<=end_date) %>%
group_by(custid) %>%
summarize(amt12=sum(net_amt), nop12=n())
start_date<-ymd('20010501') -months(6)
cs.v7.06 <-tr%>%
filter(start_date<=sales_date& sales_date<=end_date) %>%
group_by(custid) %>%
summarize(amt6=sum(net_amt), nop6=n())
start_date<-ymd('20010501') -months(3)
cs.v7.03 <-tr%>%
filter(start_date<=sales_date& sales_date<=end_date) %>%
group_by(custid) %>%
summarize(amt3=sum(net_amt), nop3=n())
cs.v7 <- left_join(cs.v7.12, cs.v7.06) %>% left_join(cs.v7.03)
head(cs.v7)
summary(cs.v7)
cs.v7[is.na(cs.v7$amt6),]$amt6 <- 0
cs.v7[is.na(cs.v7$nop6),]$nop6 <- 0
cs.v7[is.na(cs.v7$amt3),]$amt3 <- 0
cs.v7[is.na(cs.v7$nop3),]$nop3 <- 0
summary(cs.v7)
custsig<-cs%>%
left_join(cs.v1) %>%
left_join(cs.v2) %>%
left_join(cs.v3) %>%
left_join(cs.v4) %>%
left_join(cs.v5) %>%
left_join(cs.v6) %>%
left_join(cs.v7)
summary(custsig)
custsig[is.na(custsig$rf_amt),]$rf_amt <- 0
custsig[is.na(custsig$rf_cnt),]$rf_cnt <- 0
summary(custsig)