library(arules)
library(dplyr)
# Font family for base-graphics text; the item labels in this data are Korean.
par(family = "NanumBarunGothic")

# Load the raw purchase records from a tab-separated file.
tr <- read.delim("dataTransactions.tab", stringsAsFactors = FALSE)
head(tr)

# Remove the two excluded corners, then keep one row per (custid, corner)
# pair so each corner appears at most once per customer.
tr.filter <- tr %>%
  filter(!(corner %in% c("일반식품", "화장품"))) %>%
  distinct(custid, corner)
head(tr.filter)

# Split the corner column by custid: one vector of corners per customer.
corners_by_customer <- split(tr.filter$corner, tr.filter$custid)
head(corners_by_customer)

# Coerce the per-customer list to an arules "transactions" object.
trans <- as(corners_by_customer, "transactions")
trans

# Alternative: if the two corners do not need to be removed, the file can be
# read directly as transactions instead:
# trans <- read.transactions("dataTransactions.tab", format = "single", sep="\t", cols = c(2,6), skip=1)
# Demo: build a small 0/1 incidence matrix and coerce it to transactions.
# matrix() fills column by column, so each text line of the data below
# becomes one column of a_matrix.
a_matrix <- matrix(
  c(1, 1, 1, 0, 0,
    1, 1, 0, 0, 0,
    1, 1, 0, 1, 0,
    0, 0, 1, 0, 1,
    1, 1, 0, 1, 1),
  ncol = 5
)

# as(<matrix>, "transactions") reads rows as transactions and columns as
# items, so the transaction IDs (Tr1..Tr5) belong on the rows and the item
# labels (a..e) on the columns.
dimnames(a_matrix) <- list(
  paste0("Tr", 1:5),
  c("a", "b", "c", "d", "e")
)
a_matrix

trans2 <- as(a_matrix, "transactions")
trans2
inspect(trans2)
# Demo: coerce a data frame of factors to transactions. Both columns are
# built as factors before the coercion.
a_df <- data.frame(
  age   = factor(c(6, 8, 7, 6, 9, 5)),
  grade = factor(c(1, 3, 1, 1, 4, 1))
)
a_df

trans3 <- as(a_df, "transactions")
inspect(trans3)

# Set the notebook plot size (repr.plot.* options), then draw the
# transaction-by-item image of trans3.
options(repr.plot.width = 4, repr.plot.height = 3)
image(trans3)
# Show the first two transactions.
inspect(trans[1:2])

# Transaction info for the transactions that contain more than 20 items.
is_large_basket <- size(trans) > 20
transactionInfo(trans[is_large_basket])

# Transaction-by-item image of the first five transactions.
image(trans[1:5])

# Same kind of image for 100 transactions sampled without replacement.
options(repr.plot.width = 4, repr.plot.height = 5)
basket_sample <- sample(trans, 100, replace = FALSE)
image(basket_sample, main = "matrix diagram")

# Absolute item counts, shown as a single row; table() on the filtered rows
# gives a plain-R view of the corner counts for comparison.
t(itemFrequency(trans, type = "absolute"))
table(tr.filter$corner)

# Relative item frequencies, sorted in decreasing order and rounded to two
# decimals.
item_support <- itemFrequency(trans)
t(round(item_support[order(item_support, decreasing = TRUE)], 2))

# Bar charts: items with support of at least 0.2, then the 20 most frequent
# items.
options(repr.plot.width = 4, repr.plot.height = 4)
itemFrequencyPlot(trans, support = 0.2, cex.names = 0.8, family = "HYsanB")
itemFrequencyPlot(trans, topN = 20, main = "support top 20 items")
# Mine association rules with apriori(), using a minimum support of 0.2 and
# a minimum confidence of 0.8, then print a summary of the resulting rule set.
mining_params <- list(support = 0.2, confidence = 0.8)
rules <- apriori(trans, parameter = mining_params)
summary(rules)
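# Notes on the summary(rules) output (presumably: rule length, number of
# rules of that length, and the shape of such a rule):
#
# rule1 | rule2 | rule3 |
# ---|---|---|
# 2 | 3 | 4 |
# 1 | 40 | 29 |
# a->b | a+b->c | a+b+c->d |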
# Print every mined rule.
inspect(rules)

# Up to 30 rules with the highest lift. head() caps the request at the number
# of rules available, whereas [1:30] would index past the end when fewer than
# 30 rules were mined.
inspect(head(sort(rules, by = "lift"), n = 30))

# Rules whose right-hand side contains the target item and whose lift exceeds
# 1.4, listed by confidence.
rules.target <- subset(rules, rhs %in% "스포츠" & lift > 1.4)
inspect(sort(rules.target, by = "confidence"))

# Rules that mention either item of interest on any side; show at most the
# first 10 (head() again avoids indexing past the end).
rule.interest <- subset(rules, items %in% c("장신구", "섬유"))
inspect(head(rule.interest, n = 10))
# Export the target rules as comma-separated text without row names.
# row.names and FALSE are spelled out in full: the original relied on partial
# argument matching (row.name) and on F, which can be reassigned.
write(rules.target, file = "arules.csv", sep = ",", row.names = FALSE)

# Also export the target rules as PMML, a standard document format for models.
library(pmml)
write.PMML(rules.target, file = "arules.xml")
library(arulesViz)
# Plot all rules with the default arulesViz method.
plot(rules)

# Grouped view of up to 20 rules with the highest lift. head() caps the
# request at the number of rules available instead of indexing past the end.
plot(head(sort(rules, by = "lift"), n = 20), method = "grouped")

# Graph-based view of all rules.
plot(rules, method = "graph", control = list(type = "items"))
# Second example: mine rules from shoppingmall.txt.
# NOTE: this section reuses the names trans and rules, replacing the objects
# built from dataTransactions.tab earlier in the script.
data <- read.delim("shoppingmall.txt", stringsAsFactors = FALSE)
head(data, 3)

# Drop the first column (presumably an ID column; confirm against the file)
# and coerce the rest to a matrix. drop = FALSE keeps the result
# two-dimensional, with its column name, even if only one column remains.
st <- as.matrix(data[, -1, drop = FALSE])
st[1:5, ]

# Coerce the matrix to transactions (rows are read as transactions, columns
# as items).
trans <- as(st, "transactions")
inspect(trans[1:20])

# Transaction-by-item image of the first five transactions.
options(repr.plot.width = 4, repr.plot.height = 3)
image(trans[1:5])
inspect(trans[1:2])

# Same kind of image for 100 transactions sampled without replacement.
options(repr.plot.width = 4, repr.plot.height = 7)
image(sample(trans, 100, replace = FALSE), main = "matrix diagram")

# Mine rules with a minimum support of 0.01 and a minimum confidence of 0.8.
rules <- apriori(trans, parameter = list(support = 0.01, confidence = 0.8))
summary(rules)

# Up to 20 rules with the highest lift. head() caps the request at the number
# of rules available, so this does not fail when fewer than 20 were mined.
inspect(head(sort(rules, by = "lift"), n = 20))