# C5.0 decision-tree workflow, part 1: load packages, read the data and
# split it into training and test sets.
library(C50)
library(caret)
library(ROCR)

# Read the tab-delimited file, keeping strings as character so that only the
# column converted below becomes a factor.
cb <- read.delim("Hshopping.txt", stringsAsFactors = FALSE)
# Convert the label column to a factor before the headers are renamed
# (presumably this is the column that ends up named REFUND -- verify against
# the data file).
cb$반품여부 <- as.factor(cb$반품여부)
# Replace the original headers with short ASCII names; this assumes the file
# has exactly six columns in this order.
colnames(cb) <- c("ID", "SEX", "AGE", "AMT", "APP", "REFUND")
head(cb)

# Fix the seed so that the same sample is drawn on every run.
set.seed(1)
# Row indices for the training set: 60% of the rows, returned as a matrix
# (list = FALSE) so it can be used directly for row subsetting.
inTrain <- createDataPartition(y = cb$REFUND, p = 0.6, list = FALSE)
head(inTrain)
cb.train <- cb[inTrain, ]
cb.test <- cb[-inTrain, ]
dim(cb.train)
dim(cb.test)
# Tuning options available through C5.0Control():
#   - Winnowing        (winnow)
#   - Pruning severity (presumably the CF argument -- confirm in the C50 docs)
#   - Global pruning   (noGlobalPruning)
# Baseline settings: no winnowing, global pruning left enabled.
c5_options <- C5.0Control(winnow = FALSE, noGlobalPruning = FALSE)

# Baseline model: a single decision tree (rules = FALSE) predicting REFUND
# from the four predictor columns of the training set.
c5_model <- C5.0(
  REFUND ~ SEX + AGE + AMT + APP,
  data = cb.train,
  control = c5_options,
  rules = FALSE
)
summary(c5_model)

# repr.plot.height is read by the Jupyter/IRkernel display machinery; it is
# harmless elsewhere.
options(repr.plot.height = 5)
plot(c5_model)
# Variant 2: same formula and control settings, but expressed as a rule set
# instead of a tree.
c5_model_2 <- C5.0(
  REFUND ~ SEX + AGE + AMT + APP,
  data = cb.train,
  control = c5_options,
  rules = TRUE
)
summary(c5_model_2)

# Variant 3: tree model with boosting, up to 3 trials.
c5_model_3 <- C5.0(
  REFUND ~ SEX + AGE + AMT + APP,
  data = cb.train,
  control = c5_options,
  rules = FALSE,
  trials = 3
)
summary(c5_model_3)

# Variant 4: single tree with the confidence factor set to 0.7 instead of the
# package default (a different CF changes how aggressively the tree is
# pruned -- see the C5.0Control documentation).
c5_options_2 <- C5.0Control(winnow = FALSE, noGlobalPruning = FALSE, CF = 0.7)
c5_model_4 <- C5.0(
  REFUND ~ SEX + AGE + AMT + APP,
  data = cb.train,
  control = c5_options_2,
  rules = FALSE
)
summary(c5_model_4)
# Evaluate the baseline tree (c5_model) on the held-out test set.

# Predicted class labels, and class probabilities rounded to two decimals.
# The probability result is a matrix with one column per class, stored as a
# single matrix-valued column of cb.test.
cb.test$c5_pred <- predict(c5_model, cb.test, type = "class")
cb.test$c5_pred_prob <- round(predict(c5_model, cb.test, type = "prob"), 2)
confusionMatrix(data = cb.test$c5_pred, reference = cb.test$REFUND)

# ROCR prediction object built from the probability of the second class.
# NOTE(review): the scores used here are the rounded ones, which can merge
# nearby thresholds; use the unrounded probabilities if exact AUC matters.
# NOTE(review): column 2 is assumed to be the positive class -- confirm the
# level order of REFUND.
c5_pred <- prediction(cb.test$c5_pred_prob[, 2], cb.test$REFUND)
c5_model.perf1 <- performance(c5_pred, "tpr", "fpr") # ROC curve
c5_model.perf2 <- performance(c5_pred, "lift", "rpp") # Lift chart

# Draw the ROC curve and the lift chart side by side, then restore the
# previous graphics layout so later plots are not squeezed into a half panel.
old_par <- par(mfrow = c(1, 2))
plot(c5_model.perf1, colorize = TRUE)
plot(c5_model.perf2, colorize = TRUE)
par(old_par)

# Area under the ROC curve.
performance(c5_pred, "auc")@y.values[[1]]

# Alternative ROC plot from the Epi package; c5_pred_prob in the formula is
# looked up in cb.test.
library(Epi)
ROC(form = cb.test$REFUND ~ c5_pred_prob[, 2], data = cb.test, plot = "ROC")