library(randomForest)
library(caret)
library(ROCR)
# Load data ----
# read.delim() expects a tab-delimited file; character columns stay as strings.
cb <- read.delim(
  "../1022_Decision Tree_2/Hshopping.txt",
  stringsAsFactors = FALSE
)

# Overwrite the header with ASCII names. Per the original note, the Korean
# column names caused a font/rendering error in the Jupyter notebook.
names(cb) <- c("ID", "SEX", "AGE", "AMT", "STAR", "REFUND")

# REFUND is the classification target, so it is converted to a factor.
cb[["REFUND"]] <- factor(cb[["REFUND"]])

# Train/test split ----
# Seeded so the partition is reproducible; p = 0.6 sends 60% of the rows to
# the training set, sampled with respect to the REFUND outcome.
set.seed(1)
inTrain <- createDataPartition(y = cb[["REFUND"]], p = 0.6, list = FALSE)
cb.train <- cb[inTrain, ]
cb.test <- cb[-inTrain, ]

# Quick size check: training rows, then total rows.
nrow(cb.train)
nrow(cb)
# Fit the random forest ----
# Seed fixed so the fitted forest is reproducible between runs.
set.seed(123)

# REFUND is modelled from every other column except ID, using 50 trees and
# 2 candidate variables per split.
rf_model <- randomForest(
  REFUND ~ . - ID,
  data = cb.train,
  ntree = 50,
  mtry = 2
)

# Print the fitted model summary.
rf_model

# Diagnostics ----
# Plot size for the notebook front end (repr options).
options(repr.plot.width = 5, repr.plot.height = 4)
plot(rf_model)

# Variable-importance table. The original note says the values are shown as
# percentages; NOTE(review): the model was fitted without importance = TRUE,
# so confirm which measure and unit is actually reported here.
importance(rf_model)
varImpPlot(rf_model)
# Evaluate on the hold-out set ----
# The ROC, lift and AUC below score the SECOND level of REFUND as the positive
# class (ROCR's default ordering for a two-level factor). caret's
# confusionMatrix() would otherwise default to the FIRST level, so the class
# is named explicitly to keep sensitivity/specificity consistent with the
# ROC-based metrics.
stopifnot(nlevels(cb.test$REFUND) == 2L)
positive_class <- levels(cb.test$REFUND)[2]

# Hard class predictions and the confusion matrix.
cb.test$rf_pred <- predict(rf_model, cb.test, type = "response")
confusionMatrix(cb.test$rf_pred, cb.test$REFUND, positive = positive_class)

# Class probabilities: a matrix with one column per class, stored as a matrix
# column of cb.test. The positive-class column is selected by name rather
# than by a hard-coded index.
cb.test$rf_pred_prob <- predict(rf_model, cb.test, type = "prob")
rf_pred <- prediction(cb.test$rf_pred_prob[, positive_class], cb.test$REFUND)

rf_model.perf1 <- performance(rf_pred, "tpr", "fpr") # ROC curve
rf_model.perf2 <- performance(rf_pred, "lift", "rpp") # lift chart

# Draw both charts side by side, then restore the previous layout so the
# two-panel setting does not leak into later plots.
options(repr.plot.width = 8, repr.plot.height = 4)
old_par <- par(mfrow = c(1, 2))
plot(rf_model.perf1, colorize = TRUE)
abline(a = 0, b = 1, lty = 3) # diagonal reference line
plot(rf_model.perf2, colorize = TRUE)
abline(v = 0.4, lty = 3) # vertical reference at a rate of positive predictions of 0.4
par(old_par)

# Area under the ROC curve.
performance(rf_pred, "auc")@y.values[[1]]