# Install dplyr only when it is missing, so the script does not re-download
# the package (and require network access) on every run. The mirror is
# addressed over HTTPS rather than plain HTTP.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr", repos = "https://cran.us.r-project.org")
}
library(dplyr)
# Build the demo score table: five rows with an ID, two exam scores and a
# quiz score. The columns are given inline instead of through throwaway
# single-letter globals (one of which was named `c`, shadowing base::c()).
exam1 <- data.frame(
  ID = 1:5,
  Exam1 = c(3.3, 4, 2.3, 2.2, 3.1),
  Exam2 = c(2, 4, 0, 1, 1.2),
  Quiz = c(3.7, 4, 3.3, 3.3, 3.9)
)
exam1
# row.names = FALSE keeps the automatic row numbers out of the file; otherwise
# read.csv() brings them back as an extra leading column named "X".
write.csv(exam1, "exam1.csv", row.names = FALSE)
# Rows with Exam2 of at least 1 and Quiz below 3.9, written two equivalent
# ways: one combined condition, then comma-separated conditions (which
# filter() also combines with AND).
exam1 %>% filter(Exam2 >= 1 & Quiz < 3.9)
exam1 %>% filter(Exam2 >= 1, Quiz < 3.9)
# Column means of the two exams.
mean(exam1[["Exam1"]])
mean(exam1[["Exam2"]])
# Rows that are at or above the column mean on both exams.
exam1 %>% filter(Exam1 >= mean(Exam1), Exam2 >= mean(Exam2))
# Append a row of four values to the table. The result is only printed;
# exam1 itself is not modified.
app <- c(6, 3.5, 1.5, 3.5)
rbind(exam1, app)
# Append again, this time supplying only two values.
# NOTE(review): the vector is shorter than the number of columns, so this
# looks like a demonstration of value recycling -- confirm that is intended.
app <- c(6, 1)
rbind(exam1, app)
# Sort by Quiz ascending; use arrange(exam1, desc(Quiz)) for descending order.
arrange(exam1, Quiz)
# Sort ascending by Quiz first, breaking ties with Exam1.
arrange(exam1, Quiz, Exam1)
sample_n(exam1, 3) # random sample of a fixed number of rows
sample_frac(exam1, 0.4) # random sample of a fixed fraction of rows
# Systematic (1-in-n) sampling: keep the rows at odd positions 1, 3, 5, ...
# seq_len() is used instead of 1:nrow() so that an empty table yields an
# empty index rather than c(1, 0).
exam1[seq_len(nrow(exam1)) %% 2 == 1, ]
# Add a gender label to every row and save the extended table.
exam1$Gender <- c("남", "여", "남", "여", "남")
write.csv(exam1, "exam3.csv")
# Per-gender mean of Exam1 and median of Quiz.
by_gender <- group_by(exam1, Gender)
summarise(by_gender, exam1 = mean(Exam1), quiz = median(Quiz))
# Per-gender minimum and maximum of each score column. across() replaces the
# deprecated summarise_each()/funs() pair; output columns are named
# <column>_<function> (Exam1_min, Exam1_max, Exam2_min, ...).
exam1 %>%
  group_by(Gender) %>%
  summarise(across(c(Exam1, Exam2, Quiz), list(min = min, max = max)))
# Two ways to drop repeated Quiz scores: the first keeps all columns of the
# first row seen for each score, the second returns only the Quiz column.
filter(exam1, !duplicated(Quiz))
distinct(exam1, Quiz)
# Derive the exam total and its average as new columns, then show the table.
exam1 <- exam1 %>%
  mutate(ExamSum = Exam1 + Exam2, ExamMean = ExamSum / 2)
exam1
# Show the contiguous column range from ID through Exam2.
exam1 %>% select(ID:Exam2)
# Drop the two derived columns again and show the table.
exam1 <- exam1 %>% select(-ExamSum, -ExamMean)
exam1
# Show the table with renamed columns (new_name = old_name); the result is
# printed only, exam1 keeps its original names.
exam1 %>% rename(id = ID, quiz = Quiz, ex1 = Exam1, ex2 = Exam2)
# Show a subset of columns in a custom order.
exam1 %>% select(ID, Quiz, Exam1:Exam2)
# Add a column that contains missing values and show the table.
exam1[["Extra"]] <- c(1, 1, NA, NA, 2)
exam1
# Replace the missing entries with 0 and show the result.
exam1[["Extra"]] <- replace(exam1[["Extra"]], is.na(exam1[["Extra"]]), 0)
exam1
# Second score table, keyed by CID, holding two further exams and the final
# exam. The columns are given inline instead of through throwaway
# single-letter globals (one of which was named `c`, shadowing base::c()).
exam2 <- data.frame(
  CID = 1:5,
  Exam3 = c(3.1, 4, 2.3, 5.2, 2.1),
  Exam4 = c(2, 4, 0, 1, 1.2),
  FinalExam = c(1.5, 2, 3.6, 6.3, 2.9)
)
# Join the two tables where exam1's ID equals exam2's CID.
merge(x = exam1, y = exam2, by.x = "ID", by.y = "CID")
# Transpose the table so that its columns become rows.
t(exam1)
# Sum of Quiz within each Gender value. tapply() only works with a
# categorical grouping variable; apply() is the counterpart for matrices.
tapply(X = exam1$Quiz, INDEX = exam1$Gender, FUN = sum)
# Small log-style table for the reshaping demo: seven rows, each with an id,
# a site label, a pageview value and a dwelltime value.
a <- rep(c(1, 2), times = c(4, 3))
b <- c("a", "b", "c", "a", "a", "b", "b")
c <- seq_len(7)
d <- rev(seq_len(7))
tr <- data.frame(id = a, site = b, pageview = c, dwelltime = d)
tr
# NOTE(review): reshape also exports a rename(); loading it here presumably
# masks dplyr::rename() for any later code -- verify if rename() is used below.
library(reshape)
# Long format: one row per (id, site, measured variable), i.e. the
# measure.vars values are stacked while the id.vars columns identify each row.
tr.melt <- melt(
  tr,
  id.vars = c("id", "site"),
  measure.vars = c("pageview", "dwelltime")
)
tr.melt
# Total pageview with one row per id and one column per site.
cast(
  tr.melt, id ~ site,
  fun.aggregate = sum,
  subset = variable == "pageview"
)
# Number of observations for each id/site pair, per measured variable.
cast(tr.melt, id + site ~ variable, fun.aggregate = length)
# Mean pageview per id.
cast(
  tr.melt, id ~ variable,
  fun.aggregate = mean,
  subset = variable == "pageview"
)
# Reload the score table from disk and add the exam total.
# Note: if the CSV was written with row names, read.csv() returns them as an
# extra leading column named "X".
exam1 <- read.csv("exam1.csv")
exam1 <- mutate(exam1, ExamSum = Exam1 + Exam2)
# Bin ExamSum into 3 equal-width intervals. labels = FALSE returns the
# interval number instead of a factor (spelled out because `F` is an
# ordinary variable that can be reassigned).
exam1$Level <- cut(exam1$ExamSum, breaks = 3, labels = FALSE)
exam1
# Re-bin with explicit boundaries: (0,2], (2,4], (4,6], (6,8].
exam1$Level <- cut(exam1$ExamSum, breaks = c(0, 2, 4, 6, 8), labels = FALSE)
exam1
# Load the tab-separated customer and transaction tables, keeping text
# columns as character vectors. TRUE/FALSE are spelled out because `T` and
# `F` are ordinary variables that can be reassigned.
cs <- read.table(
  "dataCustomers.tab",
  sep = "\t", header = TRUE, stringsAsFactors = FALSE
)
tr <- read.table(
  "dataTransactions.tab",
  sep = "\t", header = TRUE, stringsAsFactors = FALSE
)
# Peek at the first three rows of each table.
head(cs, 3)
head(tr, 3)
# Last rows among customers aged 50-59 whose gender is "여" (female) and
# whose marriage status is "기혼" (married).
cs %>%
  filter(age >= 50 & age <= 59 & gender == "여" & marriage == "기혼") %>%
  tail()
# Number of customers per gender.
cs %>%
  group_by(gender) %>%
  summarise(Cnt = n())
# Mean age per gender.
# Equivalent without a leading pipe:
#   group_by(cs, gender) %>% summarise(age = mean(age))
cs %>%
  group_by(gender) %>%
  summarise(age = mean(age))
# First three distinct residences, as a one-column table and as a vector.
head(distinct(cs, residence), 3)
head(unique(cs$residence), 3)
head(tr, 3)
# Number of transactions for each store/import combination.
tr %>%
  group_by(store, import) %>%
  summarise(Cnt = n())
# merge() without `by` joins on every column name the two tables share.
# NOTE(review): presumably that is only custid -- verify against the data.
tmp <- merge(cs, tr)
head(tmp, 3)
# Per-gender minimum, median and maximum of amount. across() replaces the
# deprecated summarise_each()/funs() pair; .names = "{.fn}" names the output
# columns min, median and max.
tmp %>%
  group_by(gender) %>%
  summarise(
    across(
      amount,
      list(min = min, median = median, max = max),
      .names = "{.fn}"
    )
  )
# The three customers with the largest total amount.
tr %>%
  group_by(custid) %>%
  summarise(amount = sum(amount)) %>%
  arrange(desc(amount)) %>%
  head(3)