library(plot3Drgl)
library(nycflights13)
# Set the repr plot height option (presumably for a notebook front end).
options(repr.plot.height = 3)

# Plot the same three iris measurements in 3D, first with points3D() and
# then with the rgl-backed scatter3Drgl().
points3D(iris$Sepal.Length, iris$Sepal.Width, iris$Petal.Length)
scatter3Drgl(iris$Sepal.Length, iris$Sepal.Width, iris$Petal.Length)
library(tidyverse)
library(forcats)
library(lubridate)
# Month abbreviations in calendar order, used as the factor levels below.
month_levels <- c(
  "Jan", "Feb", "Mar", "Apr", "May", "Jun",
  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
)

# Two sample vectors; note that "Jam" in x2 is not one of month_levels.
x1 <- c("Dec", "Apr", "Jan", "Mar")
x2 <- c("Dec", "Apr", "Jam", "Mar")

# Build a factor whose levels follow the calendar, then print it.
y1 <- factor(x1, levels = month_levels)
y1

# Parse a "year-month-day hour:minute:second" string into a date-time.
ymd_hms("2017-01-31 20:11:59")

# Assemble a departure date-time from its separate component columns and
# show the first three rows.
flights %>%
  select(year, month, day, hour, minute) %>%
  mutate(departure = make_datetime(year, month, day, hour, minute)) %>%
  head(3)
# Build a date-time from a calendar date plus a clock time encoded as an
# HHMM number (e.g. 517 means 05:17).
#
# year, month, day: calendar components, forwarded to make_datetime().
# time: numeric HHMM value; integer division by 100 yields the hour and
#   the remainder yields the minute.
# Returns whatever make_datetime() returns for those components.
make_datetime_100 <- function(year, month, day, time) {
  hour_part <- time %/% 100 # quotient: the hour
  minute_part <- time %% 100 # remainder: the minute
  make_datetime(year, month, day, hour_part, minute_part)
}
# Build flights_dt: flights with the HHMM-encoded departure/arrival columns
# converted to real date-times.
#
# Rows without an actual departure or arrival time are dropped first.
# All four time columns are initially stamped with the row's year/month/day,
# so a flight that lands after midnight would get an arrival that precedes
# its departure. To correct that, an arrival that falls before the matching
# departure is moved forward by one day.
# NOTE(review): a departure delayed past midnight still carries the date in
# year/month/day — confirm whether dep_time needs a similar adjustment.
flights_dt <- flights %>%
  filter(!is.na(dep_time), !is.na(arr_time)) %>%
  mutate(
    dep_time = make_datetime_100(year, month, day, dep_time),
    arr_time = make_datetime_100(year, month, day, arr_time),
    sched_dep_time = make_datetime_100(year, month, day, sched_dep_time),
    sched_arr_time = make_datetime_100(year, month, day, sched_arr_time),
    # Overnight flights: roll the arrival over to the following day.
    arr_time = arr_time + days(as.integer(arr_time < dep_time)),
    sched_arr_time = sched_arr_time +
      days(as.integer(sched_arr_time < sched_dep_time))
  ) %>%
  select(origin, dest, ends_with("delay"), ends_with("time"))

# Preview the first rows of the result.
head(flights_dt)
# Departures per day over the whole data set, as a frequency polygon.
# dep_time is a date-time, so the bin width is given in seconds.
flights_dt %>%
  ggplot(aes(dep_time)) +
  geom_freqpoly(binwidth = 24 * 60 * 60) # one day

# Same daily bins as a histogram; the bars are too dense to read easily.
flights_dt %>%
  ggplot(aes(dep_time)) +
  geom_histogram(binwidth = 24 * 60 * 60) # one day

# Keep only departures before 2013-01-02 and count them in 10-minute bins.
flights_dt %>%
  filter(dep_time < ymd(20130102)) %>%
  ggplot(aes(dep_time)) +
  geom_freqpoly(binwidth = 10 * 60) # ten minutes
# Take today's date and pull out its individual components.
datetime <- today()

year(datetime)
month(datetime)
mday(datetime) # day of the month
yday(datetime) # day of the year
wday(datetime) # day of the week, as a number
wday(datetime, label = TRUE, abbr = FALSE) # day of the week, full name

# The same components collected into a one-row data frame.
data.frame(
  year = year(datetime),
  mon = month(datetime),
  mday = mday(datetime),
  yday = yday(datetime),
  wday = wday(datetime),
  wday_label = wday(datetime, label = TRUE, abbr = FALSE)
)
# Number of departures per day of the week.
flights_dt %>%
  mutate(wday = wday(dep_time, label = TRUE)) %>%
  ggplot(aes(x = wday)) +
  geom_bar()

# Mean arrival delay (and flight count) for each minute of the hour at
# which the flight departed, drawn as a line.
flights_dt %>%
  mutate(minute = minute(dep_time)) %>%
  group_by(minute) %>%
  summarise(
    avg_delay = mean(arr_delay, na.rm = TRUE),
    n = n()
  ) %>%
  ggplot(aes(minute, avg_delay)) +
  geom_line()

# Number of departures per week: round each departure down to the start of
# its week, count, and plot the counts over time.
flights_dt %>%
  count(week = floor_date(dep_time, "week")) %>%
  ggplot(aes(week, n)) +
  geom_line()
library(modelr)
# sim1 is the example data set used throughout this section.
head(sim1)

# Scatter plot of the raw data.
ggplot(sim1) +
  geom_point(aes(x, y))

# 250 candidate lines: intercepts (a1) drawn uniformly from [-20, 40] and
# slopes (a2) drawn uniformly from [-5, 5]. runif() gives every value in
# the interval the same probability.
models <- tibble(
  a1 = runif(n = 250, min = -20, max = 40),
  a2 = runif(n = 250, min = -5, max = 5)
)
head(models)

# Overlay every candidate line on the data; geom_abline() draws a line from
# an intercept and a slope.
ggplot(sim1, aes(x, y)) +
  geom_abline(
    aes(intercept = a1, slope = a2),
    data = models,
    alpha = .25
  ) +
  geom_point()
# Predictions of a straight-line model.
#
# a: parameter vector; a[1] is the intercept and a[2] is the slope.
# data: object with an `x` element holding the predictor values.
# Returns a[1] + data$x * a[2], one prediction per element of data$x.
model1 <- function(a, data) {
  intercept <- a[1]
  slope <- a[2]
  intercept + data$x * slope
}
# Predictions for sim1 with intercept 7 and slope 1.5.
model1(a = c(7, 1.5), data = sim1)
# Root-mean-squared error between the observed y values and the model1()
# predictions.
#
# mod: parameter vector passed on to model1() (intercept, slope).
# data: object with `x` and `y` elements.
# Returns a single number: sqrt(mean((y - prediction)^2)).
measure_distance <- function(mod, data) {
  residual <- data$y - model1(mod, data)
  sqrt(mean(residual^2))
}
# RMSE (root-mean-squared error) of the line with intercept 7 and
# slope 1.5 on sim1.
measure_distance(mod = c(7, 1.5), data = sim1)

# A flat line at mean(y): its RMSE is the standard deviation of y computed
# with divisor n. sd() divides by n - 1, so the two values are close but
# not identical.
measure_distance(mod = c(mean(sim1$y), 0), data = sim1)
sd(sim1$y)
# RMSE on sim1 of the line with intercept a1 and slope a2.
#
# Wraps measure_distance() so that the two parameters can be supplied as
# separate arguments (convenient for map2-style iteration).
sim1_dist <- function(a1, a2) {
  params <- c(a1, a2)
  measure_distance(params, sim1)
}
# map_dbl() applies a function to each element and returns a numeric vector.
map_dbl(c(1, 2, 3), sqrt)

# map2_dbl() walks two inputs in parallel and passes them pairwise to a
# two-argument function.
map2_dbl(c(1, 2, 3), c(4, 5, 6), function(x, y) x + y)

# Score every candidate (intercept, slope) pair by its RMSE on sim1.
models <- models %>%
  mutate(dist = map2_dbl(a1, a2, sim1_dist))
head(models)
# The ten candidates with the smallest distance, drawn over the data.
# Colour is mapped to -dist, so the closest lines sit at the high end of
# the colour scale.
ggplot(sim1, aes(x, y)) +
  geom_point(size = 2, colour = "grey30") +
  geom_abline(
    aes(intercept = a1, slope = a2, colour = -dist),
    data = models %>% filter(rank(dist) <= 10)
  )

# Same idea, selecting the ten best by sorting on dist and taking the
# first ten rows.
ggplot(sim1, aes(x, y)) +
  geom_point(size = 2, colour = "grey30") +
  geom_abline(
    aes(intercept = a1, slope = a2, colour = -dist),
    data = head(arrange(models, dist), 10)
  )

# All candidates in parameter space (intercept vs. slope), coloured by
# -dist: the region with the high-end colour is where the good fits are.
ggplot(models) +
  geom_point(aes(a1, a2, color = -dist))
# Numerically minimise the RMSE, starting the search from intercept 0 and
# slope 0; `data = sim1` is forwarded to measure_distance().
best <- optim(par = c(0, 0), fn = measure_distance, data = sim1)
best$par

# Draw the optimised line: best$par[1] is the intercept and best$par[2]
# is the slope.
ggplot(sim1, aes(x, y)) +
  geom_point(size = 2, colour = "grey30") +
  geom_abline(intercept = best$par[1], slope = best$par[2])

# Fit the same straight line with lm() for comparison.
lm(y ~ x, data = sim1)
# Columns available in the diamonds data set.
names(diamonds)

# Simple regression of price on carat.
model <- lm(price ~ carat, data = diamonds)
summary(model)

# `0 +` drops the intercept from the model.
summary(lm(price ~ 0 + carat, data = diamonds))

# Add z as a second predictor.
# Original note: as carat grows, z has to shrink — presumably a remark on
# the sign of the z coefficient; check against the summary output.
summary(lm(price ~ carat + z, data = diamonds))

# Price against carat with heavy transparency to cope with overplotting.
ggplot(diamonds) +
  geom_point(aes(carat, price), alpha = .02)

# Inside a formula `^` is a formula operator, not arithmetic, so this does
# NOT add a squared term.
summary(lm(price ~ carat^2, data = diamonds))

# Wrapping the expression in I() makes `^` arithmetic, giving a real
# quadratic term.
summary(lm(price ~ I(carat^2) + carat + z, data = diamonds))

# Main effects plus an explicit carat-by-z interaction term ...
summary(lm(price ~ carat + z + carat:z, data = diamonds))

# ... and the shorthand: carat * z expands to carat + z + carat:z.
summary(lm(price ~ carat * z, data = diamonds))