11. arrange: reorder a data frame
flights %>%
filter( hour < 8 ) %>%
arrange( year, month, day )
12. select: select certain columns from the data frame
select(flights, year, month, day)
select(flights, year:day)
select(flights, -(year:day))
13. mutate: modify or create columns based on others
flights %>%
mutate(
gain = arr_delay - dep_delay,
speed = distance / air_time * 60
) %>%
filter( gain > 0 ) %>%
arrange( desc(speed) ) %>%
select( year, month, day, dest, gain, speed )
14. summarise: collapse a data frame into one row …
flights %>%
summarise(delay = mean(dep_delay, na.rm = TRUE))
flights %>%
filter( dep_delay > 0 ) %>%
summarise(arr_delay = mean(arr_delay, na.rm = TRUE))
15. group_by: group observations by one or more variables
flights %>%
group_by( tailnum ) %>%
summarise(
count = n(),
dist = mean(distance, na.rm = TRUE),
delay = mean(arr_delay, na.rm = TRUE)
) %>%
filter( is.finite(delay) ) %>%
arrange( desc(count) )
16. bind_rows
bind_rows( , )
color num
green 1
yellow 2
red 3
blue 4
pink 5
color num
green 1
yellow 2
color num
red 3
blue 4
pink 5
17. joins
a <- data_frame(
color = c("green", "yellow", "red"),
num = 1:3
)
b <- data_frame(
color = c("green", "yellow", "pink"),
size = c("S", "M", "L")
)
color num
green 1
yellow 2
red 3
color size
green S
yellow M
pink L
25.
g <- c("origin", "dest")
v <- "dep_delay"
flights %>%
group_by( g ) %>%
summarise( result = mean(v, na.rm = TRUE) )
🙀
🙀
26.
g <- c("origin", "dest")
v <- "dep_delay"
flights %>%
group_by_( .dots = g ) %>%
summarise_( .dots =
interp(~ mean(var, na.rm = TRUE), var = as.name(v))
)
27.
28.
29.
30. Future
• Performance improvements (parallel C++)
• Alternative back ends
• Different type of groupings (e.g. bootstrap)
As soon as we get a hoverboard ...