SlideShare ist ein Scribd-Unternehmen logo
1 von 31
Downloaden Sie, um offline zu lesen
dplyr
@romain_francois
• Using R since 2002
• #rcatladies
• R Enthusiast
• R/C++ hero
• Performance
• dplyr
• Occasional comedy
%>% from magrittr
enjoy(cool(bake(shape(beat(append(bowl(rep("flour",
2), "yeast", "water", "milk", "oil"), "flour", until
= "soft"), duration = "3mins"), as = "balls", style =
"slightly-flat"), degrees = 200, duration =
"15mins"), duration = "5mins"))
bowl(rep("flour", 2), "yeast", "water", "milk", "oil") %>%
append("flour", until = "soft") %>%

beat(duration = "3mins") %>%

shape(as = "balls", style = "slightly-flat") %>%

bake(degrees = 200, duration = "15mins") %>%

cool(buns, duration = "5mins") %>%
enjoy()
nycflights13
> flights
Source: local data frame [336,776 x 16]
year month day dep_time dep_delay arr_time arr_delay carrier tailnum flight
1 2013 1 1 517 2 830 11 UA N14228 1545
2 2013 1 1 533 4 850 20 UA N24211 1714
.. ... ... ... ... ... ... ... ... ... ...
Variables not shown: origin (chr), dest (chr), air_time (dbl), distance (dbl),
hour (dbl), minute (dbl)
nycflights13
> glimpse(flights)
Observations: 336,776
Variables: 16
$ year (int) 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 201...
$ month (int) 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
$ day (int) 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
$ dep_time (int) 517, 533, 542, 544, 554, 554, 555, 557, 557, 558, 558, 55...
$ dep_delay (dbl) 2, 4, 2, -1, -6, -4, -5, -3, -3, -2, -2, -2, -2, -2, -1, ...
$ arr_time (int) 830, 850, 923, 1004, 812, 740, 913, 709, 838, 753, 849, 8...
$ arr_delay (dbl) 11, 20, 33, -18, -25, 12, 19, -14, -8, 8, -2, -3, 7, -14,...
$ carrier (chr) "UA", "UA", "AA", "B6", "DL", "UA", "B6", "EV", "B6", "AA...
$ tailnum (chr) "N14228", "N24211", "N619AA", "N804JB", "N668DN", "N39463...
$ flight (int) 1545, 1714, 1141, 725, 461, 1696, 507, 5708, 79, 301, 49,...
$ origin (chr) "EWR", "LGA", "JFK", "JFK", "LGA", "EWR", "EWR", "LGA", "...
$ dest (chr) "IAH", "IAH", "MIA", "BQN", "ATL", "ORD", "FLL", "IAD", "...
$ air_time (dbl) 227, 227, 160, 183, 116, 150, 158, 53, 140, 138, 149, 158...
$ distance (dbl) 1400, 1416, 1089, 1576, 762, 719, 1065, 229, 944, 733, 10...
$ hour (dbl) 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, ...
$ minute (dbl) 17, 33, 42, 44, 54, 54, 55, 57, 57, 58, 58, 58, 58, 58, 5...
filter: A subset of the rows of the data frame
flights %>%
filter( dep_delay < 10 )
flights %>%
filter( arr_delay < dep_delay )
slice: filter rows by position
flights %>%
slice( 1:10 )
arrange: reorder a data frame
flights %>%
filter( hour < 8 ) %>%
arrange( year, month, day )
select: select certain columns from the data frame
select(flights, year, month, day)
select(flights, year:day)
select(flights, -(year:day))
mutate: modify or create columns based on others
flights %>%
mutate(
gain = arr_delay - dep_delay,
speed = distance / air_time * 60
) %>%
filter( gain > 0 ) %>%
arrange( desc(speed) ) %>%
select( year, month, day, dest, gain, speed )
summarise: collapse a data frame into one row …
flights %>%
summarise(delay = mean(dep_delay, na.rm = TRUE))
flights %>%
filter( dep_delay > 0 ) %>%
summarise(arr_delay = mean(arr_delay, na.rm = TRUE))
group_by: Group observations by one or more variables
flights %>%
group_by( tailnum ) %>%
summarise(
count = n(),
dist = mean(distance, na.rm = TRUE),
delay = mean(arr_delay, na.rm = TRUE)
) %>%
filter( is.finite(delay) ) %>%
arrange( desc(count) )
bind_rows
bind_rows( , )
color num
green 1
yellow 2
red 3
blue 4
pink 5
color num
green 1
yellow 2
color num
red 3
blue 4
pink 5
joins
a <- data_frame(
color = c("green", "yellow", "red"),
num = 1:3
)
b <- data_frame(
color = c("green", "yellow", "pink"),
size = c("S", "M", "L")
)
color num
green 1
yellow 2
red 3
color size
green S
yellow M
pink L
inner_join
color num
green 1
yellow 2
red 3
color size
green S
yellow M
pink L
inner_join( , )
color num size
green 1 S
yellow 2 M
left_join
color num
green 1
yellow 2
red 3
color size
green S
yellow M
pink L
left_join( , )
color num size
green 1 S
yellow 2 M
red 3
right_join
color num
green 1
yellow 2
red 3
color size
green S
yellow M
pink L
right_join( , )
color num size
green 1 S
yellow 2 M
pink L
full_join
color num
green 1
yellow 2
red 3
color size
green S
yellow M
pink L
full_join( , )
color num size
green 1 S
yellow 2 M
red 3
pink L
data_frame: Just like data.frame, but better
> data_frame( x = 1:5, y = letters[1:5] ) %>% glimpse
Observations: 5
Variables: 2
$ x (int) 1, 2, 3, 4, 5
$ y (chr) "a", "b", "c", "d", "e"
> data_frame( x = 1:5, y = letters[1:5] , z = x + 1) %>% glimpse
Observations: 5
Variables: 3
$ x (int) 1, 2, 3, 4, 5
$ y (chr) "a", "b", "c", "d", "e"
$ z (dbl) 2, 3, 4, 5, 6
frame_data aka tibble
> frame_data(
+ ~colA, ~colB,
+ "a", 1,
+ "b", 2
+ )
Source: local data frame [2 x 2]
colA colB
(chr) (dbl)
1 a 1
2 b 2
_
g <- c("origin", "dest")
v <- "dep_delay"
flights %>%
group_by( g ) %>%
summarise( result = mean(v, na.rm = TRUE) )
🙀
🙀
g <- c("origin", "dest")
v <- "dep_delay"
flights %>%
group_by_( .dots = g ) %>%
summarise_( .dots =
interp(~ mean(var, na.rm = TRUE), var = as.name(v))
)
Future
• Performance improvements (parallel C++)
• Alternative back ends
• Different type of groupings (e.g. bootstrap)
As soon as we get hoverboard ...
dplyr
Romain François
@romain_francois
romain@r-enthusiasts.com

Weitere ähnliche Inhalte

Was ist angesagt?

Kevin Kempter PostgreSQL Backup and Recovery Methods @ Postgres Open
Kevin Kempter PostgreSQL Backup and Recovery Methods @ Postgres OpenKevin Kempter PostgreSQL Backup and Recovery Methods @ Postgres Open
Kevin Kempter PostgreSQL Backup and Recovery Methods @ Postgres Open
PostgresOpen
 

Was ist angesagt? (20)

Better than you think: Handling JSON data in ClickHouse
Better than you think: Handling JSON data in ClickHouseBetter than you think: Handling JSON data in ClickHouse
Better than you think: Handling JSON data in ClickHouse
 
Postgresql Database Administration Basic - Day1
Postgresql  Database Administration Basic  - Day1Postgresql  Database Administration Basic  - Day1
Postgresql Database Administration Basic - Day1
 
Backup & disaster recovery for Solr
Backup & disaster recovery for SolrBackup & disaster recovery for Solr
Backup & disaster recovery for Solr
 
PostgreSQL Replication Tutorial
PostgreSQL Replication TutorialPostgreSQL Replication Tutorial
PostgreSQL Replication Tutorial
 
Data modeling for Elasticsearch
Data modeling for ElasticsearchData modeling for Elasticsearch
Data modeling for Elasticsearch
 
MongoDB Europe 2016 - Advanced MongoDB Aggregation Pipelines
MongoDB Europe 2016 - Advanced MongoDB Aggregation PipelinesMongoDB Europe 2016 - Advanced MongoDB Aggregation Pipelines
MongoDB Europe 2016 - Advanced MongoDB Aggregation Pipelines
 
Troubleshooting Complex Oracle Performance Problems with Tanel Poder
Troubleshooting Complex Oracle Performance Problems with Tanel PoderTroubleshooting Complex Oracle Performance Problems with Tanel Poder
Troubleshooting Complex Oracle Performance Problems with Tanel Poder
 
JSON and PL/SQL: A Match Made in Database
JSON and PL/SQL: A Match Made in DatabaseJSON and PL/SQL: A Match Made in Database
JSON and PL/SQL: A Match Made in Database
 
Kevin Kempter PostgreSQL Backup and Recovery Methods @ Postgres Open
Kevin Kempter PostgreSQL Backup and Recovery Methods @ Postgres OpenKevin Kempter PostgreSQL Backup and Recovery Methods @ Postgres Open
Kevin Kempter PostgreSQL Backup and Recovery Methods @ Postgres Open
 
Adventures with the ClickHouse ReplacingMergeTree Engine
Adventures with the ClickHouse ReplacingMergeTree EngineAdventures with the ClickHouse ReplacingMergeTree Engine
Adventures with the ClickHouse ReplacingMergeTree Engine
 
MongoDB
MongoDBMongoDB
MongoDB
 
Effective testing with pytest
Effective testing with pytestEffective testing with pytest
Effective testing with pytest
 
ClickHouse Data Warehouse 101: The First Billion Rows, by Alexander Zaitsev a...
ClickHouse Data Warehouse 101: The First Billion Rows, by Alexander Zaitsev a...ClickHouse Data Warehouse 101: The First Billion Rows, by Alexander Zaitsev a...
ClickHouse Data Warehouse 101: The First Billion Rows, by Alexander Zaitsev a...
 
Fun with ClickHouse Window Functions-2021-08-19.pdf
Fun with ClickHouse Window Functions-2021-08-19.pdfFun with ClickHouse Window Functions-2021-08-19.pdf
Fun with ClickHouse Window Functions-2021-08-19.pdf
 
MongoDb scalability and high availability with Replica-Set
MongoDb scalability and high availability with Replica-SetMongoDb scalability and high availability with Replica-Set
MongoDb scalability and high availability with Replica-Set
 
PythonOOP
PythonOOPPythonOOP
PythonOOP
 
MySQL Performance Schema in Action
MySQL Performance Schema in ActionMySQL Performance Schema in Action
MySQL Performance Schema in Action
 
Sql and Sql commands
Sql and Sql commandsSql and Sql commands
Sql and Sql commands
 
NumPy.pptx
NumPy.pptxNumPy.pptx
NumPy.pptx
 
ClickHouse tips and tricks. Webinar slides. By Robert Hodges, Altinity CEO
ClickHouse tips and tricks. Webinar slides. By Robert Hodges, Altinity CEOClickHouse tips and tricks. Webinar slides. By Robert Hodges, Altinity CEO
ClickHouse tips and tricks. Webinar slides. By Robert Hodges, Altinity CEO
 

Andere mochten auch

Andere mochten auch (10)

Data Manipulation Using R (& dplyr)
Data Manipulation Using R (& dplyr)Data Manipulation Using R (& dplyr)
Data Manipulation Using R (& dplyr)
 
Rデータ処理入門
Rデータ処理入門Rデータ処理入門
Rデータ処理入門
 
Tokyor36
Tokyor36Tokyor36
Tokyor36
 
Introduction to R Short course Fall 2016
Introduction to R Short course Fall 2016Introduction to R Short course Fall 2016
Introduction to R Short course Fall 2016
 
Data manipulation with dplyr
Data manipulation with dplyrData manipulation with dplyr
Data manipulation with dplyr
 
dplyrとは何だったのか
dplyrとは何だったのかdplyrとは何だったのか
dplyrとは何だったのか
 
「plyrパッケージで君も前処理スタ☆」改め「plyrパッケージ徹底入門」
「plyrパッケージで君も前処理スタ☆」改め「plyrパッケージ徹底入門」「plyrパッケージで君も前処理スタ☆」改め「plyrパッケージ徹底入門」
「plyrパッケージで君も前処理スタ☆」改め「plyrパッケージ徹底入門」
 
木と電話と選挙(causalTree)
木と電話と選挙(causalTree)木と電話と選挙(causalTree)
木と電話と選挙(causalTree)
 
R入門(dplyrでデータ加工)-TokyoR42
R入門(dplyrでデータ加工)-TokyoR42R入門(dplyrでデータ加工)-TokyoR42
R入門(dplyrでデータ加工)-TokyoR42
 
Tidyverseとは
TidyverseとはTidyverseとは
Tidyverseとは
 

Ähnlich wie dplyr

Palestra sobre Collections com Python
Palestra sobre Collections com PythonPalestra sobre Collections com Python
Palestra sobre Collections com Python
pugpe
 
Global Change, Species Diversity, and the Future of Marine Ecosystems
Global Change, Species Diversity, and the Future of Marine EcosystemsGlobal Change, Species Diversity, and the Future of Marine Ecosystems
Global Change, Species Diversity, and the Future of Marine Ecosystems
Jarrett Byrnes
 
Danos morais obito dengue hemorragica resp. estado
Danos morais obito dengue hemorragica resp. estadoDanos morais obito dengue hemorragica resp. estado
Danos morais obito dengue hemorragica resp. estado
Informa Jurídico
 

Ähnlich wie dplyr (20)

dplyr and torrents from cpasbien
dplyr and torrents from cpasbiendplyr and torrents from cpasbien
dplyr and torrents from cpasbien
 
dplyr
dplyrdplyr
dplyr
 
Τα Πολύ Βασικά για την Python
Τα Πολύ Βασικά για την PythonΤα Πολύ Βασικά για την Python
Τα Πολύ Βασικά για την Python
 
Super Advanced Python –act1
Super Advanced Python –act1Super Advanced Python –act1
Super Advanced Python –act1
 
sopa de pollo para el alma latina
sopa de pollo para el alma latinasopa de pollo para el alma latina
sopa de pollo para el alma latina
 
Elixir
ElixirElixir
Elixir
 
Sopa de pollo para el alma Latina
Sopa de pollo para el alma LatinaSopa de pollo para el alma Latina
Sopa de pollo para el alma Latina
 
R programming language
R programming languageR programming language
R programming language
 
Basics
BasicsBasics
Basics
 
r studio presentation.pptx
r studio presentation.pptxr studio presentation.pptx
r studio presentation.pptx
 
r studio presentation.pptx
r studio presentation.pptxr studio presentation.pptx
r studio presentation.pptx
 
Evergreen trails master plan community meeting 1 boards
Evergreen trails master plan community meeting 1 boardsEvergreen trails master plan community meeting 1 boards
Evergreen trails master plan community meeting 1 boards
 
dplyr use case
dplyr use casedplyr use case
dplyr use case
 
Oceans 2019 tutorial-geophysical-nav_7-updated
Oceans 2019 tutorial-geophysical-nav_7-updatedOceans 2019 tutorial-geophysical-nav_7-updated
Oceans 2019 tutorial-geophysical-nav_7-updated
 
Al Fazl International Weekly26 June 2015
Al Fazl International  Weekly26 June 2015Al Fazl International  Weekly26 June 2015
Al Fazl International Weekly26 June 2015
 
ΠΛΗ31 ΜΑΘΗΜΑ 2.2 (ΕΚΤΥΠΩΣΗ)
ΠΛΗ31 ΜΑΘΗΜΑ 2.2 (ΕΚΤΥΠΩΣΗ)ΠΛΗ31 ΜΑΘΗΜΑ 2.2 (ΕΚΤΥΠΩΣΗ)
ΠΛΗ31 ΜΑΘΗΜΑ 2.2 (ΕΚΤΥΠΩΣΗ)
 
Palestra sobre Collections com Python
Palestra sobre Collections com PythonPalestra sobre Collections com Python
Palestra sobre Collections com Python
 
Global Change, Species Diversity, and the Future of Marine Ecosystems
Global Change, Species Diversity, and the Future of Marine EcosystemsGlobal Change, Species Diversity, and the Future of Marine Ecosystems
Global Change, Species Diversity, and the Future of Marine Ecosystems
 
Encuesta de valores de los mexicanos...
Encuesta de valores de los mexicanos...Encuesta de valores de los mexicanos...
Encuesta de valores de los mexicanos...
 
Danos morais obito dengue hemorragica resp. estado
Danos morais obito dengue hemorragica resp. estadoDanos morais obito dengue hemorragica resp. estado
Danos morais obito dengue hemorragica resp. estado
 

Mehr von Romain Francois

Rcpp: Seemless R and C++
Rcpp: Seemless R and C++Rcpp: Seemless R and C++
Rcpp: Seemless R and C++
Romain Francois
 
RProtoBuf: protocol buffers for R
RProtoBuf: protocol buffers for RRProtoBuf: protocol buffers for R
RProtoBuf: protocol buffers for R
Romain Francois
 
Rcpp: Seemless R and C++
Rcpp: Seemless R and C++Rcpp: Seemless R and C++
Rcpp: Seemless R and C++
Romain Francois
 
Rcpp: Seemless R and C++
Rcpp: Seemless R and C++Rcpp: Seemless R and C++
Rcpp: Seemless R and C++
Romain Francois
 

Mehr von Romain Francois (18)

R/C++
R/C++R/C++
R/C++
 
user2015 keynote talk
user2015 keynote talkuser2015 keynote talk
user2015 keynote talk
 
SevillaR meetup: dplyr and magrittr
SevillaR meetup: dplyr and magrittrSevillaR meetup: dplyr and magrittr
SevillaR meetup: dplyr and magrittr
 
R/C++ talk at earl 2014
R/C++ talk at earl 2014R/C++ talk at earl 2014
R/C++ talk at earl 2014
 
Rcpp11 genentech
Rcpp11 genentechRcpp11 genentech
Rcpp11 genentech
 
Rcpp11 useR2014
Rcpp11 useR2014Rcpp11 useR2014
Rcpp11 useR2014
 
Rcpp11
Rcpp11Rcpp11
Rcpp11
 
R and C++
R and C++R and C++
R and C++
 
R and cpp
R and cppR and cpp
R and cpp
 
Rcpp attributes
Rcpp attributesRcpp attributes
Rcpp attributes
 
Rcpp is-ready
Rcpp is-readyRcpp is-ready
Rcpp is-ready
 
Rcpp
RcppRcpp
Rcpp
 
Integrating R with C++: Rcpp, RInside and RProtoBuf
Integrating R with C++: Rcpp, RInside and RProtoBufIntegrating R with C++: Rcpp, RInside and RProtoBuf
Integrating R with C++: Rcpp, RInside and RProtoBuf
 
Object Oriented Design(s) in R
Object Oriented Design(s) in RObject Oriented Design(s) in R
Object Oriented Design(s) in R
 
Rcpp: Seemless R and C++
Rcpp: Seemless R and C++Rcpp: Seemless R and C++
Rcpp: Seemless R and C++
 
RProtoBuf: protocol buffers for R
RProtoBuf: protocol buffers for RRProtoBuf: protocol buffers for R
RProtoBuf: protocol buffers for R
 
Rcpp: Seemless R and C++
Rcpp: Seemless R and C++Rcpp: Seemless R and C++
Rcpp: Seemless R and C++
 
Rcpp: Seemless R and C++
Rcpp: Seemless R and C++Rcpp: Seemless R and C++
Rcpp: Seemless R and C++
 

Kürzlich hochgeladen

Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers:  A Deep Dive into Serverless Spatial Data and FMECloud Frontiers:  A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FME
Safe Software
 
Artificial Intelligence: Facts and Myths
Artificial Intelligence: Facts and MythsArtificial Intelligence: Facts and Myths
Artificial Intelligence: Facts and Myths
Joaquim Jorge
 
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
?#DUbAI#??##{{(☎️+971_581248768%)**%*]'#abortion pills for sale in dubai@
 

Kürzlich hochgeladen (20)

Partners Life - Insurer Innovation Award 2024
Partners Life - Insurer Innovation Award 2024Partners Life - Insurer Innovation Award 2024
Partners Life - Insurer Innovation Award 2024
 
GenAI Risks & Security Meetup 01052024.pdf
GenAI Risks & Security Meetup 01052024.pdfGenAI Risks & Security Meetup 01052024.pdf
GenAI Risks & Security Meetup 01052024.pdf
 
Manulife - Insurer Innovation Award 2024
Manulife - Insurer Innovation Award 2024Manulife - Insurer Innovation Award 2024
Manulife - Insurer Innovation Award 2024
 
Repurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost Saving
Repurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost SavingRepurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost Saving
Repurposing LNG terminals for Hydrogen Ammonia: Feasibility and Cost Saving
 
Top 5 Benefits OF Using Muvi Live Paywall For Live Streams
Top 5 Benefits OF Using Muvi Live Paywall For Live StreamsTop 5 Benefits OF Using Muvi Live Paywall For Live Streams
Top 5 Benefits OF Using Muvi Live Paywall For Live Streams
 
Apidays New York 2024 - The value of a flexible API Management solution for O...
Apidays New York 2024 - The value of a flexible API Management solution for O...Apidays New York 2024 - The value of a flexible API Management solution for O...
Apidays New York 2024 - The value of a flexible API Management solution for O...
 
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, AdobeApidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
 
Deploy with confidence: VMware Cloud Foundation 5.1 on next gen Dell PowerEdg...
Deploy with confidence: VMware Cloud Foundation 5.1 on next gen Dell PowerEdg...Deploy with confidence: VMware Cloud Foundation 5.1 on next gen Dell PowerEdg...
Deploy with confidence: VMware Cloud Foundation 5.1 on next gen Dell PowerEdg...
 
The 7 Things I Know About Cyber Security After 25 Years | April 2024
The 7 Things I Know About Cyber Security After 25 Years | April 2024The 7 Things I Know About Cyber Security After 25 Years | April 2024
The 7 Things I Know About Cyber Security After 25 Years | April 2024
 
Apidays New York 2024 - The Good, the Bad and the Governed by David O'Neill, ...
Apidays New York 2024 - The Good, the Bad and the Governed by David O'Neill, ...Apidays New York 2024 - The Good, the Bad and the Governed by David O'Neill, ...
Apidays New York 2024 - The Good, the Bad and the Governed by David O'Neill, ...
 
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers:  A Deep Dive into Serverless Spatial Data and FMECloud Frontiers:  A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FME
 
Artificial Intelligence: Facts and Myths
Artificial Intelligence: Facts and MythsArtificial Intelligence: Facts and Myths
Artificial Intelligence: Facts and Myths
 
MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024
 
Powerful Google developer tools for immediate impact! (2023-24 C)
Powerful Google developer tools for immediate impact! (2023-24 C)Powerful Google developer tools for immediate impact! (2023-24 C)
Powerful Google developer tools for immediate impact! (2023-24 C)
 
Strategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
Strategize a Smooth Tenant-to-tenant Migration and Copilot TakeoffStrategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
Strategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
 
HTML Injection Attacks: Impact and Mitigation Strategies
HTML Injection Attacks: Impact and Mitigation StrategiesHTML Injection Attacks: Impact and Mitigation Strategies
HTML Injection Attacks: Impact and Mitigation Strategies
 
Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...
Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...
Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...
 
Scaling API-first – The story of a global engineering organization
Scaling API-first – The story of a global engineering organizationScaling API-first – The story of a global engineering organization
Scaling API-first – The story of a global engineering organization
 
Connector Corner: Accelerate revenue generation using UiPath API-centric busi...
Connector Corner: Accelerate revenue generation using UiPath API-centric busi...Connector Corner: Accelerate revenue generation using UiPath API-centric busi...
Connector Corner: Accelerate revenue generation using UiPath API-centric busi...
 
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
 

dplyr

  • 2. • Use R since 2002 • #rcatladies • R Enthusiast • R/C++ hero • Performance • dplyr • Occasional comedy
  • 3.
  • 4.
  • 6. enjoy(cool(bake(shape(beat(append(bowl(rep("flour", 2), "yeast", "water", "milk", "oil"), "flour", until = "soft"), duration = "3mins"), as = "balls", style = "slightly-flat"), degrees = 200, duration = "15mins"), duration = "5mins")) bowl(rep("flour", 2), "yeast", "water", "milk", "oil") %>% append("flour", until = "soft") %>%
 beat(duration = "3mins") %>%
 shape(as = "balls", style = "slightly-flat") %>%
 bake(degrees = 200, duration = "15mins") %>%
 cool(buns, duration = "5mins") %>% enjoy()
  • 7. nycflights13 > flights Source: local data frame [336,776 x 16] year month day dep_time dep_delay arr_time arr_delay carrier tailnum flight 1 2013 1 1 517 2 830 11 UA N14228 1545 2 2013 1 1 533 4 850 20 UA N24211 1714 .. ... ... ... ... ... ... ... ... ... ... Variables not shown: origin (chr), dest (chr), air_time (dbl), distance (dbl), hour (dbl), minute (dbl)
  • 8. nycflights13 > glimpse(flights) Observations: 336,776 Variables: 16 $ year (int) 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 201... $ month (int) 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ... $ day (int) 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ... $ dep_time (int) 517, 533, 542, 544, 554, 554, 555, 557, 557, 558, 558, 55... $ dep_delay (dbl) 2, 4, 2, -1, -6, -4, -5, -3, -3, -2, -2, -2, -2, -2, -1, ... $ arr_time (int) 830, 850, 923, 1004, 812, 740, 913, 709, 838, 753, 849, 8... $ arr_delay (dbl) 11, 20, 33, -18, -25, 12, 19, -14, -8, 8, -2, -3, 7, -14,... $ carrier (chr) "UA", "UA", "AA", "B6", "DL", "UA", "B6", "EV", "B6", "AA... $ tailnum (chr) "N14228", "N24211", "N619AA", "N804JB", "N668DN", "N39463... $ flight (int) 1545, 1714, 1141, 725, 461, 1696, 507, 5708, 79, 301, 49,... $ origin (chr) "EWR", "LGA", "JFK", "JFK", "LGA", "EWR", "EWR", "LGA", "... $ dest (chr) "IAH", "IAH", "MIA", "BQN", "ATL", "ORD", "FLL", "IAD", "... $ air_time (dbl) 227, 227, 160, 183, 116, 150, 158, 53, 140, 138, 149, 158... $ distance (dbl) 1400, 1416, 1089, 1576, 762, 719, 1065, 229, 944, 733, 10... $ hour (dbl) 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, ... $ minute (dbl) 17, 33, 42, 44, 54, 54, 55, 57, 57, 58, 58, 58, 58, 58, 5...
  • 9. filter: A subset of the rows of the data frame flights %>% filter( dep_delay < 10 ) flights %>% filter( arr_delay < dep_delay )
  • 10. slice: filter rows by position flights %>% slice( 1:10 )
  • 11. arrange: reorder a data frame flights %>% filter( hour < 8 ) %>% arrange( year, month, day )
  • 12. select: select certain columns from the data frame select(flights, year, month, day) select(flights, year:day) select(flights, -(year:day))
  • 13. mutate: modify or create columns based on others flights %>% mutate( gain = arr_delay - dep_delay, speed = distance / air_time * 60 ) %>% filter( gain > 0 ) %>% arrange( desc(speed) ) %>% select( year, month, day, dest, gain, speed )
  • 14. summarise: collapse a data frame into one row … flights %>% summarise(delay = mean(dep_delay, na.rm = TRUE)) flights %>% filter( dep_delay > 0 ) %>% summarise(arr_delay = mean(arr_delay, na.rm = TRUE))
  • 15. group_by: Group observations by one or more variables flights %>% group_by( tailnum ) %>% summarise( count = n(), dist = mean(distance, na.rm = TRUE), delay = mean(arr_delay, na.rm = TRUE) ) %>% filter( is.finite(delay) ) %>% arrange( desc(count) )
  • 16. bind_rows bind_rows( , ) color num green 1 yellow 2 red 3 blue 4 pink 5 color num green 1 yellow 2 color num red 3 blue 4 pink 5
  • 17. joins a <- data_frame( color = c("green", "yellow", "red"), num = 1:3 ) b <- data_frame( color = c("green", "yellow", "pink"), size = c("S", "M", "L") ) color num green 1 yellow 2 red 3 color size green S yellow M pink L
  • 18. inner_join color num green 1 yellow 2 red 3 color size green S yellow M pink L inner_join( , ) color num size green 1 S yellow 2 M
  • 19. left_join color num green 1 yellow 2 red 3 color size green S yellow M pink L left_join( , ) color num size green 1 S yellow 2 M red 3
  • 20. right_join color num green 1 yellow 2 red 3 color size green S yellow M pink L right_join( , ) color num size green 1 S yellow 2 M pink L
  • 21. full_join color num green 1 yellow 2 red 3 color size green S yellow M pink L full_join( , ) color num size green 1 S yellow 2 M red 3 pink L
  • 22. data_frame: Just like data.frame, but better > data_frame( x = 1:5, y = letters[1:5] ) %>% glimpse Observations: 5 Variables: 2 $ x (int) 1, 2, 3, 4, 5 $ y (chr) "a", "b", "c", "d", "e" > data_frame( x = 1:5, y = letters[1:5] , z = x + 1) %>% glimpse Observations: 5 Variables: 3 $ x (int) 1, 2, 3, 4, 5 $ y (chr) "a", "b", "c", "d", "e" $ z (dbl) 2, 3, 4, 5, 6
  • 23. frame_data aka tibble > frame_data( + ~colA, ~colB, + "a", 1, + "b", 2 + ) Source: local data frame [2 x 2] colA colB (chr) (dbl) 1 a 1 2 b 2
  • 24. _
  • 25. g <- c("origin", "dest") v <- "dep_delay" flights %>% group_by( g ) %>% summarise( result = mean(v, na.rm = TRUE) ) 🙀 🙀
  • 26. g <- c("origin", "dest") v <- "dep_delay" flights %>% group_by_( .dots = g ) %>% summarise_( .dots = interp(~ mean(var, na.rm = TRUE), var = as.name(v)) )
  • 27.
  • 28.
  • 29.
  • 30. Future • Performance improvements (parallel C++) • Alternative back ends • Different type of groupings (e.g. bootstrap) As soon as we get hoverboard ...