SlideShare a Scribd company logo
1 of 49
Performance 101:
so you need to use a database
Leon Fayer
@papa_fire
who am I ?
• 20+ years since first print ‘Hello World!’;
• currently @ OmniTI
• development and operations of large web applications
• performance & scalability
• we are hiring:
• https://omniti.com/is/hiring
what it is about
databases & performance
what it’s not about
NoSQL MySQL
how database connection works
① establish connection
② send query
③ process query
④ send result
⑤ close connection
common database connection
my $dbh = DBI->connect(…);
my $sth = $dbh->prepare($query);
$sth->execute();
my $result = $sth->fetchrow_hashref();
$dbh->disconnect;
common database connection
① my $dbh = DBI->connect(…);
② my $sth = $dbh->prepare($query);
③ $sth->execute();
④ my $result = $sth->fetchrow_hashref();
⑤ $dbh->disconnect;
① establish
connection
① establish connection
⑤ close connection
and
problem
connection overhead
example
my @data_array;
for (my $i; $i<=10; $i++) {
my $dbh = DBI->connect(…);
my $sth = $dbh->prepare(qq{select * from foo
where id = ?});
$sth->execute($i);
push @data_array, $sth->fetchrow_hashref();
$dbh->disconnect;
}
visual representation
①
①
①
⑤
⑤
⑤
short answer
persistent connections
even shorter answer
avoid multiple connections
how it works (high level)
①
①
①
⑤
⑤
⑤
①
⑤
correct example
my @data_array;
my $dbh = DBI->connect(…);
my $sth = $dbh->prepare(qq{select * from foo
where id = ?});
for (my $i; $i<=10; $i++) {
$sth->execute($i);
push @data_array, $sth->fetchrow_hashref();
}
$dbh->disconnect;
conclusion
reduce # of connections
② send query
most common problem
n+1
n+1
# get a list of items
my %item_list;
my $sth = $dbh->prepare(qq{select id from items
where active = true});
$sth->execute();
# get properties for each item
while ( my $row = $sth->fetchrow_hashref() ) {
my $sth_prop = $dbh->prepare(qq{select * from
item_properties where item_id = ?});
$sth_prop->execute($row->{‘id’});
$item_list{$row->{‘id’}}{‘props’} =
$sth_prop->fetchall_hashref();
}
n+1 you don’t know about
# get a list of items
my @ids = get_active_item_ids();
my @item_props;
# get properties for each item
foreach my $i (@ids) {
push @item_props, Item->new($i)->properties();
}
easy solution
# get a list of items with properties
my $sth = $dbh->prepare(qq{select i.item_id, p.*
from items i,
item_properties p
where i.item_id = p.item_id
and active = true});
$sth->execute();
# arrange object to your liking
while (my $row = $sth->fetchrow_hashref()) {
$item_list{$row->{‘id’}}{‘props’} = $row; # sans id
$item_list{$row->{‘id’}}{‘id’} = $row->{‘id’};
}
conclusion
limit number of queries
cool stuff
:BONUS:
Common Table Expressions
(CTEs)
* MySQL does not support CTEs
# create temp table naughty_users
# and get data from it
with naughty_users as (
select * from users where banned = 1
)
select userid, email from naughty_users;
even more cool
Writeable
Common Table Expressions
multiple queries are required
# create user record
insert into users (name, email) values (?,?) returning
userid
# create address record
insert into addresses (userid, address, city, state,
zip) values (?,?,?,?,?) returning addressid
# track changes to user information
insert into user_history (userid, addressid, action)
values (?,?,?) returning historyid
or are they?
with userdata as (
insert into users (name, email) values (?,?)
returning userid
), addressdata as (
insert into addresses (userid, address, city, state, zip)
select userid,?,?,?,? from userdata
returning addressid
), historydata as (
insert into user_history (userid, addressid, action)
select userid, addressid,?
from userdata, addressdata
returning historyid
)
select userid, addressid, historyid
from userdata, addressdata, historydata;
why not use transactions?
• no complicated transaction code
• no complicated error handling code
• reduced query overhead
• better performance
find out more
For more details:
http://omniti.com/seeds/writable-ctes-improve-performance
③ process query
unpopular opinion
ORMs are evil
in one sentence
you have no idea how it works
why?
1. machine-generated
2. object construction overhead
3. false sense of control
food for thought
think about ORM as
the most junior
developer you’ve ever
worked with
would you trust junior with this?
select * from
(
select bannerid, caption, client_url, image_file, sponsorid, weight from
(
select V.bannerid, V.impressions, B.caption, B.client_url, B.image_file, s.sponsorid, s.weight,
row_number() over (partition by s.sponsorid order by s.weight desc) ranking
FROM
(
-- This level gives me a list of banners sorted by least seen,and then by highest weight
select valid.bannerid, valid.totalweight, count(I.timestamp) as impressions FROM
(
-- This level gets me a list of banners that are valid for display
select b.bannerid,
-- Add up the weight from 4 sources. Banner weight, and weight for each data item they match
decode( decode(bitand(u.STATE_BM1,b.STATE_BM1),0,0,1) +
decode(bitand(u.STATE_BM2,b.STATE_BM2),0,0,1) +
decode(bitand(u.STATE_BM3,b.STATE_BM3),0,0,1),0,0,b.STATE_WT
) +
decode(bitand(u.AGE_BM,b.AGE_BM),0,0,b.AGE_WT)+
decode(bitand(u.GENDER_BM,b.GENDER_BM),0,0,b.GENDER_WT)+
b.weight as totalweight
from tgif.tbl_users u, tgif.tbl_banners b, tgif.tbl_bannerstats bs
where
-- I only care about ME!
u.userid= 1
-- Don't show inactive banners
and b.inactive != 1
-- Only show banners that are currently running
and sysdate < b.end_date and sysdate >=b.start_date
-- Only get the type of banner i'm looking for
and b.type= 3
-- Join on the total stats, and only display banners that haven't reached their per banner maximums
and b.bannerid = bs.bannerid
and ( b.max_impressions IS NULL OR bs.total_impressions < b.max_impressions )
and ( b.max_clicks IS NULL OR bs.total_clicks < b.max_clicks )
and ( b.max_conversions IS NULL OR bs.total_conversions < b.max_conversions )
-- Ignore any banners that don't match their demographics (ie, male banner won't go to females)
and ( b.AGE_BM IS NULL OR b.AGE_BM = 0 OR bitand(u.AGE_BM, b.AGE_BM) != 0 )
and ( b.GENDER_BM IS NULL OR b.GENDER_BM =0 OR bitand(u.GENDER_BM, b.GENDER_BM) != 0 )
and ( b.STATE_BM1 IS NULL OR b.STATE_BM1 =0 OR bitand(u.STATE_BM1, b.STATE_BM1) != 0 )
and ( b.STATE_BM2 IS NULL OR b.STATE_BM2 =0 OR bitand(u.STATE_BM2, b.STATE_BM2) != 0 )
and ( b.STATE_BM3 IS NULL OR b.STATE_BM3 =0 OR bitand(u.STATE_BM3, b.STATE_BM3) != 0 )
-- But don't show me any banners that I have already signed up
and b.bannerid NOT IN (
SELECT B.bannerid FROM tgif.tbl_bannerconversions C, tgif.tbl_banners B, tgif.tbl_sponsors sp
WHERE C.USERID=1
AND C.bannerid=B.bannerid
AND B.sponsorid=sp.sponsorid
-- unless they have a conversion interval, and that interval has expired
AND ( sp.conversion_interval = 0 OR sysdate > C.timestamp+sp.conversion_interval )
)
-- Don't show me any banners that have SPONSORS that have reached their maximums
and b.sponsorid NOT IN (
-- I believe this would be better done using HAVING clauses, but I can't figure it out
-- Take the banners for a sponsor in the bannerstats table, and get the totals per sponsor
-- return anything that has reached it's maximum
select sponsorid FROM
(
SELECT S.sponsorid, S.max_impressions, S.max_conversions, S.max_clicks,
sum(total_impressions) as imps, sum(total_conversions) as convs,
sum(total_clicks) as clicks
FROM tgif.tbl_sponsors S, tgif.tbl_banners B, tgif.tbl_bannerstats bs
WHERE S.sponsorid=B.sponsorid
AND B.bannerid=bs.bannerid
GROUP BY S.Sponsorid, S.max_impressions, S.max_conversions, S.max_clicks
) exclude
WHERE ( imps > max_impressions OR convs >= max_conversions OR clicks > max_clicks )
)
) valid, tgif.tbl_bannerimpressions I
where
valid.bannerid=I.bannerid(+)
and I.userid(+)=1
group by valid.bannerid, valid.totalweight
-- I want to see banners I haven't seen yet, sorted by highest weight, so we sort by number
-- of times that this user has seen this particular banner, then we sort by weight
order by impressions, totalweight DESC
) V, tgif.tbl_banners B, tgif.tbl_sponsors S
where B.bannerid=V.bannerid
and B.sponsorid=S.sponsorid
and S.inactive != 1
and s.sponsorid not in (
) valid, tgif.tbl_bannerimpressions I
where
valid.bannerid=I.bannerid(+)
and I.userid(+)=1
group by valid.bannerid, valid.totalweight
-- I want to see banners I haven't seen yet, sorted by highest weight, so we sort by number
-- of times that this user has seen this particular banner, then we sort by weight
order by impressions, totalweight DESC
) V, tgif.tbl_banners B, tgif.tbl_sponsors S
where B.bannerid=V.bannerid
and B.sponsorid=S.sponsorid
and S.inactive != 1
and s.sponsorid not in (
-- Check the user impression cap to make sure it hasn't been passed by the user
select s.sponsorid from tgif.tbl_banners b, tgif.tbl_sponsors s,
tgif.TBL_BANNERIMPRESSIONS i
where s.sponsorid = b.sponsorid
and b.bannerid = i.bannerid
and i.timestamp >= sysdate - nvl(user_impression_cap_days,100)
and userid = 1
group by s.sponsorid
having count(*) >= max(nvl(user_impression_cap,1000000000))
)
-- Make sure the sponsor is still in the valid table. This table is updated hourly
-- and contains the sponsors that have not gone over their sponsor level frequencies for
-- impressions/conversions/clicks
and s.sponsorid in (select sponsorid from tgif.tbl_active_sponsors)
)
where ranking=1
--Order the banners by sponsor weight, which is handled by the ranking
--order by S.weight
order by impressions, weight desc
)
where rownum <= 10;
no one thinks about this
object construction is
expensive
object construction issue
METHOD REAL USER SYS PCPU
Base ORM 6.330 5.771 0.212 94.51
SQL without objects 0.664 0.274 0.120 59.35
SQL with ORM objects 6.354 5.797 0.197 94.34
timely tweet
conclusion
learn SQL
④ send results
may be shocking, but …
databases can do math
illustrating wrong
# get all orders
my $sth = $dbh->prepare("select order_id, price from orders");
$sth->execute();
my $orders= $sth->fetchrow_hashref();
my $count = 1;
my $total = 0;
my $avg = 0;
# get average $ for last 10 orders
foreach my $o (sort {$orders->{‘order_id’}->{$b} <=> $orders->{‘order_id’}->{$a} } %$orders) {
$total += $o->{‘price’};
if ($count == 10) {
$avg = $total/$count;
last;
}
$count++;
}
vs right
# get average $ for last 10 orders
$sth = $dbh->prepare(qq{select avg(price) as avg_price
from (select price from orders
order by order_id desc limit 10) recent_orders });
$sth->execute();
$orders= $sth->fetchrow_hashref();
$avg = $orders->{‘avg_price’};
database can do …
1. math
2. dates
3. aggregations
4. [partial] matches
5. much, much more
conclusion
learn SQL
other things to consider
1. cache is a wonderful thing
2. * is not your friend
3. EXPLAIN/ANALYZE are
want to know more?
THE SCALABILITY AND PERFORMANCE
CONFERENCE
SEPTEMBER 21-23, 2016
http://surge.omniti.com
Questions?
@papa_fire
Thank You

More Related Content

What's hot

好みの日本酒を呑みたい! 〜さけのわデータで探す自分好みの酒〜
好みの日本酒を呑みたい! 〜さけのわデータで探す自分好みの酒〜好みの日本酒を呑みたい! 〜さけのわデータで探す自分好みの酒〜
好みの日本酒を呑みたい! 〜さけのわデータで探す自分好みの酒〜
Takashi Kitano
 
You don’t know query - WordCamp UK Edinburgh 2012
You don’t know query - WordCamp UK Edinburgh 2012You don’t know query - WordCamp UK Edinburgh 2012
You don’t know query - WordCamp UK Edinburgh 2012
l3rady
 
Topological indices (t is) of the graphs to seek qsar models of proteins com...
Topological indices (t is) of the graphs  to seek qsar models of proteins com...Topological indices (t is) of the graphs  to seek qsar models of proteins com...
Topological indices (t is) of the graphs to seek qsar models of proteins com...
Jitendra Kumar Gupta
 

What's hot (20)

[2019] 아직도 돈 주고 DB 쓰나요? for Developer
[2019] 아직도 돈 주고 DB 쓰나요? for Developer[2019] 아직도 돈 주고 DB 쓰나요? for Developer
[2019] 아직도 돈 주고 DB 쓰나요? for Developer
 
Python Ireland Nov 2010 Talk: Unit Testing
Python Ireland Nov 2010 Talk: Unit TestingPython Ireland Nov 2010 Talk: Unit Testing
Python Ireland Nov 2010 Talk: Unit Testing
 
令和から本気出す
令和から本気出す令和から本気出す
令和から本気出す
 
Functional perl
Functional perlFunctional perl
Functional perl
 
Neatly folding-a-tree
Neatly folding-a-treeNeatly folding-a-tree
Neatly folding-a-tree
 
好みの日本酒を呑みたい! 〜さけのわデータで探す自分好みの酒〜
好みの日本酒を呑みたい! 〜さけのわデータで探す自分好みの酒〜好みの日本酒を呑みたい! 〜さけのわデータで探す自分好みの酒〜
好みの日本酒を呑みたい! 〜さけのわデータで探す自分好みの酒〜
 
Manage catalog Configueation In Sharepoint PowerShell
Manage catalog Configueation In Sharepoint PowerShellManage catalog Configueation In Sharepoint PowerShell
Manage catalog Configueation In Sharepoint PowerShell
 
Very basic functional design patterns
Very basic functional design patternsVery basic functional design patterns
Very basic functional design patterns
 
The Perl6 Type System
The Perl6 Type SystemThe Perl6 Type System
The Perl6 Type System
 
You don’t know query - WordCamp UK Edinburgh 2012
You don’t know query - WordCamp UK Edinburgh 2012You don’t know query - WordCamp UK Edinburgh 2012
You don’t know query - WordCamp UK Edinburgh 2012
 
Taking Perl to Eleven with Higher-Order Functions
Taking Perl to Eleven with Higher-Order FunctionsTaking Perl to Eleven with Higher-Order Functions
Taking Perl to Eleven with Higher-Order Functions
 
Topological indices (t is) of the graphs to seek qsar models of proteins com...
Topological indices (t is) of the graphs  to seek qsar models of proteins com...Topological indices (t is) of the graphs  to seek qsar models of proteins com...
Topological indices (t is) of the graphs to seek qsar models of proteins com...
 
{tidygraph}と{ggraph}による モダンなネットワーク分析(未公開ver)
{tidygraph}と{ggraph}による モダンなネットワーク分析(未公開ver){tidygraph}と{ggraph}による モダンなネットワーク分析(未公開ver)
{tidygraph}と{ggraph}による モダンなネットワーク分析(未公開ver)
 
{tidygraph}と{ggraph}によるモダンなネットワーク分析
{tidygraph}と{ggraph}によるモダンなネットワーク分析{tidygraph}と{ggraph}によるモダンなネットワーク分析
{tidygraph}と{ggraph}によるモダンなネットワーク分析
 
Unit testing with zend framework PHPBenelux
Unit testing with zend framework PHPBeneluxUnit testing with zend framework PHPBenelux
Unit testing with zend framework PHPBenelux
 
The underestimated power of KeyPaths
The underestimated power of KeyPathsThe underestimated power of KeyPaths
The underestimated power of KeyPaths
 
WP_Query, pre_get_posts, and eliminating query_posts()
WP_Query, pre_get_posts, and eliminating query_posts()WP_Query, pre_get_posts, and eliminating query_posts()
WP_Query, pre_get_posts, and eliminating query_posts()
 
Chris Mc Glothen Sql Portfolio
Chris Mc Glothen Sql PortfolioChris Mc Glothen Sql Portfolio
Chris Mc Glothen Sql Portfolio
 
Patterns for slick database applications
Patterns for slick database applicationsPatterns for slick database applications
Patterns for slick database applications
 
WordPress 3.1 at DC PHP
WordPress 3.1 at DC PHPWordPress 3.1 at DC PHP
WordPress 3.1 at DC PHP
 

Viewers also liked

Viewers also liked (8)

Breaking social dependency
Breaking social dependencyBreaking social dependency
Breaking social dependency
 
Adventures in public speaking
Adventures in public speakingAdventures in public speaking
Adventures in public speaking
 
What DevOps is Not
What DevOps is NotWhat DevOps is Not
What DevOps is Not
 
On call for developers
On call for developersOn call for developers
On call for developers
 
Lost art of troubleshooting
Lost art of troubleshootingLost art of troubleshooting
Lost art of troubleshooting
 
Production testing through monitoring
Production testing through monitoringProduction testing through monitoring
Production testing through monitoring
 
BizOps and you
BizOps and youBizOps and you
BizOps and you
 
Improving DevOps through better monitoring
Improving DevOps through better monitoringImproving DevOps through better monitoring
Improving DevOps through better monitoring
 

Similar to Database performance 101

Map/reduce, geospatial indexing, and other cool features (Kristina Chodorow)
Map/reduce, geospatial indexing, and other cool features (Kristina Chodorow)Map/reduce, geospatial indexing, and other cool features (Kristina Chodorow)
Map/reduce, geospatial indexing, and other cool features (Kristina Chodorow)
MongoSF
 
PerlApp2Postgresql (2)
PerlApp2Postgresql (2)PerlApp2Postgresql (2)
PerlApp2Postgresql (2)
Jerome Eteve
 
Database Development Replication Security Maintenance Report
Database Development Replication Security Maintenance ReportDatabase Development Replication Security Maintenance Report
Database Development Replication Security Maintenance Report
nyin27
 
Below is my code- I have an error that I still have difficulty figurin.pdf
Below is my code- I have an error that I still have difficulty figurin.pdfBelow is my code- I have an error that I still have difficulty figurin.pdf
Below is my code- I have an error that I still have difficulty figurin.pdf
armanuelraj
 
Optimizing the Catalyst Optimizer for Complex Plans
Optimizing the Catalyst Optimizer for Complex PlansOptimizing the Catalyst Optimizer for Complex Plans
Optimizing the Catalyst Optimizer for Complex Plans
Databricks
 

Similar to Database performance 101 (20)

James Colby Maddox Business Intellignece and Computer Science Portfolio
James Colby Maddox Business Intellignece and Computer Science PortfolioJames Colby Maddox Business Intellignece and Computer Science Portfolio
James Colby Maddox Business Intellignece and Computer Science Portfolio
 
Create and Maintain COMPLEX HIERARCHIES easily
Create and Maintain COMPLEX HIERARCHIES easilyCreate and Maintain COMPLEX HIERARCHIES easily
Create and Maintain COMPLEX HIERARCHIES easily
 
Data Exploration with Apache Drill: Day 2
Data Exploration with Apache Drill: Day 2Data Exploration with Apache Drill: Day 2
Data Exploration with Apache Drill: Day 2
 
DBIC 3 - Primer
DBIC 3 - PrimerDBIC 3 - Primer
DBIC 3 - Primer
 
Map/reduce, geospatial indexing, and other cool features (Kristina Chodorow)
Map/reduce, geospatial indexing, and other cool features (Kristina Chodorow)Map/reduce, geospatial indexing, and other cool features (Kristina Chodorow)
Map/reduce, geospatial indexing, and other cool features (Kristina Chodorow)
 
Beyond PHP - It's not (just) about the code
Beyond PHP - It's not (just) about the codeBeyond PHP - It's not (just) about the code
Beyond PHP - It's not (just) about the code
 
Company segmentation - an approach with R
Company segmentation - an approach with RCompany segmentation - an approach with R
Company segmentation - an approach with R
 
DBIx-DataModel v2.0 in detail
DBIx-DataModel v2.0 in detail DBIx-DataModel v2.0 in detail
DBIx-DataModel v2.0 in detail
 
Optimization in django orm
Optimization in django ormOptimization in django orm
Optimization in django orm
 
Assignment 4.pdf
Assignment 4.pdfAssignment 4.pdf
Assignment 4.pdf
 
ES6 patterns in the wild
ES6 patterns in the wildES6 patterns in the wild
ES6 patterns in the wild
 
PerlApp2Postgresql (2)
PerlApp2Postgresql (2)PerlApp2Postgresql (2)
PerlApp2Postgresql (2)
 
Real World Optimization
Real World OptimizationReal World Optimization
Real World Optimization
 
How to generate a 100+ page website using parameterisation in R
How to generate a 100+ page website using parameterisation in RHow to generate a 100+ page website using parameterisation in R
How to generate a 100+ page website using parameterisation in R
 
MongoDB World 2018: Keynote
MongoDB World 2018: KeynoteMongoDB World 2018: Keynote
MongoDB World 2018: Keynote
 
Database Development Replication Security Maintenance Report
Database Development Replication Security Maintenance ReportDatabase Development Replication Security Maintenance Report
Database Development Replication Security Maintenance Report
 
Below is my code- I have an error that I still have difficulty figurin.pdf
Below is my code- I have an error that I still have difficulty figurin.pdfBelow is my code- I have an error that I still have difficulty figurin.pdf
Below is my code- I have an error that I still have difficulty figurin.pdf
 
Wordpress plugin development from Scratch
Wordpress plugin development from ScratchWordpress plugin development from Scratch
Wordpress plugin development from Scratch
 
Mongo DB 102
Mongo DB 102Mongo DB 102
Mongo DB 102
 
Optimizing the Catalyst Optimizer for Complex Plans
Optimizing the Catalyst Optimizer for Complex PlansOptimizing the Catalyst Optimizer for Complex Plans
Optimizing the Catalyst Optimizer for Complex Plans
 

Recently uploaded

+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
?#DUbAI#??##{{(☎️+971_581248768%)**%*]'#abortion pills for sale in dubai@
 
Why Teams call analytics are critical to your entire business
Why Teams call analytics are critical to your entire businessWhy Teams call analytics are critical to your entire business
Why Teams call analytics are critical to your entire business
panagenda
 

Recently uploaded (20)

Apidays New York 2024 - Passkeys: Developing APIs to enable passwordless auth...
Apidays New York 2024 - Passkeys: Developing APIs to enable passwordless auth...Apidays New York 2024 - Passkeys: Developing APIs to enable passwordless auth...
Apidays New York 2024 - Passkeys: Developing APIs to enable passwordless auth...
 
Apidays New York 2024 - APIs in 2030: The Risk of Technological Sleepwalk by ...
Apidays New York 2024 - APIs in 2030: The Risk of Technological Sleepwalk by ...Apidays New York 2024 - APIs in 2030: The Risk of Technological Sleepwalk by ...
Apidays New York 2024 - APIs in 2030: The Risk of Technological Sleepwalk by ...
 
Connector Corner: Accelerate revenue generation using UiPath API-centric busi...
Connector Corner: Accelerate revenue generation using UiPath API-centric busi...Connector Corner: Accelerate revenue generation using UiPath API-centric busi...
Connector Corner: Accelerate revenue generation using UiPath API-centric busi...
 
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
 
"I see eyes in my soup": How Delivery Hero implemented the safety system for ...
"I see eyes in my soup": How Delivery Hero implemented the safety system for ..."I see eyes in my soup": How Delivery Hero implemented the safety system for ...
"I see eyes in my soup": How Delivery Hero implemented the safety system for ...
 
Introduction to Multilingual Retrieval Augmented Generation (RAG)
Introduction to Multilingual Retrieval Augmented Generation (RAG)Introduction to Multilingual Retrieval Augmented Generation (RAG)
Introduction to Multilingual Retrieval Augmented Generation (RAG)
 
Strategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
Strategize a Smooth Tenant-to-tenant Migration and Copilot TakeoffStrategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
Strategize a Smooth Tenant-to-tenant Migration and Copilot Takeoff
 
Artificial Intelligence Chap.5 : Uncertainty
Artificial Intelligence Chap.5 : UncertaintyArtificial Intelligence Chap.5 : Uncertainty
Artificial Intelligence Chap.5 : Uncertainty
 
DEV meet-up UiPath Document Understanding May 7 2024 Amsterdam
DEV meet-up UiPath Document Understanding May 7 2024 AmsterdamDEV meet-up UiPath Document Understanding May 7 2024 Amsterdam
DEV meet-up UiPath Document Understanding May 7 2024 Amsterdam
 
Why Teams call analytics are critical to your entire business
Why Teams call analytics are critical to your entire businessWhy Teams call analytics are critical to your entire business
Why Teams call analytics are critical to your entire business
 
Apidays New York 2024 - Accelerating FinTech Innovation by Vasa Krishnan, Fin...
Apidays New York 2024 - Accelerating FinTech Innovation by Vasa Krishnan, Fin...Apidays New York 2024 - Accelerating FinTech Innovation by Vasa Krishnan, Fin...
Apidays New York 2024 - Accelerating FinTech Innovation by Vasa Krishnan, Fin...
 
WSO2's API Vision: Unifying Control, Empowering Developers
WSO2's API Vision: Unifying Control, Empowering DevelopersWSO2's API Vision: Unifying Control, Empowering Developers
WSO2's API Vision: Unifying Control, Empowering Developers
 
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, AdobeApidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
 
Elevate Developer Efficiency & build GenAI Application with Amazon Q​
Elevate Developer Efficiency & build GenAI Application with Amazon Q​Elevate Developer Efficiency & build GenAI Application with Amazon Q​
Elevate Developer Efficiency & build GenAI Application with Amazon Q​
 
FWD Group - Insurer Innovation Award 2024
FWD Group - Insurer Innovation Award 2024FWD Group - Insurer Innovation Award 2024
FWD Group - Insurer Innovation Award 2024
 
EMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWER
EMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWEREMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWER
EMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWER
 
Boost Fertility New Invention Ups Success Rates.pdf
Boost Fertility New Invention Ups Success Rates.pdfBoost Fertility New Invention Ups Success Rates.pdf
Boost Fertility New Invention Ups Success Rates.pdf
 
Exploring Multimodal Embeddings with Milvus
Exploring Multimodal Embeddings with MilvusExploring Multimodal Embeddings with Milvus
Exploring Multimodal Embeddings with Milvus
 
[BuildWithAI] Introduction to Gemini.pdf
[BuildWithAI] Introduction to Gemini.pdf[BuildWithAI] Introduction to Gemini.pdf
[BuildWithAI] Introduction to Gemini.pdf
 
CNIC Information System with Pakdata Cf In Pakistan
CNIC Information System with Pakdata Cf In PakistanCNIC Information System with Pakdata Cf In Pakistan
CNIC Information System with Pakdata Cf In Pakistan
 

Database performance 101

  • 1. Performance 101: so you need to use a database Leon Fayer @papa_fire
  • 2. who am I ? • 20+ years since first print ‘Hello World!’; • currently @ OmniTI • development and operations of large web applications • performance & scalability • we are hiring: • https://omniti.com/is/hiring
  • 3. what it is about databases & performance
  • 4. what it’s not about NoSQL MySQL
  • 5. how database connection works ① establish connection ② send query ③ process query ④ send result ⑤ close connection
  • 6. common database connection my $dbh = DBI->connect(…); my $sth = $dbh->prepare($query); $sth->execute(); my $result = $sth->fetchrow_hashref(); $dbh->disconnect;
  • 7. common database connection ① my $dbh = DBI->connect(…); ② my $sth = $dbh->prepare($query); ③ $sth->execute(); ④ my $result = $sth->fetchrow_hashref(); ⑤ $dbh->disconnect;
  • 9. ① establish connection ⑤ close connection and
  • 11. example my @data_array; for (my $i; $i<=10; $i++) { my $dbh = DBI->connect(…); my $sth = $dbh->prepare(qq{select * from foo where id = ?}); $sth->execute($i); push @data_array, $sth->fetchrow_hashref(); $dbh->disconnect; }
  • 14. even shorter answer avoid multiple connections
  • 15. how it works (high level) ① ① ① ⑤ ⑤ ⑤ ① ⑤
  • 16. correct example my @data_array; my $dbh = DBI->connect(…); my $sth = $dbh->prepare(qq{select * from foo where id = ?}); for (my $i; $i<=10; $i++) { $sth->execute($i); push @data_array, $sth->fetchrow_hashref(); } $dbh->disconnect;
  • 17. conclusion reduce # of connections
  • 20. n+1 # get a list of items my %item_list; my $sth = $dbh->prepare(qq{select id from items where active = true}); $sth->execute(); # get properties for each item while ( my $row = $sth->fetchrow_hashref() ) { my $sth_prop = $dbh->prepare(qq{select * from item_properties where item_id = ?}); $sth_prop->execute($row->{‘id’}); $item_list{$row->{‘id’}}{‘props’} = $sth_prop->fetchall_hashref(); }
  • 21. n+1 you don’t know about # get a list of items my @ids = get_active_item_ids(); my @item_props; # get properties for each item foreach my $i (@ids) { push @item_props, Item->new($i)->properties(); }
  • 22. easy solution # get a list of items with properties my $sth = $dbh->prepare(qq{select i.item_id, p.* from items i, item_properties p where i.item_id = p.item_id and active = true}); $sth->execute(); # arrange object to your liking while (my $row = $sth->fetchrow_hashref()) { $item_list{$row->{‘id’}}{‘props’} = $row; # sans id $item_list{$row->{‘id’}}{‘id’} = $row->{‘id’}; }
  • 24. cool stuff :BONUS: Common Table Expressions (CTEs) * MySQL does not support CTEs
  • 25. # create temp table naughty_users # and get data from it with naughty_users as ( select * from users where banned = 1 ) select userid, email from naughty_users;
  • 26. even more cool Writeable Common Table Expressions
  • 27. multiple queries are required # create user record insert into users (name, email) values (?,?) returning userid # create address record insert into addresses (userid, address, city, state, zip) values (?,?,?,?,?) returning addressid # track changes to user information insert into user_history (userid, addressid, action) values (?,?,?) returning historyid
  • 28. or are they? with userdata as ( insert into users (name, email) values (?,?) returning userid ), addressdata as ( insert into addresses (userid, address, city, state, zip) select userid,?,?,?,? from userdata returning addressid ), historydata as ( insert into user_history (userid, addressid, action) select userid, addressid,? from userdata, addressdata returning historyid ) select userid, addressid, historyid from userdata, addressdata, historydata;
  • 29. why not use transactions? • no complicated transaction code • no complicated error handling code • reduced query overhead • better performance
  • 30. find out more For more details: http://omniti.com/seeds/writable-ctes-improve-performance
  • 33. in one sentence you have no idea how it works
  • 34. why? 1. machine-generated 2. object construction overhead 3. false sense of control
  • 35. food for thought think about ORM as the most junior developer you’ve ever worked with
  • 36. would you trust junior with this? select * from ( select bannerid, caption, client_url, image_file, sponsorid, weight from ( select V.bannerid, V.impressions, B.caption, B.client_url, B.image_file, s.sponsorid, s.weight, row_number() over (partition by s.sponsorid order by s.weight desc) ranking FROM ( -- This level gives me a list of banners sorted by least seen,and then by highest weight select valid.bannerid, valid.totalweight, count(I.timestamp) as impressions FROM ( -- This level gets me a list of banners that are valid for display select b.bannerid, -- Add up the weight from 4 sources. Banner weight, and weight for each data item they match decode( decode(bitand(u.STATE_BM1,b.STATE_BM1),0,0,1) + decode(bitand(u.STATE_BM2,b.STATE_BM2),0,0,1) + decode(bitand(u.STATE_BM3,b.STATE_BM3),0,0,1),0,0,b.STATE_WT ) + decode(bitand(u.AGE_BM,b.AGE_BM),0,0,b.AGE_WT)+ decode(bitand(u.GENDER_BM,b.GENDER_BM),0,0,b.GENDER_WT)+ b.weight as totalweight from tgif.tbl_users u, tgif.tbl_banners b, tgif.tbl_bannerstats bs where -- I only care about ME! 
u.userid= 1 -- Don't show inactive banners and b.inactive != 1 -- Only show banners that are currently running and sysdate < b.end_date and sysdate >=b.start_date -- Only get the type of banner i'm looking for and b.type= 3 -- Join on the total stats, and only display banners that haven't reached their per banner maximums and b.bannerid = bs.bannerid and ( b.max_impressions IS NULL OR bs.total_impressions < b.max_impressions ) and ( b.max_clicks IS NULL OR bs.total_clicks < b.max_clicks ) and ( b.max_conversions IS NULL OR bs.total_conversions < b.max_conversions ) -- Ignore any banners that don't match their demographics (ie, male banner won't go to females) and ( b.AGE_BM IS NULL OR b.AGE_BM = 0 OR bitand(u.AGE_BM, b.AGE_BM) != 0 ) and ( b.GENDER_BM IS NULL OR b.GENDER_BM =0 OR bitand(u.GENDER_BM, b.GENDER_BM) != 0 ) and ( b.STATE_BM1 IS NULL OR b.STATE_BM1 =0 OR bitand(u.STATE_BM1, b.STATE_BM1) != 0 ) and ( b.STATE_BM2 IS NULL OR b.STATE_BM2 =0 OR bitand(u.STATE_BM2, b.STATE_BM2) != 0 ) and ( b.STATE_BM3 IS NULL OR b.STATE_BM3 =0 OR bitand(u.STATE_BM3, b.STATE_BM3) != 0 ) -- But don't show me any banners that I have already signed up and b.bannerid NOT IN ( SELECT B.bannerid FROM tgif.tbl_bannerconversions C, tgif.tbl_banners B, tgif.tbl_sponsors sp WHERE C.USERID=1 AND C.bannerid=B.bannerid AND B.sponsorid=sp.sponsorid -- unless they have a conversion interval, and that interval has expired AND ( sp.conversion_interval = 0 OR sysdate > C.timestamp+sp.conversion_interval ) ) -- Don't show me any banners that have SPONSORS that have reached their maximums and b.sponsorid NOT IN ( -- I believe this would be better done using HAVING clauses, but I can't figure it out -- Take the banners for a sponsor in the bannerstats table, and get the totals per sponsor -- return anything that has reached it's maximum select sponsorid FROM ( SELECT S.sponsorid, S.max_impressions, S.max_conversions, S.max_clicks, sum(total_impressions) as imps, sum(total_conversions) as convs, 
sum(total_clicks) as clicks FROM tgif.tbl_sponsors S, tgif.tbl_banners B, tgif.tbl_bannerstats bs WHERE S.sponsorid=B.sponsorid AND B.bannerid=bs.bannerid GROUP BY S.Sponsorid, S.max_impressions, S.max_conversions, S.max_clicks ) exclude WHERE ( imps > max_impressions OR convs >= max_conversions OR clicks > max_clicks ) ) ) valid, tgif.tbl_bannerimpressions I where valid.bannerid=I.bannerid(+) and I.userid(+)=1 group by valid.bannerid, valid.totalweight -- I want to see banners I haven't seen yet, sorted by highest weight, so we sort by number -- of times that this user has seen this particular banner, then we sort by weight order by impressions, totalweight DESC ) V, tgif.tbl_banners B, tgif.tbl_sponsors S where B.bannerid=V.bannerid and B.sponsorid=S.sponsorid and S.inactive != 1 and s.sponsorid not in ( ) valid, tgif.tbl_bannerimpressions I where valid.bannerid=I.bannerid(+) and I.userid(+)=1 group by valid.bannerid, valid.totalweight -- I want to see banners I haven't seen yet, sorted by highest weight, so we sort by number -- of times that this user has seen this particular banner, then we sort by weight order by impressions, totalweight DESC ) V, tgif.tbl_banners B, tgif.tbl_sponsors S where B.bannerid=V.bannerid and B.sponsorid=S.sponsorid and S.inactive != 1 and s.sponsorid not in ( -- Check the user impression cap to make sure it hasn't been passed by the user select s.sponsorid from tgif.tbl_banners b, tgif.tbl_sponsors s, tgif.TBL_BANNERIMPRESSIONS i where s.sponsorid = b.sponsorid and b.bannerid = i.bannerid and i.timestamp >= sysdate - nvl(user_impression_cap_days,100) and userid = 1 group by s.sponsorid having count(*) >= max(nvl(user_impression_cap,1000000000)) ) -- Make sure the sponsor is still in the valid table. 
This table is updated hourly -- and contains the sponsors that have not gone over their sponsor level frequencies for -- impressions/conversions/clicks and s.sponsorid in (select sponsorid from tgif.tbl_active_sponsors) ) where ranking=1 --Order the banners by sponsor weight, which is handled by the ranking --order by S.weight order by impressions, weight desc ) where rownum <= 10;
  • 37. no one thinks about this: object construction is expensive
  • 38. object construction issue METHOD REAL USER SYS PCPU Base ORM 6.330 5.771 0.212 94.51 SQL without objects 0.664 0.274 0.120 59.35 SQL with ORM objects 6.354 5.797 0.197 94.34
  • 42. may be shocking, but … databases can do math
  • 43. illustrating wrong # get all orders my $sth = $dbh->prepare("select order_id, price from orders"); $sth->execute(); my $orders= $sth->fetchrow_hashref(); my $count = 1; my $total = 0; my $avg = 0; # get average $ for last 10 orders foreach my $o (sort {$orders->{‘order_id’}->{$b} <=> $orders->{‘order_id’}->{$a} } %$orders) { $total += $o->{‘price’}; if ($count == 10) { $avg = $total/$count; last; } $count++; }
  • 44. vs right # get average $ for last 10 orders $sth = $dbh->prepare(qq{select avg(price) as avg_price from (select price from orders order by order_id desc limit 10) }); $sth->execute(); $orders= $sth->fetchrow_hashref(); $avg = $orders->{‘avg_price’};
  • 45. databases can do … 1. math 2. dates 3. aggregations 4. [partial] matches 5. much, much more
  • 47. other things to consider 1. cache is a wonderful thing 2. * is not your friend 3. EXPLAIN/ANALYZE are
  • 48. want to know more? THE SCALABILITY AND PERFORMANCE CONFERENCE SEPTEMBER 21-23, 2016 http://surge.omniti.com