3. When?
1. Service is too slow
(Profiling, Django debug toolbar on test environment)
2. Before release on Production
(Django Debug Toolbar: check the number of MySQL queries and their execution time)
3. Code writing and review
(tests, loops, bulk operations, foreign keys)
4. Database design
(design, indexes, table denormalization, etc.)
4. What?
1. Requests number
(Hit db as rarely as possible)
2. DB Schema
(Analyze SQL queries. Use EXPLAIN to check indexes. Denormalize tables, etc.)
5. Bulk Operations
1. bulk_create
2. transaction.atomic / manual transactions
3. Update
4. Update with data from related table.
5. Select_for_update
6. Update and Create!
Updates. Inserts. Data migrations.
6. Bulk Create
def add_packages(apps, schema_editor):
    """Data migration: insert the campaign packages stored in the JSON fixture.

    Rows are created one by one, so every package costs a separate
    ``create()`` call. This is the slow variant.
    """
    CampaignPackage = apps.get_model('campaign', 'CampaignPackage')
    path = os.path.join(settings.BASE_DIR, 'fixtures', 'campaign_package_data.json')
    with open(path, 'r') as f:
        # ujson.load() expects a file object; the text that was read must be
        # parsed with ujson.loads() instead.
        data = ujson.loads(f.read())
    # Bad
    for package in data:
        CampaignPackage.objects.create(**package['fields'])
7. Bulk Create
def add_packages(apps, schema_editor):
    """Data migration: insert the campaign packages stored in the JSON fixture.

    Rows are still created one by one, but all ``create()`` calls run inside
    a single ``transaction.atomic()`` block.
    """
    CampaignPackage = apps.get_model('campaign', 'CampaignPackage')
    path = os.path.join(settings.BASE_DIR, 'fixtures', 'campaign_package_data.json')
    with open(path, 'r') as f:
        # ujson.load() expects a file object; the text that was read must be
        # parsed with ujson.loads() instead.
        data = ujson.loads(f.read())
    # Somewhat better (no gain on MyISAM, which does not support transactions)
    with transaction.atomic():
        for package in data:
            CampaignPackage.objects.create(**package['fields'])
8. Bulk Create
def add_packages(apps, schema_editor):
    """Data migration: insert the campaign packages stored in the JSON fixture.

    All model instances are built in memory and handed to ``bulk_create()``
    in one call instead of being saved one at a time.
    """
    CampaignPackage = apps.get_model('campaign', 'CampaignPackage')
    path = os.path.join(settings.BASE_DIR, 'fixtures', 'campaign_package_data.json')
    with open(path, 'r') as f:
        # ujson.load() expects a file object; the text that was read must be
        # parsed with ujson.loads() instead.
        data = ujson.loads(f.read())
    # Best
    CampaignPackage.objects.bulk_create([
        CampaignPackage(**package['fields']) for package in data
    ])
9. Bulk Create
def add_packages(apps, schema_editor):
    """Data migration: insert the campaign packages stored in the JSON fixture.

    Same as the plain ``bulk_create()`` variant, but spells out the
    ``batch_size`` argument, which caps how many objects go into one query.
    """
    CampaignPackage = apps.get_model('campaign', 'CampaignPackage')
    path = os.path.join(settings.BASE_DIR, 'fixtures', 'campaign_package_data.json')
    with open(path, 'r') as f:
        # ujson.load() expects a file object; the text that was read must be
        # parsed with ujson.loads() instead.
        data = ujson.loads(f.read())
    # Best (Note batch_size)
    CampaignPackage.objects.bulk_create([
        CampaignPackage(**package['fields']) for package in data
    ], batch_size=None)
10. Update
from django.db.models.expressions import F
from django.db import connection
def fill_media_budget(apps, schema_editor):
    """Data migration: add each row's ``price`` to its ``media_budget``.

    Every row is loaded and then saved back individually.
    """
    # NOTE(review): the local name says Campaign but the model fetched is
    # CampaignPackage -- confirm which one is intended.
    Campaign = apps.get_model("campaign", "CampaignPackage")
    # many requests
    for row in Campaign.objects.all():
        row.media_budget = row.media_budget + row.price
        row.save()
11. Update
from django.db.models.expressions import F
from django.db import connection
def fill_media_budget(apps, schema_editor):
    """Data migration: add each row's ``price`` to its ``media_budget``.

    The addition is expressed with ``F()`` objects and applied through a
    single ``update()`` call on the queryset.
    """
    # NOTE(review): the local name says Campaign but the model fetched is
    # CampaignPackage -- confirm which one is intended.
    Campaign = apps.get_model("campaign", "CampaignPackage")
    # one request
    increased_budget = F('price') + F('media_budget')
    Campaign.objects.all().update(media_budget=increased_budget)
12. Update From Related Table
from django.db import connection
# Update campaign.media_budget from the joined extend_campaign_data row using
# one raw SQL statement.
with connection.cursor() as c:
    # Adjacent string literals are concatenated with nothing in between, so
    # every fragment needs a trailing space; without it the statement
    # contained "c.idSET" and "c.media_budgetWHERE".
    c.execute("UPDATE campaign p "
              "INNER JOIN extend_campaign_data c ON p.extend_data_id = c.id "
              "SET p.media_budget = p.media_budget + c.media_budget "
              "WHERE p.update = 1")
13. Select For Update
from __future__ import unicode_literals
from django.db import migrations, transaction
def rename_saved_campaign_extra_package_to_custom_package(apps, schema_editor):
    """Data migration: rename a key inside the saved ``package`` step data.

    For every CampaignSavedProgress row with ``step=1`` the value stored
    under 'package-extra_package' is moved to 'package-custom_package'.
    The rows are selected with ``select_for_update()`` inside one
    ``transaction.atomic()`` block.
    """
    CampaignSavedProgress = apps.get_model('campaign', 'CampaignSavedProgress')
    with transaction.atomic():
        locked_rows = CampaignSavedProgress.objects.select_for_update().filter(step=1)
        for progress in locked_rows:
            package_step = progress.steps_data['package']
            package_step['package-custom_package'] = package_step.pop('package-extra_package')
            progress.save()
14. Update And Create
# Split the prepared report data into rows to insert and rows to update, then
# write each group with one bulk operation.
dated_reports = self._prepare_reports(**kwargs)
# The campaign set does not depend on the date in the code shown here, so the
# queryset is built once; its result cache is reused on later iterations
# instead of querying the database again for every date.
campaigns = Campaign.objects.filter(
    campaign161_id__in=campaigns_data.keys()
).select_related('statistic')
for date, report_data in dated_reports.items():
    for campaign in campaigns:
        statistic_id = campaign.statistic.id
        if statistic_id in statistic_ids_with_existing_daily_reports_set:
            # Tuple order must match the placeholders of the UPDATE below.
            existing_daily_reports_data.append((
                report_data['clicks'],
                report_data['impressions'],
                statistic_id,
                date,
            ))
        else:
            new_daily_reports.append(DailyReport(
                date=date,
                clicks=report_data['clicks'],
                impressions=report_data['impressions'],
                report_id=statistic_id,
            ))
if new_daily_reports:
    DailyReport.objects.bulk_create(new_daily_reports)
if existing_daily_reports_data:
    with connection.cursor() as c:
        c.executemany("UPDATE dashboard_dailyreport "
                      "SET clicks=%s, impressions=%s "
                      "WHERE report_id=%s and date=%s",
                      existing_daily_reports_data)
15. select_related (Models)
from django.db import models
class Campaign(models.Model):  # (10 campaigns)
    """Campaign row; points at the order it belongs to."""
    campaign_name = models.CharField(max_length=1000, null=True)
    advertiser = models.CharField(max_length=100, null=True)
    headline = models.CharField(max_length=100)
    # Referenced by name because Order is declared further down; using the
    # class object here would raise NameError when the module is imported.
    order = models.ForeignKey('Order')


class Order(models.Model):
    """Order placed by a user, with an optional payment and discount."""
    user = models.ForeignKey('user_profile.User', db_index=True)
    created_date = models.DateTimeField(auto_now_add=True)
    payment = models.OneToOneField('campaign.Payment', null=True)
    discount = models.ForeignKey('Discount', null=True)
    waiting_for_unsuccessful_email = models.BooleanField(default=False)


class Payment(models.Model):
    """Payment holding the amount paid."""
    amount_payed = models.DecimalField(decimal_places=2, max_digits=5)
17. select_related
def print_order_data():
    """Print the payment id and the creation date of each campaign's order."""
    for campaign in Campaign.objects.all():  # 10 campaigns
        order = campaign.order
        print(order.payment.id)
        print(order.created_date)
Q: How many times is the DB hit?
A: 1 + 10 + 10 = 21
SELECT * FROM `campaign_campaign`;
SELECT * FROM `campaign_order` WHERE `campaign_order`.`id` = 63; # once per iteration, 10 times in total
SELECT * FROM `campaign_payment` WHERE `campaign_payment`.`id` = 80; # once per iteration, 10 times in total
18. select_related
def print_order_data():
    """Print the payment id and the creation date of each campaign's order.

    The queryset uses ``select_related('order__payment')``.
    """
    campaigns = Campaign.objects.all().select_related('order__payment')  # 10 campaigns
    for campaign in campaigns:
        order = campaign.order
        print(order.payment.id)
        print(order.created_date)
Q: How many times is the DB hit?
A: Just once.
SELECT `campaign_campaign`.*, `campaign_order`.*, `campaign_payment`.*
FROM `campaign_campaign`
INNER JOIN `campaign_order`
ON(`campaign_campaign`.`order_id` = `campaign_order`.`id`)
LEFT OUTER JOIN `campaign_payment`
ON(`campaign_order`.`payment_id` = `campaign_payment`.`id`)
19. select_related
def print_order_data():
    """Print the paid amount and the order creation date for every campaign.

    Only the two needed columns are requested, so no model instances are
    built.
    """
    # values() yields dicts keyed by field name, which is what the loop
    # below indexes into; values_list() yields tuples, so the string keys
    # would raise TypeError.
    campaigns = Campaign.objects.all().select_related('order__payment').values(
        'order__payment__amount_payed',
        'order__created_date',
    )
    for c in campaigns:
        print(c['order__payment__amount_payed'])
        print(c['order__created_date'])
Q: How many times is the DB hit?
A: Just once. + avoid unnecessary deserialization / data transfer
SELECT `campaign_payment`.`amount_payed`, `campaign_order`.`created_date`
FROM `campaign_campaign`
INNER JOIN `campaign_order`
ON (`campaign_campaign`.`order_id` = `campaign_order`.`id`)
LEFT OUTER JOIN `campaign_payment`
ON (`campaign_order`.`payment_id` = `campaign_payment`.`id`);
20. select_related (encapsulate)
class CampaignManager(models.Manager):
    """Manager whose base queryset always applies select_related('order__payment')."""

    def get_queryset(self):
        """Return the default queryset with the order and payment relations selected."""
        base_queryset = super(CampaignManager, self).get_queryset()
        return base_queryset.select_related('order__payment')
class Campaign(models.Model):  # (10 campaigns)
    """Campaign row whose default manager is CampaignManager."""

    # Plain columns.
    campaign_name = models.CharField(max_length=1000, null=True)
    advertiser = models.CharField(max_length=100, null=True)
    headline = models.CharField(max_length=100)

    # Relation to the order.
    order = models.ForeignKey(Order)

    # Replaces the default ``objects`` manager.
    objects = CampaignManager()
campaigns = Campaign.objects.all()
#Or in admin panel
# NOTE(review): the class name looks like a typo for ``CampaignAdmin``; it is
# kept unchanged so that any existing reference to it still resolves.
class Campaigndmin(admin.ModelAdmin):
    """Admin options for Campaign: select order and payment on the list page."""
    # Must be a tuple or list (or a boolean). Without the trailing comma the
    # parentheses are just grouping and the value is a plain string.
    list_select_related = ('order__payment',)
21. How to get SQL queries?
- Console
from django.db import connection
connection.queries
results = Results.objects.all()
print(results.query)
- Django Debug Toolbar, Log sql queries to console in debug mode.
- Code Analysis
campaign.user.id -> campaign.user_id
Campaign.user -> select_related (especially loops)
Then you can:
- Tests
with self.assertNumQueries(1):
print_order_data()
- Sql console, Explain
22. prefetch_related (Models)
class Campaign(models.Model):  # (10 campaigns)
    """Campaign row; points at its order and its package."""
    campaign_name = models.CharField(max_length=1000, null=True)
    # Referenced by name because Order is declared further down; using the
    # class object here would raise NameError when the module is imported.
    order = models.ForeignKey('Order')
    package = models.ForeignKey(Package)


class Order(models.Model):
    """Order placed by a user, with an optional payment."""
    user = models.ForeignKey('user_profile.User')
    created_date = models.DateTimeField(auto_now_add=True)
    payment = models.OneToOneField('campaign.Payment', null=True)


class Payment(models.Model):
    """Payment holding the amount paid."""
    amount_payed = models.DecimalField(decimal_places=2, max_digits=5)
23. prefetch_related
def select_campaigns():
    """Print, for every order, the ids of its campaigns joined with '-'."""
    for order in Order.objects.all():
        campaign_ids = (str(campaign.id) for campaign in order.campaign_set.all())
        print("-".join(campaign_ids))
Q: How many times is the DB hit (for 10 orders)?
24. prefetch_related
def select_campaigns():
    """Print, for every order, the ids of its campaigns joined with '-'."""
    for order in Order.objects.all():
        campaign_ids = (str(campaign.id) for campaign in order.campaign_set.all())
        print("-".join(campaign_ids))
Q: How many times is the DB hit (for 10 orders)?
A: 1 + 10 = 11
SELECT `campaign_order`.*
FROM `campaign_order`; # 1
SELECT `campaign_campaign`.*
FROM `campaign_campaign` WHERE `campaign_campaign`.`order_id` = 1; # 10
25. prefetch_related
def select_campaigns():
    """Print, for every order, the ids of its campaigns joined with '-'.

    The orders queryset uses ``prefetch_related('campaign_set')``.
    """
    orders = Order.objects.prefetch_related('campaign_set').all()
    for order in orders:
        campaign_ids = (str(campaign.id) for campaign in order.campaign_set.all())
        print("-".join(campaign_ids))
Q: How many times is the DB hit (for 10 orders)?
A: 1 + 1 = 2
SELECT `campaign_order`.*
FROM `campaign_order`; # 1
SELECT `campaign_campaign`.*
FROM `campaign_campaign`
WHERE `campaign_campaign`.`order_id` IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); # 1
26. prefetch_related
def select_campaigns():
    """Print, for every order, the ids of its campaigns ordered by 'created'.

    The orders queryset uses ``prefetch_related('campaign_set')``, while the
    loop calls ``order_by('created')`` on each order's ``campaign_set``.
    """
    orders = Order.objects.prefetch_related('campaign_set').all()
    for order in orders:
        campaign_ids = (str(campaign.id) for campaign in order.campaign_set.order_by('created'))
        print("-".join(campaign_ids))
Q: How many times is the DB hit (for 10 orders)?
27. prefetch_related
def select_campaigns():
    """Print, for every order, the ids of its campaigns ordered by 'created'.

    The orders queryset uses ``prefetch_related('campaign_set')``, while the
    loop calls ``order_by('created')`` on each order's ``campaign_set``.
    """
    orders = Order.objects.prefetch_related('campaign_set').all()
    for order in orders:
        campaign_ids = (str(campaign.id) for campaign in order.campaign_set.order_by('created'))
        print("-".join(campaign_ids))
Q: How many times is the DB hit (for 10 orders)?
A: 1 + 1 + 10 = 12
SELECT `campaign_order`.*
FROM `campaign_order`; # 1
SELECT `campaign_campaign`.*
FROM `campaign_campaign`
WHERE `campaign_campaign`.`order_id` IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); # 1
SELECT `campaign_campaign`.*
FROM `campaign_campaign` WHERE `campaign_campaign`.`order_id` = 1
ORDER BY `campaign_campaign`.`created` ASC; # 10
28. prefetch_related (Prefetch)
def select_campaigns():
    """Print, for every order, the ids of its campaigns joined with '-'.

    The ordering by 'created' is attached to the prefetch itself through a
    ``Prefetch`` object, and the loop reads ``campaign_set.all()``.
    """
    ordered_campaigns = Prefetch(
        'campaign_set',
        queryset=Campaign.objects.order_by('created'),  # any kind of filtration
    )
    orders = Order.objects.prefetch_related(ordered_campaigns).all()
    for order in orders:
        campaign_ids = (str(campaign.id) for campaign in order.campaign_set.all())
        print("-".join(campaign_ids))
Q: How many times is the DB hit (for 10 orders)?
A: 1 + 1 = 2
SELECT `campaign_order`.*
FROM `campaign_order`; # 1
SELECT `campaign_campaign`.*
FROM `campaign_campaign`
WHERE `campaign_campaign`.`order_id` IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
ORDER BY `campaign_campaign`.`created` ASC; # 1
29. prefetch_related (Prefetch)
def select_campaigns():
    """Print, for every order, the ids of its campaigns joined with '-'.

    The ``Prefetch`` object orders the campaigns by 'created' and stores them
    on each order under the ``campaigns`` attribute (``to_attr``).
    """
    ordered_campaigns = Prefetch(
        'campaign_set',
        queryset=Campaign.objects.order_by('created'),
        to_attr='campaigns',
    )
    orders = Order.objects.prefetch_related(ordered_campaigns).all()
    for order in orders:
        campaign_ids = (str(campaign.id) for campaign in order.campaigns)
        print("-".join(campaign_ids))
Q: How many times is the DB hit (for 10 orders)?
A: 1 + 1 = 2
30. All Together (Models)
class Campaign(models.Model):  # (10 campaigns)
    """Campaign row; points at its order and its package."""
    campaign_name = models.CharField(max_length=1000, null=True)
    # Referenced by name because Order is declared further down; using the
    # class object here would raise NameError when the module is imported.
    order = models.ForeignKey('Order')
    package = models.ForeignKey(Package)


class Order(models.Model):
    """Order placed by a user, with an optional payment."""
    user = models.ForeignKey('user_profile.User')
    created_date = models.DateTimeField(auto_now_add=True)
    payment = models.OneToOneField('campaign.Payment', null=True)


class Payment(models.Model):
    """Payment holding the amount paid."""
    amount_payed = models.DecimalField(decimal_places=2, max_digits=5)


class ManualInvoice(models.Model):
    """Invoice for a user and a creation month, with an optional unique id."""
    user = models.ForeignKey('user_profile.User', db_index=False)
    creation_month = models.DateField()
    invoice_id = models.CharField(max_length=60, null=True, unique=True)
31. All Together
def get_invoices_with_prefetch_data():
    """Return the ManualInvoice queryset with its related data attached.

    The queryset selects ``user__companyinfo`` and prefetches the invoice
    payments into ``payments_list``. Each payment selects ``order__user``
    and prefetches the order's campaigns, with their package and ordered by
    'created', into ``sorted_campaigns_cached``.
    """
    # Campaigns of each payment's order.
    campaigns_prefetch = Prefetch(
        'order__campaign_set',
        queryset=Campaign.objects.select_related('package').order_by('created'),
        to_attr='sorted_campaigns_cached',
    )
    # Payments of each invoice, carrying the campaign prefetch with them.
    payments_prefetch = Prefetch(
        'payments',
        queryset=Payment.objects.select_related('order__user').prefetch_related(
            campaigns_prefetch
        ),
        to_attr='payments_list',
    )
    # Queryset methods live on the manager, not on the model class itself, so
    # the chain has to start from ManualInvoice.objects.
    invoices_to_send = ManualInvoice.objects.select_related(
        'user__companyinfo'
    ).prefetch_related(payments_prefetch).all()
    return invoices_to_send
32. Conclusions
1. Count db requests. Less is better.
2. Use bulk operations.
3. Optimize data migrations (15 sec on the test env can mean 2 hours on prod)
4. Watch for foreign key accesses inside loops.
5. Analyze SQL queries.