Fixed #23646 -- Added QuerySet.bulk_update() to efficiently update many models.

This commit is contained in:
Tom Forbes 2018-09-18 21:14:44 +01:00 committed by Tim Graham
parent 7b159df942
commit 9cbdb44014
11 changed files with 359 additions and 8 deletions

View file

@ -18,9 +18,9 @@ from django.db import (
from django.db.models import DateField, DateTimeField, sql
from django.db.models.constants import LOOKUP_SEP
from django.db.models.deletion import Collector
from django.db.models.expressions import F
from django.db.models.expressions import Case, Expression, F, Value, When
from django.db.models.fields import AutoField
from django.db.models.functions import Trunc
from django.db.models.functions import Cast, Trunc
from django.db.models.query_utils import FilteredRelation, InvalidQuery, Q
from django.db.models.sql.constants import CURSOR, GET_ITERATOR_CHUNK_SIZE
from django.db.utils import NotSupportedError
@ -473,6 +473,50 @@ class QuerySet:
return objs
def bulk_update(self, objs, fields, batch_size=None):
"""
Update the given fields in each of the given objects in the database.
"""
if batch_size is not None and batch_size < 0:
raise ValueError('Batch size must be a positive integer.')
if not fields:
raise ValueError('Field names must be given to bulk_update().')
objs = tuple(objs)
if not all(obj.pk for obj in objs):
raise ValueError('All bulk_update() objects must have a primary key set.')
fields = [self.model._meta.get_field(name) for name in fields]
if any(not f.concrete or f.many_to_many for f in fields):
raise ValueError('bulk_update() can only be used with concrete fields.')
if any(f.primary_key for f in fields):
raise ValueError('bulk_update() cannot be used with primary key fields.')
if not objs:
return
# PK is used twice in the resulting update query, once in the filter
# and once in the WHEN. Each field will also have one CAST.
max_batch_size = connections[self.db].ops.bulk_batch_size(['pk', 'pk'] + fields, objs)
batch_size = min(batch_size, max_batch_size) if batch_size else max_batch_size
requires_casting = connections[self.db].features.requires_casted_case_in_updates
batches = (objs[i:i + batch_size] for i in range(0, len(objs), batch_size))
updates = []
for batch_objs in batches:
update_kwargs = {}
for field in fields:
when_statements = []
for obj in batch_objs:
attr = getattr(obj, field.attname)
if not isinstance(attr, Expression):
attr = Value(attr, output_field=field)
when_statements.append(When(pk=obj.pk, then=attr))
case_statement = Case(*when_statements, output_field=field)
if requires_casting:
case_statement = Cast(case_statement, output_field=field)
update_kwargs[field.attname] = case_statement
updates.append(([obj.pk for obj in batch_objs], update_kwargs))
with transaction.atomic(using=self.db, savepoint=False):
for pks, update_kwargs in updates:
self.filter(pk__in=pks).update(**update_kwargs)
bulk_update.alters_data = True
def get_or_create(self, defaults=None, **kwargs):
"""
Look up an object with the given kwargs, creating one if necessary.