Added support for modifying the effect of `DISTINCT` clauses so they

only consider some fields (PostgreSQL only).

For this, the ``distinct()`` QuerySet method now accepts an optional
list of model fields names and generates ``DISTINCT ON`` clauses on
these cases. Thanks Jeffrey Gelens and Anssi Kääriäinen for their work.

Fixes #6422.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@17244 bcc190cf-cafb-0310-a4f2-bffc1f526a37
This commit is contained in:
Ramiro Morales 2011-12-22 20:42:40 +00:00
parent 03eb2907d5
commit 287565779d
16 changed files with 374 additions and 43 deletions

View file

@ -23,6 +23,8 @@ class SQLCompiler(object):
Does any necessary class setup immediately prior to producing SQL. This
is for things that can't necessarily be done in __init__ because we
might not have all the pieces in place at that time.
# TODO: after the query has been executed, the altered state should be
# cleaned. We are not using a clone() of the query here.
"""
if not self.query.tables:
self.query.join((None, self.query.model._meta.db_table, None, None))
@ -60,11 +62,19 @@ class SQLCompiler(object):
return '', ()
self.pre_sql_setup()
# After executing the query, we must get rid of any joins the query
# setup created. So, take note of alias counts before the query ran.
# However we do not want to get rid of stuff done in pre_sql_setup(),
# as the pre_sql_setup will modify query state in a way that forbids
# another run of it.
self.refcounts_before = self.query.alias_refcount.copy()
out_cols = self.get_columns(with_col_aliases)
ordering, ordering_group_by = self.get_ordering()
# This must come after 'select' and 'ordering' -- see docstring of
# get_from_clause() for details.
distinct_fields = self.get_distinct()
# This must come after 'select', 'ordering' and 'distinct' -- see
# docstring of get_from_clause() for details.
from_, f_params = self.get_from_clause()
qn = self.quote_name_unless_alias
@ -76,8 +86,10 @@ class SQLCompiler(object):
params.extend(val[1])
result = ['SELECT']
if self.query.distinct:
result.append('DISTINCT')
result.append(self.connection.ops.distinct_sql(distinct_fields))
result.append(', '.join(out_cols + self.query.ordering_aliases))
result.append('FROM')
@ -90,6 +102,9 @@ class SQLCompiler(object):
grouping, gb_params = self.get_grouping()
if grouping:
if distinct_fields:
raise NotImplementedError(
"annotate() + distinct(fields) not implemented.")
if ordering:
# If the backend can't group by PK (i.e., any database
# other than MySQL), then any fields mentioned in the
@ -129,6 +144,9 @@ class SQLCompiler(object):
raise DatabaseError('NOWAIT is not supported on this database backend.')
result.append(self.connection.ops.for_update_sql(nowait=nowait))
# Finally do cleanup - get rid of the joins we created above.
self.query.reset_refcounts(self.refcounts_before)
return ' '.join(result), tuple(params)
def as_nested_sql(self):
@ -292,6 +310,26 @@ class SQLCompiler(object):
col_aliases.add(field.column)
return result, aliases
def get_distinct(self):
"""
Returns a quoted list of fields to use in DISTINCT ON part of the query.
Note that this method can alter the tables in the query, and thus it
must be called before get_from_clause().
"""
qn = self.quote_name_unless_alias
qn2 = self.connection.ops.quote_name
result = []
opts = self.query.model._meta
for name in self.query.distinct_fields:
parts = name.split(LOOKUP_SEP)
field, col, alias, _, _ = self._setup_joins(parts, opts, None)
col, alias = self._final_join_removal(col, alias)
result.append("%s.%s" % (qn(alias), qn2(col)))
return result
def get_ordering(self):
"""
Returns a tuple containing a list representing the SQL elements in the
@ -384,21 +422,7 @@ class SQLCompiler(object):
"""
name, order = get_order_dir(name, default_order)
pieces = name.split(LOOKUP_SEP)
if not alias:
alias = self.query.get_initial_alias()
field, target, opts, joins, last, extra = self.query.setup_joins(pieces,
opts, alias, False)
alias = joins[-1]
col = target.column
if not field.rel:
# To avoid inadvertent trimming of a necessary alias, use the
# refcount to show that we are referencing a non-relation field on
# the model.
self.query.ref_alias(alias)
# Must use left outer joins for nullable fields and their relations.
self.query.promote_alias_chain(joins,
self.query.alias_map[joins[0]][JOIN_TYPE] == self.query.LOUTER)
field, col, alias, joins, opts = self._setup_joins(pieces, opts, alias)
# If we get to this point and the field is a relation to another model,
# append the default ordering for that model.
@ -416,11 +440,47 @@ class SQLCompiler(object):
results.extend(self.find_ordering_name(item, opts, alias,
order, already_seen))
return results
col, alias = self._final_join_removal(col, alias)
return [(alias, col, order)]
def _setup_joins(self, pieces, opts, alias):
"""
A helper method for get_ordering and get_distinct. This method will
call query.setup_joins, handle refcounts and then promote the joins.
Note that get_ordering and get_distinct must produce same target
columns on same input, as the prefixes of get_ordering and get_distinct
must match. Executing SQL where this is not true is an error.
"""
if not alias:
alias = self.query.get_initial_alias()
field, target, opts, joins, _, _ = self.query.setup_joins(pieces,
opts, alias, False)
alias = joins[-1]
col = target.column
if not field.rel:
# To avoid inadvertent trimming of a necessary alias, use the
# refcount to show that we are referencing a non-relation field on
# the model.
self.query.ref_alias(alias)
# Must use left outer joins for nullable fields and their relations.
# Ordering or distinct must not affect the returned set, and INNER
# JOINS for nullable fields could do this.
self.query.promote_alias_chain(joins,
self.query.alias_map[joins[0]][JOIN_TYPE] == self.query.LOUTER)
return field, col, alias, joins, opts
def _final_join_removal(self, col, alias):
"""
A helper method for get_distinct and get_ordering. This method will
trim extra not-needed joins from the tail of the join chain.
This is very similar to what is done in trim_joins, but we will
trim LEFT JOINS here. It would be a good idea to consolidate this
method and query.trim_joins().
"""
if alias:
# We have to do the same "final join" optimisation as in
# add_filter, since the final column might not otherwise be part of
# the select set (so we can't order on it).
while 1:
join = self.query.alias_map[alias]
if col != join[RHS_JOIN_COL]:
@ -428,7 +488,7 @@ class SQLCompiler(object):
self.query.unref_alias(alias)
alias = join[LHS_ALIAS]
col = join[LHS_JOIN_COL]
return [(alias, col, order)]
return col, alias
def get_from_clause(self):
"""
@ -438,8 +498,8 @@ class SQLCompiler(object):
from-clause via a "select".
This should only be called after any SQL construction methods that
might change the tables we need. This means the select columns and
ordering must be done first.
might change the tables we need. This means the select columns,
ordering and distinct must be done first.
"""
result = []
qn = self.quote_name_unless_alias
@ -984,6 +1044,7 @@ class SQLAggregateCompiler(SQLCompiler):
"""
if qn is None:
qn = self.quote_name_unless_alias
sql = ('SELECT %s FROM (%s) subquery' % (
', '.join([
aggregate.as_sql(qn, self.connection)