use group by instead of distinct when possible

This commit is contained in:
Gani Georgiev
2026-01-13 18:47:32 +02:00
parent 5f8cce3558
commit 0b9a646d41
5 changed files with 138 additions and 44 deletions

View File

@@ -128,7 +128,7 @@ func NewRecordFieldResolver(
// resolved fields (eg. dynamically joining relations).
func (r *RecordFieldResolver) UpdateQuery(query *dbx.SelectQuery) error {
if len(r.joins) > 0 {
query.Distinct(true)
r.updateQueryWithDeduplicateConstraint(query)
for _, join := range r.joins {
query.LeftJoin(
@@ -171,7 +171,7 @@ func (r *RecordFieldResolver) updateQueryWithCollectionListRule(c *Collection, t
query.AndWhere(expr)
if len(cloneR.joins) > 0 {
query.Distinct(true)
r.updateQueryWithDeduplicateConstraint(query)
for _, j := range cloneR.joins {
query.LeftJoin(
@@ -184,6 +184,35 @@ func (r *RecordFieldResolver) updateQueryWithCollectionListRule(c *Collection, t
return nil
}
func (r *RecordFieldResolver) updateQueryWithDeduplicateConstraint(query *dbx.SelectQuery) {
info := query.Info()
if info.Distinct {
return
}
var groupByCol = r.baseCollection.Name
if r.baseCollectionAlias != "" {
groupByCol = r.baseCollectionAlias
}
groupByCol += ".id"
if len(info.GroupBy) > 0 && info.GroupBy[0] == groupByCol {
return
}
// when deemed safe (GROUP BY could have different execution order compared to DISTINCT),
// prefer GROUP BY to deduplicate only on the id field instead of all columns
// so that the size of a single row wouldn't matter that much
if len(info.GroupBy) == 0 &&
info.Having == nil &&
(len(info.Selects) == 0 ||
(len(info.Selects) == 1 && strings.HasSuffix(info.Selects[0], ".*"))) {
query.GroupBy(groupByCol)
} else {
query.Distinct(true)
}
}
// Resolve implements `search.FieldResolver` interface.
//
// Example of some resolvable fieldName formats:

File diff suppressed because one or more lines are too long

18
tools/dbutils/select.go Normal file
View File

@@ -0,0 +1,18 @@
package dbutils
import "regexp"
// Regexp for columns and tables (the same as the one in dbx).
var selectRegex = regexp.MustCompile(`(?i:\s+as\s+|\s+)([\w\-_\.]+)$`)
// AliasOrIdentifier returns the alias from a column or table identifier,
// Returns the identifier unmodified if no alias was found.
func AliasOrIdentifier(columnOrTableIdentifier string) string {
matches := selectRegex.FindStringSubmatch(columnOrTableIdentifier)
if len(matches) > 0 && matches[1] != "" {
return matches[1]
}
return columnOrTableIdentifier
}

View File

@@ -0,0 +1,38 @@
package dbutils_test
import (
"testing"
"github.com/pocketbase/pocketbase/tools/dbutils"
)
func TestAliasOrIdentifier(t *testing.T) {
scenarios := []struct {
value string
expected string
}{
{"", ""},
{"abc", "abc"},
{"abc ", "abc "}, // return unmodified
{"abc.def", "abc.def"},
{"abc.123 def", "def"},
{"abc.123 as def.456", "def.456"},
{"(abc) def", "def"},
{"(abc) as def", "def"},
{"abc def", "def"},
{"abc as def", "def"},
// technically invalid identifier but consistent with the dbx regex matching
{"a b c d", "d"},
{"a b c as d", "d"},
}
for _, s := range scenarios {
t.Run(s.value, func(t *testing.T) {
result := dbutils.AliasOrIdentifier(s.value)
if result != s.expected {
t.Fatalf("Expected\n%v\ngot\n%v", s.expected, result)
}
})
}
}

View File

@@ -8,6 +8,7 @@ import (
"strings"
"github.com/pocketbase/dbx"
"github.com/pocketbase/pocketbase/tools/dbutils"
"github.com/pocketbase/pocketbase/tools/inflector"
"golang.org/x/sync/errgroup"
)
@@ -298,13 +299,21 @@ func (s *Provider) Exec(items any) (*Result, error) {
countExec := func() error {
queryInfo := countQuery.Info()
countCol := s.countCol
if len(queryInfo.From) > 0 {
countCol = queryInfo.From[0] + "." + countCol
firstFrom := dbutils.AliasOrIdentifier(queryInfo.From[0])
countCol = firstFrom + "." + countCol
}
// @todo while currently there is no such use case, evaluate if
// wrapping as a subquery would be more suitable for the cases
// when there is "Group By" different from the default deduplication one
// added by RecordFieldResolver.UpdateQuery
// note: countQuery is shallow cloned and slice/map in-place modifications should be avoided
err := countQuery.Distinct(false).
Select("COUNT(DISTINCT [[" + countCol + "]])").
GroupBy( /* reset */ ).
OrderBy( /* reset */ ).
Row(&totalCount)
if err != nil {