fix(db-postgres): ensure countDistinct works correctly and achieve better performance when the query has table joins (#11208)
The fix, added in https://github.com/payloadcms/payload/pull/11096 wasn't sufficient enough. It did handle the case when the same query path / table was joined twice and caused incorrect `totalDocs`, but it didn't handle the case when `JOIN` returns more than 1 rows, which 2 added new assertions here check. Now, we use `COUNT(*)` only if we don't have any joined tables. If we do, instead of using `SELECT (COUNT DISTINCT id)` which as described in the previous PR is _very slow_ for large tables, we use the following query: ```sql SELECT COUNT(1) OVER() as count -- window function, executes for each row only once FROM users LEFT JOIN -- ... here additional rows are added WHERE -- ... GROUP BY users.id -- this ensures we're counting only users without additional rows from joins. LIMIT 1 -- Since COUNT(1) OVER() executes and resolves before doing LIMIT, we can safely apply LIMIT 1. ```
This commit is contained in:
@@ -9,32 +9,40 @@ export const countDistinct: CountDistinct = async function countDistinct(
|
||||
this: SQLiteAdapter,
|
||||
{ db, joins, tableName, where },
|
||||
) {
|
||||
// When we don't have any joins - use a simple COUNT(*) query.
|
||||
if (joins.length === 0) {
|
||||
const countResult = await db
|
||||
.select({
|
||||
count: count(),
|
||||
})
|
||||
.from(this.tables[tableName])
|
||||
.where(where)
|
||||
return Number(countResult[0].count)
|
||||
}
|
||||
|
||||
const chainedMethods: ChainedMethods = []
|
||||
|
||||
// COUNT(DISTINCT id) is slow on large tables, so we only use DISTINCT if we have to
|
||||
const visitedPaths = new Set([])
|
||||
let useDistinct = false
|
||||
joins.forEach(({ condition, queryPath, table }) => {
|
||||
if (!useDistinct && queryPath) {
|
||||
if (visitedPaths.has(queryPath)) {
|
||||
useDistinct = true
|
||||
} else {
|
||||
visitedPaths.add(queryPath)
|
||||
}
|
||||
}
|
||||
joins.forEach(({ condition, table }) => {
|
||||
chainedMethods.push({
|
||||
args: [table, condition],
|
||||
method: 'leftJoin',
|
||||
})
|
||||
})
|
||||
|
||||
// When we have any joins, we need to count each individual ID only once.
|
||||
// COUNT(*) doesn't work for this well in this case, as it also counts joined tables.
|
||||
// SELECT (COUNT DISTINCT id) has a very slow performance on large tables.
|
||||
// Instead, COUNT (GROUP BY id) can be used which is still slower than COUNT(*) but acceptable.
|
||||
const countResult = await chainMethods({
|
||||
methods: chainedMethods,
|
||||
query: db
|
||||
.select({
|
||||
count: useDistinct ? sql`COUNT(DISTINCT ${this.tables[tableName].id})` : count(),
|
||||
count: sql`COUNT(1) OVER()`,
|
||||
})
|
||||
.from(this.tables[tableName])
|
||||
.where(where),
|
||||
.where(where)
|
||||
.groupBy(this.tables[tableName].id)
|
||||
.limit(1),
|
||||
})
|
||||
|
||||
return Number(countResult[0].count)
|
||||
|
||||
Reference in New Issue
Block a user