Files
payload/packages/drizzle/src/postgres/countDistinct.ts
Sasha 513ba636af fix(db-postgres): ensure countDistinct works correctly and achieve better performance when the query has table joins (#11208)
The fix, added in https://github.com/payloadcms/payload/pull/11096
wasn't sufficient enough. It did handle the case when the same query
path / table was joined twice and caused incorrect `totalDocs`, but it
didn't handle the case when `JOIN` returns more than 1 rows, which 2
added new assertions here check.

Now, we use `COUNT(*)` only if we don't have any joined tables. If we
do, instead of using `SELECT (COUNT DISTINCT id)` which as described in
the previous PR is _very slow_ for large tables, we use the following
query:

```sql
SELECT COUNT(1) OVER() as count -- window function, executes for each row only once
FROM users
LEFT JOIN -- ... here additional rows are added
WHERE -- ...
GROUP BY users.id -- this ensures we're counting only users without additional rows from joins. 
LIMIT 1 -- Since COUNT(1) OVER() executes and resolves before doing LIMIT, we can safely apply LIMIT 1.
```
2025-02-16 14:08:08 +02:00

50 lines
1.4 KiB
TypeScript

import { count, sql } from 'drizzle-orm'
import type { ChainedMethods } from '../types.js'
import type { BasePostgresAdapter, CountDistinct } from './types.js'
import { chainMethods } from '../find/chainMethods.js'
export const countDistinct: CountDistinct = async function countDistinct(
this: BasePostgresAdapter,
{ db, joins, tableName, where },
) {
// When we don't have any joins - use a simple COUNT(*) query.
if (joins.length === 0) {
const countResult = await db
.select({
count: count(),
})
.from(this.tables[tableName])
.where(where)
return Number(countResult[0].count)
}
const chainedMethods: ChainedMethods = []
joins.forEach(({ condition, table }) => {
chainedMethods.push({
args: [table, condition],
method: 'leftJoin',
})
})
// When we have any joins, we need to count each individual ID only once.
// COUNT(*) doesn't work for this well in this case, as it also counts joined tables.
// SELECT (COUNT DISTINCT id) has a very slow performance on large tables.
// Instead, COUNT (GROUP BY id) can be used which is still slower than COUNT(*) but acceptable.
const countResult = await chainMethods({
methods: chainedMethods,
query: db
.select({
count: sql`COUNT(1) OVER()`,
})
.from(this.tables[tableName])
.where(where)
.groupBy(this.tables[tableName].id)
.limit(1),
})
return Number(countResult[0].count)
}