fix(db-postgres): ensure countDistinct works correctly and achieve better performance when the query has table joins (#11208)
The fix, added in https://github.com/payloadcms/payload/pull/11096 wasn't sufficient enough. It did handle the case when the same query path / table was joined twice and caused incorrect `totalDocs`, but it didn't handle the case when `JOIN` returns more than 1 rows, which 2 added new assertions here check. Now, we use `COUNT(*)` only if we don't have any joined tables. If we do, instead of using `SELECT (COUNT DISTINCT id)` which as described in the previous PR is _very slow_ for large tables, we use the following query: ```sql SELECT COUNT(1) OVER() as count -- window function, executes for each row only once FROM users LEFT JOIN -- ... here additional rows are added WHERE -- ... GROUP BY users.id -- this ensures we're counting only users without additional rows from joins. LIMIT 1 -- Since COUNT(1) OVER() executes and resolves before doing LIMIT, we can safely apply LIMIT 1. ```
This commit is contained in:
@@ -9,32 +9,40 @@ export const countDistinct: CountDistinct = async function countDistinct(
|
||||
this: SQLiteAdapter,
|
||||
{ db, joins, tableName, where },
|
||||
) {
|
||||
// When we don't have any joins - use a simple COUNT(*) query.
|
||||
if (joins.length === 0) {
|
||||
const countResult = await db
|
||||
.select({
|
||||
count: count(),
|
||||
})
|
||||
.from(this.tables[tableName])
|
||||
.where(where)
|
||||
return Number(countResult[0].count)
|
||||
}
|
||||
|
||||
const chainedMethods: ChainedMethods = []
|
||||
|
||||
// COUNT(DISTINCT id) is slow on large tables, so we only use DISTINCT if we have to
|
||||
const visitedPaths = new Set([])
|
||||
let useDistinct = false
|
||||
joins.forEach(({ condition, queryPath, table }) => {
|
||||
if (!useDistinct && queryPath) {
|
||||
if (visitedPaths.has(queryPath)) {
|
||||
useDistinct = true
|
||||
} else {
|
||||
visitedPaths.add(queryPath)
|
||||
}
|
||||
}
|
||||
joins.forEach(({ condition, table }) => {
|
||||
chainedMethods.push({
|
||||
args: [table, condition],
|
||||
method: 'leftJoin',
|
||||
})
|
||||
})
|
||||
|
||||
// When we have any joins, we need to count each individual ID only once.
|
||||
// COUNT(*) doesn't work for this well in this case, as it also counts joined tables.
|
||||
// SELECT (COUNT DISTINCT id) has a very slow performance on large tables.
|
||||
// Instead, COUNT (GROUP BY id) can be used which is still slower than COUNT(*) but acceptable.
|
||||
const countResult = await chainMethods({
|
||||
methods: chainedMethods,
|
||||
query: db
|
||||
.select({
|
||||
count: useDistinct ? sql`COUNT(DISTINCT ${this.tables[tableName].id})` : count(),
|
||||
count: sql`COUNT(1) OVER()`,
|
||||
})
|
||||
.from(this.tables[tableName])
|
||||
.where(where),
|
||||
.where(where)
|
||||
.groupBy(this.tables[tableName].id)
|
||||
.limit(1),
|
||||
})
|
||||
|
||||
return Number(countResult[0].count)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { count, sql } from 'drizzle-orm'
|
||||
|
||||
import type { ChainedMethods, TransactionPg } from '../types.js'
|
||||
import type { ChainedMethods } from '../types.js'
|
||||
import type { BasePostgresAdapter, CountDistinct } from './types.js'
|
||||
|
||||
import { chainMethods } from '../find/chainMethods.js'
|
||||
@@ -9,33 +9,40 @@ export const countDistinct: CountDistinct = async function countDistinct(
|
||||
this: BasePostgresAdapter,
|
||||
{ db, joins, tableName, where },
|
||||
) {
|
||||
// When we don't have any joins - use a simple COUNT(*) query.
|
||||
if (joins.length === 0) {
|
||||
const countResult = await db
|
||||
.select({
|
||||
count: count(),
|
||||
})
|
||||
.from(this.tables[tableName])
|
||||
.where(where)
|
||||
return Number(countResult[0].count)
|
||||
}
|
||||
|
||||
const chainedMethods: ChainedMethods = []
|
||||
|
||||
// COUNT(DISTINCT id) is slow on large tables, so we only use DISTINCT if we have to
|
||||
const visitedPaths = new Set([])
|
||||
let useDistinct = false
|
||||
joins.forEach(({ condition, queryPath, table }) => {
|
||||
if (!useDistinct && queryPath) {
|
||||
if (visitedPaths.has(queryPath)) {
|
||||
useDistinct = true
|
||||
} else {
|
||||
visitedPaths.add(queryPath)
|
||||
}
|
||||
}
|
||||
joins.forEach(({ condition, table }) => {
|
||||
chainedMethods.push({
|
||||
args: [table, condition],
|
||||
method: 'leftJoin',
|
||||
})
|
||||
})
|
||||
|
||||
// When we have any joins, we need to count each individual ID only once.
|
||||
// COUNT(*) doesn't work for this well in this case, as it also counts joined tables.
|
||||
// SELECT (COUNT DISTINCT id) has a very slow performance on large tables.
|
||||
// Instead, COUNT (GROUP BY id) can be used which is still slower than COUNT(*) but acceptable.
|
||||
const countResult = await chainMethods({
|
||||
methods: chainedMethods,
|
||||
query: (db as TransactionPg)
|
||||
query: db
|
||||
.select({
|
||||
count: useDistinct ? sql`COUNT(DISTINCT ${this.tables[tableName].id})` : count(),
|
||||
count: sql`COUNT(1) OVER()`,
|
||||
})
|
||||
.from(this.tables[tableName])
|
||||
.where(where),
|
||||
.where(where)
|
||||
.groupBy(this.tables[tableName].id)
|
||||
.limit(1),
|
||||
})
|
||||
|
||||
return Number(countResult[0].count)
|
||||
|
||||
@@ -248,6 +248,12 @@ export default buildConfigWithDefaults({
|
||||
relationTo: 'movies',
|
||||
hasMany: true,
|
||||
},
|
||||
{
|
||||
name: 'directors',
|
||||
type: 'relationship',
|
||||
relationTo: 'directors',
|
||||
hasMany: true,
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
|
||||
@@ -422,11 +422,21 @@ describe('Relationships', () => {
|
||||
data: {},
|
||||
})
|
||||
|
||||
const movie3 = await payload.create({
|
||||
collection: 'movies',
|
||||
data: { name: 'some-name' },
|
||||
})
|
||||
|
||||
const movie4 = await payload.create({
|
||||
collection: 'movies',
|
||||
data: { name: 'some-name' },
|
||||
})
|
||||
|
||||
await payload.create({
|
||||
collection: 'directors',
|
||||
data: {
|
||||
name: 'Quentin Tarantino',
|
||||
movies: [movie2.id, movie1.id],
|
||||
movies: [movie2.id, movie1.id, movie3.id, movie4.id],
|
||||
},
|
||||
})
|
||||
|
||||
@@ -455,6 +465,41 @@ describe('Relationships', () => {
|
||||
})
|
||||
|
||||
expect(res.totalDocs).toBe(1)
|
||||
|
||||
const res_2 = await payload.find({
|
||||
collection: 'directors',
|
||||
limit: 10,
|
||||
where: {
|
||||
or: [
|
||||
{
|
||||
'movies.name': {
|
||||
equals: 'some-name',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
})
|
||||
|
||||
expect(res_2.totalDocs).toBe(1)
|
||||
|
||||
const dir_1 = await payload.create({ collection: 'directors', data: { name: 'dir' } })
|
||||
const dir_2 = await payload.create({ collection: 'directors', data: { name: 'dir' } })
|
||||
|
||||
const dir_3 = await payload.create({
|
||||
collection: 'directors',
|
||||
data: { directors: [dir_1.id, dir_2.id] },
|
||||
})
|
||||
|
||||
const result = await payload.find({
|
||||
collection: 'directors',
|
||||
where: {
|
||||
'directors.name': { equals: 'dir' },
|
||||
},
|
||||
})
|
||||
|
||||
expect(result.totalDocs).toBe(1)
|
||||
expect(result.docs).toHaveLength(1)
|
||||
expect(result.docs[0]?.id).toBe(dir_3.id)
|
||||
})
|
||||
|
||||
it('should query using "contains" by hasMany relationship field', async () => {
|
||||
|
||||
@@ -6,10 +6,65 @@
|
||||
* and re-run `payload generate:types` to regenerate this file.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Supported timezones in IANA format.
|
||||
*
|
||||
* This interface was referenced by `Config`'s JSON-Schema
|
||||
* via the `definition` "supportedTimezones".
|
||||
*/
|
||||
export type SupportedTimezones =
|
||||
| 'Pacific/Midway'
|
||||
| 'Pacific/Niue'
|
||||
| 'Pacific/Honolulu'
|
||||
| 'Pacific/Rarotonga'
|
||||
| 'America/Anchorage'
|
||||
| 'Pacific/Gambier'
|
||||
| 'America/Los_Angeles'
|
||||
| 'America/Tijuana'
|
||||
| 'America/Denver'
|
||||
| 'America/Phoenix'
|
||||
| 'America/Chicago'
|
||||
| 'America/Guatemala'
|
||||
| 'America/New_York'
|
||||
| 'America/Bogota'
|
||||
| 'America/Caracas'
|
||||
| 'America/Santiago'
|
||||
| 'America/Buenos_Aires'
|
||||
| 'America/Sao_Paulo'
|
||||
| 'Atlantic/South_Georgia'
|
||||
| 'Atlantic/Azores'
|
||||
| 'Atlantic/Cape_Verde'
|
||||
| 'Europe/London'
|
||||
| 'Europe/Berlin'
|
||||
| 'Africa/Lagos'
|
||||
| 'Europe/Athens'
|
||||
| 'Africa/Cairo'
|
||||
| 'Europe/Moscow'
|
||||
| 'Asia/Riyadh'
|
||||
| 'Asia/Dubai'
|
||||
| 'Asia/Baku'
|
||||
| 'Asia/Karachi'
|
||||
| 'Asia/Tashkent'
|
||||
| 'Asia/Calcutta'
|
||||
| 'Asia/Dhaka'
|
||||
| 'Asia/Almaty'
|
||||
| 'Asia/Jakarta'
|
||||
| 'Asia/Bangkok'
|
||||
| 'Asia/Shanghai'
|
||||
| 'Asia/Singapore'
|
||||
| 'Asia/Tokyo'
|
||||
| 'Asia/Seoul'
|
||||
| 'Australia/Sydney'
|
||||
| 'Pacific/Guam'
|
||||
| 'Pacific/Noumea'
|
||||
| 'Pacific/Auckland'
|
||||
| 'Pacific/Fiji';
|
||||
|
||||
export interface Config {
|
||||
auth: {
|
||||
users: UserAuthOperations;
|
||||
};
|
||||
blocks: {};
|
||||
collections: {
|
||||
posts: Post;
|
||||
postsLocalized: PostsLocalized;
|
||||
@@ -220,6 +275,7 @@ export interface Director {
|
||||
id: string;
|
||||
name?: string | null;
|
||||
movies?: (string | Movie)[] | null;
|
||||
directors?: (string | Director)[] | null;
|
||||
updatedAt: string;
|
||||
createdAt: string;
|
||||
}
|
||||
@@ -657,6 +713,7 @@ export interface MoviesSelect<T extends boolean = true> {
|
||||
export interface DirectorsSelect<T extends boolean = true> {
|
||||
name?: T;
|
||||
movies?: T;
|
||||
directors?: T;
|
||||
updatedAt?: T;
|
||||
createdAt?: T;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user