Files
payloadcms/test/database/postgres-vector.int.spec.ts
Alessio Gravili 94f5e790f6 perf(drizzle): single-roundtrip db updates for simple collections (#13186)
Currently, an optimized DB update (simple data => no
delete-and-create-row) does the following:
1. sql UPDATE
2. sql SELECT

This PR reduces this further to one single DB call for simple
collections:
1. sql UPDATE with RETURNING()

This only works for simple collections that do not have any fields that
need to be fetched from other tables. If a collection has fields like
relationship or blocks, we'll need that separate SELECT call to join in
the other tables.

In 4.0, we can remove all "complex" fields from the jobs collection and
replace them with a JSON field to make use of this optimization

---
- To see the specific tasks where the Asana app for GitHub is being
used, see below:
  - https://app.asana.com/0/0/1210803039809814
2025-07-23 01:45:55 -07:00

361 lines
8.3 KiB
TypeScript

/* eslint-disable jest/no-conditional-in-test */
/* eslint-disable jest/expect-expect */
/* eslint-disable jest/require-top-level-describe */
import type { PostgresAdapter } from '@payloadcms/db-postgres'
import type { PostgresDB } from '@payloadcms/drizzle'
import { cosineDistance, desc, gt, jaccardDistance, l2Distance, lt, sql } from 'drizzle-orm'
import path from 'path'
import { BasePayload, buildConfig, type DatabaseAdapterObj } from 'payload'
import { fileURLToPath } from 'url'
// Absolute path of this spec file, and of the directory that contains it.
const filename = fileURLToPath(import.meta.url)
const dirname = path.dirname(filename)

// Register the suite normally only when PAYLOAD_DATABASE starts with "postgres";
// for any other value (or when the variable is unset) register it as skipped.
const describePostgres = (process.env.PAYLOAD_DATABASE ?? '').startsWith('postgres')
  ? describe
  : describe.skip
describePostgres('postgres vector custom column', () => {
/**
 * Boots a fresh Payload instance whose `posts.embedding` raw column is replaced by a
 * 5-dimension column of the given vector type (with the `vector` extension enabled),
 * seeds four posts, and asserts that a cosine-similarity search around the "cat"
 * embedding returns only "cat" and "dog", most similar first.
 *
 * The Payload instance's database adapter is destroyed before returning, whether or
 * not the assertions pass.
 *
 * @param vectorType - Raw column type assigned to `posts.embedding`.
 */
const vectorColumnQueryTest = async (vectorType: 'halfvec' | 'vector'): Promise<void> => {
  const {
    databaseAdapter: sharedAdapter,
  }: {
    databaseAdapter: DatabaseAdapterObj<PostgresAdapter>
  } = await import(path.resolve(dirname, '../databaseAdapter.js'))

  const init = sharedAdapter.init

  // The dynamic import is cached, so every test receives the very same adapter object.
  // Configure a copy instead of reassigning `init` on the shared export; otherwise each
  // test would wrap the wrapper installed by the test that ran before it.
  const databaseAdapter: DatabaseAdapterObj<PostgresAdapter> = {
    ...sharedAdapter,
    init: ({ payload }) => {
      const adapter = init({ payload })

      // set options
      adapter.extensions = {
        vector: true,
      }
      adapter.beforeSchemaInit = [
        ({ adapter: schemaAdapter, schema }) => {
          // Swap the column generated for the `embedding` field for a vector column.
          if (schemaAdapter?.rawTables?.posts?.columns) {
            schemaAdapter.rawTables.posts.columns.embedding = {
              type: vectorType,
              dimensions: 5,
              name: 'embedding',
            }
          }
          return schema
        },
      ]

      return adapter
    },
  }

  const config = await buildConfig({
    db: databaseAdapter,
    secret: 'secret',
    collections: [
      {
        slug: 'users',
        auth: true,
        fields: [],
      },
      {
        slug: 'posts',
        fields: [
          {
            type: 'json',
            name: 'embedding',
          },
          {
            name: 'title',
            type: 'text',
          },
        ],
      },
    ],
  })

  // do not use getPayload to avoid caching and re-using payload instance from previous tests
  const payload = await new BasePayload().init({ config })

  try {
    const catEmbedding = [1.5, -0.4, 7.2, 19.6, 20.2]

    // Inserted one at a time, in the same order as before.
    const seedPosts = [
      { embedding: [-5.2, 3.1, 0.2, 8.1, 3.5], title: 'apple' },
      { embedding: catEmbedding, title: 'cat' },
      { embedding: [-5.1, 2.9, 0.8, 7.9, 3.1], title: 'fruit' },
      { embedding: [1.7, -0.3, 6.9, 19.1, 21.1], title: 'dog' },
    ]

    for (const data of seedPosts) {
      await payload.create({
        collection: 'posts',
        data,
      })
    }

    const similarity = sql<number>`1 - (${cosineDistance(payload.db.tables.posts.embedding, catEmbedding)})`
    const db = payload.db.drizzle as PostgresDB

    const res = await db
      .select()
      .from(payload.db.tables.posts)
      .where(gt(similarity, 0.9))
      .orderBy(desc(similarity))

    // Only cat and dog
    expect(res).toHaveLength(2)

    // similarity sort
    expect(res?.[0]?.title).toBe('cat')
    expect(res?.[1]?.title).toBe('dog')
  } finally {
    // Tear the adapter down so this instance's database connections are not left open
    // for the remainder of the test run.
    if (typeof payload.db.destroy === 'function') {
      await payload.db.destroy()
    }
  }
}
// Both dense vector column types run the same scenario; register one test per type.
// The generated titles are "should add a vector column and query it" and
// "should add a halfvec column and query it".
for (const vectorType of ['vector', 'halfvec'] as const) {
  it(`should add a ${vectorType} column and query it`, async () => {
    await vectorColumnQueryTest(vectorType)
  })
}
/**
 * Replaces `posts.embedding` with a 5-dimension `sparsevec` column, seeds four posts
 * written in sparse-vector text format, and asserts that an L2-distance search around
 * the "cat" embedding returns only "cat" and "dog", closest first.
 */
it('should add a sparsevec column and query it', async () => {
  const {
    databaseAdapter: sharedAdapter,
  }: {
    databaseAdapter: DatabaseAdapterObj<PostgresAdapter>
  } = await import(path.resolve(dirname, '../databaseAdapter.js'))

  const init = sharedAdapter.init

  // The dynamic import is cached, so every test receives the very same adapter object.
  // Configure a copy instead of reassigning `init` on the shared export; otherwise each
  // test would wrap the wrapper installed by the test that ran before it.
  const databaseAdapter: DatabaseAdapterObj<PostgresAdapter> = {
    ...sharedAdapter,
    init: ({ payload }) => {
      const adapter = init({ payload })

      adapter.extensions = {
        vector: true,
      }
      adapter.beforeSchemaInit = [
        ({ adapter: schemaAdapter, schema }) => {
          // Swap the column generated for the `embedding` field for a sparsevec column.
          if (schemaAdapter?.rawTables?.posts?.columns) {
            schemaAdapter.rawTables.posts.columns.embedding = {
              type: 'sparsevec',
              dimensions: 5,
              name: 'embedding',
            }
          }
          return schema
        },
      ]

      return adapter
    },
  }

  const config = await buildConfig({
    db: databaseAdapter,
    secret: 'secret',
    collections: [
      {
        slug: 'users',
        auth: true,
        fields: [],
      },
      {
        slug: 'posts',
        fields: [
          {
            name: 'embedding',
            type: 'text',
          },
          {
            name: 'title',
            type: 'text',
          },
        ],
      },
    ],
  })

  // do not use getPayload to avoid caching and re-using payload instance from previous tests
  const payload = await new BasePayload().init({ config })

  try {
    // sparse-vector format: '{index:value,...}/dims'
    const catEmbedding = '{1:1,3:2,5:3}/5'

    // Inserted one at a time, in the same order as before.
    const seedPosts = [
      { embedding: '{2:1,4:2}/5', title: 'apple' },
      { embedding: catEmbedding, title: 'cat' },
      { embedding: '{2:4,4:6}/5', title: 'fruit' },
      { embedding: '{1:1,3:2,5:2}/5', title: 'dog' },
    ]

    for (const data of seedPosts) {
      await payload.create({
        collection: 'posts',
        data,
      })
    }

    const distance = sql<number>`(${l2Distance(payload.db.tables.posts.embedding, catEmbedding)})`
    const db = payload.db.drizzle as PostgresDB

    // Awaiting the query builder runs the query, so no explicit `.execute()` is needed.
    const res = await db
      .select()
      .from(payload.db.tables.posts)
      .where(lt(distance, 1.1))
      .orderBy(distance)

    // should return cat (distance 0) then dog
    expect(res).toHaveLength(2)
    expect(res?.[0]?.title).toBe('cat')
    expect(res?.[1]?.title).toBe('dog')
  } finally {
    // Tear the adapter down so this instance's database connections are not left open
    // for the remainder of the test run.
    if (typeof payload.db.destroy === 'function') {
      await payload.db.destroy()
    }
  }
})
/**
 * Replaces `posts.embedding` with a 5-dimension `bit` column, seeds four posts written
 * as bit strings, and asserts that a Jaccard-similarity search around the "cat"
 * embedding returns only "cat" and "dog", most similar first.
 */
it('should add a binaryvec column and query it', async () => {
  const {
    databaseAdapter: sharedAdapter,
  }: {
    databaseAdapter: DatabaseAdapterObj<PostgresAdapter>
  } = await import(path.resolve(dirname, '../databaseAdapter.js'))

  const init = sharedAdapter.init

  // The dynamic import is cached, so every test receives the very same adapter object.
  // Configure a copy instead of reassigning `init` on the shared export; otherwise each
  // test would wrap the wrapper installed by the test that ran before it.
  const databaseAdapter: DatabaseAdapterObj<PostgresAdapter> = {
    ...sharedAdapter,
    init: ({ payload }) => {
      const adapter = init({ payload })

      // set options
      adapter.extensions = {
        vector: true,
      }
      adapter.beforeSchemaInit = [
        ({ adapter: schemaAdapter, schema }) => {
          // Swap the column generated for the `embedding` field for a bit column.
          if (schemaAdapter?.rawTables?.posts?.columns) {
            schemaAdapter.rawTables.posts.columns.embedding = {
              type: 'bit',
              dimensions: 5,
              name: 'embedding',
            }
          }
          return schema
        },
      ]

      return adapter
    },
  }

  const config = await buildConfig({
    db: databaseAdapter,
    secret: 'secret',
    collections: [
      {
        slug: 'users',
        auth: true,
        fields: [],
      },
      {
        slug: 'posts',
        fields: [
          {
            type: 'text',
            name: 'embedding',
          },
          {
            name: 'title',
            type: 'text',
          },
        ],
      },
    ],
  })

  // do not use getPayload to avoid caching and re-using payload instance from previous tests
  const payload = await new BasePayload().init({ config })

  try {
    const catEmbedding = '10101'

    // Inserted one at a time, in the same order as before.
    const seedPosts = [
      { embedding: '01010', title: 'apple' },
      { embedding: '10101', title: 'cat' },
      { embedding: '11111', title: 'fruit' },
      { embedding: '10100', title: 'dog' },
    ]

    for (const data of seedPosts) {
      await payload.create({
        collection: 'posts',
        data,
      })
    }

    const similarity = sql<number>`1 - (${jaccardDistance(payload.db.tables.posts.embedding, catEmbedding)})`
    const db = payload.db.drizzle as PostgresDB

    const res = await db
      .select()
      .from(payload.db.tables.posts)
      .where(gt(similarity, 0.6))
      .orderBy(desc(similarity))

    // Only cat and dog
    expect(res).toHaveLength(2)

    // similarity sort
    expect(res?.[0]?.title).toBe('cat')
    expect(res?.[1]?.title).toBe('dog')
  } finally {
    // Tear the adapter down so this instance's database connections are not left open
    // for the remainder of the test run.
    if (typeof payload.db.destroy === 'function') {
      await payload.db.destroy()
    }
  }
})
})