feat(plugin-search): added support for reindexing collections on demand (#9391)

### What?
This PR adds reindexing capabilities to `plugin-search`, allowing users to reindex entire searchable collections on demand.

### Why?
As it stands, end users must reindex documents either manually one-by-one or via bulk operations. Both approaches are undesirable because they publish new versions of existing documents. Consider the case where `plugin-search` is added only _after_ a project has started and documents already exist in its collections. It would be nice if users could simply click a button, choose the searchable collections to reindex, and have a custom endpoint handle the rest.

### How?
This PR builds on the existing plugin configuration, adding a custom endpoint and a custom `beforeListTable` component in the form of a popup button. Clicking the button opens a popup with options to select which collection to reindex, along with an `All Collections` option that runs reindexing on every configured search collection. It also adds a `reindexBatchSize` config option that lets users specify how many documents to batch together when syncing with search. A sketch of the resulting configuration follows.
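
For illustration, a configuration using the new option might look like the following. This is a minimal sketch of a Payload v3 config; the collection slugs and the batch size value are placeholders, not plugin defaults.

```ts
import { searchPlugin } from '@payloadcms/plugin-search'
import { buildConfig } from 'payload'

export default buildConfig({
  // ...the rest of your Payload config (db adapter, collections, etc.)
  plugins: [
    searchPlugin({
      // The collections to make searchable -- slugs here are illustrative
      collections: ['posts', 'pages'],
      // Added by this PR: how many documents to batch per sync
      // when the reindex endpoint processes a collection
      reindexBatchSize: 50,
    }),
  ],
})
```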

Big shoutout to @paulpopus & @r1tsuu for the triple-A level support on
this one!

Fixes #8902 

See it in action:


https://github.com/user-attachments/assets/ee8dd68c-ea89-49cd-adc3-151973eea28b

Notes:
- Traditionally, these kinds of long-running tasks would be better suited to a job. However, given how many users deploy to serverless environments, it would be problematic to offer this feature exclusively through job queues. I gave this a lot of thought and decided it would be best to ship the feature as-is, with the intention of adding an opt-in method to use job queues in the future if/when this gets merged.
- In my testing, the collection description somehow started to appear in the document views after the on-demand RSC merge. I haven't been able to isolate the cause, but this PR has an example of that problem. Super strange.

---------

Co-authored-by: Sasha <64744993+r1tsuu@users.noreply.github.com>
Co-authored-by: Paul Popus <paul@nouance.io>

Changes to the `plugin-search` test config:

```diff
@@ -3,6 +3,7 @@ import path from 'path'
 const filename = fileURLToPath(import.meta.url)
 const dirname = path.dirname(filename)
 import { searchPlugin } from '@payloadcms/plugin-search'
+import { randomUUID } from 'node:crypto'
 import { buildConfigWithDefaults } from '../buildConfigWithDefaults.js'
 import { devUser } from '../credentials.js'
@@ -49,8 +50,28 @@ export default buildConfigWithDefaults({
       posts: ({ title }) => (title === 'Hello, world!' ? 30 : 20),
     },
     searchOverrides: {
+      access: {
+        // Used for int test
+        delete: ({ req: { user } }) => user.email === devUser.email,
+      },
       fields: ({ defaultFields }) => [
         ...defaultFields,
+        // This is necessary to test whether search docs were deleted with SQLite:
+        // without the AUTOINCREMENT option, SQLite may reuse the IDs of deleted
+        // rows, so we add a custom UUID field instead.
+        {
+          name: 'id',
+          type: 'text',
+          hooks: {
+            beforeChange: [
+              ({ operation }) => {
+                if (operation === 'create') {
+                  return randomUUID()
+                }
+              },
+            ],
+          },
+        },
         {
           name: 'excerpt',
           type: 'textarea',
```
Changes to the `plugin-search` integration tests:

```diff
@@ -1,19 +1,67 @@
-import type { Payload } from 'payload'
 import path from 'path'
+import { NotFound, type Payload } from 'payload'
+import { wait } from 'payload/shared'
 import { fileURLToPath } from 'url'
+import type { NextRESTClient } from '../helpers/NextRESTClient.js'
+import { devUser } from '../credentials.js'
 import { initPayloadInt } from '../helpers/initPayloadInt.js'
 import { pagesSlug, postsSlug } from './shared.js'
 
 let payload: Payload
+let restClient: NextRESTClient
+let token: string
 
 const filename = fileURLToPath(import.meta.url)
 const dirname = path.dirname(filename)
 
 describe('@payloadcms/plugin-search', () => {
   beforeAll(async () => {
-    ;({ payload } = await initPayloadInt(dirname))
+    ;({ payload, restClient } = await initPayloadInt(dirname))
+
+    const data = await restClient
+      .POST('/users/login', {
+        body: JSON.stringify({
+          email: devUser.email,
+          password: devUser.password,
+        }),
+      })
+      .then((res) => res.json())
+
+    token = data.token
   })
 
+  beforeEach(async () => {
+    await payload.delete({
+      collection: 'search',
+      depth: 0,
+      where: {
+        id: {
+          exists: true,
+        },
+      },
+    })
+
+    await Promise.all([
+      payload.delete({
+        collection: postsSlug,
+        depth: 0,
+        where: {
+          id: {
+            exists: true,
+          },
+        },
+      }),
+      payload.delete({
+        collection: pagesSlug,
+        depth: 0,
+        where: {
+          id: {
+            exists: true,
+          },
+        },
+      }),
+    ])
+  })
+
   afterAll(async () => {
@@ -227,4 +275,150 @@ describe('@payloadcms/plugin-search', () => {
     expect(syncedSearchData.docs[0].slug).toEqual('es')
   })
+
+  it('should respond with 401 when invalid permissions on user before reindex', async () => {
+    const testCreds = {
+      email: 'test@payloadcms.com',
+      password: 'test',
+    }
+
+    await payload.create({
+      collection: 'users',
+      data: testCreds,
+    })
+
+    const testUserRes = await restClient.POST(`/users/login`, {
+      body: JSON.stringify(testCreds),
+    })
+    const testUser = await testUserRes.json()
+
+    const endpointRes = await restClient.POST(`/search/reindex`, {
+      body: JSON.stringify({
+        collections: [postsSlug],
+      }),
+      headers: {
+        Authorization: `JWT ${testUser.token}`,
+      },
+    })
+
+    expect(endpointRes.status).toEqual(401)
+  })
+
+  it('should respond with 400 when invalid collection args passed to reindex', async () => {
+    const endpointNoArgsRes = await restClient.POST(`/search/reindex`, {
+      body: JSON.stringify({}),
+      headers: {
+        Authorization: `JWT ${token}`,
+      },
+    })
+    const endpointEmptyArrRes = await restClient.POST(`/search/reindex`, {
+      body: JSON.stringify({
+        collections: [],
+      }),
+      headers: {
+        Authorization: `JWT ${token}`,
+      },
+    })
+    const endpointInvalidArrRes = await restClient.POST(`/search/reindex`, {
+      body: JSON.stringify({
+        collections: ['users'],
+      }),
+      headers: {
+        Authorization: `JWT ${token}`,
+      },
+    })
+
+    expect(endpointNoArgsRes.status).toBe(400)
+    expect(endpointEmptyArrRes.status).toBe(400)
+    expect(endpointInvalidArrRes.status).toBe(400)
+  })
+
+  it('should delete existing search indexes before reindexing', async () => {
+    await payload.create({
+      collection: postsSlug,
+      data: {
+        title: 'post_1',
+        _status: 'published',
+      },
+    })
+    await wait(200)
+    await payload.create({
+      collection: postsSlug,
+      data: {
+        title: 'post_2',
+        _status: 'published',
+      },
+    })
+
+    const { docs } = await payload.find({ collection: 'search' })
+    await wait(200)
+
+    const endpointRes = await restClient.POST('/search/reindex', {
+      body: JSON.stringify({
+        collections: [postsSlug, pagesSlug],
+      }),
+    })
+    expect(endpointRes.status).toBe(200)
+
+    const { docs: results } = await payload.find({
+      collection: 'search',
+      depth: 0,
+      where: {
+        id: {
+          in: docs.map((doc) => doc.id),
+        },
+      },
+    })
+
+    // Should have no docs with these IDs after reindexing,
+    // since reindex deletes search docs and recreates them
+    expect(results).toHaveLength(0)
+  })
+
+  it('should reindex whole collections', async () => {
+    await payload.create({
+      collection: pagesSlug,
+      data: {
+        title: 'Test page title',
+        _status: 'published',
+      },
+    })
+    await payload.create({
+      collection: postsSlug,
+      data: {
+        title: 'Test page title',
+        _status: 'published',
+      },
+    })
+    await wait(200)
+
+    const { totalDocs: totalBeforeReindex } = await payload.count({
+      collection: 'search',
+    })
+
+    const endpointRes = await restClient.POST(`/search/reindex`, {
+      body: JSON.stringify({
+        collections: [postsSlug, pagesSlug],
+      }),
+      headers: {
+        Authorization: `JWT ${token}`,
+      },
+    })
+    expect(endpointRes.status).toBe(200)
+
+    const { totalDocs: totalAfterReindex } = await payload.count({
+      collection: 'search',
+    })
+
+    expect(totalAfterReindex).toBe(totalBeforeReindex)
+  })
 })
```
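
For anyone who wants to trigger a reindex outside the admin UI, the endpoint exercised by these tests can be called directly. A minimal sketch, assuming a local Payload server with the default `/api` route prefix and a JWT obtained from the users login endpoint:

```ts
// Hypothetical helper: POSTs to the reindex endpoint added by this PR.
// The base URL assumes a local dev server.
const reindexSearch = async (token: string, collections: string[]): Promise<void> => {
  const res = await fetch('http://localhost:3000/api/search/reindex', {
    method: 'POST',
    headers: {
      Authorization: `JWT ${token}`,
      'Content-Type': 'application/json',
    },
    // Per the tests above: a missing or empty `collections` array, or a
    // slug that isn't a searchable collection, yields a 400 response
    body: JSON.stringify({ collections }),
  })

  if (!res.ok) {
    throw new Error(`Reindex failed with status ${res.status}`)
  }
}

// Usage: await reindexSearch(token, ['posts', 'pages'])
```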