refactor: simplify job queue error handling (#12845)

This simplifies workflow / task error handling, as well as cancelling
jobs. Previously, we handled errors where they occurred and passed
error state through using a `state` object — errors were then handled in
multiple areas of the code.

This PR adds new, clean `TaskError`, `WorkflowError` and
`JobCancelledError` errors that are thrown when they occur and are
handled **in one single place**, massively cleaning up complex functions
like
[payload/src/queues/operations/runJobs/runJob/getRunTaskFunction.ts](https://github.com/payloadcms/payload/compare/refactor/jobs-errors?expand=1#diff-53dc7ccb7c8e023c9ba63fdd2e78c32ad0be606a2c64a3512abad87893f5fd21)

Performance will also improve with this change —
previously, a task / workflow failure or cancellation would have
resulted in multiple, separate `updateJob` db calls, as data
modifications to the job object required for storing failure state were
made multiple times in multiple areas of the codebase. Most notably,
task error state was handled and updated separately from workflow error
state.
Now, it's just a clean, single `updateJob` call.

This PR also does the following:
- adds a new test for `deleteJobOnComplete` behavior
- cleans up test suite
- ensures `deleteJobOnComplete` does not delete definitively failed jobs

---
- To see the specific tasks where the Asana app for GitHub is being
used, see below:
  - https://app.asana.com/0/0/1210553277813320
This commit is contained in:
Alessio Gravili
2025-06-17 15:24:53 -07:00
committed by GitHub
parent dffdee89d8
commit 59f536c2c9
20 changed files with 676 additions and 496 deletions

View File

@@ -9,18 +9,22 @@ import { NextRESTClient } from './NextRESTClient.js'
/**
* Initialize Payload configured for integration tests
*/
export async function initPayloadInt(
export async function initPayloadInt<TInitializePayload extends boolean | undefined = true>(
dirname: string,
testSuiteNameOverride?: string,
initializePayload = true,
): Promise<{ config: SanitizedConfig; payload?: Payload; restClient?: NextRESTClient }> {
initializePayload?: TInitializePayload,
): Promise<
TInitializePayload extends false
? { config: SanitizedConfig }
: { config: SanitizedConfig; payload: Payload; restClient: NextRESTClient }
> {
const testSuiteName = testSuiteNameOverride ?? path.basename(dirname)
await runInit(testSuiteName, false, true)
console.log('importing config', path.resolve(dirname, 'config.ts'))
const { default: config } = await import(path.resolve(dirname, 'config.ts'))
if (!initializePayload) {
return { config: await config }
if (initializePayload === false) {
return { config: await config } as any
}
console.log('starting payload')
@@ -29,5 +33,5 @@ export async function initPayloadInt(
console.log('initializing rest client')
const restClient = new NextRESTClient(payload.config)
console.log('initPayloadInt done')
return { config: payload.config, payload, restClient }
return { config: payload.config, payload, restClient } as any
}

View File

@@ -9,6 +9,7 @@ import { devUser } from '../credentials.js'
import { updatePostStep1, updatePostStep2 } from './runners/updatePost.js'
import { seed } from './seed.js'
import { externalWorkflow } from './workflows/externalWorkflow.js'
import { failsImmediatelyWorkflow } from './workflows/failsImmediately.js'
import { inlineTaskTestWorkflow } from './workflows/inlineTaskTest.js'
import { inlineTaskTestDelayedWorkflow } from './workflows/inlineTaskTestDelayed.js'
import { longRunningWorkflow } from './workflows/longRunning.js'
@@ -393,6 +394,7 @@ export default buildConfigWithDefaults({
workflowRetries2TasksRetriesUndefinedWorkflow,
workflowRetries2TasksRetries0Workflow,
inlineTaskTestWorkflow,
failsImmediatelyWorkflow,
inlineTaskTestDelayedWorkflow,
externalWorkflow,
retriesBackoffTestWorkflow,

View File

@@ -41,6 +41,7 @@ describe('Queues', () => {
if (data.token) {
token = data.token
}
payload.config.jobs.deleteJobOnComplete = true
})
it('will run access control on jobs runner', async () => {
@@ -182,7 +183,6 @@ describe('Queues', () => {
// @ts-expect-error amountRetried is new arbitrary data and not in the type
expect(jobAfterRun.input.amountRetried).toBe(3)
payload.config.jobs.deleteJobOnComplete = true
})
it('ensure workflow-level retries are respected', async () => {
@@ -218,8 +218,6 @@ describe('Queues', () => {
// @ts-expect-error amountRetried is new arbitrary data and not in the type
expect(jobAfterRun.input.amountRetried).toBe(2)
payload.config.jobs.deleteJobOnComplete = true
})
it('ensure workflows dont limit retries if no retries property is sett', async () => {
@@ -255,8 +253,6 @@ describe('Queues', () => {
// @ts-expect-error amountRetried is new arbitrary data and not in the type
expect(jobAfterRun.input.amountRetried).toBe(3)
payload.config.jobs.deleteJobOnComplete = true
})
it('ensure workflows dont retry if retries set to 0, even if individual tasks have retries > 0 set', async () => {
@@ -292,8 +288,6 @@ describe('Queues', () => {
// @ts-expect-error amountRetried is new arbitrary data and not in the type
expect(jobAfterRun.input.amountRetried).toBe(0)
payload.config.jobs.deleteJobOnComplete = true
})
it('ensure workflows dont retry if neither workflows nor tasks have retries set', async () => {
@@ -329,8 +323,6 @@ describe('Queues', () => {
// @ts-expect-error amountRetried is new arbitrary data and not in the type
expect(jobAfterRun.input.amountRetried).toBe(0)
payload.config.jobs.deleteJobOnComplete = true
})
it('ensure workflows retry if workflows have retries set and tasks do not have retries set, due to tasks inheriting workflow retries', async () => {
@@ -366,8 +358,6 @@ describe('Queues', () => {
// @ts-expect-error amountRetried is new arbitrary data and not in the type
expect(jobAfterRun.input.amountRetried).toBe(2)
payload.config.jobs.deleteJobOnComplete = true
})
it('ensure workflows do not retry if workflows have retries set and tasks have retries set to 0', async () => {
@@ -403,8 +393,6 @@ describe('Queues', () => {
// @ts-expect-error amountRetried is new arbitrary data and not in the type
expect(jobAfterRun.input.amountRetried).toBe(0)
payload.config.jobs.deleteJobOnComplete = true
})
/*
@@ -492,7 +480,7 @@ describe('Queues', () => {
id: job.id,
})
expect(jobAfterRun.totalTried).toBe(5)
expect((jobAfterRun.taskStatus as JobTaskStatus).inline['1'].totalTried).toBe(5)
expect((jobAfterRun.taskStatus as JobTaskStatus).inline?.['1']?.totalTried).toBe(5)
// @ts-expect-error amountRetried is new arbitrary data and not in the type
expect(jobAfterRun.input.amountRetried).toBe(4)
@@ -518,7 +506,7 @@ describe('Queues', () => {
if (index === arr.length - 1) {
return null
}
return new Date(arr[index + 1]).getTime() - new Date(time).getTime()
return new Date(arr[index + 1] as string).getTime() - new Date(time).getTime()
})
.filter((p) => p !== null)
@@ -527,8 +515,6 @@ describe('Queues', () => {
expect(durations[1]).toBeGreaterThan(600)
expect(durations[2]).toBeGreaterThan(1200)
expect(durations[3]).toBeGreaterThan(2400)
payload.config.jobs.deleteJobOnComplete = true
})
it('ensure jobs run in FIFO order by default', async () => {
@@ -647,7 +633,7 @@ describe('Queues', () => {
})
expect(allSimples.totalDocs).toBe(1)
expect(allSimples.docs[0].title).toBe('hello!')
expect(allSimples.docs[0]?.title).toBe('hello!')
})
it('can create and autorun jobs', async () => {
@@ -677,12 +663,12 @@ describe('Queues', () => {
const { id } = await payload.jobs.queue({
workflow: 'inlineTaskTest',
input: {
message: 'hello!',
message: 'deleteJobOnComplete test',
},
})
const before = await payload.findByID({ collection: 'payload-jobs', id, disableErrors: true })
expect(before.id).toBe(id)
expect(before?.id).toBe(id)
await payload.jobs.run()
@@ -690,6 +676,21 @@ describe('Queues', () => {
expect(after).toBeNull()
})
it('should not delete failed jobs if deleteJobOnComplete is true', async () => {
const { id } = await payload.jobs.queue({
workflow: 'failsImmediately',
input: {},
})
const before = await payload.findByID({ collection: 'payload-jobs', id, disableErrors: true })
expect(before?.id).toBe(id)
await payload.jobs.run()
const after = await payload.findByID({ collection: 'payload-jobs', id, disableErrors: true })
expect(after?.id).toBe(id)
})
it('should respect deleteJobOnComplete false configuration', async () => {
payload.config.jobs.deleteJobOnComplete = false
const { id } = await payload.jobs.queue({
@@ -700,14 +701,12 @@ describe('Queues', () => {
})
const before = await payload.findByID({ collection: 'payload-jobs', id, disableErrors: true })
expect(before.id).toBe(id)
expect(before?.id).toBe(id)
await payload.jobs.run()
const after = await payload.findByID({ collection: 'payload-jobs', id, disableErrors: true })
expect(after.id).toBe(id)
payload.config.jobs.deleteJobOnComplete = true
expect(after?.id).toBe(id)
})
it('can queue single tasks', async () => {
@@ -726,7 +725,7 @@ describe('Queues', () => {
})
expect(allSimples.totalDocs).toBe(1)
expect(allSimples.docs[0].title).toBe('from single task')
expect(allSimples.docs[0]?.title).toBe('from single task')
})
it('can queue and run via the endpoint single tasks without workflows', async () => {
@@ -751,7 +750,7 @@ describe('Queues', () => {
})
expect(allSimples.totalDocs).toBe(1)
expect(allSimples.docs[0].title).toBe('from single task')
expect(allSimples.docs[0]?.title).toBe('from single task')
payload.config.jobs.workflows = workflowsRef
})
@@ -885,8 +884,8 @@ describe('Queues', () => {
})
expect(allSimples.totalDocs).toBe(8)
expect(allSimples.docs[0].title).toBe('from single task')
expect(allSimples.docs[7].title).toBe('from single task')
expect(allSimples.docs[0]?.title).toBe('from single task')
expect(allSimples.docs[7]?.title).toBe('from single task')
})
it('can queue single tasks hundreds of times', async () => {
@@ -912,9 +911,8 @@ describe('Queues', () => {
})
expect(allSimples.totalDocs).toBe(numberOfTasks) // Default limit: 10
expect(allSimples.docs[0].title).toBe('from single task')
expect(allSimples.docs[numberOfTasks - 1].title).toBe('from single task')
payload.config.jobs.deleteJobOnComplete = true
expect(allSimples.docs[0]?.title).toBe('from single task')
expect(allSimples.docs[numberOfTasks - 1]?.title).toBe('from single task')
})
it('ensure default jobs run limit of 10 works', async () => {
@@ -935,8 +933,8 @@ describe('Queues', () => {
})
expect(allSimples.totalDocs).toBe(10) // Default limit: 10
expect(allSimples.docs[0].title).toBe('from single task')
expect(allSimples.docs[9].title).toBe('from single task')
expect(allSimples.docs[0]?.title).toBe('from single task')
expect(allSimples.docs[9]?.title).toBe('from single task')
})
it('ensure jobs run limit can be customized', async () => {
@@ -959,9 +957,9 @@ describe('Queues', () => {
})
expect(allSimples.totalDocs).toBe(42) // Default limit: 10
expect(allSimples.docs[0].title).toBe('from single task')
expect(allSimples.docs[30].title).toBe('from single task')
expect(allSimples.docs[41].title).toBe('from single task')
expect(allSimples.docs[0]?.title).toBe('from single task')
expect(allSimples.docs[30]?.title).toBe('from single task')
expect(allSimples.docs[41]?.title).toBe('from single task')
})
it('can queue different kinds of single tasks multiple times', async () => {
@@ -1026,7 +1024,7 @@ describe('Queues', () => {
})
expect(allSimples.totalDocs).toBe(1)
expect(allSimples.docs[0].title).toBe('external')
expect(allSimples.docs[0]?.title).toBe('external')
})
it('can queue external workflow that is running external task', async () => {
@@ -1045,13 +1043,13 @@ describe('Queues', () => {
})
expect(allSimples.totalDocs).toBe(1)
expect(allSimples.docs[0].title).toBe('externalWorkflow')
expect(allSimples.docs[0]?.title).toBe('externalWorkflow')
})
it('ensure payload.jobs.runByID works and only runs the specified job', async () => {
payload.config.jobs.deleteJobOnComplete = false
let lastJobID: string = null
let lastJobID: null | string = null
for (let i = 0; i < 3; i++) {
const job = await payload.jobs.queue({
task: 'CreateSimple',
@@ -1061,6 +1059,9 @@ describe('Queues', () => {
})
lastJobID = job.id
}
if (!lastJobID) {
throw new Error('No job ID found after queuing jobs')
}
await payload.jobs.runByID({
id: lastJobID,
@@ -1072,7 +1073,7 @@ describe('Queues', () => {
})
expect(allSimples.totalDocs).toBe(1)
expect(allSimples.docs[0].title).toBe('from single task')
expect(allSimples.docs[0]?.title).toBe('from single task')
const allCompletedJobs = await payload.find({
collection: 'payload-jobs',
@@ -1085,13 +1086,13 @@ describe('Queues', () => {
})
expect(allCompletedJobs.totalDocs).toBe(1)
expect(allCompletedJobs.docs[0].id).toBe(lastJobID)
expect(allCompletedJobs.docs[0]?.id).toBe(lastJobID)
})
it('ensure where query for id in payload.jobs.run works and only runs the specified job', async () => {
payload.config.jobs.deleteJobOnComplete = false
let lastJobID: string = null
let lastJobID: null | string = null
for (let i = 0; i < 3; i++) {
const job = await payload.jobs.queue({
task: 'CreateSimple',
@@ -1101,6 +1102,9 @@ describe('Queues', () => {
})
lastJobID = job.id
}
if (!lastJobID) {
throw new Error('No job ID found after queuing jobs')
}
await payload.jobs.run({
where: {
@@ -1116,7 +1120,7 @@ describe('Queues', () => {
})
expect(allSimples.totalDocs).toBe(1)
expect(allSimples.docs[0].title).toBe('from single task')
expect(allSimples.docs[0]?.title).toBe('from single task')
const allCompletedJobs = await payload.find({
collection: 'payload-jobs',
@@ -1129,7 +1133,7 @@ describe('Queues', () => {
})
expect(allCompletedJobs.totalDocs).toBe(1)
expect(allCompletedJobs.docs[0].id).toBe(lastJobID)
expect(allCompletedJobs.docs[0]?.id).toBe(lastJobID)
})
it('ensure where query for input data in payload.jobs.run works and only runs the specified job', async () => {
@@ -1158,7 +1162,7 @@ describe('Queues', () => {
})
expect(allSimples.totalDocs).toBe(1)
expect(allSimples.docs[0].title).toBe('from single task 2')
expect(allSimples.docs[0]?.title).toBe('from single task 2')
const allCompletedJobs = await payload.find({
collection: 'payload-jobs',
@@ -1171,7 +1175,7 @@ describe('Queues', () => {
})
expect(allCompletedJobs.totalDocs).toBe(1)
expect((allCompletedJobs.docs[0].input as any).message).toBe('from single task 2')
expect((allCompletedJobs.docs[0]?.input as any).message).toBe('from single task 2')
})
it('can run sub-tasks', async () => {
@@ -1191,24 +1195,24 @@ describe('Queues', () => {
})
expect(allSimples.totalDocs).toBe(2)
expect(allSimples.docs[0].title).toBe('hello!')
expect(allSimples.docs[1].title).toBe('hello!')
expect(allSimples.docs[0]?.title).toBe('hello!')
expect(allSimples.docs[1]?.title).toBe('hello!')
const jobAfterRun = await payload.findByID({
collection: 'payload-jobs',
id: job.id,
})
expect(jobAfterRun.log[0].taskID).toBe('create doc 1')
expect(jobAfterRun?.log?.[0]?.taskID).toBe('create doc 1')
//expect(jobAfterRun.log[0].parent.taskID).toBe('create two docs')
// jobAfterRun.log[0].parent should not exist
expect(jobAfterRun.log[0].parent).toBeUndefined()
expect(jobAfterRun?.log?.[0]?.parent).toBeUndefined()
expect(jobAfterRun.log[1].taskID).toBe('create doc 2')
expect(jobAfterRun?.log?.[1]?.taskID).toBe('create doc 2')
//expect(jobAfterRun.log[1].parent.taskID).toBe('create two docs')
expect(jobAfterRun.log[1].parent).toBeUndefined()
expect(jobAfterRun?.log?.[1]?.parent).toBeUndefined()
expect(jobAfterRun.log[2].taskID).toBe('create two docs')
expect(jobAfterRun?.log?.[2]?.taskID).toBe('create two docs')
})
it('ensure successful sub-tasks are not retried', async () => {
@@ -1237,7 +1241,7 @@ describe('Queues', () => {
})
expect(allSimples.totalDocs).toBe(1)
expect(allSimples.docs[0].title).toBe('hello!')
expect(allSimples?.docs?.[0]?.title).toBe('hello!')
const jobAfterRun = await payload.findByID({
collection: 'payload-jobs',
@@ -1339,8 +1343,8 @@ describe('Queues', () => {
expect(jobAfterRun.hasError).toBe(true)
expect(jobAfterRun.log?.length).toBe(1)
expect(jobAfterRun.log[0].error.message).toBe('failed')
expect(jobAfterRun.log[0].state).toBe('failed')
expect(jobAfterRun?.log?.[0]?.error?.message).toBe('failed')
expect(jobAfterRun?.log?.[0]?.state).toBe('failed')
})
it('can tasks return error', async () => {
@@ -1360,8 +1364,8 @@ describe('Queues', () => {
expect(jobAfterRun.hasError).toBe(true)
expect(jobAfterRun.log?.length).toBe(1)
expect(jobAfterRun.log[0].error.message).toBe('failed')
expect(jobAfterRun.log[0].state).toBe('failed')
expect(jobAfterRun?.log?.[0]?.error?.message).toBe('Task handler returned a failed state')
expect(jobAfterRun?.log?.[0]?.state).toBe('failed')
})
it('can tasks return error with custom error message', async () => {
@@ -1383,8 +1387,8 @@ describe('Queues', () => {
expect(jobAfterRun.hasError).toBe(true)
expect(jobAfterRun.log?.length).toBe(1)
expect(jobAfterRun.log[0].error.message).toBe('custom error message')
expect(jobAfterRun.log[0].state).toBe('failed')
expect(jobAfterRun?.log?.[0]?.error?.message).toBe('custom error message')
expect(jobAfterRun?.log?.[0]?.state).toBe('failed')
})
it('can reliably run workflows with parallel tasks', async () => {

View File

@@ -123,6 +123,7 @@ export interface Config {
workflowRetries2TasksRetriesUndefined: WorkflowWorkflowRetries2TasksRetriesUndefined;
workflowRetries2TasksRetries0: WorkflowWorkflowRetries2TasksRetries0;
inlineTaskTest: WorkflowInlineTaskTest;
failsImmediately: WorkflowFailsImmediately;
inlineTaskTestDelayed: WorkflowInlineTaskTestDelayed;
externalWorkflow: WorkflowExternalWorkflow;
retriesBackoffTest: WorkflowRetriesBackoffTest;
@@ -314,6 +315,7 @@ export interface PayloadJob {
| 'workflowRetries2TasksRetriesUndefined'
| 'workflowRetries2TasksRetries0'
| 'inlineTaskTest'
| 'failsImmediately'
| 'inlineTaskTestDelayed'
| 'externalWorkflow'
| 'retriesBackoffTest'
@@ -724,6 +726,13 @@ export interface WorkflowInlineTaskTest {
message: string;
};
}
/**
* This interface was referenced by `Config`'s JSON-Schema
* via the `definition` "WorkflowFailsImmediately".
*/
export interface WorkflowFailsImmediately {
input?: unknown;
}
/**
* This interface was referenced by `Config`'s JSON-Schema
* via the `definition` "WorkflowInlineTaskTestDelayed".

View File

@@ -1,14 +1,8 @@
import type { Payload } from 'payload'
import path from 'path'
import { fileURLToPath } from 'url'
import { devUser } from '../credentials.js'
import { seedDB } from '../helpers/seed.js'
const filename = fileURLToPath(import.meta.url)
const dirname = path.dirname(filename)
export const seed = async (_payload: Payload) => {
await _payload.create({
collection: 'users',
@@ -22,9 +16,11 @@ export const seed = async (_payload: Payload) => {
export async function clearAndSeedEverything(_payload: Payload) {
return await seedDB({
_payload,
collectionSlugs: _payload.config.collections.map((collection) => collection.slug),
collectionSlugs: [
..._payload.config.collections.map((collection) => collection.slug),
'payload-jobs',
],
seedFunction: seed,
snapshotKey: 'fieldsTest',
uploadsDir: path.resolve(dirname, './collections/Upload/uploads'),
snapshotKey: 'queuesTest',
})
}

View File

@@ -0,0 +1,10 @@
import type { WorkflowConfig } from 'payload'
export const failsImmediatelyWorkflow: WorkflowConfig<'failsImmediately'> = {
slug: 'failsImmediately',
inputSchema: [],
retries: 0,
handler: () => {
throw new Error('This workflow fails immediately')
},
}