fix(plugin-import-export): duplicated rows and headers in CSV export when streaming paginated results (#12941)

This PR fixes an issue in the export logic where CSV downloads would
include duplicate rows and repeated column headers across paginated
batches.

Key changes:
- Ensured `page` is incremented correctly after each `payload.find` call
- Captured the CSV column list from the first batch and wrote column headers only once
- Prevented row duplication by removing the unused `result` initialization
  and tracking the current `page` in a local counter instead of mutating `findArgs`
- Streamlined both download and non-download logic for consistent batch
processing

This resolves incorrect row counts and header duplication in large CSV
exports.
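
A minimal, self-contained sketch of the pattern this change applies (not the
plugin's actual code): page, header, and column state live outside the stream's
`read()` callback, the column order is pinned from the first batch, headers are
emitted once, and the stream ends when a page comes back empty or reports no
next page. `fetchPage` below is a hypothetical stand-in for
`payload.find({ ...findArgs, page })`.

```ts
// Minimal sketch of the corrected streaming pattern, not the plugin's actual code.
// `fetchPage` is a hypothetical stand-in for `payload.find({ ...findArgs, page })`.
import { Readable } from 'node:stream'
import { stringify } from 'csv-stringify/sync'

type Page = { docs: Record<string, unknown>[]; hasNextPage: boolean }

const fetchPage = async (page: number): Promise<Page> => {
  const all = [
    { id: 1, title: 'One' },
    { id: 2, title: 'Two' },
    { id: 3, title: 'Three' },
  ]
  const pageSize = 2
  const docs = all.slice((page - 1) * pageSize, page * pageSize)
  return { docs, hasNextPage: page * pageSize < all.length }
}

const encoder = new TextEncoder()
// Header, column, and page state live outside read() so they survive across batches.
let isFirstBatch = true
let columns: string[] | undefined
let page = 1

const csvStream = new Readable({
  async read() {
    const result = await fetchPage(page)

    // An empty page means we are done: end the stream without emitting anything.
    if (result.docs.length === 0) {
      this.push(null)
      return
    }

    // Pin the column order from the first batch so later batches stay aligned.
    if (isFirstBatch) {
      columns = Object.keys(result.docs[0] ?? {})
    }

    // Emit the header row only for the first batch.
    const csv = stringify(result.docs, { header: isFirstBatch, columns })
    this.push(encoder.encode(csv))
    isFirstBatch = false

    if (!result.hasNextPage) {
      this.push(null) // end of stream
    }
    page += 1
  },
})

csvStream.on('data', (chunk) => process.stdout.write(chunk))
```

Passing the captured `columns` to `stringify` keeps every batch in the same
column order even when later documents are missing fields the first batch had.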
Author: Patrik
Date: 2025-06-26 09:09:17 -04:00 (committed by GitHub)
Parent: 8900a38678
Commit: 5cf92878a4
2 changed files with 52 additions and 31 deletions


@@ -1,5 +1,5 @@
 /* eslint-disable perfectionist/sort-objects */
-import type { PaginatedDocs, PayloadRequest, Sort, User, Where } from 'payload'
+import type { PayloadRequest, Sort, User, Where } from 'payload'
 import { stringify } from 'csv-stringify/sync'
 import { APIError } from 'payload'
@@ -104,8 +104,6 @@ export const createExport = async (args: CreateExportArgs) => {
   req.payload.logger.info({ message: 'Find arguments:', findArgs })
 }
-let result: PaginatedDocs = { hasNextPage: true } as PaginatedDocs
 const toCSVFunctions = getCustomFieldFunctions({
   fields: collectionConfig.flattenedFields,
   select,
@@ -115,34 +113,50 @@ export const createExport = async (args: CreateExportArgs) => {
 if (debug) {
   req.payload.logger.info('Starting download stream')
 }
 const encoder = new TextEncoder()
+let isFirstBatch = true
+let columns: string[] | undefined
+let page = 1
 const stream = new Readable({
   async read() {
-    let result = await payload.find(findArgs)
-    let isFirstBatch = true
+    const result = await payload.find({
+      ...findArgs,
+      page,
+    })
-    while (result.docs.length > 0) {
-      if (debug) {
-        req.payload.logger.info(
-          `Processing batch ${findArgs.page + 1} with ${result.docs.length} documents`,
-        )
-      }
-      const csvInput = result.docs.map((doc) => flattenObject({ doc, fields, toCSVFunctions }))
-      const csvString = stringify(csvInput, { header: isFirstBatch })
-      this.push(encoder.encode(csvString))
-      isFirstBatch = false
-      if (!result.hasNextPage) {
-        if (debug) {
-          req.payload.logger.info('Stream complete - no more pages')
-        }
-        this.push(null) // End the stream
-        break
-      }
-      findArgs.page += 1
-      result = await payload.find(findArgs)
-    }
+    if (debug) {
+      req.payload.logger.info(`Processing batch ${page} with ${result.docs.length} documents`)
+    }
+    if (result.docs.length === 0) {
+      this.push(null)
+      return
+    }
+    const csvInput = result.docs.map((doc) => flattenObject({ doc, fields, toCSVFunctions }))
+    if (isFirstBatch) {
+      columns = Object.keys(csvInput[0] ?? {})
+    }
+    const csvString = stringify(csvInput, {
+      header: isFirstBatch,
+      columns,
+    })
+    this.push(encoder.encode(csvString))
+    isFirstBatch = false
+    if (!result.hasNextPage) {
+      if (debug) {
+        req.payload.logger.info('Stream complete - no more pages')
+      }
+      this.push(null) // End the stream
+    }
+    page += 1
   },
 })
@@ -159,10 +173,14 @@ export const createExport = async (args: CreateExportArgs) => {
 }
 const outputData: string[] = []
 let isFirstBatch = true
+let page = 1
+let hasNextPage = true
-while (result.hasNextPage) {
-  findArgs.page += 1
-  result = await payload.find(findArgs)
+while (hasNextPage) {
+  const result = await payload.find({
+    ...findArgs,
+    page,
+  })
   if (debug) {
     req.payload.logger.info(
@@ -178,6 +196,9 @@ export const createExport = async (args: CreateExportArgs) => {
     const jsonInput = result.docs.map((doc) => JSON.stringify(doc))
     outputData.push(jsonInput.join(',\n'))
   }
+  hasNextPage = result.hasNextPage
+  page += 1
 }
 const buffer = Buffer.from(format === 'json' ? `[${outputData.join(',')}]` : outputData.join(''))


@@ -581,8 +581,8 @@ describe('@payloadcms/plugin-import-export', () => {
 let promises = []
 for (let i = 0; i < 100000; i++) {
   promises.push(
-    payload.create({
-      collectionSlug: 'pages',
+    await payload.create({
+      collection: 'pages',
       data: {
         title: `Array ${i}`,
         blocks: [