Skip to content

Commit 17037ba

Browse files
author
oleg gurev
committed Feb 14, 2024
[PBCKP-913] Fix WAL switching with huge XLogRecord
- Backport of PBCKP-859 bugfix - increase current segment number when reader has already read it before - avoid error if reader has to switch WAL again - add python test for PAGE backup with huge XLog record
1 parent 287e7fc commit 17037ba

File tree

2 files changed

+55
-0
lines changed

2 files changed

+55
-0
lines changed
 

Diff for: ‎src/parsexlog.c

+8
Original file line numberDiff line numberDiff line change
@@ -1588,9 +1588,14 @@ SwitchThreadToNextWal(XLogReaderState *xlogreader, xlog_thread_arg *arg)
15881588
reader_data = (XLogReaderData *) xlogreader->private_data;
15891589
reader_data->need_switch = false;
15901590

1591+
start:
15911592
/* Critical section */
15921593
pthread_lock(&wal_segment_mutex);
15931594
Assert(segno_next);
1595+
1596+
if (reader_data->xlogsegno > segno_next)
1597+
segno_next = reader_data->xlogsegno;
1598+
15941599
reader_data->xlogsegno = segno_next;
15951600
segnum_read++;
15961601
segno_next++;
@@ -1604,6 +1609,7 @@ SwitchThreadToNextWal(XLogReaderState *xlogreader, xlog_thread_arg *arg)
16041609
GetXLogRecPtr(reader_data->xlogsegno, 0, wal_seg_size, arg->startpoint);
16051610
/* We need to close previously opened file if it wasn't closed earlier */
16061611
CleanupXLogPageRead(xlogreader);
1612+
xlogreader->currRecPtr = InvalidXLogRecPtr;
16071613
/* Skip over the page header and contrecord if any */
16081614
found = XLogFindNextRecord(xlogreader, arg->startpoint);
16091615

@@ -1613,6 +1619,8 @@ SwitchThreadToNextWal(XLogReaderState *xlogreader, xlog_thread_arg *arg)
16131619
*/
16141620
if (XLogRecPtrIsInvalid(found))
16151621
{
1622+
if (reader_data->need_switch)
1623+
goto start;
16161624
/*
16171625
* Check if we need to stop reading. We stop if other thread found a
16181626
* target segment.

Diff for: ‎tests/page_test.py

+47
Original file line numberDiff line numberDiff line change
@@ -1415,3 +1415,50 @@ def test_page_pg_resetxlog(self):
14151415
#
14161416
# pgdata_restored = self.pgdata_content(node_restored.data_dir)
14171417
# self.compare_pgdata(pgdata, pgdata_restored)
1418+
1419+
def test_page_huge_xlog_record(self):
    """
    PAGE backup must not fail when the WAL stream contains a single
    record large enough to force the reader to switch segments while
    parsing it (PBCKP-913 / PBCKP-859 regression test).
    """
    backup_dir = os.path.join(self.tmp_path, self.module_name, self.fname, 'backup')

    # Generous memory/lock settings plus logical wal_level so that
    # pg_logical_emit_message below can produce one huge WAL record.
    node = self.make_simple_node(
        base_dir=os.path.join(self.module_name, self.fname, 'node'),
        set_replication=True,
        initdb_params=['--data-checksums'],
        pg_options={
            'max_locks_per_transaction': '1000',
            'work_mem': '100MB',
            'temp_buffers': '100MB',
            'wal_buffers': '128MB',
            'wal_level': 'logical',
        })

    self.init_pb(backup_dir)
    self.add_instance(backup_dir, 'node', node)
    self.set_archiving(backup_dir, 'node', node)
    node.slow_start()

    node.pgbench_init(scale=3)

    # Baseline FULL backup that the subsequent PAGE backup builds on.
    self.backup_node(backup_dir, 'node', node, backup_type='full')
    full_info = self.show_pb(backup_dir, 'node')[0]
    self.assertEqual(full_info['status'], "OK")
    self.assertEqual(full_info['backup-mode'], "FULL")

    # The original customer case was a transaction (probably a client
    # disconnect dropping many temp tables) that produced a ~40MB
    # COMMIT record. Emitting a logical message is a much simpler and
    # faster way to generate a comparably huge single WAL record.
    node.safe_psql(
        "postgres",
        "select pg_logical_emit_message(False, 'z', repeat('o', 60*1000*1000))")

    # The PAGE backup must parse the WAL containing the huge record.
    self.backup_node(backup_dir, 'node', node, backup_type='page')
    page_info = self.show_pb(backup_dir, 'node')[1]
    self.assertEqual(page_info['status'], "OK")
    self.assertEqual(page_info['backup-mode'], "PAGE")

0 commit comments

Comments
 (0)
Please sign in to comment.