diff --git a/ext/dom/html_document.c b/ext/dom/html_document.c
index ed7454dd89d43..ca6d215154e2a 100644
--- a/ext/dom/html_document.c
+++ b/ext/dom/html_document.c
@@ -528,9 +528,32 @@ static bool dom_decode_encode_fast_path(
size_t *tree_error_offset
)
{
- decoding_encoding_ctx->decode.status = LXB_STATUS_OK;
-
const lxb_char_t *buf_ref = *buf_ref_ref;
+
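+ /* If the previous call stopped in the middle of a multi-byte sequence (status is LXB_STATUS_CONTINUE), finish decoding that pending code point and pass it to the parser before continuing with the rest of this buffer. */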
+ if (decoding_encoding_ctx->decode.status == LXB_STATUS_CONTINUE) {
+ lxb_char_t buf[4];
+ lxb_char_t *buf_ptr = buf;
+ lxb_codepoint_t codepoint = lxb_encoding_decode_utf_8_single(&decoding_encoding_ctx->decode, &buf_ref, buf_end);
+ if (lxb_encoding_encode_utf_8_single(&decoding_encoding_ctx->encode, &buf_ptr, buf + sizeof(buf), codepoint) > sizeof(buf)) {
+ buf_ptr = zend_mempcpy(buf, LXB_ENCODING_REPLACEMENT_BYTES, LXB_ENCODING_REPLACEMENT_SIZE);
+ }
+ decoding_encoding_ctx->decode.status = LXB_STATUS_OK;
+
+ if (!dom_process_parse_chunk(
+ ctx,
+ document,
+ parser,
+ buf_ptr - buf,
+ buf,
+ buf_ptr - buf,
+ tokenizer_error_offset,
+ tree_error_offset
+ )) {
+ goto fail_oom;
+ }
+ }
+
const lxb_char_t *last_output = buf_ref;
while (buf_ref != buf_end) {
/* Fast path converts non-validated UTF-8 -> validated UTF-8 */
diff --git a/ext/dom/tests/modern/html/encoding/gh17481.phpt b/ext/dom/tests/modern/html/encoding/gh17481.phpt
new file mode 100644
index 0000000000000..74e13e1300942
--- /dev/null
+++ b/ext/dom/tests/modern/html/encoding/gh17481.phpt
@@ -0,0 +1,33 @@
+--TEST--
+GH-17481 (UTF-8 corruption in \Dom\HTMLDocument)
+--EXTENSIONS--
+dom
+--FILE--
+$input";
+ if ($endTag) {
+ $Data .= '';
+ }
+ $Document = \Dom\HTMLDocument::createFromString($Data, 0, 'UTF-8');
+ var_dump($Document->body->textContent === $input);
+}
+
+?>
+--EXPECT--
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)