From fa122d3dfef5359f5af3535ed754083c6f16578e Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Thu, 19 Dec 2019 14:34:48 -0700 Subject: [PATCH] Java: Use StringBuilder in decodeURI and guard against H(null)L pattern --- .../fraser/neil/plaintext/diff_match_patch.java | 13 +++++++++++-- .../neil/plaintext/diff_match_patch_test.java | 6 ++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/java/src/name/fraser/neil/plaintext/diff_match_patch.java b/java/src/name/fraser/neil/plaintext/diff_match_patch.java index 9f3f71e..71e6480 100644 --- a/java/src/name/fraser/neil/plaintext/diff_match_patch.java +++ b/java/src/name/fraser/neil/plaintext/diff_match_patch.java @@ -1507,7 +1507,7 @@ private int digit16(char b) throws IllegalArgumentException { private String decodeURI(String text) throws IllegalArgumentException { int i = 0; - StringBuffer decoded = new StringBuffer(""); + StringBuilder decoded = new StringBuilder(text.length()); while (i < text.length()) { if (text.charAt(i) != '%') { @@ -1576,7 +1576,16 @@ private String decodeURI(String text) throws IllegalArgumentException { throw new IllegalArgumentException(); } - return decoded.toString(); + // some objective-c versions of the library produced patches with + // (null) in the place where surrogates were split across diff + // boundaries. if we leave those in we'll be stuck with a + // high-surrogate (null) low-surrogate pattern that will break + // deeper in the library or consuming application. 
we'll "fix" + // these by dropping the (null) and re-joining the surrogate halves + return decoded.toString().replaceAll( + "([\\uD800-\\uDBFF])\\(null\\)([\\uDC00-\\uDFFF])", + "$1$2" + ); } /** diff --git a/java/tests/name/fraser/neil/plaintext/diff_match_patch_test.java b/java/tests/name/fraser/neil/plaintext/diff_match_patch_test.java index 5be10f1..04aa860 100644 --- a/java/tests/name/fraser/neil/plaintext/diff_match_patch_test.java +++ b/java/tests/name/fraser/neil/plaintext/diff_match_patch_test.java @@ -460,6 +460,12 @@ public static void testDiffDelta() { dmp.diff_toDelta(dmp.diff_fromDelta("\ud83c\udd70", "=1\t-1\t+%ED%B5%B1")) ); + assertEquals( + "diff_fromDelta: Invalid diff from objective-c with (null) string", + diffList(new Diff(INSERT, "\ud83c\udd70")), + dmp.diff_fromDelta("", "+%ED%A0%BC%28null%29%ED%B5%B0") + ); + // Verify pool of unchanged characters. diffs = diffList(new Diff(INSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ")); String text2 = dmp.diff_text2(diffs);