From 05280c2d474fce08bfb19cc2178949e5d384c999 Mon Sep 17 00:00:00 2001 From: "richard.bunel@opendigitaleducation.com" Date: Fri, 25 Mar 2022 07:53:15 +0100 Subject: [PATCH 1/2] TEXT-215: Prevent decimal numeric entities from wrongly including hexadecimal characters --- .../apache/commons/text/translate/NumericEntityUnescaper.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java b/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java index 57f1b5e80c..071de602e7 100644 --- a/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java +++ b/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java @@ -115,8 +115,8 @@ public int translate(final CharSequence input, final int index, final Writer wri int end = start; // Note that this supports character codes without a ; on the end while (end < seqEnd && (input.charAt(end) >= '0' && input.charAt(end) <= '9' - || input.charAt(end) >= 'a' && input.charAt(end) <= 'f' - || input.charAt(end) >= 'A' && input.charAt(end) <= 'F')) { + || isHex && (input.charAt(end) >= 'a' && input.charAt(end) <= 'f' + || input.charAt(end) >= 'A' && input.charAt(end) <= 'F'))) { end++; } From 4deb957636be056dcad7156ea5c5df72b1c0bb90 Mon Sep 17 00:00:00 2001 From: "richard.bunel@opendigitaleducation.com" Date: Fri, 25 Mar 2022 10:59:43 +0100 Subject: [PATCH 2/2] TEXT-215: Add unit test for 'semiColonOptional' option in NumericEntityUnescaper --- .../text/translate/NumericEntityUnescaperTest.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/test/java/org/apache/commons/text/translate/NumericEntityUnescaperTest.java b/src/test/java/org/apache/commons/text/translate/NumericEntityUnescaperTest.java index 4770b050ed..5eaa9c2e19 100644 --- a/src/test/java/org/apache/commons/text/translate/NumericEntityUnescaperTest.java +++ 
b/src/test/java/org/apache/commons/text/translate/NumericEntityUnescaperTest.java @@ -95,4 +95,13 @@ public void testCreatesNumericEntityUnescaperTwo() { assertThat(numericEntityUnescaper.translate("Ws2v8|O=7NR&#cB")).isEqualTo("Ws2v8|O=7NR&#cB"); } + @Test + public void testSemiColonOptional() { + NumericEntityUnescaper neu = new NumericEntityUnescaper(NumericEntityUnescaper.OPTION.semiColonOptional); + assertThat(neu.translate("This is a &#x74;est")).as("Test hexadecimal entity with semi-colon").isEqualTo("This is a test"); + assertThat(neu.translate("This is a &#x74Est")).as("Test hexadecimal entity without semi-colon").isEqualTo("This is a \u074Est"); + assertThat(neu.translate("This is a &#116;est")).as("Test decimal entity with semi-colon").isEqualTo("This is a test"); + assertThat(neu.translate("This is a &#116est")).as("Test decimal entity without semi-colon").isEqualTo("This is a test"); + } + }