diff --git a/src/main/java/org/apache/commons/text/StringEscapeUtils.java b/src/main/java/org/apache/commons/text/StringEscapeUtils.java index 0c6162882c..a43c8911f6 100644 --- a/src/main/java/org/apache/commons/text/StringEscapeUtils.java +++ b/src/main/java/org/apache/commons/text/StringEscapeUtils.java @@ -328,6 +328,20 @@ public int translate(final CharSequence input, final int index, final Writer wri new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE), new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE) ); + /** + * Translator object for escaping HTML version 5.0. + * + * While {@link #escapeHtml5(String)} is the expected method of use, this + * object allows the HTML escaping functionality to be used + * as the foundation for a custom translator. + * + * The aggregate is built from four lookup tables, passed in this order: + * {@code EntityArrays.HTML50_EXTENDED_ESCAPE}, {@code EntityArrays.HTML40_EXTENDED_ESCAPE}, + * {@code EntityArrays.ISO8859_1_ESCAPE} and {@code EntityArrays.BASIC_ESCAPE}. + */ + public static final CharSequenceTranslator ESCAPE_HTML5 = + new AggregateTranslator( + new LookupTranslator(EntityArrays.HTML50_EXTENDED_ESCAPE), + new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE), + new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE), + new LookupTranslator(EntityArrays.BASIC_ESCAPE) + ); /** * Translator object for escaping individual Comma Separated Values. * @@ -445,6 +459,22 @@ public int translate(final CharSequence input, final int index, final Writer wri new NumericEntityUnescaper() ); + /** + * Translator object for unescaping escaped HTML 5.0. + * + * While {@link #unescapeHtml5(String)} is the expected method of use, this + * object allows the HTML unescaping functionality to be used + * as the foundation for a custom translator. + * + * The aggregate is built from four lookup tables, passed in this order: + * {@code EntityArrays.HTML50_EXTENDED_UNESCAPE}, {@code EntityArrays.HTML40_EXTENDED_UNESCAPE}, + * {@code EntityArrays.ISO8859_1_UNESCAPE} and {@code EntityArrays.BASIC_UNESCAPE}, + * followed by a {@code NumericEntityUnescaper}. + */ + public static final CharSequenceTranslator UNESCAPE_HTML5 = + new AggregateTranslator( + new LookupTranslator(EntityArrays.HTML50_EXTENDED_UNESCAPE), + new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE), + new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE), + new LookupTranslator(EntityArrays.BASIC_UNESCAPE), + new NumericEntityUnescaper() + ); + /** * Translator object for unescaping escaped XML. 
* @@ -588,6 +618,22 @@ public static final String escapeHtml4(final String input) { return ESCAPE_HTML4.translate(input); } + // HTML and XML + //-------------------------------------------------------------------------- + /** + * Escapes the characters in a {@code String} using HTML entities. + * + *

<p>Supports all known HTML 5.0 entities.</p>

+ * + * @param input the {@code String} to escape, may be null + * @return a new escaped {@code String}, {@code null} if null string input + * + * @see HTML 5.0 Entities + */ + public static final String escapeHtml5(final String input) { + return ESCAPE_HTML5.translate(input); + } + // Java and JavaScript //-------------------------------------------------------------------------- /** @@ -792,6 +838,18 @@ public static final String unescapeHtml4(final String input) { return UNESCAPE_HTML4.translate(input); } + /** + * Unescapes a string containing entity escapes to a string + * containing the actual Unicode characters corresponding to the + * escapes. Supports all known HTML 5.0 entities. + * + * This is a convenience wrapper that delegates to {@link #UNESCAPE_HTML5}. + * + * @param input the {@code String} to unescape, may be null + * @return a new unescaped {@code String}, {@code null} if null string input + */ + public static final String unescapeHtml5(final String input) { + return UNESCAPE_HTML5.translate(input); + } + /** * Unescapes any Java literals found in the {@code String}. * For example, it will turn a sequence of {@code '\'} and diff --git a/src/main/java/org/apache/commons/text/translate/EntityArrays.java b/src/main/java/org/apache/commons/text/translate/EntityArrays.java index 73f9e4730a..ca27b5ac6e 100644 --- a/src/main/java/org/apache/commons/text/translate/EntityArrays.java +++ b/src/main/java/org/apache/commons/text/translate/EntityArrays.java @@ -148,7 +148,7 @@ public class EntityArrays { /** * A Map<CharSequence, CharSequence> to escape additional * character entity - * references. Note that this must be used with {@link #ISO8859_1_ESCAPE} to get the full list of + * references. Note that this must be used with {@link #BASIC_ESCAPE} and {@link #ISO8859_1_ESCAPE} to get the full list of * HTML 4.0 character entities. 
*/ public static final Map HTML40_EXTENDED_ESCAPE; @@ -361,6 +361,2028 @@ public class EntityArrays { HTML40_EXTENDED_UNESCAPE = Collections.unmodifiableMap(invert(HTML40_EXTENDED_ESCAPE)); } + /** + * A Map<CharSequence, CharSequence> to escape additional + * character entity + * references. Note that this must be used with {@link #BASIC_ESCAPE}, {@link #ISO8859_1_ESCAPE} and {@link #HTML40_EXTENDED_ESCAPE} to get the full list of + * HTML 5.0 character entities. + */ + public static final Map HTML50_EXTENDED_ESCAPE; + + static { + final Map initialMap = new HashMap<>(); + initialMap.put("\u0009", " "); + /* "\u000A" is a Unicode control character pre-processed as a literal line break */ + initialMap.put("\u005C\u006E", " "); + initialMap.put("\u0021", "!"); + initialMap.put("\u005C\u0022", """); + initialMap.put("\u0023", "#"); + initialMap.put("\u0024", "$"); + initialMap.put("\u0025", "%"); + initialMap.put("\u0026", "&"); + initialMap.put("\u0027", "'"); + initialMap.put("\u0028", "("); + initialMap.put("\u0029", ")"); + initialMap.put("\u002A", "*"); + initialMap.put("\u002B", "+"); + initialMap.put("\u002C", ","); + initialMap.put("\u002E", "."); + initialMap.put("\u002F", "/"); + initialMap.put("\u003A", ":"); + initialMap.put("\u003B", ";"); + initialMap.put("\u003C", "<"); + initialMap.put("\u003C\u20D2", "<⃒"); + initialMap.put("\u003D", "="); + initialMap.put("\u003D\u20E5", "=⃥"); + initialMap.put("\u003E", ">"); + initialMap.put("\u003E\u20D2", ">⃒"); + initialMap.put("\u003F", "?"); + initialMap.put("\u0040", "@"); + initialMap.put("\u005B", "["); + initialMap.put("\u005C\u005C", "\"); + initialMap.put("\u005D", "]"); + initialMap.put("\u005E", "^"); + initialMap.put("\u005F", "_"); + initialMap.put("\u0060", "`"); + initialMap.put("\u0066\u006A", "fj"); + initialMap.put("\u007B", "{"); + initialMap.put("\u007C", "|"); + initialMap.put("\u007D", "}"); + initialMap.put("\u0100", "Ā"); + initialMap.put("\u0101", "ā"); + initialMap.put("\u0102", "Ă"); + 
initialMap.put("\u0103", "ă"); + initialMap.put("\u0104", "Ą"); + initialMap.put("\u0105", "ą"); + initialMap.put("\u0106", "Ć"); + initialMap.put("\u0107", "ć"); + initialMap.put("\u0108", "Ĉ"); + initialMap.put("\u0109", "ĉ"); + initialMap.put("\u010A", "Ċ"); + initialMap.put("\u010B", "ċ"); + initialMap.put("\u010C", "Č"); + initialMap.put("\u010D", "č"); + initialMap.put("\u010E", "Ď"); + initialMap.put("\u010F", "ď"); + initialMap.put("\u0110", "Đ"); + initialMap.put("\u0111", "đ"); + initialMap.put("\u0112", "Ē"); + initialMap.put("\u0113", "ē"); + initialMap.put("\u0116", "Ė"); + initialMap.put("\u0117", "ė"); + initialMap.put("\u0118", "Ę"); + initialMap.put("\u0119", "ę"); + initialMap.put("\u011A", "Ě"); + initialMap.put("\u011B", "ě"); + initialMap.put("\u011C", "Ĝ"); + initialMap.put("\u011D", "ĝ"); + initialMap.put("\u011E", "Ğ"); + initialMap.put("\u011F", "ğ"); + initialMap.put("\u0120", "Ġ"); + initialMap.put("\u0121", "ġ"); + initialMap.put("\u0122", "Ģ"); + initialMap.put("\u0124", "Ĥ"); + initialMap.put("\u0125", "ĥ"); + initialMap.put("\u0126", "Ħ"); + initialMap.put("\u0127", "ħ"); + initialMap.put("\u0128", "Ĩ"); + initialMap.put("\u0129", "ĩ"); + initialMap.put("\u012A", "Ī"); + initialMap.put("\u012B", "ī"); + initialMap.put("\u012E", "Į"); + initialMap.put("\u012F", "į"); + initialMap.put("\u0130", "İ"); + initialMap.put("\u0131", "ı"); + initialMap.put("\u0132", "IJ"); + initialMap.put("\u0133", "ij"); + initialMap.put("\u0134", "Ĵ"); + initialMap.put("\u0135", "ĵ"); + initialMap.put("\u0136", "Ķ"); + initialMap.put("\u0137", "ķ"); + initialMap.put("\u0138", "ĸ"); + initialMap.put("\u0139", "Ĺ"); + initialMap.put("\u013A", "ĺ"); + initialMap.put("\u013B", "Ļ"); + initialMap.put("\u013C", "ļ"); + initialMap.put("\u013D", "Ľ"); + initialMap.put("\u013E", "ľ"); + initialMap.put("\u013F", "Ŀ"); + initialMap.put("\u0140", "ŀ"); + initialMap.put("\u0141", "Ł"); + initialMap.put("\u0142", "ł"); + initialMap.put("\u0143", "Ń"); + 
initialMap.put("\u0144", "ń"); + initialMap.put("\u0145", "Ņ"); + initialMap.put("\u0146", "ņ"); + initialMap.put("\u0147", "Ň"); + initialMap.put("\u0148", "ň"); + initialMap.put("\u0149", "ʼn"); + initialMap.put("\u014A", "Ŋ"); + initialMap.put("\u014B", "ŋ"); + initialMap.put("\u014C", "Ō"); + initialMap.put("\u014D", "ō"); + initialMap.put("\u0150", "Ő"); + initialMap.put("\u0151", "ő"); + initialMap.put("\u0154", "Ŕ"); + initialMap.put("\u0155", "ŕ"); + initialMap.put("\u0156", "Ŗ"); + initialMap.put("\u0157", "ŗ"); + initialMap.put("\u0158", "Ř"); + initialMap.put("\u0159", "ř"); + initialMap.put("\u015A", "Ś"); + initialMap.put("\u015B", "ś"); + initialMap.put("\u015C", "Ŝ"); + initialMap.put("\u015D", "ŝ"); + initialMap.put("\u015E", "Ş"); + initialMap.put("\u015F", "ş"); + initialMap.put("\u0162", "Ţ"); + initialMap.put("\u0163", "ţ"); + initialMap.put("\u0164", "Ť"); + initialMap.put("\u0165", "ť"); + initialMap.put("\u0166", "Ŧ"); + initialMap.put("\u0167", "ŧ"); + initialMap.put("\u0168", "Ũ"); + initialMap.put("\u0169", "ũ"); + initialMap.put("\u016A", "Ū"); + initialMap.put("\u016B", "ū"); + initialMap.put("\u016C", "Ŭ"); + initialMap.put("\u016D", "ŭ"); + initialMap.put("\u016E", "Ů"); + initialMap.put("\u016F", "ů"); + initialMap.put("\u0170", "Ű"); + initialMap.put("\u0171", "ű"); + initialMap.put("\u0172", "Ų"); + initialMap.put("\u0173", "ų"); + initialMap.put("\u0174", "Ŵ"); + initialMap.put("\u0175", "ŵ"); + initialMap.put("\u0176", "Ŷ"); + initialMap.put("\u0177", "ŷ"); + initialMap.put("\u0179", "Ź"); + initialMap.put("\u017A", "ź"); + initialMap.put("\u017B", "Ż"); + initialMap.put("\u017C", "ż"); + initialMap.put("\u017D", "Ž"); + initialMap.put("\u017E", "ž"); + initialMap.put("\u01B5", "Ƶ"); + initialMap.put("\u01F5", "ǵ"); + initialMap.put("\u0237", "ȷ"); + initialMap.put("\u02C7", "ˇ"); + initialMap.put("\u02D8", "˘"); + initialMap.put("\u02D9", "˙"); + initialMap.put("\u02DA", "˚"); + initialMap.put("\u02DB", "˛"); + 
initialMap.put("\u02DD", "˝"); + initialMap.put("\u0311", "̑"); + initialMap.put("\u03D5", "ϕ"); + initialMap.put("\u03DC", "Ϝ"); + initialMap.put("\u03DD", "ϝ"); + initialMap.put("\u03F0", "ϰ"); + initialMap.put("\u03F1", "ϱ"); + initialMap.put("\u03F5", "ϵ"); + initialMap.put("\u03F6", "϶"); + initialMap.put("\u0401", "Ё"); + initialMap.put("\u0402", "Ђ"); + initialMap.put("\u0403", "Ѓ"); + initialMap.put("\u0404", "Є"); + initialMap.put("\u0405", "Ѕ"); + initialMap.put("\u0406", "І"); + initialMap.put("\u0407", "Ї"); + initialMap.put("\u0408", "Ј"); + initialMap.put("\u0409", "Љ"); + initialMap.put("\u040A", "Њ"); + initialMap.put("\u040B", "Ћ"); + initialMap.put("\u040C", "Ќ"); + initialMap.put("\u040E", "Ў"); + initialMap.put("\u040F", "Џ"); + initialMap.put("\u0410", "А"); + initialMap.put("\u0411", "Б"); + initialMap.put("\u0412", "В"); + initialMap.put("\u0413", "Г"); + initialMap.put("\u0414", "Д"); + initialMap.put("\u0415", "Е"); + initialMap.put("\u0416", "Ж"); + initialMap.put("\u0417", "З"); + initialMap.put("\u0418", "И"); + initialMap.put("\u0419", "Й"); + initialMap.put("\u041A", "К"); + initialMap.put("\u041B", "Л"); + initialMap.put("\u041C", "М"); + initialMap.put("\u041D", "Н"); + initialMap.put("\u041E", "О"); + initialMap.put("\u041F", "П"); + initialMap.put("\u0420", "Р"); + initialMap.put("\u0421", "С"); + initialMap.put("\u0422", "Т"); + initialMap.put("\u0423", "У"); + initialMap.put("\u0424", "Ф"); + initialMap.put("\u0425", "Х"); + initialMap.put("\u0426", "Ц"); + initialMap.put("\u0427", "Ч"); + initialMap.put("\u0428", "Ш"); + initialMap.put("\u0429", "Щ"); + initialMap.put("\u042A", "Ъ"); + initialMap.put("\u042B", "Ы"); + initialMap.put("\u042C", "Ь"); + initialMap.put("\u042D", "Э"); + initialMap.put("\u042E", "Ю"); + initialMap.put("\u042F", "Я"); + initialMap.put("\u0430", "а"); + initialMap.put("\u0431", "б"); + initialMap.put("\u0432", "в"); + initialMap.put("\u0433", "г"); + initialMap.put("\u0434", "д"); + 
initialMap.put("\u0435", "е"); + initialMap.put("\u0436", "ж"); + initialMap.put("\u0437", "з"); + initialMap.put("\u0438", "и"); + initialMap.put("\u0439", "й"); + initialMap.put("\u043A", "к"); + initialMap.put("\u043B", "л"); + initialMap.put("\u043C", "м"); + initialMap.put("\u043D", "н"); + initialMap.put("\u043E", "о"); + initialMap.put("\u043F", "п"); + initialMap.put("\u0440", "р"); + initialMap.put("\u0441", "с"); + initialMap.put("\u0442", "т"); + initialMap.put("\u0443", "у"); + initialMap.put("\u0444", "ф"); + initialMap.put("\u0445", "х"); + initialMap.put("\u0446", "ц"); + initialMap.put("\u0447", "ч"); + initialMap.put("\u0448", "ш"); + initialMap.put("\u0449", "щ"); + initialMap.put("\u044A", "ъ"); + initialMap.put("\u044B", "ы"); + initialMap.put("\u044C", "ь"); + initialMap.put("\u044D", "э"); + initialMap.put("\u044E", "ю"); + initialMap.put("\u044F", "я"); + initialMap.put("\u0451", "ё"); + initialMap.put("\u0452", "ђ"); + initialMap.put("\u0453", "ѓ"); + initialMap.put("\u0454", "є"); + initialMap.put("\u0455", "ѕ"); + initialMap.put("\u0456", "і"); + initialMap.put("\u0457", "ї"); + initialMap.put("\u0458", "ј"); + initialMap.put("\u0459", "љ"); + initialMap.put("\u045A", "њ"); + initialMap.put("\u045B", "ћ"); + initialMap.put("\u045C", "ќ"); + initialMap.put("\u045E", "ў"); + initialMap.put("\u045F", "џ"); + initialMap.put("\u2004", " "); + initialMap.put("\u2005", " "); + initialMap.put("\u2007", " "); + initialMap.put("\u2008", " "); + initialMap.put("\u200A", " "); + initialMap.put("\u200B", "​"); + initialMap.put("\u2010", "‐"); + initialMap.put("\u2015", "―"); + initialMap.put("\u2016", "‖"); + initialMap.put("\u2025", "‥"); + initialMap.put("\u2031", "‱"); + initialMap.put("\u2034", "‴"); + initialMap.put("\u2035", "‵"); + initialMap.put("\u2041", "⁁"); + initialMap.put("\u2043", "⁃"); + initialMap.put("\u204F", "⁏"); + initialMap.put("\u2057", "⁗"); + initialMap.put("\u205F", " "); + initialMap.put("\u205F\u200A", "  "); + 
initialMap.put("\u2060", "⁠"); + initialMap.put("\u2061", "⁡"); + initialMap.put("\u2062", "⁢"); + initialMap.put("\u2063", "⁣"); + initialMap.put("\u20DB", "⃛"); + initialMap.put("\u20DC", "⃜"); + initialMap.put("\u2102", "ℂ"); + initialMap.put("\u2105", "℅"); + initialMap.put("\u210A", "ℊ"); + initialMap.put("\u210B", "ℋ"); + initialMap.put("\u210C", "ℌ"); + initialMap.put("\u210D", "ℍ"); + initialMap.put("\u210E", "ℎ"); + initialMap.put("\u210F", "ℏ"); + initialMap.put("\u2110", "ℐ"); + initialMap.put("\u2112", "ℒ"); + initialMap.put("\u2113", "ℓ"); + initialMap.put("\u2115", "ℕ"); + initialMap.put("\u2116", "№"); + initialMap.put("\u2117", "℗"); + initialMap.put("\u2119", "ℙ"); + initialMap.put("\u211A", "ℚ"); + initialMap.put("\u211B", "ℛ"); + initialMap.put("\u211D", "ℝ"); + initialMap.put("\u211E", "℞"); + initialMap.put("\u2124", "ℤ"); + initialMap.put("\u2127", "℧"); + initialMap.put("\u2128", "ℨ"); + initialMap.put("\u2129", "℩"); + initialMap.put("\u212C", "ℬ"); + initialMap.put("\u212D", "ℭ"); + initialMap.put("\u212F", "ℯ"); + initialMap.put("\u2130", "ℰ"); + initialMap.put("\u2131", "ℱ"); + initialMap.put("\u2133", "ℳ"); + initialMap.put("\u2134", "ℴ"); + initialMap.put("\u2136", "ℶ"); + initialMap.put("\u2137", "ℷ"); + initialMap.put("\u2138", "ℸ"); + initialMap.put("\u2145", "ⅅ"); + initialMap.put("\u2146", "ⅆ"); + initialMap.put("\u2147", "ⅇ"); + initialMap.put("\u2148", "ⅈ"); + initialMap.put("\u2153", "⅓"); + initialMap.put("\u2154", "⅔"); + initialMap.put("\u2155", "⅕"); + initialMap.put("\u2156", "⅖"); + initialMap.put("\u2157", "⅗"); + initialMap.put("\u2158", "⅘"); + initialMap.put("\u2159", "⅙"); + initialMap.put("\u215A", "⅚"); + initialMap.put("\u215B", "⅛"); + initialMap.put("\u215C", "⅜"); + initialMap.put("\u215D", "⅝"); + initialMap.put("\u215E", "⅞"); + initialMap.put("\u2195", "↕"); + initialMap.put("\u2196", "↖"); + initialMap.put("\u2197", "↗"); + initialMap.put("\u2198", "↘"); + initialMap.put("\u2199", "↙"); + 
initialMap.put("\u219A", "↚"); + initialMap.put("\u219B", "↛"); + initialMap.put("\u219D", "↝"); + initialMap.put("\u219D\u0338", "↝̸"); + initialMap.put("\u219E", "↞"); + initialMap.put("\u219F", "↟"); + initialMap.put("\u21A0", "↠"); + initialMap.put("\u21A1", "↡"); + initialMap.put("\u21A2", "↢"); + initialMap.put("\u21A3", "↣"); + initialMap.put("\u21A4", "↤"); + initialMap.put("\u21A5", "↥"); + initialMap.put("\u21A6", "↦"); + initialMap.put("\u21A7", "↧"); + initialMap.put("\u21A9", "↩"); + initialMap.put("\u21AA", "↪"); + initialMap.put("\u21AB", "↫"); + initialMap.put("\u21AC", "↬"); + initialMap.put("\u21AD", "↭"); + initialMap.put("\u21AE", "↮"); + initialMap.put("\u21B0", "↰"); + initialMap.put("\u21B1", "↱"); + initialMap.put("\u21B2", "↲"); + initialMap.put("\u21B3", "↳"); + initialMap.put("\u21B6", "↶"); + initialMap.put("\u21B7", "↷"); + initialMap.put("\u21BA", "↺"); + initialMap.put("\u21BB", "↻"); + initialMap.put("\u21BC", "↼"); + initialMap.put("\u21BD", "↽"); + initialMap.put("\u21BE", "↾"); + initialMap.put("\u21BF", "↿"); + initialMap.put("\u21C0", "⇀"); + initialMap.put("\u21C1", "⇁"); + initialMap.put("\u21C2", "⇂"); + initialMap.put("\u21C3", "⇃"); + initialMap.put("\u21C4", "⇄"); + initialMap.put("\u21C5", "⇅"); + initialMap.put("\u21C6", "⇆"); + initialMap.put("\u21C7", "⇇"); + initialMap.put("\u21C8", "⇈"); + initialMap.put("\u21C9", "⇉"); + initialMap.put("\u21CA", "⇊"); + initialMap.put("\u21CB", "⇋"); + initialMap.put("\u21CC", "⇌"); + initialMap.put("\u21CD", "⇍"); + initialMap.put("\u21CE", "⇎"); + initialMap.put("\u21CF", "⇏"); + initialMap.put("\u21D5", "⇕"); + initialMap.put("\u21D6", "⇖"); + initialMap.put("\u21D7", "⇗"); + initialMap.put("\u21D8", "⇘"); + initialMap.put("\u21D9", "⇙"); + initialMap.put("\u21DA", "⇚"); + initialMap.put("\u21DB", "⇛"); + initialMap.put("\u21DD", "⇝"); + initialMap.put("\u21E4", "⇤"); + initialMap.put("\u21E5", "⇥"); + initialMap.put("\u21F5", "⇵"); + initialMap.put("\u21FD", "⇽"); + 
initialMap.put("\u21FE", "⇾"); + initialMap.put("\u21FF", "⇿"); + initialMap.put("\u2201", "∁"); + initialMap.put("\u2202\u0338", "∂̸"); + initialMap.put("\u2204", "∄"); + initialMap.put("\u220C", "∌"); + initialMap.put("\u2210", "∐"); + initialMap.put("\u2213", "∓"); + initialMap.put("\u2214", "∔"); + initialMap.put("\u2216", "∖"); + initialMap.put("\u2218", "∘"); + initialMap.put("\u221F", "∟"); + initialMap.put("\u2220\u20D2", "∠⃒"); + initialMap.put("\u2221", "∡"); + initialMap.put("\u2222", "∢"); + initialMap.put("\u2223", "∣"); + initialMap.put("\u2224", "∤"); + initialMap.put("\u2225", "∥"); + initialMap.put("\u2226", "∦"); + initialMap.put("\u2229\uFE00", "∩︀"); + initialMap.put("\u222A\uFE00", "∪︀"); + initialMap.put("\u222C", "∬"); + initialMap.put("\u222D", "∭"); + initialMap.put("\u222E", "∮"); + initialMap.put("\u222F", "∯"); + initialMap.put("\u2230", "∰"); + initialMap.put("\u2231", "∱"); + initialMap.put("\u2232", "∲"); + initialMap.put("\u2233", "∳"); + initialMap.put("\u2235", "∵"); + initialMap.put("\u2236", "∶"); + initialMap.put("\u2237", "∷"); + initialMap.put("\u2238", "∸"); + initialMap.put("\u223A", "∺"); + initialMap.put("\u223B", "∻"); + initialMap.put("\u223C\u20D2", "∼⃒"); + initialMap.put("\u223D", "∽"); + initialMap.put("\u223D\u0331", "∽̱"); + initialMap.put("\u223E", "∾"); + initialMap.put("\u223E\u0333", "∾̳"); + initialMap.put("\u223F", "∿"); + initialMap.put("\u2240", "≀"); + initialMap.put("\u2241", "≁"); + initialMap.put("\u2242", "≂"); + initialMap.put("\u2242\u0338", "≂̸"); + initialMap.put("\u2243", "≃"); + initialMap.put("\u2244", "≄"); + initialMap.put("\u2246", "≆"); + initialMap.put("\u2247", "≇"); + initialMap.put("\u2249", "≉"); + initialMap.put("\u224A", "≊"); + initialMap.put("\u224B", "≋"); + initialMap.put("\u224B\u0338", "≋̸"); + initialMap.put("\u224C", "≌"); + initialMap.put("\u224D", "≍"); + initialMap.put("\u224D\u20D2", "≍⃒"); + initialMap.put("\u224E", "≎"); + initialMap.put("\u224E\u0338", "≎̸"); + 
initialMap.put("\u224F", "≏"); + initialMap.put("\u224F\u0338", "≏̸"); + initialMap.put("\u2250", "≐"); + initialMap.put("\u2250\u0338", "≐̸"); + initialMap.put("\u2251", "≑"); + initialMap.put("\u2252", "≒"); + initialMap.put("\u2253", "≓"); + initialMap.put("\u2254", "≔"); + initialMap.put("\u2255", "≕"); + initialMap.put("\u2256", "≖"); + initialMap.put("\u2257", "≗"); + initialMap.put("\u2259", "≙"); + initialMap.put("\u225A", "≚"); + initialMap.put("\u225C", "≜"); + initialMap.put("\u225F", "≟"); + initialMap.put("\u2261\u20E5", "≡⃥"); + initialMap.put("\u2262", "≢"); + initialMap.put("\u2264\u20D2", "≤⃒"); + initialMap.put("\u2265\u20D2", "≥⃒"); + initialMap.put("\u2266", "≦"); + initialMap.put("\u2266\u0338", "≦̸"); + initialMap.put("\u2267", "≧"); + initialMap.put("\u2267\u0338", "≧̸"); + initialMap.put("\u2268", "≨"); + initialMap.put("\u2268\uFE00", "≨︀"); + initialMap.put("\u2269", "≩"); + initialMap.put("\u2269\uFE00", "≩︀"); + initialMap.put("\u226A", "≪"); + initialMap.put("\u226A\u0338", "≪̸"); + initialMap.put("\u226A\u20D2", "≪⃒"); + initialMap.put("\u226B", "≫"); + initialMap.put("\u226B\u0338", "≫̸"); + initialMap.put("\u226B\u20D2", "≫⃒"); + initialMap.put("\u226C", "≬"); + initialMap.put("\u226D", "≭"); + initialMap.put("\u226E", "≮"); + initialMap.put("\u226F", "≯"); + initialMap.put("\u2270", "≰"); + initialMap.put("\u2271", "≱"); + initialMap.put("\u2272", "≲"); + initialMap.put("\u2273", "≳"); + initialMap.put("\u2274", "≴"); + initialMap.put("\u2275", "≵"); + initialMap.put("\u2276", "≶"); + initialMap.put("\u2277", "≷"); + initialMap.put("\u2278", "≸"); + initialMap.put("\u2279", "≹"); + initialMap.put("\u227A", "≺"); + initialMap.put("\u227B", "≻"); + initialMap.put("\u227C", "≼"); + initialMap.put("\u227D", "≽"); + initialMap.put("\u227E", "≾"); + initialMap.put("\u227F", "≿"); + initialMap.put("\u227F\u0338", "≿̸"); + initialMap.put("\u2280", "⊀"); + initialMap.put("\u2281", "⊁"); + initialMap.put("\u2282\u20D2", "⊂⃒"); + 
initialMap.put("\u2283\u20D2", "⊃⃒"); + initialMap.put("\u2285", "⊅"); + initialMap.put("\u2288", "⊈"); + initialMap.put("\u2289", "⊉"); + initialMap.put("\u228A", "⊊"); + initialMap.put("\u228A\uFE00", "⊊︀"); + initialMap.put("\u228B", "⊋"); + initialMap.put("\u228B\uFE00", "⊋︀"); + initialMap.put("\u228D", "⊍"); + initialMap.put("\u228E", "⊎"); + initialMap.put("\u228F", "⊏"); + initialMap.put("\u228F\u0338", "⊏̸"); + initialMap.put("\u2290", "⊐"); + initialMap.put("\u2290\u0338", "⊐̸"); + initialMap.put("\u2291", "⊑"); + initialMap.put("\u2292", "⊒"); + initialMap.put("\u2293", "⊓"); + initialMap.put("\u2293\uFE00", "⊓︀"); + initialMap.put("\u2294", "⊔"); + initialMap.put("\u2294\uFE00", "⊔︀"); + initialMap.put("\u2296", "⊖"); + initialMap.put("\u2298", "⊘"); + initialMap.put("\u2299", "⊙"); + initialMap.put("\u229A", "⊚"); + initialMap.put("\u229B", "⊛"); + initialMap.put("\u229D", "⊝"); + initialMap.put("\u229E", "⊞"); + initialMap.put("\u229F", "⊟"); + initialMap.put("\u22A0", "⊠"); + initialMap.put("\u22A1", "⊡"); + initialMap.put("\u22A2", "⊢"); + initialMap.put("\u22A3", "⊣"); + initialMap.put("\u22A4", "⊤"); + initialMap.put("\u22A7", "⊧"); + initialMap.put("\u22A8", "⊨"); + initialMap.put("\u22A9", "⊩"); + initialMap.put("\u22AA", "⊪"); + initialMap.put("\u22AB", "⊫"); + initialMap.put("\u22AC", "⊬"); + initialMap.put("\u22AD", "⊭"); + initialMap.put("\u22AE", "⊮"); + initialMap.put("\u22AF", "⊯"); + initialMap.put("\u22B0", "⊰"); + initialMap.put("\u22B2", "⊲"); + initialMap.put("\u22B3", "⊳"); + initialMap.put("\u22B4", "⊴"); + initialMap.put("\u22B4\u20D2", "⊴⃒"); + initialMap.put("\u22B5", "⊵"); + initialMap.put("\u22B5\u20D2", "⊵⃒"); + initialMap.put("\u22B6", "⊶"); + initialMap.put("\u22B7", "⊷"); + initialMap.put("\u22B8", "⊸"); + initialMap.put("\u22B9", "⊹"); + initialMap.put("\u22BA", "⊺"); + initialMap.put("\u22BB", "⊻"); + initialMap.put("\u22BD", "⊽"); + initialMap.put("\u22BE", "⊾"); + initialMap.put("\u22BF", "⊿"); + 
initialMap.put("\u22C0", "⋀"); + initialMap.put("\u22C1", "⋁"); + initialMap.put("\u22C2", "⋂"); + initialMap.put("\u22C3", "⋃"); + initialMap.put("\u22C4", "⋄"); + initialMap.put("\u22C6", "⋆"); + initialMap.put("\u22C7", "⋇"); + initialMap.put("\u22C8", "⋈"); + initialMap.put("\u22C9", "⋉"); + initialMap.put("\u22CA", "⋊"); + initialMap.put("\u22CB", "⋋"); + initialMap.put("\u22CC", "⋌"); + initialMap.put("\u22CD", "⋍"); + initialMap.put("\u22CE", "⋎"); + initialMap.put("\u22CF", "⋏"); + initialMap.put("\u22D0", "⋐"); + initialMap.put("\u22D1", "⋑"); + initialMap.put("\u22D2", "⋒"); + initialMap.put("\u22D3", "⋓"); + initialMap.put("\u22D4", "⋔"); + initialMap.put("\u22D5", "⋕"); + initialMap.put("\u22D6", "⋖"); + initialMap.put("\u22D7", "⋗"); + initialMap.put("\u22D8", "⋘"); + initialMap.put("\u22D8\u0338", "⋘̸"); + initialMap.put("\u22D9", "⋙"); + initialMap.put("\u22D9\u0338", "⋙̸"); + initialMap.put("\u22DA", "⋚"); + initialMap.put("\u22DA\uFE00", "⋚︀"); + initialMap.put("\u22DB", "⋛"); + initialMap.put("\u22DB\uFE00", "⋛︀"); + initialMap.put("\u22DE", "⋞"); + initialMap.put("\u22DF", "⋟"); + initialMap.put("\u22E0", "⋠"); + initialMap.put("\u22E1", "⋡"); + initialMap.put("\u22E2", "⋢"); + initialMap.put("\u22E3", "⋣"); + initialMap.put("\u22E6", "⋦"); + initialMap.put("\u22E7", "⋧"); + initialMap.put("\u22E8", "⋨"); + initialMap.put("\u22E9", "⋩"); + initialMap.put("\u22EA", "⋪"); + initialMap.put("\u22EB", "⋫"); + initialMap.put("\u22EC", "⋬"); + initialMap.put("\u22ED", "⋭"); + initialMap.put("\u22EE", "⋮"); + initialMap.put("\u22EF", "⋯"); + initialMap.put("\u22F0", "⋰"); + initialMap.put("\u22F1", "⋱"); + initialMap.put("\u22F2", "⋲"); + initialMap.put("\u22F3", "⋳"); + initialMap.put("\u22F4", "⋴"); + initialMap.put("\u22F5", "⋵"); + initialMap.put("\u22F5\u0338", "⋵̸"); + initialMap.put("\u22F6", "⋶"); + initialMap.put("\u22F7", "⋷"); + initialMap.put("\u22F9", "⋹"); + initialMap.put("\u22F9\u0338", "⋹̸"); + initialMap.put("\u22FA", "⋺"); + 
initialMap.put("\u22FB", "⋻"); + initialMap.put("\u22FC", "⋼"); + initialMap.put("\u22FD", "⋽"); + initialMap.put("\u22FE", "⋾"); + initialMap.put("\u2305", "⌅"); + initialMap.put("\u2306", "⌆"); + initialMap.put("\u230C", "⌌"); + initialMap.put("\u230D", "⌍"); + initialMap.put("\u230E", "⌎"); + initialMap.put("\u230F", "⌏"); + initialMap.put("\u2310", "⌐"); + initialMap.put("\u2312", "⌒"); + initialMap.put("\u2313", "⌓"); + initialMap.put("\u2315", "⌕"); + initialMap.put("\u2316", "⌖"); + initialMap.put("\u231C", "⌜"); + initialMap.put("\u231D", "⌝"); + initialMap.put("\u231E", "⌞"); + initialMap.put("\u231F", "⌟"); + initialMap.put("\u2322", "⌢"); + initialMap.put("\u2323", "⌣"); + initialMap.put("\u232D", "⌭"); + initialMap.put("\u232E", "⌮"); + initialMap.put("\u2336", "⌶"); + initialMap.put("\u233D", "⌽"); + initialMap.put("\u233F", "⌿"); + initialMap.put("\u237C", "⍼"); + initialMap.put("\u23B0", "⎰"); + initialMap.put("\u23B1", "⎱"); + initialMap.put("\u23B4", "⎴"); + initialMap.put("\u23B5", "⎵"); + initialMap.put("\u23B6", "⎶"); + initialMap.put("\u23DC", "⏜"); + initialMap.put("\u23DD", "⏝"); + initialMap.put("\u23DE", "⏞"); + initialMap.put("\u23DF", "⏟"); + initialMap.put("\u23E2", "⏢"); + initialMap.put("\u23E7", "⏧"); + initialMap.put("\u2423", "␣"); + initialMap.put("\u24C8", "Ⓢ"); + initialMap.put("\u2500", "─"); + initialMap.put("\u2502", "│"); + initialMap.put("\u250C", "┌"); + initialMap.put("\u2510", "┐"); + initialMap.put("\u2514", "└"); + initialMap.put("\u2518", "┘"); + initialMap.put("\u251C", "├"); + initialMap.put("\u2524", "┤"); + initialMap.put("\u252C", "┬"); + initialMap.put("\u2534", "┴"); + initialMap.put("\u253C", "┼"); + initialMap.put("\u2550", "═"); + initialMap.put("\u2551", "║"); + initialMap.put("\u2552", "╒"); + initialMap.put("\u2553", "╓"); + initialMap.put("\u2554", "╔"); + initialMap.put("\u2555", "╕"); + initialMap.put("\u2556", "╖"); + initialMap.put("\u2557", "╗"); + initialMap.put("\u2558", "╘"); + 
initialMap.put("\u2559", "╙"); + initialMap.put("\u255A", "╚"); + initialMap.put("\u255B", "╛"); + initialMap.put("\u255C", "╜"); + initialMap.put("\u255D", "╝"); + initialMap.put("\u255E", "╞"); + initialMap.put("\u255F", "╟"); + initialMap.put("\u2560", "╠"); + initialMap.put("\u2561", "╡"); + initialMap.put("\u2562", "╢"); + initialMap.put("\u2563", "╣"); + initialMap.put("\u2564", "╤"); + initialMap.put("\u2565", "╥"); + initialMap.put("\u2566", "╦"); + initialMap.put("\u2567", "╧"); + initialMap.put("\u2568", "╨"); + initialMap.put("\u2569", "╩"); + initialMap.put("\u256A", "╪"); + initialMap.put("\u256B", "╫"); + initialMap.put("\u256C", "╬"); + initialMap.put("\u2580", "▀"); + initialMap.put("\u2584", "▄"); + initialMap.put("\u2588", "█"); + initialMap.put("\u2591", "░"); + initialMap.put("\u2592", "▒"); + initialMap.put("\u2593", "▓"); + initialMap.put("\u25A1", "□"); + initialMap.put("\u25AA", "▪"); + initialMap.put("\u25AB", "▫"); + initialMap.put("\u25AD", "▭"); + initialMap.put("\u25AE", "▮"); + initialMap.put("\u25B1", "▱"); + initialMap.put("\u25B3", "△"); + initialMap.put("\u25B4", "▴"); + initialMap.put("\u25B5", "▵"); + initialMap.put("\u25B8", "▸"); + initialMap.put("\u25B9", "▹"); + initialMap.put("\u25BD", "▽"); + initialMap.put("\u25BE", "▾"); + initialMap.put("\u25BF", "▿"); + initialMap.put("\u25C2", "◂"); + initialMap.put("\u25C3", "◃"); + initialMap.put("\u25CB", "○"); + initialMap.put("\u25EC", "◬"); + initialMap.put("\u25EF", "◯"); + initialMap.put("\u25F8", "◸"); + initialMap.put("\u25F9", "◹"); + initialMap.put("\u25FA", "◺"); + initialMap.put("\u25FB", "◻"); + initialMap.put("\u25FC", "◼"); + initialMap.put("\u2605", "★"); + initialMap.put("\u2606", "☆"); + initialMap.put("\u260E", "☎"); + initialMap.put("\u2640", "♀"); + initialMap.put("\u2642", "♂"); + initialMap.put("\u266A", "♪"); + initialMap.put("\u266D", "♭"); + initialMap.put("\u266E", "♮"); + initialMap.put("\u266F", "♯"); + initialMap.put("\u2713", "✓"); + 
initialMap.put("\u2717", "✗"); + initialMap.put("\u2720", "✠"); + initialMap.put("\u2736", "✶"); + initialMap.put("\u2758", "❘"); + initialMap.put("\u2772", "❲"); + initialMap.put("\u2773", "❳"); + initialMap.put("\u27C8", "⟈"); + initialMap.put("\u27C9", "⟉"); + initialMap.put("\u27E6", "⟦"); + initialMap.put("\u27E7", "⟧"); + initialMap.put("\u27E8", "⟨"); + initialMap.put("\u27E9", "⟩"); + initialMap.put("\u27EA", "⟪"); + initialMap.put("\u27EB", "⟫"); + initialMap.put("\u27EC", "⟬"); + initialMap.put("\u27ED", "⟭"); + initialMap.put("\u27F5", "⟵"); + initialMap.put("\u27F6", "⟶"); + initialMap.put("\u27F7", "⟷"); + initialMap.put("\u27F8", "⟸"); + initialMap.put("\u27F9", "⟹"); + initialMap.put("\u27FA", "⟺"); + initialMap.put("\u27FC", "⟼"); + initialMap.put("\u27FF", "⟿"); + initialMap.put("\u2902", "⤂"); + initialMap.put("\u2903", "⤃"); + initialMap.put("\u2904", "⤄"); + initialMap.put("\u2905", "⤅"); + initialMap.put("\u290C", "⤌"); + initialMap.put("\u290D", "⤍"); + initialMap.put("\u290E", "⤎"); + initialMap.put("\u290F", "⤏"); + initialMap.put("\u2910", "⤐"); + initialMap.put("\u2911", "⤑"); + initialMap.put("\u2912", "⤒"); + initialMap.put("\u2913", "⤓"); + initialMap.put("\u2916", "⤖"); + initialMap.put("\u2919", "⤙"); + initialMap.put("\u291A", "⤚"); + initialMap.put("\u291B", "⤛"); + initialMap.put("\u291C", "⤜"); + initialMap.put("\u291D", "⤝"); + initialMap.put("\u291E", "⤞"); + initialMap.put("\u291F", "⤟"); + initialMap.put("\u2920", "⤠"); + initialMap.put("\u2923", "⤣"); + initialMap.put("\u2924", "⤤"); + initialMap.put("\u2925", "⤥"); + initialMap.put("\u2926", "⤦"); + initialMap.put("\u2927", "⤧"); + initialMap.put("\u2928", "⤨"); + initialMap.put("\u2929", "⤩"); + initialMap.put("\u292A", "⤪"); + initialMap.put("\u2933", "⤳"); + initialMap.put("\u2933\u0338", "⤳̸"); + initialMap.put("\u2935", "⤵"); + initialMap.put("\u2936", "⤶"); + initialMap.put("\u2937", "⤷"); + initialMap.put("\u2938", "⤸"); + initialMap.put("\u2939", "⤹"); + 
initialMap.put("\u293C", "⤼"); + initialMap.put("\u293D", "⤽"); + initialMap.put("\u2945", "⥅"); + initialMap.put("\u2948", "⥈"); + initialMap.put("\u2949", "⥉"); + initialMap.put("\u294A", "⥊"); + initialMap.put("\u294B", "⥋"); + initialMap.put("\u294E", "⥎"); + initialMap.put("\u294F", "⥏"); + initialMap.put("\u2950", "⥐"); + initialMap.put("\u2951", "⥑"); + initialMap.put("\u2952", "⥒"); + initialMap.put("\u2953", "⥓"); + initialMap.put("\u2954", "⥔"); + initialMap.put("\u2955", "⥕"); + initialMap.put("\u2956", "⥖"); + initialMap.put("\u2957", "⥗"); + initialMap.put("\u2958", "⥘"); + initialMap.put("\u2959", "⥙"); + initialMap.put("\u295A", "⥚"); + initialMap.put("\u295B", "⥛"); + initialMap.put("\u295C", "⥜"); + initialMap.put("\u295D", "⥝"); + initialMap.put("\u295E", "⥞"); + initialMap.put("\u295F", "⥟"); + initialMap.put("\u2960", "⥠"); + initialMap.put("\u2961", "⥡"); + initialMap.put("\u2962", "⥢"); + initialMap.put("\u2963", "⥣"); + initialMap.put("\u2964", "⥤"); + initialMap.put("\u2965", "⥥"); + initialMap.put("\u2966", "⥦"); + initialMap.put("\u2967", "⥧"); + initialMap.put("\u2968", "⥨"); + initialMap.put("\u2969", "⥩"); + initialMap.put("\u296A", "⥪"); + initialMap.put("\u296B", "⥫"); + initialMap.put("\u296C", "⥬"); + initialMap.put("\u296D", "⥭"); + initialMap.put("\u296E", "⥮"); + initialMap.put("\u296F", "⥯"); + initialMap.put("\u2970", "⥰"); + initialMap.put("\u2971", "⥱"); + initialMap.put("\u2972", "⥲"); + initialMap.put("\u2973", "⥳"); + initialMap.put("\u2974", "⥴"); + initialMap.put("\u2975", "⥵"); + initialMap.put("\u2976", "⥶"); + initialMap.put("\u2978", "⥸"); + initialMap.put("\u2979", "⥹"); + initialMap.put("\u297B", "⥻"); + initialMap.put("\u297C", "⥼"); + initialMap.put("\u297D", "⥽"); + initialMap.put("\u297E", "⥾"); + initialMap.put("\u297F", "⥿"); + initialMap.put("\u2985", "⦅"); + initialMap.put("\u2986", "⦆"); + initialMap.put("\u298B", "⦋"); + initialMap.put("\u298C", "⦌"); + initialMap.put("\u298D", "⦍"); + 
initialMap.put("\u298E", "⦎"); + initialMap.put("\u298F", "⦏"); + initialMap.put("\u2990", "⦐"); + initialMap.put("\u2991", "⦑"); + initialMap.put("\u2992", "⦒"); + initialMap.put("\u2993", "⦓"); + initialMap.put("\u2994", "⦔"); + initialMap.put("\u2995", "⦕"); + initialMap.put("\u2996", "⦖"); + initialMap.put("\u299A", "⦚"); + initialMap.put("\u299C", "⦜"); + initialMap.put("\u299D", "⦝"); + initialMap.put("\u29A4", "⦤"); + initialMap.put("\u29A5", "⦥"); + initialMap.put("\u29A6", "⦦"); + initialMap.put("\u29A7", "⦧"); + initialMap.put("\u29A8", "⦨"); + initialMap.put("\u29A9", "⦩"); + initialMap.put("\u29AA", "⦪"); + initialMap.put("\u29AB", "⦫"); + initialMap.put("\u29AC", "⦬"); + initialMap.put("\u29AD", "⦭"); + initialMap.put("\u29AE", "⦮"); + initialMap.put("\u29AF", "⦯"); + initialMap.put("\u29B0", "⦰"); + initialMap.put("\u29B1", "⦱"); + initialMap.put("\u29B2", "⦲"); + initialMap.put("\u29B3", "⦳"); + initialMap.put("\u29B4", "⦴"); + initialMap.put("\u29B5", "⦵"); + initialMap.put("\u29B6", "⦶"); + initialMap.put("\u29B7", "⦷"); + initialMap.put("\u29B9", "⦹"); + initialMap.put("\u29BB", "⦻"); + initialMap.put("\u29BC", "⦼"); + initialMap.put("\u29BE", "⦾"); + initialMap.put("\u29BF", "⦿"); + initialMap.put("\u29C0", "⧀"); + initialMap.put("\u29C1", "⧁"); + initialMap.put("\u29C2", "⧂"); + initialMap.put("\u29C3", "⧃"); + initialMap.put("\u29C4", "⧄"); + initialMap.put("\u29C5", "⧅"); + initialMap.put("\u29C9", "⧉"); + initialMap.put("\u29CD", "⧍"); + initialMap.put("\u29CE", "⧎"); + initialMap.put("\u29CF", "⧏"); + initialMap.put("\u29CF\u0338", "⧏̸"); + initialMap.put("\u29D0", "⧐"); + initialMap.put("\u29D0\u0338", "⧐̸"); + initialMap.put("\u29DC", "⧜"); + initialMap.put("\u29DD", "⧝"); + initialMap.put("\u29DE", "⧞"); + initialMap.put("\u29E3", "⧣"); + initialMap.put("\u29E4", "⧤"); + initialMap.put("\u29E5", "⧥"); + initialMap.put("\u29EB", "⧫"); + initialMap.put("\u29F4", "⧴"); + initialMap.put("\u29F6", "⧶"); + initialMap.put("\u2A00", "⨀"); + 
initialMap.put("\u2A01", "⨁"); + initialMap.put("\u2A02", "⨂"); + initialMap.put("\u2A04", "⨄"); + initialMap.put("\u2A06", "⨆"); + initialMap.put("\u2A0C", "⨌"); + initialMap.put("\u2A0D", "⨍"); + initialMap.put("\u2A10", "⨐"); + initialMap.put("\u2A11", "⨑"); + initialMap.put("\u2A12", "⨒"); + initialMap.put("\u2A13", "⨓"); + initialMap.put("\u2A14", "⨔"); + initialMap.put("\u2A15", "⨕"); + initialMap.put("\u2A16", "⨖"); + initialMap.put("\u2A17", "⨗"); + initialMap.put("\u2A22", "⨢"); + initialMap.put("\u2A23", "⨣"); + initialMap.put("\u2A24", "⨤"); + initialMap.put("\u2A25", "⨥"); + initialMap.put("\u2A26", "⨦"); + initialMap.put("\u2A27", "⨧"); + initialMap.put("\u2A29", "⨩"); + initialMap.put("\u2A2A", "⨪"); + initialMap.put("\u2A2D", "⨭"); + initialMap.put("\u2A2E", "⨮"); + initialMap.put("\u2A2F", "⨯"); + initialMap.put("\u2A30", "⨰"); + initialMap.put("\u2A31", "⨱"); + initialMap.put("\u2A33", "⨳"); + initialMap.put("\u2A34", "⨴"); + initialMap.put("\u2A35", "⨵"); + initialMap.put("\u2A36", "⨶"); + initialMap.put("\u2A37", "⨷"); + initialMap.put("\u2A38", "⨸"); + initialMap.put("\u2A39", "⨹"); + initialMap.put("\u2A3A", "⨺"); + initialMap.put("\u2A3B", "⨻"); + initialMap.put("\u2A3C", "⨼"); + initialMap.put("\u2A3F", "⨿"); + initialMap.put("\u2A40", "⩀"); + initialMap.put("\u2A42", "⩂"); + initialMap.put("\u2A43", "⩃"); + initialMap.put("\u2A44", "⩄"); + initialMap.put("\u2A45", "⩅"); + initialMap.put("\u2A46", "⩆"); + initialMap.put("\u2A47", "⩇"); + initialMap.put("\u2A48", "⩈"); + initialMap.put("\u2A49", "⩉"); + initialMap.put("\u2A4A", "⩊"); + initialMap.put("\u2A4B", "⩋"); + initialMap.put("\u2A4C", "⩌"); + initialMap.put("\u2A4D", "⩍"); + initialMap.put("\u2A50", "⩐"); + initialMap.put("\u2A53", "⩓"); + initialMap.put("\u2A54", "⩔"); + initialMap.put("\u2A55", "⩕"); + initialMap.put("\u2A56", "⩖"); + initialMap.put("\u2A57", "⩗"); + initialMap.put("\u2A58", "⩘"); + initialMap.put("\u2A5A", "⩚"); + initialMap.put("\u2A5B", "⩛"); + 
initialMap.put("\u2A5C", "⩜"); + initialMap.put("\u2A5D", "⩝"); + initialMap.put("\u2A5F", "⩟"); + initialMap.put("\u2A66", "⩦"); + initialMap.put("\u2A6A", "⩪"); + initialMap.put("\u2A6D", "⩭"); + initialMap.put("\u2A6D\u0338", "⩭̸"); + initialMap.put("\u2A6E", "⩮"); + initialMap.put("\u2A6F", "⩯"); + initialMap.put("\u2A70", "⩰"); + initialMap.put("\u2A70\u0338", "⩰̸"); + initialMap.put("\u2A71", "⩱"); + initialMap.put("\u2A72", "⩲"); + initialMap.put("\u2A73", "⩳"); + initialMap.put("\u2A74", "⩴"); + initialMap.put("\u2A75", "⩵"); + initialMap.put("\u2A77", "⩷"); + initialMap.put("\u2A78", "⩸"); + initialMap.put("\u2A79", "⩹"); + initialMap.put("\u2A7A", "⩺"); + initialMap.put("\u2A7B", "⩻"); + initialMap.put("\u2A7C", "⩼"); + initialMap.put("\u2A7D", "⩽"); + initialMap.put("\u2A7D\u0338", "⩽̸"); + initialMap.put("\u2A7E", "⩾"); + initialMap.put("\u2A7E\u0338", "⩾̸"); + initialMap.put("\u2A7F", "⩿"); + initialMap.put("\u2A80", "⪀"); + initialMap.put("\u2A81", "⪁"); + initialMap.put("\u2A82", "⪂"); + initialMap.put("\u2A83", "⪃"); + initialMap.put("\u2A84", "⪄"); + initialMap.put("\u2A85", "⪅"); + initialMap.put("\u2A86", "⪆"); + initialMap.put("\u2A87", "⪇"); + initialMap.put("\u2A88", "⪈"); + initialMap.put("\u2A89", "⪉"); + initialMap.put("\u2A8A", "⪊"); + initialMap.put("\u2A8B", "⪋"); + initialMap.put("\u2A8C", "⪌"); + initialMap.put("\u2A8D", "⪍"); + initialMap.put("\u2A8E", "⪎"); + initialMap.put("\u2A8F", "⪏"); + initialMap.put("\u2A90", "⪐"); + initialMap.put("\u2A91", "⪑"); + initialMap.put("\u2A92", "⪒"); + initialMap.put("\u2A93", "⪓"); + initialMap.put("\u2A94", "⪔"); + initialMap.put("\u2A95", "⪕"); + initialMap.put("\u2A96", "⪖"); + initialMap.put("\u2A97", "⪗"); + initialMap.put("\u2A98", "⪘"); + initialMap.put("\u2A99", "⪙"); + initialMap.put("\u2A9A", "⪚"); + initialMap.put("\u2A9D", "⪝"); + initialMap.put("\u2A9E", "⪞"); + initialMap.put("\u2A9F", "⪟"); + initialMap.put("\u2AA0", "⪠"); + initialMap.put("\u2AA1", "⪡"); + 
initialMap.put("\u2AA1\u0338", "⪡̸"); + initialMap.put("\u2AA2", "⪢"); + initialMap.put("\u2AA2\u0338", "⪢̸"); + initialMap.put("\u2AA4", "⪤"); + initialMap.put("\u2AA5", "⪥"); + initialMap.put("\u2AA6", "⪦"); + initialMap.put("\u2AA7", "⪧"); + initialMap.put("\u2AA8", "⪨"); + initialMap.put("\u2AA9", "⪩"); + initialMap.put("\u2AAA", "⪪"); + initialMap.put("\u2AAB", "⪫"); + initialMap.put("\u2AAC", "⪬"); + initialMap.put("\u2AAC\uFE00", "⪬︀"); + initialMap.put("\u2AAD", "⪭"); + initialMap.put("\u2AAD\uFE00", "⪭︀"); + initialMap.put("\u2AAE", "⪮"); + initialMap.put("\u2AAF", "⪯"); + initialMap.put("\u2AAF\u0338", "⪯̸"); + initialMap.put("\u2AB0", "⪰"); + initialMap.put("\u2AB0\u0338", "⪰̸"); + initialMap.put("\u2AB3", "⪳"); + initialMap.put("\u2AB4", "⪴"); + initialMap.put("\u2AB5", "⪵"); + initialMap.put("\u2AB6", "⪶"); + initialMap.put("\u2AB7", "⪷"); + initialMap.put("\u2AB8", "⪸"); + initialMap.put("\u2AB9", "⪹"); + initialMap.put("\u2ABA", "⪺"); + initialMap.put("\u2ABB", "⪻"); + initialMap.put("\u2ABC", "⪼"); + initialMap.put("\u2ABD", "⪽"); + initialMap.put("\u2ABE", "⪾"); + initialMap.put("\u2ABF", "⪿"); + initialMap.put("\u2AC0", "⫀"); + initialMap.put("\u2AC1", "⫁"); + initialMap.put("\u2AC2", "⫂"); + initialMap.put("\u2AC3", "⫃"); + initialMap.put("\u2AC4", "⫄"); + initialMap.put("\u2AC5", "⫅"); + initialMap.put("\u2AC5\u0338", "⫅̸"); + initialMap.put("\u2AC6", "⫆"); + initialMap.put("\u2AC6\u0338", "⫆̸"); + initialMap.put("\u2AC7", "⫇"); + initialMap.put("\u2AC8", "⫈"); + initialMap.put("\u2ACB", "⫋"); + initialMap.put("\u2ACB\uFE00", "⫋︀"); + initialMap.put("\u2ACC", "⫌"); + initialMap.put("\u2ACC\uFE00", "⫌︀"); + initialMap.put("\u2ACF", "⫏"); + initialMap.put("\u2AD0", "⫐"); + initialMap.put("\u2AD1", "⫑"); + initialMap.put("\u2AD2", "⫒"); + initialMap.put("\u2AD3", "⫓"); + initialMap.put("\u2AD4", "⫔"); + initialMap.put("\u2AD5", "⫕"); + initialMap.put("\u2AD6", "⫖"); + initialMap.put("\u2AD7", "⫗"); + initialMap.put("\u2AD8", "⫘"); + 
initialMap.put("\u2AD9", "⫙"); + initialMap.put("\u2ADA", "⫚"); + initialMap.put("\u2ADB", "⫛"); + initialMap.put("\u2AE4", "⫤"); + initialMap.put("\u2AE6", "⫦"); + initialMap.put("\u2AE7", "⫧"); + initialMap.put("\u2AE8", "⫨"); + initialMap.put("\u2AE9", "⫩"); + initialMap.put("\u2AEB", "⫫"); + initialMap.put("\u2AEC", "⫬"); + initialMap.put("\u2AED", "⫭"); + initialMap.put("\u2AEE", "⫮"); + initialMap.put("\u2AEF", "⫯"); + initialMap.put("\u2AF0", "⫰"); + initialMap.put("\u2AF1", "⫱"); + initialMap.put("\u2AF2", "⫲"); + initialMap.put("\u2AF3", "⫳"); + initialMap.put("\u2AFD", "⫽"); + initialMap.put("\u2AFD\u20E5", "⫽⃥"); + initialMap.put("\uD835\uDC9C", "𝒜"); + initialMap.put("\uD835\uDC9E", "𝒞"); + initialMap.put("\uD835\uDC9F", "𝒟"); + initialMap.put("\uD835\uDCA2", "𝒢"); + initialMap.put("\uD835\uDCA5", "𝒥"); + initialMap.put("\uD835\uDCA6", "𝒦"); + initialMap.put("\uD835\uDCA9", "𝒩"); + initialMap.put("\uD835\uDCAA", "𝒪"); + initialMap.put("\uD835\uDCAB", "𝒫"); + initialMap.put("\uD835\uDCAC", "𝒬"); + initialMap.put("\uD835\uDCAE", "𝒮"); + initialMap.put("\uD835\uDCAF", "𝒯"); + initialMap.put("\uD835\uDCB0", "𝒰"); + initialMap.put("\uD835\uDCB1", "𝒱"); + initialMap.put("\uD835\uDCB2", "𝒲"); + initialMap.put("\uD835\uDCB3", "𝒳"); + initialMap.put("\uD835\uDCB4", "𝒴"); + initialMap.put("\uD835\uDCB5", "𝒵"); + initialMap.put("\uD835\uDCB6", "𝒶"); + initialMap.put("\uD835\uDCB7", "𝒷"); + initialMap.put("\uD835\uDCB8", "𝒸"); + initialMap.put("\uD835\uDCB9", "𝒹"); + initialMap.put("\uD835\uDCBB", "𝒻"); + initialMap.put("\uD835\uDCBD", "𝒽"); + initialMap.put("\uD835\uDCBE", "𝒾"); + initialMap.put("\uD835\uDCBF", "𝒿"); + initialMap.put("\uD835\uDCC0", "𝓀"); + initialMap.put("\uD835\uDCC1", "𝓁"); + initialMap.put("\uD835\uDCC2", "𝓂"); + initialMap.put("\uD835\uDCC3", "𝓃"); + initialMap.put("\uD835\uDCC5", "𝓅"); + initialMap.put("\uD835\uDCC6", "𝓆"); + initialMap.put("\uD835\uDCC7", "𝓇"); + initialMap.put("\uD835\uDCC8", "𝓈"); + initialMap.put("\uD835\uDCC9", "𝓉"); + 
initialMap.put("\uD835\uDCCA", "𝓊"); + initialMap.put("\uD835\uDCCB", "𝓋"); + initialMap.put("\uD835\uDCCC", "𝓌"); + initialMap.put("\uD835\uDCCD", "𝓍"); + initialMap.put("\uD835\uDCCE", "𝓎"); + initialMap.put("\uD835\uDCCF", "𝓏"); + initialMap.put("\uD835\uDD04", "𝔄"); + initialMap.put("\uD835\uDD05", "𝔅"); + initialMap.put("\uD835\uDD07", "𝔇"); + initialMap.put("\uD835\uDD08", "𝔈"); + initialMap.put("\uD835\uDD09", "𝔉"); + initialMap.put("\uD835\uDD0A", "𝔊"); + initialMap.put("\uD835\uDD0D", "𝔍"); + initialMap.put("\uD835\uDD0E", "𝔎"); + initialMap.put("\uD835\uDD0F", "𝔏"); + initialMap.put("\uD835\uDD10", "𝔐"); + initialMap.put("\uD835\uDD11", "𝔑"); + initialMap.put("\uD835\uDD12", "𝔒"); + initialMap.put("\uD835\uDD13", "𝔓"); + initialMap.put("\uD835\uDD14", "𝔔"); + initialMap.put("\uD835\uDD16", "𝔖"); + initialMap.put("\uD835\uDD17", "𝔗"); + initialMap.put("\uD835\uDD18", "𝔘"); + initialMap.put("\uD835\uDD19", "𝔙"); + initialMap.put("\uD835\uDD1A", "𝔚"); + initialMap.put("\uD835\uDD1B", "𝔛"); + initialMap.put("\uD835\uDD1C", "𝔜"); + initialMap.put("\uD835\uDD1E", "𝔞"); + initialMap.put("\uD835\uDD1F", "𝔟"); + initialMap.put("\uD835\uDD20", "𝔠"); + initialMap.put("\uD835\uDD21", "𝔡"); + initialMap.put("\uD835\uDD22", "𝔢"); + initialMap.put("\uD835\uDD23", "𝔣"); + initialMap.put("\uD835\uDD24", "𝔤"); + initialMap.put("\uD835\uDD25", "𝔥"); + initialMap.put("\uD835\uDD26", "𝔦"); + initialMap.put("\uD835\uDD27", "𝔧"); + initialMap.put("\uD835\uDD28", "𝔨"); + initialMap.put("\uD835\uDD29", "𝔩"); + initialMap.put("\uD835\uDD2A", "𝔪"); + initialMap.put("\uD835\uDD2B", "𝔫"); + initialMap.put("\uD835\uDD2C", "𝔬"); + initialMap.put("\uD835\uDD2D", "𝔭"); + initialMap.put("\uD835\uDD2E", "𝔮"); + initialMap.put("\uD835\uDD2F", "𝔯"); + initialMap.put("\uD835\uDD30", "𝔰"); + initialMap.put("\uD835\uDD31", "𝔱"); + initialMap.put("\uD835\uDD32", "𝔲"); + initialMap.put("\uD835\uDD33", "𝔳"); + initialMap.put("\uD835\uDD34", "𝔴"); + initialMap.put("\uD835\uDD35", "𝔵"); + 
initialMap.put("\uD835\uDD36", "𝔶"); + initialMap.put("\uD835\uDD37", "𝔷"); + initialMap.put("\uD835\uDD38", "𝔸"); + initialMap.put("\uD835\uDD39", "𝔹"); + initialMap.put("\uD835\uDD3B", "𝔻"); + initialMap.put("\uD835\uDD3C", "𝔼"); + initialMap.put("\uD835\uDD3D", "𝔽"); + initialMap.put("\uD835\uDD3E", "𝔾"); + initialMap.put("\uD835\uDD40", "𝕀"); + initialMap.put("\uD835\uDD41", "𝕁"); + initialMap.put("\uD835\uDD42", "𝕂"); + initialMap.put("\uD835\uDD43", "𝕃"); + initialMap.put("\uD835\uDD44", "𝕄"); + initialMap.put("\uD835\uDD46", "𝕆"); + initialMap.put("\uD835\uDD4A", "𝕊"); + initialMap.put("\uD835\uDD4B", "𝕋"); + initialMap.put("\uD835\uDD4C", "𝕌"); + initialMap.put("\uD835\uDD4D", "𝕍"); + initialMap.put("\uD835\uDD4E", "𝕎"); + initialMap.put("\uD835\uDD4F", "𝕏"); + initialMap.put("\uD835\uDD50", "𝕐"); + initialMap.put("\uD835\uDD52", "𝕒"); + initialMap.put("\uD835\uDD53", "𝕓"); + initialMap.put("\uD835\uDD54", "𝕔"); + initialMap.put("\uD835\uDD55", "𝕕"); + initialMap.put("\uD835\uDD56", "𝕖"); + initialMap.put("\uD835\uDD57", "𝕗"); + initialMap.put("\uD835\uDD58", "𝕘"); + initialMap.put("\uD835\uDD59", "𝕙"); + initialMap.put("\uD835\uDD5A", "𝕚"); + initialMap.put("\uD835\uDD5B", "𝕛"); + initialMap.put("\uD835\uDD5C", "𝕜"); + initialMap.put("\uD835\uDD5D", "𝕝"); + initialMap.put("\uD835\uDD5E", "𝕞"); + initialMap.put("\uD835\uDD5F", "𝕟"); + initialMap.put("\uD835\uDD60", "𝕠"); + initialMap.put("\uD835\uDD61", "𝕡"); + initialMap.put("\uD835\uDD62", "𝕢"); + initialMap.put("\uD835\uDD63", "𝕣"); + initialMap.put("\uD835\uDD64", "𝕤"); + initialMap.put("\uD835\uDD65", "𝕥"); + initialMap.put("\uD835\uDD66", "𝕦"); + initialMap.put("\uD835\uDD67", "𝕧"); + initialMap.put("\uD835\uDD68", "𝕨"); + initialMap.put("\uD835\uDD69", "𝕩"); + initialMap.put("\uD835\uDD6A", "𝕪"); + initialMap.put("\uD835\uDD6B", "𝕫"); + initialMap.put("\uFB00", "ff"); + initialMap.put("\uFB01", "fi"); + initialMap.put("\uFB02", "fl"); + initialMap.put("\uFB03", "ffi"); + initialMap.put("\uFB04", 
"ffl"); + HTML50_EXTENDED_ESCAPE = Collections.unmodifiableMap(initialMap); + } + + /** + * Reverse of {@link #HTML50_EXTENDED_ESCAPE} for unescaping purposes. + * Additional character entities, synonymous with ones found in {@link #BASIC_ESCAPE}, + * {@link #ISO8859_1_ESCAPE}, {@link #HTML40_EXTENDED_ESCAPE} and + * {@link #HTML50_EXTENDED_ESCAPE} are added. + */ + public static final Map HTML50_EXTENDED_UNESCAPE; + + static { + final Map initialMap = invert(HTML50_EXTENDED_ESCAPE); + initialMap.put("*", "\u002A"); + initialMap.put("[", "\u005B"); + initialMap.put("]", "\u005D"); + initialMap.put("_", "\u005F"); + initialMap.put("`", "\u0060"); + initialMap.put("{", "\u007B"); + initialMap.put("|", "\u007C"); + initialMap.put("|", "\u007C"); + initialMap.put("}", "\u007D"); + initialMap.put(" ", "\u00A0"); + initialMap.put("¨", "\u00A8"); + initialMap.put("¨", "\u00A8"); + initialMap.put("¨", "\u00A8"); + initialMap.put("©", "\u00A9"); + initialMap.put("®", "\u00AE"); + initialMap.put("®", "\u00AE"); + initialMap.put("¯", "\u00AF"); + initialMap.put("±", "\u00B1"); + initialMap.put("±", "\u00B1"); + initialMap.put("´", "\u00B4"); + initialMap.put("·", "\u00B7"); + initialMap.put("·", "\u00B7"); + initialMap.put("¸", "\u00B8"); + initialMap.put("½", "\u00BD"); + initialMap.put("Å", "\u00C5"); + initialMap.put("÷", "\u00F7"); + initialMap.put("ı", "\u0131"); + initialMap.put("ˇ", "\u02C7"); + initialMap.put("˘", "\u02D8"); + initialMap.put("˙", "\u02D9"); + initialMap.put("˜", "\u02DC"); + initialMap.put("˝", "\u02DD"); + initialMap.put("Ω", "\u03A9"); + initialMap.put("ε", "\u03B5"); + initialMap.put("ς", "\u03C2"); + initialMap.put("ς", "\u03C2"); + initialMap.put("υ", "\u03C5"); + initialMap.put("ϑ", "\u03D1"); + initialMap.put("ϑ", "\u03D1"); + initialMap.put("ϒ", "\u03D2"); + initialMap.put("ϕ", "\u03D5"); + initialMap.put("ϕ", "\u03D5"); + initialMap.put("ϖ", "\u03D6"); + initialMap.put("ϝ", "\u03DD"); + initialMap.put("ϰ", "\u03F0"); + initialMap.put("ϱ", 
"\u03F1"); + initialMap.put("ϵ", "\u03F5"); + initialMap.put("ϵ", "\u03F5"); + initialMap.put("϶", "\u03F6"); + initialMap.put(" ", "\u2009"); + initialMap.put(" ", "\u200A"); + initialMap.put("​", "\u200B"); + initialMap.put("​", "\u200B"); + initialMap.put("​", "\u200B"); + initialMap.put("​", "\u200B"); + initialMap.put("‐", "\u2010"); + initialMap.put("‖", "\u2016"); + initialMap.put("‘", "\u2018"); + initialMap.put("’", "\u2019"); + initialMap.put("’", "\u2019"); + initialMap.put("‚", "\u201A"); + initialMap.put("“", "\u201C"); + initialMap.put("”", "\u201D"); + initialMap.put("”", "\u201D"); + initialMap.put("„", "\u201E"); + initialMap.put("‡", "\u2021"); + initialMap.put("•", "\u2022"); + initialMap.put("…", "\u2026"); + initialMap.put("‵", "\u2035"); + initialMap.put("‾", "\u203E"); + initialMap.put("⁡", "\u2061"); + initialMap.put("⁢", "\u2062"); + initialMap.put("⁣", "\u2063"); + initialMap.put("⃛", "\u20DB"); + initialMap.put("ℂ", "\u2102"); + initialMap.put("ℋ", "\u210B"); + initialMap.put("ℋ", "\u210B"); + initialMap.put("ℌ", "\u210C"); + initialMap.put("ℍ", "\u210D"); + initialMap.put("ℏ", "\u210F"); + initialMap.put("ℏ", "\u210F"); + initialMap.put("ℏ", "\u210F"); + initialMap.put("ℐ", "\u2110"); + initialMap.put("ℑ", "\u2111"); + initialMap.put("ℑ", "\u2111"); + initialMap.put("ℑ", "\u2111"); + initialMap.put("ℒ", "\u2112"); + initialMap.put("ℒ", "\u2112"); + initialMap.put("ℕ", "\u2115"); + initialMap.put("℘", "\u2118"); + initialMap.put("ℙ", "\u2119"); + initialMap.put("ℚ", "\u211A"); + initialMap.put("ℛ", "\u211B"); + initialMap.put("ℜ", "\u211C"); + initialMap.put("ℜ", "\u211C"); + initialMap.put("ℜ", "\u211C"); + initialMap.put("ℝ", "\u211D"); + initialMap.put("™", "\u2122"); + initialMap.put("ℤ", "\u2124"); + initialMap.put("ℨ", "\u2128"); + initialMap.put("ℬ", "\u212C"); + initialMap.put("ℬ", "\u212C"); + initialMap.put("ℭ", "\u212D"); + initialMap.put("ℰ", "\u2130"); + initialMap.put("ℱ", "\u2131"); + initialMap.put("ℳ", "\u2133"); + 
initialMap.put("ℳ", "\u2133"); + initialMap.put("ℴ", "\u2134"); + initialMap.put("ℴ", "\u2134"); + initialMap.put("ℵ", "\u2135"); + initialMap.put("ⅅ", "\u2145"); + initialMap.put("ⅆ", "\u2146"); + initialMap.put("ⅇ", "\u2147"); + initialMap.put("ⅇ", "\u2147"); + initialMap.put("ⅈ", "\u2148"); + initialMap.put("←", "\u2190"); + initialMap.put("←", "\u2190"); + initialMap.put("←", "\u2190"); + initialMap.put("←", "\u2190"); + initialMap.put("↑", "\u2191"); + initialMap.put("↑", "\u2191"); + initialMap.put("↑", "\u2191"); + initialMap.put("→", "\u2192"); + initialMap.put("→", "\u2192"); + initialMap.put("→", "\u2192"); + initialMap.put("→", "\u2192"); + initialMap.put("↓", "\u2193"); + initialMap.put("↓", "\u2193"); + initialMap.put("↓", "\u2193"); + initialMap.put("↔", "\u2194"); + initialMap.put("↔", "\u2194"); + initialMap.put("↕", "\u2195"); + initialMap.put("↕", "\u2195"); + initialMap.put("↖", "\u2196"); + initialMap.put("↖", "\u2196"); + initialMap.put("↗", "\u2197"); + initialMap.put("↗", "\u2197"); + initialMap.put("↘", "\u2198"); + initialMap.put("↘", "\u2198"); + initialMap.put("↙", "\u2199"); + initialMap.put("↙", "\u2199"); + initialMap.put("↚", "\u219A"); + initialMap.put("↛", "\u219B"); + initialMap.put("↝", "\u219D"); + initialMap.put("↞", "\u219E"); + initialMap.put("↠", "\u21A0"); + initialMap.put("↢", "\u21A2"); + initialMap.put("↣", "\u21A3"); + initialMap.put("↤", "\u21A4"); + initialMap.put("↥", "\u21A5"); + initialMap.put("↦", "\u21A6"); + initialMap.put("↦", "\u21A6"); + initialMap.put("↧", "\u21A7"); + initialMap.put("↩", "\u21A9"); + initialMap.put("↪", "\u21AA"); + initialMap.put("↫", "\u21AB"); + initialMap.put("↬", "\u21AC"); + initialMap.put("↭", "\u21AD"); + initialMap.put("↮", "\u21AE"); + initialMap.put("↰", "\u21B0"); + initialMap.put("↱", "\u21B1"); + initialMap.put("↶", "\u21B6"); + initialMap.put("↷", "\u21B7"); + initialMap.put("↺", "\u21BA"); + initialMap.put("↻", "\u21BB"); + initialMap.put("↼", "\u21BC"); + initialMap.put("↼", 
"\u21BC"); + initialMap.put("↽", "\u21BD"); + initialMap.put("↽", "\u21BD"); + initialMap.put("↾", "\u21BE"); + initialMap.put("↾", "\u21BE"); + initialMap.put("↿", "\u21BF"); + initialMap.put("↿", "\u21BF"); + initialMap.put("⇀", "\u21C0"); + initialMap.put("⇀", "\u21C0"); + initialMap.put("⇁", "\u21C1"); + initialMap.put("⇁", "\u21C1"); + initialMap.put("⇂", "\u21C2"); + initialMap.put("⇂", "\u21C2"); + initialMap.put("⇃", "\u21C3"); + initialMap.put("⇃", "\u21C3"); + initialMap.put("⇄", "\u21C4"); + initialMap.put("⇄", "\u21C4"); + initialMap.put("⇅", "\u21C5"); + initialMap.put("⇆", "\u21C6"); + initialMap.put("⇆", "\u21C6"); + initialMap.put("⇇", "\u21C7"); + initialMap.put("⇈", "\u21C8"); + initialMap.put("⇉", "\u21C9"); + initialMap.put("⇊", "\u21CA"); + initialMap.put("⇋", "\u21CB"); + initialMap.put("⇋", "\u21CB"); + initialMap.put("⇌", "\u21CC"); + initialMap.put("⇌", "\u21CC"); + initialMap.put("⇍", "\u21CD"); + initialMap.put("⇎", "\u21CE"); + initialMap.put("⇏", "\u21CF"); + initialMap.put("⇐", "\u21D0"); + initialMap.put("⇐", "\u21D0"); + initialMap.put("⇑", "\u21D1"); + initialMap.put("⇑", "\u21D1"); + initialMap.put("⇒", "\u21D2"); + initialMap.put("⇒", "\u21D2"); + initialMap.put("⇒", "\u21D2"); + initialMap.put("⇓", "\u21D3"); + initialMap.put("⇓", "\u21D3"); + initialMap.put("⇔", "\u21D4"); + initialMap.put("⇔", "\u21D4"); + initialMap.put("⇔", "\u21D4"); + initialMap.put("⇕", "\u21D5"); + initialMap.put("⇕", "\u21D5"); + initialMap.put("⇚", "\u21DA"); + initialMap.put("⇛", "\u21DB"); + initialMap.put("⇤", "\u21E4"); + initialMap.put("⇥", "\u21E5"); + initialMap.put("⇵", "\u21F5"); + initialMap.put("∀", "\u2200"); + initialMap.put("∁", "\u2201"); + initialMap.put("∂", "\u2202"); + initialMap.put("∃", "\u2203"); + initialMap.put("∄", "\u2204"); + initialMap.put("∄", "\u2204"); + initialMap.put("∅", "\u2205"); + initialMap.put("∅", "\u2205"); + initialMap.put("∅", "\u2205"); + initialMap.put("∇", "\u2207"); + initialMap.put("∈", "\u2208"); + 
initialMap.put("∈", "\u2208"); + initialMap.put("∈", "\u2208"); + initialMap.put("∉", "\u2209"); + initialMap.put("∉", "\u2209"); + initialMap.put("∋", "\u220B"); + initialMap.put("∋", "\u220B"); + initialMap.put("∋", "\u220B"); + initialMap.put("∌", "\u220C"); + initialMap.put("∌", "\u220C"); + initialMap.put("∏", "\u220F"); + initialMap.put("∐", "\u2210"); + initialMap.put("∑", "\u2211"); + initialMap.put("∓", "\u2213"); + initialMap.put("∓", "\u2213"); + initialMap.put("∔", "\u2214"); + initialMap.put("∖", "\u2216"); + initialMap.put("∖", "\u2216"); + initialMap.put("∖", "\u2216"); + initialMap.put("∖", "\u2216"); + initialMap.put("∘", "\u2218"); + initialMap.put("√", "\u221A"); + initialMap.put("∝", "\u221D"); + initialMap.put("∝", "\u221D"); + initialMap.put("∝", "\u221D"); + initialMap.put("∝", "\u221D"); + initialMap.put("∠", "\u2220"); + initialMap.put("∡", "\u2221"); + initialMap.put("∣", "\u2223"); + initialMap.put("∣", "\u2223"); + initialMap.put("∣", "\u2223"); + initialMap.put("∤", "\u2224"); + initialMap.put("∤", "\u2224"); + initialMap.put("∤", "\u2224"); + initialMap.put("∥", "\u2225"); + initialMap.put("∥", "\u2225"); + initialMap.put("∥", "\u2225"); + initialMap.put("∥", "\u2225"); + initialMap.put("∦", "\u2226"); + initialMap.put("∦", "\u2226"); + initialMap.put("∦", "\u2226"); + initialMap.put("∦", "\u2226"); + initialMap.put("∧", "\u2227"); + initialMap.put("∨", "\u2228"); + initialMap.put("∫", "\u222B"); + initialMap.put("∭", "\u222D"); + initialMap.put("∮", "\u222E"); + initialMap.put("∮", "\u222E"); + initialMap.put("∯", "\u222F"); + initialMap.put("∲", "\u2232"); + initialMap.put("∳", "\u2233"); + initialMap.put("∴", "\u2234"); + initialMap.put("∴", "\u2234"); + initialMap.put("∵", "\u2235"); + initialMap.put("∵", "\u2235"); + initialMap.put("∷", "\u2237"); + initialMap.put("∸", "\u2238"); + initialMap.put("∼", "\u223C"); + initialMap.put("∼", "\u223C"); + initialMap.put("∼", "\u223C"); + initialMap.put("∽", "\u223D"); + initialMap.put("∾", 
"\u223E"); + initialMap.put("≀", "\u2240"); + initialMap.put("≀", "\u2240"); + initialMap.put("≁", "\u2241"); + initialMap.put("≂", "\u2242"); + initialMap.put("≂", "\u2242"); + initialMap.put("≂̸", "\u2242\u0338"); + initialMap.put("≃", "\u2243"); + initialMap.put("≃", "\u2243"); + initialMap.put("≄", "\u2244"); + initialMap.put("≄", "\u2244"); + initialMap.put("≅", "\u2245"); + initialMap.put("≇", "\u2247"); + initialMap.put("≈", "\u2248"); + initialMap.put("≈", "\u2248"); + initialMap.put("≈", "\u2248"); + initialMap.put("≈", "\u2248"); + initialMap.put("≈", "\u2248"); + initialMap.put("≉", "\u2249"); + initialMap.put("≉", "\u2249"); + initialMap.put("≊", "\u224A"); + initialMap.put("≌", "\u224C"); + initialMap.put("≍", "\u224D"); + initialMap.put("≎", "\u224E"); + initialMap.put("≎", "\u224E"); + initialMap.put("≎̸", "\u224E\u0338"); + initialMap.put("≏", "\u224F"); + initialMap.put("≏", "\u224F"); + initialMap.put("≏̸", "\u224F\u0338"); + initialMap.put("≐", "\u2250"); + initialMap.put("≐", "\u2250"); + initialMap.put("≑", "\u2251"); + initialMap.put("≒", "\u2252"); + initialMap.put("≓", "\u2253"); + initialMap.put("≔", "\u2254"); + initialMap.put("≔", "\u2254"); + initialMap.put("≕", "\u2255"); + initialMap.put("≖", "\u2256"); + initialMap.put("≗", "\u2257"); + initialMap.put("≜", "\u225C"); + initialMap.put("≟", "\u225F"); + initialMap.put("≠", "\u2260"); + initialMap.put("≡", "\u2261"); + initialMap.put("≢", "\u2262"); + initialMap.put("≤", "\u2264"); + initialMap.put("≥", "\u2265"); + initialMap.put("≥", "\u2265"); + initialMap.put("≦", "\u2266"); + initialMap.put("≦", "\u2266"); + initialMap.put("≦̸", "\u2266\u0338"); + initialMap.put("≧", "\u2267"); + initialMap.put("≧", "\u2267"); + initialMap.put("≧̸", "\u2267\u0338"); + initialMap.put("≧̸", "\u2267\u0338"); + initialMap.put("≨", "\u2268"); + initialMap.put("≨︀", "\u2268\uFE00"); + initialMap.put("≩", "\u2269"); + initialMap.put("≩︀", "\u2269\uFE00"); + initialMap.put("≪", "\u226A"); + 
initialMap.put("≪", "\u226A"); + initialMap.put("≪̸", "\u226A\u0338"); + initialMap.put("≫", "\u226B"); + initialMap.put("≫", "\u226B"); + initialMap.put("≫̸", "\u226B\u0338"); + initialMap.put("≬", "\u226C"); + initialMap.put("≮", "\u226E"); + initialMap.put("≮", "\u226E"); + initialMap.put("≯", "\u226F"); + initialMap.put("≯", "\u226F"); + initialMap.put("≰", "\u2270"); + initialMap.put("≰", "\u2270"); + initialMap.put("≱", "\u2271"); + initialMap.put("≱", "\u2271"); + initialMap.put("≲", "\u2272"); + initialMap.put("≲", "\u2272"); + initialMap.put("≳", "\u2273"); + initialMap.put("≳", "\u2273"); + initialMap.put("≴", "\u2274"); + initialMap.put("≵", "\u2275"); + initialMap.put("≶", "\u2276"); + initialMap.put("≶", "\u2276"); + initialMap.put("≷", "\u2277"); + initialMap.put("≷", "\u2277"); + initialMap.put("≸", "\u2278"); + initialMap.put("≹", "\u2279"); + initialMap.put("≺", "\u227A"); + initialMap.put("≺", "\u227A"); + initialMap.put("≻", "\u227B"); + initialMap.put("≻", "\u227B"); + initialMap.put("≼", "\u227C"); + initialMap.put("≼", "\u227C"); + initialMap.put("≽", "\u227D"); + initialMap.put("≽", "\u227D"); + initialMap.put("≾", "\u227E"); + initialMap.put("≾", "\u227E"); + initialMap.put("≿", "\u227F"); + initialMap.put("≿", "\u227F"); + initialMap.put("⊀", "\u2280"); + initialMap.put("⊀", "\u2280"); + initialMap.put("⊁", "\u2281"); + initialMap.put("⊁", "\u2281"); + initialMap.put("⊂", "\u2282"); + initialMap.put("⊂⃒", "\u2282\u20D2"); + initialMap.put("⊂⃒", "\u2282\u20D2"); + initialMap.put("⊃", "\u2283"); + initialMap.put("⊃", "\u2283"); + initialMap.put("⊃⃒", "\u2283\u20D2"); + initialMap.put("⊃⃒", "\u2283\u20D2"); + initialMap.put("⊆", "\u2286"); + initialMap.put("⊆", "\u2286"); + initialMap.put("⊇", "\u2287"); + initialMap.put("⊇", "\u2287"); + initialMap.put("⊈", "\u2288"); + initialMap.put("⊈", "\u2288"); + initialMap.put("⊉", "\u2289"); + initialMap.put("⊉", "\u2289"); + initialMap.put("⊊", "\u228A"); + initialMap.put("⊊︀", "\u228A\uFE00"); + 
initialMap.put("⊋", "\u228B"); + initialMap.put("⊋︀", "\u228B\uFE00"); + initialMap.put("⊎", "\u228E"); + initialMap.put("⊏", "\u228F"); + initialMap.put("⊏", "\u228F"); + initialMap.put("⊐", "\u2290"); + initialMap.put("⊐", "\u2290"); + initialMap.put("⊑", "\u2291"); + initialMap.put("⊑", "\u2291"); + initialMap.put("⊒", "\u2292"); + initialMap.put("⊒", "\u2292"); + initialMap.put("⊓", "\u2293"); + initialMap.put("⊔", "\u2294"); + initialMap.put("⊕", "\u2295"); + initialMap.put("⊖", "\u2296"); + initialMap.put("⊗", "\u2297"); + initialMap.put("⊙", "\u2299"); + initialMap.put("⊚", "\u229A"); + initialMap.put("⊛", "\u229B"); + initialMap.put("⊝", "\u229D"); + initialMap.put("⊞", "\u229E"); + initialMap.put("⊟", "\u229F"); + initialMap.put("⊠", "\u22A0"); + initialMap.put("⊡", "\u22A1"); + initialMap.put("⊢", "\u22A2"); + initialMap.put("⊣", "\u22A3"); + initialMap.put("⊤", "\u22A4"); + initialMap.put("⊥", "\u22A5"); + initialMap.put("⊥", "\u22A5"); + initialMap.put("⊥", "\u22A5"); + initialMap.put("⊨", "\u22A8"); + initialMap.put("⊲", "\u22B2"); + initialMap.put("⊲", "\u22B2"); + initialMap.put("⊳", "\u22B3"); + initialMap.put("⊳", "\u22B3"); + initialMap.put("⊴", "\u22B4"); + initialMap.put("⊴", "\u22B4"); + initialMap.put("⊵", "\u22B5"); + initialMap.put("⊵", "\u22B5"); + initialMap.put("⊸", "\u22B8"); + initialMap.put("⊺", "\u22BA"); + initialMap.put("⋀", "\u22C0"); + initialMap.put("⋀", "\u22C0"); + initialMap.put("⋁", "\u22C1"); + initialMap.put("⋁", "\u22C1"); + initialMap.put("⋂", "\u22C2"); + initialMap.put("⋂", "\u22C2"); + initialMap.put("⋃", "\u22C3"); + initialMap.put("⋃", "\u22C3"); + initialMap.put("⋄", "\u22C4"); + initialMap.put("⋄", "\u22C4"); + initialMap.put("⋆", "\u22C6"); + initialMap.put("⋇", "\u22C7"); + initialMap.put("⋋", "\u22CB"); + initialMap.put("⋌", "\u22CC"); + initialMap.put("⋍", "\u22CD"); + initialMap.put("⋎", "\u22CE"); + initialMap.put("⋏", "\u22CF"); + initialMap.put("⋐", "\u22D0"); + initialMap.put("⋑", "\u22D1"); + 
initialMap.put("⋔", "\u22D4"); + initialMap.put("⋖", "\u22D6"); + initialMap.put("⋗", "\u22D7"); + initialMap.put("⋙", "\u22D9"); + initialMap.put("⋚", "\u22DA"); + initialMap.put("⋚", "\u22DA"); + initialMap.put("⋛", "\u22DB"); + initialMap.put("⋛", "\u22DB"); + initialMap.put("⋞", "\u22DE"); + initialMap.put("⋟", "\u22DF"); + initialMap.put("⋠", "\u22E0"); + initialMap.put("⋡", "\u22E1"); + initialMap.put("⋢", "\u22E2"); + initialMap.put("⋣", "\u22E3"); + initialMap.put("⋨", "\u22E8"); + initialMap.put("⋩", "\u22E9"); + initialMap.put("⋪", "\u22EA"); + initialMap.put("⋪", "\u22EA"); + initialMap.put("⋫", "\u22EB"); + initialMap.put("⋫", "\u22EB"); + initialMap.put("⋬", "\u22EC"); + initialMap.put("⋬", "\u22EC"); + initialMap.put("⋭", "\u22ED"); + initialMap.put("⋭", "\u22ED"); + initialMap.put("⌅", "\u2305"); + initialMap.put("⌆", "\u2306"); + initialMap.put("⌈", "\u2308"); + initialMap.put("⌉", "\u2309"); + initialMap.put("⌊", "\u230A"); + initialMap.put("⌋", "\u230B"); + initialMap.put("⌜", "\u231C"); + initialMap.put("⌝", "\u231D"); + initialMap.put("⌞", "\u231E"); + initialMap.put("⌟", "\u231F"); + initialMap.put("⌢", "\u2322"); + initialMap.put("⌣", "\u2323"); + initialMap.put("⎰", "\u23B0"); + initialMap.put("⎱", "\u23B1"); + initialMap.put("⎴", "\u23B4"); + initialMap.put("⎵", "\u23B5"); + initialMap.put("Ⓢ", "\u24C8"); + initialMap.put("─", "\u2500"); + initialMap.put("□", "\u25A1"); + initialMap.put("□", "\u25A1"); + initialMap.put("▪", "\u25AA"); + initialMap.put("▪", "\u25AA"); + initialMap.put("▪", "\u25AA"); + initialMap.put("△", "\u25B3"); + initialMap.put("▴", "\u25B4"); + initialMap.put("▵", "\u25B5"); + initialMap.put("▸", "\u25B8"); + initialMap.put("▹", "\u25B9"); + initialMap.put("▽", "\u25BD"); + initialMap.put("▾", "\u25BE"); + initialMap.put("▿", "\u25BF"); + initialMap.put("◂", "\u25C2"); + initialMap.put("◃", "\u25C3"); + initialMap.put("◊", "\u25CA"); + initialMap.put("◯", "\u25EF"); + initialMap.put("★", "\u2605"); + initialMap.put("♠", 
"\u2660"); + initialMap.put("♣", "\u2663"); + initialMap.put("♥", "\u2665"); + initialMap.put("♦", "\u2666"); + initialMap.put("♮", "\u266E"); + initialMap.put("✓", "\u2713"); + initialMap.put("✠", "\u2720"); + initialMap.put("⟦", "\u27E6"); + initialMap.put("⟧", "\u27E7"); + initialMap.put("⟨", "\u27E8"); + initialMap.put("⟨", "\u27E8"); + initialMap.put("⟩", "\u27E9"); + initialMap.put("⟩", "\u27E9"); + initialMap.put("⟵", "\u27F5"); + initialMap.put("⟵", "\u27F5"); + initialMap.put("⟶", "\u27F6"); + initialMap.put("⟶", "\u27F6"); + initialMap.put("⟷", "\u27F7"); + initialMap.put("⟷", "\u27F7"); + initialMap.put("⟸", "\u27F8"); + initialMap.put("⟸", "\u27F8"); + initialMap.put("⟹", "\u27F9"); + initialMap.put("⟹", "\u27F9"); + initialMap.put("⟺", "\u27FA"); + initialMap.put("⟺", "\u27FA"); + initialMap.put("⟼", "\u27FC"); + initialMap.put("⤍", "\u290D"); + initialMap.put("⤏", "\u290F"); + initialMap.put("⤐", "\u2910"); + initialMap.put("⤥", "\u2925"); + initialMap.put("⤦", "\u2926"); + initialMap.put("⤨", "\u2928"); + initialMap.put("⤩", "\u2929"); + initialMap.put("⥮", "\u296E"); + initialMap.put("⥯", "\u296F"); + initialMap.put("⧫", "\u29EB"); + initialMap.put("⨀", "\u2A00"); + initialMap.put("⨁", "\u2A01"); + initialMap.put("⨂", "\u2A02"); + initialMap.put("⨄", "\u2A04"); + initialMap.put("⨆", "\u2A06"); + initialMap.put("⨌", "\u2A0C"); + initialMap.put("⨼", "\u2A3C"); + initialMap.put("⩷", "\u2A77"); + initialMap.put("⩽", "\u2A7D"); + initialMap.put("⩽", "\u2A7D"); + initialMap.put("⩽̸", "\u2A7D\u0338"); + initialMap.put("⩽̸", "\u2A7D\u0338"); + initialMap.put("⩾", "\u2A7E"); + initialMap.put("⩾", "\u2A7E"); + initialMap.put("⩾̸", "\u2A7E\u0338"); + initialMap.put("⩾̸", "\u2A7E\u0338"); + initialMap.put("⪅", "\u2A85"); + initialMap.put("⪆", "\u2A86"); + initialMap.put("⪇", "\u2A87"); + initialMap.put("⪈", "\u2A88"); + initialMap.put("⪉", "\u2A89"); + initialMap.put("⪊", "\u2A8A"); + initialMap.put("⪋", "\u2A8B"); + initialMap.put("⪌", "\u2A8C"); + 
initialMap.put("⪕", "\u2A95"); + initialMap.put("⪖", "\u2A96"); + initialMap.put("⪯", "\u2AAF"); + initialMap.put("⪯", "\u2AAF"); + initialMap.put("⪯̸", "\u2AAF\u0338"); + initialMap.put("⪯̸", "\u2AAF\u0338"); + initialMap.put("⪰", "\u2AB0"); + initialMap.put("⪰", "\u2AB0"); + initialMap.put("⪰̸", "\u2AB0\u0338"); + initialMap.put("⪰̸", "\u2AB0\u0338"); + initialMap.put("⪵", "\u2AB5"); + initialMap.put("⪶", "\u2AB6"); + initialMap.put("⪷", "\u2AB7"); + initialMap.put("⪸", "\u2AB8"); + initialMap.put("⪹", "\u2AB9"); + initialMap.put("⪺", "\u2ABA"); + initialMap.put("⫅", "\u2AC5"); + initialMap.put("⫅̸", "\u2AC5\u0338"); + initialMap.put("⫆", "\u2AC6"); + initialMap.put("⫆̸", "\u2AC6\u0338"); + initialMap.put("⫋", "\u2ACB"); + initialMap.put("⫋︀", "\u2ACB\uFE00"); + initialMap.put("⫌", "\u2ACC"); + initialMap.put("⫌︀", "\u2ACC\uFE00"); + initialMap.put("⫤", "\u2AE4"); + HTML50_EXTENDED_UNESCAPE = Collections.unmodifiableMap(initialMap); + } + + /** + * A Map<CharSequence, CharSequence> to unescape the character + * entities without semicolon (&amp, &quot, &nbsp...). + * + * Note that these character entities are not part of the HTML 5.0 standard + * but are nonetheless understood by virtually every modern browser. 
+     */
+    public static final Map<CharSequence, CharSequence> NO_SEMICOLON_UNESCAPE;
+
+    static { // keys are entity names written WITHOUT the trailing ';', values are the decoded text
+        final Map<CharSequence, CharSequence> initialMap = new HashMap<>();
+        initialMap.put("&quot", "\u005C\u0022"); // escape pair yields a one-character '"' string; a lone U+0022 escape would end the literal
+        initialMap.put("&QUOT", "\u005C\u0022"); // upper-case alias of the same reference
+        initialMap.put("&amp", "\u0026");
+        initialMap.put("&AMP", "\u0026");
+        initialMap.put("&lt", "\u003C");
+        initialMap.put("&LT", "\u003C");
+        initialMap.put("&gt", "\u003E");
+        initialMap.put("&GT", "\u003E");
+        initialMap.put("&nbsp", "\u00A0");
+        initialMap.put("&iexcl", "\u00A1");
+        initialMap.put("&cent", "\u00A2");
+        initialMap.put("&pound", "\u00A3");
+        initialMap.put("&curren", "\u00A4");
+        initialMap.put("&yen", "\u00A5");
+        initialMap.put("&brvbar", "\u00A6");
+        initialMap.put("&sect", "\u00A7");
+        initialMap.put("&uml", "\u00A8");
+        initialMap.put("&copy", "\u00A9");
+        initialMap.put("&COPY", "\u00A9");
+        initialMap.put("&ordf", "\u00AA");
+        initialMap.put("&laquo", "\u00AB");
+        initialMap.put("&not", "\u00AC");
+        initialMap.put("&shy", "\u00AD");
+        initialMap.put("&reg", "\u00AE");
+        initialMap.put("&REG", "\u00AE");
+        initialMap.put("&macr", "\u00AF");
+        initialMap.put("&deg", "\u00B0");
+        initialMap.put("&plusmn", "\u00B1");
+        initialMap.put("&sup2", "\u00B2");
+        initialMap.put("&sup3", "\u00B3");
+        initialMap.put("&acute", "\u00B4");
+        initialMap.put("&micro", "\u00B5");
+        initialMap.put("&para", "\u00B6");
+        initialMap.put("&middot", "\u00B7");
+        initialMap.put("&cedil", "\u00B8");
+        initialMap.put("&sup1", "\u00B9");
+        initialMap.put("&ordm", "\u00BA");
+        initialMap.put("&raquo", "\u00BB");
+        initialMap.put("&frac14", "\u00BC");
+        initialMap.put("&frac12", "\u00BD");
+        initialMap.put("&frac34", "\u00BE");
+        initialMap.put("&iquest", "\u00BF");
+        initialMap.put("&Agrave", "\u00C0");
+        initialMap.put("&Aacute", "\u00C1");
+        initialMap.put("&Acirc", "\u00C2");
+        initialMap.put("&Atilde", "\u00C3");
+        initialMap.put("&Auml", "\u00C4");
+        initialMap.put("&Aring", "\u00C5");
+        initialMap.put("&AElig", "\u00C6");
+        initialMap.put("&Ccedil", "\u00C7");
+        initialMap.put("&Egrave", "\u00C8");
+        initialMap.put("&Eacute", "\u00C9");
+        initialMap.put("&Ecirc", "\u00CA");
+        initialMap.put("&Euml", "\u00CB");
+        initialMap.put("&Igrave", "\u00CC");
+        initialMap.put("&Iacute", "\u00CD");
+        initialMap.put("&Icirc", "\u00CE");
+        initialMap.put("&Iuml", "\u00CF");
+        initialMap.put("&ETH", "\u00D0");
+        initialMap.put("&Ntilde", "\u00D1");
+        initialMap.put("&Ograve", "\u00D2");
+        initialMap.put("&Oacute", "\u00D3");
+        initialMap.put("&Ocirc", "\u00D4");
+        initialMap.put("&Otilde", "\u00D5");
+        initialMap.put("&Ouml", "\u00D6");
+        initialMap.put("&times", "\u00D7");
+        initialMap.put("&Oslash", "\u00D8");
+        initialMap.put("&Ugrave", "\u00D9");
+        initialMap.put("&Uacute", "\u00DA");
+        initialMap.put("&Ucirc", "\u00DB");
+        initialMap.put("&Uuml", "\u00DC");
+        initialMap.put("&Yacute", "\u00DD");
+        initialMap.put("&THORN", "\u00DE");
+        initialMap.put("&szlig", "\u00DF");
+        initialMap.put("&agrave", "\u00E0");
+        initialMap.put("&aacute", "\u00E1");
+        initialMap.put("&acirc", "\u00E2");
+        initialMap.put("&atilde", "\u00E3");
+        initialMap.put("&auml", "\u00E4");
+        initialMap.put("&aring", "\u00E5");
+        initialMap.put("&aelig", "\u00E6");
+        initialMap.put("&ccedil", "\u00E7");
+        initialMap.put("&egrave", "\u00E8");
+        initialMap.put("&eacute", "\u00E9");
+        initialMap.put("&ecirc", "\u00EA");
+        initialMap.put("&euml", "\u00EB");
+        initialMap.put("&igrave", "\u00EC");
+        initialMap.put("&iacute", "\u00ED");
+        initialMap.put("&icirc", "\u00EE");
+        initialMap.put("&iuml", "\u00EF");
+        initialMap.put("&eth", "\u00F0");
+        initialMap.put("&ntilde", "\u00F1");
+        initialMap.put("&ograve", "\u00F2");
+        initialMap.put("&oacute", "\u00F3");
+        initialMap.put("&ocirc", "\u00F4");
+        initialMap.put("&otilde", "\u00F5");
+        initialMap.put("&ouml", "\u00F6");
+        initialMap.put("&divide", "\u00F7");
+        initialMap.put("&oslash", "\u00F8");
+        initialMap.put("&ugrave", "\u00F9");
+        initialMap.put("&uacute", "\u00FA");
+        initialMap.put("&ucirc", "\u00FB");
+        initialMap.put("&uuml", "\u00FC");
+        initialMap.put("&yacute", "\u00FD");
+        initialMap.put("&thorn", "\u00FE");
+        initialMap.put("&yuml", "\u00FF");
+        NO_SEMICOLON_UNESCAPE = Collections.unmodifiableMap(initialMap); // expose only a read-only view
+    }
+
     /**
      * A Map<CharSequence, CharSequence> to escape the basic XML and HTML
      * character entities.
diff --git a/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java b/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java index 01aa6c436c..fcfc9a2796 100644 --- a/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java +++ b/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java @@ -62,6 +62,22 @@ public class StringEscapeUtilsTest { {"8-bit ascii shouldn't number-escape", "\u0080\u009F", "\u0080\u009F"}, }; + private static final String[][] HTML5_ESCAPES = { + {"no escaping", "plain text", "plain text"}, + {"no escaping", "plain text", "plain text"}, + {"empty string", "", ""}, + {"null", null, null}, + {"ampersand", "bread & butter", "bread & butter"}, + {"quotes", ""bread" & butter", "\"bread\" & butter"}, + {"final character only", "greater than >", "greater than >"}, + {"first character only", "< less than", "< less than"}, + {"apostrophe", "Huntington's chorea", "Huntington's chorea"}, + {"languages", "English,Français,\u65E5\u672C\u8A9E (nihongo)", + "English,Fran\u00E7ais,\u65E5\u672C\u8A9E (nihongo)"}, + {"8-bit ascii shouldn't number-escape", "\u0080\u009F", "\u0080\u009F"}, + {"tabulation and new line", " Hello world! 
", "\u0009Hello world!\n"} + }; + private void assertEscapeJava(final String escaped, final String original) throws IOException { assertEscapeJava(escaped, original, null); } @@ -214,7 +230,7 @@ public void testEscapeHtml3() { final String message = element[0]; final String expected = element[1]; final String original = element[2]; - assertEquals(expected, StringEscapeUtils.escapeHtml4(original), message); + assertEquals(expected, StringEscapeUtils.escapeHtml3(original), message); final StringWriter sw = new StringWriter(); try { StringEscapeUtils.ESCAPE_HTML3.translate(original, sw); @@ -244,6 +260,24 @@ public void testEscapeHtml4() { } } + @Test + public void testEscapeHtml5() { + for (final String[] element : HTML5_ESCAPES) { + final String message = element[0]; + final String expected = element[1]; + final String original = element[2]; + assertEquals(expected, StringEscapeUtils.escapeHtml5(original), message); + final StringWriter sw = new StringWriter(); + try { + StringEscapeUtils.ESCAPE_HTML5.translate(original, sw); + } catch (final IOException e) { + // expected + } + final String actual = original == null ? null : sw.toString(); + assertEquals(expected, actual, message); + } + } + /** * Tests // https://issues.apache.org/jira/browse/LANG-480 */ @@ -565,6 +599,34 @@ public void testUnescapeHtml4() { assertEquals("Hello&##;World", StringEscapeUtils.unescapeHtml4("Hello&##;World")); } + @Test + public void testUnescapeHtml5() { + for (final String[] element : HTML5_ESCAPES) { + final String message = element[0]; + final String expected = element[2]; + final String original = element[1]; + assertEquals(expected, StringEscapeUtils.unescapeHtml5(original), message); + + final StringWriter sw = new StringWriter(); + try { + StringEscapeUtils.UNESCAPE_HTML5.translate(original, sw); + } catch (final IOException e) { + // expected + } + final String actual = original == null ? 
null : sw.toString(); + assertEquals(expected, actual, message); + } + // \u00E7 is a cedilla (c with wiggle under) + // note that the test string must be 7-bit-clean (Unicode escaped) or else it will compile incorrectly + // on some locales + assertEquals("Fran\u00E7ais", StringEscapeUtils.unescapeHtml5("Fran\u00E7ais"), "funny chars pass through OK"); + + assertEquals("Hello&;World", StringEscapeUtils.unescapeHtml5("Hello&;World")); + assertEquals("Hello&#;World", StringEscapeUtils.unescapeHtml5("Hello&#;World")); + assertEquals("Hello&# ;World", StringEscapeUtils.unescapeHtml5("Hello&# ;World")); + assertEquals("Hello&##;World", StringEscapeUtils.unescapeHtml5("Hello&##;World")); + } + @Test public void testUnescapeJava() throws IOException { assertNull(StringEscapeUtils.unescapeJava(null)); diff --git a/src/test/java/org/apache/commons/text/translate/EntityArraysTest.java b/src/test/java/org/apache/commons/text/translate/EntityArraysTest.java index 3361e8e59d..61aebdf66b 100644 --- a/src/test/java/org/apache/commons/text/translate/EntityArraysTest.java +++ b/src/test/java/org/apache/commons/text/translate/EntityArraysTest.java @@ -22,6 +22,10 @@ import java.io.BufferedReader; import java.io.FileReader; import java.util.Map; +import java.util.HashMap; +import java.util.HashSet; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.junit.jupiter.api.Test; @@ -43,10 +47,17 @@ public void testForDuplicatedDeclaredMapKeys() throws Exception { + "/EntityArrays.java"))) { String line; int mapDeclarationCounter = 0; + final Map mapsSize = new HashMap<>(); + final Pattern pattern = Pattern.compile("invert\\((\\w+)\\)"); while ((line = br.readLine()) != null) { + final Matcher matcher = pattern.matcher(line); //Start with map declaration and count put lines if (line.contains("new HashMap<>();")) { mapDeclarationCounter = 0; + } else if (matcher.find()) { + final String mapVariableName = matcher.group(1); + 
assertThat(mapsSize).containsKeys(mapVariableName); + mapDeclarationCounter = mapsSize.get(mapVariableName); } else if (line.contains(".put(")) { mapDeclarationCounter++; } else if (line.contains("Collections.unmodifiableMap(initialMap);")) { @@ -58,6 +69,7 @@ public void testForDuplicatedDeclaredMapKeys() throws Exception { // Validate that we are not inserting into the same key twice in the map declaration. If this, // indeed was the case the keySet().size() would be smaller than the number of put() statements assertThat(mapValue.size()).isEqualTo(mapDeclarationCounter); + mapsSize.put(mapVariableName, mapDeclarationCounter); } } } @@ -85,6 +97,38 @@ public void testHtml40ExtendedMap() { testEscapeVsUnescapeMaps(EntityArrays.HTML40_EXTENDED_ESCAPE, EntityArrays.HTML40_EXTENDED_UNESCAPE); } + @Test + public void testForDuplicateDeclaredMapValuesHtml50ExtendedMap() { + assertThat(EntityArrays.HTML50_EXTENDED_UNESCAPE.keySet().size()).isGreaterThanOrEqualTo( + EntityArrays.HTML50_EXTENDED_ESCAPE.values().size()); + } + + @Test + public void testHtml50ExtendedMap() { + final String[] emptyArray = new String[0]; + assertThat(EntityArrays.HTML50_EXTENDED_UNESCAPE).containsKeys( + EntityArrays.HTML50_EXTENDED_ESCAPE.values().toArray(emptyArray)); + assertThat(EntityArrays.HTML50_EXTENDED_UNESCAPE).containsValues( + EntityArrays.HTML50_EXTENDED_ESCAPE.keySet().toArray(emptyArray)); + for (final Map.Entry entry : EntityArrays.HTML50_EXTENDED_ESCAPE.entrySet()) { + assertThat(entry.getKey()).isEqualTo(EntityArrays.HTML50_EXTENDED_UNESCAPE.get(entry.getValue())); + } + } + + @Test + public void testNoSemicolonMap() { + final Map aggregateMap = new HashMap<>(); + aggregateMap.putAll(EntityArrays.BASIC_UNESCAPE); + aggregateMap.putAll(EntityArrays.ISO8859_1_UNESCAPE); + aggregateMap.putAll(EntityArrays.HTML40_EXTENDED_UNESCAPE); + aggregateMap.putAll(EntityArrays.HTML50_EXTENDED_UNESCAPE); + for (final Map.Entry entry : EntityArrays.NO_SEMICOLON_UNESCAPE.entrySet()) { + final 
CharSequence entityWithSemicolon = entry.getKey() + ";"; + assertThat(aggregateMap).containsKey(entityWithSemicolon); + assertThat(aggregateMap.get(entityWithSemicolon)).isEqualTo(entry.getValue()); + } + } + @Test public void testForDuplicateDeclaredMapValuesAposMap() { assertThat(EntityArrays.APOS_ESCAPE.keySet()).hasSameSizeAs(