001/* 002 * Copyright 2002-2019 the original author or authors. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * https://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package org.springframework.web.util; 018 019import org.springframework.util.Assert; 020 021/** 022 * Utility class for HTML escaping. 023 * 024 * <p>Escapes and unescapes based on the W3C HTML 4.01 recommendation, handling 025 * character entity references. 026 * 027 * <p>Reference: 028 * <a href="https://www.w3.org/TR/html4/charset.html">https://www.w3.org/TR/html4/charset.html</a> 029 * 030 * <p>For a comprehensive set of String escaping utilities, consider 031 * <a href="https://commons.apache.org/proper/commons-text/">Apache Commons Text</a> 032 * and its {@code StringEscapeUtils} class. We do not use that class here in order 033 * to avoid a runtime dependency on Commons Text just for HTML escaping. Furthermore, 034 * Spring's HTML escaping is more flexible and 100% HTML 4.0 compliant. 035 * 036 * @author Juergen Hoeller 037 * @author Martin Kersten 038 * @author Craig Andrews 039 * @since 01.03.2003 040 */ 041public abstract class HtmlUtils { 042 043 /** 044 * Shared instance of pre-parsed HTML character entity references. 045 */ 046 private static final HtmlCharacterEntityReferences characterEntityReferences = 047 new HtmlCharacterEntityReferences(); 048 049 050 /** 051 * Turn special characters into HTML character references. 052 * <p>Handles complete character set defined in HTML 4.01 recommendation. 053 * <p>Escapes all special characters to their corresponding 054 * entity reference (e.g. {@code <}). 055 * <p>Reference: 056 * <a href="https://www.w3.org/TR/html4/sgml/entities.html"> 057 * https://www.w3.org/TR/html4/sgml/entities.html 058 * </a> 059 * @param input the (unescaped) input string 060 * @return the escaped string 061 */ 062 public static String htmlEscape(String input) { 063 return htmlEscape(input, WebUtils.DEFAULT_CHARACTER_ENCODING); 064 } 065 066 /** 067 * Turn special characters into HTML character references. 068 * <p>Handles complete character set defined in HTML 4.01 recommendation. 069 * <p>Escapes all special characters to their corresponding 070 * entity reference (e.g. {@code <}) at least as required by the 071 * specified encoding. In other words, if a special character does 072 * not have to be escaped for the given encoding, it may not be. 073 * <p>Reference: 074 * <a href="https://www.w3.org/TR/html4/sgml/entities.html"> 075 * https://www.w3.org/TR/html4/sgml/entities.html 076 * </a> 077 * @param input the (unescaped) input string 078 * @param encoding the name of a supported {@link java.nio.charset.Charset charset} 079 * @return the escaped string 080 * @since 4.1.2 081 */ 082 public static String htmlEscape(String input, String encoding) { 083 Assert.notNull(input, "Input is required"); 084 Assert.notNull(encoding, "Encoding is required"); 085 StringBuilder escaped = new StringBuilder(input.length() * 2); 086 for (int i = 0; i < input.length(); i++) { 087 char character = input.charAt(i); 088 String reference = characterEntityReferences.convertToReference(character, encoding); 089 if (reference != null) { 090 escaped.append(reference); 091 } 092 else { 093 escaped.append(character); 094 } 095 } 096 return escaped.toString(); 097 } 098 099 /** 100 * Turn special characters into HTML character references. 101 * <p>Handles complete character set defined in HTML 4.01 recommendation. 102 * <p>Escapes all special characters to their corresponding numeric 103 * reference in decimal format (&#<i>Decimal</i>;). 104 * <p>Reference: 105 * <a href="https://www.w3.org/TR/html4/sgml/entities.html"> 106 * https://www.w3.org/TR/html4/sgml/entities.html 107 * </a> 108 * @param input the (unescaped) input string 109 * @return the escaped string 110 */ 111 public static String htmlEscapeDecimal(String input) { 112 return htmlEscapeDecimal(input, WebUtils.DEFAULT_CHARACTER_ENCODING); 113 } 114 115 /** 116 * Turn special characters into HTML character references. 117 * <p>Handles complete character set defined in HTML 4.01 recommendation. 118 * <p>Escapes all special characters to their corresponding numeric 119 * reference in decimal format (&#<i>Decimal</i>;) at least as required by the 120 * specified encoding. In other words, if a special character does 121 * not have to be escaped for the given encoding, it may not be. 122 * <p>Reference: 123 * <a href="https://www.w3.org/TR/html4/sgml/entities.html"> 124 * https://www.w3.org/TR/html4/sgml/entities.html 125 * </a> 126 * @param input the (unescaped) input string 127 * @param encoding the name of a supported {@link java.nio.charset.Charset charset} 128 * @return the escaped string 129 * @since 4.1.2 130 */ 131 public static String htmlEscapeDecimal(String input, String encoding) { 132 Assert.notNull(input, "Input is required"); 133 Assert.notNull(encoding, "Encoding is required"); 134 StringBuilder escaped = new StringBuilder(input.length() * 2); 135 for (int i = 0; i < input.length(); i++) { 136 char character = input.charAt(i); 137 if (characterEntityReferences.isMappedToReference(character, encoding)) { 138 escaped.append(HtmlCharacterEntityReferences.DECIMAL_REFERENCE_START); 139 escaped.append((int) character); 140 escaped.append(HtmlCharacterEntityReferences.REFERENCE_END); 141 } 142 else { 143 escaped.append(character); 144 } 145 } 146 return escaped.toString(); 147 } 148 149 /** 150 * Turn special characters into HTML character references. 151 * <p>Handles complete character set defined in HTML 4.01 recommendation. 152 * <p>Escapes all special characters to their corresponding numeric 153 * reference in hex format (&#x<i>Hex</i>;). 154 * <p>Reference: 155 * <a href="https://www.w3.org/TR/html4/sgml/entities.html"> 156 * https://www.w3.org/TR/html4/sgml/entities.html 157 * </a> 158 * @param input the (unescaped) input string 159 * @return the escaped string 160 */ 161 public static String htmlEscapeHex(String input) { 162 return htmlEscapeHex(input, WebUtils.DEFAULT_CHARACTER_ENCODING); 163 } 164 165 /** 166 * Turn special characters into HTML character references. 167 * <p>Handles complete character set defined in HTML 4.01 recommendation. 168 * <p>Escapes all special characters to their corresponding numeric 169 * reference in hex format (&#x<i>Hex</i>;) at least as required by the 170 * specified encoding. In other words, if a special character does 171 * not have to be escaped for the given encoding, it may not be. 172 * <p>Reference: 173 * <a href="https://www.w3.org/TR/html4/sgml/entities.html"> 174 * https://www.w3.org/TR/html4/sgml/entities.html 175 * </a> 176 * @param input the (unescaped) input string 177 * @param encoding the name of a supported {@link java.nio.charset.Charset charset} 178 * @return the escaped string 179 * @since 4.1.2 180 */ 181 public static String htmlEscapeHex(String input, String encoding) { 182 Assert.notNull(input, "Input is required"); 183 Assert.notNull(encoding, "Encoding is required"); 184 StringBuilder escaped = new StringBuilder(input.length() * 2); 185 for (int i = 0; i < input.length(); i++) { 186 char character = input.charAt(i); 187 if (characterEntityReferences.isMappedToReference(character, encoding)) { 188 escaped.append(HtmlCharacterEntityReferences.HEX_REFERENCE_START); 189 escaped.append(Integer.toString(character, 16)); 190 escaped.append(HtmlCharacterEntityReferences.REFERENCE_END); 191 } 192 else { 193 escaped.append(character); 194 } 195 } 196 return escaped.toString(); 197 } 198 199 /** 200 * Turn HTML character references into their plain text UNICODE equivalent. 201 * <p>Handles complete character set defined in HTML 4.01 recommendation 202 * and all reference types (decimal, hex, and entity). 203 * <p>Correctly converts the following formats: 204 * <blockquote> 205 * &#<i>Entity</i>; - <i>(Example: &amp;) case sensitive</i> 206 * &#<i>Decimal</i>; - <i>(Example: &#68;)</i><br> 207 * &#x<i>Hex</i>; - <i>(Example: &#xE5;) case insensitive</i><br> 208 * </blockquote> 209 * <p>Gracefully handles malformed character references by copying original 210 * characters as is when encountered. 211 * <p>Reference: 212 * <a href="https://www.w3.org/TR/html4/sgml/entities.html"> 213 * https://www.w3.org/TR/html4/sgml/entities.html 214 * </a> 215 * @param input the (escaped) input string 216 * @return the unescaped string 217 */ 218 public static String htmlUnescape(String input) { 219 return new HtmlCharacterEntityDecoder(characterEntityReferences, input).decode(); 220 } 221 222}