001/* 002 * Copyright 2002-2016 the original author or authors. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * https://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package org.springframework.web.util; 018 019import org.springframework.util.Assert; 020 021/** 022 * Utility class for HTML escaping. Escapes and unescapes 023 * based on the W3C HTML 4.01 recommendation, handling 024 * character entity references. 025 * 026 * <p>Reference: 027 * <a href="http://www.w3.org/TR/html4/charset.html">http://www.w3.org/TR/html4/charset.html</a> 028 * 029 * <p>For a comprehensive set of String escaping utilities, 030 * consider Apache Commons Lang and its StringEscapeUtils class. 031 * We are not using that class here to avoid a runtime dependency 032 * on Commons Lang just for HTML escaping. Furthermore, Spring's 033 * HTML escaping is more flexible and 100% HTML 4.0 compliant. 034 * 035 * @author Juergen Hoeller 036 * @author Martin Kersten 037 * @author Craig Andrews 038 * @since 01.03.2003 039 */ 040public abstract class HtmlUtils { 041 042 /** 043 * Shared instance of pre-parsed HTML character entity references. 044 */ 045 private static final HtmlCharacterEntityReferences characterEntityReferences = 046 new HtmlCharacterEntityReferences(); 047 048 049 /** 050 * Turn special characters into HTML character references. 051 * Handles complete character set defined in HTML 4.01 recommendation. 052 * <p>Escapes all special characters to their corresponding 053 * entity reference (e.g. {@code <}). 054 * <p>Reference: 055 * <a href="http://www.w3.org/TR/html4/sgml/entities.html"> 056 * http://www.w3.org/TR/html4/sgml/entities.html 057 * </a> 058 * @param input the (unescaped) input string 059 * @return the escaped string 060 */ 061 public static String htmlEscape(String input) { 062 return htmlEscape(input, WebUtils.DEFAULT_CHARACTER_ENCODING); 063 } 064 065 /** 066 * Turn special characters into HTML character references. 067 * Handles complete character set defined in HTML 4.01 recommendation. 068 * <p>Escapes all special characters to their corresponding 069 * entity reference (e.g. {@code <}) at least as required by the 070 * specified encoding. In other words, if a special character does 071 * not have to be escaped for the given encoding, it may not be. 072 * <p>Reference: 073 * <a href="http://www.w3.org/TR/html4/sgml/entities.html"> 074 * http://www.w3.org/TR/html4/sgml/entities.html 075 * </a> 076 * @param input the (unescaped) input string 077 * @param encoding the name of a supported {@link java.nio.charset.Charset charset} 078 * @return the escaped string 079 * @since 4.1.2 080 */ 081 public static String htmlEscape(String input, String encoding) { 082 Assert.notNull(encoding, "Encoding is required"); 083 if (input == null) { 084 return null; 085 } 086 StringBuilder escaped = new StringBuilder(input.length() * 2); 087 for (int i = 0; i < input.length(); i++) { 088 char character = input.charAt(i); 089 String reference = characterEntityReferences.convertToReference(character, encoding); 090 if (reference != null) { 091 escaped.append(reference); 092 } 093 else { 094 escaped.append(character); 095 } 096 } 097 return escaped.toString(); 098 } 099 100 /** 101 * Turn special characters into HTML character references. 102 * Handles complete character set defined in HTML 4.01 recommendation. 103 * <p>Escapes all special characters to their corresponding numeric 104 * reference in decimal format (&#<i>Decimal</i>;). 105 * <p>Reference: 106 * <a href="http://www.w3.org/TR/html4/sgml/entities.html"> 107 * http://www.w3.org/TR/html4/sgml/entities.html 108 * </a> 109 * @param input the (unescaped) input string 110 * @return the escaped string 111 */ 112 public static String htmlEscapeDecimal(String input) { 113 return htmlEscapeDecimal(input, WebUtils.DEFAULT_CHARACTER_ENCODING); 114 } 115 116 /** 117 * Turn special characters into HTML character references. 118 * Handles complete character set defined in HTML 4.01 recommendation. 119 * <p>Escapes all special characters to their corresponding numeric 120 * reference in decimal format (&#<i>Decimal</i>;) at least as required by the 121 * specified encoding. In other words, if a special character does 122 * not have to be escaped for the given encoding, it may not be. 123 * <p>Reference: 124 * <a href="http://www.w3.org/TR/html4/sgml/entities.html"> 125 * http://www.w3.org/TR/html4/sgml/entities.html 126 * </a> 127 * @param input the (unescaped) input string 128 * @param encoding the name of a supported {@link java.nio.charset.Charset charset} 129 * @return the escaped string 130 * @since 4.1.2 131 */ 132 public static String htmlEscapeDecimal(String input, String encoding) { 133 Assert.notNull(encoding, "Encoding is required"); 134 if (input == null) { 135 return null; 136 } 137 StringBuilder escaped = new StringBuilder(input.length() * 2); 138 for (int i = 0; i < input.length(); i++) { 139 char character = input.charAt(i); 140 if (characterEntityReferences.isMappedToReference(character, encoding)) { 141 escaped.append(HtmlCharacterEntityReferences.DECIMAL_REFERENCE_START); 142 escaped.append((int) character); 143 escaped.append(HtmlCharacterEntityReferences.REFERENCE_END); 144 } 145 else { 146 escaped.append(character); 147 } 148 } 149 return escaped.toString(); 150 } 151 152 /** 153 * Turn special characters into HTML character references. 154 * Handles complete character set defined in HTML 4.01 recommendation. 155 * <p>Escapes all special characters to their corresponding numeric 156 * reference in hex format (&#x<i>Hex</i>;). 157 * <p>Reference: 158 * <a href="http://www.w3.org/TR/html4/sgml/entities.html"> 159 * http://www.w3.org/TR/html4/sgml/entities.html 160 * </a> 161 * @param input the (unescaped) input string 162 * @return the escaped string 163 */ 164 public static String htmlEscapeHex(String input) { 165 return htmlEscapeHex(input, WebUtils.DEFAULT_CHARACTER_ENCODING); 166 } 167 168 /** 169 * Turn special characters into HTML character references. 170 * Handles complete character set defined in HTML 4.01 recommendation. 171 * <p>Escapes all special characters to their corresponding numeric 172 * reference in hex format (&#x<i>Hex</i>;) at least as required by the 173 * specified encoding. In other words, if a special character does 174 * not have to be escaped for the given encoding, it may not be. 175 * <p>Reference: 176 * <a href="http://www.w3.org/TR/html4/sgml/entities.html"> 177 * http://www.w3.org/TR/html4/sgml/entities.html 178 * </a> 179 * @param input the (unescaped) input string 180 * @param encoding the name of a supported {@link java.nio.charset.Charset charset} 181 * @return the escaped string 182 * @since 4.1.2 183 */ 184 public static String htmlEscapeHex(String input, String encoding) { 185 Assert.notNull(encoding, "Encoding is required"); 186 if (input == null) { 187 return null; 188 } 189 StringBuilder escaped = new StringBuilder(input.length() * 2); 190 for (int i = 0; i < input.length(); i++) { 191 char character = input.charAt(i); 192 if (characterEntityReferences.isMappedToReference(character, encoding)) { 193 escaped.append(HtmlCharacterEntityReferences.HEX_REFERENCE_START); 194 escaped.append(Integer.toString(character, 16)); 195 escaped.append(HtmlCharacterEntityReferences.REFERENCE_END); 196 } 197 else { 198 escaped.append(character); 199 } 200 } 201 return escaped.toString(); 202 } 203 204 /** 205 * Turn HTML character references into their plain text UNICODE equivalent. 206 * <p>Handles complete character set defined in HTML 4.01 recommendation 207 * and all reference types (decimal, hex, and entity). 208 * <p>Correctly converts the following formats: 209 * <blockquote> 210 * &#<i>Entity</i>; - <i>(Example: &amp;) case sensitive</i> 211 * &#<i>Decimal</i>; - <i>(Example: &#68;)</i><br> 212 * &#x<i>Hex</i>; - <i>(Example: &#xE5;) case insensitive</i><br> 213 * </blockquote> 214 * Gracefully handles malformed character references by copying original 215 * characters as is when encountered.<p> 216 * <p>Reference: 217 * <a href="http://www.w3.org/TR/html4/sgml/entities.html"> 218 * http://www.w3.org/TR/html4/sgml/entities.html 219 * </a> 220 * @param input the (escaped) input string 221 * @return the unescaped string 222 */ 223 public static String htmlUnescape(String input) { 224 if (input == null) { 225 return null; 226 } 227 return new HtmlCharacterEntityDecoder(characterEntityReferences, input).decode(); 228 } 229 230}