001/*
002 * Copyright 2002-2016 the original author or authors.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *      https://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package org.springframework.web.util;
018
019import org.springframework.util.Assert;
020
021/**
022 * Utility class for HTML escaping. Escapes and unescapes
023 * based on the W3C HTML 4.01 recommendation, handling
024 * character entity references.
025 *
026 * <p>Reference:
027 * <a href="http://www.w3.org/TR/html4/charset.html">http://www.w3.org/TR/html4/charset.html</a>
028 *
029 * <p>For a comprehensive set of String escaping utilities,
030 * consider Apache Commons Lang and its StringEscapeUtils class.
031 * We are not using that class here to avoid a runtime dependency
032 * on Commons Lang just for HTML escaping. Furthermore, Spring's
033 * HTML escaping is more flexible and 100% HTML 4.0 compliant.
034 *
035 * @author Juergen Hoeller
036 * @author Martin Kersten
037 * @author Craig Andrews
038 * @since 01.03.2003
039 */
040public abstract class HtmlUtils {
041
042        /**
043         * Shared instance of pre-parsed HTML character entity references.
044         */
045        private static final HtmlCharacterEntityReferences characterEntityReferences =
046                        new HtmlCharacterEntityReferences();
047
048
049        /**
050         * Turn special characters into HTML character references.
051         * Handles complete character set defined in HTML 4.01 recommendation.
052         * <p>Escapes all special characters to their corresponding
053         * entity reference (e.g. {@code &lt;}).
054         * <p>Reference:
055         * <a href="http://www.w3.org/TR/html4/sgml/entities.html">
056         * http://www.w3.org/TR/html4/sgml/entities.html
057         * </a>
058         * @param input the (unescaped) input string
059         * @return the escaped string
060         */
061        public static String htmlEscape(String input) {
062                return htmlEscape(input, WebUtils.DEFAULT_CHARACTER_ENCODING);
063        }
064
065        /**
066         * Turn special characters into HTML character references.
067         * Handles complete character set defined in HTML 4.01 recommendation.
068         * <p>Escapes all special characters to their corresponding
069         * entity reference (e.g. {@code &lt;}) at least as required by the
070         * specified encoding. In other words, if a special character does
071         * not have to be escaped for the given encoding, it may not be.
072         * <p>Reference:
073         * <a href="http://www.w3.org/TR/html4/sgml/entities.html">
074         * http://www.w3.org/TR/html4/sgml/entities.html
075         * </a>
076         * @param input the (unescaped) input string
077         * @param encoding the name of a supported {@link java.nio.charset.Charset charset}
078         * @return the escaped string
079         * @since 4.1.2
080         */
081        public static String htmlEscape(String input, String encoding) {
082                Assert.notNull(encoding, "Encoding is required");
083                if (input == null) {
084                        return null;
085                }
086                StringBuilder escaped = new StringBuilder(input.length() * 2);
087                for (int i = 0; i < input.length(); i++) {
088                        char character = input.charAt(i);
089                        String reference = characterEntityReferences.convertToReference(character, encoding);
090                        if (reference != null) {
091                                escaped.append(reference);
092                        }
093                        else {
094                                escaped.append(character);
095                        }
096                }
097                return escaped.toString();
098        }
099
100        /**
101         * Turn special characters into HTML character references.
102         * Handles complete character set defined in HTML 4.01 recommendation.
103         * <p>Escapes all special characters to their corresponding numeric
104         * reference in decimal format (&#<i>Decimal</i>;).
105         * <p>Reference:
106         * <a href="http://www.w3.org/TR/html4/sgml/entities.html">
107         * http://www.w3.org/TR/html4/sgml/entities.html
108         * </a>
109         * @param input the (unescaped) input string
110         * @return the escaped string
111         */
112        public static String htmlEscapeDecimal(String input) {
113                return htmlEscapeDecimal(input, WebUtils.DEFAULT_CHARACTER_ENCODING);
114        }
115
116        /**
117         * Turn special characters into HTML character references.
118         * Handles complete character set defined in HTML 4.01 recommendation.
119         * <p>Escapes all special characters to their corresponding numeric
120         * reference in decimal format (&#<i>Decimal</i>;) at least as required by the
121         * specified encoding. In other words, if a special character does
122         * not have to be escaped for the given encoding, it may not be.
123         * <p>Reference:
124         * <a href="http://www.w3.org/TR/html4/sgml/entities.html">
125         * http://www.w3.org/TR/html4/sgml/entities.html
126         * </a>
127         * @param input the (unescaped) input string
128         * @param encoding the name of a supported {@link java.nio.charset.Charset charset}
129         * @return the escaped string
130         * @since 4.1.2
131         */
132        public static String htmlEscapeDecimal(String input, String encoding) {
133                Assert.notNull(encoding, "Encoding is required");
134                if (input == null) {
135                        return null;
136                }
137                StringBuilder escaped = new StringBuilder(input.length() * 2);
138                for (int i = 0; i < input.length(); i++) {
139                        char character = input.charAt(i);
140                        if (characterEntityReferences.isMappedToReference(character, encoding)) {
141                                escaped.append(HtmlCharacterEntityReferences.DECIMAL_REFERENCE_START);
142                                escaped.append((int) character);
143                                escaped.append(HtmlCharacterEntityReferences.REFERENCE_END);
144                        }
145                        else {
146                                escaped.append(character);
147                        }
148                }
149                return escaped.toString();
150        }
151
152        /**
153         * Turn special characters into HTML character references.
154         * Handles complete character set defined in HTML 4.01 recommendation.
155         * <p>Escapes all special characters to their corresponding numeric
156         * reference in hex format (&#x<i>Hex</i>;).
157         * <p>Reference:
158         * <a href="http://www.w3.org/TR/html4/sgml/entities.html">
159         * http://www.w3.org/TR/html4/sgml/entities.html
160         * </a>
161         * @param input the (unescaped) input string
162         * @return the escaped string
163         */
164        public static String htmlEscapeHex(String input) {
165                return htmlEscapeHex(input, WebUtils.DEFAULT_CHARACTER_ENCODING);
166        }
167
168        /**
169         * Turn special characters into HTML character references.
170         * Handles complete character set defined in HTML 4.01 recommendation.
171         * <p>Escapes all special characters to their corresponding numeric
172         * reference in hex format (&#x<i>Hex</i>;) at least as required by the
173         * specified encoding. In other words, if a special character does
174         * not have to be escaped for the given encoding, it may not be.
175         * <p>Reference:
176         * <a href="http://www.w3.org/TR/html4/sgml/entities.html">
177         * http://www.w3.org/TR/html4/sgml/entities.html
178         * </a>
179         * @param input the (unescaped) input string
180         * @param encoding the name of a supported {@link java.nio.charset.Charset charset}
181         * @return the escaped string
182         * @since 4.1.2
183         */
184        public static String htmlEscapeHex(String input, String encoding) {
185                Assert.notNull(encoding, "Encoding is required");
186                if (input == null) {
187                        return null;
188                }
189                StringBuilder escaped = new StringBuilder(input.length() * 2);
190                for (int i = 0; i < input.length(); i++) {
191                        char character = input.charAt(i);
192                        if (characterEntityReferences.isMappedToReference(character, encoding)) {
193                                escaped.append(HtmlCharacterEntityReferences.HEX_REFERENCE_START);
194                                escaped.append(Integer.toString(character, 16));
195                                escaped.append(HtmlCharacterEntityReferences.REFERENCE_END);
196                        }
197                        else {
198                                escaped.append(character);
199                        }
200                }
201                return escaped.toString();
202        }
203
204        /**
205         * Turn HTML character references into their plain text UNICODE equivalent.
206         * <p>Handles complete character set defined in HTML 4.01 recommendation
207         * and all reference types (decimal, hex, and entity).
208         * <p>Correctly converts the following formats:
209         * <blockquote>
210         * &amp;#<i>Entity</i>; - <i>(Example: &amp;amp;) case sensitive</i>
211         * &amp;#<i>Decimal</i>; - <i>(Example: &amp;#68;)</i><br>
212         * &amp;#x<i>Hex</i>; - <i>(Example: &amp;#xE5;) case insensitive</i><br>
213         * </blockquote>
214         * Gracefully handles malformed character references by copying original
215         * characters as is when encountered.<p>
216         * <p>Reference:
217         * <a href="http://www.w3.org/TR/html4/sgml/entities.html">
218         * http://www.w3.org/TR/html4/sgml/entities.html
219         * </a>
220         * @param input the (escaped) input string
221         * @return the unescaped string
222         */
223        public static String htmlUnescape(String input) {
224                if (input == null) {
225                        return null;
226                }
227                return new HtmlCharacterEntityDecoder(characterEntityReferences, input).decode();
228        }
229
230}