001/*
002 * Copyright 2002-2019 the original author or authors.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *      https://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package org.springframework.util.xml;
018
019import java.io.BufferedReader;
020import java.io.CharConversionException;
021import java.io.IOException;
022import java.io.InputStream;
023import java.io.InputStreamReader;
024
025import org.springframework.lang.Nullable;
026import org.springframework.util.StringUtils;
027
028/**
029 * Detects whether an XML stream is using DTD- or XSD-based validation.
030 *
031 * @author Rob Harrop
032 * @author Juergen Hoeller
033 * @author Sam Brannen
034 * @since 2.0
035 */
036public class XmlValidationModeDetector {
037
038        /**
039         * Indicates that the validation should be disabled.
040         */
041        public static final int VALIDATION_NONE = 0;
042
043        /**
044         * Indicates that the validation mode should be auto-guessed, since we cannot find
045         * a clear indication (probably choked on some special characters, or the like).
046         */
047        public static final int VALIDATION_AUTO = 1;
048
049        /**
050         * Indicates that DTD validation should be used (we found a "DOCTYPE" declaration).
051         */
052        public static final int VALIDATION_DTD = 2;
053
054        /**
055         * Indicates that XSD validation should be used (found no "DOCTYPE" declaration).
056         */
057        public static final int VALIDATION_XSD = 3;
058
059
060        /**
061         * The token in a XML document that declares the DTD to use for validation
062         * and thus that DTD validation is being used.
063         */
064        private static final String DOCTYPE = "DOCTYPE";
065
066        /**
067         * The token that indicates the start of an XML comment.
068         */
069        private static final String START_COMMENT = "<!--";
070
071        /**
072         * The token that indicates the end of an XML comment.
073         */
074        private static final String END_COMMENT = "-->";
075
076
077        /**
078         * Indicates whether or not the current parse position is inside an XML comment.
079         */
080        private boolean inComment;
081
082
083        /**
084         * Detect the validation mode for the XML document in the supplied {@link InputStream}.
085         * Note that the supplied {@link InputStream} is closed by this method before returning.
086         * @param inputStream the InputStream to parse
087         * @throws IOException in case of I/O failure
088         * @see #VALIDATION_DTD
089         * @see #VALIDATION_XSD
090         */
091        public int detectValidationMode(InputStream inputStream) throws IOException {
092                // Peek into the file to look for DOCTYPE.
093                BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
094                try {
095                        boolean isDtdValidated = false;
096                        String content;
097                        while ((content = reader.readLine()) != null) {
098                                content = consumeCommentTokens(content);
099                                if (this.inComment || !StringUtils.hasText(content)) {
100                                        continue;
101                                }
102                                if (hasDoctype(content)) {
103                                        isDtdValidated = true;
104                                        break;
105                                }
106                                if (hasOpeningTag(content)) {
107                                        // End of meaningful data...
108                                        break;
109                                }
110                        }
111                        return (isDtdValidated ? VALIDATION_DTD : VALIDATION_XSD);
112                }
113                catch (CharConversionException ex) {
114                        // Choked on some character encoding...
115                        // Leave the decision up to the caller.
116                        return VALIDATION_AUTO;
117                }
118                finally {
119                        reader.close();
120                }
121        }
122
123
124        /**
125         * Does the content contain the DTD DOCTYPE declaration?
126         */
127        private boolean hasDoctype(String content) {
128                return content.contains(DOCTYPE);
129        }
130
131        /**
132         * Does the supplied content contain an XML opening tag. If the parse state is currently
133         * in an XML comment then this method always returns false. It is expected that all comment
134         * tokens will have consumed for the supplied content before passing the remainder to this method.
135         */
136        private boolean hasOpeningTag(String content) {
137                if (this.inComment) {
138                        return false;
139                }
140                int openTagIndex = content.indexOf('<');
141                return (openTagIndex > -1 && (content.length() > openTagIndex + 1) &&
142                                Character.isLetter(content.charAt(openTagIndex + 1)));
143        }
144
145        /**
146         * Consume all leading and trailing comments in the given String and return
147         * the remaining content, which may be empty since the supplied content might
148         * be all comment data.
149         */
150        @Nullable
151        private String consumeCommentTokens(String line) {
152                int indexOfStartComment = line.indexOf(START_COMMENT);
153                if (indexOfStartComment == -1 && !line.contains(END_COMMENT)) {
154                        return line;
155                }
156
157                String result = "";
158                String currLine = line;
159                if (indexOfStartComment >= 0) {
160                        result = line.substring(0, indexOfStartComment);
161                        currLine = line.substring(indexOfStartComment);
162                }
163
164                while ((currLine = consume(currLine)) != null) {
165                        if (!this.inComment && !currLine.trim().startsWith(START_COMMENT)) {
166                                return result + currLine;
167                        }
168                }
169                return null;
170        }
171
172        /**
173         * Consume the next comment token, update the "inComment" flag
174         * and return the remaining content.
175         */
176        @Nullable
177        private String consume(String line) {
178                int index = (this.inComment ? endComment(line) : startComment(line));
179                return (index == -1 ? null : line.substring(index));
180        }
181
182        /**
183         * Try to consume the {@link #START_COMMENT} token.
184         * @see #commentToken(String, String, boolean)
185         */
186        private int startComment(String line) {
187                return commentToken(line, START_COMMENT, true);
188        }
189
190        private int endComment(String line) {
191                return commentToken(line, END_COMMENT, false);
192        }
193
194        /**
195         * Try to consume the supplied token against the supplied content and update the
196         * in comment parse state to the supplied value. Returns the index into the content
197         * which is after the token or -1 if the token is not found.
198         */
199        private int commentToken(String line, String token, boolean inCommentIfPresent) {
200                int index = line.indexOf(token);
201                if (index > - 1) {
202                        this.inComment = inCommentIfPresent;
203                }
204                return (index == -1 ? index : index + token.length());
205        }
206
207}