001/*
002 * Copyright 2002-2014 the original author or authors.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *      https://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package org.springframework.util.xml;
018
019import java.io.BufferedReader;
020import java.io.CharConversionException;
021import java.io.IOException;
022import java.io.InputStream;
023import java.io.InputStreamReader;
024
025import org.springframework.util.StringUtils;
026
/**
 * Detects whether an XML stream is using DTD- or XSD-based validation.
 *
 * <p>Detection scans the document line by line, ignoring XML comments, until
 * either a {@code DOCTYPE} declaration or the first opening tag is found.
 *
 * <p>The comment parse state is kept in an instance field while a document is
 * being scanned, so a single instance must not be used by several threads
 * concurrently.
 *
 * @author Rob Harrop
 * @author Juergen Hoeller
 * @since 2.0
 */
public class XmlValidationModeDetector {

    /**
     * Indicates that the validation should be disabled.
     */
    public static final int VALIDATION_NONE = 0;

    /**
     * Indicates that the validation mode should be auto-guessed, since we cannot find
     * a clear indication (probably choked on some special characters, or the like).
     */
    public static final int VALIDATION_AUTO = 1;

    /**
     * Indicates that DTD validation should be used (we found a "DOCTYPE" declaration).
     */
    public static final int VALIDATION_DTD = 2;

    /**
     * Indicates that XSD validation should be used (found no "DOCTYPE" declaration).
     */
    public static final int VALIDATION_XSD = 3;


    /**
     * The token in an XML document that declares the DTD to use for validation
     * and thus that DTD validation is being used.
     */
    private static final String DOCTYPE = "DOCTYPE";

    /**
     * The token that indicates the start of an XML comment.
     */
    private static final String START_COMMENT = "<!--";

    /**
     * The token that indicates the end of an XML comment.
     */
    private static final String END_COMMENT = "-->";


    /**
     * Indicates whether or not the current parse position is inside an XML comment.
     * Carried over from one line to the next while a document is being scanned.
     */
    private boolean inComment;


    /**
     * Detect the validation mode for the XML document in the supplied {@link InputStream}.
     * Note that the supplied {@link InputStream} is closed by this method before returning.
     * <p>The stream is decoded using the platform default charset; the tokens that are
     * searched for consist of ASCII characters only.
     * @param inputStream the InputStream to parse
     * @return {@link #VALIDATION_DTD} if a "DOCTYPE" declaration was found outside of
     * comments before the first opening tag, {@link #VALIDATION_XSD} if not, or
     * {@link #VALIDATION_AUTO} if reading failed with a character conversion problem
     * @throws IOException in case of I/O failure
     * @see #VALIDATION_DTD
     * @see #VALIDATION_XSD
     */
    public int detectValidationMode(InputStream inputStream) throws IOException {
        // Always start outside of a comment: a previously scanned document may have
        // ended (or been abandoned) in the middle of one.
        this.inComment = false;

        // Peek into the file to look for DOCTYPE.
        BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
        try {
            boolean isDtdValidated = false;
            String line;
            while ((line = reader.readLine()) != null) {
                // Only the text outside of comments is relevant. Blank content simply
                // matches neither of the checks below, so no separate guard is needed.
                String content = consumeCommentTokens(line);
                if (hasDoctype(content)) {
                    isDtdValidated = true;
                    break;
                }
                if (hasOpeningTag(content)) {
                    // End of meaningful data...
                    break;
                }
            }
            return (isDtdValidated ? VALIDATION_DTD : VALIDATION_XSD);
        }
        catch (CharConversionException ex) {
            // Choked on some character encoding...
            // Leave the decision up to the caller.
            return VALIDATION_AUTO;
        }
        finally {
            reader.close();
        }
    }


    /**
     * Does the content contain the DTD DOCTYPE declaration?
     * @param content the comment-free content of a line
     */
    private boolean hasDoctype(String content) {
        return content.contains(DOCTYPE);
    }

    /**
     * Does the supplied content contain an XML opening tag, i.e. is the first
     * {@code '<'} in the content immediately followed by a letter?
     * <p>It is expected that all comment data will have been removed from the
     * supplied content before it is passed to this method.
     * @param content the comment-free content of a line
     */
    private boolean hasOpeningTag(String content) {
        int openTagIndex = content.indexOf('<');
        return (openTagIndex > -1 && (content.length() > openTagIndex + 1) &&
                Character.isLetter(content.charAt(openTagIndex + 1)));
    }

    /**
     * Strip all comment data from the given line and return the remaining content,
     * updating the "inComment" flag so that comments spanning several lines are
     * handled across calls.
     * <p>Content located before, between and after comments on the same line is
     * preserved, so that a DOCTYPE declaration or root element followed by a
     * comment is still visible to the caller.
     * @param line the raw line read from the document
     * @return the content outside of comments (possibly empty, never {@code null})
     */
    private String consumeCommentTokens(String line) {
        StringBuilder remaining = new StringBuilder();
        int pos = 0;
        while (pos < line.length()) {
            if (this.inComment) {
                int end = line.indexOf(END_COMMENT, pos);
                if (end == -1) {
                    // The comment continues on the next line.
                    break;
                }
                this.inComment = false;
                pos = end + END_COMMENT.length();
            }
            else {
                int start = line.indexOf(START_COMMENT, pos);
                if (start == -1) {
                    // No further comment: the rest of the line is content.
                    remaining.append(line, pos, line.length());
                    break;
                }
                remaining.append(line, pos, start);
                this.inComment = true;
                pos = start + START_COMMENT.length();
            }
        }
        return remaining.toString();
    }

}