/*
 * Copyright 2002-2014 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.springframework.util.xml;

import java.io.BufferedReader;
import java.io.CharConversionException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

import org.springframework.util.StringUtils;

/**
 * Detects whether an XML stream is using DTD- or XSD-based validation.
 *
 * <p>The detector reads the stream line by line, strips XML comments, and stops at
 * the first line that contains either the {@code DOCTYPE} token or an opening tag.
 *
 * <p>The comment-parsing state is kept in an instance field, so a single instance
 * must not be used for several detections at the same time.
 *
 * @author Rob Harrop
 * @author Juergen Hoeller
 * @since 2.0
 */
public class XmlValidationModeDetector {

    /**
     * Indicates that the validation should be disabled.
     */
    public static final int VALIDATION_NONE = 0;

    /**
     * Indicates that the validation mode should be auto-guessed, since we cannot find
     * a clear indication (probably choked on some special characters, or the like).
     */
    public static final int VALIDATION_AUTO = 1;

    /**
     * Indicates that DTD validation should be used (we found a "DOCTYPE" declaration).
     */
    public static final int VALIDATION_DTD = 2;

    /**
     * Indicates that XSD validation should be used (found no "DOCTYPE" declaration).
     */
    public static final int VALIDATION_XSD = 3;


    /**
     * The token in an XML document that declares the DTD to use for validation
     * and thus that DTD validation is being used.
     */
    private static final String DOCTYPE = "DOCTYPE";

    /**
     * The token that indicates the start of an XML comment.
     */
    private static final String START_COMMENT = "<!--";

    /**
     * The token that indicates the end of an XML comment.
     */
    private static final String END_COMMENT = "-->";


    /**
     * Indicates whether or not the current parse position is inside an XML comment.
     * Carried from one line to the next so that multi-line comments are skipped.
     */
    private boolean inComment;


    /**
     * Detect the validation mode for the XML document in the supplied {@link InputStream}.
     * Note that the supplied {@link InputStream} is closed by this method before returning.
     * @param inputStream the InputStream to parse
     * @return {@link #VALIDATION_DTD} if a "DOCTYPE" token is found before the first
     * opening tag, {@link #VALIDATION_XSD} if not, or {@link #VALIDATION_AUTO} if
     * reading failed with a {@link CharConversionException}
     * @throws IOException in case of I/O failure
     * @see #VALIDATION_DTD
     * @see #VALIDATION_XSD
     */
    public int detectValidationMode(InputStream inputStream) throws IOException {
        // Start from a clean state: an earlier detection on this instance may have
        // stopped (end of stream or exception) while inside an unterminated comment.
        this.inComment = false;

        // Peek into the file to look for DOCTYPE.
        BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
        try {
            boolean isDtdValidated = false;
            String content;
            while ((content = reader.readLine()) != null) {
                // Only non-comment text is returned, so it is inspected even when
                // the line ends inside a comment that continues on the next line.
                content = consumeCommentTokens(content);
                if (!StringUtils.hasText(content)) {
                    continue;
                }
                if (hasDoctype(content)) {
                    isDtdValidated = true;
                    break;
                }
                if (hasOpeningTag(content)) {
                    // End of meaningful data...
                    break;
                }
            }
            return (isDtdValidated ? VALIDATION_DTD : VALIDATION_XSD);
        }
        catch (CharConversionException ex) {
            // Choked on some character encoding...
            // Leave the decision up to the caller.
            return VALIDATION_AUTO;
        }
        finally {
            reader.close();
        }
    }


    /**
     * Does the content contain the DTD DOCTYPE declaration?
     */
    private boolean hasDoctype(String content) {
        return content.contains(DOCTYPE);
    }

    /**
     * Does the supplied content contain an XML opening tag, that is a '&lt;'
     * immediately followed by a letter? Every '&lt;' in the content is examined,
     * so an opening tag that follows a processing instruction or declaration on
     * the same line is recognized as well. It is expected that all comment tokens
     * will have been consumed for the supplied content before passing the
     * remainder to this method.
     */
    private boolean hasOpeningTag(String content) {
        int openTagIndex = content.indexOf('<');
        while (openTagIndex > -1) {
            int nameIndex = openTagIndex + 1;
            if (nameIndex < content.length() && Character.isLetter(content.charAt(nameIndex))) {
                return true;
            }
            openTagIndex = content.indexOf('<', nameIndex);
        }
        return false;
    }

    /**
     * Strip all comment data from the given line and return the remaining content,
     * which may be empty since the supplied line might be all comment data.
     * Text before, between and after comments on the same line is preserved, so a
     * DOCTYPE declaration or root element followed by a comment is still seen by
     * the caller. The "inComment" flag is updated so that a comment left open at
     * the end of this line is continued on the next one.
     * @param line a single line of the document
     * @return the non-comment content of the line (never {@code null})
     */
    private String consumeCommentTokens(String line) {
        StringBuilder remaining = new StringBuilder(line.length());
        int position = 0;
        while (position < line.length()) {
            if (this.inComment) {
                int end = line.indexOf(END_COMMENT, position);
                if (end == -1) {
                    // The comment continues on the next line; the rest is comment data.
                    break;
                }
                this.inComment = false;
                position = end + END_COMMENT.length();
            }
            else {
                int start = line.indexOf(START_COMMENT, position);
                if (start == -1) {
                    // No further comment on this line: keep everything that is left.
                    remaining.append(line, position, line.length());
                    break;
                }
                // Keep the text in front of the comment, then skip into the comment.
                remaining.append(line, position, start);
                this.inComment = true;
                position = start + START_COMMENT.length();
            }
        }
        return remaining.toString();
    }

}