001/* 002 * Copyright 2002-2019 the original author or authors. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * https://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package org.springframework.util.xml; 018 019import java.io.BufferedReader; 020import java.io.CharConversionException; 021import java.io.IOException; 022import java.io.InputStream; 023import java.io.InputStreamReader; 024 025import org.springframework.lang.Nullable; 026import org.springframework.util.StringUtils; 027 028/** 029 * Detects whether an XML stream is using DTD- or XSD-based validation. 030 * 031 * @author Rob Harrop 032 * @author Juergen Hoeller 033 * @author Sam Brannen 034 * @since 2.0 035 */ 036public class XmlValidationModeDetector { 037 038 /** 039 * Indicates that the validation should be disabled. 040 */ 041 public static final int VALIDATION_NONE = 0; 042 043 /** 044 * Indicates that the validation mode should be auto-guessed, since we cannot find 045 * a clear indication (probably choked on some special characters, or the like). 046 */ 047 public static final int VALIDATION_AUTO = 1; 048 049 /** 050 * Indicates that DTD validation should be used (we found a "DOCTYPE" declaration). 051 */ 052 public static final int VALIDATION_DTD = 2; 053 054 /** 055 * Indicates that XSD validation should be used (found no "DOCTYPE" declaration). 056 */ 057 public static final int VALIDATION_XSD = 3; 058 059 060 /** 061 * The token in a XML document that declares the DTD to use for validation 062 * and thus that DTD validation is being used. 063 */ 064 private static final String DOCTYPE = "DOCTYPE"; 065 066 /** 067 * The token that indicates the start of an XML comment. 068 */ 069 private static final String START_COMMENT = "<!--"; 070 071 /** 072 * The token that indicates the end of an XML comment. 073 */ 074 private static final String END_COMMENT = "-->"; 075 076 077 /** 078 * Indicates whether or not the current parse position is inside an XML comment. 079 */ 080 private boolean inComment; 081 082 083 /** 084 * Detect the validation mode for the XML document in the supplied {@link InputStream}. 085 * Note that the supplied {@link InputStream} is closed by this method before returning. 086 * @param inputStream the InputStream to parse 087 * @throws IOException in case of I/O failure 088 * @see #VALIDATION_DTD 089 * @see #VALIDATION_XSD 090 */ 091 public int detectValidationMode(InputStream inputStream) throws IOException { 092 // Peek into the file to look for DOCTYPE. 093 BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream)); 094 try { 095 boolean isDtdValidated = false; 096 String content; 097 while ((content = reader.readLine()) != null) { 098 content = consumeCommentTokens(content); 099 if (this.inComment || !StringUtils.hasText(content)) { 100 continue; 101 } 102 if (hasDoctype(content)) { 103 isDtdValidated = true; 104 break; 105 } 106 if (hasOpeningTag(content)) { 107 // End of meaningful data... 108 break; 109 } 110 } 111 return (isDtdValidated ? VALIDATION_DTD : VALIDATION_XSD); 112 } 113 catch (CharConversionException ex) { 114 // Choked on some character encoding... 115 // Leave the decision up to the caller. 116 return VALIDATION_AUTO; 117 } 118 finally { 119 reader.close(); 120 } 121 } 122 123 124 /** 125 * Does the content contain the DTD DOCTYPE declaration? 126 */ 127 private boolean hasDoctype(String content) { 128 return content.contains(DOCTYPE); 129 } 130 131 /** 132 * Does the supplied content contain an XML opening tag. If the parse state is currently 133 * in an XML comment then this method always returns false. It is expected that all comment 134 * tokens will have consumed for the supplied content before passing the remainder to this method. 135 */ 136 private boolean hasOpeningTag(String content) { 137 if (this.inComment) { 138 return false; 139 } 140 int openTagIndex = content.indexOf('<'); 141 return (openTagIndex > -1 && (content.length() > openTagIndex + 1) && 142 Character.isLetter(content.charAt(openTagIndex + 1))); 143 } 144 145 /** 146 * Consume all leading and trailing comments in the given String and return 147 * the remaining content, which may be empty since the supplied content might 148 * be all comment data. 149 */ 150 @Nullable 151 private String consumeCommentTokens(String line) { 152 int indexOfStartComment = line.indexOf(START_COMMENT); 153 if (indexOfStartComment == -1 && !line.contains(END_COMMENT)) { 154 return line; 155 } 156 157 String result = ""; 158 String currLine = line; 159 if (indexOfStartComment >= 0) { 160 result = line.substring(0, indexOfStartComment); 161 currLine = line.substring(indexOfStartComment); 162 } 163 164 while ((currLine = consume(currLine)) != null) { 165 if (!this.inComment && !currLine.trim().startsWith(START_COMMENT)) { 166 return result + currLine; 167 } 168 } 169 return null; 170 } 171 172 /** 173 * Consume the next comment token, update the "inComment" flag 174 * and return the remaining content. 175 */ 176 @Nullable 177 private String consume(String line) { 178 int index = (this.inComment ? endComment(line) : startComment(line)); 179 return (index == -1 ? null : line.substring(index)); 180 } 181 182 /** 183 * Try to consume the {@link #START_COMMENT} token. 184 * @see #commentToken(String, String, boolean) 185 */ 186 private int startComment(String line) { 187 return commentToken(line, START_COMMENT, true); 188 } 189 190 private int endComment(String line) { 191 return commentToken(line, END_COMMENT, false); 192 } 193 194 /** 195 * Try to consume the supplied token against the supplied content and update the 196 * in comment parse state to the supplied value. Returns the index into the content 197 * which is after the token or -1 if the token is not found. 198 */ 199 private int commentToken(String line, String token, boolean inCommentIfPresent) { 200 int index = line.indexOf(token); 201 if (index > - 1) { 202 this.inComment = inCommentIfPresent; 203 } 204 return (index == -1 ? index : index + token.length()); 205 } 206 207}