001/*
002 * Copyright 2002-2019 the original author or authors.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *      https://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package org.springframework.web.util;
018
019import java.net.URI;
020import java.nio.charset.Charset;
021import java.nio.charset.StandardCharsets;
022import java.util.Arrays;
023import java.util.LinkedHashMap;
024import java.util.List;
025import java.util.Map;
026
027import org.springframework.lang.Nullable;
028import org.springframework.util.LinkedMultiValueMap;
029import org.springframework.util.MultiValueMap;
030import org.springframework.util.StringUtils;
031
032/**
033 * Utility methods for URI encoding and decoding based on RFC 3986.
034 *
035 * <p>There are two types of encode methods:
036 * <ul>
037 * <li>{@code "encodeXyz"} -- these encode a specific URI component (e.g. path,
038 * query) by percent encoding illegal characters, which includes non-US-ASCII
039 * characters, and also characters that are otherwise illegal within the given
040 * URI component type, as defined in RFC 3986. The effect of this method, with
041 * regards to encoding, is comparable to using the multi-argument constructor
042 * of {@link URI}.
043 * <li>{@code "encode"} and {@code "encodeUriVariables"} -- these can be used
044 * to encode URI variable values by percent encoding all characters that are
045 * either illegal, or have any reserved meaning, anywhere within a URI.
046 * </ul>
047 *
048 * @author Arjen Poutsma
049 * @author Juergen Hoeller
050 * @author Rossen Stoyanchev
051 * @since 3.0
052 * @see <a href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>
053 */
054public abstract class UriUtils {
055
056        /**
057         * Encode the given URI scheme with the given encoding.
058         * @param scheme the scheme to be encoded
059         * @param encoding the character encoding to encode to
060         * @return the encoded scheme
061         */
062        public static String encodeScheme(String scheme, String encoding) {
063                return encode(scheme, encoding, HierarchicalUriComponents.Type.SCHEME);
064        }
065
066        /**
067         * Encode the given URI scheme with the given encoding.
068         * @param scheme the scheme to be encoded
069         * @param charset the character encoding to encode to
070         * @return the encoded scheme
071         * @since 5.0
072         */
073        public static String encodeScheme(String scheme, Charset charset) {
074                return encode(scheme, charset, HierarchicalUriComponents.Type.SCHEME);
075        }
076
077        /**
078         * Encode the given URI authority with the given encoding.
079         * @param authority the authority to be encoded
080         * @param encoding the character encoding to encode to
081         * @return the encoded authority
082         */
083        public static String encodeAuthority(String authority, String encoding) {
084                return encode(authority, encoding, HierarchicalUriComponents.Type.AUTHORITY);
085        }
086
087        /**
088         * Encode the given URI authority with the given encoding.
089         * @param authority the authority to be encoded
090         * @param charset the character encoding to encode to
091         * @return the encoded authority
092         * @since 5.0
093         */
094        public static String encodeAuthority(String authority, Charset charset) {
095                return encode(authority, charset, HierarchicalUriComponents.Type.AUTHORITY);
096        }
097
098        /**
099         * Encode the given URI user info with the given encoding.
100         * @param userInfo the user info to be encoded
101         * @param encoding the character encoding to encode to
102         * @return the encoded user info
103         */
104        public static String encodeUserInfo(String userInfo, String encoding) {
105                return encode(userInfo, encoding, HierarchicalUriComponents.Type.USER_INFO);
106        }
107
108        /**
109         * Encode the given URI user info with the given encoding.
110         * @param userInfo the user info to be encoded
111         * @param charset the character encoding to encode to
112         * @return the encoded user info
113         * @since 5.0
114         */
115        public static String encodeUserInfo(String userInfo, Charset charset) {
116                return encode(userInfo, charset, HierarchicalUriComponents.Type.USER_INFO);
117        }
118
119        /**
120         * Encode the given URI host with the given encoding.
121         * @param host the host to be encoded
122         * @param encoding the character encoding to encode to
123         * @return the encoded host
124         */
125        public static String encodeHost(String host, String encoding) {
126                return encode(host, encoding, HierarchicalUriComponents.Type.HOST_IPV4);
127        }
128
129        /**
130         * Encode the given URI host with the given encoding.
131         * @param host the host to be encoded
132         * @param charset the character encoding to encode to
133         * @return the encoded host
134         * @since 5.0
135         */
136        public static String encodeHost(String host, Charset charset) {
137                return encode(host, charset, HierarchicalUriComponents.Type.HOST_IPV4);
138        }
139
140        /**
141         * Encode the given URI port with the given encoding.
142         * @param port the port to be encoded
143         * @param encoding the character encoding to encode to
144         * @return the encoded port
145         */
146        public static String encodePort(String port, String encoding) {
147                return encode(port, encoding, HierarchicalUriComponents.Type.PORT);
148        }
149
150        /**
151         * Encode the given URI port with the given encoding.
152         * @param port the port to be encoded
153         * @param charset the character encoding to encode to
154         * @return the encoded port
155         * @since 5.0
156         */
157        public static String encodePort(String port, Charset charset) {
158                return encode(port, charset, HierarchicalUriComponents.Type.PORT);
159        }
160
161        /**
162         * Encode the given URI path with the given encoding.
163         * @param path the path to be encoded
164         * @param encoding the character encoding to encode to
165         * @return the encoded path
166         */
167        public static String encodePath(String path, String encoding) {
168                return encode(path, encoding, HierarchicalUriComponents.Type.PATH);
169        }
170
171        /**
172         * Encode the given URI path with the given encoding.
173         * @param path the path to be encoded
174         * @param charset the character encoding to encode to
175         * @return the encoded path
176         * @since 5.0
177         */
178        public static String encodePath(String path, Charset charset) {
179                return encode(path, charset, HierarchicalUriComponents.Type.PATH);
180        }
181
182        /**
183         * Encode the given URI path segment with the given encoding.
184         * @param segment the segment to be encoded
185         * @param encoding the character encoding to encode to
186         * @return the encoded segment
187         */
188        public static String encodePathSegment(String segment, String encoding) {
189                return encode(segment, encoding, HierarchicalUriComponents.Type.PATH_SEGMENT);
190        }
191
192        /**
193         * Encode the given URI path segment with the given encoding.
194         * @param segment the segment to be encoded
195         * @param charset the character encoding to encode to
196         * @return the encoded segment
197         * @since 5.0
198         */
199        public static String encodePathSegment(String segment, Charset charset) {
200                return encode(segment, charset, HierarchicalUriComponents.Type.PATH_SEGMENT);
201        }
202
203        /**
204         * Encode the given URI query with the given encoding.
205         * @param query the query to be encoded
206         * @param encoding the character encoding to encode to
207         * @return the encoded query
208         */
209        public static String encodeQuery(String query, String encoding) {
210                return encode(query, encoding, HierarchicalUriComponents.Type.QUERY);
211        }
212
213        /**
214         * Encode the given URI query with the given encoding.
215         * @param query the query to be encoded
216         * @param charset the character encoding to encode to
217         * @return the encoded query
218         * @since 5.0
219         */
220        public static String encodeQuery(String query, Charset charset) {
221                return encode(query, charset, HierarchicalUriComponents.Type.QUERY);
222        }
223
224        /**
225         * Encode the given URI query parameter with the given encoding.
226         * @param queryParam the query parameter to be encoded
227         * @param encoding the character encoding to encode to
228         * @return the encoded query parameter
229         */
230        public static String encodeQueryParam(String queryParam, String encoding) {
231                return encode(queryParam, encoding, HierarchicalUriComponents.Type.QUERY_PARAM);
232        }
233
234        /**
235         * Encode the given URI query parameter with the given encoding.
236         * @param queryParam the query parameter to be encoded
237         * @param charset the character encoding to encode to
238         * @return the encoded query parameter
239         * @since 5.0
240         */
241        public static String encodeQueryParam(String queryParam, Charset charset) {
242                return encode(queryParam, charset, HierarchicalUriComponents.Type.QUERY_PARAM);
243        }
244
245        /**
246         * Encode the query parameters from the given {@code MultiValueMap} with UTF-8.
247         * <p>This can be used with {@link UriComponentsBuilder#queryParams(MultiValueMap)}
248         * when building a URI from an already encoded template.
249         * <pre class="code">
250         * MultiValueMap&lt;String, String&gt; params = new LinkedMultiValueMap&lt;&gt;(2);
251         * // add to params...
252         *
253         * ServletUriComponentsBuilder.fromCurrentRequest()
254         *         .queryParams(UriUtils.encodeQueryParams(params))
255         *         .build(true)
256         *         .toUriString();
257         * </pre>
258         * @param params the parameters to encode
259         * @return a new {@code MultiValueMap} with the encoded names and values
260         * @since 5.2.3
261         */
262        public static MultiValueMap<String, String> encodeQueryParams(MultiValueMap<String, String> params) {
263                Charset charset = StandardCharsets.UTF_8;
264                MultiValueMap<String, String> result = new LinkedMultiValueMap<>(params.size());
265                for (Map.Entry<String, List<String>> entry : params.entrySet()) {
266                        for (String value : entry.getValue()) {
267                                result.add(encodeQueryParam(entry.getKey(), charset), encodeQueryParam(value, charset));
268                        }
269                }
270                return result;
271        }
272
273        /**
274         * Encode the given URI fragment with the given encoding.
275         * @param fragment the fragment to be encoded
276         * @param encoding the character encoding to encode to
277         * @return the encoded fragment
278         */
279        public static String encodeFragment(String fragment, String encoding) {
280                return encode(fragment, encoding, HierarchicalUriComponents.Type.FRAGMENT);
281        }
282
283        /**
284         * Encode the given URI fragment with the given encoding.
285         * @param fragment the fragment to be encoded
286         * @param charset the character encoding to encode to
287         * @return the encoded fragment
288         * @since 5.0
289         */
290        public static String encodeFragment(String fragment, Charset charset) {
291                return encode(fragment, charset, HierarchicalUriComponents.Type.FRAGMENT);
292        }
293
294
295        /**
296         * Variant of {@link #encode(String, Charset)} with a String charset.
297         * @param source the String to be encoded
298         * @param encoding the character encoding to encode to
299         * @return the encoded String
300         */
301        public static String encode(String source, String encoding) {
302                return encode(source, encoding, HierarchicalUriComponents.Type.URI);
303        }
304
305        /**
306         * Encode all characters that are either illegal, or have any reserved
307         * meaning, anywhere within a URI, as defined in
308         * <a href="https://tools.ietf.org/html/rfc3986">RFC 3986</a>.
309         * This is useful to ensure that the given String will be preserved as-is
310         * and will not have any impact on the structure or meaning of the URI.
311         * @param source the String to be encoded
312         * @param charset the character encoding to encode to
313         * @return the encoded String
314         * @since 5.0
315         */
316        public static String encode(String source, Charset charset) {
317                return encode(source, charset, HierarchicalUriComponents.Type.URI);
318        }
319
320        /**
321         * Convenience method to apply {@link #encode(String, Charset)} to all
322         * given URI variable values.
323         * @param uriVariables the URI variable values to be encoded
324         * @return a new {@code Map} with the same keys and the encoded values
325         * @since 5.0
326         */
327        public static Map<String, String> encodeUriVariables(Map<String, ?> uriVariables) {
328                Map<String, String> result = new LinkedHashMap<>(uriVariables.size());
329                uriVariables.forEach((key, value) -> {
330                        String stringValue = (value != null ? value.toString() : "");
331                        result.put(key, encode(stringValue, StandardCharsets.UTF_8));
332                });
333                return result;
334        }
335
336        /**
337         * Convenience method to apply {@link #encode(String, Charset)} to all
338         * given URI variable values.
339         * @param uriVariables the URI variable values to be encoded
340         * @return a new array with the encoded values
341         * @since 5.0
342         */
343        public static Object[] encodeUriVariables(Object... uriVariables) {
344                return Arrays.stream(uriVariables)
345                                .map(value -> {
346                                        String stringValue = (value != null ? value.toString() : "");
347                                        return encode(stringValue, StandardCharsets.UTF_8);
348                                })
349                                .toArray();
350        }
351
352        private static String encode(String scheme, String encoding, HierarchicalUriComponents.Type type) {
353                return HierarchicalUriComponents.encodeUriComponent(scheme, encoding, type);
354        }
355
356        private static String encode(String scheme, Charset charset, HierarchicalUriComponents.Type type) {
357                return HierarchicalUriComponents.encodeUriComponent(scheme, charset, type);
358        }
359
360
361        /**
362         * Decode the given encoded URI component.
363         * <p>See {@link StringUtils#uriDecode(String, Charset)} for the decoding rules.
364         * @param source the encoded String
365         * @param encoding the character encoding to use
366         * @return the decoded value
367         * @throws IllegalArgumentException when the given source contains invalid encoded sequences
368         * @see StringUtils#uriDecode(String, Charset)
369         * @see java.net.URLDecoder#decode(String, String)
370         */
371        public static String decode(String source, String encoding) {
372                return StringUtils.uriDecode(source, Charset.forName(encoding));
373        }
374
375        /**
376         * Decode the given encoded URI component.
377         * <p>See {@link StringUtils#uriDecode(String, Charset)} for the decoding rules.
378         * @param source the encoded String
379         * @param charset the character encoding to use
380         * @return the decoded value
381         * @throws IllegalArgumentException when the given source contains invalid encoded sequences
382         * @since 5.0
383         * @see StringUtils#uriDecode(String, Charset)
384         * @see java.net.URLDecoder#decode(String, String)
385         */
386        public static String decode(String source, Charset charset) {
387                return StringUtils.uriDecode(source, charset);
388        }
389
390        /**
391         * Extract the file extension from the given URI path.
392         * @param path the URI path (e.g. "/products/index.html")
393         * @return the extracted file extension (e.g. "html")
394         * @since 4.3.2
395         */
396        @Nullable
397        public static String extractFileExtension(String path) {
398                int end = path.indexOf('?');
399                int fragmentIndex = path.indexOf('#');
400                if (fragmentIndex != -1 && (end == -1 || fragmentIndex < end)) {
401                        end = fragmentIndex;
402                }
403                if (end == -1) {
404                        end = path.length();
405                }
406                int begin = path.lastIndexOf('/', end) + 1;
407                int paramIndex = path.indexOf(';', begin);
408                end = (paramIndex != -1 && paramIndex < end ? paramIndex : end);
409                int extIndex = path.lastIndexOf('.', end);
410                if (extIndex != -1 && extIndex > begin) {
411                        return path.substring(extIndex + 1, end);
412                }
413                return null;
414        }
415
416}