001/*
002 * Copyright 2006-2012 the original author or authors.
003 * 
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 * 
008 *      https://www.apache.org/licenses/LICENSE-2.0
009 * 
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.springframework.batch.item.file.transform;
017
018import java.util.ArrayList;
019import java.util.Collections;
020import java.util.List;
021import java.util.regex.Matcher;
022import java.util.regex.Pattern;
023
024import org.springframework.util.Assert;
025
026/**
027 * Line-tokenizer using a regular expression to filter out data (by using matching and non-matching groups).
028 * Consider the following regex which picks only the first and last name (notice the non-matching group in the middle):
029 * <pre>
030 * (.*?)(?: .*)* (.*) 
031 * </pre>
032 * For the names:
033 * <ul>  
034 *  <li>"Graham James Edward Miller"</li>
035 *  <li>"Andrew Gregory Macintyre"</li>
036 *  <li>"No MiddleName"</li>
037 * </ul> 
038 * 
039 * the output will be:
040 * <ul>
041 * <li>"Miller", "Graham"</li>
042 * <li>"Macintyre", "Andrew"</li>
043 * <li>"MiddleName", "No"</li>
044 * </ul>
045 * 
046 * An empty list is returned, in case of a non-match.
047 * 
048 * @see Matcher#group(int)
049 * @author Costin Leau
050 */
051public class RegexLineTokenizer extends AbstractLineTokenizer {
052
053        private Pattern pattern;
054
055        @Override
056        protected List<String> doTokenize(String line) {
057                Matcher matcher = pattern.matcher(line);
058                boolean matchFound = matcher.find();
059
060                if (matchFound) {
061                        List<String> tokens = new ArrayList<String>(matcher.groupCount());
062                        for (int i = 1; i <= matcher.groupCount(); i++) {
063                                tokens.add(matcher.group(i));
064                        }
065                        return tokens;
066                }
067                return Collections.emptyList();
068        }
069
070        /**
071         * Sets the regex pattern to use.
072         * 
073         * @param pattern Regular Expression pattern
074         */
075        public void setPattern(Pattern pattern) {
076                Assert.notNull(pattern, "a non-null pattern is required");
077                this.pattern = pattern;
078        }
079
080        /**
081         * Sets the regular expression to use. 
082         * 
083         * @param regex regular expression (as a String)
084         */
085        public void setRegex(String regex) {
086                Assert.hasText(regex, "a valid regex is required");
087                this.pattern = Pattern.compile(regex);
088        }
089}