/*
 * Copyright 2006-2012 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.springframework.batch.item.file.transform;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.springframework.util.Assert;

/**
 * Line-tokenizer using a regular expression to filter out data (by using capturing and
 * non-capturing groups). Every capturing group of the configured pattern yields one token,
 * in group order; non-capturing groups ({@code (?: ... )}) are skipped.
 * <p>
 * Consider the following regex which picks only the first and last name (notice the
 * non-capturing group in the middle):
 * <pre>
 * (.*?)(?: .*)* (.*)
 * </pre>
 * For the names:
 * <ul>
 * <li>"Graham James Edward Miller"</li>
 * <li>"Andrew Gregory Macintyre"</li>
 * <li>"No MiddleName"</li>
 * </ul>
 *
 * the output will be:
 * <ul>
 * <li>"Graham", "Miller"</li>
 * <li>"Andrew", "Macintyre"</li>
 * <li>"No", "MiddleName"</li>
 * </ul>
 *
 * An empty list is returned in case of a non-match.
 * <p>
 * The pattern is applied with {@link Matcher#find()}, so it only has to match a
 * subsequence of the line; anchor the expression with {@code ^} and {@code $} if the
 * whole line must match.
 *
 * @see Matcher#group(int)
 * @author Costin Leau
 */
public class RegexLineTokenizer extends AbstractLineTokenizer {

	private Pattern pattern;

	/**
	 * Splits the line into the values of the capturing groups of the first match of the
	 * configured pattern.
	 *
	 * @param line the line to tokenize
	 * @return the captured groups in group order (group 1 first), or an empty list if the
	 * pattern does not match. A group that did not participate in the match contributes a
	 * {@code null} element, as reported by {@link Matcher#group(int)}.
	 * @throws IllegalStateException if neither {@link #setPattern(Pattern)} nor
	 * {@link #setRegex(String)} has been called
	 */
	@Override
	protected List<String> doTokenize(String line) {
		// Fail with an explicit message instead of an anonymous NullPointerException
		// when the tokenizer is used before a pattern was configured.
		Assert.state(pattern != null, "a pattern or regex must be set before tokenizing");

		Matcher matcher = pattern.matcher(line);
		if (!matcher.find()) {
			return Collections.emptyList();
		}

		int groupCount = matcher.groupCount();
		List<String> tokens = new ArrayList<String>(groupCount);
		// Group 0 is the entire match; only the explicit capturing groups become tokens.
		for (int i = 1; i <= groupCount; i++) {
			tokens.add(matcher.group(i));
		}
		return tokens;
	}

	/**
	 * Sets the regex pattern to use.
	 *
	 * @param pattern Regular Expression pattern, must not be {@code null}
	 */
	public void setPattern(Pattern pattern) {
		Assert.notNull(pattern, "a non-null pattern is required");
		this.pattern = pattern;
	}

	/**
	 * Sets the regular expression to use. The expression is compiled once, when this
	 * setter is called.
	 *
	 * @param regex regular expression (as a String), must contain text
	 * @throws java.util.regex.PatternSyntaxException if the expression's syntax is invalid
	 */
	public void setRegex(String regex) {
		Assert.hasText(regex, "a valid regex is required");
		this.pattern = Pattern.compile(regex);
	}
}