/*
 * Copyright 2006-2014 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.springframework.batch.item.file.transform;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Tokenizer used to process data obtained from files with fixed-length format.
 * Columns are specified by an array of Range objects (see
 * {@link #setColumns(Range[])}).
 *
 * @author tomas.slanina
 * @author peter.zozom
 * @author Dave Syer
 * @author Lucas Ward
 * @author Michael Minella
 */
public class FixedLengthTokenizer extends AbstractLineTokenizer {

	/**
	 * Column ranges, in the order in which tokens are produced. Remains
	 * <code>null</code> until {@link #setColumns(Range[])} has been called.
	 */
	private Range[] ranges;

	/**
	 * Highest column position referenced by the configured ranges. A range
	 * without an upper bound contributes its lower bound.
	 */
	private int maxRange = 0;

	/**
	 * <code>true</code> when at least one configured range has no upper bound.
	 * In that case lines longer than {@link #maxRange} are accepted even in
	 * strict mode.
	 */
	boolean open = false;

	/**
	 * Set the column ranges. Used in conjunction with the
	 * {@link RangeArrayPropertyEditor} this property can be set in the form of
	 * a String describing the range boundaries, e.g. "1,4,7" or "1-3,4-6,7" or
	 * "1-2,4-5,7-10". If a range is open (has no upper bound) then the rest of
	 * the line is read into that column, and lines longer than the highest
	 * configured boundary are accepted irrespective of the strict flag setting.
	 * The ranges do not have to be supplied in ascending order.
	 *
	 * @see #setStrict(boolean)
	 *
	 * @param ranges the column ranges expected in the input (must not be
	 * <code>null</code>)
	 */
	public void setColumns(Range... ranges) {
		// Defensive copy: later changes to the caller's array must not affect
		// the configuration of this tokenizer.
		this.ranges = Arrays.copyOf(ranges, ranges.length);
		calculateMaxRange(this.ranges);
	}

	/*
	 * Calculate the highest value within an array of ranges and record whether
	 * any of them is open-ended. The ranges aren't necessarily in order. For
	 * example: "5-10, 1-4,11-15". Furthermore, there isn't always a min and
	 * max, such as: "1,4-20, 22"
	 */
	private void calculateMaxRange(Range[] ranges) {
		// Reset both derived values first so that nothing from a previous
		// configuration survives a reconfiguration (including an empty one).
		open = false;
		maxRange = 0;

		if (ranges == null || ranges.length == 0) {
			return;
		}

		maxRange = ranges[0].getMin();

		for (Range range : ranges) {
			int upperBound;
			if (range.hasMaxValue()) {
				upperBound = range.getMax();
			}
			else {
				// An unbounded range only guarantees its starting column. It
				// marks the layout as open wherever it appears in the array,
				// including at index 0, where its start equals the seed value
				// of maxRange.
				upperBound = range.getMin();
				open = true;
			}

			if (upperBound > maxRange) {
				maxRange = upperBound;
			}
		}
	}

	/**
	 * Yields the tokens resulting from the splitting of the supplied
	 * <code>line</code>. One token is produced per configured range, in the
	 * order the ranges were supplied. A range that lies partly or completely
	 * beyond the end of the line yields the available characters or an empty
	 * string respectively (only reachable when not in strict mode).
	 *
	 * @param line the line to be tokenized. It is dereferenced immediately, so
	 * it must not be <code>null</code> here. NOTE(review): presumably the
	 * superclass replaces a <code>null</code> line before delegating to this
	 * method - confirm against {@link AbstractLineTokenizer}.
	 *
	 * @return the resulting tokens, one per configured range
	 * @throws IncorrectLineLengthException in strict mode, if the line is
	 * shorter than the max range, or if it is longer than the max range and no
	 * range is open.
	 */
	@Override
	protected List<String> doTokenize(String line) {
		List<String> tokens = new ArrayList<String>(ranges.length);
		int lineLength = line.length();

		if (lineLength < maxRange && isStrict()) {
			throw new IncorrectLineLengthException("Line is shorter than max range " + maxRange, maxRange, lineLength, line);
		}

		if (!open && lineLength > maxRange && isStrict()) {
			throw new IncorrectLineLengthException("Line is longer than max range " + maxRange, maxRange, lineLength, line);
		}

		for (Range range : ranges) {
			// Ranges are 1-based and inclusive; substring() is 0-based with an
			// exclusive end, hence only the start is shifted.
			int startPos = range.getMin() - 1;
			int endPos = range.getMax();

			String token;
			if (lineLength >= endPos) {
				token = line.substring(startPos, endPos);
			}
			else if (lineLength >= startPos) {
				// The range extends past the end of the line: take what is there.
				token = line.substring(startPos);
			}
			else {
				// The range starts beyond the end of the line.
				token = "";
			}

			tokens.add(token);
		}

		return tokens;
	}
}