001/*
002 * Copyright 2006-2014 the original author or authors.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *      https://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package org.springframework.batch.item.file.transform;
018
019import java.util.ArrayList;
020import java.util.Arrays;
021import java.util.List;
022
023/**
024 * Tokenizer used to process data obtained from files with fixed-length format.
025 * Columns are specified by array of Range objects ({@link #setColumns(Range[])}
026 * ).
027 * 
028 * @author tomas.slanina
029 * @author peter.zozom
030 * @author Dave Syer
031 * @author Lucas Ward
032 * @author Michael Minella
033 */
034public class FixedLengthTokenizer extends AbstractLineTokenizer {
035
036        private Range[] ranges;
037
038        private int maxRange = 0;
039
040        boolean open = false;
041
042        /**
043         * Set the column ranges. Used in conjunction with the
044         * {@link RangeArrayPropertyEditor} this property can be set in the form of
045         * a String describing the range boundaries, e.g. "1,4,7" or "1-3,4-6,7" or
046         * "1-2,4-5,7-10". If the last range is open then the rest of the line is
047         * read into that column (irrespective of the strict flag setting).
048         * 
049         * @see #setStrict(boolean)
050         * 
051         * @param ranges the column ranges expected in the input
052         */
053        public void setColumns(Range... ranges) {
054                this.ranges = Arrays.asList(ranges).toArray(new Range[ranges.length]);
055                calculateMaxRange(ranges);
056        }
057
058        /*
059         * Calculate the highest value within an array of ranges. The ranges aren't
060         * necessarily in order. For example: "5-10, 1-4,11-15". Furthermore, there
061         * isn't always a min and max, such as: "1,4-20, 22"
062         */
063        private void calculateMaxRange(Range[] ranges) {
064                if (ranges == null || ranges.length == 0) {
065                        maxRange = 0;
066                        return;
067                }
068
069                open = false;
070                maxRange = ranges[0].getMin();
071
072                for (int i = 0; i < ranges.length; i++) {
073                        int upperBound;
074                        if (ranges[i].hasMaxValue()) {
075                                upperBound = ranges[i].getMax();
076                        }
077                        else {
078                                upperBound = ranges[i].getMin();
079                                if (upperBound > maxRange) {
080                                        open = true;
081                                }
082                        }
083
084                        if (upperBound > maxRange) {
085                                maxRange = upperBound;
086                        }
087                }
088        }
089
090        /**
091         * Yields the tokens resulting from the splitting of the supplied
092         * <code>line</code>.
093         * 
094         * @param line the line to be tokenized (can be <code>null</code>)
095         * 
096         * @return the resulting tokens (empty if the line is null)
097         * @throws IncorrectLineLengthException if line length is greater than or
098         * less than the max range set.
099         */
100    @Override
101        protected List<String> doTokenize(String line) {
102                List<String> tokens = new ArrayList<String>(ranges.length);
103                int lineLength;
104                String token;
105
106                lineLength = line.length();
107
108                if (lineLength < maxRange && isStrict()) {
109                        throw new IncorrectLineLengthException("Line is shorter than max range " + maxRange, maxRange, lineLength, line);
110                }
111
112                if (!open && lineLength > maxRange && isStrict()) {
113                        throw new IncorrectLineLengthException("Line is longer than max range " + maxRange, maxRange, lineLength, line);
114                }
115
116                for (int i = 0; i < ranges.length; i++) {
117
118                        int startPos = ranges[i].getMin() - 1;
119                        int endPos = ranges[i].getMax();
120
121                        if (lineLength >= endPos) {
122                                token = line.substring(startPos, endPos);
123                        }
124                        else if (lineLength >= startPos) {
125                                token = line.substring(startPos);
126                        }
127                        else {
128                                token = "";
129                        }
130
131                        tokens.add(token);
132                }
133
134                return tokens;
135        }
136}