org.springframework.batch.item.file.RegexFileItemReader.java Source code

Java tutorial

Introduction

Here is the source code for org.springframework.batch.item.file.RegexFileItemReader.java

Source

/*
 * Copyright 2002-2014 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.springframework.batch.item.file;

import java.io.BufferedReader;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.batch.item.ReaderNotOpenException;
import org.springframework.batch.item.support.AbstractItemCountingItemStreamItemReader;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.core.io.Resource;
import org.springframework.util.Assert;
import org.springframework.util.ClassUtils;

public class RegexFileItemReader<T> extends AbstractItemCountingItemStreamItemReader<T>
        implements ResourceAwareItemReaderItemStream<T>, InitializingBean {

    private static Log logger = LogFactory.getLog(RegexFileItemReader.class);

    // default encoding for input files
    public static String DEFAULT_CHARSET = Charset.defaultCharset().name();

    private Resource resource;

    private BufferedReader reader;

    private String encoding = DEFAULT_CHARSET;

    private LineMapper<T> lineMapper;

    private int lineCount = 0;

    private boolean noInput = false;

    private Pattern pattern = null;

    private boolean strict = true;

    private BufferedReaderFactory bufferedReaderFactory = new DefaultBufferedReaderFactory();

    public RegexFileItemReader() {
        setName(ClassUtils.getShortName(RegexFileItemReader.class));
    }

    /**
     * In strict mode the reader will throw an exception on
     * {@link #open(org.springframework.batch.item.ExecutionContext)} if the input resource does not exist.
     * @param strict <code>true</code> by default
     */
    public void setStrict(boolean strict) {
        this.strict = strict;
    }

    /**
     * Setter for line mapper. This property is required to be set.
     * @param lineMapper maps line to item
     */
    public void setLineMapper(LineMapper<T> lineMapper) {
        this.lineMapper = lineMapper;
    }

    /**
     * Setter for the encoding for this input source. Default value is {@link #DEFAULT_CHARSET}.
     * 
     * @param encoding a properties object which possibly contains the encoding for this input file;
     */
    public void setEncoding(String encoding) {
        this.encoding = encoding;
    }

    /**
     * Factory for the {@link BufferedReader} that will be used to extract lines from the file. The default is fine for
     * plain text files, but this is a useful strategy for binary files where the standard BufferedReaader from java.io
     * is limiting.
     * 
     * @param bufferedReaderFactory the bufferedReaderFactory to set
     */
    public void setBufferedReaderFactory(BufferedReaderFactory bufferedReaderFactory) {
        this.bufferedReaderFactory = bufferedReaderFactory;
    }

    /**
     * RegExp pattern used to slice file into items 
     * 
     * @param bufferedReaderFactory the bufferedReaderFactory to set
     */
    public void setPattern(Pattern pattern) {
        this.pattern = pattern;
    }

    /**
     * Public setter for the input resource.
     */
    @Override
    public void setResource(Resource resource) {
        this.resource = resource;
    }

    @Override
    public void afterPropertiesSet() throws Exception {
        Assert.notNull(lineMapper, "LineMapper is required");
        Assert.notNull(pattern, "Pattern is required");
    }

    @Override
    protected T doRead() throws Exception {
        if (noInput) {
            return null;
        }

        String line = readLine();

        if (line == null) {
            return null;
        } else {
            try {
                return lineMapper.mapLine(line, lineCount);
            } catch (Exception ex) {
                throw new FlatFileParseException("Parsing error at line: " + lineCount + " in resource=["
                        + resource.getDescription() + "], input=[" + line + "]", ex, line, lineCount);
            }
        }
    }

    private int bufferSize = 2048;
    private int currentBufferSize = 2048;
    private int offsetBuffer = 0;
    private int offsetLastStart = 0;
    private int offsetLastEnd = 0;
    private char[] buffer = new char[2 * bufferSize];
    private boolean readBufferNeeded = true;

    private boolean readBuffer() throws IOException {

        if (!readBufferNeeded) {
            return true;
        }

        readBufferNeeded = false;
        int read = this.reader.read(buffer, offsetBuffer, bufferSize);
        if (read == -1) {
            return false;
        }

        return true;
    }

    private void copy(char[] b, int from, int len, int to) {

        for (int idx = from; idx < from + len; idx++) {
            b[to++] = b[idx];
        }

    }

    /**
     * @return next line (skip comments).getCurrentResource
     */
    private String readLine() {

        if (reader == null) {
            throw new ReaderNotOpenException("Reader must be open before it can be read.");
        }

        String line = null;

        try {

            while (1 == 1) {

                boolean readed = readBuffer();
                if (!readed) {
                    return null;
                }

                String bufferString = String.valueOf(buffer);
                Matcher matcher = pattern.matcher(bufferString);
                boolean found = matcher.find(offsetLastEnd);
                if (found) {
                    offsetLastStart = matcher.start();
                    offsetLastEnd = matcher.end();
                    line = bufferString.substring(offsetLastStart, offsetLastEnd);
                    lineCount++;
                    break;

                } else {

                    readBufferNeeded = true;

                    if (offsetLastEnd != 0) {
                        int restLen = currentBufferSize - offsetLastEnd;
                        // move to beggining of buffer chunk after last item found
                        copy(buffer, offsetLastEnd, currentBufferSize - offsetLastEnd, 0);
                        Arrays.fill(buffer, restLen, buffer.length, (char) 0);

                        // new buffer's length will contain this chunk
                        currentBufferSize = bufferSize + restLen;
                        offsetBuffer = restLen;
                        offsetLastEnd = 0;

                    } else {
                        // if can't find any item in current buffer then read next block
                        // and add to first one (remove previous block if needed)
                        offsetBuffer = bufferSize;
                        if (currentBufferSize == bufferSize) {

                            // first block stays, read next one after first
                            // blocks: 1 -> 12
                            currentBufferSize = 2 * bufferSize;

                        } else {
                            // copy second block to beginning, make room to next one, forget first
                            // blocks: 12 -> 23
                            copy(buffer, bufferSize, bufferSize, 0);
                            offsetBuffer = bufferSize;
                            currentBufferSize = 2 * bufferSize;
                        }
                    }
                }
            }

            if (line == null) {
                return null;
            }

        } catch (IOException e) {
            // Prevent IOException from recurring indefinitely
            // if client keeps catching and re-calling
            noInput = true;
            throw new NonTransientFlatFileException("Unable to read from resource: [" + resource + "]", e, line,
                    lineCount);
        }
        return line;
    }

    @Override
    protected void doOpen() throws Exception {
        Assert.notNull(resource, "Input resource must be set");

        noInput = true;
        if (!resource.exists()) {
            if (strict) {
                throw new IllegalStateException(
                        "Input resource must exist (reader is in 'strict' mode): " + resource);
            }
            logger.warn("Input resource does not exist " + resource.getDescription());
            return;
        }

        if (!resource.isReadable()) {
            if (strict) {
                throw new IllegalStateException(
                        "Input resource must be readable (reader is in 'strict' mode): " + resource);
            }
            logger.warn("Input resource is not readable " + resource.getDescription());
            return;
        }

        reader = bufferedReaderFactory.create(resource, encoding);

        noInput = false;
    }

    @Override
    protected void doClose() throws Exception {
        lineCount = 0;
        offsetBuffer = 0;
        offsetLastStart = 0;
        offsetLastEnd = 0;
        readBufferNeeded = true;

        if (reader != null) {
            reader.close();
        }
    }

}