org.apache.crunch.io.text.csv.CSVRecordIterator.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.crunch.io.text.csv.CSVRecordIterator.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.crunch.io.text.csv;

import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;

import org.apache.hadoop.io.Text;

import com.google.common.io.Closeables;

/**
 * An {@code Iterator} for an internally created {@code CSVLineReader}
 */
public class CSVRecordIterator implements Iterator<String>, Closeable {
    private final CSVLineReader csvLineReader;
    private InputStream inputStream;
    private String currentLine;

    /**
     * Creates an instance of {@code CSVRecordIterator} with default configuration
     * 
     * @param inputStream
     *          The {@code InputStream} for the CSV file to iterate over
     * @throws UnsupportedEncodingException
     */
    public CSVRecordIterator(final InputStream inputStream) throws UnsupportedEncodingException {
        this(inputStream, CSVLineReader.DEFAULT_BUFFER_SIZE, CSVLineReader.DEFAULT_INPUT_FILE_ENCODING,
                CSVLineReader.DEFAULT_QUOTE_CHARACTER, CSVLineReader.DEFAULT_QUOTE_CHARACTER,
                CSVLineReader.DEFAULT_ESCAPE_CHARACTER, CSVLineReader.DEFAULT_MAXIMUM_RECORD_SIZE);
    }

    /**
     * Creates an instance of {@code CSVRecordIterator} with custom configuration
     * 
     * @param inputStream
     *          The {@code InputStream} for the CSV file to iterate over
     * @param bufferSize
     *          The size of the buffer used when reading the input stream
     * @param inputFileEncoding
     *          the encoding for the input file
     * @param openQuoteChar
     *          the character to use to open quote blocks
     * @param closeQuoteChar
     *          the character to use to close quote blocks
     * @param escapeChar
     *          the character to use for escaping control characters and quotes
     * @param maximumRecordSize
     *          The maximum acceptable size of one CSV record. Beyond this limit,
     *          {@code CSVLineReader} will stop parsing and an exception will be
     *          thrown.
     * @throws UnsupportedEncodingException
     */
    public CSVRecordIterator(final InputStream inputStream, final int bufferSize, final String inputFileEncoding,
            final char openQuoteChar, final char closeQuoteChar, final char escapeChar, final int maximumRecordSize)
            throws UnsupportedEncodingException {
        csvLineReader = new CSVLineReader(inputStream, bufferSize, inputFileEncoding, openQuoteChar, closeQuoteChar,
                escapeChar, maximumRecordSize);
        this.inputStream = inputStream;
        incrementValue();
    }

    @Override
    public boolean hasNext() {
        if (!(currentLine == null)) {
            return true;
        }
        Closeables.closeQuietly(this);
        return false;
    }

    @Override
    public String next() {
        final String result = currentLine;
        incrementValue();
        return result;
    }

    @Override
    public void remove() {
        incrementValue();
    }

    private void incrementValue() {
        final Text tempText = new Text();
        try {
            csvLineReader.readCSVLine(tempText);
        } catch (final IOException e) {
            throw new RuntimeException("A problem occurred accessing the underlying CSV file stream.", e);
        }
        final String tempTextAsString = tempText.toString();
        if ("".equals(tempTextAsString)) {
            currentLine = null;
        } else {
            currentLine = tempTextAsString;
        }
    }

    @Override
    public void close() throws IOException {
        if (inputStream != null) {
            inputStream.close();
            inputStream = null;
        }
    }
}