org.jberet.support.io.JacksonCsvItemReader.java Source code

Java tutorial

Introduction

Here is the source code for org.jberet.support.io.JacksonCsvItemReader.java

Source

/*
 * Copyright (c) 2015 Red Hat, Inc. and/or its affiliates.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 * Cheng Fang - Initial API and implementation
 */

package org.jberet.support.io;

import java.io.Serializable;
import java.util.List;
import java.util.Map;
import javax.batch.api.BatchProperty;
import javax.batch.api.chunk.ItemReader;
import javax.enterprise.context.Dependent;
import javax.inject.Inject;
import javax.inject.Named;

import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.dataformat.csv.CsvParser;
import com.fasterxml.jackson.dataformat.csv.CsvSchema;
import org.jberet.support._private.SupportLogger;
import org.jberet.support._private.SupportMessages;

/**
 * An implementation of {@code javax.batch.api.chunk.ItemReader} that reads data items from CSV files using jackson-dataformat-csv.
 * <p/>
 * Rows are counted starting from 1. Only rows whose number falls within the configured {@link #start} and
 * {@link #end} positions (both inclusive) are returned by {@link #readItem()}; the number of the last row
 * read is reported as the checkpoint.
 *
 * @see CsvItemReader
 * @see JacksonCsvItemWriter
 * @see JacksonCsvItemReaderWriterBase
 * @since 1.2.0
 */
@Named
@Dependent
public class JacksonCsvItemReader extends JacksonCsvItemReaderWriterBase implements ItemReader {

    /**
     * Specifies the start position (a positive integer starting from 1) to read the data. If reading from the beginning
     * of the input CSV resource, there is no need to specify this property.
     * When a checkpoint is passed to {@link #open(java.io.Serializable)}, it replaces this value.
     */
    @Inject
    @BatchProperty
    protected int start;

    /**
     * Specify the end position in the data set (inclusive). Optional property, and defaults to {@code Integer.MAX_VALUE}.
     * If reading till the end of the input CSV resource, there is no need to specify this property.
     * A value of 0 is treated as "not specified".
     */
    @Inject
    @BatchProperty
    protected int end;

    /**
     * Whether the first data line (either first line of the document, if useHeader=false, or second, if useHeader=true)
     * should be completely ignored by parser. Needed to support CSV-like file formats that include additional
     * non-data content before real data begins (specifically some database dumps do this)
     * <p/>
     * Optional property, valid values are "true" and "false" and defaults to "false".
     * This property is applied only when a schema is set on the parser, i.e., when {@code beanType} is neither
     * {@code java.util.List} nor {@code String[]}.
     *
     * @see "com.fasterxml.jackson.dataformat.csv.CsvSchema"
     */
    @Inject
    @BatchProperty
    protected String skipFirstDataRow;

    /**
     * Character, if any, used to escape values. Most commonly defined as backslash ('\'). Only used by parser;
     * generator only uses quoting, including doubling up of quotes to indicate quote char itself.
     * Optional property and defaults to null. Only the first character of the value is used.
     * This property is applied only when a schema is set on the parser, i.e., when {@code beanType} is neither
     * {@code java.util.List} nor {@code String[]}.
     *
     * @see "com.fasterxml.jackson.dataformat.csv.CsvSchema"
     */
    @Inject
    @BatchProperty
    protected String escapeChar;

    /**
     * A comma-separated list of key-value pairs that specify {@code com.fasterxml.jackson.core.JsonParser} features.
     * Optional property and defaults to null. For example,
     * <p/>
     * <pre>
     * ALLOW_COMMENTS=true, ALLOW_YAML_COMMENTS=true, ALLOW_NUMERIC_LEADING_ZEROS=true, STRICT_DUPLICATE_DETECTION=true
     * </pre>
     *
     * @see "com.fasterxml.jackson.core.JsonParser.Feature"
     */
    @Inject
    @BatchProperty
    protected Map<String, String> jsonParserFeatures;

    /**
     * A comma-separated list of key-value pairs that specify {@code com.fasterxml.jackson.dataformat.csv.CsvParser.Feature}.
     * Optional property and defaults to null. For example,
     * <p/>
     * <pre>
     * TRIM_SPACES=false, WRAP_AS_ARRAY=false
     * </pre>
     *
     * @see "com.fasterxml.jackson.dataformat.csv.CsvParser.Feature"
     */
    @Inject
    @BatchProperty
    protected Map<String, String> csvParserFeatures;

    /**
     * A comma-separated list of fully-qualified names of classes that implement
     * {@code com.fasterxml.jackson.databind.deser.DeserializationProblemHandler}, which can be registered to get
     * called when a potentially recoverable problem is encountered during deserialization process.
     * Handlers can try to resolve the problem, throw an exception or do nothing. Optional property and defaults to null.
     * For example,
     * <p/>
     * <pre>
     * org.jberet.support.io.JsonItemReaderTest$UnknownHandler, org.jberet.support.io.JsonItemReaderTest$UnknownHandler2
     * </pre>
     *
     * @see "com.fasterxml.jackson.databind.deser.DeserializationProblemHandler"
     * @see "com.fasterxml.jackson.databind.ObjectMapper#addHandler(com.fasterxml.jackson.databind.deser.DeserializationProblemHandler)"
     * @see MappingJsonFactoryObjectFactory#configureDeserializationProblemHandlers(com.fasterxml.jackson.databind.ObjectMapper, java.lang.String, java.lang.ClassLoader)
     */
    @Inject
    @BatchProperty
    protected String deserializationProblemHandlers;

    /**
     * Fully-qualified name of a class that extends {@code com.fasterxml.jackson.core.io.InputDecorator}, which can be
     * used to decorate input sources. Typical use is to use a filter abstraction (filtered stream, reader)
     * around original input source, and apply additional processing during read operations. Optional property and
     * defaults to null. For example,
     * <p/>
     * <pre>
     * org.jberet.support.io.JsonItemReaderTest$NoopInputDecorator
     * </pre>
     *
     * @see "com.fasterxml.jackson.core.JsonFactory#setInputDecorator(com.fasterxml.jackson.core.io.InputDecorator)"
     * @see "com.fasterxml.jackson.core.io.InputDecorator"
     */
    @Inject
    @BatchProperty
    protected Class inputDecorator;

    /** The parser over the CSV resource; non-null only between {@link #open} and {@link #close}. */
    private CsvParser csvParser;

    /** Number of rows encountered so far, including rows skipped because they precede {@link #start}. */
    private int rowNumber;

    /** True when {@code beanType} is {@code List} or {@code String[]}, in which case no schema is set on the parser. */
    private boolean rawAccess;

    /**
     * Prepares this reader: resolves the effective start and end positions, creates and configures the
     * {@code CsvParser}, and, unless rows are read as raw lists or string arrays, sets a {@code CsvSchema}
     * on the parser.
     * <p/>
     * NOTE(review): {@link #checkpointInfo()} returns the number of the last row read, and that value is used
     * here as the inclusive start position, so the last checkpointed row appears to be read again after a
     * restart. Confirm whether this is intended.
     *
     * @param checkpoint the last checkpoint (an {@code Integer} row number), or null when starting fresh
     * @throws Exception if the start position is beyond the end position, if a CSV parser feature or the
     *                   {@code columns} / {@code escapeChar} property is invalid, or if parser setup fails
     */
    @Override
    public void open(final Serializable checkpoint) throws Exception {
        if (end == 0) {
            // end was not specified: read till the end of the resource
            end = Integer.MAX_VALUE;
        }
        if (checkpoint != null) {
            start = (Integer) checkpoint;
        }
        if (start > end) {
            // start already equals the checkpoint whenever one was supplied, so report start rather than
            // dereferencing a checkpoint that is null on a fresh (non-restart) execution.
            throw SupportMessages.MESSAGES.invalidStartPosition(start, start, end);
        }
        init();
        csvParser = (CsvParser) JsonItemReader.configureJsonParser(this, inputDecorator,
                deserializationProblemHandlers, jsonParserFeatures);

        if (csvParserFeatures != null) {
            for (final Map.Entry<String, String> e : csvParserFeatures.entrySet()) {
                final String key = e.getKey();
                final String value = e.getValue();
                final CsvParser.Feature feature;
                try {
                    feature = CsvParser.Feature.valueOf(key);
                } catch (final Exception e1) {
                    throw SupportMessages.MESSAGES.unrecognizedReaderWriterProperty(key, value);
                }
                // Only touch the parser when the requested state differs from the feature's default.
                if ("true".equals(value)) {
                    if (!feature.enabledByDefault()) {
                        csvParser.configure(feature, true);
                    }
                } else if ("false".equals(value)) {
                    if (feature.enabledByDefault()) {
                        csvParser.configure(feature, false);
                    }
                } else {
                    throw SupportMessages.MESSAGES.invalidReaderWriterProperty(null, value, key);
                }
            }
        }

        rawAccess = beanType == List.class || beanType == String[].class;
        if (!rawAccess) {
            CsvSchema schema;
            if (columns != null) {
                schema = buildCsvSchema(null);
            } else if (useHeader) {
                // columns not defined, but beanType is either Map.class or Pojo.class or JsonNode.class;
                // start from an empty schema when a header is used
                schema = buildCsvSchema(CsvSchema.emptySchema());
            } else {
                // neither explicit columns nor a header: there is nothing to build the schema from
                throw SupportMessages.MESSAGES.invalidReaderWriterProperty(null, columns, "columns");
            }

            if (escapeChar != null) {
                if (escapeChar.isEmpty()) {
                    // fail with a descriptive error instead of a StringIndexOutOfBoundsException from charAt(0)
                    throw SupportMessages.MESSAGES.invalidReaderWriterProperty(null, escapeChar, "escapeChar");
                }
                schema = schema.withEscapeChar(escapeChar.charAt(0));
            }
            if (skipFirstDataRow != null) {
                schema = schema.withSkipFirstDataRow(Boolean.parseBoolean(skipFirstDataRow.trim()));
            }
            csvParser.setSchema(schema);
        }
    }

    /**
     * Closes the underlying {@code CsvParser}, if open, and clears any deserialization problem handlers that
     * were configured. The parser is closed and the reference to it dropped even if clearing the handlers fails.
     * Calling this method when the parser is not open does nothing.
     *
     * @throws Exception if clearing the problem handlers or closing the parser fails
     */
    @Override
    public void close() throws Exception {
        if (csvParser == null) {
            return;
        }
        SupportLogger.LOGGER.closingResource(resource, this.getClass());
        // Drop the field first so that the reader is left in the closed state whatever happens below.
        final CsvParser parser = csvParser;
        csvParser = null;
        try {
            if (deserializationProblemHandlers != null) {
                objectMapper.clearProblemHandlers();
            }
        } finally {
            parser.close();
        }
    }

    /**
     * Reads the next row within the configured start and end positions and binds it to {@code beanType}.
     * Rows before the start position are counted and skipped. Bean validation is performed on the bound value
     * unless rows are read as raw lists or string arrays, or {@code skipBeanValidation} is set.
     *
     * @return the next item, or null if the end position or the end of the input has been reached
     * @throws Exception if parsing, data binding or bean validation fails
     */
    @Override
    public Object readItem() throws Exception {
        if (rowNumber >= end) {
            return null;
        }

        // With a schema each row is exposed as an object; with raw access each row is exposed as an array.
        final JsonToken rowStartToken = rawAccess ? JsonToken.START_ARRAY : JsonToken.START_OBJECT;
        while (true) {
            final JsonToken token = csvParser.nextToken();
            if (token == null) {
                // no more input
                return null;
            }
            // every row start is counted, but only rows at or after the start position are returned
            if (token == rowStartToken && ++rowNumber >= start) {
                break;
            }
        }

        final Object readValue = objectMapper.readValue(csvParser, beanType);
        if (!rawAccess && !skipBeanValidation) {
            ItemReaderWriterBase.validate(readValue);
        }
        return readValue;
    }

    /**
     * Gets the current row number in the input CSV resource as the checkpoint info.
     *
     * @return the number of the last row encountered by {@link #readItem()}
     * @throws Exception any exception raised
     */
    @Override
    public Serializable checkpointInfo() throws Exception {
        return rowNumber;
    }
}