Example usage for org.apache.commons.csv CSVFormat getIgnoreEmptyLines

List of usage examples for org.apache.commons.csv CSVFormat getIgnoreEmptyLines

Introduction

On this page you can find example usages of org.apache.commons.csv CSVFormat getIgnoreEmptyLines.

Prototype

public boolean getIgnoreEmptyLines() 

Source Link

Document

Specifies whether empty lines between records are ignored when parsing input.

Usage

From source file: com.datascience.cascading.scheme.CsvScheme.java

/**
 * Transfers the reader-related settings of the given CSV format into the Hadoop configuration.
 *
 * @param format the CSV format whose settings are read
 * @param conf the Hadoop configuration that the settings are written to
 */
private void configureReaderFormat(CSVFormat format, Configuration conf) {
    // NOTE(review): the charset is stored under the CsvOutputFormat key even though this method configures the
    // reader -- confirm that the input side reads the same key.
    conf.set(CsvOutputFormat.CHARSET, charset);
    conf.setBoolean(CsvInputFormat.STRICT_MODE, strict);

    // A header that was explicitly provided by the user is forwarded to the record reader unchanged. Otherwise, an
    // enabled skipHeaderRecord flag indicates that field names were detected; the columns must then still be defined
    // so that the CSV reader is able to skip the header record.
    final String[] explicitHeader = format.getHeader();
    final boolean skipHeader = format.getSkipHeaderRecord();
    if (explicitHeader != null) {
        conf.setStrings(CsvInputFormat.CSV_READER_COLUMNS, explicitHeader);
    } else if (skipHeader) {
        final Fields sourceFields = getSourceFields();
        final int count = sourceFields.size();
        final String[] names = new String[count];
        for (int idx = 0; idx < count; idx++) {
            names[idx] = sourceFields.get(idx).toString();
        }
        conf.setStrings(CsvInputFormat.CSV_READER_COLUMNS, names);
    }

    conf.setBoolean(CsvInputFormat.CSV_READER_SKIP_HEADER, skipHeader);
    conf.set(CsvInputFormat.CSV_READER_DELIMITER, String.valueOf(format.getDelimiter()));

    // The remaining object-valued settings are only written when the format actually defines them.
    final String recordSeparator = format.getRecordSeparator();
    if (recordSeparator != null) {
        conf.set(CsvInputFormat.CSV_READER_RECORD_SEPARATOR, recordSeparator);
    }

    final Character quote = format.getQuoteCharacter();
    if (quote != null) {
        conf.set(CsvInputFormat.CSV_READER_QUOTE_CHARACTER, String.valueOf(quote));
    }

    if (format.getQuoteMode() != null) {
        conf.set(CsvInputFormat.CSV_READER_QUOTE_MODE, format.getQuoteMode().name());
    }

    final Character escape = format.getEscapeCharacter();
    if (escape != null) {
        conf.set(CsvInputFormat.CSV_READER_ESCAPE_CHARACTER, String.valueOf(escape));
    }

    conf.setBoolean(CsvInputFormat.CSV_READER_IGNORE_EMPTY_LINES, format.getIgnoreEmptyLines());
    conf.setBoolean(CsvInputFormat.CSV_READER_IGNORE_SURROUNDING_SPACES, format.getIgnoreSurroundingSpaces());

    final String nullString = format.getNullString();
    if (nullString != null) {
        conf.set(CsvInputFormat.CSV_READER_NULL_STRING, nullString);
    }
}

From source file: com.datascience.cascading.scheme.CsvScheme.java

/**
 * Transfers the writer-related settings of the given CSV format into the Hadoop configuration.
 *
 * @param format the CSV format whose settings are read
 * @param conf the Hadoop configuration that the settings are written to
 */
private void configureWriterFormat(CSVFormat format, Configuration conf) {
    conf.set(CsvOutputFormat.CHARSET, charset);

    // Apache CSV doesn't really handle the skipHeaderRecord flag correctly when writing output: if the skip flag is
    // set and headers are configured, the headers are written anyway. Because headers and/or fields are always
    // configured here, the skipHeaderRecord flag itself decides whether any columns are handed to the writer.
    final boolean skipHeader = format.getSkipHeaderRecord();
    if (!skipHeader) {
        final String[] header = format.getHeader();
        final String[] columns;
        if (header == null || header.length == 0) {
            // No usable header on the format: fall back to the sink fields.
            final Fields sinkFields = getSinkFields();
            final int count = sinkFields.size();
            columns = new String[count];
            for (int idx = 0; idx < count; idx++) {
                columns[idx] = sinkFields.get(idx).toString();
            }
        } else {
            columns = header;
        }
        conf.setStrings(CsvOutputFormat.CSV_WRITER_COLUMNS, columns);
    }

    conf.setBoolean(CsvOutputFormat.CSV_WRITER_SKIP_HEADER, skipHeader);
    conf.set(CsvOutputFormat.CSV_WRITER_DELIMITER, String.valueOf(format.getDelimiter()));

    // The remaining object-valued settings are only written when the format actually defines them.
    final String recordSeparator = format.getRecordSeparator();
    if (recordSeparator != null) {
        conf.set(CsvOutputFormat.CSV_WRITER_RECORD_SEPARATOR, recordSeparator);
    }

    final Character quote = format.getQuoteCharacter();
    if (quote != null) {
        conf.set(CsvOutputFormat.CSV_WRITER_QUOTE_CHARACTER, String.valueOf(quote));
    }

    if (format.getQuoteMode() != null) {
        conf.set(CsvOutputFormat.CSV_WRITER_QUOTE_MODE, format.getQuoteMode().name());
    }

    final Character escape = format.getEscapeCharacter();
    if (escape != null) {
        conf.set(CsvOutputFormat.CSV_WRITER_ESCAPE_CHARACTER, String.valueOf(escape));
    }

    conf.setBoolean(CsvOutputFormat.CSV_WRITER_IGNORE_EMPTY_LINES, format.getIgnoreEmptyLines());
    conf.setBoolean(CsvOutputFormat.CSV_WRITER_IGNORE_SURROUNDING_SPACES, format.getIgnoreSurroundingSpaces());

    final String nullString = format.getNullString();
    if (nullString != null) {
        conf.set(CsvOutputFormat.CSV_WRITER_NULL_STRING, nullString);
    }
}

From source file: org.apache.nifi.csv.JacksonCSVRecordReader.java

/**
 * Creates a record reader that parses CSV from the given stream using Jackson, configured from the supplied
 * Commons CSV format.
 *
 * @param in the stream to read CSV data from; it is wrapped in a {@code BOMInputStream} before decoding
 * @param logger passed through to the superclass
 * @param schema passed through to the superclass
 * @param csvFormat supplies the delimiter, record separator, comment marker, quote and escape characters, and the
 *        ignore-empty-lines and trim flags
 * @param hasHeader whether the input has a header line; passed to the superclass and, together with
 *        {@code ignoreHeader}, decides whether the first row is skipped
 * @param ignoreHeader whether a present header line should be skipped by the parser
 * @param dateFormat passed through to the superclass
 * @param timeFormat passed through to the superclass
 * @param timestampFormat passed through to the superclass
 * @param encoding name of the charset used to decode {@code in}; when {@code null} the platform default charset
 *        is used
 * @throws IOException if {@code encoding} names an unsupported charset or the value iterator cannot be created
 */
public JacksonCSVRecordReader(final InputStream in, final ComponentLog logger, final RecordSchema schema,
        final CSVFormat csvFormat, final boolean hasHeader, final boolean ignoreHeader, final String dateFormat,
        final String timeFormat, final String timestampFormat, final String encoding) throws IOException {
    super(logger, schema, hasHeader, ignoreHeader, dateFormat, timeFormat, timestampFormat);

    // Decode with the caller-supplied charset. Previously the encoding argument was ignored and the platform
    // default charset was always used; a null encoding keeps that old behaviour instead of failing.
    final InputStream bomStream = new BOMInputStream(in);
    final Reader reader = (encoding == null) ? new InputStreamReader(bomStream)
            : new InputStreamReader(bomStream, encoding);

    CsvSchema.Builder csvSchemaBuilder = CsvSchema.builder().setColumnSeparator(csvFormat.getDelimiter())
            .setLineSeparator(csvFormat.getRecordSeparator())
            // Can only use comments in Jackson CSV if the correct marker is set
            .setAllowComments("#".equals(CharUtils.toString(csvFormat.getCommentMarker())))
            // The call to setUseHeader(false) in all code paths is due to the way Jackson does data binding/mapping. Missing or extra columns may not
            // be handled correctly when using the header for mapping.
            .setUseHeader(false);

    // Quote and escape characters are optional in the Commons CSV format; only forward them when defined.
    if (csvFormat.getQuoteCharacter() != null) {
        csvSchemaBuilder = csvSchemaBuilder.setQuoteChar(csvFormat.getQuoteCharacter());
    }
    if (csvFormat.getEscapeCharacter() != null) {
        csvSchemaBuilder = csvSchemaBuilder.setEscapeChar(csvFormat.getEscapeCharacter());
    }

    // The header line is skipped as a data row only when it exists and the caller asked to ignore it.
    if (hasHeader && ignoreHeader) {
        csvSchemaBuilder = csvSchemaBuilder.setSkipFirstDataRow(true);
    }

    CsvSchema csvSchema = csvSchemaBuilder.build();

    // Add remaining config options to the mapper
    List<CsvParser.Feature> features = new ArrayList<>();
    features.add(CsvParser.Feature.INSERT_NULLS_FOR_MISSING_COLUMNS);
    if (csvFormat.getIgnoreEmptyLines()) {
        features.add(CsvParser.Feature.SKIP_EMPTY_LINES);
    }
    if (csvFormat.getTrim()) {
        features.add(CsvParser.Feature.TRIM_SPACES);
    }

    ObjectReader objReader = mapper.readerFor(String[].class).with(csvSchema)
            .withFeatures(features.toArray(new CsvParser.Feature[features.size()]));

    recordStream = objReader.readValues(reader);
}