List of usage examples for org.apache.commons.csv.CSVFormat#getSkipHeaderRecord()
public boolean getSkipHeaderRecord()
From source file:com.streamsets.pipeline.lib.csv.CsvParser.java
/**
 * Creates a CSV parser over {@code reader} and positions it at {@code initialPosition}.
 *
 * <p>Behavior by case:
 * <ul>
 *   <li>{@code initialPosition == 0}: if {@code skipStartLines > 0}, {@code skipLines(...)} is invoked first
 *       and its return value becomes the current position. If the format requests skipping the header
 *       record, the first record is read and kept as the headers; otherwise headers are {@code null}.</li>
 *   <li>{@code initialPosition > 0} with a header-skipping format: the header is read from the start of
 *       the reader and records are consumed until the reader position reaches {@code initialPosition}.</li>
 *   <li>{@code initialPosition > 0} without header skipping: the reader is advanced by
 *       {@code initialPosition} via {@code IOUtils.skipFully} and headers are {@code null}.</li>
 * </ul>
 *
 * <p>Note that {@code skipStartLines} is only applied when {@code initialPosition} is zero.
 *
 * @param reader          source reader; must not be {@code null} and must be at position zero
 * @param format          CSV format; must not be {@code null}
 * @param maxObjectLen    maximum object length, stored as-is for later use
 * @param initialPosition position to start parsing from; must be {@code >= 0}
 * @param skipStartLines  number of leading lines to skip; must be {@code >= 0}
 * @throws IOException if reading or skipping fails, or if the reader cannot be positioned exactly at
 *                     {@code initialPosition}
 */
@SuppressWarnings("unchecked")
public CsvParser(CountingReader reader, CSVFormat format, int maxObjectLen, long initialPosition,
    int skipStartLines) throws IOException {
  Utils.checkNotNull(reader, "reader");
  // This is a boolean precondition, so it must go through checkArgument: checkNotNull on an autoboxed
  // boolean can never fail and would silently accept a reader that is not at position zero.
  Utils.checkArgument(reader.getPos() == 0,
      "reader must be in position zero, the CsvParser will fast-forward to the initialPosition");
  Utils.checkNotNull(format, "format");
  Utils.checkArgument(initialPosition >= 0, "initialPosition must be greater or equal than zero");
  Utils.checkArgument(skipStartLines >= 0, "skipStartLines must be greater or equal than zero");

  this.reader = reader;
  currentPos = initialPosition;
  this.maxObjectLen = maxObjectLen;

  if (initialPosition == 0) {
    if (skipStartLines > 0) {
      skipLinesPosCorrection = skipLines(reader, skipStartLines);
      currentPos = skipLinesPosCorrection;
    }
    if (format.getSkipHeaderRecord()) {
      // Disable the library's own header skipping so the header can be read (and kept) explicitly here.
      format = format.withSkipHeaderRecord(false);
      parser = new CSVParser(reader, format, 0, 0);
      headers = read();
    } else {
      parser = new CSVParser(reader, format, 0, 0);
      headers = null;
    }
  } else {
    if (format.getSkipHeaderRecord()) {
      // The header must be captured, so parse from the beginning and consume records up to the offset.
      format = format.withSkipHeaderRecord(false);
      parser = new CSVParser(reader, format, 0, 0);
      headers = read();
      while (getReaderPosition() < initialPosition && read() != null) {
        // Intentionally empty: records are discarded while fast-forwarding.
      }
      if (getReaderPosition() != initialPosition) {
        throw new IOException(
            Utils.format("Could not position reader at position '{}', got '{}' instead",
                initialPosition, getReaderPosition()));
      }
    } else {
      IOUtils.skipFully(reader, initialPosition);
      parser = new CSVParser(reader, format, initialPosition, 0);
      headers = null;
    }
  }
}
From source file:com.datascience.cascading.scheme.CsvScheme.java
/**
 * Copies the output-side settings of the given CSV format into the Hadoop configuration.
 *
 * @param format CSV format whose settings are exported
 * @param conf   Hadoop configuration that receives the writer settings
 */
private void configureWriterFormat(CSVFormat format, Configuration conf) {
  conf.set(CsvOutputFormat.CHARSET, charset);

  // Apache CSV doesn't really handle the skipHeaderRecord flag correctly when writing output: if the
  // skip flag is set and headers are configured, headers are still written. Because headers and/or
  // fields are always configured here, the skipHeaderRecord flag decides whether columns are exported.
  final boolean skipHeader = format.getSkipHeaderRecord();
  if (!skipHeader) {
    String[] columns = format.getHeader();
    if (columns == null || columns.length == 0) {
      // No explicit header: fall back to the names of the sink fields.
      final Fields sinkFields = getSinkFields();
      final int count = sinkFields.size();
      columns = new String[count];
      for (int idx = 0; idx < count; idx++) {
        columns[idx] = sinkFields.get(idx).toString();
      }
    }
    conf.setStrings(CsvOutputFormat.CSV_WRITER_COLUMNS, columns);
  }

  conf.setBoolean(CsvOutputFormat.CSV_WRITER_SKIP_HEADER, skipHeader);
  conf.set(CsvOutputFormat.CSV_WRITER_DELIMITER, String.valueOf(format.getDelimiter()));

  // Optional settings are only exported when the format actually defines them.
  final String recordSeparator = format.getRecordSeparator();
  if (recordSeparator != null) {
    conf.set(CsvOutputFormat.CSV_WRITER_RECORD_SEPARATOR, recordSeparator);
  }

  final Character quoteCharacter = format.getQuoteCharacter();
  if (quoteCharacter != null) {
    conf.set(CsvOutputFormat.CSV_WRITER_QUOTE_CHARACTER, String.valueOf(quoteCharacter));
  }

  final QuoteMode quoteMode = format.getQuoteMode();
  if (quoteMode != null) {
    conf.set(CsvOutputFormat.CSV_WRITER_QUOTE_MODE, quoteMode.name());
  }

  final Character escapeCharacter = format.getEscapeCharacter();
  if (escapeCharacter != null) {
    conf.set(CsvOutputFormat.CSV_WRITER_ESCAPE_CHARACTER, String.valueOf(escapeCharacter));
  }

  conf.setBoolean(CsvOutputFormat.CSV_WRITER_IGNORE_EMPTY_LINES, format.getIgnoreEmptyLines());
  conf.setBoolean(CsvOutputFormat.CSV_WRITER_IGNORE_SURROUNDING_SPACES, format.getIgnoreSurroundingSpaces());

  final String nullString = format.getNullString();
  if (nullString != null) {
    conf.set(CsvOutputFormat.CSV_WRITER_NULL_STRING, nullString);
  }
}
From source file:com.datascience.cascading.scheme.CsvScheme.java
/** * Configures the Hadoop configuration for the given CSV format. *//*from w w w .j av a 2 s. c o m*/ private void configureReaderFormat(CSVFormat format, Configuration conf) { conf.set(CsvOutputFormat.CHARSET, charset); // If the format header was explicitly provided by the user then forward it to the record reader. If skipHeaderRecord // is enabled then that indicates that field names were detected. We need to ensure that headers are defined in order // for the CSV reader to skip the header record. conf.setBoolean(CsvInputFormat.STRICT_MODE, strict); if (format.getHeader() != null) { conf.setStrings(CsvInputFormat.CSV_READER_COLUMNS, format.getHeader()); } else if (format.getSkipHeaderRecord()) { Fields fields = getSourceFields(); String[] columns = new String[fields.size()]; for (int i = 0; i < fields.size(); i++) { columns[i] = fields.get(i).toString(); } conf.setStrings(CsvInputFormat.CSV_READER_COLUMNS, columns); } conf.setBoolean(CsvInputFormat.CSV_READER_SKIP_HEADER, format.getSkipHeaderRecord()); conf.set(CsvInputFormat.CSV_READER_DELIMITER, String.valueOf(format.getDelimiter())); if (format.getRecordSeparator() != null) { conf.set(CsvInputFormat.CSV_READER_RECORD_SEPARATOR, format.getRecordSeparator()); } if (format.getQuoteCharacter() != null) { conf.set(CsvInputFormat.CSV_READER_QUOTE_CHARACTER, String.valueOf(format.getQuoteCharacter())); } if (format.getQuoteMode() != null) { conf.set(CsvInputFormat.CSV_READER_QUOTE_MODE, format.getQuoteMode().name()); } if (format.getEscapeCharacter() != null) { conf.set(CsvInputFormat.CSV_READER_ESCAPE_CHARACTER, String.valueOf(format.getEscapeCharacter())); } conf.setBoolean(CsvInputFormat.CSV_READER_IGNORE_EMPTY_LINES, format.getIgnoreEmptyLines()); conf.setBoolean(CsvInputFormat.CSV_READER_IGNORE_SURROUNDING_SPACES, format.getIgnoreSurroundingSpaces()); if (format.getNullString() != null) { conf.set(CsvInputFormat.CSV_READER_NULL_STRING, format.getNullString()); } }
From source file:org.apache.camel.dataformat.csv.CsvMarshaller.java
/** * Creates a new instance./*w ww . ja va2 s .co m*/ * * @param format CSV format * @param dataFormat Camel CSV data format * @return New instance */ public static CsvMarshaller create(CSVFormat format, CsvDataFormat dataFormat) { // If we don't want the header record, clear it if (format.getSkipHeaderRecord()) { format = format.withHeader((String[]) null); } String[] fixedColumns = dataFormat.getHeader(); if (fixedColumns != null && fixedColumns.length > 0) { return new FixedColumnsMarshaller(format, fixedColumns); } return new DynamicColumnsMarshaller(format); }
From source file:org.apache.camel.dataformat.csv.CsvUnmarshaller.java
public static CsvUnmarshaller create(CSVFormat format, CsvDataFormat dataFormat) { // If we want to use maps, thus the header must be either fixed or automatic if (dataFormat.isUseMaps() && format.getHeader() == null) { format = format.withHeader();/*from w w w. ja va 2s. co m*/ } // If we want to skip the header record it must automatic otherwise it's not working if (format.getSkipHeaderRecord() && format.getHeader() == null) { format = format.withHeader(); } if (dataFormat.isLazyLoad()) { return new StreamCsvUnmarshaller(format, dataFormat); } return new BulkCsvUnmarshaller(format, dataFormat); }