Usage examples for org.apache.commons.csv.CSVFormat#getQuoteCharacter
public Character getQuoteCharacter()
From source file:com.github.jferard.pgloaderutils.sniffer.csv.HeaderRowAnalyzerTest.java
@Test
public final void test2() throws IOException {
    // Sample line uses ' as the quote character and & as the escape character.
    final CSVFormat format = this.h.analyze(Arrays.asList("a'b", "b"), "'a&'b';b;c");

    // The analyzer should detect delimiter, escape, and quote from the sample.
    final char delimiter = format.getDelimiter();
    final char escape = (char) format.getEscapeCharacter();
    final char quote = (char) format.getQuoteCharacter();

    Assert.assertEquals(';', delimiter);
    Assert.assertEquals('&', escape);
    Assert.assertEquals('\'', quote);
}
From source file:com.thinkbiganalytics.discovery.parsers.csv.CSVFileSchemaParser.java
private CSVFormat createCSVFormat(String sampleData) throws IOException { CSVFormat format; if (autoDetect) { CSVAutoDetect autoDetect = new CSVAutoDetect(); format = autoDetect.detectCSVFormat(sampleData, this.headerRow, this.separatorChar); this.separatorChar = Character.toString(format.getDelimiter()); this.quoteChar = Character.toString(format.getQuoteCharacter()); } else {// w w w.ja va 2s. c o m format = CSVFormat.DEFAULT.withAllowMissingColumnNames(); if (StringUtils.isNotEmpty(separatorChar)) { format = format.withDelimiter(toChar(separatorChar).charAt(0)); } if (StringUtils.isNotEmpty(escapeChar)) { format = format.withEscape(toChar(escapeChar).charAt(0)); } if (StringUtils.isNotEmpty(quoteChar)) { format = format.withQuoteMode(QuoteMode.MINIMAL).withQuote(toChar(quoteChar).charAt(0)); } } return format; }
From source file:com.datascience.cascading.scheme.CsvScheme.java
/** * Configures the Hadoop configuration for the given CSV format. *///w ww .java 2s . c o m private void configureReaderFormat(CSVFormat format, Configuration conf) { conf.set(CsvOutputFormat.CHARSET, charset); // If the format header was explicitly provided by the user then forward it to the record reader. If skipHeaderRecord // is enabled then that indicates that field names were detected. We need to ensure that headers are defined in order // for the CSV reader to skip the header record. conf.setBoolean(CsvInputFormat.STRICT_MODE, strict); if (format.getHeader() != null) { conf.setStrings(CsvInputFormat.CSV_READER_COLUMNS, format.getHeader()); } else if (format.getSkipHeaderRecord()) { Fields fields = getSourceFields(); String[] columns = new String[fields.size()]; for (int i = 0; i < fields.size(); i++) { columns[i] = fields.get(i).toString(); } conf.setStrings(CsvInputFormat.CSV_READER_COLUMNS, columns); } conf.setBoolean(CsvInputFormat.CSV_READER_SKIP_HEADER, format.getSkipHeaderRecord()); conf.set(CsvInputFormat.CSV_READER_DELIMITER, String.valueOf(format.getDelimiter())); if (format.getRecordSeparator() != null) { conf.set(CsvInputFormat.CSV_READER_RECORD_SEPARATOR, format.getRecordSeparator()); } if (format.getQuoteCharacter() != null) { conf.set(CsvInputFormat.CSV_READER_QUOTE_CHARACTER, String.valueOf(format.getQuoteCharacter())); } if (format.getQuoteMode() != null) { conf.set(CsvInputFormat.CSV_READER_QUOTE_MODE, format.getQuoteMode().name()); } if (format.getEscapeCharacter() != null) { conf.set(CsvInputFormat.CSV_READER_ESCAPE_CHARACTER, String.valueOf(format.getEscapeCharacter())); } conf.setBoolean(CsvInputFormat.CSV_READER_IGNORE_EMPTY_LINES, format.getIgnoreEmptyLines()); conf.setBoolean(CsvInputFormat.CSV_READER_IGNORE_SURROUNDING_SPACES, format.getIgnoreSurroundingSpaces()); if (format.getNullString() != null) { conf.set(CsvInputFormat.CSV_READER_NULL_STRING, format.getNullString()); } }
From source file:com.datascience.cascading.scheme.CsvScheme.java
/** * Configures the Hadoop configuration for the given CSV format. *///from w ww. j a va 2 s . c o m private void configureWriterFormat(CSVFormat format, Configuration conf) { conf.set(CsvOutputFormat.CHARSET, charset); // Apache CSV doesn't really handle the skipHeaderRecord flag correctly when writing output. If the skip flag is set // and headers are configured, headers will always be written to the output. Since we always have headers and/or // fields configured, we need to use the skipHeaderRecord flag to determine whether headers should be written. if (!format.getSkipHeaderRecord()) { if (format.getHeader() != null && format.getHeader().length != 0) { conf.setStrings(CsvOutputFormat.CSV_WRITER_COLUMNS, format.getHeader()); } else { Fields fields = getSinkFields(); String[] columns = new String[fields.size()]; for (int i = 0; i < fields.size(); i++) { columns[i] = fields.get(i).toString(); } conf.setStrings(CsvOutputFormat.CSV_WRITER_COLUMNS, columns); } } conf.setBoolean(CsvOutputFormat.CSV_WRITER_SKIP_HEADER, format.getSkipHeaderRecord()); conf.set(CsvOutputFormat.CSV_WRITER_DELIMITER, String.valueOf(format.getDelimiter())); if (format.getRecordSeparator() != null) { conf.set(CsvOutputFormat.CSV_WRITER_RECORD_SEPARATOR, format.getRecordSeparator()); } if (format.getQuoteCharacter() != null) { conf.set(CsvOutputFormat.CSV_WRITER_QUOTE_CHARACTER, String.valueOf(format.getQuoteCharacter())); } if (format.getQuoteMode() != null) { conf.set(CsvOutputFormat.CSV_WRITER_QUOTE_MODE, format.getQuoteMode().name()); } if (format.getEscapeCharacter() != null) { conf.set(CsvOutputFormat.CSV_WRITER_ESCAPE_CHARACTER, String.valueOf(format.getEscapeCharacter())); } conf.setBoolean(CsvOutputFormat.CSV_WRITER_IGNORE_EMPTY_LINES, format.getIgnoreEmptyLines()); conf.setBoolean(CsvOutputFormat.CSV_WRITER_IGNORE_SURROUNDING_SPACES, format.getIgnoreSurroundingSpaces()); if (format.getNullString() != null) { conf.set(CsvOutputFormat.CSV_WRITER_NULL_STRING, format.getNullString()); 
} }
From source file:org.apache.nifi.csv.JacksonCSVRecordReader.java
public JacksonCSVRecordReader(final InputStream in, final ComponentLog logger, final RecordSchema schema, final CSVFormat csvFormat, final boolean hasHeader, final boolean ignoreHeader, final String dateFormat, final String timeFormat, final String timestampFormat, final String encoding) throws IOException { super(logger, schema, hasHeader, ignoreHeader, dateFormat, timeFormat, timestampFormat); final Reader reader = new InputStreamReader(new BOMInputStream(in)); CsvSchema.Builder csvSchemaBuilder = CsvSchema.builder().setColumnSeparator(csvFormat.getDelimiter()) .setLineSeparator(csvFormat.getRecordSeparator()) // Can only use comments in Jackson CSV if the correct marker is set .setAllowComments("#".equals(CharUtils.toString(csvFormat.getCommentMarker()))) // The call to setUseHeader(false) in all code paths is due to the way Jackson does data binding/mapping. Missing or extra columns may not // be handled correctly when using the header for mapping. .setUseHeader(false);//from w w w.java 2 s . c o m csvSchemaBuilder = (csvFormat.getQuoteCharacter() == null) ? csvSchemaBuilder : csvSchemaBuilder.setQuoteChar(csvFormat.getQuoteCharacter()); csvSchemaBuilder = (csvFormat.getEscapeCharacter() == null) ? csvSchemaBuilder : csvSchemaBuilder.setEscapeChar(csvFormat.getEscapeCharacter()); if (hasHeader) { if (ignoreHeader) { csvSchemaBuilder = csvSchemaBuilder.setSkipFirstDataRow(true); } } CsvSchema csvSchema = csvSchemaBuilder.build(); // Add remaining config options to the mapper List<CsvParser.Feature> features = new ArrayList<>(); features.add(CsvParser.Feature.INSERT_NULLS_FOR_MISSING_COLUMNS); if (csvFormat.getIgnoreEmptyLines()) { features.add(CsvParser.Feature.SKIP_EMPTY_LINES); } if (csvFormat.getTrim()) { features.add(CsvParser.Feature.TRIM_SPACES); } ObjectReader objReader = mapper.readerFor(String[].class).with(csvSchema) .withFeatures(features.toArray(new CsvParser.Feature[features.size()])); recordStream = objReader.readValues(reader); }