Example usage for org.apache.commons.csv CSVFormat getDelimiter

Introduction

This page lists usage examples for org.apache.commons.csv CSVFormat getDelimiter, collected from open-source projects.

Prototype

public char getDelimiter() 

Document

Returns the character delimiting the values (typically ';', ',' or '\t').
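
A minimal sketch of the call itself (illustrative; the Commons CSV API shown is standard, the class name is not):

import org.apache.commons.csv.CSVFormat;

public class GetDelimiterSketch {
    public static void main(String[] args) {
        CSVFormat format = CSVFormat.DEFAULT;           // default delimiter is ','
        System.out.println(format.getDelimiter());      // prints ','

        // CSVFormat is immutable: withDelimiter returns a new instance
        CSVFormat tsv = format.withDelimiter('\t');
        System.out.println(tsv.getDelimiter() == '\t'); // prints 'true'
    }
}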

Usage

From source file:com.anhth12.lambda.common.text.TextUtils.java

private static CSVFormat formatForDelimiter(char delimiter) {
    CSVFormat format = CSV_FORMAT;
    if (delimiter != format.getDelimiter()) {
        format = format.withDelimiter(delimiter);
    }
    return format;
}
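
Because CSVFormat is immutable, withDelimiter always returns a new instance; comparing against getDelimiter first simply avoids building a new format when the requested delimiter is already in place.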

From source file:com.github.jferard.pgloaderutils.sniffer.csv.HeaderRowAnalyzerTest.java

@Test
public final void test() throws IOException {
    CSVFormat format = this.h.analyze(Arrays.asList("a", "b"), "a;b;c");
    Assert.assertEquals(';', format.getDelimiter());
    format = this.h.analyze(Arrays.asList("a", "b"), "a,b,c");
    Assert.assertEquals(',', format.getDelimiter());
}

From source file:com.github.jferard.pgloaderutils.sniffer.csv.HeaderRowAnalyzerTest.java

@Test
public final void test2() throws IOException {
    CSVFormat format = this.h.analyze(Arrays.asList("a'b", "b"), "'a&'b';b;c");
    Assert.assertEquals(';', format.getDelimiter());
    Assert.assertEquals('&', (char) format.getEscapeCharacter());
    Assert.assertEquals('\'', (char) format.getQuoteCharacter());
}
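
Note the contrast in return types: getDelimiter returns a primitive char, while getEscapeCharacter and getQuoteCharacter return a (possibly null) Character, hence the casts in the assertions above.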

From source file:com.thinkbiganalytics.discovery.parsers.csv.CSVFileSchemaParser.java

private CSVFormat createCSVFormat(String sampleData) throws IOException {
    CSVFormat format;
    if (autoDetect) {
        CSVAutoDetect autoDetect = new CSVAutoDetect();
        format = autoDetect.detectCSVFormat(sampleData, this.headerRow, this.separatorChar);
        this.separatorChar = Character.toString(format.getDelimiter());
        this.quoteChar = Character.toString(format.getQuoteCharacter());
    } else {
        format = CSVFormat.DEFAULT.withAllowMissingColumnNames();

        if (StringUtils.isNotEmpty(separatorChar)) {
            format = format.withDelimiter(toChar(separatorChar).charAt(0));
        }
        if (StringUtils.isNotEmpty(escapeChar)) {
            format = format.withEscape(toChar(escapeChar).charAt(0));
        }
        if (StringUtils.isNotEmpty(quoteChar)) {
            format = format.withQuoteMode(QuoteMode.MINIMAL).withQuote(toChar(quoteChar).charAt(0));
        }
    }

    return format;
}
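
One caveat in the auto-detect branch: Character.toString(format.getQuoteCharacter()) unboxes the Character returned by getQuoteCharacter, so a detected format without a quote character would throw a NullPointerException here.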

From source file:com.datascience.cascading.scheme.CsvScheme.java

/**
 * Configures the Hadoop configuration for the given CSV format.
 */
private void configureReaderFormat(CSVFormat format, Configuration conf) {
    conf.set(CsvOutputFormat.CHARSET, charset);

    // If the format header was explicitly provided by the user then forward it to the record reader. If skipHeaderRecord
    // is enabled then that indicates that field names were detected. We need to ensure that headers are defined in order
    // for the CSV reader to skip the header record.
    conf.setBoolean(CsvInputFormat.STRICT_MODE, strict);
    if (format.getHeader() != null) {
        conf.setStrings(CsvInputFormat.CSV_READER_COLUMNS, format.getHeader());
    } else if (format.getSkipHeaderRecord()) {
        Fields fields = getSourceFields();
        String[] columns = new String[fields.size()];
        for (int i = 0; i < fields.size(); i++) {
            columns[i] = fields.get(i).toString();
        }
        conf.setStrings(CsvInputFormat.CSV_READER_COLUMNS, columns);
    }

    conf.setBoolean(CsvInputFormat.CSV_READER_SKIP_HEADER, format.getSkipHeaderRecord());
    conf.set(CsvInputFormat.CSV_READER_DELIMITER, String.valueOf(format.getDelimiter()));

    if (format.getRecordSeparator() != null) {
        conf.set(CsvInputFormat.CSV_READER_RECORD_SEPARATOR, format.getRecordSeparator());
    }

    if (format.getQuoteCharacter() != null) {
        conf.set(CsvInputFormat.CSV_READER_QUOTE_CHARACTER, String.valueOf(format.getQuoteCharacter()));
    }

    if (format.getQuoteMode() != null) {
        conf.set(CsvInputFormat.CSV_READER_QUOTE_MODE, format.getQuoteMode().name());
    }

    if (format.getEscapeCharacter() != null) {
        conf.set(CsvInputFormat.CSV_READER_ESCAPE_CHARACTER, String.valueOf(format.getEscapeCharacter()));
    }

    conf.setBoolean(CsvInputFormat.CSV_READER_IGNORE_EMPTY_LINES, format.getIgnoreEmptyLines());
    conf.setBoolean(CsvInputFormat.CSV_READER_IGNORE_SURROUNDING_SPACES, format.getIgnoreSurroundingSpaces());

    if (format.getNullString() != null) {
        conf.set(CsvInputFormat.CSV_READER_NULL_STRING, format.getNullString());
    }
}

From source file:com.datascience.cascading.scheme.CsvScheme.java

/**
 * Configures the Hadoop configuration for the given CSV format.
 */
private void configureWriterFormat(CSVFormat format, Configuration conf) {
    conf.set(CsvOutputFormat.CHARSET, charset);

    // Apache CSV doesn't really handle the skipHeaderRecord flag correctly when writing output. If the skip flag is set
    // and headers are configured, headers will always be written to the output. Since we always have headers and/or
    // fields configured, we need to use the skipHeaderRecord flag to determine whether headers should be written.
    if (!format.getSkipHeaderRecord()) {
        if (format.getHeader() != null && format.getHeader().length != 0) {
            conf.setStrings(CsvOutputFormat.CSV_WRITER_COLUMNS, format.getHeader());
        } else {
            Fields fields = getSinkFields();
            String[] columns = new String[fields.size()];
            for (int i = 0; i < fields.size(); i++) {
                columns[i] = fields.get(i).toString();
            }
            conf.setStrings(CsvOutputFormat.CSV_WRITER_COLUMNS, columns);
        }
    }

    conf.setBoolean(CsvOutputFormat.CSV_WRITER_SKIP_HEADER, format.getSkipHeaderRecord());
    conf.set(CsvOutputFormat.CSV_WRITER_DELIMITER, String.valueOf(format.getDelimiter()));

    if (format.getRecordSeparator() != null) {
        conf.set(CsvOutputFormat.CSV_WRITER_RECORD_SEPARATOR, format.getRecordSeparator());
    }

    if (format.getQuoteCharacter() != null) {
        conf.set(CsvOutputFormat.CSV_WRITER_QUOTE_CHARACTER, String.valueOf(format.getQuoteCharacter()));
    }

    if (format.getQuoteMode() != null) {
        conf.set(CsvOutputFormat.CSV_WRITER_QUOTE_MODE, format.getQuoteMode().name());
    }

    if (format.getEscapeCharacter() != null) {
        conf.set(CsvOutputFormat.CSV_WRITER_ESCAPE_CHARACTER, String.valueOf(format.getEscapeCharacter()));
    }

    conf.setBoolean(CsvOutputFormat.CSV_WRITER_IGNORE_EMPTY_LINES, format.getIgnoreEmptyLines());
    conf.setBoolean(CsvOutputFormat.CSV_WRITER_IGNORE_SURROUNDING_SPACES, format.getIgnoreSurroundingSpaces());

    if (format.getNullString() != null) {
        conf.set(CsvOutputFormat.CSV_WRITER_NULL_STRING, format.getNullString());
    }
}
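
In both CsvScheme methods above, String.valueOf(format.getDelimiter()) converts the primitive char into the string value a Hadoop Configuration entry requires; the boxed Character getters (quote and escape) get the same treatment only after a null check.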

From source file:org.apache.logging.log4j.core.layout.CsvLogEventLayoutTest.java

private void testLayout(final CSVFormat format, final AbstractCsvLayout layout, final String header,
        final String footer) {
    final Map<String, Appender> appenders = root.getAppenders();
    for (final Appender appender : appenders.values()) {
        root.removeAppender(appender);
    }
    // set up appender
    final ListAppender appender = new ListAppender("List", null, layout, true, false);
    appender.start();

    // set appender on root and set level to debug
    root.addAppender(appender);
    root.setLevel(Level.DEBUG);

    root.debug("one={}, two={}, three={}", 1, 2, 3);
    root.info("Hello");
    appender.stop();

    final List<String> list = appender.getMessages();
    final boolean hasHeaderSerializer = layout.getHeaderSerializer() != null;
    final boolean hasFooterSerializer = layout.getFooterSerializer() != null;
    final int headerOffset = hasHeaderSerializer ? 1 : 0;
    final String event0 = list.get(0 + headerOffset);
    final String event1 = list.get(1 + headerOffset);
    final char del = format.getDelimiter();
    Assert.assertTrue(event0, event0.contains(del + "DEBUG" + del));
    final String quote = del == ',' ? "\"" : "";
    Assert.assertTrue(event0, event0.contains(del + quote + "one=1, two=2, three=3" + quote + del));
    Assert.assertTrue(event1, event1.contains(del + "INFO" + del));

    if (hasHeaderSerializer && header == null) {
        Assert.fail();
    }
    if (!hasHeaderSerializer && header != null) {
        Assert.fail();
    }
    if (hasFooterSerializer && footer == null) {
        Assert.fail();
    }
    if (!hasFooterSerializer && footer != null) {
        Assert.fail();
    }
    if (hasHeaderSerializer) {
        Assert.assertEquals(list.toString(), header, list.get(0));
    }
    if (hasFooterSerializer) {
        Assert.assertEquals(list.toString(), footer, list.get(list.size() - 1));
    }
}

From source file:org.apache.nifi.csv.JacksonCSVRecordReader.java

public JacksonCSVRecordReader(final InputStream in, final ComponentLog logger, final RecordSchema schema,
        final CSVFormat csvFormat, final boolean hasHeader, final boolean ignoreHeader, final String dateFormat,
        final String timeFormat, final String timestampFormat, final String encoding) throws IOException {
    super(logger, schema, hasHeader, ignoreHeader, dateFormat, timeFormat, timestampFormat);

    final Reader reader = new InputStreamReader(new BOMInputStream(in));

    CsvSchema.Builder csvSchemaBuilder = CsvSchema.builder().setColumnSeparator(csvFormat.getDelimiter())
            .setLineSeparator(csvFormat.getRecordSeparator())
            // Can only use comments in Jackson CSV if the correct marker is set
            .setAllowComments("#".equals(CharUtils.toString(csvFormat.getCommentMarker())))
            // The call to setUseHeader(false) in all code paths is due to the way Jackson does data binding/mapping. Missing or extra columns may not
            // be handled correctly when using the header for mapping.
            .setUseHeader(false);

    csvSchemaBuilder = (csvFormat.getQuoteCharacter() == null) ? csvSchemaBuilder
            : csvSchemaBuilder.setQuoteChar(csvFormat.getQuoteCharacter());
    csvSchemaBuilder = (csvFormat.getEscapeCharacter() == null) ? csvSchemaBuilder
            : csvSchemaBuilder.setEscapeChar(csvFormat.getEscapeCharacter());

    if (hasHeader) {
        if (ignoreHeader) {
            csvSchemaBuilder = csvSchemaBuilder.setSkipFirstDataRow(true);
        }
    }

    CsvSchema csvSchema = csvSchemaBuilder.build();

    // Add remaining config options to the mapper
    List<CsvParser.Feature> features = new ArrayList<>();
    features.add(CsvParser.Feature.INSERT_NULLS_FOR_MISSING_COLUMNS);
    if (csvFormat.getIgnoreEmptyLines()) {
        features.add(CsvParser.Feature.SKIP_EMPTY_LINES);
    }
    if (csvFormat.getTrim()) {
        features.add(CsvParser.Feature.TRIM_SPACES);
    }

    ObjectReader objReader = mapper.readerFor(String[].class).with(csvSchema)
            .withFeatures(features.toArray(new CsvParser.Feature[features.size()]));

    recordStream = objReader.readValues(reader);
}
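
This reader bridges two libraries: getDelimiter returns a primitive char that maps directly onto Jackson's CsvSchema.Builder#setColumnSeparator, while the boxed quote and escape characters are applied only when non-null.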

From source file:org.apache.tika.parser.csv.TextAndCSVParser.java

@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {

    CSVParams params = getOverride(metadata);
    Reader reader = null;
    Charset charset = null;
    if (!params.isComplete()) {
        reader = detect(params, stream, metadata, context);
        if (params.getCharset() != null) {
            charset = params.getCharset();
        } else {
            charset = ((AutoDetectReader) reader).getCharset();
        }
    } else {
        reader = new BufferedReader(new InputStreamReader(stream, params.getCharset()));
        charset = params.getCharset();
    }

    updateMetadata(params, metadata);

    //if text or a non-csv/tsv category of text
    //treat this as text and be done
    //TODO -- if it was detected as a non-csv subtype of text
    if (!params.getMediaType().getBaseType().equals(CSV) && !params.getMediaType().getBaseType().equals(TSV)) {
        handleText(reader, charset, handler, metadata);
        return;
    }

    CSVFormat csvFormat = CSVFormat.EXCEL.withDelimiter(params.getDelimiter());
    metadata.set(DELIMITER_PROPERTY, CHAR_TO_STRING_DELIMITER_MAP.get(csvFormat.getDelimiter()));

    XHTMLContentHandler xhtmlContentHandler = new XHTMLContentHandler(handler, metadata);
    try (org.apache.commons.csv.CSVParser commonsParser = new org.apache.commons.csv.CSVParser(reader,
            csvFormat)) {
        xhtmlContentHandler.startDocument();
        xhtmlContentHandler.startElement(TABLE);
        try {
            for (CSVRecord row : commonsParser) {
                xhtmlContentHandler.startElement(TR);
                for (String cell : row) {
                    xhtmlContentHandler.startElement(TD);
                    xhtmlContentHandler.characters(cell);
                    xhtmlContentHandler.endElement(TD);
                }
                xhtmlContentHandler.endElement(TR);
            }
        } catch (IllegalStateException e) {
            //if there's a parse exception
            //try to get the rest of the content...treat it as text for now
            //There will be some content lost because of buffering.
            //TODO -- figure out how to improve this
            xhtmlContentHandler.endElement(TABLE);
            xhtmlContentHandler.startElement("div", "name", "after exception");
            handleText(reader, xhtmlContentHandler);
            xhtmlContentHandler.endElement("div");
            xhtmlContentHandler.endDocument();
            //TODO -- consider dumping what's left in the reader as text
            throw new TikaException("exception parsing the csv", e);
        }

        xhtmlContentHandler.endElement(TABLE);
        xhtmlContentHandler.endDocument();
    }
}

From source file:org.wso2.carbon.ml.core.impl.MLModelHandler.java

public List<?> predict(int tenantId, String userName, long modelId, String dataFormat, InputStream dataStream)
        throws MLModelHandlerException {
    List<String[]> data = new ArrayList<String[]>();
    CSVFormat csvFormat = DataTypeFactory.getCSVFormat(dataFormat);
    BufferedReader br = new BufferedReader(new InputStreamReader(dataStream, StandardCharsets.UTF_8));
    try {
        String line;
        while ((line = br.readLine()) != null) {
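            // Caution: String.split treats its argument as a regular expression, so a
            // delimiter that is a regex metacharacter (e.g. '|') would need Pattern.quote,
            // and quoted fields containing the delimiter are split incorrectly.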
            String[] dataRow = line.split(csvFormat.getDelimiter() + "");
            data.add(dataRow);
        }
        return predict(tenantId, userName, modelId, data);
    } catch (IOException e) {
        String msg = "Failed to read the data points for prediction for model [id] " + modelId;
        log.error(msg, e);
        throw new MLModelHandlerException(msg, e);
    } finally {
        try {
            dataStream.close();
            br.close();
        } catch (IOException e) {
            String msg = "Error in closing input stream while publishing model";
            log.error(msg, e);
        }
    }

}
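
Where the delimiter could be a regex metacharacter or fields may be quoted, handing the stream to Commons CSV itself sidesteps the String.split caveat noted above. A minimal sketch, assuming only the standard CSVFormat#parse and CSVRecord APIs (the class and method names are illustrative):

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class ParseWithFormatSketch {

    // Reads all rows with the format's own parser, so the configured delimiter
    // and quoting rules are honored regardless of regex metacharacters.
    static List<String[]> readRows(InputStream in, CSVFormat format) throws IOException {
        List<String[]> rows = new ArrayList<>();
        try (CSVParser parser = format.parse(
                new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8)))) {
            for (CSVRecord record : parser) {
                String[] row = new String[record.size()];
                for (int i = 0; i < record.size(); i++) {
                    row[i] = record.get(i);
                }
                rows.add(row);
            }
        }
        return rows;
    }
}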