Example usage for org.apache.commons.lang3.text StrTokenizer setQuoteChar

List of usage examples for org.apache.commons.lang3.text StrTokenizer setQuoteChar

Introduction

On this page you can find usage examples for the setQuoteChar method of org.apache.commons.lang3.text.StrTokenizer.

Prototype

public StrTokenizer setQuoteChar(final char quote) 

Source Link

Document

Sets the quote character to use.

Usage

From source file:com.mgmtp.jfunk.core.util.CsvDataProcessor.java

/**
 * Processes the specified CSV file. For every line but the header line (which is required), the
 * specified command is executed.
 *
 * <p>
 * The header line determines the columns (via {@code initColumns}). For each subsequent line, every
 * column value is trimmed and then either stored as a fixed value in the data source (when the
 * column has a data set key) or put into the configuration (when it has none). The special value
 * {@code <auto>} resets the fixed value of a data source entry instead of setting it. A fresh
 * data source and configuration are requested from their providers for every line.
 * </p>
 *
 * <p>
 * This method does not close the reader itself.
 * </p>
 *
 * @param reader
 *            the reader for loading the CSV data
 * @param delimiter
 *            the column separator
 * @param quoteChar
 *            the quote character ('\0' for no quoting)
 * @param command
 *            the command (i. e. a Groovy closure if used in a Groovy script) to be executed for
 *            every processed line
 * @throws IllegalStateException
 *             if the input has no header line or a line's column count differs from the header's
 *             (raised via {@code checkState}; presumably Guava's {@code Preconditions} - confirm
 *             against the file's static imports)
 * @throws JFunkException
 *             if reading the CSV data fails
 */
public void processFile(final Reader reader, final String delimiter, final char quoteChar,
        final Runnable command) {
    try {
        List<String> inputLines = CharStreams.readLines(reader);

        // The header line is mandatory; fail with a clear message instead of an
        // IndexOutOfBoundsException when the input is empty.
        checkState(!inputLines.isEmpty(), "CSV data must contain a header line.");

        StrTokenizer st = StrTokenizer.getCSVInstance();
        st.setDelimiterString(delimiter);
        if (quoteChar != '\0') {
            st.setQuoteChar(quoteChar);
        } else {
            // '\0' means "no quoting": disable quote handling altogether
            st.setQuoteMatcher(StrMatcher.noneMatcher());
        }

        // extract header
        String headerLine = inputLines.get(0);
        List<Column> columns = initColumns(st, headerLine);

        // all remaining lines are data lines
        for (String line : inputLines.subList(1, inputLines.size())) {
            st.reset(line);
            String[] colArray = st.getTokenArray();
            int len = colArray.length;
            checkState(len == columns.size(),
                    "Mismatch between number of header columns and number of line columns.");

            DataSource dataSource = dataSourceProvider.get();
            Configuration config = configProvider.get();
            for (int i = 0; i < len; ++i) {
                String value = StringUtils.trimToEmpty(colArray[i]);

                Column column = columns.get(i);
                String dataSetKey = column.dataSetKey;
                String key = column.key;
                if (dataSetKey != null) {
                    if ("<auto>".equals(value)) {
                        // "<auto>" removes a previously fixed value instead of setting one
                        dataSource.resetFixedValue(dataSetKey, key);
                    } else {
                        log.debug("Setting data set entry for " + this + " to value=" + value);
                        dataSource.setFixedValue(dataSetKey, key, value);
                    }
                } else {
                    // columns without a data set key are plain configuration properties
                    log.debug("Setting property for " + this + " to value=" + value);
                    config.put(key, value);
                }
            }

            command.run();
        }
    } catch (IOException ex) {
        throw new JFunkException("Error processing CSV data", ex);
    }
}

From source file:org.gbif.file.StrTokenizerTest.java

/**
 * Verifies comma-delimited tokenizing with double-quote quoting, where empty tokens are kept and
 * reported as {@code null}, and surrounding whitespace of tokens is preserved.
 */
@Test
public void testCsvQuoted() throws IOException {
    StrTokenizer csv = new StrTokenizer()
            .setDelimiterString(",")
            .setQuoteChar('"')
            .setEmptyTokenAsNull(true)
            .setIgnoreEmptyTokens(false);

    // a quoted field may contain the delimiter
    String[] fields = csv.reset("121,432423, 9099053,\"Frieda karla L.,DC.\",Ahrens").getTokenArray();
    assertEquals("121", fields[0]);
    assertEquals("432423", fields[1]);
    assertEquals(" 9099053", fields[2]);
    assertEquals("Frieda karla L.,DC.", fields[3]);
    assertEquals("Ahrens", fields[4]);

    // whitespace-only token is kept verbatim
    fields = csv.reset("   ,4321").getTokenArray();
    assertEquals("   ", fields[0]);
    assertEquals("4321", fields[1]);

    // empty tokens in the middle come back as null
    fields = csv.reset(" ,,,,zzz  ").getTokenArray();
    assertEquals(" ", fields[0]);
    for (int i = 1; i <= 3; i++) {
        assertNull(fields[i]);
    }
    assertEquals("zzz  ", fields[4]);

    // a leading empty token is null as well
    fields = csv.reset(",,,,zzz  ").getTokenArray();
    for (int i = 0; i <= 3; i++) {
        assertNull(fields[i]);
    }
    assertEquals("zzz  ", fields[4]);
}

From source file:org.gbif.file.StrTokenizerTest.java

/**
 * Verifies pipe-delimited tokenizing with double-quote quoting, where empty tokens are kept and
 * reported as {@code null}, and surrounding whitespace of tokens is preserved.
 */
@Test
public void testPipes() throws IOException {
    StrTokenizer piped = new StrTokenizer()
            .setDelimiterChar('|')
            .setQuoteChar('"')
            .setEmptyTokenAsNull(true)
            .setIgnoreEmptyTokens(false);

    // a quoted field may contain the delimiter
    String[] fields = piped.reset("121|432423| 9099053|\"Frieda karla L.|DC.\"|Ahrens").getTokenArray();
    assertEquals("121", fields[0]);
    assertEquals("432423", fields[1]);
    assertEquals(" 9099053", fields[2]);
    assertEquals("Frieda karla L.|DC.", fields[3]);
    assertEquals("Ahrens", fields[4]);

    // whitespace-only token is kept verbatim
    fields = piped.reset("   |4321").getTokenArray();
    assertEquals("   ", fields[0]);
    assertEquals("4321", fields[1]);

    // empty tokens in the middle come back as null
    fields = piped.reset(" ||||zzz  ").getTokenArray();
    assertEquals(" ", fields[0]);
    for (int i = 1; i <= 3; i++) {
        assertNull(fields[i]);
    }
    assertEquals("zzz  ", fields[4]);

    // a leading empty token is null as well
    fields = piped.reset("||||zzz  ").getTokenArray();
    for (int i = 0; i <= 3; i++) {
        assertNull(fields[i]);
    }
    assertEquals("zzz  ", fields[4]);
}

From source file:org.gbif.file.StrTokenizerTest.java

/**
 * Verifies tab-delimited tokenizing with double-quote quoting, where empty tokens are kept and
 * reported as {@code null}, and surrounding whitespace of tokens is preserved.
 */
@Test
public void testTabQuoted() throws IOException {
    StrTokenizer tabbed = new StrTokenizer()
            .setDelimiterString("\t")
            .setQuoteChar('"')
            .setEmptyTokenAsNull(true)
            .setIgnoreEmptyTokens(false);

    // quoted field content (including commas) is returned without the quotes
    String[] fields = tabbed.reset("121\t432423\t 9099053\t\"Frieda karla L.,DC.\"\tAhrens").getTokenArray();
    assertEquals("121", fields[0]);
    assertEquals("432423", fields[1]);
    assertEquals(" 9099053", fields[2]);
    assertEquals("Frieda karla L.,DC.", fields[3]);
    assertEquals("Ahrens", fields[4]);

    // whitespace-only token is kept verbatim
    fields = tabbed.reset("   \t4321").getTokenArray();
    assertEquals("   ", fields[0]);
    assertEquals("4321", fields[1]);

    // empty tokens in the middle come back as null
    fields = tabbed.reset(" \t\t\t\tzzz  ").getTokenArray();
    assertEquals(" ", fields[0]);
    for (int i = 1; i <= 3; i++) {
        assertNull(fields[i]);
    }
    assertEquals("zzz  ", fields[4]);

    // a leading empty token is null as well
    fields = tabbed.reset("\t\t\t\tzzz  ").getTokenArray();
    for (int i = 0; i <= 3; i++) {
        assertNull(fields[i]);
    }
    assertEquals("zzz  ", fields[4]);
}