Example usage for org.apache.commons.lang3.text StrTokenizer setEmptyTokenAsNull

List of usage examples for org.apache.commons.lang3.text StrTokenizer setEmptyTokenAsNull

Introduction

On this page you can find example usages of org.apache.commons.lang3.text.StrTokenizer.setEmptyTokenAsNull.

Prototype

public StrTokenizer setEmptyTokenAsNull(final boolean emptyAsNull) 

Source Link

Document

Sets whether the tokenizer should return empty tokens as null.

Usage

From source file:org.gbif.file.StrTokenizerPerformance.java

/**
 * Runs the same source file through a char-delimited and a string-delimited
 * tab tokenizer and prints the value returned by the {@code test} helper for
 * each, labelled as milliseconds.
 *
 * <p>Both tokenizers are configured identically apart from how the tab
 * delimiter is supplied, so the two printed timings cover the same tokenizing
 * work and can be compared directly.
 *
 * @throws IOException declared by the test; presumably raised when the source
 *         file cannot be read (the helper is not visible here)
 */
@Test
public void testCharVsStringPerformance() throws IOException {
    File source = FileUtils.getClasspathFile("irmng.tail");

    // test CHAR
    StrTokenizer tokenizer = new StrTokenizer();
    tokenizer.setDelimiterChar('\t');
    tokenizer.setEmptyTokenAsNull(true);
    tokenizer.setIgnoreEmptyTokens(false);
    long time = test(tokenizer, source);
    System.out.println(time + " milliseconds for CHAR based tokenizer.");

    // test STRING
    tokenizer = new StrTokenizer();
    tokenizer.setDelimiterString("\t");
    tokenizer.setEmptyTokenAsNull(true);
    // StrTokenizer ignores empty tokens by default; keep them here too so this
    // run does the same work as the CHAR run above.
    tokenizer.setIgnoreEmptyTokens(false);
    time = test(tokenizer, source);
    System.out.println(time + " milliseconds for STRING based tokenizer.");
}

From source file:org.gbif.file.StrTokenizerTest.java

/**
 * Exercises a comma-delimited tokenizer that uses {@code "} as the quote
 * character, keeps empty tokens and reports them as {@code null}.
 */
@Test
public void testCsvQuoted() throws IOException {
    final StrTokenizer csv = new StrTokenizer()
            .setDelimiterString(",")
            .setQuoteChar('"')
            .setEmptyTokenAsNull(true)
            .setIgnoreEmptyTokens(false);

    // A quoted field keeps the delimiter it contains; the quotes are dropped.
    final String[] expected = { "121", "432423", " 9099053", "Frieda karla L.,DC.", "Ahrens" };
    String[] fields = csv.reset("121,432423, 9099053,\"Frieda karla L.,DC.\",Ahrens").getTokenArray();
    for (int i = 0; i < expected.length; i++) {
        assertEquals(expected[i], fields[i]);
    }

    // Whitespace-only fields are returned untouched.
    fields = csv.reset("   ,4321").getTokenArray();
    assertEquals("   ", fields[0]);
    assertEquals("4321", fields[1]);

    // Empty fields between delimiters come back as null.
    fields = csv.reset(" ,,,,zzz  ").getTokenArray();
    assertEquals(" ", fields[0]);
    for (int i = 1; i <= 3; i++) {
        assertNull(fields[i]);
    }
    assertEquals("zzz  ", fields[4]);

    // A leading empty field is null as well.
    fields = csv.reset(",,,,zzz  ").getTokenArray();
    for (int i = 0; i <= 3; i++) {
        assertNull(fields[i]);
    }
    assertEquals("zzz  ", fields[4]);
}

From source file:org.gbif.file.StrTokenizerTest.java

/**
 * Exercises a comma-delimited tokenizer with no quote character configured,
 * keeping empty tokens and reporting them as {@code null}.
 */
@Test
public void testCsvUnquoted() throws IOException {
    final StrTokenizer csv = new StrTokenizer()
            .setDelimiterString(",")
            .setEmptyTokenAsNull(true)
            .setIgnoreEmptyTokens(false);

    // Every comma splits, so the name field breaks into two columns.
    final String[] expected = { "121", "432423", " 9099053", "Frieda karla L.", "DC.", "Ahrens" };
    String[] fields = csv.reset("121,432423, 9099053,Frieda karla L.,DC.,Ahrens").getTokenArray();
    for (int i = 0; i < expected.length; i++) {
        assertEquals(expected[i], fields[i]);
    }

    // Empty fields, including the leading one, come back as null.
    fields = csv.reset(",,,,zzz  ").getTokenArray();
    for (int i = 0; i <= 3; i++) {
        assertNull(fields[i]);
    }
    assertEquals("zzz  ", fields[4]);
}

From source file:org.gbif.file.StrTokenizerTest.java

/**
 * Exercises a pipe-delimited tokenizer that uses {@code "} as the quote
 * character, keeps empty tokens and reports them as {@code null}.
 */
@Test
public void testPipes() throws IOException {
    final StrTokenizer piped = new StrTokenizer()
            .setDelimiterChar('|')
            .setQuoteChar('"')
            .setEmptyTokenAsNull(true)
            .setIgnoreEmptyTokens(false);

    // A quoted field keeps the pipe it contains; the quotes are dropped.
    final String[] expected = { "121", "432423", " 9099053", "Frieda karla L.|DC.", "Ahrens" };
    String[] fields = piped.reset("121|432423| 9099053|\"Frieda karla L.|DC.\"|Ahrens").getTokenArray();
    for (int i = 0; i < expected.length; i++) {
        assertEquals(expected[i], fields[i]);
    }

    // Whitespace-only fields are returned untouched.
    fields = piped.reset("   |4321").getTokenArray();
    assertEquals("   ", fields[0]);
    assertEquals("4321", fields[1]);

    // Empty fields between delimiters come back as null.
    fields = piped.reset(" ||||zzz  ").getTokenArray();
    assertEquals(" ", fields[0]);
    for (int i = 1; i <= 3; i++) {
        assertNull(fields[i]);
    }
    assertEquals("zzz  ", fields[4]);

    // A leading empty field is null as well.
    fields = piped.reset("||||zzz  ").getTokenArray();
    for (int i = 0; i <= 3; i++) {
        assertNull(fields[i]);
    }
    assertEquals("zzz  ", fields[4]);
}

From source file:org.gbif.file.StrTokenizerTest.java

/**
 * Exercises a tab-delimited tokenizer that uses {@code "} as the quote
 * character, keeps empty tokens and reports them as {@code null}.
 */
@Test
public void testTabQuoted() throws IOException {
    final StrTokenizer tabbed = new StrTokenizer()
            .setDelimiterString("\t")
            .setQuoteChar('"')
            .setEmptyTokenAsNull(true)
            .setIgnoreEmptyTokens(false);

    // The surrounding quotes are stripped from the quoted field.
    final String[] expected = { "121", "432423", " 9099053", "Frieda karla L.,DC.", "Ahrens" };
    String[] fields = tabbed.reset("121\t432423\t 9099053\t\"Frieda karla L.,DC.\"\tAhrens").getTokenArray();
    for (int i = 0; i < expected.length; i++) {
        assertEquals(expected[i], fields[i]);
    }

    // Space-only fields are returned untouched.
    fields = tabbed.reset("   \t4321").getTokenArray();
    assertEquals("   ", fields[0]);
    assertEquals("4321", fields[1]);

    // Empty fields between delimiters come back as null.
    fields = tabbed.reset(" \t\t\t\tzzz  ").getTokenArray();
    assertEquals(" ", fields[0]);
    for (int i = 1; i <= 3; i++) {
        assertNull(fields[i]);
    }
    assertEquals("zzz  ", fields[4]);

    // A leading empty field is null as well.
    fields = tabbed.reset("\t\t\t\tzzz  ").getTokenArray();
    for (int i = 0; i <= 3; i++) {
        assertNull(fields[i]);
    }
    assertEquals("zzz  ", fields[4]);
}

From source file:org.gbif.file.StrTokenizerTest.java

/**
 * Exercises a tab-delimited tokenizer with no quote character configured,
 * keeping empty tokens and reporting them as {@code null}.
 */
@Test
public void testTabUnquoted() throws IOException {
    final StrTokenizer tabbed = new StrTokenizer()
            .setDelimiterString("\t")
            .setEmptyTokenAsNull(true)
            .setIgnoreEmptyTokens(false);

    // With no quote character set, the quotes stay part of the field value.
    final String[] expected = { "121", "432423", " 9099053", "\"Frieda karla L.,DC.\"", "Ahrens" };
    String[] fields = tabbed.reset("121\t432423\t 9099053\t\"Frieda karla L.,DC.\"\tAhrens").getTokenArray();
    for (int i = 0; i < expected.length; i++) {
        assertEquals(expected[i], fields[i]);
    }

    // Space-only fields are returned untouched.
    fields = tabbed.reset("   \t4321").getTokenArray();
    assertEquals("   ", fields[0]);
    assertEquals("4321", fields[1]);

    // Empty fields between delimiters come back as null.
    fields = tabbed.reset(" \t\t\t\tzzz  ").getTokenArray();
    assertEquals(" ", fields[0]);
    for (int i = 1; i <= 3; i++) {
        assertNull(fields[i]);
    }
    assertEquals("zzz  ", fields[4]);

    // A leading empty field is null as well.
    fields = tabbed.reset("\t\t\t\tzzz  ").getTokenArray();
    for (int i = 0; i <= 3; i++) {
        assertNull(fields[i]);
    }
    assertEquals("zzz  ", fields[4]);
}

From source file:org.kalypso.model.wspm.pdb.internal.gaf.GafReader.java

/**
 * Splits one input line into tokens and builds a {@link GafLine} from them.
 *
 * <p>The line is split on the characters matched by
 * {@code StrMatcher.trimMatcher()}. Quote handling, ignored characters and
 * trimming are all switched off, and empty tokens are kept as empty strings
 * rather than dropped or turned into {@code null}.
 *
 * <p>At least 9 tokens are required; a 10th token (hyk) is optional and
 * defaults to the empty string.
 *
 * @param line the raw line to parse
 * @return the parsed line, carrying {@code Status.OK_STATUS}
 * @throws CoreException if the line has fewer than 9 tokens; the helper
 *         methods called here (not visible in this block) presumably raise
 *         it as well for unparsable values
 */
private GafLine parseLine(final String line) throws CoreException {
    final StrTokenizer tokenizer = new StrTokenizer(line);
    tokenizer.setDelimiterMatcher(StrMatcher.trimMatcher());
    tokenizer.setQuoteMatcher(StrMatcher.noneMatcher());
    tokenizer.setIgnoredMatcher(StrMatcher.noneMatcher());
    tokenizer.setTrimmerMatcher(StrMatcher.noneMatcher());
    tokenizer.setEmptyTokenAsNull(false);
    tokenizer.setIgnoreEmptyTokens(false);
    final String[] tokens = tokenizer.getTokenArray();

    if (tokens.length < 9) {
        throw failLine(IStatus.INFO, Messages.getString("GafReader.5")); //$NON-NLS-1$
    }

    final Object[] items = parseTokens(tokens);
    checkCommentLine(items);

    final BigDecimal station = asDecimal(items[0], Messages.getString("GafReader.6")); //$NON-NLS-1$
    final String pointId = asString(tokens[1]);
    final BigDecimal width = asDecimalOrNull(items[2], Messages.getString("GafReader.7")); //$NON-NLS-1$
    final BigDecimal height = asDecimal(items[3], Messages.getString("GafReader.8")); //$NON-NLS-1$
    // Locale.ROOT keeps the upper-casing of identifiers independent of the
    // JVM default locale (e.g. Turkish 'i' would otherwise become U+0130).
    final String code = asString(tokens[4]).toUpperCase(java.util.Locale.ROOT);
    final String roughnessClass = asString(tokens[5]);
    final String vegetationClass = asString(tokens[6]);
    final BigDecimal hw = asDecimal(items[7], Messages.getString("GafReader.9")); //$NON-NLS-1$
    final BigDecimal rw = asDecimal(items[8], Messages.getString("GafReader.10")); //$NON-NLS-1$
    final String hyk = tokens.length < 10 ? StringUtils.EMPTY
            : asString(tokens[9]).toUpperCase(java.util.Locale.ROOT);

    // NOTE(review): hw is read before rw above but passed after it here;
    // confirm this matches the GafLine constructor's parameter order.
    return new GafLine(station, pointId, width, height, code, roughnessClass, vegetationClass, rw, hw, hyk,
            Status.OK_STATUS);
}