Example usage for org.apache.commons.lang3.text StrTokenizer setIgnoreEmptyTokens

List of usage examples for org.apache.commons.lang3.text StrTokenizer setIgnoreEmptyTokens

Introduction

On this page you can find an example usage for org.apache.commons.lang3.text StrTokenizer setIgnoreEmptyTokens.

Prototype

public StrTokenizer setIgnoreEmptyTokens(final boolean ignoreEmptyTokens) 

Source Link

Document

Sets whether the tokenizer should ignore and not return empty tokens.

Usage

From source file:com.hurence.logisland.repository.csv.CsvFileParser.java

/**
 * Parse the file given in parameters//from  www . ja v  a 2  s.c o  m
 *
 * @param filePath
 * @return
 */
/**
 * Parses the CSV file at the given path into a list of entities, one entity per line.
 * The first line (assumed to be the header) is skipped. Each remaining line is split
 * with the configured {@code separator}; empty tokens are kept so column positions
 * remain stable for {@code createEntity}.
 *
 * @param filePath path of the CSV file to parse (read as UTF-8)
 * @return the parsed entities; empty (never null) if the file is missing or unreadable
 */
public List<T> parseFile(String filePath) {
    List<T> result = new ArrayList<>();

    // try-with-resources guarantees the reader chain is closed on every exit path;
    // closing the BufferedReader also closes the underlying InputStreamReader/stream.
    try (BufferedReader bsr = new BufferedReader(
            new InputStreamReader(new FileInputStream(filePath), "UTF-8"))) {

        logger.debug("start parsing csv file : " + filePath);
        boolean headerSkipped = false;
        String line;
        while ((line = bsr.readLine()) != null) {
            // don't parse the first line of csv (header row)
            if (headerSkipped) {
                StrTokenizer tokenizer = new StrTokenizer(line, separator);
                // keep empty tokens so column indexes are preserved for sparse rows
                tokenizer.setIgnoreEmptyTokens(false);
                T o = createEntity(tokenizer);
                if (o != null) {
                    result.add(o);
                }
            } else {
                headerSkipped = true;
            }
        }

        logger.debug("done parsing csv file : " + filePath);

    } catch (FileNotFoundException ex) {
        // log the cause so the stack trace is not silently discarded
        logger.error("file not found : " + filePath, ex);
    } catch (IOException ex) {
        logger.error("unknown error while parsing : " + filePath, ex);
    }
    return result;

}

From source file:com.jkoolcloud.tnt4j.streams.parsers.ActivityTokenParser.java

/**
 * Splits the next raw activity data string into an array of field tokens for parsing.
 * Returns {@code null} (skip item) when there is no data, when the optional filter
 * {@code pattern} does not match, or when tokenizing yields no fields.
 *
 * @param stream stream providing the activity data
 * @param data raw activity data item
 * @return parsing context wrapping the split fields, or {@code null} to skip this item
 * @throws ParseException if an error occurs while preparing the activity data
 */
@Override
protected ActivityContext prepareItem(TNTInputStream<?, ?> stream, Object data) throws ParseException {
    // Get next string to parse
    String dataStr = getNextActivityString(data);
    if (StringUtils.isEmpty(dataStr)) {
        return null;
    }
    logger().log(OpLevel.DEBUG, StreamsResources.getString(StreamsResources.RESOURCE_BUNDLE_NAME,
            "ActivityParser.splitting.string"), dataStr);
    if (pattern != null) {
        // Pattern.matcher() never returns null, so only the match result needs checking.
        if (!pattern.matcher(dataStr).matches()) {
            logger().log(OpLevel.DEBUG, StreamsResources.getString(StreamsResources.RESOURCE_BUNDLE_NAME,
                    "ActivityParser.input.not.match"), getName(), pattern.pattern());
            return null;
        }
    }
    // Keep empty tokens so field positions stay aligned with the configured locators.
    StrTokenizer tk = stripQuotes ? new StrTokenizer(dataStr, fieldDelim, StrMatcher.doubleQuoteMatcher())
            : new StrTokenizer(dataStr, fieldDelim);
    tk.setIgnoreEmptyTokens(false);
    String[] fields = tk.getTokenArray();
    if (ArrayUtils.isEmpty(fields)) {
        logger().log(OpLevel.DEBUG,
                StreamsResources.getString(StreamsResources.RESOURCE_BUNDLE_NAME, "ActivityParser.no.fields"));
        return null;
    }
    logger().log(OpLevel.DEBUG,
            StreamsResources.getString(StreamsResources.RESOURCE_BUNDLE_NAME, "ActivityParser.split"),
            fields.length);

    ActivityContext cData = new ActivityContext(stream, data, fields);
    cData.setMessage(getRawDataAsMessage(fields));

    return cData;
}

From source file:com.jkoolcloud.tnt4j.streams.parsers.ActivityNameValueParser.java

/**
 * Splits the next raw activity data string into name/value pairs for parsing.
 * The string is first tokenized on {@code fieldDelim}; each token is then split
 * on {@code valueDelim} into a name and (optionally trimmed) value. Returns
 * {@code null} (skip item) when there is no data, when the optional filter
 * {@code pattern} does not match, or when tokenizing yields no fields.
 *
 * @param stream stream providing the activity data
 * @param data raw activity data item
 * @return parsing context wrapping the name/value map, or {@code null} to skip this item
 * @throws ParseException if an error occurs while preparing the activity data
 */
@Override
protected ActivityContext prepareItem(TNTInputStream<?, ?> stream, Object data) throws ParseException {
    String dataStr = getNextActivityString(data);
    if (StringUtils.isEmpty(dataStr)) {
        return null;
    }
    logger().log(OpLevel.DEBUG, StreamsResources.getString(StreamsResources.RESOURCE_BUNDLE_NAME,
            "ActivityParser.splitting.string"), dataStr);
    if (pattern != null) {
        // Pattern.matcher() never returns null, so only the match result needs checking.
        if (!pattern.matcher(dataStr).matches()) {
            logger().log(OpLevel.DEBUG, StreamsResources.getString(StreamsResources.RESOURCE_BUNDLE_NAME,
                    "ActivityParser.input.not.match"), getName(), pattern.pattern());
            return null;
        }
    }
    // Keep empty tokens so missing fields are still represented (as null entries).
    StrTokenizer tk = stripQuotes ? new StrTokenizer(dataStr, fieldDelim, StrMatcher.doubleQuoteMatcher())
            : new StrTokenizer(dataStr, fieldDelim);
    tk.setIgnoreEmptyTokens(false);
    String[] fields = tk.getTokenArray();
    if (ArrayUtils.isEmpty(fields)) {
        logger().log(OpLevel.DEBUG,
                StreamsResources.getString(StreamsResources.RESOURCE_BUNDLE_NAME, "ActivityParser.no.fields"));
        return null;
    }
    logger().log(OpLevel.DEBUG,
            StreamsResources.getString(StreamsResources.RESOURCE_BUNDLE_NAME, "ActivityParser.split"),
            fields.length);
    Map<String, String> nameValues = new HashMap<>(fields.length);
    for (String field : fields) {
        if (field != null) {
            // valueDelim is a literal separator, not a regex, hence Pattern.quote
            String[] nv = field.split(Pattern.quote(valueDelim));
            if (ArrayUtils.isNotEmpty(nv)) {
                // a name with no delimiter/value maps to the empty string
                nameValues.put(nv[0], nv.length > 1 ? nv[1].trim() : "");
            }
            logger().log(OpLevel.TRACE, StreamsResources.getString(StreamsResources.RESOURCE_BUNDLE_NAME,
                    "ActivityNameValueParser.found"), field);
        }
    }

    ActivityContext cData = new ActivityContext(stream, data, nameValues);
    cData.setMessage(getRawDataAsMessage(nameValues));

    return cData;
}

From source file:org.gbif.file.StrTokenizerPerformance.java

/**
 * Compares tokenizing throughput of a char-delimiter tokenizer against a
 * string-delimiter tokenizer over the same input file. Both tokenizers are
 * configured identically so the timing difference reflects only the
 * delimiter-matching strategy.
 */
@Test
public void testCharVsStringPerformance() throws IOException {
    File source = FileUtils.getClasspathFile("irmng.tail");

    // test CHAR
    StrTokenizer tokenizer = new StrTokenizer();
    tokenizer.setDelimiterChar('\t');
    tokenizer.setEmptyTokenAsNull(true);
    tokenizer.setIgnoreEmptyTokens(false);
    long time = test(tokenizer, source);
    System.out.println(time + " milliseconds for CHAR based tokenizer.");

    // test STRING — must mirror the CHAR configuration for a fair comparison
    tokenizer = new StrTokenizer();
    tokenizer.setDelimiterString("\t");
    tokenizer.setEmptyTokenAsNull(true);
    tokenizer.setIgnoreEmptyTokens(false);
    time = test(tokenizer, source);
    System.out.println(time + " milliseconds for STRING based tokenizer.");
}

From source file:org.gbif.file.StrTokenizerTest.java

/**
 * Verifies comma-separated parsing with double-quote support: delimiters inside
 * quotes stay within one token, and empty tokens come back as null.
 */
@Test
public void testCsvQuoted() throws IOException {
    StrTokenizer tok = new StrTokenizer();
    tok.setDelimiterString(",");
    tok.setQuoteChar('"');
    tok.setEmptyTokenAsNull(true);
    tok.setIgnoreEmptyTokens(false);

    // quoted comma is part of the token
    String[] cols = tok.reset("121,432423, 9099053,\"Frieda karla L.,DC.\",Ahrens").getTokenArray();
    String[] expected = { "121", "432423", " 9099053", "Frieda karla L.,DC.", "Ahrens" };
    for (int i = 0; i < expected.length; i++) {
        assertEquals(expected[i], cols[i]);
    }

    // whitespace-only tokens are preserved verbatim
    cols = tok.reset("   ,4321").getTokenArray();
    assertEquals("   ", cols[0]);
    assertEquals("4321", cols[1]);

    // consecutive delimiters yield null tokens
    cols = tok.reset(" ,,,,zzz  ").getTokenArray();
    assertEquals(" ", cols[0]);
    for (int i = 1; i <= 3; i++) {
        assertNull(cols[i]);
    }
    assertEquals("zzz  ", cols[4]);

    // leading delimiters yield null tokens as well
    cols = tok.reset(",,,,zzz  ").getTokenArray();
    for (int i = 0; i <= 3; i++) {
        assertNull(cols[i]);
    }
    assertEquals("zzz  ", cols[4]);
}

From source file:org.gbif.file.StrTokenizerTest.java

/**
 * Verifies comma-separated parsing without quote handling: every comma splits,
 * and empty tokens come back as null.
 */
@Test
public void testCsvUnquoted() throws IOException {
    StrTokenizer tok = new StrTokenizer();
    tok.setDelimiterString(",");
    tok.setEmptyTokenAsNull(true);
    tok.setIgnoreEmptyTokens(false);

    // with no quote char configured, the embedded comma splits the name
    String[] cols = tok.reset("121,432423, 9099053,Frieda karla L.,DC.,Ahrens").getTokenArray();
    String[] expected = { "121", "432423", " 9099053", "Frieda karla L.", "DC.", "Ahrens" };
    for (int i = 0; i < expected.length; i++) {
        assertEquals(expected[i], cols[i]);
    }

    // leading/consecutive delimiters yield null tokens
    cols = tok.reset(",,,,zzz  ").getTokenArray();
    for (int i = 0; i <= 3; i++) {
        assertNull(cols[i]);
    }
    assertEquals("zzz  ", cols[4]);
}

From source file:org.gbif.file.StrTokenizerTest.java

/**
 * Verifies pipe-delimited parsing with double-quote support: quoted pipes stay
 * within one token, and empty tokens come back as null.
 */
@Test
public void testPipes() throws IOException {
    StrTokenizer tok = new StrTokenizer();
    tok.setDelimiterChar('|');
    tok.setQuoteChar('"');
    tok.setEmptyTokenAsNull(true);
    tok.setIgnoreEmptyTokens(false);

    // quoted pipe is part of the token
    String[] cols = tok.reset("121|432423| 9099053|\"Frieda karla L.|DC.\"|Ahrens").getTokenArray();
    String[] expected = { "121", "432423", " 9099053", "Frieda karla L.|DC.", "Ahrens" };
    for (int i = 0; i < expected.length; i++) {
        assertEquals(expected[i], cols[i]);
    }

    // whitespace-only tokens are preserved verbatim
    cols = tok.reset("   |4321").getTokenArray();
    assertEquals("   ", cols[0]);
    assertEquals("4321", cols[1]);

    // consecutive delimiters yield null tokens
    cols = tok.reset(" ||||zzz  ").getTokenArray();
    assertEquals(" ", cols[0]);
    for (int i = 1; i <= 3; i++) {
        assertNull(cols[i]);
    }
    assertEquals("zzz  ", cols[4]);

    // leading delimiters yield null tokens as well
    cols = tok.reset("||||zzz  ").getTokenArray();
    for (int i = 0; i <= 3; i++) {
        assertNull(cols[i]);
    }
    assertEquals("zzz  ", cols[4]);
}

From source file:org.gbif.file.StrTokenizerTest.java

/**
 * Verifies tab-delimited parsing with double-quote support: quoted content is
 * unwrapped, and empty tokens come back as null.
 */
@Test
public void testTabQuoted() throws IOException {
    StrTokenizer tok = new StrTokenizer();
    tok.setDelimiterString("\t");
    tok.setQuoteChar('"');
    tok.setEmptyTokenAsNull(true);
    tok.setIgnoreEmptyTokens(false);

    // the quoted field is unwrapped; the comma inside it is just data
    String[] cols = tok.reset("121\t432423\t 9099053\t\"Frieda karla L.,DC.\"\tAhrens").getTokenArray();
    String[] expected = { "121", "432423", " 9099053", "Frieda karla L.,DC.", "Ahrens" };
    for (int i = 0; i < expected.length; i++) {
        assertEquals(expected[i], cols[i]);
    }

    // whitespace-only tokens are preserved verbatim
    cols = tok.reset("   \t4321").getTokenArray();
    assertEquals("   ", cols[0]);
    assertEquals("4321", cols[1]);

    // consecutive delimiters yield null tokens
    cols = tok.reset(" \t\t\t\tzzz  ").getTokenArray();
    assertEquals(" ", cols[0]);
    for (int i = 1; i <= 3; i++) {
        assertNull(cols[i]);
    }
    assertEquals("zzz  ", cols[4]);

    // leading delimiters yield null tokens as well
    cols = tok.reset("\t\t\t\tzzz  ").getTokenArray();
    for (int i = 0; i <= 3; i++) {
        assertNull(cols[i]);
    }
    assertEquals("zzz  ", cols[4]);
}

From source file:org.gbif.file.StrTokenizerTest.java

/**
 * Verifies tab-delimited parsing without quote handling: surrounding double
 * quotes stay in the token text, and empty tokens come back as null.
 */
@Test
public void testTabUnquoted() throws IOException {
    StrTokenizer tok = new StrTokenizer();
    tok.setDelimiterString("\t");
    tok.setEmptyTokenAsNull(true);
    tok.setIgnoreEmptyTokens(false);

    // with no quote char configured, the quotes remain part of the token
    String[] cols = tok.reset("121\t432423\t 9099053\t\"Frieda karla L.,DC.\"\tAhrens").getTokenArray();
    String[] expected = { "121", "432423", " 9099053", "\"Frieda karla L.,DC.\"", "Ahrens" };
    for (int i = 0; i < expected.length; i++) {
        assertEquals(expected[i], cols[i]);
    }

    // whitespace-only tokens are preserved verbatim
    cols = tok.reset("   \t4321").getTokenArray();
    assertEquals("   ", cols[0]);
    assertEquals("4321", cols[1]);

    // consecutive delimiters yield null tokens
    cols = tok.reset(" \t\t\t\tzzz  ").getTokenArray();
    assertEquals(" ", cols[0]);
    for (int i = 1; i <= 3; i++) {
        assertNull(cols[i]);
    }
    assertEquals("zzz  ", cols[4]);

    // leading delimiters yield null tokens as well
    cols = tok.reset("\t\t\t\tzzz  ").getTokenArray();
    for (int i = 0; i <= 3; i++) {
        assertNull(cols[i]);
    }
    assertEquals("zzz  ", cols[4]);
}

From source file:org.kalypso.model.wspm.pdb.internal.gaf.GafReader.java

/**
 * Parses one raw GAF text line into a {@link GafLine}.
 * <p>
 * The line is split on whitespace with all special tokenizer handling disabled
 * (no quoting, no ignored characters, no trimming) and empty tokens kept as
 * empty strings, so token positions map directly to GAF columns.
 *
 * @param line the raw line read from the GAF file
 * @return the parsed line with {@link Status#OK_STATUS}
 * @throws CoreException if the line has fewer than 9 tokens
 */
private GafLine parseLine(final String line) throws CoreException {
    // Whitespace-delimited tokenizer with quoting/ignoring/trimming all turned off,
    // so the raw column text is preserved exactly as written in the file.
    final StrTokenizer tokenizer = new StrTokenizer(line);
    tokenizer.setDelimiterMatcher(StrMatcher.trimMatcher());
    tokenizer.setQuoteMatcher(StrMatcher.noneMatcher());
    tokenizer.setIgnoredMatcher(StrMatcher.noneMatcher());
    tokenizer.setTrimmerMatcher(StrMatcher.noneMatcher());
    tokenizer.setEmptyTokenAsNull(false);
    tokenizer.setIgnoreEmptyTokens(false);
    final String[] tokens = tokenizer.getTokenArray();

    // A valid GAF record needs at least 9 columns; the 10th (hyk) is optional.
    if (tokens.length < 9)
        throw failLine(IStatus.INFO, Messages.getString("GafReader.5")); //$NON-NLS-1$

    final Object[] items = parseTokens(tokens);
    checkCommentLine(items);

    // Columns: station, pointId, width, height, code, roughness, vegetation, hw, rw[, hyk]
    final BigDecimal station = asDecimal(items[0], Messages.getString("GafReader.6")); //$NON-NLS-1$
    final String pointId = asString(tokens[1]);
    final BigDecimal width = asDecimalOrNull(items[2], Messages.getString("GafReader.7")); //$NON-NLS-1$
    final BigDecimal height = asDecimal(items[3], Messages.getString("GafReader.8")); //$NON-NLS-1$
    final String code = asString(tokens[4]).toUpperCase();
    final String roughnessClass = asString(tokens[5]);
    final String vegetationClass = asString(tokens[6]);
    final BigDecimal hw = asDecimal(items[7], Messages.getString("GafReader.9")); //$NON-NLS-1$
    final BigDecimal rw = asDecimal(items[8], Messages.getString("GafReader.10")); //$NON-NLS-1$
    final String hyk = tokens.length < 10 ? StringUtils.EMPTY : asString(tokens[9]).toUpperCase();

    // NOTE(review): rw is passed before hw here although hw is parsed first —
    // looks intentional but confirm against the GafLine constructor signature.
    return new GafLine(station, pointId, width, height, code, roughnessClass, vegetationClass, rw, hw, hyk,
            Status.OK_STATUS);
}