Usage examples for org.apache.commons.lang3.text.StrTokenizer#setEmptyTokenAsNull
public StrTokenizer setEmptyTokenAsNull(final boolean emptyAsNull)
From source file:org.gbif.file.StrTokenizerPerformance.java
@Test public void testCharVsStringPerformance() throws IOException { File source = FileUtils.getClasspathFile("irmng.tail"); // test CHAR/*from w ww.j a v a 2 s. c om*/ StrTokenizer tokenizer = new StrTokenizer(); tokenizer.setDelimiterChar('\t'); tokenizer.setEmptyTokenAsNull(true); tokenizer.setIgnoreEmptyTokens(false); long time = test(tokenizer, source); System.out.println(time + " milliseconds for CHAR based tokenizer."); // test STRING tokenizer = new StrTokenizer(); tokenizer.setDelimiterString("\t"); tokenizer.setEmptyTokenAsNull(true); time = test(tokenizer, source); System.out.println(time + " milliseconds for STRING based tokenizer."); }
From source file:org.gbif.file.StrTokenizerTest.java
/**
 * Checks comma-separated tokenizing with a double quote configured as the quote character:
 * a quoted field may contain the delimiter, blanks inside fields are preserved, and empty
 * fields are returned as {@code null} instead of being dropped.
 */
@Test
public void testCsvQuoted() throws IOException {
    StrTokenizer csv = new StrTokenizer()
        .setDelimiterString(",")
        .setQuoteChar('"')
        .setEmptyTokenAsNull(true)
        .setIgnoreEmptyTokens(false);

    // The quoted field keeps its embedded comma and is returned without the quotes.
    String[] row = csv.reset("121,432423, 9099053,\"Frieda karla L.,DC.\",Ahrens").getTokenArray();
    assertEquals("121", row[0]);
    assertEquals("432423", row[1]);
    assertEquals(" 9099053", row[2]);
    assertEquals("Frieda karla L.,DC.", row[3]);
    assertEquals("Ahrens", row[4]);

    // A field holding only a blank is a regular token, not an empty one.
    row = csv.reset(" ,4321").getTokenArray();
    assertEquals(" ", row[0]);
    assertEquals("4321", row[1]);

    // Empty fields between a blank first field and the last field come back as null.
    row = csv.reset(" ,,,,zzz ").getTokenArray();
    assertEquals(" ", row[0]);
    for (int i = 1; i <= 3; i++) {
        assertNull(row[i]);
    }
    assertEquals("zzz ", row[4]);

    // Same input without the leading blank: the first field is null as well.
    row = csv.reset(",,,,zzz ").getTokenArray();
    for (int i = 0; i <= 3; i++) {
        assertNull(row[i]);
    }
    assertEquals("zzz ", row[4]);
}
From source file:org.gbif.file.StrTokenizerTest.java
/**
 * Checks comma-separated tokenizing when no quote character is configured: every comma
 * splits a field, blanks inside fields are preserved, and empty fields are returned as
 * {@code null} instead of being dropped.
 */
@Test
public void testCsvUnquoted() throws IOException {
    StrTokenizer csv = new StrTokenizer()
        .setDelimiterString(",")
        .setEmptyTokenAsNull(true)
        .setIgnoreEmptyTokens(false);

    // Without quoting, the comma inside "L.,DC." produces two separate fields.
    String[] row = csv.reset("121,432423, 9099053,Frieda karla L.,DC.,Ahrens").getTokenArray();
    assertEquals("121", row[0]);
    assertEquals("432423", row[1]);
    assertEquals(" 9099053", row[2]);
    assertEquals("Frieda karla L.", row[3]);
    assertEquals("DC.", row[4]);
    assertEquals("Ahrens", row[5]);

    // Four leading empty fields are reported as null; the last field keeps its trailing blank.
    row = csv.reset(",,,,zzz ").getTokenArray();
    for (int i = 0; i <= 3; i++) {
        assertNull(row[i]);
    }
    assertEquals("zzz ", row[4]);
}
From source file:org.gbif.file.StrTokenizerTest.java
/**
 * Checks pipe-delimited tokenizing with a double quote configured as the quote character:
 * a quoted field may contain the pipe, blanks inside fields are preserved, and empty fields
 * are returned as {@code null} instead of being dropped.
 */
@Test
public void testPipes() throws IOException {
    StrTokenizer piped = new StrTokenizer()
        .setDelimiterChar('|')
        .setQuoteChar('"')
        .setEmptyTokenAsNull(true)
        .setIgnoreEmptyTokens(false);

    // The quoted field keeps its embedded pipe and is returned without the quotes.
    String[] row = piped.reset("121|432423| 9099053|\"Frieda karla L.|DC.\"|Ahrens").getTokenArray();
    assertEquals("121", row[0]);
    assertEquals("432423", row[1]);
    assertEquals(" 9099053", row[2]);
    assertEquals("Frieda karla L.|DC.", row[3]);
    assertEquals("Ahrens", row[4]);

    // A field holding only a blank is a regular token, not an empty one.
    row = piped.reset(" |4321").getTokenArray();
    assertEquals(" ", row[0]);
    assertEquals("4321", row[1]);

    // Empty fields between a blank first field and the last field come back as null.
    row = piped.reset(" ||||zzz ").getTokenArray();
    assertEquals(" ", row[0]);
    for (int i = 1; i <= 3; i++) {
        assertNull(row[i]);
    }
    assertEquals("zzz ", row[4]);

    // Same input without the leading blank: the first field is null as well.
    row = piped.reset("||||zzz ").getTokenArray();
    for (int i = 0; i <= 3; i++) {
        assertNull(row[i]);
    }
    assertEquals("zzz ", row[4]);
}
From source file:org.gbif.file.StrTokenizerTest.java
@Test public void testTabQuoted() throws IOException { StrTokenizer tokenizer = new StrTokenizer(); tokenizer.setDelimiterString("\t"); tokenizer.setQuoteChar('"'); tokenizer.setEmptyTokenAsNull(true); tokenizer.setIgnoreEmptyTokens(false); tokenizer.reset("121\t432423\t 9099053\t\"Frieda karla L.,DC.\"\tAhrens"); String[] columns = tokenizer.getTokenArray(); assertEquals("121", columns[0]); assertEquals("432423", columns[1]); assertEquals(" 9099053", columns[2]); assertEquals("Frieda karla L.,DC.", columns[3]); assertEquals("Ahrens", columns[4]); tokenizer.reset(" \t4321"); columns = tokenizer.getTokenArray(); assertEquals(" ", columns[0]); assertEquals("4321", columns[1]); tokenizer.reset(" \t\t\t\tzzz "); columns = tokenizer.getTokenArray(); assertEquals(" ", columns[0]); assertNull(columns[1]);//from w w w. j av a 2 s . c om assertNull(columns[2]); assertNull(columns[3]); assertEquals("zzz ", columns[4]); tokenizer.reset("\t\t\t\tzzz "); columns = tokenizer.getTokenArray(); assertNull(columns[0]); assertNull(columns[1]); assertNull(columns[2]); assertNull(columns[3]); assertEquals("zzz ", columns[4]); }
From source file:org.gbif.file.StrTokenizerTest.java
@Test public void testTabUnquoted() throws IOException { StrTokenizer tokenizer = new StrTokenizer(); tokenizer.setDelimiterString("\t"); tokenizer.setEmptyTokenAsNull(true); tokenizer.setIgnoreEmptyTokens(false); tokenizer.reset("121\t432423\t 9099053\t\"Frieda karla L.,DC.\"\tAhrens"); String[] columns = tokenizer.getTokenArray(); assertEquals("121", columns[0]); assertEquals("432423", columns[1]); assertEquals(" 9099053", columns[2]); assertEquals("\"Frieda karla L.,DC.\"", columns[3]); assertEquals("Ahrens", columns[4]); tokenizer.reset(" \t4321"); columns = tokenizer.getTokenArray(); assertEquals(" ", columns[0]); assertEquals("4321", columns[1]); tokenizer.reset(" \t\t\t\tzzz "); columns = tokenizer.getTokenArray(); assertEquals(" ", columns[0]); assertNull(columns[1]);//from ww w . jav a 2s . c o m assertNull(columns[2]); assertNull(columns[3]); assertEquals("zzz ", columns[4]); tokenizer.reset("\t\t\t\tzzz "); columns = tokenizer.getTokenArray(); assertNull(columns[0]); assertNull(columns[1]); assertNull(columns[2]); assertNull(columns[3]); assertEquals("zzz ", columns[4]); }
From source file:org.kalypso.model.wspm.pdb.internal.gaf.GafReader.java
private GafLine parseLine(final String line) throws CoreException { final StrTokenizer tokenizer = new StrTokenizer(line); tokenizer.setDelimiterMatcher(StrMatcher.trimMatcher()); tokenizer.setQuoteMatcher(StrMatcher.noneMatcher()); tokenizer.setIgnoredMatcher(StrMatcher.noneMatcher()); tokenizer.setTrimmerMatcher(StrMatcher.noneMatcher()); tokenizer.setEmptyTokenAsNull(false); tokenizer.setIgnoreEmptyTokens(false); final String[] tokens = tokenizer.getTokenArray(); if (tokens.length < 9) throw failLine(IStatus.INFO, Messages.getString("GafReader.5")); //$NON-NLS-1$ final Object[] items = parseTokens(tokens); checkCommentLine(items);/*from ww w .jav a 2 s.c o m*/ final BigDecimal station = asDecimal(items[0], Messages.getString("GafReader.6")); //$NON-NLS-1$ final String pointId = asString(tokens[1]); final BigDecimal width = asDecimalOrNull(items[2], Messages.getString("GafReader.7")); //$NON-NLS-1$ final BigDecimal height = asDecimal(items[3], Messages.getString("GafReader.8")); //$NON-NLS-1$ final String code = asString(tokens[4]).toUpperCase(); final String roughnessClass = asString(tokens[5]); final String vegetationClass = asString(tokens[6]); final BigDecimal hw = asDecimal(items[7], Messages.getString("GafReader.9")); //$NON-NLS-1$ final BigDecimal rw = asDecimal(items[8], Messages.getString("GafReader.10")); //$NON-NLS-1$ final String hyk = tokens.length < 10 ? StringUtils.EMPTY : asString(tokens[9]).toUpperCase(); return new GafLine(station, pointId, width, height, code, roughnessClass, vegetationClass, rw, hw, hyk, Status.OK_STATUS); }