List of usage examples for org.apache.commons.lang3.text.StrTokenizer#setIgnoreEmptyTokens
public StrTokenizer setIgnoreEmptyTokens(final boolean ignoreEmptyTokens)
From source file:com.hurence.logisland.repository.csv.CsvFileParser.java
/** * Parse the file given in parameters//from www . ja v a 2 s.c o m * * @param filePath * @return */ public List<T> parseFile(String filePath) { List<T> result = new ArrayList<>(); InputStreamReader isr = null; BufferedReader bsr = null; try { isr = new InputStreamReader(new FileInputStream(filePath), "UTF-8"); bsr = new BufferedReader(isr); logger.debug("start parsing csv file : " + filePath); int nblines = 0; String line; while ((line = bsr.readLine()) != null) { // don't parse the first line of csv if (nblines != 0) { StrTokenizer tokenizer = new StrTokenizer(line, separator); tokenizer.setIgnoreEmptyTokens(false); T o = createEntity(tokenizer); if (o != null) { result.add(o); } } else { nblines++; } } logger.debug("done parsing csv file : " + filePath); } catch (FileNotFoundException ex) { logger.error("file not found : " + filePath); } catch (IOException ex) { logger.error("unknown error while parsing : " + filePath); } finally { try { if (bsr != null) { bsr.close(); } } catch (IOException ex) { logger.error("unknown error while parsing : " + filePath); } } return result; }
From source file:com.jkoolcloud.tnt4j.streams.parsers.ActivityTokenParser.java
@Override protected ActivityContext prepareItem(TNTInputStream<?, ?> stream, Object data) throws ParseException { // Get next string to parse String dataStr = getNextActivityString(data); if (StringUtils.isEmpty(dataStr)) { return null; }/*from ww w . j av a2 s . c o m*/ logger().log(OpLevel.DEBUG, StreamsResources.getString(StreamsResources.RESOURCE_BUNDLE_NAME, "ActivityParser.splitting.string"), dataStr); if (pattern != null) { Matcher matcher = pattern.matcher(dataStr); if (matcher == null || !matcher.matches()) { logger().log(OpLevel.DEBUG, StreamsResources.getString(StreamsResources.RESOURCE_BUNDLE_NAME, "ActivityParser.input.not.match"), getName(), pattern.pattern()); return null; } } StrTokenizer tk = stripQuotes ? new StrTokenizer(dataStr, fieldDelim, StrMatcher.doubleQuoteMatcher()) : new StrTokenizer(dataStr, fieldDelim); tk.setIgnoreEmptyTokens(false); String[] fields = tk.getTokenArray(); if (ArrayUtils.isEmpty(fields)) { logger().log(OpLevel.DEBUG, StreamsResources.getString(StreamsResources.RESOURCE_BUNDLE_NAME, "ActivityParser.no.fields")); return null; } logger().log(OpLevel.DEBUG, StreamsResources.getString(StreamsResources.RESOURCE_BUNDLE_NAME, "ActivityParser.split"), fields.length); ActivityContext cData = new ActivityContext(stream, data, fields); cData.setMessage(getRawDataAsMessage(fields)); return cData; }
From source file:com.jkoolcloud.tnt4j.streams.parsers.ActivityNameValueParser.java
@Override protected ActivityContext prepareItem(TNTInputStream<?, ?> stream, Object data) throws ParseException { String dataStr = getNextActivityString(data); if (StringUtils.isEmpty(dataStr)) { return null; }//from w ww.ja v a 2 s . c o m logger().log(OpLevel.DEBUG, StreamsResources.getString(StreamsResources.RESOURCE_BUNDLE_NAME, "ActivityParser.splitting.string"), dataStr); if (pattern != null) { Matcher matcher = pattern.matcher(dataStr); if (matcher == null || !matcher.matches()) { logger().log(OpLevel.DEBUG, StreamsResources.getString(StreamsResources.RESOURCE_BUNDLE_NAME, "ActivityParser.input.not.match"), getName(), pattern.pattern()); return null; } } StrTokenizer tk = stripQuotes ? new StrTokenizer(dataStr, fieldDelim, StrMatcher.doubleQuoteMatcher()) : new StrTokenizer(dataStr, fieldDelim); tk.setIgnoreEmptyTokens(false); String[] fields = tk.getTokenArray(); if (ArrayUtils.isEmpty(fields)) { logger().log(OpLevel.DEBUG, StreamsResources.getString(StreamsResources.RESOURCE_BUNDLE_NAME, "ActivityParser.no.fields")); return null; } logger().log(OpLevel.DEBUG, StreamsResources.getString(StreamsResources.RESOURCE_BUNDLE_NAME, "ActivityParser.split"), fields.length); Map<String, String> nameValues = new HashMap<>(fields.length); for (String field : fields) { if (field != null) { String[] nv = field.split(Pattern.quote(valueDelim)); if (ArrayUtils.isNotEmpty(nv)) { nameValues.put(nv[0], nv.length > 1 ? nv[1].trim() : ""); } logger().log(OpLevel.TRACE, StreamsResources.getString(StreamsResources.RESOURCE_BUNDLE_NAME, "ActivityNameValueParser.found"), field); } } ActivityContext cData = new ActivityContext(stream, data, nameValues); cData.setMessage(getRawDataAsMessage(nameValues)); return cData; }
From source file:org.gbif.file.StrTokenizerPerformance.java
@Test public void testCharVsStringPerformance() throws IOException { File source = FileUtils.getClasspathFile("irmng.tail"); // test CHAR//from w w w .j ava 2s . c o m StrTokenizer tokenizer = new StrTokenizer(); tokenizer.setDelimiterChar('\t'); tokenizer.setEmptyTokenAsNull(true); tokenizer.setIgnoreEmptyTokens(false); long time = test(tokenizer, source); System.out.println(time + " milliseconds for CHAR based tokenizer."); // test STRING tokenizer = new StrTokenizer(); tokenizer.setDelimiterString("\t"); tokenizer.setEmptyTokenAsNull(true); time = test(tokenizer, source); System.out.println(time + " milliseconds for STRING based tokenizer."); }
From source file:org.gbif.file.StrTokenizerTest.java
/**
 * Checks comma-delimited tokenizing with double-quote quoting enabled: a
 * quoted field containing a comma comes back as one unquoted token, leading
 * and trailing spaces are preserved, and empty fields are reported as
 * {@code null}.
 */
@Test
public void testCsvQuoted() throws IOException {
    final StrTokenizer csv = new StrTokenizer()
            .setDelimiterString(",")
            .setQuoteChar('"')
            .setEmptyTokenAsNull(true)
            .setIgnoreEmptyTokens(false);

    // Full row with one quoted field that embeds the delimiter.
    String[] columns = csv.reset("121,432423, 9099053,\"Frieda karla L.,DC.\",Ahrens").getTokenArray();
    final String[] fullRow = { "121", "432423", " 9099053", "Frieda karla L.,DC.", "Ahrens" };
    for (int i = 0; i < fullRow.length; i++) {
        assertEquals(fullRow[i], columns[i]);
    }

    // A single space is a real value, not an empty token.
    columns = csv.reset(" ,4321").getTokenArray();
    assertEquals(" ", columns[0]);
    assertEquals("4321", columns[1]);

    // Space first, then three empty fields.
    columns = csv.reset(" ,,,,zzz ").getTokenArray();
    assertEquals(" ", columns[0]);
    for (int i = 1; i <= 3; i++) {
        assertNull(columns[i]);
    }
    assertEquals("zzz ", columns[4]);

    // Four leading empty fields.
    columns = csv.reset(",,,,zzz ").getTokenArray();
    for (int i = 0; i <= 3; i++) {
        assertNull(columns[i]);
    }
    assertEquals("zzz ", columns[4]);
}
From source file:org.gbif.file.StrTokenizerTest.java
/**
 * Checks comma-delimited tokenizing without any quote character: every comma
 * splits, leading spaces are preserved, and empty fields are reported as
 * {@code null}.
 */
@Test
public void testCsvUnquoted() throws IOException {
    final StrTokenizer csv = new StrTokenizer()
            .setDelimiterString(",")
            .setEmptyTokenAsNull(true)
            .setIgnoreEmptyTokens(false);

    // No quoting here, so "L.,DC." is split at its comma into two columns.
    String[] columns = csv.reset("121,432423, 9099053,Frieda karla L.,DC.,Ahrens").getTokenArray();
    final String[] fullRow = { "121", "432423", " 9099053", "Frieda karla L.", "DC.", "Ahrens" };
    for (int i = 0; i < fullRow.length; i++) {
        assertEquals(fullRow[i], columns[i]);
    }

    // Four leading empty fields.
    columns = csv.reset(",,,,zzz ").getTokenArray();
    for (int i = 0; i <= 3; i++) {
        assertNull(columns[i]);
    }
    assertEquals("zzz ", columns[4]);
}
From source file:org.gbif.file.StrTokenizerTest.java
@Test public void testPipes() throws IOException { StrTokenizer tokenizer = new StrTokenizer(); tokenizer.setDelimiterChar('|'); tokenizer.setQuoteChar('"'); tokenizer.setEmptyTokenAsNull(true); tokenizer.setIgnoreEmptyTokens(false); tokenizer.reset("121|432423| 9099053|\"Frieda karla L.|DC.\"|Ahrens"); String[] columns = tokenizer.getTokenArray(); assertEquals("121", columns[0]); assertEquals("432423", columns[1]); assertEquals(" 9099053", columns[2]); assertEquals("Frieda karla L.|DC.", columns[3]); assertEquals("Ahrens", columns[4]); tokenizer.reset(" |4321"); columns = tokenizer.getTokenArray(); assertEquals(" ", columns[0]); assertEquals("4321", columns[1]); tokenizer.reset(" ||||zzz "); columns = tokenizer.getTokenArray(); assertEquals(" ", columns[0]); assertNull(columns[1]);//ww w.j a v a2 s .c o m assertNull(columns[2]); assertNull(columns[3]); assertEquals("zzz ", columns[4]); tokenizer.reset("||||zzz "); columns = tokenizer.getTokenArray(); assertNull(columns[0]); assertNull(columns[1]); assertNull(columns[2]); assertNull(columns[3]); assertEquals("zzz ", columns[4]); }
From source file:org.gbif.file.StrTokenizerTest.java
@Test public void testTabQuoted() throws IOException { StrTokenizer tokenizer = new StrTokenizer(); tokenizer.setDelimiterString("\t"); tokenizer.setQuoteChar('"'); tokenizer.setEmptyTokenAsNull(true); tokenizer.setIgnoreEmptyTokens(false); tokenizer.reset("121\t432423\t 9099053\t\"Frieda karla L.,DC.\"\tAhrens"); String[] columns = tokenizer.getTokenArray(); assertEquals("121", columns[0]); assertEquals("432423", columns[1]); assertEquals(" 9099053", columns[2]); assertEquals("Frieda karla L.,DC.", columns[3]); assertEquals("Ahrens", columns[4]); tokenizer.reset(" \t4321"); columns = tokenizer.getTokenArray(); assertEquals(" ", columns[0]); assertEquals("4321", columns[1]); tokenizer.reset(" \t\t\t\tzzz "); columns = tokenizer.getTokenArray(); assertEquals(" ", columns[0]); assertNull(columns[1]);// w w w . ja v a 2 s . c o m assertNull(columns[2]); assertNull(columns[3]); assertEquals("zzz ", columns[4]); tokenizer.reset("\t\t\t\tzzz "); columns = tokenizer.getTokenArray(); assertNull(columns[0]); assertNull(columns[1]); assertNull(columns[2]); assertNull(columns[3]); assertEquals("zzz ", columns[4]); }
From source file:org.gbif.file.StrTokenizerTest.java
/**
 * Checks tab-delimited tokenizing without any quote character: double quotes
 * in the input stay part of the token, spaces are preserved, and empty fields
 * are reported as {@code null}.
 */
@Test
public void testTabUnquoted() throws IOException {
    final StrTokenizer tabbed = new StrTokenizer()
            .setDelimiterString("\t")
            .setEmptyTokenAsNull(true)
            .setIgnoreEmptyTokens(false);

    // No quote character configured, so the quotes are kept verbatim.
    String[] columns = tabbed.reset("121\t432423\t 9099053\t\"Frieda karla L.,DC.\"\tAhrens").getTokenArray();
    final String[] fullRow = { "121", "432423", " 9099053", "\"Frieda karla L.,DC.\"", "Ahrens" };
    for (int i = 0; i < fullRow.length; i++) {
        assertEquals(fullRow[i], columns[i]);
    }

    // A single space is a real value, not an empty token.
    columns = tabbed.reset(" \t4321").getTokenArray();
    assertEquals(" ", columns[0]);
    assertEquals("4321", columns[1]);

    // Space first, then three empty fields.
    columns = tabbed.reset(" \t\t\t\tzzz ").getTokenArray();
    assertEquals(" ", columns[0]);
    for (int i = 1; i <= 3; i++) {
        assertNull(columns[i]);
    }
    assertEquals("zzz ", columns[4]);

    // Four leading empty fields.
    columns = tabbed.reset("\t\t\t\tzzz ").getTokenArray();
    for (int i = 0; i <= 3; i++) {
        assertNull(columns[i]);
    }
    assertEquals("zzz ", columns[4]);
}
From source file:org.kalypso.model.wspm.pdb.internal.gaf.GafReader.java
private GafLine parseLine(final String line) throws CoreException { final StrTokenizer tokenizer = new StrTokenizer(line); tokenizer.setDelimiterMatcher(StrMatcher.trimMatcher()); tokenizer.setQuoteMatcher(StrMatcher.noneMatcher()); tokenizer.setIgnoredMatcher(StrMatcher.noneMatcher()); tokenizer.setTrimmerMatcher(StrMatcher.noneMatcher()); tokenizer.setEmptyTokenAsNull(false); tokenizer.setIgnoreEmptyTokens(false); final String[] tokens = tokenizer.getTokenArray(); if (tokens.length < 9) throw failLine(IStatus.INFO, Messages.getString("GafReader.5")); //$NON-NLS-1$ final Object[] items = parseTokens(tokens); checkCommentLine(items);//from www.j a va 2 s.c o m final BigDecimal station = asDecimal(items[0], Messages.getString("GafReader.6")); //$NON-NLS-1$ final String pointId = asString(tokens[1]); final BigDecimal width = asDecimalOrNull(items[2], Messages.getString("GafReader.7")); //$NON-NLS-1$ final BigDecimal height = asDecimal(items[3], Messages.getString("GafReader.8")); //$NON-NLS-1$ final String code = asString(tokens[4]).toUpperCase(); final String roughnessClass = asString(tokens[5]); final String vegetationClass = asString(tokens[6]); final BigDecimal hw = asDecimal(items[7], Messages.getString("GafReader.9")); //$NON-NLS-1$ final BigDecimal rw = asDecimal(items[8], Messages.getString("GafReader.10")); //$NON-NLS-1$ final String hyk = tokens.length < 10 ? StringUtils.EMPTY : asString(tokens[9]).toUpperCase(); return new GafLine(station, pointId, width, height, code, roughnessClass, vegetationClass, rw, hw, hyk, Status.OK_STATUS); }