List of usage examples for org.apache.commons.lang3.text.StrMatcher.quoteMatcher()
public static StrMatcher quoteMatcher()
From source file: edu.sabanciuniv.sentilab.sare.controllers.opinion.OpinionCorpusFactory.java
@Override protected OpinionCorpusFactory addTextPacket(OpinionCorpus corpus, InputStream input, String delimiter) throws IOException { Validate.notNull(corpus, CannedMessages.NULL_ARGUMENT, "corpus"); Validate.notNull(input, CannedMessages.NULL_ARGUMENT, "input"); OpinionDocumentFactory opinionFactory = null; BufferedReader reader = new BufferedReader(new InputStreamReader(input)); String line;//w ww . j a v a2 s .c o m while ((line = reader.readLine()) != null) { StrTokenizer tokenizer = new StrTokenizer(line, StrMatcher.stringMatcher(delimiter), StrMatcher.quoteMatcher()); List<String> columns = tokenizer.getTokenList(); if (columns.size() < 1) { continue; } opinionFactory = new OpinionDocumentFactory().setCorpus(corpus).setContent(columns.get(0)); if (columns.size() > 1) { try { opinionFactory.setPolarity(Double.parseDouble(columns.get(1))); } catch (NumberFormatException e) { opinionFactory.setPolarity(null); } } corpus.addDocument(opinionFactory.create()); } return this; }
From source file: edu.sabanciuniv.sentilab.sare.controllers.aspect.AspectLexiconFactory.java
@Override protected AspectLexiconFactory addTextPacket(AspectLexicon lexicon, InputStream input, String delimiter) throws IOException { Validate.notNull(lexicon, CannedMessages.NULL_ARGUMENT, "lexicon"); Validate.notNull(input, CannedMessages.NULL_ARGUMENT, "input"); delimiter = StringUtils.defaultString(delimiter, "\t"); BufferedReader reader = new BufferedReader(new InputStreamReader(input)); String line;// w ww. j a v a 2s. c o m while ((line = reader.readLine()) != null) { StrTokenizer tokenizer = new StrTokenizer(line, StrMatcher.stringMatcher(delimiter), StrMatcher.quoteMatcher()); List<String> columns = tokenizer.getTokenList(); if (columns.size() < 1) { continue; } String aspectStr = columns.get(0); Matcher matcher = Pattern.compile("^<(.*)>$").matcher(aspectStr); if (matcher.matches()) { aspectStr = matcher.group(1); } else { continue; } AspectLexicon aspect = lexicon.addAspect(aspectStr); for (int i = 1; i < columns.size(); i++) { aspect.addExpression(columns.get(i)); } } return this; }