List of usage examples for edu.stanford.nlp.util.StringUtils.join
public static String join(Object[] elements)
From source file:knu.univ.lingvo.coref.Mention.java
License:Open Source License
public String getPattern(List<CoreLabel> pTokens) { ArrayList<String> phrase_string = new ArrayList<String>(); String ne = ""; for (CoreLabel token : pTokens) { if (token.index() == headWord.index()) { phrase_string.add(token.lemma()); ne = ""; } else if ((token.lemma().equals("and") || StringUtils.isPunct(token.lemma())) && pTokens.size() > pTokens.indexOf(token) + 1 && pTokens.indexOf(token) > 0 && pTokens.get(pTokens.indexOf(token) + 1).ner() .equals(pTokens.get(pTokens.indexOf(token) - 1).ner())) { } else if (token.index() == headWord.index() - 1 && token.ner().equals(nerString)) { phrase_string.add(token.lemma()); ne = ""; } else if (!token.ner().equals("O")) { if (!token.ner().equals(ne)) { ne = token.ner();// w w w. ja v a 2 s. c om phrase_string.add("<" + ne + ">"); } } else { phrase_string.add(token.lemma()); ne = ""; } } return StringUtils.join(phrase_string); }
From source file:org.seasr.meandre.components.analytics.psychometrics.LIWC.java
License:Open Source License
@Override public void executeCallBack(ComponentContext cc) throws Exception { Object inTokens = cc.getDataComponentFromInput(IN_TOKENS); String[] tokens = DataTypeParser.parseAsString(inTokens); WordClassCount[] LIWC_Values = null; try {//from www.j a v a 2 s. com LIWC_Values = dict.countClasses(StringUtils.join(tokens)); } catch (IllegalArgumentException e) { console.warning(String.format("Failed to calculate LIWC values! : %s", e.getMessage())); LIWC_Values = new WordClassCount[0]; } // System.out.print("LIWCOnTokenDoubleValues:"); Map<String, Integer> out = new Hashtable<String, Integer>(); for (WordClassCount fc : LIWC_Values) { out.put(dict.getClassName(fc.classId), fc.count); // System.out.print(fc); } // System.out.println(); // System.out.println(String.format("LIWCOnTokenDoubleValues: found %d classes",out.size())); Integer count = out.get(dict.getClassName(WordClassDictionary.CLASS_TOTAL_WORDS)); if (count == null) count = -1; console.fine(String.format("LIWC counter found %d classes from %d tokens", out.size(), count)); cc.pushDataComponentToOutput(OUT_LIWC_SCORES, BasicDataTypesTools.mapToIntegerMap(out, false)); cc.pushDataComponentToOutput(OUT_WORD_COUNT, BasicDataTypesTools.integerToIntegers(count)); cc.pushDataComponentToOutput(OUT_TOKENS, inTokens); }
From source file:process.PTBTokenizer.java
License:Open Source License
/** * Returns a presentable version of the given PTB-tokenized words. Pass in a * List of Strings and this method will join the words with spaces and call * {@link #ptb2Text(String)} on the output. * /*w ww . ja va 2 s .c o m*/ * @param ptbWords * A list of String * @return A presentable version of the given PTB-tokenized words */ public static String ptb2Text(List<String> ptbWords) { return ptb2Text(StringUtils.join(ptbWords)); }