List of usage examples for edu.stanford.nlp.util StringUtils split
public static List<String> split(String str, String regex)
From source file:org.voyanttools.trombone.storage.file.FileStorage.java
License:Open Source License
/**
 * Retrieves the string stored under the given id and returns it broken into
 * pieces at every newline character.
 *
 * @param id identifier passed through to {@code retrieveString}
 * @return the pieces produced by splitting the retrieved string on {@code "\n"}
 * @throws IOException if retrieving the underlying string fails
 */
@Override
public List<String> retrieveStrings(String id) throws IOException {
    final String contents = retrieveString(id);
    final List<String> lines = StringUtils.split(contents, "\n");
    return lines;
}
From source file:semRewrite.datesandnumber.StanfordDateTimeExtractor.java
License:Open Source License
/** *************************************************************** * Calls the stanford parser and extracts the necessary information about the words in the string * and stores them in Token object for further usage. * @param input: The natural language string. * @return List of Tokens./*from w w w . ja va2s . co m*/ */ public List<Tokens> populateParserInfo(String inputSentence) { Properties props = new Properties(); // props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref"); props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); Annotation annotation; annotation = new Annotation(inputSentence); pipeline.annotate(annotation); List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class); int sentenceCount = 0; List<Tokens> tokenList = new ArrayList<Tokens>(); for (CoreMap sentence : sentences) { tokenCount = 1; for (CoreLabel token : sentence.get(TokensAnnotation.class)) { String namedEntity = token.get(NamedEntityTagAnnotation.class); if ((DATE_ENTITIES.contains(namedEntity)) || ((MEASURE_ENTITIES.contains(namedEntity)) && (token.get(PartOfSpeechAnnotation.class).equals("CD") || token.get(PartOfSpeechAnnotation.class).equals("JJ"))) || (namedEntity.equals("DURATION") && token.get(PartOfSpeechAnnotation.class).equals("CD"))) { Tokens tokens = new Tokens(); tokens.setId(tokenCount); tokens.setWord(token.get(TextAnnotation.class)); tokens.setNer(token.get(NamedEntityTagAnnotation.class)); tokens.setNormalizedNer(token.get(NormalizedNamedEntityTagAnnotation.class)); tokens.setCharBegin(token.get(BeginIndexAnnotation.class)); tokens.setCharEnd(token.get(EndIndexAnnotation.class)); tokens.setPos(token.get(PartOfSpeechAnnotation.class)); tokens.setLemma(token.get(LemmaAnnotation.class)); tokenList.add(tokens); } tokenCount++; } dependencies = (sentence.get(CollapsedDependenciesAnnotation.class)); dependencyList = 
(StringUtils.split(dependencies.toList(), "\n")); } return tokenList; }