Example usage for edu.stanford.nlp.util StringUtils split

List of usage examples for edu.stanford.nlp.util StringUtils split

Introduction

On this page you can find example usages of edu.stanford.nlp.util.StringUtils.split.

Prototype

public static List<String> split(String str, String regex) 

Source Link

Document

Splits the given string using the given regex as delimiters.

Usage

From source file:org.voyanttools.trombone.storage.file.FileStorage.java

License:Open Source License

/**
 * Retrieves the string stored under the given id and breaks it into pieces,
 * using the regex {@code "\n"} as the delimiter.
 *
 * @param id identifier passed through to {@code retrieveString(id)}
 * @return the pieces produced by {@code StringUtils.split}
 * @throws IOException declared by this method; presumably propagated from
 *                     {@code retrieveString} (confirm against its signature)
 */
@Override
public List<String> retrieveStrings(String id) throws IOException {
    final String stored = retrieveString(id);
    final List<String> lines = StringUtils.split(stored, "\n");
    return lines;
}

From source file:semRewrite.datesandnumber.StanfordDateTimeExtractor.java

License:Open Source License

/** ***************************************************************
   * Calls the stanford parser and extracts the necessary information about the words in the string 
   * and stores them in Token object for further usage. 
   * @param input: The natural language string.
* @return List of Tokens./*from   w w w . ja va2s . co  m*/
   */
public List<Tokens> populateParserInfo(String inputSentence) {

    Properties props = new Properties();
    // props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation annotation;
    annotation = new Annotation(inputSentence);

    pipeline.annotate(annotation);
    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    int sentenceCount = 0;
    List<Tokens> tokenList = new ArrayList<Tokens>();
    for (CoreMap sentence : sentences) {
        tokenCount = 1;
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            String namedEntity = token.get(NamedEntityTagAnnotation.class);
            if ((DATE_ENTITIES.contains(namedEntity))
                    || ((MEASURE_ENTITIES.contains(namedEntity))
                            && (token.get(PartOfSpeechAnnotation.class).equals("CD")
                                    || token.get(PartOfSpeechAnnotation.class).equals("JJ")))
                    || (namedEntity.equals("DURATION")
                            && token.get(PartOfSpeechAnnotation.class).equals("CD"))) {
                Tokens tokens = new Tokens();
                tokens.setId(tokenCount);
                tokens.setWord(token.get(TextAnnotation.class));
                tokens.setNer(token.get(NamedEntityTagAnnotation.class));
                tokens.setNormalizedNer(token.get(NormalizedNamedEntityTagAnnotation.class));
                tokens.setCharBegin(token.get(BeginIndexAnnotation.class));
                tokens.setCharEnd(token.get(EndIndexAnnotation.class));
                tokens.setPos(token.get(PartOfSpeechAnnotation.class));
                tokens.setLemma(token.get(LemmaAnnotation.class));
                tokenList.add(tokens);
            }
            tokenCount++;
        }
        dependencies = (sentence.get(CollapsedDependenciesAnnotation.class));
        dependencyList = (StringUtils.split(dependencies.toList(), "\n"));
    }
    return tokenList;
}