Example usage for edu.stanford.nlp.ling CoreLabel toString

List of usage examples for edu.stanford.nlp.ling CoreLabel toString

Introduction

In this page you can find the example usage for edu.stanford.nlp.ling CoreLabel toString.

Prototype

@SuppressWarnings("unchecked")
public String toString(OutputFormat format) 

Source Link

Document

Returns a formatted string representing this label.

Usage

From source file:com.daemon.sentiment.FeatureMatrix.java

License: Open Source License

/**
 * POS tagging features.
 * 
 * Words are tagged with their respective part-of-speech tag as determined
 * by the Stanford parser.
 * 
 * @param tokens
 *            Tokenized text of the tweet
 * @param tokensPOSTagged
 *            Tokenized text of the tweet, possibly with negations from the
 *            previous step
 * @return Reference to the second parameter, which now has POS annotations,
 *         e.g. "love $NN$"
 */
private List<String> addPOSTags(List<String> tokens, List<String> tokensPOSTagged) {
    // The parser consumes the sentence as a list of HasWord tokens.
    List<HasWord> sentence = new ArrayList<HasWord>();
    for (String token : tokens) {
        sentence.add(new Word(token));
    }

    // Run the Stanford lexicalized parser over the tokenized sentence.
    Tree stanfordTree = lexicalizedParser.apply(sentence);

    // Append each token's POS tag, wrapped in '$', to the matching entry
    // of the output list (taggedLabeledYield is in token order).
    int position = 0;
    for (CoreLabel taggedLabel : stanfordTree.taggedLabeledYield()) {
        String annotated = tokensPOSTagged.get(position) + " $" + taggedLabel.toString("value") + "$";
        tokensPOSTagged.set(position, annotated);
        position++;
    }

    return tokensPOSTagged;

}

From source file:uk.bl.wa.nlp.parsers.StanfordAnnotatorParser.java

License: Open Source License

/**
 * Runs the configured CoreNLP pipeline over the given text, stitches
 * consecutive NER-tagged tokens into entity strings, and stores the
 * resulting entity sets (and the rounded average sentiment) on the
 * supplied metadata object.
 *
 * @param text raw document text to annotate
 * @param metadata receives NER_* arrays and AVG_SENTIMENT
 */
public void parse(String text, Metadata metadata) {

    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);

    // run all Annotators on this text
    pipeline.annotate(document);

    // these are all the sentences in this document
    // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);

    // Accumulators for each entity class we recognise:
    HashSet<String> persons = new HashSet<String>();
    HashSet<String> orgs = new HashSet<String>();
    HashSet<String> locations = new HashSet<String>();
    HashSet<String> dates = new HashSet<String>();
    HashSet<String> miscs = new HashSet<String>();

    // Sentiment accumulation stays at zero until the sentiment-tree API
    // (requires a later parser version / Java 8) is enabled below.
    double totalSentiment = 0;
    double totalSentences = 0;
    int[] sentiments = new int[5]; // histogram for the disabled sentiment code

    // State for stitching consecutive same-entity tokens back together:
    boolean inEntity = false;
    String currentEntity = "";
    String currentEntityType = "";

    for (CoreMap sentence : sentences) {
        // REQUIRES LATER VERSION OF PARSER (Java 8)
        // Tree tree = sentence.get(SentimentAnnotatedTree.class);
        // int sentiment = RNNCoreAnnotations.getPredictedClass(tree);
        // totalSentiment += sentiment;
        // totalSentences++;
        // // Also store as a histogram:
        // sentiments[sentiment]++;

        // traversing the words in the current sentence
        // a CoreLabel is a CoreMap with additional token-specific methods
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            // this is the NER label of the token ("O" means "not an entity")
            String ne = token.get(NamedEntityTagAnnotation.class);
            if (!inEntity) {
                if (!"O".equals(ne)) {
                    // Entering a new entity span:
                    inEntity = true;
                    currentEntity = "";
                    currentEntityType = ne;
                }
            }
            if (inEntity) {
                if ("O".equals(ne)) {
                    // Entity span ended — record it under its type.
                    inEntity = false;
                    recordEntity(currentEntityType, currentEntity, String.valueOf(token),
                            persons, orgs, locations, dates, miscs);
                } else {
                    currentEntity += " " + token.toString(OutputFormat.VALUE);
                }
            }
        }
    }
    // BUG FIX: an entity span running through the very last token was
    // previously dropped because it was only flushed on seeing an "O" tag.
    if (inEntity) {
        recordEntity(currentEntityType, currentEntity, "<end of text>",
                persons, orgs, locations, dates, miscs);
    }

    // Now store them:
    metadata.set(NER_PERSONS, persons.toArray(new String[0]));
    metadata.set(NER_ORGANISATIONS, orgs.toArray(new String[0]));
    metadata.set(NER_LOCATIONS, locations.toArray(new String[0]));
    metadata.set(NER_DATES, dates.toArray(new String[0]));
    metadata.set(NER_MISC, miscs.toArray(new String[0]));
    // And calculate and store the rounded average sentiment. Guard against
    // 0/0 = NaN while sentiment analysis is disabled (Math.round(NaN) is 0,
    // but that should be explicit rather than accidental).
    int avgSentiment = totalSentences > 0 ? (int) Math.round(totalSentiment / totalSentences) : 0;
    metadata.set(AVG_SENTIMENT, avgSentiment);
    // Convert sentiment distribution:
    // String[] sentiment_dist = new String[5];
    // for( int i = 0; i < 5; i++ ) sentiment_dist[i] = ""+sentiments[i];
    // metadata.set( SENTIMENT_DIST, sentiment_dist);
}

/**
 * Adds a completed entity string (trimmed) to the set matching its NER
 * type. NUMBER entities are deliberately ignored; any other unknown type
 * is reported on stderr, naming the token where the span ended.
 */
private static void recordEntity(String type, String entity, String tokenDescription,
        HashSet<String> persons, HashSet<String> orgs, HashSet<String> locations,
        HashSet<String> dates, HashSet<String> miscs) {
    String trimmed = entity.trim();
    if ("PERSON".equals(type)) {
        persons.add(trimmed);
    } else if ("ORGANIZATION".equals(type)) {
        orgs.add(trimmed);
    } else if ("LOCATION".equals(type)) {
        locations.add(trimmed);
    } else if ("DATE".equals(type)) {
        dates.add(trimmed);
    } else if ("MISC".equals(type)) {
        miscs.add(trimmed);
    } else if ("NUMBER".equals(type)) {
        // Ignore numbers.
    } else {
        System.err.println("Entity type " + type + " for token " + tokenDescription
                + " cannot be handled by this parser!");
    }
}