List of usage examples for edu.stanford.nlp.ling CoreLabel toString
@SuppressWarnings("unchecked") public String toString(OutputFormat format)
From source file: com.daemon.sentiment.FeatureMatrix.java
License:Open Source License
/** * POS tagging features//from w ww.j a v a 2s. c om * * Words are tagged with their respective part-of-speech tag as determined * by the Stanford parser * * @param tokens * Tokenized text of the tweet * @param tokensPOSTagged * Tokenized text of the tweet, possibly with negations from the * previous step * @return Reference to the second parameter, which now has POS annotations, * e.g. "love $NN$" */ private List<String> addPOSTags(List<String> tokens, List<String> tokensPOSTagged) { Tree stanfordTree; // Parser needs the tokens-list in a HasWord format List<HasWord> sentence = new ArrayList<HasWord>(); for (String token : tokens) { sentence.add(new Word(token)); } // Parse the sentence stanfordTree = lexicalizedParser.apply(sentence); // add results (POS tags) in tokensPOSTagged-list int i = 0; for (CoreLabel label : stanfordTree.taggedLabeledYield()) { tokensPOSTagged.set(i, tokensPOSTagged.get(i) + " $" + label.toString("value") + "$"); i++; } return tokensPOSTagged; }
From source file: uk.bl.wa.nlp.parsers.StanfordAnnotatorParser.java
License:Open Source License
/** * //w w w .j a v a2s .c o m * @param text * @param metadata */ public void parse(String text, Metadata metadata) { // create an empty Annotation just with the given text Annotation document = new Annotation(text); // run all Annotators on this text pipeline.annotate(document); // these are all the sentences in this document // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types List<CoreMap> sentences = document.get(SentencesAnnotation.class); // Loop over and extract: boolean inEntity = false; String currentEntity = ""; String currentEntityType = ""; HashSet<String> persons = new HashSet<String>(); HashSet<String> orgs = new HashSet<String>(); HashSet<String> locations = new HashSet<String>(); HashSet<String> dates = new HashSet<String>(); HashSet<String> miscs = new HashSet<String>(); double totalSentiment = 0; double totalSentences = 0; int[] sentiments = new int[5]; for (CoreMap sentence : sentences) { // REQUIRES LATER VERSION OF PARSER (Java 8) // Tree tree = sentence.get(SentimentAnnotatedTree.class); // int sentiment = RNNCoreAnnotations.getPredictedClass(tree); // totalSentiment += sentiment; // totalSentences++; // // Also store as a histogram: // sentiments[sentiment]++; // traversing the words in the current sentence // a CoreLabel is a CoreMap with additional token-specific methods for (CoreLabel token : sentence.get(TokensAnnotation.class)) { // this is the text of the token //String word = token.get(TextAnnotation.class); // this is the POS tag of the token //String pos = token.get(PartOfSpeechAnnotation.class); // this is the NER label of the token String ne = token.get(NamedEntityTagAnnotation.class); if (!inEntity) { if (!"O".equals(ne)) { inEntity = true; currentEntity = ""; currentEntityType = ne; } } if (inEntity) { if ("O".equals(ne)) { inEntity = false; if ("PERSON".equals(currentEntityType)) { persons.add(currentEntity.trim()); } else if ("ORGANIZATION".equals(currentEntityType)) { 
orgs.add(currentEntity.trim()); } else if ("LOCATION".equals(currentEntityType)) { locations.add(currentEntity.trim()); } else if ("DATE".equals(currentEntityType)) { dates.add(currentEntity.trim()); } else if ("MISC".equals(currentEntityType)) { miscs.add(currentEntity.trim()); } else if ("NUMBER".equals(currentEntityType)) { // Ignore numbers. } else { System.err.println("Entity type " + currentEntityType + " for token " + token + " cannot be handled by this parser!"); } } else { currentEntity += " " + token.toString(OutputFormat.VALUE); } } } } // Now store them: metadata.set(NER_PERSONS, persons.toArray(new String[0])); metadata.set(NER_ORGANISATIONS, orgs.toArray(new String[0])); metadata.set(NER_LOCATIONS, locations.toArray(new String[0])); metadata.set(NER_DATES, dates.toArray(new String[0])); metadata.set(NER_MISC, miscs.toArray(new String[0])); // And calculate and store the rounded average sentiment: metadata.set(AVG_SENTIMENT, (int) Math.round(totalSentiment / totalSentences)); // Convert sentiment distribution: // String[] sentiment_dist = new String[5]; // for( int i = 0; i < 5; i++ ) sentiment_dist[i] = ""+sentiments[i]; // metadata.set( SENTIMENT_DIST, sentiment_dist); }