List of usage examples for the method edu.stanford.nlp.trees.Tree.skipRoot()
public Tree skipRoot()
From source file:es.uam.irg.nlp.syntax.SyntacticAnalyzer.java
License:Open Source License
public static List<SyntacticallyAnalyzedSentence> analyzeSentences(String text) throws Exception { List<SyntacticallyAnalyzedSentence> _sentences = new ArrayList<SyntacticallyAnalyzedSentence>(); Properties props = new Properties(); //props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref, sentiment"); props.put("annotators", "tokenize, ssplit, pos, parse"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); text = text.replace("...", "."); Annotation annotation = new Annotation(text); pipeline.annotate(annotation);//w ww .j a va 2 s . com List<CoreMap> analyzedSentences = annotation.get(CoreAnnotations.SentencesAnnotation.class); if (analyzedSentences != null && !analyzedSentences.isEmpty()) { for (CoreMap analyzedSentence : analyzedSentences) { String sentence = analyzedSentence.toString(); Tree tree = analyzedSentence.get(TreeCoreAnnotations.TreeAnnotation.class); String treeDescription = tree.skipRoot().pennString(); SyntacticTreebank treebank = new SyntacticTreebank(treeDescription, true); SyntacticallyAnalyzedSentence _sentence = new SyntacticallyAnalyzedSentence(sentence, treebank); _sentences.add(_sentence); } } return _sentences; }
From source file:org.aksw.simba.bengal.triple2nl.property.PropertyVerbalizer.java
License:Apache License
private PropertyVerbalization getTypeByLinguisticAnalysis(String propertyURI, String propertyText) { logger.debug("...using linguistical analysis..."); Annotation document = new Annotation(propertyText); pipeline.annotate(document);//from w ww . j ava 2 s . c o m List<CoreMap> sentences = document.get(SentencesAnnotation.class); String pattern = ""; PropertyVerbalizationType verbalizationType = PropertyVerbalizationType.UNSPECIFIED; boolean firstTokenAuxiliary = false; for (CoreMap sentence : sentences) { List<CoreLabel> tokens = sentence.get(TokensAnnotation.class); // get the first word and check if it's 'is' or 'has' CoreLabel token = tokens.get(0); String word = token.get(TextAnnotation.class); String pos = token.get(PartOfSpeechAnnotation.class); String lemma = token.getString(LemmaAnnotation.class); firstTokenAuxiliary = auxiliaryVerbs.contains(lemma); if (lemma.equals("be") || word.equals("have")) { pattern += lemma.toUpperCase(); } else { if (pos.startsWith("N")) { pattern += "NP"; } else if (pos.startsWith("V")) { pattern += "VP"; } else { pattern += pos; } } if (tokens.size() > 1) { pattern += " "; for (int i = 1; i < tokens.size(); i++) { token = tokens.get(i); pos = token.get(PartOfSpeechAnnotation.class); if (pos.startsWith("N")) { pattern += "NP"; } else if (pos.startsWith("V")) { pattern += "VP"; } else { pattern += pos; } pattern += " "; } } // get the parse tree Tree tree = sentence.get(TreeAnnotation.class); // skip ROOT tag tree = tree.skipRoot(); logger.debug("Parse tree:" + tree.pennString()); // tree.pennPrint(); // check if VP is directly followed by NP // sometimes parent node is S,SINV,etc. 
if (tree.value().matches(Joiner.on('|').join(Lists.newArrayList(S, SBAR, SBARQ, SINV, FRAGMENT)))) { tree = tree.getChild(0); } boolean useDeterminer = false; if (tree.value().equals(VERB_PHRASE.getTag())) { for (Tree child : tree.getChildrenAsList()) { // check if first non terminal is NP and not contains a // determiner if (!child.isPreTerminal()) { if (child.value().equals(NOUN_PHRASE.getTag()) && !child.getChild(0).value().equals(DETERMINER.getTag())) { useDeterminer = true; } break; } } } // add determiner tag if (useDeterminer) { String[] split = pattern.split(" "); pattern = split[0] + " DET " + Joiner.on(" ").join(Arrays.copyOfRange(split, 1, split.length)); } } pattern = pattern.trim(); // if first token is an auxiliary can return verb if (firstTokenAuxiliary) { verbalizationType = PropertyVerbalizationType.VERB; } // check if pattern matches if (pattern.matches(VERB_PATTERN)) { logger.debug("...successfully determined type."); verbalizationType = PropertyVerbalizationType.VERB; } return new PropertyVerbalization(propertyURI, propertyText, pattern, verbalizationType); }