Example usage for edu.stanford.nlp.trees Tree skipRoot

List of usage examples for edu.stanford.nlp.trees Tree skipRoot

Introduction

On this page you can find example usages of the edu.stanford.nlp.trees.Tree.skipRoot() method.

Prototype

public Tree skipRoot() 

Source Link

Document

Returns first child if this is unary and if the label at the current node is either "ROOT" or empty.

Usage

From source file: es.uam.irg.nlp.syntax.SyntacticAnalyzer.java

License:Open Source License

/**
 * Runs the given text through a Stanford CoreNLP pipeline (tokenize, ssplit, pos, parse)
 * and wraps every detected sentence together with a treebank built from its parse tree.
 *
 * <p>Before annotation every literal {@code "..."} in the text is replaced by a single
 * {@code "."}. For each sentence the parse tree is taken without its root wrapper
 * ({@code skipRoot()}) and rendered in Penn format to construct the treebank.
 *
 * @param text the raw text to analyze
 * @return one {@code SyntacticallyAnalyzedSentence} per sentence found by the pipeline, in
 *         pipeline order; an empty list when the pipeline reports no sentences
 * @throws Exception declared by the original signature; the throwing call is not visible here
 */
public static List<SyntacticallyAnalyzedSentence> analyzeSentences(String text) throws Exception {
    // A fresh pipeline is configured on every call with just the annotators needed for parsing.
    Properties pipelineConfig = new Properties();
    pipelineConfig.put("annotators", "tokenize, ssplit, pos, parse");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(pipelineConfig);

    Annotation annotation = new Annotation(text.replace("...", "."));
    pipeline.annotate(annotation);

    List<SyntacticallyAnalyzedSentence> result = new ArrayList<>();
    List<CoreMap> coreSentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    if (coreSentences == null || coreSentences.isEmpty()) {
        return result;
    }

    for (CoreMap coreSentence : coreSentences) {
        String sentenceText = coreSentence.toString();
        Tree parse = coreSentence.get(TreeCoreAnnotations.TreeAnnotation.class);
        SyntacticTreebank treebank = new SyntacticTreebank(parse.skipRoot().pennString(), true);
        result.add(new SyntacticallyAnalyzedSentence(sentenceText, treebank));
    }
    return result;
}

From source file: org.aksw.simba.bengal.triple2nl.property.PropertyVerbalizer.java

License:Apache License

/**
 * Derives a verbalization type for a property by annotating its text with the NLP pipeline
 * and building a space-separated pattern from the tokens' POS tags.
 *
 * <p>Per sentence, the first token contributes its upper-cased lemma when the lemma is
 * {@code "be"} or the word is {@code "have"}; otherwise, like every following token, it
 * contributes {@code "NP"}, {@code "VP"} or its raw POS tag (see
 * {@link #toPatternElement(String)}). When the parse tree (ROOT skipped, and one clause level
 * descended if the top node matches the clause tags) is a verb phrase whose first
 * non-preterminal child is a noun phrase that does not start with a determiner,
 * {@code "DET"} is inserted after the first pattern element. The pattern accumulates across
 * all sentences of the text.
 *
 * <p>Sentences without tokens are skipped, and a missing sentence annotation is treated like
 * an empty sentence list.
 *
 * @param propertyURI  URI of the property; passed through to the result unchanged
 * @param propertyText text of the property that is annotated
 * @return a {@code PropertyVerbalization} holding the URI, the text, the trimmed pattern and
 *         the type: {@code VERB} when the first token of the last processed sentence has a
 *         lemma contained in {@code auxiliaryVerbs} or the pattern matches
 *         {@code VERB_PATTERN}, otherwise {@code UNSPECIFIED}
 */
private PropertyVerbalization getTypeByLinguisticAnalysis(String propertyURI, String propertyText) {
    logger.debug("...using linguistical analysis...");
    Annotation document = new Annotation(propertyText);
    pipeline.annotate(document);
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);

    String pattern = "";
    PropertyVerbalizationType verbalizationType = PropertyVerbalizationType.UNSPECIFIED;
    boolean firstTokenAuxiliary = false;

    // The clause-tag alternation does not depend on the sentence, so it is built only once.
    String clauseTagRegex = Joiner.on('|').join(Lists.newArrayList(S, SBAR, SBARQ, SINV, FRAGMENT));

    if (sentences != null) {
        for (CoreMap sentence : sentences) {
            List<CoreLabel> tokens = sentence.get(TokensAnnotation.class);
            if (tokens == null || tokens.isEmpty()) {
                // Nothing to inspect; avoids an IndexOutOfBoundsException on tokens.get(0).
                continue;
            }

            // get the first word and check whether it is a form of 'be' or the word 'have'
            CoreLabel firstToken = tokens.get(0);
            String word = firstToken.get(TextAnnotation.class);
            String pos = firstToken.get(PartOfSpeechAnnotation.class);
            String lemma = firstToken.getString(LemmaAnnotation.class);

            firstTokenAuxiliary = auxiliaryVerbs.contains(lemma);

            StringBuilder sentencePattern = new StringBuilder();
            // NOTE(review): the original comment spoke of 'is' or 'has', but only the literal
            // surface form "have" is matched here, so "has" falls through to the POS branch.
            // Confirm whether lemma.equals("have") was intended.
            if (lemma.equals("be") || word.equals("have")) {
                sentencePattern.append(lemma.toUpperCase());
            } else {
                sentencePattern.append(toPatternElement(pos));
            }
            if (tokens.size() > 1) {
                sentencePattern.append(' ');
                for (int i = 1; i < tokens.size(); i++) {
                    String tokenPos = tokens.get(i).get(PartOfSpeechAnnotation.class);
                    sentencePattern.append(toPatternElement(tokenPos)).append(' ');
                }
            }
            pattern += sentencePattern;

            // get the parse tree and skip the ROOT tag
            Tree tree = sentence.get(TreeAnnotation.class).skipRoot();
            logger.debug("Parse tree:" + tree.pennString());

            // check if VP is directly followed by NP
            // sometimes parent node is S,SINV,etc.
            if (tree.value().matches(clauseTagRegex)) {
                tree = tree.getChild(0);
            }
            boolean useDeterminer = false;
            if (tree.value().equals(VERB_PHRASE.getTag())) {
                for (Tree child : tree.getChildrenAsList()) {
                    // only the first non-preterminal child decides: it must be an NP that
                    // does not already start with a determiner
                    if (!child.isPreTerminal()) {
                        if (child.value().equals(NOUN_PHRASE.getTag())
                                && !child.getChild(0).value().equals(DETERMINER.getTag())) {
                            useDeterminer = true;
                        }
                        break;
                    }
                }
            }

            // add determiner tag right after the first pattern element
            if (useDeterminer) {
                String[] split = pattern.split(" ");
                pattern = split[0] + " DET " + Joiner.on(" ").join(Arrays.copyOfRange(split, 1, split.length));
            }
        }
    }
    pattern = pattern.trim();

    // if first token is an auxiliary can return verb
    if (firstTokenAuxiliary) {
        verbalizationType = PropertyVerbalizationType.VERB;
    }

    // check if pattern matches
    if (pattern.matches(VERB_PATTERN)) {
        logger.debug("...successfully determined type.");
        verbalizationType = PropertyVerbalizationType.VERB;
    }
    return new PropertyVerbalization(propertyURI, propertyText, pattern, verbalizationType);
}

/**
 * Maps a POS tag to its pattern element: {@code "NP"} for tags starting with {@code "N"},
 * {@code "VP"} for tags starting with {@code "V"}, and the tag itself otherwise.
 */
private static String toPatternElement(String pos) {
    if (pos.startsWith("N")) {
        return "NP";
    }
    if (pos.startsWith("V")) {
        return "VP";
    }
    return pos;
}