Example usage for edu.stanford.nlp.trees Tree isPhrasal

List of usage examples for edu.stanford.nlp.trees Tree isPhrasal

Introduction

On this page you can find example usages of the isPhrasal method of edu.stanford.nlp.trees.Tree.

Prototype

public boolean isPhrasal() 

Source Link

Document

Return whether this node is a phrasal node or not.

Usage

From source file:de.tudarmstadt.ukp.dkpro.core.corenlp.internal.CoreNlp2DKPro.java

License:Open Source License

private static org.apache.uima.jcas.tcas.Annotation convertConstituentTreeNode(JCas aJCas,
        TreebankLanguagePack aTreebankLanguagePack, Tree aNode, org.apache.uima.jcas.tcas.Annotation aParentFS,
        boolean internStrings, MappingProvider constituentMappingProvider, List<CoreLabel> tokens) {
    // Get node label
    String nodeLabelValue = aNode.value();

    // Extract syntactic function from node label
    String syntacticFunction = null;
    AbstractTreebankLanguagePack tlp = (AbstractTreebankLanguagePack) aTreebankLanguagePack;
    int gfIdx = nodeLabelValue.indexOf(tlp.getGfCharacter());
    if (gfIdx > 0) {
        syntacticFunction = nodeLabelValue.substring(gfIdx + 1);
        nodeLabelValue = nodeLabelValue.substring(0, gfIdx);
    }/*from  w w w .j av  a 2  s. com*/

    // Check if node is a constituent node on sentence or phrase-level
    if (aNode.isPhrasal()) {
        Type constType = constituentMappingProvider.getTagType(nodeLabelValue);

        IntPair span = aNode.getSpan();
        int begin = tokens.get(span.getSource()).get(CharacterOffsetBeginAnnotation.class);
        int end = tokens.get(span.getTarget()).get(CharacterOffsetEndAnnotation.class);

        Constituent constituent = (Constituent) aJCas.getCas().createAnnotation(constType, begin, end);
        constituent.setConstituentType(internStrings ? nodeLabelValue.intern() : nodeLabelValue);
        constituent.setSyntacticFunction(
                internStrings && syntacticFunction != null ? syntacticFunction.intern() : syntacticFunction);
        constituent.setParent(aParentFS);

        // Do we have any children?
        List<org.apache.uima.jcas.tcas.Annotation> childAnnotations = new ArrayList<>();
        for (Tree child : aNode.getChildrenAsList()) {
            org.apache.uima.jcas.tcas.Annotation childAnnotation = convertConstituentTreeNode(aJCas,
                    aTreebankLanguagePack, child, constituent, internStrings, constituentMappingProvider,
                    tokens);
            if (childAnnotation != null) {
                childAnnotations.add(childAnnotation);
            }
        }

        // Now that we know how many children we have, link annotation of
        // current node with its children
        constituent.setChildren(FSCollectionFactory.createFSArray(aJCas, childAnnotations));

        constituent.addToIndexes();

        return constituent;
    }
    // Create parent link on token
    else if (aNode.isPreTerminal()) {
        // link token to its parent constituent
        List<Tree> children = aNode.getChildrenAsList();
        assert children.size() == 1;
        Tree terminal = children.get(0);
        CoreLabel label = (CoreLabel) terminal.label();
        Token token = label.get(TokenKey.class);
        token.setParent(aParentFS);
        return token;
    } else {
        throw new IllegalArgumentException("Node must be either phrasal nor pre-terminal");
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.StanfordAnnotator.java

License:Open Source License

/**
 * Creates linked constituent annotations + POS annotations
 * /*from  w  w w  . j a va 2  s . co  m*/
 * @param aTreebankLanguagePack
 *            the language pack.
 * @param aNode
 *            the source tree
 * @param aParentFS
 *            the parent annotation
 * @param aCreatePos
 *            sets whether to create or not to create POS tags
 * @return the child-structure (needed for recursive call only)
 */
private Annotation createConstituentAnnotationFromTree(TreebankLanguagePack aTreebankLanguagePack, Tree aNode,
        Annotation aParentFS, boolean aCreatePos) {
    String nodeLabelValue = aNode.value();
    String syntacticFunction = null;
    AbstractTreebankLanguagePack tlp = (AbstractTreebankLanguagePack) aTreebankLanguagePack;
    int gfIdx = nodeLabelValue.indexOf(tlp.getGfCharacter());
    if (gfIdx > 0) {
        syntacticFunction = nodeLabelValue.substring(gfIdx + 1);
        nodeLabelValue = nodeLabelValue.substring(0, gfIdx);
    }

    // calculate span for the current subtree
    IntPair span = tokenTree.getSpan(aNode);

    // Check if the node has been marked by a TSurgeon operation.
    // If so, add a tag-annotation on the constituent
    if (nodeLabelValue.contains(TAG_SEPARATOR) && !nodeLabelValue.equals(TAG_SEPARATOR)) {
        int separatorIndex = nodeLabelValue.indexOf(TAG_SEPARATOR);
        String tag = nodeLabelValue.substring(0, separatorIndex);
        nodeLabelValue = nodeLabelValue.substring(separatorIndex + 1, nodeLabelValue.length());
        createTagAnnotation(span.getSource(), span.getTarget(), tag);
    }

    // Check if node is a constituent node on sentence or phrase-level
    if (aNode.isPhrasal()) {

        // add annotation to annotation tree
        Constituent constituent = createConstituentAnnotation(span.getSource(), span.getTarget(),
                nodeLabelValue, syntacticFunction);
        // link to parent
        if (aParentFS != null) {
            constituent.setParent(aParentFS);
        }

        // Do we have any children?
        List<Annotation> childAnnotations = new ArrayList<Annotation>();
        for (Tree child : aNode.getChildrenAsList()) {
            Annotation childAnnotation = createConstituentAnnotationFromTree(aTreebankLanguagePack, child,
                    constituent, aCreatePos);
            if (childAnnotation != null) {
                childAnnotations.add(childAnnotation);
            }
        }

        // Now that we know how many children we have, link annotation of
        // current node with its children
        FSArray children = new FSArray(jCas, childAnnotations.size());
        int curChildNum = 0;
        for (FeatureStructure child : childAnnotations) {
            children.set(curChildNum, child);
            curChildNum++;
        }
        constituent.setChildren(children);

        // write annotation for current node to index
        jCas.addFsToIndexes(constituent);

        return constituent;
    }
    // If the node is a word-level constituent node (== POS):
    // create parent link on token and (if not turned off) create POS tag
    else if (aNode.isPreTerminal()) {
        // create POS-annotation (annotation over the token)
        POS pos = createPOSAnnotation(span.getSource(), span.getTarget(), nodeLabelValue);

        // in any case: get the token that is covered by the POS
        // TODO how about multi word prepositions etc. (e.g. "such as")
        List<Token> coveredTokens = JCasUtil.selectCovered(jCas, Token.class, pos);
        // the POS should only cover one token
        assert coveredTokens.size() == 1;
        Token token = coveredTokens.get(0);

        // only add POS to index if we want POS-tagging
        if (aCreatePos) {
            jCas.addFsToIndexes(pos);
            token.setPos(pos);
        }

        // link token to its parent constituent
        if (aParentFS != null) {
            token.setParent(aParentFS);
        }

        return token;
    } else {
        throw new IllegalArgumentException("Node must be either phrasal nor pre-terminal");
    }
}

From source file:opennlp.tools.parse_thicket.parse_thicket2graph.GraphFromPTreeBuilder.java

License:Apache License

private void navigate(Tree tree, Graph<ParseGraphNode, DefaultEdge> g, int l, ParseGraphNode currParent) {
    // String currParent = tree.label().value()+" $"+Integer.toString(l);
    // g.addVertex(currParent);
    if (tree.getChildrenAsList().size() == 1)
        navigate(tree.getChildrenAsList().get(0), g, l + 1, currParent);
    else if (tree.getChildrenAsList().size() == 0)
        return;//from www. ja va 2  s.c  o  m

    for (Tree child : tree.getChildrenAsList()) {
        String currChild = null;
        ParseGraphNode currChildNode = null;
        try {
            if (child.isLeaf())
                continue;
            if (child.label().value().startsWith("S"))
                navigate(child.getChildrenAsList().get(0), g, l + 1, currParent);

            if (!child.isPhrasal() || child.isPreTerminal())
                currChild = child.toString() + " #" + Integer.toString(l);
            else
                currChild = child.label().value() + " #" + Integer.toString(l);
            currChildNode = new ParseGraphNode(child, currChild);
            g.addVertex(currChildNode);
            g.addEdge(currParent, currChildNode);
        } catch (Exception e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
        navigate(child, g, l + 1, currChildNode);
    }
}

From source file:pltag.parser.semantics.discriminative.ExtractFeatures.java

License:Open Source License

/**
 * Identify the list of rightmost non-terminals that span a complete subtree, i.e., one that
 * a) the leaf of its' rightmost child is a word, OR
 * b) the index of the leaf of its' rightmost is a word AND is the last in the yield (AND this leaf is the last word - optional, as this condition breeches incrementality).
 * @param analysisTree/*from w w  w. j av a2  s.  c om*/
 * @return 
 */
private List<Tree> getListOfRightMostCompleteNonTerminals(Tree tree) {
    List<Tree> list = new ArrayList();
    List<Tree> leaves = tree.getLeaves();
    // check if the last leaf is a word.
    Tree currentWord = leaves.get(leaves.size() - 1);
    if (currentWord.nodeString().endsWith("<>")) {
        Tree parent = currentWord.parent(tree);
        while (parent != tree) {
            if (parent.isPhrasal()) {
                list.add(parent);
            }
            parent = parent.parent(tree);
        }
        list.add(tree);
    }
    return list;
}

From source file:reactivetechnologies.sentigrade.engine.nlp.SentimentAnalyzer.java

License:Apache License

private static void print(Tree tree) {
    if (LOG.isDebugEnabled()) {
        Tree t = tree.deepCopy();
        //setSentimentLabels(t);
        LOG.debug("leaf?" + t.isLeaf() + " phrrasal?" + t.isPhrasal() + " preterminal?" + t.isPreTerminal()
                + " class:" + RNNCoreAnnotations.getPredictedClass(t));
        LOG.debug("" + t + "");

    }//from   w  w w .  j a va 2 s.  c  o m

}

From source file:reactivetechnologies.sentigrade.engine.nlp.SentimentAnalyzer.java

License:Apache License

private SentimentVector calculatePOSScores(Tree parse) {
    SentimentVector vector = new SentimentVector();
    if (parse.isPhrasal()) {
        // TregexPattern pattern = TregexPattern.compile("@NP");//noun
        // TregexPattern pattern = TregexPattern.compile("@VP");
        // TregexPattern pattern = TregexPattern.compile("@JJS");//adjective
        // superlative
        // TregexPattern pattern = TregexPattern.compile("@CD");//numeric
        TregexPattern pattern = TregexPattern.compile("@ADJP");
        TregexMatcher matcher = pattern.matcher(parse);

        while (matcher.find()) {
            Tree match = matcher.getMatch();
            vector.adjScore += calcPOSAdj(match.deepCopy());
            vector.advScore += calcPOSAdv(match.deepCopy());
            vector.nounScore += calcPOSNoun(match.deepCopy());
            vector.verbScore += calcPOSVerb(match.deepCopy());
        }//from   www .ja va 2 s .c  om

        pattern = TregexPattern.compile("@VP");
        matcher = pattern.matcher(parse);
        while (matcher.find()) {
            Tree match = matcher.getMatch();
            vector.adjScore += calcPOSAdj(match.deepCopy());
            vector.nounScore += calcPOSNoun(match.deepCopy());
            vector.verbScore += calcPOSVerb(match.deepCopy());
            vector.advScore += calcPOSAdv(match.deepCopy());
        }
    }
    return vector;
}