Example usage for edu.stanford.nlp.trees Tree firstChild

Introduction

In this page you can find the example usage for edu.stanford.nlp.trees Tree firstChild.

Prototype

public Tree firstChild()

Source Link

Document

Returns the first child of a tree, or null if none.

Usage

From source file:com.parse.Dependency.java

public static void main(String[] args) {
    LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
    lp.setOptionFlags(new String[] { "-maxLength", "80", "-retainTmpSubcategories", });

    String[] sent = { "This", "is", "an", "easy", "sentence", "." };
    List<CoreLabel> rawWords = Sentence.toCoreLabelList(sent);
    Tree parse = lp.apply(rawWords);/*from  w  w w.j  av a 2  s.  c o  m*/
    parse.pennPrint();
    System.out.println();

    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
    List<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
    System.out.println(tdl);
    //System.out.println();

    //TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed");
    // tp.printTree(parse);

    String sentence = "which movies were directed by Christopher Nolan";
    Tree t2 = lp.parse(sentence);
    System.out.println(t2.firstChild().toString());
    gs = gsf.newGrammaticalStructure(t2);
    tdl = gs.typedDependenciesCCprocessed();
    System.out.println(tdl);
    System.out.println(tdl.get(0).dep().nodeString());

}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeWithTokens.java

License:Open Source License

private Tree getLeftmostLeaf(Tree t) {
    if (t.isLeaf()) {
        return t;
    } else {//  w w w .  j av  a  2s . co m
        return getLeftmostLeaf(t.firstChild());
    }
}

From source file:edu.cmu.deiis.annotator.StanfordCoreNLPAnnotator.java

License:Open Source License

@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    Annotation document = this.processor.process(jCas.getDocumentText());

    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {

        // create the token annotation
        int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
        int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
        String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
        String lemma = tokenAnn.get(LemmaAnnotation.class);
        Token token = new Token(jCas, begin, end);
        token.setPos(pos);/*from  ww w  .jav a2  s .co  m*/
        token.setLemma(lemma);
        token.addToIndexes();

        // hackery to convert token-level named entity tag into phrase-level tag
        String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
        if (neTag.equals("O") && !lastNETag.equals("O")) {
            NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
            ne.setMentionType(lastNETag);
            ne.addToIndexes();
        } else {
            if (lastNETag.equals("O")) {
                lastNEBegin = begin;
            } else if (lastNETag.equals(neTag)) {
                // do nothing - begin was already set
            } else {
                NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
                ne.setMentionType(lastNETag);
                ne.addToIndexes();
                lastNEBegin = begin;
            }
            lastNEEnd = end;
        }
        lastNETag = neTag;
    }
    if (!lastNETag.equals("O")) {
        NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
        ne.setMentionType(lastNETag);
        ne.addToIndexes();
    }

    // add sentences and trees
    for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) {

        // add the sentence annotation
        int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
        int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
        Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
        sentence.addToIndexes();

        // add the syntactic tree annotation
        List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class);
        Tree tree = sentenceAnn.get(TreeAnnotation.class);
        if (tree.children().length != 1) {
            throw new RuntimeException("Expected single root node, found " + tree);
        }
        tree = tree.firstChild();
        tree.indexSpans(0);
        TopTreebankNode root = new TopTreebankNode(jCas);
        root.setTreebankParse(tree.toString());
        // TODO: root.setTerminals(v)
        this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns);

        // get the dependencies
        SemanticGraph dependencies = sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class);

        // convert Stanford nodes to UIMA annotations
        List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
        Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>();
        for (IndexedWord stanfordNode : dependencies.vertexSet()) {
            int indexBegin = stanfordNode.get(BeginIndexAnnotation.class);
            int indexEnd = stanfordNode.get(EndIndexAnnotation.class);
            int tokenBegin = tokens.get(indexBegin).getBegin();
            int tokenEnd = tokens.get(indexEnd - 1).getEnd();
            DependencyNode node;
            if (dependencies.getRoots().contains(stanfordNode)) {
                node = new TopDependencyNode(jCas, tokenBegin, tokenEnd);
            } else {
                node = new DependencyNode(jCas, tokenBegin, tokenEnd);
            }
            stanfordToUima.put(stanfordNode, node);
        }

        // create relation annotations for each Stanford dependency
        ArrayListMultimap<DependencyNode, DependencyRelation> headRelations = ArrayListMultimap.create();
        ArrayListMultimap<DependencyNode, DependencyRelation> childRelations = ArrayListMultimap.create();
        for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
            DependencyRelation relation = new DependencyRelation(jCas);
            DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor());
            DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent());
            String relationType = stanfordEdge.getRelation().toString();
            if (head == null || child == null || relationType == null) {
                throw new RuntimeException(String.format(
                        "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n", relation,
                        child, head));
            }
            relation.setHead(head);
            relation.setChild(child);
            relation.setRelation(relationType);
            relation.addToIndexes();
            headRelations.put(child, relation);
            childRelations.put(head, relation);
        }

        // set the relations for each node annotation
        for (DependencyNode node : stanfordToUima.values()) {
            List<DependencyRelation> heads = headRelations.get(node);
            node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size()));
            if (heads != null) {
                FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads);
            }
            List<DependencyRelation> children = childRelations.get(node);
            node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size()));
            if (children != null) {
                FSCollectionFactory.fillArrayFS(node.getChildRelations(), children);
            }
            node.addToIndexes();
        }
    }

    // map from spans to named entity mentions
    Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>();
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention);
    }

    // add mentions for all entities identified by the coreference system
    List<NamedEntity> entities = new ArrayList<NamedEntity>();
    List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence));
    }
    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    for (CorefChain chain : corefChains.values()) {
        List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
        for (CorefMention corefMention : chain.getMentionsInTextualOrder()) {

            // figure out the character span of the token
            List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1);
            int begin = tokens.get(corefMention.startIndex - 1).getBegin();
            int end = tokens.get(corefMention.endIndex - 2).getEnd();

            // use an existing named entity mention when possible; otherwise create a new one
            NamedEntityMention mention = spanMentionMap.get(new Span(begin, end));
            if (mention == null) {
                mention = new NamedEntityMention(jCas, begin, end);
                mention.addToIndexes();
            }
            mentions.add(mention);
        }

        // create an entity for the mentions
        Collections.sort(mentions, new Comparator<NamedEntityMention>() {
            @Override
            public int compare(NamedEntityMention m1, NamedEntityMention m2) {
                return m1.getBegin() - m2.getBegin();
            }
        });

        // create mentions and add them to entity
        NamedEntity entity = new NamedEntity(jCas);
        entity.setMentions(new FSArray(jCas, mentions.size()));
        int index = 0;
        for (NamedEntityMention mention : mentions) {
            mention.setMentionedEntity(entity);
            entity.setMentions(index, mention);
            index += 1;
        }
        entities.add(entity);
    }

    // add singleton entities for any named entities not picked up by coreference system
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        if (mention.getMentionedEntity() == null) {
            NamedEntity entity = new NamedEntity(jCas);
            entity.setMentions(new FSArray(jCas, 1));
            entity.setMentions(0, mention);
            mention.setMentionedEntity(entity);
            entity.getMentions();
            entities.add(entity);
        }
    }

    // sort entities by document order
    Collections.sort(entities, new Comparator<NamedEntity>() {
        @Override
        public int compare(NamedEntity o1, NamedEntity o2) {
            return getFirstBegin(o1) - getFirstBegin(o2);
        }

        private int getFirstBegin(NamedEntity entity) {
            int min = Integer.MAX_VALUE;
            for (NamedEntityMention mention : JCasUtil.select(entity.getMentions(), NamedEntityMention.class)) {
                if (mention.getBegin() < min) {
                    min = mention.getBegin();
                }
            }
            return min;
        }
    });

    // add entities to document
    for (NamedEntity entity : entities) {
        entity.addToIndexes();
    }

}

From source file:edu.cmu.deiis.annotators.StanfordAnnotator.java

License:Open Source License

@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    Annotation document = this.processor.process(jCas.getDocumentText());

    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {

        // create the token annotation
        int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
        int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
        String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
        String lemma = tokenAnn.get(LemmaAnnotation.class);
        Token token = new Token(jCas, begin, end);
        token.setPos(pos);//from  w  w  w  . jav a 2s.com
        token.setLemma(lemma);
        token.addToIndexes();

        // hackery to convert token-level named entity tag into phrase-level tag
        String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
        if (neTag.equals("O") && !lastNETag.equals("O")) {
            NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
            ne.setMentionType(lastNETag);
            ne.addToIndexes();
        } else {
            if (lastNETag.equals("O")) {
                lastNEBegin = begin;
            } else if (lastNETag.equals(neTag)) {
                // do nothing - begin was already set
            } else {
                NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
                ne.setMentionType(lastNETag);
                ne.addToIndexes();
                lastNEBegin = begin;
            }
            lastNEEnd = end;
        }
        lastNETag = neTag;
    }
    if (!lastNETag.equals("O")) {
        NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
        ne.setMentionType(lastNETag);
        ne.addToIndexes();
    }

    // add sentences and trees
    for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) {

        // add the sentence annotation
        int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
        int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
        Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
        sentence.addToIndexes();

        // add the syntactic tree annotation
        List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class);
        Tree tree = sentenceAnn.get(TreeAnnotation.class);
        if (tree.children().length != 1) {
            throw new RuntimeException("Expected single root node, found " + tree);
        }
        tree = tree.firstChild();
        tree.indexSpans(0);
        TopTreebankNode root = new TopTreebankNode(jCas);
        root.setTreebankParse(tree.toString());
        // TODO: root.setTerminals(v)
        this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns);

        // get the dependencies
        SemanticGraph dependencies = sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class);

        // convert Stanford nodes to UIMA annotations
        List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
        Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>();
        for (IndexedWord stanfordNode : dependencies.vertexSet()) {
            int indexBegin = stanfordNode.get(BeginIndexAnnotation.class);
            int indexEnd = stanfordNode.get(EndIndexAnnotation.class);
            int tokenBegin = tokens.get(indexBegin).getBegin();
            int tokenEnd = tokens.get(indexEnd - 1).getEnd();
            DependencyNode node;
            if (dependencies.getRoots().contains(stanfordNode)) {
                node = new TopDependencyNode(jCas, tokenBegin, tokenEnd);
            } else {
                node = new DependencyNode(jCas, tokenBegin, tokenEnd);
            }
            stanfordToUima.put(stanfordNode, node);
        }

        // create relation annotations for each Stanford dependency
        ArrayListMultimap<DependencyNode, DependencyRelation> headRelations = ArrayListMultimap.create();
        ArrayListMultimap<DependencyNode, DependencyRelation> childRelations = ArrayListMultimap.create();
        for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
            DependencyRelation relation = new DependencyRelation(jCas);
            DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor());
            DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent());
            String relationType = stanfordEdge.getRelation().toString();
            if (head == null || child == null || relationType == null) {
                throw new RuntimeException(String.format(
                        "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n", relation,
                        child, head));
            }
            relation.setHead(head);
            relation.setChild(child);
            relation.setRelation(relationType);
            relation.addToIndexes();
            headRelations.put(child, relation);
            childRelations.put(head, relation);
        }

        // set the relations for each node annotation
        for (DependencyNode node : stanfordToUima.values()) {
            List<DependencyRelation> heads = headRelations.get(node);
            node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size()));
            if (heads != null) {
                FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads);
            }
            List<DependencyRelation> children = childRelations.get(node);
            node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size()));
            if (children != null) {
                FSCollectionFactory.fillArrayFS(node.getChildRelations(), children);
            }
            node.addToIndexes();
        }
    }

    // map from spans to named entity mentions
    Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>();
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention);
    }

    // add mentions for all entities identified by the coreference system
    List<NamedEntity> entities = new ArrayList<NamedEntity>();
    List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence));
    }
    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    for (CorefChain chain : corefChains.values()) {
        List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
        for (CorefMention corefMention : chain.getMentionsInTextualOrder()) {

            // figure out the character span of the token
            List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1);
            int begin = tokens.get(corefMention.startIndex - 1).getBegin();
            int end = tokens.get(corefMention.endIndex - 2).getEnd();

            // use an existing named entity mention when possible; otherwise create a new one
            NamedEntityMention mention = spanMentionMap.get(new Span(begin, end));
            if (mention == null) {
                mention = new NamedEntityMention(jCas, begin, end);
                //String line = mention.getCoveredText();
                //System.out.println(line);
                mention.addToIndexes();
            }
            mentions.add(mention);
        }

        // create an entity for the mentions
        Collections.sort(mentions, new Comparator<NamedEntityMention>() {
            @Override
            public int compare(NamedEntityMention m1, NamedEntityMention m2) {
                return m1.getBegin() - m2.getBegin();
            }
        });

        // create mentions and add them to entity
        NamedEntity entity = new NamedEntity(jCas);
        entity.setMentions(new FSArray(jCas, mentions.size()));
        int index = 0;
        for (NamedEntityMention mention : mentions) {
            mention.setMentionedEntity(entity);
            entity.setMentions(index, mention);
            index += 1;
        }
        entities.add(entity);
    }

    // add singleton entities for any named entities not picked up by coreference system
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        if (mention.getMentionedEntity() == null) {
            NamedEntity entity = new NamedEntity(jCas);
            entity.setMentions(new FSArray(jCas, 1));
            entity.setMentions(0, mention);
            mention.setMentionedEntity(entity);
            entity.getMentions();
            entities.add(entity);
        }
    }

    // sort entities by document order
    Collections.sort(entities, new Comparator<NamedEntity>() {
        @Override
        public int compare(NamedEntity o1, NamedEntity o2) {
            return getFirstBegin(o1) - getFirstBegin(o2);
        }

        private int getFirstBegin(NamedEntity entity) {
            int min = Integer.MAX_VALUE;
            for (NamedEntityMention mention : JCasUtil.select(entity.getMentions(), NamedEntityMention.class)) {
                if (mention.getBegin() < min) {
                    min = mention.getBegin();
                }
            }
            return min;
        }
    });

    // add entities to document
    for (NamedEntity entity : entities) {
        //NamedEntityMention mention=entity.getMentions(3);
        //System.out.println(mention.getBegin());
        entity.addToIndexes();
    }

}

From source file:elkfed.coref.discourse_entities.DiscourseEntity.java

License:Open Source License

/**
 * Reads the premodifiers from the input Mention and creates Property objects as attributes for every premodifier
 * not part of an embedded NE. Embedded NEs are to be treated as relations to other Discourse Entities.
 * @param np The NP to be processed/*from  ww  w. j  a v a  2  s.c  om*/
 * @return Set A Set of Property Objects; one for every premodifier (attribute)
 */
private Set<Property> computeAttributes(Mention np) {
    LanguagePlugin lang_plugin = ConfigProperties.getInstance().getLanguagePlugin();
    Set<Property> result = new LinkedHashSet<Property>();
    List<Tree> preModifiers = np.getPremodifiers(); // straight from Mention
    //DEBUG
    //System.out.println("Number of premodifiers of "+np.getMarkableString()+" :"+
    //        preModifiers.size());
    char pos = '\0';
    if ((preModifiers != null) && (preModifiers.size() > 0)) {
        for (int i = 0; i < preModifiers.size(); i++) {
            Tree mod = preModifiers.get(i); // Expected structure:
            // (NP (DT the) (JJ last) (NN supper))
            if (mod.isLeaf()) {
                // this shouldn't happen'
                System.out.println("WARNING: UNEXPECTED LEAF " + mod.nodeString());
                //result.add(new Property(Property.ATTRIBUTE, mod.nodeString()));
                //result.add(new Property(Property.ATTRIBUTE, getSense(mod.nodeString())));
            } else {
                NodeCategory ncat = lang_plugin.labelCat(mod.nodeString());
                if (mod.isPreTerminal()) {
                    if (ncat == NodeCategory.CN || ncat == NodeCategory.ADJ) {
                        if (ncat == NodeCategory.CN) {
                            pos = 'N';
                        }
                        if (ncat == NodeCategory.ADJ) {
                            pos = 'A';
                        }

                        //System.out.println("Pre terminal node "+ mod.nodeString());
                        Tree wordNode = mod.firstChild();
                        _logger.fine("Adding attribute " + wordNode.nodeString() + " to entity");
                        result.add(new Property(Property.ATTRIBUTE, wordNode.nodeString(), pos));
                    }
                }
            }
        }
    }
    return result;
}

From source file:elkfed.coref.discourse_entities.DiscourseEntity.java

License:Open Source License

private Set<Property> computeInitialRelations(Mention np) {
    LanguagePlugin lang_plugin = ConfigProperties.getInstance().getLanguagePlugin();
    Set<Property> result = new LinkedHashSet<Property>();
    List<Tree> postModifiers = np.getPostmodifiers(); // straight from Mention

    char pos = '\0';
    //DEBUG/*from   w w  w  .  j a  va 2  s. c  om*/
    //System.out.println("Number of postmodifiers of "+np.getMarkableString()+" :"+
    //        postModifiers.size());
    if ((postModifiers != null) && (postModifiers.size() > 0)) {
        for (int i = 0; i < postModifiers.size(); i++) {
            Tree mod = postModifiers.get(i); // Expected structure:
            // (NP  (NN software) (PP from (NP India))
            if (mod.isLeaf()) {
                // this shouldn't happen'
                System.out.println("WARNING: UNEXPECTED LEAF " + mod.nodeString());
                //result.add(new Property(Property.ATTRIBUTE, mod.nodeString()));
                //result.add(new Property(Property.ATTRIBUTE, getSense(mod.nodeString())));
            } else {
                if (mod.isPreTerminal()) { // this shouldn't happen either,
                    // but we'll add it to the properties
                    NodeCategory ncat = lang_plugin.labelCat(mod.nodeString());
                    if (ncat == NodeCategory.CN || ncat == NodeCategory.ADJ) {
                        if (ncat == NodeCategory.CN) {
                            pos = 'N';
                        }
                        if (ncat == NodeCategory.ADJ) {
                            pos = 'A';
                        }
                    }
                } else {
                    //System.out.println("Type of postmodifier: " + mod.nodeString());
                    NodeCategory ncat = lang_plugin.labelCat(mod.nodeString());
                    if (ncat == NodeCategory.PP) {
                        if (mod.numChildren() == 2) { // (PP (in from) (NP (nnp India)))
                            Tree prepNode = mod.getChild(0);
                            Tree npNode = mod.getChild(1);
                            Tree npHead = massimoHeadFindHack(npNode);
                            if (npHead != null && prepNode != null) {

                                //DEBUG
                                //System.out.println("Adding relation "+
                                //                  prepNode.firstChild().nodeString()+" "+
                                //                  npHead.firstChild().nodeString() );

                                /* -- no clue what it means, just fixed so that it doesn't crash  (Olga) -- */
                                if (prepNode.numChildren() > 0)
                                    prepNode = prepNode.firstChild();
                                result.add(
                                        new Property(prepNode.nodeString(), npHead.firstChild().nodeString()));
                            }
                        }
                    }
                }
            }
        } //end outer loop
    } //end if premodified
    return result;
}

From source file:elkfed.coref.discourse_entities.DiscourseEntity.java

License:Open Source License

private Tree massimoHeadFindHack(Tree npNode) {
    LanguagePlugin lang_plugin = ConfigProperties.getInstance().getLanguagePlugin();
    /*/*from  w ww .  jav  a  2s.  c o  m*/
     * NOTE (yv):
     * We should really have a decent configurable head finder.
     * The "generic" head finder below probably works, but ...
     * this is ugly enough for English, but making it work for
     * English *and* Italian (and possibly other languages)
     * is only something for very enthusiastic people with
     * slight masochistic tendencies.
     */
    //CollinsHeadFinder hf = new CollinsHeadFinder();
    //ModCollinsHeadFinder hf = new ModCollinsHeadFinder();

    /* -- trivial -- */
    if (npNode.numChildren() == 0)
        return npNode;
    if (npNode.numChildren() == 1) {
        if (npNode.firstChild().numChildren() == 0)
            return npNode;
        return massimoHeadFindHack(npNode.firstChild());
    }
    /* -- coordination -- */
    if (npNode.numChildren() > 2) {
        for (Tree n : npNode.children()) {
            if (lang_plugin.labelCat(n.nodeString()) == NodeCategory.CC)
                return null;
        }
    }

    /* -- last child is a noun (common/proper) --*/
    /* NB: will it work for italian though? */

    NodeCategory firstpos = lang_plugin.labelCat(npNode.firstChild().nodeString());
    NodeCategory nextpos = lang_plugin.labelCat(npNode.getChild(1).nodeString());
    NodeCategory lastpos = lang_plugin.labelCat(npNode.lastChild().nodeString());

    if (lastpos == NodeCategory.CN)
        return npNode.lastChild();
    if (lastpos == NodeCategory.PN)
        return npNode.lastChild();

    /* -- (NP (NP (DT the) (NN man)) (PP (in from) (NP (NNP UNCLE)))) -- */

    if (firstpos == NodeCategory.NP && nextpos != NodeCategory.CN)
        return massimoHeadFindHack(npNode.firstChild());

    /* -- misc -- */

    Tree found_head = null;
    int state = 0;
    for (Tree n : npNode.children()) {
        NodeCategory ncat = lang_plugin.labelCat(n.nodeString());
        if (ncat == NodeCategory.CN || ncat == NodeCategory.PN || ncat == NodeCategory.PRO) {
            state = 4;
            found_head = n;
        } else if (ncat == NodeCategory.NP && state < 3) {
            state = 3;
            found_head = n;
        } else if (ncat == NodeCategory.ADJ && state < 3) {
            state = 2;
            found_head = n;
        }
    }
    if (found_head != null) {
        if (state == 3) {
            return massimoHeadFindHack(found_head);
        }
        return found_head;
    }

    //    if (ConfigProperties.getInstance().getDbgPrint()) 
    System.err.println("Couldn't find a head for NP:" + npNode.pennString());
    return null;
}

From source file:ims.cs.parc.ParcUtils.java

License:Open Source License

/**
 * Find all head verbs in the corpus. The algorithm is taken from Pareti (2015).
 * @param sentence/*from  w ww  .ja v  a  2  s. c o m*/
 */
public static void markHeadVerbs(Sentence sentence) {

    for (Tree tree : sentence.tree.preOrderNodeList()) {
        if (tree.label().value().equals("VP")) {
            boolean valid = true;
            for (Tree child : tree.children()) {
                if (child.label().value().equals("VP")) {
                    valid = false;
                    break;
                }
            }

            if (valid) {
                for (Tree child : tree.children()) {
                    if (child.firstChild().isLeaf() && child.label().value().startsWith("V")) {
                        Token token = sentence.treeLookup.get(child.firstChild());
                        if (token != null)
                            token.isHeadVerb = true;
                    }
                }
            }
        }
    }
}

From source file:org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils.java

License:Open Source License

public static Tree getFirstLeaf(Tree tree) {
    if (tree.isLeaf()) {
        return tree;
    } else {//from   ww w. j av a 2 s.  c o m
        return getFirstLeaf(tree.firstChild());
    }
}

From source file:org.textmining.annotator.StanfordCoreNlpAnnotator.java

License:Open Source License

@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    Annotation document = this.processor.process(jCas.getDocumentText());

    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {

        // create the token annotation
        int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
        int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
        String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
        String lemma = tokenAnn.get(LemmaAnnotation.class);
        Token token = new Token(jCas, begin, end);
        token.setPos(pos);//from   www  . ja  va 2 s.  c o m
        token.setLemma(lemma);
        token.addToIndexes();

        // hackery to convert token-level named entity tag into phrase-level tag
        String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
        if (neTag.equals("O") && !lastNETag.equals("O")) {
            NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
            ne.setMentionType(lastNETag);
            ne.addToIndexes();
        } else {
            if (lastNETag.equals("O")) {
                lastNEBegin = begin;
            } else if (lastNETag.equals(neTag)) {
                // do nothing - begin was already set
            } else {
                NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
                ne.setMentionType(lastNETag);
                ne.addToIndexes();
                lastNEBegin = begin;
            }
            lastNEEnd = end;
        }
        lastNETag = neTag;
    }
    if (!lastNETag.equals("O")) {
        NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
        ne.setMentionType(lastNETag);
        ne.addToIndexes();
    }

    // add sentences and trees
    for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) {

        // add the sentence annotation
        int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
        int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
        Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
        sentence.addToIndexes();

        // add the syntactic tree annotation
        List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class);
        Tree tree = sentenceAnn.get(TreeAnnotation.class);
        if (tree.children().length != 1) {
            throw new RuntimeException("Expected single root node, found " + tree);
        }
        tree = tree.firstChild();
        tree.indexSpans(0);
        TopTreebankNode root = new TopTreebankNode(jCas);
        root.setTreebankParse(tree.toString());
        // TODO: root.setTerminals(v)
        this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns);

        // get the dependencies
        SemanticGraph dependencies = sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class);

        // convert Stanford nodes to UIMA annotations
        List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
        Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>();
        for (IndexedWord stanfordNode : dependencies.vertexSet()) {
            int indexBegin = stanfordNode.get(BeginIndexAnnotation.class);
            int indexEnd = stanfordNode.get(EndIndexAnnotation.class);
            int tokenBegin = tokens.get(indexBegin).getBegin();
            int tokenEnd = tokens.get(indexEnd - 1).getEnd();
            DependencyNode node;
            if (dependencies.getRoots().contains(stanfordNode)) {
                node = new TopDependencyNode(jCas, tokenBegin, tokenEnd);
            } else {
                node = new DependencyNode(jCas, tokenBegin, tokenEnd);
            }
            stanfordToUima.put(stanfordNode, node);
        }

        // create relation annotations for each Stanford dependency
        ArrayListMultimap<DependencyNode, DependencyRelation> headRelations = ArrayListMultimap.create();
        ArrayListMultimap<DependencyNode, DependencyRelation> childRelations = ArrayListMultimap.create();
        for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
            DependencyRelation relation = new DependencyRelation(jCas);
            DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor());
            DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent());
            String relationType = stanfordEdge.getRelation().toString();
            if (head == null || child == null || relationType == null) {
                throw new RuntimeException(String.format(
                        "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n", relation,
                        child, head));
            }
            relation.setHead(head);
            relation.setChild(child);
            relation.setRelation(relationType);
            relation.addToIndexes();
            headRelations.put(child, relation);
            childRelations.put(head, relation);
        }

        // set the relations for each node annotation
        for (DependencyNode node : stanfordToUima.values()) {
            List<DependencyRelation> heads = headRelations.get(node);
            node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size()));
            if (heads != null) {
                FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads);
            }
            List<DependencyRelation> children = childRelations.get(node);
            node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size()));
            if (children != null) {
                FSCollectionFactory.fillArrayFS(node.getChildRelations(), children);
            }
            node.addToIndexes();
        }
    }

    // map from spans to named entity mentions
    Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>();
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention);
    }

    // add mentions for all entities identified by the coreference system
    List<NamedEntity> entities = new ArrayList<NamedEntity>();
    List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence));
    }
    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    for (CorefChain chain : corefChains.values()) {
        List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
        for (CorefMention corefMention : chain.getMentionsInTextualOrder()) {

            // figure out the character span of the token
            List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1);
            int begin = tokens.get(corefMention.startIndex - 1).getBegin();
            int end = tokens.get(corefMention.endIndex - 2).getEnd();

            // use an existing named entity mention when possible; otherwise create a new one
            NamedEntityMention mention = spanMentionMap.get(new Span(begin, end));
            if (mention == null) {
                mention = new NamedEntityMention(jCas, begin, end);
                mention.addToIndexes();
            }
            mentions.add(mention);
        }

        // create an entity for the mentions
        Collections.sort(mentions, new Comparator<NamedEntityMention>() {
            @Override
            public int compare(NamedEntityMention m1, NamedEntityMention m2) {
                return m1.getBegin() - m2.getBegin();
            }
        });

        // create mentions and add them to entity
        NamedEntity entity = new NamedEntity(jCas);
        entity.setMentions(new FSArray(jCas, mentions.size()));
        int index = 0;
        for (NamedEntityMention mention : mentions) {
            mention.setMentionedEntity(entity);
            entity.setMentions(index, mention);
            index += 1;
        }
        entities.add(entity);
    }

    // add singleton entities for any named entities not picked up by coreference system
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        if (mention.getMentionedEntity() == null) {
            NamedEntity entity = new NamedEntity(jCas);
            entity.setMentions(new FSArray(jCas, 1));
            entity.setMentions(0, mention);
            mention.setMentionedEntity(entity);
            entity.getMentions();
            entities.add(entity);
        }
    }

    // sort entities by document order
    Collections.sort(entities, new Comparator<NamedEntity>() {
        @Override
        public int compare(NamedEntity o1, NamedEntity o2) {
            return getFirstBegin(o1) - getFirstBegin(o2);
        }

        private int getFirstBegin(NamedEntity entity) {
            int min = Integer.MAX_VALUE;
            for (NamedEntityMention mention : JCasUtil.select(entity.getMentions(), NamedEntityMention.class)) {
                if (mention.getBegin() < min) {
                    min = mention.getBegin();
                }
            }
            return min;
        }
    });

    // add entities to document
    for (NamedEntity entity : entities) {
        entity.addToIndexes();
    }

    //end of process-method
}