edu.nus.comp.nlp.tool.anaphoraresolution.AnnotatedText.java Source code

Introduction

Here is the source code for edu.nus.comp.nlp.tool.anaphoraresolution.AnnotatedText.java
Source

/**
 * JavaRAP: a freely-available JAVA anaphora resolution implementation of the
 * classic Lappin and Leass (1994) paper:
 * 
 * An Algorithm for Pronominal Anaphora Resolution. Computational Linguistics,
 * 20(4), pp. 535-561.
 * 
 * Copyright (C) 2005 Long Qiu
 * 
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 * 
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 * 
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */
package edu.nus.comp.nlp.tool.anaphoraresolution;

import static com.google.common.base.Preconditions.checkNotNull;

import java.util.Collections;
import java.util.Enumeration;
import java.util.List;

import javax.swing.tree.DefaultMutableTreeNode;
import javax.swing.tree.TreeNode;

import com.google.common.collect.Lists;

import edu.stanford.nlp.trees.Tree;

/**
 * @author Qiu Long
 * @version 1.0
 * @author "Yifan Peng"
 */

public class AnnotatedText {

    public static AnnotatedText parseAnnotatedText(List<String> sentences) {
        return new AnnotatedText(sentences);
    }

    public static AnnotatedText parseAnnotatedText(String s) {
        List<String> sents = Lists.newArrayList();
        String[] sentenceList = s.split("\\(S1 ");
        for (int i = 0; i < sentenceList.length; i++) {
            String sentence = sentenceList[i];
            if (!sentence.trim().isEmpty()) {
                sents.add("(S1 " + sentence.trim());
            }
        }
        return new AnnotatedText(sents);
    }

    // DefaultMutableTreeNode instance inside.overlapping allowed
    private List<TagWord> NPList;
    // DefaultMutableTreeNode instance inside.overlapping disallowed
    private List<TagWord> SNPList;
    // DefaultMutableTreeNode instance inside.overlapping disallowed
    private List<TagWord> PRPList;

    private DefaultMutableTreeNode rootNode;

    private AnnotatedText(List<String> sentences) {
        rootNode = new DefaultMutableTreeNode();
        for (int i = 0; i < sentences.size(); i++) {
            String sentence = sentences.get(i);
            TreeAdapter adpater = new TreeAdapter(Tree.valueOf(sentence), i);
            DefaultMutableTreeNode tn = adpater.getDefaultMutableTreeNode();
            rootNode.add(tn);
        }

        // rootNode = buildParseTree(sents);
        NPExtractor ex = new NPExtractor(rootNode);
        NPList = ex.getNPList();
        PRPList = ex.getPRPList();

        identifyPleonasticPronoun(rootNode);
        SNPList = buildSNPList(NPList);
    }

    private List<TagWord> buildSNPList(List<TagWord> npList) {
        if (npList.isEmpty()) {
            return Collections.emptyList();
        }
        TagWord sTW = npList.get(0);
        List<TagWord> snpList = Lists.newArrayList(sTW);
        for (int i = 1; i < npList.size(); i++) {
            TagWord tw = npList.get(i);
            if (!sTW.getNP().contains(tw.getNP())) {
                sTW = tw;
                snpList.add(sTW);
            }
        }
        return snpList;
    }

    public List<TagWord> getNPList() {
        return NPList;
    }

    public List<TagWord> getPRPList() {
        return PRPList;
    }

    public List<TagWord> getSNPList() {
        return SNPList;
    }

    public DefaultMutableTreeNode getTree() {
        return rootNode;
    }

    private void identifyPleonasticPronoun(DefaultMutableTreeNode root) {
        @SuppressWarnings("rawtypes")
        Enumeration enumeration = root.preorderEnumeration();

        while (enumeration.hasMoreElements()) {
            TreeNode node = (DefaultMutableTreeNode) enumeration.nextElement();
            TagWord tagWd = Utils.getTagWord(node);
            if (tagWd == null) {
                continue;
            }

            if (!tagWd.getTag().equalsIgnoreCase("PRP") || !tagWd.getText().equalsIgnoreCase("it")) {
                continue;
            }

            DefaultMutableTreeNode NPnode = (DefaultMutableTreeNode) node.getParent();
            checkNotNull(NPnode, "Weird: (PRP it) has no parent");

            DefaultMutableTreeNode parentNode = (DefaultMutableTreeNode) NPnode.getParent();
            checkNotNull(parentNode, "Weird: (PRP it) has no grandparent");

            DefaultMutableTreeNode siblingNode = (DefaultMutableTreeNode) NPnode.getNextSibling();
            DefaultMutableTreeNode nephewNode1 = null;
            DefaultMutableTreeNode nephewNode2 = null;
            DefaultMutableTreeNode nephewNode3 = null;
            if ((siblingNode != null) && (siblingNode.getChildCount() > 0)) {
                nephewNode1 = (DefaultMutableTreeNode) siblingNode.getChildAt(0);
                nephewNode2 = (DefaultMutableTreeNode) nephewNode1.getNextSibling();
                if (nephewNode2 != null) {
                    nephewNode3 = (DefaultMutableTreeNode) nephewNode2.getNextSibling();
                }
            }
            DefaultMutableTreeNode PrevSiblingNode = (DefaultMutableTreeNode) NPnode.getPreviousSibling();

            // identify pleonastic pronouns
            boolean isPleonastic = false;
            // It is very necessary
            // It is recommended that
            if (Utils.equalsIgnoreCaseTag(siblingNode, "VP") && Utils.equalsIgnoreCaseTag(nephewNode1, "AUX")
                    && Utils.equalsIgnoreCaseTag(nephewNode2, "ADJP")) {
                isPleonastic |= ModalAdj.findAny(Utils.getText(nephewNode2).split(" "));
            }

            if (Utils.equalsIgnoreCaseTag(siblingNode, "VP") && Utils.equalsIgnoreCaseTag(nephewNode1, "AUX")
                    && Utils.equalsIgnoreCaseTag(nephewNode3, "ADJP")) {
                isPleonastic |= ModalAdj.findAny(Utils.getText(nephewNode3).split(" "));
            }

            // really appreciate it
            if (Utils.equalsIgnoreCaseTag(PrevSiblingNode, "VB")) {
                isPleonastic |= ModalAdj.findAny(Utils.getText(PrevSiblingNode).split(" "));
            }

            // it may/might be
            if (Utils.equalsIgnoreCaseTag(siblingNode, "VP") && Utils.equalsIgnoreCaseTag(nephewNode1, "MD")
                    && Utils.equalsIgnoreCaseTag(nephewNode2, "VP") && nephewNode2.getChildCount() > 1
                    && Utils.equalsIgnoreCaseTag(nephewNode2.getChildAt(0), "AUX")
                    && Utils.equalsIgnoreCaseTag(nephewNode2.getChildAt(1), "ADJP")) {
                isPleonastic |= ModalAdj.findAny(Utils.getText(nephewNode2).split(" "));
            }

            DefaultMutableTreeNode uncleNode = (DefaultMutableTreeNode) parentNode.getPreviousSibling();
            // I will/could appreciate/ believe it
            if (Utils.equalsIgnoreCaseTag(siblingNode, "VB") && Utils.equalsIgnoreCaseTag(uncleNode, "MD")) {
                isPleonastic |= ModalAdj.findAny(Utils.getText(siblingNode).split(" "));
            }

            // find it important
            if (Utils.equalsIgnoreCaseTag(siblingNode, "ADJP")) {
                isPleonastic |= ModalAdj.findAny(Utils.getText(siblingNode).split(" "));
            }

            // it is thanks to/it is time to
            if (Utils.equalsIgnoreCaseTag(siblingNode, "VP") && Utils.equalsIgnoreCaseTag(nephewNode1, "AUX")
                    && Utils.equalsIgnoreCaseTag(nephewNode2, "NP")) {
                isPleonastic |= ModalAdj.findAny(Utils.getText(nephewNode2).split(" "));
            }

            // it follows that
            if (Utils.equalsIgnoreCaseTag(siblingNode, "VP") && Utils.equalsIgnoreCaseTag(nephewNode1, "VB")
                    && Utils.equalsIgnoreCaseTag(nephewNode2, "S")) {
                isPleonastic |= ModalAdj.find(Utils.getText(nephewNode1));
            }

            tagWd.setPleonastic(isPleonastic);
            // set parent NP as pleonastic also
            Utils.getTagWord(NPnode).setPleonastic(isPleonastic);
        } // /~while
    }

}