elkfed.coref.mentions.Mention.java Source code

Java tutorial

Introduction

Here is the source code for elkfed.coref.mentions.Mention.java

Source

/*
 * Mention.java
 * 
 * Copyright 2007 Project ELERFED
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* ToDo for parseheads -- make it optional (for corpora with annotated heads) 
 the option is supported (do_not_use_parsehead), but not in config
 */
package elkfed.coref.mentions;

import static elkfed.lang.EnglishLinguisticConstants.PRONOUN;
import static elkfed.lang.EnglishLinguisticConstants.SINGULAR_PRONOUN_ADJ;
import static elkfed.mmax.pipeline.MarkableCreator.ISPRENOMINAL_ATTRIBUTE;
import static elkfed.mmax.pipeline.MarkableCreator.SENTENCE_ID_ATTRIBUTE;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

import edu.stanford.nlp.trees.ModCollinsHeadFinder;
import edu.stanford.nlp.trees.international.tuebadz.*;
import edu.stanford.nlp.trees.Tree;
import elkfed.config.ConfigProperties;
import elkfed.coref.discourse_entities.DiscourseEntity;
import elkfed.coref.discourse_entities.SieveDiscourseEntity;
import elkfed.coref.utterances.Utterance;
import elkfed.knowledge.SemanticClass;
import elkfed.lang.EnglishLanguagePlugin;
import elkfed.lang.EnglishLinguisticConstants;
import elkfed.lang.GermanLanguagePlugin;
import elkfed.lang.GermanLinguisticConstants;
import elkfed.lang.LanguagePlugin;
import elkfed.lang.MentionType;
import elkfed.lang.MentionType.Features;
import elkfed.mmax.minidisc.Markable;
import elkfed.mmax.minidisc.MarkableHelper;
import elkfed.mmax.minidisc.MarkableLevel;
import elkfed.mmax.minidisc.MiniDiscourse;
import elkfed.nlp.util.Gender;
import elkfed.nlp.util.Number;
import elkfed.nlp.util.NameStructure;

/* for new (parse) heads */

/**
 * Takes a markable and wraps it in a mention object. Information about the
 * mention (ex: gender information, plurality, etc.) is determined as well.
 * 
 * @author vae2101
 * 
 */
public class Mention implements Comparable<Mention> {

    // Language plugin obtained once from the global configuration; consulted
    // by getDiscourseElementsByLevel to choose language-specific attributes.
    private static final LanguagePlugin langPlugin = ConfigProperties.getInstance().getLanguagePlugin();

    private static final Logger _logger = Logger.getLogger("elkfed.mentions");

    // Set to true to always use the old-style (aka "mmax") head computation:
    // while true, getParseHead() returns null.
    final private boolean do_not_use_parsehead = true;

    // Mention information
    final private MentionType _mentionType;
    final private String headString;
    final private String headLemma;
    // the part of speech of the head
    final private String headPos;
    final private Markable _markable;
    final private String _markableString;
    final private String _enamexType;
    // coreference set id; stays null until setSetID is called
    private String _setID = null;
    // index of the mention in its sentence
    private int _mentionIdx;
    final private MiniDiscourse _document;

    final private int _sentId;
    private int _sentStart;
    private int _sentEnd;
    private Tree _sentenceTree;
    // built lazily by getSentenceTreeWithDiscIds()
    private Tree _sentenceTreeDiscIds;
    // the smallest np containing the mention, modulo embedding (looks like)
    private Tree _lowestProjection;
    // the largest np containing the mention, modulo embedding
    public Tree _highestProjection;

    // minimal parse subtree that represents the mention
    private Tree _minparsespan;
    // minimal parse np-subtree that represents the mention
    private Tree _minnpparsespan;
    // maximal parse np-subtree that represents the mention
    private Tree _maxnpparsespan;

    public List<Tree> _premodifiers;
    public List<Tree> _postmodifiers;
    // sentence-relative word indices of the markable, set by setParseInfo
    // or the dedicated setters
    private int _startWord;
    private int _endWord;
    // only computed in the constructor when the mention is a proper name
    private HashMap<String, String> _nameStructure;

    private Utterance _utterance;
    private int _posInUtterance;
    // this seems to be first in a sentence, not dnew (olga);
    // raised by setUttPos(0)
    private boolean isFirstMention;

    private DiscourseEntity _discourseEntity;
    private SieveDiscourseEntity _sieveDiscourseEntity;

    /* for parse heads */
    private Tree _ParseHead;
    // created on demand by getStHeadFinder()
    private TueBaDZHeadFinder _headFinder;

    /**
     * Returns the head finder used for parse-based head computation, creating
     * it on first use.
     */
    public TueBaDZHeadFinder getStHeadFinder() {
        if (_headFinder != null) {
            return _headFinder;
        }
        _headFinder = new TueBaDZHeadFinder();
        return _headFinder;
    }

    /** Returns the highest projection stored for this mention. */
    public Tree getHighestNP() {
        return this._highestProjection;
    }

    /** Returns the lowest projection stored for this mention. */
    public Tree getLowestNP() {
        return this._lowestProjection;
    }

    /** Returns the minimal parse subtree that represents the mention. */
    public Tree getMinParseTree() {
        return this._minparsespan;
    }

    /** Returns the minimal parse np-subtree that represents the mention. */
    public Tree getMinNPParseTree() {
        return this._minnpparsespan;
    }

    /** Returns the maximal parse np-subtree that represents the mention. */
    public Tree getMaxNPParseTree() {
        return this._maxnpparsespan;
    }

    /**
     * Returns the parse-derived head leaf.
     *
     * @return the stored parse head, or null when parse heads are switched
     *         off via do_not_use_parsehead or no parse head is stored
     */
    public Tree getParseHead() {
        return do_not_use_parsehead ? null : _ParseHead;
    }

    /**
     * Returns the head string of the mention: the value of the stored parse
     * head when there is one, otherwise the head computed by the language
     * plugin at construction time.
     * <p>
     * Note: this reads the stored parse head directly, not getParseHead(), so
     * the do_not_use_parsehead switch has no effect here.
     */
    public String getHeadString() {
        return (_ParseHead != null) ? _ParseHead.value() : headString;
    }

    /**
     * Returns the head lemma computed by the language plugin at construction
     * time. The parse head is currently not consulted.
     */
    public String getHeadLemma() {
        // ToDo: find out how to derive a lemma from _ParseHead; until then the
        // plugin-provided lemma is returned whether or not a parse head exists
        return headLemma;
    }

    /**
     * Returns the part-of-speech tag of the head. When a parse head is stored
     * and has a parent in the sentence tree, the label of that parent is used;
     * otherwise the tag computed by the language plugin is returned.
     *
     * @return the tag; "UNKNOWN" for an empty or blank parent label, "QUOTES"
     *         for the label {@code ''}
     */
    public String getHeadPOS() {
        if (_ParseHead == null) {
            return headPos;
        }
        final Tree preTerminal = _ParseHead.parent(_sentenceTree);
        if (preTerminal == null) {
            return headPos;
        }
        final String tag = preTerminal.label().toString();
        if (tag == null || tag.equals("") || tag.equals(" ")) {
            return "UNKNOWN";
        }
        // for WEKA: error in HeadPos feature otherwise
        return tag.equals("''") ? "QUOTES" : tag;
    }

    /**
     * Re-derives the number (singular/plural) feature from the parse head.
     * <p>
     * Does nothing when getParseHead() returns null; the number determined by
     * the language plugin is kept in that case. Otherwise:
     * <ul>
     * <li>if the lower-cased head string matches PRONOUN, the mention is
     * singular exactly when it also matches SINGULAR_PRONOUN_ADJ;</li>
     * <li>otherwise it is plural when the head POS is CC, NNS or NNPS, and
     * singular for any other tag.</li>
     * </ul>
     * Both number features are cleared before the new one is added, so the
     * mention never carries isSingular and isPlural at the same time.
     */
    public void SetNumber() {
        if (getParseHead() == null) {
            // rely on the number as determined by the language plugin
            return;
        }

        final String head = getHeadString().toLowerCase();
        final boolean singular;
        if (head.matches(PRONOUN)) {
            // pronoun: decide by the singular/plural pronoun pattern
            singular = head.matches(SINGULAR_PRONOUN_ADJ);
        } else {
            // nominal: decide by the POS tag of the head
            final String headpos = getHeadPOS();
            singular = !(headpos.equals("CC") || headpos.equals("NNS") || headpos.equals("NNPS"));
        }

        // Drop whatever the plugin decided before recording the new value.
        // Without this, a pronoun could keep a stale isSingular next to the
        // freshly added isPlural and still be reported as singular.
        _mentionType.features.remove(MentionType.Features.isSingular);
        _mentionType.features.remove(MentionType.Features.isPlural);
        if (singular) {
            _mentionType.features.add(MentionType.Features.isSingular);
        } else {
            _mentionType.features.add(MentionType.Features.isPlural);
        }
    }

    /**
     * Wraps a markable of an MMAX document in a mention. The markable string,
     * enamex type, head string, head lemma, head POS and mention type are all
     * obtained from the configured language plugin.
     *
     * @param markable
     *            the markable to wrap; its SENTENCE_ID_ATTRIBUTE value must
     *            parse as an integer
     * @param doc
     *            the document the markable belongs to
     */
    public Mention(Markable markable, MiniDiscourse doc) {
        final LanguagePlugin plugin = ConfigProperties.getInstance().getLanguagePlugin();
        _markable = markable;
        _document = doc;
        _markableString = plugin.markableString(markable);
        _enamexType = plugin.enamexType(markable);

        headString = plugin.getHead(markable);
        headLemma = plugin.getHeadLemma(markable);

        // normalise the head POS: missing -> UNKNOWN, '' -> QUOTES
        final String rawPos = plugin.getHeadPOS(markable);
        if (rawPos == null || rawPos.equals("")) {
            headPos = "UNKNOWN";
        } else if (rawPos.equals("''")) {
            headPos = "QUOTES";
        } else {
            headPos = rawPos;
        }

        // semclass, gender, number
        _mentionType = plugin.calcMentionType(markable);

        _sentId = Integer.parseInt(getMarkable().getAttributeValue(SENTENCE_ID_ATTRIBUTE));

        // utterance and position in utterance are left at their defaults
        isFirstMention = false;

        // name-internal structure, for proper names only
        if (getProperName()) {
            _nameStructure = calcNameStructure();
        }
    }

    /** Returns the mention type computed by the language plugin. */
    public MentionType mentionType() {
        return this._mentionType;
    }

    /** Returns true if the mention type carries the isProperName feature. */
    public boolean getProperName() {
        return _mentionType.features.contains(Features.isProperName);
    }

    /** Stores the index of this mention in its sentence. */
    public void setMentionIdx(int i) {
        this._mentionIdx = i;
    }

    /** Returns the index of this mention in its sentence. */
    public int getMentionIdx() {
        return this._mentionIdx;
    }

    /** Returns true if the mention type carries the isPronoun feature. */
    public boolean getPronoun() {
        return _mentionType.features.contains(Features.isPronoun);
    }

    /** Returns true if the mention type carries the isReflexive feature. */
    public boolean getReflPronoun() {
        return _mentionType.features.contains(Features.isReflexive);
    }

    /** Returns true if the mention type carries the isPossPronoun feature. */
    public boolean getPossPronoun() {
        return _mentionType.features.contains(Features.isPossPronoun);
    }

    /** Returns true if the mention type carries the isRelative feature. */
    public boolean getRelPronoun() {
        return _mentionType.features.contains(Features.isRelative);
    }

    /** Returns true if the mention type carries the isPersPronoun feature. */
    public boolean getPersPronoun() {
        return _mentionType.features.contains(Features.isPersPronoun);
    }

    /** Returns true if the mention type carries the isDefinite feature. */
    public boolean getDefinite() {
        return _mentionType.features.contains(Features.isDefinite);
    }

    /** Returns true if the mention type carries the isDnewDeterminer feature. */
    public boolean getDnewDeterminer() {
        return _mentionType.features.contains(Features.isDnewDeterminer);
    }

    /** Returns true if the mention type carries the isIndefinite feature. */
    public boolean getIndefinite() {
        return _mentionType.features.contains(Features.isIndefinite);
    }

    /** Returns true if the mention type carries the isDemonstrative feature. */
    public boolean getDemonstrative() {
        return _mentionType.features.contains(Features.isDemonstrative);
    }

    /** Returns true if the mention type carries the isDemPronoun feature. */
    public boolean getDemPronoun() {
        return _mentionType.features.contains(Features.isDemPronoun);
    }

    /** Returns true if the mention type carries the isDemNominal feature. */
    public boolean getDemNominal() {
        return _mentionType.features.contains(Features.isDemNominal);
    }

    /**
     * Tells whether the mention is singular.
     *
     * @return true if the mention type carries the isSingular feature
     */
    public boolean getNumber() {
        return _mentionType.features.contains(Features.isSingular);
    }

    /**
     * Returns the number of the mention as a label: singular, plural or
     * unknown. The isSingular feature is checked first, so it wins if both
     * number features are present.
     */
    public Number getNumberLabel() {
        if (_mentionType.features.contains(Features.isSingular)) {
            return Number.SINGULAR;
        }
        return _mentionType.features.contains(Features.isPlural) ? Number.PLURAL : Number.UNKNOWN;
    }

    /**
     * Tells whether the mention is first or second person.
     *
     * @return true if the mention type carries the isFirstSecondPerson feature
     */
    public boolean getIsFirstSecondPerson() {
        return _mentionType.features.contains(Features.isFirstSecondPerson);
    }

    /**
     * Returns the gender recorded in the mention type.
     */
    public Gender getGender() {
        return this._mentionType.gender;
    }

    /**
     * Return mention head string of NP
     */
    /* public String getHeadString(){return headString;} */
    // Olga: use prs
    // first (cf. above)

    /**
     * Return mention head POS of NP
     * 
     * @author samuel
     * @return mention head POS of NP
     */
    /* public String getHeadPOS(){return headPos;} */
    // Olga: commented -- use
    // prs first (cf. new
    // getHeadPos())

    /**
     * Returns the head string of the NP as it is needed for pattern searches
     * or WP queries, as computed by the configured language plugin.
     */
    public String getHeadOrName() {
        return ConfigProperties.getInstance().getLanguagePlugin().getHeadOrName(getMarkable());
    }

    /**
     * Returns the head form of the given mention to use for the strudel
     * similarity (similar to the lemma, but keeping capitals): the head string
     * when m.getNumber() is true (singular), otherwise getHeadOrName().
     *
     * @param m
     *            the mention to inspect (this instance is not consulted)
     */
    // TODO: is this necessary? Shouldn't getHeadOrName do the same?
    public String getHeadStr4Strudel(Mention m) {
        return m.getNumber() ? m.getHeadString() : m.getHeadOrName();
    }

    /**
     * Returns the feature set of the mention type. This is the live set, not
     * a copy.
     */
    public Set<Features> getFeatures() {
        return this._mentionType.features;
    }

    /**
     * Returns the internal structure of the mention's name. It is only
     * computed for proper names; for other mentions the field is never
     * assigned by the constructor.
     */
    public HashMap<String, String> getNameStructure() {
        return this._nameStructure;
    }

    /**
     * Returns the markable wrapped by this mention.
     */
    public Markable getMarkable() {
        return this._markable;
    }

    /**
     * Returns the markable string computed by the language plugin at
     * construction time.
     */
    public String getMarkableString() {
        return this._markableString;
    }

    /**
     * Returns the document this mention was created for.
     */
    public MiniDiscourse getDocument() {
        return this._document;
    }

    /**
     * Returns the semantic class recorded in the mention type.
     */
    public SemanticClass getSemanticClass() {
        return this._mentionType.semanticClass;
    }

    /**
     * Builds a deep copy of the given sentence tree whose leaves carry the
     * discourse ids of the tokens instead of the tokens themselves. Useful to
     * derive spans based on syntax.
     * <p>
     * Leaves and discourse elements are compared case-insensitively after
     * mapping -lrb-/-rrb- to brackets and dropping backslashes. A pair that
     * does not match is reported on System.err, and from then on the leaf
     * index lags one further behind the discourse index.
     *
     * @author samuel
     * @param sentTree
     *            original sentence tree (left unmodified)
     * @return copy of the tree with discourse ids in the leaves
     * @throws RuntimeException
     *             if the number of leaves differs from the number of
     *             discourse elements or discourse element ids of the sentence
     */
    private Tree getSentenceTreeWithDiscurseIdsInLeaves(Tree sentTree) {
        final Tree copy = sentTree.deepCopy();
        final List<Tree> leafNodes = copy.getLeaves();
        final String[] words = getSentenceDiscourseElements();
        final String[] ids = getSentenceDiscourseElementIDs();
        final int leafCount = leafNodes.size();
        if (words.length != leafCount || ids.length != leafCount) {
            throw new RuntimeException("SentenceTree leaves and discourse elements do not match for mention "
                    + this.getMarkable().getID());
        }

        /*
         * sometimes tueba-d/z punctuations in the parse tree are out of context
         * appended to the root of the tree, hence the skip counter
         *
         * yv: ... and we need to unescape those words for the semeval2mmax
         * output.
         */
        int skipped = 0;
        for (int pos = 0; pos < words.length; pos++) {
            final Tree leaf = leafNodes.get(pos - skipped);
            final String leafWord = leaf.value().toLowerCase()
                    .replaceAll("-lrb-", "(")
                    .replaceAll("-rrb-", ")")
                    .replaceAll("\\\\", "");
            final String discWord = words[pos].replaceAll("\\\\", "");
            if (!discWord.equalsIgnoreCase(leafWord)) {
                System.err.format("non-matching: %s vs %s\n", leafWord, words[pos]);
                skipped++;
                continue;
            }
            leaf.setValue(ids[pos]);
        }
        return copy;
    }

    /**
     * Sets the parse information of this mention. This should only be used by
     * CorefMentionFactory.
     * <p>
     * Stores the sentence tree and the word span, asks the language plugin for
     * projections and pre-/postmodifiers, determines the parse head of the
     * lowest projection (discarded when it is not one of the leaves
     * start..end), fetches the min/max parse spans and finally calls
     * SetNumber().
     *
     * @param sentTree
     *            parse tree of the containing sentence
     * @param start
     *            sentence-relative index of the first word of the markable
     * @param end
     *            sentence-relative index of the last word of the markable
     */
    public void setParseInfo(Tree sentTree, int start, int end) {
        _sentenceTree = sentTree;
        _startWord = start;
        _endWord = end;

        final LanguagePlugin plugin = ConfigProperties.getInstance().getLanguagePlugin();
        final List<Tree>[] info = plugin.calcParseInfo(sentTree, start, end, _mentionType);
        final List<Tree> projections = info[0];
        _premodifiers = info[1];
        _postmodifiers = info[2];
        _lowestProjection = projections.get(0);
        _highestProjection = projections.get(projections.size() - 1);
        _ParseHead = _lowestProjection.headTerminal(getStHeadFinder());

        /* keep the head only if it is one of the leaves inside the span */
        final List<Tree> sentenceLeaves = sentTree.getLeaves();
        final int leafCount = sentenceLeaves.size();
        boolean headInSpan = false;
        for (int i = start; i <= end && i < leafCount; i++) {
            // identity comparison: we are looking for the very same node
            if (sentenceLeaves.get(i) == _ParseHead) {
                headInSpan = true;
                break;
            }
        }
        if (!headInSpan) {
            _ParseHead = null;
        }

        final Tree[] extra = plugin.calcParseExtra(sentTree, start, end, _ParseHead, getStHeadFinder());
        _minparsespan = extra[0];
        _minnpparsespan = extra[1];
        _maxnpparsespan = extra[2];

        /* adjust number */
        SetNumber();

        if (!_logger.isLoggable(Level.FINE)) {
            return;
        }
        _logger.fine(String.format("Parse info for '%s'", toString()));
        _logger.fine("headOrName: " + getHeadOrName());
        _logger.fine("lowest: " + _lowestProjection);
        _logger.fine("highest: " + _highestProjection);
        _logger.fine("premodify: " + _premodifiers);
        _logger.fine("postmodify: " + _postmodifiers);
    }

    /* Utterance info */

    /** Returns the utterance assigned via setUtterance, if any. */
    public Utterance getUtterance() {
        return this._utterance;
    }

    /**
     * Assigns the utterance of this mention and registers the mention with it
     * through {@code addCF}.
     */
    public void setUtterance(Utterance utt) {
        this._utterance = utt;
        this._utterance.addCF(this);
    }

    /** Returns the position of this mention in its utterance. */
    public int getUttPos() {
        return this._posInUtterance;
    }

    /**
     * Stores the position of this mention in its utterance. Position 0 also
     * raises the first-mention flag; the flag is never cleared here.
     */
    public void setUttPos(int pos) {
        this._posInUtterance = pos;
        if (pos == 0) {
            this.isFirstMention = true;
        }
    }

    /** Returns the first-mention flag (raised by setUttPos(0)). */
    public boolean getIsFirstMention() {
        return this.isFirstMention;
    }

    // discourse entities
    /** Returns the discourse entity created by createDiscourseEntity, if any. */
    public DiscourseEntity getDiscourseEntity() {
        return this._discourseEntity;
    }

    /**
     * Returns the sieve discourse entity created by
     * createSieveDiscourseEntity, if any.
     */
    public SieveDiscourseEntity getSieveDiscourseEntity() {
        return this._sieveDiscourseEntity;
    }

    /**
     * Creates a new sieve discourse entity for this mention, replacing any
     * previously stored one.
     */
    public void createSieveDiscourseEntity() {
        this._sieveDiscourseEntity = new SieveDiscourseEntity(this);
    }

    /**
     * Creates a new discourse entity for this mention, replacing any
     * previously stored one.
     */
    public void createDiscourseEntity() {
        this._discourseEntity = new DiscourseEntity(this);
    }

    /**
     * Orders mentions by the sentence-relative index of their first word.
     * Mentions starting at the same word compare as equal.
     *
     * @return -1, 0 or 1
     */
    public int compareTo(Mention m) {
        final int otherStart = m.getStartWord();
        if (_startWord == otherStart) {
            return 0;
        }
        return (_startWord < otherStart) ? -1 : 1;
    }

    /**
     * Determines the internal structure of the name (i.e. Forename, Middle,
     * Surname, etc.) by handing the markable string to NameStructure.
     */
    private HashMap<String, String> calcNameStructure() {
        final HashMap<String, String> structure = NameStructure.getNameStructure(_markableString);
        return structure;
    }

    /** Returns the parse of the containing sentence, as set by setParseInfo. */
    public Tree getSentenceTree() {
        return this._sentenceTree;
    }

    /**
     * Returns the parse of the containing sentence with discourse ids in the
     * leaves. The tree is built on the first call and cached afterwards.
     *
     * @author samuel
     * @return the sentence parse with discourse ids in the leaves
     */
    public Tree getSentenceTreeWithDiscIds() {
        if (_sentenceTreeDiscIds != null) {
            return _sentenceTreeDiscIds;
        }
        _sentenceTreeDiscIds = getSentenceTreeWithDiscurseIdsInLeaves(_sentenceTree);
        return _sentenceTreeDiscIds;
    }

    /** Returns the baseNP node (lowest projection) for that markable. */
    public Tree getLowestProjection() {
        return this._lowestProjection;
    }

    /** Returns the highest projection for that markable. */
    public Tree getHighestProjection() {
        return this._highestProjection;
    }

    /**
     * Returns the sentence-relative word index of the start of the markable.
     */
    public int getStartWord() {
        return this._startWord;
    }

    /** Sets the sentence-relative word index of the start of the markable. */
    public void setStartWord(int i) {
        this._startWord = i;
    }

    /**
     * Returns the sentence-relative word index of the end of the markable.
     */
    public int getEndWord() {
        return this._endWord;
    }

    /** Sets the sentence-relative word index of the end of the markable. */
    public void setEndWord(int i) {
        this._endWord = i;
    }

    /**
     * Determines whether this mention is coreferent with the given mention,
     * i.e. whether both carry the same set id.
     *
     * @return false if this mention has no set id; otherwise whether the two
     *         set ids are equal
     */
    public boolean isCoreferent(Mention m) {
        return this._setID != null && this._setID.equals(m._setID);
    }

    /**
     * Tells whether the spans of two mentions overlap, i.e. share at least
     * one discourse position.
     * <p>
     * Leftmost and rightmost discourse positions are both inclusive, so two
     * spans are disjoint only if one ends strictly before the other starts.
     * Spans that meet in a single token (including a one-token mention
     * compared with itself, or a possessive at the left edge of its NP)
     * therefore count as overlapping.
     * <p>
     * A better approximation to syntactic embedding would probably make sense
     * here, as in [1 the guardian] of [2[3 his] treasure] we would like to
     * allow 1--3, but not 1--2(?)
     *
     * @param m
     *            the other mention
     * @return true if the two spans have a discourse position in common
     */
    public boolean overlapsWith(Mention m) {
        final Markable m1 = getMarkable();
        final Markable m2 = m.getMarkable();
        // strictly-before tests: with inclusive ends, right == left means the
        // two spans share that token
        if (m1.getRightmostDiscoursePosition() < m2.getLeftmostDiscoursePosition()) {
            return false;
        }
        if (m2.getRightmostDiscoursePosition() < m1.getLeftmostDiscoursePosition()) {
            return false;
        }
        return true;
    }

    /**
     * Tells whether the span of this mention contains the span of the given
     * mention - like 1 and 2 in [1 [2 his] treasure]. Identical spans count
     * as embedding.
     */
    public boolean embeds(Mention m) {
        final Markable outer = getMarkable();
        final Markable inner = m.getMarkable();
        if (outer.getLeftmostDiscoursePosition() > inner.getLeftmostDiscoursePosition()) {
            return false;
        }
        return outer.getRightmostDiscoursePosition() >= inner.getRightmostDiscoursePosition();
    }

    /** Sets the coreference set id of this mention. */
    public void setSetID(String setid) {
        this._setID = setid;
    }

    /** Returns the coreference set id, or null if none has been set. */
    public String getSetID() {
        return this._setID;
    }

    /**
     * Returns the string form of the given markable with its first and last
     * character removed.
     *
     * @throws StringIndexOutOfBoundsException
     *             if the string form has fewer than two characters
     */
    public static String getMarkableString(final Markable markable) {
        final String raw = markable.toString();
        return raw.substring(1, raw.length() - 1);
    }

    /** Returns the sentence id parsed from the markable's attributes. */
    public int getSentId() {
        return this._sentId;
    }

    /** Sets the start position of the containing sentence. */
    public void setSentenceStart(int start) {
        this._sentStart = start;
    }

    /** Returns the start position of the containing sentence. */
    public int getSentenceStart() {
        return this._sentStart;
    }

    /** Sets the end position of the containing sentence. */
    public void setSentenceEnd(int end) {
        this._sentEnd = end;
    }

    /** Returns the end position of the containing sentence. */
    public int getSentenceEnd() {
        return this._sentEnd;
    }

    /** Returns the markable string of this mention. */
    @Override
    public String toString() {
        return this._markableString;
    }

    /**
     * Returns up to nWords discourse elements immediately preceding the
     * mention, in document order. Fewer are returned when the mention starts
     * less than nWords positions into the document.
     *
     * @param nWords
     *            maximal number of words to return
     */
    public String[] getLeftContext(int nWords) {
        final int mentionStart = getMarkable().getLeftmostDiscoursePosition();
        final int count = Math.min(mentionStart, nWords);
        final String[] context = new String[count];
        final int first = mentionStart - count;
        for (int i = 0; i < count; i++) {
            context[i] = _document.getDiscourseElementAtDiscoursePosition(first + i).toString();
        }
        return context;
    }

    /**
     * Returns up to nWords discourse elements immediately following the
     * mention, in document order. The count is cut down to the number of
     * tokens remaining when the document has fewer than nWords left.
     *
     * @param nWords
     *            maximal number of words to return
     */
    public String[] getRightContext(int nWords) {
        // first position after the (inclusive) right edge of the mention
        final int first = getMarkable().getRightmostDiscoursePosition() + 1;
        final int tokenCount = _document.getTokens().length;
        int count = nWords;
        if (tokenCount < first + nWords) {
            count = tokenCount - first;
        }
        final String[] context = new String[count];
        for (int i = 0; i < count; i++) {
            context[i] = _document.getDiscourseElementAtDiscoursePosition(first + i).toString();
        }
        return context;
    }

    /**
     * Checks whether this mention is a named entity, i.e. whether the mention
     * type carries the isEnamex feature.
     */
    public boolean isEnamex() {
        return _mentionType.features.contains(MentionType.Features.isEnamex);
    }

    /**
     * Checks whether this mention is a coordinated NP, i.e. whether the
     * mention type carries the isCoord feature.
     * <p>
     * Probably only works for Italian though (with "np.coord" annotated in
     * the data).
     */
    public boolean isCoord() {
        return _mentionType.features.contains(Features.isCoord);
    }

    /**
     * Gets the named-entity type computed by the language plugin at
     * construction time.
     */
    public String getEnamexType() {
        return this._enamexType;
    }

    /** Returns the postmodifier subtrees stored by setParseInfo. */
    public List<Tree> getPostmodifiers() {
        return this._postmodifiers;
    }

    /** Returns the premodifier subtrees stored by setParseInfo. */
    public List<Tree> getPremodifiers() {
        return this._premodifiers;
    }

    /**
     * Returns the labels on the path from the parent of the highest
     * projection up to the sentence root, each followed by a dot. A label
     * equal to the one directly below it on the path is written only once.
     */
    public String getRootPath() {
        final Tree root = getSentenceTree();
        Tree node = getHighestProjection();
        final StringBuffer path = new StringBuffer();
        String previous = null;
        while (node != root) {
            node = node.parent(root);
            final String label = node.value();
            if (!label.equals(previous)) {
                path.append(node.value());
                path.append(".");
            }
            previous = label;
        }
        return path.toString();
    }

    /**
     * Returns the markable's ISPRENOMINAL_ATTRIBUTE value parsed as a
     * boolean.
     */
    public boolean isPrenominal() {
        final String flag = getMarkable().getAttributeValue(ISPRENOMINAL_ATTRIBUTE);
        return Boolean.parseBoolean(flag);
    }

    /**
     * Looks for an apposition around the given mention and returns the head
     * of the appositive part.
     * <p>
     * The highest projection of np must have more than two children with a
     * comma as the second one. Two configurations are recognised:
     * <ul>
     * <li>the first child is np's lowest projection, the third child is an NP
     * headed by an NN, and the first child is not headed by an NNP: the head
     * terminal of the third child is returned;</li>
     * <li>the first child is an NP headed by an NN, and the third child is
     * np's lowest projection headed by an NNP: the head terminal of the first
     * child is returned.</li>
     * </ul>
     * This probably shouldn't work properly: it's used by discourse_entities,
     * but hP and lP are inconsistent with it. ToDo: unify with other appo
     * stuff (olga)
     *
     * @param np
     *            the mention whose projections are inspected
     * @return string form of the appositive head terminal, or null when no
     *         such apposition is found
     */
    public String computeAppType(Mention np) {
        final Tree lowest = np.getLowestProjection();
        final Tree highest = np.getHighestProjection();
        if (highest == null) {
            return null;
        }
        final Tree[] kids = highest.children();
        if (kids == null || kids.length <= 2) {
            return null;
        }
        if (!kids[1].label().toString().equals(",")) {
            return null;
        }

        final Tree first = kids[0];
        final Tree third = kids[2];
        final Tree firstHead = first.headPreTerminal(getStHeadFinder());
        final Tree thirdHead = third.headPreTerminal(getStHeadFinder());
        if (firstHead == null || thirdHead == null) {
            return null;
        }

        // "<mention>, <common-noun NP>"
        if (first == lowest
                && third.label().toString().equals("NP")
                && !firstHead.label().toString().equals("NNP")
                && thirdHead.label().toString().equals("NN")) {
            return third.headTerminal(getStHeadFinder()).toString();
        }

        // "<common-noun NP>, <proper-name mention>"
        if (first.label().toString().equals("NP")
                && third == lowest
                && thirdHead.label().toString().equals("NNP")
                && firstHead.label().toString().equals("NN")) {
            return first.headTerminal(getStHeadFinder()).toString();
        }

        return null;
    }

    /**
     * Finds, in the sentence tree with discourse ids, the first node on the
     * way up from the leaf at the left position that dominates the leaf at
     * the right position.
     *
     * @author samuel
     * @param leftMostDiscursePos
     *            left most discourse position
     * @param rightMostDiscursePos
     *            right most discourse position
     * @return the subtree covering the span inside the current mention's
     *         sentenceTreeWithDiscIds, or null if no ancestor of the left
     *         leaf dominates the right leaf
     */
    public Tree getMarkableTree(int leftMostDiscursePos, int rightMostDiscursePos) {
        final Tree tree = this.getSentenceTreeWithDiscIds();
        final List<Tree> leafNodes = tree.getLeaves();

        // discourse positions -> sentence-relative leaf indices
        final Tree firstLeaf = leafNodes.get(leftMostDiscursePos - this.getSentenceStart());
        final Tree lastLeaf = leafNodes.get(rightMostDiscursePos - this.getSentenceStart());

        Tree node = firstLeaf;
        while (node != null) {
            if (node.dominates(lastLeaf)) {
                break;
            }
            node = node.parent(tree);
        }
        return node;
    }

    /**
     * Runs the two-argument variant on the subtree of this mention's
     * markable.
     *
     * @author samuel
     * @param postag
     *            Parts of speech tag
     * @return List of disc ids representing the highest projecting phrase
     *         inside the markable's subtree with the given postag, or null if
     *         there is none
     */
    public ArrayList<String> getHighestProjectingPhraseWithPOS(String postag) {
        final Tree subtree = getMarkableSubTree();
        return getHighestProjectingPhraseWithPOS(subtree, postag);
    }

    /**
     * Searches the given subtree breadth-first, descending only into children
     * labelled "nx", for the first node that has a child labelled with the
     * given tag (case-insensitive). The values of the first child of every
     * such child of that node are collected.
     *
     * @author samuel
     * @param markableSubtree
     *            A subtree of the sentenceTreeWithDiscIds
     * @param postag
     *            Parts of speech tag
     * @return List of disc ids representing the highest projecting phrase
     *         inside the markable's subtree with the given postag, or null if
     *         nothing was found
     */
    public ArrayList<String> getHighestProjectingPhraseWithPOS(Tree markableSubtree, String postag) {
        final LinkedList<Tree> queue = new LinkedList<Tree>();
        queue.add(markableSubtree);

        Tree current = null;
        boolean matched = false;
        while (!matched && !queue.isEmpty()) {
            current = queue.removeFirst();
            for (Tree child : current.children()) {
                final String label = child.value().toString();
                if (label.equalsIgnoreCase(postag)) {
                    // stop at the first node that has a matching child
                    matched = true;
                    break;
                }
                if (label.equalsIgnoreCase("nx")) {
                    queue.add(child);
                }
            }
        }

        // collect every matching child of the node the search stopped at
        final ArrayList<String> ids = new ArrayList<String>();
        for (Tree child : current.children()) {
            if (child.value().toString().equalsIgnoreCase(postag)) {
                ids.add(child.children()[0].value().toString());
            }
        }
        return ids.isEmpty() ? null : ids;
    }

    /**
     * Returns the markables of the named level that lie in this mention's
     * sentence. The level is looked up in the document of the markable's own
     * level.
     *
     * @author samuel
     * @param level
     *            markable level name
     * @return this mention's sentence markables on that level
     */
    public Markable[] getSentenceMarkables(String level) {
        final MiniDiscourse doc = this.getMarkable().getMarkableLevel().getDocument();
        return getSentenceMarkables(doc.getMarkableLevelByName(level));
    }

    /**
     * Fetches the discourse elements between the stored sentence start and
     * sentence end from the document.
     *
     * @author samuel
     * @return discourse elements in this mention's sentence
     */
    public String[] getSentenceDiscourseElements() {
        final int from = getSentenceStart();
        final int to = getSentenceEnd();
        return _document.getDiscourseElements(from, to);
    }

    /**
     * Fetches the discourse element ids between the stored sentence start and
     * sentence end from the document.
     *
     * @author samuel
     * @return discourse element ids in this mention's sentence
     */
    public String[] getSentenceDiscourseElementIDs() {
        final int from = getSentenceStart();
        final int to = getSentenceEnd();
        return _document.getDiscourseElementIDs(from, to);
    }

    /**
     * Collects one attribute value per markable of the named level found at
     * the span of this mention.
     * <p>
     * With the German plugin the "tag" attribute is always read. With the
     * English plugin "lemma" is read when the level is "morph" and "tag"
     * otherwise. For any other language plugin an empty list is returned.
     *
     * @param markableLevel
     *            markable level name
     * @return the attribute values, in the order the level returns its
     *         markables
     */
    public List<String> getDiscourseElementsByLevel(String markableLevel) {
        final List<String> values = new ArrayList<String>();

        final boolean german = langPlugin instanceof GermanLanguagePlugin;
        final boolean english = !german && langPlugin instanceof EnglishLanguagePlugin;
        if (!german && !english) {
            return values;
        }

        final MarkableLevel level = _document.getMarkableLevelByName(markableLevel);
        final int from = _markable.getLeftmostDiscoursePosition();
        final int to = _markable.getRightmostDiscoursePosition();

        for (Markable markable : level.getMarkablesAtSpan(from, to)) {
            // only English distinguishes the morph level
            final boolean useLemma = english && markableLevel.equals("morph");
            values.add(markable.getAttributeValue(useLemma ? "lemma" : "tag"));
        }
        return values;
    }

    /**
     * Collects the "tag" attribute of every markable of the named level found
     * in the span of this mention widened by spanleft positions to the left
     * and spanright positions to the right. The widened span is clipped to
     * the stored sentence start and end.
     *
     * @param markableLevel
     *            markable level name
     * @param spanleft
     *            number of positions to extend the span to the left
     * @param spanright
     *            number of positions to extend the span to the right
     * @return the tag values, in the order the level returns its markables
     */
    public List<String> getDiscourseElementsByLevelAndExtendedSpan(String markableLevel, int spanleft,
            int spanright) {
        final List<String> tags = new ArrayList<String>();
        final MarkableLevel level = _document.getMarkableLevelByName(markableLevel);

        // widen the span, but never beyond the sentence
        final int from = Math.max(_markable.getLeftmostDiscoursePosition() - spanleft, getSentenceStart());
        final int to = Math.min(_markable.getRightmostDiscoursePosition() + spanright, getSentenceEnd());

        final Markable[] found = level.getMarkablesAtSpan(from, to);
        for (int i = 0; i < found.length; i++) {
            tags.add(found[i].getAttributeValue("tag"));
        }
        return tags;
    }

    /**
     * Returns the markables of the given level found between the stored
     * sentence start and sentence end.
     *
     * @author samuel
     * @param level
     *            markable level
     * @return this mention's sentence markables on that level
     */
    public Markable[] getSentenceMarkables(MarkableLevel level) {
        final int from = this.getSentenceStart();
        final int to = this.getSentenceEnd();
        return level.getMarkablesAtSpan(from, to);
    }

    /**
     * Builds the tree for this mention's own span by delegating to {@code getMarkableTree} with
     * the leftmost and rightmost discourse positions of the mention's markable.
     *
     * @author samuel
     * @return The markables subtree with disc ids in leaves
     */
    public Tree getMarkableSubTree() {
        int leftmost = getMarkable().getLeftmostDiscoursePosition();
        int rightmost = getMarkable().getRightmostDiscoursePosition();
        return getMarkableTree(leftmost, rightmost);
    }

    // TODO further cleanup of Kepa stuff - move to LanguagePlugin or delete
    /**
     * Extracts the head word of a nominal predicate attached to the given mention.
     * <p>
     * The parent of the mention's highest projection is inspected. A value is only produced when
     * that parent's second child is labelled {@code VP}, the VP's first child has one of the head
     * words {@code is}, {@code are}, {@code was} or {@code were}, and the VP's second child is an
     * {@code NP} whose head pre-terminal is neither {@code JJS} nor {@code NNP}.
     *
     * @param np
     *            the mention to analyse
     * @return the head terminal of the predicate NP, or {@code null} if the pattern does not match
     */
    public String computePredicationType(Mention np) {
        Tree projection = np.getHighestProjection();
        Tree sentence = np.getSentenceTree();

        if (projection == null) {
            if (ConfigProperties.getInstance().getDbgPrint()) {
                System.out.println("No mentionTree for " + np.toString());
            }
            return null;
        }

        Tree parent = projection.ancestor(1, sentence);
        if (parent == null) {
            return null;
        }
        Tree[] parentKids = parent.children();
        if (parentKids.length <= 1) {
            return null;
        }
        Tree verbPhrase = parentKids[1];
        if (!verbPhrase.label().toString().equals("VP")) {
            return null;
        }
        Tree[] vpKids = verbPhrase.children();
        if (vpKids.length <= 1) {
            return null;
        }

        // Only copular constructions ("X is/are/was/were Y") are of interest.
        String verb = vpKids[0].headTerminal(new ModCollinsHeadFinder()).toString();
        boolean copula = verb.equals("is") || verb.equals("are") || verb.equals("was") || verb.equals("were");
        if (!copula) {
            return null;
        }

        Tree complement = vpKids[1];
        if (complement == null || !complement.label().toString().equals("NP")) {
            return null;
        }

        // Superlative (JJS) and proper-noun (NNP) heads are not reported as a predication type.
        String headPos = complement.headPreTerminal(new ModCollinsHeadFinder()).label().toString();
        if (headPos.equals("JJS") || headPos.equals("NNP")) {
            return null;
        }
        return complement.headTerminal(new ModCollinsHeadFinder()).toString();
    }

    /**
     * Extracts the head word of an attributive predicate attached to the given mention.
     * <p>
     * The parent of the mention's highest projection is inspected. A value is only produced when
     * that parent's second child is labelled {@code VP}, the VP's first child has one of the head
     * words {@code is}, {@code are}, {@code was} or {@code were}, and the VP's second child is
     * either an {@code ADJP} or an {@code NP} whose head pre-terminal is {@code JJS}.
     * <p>
     * The copula head word and the complement node are looked up once and reused, mirroring
     * {@code computePredicationType}, instead of being recomputed for every comparison.
     *
     * @param np
     *            the mention to analyse
     * @return the head terminal of the predicate complement, or {@code null} if the pattern does
     *         not match
     */
    public String computePredicationAttr(Mention np) {
        Tree mentionTree = np.getHighestProjection();
        Tree sentenceTree = np.getSentenceTree();
        if (mentionTree == null) {
            return null;
        }

        Tree parentNode = mentionTree.ancestor(1, sentenceTree);
        if (parentNode == null || parentNode.children().length <= 1) {
            return null;
        }

        Tree verbPhrase = parentNode.children()[1];
        if (!verbPhrase.label().toString().equals("VP") || verbPhrase.children().length <= 1) {
            return null;
        }

        // One head finder and one head lookup serve all four copula comparisons.
        ModCollinsHeadFinder headFinder = new ModCollinsHeadFinder();
        String verb = verbPhrase.children()[0].headTerminal(headFinder).toString();
        boolean copula = verb.equals("is") || verb.equals("are") || verb.equals("was") || verb.equals("were");
        if (!copula) {
            return null;
        }

        Tree complement = verbPhrase.children()[1];
        if (complement == null) {
            return null;
        }

        String category = complement.label().toString();
        // An NP complement only counts when its head is a superlative adjective (JJS).
        boolean attributive = category.equals("ADJP") || (category.equals("NP")
                && complement.headPreTerminal(headFinder).label().toString().equals("JJS"));
        if (!attributive) {
            return null;
        }
        return complement.headTerminal(headFinder).toString();
    }

    /**
     * Joins the {@code "tag"} attribute values of the markables between the first and the last of
     * the given discourse ids. The first and last ids are converted with
     * {@code MarkableHelper.parseId(id, "word")} and the work is delegated to
     * {@link #getJoinedStringFromDiscIds(int, int, String)}.
     *
     * @author samuel
     * @param discIds
     *            discourse ids; only the first and the last element are used
     * @param markableLevel
     *            markable level name
     * @return a joined string of the markable levels tag attribute, or {@code null} if
     *         {@code discIds} is {@code null} or empty
     */
    public String getJoinedStringFromDiscIds(ArrayList<String> discIds, String markableLevel) {
        // An empty list has no first/last id; treat it like a missing list rather than
        // failing with an IndexOutOfBoundsException on get(0).
        if (discIds == null || discIds.isEmpty()) {
            return null;
        }
        int from = MarkableHelper.parseId(discIds.get(0), "word");
        int to = MarkableHelper.parseId(discIds.get(discIds.size() - 1), "word");
        return getJoinedStringFromDiscIds(from, to, markableLevel);
    }

    /**
     * Joins the {@code "tag"} attribute values of the markables on the named level into one
     * space-separated string. Both positions are decremented by one before the level is queried
     * (presumably converting 1-based word ids to 0-based discourse positions - to be confirmed).
     *
     * @author samuel
     * @param from
     *            left most discourse position
     * @param to
     *            right most discourse position
     * @param markableLevel
     *            markable level name
     * @return a joined string of the markable levels tag attribute, trimmed of surrounding
     *         whitespace, or {@code null} if the span yields no non-null markable
     */
    public String getJoinedStringFromDiscIds(int from, int to, String markableLevel) {
        MarkableLevel targetLevel = _document.getMarkableLevelByName(markableLevel);
        StringBuilder joined = new StringBuilder();

        for (Markable candidate : targetLevel.getMarkablesAtSpan(from - 1, to - 1)) {
            if (candidate == null) {
                continue;
            }
            // Each value is followed by a separator; the trailing one is trimmed below.
            joined.append(candidate.getAttributeValue("tag")).append(" ");
        }

        return joined.length() > 0 ? joined.toString().trim() : null;
    }

    /**
     * Replaces the discourse entity stored for this mention.
     *
     * @param de
     *            the discourse entity to store
     */
    public void setDiscourseEntity(DiscourseEntity de) {
        this._discourseEntity = de;
    }

    /**
     * Replaces the sieve discourse entity stored for this mention.
     *
     * @param de
     *            the sieve discourse entity to store
     */
    public void setSieveDiscourseEntity(SieveDiscourseEntity de) {
        this._sieveDiscourseEntity = de;
    }

    /**
     * Links this mention to the given antecedent: this mention is passed to {@code merge} on the
     * antecedent's sieve discourse entity and on its discourse entity, after which both entities
     * are stored as this mention's own.
     * <p>
     * NOTE(review): the antecedent's entities are dereferenced without a null check, so an
     * antecedent without assigned entities would raise a {@code NullPointerException} - confirm
     * that callers always pass an antecedent with both entities set.
     *
     * @param ante
     *            the antecedent mention whose entities this mention joins
     */
    public void linkToAntecedent(Mention ante) {

        // Merge into both entity representations of the antecedent first ...
        ante.getSieveDiscourseEntity().merge(this);
        ante.getDiscourseEntity().merge(this);
        // ... then adopt whatever entities the antecedent reports after the merge.
        _discourseEntity = ante.getDiscourseEntity();
        _sieveDiscourseEntity = ante.getSieveDiscourseEntity();
    }
}