Example usage for edu.stanford.nlp.trees Tree ancestor

List of usage examples for edu.stanford.nlp.trees Tree ancestor

Introduction

On this page you can find example usages of the ancestor method of edu.stanford.nlp.trees.Tree.

Prototype

public Tree ancestor(int height, Tree root) 

Source Link

Document

Returns the ancestor tree node that is height nodes up from the current node, searching within the tree rooted at root.

Usage

From source file:coreferenceresolver.util.StanfordUtil.java

/**
 * Runs the Stanford CoreNLP pipeline over every line of {@code documentFile}, rebuilding
 * {@code reviews} (one {@code Review} per input line) and writing the POS-tagged tokens of each
 * sentence to {@code ./input.txt.pos}.
 *
 * <p>Both the output writer and the input reader are managed with try-with-resources, so they
 * are closed (and the writer flushed) even when reading or annotation fails part-way.
 *
 * @param simpleInit when {@code true} only tokenization, sentence splitting, POS tagging and
 *            parsing are run; when {@code false} the sentiment annotator is added and each
 *            sentence also gets its sentiment level, dependencies, relative-pronoun flags,
 *            token sentiment and comparative indicators
 * @throws FileNotFoundException if the output file or {@code documentFile} cannot be opened
 * @throws IOException if reading the input or writing the POS output fails
 */
public void init(boolean simpleInit) throws FileNotFoundException, IOException {
    String outPosFilePath = "./input.txt.pos";
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(outPosFilePath)))) {
        props = new Properties();
        if (simpleInit) {
            props.put("annotators", "tokenize, ssplit, pos, parse");
        } else {
            props.put("annotators", "tokenize, ssplit, pos, parse, sentiment");
        }
        pipeline = new StanfordCoreNLP(props);

        reviews = new ArrayList<>();

        try (BufferedReader bufferedReader = new BufferedReader(new FileReader(documentFile))) {
            String reviewLine;
            int reviewId = 0;
            // Each input line is treated as one review.
            while ((reviewLine = bufferedReader.readLine()) != null) {
                Review newReview = new Review();
                newReview.setRawContent(reviewLine);

                // Create an Annotation holding just the raw text and run all annotators on it.
                document = new Annotation(reviewLine);
                pipeline.annotate(document);
                List<CoreMap> sentences = document.get(SentencesAnnotation.class);

                for (CoreMap sentence : sentences) {
                    newReview.addSentence(buildSentenceFromAnnotation(sentence, reviewId, simpleInit, bw));
                }

                // Marks the end of a review in the POS output.
                bw.write("./.");
                bw.newLine();

                reviews.add(newReview);
                ++reviewId;
            }
        }
    }
}

/**
 * Converts one annotated CoreNLP sentence into a {@code Sentence}, writing its tokens as
 * {@code word/tag} pairs followed by a newline to {@code bw}.
 *
 * @param sentence the annotated sentence
 * @param reviewId index of the review (input line) the sentence belongs to
 * @param simpleInit see {@link #init(boolean)}
 * @param bw writer receiving the POS-tagged tokens
 * @return the populated sentence
 * @throws IOException if writing to {@code bw} fails
 */
private Sentence buildSentenceFromAnnotation(CoreMap sentence, int reviewId, boolean simpleInit,
        BufferedWriter bw) throws IOException {
    int sentenceOffsetBegin = sentence.get(CharacterOffsetBeginAnnotation.class);
    int sentenceOffsetEnd = sentence.get(CharacterOffsetEndAnnotation.class);
    Sentence newSentence = new Sentence();
    newSentence.setReviewId(reviewId);
    newSentence.setRawContent(sentence.toString());
    newSentence.setOffsetBegin(sentenceOffsetBegin);
    newSentence.setOffsetEnd(sentenceOffsetEnd);

    if (!simpleInit) {
        int sentimentLevel = RNNCoreAnnotations
                .getPredictedClass(sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class));
        newSentence.setSentimentLevel(sentimentLevel);

        // Dependency parsing
        SemanticGraph collCCDeps = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
        Collection<TypedDependency> typedDeps = collCCDeps.typedDependencies();
        newSentence.setDependencies(typedDeps);
    }

    // Fetch the parse tree once; it is the root for every ancestor lookup below.
    Tree sentenceTree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
    List<Tree> sentenceTreeLeaves = sentenceTree.getLeaves();

    int i = 0;
    for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
        Token newToken = new Token();

        // The i-th token is paired with the i-th leaf of the parse tree.
        Tree tokenTree = sentenceTreeLeaves.get(i);
        newToken.setTokenTree(tokenTree);

        String word = token.get(TextAnnotation.class);
        newToken.setWord(word);

        String pos = token.get(PartOfSpeechAnnotation.class);
        newToken.setPOS(pos);

        int offsetBegin = token.get(CharacterOffsetBeginAnnotation.class);
        newToken.setOffsetBegin(offsetBegin);

        int offsetEnd = token.get(CharacterOffsetEndAnnotation.class);
        newToken.setOffsetEnd(offsetEnd);

        if (!simpleInit) {
            // NP relative clause check: the node two levels above the leaf is a WHNP and the
            // word is neither "who" nor "what". ancestor() returns null when no such node
            // exists, so guard before dereferencing.
            Tree twoLevelsAncestor = tokenTree.ancestor(2, sentenceTree);
            if (twoLevelsAncestor != null && "WHNP".equals(twoLevelsAncestor.value())
                    && !word.equalsIgnoreCase("who") && !word.equalsIgnoreCase("what")) {
                newToken.setRelativePronoun(true);
            }

            // Calculate sentiment for this token
            int newTokenSentiment = Util.retrieveOriginalSentiment(newToken.getWord());
            newToken.setSentimentOrientation(newTokenSentiment, newSentence.getDependencies());
        }

        newSentence.addToken(newToken);
        bw.write(token.word() + "/" + token.tag() + " ");
        ++i;
    }
    bw.newLine();

    if (!simpleInit) {
        // A sentence containing a comparative indicator is a comparative sentence.
        List<Token> comparativeTokens = FeatureExtractor.findComparativeIndicator(newSentence, null, null);
        // TODO: check special comparative samples
        if (!comparativeTokens.isEmpty()) {
            newSentence.initComparatives(comparativeTokens);
        }
    }

    return newSentence;
}

From source file:elkfed.coref.mentions.Mention.java

License:Apache License

/**
 * Looks for a copular predication over the given mention: the parent of the mention's highest
 * projection must have a second child labelled {@code VP} whose first child is headed by
 * "is", "are", "was" or "were" and whose second child is an {@code NP}.
 *
 * @param np the mention to inspect
 * @return the head terminal of the predicate {@code NP}, unless its head pre-terminal is
 *         labelled {@code JJS} or {@code NNP}; {@code null} when the pattern does not match
 */
public String computePredicationType(Mention np) {
    Tree mentionTree = np.getHighestProjection();
    Tree sentenceTree = np.getSentenceTree();

    if (mentionTree == null) {
        if (ConfigProperties.getInstance().getDbgPrint()) {
            System.out.println("No mentionTree for " + np.toString());
        }
        return null;
    }

    Tree parentNode = mentionTree.ancestor(1, sentenceTree);
    if (parentNode == null || parentNode.children().length <= 1) {
        return null;
    }

    Tree verbPhrase = parentNode.children()[1];
    if (!verbPhrase.label().toString().equals("VP") || verbPhrase.children().length <= 1) {
        return null;
    }

    String copula = verbPhrase.children()[0].headTerminal(new ModCollinsHeadFinder()).toString();
    boolean isCopula = copula.equals("is") || copula.equals("are") || copula.equals("was")
            || copula.equals("were");
    if (!isCopula) {
        return null;
    }

    Tree complement = verbPhrase.children()[1];
    if (complement == null || !complement.label().toString().equals("NP")) {
        return null;
    }

    String complementHeadPos = complement.headPreTerminal(new ModCollinsHeadFinder()).label().toString();
    if (complementHeadPos.equals("JJS") || complementHeadPos.equals("NNP")) {
        return null;
    }
    return complement.headTerminal(new ModCollinsHeadFinder()).toString();
}

From source file:elkfed.coref.mentions.Mention.java

License:Apache License

/**
 * Looks for a copular predicate attribute over the given mention: the parent of the mention's
 * highest projection must have a second child labelled {@code VP} whose first child is headed
 * by "is", "are", "was" or "were".
 *
 * @param np the mention to inspect
 * @return the head terminal of the VP's second child when that child is an {@code ADJP}, or an
 *         {@code NP} whose head pre-terminal is labelled {@code JJS}; {@code null} otherwise
 */
public String computePredicationAttr(Mention np) {
    Tree mentionTree = np.getHighestProjection();
    Tree sentenceTree = np.getSentenceTree();
    if (mentionTree == null) {
        return null;
    }

    Tree parentNode = mentionTree.ancestor(1, sentenceTree);
    if (parentNode == null || parentNode.children().length <= 1) {
        return null;
    }

    Tree verbPhrase = parentNode.children()[1];
    if (!verbPhrase.label().toString().equals("VP") || verbPhrase.children().length <= 1) {
        return null;
    }

    // Compute the copula head once instead of once per candidate verb form.
    ModCollinsHeadFinder headFinder = new ModCollinsHeadFinder();
    String copula = verbPhrase.children()[0].headTerminal(headFinder).toString();
    boolean isCopula = copula.equals("is") || copula.equals("are") || copula.equals("was")
            || copula.equals("were");
    if (!isCopula) {
        return null;
    }

    Tree complement = verbPhrase.children()[1];
    if (complement == null) {
        return null;
    }

    String complementLabel = complement.label().toString();
    boolean isAttribute = complementLabel.equals("ADJP")
            || (complementLabel.equals("NP")
                    && complement.headPreTerminal(headFinder).label().toString().equals("JJS"));
    return isAttribute ? complement.headTerminal(headFinder).toString() : null;
}

From source file:knu.univ.lingvo.coref.Mention.java

License:Open Source License

/**
 * Returns the space-joined text of the leaves under the lowest node labelled {@code NP} on the
 * path from the head word up through {@code contextParseTree}.
 *
 * <p>If the climb reaches a node labelled {@code ROOT} first, the head leaf itself is used. If
 * the climb runs out of ancestors, an empty string is returned. If the resulting text is not
 * contained in {@code spanToString()}, the text of the head word is returned instead.
 *
 * @return the text described above
 */
public String lowestNPIncludesHead() {
    final Tree head = this.contextParseTree.getLeaves().get(this.headIndex);

    // Climb from the head leaf until an NP or ROOT node is found.
    Tree node = head;
    String category = null;
    for (; node != null; node = node.ancestor(1, this.contextParseTree)) {
        category = ((CoreLabel) node.label()).get(CoreAnnotations.ValueAnnotation.class);
        if (category.equals("NP") || category.equals("ROOT")) {
            break;
        }
    }
    if (node == null) {
        return "";
    }
    if (category.equals("ROOT")) {
        node = head;
    }

    StringBuilder joined = new StringBuilder();
    for (Tree leaf : node.getLeaves()) {
        if (joined.length() > 0) {
            joined.append(" ");
        }
        joined.append(((CoreLabel) leaf.label()).get(CoreAnnotations.TextAnnotation.class));
    }
    String ret = joined.toString();

    if (this.spanToString().contains(ret)) {
        return ret;
    }
    return this.sentenceWords.get(this.headIndex).get(CoreAnnotations.TextAnnotation.class);
}

From source file:knu.univ.lingvo.coref.sievepasses.DeterministicCorefSieve.java

License:Open Source License

/**
 * Divides a sentence into clauses and sorts the antecedents for pronoun matching.
 *
 * <p>Starting at the parent of {@code m1.mentionSubTree}, each ancestor whose label starts with
 * "S" contributes, in the order of {@code l}, those mentions it dominates that have not been
 * added yet. The climb stops at a node labelled {@code ROOT} or when there is no further
 * ancestor within {@code m1.contextParseTree}.
 *
 * @param l candidate antecedent mentions
 * @param m1 the mention whose clause structure drives the ordering
 * @param sameSentence whether the candidates are in the same sentence as {@code m1}; when
 *            {@code false} no sorting is done and an empty list is returned
 * @return the re-ordered mentions (see the review note in the body about the fallback to
 *         {@code l})
 */
private static List<Mention> sortMentionsForPronoun(List<Mention> l, Mention m1, boolean sameSentence) {
    List<Mention> sorted = new ArrayList<Mention>();
    if (!sameSentence) {
        return sorted;
    }

    Tree tree = m1.contextParseTree;
    Tree current = m1.mentionSubTree;
    // ancestor() returns null when the node has no parent inside `tree` (for example when the
    // mention subtree is the root itself), so test it before dereferencing instead of after.
    while ((current = current.ancestor(1, tree)) != null) {
        String category = current.label().value();
        if (category.startsWith("S")) {
            for (Mention m : l) {
                if (!sorted.contains(m) && current.dominates(m.mentionSubTree)) {
                    sorted.add(m);
                }
            }
        }
        if (category.equals("ROOT")) {
            break;
        }
    }

    // NOTE(review): the fallback `sorted = l` below only runs when FINEST logging is enabled,
    // so the returned list depends on the log level. Kept as is to preserve existing results;
    // confirm whether the fallback should be unconditional.
    if (SieveCoreferenceSystem.logger.isLoggable(Level.FINEST)) {
        if (l.size() != sorted.size()) {
            SieveCoreferenceSystem.logger.finest("sorting failed!!! -> parser error?? \tmentionID: "
                    + m1.mentionID + " " + m1.spanToString());
            sorted = l;
        } else if (!l.equals(sorted)) {
            SieveCoreferenceSystem.logger.finest("sorting succeeded & changed !! \tmentionID: "
                    + m1.mentionID + " " + m1.spanToString());
            for (int i = 0; i < l.size(); i++) {
                Mention ml = l.get(i);
                Mention msorted = sorted.get(i);
                SieveCoreferenceSystem.logger
                        .finest("\t[" + ml.spanToString() + "]\t[" + msorted.spanToString() + "]");
            }
        } else {
            SieveCoreferenceSystem.logger
                    .finest("no changed !! \tmentionID: " + m1.mentionID + " " + m1.spanToString());
        }
    }
    return sorted;
}

From source file:org.devboost.stanford.language.LanguageCreator.java

License:Open Source License

/**
 * Recursively collects a {@code Word} for every node under {@code tree} whose label is a
 * {@link CoreLabel} carrying a word and whose parent's category (with every "$" replaced by
 * "S") names an {@code EClass} in {@code LanguagePackage}.
 *
 * @param root root of the whole tree, used to look up each node's parent
 * @param tree the subtree to process
 * @return the words found in {@code tree}, in depth-first order; never {@code null}
 */
private static List<Word> createWords(Tree root, Tree tree) {
    List<Word> words = new ArrayList<Word>();
    Label label = tree.label();
    if (label instanceof CoreLabel) {
        CoreLabel coreLabel = (CoreLabel) label;
        int beginPosition = coreLabel.beginPosition();
        int endPosition = coreLabel.endPosition();
        String originalText = coreLabel.word();
        if (originalText != null) {
            // ancestor() returns null when the node has no parent within root.
            Tree parent = tree.ancestor(1, root);
            Label parentLabel = (parent == null) ? null : parent.label();
            if (parentLabel instanceof CoreLabel) {
                String parentCategory = ((CoreLabel) parentLabel).category();
                // Check for null before normalising; the category may be absent.
                if (parentCategory != null) {
                    parentCategory = parentCategory.replace("$", "S");
                    EClassifier classifier = LanguagePackage.eINSTANCE.getEClassifier(parentCategory);
                    if (classifier instanceof EClass) {
                        Word word = (Word) LanguageFactory.eINSTANCE.create((EClass) classifier);
                        word.setText(originalText);
                        word.setBegin(beginPosition);
                        word.setEnd(endPosition);
                        words.add(word);
                    }
                }
            }
        }
    }
    for (Tree child : tree.getChildrenAsList()) {
        words.addAll(createWords(root, child));
    }
    return words;
}

From source file:qmul.util.parse.CreateTreeFromDCPSE.java

License:Open Source License

/**
 * Builds a Stanford {@link Tree} from line-oriented text, reading the input one character at a
 * time. Each processed line yields one tree node; the number of leading spaces on a line,
 * compared with the previous line, decides where the node is attached. Reading stops at end of
 * input or at a line whose collected text is whitespace only.
 *
 * <p>Uses {@code options}, {@code tf}, {@code IGNORE_MARKER} and the option keys
 * ({@code INCLUDE_NO_PAUSE} etc.), all of which are defined outside this method.
 *
 * @param reader
 *            a {@link Reader}
 * @return the Stanford {@link Tree}; a childless node labelled "EMPTY" when no node was built
 *         but some skipped text was seen; otherwise the first node built, which may be
 *         {@code null}
 */
public static Tree makeTree(Reader reader) {
    if (options == null) {
        setDefaultOptions();
    }
    // Shared (empty) child list handed to every new node.
    List<Tree> children = new ArrayList<Tree>();
    // t0: node for the current line; tPrev: node for the previous line; tAll: first node built.
    Tree t0 = null;
    Tree tPrev = null;
    Tree tAll = null;
    Tree tTemp = null;
    int n = 0;
    // Leading-space counts of the current and previous processed lines.
    int countspace = 0;
    int countspaceprevious = 0;
    // Assigned below but never read in this method.
    int countspacepreviousprevious = 0;
    // 'y' right after a newline (while leading spaces are still being counted), 'x' otherwise.
    char c1 = 'x';
    // countspaceprevious - countspace; Integer.MAX_VALUE until the first indented line is seen.
    int childWhere = Integer.MAX_VALUE;
    // Text collected for the current line (leading spaces excluded).
    String gads = "";
    // Text collected from lines that are skipped because they start with '['.
    String otherStuff = "";
    String[] gadsWord = null;
    boolean isAword = false;// do not change
    boolean processLine = true;// do not change

    try {
        while ((n = reader.read()) != -1) {
            char c = (char) n;
            // A '[' before any other text on the line: skip the rest of this line.
            if (c == '[' && gads.matches("")) {
                processLine = false;
                // System.out.println(otherStuff);
                otherStuff = "";
            }

            if (processLine) {
                if (c == '\n') {
                    if (gads.matches("^\\s+$")) {
                        // we've hit a line containing only whitespace: end of the tree
                        break;
                    }
                    // Each enabled option replaces a matching line with IGNORE_MARKER so that
                    // no node is created for it.
                    if (options.get(INCLUDE_NO_PAUSE)) {
                        if (gads.contains("PAUSE")) {
                            gads = IGNORE_MARKER;
                        }
                    }
                    // remove "ignored" nodes; unless we need to keep them to work out features, in which case we'll
                    // remove them later in DCPSECorpus
                    if (options.get(INCLUDE_NO_IGNORE) && !options.get(PP_LEXICAL_FEATURES)) {
                        if (gads.contains("ignore)")) {
                            gads = IGNORE_MARKER;
                        }
                    }
                    if (options.get(INCLUDE_NO_UMM)) {
                        if (gads.contains("DISMK,INTERJEC")) {
                            gads = IGNORE_MARKER;
                        }
                    }
                    if (options.get(INCLUDE_NO_REACT)) {
                        if (gads.contains("DISMK,REACT")) {
                            gads = IGNORE_MARKER;
                        }
                    }
                    if (options.get(INCLUDE_NO_UNCLEAR)) {
                        if (gads.contains("INDET,?")) {
                            gads = IGNORE_MARKER;
                        }
                    }
                    // A '{' marks a line that carries a word: the first whitespace-separated
                    // token becomes the node label, the remaining tokens the word (see below).
                    if (gads.contains("{")) {
                        // remove all annoying browser markup
                        gadsWord = gads.replaceAll("\\[.*?\\]", "").split("\\s+");
                        gads = gadsWord[0];
                        isAword = true;
                    }
                    if (options.get(INCLUDE_NO_BRACKETS)) {
                        if (gads.contains("(")) {
                            gads = gads.replaceAll("\\(.+\\)", "");
                        }
                    }
                    // Drop everything up to and including the first comma.
                    // NOTE(review): IGNORE_MARKER is used as a regex by String.matches here and
                    // below; this presumably relies on it containing no regex metacharacters.
                    if (options.get(CATEGORIES_NOT_FUNCTIONS) && !gads.matches(IGNORE_MARKER)) {
                        gads = gads.replaceFirst(".*?,", "");
                    }
                    if (!gads.matches(IGNORE_MARKER)) {
                        tPrev = t0;
                        t0 = tf.newTreeNode(gads.trim(), children);
                        if (childWhere == Integer.MAX_VALUE) {
                            // No indented line seen yet: this node becomes the result tree.
                            tAll = t0;
                        } else if (childWhere >= 0) {
                            // up x
                            // Same or smaller indent than the previous line: attach to the
                            // ancestor childWhere + 1 levels above the previous node.
                            tTemp = tPrev.ancestor(childWhere + 1, tAll);
                            if (tTemp == null) {
                                System.out.println("c1 = " + c1);
                                System.out.println("gads = " + gads);
                                System.out.println("t0 = ");
                                t0.indentedListPrint();
                                System.out.println("tPrev = ");
                                tPrev.indentedListPrint();
                                System.out.println("tAll = ");
                                tAll.indentedListPrint();
                                System.err.println("ERROR: null ancestor at " + (childWhere + 1) + " " + tAll);
                            }
                            // NOTE(review): when tTemp is null the diagnostics above are printed
                            // and this call then throws a NullPointerException.
                            tTemp.addChild(t0);
                        } else if (childWhere < 0) {
                            // down one level
                            tPrev.addChild(t0);
                        }
                        if (isAword) {
                            tPrev = t0;
                            // NOTE(review): assumes the split produced at least two tokens;
                            // gadsWord[1] throws otherwise.
                            String wordLabel = gadsWord[1];
                            for (int iWord = 2; iWord < gadsWord.length; iWord++) {
                                wordLabel += " " + gadsWord[iWord];
                            }
                            // The word itself is added as a leaf under the node just created.
                            tTemp = tf.newLeaf(wordLabel.trim());
                            tPrev.addChild(tTemp);
                            isAword = false;
                            tTemp = null;
                        }
                    }
                    if (gads.matches(IGNORE_MARKER)) {// reset previous counter if is a line to ignore
                        countspaceprevious = countspaceprevious + childWhere;

                    }
                    gads = "";
                    c1 = 'y';
                } else if (c1 == 'y' && c == ' ') {// was just a return character and is space
                    countspace++;
                } else {// not a leading space or a return character
                    gads += c;
                    c1 = 'x';
                    // First non-space character of an indented line: record the indent change
                    // relative to the previous line and reset the counter.
                    if (countspace != 0) {
                        childWhere = countspaceprevious - countspace;
                        countspaceprevious = countspace;
                        countspacepreviousprevious = countspaceprevious;
                        countspace = 0;
                    }
                }
            } else if (c == '\n') { // (if not processLine = True)
                processLine = true;
            } else { // processLine = false and not a return character
                otherStuff += c;
            }
        }
    } catch (IOException ioe) {
        // Read failures are reported on stderr; whatever was built so far is still returned.
        System.err.println("IOException: " + ioe.getMessage());
    }
    if (tAll == null && !otherStuff.isEmpty()) {
        Tree tSpec = tf.newTreeNode("EMPTY", children);
        return tSpec;
    } else {
        return tAll;
    }
}

From source file:qmul.util.parse.CreateTreeFromSWBD.java

License:Open Source License

/**
 * Builds a Stanford {@link Tree} from bracketed text, reading the input one character at a
 * time. Reading stops at end of input, when the bracket count returns to zero on a processed
 * token, or when the collected token matches {@code E_S}.
 *
 * <p>Uses {@code options}, {@code tf} and {@code IGNORE_MARKER}, all of which are defined
 * outside this method.
 *
 * @param reader
 *            a {@link Reader}
 * @return the Stanford {@link Tree}, or a childless node labelled "EMPTY" when no node was
 *         built
 */
public static Tree makeTree(Reader reader) {
    if (options == null) {
        setDefaultOptions();
    }
    // Shared (empty) child list handed to every new non-leaf node.
    List<Tree> children = new ArrayList<Tree>();
    // t0: node for the current token; tPrev: node for the previous token; tAll: first node built.
    Tree t0 = null;
    Tree tPrev = null;
    Tree tAll = null;
    Tree tTemp = null;
    int n = 0;
    // funcStr[0]: delimiter characters collected before the current token;
    // funcStr[1]: delimiter characters collected after it (moved to funcStr[0] once processed).
    String funcStr[] = { "", "" };
    int openBrackets = 0;
    int closeBrackets = 0;
    // Running count of '(' minus ')' seen so far.
    int totalBrackets = 0;
    // Integer.MAX_VALUE until the first node has been created, then 0.
    int childWhere = Integer.MAX_VALUE;
    // Text of the token currently being collected.
    String gads = "";
    // otherStuff and gadsWord are declared but not used in this method.
    String otherStuff = "";
    String[] gadsWord = null;
    boolean isAword = false;// do not change
    boolean wasAword = false;// do not change
    boolean processLine = false;// do not change

    try {
        while ((n = reader.read()) != -1) {
            char c = (char) n;
            char charsToIgnore[] = { '.', ',', '?', '\n', '\t', '\r' };

            // NOTE(review): reference comparison on a String. It is true only when gads was
            // assigned IGNORE_MARKER itself (as done further down); the later checks use
            // gads.matches(IGNORE_MARKER) instead.
            if (gads == IGNORE_MARKER) {
                // Ignored line: discard characters until the end of the line.
                if (c == '\n') {
                    gads = "";
                }
            } else {
                // Punctuation and line-break/tab characters are all mapped to '~'.
                for (int i = 0; i < charsToIgnore.length; i++) {
                    if (c == charsToIgnore[i]) {
                        c = '~';
                    }
                }
                if (c == '(' || c == ')' || c == ' ' || c == '~') {
                    if (c == '(') {
                        totalBrackets++;
                    } else if (c == ')') {
                        totalBrackets--;
                    }
                    if (gads.matches("") && totalBrackets != 0) {
                        // there is nothing yet to process. Collect brackets
                        funcStr[0] += c;
                        processLine = false;
                    } else if (totalBrackets == 0) {
                        processLine = true;
                    } else {
                        processLine = true;
                        // Something needs to be put on a tree... I think
                        funcStr[1] += c; // start collecting next set of function stuff
                        if (funcStr[0].matches("^\\s$")) {
                            // need to put something here to prevent it having a fit when multiple words and
                            // also to ignore those which are part of the function
                            if (c != ' ' || (c == '~' && openBrackets <= 0)) {
                                isAword = true;
                                if (!wasAword) {
                                    openBrackets++;
                                } else {
                                    openBrackets--;
                                }
                            } else {
                                processLine = false;
                                gads += c;
                            }
                        } else if (openBrackets < 0 && gads.matches("^[a-zA-Z][a-z]+$")) {
                            isAword = true;
                            if (c == ' ') {
                                processLine = false;
                                gads += c;
                            } else {
                                // hold previous brackets and reset own...
                                for (int i = 0; i < closeBrackets; i++) {
                                    funcStr[1] += ')';
                                }
                                for (int j = 0; j < openBrackets; j++) {
                                    funcStr[1] += '(';
                                }
                                closeBrackets = 0;
                                for (int k = 0; k < funcStr[0].length(); k++) {
                                    if (funcStr[0].charAt(k) == '(') {
                                        openBrackets++;
                                    } else if (funcStr[0].charAt(k) == ')') {
                                        closeBrackets++;
                                    }
                                }
                            }
                        } else {
                            // Count the brackets that preceded this token.
                            for (int j = 0; j < funcStr[0].length(); j++) {
                                if (funcStr[0].charAt(j) == '(') {
                                    openBrackets++;
                                } else if (funcStr[0].charAt(j) == ')') {
                                    closeBrackets++;
                                }
                            }
                        }
                    }
                } else if (c != '~') {
                    gads += c;
                }
                // An empty or single-whitespace token is not processed while brackets are
                // still open or before the first node exists.
                if ((gads.matches("^\\s$") || gads.matches(""))) {
                    if (totalBrackets != 0 || tAll == null) {
                        processLine = false;
                    }
                }
                // A token starting with "*x*" marks the rest of the line as ignored.
                if (gads.matches("^\\*x\\*")) {
                    gads = IGNORE_MARKER;
                }
                // // this is actually done later in SwitchboardCorpus using a NodeFilter
                // if (options.get(INCLUDE_NO_INTJ)) {
                // if (gads.contains("INTJ")) {
                // gads = IGNORE_MARKER;
                // }
                // }
            }
            if (processLine) {
                if (gads.matches("E\\_S") || totalBrackets == 0) {
                    // we've hit an end of segment; end the tree
                    System.out.println("end of segment");
                    break;
                }
                if (!gads.matches(IGNORE_MARKER)) {
                    // System.out.println("gads is: " + gads);
                    tPrev = t0;
                    // Words become leaves; everything else becomes an internal node.
                    if (isAword) {
                        t0 = tf.newLeaf(gads);
                    } else {
                        t0 = tf.newTreeNode(gads, children);
                    }
                    if (childWhere == Integer.MAX_VALUE) {
                        // System.out.println("It is the first in the tree");
                        tAll = t0;// set initially
                        childWhere = 0;
                    } else if (openBrackets <= closeBrackets) {
                        // System.out.println("It should be going up " + (closeBrackets-openBrackets));
                        // up x
                        if (openBrackets < 0) {
                            openBrackets++;
                        }
                        // Attach to the ancestor (closeBrackets - openBrackets) + 1 levels
                        // above the previous node.
                        tTemp = tPrev.ancestor((closeBrackets - openBrackets) + 1, tAll);
                        if (tTemp == null) {
                            System.out.println("open = " + openBrackets);
                            System.out.println("close = " + closeBrackets);
                            System.out.println("gads = " + gads);
                            System.out.println("t0 = ");
                            t0.indentedListPrint();
                            System.out.println("tPrev = ");
                            tPrev.indentedListPrint();
                            System.out.println("tAll = ");
                            tAll.indentedListPrint();
                            // NOTE(review): the message reports childWhere + 1, not the height
                            // that was actually requested above.
                            System.err.println("ERROR: null ancestor at " + (childWhere + 1) + " " + tAll);
                        }
                        // NOTE(review): when tTemp is null the diagnostics above are printed
                        // and this call then throws a NullPointerException.
                        tTemp.addChild(t0);
                        if (isAword) {
                            // System.out.println("It is a word");
                            openBrackets = 0;
                            closeBrackets = 0;
                        }
                        // tPrev.addChild(t0);
                    } else if (openBrackets > closeBrackets) {
                        // down one level
                        if (isAword) {
                            // System.out.println("It is a word");
                            openBrackets--;
                        }
                        // System.out.println("It should be going down one");
                        tPrev.addChild(t0);
                    }
                }
                // Reset the per-token state before collecting the next token.
                if (!isAword) {
                    openBrackets = 0;
                    closeBrackets = 0;
                    wasAword = false;
                } else {
                    wasAword = true;
                    isAword = false;
                    openBrackets--;
                    // System.out.println("closeBrackets is: "+ closeBrackets);
                }
                gads = "";
                processLine = false;
                funcStr[0] = funcStr[1];
                funcStr[1] = "";
            }
        }
    } catch (IOException ioe) {
        // Read failures are reported on stderr; whatever was built so far is still returned.
        System.err.println("IOException: " + ioe.getMessage());
    }
    if (tAll == null) {
        Tree tSpec = tf.newTreeNode("EMPTY", children);
        return tSpec;
    } else {
        // tAll.indentedListPrint();
        return tAll;
    }
}

From source file:uk.ac.gla.mir.util.TripletExtractor.java

License:Open Source License

/**
 * Extracts triplets from {@code sentence}, working on the deepest embedded sentence found by
 * {@code getDeepestSentence} first. After handling that clause (and removing it from its parent
 * when its child index is known) the method calls itself again on the modified tree. When no
 * deepest sentence is found, the whole sentence is processed directly.
 *
 * <p>Works through the class's static scratch fields ({@code deepestSentence},
 * {@code tmpTriplets}, {@code e}, ...), which are reset at the start of every call.
 *
 * @param sentence root of the sentence parse tree; may be modified by removing clauses
 * @param leaves leaves passed on to {@code extractTriplet}
 */
private static void getTriplets(final Tree sentence, List<Tree> leaves) {
    Comparator<KeyValue<Integer, Entity>> cmp = new Comparator<KeyValue<Integer, Entity>>() {
        public int compare(KeyValue<Integer, Entity> left, KeyValue<Integer, Entity> right) {
            return left.getValue().compareTo(right.getValue());
        }
    };

    // Reset the static scratch state before searching for the deepest clause.
    deepestSentence = null;
    dependancy = "";
    toDependancy = "";
    e = null;
    deepestVerbPhrase = null;
    dependancyDepth = 0;
    depthOfSentence = -10;
    childNumberOfDeepestSenctence = -1;
    getDeepestSentence(sentence, 0);

    if (deepestSentence == null) {
        // No embedded clause: process the sentence as a whole.
        tmpTriplets = new ArrayList<Triplet>();
        extractTriplet(sentence, leaves);
        addAttribute(cmp, sentence, sentence);
        return;
    }

    tmpTriplets = new ArrayList<Triplet>();
    Triplet clauseTriplet = extractTriplet(deepestSentence, leaves);
    final Tree clauseParent = deepestSentence.ancestor(1, sentence);
    if (clauseParent == null) {
        return;
    }
    if (childNumberOfDeepestSenctence != -1) {
        addAttribute(cmp, sentence, deepestSentence);
        clauseParent.removeChild(childNumberOfDeepestSenctence);
    }
    if (clauseParent.value().equalsIgnoreCase("ROOT")) {
        return;
    }

    // Scan the siblings to the left of the clause's parent, nearest first, for a noun.
    Tree grandParent = clauseParent.ancestor(1, sentence);
    int siblingIndex = grandParent.indexOf(clauseParent) - 1;
    while (siblingIndex >= 0) {
        Tree candidate = grandParent.getChild(siblingIndex);
        nounFound = false;
        e = new Entity();
        extractLastNoun(candidate);
        clauseTriplet.lastNP = e;
        if (!clauseTriplet.lastNP.isEmpty() && clauseTriplet.subject.isEmpty()) {
            clauseTriplet.subject = e;
            clauseTriplet.subject.attributes = getSubjectAttributes(e.tree.ancestor(1, sentence),
                    true, sentence, e.tree);
        }
        if (!clauseTriplet.lastNP.isEmpty()) {
            break;
        }
        siblingIndex--;
    }

    getTriplets(sentence, leaves);
}

From source file:uk.ac.gla.mir.util.TripletExtractor.java

License:Open Source License

/**
 * Attaches the leaves of {@code deepSentence} that lie between the subject/verb/object entities
 * of the triplets in {@code tmpTriplets} to those entities as attributes. A leaf whose parent is
 * tagged {@code RP} is appended to the name of the most recently seen verb instead, when there
 * is a non-empty one.
 *
 * @param cmp currently unused. NOTE(review): the indexed entities are never sorted with it;
 *            confirm whether a sort was intended
 * @param rootSentence tree used to look up each leaf's parent
 * @param deepSentence tree whose leaves are distributed
 */
private static void addAttribute(Comparator cmp, Tree rootSentence, Tree deepSentence) {
    List<Tree> leaves = deepSentence.getLeaves();
    TIntHashSet intSet = new TIntHashSet();
    ArrayList<KeyValue<Integer, Entity>> tripletEntity2Index = new ArrayList<KeyValue<Integer, Entity>>();
    for (int i = 0; i < tmpTriplets.size(); i++) {
        Triplet triplet = tmpTriplets.get(i);
        if (hasNonBlankName(triplet.subject)) {
            indexTripletEntity(triplet.subject, leaves, intSet, tripletEntity2Index);
        }
        if (hasNonBlankName(triplet.verb)) {
            triplet.verb.isVerb = true;
            indexTripletEntity(triplet.verb, leaves, intSet, tripletEntity2Index);
        }
        if (hasNonBlankName(triplet.object)) {
            indexTripletEntity(triplet.object, leaves, intSet, tripletEntity2Index);
        }
    }

    Entity closetVerb = null;
    if (tripletEntity2Index.size() == 0)
        return;

    // Leaves before each indexed entity are attached to that entity.
    int EndInterval = 0, StartInterval = -1;
    for (int i = 0; i < leaves.size(); i++) {
        final KeyValue<Integer, Entity> keyValue = tripletEntity2Index.get(EndInterval);
        Entity entity = keyValue.getKey();
        if (entity.isVerb)
            closetVerb = entity;
        int index = keyValue.getValue();

        if (i == index) {
            StartInterval = index;
            EndInterval++;

            if (EndInterval == tripletEntity2Index.size()) {
                break;
            }

        } else if (i > StartInterval && i < index) {
            attachLeafAttribute(leaves.get(i), rootSentence, entity, closetVerb);
        }
    }

    // No entity index matched any leaf, so there is no "last matched entity" for the trailing
    // leaves; the original code called get(-1) here and threw IndexOutOfBoundsException.
    if (EndInterval == 0) {
        return;
    }

    // Leaves after the last matched entity are attached to that entity.
    final Entity lastEntity = tripletEntity2Index.get(EndInterval - 1).getKey();
    for (int i = StartInterval + 1; i < leaves.size(); i++) {
        attachLeafAttribute(leaves.get(i), rootSentence, lastEntity, closetVerb);
    }
}

/** Returns true when the entity exists and its name is not blank after trimming. */
private static boolean hasNonBlankName(Entity entity) {
    return entity != null && !entity.name.trim().equalsIgnoreCase("");
}

/**
 * Clears the entity's attributes and records it with its leaf index, unless another entity has
 * already been recorded for the same index.
 */
private static void indexTripletEntity(Entity entity, List<Tree> leaves, TIntHashSet seenIndices,
        List<KeyValue<Integer, Entity>> indexed) {
    entity.attributes.clear();
    int value = findIndex(entity.name, leaves);
    if (!seenIndices.contains(value)) {
        KeyValue<Integer, Entity> keyValue = new KeyValue<Integer, Entity>(entity, value);
        indexed.add(keyValue);
        seenIndices.add(value);
    }
}

/**
 * Appends the leaf to the closest verb's name when the leaf's parent is tagged RP and such a
 * verb exists; otherwise adds the leaf as an attribute of {@code owner}.
 */
private static void attachLeafAttribute(Tree leaf, Tree rootSentence, Entity owner, Entity closestVerb) {
    String parentTag = leaf.ancestor(1, rootSentence).value();
    Entity attrib = new Entity(leaf.value(), parentTag);
    if (parentTag.equalsIgnoreCase("RP") && closestVerb != null && !closestVerb.isEmpty())
        closestVerb.name += " " + leaf;
    else
        owner.attributes.add(attrib);
}