Example usage for edu.stanford.nlp.trees Tree numChildren

Introduction

In this page you can find the example usage for edu.stanford.nlp.trees Tree numChildren.

Prototype

public int numChildren()

Source Link

Document

Says how many children a tree node has in its local tree.

Usage

From source file:KleinBilingualParser.java

private static double numChildren(Tree nodeF, Tree nodeE) {
    if (nodeF.numChildren() == nodeE.numChildren()) {
        return 1.0 / 10;
    } else {/*from   w ww . ja v a 2 s. com*/
        return 0.0;
    }
}

From source file:conditionalCFG.ConditionalCFGParser.java

License:Open Source License

public double validateBinarizedTree(Tree tree, int start) {
    if (tree.isLeaf()) {
        return 0.0;
    }//from   w  ww . ja  va 2 s.  c o m
    float epsilon = 0.0001f;
    if (tree.isPreTerminal()) {
        String wordStr = tree.children()[0].label().value();
        int tag = tagIndex.indexOf(tree.label().value());
        int word = wordIndex.indexOf(wordStr);
        IntTaggedWord iTW = new IntTaggedWord(word, tag);
        float score = lex.score(iTW, start, wordStr, null);
        float bound = iScore[start][start + 1][stateIndex.indexOf(tree.label().value())];
        if (score > bound + epsilon) {
            System.out.println("Invalid tagging:");
            System.out.println("  Tag: " + tree.label().value());
            System.out.println("  Word: " + tree.children()[0].label().value());
            System.out.println("  Score: " + score);
            System.out.println("  Bound: " + bound);
        }
        return score;
    }
    int parent = stateIndex.indexOf(tree.label().value());
    int firstChild = stateIndex.indexOf(tree.children()[0].label().value());
    if (tree.numChildren() == 1) {
        UnaryRule ur = new UnaryRule(parent, firstChild);
        double score = SloppyMath.max(ug.scoreRule(ur), -10000.0)
                + validateBinarizedTree(tree.children()[0], start);
        double bound = iScore[start][start + tree.yield().size()][parent];
        if (score > bound + epsilon) {
            System.out.println("Invalid unary:");
            System.out.println("  Parent: " + tree.label().value());
            System.out.println("  Child: " + tree.children()[0].label().value());
            System.out.println("  Start: " + start);
            System.out.println("  End: " + (start + tree.yield().size()));
            System.out.println("  Score: " + score);
            System.out.println("  Bound: " + bound);
        }
        return score;
    }
    int secondChild = stateIndex.indexOf(tree.children()[1].label().value());
    BinaryRule br = new BinaryRule(parent, firstChild, secondChild);
    double score = SloppyMath.max(bg.scoreRule(br), -10000.0) + validateBinarizedTree(tree.children()[0], start)
            + validateBinarizedTree(tree.children()[1], start + tree.children()[0].yield().size());
    double bound = iScore[start][start + tree.yield().size()][parent];
    if (score > bound + epsilon) {
        System.out.println("Invalid binary:");
        System.out.println("  Parent: " + tree.label().value());
        System.out.println("  LChild: " + tree.children()[0].label().value());
        System.out.println("  RChild: " + tree.children()[1].label().value());
        System.out.println("  Start: " + start);
        System.out.println("  End: " + (start + tree.yield().size()));
        System.out.println("  Score: " + score);
        System.out.println("  Bound: " + bound);
    }
    return score;
}

From source file:conditionalCFG.ConditionalCFGParser.java

License:Open Source License

public double scoreBinarizedTree(Tree tree, int start, int debugLvl) {
    if (tree.isLeaf()) {
        return 0.0;
    }//from   www.  ja v a2s .c om
    if (tree.isPreTerminal()) {
        String wordStr = tree.children()[0].label().value();
        int tag = tagIndex.indexOf(tree.label().value());
        int word = wordIndex.indexOf(wordStr);
        IntTaggedWord iTW = new IntTaggedWord(word, tag);
        // if (lex.score(iTW,(leftmost ? 0 : 1)) == Double.NEGATIVE_INFINITY) {
        //   System.out.println("NO SCORE FOR: "+iTW);
        // }
        float score = lex.score(iTW, start, wordStr, null);
        tree.setScore(score);
        if (debugLvl > 0)
            System.out.println(score + " " + tree.getSpan());
        return score;
    }
    int parent = stateIndex.indexOf(tree.label().value());
    int firstChild = stateIndex.indexOf(tree.children()[0].label().value());
    if (tree.numChildren() == 1) {
        UnaryRule ur = new UnaryRule(parent, firstChild);
        //+ DEBUG
        // if (ug.scoreRule(ur) < -10000) {
        //        System.out.println("Grammar doesn't have rule: " + ur);
        // }
        //      return SloppyMath.max(ug.scoreRule(ur), -10000.0) + scoreBinarizedTree(tree.children()[0], leftmost);
        double score = ug.scoreRule(ur) + scoreBinarizedTree(tree.children()[0], start, debugLvl)
                + lex.score(ur, start, start + tree.children()[0].yield().size());
        tree.setScore(score);
        if (debugLvl > 0)
            System.out.println(score + " " + tree.getSpan());
        return score;
    }
    int secondChild = stateIndex.indexOf(tree.children()[1].label().value());
    BinaryRule br = new BinaryRule(parent, firstChild, secondChild);
    //+ DEBUG
    // if (bg.scoreRule(br) < -10000) {
    //  System.out.println("Grammar doesn't have rule: " + br);
    // }
    //    return SloppyMath.max(bg.scoreRule(br), -10000.0) +
    //            scoreBinarizedTree(tree.children()[0], leftmost) +
    //            scoreBinarizedTree(tree.children()[1], false);
    int sz0 = tree.children()[0].yield().size();
    double score = bg.scoreRule(br) + scoreBinarizedTree(tree.children()[0], start, debugLvl)
            + scoreBinarizedTree(tree.children()[1], start + sz0, debugLvl)
            + lex.score(br, start, start + sz0 + tree.children()[1].yield().size(), start + sz0);
    tree.setScore(score);
    if (debugLvl > 0)
        System.out.println(score + " " + tree.getSpan() + " " + (sz0 + start));
    return score;
}

From source file:edu.cmu.ark.AnalysisUtilities.java

License:Open Source License

public static boolean cCommands(Tree root, Tree n1, Tree n2) {
    if (n1.dominates(n2))
        return false;

    Tree n1Parent = n1.parent(root);
    while (n1Parent != null && n1Parent.numChildren() == 1) {
        n1Parent = n1Parent.parent(root);
    }//w w  w . jav a 2s.  co m

    if (n1Parent != null && n1Parent.dominates(n2))
        return true;

    return false;
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * e.g., John and James like Susan.  ->  John likes Susan.
 * //www .j  av  a2 s.com
 */
private void extractConjoinedNPs(Collection<Question> extracted, Question input) {
    String tregexOpStr;
    TregexPattern matchPattern;
    Tree conjoinedNode;
    Tree parent;

    TregexMatcher matcher;
    Question newQuestion;

    //only extract conjoined NPs that are arguments or adjuncts of the main verb
    // in the tree, this means the closest S will be the one under the root
    tregexOpStr = "NP=parent < (CONJP|CC !< or|nor [ "
            + " $+ /^(N.*|PRP|SBAR)$/=child $-- /^(N.*|PRP|SBAR)$/ | " //there must be a noun on each side of the conjunction
            + " $-- /^(N.*|PRP|SBAR)$/=child $+ /^(N.*|PRP|SBAR)$/ ] ) " //this avoids extracting from flat NPs such as "the smaller and darker form"
            + " !>> (/.*/ $ (CC|CONJP !< or|nor)) " //this cannot be nested within a larger conjunction or followed by a conjunction (we recur later to catch this) 
            + " !$ (CC|CONJP !< or|nor)" + " !.. (CC|CONJP !< or|nor > NP|PP|S|SBAR|VP) !>> SBAR ";
    //+ " >> (ROOT !< (S <+(VP) (/^VB.*$/ < are|were|be|seem|appear))) " ; //don't break plural predicate nominatives (e.g., "John and Mary are two of my best friends.")
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(input.getIntermediateTree());
    List<Integer> nodeIndexes = new ArrayList<Integer>();
    List<Integer> parentIDs = new ArrayList<Integer>();

    while (matcher.find()) {
        //store the parents' IDs (in the tree)
        parent = matcher.getNode("parent");
        parentIDs.add(parent.nodeNumber(input.getIntermediateTree()));

        conjoinedNode = matcher.getNode("child");
        //store the conjoined nodes' index into their parent's list of children
        int idx = parent.objectIndexOf(conjoinedNode);
        if (!nodeIndexes.contains(idx))
            nodeIndexes.add(idx);
    }

    //for each of the conjoined children,
    //create a new tree by removing all the nodes they are conjoined with
    Collections.sort(nodeIndexes);//sort, just to keep them in the original order
    for (int i = 0; i < nodeIndexes.size(); i++) {
        newQuestion = input.deeperCopy();

        Tree t = newQuestion.getIntermediateTree();
        parent = t.getNodeNumber(parentIDs.get(i));
        Tree gparent = parent.parent(t);
        conjoinedNode = parent.getChild(nodeIndexes.get(i));
        String siblingLabel;

        //Remove all the nodes that are conjoined
        //with the selected noun (or are conjunctions, commas).
        //These can have labels NP, NN, ..., PRP for pronouns, CC, "," for commas, ":" for semi-colons
        for (int j = 0; j < parent.numChildren(); j++) {
            if (parent.getChild(j) == conjoinedNode)
                continue;
            siblingLabel = parent.getChild(j).label().toString();
            if (siblingLabel.matches("^[NCP,:S].*")) {
                parent.removeChild(j);
                j--;
            }
        }

        //if there is an trivial unary "NP -> NP",
        //remove the parent and put the child in its place
        if (parent.numChildren() == 1 && parent.getChild(0).label().equals("NP")) {
            int tmpIndex = gparent.objectIndexOf(parent);
            gparent.removeChild(tmpIndex);
            gparent.addChild(tmpIndex, parent.getChild(0));
        }

        correctTense(conjoinedNode, gparent);
        addQuotationMarksIfNeeded(newQuestion.getIntermediateTree());

        //if(GlobalProperties.getDebug()) System.err.println("extractConjoinedNPs: "+newQuestion.getIntermediateTree().toString());
        if (this.getComputeFeatures)
            newQuestion.setFeatureValue("extractedFromConjoinedPhrases", 1.0); //old feature name
        if (this.getComputeFeatures)
            newQuestion.setFeatureValue("extractedFromConjoinedNPs", 1.0);
        extracted.add(newQuestion);
    }
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * e.g., John, who is a friend of mine, likes Susan. -> John is a friend of mine.
 * /*from w  w w . j  a  v  a 2s  .  co m*/
 */
private void extractNonRestrictiveRelativeClauses(Collection<Question> extracted, Question input) {
    String tregexOpStr;
    TregexPattern matchPattern;
    TregexMatcher matcher;
    TregexMatcher matcherclause;

    tregexOpStr = "NP=np < (SBAR=sbar [ < (WHADVP=wherecomp < (WRB < where)) " + " | < (WHNP !< /WP\\$/) "
            + " | < (WHNP=possessive < /WP\\$/)" //John, whose car was
            + " | < (WHPP < IN|TO=preposition) ] $-- NP $- /,/ " + " < S=relclause  !< WHADJP)";

    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(input.getIntermediateTree());

    //iterate over all the relative clauses in the input
    //and create an output sentence for each one.
    while (matcher.find()) {
        Tree missingArgumentTree = matcher.getNode("np");
        Tree relclause = matcher.getNode("relclause");
        if (missingArgumentTree == null || relclause == null)
            continue;
        missingArgumentTree = missingArgumentTree.deepCopy();
        relclause = relclause.deepCopy();
        Tree possessive = matcher.getNode("possessive");
        Tree sbar = matcher.getNode("sbar").deepCopy();

        makeDeterminerDefinite(missingArgumentTree);

        if (possessive != null) {
            possessive = possessive.deepCopy();
            possessive.removeChild(0);
            String newTree = "(NP (NP " + missingArgumentTree.toString() + " (POS 's))";
            for (int i = 0; i < possessive.numChildren(); i++)
                newTree += possessive.getChild(i).toString() + " ";
            newTree += ")";
            missingArgumentTree = QuestionUtil.readTreeFromString(newTree);
        }

        //remove the relative clause and the commas surrounding it from the missing argument tree
        for (int i = 0; i < missingArgumentTree.numChildren(); i++) {
            if (missingArgumentTree.getChild(i).equals(sbar)) {
                //remove the relative clause
                missingArgumentTree.removeChild(i);
                //remove the comma after the relative clause
                if (i < missingArgumentTree.numChildren()
                        && missingArgumentTree.getChild(i).label().toString().equals(",")) {
                    missingArgumentTree.removeChild(i);
                }
                //remove the comma before the relative clause
                if (i > 0 && missingArgumentTree.getChild(i - 1).label().toString().equals(",")) {
                    missingArgumentTree.removeChild(i - 1);
                    i--;
                }
                i--;
            }
        }

        //put the noun in the clause at the topmost place with an opening for a noun. 
        //Note that this may mess up if there are noun phrase adjuncts like "The man I met Tuesday".

        //specifically: 
        //the parent of the noun can be either a clause (S) as in "The man who met me"
        //or a verb phrase as in "The man who I met".
        //for verb phrases, add the noun to the end since it will be an object.
        //for clauses, add the noun to the beginning since it will be the subject.
        tregexOpStr = "S|VP=newparent !< NP < (VP=verb !< TO !$ TO)";
        matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
        matcherclause = matchPattern.matcher(relclause);
        boolean subjectMovement = true;
        if (!matcherclause.find()) {
            tregexOpStr = "VP=newparent !< VP < /VB.*/=verb !>> (S !< NP) !<< (VP !< VP !< NP)";
            matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
            matcherclause = matchPattern.matcher(relclause);
            subjectMovement = false;
        }

        //reset (so the first match isn't skipped)
        matcherclause = matchPattern.matcher(relclause);

        if (matcherclause.find()) {
            Tree newparenttree = matcherclause.getNode("newparent");
            Tree verbtree = matcherclause.getNode("verb");
            boolean ppRelativeClause = false;

            if (matcher.getNode("wherecomp") != null) {
                String tmp = "(PP (IN at) " + missingArgumentTree.toString() + ")";
                missingArgumentTree = QuestionUtil.readTreeFromString(tmp);
                ppRelativeClause = true;
                subjectMovement = false;
            } else if (matcher.getNode("preposition") != null) {
                String tmp = "(PP (IN " + matcher.getNode("preposition").yield().toString() + ") "
                        + missingArgumentTree.toString() + ")";
                missingArgumentTree = QuestionUtil.readTreeFromString(tmp);
                ppRelativeClause = true;
            }

            if (subjectMovement) { //subject
                newparenttree.addChild(newparenttree.objectIndexOf(verbtree), missingArgumentTree);
            } else { // newparentlabel is VP   
                if (ppRelativeClause)
                    newparenttree.addChild(newparenttree.numChildren(), missingArgumentTree);
                else
                    newparenttree.addChild(newparenttree.objectIndexOf(verbtree) + 1, missingArgumentTree);
            }

            //create a new tree with punctuation
            Tree newTree = factory.newTreeNode("ROOT", new ArrayList<Tree>());
            newTree.addChild(relclause);
            QuestionUtil.addPeriodIfNeeded(newTree);

            //if(GlobalProperties.getDebug()) System.err.println("extractRelativeClauses: "+ newTree.toString());
            addQuotationMarksIfNeeded(newTree);
            Question newTreeWithFeatures = input.deeperCopy();
            newTreeWithFeatures.setIntermediateTree(newTree);
            if (this.getComputeFeatures)
                newTreeWithFeatures.setFeatureValue("extractedFromRelativeClause", 1.0);
            addIfNovel(extracted, newTreeWithFeatures);
        }
    }
}

From source file:edu.cmu.ark.QuestionTransducer.java

License:Open Source License

/**
 *
 * This method removes the answer phrase from its original position
 * and places it at the front of the main clause.
 *
 * Note: Tsurgeon operations are perhaps not optimal here.
 * Using the Stanford API to move nodes directly might be simpler...
 *
 */// w  w  w . jav a  2s  .com
private List<Tree> moveWHPhraseUnmovable(Tree inputTree, Tree intermediateTree, int i,
        boolean subjectMovement) {
    Tree copyTree2;
    List<Tree> res = new ArrayList<Tree>();
    Tree mainclauseNode;

    String marker = "/^(UNMOVABLE-NP|UNMOVABLE-PP|UNMOVABLE-SBAR)-" + i + "$/";

    List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>();
    String tregexOpStr;
    TregexPattern matchPattern;

    //extract the "answer" phrase and generate a WH phrase from it
    tregexOpStr = "ROOT=root < (SQ=qclause << " + marker + "=answer < VP=predicate)";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    if (GlobalProperties.getDebug())
        System.err.println("moveWHPhrase: inputTree:" + inputTree.toString());
    if (GlobalProperties.getDebug())
        System.err.println("moveWHPhrase: tregexOpStr:" + tregexOpStr);
    TregexMatcher matcher = matchPattern.matcher(inputTree);
    matcher.find();
    Tree phraseToMove = matcher.getNode("answer");

    String whPhraseSubtree;

    if (printExtractedPhrases)
        System.out.println("EXTRACTED\t" + phraseToMove.yield().toString());

    whGen.generateWHPhraseSubtrees(removeMarkersFromTree(phraseToMove), intermediateTree.yield().toString());
    List<String> whPhraseSubtrees = whGen.getWHPhraseSubtrees();
    List<String> leftOverPrepositions = whGen.getLeftOverPrepositions();

    //copyTree = inputTree.deeperCopy();
    //The placeholder is necessary because tsurgeon will complain
    //if an added node has no children. This placeholder is removed below.
    //      ps.add(Tsurgeon.parseOperation("insert (PREPPLACEHOLDER dummy) $+ answer"));
    //      ps.add(Tsurgeon.parseOperation("prune answer"));
    //      ps.add(Tsurgeon.parseOperation("insert (SBARQ=mainclause PLACEHOLDER=placeholder) >0 root"));
    //      ps.add(Tsurgeon.parseOperation("move qclause >-1 mainclause"));
    //      p = Tsurgeon.collectOperations(ps);
    //      ops.add(new Pair<TregexPattern,TsurgeonPattern>(matchPattern,p));
    //      Tsurgeon.processPatternsOnTree(ops, copyTree);

    //copyTree = removeMarkersFromTree(copyTree);

    //Now put each WH phrase into the tree and remove the original answer.
    //Operate on the tree directly rather than using tsurgeon
    //because tsurgeon can't parse operations that insert trees with special characters (e.g., ":")
    for (int j = 0; j < whPhraseSubtrees.size(); j++) {
        copyTree2 = inputTree.deeperCopy();
        whPhraseSubtree = whPhraseSubtrees.get(j);

        //         if(GlobalProperties.getDebug()) System.err.println("moveWHPhrase: whPhraseSubtree:"+whPhraseSubtree);
        //         tregexOpStr = "ROOT < (SBARQ=mainclause < PLACEHOLDER=ph1) << (__=ph2Parent < PREPPLACEHOLDER=ph2)";
        //         matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
        //         matcher = matchPattern.matcher(copyTree2);
        //         if(!matcher.find()){
        //            continue;
        //         }
        matcher = matchPattern.matcher(copyTree2);
        matcher.find();
        mainclauseNode = matcher.getNode("answer");
        if (mainclauseNode == null)
            continue;
        //replace the wh placeholder with a wh phrase
        int cc = mainclauseNode.numChildren();
        for (int c = 0; c < cc; c++)
            mainclauseNode.removeChild(0);
        mainclauseNode.addChild(0, AnalysisUtilities.getInstance().readTreeFromString(whPhraseSubtree));

        copyTree2 = removeMarkersFromTree(copyTree2);
        //Replace the pp placeholder with the left over preposition.
        //This may happen when the answer phrase was a PP.
        //e.g., John went to the game. -> What did John go to?
        //         prepPlaceholderParent = matcher.getNode("ph2Parent");
        //         int index = prepPlaceholderParent.indexOf(matcher.getNode("ph2"));
        //         if(leftOverPreposition != null && leftOverPreposition.length()>0){
        //            prepPlaceholderParent.addChild(index, AnalysisUtilities.getInstance().readTreeFromString(leftOverPreposition));
        //         }
        //         //now remove the left-over-preposition placeholder
        //         ps.clear();
        //         ps.add(Tsurgeon.parseOperation("prune ph2"));
        //         p = Tsurgeon.collectOperations(ps);
        //         ops.clear();
        //         ops.add(new Pair<TregexPattern,TsurgeonPattern>(TregexPatternFactory.getPattern("PREPPLACEHOLDER=ph2"),p));
        //         Tsurgeon.processPatternsOnTree(ops, copyTree2);

        copyTree2 = moveLeadingAdjuncts(copyTree2);

        if (GlobalProperties.getDebug())
            System.err.println("moveWHPhrase: " + copyTree2.toString());
        res.add(copyTree2);
    }

    return res;
}

From source file:edu.cmu.ark.SentenceSimplifier.java

License:Open Source License

/**
 * e.g., John and James like Susan.  ->  John likes Susan.
 * /* w ww .  java 2s .c o  m*/
 */
private void extractConjoinedNPs(Collection<Question> extracted, Question input) {
    String tregexOpStr;
    TregexPattern matchPattern;
    Tree conjoinedNode;
    Tree parent;

    TregexMatcher matcher;
    Question newQuestion;

    //only extract conjoined NPs that are arguments or adjuncts of the main verb
    // in the tree, this means the closest S will be the one under the root
    tregexOpStr = "NP=parent < (CONJP|CC !< or|nor [ "
            + " $+ /^(N.*|PRP|SBAR)$/=child $-- /^(N.*|PRP|SBAR)$/ | " //there must be a noun on each side of the conjunction
            + " $-- /^(N.*|PRP|SBAR)$/=child $+ /^(N.*|PRP|SBAR)$/ ] ) " //this avoids extracting from flat NPs such as "the smaller and darker form"
            + " !>> (/.*/ $ (CC|CONJP !< or|nor)) " //this cannot be nested within a larger conjunction or followed by a conjunction (we recur later to catch this) 
            + " !$ (CC|CONJP !< or|nor)" + " !.. (CC|CONJP !< or|nor > NP|PP|S|SBAR|VP) !>> SBAR ";
    //+ " >> (ROOT !< (S <+(VP) (/^VB.*$/ < are|were|be|seem|appear))) " ; //don't break plural predicate nominatives (e.g., "John and Mary are two of my best friends.")
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(input.getIntermediateTree());
    List<Integer> nodeIndexes = new ArrayList<Integer>();
    List<Integer> parentIDs = new ArrayList<Integer>();

    while (matcher.find()) {
        //store the parents' IDs (in the tree)
        parent = matcher.getNode("parent");
        parentIDs.add(parent.nodeNumber(input.getIntermediateTree()));

        conjoinedNode = matcher.getNode("child");
        //store the conjoined nodes' index into their parent's list of children
        int idx = parent.indexOf(conjoinedNode);
        if (!nodeIndexes.contains(idx))
            nodeIndexes.add(idx);
    }

    //for each of the conjoined children,
    //create a new tree by removing all the nodes they are conjoined with
    Collections.sort(nodeIndexes);//sort, just to keep them in the original order
    for (int i = 0; i < nodeIndexes.size(); i++) {
        newQuestion = input.deeperCopy();

        Tree t = newQuestion.getIntermediateTree();
        parent = t.getNodeNumber(parentIDs.get(i));
        Tree gparent = parent.parent(t);
        conjoinedNode = parent.getChild(nodeIndexes.get(i));
        String siblingLabel;

        //Remove all the nodes that are conjoined
        //with the selected noun (or are conjunctions, commas).
        //These can have labels NP, NN, ..., PRP for pronouns, CC, "," for commas, ":" for semi-colons
        for (int j = 0; j < parent.numChildren(); j++) {
            if (parent.getChild(j) == conjoinedNode)
                continue;
            siblingLabel = parent.getChild(j).label().toString();
            if (siblingLabel.matches("^[NCP,:S].*")) {
                parent.removeChild(j);
                j--;
            }
        }

        //if there is an trivial unary "NP -> NP",
        //remove the parent and put the child in its place
        if (parent.numChildren() == 1 && parent.getChild(0).label().equals("NP")) {
            int tmpIndex = gparent.indexOf(parent);
            gparent.removeChild(tmpIndex);
            gparent.addChild(tmpIndex, parent.getChild(0));
        }

        correctTense(conjoinedNode, gparent);
        addQuotationMarksIfNeeded(newQuestion.getIntermediateTree());

        if (GlobalProperties.getDebug())
            System.err.println("extractConjoinedNPs: " + newQuestion.getIntermediateTree().toString());
        if (GlobalProperties.getComputeFeatures())
            newQuestion.setFeatureValue("extractedFromConjoinedPhrases", 1.0); //old feature name
        if (GlobalProperties.getComputeFeatures())
            newQuestion.setFeatureValue("extractedFromConjoinedNPs", 1.0);
        extracted.add(newQuestion);
    }
}

From source file:edu.cmu.ark.SentenceSimplifier.java

License:Open Source License

/**
 * e.g., John, who is a friend of mine, likes Susan. -> John is a friend of mine.
 * //from w  w  w  . jav a  2 s .  c o  m
 */
private void extractNonRestrictiveRelativeClauses(Collection<Question> extracted, Question input) {
    String tregexOpStr;
    TregexPattern matchPattern;
    TregexMatcher matcher;
    TregexMatcher matcherclause;

    tregexOpStr = "NP=np < (SBAR=sbar [ < (WHADVP=wherecomp < (WRB < where)) " + " | < (WHNP !< /WP\\$/) "
            + " | < (WHNP=possessive < /WP\\$/)" //John, whose car was
            + " | < (WHPP < IN|TO=preposition) ] $-- NP $- /,/ " + " < S=relclause  !< WHADJP)";

    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(input.getIntermediateTree());

    //iterate over all the relative clauses in the input
    //and create an output sentence for each one.
    while (matcher.find()) {
        Tree missingArgumentTree = matcher.getNode("np");
        Tree relclause = matcher.getNode("relclause");
        if (missingArgumentTree == null || relclause == null)
            continue;
        missingArgumentTree = missingArgumentTree.deeperCopy();
        relclause = relclause.deeperCopy();
        Tree possessive = matcher.getNode("possessive");
        Tree sbar = matcher.getNode("sbar").deeperCopy();

        makeDeterminerDefinite(missingArgumentTree);

        if (possessive != null) {
            possessive = possessive.deeperCopy();
            possessive.removeChild(0);
            String newTree = "(NP (NP " + missingArgumentTree.toString() + " (POS 's))";
            for (int i = 0; i < possessive.numChildren(); i++)
                newTree += possessive.getChild(i).toString() + " ";
            newTree += ")";
            missingArgumentTree = AnalysisUtilities.getInstance().readTreeFromString(newTree);
        }

        //remove the relative clause and the commas surrounding it from the missing argument tree
        for (int i = 0; i < missingArgumentTree.numChildren(); i++) {
            if (missingArgumentTree.getChild(i).equals(sbar)) {
                //remove the relative clause
                missingArgumentTree.removeChild(i);
                //remove the comma after the relative clause
                if (i < missingArgumentTree.numChildren()
                        && missingArgumentTree.getChild(i).label().toString().equals(",")) {
                    missingArgumentTree.removeChild(i);
                }
                //remove the comma before the relative clause
                if (i > 0 && missingArgumentTree.getChild(i - 1).label().toString().equals(",")) {
                    missingArgumentTree.removeChild(i - 1);
                    i--;
                }
                i--;
            }
        }

        //put the noun in the clause at the topmost place with an opening for a noun. 
        //Note that this may mess up if there are noun phrase adjuncts like "The man I met Tuesday".

        //specifically: 
        //the parent of the noun can be either a clause (S) as in "The man who met me"
        //or a verb phrase as in "The man who I met".
        //for verb phrases, add the noun to the end since it will be an object.
        //for clauses, add the noun to the beginning since it will be the subject.
        tregexOpStr = "S|VP=newparent !< NP < (VP=verb !< TO !$ TO)";
        matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
        matcherclause = matchPattern.matcher(relclause);
        boolean subjectMovement = true;
        if (!matcherclause.find()) {
            tregexOpStr = "VP=newparent !< VP < /VB.*/=verb !>> (S !< NP) !<< (VP !< VP !< NP)";
            matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
            matcherclause = matchPattern.matcher(relclause);
            subjectMovement = false;
        }

        //reset (so the first match isn't skipped)
        matcherclause = matchPattern.matcher(relclause);

        if (matcherclause.find()) {
            Tree newparenttree = matcherclause.getNode("newparent");
            Tree verbtree = matcherclause.getNode("verb");
            boolean ppRelativeClause = false;

            if (matcher.getNode("wherecomp") != null) {
                String tmp = "(PP (IN at) " + missingArgumentTree.toString() + ")";
                missingArgumentTree = AnalysisUtilities.getInstance().readTreeFromString(tmp);
                ppRelativeClause = true;
                subjectMovement = false;
            } else if (matcher.getNode("preposition") != null) {
                String tmp = "(PP (IN " + matcher.getNode("preposition").yield().toString() + ") "
                        + missingArgumentTree.toString() + ")";
                missingArgumentTree = AnalysisUtilities.getInstance().readTreeFromString(tmp);
                ppRelativeClause = true;
            }

            if (subjectMovement) { //subject
                newparenttree.addChild(newparenttree.indexOf(verbtree), missingArgumentTree);
            } else { // newparentlabel is VP   
                if (ppRelativeClause)
                    newparenttree.addChild(newparenttree.numChildren(), missingArgumentTree);
                else
                    newparenttree.addChild(newparenttree.indexOf(verbtree) + 1, missingArgumentTree);
            }

            //create a new tree with punctuation
            Tree newTree = factory.newTreeNode("ROOT", new ArrayList<Tree>());
            newTree.addChild(relclause);
            AnalysisUtilities.addPeriodIfNeeded(newTree);

            if (GlobalProperties.getDebug())
                System.err.println("extractRelativeClauses: " + newTree.toString());
            addQuotationMarksIfNeeded(newTree);
            Question newTreeWithFeatures = input.deeperCopy();
            newTreeWithFeatures.setIntermediateTree(newTree);
            if (GlobalProperties.getComputeFeatures())
                newTreeWithFeatures.setFeatureValue("extractedFromRelativeClause", 1.0);
            addIfNovel(extracted, newTreeWithFeatures);
        }
    }
}

From source file:edu.jhu.hlt.concrete.stanford.PreNERCoreMapWrapper.java

License:Open Source License

/**
*
* @param root//from ww  w  .j  av  a 2s. co  m
* @param left
* @param right
* @param n
*          is the length of the sentence is tokens.
* @param p
* @param tokenizationUUID
* @return The constituent ID
* @throws AnalyticException
*/
private static int constructConstituent(Tree root, int left, int right, int n, Parse p, UUID tokenizationUUID,
        HeadFinder hf) throws AnalyticException {

    Constituent constituent = new Constituent();
    constituent.setId(p.getConstituentListSize());
    constituent.setTag(root.value());
    constituent.setStart(left);
    constituent.setEnding(right);
    p.addToConstituentList(constituent);
    Tree headTree = null;
    if (!root.isLeaf()) {
        try {
            headTree = hf.determineHead(root);
        } catch (java.lang.IllegalArgumentException iae) {
            LOGGER.warn("Failed to find head, falling back on rightmost constituent.");
            headTree = root.children()[root.numChildren() - 1];
        }
    }
    int i = 0, headTreeIdx = -1;

    int leftPtr = left;
    for (Tree child : root.getChildrenAsList()) {
        int width = child.getLeaves().size();
        int childId = constructConstituent(child, leftPtr, leftPtr + width, n, p, tokenizationUUID, hf);
        constituent.addToChildList(childId);

        leftPtr += width;
        if (headTree != null && child == headTree) {
            assert (headTreeIdx < 0);
            headTreeIdx = i;
        }
        i++;
    }

    if (headTreeIdx >= 0)
        constituent.setHeadChildIndex(headTreeIdx);

    if (!constituent.isSetChildList())
        constituent.setChildList(new ArrayList<Integer>());
    return constituent.getId();
}