Example usage for edu.stanford.nlp.trees Tree toString

Introduction

In this page you can find the example usage for edu.stanford.nlp.trees Tree toString.

Prototype

@Override
public String toString()

Source Link

Document

Converts parse tree to string in Penn Treebank format.

Usage

From source file:Treeparse.java

public static List<Tree> GetNounPhrases(Tree parse) {

    List<Tree> phraseList = new ArrayList<Tree>();
    for (Tree subtree : parse) {
        if (subtree.label().value().equals("NP")) {
            String str = subtree.toString();

            if (str.contains("JJ") || str.contains("JJR") || str.contains("JJS") || str.contains("RB")
                    || str.contains("RBR") || str.contains("RBS")) {
                phraseList.add(subtree);

            }/*from  ww w.  j a va 2  s  .co  m*/
        }
    }

    return phraseList;

}

From source file:BuildBinarizedDataset.java

/**
 * Turns a text file into trees for use in a RNTN classifier such as
 * the treebank used in the Sentiment project.
 * <br>/*from   ww w  .ja  v  a  2  s.c  o  m*/
 * The expected input file is one sentence per line, with sentences
 * separated by blank lines. The first line has the main label of the sentence together with the full sentence.
 * Lines after the first sentence line but before
 * the blank line will be treated as labeled sub-phrases.  The
 * labels should start with the label and then contain a list of
 * tokens the label applies to. All phrases that do not have their own label will take on the main sentence label!
 *  For example:
 * <br>
 * <code>
 * 1 Today is not a good day.<br>
 * 3 good<br>
 * 3 good day <br>
 * 3 a good day <br>
 * <br>
 * (next block starts here) <br>
 * </code>
 * By default the englishPCFG parser is used.  This can be changed
 * with the <code>-parserModel</code> flag.  Specify an input file
 * with <code>-input</code>.
 * <br>
 * If a sentiment model is provided with -sentimentModel, that model
 * will be used to prelabel the sentences.  Any spans with given
 * labels will then be used to adjust those labels.
 */
public static void main(String[] arg) throws IOException {
    CollapseUnaryTransformer transformer = new CollapseUnaryTransformer();
    // FileWriter writer = new FileWriter("D:\\dataset\\train.txt", true);
    String parserModel = "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";
    String args[] = { "-input", "D:\\parse.txt", "-sentimentModel",
            "edu/stanford/nlp/models/sentiment/sentiment.ser.gz" };
    String inputPath = "D:\\dataset\\good.txt";

    String sentimentModelPath = "edu/stanford/nlp/models/sentiment/sentiment.ser.gz";
    SentimentModel sentimentModel = null;

    /* for (int argIndex = 0; argIndex < args.length; ) {
       if (args[argIndex].equalsIgnoreCase("-input")) {
         inputPath = args[argIndex + 1];
         argIndex += 2;
       } else if (args[argIndex].equalsIgnoreCase("-parserModel")) {
         parserModel = args[argIndex + 1];
         argIndex += 2;
       } else if (args[argIndex].equalsIgnoreCase("-sentimentModel")) {
         sentimentModelPath = args[argIndex + 1];
         argIndex += 2;
       } else {
         System.err.println("Unknown argument " + args[argIndex]);
         System.exit(2);
       }
     }*/

    if (inputPath == null) {
        throw new IllegalArgumentException("Must specify input file with -input");
    }

    LexicalizedParser parser = LexicalizedParser.loadModel(parserModel);
    TreeBinarizer binarizer = TreeBinarizer.simpleTreeBinarizer(parser.getTLPParams().headFinder(),
            parser.treebankLanguagePack());

    if (sentimentModelPath != null) {
        sentimentModel = SentimentModel.loadSerialized(sentimentModelPath);
    }

    String text = IOUtils.slurpFileNoExceptions(inputPath);
    String[] chunks = text.split("\\n\\s*\\n+"); // need blank line to make a new chunk

    for (String chunk : chunks) {
        if (chunk.trim().isEmpty()) {
            continue;
        }
        // The expected format is that line 0 will be the text of the
        // sentence, and each subsequence line, if any, will be a value
        // followed by the sequence of tokens that get that value.

        // Here we take the first line and tokenize it as one sentence.
        String[] lines = chunk.trim().split("\\n");
        String sentence = lines[0];
        StringReader sin = new StringReader(sentence);
        DocumentPreprocessor document = new DocumentPreprocessor(sin);
        document.setSentenceFinalPuncWords(new String[] { "\n" });
        List<HasWord> tokens = document.iterator().next();
        Integer mainLabel = new Integer(tokens.get(0).word());
        //System.out.print("Main Sentence Label: " + mainLabel.toString() + "; ");
        tokens = tokens.subList(1, tokens.size());
        //System.err.println(tokens);

        Map<Pair<Integer, Integer>, String> spanToLabels = Generics.newHashMap();
        for (int i = 1; i < lines.length; ++i) {
            extractLabels(spanToLabels, tokens, lines[i]);
        }

        // TODO: add an option which treats the spans as constraints when parsing

        Tree tree = parser.apply(tokens);
        Tree binarized = binarizer.transformTree(tree);
        Tree collapsedUnary = transformer.transformTree(binarized);

        // if there is a sentiment model for use in prelabeling, we
        // label here and then use the user given labels to adjust
        if (sentimentModel != null) {
            Trees.convertToCoreLabels(collapsedUnary);
            SentimentCostAndGradient scorer = new SentimentCostAndGradient(sentimentModel, null);
            scorer.forwardPropagateTree(collapsedUnary);
            setPredictedLabels(collapsedUnary);
        } else {
            setUnknownLabels(collapsedUnary, mainLabel);
            //collapsedUnary.label().setValue(mainLabel.toString());
            //System.out.println("Root"+collapsedUnary.getNodeNumber(1));
        }

        Trees.convertToCoreLabels(collapsedUnary);
        collapsedUnary.indexSpans();

        for (Map.Entry<Pair<Integer, Integer>, String> pairStringEntry : spanToLabels.entrySet()) {
            setSpanLabel(collapsedUnary, pairStringEntry.getKey(), pairStringEntry.getValue());
        }
        String x = collapsedUnary.toString();
        //x.replaceAll("\\s","");
        x = x.replace("(", "[");
        x = x.replace(")", "]");
        //writer.write(x);
        //writer.write("\r\n"); 
        System.out.println(x);
        //System.out.println();
    }
    //writer.close();
}

From source file:com.mycompany.stanlp.ChildSpeech.java

/**
 * @param args the command line arguments
 *//* w w w .  j  a  v  a 2  s  .  co m*/
public static void main(String[] args) throws IOException {

    PrintWriter pw = new PrintWriter(new File("out.csv"));
    StringBuilder sb = new StringBuilder();
    // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution 
    Properties props = new Properties();
    props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    TreeMap<String, String[]> tm = new TreeMap<String, String[]>();
    String csvFile = "C:\\Users\\steve\\Downloads\\verbsome.csv";
    BufferedReader br = null;
    String line = "";
    String cvsSplitBy = ",";

    try {

        br = new BufferedReader(new FileReader(csvFile));
        while ((line = br.readLine()) != null) {
            System.out.println("reached");
            String[] country = line.split(cvsSplitBy);
            String[] input = new String[2];
            input[0] = country[0];
            input[1] = country[5];
            tm.put(country[4], input);

        }

    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (br != null) {
            try {
                br.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    for (Map.Entry<String, String[]> entry : tm.entrySet()) {
        String[] value = entry.getValue();
        Annotation document = new Annotation(value[1]);

        pipeline.annotate(document);
        List<CoreMap> sentences = document.get(SentencesAnnotation.class);

        for (CoreMap sentence : sentences) {

            for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
                // this is the text of the token
                String word = token.get(TextAnnotation.class);
                //ArrayList<CoreLabel> al = new ArrayList();
                if (word.equals(value[0])) {
                    Tree tree = sentence.get(TreeAnnotation.class);
                    //TregexPattern patternMW = TregexPattern.compile("VP([ >># VB | >># VBP | >># VBD] <<" + value[0] +
                    //       ")");
                    TregexPattern patternMW = TregexPattern
                            .compile(" VP  [ <# VB | <# VBP | <# VBD] & <<" + value[0]);
                    TregexMatcher matcher = patternMW.matcher(tree);
                    while (matcher.findNextMatchingNode()) {
                        Tree match = matcher.getMatch();
                        String tempString = tree.toString();
                        sb.append(entry.getKey());
                        sb.append(",");
                        sb.append(value[0]);
                        sb.append(",");
                        sb.append(tempString);
                        sb.append(",");
                        if (match.preTerminalYield().size() == 1) {
                            for (Label l : tree.preTerminalYield()) {
                                sb.append(l.toString());
                                sb.append("&");
                            }
                        } else {
                            for (Label l : match.preTerminalYield()) {

                                sb.append(l.toString());
                                sb.append("&");
                            }
                        }
                        sb.append(",");
                        sb.append(match.toString());
                        //sb.append(",");
                        //sb.append(token.get(PartOfSpeechAnnotation.class));
                        sb.append('\n');
                    }

                }
                // this is the POS tag of the token
                // this is the NER label of the token
                //String ne = token.get(NamedEntityTagAnnotation.class);       
            }

            //SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
        }

        //Map<Integer, edu.stanford.nlp.dcoref.CorefChain> graph = 
        //document.get(CorefChainAnnotation.class);

    }
    pw.write(sb.toString());
    pw.close();
}

From source file:coreferenceresolver.process.FeatureExtractor.java

/**
 * ************************************//from  ww w  .ja  v  a 2  s  .  co m
 * Some functions relates to String match
 *
 * **********************************
 */
//Algorithm: N2 is considered that has a similar string with N1 if:
//Main noun of N2 is the same as the main noun of N1 and
//N1 includes all Nouns, adjactives of N2
public static Boolean stringSimilarity(NounPhrase np1, NounPhrase np2, Sentence sen) {
    if (np2.getHeadNode().toString().toLowerCase().equals(np1.getHeadNode().toString().toLowerCase())) {
        for (Token token : sen.getTokens()) {
            if ((token.getOffsetBegin() >= np2.getOffsetBegin())
                    && token.getOffsetEnd() <= np2.getOffsetEnd()) {
                if ((token.getPOS().equals("JJ")) || (token.getPOS().equals("NN"))
                        || (token.getPOS().equals("NNS")) || (token.getPOS().equals("NNP"))
                        || (token.getPOS().equals("NNPS"))) {
                    boolean isConclusion = false;
                    for (Tree tree : np1.getNpNode().getLeaves()) {
                        if (tree.toString().toLowerCase().equals(token.getWord().toLowerCase())) {
                            isConclusion = true;
                        }
                    }
                    if (isConclusion == false) {
                        return false;
                    }
                }
            }
        }
        return true;

    }
    return false;
}

From source file:DependencyParser.RunStanfordParser.java

public static void dfs(Tree node, Tree parent, Pattern patt) {
    if (node == null || node.isLeaf()) {
        return;/*www .  java2 s  .c  o m*/
    }
    //if node is a NP - Get the terminal nodes to get the words in the NP      
    if (node.value().equals("NP")) {

        //   System.out.println(" Noun Phrase is ");
        /*List<Tree> leaves = node.getLeaves();
                
        for(Tree leaf : leaves) {
        System.out.println(leaf.toString());
                
        }*/
        //Matcher match = patt.matcher(a.trim());
        Matcher match = patt.matcher(node.toString().trim());
        while (match.find()) {
            System.out.println("NP: " + match.group());
        }

    }

    for (Tree child : node.children()) {
        dfs(child, node, patt);
    }

}

From source file:edu.cmu.ark.AnalysisUtilities.java

License:Open Source License

public ParseResult parseSentence(String sentence) {
    String result = "";
    //System.err.println(sentence);
    //see if a parser socket server is available
    int port = new Integer(GlobalProperties.getProperties().getProperty("parserServerPort", "5556"));
    String host = "127.0.0.1";
    Socket client;//from w w w .  ja  v  a 2 s .  com
    PrintWriter pw;
    BufferedReader br;
    String line;
    Tree parse = null;
    double parseScore = Double.MIN_VALUE;

    try {
        client = new Socket(host, port);

        pw = new PrintWriter(client.getOutputStream());
        br = new BufferedReader(new InputStreamReader(client.getInputStream()));
        pw.println(sentence);
        pw.flush(); //flush to complete the transmission

        while ((line = br.readLine()) != null) {
            //if(!line.matches(".*\\S.*")){
            //        System.out.println();
            //}
            if (br.ready()) {
                line = line.replaceAll("\n", "");
                line = line.replaceAll("\\s+", " ");
                result += line + " ";
            } else {
                parseScore = new Double(line);
            }
        }

        br.close();
        pw.close();
        client.close();

        if (parse == null) {
            parse = readTreeFromString("(ROOT (. .))");
            parseScore = -99999.0;
        }

        if (GlobalProperties.getDebug())
            System.err.println("result (parse):" + result);
        parse = readTreeFromString(result);
        return new ParseResult(true, parse, parseScore);

    } catch (Exception ex) {
        if (GlobalProperties.getDebug())
            System.err.println("Could not connect to parser server.");
        //ex.printStackTrace();
    }

    System.err.println("parsing:" + sentence);

    //if socket server not available, then use a local parser object
    if (parser == null) {
        try {
            Options op = new Options();
            String serializedInputFileOrUrl = GlobalProperties.getProperties().getProperty("parserGrammarFile",
                    "config" + File.separator + "englishFactored.ser.gz");
            parser = new LexicalizedParser(serializedInputFileOrUrl, op);
            int maxLength = new Integer(GlobalProperties.getProperties().getProperty("parserMaxLength", "40"))
                    .intValue();
            parser.setMaxLength(maxLength);
            parser.setOptionFlags("-outputFormat", "oneline");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    try {
        if (parser.parse(sentence)) {
            parse = parser.getBestParse();

            //remove all the parent annotations (this is a hacky way to do it)
            String ps = parse.toString().replaceAll("\\[[^\\]]+/[^\\]]+\\]", "");
            parse = AnalysisUtilities.getInstance().readTreeFromString(ps);

            parseScore = parser.getPCFGScore();
            return new ParseResult(true, parse, parseScore);
        }
    } catch (Exception e) {
    }

    parse = readTreeFromString("(ROOT (. .))");
    parseScore = -99999.0;
    return new ParseResult(false, parse, parseScore);
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * //from  ww  w  . j ava  2 s  .c  o  m
 * John studied, hoping to get a good grade. -> John hoped to get a good grade.
 * 
 * @param extracted
 * @param input
 */
private void extractVerbParticipialModifiers(Collection<Question> extracted, Question input) {
    String tregexOpStr;
    TregexPattern matchPattern;
    TregexMatcher matcher;

    tregexOpStr = "S=sub $- /,/ !< NP < (VP=participial < VBG=verb) " + " >+(VP) (S|SINV < NP=subj) "
            + " >> (ROOT <<# /VB.*/=tense) "; //tense determined by top-most verb

    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(input.getIntermediateTree());
    while (matcher.find()) {
        String verbPOS = findTense(matcher.getNode("tense"));
        Tree p = matcher.getNode("participial").deepCopy();
        Tree verb = matcher.getNode("verb");
        String verbLemma = QuestionUtil.getLemma(verb.getChild(0).label().toString(), verb.label().toString());
        String newVerb = this.conjugator.getSurfaceForm(verbLemma, verbPOS);
        int verbIndex = p.objectIndexOf(verb);
        p.removeChild(verbIndex);
        p.addChild(verbIndex, QuestionUtil.readTreeFromString("(" + verbPOS + " " + newVerb + ")"));
        String treeStr = "(ROOT (S " + matcher.getNode("subj").toString() + " " + p.toString() + " (. .)))";
        Tree newTree = QuestionUtil.readTreeFromString(treeStr);
        correctTense(newTree.getChild(0).getChild(0), newTree.getChild(0));

        addQuotationMarksIfNeeded(newTree);
        Question newTreeWithFeatures = input.deeperCopy();
        newTreeWithFeatures.setIntermediateTree(newTree);
        if (this.getComputeFeatures)
            newTreeWithFeatures.setFeatureValue("extractedFromParticipial", 1.0);
        if (this.getComputeFeatures)
            newTreeWithFeatures.setFeatureValue("extractedFromVerbParticipial", 1.0);
        if (this.getComputeFeatures)
            System.err.println("extractVerbParticipialModifiers: " + newTree.toString());
        addIfNovel(extracted, newTreeWithFeatures);
    }
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * e.g., John, who is a friend of mine, likes Susan. -> John is a friend of mine.
 * //  w w  w .  j av a 2 s. c  o  m
 */
private void extractNonRestrictiveRelativeClauses(Collection<Question> extracted, Question input) {
    String tregexOpStr;
    TregexPattern matchPattern;
    TregexMatcher matcher;
    TregexMatcher matcherclause;

    tregexOpStr = "NP=np < (SBAR=sbar [ < (WHADVP=wherecomp < (WRB < where)) " + " | < (WHNP !< /WP\\$/) "
            + " | < (WHNP=possessive < /WP\\$/)" //John, whose car was
            + " | < (WHPP < IN|TO=preposition) ] $-- NP $- /,/ " + " < S=relclause  !< WHADJP)";

    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(input.getIntermediateTree());

    //iterate over all the relative clauses in the input
    //and create an output sentence for each one.
    while (matcher.find()) {
        Tree missingArgumentTree = matcher.getNode("np");
        Tree relclause = matcher.getNode("relclause");
        if (missingArgumentTree == null || relclause == null)
            continue;
        missingArgumentTree = missingArgumentTree.deepCopy();
        relclause = relclause.deepCopy();
        Tree possessive = matcher.getNode("possessive");
        Tree sbar = matcher.getNode("sbar").deepCopy();

        makeDeterminerDefinite(missingArgumentTree);

        if (possessive != null) {
            possessive = possessive.deepCopy();
            possessive.removeChild(0);
            String newTree = "(NP (NP " + missingArgumentTree.toString() + " (POS 's))";
            for (int i = 0; i < possessive.numChildren(); i++)
                newTree += possessive.getChild(i).toString() + " ";
            newTree += ")";
            missingArgumentTree = QuestionUtil.readTreeFromString(newTree);
        }

        //remove the relative clause and the commas surrounding it from the missing argument tree
        for (int i = 0; i < missingArgumentTree.numChildren(); i++) {
            if (missingArgumentTree.getChild(i).equals(sbar)) {
                //remove the relative clause
                missingArgumentTree.removeChild(i);
                //remove the comma after the relative clause
                if (i < missingArgumentTree.numChildren()
                        && missingArgumentTree.getChild(i).label().toString().equals(",")) {
                    missingArgumentTree.removeChild(i);
                }
                //remove the comma before the relative clause
                if (i > 0 && missingArgumentTree.getChild(i - 1).label().toString().equals(",")) {
                    missingArgumentTree.removeChild(i - 1);
                    i--;
                }
                i--;
            }
        }

        //put the noun in the clause at the topmost place with an opening for a noun. 
        //Note that this may mess up if there are noun phrase adjuncts like "The man I met Tuesday".

        //specifically: 
        //the parent of the noun can be either a clause (S) as in "The man who met me"
        //or a verb phrase as in "The man who I met".
        //for verb phrases, add the noun to the end since it will be an object.
        //for clauses, add the noun to the beginning since it will be the subject.
        tregexOpStr = "S|VP=newparent !< NP < (VP=verb !< TO !$ TO)";
        matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
        matcherclause = matchPattern.matcher(relclause);
        boolean subjectMovement = true;
        if (!matcherclause.find()) {
            tregexOpStr = "VP=newparent !< VP < /VB.*/=verb !>> (S !< NP) !<< (VP !< VP !< NP)";
            matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
            matcherclause = matchPattern.matcher(relclause);
            subjectMovement = false;
        }

        //reset (so the first match isn't skipped)
        matcherclause = matchPattern.matcher(relclause);

        if (matcherclause.find()) {
            Tree newparenttree = matcherclause.getNode("newparent");
            Tree verbtree = matcherclause.getNode("verb");
            boolean ppRelativeClause = false;

            if (matcher.getNode("wherecomp") != null) {
                String tmp = "(PP (IN at) " + missingArgumentTree.toString() + ")";
                missingArgumentTree = QuestionUtil.readTreeFromString(tmp);
                ppRelativeClause = true;
                subjectMovement = false;
            } else if (matcher.getNode("preposition") != null) {
                String tmp = "(PP (IN " + matcher.getNode("preposition").yield().toString() + ") "
                        + missingArgumentTree.toString() + ")";
                missingArgumentTree = QuestionUtil.readTreeFromString(tmp);
                ppRelativeClause = true;
            }

            if (subjectMovement) { //subject
                newparenttree.addChild(newparenttree.objectIndexOf(verbtree), missingArgumentTree);
            } else { // newparentlabel is VP   
                if (ppRelativeClause)
                    newparenttree.addChild(newparenttree.numChildren(), missingArgumentTree);
                else
                    newparenttree.addChild(newparenttree.objectIndexOf(verbtree) + 1, missingArgumentTree);
            }

            //create a new tree with punctuation
            Tree newTree = factory.newTreeNode("ROOT", new ArrayList<Tree>());
            newTree.addChild(relclause);
            QuestionUtil.addPeriodIfNeeded(newTree);

            //if(GlobalProperties.getDebug()) System.err.println("extractRelativeClauses: "+ newTree.toString());
            addQuotationMarksIfNeeded(newTree);
            Question newTreeWithFeatures = input.deeperCopy();
            newTreeWithFeatures.setIntermediateTree(newTree);
            if (this.getComputeFeatures)
                newTreeWithFeatures.setFeatureValue("extractedFromRelativeClause", 1.0);
            addIfNovel(extracted, newTreeWithFeatures);
        }
    }
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * e.g., John, hoping to get a good grade, studied. -> John hoped to get a good grade.
 *   Walking to the store, John saw Susan -> John was walking to the store.
 *   /*from w w w.j av  a  2s.  c o m*/
 *   NOTE: This method produces false positives for sentences like, 
 *            "Broadly speaking, the project was successful."
 *         where the participial phrase does not modify the subject.
 *   
 * @param extracted
 * @param input
 */
private void extractNounParticipialModifiers(Collection<Question> extracted, Question input) {
    String tregexOpStr;
    TregexPattern matchPattern;
    TregexMatcher matcher;

    tregexOpStr = "ROOT < (S " + " [ << (NP < (NP=subj  $++ (/,/ $+ (VP=modifier <# VBN|VBG|VP=tense )))) " //modifiers that appear after nouns
            + " | < (S !< NP|SBAR < (VP=modifier <# VBN|VBG|VP=tense) $+ (/,/ $+ NP=subj)) " //modifiers before the subject. e.g., Founded by John, the company...
            + " | < (SBAR < (S !< NP|SBAR < (VP=modifier <# VBN|VBG=tense)) $+ (/,/ $+ NP=subj)) " //e.g., While walking to the store, John saw Susan.
            + " | < (PP=modifier !< NP <# VBG=tense $+ (/,/ $+ NP=subj)) ] ) " // e.g., Walking to the store, John saw Susan.
            + " <<# /^VB.*$/=maintense "; //tense determined by top-most verb

    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(input.getIntermediateTree());
    while (matcher.find()) {
        Tree nountree = matcher.getNode("subj").deepCopy();
        Tree vptree = matcher.getNode("modifier");
        Tree verb = matcher.getNode("tense");
        makeDeterminerDefinite(nountree);

        if (vptree.label().toString().equals("PP"))
            vptree.label().setValue("VP");
        String verbPOS = findTense(matcher.getNode("maintense"));
        if (vptree == null || nountree == null)
            return;

        String newTreeStr;
        if (verb.label().toString().equals("VBG")) {
            //for present partcipials, change the tense to the tense of the main verb
            //e.g., walking to the store -> walked to the store
            String verbLemma = QuestionUtil.getLemma(verb.getChild(0).label().toString(),
                    verb.label().toString());
            String newVerb = this.conjugator.getSurfaceForm(verbLemma, verbPOS);
            int verbIndex = vptree.objectIndexOf(verb);
            vptree = vptree.deepCopy();
            vptree.removeChild(verbIndex);
            vptree.addChild(verbIndex, QuestionUtil.readTreeFromString("(" + verbPOS + " " + newVerb + ")"));
            newTreeStr = "(ROOT (S " + matcher.getNode("subj").toString() + " " + vptree.toString()
                    + " (. .)))";
        } else {
            //for past participials, add a copula
            //e.g., John, exhausted, -> John was exhausted
            //(or for conjunctions, just add the copula---kind of a hack to make the moby dick sentence work out)
            String auxiliary;
            if (verbPOS.equals("VBP") || verbPOS.equals("VBD")) {
                if (isPlural(nountree))
                    auxiliary = "(VBD were)";
                else
                    auxiliary = "(VBD was)";
            } else {
                if (isPlural(nountree))
                    auxiliary = "(VB are)";
                else
                    auxiliary = "(VBZ is)";
            }

            newTreeStr = "(ROOT (S " + nountree + " (VP " + auxiliary + " " + vptree + ") (. .)))";
        }

        Tree newTree = QuestionUtil.readTreeFromString(newTreeStr);
        correctTense(newTree.getChild(0).getChild(0), newTree.getChild(0));
        addQuotationMarksIfNeeded(newTree);

        //if(GlobalProperties.getDebug()) System.err.println("extractNounParticipialModifiers: "+ newTree.toString());
        Question newTreeWithFeatures = input.deeperCopy();
        newTreeWithFeatures.setIntermediateTree(newTree);
        if (this.getComputeFeatures)
            newTreeWithFeatures.setFeatureValue("extractedFromParticipial", 1.0); //old feature name
        if (this.getComputeFeatures)
            newTreeWithFeatures.setFeatureValue("extractedFromNounParticipial", 1.0);
        extracted.add(newTreeWithFeatures);
    }

}

From source file:edu.cmu.ark.QuestionTransducer.java

License:Open Source License

/**
 *
 * This method marks phrases in the tree that should not undergo WH movement
 * and become answers to questions, either due to syntactic
 * constraints or some conservative restrictions used to avoid
 * particular constructions that the system is not designed to handle.
 *
 * E.g.,// w ww.j  a  va  2 s . co m
 * Sentence: Darwin studied how SPECIES evolve.
 * Avoided Question: * What did Darwin study how evolve?
 *
 */
private Tree markUnmovablePhrasesFull(Tree inputTree) {
    Tree copyTree = inputTree.deeperCopy();

    //adjunct clauses under verb phrases (following commas)
    markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (VP < (S=unmovable $,, /,/))");

    //anything under a sentence level subordinate clause
    markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root < (S < PP|ADJP|ADVP|S|SBAR=unmovable)");

    //anything under a phrase directly dominating a conjunction
    markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (/\\.*/ < CC << NP|ADJP|VP|ADVP|PP=unmovable)");

    //adjunct clauses -- assume subordinate clauses that have a complementizer other than "that" (or empty) are adjuncts
    markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (SBAR < (IN|DT < /[^that]/) << NP|PP=unmovable)");

    //anything under a WH phrase
    markNodesAsUnmovableUsingPattern(copyTree,
            "ROOT=root << (SBAR < /^WH.*P$/ << NP|ADJP|VP|ADVP|PP=unmovable)");

    //"Complementizer-trace effect"
    //the subject of a complement phrase when an explicit complementizer is present (e.g., I knew that JOHN ran.)
    markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (SBAR <, IN|DT < (S < (NP=unmovable !$,, VP)))");

    //anything under a clause that is a predicate nominative (e.g., my favorite activity is to run in THE PARK)
    markNodesAsUnmovableUsingPattern(copyTree,
            "ROOT=root << (S < (VP <+(VP) (VB|VBD|VBN|VBZ < be|being|been|is|are|was|were|am) <+(VP) (S << NP|ADJP|VP|ADVP|PP=unmovable)))");

    //objects of prepositional phrases with prepositions other than "of" or "about".
    //"of" and "about" signal that the modifier is a complement rather than an adjunct.
    //allows: "John visited the capital of Alaska." -> "What did John visit the capital of?"
    //disallows: "John visited a city in Alaska." -> ? "What did John visit a city in?"
    markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (NP << (PP=unmovable !< (IN < of|about)))");

    //nested prepositional phrases of any kind
    //disallows: "Bill saw John in the hall of mirrors." -> * "What did Bill see John in the hall of?"
    markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (PP << PP=unmovable)");

    //prepositional phrases in subjects (e.g., disallows: "The capital of Alaska is Juneau." -> * "What is the capital of Juneau?")
    //Nothing can be moved out of subjects.
    //I think the generative account is that phrases can only be moved to the level of the verb
    //that governs them, and subjects (along with adjuncts) are not governed by the verb.
    markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (NP $ VP << PP=unmovable)");

    //subordinate clauses that are not complements of verbs
    markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (SBAR=unmovable [ !> VP | $-- /,/ | < RB ])");

    //adjunct subordinate clauses
    //"how", "whether", and "that" under IN or WHADVP nodes signal complements.
    //WHNP always signals a complement.
    //otherwise, the SBAR is an adjunct.
    //Note: we mark words like "where" as unmovable because they are potentially adjuncts.
    //  e.g., "he knew where it was" has a complement, but "he went to college where he grew up" has an adjunct
    markNodesAsUnmovableUsingPattern(copyTree,
            "ROOT=root << (SBAR=unmovable !< WHNP < (/^[^S].*/ !<< that|whether|how))"); //dominates a non-S node that doesn't include one of the unambiguous complementizers

    //////////////////////////////////////////////////////////////
    //MARK SOME AS UNMOVABLE TO AVOID OBVIOUSLY BAD QUESTIONS
    //

    //existential there NPs
    markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (NP=unmovable < EX)");

    //phrases in quotations
    markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (/^S/ < `` << NP|ADJP|VP|ADVP|PP=unmovable)");

    //prepositional phrases that don't have NP objects
    markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (PP=unmovable !< /.*NP/)");

    //pronouns which are the subject of complement verb phrases
    //These would nearly always lead to silly/tricky questions (e.g., "GM says its profits will fall." -> "Whose profits did GM say will fall?")
    //markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (VP < (SBAR < (S <<, (NP=unmovable < PRP))))");

    //both NPs that are under an S (MJH: we are punting on this).
    //If there are multiple NPs, one may be a temporal modifier
    markMultipleNPsAsUnmovable(copyTree);
    /////////////////////////////////////////////////////////////////

    ////////////////////////////////////////////////////////////////
    //PROPAGATE ABOVE CONSTRAINTS
    //any non-PP phrases under otherwise movable phrases (we assume movable phrases serve as islands)
    markNodesAsUnmovableUsingPattern(copyTree,
            "ROOT=root << (NP|PP|ADJP|ADVP|PP << (NP|ADJP|VP|ADVP=unmovable))");

    //anything under an unmovable node
    markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (@UNMOVABLE << NP|ADJP|VP|ADVP|PP=unmovable)");

    if (GlobalProperties.getDebug())
        System.err.println("markUnmovablePhrases: " + copyTree.toString());
    return copyTree;
}