List of usage examples for edu.stanford.nlp.trees Tree toString
@Override
public String toString()
From source file:Treeparse.java
/**
 * Collects every noun phrase (node labelled {@code NP}) in {@code parse}
 * that contains at least one adjective or adverb.
 *
 * <p>The decision is made on the part-of-speech tags of the phrase's
 * pre-terminal nodes: a tag starting with {@code JJ} (JJ, JJR, JJS) or
 * {@code RB} (RB, RBR, RBS) qualifies the phrase. Searching the bracketed
 * string returned by {@code toString()} for those letters, as was done
 * before, also matched unrelated text such as the {@code -LRB-} /
 * {@code -RRB-} bracket tokens, the {@code WRB} tag, or any word that
 * merely contains "JJ" or "RB".
 *
 * @param parse root of the parse tree to scan; every node is visited
 * @return the matching NP subtrees in the tree's iteration order; never null
 */
public static List<Tree> GetNounPhrases(Tree parse) {
    List<Tree> phraseList = new ArrayList<Tree>();
    for (Tree subtree : parse) {
        if (!"NP".equals(subtree.value())) {
            continue;
        }
        // Look only at POS tags (pre-terminals), never at the words themselves.
        for (Tree node : subtree) {
            if (!node.isPreTerminal()) {
                continue;
            }
            String tag = node.value();
            if (tag != null && (tag.startsWith("JJ") || tag.startsWith("RB"))) {
                phraseList.add(subtree);
                break; // one modifier is enough; avoid adding the phrase twice
            }
        }
    }
    return phraseList;
}
From source file:BuildBinarizedDataset.java
/** * Turns a text file into trees for use in a RNTN classifier such as * the treebank used in the Sentiment project. * <br>/*from ww w .ja v a 2 s.c o m*/ * The expected input file is one sentence per line, with sentences * separated by blank lines. The first line has the main label of the sentence together with the full sentence. * Lines after the first sentence line but before * the blank line will be treated as labeled sub-phrases. The * labels should start with the label and then contain a list of * tokens the label applies to. All phrases that do not have their own label will take on the main sentence label! * For example: * <br> * <code> * 1 Today is not a good day.<br> * 3 good<br> * 3 good day <br> * 3 a good day <br> * <br> * (next block starts here) <br> * </code> * By default the englishPCFG parser is used. This can be changed * with the <code>-parserModel</code> flag. Specify an input file * with <code>-input</code>. * <br> * If a sentiment model is provided with -sentimentModel, that model * will be used to prelabel the sentences. Any spans with given * labels will then be used to adjust those labels. 
*/ public static void main(String[] arg) throws IOException { CollapseUnaryTransformer transformer = new CollapseUnaryTransformer(); // FileWriter writer = new FileWriter("D:\\dataset\\train.txt", true); String parserModel = "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"; String args[] = { "-input", "D:\\parse.txt", "-sentimentModel", "edu/stanford/nlp/models/sentiment/sentiment.ser.gz" }; String inputPath = "D:\\dataset\\good.txt"; String sentimentModelPath = "edu/stanford/nlp/models/sentiment/sentiment.ser.gz"; SentimentModel sentimentModel = null; /* for (int argIndex = 0; argIndex < args.length; ) { if (args[argIndex].equalsIgnoreCase("-input")) { inputPath = args[argIndex + 1]; argIndex += 2; } else if (args[argIndex].equalsIgnoreCase("-parserModel")) { parserModel = args[argIndex + 1]; argIndex += 2; } else if (args[argIndex].equalsIgnoreCase("-sentimentModel")) { sentimentModelPath = args[argIndex + 1]; argIndex += 2; } else { System.err.println("Unknown argument " + args[argIndex]); System.exit(2); } }*/ if (inputPath == null) { throw new IllegalArgumentException("Must specify input file with -input"); } LexicalizedParser parser = LexicalizedParser.loadModel(parserModel); TreeBinarizer binarizer = TreeBinarizer.simpleTreeBinarizer(parser.getTLPParams().headFinder(), parser.treebankLanguagePack()); if (sentimentModelPath != null) { sentimentModel = SentimentModel.loadSerialized(sentimentModelPath); } String text = IOUtils.slurpFileNoExceptions(inputPath); String[] chunks = text.split("\\n\\s*\\n+"); // need blank line to make a new chunk for (String chunk : chunks) { if (chunk.trim().isEmpty()) { continue; } // The expected format is that line 0 will be the text of the // sentence, and each subsequence line, if any, will be a value // followed by the sequence of tokens that get that value. // Here we take the first line and tokenize it as one sentence. 
String[] lines = chunk.trim().split("\\n"); String sentence = lines[0]; StringReader sin = new StringReader(sentence); DocumentPreprocessor document = new DocumentPreprocessor(sin); document.setSentenceFinalPuncWords(new String[] { "\n" }); List<HasWord> tokens = document.iterator().next(); Integer mainLabel = new Integer(tokens.get(0).word()); //System.out.print("Main Sentence Label: " + mainLabel.toString() + "; "); tokens = tokens.subList(1, tokens.size()); //System.err.println(tokens); Map<Pair<Integer, Integer>, String> spanToLabels = Generics.newHashMap(); for (int i = 1; i < lines.length; ++i) { extractLabels(spanToLabels, tokens, lines[i]); } // TODO: add an option which treats the spans as constraints when parsing Tree tree = parser.apply(tokens); Tree binarized = binarizer.transformTree(tree); Tree collapsedUnary = transformer.transformTree(binarized); // if there is a sentiment model for use in prelabeling, we // label here and then use the user given labels to adjust if (sentimentModel != null) { Trees.convertToCoreLabels(collapsedUnary); SentimentCostAndGradient scorer = new SentimentCostAndGradient(sentimentModel, null); scorer.forwardPropagateTree(collapsedUnary); setPredictedLabels(collapsedUnary); } else { setUnknownLabels(collapsedUnary, mainLabel); //collapsedUnary.label().setValue(mainLabel.toString()); //System.out.println("Root"+collapsedUnary.getNodeNumber(1)); } Trees.convertToCoreLabels(collapsedUnary); collapsedUnary.indexSpans(); for (Map.Entry<Pair<Integer, Integer>, String> pairStringEntry : spanToLabels.entrySet()) { setSpanLabel(collapsedUnary, pairStringEntry.getKey(), pairStringEntry.getValue()); } String x = collapsedUnary.toString(); //x.replaceAll("\\s",""); x = x.replace("(", "["); x = x.replace(")", "]"); //writer.write(x); //writer.write("\r\n"); System.out.println(x); //System.out.println(); } //writer.close(); }
From source file:com.mycompany.stanlp.ChildSpeech.java
/** * @param args the command line arguments *//* w w w . j a v a 2 s . co m*/ public static void main(String[] args) throws IOException { PrintWriter pw = new PrintWriter(new File("out.csv")); StringBuilder sb = new StringBuilder(); // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); TreeMap<String, String[]> tm = new TreeMap<String, String[]>(); String csvFile = "C:\\Users\\steve\\Downloads\\verbsome.csv"; BufferedReader br = null; String line = ""; String cvsSplitBy = ","; try { br = new BufferedReader(new FileReader(csvFile)); while ((line = br.readLine()) != null) { System.out.println("reached"); String[] country = line.split(cvsSplitBy); String[] input = new String[2]; input[0] = country[0]; input[1] = country[5]; tm.put(country[4], input); } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if (br != null) { try { br.close(); } catch (IOException e) { e.printStackTrace(); } } } for (Map.Entry<String, String[]> entry : tm.entrySet()) { String[] value = entry.getValue(); Annotation document = new Annotation(value[1]); pipeline.annotate(document); List<CoreMap> sentences = document.get(SentencesAnnotation.class); for (CoreMap sentence : sentences) { for (CoreLabel token : sentence.get(TokensAnnotation.class)) { // this is the text of the token String word = token.get(TextAnnotation.class); //ArrayList<CoreLabel> al = new ArrayList(); if (word.equals(value[0])) { Tree tree = sentence.get(TreeAnnotation.class); //TregexPattern patternMW = TregexPattern.compile("VP([ >># VB | >># VBP | >># VBD] <<" + value[0] + // ")"); TregexPattern patternMW = TregexPattern .compile(" VP [ <# VB | <# VBP | <# VBD] & <<" + value[0]); TregexMatcher matcher = patternMW.matcher(tree); while 
(matcher.findNextMatchingNode()) { Tree match = matcher.getMatch(); String tempString = tree.toString(); sb.append(entry.getKey()); sb.append(","); sb.append(value[0]); sb.append(","); sb.append(tempString); sb.append(","); if (match.preTerminalYield().size() == 1) { for (Label l : tree.preTerminalYield()) { sb.append(l.toString()); sb.append("&"); } } else { for (Label l : match.preTerminalYield()) { sb.append(l.toString()); sb.append("&"); } } sb.append(","); sb.append(match.toString()); //sb.append(","); //sb.append(token.get(PartOfSpeechAnnotation.class)); sb.append('\n'); } } // this is the POS tag of the token // this is the NER label of the token //String ne = token.get(NamedEntityTagAnnotation.class); } //SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class); } //Map<Integer, edu.stanford.nlp.dcoref.CorefChain> graph = //document.get(CorefChainAnnotation.class); } pw.write(sb.toString()); pw.close(); }
From source file:coreferenceresolver.process.FeatureExtractor.java
/** * ************************************//from ww w .ja v a 2 s . co m * Some functions relates to String match * * ********************************** */ //Algorithm: N2 is considered that has a similar string with N1 if: //Main noun of N2 is the same as the main noun of N1 and //N1 includes all Nouns, adjactives of N2 public static Boolean stringSimilarity(NounPhrase np1, NounPhrase np2, Sentence sen) { if (np2.getHeadNode().toString().toLowerCase().equals(np1.getHeadNode().toString().toLowerCase())) { for (Token token : sen.getTokens()) { if ((token.getOffsetBegin() >= np2.getOffsetBegin()) && token.getOffsetEnd() <= np2.getOffsetEnd()) { if ((token.getPOS().equals("JJ")) || (token.getPOS().equals("NN")) || (token.getPOS().equals("NNS")) || (token.getPOS().equals("NNP")) || (token.getPOS().equals("NNPS"))) { boolean isConclusion = false; for (Tree tree : np1.getNpNode().getLeaves()) { if (tree.toString().toLowerCase().equals(token.getWord().toLowerCase())) { isConclusion = true; } } if (isConclusion == false) { return false; } } } } return true; } return false; }
From source file:DependencyParser.RunStanfordParser.java
public static void dfs(Tree node, Tree parent, Pattern patt) { if (node == null || node.isLeaf()) { return;/*www . java2 s .c o m*/ } //if node is a NP - Get the terminal nodes to get the words in the NP if (node.value().equals("NP")) { // System.out.println(" Noun Phrase is "); /*List<Tree> leaves = node.getLeaves(); for(Tree leaf : leaves) { System.out.println(leaf.toString()); }*/ //Matcher match = patt.matcher(a.trim()); Matcher match = patt.matcher(node.toString().trim()); while (match.find()) { System.out.println("NP: " + match.group()); } } for (Tree child : node.children()) { dfs(child, node, patt); } }
From source file:edu.cmu.ark.AnalysisUtilities.java
License:Open Source License
public ParseResult parseSentence(String sentence) { String result = ""; //System.err.println(sentence); //see if a parser socket server is available int port = new Integer(GlobalProperties.getProperties().getProperty("parserServerPort", "5556")); String host = "127.0.0.1"; Socket client;//from w w w . ja v a 2 s . com PrintWriter pw; BufferedReader br; String line; Tree parse = null; double parseScore = Double.MIN_VALUE; try { client = new Socket(host, port); pw = new PrintWriter(client.getOutputStream()); br = new BufferedReader(new InputStreamReader(client.getInputStream())); pw.println(sentence); pw.flush(); //flush to complete the transmission while ((line = br.readLine()) != null) { //if(!line.matches(".*\\S.*")){ // System.out.println(); //} if (br.ready()) { line = line.replaceAll("\n", ""); line = line.replaceAll("\\s+", " "); result += line + " "; } else { parseScore = new Double(line); } } br.close(); pw.close(); client.close(); if (parse == null) { parse = readTreeFromString("(ROOT (. 
.))"); parseScore = -99999.0; } if (GlobalProperties.getDebug()) System.err.println("result (parse):" + result); parse = readTreeFromString(result); return new ParseResult(true, parse, parseScore); } catch (Exception ex) { if (GlobalProperties.getDebug()) System.err.println("Could not connect to parser server."); //ex.printStackTrace(); } System.err.println("parsing:" + sentence); //if socket server not available, then use a local parser object if (parser == null) { try { Options op = new Options(); String serializedInputFileOrUrl = GlobalProperties.getProperties().getProperty("parserGrammarFile", "config" + File.separator + "englishFactored.ser.gz"); parser = new LexicalizedParser(serializedInputFileOrUrl, op); int maxLength = new Integer(GlobalProperties.getProperties().getProperty("parserMaxLength", "40")) .intValue(); parser.setMaxLength(maxLength); parser.setOptionFlags("-outputFormat", "oneline"); } catch (Exception e) { e.printStackTrace(); } } try { if (parser.parse(sentence)) { parse = parser.getBestParse(); //remove all the parent annotations (this is a hacky way to do it) String ps = parse.toString().replaceAll("\\[[^\\]]+/[^\\]]+\\]", ""); parse = AnalysisUtilities.getInstance().readTreeFromString(ps); parseScore = parser.getPCFGScore(); return new ParseResult(true, parse, parseScore); } } catch (Exception e) { } parse = readTreeFromString("(ROOT (. .))"); parseScore = -99999.0; return new ParseResult(false, parse, parseScore); }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * //from ww w . j ava 2 s .c o m * John studied, hoping to get a good grade. -> John hoped to get a good grade. * * @param extracted * @param input */ private void extractVerbParticipialModifiers(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; tregexOpStr = "S=sub $- /,/ !< NP < (VP=participial < VBG=verb) " + " >+(VP) (S|SINV < NP=subj) " + " >> (ROOT <<# /VB.*/=tense) "; //tense determined by top-most verb matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); while (matcher.find()) { String verbPOS = findTense(matcher.getNode("tense")); Tree p = matcher.getNode("participial").deepCopy(); Tree verb = matcher.getNode("verb"); String verbLemma = QuestionUtil.getLemma(verb.getChild(0).label().toString(), verb.label().toString()); String newVerb = this.conjugator.getSurfaceForm(verbLemma, verbPOS); int verbIndex = p.objectIndexOf(verb); p.removeChild(verbIndex); p.addChild(verbIndex, QuestionUtil.readTreeFromString("(" + verbPOS + " " + newVerb + ")")); String treeStr = "(ROOT (S " + matcher.getNode("subj").toString() + " " + p.toString() + " (. .)))"; Tree newTree = QuestionUtil.readTreeFromString(treeStr); correctTense(newTree.getChild(0).getChild(0), newTree.getChild(0)); addQuotationMarksIfNeeded(newTree); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromParticipial", 1.0); if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromVerbParticipial", 1.0); if (this.getComputeFeatures) System.err.println("extractVerbParticipialModifiers: " + newTree.toString()); addIfNovel(extracted, newTreeWithFeatures); } }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * e.g., John, who is a friend of mine, likes Susan. -> John is a friend of mine. * // w w w . j av a 2 s. c o m */ private void extractNonRestrictiveRelativeClauses(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; TregexMatcher matcherclause; tregexOpStr = "NP=np < (SBAR=sbar [ < (WHADVP=wherecomp < (WRB < where)) " + " | < (WHNP !< /WP\\$/) " + " | < (WHNP=possessive < /WP\\$/)" //John, whose car was + " | < (WHPP < IN|TO=preposition) ] $-- NP $- /,/ " + " < S=relclause !< WHADJP)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); //iterate over all the relative clauses in the input //and create an output sentence for each one. while (matcher.find()) { Tree missingArgumentTree = matcher.getNode("np"); Tree relclause = matcher.getNode("relclause"); if (missingArgumentTree == null || relclause == null) continue; missingArgumentTree = missingArgumentTree.deepCopy(); relclause = relclause.deepCopy(); Tree possessive = matcher.getNode("possessive"); Tree sbar = matcher.getNode("sbar").deepCopy(); makeDeterminerDefinite(missingArgumentTree); if (possessive != null) { possessive = possessive.deepCopy(); possessive.removeChild(0); String newTree = "(NP (NP " + missingArgumentTree.toString() + " (POS 's))"; for (int i = 0; i < possessive.numChildren(); i++) newTree += possessive.getChild(i).toString() + " "; newTree += ")"; missingArgumentTree = QuestionUtil.readTreeFromString(newTree); } //remove the relative clause and the commas surrounding it from the missing argument tree for (int i = 0; i < missingArgumentTree.numChildren(); i++) { if (missingArgumentTree.getChild(i).equals(sbar)) { //remove the relative clause missingArgumentTree.removeChild(i); //remove the comma after the relative clause if (i < missingArgumentTree.numChildren() && missingArgumentTree.getChild(i).label().toString().equals(",")) { 
missingArgumentTree.removeChild(i); } //remove the comma before the relative clause if (i > 0 && missingArgumentTree.getChild(i - 1).label().toString().equals(",")) { missingArgumentTree.removeChild(i - 1); i--; } i--; } } //put the noun in the clause at the topmost place with an opening for a noun. //Note that this may mess up if there are noun phrase adjuncts like "The man I met Tuesday". //specifically: //the parent of the noun can be either a clause (S) as in "The man who met me" //or a verb phrase as in "The man who I met". //for verb phrases, add the noun to the end since it will be an object. //for clauses, add the noun to the beginning since it will be the subject. tregexOpStr = "S|VP=newparent !< NP < (VP=verb !< TO !$ TO)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcherclause = matchPattern.matcher(relclause); boolean subjectMovement = true; if (!matcherclause.find()) { tregexOpStr = "VP=newparent !< VP < /VB.*/=verb !>> (S !< NP) !<< (VP !< VP !< NP)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcherclause = matchPattern.matcher(relclause); subjectMovement = false; } //reset (so the first match isn't skipped) matcherclause = matchPattern.matcher(relclause); if (matcherclause.find()) { Tree newparenttree = matcherclause.getNode("newparent"); Tree verbtree = matcherclause.getNode("verb"); boolean ppRelativeClause = false; if (matcher.getNode("wherecomp") != null) { String tmp = "(PP (IN at) " + missingArgumentTree.toString() + ")"; missingArgumentTree = QuestionUtil.readTreeFromString(tmp); ppRelativeClause = true; subjectMovement = false; } else if (matcher.getNode("preposition") != null) { String tmp = "(PP (IN " + matcher.getNode("preposition").yield().toString() + ") " + missingArgumentTree.toString() + ")"; missingArgumentTree = QuestionUtil.readTreeFromString(tmp); ppRelativeClause = true; } if (subjectMovement) { //subject newparenttree.addChild(newparenttree.objectIndexOf(verbtree), missingArgumentTree); } 
else { // newparentlabel is VP if (ppRelativeClause) newparenttree.addChild(newparenttree.numChildren(), missingArgumentTree); else newparenttree.addChild(newparenttree.objectIndexOf(verbtree) + 1, missingArgumentTree); } //create a new tree with punctuation Tree newTree = factory.newTreeNode("ROOT", new ArrayList<Tree>()); newTree.addChild(relclause); QuestionUtil.addPeriodIfNeeded(newTree); //if(GlobalProperties.getDebug()) System.err.println("extractRelativeClauses: "+ newTree.toString()); addQuotationMarksIfNeeded(newTree); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromRelativeClause", 1.0); addIfNovel(extracted, newTreeWithFeatures); } } }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * e.g., John, hoping to get a good grade, studied. -> John hoped to get a good grade. * Walking to the store, John saw Susan -> John was walking to the store. * /*from w w w.j av a 2s. c o m*/ * NOTE: This method produces false positives for sentences like, * "Broadly speaking, the project was successful." * where the participial phrase does not modify the subject. * * @param extracted * @param input */ private void extractNounParticipialModifiers(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; tregexOpStr = "ROOT < (S " + " [ << (NP < (NP=subj $++ (/,/ $+ (VP=modifier <# VBN|VBG|VP=tense )))) " //modifiers that appear after nouns + " | < (S !< NP|SBAR < (VP=modifier <# VBN|VBG|VP=tense) $+ (/,/ $+ NP=subj)) " //modifiers before the subject. e.g., Founded by John, the company... + " | < (SBAR < (S !< NP|SBAR < (VP=modifier <# VBN|VBG=tense)) $+ (/,/ $+ NP=subj)) " //e.g., While walking to the store, John saw Susan. + " | < (PP=modifier !< NP <# VBG=tense $+ (/,/ $+ NP=subj)) ] ) " // e.g., Walking to the store, John saw Susan. 
+ " <<# /^VB.*$/=maintense "; //tense determined by top-most verb matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); while (matcher.find()) { Tree nountree = matcher.getNode("subj").deepCopy(); Tree vptree = matcher.getNode("modifier"); Tree verb = matcher.getNode("tense"); makeDeterminerDefinite(nountree); if (vptree.label().toString().equals("PP")) vptree.label().setValue("VP"); String verbPOS = findTense(matcher.getNode("maintense")); if (vptree == null || nountree == null) return; String newTreeStr; if (verb.label().toString().equals("VBG")) { //for present partcipials, change the tense to the tense of the main verb //e.g., walking to the store -> walked to the store String verbLemma = QuestionUtil.getLemma(verb.getChild(0).label().toString(), verb.label().toString()); String newVerb = this.conjugator.getSurfaceForm(verbLemma, verbPOS); int verbIndex = vptree.objectIndexOf(verb); vptree = vptree.deepCopy(); vptree.removeChild(verbIndex); vptree.addChild(verbIndex, QuestionUtil.readTreeFromString("(" + verbPOS + " " + newVerb + ")")); newTreeStr = "(ROOT (S " + matcher.getNode("subj").toString() + " " + vptree.toString() + " (. .)))"; } else { //for past participials, add a copula //e.g., John, exhausted, -> John was exhausted //(or for conjunctions, just add the copula---kind of a hack to make the moby dick sentence work out) String auxiliary; if (verbPOS.equals("VBP") || verbPOS.equals("VBD")) { if (isPlural(nountree)) auxiliary = "(VBD were)"; else auxiliary = "(VBD was)"; } else { if (isPlural(nountree)) auxiliary = "(VB are)"; else auxiliary = "(VBZ is)"; } newTreeStr = "(ROOT (S " + nountree + " (VP " + auxiliary + " " + vptree + ") (. 
.)))"; } Tree newTree = QuestionUtil.readTreeFromString(newTreeStr); correctTense(newTree.getChild(0).getChild(0), newTree.getChild(0)); addQuotationMarksIfNeeded(newTree); //if(GlobalProperties.getDebug()) System.err.println("extractNounParticipialModifiers: "+ newTree.toString()); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromParticipial", 1.0); //old feature name if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromNounParticipial", 1.0); extracted.add(newTreeWithFeatures); } }
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/** * * This method marks phrases in the tree that should not undergo WH movement * and become answers to questions, either due to syntactic * constraints or some conservative restrictions used to avoid * particular constructions that the system is not designed to handle. * * E.g.,// w ww.j a va 2 s . co m * Sentence: Darwin studied how SPECIES evolve. * Avoided Question: * What did Darwin study how evolve? * */ private Tree markUnmovablePhrasesFull(Tree inputTree) { Tree copyTree = inputTree.deeperCopy(); //adjunct clauses under verb phrases (following commas) markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (VP < (S=unmovable $,, /,/))"); //anything under a sentence level subordinate clause markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root < (S < PP|ADJP|ADVP|S|SBAR=unmovable)"); //anything under a phrase directly dominating a conjunction markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (/\\.*/ < CC << NP|ADJP|VP|ADVP|PP=unmovable)"); //adjunct clauses -- assume subordinate clauses that have a complementizer other than "that" (or empty) are adjuncts markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (SBAR < (IN|DT < /[^that]/) << NP|PP=unmovable)"); //anything under a WH phrase markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (SBAR < /^WH.*P$/ << NP|ADJP|VP|ADVP|PP=unmovable)"); //"Complementizer-trace effect" //the subject of a complement phrase when an explicit complementizer is present (e.g., I knew that JOHN ran.) markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (SBAR <, IN|DT < (S < (NP=unmovable !$,, VP)))"); //anything under a clause that is a predicate nominative (e.g., my favorite activity is to run in THE PARK) markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (S < (VP <+(VP) (VB|VBD|VBN|VBZ < be|being|been|is|are|was|were|am) <+(VP) (S << NP|ADJP|VP|ADVP|PP=unmovable)))"); //objects of prepositional phrases with prepositions other than "of" or "about". 
//"of" and "about" signal that the modifier is a complement rather than an adjunct. //allows: "John visited the capital of Alaska." -> "What did John visit the capital of?" //disallows: "John visited a city in Alaska." -> ? "What did John visit a city in?" markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (NP << (PP=unmovable !< (IN < of|about)))"); //nested prepositional phrases of any kind //disallows: "Bill saw John in the hall of mirrors." -> * "What did Bill see John in the hall of?" markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (PP << PP=unmovable)"); //prepositional phrases in subjects (e.g., disallows: "The capital of Alaska is Juneau." -> * "What is the capital of Juneau?") //Nothing can be moved out of subjects. //I think the generative account is that phrases can only be moved to the level of the verb //that governs them, and subjects (along with adjuncts) are not governed by the verb. markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (NP $ VP << PP=unmovable)"); //subordinate clauses that are not complements of verbs markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (SBAR=unmovable [ !> VP | $-- /,/ | < RB ])"); //adjunct subordinate clauses //"how", "whether", and "that" under IN or WHADVP nodes signal complements. //WHNP always signals a complement. //otherwise, the SBAR is an adjunct. //Note: we mark words like "where" as unmovable because they are potentially adjuncts. 
// e.g., "he knew where it was" has a complement, but "he went to college where he grew up" has an adjunct markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (SBAR=unmovable !< WHNP < (/^[^S].*/ !<< that|whether|how))"); //dominates a non-S node that doesn't include one of the unambiguous complementizers ////////////////////////////////////////////////////////////// //MARK SOME AS UNMOVABLE TO AVOID OBVIOUSLY BAD QUESTIONS // //existential there NPs markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (NP=unmovable < EX)"); //phrases in quotations markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (/^S/ < `` << NP|ADJP|VP|ADVP|PP=unmovable)"); //prepositional phrases that don't have NP objects markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (PP=unmovable !< /.*NP/)"); //pronouns which are the subject of complement verb phrases //These would nearly always lead to silly/tricky questions (e.g., "GM says its profits will fall." -> "Whose profits did GM say will fall?") //markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (VP < (SBAR < (S <<, (NP=unmovable < PRP))))"); //both NPs that are under an S (MJH: we are punting on this). //If there are multiple NPs, one may be a temporal modifier markMultipleNPsAsUnmovable(copyTree); ///////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////// //PROPAGATE ABOVE CONSTRAINTS //any non-PP phrases under otherwise movable phrases (we assume movable phrases serve as islands) markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (NP|PP|ADJP|ADVP|PP << (NP|ADJP|VP|ADVP=unmovable))"); //anything under an unmovable node markNodesAsUnmovableUsingPattern(copyTree, "ROOT=root << (@UNMOVABLE << NP|ADJP|VP|ADVP|PP=unmovable)"); if (GlobalProperties.getDebug()) System.err.println("markUnmovablePhrases: " + copyTree.toString()); return copyTree; }