List of usage examples for edu.stanford.nlp.trees GrammaticalStructure getRoots
public static Collection<TypedDependency> getRoots(Collection<TypedDependency> list)
From source file:com.search.MySearchHandler.java
License:Apache License
private static ParsedQuestion parseQues(String str) { // String str = // "what is the caption of European Council of Religious Leaders ? "; TermTokenStream queryStream = new TermTokenStream((String) null); String[] strlist = corelabeltostr(tokenizerFactory.getTokenizer(new StringReader(str)).tokenize()); queryStream.append(strlist);/* w ww.j av a 2s .com*/ Iterator<List> it = classifier.classify(queryStream.toSingleString()).iterator(); boolean NameFound = false; String buffer = ""; String finalstr = ""; while (it.hasNext()) { Iterator<CoreLabel> itr = it.next().iterator(); while (itr.hasNext()) { if (!itr.next().get(CoreAnnotations.AnswerAnnotation.class).equals("O")) { NameFound = true; } } } finalstr = compressNames(queryStream); System.out.println(finalstr); Tree parse = null; parse = lp.apply(tokenizerFactory.getTokenizer(new StringReader(queryStream.toNLPString())).tokenize()); GrammaticalStructure gs = gsf.newGrammaticalStructure(parse); // detect Answer Type START String whclause = null; for (int i = 0; i < queryStream.length(); i++) { if (gs.getNodeByIndex(i + 1).parent().value().equals("WRB") || (gs.getNodeByIndex(i + 1).parent().value().equals("WP")) || (gs.getNodeByIndex(i + 1).parent().value().equals("WDT"))) { whclause = queryStream.index(i); } } // detect Answer Type END if (whclause != null) { List<TypedDependency> tdl = getTyped(queryStream.toNLPString()); System.out.println(tdl); // Compress based on Noun Compounds and Adjectives , Determiners String compressedstr = compress(queryStream); System.out.println(compressedstr); // Compress based on Caps in letters. String comstr = compressCaps(queryStream); System.out.println(comstr); parse = lp.apply(tokenizerFactory.getTokenizer(new StringReader(comstr)).tokenize()); GrammaticalStructure gs2 = gsf.newGrammaticalStructure(parse); List<TypedDependency> tdl2 = gs2.typedDependenciesCCprocessed(); Iterator<TypedDependency> itlist = GrammaticalStructure.getRoots(tdl2).iterator(); // System.out.println(queryStream.toNLPString()); // Identify ROOT in the Grammatical Structure String multi[][] = new String[queryStream.length()][queryStream.length()]; int root = 0; while (itlist.hasNext()) { int tr = itlist.next().dep().index() - 1; root = tr; multi[tr][tr] = "root"; } // Fill the Array based on Relations ListIterator<TypedDependency> tdllist = tdl2.listIterator(); while (tdllist.hasNext()) { TypedDependency tr = tdllist.next(); int govin = tr.gov().index() - 1; int depin = tr.dep().index() - 1; if (govin >= 0) { // Have to fix the multiple words in the sequence issue multi[govin][depin] = tr.reln().toString(); } } // PRINT ARRAY // for (int i = 0; i < queryStream.length(); i++) { // for (int j = 0; j < queryStream.length(); j++) { // System.out.print((multi[i][j] == null ? "-" : multi[i][j]) // + " "); // } // System.out.println(); // } // parse.pennPrint(); parse = lp.apply(tokenizerFactory.getTokenizer(new StringReader(str)).tokenize()); List<Tree> nounPhrases = new LinkedList<Tree>(); getNounPhrases(parse, nounPhrases); ListIterator<Tree> treit = nounPhrases.listIterator(); String nounstr = ""; while (treit.hasNext()) { nounstr += treit.next().value() + " "; } List<Tree> verbPhrases = new LinkedList<Tree>(); getVerbPhrases(parse, verbPhrases); ListIterator<Tree> treverbit = verbPhrases.listIterator(); String verbstr = ""; while (treverbit.hasNext()) { verbstr += treverbit.next().value() + " "; } System.out.println(nounstr); HashMap<String, String> depHashMap = new HashMap<String, String>(); HashMap<String, String> govHashMap = new HashMap<String, String>(); for (int i = 0; i < queryStream.length(); i++) { for (int j = 0; j < queryStream.length(); j++) { if (multi[i][j] != null && i != j) { if (!depHashMap.containsKey(multi[i][j]) && !govHashMap.containsKey(multi[i][j])) { depHashMap.put(multi[i][j], queryStream.index(j)); govHashMap.put(multi[i][j], queryStream.index(i)); } else if (!depHashMap.containsKey(multi[i][j] + "1") && !govHashMap.containsKey(multi[i][j] + "1")) { depHashMap.put(multi[i][j] + "1", queryStream.index(j)); govHashMap.put(multi[i][j] + "1", queryStream.index(i)); } else { depHashMap.put(multi[i][j] + "2", queryStream.index(j)); govHashMap.put(multi[i][j] + "2", queryStream.index(i)); } } } } // System.out.println(gs2.getNodeByIndex(root + 1).value()); StringBuffer sb = new StringBuffer(); for (int i = 0; i < queryStream.length(); i++) { sb.append(gs2.getNodeByIndex(i + 1).parent().value()); } // System.out.println(queryStream.length()); String template = sb.toString(); System.out.println(template); System.out.println(getQuestionType(template, queryStream)); // Initialize Required fields for Query ParsedQuestion parques = new ParsedQuestion(); parques.setWhclause(whclause); String relation = ""; TreeMap<String, Double> whtype = new TreeMap<String, Double>(); String searchname = ""; String relationkeyword = ""; switch (getQuestionType(template, queryStream)) { case "fourtype1": if (questypemap.get(whclause.toLowerCase().trim()) != null) { whtype = getWHMAP(questypemap.get(whclause.toLowerCase().trim())); if (whtype == null) { break; } // get Relation from ROOT of the sentence relationkeyword = gs2.getNodeByIndex(root + 1).value(); relation = getRelation(gs2.getNodeByIndex(root + 1).value(), whtype.firstKey().toLowerCase()); // get Name from the NN in the sentence for (int i = 0; i < queryStream.length(); i++) { if (isNoun(gs2.getNodeByIndex(i + 1).parent().value())) { searchname = queryStream.index(i); } } } break; case "fivetype1": // get WH clause from the sentence if (questypemap.get(whclause.toLowerCase().trim()) != null) { whtype = getWHMAP(questypemap.get(whclause.toLowerCase().trim())); if (whtype == null) { break; } // get Relation from ROOT of the sentence relationkeyword = gs2.getNodeByIndex(root + 1).value(); relation = getRelation(gs2.getNodeByIndex(root + 1).value(), whtype.firstKey().toLowerCase()); // get Name from the NN in the sentence for (int i = 0; i < queryStream.length(); i++) { if (isNoun(gs2.getNodeByIndex(i + 1).parent().value())) { searchname = queryStream.index(i); } } } break; case "sixtype1": // get WH clause from the sentence if (questypemap.get(whclause.toLowerCase().trim()) != null) { whtype = getWHMAP(questypemap.get(whclause.toLowerCase().trim())); if (whtype == null) { break; } // get the nsubj / nsubjpass relation in the sentence if (depHashMap.containsKey("nsubj")) { relationkeyword = depHashMap.get("nsubj"); relation = getRelation(depHashMap.get("nsubj"), whtype.firstKey().toLowerCase()); } else if (depHashMap.containsKey("nsubjpass")) { relationkeyword = depHashMap.get("nsubjpass"); relation = getRelation(depHashMap.get("nsubjpass"), whtype.firstKey().toLowerCase()); } // get the possessive dependent relation in the sentence if (depHashMap.containsKey("poss")) { searchname = depHashMap.get("poss"); } } break; case "sixtype2": // WRBJJVBDNNPVB. // get WH clause from the sentence if (questypemap.get(whclause.toLowerCase().trim()) != null) { whtype = getWHMAP(questypemap.get(whclause.toLowerCase().trim())); if (whtype == null) { break; } // get the dep relation in the sentence if (depHashMap.containsKey("dep")) { relationkeyword = depHashMap.get("dep"); relation = getRelation(depHashMap.get("dep"), whtype.firstKey().toLowerCase()); } // get the nsubj dependent relation in the sentence if (depHashMap.containsKey("nsubj")) { searchname = depHashMap.get("nsubj"); } else if (depHashMap.containsKey("nsubjpass")) { searchname = depHashMap.get("nsubjpass"); } } break; case "sixtype3": // WRBJJVBDNNPVB. // get WH clause from the sentence if (questypemap.get(whclause.toLowerCase().trim()) != null) { whtype = getWHMAP(questypemap.get(whclause.toLowerCase().trim())); if (whtype == null) { break; } // get the dep relation in the sentence if (depHashMap.containsKey("dep")) { relationkeyword = depHashMap.get("dep") + " " + govHashMap.get("dep"); relation = getRelation(depHashMap.get("dep") + " " + govHashMap.get("dep"), whtype.firstKey().toLowerCase()); } // get the nsubj dependent relation in the sentence if (depHashMap.containsKey("nsubj")) { searchname = depHashMap.get("nsubj"); } else if (depHashMap.containsKey("nsubjpass")) { searchname = depHashMap.get("nsubjpass"); } } break; case "sixtype4": // WRBVBDNNPVBNN. // when did CatchMeIfYouCan hit theboxoffice ? // not written yet // get WH clause from the sentence if (questypemap.get(whclause.toLowerCase().trim()) != null) { whtype = getWHMAP(questypemap.get(whclause.toLowerCase().trim())); if (whtype == null) { break; } // get the dep relation in the sentence if (depHashMap.containsKey("dobj")) { relationkeyword = govHashMap.get("dobj") + " " + depHashMap.get("dobj"); relation = getRelation(govHashMap.get("dobj") + " " + depHashMap.get("dobj"), whtype.firstKey().toLowerCase()); } // get the nsubj dependent relation in the sentence if (depHashMap.containsKey("nsubj")) { searchname = depHashMap.get("nsubj"); } else if (depHashMap.containsKey("nsubjpass")) { searchname = depHashMap.get("nsubjpass"); } } break; case "sixtype5": // WPVBDNNINNNP. // who was thedirector of AClockworkOrange ? if (questypemap.get(whclause.toLowerCase().trim()) != null) { whtype = getWHMAP(questypemap.get(whclause.toLowerCase().trim())); if (whtype == null) { break; } // get the nsubj relation in the sentence if (depHashMap.containsKey("nsubj")) { relationkeyword = depHashMap.get("nsubj"); relation = getRelation(depHashMap.get("nsubj"), whtype.firstKey().toLowerCase()); } else if (depHashMap.containsKey("nsubjpass")) { relationkeyword = depHashMap.get("nsubjpass"); relation = getRelation(depHashMap.get("nsubjpass"), whtype.firstKey().toLowerCase()); } // get the nsubj dependent relation in the sentence if (depHashMap.containsKey("prep_" + queryStream.index(3).trim())) { searchname = depHashMap.get("prep_" + queryStream.index(3).trim()); } } break; case "seventype1": // WPVBPNNSVBNINNNP. // what are thenotableinstruments played by SamCollins ? if (questypemap.get(whclause.toLowerCase().trim()) != null) { whtype = getWHMAP(questypemap.get(whclause.toLowerCase().trim())); if (whtype == null) { break; } // get the nsubj relation in the sentence if (depHashMap.containsKey("nsubj")) { relationkeyword = depHashMap.get("nsubj") + " " + govHashMap.get("nsubj"); relation = getRelation(depHashMap.get("nsubj") + " " + govHashMap.get("nsubj"), whtype.firstKey().toLowerCase()); } else if (depHashMap.containsKey("nsubjpass")) { relationkeyword = depHashMap.get("nsubjpass") + " " + govHashMap.get("nsubjpass"); relation = getRelation(depHashMap.get("nsubjpass") + " " + govHashMap.get("nsubjpass"), whtype.firstKey().toLowerCase()); } // get the nsubj dependent relation in the sentence if (depHashMap.containsKey("agent")) { searchname = depHashMap.get("agent"); } } break; case "": // WPVBDNNINNNP. // who was thedirector of AClockworkOrange ? if (questypemap.get(whclause.toLowerCase().trim()) != null) { whtype = getWHMAP(questypemap.get(whclause.toLowerCase().trim())); if (whtype == null) { break; } int s = identifylastleaf(multi, root, 0); if (s != 0) { searchname = queryStream.index(s); relationkeyword = getWords(s, queryStream, gs2); } else { searchname = nounstr; relationkeyword = verbstr + " " + nounstr; } // exclude s and 0 and get all the words // get the nsubj relation in the sentence relationkeyword = getWords(s, queryStream, gs2); for (int i = 0; i < queryStream.length(); i++) { for (int j = 0; j < queryStream.length(); j++) { if (multi[i][j] != null && i != j) { System.out.println(multi[i][j] + ":" + type(multi[i][j])); System.out.println(queryStream.index(i) + " --> " + queryStream.index(j)); } } } } break; default: break; } // System.out.println("WH clause : "+whclause); // System.out.println("WH Type : "+whtype); // System.out.println("Relation : "+relation); // System.out.println("Relation Key word : "+ relationkeyword); // System.out.println("Search Name:"+searchname); parques.setRelationKeyWord(relationkeyword); parques.setSearchName(searchname); parques.setWhtype(whtype); if (whtype == null || searchname == null || relationkeyword == null) { for (int i = 0; i < queryStream.length(); i++) { for (int j = 0; j < queryStream.length(); j++) { if (multi[i][j] != null && i != j) { System.out.println(multi[i][j] + ":" + type(multi[i][j])); System.out.println(queryStream.index(i) + " --> " + queryStream.index(j)); } } } } return parques; } return null; }