List of usage examples for edu.stanford.nlp.parser.common ParserGrammar parse
public abstract Tree parse(List<? extends HasWord> words);
From source file:ie.pars.bnc.preprocess.ProcessNLP.java
License:Open Source License
/** * * @param inputStreamFile//from w ww .j av a2 s . c o m * @param morphology * @param posTagger * @param parser * @return * @throws Exception */ public static StringBuilder parseBNCXML(InputStream inputStreamFile, Morphology morphology, MaxentTagger posTagger, ParserGrammar parser) throws Exception { StringBuilder results = new StringBuilder(); int counterSent = 0; List<List<List<WordLemmaTag>>> parseBNCXMLTokenized = parseBNCXMLTokenized(inputStreamFile); for (List<List<WordLemmaTag>> xparseBNCXMLL : parseBNCXMLTokenized) { results.append("<p>\n"); for (List<WordLemmaTag> para : xparseBNCXMLL) { if (counterSent++ % 20 == 0) { System.out.print("."); } results.append("<s>\n"); List<TaggedWord> tagSentence = posTagger.tagSentence(para, true); Tree parseTree = parser.parse(tagSentence); GrammaticalStructure gs = parser.getTLPParams().getGrammaticalStructure(parseTree, parser.treebankLanguagePack().punctuationWordRejectFilter(), parser.getTLPParams().typedDependencyHeadFinder()); Collection<TypedDependency> deps = gs.typedDependenciesCollapsedTree(); SemanticGraph depTree = new SemanticGraph(deps); for (int i = 0; i < tagSentence.size(); ++i) { int head = -1; String deprel = null; // if (depTree != null) { Set<Integer> rootSet = depTree.getRoots().stream().map(IndexedWord::index) .collect(Collectors.toSet()); IndexedWord node = depTree.getNodeByIndexSafe(i + 1); if (node != null) { List<SemanticGraphEdge> edgeList = depTree.getIncomingEdgesSorted(node); if (!edgeList.isEmpty()) { assert edgeList.size() == 1; head = edgeList.get(0).getGovernor().index(); deprel = edgeList.get(0).getRelation().toString(); } else if (rootSet.contains(i + 1)) { head = 0; deprel = "ROOT"; } } // } // Write the token TaggedWord lexHead = null; if (head > 0) { lexHead = tagSentence.get(head - 1); } results.append(line(i + 1, tagSentence.get(i), morphology, head, deprel, lexHead)).append("\n"); } results.append("</s>\n"); } results.append("</p>\n"); } System.out.println(""); inputStreamFile.close(); return results; }
From source file:ie.pars.bnc.preprocess.ProcessNLP.java
License:Open Source License
private static StringBuilder parseTheSentence(String sentence, Morphology morphology, MaxentTagger posTagger, ParserGrammar parser, String sid) { TokenizerFactory<Word> newTokenizerFactory = PTBTokenizerFactory.newTokenizerFactory(); // TokenizerFactory<WordLemmaTag> tokenizerFactory; // TokenizerFactory<CoreLabel> factory = PTBTokenizer.factory(new CoreLabelTokenFactory() , ""); // TokenizerFactory<Word> factory1 = PTBTokenizer.factory(); StringBuilder results = new StringBuilder(); results.append("<s id='" + sid + "'>\n"); StringReader sr = new StringReader(sentence); Tokenizer<Word> tokenizer = newTokenizerFactory.getTokenizer(sr); List<Word> tokenize = tokenizer.tokenize(); List<TaggedWord> tagSentence = posTagger.tagSentence(tokenize); Tree parseTree = parser.parse(tagSentence); GrammaticalStructure gs = parser.getTLPParams().getGrammaticalStructure(parseTree, parser.treebankLanguagePack().punctuationWordRejectFilter(), parser.getTLPParams().typedDependencyHeadFinder()); Collection<TypedDependency> deps = gs.typedDependenciesCollapsedTree(); SemanticGraph depTree = new SemanticGraph(deps); for (int i = 0; i < tagSentence.size(); ++i) { int head = -1; String deprel = null;/* w w w.j a v a2 s . c om*/ // if (depTree != null) { Set<Integer> rootSet = depTree.getRoots().stream().map(IndexedWord::index).collect(Collectors.toSet()); IndexedWord node = depTree.getNodeByIndexSafe(i + 1); if (node != null) { List<SemanticGraphEdge> edgeList = depTree.getIncomingEdgesSorted(node); if (!edgeList.isEmpty()) { assert edgeList.size() == 1; head = edgeList.get(0).getGovernor().index(); deprel = edgeList.get(0).getRelation().toString(); } else if (rootSet.contains(i + 1)) { head = 0; deprel = "ROOT"; } } // } // Write the token TaggedWord lexHead = null; if (head > 0) { lexHead = tagSentence.get(head - 1); } results.append(line(i + 1, tagSentence.get(i), morphology, head, deprel, lexHead)).append("\n"); } results.append("</s>\n"); return results; }