List of usage examples for edu.stanford.nlp.parser.common.ParserGrammar#getTLPParams()
public abstract TreebankLangParserParams getTLPParams();
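All of the examples below follow the same pattern: getTLPParams() returns the parser's TreebankLangParserParams, from which language-specific resources (language pack, head finder, grammatical structure factory) are obtained. As a minimal, self-contained sketch of that pattern, assuming the stock English PCFG model is on the classpath (the class name and model path here are illustrative, not taken from the examples below):

import edu.stanford.nlp.parser.common.ParserGrammar;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.trees.TreebankLanguagePack;

public class GetTLPParamsExample {
    public static void main(String[] args) {
        // LexicalizedParser extends ParserGrammar, so it can stand in for the
        // abstract type. The model path assumes the standard CoreNLP models jar.
        ParserGrammar parser = LexicalizedParser
                .loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");

        // getTLPParams() exposes the TreebankLangParserParams behind the parser;
        // treebankLanguagePack() yields language-specific resources such as
        // punctuation filters and head finders.
        TreebankLanguagePack tlp = parser.getTLPParams().treebankLanguagePack();
        System.out.println("Language pack: " + tlp.getClass().getSimpleName());
    }
}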
From source file:de.tudarmstadt.ukp.dkpro.core.corenlp.CoreNlpParser.java
License:Open Source License
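In this DKPro Core wrapper around CoreNLP's ParserAnnotator, the ParserGrammar is pulled out of the annotator by reflection (the field is not publicly accessible), and getTLPParams().treebankLanguagePack() supplies the TreebankLanguagePack needed to convert constituents back into CAS annotations: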
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    CAS cas = aJCas.getCas();
    annotatorProvider.configure(cas);

    // Transfer from CAS to CoreNLP
    DKPro2CoreNlp converter = new DKPro2CoreNlp();
    converter.setPtb3Escaping(ptb3Escaping);
    converter.setQuoteBegin(quoteBegin);
    converter.setQuoteEnd(quoteEnd);
    converter.setEncoding(modelEncoding);
    converter.setReadPos(readPos);

    Annotation document = new Annotation((String) null);
    converter.convert(aJCas, document);

    // Actual processing
    ParserAnnotator annotator = annotatorProvider.getResource();
    annotator.annotate(document);

    // Get TreebankLanguagePack
    ParserGrammar parser;
    try {
        parser = (ParserGrammar) FieldUtils.readField(annotator, "parser", true);
    }
    catch (IllegalAccessException e) {
        throw new AnalysisEngineProcessException(e);
    }
    TreebankLanguagePack tlp = parser.getTLPParams().treebankLanguagePack();

    // Transfer back into the CAS
    if (writePos) {
        posMappingProvider.configure(cas);
        CoreNlp2DKPro.convertPOSs(aJCas, document, posMappingProvider, internStrings);
    }
    if (writeConstituent) {
        constituentMappingProvider.configure(cas);
        CoreNlp2DKPro.convertConstituents(aJCas, document, constituentMappingProvider,
                internStrings, tlp);
    }
    if (writePennTree) {
        CoreNlp2DKPro.convertPennTree(aJCas, document);
    }
    if (writeDependency) {
        dependencyMappingProvider.configure(cas);
        CoreNlp2DKPro.convertDependencies(aJCas, document, dependencyMappingProvider,
                internStrings);
    }
}
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordParser.java
License:Open Source License
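Here the DKPro Core StanfordParser parses one sentence (or other covering annotation) at a time; getTLPParams().treebankLanguagePack() is passed to createConstituentAnnotationFromTree() when constituent output is requested: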
/**
 * Processes the given text using the StanfordParser.
 *
 * @param aJCas
 *            the {@link JCas} to process
 * @see org.apache.uima.analysis_component.JCasAnnotator_ImplBase#process(org.apache.uima.jcas.JCas)
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    modelProvider.configure(aJCas.getCas());
    posMappingProvider.configure(aJCas.getCas());
    constituentMappingProvider.configure(aJCas.getCas());

    Type typeToParse;
    if (annotationTypeToParse != null) {
        typeToParse = aJCas.getCas().getTypeSystem().getType(annotationTypeToParse);
    }
    else {
        typeToParse = JCasUtil.getType(aJCas, Sentence.class);
    }

    FSIterator<Annotation> typeToParseIterator = aJCas.getAnnotationIndex(typeToParse)
            .iterator();

    // Iterate over each sentence (or whichever construct is being parsed)
    while (typeToParseIterator.hasNext()) {
        Annotation currAnnotationToParse = typeToParseIterator.next();
        List<HasWord> tokenizedSentence = new ArrayList<HasWord>();
        List<Token> tokens = new ArrayList<Token>();

        // Split the sentence into tokens so that indexes can be annotated
        for (Token token : JCasUtil.selectCovered(Token.class, currAnnotationToParse)) {
            tokenizedSentence.add(tokenToWord(token));
            tokens.add(token);
        }

        getContext().getLogger().log(FINE, tokenizedSentence.toString());

        ParserGrammar parser = modelProvider.getResource();

        Tree parseTree;
        try {
            if (tokenizedSentence.size() > maxTokens) {
                continue;
            }

            if (ptb3Escaping) {
                tokenizedSentence = CoreNlpUtils.applyPtbEscaping(tokenizedSentence,
                        quoteBegin, quoteEnd);
            }

            // Get the parse
            ParserQuery query = parser.parserQuery();
            query.parse(tokenizedSentence);
            parseTree = query.getBestParse();
        }
        catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }

        // Create a new StanfordAnnotator object
        StanfordAnnotator sfAnnotator = null;
        try {
            sfAnnotator = new StanfordAnnotator(new TreeWithTokens(parseTree, tokens));
            sfAnnotator.setPosMappingProvider(posMappingProvider);
            sfAnnotator.setConstituentMappingProvider(constituentMappingProvider);
        }
        catch (CASException e) {
            throw new AnalysisEngineProcessException(e);
        }

        // Create Penn bracketed structure annotations
        if (writePennTree) {
            sfAnnotator.createPennTreeAnnotation(currAnnotationToParse.getBegin(),
                    currAnnotationToParse.getEnd());
        }

        // Create dependency annotations
        if (writeDependency) {
            doCreateDependencyTags(parser, sfAnnotator, parseTree, tokens);
        }

        // Create constituent annotations
        if (writeConstituent) {
            sfAnnotator.createConstituentAnnotationFromTree(
                    parser.getTLPParams().treebankLanguagePack(), writePos);
        }
    }
}
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordParser.java
License:Open Source License
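A helper from the same class: the TreebankLanguagePack obtained through getTLPParams() provides the punctuation filter and head finder used to build a GrammaticalStructure, from which typed dependencies are extracted according to the configured mode: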
protected void doCreateDependencyTags(ParserGrammar aParser, StanfordAnnotator sfAnnotator,
        Tree parseTree, List<Token> tokens)
{
    GrammaticalStructure gs;
    try {
        TreebankLanguagePack tlp = aParser.getTLPParams().treebankLanguagePack();
        gs = tlp.grammaticalStructureFactory(tlp.punctuationWordRejectFilter(),
                tlp.typedDependencyHeadFinder()).newGrammaticalStructure(parseTree);
    }
    catch (UnsupportedOperationException e) {
        // We already warned in the model provider if dependencies are not supported, so
        // here we just do nothing and skip the dependencies.
        return;
    }

    Collection<TypedDependency> dependencies = null;
    switch (mode) {
    case BASIC:
        dependencies = gs.typedDependencies(); // gs.typedDependencies(false);
        break;
    case NON_COLLAPSED:
        dependencies = gs.allTypedDependencies(); // gs.typedDependencies(true);
        break;
    case COLLAPSED_WITH_EXTRA:
        dependencies = gs.typedDependenciesCollapsed(true);
        break;
    case COLLAPSED:
        dependencies = gs.typedDependenciesCollapsed(false);
        break;
    case CC_PROPAGATED:
        dependencies = gs.typedDependenciesCCprocessed(true);
        break;
    case CC_PROPAGATED_NO_EXTRA:
        dependencies = gs.typedDependenciesCCprocessed(false);
        break;
    case TREE:
        dependencies = gs.typedDependenciesCollapsedTree();
        break;
    }

    for (TypedDependency currTypedDep : dependencies) {
        int govIndex = currTypedDep.gov().index();
        int depIndex = currTypedDep.dep().index();
        if (govIndex != 0) {
            // Stanford CoreNLP produces a dependency relation between a verb and ROOT-0,
            // which is not a token at all!
            Token govToken = tokens.get(govIndex - 1);
            Token depToken = tokens.get(depIndex - 1);
            sfAnnotator.createDependencyAnnotation(currTypedDep.reln(), govToken, depToken);
        }
    }
}
From source file:ie.pars.bnc.preprocess.ProcessNLP.java
License:Open Source License
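This example parses pre-tokenized and POS-tagged BNC XML sentences; getTLPParams() supplies both getGrammaticalStructure() and the typedDependencyHeadFinder() passed into it, and each token is written out in a CoNLL-style one-token-per-line format with its head and dependency relation: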
/**
 * @param inputStreamFile
 * @param morphology
 * @param posTagger
 * @param parser
 * @return
 * @throws Exception
 */
public static StringBuilder parseBNCXML(InputStream inputStreamFile, Morphology morphology,
        MaxentTagger posTagger, ParserGrammar parser) throws Exception
{
    StringBuilder results = new StringBuilder();
    int counterSent = 0;
    List<List<List<WordLemmaTag>>> parseBNCXMLTokenized = parseBNCXMLTokenized(inputStreamFile);
    for (List<List<WordLemmaTag>> xparseBNCXMLL : parseBNCXMLTokenized) {
        results.append("<p>\n");
        for (List<WordLemmaTag> para : xparseBNCXMLL) {
            if (counterSent++ % 20 == 0) {
                System.out.print(".");
            }
            results.append("<s>\n");

            List<TaggedWord> tagSentence = posTagger.tagSentence(para, true);
            Tree parseTree = parser.parse(tagSentence);
            GrammaticalStructure gs = parser.getTLPParams().getGrammaticalStructure(parseTree,
                    parser.treebankLanguagePack().punctuationWordRejectFilter(),
                    parser.getTLPParams().typedDependencyHeadFinder());
            Collection<TypedDependency> deps = gs.typedDependenciesCollapsedTree();
            SemanticGraph depTree = new SemanticGraph(deps);

            for (int i = 0; i < tagSentence.size(); ++i) {
                int head = -1;
                String deprel = null;
                Set<Integer> rootSet = depTree.getRoots().stream().map(IndexedWord::index)
                        .collect(Collectors.toSet());
                IndexedWord node = depTree.getNodeByIndexSafe(i + 1);
                if (node != null) {
                    List<SemanticGraphEdge> edgeList = depTree.getIncomingEdgesSorted(node);
                    if (!edgeList.isEmpty()) {
                        assert edgeList.size() == 1;
                        head = edgeList.get(0).getGovernor().index();
                        deprel = edgeList.get(0).getRelation().toString();
                    }
                    else if (rootSet.contains(i + 1)) {
                        head = 0;
                        deprel = "ROOT";
                    }
                }

                // Write the token
                TaggedWord lexHead = null;
                if (head > 0) {
                    lexHead = tagSentence.get(head - 1);
                }
                results.append(line(i + 1, tagSentence.get(i), morphology, head, deprel, lexHead))
                        .append("\n");
            }
            results.append("</s>\n");
        }
        results.append("</p>\n");
    }
    System.out.println("");
    inputStreamFile.close();
    return results;
}
From source file:ie.pars.bnc.preprocess.ProcessNLP.java
License:Open Source License
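From the same class, a smaller variant that writes the collapsed-tree typed dependencies for a single parse tree to an output stream: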
public static void handleDependencies(Tree tree, ParserGrammar parser, String arg,
        OutputStream outStream, String commandArgs) throws IOException
{
    GrammaticalStructure gs = parser.getTLPParams().getGrammaticalStructure(tree,
            parser.treebankLanguagePack().punctuationWordRejectFilter(),
            parser.getTLPParams().typedDependencyHeadFinder());
    Collection<TypedDependency> deps = gs.typedDependenciesCollapsedTree();

    OutputStreamWriter osw = new OutputStreamWriter(outStream, "utf-8");
    for (TypedDependency dep : deps) {
        String t = dep.dep().word() + "\t" + dep.dep().lemma() + "\t" + dep.dep().tag() + "\t";
        System.out.println(t);
        osw.write(dep.toString());
        osw.write("\n");
    }
    osw.flush();
}
From source file:ie.pars.bnc.preprocess.ProcessNLP.java
License:Open Source License
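Finally, a variant that tokenizes and tags a raw sentence string itself before parsing; dependency extraction again goes through getTLPParams().getGrammaticalStructure():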
private static StringBuilder parseTheSentence(String sentence, Morphology morphology,
        MaxentTagger posTagger, ParserGrammar parser, String sid)
{
    TokenizerFactory<Word> newTokenizerFactory = PTBTokenizerFactory.newTokenizerFactory();

    StringBuilder results = new StringBuilder();
    results.append("<s id='" + sid + "'>\n");

    StringReader sr = new StringReader(sentence);
    Tokenizer<Word> tokenizer = newTokenizerFactory.getTokenizer(sr);
    List<Word> tokenize = tokenizer.tokenize();

    List<TaggedWord> tagSentence = posTagger.tagSentence(tokenize);

    Tree parseTree = parser.parse(tagSentence);
    GrammaticalStructure gs = parser.getTLPParams().getGrammaticalStructure(parseTree,
            parser.treebankLanguagePack().punctuationWordRejectFilter(),
            parser.getTLPParams().typedDependencyHeadFinder());
    Collection<TypedDependency> deps = gs.typedDependenciesCollapsedTree();
    SemanticGraph depTree = new SemanticGraph(deps);

    for (int i = 0; i < tagSentence.size(); ++i) {
        int head = -1;
        String deprel = null;
        Set<Integer> rootSet = depTree.getRoots().stream().map(IndexedWord::index)
                .collect(Collectors.toSet());
        IndexedWord node = depTree.getNodeByIndexSafe(i + 1);
        if (node != null) {
            List<SemanticGraphEdge> edgeList = depTree.getIncomingEdgesSorted(node);
            if (!edgeList.isEmpty()) {
                assert edgeList.size() == 1;
                head = edgeList.get(0).getGovernor().index();
                deprel = edgeList.get(0).getRelation().toString();
            }
            else if (rootSet.contains(i + 1)) {
                head = 0;
                deprel = "ROOT";
            }
        }

        // Write the token
        TaggedWord lexHead = null;
        if (head > 0) {
            lexHead = tagSentence.get(head - 1);
        }
        results.append(line(i + 1, tagSentence.get(i), morphology, head, deprel, lexHead))
                .append("\n");
    }
    results.append("</s>\n");
    return results;
}