Example usage for edu.stanford.nlp.parser.common ParserGrammar getTLPParams

List of usage examples for edu.stanford.nlp.parser.common ParserGrammar getTLPParams

Introduction

In this page you can find the example usage for edu.stanford.nlp.parser.common ParserGrammar getTLPParams.

Prototype

public abstract TreebankLangParserParams getTLPParams();

Source Link

Usage

From source file:de.tudarmstadt.ukp.dkpro.core.corenlp.CoreNlpParser.java

License: Open Source License

@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    CAS cas = aJCas.getCas();

    annotatorProvider.configure(cas);

    // Convert the DKPro CAS representation into a CoreNLP Annotation.
    DKPro2CoreNlp toCoreNlp = new DKPro2CoreNlp();
    toCoreNlp.setPtb3Escaping(ptb3Escaping);
    toCoreNlp.setQuoteBegin(quoteBegin);
    toCoreNlp.setQuoteEnd(quoteEnd);
    toCoreNlp.setEncoding(modelEncoding);
    toCoreNlp.setReadPos(readPos);

    Annotation coreNlpDocument = new Annotation((String) null);
    toCoreNlp.convert(aJCas, coreNlpDocument);

    // Run the CoreNLP parser annotator over the converted document.
    ParserAnnotator parserAnnotator = annotatorProvider.getResource();
    parserAnnotator.annotate(coreNlpDocument);

    // ParserAnnotator does not expose its internal parser, so read the private
    // "parser" field reflectively to obtain the treebank language pack.
    ParserGrammar grammar;
    try {
        grammar = (ParserGrammar) FieldUtils.readField(parserAnnotator, "parser", true);
    } catch (IllegalAccessException e) {
        throw new AnalysisEngineProcessException(e);
    }
    TreebankLanguagePack languagePack = grammar.getTLPParams().treebankLanguagePack();

    // Copy the CoreNLP results back into the CAS, honoring the output switches.
    if (writePos) {
        posMappingProvider.configure(cas);
        CoreNlp2DKPro.convertPOSs(aJCas, coreNlpDocument, posMappingProvider, internStrings);
    }

    if (writeConstituent) {
        constituentMappingProvider.configure(cas);
        CoreNlp2DKPro.convertConstituents(aJCas, coreNlpDocument, constituentMappingProvider, internStrings,
                languagePack);
    }

    if (writePennTree) {
        CoreNlp2DKPro.convertPennTree(aJCas, coreNlpDocument);
    }

    if (writeDependency) {
        dependencyMappingProvider.configure(cas);
        CoreNlp2DKPro.convertDependencies(aJCas, coreNlpDocument, dependencyMappingProvider, internStrings);
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordParser.java

License: Open Source License

/**
 * Processes the given text using the StanfordParser.
 *
 * @param aJCas
 *            the {@link JCas} to process
 * @throws AnalysisEngineProcessException
 *             if parsing or annotation transfer fails
 * @see org.apache.uima.analysis_component.JCasAnnotator_ImplBase#process(org.apache.uima.jcas.JCas)
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    modelProvider.configure(aJCas.getCas());
    posMappingProvider.configure(aJCas.getCas());
    constituentMappingProvider.configure(aJCas.getCas());

    // Determine which annotation type delimits the units to parse (defaults to Sentence).
    Type typeToParse;
    if (annotationTypeToParse != null) {
        typeToParse = aJCas.getCas().getTypeSystem().getType(annotationTypeToParse);
    } else {
        typeToParse = JCasUtil.getType(aJCas, Sentence.class);
    }
    FSIterator<Annotation> typeToParseIterator = aJCas.getAnnotationIndex(typeToParse).iterator();

    // Iterate over each sentence (or whichever construct was configured) and parse it

    while (typeToParseIterator.hasNext()) {
        Annotation currAnnotationToParse = typeToParseIterator.next();
        List<HasWord> tokenizedSentence = new ArrayList<>();
        List<Token> tokens = new ArrayList<>();

        // Split sentence into tokens so annotation offsets can be recovered later
        for (Token token : JCasUtil.selectCovered(Token.class, currAnnotationToParse)) {
            tokenizedSentence.add(tokenToWord(token));
            tokens.add(token);
        }

        getContext().getLogger().log(FINE, tokenizedSentence.toString());
        ParserGrammar parser = modelProvider.getResource();

        // Skip overly long sentences; this check cannot throw, so it does not
        // need to live inside the try block.
        if (tokenizedSentence.size() > maxTokens) {
            continue;
        }

        Tree parseTree;
        try {
            if (ptb3Escaping) {
                tokenizedSentence = CoreNlpUtils.applyPtbEscaping(tokenizedSentence, quoteBegin, quoteEnd);
            }

            // Get parse
            ParserQuery query = parser.parserQuery();
            query.parse(tokenizedSentence);
            parseTree = query.getBestParse();
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }

        // Create new StanfordAnnotator object. No null initialization needed:
        // the catch block always throws, so the variable is definitely assigned.
        StanfordAnnotator sfAnnotator;
        try {
            sfAnnotator = new StanfordAnnotator(new TreeWithTokens(parseTree, tokens));
            sfAnnotator.setPosMappingProvider(posMappingProvider);
            sfAnnotator.setConstituentMappingProvider(constituentMappingProvider);
        } catch (CASException e) {
            throw new AnalysisEngineProcessException(e);
        }

        // Create Penn bracketed structure annotations
        if (writePennTree) {
            sfAnnotator.createPennTreeAnnotation(currAnnotationToParse.getBegin(),
                    currAnnotationToParse.getEnd());
        }

        // Create dependency annotations
        if (writeDependency) {
            doCreateDependencyTags(parser, sfAnnotator, parseTree, tokens);
        }

        // Create constituent annotations
        if (writeConstituent) {
            sfAnnotator.createConstituentAnnotationFromTree(parser.getTLPParams().treebankLanguagePack(),
                    writePos);
        }
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordParser.java

License: Open Source License

/**
 * Converts the typed dependencies of the given parse tree into DKPro dependency
 * annotations via the provided {@link StanfordAnnotator}.
 *
 * @param aParser
 *            parser whose language pack supplies the grammatical structure factory
 * @param sfAnnotator
 *            annotator used to write the dependency annotations into the CAS
 * @param parseTree
 *            the constituent parse from which dependencies are derived
 * @param tokens
 *            the tokens of the sentence, in order, used to resolve 1-based
 *            dependency indices back to CAS tokens
 */
protected void doCreateDependencyTags(ParserGrammar aParser, StanfordAnnotator sfAnnotator, Tree parseTree,
        List<Token> tokens) {
    GrammaticalStructure gs;
    try {
        TreebankLanguagePack tlp = aParser.getTLPParams().treebankLanguagePack();
        gs = tlp.grammaticalStructureFactory(tlp.punctuationWordRejectFilter(), tlp.typedDependencyHeadFinder())
                .newGrammaticalStructure(parseTree);
    } catch (UnsupportedOperationException e) {
        // We already warned in the model provider if dependencies are not supported, so here
        // we just do nothing and skip the dependencies.
        return;
    }

    Collection<TypedDependency> dependencies;
    switch (mode) {
    case BASIC:
        dependencies = gs.typedDependencies(); // gs.typedDependencies(false);
        break;
    case NON_COLLAPSED:
        dependencies = gs.allTypedDependencies(); // gs.typedDependencies(true);
        break;
    case COLLAPSED_WITH_EXTRA:
        dependencies = gs.typedDependenciesCollapsed(true);
        break;
    case COLLAPSED:
        dependencies = gs.typedDependenciesCollapsed(false);
        break;
    case CC_PROPAGATED:
        dependencies = gs.typedDependenciesCCprocessed(true);
        break;
    case CC_PROPAGATED_NO_EXTRA:
        dependencies = gs.typedDependenciesCCprocessed(false);
        break;
    case TREE:
        dependencies = gs.typedDependenciesCollapsedTree();
        break;
    default:
        // Fail fast on an unhandled mode instead of hitting a NullPointerException
        // in the loop below when a new enum constant is added.
        throw new IllegalStateException("Unknown dependency mode: " + mode);
    }

    for (TypedDependency currTypedDep : dependencies) {
        int govIndex = currTypedDep.gov().index();
        int depIndex = currTypedDep.dep().index();
        if (govIndex != 0) {
            // Stanford CoreNLP produces a dependency relation between a verb and ROOT-0 which
            // is not token at all!
            Token govToken = tokens.get(govIndex - 1);
            Token depToken = tokens.get(depIndex - 1);

            sfAnnotator.createDependencyAnnotation(currTypedDep.reln(), govToken, depToken);
        }
    }
}

From source file:ie.pars.bnc.preprocess.ProcessNLP.java

License: Open Source License

/**
 * Parses the given BNC XML stream and renders each sentence as POS-tagged,
 * parsed, dependency-annotated token lines wrapped in {@code <p>}/{@code <s>}
 * markers.
 *
 * @param inputStreamFile
 *            the BNC XML input; closed by this method in all cases
 * @param morphology
 *            morphology used when formatting each token line
 * @param posTagger
 *            the POS tagger applied to each tokenized sentence
 * @param parser
 *            the parser used to build constituent and dependency structures
 * @return the formatted output for the whole stream
 * @throws Exception
 *             if reading, tagging, or parsing fails
 */
public static StringBuilder parseBNCXML(InputStream inputStreamFile, Morphology morphology,
        MaxentTagger posTagger, ParserGrammar parser) throws Exception {
    StringBuilder results = new StringBuilder();
    int counterSent = 0;
    try {
        List<List<List<WordLemmaTag>>> parseBNCXMLTokenized = parseBNCXMLTokenized(inputStreamFile);
        for (List<List<WordLemmaTag>> xparseBNCXMLL : parseBNCXMLTokenized) {
            results.append("<p>\n");
            for (List<WordLemmaTag> para : xparseBNCXMLL) {
                // Progress indicator: one dot per 20 sentences.
                if (counterSent++ % 20 == 0) {
                    System.out.print(".");
                }
                results.append("<s>\n");
                List<TaggedWord> tagSentence = posTagger.tagSentence(para, true);

                Tree parseTree = parser.parse(tagSentence);

                GrammaticalStructure gs = parser.getTLPParams().getGrammaticalStructure(parseTree,
                        parser.treebankLanguagePack().punctuationWordRejectFilter(),
                        parser.getTLPParams().typedDependencyHeadFinder());

                Collection<TypedDependency> deps = gs.typedDependenciesCollapsedTree();
                SemanticGraph depTree = new SemanticGraph(deps);

                for (int i = 0; i < tagSentence.size(); ++i) {

                    // Determine governor index and relation for token i+1; root
                    // tokens get head 0 and relation "ROOT"; unattached tokens
                    // keep head -1 / null relation.
                    int head = -1;
                    String deprel = null;
                    Set<Integer> rootSet = depTree.getRoots().stream().map(IndexedWord::index)
                            .collect(Collectors.toSet());
                    IndexedWord node = depTree.getNodeByIndexSafe(i + 1);
                    if (node != null) {
                        List<SemanticGraphEdge> edgeList = depTree.getIncomingEdgesSorted(node);
                        if (!edgeList.isEmpty()) {
                            assert edgeList.size() == 1;
                            head = edgeList.get(0).getGovernor().index();
                            deprel = edgeList.get(0).getRelation().toString();
                        } else if (rootSet.contains(i + 1)) {
                            head = 0;
                            deprel = "ROOT";
                        }
                    }

                    // Write the token; tokens without a governor get a null lexical head.
                    TaggedWord lexHead = null;
                    if (head > 0) {
                        lexHead = tagSentence.get(head - 1);
                    }
                    results.append(line(i + 1, tagSentence.get(i), morphology, head, deprel, lexHead))
                            .append("\n");
                }
                results.append("</s>\n");
            }
            results.append("</p>\n");
        }
        System.out.println("");
    } finally {
        // Close the input even when tagging/parsing throws; previously close()
        // was straight-line code and the stream leaked on any exception.
        inputStreamFile.close();
    }

    return results;
}

From source file:ie.pars.bnc.preprocess.ProcessNLP.java

License: Open Source License

/**
 * Writes the collapsed-tree typed dependencies of the given parse tree to the
 * output stream, one dependency per line, echoing token details to stdout.
 *
 * @param tree
 *            the parse tree to extract dependencies from
 * @param parser
 *            parser providing the grammatical structure machinery
 * @param arg
 *            unused; kept for interface compatibility
 * @param outStream
 *            destination stream; it is flushed but intentionally not closed,
 *            since the caller owns it
 * @param commandArgs
 *            unused; kept for interface compatibility
 * @throws IOException
 *             if writing to the stream fails
 */
public static void handleDependencies(Tree tree, ParserGrammar parser, String arg, OutputStream outStream,
        String commandArgs) throws IOException {
    GrammaticalStructure gs = parser.getTLPParams().getGrammaticalStructure(tree,
            parser.treebankLanguagePack().punctuationWordRejectFilter(),
            parser.getTLPParams().typedDependencyHeadFinder());

    Collection<TypedDependency> deps = gs.typedDependenciesCollapsedTree();
    // SemanticGraph sg = new SemanticGraph(deps);

    // Use the charset constant instead of the name-based lookup: it skips the
    // runtime charset resolution and cannot throw UnsupportedEncodingException.
    OutputStreamWriter osw = new OutputStreamWriter(outStream, java.nio.charset.StandardCharsets.UTF_8);
    for (TypedDependency dep : deps) {
        String t = dep.dep().word() + "\t" + dep.dep().lemma() + "\t" + dep.dep().tag() + "\t";
        System.out.println(t);

        osw.write(dep.toString());
        osw.write("\n");
    }
    osw.flush();
}

From source file:ie.pars.bnc.preprocess.ProcessNLP.java

License: Open Source License

/**
 * Tokenizes, tags, and parses a single sentence, returning its tokens as
 * dependency-annotated lines wrapped in an {@code <s id='...'>} element.
 */
private static StringBuilder parseTheSentence(String sentence, Morphology morphology, MaxentTagger posTagger,
        ParserGrammar parser, String sid) {
    // Tokenize the raw sentence text with the standard PTB tokenizer.
    TokenizerFactory<Word> newTokenizerFactory = PTBTokenizerFactory.newTokenizerFactory();
    Tokenizer<Word> tokenizer = newTokenizerFactory.getTokenizer(new StringReader(sentence));
    List<Word> words = tokenizer.tokenize();

    // POS-tag the tokens, then parse the tagged sequence.
    List<TaggedWord> taggedWords = posTagger.tagSentence(words);
    Tree tree = parser.parse(taggedWords);

    // Derive collapsed-tree typed dependencies and load them into a semantic graph.
    GrammaticalStructure structure = parser.getTLPParams().getGrammaticalStructure(tree,
            parser.treebankLanguagePack().punctuationWordRejectFilter(),
            parser.getTLPParams().typedDependencyHeadFinder());
    SemanticGraph graph = new SemanticGraph(structure.typedDependenciesCollapsedTree());

    StringBuilder output = new StringBuilder();
    output.append("<s id='" + sid + "'>\n");

    for (int tokenIndex = 0; tokenIndex < taggedWords.size(); ++tokenIndex) {
        // Governor index and relation for token tokenIndex+1: roots get 0/"ROOT",
        // unattached tokens keep -1 and a null relation.
        int head = -1;
        String deprel = null;

        Set<Integer> roots = graph.getRoots().stream().map(IndexedWord::index).collect(Collectors.toSet());
        IndexedWord node = graph.getNodeByIndexSafe(tokenIndex + 1);
        if (node != null) {
            List<SemanticGraphEdge> incoming = graph.getIncomingEdgesSorted(node);
            if (!incoming.isEmpty()) {
                assert incoming.size() == 1;
                SemanticGraphEdge edge = incoming.get(0);
                head = edge.getGovernor().index();
                deprel = edge.getRelation().toString();
            } else if (roots.contains(tokenIndex + 1)) {
                head = 0;
                deprel = "ROOT";
            }
        }

        // Resolve the lexical head token, if any, and emit the token line.
        TaggedWord lexicalHead = head > 0 ? taggedWords.get(head - 1) : null;
        output.append(line(tokenIndex + 1, taggedWords.get(tokenIndex), morphology, head, deprel, lexicalHead))
                .append("\n");
    }
    output.append("</s>\n");
    return output;
}