Example usage for edu.stanford.nlp.parser.common ParserGrammar parserQuery

List of usage examples for edu.stanford.nlp.parser.common ParserGrammar parserQuery

Introduction

On this page you can find example usage for edu.stanford.nlp.parser.common ParserGrammar parserQuery.

Prototype

public abstract ParserQuery parserQuery();

Source Link

Usage

From source file: de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordParser.java

License: Open Source License

/**
 * Parses every annotation of the configured type (sentences when no type is configured) with
 * the Stanford parser and writes the requested Penn tree, dependency and constituent
 * annotations.
 *
 * <p>Annotations covering more than {@code maxTokens} tokens are skipped without being parsed.
 *
 * @param aJCas
 *            the {@link JCas} to process
 * @throws AnalysisEngineProcessException
 *             if the configured annotation type is not defined in the type system, if parsing
 *             throws, or if the {@code StanfordAnnotator} cannot be created
 * @see org.apache.uima.analysis_component.JCasAnnotator_ImplBase#process(org.apache.uima.jcas.JCas)
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    modelProvider.configure(aJCas.getCas());
    posMappingProvider.configure(aJCas.getCas());
    constituentMappingProvider.configure(aJCas.getCas());

    Type typeToParse;
    if (annotationTypeToParse != null) {
        typeToParse = aJCas.getCas().getTypeSystem().getType(annotationTypeToParse);
        // getType() yields null for an unknown type name; fail with a clear message instead of
        // handing null to the annotation index lookup below.
        if (typeToParse == null) {
            throw new AnalysisEngineProcessException(new IllegalArgumentException(
                    "Annotation type to parse is not defined in the type system: "
                            + annotationTypeToParse));
        }
    } else {
        typeToParse = JCasUtil.getType(aJCas, Sentence.class);
    }
    FSIterator<Annotation> typeToParseIterator = aJCas.getAnnotationIndex(typeToParse).iterator();

    // Iterate over each Sentence (or whichever annotation type was configured) and parse it
    while (typeToParseIterator.hasNext()) {
        Annotation currAnnotationToParse = typeToParseIterator.next();
        List<HasWord> tokenizedSentence = new ArrayList<HasWord>();
        List<Token> tokens = new ArrayList<Token>();

        // Collect the covered tokens twice: as parser input words and as the original Token
        // annotations that are later paired with the parse tree
        for (Token token : JCasUtil.selectCovered(Token.class, currAnnotationToParse)) {
            tokenizedSentence.add(tokenToWord(token));
            tokens.add(token);
        }

        // Only render the token list when FINE logging is actually enabled
        if (getContext().getLogger().isLoggable(FINE)) {
            getContext().getLogger().log(FINE, tokenizedSentence.toString());
        }
        ParserGrammar parser = modelProvider.getResource();

        // Skip over-long inputs entirely; no annotations are created for them
        if (tokenizedSentence.size() > maxTokens) {
            continue;
        }

        Tree parseTree;
        try {
            if (ptb3Escaping) {
                tokenizedSentence = CoreNlpUtils.applyPtbEscaping(tokenizedSentence, quoteBegin, quoteEnd);
            }

            // Get parse
            // NOTE(review): the return value of parse() is ignored and the best parse is used
            // as-is; confirm whether it can be null here and how TreeWithTokens handles that.
            ParserQuery query = parser.parserQuery();
            query.parse(tokenizedSentence);
            parseTree = query.getBestParse();
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }

        // Create new StanfordAnnotator object
        StanfordAnnotator sfAnnotator;
        try {
            sfAnnotator = new StanfordAnnotator(new TreeWithTokens(parseTree, tokens));
            sfAnnotator.setPosMappingProvider(posMappingProvider);
            sfAnnotator.setConstituentMappingProvider(constituentMappingProvider);
        } catch (CASException e) {
            throw new AnalysisEngineProcessException(e);
        }

        // Create Penn bracketed structure annotations
        if (writePennTree) {
            sfAnnotator.createPennTreeAnnotation(currAnnotationToParse.getBegin(),
                    currAnnotationToParse.getEnd());
        }

        // Create dependency annotations
        if (writeDependency) {
            doCreateDependencyTags(parser, sfAnnotator, parseTree, tokens);
        }

        // Create constituent annotations
        if (writeConstituent) {
            sfAnnotator.createConstituentAnnotationFromTree(parser.getTLPParams().treebankLanguagePack(),
                    writePos);
        }
    }
}