Example usage for the opennlp.tools.sentdetect.SentenceModel constructor

List of usage examples for the opennlp.tools.sentdetect.SentenceModel constructor

Introduction

On this page you can find example usage of the opennlp.tools.sentdetect.SentenceModel constructor.

Prototype

public SentenceModel(URL modelURL) throws IOException 

Source Link

Usage

From source file:de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpSegmenter.java

/**
 * Initializes the component and creates the two model providers: one that
 * produces a {@link SentenceDetectorME} and one that produces a
 * {@link TokenizerME}.
 *
 * <p>Each provider registers default values for the model version, group id,
 * artifact id, location and variant, and then registers {@code modelLocation},
 * {@code language} and {@code variant} as overrides for location, language and
 * variant.
 *
 * @param aContext the UIMA context, passed on to the superclass initializer
 * @throws ResourceInitializationException declared by the overridden method
 */
@Override
public void initialize(UimaContext aContext) throws ResourceInitializationException {
    super.initialize(aContext);

    // Provider for the sentence detector.
    sentenceModelProvider = new CasConfigurableProviderBase<SentenceDetectorME>() {
        {
            // Defaults used when nothing more specific is configured.
            setDefault(VERSION, "20120616.0");
            setDefault(GROUP_ID, "de.tudarmstadt.ukp.dkpro.core");
            setDefault(ARTIFACT_ID,
                    "de.tudarmstadt.ukp.dkpro.core.opennlp-model-sentence-${language}-${variant}");

            setDefault(LOCATION, "classpath:/de/tudarmstadt/ukp/dkpro/core/opennlp/lib/"
                    + "sentence-${language}-${variant}.bin");
            setDefault(VARIANT, "maxent");

            // Explicit settings registered as overrides.
            setOverride(LOCATION, modelLocation);
            setOverride(LANGUAGE, language);
            setOverride(VARIANT, variant);
        }

        /**
         * Reads a sentence model from the given URL and wraps it in a detector.
         * The stream is always closed quietly afterwards.
         */
        @Override
        protected SentenceDetectorME produceResource(URL aUrl) throws IOException {
            final InputStream modelStream = aUrl.openStream();
            try {
                return new SentenceDetectorME(new SentenceModel(modelStream));
            } finally {
                closeQuietly(modelStream);
            }
        }
    };

    // Provider for the tokenizer.
    tokenModelProvider = new CasConfigurableProviderBase<TokenizerME>() {
        {
            // Defaults used when nothing more specific is configured.
            setDefault(VERSION, "1.5");
            setDefault(GROUP_ID, "de.tudarmstadt.ukp.dkpro.core");
            setDefault(ARTIFACT_ID, "de.tudarmstadt.ukp.dkpro.core.opennlp-model-token-${language}-${variant}");

            setDefault(LOCATION, "classpath:/de/tudarmstadt/ukp/dkpro/core/opennlp/lib/"
                    + "token-${language}-${variant}.bin");
            setDefault(VARIANT, "maxent");

            // Explicit settings registered as overrides.
            setOverride(LOCATION, modelLocation);
            setOverride(LANGUAGE, language);
            setOverride(VARIANT, variant);
        }

        /**
         * Reads a tokenizer model from the given URL and wraps it in a tokenizer.
         * The stream is always closed quietly afterwards.
         */
        @Override
        protected TokenizerME produceResource(URL aUrl) throws IOException {
            final InputStream modelStream = aUrl.openStream();
            try {
                return new TokenizerME(new TokenizerModel(modelStream));
            } finally {
                closeQuietly(modelStream);
            }
        }
    };
}

From source file:edu.stanford.muse.index.NER.java

/**
 * Loads the OpenNLP name finder, sentence detector and tokenizer models from
 * the {@code models/} resources and stores them in the static fields
 * {@code pFinder}, {@code lFinder}, {@code oFinder}, {@code sFinder} and
 * {@code tokenizer}.
 *
 * <p>The method returns immediately when {@code pFinder} is already set.
 * Loading is best effort: any exception raised while loading is logged and
 * not rethrown, so the fields loaded before the failure stay set and the
 * remaining ones keep their previous value. Every model stream is closed
 * once its model has been read.
 *
 * @throws ClassCastException declared for callers; not thrown by the visible code
 * @throws IOException declared for callers; load failures are logged instead
 * @throws ClassNotFoundException declared for callers; not thrown by the visible code
 */
public synchronized static void initialize() throws ClassCastException, IOException, ClassNotFoundException {
    if (pFinder != null) {
        return;
    }
    long startTimeMillis = System.currentTimeMillis();
    log.info("Initializing NER models");

    // Name finders: each stream is scoped to its own try-with-resources so it
    // is closed even when constructing the model fails.
    try {
        try (InputStream pis = Config.getResourceAsStream("models/en-ner-person.bin")) {
            pFinder = new NameFinderME(new TokenNameFinderModel(pis));
        }
        try (InputStream lis = Config.getResourceAsStream("models/en-ner-location.bin")) {
            lFinder = new NameFinderME(new TokenNameFinderModel(lis));
        }
        try (InputStream ois = Config.getResourceAsStream("models/en-ner-organization.bin")) {
            oFinder = new NameFinderME(new TokenNameFinderModel(ois));
        }
    }
    //dont bother about this, instead try not to use it
    catch (Exception e) {
        Util.print_exception(e, log);
    }

    // Sentence detector and tokenizer, loaded independently of the name finders.
    try {
        try (InputStream modelIn = Config.getResourceAsStream("models/en-sent.bin")) {
            sFinder = new SentenceDetectorME(new SentenceModel(modelIn));
        }
        try (InputStream tokenStream = Config.getResourceAsStream("models/en-token.bin")) {
            tokenizer = new TokenizerME(new TokenizerModel(tokenStream));
        }
    } catch (Exception e) {
        // Report through the class logger, consistent with the block above.
        Util.print_exception(e, log);
    }

    long endTimeMillis = System.currentTimeMillis();
    log.info("Done initializing NER model in " + Util.commatize(endTimeMillis - startTimeMillis) + "ms");
}

From source file:it.uniud.ailab.dcore.wrappers.external.OpenNlpBootstrapperAnnotator.java

/**
 * Loads a sentence model or retrieves it from cache if has been already
 * loaded before./*  ww  w  . j  a v a  2  s  .c  om*/
 *
 * @param modelId the model to retrieve
 * @return the loaded model
 */
public static SentenceModel getSentenceModel(String modelId) {

    // if the model has not already been loaded, cache it
    if (!sentenceModelsCache.containsKey(modelId)) {

        // Split the text into sentences
        InputStream sentModelIn = null;
        SentenceModel sentModel = null;
        String sentPath = "";

        try {
            sentPath = databasePaths.get(modelId);
            sentModelIn = new FileInputStream(sentPath);
            sentModel = new SentenceModel(sentModelIn);
        } catch (IOException e) {
            throw new AnnotationException(new OpenNlpBootstrapperAnnotator(),
                    "Error while loading the model file \"" + sentPath + "\".", e);
        } catch (NullPointerException e) {
            throw new AnnotationException(new OpenNlpBootstrapperAnnotator(),
                    "Error while looking for the model \"" + modelId + "\".", e);
        } finally {
            if (sentModelIn != null) {
                try {
                    sentModelIn.close();
                } catch (IOException e) {
                    throw new AnnotationException(new OpenNlpBootstrapperAnnotator(),
                            "Error while loading the model file '\"" + modelId + "\".", e);
                }
            }
        }
        sentenceModelsCache.put(modelId, sentModel);
        return sentModel;
    }
    return sentenceModelsCache.get(modelId);
}

From source file:org.dbpedia.spotlight.spot.OpenNLPUtil.java

/**
 * Reads an OpenNLP model of the requested type from the given stream.
 *
 * <p>The {@code person}, {@code organization} and {@code location} types are
 * all read as a {@link TokenNameFinderModel}. The stream is not closed here.
 *
 * @param m  the kind of model to read
 * @param in the stream containing the serialized model
 * @return the loaded model, or {@code null} when {@code m} is not one of the
 *         handled types
 * @throws InvalidFormatException declared for the model constructors
 * @throws IOException declared for the model constructors
 */
protected static BaseModel loadgivenmodeltype(OpenNlpModels m, InputStream in)
        throws InvalidFormatException, IOException {
    BaseModel mdl = null;
    switch (m) {
    case TokenizerModel:
        mdl = new TokenizerModel(in);
        LOG.debug("OpenNLP5 Tokenizer Model loaded: " + mdl);
        break;
    case POSModel:
        mdl = new POSModel(in);
        LOG.debug("OpenNLP5 POS Model loaded: " + mdl);
        break;
    case SentenceModel:
        mdl = new SentenceModel(in);
        LOG.debug("OpenNLP5 Sentence Model loaded: " + mdl);
        break;
    case ChunkModel:
        mdl = new ChunkerModel(in);
        // Previously logged as "Sentence Model" (copy-paste slip).
        LOG.debug("OpenNLP5 Chunker Model loaded: " + mdl);
        break;
    case person:
    case organization:
    case location:
        mdl = new TokenNameFinderModel(in);
        LOG.debug("OpenNLP5 TokenNameFinderModel Model loaded: " + mdl);
        break;
    default:
        // Unhandled type: nothing is read and null is returned.
        LOG.debug("Unknown Model Type!");
        break;
    }
    return mdl;
}

From source file:org.esipfed.eskg.nlp.OpenIE.java

/**
 * Splits the classpath resource {@code test.txt} into sentences, runs Open IE
 * extraction on each sentence and logs one tab-separated line per extraction:
 * confidence, context, first argument, relation, then the second arguments
 * each followed by {@code "; "}.
 *
 * @param args command-line arguments (unused)
 * @throws IOException if the sentence model or the input text cannot be
 *         found or read
 */
public static void main(String[] args) throws IOException {

    SentenceDetector sentenceDetector = loadSentenceDetector();

    edu.knowitall.openie.OpenIE openIE = new edu.knowitall.openie.OpenIE(
            new ClearParser(new ClearPostagger(new ClearTokenizer())), new ClearSrl(), false, false);

    // any text file that contains English sentences would work
    File file = FileUtils.toFile(OpenIE.class.getClassLoader().getResource("test.txt"));
    if (file == null) {
        // FileUtils.toFile yields null for a missing or non-file resource;
        // fail with a clear message rather than a NullPointerException.
        throw new IOException("Resource 'test.txt' was not found on the classpath as a file");
    }
    String text = readFile(file.getAbsolutePath(), StandardCharsets.UTF_8);

    for (String sentence : sentenceDetector.sentDetect(text)) {

        Seq<Instance> extractions = openIE.extract(sentence);

        List<Instance> listExtractions = JavaConversions.seqAsJavaList(extractions);

        for (Instance instance : listExtractions) {
            StringBuilder sb = new StringBuilder();

            sb.append(instance.confidence()).append('\t').append(instance.extr().context()).append('\t')
                    .append(instance.extr().arg1().text()).append('\t').append(instance.extr().rel().text())
                    .append('\t');

            List<Argument> listArg2s = JavaConversions.seqAsJavaList(instance.extr().arg2s());
            for (Argument argument : listArg2s) {
                sb.append(argument.text()).append("; ");
            }

            LOG.info(sb.toString());
        }
    }

}

/**
 * Loads the sentence detector from the classpath resource {@code en-sent.bin}.
 * The model stream is closed whether or not loading succeeds.
 *
 * @return a sentence detector backed by the loaded model
 * @throws IOException if the resource is missing or cannot be read; the
 *         failure is logged before it is rethrown
 */
private static SentenceDetector loadSentenceDetector() throws IOException {
    // need to change this to the resource folder
    try (InputStream modelIn = OpenIE.class.getClassLoader().getResourceAsStream("en-sent.bin")) {
        if (modelIn == null) {
            throw new IOException("Resource 'en-sent.bin' was not found on the classpath");
        }
        return new SentenceDetectorME(new SentenceModel(modelIn));
    } catch (IOException ioe) {
        LOG.error("Error either reading 'en-sent.bin' file or creating SentanceModel: ", ioe);
        throw ioe;
    }
}

From source file:org.wso2.uima.collectionProccesingEngine.analysisEngines.LocationIdentifier.java

/**
 * Initializes the analysis engine by loading the sentence detector,
 * tokenizer and location name finder models from the UIMA context resources
 * {@code "SentenceModel"}, {@code "TokenizerModel"} and
 * {@code "TokenNameFinderModel"}.
 *
 * <p>All streams are closed quietly in the {@code finally} block. The
 * success message is logged only when every model has been loaded.
 *
 * @param ctx the UIMA context, passed on to the superclass initializer
 * @throws ResourceInitializationException wrapping any exception raised
 *         while obtaining or reading a model
 */
@Override
public void initialize(UimaContext ctx) throws ResourceInitializationException {
    super.initialize(ctx);
    InputStream sentenceStream = null;
    InputStream tokenizerStream = null;
    InputStream nameFinderStream = null;
    try {
        sentenceStream = getContext().getResourceAsStream("SentenceModel");
        SentenceModel sentenceModel = new SentenceModel(sentenceStream);
        sentenceDetector = new SentenceDetectorME(sentenceModel);

        tokenizerStream = getContext().getResourceAsStream("TokenizerModel");
        TokenizerModel tokenModel = new TokenizerModel(tokenizerStream);
        tokenizer = new TokenizerME(tokenModel);

        nameFinderStream = getContext().getResourceAsStream("TokenNameFinderModel");
        TokenNameFinderModel nameFinderModel = new TokenNameFinderModel(nameFinderStream);
        locationFinder = new NameFinderME(nameFinderModel);
    } catch (Exception e) {
        throw new ResourceInitializationException(e);
    } finally {
        // Single place where the streams are closed, on success and failure.
        IOUtils.closeQuietly(nameFinderStream);
        IOUtils.closeQuietly(tokenizerStream);
        IOUtils.closeQuietly(sentenceStream);
    }
    // Reached only when no exception was thrown above, so the message is
    // no longer emitted for a failed initialization.
    logger.info(LocationIdentifier.class.getSimpleName() + " Analysis Engine initialized successfully");
}