Example usage for opennlp.tools.postag POSModel POSModel

List of usage examples for opennlp.tools.postag POSModel POSModel

Introduction

On this page you can find example usage of the constructor for opennlp.tools.postag POSModel.

Prototype

public POSModel(URL modelURL) throws IOException 

Source Link

Usage

From source file:de.dfki.km.perspecting.obie.experiments.PhraseExperiment.java

/**
 * @throws java.lang.Exception//from www  .ja v  a  2 s  . co  m
 */
@BeforeClass
public static void setUpBeforeClass() throws Exception {

    pool.setUser($DATABASE_SERVER_USER);
    pool.setPassword($DATABASE_SERVER_PW);
    pool.setPortNumber($DATABASE_SERVER_PORT);
    pool.setDatabaseName($DATABASE);
    pool.setServerName($DATABASE_SERVER);
    pool.setMaxConnections(100);

    kb = new PostgresKB(pool.getConnection(), $DATABASE, new URI("http://test.de"));
    pipeline = new Pipeline(kb);

    LanguageIdentification languageClassification = new LanguageIdentification(Language.EN);
    WordSegmenter wordTokenizer = new WordSegmenter();
    SentenceSegmenter sentenceTokenizer = new SentenceSegmenter();

    POSModel posModel = new POSModel(Scoobie.class.getResourceAsStream("pos/en/en-pos-maxent.bin"));
    POSTagging posTagger = new POSTagging(new POSTaggerME(posModel));
    ProperNameRecognition nounPhraseChunker = new ProperNameRecognition(
            new CRFNounPhraseChunkerModel(Scoobie.class.getResourceAsStream("npc/en/EN.crf")));

    SuffixArrayBuilder suffixArrayBuilder = new SuffixArrayBuilder(100, new LiteralHashing(4));
    RDFLiteralSpotting entityRecognizer = new RDFLiteralSpotting();
    InstanceRecognition subjectResolver = new InstanceRecognition();

    pipeline.configure(languageClassification, wordTokenizer, sentenceTokenizer, posTagger, nounPhraseChunker,
            suffixArrayBuilder, entityRecognizer, new DummyTask(), new DummyTask(), subjectResolver,
            new DummyTask(), new DummyTask(), new DummyTask(), new DummyTask());

}

From source file:de.dfki.km.perspecting.obie.experiments.ProperNameExperiment.java

/**
 * @throws java.lang.Exception/*from  w w w  .j a  v a  2s.com*/
 */

public static void setUp(String databaseServer, String dataBase) throws Exception {

    pool.setUser($DATABASE_SERVER_USER);
    pool.setPassword($DATABASE_SERVER_PW);
    pool.setPortNumber($DATABASE_SERVER_PORT);
    pool.setDatabaseName(dataBase);
    pool.setServerName(databaseServer);
    pool.setMaxConnections(100);

    kb = new PostgresKB(pool.getConnection(), dataBase, new URI("http://test.de"));
    pipeline = new Pipeline(kb);

    LanguageIdentification languageClassification = new LanguageIdentification(Language.EN);
    WordSegmenter wordTokenizer = new WordSegmenter();
    SentenceSegmenter sentenceTokenizer = new SentenceSegmenter();

    POSModel posModel = new POSModel(Scoobie.class.getResourceAsStream("pos/en/en-pos-maxent.bin"));
    POSTagging posTagger = new POSTagging(new POSTaggerME(posModel));
    ProperNameRecognition nounPhraseChunker = new ProperNameRecognition(
            new CRFNounPhraseChunkerModel(Scoobie.class.getResourceAsStream("npc/en/EN.crf")));

    SuffixArrayBuilder suffixArrayBuilder = new SuffixArrayBuilder(100, new LiteralHashing(4));
    RDFLiteralSpotting entityRecognizer = new RDFLiteralSpotting();
    pipeline.configure(languageClassification, wordTokenizer, sentenceTokenizer, posTagger, nounPhraseChunker,
            suffixArrayBuilder, entityRecognizer, new DummyTask(), new DummyTask(), new DummyTask(),
            new DummyTask(), new DummyTask(), new DummyTask(), new DummyTask());

}

From source file:de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger.java

@Override
public void initialize(UimaContext aContext) throws ResourceInitializationException {
    super.initialize(aContext);

    modelProvider = new CasConfigurableProviderBase<POSTagger>() {
        {//  www .  j a  v  a 2  s .c o m
            setDefault(VERSION, "20120616.0");
            setDefault(GROUP_ID, "de.tudarmstadt.ukp.dkpro.core");
            setDefault(ARTIFACT_ID,
                    "de.tudarmstadt.ukp.dkpro.core.opennlp-model-tagger-${language}-${variant}");

            setDefault(LOCATION, "classpath:/de/tudarmstadt/ukp/dkpro/core/opennlp/lib/"
                    + "tagger-${language}-${variant}.bin");
            setDefault(VARIANT, "maxent");

            setOverride(LOCATION, modelLocation);
            setOverride(LANGUAGE, language);
            setOverride(VARIANT, variant);
        }

        @Override
        protected POSTagger produceResource(URL aUrl) throws IOException {
            InputStream is = null;
            try {
                is = aUrl.openStream();
                POSModel model = new POSModel(is);

                if (printTagSet) {
                    List<String> tags = new ArrayList<String>();
                    for (int i = 0; i < model.getPosModel().getNumOutcomes(); i++) {
                        tags.add(model.getPosModel().getOutcome(i));
                    }
                    Collections.sort(tags);

                    StringBuilder sb = new StringBuilder();
                    sb.append("Model contains [").append(tags.size()).append("] tags: ");

                    for (String tag : tags) {
                        sb.append(tag);
                        sb.append(" ");
                    }
                    getContext().getLogger().log(INFO, sb.toString());
                }

                return new POSTaggerME(model);
            } finally {
                closeQuietly(is);
            }
        }
    };

    mappingProvider = new MappingProvider();
    mappingProvider.setDefault(MappingProvider.LOCATION, "classpath:/de/tudarmstadt/ukp/dkpro/"
            + "core/api/lexmorph/tagset/${language}-${tagger.tagset}-tagger.map");
    mappingProvider.setDefault(MappingProvider.BASE_TYPE, POS.class.getName());
    mappingProvider.setDefault("tagger.tagset", "default");
    mappingProvider.setOverride(MappingProvider.LOCATION, mappingLocation);
    mappingProvider.setOverride(MappingProvider.LANGUAGE, language);
    mappingProvider.addImport("tagger.tagset", modelProvider);

}

From source file:de.dfki.km.perspecting.obie.experiments.RelevanceRatingExperiment.java

public static void setUp(String $DATABASE_SERVER, String $DATABASE, TextCorpus corpus) throws Exception {

    pool.setUser($DATABASE_SERVER_USER);
    pool.setPassword($DATABASE_SERVER_PW);
    pool.setPortNumber($DATABASE_SERVER_PORT);
    pool.setDatabaseName($DATABASE);// ww  w .j  a v a2 s  .  co m
    pool.setServerName($DATABASE_SERVER);
    pool.setMaxConnections(100);

    kb = new PostgresKB(pool.getConnection(), $DATABASE, new URI("http://test.de"));
    pipeline = new Pipeline(kb);

    LanguageIdentification languageClassification = new LanguageIdentification(Language.EN);
    WordSegmenter wordTokenizer = new WordSegmenter();
    SentenceSegmenter sentenceTokenizer = new SentenceSegmenter();

    POSModel posModel = new POSModel(Scoobie.class.getResourceAsStream("pos/en/en-pos-maxent.bin"));
    POSTagging posTagger = new POSTagging(new POSTaggerME(posModel));

    ProperNameRecognition nounPhraseChunker = new ProperNameRecognition(
            new CRFNounPhraseChunkerModel($SCOOBIE_HOME + $DATABASE_DBPEDIA_en2 + "/npc/en/EN.crf"));

    SuffixArrayBuilder suffixArrayBuilder = new SuffixArrayBuilder(100, new LiteralHashing(4));
    RDFLiteralSpotting namedEntityRecognizer = new RDFLiteralSpotting();
    InstanceRecognition instanceResolver = new InstanceRecognition();
    EntityDisambiguation instanceDisambiguator = new EntityDisambiguation(
            new AmbiguityResolver[] { new DegreeBasedResolver() });

    KnownFactsRetrieval factRetrieval = new KnownFactsRetrieval();

    ArrayList<int[]> l = new ArrayList<int[]>();

    int max = (int) Math.pow(2, 9);
    for (int i = 0; i < max; i++) {
        String binary = Integer.toBinaryString(i);
        String prefix = "";
        for (int pad = 0; pad < 9 - binary.length(); pad++) {
            prefix += "0";
        }
        binary = prefix + binary;

        TIntHashSet s = new TIntHashSet();
        for (int j = 0; j < 9; j++) {
            if (j < binary.length() && binary.charAt(j) == '1') {
                s.add(j);
            }
        }
        if (s.size() > 1)
            l.add(s.toArray());
    }

    RelevanceRating relevanceRating = new RelevanceRating(new RatingMetric[] { new AuthorityBasedRating(), // 0
            new HubBasedRating(), // 1
            new PageRankBasedRating(), // 2
            new DegreeBasedRating(), // 3
            new CapacityBasedRating(), // 4
            new RandomRating(), // 5
            new PositionBasedRating(), // 6
            new TermFrequencyBasedRating(), // 7
            new InverseDocumentFrequencyBasedRating(corpus,
                    new File(corpus.getCorpus().getAbsolutePath() + "/index/")) }, // 8

            l.toArray(new int[l.size()][]));

    pipeline.configure(languageClassification, wordTokenizer, sentenceTokenizer, posTagger, nounPhraseChunker,
            suffixArrayBuilder, namedEntityRecognizer, new DummyTask(), new DummyTask(), instanceResolver,
            instanceDisambiguator, factRetrieval, relevanceRating, new DummyTask());

}

From source file:hrpod.tools.nlp.NLPTools.java

public void setPosModel() {
    try {//from  ww w.  ja  v  a  2 s .com
        URL url = this.getClass().getResource(modelBasePath + "en-pos-maxent.bin");
        this.posModel = new POSModel(new FileInputStream(new File(url.getFile())));
    } catch (Exception e) {
        logger.error("Error is setPosModel", e);
    }
}

From source file:it.uniud.ailab.dcore.wrappers.external.OpenNlpBootstrapperAnnotator.java

/**
 * Loads a POStagger model or retrieves it from cache if has been already
 * loaded before./*  w  ww. ja  v a2s . c o  m*/
 *
 * @param modelId the model to retrieve
 * @return the loaded model
 */
public static POSModel getPOSTaggerModel(String modelId) {

    // if the model has not already been loaded, cache it
    if (!posModelsCache.containsKey(modelId)) {

        // Split the text into sentences
        InputStream POSModelIn = null;
        POSModel POSModel = null;
        String sentPath = "";

        try {
            sentPath = databasePaths.get(modelId);
            POSModelIn = new FileInputStream(sentPath);
            POSModel = new POSModel(POSModelIn);
        } catch (IOException e) {
            throw new AnnotationException(new OpenNlpBootstrapperAnnotator(),
                    "Error while loading the model file \"" + sentPath + "\".", e);
        } catch (NullPointerException e) {
            throw new AnnotationException(new OpenNlpBootstrapperAnnotator(),
                    "Error while looking for the model \"" + modelId + "\".", e);
        } finally {
            if (POSModelIn != null) {
                try {
                    POSModelIn.close();
                } catch (IOException e) {
                    throw new AnnotationException(new OpenNlpBootstrapperAnnotator(),
                            "Error while loading the model file '\"" + modelId + "\".", e);
                }
            }
        }
        posModelsCache.put(modelId, POSModel);
        return POSModel;
    }
    return posModelsCache.get(modelId);
}

From source file:org.dbpedia.spotlight.spot.OpenNLPUtil.java

protected static BaseModel loadgivenmodeltype(OpenNlpModels m, InputStream in)
        throws InvalidFormatException, IOException {
    BaseModel mdl = null;//from  www .  jav a 2s. c  o m
    switch (m) {
    case TokenizerModel: {
        mdl = new TokenizerModel(in);
        LOG.debug("OpenNLP5 Tokenizer Model loaded: " + mdl);
        break;
    }
    case POSModel: {
        mdl = new POSModel(in);
        LOG.debug("OpenNLP5 POS Model loaded: " + mdl);
        break;
    }
    case SentenceModel: {
        mdl = new SentenceModel(in);
        LOG.debug("OpenNLP5 Sentence Model loaded: " + mdl);
        break;
    }
    case ChunkModel: {
        mdl = new ChunkerModel(in);
        LOG.debug("OpenNLP5 Sentence Model loaded: " + mdl);
        break;
    }
    case person:
    case organization:
    case location: {
        mdl = new TokenNameFinderModel(in);
        LOG.debug("OpenNLP5 TokenNameFinderModel Model loaded: " + mdl);
        break;
    }
    default:
        LOG.debug("Unknown Model Type!");

    }
    return mdl;
}

From source file:sentimental_analysis.pre.processor.WordBag.Tokenizer.PartsofSpeechFilter.java

private POSModel initialisePOS() {
    try {/*from  w ww.  ja  v a  2 s .c  o  m*/
        FileInputStream modelIn = new FileInputStream(
                "C:\\Users\\Sushil-PC\\Dropbox\\SentimentalAnalysis-shared\\java\\en-pos-maxent.bin");
        try {
            return new POSModel(modelIn);
        } catch (IOException ex) {
            System.out.println("Exception Thrown POS Model (initialisePOS) : " + ex.getMessage());
            System.out.println("Stack Trace");
            ex.printStackTrace();
        }

    } catch (FileNotFoundException ex) {
        Logger.getLogger(TokenizerM.class.getName()).log(Level.SEVERE, null, ex);
        System.out.println("Exception Thrown En-pps-maxent-bin : " + ex.getMessage());
        System.out.println("Stack Trace");
        ex.printStackTrace();
    }
    return null;
}