List of usage examples for the opennlp.tools.postag.POSModel constructor
public POSModel(URL modelURL) throws IOException
From source file:de.dfki.km.perspecting.obie.experiments.PhraseExperiment.java
/** * @throws java.lang.Exception//from www .ja v a 2 s . co m */ @BeforeClass public static void setUpBeforeClass() throws Exception { pool.setUser($DATABASE_SERVER_USER); pool.setPassword($DATABASE_SERVER_PW); pool.setPortNumber($DATABASE_SERVER_PORT); pool.setDatabaseName($DATABASE); pool.setServerName($DATABASE_SERVER); pool.setMaxConnections(100); kb = new PostgresKB(pool.getConnection(), $DATABASE, new URI("http://test.de")); pipeline = new Pipeline(kb); LanguageIdentification languageClassification = new LanguageIdentification(Language.EN); WordSegmenter wordTokenizer = new WordSegmenter(); SentenceSegmenter sentenceTokenizer = new SentenceSegmenter(); POSModel posModel = new POSModel(Scoobie.class.getResourceAsStream("pos/en/en-pos-maxent.bin")); POSTagging posTagger = new POSTagging(new POSTaggerME(posModel)); ProperNameRecognition nounPhraseChunker = new ProperNameRecognition( new CRFNounPhraseChunkerModel(Scoobie.class.getResourceAsStream("npc/en/EN.crf"))); SuffixArrayBuilder suffixArrayBuilder = new SuffixArrayBuilder(100, new LiteralHashing(4)); RDFLiteralSpotting entityRecognizer = new RDFLiteralSpotting(); InstanceRecognition subjectResolver = new InstanceRecognition(); pipeline.configure(languageClassification, wordTokenizer, sentenceTokenizer, posTagger, nounPhraseChunker, suffixArrayBuilder, entityRecognizer, new DummyTask(), new DummyTask(), subjectResolver, new DummyTask(), new DummyTask(), new DummyTask(), new DummyTask()); }
From source file:de.dfki.km.perspecting.obie.experiments.ProperNameExperiment.java
/** * @throws java.lang.Exception/*from w w w .j a v a 2s.com*/ */ public static void setUp(String databaseServer, String dataBase) throws Exception { pool.setUser($DATABASE_SERVER_USER); pool.setPassword($DATABASE_SERVER_PW); pool.setPortNumber($DATABASE_SERVER_PORT); pool.setDatabaseName(dataBase); pool.setServerName(databaseServer); pool.setMaxConnections(100); kb = new PostgresKB(pool.getConnection(), dataBase, new URI("http://test.de")); pipeline = new Pipeline(kb); LanguageIdentification languageClassification = new LanguageIdentification(Language.EN); WordSegmenter wordTokenizer = new WordSegmenter(); SentenceSegmenter sentenceTokenizer = new SentenceSegmenter(); POSModel posModel = new POSModel(Scoobie.class.getResourceAsStream("pos/en/en-pos-maxent.bin")); POSTagging posTagger = new POSTagging(new POSTaggerME(posModel)); ProperNameRecognition nounPhraseChunker = new ProperNameRecognition( new CRFNounPhraseChunkerModel(Scoobie.class.getResourceAsStream("npc/en/EN.crf"))); SuffixArrayBuilder suffixArrayBuilder = new SuffixArrayBuilder(100, new LiteralHashing(4)); RDFLiteralSpotting entityRecognizer = new RDFLiteralSpotting(); pipeline.configure(languageClassification, wordTokenizer, sentenceTokenizer, posTagger, nounPhraseChunker, suffixArrayBuilder, entityRecognizer, new DummyTask(), new DummyTask(), new DummyTask(), new DummyTask(), new DummyTask(), new DummyTask(), new DummyTask()); }
From source file:de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger.java
@Override public void initialize(UimaContext aContext) throws ResourceInitializationException { super.initialize(aContext); modelProvider = new CasConfigurableProviderBase<POSTagger>() { {// www . j a v a 2 s .c o m setDefault(VERSION, "20120616.0"); setDefault(GROUP_ID, "de.tudarmstadt.ukp.dkpro.core"); setDefault(ARTIFACT_ID, "de.tudarmstadt.ukp.dkpro.core.opennlp-model-tagger-${language}-${variant}"); setDefault(LOCATION, "classpath:/de/tudarmstadt/ukp/dkpro/core/opennlp/lib/" + "tagger-${language}-${variant}.bin"); setDefault(VARIANT, "maxent"); setOverride(LOCATION, modelLocation); setOverride(LANGUAGE, language); setOverride(VARIANT, variant); } @Override protected POSTagger produceResource(URL aUrl) throws IOException { InputStream is = null; try { is = aUrl.openStream(); POSModel model = new POSModel(is); if (printTagSet) { List<String> tags = new ArrayList<String>(); for (int i = 0; i < model.getPosModel().getNumOutcomes(); i++) { tags.add(model.getPosModel().getOutcome(i)); } Collections.sort(tags); StringBuilder sb = new StringBuilder(); sb.append("Model contains [").append(tags.size()).append("] tags: "); for (String tag : tags) { sb.append(tag); sb.append(" "); } getContext().getLogger().log(INFO, sb.toString()); } return new POSTaggerME(model); } finally { closeQuietly(is); } } }; mappingProvider = new MappingProvider(); mappingProvider.setDefault(MappingProvider.LOCATION, "classpath:/de/tudarmstadt/ukp/dkpro/" + "core/api/lexmorph/tagset/${language}-${tagger.tagset}-tagger.map"); mappingProvider.setDefault(MappingProvider.BASE_TYPE, POS.class.getName()); mappingProvider.setDefault("tagger.tagset", "default"); mappingProvider.setOverride(MappingProvider.LOCATION, mappingLocation); mappingProvider.setOverride(MappingProvider.LANGUAGE, language); mappingProvider.addImport("tagger.tagset", modelProvider); }
From source file:de.dfki.km.perspecting.obie.experiments.RelevanceRatingExperiment.java
public static void setUp(String $DATABASE_SERVER, String $DATABASE, TextCorpus corpus) throws Exception { pool.setUser($DATABASE_SERVER_USER); pool.setPassword($DATABASE_SERVER_PW); pool.setPortNumber($DATABASE_SERVER_PORT); pool.setDatabaseName($DATABASE);// ww w .j a v a2 s . co m pool.setServerName($DATABASE_SERVER); pool.setMaxConnections(100); kb = new PostgresKB(pool.getConnection(), $DATABASE, new URI("http://test.de")); pipeline = new Pipeline(kb); LanguageIdentification languageClassification = new LanguageIdentification(Language.EN); WordSegmenter wordTokenizer = new WordSegmenter(); SentenceSegmenter sentenceTokenizer = new SentenceSegmenter(); POSModel posModel = new POSModel(Scoobie.class.getResourceAsStream("pos/en/en-pos-maxent.bin")); POSTagging posTagger = new POSTagging(new POSTaggerME(posModel)); ProperNameRecognition nounPhraseChunker = new ProperNameRecognition( new CRFNounPhraseChunkerModel($SCOOBIE_HOME + $DATABASE_DBPEDIA_en2 + "/npc/en/EN.crf")); SuffixArrayBuilder suffixArrayBuilder = new SuffixArrayBuilder(100, new LiteralHashing(4)); RDFLiteralSpotting namedEntityRecognizer = new RDFLiteralSpotting(); InstanceRecognition instanceResolver = new InstanceRecognition(); EntityDisambiguation instanceDisambiguator = new EntityDisambiguation( new AmbiguityResolver[] { new DegreeBasedResolver() }); KnownFactsRetrieval factRetrieval = new KnownFactsRetrieval(); ArrayList<int[]> l = new ArrayList<int[]>(); int max = (int) Math.pow(2, 9); for (int i = 0; i < max; i++) { String binary = Integer.toBinaryString(i); String prefix = ""; for (int pad = 0; pad < 9 - binary.length(); pad++) { prefix += "0"; } binary = prefix + binary; TIntHashSet s = new TIntHashSet(); for (int j = 0; j < 9; j++) { if (j < binary.length() && binary.charAt(j) == '1') { s.add(j); } } if (s.size() > 1) l.add(s.toArray()); } RelevanceRating relevanceRating = new RelevanceRating(new RatingMetric[] { new AuthorityBasedRating(), // 0 new HubBasedRating(), // 1 new 
PageRankBasedRating(), // 2 new DegreeBasedRating(), // 3 new CapacityBasedRating(), // 4 new RandomRating(), // 5 new PositionBasedRating(), // 6 new TermFrequencyBasedRating(), // 7 new InverseDocumentFrequencyBasedRating(corpus, new File(corpus.getCorpus().getAbsolutePath() + "/index/")) }, // 8 l.toArray(new int[l.size()][])); pipeline.configure(languageClassification, wordTokenizer, sentenceTokenizer, posTagger, nounPhraseChunker, suffixArrayBuilder, namedEntityRecognizer, new DummyTask(), new DummyTask(), instanceResolver, instanceDisambiguator, factRetrieval, relevanceRating, new DummyTask()); }
From source file:hrpod.tools.nlp.NLPTools.java
public void setPosModel() { try {//from ww w. ja v a 2 s .com URL url = this.getClass().getResource(modelBasePath + "en-pos-maxent.bin"); this.posModel = new POSModel(new FileInputStream(new File(url.getFile()))); } catch (Exception e) { logger.error("Error is setPosModel", e); } }
From source file:it.uniud.ailab.dcore.wrappers.external.OpenNlpBootstrapperAnnotator.java
/** * Loads a POStagger model or retrieves it from cache if has been already * loaded before./* w ww. ja v a2s . c o m*/ * * @param modelId the model to retrieve * @return the loaded model */ public static POSModel getPOSTaggerModel(String modelId) { // if the model has not already been loaded, cache it if (!posModelsCache.containsKey(modelId)) { // Split the text into sentences InputStream POSModelIn = null; POSModel POSModel = null; String sentPath = ""; try { sentPath = databasePaths.get(modelId); POSModelIn = new FileInputStream(sentPath); POSModel = new POSModel(POSModelIn); } catch (IOException e) { throw new AnnotationException(new OpenNlpBootstrapperAnnotator(), "Error while loading the model file \"" + sentPath + "\".", e); } catch (NullPointerException e) { throw new AnnotationException(new OpenNlpBootstrapperAnnotator(), "Error while looking for the model \"" + modelId + "\".", e); } finally { if (POSModelIn != null) { try { POSModelIn.close(); } catch (IOException e) { throw new AnnotationException(new OpenNlpBootstrapperAnnotator(), "Error while loading the model file '\"" + modelId + "\".", e); } } } posModelsCache.put(modelId, POSModel); return POSModel; } return posModelsCache.get(modelId); }
From source file:org.dbpedia.spotlight.spot.OpenNLPUtil.java
protected static BaseModel loadgivenmodeltype(OpenNlpModels m, InputStream in) throws InvalidFormatException, IOException { BaseModel mdl = null;//from www . jav a 2s. c o m switch (m) { case TokenizerModel: { mdl = new TokenizerModel(in); LOG.debug("OpenNLP5 Tokenizer Model loaded: " + mdl); break; } case POSModel: { mdl = new POSModel(in); LOG.debug("OpenNLP5 POS Model loaded: " + mdl); break; } case SentenceModel: { mdl = new SentenceModel(in); LOG.debug("OpenNLP5 Sentence Model loaded: " + mdl); break; } case ChunkModel: { mdl = new ChunkerModel(in); LOG.debug("OpenNLP5 Sentence Model loaded: " + mdl); break; } case person: case organization: case location: { mdl = new TokenNameFinderModel(in); LOG.debug("OpenNLP5 TokenNameFinderModel Model loaded: " + mdl); break; } default: LOG.debug("Unknown Model Type!"); } return mdl; }
From source file:sentimental_analysis.pre.processor.WordBag.Tokenizer.PartsofSpeechFilter.java
private POSModel initialisePOS() { try {/*from w ww. ja v a 2 s .c o m*/ FileInputStream modelIn = new FileInputStream( "C:\\Users\\Sushil-PC\\Dropbox\\SentimentalAnalysis-shared\\java\\en-pos-maxent.bin"); try { return new POSModel(modelIn); } catch (IOException ex) { System.out.println("Exception Thrown POS Model (initialisePOS) : " + ex.getMessage()); System.out.println("Stack Trace"); ex.printStackTrace(); } } catch (FileNotFoundException ex) { Logger.getLogger(TokenizerM.class.getName()).log(Level.SEVERE, null, ex); System.out.println("Exception Thrown En-pps-maxent-bin : " + ex.getMessage()); System.out.println("Stack Trace"); ex.printStackTrace(); } return null; }