List of usage examples for the org.apache.lucene.search.spell HighFrequencyDictionary constructor
public HighFrequencyDictionary(IndexReader reader, String field, float thresh)
Builds a dictionary from the terms of the given field in the provided reader. From source file:org.apache.solr.handler.SpellCheckerRequestHandler.java
License:Apache License
/**
 * Returns the dictionary used when building the spell-checker index.
 * Override this method to supply a custom dictionary.
 *
 * @param req current request; its params may carry a THRESHOLD override
 * @return a {@link HighFrequencyDictionary} over {@code termSourceField}
 */
protected Dictionary getDictionary(SolrQueryRequest req) {
    final float freqThreshold;
    try {
        freqThreshold = req.getParams().getFloat(THRESHOLD, DEFAULT_DICTIONARY_THRESHOLD);
    } catch (NumberFormatException e) {
        // Surface a malformed request parameter as an unchecked error, preserving the cause.
        throw new RuntimeException("Threshold must be a valid positive float", e);
    }
    return new HighFrequencyDictionary(req.getSearcher().getReader(), termSourceField, freqThreshold);
}
From source file:org.apache.solr.spelling.FileBasedSpellChecker.java
License:Apache License
private void loadExternalFileDictionary(SolrCore core, SolrIndexSearcher searcher) { try {//from w w w . j av a 2s. c o m IndexSchema schema = null == searcher ? core.getLatestSchema() : searcher.getSchema(); // Get the field's analyzer if (fieldTypeName != null && schema.getFieldTypeNoEx(fieldTypeName) != null) { FieldType fieldType = schema.getFieldTypes().get(fieldTypeName); // Do index-time analysis using the given fieldType's analyzer RAMDirectory ramDir = new RAMDirectory(); LogMergePolicy mp = new LogByteSizeMergePolicy(); mp.setMergeFactor(300); IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(core.getSolrConfig().luceneMatchVersion, fieldType.getAnalyzer()) .setMaxBufferedDocs(150).setMergePolicy(mp) .setOpenMode(IndexWriterConfig.OpenMode.CREATE) // TODO: if we enable this, codec gets angry since field won't exist in the schema // .setCodec(core.getCodec()) ); List<String> lines = core.getResourceLoader().getLines(sourceLocation, characterEncoding); for (String s : lines) { Document d = new Document(); d.add(new TextField(WORD_FIELD_NAME, s, Field.Store.NO)); writer.addDocument(d); } writer.forceMerge(1); writer.close(); dictionary = new HighFrequencyDictionary(DirectoryReader.open(ramDir), WORD_FIELD_NAME, 0.0f); } else { // check if character encoding is defined if (characterEncoding == null) { dictionary = new PlainTextDictionary(core.getResourceLoader().openResource(sourceLocation)); } else { dictionary = new PlainTextDictionary(new InputStreamReader( core.getResourceLoader().openResource(sourceLocation), characterEncoding)); } } } catch (IOException e) { log.error("Unable to load spellings", e); } }
From source file:org.apache.solr.spelling.IndexBasedSpellChecker.java
License:Apache License
@Override public void build(SolrCore core, SolrIndexSearcher searcher) throws IOException { IndexReader reader = null;//from w w w. ja v a 2 s . c o m if (sourceLocation == null) { // Load from Solr's index reader = searcher.getIndexReader(); } else { // Load from Lucene index at given sourceLocation reader = this.reader; } // Create the dictionary dictionary = new HighFrequencyDictionary(reader, field, threshold); // TODO: maybe whether or not to clear the index should be configurable? // an incremental update is faster (just adds new terms), but if you 'expunged' // old terms I think they might hang around. spellChecker.clearIndex(); // TODO: you should be able to specify the IWC params? // TODO: if we enable this, codec gets angry since field won't exist in the schema // config.setCodec(core.getCodec()); spellChecker.indexDictionary(dictionary, new IndexWriterConfig(core.getSolrConfig().luceneMatchVersion, null), false); }
From source file:org.apache.solr.spelling.suggest.HighFrequencyDictionaryFactory.java
License:Apache License
@Override public Dictionary create(SolrCore core, SolrIndexSearcher searcher) { if (params == null) { // should not happen; implies setParams was not called throw new IllegalStateException("Value of params not set"); }/*from ww w . java 2 s .co m*/ String field = (String) params.get(SolrSpellChecker.FIELD); if (field == null) { throw new IllegalArgumentException(SolrSpellChecker.FIELD + " is a mandatory parameter"); } float threshold = params.get(THRESHOLD_TOKEN_FREQUENCY) == null ? 0.0f : (Float) params.get(THRESHOLD_TOKEN_FREQUENCY); return new HighFrequencyDictionary(searcher.getIndexReader(), field, threshold); }
From source file:org.apache.solr.spelling.suggest.Suggester.java
License:Apache License
@Override public void build(SolrCore core, SolrIndexSearcher searcher) throws IOException { LOG.info("build()"); if (sourceLocation == null) { reader = searcher.getIndexReader(); dictionary = new HighFrequencyDictionary(reader, field, threshold); } else {//from w w w.j av a 2s.com try { dictionary = new FileDictionary(new InputStreamReader( core.getResourceLoader().openResource(sourceLocation), IOUtils.CHARSET_UTF_8)); } catch (UnsupportedEncodingException e) { // should not happen LOG.error("should not happen", e); } } lookup.build(dictionary); if (storeDir != null) { File target = new File(storeDir, factory.storeFileName()); if (!lookup.store(new FileOutputStream(target))) { if (sourceLocation == null) { assert reader != null && field != null; LOG.error("Store Lookup build from index on field: " + field + " failed reader has: " + reader.maxDoc() + " docs"); } else { LOG.error("Store Lookup build from sourceloaction: " + sourceLocation + " failed"); } } else { LOG.info("Stored suggest data to: " + target.getAbsolutePath()); } } }
From source file:org.compass.core.lucene.engine.spellcheck.DefaultLuceneSpellCheckManager.java
License:Apache License
/**
 * Rebuilds the spell-check index for the given sub index inside a transaction.
 *
 * Skips the rebuild (returns false) when the stored spell-check index version already
 * matches the live index version. Returns null when the spell index appears to be
 * locked by a concurrent indexing run, true after a successful rebuild.
 *
 * @param subIndex the sub index whose spell-check index is rebuilt
 * @throws SearchEngineException on index-version read, spell-checker creation, or indexing failure
 */
public synchronized boolean rebuild(final String subIndex) throws SearchEngineException {
    checkIfStarted();
    return searchEngineFactory.getTransactionContext().execute(new TransactionContextCallbackWithTr<Boolean>() {
        public Boolean doInTransaction(InternalCompassTransaction tr) throws CompassException {
            // Compare the spell index's recorded version against the live index version;
            // a match means nothing changed since the last rebuild.
            long version = readSpellCheckIndexVersion(subIndex);
            long indexVersion;
            try {
                indexVersion = LuceneSubIndexInfo.getIndexInfo(subIndex, indexStore).version();
            } catch (IOException e) {
                throw new SearchEngineException(
                        "Failed to read actual index version for sub index [" + subIndex + "]", e);
            }
            if (version == indexVersion) {
                if (log.isDebugEnabled()) {
                    log.debug("No need to rebuild spell check index, sub index [" + subIndex
                            + "] has not changed");
                }
                return false;
            }
            if (log.isDebugEnabled()) {
                log.debug("Rebuilding spell index for sub index [" + subIndex + "]");
            }
            // Open the spell-check directory and start from a cleared spell index.
            Directory dir = spellCheckStore.openDirectory(spellIndexSubContext, subIndex);
            CompassSpellChecker spellChecker;
            try {
                spellChecker = new CompassSpellChecker(dir, true);
                spellChecker.clearIndex();
            } catch (IOException e) {
                throw new SearchEngineException(
                        "Failed to create spell checker for sub index [" + subIndex + "]", e);
            }
            IndexWriter writer = null;
            try {
                LuceneSearchEngineInternalSearch search = (LuceneSearchEngineInternalSearch) tr
                        .getSearchEngine().internalSearch(new String[] { subIndex }, null);
                if (search.getSearcher() != null) {
                    // Feed the spell index from the high-frequency terms of each configured
                    // property of this sub index.
                    writer = searchEngineFactory.getLuceneIndexManager().getIndexWritersManager()
                            .openIndexWriter(spellCheckSettings, dir, true, null, new WhitespaceAnalyzer());
                    for (String property : properties.get(subIndex)) {
                        spellChecker.indexDictionary(writer, new HighFrequencyDictionary(search.getReader(),
                                property, defaultDictionaryThreshold));
                    }
                    writer.optimize();
                } else {
                    if (log.isDebugEnabled()) {
                        log.debug(
                                "No data found in sub index [" + subIndex + "], skipping building spell index");
                    }
                }
            } catch (LockObtainFailedException e) {
                // Another process is presumably writing the spell index; treat as a no-op.
                log.debug("Failed to obtain lock, assuming indexing of spell index is in process for sub index ["
                        + subIndex + "]");
                return null;
            } catch (IOException e) {
                throw new SearchEngineException("Failed to index spell index for sub index [" + subIndex + "]",
                        e);
            } finally {
                if (writer != null) {
                    try {
                        writer.close();
                    } catch (IOException e) {
                        log.warn("Failed to close specll check index writer for sub index [" + subIndex + "]",
                                e);
                    }
                }
            }
            // refresh the readers and searchers
            closeAndRefresh(subIndex);
            // Record the index version we just built against, so the next rebuild can short-circuit.
            writeSpellCheckIndexVersion(subIndex, indexVersion);
            if (log.isDebugEnabled()) {
                log.debug("Finished rebuilding spell index for sub index [" + subIndex + "]");
            }
            return true;
        }
    });
}
From source file:org.dice.solrenhancements.spellchecker.DiceMultipleCaseSuggester.java
License:Apache License
@Override public void build(SolrCore core, SolrIndexSearcher searcher) throws IOException { LOG.info("build()"); if (sourceLocation == null) { reader = searcher.getIndexReader(); dictionary = new HighFrequencyDictionary(reader, field, threshold); } else {/*from w w w . j a v a 2s . co m*/ try { final String fileDelim = ","; if (sourceLocation.contains(fileDelim)) { String[] files = sourceLocation.split(fileDelim); Reader[] readers = new Reader[files.length]; for (int i = 0; i < files.length; i++) { Reader reader = new InputStreamReader(core.getResourceLoader().openResource(files[i]), IOUtils.CHARSET_UTF_8); readers[i] = reader; } dictionary = new MultipleFileDictionary(readers); } else { dictionary = new FileDictionary(new InputStreamReader( core.getResourceLoader().openResource(sourceLocation), IOUtils.CHARSET_UTF_8)); } } catch (UnsupportedEncodingException e) { // should not happen LOG.error("should not happen", e); } } lookup.build(dictionary); if (storeDir != null) { File target = new File(storeDir, factory.storeFileName()); if (!lookup.store(new FileOutputStream(target))) { if (sourceLocation == null) { assert reader != null && field != null; LOG.error("Store Lookup build from index on field: " + field + " failed reader has: " + reader.maxDoc() + " docs"); } else { LOG.error("Store Lookup build from sourceloaction: " + sourceLocation + " failed"); } } else { LOG.info("Stored suggest data to: " + target.getAbsolutePath()); } } }