Example usage for org.apache.lucene.search.spell HighFrequencyDictionary HighFrequencyDictionary

List of usage examples for org.apache.lucene.search.spell HighFrequencyDictionary HighFrequencyDictionary

Introduction

On this page, you can find an example of how to use org.apache.lucene.search.spell HighFrequencyDictionary HighFrequencyDictionary.

Prototype

public HighFrequencyDictionary(IndexReader reader, String field, float thresh) 

Source Link

Document

Creates a new Dictionary, pulling source terms from the specified field in the provided reader.

Usage

From source file:org.apache.solr.handler.SpellCheckerRequestHandler.java

License:Apache License

/**
 * Builds the {@link Dictionary} used when constructing the spell-checker index.
 * Terms are pulled from {@code termSourceField} of the current searcher's index;
 * only terms whose frequency meets the request's threshold are kept.
 * Override this method to supply a custom dictionary.
 *
 * @param req the current request; supplies the threshold parameter and the searcher
 * @return a {@link HighFrequencyDictionary} over the request's index reader
 * @throws RuntimeException if the threshold parameter is not a parseable float
 */
protected Dictionary getDictionary(SolrQueryRequest req) {
    final float freqThreshold;
    try {
        freqThreshold = req.getParams().getFloat(THRESHOLD, DEFAULT_DICTIONARY_THRESHOLD);
    } catch (NumberFormatException nfe) {
        throw new RuntimeException("Threshold must be a valid positive float", nfe);
    }
    return new HighFrequencyDictionary(req.getSearcher().getReader(), termSourceField, freqThreshold);
}

From source file:org.apache.solr.spelling.FileBasedSpellChecker.java

License:Apache License

/**
 * Loads the configured external word file into {@link #dictionary}.
 *
 * <p>If {@code fieldTypeName} resolves to a schema field type, each line of the
 * file is analyzed with that type's analyzer into an in-memory index, and the
 * dictionary is built over that index's terms. Otherwise the file is read
 * verbatim as a {@link PlainTextDictionary}, honoring {@code characterEncoding}
 * when set.
 *
 * <p>I/O failures are logged rather than propagated, leaving {@code dictionary}
 * unchanged.
 *
 * @param core     the owning core; supplies config, schema fallback, and resource loader
 * @param searcher current searcher, or {@code null} to use the core's latest schema
 */
private void loadExternalFileDictionary(SolrCore core, SolrIndexSearcher searcher) {
    try {
        IndexSchema schema = null == searcher ? core.getLatestSchema() : searcher.getSchema();
        // Get the field's analyzer
        if (fieldTypeName != null && schema.getFieldTypeNoEx(fieldTypeName) != null) {
            FieldType fieldType = schema.getFieldTypes().get(fieldTypeName);
            // Do index-time analysis using the given fieldType's analyzer
            RAMDirectory ramDir = new RAMDirectory();

            LogMergePolicy mp = new LogByteSizeMergePolicy();
            mp.setMergeFactor(300);

            // try-with-resources: the original leaked the writer if addDocument or
            // forceMerge threw; close() now always runs (after the merge).
            try (IndexWriter writer = new IndexWriter(ramDir,
                    new IndexWriterConfig(core.getSolrConfig().luceneMatchVersion, fieldType.getAnalyzer())
                            .setMaxBufferedDocs(150).setMergePolicy(mp)
                            .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
            // TODO: if we enable this, codec gets angry since field won't exist in the schema
            // .setCodec(core.getCodec())
            )) {
                List<String> lines = core.getResourceLoader().getLines(sourceLocation, characterEncoding);

                // One single-field document per input line.
                for (String s : lines) {
                    Document d = new Document();
                    d.add(new TextField(WORD_FIELD_NAME, s, Field.Store.NO));
                    writer.addDocument(d);
                }
                // Collapse to one segment before reading the terms back.
                writer.forceMerge(1);
            }

            // NOTE(review): the DirectoryReader opened here is owned by the dictionary;
            // its close is presumably handled by the dictionary's consumer — verify.
            dictionary = new HighFrequencyDictionary(DirectoryReader.open(ramDir), WORD_FIELD_NAME, 0.0f);
        } else {
            // No analyzer configured: read the file as plain text, one word per line.
            if (characterEncoding == null) {
                dictionary = new PlainTextDictionary(core.getResourceLoader().openResource(sourceLocation));
            } else {
                dictionary = new PlainTextDictionary(new InputStreamReader(
                        core.getResourceLoader().openResource(sourceLocation), characterEncoding));
            }
        }

    } catch (IOException e) {
        log.error("Unable to load spellings", e);
    }
}

From source file:org.apache.solr.spelling.IndexBasedSpellChecker.java

License:Apache License

/**
 * Rebuilds the spell-check index, sourcing terms either from Solr's own index
 * (when no {@code sourceLocation} is configured) or from the externally opened
 * Lucene index reader.
 *
 * @param core     the owning core; supplies the Lucene match version for the writer config
 * @param searcher provides the live index reader when building from Solr's index
 * @throws IOException if indexing the dictionary fails
 */
@Override
public void build(SolrCore core, SolrIndexSearcher searcher) throws IOException {
    // sourceLocation == null -> terms come from Solr's live index;
    // otherwise use the reader previously opened over the external index.
    final IndexReader sourceReader = (sourceLocation == null) ? searcher.getIndexReader() : this.reader;

    // Build the term dictionary over the chosen reader.
    dictionary = new HighFrequencyDictionary(sourceReader, field, threshold);

    // TODO: maybe whether or not to clear the index should be configurable?
    // an incremental update is faster (just adds new terms), but if you 'expunged'
    // old terms I think they might hang around.
    spellChecker.clearIndex();

    // TODO: you should be able to specify the IWC params?
    // TODO: if we enable this, codec gets angry since field won't exist in the schema
    // config.setCodec(core.getCodec());
    spellChecker.indexDictionary(dictionary,
            new IndexWriterConfig(core.getSolrConfig().luceneMatchVersion, null), false);
}

From source file:org.apache.solr.spelling.suggest.HighFrequencyDictionaryFactory.java

License:Apache License

/**
 * Creates a {@link HighFrequencyDictionary} over the searcher's index for the
 * field named by the {@code field} parameter, using the configured token
 * frequency threshold (defaulting to {@code 0.0f} when unset).
 *
 * @param core     the owning core (unused here)
 * @param searcher supplies the index reader the dictionary iterates over
 * @return the configured dictionary
 * @throws IllegalStateException    if {@code setParams} was never called
 * @throws IllegalArgumentException if the mandatory {@code field} parameter is missing
 */
@Override
public Dictionary create(SolrCore core, SolrIndexSearcher searcher) {
    if (params == null) {
        // should not happen; implies setParams was not called
        throw new IllegalStateException("Value of params not set");
    }
    String field = (String) params.get(SolrSpellChecker.FIELD);

    if (field == null) {
        throw new IllegalArgumentException(SolrSpellChecker.FIELD + " is a mandatory parameter");
    }

    // Look the threshold up once instead of the original's two map accesses.
    Object thresholdParam = params.get(THRESHOLD_TOKEN_FREQUENCY);
    float threshold = thresholdParam == null ? 0.0f : (Float) thresholdParam;

    return new HighFrequencyDictionary(searcher.getIndexReader(), field, threshold);
}

From source file:org.apache.solr.spelling.suggest.Suggester.java

License:Apache License

/**
 * Builds the suggester's lookup structure from either the main index (when no
 * {@code sourceLocation} is configured) or an external dictionary file, then
 * optionally persists the built lookup to {@code storeDir}.
 *
 * @param core     supplies the resource loader for file-based dictionaries
 * @param searcher supplies the index reader for index-based dictionaries
 * @throws IOException if building or storing the lookup fails
 */
@Override
public void build(SolrCore core, SolrIndexSearcher searcher) throws IOException {
    LOG.info("build()");
    if (sourceLocation == null) {
        // No external file configured: pull terms straight from the main index.
        reader = searcher.getIndexReader();
        dictionary = new HighFrequencyDictionary(reader, field, threshold);
    } else {
        try {
            dictionary = new FileDictionary(new InputStreamReader(
                    core.getResourceLoader().openResource(sourceLocation), IOUtils.CHARSET_UTF_8));
        } catch (UnsupportedEncodingException e) {
            // should not happen — UTF-8 is always supported
            LOG.error("should not happen", e);
        }
    }

    lookup.build(dictionary);
    if (storeDir != null) {
        File target = new File(storeDir, factory.storeFileName());
        // try-with-resources: the original leaked the stream if store(...) threw.
        // (Lookup.store may close it itself; double-closing a FileOutputStream is harmless.)
        boolean stored;
        try (FileOutputStream out = new FileOutputStream(target)) {
            stored = lookup.store(out);
        }
        if (!stored) {
            if (sourceLocation == null) {
                assert reader != null && field != null;
                LOG.error("Store Lookup build from index on field: " + field + " failed reader has: "
                        + reader.maxDoc() + " docs");
            } else {
                // fixed typo in the original message ("sourceloaction")
                LOG.error("Store Lookup build from sourceLocation: " + sourceLocation + " failed");
            }
        } else {
            LOG.info("Stored suggest data to: " + target.getAbsolutePath());
        }
    }
}

From source file:org.compass.core.lucene.engine.spellcheck.DefaultLuceneSpellCheckManager.java

License:Apache License

/**
 * Rebuilds the spell-check index for the given sub index, but only when the
 * underlying search index has changed since the last rebuild (tracked by a
 * stored version number).
 *
 * <p>Runs inside a transaction context. Returns {@code false} when the index
 * is already up to date, {@code true} on a successful rebuild, and {@code null}
 * when the rebuild is skipped because another process appears to be indexing
 * (index-writer lock could not be obtained).
 *
 * @param subIndex the sub index whose spell-check index should be rebuilt
 * @throws SearchEngineException if reading the index version, creating the
 *         spell checker, or indexing the dictionary fails
 */
public synchronized boolean rebuild(final String subIndex) throws SearchEngineException {
    checkIfStarted();
    return searchEngineFactory.getTransactionContext().execute(new TransactionContextCallbackWithTr<Boolean>() {
        public Boolean doInTransaction(InternalCompassTransaction tr) throws CompassException {
            // Compare the stored spell-check version against the live index version;
            // skip the rebuild entirely when nothing has changed.
            long version = readSpellCheckIndexVersion(subIndex);
            long indexVersion;
            try {
                indexVersion = LuceneSubIndexInfo.getIndexInfo(subIndex, indexStore).version();
            } catch (IOException e) {
                throw new SearchEngineException(
                        "Failed to read actual index version for sub index [" + subIndex + "]", e);
            }
            if (version == indexVersion) {
                if (log.isDebugEnabled()) {
                    log.debug("No need to rebuild spell check index, sub index [" + subIndex
                            + "] has not changed");
                }
                return false;
            }

            if (log.isDebugEnabled()) {
                log.debug("Rebuilding spell index for sub index [" + subIndex + "]");
            }
            // Open (and clear) the dedicated spell-check index for this sub index.
            Directory dir = spellCheckStore.openDirectory(spellIndexSubContext, subIndex);
            CompassSpellChecker spellChecker;
            try {
                spellChecker = new CompassSpellChecker(dir, true);
                spellChecker.clearIndex();
            } catch (IOException e) {
                throw new SearchEngineException(
                        "Failed to create spell checker for sub index [" + subIndex + "]", e);
            }
            IndexWriter writer = null;
            try {
                // Search the sub index to obtain a reader over its current contents.
                LuceneSearchEngineInternalSearch search = (LuceneSearchEngineInternalSearch) tr
                        .getSearchEngine().internalSearch(new String[] { subIndex }, null);
                if (search.getSearcher() != null) {
                    writer = searchEngineFactory.getLuceneIndexManager().getIndexWritersManager()
                            .openIndexWriter(spellCheckSettings, dir, true, null, new WhitespaceAnalyzer());
                    // Index a high-frequency dictionary for each configured property
                    // of this sub index into the spell-check index.
                    for (String property : properties.get(subIndex)) {
                        spellChecker.indexDictionary(writer, new HighFrequencyDictionary(search.getReader(),
                                property, defaultDictionaryThreshold));
                    }
                    writer.optimize();
                } else {
                    if (log.isDebugEnabled()) {
                        log.debug(
                                "No data found in sub index [" + subIndex + "], skipping building spell index");
                    }
                }
            } catch (LockObtainFailedException e) {
                // Another process holds the write lock — assume it is doing this
                // rebuild and bail out without treating it as an error.
                log.debug(
                        "Failed to obtain lock, assuming indexing of spell index is in process for sub index ["
                                + subIndex + "]");
                return null;
            } catch (IOException e) {
                throw new SearchEngineException("Failed to index spell index for sub index [" + subIndex + "]",
                        e);
            } finally {
                if (writer != null) {
                    try {
                        // NOTE(review): log message contains a typo ("specll") left
                        // unchanged here — it is runtime output, not a comment.
                        writer.close();
                    } catch (IOException e) {
                        log.warn("Failed to close specll check index writer for sub index [" + subIndex + "]",
                                e);
                    }
                }
            }
            // Refresh the readers and searchers, then record the version we just
            // built against so the next rebuild can short-circuit.
            closeAndRefresh(subIndex);
            writeSpellCheckIndexVersion(subIndex, indexVersion);

            if (log.isDebugEnabled()) {
                log.debug("Finished rebuilding spell index for sub index [" + subIndex + "]");
            }
            return true;
        }
    });
}

From source file:org.dice.solrenhancements.spellchecker.DiceMultipleCaseSuggester.java

License:Apache License

@Override
public void build(SolrCore core, SolrIndexSearcher searcher) throws IOException {
    LOG.info("build()");
    if (sourceLocation == null) {
        reader = searcher.getIndexReader();
        dictionary = new HighFrequencyDictionary(reader, field, threshold);
    } else {/*from w  w w  . j  a v a  2s . co m*/
        try {

            final String fileDelim = ",";
            if (sourceLocation.contains(fileDelim)) {
                String[] files = sourceLocation.split(fileDelim);
                Reader[] readers = new Reader[files.length];
                for (int i = 0; i < files.length; i++) {
                    Reader reader = new InputStreamReader(core.getResourceLoader().openResource(files[i]),
                            IOUtils.CHARSET_UTF_8);
                    readers[i] = reader;
                }
                dictionary = new MultipleFileDictionary(readers);
            } else {
                dictionary = new FileDictionary(new InputStreamReader(
                        core.getResourceLoader().openResource(sourceLocation), IOUtils.CHARSET_UTF_8));
            }
        } catch (UnsupportedEncodingException e) {
            // should not happen
            LOG.error("should not happen", e);
        }
    }

    lookup.build(dictionary);
    if (storeDir != null) {
        File target = new File(storeDir, factory.storeFileName());
        if (!lookup.store(new FileOutputStream(target))) {
            if (sourceLocation == null) {
                assert reader != null && field != null;
                LOG.error("Store Lookup build from index on field: " + field + " failed reader has: "
                        + reader.maxDoc() + " docs");
            } else {
                LOG.error("Store Lookup build from sourceloaction: " + sourceLocation + " failed");
            }
        } else {
            LOG.info("Stored suggest data to: " + target.getAbsolutePath());
        }
    }
}