Example usage for org.apache.lucene.search.spell SpellChecker indexDictionary

Introduction

On this page you can find example usages of the indexDictionary method of org.apache.lucene.search.spell.SpellChecker.

Prototype

public final void indexDictionary(Dictionary dict, IndexWriterConfig config, boolean fullMerge)
        throws IOException

Source Link

Document

Indexes the data from the given Dictionary.

Usage

From source file:com.bah.bahdit.main.search.utils.LevenshteinDistance.java

License:Apache License

/**
 * Given a context to place all of the spell check files in, this method
 * sets up the spellchecker object using the sample table
 * /* www  .  j  a  va  2  s.  c o m*/
 * @param context - the context of the current servlet
 * @param sampleTable - the full text sample table
 * @return - a spellchecker object
 */
public static SpellChecker createSpellChecker(ServletContext context, HashMap<String, Integer> sampleTable) {

    SpellChecker spellChecker = null;

    // write terms from sample table to text file, to be basis of dictionary
    File f = new File("dictionary" + System.nanoTime() + ".txt");
    try {
        f.createNewFile();
        BufferedWriter out = new BufferedWriter(new FileWriter(f));

        for (String entry : sampleTable.keySet()) {
            out.write(entry + "\n");
        }

    } catch (IOException e) {
        e.printStackTrace();
    }

    String dPath = System.getProperty("user.dir") + "/spellcheck" + System.nanoTime();

    File dir = new File(dPath);
    Directory directory = null;

    try {
        directory = FSDirectory.open(dir);
    } catch (IOException e3) {
        e3.printStackTrace();
    }

    try {
        spellChecker = new SpellChecker(directory);
    } catch (IOException e2) {
        e2.printStackTrace();
    }

    StandardAnalyzer a = new StandardAnalyzer(Version.LUCENE_40);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, a);
    boolean fullMerge = true;
    PlainTextDictionary dict = null;

    try {
        dict = new PlainTextDictionary(f);
    } catch (FileNotFoundException e1) {
        e1.printStackTrace();
    }

    try {
        spellChecker.indexDictionary(dict, config, fullMerge);
    } catch (IOException e) {
        e.printStackTrace();
    }

    return spellChecker;
}

From source file:com.jaeksoft.searchlib.cache.SpellCheckerCache.java

License:Open Source License

/**
 * Returns the spell checker cached for the given field, building and caching
 * a new one from the reader's dictionary when none is cached yet. The whole
 * lookup-or-build sequence runs while holding the write lock.
 *
 * @param reader source of the dictionary used to build a missing spell checker
 * @param field name of the field the spell checker is keyed on
 * @return the cached or newly built spell checker
 * @throws IOException propagated from the dictionary lookup or index build
 */
public SpellChecker get(ReaderLocal reader, String field) throws IOException {
    rwl.w.lock();
    try {
        final FieldNameKey cacheKey = new FieldNameKey(field);
        final SpellChecker cached = getAndPromote(cacheKey);
        if (cached != null) {
            return cached;
        }

        // cache miss: index the field's terms into a fresh RAMDirectory-backed checker
        final LuceneDictionary fieldTerms = reader.getLuceneDirectionary(cacheKey.getFieldName());
        final SpellChecker built = new SpellChecker(new RAMDirectory());
        final IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_36, null);
        built.indexDictionary(fieldTerms, writerConfig, true);

        put(cacheKey, built);
        return built;
    } finally {
        rwl.w.unlock();
    }
}

From source file:com.ostrichemulators.semtool.rdf.engine.util.EngineConsistencyChecker.java

/**
 * Resolves "near" matches from the elements of the given type. If
 * {@link #across} is <code>true</code>, each element will be compared to all
 * elements of all types./*www . ja  va  2 s . com*/
 *
 * @param uri the concept/relation class (not instance) to resolve
 * @param minDistance the minimum allowable similarity
 * @return map of uri-to-hits
 */
public MultiMap<IRI, Hit> check(IRI uri, final float minDistance) {
    MultiMap<IRI, Hit> hits = new MultiMap<>();

    // get our universe of possible hits
    Map<IRI, String> possibles = getHitUniverse(uri);
    MultiMap<String, IRI> revpos = MultiMap.flip(possibles);

    Directory ramdir = new RAMDirectory();
    StandardAnalyzer analyzer = null;
    SpellChecker speller = null;

    List<IRI> errors = new ArrayList<>();
    try {
        analyzer = new StandardAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        speller = new SpellChecker(ramdir, strdist);

        StringBuilder names = new StringBuilder();
        for (String s : possibles.values()) {
            names.append(s).append("\n");
        }
        PlainTextDictionary ptd = new PlainTextDictionary(new StringReader(names.toString()));
        speller.indexDictionary(ptd, config, true);

        List<IRI> needles = typeToURILkp.get(uri);
        for (IRI needle : needles) {
            String needlelabel = labels.get(needle);
            try {
                String[] suggestions = speller.suggestSimilar(needlelabel, 20, minDistance);
                for (String s : suggestions) {
                    // found a match, so figure out what we actually matched
                    float distance = strdist.getDistance(needlelabel, s);

                    for (IRI match : revpos.get(s)) {
                        hits.add(needle, new Hit(match, s, uriToTypeLkp.get(match), distance));
                    }
                }
            } catch (Exception e) {
                // our fallback resolution always works; it's just a ton slower
                errors.add(needle);
            }
        }
    } catch (Exception e) {
        log.error(e, e);
    } finally {
        for (Closeable c : new Closeable[] { analyzer, ramdir, speller }) {
            if (null != c) {
                try {
                    c.close();
                } catch (Exception e) {
                    log.warn(e, e);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        fallbackResolve(errors, possibles, hits, strdist, minDistance);
    }

    return hits;
}

From source file:fastcampus.lucene.example.search.SpellCheckerExample.java

License:Apache License

/**
 * Demo entry point: rebuilds the spell index under {@code ./index/spell/} from
 * the word list in {@code ./data/spell/dic.txt}, then prints at most one
 * suggestion for a sample word using the Jaro-Winkler string distance.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the index or the dictionary cannot be opened, written or searched
 */
public static void main(String[] args) throws Exception {
    // try-with-resources closes the analyzer, spell checker and directory
    // (in that order) even when indexing or searching fails
    try (Directory directory = FSDirectory.open(Paths.get("./index/spell/"));
            SpellChecker spellChecker = new SpellChecker(directory);
            Analyzer analyzer = new Analyzer() {
                @Override
                protected TokenStreamComponents createComponents(String fieldName) {
                    // The argument is the field name, not the text to analyze, and
                    // the Analyzer supplies the tokenizer's Reader itself, so no
                    // reader is set here.
                    Tokenizer tokenizer = new StandardTokenizer();
                    String name = "nfc_cf";
                    Normalizer2 normalizer = Normalizer2.getInstance(null, name, Normalizer2.Mode.DECOMPOSE);
                    TokenFilter filter = new ICUNormalizer2Filter(tokenizer, normalizer);
                    return new TokenStreamComponents(tokenizer, filter);
                }
            }) {

        // configuration for the writer that builds the spell index
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);

        Path path = Paths.get("./data/spell/dic.txt");

        // start from an empty index, then index the dictionary file
        spellChecker.setSpellIndex(directory);
        spellChecker.clearIndex();
        spellChecker.indexDictionary(new PlainTextDictionary(path), indexWriterConfig, true);

        // NOTE(review): this sample word looks like it was lost to an encoding
        // problem; replace it with a real (misspelled) word before running.
        String wordForSuggestions = "?";

        // Jaro-Winkler distance is used here; LevensteinDistance is the alternative.
        spellChecker.setStringDistance(new JaroWinklerDistance());

        int suggestionsNumber = 1;
        String[] suggestions = spellChecker.suggestSimilar(wordForSuggestions, suggestionsNumber);
        if (suggestions != null && suggestions.length > 0) {
            for (String word : suggestions) {
                System.out.println("Did you mean:" + word);
            }
        } else {
            System.out.println("No suggestions found for word:" + wordForSuggestions);
        }
    }
}

From source file:fr.mael.microrss.dao.impl.GenericDaoImpl.java

License:Open Source License

/**
 * Builds the spell index for the persistent class from the terms of its
 * "name" field. The index is written to a "spell_&lt;class name&gt;" directory
 * under the configured index directory. Failures while building are logged,
 * not rethrown.
 *
 * @throws IOException declared by the interface; build and close failures inside this method are logged instead
 * @see fr.mael.jmusic.dao.GenericDao#buildSpellIndex()
 */
@Override
public void buildSpellIndex() throws IOException {
    FullTextSession searchSession = Search.getFullTextSession(sessionFactory.getCurrentSession());
    SearchFactory searchFactory = searchSession.getSearchFactory();
    IndexReader reader = searchFactory.getIndexReaderAccessor().open(getPersistentClass());
    FSDirectory spellDir = null;
    SpellChecker spellChecker = null;
    try {
        spellDir = FSDirectory
                .open(new File(configuration.getIndexDir() + "/spell_" + getPersistentClass().getName()));
        spellChecker = new SpellChecker(spellDir);
        Dictionary dictionary = new LuceneDictionary(reader, "name");
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,
                searchFactory.getAnalyzer(getPersistentClass()));
        spellChecker.indexDictionary(dictionary, config, true);
    } catch (Exception e) {
        log.error("Error building spell index", e);
    } finally {
        // Release the spell checker and its directory, which were previously
        // leaked. Close failures are logged so the reader is always returned.
        try {
            if (spellChecker != null) {
                spellChecker.close();
            }
        } catch (IOException e) {
            log.error("Error closing spell checker", e);
        }
        try {
            if (spellDir != null) {
                spellDir.close();
            }
        } catch (IOException e) {
            log.error("Error closing spell index directory", e);
        }
        searchFactory.getIndexReaderAccessor().close(reader);
    }

}

From source file:org.silverpeas.search.indexEngine.model.DidYouMeanIndexer.java

License:Open Source License

/**
 * creates or updates a spelling index. The spelling index is created or updated from an existing
 * index. The spelling index is used to suggest words when an user executes a query that returns
 * unsatisfactory results. if a spelling index already exists, only the new words contained in the
 * index source will be added. otherwise a new index will be created
 * <p>
 * Invalid arguments and I/O failures are reported through {@code SilverTrace} and not rethrown.
 * @param field name of the field of the index source that will be used to feed the spelling index
 * @param originalIndexDirectory represents the source index path
 * @param spellIndexDirectory represents the spelling index path
 */
public static void createSpellIndex(String field, String originalIndexDirectory, String spellIndexDirectory) {
    // stop the process if any method parameter is null or empty
    if (!StringUtil.isDefined(field) || !StringUtil.isDefined(originalIndexDirectory)
            || !StringUtil.isDefined(spellIndexDirectory)) {
        SilverTrace.error("indexEngine", DidYouMeanIndexer.class.toString(), "root.EX_INVALID_ARG");
        return;
    }
    // everything that must be released, declared up front so that the finally
    // block can close whatever was opened before a failure
    FSDirectory spellDirectory = null;
    FSDirectory sourceDirectory = null;
    IndexReader indexReader = null;
    SpellChecker spellChecker = null;

    try {
        // open the spelling index directory
        spellDirectory = FSDirectory.open(new File(spellIndexDirectory));
        // open original index
        sourceDirectory = FSDirectory.open(new File(originalIndexDirectory));
        indexReader = IndexReader.open(sourceDirectory);
        // create a Lucene dictionary with the original index
        Dictionary dictionary = new LuceneDictionary(indexReader, field);
        // index the dictionary into the spelling index
        spellChecker = new SpellChecker(spellDirectory);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36,
                new StandardAnalyzer(Version.LUCENE_36));
        spellChecker.indexDictionary(dictionary, config, true);
    } catch (CorruptIndexException e) {
        SilverTrace.error("indexEngine", DidYouMeanIndexer.class.toString(), "root.EX_INDEX_FAILED", e);
    } catch (IOException e) {
        SilverTrace.error("indexEngine", DidYouMeanIndexer.class.toString(), "root.EX_LOAD_IO_EXCEPTION", e);
    } finally {
        // close users before the directories they read from or write to
        IOUtils.closeQuietly(spellChecker);
        IOUtils.closeQuietly(indexReader);
        IOUtils.closeQuietly(sourceDirectory);
        IOUtils.closeQuietly(spellDirectory);
    }

}

From source file:org.watermint.sourcecolon.org.opensolaris.opengrok.index.IndexDatabase.java

License:Open Source License

/**
 * Generate a spelling suggestion index for the definitions stored in defs.
 * The terms of the "defs" field of the main index are indexed into the spell
 * directory. I/O failures are logged, not rethrown. The spell directory is
 * closed when the method finishes.
 */
public void createSpellingSuggestions() {
    IndexReader indexReader = null;
    SpellChecker checker = null;

    try {
        log.info("Generating spelling suggestion index ... ");
        indexReader = IndexReader.open(indexDirectory);
        checker = new SpellChecker(spellDirectory);
        //TODO below seems only to index "defs" , possible bug ?
        checker.indexDictionary(new LuceneDictionary(indexReader, "defs"),
                new IndexWriterConfig(Version.LUCENE_36, null), true);
        log.info("done");
    } catch (IOException e) {
        // the exception is passed as the "thrown" argument, so a {0}
        // placeholder in the message would never be substituted
        log.log(Level.SEVERE, "ERROR: Generating spelling", e);
    } finally {
        // release the spell checker before the directory it works on
        if (checker != null) {
            try {
                checker.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occurred while closing spell checker", e);
            }
        }
        if (indexReader != null) {
            try {
                indexReader.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occurred while closing reader", e);
            }
        }
        if (spellDirectory != null) {
            spellDirectory.close();
        }
    }
}

From source file:resource.IndexFiles.java

License:Apache License

/**
 * Builds the spell-check dictionary index at {@code DICTIONARY_PATH} from the
 * terms of the "contents" field of the index at {@code INDEX_PATH}.
 * Every resource opened here is closed again, even when indexing fails.
 *
 * @param analyzer analyzer handed to the writer configuration of the dictionary index
 * @throws IOException if either index cannot be opened, read, written or closed
 */
private static void createDictionary(Analyzer analyzer) throws IOException {
    Directory dictionaryDir = FSDirectory.open(new File(DICTIONARY_PATH));
    try {
        Directory indexDir = FSDirectory.open(new File(INDEX_PATH));
        try {
            IndexReader reader = DirectoryReader.open(indexDir);
            try {
                Dictionary dictionary = new LuceneDictionary(reader, "contents");
                IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer);

                SpellChecker spellChecker = new SpellChecker(dictionaryDir);
                try {
                    spellChecker.indexDictionary(dictionary, iwc, false);
                } finally {
                    spellChecker.close();
                }
            } finally {
                reader.close();
            }
        } finally {
            indexDir.close();
        }
    } finally {
        dictionaryDir.close();
    }
}