List of usage examples for org.apache.lucene.search.spell.SpellChecker#indexDictionary
public final void indexDictionary(Dictionary dict, IndexWriterConfig config, boolean fullMerge) throws IOException
From source file:com.bah.bahdit.main.search.utils.LevenshteinDistance.java
License:Apache License
/** * Given a context to place all of the spell check files in, this method * sets up the spellchecker object using the sample table * /* www . j a va 2 s. c o m*/ * @param context - the context of the current servlet * @param sampleTable - the full text sample table * @return - a spellchecker object */ public static SpellChecker createSpellChecker(ServletContext context, HashMap<String, Integer> sampleTable) { SpellChecker spellChecker = null; // write terms from sample table to text file, to be basis of dictionary File f = new File("dictionary" + System.nanoTime() + ".txt"); try { f.createNewFile(); BufferedWriter out = new BufferedWriter(new FileWriter(f)); for (String entry : sampleTable.keySet()) { out.write(entry + "\n"); } } catch (IOException e) { e.printStackTrace(); } String dPath = System.getProperty("user.dir") + "/spellcheck" + System.nanoTime(); File dir = new File(dPath); Directory directory = null; try { directory = FSDirectory.open(dir); } catch (IOException e3) { e3.printStackTrace(); } try { spellChecker = new SpellChecker(directory); } catch (IOException e2) { e2.printStackTrace(); } StandardAnalyzer a = new StandardAnalyzer(Version.LUCENE_40); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, a); boolean fullMerge = true; PlainTextDictionary dict = null; try { dict = new PlainTextDictionary(f); } catch (FileNotFoundException e1) { e1.printStackTrace(); } try { spellChecker.indexDictionary(dict, config, fullMerge); } catch (IOException e) { e.printStackTrace(); } return spellChecker; }
From source file:com.jaeksoft.searchlib.cache.SpellCheckerCache.java
License:Open Source License
public SpellChecker get(ReaderLocal reader, String field) throws IOException { rwl.w.lock();//from w ww. ja v a 2s . c om try { FieldNameKey key = new FieldNameKey(field); SpellChecker spellChecker = getAndPromote(key); if (spellChecker != null) return spellChecker; LuceneDictionary dict = reader.getLuceneDirectionary(key.getFieldName()); SpellChecker spellchecker = new SpellChecker(new RAMDirectory()); spellchecker.indexDictionary(dict, new IndexWriterConfig(Version.LUCENE_36, null), true); put(key, spellchecker); return spellchecker; } finally { rwl.w.unlock(); } }
From source file:com.ostrichemulators.semtool.rdf.engine.util.EngineConsistencyChecker.java
/** * Resolves "near" matches from the elements of the given type. If * {@link #across} is <code>true</code>, each element will be compared to all * elements of all types./*www . ja va 2 s . com*/ * * @param uri the concept/relation class (not instance) to resolve * @param minDistance the minimum allowable similarity * @return map of uri-to-hits */ public MultiMap<IRI, Hit> check(IRI uri, final float minDistance) { MultiMap<IRI, Hit> hits = new MultiMap<>(); // get our universe of possible hits Map<IRI, String> possibles = getHitUniverse(uri); MultiMap<String, IRI> revpos = MultiMap.flip(possibles); Directory ramdir = new RAMDirectory(); StandardAnalyzer analyzer = null; SpellChecker speller = null; List<IRI> errors = new ArrayList<>(); try { analyzer = new StandardAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(analyzer); speller = new SpellChecker(ramdir, strdist); StringBuilder names = new StringBuilder(); for (String s : possibles.values()) { names.append(s).append("\n"); } PlainTextDictionary ptd = new PlainTextDictionary(new StringReader(names.toString())); speller.indexDictionary(ptd, config, true); List<IRI> needles = typeToURILkp.get(uri); for (IRI needle : needles) { String needlelabel = labels.get(needle); try { String[] suggestions = speller.suggestSimilar(needlelabel, 20, minDistance); for (String s : suggestions) { // found a match, so figure out what we actually matched float distance = strdist.getDistance(needlelabel, s); for (IRI match : revpos.get(s)) { hits.add(needle, new Hit(match, s, uriToTypeLkp.get(match), distance)); } } } catch (Exception e) { // our fallback resolution always works; it's just a ton slower errors.add(needle); } } } catch (Exception e) { log.error(e, e); } finally { for (Closeable c : new Closeable[] { analyzer, ramdir, speller }) { if (null != c) { try { c.close(); } catch (Exception e) { log.warn(e, e); } } } } if (!errors.isEmpty()) { fallbackResolve(errors, possibles, hits, strdist, minDistance); } return 
hits; }
From source file:fastcampus.lucene.example.search.SpellCheckerExample.java
License:Apache License
public static void main(String[] args) throws Exception { Directory directory = FSDirectory.open(Paths.get("./index/spell/")); SpellChecker spellChecker = new SpellChecker(directory); //Analyzer analyzer = new StandardAnalyzer(); // ? Analyzer analyzer = new Analyzer() { @Override/*w ww.ja v a 2s. c o m*/ protected TokenStreamComponents createComponents(String s) { Reader reader = new StringReader(s); Tokenizer tokenizer = new StandardTokenizer(); tokenizer.setReader(reader); String name = "nfc_cf"; Normalizer2 normalizer = Normalizer2.getInstance(null, name, Normalizer2.Mode.DECOMPOSE); TokenFilter filter = new ICUNormalizer2Filter(tokenizer, normalizer); return new TokenStreamComponents(tokenizer, filter); } }; IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); //?? Writer? ? ? Path path = Paths.get("./data/spell/dic.txt"); spellChecker.setSpellIndex(directory); spellChecker.clearIndex(); spellChecker.indexDictionary(new PlainTextDictionary(path), indexWriterConfig, true); String wordForSuggestions = "?"; //spellChecker.setStringDistance(new LevensteinDistance()); //#Levenstein spellChecker.setStringDistance(new JaroWinklerDistance()); //Jaro-Winkler int suggestionsNumber = 1; String[] suggestions = spellChecker.suggestSimilar(wordForSuggestions, suggestionsNumber); if (suggestions != null && suggestions.length > 0) { for (String word : suggestions) { System.out.println("Did you mean:" + word); } } else { System.out.println("No suggestions found for word:" + wordForSuggestions); } }
From source file:fr.mael.microrss.dao.impl.GenericDaoImpl.java
License:Open Source License
/**
 * Builds the spelling index for the persistent class from the "name" field of
 * its search index. The spelling index is written to
 * {@code <indexDir>/spell_<persistent class name>}.
 * <p>
 * Any failure while building (including a failure to close the spell checker
 * or its directory) is logged and not rethrown. The index reader is always
 * handed back to the reader accessor.
 *
 * @see fr.mael.jmusic.dao.GenericDao#buildSpellIndex()
 */
@Override
public void buildSpellIndex() throws IOException {
    FullTextSession searchSession = Search.getFullTextSession(sessionFactory.getCurrentSession());
    SearchFactory searchFactory = searchSession.getSearchFactory();
    IndexReader reader = searchFactory.getIndexReaderAccessor().open(getPersistentClass());
    try {
        FSDirectory spellDir = FSDirectory
                .open(new File(configuration.getIndexDir() + "/spell_" + getPersistentClass().getName()));
        try {
            SpellChecker spellChecker = new SpellChecker(spellDir);
            try {
                Dictionary dictionary = new LuceneDictionary(reader, "name");
                IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,
                        searchFactory.getAnalyzer(getPersistentClass()));
                spellChecker.indexDictionary(dictionary, config, true);
            } finally {
                // Release the checker's resources once indexing is over.
                spellChecker.close();
            }
        } finally {
            // The directory is opened here, so it is closed here as well.
            spellDir.close();
        }
    } catch (Exception e) {
        log.error("Error building spell index", e);
    } finally {
        searchFactory.getIndexReaderAccessor().close(reader);
    }
}
From source file:org.silverpeas.search.indexEngine.model.DidYouMeanIndexer.java
License:Open Source License
/** * creates or updates a spelling index. The spelling index is created or updated from an existing * index. The spelling index is used to suggest words when an user executes a query that returns * unsatisfactory results. if a spelling index already exists, only the new words contained in the * index source will be added. otherwise a new index will be created * @param field name of the field of the index source that will be used to feed the spelling index * @param originalIndexDirectory represents the source index path * @param spellIndexDirectory represents the spelling index path *///from w w w. j a v a 2s . c om public static void createSpellIndex(String field, String originalIndexDirectory, String spellIndexDirectory) { // stop the process if method parameters is null or empty if (!StringUtil.isDefined(field) || !StringUtil.isDefined(originalIndexDirectory) || !StringUtil.isDefined(spellIndexDirectory)) { SilverTrace.error("indexEngine", DidYouMeanIndexer.class.toString(), "root.EX_INVALID_ARG"); return; } // initializes local variable IndexReader indexReader = null; try { // create a file object with given path File file = new File(spellIndexDirectory); // open original index FSDirectory directory = FSDirectory.open(file); indexReader = IndexReader.open(FSDirectory.open(new File(originalIndexDirectory))); // create a Lucene dictionary with the original index Dictionary dictionary = new LuceneDictionary(indexReader, field); // index the dictionary into the spelling index SpellChecker spellChecker = new SpellChecker(directory); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36)); spellChecker.indexDictionary(dictionary, config, true); spellChecker.close(); } catch (CorruptIndexException e) { SilverTrace.error("indexEngine", DidYouMeanIndexer.class.toString(), "root.EX_INDEX_FAILED", e); } catch (IOException e) { SilverTrace.error("indexEngine", DidYouMeanIndexer.class.toString(), 
"root.EX_LOAD_IO_EXCEPTION", e); } finally { IOUtils.closeQuietly(indexReader); } }
From source file:org.watermint.sourcecolon.org.opensolaris.opengrok.index.IndexDatabase.java
License:Open Source License
/** * Generate a spelling suggestion for the definitions stored in defs */// w w w .j a v a2 s. c o m public void createSpellingSuggestions() { IndexReader indexReader = null; SpellChecker checker = null; try { log.info("Generating spelling suggestion index ... "); indexReader = IndexReader.open(indexDirectory); checker = new SpellChecker(spellDirectory); //TODO below seems only to index "defs" , possible bug ? checker.indexDictionary(new LuceneDictionary(indexReader, "defs"), new IndexWriterConfig(Version.LUCENE_36, null), true); log.info("done"); } catch (IOException e) { log.log(Level.SEVERE, "ERROR: Generating spelling: {0}", e); } finally { if (indexReader != null) { try { indexReader.close(); } catch (IOException e) { log.log(Level.WARNING, "An error occurred while closing reader", e); } } if (spellDirectory != null) { spellDirectory.close(); } } }
From source file:resource.IndexFiles.java
License:Apache License
/**
 * Builds the spelling dictionary index at {@code DICTIONARY_PATH} from the
 * terms of the "contents" field of the index at {@code INDEX_PATH}.
 * <p>
 * Both directories, the index reader and the spell checker are closed before
 * the method returns, whether or not indexing succeeds.
 *
 * @param analyzer analyzer placed in the index writer configuration used for the dictionary index
 * @throws IOException if a directory or the index cannot be opened, or if indexing or closing fails
 */
private static void createDictionary(Analyzer analyzer) throws IOException {
    Directory dictionaryDir = FSDirectory.open(new File(DICTIONARY_PATH));
    try {
        Directory indexDir = FSDirectory.open(new File(INDEX_PATH));
        try {
            IndexReader reader = DirectoryReader.open(indexDir);
            try {
                Dictionary dictionary = new LuceneDictionary(reader, "contents");
                IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer);
                SpellChecker spellChecker = new SpellChecker(dictionaryDir);
                try {
                    // 'false' is passed as the fullMerge argument.
                    spellChecker.indexDictionary(dictionary, iwc, false);
                } finally {
                    spellChecker.close();
                }
            } finally {
                reader.close();
            }
        } finally {
            indexDir.close();
        }
    } finally {
        dictionaryDir.close();
    }
}