List of usage examples for the org.apache.lucene.search.spell.PlainTextDictionary constructor
public PlainTextDictionary(Reader reader)
From source file:com.bah.bahdit.main.search.utils.LevenshteinDistance.java
License:Apache License
/** * Given a context to place all of the spell check files in, this method * sets up the spellchecker object using the sample table * //from w w w . j a v a 2 s . c o m * @param context - the context of the current servlet * @param sampleTable - the full text sample table * @return - a spellchecker object */ public static SpellChecker createSpellChecker(ServletContext context, HashMap<String, Integer> sampleTable) { SpellChecker spellChecker = null; // write terms from sample table to text file, to be basis of dictionary File f = new File("dictionary" + System.nanoTime() + ".txt"); try { f.createNewFile(); BufferedWriter out = new BufferedWriter(new FileWriter(f)); for (String entry : sampleTable.keySet()) { out.write(entry + "\n"); } } catch (IOException e) { e.printStackTrace(); } String dPath = System.getProperty("user.dir") + "/spellcheck" + System.nanoTime(); File dir = new File(dPath); Directory directory = null; try { directory = FSDirectory.open(dir); } catch (IOException e3) { e3.printStackTrace(); } try { spellChecker = new SpellChecker(directory); } catch (IOException e2) { e2.printStackTrace(); } StandardAnalyzer a = new StandardAnalyzer(Version.LUCENE_40); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, a); boolean fullMerge = true; PlainTextDictionary dict = null; try { dict = new PlainTextDictionary(f); } catch (FileNotFoundException e1) { e1.printStackTrace(); } try { spellChecker.indexDictionary(dict, config, fullMerge); } catch (IOException e) { e.printStackTrace(); } return spellChecker; }
From source file:com.ostrichemulators.semtool.rdf.engine.util.EngineConsistencyChecker.java
/** * Resolves "near" matches from the elements of the given type. If * {@link #across} is <code>true</code>, each element will be compared to all * elements of all types./*from ww w . j a v a2 s .c o m*/ * * @param uri the concept/relation class (not instance) to resolve * @param minDistance the minimum allowable similarity * @return map of uri-to-hits */ public MultiMap<IRI, Hit> check(IRI uri, final float minDistance) { MultiMap<IRI, Hit> hits = new MultiMap<>(); // get our universe of possible hits Map<IRI, String> possibles = getHitUniverse(uri); MultiMap<String, IRI> revpos = MultiMap.flip(possibles); Directory ramdir = new RAMDirectory(); StandardAnalyzer analyzer = null; SpellChecker speller = null; List<IRI> errors = new ArrayList<>(); try { analyzer = new StandardAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(analyzer); speller = new SpellChecker(ramdir, strdist); StringBuilder names = new StringBuilder(); for (String s : possibles.values()) { names.append(s).append("\n"); } PlainTextDictionary ptd = new PlainTextDictionary(new StringReader(names.toString())); speller.indexDictionary(ptd, config, true); List<IRI> needles = typeToURILkp.get(uri); for (IRI needle : needles) { String needlelabel = labels.get(needle); try { String[] suggestions = speller.suggestSimilar(needlelabel, 20, minDistance); for (String s : suggestions) { // found a match, so figure out what we actually matched float distance = strdist.getDistance(needlelabel, s); for (IRI match : revpos.get(s)) { hits.add(needle, new Hit(match, s, uriToTypeLkp.get(match), distance)); } } } catch (Exception e) { // our fallback resolution always works; it's just a ton slower errors.add(needle); } } } catch (Exception e) { log.error(e, e); } finally { for (Closeable c : new Closeable[] { analyzer, ramdir, speller }) { if (null != c) { try { c.close(); } catch (Exception e) { log.warn(e, e); } } } } if (!errors.isEmpty()) { fallbackResolve(errors, possibles, hits, strdist, minDistance); } 
return hits; }
From source file:cz.muni.fi.webmias.suggest.MathNamesSuggester.java
License:Apache License
public MathNamesSuggester() { try {/*from w ww . ja v a 2s . c o m*/ suggester = new AnalyzingSuggester(new StandardAnalyzer(Version.LUCENE_4_10_2)); suggester.build(new PlainTextDictionary(getMathNamesReader())); } catch (IOException ex) { Logger.getLogger(MathNamesSuggester.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:engine.easy.search.EasySearchEngine.java
License:Apache License
/**
 * Creates a spell checker over an in-memory (RAM) index built from the
 * plain-text dictionary at {@code AppConstants.DICTIONARY_PATH}.
 *
 * @return the configured SpellChecker, or null when setup failed
 */
private SpellChecker getSpecSpellChecker() {
    SpellChecker spellchecker = null;
    try {
        // NOTE(review): the original also opened an FSDirectory on
        // AppConstants.DICTIONARY_INDEX_PATH but never used or closed it —
        // the checker below is backed by a RAMDirectory. The leaked, unused
        // handle has been removed. If an on-disk index was intended, pass
        // that Directory to the SpellChecker constructor instead.
        spellchecker = new SpellChecker(new RAMDirectory());
        spellchecker.indexDictionary(new PlainTextDictionary(new File(AppConstants.DICTIONARY_PATH)));
    } catch (Exception e) {
        System.out.println("Exception: getSpecSpellChecker" + e.toString());
    }
    return spellchecker;
}
From source file:engine.easy.util.SuggestionSpellService.java
License:Apache License
/**
 * Demo entry point: builds a RAM-backed spell checker from the plain-text
 * dictionary and prints up to three suggestions for a sample misspelling.
 *
 * @param args unused
 * @throws Exception on any indexing or lookup failure
 */
public static void main(String[] args) throws Exception {
    // NOTE(review): the original opened an FSDirectory on
    // AppConstants.DICTIONARY_INDEX_PATH without ever using it (the checker
    // is RAM-backed); the leaked handle has been removed.
    SpellChecker spellChecker = new SpellChecker(new RAMDirectory());
    try {
        spellChecker.indexDictionary(new PlainTextDictionary(new File(AppConstants.DICTIONARY_PATH)));

        String wordForSuggestions = "hee";
        int suggestionsNumber = 3;
        String[] suggestions = spellChecker.suggestSimilar(wordForSuggestions, suggestionsNumber);
        if (suggestions != null && suggestions.length > 0) {
            for (String word : suggestions) {
                System.out.println("Did you mean:" + word);
            }
        } else {
            System.out.println("No suggestions found for word:" + wordForSuggestions);
        }
    } finally {
        // Release the spell-check index the original never closed.
        spellChecker.close();
    }
}
From source file:fastcampus.lucene.example.search.SpellCheckerExample.java
License:Apache License
public static void main(String[] args) throws Exception { Directory directory = FSDirectory.open(Paths.get("./index/spell/")); SpellChecker spellChecker = new SpellChecker(directory); //Analyzer analyzer = new StandardAnalyzer(); // ? Analyzer analyzer = new Analyzer() { @Override//from w ww.j av a 2 s. co m protected TokenStreamComponents createComponents(String s) { Reader reader = new StringReader(s); Tokenizer tokenizer = new StandardTokenizer(); tokenizer.setReader(reader); String name = "nfc_cf"; Normalizer2 normalizer = Normalizer2.getInstance(null, name, Normalizer2.Mode.DECOMPOSE); TokenFilter filter = new ICUNormalizer2Filter(tokenizer, normalizer); return new TokenStreamComponents(tokenizer, filter); } }; IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); //?? Writer? ? ? Path path = Paths.get("./data/spell/dic.txt"); spellChecker.setSpellIndex(directory); spellChecker.clearIndex(); spellChecker.indexDictionary(new PlainTextDictionary(path), indexWriterConfig, true); String wordForSuggestions = "?"; //spellChecker.setStringDistance(new LevensteinDistance()); //#Levenstein spellChecker.setStringDistance(new JaroWinklerDistance()); //Jaro-Winkler int suggestionsNumber = 1; String[] suggestions = spellChecker.suggestSimilar(wordForSuggestions, suggestionsNumber); if (suggestions != null && suggestions.length > 0) { for (String word : suggestions) { System.out.println("Did you mean:" + word); } } else { System.out.println("No suggestions found for word:" + wordForSuggestions); } }
From source file:org.apache.solr.spelling.FileBasedSpellChecker.java
License:Apache License
private void loadExternalFileDictionary(SolrCore core, SolrIndexSearcher searcher) { try {//from ww w . j ava 2s. co m IndexSchema schema = null == searcher ? core.getLatestSchema() : searcher.getSchema(); // Get the field's analyzer if (fieldTypeName != null && schema.getFieldTypeNoEx(fieldTypeName) != null) { FieldType fieldType = schema.getFieldTypes().get(fieldTypeName); // Do index-time analysis using the given fieldType's analyzer RAMDirectory ramDir = new RAMDirectory(); LogMergePolicy mp = new LogByteSizeMergePolicy(); mp.setMergeFactor(300); IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(core.getSolrConfig().luceneMatchVersion, fieldType.getAnalyzer()) .setMaxBufferedDocs(150).setMergePolicy(mp) .setOpenMode(IndexWriterConfig.OpenMode.CREATE) // TODO: if we enable this, codec gets angry since field won't exist in the schema // .setCodec(core.getCodec()) ); List<String> lines = core.getResourceLoader().getLines(sourceLocation, characterEncoding); for (String s : lines) { Document d = new Document(); d.add(new TextField(WORD_FIELD_NAME, s, Field.Store.NO)); writer.addDocument(d); } writer.forceMerge(1); writer.close(); dictionary = new HighFrequencyDictionary(DirectoryReader.open(ramDir), WORD_FIELD_NAME, 0.0f); } else { // check if character encoding is defined if (characterEncoding == null) { dictionary = new PlainTextDictionary(core.getResourceLoader().openResource(sourceLocation)); } else { dictionary = new PlainTextDictionary(new InputStreamReader( core.getResourceLoader().openResource(sourceLocation), characterEncoding)); } } } catch (IOException e) { log.error("Unable to load spellings", e); } }
From source file:org.tinymce.spellchecker.LuceneSpellCheckerServlet.java
License:Open Source License
/**
 * Drops and rebuilds the spell-check index for the given language from all
 * of that language's dictionary files.
 *
 * @param lang language code whose dictionaries should be (re)indexed
 * @return a freshly indexed checker for {@code lang}
 * @throws SpellCheckException when the index cannot be created or a
 *         dictionary file cannot be indexed
 */
private MemoryAwareSpellChecker reindexSpellchecker(String lang) throws SpellCheckException {
    List<File> dictionariesFiles = getDictionaryFiles(lang);

    MemoryAwareSpellChecker checker;
    try {
        checker = new MemoryAwareSpellChecker(getSpellCheckerDirectory(lang));
        checker.clearIndex();
    } catch (IOException e) {
        throw new SpellCheckException("Failed to create index", e);
    }

    for (File dictionary : dictionariesFiles) {
        try {
            checker.indexDictionary(new PlainTextDictionary(dictionary));
        } catch (IOException e) {
            String message = "Failed to index dictionary " + dictionary.getAbsolutePath();
            logger.log(Level.SEVERE, message, e);
            throw new SpellCheckException(message, e);
        }
    }
    return checker;
}