List of usage examples for org.apache.lucene.search.spell.LuceneDictionary

public LuceneDictionary(IndexReader reader, String field)

Creates a dictionary backed by the terms of the given field in the provided reader.
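Before the individual examples, here is a minimal sketch of the pattern they all share: open an IndexReader over an existing index, wrap one of its fields in a LuceneDictionary, and hand it to a SpellChecker that builds its own spell index in a separate directory. This is an illustrative sketch, not taken from any of the projects below; it assumes a Lucene 5.x-or-later style API (Path-based FSDirectory.open, an IndexWriterConfig passed to indexDictionary), and the directory paths and the "contents" field name are placeholders. Several examples below target older 3.x releases, which instead expose a one-argument indexDictionary(Dictionary).

import java.nio.file.Paths;

import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class SpellIndexSketch {
    public static void main(String[] args) throws Exception {
        // Paths and field name are placeholders; adjust them to your own index layout.
        try (Directory mainDir = FSDirectory.open(Paths.get("/path/to/main-index"));
             Directory spellDir = FSDirectory.open(Paths.get("/path/to/spell-index"));
             IndexReader reader = DirectoryReader.open(mainDir);
             SpellChecker spell = new SpellChecker(spellDir)) {

            // Wrap the terms of one indexed field as a Dictionary.
            LuceneDictionary dictionary = new LuceneDictionary(reader, "contents");

            // Build the spell-checker index from the dictionary. Recent Lucene versions
            // require an IndexWriterConfig; older 3.x releases use indexDictionary(Dictionary).
            spell.indexDictionary(dictionary, new IndexWriterConfig(new KeywordAnalyzer()), true);

            // Ask for up to five corrections for a misspelled word.
            for (String suggestion : spell.suggestSimilar("lucen", 5)) {
                System.out.println(suggestion);
            }
        }
    }
}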
From source file:aos.lucene.tools.CreateSpellCheckerIndex.java
License:Apache License
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        LOGGER.info("Usage: java lia.tools.SpellCheckerTest SpellCheckerIndexDir IndexDir IndexField");
        System.exit(1);
    }

    String spellCheckDir = args[0];
    String indexDir = args[1];
    String indexField = args[2];

    LOGGER.info("Now build SpellChecker index...");
    Directory dir = FSDirectory.open(new File(spellCheckDir));
    SpellChecker spell = new SpellChecker(dir); //#A
    long startTime = System.currentTimeMillis();

    Directory dir2 = FSDirectory.open(new File(indexDir));
    IndexReader r = DirectoryReader.open(dir2); //#B
    try {
        spell.indexDictionary(new LuceneDictionary(r, indexField)); //#C
    } finally {
        r.close();
    }
    dir.close();
    dir2.close();

    long endTime = System.currentTimeMillis();
    LOGGER.info(" took " + (endTime - startTime) + " milliseconds");
}
From source file:com.appeligo.lucene.DidYouMeanIndexer.java
License:Apache License
public static void createSpellIndex(String field, Directory originalIndexDirectory,
        Directory spellIndexDirectory) throws IOException {
    IndexReader indexReader = null;
    try {
        indexReader = IndexReader.open(originalIndexDirectory);
        Dictionary dictionary = new LuceneDictionary(indexReader, field);
        SpellChecker spellChecker = new SpellChecker(spellIndexDirectory);
        spellChecker.indexDictionary(dictionary);
        if (log.isDebugEnabled()) {
            spellChecker = new SpellChecker(spellIndexDirectory); // need to re-open to see it work
            log.debug("Does 'next' exist in the dictionary? " + spellChecker.exist("next"));
            StringBuilder sb = new StringBuilder();
            for (String s : spellChecker.suggestSimilar("noxt", 5, indexReader, "compositeField", true)) {
                sb.append(s + ", ");
            }
            log.debug("Best suggestions for 'noxt': " + sb);
        }
    } finally {
        if (indexReader != null) {
            indexReader.close();
        }
    }
}
From source file:com.ikon.module.db.stuff.IndexHelper.java
License:Open Source License
protected void buildSpellCheckerIndex(SearchFactory searchFactory) {
    IndexReader reader = null;
    Directory dir = null;
    long _entr = System.currentTimeMillis();
    File spellCheckIndexDir = new File("lucene_index/spellcheck");
    log.info("Building SpellChecker index in {0}", spellCheckIndexDir.getAbsolutePath());
    ReaderProvider readerProvider = searchFactory.getReaderProvider();

    try {
        reader = readerProvider.openReader(searchFactory.getDirectoryProviders(NodeDocumentVersion.class)[0]);
        dir = FSDirectory.open(spellCheckIndexDir);
        SpellChecker spell = new SpellChecker(dir);
        spell.clearIndex();
        spell.indexDictionary(new LuceneDictionary(reader, NodeDocument.TEXT_FIELD));
        spell.close();
        dir.close();
        dir = null;
        long _exit = System.currentTimeMillis();
        log.info("Took {1} (ms) to build SpellChecker index in {0}",
                spellCheckIndexDir.getAbsolutePath(), String.valueOf((_exit - _entr)));
    } catch (Exception exc) {
        log.error("Failed to build spell checker index!", exc);
    } finally {
        if (dir != null) {
            try {
                dir.close();
            } catch (Exception zzz) {
            }
        }
        if (reader != null) {
            readerProvider.closeReader(reader);
        }
    }
}
From source file:com.jaeksoft.searchlib.index.ReaderLocal.java
License:Open Source License
public LuceneDictionary getLuceneDirectionary(String fieldName) {
    return new LuceneDictionary(indexReader, fieldName);
}
From source file:com.leavesfly.lia.tool.CreateSpellCheckerIndex.java
License:Apache License
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        System.out.println("Usage: java lia.tools.SpellCheckerTest SpellCheckerIndexDir IndexDir IndexField");
        System.exit(1);
    }

    String spellCheckDir = args[0];
    String indexDir = args[1];
    String indexField = args[2];

    System.out.println("Now build SpellChecker index...");
    Directory dir = FSDirectory.open(new File(spellCheckDir));
    SpellChecker spell = new SpellChecker(dir); //#A
    long startTime = System.currentTimeMillis();

    Directory dir2 = FSDirectory.open(new File(indexDir));
    IndexReader r = IndexReader.open(dir2); //#B
    try {
        spell.indexDictionary(new LuceneDictionary(r, indexField)); //#C
    } finally {
        r.close();
    }
    dir.close();
    dir2.close();

    long endTime = System.currentTimeMillis();
    System.out.println(" took " + (endTime - startTime) + " milliseconds");
}
From source file:edu.sdsc.scigraph.vocabulary.VocabularyNeo4jImpl.java
License:Apache License
@Inject
public VocabularyNeo4jImpl(GraphDatabaseService graph,
        @Nullable @IndicatesNeo4jGraphLocation String neo4jLocation, CurieUtil curieUtil,
        NodeTransformer transformer) throws IOException {
    this.graph = graph;
    this.curieUtil = curieUtil;
    this.transformer = transformer;
    if (null != neo4jLocation) {
        Directory indexDirectory = FSDirectory
                .open(new File(new File(neo4jLocation), "index/lucene/node/node_auto_index"));
        Directory spellDirectory = FSDirectory
                .open(new File(new File(neo4jLocation), "index/lucene/spellchecker"));
        spellChecker = new SpellChecker(spellDirectory);
        try (IndexReader reader = IndexReader.open(indexDirectory)) {
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, new KeywordAnalyzer());
            spellChecker.indexDictionary(
                    new LuceneDictionary(reader, NodeProperties.LABEL + LuceneUtils.EXACT_SUFFIX),
                    config, true);
        }
    } else {
        spellChecker = null;
    }
}
From source file:es.pode.indexador.negocio.servicios.indexado.SrvIndexadorServiceImpl.java
License:Open Source License
/**
 * Builds the repository that provides the suggested words.
 * @param directorioIndiceSimple Directory object holding the location of the main index repository
 * @param directorioIndiceSpell Directory object holding the location of the suggested-words repository
 * @throws IOException
 * @throws Exception
 */
private synchronized void spellCheckerAdd(Directory directorioIndiceSimple, Directory directorioIndiceSpell)
        throws IOException, Exception {
    if (logger.isDebugEnabled())
        logger.debug("Comprobamos el directorio del spellchecker = " + directorioIndiceSpell
                + " y el normal = " + directorioIndiceSimple);
    if (IndexReader.indexExists(directorioIndiceSimple)) {
        if (logger.isDebugEnabled())
            logger.debug("El índiceSimple " + directorioIndiceSimple + "existe y lo abrimos para leer.");
        IndexReader indexReader = IndexReader.open(directorioIndiceSimple);
        String field = props.getProperty("campo_titulo");
        if (logger.isDebugEnabled())
            logger.debug("Creamos un diccionario para el campo = " + field);
        Dictionary dictionary = new LuceneDictionary(indexReader, field);
        if (logger.isDebugEnabled())
            logger.debug("Creamos el spellchecher[" + directorioIndiceSpell + "]");
        SpellChecker spellChecker = new SpellChecker(directorioIndiceSpell);
        if (logger.isDebugEnabled())
            logger.debug("Indexamos el diccionario de [" + directorioIndiceSimple + "] en el spell ["
                    + directorioIndiceSpell + "]");
        spellChecker.indexDictionary(dictionary);
        field = props.getProperty("campo_descripcion");
        if (logger.isDebugEnabled())
            logger.debug("Creamos un diccionario para el campo = " + field);
        dictionary = new LuceneDictionary(indexReader, field);
        spellChecker.indexDictionary(dictionary);
        indexReader.close();
        directorioIndiceSpell.close();
    } else {
        logger.error("No existe el indice en el directorio[" + directorioIndiceSimple + "]");
        throw new Exception("No existe el índice en el directorio = " + directorioIndiceSimple);
    }
}
From source file:fr.mael.microrss.dao.impl.GenericDaoImpl.java
License:Open Source License
/**
 * @see fr.mael.jmusic.dao.GenericDao#buildSpellIndex()
 */
@Override
public void buildSpellIndex() throws IOException {
    FullTextSession searchSession = Search.getFullTextSession(sessionFactory.getCurrentSession());
    SearchFactory searchFactory = searchSession.getSearchFactory();
    IndexReader reader = searchFactory.getIndexReaderAccessor().open(getPersistentClass());
    try {
        FSDirectory spellDir = FSDirectory
                .open(new File(configuration.getIndexDir() + "/spell_" + getPersistentClass().getName()));
        SpellChecker spellChecker = new SpellChecker(spellDir);
        Dictionary dictionary = new LuceneDictionary(reader, "name");
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,
                searchFactory.getAnalyzer(getPersistentClass()));
        spellChecker.indexDictionary(dictionary, config, true);
    } catch (Exception e) {
        log.error("Error building spell index", e);
    } finally {
        searchFactory.getIndexReaderAccessor().close(reader);
    }
}
From source file:lucee.runtime.search.lucene2.LuceneSearchCollection.java
License:Open Source License
private void indexSpellCheck(String id) throws SearchException {
    if (!spellcheck)
        return;
    IndexReader reader = null;
    FSDirectory spellDir = null;
    Resource dir = _createSpellDirectory(id);
    try {
        File spellFile = FileWrapper.toFile(dir);
        spellDir = FSDirectory.getDirectory(spellFile);
        reader = _getReader(id, false);
        Dictionary dictionary = new LuceneDictionary(reader, "contents");
        SpellChecker spellChecker = new SpellChecker(spellDir);
        spellChecker.indexDictionary(dictionary);
    } catch (IOException ioe) {
        throw new SearchException(ioe);
    } finally {
        flushEL(reader);
        closeEL(reader);
    }
}
From source file:org.ala.lucene.Autocompleter.java
License:Open Source License
@SuppressWarnings("unchecked")
public void reIndex(Directory sourceDirectory, String fieldToAutocomplete, boolean createNewIndex)
        throws CorruptIndexException, IOException {
    // build a dictionary (from the spell package)
    IndexReader sourceReader = IndexReader.open(sourceDirectory);
    LuceneDictionary dict = new LuceneDictionary(sourceReader, fieldToAutocomplete);

    // code from org.apache.lucene.search.spell.SpellChecker.indexDictionary(Dictionary)
    IndexWriter.unlock(autoCompleteDirectory);

    // use a custom analyzer so we can do EdgeNGramFiltering
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(SolrUtils.BIE_LUCENE_VERSION, new Analyzer() {
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            final StandardTokenizer src = new StandardTokenizer(SolrUtils.BIE_LUCENE_VERSION, reader);
            // build the filter chain on the same tokenizer that is handed to TokenStreamComponents
            TokenStream result = new StandardFilter(SolrUtils.BIE_LUCENE_VERSION, src);
            result = new LowerCaseFilter(SolrUtils.BIE_LUCENE_VERSION, result);
            result = new StopFilter(SolrUtils.BIE_LUCENE_VERSION, result,
                    new CharArraySet(SolrUtils.BIE_LUCENE_VERSION,
                            new HashSet<String>(Arrays.asList(ENGLISH_STOP_WORDS)), true));
            result = new EdgeNGramTokenFilter(result, Side.FRONT, 1, 20);
            return new TokenStreamComponents(src, result) {
                @Override
                protected void setReader(final Reader reader) throws IOException {
                    super.setReader(reader);
                }
            };
        }

        // older (pre-TokenStreamComponents) version of the same analyzer chain,
        // kept as a comment in the original source:
        // public TokenStream tokenStream(String fieldName, Reader reader) {
        //     TokenStream result = new StandardTokenizer(SolrUtils.BIE_LUCENE_VERSION, reader);
        //     result = new StandardFilter(SolrUtils.BIE_LUCENE_VERSION, result);
        //     result = new LowerCaseFilter(SolrUtils.BIE_LUCENE_VERSION, result);
        //     //result = new ISOLatin1AccentFilter(result);
        //     result = new StopFilter(SolrUtils.BIE_LUCENE_VERSION, result,
        //             new HashSet<String>(Arrays.asList(ENGLISH_STOP_WORDS)));
        //     result = new EdgeNGramTokenFilter(result, Side.FRONT, 1, 20);
        //     return result;
        // }
    });

    if (createNewIndex) {
        indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    } else {
        indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }
    indexWriterConfig.setMaxBufferedDocs(150);
    IndexWriter writer = new IndexWriter(autoCompleteDirectory, indexWriterConfig);
    // writer.setMergeFactor(300);

    // go through every word, storing the original word (incl. n-grams)
    // and the number of times it occurs
    Map<String, Integer> wordsMap = new HashMap<String, Integer>();

    Iterator<String> iter = (Iterator<String>) dict.getWordsIterator();
    while (iter.hasNext()) {
        String word = iter.next();
        int len = word.length();
        if (len < 3) {
            continue; // too short we bail but "too long" is fine...
        }
        if (wordsMap.containsKey(word)) {
            throw new IllegalStateException("This should never happen in Lucene 2.3.2");
            // wordsMap.put(word, wordsMap.get(word) + 1);
        } else {
            // use the number of documents this word appears in
            wordsMap.put(word, sourceReader.docFreq(new Term(fieldToAutocomplete, word)));
        }
    }

    for (String word : wordsMap.keySet()) {
        // ok index the word
        Document doc = new Document();
        doc.add(new Field(SOURCE_WORD_FIELD, word, Field.Store.YES, Field.Index.NOT_ANALYZED)); // orig term
        doc.add(new Field(GRAMMED_WORDS_FIELD, word, Field.Store.YES, Field.Index.ANALYZED)); // grammed
        doc.add(new Field(COUNT_FIELD, Integer.toString(wordsMap.get(word)), Field.Store.NO,
                Field.Index.NOT_ANALYZED)); // count
        writer.addDocument(doc);
    }

    sourceReader.close();

    // close writer
    writer.forceMerge(1);
    writer.close();

    // re-open our reader
    reOpenReader();
}