Example usage for the org.apache.lucene.search.spell.LuceneDictionary constructor

Introduction

This page collects example usages of the org.apache.lucene.search.spell.LuceneDictionary constructor, LuceneDictionary(IndexReader reader, String field).

Prototype

public LuceneDictionary(IndexReader reader, String field) 

Document

Creates a new Dictionary, pulling source terms from the specified field in the provided reader.
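
A minimal sketch of the typical pattern, assuming the Lucene 3.x spell-check API that most of the examples below use; the index paths and the "title" field name are placeholders, not taken from any of the projects shown here:

import java.io.File;
import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class LuceneDictionaryExample {

    public static void main(String[] args) throws IOException {
        // Placeholder locations: point these at a real source index and a spell index directory.
        Directory sourceDir = FSDirectory.open(new File("/path/to/source-index"));
        Directory spellDir = FSDirectory.open(new File("/path/to/spell-index"));

        IndexReader reader = IndexReader.open(sourceDir);
        try {
            // Enumerate every term stored in the "title" field of the source index ...
            LuceneDictionary dictionary = new LuceneDictionary(reader, "title");

            // ... and feed those terms to a SpellChecker, which builds its own n-gram index.
            SpellChecker spellChecker = new SpellChecker(spellDir);
            spellChecker.indexDictionary(dictionary);
            spellChecker.close();
        } finally {
            reader.close();
            spellDir.close();
            sourceDir.close();
        }
    }
}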

Usage

From source file:aos.lucene.tools.CreateSpellCheckerIndex.java

License:Apache License

public static void main(String[] args) throws IOException {

    if (args.length != 3) {
        LOGGER.info("Usage: java lia.tools.SpellCheckerTest SpellCheckerIndexDir IndexDir IndexField");
        System.exit(1);
    }

    String spellCheckDir = args[0];
    String indexDir = args[1];
    String indexField = args[2];

    LOGGER.info("Now build SpellChecker index...");
    Directory dir = FSDirectory.open(new File(spellCheckDir));
    SpellChecker spell = new SpellChecker(dir); //#A
    long startTime = System.currentTimeMillis();

    Directory dir2 = FSDirectory.open(new File(indexDir));
    IndexReader r = DirectoryReader.open(dir2); //#B
    try {
        spell.indexDictionary(new LuceneDictionary(r, indexField)); //#C
    } finally {
        r.close();
    }
    dir.close();
    dir2.close();
    long endTime = System.currentTimeMillis();
    LOGGER.info("  took " + (endTime - startTime) + " milliseconds");
}

From source file:com.appeligo.lucene.DidYouMeanIndexer.java

License:Apache License

public static void createSpellIndex(String field, Directory originalIndexDirectory,
        Directory spellIndexDirectory) throws IOException {

    IndexReader indexReader = null;
    try {
        indexReader = IndexReader.open(originalIndexDirectory);
        Dictionary dictionary = new LuceneDictionary(indexReader, field);
        SpellChecker spellChecker = new SpellChecker(spellIndexDirectory);
        spellChecker.indexDictionary(dictionary);
        if (log.isDebugEnabled()) {
            spellChecker = new SpellChecker(spellIndexDirectory); // need to re-open to see it work
            log.debug("Does 'next' exist in the dictionary? " + spellChecker.exist("next"));
            StringBuilder sb = new StringBuilder();
            for (String s : spellChecker.suggestSimilar("noxt", 5, indexReader, "compositeField", true)) {
                sb.append(s + ", ");
            }
            log.debug("Best suggestions for 'noxt': " + sb);
        }
    } finally {
        if (indexReader != null) {
            indexReader.close();
        }
    }
}

From source file:com.ikon.module.db.stuff.IndexHelper.java

License:Open Source License

protected void buildSpellCheckerIndex(SearchFactory searchFactory) {
    IndexReader reader = null;
    Directory dir = null;
    long _entr = System.currentTimeMillis();
    File spellCheckIndexDir = new File("lucene_index/spellcheck");
    log.info("Building SpellChecker index in {0}", spellCheckIndexDir.getAbsolutePath());
    ReaderProvider readerProvider = searchFactory.getReaderProvider();

    try {
        reader = readerProvider.openReader(searchFactory.getDirectoryProviders(NodeDocumentVersion.class)[0]);
        dir = FSDirectory.open(spellCheckIndexDir);
        SpellChecker spell = new SpellChecker(dir);
        spell.clearIndex();
        spell.indexDictionary(new LuceneDictionary(reader, NodeDocument.TEXT_FIELD));
        spell.close();
        dir.close();
        dir = null;
        long _exit = System.currentTimeMillis();
        log.info("Took {1} (ms) to build SpellChecker index in {0}", spellCheckIndexDir.getAbsolutePath(),
                String.valueOf((_exit - _entr)));
    } catch (Exception exc) {
        log.error("Failed to build spell checker index!", exc);
    } finally {
        if (dir != null) {
            try {
                dir.close();
            } catch (Exception zzz) {
            }
        }
        if (reader != null) {
            readerProvider.closeReader(reader);
        }
    }
}

From source file:com.jaeksoft.searchlib.index.ReaderLocal.java

License:Open Source License

public LuceneDictionary getLuceneDirectionary(String fieldName) {
    return new LuceneDictionary(indexReader, fieldName);
}

From source file:com.leavesfly.lia.tool.CreateSpellCheckerIndex.java

License:Apache License

public static void main(String[] args) throws IOException {

    if (args.length != 3) {
        System.out.println("Usage: java lia.tools.SpellCheckerTest SpellCheckerIndexDir IndexDir IndexField");
        System.exit(1);
    }

    String spellCheckDir = args[0];
    String indexDir = args[1];
    String indexField = args[2];

    System.out.println("Now build SpellChecker index...");
    Directory dir = FSDirectory.open(new File(spellCheckDir));
    SpellChecker spell = new SpellChecker(dir); //#A
    long startTime = System.currentTimeMillis();

    Directory dir2 = FSDirectory.open(new File(indexDir));
    IndexReader r = IndexReader.open(dir2); //#B
    try {
        spell.indexDictionary(new LuceneDictionary(r, indexField)); //#C
    } finally {
        r.close();
    }
    dir.close();
    dir2.close();
    long endTime = System.currentTimeMillis();
    System.out.println("  took " + (endTime - startTime) + " milliseconds");
}

From source file:edu.sdsc.scigraph.vocabulary.VocabularyNeo4jImpl.java

License:Apache License

@Inject
public VocabularyNeo4jImpl(GraphDatabaseService graph,
        @Nullable @IndicatesNeo4jGraphLocation String neo4jLocation, CurieUtil curieUtil,
        NodeTransformer transformer) throws IOException {
    this.graph = graph;
    this.curieUtil = curieUtil;
    this.transformer = transformer;
    if (null != neo4jLocation) {
        Directory indexDirectory = FSDirectory
                .open(new File(new File(neo4jLocation), "index/lucene/node/node_auto_index"));
        Directory spellDirectory = FSDirectory
                .open(new File(new File(neo4jLocation), "index/lucene/spellchecker"));
        spellChecker = new SpellChecker(spellDirectory);
        try (IndexReader reader = IndexReader.open(indexDirectory)) {
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, new KeywordAnalyzer());
            spellChecker.indexDictionary(
                    new LuceneDictionary(reader, NodeProperties.LABEL + LuceneUtils.EXACT_SUFFIX), config,
                    true);
        }
    } else {
        spellChecker = null;
    }
}

From source file:es.pode.indexador.negocio.servicios.indexado.SrvIndexadorServiceImpl.java

License:Open Source License

/**
 * Builds the repository that provides the suggested words.
 * @param directorioIndiceSimple Directory object holding the location of the main index repository
 * @param directorioIndiceSpell Directory object holding the location of the suggested-words repository
 * @throws IOException
 * @throws Exception
 */
private synchronized void spellCheckerAdd(Directory directorioIndiceSimple, Directory directorioIndiceSpell)
        throws IOException, Exception {

    if (logger.isDebugEnabled())
        logger.debug("Checking the spellchecker directory = " + directorioIndiceSpell + " and the normal one = "
                + directorioIndiceSimple);
    if (IndexReader.indexExists(directorioIndiceSimple)) {
        if (logger.isDebugEnabled())
            logger.debug("The simple index " + directorioIndiceSimple + " exists; opening it for reading.");
        IndexReader indexReader = IndexReader.open(directorioIndiceSimple);
        String field = props.getProperty("campo_titulo");
        if (logger.isDebugEnabled())
            logger.debug("Creating a dictionary for field = " + field);
        Dictionary dictionary = new LuceneDictionary(indexReader, field);
        if (logger.isDebugEnabled())
            logger.debug("Creating the spellchecker [" + directorioIndiceSpell + "]");
        SpellChecker spellChecker = new SpellChecker(directorioIndiceSpell);
        if (logger.isDebugEnabled())
            logger.debug("Indexing the dictionary from [" + directorioIndiceSimple + "] into the spell index ["
                    + directorioIndiceSpell + "]");
        spellChecker.indexDictionary(dictionary);
        field = props.getProperty("campo_descripcion");
        if (logger.isDebugEnabled())
            logger.debug("Creating a dictionary for field = " + field);
        dictionary = new LuceneDictionary(indexReader, field);
        spellChecker.indexDictionary(dictionary);
        indexReader.close();
        directorioIndiceSpell.close();
    } else {
        logger.error("The index does not exist in directory [" + directorioIndiceSimple + "]");
        throw new Exception("The index does not exist in directory = " + directorioIndiceSimple);
    }
}

From source file:fr.mael.microrss.dao.impl.GenericDaoImpl.java

License:Open Source License

/**
 * @see fr.mael.jmusic.dao.GenericDao#buildSpellIndex()
 */
@Override
public void buildSpellIndex() throws IOException {
    FullTextSession searchSession = Search.getFullTextSession(sessionFactory.getCurrentSession());
    SearchFactory searchFactory = searchSession.getSearchFactory();
    IndexReader reader = searchFactory.getIndexReaderAccessor().open(getPersistentClass());
    try {
        FSDirectory spellDir = FSDirectory
                .open(new File(configuration.getIndexDir() + "/spell_" + getPersistentClass().getName()));
        SpellChecker spellChecker = new SpellChecker(spellDir);
        Dictionary dictionary = new LuceneDictionary(reader, "name");
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,
                searchFactory.getAnalyzer(getPersistentClass()));
        spellChecker.indexDictionary(dictionary, config, true);
    } catch (Exception e) {
        log.error("Error building spell index", e);
    } finally {
        searchFactory.getIndexReaderAccessor().close(reader);
    }

}

From source file:lucee.runtime.search.lucene2.LuceneSearchCollection.java

License:Open Source License

private void indexSpellCheck(String id) throws SearchException {
    if (!spellcheck)
        return;

    IndexReader reader = null;
    FSDirectory spellDir = null;

    Resource dir = _createSpellDirectory(id);
    try {
        File spellFile = FileWrapper.toFile(dir);
        spellDir = FSDirectory.getDirectory(spellFile);
        reader = _getReader(id, false);
        Dictionary dictionary = new LuceneDictionary(reader, "contents");

        SpellChecker spellChecker = new SpellChecker(spellDir);
        spellChecker.indexDictionary(dictionary);

    } catch (IOException ioe) {
        throw new SearchException(ioe);
    } finally {
        flushEL(reader);
        closeEL(reader);
    }
}

From source file:org.ala.lucene.Autocompleter.java

License:Open Source License

@SuppressWarnings("unchecked")
public void reIndex(Directory sourceDirectory, String fieldToAutocomplete, boolean createNewIndex)
        throws CorruptIndexException, IOException {
    // build a dictionary (from the spell package)
    IndexReader sourceReader = IndexReader.open(sourceDirectory);

    LuceneDictionary dict = new LuceneDictionary(sourceReader, fieldToAutocomplete);

    // code from
    // org.apache.lucene.search.spell.SpellChecker.indexDictionary(
    // Dictionary)
    IndexWriter.unlock(autoCompleteDirectory);

    // use a custom analyzer so we can do EdgeNGramFiltering
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(SolrUtils.BIE_LUCENE_VERSION, new Analyzer() {
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            final StandardTokenizer src = new StandardTokenizer(SolrUtils.BIE_LUCENE_VERSION, reader);
            TokenStream result = src; // build the filter chain on the same tokenizer handed to TokenStreamComponents
            result = new StandardFilter(SolrUtils.BIE_LUCENE_VERSION, result);
            result = new LowerCaseFilter(SolrUtils.BIE_LUCENE_VERSION, result);
            result = new StopFilter(SolrUtils.BIE_LUCENE_VERSION, result,
                    new CharArraySet(SolrUtils.BIE_LUCENE_VERSION,
                            new HashSet<String>(Arrays.asList(ENGLISH_STOP_WORDS)), true));
            result = new EdgeNGramTokenFilter(result, Side.FRONT, 1, 20);
            return new TokenStreamComponents(src, result) {
                @Override
                protected void setReader(final Reader reader) throws IOException {
                    super.setReader(reader);
                }

            };
        }
        //            public TokenStream tokenStream(String fieldName, Reader reader) {
        //            TokenStream result = new StandardTokenizer(SolrUtils.BIE_LUCENE_VERSION, reader);
        //            
        //            result = new StandardFilter(SolrUtils.BIE_LUCENE_VERSION, result);
        //            result = new LowerCaseFilter(SolrUtils.BIE_LUCENE_VERSION, result);
        //            //result = new ISOLatin1AccentFilter(result);
        //            result = new StopFilter(SolrUtils.BIE_LUCENE_VERSION, result, new HashSet<String>(Arrays.asList(ENGLISH_STOP_WORDS)));
        //            result = new EdgeNGramTokenFilter(result, Side.FRONT,1, 20);
        //            
        //            return result;
        //          }
    });
    if (createNewIndex) {
        indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    } else {
        indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }
    indexWriterConfig.setMaxBufferedDocs(150);
    IndexWriter writer = new IndexWriter(autoCompleteDirectory, indexWriterConfig);
    //        writer.setMergeFactor(300);

    // go through every word, storing the original word (incl. n-grams)
    // and the number of times it occurs
    Map<String, Integer> wordsMap = new HashMap<String, Integer>();

    Iterator<String> iter = (Iterator<String>) dict.getWordsIterator();
    while (iter.hasNext()) {
        String word = iter.next();

        int len = word.length();
        if (len < 3) {
            continue; // too short we bail but "too long" is fine...
        }

        if (wordsMap.containsKey(word)) {
            throw new IllegalStateException("This should never happen in Lucene 2.3.2");
            // wordsMap.put(word, wordsMap.get(word) + 1);
        } else {
            // use the number of documents this word appears in
            wordsMap.put(word, sourceReader.docFreq(new Term(fieldToAutocomplete, word)));
        }
    }

    for (String word : wordsMap.keySet()) {
        // ok index the word
        Document doc = new Document();
        doc.add(new Field(SOURCE_WORD_FIELD, word, Field.Store.YES, Field.Index.NOT_ANALYZED)); // orig term
        doc.add(new Field(GRAMMED_WORDS_FIELD, word, Field.Store.YES, Field.Index.ANALYZED)); // grammed
        doc.add(new Field(COUNT_FIELD, Integer.toString(wordsMap.get(word)), Field.Store.NO,
                Field.Index.NOT_ANALYZED)); // count

        writer.addDocument(doc);
    }

    sourceReader.close();

    // close writer
    writer.forceMerge(1);
    writer.close();

    // re-open our reader
    reOpenReader();
}