Example usage for the org.apache.lucene.search.spell.LuceneDictionary constructor

Introduction

This page collects example usages of the org.apache.lucene.search.spell.LuceneDictionary constructor, LuceneDictionary(IndexReader reader, String field).

Prototype

public LuceneDictionary(IndexReader reader, String field) 

Document

Creates a new Dictionary, pulling source terms from the specified field in the provided reader.
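
A minimal sketch of the typical pattern, assuming the Lucene 3.x spell-check API that most of the examples below use; the index paths and the "title" field name are placeholders, not taken from any of the projects shown here:

import java.io.File;
import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class LuceneDictionaryExample {

    public static void main(String[] args) throws IOException {
        // Placeholder locations: point these at a real source index and a spell index directory.
        Directory sourceDir = FSDirectory.open(new File("/path/to/source-index"));
        Directory spellDir = FSDirectory.open(new File("/path/to/spell-index"));

        IndexReader reader = IndexReader.open(sourceDir);
        try {
            // Enumerate every term stored in the "title" field of the source index ...
            LuceneDictionary dictionary = new LuceneDictionary(reader, "title");

            // ... and feed those terms to a SpellChecker, which builds its own n-gram index.
            SpellChecker spellChecker = new SpellChecker(spellDir);
            spellChecker.indexDictionary(dictionary);
            spellChecker.close();
        } finally {
            reader.close();
            spellDir.close();
            sourceDir.close();
        }
    }
}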

Usage

From source file:aos.lucene.tools.CreateSpellCheckerIndex.java

License:Apache License

public static void main(String[] args) throws IOException {

    if (args.length != 3) {
        LOGGER.info("Usage: java lia.tools.SpellCheckerTest SpellCheckerIndexDir IndexDir IndexField");
        System.exit(1);
    }

    String spellCheckDir = args[0];
    String indexDir = args[1];
    String indexField = args[2];

    LOGGER.info("Now build SpellChecker index...");
    Directory dir = FSDirectory.open(new File(spellCheckDir));
    SpellChecker spell = new SpellChecker(dir); //#A
    long startTime = System.currentTimeMillis();

    Directory dir2 = FSDirectory.open(new File(indexDir));
    IndexReader r = DirectoryReader.open(dir2); //#B
    try {
        spell.indexDictionary(new LuceneDictionary(r, indexField)); //#C
    } finally {
        r.close();
    }
    dir.close();
    dir2.close();
    long endTime = System.currentTimeMillis();
    LOGGER.info("  took " + (endTime - startTime) + " milliseconds");
}

From source file:com.appeligo.lucene.DidYouMeanIndexer.java

License:Apache License

public static void createSpellIndex(String field, Directory originalIndexDirectory,
        Directory spellIndexDirectory) throws IOException {

    IndexReader indexReader = null;
    try {
        indexReader = IndexReader.open(originalIndexDirectory);
        Dictionary dictionary = new LuceneDictionary(indexReader, field);
        SpellChecker spellChecker = new SpellChecker(spellIndexDirectory);
        spellChecker.indexDictionary(dictionary);
        if (log.isDebugEnabled()) {
            spellChecker = new SpellChecker(spellIndexDirectory); // need to re-open to see it work
            log.debug("Does 'next' exist in the dictionary? " + spellChecker.exist("next"));
            StringBuilder sb = new StringBuilder();
            for (String s : spellChecker.suggestSimilar("noxt", 5, indexReader, "compositeField", true)) {
                sb.append(s + ", ");
            }
            log.debug("Best suggestions for 'noxt': " + sb);
        }
    } finally {
        if (indexReader != null) {
            indexReader.close();
        }
    }
}

From source file:com.ikon.module.db.stuff.IndexHelper.java

License:Open Source License

protected void buildSpellCheckerIndex(SearchFactory searchFactory) {
    IndexReader reader = null;
    Directory dir = null;
    long _entr = System.currentTimeMillis();
    File spellCheckIndexDir = new File("lucene_index/spellcheck");
    log.info("Building SpellChecker index in {0}", spellCheckIndexDir.getAbsolutePath());
    ReaderProvider readerProvider = searchFactory.getReaderProvider();

    try {
        reader = readerProvider.openReader(searchFactory.getDirectoryProviders(NodeDocumentVersion.class)[0]);
        dir = FSDirectory.open(spellCheckIndexDir);
        SpellChecker spell = new SpellChecker(dir);
        spell.clearIndex();
        spell.indexDictionary(new LuceneDictionary(reader, NodeDocument.TEXT_FIELD));
        spell.close();
        dir.close();
        dir = null;
        long _exit = System.currentTimeMillis();
        log.info("Took {1} (ms) to build SpellChecker index in {0}", spellCheckIndexDir.getAbsolutePath(),
                String.valueOf((_exit - _entr)));
    } catch (Exception exc) {
        log.error("Failed to build spell checker index!", exc);
    } finally {
        if (dir != null) {
            try {
                dir.close();
            } catch (Exception zzz) {
            }
        }
        if (reader != null) {
            readerProvider.closeReader(reader);
        }
    }
}

From source file:com.jaeksoft.searchlib.index.ReaderLocal.java

License:Open Source License

public LuceneDictionary getLuceneDirectionary(String fieldName) {
    return new LuceneDictionary(indexReader, fieldName);
}

From source file:com.leavesfly.lia.tool.CreateSpellCheckerIndex.java

License:Apache License

public static void main(String[] args) throws IOException {

    if (args.length != 3) {
        System.out.println("Usage: java lia.tools.SpellCheckerTest SpellCheckerIndexDir IndexDir IndexField");
        System.exit(1);
    }

    String spellCheckDir = args[0];
    String indexDir = args[1];
    String indexField = args[2];

    System.out.println("Now build SpellChecker index...");
    Directory dir = FSDirectory.open(new File(spellCheckDir));
    SpellChecker spell = new SpellChecker(dir); //#A
    long startTime = System.currentTimeMillis();

    Directory dir2 = FSDirectory.open(new File(indexDir));
    IndexReader r = IndexReader.open(dir2); //#B
    try {
        spell.indexDictionary(new LuceneDictionary(r, indexField)); //#C
    } finally {
        r.close();
    }
    dir.close();
    dir2.close();
    long endTime = System.currentTimeMillis();
    System.out.println("  took " + (endTime - startTime) + " milliseconds");
}

From source file:edu.sdsc.scigraph.vocabulary.VocabularyNeo4jImpl.java

License:Apache License

@Inject
public VocabularyNeo4jImpl(GraphDatabaseService graph,
        @Nullable @IndicatesNeo4jGraphLocation String neo4jLocation, CurieUtil curieUtil,
        NodeTransformer transformer) throws IOException {
    this.graph = graph;
    this.curieUtil = curieUtil;
    this.transformer = transformer;
    if (null != neo4jLocation) {
        Directory indexDirectory = FSDirectory
                .open(new File(new File(neo4jLocation), "index/lucene/node/node_auto_index"));
        Directory spellDirectory = FSDirectory
                .open(new File(new File(neo4jLocation), "index/lucene/spellchecker"));
        spellChecker = new SpellChecker(spellDirectory);
        try (IndexReader reader = IndexReader.open(indexDirectory)) {
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, new KeywordAnalyzer());
            spellChecker.indexDictionary(
                    new LuceneDictionary(reader, NodeProperties.LABEL + LuceneUtils.EXACT_SUFFIX), config,
                    true);
        }
    } else {
        spellChecker = null;
    }
}

From source file:es.pode.indexador.negocio.servicios.indexado.SrvIndexadorServiceImpl.java

License:Open Source License

/**
 * Builds the repository that provides the suggested words.
 * @param directorioIndiceSimple Directory object holding the location of the main index repository
 * @param directorioIndiceSpell Directory object holding the location of the suggested-words repository
 * @throws IOException
 * @throws Exception
 */
private synchronized void spellCheckerAdd(Directory directorioIndiceSimple, Directory directorioIndiceSpell)
        throws IOException, Exception {

    if (logger.isDebugEnabled())
        logger.debug("Checking the spellchecker directory = " + directorioIndiceSpell + " and the normal one = "
                + directorioIndiceSimple);
    if (IndexReader.indexExists(directorioIndiceSimple)) {
        if (logger.isDebugEnabled())
            logger.debug("The simple index " + directorioIndiceSimple + " exists; opening it for reading.");
        IndexReader indexReader = IndexReader.open(directorioIndiceSimple);
        String field = props.getProperty("campo_titulo");
        if (logger.isDebugEnabled())
            logger.debug("Creating a dictionary for field = " + field);
        Dictionary dictionary = new LuceneDictionary(indexReader, field);
        if (logger.isDebugEnabled())
            logger.debug("Creating the spellchecker [" + directorioIndiceSpell + "]");
        SpellChecker spellChecker = new SpellChecker(directorioIndiceSpell);
        if (logger.isDebugEnabled())
            logger.debug("Indexing the dictionary from [" + directorioIndiceSimple + "] into the spell index ["
                    + directorioIndiceSpell + "]");
        spellChecker.indexDictionary(dictionary);
        field = props.getProperty("campo_descripcion");
        if (logger.isDebugEnabled())
            logger.debug("Creating a dictionary for field = " + field);
        dictionary = new LuceneDictionary(indexReader, field);
        spellChecker.indexDictionary(dictionary);
        indexReader.close();
        directorioIndiceSpell.close();
    } else {
        logger.error("The index does not exist in directory [" + directorioIndiceSimple + "]");
        throw new Exception("The index does not exist in directory = " + directorioIndiceSimple);
    }
}

From source file:fr.mael.microrss.dao.impl.GenericDaoImpl.java

License:Open Source License

/**
 * @see fr.mael.jmusic.dao.GenericDao#buildSpellIndex()
 */
@Override
public void buildSpellIndex() throws IOException {
    FullTextSession searchSession = Search.getFullTextSession(sessionFactory.getCurrentSession());
    SearchFactory searchFactory = searchSession.getSearchFactory();
    IndexReader reader = searchFactory.getIndexReaderAccessor().open(getPersistentClass());
    try {
        FSDirectory spellDir = FSDirectory
                .open(new File(configuration.getIndexDir() + "/spell_" + getPersistentClass().getName()));
        SpellChecker spellChecker = new SpellChecker(spellDir);
        Dictionary dictionary = new LuceneDictionary(reader, "name");
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,
                searchFactory.getAnalyzer(getPersistentClass()));
        spellChecker.indexDictionary(dictionary, config, true);
    } catch (Exception e) {
        log.error("Error building spell index", e);
    } finally {
        searchFactory.getIndexReaderAccessor().close(reader);
    }

}

From source file:lucee.runtime.search.lucene2.LuceneSearchCollection.java

License:Open Source License

private void indexSpellCheck(String id) throws SearchException {
    if (!spellcheck)
        return;

    IndexReader reader = null;
    FSDirectory spellDir = null;

    Resource dir = _createSpellDirectory(id);
    try {
        File spellFile = FileWrapper.toFile(dir);
        spellDir = FSDirectory.getDirectory(spellFile);
        reader = _getReader(id, false);
        Dictionary dictionary = new LuceneDictionary(reader, "contents");

        SpellChecker spellChecker = new SpellChecker(spellDir);
        spellChecker.indexDictionary(dictionary);

    } catch (IOException ioe) {
        throw new SearchException(ioe);
    } finally {
        flushEL(reader);
        closeEL(reader);
    }
}

From source file:org.ala.lucene.Autocompleter.java

License:Open Source License

@SuppressWarnings("unchecked")
public void reIndex(Directory sourceDirectory, String fieldToAutocomplete, boolean createNewIndex)
        throws CorruptIndexException, IOException {
    // build a dictionary (from the spell package)
    IndexReader sourceReader = IndexReader.open(sourceDirectory);

    LuceneDictionary dict = new LuceneDictionary(sourceReader, fieldToAutocomplete);

    // code from
    // org.apache.lucene.search.spell.SpellChecker.indexDictionary(
    // Dictionary)
    IndexWriter.unlock(autoCompleteDirectory);

    // use a custom analyzer so we can do EdgeNGramFiltering
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(SolrUtils.BIE_LUCENE_VERSION, new Analyzer() {
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            final StandardTokenizer src = new StandardTokenizer(SolrUtils.BIE_LUCENE_VERSION, reader);
            TokenStream result = src; // build the filter chain on the same tokenizer handed to TokenStreamComponents
            result = new StandardFilter(SolrUtils.BIE_LUCENE_VERSION, result);
            result = new LowerCaseFilter(SolrUtils.BIE_LUCENE_VERSION, result);
            result = new StopFilter(SolrUtils.BIE_LUCENE_VERSION, result,
                    new CharArraySet(SolrUtils.BIE_LUCENE_VERSION,
                            new HashSet<String>(Arrays.asList(ENGLISH_STOP_WORDS)), true));
            result = new EdgeNGramTokenFilter(result, Side.FRONT, 1, 20);
            return new TokenStreamComponents(src, result) {
                @Override
                protected void setReader(final Reader reader) throws IOException {
                    super.setReader(reader);
                }

            };
        }
        //            public TokenStream tokenStream(String fieldName, Reader reader) {
        //            TokenStream result = new StandardTokenizer(SolrUtils.BIE_LUCENE_VERSION, reader);
        //            
        //            result = new StandardFilter(SolrUtils.BIE_LUCENE_VERSION, result);
        //            result = new LowerCaseFilter(SolrUtils.BIE_LUCENE_VERSION, result);
        //            //result = new ISOLatin1AccentFilter(result);
        //            result = new StopFilter(SolrUtils.BIE_LUCENE_VERSION, result, new HashSet<String>(Arrays.asList(ENGLISH_STOP_WORDS)));
        //            result = new EdgeNGramTokenFilter(result, Side.FRONT,1, 20);
        //            
        //            return result;
        //          }
    });
    if (createNewIndex) {
        indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    } else {
        indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }
    indexWriterConfig.setMaxBufferedDocs(150);
    IndexWriter writer = new IndexWriter(autoCompleteDirectory, indexWriterConfig);
    //        writer.setMergeFactor(300);

    // go through every word, storing the original word (incl. n-grams)
    // and the number of times it occurs
    Map<String, Integer> wordsMap = new HashMap<String, Integer>();

    Iterator<String> iter = (Iterator<String>) dict.getWordsIterator();
    while (iter.hasNext()) {
        String word = iter.next();

        int len = word.length();
        if (len < 3) {
            continue; // too short we bail but "too long" is fine...
        }

        if (wordsMap.containsKey(word)) {
            throw new IllegalStateException("This should never happen in Lucene 2.3.2");
            // wordsMap.put(word, wordsMap.get(word) + 1);
        } else {
            // use the number of documents this word appears in
            wordsMap.put(word, sourceReader.docFreq(new Term(fieldToAutocomplete, word)));
        }
    }

    for (String word : wordsMap.keySet()) {
        // ok index the word
        Document doc = new Document();
        doc.add(new Field(SOURCE_WORD_FIELD, word, Field.Store.YES, Field.Index.NOT_ANALYZED)); // orig term
        doc.add(new Field(GRAMMED_WORDS_FIELD, word, Field.Store.YES, Field.Index.ANALYZED)); // grammed
        doc.add(new Field(COUNT_FIELD, Integer.toString(wordsMap.get(word)), Field.Store.NO,
                Field.Index.NOT_ANALYZED)); // count

        writer.addDocument(doc);
    }

    sourceReader.close();

    // close writer
    writer.forceMerge(1);
    writer.close();

    // re-open our reader
    reOpenReader();
}