Example usage for org.apache.lucene.search.spell SpellChecker SpellChecker

Introduction

On this page you can find example usages of the org.apache.lucene.search.spell.SpellChecker constructor SpellChecker(Directory).

Prototype

public SpellChecker(Directory spellIndex) throws IOException

Source Link

Document

Use the given directory as a spell checker index with a LevenshteinDistance as the default StringDistance.

Usage

From source file:org.olat.search.service.spell.SearchSpellChecker.java

License:Apache License

/**
 * Creates a new spell-check index based on the search index.
 * <p>
 * One partial spell index is built per document field (content, title, description, author)
 * below {@code spellDictionaryPath}; the partial indexes are then merged into a single index
 * located at {@code spellDictionaryPath}, which backs the {@code spellChecker} field.
 * <p>
 * Does nothing when spell checking is disabled. I/O failures are logged as warnings and are not
 * propagated to the caller.
 */
public void createSpellIndex() {
    if (!isSpellCheckEnabled) {
        return;
    }
    IndexReader indexReader = null;
    try {
        log.info("Start generating Spell-Index...");
        // The start time is only needed for the debug message at the end.
        long startSpellIndexTime = 0;
        if (log.isDebug()) {
            startSpellIndexTime = System.currentTimeMillis();
        }
        final Directory indexDir = FSDirectory.open(new File(indexPath));
        indexReader = IndexReader.open(indexDir);
        // NOTE(review): the per-field SpellChecker instances and their directories are never
        // closed here; confirm whether the Lucene version in use offers SpellChecker.close().
        // 1. Create content spellIndex
        final File spellDictionaryFile = new File(spellDictionaryPath);
        final Directory contentSpellIndexDirectory = FSDirectory
                .open(new File(spellDictionaryPath + CONTENT_PATH));
        final SpellChecker contentSpellChecker = new SpellChecker(contentSpellIndexDirectory);
        final Dictionary contentDictionary = new LuceneDictionary(indexReader,
                AbstractOlatDocument.CONTENT_FIELD_NAME);
        contentSpellChecker.indexDictionary(contentDictionary);
        // 2. Create title spellIndex
        final Directory titleSpellIndexDirectory = FSDirectory
                .open(new File(spellDictionaryPath + TITLE_PATH));
        final SpellChecker titleSpellChecker = new SpellChecker(titleSpellIndexDirectory);
        final Dictionary titleDictionary = new LuceneDictionary(indexReader,
                AbstractOlatDocument.TITLE_FIELD_NAME);
        titleSpellChecker.indexDictionary(titleDictionary);
        // 3. Create description spellIndex
        final Directory descriptionSpellIndexDirectory = FSDirectory
                .open(new File(spellDictionaryPath + DESCRIPTION_PATH));
        final SpellChecker descriptionSpellChecker = new SpellChecker(descriptionSpellIndexDirectory);
        final Dictionary descriptionDictionary = new LuceneDictionary(indexReader,
                AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
        descriptionSpellChecker.indexDictionary(descriptionDictionary);
        // 4. Create author spellIndex
        final Directory authorSpellIndexDirectory = FSDirectory
                .open(new File(spellDictionaryPath + AUTHOR_PATH));
        final SpellChecker authorSpellChecker = new SpellChecker(authorSpellIndexDirectory);
        final Dictionary authorDictionary = new LuceneDictionary(indexReader,
                AbstractOlatDocument.AUTHOR_FIELD_NAME);
        authorSpellChecker.indexDictionary(authorDictionary);

        // Merge all part spell indexes (content, title etc.) into one common spell index
        final Directory spellIndexDirectory = FSDirectory.open(spellDictionaryFile);
        final IndexWriter merger = new IndexWriter(spellIndexDirectory,
                new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);
        try {
            final Directory[] directories = { contentSpellIndexDirectory, titleSpellIndexDirectory,
                    descriptionSpellIndexDirectory, authorSpellIndexDirectory };
            merger.addIndexesNoOptimize(directories);
            merger.optimize();
        } finally {
            // Always release the writer (and its write lock), even when the merge fails;
            // otherwise the next attempt to rebuild the spell index cannot obtain the lock.
            merger.close();
        }
        spellChecker = new SpellChecker(spellIndexDirectory);
        spellChecker.setAccuracy(0.7f);
        if (log.isDebug()) {
            log.debug("SpellIndex created in " + (System.currentTimeMillis() - startSpellIndexTime) + "ms");
        }
        log.info("New generated Spell-Index ready to use.");
    } catch (final IOException ioEx) {
        log.warn("Can not create SpellIndex", ioEx);
    } finally {
        if (indexReader != null) {
            try {
                indexReader.close();
            } catch (final IOException e) {
                log.warn("Can not close indexReader properly", e);
            }
        }
    }
}

From source file:org.sakaiproject.search.journal.impl.JournaledFSIndexStorage.java

License:Educational Community License

/**
 * Rebuilds the "did you mean" spell index from the contents field of the given index reader.
 * <p>
 * The spell index lives in the {@code spellindex} directory below the local index base and is
 * cleared before being refilled. Nothing is done unless the
 * {@code search.experimental.didyoumean} configuration flag is set. I/O failures are logged and
 * not propagated.
 *
 * @param indexReader reader on the main search index; when {@code null} the creation is aborted
 */
private void createSpellIndex(IndexReader indexReader) {
    if (!serverConfigurationService.getBoolean("search.experimental.didyoumean", false)) {
        return;
    }

    log.info("create Spell Index");

    long start = System.currentTimeMillis();
    try {

        log.info("main index is in: " + journalSettings.getSearchIndexDirectory());
        log.info("local base is: " + journalSettings.getLocalIndexBase());
        spellIndexDirectory = new NIOFSDirectory(new File(journalSettings.getLocalIndexBase() + "/spellindex"));
        if (indexReader == null) {
            log.info("unable to get index reader aborting spellindex creation");
            return;
        }
        Dictionary dictionary = new LuceneDictionary(indexReader, SearchService.FIELD_CONTENTS);
        SpellChecker spellChecker = new SpellChecker(spellIndexDirectory);
        spellChecker.clearIndex();
        spellChecker.indexDictionary(dictionary);
        log.info("New Spell dictionary constructed in " + (System.currentTimeMillis() - start));
    } catch (IOException e) {
        // Report through the logger so the failure ends up in the server log with its context.
        log.error("Failed to create the spell index", e);
    }

    log.info("All done in " + (System.currentTimeMillis() - start));

}

From source file:org.silverpeas.core.index.indexing.model.DidYouMeanIndexer.java

License:Open Source License

/**
 * Clears all the entries from the given spelling index.
 * @param pathSpellChecker the path of the spelling index to clear. The path must be a directory
 * path.
 * @return true if the index has been cleared; false if the path does not exist or if an I/O
 * error occurred while clearing it (the error is logged).
 */
public static boolean clearSpellIndex(String pathSpellChecker) {
    boolean isCleared = false;

    // create a file object with given path
    File file = new File(pathSpellChecker);
    if (file.exists()) {
        // The directory is declared as a resource of its own so that it is released as well.
        // Resources are closed in reverse order: the spell checker first, then the directory.
        try (FSDirectory directory = FSDirectory.open(file.toPath());
                SpellChecker spell = new SpellChecker(directory)) {
            // if index exists, clears its content
            spell.clearIndex();
            isCleared = true;
        } catch (IOException e) {
            SilverLogger.getLogger(DidYouMeanIndexer.class).error(e);
        }
    }
    return isCleared;
}

From source file:org.silverpeas.core.index.search.model.DidYouMeanSearcher.java

License:Open Source License

/**
 * Looks for spelling suggestions for the words of the query carried by the given description.
 * <p>
 * The query is parsed (a second attempt is made with its special characters escaped), split
 * into words, and each word is checked against every existing spelling index
 * ({@code <index path>Spell}). One spell checker per word is registered in
 * {@code spellCheckers}; the final answer is produced by {@code buildFinalResult()}.
 *
 * @param queryDescription provides the query text, the requested language and where to search
 * @return the outcome of {@code buildFinalResult()}, or {@code null} when the description has no
 * defined query
 * @throws org.silverpeas.core.index.search.model.ParseException if the query cannot be parsed,
 * even once escaped
 * @throws IOException declared by the signature; I/O errors met while consulting the spelling
 * indexes are logged instead of being rethrown
 */
public String[] suggest(QueryDescription queryDescription)
        throws org.silverpeas.core.index.search.model.ParseException, IOException {
    spellCheckers.clear();

    if (!StringUtil.isDefined(queryDescription.getQuery())) {
        return null;
    }

    // This field is only used to parse the query string and to print the parsed query back as
    // the plain words that will be checked
    final String field = "content";

    // parses the query string to prepare the search
    final Analyzer languageAnalyzer = indexManager.getAnalyzer(queryDescription.getRequestedLanguage());
    final QueryParser parser = new QueryParser(field, languageAnalyzer);

    Query parsedQuery;
    try {
        parsedQuery = parser.parse(queryDescription.getQuery());
    } catch (org.apache.lucene.queryparser.classic.ParseException rawFailure) {
        // second chance: parse again with the query parser's special characters escaped
        try {
            parsedQuery = parser.parse(QueryParser.escape(queryDescription.getQuery()));
        } catch (org.apache.lucene.queryparser.classic.ParseException escapedFailure) {
            throw new org.silverpeas.core.index.search.model.ParseException("DidYouMeanSearcher",
                    escapedFailure);
        }
    }

    // splits the query so that each word is checked separately
    this.query = parsedQuery.toString(field);
    final StringTokenizer words = new StringTokenizer(query);

    // gets spelling index paths
    final Set<String> indexPaths = indexSearcher.getIndexPathSet(queryDescription.getWhereToSearch());

    try {
        while (words.hasMoreTokens()) {
            final SpellChecker checker = new SpellChecker(FSDirectory.open(uploadIndexDir.toPath()));
            spellCheckers.add(checker);
            final String word = words.nextToken().replaceAll("\"", "");
            for (String indexPath : indexPaths) {
                final File spellIndex = new File(indexPath + "Spell");
                if (!spellIndex.exists()) {
                    continue;
                }

                // point the checker at this spelling index
                checker.setSpellIndex(FSDirectory.open(spellIndex.toPath()));

                // a word already known by the dictionary needs no suggestion: suggestSimilar
                // would only give the same word back
                if (!checker.exist(word)) {
                    checker.suggestSimilar(word, 1);
                }
            }
        }
    } catch (IOException e) {
        SilverLogger.getLogger(this).error(e.getMessage(), e);
    }

    return buildFinalResult();
}

From source file:org.silverpeas.search.indexEngine.model.DidYouMeanIndexer.java

License:Open Source License

/**
 * Creates or updates a spelling index. The spelling index is created or updated from an existing
 * index. The spelling index is used to suggest words when a user executes a query that returns
 * unsatisfactory results. If a spelling index already exists, only the new words contained in
 * the index source will be added; otherwise a new index will be created.
 * <p>
 * Invalid (null or empty) arguments and I/O failures are traced as errors and not propagated.
 * @param field name of the field of the index source that will be used to feed the spelling index
 * @param originalIndexDirectory represents the source index path
 * @param spellIndexDirectory represents the spelling index path
 */
public static void createSpellIndex(String field, String originalIndexDirectory, String spellIndexDirectory) {
    // stop the process if method parameters is null or empty
    if (!StringUtil.isDefined(field) || !StringUtil.isDefined(originalIndexDirectory)
            || !StringUtil.isDefined(spellIndexDirectory)) {
        SilverTrace.error("indexEngine", DidYouMeanIndexer.class.toString(), "root.EX_INVALID_ARG");
        return;
    }
    // resources to release in the finally block, whatever the outcome
    IndexReader indexReader = null;
    SpellChecker spellChecker = null;
    FSDirectory directory = null;

    try {
        // open the spelling index directory
        directory = FSDirectory.open(new File(spellIndexDirectory));
        // open original index
        indexReader = IndexReader.open(FSDirectory.open(new File(originalIndexDirectory)));
        // create a Lucene dictionary with the original index
        Dictionary dictionary = new LuceneDictionary(indexReader, field);
        // index the dictionary into the spelling index
        spellChecker = new SpellChecker(directory);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36,
                new StandardAnalyzer(Version.LUCENE_36));
        spellChecker.indexDictionary(dictionary, config, true);
    } catch (CorruptIndexException e) {
        SilverTrace.error("indexEngine", DidYouMeanIndexer.class.toString(), "root.EX_INDEX_FAILED", e);
    } catch (IOException e) {
        SilverTrace.error("indexEngine", DidYouMeanIndexer.class.toString(), "root.EX_LOAD_IO_EXCEPTION", e);
    } finally {
        // The spell checker is closed here rather than at the end of the try block so that it is
        // released even when the indexing fails; it goes first since it works on the directory.
        IOUtils.closeQuietly(spellChecker);
        IOUtils.closeQuietly(indexReader);
        IOUtils.closeQuietly(directory);
    }

}

From source file:org.silverpeas.search.indexEngine.model.DidYouMeanIndexer.java

License:Open Source License

/**
 * Clears all the entries from the given spelling index.
 * @param pathSpellChecker the path of the spelling index to clear. The path must be a directory
 * path.
 * @return true if the index has been cleared; false if the path does not exist or if an I/O
 * error occurred while clearing it (the error is traced).
 */
public static boolean clearSpellIndex(String pathSpellChecker) {
    // create a file object with given path
    File file = new File(pathSpellChecker);
    if (!file.exists()) {
        return false;
    }

    boolean isCleared = false;
    SpellChecker spell = null;
    try {
        // create a spellChecker on the existing index and clear its content
        spell = new SpellChecker(FSDirectory.open(file));
        spell.clearIndex();
        isCleared = true;
    } catch (IOException e) {
        SilverTrace.error("indexEngine", DidYouMeanIndexer.class.toString(), "root.EX_LOAD_IO_EXCEPTION", e);
    } finally {
        // release the spell checker whether or not the clearing succeeded
        if (spell != null) {
            try {
                spell.close();
            } catch (IOException e) {
                SilverTrace.error("indexEngine", DidYouMeanIndexer.class.toString(),
                        "root.EX_LOAD_IO_EXCEPTION", e);
            }
        }
    }
    return isCleared;
}

From source file:org.silverpeas.search.searchEngine.model.DidYouMeanSearcher.java

License:Open Source License

/**
 * Looks for spelling suggestions for the words of the query carried by the given description.
 * <p>
 * The query is parsed, split into words, and each word is checked against every existing
 * spelling index ({@code <index path>Spell}). One spell checker per word is registered in
 * {@code spellCheckers}; the final answer is produced by {@code buildFinalResult()}.
 *
 * @param queryDescription provides the query text, the requested language and the
 * space/component pairs to search in
 * @return the outcome of {@code buildFinalResult()}, or {@code null} when the description has no
 * defined query
 * @throws org.silverpeas.search.searchEngine.model.ParseException if the query cannot be parsed
 * @throws IOException declared by the signature; I/O errors met while consulting the spelling
 * indexes are traced instead of being rethrown
 */
public String[] suggest(QueryDescription queryDescription)
        throws org.silverpeas.search.searchEngine.model.ParseException, IOException {

    String[] suggestions = null;
    // The variable field is only used to parse the query String and to obtain the words that will
    // be used for the search
    final String field = "content";
    if (StringUtil.isDefined(queryDescription.getQuery())) {

        // parses the query string to prepare the search
        Analyzer analyzer = new IndexManager().getAnalyzer(queryDescription.getRequestedLanguage());
        QueryParser queryParser = new QueryParser(Version.LUCENE_36, field, analyzer);

        Query parsedQuery;
        try {
            parsedQuery = queryParser.parse(queryDescription.getQuery());
        } catch (ParseException exception) {
            throw new org.silverpeas.search.searchEngine.model.ParseException("DidYouMeanSearcher", exception);
        }

        // splits the query to realize a separated search with each word
        this.query = parsedQuery.toString(field);
        StringTokenizer tokens = new StringTokenizer(query);

        // gets spelling index paths
        WAIndexSearcher waIndexSearcher = new WAIndexSearcher();
        Set<String> spellIndexPaths = waIndexSearcher
                .getIndexPathSet(queryDescription.getSpaceComponentPairSet());

        try {
            while (tokens.hasMoreTokens()) {
                SpellChecker spellCheck = new SpellChecker(FSDirectory.open(uploadIndexDir));
                spellCheckers.add(spellCheck);
                String token = tokens.nextToken().replaceAll("\"", "");
                for (String path : spellIndexPaths) {

                    // create a file object with given path
                    File file = new File(path + "Spell");

                    if (file.exists()) {

                        // point the spellChecker at this spelling index
                        FSDirectory directory = FSDirectory.open(file);
                        spellCheck.setSpellIndex(directory);

                        // if the word exists in this dictionary, no suggestion is requested from
                        // it because the suggestSimilar method would return the very same word
                        if (spellCheck.exist(token)) {
                            continue;
                        }
                        spellCheck.suggestSimilar(token, 1);

                    }
                }
            }
        } catch (IOException e) {
            // trace the error under this class (it used to be attributed to DidYouMeanIndexer)
            SilverTrace.error("searchEngine", getClass().toString(), "root.EX_LOAD_IO_EXCEPTION",
                    e);
        }

        suggestions = buildFinalResult();

    }
    return suggestions;
}

From source file:org.watermint.sourcecolon.org.opensolaris.opengrok.index.IndexDatabase.java

License:Open Source License

/**
 * Generates the spelling suggestion index from the terms of the {@code defs} field of the
 * search index.
 * <p>
 * I/O failures are logged and not propagated. The spell checker, the index reader and the
 * spelling directory are released in all cases.
 */
public void createSpellingSuggestions() {
    IndexReader indexReader = null;
    SpellChecker checker = null;

    try {
        log.info("Generating spelling suggestion index ... ");
        indexReader = IndexReader.open(indexDirectory);
        checker = new SpellChecker(spellDirectory);
        //TODO below seems only to index "defs" , possible bug ?
        checker.indexDictionary(new LuceneDictionary(indexReader, "defs"),
                new IndexWriterConfig(Version.LUCENE_36, null), true);
        log.info("done");
    } catch (IOException e) {
        // Logger.log(Level, String, Throwable) does not substitute "{0}" placeholders, so the
        // message carries none; the exception itself is attached to the log record.
        log.log(Level.SEVERE, "ERROR: Generating spelling", e);
    } finally {
        // close the checker before the directory it works on
        if (checker != null) {
            try {
                checker.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occurred while closing spell checker", e);
            }
        }
        if (indexReader != null) {
            try {
                indexReader.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occurred while closing reader", e);
            }
        }
        if (spellDirectory != null) {
            spellDirectory.close();
        }
    }
}

From source file:org.watermint.sourcecolon.org.opensolaris.opengrok.web.SearchHelper.java

License:Open Source License

/**
 * If a search did not return a hit, one may use this method to obtain
 * suggestions for a new search./*from   w ww . j  a  va  2  s  .  c om*/
 * <p/>
 * <p/>
 * Parameters which should be populated/set at this time:
 * <ul>
 * <li>{@link #projects}</li>
 * <li>{@link #dataRoot}</li>
 * <li>{@link #builder}</li>
 * </ul>
 *
 * @return a possible empty list of sugeestions.
 */
public List<Suggestion> getSuggestions() {
    if (projects == null) {
        return new ArrayList<>(0);
    }
    File[] spellIndex = null;
    if (projects.isEmpty()) {
        spellIndex = new File[] { new File(dataRoot, "spellIndex") };
    } else if (projects.size() == 1) {
        spellIndex = new File[] { new File(dataRoot, "spellIndex/" + projects.first()) };
    } else {
        spellIndex = new File[projects.size()];
        int ii = 0;
        File indexDir = new File(dataRoot, "spellIndex");
        for (String proj : projects) {
            spellIndex[ii++] = new File(indexDir, proj);
        }
    }
    List<Suggestion> res = new ArrayList<>();
    List<String> dummy = new ArrayList<>();
    for (File aSpellIndex : spellIndex) {
        if (!aSpellIndex.exists()) {
            continue;
        }
        SpellChecker checker = null;
        Suggestion s = new Suggestion(aSpellIndex.getName());
        try (FSDirectory spellDirectory = FSDirectory.open(aSpellIndex)) {
            checker = new SpellChecker(spellDirectory);
            getSuggestion(builder.getFreetext(), checker, dummy);
            s.freetext = dummy.toArray(new String[dummy.size()]);
            dummy.clear();
            getSuggestion(builder.getRefs(), checker, dummy);
            s.refs = dummy.toArray(new String[dummy.size()]);
            dummy.clear();
            // TODO it seems the only true spellchecker is for
            // below field, see IndexDatabase
            // createspellingsuggestions ...
            getSuggestion(builder.getDefs(), checker, dummy);
            s.defs = dummy.toArray(new String[dummy.size()]);
            dummy.clear();
            if (s.freetext.length > 0 || s.defs.length > 0 || s.refs.length > 0) {
                res.add(s);
            }
        } catch (IOException e) {
            log.log(Level.WARNING, "Got excption while getting spelling suggestions: ", e);
        } finally {
            if (checker != null) {
                try {
                    checker.close();
                } catch (Exception x) {
                    log.log(Level.WARNING, "Got excption while closing spelling suggestions: ", x);
                }
            }
        }
    }
    return res;
}

From source file:prman.model.SpellCheckManager.java

License:Open Source License

/**
 * Registers a spell checker for the given locale, provided {@code isIndexed(loc)} reports true.
 * <p>
 * The checker works on the sub-directory of {@code DIR} named after the locale and is stored in
 * {@code spellCheckers} under that locale.
 *
 * @param loc the locale to set a spell checker up for
 * @return {@code true} when a spell checker has been registered, {@code false} when
 * {@code isIndexed(loc)} is false
 * @throws IOException if the index directory or the spell checker cannot be opened
 */
public boolean setup(Locale loc) throws IOException {
    if (!isIndexed(loc)) {
        return false;
    }
    File localeIndexDir = new File(DIR, loc.toString());
    FSDirectory localeIndex = FSDirectory.getDirectory(localeIndexDir, false);
    spellCheckers.put(loc, new SpellChecker(localeIndex));
    return true;
}