Example usage for the org.apache.lucene.search.spell.PlainTextDictionary constructor

List of usage examples for org.apache.lucene.search.spell PlainTextDictionary PlainTextDictionary

Introduction

On this page you can find usage examples for the constructor of org.apache.lucene.search.spell.PlainTextDictionary.

Prototype

public PlainTextDictionary(Reader reader) 

Source Link

Document

Creates a dictionary based on a reader.

Usage

From source file:com.bah.bahdit.main.search.utils.LevenshteinDistance.java

License:Apache License

/**
 * Builds a spell checker whose index is populated from the keys of the
 * given sample table.
 * <p>
 * The keys are first written, one per line, to a uniquely named dictionary
 * text file; that file is then indexed into a uniquely named "spellcheck"
 * directory below the {@code user.dir} system property. Failures along the
 * way are printed to standard error rather than propagated.
 *
 * @param context - the context of the current servlet (not read by this method)
 * @param sampleTable - the full text sample table; its keys become the dictionary terms
 * @return - a spellchecker object, or {@code null} if the index directory or
 *           the spell checker itself could not be opened
 */
public static SpellChecker createSpellChecker(ServletContext context, HashMap<String, Integer> sampleTable) {

    SpellChecker spellChecker = null;

    // write terms from sample table to text file, to be basis of dictionary
    File f = new File("dictionary" + System.nanoTime() + ".txt");
    try {
        f.createNewFile();
        BufferedWriter out = new BufferedWriter(new FileWriter(f));
        try {
            for (String entry : sampleTable.keySet()) {
                out.write(entry + "\n");
            }
        } finally {
            // Closing flushes the buffer; without it the buffered terms may
            // never reach the file that is indexed below.
            out.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }

    String dPath = System.getProperty("user.dir") + "/spellcheck" + System.nanoTime();

    File dir = new File(dPath);
    Directory directory = null;

    try {
        directory = FSDirectory.open(dir);
    } catch (IOException e3) {
        e3.printStackTrace();
    }

    // Only build the spell checker when the index directory could be opened.
    if (directory != null) {
        try {
            spellChecker = new SpellChecker(directory);
        } catch (IOException e2) {
            e2.printStackTrace();
        }
    }

    StandardAnalyzer a = new StandardAnalyzer(Version.LUCENE_40);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40, a);
    boolean fullMerge = true;
    PlainTextDictionary dict = null;

    try {
        dict = new PlainTextDictionary(f);
    } catch (FileNotFoundException e1) {
        e1.printStackTrace();
    }

    // An earlier step failed and has already been reported; there is nothing to index.
    if (spellChecker == null || dict == null) {
        return spellChecker;
    }

    try {
        spellChecker.indexDictionary(dict, config, fullMerge);
    } catch (IOException e) {
        e.printStackTrace();
    }

    return spellChecker;
}

From source file:com.ostrichemulators.semtool.rdf.engine.util.EngineConsistencyChecker.java

/**
 * Resolves "near" matches from the elements of the given type. If
 * {@link #across} is <code>true</code>, each element will be compared to all
 * elements of all types./*from   ww w  . j  a  v a2 s .c o  m*/
 *
 * @param uri the concept/relation class (not instance) to resolve
 * @param minDistance the minimum allowable similarity
 * @return map of uri-to-hits
 */
public MultiMap<IRI, Hit> check(IRI uri, final float minDistance) {
    MultiMap<IRI, Hit> hits = new MultiMap<>();

    // get our universe of possible hits
    Map<IRI, String> possibles = getHitUniverse(uri);
    MultiMap<String, IRI> revpos = MultiMap.flip(possibles);

    Directory ramdir = new RAMDirectory();
    StandardAnalyzer analyzer = null;
    SpellChecker speller = null;

    List<IRI> errors = new ArrayList<>();
    try {
        analyzer = new StandardAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        speller = new SpellChecker(ramdir, strdist);

        StringBuilder names = new StringBuilder();
        for (String s : possibles.values()) {
            names.append(s).append("\n");
        }
        PlainTextDictionary ptd = new PlainTextDictionary(new StringReader(names.toString()));
        speller.indexDictionary(ptd, config, true);

        List<IRI> needles = typeToURILkp.get(uri);
        for (IRI needle : needles) {
            String needlelabel = labels.get(needle);
            try {
                String[] suggestions = speller.suggestSimilar(needlelabel, 20, minDistance);
                for (String s : suggestions) {
                    // found a match, so figure out what we actually matched
                    float distance = strdist.getDistance(needlelabel, s);

                    for (IRI match : revpos.get(s)) {
                        hits.add(needle, new Hit(match, s, uriToTypeLkp.get(match), distance));
                    }
                }
            } catch (Exception e) {
                // our fallback resolution always works; it's just a ton slower
                errors.add(needle);
            }
        }
    } catch (Exception e) {
        log.error(e, e);
    } finally {
        for (Closeable c : new Closeable[] { analyzer, ramdir, speller }) {
            if (null != c) {
                try {
                    c.close();
                } catch (Exception e) {
                    log.warn(e, e);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        fallbackResolve(errors, possibles, hits, strdist, minDistance);
    }

    return hits;
}

From source file:cz.muni.fi.webmias.suggest.MathNamesSuggester.java

License:Apache License

public MathNamesSuggester() {
    try {/*from w ww  . ja  v  a  2s  .  c  o m*/
        suggester = new AnalyzingSuggester(new StandardAnalyzer(Version.LUCENE_4_10_2));
        suggester.build(new PlainTextDictionary(getMathNamesReader()));
    } catch (IOException ex) {
        Logger.getLogger(MathNamesSuggester.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:engine.easy.search.EasySearchEngine.java

License:Apache License

/**
 * Creates an in-memory spell checker indexed from the plain-text dictionary
 * file at {@code AppConstants.DICTIONARY_PATH}.
 * <p>
 * Any exception is printed to standard output rather than propagated.
 *
 * @return the spell checker; {@code null} if it could not be constructed, or
 *         a checker whose dictionary was not (fully) indexed if indexing failed
 */
private SpellChecker getSpecSpellChecker() {
    SpellChecker spellchecker = null;

    try {
        // The index lives in a RAMDirectory. The previous version also opened an
        // FSDirectory that was never used and never closed; it has been removed.
        spellchecker = new SpellChecker(new RAMDirectory());
        spellchecker.indexDictionary(new PlainTextDictionary(new File(AppConstants.DICTIONARY_PATH)));

    } catch (Exception e) {
        System.out.println("Exception: getSpecSpellChecker" + e.toString());
    }

    return spellchecker;
}

From source file:engine.easy.util.SuggestionSpellService.java

License:Apache License

/**
 * Indexes the plain-text dictionary at {@code AppConstants.DICTIONARY_PATH}
 * into an in-memory spell checker and prints up to three suggestions for a
 * sample word.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the dictionary cannot be read or indexed
 */
public static void main(String[] args) throws Exception {

    // The index lives in a RAMDirectory. The previous version also opened an
    // FSDirectory that was never used and never closed; it has been removed.
    SpellChecker spellChecker = new SpellChecker(new RAMDirectory());

    spellChecker.indexDictionary(new PlainTextDictionary(new File(AppConstants.DICTIONARY_PATH)));

    String wordForSuggestions = "hee";

    int suggestionsNumber = 3;

    String[] suggestions = spellChecker.suggestSimilar(wordForSuggestions, suggestionsNumber);

    if (suggestions != null && suggestions.length > 0) {
        for (String word : suggestions) {
            System.out.println("Did you mean:" + word);
        }
    } else {
        System.out.println("No suggestions found for word:" + wordForSuggestions);
    }
}

From source file:fastcampus.lucene.example.search.SpellCheckerExample.java

License:Apache License

/**
 * Rebuilds the spell index under {@code ./index/spell/} from the plain-text
 * dictionary at {@code ./data/spell/dic.txt} and prints the best suggestion
 * for a sample word.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the index or the dictionary cannot be opened, written or read
 */
public static void main(String[] args) throws Exception {
    // Directory, spell checker and analyzer all hold resources, so they are
    // closed automatically (in reverse order) when the block exits.
    try (Directory directory = FSDirectory.open(Paths.get("./index/spell/"));
            SpellChecker spellChecker = new SpellChecker(directory);
            Analyzer analyzer = newNormalizingAnalyzer()) {

        // Configuration for the IndexWriter that indexDictionary creates.
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);

        Path path = Paths.get("./data/spell/dic.txt");

        spellChecker.setSpellIndex(directory);
        spellChecker.clearIndex();
        spellChecker.indexDictionary(new PlainTextDictionary(path), indexWriterConfig, true);
        String wordForSuggestions = "?";
        // Rank candidates by Jaro-Winkler similarity.
        spellChecker.setStringDistance(new JaroWinklerDistance());

        int suggestionsNumber = 1;
        String[] suggestions = spellChecker.suggestSimilar(wordForSuggestions, suggestionsNumber);
        if (suggestions != null && suggestions.length > 0) {
            for (String word : suggestions) {
                System.out.println("Did you mean:" + word);
            }
        } else {
            System.out.println("No suggestions found for word:" + wordForSuggestions);
        }
    }
}

/** Creates an analyzer that runs a standard tokenizer followed by ICU "nfc_cf" normalization in DECOMPOSE mode. */
private static Analyzer newNormalizingAnalyzer() {
    return new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            // The argument is the field name, not the text to analyze; the
            // Analyzer framework hands the tokenizer its reader afterwards,
            // so no reader is set here.
            Tokenizer tokenizer = new StandardTokenizer();
            String name = "nfc_cf";
            Normalizer2 normalizer = Normalizer2.getInstance(null, name, Normalizer2.Mode.DECOMPOSE);
            TokenFilter filter = new ICUNormalizer2Filter(tokenizer, normalizer);
            return new TokenStreamComponents(tokenizer, filter);
        }
    };
}

From source file:org.apache.solr.spelling.FileBasedSpellChecker.java

License:Apache License

/**
 * Loads the external spelling file at {@code sourceLocation} into
 * {@code dictionary}.
 * <p>
 * When {@code fieldTypeName} names a field type known to the schema, every
 * line of the file is indexed through that field type's analyzer into an
 * in-memory index, and the dictionary is read back from that index.
 * Otherwise the file is used directly as a plain-text dictionary. An
 * {@link IOException} is logged, not propagated.
 *
 * @param core the core supplying the resource loader, config and (if no searcher is given) the schema
 * @param searcher the searcher whose schema is used, or {@code null} to use the core's latest schema
 */
private void loadExternalFileDictionary(SolrCore core, SolrIndexSearcher searcher) {
    try {
        IndexSchema schema = (searcher != null) ? searcher.getSchema() : core.getLatestSchema();
        boolean analyzeWithFieldType = fieldTypeName != null
                && schema.getFieldTypeNoEx(fieldTypeName) != null;

        if (!analyzeWithFieldType) {
            // No usable field type: read the file as-is, honoring the
            // configured character encoding when there is one.
            if (characterEncoding != null) {
                dictionary = new PlainTextDictionary(new InputStreamReader(
                        core.getResourceLoader().openResource(sourceLocation), characterEncoding));
            } else {
                dictionary = new PlainTextDictionary(core.getResourceLoader().openResource(sourceLocation));
            }
            return;
        }

        // Do index-time analysis using the given fieldType's analyzer
        FieldType fieldType = schema.getFieldTypes().get(fieldTypeName);
        RAMDirectory ramDir = new RAMDirectory();

        LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
        mergePolicy.setMergeFactor(300);

        // TODO: if we enable this, codec gets angry since field won't exist in the schema
        // .setCodec(core.getCodec())
        IndexWriterConfig writerConfig = new IndexWriterConfig(core.getSolrConfig().luceneMatchVersion,
                fieldType.getAnalyzer())
                        .setMaxBufferedDocs(150)
                        .setMergePolicy(mergePolicy)
                        .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        IndexWriter writer = new IndexWriter(ramDir, writerConfig);

        List<String> lines = core.getResourceLoader().getLines(sourceLocation, characterEncoding);

        // one document per line of the spelling file
        for (String line : lines) {
            Document doc = new Document();
            doc.add(new TextField(WORD_FIELD_NAME, line, Field.Store.NO));
            writer.addDocument(doc);
        }
        writer.forceMerge(1);
        writer.close();

        dictionary = new HighFrequencyDictionary(DirectoryReader.open(ramDir), WORD_FIELD_NAME, 0.0f);

    } catch (IOException e) {
        log.error("Unable to load spellings", e);
    }
}

From source file:org.tinymce.spellchecker.LuceneSpellCheckerServlet.java

License:Open Source License

/**
 * Creates the spell checker for the given language, clears its index and
 * re-indexes every dictionary file of that language into it.
 *
 * @param lang the language whose dictionaries are indexed
 * @return the freshly indexed spell checker
 * @throws SpellCheckException if the index cannot be created or cleared, or
 *         if any dictionary file fails to index
 */
private MemoryAwareSpellChecker reindexSpellchecker(String lang) throws SpellCheckException {
    List<File> sources = getDictionaryFiles(lang);

    MemoryAwareSpellChecker spellChecker;
    try {
        spellChecker = new MemoryAwareSpellChecker(getSpellCheckerDirectory(lang));
        spellChecker.clearIndex();
    } catch (IOException e) {
        throw new SpellCheckException("Failed to create index", e);
    }

    for (File source : sources) {
        try {
            PlainTextDictionary words = new PlainTextDictionary(source);
            spellChecker.indexDictionary(words);
        } catch (IOException e) {
            // the failure is both logged and reported to the caller
            String message = "Failed to index dictionary " + source.getAbsolutePath();
            logger.log(Level.SEVERE, message, e);
            throw new SpellCheckException(message, e);
        }
    }
    return spellChecker;
}