com.tamingtext.fuzzy.SpellCorrector.java Source code

Java tutorial

Introduction

Here is the source code for com.tamingtext.fuzzy.SpellCorrector.java

Source

/*
 * Copyright 2008-2011 Grant Ingersoll, Thomas Morton and Drew Farris
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 * -------------------
 * To purchase or learn more about Taming Text, by Grant Ingersoll, Thomas Morton and Drew Farris, visit
 * http://www.manning.com/ingersoll
 */

package com.tamingtext.fuzzy;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;

import org.apache.lucene.search.spell.StringDistance;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;

//<start id="did-you-mean.corrector"/>
/**
 * Suggests a correction for a possibly misspelled word.
 *
 * <p>Candidate words are fetched from a Solr index by querying the
 * {@code wordNGram} field, then each candidate's {@code word} value is scored
 * against the input with the supplied {@link StringDistance}. The score is
 * treated as a similarity (higher is better): the highest-scoring candidate is
 * returned, provided its score is strictly greater than the threshold.
 */
public class SpellCorrector {

    /** Solr endpoint used when no {@link SolrServer} is supplied. */
    private static final String DEFAULT_SOLR_URL = "http://localhost:8983/solr";

    /** Stored field holding the candidate word. */
    private static final String WORD_FIELD = "word";

    /** Field holding the n-grams that candidates are matched on. */
    private static final String NGRAM_FIELD = "wordNGram";

    /** Number of n-gram matches fetched and scored per lookup. */
    private static final int MAX_CANDIDATES = 50;

    private final SolrServer solr;
    private final StringDistance sd;
    private final float threshold;

    /**
     * Creates a corrector that talks to the Solr instance at
     * {@code http://localhost:8983/solr}.
     *
     * @param sd        scorer used to compare each candidate with the input
     * @param threshold score a candidate must strictly exceed to be suggested
     * @throws MalformedURLException if the default Solr URL cannot be parsed
     * @throws IllegalArgumentException if {@code sd} is null
     */
    public SpellCorrector(StringDistance sd, float threshold) throws MalformedURLException {
        this(new CommonsHttpSolrServer(new URL(DEFAULT_SOLR_URL)), sd, threshold);
    }

    /**
     * Creates a corrector that uses the given Solr server, e.g. one at a
     * non-default URL.
     *
     * @param solr      server to query for candidate words
     * @param sd        scorer used to compare each candidate with the input
     * @param threshold score a candidate must strictly exceed to be suggested
     * @throws IllegalArgumentException if {@code solr} or {@code sd} is null
     */
    public SpellCorrector(SolrServer solr, StringDistance sd, float threshold) {
        if (solr == null) {
            throw new IllegalArgumentException("solr must not be null");
        }
        if (sd == null) {
            throw new IllegalArgumentException("sd must not be null");
        }
        this.solr = solr;
        this.sd = sd;
        this.threshold = threshold;
    }

    /**
     * Returns the best-scoring candidate for {@code spelling}.
     *
     * @param spelling the word to correct
     * @return the candidate with the highest score, or {@code null} if
     *         {@code spelling} is null or empty, no candidates were found, or
     *         the best score does not exceed the threshold
     * @throws SolrServerException if the Solr query fails
     */
    public String topSuggestion(String spelling) throws SolrServerException {
        if (spelling == null || spelling.length() == 0) {
            return null; // nothing to correct
        }

        // A fresh query per call, so lookups do not share mutable query state.
        SolrQuery query = new SolrQuery();
        query.setFields(WORD_FIELD);
        query.setRows(MAX_CANDIDATES); //<co id="co.dym.num"/>
        // Escape the input so query metacharacters (':', '*', whitespace, ...)
        // are matched literally instead of being parsed as query syntax.
        query.setQuery(NGRAM_FIELD + ":" + ClientUtils.escapeQueryChars(spelling)); //<co id="co.dym.field"/>

        QueryResponse response = solr.query(query);
        SolrDocumentList candidates = response.getResults();
        if (candidates == null) {
            return null;
        }

        float bestScore = 0;
        String suggestion = null;
        for (SolrDocument doc : candidates) {
            Object value = doc.getFieldValue(WORD_FIELD);
            if (!(value instanceof String)) {
                continue; // missing or non-string value: cannot be scored
            }
            String word = (String) value;
            float score = sd.getDistance(word, spelling); //<co id="co.dym.edit"/>
            if (score > bestScore) {
                bestScore = score;
                suggestion = word; //<co id="co.dym.max"/>
            }
        }
        if (bestScore > threshold) { //<co id="co.dym.threshold"/>
            return suggestion;
        }
        return null;
    }
}
/*
<calloutlist>
<callout arearefs="co.dym.num"><para>The number of n-gram matches to consider.</para></callout>
<callout arearefs="co.dym.field"><para>Query the field which contains the n-gram.</para></callout>
<callout arearefs="co.dym.edit"><para>Compute the edit distance.</para></callout>
<callout arearefs="co.dym.max"><para>Keep best suggestion.</para></callout>
<callout arearefs="co.dym.threshold"><para>Return the best suggestion only if its score exceeds the threshold; otherwise return no suggestion.</para></callout>
</calloutlist>
 */
//<end id="did-you-mean.corrector"/>