Example usage for org.apache.lucene.search.spell NGramDistance getDistance

List of usage examples for org.apache.lucene.search.spell.NGramDistance.getDistance

Introduction

On this page you can find example usages of org.apache.lucene.search.spell.NGramDistance.getDistance.

Prototype

@Override
    public float getDistance(String source, String target) 

Source Link

Usage

From source file:br.bireme.ngrams.NGrams.java

/**
 * Searches the index for entries resembling one pipe-delimited input record
 * and adds every accepted match to {@code results}.
 *
 * <p>The record is HTML-unescaped, trimmed and split on {@code |}; the number
 * of pieces must equal {@code parameters.nameFields.size()}. The piece at
 * {@code parameters.indexed.pos} is normalized, size-limited and used as the
 * query text. Nothing is searched when that text ends up empty.
 *
 * @param parameters    search configuration (field names, indexed field, scores)
 * @param searcher      searcher used to run the query and load documents
 * @param analyzer      analyzer handed to the query parser
 * @param ngDistance    n-gram measure used when {@code useSimilarity} is set
 * @param text          the pipe-delimited input record; must not be {@code null}
 * @param useSimilarity {@code true} to filter hits by n-gram similarity,
 *                      {@code false} to filter by the raw search score
 * @param id_id         passed through to {@code createResult}
 * @param results       receives every non-null result that is created
 * @throws NullPointerException if {@code text} is {@code null}
 * @throws IOException          if the record has the wrong number of fields, a
 *                              hit lacks the indexed field, or the index
 *                              access fails
 * @throws ParseException       if the query text cannot be parsed
 */
private static void searchRaw(final Parameters parameters, final IndexSearcher searcher,
        final NGAnalyzer analyzer, final NGramDistance ngDistance, final String text,
        final boolean useSimilarity, final Set<String> id_id, final Set<Result> results)
        throws IOException, ParseException {
    assert parameters != null;
    assert searcher != null;
    assert analyzer != null;
    assert ngDistance != null;
    assert id_id != null;
    assert results != null;

    if (text == null) {
        throw new NullPointerException("text");
    }

    // A non-zero split limit keeps trailing empty fields, so the field count
    // check below sees every column of the record.
    final String unescaped = StringEscapeUtils.unescapeHtml4(text);
    final String[] param = unescaped.trim().split(" *\\| *", Integer.MAX_VALUE);
    if (param.length != parameters.nameFields.size()) {
        throw new IOException(text);
    }

    final String fname = parameters.indexed.name;
    final QueryParser parser = new QueryParser(fname, analyzer);
    final String normalized = Tools.normalize(param[parameters.indexed.pos], OCC_SEPARATOR);
    final String ntext = Tools.limitSize(normalized, MAX_NG_TEXT_SIZE).trim();
    if (ntext.isEmpty()) {
        return; // nothing to search for
    }

    final int maxHits = 20;
    final Query query = parser.parse(QueryParser.escape(ntext));
    final TopDocs top = searcher.search(query, maxHits);
    final float lower = parameters.scores.first().minValue;

    // Number of hits still allowed to be inspected; only a performance cut-off.
    int budget = maxHits;
    for (final ScoreDoc hit : top.scoreDocs) {
        if (budget <= 0) {
            break;
        }
        budget--;

        final Document doc = searcher.doc(hit.doc);

        if (!useSimilarity) {
            if (hit.score < 1.0) {
                System.out.println("Saindo score=" + hit.score);
                break; // only for performance
            }
            final Result scored = createResult(id_id, parameters, param, doc, ngDistance, 0, hit.score);
            if (scored != null) {
                results.add(scored);
            }
            continue;
        }

        final String dname = doc.get(fname);
        if (dname == null) {
            throw new IOException("dname");
        }
        final float similarity = ngDistance.getDistance(ntext, dname);
        if (similarity < lower) {
            // After a hit below the threshold, look at no more than three further hits.
            budget = Math.min(budget, 3);
            continue;
        }
        final Result similar = createResult(id_id, parameters, param, doc, ngDistance, similarity,
                hit.score);
        if (similar != null) {
            results.add(similar);
        }
    }
}

From source file:br.bireme.ngrams.NGrams.java

/**
 * Compares a candidate value with the value a document stores for an n-gram field.
 *
 * <p>Both values are trimmed, with {@code null} treated as the empty string.
 *
 * @param ngDistance measure used to score the two values
 * @param field      field description; cast to {@code NGramField} to read {@code minScore}
 * @param fld        candidate value, may be {@code null}
 * @param doc        document whose {@code field.name} value is compared
 * @return {@code 1} when the similarity reaches the field's {@code minScore};
 *         {@code -2} when it does not and {@code field.contentMatch} is
 *         {@code Status.MAX_SCORE}; {@code -1} otherwise, including when both
 *         values are empty
 */
private static int compareNGramFields(final NGramDistance ngDistance, final br.bireme.ngrams.Field field,
        final String fld, final Document doc) {
    assert ngDistance != null;
    assert field != null;
    assert doc != null;

    final String stored = (String) doc.get(field.name);
    final String candidate = (fld == null) ? "" : fld.trim();
    final String reference = (stored == null) ? "" : stored.trim();

    // Two blank values carry no information and are never scored.
    if (candidate.isEmpty() && reference.isEmpty()) {
        return -1;
    }

    final float similarity = ngDistance.getDistance(candidate, reference);
    if (similarity >= ((NGramField) field).minScore) {
        return 1;
    }
    return (field.contentMatch == Status.MAX_SCORE) ? -2 : -1;
}

From source file:ca.ualberta.entitylinking.common.indexing.AliasLuceneIndex.java

License:Open Source License

/**
 * Orders the hits of {@code td} by the bigram distance between each hit's
 * cached "docID" field value and {@code str}.
 *
 * <p>The "docID" values are loaded into {@code keyArray} on first use. Each
 * hit is wrapped in a {@code Rank} keyed by its score and the list is sorted
 * with {@code Collections.sort}, i.e. in the natural ordering of {@code Rank}
 * (NOTE(review): direction depends on {@code Rank.compareTo} - confirm).
 *
 * @param td  hits to rank
 * @param str text each hit's "docID" value is compared with
 * @return the Lucene document ids of the hits, in sorted rank order
 * @throws IllegalStateException if there are hits to rank but the "docID"
 *                               values could not be loaded
 */
private List<Integer> rankingByNGramDistance(TopDocs td, String str) {
    NGramDistance measure = new NGramDistance(2);
    List<Rank<Double, Integer>> rankList = new ArrayList<Rank<Double, Integer>>();

    // Remember why loading failed instead of swallowing it; keyArray stays
    // null in that case, so the load is attempted again on the next call.
    Exception loadFailure = null;
    if (keyArray == null) {
        try {
            keyArray = FieldCache.DEFAULT.getStrings(reader, "docID");
        } catch (Exception e) {
            loadFailure = e;
        }
    }

    // Previously a failed load surfaced as a bare NullPointerException in the
    // loop below; report the real cause instead. With no hits nothing needs
    // the cache, so an empty list is still returned.
    if (keyArray == null && td.scoreDocs.length > 0) {
        throw new IllegalStateException("Unable to load the \"docID\" field cache", loadFailure);
    }

    for (int i = 0; i < td.scoreDocs.length; i++) {
        int docId = td.scoreDocs[i].doc;
        String alias = keyArray[docId];
        double sim = measure.getDistance(alias, str);
        rankList.add(new Rank<Double, Integer>(sim, docId));
    }

    Collections.sort(rankList);

    List<Integer> ret = new ArrayList<Integer>();
    for (Rank<Double, Integer> rank : rankList) {
        ret.add(rank.obj);
    }

    return ret;
}

From source file:ca.ualberta.entitylinking.utils.similarity.StringSim.java

License:Open Source License

/**
 * Scores two strings against each other using an {@code NGramDistance}
 * built for grams of {@code n} characters.
 *
 * @param s source string
 * @param t target string
 * @param n number of characters in each gram
 * @return the value of {@code NGramDistance.getDistance(s, t)} widened to
 *         {@code double}; a similarity between 0 and 1
 */
public static double ngram_distance(String s, String t, int n) {
    final float score = new NGramDistance(n).getDistance(s, t);
    return score;
}