Example usage for org.apache.lucene.search IndexSearcher setSimilarity

List of usage examples for org.apache.lucene.search IndexSearcher setSimilarity

Introduction

On this page you can find an example usage of org.apache.lucene.search IndexSearcher setSimilarity.

Prototype

public void setSimilarity(Similarity similarity) 

Source Link

Document

Expert: Set the Similarity implementation used by this IndexSearcher.

Usage

From source file:net.semanticmetadata.lire.indexing.HashingTest.java

License:Open Source License

/**
 * Benchmarks hash-based retrieval: extracts a CEDD feature from the query image,
 * hashes it, searches the "Hashes" field with one SHOULD clause per hash value,
 * re-ranks the candidates, and opens an HTML result page. Average timings over
 * three runs (hash search, then re-rank) are printed to stdout.
 *
 * @throws IOException if the index or the query image cannot be read
 */
public void testSearch() throws IOException {
    BitSampling.readHashFunctions();
    // Memory-mapped index; use FSDirectory instead of MMapDirectory to avoid mapping it into RAM.
    IndexReader reader = DirectoryReader.open(MMapDirectory.open(new File("indexor-1.4mh")));
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        // Constant similarity: every scoring component returns 1, so a document's
        // score is simply the number of query hashes it matches.
        searcher.setSimilarity(new DefaultSimilarity() {
            @Override
            public float tf(float freq) {
                return 1;
            }

            @Override
            public float idf(long docFreq, long numDocs) {
                return 1;
            }

            @Override
            public float coord(int overlap, int maxOverlap) {
                return 1;
            }

            @Override
            public float queryNorm(float sumOfSquaredWeights) {
                return 1;
            }

            @Override
            public float sloppyFreq(int distance) {
                return 1;
            }

            @Override
            public float lengthNorm(FieldInvertState state) {
                return 1;
            }
        });
        LireFeature feat = null;
        try {
            feat = new CEDD();
        } catch (Exception e) {
            System.err.println("there is a problem with creating the right feature instance.");
            e.printStackTrace();
        }

        if (feat != null) {
            feat.extract(ImageIO.read(new File(queryFile)));
            int[] ints = BitSampling.generateHashes(feat.getDoubleHistogram());
            System.out.println(Arrays.toString(ints));
            // (A StringBuilder concatenating the hashes was built here but never used — removed.)
            try {
                BooleanQuery query = new BooleanQuery();
                for (int i = 0; i < ints.length; i++) {
                    // The field name must match the one the hashes were indexed under.
                    query.add(new BooleanClause(new TermQuery(new Term("Hashes", ints[i] + "")),
                            BooleanClause.Occur.SHOULD));
                }
                long ms = System.currentTimeMillis();
                TopDocs topDocs = null;
                // Three runs to smooth out timing noise.
                for (int i = 0; i < 3; i++) {
                    topDocs = searcher.search(query, 5000);
                }
                System.out.println((System.currentTimeMillis() - ms) / 3);
                ms = System.currentTimeMillis();
                for (int i = 0; i < 3; i++)
                    topDocs = rerank(topDocs, feat, reader);
                System.out.println((System.currentTimeMillis() - ms) / 3);
                String file = printToHtml(topDocs, reader);
                FileUtils.browseUri(file);
            } catch (Exception e) {
                System.err.println("Exception searching the index.");
                e.printStackTrace();
            }
        }
    } finally {
        // The original leaked the reader; always release it.
        reader.close();
    }
}

From source file:net.semanticmetadata.lire.indexing.LocalitySensitiveHashingTest.java

License:Open Source License

/**
 * Runs one hash-based query for the given document and counts how many of its
 * top-10 exhaustive-scan (CEDD) neighbours reappear in the re-ranked hash results.
 *
 * @param docNum index of the query document within the index
 * @return how many of the 10 "gold" identifiers occur among the re-ranked hits
 */
public double singleSearch(int docNum) throws IOException, InstantiationException, IllegalAccessException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    try {
        // Query hash string and the query's CEDD feature, both read from the stored document.
        String query = reader.document(docNum).getValues("hash")[0];
        CEDD ceddQuery = new CEDD();
        ceddQuery.setByteArrayRepresentation(
                reader.document(docNum).getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().bytes,
                reader.document(docNum).getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().offset,
                reader.document(docNum).getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().length);

        // Ground truth: top 10 hits of an exhaustive CEDD scan.
        HashSet<String> gold = new HashSet<String>(numImagesEval);
        ImageSearcher cis = ImageSearcherFactory.createCEDDImageSearcher(100);
        ImageSearchHits hits = cis.search(reader.document(docNum), reader);
        for (int i = 0; i < 10; i++) {
            gold.add(hits.doc(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
        }

        // Constant similarity: score is just the number of matching hash terms.
        IndexSearcher searcher = new IndexSearcher(reader);
        searcher.setSimilarity(new SimilarityBase() {
            @Override
            protected float score(BasicStats basicStats, float freq, float v2) {
                return 1;
            }

            @Override
            public String toString() {
                // Never return null from toString(); Lucene may embed it in score explanations.
                return "ConstantSimilarity";
            }
        });
        TopDocs topDocs = searcher.search(createQuery(query), 500);
        topDocs = rerank(topDocs, ceddQuery, reader);
        // Count overlap between the re-ranked hits and the gold set.
        double numMatches = 0;
        for (int i = 0; i < topDocs.scoreDocs.length; i++) {
            ScoreDoc scoreDoc = topDocs.scoreDocs[i];
            String file = reader.document(scoreDoc.doc).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
            if (gold.contains(file))
                numMatches++;
        }
        return numMatches;
    } finally {
        // The original leaked the reader; always release it.
        reader.close();
    }
}

From source file:net.semanticmetadata.lire.searchers.BitSamplingImageSearcher.java

License:Open Source License

/**
 * Two-stage search: a fast hash-based candidate lookup followed by exact
 * re-ranking on the true feature distance.
 *
 * @param hashes       hash terms generated for the query feature
 * @param queryFeature the query's global feature, used for exact re-ranking
 * @param reader       index reader to search
 * @return the best {@code maximumHits} results ordered by feature distance
 * @throws IOException on index access problems
 */
private ImageSearchHits search(String[] hashes, GlobalFeature queryFeature, IndexReader reader)
        throws IOException {
    // Stage 1: candidate retrieval via a boolean OR query over the hash terms.
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(new BaseSimilarity());
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    for (int i = 0; i < hashes.length; i++) {
        // The field name must match the one the hashes were indexed under.
        if (partialHashes) {
            // Randomly drop about half of the clauses: faster query at the cost of recall.
            if (Math.random() < 0.5)
                builder.add(new BooleanClause(new TermQuery(new Term(hashesFieldName, hashes[i] + "")),
                        BooleanClause.Occur.SHOULD));
        } else
            builder.add(new BooleanClause(new TermQuery(new Term(hashesFieldName, hashes[i] + "")),
                    BooleanClause.Occur.SHOULD));
    }
    // Declared at the point of assignment instead of a redundant null initialization.
    BooleanQuery query = builder.build();
    TopDocs docs = searcher.search(query, maxResultsHashBased);
    // Stage 2: re-rank candidates by true distance, keeping only the best maximumHits
    // in a TreeSet ordered by distance; maxDistance tracks the worst retained hit.
    TreeSet<SimpleResult> resultScoreDocs = new TreeSet<SimpleResult>();
    double maxDistance = -1d;
    double tmpScore;
    for (int i = 0; i < docs.scoreDocs.length; i++) {
        // NOTE(review): the stored document is fetched three times per hit here; caching
        // reader.document(...).getBinaryValue(featureFieldName) once per candidate would
        // save two redundant stored-field reads.
        feature.setByteArrayRepresentation(
                reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).bytes,
                reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).offset,
                reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).length);
        tmpScore = queryFeature.getDistance(feature);
        assert (tmpScore >= 0);
        if (resultScoreDocs.size() < maximumHits) {
            resultScoreDocs.add(new SimpleResult(tmpScore, docs.scoreDocs[i].doc));
            maxDistance = Math.max(maxDistance, tmpScore);
        } else if (tmpScore < maxDistance) {
            // Better than the current worst hit: evict the worst, insert the new one,
            // then tighten the distance border.
            resultScoreDocs.remove(resultScoreDocs.last());
            resultScoreDocs.add(new SimpleResult(tmpScore, docs.scoreDocs[i].doc));
            maxDistance = resultScoreDocs.last().getDistance();
        }
    }
    assert (resultScoreDocs.size() <= maximumHits);
    return new SimpleImageSearchHits(resultScoreDocs, maxDistance);
}

From source file:net.semanticmetadata.lire.searchers.LshImageSearcher.java

License:Open Source License

/**
 * LSH search: hash-based candidate retrieval with a constant-scoring similarity,
 * followed by exact re-ranking on the feature distance, keeping the best
 * {@code maximumHits} results.
 *
 * @param hashes       hash terms generated for the query feature
 * @param queryFeature the query's global feature, used for exact re-ranking
 * @param reader       index reader to search
 * @return the best {@code maximumHits} results ordered by feature distance
 * @throws IOException on index access problems
 */
private ImageSearchHits search(String[] hashes, GlobalFeature queryFeature, IndexReader reader)
        throws IOException {
    // Stage 1: candidate lookup by hash terms.
    IndexSearcher searcher = new IndexSearcher(reader);
    // Constant similarity: every scoring component returns 1, so a document's
    // score equals the number of query hashes it matches.
    searcher.setSimilarity(new ClassicSimilarity() {
        @Override
        public float tf(float freq) {
            return 1;
        }

        @Override
        public float idf(long docFreq, long numDocs) {
            return 1;
        }

        @Override
        public float coord(int overlap, int maxOverlap) {
            return 1;
        }

        @Override
        public float queryNorm(float sumOfSquaredWeights) {
            return 1;
        }

        @Override
        public float sloppyFreq(int distance) {
            return 1;
        }

        @Override
        public float lengthNorm(FieldInvertState state) {
            return 1;
        }
    });
    BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
    for (int i = 0; i < hashes.length; i++) {
        // The field name must match the one the hashes were indexed under.
        queryBuilder.add(new BooleanClause(new TermQuery(new Term(hashesFieldName, hashes[i] + "")),
                BooleanClause.Occur.SHOULD));
    }
    TopDocs docs = searcher.search(queryBuilder.build(), maxResultsHashBased);
    // Stage 2: re-rank by true feature distance, keeping the best maximumHits.
    TreeSet<SimpleResult> resultScoreDocs = new TreeSet<SimpleResult>();
    double maxDistance = 0d;
    double tmpScore; // assigned before every read; redundant 0d initializer removed
    for (int i = 0; i < docs.scoreDocs.length; i++) {
        // NOTE(review): the stored document is fetched three times per hit; caching the
        // binary value once per candidate would save two redundant stored-field reads.
        feature.setByteArrayRepresentation(
                reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).bytes,
                reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).offset,
                reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).length);
        tmpScore = queryFeature.getDistance(feature);
        if (resultScoreDocs.size() < maximumHits) {
            resultScoreDocs.add(new SimpleResult(tmpScore, docs.scoreDocs[i].doc));
            maxDistance = Math.max(maxDistance, tmpScore);
        } else if (tmpScore < maxDistance) {
            // Better than the current worst hit: insert it; the trim loop below evicts the worst.
            resultScoreDocs.add(new SimpleResult(tmpScore, docs.scoreDocs[i].doc));
        }
        while (resultScoreDocs.size() > maximumHits) {
            resultScoreDocs.remove(resultScoreDocs.last());
            maxDistance = resultScoreDocs.last().getDistance();
        }
    }
    return new SimpleImageSearchHits(resultScoreDocs, maxDistance);
}

From source file:net.semanticmetadata.lire.searchers.MetricSpacesImageSearcher.java

License:Open Source License

/**
 * Two-stage search: parses the whitespace-separated hash string into a text query
 * for the candidate lookup, then re-ranks the candidates by true feature distance.
 *
 * @param hashes       whitespace-separated hash terms for the query feature
 * @param queryFeature the query's global feature, used for exact re-ranking
 * @param reader       index reader to search
 * @return the best {@code maximumHits} results by feature distance, or null if
 *         the hash string could not be parsed into a query
 * @throws IOException on index access problems
 */
private ImageSearchHits search(String hashes, GlobalFeature queryFeature, IndexReader reader)
        throws IOException {
    // Stage 1: candidate retrieval via a parsed text query over the hash field.
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(new BaseSimilarity());
    QueryParser qp = new QueryParser(hashesFieldName, new WhitespaceAnalyzer());
    Query query = null;
    try {
        query = qp.parse(hashes);
    } catch (ParseException e) {
        // Parse failure is reported and signalled to the caller via a null return.
        e.printStackTrace();
    }
    if (query == null)
        return null;
    TopDocs docs = searcher.search(query, maxResultsHashBased);
    // Stage 2: re-rank candidates by true distance, keeping only the best maximumHits
    // in a TreeSet ordered by distance; maxDistance tracks the worst retained hit.
    TreeSet<SimpleResult> resultScoreDocs = new TreeSet<SimpleResult>();
    double maxDistance = -1d;
    double tmpScore;
    for (int i = 0; i < docs.scoreDocs.length; i++) {
        feature.setByteArrayRepresentation(
                reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).bytes,
                reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).offset,
                reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).length);
        tmpScore = queryFeature.getDistance(feature);
        assert (tmpScore >= 0);
        if (resultScoreDocs.size() < maximumHits) {
            resultScoreDocs.add(new SimpleResult(tmpScore, docs.scoreDocs[i].doc));
            maxDistance = Math.max(maxDistance, tmpScore);
        } else if (tmpScore < maxDistance) {
            // if it is nearer to the sample than at least one of the current set:
            // remove the last one ...
            resultScoreDocs.remove(resultScoreDocs.last());
            // add the new one ...
            resultScoreDocs.add(new SimpleResult(tmpScore, docs.scoreDocs[i].doc));
            // and set our new distance border ...
            maxDistance = resultScoreDocs.last().getDistance();
        }
    }
    assert (resultScoreDocs.size() <= maximumHits);
    return new SimpleImageSearchHits(resultScoreDocs, maxDistance);
}

From source file:net.semanticmetadata.lire.searchers.VisualWordsImageSearcher.java

License:Open Source License

/**
 * Finds visually similar images for the given document by issuing its stored
 * visual-word string as a text query and converting scores into distances.
 *
 * @param doc    indexed document carrying the visual-word string in {@code fieldName}
 * @param reader index reader to search
 * @return the hits with score-derived distances, or null if the query string
 *         could not be parsed
 * @throws IOException on index access problems
 */
public ImageSearchHits search(Document doc, IndexReader reader) throws IOException {
    SimpleImageSearchHits result = null;
    IndexSearcher indexSearcher = new IndexSearcher(reader);
    indexSearcher.setSimilarity(similarity);
    String queryString = doc.getValues(fieldName)[0];
    try {
        Query parsedQuery = qp.parse(queryString);
        TopDocs topDocs = indexSearcher.search(parsedQuery, numMaxHits);
        LinkedList<SimpleResult> collected = new LinkedList<SimpleResult>();
        double maxDistance = 0d;
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            // Higher score means more similar, so use the reciprocal as a distance.
            double distance = 1d / scoreDoc.score;
            if (distance > maxDistance) {
                maxDistance = distance;
            }
            collected.add(new SimpleResult(distance, scoreDoc.doc));
        }
        result = new SimpleImageSearchHits(collected, maxDistance);
    } catch (ParseException e) {
        System.err.println(queryString);
        e.printStackTrace();
    }
    return result;
}

From source file:nl.uva.expose.clustering.SimGraphMaker.java

/**
 * Executes the given query text against the index using BM25 and returns the
 * scored results for query {@code qId}.
 *
 * @param queryText raw query text; boolean operators are neutralized
 * @param qId       identifier passed through to the result list
 * @return mapping produced by {@code fillQueryResultList} for the collected hits
 */
public HashMap<String, Double> searchAndReturnResults(String queryText, String qId)
        throws IOException, ParseException {
    // Lower-case boolean operators so the parser treats them as plain terms.
    // \b word boundaries keep words that merely contain AND/OR/NOT intact
    // (the original replaceAll("AND", "and") turned e.g. "BRAND" into "BRand").
    queryText = queryText.replaceAll("\\bAND\\b", "and").replaceAll("\\bOR\\b", "or").replaceAll("\\bNOT\\b",
            "not");
    QueryParser qParser = new QueryParser(Version.LUCENE_CURRENT, field, this.analyzer);
    // setMaxClauseCount is a JVM-wide setting: only ever raise it, never lower it
    // below the current limit (the original could shrink it and break other queries).
    int tokenCount = queryText.split("\\s+").length;
    if (tokenCount > BooleanQuery.getMaxClauseCount()) {
        BooleanQuery.setMaxClauseCount(tokenCount);
    }
    Query q = qParser.parse(QueryParser.escape(queryText));
    Similarity simFunc = new BM25Similarity();
    IndexSearcher isearcher = new IndexSearcher(this.ireader);
    isearcher.setSimilarity(simFunc);
    // Collect every document in the index, sorted by relevance.
    TopFieldCollector tfc = TopFieldCollector.create(Sort.RELEVANCE, ireader.numDocs(), true, true, true,
            false);
    isearcher.search(q, tfc);
    TopDocs results = tfc.topDocs();
    ScoreDoc[] hits = results.scoreDocs;
    return fillQueryResultList(hits, qId);
}

From source file:nl.uva.mlc.eurovoc.irengine.Retrieval.java

/**
 * Executes the given query text against the index with the configured similarity
 * function and returns the per-document features for query {@code qId}.
 *
 * @param queryText raw query text; boolean operators are neutralized
 * @param qId       identifier passed through to the result list
 * @return mapping produced by {@code fillQueryResultList} for the collected hits
 */
public HashMap<String, Feature> searchAndReturnResults(String queryText, String qId)
        throws IOException, ParseException {
    // Lower-case boolean operators so the parser treats them as plain terms.
    // \b word boundaries keep words that merely contain AND/OR/NOT intact
    // (the original replaceAll("AND", "and") turned e.g. "BRAND" into "BRand").
    queryText = queryText.replaceAll("\\bAND\\b", "and").replaceAll("\\bOR\\b", "or").replaceAll("\\bNOT\\b",
            "not");
    QueryParser qParser = new QueryParser(Version.LUCENE_CURRENT, field, this.analyzer);
    // setMaxClauseCount is a JVM-wide setting: only ever raise it, never lower it
    // below the current limit (the original could shrink it and break other queries).
    int tokenCount = queryText.split("\\s+").length;
    if (tokenCount > BooleanQuery.getMaxClauseCount()) {
        BooleanQuery.setMaxClauseCount(tokenCount);
    }
    Query q = qParser.parse(QueryParser.escape(queryText));
    // Select the configured similarity implementation by name.
    this.simFunction = SIM_FUNCS[SimilarityFunction.valueOf(SimFName).ordinal()];
    Similarity simFunc = this.simFunction;
    IndexSearcher isearcher = new IndexSearcher(this.ireader);
    isearcher.setSimilarity(simFunc);
    // Collect every document in the index, sorted by relevance.
    TopFieldCollector tfc = TopFieldCollector.create(Sort.RELEVANCE, ireader.numDocs(), true, true, true,
            false);
    isearcher.search(q, tfc);
    TopDocs results = tfc.topDocs();
    ScoreDoc[] hits = results.scoreDocs;
    return fillQueryResultList(hits, qId);
}

From source file:org.codice.ddf.spatial.geocoding.query.GeoNamesQueryLuceneDirectoryIndex.java

License:Open Source License

/**
 * Builds an IndexSearcher over the given reader, configured with the same
 * similarity the GeoNames indexer used at indexing time.
 *
 * @param indexReader reader the searcher will operate on
 * @return a searcher using {@code GeoNamesLuceneIndexer.SIMILARITY}
 */
@Override
protected IndexSearcher createIndexSearcher(final IndexReader indexReader) {
    final IndexSearcher searcher = new IndexSearcher(indexReader);
    searcher.setSimilarity(GeoNamesLuceneIndexer.SIMILARITY);
    return searcher;
}

From source file:org.compass.core.lucene.engine.manager.DefaultLuceneSearchEngineIndexManager.java

License:Apache License

/**
 * Opens a searcher over the given reader, configured with the engine's
 * search-time similarity.
 *
 * @param reader reader the searcher will operate on
 * @return a searcher using the factory's configured search similarity
 */
public IndexSearcher openIndexSearcher(IndexReader reader) {
    final IndexSearcher result = new IndexSearcher(reader);
    result.setSimilarity(searchEngineFactory.getSimilarityManager().getSearchSimilarity());
    return result;
}