List of usage examples for org.apache.lucene.search IndexSearcher setSimilarity
public void setSimilarity(Similarity similarity)
From source file:net.semanticmetadata.lire.indexing.HashingTest.java
License:Open Source License
public void testSearch() throws IOException { BitSampling.readHashFunctions();// w w w. j a v a 2 s .c om // Putting the index into RAM ... with MMapDirectory instead of FSDirectory: IndexReader reader = DirectoryReader.open(MMapDirectory.open(new File("indexor-1.4mh"))); IndexSearcher searcher = new IndexSearcher(reader); searcher.setSimilarity(new DefaultSimilarity() { @Override public float tf(float freq) { return 1; } @Override public float idf(long docFreq, long numDocs) { return 1; } @Override public float coord(int overlap, int maxOverlap) { return 1; } @Override public float queryNorm(float sumOfSquaredWeights) { return 1; } @Override public float sloppyFreq(int distance) { return 1; } @Override public float lengthNorm(FieldInvertState state) { return 1; } }); LireFeature feat = null; try { feat = new CEDD(); } catch (Exception e) { System.err.println("there is a problem with creating the right feature instance."); e.printStackTrace(); } if (feat != null) { feat.extract(ImageIO.read(new File(queryFile))); int[] ints = BitSampling.generateHashes(feat.getDoubleHistogram()); System.out.println(Arrays.toString(ints)); StringBuilder queryStringBuilder = new StringBuilder(10 * 10); for (int i = 0; i < ints.length; i++) { queryStringBuilder.append(ints[i]); queryStringBuilder.append(' '); } try { BooleanQuery query = new BooleanQuery(); for (int i = 0; i < ints.length; i++) { // be aware that the hashFunctionsFileName of the field must match the one you put the hashes in before. 
query.add(new BooleanClause(new TermQuery(new Term("Hashes", ints[i] + "")), BooleanClause.Occur.SHOULD)); } long ms = System.currentTimeMillis(); TopDocs topDocs = null; for (int i = 0; i < 3; i++) { topDocs = searcher.search(query, 5000); } System.out.println((System.currentTimeMillis() - ms) / 3); ms = System.currentTimeMillis(); for (int i = 0; i < 3; i++) topDocs = rerank(topDocs, feat, reader); System.out.println((System.currentTimeMillis() - ms) / 3); String file = printToHtml(topDocs, reader); FileUtils.browseUri(file); } catch (Exception e) { System.err.println("Exception searching the index."); e.printStackTrace(); } } }
From source file:net.semanticmetadata.lire.indexing.LocalitySensitiveHashingTest.java
License:Open Source License
public double singleSearch(int docNum) throws IOException, InstantiationException, IllegalAccessException { IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath))); // ----------- String query = reader.document(docNum).getValues("hash")[0]; CEDD ceddQuery = new CEDD(); ceddQuery.setByteArrayRepresentation( reader.document(docNum).getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().bytes, reader.document(docNum).getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().offset, reader.document(docNum).getField(DocumentBuilder.FIELD_NAME_CEDD).binaryValue().length); // ----------- HashSet<String> gold = new HashSet<String>(numImagesEval); ImageSearcher cis = ImageSearcherFactory.createCEDDImageSearcher(100); ImageSearchHits hits = cis.search(reader.document(docNum), reader); for (int i = 0; i < 10; i++) { gold.add(hits.doc(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); }//from w ww . j a va 2 s. c o m // ------------ IndexSearcher searcher = new IndexSearcher(reader); searcher.setSimilarity(new SimilarityBase() { @Override protected float score(BasicStats basicStats, float freq, float v2) { return 1; } @Override public String toString() { return null; } }); TopDocs topDocs = searcher.search(createQuery(query), 500); topDocs = rerank(topDocs, ceddQuery, reader); // System.out.println("topDocs.scoreDocs.length = " + topDocs.scoreDocs.length); double numMatches = 0; for (int i = 0; i < topDocs.scoreDocs.length; i++) { ScoreDoc scoreDoc = topDocs.scoreDocs[i]; // System.out.print(scoreDoc.score + ": "); String file = reader.document(scoreDoc.doc).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; // System.out.println(file.substring(file.lastIndexOf('/') + 1) + (gold.contains(file)?" x":" o")); if (gold.contains(file)) numMatches++; } return numMatches; }
From source file:net.semanticmetadata.lire.searchers.BitSamplingImageSearcher.java
License:Open Source License
private ImageSearchHits search(String[] hashes, GlobalFeature queryFeature, IndexReader reader) throws IOException { // first search by text: IndexSearcher searcher = new IndexSearcher(reader); searcher.setSimilarity(new BaseSimilarity()); BooleanQuery query = null;/*from ww w . j a v a2 s.c om*/ BooleanQuery.Builder builder = new BooleanQuery.Builder(); for (int i = 0; i < hashes.length; i++) { // be aware that the hashFunctionsFileName of the field must match the one you put the hashes in before. if (partialHashes) { if (Math.random() < 0.5) builder.add(new BooleanClause(new TermQuery(new Term(hashesFieldName, hashes[i] + "")), BooleanClause.Occur.SHOULD)); } else builder.add(new BooleanClause(new TermQuery(new Term(hashesFieldName, hashes[i] + "")), BooleanClause.Occur.SHOULD)); } query = builder.build(); TopDocs docs = searcher.search(query, maxResultsHashBased); // System.out.println(docs.totalHits); // then re-rank TreeSet<SimpleResult> resultScoreDocs = new TreeSet<SimpleResult>(); double maxDistance = -1d; double tmpScore; for (int i = 0; i < docs.scoreDocs.length; i++) { feature.setByteArrayRepresentation( reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).bytes, reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).offset, reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).length); tmpScore = queryFeature.getDistance(feature); assert (tmpScore >= 0); if (resultScoreDocs.size() < maximumHits) { resultScoreDocs.add(new SimpleResult(tmpScore, docs.scoreDocs[i].doc)); maxDistance = Math.max(maxDistance, tmpScore); } else if (tmpScore < maxDistance) { // if it is nearer to the sample than at least one of the current set: // remove the last one ... resultScoreDocs.remove(resultScoreDocs.last()); // add the new one ... resultScoreDocs.add(new SimpleResult(tmpScore, docs.scoreDocs[i].doc)); // and set our new distance border ... 
maxDistance = resultScoreDocs.last().getDistance(); } } assert (resultScoreDocs.size() <= maximumHits); return new SimpleImageSearchHits(resultScoreDocs, maxDistance); }
From source file:net.semanticmetadata.lire.searchers.LshImageSearcher.java
License:Open Source License
/**
 * Two-stage search: a term query over the hash field selects candidates, which are then
 * re-ranked by their feature distance to {@code queryFeature}.
 *
 * <p>The text search uses a {@code ClassicSimilarity} whose factors all return 1.
 *
 * @param hashes       hash values of the query, one optional clause per value
 * @param queryFeature feature the candidates are compared against
 * @param reader       reader of the index to search
 * @return the re-ranked results together with the tracked maximum distance
 * @throws IOException if reading from the index fails
 */
private ImageSearchHits search(String[] hashes, GlobalFeature queryFeature, IndexReader reader) throws IOException {
    // Stage 1: text search over the hashes.
    IndexSearcher indexSearcher = new IndexSearcher(reader);
    ClassicSimilarity constantSimilarity = new ClassicSimilarity() {
        @Override
        public float tf(float freq) {
            return 1;
        }

        @Override
        public float idf(long docFreq, long numDocs) {
            return 1;
        }

        @Override
        public float coord(int overlap, int maxOverlap) {
            return 1;
        }

        @Override
        public float queryNorm(float sumOfSquaredWeights) {
            return 1;
        }

        @Override
        public float sloppyFreq(int distance) {
            return 1;
        }

        @Override
        public float lengthNorm(FieldInvertState state) {
            return 1;
        }
    };
    indexSearcher.setSimilarity(constantSimilarity);

    BooleanQuery.Builder clauses = new BooleanQuery.Builder();
    for (String hash : hashes) {
        // be aware that the hashFunctionsFileName of the field must match the one you put the hashes in before.
        clauses.add(new BooleanClause(new TermQuery(new Term(hashesFieldName, String.valueOf(hash))),
                BooleanClause.Occur.SHOULD));
    }
    TopDocs candidates = indexSearcher.search(clauses.build(), maxResultsHashBased);

    // Stage 2: re-rank the candidates by feature distance.
    TreeSet<SimpleResult> ranked = new TreeSet<SimpleResult>();
    double maxDistance = 0d;
    for (int idx = 0; idx < candidates.scoreDocs.length; idx++) {
        int docId = candidates.scoreDocs[idx].doc;
        feature.setByteArrayRepresentation(
                reader.document(docId).getBinaryValue(featureFieldName).bytes,
                reader.document(docId).getBinaryValue(featureFieldName).offset,
                reader.document(docId).getBinaryValue(featureFieldName).length);
        double distance = queryFeature.getDistance(feature);

        if (ranked.size() < maximumHits) {
            ranked.add(new SimpleResult(distance, docId));
            maxDistance = Math.max(maxDistance, distance);
        } else if (distance < maxDistance) {
            ranked.add(new SimpleResult(distance, docId));
        }
        // Trim back to the hit limit, moving the distance border after each removal.
        while (ranked.size() > maximumHits) {
            ranked.remove(ranked.last());
            maxDistance = ranked.last().getDistance();
        }
    }
    return new SimpleImageSearchHits(ranked, maxDistance);
}
From source file:net.semanticmetadata.lire.searchers.MetricSpacesImageSearcher.java
License:Open Source License
private ImageSearchHits search(String hashes, GlobalFeature queryFeature, IndexReader reader) throws IOException { // first search by text: IndexSearcher searcher = new IndexSearcher(reader); searcher.setSimilarity(new BaseSimilarity()); QueryParser qp = new QueryParser(hashesFieldName, new WhitespaceAnalyzer()); Query query = null;/*from ww w. j av a 2 s .com*/ try { query = qp.parse(hashes); } catch (ParseException e) { e.printStackTrace(); } if (query == null) return null; TopDocs docs = searcher.search(query, maxResultsHashBased); // System.out.println(docs.totalHits); // then re-rank TreeSet<SimpleResult> resultScoreDocs = new TreeSet<SimpleResult>(); double maxDistance = -1d; double tmpScore; for (int i = 0; i < docs.scoreDocs.length; i++) { feature.setByteArrayRepresentation( reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).bytes, reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).offset, reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).length); tmpScore = queryFeature.getDistance(feature); assert (tmpScore >= 0); if (resultScoreDocs.size() < maximumHits) { resultScoreDocs.add(new SimpleResult(tmpScore, docs.scoreDocs[i].doc)); maxDistance = Math.max(maxDistance, tmpScore); } else if (tmpScore < maxDistance) { // if it is nearer to the sample than at least one of the current set: // remove the last one ... resultScoreDocs.remove(resultScoreDocs.last()); // add the new one ... resultScoreDocs.add(new SimpleResult(tmpScore, docs.scoreDocs[i].doc)); // and set our new distance border ... maxDistance = resultScoreDocs.last().getDistance(); } } assert (resultScoreDocs.size() <= maximumHits); return new SimpleImageSearchHits(resultScoreDocs, maxDistance); }
From source file:net.semanticmetadata.lire.searchers.VisualWordsImageSearcher.java
License:Open Source License
/**
 * Searches the index with the first value of {@code fieldName} taken from {@code doc}.
 *
 * <p>Each hit's distance is the reciprocal of its Lucene score; the largest distance seen
 * is passed along with the result list.
 *
 * @param doc    document providing the query string
 * @param reader reader of the index to search
 * @return the hits, or {@code null} when the query string cannot be parsed (the string and
 *         the stack trace are printed to standard error in that case)
 * @throws IOException if reading from the index fails
 */
public ImageSearchHits search(Document doc, IndexReader reader) throws IOException {
    IndexSearcher isearcher = new IndexSearcher(reader);
    isearcher.setSimilarity(similarity);
    String queryString = doc.getValues(fieldName)[0];

    Query parsed;
    try {
        parsed = qp.parse(queryString);
    } catch (ParseException e) {
        System.err.println(queryString);
        e.printStackTrace();
        return null;
    }

    TopDocs topDocs = isearcher.search(parsed, numMaxHits);
    LinkedList<SimpleResult> results = new LinkedList<SimpleResult>();
    double maxDistance = 0d;
    for (int idx = 0; idx < topDocs.scoreDocs.length; idx++) {
        // Higher score means closer match, so the distance is the inverse of the score.
        double distance = 1d / topDocs.scoreDocs[idx].score;
        maxDistance = Math.max(distance, maxDistance);
        results.add(new SimpleResult(distance, topDocs.scoreDocs[idx].doc));
    }
    return new SimpleImageSearchHits(results, maxDistance);
}
From source file:nl.uva.expose.clustering.SimGraphMaker.java
public HashMap<String, Double> searchAndReturnResults(String queryText, String qId) throws IOException, ParseException { queryText = queryText.replaceAll("AND", "and").replaceAll("OR", "or").replaceAll("NOT", "not"); // to avoid boolean operation! QueryParser qParser = new QueryParser(Version.LUCENE_CURRENT, field, this.analyzer); BooleanQuery.setMaxClauseCount(queryText.split("\\s+").length); Query q = qParser.parse(QueryParser.escape(queryText)); Similarity simFunc = new BM25Similarity(); IndexSearcher isearcher = new IndexSearcher(this.ireader); isearcher.setSimilarity(simFunc); TopFieldCollector tfc = TopFieldCollector.create(Sort.RELEVANCE, ireader.numDocs(), true, true, true, false);// w w w. j av a 2s . co m // TopFieldCollector tfc = TopFieldCollector.create(Sort.RELEVANCE,20, true, true, true, false); isearcher.search(q, tfc); TopDocs results = tfc.topDocs(); ScoreDoc[] hits = results.scoreDocs; return fillQueryResultList(hits, qId); }
From source file:nl.uva.mlc.eurovoc.irengine.Retrieval.java
public HashMap<String, Feature> searchAndReturnResults(String queryText, String qId) throws IOException, ParseException { queryText = queryText.replaceAll("AND", "and").replaceAll("OR", "or").replaceAll("NOT", "not"); // to avoid boolean operation! QueryParser qParser = new QueryParser(Version.LUCENE_CURRENT, field, this.analyzer); BooleanQuery.setMaxClauseCount(queryText.split("\\s+").length); Query q = qParser.parse(QueryParser.escape(queryText)); this.simFunction = SIM_FUNCS[SimilarityFunction.valueOf(SimFName).ordinal()]; Similarity simFunc = this.simFunction; IndexSearcher isearcher = new IndexSearcher(this.ireader); isearcher.setSimilarity(simFunc); TopFieldCollector tfc = TopFieldCollector.create(Sort.RELEVANCE, ireader.numDocs(), true, true, true, false);/*from www .j a v a 2 s .com*/ isearcher.search(q, tfc); TopDocs results = tfc.topDocs(); ScoreDoc[] hits = results.scoreDocs; return fillQueryResultList(hits, qId); }
From source file:org.codice.ddf.spatial.geocoding.query.GeoNamesQueryLuceneDirectoryIndex.java
License:Open Source License
/**
 * Creates an {@code IndexSearcher} over the given reader and configures it with
 * {@code GeoNamesLuceneIndexer.SIMILARITY}.
 *
 * @param indexReader reader the searcher operates on
 * @return the configured searcher
 */
@Override
protected IndexSearcher createIndexSearcher(final IndexReader indexReader) {
    final IndexSearcher searcher = new IndexSearcher(indexReader);
    searcher.setSimilarity(GeoNamesLuceneIndexer.SIMILARITY);
    return searcher;
}
From source file:org.compass.core.lucene.engine.manager.DefaultLuceneSearchEngineIndexManager.java
License:Apache License
/**
 * Creates an {@code IndexSearcher} over the given reader and configures it with the
 * search similarity supplied by the search engine factory's similarity manager.
 *
 * @param reader reader the searcher operates on
 * @return the configured searcher
 */
public IndexSearcher openIndexSearcher(IndexReader reader) {
    IndexSearcher indexSearcher = new IndexSearcher(reader);
    indexSearcher.setSimilarity(
            searchEngineFactory.getSimilarityManager().getSearchSimilarity());
    return indexSearcher;
}