List of usage examples for org.apache.lucene.index IndexReader document
public final Document document(int docID) throws IOException
Returns the stored fields of the n-th Document in this index.

From source file: net.semanticmetadata.lire.indexing.MetricSpacesTest.java
License:Open Source License
public void testMetrics() throws IOException { String smallIdx = "wang-cedd"; MetricSpacesInvertedListIndexing ms = MetricSpacesInvertedListIndexing.getDefaultInstance(); MetricSpacesInvertedListIndexing.numReferenceObjectsUsed = 10; MetricSpacesInvertedListIndexing.numReferenceObjects = 50; TopDocs docs = ms.search(ImageIO.read(new FileInputStream("wang-data-1000/10.jpg")), smallIdx); IndexReader ir = DirectoryReader.open(FSDirectory.open(new File(smallIdx))); for (int i = 0; i < docs.scoreDocs.length; i++) { ScoreDoc scoreDoc = docs.scoreDocs[i]; String identifier = ir.document(scoreDoc.doc).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; System.out.println(scoreDoc.score + ": " + identifier + " \t(" + scoreDoc.doc + ")"); }//from w w w. j a va 2s.c om }
From source file:net.semanticmetadata.lire.lucene.TestRerankTextSearch.java
License:Open Source License
public void testSearch() throws IOException, ParseException { // create a Lucene IndexReader and the according IndexSearcher: IndexReader reader = DirectoryReader.open(FSDirectory.open(testIndex)); IndexSearcher searcher = new IndexSearcher(reader); // The QueryParser takes a String and creates a query out of it. Make sure you use the same field // as for indexing, in this case "tags" QueryParser q = new QueryParser(Version.LUCENE_42, "tags", new SimpleAnalyzer(Version.LUCENE_42)); // let's just take the tags of the first document in the index: Query query = q.parse(reader.document(1).getValues("tags")[0]); // now that's the actual search: // NOTE: The number of results here is critical. The less documents are returned here, the // less the image re-ranking can mess up. However, the recall (the absolute number of relevant // documents returned) is also influenced by this. Best to try several values like 10, 100, 200, 500, ... TopDocs results = searcher.search(query, 10); // here we print the results of the text search, just for the win. System.out.println("-----------> SEARCH RESULTS ..."); for (int i = 0; i < results.scoreDocs.length; i++) { ScoreDoc scoreDoc = results.scoreDocs[i]; System.out.print(scoreDoc.score + "\t: "); // reader.document(scoreDoc.doc).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0] gets you the actual image file path. // LIRE manages all needed filed names as static Strings in DocumentBuilder ... System.out.print(/* w ww . j ava 2 s . co m*/ reader.document(scoreDoc.doc).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0] + " -> "); System.out.println(reader.document(scoreDoc.doc).getValues("tags")[0]); } // just for a visual example ... 
this will pop up a browser window FileUtils.browseUri(FileUtils.saveImageResultsToHtml("text", results, reader, reader.document(1).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0])); // and now for the re-ranking: // make sure to use a low level feature that has been indexed -- check the DocumentBuilder in above method. RerankFilter rerank = new RerankFilter(FCTH.class, DocumentBuilder.FIELD_NAME_FCTH); // note that you need the document here, it contains the low level feature ... // if you don't have it but just the image you need to create a new one with the // appropriate DocumentBuilder -- check the DocumentBuilder in above method. ImageSearchHits hitsReranked = rerank.filter(results, reader, reader.document(1)); // and here we print the re-ranked hits: System.out.println("-----------> RERANKED ..."); for (int i = 0; i < hitsReranked.length(); i++) { System.out.print(hitsReranked.score(i) + "\t: "); System.out.print(hitsReranked.doc(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0] + " -> "); System.out.println(hitsReranked.doc(i).getValues("tags")[0]); } // just for a visual example ... this will pop up a browser window. FileUtils.browseUri(FileUtils.saveImageResultsToHtml("reranked", hitsReranked, reader.document(1).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0])); }
From source file:net.semanticmetadata.lire.lucene.TestSearchID.java
License:Open Source License
public void testSearch() throws IOException { // use GeneralTest to create the index ... IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-small"))); IndexSearcher is = new IndexSearcher(reader); TopDocs docs = is.search(new TermQuery(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, "img01.JPG")), 10); Document result = reader.document(docs.scoreDocs[0].doc); System.out.println(result.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); }
From source file:net.semanticmetadata.lire.sampleapp.IndexingAndSearchWithLocalFeatures.java
License:Open Source License
/** * Linear search on the indexed data./*from w ww . j a va2 s.com*/ * @param indexPath * @throws IOException */ public static void search(String indexPath) throws IOException { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath))); // make sure that this matches what you used for indexing (see below) ... ImageSearcher imgSearcher = new GenericFastImageSearcher(1000, CEDD.class, SimpleExtractor.KeypointDetector.CVSURF, new BOVW(), 128, true, reader, indexPath + ".config"); // just a static example with a given image. ImageSearchHits hits = imgSearcher .search(ImageIO.read(new File("testdata/ferrari/black/2828686873_2fa36f83d7_b.jpg")), reader); for (int i = 0; i < hits.length(); i++) { System.out.printf("%.2f: (%d) %s\n", hits.score(i), hits.documentID(i), reader.document(hits.documentID(i)).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); } }
From source file:net.semanticmetadata.lire.sampleapp.Searcher.java
License:Open Source License
public static void main(String[] args) throws IOException { // Checking if arg[0] is there and if it is an image. BufferedImage img = null;// w ww . j av a 2 s. com boolean passed = false; if (args.length > 0) { File f = new File(args[0]); if (f.exists()) { try { img = ImageIO.read(f); passed = true; } catch (IOException e) { e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. } } } if (!passed) { System.out.println("No image given as first argument."); System.out.println("Run \"Searcher <query image>\" to search for <query image>."); System.exit(1); } IndexReader ir = DirectoryReader.open(FSDirectory.open(Paths.get("index"))); ImageSearcher searcher = new GenericFastImageSearcher(30, CEDD.class); // ImageSearcher searcher = new GenericFastImageSearcher(30, AutoColorCorrelogram.class); // searching with a image file ... ImageSearchHits hits = searcher.search(img, ir); // searching with a Lucene document instance ... // ImageSearchHits hits = searcher.search(ir.document(0), ir); for (int i = 0; i < hits.length(); i++) { String fileName = ir.document(hits.documentID(i)).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; System.out.println(hits.score(i) + ": \t" + fileName); } }
From source file:net.semanticmetadata.lire.searchers.BitSamplingImageSearcher.java
License:Open Source License
private ImageSearchHits search(String[] hashes, GlobalFeature queryFeature, IndexReader reader) throws IOException { // first search by text: IndexSearcher searcher = new IndexSearcher(reader); searcher.setSimilarity(new BaseSimilarity()); BooleanQuery query = null;// w w w. j av a 2 s . co m BooleanQuery.Builder builder = new BooleanQuery.Builder(); for (int i = 0; i < hashes.length; i++) { // be aware that the hashFunctionsFileName of the field must match the one you put the hashes in before. if (partialHashes) { if (Math.random() < 0.5) builder.add(new BooleanClause(new TermQuery(new Term(hashesFieldName, hashes[i] + "")), BooleanClause.Occur.SHOULD)); } else builder.add(new BooleanClause(new TermQuery(new Term(hashesFieldName, hashes[i] + "")), BooleanClause.Occur.SHOULD)); } query = builder.build(); TopDocs docs = searcher.search(query, maxResultsHashBased); // System.out.println(docs.totalHits); // then re-rank TreeSet<SimpleResult> resultScoreDocs = new TreeSet<SimpleResult>(); double maxDistance = -1d; double tmpScore; for (int i = 0; i < docs.scoreDocs.length; i++) { feature.setByteArrayRepresentation( reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).bytes, reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).offset, reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).length); tmpScore = queryFeature.getDistance(feature); assert (tmpScore >= 0); if (resultScoreDocs.size() < maximumHits) { resultScoreDocs.add(new SimpleResult(tmpScore, docs.scoreDocs[i].doc)); maxDistance = Math.max(maxDistance, tmpScore); } else if (tmpScore < maxDistance) { // if it is nearer to the sample than at least one of the current set: // remove the last one ... resultScoreDocs.remove(resultScoreDocs.last()); // add the new one ... resultScoreDocs.add(new SimpleResult(tmpScore, docs.scoreDocs[i].doc)); // and set our new distance border ... 
maxDistance = resultScoreDocs.last().getDistance(); } } assert (resultScoreDocs.size() <= maximumHits); return new SimpleImageSearchHits(resultScoreDocs, maxDistance); }
From source file:net.semanticmetadata.lire.searchers.custom.SingleNddCeddImageSearcher.java
License:Open Source License
protected void init(IndexReader reader) { this.reader = reader; if (reader.hasDeletions()) { throw new UnsupportedOperationException( "The index has to be optimized first to be cached! Use IndexWriter.forceMerge(0) to do this."); }// ww w. ja va2s . c om docs = new TreeSet<SimpleResult>(); try { this.cachedInstance = (GlobalFeature) this.descriptorClass.newInstance(); if (fieldName == null) fieldName = this.cachedInstance.getFieldName(); } catch (InstantiationException e) { logger.log(Level.SEVERE, "Error instantiating class for generic image searcher (" + descriptorClass.getName() + "): " + e.getMessage()); } catch (IllegalAccessException e) { logger.log(Level.SEVERE, "Error instantiating class for generic image searcher (" + descriptorClass.getName() + "): " + e.getMessage()); } // put all respective features into an in-memory cache ... if (isCaching && reader != null) { int docs = reader.numDocs(); featureCache = new ArrayList<double[]>(docs); try { Document d; for (int i = 0; i < docs; i++) { d = reader.document(i); cachedInstance.setByteArrayRepresentation(d.getField(fieldName).binaryValue().bytes, d.getField(fieldName).binaryValue().offset, d.getField(fieldName).binaryValue().length); // normalize features,o we can use L1 if (!halfDimensions) { featureCache.add(normalize(cachedInstance.getFeatureVector())); } else { featureCache.add(crunch(cachedInstance.getFeatureVector())); } } } catch (IOException e) { e.printStackTrace(); } } }
From source file:net.semanticmetadata.lire.searchers.custom.TopDocsImageSearcher.java
License:Open Source License
/** * @param results//from ww w .j av a 2s. c o m * @param reader * @param globalFeature * @return the maximum distance found for normalizing. * @throws java.io.IOException */ protected double findSimilar(TopDocs results, IndexReader reader, GlobalFeature globalFeature) throws IOException { double maxDistance = -1d, overallMaxDistance = -1d; boolean hasDeletions = reader.hasDeletions(); // clear result set ... docs.clear(); // Needed for check whether the document is deleted. Bits liveDocs = MultiFields.getLiveDocs(reader); int docs = results.totalHits; for (int i = 0; i < docs; i++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. Document d = reader.document(results.scoreDocs[i].doc); double distance = getDistance(d, globalFeature); assert (distance >= 0); // calculate the overall max distance to normalize score afterwards if (overallMaxDistance < distance) { overallMaxDistance = distance; } // if it is the first document: if (maxDistance < 0) { maxDistance = distance; } // if the array is not full yet: if (this.docs.size() < maxHits) { this.docs.add(new SimpleResult(distance, results.scoreDocs[i].doc)); if (distance > maxDistance) maxDistance = distance; } else if (distance < maxDistance) { // if it is nearer to the sample than at least on of the current set: // remove the last one ... this.docs.remove(this.docs.last()); // add the new one ... this.docs.add(new SimpleResult(distance, results.scoreDocs[i].doc)); // and set our new distance border ... maxDistance = this.docs.last().getDistance(); } } return maxDistance; }
From source file:net.semanticmetadata.lire.searchers.FastOpponentImageSearcher.java
License:Open Source License
/** * @param reader//from w ww . j a v a 2 s . c om * @param globalFeature * @return the maximum distance found for normalizing. * @throws java.io.IOException */ protected double findSimilar(IndexReader reader, GlobalFeature globalFeature) throws IOException { maxDistance = -1f; // clear result set ... docs.clear(); // Needed for check whether the document is deleted. Bits liveDocs = MultiFields.getLiveDocs(reader); Document d; double tmpDistance; int docs = reader.numDocs(); byte[] histogram = globalFeature.getByteArrayRepresentation(); for (int i = 0; i < docs; i++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. d = reader.document(i); tmpDistance = getDistance(d, histogram); assert (tmpDistance >= 0); // calculate the overall max distance to normalize score afterwards // if (overallMaxDistance < tmpDistance) { // overallMaxDistance = tmpDistance; // } // if it is the first document: if (maxDistance < 0) { maxDistance = tmpDistance; } // if the array is not full yet: if (this.docs.size() < maxHits) { this.docs.add(new SimpleResult(tmpDistance, i)); if (tmpDistance > maxDistance) maxDistance = tmpDistance; } else if (tmpDistance < maxDistance) { // if it is nearer to the sample than at least on of the current set: // remove the last one ... this.docs.remove(this.docs.last()); // add the new one ... this.docs.add(new SimpleResult(tmpDistance, i)); // and set our new distance border ... maxDistance = this.docs.last().getDistance(); } } return maxDistance; }
From source file:net.semanticmetadata.lire.searchers.forevaluations.GenericFastImageSearcherForEvaluation.java
License:Open Source License
/** * @param reader/*from ww w . j a va2s . co m*/ * @param lireFeature * @return the maximum distance found for normalizing. * @throws IOException */ protected double findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException { maxDistance = -1d; // clear result set ... docs.clear(); // Needed for check whether the document is deleted. Bits liveDocs = MultiFields.getLiveDocs(reader); Document d; double tmpDistance; int docs = reader.numDocs(); if (!isCaching) { // we read each and every document from the index and then we compare it to the query. for (int i = 0; i < docs; i++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. d = reader.document(i); tmpDistance = getDistance(d, lireFeature); assert (tmpDistance >= 0); // if the array is not full yet: if (this.docs.size() < maxHits) { this.docs.add(new SimpleResultForEvaluation(tmpDistance, i, d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0])); if (tmpDistance > maxDistance) maxDistance = tmpDistance; } else if (tmpDistance < maxDistance) { // if it is nearer to the sample than at least on of the current set: // remove the last one ... this.docs.remove(this.docs.last()); // add the new one ... this.docs.add(new SimpleResultForEvaluation(tmpDistance, i, d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0])); // and set our new distance border ... 
maxDistance = this.docs.last().getDistance(); } } } else { LinkedList<Consumer> tasks = new LinkedList<Consumer>(); LinkedList<Thread> threads = new LinkedList<Thread>(); Consumer consumer; Thread thread; Thread p = new Thread(new Producer()); p.start(); for (int i = 0; i < numThreads; i++) { consumer = new Consumer(lireFeature); thread = new Thread(consumer); thread.start(); tasks.add(consumer); threads.add(thread); } for (Thread next : threads) { try { next.join(); } catch (InterruptedException e) { e.printStackTrace(); } } TreeSet<SimpleResultForEvaluation> tmpDocs; boolean flag; SimpleResultForEvaluation simpleResult; for (Consumer task : tasks) { tmpDocs = task.getResult(); flag = true; while (flag && (tmpDocs.size() > 0)) { simpleResult = tmpDocs.pollFirst(); if (this.docs.size() < maxHits) { this.docs.add(simpleResult); if (simpleResult.getDistance() > maxDistance) maxDistance = simpleResult.getDistance(); } else if (simpleResult.getDistance() < maxDistance) { // this.docs.remove(this.docs.last()); this.docs.pollLast(); this.docs.add(simpleResult); maxDistance = this.docs.last().getDistance(); } else flag = false; } } } return maxDistance; }