List of usage examples for org.apache.lucene.index IndexReader document
public final Document document(int docID) throws IOException
Returns the stored fields of the nth Document in this index.
From source file:net.semanticmetadata.lire.GeneralTest.java
License:Open Source License
public void testCreateAndSearchSmallIndex() throws IOException { for (int i = 0, buildersLength = builders.length; i < buildersLength; i++) { DocumentBuilder b = builders[i]; // create an index with a specific builder: IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-small", true); for (String identifier : testFiles) { Document doc = b.createDocument(new FileInputStream(testFilesPath + identifier), identifier); doc.add(new StoredField("video_file", "surgery1.mp4")); doc.add(new StoredField("timestamp", "25")); iw.addDocument(doc);// w ww. j av a 2 s . c om } iw.close(); ImageSearcher s = searchers[i]; IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-small"))); for (int k = 0; k < reader.maxDoc(); k++) { Document query = reader.document(k); ImageSearchHits hits = s.search(query, reader); for (int y = 0; y < hits.length(); y++) { Document result = hits.doc(y); if (y == 0) { // check if the first result is the query: assertEquals(result.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0] .equals(query.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), true); System.out.println(result.getValues("video_file")[0]); } else { // check if they are ordered by distance: assertEquals(hits.score(y) < hits.score(y - 1), true); } } } } }
From source file:net.semanticmetadata.lire.GeneralTest.java
License:Open Source License
public void testReadIndex() throws IOException { IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("ucid-index-39997508"))); for (int k = 0; k < reader.maxDoc(); k++) { Document document = reader.document(k); BytesRef b = document.getField("featureCEDDLoDe_Hist").binaryValue(); double[] doubles = SerializationUtils.toDoubleArray(b.bytes, b.offset, b.length); if (document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0].endsWith("00008.png")) System.out.println(Arrays.toString(doubles)); }/*from ww w .ja va 2 s . co m*/ // check lucene tuorials and docs IndexSearcher is = new IndexSearcher(reader); TopDocs td = is.search(new TermQuery(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, "")), 10); for (int i = 0; i < td.scoreDocs.length; i++) { ScoreDoc scoreDoc = td.scoreDocs[i]; Document document = reader.document(scoreDoc.doc); } }
From source file:net.semanticmetadata.lire.GeneralTest.java
License:Open Source License
/** * There was an error that images with the same score but different documents in the index * were not included in the result list. Here's the test for that. *///from ww w . j av a 2s . co m public void testDuplicatesInIndex() throws IOException { indexFiles("src\\test\\resources\\images", "index-large-new", 0, true); indexFiles("src\\test\\resources\\images", "index-large-new", 0, false); indexFiles("src\\test\\resources\\images", "index-large-new", 0, false); ImageSearcher s = searchers[0]; IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index-large-new"))); Document query = reader.document(0); ImageSearchHits hits = s.search(query, reader); FileUtils.saveImageResultsToPng("duplicate_", hits, query.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); }
From source file:net.semanticmetadata.lire.GeneralTest.java
License:Open Source License
public void testSearchIndexLarge() throws IOException { for (int i = 0; i < 10; i++) { int queryDocID = (int) (Math.random() * 800); // queryDocID = 877 * (i + 1); IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index-large"))); // select one feature for the large index: int featureIndex = 0; int count = 0; long ms = System.currentTimeMillis(); ImageSearchHits hits = searchers[featureIndex].search(reader.document(queryDocID), reader); for (int j = 0; j < hits.length(); j++) { String fileName = hits.doc(j).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; System.out.println(hits.score(j) + ": \t" + fileName); }// w w w . j a v a 2 s . c om // FileUtils.saveImageResultsToHtml("GeneralTest_testSearchIndexLarge_", hits, reader.document(10).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); FileUtils.saveImageResultsToPng("GeneralTest_testSearchIndexLarge_" + i + "_", hits, reader.document(queryDocID).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); } }
From source file:net.semanticmetadata.lire.GeneralTest.java
License:Open Source License
public void testSearchRunTime() throws IOException { int queryDocID; IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index-large-new"))); int featureIndex = 0; ImageSearchHits hits = searchers[featureIndex].search(reader.document(0), reader); hits = searchers[featureIndex].search(reader.document(1), reader); long ms = System.currentTimeMillis(); for (int i = 0; i < 100; i++) { queryDocID = i;/* www .j a va2 s . c om*/ // select one feature for the large index: hits = searchers[featureIndex].search(reader.document(queryDocID), reader); } ms = System.currentTimeMillis() - ms; System.out.println("ms = " + ms / 100); }
From source file:net.semanticmetadata.lire.GeneralTest.java
License:Open Source License
public void testRerankFilters() throws IOException { int queryDocID = (int) (Math.random() * 10000); IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index-large"))); // select one feature for the large index: int featureIndex = 4; int count = 0; long ms = System.currentTimeMillis(); ImageSearchHits hits = searchers[featureIndex].search(reader.document(queryDocID), reader); RerankFilter rerank = new RerankFilter(featureClasses[0], DocumentBuilder.FIELD_NAME_CEDD); LsaFilter lsa = new LsaFilter(featureClasses[0], DocumentBuilder.FIELD_NAME_CEDD); FileUtils.saveImageResultsToPng("GeneralTest_rerank_0_old", hits, reader.document(queryDocID).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); hits = rerank.filter(hits, reader.document(queryDocID)); FileUtils.saveImageResultsToPng("GeneralTest_rerank_1_new", hits, reader.document(queryDocID).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); hits = lsa.filter(hits, reader.document(queryDocID)); FileUtils.saveImageResultsToPng("GeneralTest_rerank_2_lsa", hits, reader.document(queryDocID).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); }
From source file:net.semanticmetadata.lire.imageanalysis.CEDDTest.java
License:Open Source License
public void testIndexSerialization() throws IOException { ParallelIndexer pi = new ParallelIndexer(8, "test-idx", "testdata\\UCID", true) { @Override/*from w w w. j a va 2 s .co m*/ public void addBuilders(ChainedDocumentBuilder builder) { builder.addBuilder(new GenericDocumentBuilder(CEDD.class)); } }; pi.run(); IndexReader ir = DirectoryReader.open(FSDirectory.open(new File("test-idx"))); CEDD tmp = new CEDD(); CEDD idx = new CEDD(); for (int i = 0; i < ir.maxDoc(); i++) { Document d = ir.document(i); BytesRef ref = d.getBinaryValue(new CEDD().getFieldName()); idx.setByteArrayRepresentation(ref.bytes, ref.offset, ref.length); tmp.extract(ImageIO.read(new File(d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]))); for (int j = 0; j < tmp.getDoubleHistogram().length; j++) { double v = tmp.getDoubleHistogram()[j]; if (Math.abs(v - idx.getDoubleHistogram()[j]) > 0.1) { System.err.println( d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0] + " error at position " + j); break; } } // assertEquals((double) idx.getDistance(tmp), 0d, 0.00001); } }
From source file:net.semanticmetadata.lire.imageanalysis.OpponentHistogramTest.java
License:Open Source License
public void testFastSearch() throws IOException { Codec.forName("LireCustomCodec"); // ParallelIndexer pin = new ParallelIndexer(7, "./index-fast-3", "testdata/wang-1000") { ParallelIndexer pin = new ParallelIndexer(7, "./index-fast-3", "D:\\DataSets\\Flickrphotos\\01", true) { @Override//from ww w .ja v a 2 s . c om public void addBuilders(ChainedDocumentBuilder builder) { builder.addBuilder(DocumentBuilderFactory.getOpponentHistogramDocumentBuilder()); } }; pin.run(); IndexReader ir = DirectoryReader.open(MMapDirectory.open(new File("./index-fast-3"))); System.out.println("ir.maxDoc() = " + ir.maxDoc()); long ms = System.currentTimeMillis(); ImageSearcher is = new FastOpponentImageSearcher(50); ms = System.currentTimeMillis() - ms; System.out.println("init ms = " + ms); ms = System.currentTimeMillis(); for (int i = 0; i < 100; i++) is.search(ir.document(i), ir); ms = System.currentTimeMillis() - ms; System.out.println("cached ms = " + ms); is = ImageSearcherFactory.createOpponentHistogramSearcher(50); ms = System.currentTimeMillis(); for (int i = 0; i < 100; i++) is.search(ir.document(i), ir); ms = System.currentTimeMillis() - ms; System.out.println("read from Lucene ms = " + ms); }
From source file:net.semanticmetadata.lire.imageanalysis.ScalableColorTest.java
License:Open Source License
public void testIndexSerialization() throws IOException { ParallelIndexer pi = new ParallelIndexer(8, "test-idx", "testdata\\UCID", true) { @Override/*from w w w . j a v a 2s . c om*/ public void addBuilders(ChainedDocumentBuilder builder) { builder.addBuilder(new GenericDocumentBuilder(ScalableColor.class)); } }; pi.run(); IndexReader ir = DirectoryReader.open(FSDirectory.open(new File("test-idx"))); ScalableColor tmp = new ScalableColor(); ScalableColor idx = new ScalableColor(); for (int i = 0; i < ir.maxDoc(); i++) { Document d = ir.document(i); BytesRef ref = d.getBinaryValue(new ScalableColor().getFieldName()); idx.setByteArrayRepresentation(ref.bytes, ref.offset, ref.length); tmp.extract(ImageIO.read(new File(d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]))); for (int j = 0; j < tmp.getDoubleHistogram().length; j++) { double v = tmp.getDoubleHistogram()[j]; if (Math.abs(v - idx.getDoubleHistogram()[j]) > 0.1) { System.err.println( d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0] + " error at position " + j); break; } } // assertEquals((double) idx.getDistance(tmp), 0d, 0.00001); } }
From source file:net.semanticmetadata.lire.impl.BitSamplingImageSearcher.java
License:Open Source License
private ImageSearchHits search(String[] hashes, LireFeature queryFeature, IndexReader reader) throws IOException { // first search by text: IndexSearcher searcher = new IndexSearcher(reader); searcher.setSimilarity(new BaseSimilarity()); BooleanQuery query = new BooleanQuery(); for (int i = 0; i < hashes.length; i++) { // be aware that the hashFunctionsFileName of the field must match the one you put the hashes in before. if (partialHashes) { if (Math.random() < 0.5) query.add(new BooleanClause(new TermQuery(new Term(hashesFieldName, hashes[i] + "")), BooleanClause.Occur.SHOULD)); } else//from w ww .j a v a 2 s .com query.add(new BooleanClause(new TermQuery(new Term(hashesFieldName, hashes[i] + "")), BooleanClause.Occur.SHOULD)); } TopDocs docs = searcher.search(query, maxResultsHashBased); // System.out.println(docs.totalHits); // then re-rank TreeSet<SimpleResult> resultScoreDocs = new TreeSet<SimpleResult>(); float maxDistance = -1f; float tmpScore; for (int i = 0; i < docs.scoreDocs.length; i++) { feature.setByteArrayRepresentation( reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).bytes, reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).offset, reader.document(docs.scoreDocs[i].doc).getBinaryValue(featureFieldName).length); tmpScore = queryFeature.getDistance(feature); assert (tmpScore >= 0); if (resultScoreDocs.size() < maximumHits) { resultScoreDocs.add( new SimpleResult(tmpScore, reader.document(docs.scoreDocs[i].doc), docs.scoreDocs[i].doc)); maxDistance = Math.max(maxDistance, tmpScore); } else if (tmpScore < maxDistance) { // if it is nearer to the sample than at least one of the current set: // remove the last one ... resultScoreDocs.remove(resultScoreDocs.last()); // add the new one ... resultScoreDocs.add( new SimpleResult(tmpScore, reader.document(docs.scoreDocs[i].doc), docs.scoreDocs[i].doc)); // and set our new distance border ... 
maxDistance = resultScoreDocs.last().getDistance(); } } assert (resultScoreDocs.size() <= maximumHits); return new SimpleImageSearchHits(resultScoreDocs, maxDistance); }