Usage examples for org.apache.lucene.index.IndexReader#maxDoc()
// Declaration of IndexReader.maxDoc(). The examples on this page call it to obtain the
// exclusive upper bound for loops over document numbers (k = 0 .. maxDoc() - 1).
public abstract int maxDoc();
From source file:net.semanticmetadata.lire.GeneralTest.java
License:Open Source License
public void testCreateAndSearchSmallIndex() throws IOException { for (int i = 0, buildersLength = builders.length; i < buildersLength; i++) { DocumentBuilder b = builders[i]; // create an index with a specific builder: IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-small", true); for (String identifier : testFiles) { Document doc = b.createDocument(new FileInputStream(testFilesPath + identifier), identifier); doc.add(new StoredField("video_file", "surgery1.mp4")); doc.add(new StoredField("timestamp", "25")); iw.addDocument(doc);//from w w w. j a va 2 s .c o m } iw.close(); ImageSearcher s = searchers[i]; IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-small"))); for (int k = 0; k < reader.maxDoc(); k++) { Document query = reader.document(k); ImageSearchHits hits = s.search(query, reader); for (int y = 0; y < hits.length(); y++) { Document result = hits.doc(y); if (y == 0) { // check if the first result is the query: assertEquals(result.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0] .equals(query.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), true); System.out.println(result.getValues("video_file")[0]); } else { // check if they are ordered by distance: assertEquals(hits.score(y) < hits.score(y - 1), true); } } } } }
From source file:net.semanticmetadata.lire.GeneralTest.java
License:Open Source License
public void testReadIndex() throws IOException { IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("ucid-index-39997508"))); for (int k = 0; k < reader.maxDoc(); k++) { Document document = reader.document(k); BytesRef b = document.getField("featureCEDDLoDe_Hist").binaryValue(); double[] doubles = SerializationUtils.toDoubleArray(b.bytes, b.offset, b.length); if (document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0].endsWith("00008.png")) System.out.println(Arrays.toString(doubles)); }//from www . j a v a 2s.co m // check lucene tuorials and docs IndexSearcher is = new IndexSearcher(reader); TopDocs td = is.search(new TermQuery(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, "")), 10); for (int i = 0; i < td.scoreDocs.length; i++) { ScoreDoc scoreDoc = td.scoreDocs[i]; Document document = reader.document(scoreDoc.doc); } }
From source file:net.semanticmetadata.lire.imageanalysis.CEDDTest.java
License:Open Source License
public void testIndexSerialization() throws IOException { ParallelIndexer pi = new ParallelIndexer(8, "test-idx", "testdata\\UCID", true) { @Override/*from w w w . j a v a 2 s.com*/ public void addBuilders(ChainedDocumentBuilder builder) { builder.addBuilder(new GenericDocumentBuilder(CEDD.class)); } }; pi.run(); IndexReader ir = DirectoryReader.open(FSDirectory.open(new File("test-idx"))); CEDD tmp = new CEDD(); CEDD idx = new CEDD(); for (int i = 0; i < ir.maxDoc(); i++) { Document d = ir.document(i); BytesRef ref = d.getBinaryValue(new CEDD().getFieldName()); idx.setByteArrayRepresentation(ref.bytes, ref.offset, ref.length); tmp.extract(ImageIO.read(new File(d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]))); for (int j = 0; j < tmp.getDoubleHistogram().length; j++) { double v = tmp.getDoubleHistogram()[j]; if (Math.abs(v - idx.getDoubleHistogram()[j]) > 0.1) { System.err.println( d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0] + " error at position " + j); break; } } // assertEquals((double) idx.getDistance(tmp), 0d, 0.00001); } }
From source file:net.semanticmetadata.lire.imageanalysis.OpponentHistogramTest.java
License:Open Source License
public void testFastSearch() throws IOException { Codec.forName("LireCustomCodec"); // ParallelIndexer pin = new ParallelIndexer(7, "./index-fast-3", "testdata/wang-1000") { ParallelIndexer pin = new ParallelIndexer(7, "./index-fast-3", "D:\\DataSets\\Flickrphotos\\01", true) { @Override/* w w w . j a va2 s . c om*/ public void addBuilders(ChainedDocumentBuilder builder) { builder.addBuilder(DocumentBuilderFactory.getOpponentHistogramDocumentBuilder()); } }; pin.run(); IndexReader ir = DirectoryReader.open(MMapDirectory.open(new File("./index-fast-3"))); System.out.println("ir.maxDoc() = " + ir.maxDoc()); long ms = System.currentTimeMillis(); ImageSearcher is = new FastOpponentImageSearcher(50); ms = System.currentTimeMillis() - ms; System.out.println("init ms = " + ms); ms = System.currentTimeMillis(); for (int i = 0; i < 100; i++) is.search(ir.document(i), ir); ms = System.currentTimeMillis() - ms; System.out.println("cached ms = " + ms); is = ImageSearcherFactory.createOpponentHistogramSearcher(50); ms = System.currentTimeMillis(); for (int i = 0; i < 100; i++) is.search(ir.document(i), ir); ms = System.currentTimeMillis() - ms; System.out.println("read from Lucene ms = " + ms); }
From source file:net.semanticmetadata.lire.imageanalysis.ScalableColorTest.java
License:Open Source License
public void testIndexSerialization() throws IOException { ParallelIndexer pi = new ParallelIndexer(8, "test-idx", "testdata\\UCID", true) { @Override//from w w w. j a v a2 s. co m public void addBuilders(ChainedDocumentBuilder builder) { builder.addBuilder(new GenericDocumentBuilder(ScalableColor.class)); } }; pi.run(); IndexReader ir = DirectoryReader.open(FSDirectory.open(new File("test-idx"))); ScalableColor tmp = new ScalableColor(); ScalableColor idx = new ScalableColor(); for (int i = 0; i < ir.maxDoc(); i++) { Document d = ir.document(i); BytesRef ref = d.getBinaryValue(new ScalableColor().getFieldName()); idx.setByteArrayRepresentation(ref.bytes, ref.offset, ref.length); tmp.extract(ImageIO.read(new File(d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]))); for (int j = 0; j < tmp.getDoubleHistogram().length; j++) { double v = tmp.getDoubleHistogram()[j]; if (Math.abs(v - idx.getDoubleHistogram()[j]) > 0.1) { System.err.println( d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0] + " error at position " + j); break; } } // assertEquals((double) idx.getDistance(tmp), 0d, 0.00001); } }
From source file:net.semanticmetadata.lire.indexing.IndexVisualWordsTest.java
License:Open Source License
public void testIndexMissingFiles() throws IOException { // first delete some of the existing ones ... System.out.println("Deleting visual words from docs ..."); IndexReader ir = DirectoryReader.open(FSDirectory.open(new File(index))); IndexWriter iw = LuceneUtils.createIndexWriter(index, false); int maxDocs = ir.maxDoc(); for (int i = 0; i < maxDocs / 10; i++) { Document d = ir.document(i); // d.removeFields(DocumentBuilder.FIELD_NAME_SURF + DocumentBuilder.FIELD_NAME_BOVW); d.removeFields(DocumentBuilder.FIELD_NAME_SURF + DocumentBuilder.FIELD_NAME_BOVW); // d.removeFields(DocumentBuilder.FIELD_NAME_SURF_LOCAL_FEATURE_HISTOGRAM); d.removeFields(DocumentBuilder.FIELD_NAME_SURF + DocumentBuilder.FIELD_NAME_BOVW_VECTOR); // d.removeFields(DocumentBuilder.FIELD_NAME_SURF); iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), d); }// ww w .ja va 2 s . c o m System.out.println("# of deleted docs: " + maxDocs / 10); System.out.println("Optimizing and closing ..."); iw.close(); ir.close(); System.out.println("Creating new visual words ..."); BOVWBuilder surfFeatureHistogramBuilder = new BOVWBuilder( DirectoryReader.open(FSDirectory.open(new File(index))), new SurfFeature(), numSamples, clusters); // surfFeatureHistogramBuilder.indexMissing(); // System.out.println("Finished."); }
From source file:net.semanticmetadata.lire.indexing.MetricSpacesTest.java
License:Open Source License
/**
 * Times up to ten searches through {@code MetricSpacesInvertedListIndexing} against the same
 * number of searches through the CEDD image searcher, using the first documents of the index
 * at {@code indexPath} as queries. Both timings are printed as {@code ms = ...}; nothing is
 * asserted.
 *
 * @throws IOException if the index cannot be opened or searched
 */
public void testPerformance() throws IOException {
    MetricSpacesInvertedListIndexing mes = MetricSpacesInvertedListIndexing.getDefaultInstance();
    // try-with-resources: the reader is released even if a search fails.
    try (IndexReader reader = mes.getIndexReader(indexPath)) {
        System.out.println(reader.maxDoc() + " documents");
        // Never ask for a document number the index does not have.
        int numSearches = Math.min(10, reader.maxDoc());

        long ms = System.currentTimeMillis();
        for (int i = 0; i < numSearches; i++) {
            mes.search(reader.document(i), indexPath);
        }
        ms = System.currentTimeMillis() - ms;
        System.out.println("ms = " + ms);

        ImageSearcher ceddSearcher = ImageSearcherFactory.createCEDDImageSearcher(100);
        ms = System.currentTimeMillis();
        for (int i = 0; i < numSearches; i++) {
            ceddSearcher.search(reader.document(i), reader);
        }
        ms = System.currentTimeMillis() - ms;
        System.out.println("ms = " + ms);
    }
}
From source file:net.semanticmetadata.lire.searchers.TestSearching.java
License:Open Source License
public void testSearch() throws IOException, IllegalAccessException, InstantiationException { Cluster[] cvsurf512 = Cluster.readClusters(codebookPath + "CvSURF512"); Cluster[] simpleceddcvsurf512 = Cluster.readClusters(codebookPath + "SIMPLEdetCVSURFCEDD512"); ParallelIndexer parallelIndexer = new ParallelIndexer(DocumentBuilder.NUM_OF_THREADS, indexPath, testExtensive, numOfClusters, numOfDocsForVocabulary, aggregatorClass); parallelIndexer.addExtractor(globalFeatureClass); parallelIndexer.addExtractor(localFeatureClass, cvsurf512); parallelIndexer.addExtractor(globalFeatureClass, keypointDetector, simpleceddcvsurf512); parallelIndexer.run();// w w w .ja v a 2 s . c o m BufferedImage image = ImageIO.read(new FileInputStream(imageToSearch)); IndexReader reader = DirectoryReader .open(new RAMDirectory(FSDirectory.open(Paths.get(indexPath)), IOContext.READONCE)); System.out.println("Documents in the reader: " + reader.maxDoc()); GenericFastImageSearcher ceddSearcher = new GenericFastImageSearcher(10, globalFeatureClass, true, reader); ImageSearchHits ceddhits = ceddSearcher.search(image, reader); String hitFile; for (int y = 0; y < ceddhits.length(); y++) { hitFile = reader.document(ceddhits.documentID(y)).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; hitFile = hitFile.substring(hitFile.lastIndexOf('\\') + 1); System.out.println(y + ". " + hitFile + " " + ceddhits.score(y)); } System.out.println(); GenericFastImageSearcher cvsurfsearcher = new GenericFastImageSearcher(10, localFeatureClass, aggregatorClass.newInstance(), 512, true, reader, indexPath + ".config"); ImageSearchHits cvsurfhits = cvsurfsearcher.search(image, reader); for (int y = 0; y < cvsurfhits.length(); y++) { hitFile = reader.document(cvsurfhits.documentID(y)).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; hitFile = hitFile.substring(hitFile.lastIndexOf('\\') + 1); System.out.println(y + ". 
" + hitFile + " " + cvsurfhits.score(y)); } System.out.println(); GenericFastImageSearcher simpleceddcvsurfsearcher = new GenericFastImageSearcher(10, globalFeatureClass, keypointDetector, aggregatorClass.newInstance(), 512, true, reader, indexPath + ".config"); ImageSearchHits simpleceddcvsurfhits = simpleceddcvsurfsearcher.search(image, reader); for (int y = 0; y < simpleceddcvsurfhits.length(); y++) { hitFile = reader.document(simpleceddcvsurfhits.documentID(y)) .getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; hitFile = hitFile.substring(hitFile.lastIndexOf('\\') + 1); System.out.println(y + ". " + hitFile + " " + simpleceddcvsurfhits.score(y)); } System.out.println(); }
From source file:net.semanticmetadata.lire.searchers.TestSearching.java
License:Open Source License
public void testSearchMulImages() throws IOException, IllegalAccessException, InstantiationException { Cluster[] cvsurf512 = Cluster.readClusters(codebookPath + "CvSURF512"); Cluster[] simpleceddcvsurf512 = Cluster.readClusters(codebookPath + "SIMPLEdetCVSURFCEDD512"); ParallelIndexer parallelIndexer = new ParallelIndexer(DocumentBuilder.NUM_OF_THREADS, indexPath, testExtensive, numOfClusters, numOfDocsForVocabulary, aggregatorClass); parallelIndexer.addExtractor(globalFeatureClass); parallelIndexer.addExtractor(localFeatureClass, cvsurf512); parallelIndexer.addExtractor(globalFeatureClass, keypointDetector, simpleceddcvsurf512); parallelIndexer.run();//from w w w.j a va 2 s . c om ArrayList<String> images = FileUtils.readFileLines(new File(testExtensive), true); IndexReader reader = DirectoryReader .open(new RAMDirectory(FSDirectory.open(Paths.get(indexPath)), IOContext.READONCE)); System.out.println("Documents in the reader: " + reader.maxDoc()); GenericFastImageSearcher ceddSearcher = new GenericFastImageSearcher(1, globalFeatureClass, true, reader); GenericFastImageSearcher cvsurfsearcher = new GenericFastImageSearcher(1, localFeatureClass, aggregatorClass.newInstance(), 512, true, reader, indexPath + ".config"); GenericFastImageSearcher simpleceddcvsurfsearcher = new GenericFastImageSearcher(1, globalFeatureClass, keypointDetector, aggregatorClass.newInstance(), 512, true, reader, indexPath + ".config"); BufferedImage image; ImageSearchHits ceddhits, cvsurfhits, simpleceddcvsurfhits; String hitFile; int counter = 0; for (String next : images) { image = ImageIO.read(new FileInputStream(next)); next = next.substring(next.lastIndexOf('\\') + 1); System.out.println(counter + " ~ " + next); ceddhits = ceddSearcher.search(image, reader); hitFile = reader.document(ceddhits.documentID(0)).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; hitFile = hitFile.substring(hitFile.lastIndexOf('\\') + 1); if (next.equals(hitFile)) System.out.println(0 + ". 
" + hitFile + " " + ceddhits.score(0)); else System.err.println("ERROR " + hitFile + " " + ceddhits.score(0) + " ERROR"); cvsurfhits = cvsurfsearcher.search(image, reader); hitFile = reader.document(cvsurfhits.documentID(0)).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; hitFile = hitFile.substring(hitFile.lastIndexOf('\\') + 1); if (next.equals(hitFile)) System.out.println(0 + ". " + hitFile + " " + cvsurfhits.score(0)); else System.err.println("ERROR " + hitFile + " " + cvsurfhits.score(0) + " ERROR"); simpleceddcvsurfhits = simpleceddcvsurfsearcher.search(image, reader); hitFile = reader.document(simpleceddcvsurfhits.documentID(0)) .getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; hitFile = hitFile.substring(hitFile.lastIndexOf('\\') + 1); if (next.equals(hitFile)) System.out.println(0 + ". " + hitFile + " " + simpleceddcvsurfhits.score(0)); else System.err.println("ERROR " + hitFile + " " + simpleceddcvsurfhits.score(0) + " ERROR"); counter++; System.out.println(); } }
From source file:net.semanticmetadata.lire.searchers.TestSearching.java
License:Open Source License
/**
 * Builds a main index from {@code testExtensiveRed} and a second index from
 * {@code testExtensiveBlack} (constructed with the main index path as its last argument),
 * then uses every non-deleted document of the second index as a query against the main index.
 * The hits of the global, local, and keypoint-detector based searchers are printed per query.
 *
 * @throws IOException            if a codebook or either index cannot be read
 * @throws IllegalAccessException if the aggregator class cannot be instantiated
 * @throws InstantiationException if the aggregator class cannot be instantiated
 */
public void testSeparateIndex() throws IOException, IllegalAccessException, InstantiationException {
    Cluster[] cvsurf512 = Cluster.readClusters(codebookPath + "CvSURF512");
    Cluster[] simpleceddcvsurf512 = Cluster.readClusters(codebookPath + "SIMPLEdetCVSURFCEDD512");

    ParallelIndexer parallelIndexer = new ParallelIndexer(DocumentBuilder.NUM_OF_THREADS, indexPath,
            testExtensiveRed, numOfClusters, numOfDocsForVocabulary, aggregatorClass);
    parallelIndexer.addExtractor(globalFeatureClass);
    parallelIndexer.addExtractor(localFeatureClass, cvsurf512);
    parallelIndexer.addExtractor(globalFeatureClass, keypointDetector, simpleceddcvsurf512);
    parallelIndexer.run();

    ParallelIndexer parallelIndexerSeparate = new ParallelIndexer(DocumentBuilder.NUM_OF_THREADS,
            indexPathSeparate, testExtensiveBlack, indexPath);
    parallelIndexerSeparate.run();

    // try-with-resources: the reader is released even if a later step fails.
    try (IndexReader readerIndex = DirectoryReader
            .open(new RAMDirectory(FSDirectory.open(Paths.get(indexPath)), IOContext.READONCE))) {
        System.out.println("Documents in the reader: " + readerIndex.maxDoc());

        try (IndexReader readerQueries = DirectoryReader.open(
                new RAMDirectory(FSDirectory.open(Paths.get(indexPathSeparate)), IOContext.READONCE))) {
            System.out.println("Documents in the reader: " + readerQueries.maxDoc());

            GenericFastImageSearcher ceddSearcher = new GenericFastImageSearcher(5, globalFeatureClass,
                    true, readerIndex);
            GenericFastImageSearcher cvsurfsearcher = new GenericFastImageSearcher(5, localFeatureClass,
                    aggregatorClass.newInstance(), 512, true, readerIndex, indexPath + ".config");
            GenericFastImageSearcher simpleceddcvsurfsearcher = new GenericFastImageSearcher(5,
                    globalFeatureClass, keypointDetector, aggregatorClass.newInstance(), 512, true,
                    readerIndex, indexPath + ".config");

            Bits liveDocs = MultiFields.getLiveDocs(readerQueries);
            int counter = 0;
            for (int i = 0; i < readerQueries.maxDoc(); i++) {
                // Skip document numbers that belong to deleted documents.
                if (readerQueries.hasDeletions() && !liveDocs.get(i)) {
                    continue;
                }

                Document queryDoc = readerQueries.document(i);
                String queryfile = stripParentPath(
                        queryDoc.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
                System.out.println(counter + ". Query image: " + queryfile);

                // Run all three searches before printing, as the output is grouped per query.
                ImageSearchHits ceddhits = ceddSearcher.search(queryDoc, readerIndex);
                ImageSearchHits cvsurfhits = cvsurfsearcher.search(queryDoc, readerIndex);
                ImageSearchHits simpleceddcvsurfhits = simpleceddcvsurfsearcher.search(queryDoc, readerIndex);

                printSeparateIndexHits("Global:", ceddhits, readerIndex);
                printSeparateIndexHits("Local:", cvsurfhits, readerIndex);
                printSeparateIndexHits("Simple:", simpleceddcvsurfhits, readerIndex);

                System.out.println();
                counter++;
            }
        }
    }
}

/** Prints {@code label} followed by rank, file name, and score of every hit. */
private void printSeparateIndexHits(String label, ImageSearchHits hits, IndexReader readerIndex)
        throws IOException {
    System.out.println(label);
    for (int y = 0; y < hits.length(); y++) {
        String hitFile = stripParentPath(readerIndex.document(hits.documentID(y))
                .getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
        System.out.println(y + ". " + hitFile + " " + hits.score(y));
    }
}

/** Returns the part of {@code path} after its last {@code \} or {@code /} separator. */
private static String stripParentPath(String path) {
    return path.substring(Math.max(path.lastIndexOf('\\'), path.lastIndexOf('/')) + 1);
}