Example usage for org.apache.lucene.index IndexReader maxDoc

List of usage examples for org.apache.lucene.index IndexReader maxDoc

Introduction

On this page you can find example usages of the maxDoc() method of org.apache.lucene.index.IndexReader.

Prototype

public abstract int maxDoc();

Source Link

Document

Returns one greater than the largest possible document number.

Usage

From source file:net.semanticmetadata.lire.GeneralTest.java

License:Open Source License

/**
 * Builds a small index with every configured document builder and then queries that index
 * with each of its own documents.
 *
 * <p>For every query the first hit must carry the same identifier as the query document, and
 * every later hit must have a strictly smaller score than the hit before it.
 *
 * @throws IOException if a test image or the index cannot be read or written
 */
public void testCreateAndSearchSmallIndex() throws IOException {
    for (int i = 0, buildersLength = builders.length; i < buildersLength; i++) {
        DocumentBuilder b = builders[i];
        // create an index with a specific builder; the writer is closed (and thereby flushed)
        // before the index is opened for reading, even if indexing fails half-way:
        try (IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-small", true)) {
            for (String identifier : testFiles) {
                // release the image file handle as soon as the document has been added
                try (FileInputStream in = new FileInputStream(testFilesPath + identifier)) {
                    Document doc = b.createDocument(in, identifier);
                    doc.add(new StoredField("video_file", "surgery1.mp4"));
                    doc.add(new StoredField("timestamp", "25"));
                    iw.addDocument(doc);
                }
            }
        }

        ImageSearcher s = searchers[i];
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-small")))) {
            for (int k = 0; k < reader.maxDoc(); k++) {
                Document query = reader.document(k);
                ImageSearchHits hits = s.search(query, reader);
                for (int y = 0; y < hits.length(); y++) {
                    Document result = hits.doc(y);
                    if (y == 0) {
                        // check if the first result is the query:
                        assertEquals(result.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]
                                .equals(query.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), true);
                        System.out.println(result.getValues("video_file")[0]);
                    } else {
                        // check if they are ordered by distance:
                        assertEquals(hits.score(y) < hits.score(y - 1), true);
                    }
                }
            }
        }
    }
}

From source file:net.semanticmetadata.lire.GeneralTest.java

License:Open Source License

/**
 * Reads every document of the {@code ucid-index-39997508} index, decodes its
 * {@code featureCEDDLoDe_Hist} field into a double array and prints that array for the
 * document whose identifier ends with {@code 00008.png}. Afterwards runs a term query on
 * the identifier field and loads the stored document of each hit.
 *
 * @throws IOException if the index cannot be opened or read
 */
public void testReadIndex() throws IOException {
    // the reader is closed when the test ends, whether it passes or throws
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("ucid-index-39997508")))) {
        for (int k = 0; k < reader.maxDoc(); k++) {
            Document document = reader.document(k);
            BytesRef b = document.getField("featureCEDDLoDe_Hist").binaryValue();
            double[] doubles = SerializationUtils.toDoubleArray(b.bytes, b.offset, b.length);
            if (document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0].endsWith("00008.png")) {
                System.out.println(Arrays.toString(doubles));
            }
        }

        // check Lucene tutorials and docs
        IndexSearcher is = new IndexSearcher(reader);
        TopDocs td = is.search(new TermQuery(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, "")), 10);
        for (ScoreDoc scoreDoc : td.scoreDocs) {
            // the result is intentionally discarded; only the ability to load the hit is exercised
            reader.document(scoreDoc.doc);
        }
    }
}

From source file:net.semanticmetadata.lire.imageanalysis.CEDDTest.java

License:Open Source License

/**
 * Indexes {@code testdata\UCID} with a CEDD document builder and compares, for every
 * indexed document, the histogram restored from the index with one freshly extracted from
 * the image file named by the document's identifier.
 *
 * <p>The first bin that differs by more than 0.1 is reported on {@code System.err}; the test
 * itself does not fail on a mismatch.
 *
 * @throws IOException if indexing, reading the index or reading an image fails
 */
public void testIndexSerialization() throws IOException {
    ParallelIndexer pi = new ParallelIndexer(8, "test-idx", "testdata\\UCID", true) {
        @Override
        public void addBuilders(ChainedDocumentBuilder builder) {
            builder.addBuilder(new GenericDocumentBuilder(CEDD.class));
        }
    };
    pi.run();

    CEDD tmp = new CEDD();
    CEDD idx = new CEDD();
    // looked up once instead of allocating a new feature per document
    String fieldName = new CEDD().getFieldName();
    try (IndexReader ir = DirectoryReader.open(FSDirectory.open(new File("test-idx")))) {
        for (int i = 0; i < ir.maxDoc(); i++) {
            Document d = ir.document(i);
            String identifier = d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
            BytesRef ref = d.getBinaryValue(fieldName);
            idx.setByteArrayRepresentation(ref.bytes, ref.offset, ref.length);
            tmp.extract(ImageIO.read(new File(identifier)));
            // fetch both histograms once per document rather than once per bin
            double[] extracted = tmp.getDoubleHistogram();
            double[] stored = idx.getDoubleHistogram();
            for (int j = 0; j < extracted.length; j++) {
                if (Math.abs(extracted[j] - stored[j]) > 0.1) {
                    System.err.println(identifier + " error at position " + j);
                    break;
                }
            }
            // assertEquals((double) idx.getDistance(tmp), 0d, 0.00001);
        }
    }
}

From source file:net.semanticmetadata.lire.imageanalysis.OpponentHistogramTest.java

License:Open Source License

/**
 * Indexes a local image folder with the opponent histogram builder and prints three timings:
 * constructing a {@code FastOpponentImageSearcher}, searching with it, and searching with the
 * searcher returned by {@code ImageSearcherFactory.createOpponentHistogramSearcher}.
 *
 * <p>Up to 100 documents of the index are used as queries; fewer if the index is smaller.
 *
 * @throws IOException if indexing or reading the index fails
 */
public void testFastSearch() throws IOException {
    // return value unused; presumably this only verifies that the custom codec can be resolved
    Codec.forName("LireCustomCodec");
    //        ParallelIndexer pin = new ParallelIndexer(7, "./index-fast-3", "testdata/wang-1000") {
    ParallelIndexer pin = new ParallelIndexer(7, "./index-fast-3", "D:\\DataSets\\Flickrphotos\\01", true) {
        @Override
        public void addBuilders(ChainedDocumentBuilder builder) {
            builder.addBuilder(DocumentBuilderFactory.getOpponentHistogramDocumentBuilder());
        }
    };
    pin.run();

    try (IndexReader ir = DirectoryReader.open(MMapDirectory.open(new File("./index-fast-3")))) {
        System.out.println("ir.maxDoc() = " + ir.maxDoc());
        // never ask the reader for a document number it does not have
        int numQueries = Math.min(100, ir.maxDoc());

        long ms = System.currentTimeMillis();
        ImageSearcher is = new FastOpponentImageSearcher(50);
        ms = System.currentTimeMillis() - ms;
        System.out.println("init ms = " + ms);

        ms = System.currentTimeMillis();
        for (int i = 0; i < numQueries; i++) {
            is.search(ir.document(i), ir);
        }
        ms = System.currentTimeMillis() - ms;
        System.out.println("cached ms = " + ms);

        is = ImageSearcherFactory.createOpponentHistogramSearcher(50);
        ms = System.currentTimeMillis();
        for (int i = 0; i < numQueries; i++) {
            is.search(ir.document(i), ir);
        }
        ms = System.currentTimeMillis() - ms;
        System.out.println("read from Lucene ms = " + ms);
    }
}

From source file:net.semanticmetadata.lire.imageanalysis.ScalableColorTest.java

License:Open Source License

/**
 * Indexes {@code testdata\UCID} with a ScalableColor document builder and compares, for
 * every indexed document, the histogram restored from the index with one freshly extracted
 * from the image file named by the document's identifier.
 *
 * <p>The first bin that differs by more than 0.1 is reported on {@code System.err}; the test
 * itself does not fail on a mismatch.
 *
 * @throws IOException if indexing, reading the index or reading an image fails
 */
public void testIndexSerialization() throws IOException {
    ParallelIndexer pi = new ParallelIndexer(8, "test-idx", "testdata\\UCID", true) {
        @Override
        public void addBuilders(ChainedDocumentBuilder builder) {
            builder.addBuilder(new GenericDocumentBuilder(ScalableColor.class));
        }
    };
    pi.run();

    ScalableColor tmp = new ScalableColor();
    ScalableColor idx = new ScalableColor();
    // looked up once instead of allocating a new feature per document
    String fieldName = new ScalableColor().getFieldName();
    try (IndexReader ir = DirectoryReader.open(FSDirectory.open(new File("test-idx")))) {
        for (int i = 0; i < ir.maxDoc(); i++) {
            Document d = ir.document(i);
            String identifier = d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
            BytesRef ref = d.getBinaryValue(fieldName);
            idx.setByteArrayRepresentation(ref.bytes, ref.offset, ref.length);
            tmp.extract(ImageIO.read(new File(identifier)));
            // fetch both histograms once per document rather than once per bin
            double[] extracted = tmp.getDoubleHistogram();
            double[] stored = idx.getDoubleHistogram();
            for (int j = 0; j < extracted.length; j++) {
                if (Math.abs(extracted[j] - stored[j]) > 0.1) {
                    System.err.println(identifier + " error at position " + j);
                    break;
                }
            }
            // assertEquals((double) idx.getDistance(tmp), 0d, 0.00001);
        }
    }
}

From source file:net.semanticmetadata.lire.indexing.IndexVisualWordsTest.java

License:Open Source License

/**
 * Strips the SURF bag-of-visual-words fields from the first tenth of the documents in
 * {@code index}, writes the stripped documents back, and then constructs a
 * {@code BOVWBuilder} on the re-opened index.
 *
 * <p>The builder's {@code indexMissing()} call is currently disabled, so the builder is only
 * constructed.
 *
 * @throws IOException if the index cannot be read or updated
 */
public void testIndexMissingFiles() throws IOException {
    // first delete some of the existing ones ...
    System.out.println("Deleting visual words from docs ...");
    // resources are closed in reverse order (writer first, then reader), as before,
    // and now also when an update throws
    try (IndexReader ir = DirectoryReader.open(FSDirectory.open(new File(index)));
            IndexWriter iw = LuceneUtils.createIndexWriter(index, false)) {
        int maxDocs = ir.maxDoc();
        for (int i = 0; i < maxDocs / 10; i++) {
            Document d = ir.document(i);
            d.removeFields(DocumentBuilder.FIELD_NAME_SURF + DocumentBuilder.FIELD_NAME_BOVW);
            //            d.removeFields(DocumentBuilder.FIELD_NAME_SURF_LOCAL_FEATURE_HISTOGRAM);
            d.removeFields(DocumentBuilder.FIELD_NAME_SURF + DocumentBuilder.FIELD_NAME_BOVW_VECTOR);
            //            d.removeFields(DocumentBuilder.FIELD_NAME_SURF);
            // replace the document that carries the same identifier with the stripped copy
            iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                    d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), d);
        }
        System.out.println("# of deleted docs:  " + maxDocs / 10);
        System.out.println("Optimizing and closing ...");
    }

    System.out.println("Creating new visual words ...");
    // the builder is not used beyond construction here, so its reader can be released right away
    try (DirectoryReader freshReader = DirectoryReader.open(FSDirectory.open(new File(index)))) {
        BOVWBuilder surfFeatureHistogramBuilder = new BOVWBuilder(freshReader, new SurfFeature(), numSamples,
                clusters);
        //        surfFeatureHistogramBuilder.indexMissing();
        //        System.out.println("Finished.");
    }
}

From source file:net.semanticmetadata.lire.indexing.MetricSpacesTest.java

License:Open Source License

/**
 * Prints the wall-clock time of a handful of searches, first through
 * {@code MetricSpacesInvertedListIndexing} and then through the CEDD image searcher, using
 * the first documents of the index as queries.
 *
 * <p>At most 10 queries are run; fewer if the index holds fewer documents.
 *
 * @throws IOException if the index cannot be read
 */
public void testPerformance() throws IOException {
    MetricSpacesInvertedListIndexing mes = MetricSpacesInvertedListIndexing.getDefaultInstance();
    // NOTE(review): assumes getIndexReader hands out a reader owned by the caller - confirm
    // that the shared default instance does not cache it before relying on this close.
    try (IndexReader reader = mes.getIndexReader(indexPath)) {
        System.out.println(reader.maxDoc() + " documents");
        // never ask the reader for a document number it does not have
        int numSearches = Math.min(10, reader.maxDoc());

        long ms = System.currentTimeMillis();
        for (int i = 0; i < numSearches; i++) {
            // only the elapsed time matters; the hits themselves are discarded
            mes.search(reader.document(i), indexPath);
        }
        ms = System.currentTimeMillis() - ms;
        System.out.println("ms = " + ms);

        ImageSearcher ceddSearcher = ImageSearcherFactory.createCEDDImageSearcher(100);
        ms = System.currentTimeMillis();
        for (int i = 0; i < numSearches; i++) {
            ceddSearcher.search(reader.document(i), reader);
        }
        ms = System.currentTimeMillis() - ms;
        System.out.println("ms = " + ms);
    }
}

From source file:net.semanticmetadata.lire.searchers.TestSearching.java

License:Open Source License

/**
 * Indexes the {@code testExtensive} image list with a global, a local and a SIMPLE
 * (global feature on detected keypoints) extractor, then searches the index with
 * {@code imageToSearch} using each of the three and prints the ranked hits.
 *
 * @throws IOException            if a codebook, the query image or the index cannot be read
 * @throws IllegalAccessException if the aggregator class cannot be instantiated
 * @throws InstantiationException if the aggregator class cannot be instantiated
 */
public void testSearch() throws IOException, IllegalAccessException, InstantiationException {
    Cluster[] cvsurf512 = Cluster.readClusters(codebookPath + "CvSURF512");
    Cluster[] simpleceddcvsurf512 = Cluster.readClusters(codebookPath + "SIMPLEdetCVSURFCEDD512");

    ParallelIndexer parallelIndexer = new ParallelIndexer(DocumentBuilder.NUM_OF_THREADS, indexPath,
            testExtensive, numOfClusters, numOfDocsForVocabulary, aggregatorClass);
    parallelIndexer.addExtractor(globalFeatureClass);
    parallelIndexer.addExtractor(localFeatureClass, cvsurf512);
    parallelIndexer.addExtractor(globalFeatureClass, keypointDetector, simpleceddcvsurf512);
    parallelIndexer.run();

    // ImageIO.read(InputStream) leaves the stream open, so it is closed here explicitly
    BufferedImage image;
    try (FileInputStream in = new FileInputStream(imageToSearch)) {
        image = ImageIO.read(in);
    }

    try (IndexReader reader = DirectoryReader
            .open(new RAMDirectory(FSDirectory.open(Paths.get(indexPath)), IOContext.READONCE))) {
        System.out.println("Documents in the reader: " + reader.maxDoc());

        GenericFastImageSearcher ceddSearcher = new GenericFastImageSearcher(10, globalFeatureClass, true,
                reader);
        printSearchHits(ceddSearcher.search(image, reader), reader);

        GenericFastImageSearcher cvsurfsearcher = new GenericFastImageSearcher(10, localFeatureClass,
                aggregatorClass.newInstance(), 512, true, reader, indexPath + ".config");
        printSearchHits(cvsurfsearcher.search(image, reader), reader);

        GenericFastImageSearcher simpleceddcvsurfsearcher = new GenericFastImageSearcher(10, globalFeatureClass,
                keypointDetector, aggregatorClass.newInstance(), 512, true, reader, indexPath + ".config");
        printSearchHits(simpleceddcvsurfsearcher.search(image, reader), reader);
    }
}

/**
 * Prints one line per hit (rank, file name without its backslash-separated directory part,
 * score) followed by an empty line.
 */
private static void printSearchHits(ImageSearchHits hits, IndexReader reader) throws IOException {
    for (int y = 0; y < hits.length(); y++) {
        String hitFile = reader.document(hits.documentID(y))
                .getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        hitFile = hitFile.substring(hitFile.lastIndexOf('\\') + 1);
        System.out.println(y + ". " + hitFile + " " + hits.score(y));
    }
    System.out.println();
}

From source file:net.semanticmetadata.lire.searchers.TestSearching.java

License:Open Source License

/**
 * Indexes the {@code testExtensive} image list with a global, a local and a SIMPLE extractor
 * and then uses every listed image as a query against each of the three searchers.
 *
 * <p>For each searcher the top hit is printed to {@code System.out} when its file name equals
 * the query's file name, and to {@code System.err} as an error otherwise.
 *
 * @throws IOException            if a codebook, an image or the index cannot be read
 * @throws IllegalAccessException if the aggregator class cannot be instantiated
 * @throws InstantiationException if the aggregator class cannot be instantiated
 */
public void testSearchMulImages() throws IOException, IllegalAccessException, InstantiationException {
    Cluster[] cvsurf512 = Cluster.readClusters(codebookPath + "CvSURF512");
    Cluster[] simpleceddcvsurf512 = Cluster.readClusters(codebookPath + "SIMPLEdetCVSURFCEDD512");

    ParallelIndexer parallelIndexer = new ParallelIndexer(DocumentBuilder.NUM_OF_THREADS, indexPath,
            testExtensive, numOfClusters, numOfDocsForVocabulary, aggregatorClass);
    parallelIndexer.addExtractor(globalFeatureClass);
    parallelIndexer.addExtractor(localFeatureClass, cvsurf512);
    parallelIndexer.addExtractor(globalFeatureClass, keypointDetector, simpleceddcvsurf512);
    parallelIndexer.run();

    ArrayList<String> images = FileUtils.readFileLines(new File(testExtensive), true);

    try (IndexReader reader = DirectoryReader
            .open(new RAMDirectory(FSDirectory.open(Paths.get(indexPath)), IOContext.READONCE))) {
        System.out.println("Documents in the reader: " + reader.maxDoc());

        GenericFastImageSearcher ceddSearcher = new GenericFastImageSearcher(1, globalFeatureClass, true,
                reader);
        GenericFastImageSearcher cvsurfsearcher = new GenericFastImageSearcher(1, localFeatureClass,
                aggregatorClass.newInstance(), 512, true, reader, indexPath + ".config");
        GenericFastImageSearcher simpleceddcvsurfsearcher = new GenericFastImageSearcher(1, globalFeatureClass,
                keypointDetector, aggregatorClass.newInstance(), 512, true, reader, indexPath + ".config");

        int counter = 0;
        for (String imagePath : images) {
            // one stream per query image: close it right after decoding so handles do not pile up
            BufferedImage image;
            try (FileInputStream in = new FileInputStream(imagePath)) {
                image = ImageIO.read(in);
            }
            String queryName = imagePath.substring(imagePath.lastIndexOf('\\') + 1);
            System.out.println(counter + " ~ " + queryName);

            reportTopHit(queryName, ceddSearcher.search(image, reader), reader);
            reportTopHit(queryName, cvsurfsearcher.search(image, reader), reader);
            reportTopHit(queryName, simpleceddcvsurfsearcher.search(image, reader), reader);

            counter++;
            System.out.println();
        }
    }
}

/**
 * Prints the best hit to {@code System.out} if its file name matches {@code queryName},
 * otherwise reports it on {@code System.err}.
 */
private static void reportTopHit(String queryName, ImageSearchHits hits, IndexReader reader)
        throws IOException {
    String hitFile = reader.document(hits.documentID(0)).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
    hitFile = hitFile.substring(hitFile.lastIndexOf('\\') + 1);
    if (queryName.equals(hitFile)) {
        System.out.println(0 + ". " + hitFile + " " + hits.score(0));
    } else {
        System.err.println("ERROR " + hitFile + " " + hits.score(0) + " ERROR");
    }
}

From source file:net.semanticmetadata.lire.searchers.TestSearching.java

License:Open Source License

/**
 * Builds a main index from {@code testExtensiveRed} and a separate query index from
 * {@code testExtensiveBlack} (created against the main index), then uses every live document
 * of the query index as a query against the main index with a global, a local and a SIMPLE
 * searcher and prints the ranked hits of each.
 *
 * @throws IOException            if a codebook or one of the indexes cannot be read
 * @throws IllegalAccessException if the aggregator class cannot be instantiated
 * @throws InstantiationException if the aggregator class cannot be instantiated
 */
public void testSeparateIndex() throws IOException, IllegalAccessException, InstantiationException {
    Cluster[] cvsurf512 = Cluster.readClusters(codebookPath + "CvSURF512");
    Cluster[] simpleceddcvsurf512 = Cluster.readClusters(codebookPath + "SIMPLEdetCVSURFCEDD512");

    ParallelIndexer parallelIndexer = new ParallelIndexer(DocumentBuilder.NUM_OF_THREADS, indexPath,
            testExtensiveRed, numOfClusters, numOfDocsForVocabulary, aggregatorClass);
    parallelIndexer.addExtractor(globalFeatureClass);
    parallelIndexer.addExtractor(localFeatureClass, cvsurf512);
    parallelIndexer.addExtractor(globalFeatureClass, keypointDetector, simpleceddcvsurf512);
    parallelIndexer.run();

    ParallelIndexer parallelIndexerSeparate = new ParallelIndexer(DocumentBuilder.NUM_OF_THREADS,
            indexPathSeparate, testExtensiveBlack, indexPath);
    parallelIndexerSeparate.run();

    // both readers are released when the test ends, whether it passes or throws
    try (IndexReader readerIndex = DirectoryReader
            .open(new RAMDirectory(FSDirectory.open(Paths.get(indexPath)), IOContext.READONCE));
            IndexReader readerQueries = DirectoryReader.open(
                    new RAMDirectory(FSDirectory.open(Paths.get(indexPathSeparate)), IOContext.READONCE))) {
        System.out.println("Documents in the reader: " + readerIndex.maxDoc());
        System.out.println("Documents in the reader: " + readerQueries.maxDoc());

        GenericFastImageSearcher ceddSearcher = new GenericFastImageSearcher(5, globalFeatureClass, true,
                readerIndex);
        GenericFastImageSearcher cvsurfsearcher = new GenericFastImageSearcher(5, localFeatureClass,
                aggregatorClass.newInstance(), 512, true, readerIndex, indexPath + ".config");
        GenericFastImageSearcher simpleceddcvsurfsearcher = new GenericFastImageSearcher(5, globalFeatureClass,
                keypointDetector, aggregatorClass.newInstance(), 512, true, readerIndex, indexPath + ".config");

        Bits liveDocs = MultiFields.getLiveDocs(readerQueries);
        String[] labels = { "Global:", "Local:", "Simple:" };

        int counter = 0;
        for (int i = 0; i < readerQueries.maxDoc(); i++) {
            // skip documents that are marked as deleted in the query index
            if (readerQueries.hasDeletions() && liveDocs != null && !liveDocs.get(i)) {
                continue;
            }

            Document queryDoc = readerQueries.document(i);
            String queryfile = queryDoc.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
            queryfile = queryfile.substring(queryfile.lastIndexOf('\\') + 1);
            System.out.println(counter + ". Query image: " + queryfile);

            // run all three searches first, then print, in the same order as the labels
            ImageSearchHits[] results = {
                    ceddSearcher.search(queryDoc, readerIndex),
                    cvsurfsearcher.search(queryDoc, readerIndex),
                    simpleceddcvsurfsearcher.search(queryDoc, readerIndex) };

            for (int r = 0; r < results.length; r++) {
                ImageSearchHits hits = results[r];
                System.out.println(labels[r]);
                for (int y = 0; y < hits.length(); y++) {
                    String hitFile = readerIndex.document(hits.documentID(y))
                            .getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
                    hitFile = hitFile.substring(hitFile.lastIndexOf('\\') + 1);
                    System.out.println(y + ". " + hitFile + " " + hits.score(y));
                }
            }
            System.out.println();
            counter++;
        }
    }
}