Example usage for org.apache.lucene.index IndexReader numDocs

List of usage examples for org.apache.lucene.index IndexReader numDocs

Introduction

This page collects usage examples for the method org.apache.lucene.index.IndexReader.numDocs().

Prototype

public abstract int numDocs();

Source Link

Documentation

Returns the number of documents in this index.

Usage

From source file:net.semanticmetadata.lire.searchers.forevaluations.GenericFastImageSearcherForEvaluation.java

License:Open Source License

/**
 * Groups the live documents of the index by their distance to a reference document and
 * reports every group that contains more than one document as a set of duplicates.
 * <p>
 * The reference document is the first live document of the index; its feature is read
 * from the field {@code fieldName}. Documents are considered duplicates of each other
 * when {@code getDistance} yields exactly the same value for them.
 *
 * @param reader the index to scan.
 * @return the groups of duplicates, or {@code null} if the index holds no live document
 *         or no two documents share the same distance.
 * @throws IOException if a document cannot be read from the index.
 */
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    // Null when the index has no deletions; otherwise tells which document ids are still live.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    // Document ids range from 0 to maxDoc() - 1. numDocs() only counts live documents, so using
    // it as the upper bound would skip live documents as soon as the index contains deletions.
    int maxDoc = reader.maxDoc();

    // Use the first live document as reference; reading a deleted document is unspecified.
    int referenceId = 0;
    while (referenceId < maxDoc && liveDocs != null && !liveDocs.get(referenceId)) {
        referenceId++;
    }
    if (referenceId >= maxDoc) {
        return null; // empty index (or everything deleted): nothing can be a duplicate.
    }
    Document doc = reader.document(referenceId);

    LireFeature lireFeature = extractorItem.getFeatureInstance();
    if (doc.getField(fieldName).binaryValue() != null && doc.getField(fieldName).binaryValue().length > 0) {
        lireFeature.setByteArrayRepresentation(doc.getField(fieldName).binaryValue().bytes,
                doc.getField(fieldName).binaryValue().offset, doc.getField(fieldName).binaryValue().length);
    }

    // distance to the reference document -> identifiers of all documents with that distance
    HashMap<Double, List<String>> duplicates = new HashMap<Double, List<String>>();
    int numDuplicates = 0;
    for (int i = 0; i < maxDoc; i++) {
        if (liveDocs != null && !liveDocs.get(i)) {
            continue; // if it is deleted, just ignore it.
        }

        Document d = reader.document(i);
        double distance = getDistance(d, lireFeature);

        List<String> sameDistance = duplicates.get(distance);
        if (sameDistance == null) {
            sameDistance = new LinkedList<String>();
            duplicates.put(distance, sameDistance);
        } else {
            // the distance has been seen before, so this document duplicates an earlier one.
            numDuplicates++;
        }
        sameDistance.add(d.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
    }

    if (numDuplicates == 0) {
        return null;
    }

    // only groups with at least two members are actual duplicates.
    LinkedList<List<String>> results = new LinkedList<List<String>>();
    for (List<String> group : duplicates.values()) {
        if (group.size() > 1) {
            results.add(group);
        }
    }
    return new SimpleImageDuplicates(results);
}

From source file:net.semanticmetadata.lire.searchers.GenericFastImageSearcher.java

License:Open Source License

/**
 * @param reader/*from  w ww .j a  v  a 2  s. c o  m*/
 * @param lireFeature
 * @return the maximum distance found for normalizing.
 * @throws IOException
 */
protected double findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException {
    maxDistance = -1d;

    // clear result set ...
    docs.clear();
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    Document d;
    double tmpDistance;
    int docs = reader.numDocs();
    if (!isCaching) {
        // we read each and every document from the index and then we compare it to the query.
        for (int i = 0; i < docs; i++) {
            if (reader.hasDeletions() && !liveDocs.get(i))
                continue; // if it is deleted, just ignore it.

            d = reader.document(i);
            tmpDistance = getDistance(d, lireFeature);
            assert (tmpDistance >= 0);
            // if the array is not full yet:
            if (this.docs.size() < maxHits) {
                this.docs.add(new SimpleResult(tmpDistance, i));
                if (tmpDistance > maxDistance)
                    maxDistance = tmpDistance;
            } else if (tmpDistance < maxDistance) {
                // if it is nearer to the sample than at least on of the current set:
                // remove the last one ...
                this.docs.remove(this.docs.last());
                // add the new one ...
                this.docs.add(new SimpleResult(tmpDistance, i));
                // and set our new distance border ...
                maxDistance = this.docs.last().getDistance();
            }
        }
    } else {
        LinkedList<Consumer> tasks = new LinkedList<Consumer>();
        LinkedList<Thread> threads = new LinkedList<Thread>();
        Consumer consumer;
        Thread thread;
        Thread p = new Thread(new Producer());
        p.start();
        for (int i = 0; i < numThreads; i++) {
            consumer = new Consumer(lireFeature);
            thread = new Thread(consumer);
            thread.start();
            tasks.add(consumer);
            threads.add(thread);
        }
        for (Thread next : threads) {
            try {
                next.join();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
        TreeSet<SimpleResult> tmpDocs;
        boolean flag;
        SimpleResult simpleResult;
        for (Consumer task : tasks) {
            tmpDocs = task.getResult();
            flag = true;
            while (flag && (tmpDocs.size() > 0)) {
                simpleResult = tmpDocs.pollFirst();
                if (this.docs.size() < maxHits) {
                    this.docs.add(simpleResult);
                    if (simpleResult.getDistance() > maxDistance)
                        maxDistance = simpleResult.getDistance();
                } else if (simpleResult.getDistance() < maxDistance) {
                    //                        this.docs.remove(this.docs.last());
                    this.docs.pollLast();
                    this.docs.add(simpleResult);
                    maxDistance = this.docs.last().getDistance();
                } else
                    flag = false;
            }
        }
    }
    return maxDistance;
}

From source file:net.semanticmetadata.lire.TestImageSearcher.java

License:Open Source License

/**
 * Runs the scalable color searcher {@code numsearches} times each with an image query, with
 * the best hit of that query and with a freshly created document, and prints the average
 * time per search together with up to five top hits.
 *
 * @throws IOException if the index or the test image cannot be read.
 */
public void testSearch() throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    try {
        int numDocs = reader.numDocs();
        System.out.println("numDocs = " + numDocs);
        ImageSearcher searcher = ImageSearcherFactory.createScalableColorImageSearcher(50);
        BufferedImage bimg;
        FileInputStream imageStream = new FileInputStream(testFilesPath + testFiles[0]);
        try {
            bimg = ImageIO.read(imageStream);
        } finally {
            // ImageIO.read does not close the stream it is given.
            imageStream.close();
        }
        ImageSearchHits hits = null;
        long time = System.currentTimeMillis();
        for (int i = 0; i < numsearches; i++) {
            hits = searcher.search(bimg, reader);
        }
        time = System.currentTimeMillis() - time;
        System.out.println(
                ((float) time / (float) numsearches) + " ms per search with image, averaged on " + numsearches);
        // never index past the end of a result list that is shorter than five hits.
        for (int i = 0; i < Math.min(5, hits.length()); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }

        // search again, this time using the best hit as the query document.
        Document document = hits.doc(0);
        time = System.currentTimeMillis();
        for (int i = 0; i < numsearches; i++) {
            hits = searcher.search(document, reader);
        }
        time = System.currentTimeMillis() - time;
        System.out.println(
                ((float) time / (float) numsearches) + " ms per search with document, averaged on " + numsearches);
        for (int i = 0; i < Math.min(5, hits.length()); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }

        // and once more with a document built from the query image itself.
        document = getDocumentBuilder().createDocument(bimg, testFilesPath + testFiles[0]);
        time = System.currentTimeMillis();
        for (int i = 0; i < numsearches; i++) {
            hits = searcher.search(document, reader);
        }
        time = System.currentTimeMillis() - time;
        System.out.println(
                ((float) time / (float) numsearches) + " ms per search with document, averaged on " + numsearches);
        for (int i = 0; i < Math.min(5, hits.length()); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
    } finally {
        reader.close();
    }
}

From source file:net.semanticmetadata.lire.TestImageSearcher.java

License:Open Source License

/**
 * Opens the index at {@code indexPath}, asks the CEDD searcher for duplicates and prints
 * every duplicate group that was found.
 *
 * @throws Exception if the index cannot be opened or read.
 */
public void testFindDuplicates() throws Exception {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    try {
        int numDocs = reader.numDocs();
        System.out.println("numDocs = " + numDocs);
        ImageSearcher searcher = ImageSearcherFactory.createCEDDImageSearcher(50);
        ImageDuplicates imageDuplicates = searcher.findDuplicates(reader);
        if (imageDuplicates == null) {
            System.out.println("No duplicates found");
            return;
        }
        for (int i = 0; i < imageDuplicates.length(); i++) {
            System.out.println(imageDuplicates.getDuplicate(i).toString());
        }
    } finally {
        // release the index on every path, including the early return above.
        reader.close();
    }
}

From source file:net.semanticmetadata.lire.TestImageSearcher.java

License:Open Source License

/**
 * Searches the index once with the test image using the auto color correlogram searcher,
 * then {@code numsearches} times with the fifth hit as query document, printing timings and
 * all hits of each run.
 *
 * @throws IOException if the index or the test image cannot be read.
 */
public void testCorrelationSearch() throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    try {
        int numDocs = reader.numDocs();
        System.out.println("numDocs = " + numDocs);
        ImageSearcher searcher = ImageSearcherFactory.createAutoColorCorrelogramImageSearcher(10);
        BufferedImage bimg;
        FileInputStream imageStream = new FileInputStream(testFilesPath + testFiles[0]);
        try {
            bimg = ImageIO.read(imageStream);
        } finally {
            // ImageIO.read does not close the stream it is given.
            imageStream.close();
        }
        // The image query runs exactly once, so the elapsed time must be averaged over one
        // search and not over numsearches.
        int imageSearches = 1;
        long time = System.currentTimeMillis();
        ImageSearchHits hits = searcher.search(bimg, reader);
        time = System.currentTimeMillis() - time;
        System.out.println(((float) time / (float) imageSearches) + " ms per search with image, averaged on "
                + imageSearches);
        for (int i = 0; i < hits.length(); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
        // use the fifth hit as the query for the document based search.
        Document document = hits.doc(4);
        time = System.currentTimeMillis();
        for (int i = 0; i < numsearches; i++) {
            hits = searcher.search(document, reader);
        }
        time = System.currentTimeMillis() - time;
        System.out.println(
                ((float) time / (float) numsearches) + " ms per search with document, averaged on " + numsearches);
        for (int i = 0; i < hits.length(); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
    } finally {
        reader.close();
    }
}

From source file:net.semanticmetadata.lire.TestImageSearcher.java

License:Open Source License

/**
 * Searches the index once with the test image using the CEDD searcher, then
 * {@code numsearches} times with the fifth hit as query document, printing timings and all
 * hits of each run.
 *
 * @throws IOException if the index or the test image cannot be read.
 */
public void testCEDDSearch() throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    try {
        int numDocs = reader.numDocs();
        System.out.println("numDocs = " + numDocs);
        ImageSearcher searcher = ImageSearcherFactory.createCEDDImageSearcher(30);
        BufferedImage bimg;
        FileInputStream imageStream = new FileInputStream(testFilesPath + testFiles[0]);
        try {
            bimg = ImageIO.read(imageStream);
        } finally {
            // ImageIO.read does not close the stream it is given.
            imageStream.close();
        }
        // The image query runs exactly once, so the elapsed time must be averaged over one
        // search and not over numsearches.
        int imageSearches = 1;
        long time = System.currentTimeMillis();
        ImageSearchHits hits = searcher.search(bimg, reader);
        time = System.currentTimeMillis() - time;
        System.out.println(((float) time / (float) imageSearches) + " ms per search with image, averaged on "
                + imageSearches);
        for (int i = 0; i < hits.length(); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
        // use the fifth hit as the query for the document based search.
        Document document = hits.doc(4);
        time = System.currentTimeMillis();
        for (int i = 0; i < numsearches; i++) {
            hits = searcher.search(document, reader);
        }
        time = System.currentTimeMillis() - time;
        System.out.println(
                ((float) time / (float) numsearches) + " ms per search with document, averaged on " + numsearches);
        for (int i = 0; i < hits.length(); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
    } finally {
        reader.close();
    }
}

From source file:net.semanticmetadata.lire.TestImageSearcher.java

License:Open Source License

/**
 * Searches the index once with the test image using the color histogram searcher, then
 * {@code numsearches} times with the fifth hit as query document, printing timings and all
 * hits of each run.
 *
 * @throws IOException if the index or the test image cannot be read.
 */
public void testSimpleColorHistogramSearch() throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    try {
        int numDocs = reader.numDocs();
        System.out.println("numDocs = " + numDocs);
        ImageSearcher searcher = ImageSearcherFactory.createColorHistogramImageSearcher(30);
        BufferedImage bimg;
        FileInputStream imageStream = new FileInputStream(testFilesPath + testFiles[0]);
        try {
            bimg = ImageIO.read(imageStream);
        } finally {
            // ImageIO.read does not close the stream it is given.
            imageStream.close();
        }
        // The image query runs exactly once, so the elapsed time must be averaged over one
        // search and not over numsearches.
        int imageSearches = 1;
        long time = System.currentTimeMillis();
        ImageSearchHits hits = searcher.search(bimg, reader);
        time = System.currentTimeMillis() - time;
        System.out.println(((float) time / (float) imageSearches) + " ms per search with image, averaged on "
                + imageSearches);
        for (int i = 0; i < hits.length(); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
        // use the fifth hit as the query for the document based search.
        Document document = hits.doc(4);
        time = System.currentTimeMillis();
        for (int i = 0; i < numsearches; i++) {
            hits = searcher.search(document, reader);
        }
        time = System.currentTimeMillis() - time;
        System.out.println(
                ((float) time / (float) numsearches) + " ms per search with document, averaged on " + numsearches);
        for (int i = 0; i < hits.length(); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
    } finally {
        reader.close();
    }
}

From source file:net.semanticmetadata.lire.TestImageSearcherWeighted.java

License:Open Source License

/**
 * Searches the index with the test image using three differently weighted searchers and
 * prints up to five top hits of each, separated by a line containing "-".
 *
 * @throws IOException if the index or the test image cannot be read.
 */
public void testSearch() throws IOException {
    IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath)));
    try {
        int numDocs = reader.numDocs();
        System.out.println("numDocs = " + numDocs);
        ImageSearcher searcher1 = ImageSearcherFactory.createWeightedSearcher(10, 0.2f, 0.8f, 1.0f);
        ImageSearcher searcher2 = ImageSearcherFactory.createWeightedSearcher(10, 0.8f, 0.0f, 1.0f);
        ImageSearcher searcher3 = ImageSearcherFactory.createWeightedSearcher(10, 0.0f, 1.0f, 0.0f);
        BufferedImage bimg;
        FileInputStream imageStream = new FileInputStream(testFilesPath + testFiles[0]);
        try {
            bimg = ImageIO.read(imageStream);
        } finally {
            // ImageIO.read does not close the stream it is given.
            imageStream.close();
        }
        ImageSearchHits hits = searcher1.search(bimg, reader);
        // never index past the end of a result list that is shorter than five hits.
        for (int i = 0; i < Math.min(5, hits.length()); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
        System.out.println("-");
        hits = searcher2.search(bimg, reader);
        for (int i = 0; i < Math.min(5, hits.length()); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
        System.out.println("-");
        hits = searcher3.search(bimg, reader);
        for (int i = 0; i < Math.min(5, hits.length()); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
    } finally {
        reader.close();
    }
}

From source file:net.sf.logsaw.index.internal.LuceneIndexServiceImpl.java

License:Open Source License

@Override
public int count(ILogResource log) throws CoreException {
    Assert.isNotNull(log, "log"); //$NON-NLS-1$
    // The anonymous callback reports how many documents the reader sees; it is executed
    // right away through runWithIndexReader.
    return new ARunWithIndexReader<Integer>() {

        /* (non-Javadoc)
         * @see net.sf.logsaw.index.impl.ARunWithIndexReader#doRunWithIndexReader(org.apache.lucene.index.IndexReader, net.sf.logsaw.core.framework.ILogResource)
         */
        @Override
        protected Integer doRunWithIndexReader(IndexReader reader, ILogResource log) throws CoreException {
            if (reader == null) {
                // Index does not exist yet, so there is nothing to count
                return Integer.valueOf(0);
            }
            return Integer.valueOf(reader.numDocs());
        }
    }.runWithIndexReader(log);
}

From source file:net.sourceforge.docfetcher.model.UtilModel.java

License:Open Source License

/**
 * Asserts that the Lucene index in the given directory contains exactly
 * {@code expectedCount} documents.
 *
 * @param luceneDir     the directory holding the index to check.
 * @param expectedCount the number of documents the index is expected to contain.
 * @throws Exception if the index cannot be opened.
 */
@VisibleForPackageGroup
public static void assertDocCount(Directory luceneDir, int expectedCount) throws Exception {
    IndexReader reader = IndexReader.open(luceneDir);
    try {
        assertEquals(expectedCount, reader.numDocs());
    } finally {
        // close the reader even when the assertion fails, so a failing test does not leak it.
        Closeables.closeQuietly(reader);
    }
}