Example usage for org.apache.lucene.index IndexReader numDocs

Introduction

On this page you can find example usages of org.apache.lucene.index.IndexReader.numDocs().

Prototype

public abstract int numDocs();

Source Link

Document

Returns the number of documents in this index.

Usage

From source file:net.semanticmetadata.lire.searchers.forevaluations.GenericFastImageSearcherForEvaluation.java

License:Open Source License

/**
 * Groups the live documents of the index by their distance to the feature stored in
 * document 0 and reports every group that holds more than one document.
 *
 * @param reader the index to scan
 * @return the groups of identifiers sharing the same distance value, or {@code null} if the
 *         index is empty or no two documents share a distance
 * @throws IOException if a stored document cannot be read
 */
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    // Document ids range over [0, maxDoc()); numDocs() only counts the live ones, so using it
    // as the loop bound would skip the highest ids whenever the index contains deletions.
    int maxDoc = reader.maxDoc();
    if (maxDoc == 0)
        return null; // nothing indexed, hence no duplicates.

    // the first document provides the reference feature all distances are measured against:
    Document doc = reader.document(0);

    LireFeature lireFeature = extractorItem.getFeatureInstance();
    if (doc.getField(fieldName).binaryValue() != null && doc.getField(fieldName).binaryValue().length > 0)
        lireFeature.setByteArrayRepresentation(doc.getField(fieldName).binaryValue().bytes,
                doc.getField(fieldName).binaryValue().offset, doc.getField(fieldName).binaryValue().length);

    HashMap<Double, List<String>> duplicates = new HashMap<Double, List<String>>();

    // Needed for check whether the document is deleted; null if the index has no deletions.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    int numDuplicates = 0;
    for (int i = 0; i < maxDoc; i++) {
        if (liveDocs != null && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.

        Document d = reader.document(i);
        double distance = getDistance(d, lireFeature);

        List<String> sameDistance = duplicates.get(distance);
        if (sameDistance == null) {
            sameDistance = new LinkedList<String>();
            duplicates.put(distance, sameDistance);
        } else {
            // another document already has exactly this distance
            numDuplicates++;
        }
        sameDistance.add(d.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
    }

    if (numDuplicates == 0)
        return null;

    // only groups with at least two members are duplicates
    LinkedList<List<String>> results = new LinkedList<List<String>>();
    for (List<String> group : duplicates.values()) {
        if (group.size() > 1) {
            results.add(group);
        }
    }
    return new SimpleImageDuplicates(results);
}

From source file:net.semanticmetadata.lire.searchers.GenericFastImageSearcher.java

License:Open Source License

/**
 * @param reader/*from  w ww .j a  v  a 2  s. c o  m*/
 * @param lireFeature
 * @return the maximum distance found for normalizing.
 * @throws IOException
 */
protected double findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException {
    maxDistance = -1d;

    // clear result set ...
    docs.clear();
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    Document d;
    double tmpDistance;
    int docs = reader.numDocs();
    if (!isCaching) {
        // we read each and every document from the index and then we compare it to the query.
        for (int i = 0; i < docs; i++) {
            if (reader.hasDeletions() && !liveDocs.get(i))
                continue; // if it is deleted, just ignore it.

            d = reader.document(i);
            tmpDistance = getDistance(d, lireFeature);
            assert (tmpDistance >= 0);
            // if the array is not full yet:
            if (this.docs.size() < maxHits) {
                this.docs.add(new SimpleResult(tmpDistance, i));
                if (tmpDistance > maxDistance)
                    maxDistance = tmpDistance;
            } else if (tmpDistance < maxDistance) {
                // if it is nearer to the sample than at least on of the current set:
                // remove the last one ...
                this.docs.remove(this.docs.last());
                // add the new one ...
                this.docs.add(new SimpleResult(tmpDistance, i));
                // and set our new distance border ...
                maxDistance = this.docs.last().getDistance();
            }
        }
    } else {
        LinkedList<Consumer> tasks = new LinkedList<Consumer>();
        LinkedList<Thread> threads = new LinkedList<Thread>();
        Consumer consumer;
        Thread thread;
        Thread p = new Thread(new Producer());
        p.start();
        for (int i = 0; i < numThreads; i++) {
            consumer = new Consumer(lireFeature);
            thread = new Thread(consumer);
            thread.start();
            tasks.add(consumer);
            threads.add(thread);
        }
        for (Thread next : threads) {
            try {
                next.join();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
        TreeSet<SimpleResult> tmpDocs;
        boolean flag;
        SimpleResult simpleResult;
        for (Consumer task : tasks) {
            tmpDocs = task.getResult();
            flag = true;
            while (flag && (tmpDocs.size() > 0)) {
                simpleResult = tmpDocs.pollFirst();
                if (this.docs.size() < maxHits) {
                    this.docs.add(simpleResult);
                    if (simpleResult.getDistance() > maxDistance)
                        maxDistance = simpleResult.getDistance();
                } else if (simpleResult.getDistance() < maxDistance) {
                    //                        this.docs.remove(this.docs.last());
                    this.docs.pollLast();
                    this.docs.add(simpleResult);
                    maxDistance = this.docs.last().getDistance();
                } else
                    flag = false;
            }
        }
    }
    return maxDistance;
}

From source file:net.semanticmetadata.lire.TestImageSearcher.java

License:Open Source License

/**
 * Times {@code numsearches} searches with a scalable color searcher for three kinds of query
 * (the raw image, the best hit's document, a freshly built document) and prints up to five
 * hits after each round.
 *
 * @throws IOException if the index or the test image cannot be read
 */
public void testSearch() throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    try {
        int numDocs = reader.numDocs();
        System.out.println("numDocs = " + numDocs);
        ImageSearcher searcher = ImageSearcherFactory.createScalableColorImageSearcher(50);

        // the stream is only needed while decoding the image, so close it right away
        BufferedImage bimg;
        FileInputStream imageStream = new FileInputStream(testFilesPath + testFiles[0]);
        try {
            bimg = ImageIO.read(imageStream);
        } finally {
            imageStream.close();
        }

        ImageSearchHits hits = null;
        long time = System.currentTimeMillis();
        for (int i = 0; i < numsearches; i++) {
            hits = searcher.search(bimg, reader);
        }
        time = System.currentTimeMillis() - time;
        System.out.println(
                ((float) time / (float) numsearches) + " ms per search with image, averaged on " + numsearches);
        // never read past the hits actually returned
        for (int i = 0; i < Math.min(5, hits.length()); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }

        // second round: query with the document of the best hit
        Document document = hits.doc(0);
        time = System.currentTimeMillis();
        for (int i = 0; i < numsearches; i++) {
            hits = searcher.search(document, reader);
        }
        time = System.currentTimeMillis() - time;
        System.out.println(
                ((float) time / (float) numsearches) + " ms per search with document, averaged on " + numsearches);
        for (int i = 0; i < Math.min(5, hits.length()); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }

        // third round: query with a document built from the test image
        document = getDocumentBuilder().createDocument(bimg, testFilesPath + testFiles[0]);
        time = System.currentTimeMillis();
        for (int i = 0; i < numsearches; i++) {
            hits = searcher.search(document, reader);
        }
        time = System.currentTimeMillis() - time;
        System.out.println(
                ((float) time / (float) numsearches) + " ms per search with document, averaged on " + numsearches);
        for (int i = 0; i < Math.min(5, hits.length()); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
    } finally {
        reader.close();
    }
}

From source file:net.semanticmetadata.lire.TestImageSearcher.java

License:Open Source License

/**
 * Runs the duplicate detection of a CEDD searcher on the test index and prints every group
 * of duplicates found.
 *
 * @throws Exception if the index cannot be opened or read
 */
public void testFindDuplicates() throws Exception {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    try {
        int numDocs = reader.numDocs();
        System.out.println("numDocs = " + numDocs);
        ImageSearcher searcher = ImageSearcherFactory.createCEDDImageSearcher(50);
        ImageDuplicates imageDuplicates = searcher.findDuplicates(reader);
        if (imageDuplicates == null) {
            System.out.println("No duplicates found");
            return;
        }
        for (int i = 0; i < imageDuplicates.length(); i++) {
            System.out.println(imageDuplicates.getDuplicate(i).toString());
        }
    } finally {
        // release the index even on the early return or when the search fails
        reader.close();
    }
}

From source file:net.semanticmetadata.lire.TestImageSearcher.java

License:Open Source License

/**
 * Searches the test index with an auto color correlogram searcher, first once with the test
 * image and then {@code numsearches} times with the document of the fifth hit, printing the
 * timing and all hits of each round.
 *
 * @throws IOException if the index or the test image cannot be read
 */
public void testCorrelationSearch() throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    try {
        int numDocs = reader.numDocs();
        System.out.println("numDocs = " + numDocs);
        ImageSearcher searcher = ImageSearcherFactory.createAutoColorCorrelogramImageSearcher(10);

        // the stream is only needed while decoding the image, so close it right away
        BufferedImage bimg;
        FileInputStream imageStream = new FileInputStream(testFilesPath + testFiles[0]);
        try {
            bimg = ImageIO.read(imageStream);
        } finally {
            imageStream.close();
        }

        // The image search is executed exactly once, so the elapsed time is reported as is
        // rather than divided by numsearches.
        long time = System.currentTimeMillis();
        ImageSearchHits hits = searcher.search(bimg, reader);
        time = System.currentTimeMillis() - time;
        System.out.println(((float) time) + " ms per search with image, averaged on 1");
        for (int i = 0; i < hits.length(); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }

        // the fifth hit serves as query document for the second round
        Document document = hits.doc(4);
        time = System.currentTimeMillis();
        for (int i = 0; i < numsearches; i++) {
            hits = searcher.search(document, reader);
        }
        time = System.currentTimeMillis() - time;
        System.out.println(
                ((float) time / (float) numsearches) + " ms per search with document, averaged on " + numsearches);
        for (int i = 0; i < hits.length(); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
    } finally {
        reader.close();
    }
}

From source file:net.semanticmetadata.lire.TestImageSearcher.java

License:Open Source License

/**
 * Searches the test index with a CEDD searcher, first once with the test image and then
 * {@code numsearches} times with the document of the fifth hit, printing the timing and all
 * hits of each round.
 *
 * @throws IOException if the index or the test image cannot be read
 */
public void testCEDDSearch() throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    try {
        int numDocs = reader.numDocs();
        System.out.println("numDocs = " + numDocs);
        ImageSearcher searcher = ImageSearcherFactory.createCEDDImageSearcher(30);

        // the stream is only needed while decoding the image, so close it right away
        BufferedImage bimg;
        FileInputStream imageStream = new FileInputStream(testFilesPath + testFiles[0]);
        try {
            bimg = ImageIO.read(imageStream);
        } finally {
            imageStream.close();
        }

        // The image search is executed exactly once, so the elapsed time is reported as is
        // rather than divided by numsearches.
        long time = System.currentTimeMillis();
        ImageSearchHits hits = searcher.search(bimg, reader);
        time = System.currentTimeMillis() - time;
        System.out.println(((float) time) + " ms per search with image, averaged on 1");
        for (int i = 0; i < hits.length(); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }

        // the fifth hit serves as query document for the second round
        Document document = hits.doc(4);
        time = System.currentTimeMillis();
        for (int i = 0; i < numsearches; i++) {
            hits = searcher.search(document, reader);
        }
        time = System.currentTimeMillis() - time;
        System.out.println(
                ((float) time / (float) numsearches) + " ms per search with document, averaged on " + numsearches);
        for (int i = 0; i < hits.length(); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
    } finally {
        reader.close();
    }
}

From source file:net.semanticmetadata.lire.TestImageSearcher.java

License:Open Source License

/**
 * Searches the test index with a color histogram searcher, first once with the test image
 * and then {@code numsearches} times with the document of the fifth hit, printing the timing
 * and all hits of each round.
 *
 * @throws IOException if the index or the test image cannot be read
 */
public void testSimpleColorHistogramSearch() throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    try {
        int numDocs = reader.numDocs();
        System.out.println("numDocs = " + numDocs);
        ImageSearcher searcher = ImageSearcherFactory.createColorHistogramImageSearcher(30);

        // the stream is only needed while decoding the image, so close it right away
        BufferedImage bimg;
        FileInputStream imageStream = new FileInputStream(testFilesPath + testFiles[0]);
        try {
            bimg = ImageIO.read(imageStream);
        } finally {
            imageStream.close();
        }

        // The image search is executed exactly once, so the elapsed time is reported as is
        // rather than divided by numsearches.
        long time = System.currentTimeMillis();
        ImageSearchHits hits = searcher.search(bimg, reader);
        time = System.currentTimeMillis() - time;
        System.out.println(((float) time) + " ms per search with image, averaged on 1");
        for (int i = 0; i < hits.length(); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }

        // the fifth hit serves as query document for the second round
        Document document = hits.doc(4);
        time = System.currentTimeMillis();
        for (int i = 0; i < numsearches; i++) {
            hits = searcher.search(document, reader);
        }
        time = System.currentTimeMillis() - time;
        System.out.println(
                ((float) time / (float) numsearches) + " ms per search with document, averaged on " + numsearches);
        for (int i = 0; i < hits.length(); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
    } finally {
        reader.close();
    }
}

From source file:net.semanticmetadata.lire.TestImageSearcherWeighted.java

License:Open Source License

/**
 * Runs the same image query through three differently weighted searchers and prints up to
 * five hits of each, separated by a "-" line.
 *
 * @throws IOException if the index or the test image cannot be read
 */
public void testSearch() throws IOException {
    IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath)));
    try {
        int numDocs = reader.numDocs();
        System.out.println("numDocs = " + numDocs);
        ImageSearcher searcher1 = ImageSearcherFactory.createWeightedSearcher(10, 0.2f, 0.8f, 1.0f);
        ImageSearcher searcher2 = ImageSearcherFactory.createWeightedSearcher(10, 0.8f, 0.0f, 1.0f);
        ImageSearcher searcher3 = ImageSearcherFactory.createWeightedSearcher(10, 0.0f, 1.0f, 0.0f);

        // the stream is only needed while decoding the image, so close it right away
        BufferedImage bimg;
        FileInputStream imageStream = new FileInputStream(testFilesPath + testFiles[0]);
        try {
            bimg = ImageIO.read(imageStream);
        } finally {
            imageStream.close();
        }

        ImageSearchHits hits = searcher1.search(bimg, reader);
        // never read past the hits actually returned
        for (int i = 0; i < Math.min(5, hits.length()); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
        System.out.println("-");
        hits = searcher2.search(bimg, reader);
        for (int i = 0; i < Math.min(5, hits.length()); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
        System.out.println("-");
        hits = searcher3.search(bimg, reader);
        for (int i = 0; i < Math.min(5, hits.length()); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
    } finally {
        reader.close();
    }
}

From source file:net.sf.logsaw.index.internal.LuceneIndexServiceImpl.java

License:Open Source License

@Override
public int count(ILogResource log) throws CoreException {
    Assert.isNotNull(log, "log"); //$NON-NLS-1$
    // Delegate to the reader callback and hand back whatever document count it reports.
    return new ARunWithIndexReader<Integer>() {

        /* (non-Javadoc)
         * @see net.sf.logsaw.index.impl.ARunWithIndexReader#doRunWithIndexReader(org.apache.lucene.index.IndexReader, net.sf.logsaw.core.framework.ILogResource)
         */
        @Override
        protected Integer doRunWithIndexReader(IndexReader reader, ILogResource log) throws CoreException {
            if (reader == null) {
                // Index does not exist yet, so there is nothing to count
                return Integer.valueOf(0);
            }
            return Integer.valueOf(reader.numDocs());
        }
    }.runWithIndexReader(log);
}

From source file:net.sourceforge.docfetcher.model.UtilModel.java

License:Open Source License

/**
 * Asserts that the index stored in the given directory holds exactly the expected number of
 * documents, as reported by {@code IndexReader.numDocs()}.
 *
 * @param luceneDir the directory containing the index to inspect
 * @param expectedCount the number of documents the index is expected to report
 * @throws Exception if the index cannot be opened
 */
@VisibleForPackageGroup
public static void assertDocCount(Directory luceneDir, int expectedCount) throws Exception {
    IndexReader reader = IndexReader.open(luceneDir);
    try {
        assertEquals(expectedCount, reader.numDocs());
    } finally {
        // close in finally so a failing assertion does not leak the reader
        Closeables.closeQuietly(reader);
    }
}