Example usage for org.apache.lucene.index IndexReader hasDeletions

List of usage examples for org.apache.lucene.index IndexReader hasDeletions

Introduction

On this page you can find example usages of org.apache.lucene.index.IndexReader.hasDeletions().

Prototype

public boolean hasDeletions() 

Source Link

Documentation

Returns true if any documents have been deleted.

Usage

From source file:net.semanticmetadata.lire.benchmarking.TestUniversal.java

License:Open Source License

/**
 * Indexes the test data with the given feature and reports the average search time per query.
 *
 * <p>Every live document whose identifier is a key of {@code queries} is used as a query
 * against the freshly built index. The simple class name of the feature and the elapsed
 * milliseconds divided by the number of queries are printed to standard output.
 *
 * @param featureClass the global feature to index and search with
 * @throws IOException if the index cannot be opened or read
 */
private void testSearchSpeed(Class<? extends GlobalFeature> featureClass) throws IOException {
    ParallelIndexer parallelIndexer = new ParallelIndexer(DocumentBuilder.NUM_OF_THREADS, indexPath,
            testExtensive, true);
    parallelIndexer.addExtractor(featureClass);
    parallelIndexer.run();

    double queryCount = 0d;
    long ms = 0L;
    // The on-disk directory and the reader are both released when the block exits,
    // even if a search throws.
    try (FSDirectory fsDirectory = FSDirectory.open(Paths.get(indexPath));
            IndexReader reader = DirectoryReader.open(new RAMDirectory(fsDirectory, IOContext.READONCE))) {
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        // Loop-invariant, so evaluate it once instead of once per document.
        boolean hasDeletions = reader.hasDeletions();
        ImageSearcher searcher = new GenericFastImageSearcher(100, featureClass);
        ms = System.currentTimeMillis();
        int maxDoc = reader.maxDoc();
        for (int i = 0; i < maxDoc; i++) {
            if (hasDeletions && !liveDocs.get(i))
                continue; // if it is deleted, just ignore it.
            // Fetch the stored document once; it serves both as ID source and as query.
            Document candidate = reader.document(i);
            String fileName = getIDfromFileName(
                    candidate.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
            if (queries.keySet().contains(fileName)) {
                queryCount += 1d;
                // Only the search time matters here, the hits themselves are discarded.
                searcher.search(candidate, reader);
            }
        }
        ms = System.currentTimeMillis() - ms;
    }
    System.out.printf("%s \t %3.1f \n",
            featureClass.getName().substring(featureClass.getName().lastIndexOf('.') + 1),
            (double) ms / queryCount);
}

From source file:net.semanticmetadata.lire.benchmarking.TestWang.java

License:Open Source License

/**
 * Writes the pairwise CEDD, FCTH and JCD distances of all live documents in the index at
 * {@code indexPath} to {@code data.csv}, one semicolon-separated line per document pair.
 *
 * @throws IOException if the index cannot be read or the CSV file cannot be written
 */
public void tttestGetDistribution() throws IOException {
    CEDD cedd1 = new CEDD();
    FCTH fcth1 = new FCTH();

    CEDD cedd2 = new CEDD();
    FCTH fcth2 = new FCTH();

    JCD jcd1 = new JCD();
    JCD jcd2 = new JCD();
    String[] cls;

    // Writer and reader are closed even when reading or writing fails half-way.
    try (BufferedWriter bw = new BufferedWriter(new FileWriter("data.csv"));
            IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath)))) {
        // Needed to check whether a document is deleted.
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        boolean hasDeletions = reader.hasDeletions();

        // Document IDs run up to maxDoc(); numDocs() only counts live documents and
        // would cut the iteration short as soon as the index contains deletions.
        int maxDoc = reader.maxDoc();
        for (int i = 0; i < maxDoc; i++) {
            if (hasDeletions && !liveDocs.get(i))
                continue; // if it is deleted, just ignore it.

            Document doc = reader.document(i);
            cls = doc.getValues(DocumentBuilder.FIELD_NAME_CEDD);
            if (cls != null && cls.length > 0)
                cedd1.setStringRepresentation(cls[0]);
            cls = doc.getValues(DocumentBuilder.FIELD_NAME_FCTH);
            if (cls != null && cls.length > 0)
                fcth1.setStringRepresentation(cls[0]);

            for (int j = i + 1; j < maxDoc; j++) {
                // The deletion check has to look at the inner document j, not at i.
                if (hasDeletions && !liveDocs.get(j))
                    continue; // if it is deleted, just ignore it.
                Document doc2 = reader.document(j);
                cls = doc2.getValues(DocumentBuilder.FIELD_NAME_CEDD);
                if (cls != null && cls.length > 0)
                    cedd2.setStringRepresentation(cls[0]);
                cls = doc2.getValues(DocumentBuilder.FIELD_NAME_FCTH);
                if (cls != null && cls.length > 0)
                    fcth2.setStringRepresentation(cls[0]);
                jcd1.init(cedd1, fcth1);
                jcd2.init(cedd2, fcth2);
                bw.write(cedd1.getDistance(cedd2) + ";" + fcth1.getDistance(fcth2) + ";" + jcd1.getDistance(jcd2)
                        + "\n");
            }
            if (i % 100 == 0)
                System.out.println(i + " entries processed ... ");
        }
    }
}

From source file:net.semanticmetadata.lire.benchmarking.TestZuBuD.java

License:Open Source License

/**
 * Runs every query document against the index, writes the ranked results to an evaluation
 * file below {@code eval/}, and prints one summary line with MAP, P@10 and error rate.
 *
 * @param searcher      searcher used to run each query
 * @param prefix        label of the run; used in the output file name (spaces become
 *                      underscores) and in the printed summary
 * @param reader        index that is searched
 * @param clusters      cluster count; values greater than zero are added to the file name
 *                      and to the summary
 * @param readerQueries index the query documents are read from
 * @throws IOException if an index cannot be read or the evaluation file cannot be written
 */
private void computeMAP(ImageSearcher searcher, String prefix, IndexReader reader, int clusters,
        IndexReader readerQueries) throws IOException {
    long start = System.currentTimeMillis();

    double queryCount = 0d;
    double errorRate = 0;
    double map = 0;
    double p10 = 0;
    int errorCount = 0;
    // Needed to check whether a document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(readerQueries);
    boolean hasDeletions = readerQueries.hasDeletions();

    String safePrefix = prefix.replace(' ', '_');
    boolean usesWSs = searcher.toString().contains("ImageSearcherUsingWSs");
    // The last whitespace-separated token of the searcher description is only needed
    // (and only guaranteed to exist) for the "ImageSearcherUsingWSs" searchers.
    String searcherSuffix = "";
    if (usesWSs) {
        String[] tokens = searcher.toString().split("\\s+");
        searcherSuffix = tokens[tokens.length - 1];
    }

    File evalFile;
    if (usesWSs) {
        String evalDir = "eval/" + db + "/" + safePrefix + "/" + clusters + "/";
        (new File(evalDir)).mkdirs();
        evalFile = new File(evalDir + safePrefix + "-" + db + clusters + searcherSuffix + ".txt");
    } else {
        String evalDir = "eval/" + db + "/";
        (new File(evalDir)).mkdirs();
        if (clusters > 0)
            evalFile = new File(evalDir + safePrefix + "-" + db + clusters + ".txt");
        else
            evalFile = new File(evalDir + safePrefix + "-" + db + "Global.txt"); // forGlobal
    }

    // try-with-resources: the evaluation file is closed even if a search fails.
    try (PrintWriter fw = new PrintWriter(evalFile)) {
        Hashtable<Integer, String> evalText = new Hashtable<Integer, String>(260);
        int maxDoc = readerQueries.maxDoc();
        for (int i = 0; i < maxDoc; i++) {
            if (hasDeletions && !liveDocs.get(i))
                continue; // if it is deleted, just ignore it.
            Document queryDoc = readerQueries.document(i);
            String fileName = getIDfromFileName(
                    queryDoc.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
            if (queries.keySet().contains(fileName)) {
                StringBuilder tmpEval = new StringBuilder();
                queryCount += 1d;
                // ok, we've got a query here for a document ...
                ImageSearchHits hits = searcher.search(queryDoc, reader);
                double rank = 0;
                double avgPrecision = 0;
                double found = 0;
                double tmpP10 = 0;
                // Process-wide side effect kept from the original: the summary formatting
                // at the end of this method relies on the default locale.
                Locale.setDefault(Locale.US);
                for (int y = 0; y < hits.length(); y++) {
                    String hitFile = getIDfromFileName(reader.document(hits.documentID(y))
                            .getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
                    // TODO: Sort by query ID!
                    tmpEval.append(String.format(Locale.US, "%d 1 %s %d %.2f test\n", query2id.get(fileName),
                            hitFile.substring(0, hitFile.lastIndexOf('.')), (int) rank + 1, hits.score(y)));
                    rank++;
                    if (queries.get(fileName).contains(hitFile) || hitFile.equals(fileName)) { // it's a hit.
                        found++;
                        // TODO: Compute error rate, etc. here.
                        avgPrecision += found / rank;
                        if (rank <= 10)
                            tmpP10++;
                    } else { // nothing has been found.
                        if (rank == 1)
                            errorRate += 1d;
                    }
                }
                avgPrecision /= (double) (queries.get(fileName).size());

                if (found != queries.get(fileName).size()) {
                    // some of the results have not been found. We have to deal with it ...
                    errorCount++;
                }

                map += avgPrecision;
                p10 += tmpP10;
                evalText.put(query2id.get(fileName), tmpEval.toString());
            }
        }

        // Query ids are looked up as 1..size; ids without a result text are skipped
        // instead of passing null to the writer (which would throw).
        for (int i = 0; i < query2id.size(); i++) {
            String text = evalText.get(i + 1);
            if (text != null)
                fw.write(text);
        }
    }

    errorRate = errorRate / queryCount;
    map = map / queryCount;
    p10 = p10 / (queryCount * 10d);

    // Elapsed time as [hh:]mm:ss.
    double h = (System.currentTimeMillis() - start) / 3600000.0;
    double m = (h - Math.floor(h)) * 60.0;
    double s = (m - Math.floor(m)) * 60;
    String str = String.format("%s%02d:%02d", (((int) h > 0) ? String.format("%02d:", (int) h) : ""), (int) m,
            (int) s) + " ~ ";

    String clusterLabel = (clusters > 0) ? ("\t" + clusters) : "";
    if (usesWSs)
        str += String.format("%s%s\t%.4f\t%.4f\t%.4f\t(%s)", prefix, clusterLabel, map, p10, errorRate,
                searcherSuffix);
    else
        str += String.format("%s%s\t%.4f\t%.4f\t%.4f", prefix, clusterLabel, map, p10, errorRate);
    if (errorCount > 0) {
        // some of the results have not been found. We have to deal with it ...
        str += "\t~~\tDid not find result ;(\t(" + errorCount + ")";
    }
    System.out.println(str);
}

From source file:net.semanticmetadata.lire.benchmarking.UCIDBenchmark.java

License:Open Source License

/**
 * Uses every live document whose identifier is a key of {@code queries} as a query against
 * the same index, writes the ranked results to {@code eval/<prefix>-eval.txt}, and prints
 * MAP, P@10 and error rate to standard output.
 *
 * @param searcher searcher used to run each query
 * @param prefix   label of the run; used in the output file name (spaces become underscores)
 *                 and printed in front of the summary
 * @param reader   index that provides the queries and is searched
 * @throws IOException if the index cannot be read or the evaluation file cannot be written
 */
private void computeMAP(ImageSearcher searcher, String prefix, IndexReader reader) throws IOException {
    double queryCount = 0d;
    double errorRate = 0;
    double map = 0;
    double p10 = 0;
    // Needed to check whether a document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    boolean hasDeletions = reader.hasDeletions();

    // try-with-resources: the evaluation file is closed even if a search fails.
    try (PrintWriter fw = new PrintWriter(new File("eval/" + prefix.replace(' ', '_') + "-eval.txt"))) {
        Hashtable<Integer, String> evalText = new Hashtable<Integer, String>(260);
        int maxDoc = reader.maxDoc();
        for (int i = 0; i < maxDoc; i++) {
            if (hasDeletions && !liveDocs.get(i))
                continue; // if it is deleted, just ignore it.
            Document queryDoc = reader.document(i);
            String fileName = getIDfromFileName(
                    queryDoc.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
            if (queries.keySet().contains(fileName)) {
                StringBuilder tmpEval = new StringBuilder();
                queryCount += 1d;
                // ok, we've got a query here for a document ...
                ImageSearchHits hits = searcher.search(queryDoc, reader);
                double rank = 0;
                double avgPrecision = 0;
                double found = 0;
                double tmpP10 = 0;
                // Process-wide side effect kept from the original: the summary printed at
                // the end of this method is formatted with the default locale.
                Locale.setDefault(Locale.US);
                for (int y = 0; y < hits.length(); y++) {
                    String hitFile = getIDfromFileName(
                            hits.doc(y).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
                    // TODO: Sort by query ID!
                    tmpEval.append(String.format(Locale.US, "%d 1 %s %d %.2f test\n", query2id.get(fileName),
                            hitFile.substring(0, hitFile.lastIndexOf('.')), (int) rank + 1,
                            100 - hits.score(y)));
                    rank++;
                    if (queries.get(fileName).contains(hitFile) || hitFile.equals(fileName)) { // it's a hit.
                        found++;
                        // TODO: Compute error rate, etc. here.
                        avgPrecision += found / rank;
                        if (rank <= 10)
                            tmpP10++;
                    } else { // nothing has been found.
                        if (rank == 1)
                            errorRate += 1d;
                    }
                }
                // The query itself counts as a hit, hence the expected surplus of exactly one.
                if (found - queries.get(fileName).size() == 1)
                    avgPrecision /= (double) (1d + queries.get(fileName).size());
                else {
                    // some of the results have not been found. We have to deal with it ...
                    System.err.println("Did not find result ;(");
                }

                map += avgPrecision;
                p10 += tmpP10;
                evalText.put(query2id.get(fileName), tmpEval.toString());
            }
        }
        // Query ids are looked up as 1..size; ids without a result text are skipped
        // instead of passing null to the writer (which would throw).
        for (int i = 0; i < query2id.size(); i++) {
            String text = evalText.get(i + 1);
            if (text != null)
                fw.write(text);
        }
    }
    errorRate = errorRate / queryCount;
    map = map / queryCount;
    p10 = p10 / (queryCount * 10d);
    System.out.print(prefix);
    System.out.format("\t%.5f\t%.5f\t%.5f\n", map, p10, errorRate);
}

From source file:net.semanticmetadata.lire.impl.CEDDImageSearcher.java

License:Open Source License

/**
 * Groups the documents of the index by their CEDD distance to the first document and
 * reports every group with more than one member as a set of duplicates.
 *
 * @param reader the index to scan
 * @return the duplicate groups, or {@code null} if no two documents share a distance or
 *         the descriptor class could not be instantiated
 * @throws IOException if no index exists at the reader's directory or reading fails
 */
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    SimpleImageDuplicates simpleImageDuplicates = null;
    try {
        if (!IndexReader.indexExists(reader.directory()))
            throw new FileNotFoundException("No index found at this specific location.");
        // The first document serves as the reference every other document is compared to.
        Document doc = reader.document(0);

        CEDD lireFeature = (CEDD) descriptorClass.newInstance();
        byte[] cls = doc.getBinaryValue(fieldName);
        if (cls != null && cls.length > 0)
            lireFeature.setByteArrayRepresentation(cls);

        HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>();

        // find duplicates ...
        boolean hasDeletions = reader.hasDeletions();

        // Document IDs run up to maxDoc(); numDocs() only counts live documents and
        // would skip trailing documents as soon as the index contains deletions.
        int maxDoc = reader.maxDoc();
        int numDuplicates = 0;
        for (int i = 0; i < maxDoc; i++) {
            if (hasDeletions && reader.isDeleted(i)) {
                continue;
            }
            Document d = reader.document(i);
            float distance = getDistance(d, lireFeature);

            List<String> sameDistance = duplicates.get(distance);
            if (sameDistance == null) {
                sameDistance = new LinkedList<String>();
                duplicates.put(distance, sameDistance);
            } else {
                numDuplicates++;
            }
            sameDistance.add(d.getFieldable(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }

        if (numDuplicates == 0)
            return null;

        LinkedList<List<String>> results = new LinkedList<List<String>>();
        for (List<String> group : duplicates.values()) {
            if (group.size() > 1) {
                results.add(group);
            }
        }
        simpleImageDuplicates = new SimpleImageDuplicates(results);
    } catch (InstantiationException e) {
        // Pass the exception along so the stack trace is not lost.
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage(), e);
    } catch (IllegalAccessException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage(), e);
    }
    return simpleImageDuplicates;

}

From source file:net.semanticmetadata.lire.impl.ColorLayoutImageSearcher.java

License:Open Source License

/**
 * Groups the documents of the index by their ColorLayout distance to the first document
 * and reports every group with more than one member as a set of duplicates.
 *
 * @param reader the index to scan
 * @return the duplicate groups, or {@code null} if no two documents share a distance or
 *         the descriptor class could not be instantiated
 * @throws IOException if no index exists at the reader's directory or reading fails
 */
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    SimpleImageDuplicates simpleImageDuplicates = null;
    try {
        if (!IndexReader.indexExists(reader.directory()))
            throw new FileNotFoundException("No index found at this specific location.");
        // The first document serves as the reference every other document is compared to.
        Document doc = reader.document(0);

        ColorLayout lireFeature = (ColorLayout) descriptorClass.newInstance();
        byte[] cls = doc.getBinaryValue(fieldName);
        if (cls != null && cls.length > 0)
            lireFeature.setByteArrayRepresentation(cls);

        HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>();

        // find duplicates ...
        boolean hasDeletions = reader.hasDeletions();

        // Document IDs run up to maxDoc(); numDocs() only counts live documents and
        // would skip trailing documents as soon as the index contains deletions.
        int maxDoc = reader.maxDoc();
        int numDuplicates = 0;
        for (int i = 0; i < maxDoc; i++) {
            if (hasDeletions && reader.isDeleted(i)) {
                continue;
            }
            Document d = reader.document(i);
            float distance = getDistance(d, lireFeature);

            List<String> sameDistance = duplicates.get(distance);
            if (sameDistance == null) {
                sameDistance = new LinkedList<String>();
                duplicates.put(distance, sameDistance);
            } else {
                numDuplicates++;
            }
            sameDistance.add(d.getFieldable(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }

        if (numDuplicates == 0)
            return null;

        LinkedList<List<String>> results = new LinkedList<List<String>>();
        for (List<String> group : duplicates.values()) {
            if (group.size() > 1) {
                results.add(group);
            }
        }
        simpleImageDuplicates = new SimpleImageDuplicates(results);
    } catch (InstantiationException e) {
        // Pass the exception along so the stack trace is not lost.
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage(), e);
    } catch (IllegalAccessException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage(), e);
    }
    return simpleImageDuplicates;

}

From source file:net.semanticmetadata.lire.impl.custom.SingleNddCeddImageSearcher.java

License:Open Source License

/**
 * Stores the reader, creates the result set and the reusable feature instance and, if
 * caching is enabled, loads the feature of every document into the in-memory cache.
 *
 * @param reader the index to search; may be {@code null}, in which case nothing is cached
 * @throws UnsupportedOperationException if the index still contains deleted documents
 */
protected void init(IndexReader reader) {
    this.reader = reader;
    // Guard against null here as well: the caching step below explicitly allows it.
    if (reader != null && reader.hasDeletions()) {
        // forceMerge requires at least one target segment, so forceMerge(1) is the
        // call that actually merges the deletions away.
        throw new UnsupportedOperationException(
                "The index has to be optimized first to be cached! Use IndexWriter.forceMerge(1) to do this.");
    }
    docs = new TreeSet<SimpleResult>();
    try {
        this.cachedInstance = (LireFeature) this.descriptorClass.newInstance();
        if (fieldName == null)
            fieldName = this.cachedInstance.getFieldName();
    } catch (InstantiationException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher ("
                + descriptorClass.getName() + "): " + e.getMessage(), e);
    } catch (IllegalAccessException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher ("
                + descriptorClass.getName() + "): " + e.getMessage(), e);
    }
    // put all respective features into an in-memory cache ...
    if (isCaching && reader != null) {
        // No deletions at this point, so document IDs are exactly 0 .. numDocs - 1.
        int numDocs = reader.numDocs();
        featureCache = new ArrayList<double[]>(numDocs);
        try {
            Document d;
            for (int i = 0; i < numDocs; i++) {
                d = reader.document(i);
                cachedInstance.setByteArrayRepresentation(d.getField(fieldName).binaryValue().bytes,
                        d.getField(fieldName).binaryValue().offset, d.getField(fieldName).binaryValue().length);
                // normalize features, so we can use L1
                if (!halfDimensions) {
                    featureCache.add(normalize(cachedInstance.getDoubleHistogram()));
                } else {
                    featureCache.add(crunch(cachedInstance.getDoubleHistogram()));
                }
            }
        } catch (IOException e) {
            // Report through the logger like the other failures in this method.
            logger.log(Level.SEVERE, "Error reading documents while caching features: " + e.getMessage(), e);
        }
    }
}

From source file:net.semanticmetadata.lire.impl.FastOpponentImageSearcher.java

License:Open Source License

/**
 * @param reader/* w w  w .  j a v a 2 s  .  co  m*/
 * @param lireFeature
 * @return the maximum distance found for normalizing.
 * @throws java.io.IOException
 */
protected double findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException {
    maxDistance = -1f;
    // clear result set ...
    docs.clear();
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    Document d;
    double tmpDistance;
    int docs = reader.numDocs();
    byte[] histogram = lireFeature.getByteArrayRepresentation();
    for (int i = 0; i < docs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.

        d = reader.document(i);
        tmpDistance = getDistance(d, histogram);
        assert (tmpDistance >= 0);
        // calculate the overall max distance to normalize score afterwards
        //            if (overallMaxDistance < tmpDistance) {
        //                overallMaxDistance = tmpDistance;
        //            }
        // if it is the first document:
        if (maxDistance < 0) {
            maxDistance = tmpDistance;
        }
        // if the array is not full yet:
        if (this.docs.size() < maxHits) {
            this.docs.add(new SimpleResult((float) tmpDistance, d, i));
            if (tmpDistance > maxDistance)
                maxDistance = tmpDistance;
        } else if (tmpDistance < maxDistance) {
            // if it is nearer to the sample than at least on of the current set:
            // remove the last one ...
            this.docs.remove(this.docs.last());
            // add the new one ...
            this.docs.add(new SimpleResult((float) tmpDistance, d, i));
            // and set our new distance border ...
            maxDistance = this.docs.last().getDistance();
        }
    }
    return maxDistance;
}

From source file:net.semanticmetadata.lire.impl.GenericFastImageSearcher.java

License:Open Source License

/**
 * Collects the {@code maxHits} documents nearest to the given feature in {@code this.docs},
 * either by reading every live document from the index or, when caching is enabled, by
 * comparing against the in-memory feature cache.
 *
 * @param reader      the index to scan
 * @param lireFeature the query feature
 * @return the maximum distance found for normalizing.
 * @throws java.io.IOException if a document cannot be read
 */
protected float findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException {
    // NOTE(review): because maxDistance starts at Float.MAX_VALUE, the
    // "tmpDistance > maxDistance" updates below cannot fire while the set is filling, so
    // the first candidate seen after the set is full always replaces the current worst
    // hit, and MAX_VALUE is returned when fewer than maxHits documents exist.
    // Confirm whether -1 was intended; left unchanged here.
    maxDistance = Float.MAX_VALUE;

    // clear result set ...
    this.docs.clear();
    Document d;
    float tmpDistance;
    if (!isCaching) {
        // Needed to check whether a document is deleted.
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        boolean hasDeletions = reader.hasDeletions();
        // Document IDs run up to maxDoc(); numDocs() only counts live documents and
        // would skip trailing documents as soon as the index contains deletions.
        int maxDoc = reader.maxDoc();
        // we read each and every document from the index and then we compare it to the query.
        for (int i = 0; i < maxDoc; i++) {
            if (hasDeletions && !liveDocs.get(i))
                continue; // if it is deleted, just ignore it.

            d = reader.document(i);
            tmpDistance = getDistance(d, lireFeature);
            assert (tmpDistance >= 0);
            // if the result set is not full yet:
            if (this.docs.size() < maxHits) {
                this.docs.add(new SimpleResult(tmpDistance, d, i));
                if (tmpDistance > maxDistance)
                    maxDistance = tmpDistance;
            } else if (tmpDistance < maxDistance) {
                // it is nearer to the sample than at least one of the current set:
                // remove the last one ...
                this.docs.remove(this.docs.last());
                // add the new one ...
                this.docs.add(new SimpleResult(tmpDistance, d, i));
                // and set our new distance border ...
                maxDistance = this.docs.last().getDistance();
            }
        }
    } else {
        // we use the in-memory cache to find the matching docs from the index.
        // The position in the cache is used as the document ID.
        int count = 0;
        for (Iterator<byte[]> iterator = featureCache.iterator(); iterator.hasNext();) {
            cachedInstance.setByteArrayRepresentation(iterator.next());
            tmpDistance = lireFeature.getDistance(cachedInstance);
            assert (tmpDistance >= 0);
            // if the result set is not full yet:
            if (this.docs.size() < maxHits) {
                this.docs.add(new SimpleResult(tmpDistance, reader.document(count), count));
                if (tmpDistance > maxDistance)
                    maxDistance = tmpDistance;
            } else if (tmpDistance < maxDistance) {
                // it is nearer to the sample than at least one of the current set:
                // remove the last one ...
                this.docs.remove(this.docs.last());
                // add the new one ...
                this.docs.add(new SimpleResult(tmpDistance, reader.document(count), count));
                // and set our new distance border ...
                maxDistance = this.docs.last().getDistance();
            }
            count++;
        }
    }
    return maxDistance;
}

From source file:net.semanticmetadata.lire.impl.GenericFastImageSearcher.java

License:Open Source License

/**
 * Groups the live documents of the index by their feature distance to the first document
 * and reports every group with more than one member as a set of duplicates.
 *
 * @param reader the index to scan
 * @return the duplicate groups, or {@code null} if no two documents share a distance or
 *         the descriptor class could not be instantiated
 * @throws IOException if a document cannot be read
 */
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    SimpleImageDuplicates simpleImageDuplicates = null;
    try {
        // The first document serves as the reference every other document is compared to.
        Document doc = reader.document(0);

        LireFeature lireFeature = (LireFeature) descriptorClass.newInstance();
        if (doc.getField(fieldName).binaryValue() != null && doc.getField(fieldName).binaryValue().length > 0)
            lireFeature.setByteArrayRepresentation(doc.getField(fieldName).binaryValue().bytes,
                    doc.getField(fieldName).binaryValue().offset, doc.getField(fieldName).binaryValue().length);

        HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>();

        // Needed to check whether a document is deleted.
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        boolean hasDeletions = reader.hasDeletions();

        // Document IDs run up to maxDoc(); numDocs() only counts live documents and
        // would skip trailing documents as soon as the index contains deletions.
        int maxDoc = reader.maxDoc();
        int numDuplicates = 0;
        for (int i = 0; i < maxDoc; i++) {
            if (hasDeletions && !liveDocs.get(i))
                continue; // if it is deleted, just ignore it.

            Document d = reader.document(i);
            float distance = getDistance(d, lireFeature);

            List<String> sameDistance = duplicates.get(distance);
            if (sameDistance == null) {
                sameDistance = new LinkedList<String>();
                duplicates.put(distance, sameDistance);
            } else {
                numDuplicates++;
            }
            sameDistance.add(d.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }

        if (numDuplicates == 0)
            return null;

        LinkedList<List<String>> results = new LinkedList<List<String>>();
        for (List<String> group : duplicates.values()) {
            if (group.size() > 1) {
                results.add(group);
            }
        }
        simpleImageDuplicates = new SimpleImageDuplicates(results);
    } catch (InstantiationException e) {
        // Pass the exception along so the stack trace is not lost.
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage(), e);
    } catch (IllegalAccessException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage(), e);
    }
    return simpleImageDuplicates;

}