List of usage examples for org.apache.lucene.index IndexReader hasDeletions
public boolean hasDeletions()
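IndexReader.hasDeletions() returns true if the index behind the reader contains any deleted documents. The examples below all follow the same pattern: obtain the live-docs bitset once via MultiFields.getLiveDocs(reader) (which is null when nothing is deleted) and skip every document whose bit is cleared. A minimal, self-contained sketch of that pattern, using the Lucene 4.x API the examples are written against; the index path is a placeholder, and it iterates up to maxDoc() because doc IDs can exceed numDocs() once documents have been deleted:

import java.io.File;
import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Bits;

public class HasDeletionsExample {
    public static void main(String[] args) throws IOException {
        // "/path/to/index" is a placeholder; point it at an existing Lucene index.
        IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("/path/to/index")));
        // liveDocs is null when the index has no deletions, so guard with hasDeletions().
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        for (int i = 0; i < reader.maxDoc(); i++) {
            if (reader.hasDeletions() && !liveDocs.get(i))
                continue; // deleted document, skip it
            Document d = reader.document(i);
            // ... process d ...
        }
        reader.close();
    }
}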
From source file: net.semanticmetadata.lire.impl.GenericImageSearcher.java
License: Open Source License
/**
 * @param reader
 * @param lireFeature
 * @return the maximum distance found for normalizing.
 * @throws java.io.IOException
 */
protected float findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException {
    float maxDistance = -1f, overallMaxDistance = -1f;
    float tmpDistance = 0f;
    // clear result set ...
    docs.clear();
    // Needed to check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    int docs = reader.numDocs();
    Document d = null;
    for (int i = 0; i < docs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        d = reader.document(i);
        tmpDistance = getDistance(d, lireFeature);
        assert (tmpDistance >= 0);
        // calculate the overall max distance to normalize the score afterwards
        if (overallMaxDistance < tmpDistance) {
            overallMaxDistance = tmpDistance;
        }
        // if it is the first document:
        if (maxDistance < 0) {
            maxDistance = tmpDistance;
        }
        // if the result set is not full yet:
        if (this.docs.size() < maxHits) {
            this.docs.add(new SimpleResult(tmpDistance, d, i));
            if (tmpDistance > maxDistance)
                maxDistance = tmpDistance;
        } else if (tmpDistance < maxDistance) {
            // if it is nearer to the sample than at least one of the current set:
            // remove the last one ...
            this.docs.remove(this.docs.last());
            // add the new one ...
            this.docs.add(new SimpleResult(tmpDistance, d, i));
            // and set our new distance border ...
            maxDistance = this.docs.last().getDistance();
        }
    }
    return maxDistance;
}
From source file: net.semanticmetadata.lire.impl.GenericImageSearcher.java
License: Open Source License
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    // get the first document:
    SimpleImageDuplicates simpleImageDuplicates = null;
    try {
        Document doc = reader.document(0);
        LireFeature lireFeature = (LireFeature) descriptorClass.newInstance();
        String[] cls = doc.getValues(fieldName);
        if (cls != null && cls.length > 0)
            lireFeature.setStringRepresentation(cls[0]);
        HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>();
        // Needed to check whether the document is deleted.
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        int docs = reader.numDocs();
        int numDuplicates = 0;
        for (int i = 0; i < docs; i++) {
            if (reader.hasDeletions() && !liveDocs.get(i))
                continue; // if it is deleted, just ignore it.
            Document d = reader.document(i);
            float distance = getDistance(d, lireFeature);
            if (!duplicates.containsKey(distance)) {
                duplicates.put(distance, new LinkedList<String>());
            } else {
                numDuplicates++;
            }
            duplicates.get(distance).add(d.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
        if (numDuplicates == 0)
            return null;
        LinkedList<List<String>> results = new LinkedList<List<String>>();
        for (float f : duplicates.keySet()) {
            if (duplicates.get(f).size() > 1) {
                results.add(duplicates.get(f));
            }
        }
        simpleImageDuplicates = new SimpleImageDuplicates(results);
    } catch (InstantiationException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage());
    } catch (IllegalAccessException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage());
    }
    return simpleImageDuplicates;
}
From source file: net.semanticmetadata.lire.impl.ParallelImageSearcher.java
License: Open Source License
/**
 * @param reader
 * @param lireFeature
 * @return the maximum distance found for normalizing.
 * @throws java.io.IOException
 */
@SuppressWarnings("unchecked")
private float[] findSimilar(IndexReader reader, LireFeature[] lireFeature) throws IOException {
    float[] maxDistance = new float[lireFeature.length];
    float[] overallMaxDistance = new float[lireFeature.length];
    for (int i = 0; i < overallMaxDistance.length; i++) {
        overallMaxDistance[i] = -1f;
        maxDistance[i] = -1f;
    }
    parDocs = new TreeSet[lireFeature.length];
    for (int i = 0; i < parDocs.length; i++) {
        parDocs[i] = new TreeSet<SimpleResult>();
    }
    // Needed to check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    int docs = reader.numDocs();
    for (int i = 0; i < docs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        Document d = reader.document(i);
        float[] distance = getDistance(d, lireFeature);
        // calculate the overall max distance to normalize the scores afterwards
        for (int j = 0; j < distance.length; j++) {
            float f = distance[j];
            if (overallMaxDistance[j] < f) {
                overallMaxDistance[j] = f;
            }
            // if it is the first document:
            if (maxDistance[j] < 0) {
                maxDistance[j] = f;
            }
            // if the result set is not full yet:
            if (this.parDocs[j].size() < maxHits) {
                this.parDocs[j].add(new SimpleResult(f, d, i));
                if (f > maxDistance[j]) {
                    maxDistance[j] = f;
                }
            } else if (f < maxDistance[j]) {
                // if it is nearer to the sample than at least one of the current set:
                // remove the last one ...
                this.parDocs[j].remove(this.parDocs[j].last());
                // add the new one ...
                this.parDocs[j].add(new SimpleResult(f, d, i));
                // and set our new distance border ...
                maxDistance[j] = this.parDocs[j].last().getDistance();
            }
        }
    }
    return maxDistance;
}
From source file: net.semanticmetadata.lire.impl.searcher.GenericFastImageSearcher.java
License: Open Source License
/** * @param reader/*ww w . j a v a 2 s . c om*/ * @param lireFeature * @return the maximum distance found for normalizing. * @throws java.io.IOException */ protected float findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException { maxDistance = -1f; // overallMaxDistance = -1f; // clear result set ... docs.clear(); // Needed for check whether the document is deleted. Bits liveDocs = MultiFields.getLiveDocs(reader); Document d; float tmpDistance; int docs = reader.numDocs(); if (!isCaching) { // we read each and every document from the index and then we compare it to the query. for (int i = 0; i < docs; i++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. d = reader.document(i); tmpDistance = getDistance(d, lireFeature); assert (tmpDistance >= 0); // if it is the first document: if (maxDistance < 0) { maxDistance = tmpDistance; } // if the array is not full yet: if (this.docs.size() < maxHits) { this.docs.add(new SimpleResult(tmpDistance, d, i)); if (tmpDistance > maxDistance) maxDistance = tmpDistance; } else if (tmpDistance < maxDistance) { // if it is nearer to the sample than at least on of the current set: // remove the last one ... this.docs.remove(this.docs.last()); // add the new one ... this.docs.add(new SimpleResult(tmpDistance, d, i)); // and set our new distance border ... maxDistance = this.docs.last().getDistance(); } } } else { // we use the in-memory cache to find the matching docs from the index. int count = 0; for (Iterator<byte[]> iterator = featureCache.iterator(); iterator.hasNext();) { cachedInstance.setByteArrayRepresentation(iterator.next()); if (reader.hasDeletions() && !liveDocs.get(count)) { count++; continue; // if it is deleted, just ignore it. } else { tmpDistance = lireFeature.getDistance(cachedInstance); assert (tmpDistance >= 0); // if it is the first document: if (maxDistance < 0) { maxDistance = tmpDistance; } // if the array is not full yet: if (this.docs.size() < maxHits) { this.docs.add(new SimpleResult(tmpDistance, reader.document(count), count)); if (tmpDistance > maxDistance) maxDistance = tmpDistance; } else if (tmpDistance < maxDistance) { // if it is nearer to the sample than at least on of the current set: // remove the last one ... this.docs.remove(this.docs.last()); // add the new one ... this.docs.add(new SimpleResult(tmpDistance, reader.document(count), count)); // and set our new distance border ... maxDistance = this.docs.last().getDistance(); } count++; } } } return maxDistance; }
From source file: net.semanticmetadata.lire.impl.searcher.LocationBasedImageSearcher.java
License: Open Source License
/**
 * @param reader
 * @param imageInfo
 * @return the maximum distance found for normalizing.
 * @throws java.io.IOException
 */
protected float findSimilar(IndexReader reader, ImageInfo imageInfo) throws IOException {
    float maxDistance = -1f, allMaxDistance = -1f;
    float tmpDistance = 0f;
    // clear result set ...
    docs.clear();
    // Needed to check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    int docNumber = reader.numDocs();
    Document d = null;
    for (int i = 0; i < docNumber; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        d = reader.document(i);
        tmpDistance = getDistance(d, imageInfo);
        // skip results that are invalid or beyond the distance threshold
        if (tmpDistance < 0 || tmpDistance > this.threshold)
            continue;
        // calculate the overall max distance to normalize the score afterwards
        if (allMaxDistance < tmpDistance) {
            allMaxDistance = tmpDistance;
        }
        // if it is the first document:
        if (maxDistance < 0) {
            maxDistance = tmpDistance;
        }
        // if the result set is not full yet:
        if (this.docs.size() < maxHits) {
            this.docs.add(new SimpleResult(tmpDistance, d, i));
            if (tmpDistance > maxDistance)
                maxDistance = tmpDistance;
        } else if (tmpDistance < maxDistance) {
            // if it is nearer to the sample than at least one of the current set:
            // remove the last one, add the new one, and re-sort ...
            this.docs.remove(this.docs.size() - 1);
            this.docs.add(new SimpleResult(tmpDistance, d, i));
            maxDistance = tmpDistance;
            Collections.sort(docs);
        }
    }
    return maxDistance;
}
From source file: net.semanticmetadata.lire.impl.searcher.TopDocsImageSearcher.java
License: Open Source License
/** * @param results//from www . jav a 2s . c om * @param reader * @param lireFeature * @return the maximum distance found for normalizing. * @throws java.io.IOException */ protected float findSimilar(TopDocs results, IndexReader reader, LireFeature lireFeature) throws IOException { float maxDistance = -1f, overallMaxDistance = -1f; boolean hasDeletions = reader.hasDeletions(); // clear result set ... docs.clear(); // Needed for check whether the document is deleted. Bits liveDocs = MultiFields.getLiveDocs(reader); int docs = results.totalHits; for (int i = 0; i < docs; i++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. Document d = reader.document(results.scoreDocs[i].doc); float distance = getDistance(d, lireFeature); assert (distance >= 0); // calculate the overall max distance to normalize score afterwards if (overallMaxDistance < distance) { overallMaxDistance = distance; } // if it is the first document: if (maxDistance < 0) { maxDistance = distance; } // if the array is not full yet: if (this.docs.size() < maxHits) { this.docs.add(new SimpleResult(distance, d, i)); if (distance > maxDistance) maxDistance = distance; } else if (distance < maxDistance) { // if it is nearer to the sample than at least on of the current set: // remove the last one ... this.docs.remove(this.docs.last()); // add the new one ... this.docs.add(new SimpleResult(distance, d, i)); // and set our new distance border ... maxDistance = this.docs.last().getDistance(); } } return maxDistance; }
From source file: net.semanticmetadata.lire.impl.SimpleImageSearcher.java
License: Open Source License
/** * @param reader/*w w w . j a v a 2 s . c o m*/ * @param cl * @param sc * @param eh * @return the maximum distance found for normalizing. * @throws IOException */ private float findSimilar(IndexReader reader, ColorLayoutImpl cl, ScalableColorImpl sc, EdgeHistogramImplementation eh) throws IOException { float maxDistance = -1f, overallMaxDistance = -1f; boolean hasDeletions = reader.hasDeletions(); // clear result set ... docs.clear(); int docs = reader.numDocs(); for (int i = 0; i < docs; i++) { // bugfix by Roman Kern Document d = reader.document(i); float distance = getDistance(d, cl, sc, eh); // calculate the overall max distance to normalize score afterwards if (overallMaxDistance < distance) { overallMaxDistance = distance; } // if it is the first document: if (maxDistance < 0) { maxDistance = distance; } // if the array is not full yet: if (this.docs.size() < maxHits) { this.docs.add(new SimpleResult(distance, d)); if (distance > maxDistance) maxDistance = distance; } else if (distance < maxDistance) { // if it is nearer to the sample than at least on of the current set: // remove the last one ... this.docs.remove(this.docs.last()); // add the new one ... this.docs.add(new SimpleResult(distance, d)); // and set our new distance border ... maxDistance = this.docs.last().getDistance(); } } return maxDistance; }
From source file: net.semanticmetadata.lire.impl.SimpleImageSearcher.java
License: Open Source License
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    // get the first document:
    if (!IndexReader.indexExists(reader.directory()))
        throw new FileNotFoundException("No index found at this specific location.");
    Document doc = reader.document(0);
    ScalableColorImpl sc = null;
    ColorLayoutImpl cl = null;
    EdgeHistogramImplementation eh = null;
    String[] cls = doc.getValues(DocumentBuilder.FIELD_NAME_COLORLAYOUT);
    if (cls != null && cls.length > 0)
        cl = new ColorLayoutImpl(cls[0]);
    String[] scs = doc.getValues(DocumentBuilder.FIELD_NAME_SCALABLECOLOR);
    if (scs != null && scs.length > 0)
        sc = new ScalableColorImpl(scs[0]);
    String[] ehs = doc.getValues(DocumentBuilder.FIELD_NAME_EDGEHISTOGRAM);
    if (ehs != null && ehs.length > 0)
        eh = new EdgeHistogramImplementation(ehs[0]);
    HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>();
    // find duplicates ...
    boolean hasDeletions = reader.hasDeletions();
    int docs = reader.numDocs();
    int numDuplicates = 0;
    for (int i = 0; i < docs; i++) {
        Document d = reader.document(i);
        float distance = getDistance(d, cl, sc, eh);
        if (!duplicates.containsKey(distance)) {
            duplicates.put(distance, new LinkedList<String>());
        } else {
            numDuplicates++;
        }
        duplicates.get(distance).add(d.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
    }
    if (numDuplicates == 0)
        return null;
    LinkedList<List<String>> results = new LinkedList<List<String>>();
    for (float f : duplicates.keySet()) {
        if (duplicates.get(f).size() > 1) {
            results.add(duplicates.get(f));
        }
    }
    return new SimpleImageDuplicates(results);
}
From source file: net.semanticmetadata.lire.indexing.MetricSpacesInvertedListIndexing.java
License: Open Source License
/**
 * Creates a set of reference objects and stores it in a new index (named "<indexPath>-ro"). Then creates ordered
 * lists of reference object positions for each data item in the index with the given feature.
 * Finally a new index (named "<indexPath>-ms") is created where all the original documents as well as the new data
 * are stored.
 *
 * @param indexPath the path to the original index
 * @throws IOException
 */
public void createIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();
    if (numDocs < numReferenceObjects) {
        throw new UnsupportedOperationException("Too few documents in index.");
    }
    // progress report
    progress.setNumDocsAll(numDocs);
    progress.setCurrentState(State.RoSelection);
    boolean hasDeletions = reader.hasDeletions();
    // init reference objects:
    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-ro", true);
    HashSet<Integer> referenceObjsIds = new HashSet<Integer>(numReferenceObjects);
    double numDocsDouble = (double) numDocs;
    while (referenceObjsIds.size() < numReferenceObjects) {
        referenceObjsIds.add((int) (numDocsDouble * Math.random()));
    }
    int count = 0;
    if (hasDeletions) {
        System.err.println("WARNING: There are deleted docs in your index. You should "
                + "optimize your index before using this method.");
    }
    // progress report
    progress.setCurrentState(State.RoIndexing);
    // find them in the index and put them into a separate index:
    for (int i : referenceObjsIds) {
        count++;
        Document document = reader.document(i);
        document.add(new Field("ro-id", count + "", StringField.TYPE_STORED));
        iw.addDocument(document);
    }
    iw.commit();
    iw.close();
    // progress report
    progress.setCurrentState(State.Indexing);
    // now find the reference objects for each entry ;)
    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher = new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
    analyzerPerField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper = new PerFieldAnalyzerWrapper(
            new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), analyzerPerField);
    iw = new IndexWriter(FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    StringBuilder sb = new StringBuilder(256);
    // Needed to check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    for (int i = 0; i < numDocs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        Document document = reader.document(i);
        ImageSearchHits hits = searcher.search(document, readerRo);
        sb.delete(0, sb.length());
        for (int j = 0; j < numReferenceObjectsUsed; j++) {
            sb.append(hits.doc(j).getValues("ro-id")[0]);
            sb.append(' ');
        }
        document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
        iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), document);
        // progress report
        progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);
    }
    iw.commit();
    iw.close();
    // progress report
    progress.setCurrentState(State.Idle);
}
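The warning in the example above uses the pre-4.0 term "optimize". Under the Lucene 4.x API used here, deletions can be purged beforehand with IndexWriter.forceMergeDeletes(); a minimal sketch under that assumption (the analyzer choice is illustrative, and LuceneUtils.LUCENE_VERSION is LIRE's version constant from the example above):

IndexWriter iw = new IndexWriter(FSDirectory.open(new File(indexPath)),
        new IndexWriterConfig(LuceneUtils.LUCENE_VERSION,
                new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION)));
iw.forceMergeDeletes(); // merge segments containing deletions; a reader opened afterwards reports hasDeletions() == false
iw.close();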
From source file: net.semanticmetadata.lire.indexing.MetricSpacesInvertedListIndexing.java
License: Open Source License
/**
 * We assume that the initial indexing has been done and a set of reference objects has been
 * found and indexed in the separate index. However, further documents were added and they
 * now need to get a ranked list of reference objects. So we (i) get all these new documents
 * missing the field "ro-order" and (ii) add this field.
 *
 * @param indexPath the index to update
 * @throws IOException
 */
public void updateIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();
    boolean hasDeletions = reader.hasDeletions();
    int countUpdated = 0;
    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher = new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> perField = new HashMap<String, Analyzer>(1);
    perField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper = new PerFieldAnalyzerWrapper(
            new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), perField);
    IndexWriter iw = new IndexWriter(FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    StringBuilder sb = new StringBuilder(256);
    // Needed to check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    for (int i = 0; i < numDocs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        Document document = reader.document(i);
        if (document.getField("ro-order") == null) { // if the field is not here we create it.
            ImageSearchHits hits = searcher.search(document, readerRo);
            sb.delete(0, sb.length());
            for (int j = 0; j < numReferenceObjectsUsed; j++) {
                sb.append(hits.doc(j).getValues("ro-id")[0]);
                sb.append(' ');
            }
            document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
            iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                    document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), document);
            countUpdated++;
        }
        // progress report
        progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);
        // debug: System.out.println("countUpdated = " + countUpdated);
    }
    iw.commit();
    iw.close();
}