Example usage for org.apache.lucene.index IndexReader hasDeletions

List of usage examples for org.apache.lucene.index IndexReader hasDeletions

Introduction

On this page you can find example usages of the hasDeletions method of org.apache.lucene.index.IndexReader.

Prototype

public boolean hasDeletions() 

Source Link

Document

Returns true if any documents have been deleted.

Usage

From source file:com.esri.gpt.catalog.lucene.AclFilter.java

License:Apache License

/**
 * Queries for documents that have no values associated with the field.
 * <p>
 * A null or blank field name yields a bit set with no bits set.
 * @param reader the index reader
 * @param field the name of the field to check
 * @return the OpenBitSet (documents with no values set to true)
 * @throws IOException if an exception is encountered while reading the index
 */
private OpenBitSet queryNulls(IndexReader reader, String field) throws IOException {
    int nBits = reader.maxDoc();
    OpenBitSet bitSet = new OpenBitSet(nBits);
    if ((field == null) || (field.trim().length() == 0)) {
        return bitSet;
    }

    TermEnum termEnum = null;
    TermDocs termDocs = null;
    try {

        // find all documents that have a term for the field, then flip the bit set
        termEnum = reader.terms(new Term(field));
        termDocs = reader.termDocs();
        do {
            Term term = termEnum.term();
            // The enumeration is ordered by field and then by text, so the first
            // term of another field (or an exhausted enumeration) ends the scan.
            if ((term == null) || !term.field().equals(field)) {
                break;
            }
            termDocs.seek(term);
            while (termDocs.next()) {
                bitSet.fastSet(termDocs.doc());
            }
        } while (termEnum.next());

        bitSet.flip(0, nBits);

        // deleted documents must not be reported as "documents without a value"
        if (reader.hasDeletions()) {
            for (int i = 0; i < nBits; i++) {
                if (bitSet.get(i) && reader.isDeleted(i)) {
                    bitSet.fastFlip(i);
                }
            }
        }

    } finally {
        // Closing is best effort: a failure here must not mask the result or
        // an exception already propagating from the try block.
        try {
            if (termEnum != null) {
                termEnum.close();
            }
        } catch (Exception ignored) {
            // intentionally ignored
        }
        try {
            if (termDocs != null) {
                termDocs.close();
            }
        } catch (Exception ignored) {
            // intentionally ignored
        }
    }
    return bitSet;
}

From source file:com.jaeksoft.searchlib.index.IndexStatistics.java

License:Open Source License

/**
 * Captures the document counts and state flags reported by the given reader.
 *
 * @param indexReader the reader whose statistics are copied into this instance
 */
protected IndexStatistics(IndexReader indexReader) {
    this.hasDeletions = indexReader.hasDeletions();
    this.isOptimized = indexReader.isOptimized();
    this.numDocs = indexReader.numDocs();
    this.numDeletedDocs = indexReader.numDeletedDocs();
    this.maxDoc = indexReader.maxDoc();
}

From source file:com.zimbra.cs.rmgmt.RemoteMailQueue.java

License:Open Source License

/**
 * Adds one summary item per indexed term of the summarizable queue attributes
 * (addr, host, from, to, fromdomain, todomain, reason, received) to
 * {@code result.sitems}, each carrying the number of documents containing the term.
 * Terms whose count is zero are not added.
 *
 * @param result the search result whose summary lists are filled in
 * @param indexReader the reader over the queue index
 * @throws IOException if reading the index fails
 */
private void summarize(SearchResult result, IndexReader indexReader) throws IOException {
    TermEnum terms = indexReader.terms();
    try {
        boolean hasDeletions = indexReader.hasDeletions();
        do {
            Term term = terms.term();
            String field = (term == null) ? null : term.field();
            if (field != null && field.length() > 0) {
                QueueAttr attr = QueueAttr.valueOf(field);
                if (attr == QueueAttr.addr || attr == QueueAttr.host || attr == QueueAttr.from
                        || attr == QueueAttr.to || attr == QueueAttr.fromdomain || attr == QueueAttr.todomain
                        || attr == QueueAttr.reason || attr == QueueAttr.received) {
                    List<SummaryItem> list = result.sitems.get(attr);
                    if (list == null) {
                        list = new LinkedList<SummaryItem>();
                        result.sitems.put(attr, list);
                    }
                    // docFreq() is only used when the reader reports no deletions;
                    // otherwise the postings are walked and deleted documents skipped.
                    int count = hasDeletions ? countLiveDocs(indexReader, term) : terms.docFreq();
                    if (count > 0) {
                        list.add(new SummaryItem(term.text(), count));
                    }
                }
            }
        } while (terms.next());
    } finally {
        terms.close();
    }
}

/**
 * Counts the non-deleted documents that contain the given term.
 *
 * @param indexReader the reader to query
 * @param term the term whose postings are walked
 * @return the number of documents containing the term that are not deleted
 * @throws IOException if reading the index fails
 */
private int countLiveDocs(IndexReader indexReader, Term term) throws IOException {
    TermDocs termDocs = indexReader.termDocs(term);
    try {
        int count = 0;
        while (termDocs.next()) {
            if (!indexReader.isDeleted(termDocs.doc())) {
                count++;
            }
        }
        return count;
    } finally {
        termDocs.close();
    }
}

From source file:intelligentWebAlgorithms.algos.search.ranking.DocRankMatrixBuilder.java

License:Apache License

/**
 * Collects the ids of all documents whose "doctype" field is eligible for DocRank.
 *
 * @param idxR the reader over the index to scan
 * @return the matching document ids in ascending order; empty when the reader
 *         reports deletions
 * @throws IOException if a document cannot be loaded
 */
private List<Integer> getProcessedDocs(IndexReader idxR) throws IOException {
    List<Integer> docs = new ArrayList<Integer>();

    // NOTE(review): as written, an index with any deletion yields an empty list.
    // A per-document deletion check may have been intended - confirm before changing.
    if (idxR.hasDeletions()) {
        return docs;
    }

    for (int i = 0, n = idxR.maxDoc(); i < n; i++) {
        Document doc = idxR.document(i);
        if (eligibleForDocRank(doc.get("doctype"))) {
            docs.add(i);
        }
    }
    return docs;
}

From source file:net.conquiris.lucene.GuavaCachingFilter.java

License:Apache License

/**
 * Returns the doc id set for the reader, obtained through {@code cache}.
 * <p>
 * The cache key is the reader's deletes cache key when the reader has deletions
 * and its core cache key otherwise. A failure raised while obtaining the value is
 * unwrapped: an {@link IOException}, {@link RuntimeException} or {@link Error}
 * cause is rethrown as-is.
 *
 * @param reader the reader to compute the set for
 * @return the doc id set for the reader
 * @throws IOException if obtaining the value failed with an I/O error
 */
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    final Object key = reader.hasDeletions() ? reader.getDeletesCacheKey() : reader.getCoreCacheKey();
    try {
        return cache.get(key, new Loader(reader));
    } catch (Throwable t) {
        Throwable cause = t.getCause();
        if (cause instanceof IOException) {
            throw (IOException) cause;
        }
        if (cause instanceof RuntimeException) {
            throw (RuntimeException) cause;
        }
        if (cause instanceof Error) {
            throw (Error) cause;
        }
        if (t instanceof UncheckedExecutionException) {
            throw (UncheckedExecutionException) t;
        }
        // A throwable without a cause must not be replaced by an empty wrapper:
        // keep the original failure so it stays visible to the caller.
        throw new UncheckedExecutionException(cause != null ? cause : t);
    }
}

From source file:net.semanticmetadata.lire.benchmarking.TestNister.java

License:Open Source License

/**
 * Runs a sample of queries against the index at {@code pathName} and reports
 * the average precision at 4 (p@4).
 * <p>
 * Each sampled document is used as a query. A hit counts as a true positive when
 * the number extracted from its identifier, integer-divided by 4, equals that of
 * the query. The result is printed to standard output and appended to
 * {@code precision_results.txt}.
 *
 * @param pathName the path of the index directory
 * @param similarity the similarity handed to the visual words searcher
 * @param label the label written in front of the result
 * @throws IOException if the index cannot be read or the result file cannot be written
 */
public void computePrecision(String pathName, Similarity similarity, String label) throws IOException {
    //        ImageSearcher vis = new GenericImageSearcher(4, SimpleFeature.class, "featureSURFHistogram");
    //        ImageSearcher vis = new GenericFastImageSearcher(4, CEDD.class, DocumentBuilder.FIELD_NAME_CEDD);
    //        VisualWordsImageSearcher vis = new VisualWordsImageSearcher(4, similarity, DocumentBuilder.FIELD_NAME_SIFT_VISUAL_WORDS);
    VisualWordsImageSearcher vis = new VisualWordsImageSearcher(4, similarity,
            DocumentBuilder.FIELD_NAME_SURF + DocumentBuilder.FIELD_NAME_BOVW);
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(pathName)));

    int countSearches = 0;
    float avgPrecision = 0f;
    try {
        Set<Integer> test = StatsUtils.drawSample(100, 10200);
        // Needed for check whether the document is deleted.
        Bits liveDocs = MultiFields.getLiveDocs(reader);

        for (int i : test) {
            // hasDeletions() is tested first so liveDocs is only consulted
            // when the reader reports deletions
            if (reader.hasDeletions() && !liveDocs.get(i)) {
                continue; // deleted document, not usable as a query
            }
            ImageSearchHits hits = vis.search(reader.document(i), reader);
            String s = reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
            s = s.replaceAll("\\D", "");
            int queryID = Integer.parseInt(s);
            int countTruePositives = 0;
            for (int k = 0; k < hits.length(); k++) {
                String name = hits.doc(k).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
                name = name.replaceAll("\\D", "");
                int resultID = Integer.parseInt(name);
                if (queryID / 4 == resultID / 4) {
                    countTruePositives++;
                }
            }
            countSearches++;
            avgPrecision += (float) countTruePositives / 4f;
        }
    } finally {
        // the reader is opened by this method, so it is released here
        reader.close();
    }

    avgPrecision = avgPrecision / (float) countSearches;
    FileWriter fw = new FileWriter(new File("precision_results.txt"), true);
    try {
        System.out.println(label + " p@4= " + avgPrecision);
        fw.write(label + " p@4= " + avgPrecision + "\n");
    } finally {
        fw.close();
    }
}

From source file:net.semanticmetadata.lire.benchmarking.TestSimple.java

License:Open Source License

/**
 * Runs every known query image against the index and opens an HTML result page for it.
 * <p>
 * Query candidates are the non-deleted documents of the index followed by the
 * documents in {@code outsideQueries}; only those whose id is contained in
 * {@code allQueries} are searched.
 *
 * @param searcher the searcher used for each query
 * @param prefix the prefix of the generated result page name
 * @param reader the reader over the index being searched
 * @throws IOException if the index cannot be read or a result page cannot be written
 */
private void doSearch(ImageSearcher searcher, String prefix, IndexReader reader) throws IOException {
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    for (int i = 0; i < reader.maxDoc(); i++) {
        // hasDeletions() is tested first so liveDocs is only consulted
        // when the reader reports deletions
        if (reader.hasDeletions() && !liveDocs.get(i)) {
            continue; // if it is deleted, just ignore it.
        }
        searchAndShowIfQuery(searcher, prefix, reader, reader.document(i));
    }
    for (int i = 0; i < outsideQueries.size(); i++) {
        searchAndShowIfQuery(searcher, prefix, reader, outsideQueries.get(i));
    }
}

/**
 * Searches with the given document and shows the result page, provided the
 * document's id is one of {@code allQueries}; otherwise does nothing.
 *
 * @param searcher the searcher used for the query
 * @param prefix the prefix of the generated result page name
 * @param reader the reader over the index being searched
 * @param queryDoc the candidate query document
 * @throws IOException if searching or writing the result page fails
 */
private void searchAndShowIfQuery(ImageSearcher searcher, String prefix, IndexReader reader,
        Document queryDoc) throws IOException {
    String fullFileName = queryDoc.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
    String fileName = getIDfromFileName(fullFileName);
    if (allQueries.contains(fileName)) {
        // ok, we've got a query here for a document ...
        ImageSearchHits hits = searcher.search(queryDoc, reader);
        FileUtils.browseUri(FileUtils.saveImageResultsToHtml(prefix + "-" + fileName, hits, fullFileName));
    }
}

From source file:net.semanticmetadata.lire.benchmarking.TestUCID.java

License:Open Source License

/**
 * Evaluates a searcher on the query set and prints mean average precision,
 * precision at 10 and error rate as one tab-separated line.
 * <p>
 * Every non-deleted document whose id is a key of {@code queries} is used as a query.
 * A hit is relevant when its id is listed for the query or equals the query id, so
 * average precision is divided by {@code 1 + queries.get(id).size()}. The error rate
 * counts queries whose first-ranked hit is not relevant. The per-hit lines are
 * written, ordered by query id, to a text file below {@code eval/}.
 *
 * @param searcher the searcher to evaluate
 * @param prefix the label of the run; also used (spaces replaced by '_') in the file name
 * @param reader the reader over the index being searched
 * @throws IOException if the index cannot be read or the evaluation file cannot be created
 */
private void computeMAP(ImageSearcher searcher, String prefix, IndexReader reader) throws IOException {
    double queryCount = 0d;
    double errorRate = 0;
    double map = 0;
    double p10 = 0;
    int errorCount = 0;
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    String filePrefix = prefix.replace(' ', '_');
    String searcherName = searcher.toString();
    PrintWriter fw;
    if (searcherName.contains("ImageSearcherUsingWSs")) {
        String[] nameParts = searcherName.split("\\s+");
        (new File("eval/" + filePrefix + "/" + clusters + "/")).mkdirs();
        fw = new PrintWriter(new File("eval/" + filePrefix + "/" + clusters + "/"
                + filePrefix + "-" + db + clusters
                + nameParts[nameParts.length - 1] + ".txt"));
    } else {
        fw = new PrintWriter(new File("eval/" + filePrefix + "-" + db + clusters + ".txt"));
    }

    try {
        Hashtable<Integer, String> evalText = new Hashtable<Integer, String>(260);
        for (int i = 0; i < reader.maxDoc(); i++) {
            // hasDeletions() is tested first so liveDocs is only consulted
            // when the reader reports deletions
            if (reader.hasDeletions() && !liveDocs.get(i)) {
                continue; // if it is deleted, just ignore it.
            }
            Document queryDoc = reader.document(i);
            String fileName = getIDfromFileName(
                    queryDoc.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
            if (!queries.containsKey(fileName)) {
                continue;
            }

            // ok, we've got a query here for a document ...
            StringBuilder tmpEval = new StringBuilder();
            queryCount += 1d;
            ImageSearchHits hits = searcher.search(queryDoc, reader);
            double rank = 0;
            double avgPrecision = 0;
            double found = 0;
            double tmpP10 = 0;
            // kept as in the original: the summary line below is formatted
            // with the default locale
            Locale.setDefault(Locale.US);
            for (int y = 0; y < hits.length(); y++) {
                String hitFile = getIDfromFileName(
                        hits.doc(y).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
                // TODO: Sort by query ID!
                tmpEval.append(String.format(Locale.US, "%d 1 %s %d %.2f test\n", query2id.get(fileName),
                        hitFile.substring(0, hitFile.lastIndexOf('.')), (int) rank + 1, hits.score(y)));
                rank++;
                if (queries.get(fileName).contains(hitFile) || hitFile.equals(fileName)) { // it's a hit.
                    found++;
                    // TODO: Compute error rate, etc. here.
                    avgPrecision += found / rank;
                    if (rank <= 10) {
                        tmpP10++;
                    }
                } else if (rank == 1) { // the top-ranked result is not relevant.
                    errorRate += 1d;
                }
            }
            avgPrecision /= (double) (1d + queries.get(fileName).size());

            if (found - queries.get(fileName).size() != 1) {
                // some of the results have not been found. We have to deal with it ...
                errorCount++;
            }

            map += avgPrecision;
            p10 += tmpP10;
            evalText.put(query2id.get(fileName), tmpEval.toString());
        }

        for (int i = 0; i < query2id.size(); i++) {
            String text = evalText.get(i + 1);
            // ids without an evaluated query have no entry; writing null would throw
            if (text != null) {
                fw.write(text);
            }
        }
    } finally {
        fw.close();
    }

    errorRate = errorRate / queryCount;
    map = map / queryCount;
    p10 = p10 / (queryCount * 10d);
    String s;
    String finalSearcherName = searcher.toString();
    if (finalSearcherName.contains("ImageSearcherUsingWSs")) {
        String[] nameParts = finalSearcherName.split("\\s+");
        s = String.format("%s\t%.4f\t%.4f\t%.4f\t(%s)", prefix, map, p10, errorRate,
                nameParts[nameParts.length - 1]);
    } else {
        s = String.format("%s\t%.4f\t%.4f\t%.4f", prefix, map, p10, errorRate);
    }
    if (errorCount > 0) {
        // some of the results have not been found. We have to deal with it ...
        s += "\t~~\tDid not find result ;(\t(" + errorCount + ")";
    }
    System.out.println(s);
}

From source file:net.semanticmetadata.lire.benchmarking.TestUCID.java

License:Open Source License

/**
 * Indexes the data set with a single feature and prints the average search time
 * per query in milliseconds, preceded by the simple name of the feature class.
 * <p>
 * The index at {@code indexPath} is rebuilt, loaded into a RAM directory, and
 * every non-deleted document whose id is a key of {@code queries} is used as a query.
 *
 * @param images not used by this method
 * @param featureClass the feature class to index and search with
 * @throws IOException if indexing, opening or reading the index fails
 */
private void testSearchSpeed(ArrayList<String> images, final Class featureClass) throws IOException {
    parallelIndexer = new ParallelIndexer(8, indexPath, testExtensive, true) {
        @Override
        public void addBuilders(ChainedDocumentBuilder builder) {
            builder.addBuilder(new GenericDocumentBuilder(featureClass, "feature"));
        }
    };
    parallelIndexer.run();
    IndexReader reader = DirectoryReader
            .open(new RAMDirectory(FSDirectory.open(new File(indexPath)), IOContext.READONCE));
    try {
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        double queryCount = 0d;
        ImageSearcher searcher = new GenericFastImageSearcher(100, featureClass, "feature");
        long ms = System.currentTimeMillis();
        for (int i = 0; i < reader.maxDoc(); i++) {
            // hasDeletions() is tested first so liveDocs is only consulted
            // when the reader reports deletions
            if (reader.hasDeletions() && !liveDocs.get(i)) {
                continue; // if it is deleted, just ignore it.
            }
            Document queryDoc = reader.document(i);
            String fileName = getIDfromFileName(
                    queryDoc.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
            if (queries.containsKey(fileName)) {
                queryCount += 1d;
                // ok, we've got a query here for a document ...
                // only the elapsed time matters, the hits themselves are discarded
                searcher.search(queryDoc, reader);
            }
        }
        ms = System.currentTimeMillis() - ms;
        String className = featureClass.getName();
        System.out.printf("%s \t %3.1f \n",
                className.substring(className.lastIndexOf('.') + 1),
                (double) ms / queryCount);
    } finally {
        // the reader is opened by this method, so it is released here
        reader.close();
    }
}

From source file:net.semanticmetadata.lire.benchmarking.TestUniversal.java

License:Open Source License

/**
 * Evaluates a searcher on the query set and prints one line with the elapsed time,
 * mean average precision, precision at 10, error rate and accumulated search time.
 * <p>
 * Every non-deleted document whose id is a key of {@code queries} is used as a query.
 * A hit is relevant when its id is listed for the query or equals the query id, so
 * average precision is divided by {@code 1 + queries.get(id).size()}. The error rate
 * counts queries whose first-ranked hit is not relevant. The per-hit lines are
 * written, ordered by query id, to a text file below {@code eval/<db>/}.
 *
 * @param searcher the searcher to evaluate
 * @param prefix the label of the run; also used (spaces replaced by '_') in the file name
 * @param reader the reader over the index being searched
 * @param clusters the cluster count used in file names and output; values not
 *        greater than 0 select the "Global" file name
 * @throws IOException if the index cannot be read or the evaluation file cannot be created
 */
private void computeMAP(ImageSearcher searcher, String prefix, IndexReader reader, int clusters)
        throws IOException {
    long start = System.currentTimeMillis();
    long timeOfSearch = 0, ms;

    double queryCount = 0d;
    double errorRate = 0;
    double map = 0;
    double p10 = 0;
    int errorCount = 0;
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    String filePrefix = prefix.replace(' ', '_');
    String searcherName = searcher.toString();
    PrintWriter fw;
    if (searcherName.contains("ImageSearcherUsingWSs")) {
        String[] nameParts = searcherName.split("\\s+");
        (new File("eval/" + db + "/" + filePrefix + "/" + clusters + "/")).mkdirs();
        fw = new PrintWriter(new File("eval/" + db + "/" + filePrefix + "/" + clusters + "/"
                + filePrefix + "-" + db + clusters
                + nameParts[nameParts.length - 1] + ".txt"));
    } else {
        (new File("eval/" + db + "/")).mkdirs();
        if (clusters > 0) {
            fw = new PrintWriter(
                    new File("eval/" + db + "/" + filePrefix + "-" + db + clusters + ".txt"));
        } else {
            fw = new PrintWriter(
                    new File("eval/" + db + "/" + filePrefix + "-" + db + "Global.txt")); //forGlobal
        }
    }

    try {
        Hashtable<Integer, String> evalText = new Hashtable<Integer, String>(260);
        for (int i = 0; i < reader.maxDoc(); i++) {
            // hasDeletions() is tested first so liveDocs is only consulted
            // when the reader reports deletions
            if (reader.hasDeletions() && !liveDocs.get(i)) {
                continue; // if it is deleted, just ignore it.
            }
            Document queryDoc = reader.document(i);
            String fileName = getIDfromFileName(
                    queryDoc.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
            if (!queries.containsKey(fileName)) {
                continue;
            }

            // ok, we've got a query here for a document ...
            StringBuilder tmpEval = new StringBuilder();
            queryCount += 1d;
            ms = System.currentTimeMillis();
            ImageSearchHits hits = searcher.search(queryDoc, reader);
            timeOfSearch += System.currentTimeMillis() - ms;
            double rank = 0;
            double avgPrecision = 0;
            double found = 0;
            double tmpP10 = 0;
            // kept as in the original: the summary line below is formatted
            // with the default locale
            Locale.setDefault(Locale.US);
            for (int y = 0; y < hits.length(); y++) {
                String hitFile = getIDfromFileName(reader.document(hits.documentID(y))
                        .getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
                // TODO: Sort by query ID!
                tmpEval.append(String.format(Locale.US, "%d 1 %s %d %.2f test\n", query2id.get(fileName),
                        hitFile.substring(0, hitFile.lastIndexOf('.')), (int) rank + 1, hits.score(y)));
                rank++;
                if (queries.get(fileName).contains(hitFile) || hitFile.equals(fileName)) { // it's a hit.
                    found++;
                    // TODO: Compute error rate, etc. here.
                    avgPrecision += found / rank;
                    if (rank <= 10) {
                        tmpP10++;
                    }
                } else if (rank == 1) { // the top-ranked result is not relevant.
                    errorRate += 1d;
                }
            }
            avgPrecision /= (double) (1d + queries.get(fileName).size());

            if (found - queries.get(fileName).size() != 1) {
                // some of the results have not been found. We have to deal with it ...
                errorCount++;
            }

            map += avgPrecision;
            p10 += tmpP10;
            evalText.put(query2id.get(fileName), tmpEval.toString());
        }

        for (int i = 0; i < query2id.size(); i++) {
            String text = evalText.get(i + 1);
            // ids without an evaluated query have no entry; writing null would throw
            if (text != null) {
                fw.write(text);
            }
        }
    } finally {
        fw.close();
    }

    errorRate = errorRate / queryCount;
    map = map / queryCount;
    p10 = p10 / (queryCount * 10d);

    // total elapsed time, rendered as [hh:]mm:ss
    double h = (System.currentTimeMillis() - start) / 3600000.0;
    double m = (h - Math.floor(h)) * 60.0;
    double s = (m - Math.floor(m)) * 60;
    String str = String.format("%s%02d:%02d", (((int) h > 0) ? String.format("%02d:", (int) h) : ""), (int) m,
            (int) s) + " ~ ";

    String clusterColumn = (clusters > 0) ? ("\t" + clusters) : "";
    String finalSearcherName = searcher.toString();
    if (finalSearcherName.contains("ImageSearcherUsingWSs")) {
        String[] nameParts = finalSearcherName.split("\\s+");
        str += String.format("%s%s\t%.4f\t%.4f\t%.4f\t(%s)", prefix, clusterColumn,
                map, p10, errorRate,
                nameParts[nameParts.length - 1]);
    } else {
        str += String.format("%s%s\t%.4f\t%.4f\t%.4f", prefix, clusterColumn, map,
                p10, errorRate);
    }
    if (errorCount > 0) {
        // some of the results have not been found. We have to deal with it ...
        str += "\t~~\tDid not find result ;(\t(" + errorCount + ")";
    }

    // time spent inside searcher.search() only, rendered as [hh:]mm:ss
    h = timeOfSearch / 3600000.0;
    m = (h - Math.floor(h)) * 60.0;
    s = (m - Math.floor(m)) * 60;
    str += " ~ TimeOfsearch: " + String.format("%s%02d:%02d",
            (((int) h > 0) ? String.format("%02d:", (int) h) : ""), (int) m, (int) s);

    System.out.println(str);
}