List of usage examples for the method org.apache.lucene.index.IndexReader#hasDeletions()
public boolean hasDeletions()
From source file:com.esri.gpt.catalog.lucene.AclFilter.java
License:Apache License
/** * Queries for documents that have no values associated with the field. * @param reader the index reader//w w w . j a va 2 s . c o m * @return the OpenBitSet (documents with no values set to true) * @throws IOException if an exception is encountered while reading the index */ private OpenBitSet queryNulls(IndexReader reader, String field) throws IOException { int nBits = reader.maxDoc(); OpenBitSet bitSet = new OpenBitSet(nBits); TermEnum termEnum = null; TermDocs termDocs = null; if ((field != null) && (field.trim().length() > 0)) { try { // find all documents that have a term for the field, then flip the bit set termEnum = reader.terms(new Term(field)); termDocs = reader.termDocs(); do { Term term = termEnum.term(); if ((term != null) && term.field().equals(field)) { termDocs.seek(term); while (termDocs.next()) { bitSet.fastSet(termDocs.doc()); } } } while (termEnum.next()); bitSet.flip(0, nBits); if (reader.hasDeletions()) { for (int i = 0; i < nBits; i++) { if (bitSet.get(i) && reader.isDeleted(i)) { bitSet.fastFlip(i); } } } } finally { try { if (termEnum != null) termEnum.close(); } catch (Exception ef) { } try { if (termDocs != null) termDocs.close(); } catch (Exception ef) { } } } return bitSet; }
From source file:com.jaeksoft.searchlib.index.IndexStatistics.java
License:Open Source License
/**
 * Copies the document counters and status flags reported by the given
 * reader into this statistics object.
 *
 * @param indexReader the reader whose current figures are recorded
 */
protected IndexStatistics(IndexReader indexReader) {
    this.maxDoc = indexReader.maxDoc();
    this.numDocs = indexReader.numDocs();
    this.numDeletedDocs = indexReader.numDeletedDocs();
    this.hasDeletions = indexReader.hasDeletions();
    this.isOptimized = indexReader.isOptimized();
}
From source file:com.zimbra.cs.rmgmt.RemoteMailQueue.java
License:Open Source License
/**
 * Walks every term of the index and, for the summarized queue attributes,
 * appends a {@code SummaryItem} (term text plus document count) to the
 * matching list in {@code result.sitems}.
 * <p>
 * When the index has deletions the count is taken by visiting the term's
 * documents and skipping deleted ones; otherwise the term's document
 * frequency is used directly. Terms whose count is zero are not added.
 *
 * @param result the search result whose {@code sitems} map receives the summary items
 * @param indexReader the reader over the index to summarize
 * @throws IOException if reading the index fails
 */
private void summarize(SearchResult result, IndexReader indexReader) throws IOException {
    TermEnum terms = indexReader.terms();
    try {
        boolean hasDeletions = indexReader.hasDeletions();
        do {
            Term term = terms.term();
            if (term != null) {
                String field = term.field();
                if (field != null && field.length() > 0) {
                    // NOTE(review): if QueueAttr is an enum, valueOf throws
                    // IllegalArgumentException for a field name without a matching
                    // constant - confirm every indexed field is a QueueAttr name.
                    QueueAttr attr = QueueAttr.valueOf(field);
                    if (attr == QueueAttr.addr || attr == QueueAttr.host || attr == QueueAttr.from
                            || attr == QueueAttr.to || attr == QueueAttr.fromdomain
                            || attr == QueueAttr.todomain || attr == QueueAttr.reason
                            || attr == QueueAttr.received) {
                        List<SummaryItem> list = result.sitems.get(attr);
                        if (list == null) {
                            list = new LinkedList<SummaryItem>();
                            result.sitems.put(attr, list);
                        }
                        int count = 0;
                        if (hasDeletions) {
                            // docFreq would still include deleted documents, so count live ones by hand
                            TermDocs termDocs = indexReader.termDocs(term);
                            try {
                                while (termDocs.next()) {
                                    if (!indexReader.isDeleted(termDocs.doc())) {
                                        count++;
                                    }
                                }
                            } finally {
                                // release the postings cursor opened for this term
                                termDocs.close();
                            }
                        } else {
                            count = terms.docFreq();
                        }
                        if (count > 0) {
                            list.add(new SummaryItem(term.text(), count));
                        }
                    }
                }
            }
        } while (terms.next());
    } finally {
        // release the term enumeration even when the walk fails half way
        terms.close();
    }
}
From source file:intelligentWebAlgorithms.algos.search.ranking.DocRankMatrixBuilder.java
License:Apache License
private List<Integer> getProcessedDocs(IndexReader idxR) throws IOException { List<Integer> docs = new ArrayList<Integer>(); for (int i = 0, n = idxR.maxDoc(); i < n; i++) { if (idxR.hasDeletions() == false) { Document doc = idxR.document(i); if (eligibleForDocRank(doc.get("doctype"))) { docs.add(i);// ww w .j a va 2 s .c o m } } } return docs; }
From source file:net.conquiris.lucene.GuavaCachingFilter.java
License:Apache License
/**
 * Returns the doc id set for the reader from the cache, computing it with
 * a {@code Loader} when it is not cached yet.
 * <p>
 * The cache key is the reader's deletes cache key when the reader has
 * deletions and its core cache key otherwise.
 * <p>
 * Failures are unwrapped: an {@link IOException}, {@link RuntimeException}
 * or {@link Error} found as the cause is rethrown as is; anything else is
 * reported as an {@code UncheckedExecutionException}.
 *
 * @param reader the reader to compute the set for
 * @return the cached or freshly loaded doc id set
 * @throws IOException if loading the set failed with an I/O error
 */
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    final Object key = reader.hasDeletions() ? reader.getDeletesCacheKey() : reader.getCoreCacheKey();
    try {
        return cache.get(key, new Loader(reader));
    } catch (Throwable t) {
        Throwable cause = t.getCause();
        if (cause instanceof IOException) {
            throw (IOException) cause;
        }
        if (cause instanceof RuntimeException) {
            throw (RuntimeException) cause;
        }
        if (cause instanceof Error) {
            throw (Error) cause;
        }
        if (t instanceof UncheckedExecutionException) {
            throw (UncheckedExecutionException) t;
        }
        // A throwable without a cause used to be replaced by an exception that
        // wrapped null, losing the failure; wrap the throwable itself instead.
        throw new UncheckedExecutionException(cause != null ? cause : t);
    }
}
From source file:net.semanticmetadata.lire.benchmarking.TestNister.java
License:Open Source License
public void computePrecision(String pathName, Similarity similarity, String label) throws IOException { // ImageSearcher vis = new GenericImageSearcher(4, SimpleFeature.class, "featureSURFHistogram"); // ImageSearcher vis = new GenericFastImageSearcher(4, CEDD.class, DocumentBuilder.FIELD_NAME_CEDD); // VisualWordsImageSearcher vis = new VisualWordsImageSearcher(4, similarity, DocumentBuilder.FIELD_NAME_SIFT_VISUAL_WORDS); VisualWordsImageSearcher vis = new VisualWordsImageSearcher(4, similarity, DocumentBuilder.FIELD_NAME_SURF + DocumentBuilder.FIELD_NAME_BOVW); IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(pathName))); int queryID, resultID; int countSearches = 0, countTruePositives = 0; float avgPrecision = 0f; Set<Integer> test = StatsUtils.drawSample(100, 10200); // Needed for check whether the document is deleted. Bits liveDocs = MultiFields.getLiveDocs(reader); for (int i : test) { // for (int j = 0; j < tests.length; j++) { // int i = tests[j]; // for (int i =0; i < 1000; i++) { // for (int i =0; i < reader.numDocs(); i++) { if (!((reader.hasDeletions() && !liveDocs.get(i)))) { ImageSearchHits hits = vis.search(reader.document(i), reader); String s = reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; s = s.replaceAll("\\D", ""); queryID = Integer.parseInt(s); countTruePositives = 0;//from w ww.j a v a2 s . 
c om for (int k = 0; k < hits.length(); k++) { String name = hits.doc(k).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; name = name.replaceAll("\\D", ""); resultID = Integer.parseInt(name); if (queryID / 4 == resultID / 4) { //System.out.print("X"); countTruePositives++; } //else System.out.print("O"); } countSearches++; avgPrecision += (float) countTruePositives / 4f; // progress: // if (countSearches%100==0) System.out.print('.'); // if (countSearches%1000==0) System.out.print(':'); //System.out.println(); } } avgPrecision = avgPrecision / (float) countSearches; FileWriter fw = new FileWriter(new File("precision_results.txt"), true); System.out.println(label + " p@4= " + avgPrecision); fw.write(label + " p@4= " + avgPrecision + "\n"); fw.close(); }
From source file:net.semanticmetadata.lire.benchmarking.TestSimple.java
License:Open Source License
private void doSearch(ImageSearcher searcher, String prefix, IndexReader reader) throws IOException { // Needed for check whether the document is deleted. Bits liveDocs = MultiFields.getLiveDocs(reader); String fileName, fullFileName; Document queryDoc;//from w ww . j av a 2 s . c o m ImageSearchHits hits; for (int i = 0; i < reader.maxDoc(); i++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. fullFileName = reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; fileName = getIDfromFileName(fullFileName); if (allQueries.contains(fileName)) { // ok, we've got a query here for a document ... queryDoc = reader.document(i); hits = searcher.search(queryDoc, reader); FileUtils.browseUri(FileUtils.saveImageResultsToHtml(prefix + "-" + fileName, hits, fullFileName)); } } for (int i = 0; i < outsideQueries.size(); i++) { fullFileName = outsideQueries.get(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; fileName = getIDfromFileName(fullFileName); if (allQueries.contains(fileName)) { // ok, we've got a query here for a document ... queryDoc = outsideQueries.get(i); hits = searcher.search(queryDoc, reader); FileUtils.browseUri(FileUtils.saveImageResultsToHtml(prefix + "-" + fileName, hits, fullFileName)); } } }
From source file:net.semanticmetadata.lire.benchmarking.TestUCID.java
License:Open Source License
private void computeMAP(ImageSearcher searcher, String prefix, IndexReader reader) throws IOException { double queryCount = 0d; double errorRate = 0; double map = 0; double p10 = 0; int errorCount = 0; // Needed for check whether the document is deleted. Bits liveDocs = MultiFields.getLiveDocs(reader); PrintWriter fw;/*from w w w.java 2s . c om*/ if (searcher.toString().contains("ImageSearcherUsingWSs")) { (new File("eval/" + prefix.replace(' ', '_') + "/" + clusters + "/")).mkdirs(); fw = new PrintWriter(new File("eval/" + prefix.replace(' ', '_') + "/" + clusters + "/" + prefix.replace(' ', '_') + "-" + db + clusters + searcher.toString().split("\\s+")[searcher.toString().split("\\s+").length - 1] + ".txt")); } else fw = new PrintWriter(new File("eval/" + prefix.replace(' ', '_') + "-" + db + clusters + ".txt")); // fw = new PrintWriter(new File("eval/" + prefix.replace(' ', '_') + "-" + db + "Global.txt")); //forGlobal Hashtable<Integer, String> evalText = new Hashtable<Integer, String>(260); for (int i = 0; i < reader.maxDoc(); i++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. String fileName = getIDfromFileName( reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); if (queries.keySet().contains(fileName)) { String tmpEval = ""; queryCount += 1d; // ok, we've got a query here for a document ... Document queryDoc = reader.document(i); ImageSearchHits hits = searcher.search(queryDoc, reader); double rank = 0; double avgPrecision = 0; double found = 0; double tmpP10 = 0; Locale.setDefault(Locale.US); for (int y = 0; y < hits.length(); y++) { String hitFile = getIDfromFileName( hits.doc(y).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); // TODO: Sort by query ID! 
tmpEval += String.format(Locale.US, "%d 1 %s %d %.2f test\n", query2id.get(fileName), hitFile.substring(0, hitFile.lastIndexOf('.')), (int) rank + 1, hits.score(y)); // if (!hitFile.equals(fileName)) { rank++; // if ((queries.get(fileName).contains(hitFile) || hitFile.equals(fileName))&&(!fileName.equals(hitFile))) { // it's a hit. if (queries.get(fileName).contains(hitFile) || hitFile.equals(fileName)) { // it's a hit. found++; // TODO: Compute error rate, etc. here. avgPrecision += found / rank;// * (1d/queries.get(fileName).size()); // avgPrecision += found / (rank-1);// * (1d/queries.get(fileName).size()); // if (rank<=60) System.out.print('X'); if (rank <= 10) tmpP10++; } else { // nothing has been found. if (rank == 1) errorRate += 1d; // if (rank<=60) System.out.print('-'); } } // } // System.out.println(); avgPrecision /= (double) (1d + queries.get(fileName).size()); // avgPrecision /= (double) (queries.get(fileName).size()); if (!(found - queries.get(fileName).size() == 1)) { // some of the results have not been found. We have to deal with it ... errorCount++; } // assertTrue(found - queries.get(fileName).size() == 0); map += avgPrecision; p10 += tmpP10; evalText.put(query2id.get(fileName), tmpEval); } } for (int i = 0; i < query2id.size(); i++) { fw.write(evalText.get(i + 1)); } fw.close(); errorRate = errorRate / queryCount; map = map / queryCount; p10 = p10 / (queryCount * 10d); // System.out.print(prefix); String s; if (searcher.toString().contains("ImageSearcherUsingWSs")) s = String.format("%s\t%.4f\t%.4f\t%.4f\t(%s)", prefix, map, p10, errorRate, searcher.toString().split("\\s+")[searcher.toString().split("\\s+").length - 1]); else s = String.format("%s\t%.4f\t%.4f\t%.4f", prefix, map, p10, errorRate); if (errorCount > 0) { // some of the results have not been found. We have to deal with it ... //System.err.println("Did not find result ;( (" + errorCount + ")"); s += "\t~~\tDid not find result ;(\t(" + errorCount + ")"; } System.out.println(s); }
From source file:net.semanticmetadata.lire.benchmarking.TestUCID.java
License:Open Source License
private void testSearchSpeed(ArrayList<String> images, final Class featureClass) throws IOException { parallelIndexer = new ParallelIndexer(8, indexPath, testExtensive, true) { @Override/*from w ww . j a v a2s . com*/ public void addBuilders(ChainedDocumentBuilder builder) { builder.addBuilder(new GenericDocumentBuilder(featureClass, "feature")); } }; parallelIndexer.run(); IndexReader reader = DirectoryReader .open(new RAMDirectory(FSDirectory.open(new File(indexPath)), IOContext.READONCE)); Bits liveDocs = MultiFields.getLiveDocs(reader); double queryCount = 0d; ImageSearcher searcher = new GenericFastImageSearcher(100, featureClass, "feature"); long ms = System.currentTimeMillis(); for (int i = 0; i < reader.maxDoc(); i++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. String fileName = getIDfromFileName( reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); if (queries.keySet().contains(fileName)) { queryCount += 1d; // ok, we've got a query here for a document ... Document queryDoc = reader.document(i); ImageSearchHits hits = searcher.search(queryDoc, reader); } } ms = System.currentTimeMillis() - ms; System.out.printf("%s \t %3.1f \n", featureClass.getName().substring(featureClass.getName().lastIndexOf('.') + 1), (double) ms / queryCount); }
From source file:net.semanticmetadata.lire.benchmarking.TestUniversal.java
License:Open Source License
private void computeMAP(ImageSearcher searcher, String prefix, IndexReader reader, int clusters) throws IOException { long start = System.currentTimeMillis(); long timeOfSearch = 0, ms; double queryCount = 0d; double errorRate = 0; double map = 0; double p10 = 0; int errorCount = 0; // Needed for check whether the document is deleted. Bits liveDocs = MultiFields.getLiveDocs(reader); PrintWriter fw;/*from ww w . j ava2 s . co m*/ if (searcher.toString().contains("ImageSearcherUsingWSs")) { (new File("eval/" + db + "/" + prefix.replace(' ', '_') + "/" + clusters + "/")).mkdirs(); fw = new PrintWriter(new File("eval/" + db + "/" + prefix.replace(' ', '_') + "/" + clusters + "/" + prefix.replace(' ', '_') + "-" + db + clusters + searcher.toString().split("\\s+")[searcher.toString().split("\\s+").length - 1] + ".txt")); } else { // (new File("eval/#WithMirFlickr/" + db + "/")).mkdirs(); (new File("eval/" + db + "/")).mkdirs(); if (clusters > 0) fw = new PrintWriter( new File("eval/" + db + "/" + prefix.replace(' ', '_') + "-" + db + clusters + ".txt")); else // fw = new PrintWriter(new File("eval/#WithMirFlickr/" + db + "/" + prefix.replace(' ', '_') + "-" + db + "Global.txt")); //forGlobal fw = new PrintWriter( new File("eval/" + db + "/" + prefix.replace(' ', '_') + "-" + db + "Global.txt")); //forGlobal } Hashtable<Integer, String> evalText = new Hashtable<Integer, String>(260); for (int i = 0; i < reader.maxDoc(); i++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. String fileName = getIDfromFileName( reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); if (queries.keySet().contains(fileName)) { String tmpEval = ""; queryCount += 1d; // ok, we've got a query here for a document ... 
Document queryDoc = reader.document(i); ms = System.currentTimeMillis(); ImageSearchHits hits = searcher.search(queryDoc, reader); timeOfSearch += System.currentTimeMillis() - ms; double rank = 0; double avgPrecision = 0; double found = 0; double tmpP10 = 0; Locale.setDefault(Locale.US); for (int y = 0; y < hits.length(); y++) { // String hitFile = getIDfromFileName(hits.doc(y).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); String hitFile = getIDfromFileName(reader.document(hits.documentID(y)) .getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); // String hitFile = getIDfromFileName(hits.path(y)); // TODO: Sort by query ID! tmpEval += String.format(Locale.US, "%d 1 %s %d %.2f test\n", query2id.get(fileName), hitFile.substring(0, hitFile.lastIndexOf('.')), (int) rank + 1, hits.score(y)); // if (!hitFile.equals(fileName)) { rank++; // if ((queries.get(fileName).contains(hitFile) || hitFile.equals(fileName))&&(!fileName.equals(hitFile))) { // it's a hit. if (queries.get(fileName).contains(hitFile) || hitFile.equals(fileName)) { // it's a hit. found++; // TODO: Compute error rate, etc. here. avgPrecision += found / rank;// * (1d/queries.get(fileName).size()); // avgPrecision += found / (rank-1);// * (1d/queries.get(fileName).size()); // if (rank<=60) System.out.print('X'); if (rank <= 10) tmpP10++; } else { // nothing has been found. if (rank == 1) errorRate += 1d; // if (rank<=60) System.out.print('-'); } } // } // System.out.println(); avgPrecision /= (double) (1d + queries.get(fileName).size()); // avgPrecision /= (double) (queries.get(fileName).size()); if (!(found - queries.get(fileName).size() == 1)) { // some of the results have not been found. We have to deal with it ... 
errorCount++; } // assertTrue(found - queries.get(fileName).size() == 0); map += avgPrecision; p10 += tmpP10; evalText.put(query2id.get(fileName), tmpEval); } } for (int i = 0; i < query2id.size(); i++) { fw.write(evalText.get(i + 1)); } fw.close(); errorRate = errorRate / queryCount; map = map / queryCount; p10 = p10 / (queryCount * 10d); double h = (System.currentTimeMillis() - start) / 3600000.0; double m = (h - Math.floor(h)) * 60.0; double s = (m - Math.floor(m)) * 60; String str = String.format("%s%02d:%02d", (((int) h > 0) ? String.format("%02d:", (int) h) : ""), (int) m, (int) s) + " ~ "; if (searcher.toString().contains("ImageSearcherUsingWSs")) str += String.format("%s%s\t%.4f\t%.4f\t%.4f\t(%s)", prefix, ((clusters > 0) ? ("\t" + clusters) : ""), map, p10, errorRate, searcher.toString().split("\\s+")[searcher.toString().split("\\s+").length - 1]); else str += String.format("%s%s\t%.4f\t%.4f\t%.4f", prefix, ((clusters > 0) ? ("\t" + clusters) : ""), map, p10, errorRate); if (errorCount > 0) { // some of the results have not been found. We have to deal with it ... str += "\t~~\tDid not find result ;(\t(" + errorCount + ")"; } h = timeOfSearch / 3600000.0; m = (h - Math.floor(h)) * 60.0; s = (m - Math.floor(m)) * 60; str += " ~ TimeOfsearch: " + String.format("%s%02d:%02d", (((int) h > 0) ? String.format("%02d:", (int) h) : ""), (int) m, (int) s); System.out.println(str); }