List of usage examples for org.apache.lucene.index IndexReader hasDeletions
public boolean hasDeletions()
From source file:net.semanticmetadata.lire.benchmarking.TestUniversal.java
License:Open Source License
private void testSearchSpeed(Class<? extends GlobalFeature> featureClass) throws IOException { ParallelIndexer parallelIndexer = new ParallelIndexer(DocumentBuilder.NUM_OF_THREADS, indexPath, testExtensive, true);// www . j av a 2 s . c om parallelIndexer.addExtractor(featureClass); parallelIndexer.run(); IndexReader reader = DirectoryReader .open(new RAMDirectory(FSDirectory.open(Paths.get(indexPath)), IOContext.READONCE)); Bits liveDocs = MultiFields.getLiveDocs(reader); double queryCount = 0d; ImageSearcher searcher = new GenericFastImageSearcher(100, featureClass); long ms = System.currentTimeMillis(); String fileName; Document queryDoc; ImageSearchHits hits; for (int i = 0; i < reader.maxDoc(); i++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. fileName = getIDfromFileName(reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); if (queries.keySet().contains(fileName)) { queryCount += 1d; // ok, we've got a query here for a document ... queryDoc = reader.document(i); hits = searcher.search(queryDoc, reader); } } ms = System.currentTimeMillis() - ms; System.out.printf("%s \t %3.1f \n", featureClass.getName().substring(featureClass.getName().lastIndexOf('.') + 1), (double) ms / queryCount); }
From source file:net.semanticmetadata.lire.benchmarking.TestWang.java
License:Open Source License
public void tttestGetDistribution() throws IOException { BufferedWriter bw = new BufferedWriter(new FileWriter("data.csv")); IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath))); // get the first document: // if (!IndexReader.indexExists(reader.directory())) // throw new FileNotFoundException("No index found at this specific location."); CEDD cedd1 = new CEDD(); FCTH fcth1 = new FCTH(); CEDD cedd2 = new CEDD(); FCTH fcth2 = new FCTH(); JCD jcd1 = new JCD(); JCD jcd2 = new JCD(); String[] cls;//from ww w .ja v a 2 s.c om // Needed for check whether the document is deleted. Bits liveDocs = MultiFields.getLiveDocs(reader); int docs = reader.numDocs(); for (int i = 0; i < docs; i++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. Document doc = reader.document(i); cls = doc.getValues(DocumentBuilder.FIELD_NAME_CEDD); if (cls != null && cls.length > 0) cedd1.setStringRepresentation(cls[0]); cls = doc.getValues(DocumentBuilder.FIELD_NAME_FCTH); if (cls != null && cls.length > 0) fcth1.setStringRepresentation(cls[0]); for (int j = i + 1; j < docs; j++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. Document doc2 = reader.document(j); cls = doc2.getValues(DocumentBuilder.FIELD_NAME_CEDD); if (cls != null && cls.length > 0) cedd2.setStringRepresentation(cls[0]); cls = doc2.getValues(DocumentBuilder.FIELD_NAME_FCTH); if (cls != null && cls.length > 0) fcth2.setStringRepresentation(cls[0]); jcd1.init(cedd1, fcth1); jcd2.init(cedd2, fcth2); bw.write(cedd1.getDistance(cedd2) + ";" + fcth1.getDistance(fcth2) + ";" + jcd1.getDistance(jcd2) + "\n"); } if (i % 100 == 0) System.out.println(i + " entries processed ... "); } bw.close(); }
From source file:net.semanticmetadata.lire.benchmarking.TestZuBuD.java
License:Open Source License
private void computeMAP(ImageSearcher searcher, String prefix, IndexReader reader, int clusters, IndexReader readerQueries) throws IOException { long start = System.currentTimeMillis(); double queryCount = 0d; double errorRate = 0; double map = 0; double p10 = 0; int errorCount = 0; // Needed for check whether the document is deleted. Bits liveDocs = MultiFields.getLiveDocs(readerQueries); PrintWriter fw;/*from ww w . j av a 2s. c om*/ if (searcher.toString().contains("ImageSearcherUsingWSs")) { (new File("eval/" + db + "/" + prefix.replace(' ', '_') + "/" + clusters + "/")).mkdirs(); fw = new PrintWriter(new File("eval/" + db + "/" + prefix.replace(' ', '_') + "/" + clusters + "/" + prefix.replace(' ', '_') + "-" + db + clusters + searcher.toString().split("\\s+")[searcher.toString().split("\\s+").length - 1] + ".txt")); } else { // (new File("eval/#WithMirFlickr/" + db + "/")).mkdirs(); (new File("eval/" + db + "/")).mkdirs(); if (clusters > 0) fw = new PrintWriter( new File("eval/" + db + "/" + prefix.replace(' ', '_') + "-" + db + clusters + ".txt")); else // fw = new PrintWriter(new File("eval/#WithMirFlickr/" + db + "/" + prefix.replace(' ', '_') + "-" + db + "Global.txt")); //forGlobal fw = new PrintWriter( new File("eval/" + db + "/" + prefix.replace(' ', '_') + "-" + db + "Global.txt")); //forGlobal } Hashtable<Integer, String> evalText = new Hashtable<Integer, String>(260); for (int i = 0; i < readerQueries.maxDoc(); i++) { if (readerQueries.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. String fileName = getIDfromFileName( readerQueries.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); if (queries.keySet().contains(fileName)) { String tmpEval = ""; queryCount += 1d; // ok, we've got a query here for a document ... Document queryDoc = readerQueries.document(i); ImageSearchHits hits = searcher.search(queryDoc, reader); double rank = 0; double avgPrecision = 0; double found = 0; double tmpP10 = 0; Locale.setDefault(Locale.US); for (int y = 0; y < hits.length(); y++) { String hitFile = getIDfromFileName(reader.document(hits.documentID(y)) .getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); // TODO: Sort by query ID! tmpEval += String.format(Locale.US, "%d 1 %s %d %.2f test\n", query2id.get(fileName), hitFile.substring(0, hitFile.lastIndexOf('.')), (int) rank + 1, hits.score(y)); // if (!hitFile.equals(fileName)) { rank++; // if ((queries.get(fileName).contains(hitFile) || hitFile.equals(fileName))&&(!fileName.equals(hitFile))) { // it's a hit. if (queries.get(fileName).contains(hitFile) || hitFile.equals(fileName)) { // it's a hit. found++; // TODO: Compute error rate, etc. here. avgPrecision += found / rank;// * (1d/queries.get(fileName).size()); // avgPrecision += found / (rank-1);// * (1d/queries.get(fileName).size()); // if (rank<=60) System.out.print('X'); if (rank <= 10) tmpP10++; } else { // nothing has been found. if (rank == 1) errorRate += 1d; // if (rank<=60) System.out.print('-'); } } // } // System.out.println(); // avgPrecision /= (double) (1d + queries.get(fileName).size()); // TODO: check!! avgPrecision /= (double) (queries.get(fileName).size()); if (!(found - queries.get(fileName).size() == 0)) { // some of the results have not been found. We have to deal with it ... errorCount++; } // assertTrue(found - queries.get(fileName).size() == 0); map += avgPrecision; p10 += tmpP10; evalText.put(query2id.get(fileName), tmpEval); } } for (int i = 0; i < query2id.size(); i++) { fw.write(evalText.get(i + 1)); } fw.close(); errorRate = errorRate / queryCount; map = map / queryCount; p10 = p10 / (queryCount * 10d); double h = (System.currentTimeMillis() - start) / 3600000.0; double m = (h - Math.floor(h)) * 60.0; double s = (m - Math.floor(m)) * 60; String str = String.format("%s%02d:%02d", (((int) h > 0) ? String.format("%02d:", (int) h) : ""), (int) m, (int) s) + " ~ "; if (searcher.toString().contains("ImageSearcherUsingWSs")) str += String.format("%s%s\t%.4f\t%.4f\t%.4f\t(%s)", prefix, ((clusters > 0) ? ("\t" + clusters) : ""), map, p10, errorRate, searcher.toString().split("\\s+")[searcher.toString().split("\\s+").length - 1]); else str += String.format("%s%s\t%.4f\t%.4f\t%.4f", prefix, ((clusters > 0) ? ("\t" + clusters) : ""), map, p10, errorRate); if (errorCount > 0) { // some of the results have not been found. We have to deal with it ... str += "\t~~\tDid not find result ;(\t(" + errorCount + ")"; } System.out.println(str); }
From source file:net.semanticmetadata.lire.benchmarking.UCIDBenchmark.java
License:Open Source License
private void computeMAP(ImageSearcher searcher, String prefix, IndexReader reader) throws IOException { double queryCount = 0d; double errorRate = 0; double map = 0; double p10 = 0; // Needed for check whether the document is deleted. Bits liveDocs = MultiFields.getLiveDocs(reader); PrintWriter fw = new PrintWriter(new File("eval/" + prefix.replace(' ', '_') + "-eval.txt")); Hashtable<Integer, String> evalText = new Hashtable<Integer, String>(260); for (int i = 0; i < reader.maxDoc(); i++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. String fileName = getIDfromFileName( reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); if (queries.keySet().contains(fileName)) { String tmpEval = ""; queryCount += 1d;/*from www . j a v a 2 s.com*/ // ok, we've got a query here for a document ... Document queryDoc = reader.document(i); ImageSearchHits hits = searcher.search(queryDoc, reader); double rank = 0; double avgPrecision = 0; double found = 0; double tmpP10 = 0; Locale.setDefault(Locale.US); for (int y = 0; y < hits.length(); y++) { String hitFile = getIDfromFileName( hits.doc(y).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); // TODO: Sort by query ID! tmpEval += String.format(Locale.US, "%d 1 %s %d %.2f test\n", query2id.get(fileName), hitFile.substring(0, hitFile.lastIndexOf('.')), (int) rank + 1, 100 - hits.score(y)); // if (!hitFile.equals(fileName)) { rank++; if (queries.get(fileName).contains(hitFile) || hitFile.equals(fileName)) { // it's a hit. found++; // TODO: Compute error rate, etc. here. avgPrecision += found / rank;// * (1d/queries.get(fileName).size()); // if (rank<=60) System.out.print('X'); if (rank <= 10) tmpP10++; } else { // nothing has been found. if (rank == 1) errorRate += 1d; // if (rank<=60) System.out.print('-'); } } // } // System.out.println(); if (found - queries.get(fileName).size() == 1) avgPrecision /= (double) (1d + queries.get(fileName).size()); else { // some of the results have not been found. We have to deal with it ... System.err.println("Did not find result ;("); } // assertTrue(found - queries.get(fileName).size() == 0); map += avgPrecision; p10 += tmpP10; evalText.put(query2id.get(fileName), tmpEval); } } for (int i = 0; i < query2id.size(); i++) { fw.write(evalText.get(i + 1)); } fw.close(); errorRate = errorRate / queryCount; map = map / queryCount; p10 = p10 / (queryCount * 10d); System.out.print(prefix); System.out.format("\t%.5f\t%.5f\t%.5f\n", map, p10, errorRate); }
From source file:net.semanticmetadata.lire.impl.CEDDImageSearcher.java
License:Open Source License
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException { // get the first document: SimpleImageDuplicates simpleImageDuplicates = null; try {/*from w w w. ja va2s . c om*/ if (!IndexReader.indexExists(reader.directory())) throw new FileNotFoundException("No index found at this specific location."); Document doc = reader.document(0); CEDD lireFeature = (CEDD) descriptorClass.newInstance(); byte[] cls = doc.getBinaryValue(fieldName); if (cls != null && cls.length > 0) lireFeature.setByteArrayRepresentation(cls); HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>(); // find duplicates ... boolean hasDeletions = reader.hasDeletions(); int docs = reader.numDocs(); int numDuplicates = 0; for (int i = 0; i < docs; i++) { if (hasDeletions && reader.isDeleted(i)) { continue; } Document d = reader.document(i); float distance = getDistance(d, lireFeature); if (!duplicates.containsKey(distance)) { duplicates.put(distance, new LinkedList<String>()); } else { numDuplicates++; } duplicates.get(distance).add(d.getFieldable(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue()); } if (numDuplicates == 0) return null; LinkedList<List<String>> results = new LinkedList<List<String>>(); for (float f : duplicates.keySet()) { if (duplicates.get(f).size() > 1) { results.add(duplicates.get(f)); } } simpleImageDuplicates = new SimpleImageDuplicates(results); } catch (InstantiationException e) { logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage()); } catch (IllegalAccessException e) { logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage()); } return simpleImageDuplicates; }
From source file:net.semanticmetadata.lire.impl.ColorLayoutImageSearcher.java
License:Open Source License
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException { // get the first document: SimpleImageDuplicates simpleImageDuplicates = null; try {/*from www. j a v a 2 s . c o m*/ if (!IndexReader.indexExists(reader.directory())) throw new FileNotFoundException("No index found at this specific location."); Document doc = reader.document(0); ColorLayout lireFeature = (ColorLayout) descriptorClass.newInstance(); byte[] cls = doc.getBinaryValue(fieldName); if (cls != null && cls.length > 0) lireFeature.setByteArrayRepresentation(cls); HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>(); // find duplicates ... boolean hasDeletions = reader.hasDeletions(); int docs = reader.numDocs(); int numDuplicates = 0; for (int i = 0; i < docs; i++) { if (hasDeletions && reader.isDeleted(i)) { continue; } Document d = reader.document(i); float distance = getDistance(d, lireFeature); if (!duplicates.containsKey(distance)) { duplicates.put(distance, new LinkedList<String>()); } else { numDuplicates++; } duplicates.get(distance).add(d.getFieldable(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue()); } if (numDuplicates == 0) return null; LinkedList<List<String>> results = new LinkedList<List<String>>(); for (float f : duplicates.keySet()) { if (duplicates.get(f).size() > 1) { results.add(duplicates.get(f)); } } simpleImageDuplicates = new SimpleImageDuplicates(results); } catch (InstantiationException e) { logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage()); } catch (IllegalAccessException e) { logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage()); } return simpleImageDuplicates; }
From source file:net.semanticmetadata.lire.impl.custom.SingleNddCeddImageSearcher.java
License:Open Source License
protected void init(IndexReader reader) { this.reader = reader; if (reader.hasDeletions()) { throw new UnsupportedOperationException( "The index has to be optimized first to be cached! Use IndexWriter.forceMerge(0) to do this."); }//from www . j av a 2s . co m docs = new TreeSet<SimpleResult>(); try { this.cachedInstance = (LireFeature) this.descriptorClass.newInstance(); if (fieldName == null) fieldName = this.cachedInstance.getFieldName(); } catch (InstantiationException e) { logger.log(Level.SEVERE, "Error instantiating class for generic image searcher (" + descriptorClass.getName() + "): " + e.getMessage()); } catch (IllegalAccessException e) { logger.log(Level.SEVERE, "Error instantiating class for generic image searcher (" + descriptorClass.getName() + "): " + e.getMessage()); } // put all respective features into an in-memory cache ... if (isCaching && reader != null) { int docs = reader.numDocs(); featureCache = new ArrayList<double[]>(docs); try { Document d; for (int i = 0; i < docs; i++) { d = reader.document(i); cachedInstance.setByteArrayRepresentation(d.getField(fieldName).binaryValue().bytes, d.getField(fieldName).binaryValue().offset, d.getField(fieldName).binaryValue().length); // normalize features,o we can use L1 if (!halfDimensions) { featureCache.add(normalize(cachedInstance.getDoubleHistogram())); } else { featureCache.add(crunch(cachedInstance.getDoubleHistogram())); } } } catch (IOException e) { e.printStackTrace(); } } }
From source file:net.semanticmetadata.lire.impl.FastOpponentImageSearcher.java
License:Open Source License
/** * @param reader/* w w w . j a v a 2 s . co m*/ * @param lireFeature * @return the maximum distance found for normalizing. * @throws java.io.IOException */ protected double findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException { maxDistance = -1f; // clear result set ... docs.clear(); // Needed for check whether the document is deleted. Bits liveDocs = MultiFields.getLiveDocs(reader); Document d; double tmpDistance; int docs = reader.numDocs(); byte[] histogram = lireFeature.getByteArrayRepresentation(); for (int i = 0; i < docs; i++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. d = reader.document(i); tmpDistance = getDistance(d, histogram); assert (tmpDistance >= 0); // calculate the overall max distance to normalize score afterwards // if (overallMaxDistance < tmpDistance) { // overallMaxDistance = tmpDistance; // } // if it is the first document: if (maxDistance < 0) { maxDistance = tmpDistance; } // if the array is not full yet: if (this.docs.size() < maxHits) { this.docs.add(new SimpleResult((float) tmpDistance, d, i)); if (tmpDistance > maxDistance) maxDistance = tmpDistance; } else if (tmpDistance < maxDistance) { // if it is nearer to the sample than at least on of the current set: // remove the last one ... this.docs.remove(this.docs.last()); // add the new one ... this.docs.add(new SimpleResult((float) tmpDistance, d, i)); // and set our new distance border ... maxDistance = this.docs.last().getDistance(); } } return maxDistance; }
From source file:net.semanticmetadata.lire.impl.GenericFastImageSearcher.java
License:Open Source License
/** * @param reader//ww w. j ava2s.c o m * @param lireFeature * @return the maximum distance found for normalizing. * @throws java.io.IOException */ protected float findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException { maxDistance = Float.MAX_VALUE; // overallMaxDistance = -1f; // clear result set ... docs.clear(); // Needed for check whether the document is deleted. Bits liveDocs = MultiFields.getLiveDocs(reader); Document d; float tmpDistance; int docs = reader.numDocs(); if (!isCaching) { // we read each and every document from the index and then we compare it to the query. for (int i = 0; i < docs; i++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. d = reader.document(i); tmpDistance = getDistance(d, lireFeature); assert (tmpDistance >= 0); // if the array is not full yet: if (this.docs.size() < maxHits) { this.docs.add(new SimpleResult(tmpDistance, d, i)); if (tmpDistance > maxDistance) maxDistance = tmpDistance; } else if (tmpDistance < maxDistance) { // if it is nearer to the sample than at least on of the current set: // remove the last one ... this.docs.remove(this.docs.last()); // add the new one ... this.docs.add(new SimpleResult(tmpDistance, d, i)); // and set our new distance border ... maxDistance = this.docs.last().getDistance(); } } } else { // we use the in-memory cache to find the matching docs from the index. int count = 0; for (Iterator<byte[]> iterator = featureCache.iterator(); iterator.hasNext();) { cachedInstance.setByteArrayRepresentation(iterator.next()); tmpDistance = lireFeature.getDistance(cachedInstance); assert (tmpDistance >= 0); // if the array is not full yet: if (this.docs.size() < maxHits) { this.docs.add(new SimpleResult(tmpDistance, reader.document(count), count)); if (tmpDistance > maxDistance) maxDistance = tmpDistance; } else if (tmpDistance < maxDistance) { // if it is nearer to the sample than at least on of the current set: // remove the last one ... this.docs.remove(this.docs.last()); // add the new one ... this.docs.add(new SimpleResult(tmpDistance, reader.document(count), count)); // and set our new distance border ... maxDistance = this.docs.last().getDistance(); } count++; } } return maxDistance; }
From source file:net.semanticmetadata.lire.impl.GenericFastImageSearcher.java
License:Open Source License
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException { // get the first document: SimpleImageDuplicates simpleImageDuplicates = null; try {//from w w w . j a va 2s. c o m // if (!IndexReader.indexExists(reader.directory())) // throw new FileNotFoundException("No index found at this specific location."); Document doc = reader.document(0); LireFeature lireFeature = (LireFeature) descriptorClass.newInstance(); if (doc.getField(fieldName).binaryValue() != null && doc.getField(fieldName).binaryValue().length > 0) lireFeature.setByteArrayRepresentation(doc.getField(fieldName).binaryValue().bytes, doc.getField(fieldName).binaryValue().offset, doc.getField(fieldName).binaryValue().length); HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>(); // Needed for check whether the document is deleted. Bits liveDocs = MultiFields.getLiveDocs(reader); int docs = reader.numDocs(); int numDuplicates = 0; for (int i = 0; i < docs; i++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. Document d = reader.document(i); float distance = getDistance(d, lireFeature); if (!duplicates.containsKey(distance)) { duplicates.put(distance, new LinkedList<String>()); } else { numDuplicates++; } duplicates.get(distance).add(d.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue()); } if (numDuplicates == 0) return null; LinkedList<List<String>> results = new LinkedList<List<String>>(); for (float f : duplicates.keySet()) { if (duplicates.get(f).size() > 1) { results.add(duplicates.get(f)); } } simpleImageDuplicates = new SimpleImageDuplicates(results); } catch (InstantiationException e) { logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage()); } catch (IllegalAccessException e) { logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage()); } return simpleImageDuplicates; }