Usage examples for org.apache.lucene.index.IndexReader#hasDeletions()

public boolean hasDeletions()

Returns true if the index behind this reader contains deleted documents, i.e. numDocs() < maxDoc().
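A minimal usage sketch, assuming the Lucene 5/6-era API used in the examples below (MultiFields.getLiveDocs; in Lucene 8+ the equivalent is MultiBits.getLiveDocs) and a placeholder index path:

import java.nio.file.Paths;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Bits;

public class HasDeletionsExample {
    public static void main(String[] args) throws Exception {
        // "/path/to/index" is a placeholder; point it at an existing index.
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/path/to/index")))) {
            // hasDeletions() reports whether any documents are marked deleted
            // but not yet merged away, i.e. numDocs() < maxDoc().
            if (reader.hasDeletions()) {
                System.out.println(reader.numDeletedDocs() + " of " + reader.maxDoc()
                        + " documents are deleted.");
            }
            // When scanning doc ids directly, deleted documents have to be skipped
            // via the live-docs bit set (getLiveDocs returns null if nothing is deleted).
            Bits liveDocs = MultiFields.getLiveDocs(reader);
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (reader.hasDeletions() && !liveDocs.get(i))
                    continue; // deleted document, ignore it
                Document d = reader.document(i);
                // ... process the live document ...
            }
        }
    }
}

As in most of the examples below, the hasDeletions() check is what makes the liveDocs lookup safe, since getLiveDocs returns null for an index without deletions.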
From source file:net.semanticmetadata.lire.searchers.custom.SingleNddCeddImageSearcher.java
License:Open Source License
protected void init(IndexReader reader) {
    this.reader = reader;
    if (reader.hasDeletions()) {
        throw new UnsupportedOperationException(
                "The index has to be optimized first to be cached! Use IndexWriter.forceMerge(0) to do this.");
    }
    docs = new TreeSet<SimpleResult>();
    try {
        this.cachedInstance = (GlobalFeature) this.descriptorClass.newInstance();
        if (fieldName == null)
            fieldName = this.cachedInstance.getFieldName();
    } catch (InstantiationException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher ("
                + descriptorClass.getName() + "): " + e.getMessage());
    } catch (IllegalAccessException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher ("
                + descriptorClass.getName() + "): " + e.getMessage());
    }
    // put all respective features into an in-memory cache ...
    if (isCaching && reader != null) {
        int docs = reader.numDocs();
        featureCache = new ArrayList<double[]>(docs);
        try {
            Document d;
            for (int i = 0; i < docs; i++) {
                d = reader.document(i);
                cachedInstance.setByteArrayRepresentation(d.getField(fieldName).binaryValue().bytes,
                        d.getField(fieldName).binaryValue().offset,
                        d.getField(fieldName).binaryValue().length);
                // normalize features so we can use L1
                if (!halfDimensions) {
                    featureCache.add(normalize(cachedInstance.getFeatureVector()));
                } else {
                    featureCache.add(crunch(cachedInstance.getFeatureVector()));
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
From source file:net.semanticmetadata.lire.searchers.custom.TopDocsImageSearcher.java
License:Open Source License
/**
 * @param results
 * @param reader
 * @param globalFeature
 * @return the maximum distance found for normalizing.
 * @throws java.io.IOException
 */
protected double findSimilar(TopDocs results, IndexReader reader, GlobalFeature globalFeature)
        throws IOException {
    double maxDistance = -1d, overallMaxDistance = -1d;
    boolean hasDeletions = reader.hasDeletions();
    // clear result set ...
    docs.clear();
    // Needed to check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    int docs = results.totalHits;
    for (int i = 0; i < docs; i++) {
        if (hasDeletions && !liveDocs.get(results.scoreDocs[i].doc))
            continue; // if it is deleted, just ignore it.
        Document d = reader.document(results.scoreDocs[i].doc);
        double distance = getDistance(d, globalFeature);
        assert (distance >= 0);
        // calculate the overall max distance to normalize score afterwards
        if (overallMaxDistance < distance) {
            overallMaxDistance = distance;
        }
        // if it is the first document:
        if (maxDistance < 0) {
            maxDistance = distance;
        }
        // if the array is not full yet:
        if (this.docs.size() < maxHits) {
            this.docs.add(new SimpleResult(distance, results.scoreDocs[i].doc));
            if (distance > maxDistance)
                maxDistance = distance;
        } else if (distance < maxDistance) {
            // if it is nearer to the sample than at least one of the current set:
            // remove the last one ...
            this.docs.remove(this.docs.last());
            // add the new one ...
            this.docs.add(new SimpleResult(distance, results.scoreDocs[i].doc));
            // and set our new distance border ...
            maxDistance = this.docs.last().getDistance();
        }
    }
    return maxDistance;
}
From source file:net.semanticmetadata.lire.searchers.FastOpponentImageSearcher.java
License:Open Source License
/**
 * @param reader
 * @param globalFeature
 * @return the maximum distance found for normalizing.
 * @throws java.io.IOException
 */
protected double findSimilar(IndexReader reader, GlobalFeature globalFeature) throws IOException {
    maxDistance = -1f;
    // clear result set ...
    docs.clear();
    // Needed to check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    Document d;
    double tmpDistance;
    int docs = reader.numDocs();
    byte[] histogram = globalFeature.getByteArrayRepresentation();
    for (int i = 0; i < docs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        d = reader.document(i);
        tmpDistance = getDistance(d, histogram);
        assert (tmpDistance >= 0);
        // calculate the overall max distance to normalize score afterwards
        // if (overallMaxDistance < tmpDistance) {
        //     overallMaxDistance = tmpDistance;
        // }
        // if it is the first document:
        if (maxDistance < 0) {
            maxDistance = tmpDistance;
        }
        // if the array is not full yet:
        if (this.docs.size() < maxHits) {
            this.docs.add(new SimpleResult(tmpDistance, i));
            if (tmpDistance > maxDistance)
                maxDistance = tmpDistance;
        } else if (tmpDistance < maxDistance) {
            // if it is nearer to the sample than at least one of the current set:
            // remove the last one ...
            this.docs.remove(this.docs.last());
            // add the new one ...
            this.docs.add(new SimpleResult(tmpDistance, i));
            // and set our new distance border ...
            maxDistance = this.docs.last().getDistance();
        }
    }
    return maxDistance;
}
From source file:net.semanticmetadata.lire.searchers.forevaluations.GenericFastImageSearcherForEvaluation.java
License:Open Source License
/**
 * @param reader
 * @param lireFeature
 * @return the maximum distance found for normalizing.
 * @throws IOException
 */
protected double findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException {
    maxDistance = -1d;
    // clear result set ...
    docs.clear();
    // Needed to check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    Document d;
    double tmpDistance;
    int docs = reader.numDocs();
    if (!isCaching) {
        // we read each and every document from the index and then we compare it to the query.
        for (int i = 0; i < docs; i++) {
            if (reader.hasDeletions() && !liveDocs.get(i))
                continue; // if it is deleted, just ignore it.
            d = reader.document(i);
            tmpDistance = getDistance(d, lireFeature);
            assert (tmpDistance >= 0);
            // if the array is not full yet:
            if (this.docs.size() < maxHits) {
                this.docs.add(new SimpleResultForEvaluation(tmpDistance, i,
                        d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]));
                if (tmpDistance > maxDistance)
                    maxDistance = tmpDistance;
            } else if (tmpDistance < maxDistance) {
                // if it is nearer to the sample than at least one of the current set:
                // remove the last one ...
                this.docs.remove(this.docs.last());
                // add the new one ...
                this.docs.add(new SimpleResultForEvaluation(tmpDistance, i,
                        d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]));
                // and set our new distance border ...
                maxDistance = this.docs.last().getDistance();
            }
        }
    } else {
        LinkedList<Consumer> tasks = new LinkedList<Consumer>();
        LinkedList<Thread> threads = new LinkedList<Thread>();
        Consumer consumer;
        Thread thread;
        Thread p = new Thread(new Producer());
        p.start();
        for (int i = 0; i < numThreads; i++) {
            consumer = new Consumer(lireFeature);
            thread = new Thread(consumer);
            thread.start();
            tasks.add(consumer);
            threads.add(thread);
        }
        for (Thread next : threads) {
            try {
                next.join();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
        TreeSet<SimpleResultForEvaluation> tmpDocs;
        boolean flag;
        SimpleResultForEvaluation simpleResult;
        for (Consumer task : tasks) {
            tmpDocs = task.getResult();
            flag = true;
            while (flag && (tmpDocs.size() > 0)) {
                simpleResult = tmpDocs.pollFirst();
                if (this.docs.size() < maxHits) {
                    this.docs.add(simpleResult);
                    if (simpleResult.getDistance() > maxDistance)
                        maxDistance = simpleResult.getDistance();
                } else if (simpleResult.getDistance() < maxDistance) {
                    // this.docs.remove(this.docs.last());
                    this.docs.pollLast();
                    this.docs.add(simpleResult);
                    maxDistance = this.docs.last().getDistance();
                } else
                    flag = false;
            }
        }
    }
    return maxDistance;
}
From source file:net.semanticmetadata.lire.searchers.forevaluations.GenericFastImageSearcherForEvaluation.java
License:Open Source License
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    // get the first document:
    SimpleImageDuplicates simpleImageDuplicates = null;
    // try {
    //     if (!IndexReader.indexExists(reader.directory()))
    //         throw new FileNotFoundException("No index found at this specific location.");
    Document doc = reader.document(0);
    LireFeature lireFeature = extractorItem.getFeatureInstance();
    if (doc.getField(fieldName).binaryValue() != null && doc.getField(fieldName).binaryValue().length > 0)
        lireFeature.setByteArrayRepresentation(doc.getField(fieldName).binaryValue().bytes,
                doc.getField(fieldName).binaryValue().offset, doc.getField(fieldName).binaryValue().length);
    HashMap<Double, List<String>> duplicates = new HashMap<Double, List<String>>();
    // Needed to check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    int docs = reader.numDocs();
    int numDuplicates = 0;
    for (int i = 0; i < docs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        Document d = reader.document(i);
        double distance = getDistance(d, lireFeature);
        if (!duplicates.containsKey(distance)) {
            duplicates.put(distance, new LinkedList<String>());
        } else {
            numDuplicates++;
        }
        duplicates.get(distance).add(d.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
    }
    if (numDuplicates == 0)
        return null;
    LinkedList<List<String>> results = new LinkedList<List<String>>();
    for (double d : duplicates.keySet()) {
        if (duplicates.get(d).size() > 1) {
            results.add(duplicates.get(d));
        }
    }
    simpleImageDuplicates = new SimpleImageDuplicates(results);
    // } catch (InstantiationException e) {
    //     logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage());
    // } catch (IllegalAccessException e) {
    //     logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage());
    // }
    return simpleImageDuplicates;
}
From source file:net.semanticmetadata.lire.searchers.GenericFastImageSearcher.java
License:Open Source License
/**
 * @param reader
 * @param lireFeature
 * @return the maximum distance found for normalizing.
 * @throws IOException
 */
protected double findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException {
    maxDistance = -1d;
    // clear result set ...
    docs.clear();
    // Needed to check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    Document d;
    double tmpDistance;
    int docs = reader.numDocs();
    if (!isCaching) {
        // we read each and every document from the index and then we compare it to the query.
        for (int i = 0; i < docs; i++) {
            if (reader.hasDeletions() && !liveDocs.get(i))
                continue; // if it is deleted, just ignore it.
            d = reader.document(i);
            tmpDistance = getDistance(d, lireFeature);
            assert (tmpDistance >= 0);
            // if the array is not full yet:
            if (this.docs.size() < maxHits) {
                this.docs.add(new SimpleResult(tmpDistance, i));
                if (tmpDistance > maxDistance)
                    maxDistance = tmpDistance;
            } else if (tmpDistance < maxDistance) {
                // if it is nearer to the sample than at least one of the current set:
                // remove the last one ...
                this.docs.remove(this.docs.last());
                // add the new one ...
                this.docs.add(new SimpleResult(tmpDistance, i));
                // and set our new distance border ...
                maxDistance = this.docs.last().getDistance();
            }
        }
    } else {
        LinkedList<Consumer> tasks = new LinkedList<Consumer>();
        LinkedList<Thread> threads = new LinkedList<Thread>();
        Consumer consumer;
        Thread thread;
        Thread p = new Thread(new Producer());
        p.start();
        for (int i = 0; i < numThreads; i++) {
            consumer = new Consumer(lireFeature);
            thread = new Thread(consumer);
            thread.start();
            tasks.add(consumer);
            threads.add(thread);
        }
        for (Thread next : threads) {
            try {
                next.join();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
        TreeSet<SimpleResult> tmpDocs;
        boolean flag;
        SimpleResult simpleResult;
        for (Consumer task : tasks) {
            tmpDocs = task.getResult();
            flag = true;
            while (flag && (tmpDocs.size() > 0)) {
                simpleResult = tmpDocs.pollFirst();
                if (this.docs.size() < maxHits) {
                    this.docs.add(simpleResult);
                    if (simpleResult.getDistance() > maxDistance)
                        maxDistance = simpleResult.getDistance();
                } else if (simpleResult.getDistance() < maxDistance) {
                    // this.docs.remove(this.docs.last());
                    this.docs.pollLast();
                    this.docs.add(simpleResult);
                    maxDistance = this.docs.last().getDistance();
                } else
                    flag = false;
            }
        }
    }
    return maxDistance;
}
From source file:net.semanticmetadata.lire.searchers.TestSearching.java
License:Open Source License
public void testSeparateIndex() throws IOException, IllegalAccessException, InstantiationException {
    Cluster[] cvsurf512 = Cluster.readClusters(codebookPath + "CvSURF512");
    Cluster[] simpleceddcvsurf512 = Cluster.readClusters(codebookPath + "SIMPLEdetCVSURFCEDD512");

    ParallelIndexer parallelIndexer = new ParallelIndexer(DocumentBuilder.NUM_OF_THREADS, indexPath,
            testExtensiveRed, numOfClusters, numOfDocsForVocabulary, aggregatorClass);
    parallelIndexer.addExtractor(globalFeatureClass);
    parallelIndexer.addExtractor(localFeatureClass, cvsurf512);
    parallelIndexer.addExtractor(globalFeatureClass, keypointDetector, simpleceddcvsurf512);
    parallelIndexer.run();

    ParallelIndexer parallelIndexerSeparate = new ParallelIndexer(DocumentBuilder.NUM_OF_THREADS,
            indexPathSeparate, testExtensiveBlack, indexPath);
    parallelIndexerSeparate.run();

    IndexReader readerIndex = DirectoryReader
            .open(new RAMDirectory(FSDirectory.open(Paths.get(indexPath)), IOContext.READONCE));
    System.out.println("Documents in the reader: " + readerIndex.maxDoc());

    IndexReader readerQueries = DirectoryReader
            .open(new RAMDirectory(FSDirectory.open(Paths.get(indexPathSeparate)), IOContext.READONCE));
    System.out.println("Documents in the reader: " + readerQueries.maxDoc());

    GenericFastImageSearcher ceddSearcher = new GenericFastImageSearcher(5, globalFeatureClass, true,
            readerIndex);
    GenericFastImageSearcher cvsurfsearcher = new GenericFastImageSearcher(5, localFeatureClass,
            aggregatorClass.newInstance(), 512, true, readerIndex, indexPath + ".config");
    GenericFastImageSearcher simpleceddcvsurfsearcher = new GenericFastImageSearcher(5, globalFeatureClass,
            keypointDetector, aggregatorClass.newInstance(), 512, true, readerIndex, indexPath + ".config");

    Bits liveDocs = MultiFields.getLiveDocs(readerQueries);
    ImageSearchHits ceddhits, cvsurfhits, simpleceddcvsurfhits;
    Document queryDoc;
    String queryfile, hitFile;
    int counter = 0;
    for (int i = 0; i < readerQueries.maxDoc(); i++) {
        if (readerQueries.hasDeletions() && !liveDocs.get(i))
            continue;
        queryDoc = readerQueries.document(i);
        queryfile = queryDoc.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        queryfile = queryfile.substring(queryfile.lastIndexOf('\\') + 1);
        System.out.println(counter + ". Query image: " + queryfile);

        ceddhits = ceddSearcher.search(queryDoc, readerIndex);
        cvsurfhits = cvsurfsearcher.search(queryDoc, readerIndex);
        simpleceddcvsurfhits = simpleceddcvsurfsearcher.search(queryDoc, readerIndex);

        System.out.println("Global:");
        for (int y = 0; y < ceddhits.length(); y++) {
            hitFile = readerIndex.document(ceddhits.documentID(y))
                    .getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
            hitFile = hitFile.substring(hitFile.lastIndexOf('\\') + 1);
            System.out.println(y + ". " + hitFile + " " + ceddhits.score(y));
        }
        System.out.println("Local:");
        for (int y = 0; y < cvsurfhits.length(); y++) {
            hitFile = readerIndex.document(cvsurfhits.documentID(y))
                    .getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
            hitFile = hitFile.substring(hitFile.lastIndexOf('\\') + 1);
            System.out.println(y + ". " + hitFile + " " + cvsurfhits.score(y));
        }
        System.out.println("Simple:");
        for (int y = 0; y < simpleceddcvsurfhits.length(); y++) {
            hitFile = readerIndex.document(simpleceddcvsurfhits.documentID(y))
                    .getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
            hitFile = hitFile.substring(hitFile.lastIndexOf('\\') + 1);
            System.out.println(y + ". " + hitFile + " " + simpleceddcvsurfhits.score(y));
        }
        System.out.println();
        counter++;
    }
}
From source file:org.apache.gaelucene.tools.LuceneIndexPushUtil.java
License:Apache License
public static void main(String[] args) throws IOException {
    for (int i = 0; i < args.length; i++) {
        if ("-app-url".equals(args[i])) {
            gaeAppURL = args[++i];
        } else if ("-auth-cookie".equals(args[i])) {
            authCookie = args[++i];
        } else if ("-src".equals(args[i])) {
            sourceDirName = args[++i];
        } else if ("-cat".equals(args[i])) {
            category = args[++i];
        } else if ("-rec-file".equals(args[i])) {
            jobRecFileName = args[++i];
        }
    }
    if (gaeAppURL == null || authCookie == null || sourceDirName == null || category == null
            || jobRecFileName == null) {
        System.err.println(USAGE);
        System.exit(-1);
    }
    File sourceDir = new File(sourceDirName);
    if (!sourceDir.exists()) {
        System.err.println("'" + sourceDir.getAbsolutePath() + "' DOES NOT EXIST!");
        System.exit(-1);
    }
    sourceDirName = sourceDir.getAbsolutePath();

    // load filenames that have been uploaded successfully last time.
    HashSet<String> uploadedRec = new HashSet<String>();
    File jobRecFile = new File(jobRecFileName);
    if (jobRecFile.exists()) {
        LineNumberReader reader = new LineNumberReader(new FileReader(jobRecFile));
        for (String line = reader.readLine(); line != null;) {
            if (line.indexOf(" OK") > -1) {
                line = line.substring(0, line.indexOf(" ")).trim();
            }
            uploadedRec.add(line);
            line = reader.readLine();
        }
        reader.close();
    }

    System.out.println("[INFO ] - trying to open index under " + sourceDirName);
    IndexReader indexReader = IndexReader.open(sourceDir);
    int maxDoc = indexReader.maxDoc();
    int numDocs = indexReader.numDocs();
    long version = indexReader.getVersion();
    boolean hasDeletions = indexReader.hasDeletions();
    boolean isOptimized = indexReader.isOptimized();
    System.out.println("maxDoc:" + maxDoc);
    System.out.println("numDocs:" + numDocs);
    System.out.println("version:" + version);
    System.out.println("hasDeletions:" + hasDeletions);
    System.out.println("isOptimized:" + isOptimized);

    // record filenames that were uploaded successfully
    BufferedWriter dataWriter = new BufferedWriter(
            new OutputStreamWriter(new FileOutputStream(jobRecFile, true)));
    System.out.println("[INFO ] - trying to synchronize the index files onto gae...");
    File[] files = sourceDir.listFiles();
    for (int i = 0; i < files.length; i++) {
        File file = files[i];
        if (uploadedRec.contains(file.getName())) {
            System.out.println("[INFO ] - skip file '" + file.getName() + "'");
            continue;
        }
        try {
            commitFile(file, category, version, i);
            dataWriter.write(file.getName() + " OK\n");
        } catch (IOException ioe) {
            System.out.println("[WARN ] - failed to upload '" + file.getName() + "', because:" + ioe);
        }
    }
    dataWriter.flush();
    dataWriter.close();

    System.out.println("[INFO ] - trying to activate the index...");
    try {
        activateIndex(category, version);
    } catch (IOException ioe) {
        System.out.println("[WARN ] - failed to activate the index, because:" + ioe);
    }
}
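The example above targets the pre-4.0 IndexReader API (IndexReader.open(File), isOptimized()), which no longer exists in current Lucene. A rough sketch of the same statistics against a modern DirectoryReader, assuming an already opened Directory named dir, could look as follows; isOptimized() has no direct replacement, so a single segment with no deletions is used as the closest equivalent:

DirectoryReader reader = DirectoryReader.open(dir);
System.out.println("maxDoc: " + reader.maxDoc());
System.out.println("numDocs: " + reader.numDocs());
System.out.println("version: " + reader.getVersion());
System.out.println("hasDeletions: " + reader.hasDeletions());
// isOptimized() was removed in Lucene 4; "fully merged" approximates it.
System.out.println("fully merged: " + (reader.leaves().size() == 1 && !reader.hasDeletions()));
reader.close();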
From source file:org.dyndns.andreasbaumann.LuceneAnalyzer.java
License:Open Source License
private static void printGlobalInfo(IndexReader indexReader, boolean printHeaders, boolean isSolr,
        SolrIndexSearcher solrSearch) throws IOException {
    if (printHeaders) {
        System.out.println("Global Information:");
        System.out.println("===================");
    }
    System.out.println("\tnumber of documents: " + indexReader.numDocs());
    // we should get the number of features differently, this is inefficient, but Lucene
    // has no notion of global statistics (because the default weighting schema doesn't
    // make use of it!)
    int nofFeatures = 0;
    int nofTokens = 0;
    TermEnum terms = indexReader.terms();
    while (terms.next()) {
        Term term = terms.term();
        int df = terms.docFreq();
        nofFeatures++;
        nofTokens += df;
    }
    System.out.println("\ttotal number of features: " + nofFeatures);
    System.out.println("\ttotal number of tokens: " + nofTokens);
    System.out.println("\tversion: " + indexReader.getVersion());
    System.out.println("\tstill current: " + indexReader.isCurrent());
    // TODO: we don't get segment information!
    // System.out.println("is optimized:" + segmentInfos.size() == 1 && !indexReader.hasDeletions());
    System.out.println("\tmaximal document number: " + indexReader.maxDoc());
    System.out.println("\thas deletions: " + indexReader.hasDeletions());
    if (isSolr) {
        System.out.println("\tSolr version: " + solrSearch.getVersion());
    }
    System.out.println("");
}
From source file:org.eclipse.che.api.search.server.impl.LuceneSearcher.java
License:Open Source License
private void printStatistic() throws IOException {
    if (LOG.isDebugEnabled()) {
        IndexSearcher luceneSearcher = null;
        try {
            searcherManager.maybeRefresh();
            luceneSearcher = searcherManager.acquire();
            IndexReader reader = luceneSearcher.getIndexReader();
            LOG.debug(
                    "IndexReader numDocs={} numDeletedDocs={} maxDoc={} hasDeletions={}. Writer numDocs={} numRamDocs={} hasPendingMerges={} hasUncommittedChanges={} hasDeletions={}",
                    reader.numDocs(), reader.numDeletedDocs(), reader.maxDoc(), reader.hasDeletions(),
                    luceneIndexWriter.numDocs(), luceneIndexWriter.numRamDocs(),
                    luceneIndexWriter.hasPendingMerges(), luceneIndexWriter.hasUncommittedChanges(),
                    luceneIndexWriter.hasDeletions());
        } finally {
            searcherManager.release(luceneSearcher);
        }
    }
}