Usage examples for org.apache.lucene.index.IndexReader.numDocs()
/** Abstract method of org.apache.lucene.index.IndexReader (per the listing title); the examples below treat its int result as the reader's document count. */
public abstract int numDocs();
From source file:net.semanticmetadata.lire.searchers.forevaluations.GenericFastImageSearcherForEvaluation.java
License:Open Source License
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException { // get the first document: SimpleImageDuplicates simpleImageDuplicates = null; // try { // if (!IndexReader.indexExists(reader.directory())) // throw new FileNotFoundException("No index found at this specific location."); Document doc = reader.document(0); LireFeature lireFeature = extractorItem.getFeatureInstance(); if (doc.getField(fieldName).binaryValue() != null && doc.getField(fieldName).binaryValue().length > 0) lireFeature.setByteArrayRepresentation(doc.getField(fieldName).binaryValue().bytes, doc.getField(fieldName).binaryValue().offset, doc.getField(fieldName).binaryValue().length); HashMap<Double, List<String>> duplicates = new HashMap<Double, List<String>>(); // Needed for check whether the document is deleted. Bits liveDocs = MultiFields.getLiveDocs(reader); int docs = reader.numDocs(); int numDuplicates = 0; for (int i = 0; i < docs; i++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. Document d = reader.document(i); double distance = getDistance(d, lireFeature); if (!duplicates.containsKey(distance)) { duplicates.put(distance, new LinkedList<String>()); } else {//from w w w. java 2 s .com numDuplicates++; } duplicates.get(distance).add(d.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue()); } if (numDuplicates == 0) return null; LinkedList<List<String>> results = new LinkedList<List<String>>(); for (double d : duplicates.keySet()) { if (duplicates.get(d).size() > 1) { results.add(duplicates.get(d)); } } simpleImageDuplicates = new SimpleImageDuplicates(results); // } catch (InstantiationException e) { // logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage()); // } catch (IllegalAccessException e) { // logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage()); // } return simpleImageDuplicates; }
From source file:net.semanticmetadata.lire.searchers.GenericFastImageSearcher.java
License:Open Source License
/** * @param reader/*from w ww .j a v a 2 s. c o m*/ * @param lireFeature * @return the maximum distance found for normalizing. * @throws IOException */ protected double findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException { maxDistance = -1d; // clear result set ... docs.clear(); // Needed for check whether the document is deleted. Bits liveDocs = MultiFields.getLiveDocs(reader); Document d; double tmpDistance; int docs = reader.numDocs(); if (!isCaching) { // we read each and every document from the index and then we compare it to the query. for (int i = 0; i < docs; i++) { if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it. d = reader.document(i); tmpDistance = getDistance(d, lireFeature); assert (tmpDistance >= 0); // if the array is not full yet: if (this.docs.size() < maxHits) { this.docs.add(new SimpleResult(tmpDistance, i)); if (tmpDistance > maxDistance) maxDistance = tmpDistance; } else if (tmpDistance < maxDistance) { // if it is nearer to the sample than at least on of the current set: // remove the last one ... this.docs.remove(this.docs.last()); // add the new one ... this.docs.add(new SimpleResult(tmpDistance, i)); // and set our new distance border ... 
maxDistance = this.docs.last().getDistance(); } } } else { LinkedList<Consumer> tasks = new LinkedList<Consumer>(); LinkedList<Thread> threads = new LinkedList<Thread>(); Consumer consumer; Thread thread; Thread p = new Thread(new Producer()); p.start(); for (int i = 0; i < numThreads; i++) { consumer = new Consumer(lireFeature); thread = new Thread(consumer); thread.start(); tasks.add(consumer); threads.add(thread); } for (Thread next : threads) { try { next.join(); } catch (InterruptedException e) { e.printStackTrace(); } } TreeSet<SimpleResult> tmpDocs; boolean flag; SimpleResult simpleResult; for (Consumer task : tasks) { tmpDocs = task.getResult(); flag = true; while (flag && (tmpDocs.size() > 0)) { simpleResult = tmpDocs.pollFirst(); if (this.docs.size() < maxHits) { this.docs.add(simpleResult); if (simpleResult.getDistance() > maxDistance) maxDistance = simpleResult.getDistance(); } else if (simpleResult.getDistance() < maxDistance) { // this.docs.remove(this.docs.last()); this.docs.pollLast(); this.docs.add(simpleResult); maxDistance = this.docs.last().getDistance(); } else flag = false; } } } return maxDistance; }
From source file:net.semanticmetadata.lire.TestImageSearcher.java
License:Open Source License
/**
 * Runs the scalable color searcher against the index at {@code indexPath} with three kinds
 * of query (an image, the best hit document, a freshly built document) and prints the
 * average time per search together with up to five hits for each.
 *
 * @throws IOException if the index or the test image cannot be read
 */
public void testSearch() throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    try {
        int numDocs = reader.numDocs();
        System.out.println("numDocs = " + numDocs);
        ImageSearcher searcher = ImageSearcherFactory.createScalableColorImageSearcher(50);

        // ImageIO.read does not close the stream it is given, so close it here.
        BufferedImage bimg;
        FileInputStream imageStream = new FileInputStream(testFilesPath + testFiles[0]);
        try {
            bimg = ImageIO.read(imageStream);
        } finally {
            imageStream.close();
        }

        // query by image
        ImageSearchHits hits = null;
        long time = System.currentTimeMillis();
        for (int i = 0; i < numsearches; i++) {
            hits = searcher.search(bimg, reader);
        }
        time = System.currentTimeMillis() - time;
        System.out.println(
                ((float) time / (float) numsearches) + " ms per search with image, averaged on " + numsearches);
        // never print more hits than were returned
        for (int i = 0; i < Math.min(5, hits.length()); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }

        // query by the best hit of the previous search
        Document document = hits.doc(0);
        time = System.currentTimeMillis();
        for (int i = 0; i < numsearches; i++) {
            hits = searcher.search(document, reader);
        }
        time = System.currentTimeMillis() - time;
        System.out.println(
                ((float) time / (float) numsearches) + " ms per search with document, averaged on " + numsearches);
        for (int i = 0; i < Math.min(5, hits.length()); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }

        // query by a document built from the test image
        document = getDocumentBuilder().createDocument(bimg, testFilesPath + testFiles[0]);
        time = System.currentTimeMillis();
        for (int i = 0; i < numsearches; i++) {
            hits = searcher.search(document, reader);
        }
        time = System.currentTimeMillis() - time;
        System.out.println(
                ((float) time / (float) numsearches) + " ms per search with document, averaged on " + numsearches);
        for (int i = 0; i < Math.min(5, hits.length()); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
    } finally {
        reader.close();
    }
}
From source file:net.semanticmetadata.lire.TestImageSearcher.java
License:Open Source License
public void testFindDuplicates() throws Exception { IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath))); int numDocs = reader.numDocs(); System.out.println("numDocs = " + numDocs); ImageSearcher searcher = ImageSearcherFactory.createCEDDImageSearcher(50); ImageDuplicates imageDuplicates = searcher.findDuplicates(reader); if (imageDuplicates == null) { System.out.println("No duplicates found"); return;//w w w. ja v a2 s . com } for (int i = 0; i < imageDuplicates.length(); i++) { System.out.println(imageDuplicates.getDuplicate(i).toString()); } }
From source file:net.semanticmetadata.lire.TestImageSearcher.java
License:Open Source License
public void testCorrelationSearch() throws IOException { IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath))); int numDocs = reader.numDocs(); System.out.println("numDocs = " + numDocs); ImageSearcher searcher = ImageSearcherFactory.createAutoColorCorrelogramImageSearcher(10); FileInputStream imageStream = new FileInputStream(testFilesPath + testFiles[0]); BufferedImage bimg = ImageIO.read(imageStream); ImageSearchHits hits = null;//from w w w .j a va 2s.c o m long time = System.currentTimeMillis(); // for (int i = 0; i < numsearches; i++) { hits = searcher.search(bimg, reader); // } time = System.currentTimeMillis() - time; System.out.println( ((float) time / (float) numsearches) + " ms per search with image, averaged on " + numsearches); for (int i = 0; i < hits.length(); i++) { System.out.println(hits.score(i) + ": " + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue()); } Document document = hits.doc(4); time = System.currentTimeMillis(); for (int i = 0; i < numsearches; i++) { hits = searcher.search(document, reader); } time = System.currentTimeMillis() - time; System.out.println( ((float) time / (float) numsearches) + " ms per search with document, averaged on " + numsearches); for (int i = 0; i < hits.length(); i++) { System.out.println(hits.score(i) + ": " + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue()); } }
From source file:net.semanticmetadata.lire.TestImageSearcher.java
License:Open Source License
public void testCEDDSearch() throws IOException { IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath))); int numDocs = reader.numDocs(); System.out.println("numDocs = " + numDocs); ImageSearcher searcher = ImageSearcherFactory.createCEDDImageSearcher(30); FileInputStream imageStream = new FileInputStream(testFilesPath + testFiles[0]); BufferedImage bimg = ImageIO.read(imageStream); ImageSearchHits hits = null;//from www . j a va 2 s. co m long time = System.currentTimeMillis(); // for (int i = 0; i < numsearches; i++) { hits = searcher.search(bimg, reader); // } time = System.currentTimeMillis() - time; System.out.println( ((float) time / (float) numsearches) + " ms per search with image, averaged on " + numsearches); for (int i = 0; i < hits.length(); i++) { System.out.println(hits.score(i) + ": " + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue()); } Document document = hits.doc(4); time = System.currentTimeMillis(); for (int i = 0; i < numsearches; i++) { hits = searcher.search(document, reader); } time = System.currentTimeMillis() - time; System.out.println( ((float) time / (float) numsearches) + " ms per search with document, averaged on " + numsearches); for (int i = 0; i < hits.length(); i++) { System.out.println(hits.score(i) + ": " + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue()); } }
From source file:net.semanticmetadata.lire.TestImageSearcher.java
License:Open Source License
public void testSimpleColorHistogramSearch() throws IOException { IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath))); int numDocs = reader.numDocs(); System.out.println("numDocs = " + numDocs); ImageSearcher searcher = ImageSearcherFactory.createColorHistogramImageSearcher(30); FileInputStream imageStream = new FileInputStream(testFilesPath + testFiles[0]); BufferedImage bimg = ImageIO.read(imageStream); ImageSearchHits hits = null;// ww w .j av a 2 s . c om long time = System.currentTimeMillis(); // for (int i = 0; i < numsearches; i++) { hits = searcher.search(bimg, reader); // } time = System.currentTimeMillis() - time; System.out.println( ((float) time / (float) numsearches) + " ms per search with image, averaged on " + numsearches); for (int i = 0; i < hits.length(); i++) { System.out.println(hits.score(i) + ": " + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue()); } Document document = hits.doc(4); time = System.currentTimeMillis(); for (int i = 0; i < numsearches; i++) { hits = searcher.search(document, reader); } time = System.currentTimeMillis() - time; System.out.println( ((float) time / (float) numsearches) + " ms per search with document, averaged on " + numsearches); for (int i = 0; i < hits.length(); i++) { System.out.println(hits.score(i) + ": " + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue()); } }
From source file:net.semanticmetadata.lire.TestImageSearcherWeighted.java
License:Open Source License
/**
 * Runs three differently weighted searchers with the test image as query against the index
 * at {@code indexPath} and prints up to five hits for each, separated by a "-" line.
 *
 * @throws IOException if the index or the test image cannot be read
 */
public void testSearch() throws IOException {
    IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath)));
    try {
        int numDocs = reader.numDocs();
        System.out.println("numDocs = " + numDocs);
        ImageSearcher searcher1 = ImageSearcherFactory.createWeightedSearcher(10, 0.2f, 0.8f, 1.0f);
        ImageSearcher searcher2 = ImageSearcherFactory.createWeightedSearcher(10, 0.8f, 0.0f, 1.0f);
        ImageSearcher searcher3 = ImageSearcherFactory.createWeightedSearcher(10, 0.0f, 1.0f, 0.0f);

        // ImageIO.read does not close the stream it is given, so close it here.
        BufferedImage bimg;
        FileInputStream imageStream = new FileInputStream(testFilesPath + testFiles[0]);
        try {
            bimg = ImageIO.read(imageStream);
        } finally {
            imageStream.close();
        }

        ImageSearchHits hits = searcher1.search(bimg, reader);
        // never print more hits than were returned
        for (int i = 0; i < Math.min(5, hits.length()); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
        System.out.println("-");

        hits = searcher2.search(bimg, reader);
        for (int i = 0; i < Math.min(5, hits.length()); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
        System.out.println("-");

        hits = searcher3.search(bimg, reader);
        for (int i = 0; i < Math.min(5, hits.length()); i++) {
            System.out.println(hits.score(i) + ": "
                    + hits.doc(i).getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
    } finally {
        reader.close();
    }
}
From source file:net.sf.logsaw.index.internal.LuceneIndexServiceImpl.java
License:Open Source License
@Override public int count(ILogResource log) throws CoreException { Assert.isNotNull(log, "log"); //$NON-NLS-1$ ARunWithIndexReader<Integer> runnable = new ARunWithIndexReader<Integer>() { /* (non-Javadoc) * @see net.sf.logsaw.index.impl.ARunWithIndexReader#doRunWithIndexReader(org.apache.lucene.index.IndexReader, net.sf.logsaw.core.framework.ILogResource) *//*from www. j av a 2 s . c o m*/ @Override protected Integer doRunWithIndexReader(IndexReader reader, ILogResource log) throws CoreException { if (reader != null) { return Integer.valueOf(reader.numDocs()); } // Index does not exist yet return Integer.valueOf(0); } }; return runnable.runWithIndexReader(log); }
From source file:net.sourceforge.docfetcher.model.UtilModel.java
License:Open Source License
/**
 * Asserts that the Lucene index in the given directory reports the expected number of
 * documents.
 *
 * @param luceneDir the directory holding the index to open
 * @param expectedCount the document count the index is expected to report
 * @throws Exception if the index cannot be opened
 */
@VisibleForPackageGroup
public static void assertDocCount(Directory luceneDir, int expectedCount) throws Exception {
    IndexReader reader = IndexReader.open(luceneDir);
    try {
        assertEquals(expectedCount, reader.numDocs());
    } finally {
        // close in finally so a failing assertion does not leak the reader
        Closeables.closeQuietly(reader);
    }
}