List of usage examples for org.apache.lucene.index IndexReader document
public final Document document(int docID) throws IOException
Returns the stored fields of the nth Document in this index.
From source file:net.semanticmetadata.lire.TestImageSearcher.java
License:Open Source License
public void testCachingSearcherParallelWithBundling() throws IOException, InterruptedException { final IndexReader ir = DirectoryReader.open(FSDirectory.open(new File("C:\\Temp\\test-100k-cedd-idx"))); LinkedList<LireFeature> q = new LinkedList<LireFeature>(); for (int i = 0; i < ir.maxDoc(); i++) { Document d = ir.document(i); CEDD cedd = new CEDD(); BytesRef binaryValue = d.getBinaryValue(cedd.getFieldName()); cedd.setByteArrayRepresentation(binaryValue.bytes, binaryValue.offset, binaryValue.length); q.add(cedd);//from w w w. j av a 2 s . com } int count = 0; Thread[] searchers = new Thread[4]; final LinkedBlockingQueue<WorkItem> queryQueue = new LinkedBlockingQueue<WorkItem>(100); for (int i = 0; i < searchers.length; i++) { searchers[i] = new Thread(new Runnable() { @Override public void run() { SingleNddCeddImageSearcher is = new SingleNddCeddImageSearcher(ir); WorkItem remove; while ((remove = queryQueue.remove()).features != null) { try { SimpleResult[] hits = is.findMostSimilar(remove.features); for (int j = 0; j < hits.length; j++) { if (hits[j].getIndexNumber() != remove.id[j]) System.err.println("oops"); } } catch (IOException e) { e.printStackTrace(); } } } }); searchers[i].start(); } long time = System.currentTimeMillis(); LireFeature[] qarr = new LireFeature[10]; int[] iarr = new int[10]; int currentIndex = 0; int bundleCount = 0; Iterator<LireFeature> iterator = q.iterator(); while (iterator.hasNext() && bundleCount < 200) { LireFeature next = iterator.next(); try { iarr[currentIndex] = count; qarr[currentIndex++] = next; if (currentIndex >= qarr.length) { // do bundled search currentIndex = 0; queryQueue.put(new WorkItem(qarr.clone(), iarr.clone())); bundleCount++; } count++; } catch (InterruptedException e) { e.printStackTrace(); } } for (int i = 0; i < 8; i++) { queryQueue.put(new WorkItem(null, null)); } for (int i = 0; i < searchers.length; i++) { searchers[i].join(); } long l = System.currentTimeMillis() - time; System.out.printf("Tested %d 
search requests on %d documents: overall time of %d:%02d, %.2f ms per search", count, ir.maxDoc(), l / (1000 * 60), (l / 1000) % 60, ((float) l / (float) count)); }
From source file:net.semanticmetadata.lire.TestImageSearcher.java
License:Open Source License
public void testCachingSearcherBundling() throws IOException { IndexReader ir = DirectoryReader.open(FSDirectory.open(new File("C:\\Temp\\test-100k-cedd-idx"))); SingleNddCeddImageSearcher is = new SingleNddCeddImageSearcher(ir); LinkedList<LireFeature> q = new LinkedList<LireFeature>(); for (int i = 0; i < ir.maxDoc(); i++) { Document d = ir.document(i); CEDD cedd = new CEDD(); BytesRef binaryValue = d.getBinaryValue(cedd.getFieldName()); cedd.setByteArrayRepresentation(binaryValue.bytes, binaryValue.offset, binaryValue.length); q.add(cedd);/*from w w w. j a va 2 s . c om*/ } long time = System.currentTimeMillis(); int count = 0; LireFeature[] qarr = new LireFeature[10]; int currentIndex = 0; for (Iterator<LireFeature> iterator = q.iterator(); iterator.hasNext();) { LireFeature next = iterator.next(); qarr[currentIndex++] = next; if (currentIndex >= qarr.length) { // do bundled search currentIndex = 0; is.findMostSimilar(qarr); } count++; if (count > 999 & currentIndex == 0) break; } long l = System.currentTimeMillis() - time; System.out.printf("Tested %d search requests on %d documents: overall time of %d:%02d, %.2f ms per search", count, ir.maxDoc(), l / (1000 * 60), (l / 1000) % 60, ((float) l / (float) count)); }
From source file:net.semanticmetadata.lire.utils.FileUtils.java
License:Open Source License
/**
 * Writes the query image and all hits into an HTML file named
 * {@code results-<prefix>-<epoch seconds>.html}.
 *
 * @param prefix     text inserted into the generated file name
 * @param hits       the search results to list, in order
 * @param queryImage path of the query image; written as a {@code file://} link
 * @param reader     index reader used to load the stored document of each hit
 * @return the name of the file that was written
 * @throws IOException if the file cannot be written or a document cannot be read
 */
public static String saveImageResultsToHtml(String prefix, ImageSearchHits hits, String queryImage,
        IndexReader reader) throws IOException {
    long l = System.currentTimeMillis() / 1000;
    String fileName = "results-" + prefix + "-" + l + ".html";
    BufferedWriter bw = new BufferedWriter(new FileWriter(fileName));
    try {
        bw.write("<html>\n" + "<head><title>Search Results</title></head>\n" + "<body bgcolor=\"#FFFFFF\">\n");
        bw.write("<h3>query</h3>\n");
        bw.write("<a href=\"file://" + queryImage + "\"><img src=\"file://" + queryImage + "\"></a><p>\n");
        bw.write("<h3>results</h3>\n");
        for (int i = 0; i < hits.length(); i++) {
            // Load the stored document once per hit; it supplies both the link and the image source.
            String identifier = reader.document(hits.documentID(i))
                    .getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
            bw.write(hits.score(i) + " - <a href=\"file://" + identifier + "\"><img src=\"file://" + identifier
                    + "\"></a><p>\n");
        }
        bw.write("</body>\n" + "</html>");
    } finally {
        // Close the writer even when a write or a document lookup fails.
        bw.close();
    }
    return fileName;
}
From source file:net.semanticmetadata.lire.utils.FileUtils.java
License:Open Source License
/**
 * Writes the query image and all hits into an HTML file named
 * {@code results-<prefix>-<epoch seconds>.html}.
 *
 * @param prefix     text inserted into the generated file name
 * @param hits       the search results to list, in order
 * @param reader     index reader used to load the stored document of each hit
 * @param queryImage path of the query image; written as a {@code file://} link
 * @return the name of the file that was written
 * @throws IOException if the file cannot be written or a document cannot be read
 */
public static String saveImageResultsToHtml(String prefix, TopDocs hits, IndexReader reader, String queryImage)
        throws IOException {
    long l = System.currentTimeMillis() / 1000;
    String fileName = "results-" + prefix + "-" + l + ".html";
    BufferedWriter bw = new BufferedWriter(new FileWriter(fileName));
    try {
        bw.write("<html>\n" + "<head><title>Search Results</title></head>\n" + "<body bgcolor=\"#FFFFFF\">\n");
        bw.write("<h3>query</h3>\n");
        bw.write("<a href=\"file://" + queryImage + "\"><img src=\"file://" + queryImage + "\"></a><p>\n");
        bw.write("<h3>results</h3>\n");
        for (int i = 0; i < hits.scoreDocs.length; i++) {
            // Load the stored document once per hit; it supplies both the link and the image source.
            String identifier = reader.document(hits.scoreDocs[i].doc).get("descriptorImageIdentifier");
            bw.write(hits.scoreDocs[i].score + " - <a href=\"file://" + identifier + "\"><img src=\"file://"
                    + identifier + "\"></a><p>\n");
        }
        bw.write("</body>\n" + "</html>");
    } finally {
        // Close the writer even when a write or a document lookup fails.
        bw.close();
    }
    return fileName;
}
From source file:net.semanticmetadata.lire.utils.FileUtils.java
License:Open Source License
public static void saveImageResultsToPng(String prefix, ImageSearchHits hits, String queryImage, IndexReader reader) throws IOException { LinkedList<BufferedImage> results = new LinkedList<BufferedImage>(); int width = 0; for (int i = 0; i < hits.length(); i++) { // hits.score(i) // hits.doc(i).get("descriptorImageIdentifier") BufferedImage tmp = ImageIO.read(new FileInputStream( reader.document(hits.documentID(i)).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0])); // if (tmp.getHeight() > 200) { double factor = 200d / ((double) tmp.getHeight()); tmp = ImageUtils.scaleImage(tmp, (int) (tmp.getWidth() * factor), 200); // } width += tmp.getWidth() + 5;// w ww . j ava2s. c o m results.add(tmp); } BufferedImage result = new BufferedImage(width, 220, BufferedImage.TYPE_INT_RGB); Graphics2D g2 = (Graphics2D) result.getGraphics(); g2.setColor(Color.white); g2.setBackground(Color.white); g2.clearRect(0, 0, result.getWidth(), result.getHeight()); g2.setColor(Color.black); g2.setFont(Font.decode("\"Arial\", Font.BOLD, 12")); int offset = 0; int count = 0; for (Iterator<BufferedImage> iterator = results.iterator(); iterator.hasNext();) { BufferedImage next = iterator.next(); g2.drawImage(next, offset, 20, null); g2.drawString(hits.score(count) + "", offset + 5, 12); offset += next.getWidth() + 5; count++; } ImageIO.write(result, "PNG", new File(prefix + "_" + (System.currentTimeMillis() / 1000) + ".png")); }
From source file:net.semanticmetadata.lire.utils.FileUtils.java
License:Open Source License
public static void saveImageResultsToPng(String prefix, TopDocs hits, String queryImage, IndexReader ir) throws IOException { LinkedList<BufferedImage> results = new LinkedList<BufferedImage>(); int width = 0; for (int i = 0; i < Math.min(hits.scoreDocs.length, 10); i++) { // hits.score(i) // hits.doc(i).get("descriptorImageIdentifier") BufferedImage tmp = ImageIO .read(new FileInputStream(ir.document(hits.scoreDocs[i].doc).get("descriptorImageIdentifier"))); if (tmp.getHeight() > 200) { double factor = 200d / ((double) tmp.getHeight()); tmp = ImageUtils.scaleImage(tmp, (int) (tmp.getWidth() * factor), 200); }//from w w w. j a va 2 s. c o m width += tmp.getWidth() + 5; results.add(tmp); } BufferedImage result = new BufferedImage(width, 220, BufferedImage.TYPE_INT_RGB); Graphics2D g2 = (Graphics2D) result.getGraphics(); g2.setColor(Color.black); g2.clearRect(0, 0, result.getWidth(), result.getHeight()); g2.setColor(Color.green); g2.setFont(Font.decode("\"Arial\", Font.BOLD, 12")); int offset = 0; int count = 0; for (Iterator<BufferedImage> iterator = results.iterator(); iterator.hasNext();) { BufferedImage next = iterator.next(); g2.drawImage(next, offset, 20, null); g2.drawString(hits.scoreDocs[count].score + "", offset + 5, 12); offset += next.getWidth() + 5; count++; } ImageIO.write(result, "PNG", new File(prefix + "_" + (System.currentTimeMillis() / 1000) + ".png")); }
From source file:net.semanticmetadata.lire.VisualWordsTest.java
License:Open Source License
public void testSearchInIndexSurf() throws IOException { int[] docIDs = new int[] { 7886, 1600, 4611, 4833, 4260, 2044, 7658 }; for (int i : docIDs) { IndexReader ir = DirectoryReader.open(FSDirectory.open(indexPath)); BOVWBuilder sfh = new BOVWBuilder(ir, new SurfFeature()); VisualWordsImageSearcher vis = new VisualWordsImageSearcher(10, DocumentBuilder.FIELD_NAME_SURF + DocumentBuilder.FIELD_NAME_BOVW); // Document doc = sfh.getVisualWords(surfBuilder.createDocument(ImageIO.read(new File(queryImage)), queryImage)); ImageSearchHits hits = vis.search(ir.document(i), ir); FileUtils.saveImageResultsToPng("results_bow_no_tf_" + i, hits, queryImage); }/*from ww w.j a v a 2 s. co m*/ }
From source file:net.semanticmetadata.lire.VisualWordsTest.java
License:Open Source License
/**
 * Uses a fixed set of indexed documents as queries against the SIFT bag-of-visual-words
 * field and saves each result list as a PNG.
 *
 * @throws IOException if the index cannot be opened or read, or a result image cannot be written
 */
public void testSearchInIndexSift() throws IOException {
    int[] docIDs = new int[] { 0, 10, 23, 35, 56, 77 };
    for (int i : docIDs) {
        System.out.println("i = " + i);
        IndexReader ir = DirectoryReader.open(FSDirectory.open(indexPath));
        try {
            VisualWordsImageSearcher vis = new VisualWordsImageSearcher(10,
                    DocumentBuilder.FIELD_NAME_SIFT + DocumentBuilder.FIELD_NAME_BOVW);
            ImageSearchHits hits = vis.search(ir.document(i), ir);
            FileUtils.saveImageResultsToPng("results_bow_no_tf_sift_" + i, hits, queryImage);
        } finally {
            // A reader is opened for every query document, so release it every time.
            ir.close();
        }
    }
}
From source file:net.semanticmetadata.lire.VisualWordsTest.java
License:Open Source License
public void testWikiSearchIndex() throws IOException { String indexPath = "./bovw-test"; VisualWordsImageSearcher searcher = new VisualWordsImageSearcher(10, DocumentBuilder.FIELD_NAME_SURF + DocumentBuilder.FIELD_NAME_BOVW); IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath))); // let's take the first document for a query: Document query = reader.document(2); ImageSearchHits hits = searcher.search(query, reader); // show or analyze your results .... FileUtils.saveImageResultsToPng("bovw", hits, query.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); }
From source file:net.sf.jclal.util.dataset.LuceneIndexToWekaDataSet.java
License:Open Source License
/**
 * Converts a Lucene index into a Weka file for classification. The class attribute of
 * the Weka file is nominal, so the classifiers will work with a nominal class.
 *
 * @param wekaFileName Path of the Weka file.
 * @param indexFile Path of the Lucene index. The indexed documents must have fields
 * called "class" and "content". WARNING: The fields must not contain any punctuation
 * sign.
 *
 * @return Instances of Weka, or {@code null} when {@code verify} rejects the target
 * file. The instances are sparse since they hold text information.
 *
 * @throws FileNotFoundException If the file does not exist.
 * @throws IOException If an error happens while writing the file.
 */
public Instances convertLuceneToWekaClassification(String wekaFileName, String indexFile)
        throws FileNotFoundException, IOException {
    File nuevo = new File(wekaFileName);
    if (!verify(nuevo)) {
        return null;
    }
    // NOTE(review): every FileUtil.writeFile call presumably appends to the file - confirm.
    FileUtil.writeFile(nuevo, "@RELATION " + nuevo.getName() + doubleLine);
    // NOTE(review): searcher and reader are only released by closeReaders at the end;
    // an exception thrown before that point leaves them open.
    IndexSearcher searcher = new IndexSearcher(indexFile);
    IndexReader reader = searcher.getIndexReader();
    int total = reader.maxDoc();
    HashMap<String, Integer> terms = new HashMap<String, Integer>(total * 2);
    Set<String> labels = new HashSet<String>(total * 2);
    int i;
    // First pass: collect the class labels and give every distinct term an index.
    for (int l = 0; l < total; l++) {
        if (!reader.isDeleted(l)) {
            TermFreqVector vector = reader.getTermFreqVector(l, content);
            Document doc = reader.document(l);
            String current = doc.getField(classF).stringValue();
            labels.add(current);
            if (vector != null) {
                String listosI[] = vector.getTerms();
                for (i = 0; i < listosI.length; i++) {
                    if (!terms.containsKey(listosI[i])) {
                        // A new term gets the next free index (the current map size).
                        terms.put(listosI[i], terms.size());
                    }
                }
            }
        }
    }
    // Copy the label set into an array so the last label can be written without a trailing comma.
    String[] labelReady = new String[labels.size()];
    int posLabel = 0;
    for (String string : labels) {
        labelReady[posLabel] = string;
        posLabel++;
    }
    Container[] terminos = convertir(terms);
    Arrays.sort(terminos);
    // One numeric attribute per term; the attribute is named by the int value of the container key.
    for (int j = 0; j < terminos.length; j++) {
        FileUtil.writeFile(nuevo, "@ATTRIBUTE " + (int) terminos[j].getKey() + " NUMERIC" + "\n");
    }
    FileUtil.writeFile(nuevo, "@ATTRIBUTE class {");
    for (int j = 0; j < labelReady.length - 1; j++) {
        FileUtil.writeFile(nuevo, labelReady[j] + ",");
    }
    // NOTE(review): throws ArrayIndexOutOfBoundsException when no label was collected (labelReady is empty).
    FileUtil.writeFile(nuevo, labelReady[labelReady.length - 1] + "}" + doubleLine);
    FileUtil.writeFile(nuevo, "@DATA\n");
    // Second pass: write one sparse row per non-deleted document that has a term vector.
    for (int pos = 0; pos < searcher.maxDoc(); pos++) {
        if (!reader.isDeleted(pos)) {
            TermFreqVector vector = reader.getTermFreqVector(pos, content);
            if (vector != null) {
                int[] origen = vector.getTermFrequencies();
                String[] termsI = vector.getTerms();
                int[] positions = new int[origen.length];
                // Map each term of this document to the index assigned in the first pass.
                for (int k = 0; k < origen.length; k++) {
                    positions[k] = terms.get(termsI[k]);
                }
                Container[] escribir = convertir(positions, origen);
                Arrays.sort(escribir);
                FileUtil.writeFile(nuevo, "{");
                for (int j = 0; j < escribir.length; j++) {
                    FileUtil.writeFile(nuevo, (int) escribir[j].getKey() + " " + escribir[j].getValue() + ",");
                }
                // The class value is written last, at index terms.size().
                FileUtil.writeFile(nuevo,
                        terms.size() + " " + searcher.doc(pos).getField(classF).stringValue() + "}\n");
            }
        }
    }
    // Close the searcher and the reader.
    closeReaders(searcher, reader);
    // Check that the generated Weka file can be read back.
    Instances test = testWekaFile(wekaFileName);
    return test;
}