// Java tutorial
/* * This file is part of the LIRE project: http://www.semanticmetadata.net/lire * LIRE is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * LIRE is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with LIRE; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * We kindly ask you to refer the any or one of the following publications in * any publication mentioning or employing Lire: * * Lux Mathias, Savvas A. Chatzichristofis. Lire: Lucene Image Retrieval * An Extensible Java CBIR Library. In proceedings of the 16th ACM International * Conference on Multimedia, pp. 1085-1088, Vancouver, Canada, 2008 * URL: http://doi.acm.org/10.1145/1459359.1459577 * * Lux Mathias. Content Based Image Retrieval with LIRE. In proceedings of the * 19th ACM International Conference on Multimedia, pp. 735-738, Scottsdale, * Arizona, USA, 2011 * URL: http://dl.acm.org/citation.cfm?id=2072432 * * Mathias Lux, Oge Marques. 
Visual Information Retrieval using Java and LIRE * Morgan & Claypool, 2013 * URL: http://www.morganclaypool.com/doi/abs/10.2200/S00468ED1V01Y201301ICR025 * * Copyright statement: * ==================== * (c) 2002-2013 by Mathias Lux (mathias@juggle.at) * http://www.semanticmetadata.net/lire, http://www.lire-project.net * * Updated: 07.11.14 14:16 */ package net.semanticmetadata.lire; import junit.framework.TestCase; import net.semanticmetadata.lire.filter.LsaFilter; import net.semanticmetadata.lire.filter.RerankFilter; import net.semanticmetadata.lire.impl.BitSamplingImageSearcher; import net.semanticmetadata.lire.impl.ChainedDocumentBuilder; import net.semanticmetadata.lire.impl.GenericDocumentBuilder; import net.semanticmetadata.lire.impl.GenericFastImageSearcher; import net.semanticmetadata.lire.utils.FileUtils; import net.semanticmetadata.lire.utils.LuceneUtils; import net.semanticmetadata.lire.utils.SerializationUtils; import org.apache.lucene.document.Document; import org.apache.lucene.document.StoredField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.MMapDirectory; import org.apache.lucene.util.BytesRef; import javax.imageio.ImageIO; import java.awt.image.BufferedImage; import java.io.*; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; /** * Created with IntelliJ IDEA. 
* User: mlux * Date: 29.11.12 * Time: 13:53 */ public class GeneralTest extends TestCase { private String[] testFiles = new String[] { "img01.JPG", "img02.JPG", "img03.JPG", "img04.JPG", "img05.JPG", "img06.JPG", "img07.JPG", "img08.JPG", "error.jpg", "91561.lire.jpg", "91561.jpg" }; private String testFilesPath = "./src/test/resources/images/"; private String indexPath = "test-index"; private String testExtensive = "./testdata/wang-1000"; public Class[] featureClasses = new Class[] { CEDD.class, FCTH.class, JCD.class, AutoColorCorrelogram.class, ColorLayout.class, EdgeHistogram.class, Gabor.class, JpegCoefficientHistogram.class, ScalableColor.class, SimpleColorHistogram.class, OpponentHistogram.class, LocalBinaryPatterns.class, RotationInvariantLocalBinaryPatterns.class, BinaryPatternsPyramid.class, LuminanceLayout.class, Tamura.class, FuzzyColorHistogram.class, PHOG.class }; private DocumentBuilder[] builders = new DocumentBuilder[] { DocumentBuilderFactory.getCEDDDocumentBuilder(), DocumentBuilderFactory.getFCTHDocumentBuilder(), DocumentBuilderFactory.getJCDDocumentBuilder(), DocumentBuilderFactory.getAutoColorCorrelogramDocumentBuilder(), DocumentBuilderFactory.getColorLayoutBuilder(), DocumentBuilderFactory.getEdgeHistogramBuilder(), // 5 DocumentBuilderFactory.getGaborDocumentBuilder(), DocumentBuilderFactory.getJpegCoefficientHistogramDocumentBuilder(), // 7 DocumentBuilderFactory.getScalableColorBuilder(), DocumentBuilderFactory.getColorHistogramDocumentBuilder(), DocumentBuilderFactory.getTamuraDocumentBuilder(), // 10 DocumentBuilderFactory.getOpponentHistogramDocumentBuilder(), // 11 DocumentBuilderFactory.getJointHistogramDocumentBuilder(), // 12 new GenericDocumentBuilder(PHOG.class, "phog") }; private ImageSearcher[] searchers = new ImageSearcher[] { ImageSearcherFactory.createCEDDImageSearcher(10), ImageSearcherFactory.createFCTHImageSearcher(10), ImageSearcherFactory.createJCDImageSearcher(10), 
ImageSearcherFactory.createAutoColorCorrelogramImageSearcher(10), ImageSearcherFactory.createColorLayoutImageSearcher(10), ImageSearcherFactory.createEdgeHistogramImageSearcher(10), ImageSearcherFactory.createGaborImageSearcher(10), ImageSearcherFactory.createJpegCoefficientHistogramImageSearcher(10), ImageSearcherFactory.createScalableColorImageSearcher(10), ImageSearcherFactory.createColorHistogramImageSearcher(10), ImageSearcherFactory.createTamuraImageSearcher(10), ImageSearcherFactory.createOpponentHistogramSearcher(10), ImageSearcherFactory.createJointHistogramImageSearcher(10), new GenericFastImageSearcher(10, PHOG.class, "phog") }; public void testExtractionAndMetric() throws IOException, IllegalAccessException, InstantiationException { for (Class c : featureClasses) { LireFeature lireFeature = (LireFeature) c.newInstance(); LireFeature tmpLireFeature = (LireFeature) c.newInstance(); for (String file : testFiles) { System.out.println(c.getName() + ": " + file); BufferedImage image = ImageIO.read(new FileInputStream(testFilesPath + file)); // image = ImageUtils.trimWhiteSpace(image); lireFeature.extract(image); float delta = 0.0000f; assertEquals(lireFeature.getDistance(lireFeature), 0, delta); // tmpLireFeature.setStringRepresentation(lireFeature.getStringRepresentation()); // assertEquals(lireFeature.getDistance(tmpLireFeature), 0, delta); tmpLireFeature.setByteArrayRepresentation(lireFeature.getByteArrayRepresentation()); assertEquals(lireFeature.getDistance(tmpLireFeature), 0, delta); tmpLireFeature.setByteArrayRepresentation(lireFeature.getByteArrayRepresentation(), 0, lireFeature.getByteArrayRepresentation().length); assertEquals(lireFeature.getDistance(tmpLireFeature), 0, delta); } } } public void testCreateAndSearchSmallIndex() throws IOException { for (int i = 0, buildersLength = builders.length; i < buildersLength; i++) { DocumentBuilder b = builders[i]; // create an index with a specific builder: IndexWriter iw = 
LuceneUtils.createIndexWriter(indexPath + "-small", true); for (String identifier : testFiles) { Document doc = b.createDocument(new FileInputStream(testFilesPath + identifier), identifier); doc.add(new StoredField("video_file", "surgery1.mp4")); doc.add(new StoredField("timestamp", "25")); iw.addDocument(doc); } iw.close(); ImageSearcher s = searchers[i]; IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-small"))); for (int k = 0; k < reader.maxDoc(); k++) { Document query = reader.document(k); ImageSearchHits hits = s.search(query, reader); for (int y = 0; y < hits.length(); y++) { Document result = hits.doc(y); if (y == 0) { // check if the first result is the query: assertEquals(result.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0] .equals(query.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), true); System.out.println(result.getValues("video_file")[0]); } else { // check if they are ordered by distance: assertEquals(hits.score(y) < hits.score(y - 1), true); } } } } } public void testReadIndex() throws IOException { IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("ucid-index-39997508"))); for (int k = 0; k < reader.maxDoc(); k++) { Document document = reader.document(k); BytesRef b = document.getField("featureCEDDLoDe_Hist").binaryValue(); double[] doubles = SerializationUtils.toDoubleArray(b.bytes, b.offset, b.length); if (document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0].endsWith("00008.png")) System.out.println(Arrays.toString(doubles)); } // check lucene tuorials and docs IndexSearcher is = new IndexSearcher(reader); TopDocs td = is.search(new TermQuery(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, "")), 10); for (int i = 0; i < td.scoreDocs.length; i++) { ScoreDoc scoreDoc = td.scoreDocs[i]; Document document = reader.document(scoreDoc.doc); } } public void testIndexLarge() throws IOException { // ArrayList<String> images = FileUtils.getAllImages(new 
File("C:\\Temp\\testImagelogos"), true); ArrayList<String> images = FileUtils.getAllImages(new File("testdata/UCID"), false); IndexWriter iw = LuceneUtils.createIndexWriter("index-large", true, LuceneUtils.AnalyzerType.WhitespaceAnalyzer); // select one feature for the large index: int featureIndex = 0; int count = 0; long ms = System.currentTimeMillis(); DocumentBuilder builder = new ChainedDocumentBuilder(); ((ChainedDocumentBuilder) builder).addBuilder(builders[featureIndex]); // ((ChainedDocumentBuilder) builder).addBuilder(builders[0]); for (Iterator<String> iterator = images.iterator(); iterator.hasNext();) { count++; if (count > 100 && count % 500 == 0) { System.out.println( count + " files indexed. " + (System.currentTimeMillis() - ms) / (count) + " ms per file"); } String file = iterator.next(); try { // try to trim the image first .... // BufferedImage img = ImageUtils.trimWhiteSpace(ImageIO.read(new FileInputStream(file))); // iw.addDocument(builder.createDocument(img, file)); iw.addDocument(builder.createDocument(new FileInputStream(file), file)); } catch (Exception e) { e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. } } iw.close(); } public void testPerformance() throws IOException { System.out.println(" ****************** CEDD OLD ****************** "); indexFiles("C:\\Temp\\images1", "index-large-new", 0, true); } /** * There was an error that images with the same score but different documents in the index * were not included in the result list. Here's the test for that. 
*/ public void testDuplicatesInIndex() throws IOException { indexFiles("src\\test\\resources\\images", "index-large-new", 0, true); indexFiles("src\\test\\resources\\images", "index-large-new", 0, false); indexFiles("src\\test\\resources\\images", "index-large-new", 0, false); ImageSearcher s = searchers[0]; IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index-large-new"))); Document query = reader.document(0); ImageSearchHits hits = s.search(query, reader); FileUtils.saveImageResultsToPng("duplicate_", hits, query.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); } private void indexFiles(String dir, String index, int featureIndex, boolean createNewIndex) throws IOException { ArrayList<String> images = FileUtils.getAllImages(new File(dir), true); IndexWriter iw = LuceneUtils.createIndexWriter(index, createNewIndex, LuceneUtils.AnalyzerType.WhitespaceAnalyzer); // select one feature for the large index: int count = 0; long ms = System.currentTimeMillis(); DocumentBuilder builder = new ChainedDocumentBuilder(); ((ChainedDocumentBuilder) builder).addBuilder(builders[featureIndex]); // ((ChainedDocumentBuilder) builder).addBuilder(builders[0]); for (Iterator<String> iterator = images.iterator(); iterator.hasNext();) { count++; if (count > 100 && count % 5000 == 0) { System.out.println( count + " files indexed. 
" + (System.currentTimeMillis() - ms) / (count) + " ms per file"); } String file = iterator.next(); try { iw.addDocument(builder.createDocument(new FileInputStream(file), file)); } catch (Exception e) { System.err.println("Error: " + e.getMessage()); } } iw.close(); } public void testSearchIndexLarge() throws IOException { for (int i = 0; i < 10; i++) { int queryDocID = (int) (Math.random() * 800); // queryDocID = 877 * (i + 1); IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index-large"))); // select one feature for the large index: int featureIndex = 0; int count = 0; long ms = System.currentTimeMillis(); ImageSearchHits hits = searchers[featureIndex].search(reader.document(queryDocID), reader); for (int j = 0; j < hits.length(); j++) { String fileName = hits.doc(j).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; System.out.println(hits.score(j) + ": \t" + fileName); } // FileUtils.saveImageResultsToHtml("GeneralTest_testSearchIndexLarge_", hits, reader.document(10).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); FileUtils.saveImageResultsToPng("GeneralTest_testSearchIndexLarge_" + i + "_", hits, reader.document(queryDocID).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); } } public void testSearchRunTime() throws IOException { int queryDocID; IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index-large-new"))); int featureIndex = 0; ImageSearchHits hits = searchers[featureIndex].search(reader.document(0), reader); hits = searchers[featureIndex].search(reader.document(1), reader); long ms = System.currentTimeMillis(); for (int i = 0; i < 100; i++) { queryDocID = i; // select one feature for the large index: hits = searchers[featureIndex].search(reader.document(queryDocID), reader); } ms = System.currentTimeMillis() - ms; System.out.println("ms = " + ms / 100); } public void testRerankFilters() throws IOException { int queryDocID = (int) (Math.random() * 10000); IndexReader reader = 
DirectoryReader.open(FSDirectory.open(new File("index-large"))); // select one feature for the large index: int featureIndex = 4; int count = 0; long ms = System.currentTimeMillis(); ImageSearchHits hits = searchers[featureIndex].search(reader.document(queryDocID), reader); RerankFilter rerank = new RerankFilter(featureClasses[0], DocumentBuilder.FIELD_NAME_CEDD); LsaFilter lsa = new LsaFilter(featureClasses[0], DocumentBuilder.FIELD_NAME_CEDD); FileUtils.saveImageResultsToPng("GeneralTest_rerank_0_old", hits, reader.document(queryDocID).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); hits = rerank.filter(hits, reader.document(queryDocID)); FileUtils.saveImageResultsToPng("GeneralTest_rerank_1_new", hits, reader.document(queryDocID).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); hits = lsa.filter(hits, reader.document(queryDocID)); FileUtils.saveImageResultsToPng("GeneralTest_rerank_2_lsa", hits, reader.document(queryDocID).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]); } public void testClassify() throws IOException { boolean weightByRank = true; String[] classes = { "2012", "beach", "food", "london", "music", "nature", "people", "sky", "travel", "wedding" }; int k = 50; // CONFIG String fieldName = DocumentBuilder.FIELD_NAME_COLORLAYOUT; LireFeature feature = new ColorLayout(); String indexPath = "E:\\acmgc-cl-idx"; System.out.println( "Tests for feature " + fieldName + " with k=" + k + " - weighting by rank sum: " + weightByRank); System.out.println("========================================"); HashMap<String, Integer> tag2count = new HashMap<String, Integer>(k); HashMap<String, Double> tag2weight = new HashMap<String, Double>(k); int c = 9; // used for just one class ... 
// for (int c = 0; c < 10; c++) { String classIdentifier = classes[c]; String listFiles = "D:\\DataSets\\Yahoo-GC\\test\\" + classIdentifier + ".txt"; // INIT int[] confusion = new int[10]; Arrays.fill(confusion, 0); HashMap<String, Integer> class2id = new HashMap<String, Integer>(10); for (int i = 0; i < classes.length; i++) class2id.put(classes[i], i); BufferedReader br = new BufferedReader(new FileReader(listFiles)); String line; IndexReader ir = DirectoryReader.open(MMapDirectory.open(new File(indexPath))); // in-memory linear search // ImageSearcher bis = new GenericFastImageSearcher(k, feature.getClass(), fieldName, true, ir); // hashing based searcher BitSamplingImageSearcher bis = new BitSamplingImageSearcher(k, fieldName, fieldName + "_hash", feature, 1000); ImageSearchHits hits; int count = 0, countCorrect = 0; long ms = System.currentTimeMillis(); while ((line = br.readLine()) != null) { try { tag2count.clear(); tag2weight.clear(); hits = bis.search(ImageIO.read(new File(line)), ir); // set tag weights and counts. for (int l = 0; l < k; l++) { String tag = getTag(hits.doc(l)); if (tag2count.get(tag) == null) tag2count.put(tag, 1); else tag2count.put(tag, tag2count.get(tag) + 1); if (weightByRank) { if (tag2weight.get(tag) == null) tag2weight.put(tag, (double) l); else tag2weight.put(tag, (double) l + tag2weight.get(tag)); } else { if (tag2weight.get(tag) == null) tag2weight.put(tag, Double.valueOf(hits.score(l))); else tag2weight.put(tag, (double) l + hits.score(l)); } } // find class: int maxCount = 0, maxima = 0; String classifiedAs = null; for (Iterator<String> tagIterator = tag2count.keySet().iterator(); tagIterator.hasNext();) { String tag = tagIterator.next(); if (tag2count.get(tag) > maxCount) { maxCount = tag2count.get(tag); maxima = 1; classifiedAs = tag; } else if (tag2count.get(tag) == maxCount) { maxima++; } } // if there are two or more classes with the same number of results, then we take a look at the weights. 
// else the class is alread given in classifiedAs. if (maxima > 1) { double minWeight = Double.MAX_VALUE; for (Iterator<String> tagIterator = tag2count.keySet().iterator(); tagIterator.hasNext();) { String tag = tagIterator.next(); if (tag2weight.get(tag) < minWeight) { minWeight = tag2weight.get(tag); classifiedAs = tag; } } } // if (tag2.equals(tag3)) tag1 = tag2; count++; if (classifiedAs.equals(classIdentifier)) countCorrect++; // confusion: confusion[class2id.get(classifiedAs)]++; // System.out.printf("%10s (%4.3f, %10d, %4d)\n", classifiedAs, ((double) countCorrect / (double) count), count, (System.currentTimeMillis() - ms) / count); } catch (Exception e) { System.err.println(e.getMessage()); } } // System.out.println("Results for class " + classIdentifier); System.out.printf("Class\tAvg. Precision\tCount Test Images\tms per test\n"); System.out.printf("%s\t%4.5f\t%10d\t%4d\n", classIdentifier, ((double) countCorrect / (double) count), count, (System.currentTimeMillis() - ms) / count); System.out.printf("Confusion\t"); // for (int i = 0; i < classes.length; i++) { // System.out.printf("%s\t", classes[i]); // } // System.out.println(); for (int i = 0; i < classes.length; i++) { System.out.printf("%d\t", confusion[i]); } System.out.println(); // } } private String getTag(Document d) { StringBuilder ab = new StringBuilder(d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0] .replace("E:\\I:\\ACM_complete_dataset\\", "")); return ab.substring(0, ab.indexOf("\\")).toString(); } public void testReUse() throws IOException, IllegalAccessException, InstantiationException { ArrayList<String> testFiles = FileUtils.getAllImages(new File("testdata/ferrari"), true); for (Class c : featureClasses) { LireFeature f1 = (LireFeature) c.newInstance(); System.out.println(c.getName()); for (String testFile : testFiles) { f1.extract(ImageIO.read(new File(testFile))); LireFeature f2 = (LireFeature) c.newInstance(); f2.extract(ImageIO.read(new File(testFile))); // 
System.out.println(Arrays.toString(f1.getDoubleHistogram())); // System.out.println(Arrays.toString(f2.getDoubleHistogram())); assertEquals(f2.getDistance(f1), 0d, 0.000000001); f2.setByteArrayRepresentation(f1.getByteArrayRepresentation()); assertEquals(f2.getDistance(f1), 0d, 0.000000001); byte[] tmp = new byte[1024 * 100]; Arrays.fill(tmp, (byte) 0x000F); byte[] bytes = f1.getByteArrayRepresentation(); System.arraycopy(bytes, 0, tmp, 12, bytes.length); f2.setByteArrayRepresentation(tmp, 12, bytes.length); assertEquals(f2.getDistance(f1), 0d, 0.000000001); } } } }