List of usage examples for org.apache.lucene.index.IndexReader.numDocs()
public abstract int numDocs();
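Note: numDocs() returns the number of live (non-deleted) documents, whereas maxDoc() is the upper bound on document ids. When an index contains deletions the two differ, so a scan bounded by numDocs() (as several of the examples below do) will not reach the highest document ids. The following is a minimal, self-contained sketch of the scanning pattern used throughout these examples; it loops to maxDoc() and filters with liveDocs so that every live document is visited. The class name NumDocsScan and the index path "index" are placeholders and not part of the LIRE sources.

import java.io.File;
import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.util.Bits;

public class NumDocsScan {
    public static void main(String[] args) throws IOException {
        // Placeholder path to an existing Lucene index.
        IndexReader reader = DirectoryReader.open(MMapDirectory.open(new File("index")));
        try {
            System.out.println("live docs: " + reader.numDocs() + ", maxDoc: " + reader.maxDoc());
            // Needed to check whether a document is deleted (null if the index has no deletions).
            Bits liveDocs = MultiFields.getLiveDocs(reader);
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (reader.hasDeletions() && !liveDocs.get(i))
                    continue; // deleted document, skip it
                Document d = reader.document(i);
                // ... work with the stored fields of d, e.g. a LIRE feature field ...
            }
        } finally {
            reader.close();
        }
    }
}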
From source file:net.semanticmetadata.lire.classifiers.ClassifierTest.java
License:Open Source License
public static boolean testClassifyNCombinedFeaturesMulti(int start, int end, String storeToFile, int numberOfNeighbours, String indexLocation, String photosLocation, String testSetFile, int searchedClass, ArrayList<String> fieldsArray, ArrayList<String> classArray, int combineNfeatures, String class1, String class2, double informationGainThreshold, String useIndex) throws IOException, NoSuchFieldException, IllegalAccessException, ClassNotFoundException, InstantiationException { //number of features and how many should be combined int feats = fieldsArray.size(); int combs = combineNfeatures; PrintWriter print_line = new PrintWriter(new BufferedWriter(new FileWriter(storeToFile))); //all the combinations stored here ArrayList combinations = print_nCr(feats, combs); // String[] fieldsArray = {"CEDD", "EdgeHistogram", "FCTH", "ColorLayout", "PHOG", "JCD", "Gabor", "JpegCoeffs", "Tamura", "Luminance_Layout", "Opponent_Histogram", "ScalableColor"}; // String[] classArray = {"CEDD", "EdgeHistogram", "FCTH", "ColorLayout", "PHOG", "JCD", "Gabor", "JpegCoefficientHistogram", "Tamura", "LuminanceLayout", "OpponentHistogram", "ScalableColor"}; //get the features for the column names String sCombinedFeatures = ""; for (int i = 0; i < 12; i++) { sCombinedFeatures = sCombinedFeatures + "Feature" + (i + 1) + ";"; } print_line.print(sCombinedFeatures + "K=;IGTH;Weight Rank=;Class;Precision;Recall;True Negative Rate;Accuracy;False Positive Rate;F-Measure;Count Test Images;Count Correct;ms per test;TP;FP;TN;FN"); print_line.println(); print_line.flush(); ArrayList<String> fields1List = new ArrayList<String>(); ArrayList<String> class1List = new ArrayList<String>(); for (int i = 0; i < combinations.size(); i += combs) { for (int j = 0; j < combs; j++) { // System.out.print(combinations.get(i + j).toString() + " "); int x = (Integer) combinations.get(i + j) - 1; fields1List.add(fieldsArray.get(x)); class1List.add(classArray.get(x)); } } for (int i = 0; i < combinations.size(); i += combs) { // System.out.println(i); ArrayList featureNameList = new ArrayList(); ArrayList lireFeatureList = new ArrayList(); ArrayList indexLocationList = new ArrayList(); //iterate over all fields lists and fill them into an array for (int j = 0; j < combs; j++) { // System.out.print(combinations.get(i + j).toString() + " "); featureNameList.add((String) DocumentBuilder.class.getField("FIELD_NAME_" + fields1List.get(i + j).toUpperCase()).get(null)); lireFeatureList.add((LireFeature) Class.forName("net.semanticmetadata.lire.imageanalysis." + class1List.get(i + j)).newInstance()); indexLocationList.add(indexLocation + class1List.get(i + j)); } boolean weightByRank = true; boolean createHTML = true; // String[] classes = {"yes", "no"}; String[] classes = { class1, class2 }; int k = numberOfNeighbours; //System.out.println("Tests for lf1 " + f1 + " with k=" + k + " combined with " + f2 + " - weighting by rank sum: " + weightByRank); //System.out.println("========================================"); HashMap<String, Integer> tag2count = new HashMap<String, Integer>(k); HashMap<String, Double> tag2weight = new HashMap<String, Double>(k); int c = 0; // used for just one class ... 
// for (int c = 0; c < 10; c++) { c = searchedClass; String classIdentifier = classes[c]; //"D:\\Datasets\\FashionTest\\fashion10000Test\\" + classIdentifier + ".txt"; // INIT ArrayList<String> classesHTML = new ArrayList<String>(); ArrayList<String> filesHTML = new ArrayList<String>(); int[] confusion = new int[2]; Arrays.fill(confusion, 0); HashMap<String, Integer> class2id = new HashMap<String, Integer>(2); for (int d = 0; d < classes.length; d++) class2id.put(classes[d], d); // BufferedReader br = new BufferedReader(new FileReader(testSetFile)); // String line; IndexReader irt1 = null; IndexReader irt2 = null; IndexReader irt3 = null; IndexReader irt4 = null; IndexReader irt5 = null; IndexReader irt6 = null; IndexReader irt7 = null; IndexReader irt8 = null; IndexReader irt9 = null; IndexReader irt10 = null; IndexReader irt11 = null; IndexReader irt12 = null; IndexReader ir2 = null; ImageSearcher bis2 = null; IndexReader ir3 = null; ImageSearcher bis3 = null; IndexReader ir4 = null; ImageSearcher bis4 = null; IndexReader ir5 = null; ImageSearcher bis5 = null; IndexReader ir6 = null; ImageSearcher bis6 = null; IndexReader ir7 = null; ImageSearcher bis7 = null; IndexReader ir8 = null; ImageSearcher bis8 = null; IndexReader ir9 = null; ImageSearcher bis9 = null; IndexReader ir10 = null; ImageSearcher bis10 = null; IndexReader ir11 = null; ImageSearcher bis11 = null; IndexReader ir12 = null; ImageSearcher bis12 = null; /* IndexReader ir2 = null; BitSamplingImageSearcher bis2 = null; IndexReader ir3 = null; BitSamplingImageSearcher bis3 = null; IndexReader ir4 = null; BitSamplingImageSearcher bis4 = null; IndexReader ir5 = null; BitSamplingImageSearcher bis5 = null; IndexReader ir6 = null; BitSamplingImageSearcher bis6 = null; IndexReader ir7 = null; BitSamplingImageSearcher bis7 = null; IndexReader ir8 = null; BitSamplingImageSearcher bis8 = null; IndexReader ir9 = null; BitSamplingImageSearcher bis9 = null; IndexReader ir10 = null; BitSamplingImageSearcher bis10 = null; IndexReader ir11 = null; BitSamplingImageSearcher bis11 = null; IndexReader ir12 = null; BitSamplingImageSearcher bis12 = null;*/ IndexReader ir1 = DirectoryReader.open(MMapDirectory.open(new File((String) indexLocationList.get(0)))); irt1 = DirectoryReader .open(MMapDirectory.open(new File((String) indexLocationList.get(0) + "TestSet"))); // ImageSearcher bis1 = new GenericFastImageSearcher(k, (Class<?>) lireFeatureList.get(0).getClass(), (String) featureNameList.get(0), true, ir1); GenericFastImageSearcher bis1 = new GenericFastImageSearcher(k, (Class<?>) lireFeatureList.get(0).getClass(), (String) featureNameList.get(0), true, ir1); if (combs > 1) { ir2 = DirectoryReader.open(MMapDirectory.open(new File((String) indexLocationList.get(1)))); irt2 = DirectoryReader .open(MMapDirectory.open(new File((String) indexLocationList.get(1) + "TestSet"))); bis2 = new GenericFastImageSearcher(k, (Class<?>) lireFeatureList.get(1).getClass(), (String) featureNameList.get(1), true, ir2); } if (combs > 2) { ir3 = DirectoryReader.open(MMapDirectory.open(new File((String) indexLocationList.get(2)))); irt3 = DirectoryReader .open(MMapDirectory.open(new File((String) indexLocationList.get(2) + "TestSet"))); bis3 = new GenericFastImageSearcher(k, (Class<?>) lireFeatureList.get(2).getClass(), (String) featureNameList.get(2), true, ir3); } if (combs > 3) { ir4 = DirectoryReader.open(MMapDirectory.open(new File((String) indexLocationList.get(3)))); irt4 = DirectoryReader .open(MMapDirectory.open(new File((String) indexLocationList.get(3) + 
"TestSet"))); bis4 = new GenericFastImageSearcher(k, (Class<?>) lireFeatureList.get(3).getClass(), (String) featureNameList.get(3), true, ir4); } if (combs > 4) { ir5 = DirectoryReader.open(MMapDirectory.open(new File((String) indexLocationList.get(4)))); irt5 = DirectoryReader .open(MMapDirectory.open(new File((String) indexLocationList.get(4) + "TestSet"))); bis5 = new GenericFastImageSearcher(k, (Class<?>) lireFeatureList.get(4).getClass(), (String) featureNameList.get(4), true, ir5); } if (combs > 5) { ir6 = DirectoryReader.open(MMapDirectory.open(new File((String) indexLocationList.get(5)))); irt6 = DirectoryReader .open(MMapDirectory.open(new File((String) indexLocationList.get(5) + "TestSet"))); bis6 = new GenericFastImageSearcher(k, (Class<?>) lireFeatureList.get(5).getClass(), (String) featureNameList.get(5), true, ir6); } if (combs > 6) { ir7 = DirectoryReader.open(MMapDirectory.open(new File((String) indexLocationList.get(6)))); irt7 = DirectoryReader .open(MMapDirectory.open(new File((String) indexLocationList.get(6) + "TestSet"))); bis7 = new GenericFastImageSearcher(k, (Class<?>) lireFeatureList.get(6).getClass(), (String) featureNameList.get(6), true, ir7); } if (combs > 7) { ir8 = DirectoryReader.open(MMapDirectory.open(new File((String) indexLocationList.get(7)))); irt8 = DirectoryReader .open(MMapDirectory.open(new File((String) indexLocationList.get(7) + "TestSet"))); bis8 = new GenericFastImageSearcher(k, (Class<?>) lireFeatureList.get(7).getClass(), (String) featureNameList.get(7), true, ir8); } if (combs > 8) { ir9 = DirectoryReader.open(MMapDirectory.open(new File((String) indexLocationList.get(8)))); irt9 = DirectoryReader .open(MMapDirectory.open(new File((String) indexLocationList.get(8) + "TestSet"))); bis9 = new GenericFastImageSearcher(k, (Class<?>) lireFeatureList.get(8).getClass(), (String) featureNameList.get(8), true, ir9); } if (combs > 9) { ir10 = DirectoryReader.open(MMapDirectory.open(new File((String) indexLocationList.get(9)))); irt10 = DirectoryReader .open(MMapDirectory.open(new File((String) indexLocationList.get(9) + "TestSet"))); bis10 = new GenericFastImageSearcher(k, (Class<?>) lireFeatureList.get(9).getClass(), (String) featureNameList.get(9), true, ir10); } if (combs > 10) { ir11 = DirectoryReader.open(MMapDirectory.open(new File((String) indexLocationList.get(10)))); irt11 = DirectoryReader .open(MMapDirectory.open(new File((String) indexLocationList.get(10) + "TestSet"))); bis11 = new GenericFastImageSearcher(k, (Class<?>) lireFeatureList.get(10).getClass(), (String) featureNameList.get(10), true, ir11); } if (combs > 11) { ir12 = DirectoryReader.open(MMapDirectory.open(new File((String) indexLocationList.get(11)))); irt12 = DirectoryReader .open(MMapDirectory.open(new File((String) indexLocationList.get(11) + "TestSet"))); bis12 = new GenericFastImageSearcher(k, (Class<?>) lireFeatureList.get(11).getClass(), (String) featureNameList.get(11), true, ir12); } ImageSearchHits hits1; ImageSearchHits hits2 = null; ImageSearchHits hits3 = null; ImageSearchHits hits4 = null; ImageSearchHits hits5 = null; ImageSearchHits hits6 = null; ImageSearchHits hits7 = null; ImageSearchHits hits8 = null; ImageSearchHits hits9 = null; ImageSearchHits hits10 = null; ImageSearchHits hits11 = null; ImageSearchHits hits12 = null; int count = 0, countCorrect = 0; double countTp = 0, countFp = 0, countTn = 0, countFn = 0; //F1 Metric long ms = System.currentTimeMillis(); for (int x = 0; x < irt1.numDocs(); x++) { // while ((line = br.readLine()) != null) { // 
System.out.println(x); tag2count.clear(); tag2weight.clear(); // tag2count.put("yes", 1); // tag2count.put("no", 1); // tag2weight.put("yes", 1.0); // tag2weight.put("no", 1.0); tag2count.put(class1, 1); tag2count.put(class2, 1); tag2weight.put(class1, 1.0); tag2weight.put(class2, 1.0); hits1 = bis1.search(irt1.document(x), ir1); if (combs > 1) { hits2 = bis2.search(irt2.document(x), ir2); } if (combs > 2) { hits3 = bis3.search(irt3.document(x), ir3); } if (combs > 3) { hits4 = bis4.search(irt4.document(x), ir4); } if (combs > 4) { hits5 = bis5.search(irt5.document(x), ir5); } if (combs > 5) { hits6 = bis6.search(irt6.document(x), ir6); } if (combs > 6) { hits7 = bis7.search(irt7.document(x), ir7); } if (combs > 7) { hits8 = bis8.search(irt8.document(x), ir8); } if (combs > 8) { hits9 = bis9.search(irt9.document(x), ir9); } if (combs > 9) { hits10 = bis10.search(irt10.document(x), ir10); } if (combs > 10) { hits11 = bis11.search(irt11.document(x), ir11); } if (combs > 11) { hits12 = bis12.search(irt12.document(x), ir12); } // set tag weights and counts. for (int l = 0; l < k; l++) { // String tag = getTag(hits1.doc(l), photosLocation); tag2count.put(getTag(hits1.doc(l), photosLocation), tag2count.get(getTag(hits1.doc(l), photosLocation)) + 1); if (combs > 1) tag2count.put(getTag(hits2.doc(l), photosLocation), tag2count.get(getTag(hits2.doc(l), photosLocation)) + 1); if (combs > 2) tag2count.put(getTag(hits3.doc(l), photosLocation), tag2count.get(getTag(hits3.doc(l), photosLocation)) + 1); if (combs > 3) tag2count.put(getTag(hits4.doc(l), photosLocation), tag2count.get(getTag(hits4.doc(l), photosLocation)) + 1); if (combs > 4) tag2count.put(getTag(hits5.doc(l), photosLocation), tag2count.get(getTag(hits5.doc(l), photosLocation)) + 1); if (combs > 5) tag2count.put(getTag(hits6.doc(l), photosLocation), tag2count.get(getTag(hits6.doc(l), photosLocation)) + 1); if (combs > 6) tag2count.put(getTag(hits7.doc(l), photosLocation), tag2count.get(getTag(hits7.doc(l), photosLocation)) + 1); if (combs > 7) tag2count.put(getTag(hits8.doc(l), photosLocation), tag2count.get(getTag(hits8.doc(l), photosLocation)) + 1); if (combs > 8) tag2count.put(getTag(hits9.doc(l), photosLocation), tag2count.get(getTag(hits9.doc(l), photosLocation)) + 1); if (combs > 9) tag2count.put(getTag(hits10.doc(l), photosLocation), tag2count.get(getTag(hits10.doc(l), photosLocation)) + 1); if (combs > 10) tag2count.put(getTag(hits11.doc(l), photosLocation), tag2count.get(getTag(hits11.doc(l), photosLocation)) + 1); if (combs > 11) tag2count.put(getTag(hits12.doc(l), photosLocation), tag2count.get(getTag(hits12.doc(l), photosLocation)) + 1); if (weightByRank) { tag2weight.put(getTag(hits1.doc(l), photosLocation), (double) l); if (combs > 1) tag2weight.put(getTag(hits2.doc(l), photosLocation), (double) l); if (combs > 2) tag2weight.put(getTag(hits3.doc(l), photosLocation), (double) l); if (combs > 3) tag2weight.put(getTag(hits4.doc(l), photosLocation), (double) l); if (combs > 4) tag2weight.put(getTag(hits5.doc(l), photosLocation), (double) l); if (combs > 5) tag2weight.put(getTag(hits6.doc(l), photosLocation), (double) l); if (combs > 6) tag2weight.put(getTag(hits7.doc(l), photosLocation), (double) l); if (combs > 7) tag2weight.put(getTag(hits8.doc(l), photosLocation), (double) l); if (combs > 8) tag2weight.put(getTag(hits9.doc(l), photosLocation), (double) l); if (combs > 9) tag2weight.put(getTag(hits10.doc(l), photosLocation), (double) l); if (combs > 10) tag2weight.put(getTag(hits11.doc(l), photosLocation), (double) l); if 
(combs > 11) tag2weight.put(getTag(hits12.doc(l), photosLocation), (double) l); } // System.out.println(System.currentTimeMillis()-ms); // ms=System.currentTimeMillis(); } // find class, iterate over the tags (classes): int maxCount = 0, maxima = 0; String classifiedAs = null; for (Iterator<String> tagIterator = tag2count.keySet().iterator(); tagIterator.hasNext();) { String tag = tagIterator.next(); // System.out.println(tag+tag2count.get(tag)); if (tag2count.get(tag) > maxCount) { maxCount = tag2count.get(tag); maxima = 1; classifiedAs = tag; } else if (tag2count.get(tag) == maxCount) { maxima++; } } // if there are two or more classes with the same number of results, then we take a look at the weights. // else the class is alread given in classifiedAs. if (maxima > 1) { double minWeight = Double.MAX_VALUE; for (Iterator<String> tagIterator = tag2count.keySet().iterator(); tagIterator.hasNext();) { String tag = tagIterator.next(); if (tag2weight.get(tag) < minWeight) { minWeight = tag2weight.get(tag); classifiedAs = tag; } } } count++; //SHOW THE CLASSIFICATION // System.out.println(classifiedAs+";"+line); classesHTML.add(classifiedAs); filesHTML.add(irt1.document(x).getField("descriptorImageIdentifier").stringValue()); //F1 Metric // if (classifiedAs.equals(getTagLine(line, photosLocation)) && classifiedAs.equals("yes")) { if (classifiedAs.equals(getTag(irt1.document(x), photosLocation)) && classifiedAs.equals(class1)) { countCorrect++; countTp++; // } else if (!classifiedAs.equals(getTagLine(line, photosLocation)) && classifiedAs.equals("yes")) } else if (!classifiedAs.equals(getTag(irt1.document(x), photosLocation)) && classifiedAs.equals(class1)) countFp++; // if (classifiedAs.equals(getTagLine(line, photosLocation)) && classifiedAs.equals("no")) { if (classifiedAs.equals(getTag(irt1.document(x), photosLocation)) && classifiedAs.equals(class2)) { countCorrect++; countTn++; // } else if (!classifiedAs.equals(getTagLine(line, photosLocation)) && classifiedAs.equals("no")) } else if (!classifiedAs.equals(getTag(irt1.document(x), photosLocation)) && classifiedAs.equals(class2)) countFn++; // confusion: //confusion[class2id.get(classifiedAs)]++; // System.out.printf("%10s (%4.3f, %10d, %4d)\n", classifiedAs, ((double) countCorrect / (double) count), count, (System.currentTimeMillis() - ms) / count); } double precisicon = getPrecision(countTp, countFp); double recall = getRecall(countTp, countFn); double trueNegativeRate = getTrueNegativeRate(countTn, countFp); double accuracy = getAccuracy(countTp, countFp, countTn, countFn); double fMeasure = getFmeasure(precisicon, recall); double falsePositiveRate = getFalsePositiveRate(countFp, countTn); double mccMeasure = getMccMeasure(countTp, countFp, countTn, countFn); double wFM = getWFM(countTp, countFp, countTn, countFn, fMeasure, count); // System.out.println("Results for class " + classIdentifier); // System.out.printf("Class\tPrecision\tRecall\tTrue Negative Rate\tAccuracy\tF-Measure\tCount Test Images\tCount Corret\tms per test\n"); // System.out.printf("%s\t%4.5f\t%4.5f\t%4.5f\t%4.5f\t%4.5f\t%10d\t%10d\t%4d\n", classIdentifier, precisicon, recall, trueNegativeRate,accuracy, fMeasure, count, countCorrect, (System.currentTimeMillis() - ms) / count); // System.out.println(i + 1 + " of " + class1List.size() + " finished. " + (System.currentTimeMillis() - ms) / 1000 + " seconds per round. 
" + "Feature: " + " Current y: " + i); String classesLongName = ""; for (int j = 0; j < combs; j++) { // System.out.print(combinations.get(i + j).toString() + " "); classesLongName = classesLongName + fields1List.get(i + j) + ";"; } // print_line.printf("%s,%s;%s;%s;%4.5f;%4.5f;%4.5f;%4.5f;%4.5f;%4.5f;%10d;%10d;%4d;%4.5f;%4.5f;%4.5f;%4.5f\n", classesLongName, k, weightByRank, classIdentifier, precisicon, recall, trueNegativeRate, accuracy, falsePositiveRate, fMeasure, count, countCorrect, (System.currentTimeMillis() - ms) / count, countTp, countFp, countTn, countFn); System.out.printf( "%s%s;%s;%s;%s;%4.5f;%4.5f;%4.5f;%4.5f;%4.5f;%4.5f;%4.5f;%4.5f;%10d;%10d;%4d;%4.5f;%4.5f;%4.5f;%4.5f\n", classesLongName, k, informationGainThreshold, weightByRank, classIdentifier, precisicon, recall, trueNegativeRate, accuracy, falsePositiveRate, fMeasure, mccMeasure, wFM, count, countCorrect, (System.currentTimeMillis() - ms) / count, countTp, countFp, countTn, countFn); print_line.printf( "%s%s;%s;%s;%s;%4.5f;%4.5f;%4.5f;%4.5f;%4.5f%4.5f;%4.5f;%4.5f;%10d;%10d;%4d;%4.5f;%4.5f;%4.5f;%4.5f\n", classesLongName, k, informationGainThreshold, weightByRank, classIdentifier, precisicon, recall, trueNegativeRate, accuracy, falsePositiveRate, fMeasure, mccMeasure, wFM, count, countCorrect, (System.currentTimeMillis() - ms) / count, countTp, countFp, countTn, countFn); print_line.flush(); //Create HTML if (createHTML == true) { String fileName = "classifieresults-" + System.currentTimeMillis() / 1000 + ".html"; BufferedWriter bw = new BufferedWriter(new FileWriter(fileName)); bw.write("<html>\n" + "<head><title>Classification Results</title></head>\n" + "<body bgcolor=\"#FFFFFF\">\n"); bw.write("<table>"); // int elems = Math.min(filesHTML.size(),50); int elems = filesHTML.size(); for (int d = 0; d < elems; d++) { if (d % 3 == 0) bw.write("<tr>"); String s = filesHTML.get(d); String colorF = "rgb(0, 255, 0)"; if (classesHTML.get(d).equals("no")) colorF = "rgb(255, 0, 0)"; // String s = ir1.document(topDocs.scoreDocs[i].doc).get("descriptorImageIdentifier"); // String s = filesHTML.get(d); // System.out.println(reader.document(topDocs.scoreDocs[i].doc).get("featLumLay")); // s = new File(s).getAbsolutePath(); // System.out.println(s); bw.write("<td><a href=\"" + s + "\"><img style=\"max-width:220px;border:medium solid " + colorF + ";\"src=\"" + s + "\" border=\"" + 5 + "\" style=\"border: 3px\n" + "black solid;\"></a></td>\n"); if (d % 3 == 2) bw.write("</tr>"); } if (elems % 3 != 0) { if (elems % 3 == 2) { bw.write("<td>-</td with exit code 0\nd>\n"); bw.write("<td>-</td>\n"); } else if (elems % 3 == 2) { bw.write("<td>-</td>\n"); } bw.write("</tr>"); } bw.write("</table></body>\n" + "</html>"); bw.close(); } // } // kfor // } } print_line.close(); return true; }
From source file:net.semanticmetadata.lire.impl.CEDDImageSearcher.java
License:Open Source License
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    // get the first document:
    SimpleImageDuplicates simpleImageDuplicates = null;
    try {
        if (!IndexReader.indexExists(reader.directory()))
            throw new FileNotFoundException("No index found at this specific location.");
        Document doc = reader.document(0);
        CEDD lireFeature = (CEDD) descriptorClass.newInstance();
        byte[] cls = doc.getBinaryValue(fieldName);
        if (cls != null && cls.length > 0)
            lireFeature.setByteArrayRepresentation(cls);
        HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>();
        // find duplicates ...
        boolean hasDeletions = reader.hasDeletions();
        int docs = reader.numDocs();
        int numDuplicates = 0;
        for (int i = 0; i < docs; i++) {
            if (hasDeletions && reader.isDeleted(i)) {
                continue;
            }
            Document d = reader.document(i);
            float distance = getDistance(d, lireFeature);
            if (!duplicates.containsKey(distance)) {
                duplicates.put(distance, new LinkedList<String>());
            } else {
                numDuplicates++;
            }
            duplicates.get(distance).add(d.getFieldable(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
        if (numDuplicates == 0)
            return null;
        LinkedList<List<String>> results = new LinkedList<List<String>>();
        for (float f : duplicates.keySet()) {
            if (duplicates.get(f).size() > 1) {
                results.add(duplicates.get(f));
            }
        }
        simpleImageDuplicates = new SimpleImageDuplicates(results);
    } catch (InstantiationException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage());
    } catch (IllegalAccessException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage());
    }
    return simpleImageDuplicates;
}
From source file:net.semanticmetadata.lire.impl.ColorLayoutImageSearcher.java
License:Open Source License
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    // get the first document:
    SimpleImageDuplicates simpleImageDuplicates = null;
    try {
        if (!IndexReader.indexExists(reader.directory()))
            throw new FileNotFoundException("No index found at this specific location.");
        Document doc = reader.document(0);
        ColorLayout lireFeature = (ColorLayout) descriptorClass.newInstance();
        byte[] cls = doc.getBinaryValue(fieldName);
        if (cls != null && cls.length > 0)
            lireFeature.setByteArrayRepresentation(cls);
        HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>();
        // find duplicates ...
        boolean hasDeletions = reader.hasDeletions();
        int docs = reader.numDocs();
        int numDuplicates = 0;
        for (int i = 0; i < docs; i++) {
            if (hasDeletions && reader.isDeleted(i)) {
                continue;
            }
            Document d = reader.document(i);
            float distance = getDistance(d, lireFeature);
            if (!duplicates.containsKey(distance)) {
                duplicates.put(distance, new LinkedList<String>());
            } else {
                numDuplicates++;
            }
            duplicates.get(distance).add(d.getFieldable(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
        if (numDuplicates == 0)
            return null;
        LinkedList<List<String>> results = new LinkedList<List<String>>();
        for (float f : duplicates.keySet()) {
            if (duplicates.get(f).size() > 1) {
                results.add(duplicates.get(f));
            }
        }
        simpleImageDuplicates = new SimpleImageDuplicates(results);
    } catch (InstantiationException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage());
    } catch (IllegalAccessException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage());
    }
    return simpleImageDuplicates;
}
From source file:net.semanticmetadata.lire.impl.custom.SingleNddCeddImageSearcher.java
License:Open Source License
protected void init(IndexReader reader) {
    this.reader = reader;
    if (reader.hasDeletions()) {
        throw new UnsupportedOperationException(
                "The index has to be optimized first to be cached! Use IndexWriter.forceMerge(0) to do this.");
    }
    docs = new TreeSet<SimpleResult>();
    try {
        this.cachedInstance = (LireFeature) this.descriptorClass.newInstance();
        if (fieldName == null)
            fieldName = this.cachedInstance.getFieldName();
    } catch (InstantiationException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher ("
                + descriptorClass.getName() + "): " + e.getMessage());
    } catch (IllegalAccessException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher ("
                + descriptorClass.getName() + "): " + e.getMessage());
    }
    // put all respective features into an in-memory cache ...
    if (isCaching && reader != null) {
        int docs = reader.numDocs();
        featureCache = new ArrayList<double[]>(docs);
        try {
            Document d;
            for (int i = 0; i < docs; i++) {
                d = reader.document(i);
                cachedInstance.setByteArrayRepresentation(d.getField(fieldName).binaryValue().bytes,
                        d.getField(fieldName).binaryValue().offset, d.getField(fieldName).binaryValue().length);
                // normalize features so we can use L1
                if (!halfDimensions) {
                    featureCache.add(normalize(cachedInstance.getDoubleHistogram()));
                } else {
                    featureCache.add(crunch(cachedInstance.getDoubleHistogram()));
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
From source file:net.semanticmetadata.lire.impl.FastOpponentImageSearcher.java
License:Open Source License
/**
 * @param reader
 * @param lireFeature
 * @return the maximum distance found for normalizing.
 * @throws java.io.IOException
 */
protected double findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException {
    maxDistance = -1f;
    // clear result set ...
    docs.clear();
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    Document d;
    double tmpDistance;
    int docs = reader.numDocs();
    byte[] histogram = lireFeature.getByteArrayRepresentation();
    for (int i = 0; i < docs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        d = reader.document(i);
        tmpDistance = getDistance(d, histogram);
        assert (tmpDistance >= 0);
        // calculate the overall max distance to normalize score afterwards
        // if (overallMaxDistance < tmpDistance) {
        //     overallMaxDistance = tmpDistance;
        // }
        // if it is the first document:
        if (maxDistance < 0) {
            maxDistance = tmpDistance;
        }
        // if the array is not full yet:
        if (this.docs.size() < maxHits) {
            this.docs.add(new SimpleResult((float) tmpDistance, d, i));
            if (tmpDistance > maxDistance)
                maxDistance = tmpDistance;
        } else if (tmpDistance < maxDistance) {
            // if it is nearer to the sample than at least one of the current set:
            // remove the last one ...
            this.docs.remove(this.docs.last());
            // add the new one ...
            this.docs.add(new SimpleResult((float) tmpDistance, d, i));
            // and set our new distance border ...
            maxDistance = this.docs.last().getDistance();
        }
    }
    return maxDistance;
}
From source file:net.semanticmetadata.lire.impl.GenericFastImageSearcher.java
License:Open Source License
/**
 * @param reader
 * @param lireFeature
 * @return the maximum distance found for normalizing.
 * @throws java.io.IOException
 */
protected float findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException {
    maxDistance = Float.MAX_VALUE;
    // overallMaxDistance = -1f;
    // clear result set ...
    docs.clear();
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    Document d;
    float tmpDistance;
    int docs = reader.numDocs();
    if (!isCaching) {
        // we read each and every document from the index and then we compare it to the query.
        for (int i = 0; i < docs; i++) {
            if (reader.hasDeletions() && !liveDocs.get(i))
                continue; // if it is deleted, just ignore it.
            d = reader.document(i);
            tmpDistance = getDistance(d, lireFeature);
            assert (tmpDistance >= 0);
            // if the array is not full yet:
            if (this.docs.size() < maxHits) {
                this.docs.add(new SimpleResult(tmpDistance, d, i));
                if (tmpDistance > maxDistance)
                    maxDistance = tmpDistance;
            } else if (tmpDistance < maxDistance) {
                // if it is nearer to the sample than at least one of the current set:
                // remove the last one ...
                this.docs.remove(this.docs.last());
                // add the new one ...
                this.docs.add(new SimpleResult(tmpDistance, d, i));
                // and set our new distance border ...
                maxDistance = this.docs.last().getDistance();
            }
        }
    } else {
        // we use the in-memory cache to find the matching docs from the index.
        int count = 0;
        for (Iterator<byte[]> iterator = featureCache.iterator(); iterator.hasNext();) {
            cachedInstance.setByteArrayRepresentation(iterator.next());
            tmpDistance = lireFeature.getDistance(cachedInstance);
            assert (tmpDistance >= 0);
            // if the array is not full yet:
            if (this.docs.size() < maxHits) {
                this.docs.add(new SimpleResult(tmpDistance, reader.document(count), count));
                if (tmpDistance > maxDistance)
                    maxDistance = tmpDistance;
            } else if (tmpDistance < maxDistance) {
                // if it is nearer to the sample than at least one of the current set:
                // remove the last one ...
                this.docs.remove(this.docs.last());
                // add the new one ...
                this.docs.add(new SimpleResult(tmpDistance, reader.document(count), count));
                // and set our new distance border ...
                maxDistance = this.docs.last().getDistance();
            }
            count++;
        }
    }
    return maxDistance;
}
From source file:net.semanticmetadata.lire.impl.GenericFastImageSearcher.java
License:Open Source License
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    // get the first document:
    SimpleImageDuplicates simpleImageDuplicates = null;
    try {
        // if (!IndexReader.indexExists(reader.directory()))
        //     throw new FileNotFoundException("No index found at this specific location.");
        Document doc = reader.document(0);
        LireFeature lireFeature = (LireFeature) descriptorClass.newInstance();
        if (doc.getField(fieldName).binaryValue() != null && doc.getField(fieldName).binaryValue().length > 0)
            lireFeature.setByteArrayRepresentation(doc.getField(fieldName).binaryValue().bytes,
                    doc.getField(fieldName).binaryValue().offset, doc.getField(fieldName).binaryValue().length);
        HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>();
        // Needed for check whether the document is deleted.
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        int docs = reader.numDocs();
        int numDuplicates = 0;
        for (int i = 0; i < docs; i++) {
            if (reader.hasDeletions() && !liveDocs.get(i))
                continue; // if it is deleted, just ignore it.
            Document d = reader.document(i);
            float distance = getDistance(d, lireFeature);
            if (!duplicates.containsKey(distance)) {
                duplicates.put(distance, new LinkedList<String>());
            } else {
                numDuplicates++;
            }
            duplicates.get(distance).add(d.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
        if (numDuplicates == 0)
            return null;
        LinkedList<List<String>> results = new LinkedList<List<String>>();
        for (float f : duplicates.keySet()) {
            if (duplicates.get(f).size() > 1) {
                results.add(duplicates.get(f));
            }
        }
        simpleImageDuplicates = new SimpleImageDuplicates(results);
    } catch (InstantiationException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage());
    } catch (IllegalAccessException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage());
    }
    return simpleImageDuplicates;
}
From source file:net.semanticmetadata.lire.impl.GenericImageSearcher.java
License:Open Source License
/**
 * @param reader
 * @param lireFeature
 * @return the maximum distance found for normalizing.
 * @throws java.io.IOException
 */
protected float findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException {
    float maxDistance = -1f, overallMaxDistance = -1f;
    float tmpDistance = 0f;
    // clear result set ...
    docs.clear();
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    int docs = reader.numDocs();
    Document d = null;
    for (int i = 0; i < docs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        d = reader.document(i);
        tmpDistance = getDistance(d, lireFeature);
        // if (distance < 0 || Float.isNaN(distance))
        //     System.out.println("X");
        assert (tmpDistance >= 0);
        // calculate the overall max distance to normalize score afterwards
        if (overallMaxDistance < tmpDistance) {
            overallMaxDistance = tmpDistance;
        }
        // if it is the first document:
        if (maxDistance < 0) {
            maxDistance = tmpDistance;
        }
        // if the array is not full yet:
        if (this.docs.size() < maxHits) {
            this.docs.add(new SimpleResult(tmpDistance, d, i));
            if (tmpDistance > maxDistance)
                maxDistance = tmpDistance;
        } else if (tmpDistance < maxDistance) {
            // if it is nearer to the sample than at least one of the current set:
            // remove the last one ...
            this.docs.remove(this.docs.last());
            // add the new one ...
            this.docs.add(new SimpleResult(tmpDistance, d, i));
            // and set our new distance border ...
            maxDistance = this.docs.last().getDistance();
        }
    }
    return maxDistance;
}
From source file:net.semanticmetadata.lire.impl.GenericImageSearcher.java
License:Open Source License
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    // get the first document:
    SimpleImageDuplicates simpleImageDuplicates = null;
    try {
        // if (!IndexReader.indexExists(reader.directory()))
        //     throw new FileNotFoundException("No index found at this specific location.");
        Document doc = reader.document(0);
        LireFeature lireFeature = (LireFeature) descriptorClass.newInstance();
        String[] cls = doc.getValues(fieldName);
        if (cls != null && cls.length > 0)
            lireFeature.setStringRepresentation(cls[0]);
        HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>();
        // Needed for check whether the document is deleted.
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        int docs = reader.numDocs();
        int numDuplicates = 0;
        for (int i = 0; i < docs; i++) {
            if (reader.hasDeletions() && !liveDocs.get(i))
                continue; // if it is deleted, just ignore it.
            Document d = reader.document(i);
            float distance = getDistance(d, lireFeature);
            if (!duplicates.containsKey(distance)) {
                duplicates.put(distance, new LinkedList<String>());
            } else {
                numDuplicates++;
            }
            duplicates.get(distance).add(d.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }
        if (numDuplicates == 0)
            return null;
        LinkedList<List<String>> results = new LinkedList<List<String>>();
        for (float f : duplicates.keySet()) {
            if (duplicates.get(f).size() > 1) {
                results.add(duplicates.get(f));
            }
        }
        simpleImageDuplicates = new SimpleImageDuplicates(results);
    } catch (InstantiationException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage());
    } catch (IllegalAccessException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage());
    }
    return simpleImageDuplicates;
}
From source file:net.semanticmetadata.lire.impl.ParallelImageSearcher.java
License:Open Source License
/**
 * @param reader
 * @param lireFeature
 * @return the maximum distance found for normalizing.
 * @throws java.io.IOException
 */
@SuppressWarnings("unchecked")
private float[] findSimilar(IndexReader reader, LireFeature[] lireFeature) throws IOException {
    float[] maxDistance = new float[lireFeature.length];
    float[] overallMaxDistance = new float[lireFeature.length];
    for (int i = 0; i < overallMaxDistance.length; i++) {
        overallMaxDistance[i] = -1f;
        maxDistance[i] = -1f;
    }
    parDocs = new TreeSet[lireFeature.length];
    for (int i = 0; i < parDocs.length; i++) {
        parDocs[i] = new TreeSet<SimpleResult>();
    }
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    // clear result set ...
    int docs = reader.numDocs();
    for (int i = 0; i < docs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        Document d = reader.document(i);
        float[] distance = getDistance(d, lireFeature);
        // calculate the overall max distance to normalize score afterwards
        for (int j = 0; j < distance.length; j++) {
            float f = distance[j];
            if (overallMaxDistance[j] < f) {
                overallMaxDistance[j] = f;
            }
            // if it is the first document:
            if (maxDistance[j] < 0) {
                maxDistance[j] = f;
            }
            // if the array is not full yet:
            if (this.parDocs[j].size() < maxHits) {
                this.parDocs[j].add(new SimpleResult(f, d, i));
                if (f > maxDistance[j]) {
                    maxDistance[j] = f;
                }
            } else if (f < maxDistance[j]) {
                // if it is nearer to the sample than at least one of the current set:
                // remove the last one ...
                this.parDocs[j].remove(this.parDocs[j].last());
                // add the new one ...
                this.parDocs[j].add(new SimpleResult(f, d, i));
                // and set our new distance border ...
                maxDistance[j] = this.parDocs[j].last().getDistance();
            }
        }
    }
    return maxDistance;
}