Example usage for org.apache.lucene.index IndexReader numDocs

List of usage examples for org.apache.lucene.index IndexReader numDocs

Introduction

This page collects usage examples for the numDocs method of org.apache.lucene.index.IndexReader.

Prototype

public abstract int numDocs();

Source Link

Document

Returns the number of documents in this index.

Usage

From source file:net.semanticmetadata.lire.classifiers.ClassifierTest.java

License:Open Source License

/**
 * Runs a k-nearest-neighbour, two-class classification test for every combination of
 * {@code combineNfeatures} features and reports the quality measures of each combination.
 *
 * <p>For each combination the method opens one training index and one test-set index per
 * feature, searches the training index with every document of the test-set index, counts the
 * tags of the top {@code numberOfNeighbours} hits over all combined features and classifies the
 * test document as the tag with the highest count (ties are broken by the lowest stored rank
 * weight). One semicolon-separated row per combination is written to {@code storeToFile} and
 * to standard output, and an HTML overview file is written to the working directory.
 *
 * @param start                    not used by this method
 * @param end                      not used by this method
 * @param storeToFile              path of the result file that is created/overwritten
 * @param numberOfNeighbours       number of hits (k) evaluated per search
 * @param indexLocation            prefix of the index paths; the index of a feature is opened at
 *                                 {@code indexLocation + className} and its test set at
 *                                 {@code indexLocation + className + "TestSet"}
 * @param photosLocation           passed through to {@code getTag}
 * @param testSetFile              not used by this method
 * @param searchedClass            index into {@code {class1, class2}} selecting the class name
 *                                 that is printed in the result row
 * @param fieldsArray              feature field names; used to look up
 *                                 {@code DocumentBuilder.FIELD_NAME_<NAME>}
 * @param classArray               feature class names (parallel to {@code fieldsArray}), resolved
 *                                 in package {@code net.semanticmetadata.lire.imageanalysis}
 * @param combineNfeatures         number of features combined per test run
 * @param class1                   tag counted as the positive class (TP/FP)
 * @param class2                   tag counted as the negative class (TN/FN)
 * @param informationGainThreshold only echoed into the result row
 * @param useIndex                 not used by this method
 * @return always {@code true}
 * @throws IOException            if an index or an output file cannot be read or written
 * @throws NoSuchFieldException   if {@code DocumentBuilder} has no matching field name constant
 * @throws IllegalAccessException if the field constant or feature class is not accessible
 * @throws ClassNotFoundException if a feature class cannot be found
 * @throws InstantiationException if a feature class cannot be instantiated
 */
public static boolean testClassifyNCombinedFeaturesMulti(int start, int end, String storeToFile,
        int numberOfNeighbours, String indexLocation, String photosLocation, String testSetFile,
        int searchedClass, ArrayList<String> fieldsArray, ArrayList<String> classArray, int combineNfeatures,
        String class1, String class2, double informationGainThreshold, String useIndex) throws IOException,
        NoSuchFieldException, IllegalAccessException, ClassNotFoundException, InstantiationException {

    // number of features and how many of them are combined per run
    int feats = fieldsArray.size();
    int combs = combineNfeatures;
    int k = numberOfNeighbours;
    final boolean weightByRank = true;
    final boolean createHTML = true;

    // One format for console and file so the two outputs cannot drift apart again
    // (the file variant used to miss the ';' between false positive rate and F-measure).
    final String rowFormat = "%s%s;%s;%s;%s;%4.5f;%4.5f;%4.5f;%4.5f;%4.5f;%4.5f;%4.5f;%4.5f;%10d;%10d;%4d;%4.5f;%4.5f;%4.5f;%4.5f\n";

    PrintWriter print_line = new PrintWriter(new BufferedWriter(new FileWriter(storeToFile)));
    try {
        // flat list of 1-based feature indices, combs consecutive entries per combination
        ArrayList combinations = print_nCr(feats, combs);

        // column names; parentheses are required, otherwise "Feature" + i + 1 yields "Feature01"
        // NOTE(review): the header always lists 12 feature columns and has no MCC / WFM columns,
        // while every row carries combs feature names plus both measures - confirm the layout.
        String sCombinedFeatures = "";
        for (int i = 0; i < 12; i++) {
            sCombinedFeatures = sCombinedFeatures + "Feature" + (i + 1) + ";";
        }
        print_line.print(sCombinedFeatures
                + "K=;IGTH;Weight Rank=;Class;Precision;Recall;True Negative Rate;Accuracy;False Positive Rate;F-Measure;Count Test Images;Count Correct;ms per test;TP;FP;TN;FN");
        print_line.println();
        print_line.flush();

        // resolve the feature indices of all combinations to field / class names
        ArrayList<String> fields1List = new ArrayList<String>();
        ArrayList<String> class1List = new ArrayList<String>();
        for (int i = 0; i < combinations.size(); i += combs) {
            for (int j = 0; j < combs; j++) {
                int x = (Integer) combinations.get(i + j) - 1;
                fields1List.add(fieldsArray.get(x));
                class1List.add(classArray.get(x));
            }
        }

        for (int i = 0; i < combinations.size(); i += combs) {

            // per-feature settings of this combination
            String[] featureNames = new String[combs];
            LireFeature[] features = new LireFeature[combs];
            String[] indexLocations = new String[combs];
            for (int j = 0; j < combs; j++) {
                featureNames[j] = (String) DocumentBuilder.class
                        .getField("FIELD_NAME_" + fields1List.get(i + j).toUpperCase()).get(null);
                features[j] = (LireFeature) Class
                        .forName("net.semanticmetadata.lire.imageanalysis." + class1List.get(i + j)).newInstance();
                indexLocations[j] = indexLocation + class1List.get(i + j);
            }

            String[] classes = { class1, class2 };
            String classIdentifier = classes[searchedClass];

            HashMap<String, Integer> tag2count = new HashMap<String, Integer>(k);
            HashMap<String, Double> tag2weight = new HashMap<String, Double>(k);

            // classification result and image identifier per test document, for the HTML page
            ArrayList<String> classesHTML = new ArrayList<String>();
            ArrayList<String> filesHTML = new ArrayList<String>();

            // Readers and searchers are kept in arrays indexed by feature position, so any number
            // of combined features is handled and all readers can be closed in one place.
            IndexReader[] trainReaders = new IndexReader[combs];
            IndexReader[] testReaders = new IndexReader[combs];
            ImageSearcher[] searchers = new ImageSearcher[combs];
            ImageSearchHits[] hits = new ImageSearchHits[combs];

            try {
                for (int s = 0; s < combs; s++) {
                    trainReaders[s] = DirectoryReader.open(MMapDirectory.open(new File(indexLocations[s])));
                    testReaders[s] = DirectoryReader
                            .open(MMapDirectory.open(new File(indexLocations[s] + "TestSet")));
                    searchers[s] = new GenericFastImageSearcher(k, (Class<?>) features[s].getClass(),
                            featureNames[s], true, trainReaders[s]);
                }

                int count = 0, countCorrect = 0;
                double countTp = 0, countFp = 0, countTn = 0, countFn = 0; // F1 metric
                long ms = System.currentTimeMillis();

                for (int x = 0; x < testReaders[0].numDocs(); x++) {

                    // both classes start with one vote and weight 1.0
                    tag2count.clear();
                    tag2weight.clear();
                    tag2count.put(class1, 1);
                    tag2count.put(class2, 1);
                    tag2weight.put(class1, 1.0);
                    tag2weight.put(class2, 1.0);

                    for (int s = 0; s < combs; s++) {
                        hits[s] = searchers[s].search(testReaders[s].document(x), trainReaders[s]);
                    }

                    // set tag weights and counts
                    for (int l = 0; l < k; l++) {
                        for (int s = 0; s < combs; s++) {
                            String tag = getTag(hits[s].doc(l), photosLocation);
                            tag2count.put(tag, tag2count.get(tag) + 1);
                            if (weightByRank) {
                                // the rank of the latest hit overwrites any earlier weight of the tag
                                tag2weight.put(tag, (double) l);
                            }
                        }
                    }

                    // find the class with the most votes
                    int maxCount = 0, maxima = 0;
                    String classifiedAs = null;
                    for (String tag : tag2count.keySet()) {
                        int tagCount = tag2count.get(tag);
                        if (tagCount > maxCount) {
                            maxCount = tagCount;
                            maxima = 1;
                            classifiedAs = tag;
                        } else if (tagCount == maxCount) {
                            maxima++;
                        }
                    }
                    // two or more classes share the maximum: take the one with the lowest weight
                    if (maxima > 1) {
                        double minWeight = Double.MAX_VALUE;
                        for (String tag : tag2count.keySet()) {
                            if (tag2weight.get(tag) < minWeight) {
                                minWeight = tag2weight.get(tag);
                                classifiedAs = tag;
                            }
                        }
                    }

                    count++;
                    classesHTML.add(classifiedAs);
                    filesHTML.add(
                            testReaders[0].document(x).getField("descriptorImageIdentifier").stringValue());

                    // confusion counts: class1 is the positive class, class2 the negative one
                    boolean correct = classifiedAs.equals(getTag(testReaders[0].document(x), photosLocation));
                    if (classifiedAs.equals(class1)) {
                        if (correct) {
                            countCorrect++;
                            countTp++;
                        } else {
                            countFp++;
                        }
                    }
                    if (classifiedAs.equals(class2)) {
                        if (correct) {
                            countCorrect++;
                            countTn++;
                        } else {
                            countFn++;
                        }
                    }
                }

                double precision = getPrecision(countTp, countFp);
                double recall = getRecall(countTp, countFn);
                double trueNegativeRate = getTrueNegativeRate(countTn, countFp);
                double accuracy = getAccuracy(countTp, countFp, countTn, countFn);
                double fMeasure = getFmeasure(precision, recall);
                double falsePositiveRate = getFalsePositiveRate(countFp, countTn);
                double mccMeasure = getMccMeasure(countTp, countFp, countTn, countFn);
                double wFM = getWFM(countTp, countFp, countTn, countFn, fMeasure, count);

                String classesLongName = "";
                for (int j = 0; j < combs; j++) {
                    classesLongName = classesLongName + fields1List.get(i + j) + ";";
                }

                // an empty test index must not end the run with a division by zero
                long msPerTest = count == 0 ? 0 : (System.currentTimeMillis() - ms) / count;

                System.out.printf(rowFormat, classesLongName, k, informationGainThreshold, weightByRank,
                        classIdentifier, precision, recall, trueNegativeRate, accuracy, falsePositiveRate,
                        fMeasure, mccMeasure, wFM, count, countCorrect, msPerTest, countTp, countFp, countTn,
                        countFn);
                print_line.printf(rowFormat, classesLongName, k, informationGainThreshold, weightByRank,
                        classIdentifier, precision, recall, trueNegativeRate, accuracy, falsePositiveRate,
                        fMeasure, mccMeasure, wFM, count, countCorrect, msPerTest, countTp, countFp, countTn,
                        countFn);
                print_line.flush();

                if (createHTML) {
                    writeClassificationHtml(filesHTML, classesHTML, class2);
                }
            } finally {
                // release the index readers of this combination even when a search failed
                try {
                    closeReaders(trainReaders);
                } finally {
                    closeReaders(testReaders);
                }
            }
        }
    } finally {
        print_line.close();
    }
    return true;
}

/**
 * Closes every non-null reader of the array; all readers are attempted and the first
 * {@link IOException} raised while closing is rethrown afterwards.
 */
private static void closeReaders(IndexReader[] readers) throws IOException {
    IOException firstFailure = null;
    for (int r = 0; r < readers.length; r++) {
        if (readers[r] == null) {
            continue;
        }
        try {
            readers[r].close();
        } catch (IOException e) {
            if (firstFailure == null) {
                firstFailure = e;
            }
        }
    }
    if (firstFailure != null) {
        throw firstFailure;
    }
}

/**
 * Writes an HTML page with a three-column table of all classified images into the working
 * directory. Images classified as {@code negativeClass} get a red border, all others a green one.
 */
private static void writeClassificationHtml(ArrayList<String> filesHTML, ArrayList<String> classesHTML,
        String negativeClass) throws IOException {
    String fileName = "classifieresults-" + System.currentTimeMillis() / 1000 + ".html";
    BufferedWriter bw = new BufferedWriter(new FileWriter(fileName));
    try {
        bw.write("<html>\n" + "<head><title>Classification Results</title></head>\n"
                + "<body bgcolor=\"#FFFFFF\">\n");
        bw.write("<table>");

        int elems = filesHTML.size();
        for (int d = 0; d < elems; d++) {
            if (d % 3 == 0)
                bw.write("<tr>");

            String s = filesHTML.get(d);
            String colorF = "rgb(0, 255, 0)";
            // compare against the configured negative class instead of the literal "no"
            if (classesHTML.get(d).equals(negativeClass))
                colorF = "rgb(255, 0, 0)";

            bw.write("<td><a href=\"" + s + "\"><img style=\"max-width:220px;border:medium solid " + colorF
                    + ";\"src=\"" + s + "\" border=\"" + 5 + "\" style=\"border: 3px\n"
                    + "black solid;\"></a></td>\n");
            if (d % 3 == 2)
                bw.write("</tr>");
        }

        // pad the last, incomplete row: remainder 1 needs two filler cells, remainder 2 needs one
        int remainder = elems % 3;
        if (remainder != 0) {
            for (int pad = remainder; pad < 3; pad++) {
                bw.write("<td>-</td>\n");
            }
            bw.write("</tr>");
        }

        bw.write("</table></body>\n" + "</html>");
    } finally {
        bw.close();
    }
}

From source file:net.semanticmetadata.lire.impl.CEDDImageSearcher.java

License:Open Source License

/**
 * Searches the index for groups of images that have exactly the same distance to a reference
 * document and reports every group with more than one member.
 *
 * <p>The reference is the first document of the index that has not been deleted; its CEDD
 * feature is read from the binary field {@code fieldName}.
 *
 * @param reader reader on the index to inspect
 * @return the groups of identifiers sharing a distance value, or {@code null} if the index holds
 *         no live document, if no two documents share a distance, or if the feature class could
 *         not be instantiated (the failure is logged)
 * @throws IOException if the index does not exist or cannot be read
 */
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    SimpleImageDuplicates simpleImageDuplicates = null;
    try {
        if (!IndexReader.indexExists(reader.directory()))
            throw new FileNotFoundException("No index found at this specific location.");

        boolean hasDeletions = reader.hasDeletions();
        // Document ids range from 0 to maxDoc() - 1. numDocs() excludes deleted documents, so
        // using it as the loop bound would skip live documents at the end of the index.
        int maxDoc = reader.maxDoc();

        // reference document: the first one that is still alive
        int first = 0;
        while (first < maxDoc && hasDeletions && reader.isDeleted(first)) {
            first++;
        }
        if (first >= maxDoc)
            return null; // empty index, nothing to compare
        Document doc = reader.document(first);

        CEDD lireFeature = (CEDD) descriptorClass.newInstance();
        byte[] cls = doc.getBinaryValue(fieldName);
        if (cls != null && cls.length > 0)
            lireFeature.setByteArrayRepresentation(cls);

        // distance to the reference -> identifiers of all documents with exactly that distance
        HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>();

        int numDuplicates = 0;
        for (int i = 0; i < maxDoc; i++) {
            if (hasDeletions && reader.isDeleted(i)) {
                continue;
            }
            Document d = reader.document(i);
            float distance = getDistance(d, lireFeature);

            List<String> sameDistance = duplicates.get(distance);
            if (sameDistance == null) {
                sameDistance = new LinkedList<String>();
                duplicates.put(distance, sameDistance);
            } else {
                numDuplicates++;
            }
            sameDistance.add(d.getFieldable(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }

        if (numDuplicates == 0)
            return null;

        // keep only the distance values shared by at least two documents
        LinkedList<List<String>> results = new LinkedList<List<String>>();
        for (List<String> group : duplicates.values()) {
            if (group.size() > 1) {
                results.add(group);
            }
        }
        simpleImageDuplicates = new SimpleImageDuplicates(results);
    } catch (InstantiationException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage(), e);
    } catch (IllegalAccessException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage(), e);
    }
    return simpleImageDuplicates;

}

From source file:net.semanticmetadata.lire.impl.ColorLayoutImageSearcher.java

License:Open Source License

/**
 * Searches the index for groups of images that have exactly the same distance to a reference
 * document and reports every group with more than one member.
 *
 * <p>The reference is the first document of the index that has not been deleted; its
 * ColorLayout feature is read from the binary field {@code fieldName}.
 *
 * @param reader reader on the index to inspect
 * @return the groups of identifiers sharing a distance value, or {@code null} if the index holds
 *         no live document, if no two documents share a distance, or if the feature class could
 *         not be instantiated (the failure is logged)
 * @throws IOException if the index does not exist or cannot be read
 */
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    SimpleImageDuplicates simpleImageDuplicates = null;
    try {
        if (!IndexReader.indexExists(reader.directory()))
            throw new FileNotFoundException("No index found at this specific location.");

        boolean hasDeletions = reader.hasDeletions();
        // Document ids range from 0 to maxDoc() - 1. numDocs() excludes deleted documents, so
        // using it as the loop bound would skip live documents at the end of the index.
        int maxDoc = reader.maxDoc();

        // reference document: the first one that is still alive
        int first = 0;
        while (first < maxDoc && hasDeletions && reader.isDeleted(first)) {
            first++;
        }
        if (first >= maxDoc)
            return null; // empty index, nothing to compare
        Document doc = reader.document(first);

        ColorLayout lireFeature = (ColorLayout) descriptorClass.newInstance();
        byte[] cls = doc.getBinaryValue(fieldName);
        if (cls != null && cls.length > 0)
            lireFeature.setByteArrayRepresentation(cls);

        // distance to the reference -> identifiers of all documents with exactly that distance
        HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>();

        int numDuplicates = 0;
        for (int i = 0; i < maxDoc; i++) {
            if (hasDeletions && reader.isDeleted(i)) {
                continue;
            }
            Document d = reader.document(i);
            float distance = getDistance(d, lireFeature);

            List<String> sameDistance = duplicates.get(distance);
            if (sameDistance == null) {
                sameDistance = new LinkedList<String>();
                duplicates.put(distance, sameDistance);
            } else {
                numDuplicates++;
            }
            sameDistance.add(d.getFieldable(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }

        if (numDuplicates == 0)
            return null;

        // keep only the distance values shared by at least two documents
        LinkedList<List<String>> results = new LinkedList<List<String>>();
        for (List<String> group : duplicates.values()) {
            if (group.size() > 1) {
                results.add(group);
            }
        }
        simpleImageDuplicates = new SimpleImageDuplicates(results);
    } catch (InstantiationException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage(), e);
    } catch (IllegalAccessException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage(), e);
    }
    return simpleImageDuplicates;

}

From source file:net.semanticmetadata.lire.impl.custom.SingleNddCeddImageSearcher.java

License:Open Source License

/**
 * Binds this searcher to the given reader, instantiates the descriptor used for comparisons and,
 * when caching is enabled, loads the feature vector of every document into {@code featureCache}.
 * Depending on {@code halfDimensions} the cached vectors are either normalized or crunched.
 *
 * @param reader the index reader to search in; it must not report deletions
 * @throws UnsupportedOperationException if the reader reports deletions
 */
protected void init(IndexReader reader) {
    this.reader = reader;
    // The caching step below tolerates a null reader, so this check must not dereference one either.
    if (reader != null && reader.hasDeletions()) {
        // forceMerge requires a segment count >= 1; merging down to one segment drops the deletions.
        throw new UnsupportedOperationException(
                "The index has to be optimized first to be cached! Use IndexWriter.forceMerge(1) to do this.");
    }
    docs = new TreeSet<SimpleResult>();
    try {
        this.cachedInstance = (LireFeature) this.descriptorClass.newInstance();
        if (fieldName == null)
            fieldName = this.cachedInstance.getFieldName();
    } catch (InstantiationException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher ("
                + descriptorClass.getName() + "): " + e.getMessage());
    } catch (IllegalAccessException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher ("
                + descriptorClass.getName() + "): " + e.getMessage());
    }
    // put all respective features into an in-memory cache ...
    if (isCaching && reader != null) {
        // The reader has no deletions at this point, so numDocs() covers every document id.
        int docs = reader.numDocs();
        featureCache = new ArrayList<double[]>(docs);
        try {
            Document d;
            for (int i = 0; i < docs; i++) {
                d = reader.document(i);
                cachedInstance.setByteArrayRepresentation(d.getField(fieldName).binaryValue().bytes,
                        d.getField(fieldName).binaryValue().offset, d.getField(fieldName).binaryValue().length);
                // normalize features, so we can use L1
                if (!halfDimensions) {
                    featureCache.add(normalize(cachedInstance.getDoubleHistogram()));
                } else {
                    featureCache.add(crunch(cachedInstance.getDoubleHistogram()));
                }
            }
        } catch (IOException e) {
            // Report through the class logger (with stack trace) instead of printing to stderr.
            logger.log(Level.SEVERE, "Error reading features from the index for caching: " + e.getMessage(), e);
        }
    }
}

From source file:net.semanticmetadata.lire.impl.FastOpponentImageSearcher.java

License:Open Source License

/**
 * @param reader/*from  www  .j  av a2 s  .  c  om*/
 * @param lireFeature
 * @return the maximum distance found for normalizing.
 * @throws java.io.IOException
 */
protected double findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException {
    maxDistance = -1f;
    // clear result set ...
    docs.clear();
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    Document d;
    double tmpDistance;
    int docs = reader.numDocs();
    byte[] histogram = lireFeature.getByteArrayRepresentation();
    for (int i = 0; i < docs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.

        d = reader.document(i);
        tmpDistance = getDistance(d, histogram);
        assert (tmpDistance >= 0);
        // calculate the overall max distance to normalize score afterwards
        //            if (overallMaxDistance < tmpDistance) {
        //                overallMaxDistance = tmpDistance;
        //            }
        // if it is the first document:
        if (maxDistance < 0) {
            maxDistance = tmpDistance;
        }
        // if the array is not full yet:
        if (this.docs.size() < maxHits) {
            this.docs.add(new SimpleResult((float) tmpDistance, d, i));
            if (tmpDistance > maxDistance)
                maxDistance = tmpDistance;
        } else if (tmpDistance < maxDistance) {
            // if it is nearer to the sample than at least on of the current set:
            // remove the last one ...
            this.docs.remove(this.docs.last());
            // add the new one ...
            this.docs.add(new SimpleResult((float) tmpDistance, d, i));
            // and set our new distance border ...
            maxDistance = this.docs.last().getDistance();
        }
    }
    return maxDistance;
}

From source file:net.semanticmetadata.lire.impl.GenericFastImageSearcher.java

License:Open Source License

/**
 * Finds the {@code maxHits} documents nearest to the query feature and stores them in the
 * {@code docs} result set. Without caching every live document is read from the index and
 * compared; with caching the distances are computed against the in-memory {@code featureCache}
 * and only the documents that enter the result set are read.
 *
 * @param reader      the index reader to scan
 * @param lireFeature the query feature
 * @return the maximum distance found for normalizing.
 * @throws java.io.IOException if a document cannot be read from the index
 */
protected float findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException {
    // NOTE(review): starting at Float.MAX_VALUE means the returned value stays at MAX_VALUE when
    // fewer than maxHits finite distances are seen -- confirm callers expect this.
    maxDistance = Float.MAX_VALUE;

    // clear result set ...
    docs.clear();
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    boolean hasDeletions = reader.hasDeletions();
    Document d;
    float tmpDistance;
    // Document ids range up to maxDoc(); numDocs() only counts live documents, so using it as
    // the loop bound would skip trailing documents whenever the index contains deletions.
    int maxDoc = reader.maxDoc();
    if (!isCaching) {
        // we read each and every document from the index and then we compare it to the query.
        for (int i = 0; i < maxDoc; i++) {
            if (hasDeletions && !liveDocs.get(i))
                continue; // if it is deleted, just ignore it.

            d = reader.document(i);
            tmpDistance = getDistance(d, lireFeature);
            assert (tmpDistance >= 0);
            // if the array is not full yet:
            if (this.docs.size() < maxHits) {
                this.docs.add(new SimpleResult(tmpDistance, d, i));
                if (tmpDistance > maxDistance)
                    maxDistance = tmpDistance;
            } else if (tmpDistance < maxDistance) {
                // if it is nearer to the sample than at least one of the current set:
                // remove the last one ...
                this.docs.remove(this.docs.last());
                // add the new one ...
                this.docs.add(new SimpleResult(tmpDistance, d, i));
                // and set our new distance border ...
                maxDistance = this.docs.last().getDistance();
            }
        }
    } else {
        // we use the in-memory cache to find the matching docs from the index.
        int count = 0;
        for (byte[] cachedFeature : featureCache) {
            cachedInstance.setByteArrayRepresentation(cachedFeature);
            tmpDistance = lireFeature.getDistance(cachedInstance);
            assert (tmpDistance >= 0);
            // if the array is not full yet:
            if (this.docs.size() < maxHits) {
                this.docs.add(new SimpleResult(tmpDistance, reader.document(count), count));
                if (tmpDistance > maxDistance)
                    maxDistance = tmpDistance;
            } else if (tmpDistance < maxDistance) {
                // if it is nearer to the sample than at least one of the current set:
                // remove the last one ...
                this.docs.remove(this.docs.last());
                // add the new one ...
                this.docs.add(new SimpleResult(tmpDistance, reader.document(count), count));
                // and set our new distance border ...
                maxDistance = this.docs.last().getDistance();
            }
            count++;
        }
    }
    return maxDistance;
}

From source file:net.semanticmetadata.lire.impl.GenericFastImageSearcher.java

License:Open Source License

/**
 * Groups all live documents by their distance to the first document of the index; identifiers
 * of documents that share the same distance value are reported together as duplicates.
 *
 * @param reader the index reader to scan
 * @return the groups of identifiers sharing a distance value, or {@code null} if the index is
 *         empty, no two documents share a distance, or the descriptor class cannot be instantiated
 * @throws IOException if a document cannot be read from the index
 */
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    SimpleImageDuplicates simpleImageDuplicates = null;
    try {
        // Document ids range up to maxDoc(); numDocs() only counts live documents, so using it
        // as the loop bound would skip trailing documents whenever the index contains deletions.
        int maxDoc = reader.maxDoc();
        // An empty index has no reference document and cannot contain duplicates.
        if (maxDoc == 0)
            return null;

        // get the first document:
        Document doc = reader.document(0);

        LireFeature lireFeature = (LireFeature) descriptorClass.newInstance();
        if (doc.getField(fieldName).binaryValue() != null && doc.getField(fieldName).binaryValue().length > 0)
            lireFeature.setByteArrayRepresentation(doc.getField(fieldName).binaryValue().bytes,
                    doc.getField(fieldName).binaryValue().offset, doc.getField(fieldName).binaryValue().length);

        HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>();

        // Needed for check whether the document is deleted.
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        boolean hasDeletions = reader.hasDeletions();

        int numDuplicates = 0;
        for (int i = 0; i < maxDoc; i++) {
            if (hasDeletions && !liveDocs.get(i))
                continue; // if it is deleted, just ignore it.

            Document d = reader.document(i);
            float distance = getDistance(d, lireFeature);

            List<String> sameDistance = duplicates.get(distance);
            if (sameDistance == null) {
                sameDistance = new LinkedList<String>();
                duplicates.put(distance, sameDistance);
            } else {
                // another document already produced exactly this distance
                numDuplicates++;
            }
            sameDistance.add(d.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }

        if (numDuplicates == 0)
            return null;

        // only groups with more than one member are actual duplicates
        LinkedList<List<String>> results = new LinkedList<List<String>>();
        for (List<String> group : duplicates.values()) {
            if (group.size() > 1) {
                results.add(group);
            }
        }
        simpleImageDuplicates = new SimpleImageDuplicates(results);
    } catch (InstantiationException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage());
    } catch (IllegalAccessException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage());
    }
    return simpleImageDuplicates;

}

From source file:net.semanticmetadata.lire.impl.GenericImageSearcher.java

License:Open Source License

/**
 * Scans every live document of the index, computes its distance to the query feature and keeps
 * the {@code maxHits} nearest documents in the {@code docs} result set.
 *
 * @param reader      the index reader to scan
 * @param lireFeature the query feature
 * @return the maximum distance found for normalizing.
 * @throws java.io.IOException if a document cannot be read from the index
 */
protected float findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException {
    float maxDistance = -1f;
    float tmpDistance = 0f;
    // clear result set ...
    docs.clear();
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    boolean hasDeletions = reader.hasDeletions();

    // Document ids range up to maxDoc(); numDocs() only counts live documents, so using it as
    // the loop bound would skip trailing documents whenever the index contains deletions.
    int maxDoc = reader.maxDoc();
    Document d = null;
    for (int i = 0; i < maxDoc; i++) {
        if (hasDeletions && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.

        d = reader.document(i);
        tmpDistance = getDistance(d, lireFeature);
        assert (tmpDistance >= 0);
        // if it is the first document:
        if (maxDistance < 0) {
            maxDistance = tmpDistance;
        }
        // if the array is not full yet:
        if (this.docs.size() < maxHits) {
            this.docs.add(new SimpleResult(tmpDistance, d, i));
            if (tmpDistance > maxDistance)
                maxDistance = tmpDistance;
        } else if (tmpDistance < maxDistance) {
            // if it is nearer to the sample than at least one of the current set:
            // remove the last one ...
            this.docs.remove(this.docs.last());
            // add the new one ...
            this.docs.add(new SimpleResult(tmpDistance, d, i));
            // and set our new distance border ...
            maxDistance = this.docs.last().getDistance();
        }
    }
    return maxDistance;
}

From source file:net.semanticmetadata.lire.impl.GenericImageSearcher.java

License:Open Source License

/**
 * Groups all live documents by their distance to the first document of the index; identifiers
 * of documents that share the same distance value are reported together as duplicates.
 *
 * @param reader the index reader to scan
 * @return the groups of identifiers sharing a distance value, or {@code null} if the index is
 *         empty, no two documents share a distance, or the descriptor class cannot be instantiated
 * @throws IOException if a document cannot be read from the index
 */
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    SimpleImageDuplicates simpleImageDuplicates = null;
    try {
        // Document ids range up to maxDoc(); numDocs() only counts live documents, so using it
        // as the loop bound would skip trailing documents whenever the index contains deletions.
        int maxDoc = reader.maxDoc();
        // An empty index has no reference document and cannot contain duplicates.
        if (maxDoc == 0)
            return null;

        // get the first document:
        Document doc = reader.document(0);

        LireFeature lireFeature = (LireFeature) descriptorClass.newInstance();
        String[] cls = doc.getValues(fieldName);
        if (cls != null && cls.length > 0)
            lireFeature.setStringRepresentation(cls[0]);

        HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>();

        // Needed for check whether the document is deleted.
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        boolean hasDeletions = reader.hasDeletions();

        int numDuplicates = 0;
        for (int i = 0; i < maxDoc; i++) {
            if (hasDeletions && !liveDocs.get(i))
                continue; // if it is deleted, just ignore it.
            Document d = reader.document(i);
            float distance = getDistance(d, lireFeature);

            List<String> sameDistance = duplicates.get(distance);
            if (sameDistance == null) {
                sameDistance = new LinkedList<String>();
                duplicates.put(distance, sameDistance);
            } else {
                // another document already produced exactly this distance
                numDuplicates++;
            }
            sameDistance.add(d.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }

        if (numDuplicates == 0)
            return null;

        // only groups with more than one member are actual duplicates
        LinkedList<List<String>> results = new LinkedList<List<String>>();
        for (List<String> group : duplicates.values()) {
            if (group.size() > 1) {
                results.add(group);
            }
        }
        simpleImageDuplicates = new SimpleImageDuplicates(results);
    } catch (InstantiationException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage());
    } catch (IllegalAccessException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage());
    }
    return simpleImageDuplicates;

}

From source file:net.semanticmetadata.lire.impl.ParallelImageSearcher.java

License:Open Source License

/**
 * Scans every live document of the index once and, for each of the given query features,
 * keeps the {@code maxHits} nearest documents in the matching {@code parDocs} result set.
 *
 * @param reader      the index reader to scan
 * @param lireFeature the query features; one result set is built per array entry
 * @return the maximum distance found for normalizing, one value per query feature.
 * @throws java.io.IOException if a document cannot be read from the index
 */
@SuppressWarnings("unchecked")
private float[] findSimilar(IndexReader reader, LireFeature[] lireFeature) throws IOException {
    float[] maxDistance = new float[lireFeature.length];
    // start with fresh result sets; a negative border marks "no document seen yet"
    parDocs = new TreeSet[lireFeature.length];
    for (int i = 0; i < parDocs.length; i++) {
        maxDistance[i] = -1f;
        parDocs[i] = new TreeSet<SimpleResult>();
    }

    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    boolean hasDeletions = reader.hasDeletions();

    // Document ids range up to maxDoc(); numDocs() only counts live documents, so using it as
    // the loop bound would skip trailing documents whenever the index contains deletions.
    int maxDoc = reader.maxDoc();
    for (int i = 0; i < maxDoc; i++) {
        if (hasDeletions && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.

        Document d = reader.document(i);
        float[] distance = getDistance(d, lireFeature);
        for (int j = 0; j < distance.length; j++) {
            float f = distance[j];
            // if it is the first document:
            if (maxDistance[j] < 0) {
                maxDistance[j] = f;
            }
            // if the array is not full yet:
            if (this.parDocs[j].size() < maxHits) {
                this.parDocs[j].add(new SimpleResult(f, d, i));
                if (f > maxDistance[j]) {
                    maxDistance[j] = f;
                }
            } else if (f < maxDistance[j]) {
                // if it is nearer to the sample than at least one of the current set:
                // remove the last one ...
                this.parDocs[j].remove(this.parDocs[j].last());
                // add the new one ...
                this.parDocs[j].add(new SimpleResult(f, d, i));
                // and set our new distance border ...
                maxDistance[j] = this.parDocs[j].last().getDistance();
            }

        }
    }
    return maxDistance;
}