Example usage for org.apache.lucene.index IndexReader numDocs

Introduction

This page collects example usages of org.apache.lucene.index.IndexReader.numDocs().

Prototype

public abstract int numDocs();

Source Link

Document

Returns the number of documents in this index.

Usage

From source file:net.semanticmetadata.lire.classifiers.ClassifierTest.java

License:Open Source License

/**
 * Runs a k-nearest-neighbour vote for every combination of {@code combineNfeatures} features and
 * writes one semicolon-separated result row per combination to {@code storeToFile} (and to stdout).
 *
 * <p>For each combination the method opens, per feature, the training index at
 * {@code indexLocation + className} and the test index at {@code indexLocation + className + "TestSet"}.
 * Every document of the first feature's test index is searched in each training index; the tags of
 * the {@code numberOfNeighbours} best hits of every feature are counted, and the tag with the most
 * votes wins. On a tie, the tag with the smallest recorded rank weight wins. An HTML overview named
 * {@code classifieresults-<epoch seconds>.html} is written to the working directory for each
 * combination.
 *
 * @param start                    unused
 * @param end                      unused
 * @param storeToFile              path of the result file (overwritten)
 * @param numberOfNeighbours       number of hits per feature that take part in the vote
 * @param indexLocation            path prefix of the per-feature indexes
 * @param photosLocation           passed through to {@code getTag}
 * @param testSetFile              unused
 * @param searchedClass            0 or 1; selects {@code class1} or {@code class2} as the label printed
 *                                 in the "Class" column
 * @param fieldsArray              feature field names, used to look up {@code DocumentBuilder.FIELD_NAME_*}
 * @param classArray               feature class names inside {@code net.semanticmetadata.lire.imageanalysis},
 *                                 parallel to {@code fieldsArray}
 * @param combineNfeatures         number of features per combination
 * @param class1                   tag counted as the positive class
 * @param class2                   tag counted as the negative class
 * @param informationGainThreshold only echoed into the result row
 * @param useIndex                 unused
 * @return always {@code true}
 * @throws IOException            if an index or an output file cannot be read or written
 * @throws NoSuchFieldException   if {@code DocumentBuilder} has no matching {@code FIELD_NAME_*} constant
 * @throws ClassNotFoundException if a feature class cannot be found
 * @throws InstantiationException if a feature class cannot be instantiated
 * @throws IllegalAccessException if a feature class or field is not accessible
 */
public static boolean testClassifyNCombinedFeaturesMulti(int start, int end, String storeToFile,
        int numberOfNeighbours, String indexLocation, String photosLocation, String testSetFile,
        int searchedClass, ArrayList<String> fieldsArray, ArrayList<String> classArray, int combineNfeatures,
        String class1, String class2, double informationGainThreshold, String useIndex) throws IOException,
        NoSuchFieldException, IllegalAccessException, ClassNotFoundException, InstantiationException {

    final int feats = fieldsArray.size();
    final int combs = combineNfeatures;
    final int k = numberOfNeighbours;
    final boolean weightByRank = true;
    // Same row layout for console and file; the file variant used to miss one ';' separator.
    final String rowFormat =
            "%s%s;%s;%s;%s;%4.5f;%4.5f;%4.5f;%4.5f;%4.5f;%4.5f;%4.5f;%4.5f;%10d;%10d;%4d;%4.5f;%4.5f;%4.5f;%4.5f\n";

    PrintWriter print_line = new PrintWriter(new BufferedWriter(new FileWriter(storeToFile)));
    try {
        // Flat list of 1-based feature indices; every run of `combs` entries is one combination.
        ArrayList combinations = print_nCr(feats, combs);

        // Column header. (i + 1) must be parenthesised, otherwise it is appended as "01", "11", ...
        StringBuilder header = new StringBuilder();
        for (int i = 0; i < 12; i++) {
            header.append("Feature").append(i + 1).append(';');
        }
        header.append(
                "K=;IGTH;Weight Rank=;Class;Precision;Recall;True Negative Rate;Accuracy;False Positive Rate;F-Measure;Count Test Images;Count Correct;ms per test;TP;FP;TN;FN");
        print_line.print(header.toString());
        print_line.println();
        print_line.flush();

        // Resolve every combination entry to its field / class name up front (fails fast on bad indices).
        ArrayList<String> fields1List = new ArrayList<String>();
        ArrayList<String> class1List = new ArrayList<String>();
        for (int i = 0; i < combinations.size(); i += combs) {
            for (int j = 0; j < combs; j++) {
                int x = ((Integer) combinations.get(i + j)) - 1;
                fields1List.add(fieldsArray.get(x));
                class1List.add(classArray.get(x));
            }
        }

        for (int i = 0; i < combinations.size(); i += combs) {

            // Per-feature configuration of the current combination.
            String[] featureNames = new String[combs];
            Class<?>[] featureClasses = new Class<?>[combs];
            String[] indexLocations = new String[combs];
            for (int j = 0; j < combs; j++) {
                // Locale.ROOT keeps the reflective constant name stable in every default locale.
                featureNames[j] = (String) DocumentBuilder.class
                        .getField("FIELD_NAME_" + fields1List.get(i + j).toUpperCase(java.util.Locale.ROOT))
                        .get(null);
                LireFeature feature = (LireFeature) Class
                        .forName("net.semanticmetadata.lire.imageanalysis." + class1List.get(i + j))
                        .newInstance();
                featureClasses[j] = feature.getClass();
                indexLocations[j] = indexLocation + class1List.get(i + j);
            }

            String[] classes = { class1, class2 };
            String classIdentifier = classes[searchedClass];

            HashMap<String, Integer> tag2count = new HashMap<String, Integer>(k);
            HashMap<String, Double> tag2weight = new HashMap<String, Double>(k);
            ArrayList<String> classesHTML = new ArrayList<String>();
            ArrayList<String> filesHTML = new ArrayList<String>();

            IndexReader[] trainReaders = new IndexReader[combs];
            IndexReader[] testReaders = new IndexReader[combs];
            ImageSearcher[] searchers = new ImageSearcher[combs];
            ImageSearchHits[] hits = new ImageSearchHits[combs];

            try {
                for (int j = 0; j < combs; j++) {
                    trainReaders[j] = DirectoryReader.open(MMapDirectory.open(new File(indexLocations[j])));
                    testReaders[j] = DirectoryReader
                            .open(MMapDirectory.open(new File(indexLocations[j] + "TestSet")));
                    searchers[j] = new GenericFastImageSearcher(k, featureClasses[j], featureNames[j], true,
                            trainReaders[j]);
                }

                int count = 0, countCorrect = 0;
                double countTp = 0, countFp = 0, countTn = 0, countFn = 0;
                long ms = System.currentTimeMillis();

                // NOTE(review): ids 0..numDocs()-1 only cover every live document when the test index
                // has no deletions - confirm the test indexes are always freshly built.
                for (int x = 0; x < testReaders[0].numDocs(); x++) {
                    tag2count.clear();
                    tag2weight.clear();
                    tag2count.put(class1, 1);
                    tag2count.put(class2, 1);
                    tag2weight.put(class1, 1.0);
                    tag2weight.put(class2, 1.0);

                    for (int j = 0; j < combs; j++) {
                        hits[j] = searchers[j].search(testReaders[j].document(x), trainReaders[j]);
                    }

                    // Count one vote per hit; the weight of a tag is the last rank at which it was seen.
                    // NOTE(review): this is not a rank sum although older comments called it one.
                    for (int l = 0; l < k; l++) {
                        for (int j = 0; j < combs; j++) {
                            String tag = getTag(hits[j].doc(l), photosLocation);
                            tag2count.put(tag, tag2count.get(tag) + 1);
                            if (weightByRank) {
                                tag2weight.put(tag, (double) l);
                            }
                        }
                    }

                    // Majority vote; `maxima` counts how many tags share the best count.
                    int maxCount = 0, maxima = 0;
                    String classifiedAs = null;
                    for (String tag : tag2count.keySet()) {
                        int votes = tag2count.get(tag);
                        if (votes > maxCount) {
                            maxCount = votes;
                            maxima = 1;
                            classifiedAs = tag;
                        } else if (votes == maxCount) {
                            maxima++;
                        }
                    }
                    // Tie: fall back to the tag with the smallest weight.
                    if (maxima > 1) {
                        double minWeight = Double.MAX_VALUE;
                        for (String tag : tag2count.keySet()) {
                            double weight = tag2weight.get(tag);
                            if (weight < minWeight) {
                                minWeight = weight;
                                classifiedAs = tag;
                            }
                        }
                    }

                    count++;
                    classesHTML.add(classifiedAs);
                    filesHTML.add(
                            testReaders[0].document(x).getField("descriptorImageIdentifier").stringValue());

                    // Confusion counts: class1 is the positive class, class2 the negative one.
                    boolean correct = classifiedAs.equals(getTag(testReaders[0].document(x), photosLocation));
                    if (classifiedAs.equals(class1)) {
                        if (correct) {
                            countCorrect++;
                            countTp++;
                        } else {
                            countFp++;
                        }
                    }
                    if (classifiedAs.equals(class2)) {
                        if (correct) {
                            countCorrect++;
                            countTn++;
                        } else {
                            countFn++;
                        }
                    }
                }

                double precision = getPrecision(countTp, countFp);
                double recall = getRecall(countTp, countFn);
                double trueNegativeRate = getTrueNegativeRate(countTn, countFp);
                double accuracy = getAccuracy(countTp, countFp, countTn, countFn);
                double fMeasure = getFmeasure(precision, recall);
                double falsePositiveRate = getFalsePositiveRate(countFp, countTn);
                double mccMeasure = getMccMeasure(countTp, countFp, countTn, countFn);
                double wFM = getWFM(countTp, countFp, countTn, countFn, fMeasure, count);
                // An empty test index must not end in a division by zero.
                long msPerTest = count > 0 ? (System.currentTimeMillis() - ms) / count : 0;

                StringBuilder longName = new StringBuilder();
                for (int j = 0; j < combs; j++) {
                    longName.append(fields1List.get(i + j)).append(';');
                }
                String classesLongName = longName.toString();

                System.out.printf(rowFormat, classesLongName, k, informationGainThreshold, weightByRank,
                        classIdentifier, precision, recall, trueNegativeRate, accuracy, falsePositiveRate,
                        fMeasure, mccMeasure, wFM, count, countCorrect, msPerTest, countTp, countFp, countTn,
                        countFn);
                print_line.printf(rowFormat, classesLongName, k, informationGainThreshold, weightByRank,
                        classIdentifier, precision, recall, trueNegativeRate, accuracy, falsePositiveRate,
                        fMeasure, mccMeasure, wFM, count, countCorrect, msPerTest, countTp, countFp, countTn,
                        countFn);
                print_line.flush();

                writeClassificationHtml(filesHTML, classesHTML);
            } finally {
                // Release the index readers of this combination even when the run failed.
                try {
                    closeIndexReaders(trainReaders);
                } finally {
                    closeIndexReaders(testReaders);
                }
            }
        }
    } finally {
        print_line.close();
    }
    return true;
}

/** Closes every non-null reader; rethrows the first close failure after all have been attempted. */
private static void closeIndexReaders(IndexReader[] readers) throws IOException {
    IOException firstFailure = null;
    for (IndexReader reader : readers) {
        if (reader == null) {
            continue;
        }
        try {
            reader.close();
        } catch (IOException e) {
            if (firstFailure == null) {
                firstFailure = e;
            }
        }
    }
    if (firstFailure != null) {
        throw firstFailure;
    }
}

/**
 * Writes a three-column HTML table of the classified images to
 * {@code classifieresults-<epoch seconds>.html} in the working directory.
 * NOTE(review): runs finishing within the same second overwrite each other's file, and the red
 * border is tied to the literal tag "no" rather than to the negative class passed by the caller.
 */
private static void writeClassificationHtml(ArrayList<String> files, ArrayList<String> predictedClasses)
        throws IOException {
    String fileName = "classifieresults-" + System.currentTimeMillis() / 1000 + ".html";
    BufferedWriter bw = new BufferedWriter(new FileWriter(fileName));
    try {
        bw.write("<html>\n" + "<head><title>Classification Results</title></head>\n"
                + "<body bgcolor=\"#FFFFFF\">\n");
        bw.write("<table>");

        int elems = files.size();
        for (int d = 0; d < elems; d++) {
            if (d % 3 == 0) {
                bw.write("<tr>");
            }
            String s = files.get(d);
            String colorF = "rgb(0, 255, 0)";
            if (predictedClasses.get(d).equals("no")) {
                colorF = "rgb(255, 0, 0)";
            }
            bw.write("<td><a href=\"" + s + "\"><img style=\"max-width:220px;border:medium solid " + colorF
                    + ";\"src=\"" + s + "\" border=\"" + 5 + "\" style=\"border: 3px\n"
                    + "black solid;\"></a></td>\n");
            if (d % 3 == 2) {
                bw.write("</tr>");
            }
        }
        // Pad an incomplete last row to three cells (two fillers after one image, one after two).
        int cellsInLastRow = elems % 3;
        if (cellsInLastRow != 0) {
            for (int pad = cellsInLastRow; pad < 3; pad++) {
                bw.write("<td>-</td>\n");
            }
            bw.write("</tr>");
        }

        bw.write("</table></body>\n" + "</html>");
    } finally {
        bw.close();
    }
}

From source file:net.semanticmetadata.lire.impl.CEDDImageSearcher.java

License:Open Source License

/**
 * Groups the documents of an index by their CEDD distance to a reference document and reports
 * every group that contains more than one document.
 *
 * <p>The reference document is the first non-deleted document of the index. Documents whose
 * distance to it is exactly equal end up in the same group.
 *
 * @param reader the index to scan
 * @return the groups with more than one member, or {@code null} if the index has no live document,
 *         no two documents share a distance, or the descriptor class could not be instantiated
 * @throws IOException if the index does not exist or cannot be read
 */
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    SimpleImageDuplicates simpleImageDuplicates = null;
    try {
        if (!IndexReader.indexExists(reader.directory())) {
            throw new FileNotFoundException("No index found at this specific location.");
        }

        boolean hasDeletions = reader.hasDeletions();
        // Document ids run up to maxDoc(); numDocs() excludes deleted documents, so using it as the
        // loop bound would skip live documents at the end of an index that has deletions.
        int maxDoc = reader.maxDoc();

        // Use the first live document as reference instead of blindly reading id 0.
        int referenceId = 0;
        while (referenceId < maxDoc && hasDeletions && reader.isDeleted(referenceId)) {
            referenceId++;
        }
        if (referenceId >= maxDoc) {
            return null; // nothing to compare
        }
        Document doc = reader.document(referenceId);

        CEDD lireFeature = (CEDD) descriptorClass.newInstance();
        byte[] cls = doc.getBinaryValue(fieldName);
        if (cls != null && cls.length > 0) {
            lireFeature.setByteArrayRepresentation(cls);
        }

        HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>();
        int numDuplicates = 0;
        for (int i = 0; i < maxDoc; i++) {
            if (hasDeletions && reader.isDeleted(i)) {
                continue;
            }
            Document d = reader.document(i);
            float distance = getDistance(d, lireFeature);

            List<String> sameDistance = duplicates.get(distance);
            if (sameDistance == null) {
                sameDistance = new LinkedList<String>();
                duplicates.put(distance, sameDistance);
            } else {
                numDuplicates++;
            }
            sameDistance.add(d.getFieldable(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }

        if (numDuplicates == 0) {
            return null;
        }

        LinkedList<List<String>> results = new LinkedList<List<String>>();
        for (List<String> group : duplicates.values()) {
            if (group.size() > 1) {
                results.add(group);
            }
        }
        simpleImageDuplicates = new SimpleImageDuplicates(results);
    } catch (InstantiationException e) {
        // Pass the exception along so the log keeps the stack trace.
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage(), e);
    } catch (IllegalAccessException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage(), e);
    }
    return simpleImageDuplicates;

}

From source file:net.semanticmetadata.lire.impl.ColorLayoutImageSearcher.java

License:Open Source License

/**
 * Groups the documents of an index by their ColorLayout distance to a reference document and
 * reports every group that contains more than one document.
 *
 * <p>The reference document is the first non-deleted document of the index. Documents whose
 * distance to it is exactly equal end up in the same group.
 *
 * @param reader the index to scan
 * @return the groups with more than one member, or {@code null} if the index has no live document,
 *         no two documents share a distance, or the descriptor class could not be instantiated
 * @throws IOException if the index does not exist or cannot be read
 */
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    SimpleImageDuplicates simpleImageDuplicates = null;
    try {
        if (!IndexReader.indexExists(reader.directory())) {
            throw new FileNotFoundException("No index found at this specific location.");
        }

        boolean hasDeletions = reader.hasDeletions();
        // Document ids run up to maxDoc(); numDocs() excludes deleted documents, so using it as the
        // loop bound would skip live documents at the end of an index that has deletions.
        int maxDoc = reader.maxDoc();

        // Use the first live document as reference instead of blindly reading id 0.
        int referenceId = 0;
        while (referenceId < maxDoc && hasDeletions && reader.isDeleted(referenceId)) {
            referenceId++;
        }
        if (referenceId >= maxDoc) {
            return null; // nothing to compare
        }
        Document doc = reader.document(referenceId);

        ColorLayout lireFeature = (ColorLayout) descriptorClass.newInstance();
        byte[] cls = doc.getBinaryValue(fieldName);
        if (cls != null && cls.length > 0) {
            lireFeature.setByteArrayRepresentation(cls);
        }

        HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>();
        int numDuplicates = 0;
        for (int i = 0; i < maxDoc; i++) {
            if (hasDeletions && reader.isDeleted(i)) {
                continue;
            }
            Document d = reader.document(i);
            float distance = getDistance(d, lireFeature);

            List<String> sameDistance = duplicates.get(distance);
            if (sameDistance == null) {
                sameDistance = new LinkedList<String>();
                duplicates.put(distance, sameDistance);
            } else {
                numDuplicates++;
            }
            sameDistance.add(d.getFieldable(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }

        if (numDuplicates == 0) {
            return null;
        }

        LinkedList<List<String>> results = new LinkedList<List<String>>();
        for (List<String> group : duplicates.values()) {
            if (group.size() > 1) {
                results.add(group);
            }
        }
        simpleImageDuplicates = new SimpleImageDuplicates(results);
    } catch (InstantiationException e) {
        // Pass the exception along so the log keeps the stack trace.
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage(), e);
    } catch (IllegalAccessException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage(), e);
    }
    return simpleImageDuplicates;

}

From source file:net.semanticmetadata.lire.impl.custom.SingleNddCeddImageSearcher.java

License:Open Source License

/**
 * Binds this searcher to the given reader and, when caching is enabled, loads the
 * feature histogram of every document in the index into the in-memory cache.
 *
 * @param reader the index reader to search; must not contain deleted documents
 * @throws UnsupportedOperationException if the index contains deletions
 */
protected void init(IndexReader reader) {
    this.reader = reader;
    if (reader.hasDeletions()) {
        // forceMerge requires at least one target segment; merging down to a single
        // segment also expunges all deleted documents.
        throw new UnsupportedOperationException(
                "The index has to be optimized first to be cached! Use IndexWriter.forceMerge(1) to do this.");
    }
    docs = new TreeSet<SimpleResult>();
    try {
        this.cachedInstance = (LireFeature) this.descriptorClass.newInstance();
        if (fieldName == null)
            fieldName = this.cachedInstance.getFieldName();
    } catch (InstantiationException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher ("
                + descriptorClass.getName() + "): " + e.getMessage());
    } catch (IllegalAccessException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher ("
                + descriptorClass.getName() + "): " + e.getMessage());
    }
    // put all respective features into an in-memory cache ...
    if (isCaching) {
        // The index has no deletions (checked above), so document ids are exactly
        // 0 .. numDocs-1 and the cache position of a feature equals its document id.
        int docCount = reader.numDocs();
        featureCache = new ArrayList<double[]>(docCount);
        try {
            for (int i = 0; i < docCount; i++) {
                Document d = reader.document(i);
                cachedInstance.setByteArrayRepresentation(d.getField(fieldName).binaryValue().bytes,
                        d.getField(fieldName).binaryValue().offset, d.getField(fieldName).binaryValue().length);
                // normalize features, so we can use L1
                if (!halfDimensions) {
                    featureCache.add(normalize(cachedInstance.getDoubleHistogram()));
                } else {
                    featureCache.add(crunch(cachedInstance.getDoubleHistogram()));
                }
            }
        } catch (IOException e) {
            // Report through the same logger as the other failures in this method.
            logger.log(Level.SEVERE, "Error reading documents while filling the feature cache: "
                    + e.getMessage(), e);
        }
    }
}

From source file:net.semanticmetadata.lire.impl.FastOpponentImageSearcher.java

License:Open Source License

/**
 * @param reader/*from  www  .j  av a2 s  .  c  om*/
 * @param lireFeature
 * @return the maximum distance found for normalizing.
 * @throws java.io.IOException
 */
protected double findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException {
    maxDistance = -1f;
    // clear result set ...
    docs.clear();
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    Document d;
    double tmpDistance;
    int docs = reader.numDocs();
    byte[] histogram = lireFeature.getByteArrayRepresentation();
    for (int i = 0; i < docs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.

        d = reader.document(i);
        tmpDistance = getDistance(d, histogram);
        assert (tmpDistance >= 0);
        // calculate the overall max distance to normalize score afterwards
        //            if (overallMaxDistance < tmpDistance) {
        //                overallMaxDistance = tmpDistance;
        //            }
        // if it is the first document:
        if (maxDistance < 0) {
            maxDistance = tmpDistance;
        }
        // if the array is not full yet:
        if (this.docs.size() < maxHits) {
            this.docs.add(new SimpleResult((float) tmpDistance, d, i));
            if (tmpDistance > maxDistance)
                maxDistance = tmpDistance;
        } else if (tmpDistance < maxDistance) {
            // if it is nearer to the sample than at least on of the current set:
            // remove the last one ...
            this.docs.remove(this.docs.last());
            // add the new one ...
            this.docs.add(new SimpleResult((float) tmpDistance, d, i));
            // and set our new distance border ...
            maxDistance = this.docs.last().getDistance();
        }
    }
    return maxDistance;
}

From source file:net.semanticmetadata.lire.impl.GenericFastImageSearcher.java

License:Open Source License

/**
 * Finds the {@code maxHits} documents nearest to the given feature and stores them
 * in {@code this.docs}. Without caching every live document is read from the index
 * and compared; with caching the distances are computed against the in-memory
 * feature cache, whose position is used as the document id.
 *
 * @param reader      the index reader to scan
 * @param lireFeature the query feature
 * @return the maximum distance found for normalizing.
 * @throws java.io.IOException if a document cannot be read from the index
 */
protected float findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException {
    // Start below every valid distance. Starting at Float.MAX_VALUE made the first
    // document seen after the result set filled up always evict the current worst
    // hit, even when it was farther away than every stored hit.
    maxDistance = -1f;

    // clear result set ...
    docs.clear();
    Document d;
    float tmpDistance;
    if (!isCaching) {
        // Needed for check whether the document is deleted.
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        boolean hasDeletions = reader.hasDeletions();
        // Document ids range over [0, maxDoc); numDocs() excludes deleted documents
        // and would cut the scan short whenever the index contains deletions.
        int maxDoc = reader.maxDoc();
        // we read each and every document from the index and then we compare it to the query.
        for (int i = 0; i < maxDoc; i++) {
            if (hasDeletions && !liveDocs.get(i))
                continue; // if it is deleted, just ignore it.

            d = reader.document(i);
            tmpDistance = getDistance(d, lireFeature);
            assert (tmpDistance >= 0);
            // if the array is not full yet:
            if (this.docs.size() < maxHits) {
                this.docs.add(new SimpleResult(tmpDistance, d, i));
                if (tmpDistance > maxDistance)
                    maxDistance = tmpDistance;
            } else if (tmpDistance < maxDistance) {
                // if it is nearer to the sample than at least one of the current set:
                // remove the last one ...
                this.docs.remove(this.docs.last());
                // add the new one ...
                this.docs.add(new SimpleResult(tmpDistance, d, i));
                // and set our new distance border ...
                maxDistance = this.docs.last().getDistance();
            }
        }
    } else {
        // we use the in-memory cache to find the matching docs from the index.
        int count = 0;
        for (Iterator<byte[]> iterator = featureCache.iterator(); iterator.hasNext();) {
            cachedInstance.setByteArrayRepresentation(iterator.next());
            tmpDistance = lireFeature.getDistance(cachedInstance);
            assert (tmpDistance >= 0);
            // if the array is not full yet:
            if (this.docs.size() < maxHits) {
                this.docs.add(new SimpleResult(tmpDistance, reader.document(count), count));
                if (tmpDistance > maxDistance)
                    maxDistance = tmpDistance;
            } else if (tmpDistance < maxDistance) {
                // if it is nearer to the sample than at least one of the current set:
                // remove the last one ...
                this.docs.remove(this.docs.last());
                // add the new one ...
                this.docs.add(new SimpleResult(tmpDistance, reader.document(count), count));
                // and set our new distance border ...
                maxDistance = this.docs.last().getDistance();
            }
            count++;
        }
    }
    return maxDistance;
}

From source file:net.semanticmetadata.lire.impl.GenericFastImageSearcher.java

License:Open Source License

/**
 * Groups the live documents of the index by their distance to the feature of the
 * first document (id 0); identifiers sharing the same distance value are reported
 * together as one duplicate group.
 *
 * @param reader the index reader to scan
 * @return the groups containing more than one identifier, or {@code null} if the
 *         index is empty, no two documents share a distance, or the descriptor
 *         class could not be instantiated
 * @throws IOException if a document cannot be read from the index
 */
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    SimpleImageDuplicates simpleImageDuplicates = null;
    // Document ids range over [0, maxDoc); numDocs() excludes deleted documents and
    // would cut the scan short whenever the index contains deletions.
    int maxDoc = reader.maxDoc();
    if (maxDoc == 0)
        return null; // empty index: there is no reference document and no duplicates.
    try {
        // get the first document:
        Document doc = reader.document(0);

        LireFeature lireFeature = (LireFeature) descriptorClass.newInstance();
        if (doc.getField(fieldName).binaryValue() != null && doc.getField(fieldName).binaryValue().length > 0)
            lireFeature.setByteArrayRepresentation(doc.getField(fieldName).binaryValue().bytes,
                    doc.getField(fieldName).binaryValue().offset, doc.getField(fieldName).binaryValue().length);

        HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>();

        // Needed for check whether the document is deleted.
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        boolean hasDeletions = reader.hasDeletions();

        int numDuplicates = 0;
        for (int i = 0; i < maxDoc; i++) {
            if (hasDeletions && !liveDocs.get(i))
                continue; // if it is deleted, just ignore it.

            Document d = reader.document(i);
            float distance = getDistance(d, lireFeature);

            List<String> sameDistance = duplicates.get(distance);
            if (sameDistance == null) {
                sameDistance = new LinkedList<String>();
                duplicates.put(distance, sameDistance);
            } else {
                numDuplicates++;
            }
            sameDistance.add(d.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }

        if (numDuplicates == 0)
            return null;

        // keep only the groups that really contain more than one identifier.
        LinkedList<List<String>> results = new LinkedList<List<String>>();
        for (List<String> group : duplicates.values()) {
            if (group.size() > 1) {
                results.add(group);
            }
        }
        simpleImageDuplicates = new SimpleImageDuplicates(results);
    } catch (InstantiationException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage());
    } catch (IllegalAccessException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage());
    }
    return simpleImageDuplicates;

}

From source file:net.semanticmetadata.lire.impl.GenericImageSearcher.java

License:Open Source License

/**
 * Scans every live document of the index and keeps the {@code maxHits} documents
 * with the smallest distance to the given feature in {@code this.docs}.
 *
 * @param reader      the index reader to scan
 * @param lireFeature the query feature
 * @return the maximum distance found for normalizing.
 * @throws java.io.IOException if a document cannot be read from the index
 */
protected float findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException {
    float maxDistance = -1f;
    float tmpDistance = 0f;
    // clear result set ...
    docs.clear();
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    boolean hasDeletions = reader.hasDeletions();

    // Document ids range over [0, maxDoc); numDocs() excludes deleted documents and
    // would cut the scan short whenever the index contains deletions.
    int maxDoc = reader.maxDoc();
    Document d = null;
    for (int i = 0; i < maxDoc; i++) {
        if (hasDeletions && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.

        d = reader.document(i);
        tmpDistance = getDistance(d, lireFeature);
        assert (tmpDistance >= 0);
        // if it is the first document:
        if (maxDistance < 0) {
            maxDistance = tmpDistance;
        }
        // if the array is not full yet:
        if (this.docs.size() < maxHits) {
            this.docs.add(new SimpleResult(tmpDistance, d, i));
            if (tmpDistance > maxDistance)
                maxDistance = tmpDistance;
        } else if (tmpDistance < maxDistance) {
            // if it is nearer to the sample than at least one of the current set:
            // remove the last one ...
            this.docs.remove(this.docs.last());
            // add the new one ...
            this.docs.add(new SimpleResult(tmpDistance, d, i));
            // and set our new distance border ...
            maxDistance = this.docs.last().getDistance();
        }
    }
    return maxDistance;
}

From source file:net.semanticmetadata.lire.impl.GenericImageSearcher.java

License:Open Source License

/**
 * Groups the live documents of the index by their distance to the feature of the
 * first document (id 0); identifiers sharing the same distance value are reported
 * together as one duplicate group.
 *
 * @param reader the index reader to scan
 * @return the groups containing more than one identifier, or {@code null} if the
 *         index is empty, no two documents share a distance, or the descriptor
 *         class could not be instantiated
 * @throws IOException if a document cannot be read from the index
 */
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    SimpleImageDuplicates simpleImageDuplicates = null;
    // Document ids range over [0, maxDoc); numDocs() excludes deleted documents and
    // would cut the scan short whenever the index contains deletions.
    int maxDoc = reader.maxDoc();
    if (maxDoc == 0)
        return null; // empty index: there is no reference document and no duplicates.
    try {
        // get the first document:
        Document doc = reader.document(0);

        LireFeature lireFeature = (LireFeature) descriptorClass.newInstance();
        String[] cls = doc.getValues(fieldName);
        if (cls != null && cls.length > 0)
            lireFeature.setStringRepresentation(cls[0]);

        HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>();

        // Needed for check whether the document is deleted.
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        boolean hasDeletions = reader.hasDeletions();

        int numDuplicates = 0;
        for (int i = 0; i < maxDoc; i++) {
            if (hasDeletions && !liveDocs.get(i))
                continue; // if it is deleted, just ignore it.
            Document d = reader.document(i);
            float distance = getDistance(d, lireFeature);

            List<String> sameDistance = duplicates.get(distance);
            if (sameDistance == null) {
                sameDistance = new LinkedList<String>();
                duplicates.put(distance, sameDistance);
            } else {
                numDuplicates++;
            }
            sameDistance.add(d.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
        }

        if (numDuplicates == 0)
            return null;

        // keep only the groups that really contain more than one identifier.
        LinkedList<List<String>> results = new LinkedList<List<String>>();
        for (List<String> group : duplicates.values()) {
            if (group.size() > 1) {
                results.add(group);
            }
        }
        simpleImageDuplicates = new SimpleImageDuplicates(results);
    } catch (InstantiationException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage());
    } catch (IllegalAccessException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage());
    }
    return simpleImageDuplicates;

}

From source file:net.semanticmetadata.lire.impl.ParallelImageSearcher.java

License:Open Source License

/**
 * Scans every live document of the index once and, for each query feature, keeps
 * the {@code maxHits} nearest documents in the corresponding {@code parDocs} set.
 *
 * @param reader      the index reader to scan
 * @param lireFeature the query features; result set {@code j} and the {@code j}-th
 *                    returned value belong to the {@code j}-th distance returned
 *                    by {@code getDistance}
 * @return for each feature, the maximum distance found for normalizing
 *         ({@code -1} if no document was scanned).
 * @throws java.io.IOException if a document cannot be read from the index
 */
@SuppressWarnings("unchecked")
private float[] findSimilar(IndexReader reader, LireFeature[] lireFeature) throws IOException {
    float[] maxDistance = new float[lireFeature.length];
    for (int i = 0; i < maxDistance.length; i++) {
        // below every valid distance, so the first document sets the border.
        maxDistance[i] = -1f;
    }

    // fresh, empty result set per feature (generic array creation is unchecked).
    parDocs = new TreeSet[lireFeature.length];
    for (int i = 0; i < parDocs.length; i++) {
        parDocs[i] = new TreeSet<SimpleResult>();
    }

    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    boolean hasDeletions = reader.hasDeletions();

    // Document ids range over [0, maxDoc); numDocs() excludes deleted documents and
    // would cut the scan short whenever the index contains deletions.
    int maxDoc = reader.maxDoc();
    for (int i = 0; i < maxDoc; i++) {
        if (hasDeletions && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.

        Document d = reader.document(i);
        float[] distance = getDistance(d, lireFeature);
        for (int j = 0; j < distance.length; j++) {
            float f = distance[j];
            // if it is the first document:
            if (maxDistance[j] < 0) {
                maxDistance[j] = f;
            }
            // if the array is not full yet:
            if (this.parDocs[j].size() < maxHits) {
                this.parDocs[j].add(new SimpleResult(f, d, i));
                if (f > maxDistance[j]) {
                    maxDistance[j] = f;
                }
            } else if (f < maxDistance[j]) {
                // if it is nearer to the sample than at least one of the current set:
                // remove the last one ...
                this.parDocs[j].remove(this.parDocs[j].last());
                // add the new one ...
                this.parDocs[j].add(new SimpleResult(f, d, i));
                // and set our new distance border ...
                maxDistance[j] = this.parDocs[j].last().getDistance();
            }

        }
    }
    return maxDistance;
}