Example usage for org.apache.lucene.index.IndexReader.document(int)

List of usage examples for org.apache.lucene.index.IndexReader.document(int)

Introduction

On this page you can find example usages of the method org.apache.lucene.index.IndexReader.document(int).

Prototype




public final Document document(int docID) throws IOException 

Source Link

Document

Returns the stored fields of the nth Document in this index.

Usage

From source file:net.semanticmetadata.lire.benchmarking.TestZuBuD.java

License:Open Source License

/**
 * Runs every query document found in {@code readerQueries} against {@code reader}, writes the
 * per-hit evaluation lines to a text file below {@code eval/<db>/}, and prints one summary line
 * (elapsed time, prefix, optional cluster count, MAP, P@10, error rate) to standard output.
 *
 * <p>A document of {@code readerQueries} counts as a query when the id derived from its
 * identifier field is a key of {@code queries}. A hit counts as relevant when its id is listed
 * for that query or equals the query id itself.
 *
 * @param searcher      searcher used for each query; its {@code toString()} selects the output
 *                      file layout (the "ImageSearcherUsingWSs" variant gets its own sub-directory)
 * @param prefix        label used in the output file name (spaces become underscores) and in
 *                      the summary line
 * @param reader        index that is searched
 * @param clusters      cluster count used in file names; values {@code <= 0} select the
 *                      "Global" file name in the non-WS layout
 * @param readerQueries index holding the query documents
 * @throws IOException if the output file cannot be created or an index access fails
 */
private void computeMAP(ImageSearcher searcher, String prefix, IndexReader reader, int clusters,
        IndexReader readerQueries) throws IOException {
    long start = System.currentTimeMillis();

    double queryCount = 0d;
    double errorRate = 0;
    double map = 0;
    double p10 = 0;
    int errorCount = 0;
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(readerQueries);

    String searcherName = searcher.toString();
    boolean usesWs = searcherName.contains("ImageSearcherUsingWSs");
    String filePrefix = prefix.replace(' ', '_');
    // Last whitespace-separated token of the searcher description; only used for the WS layout.
    String searcherSuffix = "";
    if (usesWs) {
        String[] tokens = searcherName.split("\\s+");
        searcherSuffix = tokens[tokens.length - 1];
    }

    File outFile;
    if (usesWs) {
        String outDir = "eval/" + db + "/" + filePrefix + "/" + clusters + "/";
        (new File(outDir)).mkdirs();
        outFile = new File(outDir + filePrefix + "-" + db + clusters + searcherSuffix + ".txt");
    } else {
        String outDir = "eval/" + db + "/";
        (new File(outDir)).mkdirs();
        if (clusters > 0) {
            outFile = new File(outDir + filePrefix + "-" + db + clusters + ".txt");
        } else {
            outFile = new File(outDir + filePrefix + "-" + db + "Global.txt"); // forGlobal
        }
    }

    PrintWriter fw = new PrintWriter(outFile);
    try {
        Hashtable<Integer, String> evalText = new Hashtable<Integer, String>(260);
        for (int i = 0; i < readerQueries.maxDoc(); i++) {
            if (readerQueries.hasDeletions() && !liveDocs.get(i)) {
                continue; // if it is deleted, just ignore it.
            }
            // Load the stored fields once and reuse them for both the id lookup and the search.
            Document queryDoc = readerQueries.document(i);
            String fileName = getIDfromFileName(
                    queryDoc.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
            if (!queries.keySet().contains(fileName)) {
                continue; // not one of the benchmark queries
            }

            StringBuilder tmpEval = new StringBuilder();
            queryCount += 1d;
            ImageSearchHits hits = searcher.search(queryDoc, reader);
            double avgPrecision = 0;
            double found = 0;
            double tmpP10 = 0;
            // Kept from the original: this changes the JVM-wide default locale.
            Locale.setDefault(Locale.US);
            for (int y = 0; y < hits.length(); y++) {
                int rank = y + 1; // 1-based position of this hit
                String hitFile = getIDfromFileName(reader.document(hits.documentID(y))
                        .getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
                // TODO: Sort by query ID!
                tmpEval.append(String.format(Locale.US, "%d 1 %s %d %.2f test\n", query2id.get(fileName),
                        hitFile.substring(0, hitFile.lastIndexOf('.')), rank, hits.score(y)));
                if (queries.get(fileName).contains(hitFile) || hitFile.equals(fileName)) { // it's a hit.
                    found++;
                    avgPrecision += found / rank; // precision at this rank
                    if (rank <= 10) {
                        tmpP10++;
                    }
                } else if (rank == 1) {
                    // the top-ranked result is not relevant
                    errorRate += 1d;
                }
            }

            int relevantCount = queries.get(fileName).size();
            avgPrecision /= (double) relevantCount;

            if (found != relevantCount) {
                // some of the results have not been found. We have to deal with it ...
                errorCount++;
            }

            map += avgPrecision;
            p10 += tmpP10;
            evalText.put(query2id.get(fileName), tmpEval.toString());
        }

        // Emit the collected text for ids 1..query2id.size() in ascending order.
        for (int i = 0; i < query2id.size(); i++) {
            String text = evalText.get(i + 1);
            if (text != null) { // no output was collected for this id; writing null would throw
                fw.write(text);
            }
        }
    } finally {
        // Close the writer even when a search or an index read fails.
        fw.close();
    }

    errorRate = errorRate / queryCount;
    map = map / queryCount;
    p10 = p10 / (queryCount * 10d);

    // Elapsed time as [hh:]mm:ss.
    double h = (System.currentTimeMillis() - start) / 3600000.0;
    double m = (h - Math.floor(h)) * 60.0;
    double s = (m - Math.floor(m)) * 60;
    String str = String.format("%s%02d:%02d", (((int) h > 0) ? String.format("%02d:", (int) h) : ""), (int) m,
            (int) s) + " ~ ";

    String clusterColumn = (clusters > 0) ? ("\t" + clusters) : "";
    if (usesWs) {
        str += String.format("%s%s\t%.4f\t%.4f\t%.4f\t(%s)", prefix, clusterColumn, map, p10, errorRate,
                searcherSuffix);
    } else {
        str += String.format("%s%s\t%.4f\t%.4f\t%.4f", prefix, clusterColumn, map, p10, errorRate);
    }
    if (errorCount > 0) {
        // some of the results have not been found. We have to deal with it ...
        str += "\t~~\tDid not find result ;(\t(" + errorCount + ")";
    }
    System.out.println(str);
}

From source file:net.semanticmetadata.lire.benchmarking.UCIDBenchmark.java

License:Open Source License

/**
 * Uses every document of {@code reader} whose derived id is a key of {@code queries} as a query
 * against the same index, writes the per-hit evaluation lines to
 * {@code eval/<prefix>-eval.txt} (spaces in the prefix become underscores), and prints the
 * prefix followed by MAP, P@10 and error rate to standard output.
 *
 * <p>A hit counts as relevant when its id is listed for the query or equals the query id
 * itself, so a complete result contains {@code queries.get(id).size() + 1} relevant hits.
 * Average precision is always normalised by that number; a warning is printed to standard
 * error when the result list does not contain exactly that many relevant hits.
 *
 * @param searcher searcher used for each query
 * @param prefix   label used in the output file name and in the summary line
 * @param reader   index that provides the queries and is searched
 * @throws IOException if the output file cannot be created or an index access fails
 */
private void computeMAP(ImageSearcher searcher, String prefix, IndexReader reader) throws IOException {
    double queryCount = 0d;
    double errorRate = 0;
    double map = 0;
    double p10 = 0;
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    PrintWriter fw = new PrintWriter(new File("eval/" + prefix.replace(' ', '_') + "-eval.txt"));
    try {
        Hashtable<Integer, String> evalText = new Hashtable<Integer, String>(260);
        for (int i = 0; i < reader.maxDoc(); i++) {
            if (reader.hasDeletions() && !liveDocs.get(i)) {
                continue; // if it is deleted, just ignore it.
            }
            // Load the stored fields once and reuse them for both the id lookup and the search.
            Document queryDoc = reader.document(i);
            String fileName = getIDfromFileName(
                    queryDoc.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
            if (!queries.keySet().contains(fileName)) {
                continue; // not one of the benchmark queries
            }

            StringBuilder tmpEval = new StringBuilder();
            queryCount += 1d;
            ImageSearchHits hits = searcher.search(queryDoc, reader);
            double avgPrecision = 0;
            double found = 0;
            double tmpP10 = 0;
            // Kept from the original: this changes the JVM-wide default locale.
            Locale.setDefault(Locale.US);
            for (int y = 0; y < hits.length(); y++) {
                int rank = y + 1; // 1-based position of this hit
                String hitFile = getIDfromFileName(
                        hits.doc(y).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
                // TODO: Sort by query ID!
                tmpEval.append(String.format(Locale.US, "%d 1 %s %d %.2f test\n", query2id.get(fileName),
                        hitFile.substring(0, hitFile.lastIndexOf('.')), rank, 100 - hits.score(y)));
                if (queries.get(fileName).contains(hitFile) || hitFile.equals(fileName)) { // it's a hit.
                    found++;
                    avgPrecision += found / rank; // precision at this rank
                    if (rank <= 10) {
                        tmpP10++;
                    }
                } else if (rank == 1) {
                    // the top-ranked result is not relevant
                    errorRate += 1d;
                }
            }

            // The query image itself counts as relevant, hence the "+ 1".
            int relevantCount = queries.get(fileName).size() + 1;
            // Normalise unconditionally: leaving the raw precision sum in place for incomplete
            // result lists would add values larger than 1 to the MAP.
            avgPrecision /= (double) relevantCount;
            if (found != relevantCount) {
                // some of the results have not been found. We have to deal with it ...
                System.err.println("Did not find result ;(");
            }

            map += avgPrecision;
            p10 += tmpP10;
            evalText.put(query2id.get(fileName), tmpEval.toString());
        }

        // Emit the collected text for ids 1..query2id.size() in ascending order.
        for (int i = 0; i < query2id.size(); i++) {
            String text = evalText.get(i + 1);
            if (text != null) { // no output was collected for this id; writing null would throw
                fw.write(text);
            }
        }
    } finally {
        // Close the writer even when a search or an index read fails.
        fw.close();
    }
    errorRate = errorRate / queryCount;
    map = map / queryCount;
    p10 = p10 / (queryCount * 10d);
    System.out.print(prefix);
    System.out.format("\t%.5f\t%.5f\t%.5f\n", map, p10, errorRate);

}

From source file:net.semanticmetadata.lire.classifiers.ClassifierTest.java

License:Open Source License

public static boolean testClassifyNCombinedFeaturesMulti(int start, int end, String storeToFile,
        int numberOfNeighbours, String indexLocation, String photosLocation, String testSetFile,
        int searchedClass, ArrayList<String> fieldsArray, ArrayList<String> classArray, int combineNfeatures,
        String class1, String class2, double informationGainThreshold, String useIndex) throws IOException,
        NoSuchFieldException, IllegalAccessException, ClassNotFoundException, InstantiationException {

    //numer of features and how much should be combined
    int feats = fieldsArray.size();
    int combs = combineNfeatures;

    PrintWriter print_line = new PrintWriter(new BufferedWriter(new FileWriter(storeToFile)));

    //all the combinations stored here
    ArrayList combinations = print_nCr(feats, combs);

    //  String[] fieldsArray = {"CEDD", "EdgeHistogram", "FCTH", "ColorLayout", "PHOG", "JCD", "Gabor", "JpegCoeffs", "Tamura", "Luminance_Layout", "Opponent_Histogram", "ScalableColor"};
    //  String[] classArray = {"CEDD", "EdgeHistogram", "FCTH", "ColorLayout", "PHOG", "JCD", "Gabor", "JpegCoefficientHistogram", "Tamura", "LuminanceLayout", "OpponentHistogram", "ScalableColor"};

    //get the features for the column names
    String sCombinedFeatures = "";
    for (int i = 0; i < 12; i++) {
        sCombinedFeatures = sCombinedFeatures + "Feature" + i + 1 + ";";
    }/*from w  w  w  .j  a  v a 2s  . c  om*/
    print_line.print(sCombinedFeatures
            + "K=;IGTH;Weight Rank=;Class;Precision;Recall;True Negative Rate;Accuracy;False Positive Rate;F-Measure;Count Test Images;Count Correct;ms per test;TP;FP;TN;FN");
    print_line.println();
    print_line.flush();

    ArrayList<String> fields1List = new ArrayList<String>();
    ArrayList<String> class1List = new ArrayList<String>();

    for (int i = 0; i < combinations.size(); i += combs) {
        for (int j = 0; j < combs; j++) {
            //     System.out.print(combinations.get(i + j).toString() + " ");
            int x = (Integer) combinations.get(i + j) - 1;
            fields1List.add(fieldsArray.get(x));
            class1List.add(classArray.get(x));
        }
    }

    for (int i = 0; i < combinations.size(); i += combs) {

        // System.out.println(i);

        ArrayList featureNameList = new ArrayList();
        ArrayList lireFeatureList = new ArrayList();
        ArrayList indexLocationList = new ArrayList();

        //iterate over all fields lists and fill it in a array
        for (int j = 0; j < combs; j++) {
            //   System.out.print(combinations.get(i + j).toString() + " ");
            featureNameList.add((String) DocumentBuilder.class
                    .getField("FIELD_NAME_" + fields1List.get(i + j).toUpperCase()).get(null));
            lireFeatureList.add((LireFeature) Class
                    .forName("net.semanticmetadata.lire.imageanalysis." + class1List.get(i + j)).newInstance());
            indexLocationList.add(indexLocation + class1List.get(i + j));
        }

        boolean weightByRank = true;
        boolean createHTML = true;
        //  String[] classes = {"yes", "no"};
        String[] classes = { class1, class2 };
        int k = numberOfNeighbours;

        //System.out.println("Tests for lf1 " + f1 + " with k=" + k + " combined with " + f2 + " - weighting by rank sum: " + weightByRank);
        //System.out.println("========================================");
        HashMap<String, Integer> tag2count = new HashMap<String, Integer>(k);
        HashMap<String, Double> tag2weight = new HashMap<String, Double>(k);
        int c = 0; // used for just one class ...
        //        for (int c = 0; c < 10; c++) {
        c = searchedClass;

        String classIdentifier = classes[c];

        //"D:\\Datasets\\FashionTest\\fashion10000Test\\" + classIdentifier + ".txt";

        // INIT
        ArrayList<String> classesHTML = new ArrayList<String>();
        ArrayList<String> filesHTML = new ArrayList<String>();

        int[] confusion = new int[2];
        Arrays.fill(confusion, 0);
        HashMap<String, Integer> class2id = new HashMap<String, Integer>(2);
        for (int d = 0; d < classes.length; d++)
            class2id.put(classes[d], d);

        //   BufferedReader br = new BufferedReader(new FileReader(testSetFile));
        //   String line;

        IndexReader irt1 = null;
        IndexReader irt2 = null;
        IndexReader irt3 = null;
        IndexReader irt4 = null;
        IndexReader irt5 = null;
        IndexReader irt6 = null;
        IndexReader irt7 = null;
        IndexReader irt8 = null;
        IndexReader irt9 = null;
        IndexReader irt10 = null;
        IndexReader irt11 = null;
        IndexReader irt12 = null;

        IndexReader ir2 = null;
        ImageSearcher bis2 = null;
        IndexReader ir3 = null;
        ImageSearcher bis3 = null;
        IndexReader ir4 = null;
        ImageSearcher bis4 = null;
        IndexReader ir5 = null;
        ImageSearcher bis5 = null;
        IndexReader ir6 = null;
        ImageSearcher bis6 = null;
        IndexReader ir7 = null;
        ImageSearcher bis7 = null;
        IndexReader ir8 = null;
        ImageSearcher bis8 = null;
        IndexReader ir9 = null;
        ImageSearcher bis9 = null;
        IndexReader ir10 = null;
        ImageSearcher bis10 = null;
        IndexReader ir11 = null;
        ImageSearcher bis11 = null;
        IndexReader ir12 = null;
        ImageSearcher bis12 = null;

        /*        IndexReader ir2 = null;
                BitSamplingImageSearcher bis2 = null;
                IndexReader ir3 = null;
                BitSamplingImageSearcher bis3 = null;
                IndexReader ir4 = null;
                BitSamplingImageSearcher bis4 = null;
                IndexReader ir5 = null;
                BitSamplingImageSearcher bis5 = null;
                IndexReader ir6 = null;
                BitSamplingImageSearcher bis6 = null;
                IndexReader ir7 = null;
                BitSamplingImageSearcher bis7 = null;
                IndexReader ir8 = null;
                BitSamplingImageSearcher bis8 = null;
                IndexReader ir9 = null;
                BitSamplingImageSearcher bis9 = null;
                IndexReader ir10 = null;
                BitSamplingImageSearcher bis10 = null;
                IndexReader ir11 = null;
                BitSamplingImageSearcher bis11 = null;
                IndexReader ir12 = null;
                BitSamplingImageSearcher bis12 = null;*/

        IndexReader ir1 = DirectoryReader.open(MMapDirectory.open(new File((String) indexLocationList.get(0))));
        irt1 = DirectoryReader
                .open(MMapDirectory.open(new File((String) indexLocationList.get(0) + "TestSet")));
        //  ImageSearcher bis1 = new GenericFastImageSearcher(k, (Class<?>) lireFeatureList.get(0).getClass(), (String) featureNameList.get(0), true, ir1);
        GenericFastImageSearcher bis1 = new GenericFastImageSearcher(k,
                (Class<?>) lireFeatureList.get(0).getClass(), (String) featureNameList.get(0), true, ir1);
        if (combs > 1) {
            ir2 = DirectoryReader.open(MMapDirectory.open(new File((String) indexLocationList.get(1))));
            irt2 = DirectoryReader
                    .open(MMapDirectory.open(new File((String) indexLocationList.get(1) + "TestSet")));
            bis2 = new GenericFastImageSearcher(k, (Class<?>) lireFeatureList.get(1).getClass(),
                    (String) featureNameList.get(1), true, ir2);
        }
        if (combs > 2) {
            ir3 = DirectoryReader.open(MMapDirectory.open(new File((String) indexLocationList.get(2))));
            irt3 = DirectoryReader
                    .open(MMapDirectory.open(new File((String) indexLocationList.get(2) + "TestSet")));
            bis3 = new GenericFastImageSearcher(k, (Class<?>) lireFeatureList.get(2).getClass(),
                    (String) featureNameList.get(2), true, ir3);
        }
        if (combs > 3) {
            ir4 = DirectoryReader.open(MMapDirectory.open(new File((String) indexLocationList.get(3))));
            irt4 = DirectoryReader
                    .open(MMapDirectory.open(new File((String) indexLocationList.get(3) + "TestSet")));
            bis4 = new GenericFastImageSearcher(k, (Class<?>) lireFeatureList.get(3).getClass(),
                    (String) featureNameList.get(3), true, ir4);
        }
        if (combs > 4) {
            ir5 = DirectoryReader.open(MMapDirectory.open(new File((String) indexLocationList.get(4))));
            irt5 = DirectoryReader
                    .open(MMapDirectory.open(new File((String) indexLocationList.get(4) + "TestSet")));
            bis5 = new GenericFastImageSearcher(k, (Class<?>) lireFeatureList.get(4).getClass(),
                    (String) featureNameList.get(4), true, ir5);
        }
        if (combs > 5) {
            ir6 = DirectoryReader.open(MMapDirectory.open(new File((String) indexLocationList.get(5))));
            irt6 = DirectoryReader
                    .open(MMapDirectory.open(new File((String) indexLocationList.get(5) + "TestSet")));
            bis6 = new GenericFastImageSearcher(k, (Class<?>) lireFeatureList.get(5).getClass(),
                    (String) featureNameList.get(5), true, ir6);
        }
        if (combs > 6) {
            ir7 = DirectoryReader.open(MMapDirectory.open(new File((String) indexLocationList.get(6))));
            irt7 = DirectoryReader
                    .open(MMapDirectory.open(new File((String) indexLocationList.get(6) + "TestSet")));
            bis7 = new GenericFastImageSearcher(k, (Class<?>) lireFeatureList.get(6).getClass(),
                    (String) featureNameList.get(6), true, ir7);
        }
        if (combs > 7) {
            ir8 = DirectoryReader.open(MMapDirectory.open(new File((String) indexLocationList.get(7))));
            irt8 = DirectoryReader
                    .open(MMapDirectory.open(new File((String) indexLocationList.get(7) + "TestSet")));
            bis8 = new GenericFastImageSearcher(k, (Class<?>) lireFeatureList.get(7).getClass(),
                    (String) featureNameList.get(7), true, ir8);
        }
        if (combs > 8) {
            ir9 = DirectoryReader.open(MMapDirectory.open(new File((String) indexLocationList.get(8))));
            irt9 = DirectoryReader
                    .open(MMapDirectory.open(new File((String) indexLocationList.get(8) + "TestSet")));
            bis9 = new GenericFastImageSearcher(k, (Class<?>) lireFeatureList.get(8).getClass(),
                    (String) featureNameList.get(8), true, ir9);
        }
        if (combs > 9) {
            ir10 = DirectoryReader.open(MMapDirectory.open(new File((String) indexLocationList.get(9))));
            irt10 = DirectoryReader
                    .open(MMapDirectory.open(new File((String) indexLocationList.get(9) + "TestSet")));
            bis10 = new GenericFastImageSearcher(k, (Class<?>) lireFeatureList.get(9).getClass(),
                    (String) featureNameList.get(9), true, ir10);
        }
        if (combs > 10) {
            ir11 = DirectoryReader.open(MMapDirectory.open(new File((String) indexLocationList.get(10))));
            irt11 = DirectoryReader
                    .open(MMapDirectory.open(new File((String) indexLocationList.get(10) + "TestSet")));
            bis11 = new GenericFastImageSearcher(k, (Class<?>) lireFeatureList.get(10).getClass(),
                    (String) featureNameList.get(10), true, ir11);
        }
        if (combs > 11) {
            ir12 = DirectoryReader.open(MMapDirectory.open(new File((String) indexLocationList.get(11))));
            irt12 = DirectoryReader
                    .open(MMapDirectory.open(new File((String) indexLocationList.get(11) + "TestSet")));
            bis12 = new GenericFastImageSearcher(k, (Class<?>) lireFeatureList.get(11).getClass(),
                    (String) featureNameList.get(11), true, ir12);
        }

        ImageSearchHits hits1;
        ImageSearchHits hits2 = null;
        ImageSearchHits hits3 = null;
        ImageSearchHits hits4 = null;
        ImageSearchHits hits5 = null;
        ImageSearchHits hits6 = null;
        ImageSearchHits hits7 = null;
        ImageSearchHits hits8 = null;
        ImageSearchHits hits9 = null;
        ImageSearchHits hits10 = null;
        ImageSearchHits hits11 = null;
        ImageSearchHits hits12 = null;

        int count = 0, countCorrect = 0;
        double countTp = 0, countFp = 0, countTn = 0, countFn = 0; //F1 Metric
        long ms = System.currentTimeMillis();
        for (int x = 0; x < irt1.numDocs(); x++) {

            //  while ((line = br.readLine()) != null) {

            // System.out.println(x);

            tag2count.clear();
            tag2weight.clear();
            //  tag2count.put("yes", 1);
            //  tag2count.put("no", 1);
            //  tag2weight.put("yes", 1.0);
            //  tag2weight.put("no", 1.0);

            tag2count.put(class1, 1);
            tag2count.put(class2, 1);
            tag2weight.put(class1, 1.0);
            tag2weight.put(class2, 1.0);

            hits1 = bis1.search(irt1.document(x), ir1);
            if (combs > 1) {
                hits2 = bis2.search(irt2.document(x), ir2);
            }
            if (combs > 2) {
                hits3 = bis3.search(irt3.document(x), ir3);
            }
            if (combs > 3) {
                hits4 = bis4.search(irt4.document(x), ir4);
            }
            if (combs > 4) {
                hits5 = bis5.search(irt5.document(x), ir5);
            }
            if (combs > 5) {
                hits6 = bis6.search(irt6.document(x), ir6);
            }
            if (combs > 6) {
                hits7 = bis7.search(irt7.document(x), ir7);
            }
            if (combs > 7) {
                hits8 = bis8.search(irt8.document(x), ir8);
            }
            if (combs > 8) {
                hits9 = bis9.search(irt9.document(x), ir9);
            }
            if (combs > 9) {
                hits10 = bis10.search(irt10.document(x), ir10);
            }
            if (combs > 10) {
                hits11 = bis11.search(irt11.document(x), ir11);
            }
            if (combs > 11) {
                hits12 = bis12.search(irt12.document(x), ir12);
            }

            // set tag weights and counts.
            for (int l = 0; l < k; l++) {

                //  String tag = getTag(hits1.doc(l), photosLocation);

                tag2count.put(getTag(hits1.doc(l), photosLocation),
                        tag2count.get(getTag(hits1.doc(l), photosLocation)) + 1);
                if (combs > 1)
                    tag2count.put(getTag(hits2.doc(l), photosLocation),
                            tag2count.get(getTag(hits2.doc(l), photosLocation)) + 1);
                if (combs > 2)
                    tag2count.put(getTag(hits3.doc(l), photosLocation),
                            tag2count.get(getTag(hits3.doc(l), photosLocation)) + 1);
                if (combs > 3)
                    tag2count.put(getTag(hits4.doc(l), photosLocation),
                            tag2count.get(getTag(hits4.doc(l), photosLocation)) + 1);
                if (combs > 4)
                    tag2count.put(getTag(hits5.doc(l), photosLocation),
                            tag2count.get(getTag(hits5.doc(l), photosLocation)) + 1);
                if (combs > 5)
                    tag2count.put(getTag(hits6.doc(l), photosLocation),
                            tag2count.get(getTag(hits6.doc(l), photosLocation)) + 1);
                if (combs > 6)
                    tag2count.put(getTag(hits7.doc(l), photosLocation),
                            tag2count.get(getTag(hits7.doc(l), photosLocation)) + 1);
                if (combs > 7)
                    tag2count.put(getTag(hits8.doc(l), photosLocation),
                            tag2count.get(getTag(hits8.doc(l), photosLocation)) + 1);
                if (combs > 8)
                    tag2count.put(getTag(hits9.doc(l), photosLocation),
                            tag2count.get(getTag(hits9.doc(l), photosLocation)) + 1);
                if (combs > 9)
                    tag2count.put(getTag(hits10.doc(l), photosLocation),
                            tag2count.get(getTag(hits10.doc(l), photosLocation)) + 1);
                if (combs > 10)
                    tag2count.put(getTag(hits11.doc(l), photosLocation),
                            tag2count.get(getTag(hits11.doc(l), photosLocation)) + 1);
                if (combs > 11)
                    tag2count.put(getTag(hits12.doc(l), photosLocation),
                            tag2count.get(getTag(hits12.doc(l), photosLocation)) + 1);

                if (weightByRank) {
                    tag2weight.put(getTag(hits1.doc(l), photosLocation), (double) l);
                    if (combs > 1)
                        tag2weight.put(getTag(hits2.doc(l), photosLocation), (double) l);
                    if (combs > 2)
                        tag2weight.put(getTag(hits3.doc(l), photosLocation), (double) l);
                    if (combs > 3)
                        tag2weight.put(getTag(hits4.doc(l), photosLocation), (double) l);
                    if (combs > 4)
                        tag2weight.put(getTag(hits5.doc(l), photosLocation), (double) l);
                    if (combs > 5)
                        tag2weight.put(getTag(hits6.doc(l), photosLocation), (double) l);
                    if (combs > 6)
                        tag2weight.put(getTag(hits7.doc(l), photosLocation), (double) l);
                    if (combs > 7)
                        tag2weight.put(getTag(hits8.doc(l), photosLocation), (double) l);
                    if (combs > 8)
                        tag2weight.put(getTag(hits9.doc(l), photosLocation), (double) l);
                    if (combs > 9)
                        tag2weight.put(getTag(hits10.doc(l), photosLocation), (double) l);
                    if (combs > 10)
                        tag2weight.put(getTag(hits11.doc(l), photosLocation), (double) l);
                    if (combs > 11)
                        tag2weight.put(getTag(hits12.doc(l), photosLocation), (double) l);
                }
                //  System.out.println(System.currentTimeMillis()-ms);
                //  ms=System.currentTimeMillis();
            }
            // find class, iterate over the tags (classes):
            int maxCount = 0, maxima = 0;
            String classifiedAs = null;
            for (Iterator<String> tagIterator = tag2count.keySet().iterator(); tagIterator.hasNext();) {
                String tag = tagIterator.next();
                //  System.out.println(tag+tag2count.get(tag));
                if (tag2count.get(tag) > maxCount) {
                    maxCount = tag2count.get(tag);
                    maxima = 1;
                    classifiedAs = tag;

                } else if (tag2count.get(tag) == maxCount) {
                    maxima++;
                }
            }
            // if there are two or more classes with the same number of results, then we take a look at the weights.
            // else the class is alread given in classifiedAs.
            if (maxima > 1) {
                double minWeight = Double.MAX_VALUE;
                for (Iterator<String> tagIterator = tag2count.keySet().iterator(); tagIterator.hasNext();) {
                    String tag = tagIterator.next();
                    if (tag2weight.get(tag) < minWeight) {
                        minWeight = tag2weight.get(tag);
                        classifiedAs = tag;
                    }
                }
            }

            count++;
            //SHOW THE CLASSIFICATION
            //     System.out.println(classifiedAs+";"+line);
            classesHTML.add(classifiedAs);
            filesHTML.add(irt1.document(x).getField("descriptorImageIdentifier").stringValue());

            //F1 Metric
            //     if (classifiedAs.equals(getTagLine(line, photosLocation)) && classifiedAs.equals("yes")) {
            if (classifiedAs.equals(getTag(irt1.document(x), photosLocation)) && classifiedAs.equals(class1)) {
                countCorrect++;
                countTp++;
                //    } else if (!classifiedAs.equals(getTagLine(line, photosLocation)) && classifiedAs.equals("yes"))
            } else if (!classifiedAs.equals(getTag(irt1.document(x), photosLocation))
                    && classifiedAs.equals(class1))
                countFp++;

            //    if (classifiedAs.equals(getTagLine(line, photosLocation)) && classifiedAs.equals("no")) {
            if (classifiedAs.equals(getTag(irt1.document(x), photosLocation)) && classifiedAs.equals(class2)) {
                countCorrect++;
                countTn++;
                //     } else if (!classifiedAs.equals(getTagLine(line, photosLocation)) && classifiedAs.equals("no"))
            } else if (!classifiedAs.equals(getTag(irt1.document(x), photosLocation))
                    && classifiedAs.equals(class2))
                countFn++;

            // confusion:
            //confusion[class2id.get(classifiedAs)]++;
            //                    System.out.printf("%10s (%4.3f, %10d, %4d)\n", classifiedAs, ((double) countCorrect / (double) count), count, (System.currentTimeMillis() - ms) / count);

        }

        double precisicon = getPrecision(countTp, countFp);
        double recall = getRecall(countTp, countFn);
        double trueNegativeRate = getTrueNegativeRate(countTn, countFp);
        double accuracy = getAccuracy(countTp, countFp, countTn, countFn);
        double fMeasure = getFmeasure(precisicon, recall);
        double falsePositiveRate = getFalsePositiveRate(countFp, countTn);
        double mccMeasure = getMccMeasure(countTp, countFp, countTn, countFn);
        double wFM = getWFM(countTp, countFp, countTn, countFn, fMeasure, count);
        // System.out.println("Results for class " + classIdentifier);
        // System.out.printf("Class\tPrecision\tRecall\tTrue Negative Rate\tAccuracy\tF-Measure\tCount Test Images\tCount Corret\tms per test\n");
        // System.out.printf("%s\t%4.5f\t%4.5f\t%4.5f\t%4.5f\t%4.5f\t%10d\t%10d\t%4d\n", classIdentifier, precisicon, recall, trueNegativeRate,accuracy, fMeasure,  count, countCorrect, (System.currentTimeMillis() - ms) / count);

        //   System.out.println(i + 1 + " of " + class1List.size() + " finished. " + (System.currentTimeMillis() - ms) / 1000 + " seconds per round. " + "Feature: " + " Current y: " + i);

        String classesLongName = "";

        for (int j = 0; j < combs; j++) {
            //   System.out.print(combinations.get(i + j).toString() + " ");
            classesLongName = classesLongName + fields1List.get(i + j) + ";";
        }

        //   print_line.printf("%s,%s;%s;%s;%4.5f;%4.5f;%4.5f;%4.5f;%4.5f;%4.5f;%10d;%10d;%4d;%4.5f;%4.5f;%4.5f;%4.5f\n", classesLongName, k, weightByRank, classIdentifier, precisicon, recall, trueNegativeRate, accuracy, falsePositiveRate, fMeasure, count, countCorrect, (System.currentTimeMillis() - ms) / count, countTp, countFp, countTn, countFn);
        System.out.printf(
                "%s%s;%s;%s;%s;%4.5f;%4.5f;%4.5f;%4.5f;%4.5f;%4.5f;%4.5f;%4.5f;%10d;%10d;%4d;%4.5f;%4.5f;%4.5f;%4.5f\n",
                classesLongName, k, informationGainThreshold, weightByRank, classIdentifier, precisicon, recall,
                trueNegativeRate, accuracy, falsePositiveRate, fMeasure, mccMeasure, wFM, count, countCorrect,
                (System.currentTimeMillis() - ms) / count, countTp, countFp, countTn, countFn);
        print_line.printf(
                "%s%s;%s;%s;%s;%4.5f;%4.5f;%4.5f;%4.5f;%4.5f%4.5f;%4.5f;%4.5f;%10d;%10d;%4d;%4.5f;%4.5f;%4.5f;%4.5f\n",
                classesLongName, k, informationGainThreshold, weightByRank, classIdentifier, precisicon, recall,
                trueNegativeRate, accuracy, falsePositiveRate, fMeasure, mccMeasure, wFM, count, countCorrect,
                (System.currentTimeMillis() - ms) / count, countTp, countFp, countTn, countFn);
        print_line.flush();

        //Create HTML
        if (createHTML == true) {

            String fileName = "classifieresults-" + System.currentTimeMillis() / 1000 + ".html";
            BufferedWriter bw = new BufferedWriter(new FileWriter(fileName));
            bw.write("<html>\n" + "<head><title>Classification Results</title></head>\n"
                    + "<body bgcolor=\"#FFFFFF\">\n");
            bw.write("<table>");

            // int elems = Math.min(filesHTML.size(),50);
            int elems = filesHTML.size();

            for (int d = 0; d < elems; d++) {
                if (d % 3 == 0)
                    bw.write("<tr>");

                String s = filesHTML.get(d);
                String colorF = "rgb(0, 255, 0)";

                if (classesHTML.get(d).equals("no"))
                    colorF = "rgb(255, 0, 0)";
                // String s = ir1.document(topDocs.scoreDocs[i].doc).get("descriptorImageIdentifier");
                // String s = filesHTML.get(d);
                //  System.out.println(reader.document(topDocs.scoreDocs[i].doc).get("featLumLay"));
                //  s = new File(s).getAbsolutePath();
                // System.out.println(s);
                bw.write("<td><a href=\"" + s + "\"><img style=\"max-width:220px;border:medium solid " + colorF
                        + ";\"src=\"" + s + "\" border=\"" + 5 + "\" style=\"border: 3px\n"
                        + "black solid;\"></a></td>\n");
                if (d % 3 == 2)
                    bw.write("</tr>");
            }
            if (elems % 3 != 0) {
                if (elems % 3 == 2) {
                    bw.write("<td>-</td with exit code 0\nd>\n");
                    bw.write("<td>-</td>\n");
                } else if (elems % 3 == 2) {
                    bw.write("<td>-</td>\n");
                }
                bw.write("</tr>");
            }

            bw.write("</table></body>\n" + "</html>");
            bw.close();
        }
        //   } // kfor
        //        }
    }
    print_line.close();
    return true;
}

From source file:net.semanticmetadata.lire.filter.FilterTest.java

License:Open Source License

/**
 * Searches the index at {@code indexPath} using its first document as the query,
 * re-ranks the hits with a {@link RerankFilter} on the ColorLayout field and hands
 * the filtered hits to {@code FileUtils.saveImageResultsToHtml}.
 *
 * @throws IOException if the index cannot be opened or read
 */
public void testRerankFilter() throws IOException {
    // index images
    // indexFiles();
    // search
    System.out.println("---< searching >-------------------------");
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    try {
        Document document = reader.document(0);
        ImageSearcher searcher = ImageSearcherFactory.createCEDDImageSearcher(100);
        ImageSearchHits hits = searcher.search(document, reader);
        // rerank
        System.out.println("---< filtering >-------------------------");
        RerankFilter filter = new RerankFilter(ColorLayout.class, DocumentBuilder.FIELD_NAME_COLORLAYOUT);
        hits = filter.filter(hits, document);

        // output
        FileUtils.saveImageResultsToHtml("filtertest", hits,
                document.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
    } finally {
        // The reader was previously never closed; release it even if the search or output fails.
        reader.close();
    }
}

From source file:net.semanticmetadata.lire.filter.FilterTest.java

License:Open Source License

/**
 * Searches the index at {@code indexPath} using its first document as the query,
 * filters the hits with an {@link LsaFilter} on the CEDD field and hands the
 * filtered hits to {@code FileUtils.saveImageResultsToHtml}.
 *
 * @throws IOException if the index cannot be opened or read
 */
public void testLsaFilter() throws IOException {
    // index images
    //        indexFiles();
    // search
    System.out.println("---< searching >-------------------------");
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    try {
        Document document = reader.document(0);
        ImageSearcher searcher = ImageSearcherFactory.createCEDDImageSearcher(100);
        ImageSearchHits hits = searcher.search(document, reader);
        // rerank
        System.out.println("---< filtering >-------------------------");
        LsaFilter filter = new LsaFilter(CEDD.class, DocumentBuilder.FIELD_NAME_CEDD);
        hits = filter.filter(hits, document);

        // output
        FileUtils.saveImageResultsToHtml("filtertest", hits,
                document.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
    } finally {
        // The reader was previously never closed; release it even if the search or output fails.
        reader.close();
    }
}

From source file:net.semanticmetadata.lire.filter.LsaFilter.java

License:Open Source License

/**
 * Converts Lucene hits into {@link SimpleResult}s and delegates to
 * {@code filter(ImageSearchHits, Document)}.
 * <p>
 * Each hit is given the value {@code 1 / score}; the largest such value is passed
 * to the {@link SimpleImageSearchHits} constructor alongside the converted list.
 *
 * @param results the Lucene hits to convert
 * @param reader  the reader used to load the stored document of every hit
 * @param query   the query document, passed through unchanged
 * @return whatever the delegated filter call returns
 * @throws IOException if a hit's document cannot be read
 */
public ImageSearchHits filter(TopDocs results, IndexReader reader, Document query) throws IOException {
    LinkedList<SimpleResult> converted = new LinkedList<SimpleResult>();
    double largest = 0;
    for (ScoreDoc hit : results.scoreDocs) {
        // Float division on purpose: a score of 0 yields Infinity rather than an exception.
        float inverseScore = 1 / hit.score;
        converted.add(new SimpleResult(inverseScore, reader.document(hit.doc), hit.doc));
        largest = Math.max(largest, inverseScore);
    }

    SimpleImageSearchHits wrapped = new SimpleImageSearchHits(converted, (float) largest);
    return filter(wrapped, query);
}

From source file:net.semanticmetadata.lire.filter.RerankFilter.java

License:Open Source License

/**
 * Re-ranks Lucene hits by the distance between the feature stored in each hit's
 * document and the feature stored in the query document (both read from
 * {@code fieldName}).
 * <p>
 * Hits whose document lacks the field are left out of the result and reported at
 * info level.
 *
 * @param results the Lucene hits to re-rank
 * @param reader  the reader used to load the stored document of every hit
 * @param query   the query document
 * @return the re-ranked hits, or {@code null} if the feature class cannot be
 *         instantiated or the query document lacks the field
 * @throws IOException if a hit's document cannot be read
 */
@Override
public ImageSearchHits filter(TopDocs results, IndexReader reader, Document query) throws IOException {
    // create our feature classes
    LireFeature queryFeature;
    LireFeature candidateFeature;
    try {
        queryFeature = (LireFeature) featureClass.newInstance();
        candidateFeature = (LireFeature) featureClass.newInstance();
    } catch (Exception e) {
        logger.severe("Could not instantiate class " + featureClass.getName() + " in " + getClass().getName()
                + " (" + e.getMessage() + ").");
        return null;
    }

    // check if features are there and compatible.
    if (query.getField(fieldName) == null) {
        logger.severe("Given feature class " + featureClass.getName()
                + " is not available in the query document (" + getClass().getName() + ").");
        return null;
    }
    queryFeature.setByteArrayRepresentation(query.getField(fieldName).binaryValue().bytes,
            query.getField(fieldName).binaryValue().offset, query.getField(fieldName).binaryValue().length);

    TreeSet<SimpleResult> ranked = new TreeSet<SimpleResult>();
    float maxDistance = 0;
    for (ScoreDoc hit : results.scoreDocs) {
        Document candidate = reader.document(hit.doc);
        if (candidate.getField(fieldName) == null) {
            logger.info(
                    "Could not instantiate class " + featureClass.getName() + " from the given result set.");
            continue;
        }
        // candidateFeature is reused for every hit; it is overwritten before each comparison.
        candidateFeature.setByteArrayRepresentation(candidate.getField(fieldName).binaryValue().bytes,
                candidate.getField(fieldName).binaryValue().offset,
                candidate.getField(fieldName).binaryValue().length);
        float distance = queryFeature.getDistance(candidateFeature);
        maxDistance = Math.max(maxDistance, distance);
        ranked.add(new SimpleResult(distance, candidate, hit.doc));
    }
    return new SimpleImageSearchHits(ranked, maxDistance);
}

From source file:net.semanticmetadata.lire.filters.LsaFilter.java

License:Open Source License

/**
 * Re-ranks the given results with a rank-reduced singular value decomposition.
 * <p>
 * The feature vector of the query (row 0) and of every result that carries
 * {@code fieldName} are stacked into a matrix. All singular values from index
 * {@code numberOfDimensions} onwards are set to zero, the matrix is rebuilt, and
 * each result row is ranked by its L1 distance to the rebuilt query row.
 * Results without the field are left out of the returned hits.
 *
 * @param results the hits to filter
 * @param reader  the reader used to load the stored document of every hit
 * @param query   the query document; must contain the field {@code fieldName}
 * @return the filtered results or null if error occurs.
 */
public ImageSearchHits filter(ImageSearchHits results, IndexReader reader, Document query) {
    // create a double[items][histogram]
    tempFeature = null;
    try {
        tempFeature = (LireFeature) featureClass.newInstance();
    } catch (Exception e) {
        logger.severe("Could not create feature " + featureClass.getName() + " (" + e.getMessage() + ").");
        return null;
    }

    // get all features from the result set, take care of those that do not have the respective field.
    LinkedList<double[]> features = new LinkedList<double[]>();
    // Document id of each collected feature, in matrix row order. Hits without the field are
    // skipped, so a matrix row index cannot be mapped back to a position in results directly.
    int[] featureDocIds = new int[results.length()];
    int featureCount = 0;
    for (int i = 0; i < results.length(); i++) {
        int docId = results.documentID(i);
        Document d;
        try {
            d = reader.document(docId);
        } catch (IOException e) {
            // Previously the exception was only printed and the null document dereferenced below.
            logger.severe("Could not read document " + docId + " from the index (" + e.getMessage() + ").");
            return null;
        }
        if (d.getField(fieldName) != null) {
            tempFeature.setByteArrayRepresentation(d.getField(fieldName).binaryValue().bytes,
                    d.getField(fieldName).binaryValue().offset, d.getField(fieldName).binaryValue().length);
            features.add(tempFeature.getFeatureVector());
            featureDocIds[featureCount++] = docId;
        }
    }

    // now go for the query
    if (query.getField(fieldName) != null) {
        tempFeature.setByteArrayRepresentation(query.getField(fieldName).binaryValue().bytes,
                query.getField(fieldName).binaryValue().offset, query.getField(fieldName).binaryValue().length);
    } else {
        logger.severe("Query document is missing the given feature " + featureClass.getName() + ".");
        return null;
    }

    // row 0 holds the query, rows 1..n hold the collected result features
    double[] queryVector = tempFeature.getFeatureVector();
    double[][] matrixData = new double[features.size() + 1][queryVector.length];
    System.arraycopy(queryVector, 0, matrixData[0], 0, queryVector.length);
    int row = 1;
    for (double[] feature : features) {
        System.arraycopy(feature, 0, matrixData[row], 0, feature.length);
        row++;
    }
    for (int i = 0; i < matrixData.length; i++) {
        double[] doubles = matrixData[i];
        for (int j = 0; j < doubles.length; j++) {
            if (Double.isNaN(doubles[j]))
                System.err.println("Value is NaN");
        }
    }

    // create a matrix object and do the magic
    Array2DRowRealMatrix m = new Array2DRowRealMatrix(matrixData);
    SingularValueDecomposition svd = new SingularValueDecomposition(m);
    double[] singularValues = svd.getSingularValues();
    RealMatrix s = svd.getS();
    // if no number of dimensions is given reduce to a tenth.
    // Note: the derived value is stored in the field, so later calls reuse it.
    if (numberOfDimensions < 1)
        numberOfDimensions = singularValues.length / 10;
    for (int i = numberOfDimensions; i < singularValues.length; i++) {
        s.setEntry(i, i, 0);
    }
    RealMatrix mNew = svd.getU().multiply(s).multiply(svd.getVT());
    double[][] data = mNew.getData();

    // create the new result set
    TreeSet<SimpleResult> result = new TreeSet<SimpleResult>();
    double maxDistance = 0;
    double[] queryData = data[0];
    for (int i = 1; i < data.length; i++) {
        double distance = MetricsUtils.distL1(data[i], queryData);
        // Row i belongs to the (i - 1)-th collected feature, not to the (i - 1)-th hit.
        result.add(new SimpleResult((float) distance, featureDocIds[i - 1]));
        maxDistance = Math.max(maxDistance, distance);
    }
    return new SimpleImageSearchHits(result, (float) maxDistance);
}

From source file:net.semanticmetadata.lire.filters.RerankFilter.java

License:Open Source License

/**
 * Re-ranks the given results by the distance between the feature stored in each
 * result's document and the feature stored in the query document (both read from
 * {@code fieldName}).
 * <p>
 * Results whose document lacks the field are left out of the returned hits and
 * reported at info level.
 *
 * @param results the hits to re-rank
 * @param reader  the reader used to load the stored document of every hit
 * @param query   the query document
 * @return the re-ranked hits, or {@code null} if the feature class cannot be
 *         instantiated, the query document lacks the field, or a result document
 *         cannot be read from the index
 */
public ImageSearchHits filter(ImageSearchHits results, IndexReader reader, Document query) {
    GlobalFeature queryFeature = null;
    GlobalFeature tempFeature = null;
    double distance = 0, maxDistance = 0;
    TreeSet<SimpleResult> resultSet = new TreeSet<SimpleResult>();

    // create our feature classes
    try {
        queryFeature = (GlobalFeature) featureClass.newInstance();
        tempFeature = (GlobalFeature) featureClass.newInstance();
    } catch (Exception e) {
        logger.severe("Could not instantiate class " + featureClass.getName() + " in " + getClass().getName()
                + " (" + e.getMessage() + ").");
        return null;
    }

    // check if features are there and compatible.
    if (query.getField(fieldName) != null) {
        queryFeature.setByteArrayRepresentation(query.getField(fieldName).binaryValue().bytes,
                query.getField(fieldName).binaryValue().offset, query.getField(fieldName).binaryValue().length);
    } else {
        logger.severe("Given feature class " + featureClass.getName()
                + " is not available in the query document (" + getClass().getName() + ").");
        return null;
    }

    for (int x = 0; x < results.length(); x++) {
        int docId = results.documentID(x);
        Document d;
        try {
            d = reader.document(docId);
        } catch (IOException e) {
            // Previously the exception was only printed and the null document dereferenced below.
            // Report the failure through the same null-on-error path the method already uses.
            logger.severe("Could not read document " + docId + " from the index in " + getClass().getName()
                    + " (" + e.getMessage() + ").");
            return null;
        }
        if (d.getField(fieldName) != null) {
            tempFeature.setByteArrayRepresentation(d.getField(fieldName).binaryValue().bytes,
                    d.getField(fieldName).binaryValue().offset, d.getField(fieldName).binaryValue().length);
            distance = queryFeature.getDistance(tempFeature);
            maxDistance = Math.max(maxDistance, distance);
            resultSet.add(new SimpleResult(distance, docId));
        } else {
            logger.info(
                    "Could not instantiate class " + featureClass.getName() + " from the given result set.");
        }
    }
    return new SimpleImageSearchHits(resultSet, maxDistance);
}

From source file:net.semanticmetadata.lire.filters.RerankFilter.java

License:Open Source License

/**
 * Re-ranks Lucene hits by the distance between the feature stored in each hit's
 * document and the feature stored in the query document (both read from
 * {@code fieldName}).
 * <p>
 * Hits whose document lacks the field are left out of the result and reported at
 * info level.
 *
 * @param results the Lucene hits to re-rank
 * @param reader  the reader used to load the stored document of every hit
 * @param query   the query document
 * @return the re-ranked hits, or {@code null} if the feature class cannot be
 *         instantiated or the query document lacks the field
 * @throws IOException if a hit's document cannot be read
 */
@Override
public ImageSearchHits filter(TopDocs results, IndexReader reader, Document query) throws IOException {
    // create our feature classes
    GlobalFeature reference;
    GlobalFeature scratch;
    try {
        reference = (GlobalFeature) featureClass.newInstance();
        scratch = (GlobalFeature) featureClass.newInstance();
    } catch (Exception e) {
        logger.severe("Could not instantiate class " + featureClass.getName() + " in " + getClass().getName()
                + " (" + e.getMessage() + ").");
        return null;
    }

    // check if features are there and compatible.
    if (query.getField(fieldName) == null) {
        logger.severe("Given feature class " + featureClass.getName()
                + " is not available in the query document (" + getClass().getName() + ").");
        return null;
    }
    reference.setByteArrayRepresentation(query.getField(fieldName).binaryValue().bytes,
            query.getField(fieldName).binaryValue().offset, query.getField(fieldName).binaryValue().length);

    TreeSet<SimpleResult> reranked = new TreeSet<SimpleResult>();
    double farthest = 0;
    for (ScoreDoc hit : results.scoreDocs) {
        Document stored = reader.document(hit.doc);
        if (stored.getField(fieldName) == null) {
            logger.info(
                    "Could not instantiate class " + featureClass.getName() + " from the given result set.");
            continue;
        }
        // scratch is reused for every hit; it is overwritten before each comparison.
        scratch.setByteArrayRepresentation(stored.getField(fieldName).binaryValue().bytes,
                stored.getField(fieldName).binaryValue().offset,
                stored.getField(fieldName).binaryValue().length);
        double distance = reference.getDistance(scratch);
        farthest = Math.max(farthest, distance);
        reranked.add(new SimpleResult(distance, hit.doc));
    }
    return new SimpleImageSearchHits(reranked, farthest);
}