List of usage examples for org.apache.lucene.index.IndexReader#document

public final Document document(int docID) throws IOException

Returns the stored fields of the n-th Document in this index.
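Before the longer examples below, here is a minimal sketch of the call itself, using the Lucene 4.x-style API that most examples on this page use. The index path "index" and the stored field name "title" are placeholders, not part of any example on this page. The loop walks all document IDs up to maxDoc(), skips deleted documents via the live-docs bitset, and fetches each document's stored fields with document(int).

import java.io.File;
import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Bits;

public class DocumentExample {
    public static void main(String[] args) throws IOException {
        // "index" is a hypothetical path to an existing Lucene index.
        IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index")));
        Bits liveDocs = MultiFields.getLiveDocs(reader); // null if the index has no deletions
        for (int i = 0; i < reader.maxDoc(); i++) {
            if (liveDocs != null && !liveDocs.get(i))
                continue; // skip deleted documents
            Document doc = reader.document(i); // stored fields of the i-th document
            System.out.println(doc.get("title")); // "title" is an assumed stored field
        }
        reader.close();
    }
}

Note that document(int) only returns stored fields; indexed-but-not-stored fields are not reconstructed. The examples below all follow this same pattern of iterating document IDs and checking live docs before calling document(int).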
From source file: net.dataninja.ee.textEngine.MoreLikeThisQuery.java
License: Apache License
/**
 * Find words for a more-like-this query former.
 *
 * @param docNum the id of the lucene document from which to find terms
 */
private PriorityQueue retrieveTerms(IndexReader indexReader, int docNum, Analyzer analyzer) throws IOException {
    // Gather term frequencies for all fields.
    Map termFreqMap = new HashMap();
    Document d = indexReader.document(docNum);
    for (int i = 0; i < fieldNames.length; i++) {
        String fieldName = fieldNames[i];
        String[] text = d.getValues(fieldName);
        if (text == null)
            continue;
        for (int j = 0; j < text.length; j++) {
            TokenStream tokens = analyzer.tokenStream(fieldName, new StringReader(text[j]));
            addTermFrequencies(tokens, fieldName, termFreqMap);
        } // for j
    } // for i

    // Combine like terms from each field and calculate a score for each.
    Map termScoreMap = condenseTerms(indexReader, termFreqMap);

    // Finally, make a queue by score.
    return createQueue(indexReader, termScoreMap);
}
From source file: net.semanticmetadata.lire.benchmarking.TestHashingIndex.java
License: Open Source License
private void testHashing(Class featureClass, String fieldName)
        throws IOException, InstantiationException, IllegalAccessException {
    String hashesFile = "hashes.obj";
    String hashesFileL = "l_hashes.obj";
    int numResults = 50;
    int maxQueries = 20;
    int queryOffset = 100;
    File file = new File(hashesFile);
    if (file.exists())
        file.delete();
    file = new File(hashesFileL);
    if (file.exists())
        file.delete();
    BitSampling.generateHashFunctions(hashesFile);
    LocalitySensitiveHashing.generateHashFunctions(hashesFileL);
    // HashingIndexor hi = new HashingIndexor();
    ProximityHashingIndexor hi = new ProximityHashingIndexor();
    BitSampling.readHashFunctions(new FileInputStream(hashesFile));
    LocalitySensitiveHashing.readHashFunctions(new FileInputStream(hashesFileL));
    hi.setFeatureClass(featureClass);
    hi.addInputFile(new File(dataSetDataOut));
    hi.setIndexPath(testIndex);
    hi.run();
    System.out.println();
    IndexReader reader = DirectoryReader
            .open(new RAMDirectory(FSDirectory.open(new File(testIndex)), IOContext.READONCE));
    // generating ground truth for all queries ...
    ImageSearcher groundTruth = new GenericFastImageSearcher(numResults, featureClass, fieldName);
    ArrayList<ImageSearchHits> trueHitsList = new ArrayList<ImageSearchHits>(maxQueries);
    long time = System.currentTimeMillis();
    for (int q = 0; q < maxQueries; q++) {
        trueHitsList.add(q, groundTruth.search(reader.document(q + queryOffset), reader));
    }
    time = System.currentTimeMillis() - time;
    // header
    System.out.println(featureClass.getName().substring(featureClass.getName().lastIndexOf('.') + 1));
    System.out.println("Number of queries: " + maxQueries);
    System.out.println("Time taken for linear search: " + (time / maxQueries));
    System.out.printf("numFunctionBundles: %d, numBits: %d, w: %2.2f, dimensions: %d\n",
            BitSampling.getNumFunctionBundles(), BitSampling.getBits(), BitSampling.getW(),
            BitSampling.dimensions);
    System.out.println("#hashedResults\ttrue pos.\t#results\tms per search\tprecision");
    for (int j = 100; j <= 3000; j += 100) {
        ImageSearcher hashed = new BitSamplingImageSearcher(numResults, fieldName, fieldName + "_hash",
                (LireFeature) featureClass.newInstance(), new FileInputStream(hashesFile), j);
        long ms = 0;
        long msSum = 0;
        int posSum = 0;
        for (int q = 0; q < maxQueries; q++) {
            ms = System.currentTimeMillis();
            ImageSearchHits hashedHits = hashed.search(reader.document(q + queryOffset), reader);
            assert (hashedHits.length() <= numResults);
            msSum += System.currentTimeMillis() - ms;
            HashSet<Integer> t = new HashSet<Integer>(hashedHits.length());
            HashSet<Integer> h = new HashSet<Integer>(hashedHits.length());
            for (int i = 0; i < trueHitsList.get(q).length(); i++) {
                t.add(((SimpleImageSearchHits) trueHitsList.get(q)).readerID(i));
                h.add(((SimpleImageSearchHits) hashedHits).readerID(i));
            }
            assert (t.size() == h.size());
            int intersect = 0;
            for (Iterator<Integer> iterator = h.iterator(); iterator.hasNext(); ) {
                if (t.contains(iterator.next())) {
                    intersect++;
                }
            }
            posSum += intersect;
        }
        if (j > 1400)
            j += 100;
        double truePositives = ((double) posSum) / ((double) maxQueries);
        System.out.printf("%4d\t%4.1f\t%4d\t%6.1f\t%1.3f\n", j, truePositives, numResults,
                ((double) msSum) / ((double) maxQueries), truePositives / (double) numResults);
        if (posSum / maxQueries == numResults)
            break;
    }
}
From source file: net.semanticmetadata.lire.benchmarking.TestNister.java
License: Open Source License
public void computePrecision(String pathName, Similarity similarity, String label) throws IOException {
    // ImageSearcher vis = new GenericImageSearcher(4, SimpleFeature.class, "featureSURFHistogram");
    // ImageSearcher vis = new GenericFastImageSearcher(4, CEDD.class, DocumentBuilder.FIELD_NAME_CEDD);
    // VisualWordsImageSearcher vis = new VisualWordsImageSearcher(4, similarity, DocumentBuilder.FIELD_NAME_SIFT_VISUAL_WORDS);
    VisualWordsImageSearcher vis = new VisualWordsImageSearcher(4, similarity,
            DocumentBuilder.FIELD_NAME_SURF + DocumentBuilder.FIELD_NAME_BOVW);
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(pathName)));
    int queryID, resultID;
    int countSearches = 0, countTruePositives = 0;
    float avgPrecision = 0f;
    Set<Integer> test = StatsUtils.drawSample(100, 10200);
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    for (int i : test) {
        if (!(reader.hasDeletions() && !liveDocs.get(i))) {
            ImageSearchHits hits = vis.search(reader.document(i), reader);
            String s = reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
            s = s.replaceAll("\\D", "");
            queryID = Integer.parseInt(s);
            countTruePositives = 0;
            for (int k = 0; k < hits.length(); k++) {
                String name = hits.doc(k).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
                name = name.replaceAll("\\D", "");
                resultID = Integer.parseInt(name);
                if (queryID / 4 == resultID / 4) { // it's a hit.
                    countTruePositives++;
                }
            }
            countSearches++;
            avgPrecision += (float) countTruePositives / 4f;
        }
    }
    avgPrecision = avgPrecision / (float) countSearches;
    FileWriter fw = new FileWriter(new File("precision_results.txt"), true);
    System.out.println(label + " p@4= " + avgPrecision);
    fw.write(label + " p@4= " + avgPrecision + "\n");
    fw.close();
}
From source file: net.semanticmetadata.lire.benchmarking.TestNister.java
License: Open Source License
public void testDocLengthIDF(String pathName) throws IOException {
    df = new double[1024];
    int[] len = new int[10200];
    avgDocLength = 0;
    double numDocs = 0;
    for (int i = 0; i < df.length; i++)
        df[i] = 0;
    for (int i = 0; i < len.length; i++)
        len[i] = 0;
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(pathName)));
    for (int i = 0; i < reader.numDocs(); i++) {
        String s = reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        String f = reader.document(i).getValues("featureSURFHistogram")[0];
        SimpleFeature sf = new SimpleFeature();
        sf.setStringRepresentation(f);
        double[] h = sf.getDoubleHistogram();
        for (int j = 0; j < h.length; j++) {
            if (h[j] > 0.0)
                df[j] += 1; // add to the document frequency
            avgDocLength += h[j];
            len[i] += h[j];
        }
        numDocs += 1;
    }
    // System.out.println("avgDocLength = " + avgDocLength / numDocs);
}
From source file: net.semanticmetadata.lire.benchmarking.TestSimple.java
License: Open Source License
private void doSearch(ImageSearcher searcher, String prefix, IndexReader reader) throws IOException {
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    String fileName, fullFileName;
    Document queryDoc;
    ImageSearchHits hits;
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        fullFileName = reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        fileName = getIDfromFileName(fullFileName);
        if (allQueries.contains(fileName)) {
            // ok, we've got a query here for a document ...
            queryDoc = reader.document(i);
            hits = searcher.search(queryDoc, reader);
            FileUtils.browseUri(FileUtils.saveImageResultsToHtml(prefix + "-" + fileName, hits, fullFileName));
        }
    }
    for (int i = 0; i < outsideQueries.size(); i++) {
        fullFileName = outsideQueries.get(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        fileName = getIDfromFileName(fullFileName);
        if (allQueries.contains(fileName)) {
            // ok, we've got a query here for a document ...
            queryDoc = outsideQueries.get(i);
            hits = searcher.search(queryDoc, reader);
            FileUtils.browseUri(FileUtils.saveImageResultsToHtml(prefix + "-" + fileName, hits, fullFileName));
        }
    }
}
From source file: net.semanticmetadata.lire.benchmarking.TestUCID.java
License: Open Source License
private void computeMAP(ImageSearcher searcher, String prefix, IndexReader reader) throws IOException {
    double queryCount = 0d;
    double errorRate = 0;
    double map = 0;
    double p10 = 0;
    int errorCount = 0;
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    PrintWriter fw;
    if (searcher.toString().contains("ImageSearcherUsingWSs")) {
        (new File("eval/" + prefix.replace(' ', '_') + "/" + clusters + "/")).mkdirs();
        fw = new PrintWriter(new File("eval/" + prefix.replace(' ', '_') + "/" + clusters + "/"
                + prefix.replace(' ', '_') + "-" + db + clusters
                + searcher.toString().split("\\s+")[searcher.toString().split("\\s+").length - 1] + ".txt"));
    } else
        fw = new PrintWriter(new File("eval/" + prefix.replace(' ', '_') + "-" + db + clusters + ".txt"));
    Hashtable<Integer, String> evalText = new Hashtable<Integer, String>(260);
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        String fileName = getIDfromFileName(
                reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
        if (queries.keySet().contains(fileName)) {
            String tmpEval = "";
            queryCount += 1d;
            // ok, we've got a query here for a document ...
            Document queryDoc = reader.document(i);
            ImageSearchHits hits = searcher.search(queryDoc, reader);
            double rank = 0;
            double avgPrecision = 0;
            double found = 0;
            double tmpP10 = 0;
            Locale.setDefault(Locale.US);
            for (int y = 0; y < hits.length(); y++) {
                String hitFile = getIDfromFileName(
                        hits.doc(y).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
                // TODO: Sort by query ID!
                tmpEval += String.format(Locale.US, "%d 1 %s %d %.2f test\n", query2id.get(fileName),
                        hitFile.substring(0, hitFile.lastIndexOf('.')), (int) rank + 1, hits.score(y));
                rank++;
                if (queries.get(fileName).contains(hitFile) || hitFile.equals(fileName)) { // it's a hit.
                    found++;
                    // TODO: Compute error rate, etc. here.
                    avgPrecision += found / rank;
                    if (rank <= 10)
                        tmpP10++;
                } else { // nothing has been found.
                    if (rank == 1)
                        errorRate += 1d;
                }
            }
            avgPrecision /= (double) (1d + queries.get(fileName).size());
            if (!(found - queries.get(fileName).size() == 1)) {
                // some of the results have not been found. We have to deal with it ...
                errorCount++;
            }
            map += avgPrecision;
            p10 += tmpP10;
            evalText.put(query2id.get(fileName), tmpEval);
        }
    }
    for (int i = 0; i < query2id.size(); i++) {
        fw.write(evalText.get(i + 1));
    }
    fw.close();
    errorRate = errorRate / queryCount;
    map = map / queryCount;
    p10 = p10 / (queryCount * 10d);
    String s;
    if (searcher.toString().contains("ImageSearcherUsingWSs"))
        s = String.format("%s\t%.4f\t%.4f\t%.4f\t(%s)", prefix, map, p10, errorRate,
                searcher.toString().split("\\s+")[searcher.toString().split("\\s+").length - 1]);
    else
        s = String.format("%s\t%.4f\t%.4f\t%.4f", prefix, map, p10, errorRate);
    if (errorCount > 0) {
        // some of the results have not been found. We have to deal with it ...
        s += "\t~~\tDid not find result ;(\t(" + errorCount + ")";
    }
    System.out.println(s);
}
From source file: net.semanticmetadata.lire.benchmarking.TestUCID.java
License: Open Source License
private void testSearchSpeed(ArrayList<String> images, final Class featureClass) throws IOException {
    parallelIndexer = new ParallelIndexer(8, indexPath, testExtensive, true) {
        @Override
        public void addBuilders(ChainedDocumentBuilder builder) {
            builder.addBuilder(new GenericDocumentBuilder(featureClass, "feature"));
        }
    };
    parallelIndexer.run();
    IndexReader reader = DirectoryReader
            .open(new RAMDirectory(FSDirectory.open(new File(indexPath)), IOContext.READONCE));
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    double queryCount = 0d;
    ImageSearcher searcher = new GenericFastImageSearcher(100, featureClass, "feature");
    long ms = System.currentTimeMillis();
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        String fileName = getIDfromFileName(
                reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
        if (queries.keySet().contains(fileName)) {
            queryCount += 1d;
            // ok, we've got a query here for a document ...
            Document queryDoc = reader.document(i);
            ImageSearchHits hits = searcher.search(queryDoc, reader);
        }
    }
    ms = System.currentTimeMillis() - ms;
    System.out.printf("%s \t %3.1f \n",
            featureClass.getName().substring(featureClass.getName().lastIndexOf('.') + 1),
            (double) ms / queryCount);
}
From source file: net.semanticmetadata.lire.benchmarking.TestUniversal.java
License: Open Source License
private void computeMAP(ImageSearcher searcher, String prefix, IndexReader reader, int clusters)
        throws IOException {
    long start = System.currentTimeMillis();
    long timeOfSearch = 0, ms;
    double queryCount = 0d;
    double errorRate = 0;
    double map = 0;
    double p10 = 0;
    int errorCount = 0;
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    PrintWriter fw;
    if (searcher.toString().contains("ImageSearcherUsingWSs")) {
        (new File("eval/" + db + "/" + prefix.replace(' ', '_') + "/" + clusters + "/")).mkdirs();
        fw = new PrintWriter(new File("eval/" + db + "/" + prefix.replace(' ', '_') + "/" + clusters + "/"
                + prefix.replace(' ', '_') + "-" + db + clusters
                + searcher.toString().split("\\s+")[searcher.toString().split("\\s+").length - 1] + ".txt"));
    } else {
        (new File("eval/" + db + "/")).mkdirs();
        if (clusters > 0)
            fw = new PrintWriter(
                    new File("eval/" + db + "/" + prefix.replace(' ', '_') + "-" + db + clusters + ".txt"));
        else
            fw = new PrintWriter(
                    new File("eval/" + db + "/" + prefix.replace(' ', '_') + "-" + db + "Global.txt"));
    }
    Hashtable<Integer, String> evalText = new Hashtable<Integer, String>(260);
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        String fileName = getIDfromFileName(
                reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
        if (queries.keySet().contains(fileName)) {
            String tmpEval = "";
            queryCount += 1d;
            // ok, we've got a query here for a document ...
            Document queryDoc = reader.document(i);
            ms = System.currentTimeMillis();
            ImageSearchHits hits = searcher.search(queryDoc, reader);
            timeOfSearch += System.currentTimeMillis() - ms;
            double rank = 0;
            double avgPrecision = 0;
            double found = 0;
            double tmpP10 = 0;
            Locale.setDefault(Locale.US);
            for (int y = 0; y < hits.length(); y++) {
                String hitFile = getIDfromFileName(reader.document(hits.documentID(y))
                        .getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
                // TODO: Sort by query ID!
                tmpEval += String.format(Locale.US, "%d 1 %s %d %.2f test\n", query2id.get(fileName),
                        hitFile.substring(0, hitFile.lastIndexOf('.')), (int) rank + 1, hits.score(y));
                rank++;
                if (queries.get(fileName).contains(hitFile) || hitFile.equals(fileName)) { // it's a hit.
                    found++;
                    // TODO: Compute error rate, etc. here.
                    avgPrecision += found / rank;
                    if (rank <= 10)
                        tmpP10++;
                } else { // nothing has been found.
                    if (rank == 1)
                        errorRate += 1d;
                }
            }
            avgPrecision /= (double) (1d + queries.get(fileName).size());
            if (!(found - queries.get(fileName).size() == 1)) {
                // some of the results have not been found. We have to deal with it ...
                errorCount++;
            }
            map += avgPrecision;
            p10 += tmpP10;
            evalText.put(query2id.get(fileName), tmpEval);
        }
    }
    for (int i = 0; i < query2id.size(); i++) {
        fw.write(evalText.get(i + 1));
    }
    fw.close();
    errorRate = errorRate / queryCount;
    map = map / queryCount;
    p10 = p10 / (queryCount * 10d);
    double h = (System.currentTimeMillis() - start) / 3600000.0;
    double m = (h - Math.floor(h)) * 60.0;
    double s = (m - Math.floor(m)) * 60;
    String str = String.format("%s%02d:%02d", (((int) h > 0) ? String.format("%02d:", (int) h) : ""),
            (int) m, (int) s) + " ~ ";
    if (searcher.toString().contains("ImageSearcherUsingWSs"))
        str += String.format("%s%s\t%.4f\t%.4f\t%.4f\t(%s)", prefix,
                ((clusters > 0) ? ("\t" + clusters) : ""), map, p10, errorRate,
                searcher.toString().split("\\s+")[searcher.toString().split("\\s+").length - 1]);
    else
        str += String.format("%s%s\t%.4f\t%.4f\t%.4f", prefix, ((clusters > 0) ? ("\t" + clusters) : ""),
                map, p10, errorRate);
    if (errorCount > 0) {
        // some of the results have not been found. We have to deal with it ...
        str += "\t~~\tDid not find result ;(\t(" + errorCount + ")";
    }
    h = timeOfSearch / 3600000.0;
    m = (h - Math.floor(h)) * 60.0;
    s = (m - Math.floor(m)) * 60;
    str += " ~ TimeOfsearch: "
            + String.format("%s%02d:%02d", (((int) h > 0) ? String.format("%02d:", (int) h) : ""),
                    (int) m, (int) s);
    System.out.println(str);
}
From source file: net.semanticmetadata.lire.benchmarking.TestUniversal.java
License: Open Source License
private void testSearchSpeed(Class<? extends GlobalFeature> featureClass) throws IOException {
    ParallelIndexer parallelIndexer = new ParallelIndexer(DocumentBuilder.NUM_OF_THREADS, indexPath,
            testExtensive, true);
    parallelIndexer.addExtractor(featureClass);
    parallelIndexer.run();
    IndexReader reader = DirectoryReader
            .open(new RAMDirectory(FSDirectory.open(Paths.get(indexPath)), IOContext.READONCE));
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    double queryCount = 0d;
    ImageSearcher searcher = new GenericFastImageSearcher(100, featureClass);
    long ms = System.currentTimeMillis();
    String fileName;
    Document queryDoc;
    ImageSearchHits hits;
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        fileName = getIDfromFileName(reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
        if (queries.keySet().contains(fileName)) {
            queryCount += 1d;
            // ok, we've got a query here for a document ...
            queryDoc = reader.document(i);
            hits = searcher.search(queryDoc, reader);
        }
    }
    ms = System.currentTimeMillis() - ms;
    System.out.printf("%s \t %3.1f \n",
            featureClass.getName().substring(featureClass.getName().lastIndexOf('.') + 1),
            (double) ms / queryCount);
}
From source file: net.semanticmetadata.lire.benchmarking.TestWang.java
License: Open Source License
public void tttestGetDistribution() throws IOException {
    BufferedWriter bw = new BufferedWriter(new FileWriter("data.csv"));
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    CEDD cedd1 = new CEDD();
    FCTH fcth1 = new FCTH();
    CEDD cedd2 = new CEDD();
    FCTH fcth2 = new FCTH();
    JCD jcd1 = new JCD();
    JCD jcd2 = new JCD();
    String[] cls;
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    int docs = reader.numDocs();
    for (int i = 0; i < docs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        Document doc = reader.document(i);
        cls = doc.getValues(DocumentBuilder.FIELD_NAME_CEDD);
        if (cls != null && cls.length > 0)
            cedd1.setStringRepresentation(cls[0]);
        cls = doc.getValues(DocumentBuilder.FIELD_NAME_FCTH);
        if (cls != null && cls.length > 0)
            fcth1.setStringRepresentation(cls[0]);
        // compare against every later document for the pairwise distance distribution
        for (int j = i + 1; j < docs; j++) {
            if (reader.hasDeletions() && !liveDocs.get(j))
                continue; // if it is deleted, just ignore it.
            Document doc2 = reader.document(j);
            cls = doc2.getValues(DocumentBuilder.FIELD_NAME_CEDD);
            if (cls != null && cls.length > 0)
                cedd2.setStringRepresentation(cls[0]);
            cls = doc2.getValues(DocumentBuilder.FIELD_NAME_FCTH);
            if (cls != null && cls.length > 0)
                fcth2.setStringRepresentation(cls[0]);
            jcd1.init(cedd1, fcth1);
            jcd2.init(cedd2, fcth2);
            bw.write(cedd1.getDistance(cedd2) + ";" + fcth1.getDistance(fcth2) + ";"
                    + jcd1.getDistance(jcd2) + "\n");
        }
        if (i % 100 == 0)
            System.out.println(i + " entries processed ... ");
    }
    bw.close();
}