Example usage for org.apache.lucene.index.IndexReader.document(int)

List of usage examples for org.apache.lucene.index.IndexReader.document(int)

Introduction

On this page you can find usage examples for the document(int) method of org.apache.lucene.index.IndexReader.

Prototype




public final Document document(int docID) throws IOException 

Source Link

Document

Returns the stored fields of the nth Document in this index.

Usage

From source file:net.semanticmetadata.lire.GeneralTest.java

License:Open Source License

/**
 * For every configured builder, (re)creates the "-small" index from the test files and then
 * uses each indexed document as a query against the matching searcher.
 * <p>
 * The first hit must carry the same identifier as the query; every later hit must have a
 * score strictly smaller than the hit before it.
 *
 * @throws IOException if a test file cannot be read or the index cannot be written or opened
 */
public void testCreateAndSearchSmallIndex() throws IOException {
    for (int i = 0, buildersLength = builders.length; i < buildersLength; i++) {
        DocumentBuilder b = builders[i];
        // create an index with a specific builder:
        IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-small", true);
        try {
            for (String identifier : testFiles) {
                FileInputStream in = new FileInputStream(testFilesPath + identifier);
                try {
                    Document doc = b.createDocument(in, identifier);
                    doc.add(new StoredField("video_file", "surgery1.mp4"));
                    doc.add(new StoredField("timestamp", "25"));
                    iw.addDocument(doc);
                } finally {
                    // release the file handle even if document creation fails
                    in.close();
                }
            }
        } finally {
            // always release the writer so a failing run does not keep the index open
            iw.close();
        }

        ImageSearcher s = searchers[i];
        IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-small")));
        try {
            for (int k = 0; k < reader.maxDoc(); k++) {
                Document query = reader.document(k);
                ImageSearchHits hits = s.search(query, reader);
                for (int y = 0; y < hits.length(); y++) {
                    Document result = hits.doc(y);
                    if (y == 0) {
                        // check if the first result is the query; comparing the identifiers
                        // directly makes a failure report both values:
                        assertEquals(query.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0],
                                result.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
                        System.out.println(result.getValues("video_file")[0]);
                    } else {
                        // check if they are ordered by distance:
                        assertEquals(hits.score(y) < hits.score(y - 1), true);
                    }
                }
            }
        } finally {
            reader.close();
        }
    }
}

From source file:net.semanticmetadata.lire.GeneralTest.java

License:Open Source License

/**
 * Walks every document of the "ucid-index-39997508" index, decodes the binary
 * "featureCEDDLoDe_Hist" field into a double array, and prints that array for documents whose
 * identifier ends with "00008.png". Afterwards runs a term query on the identifier field and
 * loads the stored fields of each hit.
 *
 * @throws IOException if the index cannot be opened or read
 */
public void testReadIndex() throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("ucid-index-39997508")));
    try {
        for (int k = 0; k < reader.maxDoc(); k++) {
            Document document = reader.document(k);
            BytesRef b = document.getField("featureCEDDLoDe_Hist").binaryValue();
            double[] doubles = SerializationUtils.toDoubleArray(b.bytes, b.offset, b.length);
            if (document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0].endsWith("00008.png")) {
                System.out.println(Arrays.toString(doubles));
            }
        }

        // check lucene tutorials and docs
        IndexSearcher is = new IndexSearcher(reader);
        TopDocs td = is.search(new TermQuery(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, "")), 10);
        for (int i = 0; i < td.scoreDocs.length; i++) {
            ScoreDoc scoreDoc = td.scoreDocs[i];
            // only the load itself is exercised; the returned document is not inspected
            reader.document(scoreDoc.doc);
        }
    } finally {
        // the reader was previously never closed
        reader.close();
    }
}

From source file:net.semanticmetadata.lire.GeneralTest.java

License:Open Source License

/**
 * There was an error that images with the same score but different documents in the index
 * were not included in the result list. Here's the test for that.
 * <p>
 * The image directory is indexed three times into "index-large-new" (the first call passes
 * {@code true} as the last argument, the following two pass {@code false}), then document 0
 * is used as the query and the hits are written out as a PNG prefixed with "duplicate_".
 *
 * @throws IOException if indexing, opening the index, or searching fails
 */
public void testDuplicatesInIndex() throws IOException {
    indexFiles("src\\test\\resources\\images", "index-large-new", 0, true);
    indexFiles("src\\test\\resources\\images", "index-large-new", 0, false);
    indexFiles("src\\test\\resources\\images", "index-large-new", 0, false);

    ImageSearcher s = searchers[0];
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index-large-new")));
    try {
        Document query = reader.document(0);
        ImageSearchHits hits = s.search(query, reader);
        FileUtils.saveImageResultsToPng("duplicate_", hits,
                query.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
    } finally {
        // the reader was previously never closed
        reader.close();
    }
}

From source file:net.semanticmetadata.lire.GeneralTest.java

License:Open Source License

/**
 * Runs ten searches against the "index-large" index, each with a randomly chosen query
 * document, prints score and identifier of every hit, and saves each result list as a PNG
 * named "GeneralTest_testSearchIndexLarge_&lt;run&gt;_".
 *
 * @throws IOException if the index cannot be opened or read
 */
public void testSearchIndexLarge() throws IOException {
    // select one feature for the large index:
    int featureIndex = 0;
    // One reader serves all runs; previously a new reader was opened per run and never closed.
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index-large")));
    try {
        // Keep the original upper bound of 800 but never ask for a doc ID past the index end.
        int queryRange = Math.min(800, reader.maxDoc());
        for (int i = 0; i < 10; i++) {
            int queryDocID = (int) (Math.random() * queryRange);
            ImageSearchHits hits = searchers[featureIndex].search(reader.document(queryDocID), reader);
            for (int j = 0; j < hits.length(); j++) {
                String fileName = hits.doc(j).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
                System.out.println(hits.score(j) + ": \t" + fileName);
            }
            FileUtils.saveImageResultsToPng("GeneralTest_testSearchIndexLarge_" + i + "_", hits,
                    reader.document(queryDocID).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
        }
    } finally {
        reader.close();
    }
}

From source file:net.semanticmetadata.lire.GeneralTest.java

License:Open Source License

/**
 * Measures the average wall-clock time of a search on the "index-large-new" index.
 * <p>
 * Two searches (documents 0 and 1) run before the timer starts; then documents 0..99 are
 * each used as a query once and the elapsed milliseconds divided by the number of runs
 * (integer division) are printed.
 *
 * @throws IOException if the index cannot be opened or read
 */
public void testSearchRunTime() throws IOException {
    // number of timed searches; used for both the loop bound and the average
    final int runs = 100;
    // select one feature for the large index:
    int featureIndex = 0;
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index-large-new")));
    try {
        // untimed searches before the measurement (presumably a warm-up)
        searchers[featureIndex].search(reader.document(0), reader);
        searchers[featureIndex].search(reader.document(1), reader);
        long ms = System.currentTimeMillis();
        for (int queryDocID = 0; queryDocID < runs; queryDocID++) {
            searchers[featureIndex].search(reader.document(queryDocID), reader);
        }
        ms = System.currentTimeMillis() - ms;
        System.out.println("ms = " + ms / runs);
    } finally {
        // the reader was previously never closed
        reader.close();
    }
}

From source file:net.semanticmetadata.lire.GeneralTest.java

License:Open Source License

/**
 * Searches the "index-large" index with a random query document using searcher 4, then
 * applies a {@code RerankFilter} followed by an {@code LsaFilter} (both built from
 * {@code featureClasses[0]} and the CEDD field) and saves the result list after each stage
 * as "GeneralTest_rerank_0_old", "GeneralTest_rerank_1_new" and "GeneralTest_rerank_2_lsa".
 *
 * @throws IOException if the index cannot be opened or read
 */
public void testRerankFilters() throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index-large")));
    try {
        // Keep the original upper bound of 10000 but never ask for a doc ID past the index end.
        int queryDocID = (int) (Math.random() * Math.min(10000, reader.maxDoc()));
        // select one feature for the large index:
        int featureIndex = 4;
        ImageSearchHits hits = searchers[featureIndex].search(reader.document(queryDocID), reader);
        // the query identifier labels all three outputs, so read it from the index only once
        String queryIdentifier = reader.document(queryDocID)
                .getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        RerankFilter rerank = new RerankFilter(featureClasses[0], DocumentBuilder.FIELD_NAME_CEDD);
        LsaFilter lsa = new LsaFilter(featureClasses[0], DocumentBuilder.FIELD_NAME_CEDD);
        FileUtils.saveImageResultsToPng("GeneralTest_rerank_0_old", hits, queryIdentifier);
        hits = rerank.filter(hits, reader.document(queryDocID));
        FileUtils.saveImageResultsToPng("GeneralTest_rerank_1_new", hits, queryIdentifier);
        hits = lsa.filter(hits, reader.document(queryDocID));
        FileUtils.saveImageResultsToPng("GeneralTest_rerank_2_lsa", hits, queryIdentifier);
    } finally {
        // the reader was previously never closed
        reader.close();
    }
}

From source file:net.semanticmetadata.lire.imageanalysis.CEDDTest.java

License:Open Source License

/**
 * Indexes "testdata\\UCID" into "test-idx" with a CEDD builder, then for every indexed
 * document compares the histogram restored from the stored bytes with one freshly extracted
 * from the image file named by the document's identifier. The first bin that differs by more
 * than 0.1 is reported on stderr; nothing is asserted.
 *
 * @throws IOException if the index or an image file cannot be read
 */
public void testIndexSerialization() throws IOException {
    ParallelIndexer pi = new ParallelIndexer(8, "test-idx", "testdata\\UCID", true) {
        @Override
        public void addBuilders(ChainedDocumentBuilder builder) {
            builder.addBuilder(new GenericDocumentBuilder(CEDD.class));
        }
    };
    pi.run();
    IndexReader ir = DirectoryReader.open(FSDirectory.open(new File("test-idx")));
    try {
        CEDD tmp = new CEDD();
        CEDD idx = new CEDD();
        // looked up once instead of creating a throw-away CEDD per document
        String fieldName = new CEDD().getFieldName();
        for (int i = 0; i < ir.maxDoc(); i++) {
            Document d = ir.document(i);
            BytesRef ref = d.getBinaryValue(fieldName);
            idx.setByteArrayRepresentation(ref.bytes, ref.offset, ref.length);
            tmp.extract(ImageIO.read(new File(d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0])));
            // fetch both histograms once per document rather than once per bin
            double[] extracted = tmp.getDoubleHistogram();
            double[] stored = idx.getDoubleHistogram();
            for (int j = 0; j < extracted.length; j++) {
                if (Math.abs(extracted[j] - stored[j]) > 0.1) {
                    System.err.println(
                            d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0] + " error at position " + j);
                    break;
                }
            }
            // assertEquals((double) idx.getDistance(tmp), 0d, 0.00001);
        }
    } finally {
        // the reader was previously never closed
        ir.close();
    }
}

From source file:net.semanticmetadata.lire.imageanalysis.OpponentHistogramTest.java

License:Open Source License

/**
 * Indexes a local image directory with the opponent-histogram builder into "./index-fast-3"
 * and prints three timings: constructing a {@code FastOpponentImageSearcher}, 100 searches
 * with that searcher ("cached"), and 100 searches with the searcher from
 * {@code ImageSearcherFactory.createOpponentHistogramSearcher} ("read from Lucene").
 *
 * @throws IOException if the index cannot be written, opened or read
 */
public void testFastSearch() throws IOException {
    // Return value intentionally unused; presumably this only checks that the custom codec
    // can be resolved before indexing starts — TODO confirm.
    Codec.forName("LireCustomCodec");
    //        ParallelIndexer pin = new ParallelIndexer(7, "./index-fast-3", "testdata/wang-1000") {
    ParallelIndexer pin = new ParallelIndexer(7, "./index-fast-3", "D:\\DataSets\\Flickrphotos\\01", true) {
        @Override
        public void addBuilders(ChainedDocumentBuilder builder) {
            builder.addBuilder(DocumentBuilderFactory.getOpponentHistogramDocumentBuilder());
        }
    };
    pin.run();
    IndexReader ir = DirectoryReader.open(MMapDirectory.open(new File("./index-fast-3")));
    try {
        System.out.println("ir.maxDoc() = " + ir.maxDoc());

        long ms = System.currentTimeMillis();
        ImageSearcher is = new FastOpponentImageSearcher(50);
        ms = System.currentTimeMillis() - ms;
        System.out.println("init ms = " + ms);

        ms = System.currentTimeMillis();
        for (int i = 0; i < 100; i++) {
            is.search(ir.document(i), ir);
        }
        ms = System.currentTimeMillis() - ms;
        System.out.println("cached ms = " + ms);

        is = ImageSearcherFactory.createOpponentHistogramSearcher(50);
        ms = System.currentTimeMillis();
        for (int i = 0; i < 100; i++) {
            is.search(ir.document(i), ir);
        }
        ms = System.currentTimeMillis() - ms;
        System.out.println("read from Lucene ms = " + ms);
    } finally {
        // the reader was previously never closed
        ir.close();
    }
}

From source file:net.semanticmetadata.lire.imageanalysis.ScalableColorTest.java

License:Open Source License

/**
 * Indexes "testdata\\UCID" into "test-idx" with a ScalableColor builder, then for every
 * indexed document compares the histogram restored from the stored bytes with one freshly
 * extracted from the image file named by the document's identifier. The first bin that
 * differs by more than 0.1 is reported on stderr; nothing is asserted.
 *
 * @throws IOException if the index or an image file cannot be read
 */
public void testIndexSerialization() throws IOException {
    ParallelIndexer pi = new ParallelIndexer(8, "test-idx", "testdata\\UCID", true) {
        @Override
        public void addBuilders(ChainedDocumentBuilder builder) {
            builder.addBuilder(new GenericDocumentBuilder(ScalableColor.class));
        }
    };
    pi.run();
    IndexReader ir = DirectoryReader.open(FSDirectory.open(new File("test-idx")));
    try {
        ScalableColor tmp = new ScalableColor();
        ScalableColor idx = new ScalableColor();
        // looked up once instead of creating a throw-away ScalableColor per document
        String fieldName = new ScalableColor().getFieldName();
        for (int i = 0; i < ir.maxDoc(); i++) {
            Document d = ir.document(i);
            BytesRef ref = d.getBinaryValue(fieldName);
            idx.setByteArrayRepresentation(ref.bytes, ref.offset, ref.length);
            tmp.extract(ImageIO.read(new File(d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0])));
            // fetch both histograms once per document rather than once per bin
            double[] extracted = tmp.getDoubleHistogram();
            double[] stored = idx.getDoubleHistogram();
            for (int j = 0; j < extracted.length; j++) {
                if (Math.abs(extracted[j] - stored[j]) > 0.1) {
                    System.err.println(
                            d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0] + " error at position " + j);
                    break;
                }
            }
            // assertEquals((double) idx.getDistance(tmp), 0d, 0.00001);
        }
    } finally {
        // the reader was previously never closed
        ir.close();
    }
}

From source file:net.semanticmetadata.lire.impl.BitSamplingImageSearcher.java

License:Open Source License

/**
 * Two-stage search: a Lucene term query over the given hashes selects candidates, which are
 * then re-ranked by their distance to {@code queryFeature}.
 * <p>
 * At most {@code maximumHits} results are kept; once that many are collected, a candidate
 * only enters the set if its distance is smaller than the current largest distance, in which
 * case the current last element of the set is dropped first.
 *
 * @param hashes       hash terms combined as optional (SHOULD) clauses
 * @param queryFeature feature the candidates are compared against
 * @param reader       index to search and to load candidate documents from
 * @return the collected results together with the largest distance among them
 * @throws IOException if searching or loading a document fails
 */
private ImageSearchHits search(String[] hashes, LireFeature queryFeature, IndexReader reader)
        throws IOException {
    // Stage 1: search by text.
    IndexSearcher indexSearcher = new IndexSearcher(reader);
    indexSearcher.setSimilarity(new BaseSimilarity());
    BooleanQuery hashQuery = new BooleanQuery();
    for (String hash : hashes) {
        // With partialHashes set, each hash is used only when a random draw falls below 0.5.
        if (partialHashes && !(Math.random() < 0.5)) {
            continue;
        }
        // The field name must match the one the hashes were stored under at index time.
        hashQuery.add(new BooleanClause(new TermQuery(new Term(hashesFieldName, String.valueOf(hash))),
                BooleanClause.Occur.SHOULD));
    }
    TopDocs candidates = indexSearcher.search(hashQuery, maxResultsHashBased);

    // Stage 2: re-rank the candidates by feature distance.
    TreeSet<SimpleResult> ranked = new TreeSet<SimpleResult>();
    float maxDistance = -1f;
    for (int i = 0; i < candidates.scoreDocs.length; i++) {
        int docId = candidates.scoreDocs[i].doc;
        // NOTE(review): the document is loaded from the reader once per accessed member here
        // and again when a result is created — consider loading it once per candidate.
        feature.setByteArrayRepresentation(
                reader.document(docId).getBinaryValue(featureFieldName).bytes,
                reader.document(docId).getBinaryValue(featureFieldName).offset,
                reader.document(docId).getBinaryValue(featureFieldName).length);
        float distance = queryFeature.getDistance(feature);
        assert (distance >= 0);
        if (ranked.size() < maximumHits) {
            // still room: take the candidate and track the largest distance seen so far
            ranked.add(new SimpleResult(distance, reader.document(docId), docId));
            maxDistance = Math.max(maxDistance, distance);
        } else if (distance < maxDistance) {
            // nearer than the current border: replace the last element and move the border
            ranked.remove(ranked.last());
            ranked.add(new SimpleResult(distance, reader.document(docId), docId));
            maxDistance = ranked.last().getDistance();
        }
    }
    assert (ranked.size() <= maximumHits);
    return new SimpleImageSearchHits(ranked, maxDistance);
}