Example usage for org.apache.lucene.index IndexReader document

List of usage examples for the document method of org.apache.lucene.index.IndexReader

Introduction

On this page you can find example usages of the org.apache.lucene.index.IndexReader.document method.

Prototype




public final Document document(int docID) throws IOException 

Source Link

Document

Returns the stored fields of the nth Document in this index.

Usage

From source file:net.semanticmetadata.lire.indexing.MetricSpacesTest.java

License:Open Source License

/**
 * Searches the "wang-cedd" index with one sample image through the metric-spaces inverted
 * list index and prints score, identifier and document number of every hit.
 *
 * @throws IOException if the query image or the index cannot be read.
 */
public void testMetrics() throws IOException {
    String smallIdx = "wang-cedd";
    MetricSpacesInvertedListIndexing ms = MetricSpacesInvertedListIndexing.getDefaultInstance();
    MetricSpacesInvertedListIndexing.numReferenceObjectsUsed = 10;
    MetricSpacesInvertedListIndexing.numReferenceObjects = 50;

    // ImageIO.read(InputStream) does not close the stream it is given, so close it here.
    TopDocs docs;
    FileInputStream queryImage = new FileInputStream("wang-data-1000/10.jpg");
    try {
        docs = ms.search(ImageIO.read(queryImage), smallIdx);
    } finally {
        queryImage.close();
    }

    IndexReader ir = DirectoryReader.open(FSDirectory.open(new File(smallIdx)));
    try {
        for (int i = 0; i < docs.scoreDocs.length; i++) {
            ScoreDoc scoreDoc = docs.scoreDocs[i];
            String identifier = ir.document(scoreDoc.doc).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
            System.out.println(scoreDoc.score + ": " + identifier + " \t(" + scoreDoc.doc + ")");
        }
    } finally {
        // release the index files held by the reader
        ir.close();
    }
}

From source file:net.semanticmetadata.lire.lucene.TestRerankTextSearch.java

License:Open Source License

/**
 * Runs a text search on the "tags" field, prints the hits, re-ranks them with the FCTH
 * feature and prints the re-ranked hits. Both result lists are also written to HTML and
 * opened in a browser.
 *
 * @throws IOException    if the index cannot be read.
 * @throws ParseException if the tag string cannot be parsed into a query.
 */
public void testSearch() throws IOException, ParseException {
    // create a Lucene IndexReader and the according IndexSearcher:
    IndexReader reader = DirectoryReader.open(FSDirectory.open(testIndex));
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        // The QueryParser takes a String and creates a query out of it. Make sure you use the same field
        // as for indexing, in this case "tags"
        QueryParser q = new QueryParser(Version.LUCENE_42, "tags", new SimpleAnalyzer(Version.LUCENE_42));
        // take the tags of the document with id 1 (ids are zero-based, so this is the second document):
        Query query = q.parse(reader.document(1).getValues("tags")[0]);
        // now that's the actual search:
        // NOTE: The number of results here is critical. The fewer documents are returned here, the
        // less the image re-ranking can mess up. However, the recall (the absolute number of relevant
        // documents returned) is also influenced by this. Best to try several values like 10, 100, 200, 500, ...
        TopDocs results = searcher.search(query, 10);
        // here we print the results of the text search.
        System.out.println("-----------> SEARCH RESULTS ...");
        for (int i = 0; i < results.scoreDocs.length; i++) {
            ScoreDoc scoreDoc = results.scoreDocs[i];
            System.out.print(scoreDoc.score + "\t: ");
            // reader.document(scoreDoc.doc).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0] gets you the actual image file path.
            // LIRE manages all needed field names as static Strings in DocumentBuilder ...
            System.out.print(
                    reader.document(scoreDoc.doc).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0] + " -> ");
            System.out.println(reader.document(scoreDoc.doc).getValues("tags")[0]);
        }
        // just for a visual example ... this will pop up a browser window
        FileUtils.browseUri(FileUtils.saveImageResultsToHtml("text", results, reader,
                reader.document(1).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]));

        // and now for the re-ranking:
        // make sure to use a low level feature that has been indexed.
        RerankFilter rerank = new RerankFilter(FCTH.class, DocumentBuilder.FIELD_NAME_FCTH);
        // note that you need the document here, it contains the low level feature ...
        // if you don't have it but just the image you need to create a new one with the
        // appropriate DocumentBuilder.
        ImageSearchHits hitsReranked = rerank.filter(results, reader, reader.document(1));
        // and here we print the re-ranked hits:
        System.out.println("-----------> RERANKED ...");
        for (int i = 0; i < hitsReranked.length(); i++) {
            System.out.print(hitsReranked.score(i) + "\t: ");
            System.out.print(hitsReranked.doc(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0] + " -> ");
            System.out.println(hitsReranked.doc(i).getValues("tags")[0]);
        }
        // just for a visual example ... this will pop up a browser window.
        FileUtils.browseUri(FileUtils.saveImageResultsToHtml("reranked", hitsReranked,
                reader.document(1).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]));
    } finally {
        // the reader was previously leaked; release its index files even if the test fails
        reader.close();
    }
}

From source file:net.semanticmetadata.lire.lucene.TestSearchID.java

License:Open Source License

/**
 * Looks up the document whose identifier field equals "img01.JPG" and prints the stored
 * identifier of the top hit.
 *
 * @throws IOException if the index cannot be read.
 */
public void testSearch() throws IOException {
    // use GeneralTest to create the index ...
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-small")));
    try {
        IndexSearcher is = new IndexSearcher(reader);
        TopDocs docs = is.search(new TermQuery(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER, "img01.JPG")),
                10);
        // NOTE: throws ArrayIndexOutOfBoundsException when the identifier is not in the index
        Document result = reader.document(docs.scoreDocs[0].doc);
        System.out.println(result.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
    } finally {
        // the reader was previously leaked
        reader.close();
    }
}

From source file:net.semanticmetadata.lire.sampleapp.IndexingAndSearchWithLocalFeatures.java

License:Open Source License

/**
 * Linear search on the indexed data./*from w  ww  . j  a  va2 s.com*/
 * @param indexPath
 * @throws IOException
 */
public static void search(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath)));

    // make sure that this matches what you used for indexing (see below) ...
    ImageSearcher imgSearcher = new GenericFastImageSearcher(1000, CEDD.class,
            SimpleExtractor.KeypointDetector.CVSURF, new BOVW(), 128, true, reader, indexPath + ".config");
    // just a static example with a given image.
    ImageSearchHits hits = imgSearcher
            .search(ImageIO.read(new File("testdata/ferrari/black/2828686873_2fa36f83d7_b.jpg")), reader);
    for (int i = 0; i < hits.length(); i++) {
        System.out.printf("%.2f: (%d) %s\n", hits.score(i), hits.documentID(i),
                reader.document(hits.documentID(i)).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]);
    }
}

From source file:net.semanticmetadata.lire.sampleapp.Searcher.java

License:Open Source License

/**
 * Command line entry point: searches the index in directory "index" for images similar to
 * the image file given as first argument and prints score and identifier of each hit.
 *
 * @param args {@code args[0]} is the path of the query image.
 * @throws IOException if the index cannot be read.
 */
public static void main(String[] args) throws IOException {
    // Checking if arg[0] is there and if it is an image.
    BufferedImage img = null;
    if (args.length > 0) {
        File f = new File(args[0]);
        if (f.exists()) {
            try {
                // ImageIO.read returns null (no exception) when the file is not a readable image,
                // so the null check below is what decides whether a query image is available.
                img = ImageIO.read(f);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    if (img == null) {
        System.out.println("No image given as first argument.");
        System.out.println("Run \"Searcher <query image>\" to search for <query image>.");
        System.exit(1);
    }

    // try-with-resources: the reader was previously never closed
    try (IndexReader ir = DirectoryReader.open(FSDirectory.open(Paths.get("index")))) {
        ImageSearcher searcher = new GenericFastImageSearcher(30, CEDD.class);

        // searching with an image file ...
        ImageSearchHits hits = searcher.search(img, ir);
        for (int i = 0; i < hits.length(); i++) {
            String fileName = ir.document(hits.documentID(i)).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
            System.out.println(hits.score(i) + ": \t" + fileName);
        }
    }
}

From source file:net.semanticmetadata.lire.searchers.BitSamplingImageSearcher.java

License:Open Source License

/**
 * Two-stage search: first retrieves candidates with a boolean OR query over the given hash
 * terms, then re-ranks the candidates by feature distance to the query feature and keeps
 * at most {@code maximumHits} of them.
 *
 * @param hashes       hash terms of the query image, matched against {@code hashesFieldName}.
 * @param queryFeature feature of the query image used for the distance based re-ranking.
 * @param reader       the index to search.
 * @return the re-ranked hits together with the largest distance among them.
 * @throws IOException if the index cannot be read.
 */
private ImageSearchHits search(String[] hashes, GlobalFeature queryFeature, IndexReader reader)
        throws IOException {
    // first search by text:
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(new BaseSimilarity());
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    for (int i = 0; i < hashes.length; i++) {
        // be aware that the name of the field must match the one you put the hashes in before.
        // With partialHashes enabled each hash is used with probability 0.5; otherwise all are used.
        if (!partialHashes || Math.random() < 0.5) {
            builder.add(new BooleanClause(new TermQuery(new Term(hashesFieldName, hashes[i] + "")),
                    BooleanClause.Occur.SHOULD));
        }
    }
    BooleanQuery query = builder.build();
    TopDocs docs = searcher.search(query, maxResultsHashBased);
    // then re-rank
    TreeSet<SimpleResult> resultScoreDocs = new TreeSet<SimpleResult>();
    double maxDistance = -1d;
    double tmpScore;
    for (int i = 0; i < docs.scoreDocs.length; i++) {
        int docId = docs.scoreDocs[i].doc;
        // load the stored fields once per hit instead of three times
        org.apache.lucene.util.BytesRef stored = reader.document(docId).getBinaryValue(featureFieldName);
        feature.setByteArrayRepresentation(stored.bytes, stored.offset, stored.length);
        tmpScore = queryFeature.getDistance(feature);
        assert (tmpScore >= 0);
        if (resultScoreDocs.size() < maximumHits) {
            resultScoreDocs.add(new SimpleResult(tmpScore, docId));
            maxDistance = Math.max(maxDistance, tmpScore);
        } else if (tmpScore < maxDistance) {
            // if it is nearer to the sample than at least one of the current set:
            // remove the last one ...
            resultScoreDocs.remove(resultScoreDocs.last());
            // add the new one ...
            resultScoreDocs.add(new SimpleResult(tmpScore, docId));
            // and set our new distance border ...
            maxDistance = resultScoreDocs.last().getDistance();
        }
    }
    assert (resultScoreDocs.size() <= maximumHits);
    return new SimpleImageSearchHits(resultScoreDocs, maxDistance);
}

From source file:net.semanticmetadata.lire.searchers.custom.SingleNddCeddImageSearcher.java

License:Open Source License

/**
 * Binds this searcher to the given reader, creates the descriptor instance and, when
 * caching is enabled, loads the feature vector of every document into {@code featureCache}
 * (normalized, or crunched when {@code halfDimensions} is set).
 *
 * @param reader the index to search; when {@code null} nothing is cached.
 * @throws UnsupportedOperationException if the index contains deleted documents.
 */
protected void init(IndexReader reader) {
    this.reader = reader;
    // null-check first: the caching code below already treats a null reader as legal
    if (reader != null && reader.hasDeletions()) {
        // forceMerge requires maxNumSegments >= 1, so the hint must not suggest forceMerge(0)
        throw new UnsupportedOperationException(
                "The index has to be optimized first to be cached! Use IndexWriter.forceMerge(1) to do this.");
    }
    docs = new TreeSet<SimpleResult>();
    try {
        this.cachedInstance = (GlobalFeature) this.descriptorClass.newInstance();
        if (fieldName == null)
            fieldName = this.cachedInstance.getFieldName();
    } catch (InstantiationException e) {
        // pass the exception along so the stack trace is not lost
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher ("
                + descriptorClass.getName() + "): " + e.getMessage(), e);
    } catch (IllegalAccessException e) {
        logger.log(Level.SEVERE, "Error instantiating class for generic image searcher ("
                + descriptorClass.getName() + "): " + e.getMessage(), e);
    }
    // put all respective features into an in-memory cache ...
    if (isCaching && reader != null) {
        // without deletions (checked above) document ids run from 0 to numDocs() - 1
        int numDocs = reader.numDocs();
        featureCache = new ArrayList<double[]>(numDocs);
        try {
            Document d;
            for (int i = 0; i < numDocs; i++) {
                d = reader.document(i);
                cachedInstance.setByteArrayRepresentation(d.getField(fieldName).binaryValue().bytes,
                        d.getField(fieldName).binaryValue().offset, d.getField(fieldName).binaryValue().length);
                // normalize features, so we can use L1
                if (!halfDimensions) {
                    featureCache.add(normalize(cachedInstance.getFeatureVector()));
                } else {
                    featureCache.add(crunch(cachedInstance.getFeatureVector()));
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

From source file:net.semanticmetadata.lire.searchers.custom.TopDocsImageSearcher.java

License:Open Source License

/**
 * Re-ranks the documents of a text search by their distance to the given feature and keeps
 * the {@code maxHits} nearest ones in {@code this.docs}.
 *
 * @param results       the hits of a previous search; only the entries of {@code scoreDocs} are used.
 * @param reader        the index the hits belong to.
 * @param globalFeature the query feature the documents are compared to.
 * @return the largest distance among the retained results, or -1 if there are none.
 * @throws java.io.IOException if a document cannot be read from the index.
 */
protected double findSimilar(TopDocs results, IndexReader reader, GlobalFeature globalFeature)
        throws IOException {
    double maxDistance = -1d;
    boolean hasDeletions = reader.hasDeletions();

    // clear result set ...
    this.docs.clear();
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    // iterate over the hits actually returned: totalHits may be larger than scoreDocs.length
    int numHits = results.scoreDocs.length;
    for (int i = 0; i < numHits; i++) {
        int docId = results.scoreDocs[i].doc;
        // liveDocs is indexed by document id, not by the position in the result list
        if (hasDeletions && liveDocs != null && !liveDocs.get(docId))
            continue; // if it is deleted, just ignore it.

        Document d = reader.document(docId);
        double distance = getDistance(d, globalFeature);
        assert (distance >= 0);
        // if it is the first document:
        if (maxDistance < 0) {
            maxDistance = distance;
        }
        // if the array is not full yet:
        if (this.docs.size() < maxHits) {
            this.docs.add(new SimpleResult(distance, docId));
            if (distance > maxDistance)
                maxDistance = distance;
        } else if (distance < maxDistance) {
            // if it is nearer to the sample than at least one of the current set:
            // remove the last one ...
            this.docs.remove(this.docs.last());
            // add the new one ...
            this.docs.add(new SimpleResult(distance, docId));
            // and set our new distance border ...
            maxDistance = this.docs.last().getDistance();
        }
    }
    return maxDistance;
}

From source file:net.semanticmetadata.lire.searchers.FastOpponentImageSearcher.java

License:Open Source License

/**
 * Linear scan over the index: compares every live document to the given feature and keeps
 * the {@code maxHits} nearest ones in {@code this.docs}.
 *
 * @param reader        the index to scan.
 * @param globalFeature the query feature; its byte array representation is used for comparison.
 * @return the largest distance among the retained results, or -1 if there are none.
 * @throws java.io.IOException if a document cannot be read from the index.
 */
protected double findSimilar(IndexReader reader, GlobalFeature globalFeature) throws IOException {
    maxDistance = -1f;
    // clear result set ...
    this.docs.clear();
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    boolean hasDeletions = reader.hasDeletions();
    Document d;
    double tmpDistance;
    // Document ids range over [0, maxDoc()); numDocs() only counts live documents and would
    // cut the scan short as soon as the index contains deletions.
    int maxDoc = reader.maxDoc();
    byte[] histogram = globalFeature.getByteArrayRepresentation();
    for (int i = 0; i < maxDoc; i++) {
        if (hasDeletions && liveDocs != null && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.

        d = reader.document(i);
        tmpDistance = getDistance(d, histogram);
        assert (tmpDistance >= 0);
        // if it is the first document:
        if (maxDistance < 0) {
            maxDistance = tmpDistance;
        }
        // if the array is not full yet:
        if (this.docs.size() < maxHits) {
            this.docs.add(new SimpleResult(tmpDistance, i));
            if (tmpDistance > maxDistance)
                maxDistance = tmpDistance;
        } else if (tmpDistance < maxDistance) {
            // if it is nearer to the sample than at least one of the current set:
            // remove the last one ...
            this.docs.remove(this.docs.last());
            // add the new one ...
            this.docs.add(new SimpleResult(tmpDistance, i));
            // and set our new distance border ...
            maxDistance = this.docs.last().getDistance();
        }
    }
    return maxDistance;
}

From source file:net.semanticmetadata.lire.searchers.forevaluations.GenericFastImageSearcherForEvaluation.java

License:Open Source License

/**
 * @param reader/*from   ww w  .  j  a va2s .  co m*/
 * @param lireFeature
 * @return the maximum distance found for normalizing.
 * @throws IOException
 */
protected double findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException {
    maxDistance = -1d;

    // clear result set ...
    docs.clear();
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    Document d;
    double tmpDistance;
    int docs = reader.numDocs();
    if (!isCaching) {
        // we read each and every document from the index and then we compare it to the query.
        for (int i = 0; i < docs; i++) {
            if (reader.hasDeletions() && !liveDocs.get(i))
                continue; // if it is deleted, just ignore it.

            d = reader.document(i);
            tmpDistance = getDistance(d, lireFeature);
            assert (tmpDistance >= 0);
            // if the array is not full yet:
            if (this.docs.size() < maxHits) {
                this.docs.add(new SimpleResultForEvaluation(tmpDistance, i,
                        d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]));
                if (tmpDistance > maxDistance)
                    maxDistance = tmpDistance;
            } else if (tmpDistance < maxDistance) {
                // if it is nearer to the sample than at least on of the current set:
                // remove the last one ...
                this.docs.remove(this.docs.last());
                // add the new one ...
                this.docs.add(new SimpleResultForEvaluation(tmpDistance, i,
                        d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]));
                // and set our new distance border ...
                maxDistance = this.docs.last().getDistance();
            }
        }
    } else {
        LinkedList<Consumer> tasks = new LinkedList<Consumer>();
        LinkedList<Thread> threads = new LinkedList<Thread>();
        Consumer consumer;
        Thread thread;
        Thread p = new Thread(new Producer());
        p.start();
        for (int i = 0; i < numThreads; i++) {
            consumer = new Consumer(lireFeature);
            thread = new Thread(consumer);
            thread.start();
            tasks.add(consumer);
            threads.add(thread);
        }
        for (Thread next : threads) {
            try {
                next.join();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
        TreeSet<SimpleResultForEvaluation> tmpDocs;
        boolean flag;
        SimpleResultForEvaluation simpleResult;
        for (Consumer task : tasks) {
            tmpDocs = task.getResult();
            flag = true;
            while (flag && (tmpDocs.size() > 0)) {
                simpleResult = tmpDocs.pollFirst();
                if (this.docs.size() < maxHits) {
                    this.docs.add(simpleResult);
                    if (simpleResult.getDistance() > maxDistance)
                        maxDistance = simpleResult.getDistance();
                } else if (simpleResult.getDistance() < maxDistance) {
                    //                        this.docs.remove(this.docs.last());
                    this.docs.pollLast();
                    this.docs.add(simpleResult);
                    maxDistance = this.docs.last().getDistance();
                } else
                    flag = false;
            }
        }
    }
    return maxDistance;
}