Example usage for org.apache.lucene.index.IndexReader#document

Introduction

On this page you can find example usages of the document method of org.apache.lucene.index.IndexReader.

Prototype




public final Document document(int docID) throws IOException

Source Link

Document

Returns the stored fields of the n-th Document in this index.

Usage

From source file:net.semanticmetadata.lire.impl.ParallelImageSearcher.java

License:Open Source License

/**
 * Scans the documents of the index and fills {@code parDocs} with one result
 * set per query feature. Each set holds at most {@code maxHits} entries; once
 * a set is full, a document whose distance is below the current border
 * replaces the set's last element.
 *
 * @param reader      the index to scan
 * @param lireFeature the query features; result set {@code j} belongs to feature {@code j}
 * @return per feature, the distance border of its result set (used for
 *         normalizing), or {@code -1} where no document was scored
 * @throws java.io.IOException if a stored document cannot be read
 */
@SuppressWarnings("unchecked")
private float[] findSimilar(IndexReader reader, LireFeature[] lireFeature) throws IOException {
    float[] maxDistance = new float[lireFeature.length];

    // generic array creation is not possible, hence the raw TreeSet[] and the
    // @SuppressWarnings on this method
    parDocs = new TreeSet[lireFeature.length];
    for (int i = 0; i < parDocs.length; i++) {
        parDocs[i] = new TreeSet<SimpleResult>();
        // -1 marks "no document seen yet" for this feature
        maxDistance[i] = -1f;
    }

    // Needed to check whether a document is deleted; null if the index has no deletions.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    // Document ids run from 0 to maxDoc() - 1. numDocs() only counts live
    // documents, so using it as the bound would skip documents with high ids
    // as soon as the index contains deletions.
    int maxDoc = reader.maxDoc();
    for (int i = 0; i < maxDoc; i++) {
        if (liveDocs != null && !liveDocs.get(i)) {
            continue; // if it is deleted, just ignore it.
        }

        Document d = reader.document(i);
        float[] distance = getDistance(d, lireFeature);
        for (int j = 0; j < distance.length; j++) {
            float f = distance[j];
            // if it is the first document:
            if (maxDistance[j] < 0) {
                maxDistance[j] = f;
            }
            // if the result set is not full yet:
            if (this.parDocs[j].size() < maxHits) {
                this.parDocs[j].add(new SimpleResult(f, d, i));
                if (f > maxDistance[j]) {
                    maxDistance[j] = f;
                }
            } else if (f < maxDistance[j]) {
                // if it is nearer to the sample than at least one of the current set:
                // remove the last one ...
                this.parDocs[j].remove(this.parDocs[j].last());
                // add the new one ...
                this.parDocs[j].add(new SimpleResult(f, d, i));
                // and set our new distance border ...
                maxDistance[j] = this.parDocs[j].last().getDistance();
            }
        }
    }
    return maxDistance;
}

From source file:net.semanticmetadata.lire.impl.searcher.BitSamplingImageSearcher.java

License:Open Source License

/**
 * Runs a hash-based text search to obtain candidates and then re-ranks the
 * candidates by their distance to the query feature.
 *
 * @param hashes       the hash values of the query, one term per entry
 * @param queryFeature the feature the candidates are compared against
 * @param reader       the index to search
 * @return the re-ranked hits together with the distance border of the result set
 * @throws IOException if searching the index or reading a stored document fails
 */
private ImageSearchHits search(String[] hashes, LireFeature queryFeature, IndexReader reader)
        throws IOException {
    // first search by text:
    IndexSearcher searcher = new IndexSearcher(reader);
    BooleanQuery query = new BooleanQuery();
    for (int i = 0; i < hashes.length; i++) {
        // be aware that the name of the field must match the one the hashes were put in before.
        // With partialHashes set, each hash only makes it into the query with probability 0.5.
        if (!partialHashes || Math.random() < 0.5) {
            query.add(new BooleanClause(new TermQuery(new Term(hashesFieldName, hashes[i] + "")),
                    BooleanClause.Occur.SHOULD));
        }
    }
    TopDocs docs = searcher.search(query, maxResultsHashBased);

    // then re-rank
    TreeSet<SimpleResult> resultScoreDocs = new TreeSet<SimpleResult>();
    float maxDistance = -1f;
    float tmpScore;
    for (int i = 0; i < docs.scoreDocs.length; i++) {
        int docId = docs.scoreDocs[i].doc;
        // Load the stored document and its feature bytes once per hit instead of
        // once per accessed member. Types are fully qualified so that this
        // method does not depend on additional imports.
        org.apache.lucene.document.Document document = reader.document(docId);
        org.apache.lucene.util.BytesRef featureBytes = document.getBinaryValue(featureFieldName);
        feature.setByteArrayRepresentation(featureBytes.bytes, featureBytes.offset, featureBytes.length);
        tmpScore = queryFeature.getDistance(feature);
        assert (tmpScore >= 0);
        if (resultScoreDocs.size() < maximumHits) {
            resultScoreDocs.add(new SimpleResult(tmpScore, document, docId));
            maxDistance = Math.max(maxDistance, tmpScore);
        } else if (tmpScore < maxDistance) {
            // if it is nearer to the sample than at least one of the current set:
            // remove the last one ...
            resultScoreDocs.remove(resultScoreDocs.last());
            // add the new one ...
            resultScoreDocs.add(new SimpleResult(tmpScore, document, docId));
            // and set our new distance border ...
            maxDistance = resultScoreDocs.last().getDistance();
        }
    }
    assert (resultScoreDocs.size() <= maximumHits);
    return new SimpleImageSearchHits(resultScoreDocs, maxDistance);
}

From source file:net.semanticmetadata.lire.impl.searcher.GenericFastImageSearcher.java

License:Open Source License

/**
 * Compares the documents of the index to the query feature and collects at
 * most {@code maxHits} results in {@code docs}. Depending on
 * {@code isCaching}, the features are either read from the stored documents
 * or taken from the in-memory {@code featureCache}.
 *
 * @param reader      the index to scan
 * @param lireFeature the query feature
 * @return the distance border of the result set (used for normalizing), or
 *         {@code -1} if no document was scored
 * @throws java.io.IOException if a stored document cannot be read
 */
protected float findSimilar(IndexReader reader, LireFeature lireFeature) throws IOException {
    maxDistance = -1f;

    // clear result set ...
    this.docs.clear();
    // Needed to check whether a document is deleted; null if the index has no deletions.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    Document d;
    float tmpDistance;
    if (!isCaching) {
        // we read each and every document from the index and then we compare it to the query.
        // Document ids run from 0 to maxDoc() - 1; numDocs() only counts live
        // documents and would cut the scan short once the index has deletions.
        int maxDoc = reader.maxDoc();
        for (int i = 0; i < maxDoc; i++) {
            if (liveDocs != null && !liveDocs.get(i)) {
                continue; // if it is deleted, just ignore it.
            }

            d = reader.document(i);
            tmpDistance = getDistance(d, lireFeature);
            assert (tmpDistance >= 0);
            // if it is the first document:
            if (maxDistance < 0) {
                maxDistance = tmpDistance;
            }
            // if the result set is not full yet:
            if (this.docs.size() < maxHits) {
                this.docs.add(new SimpleResult(tmpDistance, d, i));
                if (tmpDistance > maxDistance) {
                    maxDistance = tmpDistance;
                }
            } else if (tmpDistance < maxDistance) {
                // if it is nearer to the sample than at least one of the current set:
                // remove the last one ...
                this.docs.remove(this.docs.last());
                // add the new one ...
                this.docs.add(new SimpleResult(tmpDistance, d, i));
                // and set our new distance border ...
                maxDistance = this.docs.last().getDistance();
            }
        }
    } else {
        // we use the in-memory cache to find the matching docs from the index.
        // count is the position in the cache, which is also used as the document id.
        int count = 0;
        for (Iterator<byte[]> iterator = featureCache.iterator(); iterator.hasNext(); count++) {
            cachedInstance.setByteArrayRepresentation(iterator.next());
            if (liveDocs != null && !liveDocs.get(count)) {
                continue; // if it is deleted, just ignore it.
            }
            tmpDistance = lireFeature.getDistance(cachedInstance);
            assert (tmpDistance >= 0);
            // if it is the first document:
            if (maxDistance < 0) {
                maxDistance = tmpDistance;
            }
            // if the result set is not full yet:
            if (this.docs.size() < maxHits) {
                this.docs.add(new SimpleResult(tmpDistance, reader.document(count), count));
                if (tmpDistance > maxDistance) {
                    maxDistance = tmpDistance;
                }
            } else if (tmpDistance < maxDistance) {
                // if it is nearer to the sample than at least one of the current set:
                // remove the last one ...
                this.docs.remove(this.docs.last());
                // add the new one ...
                this.docs.add(new SimpleResult(tmpDistance, reader.document(count), count));
                // and set our new distance border ...
                maxDistance = this.docs.last().getDistance();
            }
        }
    }
    return maxDistance;
}

From source file:net.semanticmetadata.lire.impl.searcher.KeyWordsImageSearcher.java

License:Open Source License

/**
 * Searches the index with the title of {@code imageInfo} as query string and
 * converts every hit's score into a distance of {@code 1 / score}.
 *
 * @param image     not used by this method
 * @param imageInfo supplies the title that is parsed as the query
 * @param reader    the index to search
 * @return the hits, or {@code null} if the title is null or empty or cannot be parsed
 * @throws IOException if searching the index or reading a stored document fails
 */
public ImageSearchHits search(BufferedImage image, ImageInfo imageInfo, IndexReader reader) throws IOException {
    boolean titlePresent = imageInfo.getTitle() != null && imageInfo.getTitle().length() != 0;
    if (!titlePresent) {
        return null;
    }

    // searcher on top of the given reader
    IndexSearcher indexSearcher = new IndexSearcher(reader);
    // the title serves as the query text
    String queryString = imageInfo.getTitle();
    try {
        Query parsedQuery = qp.parse(queryString);
        TopDocs topDocs = indexSearcher.search(parsedQuery, numMaxHits);

        LinkedList<SimpleResult> collected = new LinkedList<SimpleResult>();
        float largestDistance = 0;
        // turn each score into a distance and remember the largest one
        int hitCount = topDocs.scoreDocs.length;
        for (int rank = 0; rank < hitCount; rank++) {
            float distance = 1f / topDocs.scoreDocs[rank].score;
            largestDistance = Math.max(distance, largestDistance);
            collected.add(new SimpleResult(distance, reader.document(topDocs.scoreDocs[rank].doc), rank));
        }
        return new SimpleImageSearchHits(collected, largestDistance);
    } catch (ParseException e) {
        // an unparsable title is reported and yields no hits
        System.err.println(queryString);
        e.printStackTrace();
        return null;
    }
}

From source file:net.semanticmetadata.lire.impl.searcher.LocationBasedImageSearcher.java

License:Open Source License

/**
 * Scans the documents of the index and collects in {@code docs} at most
 * {@code maxHits} results whose distance to {@code imageInfo} lies between 0
 * and {@code threshold}. The result list is sorted when the method returns.
 *
 * @param reader    the index to scan
 * @param imageInfo the query the documents are compared against
 * @return the distance border of the result set (used for normalizing), or
 *         {@code -1} if no document was accepted
 * @throws java.io.IOException if a stored document cannot be read
 */
protected float findSimilar(IndexReader reader, ImageInfo imageInfo) throws IOException {
    float maxDistance = -1f;
    float tmpDistance = 0f;

    docs.clear();
    // Needed to check whether a document is deleted; null if the index has no deletions.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    // Document ids run from 0 to maxDoc() - 1; numDocs() only counts live
    // documents and would cut the scan short once the index has deletions.
    int maxDoc = reader.maxDoc();
    Document d = null;
    for (int i = 0; i < maxDoc; i++) {
        if (liveDocs != null && !liveDocs.get(i)) {
            continue; // deleted documents are ignored
        }

        d = reader.document(i);
        tmpDistance = getDistance(d, imageInfo);
        // skip documents without a valid distance and those beyond the threshold
        if (tmpDistance < 0 || tmpDistance > this.threshold) {
            continue;
        }

        // if it is the first accepted document:
        if (maxDistance < 0) {
            maxDistance = tmpDistance;
        }
        // if the result list is not full yet:
        if (this.docs.size() < maxHits) {
            this.docs.add(new SimpleResult(tmpDistance, d, i));
            if (tmpDistance > maxDistance) {
                maxDistance = tmpDistance;
            }
        } else if (tmpDistance < maxDistance) {
            // The list is filled in scan order, so it has to be sorted before
            // the last element can be treated as the farthest one. Evicting
            // the unsorted tail could throw away the best hit.
            Collections.sort(docs);
            this.docs.remove(this.docs.size() - 1);
            this.docs.add(new SimpleResult(tmpDistance, d, i));
            Collections.sort(docs);
            // the new border is the farthest remaining result, which need not be the new one
            maxDistance = this.docs.get(this.docs.size() - 1).getDistance();
        }
    }
    // leave the results ordered even if no replacement ever happened
    Collections.sort(docs);
    return maxDistance;
}

From source file:net.semanticmetadata.lire.impl.searcher.TopDocsImageSearcher.java

License:Open Source License

/**
 * Re-ranks the hits of a previous search by their distance to the query
 * feature and collects at most {@code maxHits} of them in {@code docs}.
 *
 * @param results     the hits to re-rank
 * @param reader      the index the hits were retrieved from
 * @param lireFeature the query feature
 * @return the distance border of the result set (used for normalizing), or
 *         {@code -1} if no hit was scored
 * @throws java.io.IOException if a stored document cannot be read
 */
protected float findSimilar(TopDocs results, IndexReader reader, LireFeature lireFeature) throws IOException {
    float maxDistance = -1f;

    // clear result set ...
    this.docs.clear();
    // Needed to check whether a document is deleted; null if the index has no deletions.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    // Only the hits actually returned can be visited. totalHits counts every
    // match of the query and may be larger than the scoreDocs array.
    int hitCount = results.scoreDocs.length;
    for (int i = 0; i < hitCount; i++) {
        int docId = results.scoreDocs[i].doc;
        // liveness is a property of the document id, not of the rank within the hits
        if (liveDocs != null && !liveDocs.get(docId)) {
            continue; // if it is deleted, just ignore it.
        }

        Document d = reader.document(docId);
        float distance = getDistance(d, lireFeature);
        assert (distance >= 0);
        // if it is the first document:
        if (maxDistance < 0) {
            maxDistance = distance;
        }
        // if the result set is not full yet:
        if (this.docs.size() < maxHits) {
            // the third argument stays the rank within results, as before
            this.docs.add(new SimpleResult(distance, d, i));
            if (distance > maxDistance) {
                maxDistance = distance;
            }
        } else if (distance < maxDistance) {
            // if it is nearer to the sample than at least one of the current set:
            // remove the last one ...
            this.docs.remove(this.docs.last());
            // add the new one ...
            this.docs.add(new SimpleResult(distance, d, i));
            // and set our new distance border ...
            maxDistance = this.docs.last().getDistance();
        }
    }
    return maxDistance;
}

From source file:net.semanticmetadata.lire.impl.searcher.VisualWordsImageSearcher.java

License:Open Source License

/**
 * Uses the first value of field {@code fieldName} of the given document as
 * query string, searches the index with it and converts every hit's score
 * into a distance of {@code 1 / score}.
 *
 * @param doc    the query document
 * @param reader the index to search
 * @return the hits, or {@code null} if the query string cannot be parsed
 * @throws IOException if searching the index or reading a stored document fails
 */
public ImageSearchHits search(Document doc, IndexReader reader) throws IOException {
    IndexSearcher indexSearcher = new IndexSearcher(reader);
    indexSearcher.setSimilarity(similarity);
    String queryString = doc.getValues(fieldName)[0];
    try {
        Query parsedQuery = qp.parse(queryString);
        TopDocs topDocs = indexSearcher.search(parsedQuery, numMaxHits);

        LinkedList<SimpleResult> collected = new LinkedList<SimpleResult>();
        float largestDistance = 0;
        // turn each score into a distance and remember the largest one
        int hitCount = topDocs.scoreDocs.length;
        for (int rank = 0; rank < hitCount; rank++) {
            float distance = 1f / topDocs.scoreDocs[rank].score;
            largestDistance = Math.max(distance, largestDistance);
            collected.add(new SimpleResult(distance, reader.document(topDocs.scoreDocs[rank].doc), rank));
        }
        return new SimpleImageSearchHits(collected, largestDistance);
    } catch (ParseException e) {
        // an unparsable query string is reported and yields no hits
        System.err.println(queryString);
        e.printStackTrace();
        return null;
    }
}

From source file:net.semanticmetadata.lire.impl.SimpleImageSearcher.java

License:Open Source License

/**
 * @param reader/*w  ww . j a va2 s. co m*/
 * @param cl
 * @param sc
 * @param eh
 * @return the maximum distance found for normalizing.
 * @throws IOException
 */
private float findSimilar(IndexReader reader, ColorLayoutImpl cl, ScalableColorImpl sc,
        EdgeHistogramImplementation eh) throws IOException {
    float maxDistance = -1f, overallMaxDistance = -1f;
    boolean hasDeletions = reader.hasDeletions();

    // clear result set ...
    docs.clear();

    int docs = reader.numDocs();
    for (int i = 0; i < docs; i++) {
        // bugfix by Roman Kern

        Document d = reader.document(i);
        float distance = getDistance(d, cl, sc, eh);
        // calculate the overall max distance to normalize score afterwards
        if (overallMaxDistance < distance) {
            overallMaxDistance = distance;
        }
        // if it is the first document:
        if (maxDistance < 0) {
            maxDistance = distance;
        }
        // if the array is not full yet:
        if (this.docs.size() < maxHits) {
            this.docs.add(new SimpleResult(distance, d));
            if (distance > maxDistance)
                maxDistance = distance;
        } else if (distance < maxDistance) {
            // if it is nearer to the sample than at least on of the current set:
            // remove the last one ...
            this.docs.remove(this.docs.last());
            // add the new one ...
            this.docs.add(new SimpleResult(distance, d));
            // and set our new distance border ...
            maxDistance = this.docs.last().getDistance();
        }
    }
    return maxDistance;
}

From source file:net.semanticmetadata.lire.impl.SimpleImageSearcher.java

License:Open Source License

/**
 * Groups the documents of the index by their distance to a reference
 * document, which is the first document that is not deleted. Documents that
 * share exactly the same distance are reported as duplicates of each other.
 *
 * @param reader the index to scan
 * @return the groups that contain more than one identifier, or {@code null}
 *         if the index holds no live document or no two documents share a distance
 * @throws FileNotFoundException if no index exists in the reader's directory
 * @throws IOException           if a stored document cannot be read
 */
public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    if (!IndexReader.indexExists(reader.directory()))
        throw new FileNotFoundException("No index found at this specific location.");

    boolean hasDeletions = reader.hasDeletions();
    // Document ids run from 0 to maxDoc() - 1; numDocs() only counts live documents.
    int maxDoc = reader.maxDoc();

    // get the first document that has not been deleted; it serves as the reference
    int first = 0;
    while (first < maxDoc && hasDeletions && reader.isDeleted(first)) {
        first++;
    }
    if (first >= maxDoc) {
        return null; // nothing to compare in an empty index
    }
    Document doc = reader.document(first);

    ScalableColorImpl sc = null;
    ColorLayoutImpl cl = null;
    EdgeHistogramImplementation eh = null;

    // a descriptor stays null if the reference document does not carry the matching field
    String[] cls = doc.getValues(DocumentBuilder.FIELD_NAME_COLORLAYOUT);
    if (cls != null && cls.length > 0)
        cl = new ColorLayoutImpl(cls[0]);
    String[] scs = doc.getValues(DocumentBuilder.FIELD_NAME_SCALABLECOLOR);
    if (scs != null && scs.length > 0)
        sc = new ScalableColorImpl(scs[0]);
    String[] ehs = doc.getValues(DocumentBuilder.FIELD_NAME_EDGEHISTOGRAM);
    if (ehs != null && ehs.length > 0)
        eh = new EdgeHistogramImplementation(ehs[0]);

    // distance to the reference document -> identifiers of the documents at that distance
    HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>();

    // find duplicates ...
    int numDuplicates = 0;
    for (int i = first; i < maxDoc; i++) {
        if (hasDeletions && reader.isDeleted(i)) {
            continue; // deleted documents must not be read
        }
        Document d = reader.document(i);
        float distance = getDistance(d, cl, sc, eh);

        List<String> sameDistance = duplicates.get(distance);
        if (sameDistance == null) {
            sameDistance = new LinkedList<String>();
            duplicates.put(distance, sameDistance);
        } else {
            numDuplicates++;
        }
        sameDistance.add(d.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
    }

    if (numDuplicates == 0)
        return null;

    // keep only the groups that really contain more than one document
    LinkedList<List<String>> results = new LinkedList<List<String>>();
    for (List<String> group : duplicates.values()) {
        if (group.size() > 1) {
            results.add(group);
        }
    }
    return new SimpleImageDuplicates(results);
}

From source file:net.semanticmetadata.lire.indexing.HashingTest.java

License:Open Source License

/**
 * Re-scores the given hits with {@code 1 / distance} between the stored
 * opponent histogram of each hit and the query feature, and returns them
 * sorted by descending score.
 *
 * @param docs    the hits to re-rank
 * @param feature the query feature; a second instance of its class is created to decode the stored bytes
 * @param reader  the index the hits were retrieved from
 * @return the re-scored hits, highest score first, with the total hit count of {@code docs}
 * @throws IOException            if a stored document cannot be read
 * @throws IllegalAccessException if the feature class cannot be instantiated
 * @throws InstantiationException if the feature class cannot be instantiated
 */
private TopDocs rerank(TopDocs docs, LireFeature feature, IndexReader reader)
        throws IOException, IllegalAccessException, InstantiationException {
    LireFeature tmp = feature.getClass().newInstance();
    ArrayList<ScoreDoc> res = new ArrayList<ScoreDoc>(docs.scoreDocs.length);
    float maxScore = 0f;
    for (int i = 0; i < docs.scoreDocs.length; i++) {
        int docId = docs.scoreDocs[i].doc;
        // The stored value is a slice (bytes, offset, length) of a possibly
        // larger array, so offset and length have to be honoured. The type is
        // fully qualified so that no additional import is needed.
        org.apache.lucene.util.BytesRef stored = reader.document(docId)
                .getBinaryValue(DocumentBuilder.FIELD_NAME_OPPONENT_HISTOGRAM);
        tmp.setByteArrayRepresentation(stored.bytes, stored.offset, stored.length);
        // compute the distance once and reuse the resulting score
        float score = 1 / tmp.getDistance(feature);
        maxScore = Math.max(score, maxScore);
        res.add(new ScoreDoc(docId, score));
    }
    // sorting res by descending score ...
    Collections.sort(res, new Comparator<ScoreDoc>() {
        @Override
        public int compare(ScoreDoc o1, ScoreDoc o2) {
            // Float.compare is a consistent total order, also for infinite or NaN scores
            return Float.compare(o2.score, o1.score);
        }
    });
    // re-ranking does not change how many documents matched the query
    return new TopDocs(docs.totalHits, res.toArray(new ScoreDoc[res.size()]), maxScore);
}