Example usage for org.apache.lucene.index MultiDocValues getBinaryValues

List of usage examples for org.apache.lucene.index MultiDocValues getBinaryValues

Introduction

On this page you can find example usages of org.apache.lucene.index.MultiDocValues.getBinaryValues.

Prototype

public static BinaryDocValues getBinaryValues(final IndexReader r, final String field) throws IOException 

Source Link

Document

Returns a BinaryDocValues for a reader's docvalues (potentially merging on-the-fly)

Usage

From source file:com.github.flaxsearch.util.ReaderManager.java

License:Apache License

/**
 * Looks up the binary doc values of a field, either for a single segment or for the whole index.
 *
 * @param segment the segment passed to {@code getLeafReader}; when {@code null} the values are
 *                obtained for the reader returned by {@code getIndexReader()} through
 *                {@link MultiDocValues#getBinaryValues}
 * @param field   the name of the doc values field
 * @return whatever the selected reader returns for {@code field}
 * @throws IOException if the underlying lookup fails
 */
default BinaryDocValues getBinaryDocValues(Integer segment, String field) throws IOException {
    if (segment != null) {
        // A concrete segment was requested: ask that leaf directly.
        return getLeafReader(segment).getBinaryDocValues(field);
    }
    // No segment given: fall back to the view over the complete index reader.
    return MultiDocValues.getBinaryValues(getIndexReader(), field);
}

From source file:net.semanticmetadata.lire.searchers.GenericDocValuesImageSearcher.java

License:Open Source License

/**
 * Loads the binary doc values of the cached feature's field from {@code reader} into
 * {@code docValues}. Does nothing when no reader is set; an {@link IOException} during the
 * lookup is printed and otherwise ignored, leaving {@code docValues} unchanged.
 */
protected void init() {
    if (reader == null) {
        // nothing to read from
        return;
    }
    // put all respective features into an in-memory cache ...
    final String fieldName = cachedInstance.getFieldName();
    try {
        docValues = MultiDocValues.getBinaryValues(reader, fieldName);
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:net.semanticmetadata.lire.solr.FastLireRequestHandler.java

License:Open Source License

/**
 * Actual search implementation based on (i) hash based retrieval and (ii) feature based re-ranking.
 *
 * @param rsp/*from ww  w.j  a  v  a  2  s  . c om*/
 * @param searcher
 * @param hashFieldName the hash field name
 * @param maximumHits
 * @param terms
 * @param queryFeature
 * @throws java.io.IOException
 * @throws IllegalAccessException
 * @throws InstantiationException
 */
private void doSearch(SolrQueryRequest req, SolrQueryResponse rsp, SolrIndexSearcher searcher,
        String hashFieldName, int maximumHits, List<Term> terms, Query query, LireFeature queryFeature)
        throws IOException, IllegalAccessException, InstantiationException {
    // temp feature instance
    LireFeature tmpFeature = queryFeature.getClass().newInstance();
    // Taking the time of search for statistical purposes.
    time = System.currentTimeMillis();

    Filter filter = null;
    // if the request contains a filter:
    if (req.getParams().get("fq") != null) {
        // only filters with [<field>:<value> ]+ are supported
        StringTokenizer st = new StringTokenizer(req.getParams().get("fq"), " ");
        LinkedList<Term> filterTerms = new LinkedList<Term>();
        while (st.hasMoreElements()) {
            String[] tmpToken = st.nextToken().split(":");
            if (tmpToken.length > 1) {
                filterTerms.add(new Term(tmpToken[0], tmpToken[1]));
            }
        }
        if (filterTerms.size() > 0)
            filter = new TermsFilter(filterTerms);
    }

    TopDocs docs; // with query only.
    if (filter == null) {
        docs = searcher.search(query, numberOfCandidateResults);
    } else {
        docs = searcher.search(query, filter, numberOfCandidateResults);
    }
    //        TopDocs docs = searcher.search(query, new TermsFilter(terms), numberOfCandidateResults);   // with TermsFilter and boosting by simple query
    //        TopDocs docs = searcher.search(new ConstantScoreQuery(new TermsFilter(terms)), numberOfCandidateResults); // just with TermsFilter
    time = System.currentTimeMillis() - time;
    rsp.add("RawDocsCount", docs.scoreDocs.length + "");
    rsp.add("RawDocsSearchTime", time + "");
    // re-rank
    time = System.currentTimeMillis();
    TreeSet<SimpleResult> resultScoreDocs = new TreeSet<SimpleResult>();
    float maxDistance = -1f;
    float tmpScore;

    String featureFieldName = FeatureRegistry.getFeatureFieldName(hashFieldName);
    // iterating and re-ranking the documents.
    BinaryDocValues binaryValues = MultiDocValues.getBinaryValues(searcher.getIndexReader(), featureFieldName); // ***  #
    BytesRef bytesRef = new BytesRef();
    for (int i = 0; i < docs.scoreDocs.length; i++) {
        // using DocValues to retrieve the field values ...
        binaryValues.get(docs.scoreDocs[i].doc, bytesRef);
        tmpFeature.setByteArrayRepresentation(bytesRef.bytes, bytesRef.offset, bytesRef.length);
        // Getting the document from the index.
        // This is the slow step based on the field compression of stored fields.
        //            tmpFeature.setByteArrayRepresentation(d.getBinaryValue(name).bytes, d.getBinaryValue(name).offset, d.getBinaryValue(name).length);
        tmpScore = queryFeature.getDistance(tmpFeature);
        if (resultScoreDocs.size() < maximumHits) { // todo: There's potential here for a memory saver, think of a clever data structure that can do the trick without creating a new SimpleResult for each result.
            resultScoreDocs.add(
                    new SimpleResult(tmpScore, searcher.doc(docs.scoreDocs[i].doc), docs.scoreDocs[i].doc));
            maxDistance = resultScoreDocs.last().getDistance();
        } else if (tmpScore < maxDistance) {
            //                if it is nearer to the sample than at least one of the current set:
            //                remove the last one ...
            resultScoreDocs.remove(resultScoreDocs.last());
            //                add the new one ...
            resultScoreDocs.add(
                    new SimpleResult(tmpScore, searcher.doc(docs.scoreDocs[i].doc), docs.scoreDocs[i].doc));
            //                and set our new distance border ...
            maxDistance = resultScoreDocs.last().getDistance();
        }
    }
    //        System.out.println("** Creating response.");
    time = System.currentTimeMillis() - time;
    rsp.add("ReRankSearchTime", time + "");
    LinkedList list = new LinkedList();
    for (Iterator<SimpleResult> it = resultScoreDocs.iterator(); it.hasNext();) {
        SimpleResult result = it.next();
        HashMap m = new HashMap(2);
        m.put("d", result.getDistance());
        // add fields as requested:
        if (req.getParams().get("fl") == null) {
            m.put("id", result.getDocument().get("id"));
            if (result.getDocument().get("title") != null)
                m.put("title", result.getDocument().get("title"));
        } else {
            String fieldsRequested = req.getParams().get("fl");
            if (fieldsRequested.contains("score")) {
                m.put("score", result.getDistance());
            }
            if (fieldsRequested.contains("*")) {
                // all fields
                for (IndexableField field : result.getDocument().getFields()) {
                    String tmpField = field.name();
                    if (result.getDocument().getFields(tmpField).length > 1) {
                        m.put(result.getDocument().getFields(tmpField)[0].name(),
                                result.getDocument().getValues(tmpField));
                    } else if (result.getDocument().getFields(tmpField).length > 0) {
                        m.put(result.getDocument().getFields(tmpField)[0].name(),
                                result.getDocument().getFields(tmpField)[0].stringValue());
                    }
                }
            } else {
                StringTokenizer st;
                if (fieldsRequested.contains(","))
                    st = new StringTokenizer(fieldsRequested, ",");
                else
                    st = new StringTokenizer(fieldsRequested, " ");
                while (st.hasMoreElements()) {
                    String tmpField = st.nextToken();
                    if (result.getDocument().getFields(tmpField).length > 1) {
                        m.put(result.getDocument().getFields(tmpField)[0].name(),
                                result.getDocument().getValues(tmpField));
                    } else if (result.getDocument().getFields(tmpField).length > 0) {
                        m.put(result.getDocument().getFields(tmpField)[0].name(),
                                result.getDocument().getFields(tmpField)[0].stringValue());
                    }
                }
            }
        }
        //            m.put(field, result.getDocument().get(field));
        //            m.put(field.replace("_ha", "_hi"), result.getDocument().getBinaryValue(field));
        list.add(m);
    }
    rsp.add("docs", list);
    // rsp.add("Test-name", "Test-val");
}

From source file:net.semanticmetadata.lire.solr.LireRequestHandler.java

License:Open Source License

/**
 * Handles the get parameters id, field and rows.
 * <p>
 * Looks up the document whose {@code id} field matches the {@code id} request parameter, reads
 * its feature from the binary DocValues of the requested field and runs {@code doSearch} with it.
 * Besides {@code id}, {@code field} and {@code rows}, the parameters {@code accuracy},
 * {@code candidates}, {@code ms} and {@code fq} are read. Problems are reported as
 * {@code "Error"} entries in the response; exceptions raised inside are caught and reported the
 * same way.
 *
 * @param req the request carrying the parameters and the searcher
 * @param rsp the response the results or error messages are added to
 * @throws IOException
 * @throws InstantiationException
 * @throws IllegalAccessException
 */
private void handleIdSearch(SolrQueryRequest req, SolrQueryResponse rsp)
        throws IOException, InstantiationException, IllegalAccessException {
    SolrIndexSearcher searcher = req.getSearcher();
    try {
        int queryDocId = searcher.getFirstMatch(new Term("id", req.getParams().get("id")));
        // get the parameters
        String paramField = req.getParams().get("field", "cl_ha");
        if (!paramField.endsWith("_ha"))
            paramField += "_ha";
        numberOfQueryTerms = req.getParams().getDouble("accuracy", DEFAULT_NUMBER_OF_QUERY_TERMS);
        numberOfCandidateResults = req.getParams().getInt("candidates", DEFAULT_NUMBER_OF_CANDIDATES);
        useMetricSpaces = req.getParams().getBool("ms", DEFAULT_USE_METRIC_SPACES);
        int paramRows = req.getParams().getInt("rows", defaultNumberOfResults);

        GlobalFeature queryFeature = (GlobalFeature) FeatureRegistry.getClassForHashField(paramField)
                .newInstance();
        rsp.add("QueryField", paramField);
        rsp.add("QueryFeature", queryFeature.getClass().getName());
        if (queryDocId > -1) {
            // Using DocValues to get the actual data from the index.
            BinaryDocValues binaryValues = MultiDocValues.getBinaryValues(searcher.getIndexReader(),
                    FeatureRegistry.getFeatureFieldName(paramField));
            if (binaryValues == null) {
                rsp.add("Error",
                        "Could not find the DocValues of the query document. Are they in the index? Id: "
                                + req.getParams().get("id"));
                // Without DocValues there is no query feature to search with. Stop here instead
                // of dereferencing null, which would only add a second, meaningless error entry
                // through the catch block below.
                return;
            }
            queryFeature.setByteArrayRepresentation(binaryValues.get(queryDocId).bytes,
                    binaryValues.get(queryDocId).offset, binaryValues.get(queryDocId).length);

            Query query = null;
            if (!useMetricSpaces) {
                // check singleton cache if the term stats can be cached.
                HashTermStatistics.addToStatistics(searcher, paramField);
                // Re-generating the hashes to save space (instead of storing them in the index)
                int[] hashes = BitSampling.generateHashes(queryFeature.getFeatureVector());
                query = createQuery(hashes, paramField, numberOfQueryTerms);
            } else if (MetricSpaces.supportsFeature(queryFeature)) {
                // ----< Metric Spaces >-----
                int queryLength = (int) StatsUtils.clamp(
                        numberOfQueryTerms * MetricSpaces.getPostingListLength(queryFeature), 3,
                        MetricSpaces.getPostingListLength(queryFeature));
                String msQuery = MetricSpaces.generateBoostedQuery(queryFeature, queryLength);
                QueryParser qp = new QueryParser(paramField.replace("_ha", "_ms"), new WhitespaceAnalyzer());
                query = qp.parse(msQuery);
            } else {
                // Feature cannot be used with metric spaces: report it and fall back to
                // matching all documents, so the re-ranking still produces results.
                query = new MatchAllDocsQuery();
                rsp.add("Error",
                        "Feature not supported by MetricSpaces: " + queryFeature.getClass().getSimpleName());
            }
            doSearch(req, rsp, searcher, paramField, paramRows, getFilterQuery(req.getParams().get("fq")),
                    query, queryFeature);
        } else {
            rsp.add("Error", "Did not find an image with the given id " + req.getParams().get("id"));
        }
    } catch (Exception e) {
        // Any failure is reported to the client in the response rather than propagated.
        rsp.add("Error", "There was an error with your search for the image with the id "
                + req.getParams().get("id") + ": " + e.getMessage());
    }
}

From source file:net.semanticmetadata.lire.solr.LireRequestHandler.java

License:Open Source License

/**
 * Actual search implementation based on (i) hash based retrieval and (ii) feature based re-ranking.
 *
 * @param req           the SolrQueryRequest
 * @param rsp           the response to write the data to
 * @param searcher      the actual index searcher object to search the index
 * @param hashFieldName the name of the field the hashes can be found
 * @param maximumHits   the maximum number of hits, the smaller the faster
 * @param filterQuery   can be null/* w ww .j a  va  2s  . c  o  m*/
 * @param query         the (Boolean) query for querying the candidates from the IndexSearcher
 * @param queryFeature  the image feature used for re-ranking the results
 * @throws IOException
 * @throws IllegalAccessException
 * @throws InstantiationException
 */
private void doSearch(SolrQueryRequest req, SolrQueryResponse rsp, SolrIndexSearcher searcher,
        String hashFieldName, int maximumHits, Query filterQuery, Query query, GlobalFeature queryFeature)
        throws IOException, IllegalAccessException, InstantiationException {
    // temp feature instance
    GlobalFeature tmpFeature = queryFeature.getClass().newInstance();
    // Taking the time of search for statistical purposes.
    time = System.currentTimeMillis();

    String featureFieldName = FeatureRegistry.getFeatureFieldName(hashFieldName);
    BinaryDocValues binaryValues = MultiDocValues.getBinaryValues(searcher.getIndexReader(), featureFieldName);

    time = System.currentTimeMillis() - time;
    rsp.add("DocValuesOpenTime", time + "");

    Iterator<Integer> docIterator;
    int numberOfResults = 0;
    time = System.currentTimeMillis();
    if (filterQuery != null) {
        DocList docList = searcher.getDocList(query, filterQuery, Sort.RELEVANCE, 0, numberOfCandidateResults);
        numberOfResults = docList.size();
        docIterator = docList.iterator();
    } else {
        TopDocs docs = searcher.search(query, numberOfCandidateResults);
        numberOfResults = docs.totalHits;
        docIterator = new TopDocsIterator(docs);
    }
    time = System.currentTimeMillis() - time;
    rsp.add("RawDocsCount", numberOfResults + "");
    rsp.add("RawDocsSearchTime", time + "");
    time = System.currentTimeMillis();
    TreeSet<CachingSimpleResult> resultScoreDocs = getReRankedResults(docIterator, binaryValues, queryFeature,
            tmpFeature, maximumHits, searcher);

    // Creating response ...
    time = System.currentTimeMillis() - time;
    rsp.add("ReRankSearchTime", time + "");
    LinkedList list = new LinkedList();
    for (Iterator<CachingSimpleResult> it = resultScoreDocs.iterator(); it.hasNext();) {
        CachingSimpleResult result = it.next();
        HashMap m = new HashMap(2);
        m.put("d", result.getDistance());
        // add fields as requested:
        if (req.getParams().get("fl") == null) {
            m.put("id", result.getDocument().get("id"));
            if (result.getDocument().get("title") != null)
                m.put("title", result.getDocument().get("title"));
        } else {
            String fieldsRequested = req.getParams().get("fl");
            if (fieldsRequested.contains("score")) {
                m.put("score", result.getDistance());
            }
            if (fieldsRequested.contains("*")) {
                // all fields
                for (IndexableField field : result.getDocument().getFields()) {
                    String tmpField = field.name();

                    if (result.getDocument().getFields(tmpField).length > 1) {
                        m.put(result.getDocument().getFields(tmpField)[0].name(),
                                result.getDocument().getValues(tmpField));
                    } else if (result.getDocument().getFields(tmpField).length > 0) {
                        m.put(result.getDocument().getFields(tmpField)[0].name(),
                                result.getDocument().getFields(tmpField)[0].stringValue());
                    }
                }
            } else {
                StringTokenizer st;
                if (fieldsRequested.contains(","))
                    st = new StringTokenizer(fieldsRequested, ",");
                else
                    st = new StringTokenizer(fieldsRequested, " ");
                while (st.hasMoreElements()) {
                    String tmpField = st.nextToken();
                    if (result.getDocument().getFields(tmpField).length > 1) {
                        m.put(result.getDocument().getFields(tmpField)[0].name(),
                                result.getDocument().getValues(tmpField));
                    } else if (result.getDocument().getFields(tmpField).length > 0) {
                        m.put(result.getDocument().getFields(tmpField)[0].name(),
                                result.getDocument().getFields(tmpField)[0].stringValue());
                    }
                }
            }
        }
        //            m.put(field, result.getDocument().get(field));
        //            m.put(field.replace("_ha", "_hi"), result.getDocument().getBinaryValue(field));
        list.add(m);
    }
    rsp.add("docs", list);
    // rsp.add("Test-name", "Test-val");
}

From source file:org.apache.solr.index.SlowCompositeReaderWrapper.java

License:Apache License

/**
 * Returns the binary doc values of {@code field} for the reader held in {@code in}, as
 * produced by {@link MultiDocValues#getBinaryValues}. {@code ensureOpen()} is called first.
 *
 * @param field the name of the doc values field
 * @throws IOException if the lookup fails
 */
@Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
    ensureOpen();
    final BinaryDocValues merged = MultiDocValues.getBinaryValues(in, field);
    return merged;
}

From source file:org.apache.tika.eval.tools.SlowCompositeReaderWrapper.java

License:Apache License

/**
 * Delegates to {@link MultiDocValues#getBinaryValues} for the reader held in {@code in},
 * after calling {@code ensureOpen()}.
 *
 * @param field the name of the doc values field
 * @throws IOException if the lookup fails
 */
@Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
    ensureOpen();
    // TODO cache?
    final BinaryDocValues values = MultiDocValues.getBinaryValues(in, field);
    return values;
}