Example usage for org.apache.solr.search SolrIndexSearcher maxDoc

List of usage examples for org.apache.solr.search SolrIndexSearcher maxDoc

Introduction

On this page you can find example usages of org.apache.solr.search SolrIndexSearcher.maxDoc().

Prototype

public final int maxDoc() 

Source Link

Usage

From source file:org.alfresco.solr.query.SolrDeniedScorer.java

License:Open Source License

/**
 * Builds (or fetches from cache) the set of documents denied to the given authority
 * and wraps it in a scorer.
 *
 * @param weight    the Lucene weight for the enclosing query
 * @param context   the leaf reader context the scorer will iterate over
 * @param searcher  searcher used for cache lookups and doc-set queries
 * @param authority the authority whose denials are being resolved
 * @throws IOException on index access failure
 */
public static SolrDeniedScorer createDenyScorer(Weight weight, LeafReaderContext context,
        SolrIndexSearcher searcher, String authority) throws IOException {
    DocSet deniedDocs = (DocSet) searcher.cacheLookup(CacheConstants.ALFRESCO_DENIED_CACHE, authority);

    if (deniedDocs == null) {
        // Cache miss: query the index for ACL docs where the denial matches the authority.
        DocSet aclDocs = searcher.getDocSet(new TermQuery(new Term(QueryConstants.FIELD_DENIED, authority)));

        // Allocate a bitset to store the results.
        deniedDocs = new BitDocSet(new FixedBitSet(searcher.maxDoc()));

        // Hoist loop-invariant lookups out of the per-doc loop: the docvalues reader
        // and the schema field do not change between iterations.
        NumericDocValues aclDocValues = searcher.getSlowAtomicReader()
                .getNumericDocValues(QueryConstants.FIELD_ACLID);
        SchemaField schemaField = searcher.getSchema().getField(QueryConstants.FIELD_ACLID);

        // Translate from ACL docs to real docs.
        for (DocIterator it = aclDocs.iterator(); it.hasNext(); /**/) {
            int docID = it.nextDoc();
            // Obtain the ACL ID for this ACL doc.
            long aclID = aclDocValues.get(docID);
            Query query = schemaField.getType().getFieldQuery(null, schemaField, Long.toString(aclID));
            // Find real docs that match the ACL ID.
            DocSet docsForAclId = searcher.getDocSet(query);
            deniedDocs = deniedDocs.union(docsForAclId);
        }

        // Exclude the ACL docs from the results once, after the loop, instead of on
        // every iteration (equivalent result, avoids rebuilding the set repeatedly);
        // we only want real docs that match.
        deniedDocs = deniedDocs.andNot(aclDocs);

        searcher.cacheInsert(CacheConstants.ALFRESCO_DENIED_CACHE, authority, deniedDocs);
    }
    return new SolrDeniedScorer(weight, deniedDocs, context, searcher);
}

From source file:org.alfresco.solr.query.SolrDenySetScorer.java

License:Open Source License

/**
 * Builds (or fetches from cache) the set of documents denied to any of the given
 * authorities and wraps it in a scorer.
 *
 * @param weight      the Lucene weight for the enclosing query
 * @param context     the leaf reader context the scorer will iterate over
 * @param searcher    searcher used for cache lookups and doc-set queries
 * @param authorities delimited authority list; the first character is the delimiter
 *                    for the remainder of the string
 * @param reader      unused here; kept for interface compatibility with callers
 * @throws IOException on index access failure
 */
public static SolrDenySetScorer createDenySetScorer(Weight weight, LeafReaderContext context,
        SolrIndexSearcher searcher, String authorities, LeafReader reader) throws IOException {
    DocSet deniedDocSet = (DocSet) searcher.cacheLookup(CacheConstants.ALFRESCO_DENIED_CACHE, authorities);

    if (deniedDocSet == null) {

        // First character of 'authorities' is the delimiter for the rest of the string.
        String[] auths = authorities.substring(1).split(authorities.substring(0, 1));

        deniedDocSet = new BitDocSet(new FixedBitSet(searcher.maxDoc()));

        // OR together a term per denied authority to find the matching ACL docs.
        BooleanQuery.Builder bQuery = new BooleanQuery.Builder();
        for (String current : auths) {
            bQuery.add(new TermQuery(new Term(QueryConstants.FIELD_DENIED, current)), Occur.SHOULD);
        }

        DocSet aclDocs = searcher.getDocSet(bQuery.build());

        // Hoist loop-invariant lookups out of the per-doc loop.
        NumericDocValues aclDocValues = searcher.getSlowAtomicReader()
                .getNumericDocValues(QueryConstants.FIELD_ACLID);
        SchemaField schemaField = searcher.getSchema().getField(QueryConstants.FIELD_ACLID);

        BooleanQuery.Builder aQuery = new BooleanQuery.Builder();
        // Track the clause count ourselves instead of calling aQuery.build() each
        // iteration, which constructed a throw-away BooleanQuery just to count clauses.
        int clauseCount = 0;
        for (DocIterator it = aclDocs.iterator(); it.hasNext(); /**/) {
            int docID = it.nextDoc();
            // Obtain the ACL ID for this ACL doc.
            long aclID = aclDocValues.get(docID);
            Query query = schemaField.getType().getFieldQuery(null, schemaField, Long.toString(aclID));
            aQuery.add(query, Occur.SHOULD);
            clauseCount++;

            // Flush in batches below the BooleanQuery clause limit, and at the end.
            if ((clauseCount > 999) || !it.hasNext()) {
                DocSet docsForAclId = searcher.getDocSet(aQuery.build());
                deniedDocSet = deniedDocSet.union(docsForAclId);

                aQuery = new BooleanQuery.Builder();
                clauseCount = 0;
            }
        }

        // Exclude the ACL docs from the results, we only want real docs that match.
        deniedDocSet = deniedDocSet.andNot(aclDocs);
        searcher.cacheInsert(CacheConstants.ALFRESCO_DENIED_CACHE, authorities, deniedDocSet);
    }

    // TODO: cache the full set? e.g. searcher.cacheInsert(CacheConstants.ALFRESCO_READERSET_CACHE, authorities, readableDocSet)
    // plus check of course, for presence in cache at start of method.
    return new SolrDenySetScorer(weight, deniedDocSet, context, searcher);

}

From source file:org.alfresco.solr.query.SolrDenySetScorer2.java

License:Open Source License

/**
 * Builds (or fetches from cache) the set of documents denied to any of the given
 * authorities, using a per-segment docvalues scan rather than batched queries.
 *
 * @param weight      the Lucene weight for the enclosing query
 * @param context     the leaf reader context the scorer will iterate over
 * @param searcher    searcher used for cache lookups and doc-set queries
 * @param authorities delimited authority list; the first character is the delimiter
 *                    for the remainder of the string
 * @param reader      unused here; kept for interface compatibility with callers
 * @throws IOException on index access failure
 */
public static SolrDenySetScorer2 createDenySetScorer(Weight weight, LeafReaderContext context,
        SolrIndexSearcher searcher, String authorities, LeafReader reader) throws IOException {
    DocSet deniedDocSet = (DocSet) searcher.cacheLookup(CacheConstants.ALFRESCO_DENIED_CACHE, authorities);

    if (deniedDocSet == null) {

        // First character of 'authorities' is the delimiter for the rest of the string.
        String[] auths = authorities.substring(1).split(authorities.substring(0, 1));

        deniedDocSet = new BitDocSet(new FixedBitSet(searcher.maxDoc()));

        // OR together a term per denied authority to find the matching ACL docs.
        BooleanQuery.Builder bQuery = new BooleanQuery.Builder();
        for (String current : auths) {
            bQuery.add(new TermQuery(new Term(QueryConstants.FIELD_DENIED, current)), Occur.SHOULD);
        }
        // Wrap so this intermediate ACL-doc query is not cached by Solr.
        WrappedQuery wrapped = new WrappedQuery(bQuery.build());
        wrapped.setCache(false);

        DocSet aclDocs = searcher.getDocSet(wrapped);

        // Collect the distinct ACL IDs referenced by the matching ACL docs.
        HashSet<Long> aclsFound = new HashSet<>(aclDocs.size());
        NumericDocValues aclDocValues = searcher.getSlowAtomicReader()
                .getNumericDocValues(QueryConstants.FIELD_ACLID);

        for (DocIterator it = aclDocs.iterator(); it.hasNext(); /**/) {
            int docID = it.nextDoc();
            // Obtain the ACL ID for this ACL doc.
            long aclID = aclDocValues.get(docID);
            aclsFound.add(getLong(aclID));
        }

        if (!aclsFound.isEmpty()) {
            // Scan every segment, marking each doc whose ACL ID is in the denied set.
            for (LeafReaderContext readerContext : searcher.getSlowAtomicReader().leaves()) {
                int maxDoc = readerContext.reader().maxDoc();
                NumericDocValues fieldValues = DocValuesCache.getNumericDocValues(QueryConstants.FIELD_ACLID,
                        readerContext.reader());
                if (fieldValues != null) {
                    for (int i = 0; i < maxDoc; i++) {
                        long aclID = fieldValues.get(i);
                        Long key = getLong(aclID);
                        if (aclsFound.contains(key)) {
                            // Segment-local id + docBase = global doc id.
                            deniedDocSet.add(readerContext.docBase + i);
                        }
                    }
                }

            }
        }

        // Exclude the ACL docs from the results, we only want real docs that match.
        // Probably not very efficient, what we really want is remove(docID)
        deniedDocSet = deniedDocSet.andNot(aclDocs);
        searcher.cacheInsert(CacheConstants.ALFRESCO_DENIED_CACHE, authorities, deniedDocSet);
    }

    // TODO: cache the full set? e.g. searcher.cacheInsert(CacheConstants.ALFRESCO_READERSET_CACHE, authorities, readableDocSet)
    // plus check of course, for presence in cache at start of method.
    return new SolrDenySetScorer2(weight, deniedDocSet, context, searcher);

}

From source file:org.alfresco.solr.query.SolrReaderScorer.java

License:Open Source License

/**
 * Builds (or fetches from cache) the set of documents readable by the given authority
 * and wraps it in a scorer.
 *
 * @param weight    the Lucene weight for the enclosing query
 * @param context   the leaf reader context the scorer will iterate over
 * @param searcher  searcher used for cache lookups and doc-set queries
 * @param authority the authority whose read permissions are being resolved
 * @throws IOException on index access failure
 */
public static SolrReaderScorer createReaderScorer(Weight weight, LeafReaderContext context,
        SolrIndexSearcher searcher, String authority) throws IOException {
    DocSet readableDocs = (DocSet) searcher.cacheLookup(CacheConstants.ALFRESCO_READER_CACHE, authority);

    if (readableDocs == null) {
        // Cache miss: query the index for ACL docs where the reader matches the authority.
        DocSet aclDocs = searcher.getDocSet(new TermQuery(new Term(QueryConstants.FIELD_READER, authority)));

        // Allocate a bitset to store the results.
        readableDocs = new BitDocSet(new FixedBitSet(searcher.maxDoc()));

        // Hoist loop-invariant lookups out of the per-doc loop: the docvalues reader
        // and the schema field do not change between iterations.
        NumericDocValues aclDocValues = searcher.getSlowAtomicReader()
                .getNumericDocValues(QueryConstants.FIELD_ACLID);
        SchemaField schemaField = searcher.getSchema().getField(QueryConstants.FIELD_ACLID);

        // Translate from ACL docs to real docs.
        for (DocIterator it = aclDocs.iterator(); it.hasNext(); /**/) {
            int docID = it.nextDoc();
            // Obtain the ACL ID for this ACL doc.
            long aclID = aclDocValues.get(docID);
            Query query = schemaField.getType().getFieldQuery(null, schemaField, Long.toString(aclID));
            DocSet docsForAclId = searcher.getDocSet(query);
            readableDocs = readableDocs.union(docsForAclId);
        }

        // Exclude the ACL docs from the results once, after the loop, instead of on
        // every iteration (equivalent result, avoids rebuilding the set repeatedly);
        // we only want real docs that match.
        readableDocs = readableDocs.andNot(aclDocs);

        searcher.cacheInsert(CacheConstants.ALFRESCO_READER_CACHE, authority, readableDocs);
    }

    return new SolrReaderScorer(weight, readableDocs, context, searcher);
}

From source file:org.alfresco.solr.query.SolrReaderSetScorer.java

License:Open Source License

/**
 * Builds (or fetches from cache) the set of documents readable by any of the given
 * authorities and wraps it in a scorer.
 *
 * @param weight      the Lucene weight for the enclosing query
 * @param context     the leaf reader context the scorer will iterate over
 * @param searcher    searcher used for cache lookups and doc-set queries
 * @param authorities delimited authority list; the first character is the delimiter
 *                    for the remainder of the string
 * @param reader      unused here; kept for interface compatibility with callers
 * @throws IOException on index access failure
 */
public static SolrReaderSetScorer createReaderSetScorer(Weight weight, LeafReaderContext context,
        SolrIndexSearcher searcher, String authorities, LeafReader reader) throws IOException {

    DocSet readableDocSet = (DocSet) searcher.cacheLookup(CacheConstants.ALFRESCO_READER_CACHE, authorities);

    if (readableDocSet == null) {

        // First character of 'authorities' is the delimiter for the rest of the string.
        String[] auths = authorities.substring(1).split(authorities.substring(0, 1));

        readableDocSet = new BitDocSet(new FixedBitSet(searcher.maxDoc()));

        // OR together a term per authority to find the matching ACL docs.
        BooleanQuery.Builder bQuery = new BooleanQuery.Builder();
        for (String current : auths) {
            bQuery.add(new TermQuery(new Term(QueryConstants.FIELD_READER, current)), Occur.SHOULD);
        }

        DocSet aclDocs = searcher.getDocSet(bQuery.build());

        // Hoist loop-invariant lookups out of the per-doc loop.
        NumericDocValues aclDocValues = searcher.getSlowAtomicReader()
                .getNumericDocValues(QueryConstants.FIELD_ACLID);
        SchemaField schemaField = searcher.getSchema().getField(QueryConstants.FIELD_ACLID);

        BooleanQuery.Builder aQuery = new BooleanQuery.Builder();
        // Track the clause count ourselves instead of calling aQuery.build() each
        // iteration, which constructed a throw-away BooleanQuery just to count clauses.
        int clauseCount = 0;
        for (DocIterator it = aclDocs.iterator(); it.hasNext(); /**/) {
            int docID = it.nextDoc();
            // Obtain the ACL ID for this ACL doc.
            long aclID = aclDocValues.get(docID);
            Query query = schemaField.getType().getFieldQuery(null, schemaField, Long.toString(aclID));
            aQuery.add(query, Occur.SHOULD);
            clauseCount++;

            // Flush in batches below the BooleanQuery clause limit, and at the end.
            if ((clauseCount > 999) || !it.hasNext()) {
                DocSet docsForAclId = searcher.getDocSet(aQuery.build());
                readableDocSet = readableDocSet.union(docsForAclId);

                aQuery = new BooleanQuery.Builder();
                clauseCount = 0;
            }
        }
        // Exclude the ACL docs from the results, we only want real docs that match.
        readableDocSet = readableDocSet.andNot(aclDocs);
        searcher.cacheInsert(CacheConstants.ALFRESCO_READER_CACHE, authorities, readableDocSet);
    }

    // TODO: cache the full set? e.g. searcher.cacheInsert(CacheConstants.ALFRESCO_READERSET_CACHE, authorities, readableDocSet)
    // plus check of course, for presence in cache at start of method.
    return new SolrReaderSetScorer(weight, readableDocSet, context, searcher);
}

From source file:org.alfresco.solr.query.SolrReaderSetScorer2.java

License:Open Source License

/**
 * Builds (or fetches from cache) the set of documents readable by any of the given
 * authorities, using a per-segment docvalues scan rather than batched queries.
 *
 * @param weight      the Lucene weight for the enclosing query
 * @param context     the leaf reader context the scorer will iterate over
 * @param searcher    searcher used for cache lookups and doc-set queries
 * @param authorities delimited authority list; the first character is the delimiter
 *                    for the remainder of the string
 * @param reader      unused here; kept for interface compatibility with callers
 * @throws IOException on index access failure
 */
public static AbstractSolrCachingScorer createReaderSetScorer(Weight weight, LeafReaderContext context,
        SolrIndexSearcher searcher, String authorities, LeafReader reader) throws IOException {

    DocSet readableDocSet = (DocSet) searcher.cacheLookup(CacheConstants.ALFRESCO_READER_CACHE, authorities);

    if (readableDocSet == null) {

        // First character of 'authorities' is the delimiter for the rest of the string.
        String[] auths = authorities.substring(1).split(authorities.substring(0, 1));

        readableDocSet = new BitDocSet(new FixedBitSet(searcher.maxDoc()));

        // OR together a term per authority to find the matching ACL docs.
        BooleanQuery.Builder bQuery = new BooleanQuery.Builder();
        for (String current : auths) {
            bQuery.add(new TermQuery(new Term(QueryConstants.FIELD_READER, current)), Occur.SHOULD);
        }
        // Wrap so this intermediate ACL-doc query is not cached by Solr.
        WrappedQuery wrapped = new WrappedQuery(bQuery.build());
        wrapped.setCache(false);

        DocSet aclDocs = searcher.getDocSet(wrapped);

        // Collect the distinct ACL IDs referenced by the matching ACL docs.
        HashSet<Long> aclsFound = new HashSet<>(aclDocs.size());
        NumericDocValues aclDocValues = searcher.getSlowAtomicReader()
                .getNumericDocValues(QueryConstants.FIELD_ACLID);

        for (DocIterator it = aclDocs.iterator(); it.hasNext(); /**/) {
            int docID = it.nextDoc();
            // Obtain the ACL ID for this ACL doc.
            long aclID = aclDocValues.get(docID);
            aclsFound.add(getLong(aclID));
        }

        if (!aclsFound.isEmpty()) {
            // Scan every segment, marking each doc whose ACL ID is in the readable set.
            for (LeafReaderContext readerContext : searcher.getSlowAtomicReader().leaves()) {
                int maxDoc = readerContext.reader().maxDoc();
                NumericDocValues fieldValues = DocValuesCache.getNumericDocValues(QueryConstants.FIELD_ACLID,
                        readerContext.reader());
                if (fieldValues != null) {
                    for (int i = 0; i < maxDoc; i++) {
                        long aclID = fieldValues.get(i);
                        Long key = getLong(aclID);
                        if (aclsFound.contains(key)) {
                            // Segment-local id + docBase = global doc id.
                            readableDocSet.add(readerContext.docBase + i);
                        }
                    }
                }

            }
        }

        // Exclude the ACL docs from the results, we only want real docs that match.
        // Probably not very efficient, what we really want is remove(docID)
        readableDocSet = readableDocSet.andNot(aclDocs);
        searcher.cacheInsert(CacheConstants.ALFRESCO_READER_CACHE, authorities, readableDocSet);
    }

    // TODO: cache the full set? e.g. searcher.cacheInsert(CacheConstants.ALFRESCO_READERSET_CACHE, authorities, readableDocSet)
    // plus check of course, for presence in cache at start of method.
    return new SolrReaderSetScorer2(weight, readableDocSet, context, searcher);
}

From source file:org.opensextant.solrtexttagger.TaggerRequestHandler.java

License:Open Source License

@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    // Promote top-level init args to invariants so request params cannot override them.
    setTopInitArgsAsInvariants(req);

    //--Read params
    final String indexedField = req.getParams().get("field");
    if (indexedField == null)
        throw new RuntimeException("required param 'field'");

    final TagClusterReducer tagClusterReducer = chooseTagClusterReducer(req.getParams().get(OVERLAPS));
    final int rows = req.getParams().getInt(CommonParams.ROWS, 10000);
    final int tagsLimit = req.getParams().getInt(TAGS_LIMIT, 1000);
    final boolean addMatchText = req.getParams().getBool(MATCH_TEXT, false);
    final SchemaField idSchemaField = req.getSchema().getUniqueKeyField();
    if (idSchemaField == null) {
        // NOTE(review): the concatenated message is missing a space ("a" + "uniqueKey");
        // left as-is here since the string is runtime behavior.
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                "The tagger requires a" + "uniqueKey in the schema.");//TODO this could be relaxed
    }
    final boolean skipAltTokens = req.getParams().getBool(SKIP_ALT_TOKENS, false);
    final boolean ignoreStopWords = req.getParams().getBool(IGNORE_STOPWORDS,
            fieldHasIndexedStopFilter(indexedField, req));
    final boolean htmlOffsetAdjust = req.getParams().getBool(HTML_OFFSET_ADJUST, false);
    final boolean xmlOffsetAdjust = req.getParams().getBool(XML_OFFSET_ADJUST, false);
    final String nonTaggableTags = req.getParams().get(NON_TAGGABLE_TAGS);
    final String textToTag = req.getParams().get(TEXT_TO_TAG);

    //--Get posted data: accept exactly one content stream, or the TEXT_TO_TAG param.
    Reader inputReader = null;
    Iterable<ContentStream> streams = req.getContentStreams();
    if (streams != null) {
        Iterator<ContentStream> iter = streams.iterator();
        if (iter.hasNext()) {
            inputReader = iter.next().getReader();
        }
        if (iter.hasNext()) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    getClass().getSimpleName() + " does not support multiple ContentStreams");
        }
    }
    if (inputReader == null) {
        if (textToTag != null) {
            inputReader = new StringReader(textToTag);
        } else {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    getClass().getSimpleName() + " requires text to be POSTed to it");
        }
    }
    final String inputString;//only populated if needed
    if (addMatchText || xmlOffsetAdjust || htmlOffsetAdjust) {
        //Read the input fully into a String buffer that we'll need later,
        // then replace the input with a reader wrapping the buffer.
        inputString = CharStreams.toString(inputReader);
        inputReader.close();
        inputReader = new StringReader(inputString);
    } else {
        inputString = null;//not used
    }

    // Offset corrector maps token offsets back through HTML/XML markup when requested.
    final OffsetCorrector offsetCorrector = initOffsetCorrector(htmlOffsetAdjust, xmlOffsetAdjust, inputString,
            nonTaggableTags);
    final SolrIndexSearcher searcher = req.getSearcher();
    // Bitset of all matched docs, flipped as tags are reported (see lookupSchemaDocIds).
    final FixedBitSet matchDocIdsBS = new FixedBitSet(searcher.maxDoc());
    final List tags = new ArrayList(2000);

    try {
        Analyzer analyzer = req.getSchema().getField(indexedField).getType().getQueryAnalyzer();
        try (TokenStream tokenStream = analyzer.tokenStream("", inputReader)) {
            Terms terms = searcher.getSlowAtomicReader().terms(indexedField);
            if (terms == null)
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                        "field " + indexedField + " has no indexed data");
            // Anonymous Tagger subclass: tagCallback collects each reported tag into
            // 'tags' (capped at tagsLimit) and resolves lucene doc ids to schema ids.
            Tagger tagger = new Tagger(terms, computeDocCorpus(req), tokenStream, tagClusterReducer,
                    skipAltTokens, ignoreStopWords) {
                @SuppressWarnings("unchecked")
                @Override
                protected void tagCallback(int startOffset, int endOffset, Object docIdsKey) {
                    if (tags.size() >= tagsLimit)
                        return;
                    if (offsetCorrector != null) {
                        // Map offsets through the markup; discard tags that straddle tags.
                        int[] offsetPair = offsetCorrector.correctPair(startOffset, endOffset);
                        if (offsetPair == null) {
                            log.debug("Discarded offsets [{}, {}] because couldn't balance XML.", startOffset,
                                    endOffset);
                            return;
                        }
                        startOffset = offsetPair[0];
                        endOffset = offsetPair[1];
                    }

                    NamedList tag = new NamedList();
                    tag.add("startOffset", startOffset);
                    tag.add("endOffset", endOffset);
                    if (addMatchText)
                        tag.add("matchText", inputString.substring(startOffset, endOffset));
                    //below caches, and also flags matchDocIdsBS
                    tag.add("ids", lookupSchemaDocIds(docIdsKey));
                    tags.add(tag);
                }

                // Memoizes docIdsKey -> schema-id list across callbacks.
                Map<Object, List> docIdsListCache = new HashMap<>(2000);

                // Resolves lucene doc ids to the schema uniqueKey values.
                ValueSourceAccessor uniqueKeyCache = new ValueSourceAccessor(searcher,
                        idSchemaField.getType().getValueSource(idSchemaField, null));

                @SuppressWarnings("unchecked")
                private List lookupSchemaDocIds(Object docIdsKey) {
                    List schemaDocIds = docIdsListCache.get(docIdsKey);
                    if (schemaDocIds != null)
                        return schemaDocIds;
                    IntsRef docIds = lookupDocIds(docIdsKey);
                    //translate lucene docIds to schema ids
                    schemaDocIds = new ArrayList(docIds.length);
                    for (int i = docIds.offset; i < docIds.offset + docIds.length; i++) {
                        int docId = docIds.ints[i];
                        matchDocIdsBS.set(docId);//also, flip docid in bitset
                        schemaDocIds.add(uniqueKeyCache.objectVal(docId));//translates here
                    }
                    assert !schemaDocIds.isEmpty();

                    // NOTE(review): the cache is read with docIdsKey but written with
                    // docIds — hits only occur when these are the same object; verify
                    // against Tagger's doc-ids cache contract.
                    docIdsListCache.put(docIds, schemaDocIds);
                    return schemaDocIds;
                }

            };
            tagger.enableDocIdsCache(2000);//TODO configurable
            tagger.process();
        }
    } finally {
        inputReader.close();
    }
    rsp.add("tagsCount", tags.size());
    rsp.add("tags", tags);

    rsp.setReturnFields(new SolrReturnFields(req));

    //Solr's standard name for matching docs in response
    rsp.add("response", getDocList(rows, matchDocIdsBS));
}

From source file:org.opensextant.solrtexttagger.TaggerRequestHandler.java

License:Open Source License

/**
 * The set of documents matching the provided 'fq' (filter query). Don't include deleted docs
 * either. If null is returned, then all docs are available.
 *//* ww w .  j a v  a  2  s .c o  m*/
private Bits computeDocCorpus(SolrQueryRequest req) throws SyntaxError, IOException {
    final String[] corpusFilterQueries = req.getParams().getParams("fq");
    final SolrIndexSearcher searcher = req.getSearcher();
    final Bits docBits;
    if (corpusFilterQueries != null && corpusFilterQueries.length > 0) {
        List<Query> filterQueries = new ArrayList<Query>(corpusFilterQueries.length);
        for (String corpusFilterQuery : corpusFilterQueries) {
            QParser qParser = QParser.getParser(corpusFilterQuery, null, req);
            try {
                filterQueries.add(qParser.parse());
            } catch (SyntaxError e) {
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
            }
        }

        final DocSet docSet = searcher.getDocSet(filterQueries);//hopefully in the cache
        //note: before Solr 4.7 we could call docSet.getBits() but no longer.
        if (docSet instanceof BitDocSet) {
            docBits = ((BitDocSet) docSet).getBits();
        } else {
            docBits = new Bits() {

                @Override
                public boolean get(int index) {
                    return docSet.exists(index);
                }

                @Override
                public int length() {
                    return searcher.maxDoc();
                }
            };
        }
    } else {
        docBits = searcher.getSlowAtomicReader().getLiveDocs();
    }
    return docBits;
}