Usage examples for org.apache.solr.search.SolrIndexSearcher#maxDoc()
public final int maxDoc()
From source file:org.alfresco.solr.query.SolrDeniedScorer.java
License:Open Source License
public static SolrDeniedScorer createDenyScorer(Weight weight, LeafReaderContext context, SolrIndexSearcher searcher, String authority) throws IOException { DocSet deniedDocs = (DocSet) searcher.cacheLookup(CacheConstants.ALFRESCO_DENIED_CACHE, authority); if (deniedDocs == null) { // Cache miss: query the index for ACL docs where the denial matches the authority. DocSet aclDocs = searcher.getDocSet(new TermQuery(new Term(QueryConstants.FIELD_DENIED, authority))); // Allocate a bitset to store the results. deniedDocs = new BitDocSet(new FixedBitSet(searcher.maxDoc())); // Translate from ACL docs to real docs for (DocIterator it = aclDocs.iterator(); it.hasNext(); /**/) { int docID = it.nextDoc(); // Obtain the ACL ID for this ACL doc. long aclID = searcher.getSlowAtomicReader().getNumericDocValues(QueryConstants.FIELD_ACLID) .get(docID);/* www. ja va 2 s . c o m*/ SchemaField schemaField = searcher.getSchema().getField(QueryConstants.FIELD_ACLID); Query query = schemaField.getType().getFieldQuery(null, schemaField, Long.toString(aclID)); // Find real docs that match the ACL ID DocSet docsForAclId = searcher.getDocSet(query); deniedDocs = deniedDocs.union(docsForAclId); // Exclude the ACL docs from the results, we only want real docs that match. // Probably not very efficient, what we really want is remove(docID) deniedDocs = deniedDocs.andNot(aclDocs); } searcher.cacheInsert(CacheConstants.ALFRESCO_DENIED_CACHE, authority, deniedDocs); } return new SolrDeniedScorer(weight, deniedDocs, context, searcher); }
From source file:org.alfresco.solr.query.SolrDenySetScorer.java
License:Open Source License
public static SolrDenySetScorer createDenySetScorer(Weight weight, LeafReaderContext context, SolrIndexSearcher searcher, String authorities, LeafReader reader) throws IOException { DocSet deniedDocSet = (DocSet) searcher.cacheLookup(CacheConstants.ALFRESCO_DENIED_CACHE, authorities); if (deniedDocSet == null) { String[] auths = authorities.substring(1).split(authorities.substring(0, 1)); deniedDocSet = new BitDocSet(new FixedBitSet(searcher.maxDoc())); BooleanQuery.Builder bQuery = new BooleanQuery.Builder(); for (String current : auths) { bQuery.add(new TermQuery(new Term(QueryConstants.FIELD_DENIED, current)), Occur.SHOULD); }// w w w. j a va 2 s. c o m DocSet aclDocs = searcher.getDocSet(bQuery.build()); BooleanQuery.Builder aQuery = new BooleanQuery.Builder(); for (DocIterator it = aclDocs.iterator(); it.hasNext(); /**/) { int docID = it.nextDoc(); // Obtain the ACL ID for this ACL doc. long aclID = searcher.getSlowAtomicReader().getNumericDocValues(QueryConstants.FIELD_ACLID) .get(docID); SchemaField schemaField = searcher.getSchema().getField(QueryConstants.FIELD_ACLID); Query query = schemaField.getType().getFieldQuery(null, schemaField, Long.toString(aclID)); aQuery.add(query, Occur.SHOULD); if ((aQuery.build().clauses().size() > 999) || !it.hasNext()) { DocSet docsForAclId = searcher.getDocSet(aQuery.build()); deniedDocSet = deniedDocSet.union(docsForAclId); aQuery = new BooleanQuery.Builder(); } } // Exclude the ACL docs from the results, we only want real docs that match. // Probably not very efficient, what we really want is remove(docID) deniedDocSet = deniedDocSet.andNot(aclDocs); searcher.cacheInsert(CacheConstants.ALFRESCO_DENIED_CACHE, authorities, deniedDocSet); } // TODO: cache the full set? e.g. searcher.cacheInsert(CacheConstants.ALFRESCO_READERSET_CACHE, authorities, readableDocSet) // plus check of course, for presence in cache at start of method. return new SolrDenySetScorer(weight, deniedDocSet, context, searcher); }
From source file:org.alfresco.solr.query.SolrDenySetScorer2.java
License:Open Source License
/**
 * Builds a scorer over the documents denied to any of the given authorities,
 * caching the computed DocSet under the joined-authorities key.
 *
 * Unlike the query-batching variant, this collects the denied ACL IDs into a set
 * and then scans the ACLID doc values of every leaf reader directly, marking each
 * doc whose ACL ID is in the set. The ACL docs themselves are removed at the end
 * so only real docs remain.
 *
 * @param weight      the Lucene weight of the enclosing query
 * @param context     the leaf reader context being scored
 * @param searcher    the Solr searcher, which also hosts the cache
 * @param authorities delimiter-prefixed authority list (first char is the separator)
 * @param reader      unused here; kept for interface compatibility with callers
 * @throws IOException on index access failure
 */
public static SolrDenySetScorer2 createDenySetScorer(Weight weight, LeafReaderContext context,
        SolrIndexSearcher searcher, String authorities, LeafReader reader) throws IOException {
    DocSet deniedDocSet = (DocSet) searcher.cacheLookup(CacheConstants.ALFRESCO_DENIED_CACHE, authorities);
    if (deniedDocSet == null) {
        // First char of 'authorities' is the delimiter used to split the remainder.
        String[] auths = authorities.substring(1).split(authorities.substring(0, 1));

        deniedDocSet = new BitDocSet(new FixedBitSet(searcher.maxDoc()));

        BooleanQuery.Builder bQuery = new BooleanQuery.Builder();
        for (String current : auths) {
            bQuery.add(new TermQuery(new Term(QueryConstants.FIELD_DENIED, current)), Occur.SHOULD);
        }

        // Wrapped with caching disabled — this one-off ACL query should not evict
        // more useful entries from the filter cache.
        WrappedQuery wrapped = new WrappedQuery(bQuery.build());
        wrapped.setCache(false);
        DocSet aclDocs = searcher.getDocSet(wrapped);

        // Collect the ACL IDs of all matching ACL docs.
        HashSet<Long> aclsFound = new HashSet<Long>(aclDocs.size());
        NumericDocValues aclDocValues = searcher.getSlowAtomicReader()
                .getNumericDocValues(QueryConstants.FIELD_ACLID);
        for (DocIterator it = aclDocs.iterator(); it.hasNext(); /**/) {
            int docID = it.nextDoc();
            // Obtain the ACL ID for this ACL doc.
            long aclID = aclDocValues.get(docID);
            // getLong presumably boxes via a project-level Long cache — TODO confirm.
            aclsFound.add(getLong(aclID));
        }

        if (aclsFound.size() > 0) {
            // Scan each segment's ACLID doc values; mark every doc whose ACL ID
            // is in the denied set (docBase converts to a global doc ID).
            for (LeafReaderContext readerContext : searcher.getSlowAtomicReader().leaves()) {
                int maxDoc = readerContext.reader().maxDoc();
                NumericDocValues fieldValues = DocValuesCache.getNumericDocValues(QueryConstants.FIELD_ACLID,
                        readerContext.reader());
                if (fieldValues != null) {
                    for (int i = 0; i < maxDoc; i++) {
                        long aclID = fieldValues.get(i);
                        Long key = getLong(aclID);
                        if (aclsFound.contains(key)) {
                            deniedDocSet.add(readerContext.docBase + i);
                        }
                    }
                }
            }
        }

        // Exclude the ACL docs from the results, we only want real docs that match.
        // Probably not very efficient, what we really want is remove(docID)
        deniedDocSet = deniedDocSet.andNot(aclDocs);

        searcher.cacheInsert(CacheConstants.ALFRESCO_DENIED_CACHE, authorities, deniedDocSet);
    }
    // TODO: cache the full set? e.g. searcher.cacheInsert(CacheConstants.ALFRESCO_READERSET_CACHE, authorities, readableDocSet)
    // plus check of course, for presence in cache at start of method.
    return new SolrDenySetScorer2(weight, deniedDocSet, context, searcher);
}
From source file:org.alfresco.solr.query.SolrReaderScorer.java
License:Open Source License
public static SolrReaderScorer createReaderScorer(Weight weight, LeafReaderContext context, SolrIndexSearcher searcher, String authority) throws IOException { DocSet readableDocs = (DocSet) searcher.cacheLookup(CacheConstants.ALFRESCO_READER_CACHE, authority); if (readableDocs == null) { // Cache miss: query the index for ACL docs where the reader matches the authority. DocSet aclDocs = searcher.getDocSet(new TermQuery(new Term(QueryConstants.FIELD_READER, authority))); // Allocate a bitset to store the results. readableDocs = new BitDocSet(new FixedBitSet(searcher.maxDoc())); // Translate from ACL docs to real docs for (DocIterator it = aclDocs.iterator(); it.hasNext(); /**/) { int docID = it.nextDoc(); // Obtain the ACL ID for this ACL doc. long aclID = searcher.getSlowAtomicReader().getNumericDocValues(QueryConstants.FIELD_ACLID) .get(docID);//w ww . j ava2 s.co m SchemaField schemaField = searcher.getSchema().getField(QueryConstants.FIELD_ACLID); Query query = schemaField.getType().getFieldQuery(null, schemaField, Long.toString(aclID)); DocSet docsForAclId = searcher.getDocSet(query); readableDocs = readableDocs.union(docsForAclId); // Exclude the ACL docs from the results, we only want real docs that match. // Probably not very efficient, what we really want is remove(docID) readableDocs = readableDocs.andNot(aclDocs); } searcher.cacheInsert(CacheConstants.ALFRESCO_READER_CACHE, authority, readableDocs); } return new SolrReaderScorer(weight, readableDocs, context, searcher); }
From source file:org.alfresco.solr.query.SolrReaderSetScorer.java
License:Open Source License
public static SolrReaderSetScorer createReaderSetScorer(Weight weight, LeafReaderContext context, SolrIndexSearcher searcher, String authorities, LeafReader reader) throws IOException { DocSet readableDocSet = (DocSet) searcher.cacheLookup(CacheConstants.ALFRESCO_READER_CACHE, authorities); if (readableDocSet == null) { String[] auths = authorities.substring(1).split(authorities.substring(0, 1)); readableDocSet = new BitDocSet(new FixedBitSet(searcher.maxDoc())); BooleanQuery.Builder bQuery = new BooleanQuery.Builder(); for (String current : auths) { bQuery.add(new TermQuery(new Term(QueryConstants.FIELD_READER, current)), Occur.SHOULD); }//from w w w .java 2 s . c om DocSet aclDocs = searcher.getDocSet(bQuery.build()); BooleanQuery.Builder aQuery = new BooleanQuery.Builder(); for (DocIterator it = aclDocs.iterator(); it.hasNext(); /**/) { int docID = it.nextDoc(); // Obtain the ACL ID for this ACL doc. long aclID = searcher.getSlowAtomicReader().getNumericDocValues(QueryConstants.FIELD_ACLID) .get(docID); SchemaField schemaField = searcher.getSchema().getField(QueryConstants.FIELD_ACLID); Query query = schemaField.getType().getFieldQuery(null, schemaField, Long.toString(aclID)); aQuery.add(query, Occur.SHOULD); if ((aQuery.build().clauses().size() > 999) || !it.hasNext()) { DocSet docsForAclId = searcher.getDocSet(aQuery.build()); readableDocSet = readableDocSet.union(docsForAclId); aQuery = new BooleanQuery.Builder(); } } // Exclude the ACL docs from the results, we only want real docs that match. // Probably not very efficient, what we really want is remove(docID) readableDocSet = readableDocSet.andNot(aclDocs); searcher.cacheInsert(CacheConstants.ALFRESCO_READER_CACHE, authorities, readableDocSet); } // TODO: cache the full set? e.g. searcher.cacheInsert(CacheConstants.ALFRESCO_READERSET_CACHE, authorities, readableDocSet) // plus check of course, for presence in cache at start of method. 
return new SolrReaderSetScorer(weight, readableDocSet, context, searcher); }
From source file:org.alfresco.solr.query.SolrReaderSetScorer2.java
License:Open Source License
/**
 * Builds a scorer over the documents readable by any of the given authorities,
 * caching the computed DocSet under the joined-authorities key.
 *
 * Unlike the query-batching variant, this collects the matching ACL IDs into a set
 * and then scans the ACLID doc values of every leaf reader directly, marking each
 * doc whose ACL ID is in the set. The ACL docs themselves are removed at the end
 * so only real docs remain.
 *
 * @param weight      the Lucene weight of the enclosing query
 * @param context     the leaf reader context being scored
 * @param searcher    the Solr searcher, which also hosts the cache
 * @param authorities delimiter-prefixed authority list (first char is the separator)
 * @param reader      unused here; kept for interface compatibility with callers
 * @throws IOException on index access failure
 */
public static AbstractSolrCachingScorer createReaderSetScorer(Weight weight, LeafReaderContext context,
        SolrIndexSearcher searcher, String authorities, LeafReader reader) throws IOException {
    DocSet readableDocSet = (DocSet) searcher.cacheLookup(CacheConstants.ALFRESCO_READER_CACHE, authorities);
    if (readableDocSet == null) {
        // First char of 'authorities' is the delimiter used to split the remainder.
        String[] auths = authorities.substring(1).split(authorities.substring(0, 1));

        readableDocSet = new BitDocSet(new FixedBitSet(searcher.maxDoc()));

        BooleanQuery.Builder bQuery = new BooleanQuery.Builder();
        for (String current : auths) {
            bQuery.add(new TermQuery(new Term(QueryConstants.FIELD_READER, current)), Occur.SHOULD);
        }

        // Wrapped with caching disabled — this one-off ACL query should not evict
        // more useful entries from the filter cache.
        WrappedQuery wrapped = new WrappedQuery(bQuery.build());
        wrapped.setCache(false);
        DocSet aclDocs = searcher.getDocSet(wrapped);

        // Collect the ACL IDs of all matching ACL docs.
        HashSet<Long> aclsFound = new HashSet<Long>(aclDocs.size());
        NumericDocValues aclDocValues = searcher.getSlowAtomicReader()
                .getNumericDocValues(QueryConstants.FIELD_ACLID);
        for (DocIterator it = aclDocs.iterator(); it.hasNext(); /**/) {
            int docID = it.nextDoc();
            // Obtain the ACL ID for this ACL doc.
            long aclID = aclDocValues.get(docID);
            // getLong presumably boxes via a project-level Long cache — TODO confirm.
            aclsFound.add(getLong(aclID));
        }

        if (aclsFound.size() > 0) {
            // Scan each segment's ACLID doc values; mark every doc whose ACL ID
            // is in the readable set (docBase converts to a global doc ID).
            for (LeafReaderContext readerContext : searcher.getSlowAtomicReader().leaves()) {
                int maxDoc = readerContext.reader().maxDoc();
                NumericDocValues fieldValues = DocValuesCache.getNumericDocValues(QueryConstants.FIELD_ACLID,
                        readerContext.reader());
                if (fieldValues != null) {
                    for (int i = 0; i < maxDoc; i++) {
                        long aclID = fieldValues.get(i);
                        Long key = getLong(aclID);
                        if (aclsFound.contains(key)) {
                            readableDocSet.add(readerContext.docBase + i);
                        }
                    }
                }
            }
        }

        // Exclude the ACL docs from the results, we only want real docs that match.
        // Probably not very efficient, what we really want is remove(docID)
        readableDocSet = readableDocSet.andNot(aclDocs);

        searcher.cacheInsert(CacheConstants.ALFRESCO_READER_CACHE, authorities, readableDocSet);
    }
    // TODO: cache the full set? e.g. searcher.cacheInsert(CacheConstants.ALFRESCO_READERSET_CACHE, authorities, readableDocSet)
    // plus check of course, for presence in cache at start of method.
    return new SolrReaderSetScorer2(weight, readableDocSet, context, searcher);
}
From source file:org.opensextant.solrtexttagger.TaggerRequestHandler.java
License:Open Source License
/**
 * Handles a tagging request: reads the input text (from a posted content stream or
 * the TEXT_TO_TAG parameter), runs the Tagger over the query analyzer's token
 * stream for the configured field, and adds the found tags plus the matching
 * documents to the response.
 *
 * @param req the Solr request carrying params and optional posted text
 * @param rsp the response to populate with "tagsCount", "tags" and "response"
 * @throws Exception on parameter, parse, or index errors
 */
@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    setTopInitArgsAsInvariants(req);

    //--Read params
    final String indexedField = req.getParams().get("field");
    if (indexedField == null)
        throw new RuntimeException("required param 'field'");

    final TagClusterReducer tagClusterReducer = chooseTagClusterReducer(req.getParams().get(OVERLAPS));
    final int rows = req.getParams().getInt(CommonParams.ROWS, 10000);
    final int tagsLimit = req.getParams().getInt(TAGS_LIMIT, 1000);
    final boolean addMatchText = req.getParams().getBool(MATCH_TEXT, false);
    final SchemaField idSchemaField = req.getSchema().getUniqueKeyField();
    if (idSchemaField == null) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                "The tagger requires a" + "uniqueKey in the schema.");//TODO this could be relaxes
    }
    final boolean skipAltTokens = req.getParams().getBool(SKIP_ALT_TOKENS, false);
    final boolean ignoreStopWords = req.getParams().getBool(IGNORE_STOPWORDS,
            fieldHasIndexedStopFilter(indexedField, req));
    final boolean htmlOffsetAdjust = req.getParams().getBool(HTML_OFFSET_ADJUST, false);
    final boolean xmlOffsetAdjust = req.getParams().getBool(XML_OFFSET_ADJUST, false);
    final String nonTaggableTags = req.getParams().get(NON_TAGGABLE_TAGS);
    final String textToTag = req.getParams().get(TEXT_TO_TAG);

    //--Get posted data
    Reader inputReader = null;
    Iterable<ContentStream> streams = req.getContentStreams();
    if (streams != null) {
        Iterator<ContentStream> iter = streams.iterator();
        if (iter.hasNext()) {
            inputReader = iter.next().getReader();
        }
        // Exactly one content stream is supported.
        if (iter.hasNext()) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    getClass().getSimpleName() + " does not support multiple ContentStreams");
        }
    }
    if (inputReader == null) {
        // Fall back to the TEXT_TO_TAG request parameter if nothing was posted.
        if (textToTag != null) {
            inputReader = new StringReader(textToTag);
        } else {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    getClass().getSimpleName() + " requires text to be POSTed to it");
        }
    }

    final String inputString;//only populated if needed
    if (addMatchText || xmlOffsetAdjust || htmlOffsetAdjust) {
        //Read the input fully into a String buffer that we'll need later,
        // then replace the input with a reader wrapping the buffer.
        inputString = CharStreams.toString(inputReader);
        inputReader.close();
        inputReader = new StringReader(inputString);
    } else {
        inputString = null;//not used
    }

    final OffsetCorrector offsetCorrector = initOffsetCorrector(htmlOffsetAdjust, xmlOffsetAdjust, inputString,
            nonTaggableTags);
    final SolrIndexSearcher searcher = req.getSearcher();
    // Bitset of matching doc IDs, flipped by the tag callback below.
    final FixedBitSet matchDocIdsBS = new FixedBitSet(searcher.maxDoc());
    final List tags = new ArrayList(2000);

    try {
        Analyzer analyzer = req.getSchema().getField(indexedField).getType().getQueryAnalyzer();
        try (TokenStream tokenStream = analyzer.tokenStream("", inputReader)) {
            Terms terms = searcher.getSlowAtomicReader().terms(indexedField);
            if (terms == null)
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                        "field " + indexedField + " has no indexed data");
            // Anonymous subclass: collects tags up to tagsLimit, optionally corrects
            // offsets for HTML/XML input, and translates Lucene doc IDs to schema
            // unique-key values (with a per-request cache).
            Tagger tagger = new Tagger(terms, computeDocCorpus(req), tokenStream, tagClusterReducer,
                    skipAltTokens, ignoreStopWords) {
                @SuppressWarnings("unchecked")
                @Override
                protected void tagCallback(int startOffset, int endOffset, Object docIdsKey) {
                    if (tags.size() >= tagsLimit)
                        return;
                    if (offsetCorrector != null) {
                        int[] offsetPair = offsetCorrector.correctPair(startOffset, endOffset);
                        if (offsetPair == null) {
                            log.debug("Discarded offsets [{}, {}] because couldn't balance XML.",
                                    startOffset, endOffset);
                            return;
                        }
                        startOffset = offsetPair[0];
                        endOffset = offsetPair[1];
                    }
                    NamedList tag = new NamedList();
                    tag.add("startOffset", startOffset);
                    tag.add("endOffset", endOffset);
                    if (addMatchText)
                        tag.add("matchText", inputString.substring(startOffset, endOffset));
                    //below caches, and also flags matchDocIdsBS
                    tag.add("ids", lookupSchemaDocIds(docIdsKey));
                    tags.add(tag);
                }

                // Per-request cache: docIdsKey -> translated schema IDs.
                Map<Object, List> docIdsListCache = new HashMap<>(2000);

                ValueSourceAccessor uniqueKeyCache = new ValueSourceAccessor(searcher,
                        idSchemaField.getType().getValueSource(idSchemaField, null));

                @SuppressWarnings("unchecked")
                private List lookupSchemaDocIds(Object docIdsKey) {
                    List schemaDocIds = docIdsListCache.get(docIdsKey);
                    if (schemaDocIds != null)
                        return schemaDocIds;
                    IntsRef docIds = lookupDocIds(docIdsKey);
                    //translate lucene docIds to schema ids
                    schemaDocIds = new ArrayList(docIds.length);
                    for (int i = docIds.offset; i < docIds.offset + docIds.length; i++) {
                        int docId = docIds.ints[i];
                        matchDocIdsBS.set(docId);//also, flip docid in bitset
                        schemaDocIds.add(uniqueKeyCache.objectVal(docId));//translates here
                    }
                    assert !schemaDocIds.isEmpty();
                    docIdsListCache.put(docIds, schemaDocIds);
                    return schemaDocIds;
                }
            };
            tagger.enableDocIdsCache(2000);//TODO configurable
            tagger.process();
        }
    } finally {
        inputReader.close();
    }
    rsp.add("tagsCount", tags.size());
    rsp.add("tags", tags);
    rsp.setReturnFields(new SolrReturnFields(req));
    //Solr's standard name for matching docs in response
    rsp.add("response", getDocList(rows, matchDocIdsBS));
}
From source file:org.opensextant.solrtexttagger.TaggerRequestHandler.java
License:Open Source License
/** * The set of documents matching the provided 'fq' (filter query). Don't include deleted docs * either. If null is returned, then all docs are available. *//* ww w . j a v a 2 s .c o m*/ private Bits computeDocCorpus(SolrQueryRequest req) throws SyntaxError, IOException { final String[] corpusFilterQueries = req.getParams().getParams("fq"); final SolrIndexSearcher searcher = req.getSearcher(); final Bits docBits; if (corpusFilterQueries != null && corpusFilterQueries.length > 0) { List<Query> filterQueries = new ArrayList<Query>(corpusFilterQueries.length); for (String corpusFilterQuery : corpusFilterQueries) { QParser qParser = QParser.getParser(corpusFilterQuery, null, req); try { filterQueries.add(qParser.parse()); } catch (SyntaxError e) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); } } final DocSet docSet = searcher.getDocSet(filterQueries);//hopefully in the cache //note: before Solr 4.7 we could call docSet.getBits() but no longer. if (docSet instanceof BitDocSet) { docBits = ((BitDocSet) docSet).getBits(); } else { docBits = new Bits() { @Override public boolean get(int index) { return docSet.exists(index); } @Override public int length() { return searcher.maxDoc(); } }; } } else { docBits = searcher.getSlowAtomicReader().getLiveDocs(); } return docBits; }