Usage examples for org.apache.lucene.index.Fields.terms(String field)

Method signature:

public abstract Terms terms(String field) throws IOException;

Returns the Terms for the given field, or null if the field has no indexed terms.
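Before the Solr examples below, here is a minimal sketch of the common call pattern: get Fields from an atomic/leaf reader, ask for the field's Terms (checking for null), and walk the TermsEnum. The helper name countTerms and the example usage are made up for illustration, and the sketch assumes the Lucene 4.x-style API used by most of the examples (AtomicReader, terms.iterator(null), DocsEnum); newer Lucene versions use LeafReader, terms.iterator() and PostingsEnum instead.

import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

public class TermsExample {
  /** Hypothetical helper: counts the distinct terms indexed for one field. */
  static long countTerms(AtomicReader reader, String field) throws IOException {
    Fields fields = reader.fields();                            // may be null if the reader has no postings
    Terms terms = fields == null ? null : fields.terms(field);  // null when the field is absent
    if (terms == null) return 0;
    TermsEnum termsEnum = terms.iterator(null);                 // Lucene 4.x signature; 5.x+ is iterator()
    long count = 0;
    for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
      count++;                                                  // termsEnum.docFreq(), postings, etc. are also available here
    }
    return count;
  }
}

Every example below follows this shape; the null checks on Fields and on the returned Terms are what make the pattern safe for fields that were never indexed.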
From source file: org.apache.solr.handler.component.TermsComponent.java
License: Apache License
@Override
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(TermsParams.TERMS, false)) return;

  String[] fields = params.getParams(TermsParams.TERMS_FIELD);

  NamedList<Object> termsResult = new SimpleOrderedMap<Object>();
  rb.rsp.add("terms", termsResult);

  if (fields == null || fields.length == 0) return;

  int limit = params.getInt(TermsParams.TERMS_LIMIT, 10);
  if (limit < 0) {
    limit = Integer.MAX_VALUE;
  }

  String lowerStr = params.get(TermsParams.TERMS_LOWER);
  String upperStr = params.get(TermsParams.TERMS_UPPER);
  boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
  boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
  boolean sort = !TermsParams.TERMS_SORT_INDEX
      .equals(params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
  int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1);
  int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT);
  if (freqmax < 0) {
    freqmax = Integer.MAX_VALUE;
  }
  String prefix = params.get(TermsParams.TERMS_PREFIX_STR);
  String regexp = params.get(TermsParams.TERMS_REGEXP_STR);
  Pattern pattern = regexp != null ? Pattern.compile(regexp, resolveRegexpFlags(params)) : null;

  boolean raw = params.getBool(TermsParams.TERMS_RAW, false);

  final AtomicReader indexReader = rb.req.getSearcher().getAtomicReader();
  Fields lfields = indexReader.fields();

  for (String field : fields) {
    NamedList<Integer> fieldTerms = new NamedList<Integer>();
    termsResult.add(field, fieldTerms);

    Terms terms = lfields == null ? null : lfields.terms(field);
    if (terms == null) {
      // no terms for this field
      continue;
    }

    FieldType ft = raw ? null : rb.req.getSchema().getFieldTypeNoEx(field);
    if (ft == null) ft = new StrField();

    // prefix must currently be text
    BytesRef prefixBytes = prefix == null ? null : new BytesRef(prefix);

    BytesRef upperBytes = null;
    if (upperStr != null) {
      upperBytes = new BytesRef();
      ft.readableToIndexed(upperStr, upperBytes);
    }

    BytesRef lowerBytes;
    if (lowerStr == null) {
      // If no lower bound was specified, use the prefix
      lowerBytes = prefixBytes;
    } else {
      lowerBytes = new BytesRef();
      if (raw) {
        // TODO: how to handle binary? perhaps we don't for "raw"... or if the field exists
        // perhaps we detect if the FieldType is non-character and expect hex if so?
        lowerBytes = new BytesRef(lowerStr);
      } else {
        lowerBytes = new BytesRef();
        ft.readableToIndexed(lowerStr, lowerBytes);
      }
    }

    TermsEnum termsEnum = terms.iterator(null);
    BytesRef term = null;

    if (lowerBytes != null) {
      if (termsEnum.seekCeil(lowerBytes) == TermsEnum.SeekStatus.END) {
        termsEnum = null;
      } else {
        term = termsEnum.term();
        // Only advance the enum if we are excluding the lower bound and the lower Term actually matches
        if (lowerIncl == false && term.equals(lowerBytes)) {
          term = termsEnum.next();
        }
      }
    } else {
      // position termsEnum on first term
      term = termsEnum.next();
    }

    int i = 0;
    BoundedTreeSet<CountPair<BytesRef, Integer>> queue = (sort
        ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(limit)
        : null);
    CharsRef external = new CharsRef();

    while (term != null && (i < limit || sort)) {
      boolean externalized = false; // did we fill in "external" yet for this term?

      // stop if the prefix doesn't match
      if (prefixBytes != null && !StringHelper.startsWith(term, prefixBytes)) break;

      if (pattern != null) {
        // indexed text or external text?
        // TODO: support "raw" mode?
        ft.indexedToReadable(term, external);
        externalized = true;
        if (!pattern.matcher(external).matches()) {
          term = termsEnum.next();
          continue;
        }
      }

      if (upperBytes != null) {
        int upperCmp = term.compareTo(upperBytes);
        // if we are past the upper term, or equal to it (when don't include upper) then stop.
        if (upperCmp > 0 || (upperCmp == 0 && !upperIncl)) break;
      }

      // This is a good term in the range. Check if mincount/maxcount conditions are satisfied.
      int docFreq = termsEnum.docFreq();
      if (docFreq >= freqmin && docFreq <= freqmax) {
        // add the term to the list
        if (sort) {
          queue.add(new CountPair<BytesRef, Integer>(BytesRef.deepCopyOf(term), docFreq));
        } else {
          // TODO: handle raw somehow
          if (!externalized) {
            ft.indexedToReadable(term, external);
          }
          fieldTerms.add(external.toString(), docFreq);
          i++;
        }
      }

      term = termsEnum.next();
    }

    if (sort) {
      for (CountPair<BytesRef, Integer> item : queue) {
        if (i >= limit) break;
        ft.indexedToReadable(item.key, external);
        fieldTerms.add(external.toString(), item.val);
        i++;
      }
    }
  }
}
From source file: org.apache.solr.handler.component.TermVectorComponent.java
License: Apache License
@Override
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(COMPONENT_NAME, false)) {
    return;
  }

  NamedList<Object> termVectors = new NamedList<Object>();
  rb.rsp.add(TERM_VECTORS, termVectors);

  IndexSchema schema = rb.req.getSchema();
  SchemaField keyField = schema.getUniqueKeyField();
  String uniqFieldName = null;
  if (keyField != null) {
    uniqFieldName = keyField.getName();
    termVectors.add("uniqueKeyFieldName", uniqFieldName);
  }

  FieldOptions allFields = new FieldOptions();
  // figure out what options we have, and try to get the appropriate vector
  allFields.termFreq = params.getBool(TermVectorParams.TF, false);
  allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
  allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
  allFields.docFreq = params.getBool(TermVectorParams.DF, false);
  allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
  // boolean cacheIdf = params.getBool(TermVectorParams.IDF, false);

  // short cut to all values.
  if (params.getBool(TermVectorParams.ALL, false)) {
    allFields.termFreq = true;
    allFields.positions = true;
    allFields.offsets = true;
    allFields.docFreq = true;
    allFields.tfIdf = true;
  }

  // Build up our per field mapping
  Map<String, FieldOptions> fieldOptions = new HashMap<String, FieldOptions>();
  NamedList<List<String>> warnings = new NamedList<List<String>>();
  List<String> noTV = new ArrayList<String>();
  List<String> noPos = new ArrayList<String>();
  List<String> noOff = new ArrayList<String>();

  Set<String> fields = getFields(rb);
  if (null != fields) {
    // we have specific fields to retrieve, or no fields
    for (String field : fields) {
      // workaround SOLR-3523
      if (null == field || "score".equals(field)) continue;

      // we don't want to issue warnings about the uniqueKey field
      // since it can cause lots of confusion in distributed requests
      // where the uniqueKey field is injected into the fl for merging
      final boolean fieldIsUniqueKey = field.equals(uniqFieldName);

      SchemaField sf = schema.getFieldOrNull(field);
      if (sf != null) {
        if (sf.storeTermVector()) {
          FieldOptions option = fieldOptions.get(field);
          if (option == null) {
            option = new FieldOptions();
            option.fieldName = field;
            fieldOptions.put(field, option);
          }
          // get the per field mappings
          option.termFreq = params.getFieldBool(field, TermVectorParams.TF, allFields.termFreq);
          option.docFreq = params.getFieldBool(field, TermVectorParams.DF, allFields.docFreq);
          option.tfIdf = params.getFieldBool(field, TermVectorParams.TF_IDF, allFields.tfIdf);
          // Validate these are even an option
          option.positions = params.getFieldBool(field, TermVectorParams.POSITIONS, allFields.positions);
          if (option.positions && !sf.storeTermPositions() && !fieldIsUniqueKey) {
            noPos.add(field);
          }
          option.offsets = params.getFieldBool(field, TermVectorParams.OFFSETS, allFields.offsets);
          if (option.offsets && !sf.storeTermOffsets() && !fieldIsUniqueKey) {
            noOff.add(field);
          }
        } else {
          // field doesn't have term vectors
          if (!fieldIsUniqueKey) noTV.add(field);
        }
      } else {
        // field doesn't exist
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "undefined field: " + field);
      }
    }
  } // else, deal with all fields

  // NOTE: currently all types of warnings are schema driven, and guaranteed
  // to be consistent across all shards - if additional types of warnings
  // are added that might be different between shards, finishStage() needs
  // to be changed to account for that.
  boolean hasWarnings = false;
  if (!noTV.isEmpty()) {
    warnings.add("noTermVectors", noTV);
    hasWarnings = true;
  }
  if (!noPos.isEmpty()) {
    warnings.add("noPositions", noPos);
    hasWarnings = true;
  }
  if (!noOff.isEmpty()) {
    warnings.add("noOffsets", noOff);
    hasWarnings = true;
  }
  if (hasWarnings) {
    termVectors.add("warnings", warnings);
  }

  DocListAndSet listAndSet = rb.getResults();
  List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS));
  Iterator<Integer> iter;
  if (docIds != null && !docIds.isEmpty()) {
    iter = docIds.iterator();
  } else {
    DocList list = listAndSet.docList;
    iter = list.iterator();
  }

  SolrIndexSearcher searcher = rb.req.getSearcher();
  IndexReader reader = searcher.getIndexReader();
  // the TVMapper is a TermVectorMapper which can be used to optimize loading of Term Vectors
  // Only load the id field to get the uniqueKey of that field
  final String finalUniqFieldName = uniqFieldName;

  final List<String> uniqValues = new ArrayList<String>();

  // TODO: is this required to be single-valued? if so, we should STOP
  // once we find it...
  final StoredFieldVisitor getUniqValue = new StoredFieldVisitor() {
    @Override public void stringField(FieldInfo fieldInfo, String value) { uniqValues.add(value); }
    @Override public void intField(FieldInfo fieldInfo, int value) { uniqValues.add(Integer.toString(value)); }
    @Override public void longField(FieldInfo fieldInfo, long value) { uniqValues.add(Long.toString(value)); }
    @Override public Status needsField(FieldInfo fieldInfo) {
      return (fieldInfo.name.equals(finalUniqFieldName)) ? Status.YES : Status.NO;
    }
  };

  TermsEnum termsEnum = null;

  while (iter.hasNext()) {
    Integer docId = iter.next();
    NamedList<Object> docNL = new NamedList<Object>();

    if (keyField != null) {
      reader.document(docId, getUniqValue);
      String uniqVal = null;
      if (uniqValues.size() != 0) {
        uniqVal = uniqValues.get(0);
        uniqValues.clear();
        docNL.add("uniqueKey", uniqVal);
        termVectors.add(uniqVal, docNL);
      }
    } else {
      // support for schemas w/o a unique key,
      termVectors.add("doc-" + docId, docNL);
    }

    if (null != fields) {
      for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
        final String field = entry.getKey();
        final Terms vector = reader.getTermVector(docId, field);
        if (vector != null) {
          termsEnum = vector.iterator(termsEnum);
          mapOneVector(docNL, entry.getValue(), reader, docId, vector.iterator(termsEnum), field);
        }
      }
    } else {
      // extract all fields
      final Fields vectors = reader.getTermVectors(docId);
      for (String field : vectors) {
        Terms terms = vectors.terms(field);
        if (terms != null) {
          termsEnum = terms.iterator(termsEnum);
          mapOneVector(docNL, allFields, reader, docId, termsEnum, field);
        }
      }
    }
  }
}
From source file: org.apache.solr.request.SimpleFacets.java
License: Apache License
/**
 * Returns a list of terms in the specified field along with the
 * corresponding count of documents in the set that match that constraint.
 * This method uses the FilterCache to get the intersection count between <code>docs</code>
 * and the DocSet for each term in the filter.
 *
 * @see FacetParams#FACET_LIMIT
 * @see FacetParams#FACET_ZEROS
 * @see FacetParams#FACET_MISSING
 */
public NamedList<Integer> getFacetTermEnumCounts(SolrIndexSearcher searcher, DocSet docs, String field,
    int offset, int limit, int mincount, boolean missing, String sort, String prefix) throws IOException {

  /* :TODO: potential optimization...
   * cache the Terms with the highest docFreq and try them first
   * don't enum if we get our max from them
   */

  // Minimum term docFreq in order to use the filterCache for that term.
  int minDfFilterCache = params.getFieldInt(field, FacetParams.FACET_ENUM_CACHE_MINDF, 0);

  // make sure we have a set that is fast for random access, if we will use it for that
  DocSet fastForRandomSet = docs;
  if (minDfFilterCache > 0 && docs instanceof SortedIntDocSet) {
    SortedIntDocSet sset = (SortedIntDocSet) docs;
    fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size());
  }

  IndexSchema schema = searcher.getSchema();
  AtomicReader r = searcher.getAtomicReader();
  FieldType ft = schema.getFieldType(field);

  boolean sortByCount = sort.equals("count") || sort.equals("true");
  final int maxsize = limit >= 0 ? offset + limit : Integer.MAX_VALUE - 1;
  final BoundedTreeSet<CountPair<BytesRef, Integer>> queue = sortByCount
      ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(maxsize)
      : null;
  final NamedList<Integer> res = new NamedList<Integer>();

  int min = mincount - 1; // the smallest value in the top 'N' values
  int off = offset;
  int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

  BytesRef startTermBytes = null;
  if (prefix != null) {
    String indexedPrefix = ft.toInternal(prefix);
    startTermBytes = new BytesRef(indexedPrefix);
  }

  Fields fields = r.fields();
  Terms terms = fields == null ? null : fields.terms(field);
  TermsEnum termsEnum = null;
  SolrIndexSearcher.DocsEnumState deState = null;
  BytesRef term = null;
  if (terms != null) {
    termsEnum = terms.iterator(null);

    // TODO: OPT: if seek(ord) is supported for this termsEnum, then we could use it for
    // facet.offset when sorting by index order.

    if (startTermBytes != null) {
      if (termsEnum.seekCeil(startTermBytes) == TermsEnum.SeekStatus.END) {
        termsEnum = null;
      } else {
        term = termsEnum.term();
      }
    } else {
      // position termsEnum on first term
      term = termsEnum.next();
    }
  }

  DocsEnum docsEnum = null;
  CharsRef charsRef = new CharsRef(10);

  if (docs.size() >= mincount) {
    while (term != null) {

      if (startTermBytes != null && !StringHelper.startsWith(term, startTermBytes)) break;

      int df = termsEnum.docFreq();

      // If we are sorting, we can use df>min (rather than >=) since we
      // are going in index order. For certain term distributions this can
      // make a large difference (for example, many terms with df=1).
      if (df > 0 && df > min) {
        int c;

        if (df >= minDfFilterCache) {
          // use the filter cache
          if (deState == null) {
            deState = new SolrIndexSearcher.DocsEnumState();
            deState.fieldName = field;
            deState.liveDocs = r.getLiveDocs();
            deState.termsEnum = termsEnum;
            deState.docsEnum = docsEnum;
          }

          c = searcher.numDocs(docs, deState);

          docsEnum = deState.docsEnum;
        } else {
          // iterate over TermDocs to calculate the intersection

          // TODO: specialize when base docset is a bitset or hash set (skipDocs)? or does it matter for this?
          // TODO: do this per-segment for better efficiency (MultiDocsEnum just uses base class impl)
          // TODO: would passing deleted docs lead to better efficiency over checking the fastForRandomSet?
          docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
          c = 0;

          if (docsEnum instanceof MultiDocsEnum) {
            MultiDocsEnum.EnumWithSlice[] subs = ((MultiDocsEnum) docsEnum).getSubs();
            int numSubs = ((MultiDocsEnum) docsEnum).getNumSubs();
            for (int subindex = 0; subindex < numSubs; subindex++) {
              MultiDocsEnum.EnumWithSlice sub = subs[subindex];
              if (sub.docsEnum == null) continue;
              int base = sub.slice.start;
              int docid;
              while ((docid = sub.docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                if (fastForRandomSet.exists(docid + base)) c++;
              }
            }
          } else {
            int docid;
            while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
              if (fastForRandomSet.exists(docid)) c++;
            }
          }
        }

        if (sortByCount) {
          if (c > min) {
            BytesRef termCopy = BytesRef.deepCopyOf(term);
            queue.add(new CountPair<BytesRef, Integer>(termCopy, c));
            if (queue.size() >= maxsize) min = queue.last().val;
          }
        } else {
          if (c >= mincount && --off < 0) {
            if (--lim < 0) break;
            ft.indexedToReadable(term, charsRef);
            res.add(charsRef.toString(), c);
          }
        }
      }

      term = termsEnum.next();
    }
  }

  if (sortByCount) {
    for (CountPair<BytesRef, Integer> p : queue) {
      if (--off >= 0) continue;
      if (--lim < 0) break;
      ft.indexedToReadable(p.key, charsRef);
      res.add(charsRef.toString(), p.val);
    }
  }

  if (missing) {
    res.add(null, getFieldMissingCount(searcher, docs, field));
  }

  return res;
}
From source file: org.apache.solr.search.DocSetUtil.java
License: Apache License
public static DocSet createDocSet(SolrIndexSearcher searcher, Term term) throws IOException {
  DirectoryReader reader = searcher.getRawReader(); // raw reader to avoid extra wrapping overhead
  int maxDoc = searcher.getIndexReader().maxDoc();
  int smallSetSize = smallSetSize(maxDoc);

  String field = term.field();
  BytesRef termVal = term.bytes();

  int maxCount = 0;
  int firstReader = -1;
  List<LeafReaderContext> leaves = reader.leaves();
  // use array for slightly higher scanning cost, but fewer memory allocations
  PostingsEnum[] postList = new PostingsEnum[leaves.size()];

  for (LeafReaderContext ctx : leaves) {
    assert leaves.get(ctx.ord) == ctx;
    LeafReader r = ctx.reader();
    Fields f = r.fields();
    Terms t = f.terms(field);
    if (t == null) continue; // field is missing
    TermsEnum te = t.iterator();
    if (te.seekExact(termVal)) {
      maxCount += te.docFreq();
      postList[ctx.ord] = te.postings(null, PostingsEnum.NONE);
      if (firstReader < 0) firstReader = ctx.ord;
    }
  }

  DocSet answer = null;
  if (maxCount == 0) {
    answer = DocSet.EMPTY;
  } else if (maxCount <= smallSetSize) {
    answer = createSmallSet(leaves, postList, maxCount, firstReader);
  } else {
    answer = createBigSet(leaves, postList, maxDoc, firstReader);
  }
  return DocSetUtil.getDocSet(answer, searcher);
}
From source file: org.apache.solr.search.facet.FacetFieldProcessorByEnumTermsStream.java
License: Apache License
private void setup() throws IOException {
  countOnly = freq.facetStats.size() == 0 || freq.facetStats.values().iterator().next() instanceof CountAgg;
  hasSubFacets = freq.subFacets.size() > 0;
  bucketsToSkip = freq.offset;

  createAccs(-1, 1);

  // Minimum term docFreq in order to use the filterCache for that term.
  if (freq.cacheDf == -1) { // -1 means never cache
    minDfFilterCache = Integer.MAX_VALUE;
  } else if (freq.cacheDf == 0) { // default; compute as fraction of maxDoc
    minDfFilterCache = Math.max(fcontext.searcher.maxDoc() >> 4, 3); // (minimum of 3 is for test coverage purposes)
  } else {
    minDfFilterCache = freq.cacheDf;
  }

  docs = fcontext.base;
  fastForRandomSet = null;

  if (freq.prefix != null) {
    String indexedPrefix = sf.getType().toInternal(freq.prefix);
    startTermBytes = new BytesRef(indexedPrefix);
  } else if (sf.getType().getNumericType() != null) {
    String triePrefix = TrieField.getMainValuePrefix(sf.getType());
    if (triePrefix != null) {
      startTermBytes = new BytesRef(triePrefix);
    }
  }

  Fields fields = fcontext.searcher.getSlowAtomicReader().fields();
  Terms terms = fields == null ? null : fields.terms(sf.getName());

  termsEnum = null;
  deState = null;
  term = null;

  if (terms != null) {
    termsEnum = terms.iterator();

    // TODO: OPT: if seek(ord) is supported for this termsEnum, then we could use it for
    // facet.offset when sorting by index order.

    if (startTermBytes != null) {
      if (termsEnum.seekCeil(startTermBytes) == TermsEnum.SeekStatus.END) {
        termsEnum = null;
      } else {
        term = termsEnum.term();
      }
    } else {
      // position termsEnum on first term
      term = termsEnum.next();
    }
  }

  List<LeafReaderContext> leafList = fcontext.searcher.getTopReaderContext().leaves();
  leaves = leafList.toArray(new LeafReaderContext[leafList.size()]);
}
From source file: org.apache.solr.search.FloatPayloadValueSource.java
License: Apache License
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
  Fields fields = readerContext.reader().fields();
  final Terms terms = fields.terms(indexedField);

  FunctionValues defaultValues = defaultValueSource.getValues(context, readerContext);

  // copied the bulk of this from TFValueSource - TODO: this is a very repeated pattern - base-class this advance logic stuff?
  return new FloatDocValues(this) {
    PostingsEnum docs;
    int atDoc;
    int lastDocRequested = -1;

    {
      reset();
    }

    public void reset() throws IOException {
      // no one should call us for deleted docs?

      if (terms != null) {
        final TermsEnum termsEnum = terms.iterator();
        if (termsEnum.seekExact(indexedBytes)) {
          docs = termsEnum.postings(null, PostingsEnum.ALL);
        } else {
          docs = null;
        }
      } else {
        docs = null;
      }

      if (docs == null) {
        // dummy PostingsEnum so floatVal() can work
        // when would this be called? if field/val did not match? this is called for every doc? create once and cache?
        docs = new PostingsEnum() {
          @Override public int freq() { return 0; }
          @Override public int nextPosition() throws IOException { return -1; }
          @Override public int startOffset() throws IOException { return -1; }
          @Override public int endOffset() throws IOException { return -1; }
          @Override public BytesRef getPayload() throws IOException { return null; }
          @Override public int docID() { return DocIdSetIterator.NO_MORE_DOCS; }
          @Override public int nextDoc() { return DocIdSetIterator.NO_MORE_DOCS; }
          @Override public int advance(int target) { return DocIdSetIterator.NO_MORE_DOCS; }
          @Override public long cost() { return 0; }
        };
      }
      atDoc = -1;
    }

    @Override
    public float floatVal(int doc) {
      try {
        if (doc < lastDocRequested) {
          // out-of-order access.... reset
          reset();
        }
        lastDocRequested = doc;

        if (atDoc < doc) {
          atDoc = docs.advance(doc);
        }

        if (atDoc > doc) {
          // term doesn't match this document... either because we hit the
          // end, or because the next doc is after this doc.
          return defaultValues.floatVal(doc);
        }

        // a match!
        int freq = docs.freq();
        int numPayloadsSeen = 0;
        float currentScore = 0;
        for (int i = 0; i < freq; i++) {
          docs.nextPosition();
          BytesRef payload = docs.getPayload();
          if (payload != null) {
            float payloadVal = decoder.decode(atDoc, docs.startOffset(), docs.endOffset(), payload);

            // payloadFunction = null represents "first"
            if (payloadFunction == null) return payloadVal;

            currentScore = payloadFunction.currentScore(doc, indexedField, docs.startOffset(), docs.endOffset(),
                numPayloadsSeen, currentScore, payloadVal);
            numPayloadsSeen++;
          }
        }

        return (numPayloadsSeen > 0)
            ? payloadFunction.docScore(doc, indexedField, numPayloadsSeen, currentScore)
            : defaultValues.floatVal(doc);

      } catch (IOException e) {
        throw new RuntimeException("caught exception in function " + description() + " : doc=" + doc, e);
      }
    }
  };
}
From source file: org.apache.solr.search.function.TermFreqValueSource.java
License: Apache License
@Override
public DocValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
  Fields fields = readerContext.reader.fields();
  final Terms terms = fields.terms(field);

  return new IntDocValues(this) {
    DocsEnum docs;
    int atDoc;
    int lastDocRequested = -1;

    {
      reset();
    }

    public void reset() throws IOException {
      // no one should call us for deleted docs?
      docs = terms.docs(null, indexedBytes, null);
      if (docs == null) {
        docs = new DocsEnum() {
          @Override public int freq() { return 0; }
          @Override public int docID() { return DocIdSetIterator.NO_MORE_DOCS; }
          @Override public int nextDoc() throws IOException { return DocIdSetIterator.NO_MORE_DOCS; }
          @Override public int advance(int target) throws IOException { return DocIdSetIterator.NO_MORE_DOCS; }
        };
      }
      atDoc = -1;
    }

    @Override
    public int intVal(int doc) {
      try {
        if (doc < lastDocRequested) {
          // out-of-order access.... reset
          reset();
        }
        lastDocRequested = doc;

        if (atDoc < doc) {
          atDoc = docs.advance(doc);
        }

        if (atDoc > doc) {
          // term doesn't match this document... either because we hit the
          // end, or because the next doc is after this doc.
          return 0;
        }

        // a match!
        return docs.freq();
      } catch (IOException e) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
            "caught exception in function " + description() + " : doc=" + doc, e);
      }
    }
  };
}
From source file: org.apache.solr.search.SolrIndexSearcher.java
License: Apache License
/**
 * Returns the first document number containing the term <code>t</code>.
 * Returns -1 if no document was found.
 * This method is primarily intended for clients that want to fetch
 * documents using a unique identifier.
 *
 * @return the first document number containing the term
 */
public int getFirstMatch(Term t) throws IOException {
  Fields fields = atomicReader.fields();
  if (fields == null) return -1;
  Terms terms = fields.terms(t.field());
  if (terms == null) return -1;
  BytesRef termBytes = t.bytes();
  final TermsEnum termsEnum = terms.iterator(null);
  if (!termsEnum.seekExact(termBytes)) {
    return -1;
  }
  DocsEnum docs = termsEnum.docs(atomicReader.getLiveDocs(), null, DocsEnum.FLAG_NONE);
  if (docs == null) return -1;
  int id = docs.nextDoc();
  return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
From source file: org.apache.solr.search.TestRTGBase.java
License: Apache License
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
  Fields fields = MultiFields.getFields(r);
  if (fields == null) return -1;
  Terms terms = fields.terms(t.field());
  if (terms == null) return -1;
  BytesRef termBytes = t.bytes();
  final TermsEnum termsEnum = terms.iterator(null);
  if (!termsEnum.seekExact(termBytes)) {
    return -1;
  }
  DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
  int id = docs.nextDoc();
  if (id != DocIdSetIterator.NO_MORE_DOCS) {
    int next = docs.nextDoc();
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
  }
  return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
From source file: org.apache.solr.update.SolrIndexSplitter.java
License: Apache License
OpenBitSet[] split(AtomicReaderContext readerContext) throws IOException {
  AtomicReader reader = readerContext.reader();
  OpenBitSet[] docSets = new OpenBitSet[numPieces];
  for (int i = 0; i < docSets.length; i++) {
    docSets[i] = new OpenBitSet(reader.maxDoc());
  }

  Bits liveDocs = reader.getLiveDocs();

  Fields fields = reader.fields();
  Terms terms = fields == null ? null : fields.terms(field.getName());
  TermsEnum termsEnum = terms == null ? null : terms.iterator(null);
  if (termsEnum == null) return docSets;

  BytesRef term = null;
  DocsEnum docsEnum = null;

  CharsRef idRef = new CharsRef(100);
  for (;;) {
    term = termsEnum.next();
    if (term == null) break;

    // figure out the hash for the term
    // FUTURE: if conversion to strings costs too much, we could
    // specialize and use the hash function that can work over bytes.
    idRef = field.getType().indexedToReadable(term, idRef);
    String idString = idRef.toString();

    if (splitKey != null) {
      // todo have composite routers support these kind of things instead
      String part1 = getRouteKey(idString);
      if (part1 == null) continue;
      if (!splitKey.equals(part1)) {
        continue;
      }
    }

    int hash = 0;
    if (hashRouter != null) {
      hash = hashRouter.sliceHash(idString, null, null, null);
    }

    docsEnum = termsEnum.docs(liveDocs, docsEnum, DocsEnum.FLAG_NONE);
    for (;;) {
      int doc = docsEnum.nextDoc();
      if (doc == DocsEnum.NO_MORE_DOCS) break;
      if (ranges == null) {
        docSets[currPartition].fastSet(doc);
        currPartition = (currPartition + 1) % numPieces;
      } else {
        for (int i = 0; i < rangesArr.length; i++) { // inner-loop: use array here for extra speed.
          if (rangesArr[i].includes(hash)) {
            docSets[i].fastSet(doc);
          }
        }
      }
    }
  }

  return docSets;
}