Example usage for org.apache.lucene.index Fields.terms

List of usage examples for org.apache.lucene.index Fields.terms

Introduction

On this page you can find example usage for org.apache.lucene.index Fields.terms.

Prototype

public abstract Terms terms(String field) throws IOException;

Source Link

Document

Get the Terms for this field, or null if the field does not exist.
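
Minimal example

Before the full Solr examples below, here is a minimal, self-contained sketch of the call itself. It assumes a Lucene 5.x/6.x style API (no-argument Terms.iterator(), MultiFields.getFields(IndexReader) for a merged view over all segments); the class and method names FieldsTermsExample and dumpTerms are illustrative only.

import java.io.IOException;

import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

public class FieldsTermsExample {

    /** Prints every term of the given field together with its document frequency. */
    static void dumpTerms(IndexReader reader, String field) throws IOException {
        Fields fields = MultiFields.getFields(reader); // merged Fields view over all segments
        Terms terms = fields == null ? null : fields.terms(field);
        if (terms == null) {
            return; // field does not exist or has no indexed terms
        }
        TermsEnum termsEnum = terms.iterator();
        BytesRef term;
        while ((term = termsEnum.next()) != null) {
            System.out.println(term.utf8ToString() + " df=" + termsEnum.docFreq());
        }
    }
}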

Usage

From source file:org.apache.solr.handler.component.TermsComponent.java

License:Apache License

@Override
public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(TermsParams.TERMS, false))
        return;

    String[] fields = params.getParams(TermsParams.TERMS_FIELD);

    NamedList<Object> termsResult = new SimpleOrderedMap<Object>();
    rb.rsp.add("terms", termsResult);

    if (fields == null || fields.length == 0)
        return;

    int limit = params.getInt(TermsParams.TERMS_LIMIT, 10);
    if (limit < 0) {
        limit = Integer.MAX_VALUE;
    }

    String lowerStr = params.get(TermsParams.TERMS_LOWER);
    String upperStr = params.get(TermsParams.TERMS_UPPER);
    boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
    boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
    boolean sort = !TermsParams.TERMS_SORT_INDEX
            .equals(params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
    int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1);
    int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT);
    if (freqmax < 0) {
        freqmax = Integer.MAX_VALUE;
    }
    String prefix = params.get(TermsParams.TERMS_PREFIX_STR);
    String regexp = params.get(TermsParams.TERMS_REGEXP_STR);
    Pattern pattern = regexp != null ? Pattern.compile(regexp, resolveRegexpFlags(params)) : null;

    boolean raw = params.getBool(TermsParams.TERMS_RAW, false);

    final AtomicReader indexReader = rb.req.getSearcher().getAtomicReader();
    Fields lfields = indexReader.fields();

    for (String field : fields) {
        NamedList<Integer> fieldTerms = new NamedList<Integer>();
        termsResult.add(field, fieldTerms);

        Terms terms = lfields == null ? null : lfields.terms(field);
        if (terms == null) {
            // no terms for this field
            continue;
        }

        FieldType ft = raw ? null : rb.req.getSchema().getFieldTypeNoEx(field);
        if (ft == null)
            ft = new StrField();

        // prefix must currently be text
        BytesRef prefixBytes = prefix == null ? null : new BytesRef(prefix);

        BytesRef upperBytes = null;
        if (upperStr != null) {
            upperBytes = new BytesRef();
            ft.readableToIndexed(upperStr, upperBytes);
        }

        BytesRef lowerBytes;
        if (lowerStr == null) {
            // If no lower bound was specified, use the prefix
            lowerBytes = prefixBytes;
        } else {
            if (raw) {
                // TODO: how to handle binary? perhaps we don't for "raw"... or if the field exists
                // perhaps we detect if the FieldType is non-character and expect hex if so?
                lowerBytes = new BytesRef(lowerStr);
            } else {
                lowerBytes = new BytesRef();
                ft.readableToIndexed(lowerStr, lowerBytes);
            }
        }

        TermsEnum termsEnum = terms.iterator(null);
        BytesRef term = null;

        if (lowerBytes != null) {
            if (termsEnum.seekCeil(lowerBytes) == TermsEnum.SeekStatus.END) {
                termsEnum = null;
            } else {
                term = termsEnum.term();
                //Only advance the enum if we are excluding the lower bound and the lower Term actually matches
                if (lowerIncl == false && term.equals(lowerBytes)) {
                    term = termsEnum.next();
                }
            }
        } else {
            // position termsEnum on first term
            term = termsEnum.next();
        }

        int i = 0;
        BoundedTreeSet<CountPair<BytesRef, Integer>> queue = (sort
                ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(limit)
                : null);
        CharsRef external = new CharsRef();
        while (term != null && (i < limit || sort)) {
            boolean externalized = false; // did we fill in "external" yet for this term?

            // stop if the prefix doesn't match
            if (prefixBytes != null && !StringHelper.startsWith(term, prefixBytes))
                break;

            if (pattern != null) {
                // indexed text or external text?
                // TODO: support "raw" mode?
                ft.indexedToReadable(term, external);
                externalized = true;
                if (!pattern.matcher(external).matches()) {
                    term = termsEnum.next();
                    continue;
                }
            }

            if (upperBytes != null) {
                int upperCmp = term.compareTo(upperBytes);
                // if we are past the upper term, or equal to it when the upper bound is exclusive, then stop.
                if (upperCmp > 0 || (upperCmp == 0 && !upperIncl))
                    break;
            }

            // This is a good term in the range.  Check if mincount/maxcount conditions are satisfied.
            int docFreq = termsEnum.docFreq();
            if (docFreq >= freqmin && docFreq <= freqmax) {
                // add the term to the list
                if (sort) {
                    queue.add(new CountPair<BytesRef, Integer>(BytesRef.deepCopyOf(term), docFreq));
                } else {

                    // TODO: handle raw somehow
                    if (!externalized) {
                        ft.indexedToReadable(term, external);
                    }
                    fieldTerms.add(external.toString(), docFreq);
                    i++;
                }
            }

            term = termsEnum.next();
        }

        if (sort) {
            for (CountPair<BytesRef, Integer> item : queue) {
                if (i >= limit)
                    break;
                ft.indexedToReadable(item.key, external);
                fieldTerms.add(external.toString(), item.val);
                i++;
            }
        }
    }
}

From source file:org.apache.solr.handler.component.TermVectorComponent.java

License:Apache License

@Override
public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(COMPONENT_NAME, false)) {
        return;
    }

    NamedList<Object> termVectors = new NamedList<Object>();
    rb.rsp.add(TERM_VECTORS, termVectors);

    IndexSchema schema = rb.req.getSchema();
    SchemaField keyField = schema.getUniqueKeyField();
    String uniqFieldName = null;
    if (keyField != null) {
        uniqFieldName = keyField.getName();
        termVectors.add("uniqueKeyFieldName", uniqFieldName);
    }

    FieldOptions allFields = new FieldOptions();
    //figure out what options we have, and try to get the appropriate vector
    allFields.termFreq = params.getBool(TermVectorParams.TF, false);
    allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
    allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
    allFields.docFreq = params.getBool(TermVectorParams.DF, false);
    allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
    //boolean cacheIdf = params.getBool(TermVectorParams.IDF, false);
    //short cut to all values.
    if (params.getBool(TermVectorParams.ALL, false)) {
        allFields.termFreq = true;
        allFields.positions = true;
        allFields.offsets = true;
        allFields.docFreq = true;
        allFields.tfIdf = true;
    }

    //Build up our per field mapping
    Map<String, FieldOptions> fieldOptions = new HashMap<String, FieldOptions>();
    NamedList<List<String>> warnings = new NamedList<List<String>>();
    List<String> noTV = new ArrayList<String>();
    List<String> noPos = new ArrayList<String>();
    List<String> noOff = new ArrayList<String>();

    Set<String> fields = getFields(rb);
    if (null != fields) {
        //we have specific fields to retrieve, or no fields
        for (String field : fields) {

            // workaround SOLR-3523
            if (null == field || "score".equals(field))
                continue;

            // we don't want to issue warnings about the uniqueKey field
            // since it can cause lots of confusion in distributed requests
            // where the uniqueKey field is injected into the fl for merging
            final boolean fieldIsUniqueKey = field.equals(uniqFieldName);

            SchemaField sf = schema.getFieldOrNull(field);
            if (sf != null) {
                if (sf.storeTermVector()) {
                    FieldOptions option = fieldOptions.get(field);
                    if (option == null) {
                        option = new FieldOptions();
                        option.fieldName = field;
                        fieldOptions.put(field, option);
                    }
                    //get the per field mappings
                    option.termFreq = params.getFieldBool(field, TermVectorParams.TF, allFields.termFreq);
                    option.docFreq = params.getFieldBool(field, TermVectorParams.DF, allFields.docFreq);
                    option.tfIdf = params.getFieldBool(field, TermVectorParams.TF_IDF, allFields.tfIdf);
                    //Validate these are even an option
                    option.positions = params.getFieldBool(field, TermVectorParams.POSITIONS,
                            allFields.positions);
                    if (option.positions && !sf.storeTermPositions() && !fieldIsUniqueKey) {
                        noPos.add(field);
                    }
                    option.offsets = params.getFieldBool(field, TermVectorParams.OFFSETS, allFields.offsets);
                    if (option.offsets && !sf.storeTermOffsets() && !fieldIsUniqueKey) {
                        noOff.add(field);
                    }
                } else {//field doesn't have term vectors
                    if (!fieldIsUniqueKey)
                        noTV.add(field);
                }
            } else {
                //field doesn't exist
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "undefined field: " + field);
            }
        }
    } //else, deal with all fields

    // NOTE: currently all types of warnings are schema driven, and guaranteed
    // to be consistent across all shards - if additional types of warnings
    // are added that might be different between shards, finishStage() needs
    // to be changed to account for that.
    boolean hasWarnings = false;
    if (!noTV.isEmpty()) {
        warnings.add("noTermVectors", noTV);
        hasWarnings = true;
    }
    if (!noPos.isEmpty()) {
        warnings.add("noPositions", noPos);
        hasWarnings = true;
    }
    if (!noOff.isEmpty()) {
        warnings.add("noOffsets", noOff);
        hasWarnings = true;
    }
    if (hasWarnings) {
        termVectors.add("warnings", warnings);
    }

    DocListAndSet listAndSet = rb.getResults();
    List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS));
    Iterator<Integer> iter;
    if (docIds != null && !docIds.isEmpty()) {
        iter = docIds.iterator();
    } else {
        DocList list = listAndSet.docList;
        iter = list.iterator();
    }
    SolrIndexSearcher searcher = rb.req.getSearcher();

    IndexReader reader = searcher.getIndexReader();
    //the TVMapper is a TermVectorMapper which can be used to optimize loading of Term Vectors

    //Only load the id field to get the uniqueKey of that
    //field

    final String finalUniqFieldName = uniqFieldName;

    final List<String> uniqValues = new ArrayList<String>();

    // TODO: is this required to be single-valued? if so, we should STOP
    // once we find it...
    final StoredFieldVisitor getUniqValue = new StoredFieldVisitor() {
        @Override
        public void stringField(FieldInfo fieldInfo, String value) {
            uniqValues.add(value);
        }

        @Override
        public void intField(FieldInfo fieldInfo, int value) {
            uniqValues.add(Integer.toString(value));
        }

        @Override
        public void longField(FieldInfo fieldInfo, long value) {
            uniqValues.add(Long.toString(value));
        }

        @Override
        public Status needsField(FieldInfo fieldInfo) {
            return (fieldInfo.name.equals(finalUniqFieldName)) ? Status.YES : Status.NO;
        }
    };

    TermsEnum termsEnum = null;

    while (iter.hasNext()) {
        Integer docId = iter.next();
        NamedList<Object> docNL = new NamedList<Object>();

        if (keyField != null) {
            reader.document(docId, getUniqValue);
            String uniqVal = null;
            if (uniqValues.size() != 0) {
                uniqVal = uniqValues.get(0);
                uniqValues.clear();
                docNL.add("uniqueKey", uniqVal);
                termVectors.add(uniqVal, docNL);
            }
        } else {
            // support for schemas w/o a unique key,
            termVectors.add("doc-" + docId, docNL);
        }

        if (null != fields) {
            for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
                final String field = entry.getKey();
                final Terms vector = reader.getTermVector(docId, field);
                if (vector != null) {
                    termsEnum = vector.iterator(termsEnum);
                    mapOneVector(docNL, entry.getValue(), reader, docId, termsEnum, field);
                }
            }
        } else {
            // extract all fields
            final Fields vectors = reader.getTermVectors(docId);
            for (String field : vectors) {
                Terms terms = vectors.terms(field);
                if (terms != null) {
                    termsEnum = terms.iterator(termsEnum);
                    mapOneVector(docNL, allFields, reader, docId, termsEnum, field);
                }
            }
        }
    }
}

From source file:org.apache.solr.request.SimpleFacets.java

License:Apache License

/**
 * Returns a list of terms in the specified field along with the 
 * corresponding count of documents in the set that match that constraint.
 * This method uses the FilterCache to get the intersection count between <code>docs</code>
 * and the DocSet for each term in the filter.
 *
 * @see FacetParams#FACET_LIMIT
 * @see FacetParams#FACET_ZEROS
 * @see FacetParams#FACET_MISSING
 */
public NamedList<Integer> getFacetTermEnumCounts(SolrIndexSearcher searcher, DocSet docs, String field,
        int offset, int limit, int mincount, boolean missing, String sort, String prefix) throws IOException {

    /* :TODO: potential optimization...
    * cache the Terms with the highest docFreq and try them first
    * don't enum if we get our max from them
    */

    // Minimum term docFreq in order to use the filterCache for that term.
    int minDfFilterCache = params.getFieldInt(field, FacetParams.FACET_ENUM_CACHE_MINDF, 0);

    // make sure we have a set that is fast for random access, if we will use it for that
    DocSet fastForRandomSet = docs;
    if (minDfFilterCache > 0 && docs instanceof SortedIntDocSet) {
        SortedIntDocSet sset = (SortedIntDocSet) docs;
        fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size());
    }

    IndexSchema schema = searcher.getSchema();
    AtomicReader r = searcher.getAtomicReader();
    FieldType ft = schema.getFieldType(field);

    boolean sortByCount = sort.equals("count") || sort.equals("true");
    final int maxsize = limit >= 0 ? offset + limit : Integer.MAX_VALUE - 1;
    final BoundedTreeSet<CountPair<BytesRef, Integer>> queue = sortByCount
            ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(maxsize)
            : null;
    final NamedList<Integer> res = new NamedList<Integer>();

    int min = mincount - 1; // the smallest value in the top 'N' values    
    int off = offset;
    int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

    BytesRef startTermBytes = null;
    if (prefix != null) {
        String indexedPrefix = ft.toInternal(prefix);
        startTermBytes = new BytesRef(indexedPrefix);
    }

    Fields fields = r.fields();
    Terms terms = fields == null ? null : fields.terms(field);
    TermsEnum termsEnum = null;
    SolrIndexSearcher.DocsEnumState deState = null;
    BytesRef term = null;
    if (terms != null) {
        termsEnum = terms.iterator(null);

        // TODO: OPT: if seek(ord) is supported for this termsEnum, then we could use it for
        // facet.offset when sorting by index order.

        if (startTermBytes != null) {
            if (termsEnum.seekCeil(startTermBytes) == TermsEnum.SeekStatus.END) {
                termsEnum = null;
            } else {
                term = termsEnum.term();
            }
        } else {
            // position termsEnum on first term
            term = termsEnum.next();
        }
    }

    DocsEnum docsEnum = null;
    CharsRef charsRef = new CharsRef(10);

    if (docs.size() >= mincount) {
        while (term != null) {

            if (startTermBytes != null && !StringHelper.startsWith(term, startTermBytes))
                break;

            int df = termsEnum.docFreq();

            // If we are sorting, we can use df>min (rather than >=) since we
            // are going in index order.  For certain term distributions this can
            // make a large difference (for example, many terms with df=1).
            if (df > 0 && df > min) {
                int c;

                if (df >= minDfFilterCache) {
                    // use the filter cache

                    if (deState == null) {
                        deState = new SolrIndexSearcher.DocsEnumState();
                        deState.fieldName = field;
                        deState.liveDocs = r.getLiveDocs();
                        deState.termsEnum = termsEnum;
                        deState.docsEnum = docsEnum;
                    }

                    c = searcher.numDocs(docs, deState);

                    docsEnum = deState.docsEnum;
                } else {
                    // iterate over TermDocs to calculate the intersection

                    // TODO: specialize when base docset is a bitset or hash set (skipDocs)?  or does it matter for this?
                    // TODO: do this per-segment for better efficiency (MultiDocsEnum just uses base class impl)
                    // TODO: would passing deleted docs lead to better efficiency over checking the fastForRandomSet?
                    docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
                    c = 0;

                    if (docsEnum instanceof MultiDocsEnum) {
                        MultiDocsEnum.EnumWithSlice[] subs = ((MultiDocsEnum) docsEnum).getSubs();
                        int numSubs = ((MultiDocsEnum) docsEnum).getNumSubs();
                        for (int subindex = 0; subindex < numSubs; subindex++) {
                            MultiDocsEnum.EnumWithSlice sub = subs[subindex];
                            if (sub.docsEnum == null)
                                continue;
                            int base = sub.slice.start;
                            int docid;
                            while ((docid = sub.docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                                if (fastForRandomSet.exists(docid + base))
                                    c++;
                            }
                        }
                    } else {
                        int docid;
                        while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                            if (fastForRandomSet.exists(docid))
                                c++;
                        }
                    }

                }

                if (sortByCount) {
                    if (c > min) {
                        BytesRef termCopy = BytesRef.deepCopyOf(term);
                        queue.add(new CountPair<BytesRef, Integer>(termCopy, c));
                        if (queue.size() >= maxsize)
                            min = queue.last().val;
                    }
                } else {
                    if (c >= mincount && --off < 0) {
                        if (--lim < 0)
                            break;
                        ft.indexedToReadable(term, charsRef);
                        res.add(charsRef.toString(), c);
                    }
                }
            }

            term = termsEnum.next();
        }
    }

    if (sortByCount) {
        for (CountPair<BytesRef, Integer> p : queue) {
            if (--off >= 0)
                continue;
            if (--lim < 0)
                break;
            ft.indexedToReadable(p.key, charsRef);
            res.add(charsRef.toString(), p.val);
        }
    }

    if (missing) {
        res.add(null, getFieldMissingCount(searcher, docs, field));
    }

    return res;
}

From source file:org.apache.solr.search.DocSetUtil.java

License:Apache License

public static DocSet createDocSet(SolrIndexSearcher searcher, Term term) throws IOException {
    DirectoryReader reader = searcher.getRawReader(); // raw reader to avoid extra wrapping overhead
    int maxDoc = searcher.getIndexReader().maxDoc();
    int smallSetSize = smallSetSize(maxDoc);

    String field = term.field();
    BytesRef termVal = term.bytes();

    int maxCount = 0;
    int firstReader = -1;
    List<LeafReaderContext> leaves = reader.leaves();
    PostingsEnum[] postList = new PostingsEnum[leaves.size()]; // use array for slightly higher scanning cost, but fewer memory allocations
    for (LeafReaderContext ctx : leaves) {
        assert leaves.get(ctx.ord) == ctx;
        LeafReader r = ctx.reader();
        Fields f = r.fields();
        Terms t = f.terms(field);
        if (t == null)
            continue; // field is missing
        TermsEnum te = t.iterator();
        if (te.seekExact(termVal)) {
            maxCount += te.docFreq();
            postList[ctx.ord] = te.postings(null, PostingsEnum.NONE);
            if (firstReader < 0)
                firstReader = ctx.ord;
        }
    }

    DocSet answer = null;
    if (maxCount == 0) {
        answer = DocSet.EMPTY;
    } else if (maxCount <= smallSetSize) {
        answer = createSmallSet(leaves, postList, maxCount, firstReader);
    } else {
        answer = createBigSet(leaves, postList, maxDoc, firstReader);
    }

    return DocSetUtil.getDocSet(answer, searcher);
}

From source file:org.apache.solr.search.facet.FacetFieldProcessorByEnumTermsStream.java

License:Apache License

private void setup() throws IOException {

    countOnly = freq.facetStats.size() == 0 || freq.facetStats.values().iterator().next() instanceof CountAgg;
    hasSubFacets = freq.subFacets.size() > 0;
    bucketsToSkip = freq.offset;

    createAccs(-1, 1);

    // Minimum term docFreq in order to use the filterCache for that term.
    if (freq.cacheDf == -1) { // -1 means never cache
        minDfFilterCache = Integer.MAX_VALUE;
    } else if (freq.cacheDf == 0) { // default; compute as fraction of maxDoc
        minDfFilterCache = Math.max(fcontext.searcher.maxDoc() >> 4, 3); // (minimum of 3 is for test coverage purposes)
    } else {
        minDfFilterCache = freq.cacheDf;
    }

    docs = fcontext.base;
    fastForRandomSet = null;

    if (freq.prefix != null) {
        String indexedPrefix = sf.getType().toInternal(freq.prefix);
        startTermBytes = new BytesRef(indexedPrefix);
    } else if (sf.getType().getNumericType() != null) {
        String triePrefix = TrieField.getMainValuePrefix(sf.getType());
        if (triePrefix != null) {
            startTermBytes = new BytesRef(triePrefix);
        }
    }

    Fields fields = fcontext.searcher.getSlowAtomicReader().fields();
    Terms terms = fields == null ? null : fields.terms(sf.getName());

    termsEnum = null;
    deState = null;
    term = null;

    if (terms != null) {

        termsEnum = terms.iterator();

        // TODO: OPT: if seek(ord) is supported for this termsEnum, then we could use it for
        // facet.offset when sorting by index order.

        if (startTermBytes != null) {
            if (termsEnum.seekCeil(startTermBytes) == TermsEnum.SeekStatus.END) {
                termsEnum = null;
            } else {
                term = termsEnum.term();
            }
        } else {
            // position termsEnum on first term
            term = termsEnum.next();
        }
    }

    List<LeafReaderContext> leafList = fcontext.searcher.getTopReaderContext().leaves();
    leaves = leafList.toArray(new LeafReaderContext[leafList.size()]);
}

From source file:org.apache.solr.search.FloatPayloadValueSource.java

License:Apache License

@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {

    Fields fields = readerContext.reader().fields();
    final Terms terms = fields.terms(indexedField);

    FunctionValues defaultValues = defaultValueSource.getValues(context, readerContext);

    // copied the bulk of this from TFValueSource - TODO: this is a very repeated pattern - base-class this advance logic stuff?
    return new FloatDocValues(this) {
        PostingsEnum docs;
        int atDoc;
        int lastDocRequested = -1;

        {
            reset();
        }

        public void reset() throws IOException {
            // no one should call us for deleted docs?

            if (terms != null) {
                final TermsEnum termsEnum = terms.iterator();
                if (termsEnum.seekExact(indexedBytes)) {
                    docs = termsEnum.postings(null, PostingsEnum.ALL);
                } else {
                    docs = null;
                }
            } else {
                docs = null;
            }

            if (docs == null) {
                // dummy PostingsEnum so floatVal() can work
                // when would this be called?  if field/val did not match?  this is called for every doc?  create once and cache?
                docs = new PostingsEnum() {
                    @Override
                    public int freq() {
                        return 0;
                    }

                    @Override
                    public int nextPosition() throws IOException {
                        return -1;
                    }

                    @Override
                    public int startOffset() throws IOException {
                        return -1;
                    }

                    @Override
                    public int endOffset() throws IOException {
                        return -1;
                    }

                    @Override
                    public BytesRef getPayload() throws IOException {
                        return null;
                    }

                    @Override
                    public int docID() {
                        return DocIdSetIterator.NO_MORE_DOCS;
                    }

                    @Override
                    public int nextDoc() {
                        return DocIdSetIterator.NO_MORE_DOCS;
                    }

                    @Override
                    public int advance(int target) {
                        return DocIdSetIterator.NO_MORE_DOCS;
                    }

                    @Override
                    public long cost() {
                        return 0;
                    }
                };
            }
            atDoc = -1;
        }

        @Override
        public float floatVal(int doc) {
            try {
                if (doc < lastDocRequested) {
                    // out-of-order access.... reset
                    reset();
                }
                lastDocRequested = doc;

                if (atDoc < doc) {
                    atDoc = docs.advance(doc);
                }

                if (atDoc > doc) {
                    // term doesn't match this document... either because we hit the
                    // end, or because the next doc is after this doc.
                    return defaultValues.floatVal(doc);
                }

                // a match!
                int freq = docs.freq();
                int numPayloadsSeen = 0;
                float currentScore = 0;
                for (int i = 0; i < freq; i++) {
                    docs.nextPosition();
                    BytesRef payload = docs.getPayload();
                    if (payload != null) {
                        float payloadVal = decoder.decode(atDoc, docs.startOffset(), docs.endOffset(), payload);

                        // payloadFunction = null represents "first"
                        if (payloadFunction == null)
                            return payloadVal;

                        currentScore = payloadFunction.currentScore(doc, indexedField, docs.startOffset(),
                                docs.endOffset(), numPayloadsSeen, currentScore, payloadVal);
                        numPayloadsSeen++;

                    }
                }

                return (numPayloadsSeen > 0)
                        ? payloadFunction.docScore(doc, indexedField, numPayloadsSeen, currentScore)
                        : defaultValues.floatVal(doc);
            } catch (IOException e) {
                throw new RuntimeException("caught exception in function " + description() + " : doc=" + doc,
                        e);
            }
        }
    };
}

From source file:org.apache.solr.search.function.TermFreqValueSource.java

License:Apache License

@Override
public DocValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
    Fields fields = readerContext.reader.fields();
    final Terms terms = fields.terms(field);

    return new IntDocValues(this) {
        DocsEnum docs;
        int atDoc;
        int lastDocRequested = -1;

        {
            reset();
        }

        public void reset() throws IOException {
            // no one should call us for deleted docs?
            docs = terms.docs(null, indexedBytes, null);
            if (docs == null) {
                docs = new DocsEnum() {
                    @Override
                    public int freq() {
                        return 0;
                    }

                    @Override
                    public int docID() {
                        return DocIdSetIterator.NO_MORE_DOCS;
                    }

                    @Override
                    public int nextDoc() throws IOException {
                        return DocIdSetIterator.NO_MORE_DOCS;
                    }

                    @Override
                    public int advance(int target) throws IOException {
                        return DocIdSetIterator.NO_MORE_DOCS;
                    }
                };
            }
            atDoc = -1;
        }

        @Override
        public int intVal(int doc) {
            try {
                if (doc < lastDocRequested) {
                    // out-of-order access.... reset
                    reset();
                }
                lastDocRequested = doc;

                if (atDoc < doc) {
                    atDoc = docs.advance(doc);
                }

                if (atDoc > doc) {
                    // term doesn't match this document... either because we hit the
                    // end, or because the next doc is after this doc.
                    return 0;
                }

                // a match!
                return docs.freq();
            } catch (IOException e) {
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
                        "caught exception in function " + description() + " : doc=" + doc, e);
            }
        }
    };
}

From source file:org.apache.solr.search.SolrIndexSearcher.java

License:Apache License

/**
 * Returns the first document number containing the term <code>t</code>.
 * Returns -1 if no document was found.
 * This method is primarily intended for clients that want to fetch
 * documents using a unique identifier.
 * @return the first document number containing the term
 */
public int getFirstMatch(Term t) throws IOException {
    Fields fields = atomicReader.fields();
    if (fields == null)
        return -1;
    Terms terms = fields.terms(t.field());
    if (terms == null)
        return -1;
    BytesRef termBytes = t.bytes();
    final TermsEnum termsEnum = terms.iterator(null);
    if (!termsEnum.seekExact(termBytes)) {
        return -1;
    }
    DocsEnum docs = termsEnum.docs(atomicReader.getLiveDocs(), null, DocsEnum.FLAG_NONE);
    if (docs == null)
        return -1;
    int id = docs.nextDoc();
    return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}

From source file:org.apache.solr.search.TestRTGBase.java

License:Apache License

protected int getFirstMatch(IndexReader r, Term t) throws IOException {
    Fields fields = MultiFields.getFields(r);
    if (fields == null)
        return -1;
    Terms terms = fields.terms(t.field());
    if (terms == null)
        return -1;
    BytesRef termBytes = t.bytes();
    final TermsEnum termsEnum = terms.iterator(null);
    if (!termsEnum.seekExact(termBytes)) {
        return -1;
    }
    DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
    int id = docs.nextDoc();
    if (id != DocIdSetIterator.NO_MORE_DOCS) {
        int next = docs.nextDoc();
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
    }
    return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}

From source file:org.apache.solr.update.SolrIndexSplitter.java

License:Apache License

OpenBitSet[] split(AtomicReaderContext readerContext) throws IOException {
    AtomicReader reader = readerContext.reader();
    OpenBitSet[] docSets = new OpenBitSet[numPieces];
    for (int i = 0; i < docSets.length; i++) {
        docSets[i] = new OpenBitSet(reader.maxDoc());
    }
    Bits liveDocs = reader.getLiveDocs();

    Fields fields = reader.fields();
    Terms terms = fields == null ? null : fields.terms(field.getName());
    TermsEnum termsEnum = terms == null ? null : terms.iterator(null);
    if (termsEnum == null)
        return docSets;

    BytesRef term = null;
    DocsEnum docsEnum = null;

    CharsRef idRef = new CharsRef(100);
    for (;;) {
        term = termsEnum.next();
        if (term == null)
            break;

        // figure out the hash for the term

        // FUTURE: if conversion to strings costs too much, we could
        // specialize and use the hash function that can work over bytes.
        idRef = field.getType().indexedToReadable(term, idRef);
        String idString = idRef.toString();

        if (splitKey != null) {
            // todo have composite routers support these kind of things instead
            String part1 = getRouteKey(idString);
            if (part1 == null)
                continue;
            if (!splitKey.equals(part1)) {
                continue;
            }
        }

        int hash = 0;
        if (hashRouter != null) {
            hash = hashRouter.sliceHash(idString, null, null, null);
        }

        docsEnum = termsEnum.docs(liveDocs, docsEnum, DocsEnum.FLAG_NONE);
        for (;;) {
            int doc = docsEnum.nextDoc();
            if (doc == DocsEnum.NO_MORE_DOCS)
                break;
            if (ranges == null) {
                docSets[currPartition].fastSet(doc);
                currPartition = (currPartition + 1) % numPieces;
            } else {
                for (int i = 0; i < rangesArr.length; i++) { // inner-loop: use array here for extra speed.
                    if (rangesArr[i].includes(hash)) {
                        docSets[i].fastSet(doc);
                    }
                }
            }
        }
    }

    return docSets;
}