Example usage for org.apache.lucene.util CharsRef toString

List of usage examples for org.apache.lucene.util CharsRef toString

Introduction

On this page you can find an example of usage for org.apache.lucene.util CharsRef toString.

Prototype

@Override
    public String toString() 

Source Link

Usage

From source file:com.o19s.solr.swan.highlight.SpanAwareFieldTermStack.java

License:Apache License

/**
 * a constructor./*from  w w  w  .  j  av a2s.  co m*/
 * 
 * @param reader IndexReader of the index
 * @param docId document id to be highlighted
 * @param fieldName field of the document to be highlighted
 * @param fieldQuery FieldQuery object
 * @throws IOException If there is a low-level I/O error
 */
public SpanAwareFieldTermStack(IndexReader reader, int docId, String fieldName,
        final SpanAwareFieldQuery fieldQuery) throws IOException {
    this.fieldName = fieldName;

    Set<String> termSet = fieldQuery.getTermSet(fieldName);
    Set<String> alwaysHighlightTermSet = fieldQuery.getHighlightTermSet(fieldName);

    // just return to make null snippet if un-matched fieldName specified when fieldMatch == true
    if (termSet == null)
        return;

    final Fields vectors = reader.getTermVectors(docId);
    if (vectors == null) {
        // null snippet
        return;
    }

    final Terms vector = vectors.terms(fieldName);
    if (vector == null) {
        // null snippet
        return;
    }

    final CharsRef spare = new CharsRef();
    final TermsEnum termsEnum = vector.iterator(null);
    DocsAndPositionsEnum dpEnum = null;
    BytesRef text;

    int numDocs = reader.maxDoc();
    while ((text = termsEnum.next()) != null) {
        UnicodeUtil.UTF8toUTF16(text, spare);
        final String term = spare.toString();
        if (!termSet.contains(term)) {
            continue;
        }
        dpEnum = termsEnum.docsAndPositions(null, dpEnum);
        if (dpEnum == null) {
            // null snippet
            return;
        }

        dpEnum.nextDoc();

        // For weight look here: http://lucene.apache.org/core/3_6_0/api/core/org/apache/lucene/search/DefaultSimilarity.html
        final float weight = (float) (Math
                .log(numDocs / (double) (reader.docFreq(new Term(fieldName, text)) + 1)) + 1.0);

        final int freq = dpEnum.freq();

        for (int i = 0; i < freq; i++) {
            int pos = dpEnum.nextPosition();
            if (dpEnum.startOffset() < 0) {
                return; // no offsets, null snippet
            }

            if (alwaysHighlightTermSet.contains(term)
                    || fieldQuery.doesDocFieldContainPosition(fieldName, docId, dpEnum.startOffset())) {
                termList.add(new TermInfo(term, dpEnum.startOffset(), dpEnum.endOffset(), pos, weight));
            }
        }

    }

    // sort by position
    Collections.sort(termList);
}

From source file:edu.upenn.library.solrplugins.FilingPrefixIgnorer.java

License:Apache License

@Override
public CharsRef transform(CharsRef input) {
    return new CharsRef(transform(input.toString()));
}

From source file:in.geocoder.component.GeocodingComponent.java

License:Apache License

private NamedList<Integer> getTerms(SolrIndexSearcher searcher, IndexSchema schema, String field)
        throws IOException {
    NamedList<Object> termsResult = new SimpleOrderedMap<Object>();

    boolean sort = true;

    boolean raw = false;

    final AtomicReader indexReader = searcher.getAtomicReader();
    Fields lfields = indexReader.fields();

    NamedList<Integer> fieldTerms = new NamedList<Integer>();
    termsResult.add(field, fieldTerms);/* www. java  2  s .c om*/

    Terms terms = lfields == null ? null : lfields.terms(field);
    if (terms == null) {
        // no terms for this field
        return new NamedList<Integer>();
    }

    FieldType ft = raw ? null : schema.getFieldTypeNoEx(field);
    if (ft == null)
        ft = new StrField();

    TermsEnum termsEnum = terms.iterator(null);
    BytesRef term = null;

    term = termsEnum.next();

    BoundedTreeSet<CountPair<BytesRef, Integer>> queue = (sort
            ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(Integer.MAX_VALUE)
            : null);
    CharsRef external = new CharsRef();
    while (term != null) {
        boolean externalized = false; // did we fill in "external" yet for this term?

        // This is a good term in the range.  Check if mincount/maxcount conditions are satisfied.
        int docFreq = termsEnum.docFreq();
        // add the term to the list
        if (sort) {
            queue.add(new CountPair<BytesRef, Integer>(BytesRef.deepCopyOf(term), docFreq));
        } else {
            // TODO: handle raw somehow
            if (!externalized) {
                ft.indexedToReadable(term, external);
            }
            fieldTerms.add(external.toString(), docFreq);
        }

        term = termsEnum.next();
    }

    if (sort) {
        for (CountPair<BytesRef, Integer> item : queue) {
            ft.indexedToReadable(item.key, external);
            fieldTerms.add(external.toString(), item.val);
        }
    }

    return fieldTerms;
}

From source file:org.apache.solr.handler.admin.LukeRequestHandler.java

License:Apache License

private static SimpleOrderedMap<Object> getDocumentFieldsInfo(Document doc, int docId, IndexReader reader,
        IndexSchema schema) throws IOException {
    final CharsRef spare = new CharsRef();
    SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>();
    for (Object o : doc.getFields()) {
        Field field = (Field) o;
        SimpleOrderedMap<Object> f = new SimpleOrderedMap<Object>();

        SchemaField sfield = schema.getFieldOrNull(field.name());
        FieldType ftype = (sfield == null) ? null : sfield.getType();

        f.add("type", (ftype == null) ? null : ftype.getTypeName());
        f.add("schema", getFieldFlags(sfield));
        f.add("flags", getFieldFlags(field));

        Term t = new Term(field.name(), ftype != null ? ftype.storedToIndexed(field) : field.stringValue());

        f.add("value", (ftype == null) ? null : ftype.toExternal(field));

        // TODO: this really should be "stored"
        f.add("internal", field.stringValue()); // may be a binary number

        BytesRef bytes = field.binaryValue();
        if (bytes != null) {
            f.add("binary", Base64.byteArrayToBase64(bytes.bytes, bytes.offset, bytes.length));
        }/*  w w  w  . j  a va2 s.co  m*/
        f.add("boost", field.boost());
        f.add("docFreq", t.text() == null ? 0 : reader.docFreq(t)); // this can be 0 for non-indexed fields

        // If we have a term vector, return that
        if (field.fieldType().storeTermVectors()) {
            try {
                Terms v = reader.getTermVector(docId, field.name());
                if (v != null) {
                    SimpleOrderedMap<Integer> tfv = new SimpleOrderedMap<Integer>();
                    final TermsEnum termsEnum = v.iterator(null);
                    BytesRef text;
                    while ((text = termsEnum.next()) != null) {
                        final int freq = (int) termsEnum.totalTermFreq();
                        UnicodeUtil.UTF8toUTF16(text, spare);
                        tfv.add(spare.toString(), freq);
                    }
                    f.add("termVector", tfv);
                }
            } catch (Exception ex) {
                log.warn("error writing term vector", ex);
            }
        }

        finfo.add(field.name(), f);
    }
    return finfo;
}

From source file:org.apache.solr.handler.admin.LukeRequestHandler.java

License:Apache License

@SuppressWarnings("unchecked")
private static void getDetailedFieldInfo(SolrQueryRequest req, String field, SimpleOrderedMap<Object> fieldMap)
        throws IOException {

    SolrParams params = req.getParams();
    final int numTerms = params.getInt(NUMTERMS, DEFAULT_COUNT);

    TopTermQueue tiq = new TopTermQueue(numTerms + 1); // Something to collect the top N terms in.

    final CharsRef spare = new CharsRef();

    Fields fields = MultiFields.getFields(req.getSearcher().getIndexReader());

    if (fields == null) { // No indexed fields
        return;//from   w  ww . j  a  va 2 s  .com
    }

    Terms terms = fields.terms(field);
    if (terms == null) { // No terms in the field.
        return;
    }
    TermsEnum termsEnum = terms.iterator(null);
    BytesRef text;
    int[] buckets = new int[HIST_ARRAY_SIZE];
    while ((text = termsEnum.next()) != null) {
        ++tiq.distinctTerms;
        int freq = termsEnum.docFreq(); // This calculation seems odd, but it gives the same results as it used to.
        int slot = 32 - Integer.numberOfLeadingZeros(Math.max(0, freq - 1));
        buckets[slot] = buckets[slot] + 1;
        if (numTerms > 0 && freq > tiq.minFreq) {
            UnicodeUtil.UTF8toUTF16(text, spare);
            String t = spare.toString();

            tiq.add(new TopTermQueue.TermInfo(new Term(field, t), termsEnum.docFreq()));
            if (tiq.size() > numTerms) { // if tiq full
                tiq.pop(); // remove lowest in tiq
                tiq.minFreq = tiq.getTopTermInfo().docFreq;
            }
        }
    }
    tiq.histogram.add(buckets);
    fieldMap.add("distinct", tiq.distinctTerms);

    // Include top terms
    fieldMap.add("topTerms", tiq.toNamedList(req.getSearcher().getSchema()));

    // Add a histogram
    fieldMap.add("histogram", tiq.histogram.toNamedList());
}

From source file:org.apache.solr.handler.component.ExpandAllComponent.java

License:Apache License

@Override
public void process(ResponseBuilder rb) throws IOException {

    if (!doExpandAll(rb)) {
        return;//from   www .  j a  v a  2  s  .  c o  m
    }

    SolrQueryRequest req = rb.req;
    SolrParams params = req.getParams();

    boolean isShard = params.getBool(ShardParams.IS_SHARD, false);
    String ids = params.get(ShardParams.IDS);

    if (ids == null && isShard) {
        return;
    }

    String field = params.get(ExpandParams.EXPAND_FIELD);
    if (field == null) {
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (q instanceof CollapsingQParserPlugin.CollapsingPostFilter) {
                    CollapsingQParserPlugin.CollapsingPostFilter cp = (CollapsingQParserPlugin.CollapsingPostFilter) q;
                    field = cp.getField();
                }
            }
        }
    }

    if (field == null) {
        throw new IOException("Expand field is null.");
    }

    String sortParam = params.get(ExpandParams.EXPAND_SORT);
    String[] fqs = params.getParams(ExpandParams.EXPAND_FQ);
    String qs = params.get(ExpandParams.EXPAND_Q);
    int limit = params.getInt(ExpandParams.EXPAND_ROWS, 5);

    Sort sort = null;

    if (sortParam != null) {
        sort = QueryParsing.parseSortSpec(sortParam, rb.req).getSort();
    }

    Query query = null;
    if (qs == null) {
        query = rb.getQuery();
    } else {
        try {
            QParser parser = QParser.getParser(qs, null, req);
            query = parser.getQuery();
        } catch (Exception e) {
            throw new IOException(e);
        }
    }

    List<Query> newFilters = new ArrayList();

    if (fqs == null) {
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (!(q instanceof CollapsingQParserPlugin.CollapsingPostFilter)) {
                    newFilters.add(q);
                }
            }
        }
    } else {
        try {
            for (String fq : fqs) {
                if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) {
                    QParser fqp = QParser.getParser(fq, null, req);
                    newFilters.add(fqp.getQuery());
                }
            }
        } catch (Exception e) {
            throw new IOException(e);
        }
    }

    SolrIndexSearcher searcher = req.getSearcher();
    AtomicReader reader = searcher.getAtomicReader();
    SortedDocValues values = FieldCache.DEFAULT.getTermsIndex(reader, field);
    FixedBitSet groupBits = new FixedBitSet(values.getValueCount());
    DocList docList = rb.getResults().docList;
    IntOpenHashSet collapsedSet = new IntOpenHashSet(docList.size() * 2);

    DocIterator idit = docList.iterator();

    while (idit.hasNext()) {
        int doc = idit.nextDoc();
        int ord = values.getOrd(doc);
        if (ord > -1) {
            groupBits.set(ord);
            collapsedSet.add(doc);
        }
    }

    Collector collector = null;
    GroupExpandCollector groupExpandCollector = new GroupExpandCollector(values, groupBits, collapsedSet, limit,
            sort);
    SolrIndexSearcher.ProcessedFilter pfilter = searcher.getProcessedFilter(null, newFilters);
    if (pfilter.postFilter != null) {
        pfilter.postFilter.setLastDelegate(groupExpandCollector);
        collector = pfilter.postFilter;
    } else {
        collector = groupExpandCollector;
    }

    searcher.search(query, pfilter.filter, collector);
    IntObjectOpenHashMap groups = groupExpandCollector.getGroups();
    Iterator<IntObjectCursor> it = groups.iterator();
    Map<String, DocSlice> outMap = new HashMap();
    BytesRef bytesRef = new BytesRef();
    CharsRef charsRef = new CharsRef();
    FieldType fieldType = searcher.getSchema().getField(field).getType();

    while (it.hasNext()) {
        IntObjectCursor cursor = it.next();
        int ord = cursor.key;
        TopDocsCollector topDocsCollector = (TopDocsCollector) cursor.value;
        TopDocs topDocs = topDocsCollector.topDocs();
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        if (scoreDocs.length > 0) {
            int[] docs = new int[scoreDocs.length];
            float[] scores = new float[scoreDocs.length];
            for (int i = 0; i < docs.length; i++) {
                ScoreDoc scoreDoc = scoreDocs[i];
                docs[i] = scoreDoc.doc;
                scores[i] = scoreDoc.score;
            }
            DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits,
                    topDocs.getMaxScore());
            values.lookupOrd(ord, bytesRef);
            fieldType.indexedToReadable(bytesRef, charsRef);
            String group = charsRef.toString();
            outMap.put(group, slice);
        }
    }

    rb.rsp.add("expanded", outMap);
}

From source file:org.apache.solr.handler.component.ExpandComponent.java

License:Apache License

@SuppressWarnings("unchecked")
@Override/*w ww.j  ava2 s .c  om*/
public void process(ResponseBuilder rb) throws IOException {

    if (!rb.doExpand) {
        return;
    }

    SolrQueryRequest req = rb.req;
    SolrParams params = req.getParams();

    boolean isShard = params.getBool(ShardParams.IS_SHARD, false);
    String ids = params.get(ShardParams.IDS);

    if (ids == null && isShard) {
        return;
    }

    String field = params.get(ExpandParams.EXPAND_FIELD);
    if (field == null) {
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (q instanceof CollapsingQParserPlugin.CollapsingPostFilter) {
                    CollapsingQParserPlugin.CollapsingPostFilter cp = (CollapsingQParserPlugin.CollapsingPostFilter) q;
                    field = cp.getField();
                }
            }
        }
    }

    if (field == null) {
        throw new IOException("Expand field is null.");
    }

    String sortParam = params.get(ExpandParams.EXPAND_SORT);
    String[] fqs = params.getParams(ExpandParams.EXPAND_FQ);
    String qs = params.get(ExpandParams.EXPAND_Q);
    int limit = params.getInt(ExpandParams.EXPAND_ROWS, 5);

    Sort sort = null;

    if (sortParam != null) {
        sort = QueryParsing.parseSortSpec(sortParam, rb.req).getSort();
    }

    Query query;
    if (qs == null) {
        query = rb.getQuery();
    } else {
        try {
            QParser parser = QParser.getParser(qs, null, req);
            query = parser.getQuery();
        } catch (Exception e) {
            throw new IOException(e);
        }
    }

    List<Query> newFilters = new ArrayList<>();

    if (fqs == null) {
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (!(q instanceof CollapsingQParserPlugin.CollapsingPostFilter)) {
                    newFilters.add(q);
                }
            }
        }
    } else {
        try {
            for (String fq : fqs) {
                if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) {
                    QParser fqp = QParser.getParser(fq, null, req);
                    newFilters.add(fqp.getQuery());
                }
            }
        } catch (Exception e) {
            throw new IOException(e);
        }
    }

    SolrIndexSearcher searcher = req.getSearcher();
    AtomicReader reader = searcher.getAtomicReader();
    SortedDocValues values = FieldCache.DEFAULT.getTermsIndex(reader, field);
    FixedBitSet groupBits = new FixedBitSet(values.getValueCount());
    DocList docList = rb.getResults().docList;
    IntOpenHashSet collapsedSet = new IntOpenHashSet(docList.size() * 2);

    DocIterator idit = docList.iterator();

    while (idit.hasNext()) {
        int doc = idit.nextDoc();
        int ord = values.getOrd(doc);
        if (ord > -1) {
            groupBits.set(ord);
            collapsedSet.add(doc);
        }
    }

    Collector collector;
    if (sort != null)
        sort = sort.rewrite(searcher);
    GroupExpandCollector groupExpandCollector = new GroupExpandCollector(values, groupBits, collapsedSet, limit,
            sort);
    SolrIndexSearcher.ProcessedFilter pfilter = searcher.getProcessedFilter(null, newFilters);
    if (pfilter.postFilter != null) {
        pfilter.postFilter.setLastDelegate(groupExpandCollector);
        collector = pfilter.postFilter;
    } else {
        collector = groupExpandCollector;
    }

    searcher.search(query, pfilter.filter, collector);
    IntObjectMap groups = groupExpandCollector.getGroups();
    Map<String, DocSlice> outMap = new HashMap();
    CharsRef charsRef = new CharsRef();
    FieldType fieldType = searcher.getSchema().getField(field).getType();
    for (IntObjectCursor cursor : (Iterable<IntObjectCursor>) groups) {
        int ord = cursor.key;
        TopDocsCollector topDocsCollector = (TopDocsCollector) cursor.value;
        TopDocs topDocs = topDocsCollector.topDocs();
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        if (scoreDocs.length > 0) {
            int[] docs = new int[scoreDocs.length];
            float[] scores = new float[scoreDocs.length];
            for (int i = 0; i < docs.length; i++) {
                ScoreDoc scoreDoc = scoreDocs[i];
                docs[i] = scoreDoc.doc;
                scores[i] = scoreDoc.score;
            }
            DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits,
                    topDocs.getMaxScore());
            final BytesRef bytesRef = values.lookupOrd(ord);
            fieldType.indexedToReadable(bytesRef, charsRef);
            String group = charsRef.toString();
            outMap.put(group, slice);
        }
    }

    rb.rsp.add("expanded", outMap);
}

From source file:org.apache.solr.handler.component.HelloHandlerComponent.java

License:Apache License

protected void doFieldSortValues(ResponseBuilder rb, SolrIndexSearcher searcher) throws IOException {
    SolrQueryRequest req = rb.req;/*from  www. ja  v a  2  s  .  com*/
    SolrQueryResponse rsp = rb.rsp;
    final CharsRef spare = new CharsRef();
    // The query cache doesn't currently store sort field values, and SolrIndexSearcher doesn't
    // currently have an option to return sort field values.  Because of this, we
    // take the documents given and re-derive the sort values.
    boolean fsv = req.getParams().getBool(ResponseBuilder.FIELD_SORT_VALUES, false);
    if (fsv) {
        Sort sort = searcher.weightSort(rb.getSortSpec().getSort());
        SortField[] sortFields = sort == null ? new SortField[] { SortField.FIELD_SCORE } : sort.getSort();
        NamedList<Object[]> sortVals = new NamedList<Object[]>(); // order is important for the sort fields
        Field field = new StringField("dummy", "", Field.Store.NO); // a dummy Field
        IndexReaderContext topReaderContext = searcher.getTopReaderContext();
        List<AtomicReaderContext> leaves = topReaderContext.leaves();
        AtomicReaderContext currentLeaf = null;
        if (leaves.size() == 1) {
            // if there is a single segment, use that subReader and avoid looking up each time
            currentLeaf = leaves.get(0);
            leaves = null;
        }

        DocList docList = rb.getResults().docList;

        // sort ids from lowest to highest so we can access them in order
        int nDocs = docList.size();
        long[] sortedIds = new long[nDocs];
        DocIterator it = rb.getResults().docList.iterator();
        for (int i = 0; i < nDocs; i++) {
            sortedIds[i] = (((long) it.nextDoc()) << 32) | i;
        }
        Arrays.sort(sortedIds);

        for (SortField sortField : sortFields) {
            SortField.Type type = sortField.getType();
            if (type == SortField.Type.SCORE || type == SortField.Type.DOC)
                continue;

            FieldComparator comparator = null;

            String fieldname = sortField.getField();
            FieldType ft = fieldname == null ? null : req.getSchema().getFieldTypeNoEx(fieldname);

            Object[] vals = new Object[nDocs];

            int lastIdx = -1;
            int idx = 0;

            for (long idAndPos : sortedIds) {
                int doc = (int) (idAndPos >>> 32);
                int position = (int) idAndPos;

                if (leaves != null) {
                    idx = ReaderUtil.subIndex(doc, leaves);
                    currentLeaf = leaves.get(idx);
                    if (idx != lastIdx) {
                        // we switched segments.  invalidate comparator.
                        comparator = null;
                    }
                }

                if (comparator == null) {
                    comparator = sortField.getComparator(1, 0);
                    comparator = comparator.setNextReader(currentLeaf);
                }

                doc -= currentLeaf.docBase; // adjust for what segment this is in
                comparator.copy(0, doc);
                Object val = comparator.value(0);

                // Sortable float, double, int, long types all just use a string
                // comparator. For these, we need to put the type into a readable
                // format.  One reason for this is that XML can't represent all
                // string values (or even all unicode code points).
                // indexedToReadable() should be a no-op and should
                // thus be harmless anyway (for all current ways anyway)
                if (val instanceof String) {
                    field.setStringValue((String) val);
                    val = ft.toObject(field);
                }

                // Must do the same conversion when sorting by a
                // String field in Lucene, which returns the terms
                // data as BytesRef:
                if (val instanceof BytesRef) {
                    UnicodeUtil.UTF8toUTF16((BytesRef) val, spare);
                    field.setStringValue(spare.toString());
                    val = ft.toObject(field);
                }

                vals[position] = val;
            }

            sortVals.add(fieldname, vals);
        }

        rsp.add("sort_values", sortVals);
    }
}

From source file:org.apache.solr.handler.component.QueryComponent.java

License:Apache License

protected void doFieldSortValues(ResponseBuilder rb, SolrIndexSearcher searcher) throws IOException {
    SolrQueryRequest req = rb.req;//from   www .j  ava  2  s.com
    SolrQueryResponse rsp = rb.rsp;
    final CharsRef spare = new CharsRef();
    // The query cache doesn't currently store sort field values, and SolrIndexSearcher doesn't
    // currently have an option to return sort field values.  Because of this, we
    // take the documents given and re-derive the sort values.
    boolean fsv = req.getParams().getBool(ResponseBuilder.FIELD_SORT_VALUES, false);
    if (fsv) {
        Sort sort = searcher.weightSort(rb.getSortSpec().getSort());
        SortField[] sortFields = sort == null ? new SortField[] { SortField.FIELD_SCORE } : sort.getSort();
        NamedList<Object[]> sortVals = new NamedList<Object[]>(); // order is important for the sort fields
        Field field = new StringField("dummy", "", Field.Store.NO); // a dummy Field
        IndexReaderContext topReaderContext = searcher.getTopReaderContext();
        List<AtomicReaderContext> leaves = topReaderContext.leaves();
        AtomicReaderContext currentLeaf = null;
        if (leaves.size() == 1) {
            // if there is a single segment, use that subReader and avoid looking up each time
            currentLeaf = leaves.get(0);
            leaves = null;
        }

        DocList docList = rb.getResults().docList;

        // sort ids from lowest to highest so we can access them in order
        int nDocs = docList.size();
        long[] sortedIds = new long[nDocs];
        DocIterator it = rb.getResults().docList.iterator();
        for (int i = 0; i < nDocs; i++) {
            sortedIds[i] = (((long) it.nextDoc()) << 32) | i;
        }
        Arrays.sort(sortedIds);

        for (SortField sortField : sortFields) {
            SortField.Type type = sortField.getType();
            if (type == SortField.Type.SCORE || type == SortField.Type.DOC)
                continue;

            FieldComparator comparator = null;

            String fieldname = sortField.getField();
            FieldType ft = fieldname == null ? null : searcher.getSchema().getFieldTypeNoEx(fieldname);

            Object[] vals = new Object[nDocs];

            int lastIdx = -1;
            int idx = 0;

            for (long idAndPos : sortedIds) {
                int doc = (int) (idAndPos >>> 32);
                int position = (int) idAndPos;

                if (leaves != null) {
                    idx = ReaderUtil.subIndex(doc, leaves);
                    currentLeaf = leaves.get(idx);
                    if (idx != lastIdx) {
                        // we switched segments.  invalidate comparator.
                        comparator = null;
                    }
                }

                if (comparator == null) {
                    comparator = sortField.getComparator(1, 0);
                    comparator = comparator.setNextReader(currentLeaf);
                }

                doc -= currentLeaf.docBase; // adjust for what segment this is in
                comparator.copy(0, doc);
                Object val = comparator.value(0);

                // Sortable float, double, int, long types all just use a string
                // comparator. For these, we need to put the type into a readable
                // format.  One reason for this is that XML can't represent all
                // string values (or even all unicode code points).
                // indexedToReadable() should be a no-op and should
                // thus be harmless anyway (for all current ways anyway)
                if (val instanceof String) {
                    field.setStringValue((String) val);
                    val = ft.toObject(field);
                }

                // Must do the same conversion when sorting by a
                // String field in Lucene, which returns the terms
                // data as BytesRef:
                if (val instanceof BytesRef) {
                    UnicodeUtil.UTF8toUTF16((BytesRef) val, spare);
                    field.setStringValue(spare.toString());
                    val = ft.toObject(field);
                }

                vals[position] = val;
            }

            sortVals.add(fieldname, vals);
        }

        rsp.add("sort_values", sortVals);
    }
}

From source file:org.apache.solr.handler.component.TermsComponent.java

License:Apache License

@Override
public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(TermsParams.TERMS, false))
        return;/*from  ww w .jav a2  s. c  o m*/

    String[] fields = params.getParams(TermsParams.TERMS_FIELD);

    NamedList<Object> termsResult = new SimpleOrderedMap<Object>();
    rb.rsp.add("terms", termsResult);

    if (fields == null || fields.length == 0)
        return;

    int limit = params.getInt(TermsParams.TERMS_LIMIT, 10);
    if (limit < 0) {
        limit = Integer.MAX_VALUE;
    }

    String lowerStr = params.get(TermsParams.TERMS_LOWER);
    String upperStr = params.get(TermsParams.TERMS_UPPER);
    boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
    boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
    boolean sort = !TermsParams.TERMS_SORT_INDEX
            .equals(params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
    int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1);
    int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT);
    if (freqmax < 0) {
        freqmax = Integer.MAX_VALUE;
    }
    String prefix = params.get(TermsParams.TERMS_PREFIX_STR);
    String regexp = params.get(TermsParams.TERMS_REGEXP_STR);
    Pattern pattern = regexp != null ? Pattern.compile(regexp, resolveRegexpFlags(params)) : null;

    boolean raw = params.getBool(TermsParams.TERMS_RAW, false);

    final AtomicReader indexReader = rb.req.getSearcher().getAtomicReader();
    Fields lfields = indexReader.fields();

    for (String field : fields) {
        NamedList<Integer> fieldTerms = new NamedList<Integer>();
        termsResult.add(field, fieldTerms);

        Terms terms = lfields == null ? null : lfields.terms(field);
        if (terms == null) {
            // no terms for this field
            continue;
        }

        FieldType ft = raw ? null : rb.req.getSchema().getFieldTypeNoEx(field);
        if (ft == null)
            ft = new StrField();

        // prefix must currently be text
        BytesRef prefixBytes = prefix == null ? null : new BytesRef(prefix);

        BytesRef upperBytes = null;
        if (upperStr != null) {
            upperBytes = new BytesRef();
            ft.readableToIndexed(upperStr, upperBytes);
        }

        BytesRef lowerBytes;
        if (lowerStr == null) {
            // If no lower bound was specified, use the prefix
            lowerBytes = prefixBytes;
        } else {
            lowerBytes = new BytesRef();
            if (raw) {
                // TODO: how to handle binary? perhaps we don't for "raw"... or if the field exists
                // perhaps we detect if the FieldType is non-character and expect hex if so?
                lowerBytes = new BytesRef(lowerStr);
            } else {
                lowerBytes = new BytesRef();
                ft.readableToIndexed(lowerStr, lowerBytes);
            }
        }

        TermsEnum termsEnum = terms.iterator(null);
        BytesRef term = null;

        if (lowerBytes != null) {
            if (termsEnum.seekCeil(lowerBytes) == TermsEnum.SeekStatus.END) {
                termsEnum = null;
            } else {
                term = termsEnum.term();
                //Only advance the enum if we are excluding the lower bound and the lower Term actually matches
                if (lowerIncl == false && term.equals(lowerBytes)) {
                    term = termsEnum.next();
                }
            }
        } else {
            // position termsEnum on first term
            term = termsEnum.next();
        }

        int i = 0;
        BoundedTreeSet<CountPair<BytesRef, Integer>> queue = (sort
                ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(limit)
                : null);
        CharsRef external = new CharsRef();
        while (term != null && (i < limit || sort)) {
            boolean externalized = false; // did we fill in "external" yet for this term?

            // stop if the prefix doesn't match
            if (prefixBytes != null && !StringHelper.startsWith(term, prefixBytes))
                break;

            if (pattern != null) {
                // indexed text or external text?
                // TODO: support "raw" mode?
                ft.indexedToReadable(term, external);
                externalized = true;
                if (!pattern.matcher(external).matches()) {
                    term = termsEnum.next();
                    continue;
                }
            }

            if (upperBytes != null) {
                int upperCmp = term.compareTo(upperBytes);
                // if we are past the upper term, or equal to it (when don't include upper) then stop.
                if (upperCmp > 0 || (upperCmp == 0 && !upperIncl))
                    break;
            }

            // This is a good term in the range.  Check if mincount/maxcount conditions are satisfied.
            int docFreq = termsEnum.docFreq();
            if (docFreq >= freqmin && docFreq <= freqmax) {
                // add the term to the list
                if (sort) {
                    queue.add(new CountPair<BytesRef, Integer>(BytesRef.deepCopyOf(term), docFreq));
                } else {

                    // TODO: handle raw somehow
                    if (!externalized) {
                        ft.indexedToReadable(term, external);
                    }
                    fieldTerms.add(external.toString(), docFreq);
                    i++;
                }
            }

            term = termsEnum.next();
        }

        if (sort) {
            for (CountPair<BytesRef, Integer> item : queue) {
                if (i >= limit)
                    break;
                ft.indexedToReadable(item.key, external);
                fieldTerms.add(external.toString(), item.val);
                i++;
            }
        }
    }
}