List of usage examples for org.apache.lucene.util.CharsRef#toString()
@Override
public String toString()
From source file:com.o19s.solr.swan.highlight.SpanAwareFieldTermStack.java
License:Apache License
/** * a constructor./*from w w w . j av a2s. co m*/ * * @param reader IndexReader of the index * @param docId document id to be highlighted * @param fieldName field of the document to be highlighted * @param fieldQuery FieldQuery object * @throws IOException If there is a low-level I/O error */ public SpanAwareFieldTermStack(IndexReader reader, int docId, String fieldName, final SpanAwareFieldQuery fieldQuery) throws IOException { this.fieldName = fieldName; Set<String> termSet = fieldQuery.getTermSet(fieldName); Set<String> alwaysHighlightTermSet = fieldQuery.getHighlightTermSet(fieldName); // just return to make null snippet if un-matched fieldName specified when fieldMatch == true if (termSet == null) return; final Fields vectors = reader.getTermVectors(docId); if (vectors == null) { // null snippet return; } final Terms vector = vectors.terms(fieldName); if (vector == null) { // null snippet return; } final CharsRef spare = new CharsRef(); final TermsEnum termsEnum = vector.iterator(null); DocsAndPositionsEnum dpEnum = null; BytesRef text; int numDocs = reader.maxDoc(); while ((text = termsEnum.next()) != null) { UnicodeUtil.UTF8toUTF16(text, spare); final String term = spare.toString(); if (!termSet.contains(term)) { continue; } dpEnum = termsEnum.docsAndPositions(null, dpEnum); if (dpEnum == null) { // null snippet return; } dpEnum.nextDoc(); // For weight look here: http://lucene.apache.org/core/3_6_0/api/core/org/apache/lucene/search/DefaultSimilarity.html final float weight = (float) (Math .log(numDocs / (double) (reader.docFreq(new Term(fieldName, text)) + 1)) + 1.0); final int freq = dpEnum.freq(); for (int i = 0; i < freq; i++) { int pos = dpEnum.nextPosition(); if (dpEnum.startOffset() < 0) { return; // no offsets, null snippet } if (alwaysHighlightTermSet.contains(term) || fieldQuery.doesDocFieldContainPosition(fieldName, docId, dpEnum.startOffset())) { termList.add(new TermInfo(term, dpEnum.startOffset(), dpEnum.endOffset(), pos, weight)); } } } // 
sort by position Collections.sort(termList); }
From source file:edu.upenn.library.solrplugins.FilingPrefixIgnorer.java
License:Apache License
/**
 * Runs the string-based {@code transform} on the characters of {@code input} and wraps the
 * outcome in a newly allocated {@link CharsRef}.
 *
 * @param input the characters to transform
 * @return a new {@link CharsRef} holding the transformed text
 */
@Override
public CharsRef transform(CharsRef input) {
  final String original = input.toString();
  final String transformed = transform(original);
  return new CharsRef(transformed);
}
From source file:in.geocoder.component.GeocodingComponent.java
License:Apache License
private NamedList<Integer> getTerms(SolrIndexSearcher searcher, IndexSchema schema, String field) throws IOException { NamedList<Object> termsResult = new SimpleOrderedMap<Object>(); boolean sort = true; boolean raw = false; final AtomicReader indexReader = searcher.getAtomicReader(); Fields lfields = indexReader.fields(); NamedList<Integer> fieldTerms = new NamedList<Integer>(); termsResult.add(field, fieldTerms);/* www. java 2 s .c om*/ Terms terms = lfields == null ? null : lfields.terms(field); if (terms == null) { // no terms for this field return new NamedList<Integer>(); } FieldType ft = raw ? null : schema.getFieldTypeNoEx(field); if (ft == null) ft = new StrField(); TermsEnum termsEnum = terms.iterator(null); BytesRef term = null; term = termsEnum.next(); BoundedTreeSet<CountPair<BytesRef, Integer>> queue = (sort ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(Integer.MAX_VALUE) : null); CharsRef external = new CharsRef(); while (term != null) { boolean externalized = false; // did we fill in "external" yet for this term? // This is a good term in the range. Check if mincount/maxcount conditions are satisfied. int docFreq = termsEnum.docFreq(); // add the term to the list if (sort) { queue.add(new CountPair<BytesRef, Integer>(BytesRef.deepCopyOf(term), docFreq)); } else { // TODO: handle raw somehow if (!externalized) { ft.indexedToReadable(term, external); } fieldTerms.add(external.toString(), docFreq); } term = termsEnum.next(); } if (sort) { for (CountPair<BytesRef, Integer> item : queue) { ft.indexedToReadable(item.key, external); fieldTerms.add(external.toString(), item.val); } } return fieldTerms; }
From source file:org.apache.solr.handler.admin.LukeRequestHandler.java
License:Apache License
private static SimpleOrderedMap<Object> getDocumentFieldsInfo(Document doc, int docId, IndexReader reader, IndexSchema schema) throws IOException { final CharsRef spare = new CharsRef(); SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>(); for (Object o : doc.getFields()) { Field field = (Field) o; SimpleOrderedMap<Object> f = new SimpleOrderedMap<Object>(); SchemaField sfield = schema.getFieldOrNull(field.name()); FieldType ftype = (sfield == null) ? null : sfield.getType(); f.add("type", (ftype == null) ? null : ftype.getTypeName()); f.add("schema", getFieldFlags(sfield)); f.add("flags", getFieldFlags(field)); Term t = new Term(field.name(), ftype != null ? ftype.storedToIndexed(field) : field.stringValue()); f.add("value", (ftype == null) ? null : ftype.toExternal(field)); // TODO: this really should be "stored" f.add("internal", field.stringValue()); // may be a binary number BytesRef bytes = field.binaryValue(); if (bytes != null) { f.add("binary", Base64.byteArrayToBase64(bytes.bytes, bytes.offset, bytes.length)); }/* w w w . j a va2 s.co m*/ f.add("boost", field.boost()); f.add("docFreq", t.text() == null ? 0 : reader.docFreq(t)); // this can be 0 for non-indexed fields // If we have a term vector, return that if (field.fieldType().storeTermVectors()) { try { Terms v = reader.getTermVector(docId, field.name()); if (v != null) { SimpleOrderedMap<Integer> tfv = new SimpleOrderedMap<Integer>(); final TermsEnum termsEnum = v.iterator(null); BytesRef text; while ((text = termsEnum.next()) != null) { final int freq = (int) termsEnum.totalTermFreq(); UnicodeUtil.UTF8toUTF16(text, spare); tfv.add(spare.toString(), freq); } f.add("termVector", tfv); } } catch (Exception ex) { log.warn("error writing term vector", ex); } } finfo.add(field.name(), f); } return finfo; }
From source file:org.apache.solr.handler.admin.LukeRequestHandler.java
License:Apache License
@SuppressWarnings("unchecked") private static void getDetailedFieldInfo(SolrQueryRequest req, String field, SimpleOrderedMap<Object> fieldMap) throws IOException { SolrParams params = req.getParams(); final int numTerms = params.getInt(NUMTERMS, DEFAULT_COUNT); TopTermQueue tiq = new TopTermQueue(numTerms + 1); // Something to collect the top N terms in. final CharsRef spare = new CharsRef(); Fields fields = MultiFields.getFields(req.getSearcher().getIndexReader()); if (fields == null) { // No indexed fields return;//from w ww . j a va 2 s .com } Terms terms = fields.terms(field); if (terms == null) { // No terms in the field. return; } TermsEnum termsEnum = terms.iterator(null); BytesRef text; int[] buckets = new int[HIST_ARRAY_SIZE]; while ((text = termsEnum.next()) != null) { ++tiq.distinctTerms; int freq = termsEnum.docFreq(); // This calculation seems odd, but it gives the same results as it used to. int slot = 32 - Integer.numberOfLeadingZeros(Math.max(0, freq - 1)); buckets[slot] = buckets[slot] + 1; if (numTerms > 0 && freq > tiq.minFreq) { UnicodeUtil.UTF8toUTF16(text, spare); String t = spare.toString(); tiq.add(new TopTermQueue.TermInfo(new Term(field, t), termsEnum.docFreq())); if (tiq.size() > numTerms) { // if tiq full tiq.pop(); // remove lowest in tiq tiq.minFreq = tiq.getTopTermInfo().docFreq; } } } tiq.histogram.add(buckets); fieldMap.add("distinct", tiq.distinctTerms); // Include top terms fieldMap.add("topTerms", tiq.toNamedList(req.getSearcher().getSchema())); // Add a histogram fieldMap.add("histogram", tiq.histogram.toNamedList()); }
From source file:org.apache.solr.handler.component.ExpandAllComponent.java
License:Apache License
@Override public void process(ResponseBuilder rb) throws IOException { if (!doExpandAll(rb)) { return;//from www . j a v a 2 s . c o m } SolrQueryRequest req = rb.req; SolrParams params = req.getParams(); boolean isShard = params.getBool(ShardParams.IS_SHARD, false); String ids = params.get(ShardParams.IDS); if (ids == null && isShard) { return; } String field = params.get(ExpandParams.EXPAND_FIELD); if (field == null) { List<Query> filters = rb.getFilters(); if (filters != null) { for (Query q : filters) { if (q instanceof CollapsingQParserPlugin.CollapsingPostFilter) { CollapsingQParserPlugin.CollapsingPostFilter cp = (CollapsingQParserPlugin.CollapsingPostFilter) q; field = cp.getField(); } } } } if (field == null) { throw new IOException("Expand field is null."); } String sortParam = params.get(ExpandParams.EXPAND_SORT); String[] fqs = params.getParams(ExpandParams.EXPAND_FQ); String qs = params.get(ExpandParams.EXPAND_Q); int limit = params.getInt(ExpandParams.EXPAND_ROWS, 5); Sort sort = null; if (sortParam != null) { sort = QueryParsing.parseSortSpec(sortParam, rb.req).getSort(); } Query query = null; if (qs == null) { query = rb.getQuery(); } else { try { QParser parser = QParser.getParser(qs, null, req); query = parser.getQuery(); } catch (Exception e) { throw new IOException(e); } } List<Query> newFilters = new ArrayList(); if (fqs == null) { List<Query> filters = rb.getFilters(); if (filters != null) { for (Query q : filters) { if (!(q instanceof CollapsingQParserPlugin.CollapsingPostFilter)) { newFilters.add(q); } } } } else { try { for (String fq : fqs) { if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) { QParser fqp = QParser.getParser(fq, null, req); newFilters.add(fqp.getQuery()); } } } catch (Exception e) { throw new IOException(e); } } SolrIndexSearcher searcher = req.getSearcher(); AtomicReader reader = searcher.getAtomicReader(); SortedDocValues values = FieldCache.DEFAULT.getTermsIndex(reader, field); FixedBitSet groupBits = new 
FixedBitSet(values.getValueCount()); DocList docList = rb.getResults().docList; IntOpenHashSet collapsedSet = new IntOpenHashSet(docList.size() * 2); DocIterator idit = docList.iterator(); while (idit.hasNext()) { int doc = idit.nextDoc(); int ord = values.getOrd(doc); if (ord > -1) { groupBits.set(ord); collapsedSet.add(doc); } } Collector collector = null; GroupExpandCollector groupExpandCollector = new GroupExpandCollector(values, groupBits, collapsedSet, limit, sort); SolrIndexSearcher.ProcessedFilter pfilter = searcher.getProcessedFilter(null, newFilters); if (pfilter.postFilter != null) { pfilter.postFilter.setLastDelegate(groupExpandCollector); collector = pfilter.postFilter; } else { collector = groupExpandCollector; } searcher.search(query, pfilter.filter, collector); IntObjectOpenHashMap groups = groupExpandCollector.getGroups(); Iterator<IntObjectCursor> it = groups.iterator(); Map<String, DocSlice> outMap = new HashMap(); BytesRef bytesRef = new BytesRef(); CharsRef charsRef = new CharsRef(); FieldType fieldType = searcher.getSchema().getField(field).getType(); while (it.hasNext()) { IntObjectCursor cursor = it.next(); int ord = cursor.key; TopDocsCollector topDocsCollector = (TopDocsCollector) cursor.value; TopDocs topDocs = topDocsCollector.topDocs(); ScoreDoc[] scoreDocs = topDocs.scoreDocs; if (scoreDocs.length > 0) { int[] docs = new int[scoreDocs.length]; float[] scores = new float[scoreDocs.length]; for (int i = 0; i < docs.length; i++) { ScoreDoc scoreDoc = scoreDocs[i]; docs[i] = scoreDoc.doc; scores[i] = scoreDoc.score; } DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits, topDocs.getMaxScore()); values.lookupOrd(ord, bytesRef); fieldType.indexedToReadable(bytesRef, charsRef); String group = charsRef.toString(); outMap.put(group, slice); } } rb.rsp.add("expanded", outMap); }
From source file:org.apache.solr.handler.component.ExpandComponent.java
License:Apache License
/**
 * Expands the groups that the documents of the current result list belong to.
 *
 * <p>The expand field is taken from the {@code ExpandParams.EXPAND_FIELD} parameter or, when
 * that is absent, from a {@link CollapsingQParserPlugin.CollapsingPostFilter} among the request
 * filters. The field ordinals of the result documents select the groups; the expand query
 * (parameter {@code ExpandParams.EXPAND_Q}, else the main query) is then run with the expand
 * filters (parameter {@code ExpandParams.EXPAND_FQ}, else the request filters minus collapsing
 * post filters) into a {@code GroupExpandCollector}. The outcome is added to the response as
 * {@code "expanded"}: a map from readable group value to the group's {@link DocSlice}.
 *
 * <p>The method does nothing when {@code rb.doExpand} is false, or when the request is a shard
 * request that carries no ids.
 *
 * @param rb response builder of the current request
 * @throws IOException if no expand field can be determined, if the expand query or a filter
 *     query cannot be parsed, or on a low-level I/O error
 */
@SuppressWarnings("unchecked")
@Override
public void process(ResponseBuilder rb) throws IOException {
  if (!rb.doExpand) {
    return;
  }

  SolrQueryRequest req = rb.req;
  SolrParams params = req.getParams();

  boolean isShard = params.getBool(ShardParams.IS_SHARD, false);
  String ids = params.get(ShardParams.IDS);
  if (ids == null && isShard) {
    return;
  }

  String field = params.get(ExpandParams.EXPAND_FIELD);
  if (field == null) {
    // Fall back to the field of a collapsing post filter; the last one found wins.
    List<Query> filters = rb.getFilters();
    if (filters != null) {
      for (Query q : filters) {
        if (q instanceof CollapsingQParserPlugin.CollapsingPostFilter) {
          CollapsingQParserPlugin.CollapsingPostFilter cp =
              (CollapsingQParserPlugin.CollapsingPostFilter) q;
          field = cp.getField();
        }
      }
    }
  }
  if (field == null) {
    throw new IOException("Expand field is null.");
  }

  String sortParam = params.get(ExpandParams.EXPAND_SORT);
  String[] fqs = params.getParams(ExpandParams.EXPAND_FQ);
  String qs = params.get(ExpandParams.EXPAND_Q);
  int limit = params.getInt(ExpandParams.EXPAND_ROWS, 5);

  Sort sort = null;
  if (sortParam != null) {
    sort = QueryParsing.parseSortSpec(sortParam, rb.req).getSort();
  }

  Query query;
  if (qs == null) {
    query = rb.getQuery();
  } else {
    try {
      QParser parser = QParser.getParser(qs, null, req);
      query = parser.getQuery();
    } catch (Exception e) {
      throw new IOException(e);
    }
  }

  List<Query> newFilters = new ArrayList<>();
  if (fqs == null) {
    List<Query> filters = rb.getFilters();
    if (filters != null) {
      for (Query q : filters) {
        if (!(q instanceof CollapsingQParserPlugin.CollapsingPostFilter)) {
          newFilters.add(q);
        }
      }
    }
  } else {
    try {
      for (String fq : fqs) {
        if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) {
          QParser fqp = QParser.getParser(fq, null, req);
          newFilters.add(fqp.getQuery());
        }
      }
    } catch (Exception e) {
      throw new IOException(e);
    }
  }

  SolrIndexSearcher searcher = req.getSearcher();
  AtomicReader reader = searcher.getAtomicReader();
  SortedDocValues values = FieldCache.DEFAULT.getTermsIndex(reader, field);
  FixedBitSet groupBits = new FixedBitSet(values.getValueCount());

  // Mark the ordinal of every document in the result list, and remember the documents.
  DocList docList = rb.getResults().docList;
  IntOpenHashSet collapsedSet = new IntOpenHashSet(docList.size() * 2);
  DocIterator idit = docList.iterator();
  while (idit.hasNext()) {
    int doc = idit.nextDoc();
    int ord = values.getOrd(doc);
    if (ord > -1) {
      groupBits.set(ord);
      collapsedSet.add(doc);
    }
  }

  Collector collector;
  if (sort != null) {
    sort = sort.rewrite(searcher);
  }
  GroupExpandCollector groupExpandCollector =
      new GroupExpandCollector(values, groupBits, collapsedSet, limit, sort);
  SolrIndexSearcher.ProcessedFilter pfilter = searcher.getProcessedFilter(null, newFilters);
  if (pfilter.postFilter != null) {
    pfilter.postFilter.setLastDelegate(groupExpandCollector);
    collector = pfilter.postFilter;
  } else {
    collector = groupExpandCollector;
  }

  searcher.search(query, pfilter.filter, collector);

  IntObjectMap groups = groupExpandCollector.getGroups();
  // Typed construction instead of the raw HashMap the original assigned here.
  Map<String, DocSlice> outMap = new HashMap<>();
  CharsRef charsRef = new CharsRef();
  FieldType fieldType = searcher.getSchema().getField(field).getType();

  for (IntObjectCursor cursor : (Iterable<IntObjectCursor>) groups) {
    int ord = cursor.key;
    TopDocsCollector topDocsCollector = (TopDocsCollector) cursor.value;
    TopDocs topDocs = topDocsCollector.topDocs();
    ScoreDoc[] scoreDocs = topDocs.scoreDocs;
    if (scoreDocs.length > 0) {
      int[] docs = new int[scoreDocs.length];
      float[] scores = new float[scoreDocs.length];
      for (int i = 0; i < docs.length; i++) {
        ScoreDoc scoreDoc = scoreDocs[i];
        docs[i] = scoreDoc.doc;
        scores[i] = scoreDoc.score;
      }
      DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits,
          topDocs.getMaxScore());
      // Translate the group ordinal back to its readable field value.
      final BytesRef bytesRef = values.lookupOrd(ord);
      fieldType.indexedToReadable(bytesRef, charsRef);
      String group = charsRef.toString();
      outMap.put(group, slice);
    }
  }

  rb.rsp.add("expanded", outMap);
}
From source file:org.apache.solr.handler.component.HelloHandlerComponent.java
License:Apache License
protected void doFieldSortValues(ResponseBuilder rb, SolrIndexSearcher searcher) throws IOException { SolrQueryRequest req = rb.req;/*from www. ja v a 2 s . com*/ SolrQueryResponse rsp = rb.rsp; final CharsRef spare = new CharsRef(); // The query cache doesn't currently store sort field values, and SolrIndexSearcher doesn't // currently have an option to return sort field values. Because of this, we // take the documents given and re-derive the sort values. boolean fsv = req.getParams().getBool(ResponseBuilder.FIELD_SORT_VALUES, false); if (fsv) { Sort sort = searcher.weightSort(rb.getSortSpec().getSort()); SortField[] sortFields = sort == null ? new SortField[] { SortField.FIELD_SCORE } : sort.getSort(); NamedList<Object[]> sortVals = new NamedList<Object[]>(); // order is important for the sort fields Field field = new StringField("dummy", "", Field.Store.NO); // a dummy Field IndexReaderContext topReaderContext = searcher.getTopReaderContext(); List<AtomicReaderContext> leaves = topReaderContext.leaves(); AtomicReaderContext currentLeaf = null; if (leaves.size() == 1) { // if there is a single segment, use that subReader and avoid looking up each time currentLeaf = leaves.get(0); leaves = null; } DocList docList = rb.getResults().docList; // sort ids from lowest to highest so we can access them in order int nDocs = docList.size(); long[] sortedIds = new long[nDocs]; DocIterator it = rb.getResults().docList.iterator(); for (int i = 0; i < nDocs; i++) { sortedIds[i] = (((long) it.nextDoc()) << 32) | i; } Arrays.sort(sortedIds); for (SortField sortField : sortFields) { SortField.Type type = sortField.getType(); if (type == SortField.Type.SCORE || type == SortField.Type.DOC) continue; FieldComparator comparator = null; String fieldname = sortField.getField(); FieldType ft = fieldname == null ? 
null : req.getSchema().getFieldTypeNoEx(fieldname); Object[] vals = new Object[nDocs]; int lastIdx = -1; int idx = 0; for (long idAndPos : sortedIds) { int doc = (int) (idAndPos >>> 32); int position = (int) idAndPos; if (leaves != null) { idx = ReaderUtil.subIndex(doc, leaves); currentLeaf = leaves.get(idx); if (idx != lastIdx) { // we switched segments. invalidate comparator. comparator = null; } } if (comparator == null) { comparator = sortField.getComparator(1, 0); comparator = comparator.setNextReader(currentLeaf); } doc -= currentLeaf.docBase; // adjust for what segment this is in comparator.copy(0, doc); Object val = comparator.value(0); // Sortable float, double, int, long types all just use a string // comparator. For these, we need to put the type into a readable // format. One reason for this is that XML can't represent all // string values (or even all unicode code points). // indexedToReadable() should be a no-op and should // thus be harmless anyway (for all current ways anyway) if (val instanceof String) { field.setStringValue((String) val); val = ft.toObject(field); } // Must do the same conversion when sorting by a // String field in Lucene, which returns the terms // data as BytesRef: if (val instanceof BytesRef) { UnicodeUtil.UTF8toUTF16((BytesRef) val, spare); field.setStringValue(spare.toString()); val = ft.toObject(field); } vals[position] = val; } sortVals.add(fieldname, vals); } rsp.add("sort_values", sortVals); } }
From source file:org.apache.solr.handler.component.QueryComponent.java
License:Apache License
protected void doFieldSortValues(ResponseBuilder rb, SolrIndexSearcher searcher) throws IOException { SolrQueryRequest req = rb.req;//from www .j ava 2 s.com SolrQueryResponse rsp = rb.rsp; final CharsRef spare = new CharsRef(); // The query cache doesn't currently store sort field values, and SolrIndexSearcher doesn't // currently have an option to return sort field values. Because of this, we // take the documents given and re-derive the sort values. boolean fsv = req.getParams().getBool(ResponseBuilder.FIELD_SORT_VALUES, false); if (fsv) { Sort sort = searcher.weightSort(rb.getSortSpec().getSort()); SortField[] sortFields = sort == null ? new SortField[] { SortField.FIELD_SCORE } : sort.getSort(); NamedList<Object[]> sortVals = new NamedList<Object[]>(); // order is important for the sort fields Field field = new StringField("dummy", "", Field.Store.NO); // a dummy Field IndexReaderContext topReaderContext = searcher.getTopReaderContext(); List<AtomicReaderContext> leaves = topReaderContext.leaves(); AtomicReaderContext currentLeaf = null; if (leaves.size() == 1) { // if there is a single segment, use that subReader and avoid looking up each time currentLeaf = leaves.get(0); leaves = null; } DocList docList = rb.getResults().docList; // sort ids from lowest to highest so we can access them in order int nDocs = docList.size(); long[] sortedIds = new long[nDocs]; DocIterator it = rb.getResults().docList.iterator(); for (int i = 0; i < nDocs; i++) { sortedIds[i] = (((long) it.nextDoc()) << 32) | i; } Arrays.sort(sortedIds); for (SortField sortField : sortFields) { SortField.Type type = sortField.getType(); if (type == SortField.Type.SCORE || type == SortField.Type.DOC) continue; FieldComparator comparator = null; String fieldname = sortField.getField(); FieldType ft = fieldname == null ? 
null : searcher.getSchema().getFieldTypeNoEx(fieldname); Object[] vals = new Object[nDocs]; int lastIdx = -1; int idx = 0; for (long idAndPos : sortedIds) { int doc = (int) (idAndPos >>> 32); int position = (int) idAndPos; if (leaves != null) { idx = ReaderUtil.subIndex(doc, leaves); currentLeaf = leaves.get(idx); if (idx != lastIdx) { // we switched segments. invalidate comparator. comparator = null; } } if (comparator == null) { comparator = sortField.getComparator(1, 0); comparator = comparator.setNextReader(currentLeaf); } doc -= currentLeaf.docBase; // adjust for what segment this is in comparator.copy(0, doc); Object val = comparator.value(0); // Sortable float, double, int, long types all just use a string // comparator. For these, we need to put the type into a readable // format. One reason for this is that XML can't represent all // string values (or even all unicode code points). // indexedToReadable() should be a no-op and should // thus be harmless anyway (for all current ways anyway) if (val instanceof String) { field.setStringValue((String) val); val = ft.toObject(field); } // Must do the same conversion when sorting by a // String field in Lucene, which returns the terms // data as BytesRef: if (val instanceof BytesRef) { UnicodeUtil.UTF8toUTF16((BytesRef) val, spare); field.setStringValue(spare.toString()); val = ft.toObject(field); } vals[position] = val; } sortVals.add(fieldname, vals); } rsp.add("sort_values", sortVals); } }
From source file:org.apache.solr.handler.component.TermsComponent.java
License:Apache License
@Override public void process(ResponseBuilder rb) throws IOException { SolrParams params = rb.req.getParams(); if (!params.getBool(TermsParams.TERMS, false)) return;/*from ww w .jav a2 s. c o m*/ String[] fields = params.getParams(TermsParams.TERMS_FIELD); NamedList<Object> termsResult = new SimpleOrderedMap<Object>(); rb.rsp.add("terms", termsResult); if (fields == null || fields.length == 0) return; int limit = params.getInt(TermsParams.TERMS_LIMIT, 10); if (limit < 0) { limit = Integer.MAX_VALUE; } String lowerStr = params.get(TermsParams.TERMS_LOWER); String upperStr = params.get(TermsParams.TERMS_UPPER); boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false); boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true); boolean sort = !TermsParams.TERMS_SORT_INDEX .equals(params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT)); int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1); int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT); if (freqmax < 0) { freqmax = Integer.MAX_VALUE; } String prefix = params.get(TermsParams.TERMS_PREFIX_STR); String regexp = params.get(TermsParams.TERMS_REGEXP_STR); Pattern pattern = regexp != null ? Pattern.compile(regexp, resolveRegexpFlags(params)) : null; boolean raw = params.getBool(TermsParams.TERMS_RAW, false); final AtomicReader indexReader = rb.req.getSearcher().getAtomicReader(); Fields lfields = indexReader.fields(); for (String field : fields) { NamedList<Integer> fieldTerms = new NamedList<Integer>(); termsResult.add(field, fieldTerms); Terms terms = lfields == null ? null : lfields.terms(field); if (terms == null) { // no terms for this field continue; } FieldType ft = raw ? null : rb.req.getSchema().getFieldTypeNoEx(field); if (ft == null) ft = new StrField(); // prefix must currently be text BytesRef prefixBytes = prefix == null ? 
null : new BytesRef(prefix); BytesRef upperBytes = null; if (upperStr != null) { upperBytes = new BytesRef(); ft.readableToIndexed(upperStr, upperBytes); } BytesRef lowerBytes; if (lowerStr == null) { // If no lower bound was specified, use the prefix lowerBytes = prefixBytes; } else { lowerBytes = new BytesRef(); if (raw) { // TODO: how to handle binary? perhaps we don't for "raw"... or if the field exists // perhaps we detect if the FieldType is non-character and expect hex if so? lowerBytes = new BytesRef(lowerStr); } else { lowerBytes = new BytesRef(); ft.readableToIndexed(lowerStr, lowerBytes); } } TermsEnum termsEnum = terms.iterator(null); BytesRef term = null; if (lowerBytes != null) { if (termsEnum.seekCeil(lowerBytes) == TermsEnum.SeekStatus.END) { termsEnum = null; } else { term = termsEnum.term(); //Only advance the enum if we are excluding the lower bound and the lower Term actually matches if (lowerIncl == false && term.equals(lowerBytes)) { term = termsEnum.next(); } } } else { // position termsEnum on first term term = termsEnum.next(); } int i = 0; BoundedTreeSet<CountPair<BytesRef, Integer>> queue = (sort ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(limit) : null); CharsRef external = new CharsRef(); while (term != null && (i < limit || sort)) { boolean externalized = false; // did we fill in "external" yet for this term? // stop if the prefix doesn't match if (prefixBytes != null && !StringHelper.startsWith(term, prefixBytes)) break; if (pattern != null) { // indexed text or external text? // TODO: support "raw" mode? ft.indexedToReadable(term, external); externalized = true; if (!pattern.matcher(external).matches()) { term = termsEnum.next(); continue; } } if (upperBytes != null) { int upperCmp = term.compareTo(upperBytes); // if we are past the upper term, or equal to it (when don't include upper) then stop. if (upperCmp > 0 || (upperCmp == 0 && !upperIncl)) break; } // This is a good term in the range. 
Check if mincount/maxcount conditions are satisfied. int docFreq = termsEnum.docFreq(); if (docFreq >= freqmin && docFreq <= freqmax) { // add the term to the list if (sort) { queue.add(new CountPair<BytesRef, Integer>(BytesRef.deepCopyOf(term), docFreq)); } else { // TODO: handle raw somehow if (!externalized) { ft.indexedToReadable(term, external); } fieldTerms.add(external.toString(), docFreq); i++; } } term = termsEnum.next(); } if (sort) { for (CountPair<BytesRef, Integer> item : queue) { if (i >= limit) break; ft.indexedToReadable(item.key, external); fieldTerms.add(external.toString(), item.val); i++; } } } }