List of usage examples for org.apache.lucene.util.UnicodeUtil.BIG_TERM

BytesRef BIG_TERM

BIG_TERM is a BytesRef made up entirely of 0xFF bytes, so it compares greater than any valid UTF-8 term. The examples below use it in two ways: appended to a prefix to form an exclusive upper bound when computing the range of terms (or term ordinals) that share that prefix, and as a sentinel "missing value" that sorts after every real term.
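Before the per-project examples, here is a minimal sketch of the prefix-bounding idiom they all share. The helper name is hypothetical and not taken from any of the sources below; BytesRefBuilder, SortedDocValues.lookupTerm, and UnicodeUtil.BIG_TERM are real Lucene APIs.

import java.io.IOException;

import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.UnicodeUtil;

// Returns {startOrd, endOrd}: the half-open ordinal range of terms starting with prefix.
static int[] prefixOrdRange(SortedDocValues dv, String prefix) throws IOException {
  BytesRefBuilder bytes = new BytesRefBuilder();
  bytes.copyChars(prefix);
  int startOrd = dv.lookupTerm(bytes.get());
  if (startOrd < 0) startOrd = -startOrd - 1; // prefix itself not indexed: use the insertion point
  bytes.append(UnicodeUtil.BIG_TERM);         // now bigger than every term sharing this prefix
  int endOrd = dv.lookupTerm(bytes.get());    // never an actual term, so always a negative insertion point
  endOrd = -endOrd - 1;
  return new int[] { startOrd, endOrd };
}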
From source file: lux.solr.MissingStringLastComparatorSource.java
License: Apache License

public MissingStringLastComparatorSource() {
  this(UnicodeUtil.BIG_TERM);
}
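Here BIG_TERM is the default substitute value for documents that have no value in the sort field: because it compares greater than every real term, missing documents sort last (the usual sortMissingLast behavior).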
From source file: org.apache.solr.request.DocValuesFacets.java
License: Apache License

public static NamedList<Integer> getCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset,
    int limit, int mincount, boolean missing, String sort, String prefix) throws IOException {
  SchemaField schemaField = searcher.getSchema().getField(fieldName);
  FieldType ft = schemaField.getType();
  NamedList<Integer> res = new NamedList<Integer>();

  final SortedSetDocValues si; // for term lookups only
  OrdinalMap ordinalMap = null; // for mapping per-segment ords to global ones
  if (schemaField.multiValued()) {
    si = searcher.getAtomicReader().getSortedSetDocValues(fieldName);
    if (si instanceof MultiSortedSetDocValues) {
      ordinalMap = ((MultiSortedSetDocValues) si).mapping;
    }
  } else {
    SortedDocValues single = searcher.getAtomicReader().getSortedDocValues(fieldName);
    si = single == null ? null : new SingletonSortedSetDocValues(single);
    if (single instanceof MultiSortedDocValues) {
      ordinalMap = ((MultiSortedDocValues) single).mapping;
    }
  }

  if (si == null) {
    return finalize(res, searcher, schemaField, docs, -1, missing);
  }
  if (si.getValueCount() >= Integer.MAX_VALUE) {
    throw new UnsupportedOperationException(
        "Currently this faceting method is limited to " + Integer.MAX_VALUE + " unique terms");
  }

  final BytesRef br = new BytesRef();

  final BytesRef prefixRef;
  if (prefix == null) {
    prefixRef = null;
  } else if (prefix.length() == 0) {
    prefix = null;
    prefixRef = null;
  } else {
    prefixRef = new BytesRef(prefix);
  }

  int startTermIndex, endTermIndex;
  if (prefix != null) {
    startTermIndex = (int) si.lookupTerm(prefixRef);
    if (startTermIndex < 0)
      startTermIndex = -startTermIndex - 1;
    // BIG_TERM makes the lookup land just past the last term carrying this prefix
    prefixRef.append(UnicodeUtil.BIG_TERM);
    endTermIndex = (int) si.lookupTerm(prefixRef);
    assert endTermIndex < 0;
    endTermIndex = -endTermIndex - 1;
  } else {
    startTermIndex = -1;
    endTermIndex = (int) si.getValueCount();
  }

  final int nTerms = endTermIndex - startTermIndex;
  int missingCount = -1;
  final CharsRef charsRef = new CharsRef(10);
  if (nTerms > 0 && docs.size() >= mincount) {

    // count collection array only needs to be as big as the number of terms we are
    // going to collect counts for.
    final int[] counts = new int[nTerms];

    Filter filter = docs.getTopFilter();
    List<AtomicReaderContext> leaves = searcher.getTopReaderContext().leaves();
    for (int subIndex = 0; subIndex < leaves.size(); subIndex++) {
      AtomicReaderContext leaf = leaves.get(subIndex);
      DocIdSet dis = filter.getDocIdSet(leaf, null); // solr docsets already exclude any deleted docs
      DocIdSetIterator disi = null;
      if (dis != null) {
        disi = dis.iterator();
      }
      if (disi != null) {
        if (schemaField.multiValued()) {
          SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName);
          if (sub == null) {
            sub = SortedSetDocValues.EMPTY;
          }
          if (sub instanceof SingletonSortedSetDocValues) {
            // some codecs may optimize SORTED_SET storage for single-valued fields
            final SortedDocValues values = ((SingletonSortedSetDocValues) sub).getSortedDocValues();
            accumSingle(counts, startTermIndex, values, disi, subIndex, ordinalMap);
          } else {
            accumMulti(counts, startTermIndex, sub, disi, subIndex, ordinalMap);
          }
        } else {
          SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName);
          if (sub == null) {
            sub = SortedDocValues.EMPTY;
          }
          accumSingle(counts, startTermIndex, sub, disi, subIndex, ordinalMap);
        }
      }
    }

    if (startTermIndex == -1) {
      missingCount = counts[0];
    }

    // IDEA: we could also maintain a count of "other"... everything that fell outside
    // of the top 'N'

    int off = offset;
    int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

    if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
      int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
      maxsize = Math.min(maxsize, nTerms);
      LongPriorityQueue queue = new LongPriorityQueue(Math.min(maxsize, 1000), maxsize, Long.MIN_VALUE);

      int min = mincount - 1; // the smallest value in the top 'N' values
      for (int i = (startTermIndex == -1) ? 1 : 0; i < nTerms; i++) {
        int c = counts[i];
        if (c > min) {
          // NOTE: we use c>min rather than c>=min as an optimization because we are going in
          // index order, so we already know that the keys are ordered. This can be very
          // important if a lot of the counts are repeated (like zero counts would be).

          // smaller term numbers sort higher, so subtract the term number instead
          long pair = (((long) c) << 32) + (Integer.MAX_VALUE - i);
          boolean displaced = queue.insert(pair);
          if (displaced)
            min = (int) (queue.top() >>> 32);
        }
      }

      // if we are deep paging, we don't have to order the highest "offset" counts.
      int collectCount = Math.max(0, queue.size() - off);
      assert collectCount <= lim;

      // the start and end indexes of our list "sorted" (starting with the highest value)
      int sortedIdxStart = queue.size() - (collectCount - 1);
      int sortedIdxEnd = queue.size() + 1;
      final long[] sorted = queue.sort(collectCount);

      for (int i = sortedIdxStart; i < sortedIdxEnd; i++) {
        long pair = sorted[i];
        int c = (int) (pair >>> 32);
        int tnum = Integer.MAX_VALUE - (int) pair;
        si.lookupOrd(startTermIndex + tnum, br);
        ft.indexedToReadable(br, charsRef);
        res.add(charsRef.toString(), c);
      }

    } else {
      // add results in index order
      int i = (startTermIndex == -1) ? 1 : 0;
      if (mincount <= 0) {
        // if mincount<=0, then we won't discard any terms and we know exactly
        // where to start.
        i += off;
        off = 0;
      }

      for (; i < nTerms; i++) {
        int c = counts[i];
        if (c < mincount || --off >= 0)
          continue;
        if (--lim < 0)
          break;
        si.lookupOrd(startTermIndex + i, br);
        ft.indexedToReadable(br, charsRef);
        res.add(charsRef.toString(), c);
      }
    }
  }

  return finalize(res, searcher, schemaField, docs, missingCount, missing);
}
From source file: org.apache.solr.request.SimpleFacets.java
License: Apache License

/**
 * Use the Lucene FieldCache to get counts for each unique field value in <code>docs</code>.
 * The field must have at most one indexed token per document.
 */
public static NamedList<Integer> getFieldCacheCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName,
    int offset, int limit, int mincount, boolean missing, String sort, String prefix) throws IOException {
  // TODO: If the number of terms is high compared to docs.size(), and zeros==false,
  // we should use an alternate strategy to avoid
  // 1) creating another huge int[] for the counts
  // 2) looping over that huge int[] looking for the rare non-zeros.
  //
  // Yet another variation: if docs.size() is small and termvectors are stored,
  // then use them instead of the FieldCache.

  // TODO: this function is too big and could use some refactoring, but
  // we also need a facet cache, and refactoring of SimpleFacets instead of
  // trying to pass all the various params around.

  FieldType ft = searcher.getSchema().getFieldType(fieldName);
  NamedList<Integer> res = new NamedList<Integer>();

  SortedDocValues si = FieldCache.DEFAULT.getTermsIndex(searcher.getAtomicReader(), fieldName);

  final BytesRef br = new BytesRef();

  final BytesRef prefixRef;
  if (prefix == null) {
    prefixRef = null;
  } else if (prefix.length() == 0) {
    prefix = null;
    prefixRef = null;
  } else {
    prefixRef = new BytesRef(prefix);
  }

  int startTermIndex, endTermIndex;
  if (prefix != null) {
    startTermIndex = si.lookupTerm(prefixRef);
    if (startTermIndex < 0)
      startTermIndex = -startTermIndex - 1;
    prefixRef.append(UnicodeUtil.BIG_TERM);
    endTermIndex = si.lookupTerm(prefixRef);
    assert endTermIndex < 0;
    endTermIndex = -endTermIndex - 1;
  } else {
    startTermIndex = -1;
    endTermIndex = si.getValueCount();
  }

  final int nTerms = endTermIndex - startTermIndex;
  int missingCount = -1;
  final CharsRef charsRef = new CharsRef(10);
  if (nTerms > 0 && docs.size() >= mincount) {

    // count collection array only needs to be as big as the number of terms we are
    // going to collect counts for.
    final int[] counts = new int[nTerms];

    DocIterator iter = docs.iterator();

    while (iter.hasNext()) {
      int term = si.getOrd(iter.nextDoc());
      int arrIdx = term - startTermIndex;
      if (arrIdx >= 0 && arrIdx < nTerms)
        counts[arrIdx]++;
    }

    if (startTermIndex == -1) {
      missingCount = counts[0];
    }

    // IDEA: we could also maintain a count of "other"... everything that fell outside
    // of the top 'N'

    int off = offset;
    int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

    if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
      int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
      maxsize = Math.min(maxsize, nTerms);
      LongPriorityQueue queue = new LongPriorityQueue(Math.min(maxsize, 1000), maxsize, Long.MIN_VALUE);

      int min = mincount - 1; // the smallest value in the top 'N' values
      for (int i = (startTermIndex == -1) ? 1 : 0; i < nTerms; i++) {
        int c = counts[i];
        if (c > min) {
          // NOTE: we use c>min rather than c>=min as an optimization because we are going in
          // index order, so we already know that the keys are ordered. This can be very
          // important if a lot of the counts are repeated (like zero counts would be).

          // smaller term numbers sort higher, so subtract the term number instead
          long pair = (((long) c) << 32) + (Integer.MAX_VALUE - i);
          boolean displaced = queue.insert(pair);
          if (displaced)
            min = (int) (queue.top() >>> 32);
        }
      }

      // if we are deep paging, we don't have to order the highest "offset" counts.
      int collectCount = Math.max(0, queue.size() - off);
      assert collectCount <= lim;

      // the start and end indexes of our list "sorted" (starting with the highest value)
      int sortedIdxStart = queue.size() - (collectCount - 1);
      int sortedIdxEnd = queue.size() + 1;
      final long[] sorted = queue.sort(collectCount);

      for (int i = sortedIdxStart; i < sortedIdxEnd; i++) {
        long pair = sorted[i];
        int c = (int) (pair >>> 32);
        int tnum = Integer.MAX_VALUE - (int) pair;
        si.lookupOrd(startTermIndex + tnum, br);
        ft.indexedToReadable(br, charsRef);
        res.add(charsRef.toString(), c);
      }

    } else {
      // add results in index order
      int i = (startTermIndex == -1) ? 1 : 0;
      if (mincount <= 0) {
        // if mincount<=0, then we won't discard any terms and we know exactly
        // where to start.
        i += off;
        off = 0;
      }

      for (; i < nTerms; i++) {
        int c = counts[i];
        if (c < mincount || --off >= 0)
          continue;
        if (--lim < 0)
          break;
        si.lookupOrd(startTermIndex + i, br);
        ft.indexedToReadable(br, charsRef);
        res.add(charsRef.toString(), c);
      }
    }
  }

  if (missing) {
    if (missingCount < 0) {
      missingCount = getFieldMissingCount(searcher, docs, fieldName);
    }
    res.add(null, missingCount);
  }

  return res;
}
From source file: org.apache.solr.request.UnInvertedField.java
License: Apache License

public NamedList<Integer> getCounts(SolrIndexSearcher searcher, DocSet baseDocs, int offset, int limit,
    Integer mincount, boolean missing, String sort, String prefix) throws IOException {
  use.incrementAndGet();

  FieldType ft = searcher.getSchema().getFieldType(field);

  NamedList<Integer> res = new NamedList<Integer>(); // order is important

  DocSet docs = baseDocs;
  int baseSize = docs.size();
  int maxDoc = searcher.maxDoc();

  if (baseSize >= mincount) {

    final int[] index = this.index;
    // tricky: we allocate one more element than we need because we will reuse this array later
    // for ordering term ords before converting to term labels.
    final int[] counts = new int[numTermsInField + 1];

    //
    // If there is a prefix, find its start and end term numbers
    //
    int startTerm = 0;
    int endTerm = numTermsInField; // one past the end

    TermsEnum te = getOrdTermsEnum(searcher.getAtomicReader());
    if (te != null && prefix != null && prefix.length() > 0) {
      final BytesRef prefixBr = new BytesRef(prefix);
      if (te.seekCeil(prefixBr) == TermsEnum.SeekStatus.END) {
        startTerm = numTermsInField;
      } else {
        startTerm = (int) te.ord();
      }
      prefixBr.append(UnicodeUtil.BIG_TERM);
      if (te.seekCeil(prefixBr) == TermsEnum.SeekStatus.END) {
        endTerm = numTermsInField;
      } else {
        endTerm = (int) te.ord();
      }
    }

    /***********
    // Alternative 2: get the docSet of the prefix (could take a while) and
    // then do the intersection with the baseDocSet first.
    if (prefix != null && prefix.length() > 0) {
      docs = searcher.getDocSet(new ConstantScorePrefixQuery(new Term(field, ft.toInternal(prefix))), docs);
      // The issue with this method is that it returns 0 counts for terms w/o
      // the prefix. We can't just filter out those terms later because it may
      // mean that we didn't collect enough terms in the queue (in the sorted case).
    }
    ***********/

    boolean doNegative = baseSize > maxDoc >> 1 && termInstances > 0 && startTerm == 0
        && endTerm == numTermsInField && docs instanceof BitDocSet;

    if (doNegative) {
      OpenBitSet bs = (OpenBitSet) ((BitDocSet) docs).getBits().clone();
      bs.flip(0, maxDoc);
      // TODO: when iterator across negative elements is available, use that
      // instead of creating a new bitset and inverting.
      docs = new BitDocSet(bs, maxDoc - baseSize);
      // simply negating will mean that we have deleted docs in the set.
      // that should be OK, as their entries in our table should be empty.
    }

    // For the biggest terms, do straight set intersections
    for (TopTerm tt : bigTerms.values()) {
      // TODO: counts could be deferred if sorted==false
      if (tt.termNum >= startTerm && tt.termNum < endTerm) {
        counts[tt.termNum] = searcher.numDocs(new TermQuery(new Term(field, tt.term)), docs);
      }
    }

    // TODO: we could short-circuit counting altogether for sorted faceting
    // where we already have enough terms from the bigTerms

    // TODO: we could shrink the size of the collection array, and
    // additionally break when the termNumber got above endTerm, but
    // it would require two extra conditionals in the inner loop (although
    // they would be predictable for the non-prefix case).
    // Perhaps a different copy of the code would be warranted.

    if (termInstances > 0) {
      DocIterator iter = docs.iterator();
      while (iter.hasNext()) {
        int doc = iter.nextDoc();
        int code = index[doc];

        if ((code & 0xff) == 1) {
          // low byte == 1: the rest of "code" is a pointer into the packed tnums byte arrays
          int pos = code >>> 8;
          int whichArray = (doc >>> 16) & 0xff;
          byte[] arr = tnums[whichArray];
          int tnum = 0;
          for (;;) {
            int delta = 0;
            for (;;) {
              byte b = arr[pos++];
              delta = (delta << 7) | (b & 0x7f);
              if ((b & 0x80) == 0)
                break;
            }
            if (delta == 0)
              break;
            tnum += delta - TNUM_OFFSET;
            counts[tnum]++;
          }
        } else {
          // otherwise the term number deltas are vInt-packed directly into "code" itself
          int tnum = 0;
          int delta = 0;
          for (;;) {
            delta = (delta << 7) | (code & 0x7f);
            if ((code & 0x80) == 0) {
              if (delta == 0)
                break;
              tnum += delta - TNUM_OFFSET;
              counts[tnum]++;
              delta = 0;
            }
            code >>>= 8;
          }
        }
      }
    }

    final CharsRef charsRef = new CharsRef();

    int off = offset;
    int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

    if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
      int maxsize = limit > 0 ? offset + limit : Integer.MAX_VALUE - 1;
      maxsize = Math.min(maxsize, numTermsInField);
      LongPriorityQueue queue = new LongPriorityQueue(Math.min(maxsize, 1000), maxsize, Long.MIN_VALUE);

      int min = mincount - 1; // the smallest value in the top 'N' values
      for (int i = startTerm; i < endTerm; i++) {
        int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
        if (c > min) {
          // NOTE: we use c>min rather than c>=min as an optimization because we are going in
          // index order, so we already know that the keys are ordered. This can be very
          // important if a lot of the counts are repeated (like zero counts would be).

          // smaller term numbers sort higher, so subtract the term number instead
          long pair = (((long) c) << 32) + (Integer.MAX_VALUE - i);
          boolean displaced = queue.insert(pair);
          if (displaced)
            min = (int) (queue.top() >>> 32);
        }
      }

      // now select the right page from the results

      // if we are deep paging, we don't have to order the highest "offset" counts.
      int collectCount = Math.max(0, queue.size() - off);
      assert collectCount <= lim;

      // the start and end indexes of our list "sorted" (starting with the highest value)
      int sortedIdxStart = queue.size() - (collectCount - 1);
      int sortedIdxEnd = queue.size() + 1;
      final long[] sorted = queue.sort(collectCount);

      final int[] indirect = counts; // reuse the counts array for the index into the tnums array
      assert indirect.length >= sortedIdxEnd;

      for (int i = sortedIdxStart; i < sortedIdxEnd; i++) {
        long pair = sorted[i];
        int c = (int) (pair >>> 32);
        int tnum = Integer.MAX_VALUE - (int) pair;

        indirect[i] = i; // store the index for indirect sorting
        sorted[i] = tnum; // reuse the "sorted" array to store the term numbers for indirect sorting

        // add a null label for now... we'll fill it in later.
        res.add(null, c);
      }

      // now sort the indexes by the term numbers
      PrimUtils.sort(sortedIdxStart, sortedIdxEnd, indirect, new PrimUtils.IntComparator() {
        @Override
        public int compare(int a, int b) {
          return (int) sorted[a] - (int) sorted[b];
        }

        @Override
        public boolean lessThan(int a, int b) {
          return sorted[a] < sorted[b];
        }

        @Override
        public boolean equals(int a, int b) {
          return sorted[a] == sorted[b];
        }
      });

      // convert the term numbers to term values and set as the label
      for (int i = sortedIdxStart; i < sortedIdxEnd; i++) {
        int idx = indirect[i];
        int tnum = (int) sorted[idx];
        final String label = getReadableValue(getTermValue(te, tnum), ft, charsRef);
        res.setName(idx - sortedIdxStart, label);
      }

    } else {
      // add results in index order
      int i = startTerm;
      if (mincount <= 0) {
        // if mincount<=0, then we won't discard any terms and we know exactly
        // where to start.
        i = startTerm + off;
        off = 0;
      }

      for (; i < endTerm; i++) {
        int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
        if (c < mincount || --off >= 0)
          continue;
        if (--lim < 0)
          break;

        final String label = getReadableValue(getTermValue(te, i), ft, charsRef);
        res.add(label, c);
      }
    }
  }

  if (missing) {
    // TODO: a faster solution for this?
    res.add(null, SimpleFacets.getFieldMissingCount(searcher, baseDocs, field));
  }

  return res;
}
From source file: org.apache.solr.search.facet.FacetFieldProcessorByArrayDV.java
License: Apache License

@Override
protected void findStartAndEndOrds() throws IOException {
  if (multiValuedField) {
    si = FieldUtil.getSortedSetDocValues(fcontext.qcontext, sf, null);
    if (si instanceof MultiDocValues.MultiSortedSetDocValues) {
      ordinalMap = ((MultiDocValues.MultiSortedSetDocValues) si).mapping;
    }
  } else {
    SortedDocValues single = FieldUtil.getSortedDocValues(fcontext.qcontext, sf, null);
    si = DocValues.singleton(single); // multi-valued view
    if (single instanceof MultiDocValues.MultiSortedDocValues) {
      ordinalMap = ((MultiDocValues.MultiSortedDocValues) single).mapping;
    }
  }

  if (si.getValueCount() >= Integer.MAX_VALUE) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
        "Field has too many unique values. field=" + sf + " nterms= " + si.getValueCount());
  }

  if (prefixRef != null) {
    startTermIndex = (int) si.lookupTerm(prefixRef.get());
    if (startTermIndex < 0)
      startTermIndex = -startTermIndex - 1;
    prefixRef.append(UnicodeUtil.BIG_TERM);
    endTermIndex = (int) si.lookupTerm(prefixRef.get());
    assert endTermIndex < 0;
    endTermIndex = -endTermIndex - 1;
  } else {
    startTermIndex = 0;
    endTermIndex = (int) si.getValueCount();
  }

  nTerms = endTermIndex - startTermIndex;
}
From source file: org.apache.solr.search.facet.FacetFieldProcessorByArrayUIF.java
License: Apache License

@Override
protected void findStartAndEndOrds() throws IOException {
  uif = UnInvertedField.getUnInvertedField(freq.field, fcontext.searcher);
  te = uif.getOrdTermsEnum(fcontext.searcher.getSlowAtomicReader()); // "te" can be null

  startTermIndex = 0;
  endTermIndex = uif.numTerms(); // one past the end

  if (prefixRef != null && te != null) {
    if (te.seekCeil(prefixRef.get()) == TermsEnum.SeekStatus.END) {
      startTermIndex = uif.numTerms();
    } else {
      startTermIndex = (int) te.ord();
    }
    prefixRef.append(UnicodeUtil.BIG_TERM);
    if (te.seekCeil(prefixRef.get()) == TermsEnum.SeekStatus.END) {
      endTermIndex = uif.numTerms();
    } else {
      endTermIndex = (int) te.ord();
    }
  }

  nTerms = endTermIndex - startTermIndex;
}
From source file: org.apache.solr.search.facet.FacetFieldProcessorDV.java
License: Apache License

protected void findStartAndEndOrds() throws IOException {
  if (multiValuedField) {
    si = FieldUtil.getSortedSetDocValues(fcontext.qcontext, sf, null);
    if (si instanceof MultiDocValues.MultiSortedSetDocValues) {
      ordinalMap = ((MultiDocValues.MultiSortedSetDocValues) si).mapping;
    }
  } else {
    SortedDocValues single = FieldUtil.getSortedDocValues(fcontext.qcontext, sf, null);
    si = DocValues.singleton(single); // multi-valued view
    if (single instanceof MultiDocValues.MultiSortedDocValues) {
      ordinalMap = ((MultiDocValues.MultiSortedDocValues) single).mapping;
    }
  }

  if (si.getValueCount() >= Integer.MAX_VALUE) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
        "Field has too many unique values. field=" + sf + " nterms= " + si.getValueCount());
  }

  if (prefixRef != null) {
    startTermIndex = (int) si.lookupTerm(prefixRef.get());
    if (startTermIndex < 0)
      startTermIndex = -startTermIndex - 1;
    prefixRef.append(UnicodeUtil.BIG_TERM);
    endTermIndex = (int) si.lookupTerm(prefixRef.get());
    assert endTermIndex < 0;
    endTermIndex = -endTermIndex - 1;
  } else {
    startTermIndex = 0;
    endTermIndex = (int) si.getValueCount();
  }

  nTerms = endTermIndex - startTermIndex;
}
From source file: org.elasticsearch.index.fielddata.AbstractStringFieldDataTestCase.java
License: Apache License

public void testActualMissingValue(boolean reverse) throws IOException {
  // missing value is set to an actual value
  final String[] values = new String[randomIntBetween(2, 30)];
  for (int i = 1; i < values.length; ++i) {
    values[i] = TestUtil.randomUnicodeString(getRandom());
  }
  final int numDocs = scaledRandomIntBetween(10, 3072);
  for (int i = 0; i < numDocs; ++i) {
    final String value = RandomPicks.randomFrom(getRandom(), values);
    if (value == null) {
      writer.addDocument(new Document());
    } else {
      Document d = new Document();
      addField(d, "value", value);
      writer.addDocument(d);
    }
    if (randomInt(10) == 0) {
      writer.commit();
    }
  }
  final IndexFieldData indexFieldData = getForField("value");
  final String missingValue = values[1];
  IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(writer, true));
  XFieldComparatorSource comparator = indexFieldData.comparatorSource(missingValue, MultiValueMode.MIN, null);
  TopFieldDocs topDocs = searcher.search(new MatchAllDocsQuery(),
      randomBoolean() ? numDocs : randomIntBetween(10, numDocs),
      new Sort(new SortField("value", comparator, reverse)));
  assertEquals(numDocs, topDocs.totalHits);
  BytesRef previousValue = reverse ? UnicodeUtil.BIG_TERM : new BytesRef();
  for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
    final String docValue = searcher.doc(topDocs.scoreDocs[i].doc).get("value");
    final BytesRef value = new BytesRef(docValue == null ? missingValue : docValue);
    if (reverse) {
      assertTrue(previousValue.compareTo(value) >= 0);
    } else {
      assertTrue(previousValue.compareTo(value) <= 0);
    }
    previousValue = value;
  }
  searcher.getIndexReader().close();
}
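In the descending (reverse) case above, BIG_TERM seeds previousValue so the first comparison trivially holds: no term produced by the index can compare greater than it. The ascending case seeds with an empty BytesRef, the smallest possible term, for the same reason. The two tests that follow reuse this pattern.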
From source file: org.elasticsearch.index.fielddata.AbstractStringFieldDataTestCase.java
License: Apache License

public void testSortMissing(boolean first, boolean reverse) throws IOException {
  final String[] values = new String[randomIntBetween(2, 10)];
  for (int i = 1; i < values.length; ++i) {
    values[i] = TestUtil.randomUnicodeString(getRandom());
  }
  final int numDocs = scaledRandomIntBetween(10, 3072);
  for (int i = 0; i < numDocs; ++i) {
    final String value = RandomPicks.randomFrom(getRandom(), values);
    if (value == null) {
      writer.addDocument(new Document());
    } else {
      Document d = new Document();
      addField(d, "value", value);
      writer.addDocument(d);
    }
    if (randomInt(10) == 0) {
      writer.commit();
    }
  }
  final IndexFieldData indexFieldData = getForField("value");
  IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(writer, true));
  XFieldComparatorSource comparator = indexFieldData.comparatorSource(first ? "_first" : "_last",
      MultiValueMode.MIN, null);
  TopFieldDocs topDocs = searcher.search(new MatchAllDocsQuery(),
      randomBoolean() ? numDocs : randomIntBetween(10, numDocs),
      new Sort(new SortField("value", comparator, reverse)));
  assertEquals(numDocs, topDocs.totalHits);
  BytesRef previousValue = first ? null : reverse ? UnicodeUtil.BIG_TERM : new BytesRef();
  for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
    final String docValue = searcher.doc(topDocs.scoreDocs[i].doc).get("value");
    if (first && docValue == null) {
      assertNull(previousValue);
    } else if (!first && docValue != null) {
      assertNotNull(previousValue);
    }
    final BytesRef value = docValue == null ? null : new BytesRef(docValue);
    if (previousValue != null && value != null) {
      if (reverse) {
        assertTrue(previousValue.compareTo(value) >= 0);
      } else {
        assertTrue(previousValue.compareTo(value) <= 0);
      }
    }
    previousValue = value;
  }
  searcher.getIndexReader().close();
}
From source file: org.elasticsearch.index.fielddata.AbstractStringFieldDataTests.java
License: Apache License

public void testActualMissingValue(boolean reverse) throws IOException {
  // missing value is set to an actual value
  Document d = new Document();
  final StringField s = new StringField("value", "", Field.Store.YES);
  d.add(s);
  final String[] values = new String[randomIntBetween(2, 30)];
  for (int i = 1; i < values.length; ++i) {
    values[i] = _TestUtil.randomUnicodeString(getRandom());
  }
  final int numDocs = atLeast(100);
  for (int i = 0; i < numDocs; ++i) {
    final String value = RandomPicks.randomFrom(getRandom(), values);
    if (value == null) {
      writer.addDocument(new Document());
    } else {
      s.setStringValue(value);
      writer.addDocument(d);
    }
    if (randomInt(10) == 0) {
      writer.commit();
    }
  }
  final IndexFieldData indexFieldData = getForField("value");
  final String missingValue = values[1];
  IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(writer, true));
  XFieldComparatorSource comparator = indexFieldData.comparatorSource(missingValue, SortMode.MIN);
  TopFieldDocs topDocs = searcher.search(new MatchAllDocsQuery(),
      randomBoolean() ? numDocs : randomIntBetween(10, numDocs),
      new Sort(new SortField("value", comparator, reverse)));
  assertEquals(numDocs, topDocs.totalHits);
  BytesRef previousValue = reverse ? UnicodeUtil.BIG_TERM : new BytesRef();
  for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
    final String docValue = searcher.doc(topDocs.scoreDocs[i].doc).get("value");
    final BytesRef value = new BytesRef(docValue == null ? missingValue : docValue);
    if (reverse) {
      assertTrue(previousValue.compareTo(value) >= 0);
    } else {
      assertTrue(previousValue.compareTo(value) <= 0);
    }
    previousValue = value;
  }
  searcher.getIndexReader().close();
}