List of usage examples for org.apache.lucene.index LeafReader getSortedDocValues
public abstract SortedDocValues getSortedDocValues(String field) throws IOException;
From source file:com.qwazr.search.field.ValueConverter.java
License:Apache License
final static ValueConverter newConverter(FieldDefinition fieldDef, LeafReader dvReader, FieldInfo fieldInfo) throws IOException { if (fieldInfo == null) return null; DocValuesType type = fieldInfo.getDocValuesType(); if (type == null) return null; switch (type) { case BINARY://from w w w. j ava2 s . c om BinaryDocValues binaryDocValue = dvReader.getBinaryDocValues(fieldInfo.name); if (binaryDocValue == null) return null; return new BinaryDVConverter(binaryDocValue); case SORTED: SortedDocValues sortedDocValues = dvReader.getSortedDocValues(fieldInfo.name); if (sortedDocValues == null) return null; return new SortedDVConverter(sortedDocValues); case NONE: break; case NUMERIC: NumericDocValues numericDocValues = dvReader.getNumericDocValues(fieldInfo.name); if (numericDocValues == null) return null; return newNumericConverter(fieldDef, numericDocValues); case SORTED_NUMERIC: SortedNumericDocValues sortedNumericDocValues = dvReader.getSortedNumericDocValues(fieldInfo.name); if (sortedNumericDocValues == null) return null; return newSortedNumericConverter(fieldDef, sortedNumericDocValues); case SORTED_SET: SortedSetDocValues sortedSetDocValues = dvReader.getSortedSetDocValues(fieldInfo.name); if (sortedSetDocValues == null) return null; return null; default: throw new IOException("Unsupported doc value type: " + type + " for field: " + fieldInfo.name); } return null; }
From source file:org.apache.solr.handler.ExportWriter.java
License:Apache License
private SortDoc getSortDoc(SolrIndexSearcher searcher, SortField[] sortFields) throws IOException { SortValue[] sortValues = new SortValue[sortFields.length]; IndexSchema schema = searcher.getSchema(); for (int i = 0; i < sortFields.length; ++i) { SortField sf = sortFields[i];//from w w w . j a v a 2s. com String field = sf.getField(); boolean reverse = sf.getReverse(); SchemaField schemaField = schema.getField(field); FieldType ft = schemaField.getType(); if (!schemaField.hasDocValues()) { throw new IOException(field + " must have DocValues to use this feature."); } if (ft instanceof TrieIntField) { if (reverse) { sortValues[i] = new IntValue(field, new IntDesc()); } else { sortValues[i] = new IntValue(field, new IntAsc()); } } else if (ft instanceof TrieFloatField) { if (reverse) { sortValues[i] = new FloatValue(field, new FloatDesc()); } else { sortValues[i] = new FloatValue(field, new FloatAsc()); } } else if (ft instanceof TrieDoubleField) { if (reverse) { sortValues[i] = new DoubleValue(field, new DoubleDesc()); } else { sortValues[i] = new DoubleValue(field, new DoubleAsc()); } } else if (ft instanceof TrieLongField) { if (reverse) { sortValues[i] = new LongValue(field, new LongDesc()); } else { sortValues[i] = new LongValue(field, new LongAsc()); } } else if (ft instanceof StrField) { LeafReader reader = searcher.getSlowAtomicReader(); SortedDocValues vals = reader.getSortedDocValues(field); if (reverse) { sortValues[i] = new StringValue(vals, field, new IntDesc()); } else { sortValues[i] = new StringValue(vals, field, new IntAsc()); } } else if (ft instanceof TrieDateField) { if (reverse) { sortValues[i] = new LongValue(field, new LongDesc()); } else { sortValues[i] = new LongValue(field, new LongAsc()); } } else if (ft instanceof BoolField) { // This is a bit of a hack, but since the boolean field stores ByteRefs, just like Strings // _and_ since "F" happens to sort before "T" (thus false sorts "less" than true) // we can just use the existing StringValue 
here. LeafReader reader = searcher.getSlowAtomicReader(); SortedDocValues vals = reader.getSortedDocValues(field); if (reverse) { sortValues[i] = new StringValue(vals, field, new IntDesc()); } else { sortValues[i] = new StringValue(vals, field, new IntAsc()); } } else { throw new IOException( "Sort fields must be one of the following types: int,float,long,double,string,date,boolean"); } } if (sortValues.length == 1) { return new SingleValueSortDoc(sortValues[0]); } else if (sortValues.length == 2) { return new DoubleValueSortDoc(sortValues[0], sortValues[1]); } else if (sortValues.length == 3) { return new TripleValueSortDoc(sortValues[0], sortValues[1], sortValues[2]); } else if (sortValues.length == 4) { return new QuadValueSortDoc(sortValues[0], sortValues[1], sortValues[2], sortValues[3]); } else { throw new IOException("A max of 4 sorts can be specified"); } }
From source file:org.apache.solr.index.SlowCompositeReaderWrapper.java
License:Apache License
/**
 * Returns a composite SORTED doc-values view over all leaves, caching the
 * cross-segment OrdinalMap per field so repeated calls avoid rebuilding it.
 * Returns null if any leaf declares the field with a non-SORTED doc-values type.
 */
@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
    ensureOpen();
    OrdinalMap map = null;
    // Only the cache lookup/insert is synchronized; building per-leaf values below
    // happens outside the lock.
    synchronized (cachedOrdMaps) {
        map = cachedOrdMaps.get(field);
        if (map == null) {
            // uncached, or not a multi dv
            SortedDocValues dv = MultiDocValues.getSortedValues(in, field);
            if (dv instanceof MultiSortedDocValues) {
                map = ((MultiSortedDocValues) dv).mapping;
                // Only cache when the map belongs to this reader and we are not merging.
                if (map.owner == getCoreCacheKey() && merging == false) {
                    cachedOrdMaps.put(field, map);
                }
            }
            return dv;
        }
    }
    // Cache hit: rebuild the per-leaf values around the cached OrdinalMap.
    int size = in.leaves().size();
    final SortedDocValues[] values = new SortedDocValues[size];
    final int[] starts = new int[size + 1];
    long totalCost = 0;
    for (int i = 0; i < size; i++) {
        LeafReaderContext context = in.leaves().get(i);
        final LeafReader reader = context.reader();
        final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
        // A leaf with a different doc-values type makes the composite view invalid.
        if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED) {
            return null;
        }
        SortedDocValues v = reader.getSortedDocValues(field);
        if (v == null) {
            // Leaf has no values for this field; substitute an empty instance.
            v = DocValues.emptySorted();
        }
        totalCost += v.cost();
        values[i] = v;
        starts[i] = context.docBase;
    }
    starts[size] = maxDoc();
    return new MultiSortedDocValues(values, starts, map, totalCost);
}
From source file:org.apache.solr.index.UninvertDocValuesMergePolicyTest.java
License:Apache License
// Verifies that documents indexed before the field had docValues are correctly
// uninverted into SORTED doc values once docValues are enabled and the index is
// merged down to one segment.
public void testIndexAndAddDocValues() throws Exception {
    Random rand = random();
    // Index 100 docs while the field has no docValues, committing at random points
    // so the docs end up spread across several segments.
    for (int i = 0; i < 100; i++) {
        assertU(adoc(ID_FIELD, String.valueOf(i), TEST_FIELD, String.valueOf(i)));
        if (rand.nextBoolean()) {
            assertU(commit());
        }
    }
    assertU(commit());
    // Assert everything has been indexed and there are no docvalues
    withNewRawReader(h, topReader -> {
        assertEquals(100, topReader.numDocs());
        final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
        // The global field type should not have docValues yet
        assertEquals(DocValuesType.NONE, infos.fieldInfo(TEST_FIELD).getDocValuesType());
    });
    addDocValuesTo(h, TEST_FIELD);
    // Add some more documents with doc values turned on including updating some
    for (int i = 90; i < 110; i++) {
        assertU(adoc(ID_FIELD, String.valueOf(i), TEST_FIELD, String.valueOf(i)));
        if (rand.nextBoolean()) {
            assertU(commit());
        }
    }
    assertU(commit());
    withNewRawReader(h, topReader -> {
        assertEquals(110, topReader.numDocs());
        final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
        // The global field type should have docValues because a document with dvs was added
        assertEquals(DocValuesType.SORTED, infos.fieldInfo(TEST_FIELD).getDocValuesType());
    });
    int optimizeSegments = 1;
    assertU(optimize("maxSegments", String.valueOf(optimizeSegments)));
    // Assert all docs have the right docvalues
    withNewRawReader(h, topReader -> {
        // Assert merged into one segment
        assertEquals(110, topReader.numDocs());
        assertEquals(optimizeSegments, topReader.leaves().size());
        final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
        // The global field type should have docValues because a document with dvs was added
        assertEquals(DocValuesType.SORTED, infos.fieldInfo(TEST_FIELD).getDocValuesType());
        // Check that all segments have the right docvalues type with the correct value
        // Also check that other fields (e.g. the id field) didn't mistakenly get docvalues added
        for (LeafReaderContext ctx : topReader.leaves()) {
            LeafReader r = ctx.reader();
            SortedDocValues docvalues = r.getSortedDocValues(TEST_FIELD);
            for (int i = 0; i < r.numDocs(); ++i) {
                Document doc = r.document(i);
                String v = doc.getField(TEST_FIELD).stringValue();
                String id = doc.getField(ID_FIELD).stringValue();
                assertEquals(DocValuesType.SORTED, r.getFieldInfos().fieldInfo(TEST_FIELD).getDocValuesType());
                assertEquals(DocValuesType.NONE, r.getFieldInfos().fieldInfo(ID_FIELD).getDocValuesType());
                assertEquals(v, id);
                // Every doc now has a value, so a plain nextDoc() walk stays in sync with i.
                docvalues.nextDoc();
                assertEquals(v, docvalues.binaryValue().utf8ToString());
            }
        }
    });
}
From source file:org.apache.solr.index.UninvertDocValuesMergePolicyTest.java
License:Apache License
// Removing "indexed" from the field type before enabling docValues must not break
// the uninverting merge: a doc written while the field was neither indexed nor
// docValued simply ends up with no value after the forced merge.
public void testNonIndexedFieldDoesNonFail() throws Exception {
    // Remove Indexed from fieldType
    removeIndexFrom(h, TEST_FIELD);
    assertU(adoc(ID_FIELD, String.valueOf(1), TEST_FIELD, String.valueOf(1)));
    assertU(commit());
    addDocValuesTo(h, TEST_FIELD);
    assertU(adoc(ID_FIELD, String.valueOf(2), TEST_FIELD, String.valueOf(2)));
    assertU(commit());
    assertU(optimize("maxSegments", "1"));
    withNewRawReader(h, topReader -> {
        // Assert merged into one segment
        assertEquals(2, topReader.numDocs());
        assertEquals(1, topReader.leaves().size());
        final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
        // The global field type should have docValues because a document with dvs was added
        assertEquals(DocValuesType.SORTED, infos.fieldInfo(TEST_FIELD).getDocValuesType());
        for (LeafReaderContext ctx : topReader.leaves()) {
            LeafReader r = ctx.reader();
            SortedDocValues docvalues = r.getSortedDocValues(TEST_FIELD);
            for (int i = 0; i < r.numDocs(); ++i) {
                Document doc = r.document(i);
                String v = doc.getField(TEST_FIELD).stringValue();
                String id = doc.getField(ID_FIELD).stringValue();
                assertEquals(DocValuesType.SORTED, r.getFieldInfos().fieldInfo(TEST_FIELD).getDocValuesType());
                assertEquals(DocValuesType.NONE, r.getFieldInfos().fieldInfo(ID_FIELD).getDocValuesType());
                if (id.equals("2")) {
                    // Doc 2 was added after docValues were enabled, so it must have a value.
                    assertTrue(docvalues.advanceExact(i));
                    assertEquals(v, docvalues.binaryValue().utf8ToString());
                } else {
                    // Doc 1 predates docValues and was never indexed, so uninversion
                    // could not supply a value for it.
                    assertFalse(docvalues.advanceExact(i));
                }
            }
        }
    });
}
From source file:org.apache.solr.schema.TestSortableTextField.java
License:Apache License
// Whitebox check of the raw index: each sortable-text field variant must expose the
// expected DocValuesType, doc values, and indexed terms on the leaf reader.
public void testWhiteboxIndexReader() throws Exception {
    assertU(adoc("id", "1", "whitespace_stxt", "how now brown cow ?", "whitespace_m_stxt", "xxx",
            "whitespace_m_stxt", "yyy", "whitespace_f_stxt", "aaa bbb", "keyword_stxt", "Blarggghhh!"));
    assertU(commit());
    final RefCounted<SolrIndexSearcher> searcher = h.getCore().getNewestSearcher(false);
    try {
        final LeafReader r = searcher.get().getSlowAtomicReader();
        // common cases... fields that are both indexed and docValued: SORTED dvs + terms.
        for (String field : Arrays.asList("keyword_stxt", "keyword_dv_stxt", "whitespace_stxt",
                "whitespace_f_stxt", "whitespace_l_stxt")) {
            assertNotNull("FieldInfos: " + field, r.getFieldInfos().fieldInfo(field));
            assertEquals("DocValuesType: " + field, DocValuesType.SORTED,
                    r.getFieldInfos().fieldInfo(field).getDocValuesType());
            assertNotNull("DocValues: " + field, r.getSortedDocValues(field));
            assertNotNull("Terms: " + field, r.terms(field));
        }
        // special cases...
        // docValues disabled: indexed terms only, no doc values.
        assertNotNull(r.getFieldInfos().fieldInfo("whitespace_nodv_stxt"));
        assertEquals(DocValuesType.NONE, r.getFieldInfos().fieldInfo("whitespace_nodv_stxt").getDocValuesType());
        assertNull(r.getSortedDocValues("whitespace_nodv_stxt"));
        assertNotNull(r.terms("whitespace_nodv_stxt"));
        //
        // not indexed: doc values only, no terms.
        assertNotNull(r.getFieldInfos().fieldInfo("whitespace_nois_stxt"));
        assertEquals(DocValuesType.SORTED, r.getFieldInfos().fieldInfo("whitespace_nois_stxt").getDocValuesType());
        assertNotNull(r.getSortedDocValues("whitespace_nois_stxt"));
        assertNull(r.terms("whitespace_nois_stxt"));
        //
        // multiValued: SORTED_SET rather than SORTED doc values.
        assertNotNull(r.getFieldInfos().fieldInfo("whitespace_m_stxt"));
        assertEquals(DocValuesType.SORTED_SET, r.getFieldInfos().fieldInfo("whitespace_m_stxt").getDocValuesType());
        assertNotNull(r.getSortedSetDocValues("whitespace_m_stxt"));
        assertNotNull(r.terms("whitespace_m_stxt"));
    } finally {
        // Always release the searcher reference, even if an assertion failed.
        if (null != searcher) {
            searcher.decref();
        }
    }
}
From source file:org.apache.solr.search.SolrDocumentFetcher.java
License:Apache License
/** * This will fetch and add the docValues fields to a given SolrDocument/SolrInputDocument * * @param doc/*from www . ja v a 2s. c o m*/ * A SolrDocument or SolrInputDocument instance where docValues will be added * @param docid * The lucene docid of the document to be populated * @param fields * The list of docValues fields to be decorated */ public void decorateDocValueFields(@SuppressWarnings("rawtypes") SolrDocumentBase doc, int docid, Set<String> fields) throws IOException { final List<LeafReaderContext> leafContexts = searcher.getLeafContexts(); final int subIndex = ReaderUtil.subIndex(docid, leafContexts); final int localId = docid - leafContexts.get(subIndex).docBase; final LeafReader leafReader = leafContexts.get(subIndex).reader(); for (String fieldName : fields) { final SchemaField schemaField = searcher.getSchema().getFieldOrNull(fieldName); if (schemaField == null || !schemaField.hasDocValues() || doc.containsKey(fieldName)) { log.warn("Couldn't decorate docValues for field: [{}], schemaField: [{}]", fieldName, schemaField); continue; } FieldInfo fi = searcher.getFieldInfos().fieldInfo(fieldName); if (fi == null) { continue; // Searcher doesn't have info about this field, hence ignore it. 
} final DocValuesType dvType = fi.getDocValuesType(); switch (dvType) { case NUMERIC: final NumericDocValues ndv = leafReader.getNumericDocValues(fieldName); if (ndv == null) { continue; } Long val; if (ndv.advanceExact(localId)) { val = ndv.longValue(); } else { continue; } Object newVal = val; if (schemaField.getType().isPointField()) { // TODO: Maybe merge PointField with TrieFields here NumberType type = schemaField.getType().getNumberType(); switch (type) { case INTEGER: newVal = val.intValue(); break; case LONG: newVal = val.longValue(); break; case FLOAT: newVal = Float.intBitsToFloat(val.intValue()); break; case DOUBLE: newVal = Double.longBitsToDouble(val); break; case DATE: newVal = new Date(val); break; default: throw new AssertionError("Unexpected PointType: " + type); } } else { if (schemaField.getType() instanceof TrieIntField) { newVal = val.intValue(); } else if (schemaField.getType() instanceof TrieFloatField) { newVal = Float.intBitsToFloat(val.intValue()); } else if (schemaField.getType() instanceof TrieDoubleField) { newVal = Double.longBitsToDouble(val); } else if (schemaField.getType() instanceof TrieDateField) { newVal = new Date(val); } else if (schemaField.getType() instanceof EnumField) { newVal = ((EnumField) schemaField.getType()).intValueToStringValue(val.intValue()); } } doc.addField(fieldName, newVal); break; case BINARY: BinaryDocValues bdv = leafReader.getBinaryDocValues(fieldName); if (bdv == null) { continue; } BytesRef value; if (bdv.advanceExact(localId)) { value = BytesRef.deepCopyOf(bdv.binaryValue()); } else { continue; } doc.addField(fieldName, value); break; case SORTED: SortedDocValues sdv = leafReader.getSortedDocValues(fieldName); if (sdv == null) { continue; } if (sdv.advanceExact(localId)) { final BytesRef bRef = sdv.binaryValue(); // Special handling for Boolean fields since they're stored as 'T' and 'F'. 
if (schemaField.getType() instanceof BoolField) { doc.addField(fieldName, schemaField.getType().toObject(schemaField, bRef)); } else { doc.addField(fieldName, bRef.utf8ToString()); } } break; case SORTED_NUMERIC: final SortedNumericDocValues numericDv = leafReader.getSortedNumericDocValues(fieldName); NumberType type = schemaField.getType().getNumberType(); if (numericDv != null) { if (numericDv.advance(localId) == localId) { final List<Object> outValues = new ArrayList<Object>(numericDv.docValueCount()); for (int i = 0; i < numericDv.docValueCount(); i++) { long number = numericDv.nextValue(); switch (type) { case INTEGER: outValues.add((int) number); break; case LONG: outValues.add(number); break; case FLOAT: outValues.add(NumericUtils.sortableIntToFloat((int) number)); break; case DOUBLE: outValues.add(NumericUtils.sortableLongToDouble(number)); break; case DATE: outValues.add(new Date(number)); break; default: throw new AssertionError("Unexpected PointType: " + type); } } assert outValues.size() > 0; doc.addField(fieldName, outValues); } } case SORTED_SET: final SortedSetDocValues values = leafReader.getSortedSetDocValues(fieldName); if (values != null && values.getValueCount() > 0) { if (values.advance(localId) == localId) { final List<Object> outValues = new LinkedList<>(); for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values .nextOrd()) { value = values.lookupOrd(ord); outValues.add(schemaField.getType().toObject(schemaField, value)); } assert outValues.size() > 0; doc.addField(fieldName, outValues); } } case NONE: break; } } }
From source file:org.apache.solr.uninverting.FieldCacheImpl.java
License:Apache License
public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException { SortedDocValues valuesIn = reader.getSortedDocValues(field); if (valuesIn != null) { // Not cached here by FieldCacheImpl (cached instead // per-thread by SegmentReader): return valuesIn; } else {/* w ww . j a v a2 s . c o m*/ final FieldInfo info = reader.getFieldInfos().fieldInfo(field); if (info == null) { return DocValues.emptySorted(); } else if (info.getDocValuesType() != DocValuesType.NONE) { // we don't try to build a sorted instance from numeric/binary doc // values because dedup can be very costly throw new IllegalStateException( "Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); } else if (info.getIndexOptions() == IndexOptions.NONE) { return DocValues.emptySorted(); } SortedDocValuesImpl impl = (SortedDocValuesImpl) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio)); return impl.iterator(); } }
From source file:org.apache.solr.uninverting.FieldCacheImpl.java
License:Apache License
public BinaryDocValues getTerms(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException { BinaryDocValues valuesIn = reader.getBinaryDocValues(field); if (valuesIn == null) { valuesIn = reader.getSortedDocValues(field); }/*from w w w . j a v a 2s . c om*/ if (valuesIn != null) { // Not cached here by FieldCacheImpl (cached instead // per-thread by SegmentReader): return valuesIn; } final FieldInfo info = reader.getFieldInfos().fieldInfo(field); if (info == null) { return DocValues.emptyBinary(); } else if (info.getDocValuesType() != DocValuesType.NONE) { throw new IllegalStateException( "Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); } else if (info.getIndexOptions() == IndexOptions.NONE) { return DocValues.emptyBinary(); } BinaryDocValuesImpl impl = (BinaryDocValuesImpl) caches.get(BinaryDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio)); return impl.iterator(); }
From source file:org.apache.solr.uninverting.FieldCacheImpl.java
License:Apache License
public SortedSetDocValues getDocTermOrds(LeafReader reader, String field, BytesRef prefix) throws IOException { // not a general purpose filtering mechanism... assert prefix == null || prefix == INT32_TERM_PREFIX || prefix == INT64_TERM_PREFIX; SortedSetDocValues dv = reader.getSortedSetDocValues(field); if (dv != null) { return dv; }/*from ww w . j a v a 2 s . com*/ SortedDocValues sdv = reader.getSortedDocValues(field); if (sdv != null) { return DocValues.singleton(sdv); } final FieldInfo info = reader.getFieldInfos().fieldInfo(field); if (info == null) { return DocValues.emptySortedSet(); } else if (info.getDocValuesType() != DocValuesType.NONE) { throw new IllegalStateException( "Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); } else if (info.getIndexOptions() == IndexOptions.NONE) { return DocValues.emptySortedSet(); } // ok we need to uninvert. check if we can optimize a bit. Terms terms = reader.terms(field); if (terms == null) { return DocValues.emptySortedSet(); } else { // if #postings = #docswithfield we know that the field is "single valued enough". // it's possible the same term might appear twice in the same document, but SORTED_SET discards frequency. // it's still ok with filtering (which we limit to numerics), it just means precisionStep = Inf long numPostings = terms.getSumDocFreq(); if (numPostings != -1 && numPostings == terms.getDocCount()) { return DocValues.singleton(getTermsIndex(reader, field)); } } DocTermOrds dto = (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, prefix)); return dto.iterator(reader); }