Usage examples for the method org.apache.lucene.index.LeafReader#getSortedSetDocValues
/**
 * Returns the {@link SortedSetDocValues} associated with the given field.
 *
 * NOTE(review): presumably {@code null} is returned when the field has no sorted-set doc values
 * -- confirm against the Lucene {@code LeafReader} documentation before relying on it.
 *
 * @param field name of the field whose doc values are requested
 * @throws IOException declared by the signature; the conditions are not visible from here
 */
public abstract SortedSetDocValues getSortedSetDocValues(String field) throws IOException;
From source file:com.qwazr.search.field.ValueConverter.java
License:Apache License
final static ValueConverter newConverter(FieldDefinition fieldDef, LeafReader dvReader, FieldInfo fieldInfo) throws IOException { if (fieldInfo == null) return null; DocValuesType type = fieldInfo.getDocValuesType(); if (type == null) return null; switch (type) { case BINARY://from w w w .j av a2s . com BinaryDocValues binaryDocValue = dvReader.getBinaryDocValues(fieldInfo.name); if (binaryDocValue == null) return null; return new BinaryDVConverter(binaryDocValue); case SORTED: SortedDocValues sortedDocValues = dvReader.getSortedDocValues(fieldInfo.name); if (sortedDocValues == null) return null; return new SortedDVConverter(sortedDocValues); case NONE: break; case NUMERIC: NumericDocValues numericDocValues = dvReader.getNumericDocValues(fieldInfo.name); if (numericDocValues == null) return null; return newNumericConverter(fieldDef, numericDocValues); case SORTED_NUMERIC: SortedNumericDocValues sortedNumericDocValues = dvReader.getSortedNumericDocValues(fieldInfo.name); if (sortedNumericDocValues == null) return null; return newSortedNumericConverter(fieldDef, sortedNumericDocValues); case SORTED_SET: SortedSetDocValues sortedSetDocValues = dvReader.getSortedSetDocValues(fieldInfo.name); if (sortedSetDocValues == null) return null; return null; default: throw new IOException("Unsupported doc value type: " + type + " for field: " + fieldInfo.name); } return null; }
From source file:com.qwazr.search.index.IndexUtils.java
License:Apache License
/**
 * Builds a facet reader state for the given index, if the facet field carries sorted-set doc values.
 *
 * @param indexReader the reader to inspect
 * @return a new {@link DefaultSortedSetDocValuesReaderState} on {@code FieldDefinition.FACET_FIELD}, or
 *         {@code null} when the reader cannot be wrapped or exposes no sorted-set doc values for that field
 * @throws IOException if reading the index fails
 */
final static SortedSetDocValuesReaderState getNewFacetsState(IndexReader indexReader) throws IOException {
    final LeafReader wrapped = SlowCompositeReaderWrapper.wrap(indexReader);
    final boolean hasFacetValues = wrapped != null
            && wrapped.getSortedSetDocValues(FieldDefinition.FACET_FIELD) != null;
    return hasFacetValues
            ? new DefaultSortedSetDocValuesReaderState(indexReader, FieldDefinition.FACET_FIELD)
            : null;
}
From source file:org.apache.solr.index.SlowCompositeReaderWrapper.java
License:Apache License
@Override public SortedSetDocValues getSortedSetDocValues(String field) throws IOException { ensureOpen();// w ww .jav a 2 s. c o m OrdinalMap map = null; synchronized (cachedOrdMaps) { map = cachedOrdMaps.get(field); if (map == null) { // uncached, or not a multi dv SortedSetDocValues dv = MultiDocValues.getSortedSetValues(in, field); if (dv instanceof MultiDocValues.MultiSortedSetDocValues) { map = ((MultiDocValues.MultiSortedSetDocValues) dv).mapping; if (map.owner == getCoreCacheKey() && merging == false) { cachedOrdMaps.put(field, map); } } return dv; } } assert map != null; int size = in.leaves().size(); final SortedSetDocValues[] values = new SortedSetDocValues[size]; final int[] starts = new int[size + 1]; long cost = 0; for (int i = 0; i < size; i++) { LeafReaderContext context = in.leaves().get(i); final LeafReader reader = context.reader(); final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field); if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED_SET) { return null; } SortedSetDocValues v = reader.getSortedSetDocValues(field); if (v == null) { v = DocValues.emptySortedSet(); } values[i] = v; starts[i] = context.docBase; cost += v.cost(); } starts[size] = maxDoc(); return new MultiDocValues.MultiSortedSetDocValues(values, starts, map, cost); }
From source file:org.apache.solr.schema.TestSortableTextField.java
License:Apache License
public void testWhiteboxIndexReader() throws Exception { assertU(adoc("id", "1", "whitespace_stxt", "how now brown cow ?", "whitespace_m_stxt", "xxx", "whitespace_m_stxt", "yyy", "whitespace_f_stxt", "aaa bbb", "keyword_stxt", "Blarggghhh!")); assertU(commit());/*from ww w. j av a 2 s . c o m*/ final RefCounted<SolrIndexSearcher> searcher = h.getCore().getNewestSearcher(false); try { final LeafReader r = searcher.get().getSlowAtomicReader(); // common cases... for (String field : Arrays.asList("keyword_stxt", "keyword_dv_stxt", "whitespace_stxt", "whitespace_f_stxt", "whitespace_l_stxt")) { assertNotNull("FieldInfos: " + field, r.getFieldInfos().fieldInfo(field)); assertEquals("DocValuesType: " + field, DocValuesType.SORTED, r.getFieldInfos().fieldInfo(field).getDocValuesType()); assertNotNull("DocValues: " + field, r.getSortedDocValues(field)); assertNotNull("Terms: " + field, r.terms(field)); } // special cases... assertNotNull(r.getFieldInfos().fieldInfo("whitespace_nodv_stxt")); assertEquals(DocValuesType.NONE, r.getFieldInfos().fieldInfo("whitespace_nodv_stxt").getDocValuesType()); assertNull(r.getSortedDocValues("whitespace_nodv_stxt")); assertNotNull(r.terms("whitespace_nodv_stxt")); // assertNotNull(r.getFieldInfos().fieldInfo("whitespace_nois_stxt")); assertEquals(DocValuesType.SORTED, r.getFieldInfos().fieldInfo("whitespace_nois_stxt").getDocValuesType()); assertNotNull(r.getSortedDocValues("whitespace_nois_stxt")); assertNull(r.terms("whitespace_nois_stxt")); // assertNotNull(r.getFieldInfos().fieldInfo("whitespace_m_stxt")); assertEquals(DocValuesType.SORTED_SET, r.getFieldInfos().fieldInfo("whitespace_m_stxt").getDocValuesType()); assertNotNull(r.getSortedSetDocValues("whitespace_m_stxt")); assertNotNull(r.terms("whitespace_m_stxt")); } finally { if (null != searcher) { searcher.decref(); } } }
From source file:org.apache.solr.search.SolrDocumentFetcher.java
License:Apache License
/**
 * This will fetch and add the docValues fields to a given SolrDocument/SolrInputDocument
 *
 * Fields are skipped (with a warning) when they are unknown to the schema, have no docValues, or are
 * already present in the document. Fields the searcher has no {@link FieldInfo} for are skipped silently.
 *
 * @param doc
 *          A SolrDocument or SolrInputDocument instance where docValues will be added
 * @param docid
 *          The lucene docid of the document to be populated
 * @param fields
 *          The list of docValues fields to be decorated
 * @throws IOException
 *           if reading doc values from the leaf reader fails
 */
public void decorateDocValueFields(@SuppressWarnings("rawtypes") SolrDocumentBase doc, int docid, Set<String> fields)
    throws IOException {
  final List<LeafReaderContext> leafContexts = searcher.getLeafContexts();
  final int subIndex = ReaderUtil.subIndex(docid, leafContexts);
  // Doc values are addressed per segment, so translate the global docid to a segment-local one.
  final int localId = docid - leafContexts.get(subIndex).docBase;
  final LeafReader leafReader = leafContexts.get(subIndex).reader();
  for (String fieldName : fields) {
    final SchemaField schemaField = searcher.getSchema().getFieldOrNull(fieldName);
    if (schemaField == null || !schemaField.hasDocValues() || doc.containsKey(fieldName)) {
      log.warn("Couldn't decorate docValues for field: [{}], schemaField: [{}]", fieldName, schemaField);
      continue;
    }
    FieldInfo fi = searcher.getFieldInfos().fieldInfo(fieldName);
    if (fi == null) {
      continue; // Searcher doesn't have info about this field, hence ignore it.
    }
    final DocValuesType dvType = fi.getDocValuesType();
    switch (dvType) {
      case NUMERIC:
        final NumericDocValues ndv = leafReader.getNumericDocValues(fieldName);
        if (ndv == null) {
          continue;
        }
        Long val;
        if (ndv.advanceExact(localId)) {
          val = ndv.longValue();
        } else {
          continue;
        }
        Object newVal = val;
        if (schemaField.getType().isPointField()) {
          // TODO: Maybe merge PointField with TrieFields here
          NumberType pointType = schemaField.getType().getNumberType();
          switch (pointType) {
            case INTEGER:
              newVal = val.intValue();
              break;
            case LONG:
              newVal = val.longValue();
              break;
            case FLOAT:
              newVal = Float.intBitsToFloat(val.intValue());
              break;
            case DOUBLE:
              newVal = Double.longBitsToDouble(val);
              break;
            case DATE:
              newVal = new Date(val);
              break;
            default:
              throw new AssertionError("Unexpected PointType: " + pointType);
          }
        } else {
          if (schemaField.getType() instanceof TrieIntField) {
            newVal = val.intValue();
          } else if (schemaField.getType() instanceof TrieFloatField) {
            newVal = Float.intBitsToFloat(val.intValue());
          } else if (schemaField.getType() instanceof TrieDoubleField) {
            newVal = Double.longBitsToDouble(val);
          } else if (schemaField.getType() instanceof TrieDateField) {
            newVal = new Date(val);
          } else if (schemaField.getType() instanceof EnumField) {
            newVal = ((EnumField) schemaField.getType()).intValueToStringValue(val.intValue());
          }
        }
        doc.addField(fieldName, newVal);
        break;
      case BINARY:
        BinaryDocValues bdv = leafReader.getBinaryDocValues(fieldName);
        if (bdv == null) {
          continue;
        }
        BytesRef value;
        if (bdv.advanceExact(localId)) {
          // Copy the bytes: the returned BytesRef may be reused by the iterator.
          value = BytesRef.deepCopyOf(bdv.binaryValue());
        } else {
          continue;
        }
        doc.addField(fieldName, value);
        break;
      case SORTED:
        SortedDocValues sdv = leafReader.getSortedDocValues(fieldName);
        if (sdv == null) {
          continue;
        }
        if (sdv.advanceExact(localId)) {
          final BytesRef bRef = sdv.binaryValue();
          // Special handling for Boolean fields since they're stored as 'T' and 'F'.
          if (schemaField.getType() instanceof BoolField) {
            doc.addField(fieldName, schemaField.getType().toObject(schemaField, bRef));
          } else {
            doc.addField(fieldName, bRef.utf8ToString());
          }
        }
        break;
      case SORTED_NUMERIC:
        final SortedNumericDocValues numericDv = leafReader.getSortedNumericDocValues(fieldName);
        NumberType type = schemaField.getType().getNumberType();
        if (numericDv != null) {
          if (numericDv.advance(localId) == localId) {
            final List<Object> outValues = new ArrayList<Object>(numericDv.docValueCount());
            for (int i = 0; i < numericDv.docValueCount(); i++) {
              long number = numericDv.nextValue();
              switch (type) {
                case INTEGER:
                  outValues.add((int) number);
                  break;
                case LONG:
                  outValues.add(number);
                  break;
                case FLOAT:
                  outValues.add(NumericUtils.sortableIntToFloat((int) number));
                  break;
                case DOUBLE:
                  outValues.add(NumericUtils.sortableLongToDouble(number));
                  break;
                case DATE:
                  outValues.add(new Date(number));
                  break;
                default:
                  throw new AssertionError("Unexpected PointType: " + type);
              }
            }
            assert outValues.size() > 0;
            doc.addField(fieldName, outValues);
          }
        }
        // Previously missing: without this break the SORTED_SET branch also ran for sorted-numeric fields.
        break;
      case SORTED_SET:
        final SortedSetDocValues values = leafReader.getSortedSetDocValues(fieldName);
        if (values != null && values.getValueCount() > 0) {
          if (values.advance(localId) == localId) {
            final List<Object> outValues = new LinkedList<>();
            for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
              // Case-local variable instead of reusing the BytesRef declared in the BINARY case.
              final BytesRef term = values.lookupOrd(ord);
              outValues.add(schemaField.getType().toObject(schemaField, term));
            }
            assert outValues.size() > 0;
            doc.addField(fieldName, outValues);
          }
        }
        break;
      case NONE:
        break;
    }
  }
}
From source file:org.apache.solr.uninverting.FieldCacheImpl.java
License:Apache License
public SortedSetDocValues getDocTermOrds(LeafReader reader, String field, BytesRef prefix) throws IOException { // not a general purpose filtering mechanism... assert prefix == null || prefix == INT32_TERM_PREFIX || prefix == INT64_TERM_PREFIX; SortedSetDocValues dv = reader.getSortedSetDocValues(field); if (dv != null) { return dv; }//from w w w .j a v a2 s . c o m SortedDocValues sdv = reader.getSortedDocValues(field); if (sdv != null) { return DocValues.singleton(sdv); } final FieldInfo info = reader.getFieldInfos().fieldInfo(field); if (info == null) { return DocValues.emptySortedSet(); } else if (info.getDocValuesType() != DocValuesType.NONE) { throw new IllegalStateException( "Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); } else if (info.getIndexOptions() == IndexOptions.NONE) { return DocValues.emptySortedSet(); } // ok we need to uninvert. check if we can optimize a bit. Terms terms = reader.terms(field); if (terms == null) { return DocValues.emptySortedSet(); } else { // if #postings = #docswithfield we know that the field is "single valued enough". // it's possible the same term might appear twice in the same document, but SORTED_SET discards frequency. // it's still ok with filtering (which we limit to numerics), it just means precisionStep = Inf long numPostings = terms.getSumDocFreq(); if (numPostings != -1 && numPostings == terms.getDocCount()) { return DocValues.singleton(getTermsIndex(reader, field)); } } DocTermOrds dto = (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, prefix)); return dto.iterator(reader); }
From source file:org.apache.solr.uninverting.TestFieldCacheVsDocValues.java
License:Apache License
/**
 * Indexes random multi-valued string documents twice -- once as an indexed field and once as
 * SORTED_SET doc values -- and checks that uninverting the indexed field yields the same values
 * as the doc values, both per segment and after a force-merge to a single segment.
 *
 * @param minLength minimum length of each generated value
 * @param maxLength upper bound used when picking the per-document maximum value length
 */
private void doTestSortedSetVsUninvertedField(int minLength, int maxLength) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random()));
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);

  // index some docs
  int numDocs = atLeast(300);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
    doc.add(idField);
    final int length = TestUtil.nextInt(random(), minLength, maxLength);
    int numValues = random().nextInt(17);
    // create a random list of strings
    List<String> values = new ArrayList<>();
    for (int v = 0; v < numValues; v++) {
      values.add(TestUtil.randomSimpleString(random(), minLength, length));
    }

    // add in any order to the indexed field
    List<String> unordered = new ArrayList<>(values);
    Collections.shuffle(unordered, random());
    // Iterate the shuffled copy; the original looped over 'values', leaving the shuffle unused.
    for (String v : unordered) {
      doc.add(newStringField("indexed", v, Field.Store.NO));
    }

    // add in any order to the dv field
    List<String> unordered2 = new ArrayList<>(values);
    Collections.shuffle(unordered2, random());
    for (String v : unordered2) {
      doc.add(new SortedSetDocValuesField("dv", new BytesRef(v)));
    }

    writer.addDocument(doc);
    if (random().nextInt(31) == 0) {
      writer.commit();
    }
  }

  // delete some docs
  int numDeletions = random().nextInt(numDocs / 10);
  for (int i = 0; i < numDeletions; i++) {
    int id = random().nextInt(numDocs);
    writer.deleteDocuments(new Term("id", Integer.toString(id)));
  }

  // compare per-segment
  DirectoryReader ir = writer.getReader();
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(r, "indexed", null);
    SortedSetDocValues actual = r.getSortedSetDocValues("dv");
    assertEquals(r.maxDoc(), expected, actual);
  }
  ir.close();

  writer.forceMerge(1);

  // now compare again after the merge
  ir = writer.getReader();
  LeafReader ar = getOnlyLeafReader(ir);
  SortedSetDocValues expected = FieldCache.DEFAULT.getDocTermOrds(ar, "indexed", null);
  SortedSetDocValues actual = ar.getSortedSetDocValues("dv");
  assertEquals(ir.maxDoc(), expected, actual);
  ir.close();

  writer.close();
  dir.close();
}
From source file:org.apache.solr.uninverting.TestUninvertingReader.java
License:Apache License
/**
 * Uninverts a legacy int field as SORTED_SET_INTEGER and checks the value count, the ordinals of
 * each document, and the decoded term behind each ordinal.
 */
public void testSortedSetInteger() throws IOException {
  final Directory directory = newDirectory();
  final IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(null));

  // doc 0: single value 5
  final Document singleValued = new Document();
  singleValued.add(new LegacyIntField("foo", 5, Field.Store.NO));
  writer.addDocument(singleValued);

  // doc 1: values 5 and -3
  final Document multiValued = new Document();
  multiValued.add(new LegacyIntField("foo", 5, Field.Store.NO));
  multiValued.add(new LegacyIntField("foo", -3, Field.Store.NO));
  writer.addDocument(multiValued);

  writer.forceMerge(1);
  writer.close();

  final DirectoryReader plainReader = DirectoryReader.open(directory);
  final DirectoryReader uninverting =
      UninvertingReader.wrap(plainReader, Collections.singletonMap("foo", Type.SORTED_SET_INTEGER));
  final LeafReader leaf = uninverting.leaves().get(0).reader();
  final SortedSetDocValues ords = leaf.getSortedSetDocValues("foo");
  assertEquals(2, ords.getValueCount());

  // doc 0 holds only the larger value (ord 1)
  assertEquals(0, ords.nextDoc());
  assertEquals(1, ords.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, ords.nextOrd());

  // doc 1 holds both values
  assertEquals(1, ords.nextDoc());
  assertEquals(0, ords.nextOrd());
  assertEquals(1, ords.nextOrd());
  assertEquals(SortedSetDocValues.NO_MORE_ORDS, ords.nextOrd());

  assertEquals(-3, LegacyNumericUtils.prefixCodedToInt(ords.lookupOrd(0)));
  assertEquals(5, LegacyNumericUtils.prefixCodedToInt(ords.lookupOrd(1)));

  TestUtil.checkReader(uninverting);
  uninverting.close();
  directory.close();
}
From source file:org.apache.solr.uninverting.TestUninvertingReader.java
License:Apache License
public void testSortedSetFloat() throws IOException { Directory dir = newDirectory();//from w w w .ja v a 2 s . c o m IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null)); Document doc = new Document(); doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(5f), Field.Store.NO)); iw.addDocument(doc); doc = new Document(); doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(5f), Field.Store.NO)); doc.add(new LegacyIntField("foo", Float.floatToRawIntBits(-3f), Field.Store.NO)); iw.addDocument(doc); iw.forceMerge(1); iw.close(); DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), Collections.singletonMap("foo", Type.SORTED_SET_FLOAT)); LeafReader ar = ir.leaves().get(0).reader(); SortedSetDocValues v = ar.getSortedSetDocValues("foo"); assertEquals(2, v.getValueCount()); assertEquals(0, v.nextDoc()); assertEquals(1, v.nextOrd()); assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd()); assertEquals(1, v.nextDoc()); assertEquals(0, v.nextOrd()); assertEquals(1, v.nextOrd()); assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd()); BytesRef value = v.lookupOrd(0); assertEquals(Float.floatToRawIntBits(-3f), LegacyNumericUtils.prefixCodedToInt(value)); value = v.lookupOrd(1); assertEquals(Float.floatToRawIntBits(5f), LegacyNumericUtils.prefixCodedToInt(value)); TestUtil.checkReader(ir); ir.close(); dir.close(); }
From source file:org.apache.solr.uninverting.TestUninvertingReader.java
License:Apache License
public void testSortedSetLong() throws IOException { Directory dir = newDirectory();// ww w. j av a2 s. c om IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null)); Document doc = new Document(); doc.add(new LegacyLongField("foo", 5, Field.Store.NO)); iw.addDocument(doc); doc = new Document(); doc.add(new LegacyLongField("foo", 5, Field.Store.NO)); doc.add(new LegacyLongField("foo", -3, Field.Store.NO)); iw.addDocument(doc); iw.forceMerge(1); iw.close(); DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), Collections.singletonMap("foo", Type.SORTED_SET_LONG)); LeafReader ar = ir.leaves().get(0).reader(); SortedSetDocValues v = ar.getSortedSetDocValues("foo"); assertEquals(2, v.getValueCount()); assertEquals(0, v.nextDoc()); assertEquals(1, v.nextOrd()); assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd()); assertEquals(1, v.nextDoc()); assertEquals(0, v.nextOrd()); assertEquals(1, v.nextOrd()); assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd()); BytesRef value = v.lookupOrd(0); assertEquals(-3, LegacyNumericUtils.prefixCodedToLong(value)); value = v.lookupOrd(1); assertEquals(5, LegacyNumericUtils.prefixCodedToLong(value)); TestUtil.checkReader(ir); ir.close(); dir.close(); }