List of usage examples for org.apache.lucene.index FieldInfo getDocValuesType
public DocValuesType getDocValuesType()
From source file:com.github.flaxsearch.api.FieldData.java
License:Apache License
/**
 * Captures the index-time settings of a single field.
 *
 * <p>Copies the name, index options, norms flag, doc-values type, point
 * dimension count and payloads flag out of the supplied {@code FieldInfo}.
 *
 * @param fieldInfo the Lucene field description to copy from
 */
public FieldData(FieldInfo fieldInfo) {
    name = fieldInfo.name;
    indexOptions = fieldInfo.getIndexOptions();
    hasNorms = fieldInfo.hasNorms();
    docValuesType = fieldInfo.getDocValuesType();
    pointDimensionCount = fieldInfo.getPointDimensionCount();
    hasPayloads = fieldInfo.hasPayloads();
}
From source file:com.qwazr.search.field.ValueConverter.java
License:Apache License
final static ValueConverter newConverter(FieldDefinition fieldDef, LeafReader dvReader, FieldInfo fieldInfo) throws IOException { if (fieldInfo == null) return null; DocValuesType type = fieldInfo.getDocValuesType(); if (type == null) return null; switch (type) { case BINARY://w ww . j a v a 2 s . co m BinaryDocValues binaryDocValue = dvReader.getBinaryDocValues(fieldInfo.name); if (binaryDocValue == null) return null; return new BinaryDVConverter(binaryDocValue); case SORTED: SortedDocValues sortedDocValues = dvReader.getSortedDocValues(fieldInfo.name); if (sortedDocValues == null) return null; return new SortedDVConverter(sortedDocValues); case NONE: break; case NUMERIC: NumericDocValues numericDocValues = dvReader.getNumericDocValues(fieldInfo.name); if (numericDocValues == null) return null; return newNumericConverter(fieldDef, numericDocValues); case SORTED_NUMERIC: SortedNumericDocValues sortedNumericDocValues = dvReader.getSortedNumericDocValues(fieldInfo.name); if (sortedNumericDocValues == null) return null; return newSortedNumericConverter(fieldDef, sortedNumericDocValues); case SORTED_SET: SortedSetDocValues sortedSetDocValues = dvReader.getSortedSetDocValues(fieldInfo.name); if (sortedSetDocValues == null) return null; return null; default: throw new IOException("Unsupported doc value type: " + type + " for field: " + fieldInfo.name); } return null; }
From source file:com.vmware.xenon.services.common.Lucene60FieldInfosFormatWithCache.java
License:Open Source License
@Override public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException { final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, EXTENSION); try (IndexOutput output = directory.createOutput(fileName, context)) { CodecUtil.writeIndexHeader(output, Lucene60FieldInfosFormatWithCache.CODEC_NAME, Lucene60FieldInfosFormatWithCache.FORMAT_CURRENT, segmentInfo.getId(), segmentSuffix); output.writeVInt(infos.size());//from w ww . j av a 2 s. com for (FieldInfo fi : infos) { fi.checkConsistency(); output.writeString(fi.name); output.writeVInt(fi.number); byte bits = 0x0; if (fi.hasVectors()) { bits |= STORE_TERMVECTOR; } if (fi.omitsNorms()) { bits |= OMIT_NORMS; } if (fi.hasPayloads()) { bits |= STORE_PAYLOADS; } output.writeByte(bits); output.writeByte(indexOptionsByte(fi.getIndexOptions())); // pack the DV type and hasNorms in one byte output.writeByte(docValuesByte(fi.getDocValuesType())); output.writeLong(fi.getDocValuesGen()); output.writeMapOfStrings(fi.attributes()); int pointDimensionCount = fi.getPointDimensionCount(); output.writeVInt(pointDimensionCount); if (pointDimensionCount != 0) { output.writeVInt(fi.getPointNumBytes()); } } CodecUtil.writeFooter(output); } }
From source file:net.semanticmetadata.lire.solr.LireValueSource.java
License:Open Source License
@Override /**/*from www . j av a 2s . c o m*/ * Check also {@link org.apache.lucene.queries.function.valuesource.BytesRefFieldSource} */ public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException { final FieldInfo fieldInfo = readerContext.reader().getFieldInfos().fieldInfo(field); if (fieldInfo != null && fieldInfo.getDocValuesType() == DocValuesType.BINARY) { final BinaryDocValues binaryValues = DocValues.getBinary(readerContext.reader(), field); final Bits docsWithField = DocValues.getDocsWithField(readerContext.reader(), field); return new FunctionValues() { @Override public boolean exists(int doc) { return docsWithField.get(doc); } @Override public boolean bytesVal(int doc, BytesRefBuilder target) { target.copyBytes(binaryValues.get(doc)); return target.length() > 0; } @Override public float floatVal(int doc) { return (float) doubleVal(doc); } public String strVal(int doc) { final BytesRefBuilder bytes = new BytesRefBuilder(); return bytesVal(doc, bytes) ? bytes.get().utf8ToString() : null; } /** * This method basically decides which type is delivered on request. It can be a String, * in this case it is the double form the distance function. * @param doc * @return the distance as Double, mapping to {@link FunctionValues#doubleVal(int)} */ @Override public Object objectVal(int doc) { return doubleVal(doc); } @Override public String toString(int doc) { return description() + '=' + strVal(doc); } @Override /** * This method has to be implemented to support sorting! */ public double doubleVal(int doc) { if (binaryValues.get(doc).length > 0) { tmpFeature.setByteArrayRepresentation(binaryValues.get(doc).bytes, binaryValues.get(doc).offset, binaryValues.get(doc).length); return tmpFeature.getDistance(feature); } else return maxDistance; // make sure max distance is returned for those without value } }; } else { // there is no DocVal to sort by. 
Therefore we need to set the function value to -1 and everything without DocVal gets ranked first? return new DocTermsIndexDocValues(this, readerContext, field) { @Override protected String toTerm(String readableValue) { return Double.toString(maxDistance); } @Override public Object objectVal(int doc) { return maxDistance; } @Override public String toString(int doc) { return description() + '=' + strVal(doc); } public double doubleVal(int doc) { return maxDistance; } }; } }
From source file:org.apache.solr.index.SlowCompositeReaderWrapper.java
License:Apache License
@Override public SortedDocValues getSortedDocValues(String field) throws IOException { ensureOpen();//ww w . jav a 2 s. c om OrdinalMap map = null; synchronized (cachedOrdMaps) { map = cachedOrdMaps.get(field); if (map == null) { // uncached, or not a multi dv SortedDocValues dv = MultiDocValues.getSortedValues(in, field); if (dv instanceof MultiSortedDocValues) { map = ((MultiSortedDocValues) dv).mapping; if (map.owner == getCoreCacheKey() && merging == false) { cachedOrdMaps.put(field, map); } } return dv; } } int size = in.leaves().size(); final SortedDocValues[] values = new SortedDocValues[size]; final int[] starts = new int[size + 1]; long totalCost = 0; for (int i = 0; i < size; i++) { LeafReaderContext context = in.leaves().get(i); final LeafReader reader = context.reader(); final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field); if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED) { return null; } SortedDocValues v = reader.getSortedDocValues(field); if (v == null) { v = DocValues.emptySorted(); } totalCost += v.cost(); values[i] = v; starts[i] = context.docBase; } starts[size] = maxDoc(); return new MultiSortedDocValues(values, starts, map, totalCost); }
From source file:org.apache.solr.index.SlowCompositeReaderWrapper.java
License:Apache License
@Override public SortedSetDocValues getSortedSetDocValues(String field) throws IOException { ensureOpen();/* w ww.jav a2s.c o m*/ OrdinalMap map = null; synchronized (cachedOrdMaps) { map = cachedOrdMaps.get(field); if (map == null) { // uncached, or not a multi dv SortedSetDocValues dv = MultiDocValues.getSortedSetValues(in, field); if (dv instanceof MultiDocValues.MultiSortedSetDocValues) { map = ((MultiDocValues.MultiSortedSetDocValues) dv).mapping; if (map.owner == getCoreCacheKey() && merging == false) { cachedOrdMaps.put(field, map); } } return dv; } } assert map != null; int size = in.leaves().size(); final SortedSetDocValues[] values = new SortedSetDocValues[size]; final int[] starts = new int[size + 1]; long cost = 0; for (int i = 0; i < size; i++) { LeafReaderContext context = in.leaves().get(i); final LeafReader reader = context.reader(); final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field); if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED_SET) { return null; } SortedSetDocValues v = reader.getSortedSetDocValues(field); if (v == null) { v = DocValues.emptySortedSet(); } values[i] = v; starts[i] = context.docBase; cost += v.cost(); } starts[size] = maxDoc(); return new MultiDocValues.MultiSortedSetDocValues(values, starts, map, cost); }
From source file:org.apache.solr.index.UninvertDocValuesMergePolicyFactory.java
License:Apache License
/**
 * Determines how a field should be uninverted, if at all.
 *
 * <p>A type is returned only when the schema declares doc values for the field while the
 * index holds none for it, and the field is indexed.
 *
 * @param fi the field as found in the index
 * @return the uninversion type supplied by the schema field's type, or {@code null} when
 *         the field is unknown to the schema or the conditions above are not all met
 */
private UninvertingReader.Type getUninversionType(FieldInfo fi) {
    final SchemaField schemaField = schema.getFieldOrNull(fi.name);
    if (schemaField == null || !schemaField.hasDocValues()) {
        return null;
    }
    if (fi.getDocValuesType() != DocValuesType.NONE || fi.getIndexOptions() == IndexOptions.NONE) {
        return null;
    }
    return schemaField.getType().getUninversionType(schemaField);
}
From source file:org.apache.solr.search.facet.FacetFieldProcessorByHashDV.java
License:Apache License
FacetFieldProcessorByHashDV(FacetContext fcontext, FacetField freq, SchemaField sf) {
super(fcontext, freq, sf);
if (freq.mincount == 0) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
getClass() + " doesn't support mincount=0");
}/* www. j a va 2s . c o m*/
if (freq.prefix != null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, getClass() + " doesn't support prefix"); // yet, but it could
}
FieldInfo fieldInfo = fcontext.searcher.getSlowAtomicReader().getFieldInfos().fieldInfo(sf.getName());
if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.NUMERIC
&& fieldInfo.getDocValuesType() != DocValuesType.SORTED) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
getClass() + " only support single valued number/string with docValues");
}
}
From source file:org.apache.solr.search.SolrDocumentFetcher.java
License:Apache License
/**
 * This will fetch and add the docValues fields to a given SolrDocument/SolrInputDocument.
 *
 * <p>A field is skipped (with a warning) when it is unknown to the schema, has no docValues
 * in the schema, or is already present in {@code doc}. It is skipped silently when the
 * searcher has no field info for it or the leaf has no value for the document.
 *
 * @param doc
 *          A SolrDocument or SolrInputDocument instance where docValues will be added
 * @param docid
 *          The lucene docid of the document to be populated
 * @param fields
 *          The list of docValues fields to be decorated
 * @throws IOException if reading doc values from the leaf reader fails
 */
public void decorateDocValueFields(@SuppressWarnings("rawtypes") SolrDocumentBase doc, int docid,
        Set<String> fields) throws IOException {
    final List<LeafReaderContext> leafContexts = searcher.getLeafContexts();
    final int subIndex = ReaderUtil.subIndex(docid, leafContexts);
    // Doc values are addressed per leaf, so translate the global docid to a leaf-local one.
    final int localId = docid - leafContexts.get(subIndex).docBase;
    final LeafReader leafReader = leafContexts.get(subIndex).reader();

    for (String fieldName : fields) {
        final SchemaField schemaField = searcher.getSchema().getFieldOrNull(fieldName);
        if (schemaField == null || !schemaField.hasDocValues() || doc.containsKey(fieldName)) {
            log.warn("Couldn't decorate docValues for field: [{}], schemaField: [{}]", fieldName, schemaField);
            continue;
        }
        FieldInfo fi = searcher.getFieldInfos().fieldInfo(fieldName);
        if (fi == null) {
            continue; // Searcher doesn't have info about this field, hence ignore it.
        }
        final DocValuesType dvType = fi.getDocValuesType();
        switch (dvType) {
            case NUMERIC:
                final NumericDocValues ndv = leafReader.getNumericDocValues(fieldName);
                if (ndv == null) {
                    continue;
                }
                Long val;
                if (ndv.advanceExact(localId)) {
                    val = ndv.longValue();
                } else {
                    continue;
                }
                Object newVal = val;
                if (schemaField.getType().isPointField()) {
                    // TODO: Maybe merge PointField with TrieFields here
                    NumberType type = schemaField.getType().getNumberType();
                    switch (type) {
                        case INTEGER:
                            newVal = val.intValue();
                            break;
                        case LONG:
                            newVal = val.longValue();
                            break;
                        case FLOAT:
                            newVal = Float.intBitsToFloat(val.intValue());
                            break;
                        case DOUBLE:
                            newVal = Double.longBitsToDouble(val);
                            break;
                        case DATE:
                            newVal = new Date(val);
                            break;
                        default:
                            throw new AssertionError("Unexpected PointType: " + type);
                    }
                } else {
                    if (schemaField.getType() instanceof TrieIntField) {
                        newVal = val.intValue();
                    } else if (schemaField.getType() instanceof TrieFloatField) {
                        newVal = Float.intBitsToFloat(val.intValue());
                    } else if (schemaField.getType() instanceof TrieDoubleField) {
                        newVal = Double.longBitsToDouble(val);
                    } else if (schemaField.getType() instanceof TrieDateField) {
                        newVal = new Date(val);
                    } else if (schemaField.getType() instanceof EnumField) {
                        newVal = ((EnumField) schemaField.getType()).intValueToStringValue(val.intValue());
                    }
                }
                doc.addField(fieldName, newVal);
                break;
            case BINARY:
                BinaryDocValues bdv = leafReader.getBinaryDocValues(fieldName);
                if (bdv == null) {
                    continue;
                }
                BytesRef value;
                if (bdv.advanceExact(localId)) {
                    // Deep copy: the added value must not alias the iterator's returned BytesRef.
                    value = BytesRef.deepCopyOf(bdv.binaryValue());
                } else {
                    continue;
                }
                doc.addField(fieldName, value);
                break;
            case SORTED:
                SortedDocValues sdv = leafReader.getSortedDocValues(fieldName);
                if (sdv == null) {
                    continue;
                }
                if (sdv.advanceExact(localId)) {
                    final BytesRef bRef = sdv.binaryValue();
                    // Special handling for Boolean fields since they're stored as 'T' and 'F'.
                    if (schemaField.getType() instanceof BoolField) {
                        doc.addField(fieldName, schemaField.getType().toObject(schemaField, bRef));
                    } else {
                        doc.addField(fieldName, bRef.utf8ToString());
                    }
                }
                break;
            case SORTED_NUMERIC:
                final SortedNumericDocValues numericDv = leafReader.getSortedNumericDocValues(fieldName);
                NumberType type = schemaField.getType().getNumberType();
                if (numericDv != null) {
                    if (numericDv.advance(localId) == localId) {
                        final List<Object> outValues = new ArrayList<Object>(numericDv.docValueCount());
                        for (int i = 0; i < numericDv.docValueCount(); i++) {
                            long number = numericDv.nextValue();
                            switch (type) {
                                case INTEGER:
                                    outValues.add((int) number);
                                    break;
                                case LONG:
                                    outValues.add(number);
                                    break;
                                case FLOAT:
                                    outValues.add(NumericUtils.sortableIntToFloat((int) number));
                                    break;
                                case DOUBLE:
                                    outValues.add(NumericUtils.sortableLongToDouble(number));
                                    break;
                                case DATE:
                                    outValues.add(new Date(number));
                                    break;
                                default:
                                    throw new AssertionError("Unexpected PointType: " + type);
                            }
                        }
                        assert outValues.size() > 0;
                        doc.addField(fieldName, outValues);
                    }
                }
                // Sorted-numeric fields must not continue into the sorted-set handling below.
                break;
            case SORTED_SET:
                final SortedSetDocValues values = leafReader.getSortedSetDocValues(fieldName);
                if (values != null && values.getValueCount() > 0) {
                    if (values.advance(localId) == localId) {
                        final List<Object> outValues = new LinkedList<>();
                        for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values
                                .nextOrd()) {
                            final BytesRef ordValue = values.lookupOrd(ord);
                            outValues.add(schemaField.getType().toObject(schemaField, ordValue));
                        }
                        assert outValues.size() > 0;
                        doc.addField(fieldName, outValues);
                    }
                }
                break;
            case NONE:
                break;
        }
    }
}
From source file:org.apache.solr.uninverting.DocTermOrds.java
License:Apache License
/** Call this only once (if you subclass!) */ protected void uninvert(final LeafReader reader, Bits liveDocs, final BytesRef termPrefix) throws IOException { final FieldInfo info = reader.getFieldInfos().fieldInfo(field); if (checkForDocValues && info != null && info.getDocValuesType() != DocValuesType.NONE) { throw new IllegalStateException( "Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); }//ww w. j a v a 2 s. com //System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix); final long startTime = System.nanoTime(); prefix = termPrefix == null ? null : BytesRef.deepCopyOf(termPrefix); final int maxDoc = reader.maxDoc(); final int[] index = new int[maxDoc]; // immediate term numbers, or the index into the byte[] representing the last number final int[] lastTerm = new int[maxDoc]; // last term we saw for this document final byte[][] bytes = new byte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts) final Terms terms = reader.terms(field); if (terms == null) { // No terms return; } final TermsEnum te = terms.iterator(); final BytesRef seekStart = termPrefix != null ? termPrefix : new BytesRef(); //System.out.println("seekStart=" + seekStart.utf8ToString()); if (te.seekCeil(seekStart) == TermsEnum.SeekStatus.END) { // No terms match return; } // For our "term index wrapper" final List<BytesRef> indexedTerms = new ArrayList<>(); final PagedBytes indexedTermsBytes = new PagedBytes(15); // we need a minimum of 9 bytes, but round up to 12 since the space would // be wasted with most allocators anyway. byte[] tempArr = new byte[12]; // // enumerate all terms, and build an intermediate form of the un-inverted field. // // During this intermediate form, every document has a (potential) byte[] // and the int[maxDoc()] array either contains the termNumber list directly // or the *end* offset of the termNumber list in its byte array (for faster // appending and faster creation of the final form). // // idea... 
if things are too large while building, we could do a range of docs // at a time (but it would be a fair amount slower to build) // could also do ranges in parallel to take advantage of multiple CPUs // OPTIONAL: remap the largest df terms to the lowest 128 (single byte) // values. This requires going over the field first to find the most // frequent terms ahead of time. int termNum = 0; postingsEnum = null; // Loop begins with te positioned to first term (we call // seek above): for (;;) { final BytesRef t = te.term(); if (t == null || (termPrefix != null && !StringHelper.startsWith(t, termPrefix))) { break; } //System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum); visitTerm(te, termNum); if ((termNum & indexIntervalMask) == 0) { // Index this term sizeOfIndexedStrings += t.length; BytesRef indexedTerm = new BytesRef(); indexedTermsBytes.copy(t, indexedTerm); // TODO: really should 1) strip off useless suffix, // and 2) use FST not array/PagedBytes indexedTerms.add(indexedTerm); } final int df = te.docFreq(); if (df <= maxTermDocFreq) { postingsEnum = te.postings(postingsEnum, PostingsEnum.NONE); // dF, but takes deletions into account int actualDF = 0; for (;;) { int doc = postingsEnum.nextDoc(); if (doc == DocIdSetIterator.NO_MORE_DOCS) { break; } //System.out.println(" chunk=" + chunk + " docs"); actualDF++; termInstances++; //System.out.println(" docID=" + doc); // add TNUM_OFFSET to the term number to make room for special reserved values: // 0 (end term) and 1 (index into byte array follows) int delta = termNum - lastTerm[doc] + TNUM_OFFSET; lastTerm[doc] = termNum; int val = index[doc]; if ((val & 0xff) == 1) { // index into byte array (actually the end of // the doc-specific byte[] when building) int pos = val >>> 8; int ilen = vIntSize(delta); byte[] arr = bytes[doc]; int newend = pos + ilen; if (newend > arr.length) { // We avoid a doubling strategy to lower memory usage. 
// this faceting method isn't for docs with many terms. // In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary. // TODO: figure out what array lengths we can round up to w/o actually using more memory // (how much space does a byte[] take up? Is data preceded by a 32 bit length only? // It should be safe to round up to the nearest 32 bits in any case. int newLen = (newend + 3) & 0xfffffffc; // 4 byte alignment byte[] newarr = new byte[newLen]; System.arraycopy(arr, 0, newarr, 0, pos); arr = newarr; bytes[doc] = newarr; } pos = writeInt(delta, arr, pos); index[doc] = (pos << 8) | 1; // update pointer to end index in byte[] } else { // OK, this int has data in it... find the end (a zero starting byte - not // part of another number, hence not following a byte with the high bit set). int ipos; if (val == 0) { ipos = 0; } else if ((val & 0x0000ff80) == 0) { ipos = 1; } else if ((val & 0x00ff8000) == 0) { ipos = 2; } else if ((val & 0xff800000) == 0) { ipos = 3; } else { ipos = 4; } //System.out.println(" ipos=" + ipos); int endPos = writeInt(delta, tempArr, ipos); //System.out.println(" endpos=" + endPos); if (endPos <= 4) { //System.out.println(" fits!"); // value will fit in the integer... move bytes back for (int j = ipos; j < endPos; j++) { val |= (tempArr[j] & 0xff) << (j << 3); } index[doc] = val; } else { // value won't fit... move integer into byte[] for (int j = 0; j < ipos; j++) { tempArr[j] = (byte) val; val >>>= 8; } // point at the end index in the byte[] index[doc] = (endPos << 8) | 1; bytes[doc] = tempArr; tempArr = new byte[12]; } } } setActualDocFreq(termNum, actualDF); } termNum++; if (te.next() == null) { break; } } numTermsInField = termNum; long midPoint = System.nanoTime(); if (termInstances == 0) { // we didn't invert anything // lower memory consumption. 
tnums = null; } else { this.index = index; // // transform intermediate form into the final form, building a single byte[] // at a time, and releasing the intermediate byte[]s as we go to avoid // increasing the memory footprint. // for (int pass = 0; pass < 256; pass++) { byte[] target = tnums[pass]; int pos = 0; // end in target; if (target != null) { pos = target.length; } else { target = new byte[4096]; } // loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx // where pp is the pass (which array we are building), and xx is all values. // each pass shares the same byte[] for termNumber lists. for (int docbase = pass << 16; docbase < maxDoc; docbase += (1 << 24)) { int lim = Math.min(docbase + (1 << 16), maxDoc); for (int doc = docbase; doc < lim; doc++) { //System.out.println(" pass=" + pass + " process docID=" + doc); int val = index[doc]; if ((val & 0xff) == 1) { int len = val >>> 8; //System.out.println(" ptr pos=" + pos); index[doc] = (pos << 8) | 1; // change index to point to start of array if ((pos & 0xff000000) != 0) { // we only have 24 bits for the array index throw new IllegalStateException( "Too many values for UnInvertedField faceting on field " + field); } byte[] arr = bytes[doc]; /* for(byte b : arr) { //System.out.println(" b=" + Integer.toHexString((int) b)); } */ bytes[doc] = null; // IMPORTANT: allow GC to avoid OOM if (target.length <= pos + len) { int newlen = target.length; /*** we don't have to worry about the array getting too large * since the "pos" param will overflow first (only 24 bits available) if ((newlen<<1) <= 0) { // overflow... 
newlen = Integer.MAX_VALUE; if (newlen <= pos + len) { throw new SolrException(400,"Too many terms to uninvert field!"); } } else { while (newlen <= pos + len) newlen<<=1; // doubling strategy } ****/ while (newlen <= pos + len) newlen <<= 1; // doubling strategy byte[] newtarget = new byte[newlen]; System.arraycopy(target, 0, newtarget, 0, pos); target = newtarget; } System.arraycopy(arr, 0, target, pos, len); pos += len + 1; // skip single byte at end and leave it 0 for terminator } } } // shrink array if (pos < target.length) { byte[] newtarget = new byte[pos]; System.arraycopy(target, 0, newtarget, 0, pos); target = newtarget; } tnums[pass] = target; if ((pass << 16) > maxDoc) break; } } indexedTermsArray = indexedTerms.toArray(new BytesRef[indexedTerms.size()]); long endTime = System.nanoTime(); total_time = (int) TimeUnit.MILLISECONDS.convert(endTime - startTime, TimeUnit.NANOSECONDS); phase1_time = (int) TimeUnit.MILLISECONDS.convert(midPoint - startTime, TimeUnit.NANOSECONDS); }