Example usage of the org.apache.lucene.document.BinaryDocValuesField constructor

List of usage examples for the org.apache.lucene.document.BinaryDocValuesField constructor

Introduction

On this page you can find example usages of the org.apache.lucene.document.BinaryDocValuesField constructor.

Prototype

public BinaryDocValuesField(String name, BytesRef value) 

Source Link

Documentation

Create a new binary DocValues field.

Usage

From source file:DVBench.java

License:Apache License

/**
 * Builds a 400,000-document index under {@code /data/indices/dvbench} and then runs the
 * {@code search} helper against four views of the same per-document long value:
 * the numeric doc-values field {@code dv}, the indexed field {@code inv} read through an
 * {@code UninvertingReader}, and the binary doc-values fields {@code boxed} and
 * {@code boxed2} read through a {@code BinaryAsVLongReader}.
 *
 * <p>The directory, the writer and the reader are opened with try-with-resources so that
 * they are released even when indexing or searching throws. The writer is closed before
 * the reader is opened, and the reader is closed before the directory.
 *
 * @param bpv passed to {@code MyRandom.nextLong} for the generated values; also determines
 *            the boundary values written every 8192 documents ({@code 64} selects
 *            {@code Long.MIN_VALUE}/{@code Long.MAX_VALUE}) and the fixed length
 *            {@code (bpv + 7) / 8} of the {@code boxed2} values
 * @throws Exception if indexing or searching fails
 */
static void doBench(int bpv) throws Exception {
    File file = new File("/data/indices/dvbench");
    file.mkdirs();
    try (Directory dir = FSDirectory.open(file)) {
        IndexWriterConfig config = new IndexWriterConfig(null);
        config.setOpenMode(OpenMode.CREATE);
        config.setMergeScheduler(new SerialMergeScheduler());
        config.setMergePolicy(new LogDocMergePolicy());
        config.setMaxBufferedDocs(25000);

        try (IndexWriter writer = new IndexWriter(dir, config)) {
            MyRandom r = new MyRandom();
            int numdocs = 400000;

            // A single Document and its Field instances are reused for every added document;
            // only the field values change between iterations.
            Document doc = new Document();
            Field dv = new NumericDocValuesField("dv", 0);
            Field inv = new LongField("inv", 0, Field.Store.NO);
            Field boxed = new BinaryDocValuesField("boxed", new BytesRef(8));
            Field boxed2 = new BinaryDocValuesField("boxed2", new BytesRef(8));

            doc.add(dv);
            doc.add(inv);
            doc.add(boxed);
            doc.add(boxed2);
            for (int i = 0; i < numdocs; i++) {
                // defeat blockpackedwriter
                final long value;
                if (i % 8192 == 0) {
                    value = bpv == 64 ? Long.MIN_VALUE : 0;
                } else if (i % 8192 == 1) {
                    value = bpv == 64 ? Long.MAX_VALUE : (1L << bpv) - 1;
                } else {
                    value = r.nextLong(bpv);
                }
                dv.setLongValue(value);
                inv.setLongValue(value);
                box(value, boxed.binaryValue());
                box(value, boxed2.binaryValue());
                boxed2.binaryValue().length = (bpv + 7) / 8; // fixed length
                writer.addDocument(doc);
            }
        }

        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            // run dv search tests
            String description = "dv (bpv=" + bpv + ")";
            IndexSearcher searcher = new IndexSearcher(reader);
            searcher.setQueryCache(null); // don't bench the cache

            int hash = 0;
            // warmup
            hash += search(description, searcher, "dv", 300, true);
            hash += search(description, searcher, "dv", 300, false);

            // Uninverting
            Map<String, UninvertingReader.Type> mapping = Collections.singletonMap("inv",
                    UninvertingReader.Type.LONG);
            DirectoryReader uninv = UninvertingReader.wrap(reader, mapping);
            IndexSearcher searcher2 = new IndexSearcher(uninv);
            searcher2.setQueryCache(null); // don't bench the cache

            description = "fc (bpv=" + bpv + ")";
            // warmup
            hash += search(description, searcher2, "inv", 300, true);
            hash += search(description, searcher2, "inv", 300, false);

            // Boxed inside binary
            DirectoryReader boxedReader = new BinaryAsVLongReader(reader);
            IndexSearcher searcher3 = new IndexSearcher(boxedReader);
            searcher3.setQueryCache(null); // don't bench the cache
            description = "boxed (bpv=" + bpv + ")";
            // warmup
            hash += search(description, searcher3, "boxed", 300, true);
            hash += search(description, searcher3, "boxed", 300, false);

            description = "boxed fixed-length (bpv=" + bpv + ")";
            // warmup
            hash += search(description, searcher3, "boxed2", 300, true);
            hash += search(description, searcher3, "boxed2", 300, false);

            // The accumulated hash is consumed here so the search results are actually used.
            if (hash == 3) {
                // wont happen
                System.out.println("hash=" + hash);
            }
        }
    }
}

From source file:cn.codepub.redis.directory.Main.java

License:Apache License

/**
 * Creates a sample document with fifteen fields named {@code key1} through {@code key15}.
 *
 * <p>The numeric fields are derived from {@code i} scaled by 100000 or 50000; the remaining
 * fields are filled with random strings and random numbers.
 *
 * @param i seed value used for {@code key1} and for the numeric fields
 * @return the populated document
 */
private static Document addDocument(int i) {
    final Field.Store stored = Field.Store.YES;

    // Deterministic values derived from i. The int products are computed in int arithmetic,
    // the others in the widened type, exactly as the inline expressions would be.
    final int intTimes100k = i * 100000;
    final float floatTimes100k = (float) i * 100000;
    final long longTimes100k = (long) i * 100000;
    final double doubleTimes100k = (double) i * 100000;
    final long longTimes50k = (long) i * 50000;
    final int intTimes50k = i * 50000;
    final float floatTimes50k = (float) i * 50000;
    final double doubleTimes50k = (double) i * 50000;

    final Document result = new Document();
    result.add(new StringField("key1", "key" + i, stored));
    result.add(new IntField("key2", intTimes100k, stored));
    result.add(new FloatField("key3", floatTimes100k, stored));
    result.add(new LongField("key4", longTimes100k, stored));
    result.add(new DoubleField("key5", doubleTimes100k, stored));

    // Random content; the generator calls stay in field order.
    result.add(new TextField("key6", RandomStringUtils.randomAlphabetic(10), stored));
    result.add(new StringField("key7", RandomStringUtils.randomAlphabetic(5), stored));
    result.add(new BinaryDocValuesField("key8", new BytesRef(RandomStringUtils.randomAlphabetic(5))));
    result.add(new DoubleDocValuesField("key9", RandomUtils.nextDouble(0, 1000)));
    result.add(new FloatDocValuesField("key10", RandomUtils.nextFloat(0, 1000)));

    result.add(new LongField("key11", longTimes50k, stored));
    result.add(new IntField("key12", intTimes50k, stored));
    result.add(new FloatField("key13", floatTimes50k, stored));
    result.add(new DoubleField("key14", doubleTimes50k, stored));
    result.add(new StringField("key15", RandomStringUtils.randomAlphabetic(6), stored));
    return result;
}

From source file:com.b2international.index.lucene.DocValuesStringIndexField.java

License:Apache License

/**
 * Creates a binary doc-values field for the given string.
 *
 * @param value the string to convert via {@code toBytesRef}
 * @return a new {@code BinaryDocValuesField} named by {@code fieldName()}
 */
@Override
public BinaryDocValuesField toDocValuesField(String value) {
    final String name = fieldName();
    return new BinaryDocValuesField(name, toBytesRef(value));
}

From source file:com.github.flaxsearch.testutil.Fixtures.java

License:Apache License

/**
 * Writes two fixture documents into the given directory.
 *
 * <p>The first document is committed on its own before the second one is added, so the
 * resulting index has more than one segment.
 *
 * @param directory the directory to write the index into
 * @throws RuntimeException wrapping the underlying {@link IOException} if writing fails
 */
private static void populateIndex(Directory directory) {
    try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig())) {

        {
            Document doc = new Document();
            doc.add(new TextField("field2", "here is some text", Field.Store.YES));
            doc.add(new StringField("field1", "value1", Field.Store.YES));
            doc.add(new IntPoint("point", 2, 4));
            doc.add(new IntPoint("point", 0, 1));
            doc.add(new IntPoint("point", 2, 1));
            doc.add(new IntPoint("point", 14, 4));
            writer.addDocument(doc);
            // more than one segment
            writer.commit();
        }

        {
            Document doc = new Document();
            doc.add(new StringField("field1", "value2", Field.Store.YES));
            doc.add(new BinaryDocValuesField("field1", new BytesRef("some bytes")));
            doc.add(new TextField("field3",
                    "this is some more text in a different field value1 value11 value12 value21",
                    Field.Store.YES));
            writer.addDocument(doc);
        }

    } catch (IOException e) {
        // Keep the original exception as the cause so the real failure is not lost.
        throw new RuntimeException("We're a RAMDirectory, this should never happen!", e);
    }
}

From source file:com.qwazr.search.bench.test.Merging.UpdateBinaryDocValuesTest.java

License:Apache License

/**
 * Builds the {@code dv} binary doc-values field for the given id.
 *
 * @param id the id appended to the {@code "dv"} prefix to form the field value
 * @return a new {@code BinaryDocValuesField} named {@code dv}
 */
@Override
protected BinaryDocValuesField getField(String id) {
    final String payload = "dv" + id;
    return new BinaryDocValuesField("dv", new BytesRef(payload));
}

From source file:com.qwazr.search.field.BinaryDocValuesType.java

License:Apache License

/**
 * Converts the value to its string form and hands it to the consumer as a binary
 * doc-values field named {@code fieldName}.
 *
 * @param value    the value whose {@code toString()} result is stored
 * @param consumer receives the created field
 */
@Override
public final void fillValue(final Object value, final FieldConsumer consumer) {
    final BytesRef bytes = new BytesRef(value.toString());
    consumer.accept(new BinaryDocValuesField(fieldName, bytes));
}

From source file:com.tuplejump.stargate.Fields.java

License:Apache License

/**
 * Creates a binary doc-values field holding the string form of a composed value.
 *
 * @param name            the field name; it is prefixed with {@code striped}
 * @param abstractType    the type used to compose the raw bytes into an object
 * @param byteBufferValue the raw value to compose
 * @return a new {@code BinaryDocValuesField} for the prefixed name
 */
private static Field stringDocValuesField(String name, final AbstractType abstractType,
        final ByteBuffer byteBufferValue) {
    final String text = abstractType.compose(byteBufferValue).toString();
    return new BinaryDocValuesField(striped + name, new BytesRef(text));
}

From source file:com.xiaomi.linden.core.search.LindenDocParser.java

License:Apache License

/**
 * Converts a {@code LindenDocument} into a Lucene {@code Document}.
 *
 * <p>The document id is always added as a stored {@code StringField} under the id name of
 * the configured schema. Each Linden field is then mapped according to its schema type;
 * indexed and doc-values representations are added only when the schema asks for them.
 * When a coordinate is set, the fields produced by the configured spatial strategy are
 * appended as well.
 *
 * @param lindenDoc the source document
 * @param config    supplies the schema and the spatial strategy
 * @return the Lucene document, or {@code null} when {@code lindenDoc} has no fields set
 */
public static Document parse(LindenDocument lindenDoc, LindenConfig config) {
    if (!lindenDoc.isSetFields()) {
        return null;
    }

    final Document doc = new Document();
    doc.add(new StringField(config.getSchema().getId(), lindenDoc.getId(), Field.Store.YES));

    for (LindenField field : lindenDoc.getFields()) {
        final LindenFieldSchema schema = field.getSchema();
        final Field.Store isStored;
        if (schema.isStored()) {
            isStored = Field.Store.YES;
        } else {
            isStored = Field.Store.NO;
        }
        final String name = field.getSchema().getName();

        // Stored-but-not-indexed fields get a stored-only copy in addition to whatever
        // the type-specific handling below adds.
        if (!schema.isIndexed() && schema.isStored()) {
            doc.add(new Field(name, field.getValue(), STORED_ONLY));
        }

        switch (schema.getType()) {
        case INTEGER: {
            final int intValue = Integer.parseInt(field.getValue());
            if (schema.isIndexed()) {
                doc.add(new IntField(name, intValue, isStored));
            }
            if (schema.isDocValues()) {
                final long bits = intValue;
                doc.add(new NumericDocValuesField(name, bits));
            }
            break;
        }
        case LONG: {
            final long longValue = Long.parseLong(field.getValue());
            if (schema.isIndexed()) {
                doc.add(new LongField(name, longValue, isStored));
            }
            if (schema.isDocValues()) {
                doc.add(new NumericDocValuesField(name, longValue));
            }
            break;
        }
        case DOUBLE: {
            final double doubleValue = Double.parseDouble(field.getValue());
            if (schema.isIndexed()) {
                doc.add(new DoubleField(name, doubleValue, isStored));
            }
            if (schema.isDocValues()) {
                // Doc values hold the IEEE 754 bit pattern of the double.
                final long bits = Double.doubleToLongBits(doubleValue);
                doc.add(new NumericDocValuesField(name, bits));
            }
            break;
        }
        case FLOAT: {
            final float floatValue = Float.parseFloat(field.getValue());
            if (schema.isIndexed()) {
                doc.add(new FloatField(name, floatValue, isStored));
            }
            if (schema.isDocValues()) {
                // Doc values hold the int bit pattern of the float, widened to long.
                final long bits = Float.floatToIntBits(floatValue);
                doc.add(new NumericDocValuesField(name, bits));
            }
            break;
        }
        case STRING: {
            final String text = field.getValue();
            if (Strings.isNullOrEmpty(text)) {
                break;
            }
            if (schema.isIndexed()) {
                final FieldType type = new FieldType();
                type.setTokenized(schema.isTokenized());
                type.setIndexed(schema.isIndexed());
                type.setStored(schema.isStored());
                type.setOmitNorms(schema.isOmitNorms());
                if (schema.isSnippet()) {
                    type.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
                    // snippet will use the stored info.
                    type.setStored(true);
                }
                // Applied after the snippet options, so it wins when both are set.
                if (schema.isOmitFreqs()) {
                    type.setIndexOptions(FieldInfo.IndexOptions.DOCS_ONLY);
                }
                doc.add(new Field(name, text, type));
            }
            if (schema.isDocValues()) {
                doc.add(new BinaryDocValuesField(name, new BytesRef(text)));
            }
            break;
        }
        case FACET: {
            final String path = field.getValue();
            // The facet path components are separated by '/'.
            doc.add(new FacetField(name, path.split("/")));
            if (schema.isIndexed()) {
                doc.add(new StringField(name, path, isStored));
            }
            if (schema.isDocValues()) {
                doc.add(new BinaryDocValuesField(name, new BytesRef(path)));
            }
            break;
        }
        default:
        }
    }

    if (lindenDoc.isSetCoordinate()) {
        final Coordinate coord = lindenDoc.getCoordinate();
        final Shape point = SpatialContext.GEO.makePoint(coord.getLongitude(), coord.getLatitude());
        for (IndexableField spatialField : config.getSpatialStrategy().createIndexableFields(point)) {
            doc.add(spatialField);
        }
    }
    return doc;
}

From source file:lucene.security.index.SecureAtomicReaderTestBase.java

License:Apache License

/**
 * Builds the test document for index {@code i}.
 *
 * <p>Every document has the stored fields {@code test} and {@code info}, a numeric
 * doc-values field {@code number}, and binary, sorted and sorted-set doc-values fields
 * holding the decimal string of {@code i}. The sorted-set field additionally gets the
 * same string prefixed with {@code "0"}. Document 3 also carries the stored field
 * {@code shouldnotsee}.
 *
 * <p>The doc-values bytes are created with {@code new BytesRef(CharSequence)}, which
 * encodes as UTF-8, instead of {@code String.getBytes()}, whose result depends on the
 * platform default charset.
 *
 * @param i the document index
 * @return the fields of the document
 */
private Iterable<IndexableField> getDoc(int i) {
    final String number = Integer.toString(i);

    Document document = new Document();
    document.add(new StringField("test", "test", Store.YES));
    document.add(new StringField("info", "info", Store.YES));
    if (i == 3) {
        document.add(new StringField("shouldnotsee", "shouldnotsee", Store.YES));
    }
    document.add(new NumericDocValuesField("number", i));
    document.add(new BinaryDocValuesField("bin", new BytesRef(number)));
    document.add(new SortedDocValuesField("sorted", new BytesRef(number)));
    document.add(new SortedSetDocValuesField("sortedset", new BytesRef(number)));
    document.add(new SortedSetDocValuesField("sortedset", new BytesRef("0" + number)));
    return document;
}

From source file:net.semanticmetadata.lire.builders.GlobalDocumentBuilder.java

License:Open Source License

/**
 * Extracts the global feature of the given image and wraps it in Lucene fields.
 *
 * <p>The feature bytes go into a {@code BinaryDocValuesField} when {@code useDocValues} is
 * set and into a {@code StoredField} otherwise. When hashing is enabled and the feature
 * vector has at most 3100 entries, a stored {@code TextField} with the hashes for the
 * configured hashing mode is added as well.
 *
 * @param image         is the selected image.
 * @param extractorItem is the extractor to be used to extract the features.
 * @return an array holding the feature field, followed by the hash field if one was created.
 */
private Field[] getGlobalDescriptorFields(BufferedImage image, ExtractorItem extractorItem) {
    GlobalFeature globalFeature = extractGlobalFeature(image,
            (GlobalFeature) extractorItem.getExtractorInstance());

    final Field vector;
    if (useDocValues) {
        // Alternative: The DocValues field. It's extremely fast to read, but it's all in RAM most likely.
        vector = new BinaryDocValuesField(extractorItems.get(extractorItem)[0],
                new BytesRef(globalFeature.getByteArrayRepresentation()));
    } else {
        // TODO: Stored field is compressed and upon search decompression takes a lot of time (> 50% with a small index with 50k images). Find something else ...
        vector = new StoredField(extractorItems.get(extractorItem)[0],
                new BytesRef(globalFeature.getByteArrayRepresentation()));
    }

    // if BitSampling is an issue we add a field with the given hashFunctionsFileName and the suffix "hash":
    Field hash = null;
    if (hashingEnabled) {
        // TODO: check eventually if there is a more compressed string version of the integers. i.e. the hex string
        if (globalFeature.getFeatureVector().length > 3100) {
            System.err.println("Could not create hashes, feature vector too long: "
                    + globalFeature.getFeatureVector().length + " (" + globalFeature.getClass().getName()
                    + ")");
        } else if (hashingMode == HashingMode.BitSampling) {
            int[] bitSamplingHashes = BitSampling.generateHashes(globalFeature.getFeatureVector());
            hash = new TextField(extractorItems.get(extractorItem)[1],
                    SerializationUtils.arrayToString(bitSamplingHashes), Field.Store.YES);
        } else if (hashingMode == HashingMode.LSH) {
            int[] lshHashes = LocalitySensitiveHashing.generateHashes(globalFeature.getFeatureVector());
            hash = new TextField(extractorItems.get(extractorItem)[1],
                    SerializationUtils.arrayToString(lshHashes), Field.Store.YES);
        } else if (hashingMode == HashingMode.MetricSpaces && MetricSpaces.supportsFeature(globalFeature)) {
            // the name of the field is set at "addExtractor" time.
            hash = new TextField(extractorItems.get(extractorItem)[1],
                    MetricSpaces.generateHashString(globalFeature), Field.Store.YES);
        }
    }

    return hash == null ? new Field[] { vector } : new Field[] { vector, hash };
}