Usage examples for the org.apache.lucene.document.BinaryDocValuesField constructor:
public BinaryDocValuesField(String name, BytesRef value)
From source file:DVBench.java
License:Apache License
static void doBench(int bpv) throws Exception { File file = new File("/data/indices/dvbench"); file.mkdirs();//from www . j a v a 2 s . c o m Directory dir = FSDirectory.open(file); IndexWriterConfig config = new IndexWriterConfig(null); config.setOpenMode(OpenMode.CREATE); config.setMergeScheduler(new SerialMergeScheduler()); config.setMergePolicy(new LogDocMergePolicy()); config.setMaxBufferedDocs(25000); IndexWriter writer = new IndexWriter(dir, config); MyRandom r = new MyRandom(); int numdocs = 400000; Document doc = new Document(); Field dv = new NumericDocValuesField("dv", 0); Field inv = new LongField("inv", 0, Field.Store.NO); Field boxed = new BinaryDocValuesField("boxed", new BytesRef(8)); Field boxed2 = new BinaryDocValuesField("boxed2", new BytesRef(8)); doc.add(dv); doc.add(inv); doc.add(boxed); doc.add(boxed2); for (int i = 0; i < numdocs; i++) { // defeat blockpackedwriter final long value; if (i % 8192 == 0) { value = bpv == 64 ? Long.MIN_VALUE : 0; } else if (i % 8192 == 1) { value = bpv == 64 ? 
Long.MAX_VALUE : (1L << bpv) - 1; } else { value = r.nextLong(bpv); } dv.setLongValue(value); inv.setLongValue(value); box(value, boxed.binaryValue()); box(value, boxed2.binaryValue()); boxed2.binaryValue().length = (bpv + 7) / 8; // fixed length writer.addDocument(doc); } writer.close(); // run dv search tests String description = "dv (bpv=" + bpv + ")"; DirectoryReader reader = DirectoryReader.open(dir); IndexSearcher searcher = new IndexSearcher(reader); searcher.setQueryCache(null); // don't bench the cache int hash = 0; // warmup hash += search(description, searcher, "dv", 300, true); hash += search(description, searcher, "dv", 300, false); // Uninverting Map<String, UninvertingReader.Type> mapping = Collections.singletonMap("inv", UninvertingReader.Type.LONG); DirectoryReader uninv = UninvertingReader.wrap(reader, mapping); IndexSearcher searcher2 = new IndexSearcher(uninv); searcher2.setQueryCache(null); // don't bench the cache description = "fc (bpv=" + bpv + ")"; // warmup hash += search(description, searcher2, "inv", 300, true); hash += search(description, searcher2, "inv", 300, false); // Boxed inside binary DirectoryReader boxedReader = new BinaryAsVLongReader(reader); IndexSearcher searcher3 = new IndexSearcher(boxedReader); searcher3.setQueryCache(null); // don't bench the cache description = "boxed (bpv=" + bpv + ")"; // warmup hash += search(description, searcher3, "boxed", 300, true); hash += search(description, searcher3, "boxed", 300, false); description = "boxed fixed-length (bpv=" + bpv + ")"; // warmup hash += search(description, searcher3, "boxed2", 300, true); hash += search(description, searcher3, "boxed2", 300, false); if (hash == 3) { // wont happen System.out.println("hash=" + hash); } reader.close(); dir.close(); }
From source file:cn.codepub.redis.directory.Main.java
License:Apache License
private static Document addDocument(int i) { Document document = new Document(); document.add(new StringField("key1", "key" + i, Field.Store.YES)); document.add(new IntField("key2", i * 100000, Field.Store.YES)); document.add(new FloatField("key3", (float) i * 100000, Field.Store.YES)); document.add(new LongField("key4", (long) i * 100000, Field.Store.YES)); document.add(new DoubleField("key5", (double) i * 100000, Field.Store.YES)); document.add(new TextField("key6", RandomStringUtils.randomAlphabetic(10), Field.Store.YES)); document.add(new StringField("key7", RandomStringUtils.randomAlphabetic(5), Field.Store.YES)); document.add(new BinaryDocValuesField("key8", new BytesRef(RandomStringUtils.randomAlphabetic(5)))); document.add(new DoubleDocValuesField("key9", RandomUtils.nextDouble(0, 1000))); document.add(new FloatDocValuesField("key10", RandomUtils.nextFloat(0, 1000))); document.add(new LongField("key11", (long) i * 50000, Field.Store.YES)); document.add(new IntField("key12", i * 50000, Field.Store.YES)); document.add(new FloatField("key13", (float) i * 50000, Field.Store.YES)); document.add(new DoubleField("key14", (double) i * 50000, Field.Store.YES)); document.add(new StringField("key15", RandomStringUtils.randomAlphabetic(6), Field.Store.YES)); return document; }
From source file:com.b2international.index.lucene.DocValuesStringIndexField.java
License:Apache License
@Override public BinaryDocValuesField toDocValuesField(String value) { return new BinaryDocValuesField(fieldName(), toBytesRef(value)); }
From source file:com.github.flaxsearch.testutil.Fixtures.java
License:Apache License
private static void populateIndex(Directory directory) { try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig())) { {/*from ww w.j av a 2 s . com*/ Document doc = new Document(); doc.add(new TextField("field2", "here is some text", Field.Store.YES)); doc.add(new StringField("field1", "value1", Field.Store.YES)); doc.add(new IntPoint("point", 2, 4)); doc.add(new IntPoint("point", 0, 1)); doc.add(new IntPoint("point", 2, 1)); doc.add(new IntPoint("point", 14, 4)); writer.addDocument(doc); // more than one segment writer.commit(); } { Document doc = new Document(); doc.add(new StringField("field1", "value2", Field.Store.YES)); doc.add(new BinaryDocValuesField("field1", new BytesRef("some bytes"))); doc.add(new TextField("field3", "this is some more text in a different field value1 value11 value12 value21", Field.Store.YES)); writer.addDocument(doc); } } catch (IOException e) { throw new RuntimeException("We're a RAMDirectory, this should never happen!"); } }
From source file:com.qwazr.search.bench.test.Merging.UpdateBinaryDocValuesTest.java
License:Apache License
@Override protected BinaryDocValuesField getField(String id) { return new BinaryDocValuesField("dv", new BytesRef("dv" + id)); }
From source file:com.qwazr.search.field.BinaryDocValuesType.java
License:Apache License
@Override final public void fillValue(final Object value, final FieldConsumer consumer) { consumer.accept(new BinaryDocValuesField(fieldName, new BytesRef(value.toString()))); }
From source file:com.tuplejump.stargate.Fields.java
License:Apache License
private static Field stringDocValuesField(String name, final AbstractType abstractType, final ByteBuffer byteBufferValue) { Object value = abstractType.compose(byteBufferValue); BytesRef bytesRef = new BytesRef(value.toString()); final String stripedName = striped + name; return new BinaryDocValuesField(stripedName, bytesRef); }
From source file:com.xiaomi.linden.core.search.LindenDocParser.java
License:Apache License
public static Document parse(LindenDocument lindenDoc, LindenConfig config) { if (!lindenDoc.isSetFields()) { return null; }// w w w . j a va 2s .c om Document doc = new Document(); doc.add(new StringField(config.getSchema().getId(), lindenDoc.getId(), Field.Store.YES)); for (LindenField field : lindenDoc.getFields()) { LindenFieldSchema schema = field.getSchema(); Field.Store isStored = schema.isStored() ? Field.Store.YES : Field.Store.NO; String name = field.getSchema().getName(); Object value; if (!schema.isIndexed() && schema.isStored()) { doc.add(new Field(name, field.getValue(), STORED_ONLY)); } switch (schema.getType()) { case INTEGER: value = Integer.valueOf(field.getValue()); if (schema.isIndexed()) { doc.add(new IntField(name, (Integer) value, isStored)); } if (schema.isDocValues()) { long docValuesBits = ((Integer) value).longValue(); doc.add(new NumericDocValuesField(name, docValuesBits)); } break; case LONG: value = Long.valueOf(field.getValue()); if (schema.isIndexed()) { doc.add(new LongField(name, (Long) value, isStored)); } if (schema.isDocValues()) { doc.add(new NumericDocValuesField(name, (long) value)); } break; case DOUBLE: value = Double.valueOf(field.getValue()); if (schema.isIndexed()) { doc.add(new DoubleField(name, (Double) value, isStored)); } if (schema.isDocValues()) { long docValuesBits = Double.doubleToLongBits((Double) value); doc.add(new NumericDocValuesField(name, docValuesBits)); } break; case FLOAT: value = Float.valueOf(field.getValue()); if (schema.isIndexed()) { doc.add(new FloatField(name, (Float) value, isStored)); } if (schema.isDocValues()) { long docValuesBits = Float.floatToIntBits((Float) value); doc.add(new NumericDocValuesField(name, docValuesBits)); } break; case STRING: if (Strings.isNullOrEmpty(field.getValue())) { break; } if (schema.isIndexed()) { FieldType type = new FieldType(); type.setTokenized(schema.isTokenized()); type.setIndexed(schema.isIndexed()); type.setStored(schema.isStored()); 
type.setOmitNorms(schema.isOmitNorms()); if (schema.isSnippet()) { type.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); // snippet will use the stored info. type.setStored(true); } if (schema.isOmitFreqs()) { type.setIndexOptions(FieldInfo.IndexOptions.DOCS_ONLY); } doc.add(new Field(name, field.getValue(), type)); } if (schema.isDocValues()) { BytesRef bytes = new BytesRef(field.getValue()); doc.add(new BinaryDocValuesField(name, bytes)); } break; case FACET: String[] facetPath = field.getValue().split("/"); doc.add(new FacetField(name, facetPath)); if (schema.isIndexed()) { doc.add(new StringField(name, field.getValue(), isStored)); } if (schema.isDocValues()) { doc.add(new BinaryDocValuesField(name, new BytesRef(field.getValue()))); } break; default: } } if (lindenDoc.isSetCoordinate()) { Coordinate coord = lindenDoc.getCoordinate(); Shape shape = SpatialContext.GEO.makePoint(coord.getLongitude(), coord.getLatitude()); for (IndexableField field : config.getSpatialStrategy().createIndexableFields(shape)) { doc.add(field); } } return doc; }
From source file:lucene.security.index.SecureAtomicReaderTestBase.java
License:Apache License
private Iterable<IndexableField> getDoc(int i) { Document document = new Document(); document.add(new StringField("test", "test", Store.YES)); document.add(new StringField("info", "info", Store.YES)); if (i == 3) { document.add(new StringField("shouldnotsee", "shouldnotsee", Store.YES)); }/*ww w . j a v a 2s.c o m*/ document.add(new NumericDocValuesField("number", i)); document.add(new BinaryDocValuesField("bin", new BytesRef(Integer.toString(i).getBytes()))); document.add(new SortedDocValuesField("sorted", new BytesRef(Integer.toString(i).getBytes()))); document.add(new SortedSetDocValuesField("sortedset", new BytesRef(Integer.toString(i).getBytes()))); document.add( new SortedSetDocValuesField("sortedset", new BytesRef(("0" + Integer.toString(i)).getBytes()))); return document; }
From source file:net.semanticmetadata.lire.builders.GlobalDocumentBuilder.java
License:Open Source License
/** * Extracts the global feature and returns the Lucene Fields for the selected image. * * @param image is the selected image. * @param extractorItem is the extractor to be used to extract the features. * @return Lucene Fields.// w w w. j av a2 s . c o m */ private Field[] getGlobalDescriptorFields(BufferedImage image, ExtractorItem extractorItem) { Field[] result; // if (hashingEnabled) result = new Field[2]; // else result = new Field[1]; Field hash = null; Field vector = null; GlobalFeature globalFeature = extractGlobalFeature(image, (GlobalFeature) extractorItem.getExtractorInstance()); if (!useDocValues) { // TODO: Stored field is compressed and upon search decompression takes a lot of time (> 50% with a small index with 50k images). Find something else ... vector = new StoredField(extractorItems.get(extractorItem)[0], new BytesRef(globalFeature.getByteArrayRepresentation())); } else { // Alternative: The DocValues field. It's extremely fast to read, but it's all in RAM most likely. vector = new BinaryDocValuesField(extractorItems.get(extractorItem)[0], new BytesRef(globalFeature.getByteArrayRepresentation())); } // if BitSampling is an issue we add a field with the given hashFunctionsFileName and the suffix "hash": if (hashingEnabled) { // TODO: check eventually if there is a more compressed string version of the integers. i.e. 
the hex string if (globalFeature.getFeatureVector().length <= 3100) { int[] hashes; if (hashingMode == HashingMode.BitSampling) { hashes = BitSampling.generateHashes(globalFeature.getFeatureVector()); hash = new TextField(extractorItems.get(extractorItem)[1], SerializationUtils.arrayToString(hashes), Field.Store.YES); } else if (hashingMode == HashingMode.LSH) { hashes = LocalitySensitiveHashing.generateHashes(globalFeature.getFeatureVector()); hash = new TextField(extractorItems.get(extractorItem)[1], SerializationUtils.arrayToString(hashes), Field.Store.YES); } else if (hashingMode == HashingMode.MetricSpaces) { if (MetricSpaces.supportsFeature(globalFeature)) { // the name of the field is set at "addExtractor" time. hash = new TextField(extractorItems.get(extractorItem)[1], MetricSpaces.generateHashString(globalFeature), Field.Store.YES); } } } else System.err.println("Could not create hashes, feature vector too long: " + globalFeature.getFeatureVector().length + " (" + globalFeature.getClass().getName() + ")"); } if (hash != null) result = new Field[] { vector, hash }; else result = new Field[] { vector }; return result; }