Example usage for org.apache.lucene.document Field binaryValue

List of usage examples for org.apache.lucene.document Field binaryValue

Introduction

On this page you can find example usages of org.apache.lucene.document Field.binaryValue.

Prototype

@Override
    public BytesRef binaryValue() 

Source Link

Usage

From source file:DVBench.java

License:Apache License

/**
 * Micro-benchmark comparing three ways of reading per-document longs:
 * native numeric doc values, FieldCache-style uninverting, and longs boxed
 * inside binary doc values (variable- and fixed-length).
 *
 * @param bpv bits per generated value (64 exercises the full long range)
 * @throws Exception on any indexing or search failure
 */
static void doBench(int bpv) throws Exception {
    // The index is rebuilt from scratch on every invocation.
    File file = new File("/data/indices/dvbench");
    file.mkdirs();
    Directory dir = FSDirectory.open(file);

    IndexWriterConfig config = new IndexWriterConfig(null);
    config.setOpenMode(OpenMode.CREATE);
    config.setMergeScheduler(new SerialMergeScheduler());
    config.setMergePolicy(new LogDocMergePolicy());
    config.setMaxBufferedDocs(25000);
    IndexWriter writer = new IndexWriter(dir, config);

    MyRandom rnd = new MyRandom();
    final int numdocs = 400000;

    // One reusable Document; the same Field instances are mutated per doc.
    Document doc = new Document();
    Field dv = new NumericDocValuesField("dv", 0);
    Field inv = new LongField("inv", 0, Field.Store.NO);
    Field boxed = new BinaryDocValuesField("boxed", new BytesRef(8));
    Field boxed2 = new BinaryDocValuesField("boxed2", new BytesRef(8));
    doc.add(dv);
    doc.add(inv);
    doc.add(boxed);
    doc.add(boxed2);

    for (int i = 0; i < numdocs; i++) {
        // defeat blockpackedwriter: plant extreme values in every block
        final long value;
        switch (i % 8192) {
        case 0:
            value = bpv == 64 ? Long.MIN_VALUE : 0;
            break;
        case 1:
            value = bpv == 64 ? Long.MAX_VALUE : (1L << bpv) - 1;
            break;
        default:
            value = rnd.nextLong(bpv);
            break;
        }
        dv.setLongValue(value);
        inv.setLongValue(value);
        box(value, boxed.binaryValue());
        box(value, boxed2.binaryValue());
        boxed2.binaryValue().length = (bpv + 7) / 8; // fixed length
        writer.addDocument(doc);
    }
    writer.close();

    DirectoryReader reader = DirectoryReader.open(dir);
    int hash = 0;

    // 1) native numeric doc values
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setQueryCache(null); // don't bench the cache
    String description = "dv (bpv=" + bpv + ")";
    hash += search(description, searcher, "dv", 300, true); // warmup
    hash += search(description, searcher, "dv", 300, false);

    // 2) FieldCache semantics via UninvertingReader
    Map<String, UninvertingReader.Type> mapping = Collections.singletonMap("inv", UninvertingReader.Type.LONG);
    DirectoryReader uninv = UninvertingReader.wrap(reader, mapping);
    IndexSearcher searcher2 = new IndexSearcher(uninv);
    searcher2.setQueryCache(null); // don't bench the cache
    description = "fc (bpv=" + bpv + ")";
    hash += search(description, searcher2, "inv", 300, true); // warmup
    hash += search(description, searcher2, "inv", 300, false);

    // 3) longs boxed inside binary doc values, variable length
    DirectoryReader boxedReader = new BinaryAsVLongReader(reader);
    IndexSearcher searcher3 = new IndexSearcher(boxedReader);
    searcher3.setQueryCache(null); // don't bench the cache
    description = "boxed (bpv=" + bpv + ")";
    hash += search(description, searcher3, "boxed", 300, true); // warmup
    hash += search(description, searcher3, "boxed", 300, false);

    // 4) same, but padded to a fixed length
    description = "boxed fixed-length (bpv=" + bpv + ")";
    hash += search(description, searcher3, "boxed2", 300, true); // warmup
    hash += search(description, searcher3, "boxed2", 300, false);

    // Consume the hash so the search results cannot be optimized away.
    if (hash == 3) {
        // wont happen
        System.out.println("hash=" + hash);
    }
    reader.close();
    dir.close();
}

From source file:com.ibm.jaql.lang.expr.index.ProbeLuceneFn.java

License:Apache License

/**
 * Probes a Lucene index. exprs[0] is evaluated to a record whose "location"
 * entry names the index; exprs[1] is the query string; exprs[2] yields an
 * optional list of stored-field names to load for every hit.
 *
 * @return a lazy iterator of records {doc: <docid>, <field>: <value>, ...},
 *         or JsonIterator.NULL when the location or query is missing
 */
@Override
public JsonIterator iter(Context context) throws Exception {
    JsonRecord fd = (JsonRecord) exprs[0].eval(context);
    if (fd == null) {
        return JsonIterator.NULL;
    }
    JsonString loc = (JsonString) fd.get(new JsonString("location"));
    if (loc == null) {
        return JsonIterator.NULL;
    }
    JsonString jquery = (JsonString) exprs[1].eval(context);
    if (jquery == null) {
        return JsonIterator.NULL;
    }

    // Collect requested stored-field names; fields stays null when none are
    // given, in which case no stored fields are read per hit.
    HashSet<String> fields = null;
    JsonIterator iter = exprs[2].iter(context);
    for (JsonValue sv : iter) {
        JsonString s = (JsonString) sv;
        if (s != null) {
            if (fields == null) {
                fields = new HashSet<String>();
            }
            fields.add(s.toString());
        }
    }
    final FieldSelector fieldSelector = (fields == null) ? null
            : new SetBasedFieldSelector(fields, new HashSet<String>());

    // NOTE(review): uses pre-Lucene-3 APIs (IndexSearcher(String),
    // FieldSelector, Scorer.next()/doc()) -- tied to an old Lucene version.
    final IndexSearcher searcher = new IndexSearcher(loc.toString());
    Analyzer analyzer = new StandardAnalyzer();
    QueryParser qp = new QueryParser("key", analyzer);
    Query query = qp.parse(jquery.toString());

    query = searcher.rewrite(query);
    final Scorer scorer = query.weight(searcher).scorer(searcher.getIndexReader());
    final BufferedJsonRecord rec = new BufferedJsonRecord();
    final JsonString jdoc = new JsonString("doc");
    final MutableJsonLong jdocid = new MutableJsonLong();

    // rec is reused for every hit; callers must consume it before advancing.
    return new JsonIterator(rec) {
        @Override
        public boolean moveNext() throws Exception {
            if (!scorer.next()) {
                return false;
            }
            rec.clear();
            int i = scorer.doc();
            jdocid.set(i);
            rec.add(jdoc, jdocid);
            if (fieldSelector != null) {
                // Deserialize each selected stored field from its binary value.
                Document doc = searcher.doc(i, fieldSelector);
                for (Object x : doc.getFields()) {
                    Field f = (Field) x;
                    String name = f.name();
                    byte[] val = f.binaryValue();
                    ByteArrayInputStream bais = new ByteArrayInputStream(val); // TODO: reuse
                    DataInputStream in = new DataInputStream(bais); // TODO: reuse
                    JsonValue ival = serializer.read(in, null);
                    rec.add(new JsonString(name), ival);
                }
            }
            return true; // currentValue == rec
        }
    };
}

From source file:com.stratio.cassandra.lucene.service.ClusteringKeyMapperTest.java

License:Apache License

/**
 * Verifies that ClusteringKeyMapper.addFields adds a clustering-key field to
 * the document and that its binary value equals the mapper's BytesRef for the
 * same cell name.
 */
@Test
public void testAddFields() throws InvalidRequestException, ConfigurationException {
    // Two indexed columns backing the mappers configured below.
    List<ColumnDef> columns = new ArrayList<>();
    columns.add(new ColumnDef(ByteBufferUtil.bytes("field1"), UTF8Type.class.getCanonicalName())
            .setIndex_name("field1").setIndex_type(IndexType.KEYS));
    columns.add(new ColumnDef(ByteBufferUtil.bytes("field2"), IntegerType.class.getCanonicalName())
            .setIndex_name("field2").setIndex_type(IndexType.KEYS));

    CfDef cfDef = new CfDef().setDefault_validation_class(AsciiType.class.getCanonicalName())
            .setColumn_metadata(columns).setKeyspace("Keyspace1").setName("Standard1");
    CFMetaData tableMetadata = ThriftConversion.fromThrift(cfDef);

    Schema mapperSchema = SchemaBuilders.schema().mapper("field1", stringMapper())
            .mapper("field2", textMapper()).build();
    ClusteringKeyMapper mapper = ClusteringKeyMapper.instance(tableMetadata, mapperSchema);

    CellName cellName = CellNames.simpleSparse(new ColumnIdentifier("aaaa", false));
    Document document = new Document();
    mapper.addFields(document, cellName);

    Field field = (Field) document.getField(ClusteringKeyMapper.FIELD_NAME);
    assertNotNull("clusteringKeyMapper addFields to Document must add al least one Field to Doc", field);
    assertEquals("clusteringKeyMapper.byteRef included in Document must be equal",
            mapper.bytesRef(cellName), field.binaryValue());
}

From source file:org.apache.jackrabbit.core.query.lucene.AbstractIndex.java

License:Apache License

/**
 * Returns a document that is finished with text extraction and is ready to
 * be added to the index./*w ww. j a  v a  2 s .c o m*/
 *
 * @param doc the document to check.
 * @return <code>doc</code> if it is finished already or a stripped down
 *         copy of <code>doc</code> without text extractors.
 * @throws IOException if the document cannot be added to the indexing
 *                     queue.
 */
/**
 * Returns a document that is finished with text extraction and is ready to
 * be added to the index.
 *
 * @param doc the document to check.
 * @return <code>doc</code> if it is finished already or a stripped down
 *         copy of <code>doc</code> without text extractors.
 * @throws IOException if the document cannot be added to the indexing
 *                     queue.
 */
private Document getFinishedDocument(Document doc) throws IOException {
    if (Util.isDocumentReady(doc)) {
        return doc;
    }
    // Not ready yet: build a copy without reader-valued (extractor) fields
    // and queue the original for re-indexing once extraction completes.
    Document stripped = new Document();
    for (Iterator fields = doc.getFields().iterator(); fields.hasNext();) {
        Field original = (Field) fields.next();
        Field.TermVector tv = getTermVectorParameter(original);
        Field.Store stored = getStoreParameter(original);
        Field.Index indexed = getIndexParameter(original);
        Field replacement = null;
        if (original.readerValue() != null) {
            // replace all readers with empty string reader
            replacement = new Field(original.name(), new StringReader(""), tv);
        } else if (original.stringValue() != null) {
            replacement = new Field(original.name(), original.stringValue(), stored, indexed, tv);
        } else if (original.isBinary()) {
            replacement = new Field(original.name(), original.binaryValue(), stored);
        }
        if (replacement != null) {
            replacement.setOmitNorms(original.getOmitNorms());
            stripped.add(replacement);
        }
    }
    // schedule the original document for later indexing
    Document existing = indexingQueue.addDocument(doc);
    if (existing != null) {
        // the queue already held a pending document for this node -> dispose it
        Util.disposeDocument(existing);
    }
    // use the stripped down copy for now
    return stripped;
}

From source file:org.apache.jackrabbit.core.query.lucene.AbstractIndex.java

License:Apache License

/**
 * Returns a document that is finished with text extraction and is ready to
 * be added to the index.//w  w w .  j  av a  2s .  c  o m
 *
 * @param doc the document to check.
 * @return <code>doc</code> if it is finished already or a stripped down
 *         copy of <code>doc</code> without text extractors.
 * @throws IOException if the document cannot be added to the indexing
 *                     queue.
 */
/**
 * Returns a document that is finished with text extraction and is ready to
 * be added to the index.
 *
 * @param doc the document to check.
 * @return <code>doc</code> if it is finished already or a stripped down
 *         copy of <code>doc</code> without text extractors.
 * @throws IOException if the document cannot be added to the indexing
 *                     queue.
 */
private Document getFinishedDocument(Document doc) throws IOException {
    if (Util.isDocumentReady(doc)) {
        return doc;
    }
    // Not ready yet: copy everything except reader-valued (extractor) fields
    // and queue the original until extraction has completed.
    Document stripped = new Document();
    Enumeration fields = doc.fields();
    while (fields.hasMoreElements()) {
        Field original = (Field) fields.nextElement();
        Field.TermVector tv = getTermVectorParameter(original);
        Field.Store stored = getStoreParameter(original);
        Field.Index indexed = getIndexParameter(original);
        Field replacement = null;
        if (original.readerValue() != null) {
            // replace all readers with empty string reader
            replacement = new Field(original.name(), new StringReader(""), tv);
        } else if (original.stringValue() != null) {
            replacement = new Field(original.name(), original.stringValue(), stored, indexed, tv);
        } else if (original.isBinary()) {
            replacement = new Field(original.name(), original.binaryValue(), stored);
        }
        if (replacement != null) {
            replacement.setOmitNorms(original.getOmitNorms());
            stripped.add(replacement);
        }
    }
    // schedule the original document for later indexing
    Document existing = indexingQueue.addDocument(doc);
    if (existing != null) {
        // the queue already held a pending document for this node -> dispose it
        Util.disposeDocument(existing);
    }
    // use the stripped down copy for now
    return stripped;
}

From source file:org.apache.solr.handler.admin.LukeRequestHandler.java

License:Apache License

/**
 * Describes every stored field of a document for the Luke handler: schema
 * type, flags, external/internal value, binary form, boost, docFreq and
 * (when present) the term vector.
 *
 * @param doc    the stored document to describe
 * @param docId  document id, used for term-vector lookup
 * @param reader index reader used for docFreq and term vectors
 * @param schema schema used to resolve field types
 * @return an ordered map keyed by field name
 * @throws IOException on index access failure
 */
private static SimpleOrderedMap<Object> getDocumentFieldsInfo(Document doc, int docId, IndexReader reader,
        IndexSchema schema) throws IOException {
    final CharsRef charsRef = new CharsRef();
    SimpleOrderedMap<Object> result = new SimpleOrderedMap<Object>();
    for (Object stored : doc.getFields()) {
        Field field = (Field) stored;
        SimpleOrderedMap<Object> info = new SimpleOrderedMap<Object>();

        SchemaField schemaField = schema.getFieldOrNull(field.name());
        FieldType fieldType = (schemaField == null) ? null : schemaField.getType();

        info.add("type", (fieldType == null) ? null : fieldType.getTypeName());
        info.add("schema", getFieldFlags(schemaField));
        info.add("flags", getFieldFlags(field));

        Term term = new Term(field.name(),
                fieldType != null ? fieldType.storedToIndexed(field) : field.stringValue());

        info.add("value", (fieldType == null) ? null : fieldType.toExternal(field));

        // TODO: this really should be "stored"
        info.add("internal", field.stringValue()); // may be a binary number

        BytesRef binary = field.binaryValue();
        if (binary != null) {
            info.add("binary", Base64.byteArrayToBase64(binary.bytes, binary.offset, binary.length));
        }
        info.add("boost", field.boost());
        info.add("docFreq", term.text() == null ? 0 : reader.docFreq(term)); // can be 0 for non-indexed fields

        // If we have a term vector, report term -> frequency.
        if (field.fieldType().storeTermVectors()) {
            try {
                Terms termVector = reader.getTermVector(docId, field.name());
                if (termVector != null) {
                    SimpleOrderedMap<Integer> tfv = new SimpleOrderedMap<Integer>();
                    final TermsEnum termsEnum = termVector.iterator(null);
                    BytesRef text;
                    while ((text = termsEnum.next()) != null) {
                        final int freq = (int) termsEnum.totalTermFreq();
                        UnicodeUtil.UTF8toUTF16(text, charsRef);
                        tfv.add(charsRef.toString(), freq);
                    }
                    info.add("termVector", tfv);
                }
            } catch (Exception ex) {
                log.warn("error writing term vector", ex);
            }
        }

        result.add(field.name(), info);
    }
    return result;
}

From source file:org.apache.solr.handler.component.AlfrescoLukeRequestHandler.java

License:Open Source License

/**
 * Describes every stored field of a document for the Luke handler: schema
 * type, flags, external/internal value, binary form, boost, docFreq and
 * (when present) the term vector.
 *
 * @param doc    the stored document to describe
 * @param docId  document id, used for term-vector lookup
 * @param reader index reader used for docFreq and term vectors
 * @param schema schema used to resolve field types
 * @return an ordered map keyed by field name
 * @throws IOException on index access failure
 */
private static SimpleOrderedMap<Object> getDocumentFieldsInfo(Document doc, int docId, IndexReader reader,
        IndexSchema schema) throws IOException {
    final CharsRefBuilder charsRef = new CharsRefBuilder();
    SimpleOrderedMap<Object> result = new SimpleOrderedMap<>();
    for (Object stored : doc.getFields()) {
        Field field = (Field) stored;
        SimpleOrderedMap<Object> info = new SimpleOrderedMap<>();

        SchemaField schemaField = schema.getFieldOrNull(field.name());
        FieldType fieldType = (schemaField == null) ? null : schemaField.getType();

        info.add("type", (fieldType == null) ? null : fieldType.getTypeName());
        info.add("schema", getFieldFlags(schemaField));
        info.add("flags", getFieldFlags(field));

        Term term = new Term(field.name(),
                fieldType != null ? fieldType.storedToIndexed(field) : field.stringValue());

        info.add("value", (fieldType == null) ? null : fieldType.toExternal(field));

        // TODO: this really should be "stored"
        info.add("internal", field.stringValue()); // may be a binary number

        BytesRef binary = field.binaryValue();
        if (binary != null) {
            info.add("binary", Base64.byteArrayToBase64(binary.bytes, binary.offset, binary.length));
        }
        info.add("boost", field.boost());
        info.add("docFreq", term.text() == null ? 0 : reader.docFreq(term)); // can be 0 for non-indexed fields

        // If we have a term vector, report term -> frequency.
        if (field.fieldType().storeTermVectors()) {
            try {
                Terms termVector = reader.getTermVector(docId, field.name());
                if (termVector != null) {
                    SimpleOrderedMap<Integer> tfv = new SimpleOrderedMap<>();
                    final TermsEnum termsEnum = termVector.iterator();
                    BytesRef text;
                    while ((text = termsEnum.next()) != null) {
                        final int freq = (int) termsEnum.totalTermFreq();
                        charsRef.copyUTF8Bytes(text);
                        tfv.add(charsRef.toString(), freq);
                    }
                    info.add("termVector", tfv);
                }
            } catch (Exception ex) {
                log.warn("error writing term vector", ex);
            }
        }

        result.add(field.name(), info);
    }
    return result;
}

From source file:org.apache.solr.schema.JsonPreAnalyzedParser.java

License:Apache License

/**
 * Serializes a pre-analyzed field to its JSON representation: the format
 * version, the stored string and/or binary value (only when the field type
 * is stored), and one map per token carrying its term text and attributes.
 *
 * @param f the field to format
 * @return the JSON string produced by JSONUtil.toJSON
 * @throws IOException if consuming the token stream fails
 */
@Override
public String toFormattedString(Field f) throws IOException {
    Map<String, Object> map = new LinkedHashMap<String, Object>();
    map.put(VERSION_KEY, VERSION);
    if (f.fieldType().stored()) {
        String stringValue = f.stringValue();
        if (stringValue != null) {
            map.put(STRING_KEY, stringValue);
        }
        // Binary payloads are emitted base64-encoded.
        BytesRef binaryValue = f.binaryValue();
        if (binaryValue != null) {
            map.put(BINARY_KEY,
                    Base64.byteArrayToBase64(binaryValue.bytes, binaryValue.offset, binaryValue.length));
        }
    }
    TokenStream ts = f.tokenStreamValue();
    if (ts != null) {
        List<Map<String, Object>> tokens = new LinkedList<Map<String, Object>>();
        while (ts.incrementToken()) {
            Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator();
            String cTerm = null; // term text seen via CharTermAttribute
            String tTerm = null; // term text seen via TermToBytesRefAttribute
            Map<String, Object> tok = new TreeMap<String, Object>();
            while (it.hasNext()) {
                Class<? extends Attribute> cl = it.next();
                if (!ts.hasAttribute(cl)) {
                    continue;
                }
                Attribute att = ts.getAttribute(cl);
                // NOTE(review): cl.isAssignableFrom(X.class) is true when cl is a
                // SUPERTYPE of X; confirm this direction matches the intent here.
                if (cl.isAssignableFrom(CharTermAttribute.class)) {
                    CharTermAttribute catt = (CharTermAttribute) att;
                    cTerm = new String(catt.buffer(), 0, catt.length());
                } else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) {
                    TermToBytesRefAttribute tatt = (TermToBytesRefAttribute) att;
                    tTerm = tatt.getBytesRef().utf8ToString();
                } else {
                    // Known attributes are serialized under well-known keys;
                    // anything else falls back to class name -> toString().
                    if (cl.isAssignableFrom(FlagsAttribute.class)) {
                        tok.put(FLAGS_KEY, Integer.toHexString(((FlagsAttribute) att).getFlags()));
                    } else if (cl.isAssignableFrom(OffsetAttribute.class)) {
                        tok.put(OFFSET_START_KEY, ((OffsetAttribute) att).startOffset());
                        tok.put(OFFSET_END_KEY, ((OffsetAttribute) att).endOffset());
                    } else if (cl.isAssignableFrom(PayloadAttribute.class)) {
                        BytesRef p = ((PayloadAttribute) att).getPayload();
                        if (p != null && p.length > 0) {
                            tok.put(PAYLOAD_KEY, Base64.byteArrayToBase64(p.bytes, p.offset, p.length));
                        }
                    } else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) {
                        tok.put(POSINCR_KEY, ((PositionIncrementAttribute) att).getPositionIncrement());
                    } else if (cl.isAssignableFrom(TypeAttribute.class)) {
                        tok.put(TYPE_KEY, ((TypeAttribute) att).type());
                    } else {
                        tok.put(cl.getName(), att.toString());
                    }
                }
            }
            // Prefer the char-term text; fall back to the bytes-ref term.
            String term = null;
            if (cTerm != null) {
                term = cTerm;
            } else {
                term = tTerm;
            }
            if (term != null && term.length() > 0) {
                tok.put(TOKEN_KEY, term);
            }
            tokens.add(tok);
        }
        map.put(TOKENS_KEY, tokens);
    }
    return JSONUtil.toJSON(map, -1);
}

From source file:org.elasticsearch.action.mlt.TransportMoreLikeThisAction.java

License:Apache License

/**
 * Converts a stored Lucene field into a plain Java value, probing in
 * priority order: string, then binary (as a defensive byte[] copy), then
 * numeric.
 *
 * @param field the stored field to convert
 * @return the field's value as String, byte[] or Number
 * @throws ElasticsearchIllegalStateException when the field carries none of
 *         the three value kinds
 */
private Object convertField(Field field) {
    String stringValue = field.stringValue();
    if (stringValue != null) {
        return stringValue;
    }
    BytesRef binaryValue = field.binaryValue();
    if (binaryValue != null) {
        // deep copy so callers cannot see later mutations of the shared ref
        return BytesRef.deepCopyOf(binaryValue).bytes;
    }
    Number numericValue = field.numericValue();
    if (numericValue != null) {
        return numericValue;
    }
    throw new ElasticsearchIllegalStateException(
            "Field should have either a string, numeric or binary value");
}

From source file:org.eu.bitzone.Leia.java

License:Apache License

/**
 * Reconstructs the document whose number is read from the given text widget
 * and opens an edit dialog showing, per field, the original stored content
 * and/or the content re-assembled from the inverted index. The heavy work
 * runs on a background thread while a Progress dialog is shown.
 *
 * @param docNumText the Thinlet widget holding the document number
 */
public void actionReconstruct(final Object docNumText) {
    final int[] nums = new int[1];
    try {
        final String numString = getString(docNumText, "text");
        nums[0] = Integer.parseInt(numString);
    } catch (final Exception e) {
        showStatus("ERROR: no valid document selected");
        return;
    }
    final Progress progress = new Progress(this);
    progress.setMessage("Reconstructing ...");
    progress.show();
    final Thread thr = new Thread() {

        @Override
        public void run() {
            try {
                final int docNum = nums[0];
                // Rebuild stored + reconstructed field values for this doc.
                final DocReconstructor recon = new DocReconstructor(ir, idxFields, numTerms);
                recon.addObserver(progress);
                final Reconstructed doc = recon.reconstruct(docNum);
                final Object dialog = addComponent(null, "/xml/editdoc.xml", null, null);
                putProperty(dialog, "docNum", new Integer(docNum));
                final Object cbAnalyzers = find(dialog, "cbAnalyzers");
                populateAnalyzers(cbAnalyzers);
                setInteger(cbAnalyzers, "selected", 0);
                final Object editTabs = find(dialog, "editTabs");
                setString(find(dialog, "docNum"), "text", "Fields of Doc #: " + docNum);
                for (int p = 0; p < idxFields.length; p++) {
                    final String key = idxFields[p];
                    if (!doc.hasField(key)) {
                        continue;
                    }
                    // count = max(#stored instances, 1 if reconstructed text exists)
                    final IndexableField[] fields = doc.getStoredFields().get(key);
                    GrowableStringArray recField = doc.getReconstructedFields().get(key);
                    int count = 0;
                    if (recField != null) {
                        count = 1;
                    }
                    if (fields != null && fields.length > count) {
                        count = fields.length;
                    }
                    for (int i = 0; i < count; i++) {
                        if (i > 0) {
                            recField = null; // show it only for the first field
                        }
                        // One tab per field instance, populated from editfield.xml.
                        final Object tab = create("tab");
                        setString(tab, "text", key);
                        setFont(tab, getFont().deriveFont(Font.BOLD));
                        add(editTabs, tab);
                        final Object editfield = addComponent(tab, "/xml/editfield.xml", null, null);
                        final Object fType = find(editfield, "fType");
                        final Object sText = find(editfield, "sText");
                        final Object rText = find(editfield, "rText");
                        final Object fBoost = find(editfield, "fBoost");
                        final Object cbStored = find(editfield, "cbStored");
                        // Object cbCmp = find(editfield, "cbCmp");
                        final Object cbBin = find(editfield, "cbBin");
                        final Object cbIndexed = find(editfield, "cbIndexed");
                        final Object cbTokenized = find(editfield, "cbTokenized");
                        final Object cbTVF = find(editfield, "cbTVF");
                        final Object cbTVFp = find(editfield, "cbTVFp");
                        final Object cbTVFo = find(editfield, "cbTVFo");
                        final Object cbONorms = find(editfield, "cbONorms");
                        final Object cbOTF = find(editfield, "cbOTF");
                        final Object stored = find(editfield, "stored");
                        final Object restored = find(editfield, "restored");
                        if (ar != null) {
                            setBoolean(cbONorms, "selected", !ar.hasNorms(key));
                        }
                        Field f = null;
                        if (fields != null && fields.length > i) {
                            // Original stored value: binary is shown as hex.
                            f = (Field) fields[i];
                            setString(fType, "text", "Original stored field content");
                            String text;
                            if (f.binaryValue() != null) {
                                text = Util.bytesToHex(f.binaryValue(), true);
                                setBoolean(cbBin, "selected", true);
                            } else {
                                text = f.stringValue();
                            }
                            setString(sText, "text", text);
                            setString(fBoost, "text", String.valueOf(f.boost()));
                            final IndexableFieldType t = f.fieldType();
                            setBoolean(cbStored, "selected", t.stored());
                            // Lucene 3.0 doesn't support compressed fields
                            // setBoolean(cbCmp, "selected", false);
                            setBoolean(cbIndexed, "selected", t.indexed());
                            setBoolean(cbTokenized, "selected", t.tokenized());
                            setBoolean(cbTVF, "selected", t.storeTermVectors());
                            setBoolean(cbTVFp, "selected", t.storeTermVectorPositions());
                            setBoolean(cbTVFo, "selected", t.storeTermVectorOffsets());
                            // XXX omitTF needs fixing!
                            // setBoolean(cbOTF, "selected", f.getOmitTermFreqAndPositions());
                        } else {
                            remove(stored);
                        }
                        if (recField != null) {
                            // Text rebuilt from the inverted index (first instance only).
                            String sep = " ";
                            if (f == null) {
                                setString(fType, "text", "RESTORED content ONLY - check for errors!");
                                setColor(fType, "foreground", Color.red);
                            } else {
                                setBoolean(rText, "editable", false);
                                setBoolean(rText, "border", false);
                                setString(restored, "text", "Tokenized (from all '" + key + "' fields)");
                                sep = ", ";
                            }
                            setBoolean(cbIndexed, "selected", true);
                            setString(fBoost, "text", String.valueOf(1.0f));
                            setString(rText, "text", recField.toString(sep));
                        } else {
                            remove(restored);
                        }
                    }
                }
                add(dialog);
                getPreferredSize(editTabs);
            } catch (final Exception e) {
                e.printStackTrace();
                showStatus(e.getMessage());
            }
            progress.hide();
        }
    };
    thr.start();
}