Usage examples for `org.apache.lucene.document.Field#binaryValue`
@Override
public BytesRef binaryValue()
From source file:DVBench.java
License:Apache License
/**
 * Benchmarks numeric lookups at a given bit width over four field encodings:
 * numeric doc values ("dv"), an uninverted indexed long ("inv"), and two
 * binary-doc-values boxings ("boxed" variable-length, "boxed2" fixed-length).
 *
 * NOTE(review): writes to the hard-coded path /data/indices/dvbench and relies on
 * the helpers box(), search(), MyRandom and BinaryAsVLongReader defined elsewhere
 * in this file — confirm they are present before running.
 *
 * @param bpv bits per value to generate (64 triggers the full long range)
 * @throws Exception on any indexing or search failure
 */
static void doBench(int bpv) throws Exception {
    File file = new File("/data/indices/dvbench");
    file.mkdirs();
    Directory dir = FSDirectory.open(file);
    // Serial merging + log-doc policy keep segment geometry deterministic across runs.
    IndexWriterConfig config = new IndexWriterConfig(null);
    config.setOpenMode(OpenMode.CREATE);
    config.setMergeScheduler(new SerialMergeScheduler());
    config.setMergePolicy(new LogDocMergePolicy());
    config.setMaxBufferedDocs(25000);
    IndexWriter writer = new IndexWriter(dir, config);
    MyRandom r = new MyRandom();
    int numdocs = 400000;
    // One reusable Document: field instances are mutated per iteration instead of reallocated.
    Document doc = new Document();
    Field dv = new NumericDocValuesField("dv", 0);
    Field inv = new LongField("inv", 0, Field.Store.NO);
    Field boxed = new BinaryDocValuesField("boxed", new BytesRef(8));
    Field boxed2 = new BinaryDocValuesField("boxed2", new BytesRef(8));
    doc.add(dv);
    doc.add(inv);
    doc.add(boxed);
    doc.add(boxed2);
    for (int i = 0; i < numdocs; i++) {
        // defeat blockpackedwriter: force extreme values into each 8192-doc block
        // so the codec cannot collapse the block to a narrower representation.
        final long value;
        if (i % 8192 == 0) {
            value = bpv == 64 ? Long.MIN_VALUE : 0;
        } else if (i % 8192 == 1) {
            value = bpv == 64 ? Long.MAX_VALUE : (1L << bpv) - 1;
        } else {
            value = r.nextLong(bpv);
        }
        dv.setLongValue(value);
        inv.setLongValue(value);
        // box() writes the long into the field's existing BytesRef in place.
        box(value, boxed.binaryValue());
        box(value, boxed2.binaryValue());
        boxed2.binaryValue().length = (bpv + 7) / 8; // fixed length
        writer.addDocument(doc);
    }
    writer.close();
    // run dv search tests
    String description = "dv (bpv=" + bpv + ")";
    DirectoryReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setQueryCache(null); // don't bench the cache
    // hash accumulates search results so the JIT cannot dead-code-eliminate the benchmark.
    int hash = 0;
    // warmup
    hash += search(description, searcher, "dv", 300, true);
    hash += search(description, searcher, "dv", 300, false);
    // Uninverting: expose the indexed "inv" field as if it had doc values ("field cache" path).
    Map<String, UninvertingReader.Type> mapping = Collections.singletonMap("inv", UninvertingReader.Type.LONG);
    DirectoryReader uninv = UninvertingReader.wrap(reader, mapping);
    IndexSearcher searcher2 = new IndexSearcher(uninv);
    searcher2.setQueryCache(null); // don't bench the cache
    description = "fc (bpv=" + bpv + ")";
    // warmup
    hash += search(description, searcher2, "inv", 300, true);
    hash += search(description, searcher2, "inv", 300, false);
    // Boxed inside binary: longs decoded out of binary doc values via BinaryAsVLongReader.
    DirectoryReader boxedReader = new BinaryAsVLongReader(reader);
    IndexSearcher searcher3 = new IndexSearcher(boxedReader);
    searcher3.setQueryCache(null); // don't bench the cache
    description = "boxed (bpv=" + bpv + ")";
    // warmup
    hash += search(description, searcher3, "boxed", 300, true);
    hash += search(description, searcher3, "boxed", 300, false);
    description = "boxed fixed-length (bpv=" + bpv + ")";
    // warmup
    hash += search(description, searcher3, "boxed2", 300, true);
    hash += search(description, searcher3, "boxed2", 300, false);
    if (hash == 3) { // wont happen — printing keeps hash observably live
        System.out.println("hash=" + hash);
    }
    reader.close();
    dir.close();
}
From source file:com.ibm.jaql.lang.expr.index.ProbeLuceneFn.java
License:Apache License
/**
 * Probes a Lucene index with a query and streams one JSON record per hit.
 *
 * Argument layout (from exprs): [0] a record whose "location" entry is the index
 * path, [1] the query string, [2] an iterator of field names to load from each hit.
 * Returns JsonIterator.NULL if any required argument is missing.
 *
 * NOTE(review): uses Lucene 2.x-era APIs (FieldSelector, Scorer.next()/doc(),
 * Query.weight(searcher)) — confirm the Lucene version on the classpath matches.
 */
@Override
public JsonIterator iter(Context context) throws Exception {
    JsonRecord fd = (JsonRecord) exprs[0].eval(context);
    if (fd == null) {
        return JsonIterator.NULL;
    }
    JsonString loc = (JsonString) fd.get(new JsonString("location"));
    if (loc == null) {
        return JsonIterator.NULL;
    }
    JsonString jquery = (JsonString) exprs[1].eval(context);
    if (jquery == null) {
        return JsonIterator.NULL;
    }
    // Collect the requested stored-field names; null means "load no fields".
    HashSet<String> fields = null;
    JsonIterator iter = exprs[2].iter(context);
    for (JsonValue sv : iter) {
        JsonString s = (JsonString) sv;
        if (s != null) {
            if (fields == null) {
                fields = new HashSet<String>();
            }
            fields.add(s.toString());
        }
    }
    final FieldSelector fieldSelector = (fields == null) ? null
            : new SetBasedFieldSelector(fields, new HashSet<String>());
    final IndexSearcher searcher = new IndexSearcher(loc.toString());
    Analyzer analyzer = new StandardAnalyzer();
    QueryParser qp = new QueryParser("key", analyzer);
    Query query = qp.parse(jquery.toString());
    query = searcher.rewrite(query);
    // Drive matching directly off the scorer rather than collecting top docs.
    final Scorer scorer = query.weight(searcher).scorer(searcher.getIndexReader());
    final BufferedJsonRecord rec = new BufferedJsonRecord();
    final JsonString jdoc = new JsonString("doc");
    final MutableJsonLong jdocid = new MutableJsonLong();
    // The returned iterator reuses rec/jdocid across hits to avoid per-hit allocation.
    return new JsonIterator(rec) {
        @Override
        public boolean moveNext() throws Exception {
            if (!scorer.next()) {
                return false;
            }
            rec.clear();
            int i = scorer.doc();
            jdocid.set(i);
            rec.add(jdoc, jdocid);
            if (fieldSelector != null) {
                Document doc = searcher.doc(i, fieldSelector);
                for (Object x : doc.getFields()) {
                    Field f = (Field) x;
                    String name = f.name();
                    // Stored values are expected to be binary-serialized JSON values
                    // readable by `serializer` — TODO confirm against the index writer.
                    byte[] val = f.binaryValue();
                    ByteArrayInputStream bais = new ByteArrayInputStream(val); // TODO: reuse
                    DataInputStream in = new DataInputStream(bais); // TODO: reuse
                    JsonValue ival = serializer.read(in, null);
                    rec.add(new JsonString(name), ival);
                }
            }
            return true; // currentValue == rec
        }
    };
}
From source file:com.stratio.cassandra.lucene.service.ClusteringKeyMapperTest.java
License:Apache License
@Test public void testAddFields() throws InvalidRequestException, ConfigurationException { List<ColumnDef> columnDefinitions = new ArrayList<>(); columnDefinitions.add(new ColumnDef(ByteBufferUtil.bytes("field1"), UTF8Type.class.getCanonicalName()) .setIndex_name("field1").setIndex_type(IndexType.KEYS)); columnDefinitions.add(new ColumnDef(ByteBufferUtil.bytes("field2"), IntegerType.class.getCanonicalName()) .setIndex_name("field2").setIndex_type(IndexType.KEYS)); CfDef cfDef = new CfDef().setDefault_validation_class(AsciiType.class.getCanonicalName()) .setColumn_metadata(columnDefinitions).setKeyspace("Keyspace1").setName("Standard1"); CFMetaData metadata = ThriftConversion.fromThrift(cfDef); Schema schema = SchemaBuilders.schema().mapper("field1", stringMapper()).mapper("field2", textMapper()) .build();//from w w w.j a v a 2 s.c o m ClusteringKeyMapper clusteringKeyMapper = ClusteringKeyMapper.instance(metadata, schema); CellName cellName = CellNames.simpleSparse(new ColumnIdentifier("aaaa", false)); Document doc = new Document(); clusteringKeyMapper.addFields(doc, cellName); Field field = (Field) doc.getField(ClusteringKeyMapper.FIELD_NAME); assertNotNull("clusteringKeyMapper addFields to Document must add al least one Field to Doc", field); assertEquals("clusteringKeyMapper.byteRef included in Document must be equal", clusteringKeyMapper.bytesRef(cellName), field.binaryValue()); }
From source file:org.apache.jackrabbit.core.query.lucene.AbstractIndex.java
License:Apache License
/** * Returns a document that is finished with text extraction and is ready to * be added to the index./*w ww. j a v a 2 s .c o m*/ * * @param doc the document to check. * @return <code>doc</code> if it is finished already or a stripped down * copy of <code>doc</code> without text extractors. * @throws IOException if the document cannot be added to the indexing * queue. */ private Document getFinishedDocument(Document doc) throws IOException { if (!Util.isDocumentReady(doc)) { Document copy = new Document(); for (Iterator fields = doc.getFields().iterator(); fields.hasNext();) { Field f = (Field) fields.next(); Field field = null; Field.TermVector tv = getTermVectorParameter(f); Field.Store stored = getStoreParameter(f); Field.Index indexed = getIndexParameter(f); if (f.readerValue() != null) { // replace all readers with empty string reader field = new Field(f.name(), new StringReader(""), tv); } else if (f.stringValue() != null) { field = new Field(f.name(), f.stringValue(), stored, indexed, tv); } else if (f.isBinary()) { field = new Field(f.name(), f.binaryValue(), stored); } if (field != null) { field.setOmitNorms(f.getOmitNorms()); copy.add(field); } } // schedule the original document for later indexing Document existing = indexingQueue.addDocument(doc); if (existing != null) { // the queue already contained a pending document for this // node. -> dispose the document Util.disposeDocument(existing); } // use the stripped down copy for now doc = copy; } return doc; }
From source file:org.apache.jackrabbit.core.query.lucene.AbstractIndex.java
License:Apache License
/** * Returns a document that is finished with text extraction and is ready to * be added to the index.//w w w . j av a 2s . c o m * * @param doc the document to check. * @return <code>doc</code> if it is finished already or a stripped down * copy of <code>doc</code> without text extractors. * @throws IOException if the document cannot be added to the indexing * queue. */ private Document getFinishedDocument(Document doc) throws IOException { if (!Util.isDocumentReady(doc)) { Document copy = new Document(); for (Enumeration fields = doc.fields(); fields.hasMoreElements();) { Field f = (Field) fields.nextElement(); Field field = null; Field.TermVector tv = getTermVectorParameter(f); Field.Store stored = getStoreParameter(f); Field.Index indexed = getIndexParameter(f); if (f.readerValue() != null) { // replace all readers with empty string reader field = new Field(f.name(), new StringReader(""), tv); } else if (f.stringValue() != null) { field = new Field(f.name(), f.stringValue(), stored, indexed, tv); } else if (f.isBinary()) { field = new Field(f.name(), f.binaryValue(), stored); } if (field != null) { field.setOmitNorms(f.getOmitNorms()); copy.add(field); } } // schedule the original document for later indexing Document existing = indexingQueue.addDocument(doc); if (existing != null) { // the queue already contained a pending document for this // node. -> dispose the document Util.disposeDocument(existing); } // use the stripped down copy for now doc = copy; } return doc; }
From source file:org.apache.solr.handler.admin.LukeRequestHandler.java
License:Apache License
/**
 * Builds a per-field diagnostic map for one stored document: type, schema/index
 * flags, external and internal values, binary payload, boost, docFreq, and (when
 * stored) the term vector of each field.
 *
 * @param doc    the stored document to describe
 * @param docId  the document's Lucene doc id (used for term vector lookup)
 * @param reader index reader used for docFreq and term vectors
 * @param schema Solr schema used to resolve field types
 * @return an ordered map keyed by field name
 * @throws IOException on index access failure
 */
private static SimpleOrderedMap<Object> getDocumentFieldsInfo(Document doc, int docId, IndexReader reader,
        IndexSchema schema) throws IOException {
    // Reused scratch buffer for UTF-8 -> UTF-16 conversion of term bytes.
    final CharsRef spare = new CharsRef();
    SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>();
    for (Object o : doc.getFields()) {
        Field field = (Field) o;
        SimpleOrderedMap<Object> f = new SimpleOrderedMap<Object>();
        // sfield/ftype are null for fields not declared in the schema.
        SchemaField sfield = schema.getFieldOrNull(field.name());
        FieldType ftype = (sfield == null) ? null : sfield.getType();
        f.add("type", (ftype == null) ? null : ftype.getTypeName());
        f.add("schema", getFieldFlags(sfield));
        f.add("flags", getFieldFlags(field));
        // Term text uses the indexed form when the type can produce it.
        Term t = new Term(field.name(), ftype != null ? ftype.storedToIndexed(field) : field.stringValue());
        f.add("value", (ftype == null) ? null : ftype.toExternal(field)); // TODO: this really should be "stored"
        f.add("internal", field.stringValue()); // may be a binary number
        BytesRef bytes = field.binaryValue();
        if (bytes != null) {
            f.add("binary", Base64.byteArrayToBase64(bytes.bytes, bytes.offset, bytes.length));
        }
        f.add("boost", field.boost());
        f.add("docFreq", t.text() == null ? 0 : reader.docFreq(t)); // this can be 0 for non-indexed fields
        // If we have a term vector, return that
        if (field.fieldType().storeTermVectors()) {
            try {
                Terms v = reader.getTermVector(docId, field.name());
                if (v != null) {
                    SimpleOrderedMap<Integer> tfv = new SimpleOrderedMap<Integer>();
                    final TermsEnum termsEnum = v.iterator(null);
                    BytesRef text;
                    while ((text = termsEnum.next()) != null) {
                        // totalTermFreq of a single-doc term vector is the in-document frequency.
                        final int freq = (int) termsEnum.totalTermFreq();
                        UnicodeUtil.UTF8toUTF16(text, spare);
                        tfv.add(spare.toString(), freq);
                    }
                    f.add("termVector", tfv);
                }
            } catch (Exception ex) {
                // Term vector problems are reported but must not fail the whole response.
                log.warn("error writing term vector", ex);
            }
        }
        finfo.add(field.name(), f);
    }
    return finfo;
}
From source file:org.apache.solr.handler.component.AlfrescoLukeRequestHandler.java
License:Open Source License
/**
 * Builds a per-field diagnostic map for one stored document: type, schema/index
 * flags, external and internal values, binary payload, boost, docFreq, and (when
 * stored) the term vector of each field. Newer-Lucene variant using
 * CharsRefBuilder and the no-arg TermsEnum iterator.
 *
 * @param doc    the stored document to describe
 * @param docId  the document's Lucene doc id (used for term vector lookup)
 * @param reader index reader used for docFreq and term vectors
 * @param schema schema used to resolve field types
 * @return an ordered map keyed by field name
 * @throws IOException on index access failure
 */
private static SimpleOrderedMap<Object> getDocumentFieldsInfo(Document doc, int docId, IndexReader reader,
        IndexSchema schema) throws IOException {
    // Reused scratch builder for UTF-8 -> UTF-16 conversion of term bytes.
    final CharsRefBuilder spare = new CharsRefBuilder();
    SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<>();
    for (Object o : doc.getFields()) {
        Field field = (Field) o;
        SimpleOrderedMap<Object> f = new SimpleOrderedMap<>();
        // sfield/ftype are null for fields not declared in the schema.
        SchemaField sfield = schema.getFieldOrNull(field.name());
        FieldType ftype = (sfield == null) ? null : sfield.getType();
        f.add("type", (ftype == null) ? null : ftype.getTypeName());
        f.add("schema", getFieldFlags(sfield));
        f.add("flags", getFieldFlags(field));
        // Term text uses the indexed form when the type can produce it.
        Term t = new Term(field.name(), ftype != null ? ftype.storedToIndexed(field) : field.stringValue());
        f.add("value", (ftype == null) ? null : ftype.toExternal(field)); // TODO: this really should be "stored"
        f.add("internal", field.stringValue()); // may be a binary number
        BytesRef bytes = field.binaryValue();
        if (bytes != null) {
            f.add("binary", Base64.byteArrayToBase64(bytes.bytes, bytes.offset, bytes.length));
        }
        f.add("boost", field.boost());
        f.add("docFreq", t.text() == null ? 0 : reader.docFreq(t)); // can be 0 for non-indexed fields
        // If we have a term vector, return that
        if (field.fieldType().storeTermVectors()) {
            try {
                Terms v = reader.getTermVector(docId, field.name());
                if (v != null) {
                    SimpleOrderedMap<Integer> tfv = new SimpleOrderedMap<>();
                    final TermsEnum termsEnum = v.iterator();
                    BytesRef text;
                    while ((text = termsEnum.next()) != null) {
                        // totalTermFreq of a single-doc term vector is the in-document frequency.
                        final int freq = (int) termsEnum.totalTermFreq();
                        spare.copyUTF8Bytes(text);
                        tfv.add(spare.toString(), freq);
                    }
                    f.add("termVector", tfv);
                }
            } catch (Exception ex) {
                // Term vector problems are reported but must not fail the whole response.
                log.warn("error writing term vector", ex);
            }
        }
        finfo.add(field.name(), f);
    }
    return finfo;
}
From source file:org.apache.solr.schema.JsonPreAnalyzedParser.java
License:Apache License
/**
 * Serializes a pre-analyzed field to its JSON representation: version, optional
 * stored string/binary value, and one map per token holding term text plus any
 * flags, offsets, payload, position increment and type attributes.
 *
 * NOTE(review): the stream is consumed as-is; callers are presumed to have
 * reset it — confirm against the call site.
 *
 * @param f the field to serialize
 * @return JSON string in the pre-analyzed field format
 * @throws IOException if the token stream cannot be read
 */
@Override
public String toFormattedString(Field f) throws IOException {
    Map<String, Object> map = new LinkedHashMap<String, Object>();
    map.put(VERSION_KEY, VERSION);
    if (f.fieldType().stored()) {
        // Stored value may be textual, binary, or both.
        String stringValue = f.stringValue();
        if (stringValue != null) {
            map.put(STRING_KEY, stringValue);
        }
        BytesRef binaryValue = f.binaryValue();
        if (binaryValue != null) {
            map.put(BINARY_KEY,
                    Base64.byteArrayToBase64(binaryValue.bytes, binaryValue.offset, binaryValue.length));
        }
    }
    TokenStream ts = f.tokenStreamValue();
    if (ts != null) {
        List<Map<String, Object>> tokens = new LinkedList<Map<String, Object>>();
        while (ts.incrementToken()) {
            Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator();
            // Two candidate term sources: char-term attribute wins over bytes-ref term.
            String cTerm = null;
            String tTerm = null;
            Map<String, Object> tok = new TreeMap<String, Object>();
            while (it.hasNext()) {
                Class<? extends Attribute> cl = it.next();
                if (!ts.hasAttribute(cl)) {
                    continue;
                }
                Attribute att = ts.getAttribute(cl);
                if (cl.isAssignableFrom(CharTermAttribute.class)) {
                    CharTermAttribute catt = (CharTermAttribute) att;
                    cTerm = new String(catt.buffer(), 0, catt.length());
                } else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) {
                    TermToBytesRefAttribute tatt = (TermToBytesRefAttribute) att;
                    tTerm = tatt.getBytesRef().utf8ToString();
                } else {
                    // Remaining known attributes are serialized under dedicated keys;
                    // anything unrecognized falls back to class-name -> toString().
                    if (cl.isAssignableFrom(FlagsAttribute.class)) {
                        tok.put(FLAGS_KEY, Integer.toHexString(((FlagsAttribute) att).getFlags()));
                    } else if (cl.isAssignableFrom(OffsetAttribute.class)) {
                        tok.put(OFFSET_START_KEY, ((OffsetAttribute) att).startOffset());
                        tok.put(OFFSET_END_KEY, ((OffsetAttribute) att).endOffset());
                    } else if (cl.isAssignableFrom(PayloadAttribute.class)) {
                        BytesRef p = ((PayloadAttribute) att).getPayload();
                        if (p != null && p.length > 0) {
                            tok.put(PAYLOAD_KEY, Base64.byteArrayToBase64(p.bytes, p.offset, p.length));
                        }
                    } else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) {
                        tok.put(POSINCR_KEY, ((PositionIncrementAttribute) att).getPositionIncrement());
                    } else if (cl.isAssignableFrom(TypeAttribute.class)) {
                        tok.put(TYPE_KEY, ((TypeAttribute) att).type());
                    } else {
                        tok.put(cl.getName(), att.toString());
                    }
                }
            }
            String term = null;
            if (cTerm != null) {
                term = cTerm;
            } else {
                term = tTerm;
            }
            if (term != null && term.length() > 0) {
                tok.put(TOKEN_KEY, term);
            }
            tokens.add(tok);
        }
        map.put(TOKENS_KEY, tokens);
    }
    return JSONUtil.toJSON(map, -1);
}
From source file:org.elasticsearch.action.mlt.TransportMoreLikeThisAction.java
License:Apache License
/**
 * Converts a stored Lucene field into a plain Java value: the string value when
 * present, otherwise a defensive copy of the binary value's bytes, otherwise the
 * numeric value.
 *
 * @param field the stored field to convert
 * @return a String, byte[] or Number, depending on the field's stored value
 * @throws ElasticsearchIllegalStateException if the field carries none of the
 *         three supported value kinds
 */
private Object convertField(Field field) {
    if (field.stringValue() != null) {
        return field.stringValue();
    }
    // Hoisted: the original called binaryValue() twice (null check + conversion).
    BytesRef binary = field.binaryValue();
    if (binary != null) {
        // deepCopyOf detaches the bytes from the field's reusable buffer.
        return BytesRef.deepCopyOf(binary).bytes;
    }
    if (field.numericValue() != null) {
        return field.numericValue();
    }
    throw new ElasticsearchIllegalStateException("Field should have either a string, numeric or binary value");
}
From source file:org.eu.bitzone.Leia.java
License:Apache License
/**
 * Reconstructs the fields of a single document (stored fields plus field content
 * rebuilt from the inverted index) and shows them in an edit dialog, one tab per
 * field instance. The heavy work runs on a background thread behind a Progress
 * dialog.
 *
 * NOTE(review): find()/setString()/setBoolean()/add() etc. appear to be
 * Thinlet-style UI helpers inherited from the enclosing class — confirm.
 *
 * @param docNumText UI widget whose "text" property holds the document number
 */
public void actionReconstruct(final Object docNumText) {
    // Single-element array so the inner Thread can capture the parsed doc number.
    final int[] nums = new int[1];
    try {
        final String numString = getString(docNumText, "text");
        nums[0] = Integer.parseInt(numString);
    } catch (final Exception e) {
        showStatus("ERROR: no valid document selected");
        return;
    }
    final Progress progress = new Progress(this);
    progress.setMessage("Reconstructing ...");
    progress.show();
    final Thread thr = new Thread() {
        @Override
        public void run() {
            try {
                final int docNum = nums[0];
                final DocReconstructor recon = new DocReconstructor(ir, idxFields, numTerms);
                // Progress observes reconstruction so the dialog can track it.
                recon.addObserver(progress);
                final Reconstructed doc = recon.reconstruct(docNum);
                final Object dialog = addComponent(null, "/xml/editdoc.xml", null, null);
                putProperty(dialog, "docNum", new Integer(docNum));
                final Object cbAnalyzers = find(dialog, "cbAnalyzers");
                populateAnalyzers(cbAnalyzers);
                setInteger(cbAnalyzers, "selected", 0);
                final Object editTabs = find(dialog, "editTabs");
                setString(find(dialog, "docNum"), "text", "Fields of Doc #: " + docNum);
                // One pass per indexed field name; skip names absent from this document.
                for (int p = 0; p < idxFields.length; p++) {
                    final String key = idxFields[p];
                    if (!doc.hasField(key)) {
                        continue;
                    }
                    final IndexableField[] fields = doc.getStoredFields().get(key);
                    GrowableStringArray recField = doc.getReconstructedFields().get(key);
                    // count = number of tabs: at least one if reconstructed content
                    // exists, otherwise one per stored field instance.
                    int count = 0;
                    if (recField != null) {
                        count = 1;
                    }
                    if (fields != null && fields.length > count) {
                        count = fields.length;
                    }
                    for (int i = 0; i < count; i++) {
                        if (i > 0) {
                            recField = null; // show it only for the first field
                        }
                        final Object tab = create("tab");
                        setString(tab, "text", key);
                        setFont(tab, getFont().deriveFont(Font.BOLD));
                        add(editTabs, tab);
                        final Object editfield = addComponent(tab, "/xml/editfield.xml", null, null);
                        // Handles to the per-field editor widgets.
                        final Object fType = find(editfield, "fType");
                        final Object sText = find(editfield, "sText");
                        final Object rText = find(editfield, "rText");
                        final Object fBoost = find(editfield, "fBoost");
                        final Object cbStored = find(editfield, "cbStored");
                        // Object cbCmp = find(editfield, "cbCmp");
                        final Object cbBin = find(editfield, "cbBin");
                        final Object cbIndexed = find(editfield, "cbIndexed");
                        final Object cbTokenized = find(editfield, "cbTokenized");
                        final Object cbTVF = find(editfield, "cbTVF");
                        final Object cbTVFp = find(editfield, "cbTVFp");
                        final Object cbTVFo = find(editfield, "cbTVFo");
                        final Object cbONorms = find(editfield, "cbONorms");
                        final Object cbOTF = find(editfield, "cbOTF");
                        final Object stored = find(editfield, "stored");
                        final Object restored = find(editfield, "restored");
                        if (ar != null) {
                            setBoolean(cbONorms, "selected", !ar.hasNorms(key));
                        }
                        Field f = null;
                        if (fields != null && fields.length > i) {
                            // This tab has an original stored field: show its value and flags.
                            f = (Field) fields[i];
                            setString(fType, "text", "Original stored field content");
                            String text;
                            if (f.binaryValue() != null) {
                                // Binary content is shown as hex and flagged as binary.
                                text = Util.bytesToHex(f.binaryValue(), true);
                                setBoolean(cbBin, "selected", true);
                            } else {
                                text = f.stringValue();
                            }
                            setString(sText, "text", text);
                            setString(fBoost, "text", String.valueOf(f.boost()));
                            final IndexableFieldType t = f.fieldType();
                            setBoolean(cbStored, "selected", t.stored());
                            // Lucene 3.0 doesn't support compressed fields
                            // setBoolean(cbCmp, "selected", false);
                            setBoolean(cbIndexed, "selected", t.indexed());
                            setBoolean(cbTokenized, "selected", t.tokenized());
                            setBoolean(cbTVF, "selected", t.storeTermVectors());
                            setBoolean(cbTVFp, "selected", t.storeTermVectorPositions());
                            setBoolean(cbTVFo, "selected", t.storeTermVectorOffsets());
                            // XXX omitTF needs fixing!
                            // setBoolean(cbOTF, "selected", f.getOmitTermFreqAndPositions());
                        } else {
                            remove(stored);
                        }
                        if (recField != null) {
                            // Show content rebuilt from the inverted index; warn when
                            // there is no stored original to compare against.
                            String sep = " ";
                            if (f == null) {
                                setString(fType, "text", "RESTORED content ONLY - check for errors!");
                                setColor(fType, "foreground", Color.red);
                            } else {
                                setBoolean(rText, "editable", false);
                                setBoolean(rText, "border", false);
                                setString(restored, "text", "Tokenized (from all '" + key + "' fields)");
                                sep = ", ";
                            }
                            setBoolean(cbIndexed, "selected", true);
                            setString(fBoost, "text", String.valueOf(1.0f));
                            setString(rText, "text", recField.toString(sep));
                        } else {
                            remove(restored);
                        }
                    }
                }
                add(dialog);
                getPreferredSize(editTabs);
            } catch (final Exception e) {
                e.printStackTrace();
                showStatus(e.getMessage());
            }
            // Hide the progress dialog whether reconstruction succeeded or failed.
            progress.hide();
        }
    };
    thr.start();
}