List of usage examples for org.apache.lucene.util.BytesRef.deepCopyOf
public static BytesRef deepCopyOf(BytesRef other)
Parameter: other, the BytesRef to copy.
Creates a new BytesRef that points to a copy of the bytes from other. The returned BytesRef will have a length of other.length and an offset of zero.
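Why copies matter: Lucene enumeration APIs such as TermsEnum reuse a single BytesRef instance across calls to next(), so a term that must survive the next iteration has to be copied first, which is the pattern most of the examples below share. The following is a minimal sketch of that pattern, assuming a caller already holds a TermsEnum; the CollectFieldTerms class and its collect method are hypothetical names introduced here for illustration and do not appear in the examples.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

// Hypothetical helper used only to illustrate the copy-before-store pattern.
public final class CollectFieldTerms {

    public static List<BytesRef> collect(TermsEnum termsEnum) throws IOException {
        List<BytesRef> copies = new ArrayList<>();
        BytesRef term;
        while ((term = termsEnum.next()) != null) {
            // deepCopyOf allocates a fresh byte[] of exactly term.length with offset 0,
            // so the stored value is not clobbered when the enum reuses its buffer.
            copies.add(BytesRef.deepCopyOf(term));
        }
        return copies;
    }
}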
From source file:org.elasticsearch.test.unit.index.mapper.geo.LatLonMappingGeoPointTests.java
License:Apache License
@Test
public void testLatLonInOneValueArray() throws Exception {
    String mapping = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties")
            .startObject("point").field("type", "geo_point").field("lat_lon", true).field("store", "yes")
            .endObject().endObject().endObject().endObject().string();
    DocumentMapper defaultMapper = MapperTests.newParser().parse(mapping);

    ParsedDocument doc = defaultMapper.parse("type", "1", XContentFactory.jsonBuilder().startObject()
            .startArray("point").value("1.2,1.3").value("1.4,1.5").endArray().endObject().bytes());

    assertThat(doc.rootDoc().getFields("point.lat").length, equalTo(2));
    assertThat(doc.rootDoc().getFields("point.lon").length, equalTo(2));
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lat")[0].binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.2)));
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lon")[0].binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.3)));
    assertThat(doc.rootDoc().getFields("point")[0].stringValue(), equalTo("1.2,1.3"));
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lat")[1].binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.4)));
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lon")[1].binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.5)));
    assertThat(doc.rootDoc().getFields("point")[1].stringValue(), equalTo("1.4,1.5"));
}
From source file:org.elasticsearch.test.unit.index.mapper.geo.LatLonMappingGeoPointTests.java
License:Apache License
@Test
public void testLonLatArrayStored() throws Exception {
    String mapping = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties")
            .startObject("point").field("type", "geo_point").field("lat_lon", true).field("store", "yes")
            .endObject().endObject().endObject().endObject().string();
    DocumentMapper defaultMapper = MapperTests.newParser().parse(mapping);

    ParsedDocument doc = defaultMapper.parse("type", "1", XContentFactory.jsonBuilder().startObject()
            .startArray("point").value(1.3).value(1.2).endArray().endObject().bytes());

    assertThat(doc.rootDoc().getField("point.lat"), notNullValue());
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getField("point.lat").binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.2)));
    assertThat(doc.rootDoc().getField("point.lon"), notNullValue());
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getField("point.lon").binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.3)));
    assertThat(doc.rootDoc().get("point"), equalTo("1.2,1.3"));
}
From source file:org.elasticsearch.test.unit.index.mapper.geo.LatLonMappingGeoPointTests.java
License:Apache License
@Test
public void testLonLatArrayArrayStored() throws Exception {
    String mapping = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties")
            .startObject("point").field("type", "geo_point").field("lat_lon", true).field("store", "yes")
            .endObject().endObject().endObject().endObject().string();
    DocumentMapper defaultMapper = MapperTests.newParser().parse(mapping);

    ParsedDocument doc = defaultMapper.parse("type", "1",
            XContentFactory.jsonBuilder().startObject().startArray("point").startArray().value(1.3).value(1.2)
                    .endArray().startArray().value(1.5).value(1.4).endArray().endArray().endObject().bytes());

    assertThat(doc.rootDoc().getFields("point.lat").length, equalTo(2));
    assertThat(doc.rootDoc().getFields("point.lon").length, equalTo(2));
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lat")[0].binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.2)));
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lon")[0].binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.3)));
    assertThat(doc.rootDoc().getFields("point")[0].stringValue(), equalTo("1.2,1.3"));
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lat")[1].binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.4)));
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lon")[1].binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.5)));
    assertThat(doc.rootDoc().getFields("point")[1].stringValue(), equalTo("1.4,1.5"));
}
From source file:org.elasticsearch.transport.netty.ChannelBufferBytesReferenceTests.java
License:Apache License
public void testImmutable() throws IOException {
    BytesReference bytesReference = newBytesReference(randomIntBetween(10, 3 * PAGE_SIZE));
    BytesRef bytesRef = BytesRef.deepCopyOf(bytesReference.toBytesRef());
    ChannelBuffer channelBuffer = ChannelBuffers.wrappedBuffer(bytesRef.bytes, bytesRef.offset, bytesRef.length);
    ChannelBufferBytesReference channelBufferBytesReference =
            new ChannelBufferBytesReference(channelBuffer, bytesRef.length);
    assertEquals(channelBufferBytesReference, bytesReference);
    channelBuffer.readInt(); // this advances the index of the channel buffer
    assertEquals(channelBufferBytesReference, bytesReference);
    assertEquals(bytesRef, channelBufferBytesReference.toBytesRef());

    BytesRef unicodeBytes = new BytesRef(randomUnicodeOfCodepointLength(100));
    channelBuffer = ChannelBuffers.wrappedBuffer(unicodeBytes.bytes, unicodeBytes.offset, unicodeBytes.length);
    channelBufferBytesReference = new ChannelBufferBytesReference(channelBuffer, unicodeBytes.length);
    String utf8ToString = channelBufferBytesReference.utf8ToString();
    channelBuffer.readInt(); // this advances the index of the channel buffer
    assertEquals(utf8ToString, channelBufferBytesReference.utf8ToString());
}
From source file:org.elasticsearch.transport.netty4.ByteBufBytesReferenceTests.java
License:Apache License
public void testImmutable() throws IOException {
    BytesReference bytesReference = newBytesReference(randomIntBetween(10, 3 * PAGE_SIZE));
    BytesRef bytesRef = BytesRef.deepCopyOf(bytesReference.toBytesRef());
    ByteBuf channelBuffer = Unpooled.wrappedBuffer(bytesRef.bytes, bytesRef.offset, bytesRef.length);
    ByteBufBytesReference byteBufBytesReference = new ByteBufBytesReference(channelBuffer, bytesRef.length);
    assertEquals(byteBufBytesReference, bytesReference);
    channelBuffer.readInt(); // this advances the index of the channel buffer
    assertEquals(byteBufBytesReference, bytesReference);
    assertEquals(bytesRef, byteBufBytesReference.toBytesRef());

    BytesRef unicodeBytes = new BytesRef(randomUnicodeOfCodepointLength(100));
    channelBuffer = Unpooled.wrappedBuffer(unicodeBytes.bytes, unicodeBytes.offset, unicodeBytes.length);
    byteBufBytesReference = new ByteBufBytesReference(channelBuffer, unicodeBytes.length);
    String utf8ToString = byteBufBytesReference.utf8ToString();
    channelBuffer.readInt(); // this advances the index of the channel buffer
    assertEquals(utf8ToString, byteBufBytesReference.utf8ToString());
}
From source file:org.pageseeder.flint.lucene.search.Terms.java
License:Apache License
/**
 * Loads all the fuzzy terms in the list of terms given the reader.
 *
 * @param reader Index reader to use.
 * @param bucket Where to store the terms.
 * @param term   The term to use.
 *
 * @throws IOException If an error is thrown by the fuzzy term enumeration.
 */
@Beta
public static void fuzzy(IndexReader reader, Bucket<Term> bucket, Term term, int minSimilarity) throws IOException {
    AttributeSource atts = new AttributeSource();
    Fields fields = MultiFields.getFields(reader);
    org.apache.lucene.index.Terms terms = fields == null ? null : fields.terms(term.field());
    if (terms == null) return;
    FuzzyTermsEnum fuzzy = new FuzzyTermsEnum(terms, atts, term, minSimilarity, 0, true);
    BytesRef val;
    BytesRef searched = term.bytes();
    while ((val = fuzzy.next()) != null) {
        if (!searched.bytesEquals(val)) {
            Term t = new Term(term.field(), BytesRef.deepCopyOf(val));
            bucket.add(t, reader.docFreq(t));
        }
    }
}
From source file:org.pageseeder.flint.lucene.search.Terms.java
License:Apache License
/**
 * Loads all the prefix terms in the list of terms given the reader.
 *
 * @param reader Index reader to use.
 * @param bucket Where to store the terms.
 * @param term   The term to use.
 *
 * @throws IOException If an error is thrown by the prefix term enumeration.
 */
public static void prefix(IndexReader reader, Bucket<Term> bucket, Term term) throws IOException {
    Fields fields = MultiFields.getFields(reader);
    org.apache.lucene.index.Terms terms = fields == null ? null : fields.terms(term.field());
    if (terms == null) return;
    TermsEnum prefixes = terms.intersect(new CompiledAutomaton(PrefixQuery.toAutomaton(term.bytes())), term.bytes());
    BytesRef val;
    while ((val = prefixes.next()) != null) {
        Term t = new Term(term.field(), BytesRef.deepCopyOf(val));
        bucket.add(t, reader.docFreq(t));
    }
}
From source file:org.pageseeder.flint.lucene.search.Terms.java
License:Apache License
/**
 * Returns the list of terms for the specified field.
 *
 * @param reader The index reader
 * @param field  The field
 *
 * @return the list of terms for this field
 *
 * @throws IOException should any IO error be reported.
 */
@Beta
public static List<Term> terms(IndexReader reader, String field) throws IOException {
    LOGGER.debug("Loading terms for field {}", field);
    org.apache.lucene.index.Terms terms = MultiFields.getTerms(reader, field);
    if (terms == null) return Collections.emptyList();
    TermsEnum termsEnum = terms.iterator();
    if (termsEnum == TermsEnum.EMPTY) return Collections.emptyList();
    Map<BytesRef, Term> termsList = new HashMap<BytesRef, Term>();
    while (termsEnum.next() != null) {
        BytesRef t = termsEnum.term();
        if (t == null) break;
        termsList.put(t, new Term(field, BytesRef.deepCopyOf(t)));
    }
    return new ArrayList<>(termsList.values());
}
From source file:org.sindice.siren.search.node.TopNodeTermsRewrite.java
License:Apache License
@Override
public Q rewrite(final IndexReader reader, final MultiNodeTermQuery query) throws IOException {
    final int maxSize = Math.min(size, this.getMaxSize());
    final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();
    this.collectTerms(reader, query, new TermCollector() {

        private final MaxNonCompetitiveBoostAttribute maxBoostAtt = attributes
                .addAttribute(MaxNonCompetitiveBoostAttribute.class);

        private final Map<BytesRef, ScoreTerm> visitedTerms = new HashMap<BytesRef, ScoreTerm>();

        private TermsEnum termsEnum;
        private Comparator<BytesRef> termComp;
        private BoostAttribute boostAtt;
        private ScoreTerm st;

        @Override
        public void setNextEnum(final TermsEnum termsEnum) throws IOException {
            this.termsEnum = termsEnum;
            this.termComp = termsEnum.getComparator();

            assert this.compareToLastTerm(null);

            // lazy init the initial ScoreTerm because comparator is not known on ctor:
            if (st == null)
                st = new ScoreTerm(this.termComp, new TermContext(topReaderContext));
            boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class);
        }

        // for assert:
        private BytesRef lastTerm;

        private boolean compareToLastTerm(final BytesRef t) throws IOException {
            if (lastTerm == null && t != null) {
                lastTerm = BytesRef.deepCopyOf(t);
            } else if (t == null) {
                lastTerm = null;
            } else {
                assert termsEnum.getComparator().compare(lastTerm, t) < 0 : "lastTerm=" + lastTerm + " t=" + t;
                lastTerm.copyBytes(t);
            }
            return true;
        }

        @Override
        public boolean collect(final BytesRef bytes) throws IOException {
            final float boost = boostAtt.getBoost();

            // make sure within a single seg we always collect
            // terms in order
            assert this.compareToLastTerm(bytes);

            //System.out.println("TTR.collect term=" + bytes.utf8ToString() + " boost=" + boost + " ord=" + readerContext.ord);
            // ignore uncompetitive hits
            if (stQueue.size() == maxSize) {
                final ScoreTerm t = stQueue.peek();
                if (boost < t.boost)
                    return true;
                if (boost == t.boost && termComp.compare(bytes, t.bytes) > 0)
                    return true;
            }
            ScoreTerm t = visitedTerms.get(bytes);
            final TermState state = termsEnum.termState();
            assert state != null;
            if (t != null) {
                // if the term is already in the PQ, only update docFreq of term in PQ
                assert t.boost == boost : "boost should be equal in all segment TermsEnums";
                t.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
            } else {
                // add new entry in PQ, we must clone the term, else it may get overwritten!
                st.bytes.copyBytes(bytes);
                st.boost = boost;
                visitedTerms.put(st.bytes, st);
                assert st.termState.docFreq() == 0;
                st.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
                stQueue.offer(st);
                // possibly drop entries from queue
                if (stQueue.size() > maxSize) {
                    st = stQueue.poll();
                    visitedTerms.remove(st.bytes);
                    st.termState.clear(); // reset the termstate!
                } else {
                    st = new ScoreTerm(termComp, new TermContext(topReaderContext));
                }
                assert stQueue.size() <= maxSize : "the PQ size must be limited to maxSize";
                // set maxBoostAtt with values to help FuzzyTermsEnum to optimize
                if (stQueue.size() == maxSize) {
                    t = stQueue.peek();
                    maxBoostAtt.setMaxNonCompetitiveBoost(t.boost);
                    maxBoostAtt.setCompetitiveTerm(t.bytes);
                }
            }
            return true;
        }
    });

    final Q q = this.getTopLevelQuery();
    final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
    ArrayUtil.mergeSort(scoreTerms, scoreTermSortByTermComp);
    for (final ScoreTerm st : scoreTerms) {
        final Term term = new Term(query.field, st.bytes);
        assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs "
                + st.termState.docFreq() + " term=" + term;
        this.addClause(q, term, st.termState.docFreq(), query.getBoost() * st.boost, st.termState); // add to query
    }
    return q;
}
From source file:perf.IDPerfTest.java
License:Apache License
private static Result testOne(String indexPath, String desc, IDIterator ids, final int minTermsInBlock,
        final int maxTermsInBlock) throws IOException {
    System.out.println("\ntest: " + desc + " termBlocks=" + minTermsInBlock + "/" + maxTermsInBlock);
    Directory dir = FSDirectory.open(new File(indexPath));
    //IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, new StandardAnalyzer(Version.LUCENE_48));
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_8, new StandardAnalyzer(Version.LUCENE_4_8));
    iwc.setMergeScheduler(new SerialMergeScheduler());
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    // So I can walk the files and get the *.tip sizes:
    iwc.setUseCompoundFile(false);
    iwc.setCodec(new Lucene53Codec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            return new Lucene50PostingsFormat(minTermsInBlock, maxTermsInBlock);
        }
    });

    /// 7/7/7 segment structure:
    iwc.setMaxBufferedDocs(ID_COUNT / 777);
    iwc.setRAMBufferSizeMB(-1);
    //iwc.setInfoStream(new PrintStreamInfoStream(System.out));

    //iwc.setMergePolicy(new LogDocMergePolicy());
    ((TieredMergePolicy) iwc.getMergePolicy()).setFloorSegmentMB(.001);
    ((TieredMergePolicy) iwc.getMergePolicy()).setNoCFSRatio(0.0);
    //((LogDocMergePolicy) iwc.getMergePolicy()).setMinMergeDocs(1000);
    iwc.getMergePolicy().setNoCFSRatio(0.0);

    IndexWriter w = new IndexWriter(dir, iwc);
    Document doc = new Document();
    FieldType ft = new FieldType(StringField.TYPE_NOT_STORED);
    ft.setTokenized(true);
    ft.freeze();

    BytesRef idValue = new BytesRef(64);
    Field idField = new Field("id", new BinaryTokenStream(idValue), ft);
    doc.add(idField);

    long t0 = System.nanoTime();

    BytesRef[] lookupIDs = new BytesRef[ID_SEARCH_COUNT];
    Random random = new Random(17);
    int lookupCount = 0;
    double rate = 1.01 * ((double) ID_SEARCH_COUNT) / ID_COUNT;
    for (int i = 0; i < ID_COUNT; i++) {
        ids.next(idValue);
        if (lookupCount < lookupIDs.length && random.nextDouble() <= rate) {
            lookupIDs[lookupCount++] = BytesRef.deepCopyOf(idValue);
        }
        // Trickery: the idsIter changed the idValue which the BinaryTokenStream reuses for each added doc
        w.addDocument(doc);
    }

    if (lookupCount < lookupIDs.length) {
        throw new RuntimeException("didn't get enough lookup ids: " + lookupCount + " vs " + lookupIDs.length);
    }

    long indexTime = System.nanoTime() - t0;

    System.out.println(" indexing done; waitForMerges...");
    w.waitForMerges();

    IndexReader r = DirectoryReader.open(w, true);
    System.out.println(" reader=" + r);

    shuffle(random, lookupIDs);
    shuffle(random, lookupIDs);

    long bestTime = Long.MAX_VALUE;
    long checksum = 0;

    List<AtomicReaderContext> leaves = new ArrayList<>(r.leaves());

    // Sort largest to smallest:
    Collections.sort(leaves, new Comparator<AtomicReaderContext>() {
        @Override
        public int compare(AtomicReaderContext c1, AtomicReaderContext c2) {
            return c2.reader().maxDoc() - c1.reader().maxDoc();
        }
    });

    TermsEnum[] termsEnums = new TermsEnum[leaves.size()];
    DocsEnum[] docsEnums = new DocsEnum[leaves.size()];
    int[] docBases = new int[leaves.size()];
    for (int i = 0; i < leaves.size(); i++) {
        //System.out.println("i=" + i + " count=" + leaves.get(i).reader().maxDoc());
        termsEnums[i] = leaves.get(i).reader().fields().terms("id").iterator(null);
        docBases[i] = leaves.get(i).docBase;
    }

    long rawLookupCount = 0;

    int countx = 0;
    for (int iter = 0; iter < 5; iter++) {
        t0 = System.nanoTime();
        BlockTreeTermsReader.seekExactFastNotFound = 0;
        BlockTreeTermsReader.seekExactFastRootNotFound = 0;
        rawLookupCount = 0;
        for (BytesRef id : lookupIDs) {
            if (countx++ < 50) {
                System.out.println(" id=" + id);
            }
            boolean found = false;
            for (int seg = 0; seg < termsEnums.length; seg++) {
                rawLookupCount++;
                if (termsEnums[seg].seekExact(id)) {
                    docsEnums[seg] = termsEnums[seg].docs(null, docsEnums[seg], 0);
                    int docID = docsEnums[seg].nextDoc();
                    if (docID == DocsEnum.NO_MORE_DOCS) {
                        // uh-oh!
                        throw new RuntimeException("id not found: " + id);
                    }
                    // paranoia:
                    checksum += docID + docBases[seg];
                    found = true;
                    // Optimization vs MultiFields: we don't need to check any more segments since id is PK
                    break;
                }
            }

            if (found == false) {
                // uh-oh!
                throw new RuntimeException("id not found: " + id);
            }
        }
        long lookupTime = System.nanoTime() - t0;
        System.out.println(String.format(Locale.ROOT, " iter=" + iter + " lookupTime=%.3f sec",
                lookupTime / 1000000000.0));
        if (lookupTime < bestTime) {
            bestTime = lookupTime;
            System.out.println(" **");
        }
    }

    long totalBytes = 0;
    long termsIndexTotalBytes = 0;
    for (String fileName : dir.listAll()) {
        long bytes = dir.fileLength(fileName);
        totalBytes += bytes;
        if (fileName.endsWith(".tip")) {
            termsIndexTotalBytes += bytes;
        }
    }

    r.close();
    w.rollback();
    dir.close();

    return new Result(desc, ID_COUNT / (indexTime / 1000000.0), lookupIDs.length / (bestTime / 1000000.0),
            totalBytes, termsIndexTotalBytes, checksum, BlockTreeTermsReader.seekExactFastNotFound,
            BlockTreeTermsReader.seekExactFastRootNotFound, rawLookupCount, minTermsInBlock, maxTermsInBlock);
}