List of usage examples for org.apache.lucene.util.BytesRef.deepCopyOf
public static BytesRef deepCopyOf(BytesRef other)
Parameter: other, the BytesRef to copy.
Creates a new BytesRef that points to a copy of the bytes from other. The returned BytesRef will have a length of other.length and an offset of zero.
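Why copies matter: Lucene enumeration APIs such as TermsEnum reuse a single BytesRef instance across calls to next(), so a term that must survive the next iteration has to be copied first, which is the pattern most of the examples below share. The following is a minimal sketch of that pattern, assuming a caller already holds a TermsEnum; the CollectFieldTerms class and its collect method are hypothetical names introduced here for illustration and do not appear in the examples.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

// Hypothetical helper used only to illustrate the copy-before-store pattern.
public final class CollectFieldTerms {

    public static List<BytesRef> collect(TermsEnum termsEnum) throws IOException {
        List<BytesRef> copies = new ArrayList<>();
        BytesRef term;
        while ((term = termsEnum.next()) != null) {
            // deepCopyOf allocates a fresh byte[] of exactly term.length with offset 0,
            // so the stored value is not clobbered when the enum reuses its buffer.
            copies.add(BytesRef.deepCopyOf(term));
        }
        return copies;
    }
}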
From source file:org.elasticsearch.test.unit.index.mapper.geo.LatLonMappingGeoPointTests.java
License:Apache License
@Test
public void testLatLonInOneValueArray() throws Exception {
    String mapping = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties")
            .startObject("point").field("type", "geo_point").field("lat_lon", true).field("store", "yes")
            .endObject().endObject().endObject().endObject().string();
    DocumentMapper defaultMapper = MapperTests.newParser().parse(mapping);

    ParsedDocument doc = defaultMapper.parse("type", "1", XContentFactory.jsonBuilder().startObject()
            .startArray("point").value("1.2,1.3").value("1.4,1.5").endArray().endObject().bytes());

    assertThat(doc.rootDoc().getFields("point.lat").length, equalTo(2));
    assertThat(doc.rootDoc().getFields("point.lon").length, equalTo(2));
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lat")[0].binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.2)));
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lon")[0].binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.3)));
    assertThat(doc.rootDoc().getFields("point")[0].stringValue(), equalTo("1.2,1.3"));
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lat")[1].binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.4)));
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lon")[1].binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.5)));
    assertThat(doc.rootDoc().getFields("point")[1].stringValue(), equalTo("1.4,1.5"));
}
From source file:org.elasticsearch.test.unit.index.mapper.geo.LatLonMappingGeoPointTests.java
License:Apache License
@Test
public void testLonLatArrayStored() throws Exception {
    String mapping = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties")
            .startObject("point").field("type", "geo_point").field("lat_lon", true).field("store", "yes")
            .endObject().endObject().endObject().endObject().string();
    DocumentMapper defaultMapper = MapperTests.newParser().parse(mapping);

    ParsedDocument doc = defaultMapper.parse("type", "1", XContentFactory.jsonBuilder().startObject()
            .startArray("point").value(1.3).value(1.2).endArray().endObject().bytes());

    assertThat(doc.rootDoc().getField("point.lat"), notNullValue());
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getField("point.lat").binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.2)));
    assertThat(doc.rootDoc().getField("point.lon"), notNullValue());
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getField("point.lon").binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.3)));
    assertThat(doc.rootDoc().get("point"), equalTo("1.2,1.3"));
}
From source file:org.elasticsearch.test.unit.index.mapper.geo.LatLonMappingGeoPointTests.java
License:Apache License
@Test
public void testLonLatArrayArrayStored() throws Exception {
    String mapping = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties")
            .startObject("point").field("type", "geo_point").field("lat_lon", true).field("store", "yes")
            .endObject().endObject().endObject().endObject().string();
    DocumentMapper defaultMapper = MapperTests.newParser().parse(mapping);

    ParsedDocument doc = defaultMapper.parse("type", "1",
            XContentFactory.jsonBuilder().startObject().startArray("point").startArray().value(1.3).value(1.2)
                    .endArray().startArray().value(1.5).value(1.4).endArray().endArray().endObject().bytes());

    assertThat(doc.rootDoc().getFields("point.lat").length, equalTo(2));
    assertThat(doc.rootDoc().getFields("point.lon").length, equalTo(2));
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lat")[0].binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.2)));
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lon")[0].binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.3)));
    assertThat(doc.rootDoc().getFields("point")[0].stringValue(), equalTo("1.2,1.3"));
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lat")[1].binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.4)));
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lon")[1].binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.5)));
    assertThat(doc.rootDoc().getFields("point")[1].stringValue(), equalTo("1.4,1.5"));
}
From source file:org.elasticsearch.transport.netty.ChannelBufferBytesReferenceTests.java
License:Apache License
public void testImmutable() throws IOException {
    BytesReference bytesReference = newBytesReference(randomIntBetween(10, 3 * PAGE_SIZE));
    BytesRef bytesRef = BytesRef.deepCopyOf(bytesReference.toBytesRef());
    ChannelBuffer channelBuffer = ChannelBuffers.wrappedBuffer(bytesRef.bytes, bytesRef.offset, bytesRef.length);
    ChannelBufferBytesReference channelBufferBytesReference =
            new ChannelBufferBytesReference(channelBuffer, bytesRef.length);
    assertEquals(channelBufferBytesReference, bytesReference);
    channelBuffer.readInt(); // this advances the index of the channel buffer
    assertEquals(channelBufferBytesReference, bytesReference);
    assertEquals(bytesRef, channelBufferBytesReference.toBytesRef());

    BytesRef unicodeBytes = new BytesRef(randomUnicodeOfCodepointLength(100));
    channelBuffer = ChannelBuffers.wrappedBuffer(unicodeBytes.bytes, unicodeBytes.offset, unicodeBytes.length);
    channelBufferBytesReference = new ChannelBufferBytesReference(channelBuffer, unicodeBytes.length);
    String utf8ToString = channelBufferBytesReference.utf8ToString();
    channelBuffer.readInt(); // this advances the index of the channel buffer
    assertEquals(utf8ToString, channelBufferBytesReference.utf8ToString());
}
From source file:org.elasticsearch.transport.netty4.ByteBufBytesReferenceTests.java
License:Apache License
public void testImmutable() throws IOException {
    BytesReference bytesReference = newBytesReference(randomIntBetween(10, 3 * PAGE_SIZE));
    BytesRef bytesRef = BytesRef.deepCopyOf(bytesReference.toBytesRef());
    ByteBuf channelBuffer = Unpooled.wrappedBuffer(bytesRef.bytes, bytesRef.offset, bytesRef.length);
    ByteBufBytesReference byteBufBytesReference = new ByteBufBytesReference(channelBuffer, bytesRef.length);
    assertEquals(byteBufBytesReference, bytesReference);
    channelBuffer.readInt(); // this advances the index of the channel buffer
    assertEquals(byteBufBytesReference, bytesReference);
    assertEquals(bytesRef, byteBufBytesReference.toBytesRef());

    BytesRef unicodeBytes = new BytesRef(randomUnicodeOfCodepointLength(100));
    channelBuffer = Unpooled.wrappedBuffer(unicodeBytes.bytes, unicodeBytes.offset, unicodeBytes.length);
    byteBufBytesReference = new ByteBufBytesReference(channelBuffer, unicodeBytes.length);
    String utf8ToString = byteBufBytesReference.utf8ToString();
    channelBuffer.readInt(); // this advances the index of the channel buffer
    assertEquals(utf8ToString, byteBufBytesReference.utf8ToString());
}
From source file:org.pageseeder.flint.lucene.search.Terms.java
License:Apache License
/**
 * Loads all the fuzzy terms in the list of terms given the reader.
 *
 * @param reader Index reader to use.
 * @param bucket Where to store the terms.
 * @param term   The term to use.
 *
 * @throws IOException If an error is thrown by the fuzzy term enumeration.
 */
@Beta
public static void fuzzy(IndexReader reader, Bucket<Term> bucket, Term term, int minSimilarity) throws IOException {
    AttributeSource atts = new AttributeSource();
    Fields fields = MultiFields.getFields(reader);
    org.apache.lucene.index.Terms terms = fields == null ? null : fields.terms(term.field());
    if (terms == null) return;
    FuzzyTermsEnum fuzzy = new FuzzyTermsEnum(terms, atts, term, minSimilarity, 0, true);
    BytesRef val;
    BytesRef searched = term.bytes();
    while ((val = fuzzy.next()) != null) {
        if (!searched.bytesEquals(val)) {
            Term t = new Term(term.field(), BytesRef.deepCopyOf(val));
            bucket.add(t, reader.docFreq(t));
        }
    }
}
From source file:org.pageseeder.flint.lucene.search.Terms.java
License:Apache License
/**
 * Loads all the prefix terms in the list of terms given the reader.
 *
 * @param reader Index reader to use.
 * @param bucket Where to store the terms.
 * @param term   The term to use.
 *
 * @throws IOException If an error is thrown by the prefix term enumeration.
 */
public static void prefix(IndexReader reader, Bucket<Term> bucket, Term term) throws IOException {
    Fields fields = MultiFields.getFields(reader);
    org.apache.lucene.index.Terms terms = fields == null ? null : fields.terms(term.field());
    if (terms == null) return;
    TermsEnum prefixes = terms.intersect(new CompiledAutomaton(PrefixQuery.toAutomaton(term.bytes())), term.bytes());
    BytesRef val;
    while ((val = prefixes.next()) != null) {
        Term t = new Term(term.field(), BytesRef.deepCopyOf(val));
        bucket.add(t, reader.docFreq(t));
    }
}
From source file:org.pageseeder.flint.lucene.search.Terms.java
License:Apache License
/**
 * Returns the list of terms for the specified field.
 *
 * @param reader The index reader
 * @param field  The field
 *
 * @return the list of terms for this field
 *
 * @throws IOException should any IO error be reported.
 */
@Beta
public static List<Term> terms(IndexReader reader, String field) throws IOException {
    LOGGER.debug("Loading terms for field {}", field);
    org.apache.lucene.index.Terms terms = MultiFields.getTerms(reader, field);
    if (terms == null) return Collections.emptyList();
    TermsEnum termsEnum = terms.iterator();
    if (termsEnum == TermsEnum.EMPTY) return Collections.emptyList();
    Map<BytesRef, Term> termsList = new HashMap<BytesRef, Term>();
    while (termsEnum.next() != null) {
        BytesRef t = termsEnum.term();
        if (t == null) break;
        termsList.put(t, new Term(field, BytesRef.deepCopyOf(t)));
    }
    return new ArrayList<>(termsList.values());
}
From source file:org.sindice.siren.search.node.TopNodeTermsRewrite.java
License:Apache License
@Override
public Q rewrite(final IndexReader reader, final MultiNodeTermQuery query) throws IOException {
    final int maxSize = Math.min(size, this.getMaxSize());
    final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();
    this.collectTerms(reader, query, new TermCollector() {

        private final MaxNonCompetitiveBoostAttribute maxBoostAtt = attributes
                .addAttribute(MaxNonCompetitiveBoostAttribute.class);

        private final Map<BytesRef, ScoreTerm> visitedTerms = new HashMap<BytesRef, ScoreTerm>();

        private TermsEnum termsEnum;
        private Comparator<BytesRef> termComp;
        private BoostAttribute boostAtt;
        private ScoreTerm st;

        @Override
        public void setNextEnum(final TermsEnum termsEnum) throws IOException {
            this.termsEnum = termsEnum;
            this.termComp = termsEnum.getComparator();

            assert this.compareToLastTerm(null);

            // lazy init the initial ScoreTerm because comparator is not known on ctor:
            if (st == null)
                st = new ScoreTerm(this.termComp, new TermContext(topReaderContext));
            boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class);
        }

        // for assert:
        private BytesRef lastTerm;

        private boolean compareToLastTerm(final BytesRef t) throws IOException {
            if (lastTerm == null && t != null) {
                lastTerm = BytesRef.deepCopyOf(t);
            } else if (t == null) {
                lastTerm = null;
            } else {
                assert termsEnum.getComparator().compare(lastTerm, t) < 0 : "lastTerm=" + lastTerm + " t=" + t;
                lastTerm.copyBytes(t);
            }
            return true;
        }

        @Override
        public boolean collect(final BytesRef bytes) throws IOException {
            final float boost = boostAtt.getBoost();

            // make sure within a single seg we always collect
            // terms in order
            assert this.compareToLastTerm(bytes);

            //System.out.println("TTR.collect term=" + bytes.utf8ToString() + " boost=" + boost + " ord=" + readerContext.ord);
            // ignore uncompetitive hits
            if (stQueue.size() == maxSize) {
                final ScoreTerm t = stQueue.peek();
                if (boost < t.boost)
                    return true;
                if (boost == t.boost && termComp.compare(bytes, t.bytes) > 0)
                    return true;
            }
            ScoreTerm t = visitedTerms.get(bytes);
            final TermState state = termsEnum.termState();
            assert state != null;
            if (t != null) {
                // if the term is already in the PQ, only update docFreq of term in PQ
                assert t.boost == boost : "boost should be equal in all segment TermsEnums";
                t.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
            } else {
                // add new entry in PQ, we must clone the term, else it may get overwritten!
                st.bytes.copyBytes(bytes);
                st.boost = boost;
                visitedTerms.put(st.bytes, st);
                assert st.termState.docFreq() == 0;
                st.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
                stQueue.offer(st);
                // possibly drop entries from queue
                if (stQueue.size() > maxSize) {
                    st = stQueue.poll();
                    visitedTerms.remove(st.bytes);
                    st.termState.clear(); // reset the termstate!
                } else {
                    st = new ScoreTerm(termComp, new TermContext(topReaderContext));
                }
                assert stQueue.size() <= maxSize : "the PQ size must be limited to maxSize";
                // set maxBoostAtt with values to help FuzzyTermsEnum to optimize
                if (stQueue.size() == maxSize) {
                    t = stQueue.peek();
                    maxBoostAtt.setMaxNonCompetitiveBoost(t.boost);
                    maxBoostAtt.setCompetitiveTerm(t.bytes);
                }
            }
            return true;
        }
    });

    final Q q = this.getTopLevelQuery();
    final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
    ArrayUtil.mergeSort(scoreTerms, scoreTermSortByTermComp);
    for (final ScoreTerm st : scoreTerms) {
        final Term term = new Term(query.field, st.bytes);
        assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs "
                + st.termState.docFreq() + " term=" + term;
        this.addClause(q, term, st.termState.docFreq(), query.getBoost() * st.boost, st.termState); // add to query
    }
    return q;
}
From source file:perf.IDPerfTest.java
License:Apache License
private static Result testOne(String indexPath, String desc, IDIterator ids, final int minTermsInBlock,
        final int maxTermsInBlock) throws IOException {
    System.out.println("\ntest: " + desc + " termBlocks=" + minTermsInBlock + "/" + maxTermsInBlock);
    Directory dir = FSDirectory.open(new File(indexPath));
    //IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, new StandardAnalyzer(Version.LUCENE_48));
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_8, new StandardAnalyzer(Version.LUCENE_4_8));
    iwc.setMergeScheduler(new SerialMergeScheduler());
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    // So I can walk the files and get the *.tip sizes:
    iwc.setUseCompoundFile(false);
    iwc.setCodec(new Lucene53Codec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            return new Lucene50PostingsFormat(minTermsInBlock, maxTermsInBlock);
        }
    });

    /// 7/7/7 segment structure:
    iwc.setMaxBufferedDocs(ID_COUNT / 777);
    iwc.setRAMBufferSizeMB(-1);
    //iwc.setInfoStream(new PrintStreamInfoStream(System.out));

    //iwc.setMergePolicy(new LogDocMergePolicy());
    ((TieredMergePolicy) iwc.getMergePolicy()).setFloorSegmentMB(.001);
    ((TieredMergePolicy) iwc.getMergePolicy()).setNoCFSRatio(0.0);
    //((LogDocMergePolicy) iwc.getMergePolicy()).setMinMergeDocs(1000);
    iwc.getMergePolicy().setNoCFSRatio(0.0);

    IndexWriter w = new IndexWriter(dir, iwc);
    Document doc = new Document();
    FieldType ft = new FieldType(StringField.TYPE_NOT_STORED);
    ft.setTokenized(true);
    ft.freeze();

    BytesRef idValue = new BytesRef(64);
    Field idField = new Field("id", new BinaryTokenStream(idValue), ft);
    doc.add(idField);

    long t0 = System.nanoTime();

    BytesRef[] lookupIDs = new BytesRef[ID_SEARCH_COUNT];
    Random random = new Random(17);
    int lookupCount = 0;
    double rate = 1.01 * ((double) ID_SEARCH_COUNT) / ID_COUNT;
    for (int i = 0; i < ID_COUNT; i++) {
        ids.next(idValue);
        if (lookupCount < lookupIDs.length && random.nextDouble() <= rate) {
            lookupIDs[lookupCount++] = BytesRef.deepCopyOf(idValue);
        }
        // Trickery: the idsIter changed the idValue which the BinaryTokenStream reuses for each added doc
        w.addDocument(doc);
    }

    if (lookupCount < lookupIDs.length) {
        throw new RuntimeException("didn't get enough lookup ids: " + lookupCount + " vs " + lookupIDs.length);
    }

    long indexTime = System.nanoTime() - t0;

    System.out.println(" indexing done; waitForMerges...");
    w.waitForMerges();

    IndexReader r = DirectoryReader.open(w, true);
    System.out.println(" reader=" + r);

    shuffle(random, lookupIDs);
    shuffle(random, lookupIDs);

    long bestTime = Long.MAX_VALUE;
    long checksum = 0;

    List<AtomicReaderContext> leaves = new ArrayList<>(r.leaves());

    // Sort largest to smallest:
    Collections.sort(leaves, new Comparator<AtomicReaderContext>() {
        @Override
        public int compare(AtomicReaderContext c1, AtomicReaderContext c2) {
            return c2.reader().maxDoc() - c1.reader().maxDoc();
        }
    });

    TermsEnum[] termsEnums = new TermsEnum[leaves.size()];
    DocsEnum[] docsEnums = new DocsEnum[leaves.size()];
    int[] docBases = new int[leaves.size()];
    for (int i = 0; i < leaves.size(); i++) {
        //System.out.println("i=" + i + " count=" + leaves.get(i).reader().maxDoc());
        termsEnums[i] = leaves.get(i).reader().fields().terms("id").iterator(null);
        docBases[i] = leaves.get(i).docBase;
    }

    long rawLookupCount = 0;

    int countx = 0;
    for (int iter = 0; iter < 5; iter++) {
        t0 = System.nanoTime();
        BlockTreeTermsReader.seekExactFastNotFound = 0;
        BlockTreeTermsReader.seekExactFastRootNotFound = 0;
        rawLookupCount = 0;
        for (BytesRef id : lookupIDs) {
            if (countx++ < 50) {
                System.out.println(" id=" + id);
            }
            boolean found = false;
            for (int seg = 0; seg < termsEnums.length; seg++) {
                rawLookupCount++;
                if (termsEnums[seg].seekExact(id)) {
                    docsEnums[seg] = termsEnums[seg].docs(null, docsEnums[seg], 0);
                    int docID = docsEnums[seg].nextDoc();
                    if (docID == DocsEnum.NO_MORE_DOCS) {
                        // uh-oh!
                        throw new RuntimeException("id not found: " + id);
                    }
                    // paranoia:
                    checksum += docID + docBases[seg];
                    found = true;
                    // Optimization vs MultiFields: we don't need to check any more segments since id is PK
                    break;
                }
            }

            if (found == false) {
                // uh-oh!
                throw new RuntimeException("id not found: " + id);
            }
        }
        long lookupTime = System.nanoTime() - t0;
        System.out.println(String.format(Locale.ROOT, " iter=" + iter + " lookupTime=%.3f sec",
                lookupTime / 1000000000.0));
        if (lookupTime < bestTime) {
            bestTime = lookupTime;
            System.out.println(" **");
        }
    }

    long totalBytes = 0;
    long termsIndexTotalBytes = 0;
    for (String fileName : dir.listAll()) {
        long bytes = dir.fileLength(fileName);
        totalBytes += bytes;
        if (fileName.endsWith(".tip")) {
            termsIndexTotalBytes += bytes;
        }
    }

    r.close();
    w.rollback();
    dir.close();

    return new Result(desc, ID_COUNT / (indexTime / 1000000.0), lookupIDs.length / (bestTime / 1000000.0),
            totalBytes, termsIndexTotalBytes, checksum, BlockTreeTermsReader.seekExactFastNotFound,
            BlockTreeTermsReader.seekExactFastRootNotFound, rawLookupCount, minTermsInBlock, maxTermsInBlock);
}