Example usage for org.apache.lucene.util BytesRef deepCopyOf

Introduction

This page collects example usages of org.apache.lucene.util.BytesRef#deepCopyOf.

Prototype

public static BytesRef deepCopyOf(BytesRef other) 

Document

Creates a new BytesRef that points to a copy of the bytes from other.

The returned BytesRef will have a length of other.length and an offset of zero.
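Before the project examples, here is a minimal self-contained sketch of what the copy semantics buy you (the class name DeepCopyOfDemo and the toy buffer are ours, not from any of the projects below): the copy owns fresh storage, so later mutation of the source buffer leaves it untouched. This is exactly why the TermsEnum and token-stream examples below deep-copy reused BytesRef instances before holding on to them.

import java.nio.charset.StandardCharsets;

import org.apache.lucene.util.BytesRef;

public class DeepCopyOfDemo {
    public static void main(String[] args) {
        // A BytesRef that views a slice in the middle of a shared buffer.
        byte[] buffer = "xxhelloxx".getBytes(StandardCharsets.UTF_8);
        BytesRef view = new BytesRef(buffer, 2, 5); // points at "hello"

        // deepCopyOf allocates fresh storage: the copy has offset 0,
        // length view.length, and no longer aliases the original buffer.
        BytesRef copy = BytesRef.deepCopyOf(view);
        System.out.println(copy.offset);         // 0
        System.out.println(copy.utf8ToString()); // hello

        // Mutating the shared buffer changes the view but not the copy.
        buffer[2] = 'H';
        System.out.println(view.utf8ToString()); // Hello
        System.out.println(copy.utf8ToString()); // hello
    }
}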

Usage

From source file: org.elasticsearch.test.unit.index.mapper.geo.LatLonMappingGeoPointTests.java

License: Apache License

@Test
public void testLatLonInOneValueArray() throws Exception {
    String mapping = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties")
            .startObject("point").field("type", "geo_point").field("lat_lon", true).field("store", "yes")
            .endObject().endObject().endObject().endObject().string();

    DocumentMapper defaultMapper = MapperTests.newParser().parse(mapping);

    ParsedDocument doc = defaultMapper.parse("type", "1", XContentFactory.jsonBuilder().startObject()
            .startArray("point").value("1.2,1.3").value("1.4,1.5").endArray().endObject().bytes());

    assertThat(doc.rootDoc().getFields("point.lat").length, equalTo(2));
    assertThat(doc.rootDoc().getFields("point.lon").length, equalTo(2));
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lat")[0].binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.2)));
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lon")[0].binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.3)));
    assertThat(doc.rootDoc().getFields("point")[0].stringValue(), equalTo("1.2,1.3"));
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lat")[1].binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.4)));
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lon")[1].binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.5)));
    assertThat(doc.rootDoc().getFields("point")[1].stringValue(), equalTo("1.4,1.5"));
}

From source file: org.elasticsearch.test.unit.index.mapper.geo.LatLonMappingGeoPointTests.java

License: Apache License

@Test
public void testLonLatArrayStored() throws Exception {
    String mapping = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties")
            .startObject("point").field("type", "geo_point").field("lat_lon", true).field("store", "yes")
            .endObject().endObject().endObject().endObject().string();

    DocumentMapper defaultMapper = MapperTests.newParser().parse(mapping);

    ParsedDocument doc = defaultMapper.parse("type", "1", XContentFactory.jsonBuilder().startObject()
            .startArray("point").value(1.3).value(1.2).endArray().endObject().bytes());

    assertThat(doc.rootDoc().getField("point.lat"), notNullValue());
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getField("point.lat").binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.2)));
    assertThat(doc.rootDoc().getField("point.lon"), notNullValue());
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getField("point.lon").binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.3)));
    assertThat(doc.rootDoc().get("point"), equalTo("1.2,1.3"));
}

From source file: org.elasticsearch.test.unit.index.mapper.geo.LatLonMappingGeoPointTests.java

License: Apache License

@Test
public void testLonLatArrayArrayStored() throws Exception {
    String mapping = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties")
            .startObject("point").field("type", "geo_point").field("lat_lon", true).field("store", "yes")
            .endObject().endObject().endObject().endObject().string();

    DocumentMapper defaultMapper = MapperTests.newParser().parse(mapping);

    ParsedDocument doc = defaultMapper.parse("type", "1",
            XContentFactory.jsonBuilder().startObject().startArray("point").startArray().value(1.3).value(1.2)
                    .endArray().startArray().value(1.5).value(1.4).endArray().endArray().endObject().bytes());

    assertThat(doc.rootDoc().getFields("point.lat").length, equalTo(2));
    assertThat(doc.rootDoc().getFields("point.lon").length, equalTo(2));
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lat")[0].binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.2)));
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lon")[0].binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.3)));
    assertThat(doc.rootDoc().getFields("point")[0].stringValue(), equalTo("1.2,1.3"));
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lat")[1].binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.4)));
    assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lon")[1].binaryValue()).bytes,
            equalTo(Numbers.doubleToBytes(1.5)));
    assertThat(doc.rootDoc().getFields("point")[1].stringValue(), equalTo("1.4,1.5"));
}

From source file: org.elasticsearch.transport.netty.ChannelBufferBytesReferenceTests.java

License: Apache License

public void testImmutable() throws IOException {
    BytesReference bytesReference = newBytesReference(randomIntBetween(10, 3 * PAGE_SIZE));
    BytesRef bytesRef = BytesRef.deepCopyOf(bytesReference.toBytesRef());
    ChannelBuffer channelBuffer = ChannelBuffers.wrappedBuffer(bytesRef.bytes, bytesRef.offset,
            bytesRef.length);
    ChannelBufferBytesReference channelBufferBytesReference = new ChannelBufferBytesReference(channelBuffer,
            bytesRef.length);
    assertEquals(channelBufferBytesReference, bytesReference);
    channelBuffer.readInt(); // this advances the index of the channel buffer
    assertEquals(channelBufferBytesReference, bytesReference);
    assertEquals(bytesRef, channelBufferBytesReference.toBytesRef());

    BytesRef unicodeBytes = new BytesRef(randomUnicodeOfCodepointLength(100));
    channelBuffer = ChannelBuffers.wrappedBuffer(unicodeBytes.bytes, unicodeBytes.offset, unicodeBytes.length);
    channelBufferBytesReference = new ChannelBufferBytesReference(channelBuffer, unicodeBytes.length);
    String utf8ToString = channelBufferBytesReference.utf8ToString();
    channelBuffer.readInt(); // this advances the index of the channel buffer
    assertEquals(utf8ToString, channelBufferBytesReference.utf8ToString());
}

From source file: org.elasticsearch.transport.netty4.ByteBufBytesReferenceTests.java

License: Apache License

public void testImmutable() throws IOException {
    BytesReference bytesReference = newBytesReference(randomIntBetween(10, 3 * PAGE_SIZE));
    BytesRef bytesRef = BytesRef.deepCopyOf(bytesReference.toBytesRef());
    ByteBuf channelBuffer = Unpooled.wrappedBuffer(bytesRef.bytes, bytesRef.offset, bytesRef.length);
    ByteBufBytesReference byteBufBytesReference = new ByteBufBytesReference(channelBuffer, bytesRef.length);
    assertEquals(byteBufBytesReference, bytesReference);
    channelBuffer.readInt(); // this advances the index of the channel buffer
    assertEquals(byteBufBytesReference, bytesReference);
    assertEquals(bytesRef, byteBufBytesReference.toBytesRef());

    BytesRef unicodeBytes = new BytesRef(randomUnicodeOfCodepointLength(100));
    channelBuffer = Unpooled.wrappedBuffer(unicodeBytes.bytes, unicodeBytes.offset, unicodeBytes.length);
    byteBufBytesReference = new ByteBufBytesReference(channelBuffer, unicodeBytes.length);
    String utf8ToString = byteBufBytesReference.utf8ToString();
    channelBuffer.readInt(); // this advances the index of the channel buffer
    assertEquals(utf8ToString, byteBufBytesReference.utf8ToString());
}

From source file: org.pageseeder.flint.lucene.search.Terms.java

License: Apache License

/**
 * Loads all the fuzzy terms in the list of terms given the reader.
 *
 * @param reader  Index reader to use.
 * @param bucket  Where to store the terms.
 * @param term    The term to use.
 *
 * @throws IOException If an error is thrown by the fuzzy term enumeration.
 */
@Beta
public static void fuzzy(IndexReader reader, Bucket<Term> bucket, Term term, int minSimilarity)
        throws IOException {
    AttributeSource atts = new AttributeSource();
    Fields fields = MultiFields.getFields(reader);
    org.apache.lucene.index.Terms terms = fields == null ? null : fields.terms(term.field());
    if (terms == null)
        return;
    FuzzyTermsEnum fuzzy = new FuzzyTermsEnum(terms, atts, term, minSimilarity, 0, true);
    BytesRef val;
    BytesRef searched = term.bytes();
    while ((val = fuzzy.next()) != null) {
        if (!searched.bytesEquals(val)) {
            Term t = new Term(term.field(), BytesRef.deepCopyOf(val));
            bucket.add(t, reader.docFreq(t));
        }
    }
}

From source file: org.pageseeder.flint.lucene.search.Terms.java

License: Apache License

/**
 * Loads all the prefix terms in the list of terms given the reader.
 *
 * @param reader  Index reader to use.
 * @param bucket  Where to store the terms.
 * @param term    The term to use.
 *
 * @throws IOException If an error is thrown by the prefix term enumeration.
 */
public static void prefix(IndexReader reader, Bucket<Term> bucket, Term term) throws IOException {
    Fields fields = MultiFields.getFields(reader);
    org.apache.lucene.index.Terms terms = fields == null ? null : fields.terms(term.field());
    if (terms == null)
        return;
    TermsEnum prefixes = terms.intersect(new CompiledAutomaton(PrefixQuery.toAutomaton(term.bytes())),
            term.bytes());
    BytesRef val;
    while ((val = prefixes.next()) != null) {
        Term t = new Term(term.field(), BytesRef.deepCopyOf(val));
        bucket.add(t, reader.docFreq(t));
    }
}

From source file: org.pageseeder.flint.lucene.search.Terms.java

License: Apache License

/**
 * Returns the list of terms for the specified field.
 *
 * @param reader The index reader
 * @param field  The field
 *
 * @return the list of terms for this field
 *
 * @throws IOException should any IO error be reported.
 */
@Beta
public static List<Term> terms(IndexReader reader, String field) throws IOException {
    LOGGER.debug("Loading terms for field {}", field);
    org.apache.lucene.index.Terms terms = MultiFields.getTerms(reader, field);
    if (terms == null)
        return Collections.emptyList();
    TermsEnum termsEnum = terms.iterator();
    if (termsEnum == TermsEnum.EMPTY)
        return Collections.emptyList();
    Map<BytesRef, Term> termsMap = new HashMap<BytesRef, Term>();
    while (termsEnum.next() != null) {
        BytesRef t = termsEnum.term();
        if (t == null)
            break;
        // The enum reuses its BytesRef, so the deep copy must serve as the
        // map key as well: a reused key would mutate after insertion and
        // corrupt the HashMap.
        BytesRef copy = BytesRef.deepCopyOf(t);
        termsMap.put(copy, new Term(field, copy));
    }
    return new ArrayList<>(termsMap.values());
}

From source file: org.sindice.siren.search.node.TopNodeTermsRewrite.java

License: Apache License

@Override
public Q rewrite(final IndexReader reader, final MultiNodeTermQuery query) throws IOException {
    final int maxSize = Math.min(size, this.getMaxSize());
    final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();
    this.collectTerms(reader, query, new TermCollector() {
        private final MaxNonCompetitiveBoostAttribute maxBoostAtt = attributes
                .addAttribute(MaxNonCompetitiveBoostAttribute.class);

        private final Map<BytesRef, ScoreTerm> visitedTerms = new HashMap<BytesRef, ScoreTerm>();

        private TermsEnum termsEnum;
        private Comparator<BytesRef> termComp;
        private BoostAttribute boostAtt;
        private ScoreTerm st;

        @Override
        public void setNextEnum(final TermsEnum termsEnum) throws IOException {
            this.termsEnum = termsEnum;
            this.termComp = termsEnum.getComparator();

            assert this.compareToLastTerm(null);

            // lazy init the initial ScoreTerm because comparator is not known on ctor:
            if (st == null)
                st = new ScoreTerm(this.termComp, new TermContext(topReaderContext));
            boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class);
        }

        // for assert:
        private BytesRef lastTerm;

        private boolean compareToLastTerm(final BytesRef t) throws IOException {
            if (lastTerm == null && t != null) {
                lastTerm = BytesRef.deepCopyOf(t);
            } else if (t == null) {
                lastTerm = null;
            } else {
                assert termsEnum.getComparator().compare(lastTerm, t) < 0 : "lastTerm=" + lastTerm + " t=" + t;
                lastTerm.copyBytes(t);
            }
            return true;
        }

        @Override
        public boolean collect(final BytesRef bytes) throws IOException {
            final float boost = boostAtt.getBoost();

            // make sure within a single seg we always collect
            // terms in order
            assert this.compareToLastTerm(bytes);

            //System.out.println("TTR.collect term=" + bytes.utf8ToString() + " boost=" + boost + " ord=" + readerContext.ord);
            // ignore uncompetitive hits
            if (stQueue.size() == maxSize) {
                final ScoreTerm t = stQueue.peek();
                if (boost < t.boost)
                    return true;
                if (boost == t.boost && termComp.compare(bytes, t.bytes) > 0)
                    return true;
            }
            ScoreTerm t = visitedTerms.get(bytes);
            final TermState state = termsEnum.termState();
            assert state != null;
            if (t != null) {
                // if the term is already in the PQ, only update docFreq of term in PQ
                assert t.boost == boost : "boost should be equal in all segment TermsEnums";
                t.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
            } else {
                // add new entry in PQ, we must clone the term, else it may get overwritten!
                st.bytes.copyBytes(bytes);
                st.boost = boost;
                visitedTerms.put(st.bytes, st);
                assert st.termState.docFreq() == 0;
                st.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
                stQueue.offer(st);
                // possibly drop entries from queue
                if (stQueue.size() > maxSize) {
                    st = stQueue.poll();
                    visitedTerms.remove(st.bytes);
                    st.termState.clear(); // reset the termstate!
                } else {
                    st = new ScoreTerm(termComp, new TermContext(topReaderContext));
                }
                assert stQueue.size() <= maxSize : "the PQ size must be limited to maxSize";
                // set maxBoostAtt with values to help FuzzyTermsEnum to optimize
                if (stQueue.size() == maxSize) {
                    t = stQueue.peek();
                    maxBoostAtt.setMaxNonCompetitiveBoost(t.boost);
                    maxBoostAtt.setCompetitiveTerm(t.bytes);
                }
            }

            return true;
        }
    });

    final Q q = this.getTopLevelQuery();
    final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
    ArrayUtil.mergeSort(scoreTerms, scoreTermSortByTermComp);

    for (final ScoreTerm st : scoreTerms) {
        final Term term = new Term(query.field, st.bytes);
        assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs "
                + st.termState.docFreq() + " term=" + term;
        this.addClause(q, term, st.termState.docFreq(), query.getBoost() * st.boost, st.termState); // add to query
    }
    return q;
}

From source file: perf.IDPerfTest.java

License: Apache License

private static Result testOne(String indexPath, String desc, IDIterator ids, final int minTermsInBlock,
        final int maxTermsInBlock) throws IOException {
    System.out.println("\ntest: " + desc + " termBlocks=" + minTermsInBlock + "/" + maxTermsInBlock);
    Directory dir = FSDirectory.open(new File(indexPath));
    //IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, new StandardAnalyzer(Version.LUCENE_48));
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_8, new StandardAnalyzer(Version.LUCENE_4_8));
    iwc.setMergeScheduler(new SerialMergeScheduler());
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    // So I can walk the files and get the *.tip sizes:
    iwc.setUseCompoundFile(false);

    iwc.setCodec(new Lucene53Codec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {
            return new Lucene50PostingsFormat(minTermsInBlock, maxTermsInBlock);
        }
    });

    // 7/7/7 segment structure:
    iwc.setMaxBufferedDocs(ID_COUNT / 777);
    iwc.setRAMBufferSizeMB(-1);
    //iwc.setInfoStream(new PrintStreamInfoStream(System.out));
    //iwc.setMergePolicy(new LogDocMergePolicy());
    ((TieredMergePolicy) iwc.getMergePolicy()).setFloorSegmentMB(.001);
    ((TieredMergePolicy) iwc.getMergePolicy()).setNoCFSRatio(0.0);
    //((LogDocMergePolicy) iwc.getMergePolicy()).setMinMergeDocs(1000);
    iwc.getMergePolicy().setNoCFSRatio(0.0);

    IndexWriter w = new IndexWriter(dir, iwc);
    Document doc = new Document();

    FieldType ft = new FieldType(StringField.TYPE_NOT_STORED);
    ft.setTokenized(true);
    ft.freeze();

    BytesRef idValue = new BytesRef(64);
    Field idField = new Field("id", new BinaryTokenStream(idValue), ft);
    doc.add(idField);

    long t0 = System.nanoTime();
    BytesRef[] lookupIDs = new BytesRef[ID_SEARCH_COUNT];
    Random random = new Random(17);
    int lookupCount = 0;
    double rate = 1.01 * ((double) ID_SEARCH_COUNT) / ID_COUNT;
    for (int i = 0; i < ID_COUNT; i++) {
        ids.next(idValue);
        if (lookupCount < lookupIDs.length && random.nextDouble() <= rate) {
            lookupIDs[lookupCount++] = BytesRef.deepCopyOf(idValue);
        }
        // Trickery: the idsIter changed the idValue which the BinaryTokenStream reuses for each added doc
        w.addDocument(doc);
    }

    if (lookupCount < lookupIDs.length) {
        throw new RuntimeException("didn't get enough lookup ids: " + lookupCount + " vs " + lookupIDs.length);
    }

    long indexTime = System.nanoTime() - t0;

    System.out.println("  indexing done; waitForMerges...");
    w.waitForMerges();

    IndexReader r = DirectoryReader.open(w, true);
    System.out.println("  reader=" + r);

    shuffle(random, lookupIDs);
    shuffle(random, lookupIDs);

    long bestTime = Long.MAX_VALUE;
    long checksum = 0;

    List<AtomicReaderContext> leaves = new ArrayList<>(r.leaves());
    // Sort largest to smallest:
    Collections.sort(leaves, new Comparator<AtomicReaderContext>() {
        @Override
        public int compare(AtomicReaderContext c1, AtomicReaderContext c2) {
            return c2.reader().maxDoc() - c1.reader().maxDoc();
        }
    });
    TermsEnum[] termsEnums = new TermsEnum[leaves.size()];
    DocsEnum[] docsEnums = new DocsEnum[leaves.size()];
    int[] docBases = new int[leaves.size()];
    for (int i = 0; i < leaves.size(); i++) {
        //System.out.println("i=" + i + " count=" + leaves.get(i).reader().maxDoc());
        termsEnums[i] = leaves.get(i).reader().fields().terms("id").iterator(null);
        docBases[i] = leaves.get(i).docBase;
    }

    long rawLookupCount = 0;

    int countx = 0;
    for (int iter = 0; iter < 5; iter++) {
        t0 = System.nanoTime();
        BlockTreeTermsReader.seekExactFastNotFound = 0;
        BlockTreeTermsReader.seekExactFastRootNotFound = 0;
        rawLookupCount = 0;
        for (BytesRef id : lookupIDs) {
            if (countx++ < 50) {
                System.out.println("    id=" + id);
            }
            boolean found = false;
            for (int seg = 0; seg < termsEnums.length; seg++) {
                rawLookupCount++;
                if (termsEnums[seg].seekExact(id)) {
                    docsEnums[seg] = termsEnums[seg].docs(null, docsEnums[seg], 0);
                    int docID = docsEnums[seg].nextDoc();
                    if (docID == DocsEnum.NO_MORE_DOCS) {
                        // uh-oh!
                        throw new RuntimeException("id not found: " + id);
                    }
                    // paranoia:
                    checksum += docID + docBases[seg];

                    found = true;

                    // Optimization vs MultiFields: we don't need to check any more segments since id is PK
                    break;
                }
            }
            if (found == false) {
                // uh-oh!
                throw new RuntimeException("id not found: " + id);
            }
        }
        long lookupTime = System.nanoTime() - t0;
        System.out.println(String.format(Locale.ROOT, "  iter=" + iter + " lookupTime=%.3f sec",
                lookupTime / 1000000000.0));
        if (lookupTime < bestTime) {
            bestTime = lookupTime;
            System.out.println("    **");
        }
    }

    long totalBytes = 0;
    long termsIndexTotalBytes = 0;
    for (String fileName : dir.listAll()) {
        long bytes = dir.fileLength(fileName);
        totalBytes += bytes;
        if (fileName.endsWith(".tip")) {
            termsIndexTotalBytes += bytes;
        }
    }

    r.close();
    w.rollback();
    dir.close();

    return new Result(desc, ID_COUNT / (indexTime / 1000000.0), lookupIDs.length / (bestTime / 1000000.0),
            totalBytes, termsIndexTotalBytes, checksum, BlockTreeTermsReader.seekExactFastNotFound,
            BlockTreeTermsReader.seekExactFastRootNotFound, rawLookupCount, minTermsInBlock, maxTermsInBlock);
}