List of usage examples for org.apache.lucene.util BytesRef bytesEquals
public boolean bytesEquals(BytesRef other)
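bytesEquals compares only the valid byte window of this BytesRef (bytes[offset] through bytes[offset + length - 1]) with the valid window of other; the backing arrays, offsets, and capacities themselves are irrelevant. A minimal self-contained sketch (class and variable names here are illustrative, not taken from the projects below):

import java.nio.charset.StandardCharsets;

import org.apache.lucene.util.BytesRef;

public class BytesEqualsExample {
    public static void main(String[] args) {
        // Same six bytes, but stored in different arrays at different offsets.
        BytesRef a = new BytesRef("lucene");
        byte[] backing = "xxlucenexx".getBytes(StandardCharsets.UTF_8);
        BytesRef b = new BytesRef(backing, 2, 6); // the "lucene" slice

        System.out.println(a.bytesEquals(b));                      // true: only the valid ranges are compared
        System.out.println(a.bytesEquals(new BytesRef("Lucene"))); // false: raw, case-sensitive byte comparison
    }
}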
From source file:de.unihildesheim.iw.lucene.util.BytesRefUtilsTest.java
License:Open Source License
@Test
public void testCopyBytes() throws Exception {
    final BytesRef br = new BytesRef("foo");
    final BytesRef result = new BytesRef(BytesRefUtils.copyBytes(br));
    Assert.assertTrue("Bytes mismatch.", br.bytesEquals(result));
}
From source file:de.unihildesheim.iw.lucene.util.BytesRefUtilsTest.java
License:Open Source License
@Test
public void testCopyBytes_empty() throws Exception {
    final BytesRef br = new BytesRef();
    final BytesRef result = new BytesRef(BytesRefUtils.copyBytes(br));
    Assert.assertTrue("Bytes mismatch.", br.bytesEquals(result));
}
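Both tests above go through BytesRefUtils.copyBytes, a helper from the project under test whose implementation is not shown here. Presumably it copies just the valid byte window so that wrapping the returned array in a fresh BytesRef makes br.bytesEquals(result) hold; a hypothetical sketch of such a helper (an assumption, not the actual de.unihildesheim code):

import java.util.Arrays;

import org.apache.lucene.util.BytesRef;

final class BytesRefUtilsSketch {
    private BytesRefUtilsSketch() {
    }

    // Hypothetical stand-in for BytesRefUtils.copyBytes: copy only the valid
    // window [offset, offset + length) so that new BytesRef(copyBytes(br))
    // is bytesEquals to br.
    static byte[] copyBytes(BytesRef br) {
        return Arrays.copyOfRange(br.bytes, br.offset, br.offset + br.length);
    }
}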
From source file:org.codelibs.elasticsearch.common.util.BytesRefHash.java
License:Apache License
/**
 * Get the id associated with <code>key</code>
 */
public long find(BytesRef key, int code) {
    final long slot = slot(rehash(code), mask);
    for (long index = slot;; index = nextSlot(index, mask)) {
        final long id = id(index);
        if (id == -1L || key.bytesEquals(get(id, spare))) {
            return id;
        }
    }
}
From source file:org.codelibs.elasticsearch.common.util.BytesRefHash.java
License:Apache License
private long set(BytesRef key, int code, long id) {
    assert rehash(key.hashCode()) == code;
    assert size < maxSize;
    final long slot = slot(code, mask);
    for (long index = slot;; index = nextSlot(index, mask)) {
        final long curId = id(index);
        if (curId == -1) { // means unset
            id(index, id);
            append(id, key, code);
            ++size;
            return id;
        } else if (key.bytesEquals(get(curId, spare))) {
            return -1 - curId;
        }
    }
}
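find and set above both use open addressing: probe successive slots until either an unset slot is hit (the key is absent) or the stored key bytesEquals the lookup key (a match); any other occupied slot is a hash collision and probing continues. A stripped-down illustration of that lookup loop (toy fixed-size table with hypothetical names, not the Elasticsearch implementation):

import org.apache.lucene.util.BytesRef;

final class ToyBytesTable {
    // Toy open-addressed table; a null slot means "unset".
    private final BytesRef[] slots = new BytesRef[8];

    /** Returns the slot index holding key, or -1 if the key is not stored. */
    int find(BytesRef key) {
        int start = Math.floorMod(key.hashCode(), slots.length);
        for (int probe = 0; probe < slots.length; probe++) {
            int index = (start + probe) % slots.length;
            BytesRef stored = slots[index];
            if (stored == null) {
                return -1;        // unset slot: key is absent
            } else if (key.bytesEquals(stored)) {
                return index;     // bytes match: found it
            }
            // otherwise: hash collision, keep probing the next slot
        }
        return -1;                // table full of other keys
    }
}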
From source file:org.elasticsearch.common.lucene.search.XTermsFilter.java
License:Apache License
private XTermsFilter(FieldAndTermEnum iter, int length) {
    // TODO: maybe use oal.index.PrefixCodedTerms instead?
    // If number of terms is more than a few hundred it should be a win
    // TODO: we also pack terms in FieldCache/DocValues
    // ... maybe we can refactor to share that code
    // TODO: yet another option is to build the union of the terms in
    // an automaton an call intersect on the termsenum if the density is high
    int hash = 9;
    byte[] serializedTerms = new byte[0];
    this.offsets = new int[length + 1];
    int lastEndOffset = 0;
    int index = 0;
    ArrayList<TermsAndField> termsAndFields = new ArrayList<TermsAndField>();
    TermsAndField lastTermsAndField = null;
    BytesRef previousTerm = null;
    String previousField = null;
    BytesRef currentTerm;
    String currentField;
    while ((currentTerm = iter.next()) != null) {
        currentField = iter.field();
        if (currentField == null) {
            throw new IllegalArgumentException("Field must not be null");
        }
        if (previousField != null) {
            // deduplicate
            if (previousField.equals(currentField)) {
                if (previousTerm.bytesEquals(currentTerm)) {
                    continue;
                }
            } else {
                final int start = lastTermsAndField == null ? 0 : lastTermsAndField.end;
                lastTermsAndField = new TermsAndField(start, index, previousField);
                termsAndFields.add(lastTermsAndField);
            }
        }
        hash = PRIME * hash + currentField.hashCode();
        hash = PRIME * hash + currentTerm.hashCode();
        if (serializedTerms.length < lastEndOffset + currentTerm.length) {
            serializedTerms = ArrayUtil.grow(serializedTerms, lastEndOffset + currentTerm.length);
        }
        System.arraycopy(currentTerm.bytes, currentTerm.offset, serializedTerms, lastEndOffset, currentTerm.length);
        offsets[index] = lastEndOffset;
        lastEndOffset += currentTerm.length;
        index++;
        previousTerm = currentTerm;
        previousField = currentField;
    }
    offsets[index] = lastEndOffset;
    final int start = lastTermsAndField == null ? 0 : lastTermsAndField.end;
    lastTermsAndField = new TermsAndField(start, index, previousField);
    termsAndFields.add(lastTermsAndField);
    this.termsBytes = ArrayUtil.shrink(serializedTerms, lastEndOffset);
    this.termsAndFields = termsAndFields.toArray(new TermsAndField[termsAndFields.size()]);
    this.hashCode = hash;
}
From source file:org.elasticsearch.index.translog.TranslogHeader.java
License:Apache License
/**
 * Read a translog header from the given path and file channel
 */
static TranslogHeader read(final String translogUUID, final Path path, final FileChannel channel) throws IOException {
    // This input is intentionally not closed because closing it will close the FileChannel.
    final BufferedChecksumStreamInput in = new BufferedChecksumStreamInput(
            new InputStreamStreamInput(java.nio.channels.Channels.newInputStream(channel), channel.size()),
            path.toString());
    final int version;
    try {
        version = CodecUtil.checkHeader(new InputStreamDataInput(in), TRANSLOG_CODEC, VERSION_CHECKSUMS, VERSION_PRIMARY_TERM);
    } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) {
        tryReportOldVersionError(path, channel);
        throw new TranslogCorruptedException(path.toString(), "translog header corrupted", e);
    }
    if (version == VERSION_CHECKSUMS) {
        throw new IllegalStateException("pre-2.0 translog found [" + path + "]");
    }
    // Read the translogUUID
    final int uuidLen = in.readInt();
    if (uuidLen > channel.size()) {
        throw new TranslogCorruptedException(path.toString(), "UUID length can't be larger than the translog");
    }
    final BytesRef uuid = new BytesRef(uuidLen);
    uuid.length = uuidLen;
    in.read(uuid.bytes, uuid.offset, uuid.length);
    final BytesRef expectedUUID = new BytesRef(translogUUID);
    if (uuid.bytesEquals(expectedUUID) == false) {
        throw new TranslogCorruptedException(path.toString(),
                "expected shard UUID " + expectedUUID + " but got: " + uuid
                        + " this translog file belongs to a different translog");
    }
    // Read the primary term
    final long primaryTerm;
    if (version == VERSION_PRIMARY_TERM) {
        primaryTerm = in.readLong();
        assert primaryTerm >= 0 : "Primary term must be non-negative [" + primaryTerm + "]; translog path [" + path + "]";
    } else {
        assert version == VERSION_CHECKPOINTS : "Unknown header version [" + version + "]";
        primaryTerm = UNKNOWN_PRIMARY_TERM;
    }
    // Verify the checksum
    if (version >= VERSION_PRIMARY_TERM) {
        Translog.verifyChecksum(in);
    }
    final int headerSizeInBytes = headerSizeInBytes(version, uuid.length);
    assert channel.position() == headerSizeInBytes :
            "Header is not fully read; header size [" + headerSizeInBytes + "], position [" + channel.position() + "]";
    return new TranslogHeader(translogUUID, primaryTerm, headerSizeInBytes);
}
From source file:org.elasticsearch.index.translog.TranslogReader.java
License:Apache License
/**
 * Given a file, return a VersionedTranslogStream based on an
 * optionally-existing header in the file. If the file does not exist, or
 * has zero length, returns the latest version. If the header does not
 * exist, assumes Version 0 of the translog file format.
 *
 * @throws IOException
 */
public static ImmutableTranslogReader open(ChannelReference channelReference, Checkpoint checkpoint, String translogUUID) throws IOException {
    final FileChannel channel = channelReference.getChannel();
    final Path path = channelReference.getPath();
    assert channelReference.getGeneration() == checkpoint.generation :
            "expected generation: " + channelReference.getGeneration() + " but got: " + checkpoint.generation;
    try {
        if (checkpoint.offset == 0 && checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT) { // only old files can be empty
            return new LegacyTranslogReader(channelReference.getGeneration(), channelReference, 0);
        }
        InputStreamStreamInput headerStream = new InputStreamStreamInput(Channels.newInputStream(channel)); // don't close
        // Lucene's CodecUtil writes a magic number of 0x3FD76C17 with the
        // header, in binary this looks like:
        //
        // binary: 0011 1111 1101 0111 0110 1100 0001 0111
        // hex   :    3    f    d    7    6    c    1    7
        //
        // With version 0 of the translog, the first byte is the
        // Operation.Type, which will always be between 0-4, so we know if
        // we grab the first byte, it can be:
        // 0x3f => Lucene's magic number, so we can assume it's version 1 or later
        // 0x00 => version 0 of the translog
        //
        // otherwise the first byte of the translog is corrupted and we
        // should bail
        byte b1 = headerStream.readByte();
        if (b1 == LUCENE_CODEC_HEADER_BYTE) {
            // Read 3 more bytes, meaning a whole integer has been read
            byte b2 = headerStream.readByte();
            byte b3 = headerStream.readByte();
            byte b4 = headerStream.readByte();
            // Convert the 4 bytes that were read into an integer
            int header = ((b1 & 0xFF) << 24) + ((b2 & 0xFF) << 16) + ((b3 & 0xFF) << 8) + ((b4 & 0xFF) << 0);
            // We confirm CodecUtil's CODEC_MAGIC number (0x3FD76C17)
            // ourselves here, because it allows us to read the first
            // byte separately
            if (header != CodecUtil.CODEC_MAGIC) {
                throw new TranslogCorruptedException("translog looks like version 1 or later, but has corrupted header");
            }
            // Confirm the rest of the header using CodecUtil, extracting
            // the translog version
            int version = CodecUtil.checkHeaderNoMagic(new InputStreamDataInput(headerStream), TranslogWriter.TRANSLOG_CODEC, 1, Integer.MAX_VALUE);
            switch (version) {
            case TranslogWriter.VERSION_CHECKSUMS:
                assert checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT : "expected unknown op count but got: " + checkpoint.numOps;
                assert checkpoint.offset == Files.size(path) : "offset(" + checkpoint.offset + ") != file_size(" + Files.size(path) + ") for: " + path;
                // legacy - we still have to support it somehow
                return new LegacyTranslogReaderBase(channelReference.getGeneration(), channelReference,
                        CodecUtil.headerLength(TranslogWriter.TRANSLOG_CODEC), checkpoint.offset);
            case TranslogWriter.VERSION_CHECKPOINTS:
                assert path.getFileName().toString().endsWith(Translog.TRANSLOG_FILE_SUFFIX) : "new file ends with old suffix: " + path;
                assert checkpoint.numOps > TranslogReader.UNKNOWN_OP_COUNT : "expected at least 0 operatin but got: " + checkpoint.numOps;
                assert checkpoint.offset <= channel.size() : "checkpoint is inconsistent with channel length: " + channel.size() + " " + checkpoint;
                int len = headerStream.readInt();
                if (len > channel.size()) {
                    throw new TranslogCorruptedException("uuid length can't be larger than the translog");
                }
                BytesRef ref = new BytesRef(len);
                ref.length = len;
                headerStream.read(ref.bytes, ref.offset, ref.length);
                BytesRef uuidBytes = new BytesRef(translogUUID);
                if (uuidBytes.bytesEquals(ref) == false) {
                    throw new TranslogCorruptedException("expected shard UUID [" + uuidBytes + "] but got: [" + ref
                            + "] this translog file belongs to a different translog");
                }
                return new ImmutableTranslogReader(channelReference.getGeneration(), channelReference,
                        ref.length + CodecUtil.headerLength(TranslogWriter.TRANSLOG_CODEC) + RamUsageEstimator.NUM_BYTES_INT,
                        checkpoint.offset, checkpoint.numOps);
            default:
                throw new TranslogCorruptedException("No known translog stream version: " + version + " path:" + path);
            }
        } else if (b1 == UNVERSIONED_TRANSLOG_HEADER_BYTE) {
            assert checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT : "expected unknown op count but got: " + checkpoint.numOps;
            assert checkpoint.offset == Files.size(path) : "offset(" + checkpoint.offset + ") != file_size(" + Files.size(path) + ") for: " + path;
            return new LegacyTranslogReader(channelReference.getGeneration(), channelReference, checkpoint.offset);
        } else {
            throw new TranslogCorruptedException("Invalid first byte in translog file, got: " + Long.toHexString(b1) + ", expected 0x00 or 0x3f");
        }
    } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) {
        throw new TranslogCorruptedException("Translog header corrupted", e);
    }
}
From source file:org.elasticsearch.search.aggregations.support.FieldDataSourceTests.java
License:Apache License
private static void assertConsistent(BytesValues values) {
    for (int i = 0; i < 10; ++i) {
        final int valueCount = values.setDocument(i);
        for (int j = 0; j < valueCount; ++j) {
            final BytesRef term = values.nextValue();
            assertEquals(term.hashCode(), values.currentValueHash());
            assertTrue(term.bytesEquals(values.copyShared()));
        }
    }
}
From source file:org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.java
License:Apache License
protected void postFilter(final Candidate candidate, final CharsRef spare, BytesRef byteSpare,
        final List<Candidate> candidates) throws IOException {
    if (postFilter == null) {
        candidates.add(candidate);
    } else {
        final BytesRef result = byteSpare;
        SuggestUtils.analyze(postFilter, candidate.term, field, new SuggestUtils.TokenConsumer() {
            @Override
            public void nextToken() throws IOException {
                this.fillBytesRef(result);
                if (posIncAttr.getPositionIncrement() > 0 && result.bytesEquals(candidate.term)) {
                    BytesRef term = BytesRef.deepCopyOf(result);
                    long freq = frequency(term);
                    candidates.add(new Candidate(BytesRef.deepCopyOf(term), freq, candidate.stringDistance,
                            score(candidate.frequency, candidate.stringDistance, dictSize), false));
                } else {
                    candidates.add(new Candidate(BytesRef.deepCopyOf(result), candidate.frequency, nonErrorLikelihood,
                            score(candidate.frequency, candidate.stringDistance, dictSize), false));
                }
            }
        }, spare);
    }
}
From source file:org.nlp4l.lucene.BuddyWordsFinder.java
License:Apache License
public Scorer[] findCoincidentalTerms(String field, BytesRef term) throws IOException {
    baseTermFilter.start(reader, field, term);
    if (baseTermFilter.skip(term) || baseTermFilter.skipByPopularity(term))
        return null;
    //System.out.println(term.utf8ToString());
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    PostingsEnum de = MultiFields.getTermDocsEnum(reader, field, term);
    if (de == null)
        return null;
    int numDocsAnalyzed = 0;
    phraseTerms.clear();
    while (de.nextDoc() != PostingsEnum.NO_MORE_DOCS && numDocsAnalyzed < maxDocsToAnalyze) {
        int docId = de.docID();
        // first record all of the positions of the term in a bitset which
        // represents terms in the current doc.
        int freq = de.freq();
        PostingsEnum pe = MultiFields.getTermPositionsEnum(reader, field, term);
        int ret = pe.advance(docId);
        if (ret == PostingsEnum.NO_MORE_DOCS)
            continue;
        termPos.clear();
        for (int i = 0; i < freq; i++) {
            int pos = pe.nextPosition();
            if (pos < termPos.size())
                termPos.set(pos);
        }
        // now look at all OTHER terms in this doc and see if they are
        // positioned in a pre-defined sized window around the current term
        Fields vectors = reader.getTermVectors(docId);
        // check it has term vectors
        if (vectors == null)
            return null;
        Terms vector = vectors.terms(field);
        // check it has position info
        if (vector == null || !vector.hasPositions())
            return null;
        TermsEnum te = vector.iterator();
        BytesRef otherTerm = null;
        while ((otherTerm = te.next()) != null) {
            if (term.bytesEquals(otherTerm))
                continue;
            coiTermFilter.start(reader, field, otherTerm);
            if (coiTermFilter.skip(otherTerm) || coiTermFilter.skipByPopularity(otherTerm))
                continue;
            PostingsEnum pe2 = MultiFields.getTermPositionsEnum(reader, field, otherTerm);
            ret = pe2.advance(docId);
            if (ret == PostingsEnum.NO_MORE_DOCS)
                continue;
            freq = pe2.freq();
            boolean matchFound = false;
            for (int i = 0; i < freq && (!matchFound); i++) {
                int pos = pe2.nextPosition();
                int startpos = Math.max(0, pos - slop);
                int endpos = pos + slop;
                for (int prevpos = startpos; (prevpos <= endpos) && (!matchFound); prevpos++) {
                    if (termPos.get(prevpos)) {
                        // Add term to hashmap containing co-occurence
                        // counts for this term
                        Scorer pt = phraseTerms.get(otherTerm.utf8ToString());
                        if (pt == null) {
                            pt = new Scorer(baseTermFilter.getCurrentTermDocFreq(), otherTerm.utf8ToString(),
                                    coiTermFilter.getCurrentTermDocFreq());
                            phraseTerms.put(pt.coiTerm, pt);
                        }
                        pt.incCoiDocCount();
                        matchFound = true;
                    }
                }
            }
        }
        numDocsAnalyzed++;
    } // end of while loop
    // now sort and dump the top terms associated with this term.
    TopTerms topTerms = new TopTerms(maxCoiTermsPerTerm);
    for (String key : phraseTerms.keySet()) {
        Scorer pt = phraseTerms.get(key);
        topTerms.insertWithOverflow(pt);
    }
    Scorer[] tops = new Scorer[topTerms.size()];
    int tp = tops.length - 1;
    while (topTerms.size() > 0) {
        Scorer top = topTerms.pop();
        tops[tp--] = top;
    }
    return tops;
}