Example usage for org.apache.lucene.util BytesRef bytesEquals

List of usage examples for org.apache.lucene.util BytesRef bytesEquals

Introduction

This page collects example usages of org.apache.lucene.util.BytesRef#bytesEquals, taken from the open-source projects listed below.

Prototype

public boolean bytesEquals(BytesRef other) 

Document

Expert: compares the bytes against another BytesRef, returning true if the bytes are equal.
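
A minimal sketch of a direct call (illustrative only, not taken from the usage examples below; it assumes Lucene's BytesRef(CharSequence) constructor):

BytesRef a = new BytesRef("lucene");
BytesRef b = new BytesRef("lucene");
BytesRef c = new BytesRef("solr");

// bytesEquals compares the length and byte content of the valid range only,
// so two independently allocated instances holding the same bytes are equal.
boolean same = a.bytesEquals(b);      // true
boolean different = a.bytesEquals(c); // false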

Usage

From source file: de.unihildesheim.iw.lucene.util.BytesRefUtilsTest.java

License: Open Source License

@Test
public void testCopyBytes() throws Exception {
    final BytesRef br = new BytesRef("foo");
    final BytesRef result = new BytesRef(BytesRefUtils.copyBytes(br));
    Assert.assertTrue("Bytes mismatch.", br.bytesEquals(result));
}

From source file: de.unihildesheim.iw.lucene.util.BytesRefUtilsTest.java

License: Open Source License

@Test
public void testCopyBytes_empty() throws Exception {
    final BytesRef br = new BytesRef();
    final BytesRef result = new BytesRef(BytesRefUtils.copyBytes(br));
    Assert.assertTrue("Bytes mismatch.", br.bytesEquals(result));
}

From source file: org.codelibs.elasticsearch.common.util.BytesRefHash.java

License: Apache License

/**
 * Get the id associated with <code>key</code>
 */
public long find(BytesRef key, int code) {
    final long slot = slot(rehash(code), mask);
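    // Linear probing: walk successive slots until the key's bytes match an
    // existing entry, or an empty slot (id == -1L) shows the key is absent.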
    for (long index = slot;; index = nextSlot(index, mask)) {
        final long id = id(index);
        if (id == -1L || key.bytesEquals(get(id, spare))) {
            return id;
        }
    }
}

From source file: org.codelibs.elasticsearch.common.util.BytesRefHash.java

License: Apache License

private long set(BytesRef key, int code, long id) {
    assert rehash(key.hashCode()) == code;
    assert size < maxSize;
    final long slot = slot(code, mask);
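    // Probe for the key: claim the first empty slot and return the new id,
    // or return -1 - curId when an entry with equal bytes already exists.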
    for (long index = slot;; index = nextSlot(index, mask)) {
        final long curId = id(index);
        if (curId == -1) { // means unset
            id(index, id);
            append(id, key, code);
            ++size;
            return id;
        } else if (key.bytesEquals(get(curId, spare))) {
            return -1 - curId;
        }
    }
}

From source file: org.elasticsearch.common.lucene.search.XTermsFilter.java

License: Apache License

private XTermsFilter(FieldAndTermEnum iter, int length) {
    // TODO: maybe use oal.index.PrefixCodedTerms instead?
    // If number of terms is more than a few hundred it
    // should be a win

    // TODO: we also pack terms in FieldCache/DocValues
    // ... maybe we can refactor to share that code

    // TODO: yet another option is to build the union of the terms in
    // an automaton and call intersect on the termsenum if the density is high

    int hash = 9;
    byte[] serializedTerms = new byte[0];
    this.offsets = new int[length + 1];
    int lastEndOffset = 0;
    int index = 0;
    ArrayList<TermsAndField> termsAndFields = new ArrayList<TermsAndField>();
    TermsAndField lastTermsAndField = null;
    BytesRef previousTerm = null;
    String previousField = null;
    BytesRef currentTerm;
    String currentField;
    while ((currentTerm = iter.next()) != null) {
        currentField = iter.field();
        if (currentField == null) {
            throw new IllegalArgumentException("Field must not be null");
        }
        if (previousField != null) {
            // deduplicate
            if (previousField.equals(currentField)) {
                if (previousTerm.bytesEquals(currentTerm)) {
                    continue;
                }
            } else {
                final int start = lastTermsAndField == null ? 0 : lastTermsAndField.end;
                lastTermsAndField = new TermsAndField(start, index, previousField);
                termsAndFields.add(lastTermsAndField);
            }
        }
        hash = PRIME * hash + currentField.hashCode();
        hash = PRIME * hash + currentTerm.hashCode();
        if (serializedTerms.length < lastEndOffset + currentTerm.length) {
            serializedTerms = ArrayUtil.grow(serializedTerms, lastEndOffset + currentTerm.length);
        }
        System.arraycopy(currentTerm.bytes, currentTerm.offset, serializedTerms, lastEndOffset,
                currentTerm.length);
        offsets[index] = lastEndOffset;
        lastEndOffset += currentTerm.length;
        index++;
        previousTerm = currentTerm;
        previousField = currentField;
    }
    offsets[index] = lastEndOffset;
    final int start = lastTermsAndField == null ? 0 : lastTermsAndField.end;
    lastTermsAndField = new TermsAndField(start, index, previousField);
    termsAndFields.add(lastTermsAndField);
    this.termsBytes = ArrayUtil.shrink(serializedTerms, lastEndOffset);
    this.termsAndFields = termsAndFields.toArray(new TermsAndField[termsAndFields.size()]);
    this.hashCode = hash;

}

From source file: org.elasticsearch.index.translog.TranslogHeader.java

License: Apache License

/**
 * Read a translog header from the given path and file channel
 */
static TranslogHeader read(final String translogUUID, final Path path, final FileChannel channel)
        throws IOException {
    // This input is intentionally not closed because closing it will close the FileChannel.
    final BufferedChecksumStreamInput in = new BufferedChecksumStreamInput(
            new InputStreamStreamInput(java.nio.channels.Channels.newInputStream(channel), channel.size()),
            path.toString());
    final int version;
    try {
        version = CodecUtil.checkHeader(new InputStreamDataInput(in), TRANSLOG_CODEC, VERSION_CHECKSUMS,
                VERSION_PRIMARY_TERM);
    } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) {
        tryReportOldVersionError(path, channel);
        throw new TranslogCorruptedException(path.toString(), "translog header corrupted", e);
    }
    if (version == VERSION_CHECKSUMS) {
        throw new IllegalStateException("pre-2.0 translog found [" + path + "]");
    }
    // Read the translogUUID
    final int uuidLen = in.readInt();
    if (uuidLen > channel.size()) {
        throw new TranslogCorruptedException(path.toString(), "UUID length can't be larger than the translog");
    }
    final BytesRef uuid = new BytesRef(uuidLen);
    uuid.length = uuidLen;
    in.read(uuid.bytes, uuid.offset, uuid.length);
    final BytesRef expectedUUID = new BytesRef(translogUUID);
    if (uuid.bytesEquals(expectedUUID) == false) {
        throw new TranslogCorruptedException(path.toString(), "expected shard UUID " + expectedUUID
                + " but got: " + uuid + " this translog file belongs to a different translog");
    }
    // Read the primary term
    final long primaryTerm;
    if (version == VERSION_PRIMARY_TERM) {
        primaryTerm = in.readLong();
        assert primaryTerm >= 0 : "Primary term must be non-negative [" + primaryTerm + "]; translog path ["
                + path + "]";
    } else {
        assert version == VERSION_CHECKPOINTS : "Unknown header version [" + version + "]";
        primaryTerm = UNKNOWN_PRIMARY_TERM;
    }
    // Verify the checksum
    if (version >= VERSION_PRIMARY_TERM) {
        Translog.verifyChecksum(in);
    }
    final int headerSizeInBytes = headerSizeInBytes(version, uuid.length);
    assert channel.position() == headerSizeInBytes : "Header is not fully read; header size ["
            + headerSizeInBytes + "], position [" + channel.position() + "]";
    return new TranslogHeader(translogUUID, primaryTerm, headerSizeInBytes);
}

From source file: org.elasticsearch.index.translog.TranslogReader.java

License: Apache License

/**
 * Given a file, return a VersionedTranslogStream based on an
 * optionally-existing header in the file. If the file does not exist, or
 * has zero length, returns the latest version. If the header does not
 * exist, assumes Version 0 of the translog file format.
 * <p/>
 *
 * @throws IOException
 */
public static ImmutableTranslogReader open(ChannelReference channelReference, Checkpoint checkpoint,
        String translogUUID) throws IOException {
    final FileChannel channel = channelReference.getChannel();
    final Path path = channelReference.getPath();
    assert channelReference.getGeneration() == checkpoint.generation : "expected generation: "
            + channelReference.getGeneration() + " but got: " + checkpoint.generation;

    try {
        if (checkpoint.offset == 0 && checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT) { // only old files can be empty
            return new LegacyTranslogReader(channelReference.getGeneration(), channelReference, 0);
        }

        InputStreamStreamInput headerStream = new InputStreamStreamInput(Channels.newInputStream(channel)); // don't close
        // Lucene's CodecUtil writes a magic number of 0x3FD76C17 with the
        // header, in binary this looks like:
        //
        // binary: 0011 1111 1101 0111 0110 1100 0001 0111
        // hex   :    3    f    d    7    6    c    1    7
        //
        // With version 0 of the translog, the first byte is the
        // Operation.Type, which will always be between 0-4, so we know if
        // we grab the first byte, it can be:
        // 0x3f => Lucene's magic number, so we can assume it's version 1 or later
        // 0x00 => version 0 of the translog
        //
        // otherwise the first byte of the translog is corrupted and we
        // should bail
        byte b1 = headerStream.readByte();
        if (b1 == LUCENE_CODEC_HEADER_BYTE) {
            // Read 3 more bytes, meaning a whole integer has been read
            byte b2 = headerStream.readByte();
            byte b3 = headerStream.readByte();
            byte b4 = headerStream.readByte();
            // Convert the 4 bytes that were read into an integer
            int header = ((b1 & 0xFF) << 24) + ((b2 & 0xFF) << 16) + ((b3 & 0xFF) << 8) + ((b4 & 0xFF) << 0);
            // We confirm CodecUtil's CODEC_MAGIC number (0x3FD76C17)
            // ourselves here, because it allows us to read the first
            // byte separately
            if (header != CodecUtil.CODEC_MAGIC) {
                throw new TranslogCorruptedException(
                        "translog looks like version 1 or later, but has corrupted header");
            }
            // Confirm the rest of the header using CodecUtil, extracting
            // the translog version
            int version = CodecUtil.checkHeaderNoMagic(new InputStreamDataInput(headerStream),
                    TranslogWriter.TRANSLOG_CODEC, 1, Integer.MAX_VALUE);
            switch (version) {
            case TranslogWriter.VERSION_CHECKSUMS:
                assert checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT : "expected unknown op count but got: "
                        + checkpoint.numOps;
                assert checkpoint.offset == Files.size(path) : "offset(" + checkpoint.offset + ") != file_size("
                        + Files.size(path) + ") for: " + path;
                // legacy - we still have to support it somehow
                return new LegacyTranslogReaderBase(channelReference.getGeneration(), channelReference,
                        CodecUtil.headerLength(TranslogWriter.TRANSLOG_CODEC), checkpoint.offset);
            case TranslogWriter.VERSION_CHECKPOINTS:
                assert path.getFileName().toString()
                        .endsWith(Translog.TRANSLOG_FILE_SUFFIX) : "new file ends with old suffix: " + path;
                assert checkpoint.numOps > TranslogReader.UNKNOWN_OP_COUNT : "expected at least 0 operatin but got: "
                        + checkpoint.numOps;
                assert checkpoint.offset <= channel.size() : "checkpoint is inconsistent with channel length: "
                        + channel.size() + " " + checkpoint;
                int len = headerStream.readInt();
                if (len > channel.size()) {
                    throw new TranslogCorruptedException("uuid length can't be larger than the translog");
                }
                BytesRef ref = new BytesRef(len);
                ref.length = len;
                headerStream.read(ref.bytes, ref.offset, ref.length);
                BytesRef uuidBytes = new BytesRef(translogUUID);
                if (uuidBytes.bytesEquals(ref) == false) {
                    throw new TranslogCorruptedException("expected shard UUID [" + uuidBytes + "] but got: ["
                            + ref + "] this translog file belongs to a different translog");
                }
                return new ImmutableTranslogReader(channelReference.getGeneration(), channelReference,
                        ref.length + CodecUtil.headerLength(TranslogWriter.TRANSLOG_CODEC)
                                + RamUsageEstimator.NUM_BYTES_INT,
                        checkpoint.offset, checkpoint.numOps);
            default:
                throw new TranslogCorruptedException(
                        "No known translog stream version: " + version + " path:" + path);
            }
        } else if (b1 == UNVERSIONED_TRANSLOG_HEADER_BYTE) {
            assert checkpoint.numOps == TranslogReader.UNKNOWN_OP_COUNT : "expected unknown op count but got: "
                    + checkpoint.numOps;
            assert checkpoint.offset == Files.size(path) : "offset(" + checkpoint.offset + ") != file_size("
                    + Files.size(path) + ") for: " + path;
            return new LegacyTranslogReader(channelReference.getGeneration(), channelReference,
                    checkpoint.offset);
        } else {
            throw new TranslogCorruptedException("Invalid first byte in translog file, got: "
                    + Long.toHexString(b1) + ", expected 0x00 or 0x3f");
        }
    } catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) {
        throw new TranslogCorruptedException("Translog header corrupted", e);
    }
}

From source file: org.elasticsearch.search.aggregations.support.FieldDataSourceTests.java

License: Apache License

private static void assertConsistent(BytesValues values) {
    for (int i = 0; i < 10; ++i) {
        final int valueCount = values.setDocument(i);
        for (int j = 0; j < valueCount; ++j) {
            final BytesRef term = values.nextValue();
            assertEquals(term.hashCode(), values.currentValueHash());
            assertTrue(term.bytesEquals(values.copyShared()));
        }
    }
}

From source file: org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.java

License: Apache License

protected void postFilter(final Candidate candidate, final CharsRef spare, BytesRef byteSpare,
        final List<Candidate> candidates) throws IOException {
    if (postFilter == null) {
        candidates.add(candidate);
    } else {
        final BytesRef result = byteSpare;
        SuggestUtils.analyze(postFilter, candidate.term, field, new SuggestUtils.TokenConsumer() {
            @Override
            public void nextToken() throws IOException {
                this.fillBytesRef(result);
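                // If the post-filtered token still has the same bytes as the candidate
                // term, keep its real corpus frequency; otherwise score the altered
                // token with the non-error likelihood.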

                if (posIncAttr.getPositionIncrement() > 0 && result.bytesEquals(candidate.term)) {
                    BytesRef term = BytesRef.deepCopyOf(result);
                    long freq = frequency(term);
                    candidates.add(new Candidate(BytesRef.deepCopyOf(term), freq, candidate.stringDistance,
                            score(candidate.frequency, candidate.stringDistance, dictSize), false));
                } else {
                    candidates.add(
                            new Candidate(BytesRef.deepCopyOf(result), candidate.frequency, nonErrorLikelihood,
                                    score(candidate.frequency, candidate.stringDistance, dictSize), false));
                }
            }
        }, spare);
    }
}

From source file: org.nlp4l.lucene.BuddyWordsFinder.java

License: Apache License

public Scorer[] findCoincidentalTerms(String field, BytesRef term) throws IOException {

    baseTermFilter.start(reader, field, term);
    if (baseTermFilter.skip(term) || baseTermFilter.skipByPopularity(term))
        return null;
    //System.out.println(term.utf8ToString());

    Bits liveDocs = MultiFields.getLiveDocs(reader);

    PostingsEnum de = MultiFields.getTermDocsEnum(reader, field, term);
    if (de == null)
        return null;
    int numDocsAnalyzed = 0;
    phraseTerms.clear();

    while (de.nextDoc() != PostingsEnum.NO_MORE_DOCS && numDocsAnalyzed < maxDocsToAnalyze) {
        int docId = de.docID();

        //first record all of the positions of the term in a bitset which
        // represents terms in the current doc.
        int freq = de.freq();
        PostingsEnum pe = MultiFields.getTermPositionsEnum(reader, field, term);
        int ret = pe.advance(docId);
        if (ret == PostingsEnum.NO_MORE_DOCS)
            continue;
        termPos.clear();
        for (int i = 0; i < freq; i++) {
            int pos = pe.nextPosition();
            if (pos < termPos.size())
                termPos.set(pos);
        }

        // now look at all OTHER terms in this doc and see if they are
        // positioned in a pre-defined sized window around the current term
        Fields vectors = reader.getTermVectors(docId);
        // check it has term vectors
        if (vectors == null)
            return null;
        Terms vector = vectors.terms(field);
        // check it has position info
        if (vector == null || !vector.hasPositions())
            return null;

        TermsEnum te = vector.iterator();
        BytesRef otherTerm = null;
        while ((otherTerm = te.next()) != null) {
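            // skip the base term itself; only other terms count as co-occurrences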
            if (term.bytesEquals(otherTerm))
                continue;
            coiTermFilter.start(reader, field, otherTerm);
            if (coiTermFilter.skip(otherTerm) || coiTermFilter.skipByPopularity(otherTerm))
                continue;

            PostingsEnum pe2 = MultiFields.getTermPositionsEnum(reader, field, otherTerm);
            ret = pe2.advance(docId);
            if (ret == PostingsEnum.NO_MORE_DOCS)
                continue;
            freq = pe2.freq();
            boolean matchFound = false;
            for (int i = 0; i < freq && (!matchFound); i++) {
                int pos = pe2.nextPosition();
                int startpos = Math.max(0, pos - slop);
                int endpos = pos + slop;
                for (int prevpos = startpos; (prevpos <= endpos) && (!matchFound); prevpos++) {
                    if (termPos.get(prevpos)) {
                        // Add term to hashmap containing co-occurrence
                        // counts for this term
                        Scorer pt = phraseTerms.get(otherTerm.utf8ToString());
                        if (pt == null) {
                            pt = new Scorer(baseTermFilter.getCurrentTermDocFreq(), otherTerm.utf8ToString(),
                                    coiTermFilter.getCurrentTermDocFreq());
                            phraseTerms.put(pt.coiTerm, pt);
                        }
                        pt.incCoiDocCount();
                        matchFound = true;
                    }
                }
            }
        }
        numDocsAnalyzed++;
    } // end of while loop

    // now sort and dump the top terms associated with this term.
    TopTerms topTerms = new TopTerms(maxCoiTermsPerTerm);
    for (String key : phraseTerms.keySet()) {
        Scorer pt = phraseTerms.get(key);
        topTerms.insertWithOverflow(pt);
    }
    Scorer[] tops = new Scorer[topTerms.size()];
    int tp = tops.length - 1;
    while (topTerms.size() > 0) {
        Scorer top = topTerms.pop();
        tops[tp--] = top;
    }
    return tops;
}