Example usage for org.apache.lucene.util BytesRef BytesRef

Introduction

On this page you can find example usage for org.apache.lucene.util BytesRef BytesRef.

Prototype

public BytesRef(CharSequence text) 

Document

Initialize the byte[] from the UTF8 bytes for the provided String.
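
As a quick, self-contained illustration (not taken from the source files below), the constructor round-trips a Java String through its UTF-8 bytes:

import org.apache.lucene.util.BytesRef;

public class BytesRefExample {
    public static void main(String[] args) {
        // The byte[] inside the BytesRef holds the UTF-8 encoding of the text
        BytesRef ref = new BytesRef("hello");
        System.out.println(ref.length);          // 5 (one byte per ASCII character)
        System.out.println(ref.utf8ToString());  // "hello" -- decoded back to a String
    }
}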

Usage

From source file:com.qwazr.search.field.SortedDocValuesType.java

License:Apache License

@Override
final public void fillValue(final Object value, final FieldConsumer consumer) {
    consumer.accept(new SortedDocValuesField(fieldName, new BytesRef(value.toString())));
}
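
For context, here is a minimal sketch of how such a field is typically consumed at index time; the field name, value, and helper method are hypothetical, not part of the QWAZR sources:

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.util.BytesRef;

public class SortedDocValuesExample {
    // Hypothetical helper illustrating the pattern above:
    static void addCategory(IndexWriter writer, String category) throws IOException {
        Document doc = new Document();
        // new BytesRef(category) stores the UTF-8 bytes of the string value
        doc.add(new SortedDocValuesField("category", new BytesRef(category)));
        writer.addDocument(doc);
    }
}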

From source file:com.qwazr.search.field.SortedSetDocValuesType.java

License:Apache License

@Override
final public void fillValue(final Object value, final FieldConsumer consumer) {
    if (value instanceof BytesRef)
        consumer.accept(new SortedSetDocValuesField(fieldName, (BytesRef) value));
    else
        consumer.accept(new SortedSetDocValuesField(fieldName, new BytesRef(value.toString())));
}

From source file:com.qwazr.search.index.BytesRefUtils.java

License:Apache License

final static public BytesRef from(final String value) {
    if (value == null)
        return new BytesRef(BytesRef.EMPTY_BYTES);
    return new BytesRef(value);
}

From source file:com.qwazr.search.index.BytesRefUtils.java

License:Apache License

final static public BytesRef from(final Integer value) {
    if (value == null)
        return new BytesRef(BytesRef.EMPTY_BYTES);
    final BytesRefBuilder builder = new BytesRefBuilder();
    NumericUtils.intToPrefixCoded(value, 0, builder);
    return builder.toBytesRef();
}
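
To see that the prefix-coded bytes round-trip, here is a minimal sketch assuming the same legacy Lucene 5.x NumericUtils API used above (in later Lucene versions these methods moved to LegacyNumericUtils):

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.NumericUtils;

public class PrefixCodedRoundTrip {
    public static void main(String[] args) {
        BytesRefBuilder builder = new BytesRefBuilder();
        NumericUtils.intToPrefixCoded(42, 0, builder); // shift 0 = full precision
        BytesRef encoded = builder.toBytesRef();
        // prefixCodedToInt reverses the encoding
        System.out.println(NumericUtils.prefixCodedToInt(encoded)); // 42
    }
}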

From source file:com.qwazr.search.index.BytesRefUtils.java

License:Apache License

final static public BytesRef from(final Long value) {
    if (value == null)
        return new BytesRef(BytesRef.EMPTY_BYTES);
    final BytesRefBuilder builder = new BytesRefBuilder();
    NumericUtils.longToPrefixCoded(value, 0, builder);
    return builder.toBytesRef();
}

From source file:com.qwazr.search.index.BytesRefUtils.java

License:Apache License

final static public BytesRef from(final Float value) {
    if (value == null)
        return new BytesRef(BytesRef.EMPTY_BYTES);
    final BytesRefBuilder builder = new BytesRefBuilder();
    NumericUtils.intToPrefixCoded(NumericUtils.floatToSortableInt(value), 0, builder);
    return builder.toBytesRef();
}

From source file:com.qwazr.search.index.BytesRefUtils.java

License:Apache License

final static public BytesRef from(final Double value) {
    if (value == null)
        return new BytesRef(BytesRef.EMPTY_BYTES);
    final BytesRefBuilder builder = new BytesRefBuilder();
    NumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(value), 0, builder);
    return builder.toBytesRef();
}

From source file:com.qwazr.search.index.BytesRefUtils.java

License:Apache License

final static public BytesRef fromAny(final Object value) {
    if (value == null)
        return new BytesRef(BytesRef.EMPTY_BYTES);
    if (value instanceof String)
        return from((String) value);
    if (value instanceof Integer)
        return from((Integer) value);
    if (value instanceof Float)
        return from((Float) value);
    if (value instanceof Long)
        return from((Long) value);
    if (value instanceof Double)
        return from((Double) value);
    if (value instanceof BytesRef)
        return (BytesRef) value;
    return new BytesRef(value.toString());
}
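
A short, hypothetical usage sketch of the dispatch above (not from the QWAZR sources):

import org.apache.lucene.util.BytesRef;
import com.qwazr.search.index.BytesRefUtils;

public class FromAnyExample {
    public static void main(String[] args) {
        // Strings keep their UTF-8 bytes; boxed numerics are prefix-coded;
        // null maps to an empty BytesRef
        BytesRef s = BytesRefUtils.fromAny("term");   // routed to from(String)
        BytesRef n = BytesRefUtils.fromAny(42);       // routed to from(Integer)
        BytesRef empty = BytesRefUtils.fromAny(null);
        System.out.println(s.utf8ToString());         // "term"
        System.out.println(empty.length);             // 0
    }
}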

From source file:com.rocana.lucene.codec.v1.RocanaBasePostingsFormatTestCase.java

License:Apache License

@Override
public void testInvertedWrite() throws Exception {
    Directory dir = newDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);

    // Must be concurrent because thread(s) can be merging
    // while up to one thread flushes, and each of those
    // threads iterates over the map while the flushing
    // thread might be adding to it:
    final Map<String, TermFreqs> termFreqs = new ConcurrentHashMap<>();

    final AtomicLong sumDocFreq = new AtomicLong();
    final AtomicLong sumTotalTermFreq = new AtomicLong();

    // TODO: would be better to use / delegate to the current
    // Codec returned by getCodec()

    iwc.setCodec(new AssertingCodec() {
        @Override
        public PostingsFormat getPostingsFormatForField(String field) {

            PostingsFormat p = getCodec().postingsFormat();
            if (p instanceof PerFieldPostingsFormat) {
                p = ((PerFieldPostingsFormat) p).getPostingsFormatForField(field);
            }
            if (p instanceof RocanaPerFieldPostingsFormat) {
                p = ((RocanaPerFieldPostingsFormat) p).getPostingsFormatForField(field);
            }
            final PostingsFormat defaultPostingsFormat = p;

            final Thread mainThread = Thread.currentThread();

            if (field.equals("body")) {

                // A PF that counts up some stats and then in
                // the end we verify the stats match what the
                // final IndexReader says, just to exercise the
                // new freedom of iterating the postings more
                // than once at flush/merge:

                return new PostingsFormat(defaultPostingsFormat.getName()) {

                    @Override
                    public FieldsConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {

                        final FieldsConsumer fieldsConsumer = defaultPostingsFormat.fieldsConsumer(state);

                        return new FieldsConsumer() {
                            @Override
                            public void write(Fields fields) throws IOException {
                                fieldsConsumer.write(fields);

                                boolean isMerge = state.context.context == IOContext.Context.MERGE;

                                // We only use one thread for flushing
                                // in this test:
                                assert isMerge || Thread.currentThread() == mainThread;

                                // We iterate the provided TermsEnum
                                // twice, so we exercise this new freedom
                                // with the inverted API; if
                                // addOnSecondPass is true, we add up
                                // term stats on the 2nd iteration:
                                boolean addOnSecondPass = random().nextBoolean();

                                //System.out.println("write isMerge=" + isMerge + " 2ndPass=" + addOnSecondPass);

                                // Gather our own stats:
                                Terms terms = fields.terms("body");
                                assert terms != null;

                                TermsEnum termsEnum = terms.iterator();
                                PostingsEnum docs = null;
                                while (termsEnum.next() != null) {
                                    BytesRef term = termsEnum.term();
                                    // TODO: also sometimes ask for payloads/offsets?
                                    boolean noPositions = random().nextBoolean();
                                    if (noPositions) {
                                        docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                    } else {
                                        docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                    }
                                    int docFreq = 0;
                                    long totalTermFreq = 0;
                                    while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                        docFreq++;
                                        totalTermFreq += docs.freq();
                                        int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                        if (!noPositions) {
                                            for (int i = 0; i < limit; i++) {
                                                docs.nextPosition();
                                            }
                                        }
                                    }

                                    String termString = term.utf8ToString();

                                    // During merge we should only see terms
                                    // we had already seen during a
                                    // previous flush:
                                    assertTrue(isMerge == false || termFreqs.containsKey(termString));

                                    if (isMerge == false) {
                                        if (addOnSecondPass == false) {
                                            TermFreqs tf = termFreqs.get(termString);
                                            if (tf == null) {
                                                tf = new TermFreqs();
                                                termFreqs.put(termString, tf);
                                            }
                                            tf.docFreq += docFreq;
                                            tf.totalTermFreq += totalTermFreq;
                                            sumDocFreq.addAndGet(docFreq);
                                            sumTotalTermFreq.addAndGet(totalTermFreq);
                                        } else if (termFreqs.containsKey(termString) == false) {
                                            // Add placeholder (2nd pass will
                                            // set its counts):
                                            termFreqs.put(termString, new TermFreqs());
                                        }
                                    }
                                }

                                // Also test seeking the TermsEnum:
                                for (String term : termFreqs.keySet()) {
                                    if (termsEnum.seekExact(new BytesRef(term))) {
                                        // TODO: also sometimes ask for payloads/offsets?
                                        boolean noPositions = random().nextBoolean();
                                        if (noPositions) {
                                            docs = termsEnum.postings(docs, PostingsEnum.FREQS);
                                        } else {
                                            docs = termsEnum.postings(null, PostingsEnum.POSITIONS);
                                        }

                                        int docFreq = 0;
                                        long totalTermFreq = 0;
                                        while (docs.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                                            docFreq++;
                                            totalTermFreq += docs.freq();
                                            int limit = TestUtil.nextInt(random(), 1, docs.freq());
                                            if (!noPositions) {
                                                for (int i = 0; i < limit; i++) {
                                                    docs.nextPosition();
                                                }
                                            }
                                        }

                                        if (isMerge == false && addOnSecondPass) {
                                            TermFreqs tf = termFreqs.get(term);
                                            assert tf != null;
                                            tf.docFreq += docFreq;
                                            tf.totalTermFreq += totalTermFreq;
                                            sumDocFreq.addAndGet(docFreq);
                                            sumTotalTermFreq.addAndGet(totalTermFreq);
                                        }

                                        //System.out.println("  term=" + term + " docFreq=" + docFreq + " ttDF=" + termToDocFreq.get(term));
                                        assertTrue(docFreq <= termFreqs.get(term).docFreq);
                                        assertTrue(totalTermFreq <= termFreqs.get(term).totalTermFreq);
                                    }
                                }

                                // Also test seekCeil
                                for (int iter = 0; iter < 10; iter++) {
                                    BytesRef term = new BytesRef(
                                            TestUtil.randomRealisticUnicodeString(random()));
                                    SeekStatus status = termsEnum.seekCeil(term);
                                    if (status == SeekStatus.NOT_FOUND) {
                                        assertTrue(term.compareTo(termsEnum.term()) < 0);
                                    }
                                }
                            }

                            @Override
                            public void close() throws IOException {
                                fieldsConsumer.close();
                            }
                        };
                    }

                    @Override
                    public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
                        return defaultPostingsFormat.fieldsProducer(state);
                    }
                };
            } else {
                return defaultPostingsFormat;
            }
        }
    });

    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);

    LineFileDocs docs = new LineFileDocs(random());
    int bytesToIndex = atLeast(100) * 1024;
    int bytesIndexed = 0;
    while (bytesIndexed < bytesToIndex) {
        Document doc = docs.nextDoc();
        w.addDocument(doc);
        bytesIndexed += RamUsageTester.sizeOf(doc);
    }

    IndexReader r = w.getReader();
    w.close();

    Terms terms = MultiFields.getTerms(r, "body");
    assertEquals(sumDocFreq.get(), terms.getSumDocFreq());
    assertEquals(sumTotalTermFreq.get(), terms.getSumTotalTermFreq());

    TermsEnum termsEnum = terms.iterator();
    long termCount = 0;
    boolean supportsOrds = true;
    while (termsEnum.next() != null) {
        BytesRef term = termsEnum.term();
        assertEquals(termFreqs.get(term.utf8ToString()).docFreq, termsEnum.docFreq());
        assertEquals(termFreqs.get(term.utf8ToString()).totalTermFreq, termsEnum.totalTermFreq());
        if (supportsOrds) {
            long ord;
            try {
                ord = termsEnum.ord();
            } catch (UnsupportedOperationException uoe) {
                supportsOrds = false;
                ord = -1;
            }
            if (ord != -1) {
                assertEquals(termCount, ord);
            }
        }
        termCount++;
    }
    assertEquals(termFreqs.size(), termCount);

    r.close();
    dir.close();
}

From source file:com.rocana.lucene.codec.v1.RocanaBlockTreeTermsReader.java

License:Apache License

/** Sole constructor. */
public RocanaBlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadState state)
        throws IOException {
    boolean success = false;
    IndexInput indexIn = null;

    this.postingsReader = postingsReader;
    this.segment = state.segmentInfo.name;

    String termsName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_EXTENSION);
    try {
        termsIn = state.directory.openInput(termsName, state.context);
        version = CodecUtil.checkIndexHeader(termsIn, TERMS_CODEC_NAME, VERSION_START, VERSION_CURRENT,
                state.segmentInfo.getId(), state.segmentSuffix);

        if (version < VERSION_AUTO_PREFIX_TERMS) {
            // Old (pre-5.2.0) index, no auto-prefix terms:
            this.anyAutoPrefixTerms = false;
        } else if (version == VERSION_AUTO_PREFIX_TERMS) {
            // 5.2.x index, might have auto-prefix terms:
            this.anyAutoPrefixTerms = true;
        } else {
            // 5.3.x index, we record up front if we may have written any auto-prefix terms:
            assert version >= VERSION_AUTO_PREFIX_TERMS_COND;
            byte b = termsIn.readByte();
            if (b == 0) {
                this.anyAutoPrefixTerms = false;
            } else if (b == 1) {
                this.anyAutoPrefixTerms = true;
            } else {
                throw new CorruptIndexException("invalid anyAutoPrefixTerms: expected 0 or 1 but got " + b,
                        termsIn);
            }
        }

        String indexName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_INDEX_EXTENSION);
        indexIn = state.directory.openInput(indexName, state.context);
        CodecUtil.checkIndexHeader(indexIn, TERMS_INDEX_CODEC_NAME, version, version, state.segmentInfo.getId(),
                state.segmentSuffix);

        // IMPORTANT: comment out this one line to prevent checksumming the entire file.
        //            This is the reason we have a custom Lucene codec and forked Lucene classes.
        //CodecUtil.checksumEntireFile(indexIn);

        // Have PostingsReader init itself
        postingsReader.init(termsIn, state);

        // NOTE: data file is too costly to verify checksum against all the bytes on open,
        // but for now we at least verify proper structure of the checksum footer: which looks
        // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
        // such as file truncation.
        CodecUtil.retrieveChecksum(termsIn);

        // Read per-field details
        seekDir(termsIn, dirOffset);
        seekDir(indexIn, indexDirOffset);

        final int numFields = termsIn.readVInt();
        if (numFields < 0) {
            throw new CorruptIndexException("invalid numFields: " + numFields, termsIn);
        }

        for (int i = 0; i < numFields; ++i) {
            final int field = termsIn.readVInt();
            final long numTerms = termsIn.readVLong();
            if (numTerms <= 0) {
                throw new CorruptIndexException("Illegal numTerms for field number: " + field, termsIn);
            }
            final int numBytes = termsIn.readVInt();
            if (numBytes < 0) {
                throw new CorruptIndexException(
                        "invalid rootCode for field number: " + field + ", numBytes=" + numBytes, termsIn);
            }
            final BytesRef rootCode = new BytesRef(new byte[numBytes]);
            termsIn.readBytes(rootCode.bytes, 0, numBytes);
            rootCode.length = numBytes;
            final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
            if (fieldInfo == null) {
                throw new CorruptIndexException("invalid field number: " + field, termsIn);
            }
            final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1
                    : termsIn.readVLong();
            final long sumDocFreq = termsIn.readVLong();
            final int docCount = termsIn.readVInt();
            final int longsSize = termsIn.readVInt();
            if (longsSize < 0) {
                throw new CorruptIndexException(
                        "invalid longsSize for field: " + fieldInfo.name + ", longsSize=" + longsSize, termsIn);
            }
            BytesRef minTerm = readBytesRef(termsIn);
            BytesRef maxTerm = readBytesRef(termsIn);
            if (docCount < 0 || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs
                throw new CorruptIndexException(
                        "invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.maxDoc(), termsIn);
            }
            if (sumDocFreq < docCount) { // #postings must be >= #docs with field
                throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount,
                        termsIn);
            }
            if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
                throw new CorruptIndexException(
                        "invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq,
                        termsIn);
            }
            final long indexStartFP = indexIn.readVLong();
            RocanaFieldReader previous = fields.put(fieldInfo.name,
                    new RocanaFieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq,
                            docCount, indexStartFP, longsSize, indexIn, minTerm, maxTerm));
            if (previous != null) {
                throw new CorruptIndexException("duplicate field: " + fieldInfo.name, termsIn);
            }
        }

        indexIn.close();
        success = true;
    } finally {
        if (!success) {
            // this.close() will close in:
            IOUtils.closeWhileHandlingException(indexIn, this);
        }
    }
}
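
The readBytesRef helper called above is not shown in this excerpt. A minimal sketch consistent with those calls, and with the upstream Lucene reader it forks, reads a VInt length followed by that many raw bytes; treat this as an assumption, not the forked class's verbatim code:

import java.io.IOException;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;

// Sketch of the private helper inside the terms reader:
private static BytesRef readBytesRef(IndexInput in) throws IOException {
    BytesRef bytes = new BytesRef();
    bytes.length = in.readVInt();
    bytes.bytes = new byte[bytes.length];
    in.readBytes(bytes.bytes, 0, bytes.length);
    return bytes;
}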