proj.zoie.api.ZoieSegmentReader.java Source code

Java tutorial

Introduction

Here is the source code for proj.zoie.api.ZoieSegmentReader.java

Source

package proj.zoie.api;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import it.unimi.dsi.fastutil.ints.IntRBTreeSet;
import it.unimi.dsi.fastutil.longs.LongIterator;
import it.unimi.dsi.fastutil.longs.LongSet;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Payload;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermPositions;

import proj.zoie.api.impl.util.ArrayDocIdSet;
import proj.zoie.api.indexing.AbstractZoieIndexable;
import proj.zoie.api.indexing.IndexReaderDecorator;
import proj.zoie.impl.indexing.internal.ZoieSegmentTermDocs;
import proj.zoie.impl.indexing.internal.ZoieSegmentTermPositions;

public class ZoieSegmentReader<R extends IndexReader> extends ZoieIndexReader<R> {
    /** Text of the single token emitted by {@link UIDTokenStream}; also the text of {@link #UID_TERM}. */
    public static final String termVal = "_UID";
    /** Term whose postings are scanned in {@code init} to read each document's UID payload. */
    public static final Term UID_TERM = new Term(AbstractZoieIndexable.DOCUMENT_ID_PAYLOAD_FIELD, termVal);
    /** Reader produced by the decorator; {@code null} when no decorator was supplied. */
    private R _decoratedReader;
    /** UID of every document, indexed by docid; slots without a UID payload hold DELETED_UID. */
    private long[] _uidArray;
    /** Docids marked for deletion by {@code markDeletes}; shared by reference with copies of this reader. */
    private IntRBTreeSet _delDocIdSet = new IntRBTreeSet();
    /** Array snapshot of {@link #_delDocIdSet} taken by {@code commitDeletes}; {@code null} until then. */
    private int[] _currentDelDocIds;

    /**
     * Token stream that yields exactly one token with text {@link ZoieSegmentReader#termVal}
     * and a payload holding the UID as 8 bytes, least-significant byte first.
     */
    static final class UIDTokenStream extends TokenStream {
        private final PayloadAttribute payloadAttr;
        private final CharTermAttribute termAttr;

        /** True while the single token has not yet been handed out. */
        private boolean returnToken;

        /**
         * @param uid the document UID to encode into the token's payload
         */
        UIDTokenStream(long uid) {
            // Byte i carries bits [8*i, 8*i+7] of the uid (little-endian layout).
            final byte[] encoded = new byte[8];
            for (int i = 0; i < encoded.length; i++) {
                encoded[i] = (byte) (uid >> (8 * i));
            }
            payloadAttr = addAttribute(PayloadAttribute.class);
            payloadAttr.setPayload(new Payload(encoded));
            termAttr = addAttribute(CharTermAttribute.class);
            termAttr.append(termVal);
            returnToken = true;
        }

        /**
         * Reports the single token on the first call and end-of-stream on every later call.
         *
         * @return {@code true} exactly once, then {@code false}
         */
        @Override
        public final boolean incrementToken() throws IOException {
            final boolean hasToken = returnToken;
            returnToken = false;
            return hasToken;
        }
    }

    /**
     * Adds the UID field to a document. The field is named after {@link #UID_TERM}'s field,
     * is backed by a {@link UIDTokenStream} carrying the id, and has norms omitted.
     *
     * @param doc document to add the field to
     * @param id  UID to store
     */
    public static void fillDocumentID(Document doc, long id) {
        final String fieldName = ZoieSegmentReader.UID_TERM.field();
        final Field uidField = new Field(fieldName, new UIDTokenStream(id));
        uidField.setOmitNorms(true);
        doc.add(uidField);
    }

    public ZoieSegmentReader(IndexReader in, IndexReaderDecorator<R> decorator) throws IOException {
        super(in, decorator);
        if (!(in instanceof SegmentReader)) {
            throw new IllegalStateException("ZoieSegmentReader can only be constucted from " + SegmentReader.class);
        }
        init(in);
        _decoratedReader = (decorator == null ? null : decorator.decorate(this));
    }

    /**
     * Builds a reader over {@code innerReader} that shares the UID array, UID bounds, dedup
     * flag, docid mapper and pending-delete set of {@code copyFrom} by reference. The
     * committed delete snapshot ({@code _currentDelDocIds}) is not carried over.
     *
     * @param copyFrom    reader whose state is shared
     * @param innerReader underlying reader to wrap
     * @param withDeletes passed through to the decorator's {@code redecorate}
     * @throws IOException declared for the superclass constructor and {@code redecorate}
     */
    ZoieSegmentReader(ZoieSegmentReader<R> copyFrom, IndexReader innerReader, boolean withDeletes)
            throws IOException {
        super(innerReader, copyFrom._decorator);
        _docIDMapper = copyFrom._docIDMapper;
        _delDocIdSet = copyFrom._delDocIdSet;
        _uidArray = copyFrom._uidArray;
        _minUID = copyFrom._minUID;
        _maxUID = copyFrom._maxUID;
        _noDedup = copyFrom._noDedup;

        _decoratedReader = (copyFrom._decorator == null)
                ? null
                : copyFrom._decorator.redecorate(copyFrom._decoratedReader, this, withDeletes);
    }

    /**
     * Shallow-copy constructor used for duplication. All per-segment state of
     * {@code copyFrom}, including the committed delete snapshot, is shared by reference,
     * and the decorated reader is redecorated for the new instance.
     *
     * @param copyFrom    reader to duplicate
     * @param innerReader underlying reader to wrap
     * @throws IOException declared for the superclass constructor and {@code redecorate}
     */
    ZoieSegmentReader(ZoieSegmentReader<R> copyFrom, IndexReader innerReader) throws IOException {
        super(innerReader, copyFrom._decorator);
        _docIDMapper = copyFrom._docIDMapper;
        _delDocIdSet = copyFrom._delDocIdSet;
        _currentDelDocIds = copyFrom._currentDelDocIds;
        _uidArray = copyFrom._uidArray;
        _minUID = copyFrom._minUID;
        _maxUID = copyFrom._maxUID;
        _noDedup = copyFrom._noDedup;

        // The "with deletes" flag is true only when this reader reports a non-empty delete array.
        _decoratedReader = (copyFrom._decorator == null)
                ? null
                : copyFrom._decorator.redecorate(copyFrom._decoratedReader, this,
                        this.getDelDocIds() != null && this.getDelDocIds().length > 0);
    }

    /**
     * Marks the documents of this segment that match the given UIDs as pending deletes.
     * UIDs equal to {@code DELETED_UID} and UIDs the docid mapper cannot resolve are skipped.
     *
     * @param delDocs     UIDs requested for deletion
     * @param deletedUIDs receives every UID that was resolved to a docid in this segment
     */
    @Override
    public void markDeletes(LongSet delDocs, LongSet deletedUIDs) {
        final DocIDMapper<?> mapper = getDocIDMaper();
        final LongIterator uids = delDocs.iterator();
        final IntRBTreeSet pending = _delDocIdSet;

        while (uids.hasNext()) {
            final long uid = uids.nextLong();
            if (uid == ZoieIndexReader.DELETED_UID) {
                continue;
            }
            final int docid = mapper.getDocID(uid);
            if (docid == DocIDMapper.NOT_FOUND) {
                continue;
            }
            pending.add(docid);
            deletedUIDs.add(uid);
        }
    }

    /**
     * Snapshots the pending-delete set into an int array; the set iterates in ascending
     * order, so the array is sorted.
     */
    @Override
    public void commitDeletes() {
        final int[] snapshot = _delDocIdSet.toIntArray();
        _currentDelDocIds = snapshot;
    }

    /**
     * Publishes the committed delete snapshot as this reader's active delete array and,
     * when both a decorator and a decorated reader exist, pushes it to the decorated reader.
     */
    public void setDelDocIds() {
        _delDocIds = _currentDelDocIds;
        if (_decorator == null || _decoratedReader == null) {
            return;
        }
        _decorator.setDeleteSet(_decoratedReader, new ArrayDocIdSet(_currentDelDocIds));
    }

    /**
     * @return the decorated reader, or {@code null} when this reader has no decorator
     */
    public R getDecoratedReader() {
        return this._decoratedReader;
    }

    /**
     * @return a new list holding the decorated reader, or an empty list when there is none
     */
    @Override
    public List<R> getDecoratedReaders() {
        final List<R> readers = new ArrayList<R>(1);
        final R decorated = _decoratedReader;
        if (decorated != null) {
            readers.add(decorated);
        }
        return readers;
    }

    /**
     * Looks up the binary value stored in the document-store field for a UID.
     *
     * @param uid UID of the document
     * @return the stored bytes, or {@code null} when the UID maps to a negative docid or no
     *         document is returned for it
     * @throws IOException if loading the document fails
     */
    @Override
    public byte[] getStoredValue(long uid) throws IOException {
        final int docid = this.getDocIDMaper().getDocID(uid);
        if (docid < 0) {
            return null;
        }

        final Document stored = document(docid);
        if (stored == null) {
            return null;
        }
        return stored.getBinaryValue(AbstractZoieIndexable.DOCUMENT_STORE_FIELD);
    }

    /**
     * Populates {@code _uidArray} from the payloads of {@link #UID_TERM} and widens
     * {@code _minUID}/{@code _maxUID} to cover every UID read. Docids that do not appear in
     * the term's postings are filled with {@code DELETED_UID}.
     *
     * @param reader reader to scan
     * @throws IOException if reading the postings fails
     */
    private void init(IndexReader reader) throws IOException {
        final int maxDoc = reader.maxDoc();
        _uidArray = new long[maxDoc];
        final byte[] payloadBuffer = new byte[8]; // eight bytes for a long
        TermPositions positions = null;
        try {
            positions = reader.termPositions(UID_TERM);
            int next = 0; // next docid slot to be written
            while (positions.next()) {
                final int doc = positions.doc();
                assert doc < maxDoc;

                // Docids skipped by the postings carry no UID.
                for (; next < doc; next++) {
                    _uidArray[next] = DELETED_UID;
                }

                // The UID is the payload of the first position of the posting.
                positions.nextPosition();
                positions.getPayload(payloadBuffer, 0);
                final long uid = bytesToLong(payloadBuffer);
                if (uid < _minUID) {
                    _minUID = uid;
                }
                if (uid > _maxUID) {
                    _maxUID = uid;
                }
                _uidArray[next] = uid;
                next++;
            }
            // Trailing docids after the last posting.
            for (; next < maxDoc; next++) {
                _uidArray[next] = DELETED_UID;
            }
        } finally {
            if (positions != null) {
                positions.close();
            }
        }
    }

    /**
     * Decodes the first eight bytes of an array as a little-endian {@code long}
     * ({@code bytes[0]} is the least-significant byte).
     *
     * @param bytes array of at least eight bytes
     * @return the decoded value
     */
    public static long bytesToLong(byte[] bytes) {
        long value = 0L;
        // Fold from the most-significant byte down so each step shifts in one more byte.
        for (int i = 7; i >= 0; i--) {
            value = (value << 8) | (bytes[i] & 0xFFL);
        }
        return value;
    }

    /**
     * @param docid document id within this segment
     * @return the UID recorded for {@code docid} in the UID array
     */
    @Override
    public long getUID(int docid) {
        final long[] uids = _uidArray;
        return uids[docid];
    }

    /**
     * @return the internal docid-to-UID array itself (not a copy)
     */
    public long[] getUIDArray() {
        return this._uidArray;
    }

    /**
     * @return whether the wrapped reader reports deletions
     */
    @Override
    protected boolean hasIndexDeletions() {
        final boolean innerHasDeletions = in.hasDeletions();
        return innerHasDeletions;
    }

    /**
     * Reports whether a document is deleted. Unless dedup is disabled, a docid present in
     * the active delete array counts as deleted; otherwise the wrapped reader decides.
     *
     * @param docid document id within this segment
     * @return {@code true} if the document is deleted
     */
    @Override
    public boolean isDeleted(int docid) {
        if (_noDedup) {
            return in.isDeleted(docid);
        }
        final int[] active = _delDocIds;
        final boolean markedDeleted = active != null && Arrays.binarySearch(active, docid) >= 0;
        return markedDeleted || in.isDeleted(docid);
    }

    /**
     * Returns the wrapped reader's postings for {@code term}, wrapped with the active delete
     * array when dedup is enabled and that array is non-empty.
     *
     * @param term term to enumerate
     * @return the postings, possibly wrapped; whatever the wrapped reader returned otherwise
     * @throws IOException if the wrapped reader fails
     */
    @Override
    public TermDocs termDocs(Term term) throws IOException {
        ensureOpen();
        final TermDocs inner = in.termDocs(term);
        if (_noDedup) {
            return inner;
        }

        final int[] active = _delDocIds;
        if (inner != null && active != null && active.length > 0) {
            return new ZoieSegmentTermDocs(inner, new ArrayDocIdSet(active));
        }
        return inner;
    }

    /**
     * Returns the wrapped reader's unpositioned postings enumerator, wrapped with the active
     * delete array when dedup is enabled and that array is non-empty.
     *
     * @return the enumerator, possibly wrapped; whatever the wrapped reader returned otherwise
     * @throws IOException if the wrapped reader fails
     */
    @Override
    public TermDocs termDocs() throws IOException {
        ensureOpen();
        final TermDocs inner = in.termDocs();
        if (_noDedup) {
            return inner;
        }

        final int[] active = _delDocIds;
        if (inner != null && active != null && active.length > 0) {
            return new ZoieSegmentTermDocs(inner, new ArrayDocIdSet(active));
        }
        return inner;
    }

    /**
     * Returns the wrapped reader's positional postings for {@code term}, wrapped with the
     * active delete array when dedup is enabled and that array is non-empty.
     *
     * @param term term to enumerate
     * @return the postings, possibly wrapped; whatever the wrapped reader returned otherwise
     * @throws IOException if the wrapped reader fails
     */
    @Override
    public TermPositions termPositions(Term term) throws IOException {
        ensureOpen();
        final TermPositions inner = in.termPositions(term);
        if (_noDedup) {
            return inner;
        }

        final int[] active = _delDocIds;
        if (inner != null && active != null && active.length > 0) {
            return new ZoieSegmentTermPositions(inner, new ArrayDocIdSet(active));
        }
        return inner;
    }

    /**
     * Returns the wrapped reader's unpositioned positional enumerator, wrapped with the
     * active delete array when dedup is enabled and that array is non-empty.
     *
     * @return the enumerator, possibly wrapped; whatever the wrapped reader returned otherwise
     * @throws IOException if the wrapped reader fails
     */
    @Override
    public TermPositions termPositions() throws IOException {
        ensureOpen();
        final TermPositions inner = in.termPositions();
        if (_noDedup) {
            return inner;
        }

        final int[] active = _delDocIds;
        if (inner != null && active != null && active.length > 0) {
            return new ZoieSegmentTermPositions(inner, new ArrayDocIdSet(active));
        }
        return inner;
    }

    /**
     * Always returns {@code null}.
     * NOTE(review): presumably because a single-segment reader has no sub-readers — confirm
     * against the superclass contract.
     *
     * @return {@code null}
     */
    @Override
    public ZoieIndexReader<R>[] getSequentialSubReaders() {
        return null;
    }

    /**
     * @return the segment name reported by the wrapped {@link SegmentReader}
     */
    public String getSegmentName() {
        final SegmentReader segment = (SegmentReader) in;
        return segment.getSegmentName();
    }

    /**
     * Closes the decorated reader, if there is one. The wrapped reader is not closed here.
     *
     * @throws IOException if closing the decorated reader fails
     */
    @Override
    protected synchronized void doClose() throws IOException {
        // _decoratedReader is null when the reader was built without a decorator;
        // closing must not fail with a NullPointerException in that case.
        if (_decoratedReader != null) {
            _decoratedReader.close();
        }
    }

    /**
     * Overridden as a no-op: the body is empty and does not delegate to the superclass.
     *
     * @throws IOException never thrown by this implementation; declared by the overridden method
     */
    @Override
    public void decRef() throws IOException {
        // not synchronized, since it doesn't do anything anyway
    }

    /**
     * Counts live documents. Once a delete snapshot has been committed, the count is
     * {@code maxDoc()} minus the snapshot's length; before that it is the superclass count.
     *
     * @return number of documents not counted as deleted
     */
    @Override
    public int numDocs() {
        return (_currentDelDocIds == null)
                ? super.numDocs()
                : super.maxDoc() - _currentDelDocIds.length;
    }

    /**
     * Makes a shallow copy of this reader over the same wrapped reader, using the
     * two-argument copy constructor.
     *
     * @return the new reader
     * @throws IOException if the copy constructor fails
     */
    @Override
    public ZoieSegmentReader<R> copy() throws IOException {
        final ZoieSegmentReader<R> duplicate = new ZoieSegmentReader<R>(this, this.in);
        return duplicate;
    }

}