org.janusgraph.diskstorage.cassandra.embedded.CassandraEmbeddedKeyColumnValueStore.java Source code

Java tutorial

Introduction

Here is the source code for org.janusgraph.diskstorage.cassandra.embedded.CassandraEmbeddedKeyColumnValueStore.java

Source

// Copyright 2017 JanusGraph Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package org.janusgraph.diskstorage.cassandra.embedded;

import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;
import org.janusgraph.diskstorage.util.time.TimestampProvider;
import org.janusgraph.diskstorage.*;
import org.janusgraph.diskstorage.cassandra.utils.CassandraHelper;
import org.janusgraph.diskstorage.keycolumnvalue.*;
import org.janusgraph.diskstorage.util.RecordIterator;
import org.janusgraph.diskstorage.util.StaticArrayBuffer;
import org.janusgraph.diskstorage.util.StaticArrayEntry;

import org.apache.cassandra.config.CFMetaData;
import org.apache.cassandra.config.Schema;
import org.apache.cassandra.db.*;
import org.apache.cassandra.db.ConsistencyLevel;
import org.apache.cassandra.db.composites.CellNames;
import org.apache.cassandra.db.composites.Composite;
import org.apache.cassandra.db.filter.IDiskAtomFilter;
import org.apache.cassandra.db.filter.SliceQueryFilter;
import org.apache.cassandra.dht.*;
import org.apache.cassandra.exceptions.InvalidRequestException;
import org.apache.cassandra.exceptions.IsBootstrappingException;
import org.apache.cassandra.exceptions.RequestTimeoutException;
import org.apache.cassandra.exceptions.UnavailableException;
import org.apache.cassandra.service.StorageProxy;
import org.apache.cassandra.service.StorageService;
import org.apache.cassandra.thrift.SlicePredicate;
import org.apache.cassandra.thrift.SliceRange;
import org.apache.cassandra.thrift.ThriftValidation;
import org.apache.commons.lang.ArrayUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;

import java.nio.ByteBuffer;
import java.util.*;
import static org.janusgraph.diskstorage.cassandra.CassandraTransaction.getTx;

public class CassandraEmbeddedKeyColumnValueStore implements KeyColumnValueStore {

    private static final Logger log = LoggerFactory.getLogger(CassandraEmbeddedKeyColumnValueStore.class);

    private final String keyspace;
    private final String columnFamily;
    private final CassandraEmbeddedStoreManager storeManager;
    private final TimestampProvider times;
    private final CassandraEmbeddedGetter entryGetter;

    public CassandraEmbeddedKeyColumnValueStore(String keyspace, String columnFamily,
            CassandraEmbeddedStoreManager storeManager) throws RuntimeException {
        this.keyspace = keyspace;
        this.columnFamily = columnFamily;
        this.storeManager = storeManager;
        this.times = this.storeManager.getTimestampProvider();
        entryGetter = new CassandraEmbeddedGetter(storeManager.getMetaDataSchema(columnFamily), times);
    }

    /**
     * Closes this store. This implementation does nothing: the method body is empty and the
     * store itself acquires nothing in its constructor that would need releasing here.
     *
     * @throws BackendException declared by the interface; never thrown by this implementation
     */
    @Override
    public void close() throws BackendException {
    }

    /**
     * Locking is not implemented by this store.
     *
     * @throws UnsupportedOperationException always, regardless of the arguments
     */
    @Override
    public void acquireLock(StaticBuffer key, StaticBuffer column, StaticBuffer expectedValue, StoreTransaction txh)
            throws BackendException {
        throw new UnsupportedOperationException();
    }

    /**
     * Returns an iterator over the keys selected by a key-range query.
     * <p>
     * The request is rejected when the node runs {@link RandomPartitioner} or
     * {@link Murmur3Partitioner}; see the error message for the stated requirement.
     *
     * @param keyRangeQuery key bounds plus the column slice to fetch for every key
     * @param txh           transaction handle, passed on to the iterator
     * @return an iterator that pages through the matching rows using the store manager's page size
     * @throws PermanentBackendException if one of the two partitioners named above is in use
     * @throws BackendException          if fetching the first page fails
     */
    @Override
    public KeyIterator getKeys(KeyRangeQuery keyRangeQuery, StoreTransaction txh) throws BackendException {
        final IPartitioner activePartitioner = StorageService.getPartitioner();

        // see rant about this in Astyanax implementation
        final boolean hashPartitioned = activePartitioner instanceof RandomPartitioner
                || activePartitioner instanceof Murmur3Partitioner;
        if (hashPartitioned) {
            throw new PermanentBackendException(
                    "This operation is only supported when byte-ordered partitioner is used.");
        }

        final int rowsPerPage = storeManager.getPageSize();
        return new RowIterator(keyRangeQuery, rowsPerPage, txh);
    }

    /**
     * Returns an iterator over all keys between the partitioner's minimum and maximum token,
     * fetching for each key the columns selected by {@code query}.
     *
     * @param query column slice to fetch for every key
     * @param txh   transaction handle, passed on to the iterator
     * @return an iterator that pages through the rows using the store manager's page size
     * @throws BackendException if the partitioner is unsupported or fetching the first page fails
     */
    @Override
    public KeyIterator getKeys(SliceQuery query, StoreTransaction txh) throws BackendException {
        final Token ringStart = getMinimumToken();
        final Token ringEnd = getMaximumToken();
        return new RowIterator(ringStart, ringEnd, query, storeManager.getPageSize(), txh);
    }

    /**
     * Builds a {@link RangeSliceCommand} for the rows between two tokens and executes it
     * through the {@link StorageProxy} at {@code ConsistencyLevel.QUORUM}.
     * <p>
     * To match the behavior of the standard Cassandra thrift API endpoint, the
     * {@code nowMillis} argument should be the number of milliseconds since the
     * UNIX Epoch (e.g. System.currentTimeMillis() or equivalent obtained
     * through a {@link TimestampProvider}). This is per
     * {@link org.apache.cassandra.thrift.CassandraServer#get_range_slices(ColumnParent, SlicePredicate, KeyRange, ConsistencyLevel)},
     * which passes the server's System.currentTimeMillis() to the
     * {@code RangeSliceCommand} constructor.
     *
     * @param start      token whose minimum key bound opens the row range
     * @param end        token whose minimum key bound closes the row range
     * @param sliceQuery columns to fetch per row; when {@code null} an unbounded column range
     *                   capped at 5 columns is requested instead
     * @param pageSize   maximum number of results requested from Cassandra
     * @param nowMillis  read timestamp in milliseconds since the UNIX Epoch
     * @return the rows returned by {@link StorageProxy#getRangeSlice}
     * @throws PermanentBackendException wrapping any exception raised while building or
     *                                   executing the command
     */
    private List<Row> getKeySlice(Token start, Token end, @Nullable SliceQuery sliceQuery, int pageSize,
            long nowMillis) throws BackendException {
        final IPartitioner ringPartitioner = StorageService.getPartitioner();

        /* Note: we need to fetch columns for each row as well to remove "range ghosts" */
        final SliceRange columnRange = new SliceRange();
        if (sliceQuery != null) {
            columnRange.setStart(sliceQuery.getSliceStart().asByteBuffer());
            columnRange.setFinish(sliceQuery.getSliceEnd().asByteBuffer());
            if (sliceQuery.hasLimit()) {
                columnRange.setCount(sliceQuery.getLimit());
            } else {
                columnRange.setCount(Integer.MAX_VALUE);
            }
        } else {
            columnRange.setStart(ArrayUtils.EMPTY_BYTE_ARRAY);
            columnRange.setFinish(ArrayUtils.EMPTY_BYTE_ARRAY);
            columnRange.setCount(5);
        }
        final SlicePredicate columnPredicate = new SlicePredicate().setSlice_range(columnRange);

        final RowPosition lowerBound = start.minKeyBound(ringPartitioner);
        final RowPosition upperBound = end.minKeyBound(ringPartitioner);

        try {
            final CFMetaData metaData = Schema.instance.getCFMetaData(keyspace, columnFamily);
            final IDiskAtomFilter columnFilter = ThriftValidation.asIFilter(columnPredicate, metaData, null);

            final RangeSliceCommand rangeCommand = new RangeSliceCommand(keyspace, columnFamily, nowMillis,
                    columnFilter, new Bounds<RowPosition>(lowerBound, upperBound), pageSize);

            return StorageProxy.getRangeSlice(rangeCommand, ConsistencyLevel.QUORUM);
        } catch (Exception e) {
            // Every failure, checked or unchecked, is reported as permanent.
            throw new PermanentBackendException(e);
        }
    }

    /**
     * Returns the name of this store.
     *
     * @return the column family name this store was constructed with
     */
    @Override
    public String getName() {
        return columnFamily;
    }

    /**
     * Reads the columns of a single key that fall inside the queried column range.
     * <p>
     * Returns {@link EntryList#EMPTY_LIST} when Cassandra yields no row, a null row, a row without
     * a column family, or a column family that is marked for delete. Cells that are not live at
     * the read timestamp are filtered out before the entry list is built.
     *
     * @param query key, column range and limit to read
     * @param txh   transaction handle; supplies the read consistency level
     * @return the live entries of the row, built by {@link CassandraHelper#makeEntryList}
     * @throws PermanentBackendException if Cassandra returns more than one row for the key
     * @throws BackendException          if the read itself fails
     */
    @Override
    public EntryList getSlice(KeySliceQuery query, StoreTransaction txh) throws BackendException {

        /*
         * This timestamp mimics the timestamp used by
         * org.apache.cassandra.thrift.CassandraServer#get(ByteBuffer,ColumnPath,ConsistencyLevel).
         *
         * That method passes the server's System.currentTimeMillis() to
         * ReadCommand#create(String, ByteBuffer, String, long, IDiskAtomFilter), which in turn
         * passes that timestamp to the SliceFromReadCommand constructor.
         */
        final long nowMillis = times.getTime().toEpochMilli();

        final Composite rangeStart = CellNames.simpleDense(query.getSliceStart().asByteBuffer());
        final Composite rangeEnd = CellNames.simpleDense(query.getSliceEnd().asByteBuffer());

        // One column more than the limit is requested when a limit is set; presumably this
        // compensates for the end column being dropped by makeEntryList below (verify).
        final int fetchCount = query.getLimit() + (query.hasLimit() ? 1 : 0);
        final SliceQueryFilter columnFilter = new SliceQueryFilter(rangeStart, rangeEnd, false, fetchCount);

        final ReadCommand command = new SliceFromReadCommand(keyspace, query.getKey().asByteBuffer(),
                columnFamily, nowMillis, columnFilter);

        final List<Row> rows = read(command, getTx(txh).getReadConsistencyLevel().getDB());

        if (rows == null || rows.isEmpty()) {
            return EntryList.EMPTY_LIST;
        }

        final int rowCount = rows.size();
        if (rowCount > 1) {
            throw new PermanentBackendException("Received " + rowCount + " rows for single key");
        }

        final Row row = rows.get(0);
        if (row == null) {
            log.warn("Null Row object retrieved from Cassandra StorageProxy");
            return EntryList.EMPTY_LIST;
        }

        final ColumnFamily rowData = row.cf;
        if (rowData == null) {
            log.debug("null ColumnFamily (\"{}\")", columnFamily);
            return EntryList.EMPTY_LIST;
        }

        if (rowData.isMarkedForDelete()) {
            return EntryList.EMPTY_LIST;
        }

        final Iterable<Cell> liveCells = Iterables.filter(rowData.getSortedColumns(),
                new FilterDeletedColumns(nowMillis));
        return CassandraHelper.makeEntryList(liveCells, entryGetter, query.getSliceEnd(), query.getLimit());
    }

    private class FilterDeletedColumns implements Predicate<Cell> {

        private final long tsMillis;
        private final int tsSeconds;

        private FilterDeletedColumns(long tsMillis) {
            this.tsMillis = tsMillis;
            this.tsSeconds = (int) (this.tsMillis / 1000L);
        }

        @Override
        public boolean apply(Cell input) {
            if (!input.isLive(tsMillis))
                return false;

            // Don't do this.  getTimeToLive() is a duration divorced from any particular clock.
            // For instance, if TTL=10 seconds, getTimeToLive() will have value 10 (not 10 + epoch seconds), and
            // this will always return false.
            //if (input instanceof ExpiringCell)
            //    return tsSeconds < ((ExpiringCell)input).getTimeToLive();

            return true;
        }
    }

    /**
     * Multi-key slice reads are not implemented by this store.
     *
     * @throws UnsupportedOperationException always, regardless of the arguments
     */
    @Override
    public Map<StaticBuffer, EntryList> getSlice(List<StaticBuffer> keys, SliceQuery query, StoreTransaction txh)
            throws BackendException {
        throw new UnsupportedOperationException();
    }

    /**
     * Applies additions and deletions to a single key by wrapping them in a one-entry
     * mutation map and delegating to {@link #mutateMany(Map, StoreTransaction)}.
     *
     * @param key       row key to mutate
     * @param additions entries to add
     * @param deletions columns to delete
     * @param txh       transaction handle
     * @throws BackendException if the underlying mutation fails
     */
    @Override
    public void mutate(StaticBuffer key, List<Entry> additions, List<StaticBuffer> deletions, StoreTransaction txh)
            throws BackendException {
        final KCVMutation rowMutation = new KCVMutation(additions, deletions);
        mutateMany(ImmutableMap.of(key, rowMutation), txh);
    }

    /**
     * Applies mutations to several keys of this store by delegating to the store manager,
     * with the mutations filed under this store's column family name.
     *
     * @param mutations mutations per row key
     * @param txh       transaction handle
     * @throws BackendException if the store manager fails to apply the mutations
     */
    public void mutateMany(Map<StaticBuffer, KCVMutation> mutations, StoreTransaction txh) throws BackendException {
        final Map<String, Map<StaticBuffer, KCVMutation>> mutationsByStore =
                ImmutableMap.of(columnFamily, mutations);
        storeManager.mutateMany(mutationsByStore, txh);
    }

    /**
     * Executes a single read command by placing it in a one-element list and delegating to
     * the list-based overload.
     *
     * @param cmd  read command to execute
     * @param clvl consistency level for the read
     * @return the rows produced by the command
     * @throws BackendException if the read fails
     */
    private static List<Row> read(ReadCommand cmd, org.apache.cassandra.db.ConsistencyLevel clvl)
            throws BackendException {
        final List<ReadCommand> singleCommand = new ArrayList<ReadCommand>(1);
        singleCommand.add(cmd);
        return read(singleCommand, clvl);
    }

    /**
     * Executes read commands through {@link StorageProxy#read} and translates Cassandra's
     * checked exceptions into backend exceptions.
     * <p>
     * Failures that describe the momentary state of the cluster are reported as
     * {@link TemporaryBackendException}: unavailable replicas, a request timeout, or a node that
     * is still bootstrapping. A request timeout was previously reported as permanent, unlike
     * the other two. An invalid request is reported as {@link PermanentBackendException},
     * because repeating the same request cannot succeed.
     *
     * @param cmds read commands to execute
     * @param clvl consistency level for the reads
     * @return the rows returned by {@link StorageProxy#read}
     * @throws TemporaryBackendException on {@link UnavailableException},
     *                                   {@link RequestTimeoutException} or {@link IsBootstrappingException}
     * @throws PermanentBackendException on {@link InvalidRequestException}
     */
    private static List<Row> read(List<ReadCommand> cmds, org.apache.cassandra.db.ConsistencyLevel clvl)
            throws BackendException {
        try {
            return StorageProxy.read(cmds, clvl);
        } catch (UnavailableException e) {
            throw new TemporaryBackendException(e);
        } catch (RequestTimeoutException e) {
            // A timeout reflects current load or latency, not a defect in the request itself.
            throw new TemporaryBackendException(e);
        } catch (IsBootstrappingException e) {
            throw new TemporaryBackendException(e);
        } catch (InvalidRequestException e) {
            throw new PermanentBackendException(e);
        }
    }

    /**
     * Adapter that extracts column name, value and meta-data from a Cassandra {@link Cell}
     * when entries are built.
     */
    private static class CassandraEmbeddedGetter implements StaticArrayEntry.GetColVal<Cell, ByteBuffer> {

        /** Meta-data schema reported for every cell. */
        private final EntryMetaData[] schema;
        /** Timestamp provider given at construction; not read by any method of this class. */
        private final TimestampProvider times;

        private CassandraEmbeddedGetter(EntryMetaData[] schema, TimestampProvider times) {
            this.schema = schema;
            this.times = times;
        }

        /** Returns a copy of the cell's name as a byte buffer. */
        @Override
        public ByteBuffer getColumn(Cell element) {
            final ByteBuffer cellName = element.name().toByteBuffer();
            return org.apache.cassandra.utils.ByteBufferUtil.clone(cellName);
        }

        /** Returns a copy of the cell's value. */
        @Override
        public ByteBuffer getValue(Cell element) {
            final ByteBuffer cellValue = element.value();
            return org.apache.cassandra.utils.ByteBufferUtil.clone(cellValue);
        }

        /** Returns the schema supplied at construction, independent of the cell. */
        @Override
        public EntryMetaData[] getMetaSchema(Cell element) {
            return schema;
        }

        /**
         * Returns the requested piece of meta-data for a cell.
         *
         * @return the cell timestamp for {@code TIMESTAMP}; for {@code TTL} the time-to-live of an
         *         {@link ExpiringCell}, or 0 for any other cell
         * @throws UnsupportedOperationException for any other meta-data kind
         */
        @Override
        public Object getMetaData(Cell element, EntryMetaData meta) {
            switch (meta) {
            case TIMESTAMP:
                return element.timestamp();
            case TTL:
                if (element instanceof ExpiringCell) {
                    return ((ExpiringCell) element).getTimeToLive();
                }
                return 0;
            default:
                throw new UnsupportedOperationException("Unsupported meta data: " + meta);
            }
        }
    }

    private class RowIterator implements KeyIterator {
        private final Token maximumToken;
        private final SliceQuery sliceQuery;
        private final StoreTransaction txh;

        /**
         * This RowIterator will use this timestamp for its entire lifetime,
         * even if the iterator runs more than one distinct slice query while
         * paging. <b>This field must be in units of milliseconds since
         * the UNIX Epoch</b>.
         * <p>
         * This timestamp is passed to three methods/constructors:
         * <ul>
         *  <li>{@link org.apache.cassandra.db.Column#isMarkedForDelete(long now)}</li>
         *  <li>{@link org.apache.cassandra.db.ColumnFamily#hasOnlyTombstones(long)}</li>
         *  <li>
         *   the {@link RangeSliceCommand} constructor via the last argument
         *   to {@link CassandraEmbeddedKeyColumnValueStore#getKeySlice(Token, Token, SliceQuery, int, long)}
         *  </li>
         * </ul>
         * The second list entry just calls the first and almost doesn't deserve
         * a mention at present, but maybe the implementation will change in the future.
         * <p>
         * When this value needs to be compared to TTL seconds expressed in seconds,
         * Cassandra internals do the conversion.
         * Consider {@link ExpiringColumn#isMarkedForDelete(long)}, which is implemented,
         * as of 2.0.6, by the following one-liner:
         * <p>
         * {@code return (int) (now / 1000) >= getLocalDeletionTime()}
         * <p>
         * The {@code now / 1000} does the conversion from milliseconds to seconds
         * (the units of getLocalDeletionTime()).
         */
        private final long nowMillis;

        private Iterator<Row> keys;
        private ByteBuffer lastSeenKey = null;
        private Row currentRow;
        private int pageSize;

        private boolean isClosed;

        public RowIterator(KeyRangeQuery keyRangeQuery, int pageSize, StoreTransaction txh)
                throws BackendException {
            this(StorageService.getPartitioner().getToken(keyRangeQuery.getKeyStart().asByteBuffer()),
                    StorageService.getPartitioner().getToken(keyRangeQuery.getKeyEnd().asByteBuffer()),
                    keyRangeQuery, pageSize, txh);
        }

        public RowIterator(Token minimum, Token maximum, SliceQuery sliceQuery, int pageSize, StoreTransaction txh)
                throws BackendException {
            this.pageSize = pageSize;
            this.sliceQuery = sliceQuery;
            this.maximumToken = maximum;
            this.txh = txh;
            this.nowMillis = times.getTime().toEpochMilli();
            this.keys = getRowsIterator(getKeySlice(minimum, maximum, sliceQuery, pageSize, nowMillis));
        }

        @Override
        public boolean hasNext() {
            try {
                return hasNextInternal();
            } catch (BackendException e) {
                throw new RuntimeException(e);
            }
        }

        @Override
        public StaticBuffer next() {
            ensureOpen();

            if (!hasNext())
                throw new NoSuchElementException();

            currentRow = keys.next();
            ByteBuffer currentKey = currentRow.key.getKey().duplicate();

            try {
                return StaticArrayBuffer.of(currentKey);
            } finally {
                lastSeenKey = currentKey;
            }
        }

        @Override
        public void close() {
            isClosed = true;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }

        @Override
        public RecordIterator<Entry> getEntries() {
            ensureOpen();

            if (sliceQuery == null)
                throw new IllegalStateException("getEntries() requires SliceQuery to be set.");

            return new RecordIterator<Entry>() {
                final Iterator<Entry> columns = CassandraHelper.makeEntryIterator(
                        Iterables.filter(currentRow.cf.getSortedColumns(), new FilterDeletedColumns(nowMillis)),
                        entryGetter, sliceQuery.getSliceEnd(), sliceQuery.getLimit());

                //cfToEntries(currentRow.cf, sliceQuery).iterator();

                @Override
                public boolean hasNext() {
                    ensureOpen();
                    return columns.hasNext();
                }

                @Override
                public Entry next() {
                    ensureOpen();
                    return columns.next();
                }

                @Override
                public void close() {
                    isClosed = true;
                }

                @Override
                public void remove() {
                    throw new UnsupportedOperationException();
                }
            };

        }

        private final boolean hasNextInternal() throws BackendException {
            ensureOpen();

            if (keys == null)
                return false;

            boolean hasNext = keys.hasNext();

            if (!hasNext && lastSeenKey != null) {
                Token lastSeenToken = StorageService.getPartitioner().getToken(lastSeenKey.duplicate());

                // let's check if we reached key upper bound already so we can skip one useless call to Cassandra
                if (maximumToken != getMinimumToken() && lastSeenToken.equals(maximumToken)) {
                    return false;
                }

                List<Row> newKeys = getKeySlice(StorageService.getPartitioner().getToken(lastSeenKey), maximumToken,
                        sliceQuery, pageSize, nowMillis);

                keys = getRowsIterator(newKeys, lastSeenKey);
                hasNext = keys.hasNext();
            }

            return hasNext;
        }

        private void ensureOpen() {
            if (isClosed)
                throw new IllegalStateException("Iterator has been closed.");
        }

        private Iterator<Row> getRowsIterator(List<Row> rows) {
            if (rows == null)
                return null;

            return Iterators.filter(rows.iterator(), new Predicate<Row>() {
                @Override
                public boolean apply(@Nullable Row row) {
                    // The hasOnlyTombstones(x) call below ultimately calls Column.isMarkedForDelete(x)
                    return !(row == null || row.cf == null || row.cf.isMarkedForDelete()
                            || row.cf.hasOnlyTombstones(nowMillis));
                }
            });
        }

        private Iterator<Row> getRowsIterator(List<Row> rows, final ByteBuffer exceptKey) {
            Iterator<Row> rowIterator = getRowsIterator(rows);

            if (rowIterator == null)
                return null;

            return Iterators.filter(rowIterator, new Predicate<Row>() {
                @Override
                public boolean apply(@Nullable Row row) {
                    return row != null && !row.key.getKey().equals(exceptKey);
                }
            });
        }
    }

    /**
     * Returns the smallest token used as the lower bound of a full scan.
     * <p>
     * For {@link RandomPartitioner} and {@link Murmur3Partitioner} this is the partitioner's own
     * minimum token. For {@link ByteOrderedPartitioner} it is a token built from an 8-byte
     * buffer obtained from {@code zeroByteBuffer(8)}.
     *
     * @throws PermanentBackendException if the active partitioner is none of the three above
     */
    private static Token getMinimumToken() throws PermanentBackendException {
        final IPartitioner active = StorageService.getPartitioner();

        if (active instanceof RandomPartitioner) {
            return ((RandomPartitioner) active).getMinimumToken();
        }
        if (active instanceof Murmur3Partitioner) {
            return ((Murmur3Partitioner) active).getMinimumToken();
        }
        if (active instanceof ByteOrderedPartitioner) {
            //TODO: This makes the assumption that its an EdgeStore (i.e. 8 byte keys)
            return new BytesToken(org.janusgraph.diskstorage.util.ByteBufferUtil.zeroByteBuffer(8));
        }

        throw new PermanentBackendException("Unsupported partitioner: " + active);
    }

    /**
     * Returns the largest token used as the upper bound of a full scan.
     * <p>
     * For {@link RandomPartitioner} and {@link Murmur3Partitioner} the token wraps the
     * partitioner's {@code MAXIMUM} constant. For {@link ByteOrderedPartitioner} it is a token
     * built from an 8-byte buffer obtained from {@code oneByteBuffer(8)}.
     *
     * @throws PermanentBackendException if the active partitioner is none of the three above
     */
    private static Token getMaximumToken() throws PermanentBackendException {
        final IPartitioner active = StorageService.getPartitioner();

        if (active instanceof RandomPartitioner) {
            return new BigIntegerToken(RandomPartitioner.MAXIMUM);
        }
        if (active instanceof Murmur3Partitioner) {
            return new LongToken(Murmur3Partitioner.MAXIMUM);
        }
        if (active instanceof ByteOrderedPartitioner) {
            //TODO: This makes the assumption that its an EdgeStore (i.e. 8 byte keys)
            return new BytesToken(org.janusgraph.diskstorage.util.ByteBufferUtil.oneByteBuffer(8));
        }

        throw new PermanentBackendException("Unsupported partitioner: " + active);
    }
}