org.corfudb.infrastructure.RocksLogUnitServer.java Source code

Java tutorial

Introduction

Here is the source code for org.corfudb.infrastructure.RocksLogUnitServer.java

Source

/**
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// @author Amy Tai
//
// implement object homes.
package org.corfudb.infrastructure;

import lombok.Getter;
import org.apache.thrift.TException;
import org.apache.thrift.TMultiplexedProcessor;
import org.apache.thrift.protocol.TCompactProtocol;
import org.apache.thrift.server.TServer;
import org.apache.thrift.server.TThreadPoolServer;
import org.apache.thrift.transport.TFastFramedTransport;
import org.apache.thrift.transport.TServerSocket;
import org.corfudb.infrastructure.thrift.*;
import org.corfudb.runtime.protocols.IServerProtocol;
import org.corfudb.runtime.protocols.logunits.CorfuDBSimpleLogUnitProtocol;
import org.corfudb.util.Utils;
import org.rocksdb.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.*;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicBoolean;

public class RocksLogUnitServer implements RocksLogUnitService.Iface, ICorfuDBServer {
    private Logger log = LoggerFactory.getLogger(RocksLogUnitServer.class);

    // Epoch ("incarnation") list compared against request headers; slot 0 is overwritten by setEpoch().
    List<Integer> masterIncarnation = null;
    protected int UNITCAPACITY = 100000; // capacity in PAGESIZE units, i.e. UNITCAPACITY*PAGESIZE bytes
    protected int PORT = -1; // REQUIRED: port number this unit listens on
    protected String DRIVENAME = null; // where to persist data (unless rammode is on)
    protected boolean RAMMODE = true; // command line switch: work in memory (no data persistence)
    protected boolean RECOVERY = false; // command line switch: indicate whether we load stream from disk on startup
    protected boolean REBUILD = false;
    // volatile: cleared by the Timer thread started in simulateFailure() and read by request handlers.
    volatile boolean simFailure = false;
    protected String rebuildnode = null;

    protected int PAGESIZE;
    @Getter
    private Thread thread;
    // volatile: written by close() while run() polls it from the server thread.
    volatile boolean running;
    TServer server;

    private int ckmark = 0; // start offset of latest checkpoint. TODO: persist!!

    private Object DriveLck = new Object();

    private long gcmark = 0; // pages up to 'gcmark' have been evicted; note, we must have gcmark <= CM.trimmark
    private int lowwater = 0, highwater = 0, freewater = -1;

    // Highest offset handed to write() so far; -1 until the first write.
    long highWatermark = -1L;

    private HashMap<Long, Hints> hintMap = new HashMap<>();
    // Backing store; only opened by serverloop() when RAMMODE is off, otherwise stays null.
    private RocksDB db = null;
    private AtomicBoolean ready = new AtomicBoolean(); // for testing

    /**
     * Reports whether start-up has finished initialising the log store.
     * The flag is raised by {@code serverloop()} just before the Thrift server is created.
     *
     * @return the current value of the readiness flag
     */
    public boolean isReady() {
        final boolean initialised = ready.get();
        return initialised;
    }

    /**
     * Resets the in-memory bookkeeping for a store of the given capacity.
     * Sets both {@code UNITCAPACITY} and {@code freewater} to {@code sz} and replaces the
     * incarnation list with a fresh single-element list containing 0.
     *
     * @param sz the new unit capacity
     */
    public void initLogStore(int sz) {
        //TODO: RocksDB in ram-mode? (RAMMODE gets no special treatment here yet)
        freewater = sz;
        UNITCAPACITY = sz;

        List<Integer> freshIncarnation = new ArrayList<>();
        freshIncarnation.add(0);
        masterIncarnation = freshIncarnation;
    }

    /**
     * Bitmap-accepting variant of the store initialiser. The supplied bitmap is currently
     * not consulted: the method only sets {@code UNITCAPACITY} and {@code freewater} to
     * {@code sz} and replaces the incarnation list with a single-element list containing 0.
     *
     * @param initmap initial bitmap (unused by this implementation)
     * @param sz      the new unit capacity
     * @throws Exception declared for callers; nothing in this body throws a checked exception
     */
    public void initLogStore(byte[] initmap, int sz) throws Exception {
        //TODO: RocksDB in ram-mode? (RAMMODE gets no special treatment here yet)
        freewater = sz;
        UNITCAPACITY = sz;

        List<Integer> freshIncarnation = new ArrayList<>();
        freshIncarnation.add(0);
        masterIncarnation = freshIncarnation;
    }

    /**
     * Creates an unconfigured server; all fields keep their declared defaults.
     * Configuration is applied afterwards through {@code getInstance(Map)}.
     */
    public RocksLogUnitServer() {
        //default constructor
    }

    //TODO: Make this accept an object from an interface, such as IWriteOnceLogUnit?
    /**
     * Populates this unit with the content fetched from another log unit.
     *
     * The rebuild payload is fetched from {@code nodeToFetch}; its bitmap is loaded into a
     * temporary {@link SimpleLogUnitServer} that is only used to look up the mark type of each
     * address. Content buffers are then written one by one, starting at the payload's low-water
     * address, into the streams named by the hint recorded for that address.
     *
     * @param nodeToFetch protocol handle of the node to copy from
     * @return {@code true} if the payload was processed (including the case of no content);
     *         {@code false} if the payload could not be fetched, the bitmap could not be loaded,
     *         or an address has no hint telling which streams it belongs to
     */
    private boolean rebuildFrom(CorfuDBSimpleLogUnitProtocol nodeToFetch) {
        SimpleLogUnitWrap data = nodeToFetch.fetchRebuild();

        if (data == null || !data.isSetErr() || !data.getErr().equals(ErrorCode.OK)) {
            log.error("couldn't get rebuild data from node: {}", nodeToFetch.getFullString());
            log.error("data: {}", data);
            return false;
        }

        SimpleLogUnitServer temp = new SimpleLogUnitServer();
        try {
            temp.initLogStore(data.getBmap(), data.getUnitcapacity());
        } catch (Exception ex) {
            // Pass the exception as the trailing throwable so the stack trace is logged.
            log.error("couldn't rebuild log store from bitmap", ex);
            return false;
        }

        if (!data.isSetCtnt())
            return true;

        long address = data.getLowwater();
        for (ByteBuffer bb : data.getCtnt()) {
            //TODO: FIX THE FAKE STREAM!! once simple log unit server gets streams
            // Without a hint there is no way to know which streams the entry belongs to;
            // fail the rebuild explicitly instead of dying with a NullPointerException.
            if (data.getHintmap() == null || data.getHintmap().get(address) == null
                    || data.getHintmap().get(address).getNextMap() == null) {
                log.error("rebuild data from node {} has no stream hint for address {}",
                        nodeToFetch.getFullString(), address);
                return false;
            }
            try {
                WriteResult result = put(address, data.getHintmap().get(address).getNextMap().keySet(), bb,
                        temp.getET(address));
                if (!ErrorCode.OK.equals(result.getCode())) {
                    log.warn("rebuild write at address {} returned {}", address, result.getCode());
                }
            } catch (IOException e) {
                // Kept best-effort as before: log and continue with the next address.
                log.error("Trying to rebuild node, got exception at address {}", address, e);
            }
            address++;
        }
        return true;
    }

    /**
     * Switches simulated-failure mode on or off. While the mode is on, the service methods
     * that check it throw a {@link TException}.
     *
     * @param fail   {@code true} to start failing requests, {@code false} to stop
     * @param length when {@code fail} is true, the number of milliseconds after which the
     *               failure is lifted automatically; any negative value (conventionally -1)
     *               keeps the failure on until it is switched off explicitly
     * @throws TException declared by the service interface; not thrown by this body
     */
    public void simulateFailure(boolean fail, long length) throws TException {
        if (!fail || length < 0) {
            // Negative delays cannot be scheduled; treat them all as "no automatic reset".
            this.simFailure = fail;
            return;
        }

        this.simFailure = true;
        // Daemon timer so a pending reset never keeps the JVM alive; it is cancelled after
        // firing so each call does not leave an idle timer thread behind.
        final Timer resetTimer = new Timer("simulated-failure-reset", true);
        resetTimer.schedule(new TimerTask() {
            @Override
            public void run() {
                RocksLogUnitServer.this.simFailure = false;
                resetTimer.cancel();
            }
        }, length);
    }

    /**
     * Configures this server from a key/value map and prepares (but does not start) its thread.
     *
     * Required keys: {@code ramdisk} (Boolean), {@code capacity}, {@code port},
     * {@code pagesize} and {@code trim} (Integer). A missing required key fails with an
     * exception when its null value is unboxed. Optional keys: {@code drive} (String),
     * {@code recovery} (Boolean) and {@code rebuild} (String; its presence enables rebuild mode).
     *
     * @param config the configuration map
     * @return this server instance
     */
    @Override
    public ICorfuDBServer getInstance(final Map<String, Object> config) {
        //These are required and will throw an exception if not defined.
        this.RAMMODE = (Boolean) config.get("ramdisk");
        this.UNITCAPACITY = (Integer) config.get("capacity");
        this.PORT = (Integer) config.get("port");
        this.PAGESIZE = (Integer) config.get("pagesize");
        this.gcmark = (Integer) config.get("trim");

        final List<Integer> initialIncarnation = new ArrayList<>();
        initialIncarnation.add(0);
        this.masterIncarnation = initialIncarnation;

        //These are not required and will be only populated if given
        if (config.containsKey("drive")) {
            this.DRIVENAME = (String) config.get("drive");
        }
        if (config.containsKey("recovery")) {
            this.RECOVERY = (Boolean) config.get("recovery");
        }
        if (config.containsKey("rebuild")) {
            this.REBUILD = true;
            this.rebuildnode = (String) config.get("rebuild");
        }

        this.thread = new Thread(this);
        return this;
    }

    /**
     * Asks the server to shut down: clears the {@code running} flag so {@code run()} stops
     * looping, and stops the Thrift server if one has been created.
     * Safe to call before the server has started serving.
     */
    @Override
    public void close() {
        running = false;
        // serverloop() assigns 'server' only after the store is initialised, so it may still be null.
        TServer activeServer = server;
        if (activeServer != null) {
            activeServer.stop();
        }
    }

    /**
     * Builds the 24-byte store key for an address within a stream.
     * Layout: the stream id's least-significant 8 bytes, then its most-significant 8 bytes,
     * then the 8-byte address, so all keys of one stream share the same 16-byte prefix.
     *
     * @param address log address
     * @param stream  stream identifier
     * @return the encoded key
     * @throws IOException declared for callers; nothing in this body throws it
     */
    private byte[] getKey(long address, UUID stream) throws IOException {
        return ByteBuffer.allocate(3 * Long.BYTES)
                .putLong(stream.getLeastSignificantBits())
                .putLong(stream.getMostSignificantBits())
                .putLong(address)
                .array();
    }

    /**
     * Stores one entry under every stream it belongs to, refusing to overwrite.
     *
     * Assumes each ByteBuffer has length <= PAGESIZE. The stored record is the buffer's
     * readable bytes (position to limit) followed by one byte holding the extent mark type.
     * Every target key is checked before anything is written, so an entry that is already
     * present in any of the streams is rejected without a partial write to the other streams.
     * This relies on callers serialising writes (the service {@code write} method is synchronized).
     *
     * @param address log address of the entry
     * @param streams thrift ids of the streams to write the entry into
     * @param buf     payload; its position and limit are left untouched
     * @param et      mark type appended as the record's last byte
     * @return {@code ERR_BADPARAM} if {@code streams} is null; {@code ERR_OVERWRITE} carrying the
     *         raw stored record (payload plus trailing mark byte) if a key already exists;
     *         {@code OK} otherwise
     * @throws IOException if the store is not open or RocksDB reports an error
     */
    private WriteResult put(long address, Set<org.corfudb.infrastructure.thrift.UUID> streams, ByteBuffer buf,
            ExtntMarkType et) throws IOException {
        // TODO: If streams is null, add to EVERY stream??
        if (streams == null)
            return new WriteResult().setCode(ErrorCode.ERR_BADPARAM);
        if (db == null)
            throw new IOException("RocksDB store is not open");

        // Copy only the readable window: buf.array() would return the whole backing array and
        // ignore position/limit/arrayOffset, which is wrong for buffers that wrap a larger
        // array, and it throws for read-only or direct buffers.
        ByteBuffer window = buf.duplicate();
        byte[] record = new byte[window.remaining() + 1];
        window.get(record, 0, record.length - 1);
        record[record.length - 1] = (byte) et.getValue();

        try {
            // Pass 1: make sure none of the target slots is occupied.
            List<byte[]> keys = new ArrayList<>(streams.size());
            for (org.corfudb.infrastructure.thrift.UUID stream : streams) {
                byte[] key = getKey(address, Utils.fromThriftUUID(stream));
                byte[] existing = db.get(key);
                if (existing != null)
                    return new WriteResult().setCode(ErrorCode.ERR_OVERWRITE).setData(ByteBuffer.wrap(existing));
                keys.add(key);
            }
            // Pass 2: all slots are free, write the record under every key.
            for (byte[] key : keys) {
                db.put(key, record);
            }
        } catch (RocksDBException e) {
            throw new IOException(e.getMessage(), e);
        }
        return new WriteResult().setCode(ErrorCode.OK);
    }

    /**
     * Placeholder for trimming the store; not implemented for the Rocks-backed server.
     *
     * @param toOffset offset to trim up to (unused)
     * @throws IOException declared for callers; never thrown by this body
     * @throws UnsupportedOperationException always
     */
    public void trimLogStore(long toOffset) throws IOException {
        throw new UnsupportedOperationException("trimLogStore not implemented in Rocks-backed server!!");
    }

    /**
     * Looks up the entry stored at an address within a stream.
     *
     * A stored record is the payload followed by one byte holding the extent mark type; the
     * returned wrapper carries the payload without that byte and reports the mark type and
     * payload length in its {@code ExtntInfo}.
     *
     * @param logOffset log address to read
     * @param stream    thrift id of the stream to read from
     * @return a wrapper with {@code ERR_UNWRITTEN} and an empty-extent info when nothing is
     *         stored under the key, otherwise {@code OK} with the payload as its single content buffer
     * @throws IOException if the store is not open, RocksDB reports an error, or the stored
     *                     record is too short to contain the mark byte
     */
    public ExtntWrap get(long logOffset, org.corfudb.infrastructure.thrift.UUID stream) throws IOException {
        if (db == null)
            throw new IOException("RocksDB store is not open");

        //TODO : figure out trim story
        byte[] key = getKey(logOffset, Utils.fromThriftUUID(stream));
        byte[] value;
        try {
            value = db.get(key);
        } catch (RocksDBException e) {
            throw new IOException(e.getMessage(), e);
        }

        ExtntWrap wr = new ExtntWrap();
        if (value == null) {
            wr.setInf(new ExtntInfo(logOffset, 0, ExtntMarkType.EX_EMPTY));
            wr.setErr(ErrorCode.ERR_UNWRITTEN);
            return wr;
        }
        if (value.length == 0)
            throw new IOException("stored record at offset " + logOffset + " is missing its mark byte");

        // Length of the data is -1 because we stick the ET in the last byte.
        // TODO: Check the ET of the value?
        int payloadLength = value.length - 1;
        wr.setInf(new ExtntInfo(logOffset, payloadLength, ExtntMarkType.findByValue(value[payloadLength])));

        ArrayList<ByteBuffer> content = new ArrayList<ByteBuffer>();
        content.add(ByteBuffer.wrap(Arrays.copyOf(value, payloadLength)));
        wr.setCtnt(content);
        wr.setErr(ErrorCode.OK);
        return wr;
    }

    /**
     * Placeholder for persisting the gc mark; not implemented for the Rocks-backed server.
     *
     * @throws IOException declared for callers; never thrown by this body
     * @throws UnsupportedOperationException always
     */
    private void writegcmark() throws IOException {
        // TODO what about persisting the configuration??
        throw new UnsupportedOperationException("Haven't implemented writegcmark in Rocks-backed server");
    }

    /**
     * Placeholder for start-up recovery; not implemented for the Rocks-backed server.
     * Called by {@code serverloop()} when the RECOVERY switch is set.
     *
     * @throws Exception declared for callers; never thrown by this body
     * @throws UnsupportedOperationException always
     */
    private void recover() throws Exception {
        throw new UnsupportedOperationException("Haven't implemented recover in Rocks-backed server");
    }

    /*
    private void rebuildfromnode() throws Exception {
        Endpoint cn = Endpoint.genEndpoint(rebuildnode);
        TTransport buildsock = new TSocket(cn.getHostname(), cn.getPort());
        buildsock.open();
        TProtocol prot = new TBinaryProtocol(buildsock);
        TMultiplexedProtocol mprot = new TMultiplexedProtocol(prot, "CONFIG");
        
        SimpleLogUnitConfigService.Client cl = new SimpleLogUnitConfigService.Client(mprot);
        stream.info("established connection with rebuild-node {}", rebuildnode);
        SimpleLogUnitWrap wr = null;
        try {
            wr = cl.rebuild();
            stream.info("obtained mirror lowwater={} highwater={} trimmark={} ctnt-length={}",
                    wr.getLowwater(), wr.getHighwater(), wr.getTrimmark(), wr.getCtntSize());
            initLogStore(wr.getBmap(), UNITCAPACITY);
            lowwater = highwater = wr.getLowwater();
            gcmark = wr.getTrimmark();
            ckmark = (int)wr.getCkmark();
            put(wr.getCtnt());
            if (highwater != wr.getHighwater())
                stream.error("rebuildfromnode lowwater={} highwater={} received ({},{})",
                        lowwater, highwater,
                        wr.getLowwater(), wr.getHighwater());
        } catch (TException e) {
            e.printStackTrace();
        }
    }*/
    /**
     * Liveness probe.
     *
     * @return always {@code true} when the call succeeds
     * @throws TException while simulated-failure mode is on
     */
    @Override
    public boolean ping() throws TException {
        if (!simFailure) {
            return true;
        }
        throw new TException("Simulated failure mode!");
    }

    /**
     * Records a new epoch by overwriting slot 0 of the incarnation list with the
     * epoch narrowed to an int.
     *
     * @param epoch the new epoch
     * @throws TException while simulated-failure mode is on
     */
    @Override
    public void setEpoch(long epoch) throws TException {
        if (simFailure) {
            throw new TException("Simulated failure mode!");
        }
        final int narrowedEpoch = (int) epoch;
        this.masterIncarnation.set(0, narrowedEpoch);
    }

    /////////////////////////////////////////////////////////////////////////////////////////////
    /* (non-Javadoc)
     * implements to CorfuUnitServer.Iface write() method.
     * @see CorfuUnitServer.Iface#write(ExtntWrap)
     *
     * Rejects requests whose epoch is older than this unit's incarnation (ERR_STALEEPOCH),
     * otherwise hands the entry to put() for every stream named in the header and returns
     * put()'s result unchanged. A store failure is reported as ERR_IO.
     *
     * The high watermark only advances when the offset is known to be occupied afterwards,
     * i.e. when put() returned OK or ERR_OVERWRITE; a request put() rejected (for example
     * ERR_BADPARAM) or that failed with an I/O error leaves it untouched.
     */
    @Override
    synchronized public WriteResult write(UnitServerHdr hdr, ByteBuffer ctnt, ExtntMarkType et) throws TException {
        if (simFailure) {
            throw new TException("Simulated failure mode!");
        }
        if (Util.compareIncarnations(hdr.getEpoch(), masterIncarnation) < 0) {
            log.info("write request has stale incarnation={} cur incarnation={}", hdr.getEpoch(),
                    masterIncarnation);
            return new WriteResult().setCode(ErrorCode.ERR_STALEEPOCH);
        }

        // remaining() is the payload size; capacity() may be the size of a larger shared buffer.
        log.debug("write({} size={} marktype={})", hdr, ctnt.remaining(), et);
        try {
            WriteResult wr = put(hdr.off, hdr.streamID, ctnt, et);
            ErrorCode code = wr.getCode();
            if (code == ErrorCode.OK || code == ErrorCode.ERR_OVERWRITE) {
                highWatermark = Long.max(highWatermark, hdr.off);
            }
            return wr;
        } catch (IOException e) {
            log.error("write at offset {} failed", hdr.off, e);
            return new WriteResult().setCode(ErrorCode.ERR_IO);
        }
    }

    /**
     * Marks an extent 'skipped' by writing an empty payload tagged {@code EX_SKIP}
     * through {@code write}.
     *
     * @param hdr epoch, offset and streams of the extent
     * @return the result code produced by {@code write} for that request
     *         (OK if the extent was marked for 'skip')
     * @throws TException while simulated-failure mode is on
     */
    @Override
    synchronized public ErrorCode fix(UnitServerHdr hdr) throws TException {
        if (simFailure) {
            throw new TException("Simulated failure mode!");
        }
        final ByteBuffer emptyPayload = ByteBuffer.allocate(0);
        final WriteResult outcome = write(hdr, emptyPayload, ExtntMarkType.EX_SKIP);
        return outcome.getCode();
    }

    /**
     * Builds a content-less read result carrying only an error code.
     *
     * @param err the code to report
     * @return a wrapper with a default-constructed extent info and an empty content list
     */
    private ExtntWrap genWrap(ErrorCode err) {
        final ExtntInfo blankInfo = new ExtntInfo();
        final ArrayList<ByteBuffer> noContent = new ArrayList<ByteBuffer>();
        return new ExtntWrap(err, blankInfo, noContent);
    }

    /**
     * Builds an empty hint carrying only an error code.
     *
     * @param err the code to report
     * @return a hint with an empty next-map, a {@code false} flag and a null last argument
     */
    private Hints genHint(ErrorCode err) {
        final HashMap<org.corfudb.infrastructure.thrift.UUID, Long> emptyNextMap =
                new HashMap<org.corfudb.infrastructure.thrift.UUID, Long>();
        return new Hints(err, emptyNextMap, false, null);
    }

    /* (non-Javadoc)
     * @see CorfuUnitServer.Iface#read(org.corfudb.CorfuHeader, ExtntInfo)
     *
     * Reads the entry stored at the header's offset in the first stream named by the header.
     *
     * Returns ERR_STALEEPOCH if the request's epoch is older than this unit's incarnation,
     * ERR_BADPARAM if the header names no stream, ERR_IO if the store fails, and otherwise
     * whatever get() produces (ERR_UNWRITTEN for an empty slot, OK with the content).
     *
     *  @param hdr header describing the epoch, offset and stream(s) to read
     */
    @Override
    synchronized public ExtntWrap read(UnitServerHdr hdr) throws TException {
        if (simFailure) {
            throw new TException("Simulated failure mode!");
        }
        if (Util.compareIncarnations(hdr.getEpoch(), masterIncarnation) < 0)
            return genWrap(ErrorCode.ERR_STALEEPOCH);
        log.debug("read({})", hdr);
        // Without a stream there is no key to look up; answer with an error code instead of
        // letting iterator().next() throw out of the handler.
        if (hdr.streamID == null || hdr.streamID.isEmpty())
            return genWrap(ErrorCode.ERR_BADPARAM);
        try {
            return get(hdr.off, hdr.streamID.iterator().next());
        } catch (IOException e) {
            log.error("read at offset {} failed", hdr.off, e);
            return genWrap(ErrorCode.ERR_IO);
        }
    }

    /**
     * wait until any previously written stream entries have been handed to the store.
     *
     * Writes go through the synchronized {@code write} method, which calls RocksDB
     * synchronously, so once this method holds the server monitor every earlier write has
     * already returned from the store and there is nothing left to wait for.
     * The previous implementation waited on a private lock that nothing ever notifies, which
     * blocked the caller forever while holding the server monitor and stalled every other
     * synchronized service call.
     *
     * NOTE(review): this does not force an fsync of RocksDB's files; if the contract needs
     * durability against machine crashes, an explicit flush/sync of the store should be added.
     *
     * @throws TException while simulated-failure mode is on
     */
    @Override
    synchronized public void sync() throws TException {
        if (simFailure) {
            throw new TException("Simulated failure mode!");
        }
        // Intentionally empty: acquiring the monitor already orders this call after prior writes.
    }

    /**
     * Reports the trim mark. Trimming is not implemented for this server yet,
     * so the answer is a constant.
     *
     * @return always 0
     */
    @Override
    synchronized public long querytrim() {
        //return CM.getTrimmark();
        //TODO figure out trim story
        return 0;
    }

    /**
     * Reports the high watermark maintained by {@code write}.
     *
     * @return the current value of {@code highWatermark} (-1 when it has never been advanced)
     * @throws TException while simulated-failure mode is on
     */
    @Override
    synchronized public long highestAddress() throws TException {
        if (!simFailure) {
            return highWatermark;
        }
        throw new TException("Simulated failure mode!");
    }

    /**
     * Resets the in-memory state of a RAM-mode unit: re-initialises the log store bookkeeping,
     * puts the high watermark back to -1 and drops all hints.
     * When RAMMODE is off nothing is reset; data already stored in RocksDB is left as is.
     *
     * The gc mark is not persisted here: {@code writegcmark()} is unimplemented and always
     * throws, which previously aborted the reset before the watermark and hints were cleared.
     */
    @Override
    synchronized public void reset() {
        log.debug("Reset requested, resetting state");
        if (!RAMMODE) {
            return;
        }
        try {
            //TODO: Ram-mode in RocksDB?
            initLogStore(UNITCAPACITY);
            highWatermark = -1L;
            hintMap = new HashMap<>();
        } catch (Exception e) {
            log.error("Error during reset", e);
        }
    }

    /**
     * Reports the checkpoint mark last recorded by {@code ckpoint}.
     *
     * @return the current value of {@code ckmark} (0 until a checkpoint is recorded)
     */
    @Override
    synchronized public long queryck() {
        return ckmark;
    }

    /**
     * Trims the log store up to an offset and, when persistence is on, waits on the drive
     * lock and then writes the gc mark.
     *
     * As the class stands, {@code trimLogStore} is unimplemented and throws
     * {@link UnsupportedOperationException}, so that exception propagates to the caller
     * before any of the remaining steps run.
     *
     * @param toOffset offset to trim up to
     * @return {@code ERR_IO} if trimming or writing the gc mark raises an {@link IOException},
     *         {@code OK} otherwise
     */
    ErrorCode trim(long toOffset) {
        try {
            trimLogStore(toOffset);
        } catch (IOException ioe) {
            ioe.printStackTrace();
            return ErrorCode.ERR_IO;
        }

        // Nothing to force to disk in RAM mode.
        if (RAMMODE) {
            return ErrorCode.OK;
        }

        try {
            log.debug("forcing bitmap and gcmark to disk");
            synchronized (DriveLck) {
                try {
                    DriveLck.wait();
                } catch (InterruptedException interrupted) {
                    log.error("forcing sync to persistent store failed, quitting");
                    System.exit(1);
                }
            }
            writegcmark();
        } catch (IOException ioe) {
            log.error("writing gcmark failed");
            ioe.printStackTrace();
            return ErrorCode.ERR_IO;
        }
        return ErrorCode.OK;
    }

    /**
     * Records the latest checkpoint. When the header's offset is greater than the current
     * checkpoint mark, the mark becomes that offset modulo {@code UNITCAPACITY}; otherwise
     * the mark is left unchanged.
     *
     * @param hdr header whose offset names the checkpoint position
     * @throws TException while simulated-failure mode is on
     */
    @Override
    synchronized public void ckpoint(UnitServerHdr hdr) throws TException {
        if (simFailure) {
            throw new TException("Simulated failure mode!");
        }
        // if (hdr.getEpoch() < epoch) return ErrorCode.ERR_STALEEPOCH;
        log.info("mark latest checkpoint offset={}", hdr.off);
        if (hdr.off <= ckmark) {
            return;
        }
        ckmark = (int) (hdr.off % UNITCAPACITY);
    }

    //////////////////////////////////////////////////////////////////////////////

    ////////////////////////////////////////////////////////////////////////////////////

    /**
     * Brings the unit up and serves requests until the Thrift server stops.
     *
     * Steps: (1) when RAMMODE is off, open the RocksDB store at {@code DRIVENAME};
     * (2) initialise state by recovery, by rebuilding from another node, or from scratch,
     * depending on the RECOVERY / REBUILD switches; (3) raise the readiness flag;
     * (4) create a multiplexed thread-pool Thrift server on {@code PORT} with this object
     * registered as "SUNIT" and block in {@code serve()}.
     *
     * Fatal start-up problems (no drive configured, store cannot be opened, rebuild failure)
     * terminate the JVM with exit code 1. Failures while creating or running the Thrift
     * server are logged and the method returns.
     *
     * @throws Exception if recovery or store initialisation throws
     */
    public void serverloop() throws Exception {

        log.warn("@C@ RocksDBLoggingUnit starting");

        if (!RAMMODE) {
            if (DRIVENAME == null) {
                // Same policy as a failed open below: there is nothing to do without storage.
                log.error("persistent mode requires a 'drive' location for the RocksDB store");
                System.exit(1);
            }
            RocksDB.loadLibrary();

            Options options = new Options().setCreateIfMissing(true);
            options.setAllowMmapReads(true);
            // For easy prefix-lookups.
            options.setMemTableConfig(new HashSkipListMemTableConfig());
            options.setTableFormatConfig(new PlainTableConfig());
            options.useFixedLengthPrefixExtractor(16); // Prefix length in bytes
            try {
                db = RocksDB.open(options, DRIVENAME);
            } catch (RocksDBException e) {
                // Exception passed as the trailing throwable so the stack trace reaches the log.
                log.error("couldn't open rocksdb at {}", DRIVENAME, e);
                System.exit(1); // not much to do without storage...
            }
        } else {
            //TODO: Rammode in RocksDB?
        }

        if (RECOVERY) {
            recover();
        } else if (REBUILD) {
            CorfuDBSimpleLogUnitProtocol protocol = null;
            try {
                // Fix epoch later; but if we set it to -1, this guarantees that any write will trigger a view change,
                // which we want
                //TODO: Fix how CorfuDBSimpleLogUnitProtocol is essentially hardcoded?
                protocol = (CorfuDBSimpleLogUnitProtocol) IServerProtocol
                        .protocolFactory(CorfuDBSimpleLogUnitProtocol.class, rebuildnode, -1);
            } catch (Exception ex) {
                log.error("Error invoking protocol for protocol: ", ex);
                log.error("Cannot rebuild node");
                System.exit(1);
            }

            if (!rebuildFrom(protocol))
                System.exit(1);
        } else {
            initLogStore(UNITCAPACITY);
            //writegcmark();
        }
        ready.set(true);

        TServerSocket serverTransport;
        System.out.println("run..");

        try {
            serverTransport = new TServerSocket(PORT);

            //LogUnitConfigServiceImpl cnfg = new LogUnitConfigServiceImpl();

            TMultiplexedProcessor mprocessor = new TMultiplexedProcessor();
            mprocessor.registerProcessor("SUNIT", new RocksLogUnitService.Processor<RocksLogUnitServer>(this));
            //TODO: Figure out what the Config service is for a RocksDB indexed server?
            //mprocessor.registerProcessor("CONFIG", new SimpleLogUnitConfigService.Processor<LogUnitConfigServiceImpl>(cnfg));

            server = new TThreadPoolServer(new TThreadPoolServer.Args(serverTransport).processor(mprocessor)
                    .protocolFactory(TCompactProtocol::new)
                    .inputTransportFactory(new TFastFramedTransport.Factory())
                    .outputTransportFactory(new TFastFramedTransport.Factory()));
            System.out.println("Starting Corfu storage unit server on multiplexed port " + PORT);

            server.serve();
        } catch (Exception e) {
            log.error("storage unit server on port {} failed", PORT, e);
        }
    }

    /**
     * Thread entry point: marks the server as running and keeps invoking
     * {@code serverloop()} until {@code close()} clears the {@code running} flag.
     * An exception escaping {@code serverloop()} is logged and the loop tries again.
     *
     * @see Thread#run()
     */
    @Override
    public void run() {
        running = true;
        while (running) {
            try {
                this.serverloop();
            } catch (Exception e) {
                // Route the failure through the logger instead of stderr.
                log.error("server loop terminated with an exception", e);
            }
        }
    }
}