org.apache.flume.channel.file.Log.java Source code


Introduction

Here is the source code for org.apache.flume.channel.file.Log.java.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.flume.channel.file;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.commons.io.FileUtils;
import org.apache.flume.ChannelException;
import org.apache.flume.Event;
import org.apache.flume.annotations.InterfaceAudience;
import org.apache.flume.annotations.InterfaceStability;
import org.apache.flume.channel.file.encryption.KeyProvider;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileLock;
import java.nio.channels.OverlappingFileLockException;
import java.security.Key;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReferenceArray;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock;
import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock;

/**
 * Stores FlumeEvents on disk and pointers to the events in an in-memory queue.
 * Once a Log object is created, the replay method should be called to reconcile
 * the on-disk write-ahead log with the last checkpoint of the queue.
 *
 * Before calling any of commitPut/commitTake/get/put/rollback/take,
 * Log.tryLockShared should be called, and the above operations
 * should only be called if tryLockShared returns true. After
 * the operation and any additional modifications of the
 * FlumeEventQueue, the Log.unlockShared method should be called.
 * A sketch of this protocol appears in the comment at the top of the class body.
 */
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class Log {
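    /*
     * A minimal sketch of the shared-lock protocol described in the class
     * Javadoc, assuming the caller (for example, the file channel's
     * transaction code) already has a Log instance plus a transactionID and
     * an Event of its own:
     *
     *   if (log.tryLockShared()) {
     *       try {
     *           FlumeEventPointer ptr = log.put(transactionID, event);
     *           // ... record ptr in the FlumeEventQueue as needed ...
     *           log.commitPut(transactionID);
     *       } finally {
     *           log.unlockShared();
     *       }
     *   }
     */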
    public static final String PREFIX = "log-";
    private static final Logger LOGGER = LoggerFactory.getLogger(Log.class);
    private static final int MIN_NUM_LOGS = 2;
    public static final String FILE_LOCK = "in_use.lock";
    // for reader
    private final Map<Integer, LogFile.RandomReader> idLogFileMap = Collections
            .synchronizedMap(new HashMap<Integer, LogFile.RandomReader>());
    private final AtomicInteger nextFileID = new AtomicInteger(0);
    private final File checkpointDir;
    private final File backupCheckpointDir;
    private final File[] logDirs;
    private final int queueCapacity;
    private final AtomicReferenceArray<LogFile.Writer> logFiles;

    private final ScheduledExecutorService workerExecutor;

    private volatile boolean open;
    private FlumeEventQueue queue;
    private long checkpointInterval;
    private long maxFileSize;
    private final boolean useFastReplay;
    private final long minimumRequiredSpace;
    private final Map<String, FileLock> locks;
    private final ReentrantReadWriteLock checkpointLock = new ReentrantReadWriteLock(true);

    /**
     * Set of files that should be excluded from backup and restores.
     */
    public static final Set<String> EXCLUDES = Sets.newHashSet(FILE_LOCK);
    /**
     * Shared lock
     */
    private final ReadLock checkpointReadLock = checkpointLock.readLock();
    /**
     * Exclusive lock
     */
    private final WriteLock checkpointWriterLock = checkpointLock.writeLock();
    private int logWriteTimeout;
    private final String channelNameDescriptor;
    private int checkpointWriteTimeout;
    private boolean useLogReplayV1;
    private KeyProvider encryptionKeyProvider;
    private String encryptionCipherProvider;
    private String encryptionKeyAlias;
    private Key encryptionKey;
    private final long usableSpaceRefreshInterval;
    private boolean didFastReplay = false;
    private boolean didFullReplayDueToBadCheckpointException = false;
    private final boolean useDualCheckpoints;
    private volatile boolean backupRestored = false;

    private int readCount;
    private int putCount;
    private int takeCount;
    private int committedCount;
    private int rollbackCount;

    private final List<File> pendingDeletes = Lists.newArrayList();

    static class Builder {
        private long bCheckpointInterval;
        private long bMinimumRequiredSpace;
        private long bMaxFileSize;
        private int bQueueCapacity;
        private File bCheckpointDir;
        private File[] bLogDirs;
        private int bLogWriteTimeout = FileChannelConfiguration.DEFAULT_WRITE_TIMEOUT;
        private String bName;
        private int bCheckpointWriteTimeout = FileChannelConfiguration.DEFAULT_CHECKPOINT_WRITE_TIMEOUT;
        private boolean useLogReplayV1;
        private boolean useFastReplay;
        private KeyProvider bEncryptionKeyProvider;
        private String bEncryptionKeyAlias;
        private String bEncryptionCipherProvider;
        private long bUsableSpaceRefreshInterval = 15L * 1000L;
        private boolean bUseDualCheckpoints = false;
        private File bBackupCheckpointDir = null;

        Builder setUsableSpaceRefreshInterval(long usableSpaceRefreshInterval) {
            bUsableSpaceRefreshInterval = usableSpaceRefreshInterval;
            return this;
        }

        Builder setCheckpointInterval(long interval) {
            bCheckpointInterval = interval;
            return this;
        }

        Builder setMaxFileSize(long maxSize) {
            bMaxFileSize = maxSize;
            return this;
        }

        Builder setQueueSize(int capacity) {
            bQueueCapacity = capacity;
            return this;
        }

        Builder setCheckpointDir(File cpDir) {
            bCheckpointDir = cpDir;
            return this;
        }

        Builder setLogDirs(File[] dirs) {
            bLogDirs = dirs;
            return this;
        }

        Builder setLogWriteTimeout(int timeout) {
            bLogWriteTimeout = timeout;
            return this;
        }

        Builder setChannelName(String name) {
            bName = name;
            return this;
        }

        Builder setMinimumRequiredSpace(long minimumRequiredSpace) {
            bMinimumRequiredSpace = minimumRequiredSpace;
            return this;
        }

        Builder setCheckpointWriteTimeout(int checkpointTimeout) {
            bCheckpointWriteTimeout = checkpointTimeout;
            return this;
        }

        Builder setUseLogReplayV1(boolean useLogReplayV1) {
            this.useLogReplayV1 = useLogReplayV1;
            return this;
        }

        Builder setUseFastReplay(boolean useFastReplay) {
            this.useFastReplay = useFastReplay;
            return this;
        }

        Builder setEncryptionKeyProvider(KeyProvider encryptionKeyProvider) {
            bEncryptionKeyProvider = encryptionKeyProvider;
            return this;
        }

        Builder setEncryptionKeyAlias(String encryptionKeyAlias) {
            bEncryptionKeyAlias = encryptionKeyAlias;
            return this;
        }

        Builder setEncryptionCipherProvider(String encryptionCipherProvider) {
            bEncryptionCipherProvider = encryptionCipherProvider;
            return this;
        }

        Builder setUseDualCheckpoints(boolean useDualCheckpoints) {
            this.bUseDualCheckpoints = useDualCheckpoints;
            return this;
        }

        Builder setBackupCheckpointDir(File backupCheckpointDir) {
            this.bBackupCheckpointDir = backupCheckpointDir;
            return this;
        }

        Log build() throws IOException {
            return new Log(bCheckpointInterval, bMaxFileSize, bQueueCapacity, bLogWriteTimeout,
                    bCheckpointWriteTimeout, bUseDualCheckpoints, bCheckpointDir, bBackupCheckpointDir, bName,
                    useLogReplayV1, useFastReplay, bMinimumRequiredSpace, bEncryptionKeyProvider,
                    bEncryptionKeyAlias, bEncryptionCipherProvider, bUsableSpaceRefreshInterval, bLogDirs);
        }
    }
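    /*
     * A minimal construction sketch using the Builder above, assuming code in
     * the same package and using illustrative (not default) values:
     *
     *   Log log = new Log.Builder()
     *       .setCheckpointDir(new File("/var/flume/checkpoint"))
     *       .setLogDirs(new File[] { new File("/var/flume/data") })
     *       .setChannelName("fileChannel-1")
     *       .setCheckpointInterval(30 * 1000L)
     *       .setMaxFileSize(1024L * 1024L * 1024L)
     *       .setQueueSize(1000000)
     *       .setMinimumRequiredSpace(500L * 1024L * 1024L)
     *       .build();
     *   log.replay();
     *
     * build() calls the private constructor below, which requires the
     * interval, capacity and max file size to be positive and creates the
     * checkpoint and log directories if they do not already exist.
     */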

    private Log(long checkpointInterval, long maxFileSize, int queueCapacity, int logWriteTimeout,
            int checkpointWriteTimeout, boolean useDualCheckpoints, File checkpointDir, File backupCheckpointDir,
            String name, boolean useLogReplayV1, boolean useFastReplay, long minimumRequiredSpace,
            @Nullable KeyProvider encryptionKeyProvider, @Nullable String encryptionKeyAlias,
            @Nullable String encryptionCipherProvider, long usableSpaceRefreshInterval, File... logDirs)
            throws IOException {
        Preconditions.checkArgument(checkpointInterval > 0, "checkpointInterval <= 0");
        Preconditions.checkArgument(queueCapacity > 0, "queueCapacity <= 0");
        Preconditions.checkArgument(maxFileSize > 0, "maxFileSize <= 0");
        Preconditions.checkNotNull(checkpointDir, "checkpointDir");
        Preconditions.checkArgument(usableSpaceRefreshInterval > 0, "usableSpaceRefreshInterval <= 0");
        Preconditions.checkArgument(checkpointDir.isDirectory() || checkpointDir.mkdirs(),
                "CheckpointDir " + checkpointDir + " could not be created");
        if (useDualCheckpoints) {
            Preconditions.checkNotNull(backupCheckpointDir,
                    "backupCheckpointDir is" + " null while dual checkpointing is enabled.");
            Preconditions.checkArgument(backupCheckpointDir.isDirectory() || backupCheckpointDir.mkdirs(),
                    "Backup CheckpointDir " + backupCheckpointDir + " could not be created");
        }
        Preconditions.checkNotNull(logDirs, "logDirs");
        Preconditions.checkArgument(logDirs.length > 0, "logDirs empty");
        Preconditions.checkArgument(name != null && !name.trim().isEmpty(), "channel name should be specified");

        this.channelNameDescriptor = "[channel=" + name + "]";
        this.useLogReplayV1 = useLogReplayV1;
        this.useFastReplay = useFastReplay;
        this.minimumRequiredSpace = minimumRequiredSpace;
        this.usableSpaceRefreshInterval = usableSpaceRefreshInterval;
        for (File logDir : logDirs) {
            Preconditions.checkArgument(logDir.isDirectory() || logDir.mkdirs(),
                    "LogDir " + logDir + " could not be created");
        }
        locks = Maps.newHashMap();
        try {
            lock(checkpointDir);
            if (useDualCheckpoints) {
                lock(backupCheckpointDir);
            }
            for (File logDir : logDirs) {
                lock(logDir);
            }
        } catch (IOException e) {
            unlock(checkpointDir);
            if (useDualCheckpoints && backupCheckpointDir != null) {
                unlock(backupCheckpointDir);
            }
            for (File logDir : logDirs) {
                unlock(logDir);
            }
            throw e;
        }
        if (encryptionKeyProvider != null && encryptionKeyAlias != null && encryptionCipherProvider != null) {
            LOGGER.info("Encryption is enabled with encryptionKeyProvider = " + encryptionKeyProvider
                    + ", encryptionKeyAlias = " + encryptionKeyAlias + ", encryptionCipherProvider = "
                    + encryptionCipherProvider);
            this.encryptionKeyProvider = encryptionKeyProvider;
            this.encryptionKeyAlias = encryptionKeyAlias;
            this.encryptionCipherProvider = encryptionCipherProvider;
            this.encryptionKey = encryptionKeyProvider.getKey(encryptionKeyAlias);
        } else if (encryptionKeyProvider == null && encryptionKeyAlias == null
                && encryptionCipherProvider == null) {
            LOGGER.info("Encryption is not enabled");
        } else {
            throw new IllegalArgumentException(
                    "Encryption configuration must all " + "null or all not null: encryptionKeyProvider = "
                            + encryptionKeyProvider + ", encryptionKeyAlias = " + encryptionKeyAlias
                            + ", encryptionCipherProvider = " + encryptionCipherProvider);
        }
        open = false;
        this.checkpointInterval = Math.max(checkpointInterval, 1000);
        this.maxFileSize = maxFileSize;
        this.queueCapacity = queueCapacity;
        this.useDualCheckpoints = useDualCheckpoints;
        this.checkpointDir = checkpointDir;
        this.backupCheckpointDir = backupCheckpointDir;
        this.logDirs = logDirs;
        this.logWriteTimeout = logWriteTimeout;
        this.checkpointWriteTimeout = checkpointWriteTimeout;
        logFiles = new AtomicReferenceArray<LogFile.Writer>(this.logDirs.length);
        workerExecutor = Executors.newSingleThreadScheduledExecutor(
                new ThreadFactoryBuilder().setNameFormat("Log-BackgroundWorker-" + name).build());
        workerExecutor.scheduleWithFixedDelay(new BackgroundWorker(this), this.checkpointInterval,
                this.checkpointInterval, TimeUnit.MILLISECONDS);
    }

    /**
     * Read the checkpoint and data files from disk, replaying them to restore
     * the state that existed directly before the shutdown or crash.
     * @throws IOException
     */
    void replay() throws IOException {
        Preconditions.checkState(!open, "Cannot replay after Log has been opened");

        Preconditions.checkState(tryLockExclusive(), "Cannot obtain lock on " + channelNameDescriptor);

        try {
            /*
             * First we are going to look through the data directories
             * and find all log files. We will store the highest file id
             * (at the end of the filename) we find and use that when we
             * create additional log files.
             *
             * Also store up the list of files so we can replay them later.
             */
            LOGGER.info("Replay started");
            nextFileID.set(0);
            List<File> dataFiles = Lists.newArrayList();
            for (File logDir : logDirs) {
                for (File file : LogUtils.getLogs(logDir)) {
                    int id = LogUtils.getIDForFile(file);
                    dataFiles.add(file);
                    nextFileID.set(Math.max(nextFileID.get(), id));
                    idLogFileMap.put(id,
                            LogFileFactory.getRandomReader(new File(logDir, PREFIX + id), encryptionKeyProvider));
                }
            }
            LOGGER.info("Found NextFileID " + nextFileID + ", from " + dataFiles);

            /*
             * sort the data files by file id so we can replay them by file id
             * which should approximately give us sequential events
             */
            LogUtils.sort(dataFiles);

            boolean shouldFastReplay = this.useFastReplay;
            /*
             * Read the checkpoint (in memory queue) from one of two alternating
             * locations. We will read the last one written to disk.
             */
            File checkpointFile = new File(checkpointDir, "checkpoint");
            if (shouldFastReplay) {
                if (checkpointFile.exists()) {
                    LOGGER.debug("Disabling fast full replay because checkpoint " + "exists: " + checkpointFile);
                    shouldFastReplay = false;
                } else {
                    LOGGER.debug("Not disabling fast full replay because checkpoint " + " does not exist: "
                            + checkpointFile);
                }
            }
            File inflightTakesFile = new File(checkpointDir, "inflighttakes");
            File inflightPutsFile = new File(checkpointDir, "inflightputs");
            EventQueueBackingStore backingStore = null;

            try {
                backingStore = EventQueueBackingStoreFactory.get(checkpointFile, backupCheckpointDir, queueCapacity,
                        channelNameDescriptor, true, this.useDualCheckpoints);
                queue = new FlumeEventQueue(backingStore, inflightTakesFile, inflightPutsFile);
                LOGGER.info("Last Checkpoint " + new Date(checkpointFile.lastModified()) + ", queue depth = "
                        + queue.getSize());

                /*
                 * We now have everything we need to actually replay the log files:
                 * the queue, the timestamp the queue was written to disk, and
                 * the list of data files.
                 *
                 * This will throw if and only if the checkpoint file was fine,
                 * but the inflights were not. If the checkpoint was bad, the backing
                 * store factory would have thrown.
                 */
                doReplay(queue, dataFiles, encryptionKeyProvider, shouldFastReplay);
            } catch (BadCheckpointException ex) {
                backupRestored = false;
                if (useDualCheckpoints) {
                    LOGGER.warn("Checkpoint may not have completed successfully. "
                            + "Restoring checkpoint and starting up.", ex);
                    if (EventQueueBackingStoreFile.backupExists(backupCheckpointDir)) {
                        backupRestored = EventQueueBackingStoreFile.restoreBackup(checkpointDir,
                                backupCheckpointDir);
                    }
                }
                if (!backupRestored) {
                    LOGGER.warn("Checkpoint may not have completed successfully. "
                            + "Forcing full replay, this may take a while.", ex);
                    if (!Serialization.deleteAllFiles(checkpointDir, EXCLUDES)) {
                        throw new IOException("Could not delete files in checkpoint "
                                + "directory to recover from a corrupt or incomplete checkpoint");
                    }
                }
                backingStore = EventQueueBackingStoreFactory.get(checkpointFile, backupCheckpointDir, queueCapacity,
                        channelNameDescriptor, true, useDualCheckpoints);
                queue = new FlumeEventQueue(backingStore, inflightTakesFile, inflightPutsFile);
                // If the checkpoint was deleted due to BadCheckpointException, then
                // trigger fast replay if the channel is configured to.
                shouldFastReplay = this.useFastReplay;
                doReplay(queue, dataFiles, encryptionKeyProvider, shouldFastReplay);
                if (!shouldFastReplay) {
                    didFullReplayDueToBadCheckpointException = true;
                }
            }

            for (int index = 0; index < logDirs.length; index++) {
                LOGGER.info("Rolling " + logDirs[index]);
                roll(index);
            }

            /*
             * Now that we have replayed, write the current queue to disk
             */
            writeCheckpoint(true);

            open = true;
        } catch (Exception ex) {
            LOGGER.error("Failed to initialize Log on " + channelNameDescriptor, ex);
            if (ex instanceof IOException) {
                throw (IOException) ex;
            }
            Throwables.propagate(ex);
        } finally {
            unlockExclusive();
        }
    }

    @SuppressWarnings("deprecation")
    private void doReplay(FlumeEventQueue queue, List<File> dataFiles, KeyProvider encryptionKeyProvider,
            boolean useFastReplay) throws Exception {
        CheckpointRebuilder rebuilder = new CheckpointRebuilder(dataFiles, queue);
        if (useFastReplay && rebuilder.rebuild()) {
            didFastReplay = true;
            LOGGER.info("Fast replay successful.");
        } else {
            ReplayHandler replayHandler = new ReplayHandler(queue, encryptionKeyProvider);
            if (useLogReplayV1) {
                LOGGER.info("Replaying logs with v1 replay logic");
                replayHandler.replayLogv1(dataFiles);
            } else {
                LOGGER.info("Replaying logs with v2 replay logic");
                replayHandler.replayLog(dataFiles);
            }
            readCount = replayHandler.getReadCount();
            putCount = replayHandler.getPutCount();
            takeCount = replayHandler.getTakeCount();
            rollbackCount = replayHandler.getRollbackCount();
            committedCount = replayHandler.getCommitCount();
        }
    }

    @VisibleForTesting
    boolean didFastReplay() {
        return didFastReplay;
    }

    @VisibleForTesting
    public int getReadCount() {
        return readCount;
    }

    @VisibleForTesting
    public int getPutCount() {
        return putCount;
    }

    @VisibleForTesting
    public int getTakeCount() {
        return takeCount;
    }

    @VisibleForTesting
    public int getCommittedCount() {
        return committedCount;
    }

    @VisibleForTesting
    public int getRollbackCount() {
        return rollbackCount;
    }

    /**
     * Was a checkpoint backup used to replay?
     * @return true if a checkpoint backup was used to replay.
     */
    @VisibleForTesting
    boolean backupRestored() {
        return backupRestored;
    }

    @VisibleForTesting
    boolean didFullReplayDueToBadCheckpointException() {
        return didFullReplayDueToBadCheckpointException;
    }

    int getNextFileID() {
        Preconditions.checkState(open, "Log is closed");
        return nextFileID.get();
    }

    FlumeEventQueue getFlumeEventQueue() {
        Preconditions.checkState(open, "Log is closed");
        return queue;
    }

    /**
     * Return the FlumeEvent for an event pointer. This method is
     * non-transactional. It is assumed the client has obtained this
     * FlumeEventPointer via FlumeEventQueue.
     *
     * @param pointer
     * @return the FlumeEvent that the pointer refers to
     * @throws IOException
     * @throws InterruptedException
     */
    FlumeEvent get(FlumeEventPointer pointer) throws IOException, InterruptedException, NoopRecordException {
        Preconditions.checkState(open, "Log is closed");
        int id = pointer.getFileID();
        LogFile.RandomReader logFile = idLogFileMap.get(id);
        Preconditions.checkNotNull(logFile, "LogFile is null for id " + id);
        try {
            return logFile.get(pointer.getOffset());
        } catch (CorruptEventException ex) {
            open = false;
            throw new IOException("Corrupt event found. Please run File Channel " + "Integrity tool.", ex);
        }
    }

    /**
     * Log a put of an event
     *
     * Synchronization not required as this method is atomic
     * @param transactionID
     * @param event
     * @return a FlumeEventPointer locating the event on disk
     * @throws IOException
     */
    FlumeEventPointer put(long transactionID, Event event) throws IOException {
        Preconditions.checkState(open, "Log is closed");
        FlumeEvent flumeEvent = new FlumeEvent(event.getHeaders(), event.getBody());
        Put put = new Put(transactionID, WriteOrderOracle.next(), flumeEvent);
        ByteBuffer buffer = TransactionEventRecord.toByteBuffer(put);
        int logFileIndex = nextLogWriter(transactionID);
        long usableSpace = logFiles.get(logFileIndex).getUsableSpace();
        long requiredSpace = minimumRequiredSpace + buffer.limit();
        if (usableSpace <= requiredSpace) {
            throw new IOException("Usable space exhaused, only " + usableSpace + " bytes remaining, required "
                    + requiredSpace + " bytes");
        }
        boolean error = true;
        try {
            try {
                FlumeEventPointer ptr = logFiles.get(logFileIndex).put(buffer);
                error = false;
                return ptr;
            } catch (LogFileRetryableIOException e) {
                if (!open) {
                    throw e;
                }
                roll(logFileIndex, buffer);
                FlumeEventPointer ptr = logFiles.get(logFileIndex).put(buffer);
                error = false;
                return ptr;
            }
        } finally {
            if (error && open) {
                roll(logFileIndex);
            }
        }
    }

    /**
     * Log a take of an event; the pointer points at the corresponding put.
     *
     * Synchronization not required as this method is atomic
     * @param transactionID
     * @param pointer
     * @throws IOException
     */
    void take(long transactionID, FlumeEventPointer pointer) throws IOException {
        Preconditions.checkState(open, "Log is closed");
        Take take = new Take(transactionID, WriteOrderOracle.next(), pointer.getOffset(), pointer.getFileID());
        ByteBuffer buffer = TransactionEventRecord.toByteBuffer(take);
        int logFileIndex = nextLogWriter(transactionID);
        long usableSpace = logFiles.get(logFileIndex).getUsableSpace();
        long requiredSpace = minimumRequiredSpace + buffer.limit();
        if (usableSpace <= requiredSpace) {
            throw new IOException("Usable space exhaused, only " + usableSpace + " bytes remaining, required "
                    + requiredSpace + " bytes");
        }
        boolean error = true;
        try {
            try {
                logFiles.get(logFileIndex).take(buffer);
                error = false;
            } catch (LogFileRetryableIOException e) {
                if (!open) {
                    throw e;
                }
                roll(logFileIndex, buffer);
                logFiles.get(logFileIndex).take(buffer);
                error = false;
            }
        } finally {
            if (error && open) {
                roll(logFileIndex);
            }
        }
    }

    /**
     * Log a rollback of a transaction
     *
     * Synchronization not required as this method is atomic
     * @param transactionID
     * @throws IOException
     */
    void rollback(long transactionID) throws IOException {
        Preconditions.checkState(open, "Log is closed");

        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Rolling back " + transactionID);
        }
        Rollback rollback = new Rollback(transactionID, WriteOrderOracle.next());
        ByteBuffer buffer = TransactionEventRecord.toByteBuffer(rollback);
        int logFileIndex = nextLogWriter(transactionID);
        long usableSpace = logFiles.get(logFileIndex).getUsableSpace();
        long requiredSpace = minimumRequiredSpace + buffer.limit();
        if (usableSpace <= requiredSpace) {
            throw new IOException("Usable space exhaused, only " + usableSpace + " bytes remaining, required "
                    + requiredSpace + " bytes");
        }
        boolean error = true;
        try {
            try {
                logFiles.get(logFileIndex).rollback(buffer);
                error = false;
            } catch (LogFileRetryableIOException e) {
                if (!open) {
                    throw e;
                }
                roll(logFileIndex, buffer);
                logFiles.get(logFileIndex).rollback(buffer);
                error = false;
            }
        } finally {
            if (error && open) {
                roll(logFileIndex);
            }
        }
    }

    /**
     * Log a commit of a put. We need to know which type of commit it is
     * so we know whether the pointers corresponding to the events
     * should be added to or removed from the flume queue. We
     * could infer the type, but it's best to be explicit.
     *
     * Synchronization not required as this method is atomic
     * @param transactionID
     * @throws IOException
     * @throws InterruptedException
     */
    void commitPut(long transactionID) throws IOException, InterruptedException {
        Preconditions.checkState(open, "Log is closed");
        commit(transactionID, TransactionEventRecord.Type.PUT.get());
    }

    /**
     * Log a commit of a take. We need to know which type of commit it is
     * so we know whether the pointers corresponding to the events
     * should be added to or removed from the flume queue. We
     * could infer the type, but it's best to be explicit.
     *
     * Synchronization not required as this method is atomic
     * @param transactionID
     * @throws IOException
     * @throws InterruptedException
     */
    void commitTake(long transactionID) throws IOException, InterruptedException {
        Preconditions.checkState(open, "Log is closed");
        commit(transactionID, TransactionEventRecord.Type.TAKE.get());
    }

    private boolean tryLockExclusive() {
        try {
            return checkpointWriterLock.tryLock(checkpointWriteTimeout, TimeUnit.SECONDS);
        } catch (InterruptedException ex) {
            LOGGER.warn("Interrupted while waiting for log exclusive lock", ex);
            Thread.currentThread().interrupt();
        }
        return false;
    }

    private void unlockExclusive() {
        checkpointWriterLock.unlock();
    }

    boolean tryLockShared() {
        try {
            return checkpointReadLock.tryLock(logWriteTimeout, TimeUnit.SECONDS);
        } catch (InterruptedException ex) {
            LOGGER.warn("Interrupted while waiting for log shared lock", ex);
            Thread.currentThread().interrupt();
        }
        return false;
    }

    void unlockShared() {
        checkpointReadLock.unlock();
    }

    private void lockExclusive() {
        checkpointWriterLock.lock();
    }

    /**
     * Synchronization not required since this method gets the write lock,
     * so checkpoint and this method cannot run at the same time.
     */
    void close() throws IOException {
        lockExclusive();
        try {
            open = false;
            shutdownWorker();
            if (logFiles != null) {
                for (int index = 0; index < logFiles.length(); index++) {
                    LogFile.Writer writer = logFiles.get(index);
                    if (writer != null) {
                        writer.close();
                    }
                }
            }
            synchronized (idLogFileMap) {
                for (Integer logId : idLogFileMap.keySet()) {
                    LogFile.RandomReader reader = idLogFileMap.get(logId);
                    if (reader != null) {
                        reader.close();
                    }
                }
            }
            queue.close();
            try {
                unlock(checkpointDir);
            } catch (IOException ex) {
                LOGGER.warn("Error unlocking " + checkpointDir, ex);
            }
            if (useDualCheckpoints) {
                try {
                    unlock(backupCheckpointDir);
                } catch (IOException ex) {
                    LOGGER.warn("Error unlocking " + checkpointDir, ex);
                }
            }
            for (File logDir : logDirs) {
                try {
                    unlock(logDir);
                } catch (IOException ex) {
                    LOGGER.warn("Error unlocking " + logDir, ex);
                }
            }
        } finally {
            unlockExclusive();
        }
    }

    void shutdownWorker() {
        String msg = "Attempting to shutdown background worker.";
        System.out.println(msg);
        LOGGER.info(msg);
        workerExecutor.shutdown();
        try {
            workerExecutor.awaitTermination(10, TimeUnit.SECONDS);
        } catch (InterruptedException e) {
            LOGGER.error("Interrupted while waiting for worker to die.");
        }
    }

    void setCheckpointInterval(long checkpointInterval) {
        this.checkpointInterval = checkpointInterval;
    }

    void setMaxFileSize(long maxFileSize) {
        this.maxFileSize = maxFileSize;
    }

    /**
     * Synchronization not required as this method is atomic
     *
     * @param transactionID
     * @param type
     * @throws IOException
     */
    private void commit(long transactionID, short type) throws IOException {
        Preconditions.checkState(open, "Log is closed");
        Commit commit = new Commit(transactionID, WriteOrderOracle.next(), type);
        ByteBuffer buffer = TransactionEventRecord.toByteBuffer(commit);
        int logFileIndex = nextLogWriter(transactionID);
        long usableSpace = logFiles.get(logFileIndex).getUsableSpace();
        long requiredSpace = minimumRequiredSpace + buffer.limit();
        if (usableSpace <= requiredSpace) {
            throw new IOException("Usable space exhaused, only " + usableSpace + " bytes remaining, required "
                    + requiredSpace + " bytes");
        }
        boolean error = true;
        try {
            try {
                LogFile.Writer logFileWriter = logFiles.get(logFileIndex);
                // If multiple transactions are committing at the same time,
                // this ensures that the number of actual fsyncs is small and a
                // number of them are grouped together into one.
                logFileWriter.commit(buffer);
                logFileWriter.sync();
                error = false;
            } catch (LogFileRetryableIOException e) {
                if (!open) {
                    throw e;
                }
                roll(logFileIndex, buffer);
                LogFile.Writer logFileWriter = logFiles.get(logFileIndex);
                logFileWriter.commit(buffer);
                logFileWriter.sync();
                error = false;
            }
        } finally {
            if (error && open) {
                roll(logFileIndex);
            }
        }
    }

    /**
     * Atomic, so no synchronization is required.
     * @return
     */
    private int nextLogWriter(long transactionID) {
        return (int) Math.abs(transactionID % (long) logFiles.length());
    }
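    /*
     * A worked example of the mapping above, assuming three data directories
     * (logFiles.length() == 3): transactionID 10 maps to index 10 % 3 == 1,
     * transactionID 11 to index 2, and transactionID 12 to index 0, so every
     * record of a given transaction lands in the same data directory.
     */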

    /**
     * Unconditionally roll
     * Synchronization done internally
     *
     * @param index
     * @throws IOException
     */
    private void roll(int index) throws IOException {
        roll(index, null);
    }

    /**
     * Roll a log if needed. A roll always occurs if the log at the index
     * does not exist (typically on startup) or the buffer is null. Otherwise
     * LogFile.Writer.isRollRequired is checked again to ensure threads
     * waiting on this log don't pile up and cause multiple successive
     * rolls.
     *
     * Synchronization is required since both synchronized and unsynchronized
     * methods call this method, and this method acquires only a
     * read lock. The synchronization guarantees that multiple threads don't
     * roll at the same time.
     *
     * @param index
     * @throws IOException
     */
    private synchronized void roll(int index, ByteBuffer buffer) throws IOException {
        if (!tryLockShared()) {
            throw new ChannelException("Failed to obtain lock for writing to the "
                    + "log. Try increasing the log write timeout value. " + channelNameDescriptor);
        }

        try {
            LogFile.Writer oldLogFile = logFiles.get(index);
            // check to make sure a roll is actually required due to
            // the possibility of multiple writes waiting on lock
            if (oldLogFile == null || buffer == null || oldLogFile.isRollRequired(buffer)) {
                try {
                    LOGGER.info("Roll start " + logDirs[index]);
                    int fileID = nextFileID.incrementAndGet();
                    File file = new File(logDirs[index], PREFIX + fileID);
                    LogFile.Writer writer = LogFileFactory.getWriter(file, fileID, maxFileSize, encryptionKey,
                            encryptionKeyAlias, encryptionCipherProvider, usableSpaceRefreshInterval);
                    idLogFileMap.put(fileID, LogFileFactory.getRandomReader(file, encryptionKeyProvider));
                    // writer from this point on will get new reference
                    logFiles.set(index, writer);
                    // close out old log
                    if (oldLogFile != null) {
                        oldLogFile.close();
                    }
                } finally {
                    LOGGER.info("Roll end");
                }
            }
        } finally {
            unlockShared();
        }
    }

    private boolean writeCheckpoint() throws Exception {
        return writeCheckpoint(false);
    }

    /**
     * Write the current checkpoint object and then swap objects so that
     * the next checkpoint occurs on the other checkpoint directory.
     *
     * Synchronization is not required because this method acquires a
     * write lock, so it has exclusive access to all the
     * data structures it accesses.
     * @param force a flag to force the checkpoint to be written
     * @throws IOException if we are unable to write the checkpoint out to disk
     */
    private Boolean writeCheckpoint(Boolean force) throws Exception {
        boolean checkpointCompleted = false;
        long usableSpace = checkpointDir.getUsableSpace();
        if (usableSpace <= minimumRequiredSpace) {
            throw new IOException("Usable space exhaused, only " + usableSpace + " bytes remaining, required "
                    + minimumRequiredSpace + " bytes");
        }
        boolean lockAcquired = tryLockExclusive();
        if (!lockAcquired) {
            return false;
        }
        SortedSet<Integer> logFileRefCountsAll = null, logFileRefCountsActive = null;
        try {
            if (queue.checkpoint(force)) {
                long logWriteOrderID = queue.getLogWriteOrderID();

                //Since the active files might also be in the queue's fileIDs,
                //we need to either move each one to a new set or remove each one
                //as we do here. Otherwise we cannot make sure every element in
                //the fileID set from the queue has been updated.
                //Since cloning the whole set is simpler than inserting each element,
                //it is better to make a copy of the set first so that we can use it later.
                logFileRefCountsAll = queue.getFileIDs();
                logFileRefCountsActive = new TreeSet<Integer>(logFileRefCountsAll);

                int numFiles = logFiles.length();
                for (int i = 0; i < numFiles; i++) {
                    LogFile.Writer logWriter = logFiles.get(i);
                    int logFileID = logWriter.getLogFileID();
                    File logFile = logWriter.getFile();
                    LogFile.MetaDataWriter writer = LogFileFactory.getMetaDataWriter(logFile, logFileID);
                    try {
                        writer.markCheckpoint(logWriter.position(), logWriteOrderID);
                    } finally {
                        writer.close();
                    }
                    logFileRefCountsAll.remove(logFileID);
                    LOGGER.info("Updated checkpoint for file: " + logFile + " position: " + logWriter.position()
                            + " logWriteOrderID: " + logWriteOrderID);
                }

                // Update any inactive data files as well
                Iterator<Integer> idIterator = logFileRefCountsAll.iterator();
                while (idIterator.hasNext()) {
                    int id = idIterator.next();
                    LogFile.RandomReader reader = idLogFileMap.remove(id);
                    File file = reader.getFile();
                    reader.close();
                    LogFile.MetaDataWriter writer = LogFileFactory.getMetaDataWriter(file, id);
                    try {
                        writer.markCheckpoint(logWriteOrderID);
                    } finally {
                        writer.close();
                    }
                    reader = LogFileFactory.getRandomReader(file, encryptionKeyProvider);
                    idLogFileMap.put(id, reader);
                    LOGGER.debug("Updated checkpoint for file: " + file + "logWriteOrderID " + logWriteOrderID);
                    idIterator.remove();
                }
                Preconditions.checkState(logFileRefCountsAll.size() == 0,
                        "Could not update all data file timestamps: " + logFileRefCountsAll);
                //Add files from all log directories
                for (int index = 0; index < logDirs.length; index++) {
                    logFileRefCountsActive.add(logFiles.get(index).getLogFileID());
                }
                checkpointCompleted = true;
            }
        } finally {
            unlockExclusive();
        }
        //Do the deletes outside the checkpointWriterLock
        //Delete logic is expensive.
        if (open && checkpointCompleted) {
            removeOldLogs(logFileRefCountsActive);
        }
        //Since the exception is not caught, this will not be returned if
        //an exception is thrown from the try.
        return true;
    }

    private void removeOldLogs(SortedSet<Integer> fileIDs) {
        Preconditions.checkState(open, "Log is closed");
        // To maintain a single code path for deletes, whether or not checkpoint
        // backup is enabled, we track the files which can be deleted after the
        // current checkpoint (since the one which just got backed up still needs
        // these files) and delete them only after the next one (since the current
        // checkpoint will become the backup at that time,
        // and thus these files are no longer needed).
        for (File fileToDelete : pendingDeletes) {
            LOGGER.info("Removing old file: " + fileToDelete);
            FileUtils.deleteQuietly(fileToDelete);
        }
        pendingDeletes.clear();
        // we will find the smallest fileID currently in use and
        // only delete files with an id smaller than that minimum
        int minFileID = fileIDs.first();
        LOGGER.debug("Files currently in use: " + fileIDs);
        for (File logDir : logDirs) {
            List<File> logs = LogUtils.getLogs(logDir);
            // sort oldest to newest
            LogUtils.sort(logs);
            // ensure we always keep two logs per dir
            int size = logs.size() - MIN_NUM_LOGS;
            for (int index = 0; index < size; index++) {
                File logFile = logs.get(index);
                int logFileID = LogUtils.getIDForFile(logFile);
                if (logFileID < minFileID) {
                    LogFile.RandomReader reader = idLogFileMap.remove(logFileID);
                    if (reader != null) {
                        reader.close();
                    }
                    File metaDataFile = Serialization.getMetaDataFile(logFile);
                    pendingDeletes.add(logFile);
                    pendingDeletes.add(metaDataFile);
                }
            }
        }
    }

    /**
     * Lock storage to provide exclusive access.
     *
     * <p> Locking is not supported by all file systems.
     * E.g., NFS does not consistently support exclusive locks.
     *
     * <p> If locking is supported we guarantee exculsive access to the
     * storage directory. Otherwise, no guarantee is given.
     *
     * @throws IOException if locking fails
     */
    private void lock(File dir) throws IOException {
        FileLock lock = tryLock(dir);
        if (lock == null) {
            String msg = "Cannot lock " + dir + ". The directory is already locked. " + channelNameDescriptor;
            LOGGER.info(msg);
            throw new IOException(msg);
        }
        FileLock secondLock = tryLock(dir);
        if (secondLock != null) {
            LOGGER.warn("Directory " + dir + " does not support locking");
            secondLock.release();
            secondLock.channel().close();
        }
        locks.put(dir.getAbsolutePath(), lock);
    }

    /**
     * Attempts to acquire an exclusive lock on the directory.
     *
     * @return A lock object representing the newly-acquired lock or
     * <code>null</code> if directory is already locked.
     * @throws IOException if locking fails.
     */
    @SuppressWarnings("resource")
    private FileLock tryLock(File dir) throws IOException {
        File lockF = new File(dir, FILE_LOCK);
        lockF.deleteOnExit();
        RandomAccessFile file = new RandomAccessFile(lockF, "rws");
        FileLock res = null;
        try {
            res = file.getChannel().tryLock();
        } catch (OverlappingFileLockException oe) {
            file.close();
            return null;
        } catch (IOException e) {
            LOGGER.error("Cannot create lock on " + lockF, e);
            file.close();
            throw e;
        }
        return res;
    }
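    /*
     * The lock/tryLock pair above is the standard java.nio file-locking
     * pattern; a minimal standalone sketch (the directory and file names are
     * illustrative):
     *
     *   RandomAccessFile lockFile =
     *       new RandomAccessFile(new File(dir, "in_use.lock"), "rws");
     *   FileLock fileLock = lockFile.getChannel().tryLock(); // null if another process holds it
     *   if (fileLock != null) {
     *       // ... exclusive access to the directory ...
     *       fileLock.release();
     *   }
     *   lockFile.close();
     *
     * On file systems that do not reliably enforce locks (such as some NFS
     * mounts), tryLock can succeed for every caller; lock(File) above probes
     * for this by taking a second lock on the same file and warning when that
     * also succeeds.
     */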

    /**
     * Unlock directory.
     *
     * @throws IOException
     */
    private void unlock(File dir) throws IOException {
        FileLock lock = locks.remove(dir.getAbsolutePath());
        if (lock == null) {
            return;
        }
        lock.release();
        lock.channel().close();
        lock = null;
    }

    static class BackgroundWorker implements Runnable {
        private static final Logger LOG = LoggerFactory.getLogger(BackgroundWorker.class);
        private final Log log;

        public BackgroundWorker(Log log) {
            this.log = log;
        }

        @Override
        public void run() {
            try {
                if (log.open) {
                    log.writeCheckpoint();
                }
            } catch (IOException e) {
                LOG.error("Error doing checkpoint", e);
            } catch (Throwable e) {
                LOG.error("General error in checkpoint worker", e);
            }
        }
    }
}