com.arpnetworking.tsdcore.tailer.StatefulTailer.java Source code

Java tutorial

Introduction

Here is the source code for com.arpnetworking.tsdcore.tailer.StatefulTailer.java

Source

/**
 * Copyright 2014 Groupon.com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.arpnetworking.tsdcore.tailer;

import com.arpnetworking.utility.OvalBuilder;
import com.arpnetworking.utility.TimerTrigger;
import com.arpnetworking.utility.Trigger;
import com.google.common.base.Charsets;
import com.google.common.base.MoreObjects;
import com.google.common.base.Optional;
import com.google.common.base.Throwables;
import net.sf.oval.constraint.NotNull;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.io.IOUtils;
import org.joda.time.Duration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.SeekableByteChannel;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.NoSuchFileException;
import java.nio.file.StandardOpenOption;
import java.nio.file.attribute.BasicFileAttributes;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Objects;

/**
 * A reimplementation of the Apache Commons IO tailer based on the 2.5 snapshot
 * version. This version attempts to address several shortcomings of the Apache
 * Commons implementation. In particular, more robust support for rename-
 * recreate file rotations and some progress for copy-truncate cases. The major
 * new feature is the <code>PositionStore</code> which is used to checkpoint
 * the offset in the tailed file as identified by a hash of the file prefix.
 *
 * @author Brandon Arp (barp at groupon dot com)
 * @author Ville Koskela (vkoskela at groupon dot com)
 */
public final class StatefulTailer implements Tailer {

    /**
     * {@inheritDoc}
     *
     * Clears the running flag. The file, read and line loops poll this flag
     * and exit the next time they check it.
     */
    @Override
    public void stop() {
        this._isRunning = false;
    }

    /**
     * {@inheritDoc}
     *
     * Runs the file loop until the tailer is stopped. Anything thrown out of
     * the loop is forwarded to the failure handler; an interruption
     * additionally restores the thread's interrupt flag first. The position
     * store and the line buffer are closed quietly on the way out.
     */
    @Override
    public void run() {
        try {
            fileLoop();
            // CHECKSTYLE.OFF: IllegalCatch - Intercept all exceptions
        } catch (final Throwable t) {
            // CHECKSTYLE.ON: IllegalCatch
            if (t instanceof InterruptedException) {
                // Preserve the interrupt for whoever owns this thread
                Thread.currentThread().interrupt();
            }
            handleThrowable(t);
        } finally {
            IOUtils.closeQuietly(_positionStore);
            IOUtils.closeQuietly(_lineBuffer);
        }
    }

    /**
     * Report whether this <code>Tailer</code> is still running, that is
     * whether {@link #stop()} has not yet been invoked (directly or as the
     * result of a failure).
     *
     * @return <code>True</code> if and only if the <code>Tailer</code> is running.
     */
    protected boolean isRunning() {
        return this._isRunning;
    }

    /**
     * Outer loop of the tailer: repeatedly open the target file, seek to the
     * position to resume from and hand the open channel to the read loop.
     * When the read loop returns the channel is closed and, if the tailer is
     * still running, the file is opened again.
     *
     * @throws IOException if opening, hashing or positioning the file fails
     * with anything other than the file not existing.
     * @throws InterruptedException if interrupted while waiting on the trigger.
     */
    private void fileLoop() throws IOException, InterruptedException {
        SeekableByteChannel channel = null;
        InitialPosition initialPosition = _initialPosition;
        try {
            while (isRunning()) {
                // Try to open the file; a missing file is reported and retried
                // after one trigger interval
                try {
                    channel = Files.newByteChannel(_file.toPath(), StandardOpenOption.READ);
                    LOGGER.trace(String.format("Opened file; file=%s", _file));
                } catch (final NoSuchFileException e) {
                    _listener.fileNotFound();
                    _trigger.waitOnTrigger();
                }
                if (channel == null) {
                    continue;
                }

                // The configured initial position only applies to the first
                // file opened; every later open defaults to the start
                long startPosition = initialPosition.get(channel);
                initialPosition = InitialPosition.START;

                // A checkpoint stored under this file's identity hash takes
                // precedence over the default start position
                _hash = computeHash(channel, REQUIRED_BYTES_FOR_HASH);
                if (_hash.isPresent()) {
                    startPosition = _positionStore.getPosition(_hash.get()).or(startPosition).longValue();
                }
                LOGGER.trace(
                        String.format("Starting tail; file=%s, position=%d", _file, Long.valueOf(startPosition)));
                channel.position(startPosition);

                // Tail the file until it rotates or the tailer stops
                readLoop(channel);

                // Discard all per file state before the next open
                IOUtils.closeQuietly(channel);
                channel = null;
                _hash = Optional.absent();
            }
        } finally {
            IOUtils.closeQuietly(channel);
            _hash = Optional.absent();
        }
    }

    /**
     * Inner loop of the tailer: poll the open channel for new data until the
     * tailer is stopped or a rotation of the file at the configured path is
     * detected. On rotation the listener is informed (via <code>rotate</code>)
     * and the method returns so that the file loop can reopen the path.
     *
     * Rotation is inferred, in order, from: the path no longer existing; the
     * path's length being smaller than the current read position; the channel
     * reporting more data but none being readable; an unchanged length with a
     * newer modification time; and finally a differing prefix hash.
     *
     * @param reader The channel currently being tailed.
     * @throws IOException if reading the channel or the file attributes fails
     * for a reason other than the file not existing.
     * @throws InterruptedException if interrupted while waiting on the trigger.
     */
    private void readLoop(final SeekableByteChannel reader) throws IOException, InterruptedException {
        // Modification time recorded after the most recent read that found
        // data; absent until such a read has happened
        Optional<Long> lastChecked = Optional.absent();
        // Hash of this reader's prefix and the number of bytes it covers;
        // only maintained while the file has no identity hash (_hash)
        Optional<String> currentReaderPrefixHash = Optional.absent();
        int currentReaderPrefixHashLength = 0;
        while (isRunning()) {
            // Obtain properties of file we expect we are reading
            final Attributes attributes;
            try {
                attributes = getAttributes(_file, lastChecked);
            } catch (final NoSuchFileException t) {
                rotate(Optional.of(reader),
                        String.format("File rotation detected based attributes access failure; file=%s", _file));

                // Return to the file loop
                return;
            }

            if (attributes.getLength() < reader.position()) {
                // File was rotated; either:
                // 1) Position is past the length of the file
                // 2) The expected file is smaller than the current file
                rotate(Optional.of(reader), String.format(
                        "File rotation detected based on length, position and size; file=%s, length=%d, position=%d, size=%d",
                        _file, Long.valueOf(attributes.getLength()), Long.valueOf(reader.position()),
                        Long.valueOf(reader.size())));

                // Return to the file loop
                return;

            } else {
                // File was _likely_ not rotated
                if (reader.size() > reader.position()) {
                    // There is more data in the file
                    if (!readLines(reader)) {
                        // There actually isn't any more data in the file; this
                        // means the file was rotated and the new file has more
                        // data than the old file (e.g. rotation from empty).

                        // TODO(vkoskela): Account for missing final newline. [MAI-322]
                        // There is a degenerate case where the last line in a
                        // file does not have a newline. Then readLines will
                        // always find new data, but the file has been rotated
                        // away. We should buffer the contents of partial lines
                        // thereby detecting when the length grows whether we
                        // actually got more data in the current file.

                        rotate(Optional.<SeekableByteChannel>absent(), String.format(
                                "File rotation detected based on length and no new data; file=%s, length=%d, position=%d",
                                _file, Long.valueOf(attributes.getLength()), Long.valueOf(reader.position())));

                        // Return to the file loop
                        return;
                    }
                    // Remember the modification time as of this read so later
                    // iterations can tell whether the path has changed since
                    lastChecked = Optional.of(Long.valueOf(_file.lastModified()));

                } else if (attributes.isNewer()) {
                    // The file does not contain any additional data, but its
                    // last modified date is after the last read date. The file
                    // must have rotated and contains the same length of
                    // content. This can happen on periodic systems which log
                    // the same data at the beginning of each period.

                    // NOTE: lastChecked.get() is safe here; getAttributes only
                    // reports "newer" when lastChecked is present.
                    rotate(Optional.<SeekableByteChannel>absent(),
                            String.format(
                                    "File rotation detected based equal length and position but newer"
                                            + "; file=%s, length=%d, position=%d, lastChecked=%s, attributes=%s",
                                    _file, Long.valueOf(attributes.getLength()), Long.valueOf(reader.position()),
                                    lastChecked.get(), attributes));

                    // Return to the file loop
                    return;

                } else {
                    // The files are the same size and the timestamps are the
                    // same. This is more common than it sounds since file
                    // modification timestamps are not very precise on many
                    // file systems.
                    //
                    // Since we're not doing anything at this point let's hash
                    // the first N bytes of the current file and the expected
                    // file to see if we're still working on the same file.

                    // An absent result means the comparison itself failed
                    // (I/O error) and is treated as "no rotation detected"
                    final Optional<Boolean> hashesSame = compareByHash(currentReaderPrefixHash,
                            currentReaderPrefixHashLength);
                    if (hashesSame.isPresent() && !hashesSame.get().booleanValue()) {
                        // The file rotated with the same length!
                        rotate(Optional.<SeekableByteChannel>absent(),
                                String.format("File rotation detected based on hash; file=%s", _file));

                        // Return to the file loop
                        return;
                    }
                    // else: the files are empty or the hashes are the same. In
                    // either case we don't have enough data to determine if
                    // the files are different; we'll need to wait and see when
                    // more data is written if the size and length diverge.

                    // TODO(vkoskela): Configurable maximum rotation hash size. [MAI-323]
                    // TODO(vkoskela): Configurable minimum rotation hash size. [MAI-324]
                    // TODO(vkoskela): Configurable identity hash size. [MAI-325]
                    // TODO(vkoskela): We should add a rehash interval. [MAI-326]
                    // This interval would be separate from the read interval,
                    // and generally longer, preventing us from rehashing the
                    // file every interval; but short enough that we don't wait
                    // too long before realizing a slowly growing file was
                    // rotated.
                }
            }

            // Compute the prefix hash unless we have an identity
            if (!_hash.isPresent()) {
                // Hash whatever is available, capped at the identity hash size
                currentReaderPrefixHashLength = (int) Math.min(reader.size(), REQUIRED_BYTES_FOR_HASH);
                currentReaderPrefixHash = computeHash(reader, currentReaderPrefixHashLength);
            }

            // Read interval
            _trigger.waitOnTrigger();

            // Update the reader position
            updateCheckpoint(reader.position());
        }
    }

    /**
     * Read the size and last modified time of a file and determine whether
     * it was modified after the supplied reference time.
     *
     * @param file The file to inspect.
     * @param lastChecked The reference modification time in milliseconds; if
     * absent the file is never considered newer.
     * @return Snapshot of the file's length, modification time and whether
     * that modification time is strictly after <code>lastChecked</code>.
     * @throws IOException if the attributes cannot be read.
     */
    private Attributes getAttributes(final File file, final Optional<Long> lastChecked) throws IOException {
        final BasicFileAttributes snapshot = Files.readAttributes(file.toPath(), BasicFileAttributes.class);
        final long size = snapshot.size();
        final long modifiedMillis = snapshot.lastModifiedTime().toMillis();
        LOGGER.trace(String.format("File attributes; file=%s, lastModifiedTime=%d, size=%d", file,
                Long.valueOf(modifiedMillis), Long.valueOf(size)));

        final boolean newer;
        if (lastChecked.isPresent()) {
            newer = modifiedMillis > lastChecked.get().longValue();
        } else {
            newer = false;
        }
        return new Attributes(size, modifiedMillis, newer);
    }

    /**
     * Finish up with the current file after a rotation was detected and
     * notify the listener.
     *
     * @param reader The channel of the rotated file if it should be given one
     * more read interval to receive trailing data; absent to skip that step.
     * @param reason Description of how the rotation was detected; logged at
     * trace level.
     * @throws InterruptedException if interrupted while waiting on the trigger.
     * @throws IOException if the final read of the old file fails.
     */
    private void rotate(final Optional<SeekableByteChannel> reader, final String reason)
            throws InterruptedException, IOException {
        if (reader.isPresent()) {
            // Give the writer one full read interval to finish with the old
            // file, then drain whatever it left behind
            final SeekableByteChannel previous = reader.get();
            _trigger.waitOnTrigger();
            readLines(previous);
        }

        // Tell the listener before recording the reason
        _listener.fileRotated();
        LOGGER.trace(reason);
    }

    /**
     * Read all data currently available from the channel, emitting every
     * complete line to the listener and checkpointing after each one. Lines
     * are terminated by <code>\n</code>, <code>\r\n</code> or a bare
     * <code>\r</code>. A trailing partial line is not emitted; the channel is
     * repositioned to its first byte so it is read again on the next call.
     *
     * @param reader The channel to read from, positioned at the start of a line.
     * @return <code>True</code> if at least one read did not hit end of file.
     * @throws IOException if reading or repositioning the channel fails.
     */
    private boolean readLines(final SeekableByteChannel reader) throws IOException {
        // Compute the hash if not already set
        if (!_hash.isPresent() && reader.size() >= REQUIRED_BYTES_FOR_HASH) {
            _hash = computeHash(reader, REQUIRED_BYTES_FOR_HASH);
        }

        // Track current position in file and next read position
        // NOTE: The next read position is always the beginning of a line
        long position = reader.position();
        long nextReadPosition = position;

        // Reset buffers
        _buffer.clear();
        _lineBuffer.reset();

        // Process available data
        int bufferSize = reader.read(_buffer);
        boolean hasData = false;
        boolean hasCR = false;
        while (isRunning() && bufferSize != -1) {
            hasData = true;
            for (int i = 0; i < bufferSize; i++) {
                final byte ch = _buffer.get(i);
                switch (ch) {
                case '\n':
                    // Terminates the line, swallowing a preceding CR if any;
                    // the next line starts right after this byte
                    hasCR = false;
                    handleLine();
                    nextReadPosition = position + i + 1;
                    updateCheckpoint(nextReadPosition);
                    break;
                case '\r':
                    // A CR directly after another CR is kept as line content
                    if (hasCR) {
                        _lineBuffer.write('\r');
                    }
                    hasCR = true;
                    break;
                default:
                    if (hasCR) {
                        // The previous line ended with a bare CR. The current
                        // byte is the first byte of the next line, so the next
                        // line starts AT this byte and not after it; resuming
                        // one byte later would drop this byte whenever the
                        // line is not completed within this call.
                        hasCR = false;
                        handleLine();
                        nextReadPosition = position + i;
                        updateCheckpoint(nextReadPosition);
                    }
                    _lineBuffer.write(ch);
                }
            }
            // Absolute file offset of the first byte of the next chunk
            position = reader.position();
            _buffer.clear();
            bufferSize = reader.read(_buffer);
        }

        // Rewind to the start of the first incomplete line (if any)
        reader.position(nextReadPosition);
        return hasData;
    }

    /**
     * Compare the hash identifying the file currently being read against the
     * hash of the same number of leading bytes of the file presently at the
     * configured path. The identity hash is used when available; otherwise
     * the supplied prefix hash is used.
     *
     * @param prefixHash Hash of the current reader's prefix; only consulted
     * when no identity hash is available.
     * @param prefixLength Number of bytes covered by <code>prefixHash</code>.
     * @return Whether the two hashes are equal (two absent hashes count as
     * equal), or absent if the file at the path could not be opened or read.
     */
    private Optional<Boolean> compareByHash(final Optional<String> prefixHash, final int prefixLength) {
        final int bytesToHash = _hash.isPresent() ? REQUIRED_BYTES_FOR_HASH : prefixLength;
        try (final SeekableByteChannel current = Files.newByteChannel(_file.toPath(), StandardOpenOption.READ)) {
            final Optional<String> onDiskHash = computeHash(current, bytesToHash);

            LOGGER.trace(String.format("Comparing hashes; hash1=%s, hash2=%s, size=%d", prefixHash, onDiskHash,
                    Integer.valueOf(bytesToHash)));

            final String expected = _hash.or(prefixHash).orNull();
            final String actual = onDiskHash.orNull();
            return Optional.of(Boolean.valueOf(Objects.equals(expected, actual)));
        } catch (final IOException e) {
            // Best effort only; the caller treats absent as "cannot tell"
            return Optional.absent();
        }
    }

    /**
     * Compute the hex encoded MD5 hash of the first <code>hashSize</code>
     * bytes of the channel. The channel's position is restored to what it
     * was on entry on every exit path, including the unexpected end of file
     * and exception paths.
     *
     * @param reader The channel to hash the prefix of.
     * @param hashSize The number of leading bytes to hash.
     * @return The hash, or absent if <code>hashSize</code> is not positive,
     * the channel holds fewer than <code>hashSize</code> bytes or the end of
     * the channel is reached before <code>hashSize</code> bytes were read.
     * @throws IOException if reading or repositioning the channel fails.
     */
    private Optional<String> computeHash(final SeekableByteChannel reader, final int hashSize) throws IOException {
        // Don't hash empty data sets
        if (hashSize <= 0) {
            return Optional.absent();
        }

        // Validate sufficient data to compute the hash; the position has not
        // been touched yet so there is nothing to restore
        final long readerSize = reader.size();
        if (readerSize < hashSize) {
            LOGGER.trace(String.format("Reader size insufficient to compute hash; hashSize=%d, readerSize=%d",
                    Integer.valueOf(hashSize), Long.valueOf(readerSize)));
            return Optional.absent();
        }

        final long oldPosition = reader.position();
        try {
            // Read the data to hash
            reader.position(0);
            final ByteBuffer buffer = ByteBuffer.allocate(hashSize);
            int totalBytesRead = 0;
            while (totalBytesRead < hashSize) {
                final int bytesRead = reader.read(buffer);
                if (bytesRead < 0) {
                    // The file shrank between the size check and the read
                    LOGGER.warn(String.format("Unexpected end of file reached; totalBytesRead=%d",
                            Integer.valueOf(totalBytesRead)));
                    return Optional.absent();
                }
                totalBytesRead += bytesRead;
            }

            // Compute the hash
            _md5.reset();
            final byte[] digest = _md5.digest(buffer.array());
            final String hash = Hex.encodeHexString(digest);
            if (LOGGER.isTraceEnabled()) {
                // Hex encoding the hashed bytes is only worth it when it is logged
                LOGGER.trace(
                        String.format("Computed hash; hash=%s, bytes=%s", hash, Hex.encodeHexString(buffer.array())));
            }
            return Optional.of(hash);
        } finally {
            // Return the reader to its original state
            reader.position(oldPosition);
        }
    }

    /**
     * Record the read position for the current file in the position store.
     * Nothing is recorded while the file has no identity hash.
     *
     * @param position The offset in the file to checkpoint.
     */
    private void updateCheckpoint(final long position) {
        if (!_hash.isPresent()) {
            // Without an identity there is no key to store the position under
            return;
        }
        _positionStore.setPosition(_hash.get(), position);
    }

    /**
     * Decode the bytes accumulated in the line buffer using the tailer's
     * character set, pass the resulting line to the listener and clear the
     * line buffer for the next line.
     */
    private void handleLine() {
        // Decode once and share the result between logging and the listener
        //CHECKSTYLE.OFF: IllegalInstantiation - This is how you convert a byte[] to String.
        final String line = new String(_lineBuffer.toByteArray(), _characterSet);
        //CHECKSTYLE.ON: IllegalInstantiation

        // Parameterized logging avoids building the message when trace is off
        LOGGER.trace("handleLine: {}", line);
        _listener.handle(line);
        _lineBuffer.reset();
    }

    /**
     * Report a failure to the listener and then stop the tailer.
     *
     * @param t The failure that terminated tailing.
     */
    private void handleThrowable(final Throwable t) {
        this._listener.handle(t);
        this.stop();
    }

    /**
     * Create a tailer from a builder using an explicit trigger.
     *
     * NOTE: Package private for testing
     *
     * @param builder Source of the file, position store, listener and
     * initial position.
     * @param trigger The trigger waited on between reads.
     */
    /* package private */ StatefulTailer(final Builder builder, final Trigger trigger) {
        // Settings taken from the builder
        _file = builder._file;
        _positionStore = builder._positionStore;
        _listener = builder._listener;
        _initialPosition = builder._initialPosition;
        _trigger = trigger;

        // Working state owned by this instance
        _buffer = ByteBuffer.allocate(INITIAL_BUFFER_SIZE);
        _lineBuffer = new ByteArrayOutputStream(INITIAL_BUFFER_SIZE);
        _md5 = newMd5Digest();

        // Hand the fully assembled tailer to the listener last
        _listener.initialize(this);
    }

    /**
     * Obtain an MD5 digest instance, propagating the checked failure as an
     * unchecked exception.
     *
     * @return A new MD5 <code>MessageDigest</code>.
     */
    private static MessageDigest newMd5Digest() {
        try {
            return MessageDigest.getInstance("MD5");
        } catch (final NoSuchAlgorithmException e) {
            throw Throwables.propagate(e);
        }
    }

    /**
     * Create a tailer from a builder, waiting on a <code>TimerTrigger</code>
     * constructed from the builder's read interval between reads.
     *
     * @param builder The builder holding the tailer's configuration.
     */
    private StatefulTailer(final Builder builder) {
        // TODO(vkoskela): Configurable grace period separate from interval. [MAI-327]
        this(builder, new TimerTrigger(builder._readInterval));
    }

    // The file being tailed
    private final File _file;
    // Persists read positions keyed by the file's identity hash
    private final PositionStore _positionStore;
    // Receives lines, rotation/not-found notifications and failures
    private final TailerListener _listener;
    // Reusable chunk buffer for reads from the file
    private final ByteBuffer _buffer;
    // Accumulates the bytes of the line currently being assembled
    private final ByteArrayOutputStream _lineBuffer;
    // Digest used to hash file prefixes
    private final MessageDigest _md5;
    // Character set used to decode lines
    private final Charset _characterSet = Charsets.UTF_8;
    // Where to start in the first file opened when no checkpoint applies
    private final InitialPosition _initialPosition;
    // Waited on between reads and while the file is missing
    private final Trigger _trigger;

    // Polled by the loops; cleared by stop()
    private volatile boolean _isRunning = true;
    // Identity hash of the file currently open (hash of its first
    // REQUIRED_BYTES_FOR_HASH bytes); absent while no file is open or the
    // file is still too short to be hashed.
    // NOTE(review): not volatile; presumably only accessed from the thread
    // executing run() - confirm.
    private Optional<String> _hash = Optional.absent();

    // Number of leading bytes of a file hashed to identify it
    private static final int REQUIRED_BYTES_FOR_HASH = 512;
    // Capacity of the read buffer and initial capacity of the line buffer
    private static final int INITIAL_BUFFER_SIZE = 65536;
    private static final Logger LOGGER = LoggerFactory.getLogger(StatefulTailer.class);

    /**
     * Immutable snapshot of the properties of a file that the tailer uses to
     * detect rotation: its length, its last modified time and whether that
     * time is after the tailer's reference time.
     */
    private static final class Attributes {

        /**
         * Public constructor.
         *
         * @param length The length of the file in bytes.
         * @param lastModifiedTime The last modified time in milliseconds.
         * @param newer Whether the file was modified after the reference time.
         */
        public Attributes(final long length, final long lastModifiedTime, final boolean newer) {
            this._length = length;
            this._lastModifiedTime = lastModifiedTime;
            this._newer = newer;
        }

        /**
         * @return The length of the file in bytes.
         */
        public long getLength() {
            return this._length;
        }

        /**
         * @return The last modified time in milliseconds.
         */
        public long getLastModifiedTime() {
            return this._lastModifiedTime;
        }

        /**
         * @return Whether the file was modified after the reference time.
         */
        public boolean isNewer() {
            return this._newer;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public String toString() {
            return MoreObjects.toStringHelper(Attributes.class)
                    .add("Length", this._length)
                    .add("LastModifiedTime", this._lastModifiedTime)
                    .add("Newer", this._newer)
                    .toString();
        }

        private final long _length;
        private final long _lastModifiedTime;
        private final boolean _newer;
    }

    /**
     * Implementation of builder pattern for <code>StatefulTailer</code>. The
     * file, position store and listener must be set; the read interval and
     * the initial position have defaults. All fields are annotated
     * <code>NotNull</code>.
     *
     * @author Brandon Arp (barp at groupon dot com)
     */
    public static class Builder extends OvalBuilder<StatefulTailer> {

        /**
         * Public constructor.
         */
        public Builder() {
            super(StatefulTailer.class);
        }

        /**
         * Sets the file to read. Cannot be null.
         *
         * @param value The file to read.
         * @return This instance of {@link Builder}
         */
        public Builder setFile(final File value) {
            _file = value;
            return this;
        }

        /**
         * Sets the <code>PositionStore</code> to be used to checkpoint the
         * file read position. Cannot be null.
         *
         * @param value The <code>PositionStore</code> instance.
         * @return This instance of {@link Builder}
         */
        public Builder setPositionStore(final PositionStore value) {
            _positionStore = value;
            return this;
        }

        /**
         * Sets the <code>TailerListener</code> instance. Cannot be null.
         *
         * @param value The <code>TailerListener</code> instance.
         * @return This instance of {@link Builder}
         */
        public Builder setListener(final TailerListener value) {
            _listener = value;
            return this;
        }

        /**
         * Sets the interval between file reads. Optional. Cannot be null.
         * Default is 500 milliseconds.
         *
         * @param value The file read interval.
         * @return This instance of {@link Builder}
         */
        public Builder setReadInterval(final Duration value) {
            _readInterval = value;
            return this;
        }

        /**
         * Sets the position at which the tailer starts reading the first
         * file it opens when no checkpointed position is found for that
         * file; files opened afterwards start at the beginning. Optional.
         * Cannot be null. Default is <code>InitialPosition.START</code>.
         *
         * @param initialPosition The initial position of the tailer
         * @return This instance of {@link Builder}
         */
        public Builder setInitialPosition(final InitialPosition initialPosition) {
            _initialPosition = initialPosition;
            return this;
        }

        // Required; no default
        @NotNull
        private File _file;
        // Required; no default
        @NotNull
        private PositionStore _positionStore;
        // Required; no default
        @NotNull
        private TailerListener _listener;
        // Optional; defaults to 500 milliseconds
        @NotNull
        private Duration _readInterval = Duration.millis(500);
        // Optional; defaults to the start of the file
        @NotNull
        private InitialPosition _initialPosition = InitialPosition.START;
    }
}