com.arpnetworking.metrics.common.tailer.StatefulTailer.java Source code

Java tutorial

Introduction

Here is the source code for com.arpnetworking.metrics.common.tailer.StatefulTailer.java

Source

/**
 * Copyright 2014 Groupon.com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.arpnetworking.metrics.common.tailer;

import com.arpnetworking.commons.builder.OvalBuilder;
import com.arpnetworking.logback.annotations.LogValue;
import com.arpnetworking.steno.LogValueMapFactory;
import com.arpnetworking.steno.Logger;
import com.arpnetworking.steno.LoggerFactory;
import com.arpnetworking.utility.TimerTrigger;
import com.arpnetworking.utility.Trigger;
import com.google.common.base.MoreObjects;
import com.google.common.base.Throwables;
import net.sf.oval.constraint.NotNull;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.io.IOUtils;
import org.joda.time.Duration;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.SeekableByteChannel;
import java.nio.file.Files;
import java.nio.file.NoSuchFileException;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.nio.file.attribute.BasicFileAttributes;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Objects;
import java.util.Optional;

/**
 * A reimplementation of the Apache Commons IO tailer based on the 2.5 snapshot
 * version. This version attempts to address several shortcomings of the Apache
 * Commons implementation. In particular, more robust support for rename-
 * recreate file rotations and some progress for copy-truncate cases. The major
 * new feature is the <code>PositionStore</code> which is used to checkpoint
 * the offset in the tailed file as identified by a hash of the file prefix.
 *
 * @author Brandon Arp (brandonarp at gmail dot com)
 * @author Ville Koskela (ville dot koskela at inscopemetrics dot com)
 */
public final class StatefulTailer implements Tailer {

    /**
     * {@inheritDoc}
     */
    @Override
    public void stop() {
        _isRunning = false;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void run() {
        Thread.currentThread().setUncaughtExceptionHandler((thread, throwable) -> LOGGER.error()
                .setMessage("Unhandled exception").setThrowable(throwable).log());

        try {
            fileLoop();
        } finally {
            IOUtils.closeQuietly(_positionStore);
            IOUtils.closeQuietly(_lineBuffer);
        }
    }

    /**
     * Generate a Steno log compatible representation.
     *
     * @return Steno log compatible representation.
     */
    @LogValue
    public Object toLogValue() {
        return LogValueMapFactory.<String, Object>builder().put("file", _file).put("positionStore", _positionStore)
                .put("listener", _listener).put("isRunning", _isRunning).put("trigger", _trigger).build();
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public String toString() {
        return toLogValue().toString();
    }

    /**
     * Determine if the <code>Tailer</code> is running.
     *
     * @return <code>True</code> if and only if the <code>Tailer</code> is running.
     */
    protected boolean isRunning() {
        return _isRunning;
    }

    private void fileLoop() {
        SeekableByteChannel reader = null;
        InitialPosition nextInitialPosition = _initialPosition;
        try {
            while (isRunning()) {
                // Attempt to open the file
                try {
                    reader = Files.newByteChannel(_file, StandardOpenOption.READ);
                    LOGGER.trace().setMessage("Opened file").addData("file", _file).log();
                } catch (final NoSuchFileException e) {
                    _listener.fileNotFound();
                    _trigger.waitOnTrigger();
                }

                if (reader != null) {
                    // Position the reader
                    resume(reader, nextInitialPosition);
                    _listener.fileOpened();

                    // Any subsequent file opens we should start at the beginning
                    nextInitialPosition = InitialPosition.START;

                    // Read the file
                    readLoop(reader);

                    // Reset per file state
                    IOUtils.closeQuietly(reader);
                    reader = null;
                    _hash = Optional.empty();
                }
            }
            // Clients may elect to kill the stateful tailer on an exception by calling stop, or they
            // may log the exception and continue. In the latter case it is strongly recommended that
            // clients pause before continuing; otherwise, if the error persists the stateful tailer
            // may create non-trivial load on the io subsystem.
            // NOTE: Any non-exception throwable will kill the stateful tailer.
        } catch (final InterruptedException e) {
            Thread.currentThread().interrupt();
            handleThrowable(e);
            // CHECKSTYLE.OFF: IllegalCatch - Allow clients to decide how to handle exceptions
        } catch (final Exception e) {
            // CHECKSTYLE.ON: IllegalCatch
            handleThrowable(e);
        } finally {
            IOUtils.closeQuietly(reader);
            reader = null;
            _hash = Optional.empty();
        }
    }

    private void resume(final SeekableByteChannel reader, final InitialPosition initialPosition)
            throws IOException {
        // Attempt to resume from checkpoint
        long position = initialPosition.get(reader);

        // Override position with last known position from store
        _hash = computeHash(reader, REQUIRED_BYTES_FOR_HASH);
        if (_hash.isPresent()) {
            final Optional<Long> storedPosition = _positionStore.getPosition(_hash.get());
            if (storedPosition.isPresent()) {
                // Optionally limit the size of the backlog to process
                final long fileSize = reader.size();
                if (_maximumOffsetOnResume.isPresent()
                        && fileSize - storedPosition.get() > _maximumOffsetOnResume.get()) {
                    position = fileSize - _maximumOffsetOnResume.get();
                    // TODO(vkoskela): Discard the current potentially partial line [AINT-584]
                } else {
                    position = storedPosition.get();
                }
            }
        }

        LOGGER.info().setMessage("Starting tailer").addData("file", _file).addData("position", position).log();

        reader.position(position);
    }

    // CHECKSTYLE.OFF: MethodLength - Nothing to refactor here.
    private void readLoop(final SeekableByteChannel reader) throws IOException, InterruptedException {
        Optional<Long> lastChecked = Optional.empty();
        Optional<String> currentReaderPrefixHash = Optional.empty();
        int currentReaderPrefixHashLength = 0;
        while (isRunning()) {
            // Obtain properties of file we expect we are reading
            final Attributes attributes;
            try {
                attributes = getAttributes(_file, lastChecked);
            } catch (final NoSuchFileException t) {
                rotate(Optional.of(reader),
                        String.format("File rotation detected based attributes access failure; file=%s", _file));

                // Return to the file loop
                return;
            }

            if (attributes.getLength() < reader.position()) {
                // File was rotated; either:
                // 1) Position is past the length of the file
                // 2) The expected file is smaller than the current file
                rotate(Optional.of(reader), String.format(
                        "File rotation detected based on length, position and size; file=%s, length=%d, position=%d, size=%d",
                        _file, attributes.getLength(), reader.position(), reader.size()));

                // Return to the file loop
                return;

            } else {
                // File was _likely_ not rotated
                if (reader.size() > reader.position()) {
                    // There is more data in the file
                    if (!readLines(reader)) {
                        // There actually isn't any more data in the file; this
                        // means the file was rotated and the new file has more
                        // data than the old file (e.g. rotation from empty).

                        // TODO(vkoskela): Account for missing final newline. [MAI-322]
                        // There is a degenerate case where the last line in a
                        // file does not have a newline. Then readLines will
                        // always find new data, but the file has been rotated
                        // away. We should buffer the contents of partial lines
                        // thereby detecting when the length grows whether we
                        // actually got more data in the current file.

                        rotate(Optional.<SeekableByteChannel>empty(), String.format(
                                "File rotation detected based on length and no new data; file=%s, length=%d, position=%d",
                                _file, attributes.getLength(), reader.position()));

                        // Return to the file loop
                        return;
                    }
                    lastChecked = Optional.of(attributes.getLastModifiedTime());

                    // This control path, specifically, successfully reading
                    // data from the file does not trigger a wait. This permits
                    // continuous reading without pausing.

                } else if (attributes.isNewer()) {
                    // The file does not contain any additional data, but its
                    // last modified date is after the last read date. The file
                    // must have rotated and contains the same length of
                    // content. This can happen on periodic systems which log
                    // the same data at the beginning of each period.

                    rotate(Optional.<SeekableByteChannel>empty(), String.format(
                            "File rotation detected based equal length and position but newer"
                                    + "; file=%s, length=%d, position=%d, lastChecked=%s, attributes=%s",
                            _file, attributes.getLength(), reader.position(), lastChecked.get(), attributes));

                    // Return to the file loop
                    return;

                } else {
                    // The files are the same size and the timestamps are the
                    // same. This is more common than it sounds since file
                    // modification timestamps are not very precise on many
                    // file systems.
                    //
                    // Since we're not doing anything at this point let's hash
                    // the first N bytes of the current file and the expected
                    // file to see if we're still working on the same file.

                    final Optional<Boolean> hashesSame = compareByHash(currentReaderPrefixHash,
                            currentReaderPrefixHashLength);
                    if (hashesSame.isPresent() && !hashesSame.get()) {
                        // The file rotated with the same length!
                        rotate(Optional.<SeekableByteChannel>empty(),
                                String.format("File rotation detected based on hash; file=%s", _file));

                        // Return to the file loop
                        return;
                    }
                    // else: the files are empty or the hashes are the same. In
                    // either case we don't have enough data to determine if
                    // the files are different; we'll need to wait and see when
                    // more data is written if the size and age diverge.

                    // TODO(vkoskela): Configurable maximum rotation hash size. [MAI-323]
                    // TODO(vkoskela): Configurable minimum rotation hash size. [MAI-324]
                    // TODO(vkoskela): Configurable identity hash size. [MAI-325]
                    // TODO(vkoskela): We should add a rehash interval. [MAI-326]
                    // This interval would be separate from the read interval,
                    // and generally longer, preventing us from rehashing the
                    // file every interval; but short enough that we don't wait
                    // too long before realizing a slowly growing file was
                    // rotated.

                    // Read interval
                    _trigger.waitOnTrigger();
                }
            }

            // Compute the prefix hash unless we have an identity
            final int newPrefixHashLength = (int) Math.min(reader.size(), REQUIRED_BYTES_FOR_HASH);
            if (!_hash.isPresent() && (currentReaderPrefixHashLength != newPrefixHashLength
                    || !currentReaderPrefixHash.isPresent())) {
                currentReaderPrefixHashLength = newPrefixHashLength;
                currentReaderPrefixHash = computeHash(reader, currentReaderPrefixHashLength);
            }

            // Update the reader position
            updateCheckpoint(reader.position());
        }
    }
    // CHECKSTYLE.ON: MethodLength

    private Attributes getAttributes(final Path file, final Optional<Long> lastChecked) throws IOException {
        final BasicFileAttributes attributes = Files.readAttributes(file, BasicFileAttributes.class);
        LOGGER.trace().setMessage("File attributes").addData("file", file)
                .addData("lastModifiedTime", attributes.lastModifiedTime().toMillis())
                .addData("size", attributes.size()).log();

        return new Attributes(attributes.size(), attributes.lastModifiedTime().toMillis(),
                lastChecked.isPresent() && attributes.lastModifiedTime().toMillis() > lastChecked.get());
    }

    private void rotate(final Optional<SeekableByteChannel> reader, final String reason)
            throws InterruptedException, IOException {
        // Allow a full read interval before calling it quits on the old file
        if (reader.isPresent()) {
            _trigger.waitOnTrigger();
            readLines(reader.get());
        }

        // Inform the listener
        _listener.fileRotated();

        LOGGER.info(reason);
    }

    private boolean readLines(final SeekableByteChannel reader) throws IOException {
        // Compute the hash if not already set
        if (!_hash.isPresent() && reader.size() >= REQUIRED_BYTES_FOR_HASH) {
            _hash = computeHash(reader, REQUIRED_BYTES_FOR_HASH);
        }

        // Track current position in file and next read position
        // NOTE: The next read position is always the beginning of a line
        long position = reader.position();
        long nextReadPosition = position;

        // Reset buffers
        _buffer.clear();
        _lineBuffer.reset();

        // Process available data
        int bufferSize = reader.read(_buffer);
        boolean hasData = false;
        boolean hasCR = false;
        while (isRunning() && bufferSize != -1) {
            hasData = true;
            for (int i = 0; i < bufferSize; i++) {
                final byte ch = _buffer.get(i);
                switch (ch) {
                case '\n':
                    hasCR = false;
                    handleLine();
                    nextReadPosition = position + i + 1;
                    updateCheckpoint(nextReadPosition);
                    break;
                case '\r':
                    if (hasCR) {
                        _lineBuffer.write('\r');
                    }
                    hasCR = true;
                    break;
                default:
                    if (hasCR) {
                        hasCR = false;
                        handleLine();
                        nextReadPosition = position + i + 1;
                        updateCheckpoint(nextReadPosition);
                    }
                    _lineBuffer.write(ch);
                }
            }
            position = reader.position();
            _buffer.clear();
            bufferSize = reader.read(_buffer);
        }

        reader.position(nextReadPosition);
        return hasData;
    }

    private Optional<Boolean> compareByHash(final Optional<String> prefixHash, final int prefixLength) {
        final int appliedLength;
        if (_hash.isPresent()) {
            appliedLength = REQUIRED_BYTES_FOR_HASH;
        } else {
            appliedLength = prefixLength;
        }
        try (final SeekableByteChannel reader = Files.newByteChannel(_file, StandardOpenOption.READ)) {
            final Optional<String> filePrefixHash = computeHash(reader, appliedLength);

            LOGGER.trace().setMessage("Comparing hashes").addData("hash1", prefixHash)
                    .addData("filePrefixHash", filePrefixHash).addData("size", appliedLength).log();

            return Optional.of(Objects.equals(_hash.orElse(prefixHash.orElse(null)), filePrefixHash.orElse(null)));
        } catch (final IOException e) {
            return Optional.empty();
        }
    }

    private Optional<String> computeHash(final SeekableByteChannel reader, final int hashSize) throws IOException {
        // Don't hash empty data sets
        if (hashSize <= 0) {
            return Optional.empty();
        }

        // Validate sufficient data to compute the hash
        final long oldPosition = reader.position();
        reader.position(0);
        if (reader.size() < hashSize) {
            reader.position(oldPosition);
            LOGGER.trace().setMessage("Reader size insufficient to compute hash").addData("hashSize", hashSize)
                    .addData("readerSize", reader.size()).log();
            return Optional.empty();
        }

        // Read the data to hash
        final ByteBuffer buffer = ByteBuffer.allocate(hashSize);
        int totalBytesRead = 0;
        while (totalBytesRead < hashSize) {
            final int bytesRead = reader.read(buffer);
            if (bytesRead < 0) {
                LOGGER.warn().setMessage("Unexpected end of file reached").addData("totalBytesRead", totalBytesRead)
                        .log();
                return Optional.empty();
            }
            totalBytesRead += bytesRead;
        }

        // Compute the hash
        _md5.reset();
        final byte[] digest = _md5.digest(buffer.array());
        final String hash = Hex.encodeHexString(digest);
        LOGGER.trace().setMessage("Computed hash").addData("hash", hash).log();

        // Return the reader to its original state
        reader.position(oldPosition);
        return Optional.of(hash);
    }

    private void updateCheckpoint(final long position) {
        if (_hash.isPresent()) {
            _positionStore.setPosition(_hash.get(), position);
        }
    }

    private void handleLine() {
        _listener.handle(_lineBuffer.toByteArray());
        _lineBuffer.reset();
    }

    private void handleThrowable(final Throwable t) {
        _listener.handle(t);
    }

    // NOTE: Package private for testing

    /* package private */ StatefulTailer(final Builder builder, final Trigger trigger) {
        _file = builder._file;
        _positionStore = builder._positionStore;
        _listener = builder._listener;
        _trigger = trigger;

        _buffer = ByteBuffer.allocate(INITIAL_BUFFER_SIZE);
        _lineBuffer = new ByteArrayOutputStream(INITIAL_BUFFER_SIZE);
        try {
            _md5 = MessageDigest.getInstance("MD5");
        } catch (final NoSuchAlgorithmException e) {
            throw Throwables.propagate(e);
        }

        _initialPosition = builder._initialPosition;
        _maximumOffsetOnResume = Optional.ofNullable(builder._maximumOffsetOnResume);
        _listener.initialize(this);
    }

    private StatefulTailer(final Builder builder) {
        // TODO(vkoskela): Configurable grace period separate from interval. [MAI-327]
        this(builder, new TimerTrigger(builder._readInterval));
    }

    private final Path _file;
    private final PositionStore _positionStore;
    private final TailerListener _listener;
    private final ByteBuffer _buffer;
    private final ByteArrayOutputStream _lineBuffer;
    private final MessageDigest _md5;
    private final InitialPosition _initialPosition;
    private final Optional<Long> _maximumOffsetOnResume;
    private final Trigger _trigger;

    private volatile boolean _isRunning = true;
    private Optional<String> _hash = Optional.empty();

    private static final int REQUIRED_BYTES_FOR_HASH = 512;
    private static final int INITIAL_BUFFER_SIZE = 65536;
    private static final Logger LOGGER = LoggerFactory.getLogger(StatefulTailer.class);

    private static final class Attributes {

        private Attributes(final long length, final long lastModifiedTime, final boolean newer) {
            _length = length;
            _lastModifiedTime = lastModifiedTime;
            _newer = newer;
        }

        public long getLength() {
            return _length;
        }

        public long getLastModifiedTime() {
            return _lastModifiedTime;
        }

        public boolean isNewer() {
            return _newer;
        }

        @Override
        public String toString() {
            return MoreObjects.toStringHelper(this).add("id", Integer.toHexString(System.identityHashCode(this)))
                    .add("Length", _length).add("LastModifiedTime", _lastModifiedTime).add("Newer", _newer)
                    .toString();
        }

        private final long _length;
        private final long _lastModifiedTime;
        private final boolean _newer;
    }

    /**
     * Implementation of builder pattern for <code>StatefulTailer</code>.
     *
     * @author Brandon Arp (brandonarp at gmail dot com)
     */
    public static class Builder extends OvalBuilder<StatefulTailer> {

        /**
         * Public constructor.
         */
        public Builder() {
            super((java.util.function.Function<Builder, StatefulTailer>) StatefulTailer::new);
        }

        /**
         * Sets the file to read. Cannot be null or empty.
         *
         * @param value The file to read.
         * @return This instance of {@link Builder}
         */
        public Builder setFile(final Path value) {
            _file = value;
            return this;
        }

        /**
         * Sets the <code>PositionStore</code> to be used to checkpoint the
         * file read position. Cannot be null.
         *
         * @param value The <code>PositionStore</code> instance.
         * @return This instance of {@link Builder}
         */
        public Builder setPositionStore(final PositionStore value) {
            _positionStore = value;
            return this;
        }

        /**
         * Sets the <code>TailerListener</code> instance. Cannot be null.
         *
         * @param value The <code>TailerListener</code> instance.
         * @return This instance of {@link Builder}
         */
        public Builder setListener(final TailerListener value) {
            _listener = value;
            return this;
        }

        /**
         * Sets the interval between file reads. Optional. Default is 500
         * milliseconds.
         *
         * @param value The file read interval.
         * @return This instance of {@link Builder}
         */
        public Builder setReadInterval(final Duration value) {
            _readInterval = value;
            return this;
        }

        /**
         * Sets the tailer to start at the current end of the file.
         *
         * @param initialPosition The initial position of the tailer
         * @return This instance of {@link Builder}
         */
        public Builder setInitialPosition(final InitialPosition initialPosition) {
            _initialPosition = initialPosition;
            return this;
        }

        /**
         * Sets the maximum offset on resume. Optional. Default is no maximum.
         *
         * @param maximumOffsetOnResume The maximum offset on resume.
         * @return This instance of {@link Builder}
         */
        public Builder setMaximumOffsetOnResume(final Long maximumOffsetOnResume) {
            _maximumOffsetOnResume = maximumOffsetOnResume;
            return this;
        }

        @NotNull
        private Path _file;
        @NotNull
        private PositionStore _positionStore;
        @NotNull
        private TailerListener _listener;
        @NotNull
        private Duration _readInterval = Duration.millis(250);
        @NotNull
        private InitialPosition _initialPosition = InitialPosition.START;
        private Long _maximumOffsetOnResume = null;
    }
}