org.apache.gobblin.writer.SequentialBasedBatchAccumulator.java Source code

Introduction

Here is the source code for org.apache.gobblin.writer.SequentialBasedBatchAccumulator.java.
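
A brief usage sketch may help orient the reader before the full listing. It is illustrative only and not part of the original source: the class name AccumulatorUsageSketch is made up for this page, the batch size, TTL and queue capacity simply mirror the values used by the no-argument constructor, and the anonymous WriteCallback is a guess at the callback shape based on how enqueue() invokes it below.

import java.util.concurrent.Future;

import org.apache.gobblin.writer.RecordMetadata;
import org.apache.gobblin.writer.SequentialBasedBatchAccumulator;
import org.apache.gobblin.writer.WriteCallback;
import org.apache.gobblin.writer.WriteResponse;

// Illustrative producer-side sketch; the class name and callback behavior are assumptions, not Gobblin code.
public class AccumulatorUsageSketch {
    public static void main(String[] args) throws InterruptedException {
        // 256 KB batch size limit, 1000 ms TTL, queue capacity of 100 batches
        // (the same values the no-argument constructor uses).
        SequentialBasedBatchAccumulator<String> accumulator =
                new SequentialBasedBatchAccumulator<>(1024 * 256, 1000, 100);

        // The callback methods are inferred from how enqueue() uses the callback in this class.
        WriteCallback callback = new WriteCallback() {
            public void onSuccess(WriteResponse writeResponse) { /* record acknowledged */ }
            public void onFailure(Throwable throwable) { /* record failed */ }
        };

        // enqueue() may block when the internal queue of batches is full.
        Future<RecordMetadata> ack = accumulator.enqueue("a record", callback);
        // ack completes once the batch containing the record has been written and acknowledged.
    }
}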

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.gobblin.writer;

import java.util.LinkedList;
import java.util.ArrayList;
import java.util.Deque;
import java.util.HashSet;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.Future;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.util.concurrent.Futures;
import com.typesafe.config.Config;

import org.apache.gobblin.util.ConfigUtils;

/**
 * Sequential and TTL based accumulator.
 * A producer can add records to this accumulator. It generates a batch on the first record arrival, and all subsequent
 * records are added to the same batch until the batch size limit is reached. {@link BufferedAsyncDataWriter} keeps
 * iterating over the available batches from this accumulator; all completed (full sized) batches are popped out one by
 * one, while an incomplete batch stays in the deque until its TTL expires.
 */

public class SequentialBasedBatchAccumulator<D> extends BatchAccumulator<D> {

    private static final LargeMessagePolicy DEFAULT_LARGE_MESSAGE_POLICY = LargeMessagePolicy.FAIL;
    private Deque<BytesBoundedBatch<D>> dq = new LinkedList<>();
    private IncompleteRecordBatches incomplete = new IncompleteRecordBatches();
    private final long batchSizeLimit;
    private final long memSizeLimit;
    private final double tolerance = 0.95;
    private final long expireInMilliSecond;
    private final LargeMessagePolicy largeMessagePolicy;
    private static final Logger LOG = LoggerFactory.getLogger(SequentialBasedBatchAccumulator.class);

    private final ReentrantLock dqLock = new ReentrantLock();
    private final Condition notEmpty = dqLock.newCondition();
    private final Condition notFull = dqLock.newCondition();
    private final long capacity;

    public SequentialBasedBatchAccumulator() {
        this(1024 * 256, 1000, 100);
    }

    public SequentialBasedBatchAccumulator(Properties properties) {
        this(ConfigUtils.propertiesToConfig(properties));
    }

    public SequentialBasedBatchAccumulator(Config config) {
        this(ConfigUtils.getLong(config, Batch.BATCH_SIZE, Batch.BATCH_SIZE_DEFAULT),
                ConfigUtils.getLong(config, Batch.BATCH_TTL, Batch.BATCH_TTL_DEFAULT),
                ConfigUtils.getLong(config, Batch.BATCH_QUEUE_CAPACITY, Batch.BATCH_QUEUE_CAPACITY_DEFAULT));
    }

    public SequentialBasedBatchAccumulator(long batchSizeLimit, long expireInMilliSecond, long capacity) {
        this(batchSizeLimit, expireInMilliSecond, capacity, DEFAULT_LARGE_MESSAGE_POLICY);
    }

    public SequentialBasedBatchAccumulator(long batchSizeLimit, long expireInMilliSecond, long capacity,
            LargeMessagePolicy largeMessagePolicy) {
        this.batchSizeLimit = batchSizeLimit;
        this.expireInMilliSecond = expireInMilliSecond;
        this.capacity = capacity;
        this.memSizeLimit = (long) (this.tolerance * this.batchSizeLimit);
        this.largeMessagePolicy = largeMessagePolicy;
    }

    public long getNumOfBatches() {
        this.dqLock.lock();
        try {
            return this.dq.size();
        } finally {
            this.dqLock.unlock();
        }
    }

    /**
     * Adds a record to the internal deque data structure.
     */
    public final Future<RecordMetadata> enqueue(D record, WriteCallback callback) throws InterruptedException {
        final ReentrantLock lock = this.dqLock;
        lock.lock();
        try {
            BytesBoundedBatch last = dq.peekLast();
            if (last != null) {
                Future<RecordMetadata> future = null;
                try {
                    future = last.tryAppend(record, callback, this.largeMessagePolicy);
                } catch (RecordTooLargeException e) {
                    // Ok if the record was too large for the current batch
                }
                if (future != null) {
                    return future;
                }
            }

            // Create a new batch because the previous one has no space
            BytesBoundedBatch batch = new BytesBoundedBatch(this.memSizeLimit, this.expireInMilliSecond);
            LOG.debug("Batch " + batch.getId() + " is generated");
            Future<RecordMetadata> future = null;
            try {
                future = batch.tryAppend(record, callback, this.largeMessagePolicy);
            } catch (RecordTooLargeException e) {
                // If even a new batch wasn't able to accommodate the new message
                throw new RuntimeException("Failed due to a message that was too large", e);
            }

            // The future might be null, since the largeMessagePolicy might be set to DROP
            if (future == null) {
                assert largeMessagePolicy.equals(LargeMessagePolicy.DROP);
                LOG.error("Batch " + batch.getId() + " is silently marked as complete, dropping a huge record: "
                        + record);
                future = Futures.immediateFuture(new RecordMetadata(0));
                callback.onSuccess(WriteResponse.EMPTY);
                return future;
            }

            // If the queue is full, we should not add more batches
            while (dq.size() >= this.capacity) {
                LOG.debug("Accumulator size {} has reached capacity {}, waiting", dq.size(), this.capacity);
                this.notFull.await();
            }
            dq.addLast(batch);
            incomplete.add(batch);
            this.notEmpty.signal();
            return future;

        } finally {
            lock.unlock();
        }
    }

    /**
     * A thread-safe helper class to hold record batches that haven't been acknowledged yet.
     * This is mainly used by the flush operation so that all the batches waiting in
     * the incomplete set can be awaited.
     */
    private final static class IncompleteRecordBatches {
        private final Set<Batch> incomplete;

        public IncompleteRecordBatches() {
            this.incomplete = new HashSet<>();
        }

        public void add(Batch batch) {
            synchronized (incomplete) {
                this.incomplete.add(batch);
            }
        }

        public void remove(Batch batch) {
            synchronized (incomplete) {
                boolean removed = this.incomplete.remove(batch);
                if (!removed)
                    throw new IllegalStateException(
                            "Remove from the incomplete set failed. This should be impossible.");
            }
        }

        public ArrayList<Batch> all() {
            synchronized (incomplete) {
                return new ArrayList<>(this.incomplete);
            }
        }
    }

    /**
     * If the accumulator has been closed, the following actions are performed:
     *    1) remove and return the first batch if available.
     *    2) return null if the queue is empty.
     * If the accumulator has not been closed, the following actions are performed:
     *    1) if queue size == 0, block the current thread until more batches are available or the accumulator is closed.
     *    2) if queue size == 1, remove and return the first batch if its TTL has expired, else return null.
     *    3) if queue size > 1, remove and return the first batch.
     */
    public Batch<D> getNextAvailableBatch() {
        final ReentrantLock lock = SequentialBasedBatchAccumulator.this.dqLock;
        lock.lock();
        try {
            if (SequentialBasedBatchAccumulator.this.isClosed()) {
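                // Once the accumulator is closed, drain remaining batches unconditionally, regardless of size or TTL.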
                return dq.poll();
            } else {
                while (dq.size() == 0) {
                    LOG.debug("ready to sleep because of queue is empty");
                    SequentialBasedBatchAccumulator.this.notEmpty.await();
                    if (SequentialBasedBatchAccumulator.this.isClosed()) {
                        return dq.poll();
                    }
                }

                if (dq.size() > 1) {
                    BytesBoundedBatch candidate = dq.poll();
                    SequentialBasedBatchAccumulator.this.notFull.signal();
                    LOG.debug("retrieve batch " + candidate.getId());
                    return candidate;
                }

                if (dq.size() == 1) {
                    if (dq.peekFirst().isTTLExpire()) {
                        LOG.debug("Batch " + dq.peekFirst().getId() + " is expired");
                        BytesBoundedBatch candidate = dq.poll();
                        SequentialBasedBatchAccumulator.this.notFull.signal();
                        return candidate;
                    } else {
                        return null;
                    }
                } else {
                    throw new RuntimeException("Should never get to here");
                }
            }

        } catch (InterruptedException e) {
            LOG.error("Wait for next batch is interrupted. " + e.toString());
        } finally {
            lock.unlock();
        }

        return null;
    }

    public void close() {
        super.close();
        this.dqLock.lock();
        try {
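            // Wake up a consumer blocked in getNextAvailableBatch() so it can drain the remaining batches and exit.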
            this.notEmpty.signal();
        } finally {
            this.dqLock.unlock();
        }
    }

    /**
     * This will block until all the incomplete batches are acknowledged
     */
    public void flush() {
        try {
            ArrayList<Batch> batches = this.incomplete.all();
            int numOutstandingRecords = 0;
            for (Batch batch : batches) {
                numOutstandingRecords += batch.getRecords().size();
            }
            LOG.debug("Flush called on {} batches with {} records total", batches.size(), numOutstandingRecords);
            for (Batch batch : batches) {
                batch.await();
            }
        } catch (Exception e) {
            LOG.error("Error happened while flushing batches");
        }
    }

    /**
     * Once a batch is acknowledged, remove it from the incomplete set.
     */
    public void deallocate(Batch<D> batch) {
        this.incomplete.remove(batch);
    }
}
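
For completeness, here is a sketch of the consumer side described in the class Javadoc: a loop that drains completed (or expired) batches via getNextAvailableBatch() and releases them via deallocate(). It is a simplified illustration, not the actual BufferedAsyncDataWriter logic; the class name AccumulatorDrainSketch is made up for this page, and such a loop would normally run on a writer thread while producers call enqueue() concurrently.

import org.apache.gobblin.writer.Batch;
import org.apache.gobblin.writer.SequentialBasedBatchAccumulator;

// Illustrative consumer-side sketch; the real drain loop lives in BufferedAsyncDataWriter.
public class AccumulatorDrainSketch {
    public static void main(String[] args) {
        SequentialBasedBatchAccumulator<String> accumulator = new SequentialBasedBatchAccumulator<>();

        // getNextAvailableBatch() blocks while the queue is empty and the accumulator is open;
        // it returns null when the only remaining batch is still filling and its TTL has not expired.
        Batch<String> batch = accumulator.getNextAvailableBatch();
        while (batch != null) {
            // ... hand the batch to the underlying writer and wait for its acknowledgement ...
            accumulator.deallocate(batch); // drop the acknowledged batch from the incomplete set
            batch = accumulator.getNextAvailableBatch();
        }

        // flush() blocks until every batch still tracked in the incomplete set is acknowledged.
        accumulator.flush();
    }
}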