gobblin.writer.SequentialBasedBatchAccumulator.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.writer.SequentialBasedBatchAccumulator.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package gobblin.writer;

import java.util.LinkedList;
import java.util.ArrayList;
import java.util.Deque;
import java.util.HashSet;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.Future;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.util.concurrent.Futures;
import com.typesafe.config.Config;

import gobblin.util.ConfigUtils;

/**
 * Sequential and TTL based accumulator.
 *
 * <p>A producer adds records through {@code enqueue}. The first record creates a batch; every following
 * record is appended to the most recently created batch until that batch refuses it, at which point a new
 * batch is created and placed at the tail of an internal deque. {@link BufferedAsyncDataWriter} keeps
 * pulling batches through {@code getNextAvailableBatch}: while more than one batch is queued the head is
 * handed out immediately, but a single (possibly still growing) batch stays in the deque until its TTL
 * has expired.
 *
 * <p>All access to the deque is guarded by one {@link ReentrantLock}. Producers block on {@code notFull}
 * when the deque already holds {@code capacity} batches; consumers block on {@code notEmpty} when it is
 * empty.
 *
 * @param <D> type of the records being accumulated
 */
public abstract class SequentialBasedBatchAccumulator<D> extends BatchAccumulator<D> {

    private static final Logger LOG = LoggerFactory.getLogger(SequentialBasedBatchAccumulator.class);

    /** Fraction of the configured batch size limit that is handed to each new batch as its size limit. */
    private static final double TOLERANCE = 0.95;

    /** Batches waiting to be picked up by the consumer, oldest first. Guarded by {@link #dqLock}. */
    private final Deque<BytesBoundedBatch<D>> dq = new LinkedList<>();
    /** Batches that were queued and have not yet been passed to {@link #deallocate(Batch)}. */
    private final IncompleteRecordBatches incomplete = new IncompleteRecordBatches();
    private final long batchSizeLimit;
    /** {@code TOLERANCE * batchSizeLimit}; the limit passed to every {@link BytesBoundedBatch} created here. */
    private final long memSizeLimit;
    private final long expireInMilliSecond;
    /** Maximum number of batches kept in {@link #dq} before producers creating a new batch are blocked. */
    private final long capacity;

    private final ReentrantLock dqLock = new ReentrantLock();
    private final Condition notEmpty = dqLock.newCondition();
    private final Condition notFull = dqLock.newCondition();

    /**
     * Creates an accumulator with a batch size limit of {@code 1024 * 256} (presumably bytes, i.e. 256KB),
     * a TTL of 1000 milliseconds and a queue capacity of 100 batches.
     */
    public SequentialBasedBatchAccumulator() {
        this(1024 * 256, 1000, 100);
    }

    /**
     * Creates an accumulator configured from {@code properties}.
     *
     * <p>Reads {@link Batch#BATCH_SIZE}, {@link Batch#BATCH_TTL} and {@link Batch#BATCH_QUEUE_CAPACITY},
     * falling back to the corresponding {@code *_DEFAULT} constant when a key is absent.
     *
     * @param properties configuration source
     */
    public SequentialBasedBatchAccumulator(Properties properties) {
        Config config = ConfigUtils.propertiesToConfig(properties);
        this.batchSizeLimit = ConfigUtils.getLong(config, Batch.BATCH_SIZE, Batch.BATCH_SIZE_DEFAULT);
        this.expireInMilliSecond = ConfigUtils.getLong(config, Batch.BATCH_TTL, Batch.BATCH_TTL_DEFAULT);
        this.capacity = ConfigUtils.getLong(config, Batch.BATCH_QUEUE_CAPACITY, Batch.BATCH_QUEUE_CAPACITY_DEFAULT);
        this.memSizeLimit = (long) (TOLERANCE * this.batchSizeLimit);
    }

    /**
     * Creates an accumulator with explicit limits.
     *
     * @param batchSizeLimit nominal size limit of one batch; each batch is created with 95% of this value
     * @param expireInMilliSecond TTL handed to each batch, in milliseconds
     * @param capacity maximum number of batches held in the queue before {@code enqueue} blocks
     */
    public SequentialBasedBatchAccumulator(long batchSizeLimit, long expireInMilliSecond, long capacity) {
        this.batchSizeLimit = batchSizeLimit;
        this.expireInMilliSecond = expireInMilliSecond;
        this.capacity = capacity;
        this.memSizeLimit = (long) (TOLERANCE * this.batchSizeLimit);
    }

    /**
     * @return the number of batches currently held in the internal deque
     */
    public long getNumOfBatches() {
        this.dqLock.lock();
        try {
            return this.dq.size();
        } finally {
            this.dqLock.unlock();
        }
    }

    /**
     * Adds a record to the internal deque.
     *
     * <p>The record is first offered to the most recently created batch. If there is no batch, or that
     * batch refuses the record, a new batch is created. When even the fresh, empty batch refuses the
     * record, the record is dropped: an error is logged, {@code callback.onSuccess(WriteResponse.EMPTY)}
     * is invoked and an already-completed future is returned. Otherwise the new batch is appended to the
     * deque, blocking while the deque already holds {@code capacity} batches.
     *
     * @param record the record to add
     * @param callback callback handed to the batch together with the record
     * @return the future produced by the batch that accepted the record, or an immediate future for a
     *         dropped oversized record
     * @throws InterruptedException if interrupted while waiting for space in the deque
     */
    public final Future<RecordMetadata> enqueue(D record, WriteCallback callback) throws InterruptedException {
        final ReentrantLock lock = this.dqLock;
        lock.lock();
        try {
            BytesBoundedBatch<D> last = dq.peekLast();
            if (last != null) {
                Future<RecordMetadata> appended = last.tryAppend(record, callback);
                if (appended != null) {
                    return appended;
                }
            }

            // Create a new batch because there is no previous one or it has no space left
            BytesBoundedBatch<D> batch = new BytesBoundedBatch<>(this.memSizeLimit, this.expireInMilliSecond);
            LOG.debug("Batch {} is generated", batch.getId());
            Future<RecordMetadata> future = batch.tryAppend(record, callback);

            // Even single record can exceed the batch size limit
            // Ignore the record because Eventhub can only accept payload less than 256KB
            if (future == null) {
                LOG.error("Batch " + batch.getId() + " is marked as complete because it contains a huge record: "
                        + record);
                future = Futures.immediateFuture(new RecordMetadata(0));
                callback.onSuccess(WriteResponse.EMPTY);
                return future;
            }

            // If the queue is full, wait until the consumer has removed a batch.
            // await() releases dqLock while waiting, so the condition is re-checked in a loop.
            while (dq.size() >= this.capacity) {
                this.notFull.await();
            }
            dq.addLast(batch);
            incomplete.add(batch);
            this.notEmpty.signal();
            return future;
        } finally {
            lock.unlock();
        }
    }

    /**
     * A threadsafe helper class to hold batches that haven't been ack'd yet.
     * This is mainly used for the flush operation, which waits on every batch in this set.
     * All access is synchronized on the backing set.
     */
    private static final class IncompleteRecordBatches {
        private final Set<Batch<?>> incomplete;

        public IncompleteRecordBatches() {
            this.incomplete = new HashSet<>();
        }

        /** Registers {@code batch} as not yet acknowledged. */
        public void add(Batch<?> batch) {
            synchronized (incomplete) {
                this.incomplete.add(batch);
            }
        }

        /**
         * Removes {@code batch} from the set.
         *
         * @throws IllegalStateException if the batch was not in the set
         */
        public void remove(Batch<?> batch) {
            synchronized (incomplete) {
                boolean removed = this.incomplete.remove(batch);
                if (!removed) {
                    throw new IllegalStateException(
                            "Remove from the incomplete set failed. This should be impossible.");
                }
            }
        }

        /** @return a snapshot copy of all batches currently in the set */
        public ArrayList<Batch<?>> all() {
            synchronized (incomplete) {
                return new ArrayList<Batch<?>>(this.incomplete);
            }
        }
    }

    /**
     * If accumulator has been closed, below actions are performed:
     *    1) remove and return the first batch if available.
     *    2) return null if queue is empty.
     * If accumulator has not been closed, below actions are performed:
     *    1) if queue.size == 0, block current thread until more batches are available or accumulator is closed.
     *    2) if queue size == 1, remove and return the first batch if TTL has expired, else return null.
     *    3) if queue size > 1, remove and return the first batch element.
     *
     * <p>If the calling thread is interrupted while waiting, an error is logged and {@code null} is returned.
     *
     * @return the next batch to process, or {@code null} when none is available right now
     */
    public Batch<D> getNextAvailableBatch() {
        final ReentrantLock lock = this.dqLock;
        // Acquire outside the try block so that finally never unlocks a lock that was not obtained.
        lock.lock();
        try {
            if (isClosed()) {
                return dq.poll();
            }

            while (dq.isEmpty()) {
                LOG.info("ready to sleep because of queue is empty");
                this.notEmpty.await();
                if (isClosed()) {
                    return dq.poll();
                }
            }

            // The loop above guarantees at least one batch, and dqLock is still held.
            if (dq.size() > 1) {
                BytesBoundedBatch<D> candidate = dq.poll();
                this.notFull.signal();
                LOG.debug("retrieve batch {}", candidate.getId());
                return candidate;
            }

            // Exactly one batch: producers may still be appending to it, so only hand it out once expired.
            BytesBoundedBatch<D> head = dq.peekFirst();
            if (!head.isTTLExpire()) {
                return null;
            }
            LOG.info("Batch {} is expired", head.getId());
            dq.poll();
            this.notFull.signal();
            return head;
        } catch (InterruptedException e) {
            // NOTE(review): the interrupt status is deliberately not restored here, matching the previous
            // behaviour. A caller that polls this method in a loop would otherwise spin, because every
            // subsequent await() would throw immediately. Confirm against the caller before changing.
            LOG.error("Wait for next batch is interrupted. " + e.toString());
            return null;
        } finally {
            lock.unlock();
        }
    }

    /**
     * Closes the accumulator via {@code super.close()} and then wakes every thread blocked in
     * {@link #getNextAvailableBatch()} so it can observe the closed state.
     */
    @Override
    public void close() {
        super.close();
        this.dqLock.lock();
        try {
            // signalAll rather than signal: with more than one waiting consumer, a single signal
            // would leave the remaining ones blocked forever.
            this.notEmpty.signalAll();
        } finally {
            this.dqLock.unlock();
        }
    }

    /**
     * This will block until all the incomplete batches are acknowledged.
     *
     * <p>Works on a snapshot of the incomplete set taken on entry. If waiting on a batch fails, the error
     * is logged, the remaining batches are not waited for, and the method returns normally. When the
     * failure is an interruption, the thread's interrupt status is restored.
     */
    public void flush() {
        try {
            ArrayList<Batch<?>> batches = this.incomplete.all();
            LOG.info("flush on {} batches", batches.size());
            for (Batch<?> batch : batches) {
                batch.await();
            }
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                Thread.currentThread().interrupt();
            }
            LOG.error("Error happens when flushing", e);
        }
    }

    /**
     * Once batch is acknowledged, remove it from incomplete list.
     *
     * @param batch the acknowledged batch
     * @throws IllegalStateException if {@code batch} is not in the incomplete set
     */
    public void deallocate(Batch<D> batch) {
        this.incomplete.remove(batch);
    }
}