com.cloudera.flume.collector.CollectorSink.java Source code


Introduction

Here is the source code for com.cloudera.flume.collector.CollectorSink.java.
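CollectorSink is the sink behind the collectorSink(dfsdir, path[, rollmillis]) construct in a Flume sink specification: it rolls output files on a timer, writes them to DFS through a chain of retry/reopen decorators, and holds end-to-end acknowledgements until the HDFS file backing a roll has been closed. A hedged usage sketch follows the listing below.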

Source

/**
 * Licensed to Cloudera, Inc. under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  Cloudera, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.cloudera.flume.collector;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.cloudera.flume.agent.FlumeNode;
import com.cloudera.flume.conf.Context;
import com.cloudera.flume.conf.FlumeConfiguration;
import com.cloudera.flume.conf.FlumeSpecException;
import com.cloudera.flume.conf.SinkFactory.SinkBuilder;
import com.cloudera.flume.core.Event;
import com.cloudera.flume.core.EventSink;
import com.cloudera.flume.core.EventSinkDecorator;
import com.cloudera.flume.core.MaskDecorator;
import com.cloudera.flume.handlers.debug.InsistentAppendDecorator;
import com.cloudera.flume.handlers.debug.InsistentOpenDecorator;
import com.cloudera.flume.handlers.debug.StubbornAppendSink;
import com.cloudera.flume.handlers.endtoend.AckChecksumChecker;
import com.cloudera.flume.handlers.endtoend.AckListener;
import com.cloudera.flume.handlers.hdfs.EscapedCustomDfsSink;
import com.cloudera.flume.handlers.rolling.ProcessTagger;
import com.cloudera.flume.handlers.rolling.RollSink;
import com.cloudera.flume.handlers.rolling.Tagger;
import com.cloudera.flume.handlers.rolling.TimeTrigger;
import com.cloudera.flume.reporter.ReportEvent;
import com.cloudera.util.BackoffPolicy;
import com.cloudera.util.CumulativeCappedExponentialBackoff;
import com.google.common.base.Preconditions;

/**
 * This collector sink is the high-level specification a user would use.
 *
 * Currently it is just a DFSEventSink that uses some default values from the
 * Flume configuration file, which the user can override.
 *
 * TODO (jon) this default output needs to be replaced with a combo of Henry's
 * new tagged output writer and the custom format dfs writer.
 */
public class CollectorSink extends EventSink.Base {
    static final Logger LOG = LoggerFactory.getLogger(CollectorSink.class);

    final EventSink snk;
    AckAccumulator accum = new AckAccumulator();
    final AckListener ackDest;

    // This is a container for acks that should be ready for delivery when the
    // hdfs sink is closed/flushed
    Set<String> rollAckSet = new HashSet<String>();

    // Package-exposed reference for testing
    final RollSink roller;

    CollectorSink(Context ctx, String path, String filename, long millis, AckListener ackDest)
            throws FlumeSpecException {
        this(ctx, path, filename, millis, new ProcessTagger(), 250, ackDest);
    }

    CollectorSink(Context ctx, final String logdir, final String filename, final long millis, final Tagger tagger,
            long checkmillis, AckListener ackDest) {
        this.ackDest = ackDest;
        this.roller = new RollSink(ctx, "collectorSink", new TimeTrigger(tagger, millis), checkmillis) {
            @Override
            public EventSink newSink(Context ctx) throws IOException {
                String tag = tagger.newTag();
                String path = logdir + Path.SEPARATOR_CHAR;
                EventSink dfs = new EscapedCustomDfsSink(path, filename + tag);
                return new RollDetectDeco(dfs, tag);
            }
        };

        long initMs = FlumeConfiguration.get().getInsistentOpenInitBackoff();
        long cumulativeMaxMs = FlumeConfiguration.get().getFailoverMaxCumulativeBackoff();
        long maxMs = FlumeConfiguration.get().getFailoverMaxSingleBackoff();
        BackoffPolicy backoff1 = new CumulativeCappedExponentialBackoff(initMs, maxMs, cumulativeMaxMs);
        BackoffPolicy backoff2 = new CumulativeCappedExponentialBackoff(initMs, maxMs, cumulativeMaxMs);

        // the collector sink has ack checking logic, retry and reopen logic, and
        // needs an extra mask before rolling, writing to disk and forwarding acks
        // (roll detect).

        // { ackChecksumChecker => insistentAppend => stubbornAppend =>
        // insistentOpen => mask("rolltag") => roll(xx) { rollDetect =>
        // escapedCustomDfs } }
        EventSink tmp = new MaskDecorator<EventSink>(roller, "rolltag");
        tmp = new InsistentOpenDecorator<EventSink>(tmp, backoff1);
        tmp = new StubbornAppendSink<EventSink>(tmp);
        tmp = new InsistentAppendDecorator<EventSink>(tmp, backoff2);
        snk = new AckChecksumChecker<EventSink>(tmp, accum);
    }

    String curRollTag;

    /**
     * This is a helper class that wraps the body of the collector sink and
     * gives notifications when a roll has happened. Because only close() has
     * sane flushing semantics in HDFS <= v0.20.x, we need to collect acks:
     * data is safe only after a close on the HDFS file happens.
     */
    class RollDetectDeco extends EventSinkDecorator<EventSink> {
        String tag;

        public RollDetectDeco(EventSink s, String tag) {
            super(s);
            this.tag = tag;
        }

        @Override
        public void open() throws IOException, InterruptedException {
            // Set the collector's current roll tag (curRollTag) to this deco's tag.
            LOG.debug("opening roll detect deco {}", tag);
            curRollTag = tag;
            super.open();
            LOG.debug("opened  roll detect deco {}", tag);
        }

        @Override
        public void close() throws IOException, InterruptedException {
            LOG.debug("closing roll detect deco {}", tag);
            super.close();
            flushRollAcks();
            LOG.debug("closed  roll detect deco {}", tag);
        }

        void flushRollAcks() throws IOException {
            AckListener master = ackDest;
            Collection<String> acktags;
            synchronized (rollAckSet) {
                acktags = new ArrayList<String>(rollAckSet);
                rollAckSet.clear();
                LOG.debug("Roll closed, pushing acks for " + tag + " :: " + acktags);
            }

            for (String at : acktags) {
                master.end(at);
            }
        }

    }

    /**
     * This accumulates ack tags in rollAckSet so that they can be pushed to the
     * master when the HDFS file associated with the rolltag is closed.
     */
    class AckAccumulator implements AckListener {

        @Override
        public void end(String group) throws IOException {
            synchronized (rollAckSet) {
                LOG.debug("Adding to acktag {} to rolltag {}", group, curRollTag);
                rollAckSet.add(group);
                LOG.debug("Current rolltag acktag mapping: {}", rollAckSet);
            }
        }

        @Override
        public void err(String group) throws IOException {
            // No-op: only successfully ended ack groups are accumulated.
        }

        @Override
        public void expired(String key) throws IOException {
            // No-op.
        }

        @Override
        public void start(String group) throws IOException {
            // No-op.
        }

    }

    @Override
    public void append(Event e) throws IOException, InterruptedException {
        snk.append(e);
        super.append(e);
    }

    @Override
    public void close() throws IOException, InterruptedException {
        snk.close();
    }

    @Override
    public void open() throws IOException, InterruptedException {
        snk.open();
    }

    @Override
    public String getName() {
        return "Collector";
    }

    @Override
    public void getReports(String namePrefix, Map<String, ReportEvent> reports) {
        super.getReports(namePrefix, reports);
        snk.getReports(namePrefix + getName() + ".", reports);
    }

    public EventSink getSink() {
        return snk;
    }

    public static SinkBuilder builder() {
        return new SinkBuilder() {
            @Override
            public EventSink build(Context context, String... argv) {
                Preconditions.checkArgument(argv.length <= 3,
                        "usage: collectorSink[(dfsdir,path[,rollmillis])]");
                String logdir = FlumeConfiguration.get().getCollectorDfsDir(); // default
                long millis = FlumeConfiguration.get().getCollectorRollMillis();
                String prefix = "";
                if (argv.length >= 1) {
                    logdir = argv[0]; // override
                }
                if (argv.length >= 2) {
                    prefix = argv[1];
                }
                if (argv.length >= 3) {
                    millis = Long.parseLong(argv[2]);
                }
                try {
                    EventSink snk = new CollectorSink(context, logdir, prefix, millis,
                            FlumeNode.getInstance().getCollectorAckListener());
                    return snk;
                } catch (FlumeSpecException e) {
                    LOG.error("CollectorSink spec error " + e, e);
                    throw new IllegalArgumentException("usage: collectorSink[(dfsdir,path[,rollmillis])] " + e);
                }
            }
        };
    }
}
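
Example usage

A minimal, hypothetical sketch of driving this sink through its builder. The HDFS path, filename prefix, and roll interval are illustrative values, not part of the original source. The sketch assumes a Context can be constructed directly and that a FlumeNode is running in the process, since the builder calls FlumeNode.getInstance() internally for its ack listener.

import com.cloudera.flume.collector.CollectorSink;
import com.cloudera.flume.conf.Context;
import com.cloudera.flume.core.Event;
import com.cloudera.flume.core.EventImpl;
import com.cloudera.flume.core.EventSink;

public class CollectorSinkExample {
    public static void main(String[] args) throws Exception {
        // Assumption: a default Context is sufficient for this builder.
        Context ctx = new Context();

        // Assumption: a FlumeNode is running in this process, since the
        // builder calls FlumeNode.getInstance() for its ack listener.
        EventSink collector = CollectorSink.builder().build(ctx,
                "hdfs://namenode/flume/collected/", // dfsdir (illustrative)
                "syslog-",                          // filename prefix (illustrative)
                "30000");                           // rollmillis: roll every 30 s

        collector.open();
        Event e = new EventImpl("sample event body".getBytes());
        collector.append(e);
        // Acks accumulated in rollAckSet are only forwarded to the master
        // after the HDFS file backing the current roll is closed.
        collector.close();
    }
}

As the RollDetectDeco javadoc above explains, events are considered durable only once the underlying HDFS file is closed, so acknowledgements are deferred until a roll completes rather than sent per append.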