cn.cnic.bigdatalab.flume.sink.mongodb.MongoSink.java Source code

Java tutorial

Introduction

Here is the source code for cn.cnic.bigdatalab.flume.sink.mongodb.MongoSink.java

Source

/**
 * Copyright (C) 2014 Stratio (http://stratio.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package cn.cnic.bigdatalab.flume.sink.mongodb;

import com.mongodb.*;
import org.apache.flume.*;
import org.apache.flume.conf.Configurable;
import org.apache.flume.instrumentation.SinkCounter;
import org.apache.flume.sink.AbstractSink;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

/**
 *
 * Reads events from a channel and writes them to MongoDB. It can read fields
 * from both body and headers.
 *
 * Configuration parameters are:
 *
 * <p><ul>
 * <li><tt>dynamic</tt> <em>(boolean)</em>: If true, the dynamic mode will be
 *      enabled and the database and collection to use will be selected by the
 *      event headers. Defaults to <tt>false</tt>.</li>
 * <li><tt>dynamicDB</tt> <em>(string)</em>: Name of the event header that will
 *      be looked up for the database name. This will only work when dynamic
 *      mode is enabled. Defaults to <tt>db</tt>.</li>
 * <li><tt>dynamicCollection</tt> <em>(string)</em>: Name of the event header
 *      that will be looked up for the collection name. This will only work when
 *      dynamic mode is enabled. Defaults to <tt>collection</tt>.</li>
 * <li><tt>mongoUri</tt> <em>(string, required)</em>:
 *      A <a href="http://api.mongodb.org/java/current/com/mongodb/MongoClientURI.html">Mongo client URI</a>
 *      defining the MongoDB server address and, optionally, default database
 *      and collection. When dynamic mode is enabled, the collection defined
 *      here will be used as a fallback. The literal value <tt>INJECTED</tt>
 *      skips the creation of the client and of the default database and
 *      collection.</li>
 * <li><tt>mappingFile</tt> <em>(string)</em>: Path to a
 *      <a href="http://json-schema.org/">JSON Schema</a>
 *      to be used for type mapping purposes.</li>
 * <li><tt>batchSize</tt> <em>(integer)</em>: Maximum number of events taken
 *      from the channel in a single transaction. Defaults to <tt>25</tt>.</li>
 * <li><tt>updateInsteadReplace</tt> <em>(boolean)</em>: If true, documents
 *      carrying an <tt>_id</tt> field are written with an upserting
 *      <tt>$set</tt> update matched on that <tt>_id</tt> instead of being
 *      saved. Defaults to <tt>false</tt>.</li>
 * </ul></p>
 *
 */
public class MongoSink extends AbstractSink implements Configurable {

    private static final Logger log = LoggerFactory.getLogger(MongoSink.class);

    // Names of the supported configuration parameters.
    private static final String CONF_URI = "mongoUri";
    private static final String CONF_MAPPING_FILE = "mappingFile";
    private static final String CONF_BATCH_SIZE = "batchSize";
    private static final String CONF_DYNAMIC = "dynamic";
    private static final String CONF_DYNAMIC_DB_FIELD = "dynamicDB";
    private static final String CONF_DYNAMIC_COLLECTION_FIELD = "dynamicCollection";
    private static final String CONF_UPDATE_INSTEAD_REPLACE = "updateInsteadReplace";

    // Values used when the corresponding parameter is not configured.
    private static final int DEFAULT_BATCH_SIZE = 25;
    private static final boolean DEFAULT_DYNAMIC = false;
    private static final String DEFAULT_DYNAMIC_DB_FIELD = "db";
    private static final String DEFAULT_DYNAMIC_COLLECTION_FIELD = "collection";
    private static final boolean DEFAULT_UPDATE_INSTEAD_REPLACE = false;

    // NOTE(review): the "INJECTED" URI path in configure() leaves the Mongo
    // fields below untouched, so they are presumably assigned from outside
    // (e.g. by tests); field names are therefore kept stable - confirm.
    private SinkCounter sinkCounter;
    private int batchSize;
    private MongoClient mongoClient;
    private MongoClientURI mongoClientURI;
    private DB mongoDefaultDb;
    private DBCollection mongoDefaultCollection;
    private boolean isDynamicMode;
    private String dynamicDBField;
    private String dynamicCollectionField;
    private EventParser eventParser;
    private boolean updateInsteadReplace;
    private final CounterGroup counterGroup = new CounterGroup();

    public MongoSink() {
        super();
    }

    /**
     * {@inheritDoc}
     *
     * Reads the sink parameters described in the class documentation, creates
     * the MongoDB client (unless the URI is <tt>INJECTED</tt>) and the event
     * parser.
     *
     * @param context the sink configuration
     * @throws MongoSinkException if <tt>mongoUri</tt> is missing, if no default
     *      collection is available while dynamic mode is disabled, or if
     *      setting up the event parser fails with an I/O error
     */
    @Override
    public void configure(Context context) {
        try {
            final String uri = context.getString(CONF_URI);
            if (uri == null) {
                // Fail with a clear message instead of the NullPointerException
                // the URI parser would raise for a missing value.
                throw new MongoSinkException("Required parameter '" + CONF_URI + "' is not set");
            }
            if (!"INJECTED".equals(uri)) {
                this.mongoClientURI = new MongoClientURI(uri,
                        MongoClientOptions.builder().writeConcern(WriteConcern.SAFE));
                this.mongoClient = new MongoClient(mongoClientURI);
                if (mongoClientURI.getDatabase() != null) {
                    this.mongoDefaultDb = mongoClient.getDB(mongoClientURI.getDatabase());
                }
                if (mongoClientURI.getCollection() != null) {
                    this.mongoDefaultCollection = mongoDefaultDb.getCollection(mongoClientURI.getCollection());
                }
            }
            final String mappingFilename = context.getString(CONF_MAPPING_FILE);
            this.eventParser = (mappingFilename == null) ? new EventParser()
                    : new EventParser(MappingDefinition.load(mappingFilename));

            this.isDynamicMode = context.getBoolean(CONF_DYNAMIC, DEFAULT_DYNAMIC);
            if (!isDynamicMode && mongoDefaultCollection == null) {
                throw new MongoSinkException(
                        "Default MongoDB collection must be specified unless dynamic mode is enabled");
            }
            this.dynamicDBField = context.getString(CONF_DYNAMIC_DB_FIELD, DEFAULT_DYNAMIC_DB_FIELD);
            this.dynamicCollectionField = context.getString(CONF_DYNAMIC_COLLECTION_FIELD,
                    DEFAULT_DYNAMIC_COLLECTION_FIELD);

            this.sinkCounter = new SinkCounter(this.getName());
            this.batchSize = context.getInteger(CONF_BATCH_SIZE, DEFAULT_BATCH_SIZE);

            this.updateInsteadReplace = context.getBoolean(CONF_UPDATE_INSTEAD_REPLACE,
                    DEFAULT_UPDATE_INSTEAD_REPLACE);

        } catch (IOException ex) {
            throw new MongoSinkException(ex);
        }
    }

    /**
     * {@inheritDoc}
     *
     * Takes up to <tt>batchSize</tt> events from the channel inside a single
     * transaction and writes each one to MongoDB. The transaction is committed
     * when every write succeeds and rolled back otherwise.
     *
     * @return {@code Status.READY} when a full batch was written;
     *      {@code Status.BACKOFF} when the channel delivered fewer events than
     *      <tt>batchSize</tt> or when the batch failed
     * @throws MongoSinkException wrapping any {@link Error} raised while
     *      processing the batch
     */
    @Override
    public Status process() throws EventDeliveryException {
        log.debug("Executing MongoSink.process()...");
        Status status = Status.READY;
        Channel channel = getChannel();
        Transaction txn = channel.getTransaction();

        try {
            txn.begin();
            final List<Event> eventList = takeEventsFromChannel(channel, batchSize);
            final int count = eventList.size();

            if (count <= 0) {
                sinkCounter.incrementBatchEmptyCount();
                counterGroup.incrementAndGet("channel.underflow");
                status = Status.BACKOFF;
            } else {
                if (count < batchSize) {
                    sinkCounter.incrementBatchUnderflowCount();
                    status = Status.BACKOFF;
                } else {
                    sinkCounter.incrementBatchCompleteCount();
                }

                // Count the attempt before writing so that a failed batch
                // still shows up as attempted-but-not-drained.
                sinkCounter.addToEventDrainAttemptCount(count);

                for (Event event : eventList) {
                    final DBObject document = this.eventParser.parse(event);

                    if (this.updateInsteadReplace && document.get("_id") != null) {
                        // update requires '_id' field to match document
                        BasicDBObject searchQuery = new BasicDBObject().append("_id", document.get("_id")); // update by _id
                        BasicDBObject updatedDocument = new BasicDBObject().append("$set", document);
                        // upsert = true, multi = false
                        getDBCollection(event).update(searchQuery, updatedDocument, true, false);
                    } else {
                        getDBCollection(event).save(document);
                    }
                }
            }
            txn.commit();
            sinkCounter.addToEventDrainSuccessCount(count);
            counterGroup.incrementAndGet("transaction.success");
        } catch (ChannelException e) {
            log.error("Unexpected error while executing MongoSink.process", e);
            txn.rollback();
            status = Status.BACKOFF;
            this.sinkCounter.incrementConnectionFailedCount();
        } catch (Throwable t) {
            log.error("Unexpected error while executing MongoSink.process", t);
            txn.rollback();
            status = Status.BACKOFF;
            // Non-Error failures are reported through the BACKOFF status only;
            // Errors are propagated to the caller.
            if (t instanceof Error) {
                throw new MongoSinkException(t);
            }
        } finally {
            txn.close();
        }
        return status;
    }

    /**
     * {@inheritDoc}
     *
     * Starts the sink counter before starting the sink itself.
     */
    @Override
    public synchronized void start() {
        this.sinkCounter.start();
        super.start();
    }

    /**
     * {@inheritDoc}
     *
     * Closes the MongoDB client, if one exists, and stops the sink counter.
     * The sink itself is stopped even when closing the client fails.
     */
    @Override
    public synchronized void stop() {
        try {
            // The client is absent when configure() never created one.
            if (this.mongoClient != null) {
                this.mongoClient.close();
            }
        } finally {
            if (this.sinkCounter != null) {
                this.sinkCounter.stop();
            }
            super.stop();
        }
    }

    /**
     * Selects the collection an event must be written to.
     *
     * Outside dynamic mode this is always the default collection. In dynamic
     * mode the database and collection names are looked up in the event
     * headers, falling back to the defaults when a header is missing.
     *
     * @param event the event about to be written
     * @return the target collection
     * @throws MongoSinkException if, in dynamic mode, a header is missing and
     *      no matching default is available
     */
    private DBCollection getDBCollection(Event event) {
        if (!isDynamicMode) {
            return mongoDefaultCollection;
        }
        final Map<String, String> headers = event.getHeaders();
        final String dbName = headers.get(dynamicDBField);
        final String collectionName = headers.get(dynamicCollectionField);
        if (collectionName == null) {
            // Without a collection header the default collection is used,
            // regardless of any database header.
            if (mongoDefaultCollection == null) {
                throw new MongoSinkException("No collection specified and no default set");
            }
            return mongoDefaultCollection;
        }
        DB db;
        if (dbName == null) {
            if (mongoDefaultDb == null) {
                throw new MongoSinkException("No DB specified and no default set");
            }
            db = mongoDefaultDb;
        } else {
            db = mongoClient.getDB(dbName);
        }
        return db.getCollection(collectionName);
    }

    /**
     * Takes events from the channel until either <tt>eventsToTake</tt> events
     * have been collected or the channel returns {@code null}.
     *
     * This method does not touch any counter; the caller accounts for the
     * returned events.
     *
     * @param channel the channel to read from, inside an open transaction
     * @param eventsToTake maximum number of events to take
     * @return the events taken, in order; empty when the channel had none or
     *      when <tt>eventsToTake</tt> is not positive
     */
    private List<Event> takeEventsFromChannel(Channel channel, int eventsToTake) {
        List<Event> events = new ArrayList<Event>();
        for (int i = 0; i < eventsToTake; i++) {
            Event event = channel.take();
            if (event == null) {
                // Stop at the first empty take instead of polling on.
                break;
            }
            events.add(event);
        }
        return events;
    }

}