org.apache.streams.cassandra.CassandraPersistWriter.java Source code


Introduction

Here is the source code for org.apache.streams.cassandra.CassandraPersistWriter.java, a StreamsPersistWriter implementation that buffers incoming StreamsDatum documents, serializes them to JSON, and writes them to a Cassandra table in batches.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.streams.cassandra;

import org.apache.streams.config.ComponentConfigurator;
import org.apache.streams.config.StreamsConfigurator;
import org.apache.streams.core.StreamsDatum;
import org.apache.streams.core.StreamsPersistWriter;
import org.apache.streams.jackson.StreamsJacksonMapper;
import org.apache.streams.util.GuidUtils;

import com.datastax.driver.core.BatchStatement;
import com.datastax.driver.core.BoundStatement;
import com.datastax.driver.core.Cluster;
import com.datastax.driver.core.DataType;
import com.datastax.driver.core.KeyspaceMetadata;
import com.datastax.driver.core.Metadata;
import com.datastax.driver.core.PreparedStatement;
import com.datastax.driver.core.Session;
import com.datastax.driver.core.TableMetadata;
import com.datastax.driver.core.querybuilder.Insert;
import com.datastax.driver.core.querybuilder.QueryBuilder;
import com.datastax.driver.core.schemabuilder.SchemaBuilder;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Closeable;
import java.io.Flushable;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Queue;
import java.util.Random;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

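/**
 * A {@link StreamsPersistWriter} that serializes {@link StreamsDatum}
 * documents to JSON and writes them to a Cassandra table in batches,
 * flushing on size and latency thresholds.
 */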
public class CassandraPersistWriter implements StreamsPersistWriter, Runnable, Flushable, Closeable {

    public static final String STREAMS_ID = "CassandraPersistWriter";

    private static final Logger LOGGER = LoggerFactory.getLogger(CassandraPersistWriter.class);

    private static final long MAX_WRITE_LATENCY = 1000;

    protected volatile Queue<StreamsDatum> persistQueue;

    private ObjectMapper mapper = StreamsJacksonMapper.getInstance();
    private volatile AtomicLong lastWrite = new AtomicLong(System.currentTimeMillis());
    private ScheduledExecutorService backgroundFlushTask = Executors.newSingleThreadScheduledExecutor();

    private CassandraConfiguration config;
    private CassandraClient client;

    private Session session;

    protected PreparedStatement insertStatement;

    protected List<BoundStatement> insertBatch = new ArrayList<>();

    protected final ReadWriteLock lock = new ReentrantReadWriteLock();

    public CassandraPersistWriter() {
        this(new ComponentConfigurator<>(CassandraConfiguration.class)
                .detectConfiguration(StreamsConfigurator.getConfig().getConfig("cassandra")));
    }

    public CassandraPersistWriter(CassandraConfiguration config) {
        this.config = config;
    }

    public void setPersistQueue(Queue<StreamsDatum> persistQueue) {
        this.persistQueue = persistQueue;
    }

    public Queue<StreamsDatum> getPersistQueue() {
        return persistQueue;
    }

    @Override
    public String getId() {
        return STREAMS_ID;
    }

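    /**
     * Serializes the datum's document to JSON, binds the key (the datum id if
     * present, otherwise a generated GUID) and the JSON bytes to the prepared
     * insert, and queues the bound statement for the next batch flush.
     */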
    @Override
    public void write(StreamsDatum streamsDatum) {

        ObjectNode node;

        if (streamsDatum.getDocument() instanceof String) {
            try {
                node = mapper.readValue((String) streamsDatum.getDocument(), ObjectNode.class);

                byte[] value = node.toString().getBytes();

                String key = GuidUtils.generateGuid(node.toString());
                if (!Objects.isNull(streamsDatum.getMetadata().get("id"))) {
                    key = streamsDatum.getMetadata().get("id").toString();
                }

                BoundStatement statement = insertStatement.bind(key, ByteBuffer.wrap(value));
                insertBatch.add(statement);
            } catch (IOException ex) {
                LOGGER.warn("Failure adding object: {}", streamsDatum.getDocument().toString());
                return;
            }
        } else {
            try {
                node = mapper.valueToTree(streamsDatum.getDocument());

                byte[] value = node.toString().getBytes();

                String key = GuidUtils.generateGuid(node.toString());
                if (!Objects.isNull(streamsDatum.getId())) {
                    key = streamsDatum.getId();
                }

                BoundStatement statement = insertStatement.bind(key, ByteBuffer.wrap(value));
                insertBatch.add(statement);
            } catch (Exception ex) {
                LOGGER.warn("Failure adding object: {}", streamsDatum.getDocument().toString());
                return;
            }
        }

        flushIfNecessary();
    }

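    /**
     * Executes all queued inserts as a single batch and resets the buffer.
     */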
    @Override
    public void flush() throws IOException {
        try {
            LOGGER.debug("Attempting to flush {} items to cassandra", insertBatch.size());
            lock.writeLock().lock();

            BatchStatement batchStatement = new BatchStatement();
            batchStatement.addAll(insertBatch);
            session.execute(batchStatement);

            lastWrite.set(System.currentTimeMillis());
            insertBatch = new ArrayList<>();
        } finally {
            lock.writeLock().unlock();
        }
    }

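    /**
     * Closes the session and cluster and halts the background flush task.
     */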
    @Override
    public synchronized void close() throws IOException {
        session.close();
        client.cluster().close();
        backgroundFlushTask.shutdownNow();
    }

    /**
     * Connects to Cassandra, ensures the keyspace and table exist, prepares
     * the insert statement, and schedules the periodic background flush.
     */
    public void start() {
        try {
            connectToCassandra();
            client.start();
            createKeyspaceAndTable();
            createInsertStatement();
        } catch (Exception e) {
            LOGGER.error("Exception", e);
            return;
        }
        backgroundFlushTask.scheduleAtFixedRate(new Runnable() {
            @Override
            public void run() {
                flushIfNecessary();
            }
        }, 0, MAX_WRITE_LATENCY * 2, TimeUnit.MILLISECONDS);
    }

    /**
     * Flushes buffered inserts, closes the connection, and shuts down the
     * background flush task, waiting briefly for it to terminate.
     */
    public void stop() {
        try {
            flush();
        } catch (IOException ex) {
            LOGGER.error("Error flushing", ex);
        }

        try {
            close();
        } catch (IOException ex) {
            LOGGER.error("Error closing", ex);
        }

        try {
            backgroundFlushTask.shutdown();
            // Wait a while for existing tasks to terminate
            if (!backgroundFlushTask.awaitTermination(15, TimeUnit.SECONDS)) {
                backgroundFlushTask.shutdownNow(); // Cancel currently executing tasks
                // Wait a while for tasks to respond to being cancelled
                if (!backgroundFlushTask.awaitTermination(15, TimeUnit.SECONDS)) {
                    LOGGER.error("Stream did not terminate");
                }
            }
        } catch (InterruptedException ie) {
            // (Re-)Cancel if current thread also interrupted
            backgroundFlushTask.shutdownNow();
            // Preserve interrupt status
            Thread.currentThread().interrupt();
        }
    }

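    /**
     * Polling loop used when the writer is driven as a {@link Runnable}:
     * drains the persist queue, writing each datum as it arrives.
     */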
    @Override
    public void run() {
        while (true) {
            if (persistQueue.peek() != null) {
                try {
                    StreamsDatum entry = persistQueue.remove();
                    write(entry);
                } catch (Exception ex) {
                    LOGGER.warn("Failure writing entry from Queue: {}", ex.getMessage());
                }
            }
            try {
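                // Note: nextInt(1) always returns 0, so this sleep is
                // effectively a yield between polling iterations.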
                Thread.sleep(new Random().nextInt(1));
            } catch (InterruptedException interrupt) {
                LOGGER.trace("Interrupt", interrupt);
            }
        }
    }

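    /**
     * Initializes the in-memory persist queue and starts the writer; the
     * supplied configuration object is ignored in favor of the configuration
     * passed to the constructor.
     */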
    @Override
    public void prepare(Object configurationObject) {
        this.persistQueue = new ConcurrentLinkedQueue<>();
        start();
    }

    @Override
    public void cleanUp() {
        stop();
    }

    protected void flushIfNecessary() {
        long lastLatency = System.currentTimeMillis() - lastWrite.get();
        // Flush only when the batch is non-empty AND either it has reached a
        // multiple of 100 entries or the time since the last flush exceeds
        // the maximum desired write latency.
        if (insertBatch.size() > 0 && (insertBatch.size() % 100 == 0 || lastLatency > MAX_WRITE_LATENCY)) {
            try {
                flush();
            } catch (IOException ex) {
                LOGGER.error("Error writing to Cassandra", ex);
            }
        }
    }

    private synchronized void connectToCassandra() throws Exception {
        client = new CassandraClient(config);
    }

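    /**
     * Creates the configured keyspace (SimpleStrategy, replication factor 1)
     * and table (varchar partition key, blob value column) when they do not
     * already exist, then opens a session bound to the keyspace.
     */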
    private void createKeyspaceAndTable() {
        Metadata metadata = client.cluster().getMetadata();
        if (Objects.isNull(metadata.getKeyspace(config.getKeyspace()))) {
            LOGGER.info("Keyspace {} does not exist. Creating Keyspace", config.getKeyspace());
            Map<String, Object> replication = new HashMap<>();
            replication.put("class", "SimpleStrategy");
            replication.put("replication_factor", 1);

            String createKeyspaceStmt = SchemaBuilder.createKeyspace(config.getKeyspace()).with()
                    .replication(replication).getQueryString();
            client.cluster().connect().execute(createKeyspaceStmt);
        }

        session = client.cluster().connect(config.getKeyspace());

        KeyspaceMetadata ks = metadata.getKeyspace(config.getKeyspace());
        TableMetadata tableMetadata = ks.getTable(config.getTable());

        if (Objects.isNull(tableMetadata)) {
            LOGGER.info("Table {} does not exist in Keyspace {}. Creating Table", config.getTable(),
                    config.getKeyspace());
            String createTableStmt = SchemaBuilder.createTable(config.getTable())
                    .addPartitionKey(config.getPartitionKeyColumn(), DataType.varchar())
                    .addColumn(config.getColumn(), DataType.blob()).getQueryString();

            session.execute(createTableStmt);
        }
    }

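    /**
     * Builds an INSERT over the partition key and value columns and prepares
     * it; the placeholder Object values render as bind markers (?) in the
     * generated CQL, which write() later binds with (key, value).
     */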
    private void createInsertStatement() {
        Insert insertBuilder = QueryBuilder.insertInto(config.getTable());
        insertBuilder.value(config.getPartitionKeyColumn(), new Object());
        insertBuilder.value(config.getColumn(), new Object());
        insertStatement = session.prepare(insertBuilder.getQueryString());
    }
}
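
Example usage

Below is a minimal sketch of how this writer might be driven outside a full Streams pipeline. The CassandraConfiguration setters shown here are assumptions inferred from the getters used in the source above, not confirmed API; consult the generated configuration class for the actual mutators.

import org.apache.streams.cassandra.CassandraConfiguration;
import org.apache.streams.cassandra.CassandraPersistWriter;
import org.apache.streams.core.StreamsDatum;

public class CassandraPersistWriterExample {

    public static void main(String[] args) {
        // Hypothetical setters, assumed to mirror the getKeyspace()/getTable()/
        // getPartitionKeyColumn()/getColumn() getters used by the writer.
        // Connection settings (hosts, port) are omitted for brevity.
        CassandraConfiguration config = new CassandraConfiguration();
        config.setKeyspace("streams_example");
        config.setTable("documents");
        config.setPartitionKeyColumn("key");
        config.setColumn("value");

        CassandraPersistWriter writer = new CassandraPersistWriter(config);

        // prepare() builds the in-memory queue and calls start(), which
        // connects, ensures the keyspace/table exist, and schedules the
        // periodic background flush.
        writer.prepare(null);

        // Each write binds the JSON document into the prepared insert; the
        // batch is flushed at every 100 entries or once the write latency
        // threshold is exceeded.
        writer.write(new StreamsDatum("{\"message\":\"hello cassandra\"}"));

        // cleanUp() flushes any remaining statements and closes the session.
        writer.cleanUp();
    }
}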