com.lowzj.connect.aliyun.oss.TopicPartitionWriter.java Source code

Introduction

Here is the source code for com.lowzj.connect.aliyun.oss.TopicPartitionWriter.java. The class manages writes for a single Kafka TopicPartition in the Aliyun OSS sink connector: it buffers SinkRecords, projects them against the current schema according to the configured compatibility mode, writes them to partitioned files, and commits those files (and the corresponding offset) once a rotation condition is met.

Source

/*
 * Copyright 2017 Confluent Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.lowzj.connect.aliyun.oss;

import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.Queue;

import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.errors.IllegalWorkerStateException;
import org.apache.kafka.connect.errors.RetriableException;
import org.apache.kafka.connect.errors.SchemaProjectorException;
import org.apache.kafka.connect.sink.SinkRecord;
import org.apache.kafka.connect.sink.SinkTaskContext;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.lowzj.connect.aliyun.oss.storage.AliyunOSSStorage;

import io.confluent.connect.storage.common.StorageCommonConfig;
import io.confluent.connect.storage.format.RecordWriter;
import io.confluent.connect.storage.format.RecordWriterProvider;
import io.confluent.connect.storage.hive.HiveConfig;
import io.confluent.connect.storage.partitioner.Partitioner;
import io.confluent.connect.storage.partitioner.PartitionerConfig;
import io.confluent.connect.storage.schema.StorageSchemaCompatibility;
import io.confluent.connect.storage.util.DateTimeUtils;

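/**
 * Writes the records of a single {@link TopicPartition} to Aliyun OSS.
 * Records are buffered, projected against the current schema according to the
 * configured compatibility mode, and appended to per-partition files; the
 * files are committed once a rotation condition (flush size or scheduled
 * rotation interval) is met, and the resulting safe-to-commit offset is
 * exposed to the sink task via {@link #getOffsetToCommitAndReset()}.
 */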
public class TopicPartitionWriter {
    private static final Logger log = LoggerFactory.getLogger(TopicPartitionWriter.class);

    private final Map<String, String> commitFiles;
    private final Map<String, RecordWriter> writers;
    private final Map<String, Schema> currentSchemas;
    private final TopicPartition tp;
    private final Partitioner<FieldSchema> partitioner;
    private String topicsDir;
    private State state;
    private final Queue<SinkRecord> buffer;
    private final SinkTaskContext context;
    private int recordCount;
    private final int flushSize;
    private final long rotateIntervalMs;
    private final long rotateScheduleIntervalMs;
    private long nextScheduledRotate;
    private final RecordWriterProvider<AliyunOSSSinkConnectorConfig> writerProvider;
    private long currentOffset;
    private Long offsetToCommit;
    private Long nextOffsetToCommit;
    private final Map<String, Long> startOffsets;
    private long timeoutMs;
    private long failureTime;
    private final StorageSchemaCompatibility compatibility;
    private final String extension;
    private final String zeroPadOffsetFormat;
    private final String dirDelim;
    private final String fileDelim;
    private final DateTimeZone timezone;
    private final Time time;

    public TopicPartitionWriter(TopicPartition tp, AliyunOSSStorage storage,
            RecordWriterProvider<AliyunOSSSinkConnectorConfig> writerProvider, Partitioner<FieldSchema> partitioner,
            AliyunOSSSinkConnectorConfig connectorConfig, SinkTaskContext context) {
        this(tp, writerProvider, partitioner, connectorConfig, context, Time.SYSTEM);
    }

    // Visible for testing
    TopicPartitionWriter(TopicPartition tp, RecordWriterProvider<AliyunOSSSinkConnectorConfig> writerProvider,
            Partitioner<FieldSchema> partitioner, AliyunOSSSinkConnectorConfig connectorConfig,
            SinkTaskContext context, Time time) {
        this.time = time;
        this.tp = tp;
        this.context = context;
        this.writerProvider = writerProvider;
        this.partitioner = partitioner;

        flushSize = connectorConfig.getInt(AliyunOSSSinkConnectorConfig.FLUSH_SIZE_CONFIG);
        topicsDir = connectorConfig.getString(StorageCommonConfig.TOPICS_DIR_CONFIG);
        rotateIntervalMs = connectorConfig.getLong(AliyunOSSSinkConnectorConfig.ROTATE_INTERVAL_MS_CONFIG);
        rotateScheduleIntervalMs = connectorConfig
                .getLong(AliyunOSSSinkConnectorConfig.ROTATE_SCHEDULE_INTERVAL_MS_CONFIG);
        timeoutMs = connectorConfig.getLong(AliyunOSSSinkConnectorConfig.RETRY_BACKOFF_CONFIG);
        compatibility = StorageSchemaCompatibility
                .getCompatibility(connectorConfig.getString(HiveConfig.SCHEMA_COMPATIBILITY_CONFIG));

        buffer = new LinkedList<>();
        commitFiles = new HashMap<>();
        writers = new HashMap<>();
        currentSchemas = new HashMap<>();
        startOffsets = new HashMap<>();
        state = State.WRITE_STARTED;
        failureTime = -1L;
        currentOffset = -1L;
        dirDelim = connectorConfig.getString(StorageCommonConfig.DIRECTORY_DELIM_CONFIG);
        fileDelim = connectorConfig.getString(StorageCommonConfig.FILE_DELIM_CONFIG);
        extension = writerProvider.getExtension();
        zeroPadOffsetFormat = "%0"
                + connectorConfig.getInt(AliyunOSSSinkConnectorConfig.FILENAME_OFFSET_ZERO_PAD_WIDTH_CONFIG) + "d";

        timezone = rotateScheduleIntervalMs > 0
                ? DateTimeZone.forID(connectorConfig.getString(PartitionerConfig.TIMEZONE_CONFIG))
                : null;

        // Initialize rotation timers
        updateRotationTimers();
    }

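    /**
     * Write-loop state machine. {@link #next()} advances cyclically:
     * WRITE_STARTED -> WRITE_PARTITION_PAUSED -> SHOULD_ROTATE -> FILE_COMMITTED.
     */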
    private enum State {
        WRITE_STARTED, WRITE_PARTITION_PAUSED, SHOULD_ROTATE, FILE_COMMITTED;

        private static final State[] VALS = values();

        public State next() {
            return VALS[(ordinal() + 1) % VALS.length];
        }
    }

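    /**
     * Resets the scheduled-rotation deadline to the next interval boundary
     * after the current wall-clock time, adjusted to the configured timezone.
     */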
    private void updateRotationTimers() {
        long lastRotate = time.milliseconds();
        if (log.isDebugEnabled() && rotateIntervalMs > 0) {
            log.debug("Update last rotation timer. Next rotation for {} will be in {}ms", tp, rotateIntervalMs);
        }
        if (rotateScheduleIntervalMs > 0) {
            nextScheduledRotate = DateTimeUtils.getNextTimeAdjustedByDay(lastRotate, rotateScheduleIntervalMs,
                    timezone);
            if (log.isDebugEnabled()) {
                log.debug("Update scheduled rotation timer. Next rotation for {} will be at {}", tp,
                        new DateTime(nextScheduledRotate).withZone(timezone).toString());
            }
        }
    }

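    /**
     * Drains the record buffer through the state machine: the partition is
     * paused, each record is schema-projected and written, and open files are
     * committed when {@link #shouldRotate(Long)} signals a rotation. A
     * retriable failure puts the writer into a backoff of {@code timeoutMs}
     * before the next attempt; once the buffer is empty the partition is
     * resumed.
     */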
    @SuppressWarnings("fallthrough")
    public void write() {
        long now = time.milliseconds();
        if (failureTime > 0 && now - failureTime < timeoutMs) {
            return;
        }

        updateRotationTimers();

        while (!buffer.isEmpty()) {
            try {
                switch (state) {
                case WRITE_STARTED:
                    pause();
                    nextState();
                case WRITE_PARTITION_PAUSED:
                    SinkRecord record = buffer.peek();
                    Schema valueSchema = record.valueSchema();
                    String encodedPartition = partitioner.encodePartition(record);
                    Schema currentValueSchema = currentSchemas.get(encodedPartition);
                    if (currentValueSchema == null) {
                        currentSchemas.put(encodedPartition, valueSchema);
                        currentValueSchema = valueSchema;
                    }

                    if (compatibility.shouldChangeSchema(record, null, currentValueSchema) && recordCount > 0) {
                        // This branch is never true for the first record read by this TopicPartitionWriter
                        currentSchemas.put(encodedPartition, valueSchema);
                        nextOffsetToCommit = currentOffset;
                        nextState();
                    } else {
                        SinkRecord projectedRecord = compatibility.project(record, null, currentValueSchema);
                        writeRecord(projectedRecord);
                        buffer.poll();
                        if (shouldRotate(projectedRecord.timestamp())) {
                            log.info("Starting commit and rotation for topic partition {} with start offset {}", tp,
                                    startOffsets);
                            nextOffsetToCommit = currentOffset + 1;
                            nextState();
                            // Fall through and try to rotate immediately
                        } else {
                            break;
                        }
                    }
                case SHOULD_ROTATE:
                    updateRotationTimers();
                    commitFiles();
                    nextState();
                case FILE_COMMITTED:
                    setState(State.WRITE_PARTITION_PAUSED);
                    break;
                default:
                    log.error("{} is not a valid state to write record for topic partition {}.", state, tp);
                }
            } catch (SchemaProjectorException | IllegalWorkerStateException e) {
                throw new ConnectException(e);
            } catch (RetriableException e) {
                log.error("Exception on topic partition {}: ", tp, e);
                failureTime = time.milliseconds();
                setRetryTimeout(timeoutMs);
                break;
            }
        }
        if (buffer.isEmpty()) {
            resume();
            setState(State.WRITE_STARTED);
        }
    }

    public void close() throws ConnectException {
        log.debug("Closing TopicPartitionWriter {}", tp);
        for (RecordWriter writer : writers.values()) {
            writer.close();
        }
        writers.clear();
        startOffsets.clear();
    }

    public void buffer(SinkRecord sinkRecord) {
        buffer.add(sinkRecord);
    }

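    /**
     * Returns the offset that is safe to commit for this partition, or null if
     * no files have been committed since the last call; the value is cleared
     * so each committed offset is reported at most once.
     */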
    public Long getOffsetToCommitAndReset() {
        Long latest = offsetToCommit;
        offsetToCommit = null;
        return latest;
    }

    private String getDirectoryPrefix(String encodedPartition) {
        return partitioner.generatePartitionedPath(tp.topic(), encodedPartition);
    }

    private void nextState() {
        state = state.next();
    }

    private void setState(State state) {
        this.state = state;
    }

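    // Rotate when the scheduled rotation deadline has passed (judged by the
    // record timestamp) or when the number of records written since the last
    // commit reaches the configured flush size.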
    private boolean shouldRotate(Long timestamp) {
        boolean scheduledRotation = rotateScheduleIntervalMs > 0 && timestamp != null
                && timestamp >= nextScheduledRotate;
        boolean messageSizeRotation = recordCount >= flushSize;

        log.trace(
                "Should rotate (count {} >= flush size {}, or schedule interval {} with next schedule {} and timestamp {})? {}",
                recordCount, flushSize, rotateScheduleIntervalMs, nextScheduledRotate, timestamp,
                scheduledRotation || messageSizeRotation);

        return scheduledRotation || messageSizeRotation;
    }

    private void pause() {
        log.trace("Pausing writer for topic-partition '{}'", tp);
        context.pause(tp);
    }

    private void resume() {
        log.trace("Resuming writer for topic-partition '{}'", tp);
        context.resume(tp);
    }

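    // Returns the cached writer for this encoded partition, or lazily creates
    // one bound to the commit file name derived from the partition's start offset.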
    private RecordWriter getWriter(SinkRecord record, String encodedPartition) throws ConnectException {
        if (writers.containsKey(encodedPartition)) {
            return writers.get(encodedPartition);
        }
        String commitFilename = getCommitFilename(encodedPartition);
        RecordWriter writer = writerProvider.getRecordWriter(null, commitFilename);
        writers.put(encodedPartition, writer);
        return writer;
    }

    private String getCommitFilename(String encodedPartition) {
        String commitFile;
        if (commitFiles.containsKey(encodedPartition)) {
            commitFile = commitFiles.get(encodedPartition);
        } else {
            long startOffset = startOffsets.get(encodedPartition);
            String prefix = getDirectoryPrefix(encodedPartition);
            commitFile = fileKeyToCommit(prefix, startOffset);
            commitFiles.put(encodedPartition, commitFile);
        }
        return commitFile;
    }

    private String fileKey(String topicsPrefix, String keyPrefix, String name) {
        return topicsPrefix + dirDelim + keyPrefix + dirDelim + name;
    }

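    // Builds the full object key for a file to commit: the topics directory,
    // the partitioner-generated path, and a file name of the form
    // <topic><fileDelim><partition><fileDelim><zero-padded start offset><extension>.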
    private String fileKeyToCommit(String dirPrefix, long startOffset) {
        String name = tp.topic() + fileDelim + tp.partition() + fileDelim
                + String.format(zeroPadOffsetFormat, startOffset) + extension;
        return fileKey(topicsDir, dirPrefix, name);
    }

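    // Records the offset of the written record, remembers the first offset seen
    // for its encoded partition (used in the committed file name), and appends
    // the record to that partition's writer.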
    private void writeRecord(SinkRecord record) {
        currentOffset = record.kafkaOffset();

        String encodedPartition = partitioner.encodePartition(record);
        if (!startOffsets.containsKey(encodedPartition)) {
            log.trace("Setting writer's start offset for '{}' to {}", encodedPartition, currentOffset);
            startOffsets.put(encodedPartition, currentOffset);
        }

        RecordWriter writer = getWriter(record, encodedPartition);
        writer.write(record);
        ++recordCount;
    }

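    // Commits every open file, publishes the offset that is now safe to commit,
    // and resets the per-rotation bookkeeping (file names, schemas, record count).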
    private void commitFiles() {
        for (Map.Entry<String, String> entry : commitFiles.entrySet()) {
            commitFile(entry.getKey());
            log.debug("Committed {} for {}", entry.getValue(), tp);
        }
        offsetToCommit = nextOffsetToCommit;
        commitFiles.clear();
        currentSchemas.clear();
        recordCount = 0;
        log.info("Files committed to S3. Target commit offset for {} is {}", tp, offsetToCommit);
    }

    private void commitFile(String encodedPartition) {
        if (!startOffsets.containsKey(encodedPartition)) {
            log.warn("Tried to commit file with missing starting offset for partition: {}. Ignoring.", encodedPartition);
            return;
        }

        if (writers.containsKey(encodedPartition)) {
            RecordWriter writer = writers.get(encodedPartition);
            // Commits the file and closes the underlying output stream.
            writer.commit();
            writers.remove(encodedPartition);
            log.debug("Removed writer for '{}'", encodedPartition);
        }

        startOffsets.remove(encodedPartition);
    }

    private void setRetryTimeout(long timeoutMs) {
        context.timeout(timeoutMs);
    }
}
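
Usage

The following is a minimal, hypothetical sketch of how a sink task might drive these writers. The ExampleOSSSinkTaskSketch class and its fields are illustrative only and are not part of the source above; it assumes one TopicPartitionWriter per assigned partition, constructed elsewhere with real storage, partitioner, configuration, and context instances.

package com.lowzj.connect.aliyun.oss;

import java.util.Collection;
import java.util.HashMap;
import java.util.Map;

import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.connect.sink.SinkRecord;

public class ExampleOSSSinkTaskSketch {
    // One writer per assigned partition, created during task start/open.
    private final Map<TopicPartition, TopicPartitionWriter> writers = new HashMap<>();

    // Buffer incoming records, then let each writer run its write/rotate state machine.
    public void put(Collection<SinkRecord> records) {
        for (SinkRecord record : records) {
            TopicPartition tp = new TopicPartition(record.topic(), record.kafkaPartition());
            writers.get(tp).buffer(record);
        }
        for (TopicPartitionWriter writer : writers.values()) {
            writer.write();
        }
    }

    // Report offsets only for partitions whose files have actually been committed.
    public Map<TopicPartition, OffsetAndMetadata> committableOffsets() {
        Map<TopicPartition, OffsetAndMetadata> offsets = new HashMap<>();
        for (Map.Entry<TopicPartition, TopicPartitionWriter> entry : writers.entrySet()) {
            Long offset = entry.getValue().getOffsetToCommitAndReset();
            if (offset != null) {
                offsets.put(entry.getKey(), new OffsetAndMetadata(offset));
            }
        }
        return offsets;
    }
}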