com.aliyun.openservices.tablestore.hadoop.TableStoreRecordWriter.java Source code

Java tutorial

Introduction

Here is the source code for com.aliyun.openservices.tablestore.hadoop.TableStoreRecordWriter.java:

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.aliyun.openservices.tablestore.hadoop;

import java.util.List;
import java.util.Deque;
import java.util.ArrayDeque;
import java.util.Set;
import java.util.HashSet;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.RecordWriter;

import com.alicloud.openservices.tablestore.SyncClientInterface;
import com.alicloud.openservices.tablestore.TableStoreException;
import com.alicloud.openservices.tablestore.model.Error;
import com.alicloud.openservices.tablestore.model.RowChange;
import com.alicloud.openservices.tablestore.model.BatchWriteRowRequest;
import com.alicloud.openservices.tablestore.model.BatchWriteRowResponse;
import com.alicloud.openservices.tablestore.core.utils.Preconditions;

public class TableStoreRecordWriter extends RecordWriter<Writable, BatchWriteWritable> {
    private static final Logger logger = LoggerFactory.getLogger(TableStoreRecordWriter.class);

    private SyncClientInterface ots;
    private String outputTable;
    private long rowCounter = 0;
    private Deque<RowChange> waitingRows = new ArrayDeque<RowChange>();
    private static int BATCH_SIZE_DEFAULT_MAX = 200;
    private static int BATCH_SIZE_INCR = 10;
    private static double BATCH_SIZE_BACKOFF = 0.8;
    private int batchSize = BATCH_SIZE_DEFAULT_MAX;
    private int maxBatchSize = BATCH_SIZE_DEFAULT_MAX;

    public TableStoreRecordWriter(SyncClientInterface ots, String outputTable) {
        Preconditions.checkNotNull(ots, "ots client must be nonnull.");
        Preconditions.checkNotNull(outputTable, "output table must be nonnull.");
        this.ots = ots;
        this.outputTable = outputTable;
        logger.debug("max batch size: {}", this.maxBatchSize);
    }

    public TableStoreRecordWriter(SyncClientInterface ots, String outputTable, int maxBatchSize) {
        Preconditions.checkNotNull(ots, "ots client must be nonnull.");
        Preconditions.checkNotNull(outputTable, "output table must be nonnull.");
        this.ots = ots;
        this.outputTable = outputTable;
        this.batchSize = maxBatchSize;
        this.maxBatchSize = maxBatchSize;
        logger.debug("max batch size: {}", this.maxBatchSize);
    }

    public int getMaxBatchSize() {
        return maxBatchSize;
    }

    @Override
    public void write(Writable x, BatchWriteWritable batch) {
        List<RowChange> rows = batch.getRowChanges();
        for (RowChange row : rows) {
            waitingRows.addLast(row);
        }
        for (; waitingRows.size() >= batchSize;) {
            rowCounter += batchWrite();
        }
    }

    @Override
    public void close(TaskAttemptContext ctx) {
        for (; !waitingRows.isEmpty();) {
            rowCounter += batchWrite();
        }
        logger.info("this task wrote {} rows", rowCounter);
        ots.shutdown();
    }

    private int batchWrite() {
        while (true) {
            Deque<RowChange> rows = prepareRows();
            if (rows.size() < batchSize) {
                logger.info("small batch size: {}", rows.size());
            } else {
                logger.debug("batch size: {}", rows.size());
            }
            try {
                launch(rows);
                batchSize += BATCH_SIZE_INCR;
                if (batchSize > maxBatchSize) {
                    batchSize = maxBatchSize;
                }
                return rows.size();
            } catch (TableStoreException ex) {
                if (ex.getErrorCode() != "OTSParameterInvalid" || batchSize == 1) {
                    throw ex;
                } else {
                    logger.info("Batch-size backs off. current batch-size: {}", rows.size());
                    batchSize = (int) (rows.size() * BATCH_SIZE_BACKOFF);
                    if (batchSize < 1) {
                        batchSize = 1;
                    }
                    while (!rows.isEmpty()) {
                        RowChange row = rows.pollLast();
                        waitingRows.addFirst(row);
                    }
                }
            }
        }
    }

    private Deque<RowChange> prepareRows() {
        Deque<RowChange> res = new ArrayDeque<RowChange>();
        Set<Integer> detectDupRows = new HashSet<Integer>();
        for (int rowCnt = 0; rowCnt < batchSize && !waitingRows.isEmpty(); ++rowCnt) {
            RowChange row = waitingRows.pollFirst();
            int hash = row.getTableName().hashCode() * 1327144901 + row.getPrimaryKey().hashCode();
            if (detectDupRows.contains(hash)) {
                break;
            }
            res.addLast(row);
            detectDupRows.add(hash);
        }
        return res;
    }

    private void launch(Deque<RowChange> rows) {
        BatchWriteRowRequest req = new BatchWriteRowRequest();
        for (RowChange row : rows) {
            req.addRowChange(row);
        }
        BatchWriteRowResponse resp = ots.batchWriteRow(req);
        List<BatchWriteRowResponse.RowResult> failed = resp.getFailedRows();
        for (BatchWriteRowResponse.RowResult res : failed) {
            logger.error("fail to write to TableStore. table: {} error-code: {} error-message: {} request-id: {}",
                    res.getTableName(), res.getError().getCode(), res.getError().getMessage(), resp.getRequestId());
        }
        if (!failed.isEmpty()) {
            Error err = failed.get(0).getError();
            throw new TableStoreException(err.getMessage(), null, err.getCode(), resp.getRequestId(), 0);
        }
    }
}