com.alibaba.otter.node.etl.load.loader.db.FileLoadAction.java Source code

Java tutorial

Introduction

Here is the source code for com.alibaba.otter.node.etl.load.loader.db.FileLoadAction.java

Source

/*
 * Copyright (C) 2010-2101 Alibaba Group Holding Limited.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.alibaba.otter.node.etl.load.loader.db;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.MDC;
import org.springframework.beans.factory.DisposableBean;
import org.springframework.beans.factory.InitializingBean;

import com.alibaba.otter.node.common.config.ConfigClientService;
import com.alibaba.otter.node.etl.OtterConstants;
import com.alibaba.otter.node.etl.load.exception.LoadException;
import com.alibaba.otter.node.etl.load.loader.LoadStatsTracker;
import com.alibaba.otter.node.etl.load.loader.LoadStatsTracker.LoadCounter;
import com.alibaba.otter.node.etl.load.loader.db.context.FileLoadContext;
import com.alibaba.otter.node.etl.load.loader.weight.WeightBuckets;
import com.alibaba.otter.node.etl.load.loader.weight.WeightController;
import com.alibaba.otter.shared.common.model.config.ConfigHelper;
import com.alibaba.otter.shared.common.model.config.channel.Channel;
import com.alibaba.otter.shared.common.model.config.data.DataMediaPair;
import com.alibaba.otter.shared.common.model.config.pipeline.Pipeline;
import com.alibaba.otter.shared.common.utils.NioUtils;
import com.alibaba.otter.shared.common.utils.thread.NamedThreadFactory;
import com.alibaba.otter.shared.etl.model.FileBatch;
import com.alibaba.otter.shared.etl.model.FileData;
import com.alibaba.otter.shared.etl.model.Identity;

/**
 * ?load
 * 
 * @author jianghang 2011-10-31 ?11:33:22
 * @author zebinxu 2012-4-28 ?3:39:17 ??? file load ??
 * @version 4.0.0
 */
public class FileLoadAction implements InitializingBean, DisposableBean {

    private static final Logger logger = LoggerFactory.getLogger(FileLoadAction.class);
    private int retry = 5;
    private ConfigClientService configClientService;
    private LoadStatsTracker loadStatsTracker;
    private boolean dump = true;

    // for concurrent file load
    private static final String WORKER_NAME = "FileLoadAction";
    private static final String WORKER_NAME_FORMAT = "pipelineId = %s , pipelineName = %s , " + WORKER_NAME;
    private static final int DEFAULT_POOL_SIZE = 5;
    private int poolSize = DEFAULT_POOL_SIZE;
    private ExecutorService executor;

    /**
     * ??
     */
    public FileLoadContext load(FileBatch fileBatch, File rootDir, WeightController controller) {
        if (false == rootDir.exists()) {
            throw new LoadException(rootDir.getPath() + " is not exist");
        }
        FileLoadContext context = buildContext(fileBatch.getIdentity());
        context.setPrepareDatas(fileBatch.getFiles());
        boolean isDryRun = context.getPipeline().getParameters().isDryRun();
        try {
            // ???
            WeightBuckets<FileData> buckets = buildWeightBuckets(fileBatch.getIdentity(), fileBatch.getFiles());
            List<Long> weights = buckets.weights();
            controller.start(weights);
            // ??
            for (int i = 0; i < weights.size(); i++) {
                Long weight = weights.get(i);
                controller.await(weight.intValue());
                if (logger.isInfoEnabled()) {
                    logger.debug("##start load for weight:{}\n", weight);
                }

                // ??weight?
                List<FileData> items = buckets.getItems(weight);
                if (context.getPipeline().getParameters().isDryRun()) {
                    dryRun(context, items, rootDir);
                } else {
                    moveFiles(context, items, rootDir);
                }

                controller.single(weight.intValue());
                if (logger.isInfoEnabled()) {
                    logger.debug("##end load for weight:{}\n", weight);
                }
            }

            if (dump || isDryRun) {
                MDC.put(OtterConstants.splitPipelineLoadLogFileKey,
                        String.valueOf(fileBatch.getIdentity().getPipelineId()));
                logger.info(FileloadDumper.dumpContext("successed", context));
                MDC.remove(OtterConstants.splitPipelineLoadLogFileKey);
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            if (dump || isDryRun) {
                MDC.put(OtterConstants.splitPipelineLoadLogFileKey,
                        String.valueOf(fileBatch.getIdentity().getPipelineId()));
                logger.info(FileloadDumper.dumpContext("error", context));
                MDC.remove(OtterConstants.splitPipelineLoadLogFileKey);
            }
        } catch (Exception e) {
            if (dump || isDryRun) {
                MDC.put(OtterConstants.splitPipelineLoadLogFileKey,
                        String.valueOf(fileBatch.getIdentity().getPipelineId()));
                logger.info(FileloadDumper.dumpContext("error", context));
                MDC.remove(OtterConstants.splitPipelineLoadLogFileKey);
            }
            throw new LoadException(e);
        } finally {

            // ???
            NioUtils.delete(rootDir, 3);
        }

        return context;
    }

    private void adjustPoolSize(FileLoadContext context) {
        Pipeline pipeline = context.getPipeline();
        int newPoolSize = pipeline.getParameters().getFileLoadPoolSize();
        if (newPoolSize != poolSize) {
            poolSize = newPoolSize;
            if (executor instanceof ThreadPoolExecutor) {
                ThreadPoolExecutor pool = (ThreadPoolExecutor) executor;
                pool.setCorePoolSize(newPoolSize);
                pool.setMaximumPoolSize(newPoolSize);
            }
        }

    }

    private FileLoadContext buildContext(Identity identity) {
        FileLoadContext context = new FileLoadContext();
        context.setIdentity(identity);
        Channel channel = configClientService.findChannel(identity.getChannelId());
        Pipeline pipeline = configClientService.findPipeline(identity.getPipelineId());
        context.setChannel(channel);
        context.setPipeline(pipeline);
        return context;
    }

    /**
     * weight??item?
     */
    private WeightBuckets<FileData> buildWeightBuckets(Identity identity, List<FileData> datas) {
        WeightBuckets<FileData> buckets = new WeightBuckets<FileData>();
        for (FileData data : datas) {
            // ?weight
            DataMediaPair pair = ConfigHelper.findDataMediaPair(getPipeline(identity), data.getPairId());
            buckets.addItem(pair.getPushWeight(), data);
        }

        return buckets;
    }

    private Pipeline getPipeline(Identity identity) {
        return configClientService.findPipeline(identity.getPipelineId());
    }

    private void dryRun(FileLoadContext context, List<FileData> fileDatas, File rootDir) {
        for (FileData fileData : fileDatas) {
            boolean isLocal = StringUtils.isBlank(fileData.getNameSpace());
            String entryName = null;
            if (true == isLocal) {
                entryName = FilenameUtils.getPath(fileData.getPath()) + FilenameUtils.getName(fileData.getPath());
            } else {
                entryName = fileData.getNameSpace() + File.separator + fileData.getPath();
            }
            File sourceFile = new File(rootDir, entryName);
            if (true == sourceFile.exists() && false == sourceFile.isDirectory()) {
                if (false == isLocal) {
                    throw new LoadException(fileData + " is not support!");
                } else {
                    // meta?
                    fileData.setSize(sourceFile.length());
                    fileData.setLastModifiedTime(sourceFile.lastModified());
                    context.getProcessedDatas().add(fileData);
                }

                LoadCounter counter = loadStatsTracker.getStat(context.getIdentity()).getStat(fileData.getPairId());
                counter.getFileCount().incrementAndGet();
                counter.getFileSize().addAndGet(fileData.getSize());
            } else if (fileData.getEventType().isDelete()) {
                // 
                if (false == isLocal) {
                    throw new LoadException(fileData + " is not support!");
                } else {
                    context.getProcessedDatas().add(fileData);
                }
            } else {
                context.getFailedDatas().add(fileData);// 
            }
        }
    }

    /**
     * ? fast-fail 
     */
    private void moveFiles(FileLoadContext context, List<FileData> fileDatas, File rootDir) {
        Exception exception = null;
        adjustPoolSize(context);
        ExecutorCompletionService<Exception> executorComplition = new ExecutorCompletionService<Exception>(
                executor);

        List<Future<Exception>> results = new ArrayList<Future<Exception>>();
        for (FileData fileData : fileDatas) {
            Future<Exception> future = executorComplition.submit(new FileLoadWorker(context, rootDir, fileData));
            results.add(future);

            // fast fail
            if (future.isDone()) { // ( CallerRunsPolicy)
                try {
                    exception = future.get();
                } catch (Exception e) {
                    exception = e;
                }
                if (exception != null) {
                    for (Future<Exception> result : results) {
                        if (!result.isDone() && !result.isCancelled()) {
                            result.cancel(true);
                        }
                    }
                    throw exception instanceof LoadException ? (LoadException) exception
                            : new LoadException(exception);
                }
            }

        }

        int resultSize = results.size();
        int cursor = 0;
        while (cursor < resultSize) {
            try {
                Future<Exception> result = executorComplition.take();
                exception = result.get();
            } catch (Exception e) {
                exception = e;
                break;
            }
            cursor++;
        }

        if (cursor != resultSize) { // ??
            for (Future<Exception> future : results) {
                if (!future.isDone() && !future.isCancelled()) {
                    future.cancel(true);
                }
            }

        }

        if (exception != null) {
            throw exception instanceof LoadException ? (LoadException) exception : new LoadException(exception);
        }
    }

    private class FileLoadWorker implements Callable<Exception> {

        private FileLoadContext context;
        private File rootDir;
        private FileData fileData;

        public FileLoadWorker(FileLoadContext context, File rootDir, FileData fileData) {
            this.context = context;
            this.rootDir = rootDir;
            this.fileData = fileData;

        }

        public Exception call() throws Exception {
            Thread.currentThread().setName(String.format(WORKER_NAME_FORMAT, context.getPipeline().getId(),
                    context.getPipeline().getName()));
            try {
                MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(context.getPipeline().getId()));
                if (fileData == null) {
                    return null;
                }
                // ??
                int count = 0;
                Throwable exception = null;
                while (count++ < retry) {
                    try {
                        doMove(context, rootDir, fileData);
                        return null;
                    } catch (Exception e) {
                        exception = e;
                        if (count < retry) {
                            Thread.sleep(50);
                        }
                    }
                }

                throw new LoadException(
                        String.format("FileLoadWorker is error! createFile failed[%s]", fileData.getPath()),
                        exception);
            } finally {
                MDC.remove(OtterConstants.splitPipelineLogFileKey);
            }
        }
    }

    private void doMove(FileLoadContext context, File rootDir, FileData fileData) throws IOException {
        boolean isLocal = StringUtils.isBlank(fileData.getNameSpace());
        String entryName = null;
        if (true == isLocal) {
            entryName = FilenameUtils.getPath(fileData.getPath()) + FilenameUtils.getName(fileData.getPath());
        } else {
            entryName = fileData.getNameSpace() + File.separator + fileData.getPath();
        }
        File sourceFile = new File(rootDir, entryName);
        if (true == sourceFile.exists() && false == sourceFile.isDirectory()) {
            if (false == isLocal) {
                throw new LoadException(fileData + " is not support!");
            } else {
                File targetFile = new File(fileData.getPath());
                // copy to product path
                NioUtils.copy(sourceFile, targetFile, retry);
                if (true == targetFile.exists()) {
                    // meta?
                    fileData.setSize(sourceFile.length());
                    fileData.setLastModifiedTime(sourceFile.lastModified());
                    context.getProcessedDatas().add(fileData);
                } else {
                    throw new LoadException(String.format("copy/rename [%s] to [%s] failed by unknow reason",
                            sourceFile.getPath(), targetFile.getPath()));
                }

            }

            LoadCounter counter = loadStatsTracker.getStat(context.getIdentity()).getStat(fileData.getPairId());
            counter.getFileCount().incrementAndGet();
            counter.getFileSize().addAndGet(fileData.getSize());
        } else if (fileData.getEventType().isDelete()) {
            // 
            if (false == isLocal) {
                throw new LoadException(fileData + " is not support!");
            } else {
                File targetFile = new File(fileData.getPath());
                if (NioUtils.delete(targetFile, retry)) {
                    context.getProcessedDatas().add(fileData);
                } else {
                    context.getFailedDatas().add(fileData);
                }
            }
        } else {
            context.getFailedDatas().add(fileData);// 
        }

    }

    public void afterPropertiesSet() throws Exception {
        executor = new ThreadPoolExecutor(poolSize, poolSize, 0L, TimeUnit.MILLISECONDS,
                new ArrayBlockingQueue<Runnable>(poolSize * 4), new NamedThreadFactory(WORKER_NAME),
                new ThreadPoolExecutor.CallerRunsPolicy());
    }

    public void destroy() throws Exception {
        executor.shutdownNow();
    }

    // ====================== setter / getter ===========================

    public void setConfigClientService(ConfigClientService configClientService) {
        this.configClientService = configClientService;
    }

    public void setLoadStatsTracker(LoadStatsTracker loadStatsTracker) {
        this.loadStatsTracker = loadStatsTracker;
    }

    public void setRetry(int retry) {
        this.retry = retry;
    }

    public void setDump(boolean dump) {
        this.dump = dump;
    }

    public void setPoolSize(int poolSize) {
        this.poolSize = poolSize;
    }

}