Java tutorial
/* * Copyright (C) 2010-2101 Alibaba Group Holding Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.alibaba.otter.node.etl.load.loader.db; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.Callable; import java.util.concurrent.ExecutorCompletionService; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import org.apache.commons.io.FilenameUtils; import org.apache.commons.lang.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.slf4j.MDC; import org.springframework.beans.factory.DisposableBean; import org.springframework.beans.factory.InitializingBean; import com.alibaba.otter.node.common.config.ConfigClientService; import com.alibaba.otter.node.etl.OtterConstants; import com.alibaba.otter.node.etl.load.exception.LoadException; import com.alibaba.otter.node.etl.load.loader.LoadStatsTracker; import com.alibaba.otter.node.etl.load.loader.LoadStatsTracker.LoadCounter; import com.alibaba.otter.node.etl.load.loader.db.context.FileLoadContext; import com.alibaba.otter.node.etl.load.loader.weight.WeightBuckets; import com.alibaba.otter.node.etl.load.loader.weight.WeightController; import com.alibaba.otter.shared.common.model.config.ConfigHelper; import com.alibaba.otter.shared.common.model.config.channel.Channel; import com.alibaba.otter.shared.common.model.config.data.DataMediaPair; import com.alibaba.otter.shared.common.model.config.pipeline.Pipeline; import com.alibaba.otter.shared.common.utils.NioUtils; import com.alibaba.otter.shared.common.utils.thread.NamedThreadFactory; import com.alibaba.otter.shared.etl.model.FileBatch; import com.alibaba.otter.shared.etl.model.FileData; import com.alibaba.otter.shared.etl.model.Identity; /** * ?load * * @author jianghang 2011-10-31 ?11:33:22 * @author zebinxu 2012-4-28 ?3:39:17 ??? file load ?? * @version 4.0.0 */ public class FileLoadAction implements InitializingBean, DisposableBean { private static final Logger logger = LoggerFactory.getLogger(FileLoadAction.class); private int retry = 5; private ConfigClientService configClientService; private LoadStatsTracker loadStatsTracker; private boolean dump = true; // for concurrent file load private static final String WORKER_NAME = "FileLoadAction"; private static final String WORKER_NAME_FORMAT = "pipelineId = %s , pipelineName = %s , " + WORKER_NAME; private static final int DEFAULT_POOL_SIZE = 5; private int poolSize = DEFAULT_POOL_SIZE; private ExecutorService executor; /** * ?? */ public FileLoadContext load(FileBatch fileBatch, File rootDir, WeightController controller) { if (false == rootDir.exists()) { throw new LoadException(rootDir.getPath() + " is not exist"); } FileLoadContext context = buildContext(fileBatch.getIdentity()); context.setPrepareDatas(fileBatch.getFiles()); boolean isDryRun = context.getPipeline().getParameters().isDryRun(); try { // ??? WeightBuckets<FileData> buckets = buildWeightBuckets(fileBatch.getIdentity(), fileBatch.getFiles()); List<Long> weights = buckets.weights(); controller.start(weights); // ?? for (int i = 0; i < weights.size(); i++) { Long weight = weights.get(i); controller.await(weight.intValue()); if (logger.isInfoEnabled()) { logger.debug("##start load for weight:{}\n", weight); } // ??weight? List<FileData> items = buckets.getItems(weight); if (context.getPipeline().getParameters().isDryRun()) { dryRun(context, items, rootDir); } else { moveFiles(context, items, rootDir); } controller.single(weight.intValue()); if (logger.isInfoEnabled()) { logger.debug("##end load for weight:{}\n", weight); } } if (dump || isDryRun) { MDC.put(OtterConstants.splitPipelineLoadLogFileKey, String.valueOf(fileBatch.getIdentity().getPipelineId())); logger.info(FileloadDumper.dumpContext("successed", context)); MDC.remove(OtterConstants.splitPipelineLoadLogFileKey); } } catch (InterruptedException e) { Thread.currentThread().interrupt(); if (dump || isDryRun) { MDC.put(OtterConstants.splitPipelineLoadLogFileKey, String.valueOf(fileBatch.getIdentity().getPipelineId())); logger.info(FileloadDumper.dumpContext("error", context)); MDC.remove(OtterConstants.splitPipelineLoadLogFileKey); } } catch (Exception e) { if (dump || isDryRun) { MDC.put(OtterConstants.splitPipelineLoadLogFileKey, String.valueOf(fileBatch.getIdentity().getPipelineId())); logger.info(FileloadDumper.dumpContext("error", context)); MDC.remove(OtterConstants.splitPipelineLoadLogFileKey); } throw new LoadException(e); } finally { // ??? NioUtils.delete(rootDir, 3); } return context; } private void adjustPoolSize(FileLoadContext context) { Pipeline pipeline = context.getPipeline(); int newPoolSize = pipeline.getParameters().getFileLoadPoolSize(); if (newPoolSize != poolSize) { poolSize = newPoolSize; if (executor instanceof ThreadPoolExecutor) { ThreadPoolExecutor pool = (ThreadPoolExecutor) executor; pool.setCorePoolSize(newPoolSize); pool.setMaximumPoolSize(newPoolSize); } } } private FileLoadContext buildContext(Identity identity) { FileLoadContext context = new FileLoadContext(); context.setIdentity(identity); Channel channel = configClientService.findChannel(identity.getChannelId()); Pipeline pipeline = configClientService.findPipeline(identity.getPipelineId()); context.setChannel(channel); context.setPipeline(pipeline); return context; } /** * weight??item? */ private WeightBuckets<FileData> buildWeightBuckets(Identity identity, List<FileData> datas) { WeightBuckets<FileData> buckets = new WeightBuckets<FileData>(); for (FileData data : datas) { // ?weight DataMediaPair pair = ConfigHelper.findDataMediaPair(getPipeline(identity), data.getPairId()); buckets.addItem(pair.getPushWeight(), data); } return buckets; } private Pipeline getPipeline(Identity identity) { return configClientService.findPipeline(identity.getPipelineId()); } private void dryRun(FileLoadContext context, List<FileData> fileDatas, File rootDir) { for (FileData fileData : fileDatas) { boolean isLocal = StringUtils.isBlank(fileData.getNameSpace()); String entryName = null; if (true == isLocal) { entryName = FilenameUtils.getPath(fileData.getPath()) + FilenameUtils.getName(fileData.getPath()); } else { entryName = fileData.getNameSpace() + File.separator + fileData.getPath(); } File sourceFile = new File(rootDir, entryName); if (true == sourceFile.exists() && false == sourceFile.isDirectory()) { if (false == isLocal) { throw new LoadException(fileData + " is not support!"); } else { // meta? fileData.setSize(sourceFile.length()); fileData.setLastModifiedTime(sourceFile.lastModified()); context.getProcessedDatas().add(fileData); } LoadCounter counter = loadStatsTracker.getStat(context.getIdentity()).getStat(fileData.getPairId()); counter.getFileCount().incrementAndGet(); counter.getFileSize().addAndGet(fileData.getSize()); } else if (fileData.getEventType().isDelete()) { // if (false == isLocal) { throw new LoadException(fileData + " is not support!"); } else { context.getProcessedDatas().add(fileData); } } else { context.getFailedDatas().add(fileData);// } } } /** * ? fast-fail */ private void moveFiles(FileLoadContext context, List<FileData> fileDatas, File rootDir) { Exception exception = null; adjustPoolSize(context); ExecutorCompletionService<Exception> executorComplition = new ExecutorCompletionService<Exception>( executor); List<Future<Exception>> results = new ArrayList<Future<Exception>>(); for (FileData fileData : fileDatas) { Future<Exception> future = executorComplition.submit(new FileLoadWorker(context, rootDir, fileData)); results.add(future); // fast fail if (future.isDone()) { // ( CallerRunsPolicy) try { exception = future.get(); } catch (Exception e) { exception = e; } if (exception != null) { for (Future<Exception> result : results) { if (!result.isDone() && !result.isCancelled()) { result.cancel(true); } } throw exception instanceof LoadException ? (LoadException) exception : new LoadException(exception); } } } int resultSize = results.size(); int cursor = 0; while (cursor < resultSize) { try { Future<Exception> result = executorComplition.take(); exception = result.get(); } catch (Exception e) { exception = e; break; } cursor++; } if (cursor != resultSize) { // ?? for (Future<Exception> future : results) { if (!future.isDone() && !future.isCancelled()) { future.cancel(true); } } } if (exception != null) { throw exception instanceof LoadException ? (LoadException) exception : new LoadException(exception); } } private class FileLoadWorker implements Callable<Exception> { private FileLoadContext context; private File rootDir; private FileData fileData; public FileLoadWorker(FileLoadContext context, File rootDir, FileData fileData) { this.context = context; this.rootDir = rootDir; this.fileData = fileData; } public Exception call() throws Exception { Thread.currentThread().setName(String.format(WORKER_NAME_FORMAT, context.getPipeline().getId(), context.getPipeline().getName())); try { MDC.put(OtterConstants.splitPipelineLogFileKey, String.valueOf(context.getPipeline().getId())); if (fileData == null) { return null; } // ?? int count = 0; Throwable exception = null; while (count++ < retry) { try { doMove(context, rootDir, fileData); return null; } catch (Exception e) { exception = e; if (count < retry) { Thread.sleep(50); } } } throw new LoadException( String.format("FileLoadWorker is error! createFile failed[%s]", fileData.getPath()), exception); } finally { MDC.remove(OtterConstants.splitPipelineLogFileKey); } } } private void doMove(FileLoadContext context, File rootDir, FileData fileData) throws IOException { boolean isLocal = StringUtils.isBlank(fileData.getNameSpace()); String entryName = null; if (true == isLocal) { entryName = FilenameUtils.getPath(fileData.getPath()) + FilenameUtils.getName(fileData.getPath()); } else { entryName = fileData.getNameSpace() + File.separator + fileData.getPath(); } File sourceFile = new File(rootDir, entryName); if (true == sourceFile.exists() && false == sourceFile.isDirectory()) { if (false == isLocal) { throw new LoadException(fileData + " is not support!"); } else { File targetFile = new File(fileData.getPath()); // copy to product path NioUtils.copy(sourceFile, targetFile, retry); if (true == targetFile.exists()) { // meta? fileData.setSize(sourceFile.length()); fileData.setLastModifiedTime(sourceFile.lastModified()); context.getProcessedDatas().add(fileData); } else { throw new LoadException(String.format("copy/rename [%s] to [%s] failed by unknow reason", sourceFile.getPath(), targetFile.getPath())); } } LoadCounter counter = loadStatsTracker.getStat(context.getIdentity()).getStat(fileData.getPairId()); counter.getFileCount().incrementAndGet(); counter.getFileSize().addAndGet(fileData.getSize()); } else if (fileData.getEventType().isDelete()) { // if (false == isLocal) { throw new LoadException(fileData + " is not support!"); } else { File targetFile = new File(fileData.getPath()); if (NioUtils.delete(targetFile, retry)) { context.getProcessedDatas().add(fileData); } else { context.getFailedDatas().add(fileData); } } } else { context.getFailedDatas().add(fileData);// } } public void afterPropertiesSet() throws Exception { executor = new ThreadPoolExecutor(poolSize, poolSize, 0L, TimeUnit.MILLISECONDS, new ArrayBlockingQueue<Runnable>(poolSize * 4), new NamedThreadFactory(WORKER_NAME), new ThreadPoolExecutor.CallerRunsPolicy()); } public void destroy() throws Exception { executor.shutdownNow(); } // ====================== setter / getter =========================== public void setConfigClientService(ConfigClientService configClientService) { this.configClientService = configClientService; } public void setLoadStatsTracker(LoadStatsTracker loadStatsTracker) { this.loadStatsTracker = loadStatsTracker; } public void setRetry(int retry) { this.retry = retry; } public void setDump(boolean dump) { this.dump = dump; } public void setPoolSize(int poolSize) { this.poolSize = poolSize; } }