Java tutorial: the Asakusa Framework bulk loader's DfsFileImport class
/**
 * Copyright 2011-2016 Asakusa Framework Team.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.asakusafw.bulkloader.extractor;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.lang.ProcessBuilder.Redirect;
import java.net.URI;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile.CompressionType;

import com.asakusafw.bulkloader.bean.ImportBean;
import com.asakusafw.bulkloader.bean.ImportTargetTableBean;
import com.asakusafw.bulkloader.common.ConfigurationLoader;
import com.asakusafw.bulkloader.common.Constants;
import com.asakusafw.bulkloader.common.FileNameUtil;
import com.asakusafw.bulkloader.exception.BulkLoaderSystemException;
import com.asakusafw.bulkloader.log.Log;
import com.asakusafw.bulkloader.transfer.FileList;
import com.asakusafw.bulkloader.transfer.FileProtocol;
import com.asakusafw.runtime.io.ModelInput;
import com.asakusafw.runtime.io.ModelOutput;
import com.asakusafw.runtime.io.TsvIoFactory;
import com.asakusafw.runtime.stage.temporary.TemporaryStorage;
import com.asakusafw.thundergate.runtime.cache.CacheInfo;
import com.asakusafw.thundergate.runtime.cache.CacheStorage;
import com.asakusafw.thundergate.runtime.cache.mapreduce.CacheBuildClient;

/**
 * Writes import target files onto DFS.
 * @author yuta.shirai
 */
public class DfsFileImport {

    static final Log LOG = new Log(DfsFileImport.class);

    private static final int INPUT_BUFFER_BYTES = 128 * 1024;

    private final ExecutorService executor;

    private final String cacheBuildCommand;

    /**
     * Creates a new instance.
     */
    public DfsFileImport() {
        File cmd = ConfigurationLoader.getLocalScriptPath(Constants.PATH_LOCAL_CACHE_BUILD);
        this.cacheBuildCommand = cmd.getAbsolutePath();
        int parallel = Integer.parseInt(ConfigurationLoader.getProperty(Constants.PROP_KEY_CACHE_BUILDER_PARALLEL));
        LOG.debugMessage("Building a cache builder with {0} threads", parallel);
        this.executor = Executors.newFixedThreadPool(parallel);
    }

    /**
     * Reads a {@link FileList} from standard input and writes its contents onto DFS.
     * Each TSV formatted entry is converted into model objects before being stored as temporary input data.
     * The following protocols are handled:
     * <ul>
     * <li> {@link com.asakusafw.bulkloader.transfer.FileProtocol.Kind#CONTENT} </li>
     * <li> {@link com.asakusafw.bulkloader.transfer.FileProtocol.Kind#CREATE_CACHE} </li>
     * <li> {@link com.asakusafw.bulkloader.transfer.FileProtocol.Kind#UPDATE_CACHE} </li>
     * </ul>
     * @param bean importer parameters
     * @param user OS user name
     * @return {@code true} if the import succeeded, {@code false} otherwise
     */
    public boolean importFile(ImportBean bean, String user) {
        // open a FileList reader over standard input
        FileList.Reader reader;
        try {
            reader = FileList.createReader(getInputStream());
        } catch (IOException e) {
            LOG.error(e, "TG-EXTRACTOR-02001", "failed to open the FileList on standard input");
            return false;
        }
        try {
            // process each entry in the FileList
            List<Future<?>> running = new ArrayList<>();
            while (reader.next()) {
                FileProtocol protocol = reader.getCurrentProtocol();
                try (InputStream content = reader.openContent()) {
                    switch (protocol.getKind()) {
                    case CONTENT:
                        importContent(protocol, content, bean, user);
                        break;
                    case CREATE_CACHE:
                    case UPDATE_CACHE:
                        long recordCount = putCachePatch(protocol, content, bean, user);
                        Callable<?> builder = createCacheBuilder(protocol, bean, user, recordCount);
                        if (builder != null) {
                            LOG.debugMessage("Submitting cache builder: {0} {1}",
                                    protocol.getKind(), protocol.getInfo().getTableName());
                            running.add(executor.submit(builder));
                        }
                        break;
                    default:
                        throw new AssertionError(protocol.getKind());
                    }
                }
            }
            waitForCompleteTasks(bean, running);
            // completed successfully
            return true;
        } catch (BulkLoaderSystemException e) {
            LOG.log(e);
        } catch (IOException e) {
            // failed to read the FileList
            LOG.error(e, "TG-EXTRACTOR-02001", "failed to read the FileList");
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                // ignore errors raised while closing the reader
                e.printStackTrace();
            }
        }
        return false;
    }

    private void importContent(FileProtocol protocol, InputStream content,
            ImportBean bean, String user) throws BulkLoaderSystemException {
        assert protocol != null;
        assert content != null;
        assert bean != null;
        assert user != null;
        String tableName = FileNameUtil.getImportTableName(protocol.getLocation());
        ImportTargetTableBean targetTableBean = bean.getTargetTable(tableName);
        if (targetTableBean == null) {
            // the table is not declared in the import DSL
            throw new BulkLoaderSystemException(getClass(), "TG-EXTRACTOR-02001", MessageFormat.format(
                    "the import DSL has no definition for table: {0}", tableName));
        }
        URI dfsFilePath = resolveLocation(bean, user, targetTableBean.getDfsFilePath());
        Class<?> targetTableModel = targetTableBean.getImportTargetType();

        LOG.info("TG-EXTRACTOR-02002", tableName, dfsFilePath.toString(), targetTableModel.toString());

        // convert the TSV content and write it onto DFS
        long recordCount = write(targetTableModel, dfsFilePath, content);

        LOG.info("TG-EXTRACTOR-02003", tableName, dfsFilePath.toString(), targetTableModel.toString());
        LOG.info("TG-PROFILE-01002",
                bean.getTargetName(), bean.getBatchId(), bean.getJobflowId(), bean.getExecutionId(),
                tableName, recordCount);
    }

    private long putCachePatch(FileProtocol protocol, InputStream content,
            ImportBean bean, String user) throws BulkLoaderSystemException {
        assert protocol != null;
        assert content != null;
        assert bean != null;
        assert user != null;
        assert protocol.getKind() == FileProtocol.Kind.CREATE_CACHE
                || protocol.getKind() == FileProtocol.Kind.UPDATE_CACHE;
        CacheInfo info = protocol.getInfo();
        assert info != null;
        ImportTargetTableBean targetTableBean = bean.getTargetTable(info.getTableName());
        if (targetTableBean == null) {
            // the table is not declared in the import DSL
            throw new BulkLoaderSystemException(getClass(), "TG-EXTRACTOR-02001", MessageFormat.format(
                    "the import DSL has no definition for table: {0}", info.getTableName()));
        }
        URI dfsFilePath = resolveLocation(bean, user, protocol.getLocation());
        try (CacheStorage storage = new CacheStorage(new Configuration(), dfsFilePath)) {
            LOG.info("TG-EXTRACTOR-11001", info.getId(), info.getTableName(), storage.getPatchProperties());
            storage.putPatchCacheInfo(info);
            LOG.info("TG-EXTRACTOR-11002", info.getId(), info.getTableName(), storage.getPatchProperties());

            Class<?> targetTableModel = targetTableBean.getImportTargetType();
            Path targetUri = storage.getPatchContents("0");

            LOG.info("TG-EXTRACTOR-11003", info.getId(), info.getTableName(), targetUri);

            long recordCount = write(targetTableModel, targetUri.toUri(), content);

            LOG.info("TG-EXTRACTOR-11004", info.getId(), info.getTableName(), targetUri, recordCount);
            LOG.info("TG-PROFILE-01002",
                    bean.getTargetName(), bean.getBatchId(), bean.getJobflowId(), bean.getExecutionId(),
                    info.getTableName(), recordCount);
            return recordCount;
        } catch (IOException e) {
            throw new BulkLoaderSystemException(e, getClass(), "TG-EXTRACTOR-11005",
                    info.getId(), info.getTableName(), dfsFilePath);
        }
    }

    private Callable<?> createCacheBuilder(FileProtocol protocol, ImportBean bean,
            String user, long recordCount) throws BulkLoaderSystemException {
        assert protocol != null;
        assert bean != null;
        assert user != null;
        CacheInfo info = protocol.getInfo();
        URI location = resolveLocation(bean, user, protocol.getLocation());
        assert info != null;
        try {
            switch (protocol.getKind()) {
            case CREATE_CACHE:
                return createCacheBuilder(CacheBuildClient.SUBCOMMAND_CREATE, bean, location, info);
            case UPDATE_CACHE:
                if (recordCount > 0) {
                    return createCacheBuilder(CacheBuildClient.SUBCOMMAND_UPDATE, bean, location, info);
                } else {
                    return null;
                }
            default:
                throw new AssertionError(protocol);
            }
        } catch (IOException e) {
            throw new BulkLoaderSystemException(e, getClass(), "TG-EXTRACTOR-12002",
                    protocol.getKind(), info.getId(), info.getTableName(),
                    bean.getTargetName(), bean.getBatchId(), bean.getJobflowId(), bean.getExecutionId());
        }
    }

    /**
     * Creates a cache builder task for the specified cache candidate.
     * @param subcommand subcommand name
     * @param bean current importer script
     * @param location cache location
     * @param info cache information
     * @return the future object of the execution, or {@code null} if nothing to do
     * @throws IOException if failed to start execution
     */
    protected Callable<?> createCacheBuilder(final String subcommand, ImportBean bean,
            final URI location, final CacheInfo info) throws IOException {
        assert subcommand != null;
        assert bean != null;
        assert location != null;
        assert info != null;
        List<String> command = new ArrayList<>();
        command.add(cacheBuildCommand);
        command.add(subcommand);
        command.add(bean.getBatchId());
        command.add(bean.getJobflowId());
        command.add(bean.getExecutionId());
        command.add(location.toString());
        command.add(info.getModelClassName());
        command.add(info.getTableName());

        LOG.info("TG-EXTRACTOR-12001",
                subcommand, info.getId(), info.getTableName(),
                bean.getTargetName(), bean.getBatchId(), bean.getJobflowId(), bean.getExecutionId(),
                command);

        final ProcessBuilder builder = new ProcessBuilder(command);
        builder.directory(new File(System.getProperty("user.home", ".")));

        return new Callable<Void>() {
            @Override
            public Void call() throws Exception {
                LOG.info("TG-EXTRACTOR-12003", subcommand, info.getId(), info.getTableName());
                Process process = builder.redirectOutput(Redirect.INHERIT).redirectError(Redirect.INHERIT).start();
                try {
                    int exitCode = process.waitFor();
                    if (exitCode != 0) {
                        throw new IOException(MessageFormat.format(
                                "Cache builder returns unexpected exit code: {0}", exitCode));
                    }
                    LOG.info("TG-EXTRACTOR-12004", subcommand, info.getId(), info.getTableName());
                } catch (Exception e) {
                    throw new BulkLoaderSystemException(e, DfsFileImport.class, "TG-EXTRACTOR-12005",
                            subcommand, info.getId(), info.getTableName());
                } finally {
                    process.destroy();
                }
                return null;
            }
        };
    }

    /**
     * Resolves target location.
     * @param bean importer bean
     * @param user current user name
     * @param location target location
     * @return the resolved location
     * @throws BulkLoaderSystemException if failed to resolve
     */
    protected URI resolveLocation(ImportBean bean, String user, String location) throws BulkLoaderSystemException {
        Configuration conf = new Configuration();
        URI dfsFilePath = FileNameUtil.createPath(conf, location, bean.getExecutionId(), user).toUri();
        return dfsFilePath;
    }

    private void waitForCompleteTasks(ImportBean bean, List<Future<?>> running) throws BulkLoaderSystemException {
        assert bean != null;
        assert running != null;
        if (running.isEmpty()) {
            return;
        }
        LOG.info("TG-EXTRACTOR-12006",
                bean.getTargetName(), bean.getBatchId(), bean.getJobflowId(), bean.getExecutionId());
        boolean sawError = false;
        LinkedList<Future<?>> rest = new LinkedList<>(running);
        while (rest.isEmpty() == false) {
            Future<?> future = rest.removeFirst();
            try {
                future.get(1, TimeUnit.SECONDS);
            } catch (TimeoutException e) {
                // continue...
                rest.addLast(future);
            } catch (InterruptedException e) {
                cancel(rest);
                throw new BulkLoaderSystemException(e, getClass(), "TG-EXTRACTOR-12007",
                        bean.getTargetName(), bean.getBatchId(), bean.getJobflowId(), bean.getExecutionId());
            } catch (ExecutionException e) {
                cancel(rest);
                Throwable cause = e.getCause();
                if (cause instanceof RuntimeException) {
                    throw (RuntimeException) cause;
                } else if (cause instanceof Error) {
                    throw (Error) cause;
                } else if (cause instanceof BulkLoaderSystemException) {
                    LOG.log((BulkLoaderSystemException) cause);
                    sawError = true;
                } else {
                    LOG.error(e, "TG-EXTRACTOR-12008",
                            bean.getTargetName(), bean.getBatchId(), bean.getJobflowId(), bean.getExecutionId());
                    sawError = true;
                }
            }
        }
        if (sawError) {
            throw new BulkLoaderSystemException(getClass(), "TG-EXTRACTOR-12008",
                    bean.getTargetName(), bean.getBatchId(), bean.getJobflowId(), bean.getExecutionId());
        } else {
            LOG.info("TG-EXTRACTOR-12009",
                    bean.getTargetName(), bean.getBatchId(), bean.getJobflowId(), bean.getExecutionId());
        }
    }

    private void cancel(List<Future<?>> futures) {
        assert futures != null;
        for (Future<?> future : futures) {
            future.cancel(true);
        }
    }

    /**
     * Converts the given TSV formatted stream into model objects and writes them onto DFS.
     * @param <T> the model type corresponding to the import target table
     * @param targetTableModel the model class corresponding to the import target table
     * @param dfsFilePath the target file path on DFS
     * @param inputStream the content stream taken from the FileList
     * @return the number of records written
     * @throws BulkLoaderSystemException if reading or writing fails
     */
    protected <T> long write(Class<T> targetTableModel, URI dfsFilePath,
            InputStream inputStream) throws BulkLoaderSystemException {
        Configuration conf = new Configuration();
        TsvIoFactory<T> factory = new TsvIoFactory<>(targetTableModel);
        try (ModelInput<T> input = factory.createModelInput(inputStream)) {
            long count = 0;
            T buffer = factory.createModelObject();
            try (ModelOutput<T> output = TemporaryStorage.openOutput(conf, targetTableModel, new Path(dfsFilePath))) {
                while (input.readTo(buffer)) {
                    count++;
                    output.write(buffer);
                }
            }
            return count;
        } catch (IOException e) {
            throw new BulkLoaderSystemException(e, getClass(), "TG-EXTRACTOR-02001",
                    "failed to write the file onto DFS. URI: " + dfsFilePath);
        }
    }

    /**
     * Resolves the compression type from its name.
     * @param strCompType name of the CompressionType
     * @return the resolved CompressionType, or {@code NONE} if the name is unknown
     */
    protected CompressionType getCompType(String strCompType) {
        CompressionType compType = null;
        try {
            compType = CompressionType.valueOf(strCompType);
        } catch (Exception e) {
            compType = CompressionType.NONE;
            LOG.warn("TG-EXTRACTOR-02004", strCompType);
        }
        return compType;
    }

    /**
     * Returns an InputStream over standard input.
     * @return InputStream
     * @throws IOException if failed to open stream
     */
    protected InputStream getInputStream() throws IOException {
        return new BufferedInputStream(System.in, INPUT_BUFFER_BYTES);
    }
}
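
Because importFile() obtains its input only through the protected getInputStream() hook, the class is easy to exercise outside the real bulk loader transfer channel. The following is a minimal sketch, not part of the framework: it overrides getInputStream() so that the FileList is read from a local file instead of standard input, which can be handy for local experiments. It assumes that ConfigurationLoader has already been loaded with the bulk loader configuration (the no-argument superclass constructor reads the cache-builder script path and thread count from it) and that the caller prepares the ImportBean and OS user name exactly as the bulk loader would; the names FileBackedDfsFileImport and runOnce are made up for this illustration.

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import com.asakusafw.bulkloader.bean.ImportBean;
import com.asakusafw.bulkloader.extractor.DfsFileImport;

// Hypothetical helper for local testing; not part of the Asakusa Framework.
public class FileBackedDfsFileImport extends DfsFileImport {

    private final File fileListFile;

    public FileBackedDfsFileImport(File fileListFile) {
        // the superclass constructor reads its settings from ConfigurationLoader,
        // which must have been initialized beforehand (assumption)
        this.fileListFile = fileListFile;
    }

    @Override
    protected InputStream getInputStream() throws IOException {
        // feed importFile() from a local FileList file instead of System.in
        return new BufferedInputStream(new FileInputStream(fileListFile));
    }

    // `bean` and `user` are assumed to be prepared by the caller as the bulk loader would
    public static boolean runOnce(ImportBean bean, String user, File fileListFile) {
        return new FileBackedDfsFileImport(fileListFile).importFile(bean, user);
    }
}

Overriding only the stream accessor leaves the protocol handling (CONTENT, CREATE_CACHE, UPDATE_CACHE) and the cache-builder submission path untouched, so the sketch behaves like the production flow except for where the FileList comes from.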