com.asakusafw.bulkloader.collector.ExportFileSend.java Source code

Java tutorial

Introduction

Here is the source code for com.asakusafw.bulkloader.collector.ExportFileSend.java

Source

/**
 * Copyright 2011-2016 Asakusa Framework Team.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.asakusafw.bulkloader.collector;

import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.MessageFormat;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.io.output.CountingOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;

import com.asakusafw.bulkloader.bean.ExportTargetTableBean;
import com.asakusafw.bulkloader.bean.ExporterBean;
import com.asakusafw.bulkloader.common.ConfigurationLoader;
import com.asakusafw.bulkloader.common.Constants;
import com.asakusafw.bulkloader.common.FileCompType;
import com.asakusafw.bulkloader.common.FileNameUtil;
import com.asakusafw.bulkloader.exception.BulkLoaderSystemException;
import com.asakusafw.bulkloader.log.Log;
import com.asakusafw.bulkloader.transfer.FileList;
import com.asakusafw.runtime.io.ModelInput;
import com.asakusafw.runtime.io.ModelOutput;
import com.asakusafw.runtime.io.TsvIoFactory;
import com.asakusafw.runtime.stage.temporary.TemporaryStorage;

/**
 * Sends export files: reads export target data from the Hadoop file system,
 * converts it to TSV, and writes it into a {@link FileList} stream.
 * @author yuta.shirai
 */
public class ExportFileSend {

    static final Log LOG = new Log(ExportFileSend.class);

    /**
     * State handed to {@link FileNameUtil#createSendExportFileName} on every call.
     * NOTE(review): presumably tracks a per-table sequence so generated entry names
     * stay unique across calls on this instance - confirm against FileNameUtil.
     */
    Map<String, Integer> fileNameMap = new HashMap<>();

    /**
     * Sends the export files of every export target table in the given bean.
     * <p>
     * The data is written as TSV entries into a {@link FileList} stream that is
     * opened on the stream returned by {@link #getOutputStream()}. That stream is
     * always closed before this method returns.
     * </p>
     * @param bean the exporter parameters (target tables, execution ID, etc.)
     * @param user the OS user name used to resolve the DFS file paths
     * @return {@code true} if all files were sent, or {@code false} if a
     *     {@link BulkLoaderSystemException} occurred (the exception is logged)
     */
    public boolean sendExportFile(ExporterBean bean, String user) {

        // Compression type of the transferred file list, taken from the configuration.
        String strCompType = ConfigurationLoader.getProperty(Constants.PROP_KEY_EXP_FILE_COMP_TYPE);
        FileCompType compType = FileCompType.find(strCompType);

        OutputStream output = getOutputStream();
        try {
            FileList.Writer writer;
            try {
                writer = FileList.createWriter(output, compType == FileCompType.DEFLATED);
            } catch (IOException e) {
                throw new BulkLoaderSystemException(e, getClass(), "TG-COLLECTOR-02001",
                        "Exporter??????????");
            }
            Configuration conf = new Configuration();
            List<String> tableNames = bean.getExportTargetTableList();
            for (String tableName : tableNames) {
                ExportTargetTableBean targetTable = bean.getExportTargetTable(tableName);
                Class<? extends Writable> targetTableModel = targetTable.getExportTargetType()
                        .asSubclass(Writable.class);

                List<Path> filePaths = FileNameUtil.createPaths(conf, targetTable.getDfsFilePaths(),
                        bean.getExecutionId(), user);

                // Send every path of this table and accumulate the number of records sent.
                long recordCount = 0;
                for (Path filePath : filePaths) {
                    LOG.info("TG-COLLECTOR-02002", tableName, filePath, compType.getSymbol(),
                            targetTableModel.toString());
                    long countInFile = send(targetTableModel, filePath.toString(), writer, tableName);
                    // A negative count means "nothing was sent for this path"; it must not
                    // be subtracted from the total.
                    if (countInFile >= 0) {
                        recordCount += countInFile;
                    }
                    LOG.info("TG-COLLECTOR-02003", tableName, filePath, compType.getSymbol(),
                            targetTableModel.toString());
                }

                LOG.info("TG-PROFILE-01004", bean.getTargetName(), bean.getBatchId(), bean.getJobflowId(),
                        bean.getExecutionId(), tableName, recordCount);
            }

            try {
                writer.close();
            } catch (IOException e) {
                // A close failure stays non-fatal (as before), but is now reported
                // through the logger instead of being dumped to stderr.
                LOG.log(new BulkLoaderSystemException(e, getClass(), "TG-COLLECTOR-02001",
                        "Failed to close the export file list writer"));
            }

            return true;
        } catch (BulkLoaderSystemException e) {
            LOG.log(e);
            return false;
        } finally {
            try {
                output.close();
            } catch (IOException e) {
                // Same policy as for the writer: report it, but do not change the result.
                LOG.log(new BulkLoaderSystemException(e, getClass(), "TG-COLLECTOR-02001",
                        "Failed to close the export output stream"));
            }
        }
    }

    /**
     * Reads the export data matching the given path pattern, converts it to TSV and
     * writes it into the given {@link com.asakusafw.bulkloader.transfer.FileList.Writer}.
     * <p>
     * The output is split: once an entry has grown beyond the configured maximum
     * size a new entry is started for the remaining records. The size is checked
     * after each record is written, so an entry may exceed the limit by one record.
     * </p>
     * @param <T> the data model type
     * @param targetTableModel the data model class of the export target
     * @param filePath the path (glob pattern) of the export data
     * @param writer the destination file list writer
     * @param tableName the export target table name
     * @return the number of records written, or {@code -1} if no path matched or
     *     no output entry was created
     * @throws BulkLoaderSystemException if reading, writing, or resolving the path failed
     */
    protected <T extends Writable> long send(Class<T> targetTableModel, String filePath, FileList.Writer writer,
            String tableName) throws BulkLoaderSystemException {
        FileSystem fs = null;
        String fileName = null;
        // Becomes true only when the transfer finished normally, so that the cleanup
        // below can tell whether another failure is already propagating.
        boolean completed = false;

        // Maximum size in bytes of a single output entry before it is split.
        long maxSize = Long.parseLong(ConfigurationLoader.getProperty(Constants.PROP_KEY_EXP_LOAD_MAX_SIZE));

        try {
            TsvIoFactory<T> factory = new TsvIoFactory<>(targetTableModel);
            Configuration conf = new Configuration();
            fs = FileSystem.get(new URI(filePath), conf);

            // Expand the path pattern into the actual list of files.
            FileStatus[] status = fs.globStatus(new Path(filePath));
            Path[] listedPaths = FileUtil.stat2Paths(status);
            if (listedPaths == null) {
                LOG.info("TG-COLLECTOR-02006", tableName, filePath);
                completed = true;
                return -1;
            }
            LOG.info("TG-COLLECTOR-02007", listedPaths.length, tableName, filePath);

            long count = 0;
            boolean addEntry = false;
            for (Path path : listedPaths) {
                // Skip files generated by Hadoop itself.
                if (isSystemFile(path)) {
                    continue;
                }

                // try-with-resources keeps a failure in the body as the primary
                // exception even if closing the input fails as well.
                try (ModelInput<T> input = TemporaryStorage.openInput(conf, targetTableModel, path)) {
                    boolean moreRecords = true;
                    while (moreRecords) {
                        addEntry = true;
                        fileName = FileNameUtil.createSendExportFileName(tableName, fileNameMap);
                        try (OutputStream output = writer.openNext(FileList.content(fileName))) {
                            CountingOutputStream counter = new CountingOutputStream(output);
                            ModelOutput<T> modelOut = factory.createModelOutput(counter);
                            T model = factory.createModelObject();
                            LOG.info("TG-COLLECTOR-02004", tableName, path.toString(), fileName);

                            // Copy records until the input is exhausted or the entry
                            // has grown past the size limit.
                            moreRecords = false;
                            while (input.readTo(model)) {
                                modelOut.write(model);
                                count++;
                                if (counter.getByteCount() > maxSize) {
                                    // Continue with a new entry for the remaining records.
                                    moreRecords = true;
                                    break;
                                }
                            }
                            modelOut.close();
                            LOG.info("TG-COLLECTOR-02005", tableName, path.toString(), fileName);
                        }
                    }
                }
            }

            long result;
            if (addEntry) {
                result = count;
            } else {
                assert count == 0;
                result = -1;
            }
            completed = true;
            return result;
        } catch (IOException e) {
            throw new BulkLoaderSystemException(e, getClass(), "TG-COLLECTOR-02001", MessageFormat
                    .format("HDFS?{0} ???{1}", filePath, fileName));
        } catch (URISyntaxException e) {
            throw new BulkLoaderSystemException(e, getClass(), "TG-COLLECTOR-02001",
                    MessageFormat.format("HDFS???HDFS?{0}", filePath));
        } finally {
            if (fs != null) {
                try {
                    fs.close();
                } catch (IOException e) {
                    BulkLoaderSystemException closeFailure = new BulkLoaderSystemException(e, this.getClass(),
                            "TG-COLLECTOR-02001",
                            MessageFormat.format(
                                    "HDFS???URI{0}",
                                    filePath));
                    if (completed) {
                        throw closeFailure;
                    }
                    // Another exception is already propagating; throwing here would
                    // replace it and hide the real cause, so only log this one.
                    LOG.log(closeFailure);
                }
            }
        }
    }

    /**
     * Returns whether the path denotes a file or directory generated by Hadoop itself.
     * @param path the path to check
     * @return {@code true} if the last path component is the job success marker
     *     ({@link FileOutputCommitter#SUCCEEDED_FILE_NAME}) or {@code _logs},
     *     otherwise {@code false}
     */
    private boolean isSystemFile(Path path) {
        assert path != null;
        String name = path.getName();
        return name.equals(FileOutputCommitter.SUCCEEDED_FILE_NAME) || name.equals("_logs");
    }

    /**
     * Returns the stream the export file list is written to.
     * Subclasses may override this to redirect the output.
     * @return the output stream obtained from {@link SystemOutManager#getOut()}
     */
    protected OutputStream getOutputStream() {
        return SystemOutManager.getOut();
    }
}