org.huahinframework.unit.JobDriver.java Source code

Java tutorial

Introduction

Here is the source code for org.huahinframework.unit.JobDriver.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.huahinframework.unit;

import static org.junit.Assert.*;

import java.io.IOException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
import org.apache.hadoop.mrunit.types.Pair;
import org.huahinframework.core.DataFormatException;
import org.huahinframework.core.SimpleJob;
import org.huahinframework.core.SimpleJobTool;
import org.huahinframework.core.io.Key;
import org.huahinframework.core.io.Record;
import org.huahinframework.core.io.Value;
import org.huahinframework.core.lib.input.creator.JoinRegexValueCreator;
import org.huahinframework.core.lib.input.creator.JoinSomeRegexValueCreator;
import org.huahinframework.core.lib.input.creator.JoinSomeValueCreator;
import org.huahinframework.core.lib.input.creator.JoinValueCreator;
import org.huahinframework.core.lib.input.creator.LabelValueCreator;
import org.huahinframework.core.lib.input.creator.SimpleValueCreator;
import org.huahinframework.core.lib.input.creator.ValueCreator;
import org.huahinframework.core.util.StringUtil;
import org.junit.Before;

/**
 * This is a test driver of a Job.
 *
 * <p>Example:</p>
 * <p><blockquote><pre>
 * public class JobTest extends JobDriver {
 *   private static final String[] LABELS = new String[] { "label", "value" };
 *   public void test()
 *       throws IOException, InstantiationException, IllegalAccessException,
 *              ClassNotFoundException, URISyntaxException {
 *     addJob(LABELS, StringUtil.TAB, false).setFilter(TestFilter.class)
 *                                          .setSummaizer(TestSummarizer.class);
 *
 *     List<String> input = new ArrayList<String>();
 *     input.add("label\t1");
 *     input.add("label\t2");
 *     input.add("label\t3");
 *
 *     List<Record> output = new ArrayList<Record>();
 *     Record record = new Record();
 *     record.addGrouping("label", "label");
 *     record.addValue("value", 6);
 *     output.add(record);
 *
 *     run(input, output, true);
 *   }
 * }
 * </pre></blockquote></p>
 */
public abstract class JobDriver extends SimpleJobTool {
    /**
     * Format used to build the expected {@code (key, value)} text that is compared
     * with the {@code toString()} of each actual output pair.
     */
    private static final String OUTPUT = "(%s, %s)";

    /**
     * Separator of the master data lines; set by the join setters. When it is still
     * {@code null} at run time, the separator of the first job is used instead.
     */
    private String masterSeparator;
    /** Master data lines supplied by the join setters and read when the master map is built. */
    private List<String> masterData;
    /** {@code true} once one of the {@code setBigJoin} methods has been called. */
    private boolean bigJoin;
    /** Value stored under {@code FILE_NAME} in the input key; a default name is used when unset. */
    private String fileName;
    /** Value stored under {@code FILE_LENGTH} in the input key. */
    private long fileLength;

    /**
     * JUnit {@code @Before} hook: replaces {@code conf} with a new {@link Configuration}
     * and assigns {@code jobName} from {@link StringUtil#createInternalJobID()}.
     *
     * @throws java.lang.Exception declared so that the hook may fail with any exception
     */
    @Before
    public void setUp() throws Exception {
        conf = new Configuration();
        jobName = StringUtil.createInternalJobID();
    }

    /**
     * Runs the configured job chain against {@code input} and verifies the result.
     *
     * <p>The type of the first element of {@code input} selects the execution path:
     * {@link String} lines are parsed by the reader configured for the first job,
     * {@link Record}s are fed to the first job as they are. Any other element type
     * fails the test.</p>
     *
     * <p>Each expected {@link Record} is rendered as {@code (key, value)} and compared,
     * in order, with the text form of the corresponding actual pair.</p>
     *
     * @param input input {@link String} {@link List} or {@link Record} {@link List};
     *              must not be {@code null} or empty
     * @param output expected result as a {@link Record} {@link List}; when {@code null}
     *               the jobs are run but the result is not verified
     * @param stdout whether to output the execution results to stdout.
     * @throws InstantiationException
     * @throws IllegalAccessException
     * @throws ClassNotFoundException
     * @throws IOException
     * @throws URISyntaxException
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    public void run(List<?> input, List<Record> output, boolean stdout) throws InstantiationException,
            IllegalAccessException, ClassNotFoundException, IOException, URISyntaxException {
        if (input == null || input.isEmpty()) {
            fail("input is empty");
        }

        List<Pair<WritableComparable, Writable>> actual = null;
        Object o = input.get(0);
        if (o instanceof String) {
            actual = runString((List<String>) input);
        } else if (o instanceof Record) {
            actual = runRecord((List<Record>) input);
        } else {
            fail("input object is miss match");
        }

        // The run helpers return null when the job chain is empty; report that
        // as a test failure instead of a NullPointerException further down.
        if (actual == null) {
            fail("job chain produced no result");
        }

        if (stdout) {
            for (Pair<WritableComparable, Writable> pair : actual) {
                System.out.println(pair);
            }
        }

        // The null check must come before the first use of output; without an
        // expected list there is nothing to verify.
        if (output == null) {
            return;
        }

        assertEquals(output.size(), actual.size());

        for (int i = 0; i < output.size(); i++) {
            Pair<WritableComparable, Writable> pair = actual.get(i);
            Record r = output.get(i);
            String s = String.format(OUTPUT, r.getKey().toString(), r.getValue().toString());
            assertEquals(s, pair.toString());
        }
    }

    /**
     * Runs the job chain with raw text lines as the input of the first job.
     *
     * <p>For the first job every line is converted to a {@link Value} by a
     * {@link ValueCreator} selected from {@code SimpleJob.READER_TYPE}. When no reader
     * type has been set, the label reader is used if the first job defines labels and
     * the simple reader otherwise. Every later job receives the output pairs of the job
     * before it.</p>
     *
     * <p>When a big join was requested, the chain consists of exactly one
     * {@link SimpleJob}, and that job reports neither {@code isMapper()} nor
     * {@code isReducer()}, the MapReduce driver is not run: the result is built directly
     * by joining each input line with the master data, with the joined columns placed in
     * the key and an empty {@link Text} as the value.</p>
     *
     * @param input text lines for the first job
     * @return output pairs of the last job, or {@code null} when the chain has no job
     * @throws InstantiationException
     * @throws IllegalAccessException
     * @throws ClassNotFoundException
     * @throws IOException
     * @throws URISyntaxException
     */
    @SuppressWarnings("rawtypes")
    private List<Pair<WritableComparable, Writable>> runString(List<? extends String> input)
            throws InstantiationException, IllegalAccessException, ClassNotFoundException, IOException,
            URISyntaxException {
        boolean first = true;
        List<Pair<WritableComparable, Writable>> actual = null;
        String[] beforeSummarizerOutputLabel = null;

        for (Job job : sequencalJobChain.getJobs()) {
            MapReduceDriver<WritableComparable, Writable, WritableComparable, Writable, WritableComparable, Writable> driver = createDriver(
                    job);
            Configuration localConf = job.getConfiguration();

            boolean onlyJoin = false;
            if (first) {
                // The input format settings are always taken from the first job of the chain.
                Configuration firstConf = sequencalJobChain.getJobs().get(0).getConfiguration();
                boolean formatIgnored = firstConf.getBoolean(SimpleJob.FORMAT_IGNORED, false);
                String[] labels = firstConf.getStrings(SimpleJob.LABELS);
                String separator = firstConf.get(SimpleJob.SEPARATOR, StringUtil.COMMA);

                // A join setter called without a separator leaves masterSeparator null;
                // fall back to the separator of the input data.
                if (masterSeparator == null) {
                    masterSeparator = separator;
                    conf.set(SimpleJob.MASTER_SEPARATOR, masterSeparator);
                }

                // Choose a default reader only when none was configured explicitly.
                if (conf.getInt(SimpleJob.READER_TYPE, -1) == -1) {
                    conf.setInt(SimpleJob.READER_TYPE,
                            labels != null ? SimpleJob.LABELS_READER : SimpleJob.SIMPLE_READER);
                }

                Key key = new Key();
                key.addPrimitiveValue("KEY", 1L);
                if (fileName == null) {
                    fileName = "exmpale.txt";
                }
                key.addPrimitiveValue("FILE_NAME", fileName);
                key.addPrimitiveValue("FILE_LENGTH", fileLength);

                // NOTE(review): the four join creators below receive formatIgnored twice
                // (2nd and 4th argument), while the simple/label creators pass the
                // SEPARATOR_REGEX flag right after the separator. Confirm against the
                // creator constructors that the 4th argument is not meant to be that flag.
                ValueCreator valueCreator = null;
                int type = conf.getInt(SimpleJob.READER_TYPE, -1);
                switch (type) {
                case SimpleJob.SIMPLE_READER:
                    valueCreator = new SimpleValueCreator(separator,
                            conf.getBoolean(SimpleJob.SEPARATOR_REGEX, false));
                    break;
                case SimpleJob.LABELS_READER:
                    valueCreator = new LabelValueCreator(labels, formatIgnored, separator,
                            conf.getBoolean(SimpleJob.SEPARATOR_REGEX, false));
                    break;
                case SimpleJob.SINGLE_COLUMN_JOIN_READER:
                    Map<String, String[]> simpleJoinMap = getSimpleMaster(conf);
                    if (!conf.getBoolean(SimpleJob.JOIN_REGEX, false)) {
                        valueCreator = new JoinValueCreator(labels, formatIgnored, separator, formatIgnored,
                                simpleJoinMap, conf);
                    } else {
                        Map<Pattern, String[]> simpleJoinRegexMap = new HashMap<Pattern, String[]>();
                        for (Entry<String, String[]> entry : simpleJoinMap.entrySet()) {
                            Pattern p = Pattern.compile(entry.getKey());
                            simpleJoinRegexMap.put(p, entry.getValue());
                        }
                        valueCreator = new JoinRegexValueCreator(labels, formatIgnored, separator, formatIgnored,
                                simpleJoinRegexMap, conf);
                    }
                    break;
                case SimpleJob.SOME_COLUMN_JOIN_READER:
                    Map<List<String>, String[]> simpleColumnsJoinMap = getSimpleColumnsMaster(conf);
                    if (!conf.getBoolean(SimpleJob.JOIN_REGEX, false)) {
                        valueCreator = new JoinSomeValueCreator(labels, formatIgnored, separator, formatIgnored,
                                simpleColumnsJoinMap, conf);
                    } else {
                        Map<List<Pattern>, String[]> simpleColumnsJoinRegexMap = new HashMap<List<Pattern>, String[]>();
                        for (Entry<List<String>, String[]> entry : simpleColumnsJoinMap.entrySet()) {
                            List<String> l = entry.getKey();
                            List<Pattern> p = new ArrayList<Pattern>();
                            for (String s : l) {
                                p.add(Pattern.compile(s));
                            }
                            simpleColumnsJoinRegexMap.put(p, entry.getValue());
                        }
                        valueCreator = new JoinSomeRegexValueCreator(labels, formatIgnored, separator,
                                formatIgnored, simpleColumnsJoinRegexMap, conf);
                    }
                    break;
                default:
                    break;
                }

                // An unknown reader type would otherwise surface as a NullPointerException
                // on the first input line.
                if (valueCreator == null) {
                    fail("unsupported reader type: " + type);
                }

                // Big-join shortcut; guarded by instanceof like the other SimpleJob uses
                // in this method so that a plain Job simply takes the normal driver path.
                if (bigJoin && sequencalJobChain.getJobs().size() == 1 && job instanceof SimpleJob) {
                    SimpleJob sj = (SimpleJob) job;
                    if (!sj.isMapper() && !sj.isReducer()) {
                        actual = new ArrayList<Pair<WritableComparable, Writable>>();
                        String[] masterLabels = conf.getStrings(SimpleJob.MASTER_LABELS);

                        // The master lookup depends only on conf and the master data, so
                        // it is built once instead of once per input line.
                        boolean singleColumn = type == SimpleJob.SINGLE_COLUMN_JOIN_READER;
                        Map<String, String[]> singleMaster = null;
                        Map<List<String>, String[]> columnsMaster = null;
                        String singleJoinColumn = null;
                        String[] joinColumns = null;
                        if (singleColumn) {
                            singleMaster = getSimpleMaster(conf);
                            singleJoinColumn = conf.get(SimpleJob.JOIN_MASTER_COLUMN);
                        } else {
                            columnsMaster = getSimpleColumnsMaster(conf);
                            joinColumns = conf.getStrings(SimpleJob.JOIN_MASTER_COLUMN);
                        }

                        for (String s : input) {
                            Value value = new Value();
                            valueCreator.create(s, value);
                            Key k = new Key();
                            for (String label : labels) {
                                k.addPrimitiveValue(label, value.getPrimitiveValue(label));
                            }

                            // Named "joined" so that it no longer shadows the masterData field.
                            String[] joined = null;
                            if (singleColumn) {
                                joined = singleMaster.get(value.getPrimitiveValue(singleJoinColumn));
                            } else {
                                List<String> l = new ArrayList<String>();
                                for (String data : joinColumns) {
                                    l.add((String) value.getPrimitiveValue(data));
                                }
                                joined = columnsMaster.get(l);
                            }

                            // No matching master row: emit the master columns as nulls.
                            if (joined == null) {
                                joined = new String[masterLabels.length];
                            }
                            for (int i = 0; i < joined.length; i++) {
                                k.addPrimitiveValue(masterLabels[i], joined[i]);
                            }

                            Pair<WritableComparable, Writable> p = new Pair<WritableComparable, Writable>(k,
                                    new Text());
                            actual.add(p);
                        }
                        onlyJoin = true;
                    }
                }

                for (String s : input) {
                    Value value = new Value();
                    valueCreator.create(s, value);
                    driver.addInput(key, value);
                }

                if (job instanceof SimpleJob) {
                    SimpleJob sj = (SimpleJob) job;
                    String[] summarizerOutputLabels = sj.getSummarizerOutputLabels();
                    if (summarizerOutputLabels != null) {
                        beforeSummarizerOutputLabel = summarizerOutputLabels;
                    }
                }

                first = false;
            } else {
                // Later jobs consume the output of the job before them.
                for (Pair<WritableComparable, Writable> pair : actual) {
                    driver.addInput(pair.getFirst(), pair.getSecond());
                }

                if (job instanceof SimpleJob) {
                    SimpleJob sj = (SimpleJob) job;
                    if (!sj.isNatural()) {
                        // Hand the labels of the previous summarizer to this job and
                        // remember this job's own labels for the next one.
                        if (beforeSummarizerOutputLabel != null) {
                            job.getConfiguration().setStrings(SimpleJob.BEFORE_SUMMARIZER_OUTPUT_LABELS,
                                    beforeSummarizerOutputLabel);
                        }
                        String[] summarizerOutputLabels = sj.getSummarizerOutputLabels();
                        if (summarizerOutputLabels != null) {
                            beforeSummarizerOutputLabel = summarizerOutputLabels;
                        }
                    }
                }
            }

            // The big-join shortcut has already produced the result.
            if (!onlyJoin) {
                driver.setConfiguration(localConf);
                actual = driver.run();
            }
        }

        return actual;
    }

    /**
     * Runs the job chain with {@link Record}s as the input of the first job.
     *
     * <p>The key and value of every record are fed to the first job; each following
     * job receives the output pairs of the job before it. Summarizer output labels are
     * carried from one {@link SimpleJob} to the next non-natural {@link SimpleJob}
     * through {@code SimpleJob.BEFORE_SUMMARIZER_OUTPUT_LABELS}.</p>
     *
     * @param input records for the first job
     * @return output pairs of the last job, or {@code null} when the chain has no job
     * @throws InstantiationException
     * @throws IllegalAccessException
     * @throws ClassNotFoundException
     * @throws IOException
     */
    @SuppressWarnings("rawtypes")
    private List<Pair<WritableComparable, Writable>> runRecord(List<? extends Record> input)
            throws InstantiationException, IllegalAccessException, ClassNotFoundException, IOException {
        List<Pair<WritableComparable, Writable>> previousOutput = null;
        String[] carriedLabels = null;
        boolean firstJob = true;

        for (Job job : sequencalJobChain.getJobs()) {
            MapReduceDriver<WritableComparable, Writable, WritableComparable, Writable, WritableComparable, Writable> driver = createDriver(
                    job);
            Configuration jobConf = job.getConfiguration();
            SimpleJob simpleJob = (job instanceof SimpleJob) ? (SimpleJob) job : null;

            // Whether this job takes part in the summarizer label hand-over.
            boolean tracksLabels;
            if (firstJob) {
                for (Record record : input) {
                    driver.addInput(record.getKey(), record.getValue());
                }
                tracksLabels = simpleJob != null;
                firstJob = false;
            } else {
                for (Pair<WritableComparable, Writable> produced : previousOutput) {
                    driver.addInput(produced.getFirst(), produced.getSecond());
                }
                tracksLabels = simpleJob != null && !simpleJob.isNatural();
                if (tracksLabels && carriedLabels != null) {
                    job.getConfiguration().setStrings(SimpleJob.BEFORE_SUMMARIZER_OUTPUT_LABELS, carriedLabels);
                }
            }

            if (tracksLabels) {
                String[] ownLabels = simpleJob.getSummarizerOutputLabels();
                if (ownLabels != null) {
                    carriedLabels = ownLabels;
                }
            }

            driver.setConfiguration(jobConf);
            previousOutput = driver.run();
        }

        return previousOutput;
    }

    /**
     * Builds an MRUnit driver for one job of the chain.
     *
     * <p>The mapper and reducer classes of the job are instantiated reflectively, and
     * the job's grouping comparator, sort comparator and configuration are applied to
     * the driver.</p>
     *
     * @param job job to create the driver for
     * @return driver set up with the job's mapper, reducer, comparators and configuration
     * @throws InstantiationException if the mapper or reducer cannot be instantiated
     * @throws IllegalAccessException if the mapper or reducer constructor is not accessible
     * @throws ClassNotFoundException if a class configured on the job cannot be loaded
     */
    @SuppressWarnings({ "rawtypes", "unchecked" })
    private MapReduceDriver<WritableComparable, Writable, WritableComparable, Writable, WritableComparable, Writable> createDriver(
            Job job) throws InstantiationException, IllegalAccessException, ClassNotFoundException {
        Mapper mapperInstance = job.getMapperClass().newInstance();
        Reducer reducerInstance = job.getReducerClass().newInstance();
        RawComparator grouping = job.getGroupingComparator();
        RawComparator ordering = job.getSortComparator();

        MapReduceDriver<WritableComparable, Writable, WritableComparable, Writable, WritableComparable, Writable> result = MapReduceDriver
                .newMapReduceDriver(mapperInstance, reducerInstance);
        result = result.withKeyGroupingComparator(grouping);
        result = result.withKeyOrderComparator(ordering);
        result.setConfiguration(job.getConfiguration());
        return result;
    }

    /**
     * Configures a simple join on a single column.
     * Simple join is meant for master data that is small enough to fit in the memory
     * of a task; here the master data is supplied as an in-memory list of lines.
     *
     * <p>Delegates to the six-argument overload with a {@code null} master separator
     * and regex matching disabled.</p>
     *
     * @param masterLabels label of master data
     * @param masterColumn master column
     * @param dataColumn data column
     * @param masterData master data
     */
    protected void setSimpleJoin(String[] masterLabels, String masterColumn, String dataColumn,
            List<String> masterData) {
        setSimpleJoin(masterLabels, masterColumn, dataColumn, null, false, masterData);
    }

    /**
     * Configures a simple join on a single column, optionally matching by regex.
     * Simple join is meant for master data that is small enough to fit in the memory
     * of a task; here the master data is supplied as an in-memory list of lines.
     *
     * <p>Delegates to the six-argument overload with a {@code null} master separator.</p>
     *
     * @param masterLabels label of master data
     * @param masterColumn master column
     * @param dataColumn data column
     * @param regex {@code true} if the master join key is a regular expression
     * @param masterData master data
     */
    protected void setSimpleJoin(String[] masterLabels, String masterColumn, String dataColumn, boolean regex,
            List<String> masterData) {
        setSimpleJoin(masterLabels, masterColumn, dataColumn, null, regex, masterData);
    }

    /**
     * Configures a simple join on a single column.
     * Simple join is meant for master data that is small enough to fit in the memory
     * of a task; here the master data is supplied as an in-memory list of lines.
     *
     * <p>Selects the single-column join reader, stores the join settings in the
     * configuration, and keeps the separator and master data on this driver.</p>
     *
     * @param masterLabels label of master data
     * @param masterColumn master column
     * @param dataColumn data column
     * @param masterSeparator separator of the master data; may be {@code null}
     * @param regex {@code true} if the master join key is a regular expression
     * @param masterData master data
     */
    protected void setSimpleJoin(String[] masterLabels, String masterColumn, String dataColumn,
            String masterSeparator, boolean regex, List<String> masterData) {
        final Configuration joinConf = this.conf;

        // Reader selection and join settings.
        joinConf.setInt(SimpleJob.READER_TYPE, SimpleJob.SINGLE_COLUMN_JOIN_READER);
        joinConf.setStrings(SimpleJob.MASTER_LABELS, masterLabels);
        joinConf.set(SimpleJob.JOIN_MASTER_COLUMN, masterColumn);
        joinConf.set(SimpleJob.JOIN_DATA_COLUMN, dataColumn);
        joinConf.setBoolean(SimpleJob.JOIN_REGEX, regex);

        // State kept on the driver itself.
        this.masterData = masterData;
        this.masterSeparator = masterSeparator;
    }

    /**
     * Configures a simple join on several columns.
     * Simple join is meant for master data that is small enough to fit in the memory
     * of a task; here the master data is supplied as an in-memory list of lines.
     *
     * <p>Delegates to the six-argument overload with a {@code null} master separator
     * and regex matching disabled.</p>
     *
     * @param masterLabels label of master data
     * @param masterColumn master columns
     * @param dataColumn data columns
     * @param masterData master data
     * @throws DataFormatException if the master and data column counts differ
     */
    protected void setSimpleJoin(String[] masterLabels, String[] masterColumn, String[] dataColumn,
            List<String> masterData) throws DataFormatException {
        setSimpleJoin(masterLabels, masterColumn, dataColumn, null, false, masterData);
    }

    /**
     * Configures a simple join on several columns, optionally matching by regex.
     * Simple join is meant for master data that is small enough to fit in the memory
     * of a task; here the master data is supplied as an in-memory list of lines.
     *
     * <p>Delegates to the six-argument overload with a {@code null} master separator.</p>
     *
     * @param masterLabels label of master data
     * @param masterColumn master columns
     * @param dataColumn data columns
     * @param regex {@code true} if the master join keys are regular expressions
     * @param masterData master data
     * @throws DataFormatException if the master and data column counts differ
     */
    protected void setSimpleJoin(String[] masterLabels, String[] masterColumn, String[] dataColumn, boolean regex,
            List<String> masterData) throws DataFormatException {
        setSimpleJoin(masterLabels, masterColumn, dataColumn, null, regex, masterData);
    }

    /**
     * Configures a simple join on several columns.
     * Simple join is meant for master data that is small enough to fit in the memory
     * of a task; here the master data is supplied as an in-memory list of lines.
     *
     * <p>Selects the multi-column join reader, stores the join settings in the
     * configuration, and keeps the separator and master data on this driver.</p>
     *
     * @param masterLabels label of master data
     * @param masterColumn master columns
     * @param dataColumn data columns; must have as many entries as {@code masterColumn}
     * @param masterSeparator separator of the master data; may be {@code null}
     * @param regex {@code true} if the master join keys are regular expressions
     * @param masterData master data
     * @throws DataFormatException if the master and data column counts differ
     */
    protected void setSimpleJoin(String[] masterLabels, String[] masterColumn, String[] dataColumn,
            String masterSeparator, boolean regex, List<String> masterData) throws DataFormatException {
        final int masterCount = masterColumn.length;
        final int dataCount = dataColumn.length;
        if (masterCount != dataCount) {
            throw new DataFormatException("masterColumns and dataColumns lenght is miss match.");
        }

        final Configuration joinConf = this.conf;

        // Reader selection and join settings.
        joinConf.setInt(SimpleJob.READER_TYPE, SimpleJob.SOME_COLUMN_JOIN_READER);
        joinConf.setStrings(SimpleJob.MASTER_LABELS, masterLabels);
        joinConf.setStrings(SimpleJob.JOIN_MASTER_COLUMN, masterColumn);
        joinConf.setStrings(SimpleJob.JOIN_DATA_COLUMN, dataColumn);
        joinConf.setBoolean(SimpleJob.JOIN_REGEX, regex);

        // State kept on the driver itself.
        this.masterData = masterData;
        this.masterSeparator = masterSeparator;
    }

    /**
     * Configures a big join on a single column, i.e. the join intended for master data
     * that does not fit into memory. In this driver the master data is still supplied
     * as an in-memory list of lines.
     *
     * <p>Delegates to the five-argument overload with a {@code null} master separator.</p>
     *
     * @param masterLabels label of master data
     * @param masterColumn master column
     * @param dataColumn data column
     * @param masterData master data
     */
    protected void setBigJoin(String[] masterLabels, String masterColumn, String dataColumn,
            List<String> masterData) {
        setBigJoin(masterLabels, masterColumn, dataColumn, null, masterData);
    }

    /**
     * Configures a big join on a single column, i.e. the join intended for master data
     * that does not fit into memory. In this driver the master data is still supplied
     * as an in-memory list of lines.
     *
     * <p>Selects the single-column join reader, stores the join settings in the
     * configuration, keeps the separator and master data on this driver, and marks the
     * driver as running a big join.</p>
     *
     * @param masterLabels label of master data
     * @param masterColumn master column
     * @param dataColumn data column
     * @param masterSeparator separator of the master data; may be {@code null}
     * @param masterData master data
     */
    protected void setBigJoin(String[] masterLabels, String masterColumn, String dataColumn, String masterSeparator,
            List<String> masterData) {
        final Configuration joinConf = this.conf;

        // Reader selection and join settings.
        joinConf.setInt(SimpleJob.READER_TYPE, SimpleJob.SINGLE_COLUMN_JOIN_READER);
        joinConf.setStrings(SimpleJob.MASTER_LABELS, masterLabels);
        joinConf.set(SimpleJob.JOIN_MASTER_COLUMN, masterColumn);
        joinConf.set(SimpleJob.JOIN_DATA_COLUMN, dataColumn);

        // State kept on the driver itself.
        this.bigJoin = true;
        this.masterData = masterData;
        this.masterSeparator = masterSeparator;
    }

    /**
     * Configures a big join on several columns, i.e. the join intended for master data
     * that does not fit into memory. In this driver the master data is still supplied
     * as an in-memory list of lines.
     *
     * <p>Delegates to the five-argument overload with a {@code null} master separator.</p>
     *
     * @param masterLabels label of master data
     * @param masterColumn master columns
     * @param dataColumn data columns
     * @param masterData master data
     * @throws DataFormatException if the master and data column counts differ
     */
    protected void setBigJoin(String[] masterLabels, String[] masterColumn, String[] dataColumn,
            List<String> masterData) throws DataFormatException {
        setBigJoin(masterLabels, masterColumn, dataColumn, null, masterData);
    }

    /**
     * Configures a big join on several columns, i.e. the join intended for master data
     * that does not fit into memory. In this driver the master data is still supplied
     * as an in-memory list of lines.
     *
     * <p>Selects the multi-column join reader, stores the join settings in the
     * configuration, keeps the separator and master data on this driver, and marks the
     * driver as running a big join.</p>
     *
     * @param masterLabels label of master data
     * @param masterColumn master columns
     * @param dataColumn data columns; must have as many entries as {@code masterColumn}
     * @param masterSeparator separator of the master data; may be {@code null}
     * @param masterData master data
     * @throws DataFormatException if the master and data column counts differ
     */
    protected void setBigJoin(String[] masterLabels, String[] masterColumn, String[] dataColumn,
            String masterSeparator, List<String> masterData) throws DataFormatException {
        final int masterCount = masterColumn.length;
        final int dataCount = dataColumn.length;
        if (masterCount != dataCount) {
            throw new DataFormatException("masterColumns and dataColumns lenght is miss match.");
        }

        final Configuration joinConf = this.conf;

        // Reader selection and join settings.
        joinConf.setInt(SimpleJob.READER_TYPE, SimpleJob.SOME_COLUMN_JOIN_READER);
        joinConf.setStrings(SimpleJob.MASTER_LABELS, masterLabels);
        joinConf.setStrings(SimpleJob.JOIN_MASTER_COLUMN, masterColumn);
        joinConf.setStrings(SimpleJob.JOIN_DATA_COLUMN, dataColumn);

        // State kept on the driver itself.
        this.bigJoin = true;
        this.masterData = masterData;
        this.masterSeparator = masterSeparator;
    }

    /**
     * Configures a single-column join without choosing between simple and big join
     * explicitly. In this test driver the call chain always ends in
     * {@code setSimpleJoin}.
     *
     * <p>Delegates to the six-argument overload with a {@code null} master separator
     * and regex matching disabled.</p>
     *
     * @param masterLabels label of master data
     * @param masterColumn master column
     * @param dataColumn data column
     * @param masterData master data
     */
    protected void setJoin(String[] masterLabels, String masterColumn, String dataColumn, List<String> masterData) {
        setJoin(masterLabels, masterColumn, dataColumn, null, false, masterData);
    }

    /**
     * Configures a single-column join without choosing between simple and big join
     * explicitly. In this test driver the call chain always ends in
     * {@code setSimpleJoin}.
     *
     * <p>Delegates to the six-argument overload with a {@code null} master separator.</p>
     *
     * @param masterLabels label of master data
     * @param masterColumn master column
     * @param dataColumn data column
     * @param regex {@code true} if the master join key is a regular expression
     * @param masterData master data
     */
    protected void setJoin(String[] masterLabels, String masterColumn, String dataColumn, boolean regex,
            List<String> masterData) {
        setJoin(masterLabels, masterColumn, dataColumn, null, regex, masterData);
    }

    /**
     * Configures a single-column join without choosing between simple and big join
     * explicitly. In this test driver no selection takes place: the call is forwarded
     * unchanged to the six-argument {@code setSimpleJoin}.
     *
     * @param masterLabels label of master data
     * @param masterColumn master column
     * @param dataColumn data column
     * @param masterSeparator separator of the master data; may be {@code null}
     * @param regex {@code true} if the master join key is a regular expression
     * @param masterData master data
     */
    protected void setJoin(String[] masterLabels, String masterColumn, String dataColumn, String masterSeparator,
            boolean regex, List<String> masterData) {
        setSimpleJoin(masterLabels, masterColumn, dataColumn, masterSeparator, regex, masterData);
    }

    /**
     * Configures a multi-column join without choosing between simple and big join
     * explicitly. In this test driver the call chain always ends in
     * {@code setSimpleJoin}.
     *
     * <p>Delegates to the six-argument overload with a {@code null} master separator
     * and regex matching disabled.</p>
     *
     * @param masterLabels label of master data
     * @param masterColumn master columns
     * @param dataColumn data columns
     * @param masterData master data
     * @throws DataFormatException if the master and data column counts differ
     */
    protected void setJoin(String[] masterLabels, String[] masterColumn, String[] dataColumn,
            List<String> masterData) throws DataFormatException {
        setJoin(masterLabels, masterColumn, dataColumn, null, false, masterData);
    }

    /**
     * Configures a multi-column join without choosing between simple and big join
     * explicitly. In this test driver the call chain always ends in
     * {@code setSimpleJoin}.
     *
     * <p>Delegates to the six-argument overload with a {@code null} master separator.</p>
     *
     * @param masterLabels label of master data
     * @param masterColumn master columns
     * @param dataColumn data columns
     * @param regex {@code true} if the master join keys are regular expressions
     * @param masterData master data
     * @throws DataFormatException if the master and data column counts differ
     */
    protected void setJoin(String[] masterLabels, String[] masterColumn, String[] dataColumn, boolean regex,
            List<String> masterData) throws DataFormatException {
        setJoin(masterLabels, masterColumn, dataColumn, null, regex, masterData);
    }

    /**
     * Configures a multi-column join without choosing between simple and big join
     * explicitly. In this test driver no selection takes place: the call is forwarded
     * unchanged to the six-argument {@code setSimpleJoin}.
     *
     * @param masterLabels label of master data
     * @param masterColumn master columns
     * @param dataColumn data columns
     * @param masterSeparator separator of the master data; may be {@code null}
     * @param regex {@code true} if the master join keys are regular expressions
     * @param masterData master data
     * @throws DataFormatException if the master and data column counts differ
     */
    protected void setJoin(String[] masterLabels, String[] masterColumn, String[] dataColumn,
            String masterSeparator, boolean regex, List<String> masterData) throws DataFormatException {
        setSimpleJoin(masterLabels, masterColumn, dataColumn, masterSeparator, regex, masterData);
    }

    /**
     * Builds the single-column master lookup from the join settings stored in
     * {@code conf}.
     *
     * @param conf configuration holding the master labels, separator, path and join column
     * @return map from the join column value of each master row to the full row
     * @throws IOException
     * @throws URISyntaxException
     */
    private Map<String, String[]> getSimpleMaster(Configuration conf) throws IOException, URISyntaxException {
        String[] labels = conf.getStrings(SimpleJob.MASTER_LABELS);
        // Position of the join column within the master labels.
        int keyIndex = StringUtil.getMatchNo(labels, conf.get(SimpleJob.JOIN_MASTER_COLUMN));
        return getSimpleMaster(labels, keyIndex, conf.get(SimpleJob.MASTER_PATH),
                conf.get(SimpleJob.MASTER_SEPARATOR));
    }

    /**
     * Builds the single-column master lookup from the in-memory master data.
     *
     * <p>Each master line is split by {@code separator}; lines whose column count
     * differs from the number of master labels are skipped. When several lines share a
     * join value, the last one wins.</p>
     *
     * @param masterLabels labels of the master columns
     * @param joinColumnNo index of the join column within a master row
     * @param path not used by this implementation
     * @param separator separator of the master lines
     * @return map from the join column value of each master row to a copy of the row
     * @throws IOException
     */
    private Map<String, String[]> getSimpleMaster(String[] masterLabels, int joinColumnNo, String path,
            String separator) throws IOException {
        Map<String, String[]> lookup = new HashMap<String, String[]>();
        for (String row : masterData) {
            String[] columns = StringUtil.split(row, separator, false);
            if (columns.length == masterLabels.length) {
                lookup.put(columns[joinColumnNo], columns.clone());
            }
        }
        return lookup;
    }

    /**
     * Builds the multi-column master lookup from the join settings stored in
     * {@code conf}.
     *
     * @param conf configuration holding the master labels, separator, path and join columns
     * @return map from the join column values of each master row to the full row
     * @throws IOException
     * @throws URISyntaxException
     */
    public Map<List<String>, String[]> getSimpleColumnsMaster(Configuration conf)
            throws IOException, URISyntaxException {
        String[] labels = conf.getStrings(SimpleJob.MASTER_LABELS);
        // Positions of the join columns within the master labels.
        int[] keyIndexes = StringUtil.getMatchNos(labels, conf.getStrings(SimpleJob.JOIN_MASTER_COLUMN));
        return getSimpleColumnsMaster(labels, keyIndexes, conf.get(SimpleJob.MASTER_PATH),
                conf.get(SimpleJob.MASTER_SEPARATOR));
    }

    /**
     * Builds the multi-column master lookup from the in-memory master data.
     *
     * <p>Each master line is split by {@code separator}; lines whose column count
     * differs from the number of master labels are skipped. When several lines share
     * the same join values, the last one wins.</p>
     *
     * @param masterLabels labels of the master columns
     * @param joinColumnNo indexes of the join columns within a master row
     * @param path not used by this implementation
     * @param separator separator of the master lines
     * @return map from the join column values of each master row to a copy of the row
     * @throws IOException
     */
    private Map<List<String>, String[]> getSimpleColumnsMaster(String[] masterLabels, int[] joinColumnNo,
            String path, String separator) throws IOException {
        Map<List<String>, String[]> lookup = new HashMap<List<String>, String[]>();

        for (String row : masterData) {
            String[] columns = StringUtil.split(row, separator, false);
            if (columns.length == masterLabels.length) {
                // Composite join key, in the order of the join column indexes.
                List<String> joinKey = new ArrayList<String>(joinColumnNo.length);
                for (int n = 0; n < joinColumnNo.length; n++) {
                    joinKey.add(columns[joinColumnNo[n]]);
                }
                lookup.put(joinKey, columns.clone());
            }
        }
        return lookup;
    }

    /**
     * {@inheritDoc}
     *
     * <p>Stub in this test driver: does nothing and always returns {@code 0}.</p>
     */
    @Override
    public int run(String[] args) throws Exception {
        return 0;
    }

    /**
     * {@inheritDoc}
     *
     * <p>Stub in this test driver: always returns {@code null}.</p>
     */
    @Override
    protected String setInputPath(String[] arg0) {
        return null;
    }

    /**
     * {@inheritDoc}
     *
     * <p>Stub in this test driver: always returns {@code null}.</p>
     */
    @Override
    protected String setOutputPath(String[] arg0) {
        return null;
    }

    /**
     * {@inheritDoc}
     *
     * <p>Empty in this test driver.</p>
     */
    @Override
    protected void setup() throws Exception {
    }

    /**
     * Sets the input file name that is stored under {@code FILE_NAME} in the input key
     * when text lines are run. A default name is used when none has been set.
     * @param fileName the fileName to set
     */
    public void setFileName(String fileName) {
        this.fileName = fileName;
    }

    /**
     * Sets the input file length that is stored under {@code FILE_LENGTH} in the input
     * key when text lines are run.
     * @param fileLength the fileLength to set
     */
    public void setFileLength(long fileLength) {
        this.fileLength = fileLength;
    }
}