Example usage for org.apache.hadoop.mapred JobConf getDouble

Introduction

On this page you can find example usage of org.apache.hadoop.mapred JobConf getDouble.

Prototype

public double getDouble(String name, double defaultValue) 

Source Link

Document

Get the value of the name property as a double.
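
A minimal sketch of the call itself, before the full example below. The property names example.sampling.rate and example.unset.property are invented for illustration: when the property is set, getDouble parses its string value as a double; when it is absent, the supplied default is returned.

import org.apache.hadoop.mapred.JobConf;

public class GetDoubleExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Property is present: the stored string is parsed as a double.
        conf.set("example.sampling.rate", "0.25");
        double rate = conf.getDouble("example.sampling.rate", 1.0);     // 0.25

        // Property is absent: the supplied default value is returned.
        double missing = conf.getDouble("example.unset.property", 0.5); // 0.5

        System.out.println(rate + " " + missing);
    }
}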

Usage

From source file: com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java

License: Open Source License

/**
 * @param job           job configuration to populate
 * @param inputIndexes  byte indexes of the input matrices
 * @param inputs        HDFS paths of the input matrices
 * @param inputInfos    input format information for each input
 * @param brlens        number of rows per block for each input
 * @param bclens        number of columns per block for each input
 * @param distCacheOnly flags for inputs that are only required in the distributed cache
 * @param setConverter  whether to configure converter information for each input
 * @param target        conversion target used when setting converters
 * @throws Exception    if the number of inputs and inputInfos does not match
 */
public static void setUpMultipleInputs(JobConf job, byte[] inputIndexes, String[] inputs,
        InputInfo[] inputInfos, int[] brlens, int[] bclens, boolean[] distCacheOnly, boolean setConverter,
        ConvertTarget target) throws Exception {
    if (inputs.length != inputInfos.length)
        throw new Exception("number of inputs and inputInfos does not match");

    //set up names of the input matrices and their inputformat information
    job.setStrings(INPUT_MATRICIES_DIRS_CONFIG, inputs);
    MRJobConfiguration.setMapFunctionInputMatrixIndexes(job, inputIndexes);

    //set up converter infos (converter determined implicitly)
    if (setConverter) {
        for (int i = 0; i < inputs.length; i++)
            setInputInfo(job, inputIndexes[i], inputInfos[i], brlens[i], bclens[i], target);
    }

    //remove redundant inputs and pure broadcast variables
    ArrayList<Path> lpaths = new ArrayList<Path>();
    ArrayList<InputInfo> liinfos = new ArrayList<InputInfo>();
    for (int i = 0; i < inputs.length; i++) {
        Path p = new Path(inputs[i]);

        //check and skip redundant inputs
        if (lpaths.contains(p) //path already included
                || distCacheOnly[i]) //input only required in dist cache
        {
            continue;
        }

        lpaths.add(p);
        liinfos.add(inputInfos[i]);
    }

    boolean combineInputFormat = false;
    if (OptimizerUtils.ALLOW_COMBINE_FILE_INPUT_FORMAT) {
        //determine total input sizes
        double totalInputSize = 0;
        for (int i = 0; i < inputs.length; i++)
            totalInputSize += MapReduceTool.getFilesizeOnHDFS(new Path(inputs[i]));

        //set max split size (default blocksize) to 2x blocksize if (1) sort buffer large enough, 
        //(2) degree of parallelism not hurt, and only a single input (except broadcasts)
        //(the sort buffer size is relevant for pass-through of, potentially modified, inputs to the reducers)
        //(the single input constraint stems from internal runtime assumptions used to relate meta data to inputs)
        long sizeSortBuff = InfrastructureAnalyzer.getRemoteMaxMemorySortBuffer();
        long sizeHDFSBlk = InfrastructureAnalyzer.getHDFSBlockSize();
        long newSplitSize = sizeHDFSBlk * 2;
        double spillPercent = job.getDouble("mapreduce.map.sort.spill.percent", 1.0);
        int numPMap = OptimizerUtils.getNumMappers();
        if (numPMap < totalInputSize / newSplitSize && sizeSortBuff * spillPercent >= newSplitSize
                && lpaths.size() == 1) {
            job.setLong("mapreduce.input.fileinputformat.split.maxsize", newSplitSize);
            combineInputFormat = true;
        }
    }

    //add inputs to jobs input (incl input format configuration)
    for (int i = 0; i < lpaths.size(); i++) {
        //add input to job inputs (for binaryblock we use CombineSequenceFileInputFormat to reduce task latency)
        if (combineInputFormat && liinfos.get(i) == InputInfo.BinaryBlockInputInfo)
            MultipleInputs.addInputPath(job, lpaths.get(i), CombineSequenceFileInputFormat.class);
        else
            MultipleInputs.addInputPath(job, lpaths.get(i), liinfos.get(i).inputFormatClass);
    }
}
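
In this example, getDouble reads the mapreduce.map.sort.spill.percent property (defaulting to 1.0) so that the effective sort-buffer capacity, sizeSortBuff * spillPercent, can be compared against the doubled split size before enabling CombineSequenceFileInputFormat.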