List of usage examples for org.apache.hadoop.mapred.JobConf.setBoolean

public void setBoolean(String name, boolean value)

Sets the value of the name property to a boolean.
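A minimal sketch of the call before the collected examples below; the property name "example.feature.enabled" is illustrative, not taken from any of the snippets. JobConf extends Configuration, so a value stored with setBoolean can be read back with getBoolean(name, defaultValue).

import org.apache.hadoop.mapred.JobConf;

public class SetBooleanSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        // store a boolean under an arbitrary (hypothetical) property name
        job.setBoolean("example.feature.enabled", true);
        // read it back; the second argument is the default used
        // when the property was never set
        boolean enabled = job.getBoolean("example.feature.enabled", false);
        System.out.println(enabled); // prints: true
    }
}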
From source file:com.facebook.presto.hive.parquet.ParquetTester.java
License:Apache License
public void assertRoundTrip(ObjectInspector objectInspector, Iterable<?> writeValues, Iterable<?> readValues,
        Type type) throws Exception {
    for (WriterVersion version : versions) {
        for (CompressionCodecName compressionCodecName : compressions) {
            try (TempFile tempFile = new TempFile("test", "parquet")) {
                JobConf jobConf = new JobConf();
                jobConf.setEnum(ParquetOutputFormat.COMPRESSION, compressionCodecName);
                jobConf.setBoolean(ParquetOutputFormat.ENABLE_DICTIONARY, true);
                jobConf.setEnum(ParquetOutputFormat.WRITER_VERSION, version);
                writeParquetColumn(jobConf, tempFile.getFile(), compressionCodecName, objectInspector,
                        writeValues.iterator());
                assertFileContents(jobConf, objectInspector, tempFile, readValues, type);
            }
        }
    }
}
From source file:com.ibm.bi.dml.runtime.matrix.CSVReblockMR.java
License:Open Source License
public static AssignRowIDMRReturn runAssignRowIDMRJob(String[] inputs, InputInfo[] inputInfos, int[] brlens,
        int[] bclens, String reblockInstructions, int replication, String[] smallestFiles, boolean transform,
        String naStrings, String specFile) throws Exception {
    AssignRowIDMRReturn ret = new AssignRowIDMRReturn();
    JobConf job;
    job = new JobConf(CSVReblockMR.class);
    job.setJobName("Assign-RowID-MR");

    byte[] realIndexes = new byte[inputs.length];
    for (byte b = 0; b < realIndexes.length; b++)
        realIndexes[b] = b;

    //set up the input files and their format information
    MRJobConfiguration.setUpMultipleInputs(job, realIndexes, inputs, inputInfos, brlens, bclens, false,
            ConvertTarget.CELL);
    job.setStrings(SMALLEST_FILE_NAME_PER_INPUT, smallestFiles);

    //set up the aggregate instructions that will happen in the combiner and reducer
    MRJobConfiguration.setCSVReblockInstructions(job, reblockInstructions);

    //set up the replication factor for the results
    job.setInt("dfs.replication", replication);

    //set up the number of reducers
    job.setNumReduceTasks(1);

    // Print the complete instruction
    //if (LOG.isTraceEnabled())
    //    inst.printCompelteMRJobInstruction();

    // configure mapper and the mapper output key value pairs
    job.setMapperClass(CSVAssignRowIDMapper.class);
    job.setMapOutputKeyClass(ByteWritable.class);
    job.setMapOutputValueClass(OffsetCount.class);

    //configure reducer
    job.setReducerClass(CSVAssignRowIDReducer.class);

    //turn off adaptivemr
    job.setBoolean("adaptivemr.map.enable", false);

    //set unique working dir
    MRJobConfiguration.setUniqueWorkingDir(job);

    //set up the output file
    ret.counterFile = new Path(MRJobConfiguration.constructTempOutputFilename());
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, ret.counterFile);
    job.setOutputKeyClass(ByteWritable.class);
    job.setOutputValueClass(OffsetCount.class);

    // setup properties relevant to transform
    job.setBoolean(MRJobConfiguration.TF_TRANSFORM, transform);
    if (transform) {
        if (naStrings != null)
            // Adding "dummy" string to handle the case of na_strings = ""
            job.set(MRJobConfiguration.TF_NA_STRINGS, TfUtils.prepNAStrings(naStrings));
        job.set(MRJobConfiguration.TF_SPEC_FILE, specFile);
    }

    RunningJob runjob = JobClient.runJob(job);

    /* Process different counters */
    Group rgroup = runjob.getCounters().getGroup(NUM_ROWS_IN_MATRIX);
    Group cgroup = runjob.getCounters().getGroup(NUM_COLS_IN_MATRIX);
    ret.rlens = new long[inputs.length];
    ret.clens = new long[inputs.length];
    for (int i = 0; i < inputs.length; i++) {
        // number of non-zeros
        ret.rlens[i] = rgroup.getCounter(Integer.toString(i));
        ret.clens[i] = cgroup.getCounter(Integer.toString(i));
    }
    return ret;
}
From source file:com.ibm.bi.dml.runtime.matrix.CSVReblockMR.java
License:Open Source License
private static JobReturn runCSVReblockJob(MRJobInstruction inst, String[] inputs, InputInfo[] inputInfos,
        long[] rlens, long[] clens, int[] brlens, int[] bclens, String reblockInstructions,
        String otherInstructionsInReducer, int numReducers, int replication, byte[] resultIndexes,
        String[] outputs, OutputInfo[] outputInfos, Path counterFile, String[] smallestFiles) throws Exception {
    JobConf job;
    job = new JobConf(ReblockMR.class);
    job.setJobName("CSV-Reblock-MR");

    byte[] realIndexes = new byte[inputs.length];
    for (byte b = 0; b < realIndexes.length; b++)
        realIndexes[b] = b;

    //set up the input files and their format information
    MRJobConfiguration.setUpMultipleInputs(job, realIndexes, inputs, inputInfos, brlens, bclens, false,
            ConvertTarget.CELL);
    job.setStrings(SMALLEST_FILE_NAME_PER_INPUT, smallestFiles);

    //set up the dimensions of input matrices
    MRJobConfiguration.setMatricesDimensions(job, realIndexes, rlens, clens);

    //set up the block size
    MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);

    //set up the aggregate instructions that will happen in the combiner and reducer
    MRJobConfiguration.setCSVReblockInstructions(job, reblockInstructions);

    //set up the instructions that will happen in the reducer, after the aggregation instructions
    MRJobConfiguration.setInstructionsInReducer(job, otherInstructionsInReducer);

    //set up the replication factor for the results
    job.setInt("dfs.replication", replication);

    //set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
        MRJobConfiguration.addBinaryBlockSerializationFramework(job);

    //set up what matrices are needed to pass from the mapper to reducer
    HashSet<Byte> mapoutputIndexes = MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes, null,
            reblockInstructions, null, otherInstructionsInReducer, resultIndexes);

    MatrixChar_N_ReducerGroups ret = MRJobConfiguration.computeMatrixCharacteristics(job, realIndexes, null,
            reblockInstructions, null, null, otherInstructionsInReducer, resultIndexes, mapoutputIndexes,
            false);
    MatrixCharacteristics[] stats = ret.stats;

    //set up the number of reducers
    int numRed = WriteCSVMR.determineNumReducers(rlens, clens,
            ConfigurationManager.getConfig().getIntValue(DMLConfig.NUM_REDUCERS), ret.numReducerGroups);
    job.setNumReduceTasks(numRed);

    // Print the complete instruction
    //if (LOG.isTraceEnabled())
    //    inst.printCompelteMRJobInstruction(stats);

    // Update resultDimsUnknown based on computed "stats"
    byte[] resultDimsUnknown = new byte[resultIndexes.length];
    for (int i = 0; i < resultIndexes.length; i++) {
        if (stats[i].getRows() == -1 || stats[i].getCols() == -1) {
            resultDimsUnknown[i] = (byte) 1;
        } else {
            resultDimsUnknown[i] = (byte) 0;
        }
    }

    //set up the multiple output files, and their format information
    MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, resultDimsUnknown, outputs, outputInfos, true,
            true);

    // configure mapper and the mapper output key value pairs
    job.setMapperClass(CSVReblockMapper.class);
    job.setMapOutputKeyClass(TaggedFirstSecondIndexes.class);
    job.setMapOutputValueClass(BlockRow.class);

    //configure reducer
    job.setReducerClass(CSVReblockReducer.class);

    //turn off adaptivemr
    job.setBoolean("adaptivemr.map.enable", false);

    //set unique working dir
    MRJobConfiguration.setUniqueWorkingDir(job);

    Path cachefile = new Path(counterFile, "part-00000");
    DistributedCache.addCacheFile(cachefile.toUri(), job);
    DistributedCache.createSymlink(job);
    job.set(ROWID_FILE_NAME, cachefile.toString());

    RunningJob runjob = JobClient.runJob(job);

    MapReduceTool.deleteFileIfExistOnHDFS(counterFile, job);

    /* Process different counters */
    Group group = runjob.getCounters().getGroup(MRJobConfiguration.NUM_NONZERO_CELLS);
    for (int i = 0; i < resultIndexes.length; i++) {
        // number of non-zeros
        stats[i].setNonZeros(group.getCounter(Integer.toString(i)));
        // System.out.println("result #"+resultIndexes[i]+" ===>\n"+stats[i]);
    }
    return new JobReturn(stats, outputInfos, runjob.isSuccessful());
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
public static void setMatrixValueClass(JobConf job, boolean blockRepresentation) {
    job.setBoolean(BLOCK_REPRESENTATION_CONFIG, blockRepresentation);
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
public static void setMatrixValueClassForCM_N_COM(JobConf job, boolean weightedCellRepresentation) {
    job.setBoolean(WEIGHTEDCELL_REPRESENTATION_CONFIG, weightedCellRepresentation);
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
public static void setParforCachingConfig(JobConf job, boolean flag) {
    job.setBoolean(PARFOR_CACHING_CONFIG, flag);
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
public static void setPartitioningInfo(JobConf job, long rlen, long clen, int brlen, int bclen, InputInfo ii,
        OutputInfo oi, PDataPartitionFormat dpf, int n, String fnameNew, String itervar, String matrixvar,
        boolean tSparseCol) throws DMLRuntimeException {
    //set basic partitioning information
    setPartitioningInfo(job, rlen, clen, brlen, bclen, ii, oi, dpf, n, fnameNew);

    //set iteration variable name (used for ParFor-DPE)
    job.set(PARTITIONING_ITERVAR_CONFIG, itervar);

    //set matrix variable name (used for ParFor-DPE)
    job.set(PARTITIONING_MATRIXVAR_CONFIG, matrixvar);

    //set transpose sparse column vector
    job.setBoolean(PARTITIONING_TRANSPOSE_COL_CONFIG, tSparseCol);
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
public static void setPartitioningInfo(JobConf job, long rlen, long clen, int brlen, int bclen, InputInfo ii,
        OutputInfo oi, PDataPartitionFormat dpf, int n, String fnameNew, boolean keepIndexes)
        throws DMLRuntimeException {
    //set basic partitioning information
    setPartitioningInfo(job, rlen, clen, brlen, bclen, ii, oi, dpf, n, fnameNew);

    //set whether to keep the original indexes in the partitioned output
    job.setBoolean(PARTITIONING_OUTPUT_KEEP_INDEXES_CONFIG, keepIndexes);
}
From source file:com.ibm.bi.dml.runtime.matrix.sort.PickFromCompactInputFormat.java
License:Open Source License
public static Set<Integer> setPickRecordsInEachPartFile(JobConf job, NumItemsByEachReducerMetaData metadata,
        double[] probs) {
    HashMap<Integer, ArrayList<Pair<Integer, Integer>>> posMap =
            new HashMap<Integer, ArrayList<Pair<Integer, Integer>>>();

    getPointsInEachPartFile(metadata.getNumItemsArray(), probs, posMap);

    for (Entry<Integer, ArrayList<Pair<Integer, Integer>>> e : posMap.entrySet()) {
        job.set(SELECTED_POINTS_PREFIX + e.getKey(), getString(e.getValue()));
        //System.out.println(e.getKey()+": "+getString(e.getValue()));
    }

    job.setBoolean(INPUT_IS_VECTOR, true);
    return posMap.keySet();
}
From source file:com.ibm.bi.dml.runtime.matrix.sort.PickFromCompactInputFormat.java
License:Open Source License
public static void setRangePickPartFiles(JobConf job, NumItemsByEachReducerMetaData metadata, double lbound,
        double ubound) {
    if (lbound < 0 || lbound > 1 || ubound < 0 || ubound > 1 || lbound >= ubound) {
        throw new RuntimeException("Invalid ranges for range pick: [" + lbound + "," + ubound + "]");
    }

    long[] counts = metadata.getNumItemsArray();
    long[] ranges = new long[counts.length];
    ranges[0] = counts[0];
    for (int i = 1; i < counts.length; i++)
        ranges[i] = ranges[i - 1] + counts[i];
    long sumwt = ranges[ranges.length - 1];

    double qbegin = lbound * sumwt;
    double qend = ubound * sumwt;

    // Find part files that overlap with range [qbegin,qend]
    int partID = -1;
    long wt = 0;

    // scan until the part containing qbegin
    while (wt < qbegin) {
        partID++;
        wt += counts[partID];
    }

    StringBuilder sb = new StringBuilder();
    while (wt <= qend) {
        sb.append(partID + "," + (wt - counts[partID]) + ";"); // partID, weight until this part
        partID++;
        if (partID < counts.length)
            wt += counts[partID];
    }
    sb.append(partID + "," + (wt - counts[partID]) + ";");
    sb.append(sumwt + "," + lbound + "," + ubound);

    //System.out.println("range string: " + sb.toString());
    job.set(SELECTED_RANGES, sb.toString());
    job.setBoolean(INPUT_IS_VECTOR, false);
}
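The examples above all set flags on the job-submission side. For completeness, a minimal sketch of the consumer side, assuming a hypothetical mapper and an illustrative property name ("example.input.is.vector"): flags stored with setBoolean are typically read back in a task via Configuration#getBoolean, often from JobConfigurable#configure.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;

// Hypothetical mapper base class showing how a flag set with
// JobConf.setBoolean is read back inside a task.
public class FlagAwareMapper extends MapReduceBase {
    private boolean inputIsVector;

    @Override
    public void configure(JobConf job) {
        // getBoolean returns the stored value, or the supplied
        // default if the property was never set
        inputIsVector = job.getBoolean("example.input.is.vector", false);
    }
}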