Example usage for org.apache.hadoop.mapred JobConf getBoolean

List of usage examples for org.apache.hadoop.mapred JobConf getBoolean

Introduction

On this page you can find example usage of org.apache.hadoop.mapred JobConf getBoolean.

Prototype

public boolean getBoolean(String name, boolean defaultValue) 

Document

Get the value of the name property as a boolean. If no such property exists, or its value is not a valid boolean, defaultValue is returned.
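
A minimal sketch of the call pattern (the property names below are made up for illustration): a driver stores a flag with setBoolean, and getBoolean reads it back, returning the supplied default when the property is absent.

import org.apache.hadoop.mapred.JobConf;

JobConf conf = new JobConf();
conf.setBoolean("example.feature.enabled", true);                     // hypothetical property name
boolean enabled = conf.getBoolean("example.feature.enabled", false);  // true
boolean missing = conf.getBoolean("some.unset.flag", false);          // falls back to false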

Usage

From source file:nl.tudelft.graphalytics.mapreducev2.evo.UndirectedForestFireModelReducer.java

License:Apache License

@Override
public void configure(JobConf conf) {
    this.isInit = conf.getBoolean(ForestFireModelUtils.IS_INIT, false);
    this.isFinal = conf.getBoolean(ForestFireModelUtils.IS_FINAL, false);
    this.maxID = conf.getLong(ForestFireModelUtils.MAX_ID, -1);
    this.pRatio = conf.getFloat(ForestFireModelUtils.P_RATIO, 0);
}
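
On the submitting side, these flags would typically be written into the JobConf with the matching setters before the job runs; a hedged sketch reusing the same ForestFireModelUtils constants (their actual string values, and sensible numbers, live in that project), as a counterpart to the configure() method above:

JobConf conf = new JobConf();
conf.setBoolean(ForestFireModelUtils.IS_INIT, true);   // first iteration of the model
conf.setBoolean(ForestFireModelUtils.IS_FINAL, false);
conf.setLong(ForestFireModelUtils.MAX_ID, 1000000L);   // illustrative values only
conf.setFloat(ForestFireModelUtils.P_RATIO, 0.5f);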

From source file:org.apache.avro.mapred.AvroMultipleOutputs.java

License:Apache License

/**
 * Returns if a named output is multiple.
 *
 * @param conf        job conf
 * @param namedOutput named output
 * @return <code>true</code> if the named output is multi, <code>false</code>
 *         if it is single. If the named output is not defined it returns
 *         <code>false</code>.
 */
public static boolean isMultiNamedOutput(JobConf conf, String namedOutput) {
    checkNamedOutput(conf, namedOutput, false);
    return conf.getBoolean(MO_PREFIX + namedOutput + MULTI, false);
}
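
The multi flag is stored under a per-output configuration key built from the named output's name; a rough sketch of that pattern with placeholder key fragments (the real MO_PREFIX and MULTI constants are defined inside AvroMultipleOutputs):

String MO_PREFIX = "mo.namedOutput.";   // placeholder for the real prefix constant
String MULTI = ".multi";                // placeholder for the real suffix constant
JobConf conf = new JobConf();
conf.setBoolean(MO_PREFIX + "avro1" + MULTI, true);
boolean isMulti = conf.getBoolean(MO_PREFIX + "avro1" + MULTI, false); // true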

From source file:org.apache.avro.mapred.AvroMultipleOutputs.java

License:Apache License

/**
 * Returns if the counters for the named outputs are enabled or not.
 * <p/>
 * By default these counters are disabled.
 * <p/>
 * MultipleOutputs supports counters, but by default they are disabled.
 * The counters group is the {@link MultipleOutputs} class name.
 * </p>
 * The names of the counters are the same as the named outputs. For multi
 * named outputs the name of the counter is the concatenation of the named
 * output, and underscore '_' and the multiname.
 *
 *
 * @param conf    job conf to read the setting from.
 * @return TRUE if the counters are enabled, FALSE if they are disabled.
 */
public static boolean getCountersEnabled(JobConf conf) {
    return conf.getBoolean(COUNTERS_ENABLED, false);
}
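
Enabling the counters is a matter of flipping this single class-wide flag. A hedged sketch of the round trip, assuming the class also exposes the matching setCountersEnabled(JobConf, boolean) setter, as Hadoop's own MultipleOutputs does:

JobConf conf = new JobConf();
AvroMultipleOutputs.setCountersEnabled(conf, true);
boolean enabled = AvroMultipleOutputs.getCountersEnabled(conf); // true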

From source file:org.apache.avro.mapred.tether.TetheredProcess.java

License:Apache License

private Process startSubprocess(JobConf job) throws IOException, InterruptedException {
    // get the executable command
    List<String> command = new ArrayList<String>();

    String executable = "";
    if (job.getBoolean(TetherJob.TETHER_EXEC_CACHED, false)) {
        //we want to use the cached executable
        Path[] localFiles = DistributedCache.getLocalCacheFiles(job);
        if (localFiles == null) { // until MAPREDUCE-476
            URI[] files = DistributedCache.getCacheFiles(job);
            localFiles = new Path[] { new Path(files[0].toString()) };
        }
        executable = localFiles[0].toString();
        FileUtil.chmod(executable.toString(), "a+x");
    } else {
        executable = job.get(TetherJob.TETHER_EXEC);
    }

    command.add(executable);

    // Add the executable arguments. We assume the arguments are separated by
    // spaces so we split the argument string based on spaces and add each
    // token to command We need to do it this way because
    // TaskLog.captureOutAndError will put quote marks around each argument so
    // if we pass a single string containing all arguments we get quoted
    // incorrectly
    String args = job.get(TetherJob.TETHER_EXEC_ARGS);
    String[] aparams = args.split("\n");
    for (int i = 0; i < aparams.length; i++) {
        aparams[i] = aparams[i].trim();
        if (aparams[i].length() > 0) {
            command.add(aparams[i]);
        }
    }

    if (System.getProperty("hadoop.log.dir") == null && System.getenv("HADOOP_LOG_DIR") != null)
        System.setProperty("hadoop.log.dir", System.getenv("HADOOP_LOG_DIR"));

    // wrap the command in a stdout/stderr capture
    TaskAttemptID taskid = TaskAttemptID.forName(job.get("mapred.task.id"));
    File stdout = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDOUT);
    File stderr = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDERR);
    long logLength = TaskLog.getTaskLogLength(job);
    command = TaskLog.captureOutAndError(command, stdout, stderr, logLength);
    stdout.getParentFile().mkdirs();
    stderr.getParentFile().mkdirs();

    // add output server's port to env
    Map<String, String> env = new HashMap<String, String>();
    env.put("AVRO_TETHER_OUTPUT_PORT", Integer.toString(outputServer.getPort()));

    // start child process
    ProcessBuilder builder = new ProcessBuilder(command);
    System.out.println(command);
    builder.environment().putAll(env);
    return builder.start();
}
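
Whether the cached-executable branch above is taken is decided by the same boolean on the submitting side. A hedged sketch, assuming the TetherJob constants are visible to the calling code (in practice TetherJob's own setup methods populate these properties) and using a made-up executable path:

JobConf job = new JobConf();
job.setBoolean(TetherJob.TETHER_EXEC_CACHED, false);                 // run a locally installed binary
job.set(TetherJob.TETHER_EXEC, "/usr/local/bin/my_tether_program");  // hypothetical path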

From source file:org.apache.avro.mapred.tether.TetherJob.java

License:Apache License

private static void setupTetherJob(JobConf job) throws IOException {
    job.setMapRunnerClass(TetherMapRunner.class);
    job.setPartitionerClass(TetherPartitioner.class);
    job.setReducerClass(TetherReducer.class);

    job.setInputFormat(TetherInputFormat.class);
    job.setOutputFormat(TetherOutputFormat.class);

    job.setOutputKeyClass(TetherData.class);
    job.setOutputKeyComparatorClass(TetherKeyComparator.class);
    job.setMapOutputValueClass(NullWritable.class);

    // set the map output key class to TetherData
    job.setMapOutputKeyClass(TetherData.class);

    // add TetherKeySerialization to io.serializations
    Collection<String> serializations = job.getStringCollection("io.serializations");
    if (!serializations.contains(TetherKeySerialization.class.getName())) {
        serializations.add(TetherKeySerialization.class.getName());
        job.setStrings("io.serializations", serializations.toArray(new String[0]));
    }

    // determine whether the executable should be added to the cache.
    if (job.getBoolean(TETHER_EXEC_CACHED, false)) {
        DistributedCache.addCacheFile(getExecutable(job), job);
    }
}

From source file:org.apache.eagle.jpm.analyzer.mr.suggestion.MapReduceCompressionSettingProcessor.java

License:Apache License

@Override
public Result.ProcessorResult process(MapReduceAnalyzerEntity jobAnalysisEntity) {
    StringBuilder sb = new StringBuilder();
    List<String> optSettings = new ArrayList<>();

    JobConf jobconf = new JobConf(context.getJobconf());
    if (jobconf.getLong(NUM_REDUCES, 0) > 0) {
        if (!jobconf.getCompressMapOutput()) {
            optSettings.add(String.format("%s=true", MAP_OUTPUT_COMPRESS));
            sb.append("Please set " + MAP_OUTPUT_COMPRESS + " to true to reduce network IO.\n");
        } else {
            String codecClassName = jobconf.get(MAP_OUTPUT_COMPRESS_CODEC);
            if (!(codecClassName.endsWith("LzoCodec") || codecClassName.endsWith("SnappyCodec"))) {
                optSettings.add(String.format("%s=LzoCodec or SnappyCodec", MAP_OUTPUT_COMPRESS_CODEC));
                sb.append("Best practice: use LzoCodec or SnappyCodec for " + MAP_OUTPUT_COMPRESS_CODEC)
                        .append("\n");
            }
        }
    }

    if (!jobconf.getBoolean(FileOutputFormat.COMPRESS, false)) {
        optSettings.add(String.format("%s=true", FileOutputFormat.COMPRESS));
        sb.append(
                "Please set " + FileOutputFormat.COMPRESS + " to true to reduce disk usage and network IO.\n");
    } else {
        String codecName = jobconf.get(FileOutputFormat.COMPRESS_CODEC, "");
        String outputFileFormat = jobconf.get(OUTPUT_FORMAT_CLASS_ATTR, "");

        if ((codecName.endsWith("GzipCodec") || codecName.endsWith("SnappyCodec")
                || codecName.endsWith("DefaultCodec")) && outputFileFormat.endsWith("TextOutputFormat")) {
            sb.append("Best practice: don't use Gzip/Snappy/DefaultCodec with TextOutputFormat");
            sb.append(" as this will cause the output files to be unsplittable. ");
            sb.append("Please use LZO instead or ");
            sb.append("use a container file format such as SequenceFileOutputFormat.\n");
        }
    }

    if (sb.length() > 0) {
        return new Result.ProcessorResult(Result.RuleType.COMPRESS, Result.ResultLevel.INFO, sb.toString(),
                optSettings);
    }
    return null;
}
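
If a job owner wanted to follow these suggestions, the settings can also be applied through the old-API helpers rather than raw keys; a hedged sketch, assuming Snappy is available on the cluster:

import org.apache.hadoop.io.compress.SnappyCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

JobConf conf = new JobConf();
// compress intermediate map output to cut shuffle network IO
conf.setCompressMapOutput(true);
conf.setMapOutputCompressorClass(SnappyCodec.class);
// compress the final job output to save disk space and network IO
FileOutputFormat.setCompressOutput(conf, true);
FileOutputFormat.setOutputCompressorClass(conf, SnappyCodec.class);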

From source file:org.apache.flink.batch.connectors.hive.HiveTableOutputFormat.java

License:Apache License

private HivePartitionWriter writerForLocation(String location) throws IOException {
    JobConf clonedConf = new JobConf(jobConf);
    clonedConf.set(OUTDIR, location);
    OutputFormat outputFormat;
    try {
        StorageDescriptor sd = hiveTablePartition.getStorageDescriptor();
        Class outputFormatClz = Class.forName(sd.getOutputFormat(), true,
                Thread.currentThread().getContextClassLoader());
        outputFormatClz = HiveFileFormatUtils.getOutputFormatSubstitute(outputFormatClz);
        outputFormat = (OutputFormat) outputFormatClz.newInstance();
    } catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) {
        throw new FlinkRuntimeException("Unable to instantiate the hadoop output format", e);
    }
    ReflectionUtils.setConf(outputFormat, clonedConf);
    OutputCommitter outputCommitter = clonedConf.getOutputCommitter();
    JobContext jobContext = new JobContextImpl(clonedConf, new JobID());
    outputCommitter.setupJob(jobContext);
    final boolean isCompressed = clonedConf.getBoolean(HiveConf.ConfVars.COMPRESSRESULT.varname, false);
    if (isCompressed) {
        String codecStr = clonedConf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC.varname);
        if (!StringUtils.isNullOrWhitespaceOnly(codecStr)) {
            try {
                Class<? extends CompressionCodec> codec = (Class<? extends CompressionCodec>) Class
                        .forName(codecStr, true, Thread.currentThread().getContextClassLoader());
                FileOutputFormat.setOutputCompressorClass(clonedConf, codec);
            } catch (ClassNotFoundException e) {
                throw new RuntimeException(e);
            }
        }
        String typeStr = clonedConf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE.varname);
        if (!StringUtils.isNullOrWhitespaceOnly(typeStr)) {
            SequenceFile.CompressionType style = SequenceFile.CompressionType.valueOf(typeStr);
            SequenceFileOutputFormat.setOutputCompressionType(clonedConf, style);
        }
    }
    String taskPartition = String.valueOf(clonedConf.getInt("mapreduce.task.partition", -1));
    Path taskPath = FileOutputFormat.getTaskOutputPath(clonedConf, taskPartition);
    FileSinkOperator.RecordWriter recordWriter;
    try {
        recordWriter = HiveFileFormatUtils.getRecordWriter(clonedConf, outputFormat, outputClass, isCompressed,
                tblProperties, taskPath, Reporter.NULL);
    } catch (HiveException e) {
        throw new IOException(e);
    }
    return new HivePartitionWriter(clonedConf, outputFormat, recordWriter, outputCommitter);
}
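
The isCompressed flag above comes straight from Hive's output-compression switch; a minimal sketch of setting and reading it on a JobConf (HiveConf.ConfVars.COMPRESSRESULT resolves to Hive's hive.exec.compress.output property):

JobConf conf = new JobConf();
conf.setBoolean(HiveConf.ConfVars.COMPRESSRESULT.varname, true);
boolean isCompressed = conf.getBoolean(HiveConf.ConfVars.COMPRESSRESULT.varname, false); // true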

From source file:org.apache.hive.hcatalog.mapreduce.HCatBaseInputFormat.java

License:Apache License

private List<String> setInputPath(JobConf jobConf, String location) throws IOException {

    // ideally we should just call FileInputFormat.setInputPaths() here - but
    // that won't work since FileInputFormat.setInputPaths() needs
    // a Job object instead of a JobContext which we are handed here

    int length = location.length();
    int curlyOpen = 0;
    int pathStart = 0;
    boolean globPattern = false;
    List<String> pathStrings = new ArrayList<String>();

    for (int i = 0; i < length; i++) {
        char ch = location.charAt(i);
        switch (ch) {
        case '{': {
            curlyOpen++;
            if (!globPattern) {
                globPattern = true;
            }
            break;
        }
        case '}': {
            curlyOpen--;
            if (curlyOpen == 0 && globPattern) {
                globPattern = false;
            }
            break;
        }
        case ',': {
            if (!globPattern) {
                pathStrings.add(location.substring(pathStart, i));
                pathStart = i + 1;
            }
            break;
        }
        }
    }
    pathStrings.add(location.substring(pathStart, length));

    String separator = "";
    StringBuilder str = new StringBuilder();

    boolean ignoreInvalidPath = jobConf.getBoolean(HCatConstants.HCAT_INPUT_IGNORE_INVALID_PATH_KEY,
            HCatConstants.HCAT_INPUT_IGNORE_INVALID_PATH_DEFAULT);
    Iterator<String> pathIterator = pathStrings.iterator();
    while (pathIterator.hasNext()) {
        String pathString = pathIterator.next();
        if (ignoreInvalidPath && org.apache.commons.lang.StringUtils.isBlank(pathString)) {
            continue;
        }
        Path path = new Path(pathString);
        FileSystem fs = path.getFileSystem(jobConf);
        if (ignoreInvalidPath && !fs.exists(path)) {
            pathIterator.remove();
            continue;
        }
        final String qualifiedPath = fs.makeQualified(path).toString();
        str.append(separator).append(StringUtils.escapeString(qualifiedPath));
        separator = StringUtils.COMMA_STR;
    }

    if (!ignoreInvalidPath || !pathStrings.isEmpty()) {
        jobConf.set("mapred.input.dir", str.toString());
    }
    return pathStrings;
}
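
As an example, a location such as /data/{2019,2020}/part*,/extra/dir is split into two paths here, because the comma inside the curly-brace glob is not treated as a separator. Blank or non-existent entries are only skipped when the ignore-invalid-path flag is enabled; a hedged sketch of turning it on, using the same HCatConstants key referenced above:

JobConf jobConf = new JobConf();
jobConf.setBoolean(HCatConstants.HCAT_INPUT_IGNORE_INVALID_PATH_KEY, true);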

From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2JobResourceManager.java

License:Apache License

/**
 * Set working directory in local file system.
 *
 * @param dir Working directory.
 * @throws IOException If fails.
 */
private void setLocalFSWorkingDirectory(File dir) throws IOException {
    JobConf cfg = ctx.getJobConf();

    ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(cfg.getClassLoader());

    try {
        cfg.set(HadoopFileSystemsUtils.LOC_FS_WORK_DIR_PROP, dir.getAbsolutePath());

        if (!cfg.getBoolean(FILE_DISABLE_CACHING_PROPERTY_NAME, false))
            FileSystem.getLocal(cfg).setWorkingDirectory(new Path(dir.getAbsolutePath()));
    } finally {
        HadoopCommonUtils.restoreContextClassLoader(oldLdr);
    }
}
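
The property checked here is Hadoop's FileSystem-cache switch for the local file:// scheme; the literal key appears in the older variant of this class below. A minimal sketch of disabling the cache so FileSystem.getLocal(cfg) returns a fresh instance instead of the shared cached one:

JobConf cfg = new JobConf();
cfg.setBoolean("fs.file.impl.disable.cache", true);
boolean cachingDisabled = cfg.getBoolean("fs.file.impl.disable.cache", false); // true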

From source file:org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2JobResourceManager.java

License:Apache License

/**
 * Set working directory in local file system.
 *
 * @throws IOException If fails.
 * @param dir Working directory.
 */
private void setLocalFSWorkingDirectory(File dir) throws IOException {
    JobConf cfg = ctx.getJobConf();

    Thread.currentThread().setContextClassLoader(cfg.getClassLoader());

    try {
        cfg.set(GridHadoopFileSystemsUtils.LOC_FS_WORK_DIR_PROP, dir.getAbsolutePath());

        if (!cfg.getBoolean("fs.file.impl.disable.cache", false))
            FileSystem.getLocal(cfg).setWorkingDirectory(new Path(dir.getAbsolutePath()));
    } finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}