List of usage examples for org.apache.hadoop.mapred JobConf getBoolean
public boolean getBoolean(String name, boolean defaultValue)
Gets the value of the name property as a boolean. If no such property is set, or if the stored value is not a valid boolean, defaultValue is returned.
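Before the per-project examples below, a minimal self-contained sketch of the usual pattern: the job driver stores a flag with setBoolean, and task-side code reads it back with getBoolean, falling back to the default when the property was never set. The property key example.feature.enabled is a hypothetical name used only for illustration.

import org.apache.hadoop.mapred.JobConf;

public class GetBooleanExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Driver side: store the flag; Configuration#setBoolean records it as "true"/"false".
        conf.setBoolean("example.feature.enabled", true);

        // Task side (e.g. in Mapper#configure): read it back, defaulting to false when unset.
        boolean enabled = conf.getBoolean("example.feature.enabled", false);
        System.out.println("feature enabled: " + enabled);
    }
}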
. From source file:nl.tudelft.graphalytics.mapreducev2.evo.UndirectedForestFireModelReducer.java
License:Apache License
@Override
public void configure(JobConf conf) {
    this.isInit = conf.getBoolean(ForestFireModelUtils.IS_INIT, false);
    this.isFinal = conf.getBoolean(ForestFireModelUtils.IS_FINAL, false);
    this.maxID = conf.getLong(ForestFireModelUtils.MAX_ID, -1);
    this.pRatio = conf.getFloat(ForestFireModelUtils.P_RATIO, 0);
}
From source file:org.apache.avro.mapred.AvroMultipleOutputs.java
License:Apache License
/**
 * Returns if a named output is multiple.
 *
 * @param conf        job conf
 * @param namedOutput named output
 * @return <code>true</code> if the named output is multi, <code>false</code>
 *         if it is single. If the named output is not defined it returns
 *         <code>false</code>.
 */
public static boolean isMultiNamedOutput(JobConf conf, String namedOutput) {
    checkNamedOutput(conf, namedOutput, false);
    return conf.getBoolean(MO_PREFIX + namedOutput + MULTI, false);
}
From source file:org.apache.avro.mapred.AvroMultipleOutputs.java
License:Apache License
/**
 * Returns if the counters for the named outputs are enabled or not.
 * <p/>
 * MultipleOutputs supports counters; by default they are disabled.
 * The counters group is the {@link MultipleOutputs} class name.
 * </p>
 * The names of the counters are the same as the named outputs. For multi
 * named outputs the name of the counter is the concatenation of the named
 * output, an underscore '_' and the multiname.
 *
 * @param conf job conf
 * @return TRUE if the counters are enabled, FALSE if they are disabled.
 */
public static boolean getCountersEnabled(JobConf conf) {
    return conf.getBoolean(COUNTERS_ENABLED, false);
}
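The flag read here is normally toggled on the driver before submission. A minimal sketch, assuming AvroMultipleOutputs exposes the matching setCountersEnabled(JobConf, boolean) helper that stores the same internal COUNTERS_ENABLED property (it mirrors Hadoop's MultipleOutputs in this respect):

import org.apache.avro.mapred.AvroMultipleOutputs;
import org.apache.hadoop.mapred.JobConf;

public class CountersEnabledSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Assumption: setCountersEnabled is the driver-side counterpart of getCountersEnabled.
        AvroMultipleOutputs.setCountersEnabled(conf, true);

        // Later, tasks (or the framework) read the flag; it defaults to false when never set.
        boolean countersOn = AvroMultipleOutputs.getCountersEnabled(conf);
        System.out.println("named-output counters enabled: " + countersOn);
    }
}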
From source file:org.apache.avro.mapred.tether.TetheredProcess.java
License:Apache License
private Process startSubprocess(JobConf job) throws IOException, InterruptedException {
    // get the executable command
    List<String> command = new ArrayList<String>();
    String executable = "";
    if (job.getBoolean(TetherJob.TETHER_EXEC_CACHED, false)) {
        // we want to use the cached executable
        Path[] localFiles = DistributedCache.getLocalCacheFiles(job);
        if (localFiles == null) { // until MAPREDUCE-476
            URI[] files = DistributedCache.getCacheFiles(job);
            localFiles = new Path[] { new Path(files[0].toString()) };
        }
        executable = localFiles[0].toString();
        FileUtil.chmod(executable.toString(), "a+x");
    } else {
        executable = job.get(TetherJob.TETHER_EXEC);
    }
    command.add(executable);

    // Add the executable arguments. We assume the arguments are separated by
    // spaces so we split the argument string based on spaces and add each
    // token to command. We need to do it this way because
    // TaskLog.captureOutAndError will put quote marks around each argument so
    // if we pass a single string containing all arguments we get quoted
    // incorrectly.
    String args = job.get(TetherJob.TETHER_EXEC_ARGS);
    String[] aparams = args.split("\n");
    for (int i = 0; i < aparams.length; i++) {
        aparams[i] = aparams[i].trim();
        if (aparams[i].length() > 0) {
            command.add(aparams[i]);
        }
    }

    if (System.getProperty("hadoop.log.dir") == null && System.getenv("HADOOP_LOG_DIR") != null)
        System.setProperty("hadoop.log.dir", System.getenv("HADOOP_LOG_DIR"));

    // wrap the command in a stdout/stderr capture
    TaskAttemptID taskid = TaskAttemptID.forName(job.get("mapred.task.id"));
    File stdout = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDOUT);
    File stderr = TaskLog.getTaskLogFile(taskid, false, TaskLog.LogName.STDERR);
    long logLength = TaskLog.getTaskLogLength(job);
    command = TaskLog.captureOutAndError(command, stdout, stderr, logLength);
    stdout.getParentFile().mkdirs();
    stderr.getParentFile().mkdirs();

    // add output server's port to env
    Map<String, String> env = new HashMap<String, String>();
    env.put("AVRO_TETHER_OUTPUT_PORT", Integer.toString(outputServer.getPort()));

    // start child process
    ProcessBuilder builder = new ProcessBuilder(command);
    System.out.println(command);
    builder.environment().putAll(env);
    return builder.start();
}
From source file:org.apache.avro.mapred.tether.TetherJob.java
License:Apache License
private static void setupTetherJob(JobConf job) throws IOException {
    job.setMapRunnerClass(TetherMapRunner.class);
    job.setPartitionerClass(TetherPartitioner.class);
    job.setReducerClass(TetherReducer.class);

    job.setInputFormat(TetherInputFormat.class);
    job.setOutputFormat(TetherOutputFormat.class);

    job.setOutputKeyClass(TetherData.class);
    job.setOutputKeyComparatorClass(TetherKeyComparator.class);
    job.setMapOutputValueClass(NullWritable.class);

    // set the map output key class to TetherData
    job.setMapOutputKeyClass(TetherData.class);

    // add TetherKeySerialization to io.serializations
    Collection<String> serializations = job.getStringCollection("io.serializations");
    if (!serializations.contains(TetherKeySerialization.class.getName())) {
        serializations.add(TetherKeySerialization.class.getName());
        job.setStrings("io.serializations", serializations.toArray(new String[0]));
    }

    // determine whether the executable should be added to the cache.
    if (job.getBoolean(TETHER_EXEC_CACHED, false)) {
        DistributedCache.addCacheFile(getExecutable(job), job);
    }
}
From source file:org.apache.eagle.jpm.analyzer.mr.suggestion.MapReduceCompressionSettingProcessor.java
License:Apache License
@Override
public Result.ProcessorResult process(MapReduceAnalyzerEntity jobAnalysisEntity) {
    StringBuilder sb = new StringBuilder();
    List<String> optSettings = new ArrayList<>();

    JobConf jobconf = new JobConf(context.getJobconf());
    if (jobconf.getLong(NUM_REDUCES, 0) > 0) {
        if (!jobconf.getCompressMapOutput()) {
            optSettings.add(String.format("%s=true", MAP_OUTPUT_COMPRESS));
            sb.append("Please set " + MAP_OUTPUT_COMPRESS + " to true to reduce network IO.\n");
        } else {
            String codecClassName = jobconf.get(MAP_OUTPUT_COMPRESS_CODEC);
            if (!(codecClassName.endsWith("LzoCodec") || codecClassName.endsWith("SnappyCodec"))) {
                optSettings.add(String.format("%s=LzoCodec or SnappyCodec", MAP_OUTPUT_COMPRESS_CODEC));
                sb.append("Best practice: use LzoCodec or SnappyCodec for " + MAP_OUTPUT_COMPRESS_CODEC)
                        .append("\n");
            }
        }
    }

    if (!jobconf.getBoolean(FileOutputFormat.COMPRESS, false)) {
        optSettings.add(String.format("%s=true", FileOutputFormat.COMPRESS));
        sb.append(
                "Please set " + FileOutputFormat.COMPRESS + " to true to reduce disk usage and network IO.\n");
    } else {
        String codecName = jobconf.get(FileOutputFormat.COMPRESS_CODEC, "");
        String outputFileFormat = jobconf.get(OUTPUT_FORMAT_CLASS_ATTR, "");
        if ((codecName.endsWith("GzipCodec") || codecName.endsWith("SnappyCodec")
                || codecName.endsWith("DefaultCodec")) && outputFileFormat.endsWith("TextOutputFormat")) {
            sb.append("Best practice: don't use Gzip/Snappy/DefaultCodec with TextOutputFormat");
            sb.append(" as this will cause the output files to be unsplittable. ");
            sb.append("Please use LZO instead or ");
            sb.append("use a container file format such as SequenceFileOutputFormat.\n");
        }
    }

    if (sb.length() > 0) {
        return new Result.ProcessorResult(Result.RuleType.COMPRESS, Result.ResultLevel.INFO, sb.toString(),
                optSettings);
    }
    return null;
}
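For reference, a minimal sketch of the driver-side counterpart of the flags this processor inspects; it applies the recommended compression settings to a JobConf using the standard org.apache.hadoop.mapred helpers rather than the constants referenced in the processor:

import org.apache.hadoop.io.compress.SnappyCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class CompressionSettingsSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Compress intermediate map output to cut shuffle network IO
        // (read back via JobConf#getCompressMapOutput in the processor above).
        conf.setCompressMapOutput(true);
        conf.setMapOutputCompressorClass(SnappyCodec.class);

        // Compress the final job output; the processor's
        // getBoolean(FileOutputFormat.COMPRESS, false) check corresponds to this flag.
        FileOutputFormat.setCompressOutput(conf, true);
        FileOutputFormat.setOutputCompressorClass(conf, SnappyCodec.class);
    }
}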
From source file:org.apache.flink.batch.connectors.hive.HiveTableOutputFormat.java
License:Apache License
private HivePartitionWriter writerForLocation(String location) throws IOException {
    JobConf clonedConf = new JobConf(jobConf);
    clonedConf.set(OUTDIR, location);
    OutputFormat outputFormat;
    try {
        StorageDescriptor sd = hiveTablePartition.getStorageDescriptor();
        Class outputFormatClz = Class.forName(sd.getOutputFormat(), true,
                Thread.currentThread().getContextClassLoader());
        outputFormatClz = HiveFileFormatUtils.getOutputFormatSubstitute(outputFormatClz);
        outputFormat = (OutputFormat) outputFormatClz.newInstance();
    } catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) {
        throw new FlinkRuntimeException("Unable to instantiate the hadoop output format", e);
    }
    ReflectionUtils.setConf(outputFormat, clonedConf);
    OutputCommitter outputCommitter = clonedConf.getOutputCommitter();
    JobContext jobContext = new JobContextImpl(clonedConf, new JobID());
    outputCommitter.setupJob(jobContext);
    final boolean isCompressed = clonedConf.getBoolean(HiveConf.ConfVars.COMPRESSRESULT.varname, false);
    if (isCompressed) {
        String codecStr = clonedConf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC.varname);
        if (!StringUtils.isNullOrWhitespaceOnly(codecStr)) {
            try {
                Class<? extends CompressionCodec> codec = (Class<? extends CompressionCodec>) Class
                        .forName(codecStr, true, Thread.currentThread().getContextClassLoader());
                FileOutputFormat.setOutputCompressorClass(clonedConf, codec);
            } catch (ClassNotFoundException e) {
                throw new RuntimeException(e);
            }
        }
        String typeStr = clonedConf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE.varname);
        if (!StringUtils.isNullOrWhitespaceOnly(typeStr)) {
            SequenceFile.CompressionType style = SequenceFile.CompressionType.valueOf(typeStr);
            SequenceFileOutputFormat.setOutputCompressionType(clonedConf, style);
        }
    }
    String taskPartition = String.valueOf(clonedConf.getInt("mapreduce.task.partition", -1));
    Path taskPath = FileOutputFormat.getTaskOutputPath(clonedConf, taskPartition);
    FileSinkOperator.RecordWriter recordWriter;
    try {
        recordWriter = HiveFileFormatUtils.getRecordWriter(clonedConf, outputFormat, outputClass, isCompressed,
                tblProperties, taskPath, Reporter.NULL);
    } catch (HiveException e) {
        throw new IOException(e);
    }
    return new HivePartitionWriter(clonedConf, outputFormat, recordWriter, outputCommitter);
}
From source file:org.apache.hive.hcatalog.mapreduce.HCatBaseInputFormat.java
License:Apache License
private List<String> setInputPath(JobConf jobConf, String location) throws IOException {
    // ideally we should just call FileInputFormat.setInputPaths() here - but
    // that won't work since FileInputFormat.setInputPaths() needs
    // a Job object instead of a JobContext which we are handed here

    int length = location.length();
    int curlyOpen = 0;
    int pathStart = 0;
    boolean globPattern = false;
    List<String> pathStrings = new ArrayList<String>();

    for (int i = 0; i < length; i++) {
        char ch = location.charAt(i);
        switch (ch) {
        case '{': {
            curlyOpen++;
            if (!globPattern) {
                globPattern = true;
            }
            break;
        }
        case '}': {
            curlyOpen--;
            if (curlyOpen == 0 && globPattern) {
                globPattern = false;
            }
            break;
        }
        case ',': {
            if (!globPattern) {
                pathStrings.add(location.substring(pathStart, i));
                pathStart = i + 1;
            }
            break;
        }
        }
    }
    pathStrings.add(location.substring(pathStart, length));

    String separator = "";
    StringBuilder str = new StringBuilder();

    boolean ignoreInvalidPath = jobConf.getBoolean(HCatConstants.HCAT_INPUT_IGNORE_INVALID_PATH_KEY,
            HCatConstants.HCAT_INPUT_IGNORE_INVALID_PATH_DEFAULT);
    Iterator<String> pathIterator = pathStrings.iterator();
    while (pathIterator.hasNext()) {
        String pathString = pathIterator.next();
        if (ignoreInvalidPath && org.apache.commons.lang.StringUtils.isBlank(pathString)) {
            continue;
        }
        Path path = new Path(pathString);
        FileSystem fs = path.getFileSystem(jobConf);
        if (ignoreInvalidPath && !fs.exists(path)) {
            pathIterator.remove();
            continue;
        }
        final String qualifiedPath = fs.makeQualified(path).toString();
        str.append(separator).append(StringUtils.escapeString(qualifiedPath));
        separator = StringUtils.COMMA_STR;
    }

    if (!ignoreInvalidPath || !pathStrings.isEmpty()) {
        jobConf.set("mapred.input.dir", str.toString());
    }
    return pathStrings;
}
From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2JobResourceManager.java
License:Apache License
/**
 * Set working directory in local file system.
 *
 * @param dir Working directory.
 * @throws IOException If fails.
 */
private void setLocalFSWorkingDirectory(File dir) throws IOException {
    JobConf cfg = ctx.getJobConf();

    ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(cfg.getClassLoader());

    try {
        cfg.set(HadoopFileSystemsUtils.LOC_FS_WORK_DIR_PROP, dir.getAbsolutePath());

        if (!cfg.getBoolean(FILE_DISABLE_CACHING_PROPERTY_NAME, false))
            FileSystem.getLocal(cfg).setWorkingDirectory(new Path(dir.getAbsolutePath()));
    } finally {
        HadoopCommonUtils.restoreContextClassLoader(oldLdr);
    }
}
From source file:org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2JobResourceManager.java
License:Apache License
/**
 * Set working directory in local file system.
 *
 * @param dir Working directory.
 * @throws IOException If fails.
 */
private void setLocalFSWorkingDirectory(File dir) throws IOException {
    JobConf cfg = ctx.getJobConf();

    Thread.currentThread().setContextClassLoader(cfg.getClassLoader());

    try {
        cfg.set(GridHadoopFileSystemsUtils.LOC_FS_WORK_DIR_PROP, dir.getAbsolutePath());

        if (!cfg.getBoolean("fs.file.impl.disable.cache", false))
            FileSystem.getLocal(cfg).setWorkingDirectory(new Path(dir.getAbsolutePath()));
    } finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}