List of usage examples for org.apache.hadoop.mapred JobConf get
public String get(String name, String defaultValue)
Returns the value of the name property; if no such property exists, defaultValue is returned.
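For orientation, a minimal sketch of calling get with a fallback value; the property key example.output.separator and the class name JobConfGetExample are hypothetical and are not taken from the source files listed below.

import org.apache.hadoop.mapred.JobConf;

public class JobConfGetExample {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        // "example.output.separator" is a hypothetical key; since nothing sets it here,
        // get(...) falls back to the supplied default value "\t".
        String separator = job.get("example.output.separator", "\t");
        System.out.println("separator = [" + separator + "]");
    }
}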
From source file:com.m6d.filecrush.crush.TextOutputFormat.java
License:Apache License
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress)
        throws IOException {
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = job.get("mapreduce.output.textoutputformat.separator", "");
    if (!isCompressed) {
        Path file = FileOutputFormat.getTaskOutputPath(job, name);
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
    } else {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        // create the named codec
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job);
        // build the filename including the extension
        Path file = FileOutputFormat.getTaskOutputPath(job, name + codec.getDefaultExtension());
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator);
    }
}
From source file:com.mellanox.hadoop.mapred.UdaPlugin.java
License:Apache License
public UdaPluginRT(UdaShuffleConsumerPluginShared udaShuffleConsumer, ReduceTask reduceTask, JobConf jobConf,
        Reporter reporter, int numMaps) throws IOException {
    super(jobConf);
    this.udaShuffleConsumer = udaShuffleConsumer;
    this.reduceTask = reduceTask;
    String totalRdmaSizeStr = jobConf.get("mapred.rdma.shuffle.total.size", "0");
    // default 0 means ignoring this parameter and use instead -Xmx and mapred.job.shuffle.input.buffer.percent
    long totalRdmaSize = StringUtils.TraditionalBinaryPrefix.string2long(totalRdmaSizeStr);
    long maxRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size", 1024);
    long minRdmaBufferSize = jobConf.getLong("mapred.rdma.buf.size.min", 16);
    long shuffleMemorySize = totalRdmaSize;
    StringBuilder meminfoSb = new StringBuilder();
    meminfoSb.append("UDA: numMaps=").append(numMaps);
    meminfoSb.append(", maxRdmaBufferSize=").append(maxRdmaBufferSize);
    meminfoSb.append("KB, minRdmaBufferSize=").append(minRdmaBufferSize).append("KB");
    meminfoSb.append("KB, rdmaShuffleTotalSize=").append(totalRdmaSize);
    if (totalRdmaSize < 0) {
        LOG.warn("Illegal paramter value: mapred.rdma.shuffle.total.size=" + totalRdmaSize);
    }
    if (totalRdmaSize <= 0) {
        long maxHeapSize = Runtime.getRuntime().maxMemory();
        double shuffleInputBufferPercent = jobConf.getFloat("mapred.job.shuffle.input.buffer.percent",
                DEFAULT_SHUFFLE_INPUT_PERCENT);
        if ((shuffleInputBufferPercent < 0) || (shuffleInputBufferPercent > 1)) {
            LOG.warn("UDA: mapred.job.shuffle.input.buffer.percent is out of range - set to default: "
                    + DEFAULT_SHUFFLE_INPUT_PERCENT);
            shuffleInputBufferPercent = DEFAULT_SHUFFLE_INPUT_PERCENT;
        }
        shuffleMemorySize = (long) (maxHeapSize * shuffleInputBufferPercent);
        LOG.info("Using JAVA Xmx with mapred.job.shuffle.input.buffer.percent to limit UDA shuffle memory");
        meminfoSb.append(", maxHeapSize=").append(maxHeapSize).append("B");
        meminfoSb.append(", shuffleInputBufferPercent=").append(shuffleInputBufferPercent);
        meminfoSb.append("==> shuffleMemorySize=").append(shuffleMemorySize).append("B");
        LOG.info("RDMA shuffle memory is limited to " + shuffleMemorySize / 1024 / 1024 + "MB");
    } else {
        LOG.info("Using mapred.rdma.shuffle.total.size to limit UDA shuffle memory");
        LOG.info("RDMA shuffle memory is limited to " + totalRdmaSize / 1024 / 1024 + "MB");
    }
    LOG.debug(meminfoSb.toString());
    LOG.info("UDA: user prefer rdma.buf.size=" + maxRdmaBufferSize + "KB");
    LOG.info("UDA: minimum rdma.buf.size=" + minRdmaBufferSize + "KB");
    if (jobConf.getSpeculativeExecution()) { // (getMapSpeculativeExecution() || getReduceSpeculativeExecution())
        LOG.info("UDA has limited support for map task speculative execution");
    }
    LOG.info("UDA: number of segments to fetch: " + numMaps);
    /* init variables */
    init_kv_bufs();
    launchCppSide(true, this); // true: this is RT => we should execute NetMerger
    this.j2c_queue = new J2CQueue<K, V>();
    this.mTaskReporter = reporter;
    this.mMapsNeed = numMaps;
    /* send init message */
    TaskAttemptID reduceId = reduceTask.getTaskID();
    mParams.clear();
    mParams.add(Integer.toString(numMaps));
    mParams.add(reduceId.getJobID().toString());
    mParams.add(reduceId.toString());
    mParams.add(jobConf.get("mapred.netmerger.hybrid.lpq.size", "0"));
    mParams.add(Long.toString(maxRdmaBufferSize * 1024)); // in Bytes - pass the raw value we got from xml file (with only conversion to bytes)
    mParams.add(Long.toString(minRdmaBufferSize * 1024)); // in Bytes. passed for checking if rdmaBuffer is still larger than minRdmaBuffer after alignment
    mParams.add(jobConf.getOutputKeyClass().getName());
    boolean compression = jobConf.getCompressMapOutput(); // "true" or "false"
    String alg = null;
    if (compression) {
        alg = jobConf.get("mapred.map.output.compression.codec", null);
    }
    mParams.add(alg);
    String bufferSize = Integer.toString(256 * 1024);
    if (alg != null) {
        if (alg.contains("lzo.LzoCodec")) {
            bufferSize = jobConf.get("io.compression.codec.lzo.buffersize", bufferSize);
        } else if (alg.contains("SnappyCodec")) {
            bufferSize = jobConf.get("io.compression.codec.snappy.buffersize", bufferSize);
        }
    }
    mParams.add(bufferSize);
    mParams.add(Long.toString(shuffleMemorySize));
    String[] dirs = jobConf.getLocalDirs();
    ArrayList<String> dirsCanBeCreated = new ArrayList<String>();
    // checking if the directories can be created
    for (int i = 0; i < dirs.length; i++) {
        try {
            DiskChecker.checkDir(new File(dirs[i].trim()));
            // saving only the directories that can be created
            dirsCanBeCreated.add(dirs[i].trim());
        } catch (DiskErrorException e) {
        }
    }
    // sending the directories
    int numDirs = dirsCanBeCreated.size();
    mParams.add(Integer.toString(numDirs));
    for (int i = 0; i < numDirs; i++) {
        mParams.add(dirsCanBeCreated.get(i));
    }
    LOG.info("mParams array is " + mParams);
    LOG.info("UDA: sending INIT_COMMAND");
    String msg = UdaCmd.formCmd(UdaCmd.INIT_COMMAND, mParams);
    UdaBridge.doCommand(msg);
    this.mProgress = new Progress();
    this.mProgress.set(0.5f);
}
From source file:com.mellanox.hadoop.mapred.UdaShuffleProviderPluginShared.java
License:Apache License
static void buildCmdParams(List<String> params, JobConf jobConf) {
    params.clear();
    params.add("-w");
    params.add(jobConf.get("mapred.rdma.wqe.per.conn", "256"));
    params.add("-r");
    params.add(jobConf.get("mapred.rdma.cma.port", "9011"));
    params.add("-m");
    params.add("1");
    params.add("-g");
    params.add(System.getProperty("hadoop.log.dir"));
    params.add("-s");
    params.add(jobConf.get("mapred.rdma.buf.size", "1024"));
}
From source file:com.ricemap.spateDB.mapred.TextOutputFormat.java
License:Apache License
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress)
        throws IOException {
    boolean isCompressed = getCompressOutput(job);
    String keyValueSeparator = job.get("mapred.textoutputformat.separator", "\t");
    if (!isCompressed) {
        Path file = FileOutputFormat.getTaskOutputPath(job, name);
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new LineRecordWriter<K, V>(fileOut, keyValueSeparator);
    } else {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
        // create the named codec
        CompressionCodec codec = ReflectionUtils.newInstance(codecClass, job);
        // build the filename including the extension
        Path file = FileOutputFormat.getTaskOutputPath(job, name + codec.getDefaultExtension());
        FileSystem fs = file.getFileSystem(job);
        FSDataOutputStream fileOut = fs.create(file, progress);
        return new LineRecordWriter<K, V>(new DataOutputStream(codec.createOutputStream(fileOut)),
                keyValueSeparator);
    }
}
From source file:com.TCG.Nutch_DNS.HostDbFilter.java
License:Apache License
public void configure(JobConf job) {
    urlFiltering = job.getBoolean(URL_FILTERING, false);
    urlNormalizers = job.getBoolean(URL_NORMALIZING, false);
    url404Purging = job.getBoolean(HostDb.CRAWLDB_PURGE_404, false);
    if (urlFiltering) {
        filters = new URLFilters(job);
    }
    if (urlNormalizers) {
        scope = job.get(URL_NORMALIZING_SCOPE, URLNormalizers.SCOPE_CRAWLDB);
        normalizers = new URLNormalizers(job, scope);
    }
}
From source file:com.tripadvisor.hadoop.DumpDDL.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration config = getConf();
    JobConf conf = new JobConf(config, DumpDDL.class);
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    for (String arg : args) {
        if (arg.contains("=")) {
            String vname = arg.substring(0, arg.indexOf('='));
            String vval = arg.substring(arg.indexOf('=') + 1);
            conf.set(vname, vval.replace("\"", ""));
        }
    }
    HiveUtil hu = new HiveUtil();
    System.out.println(hu.dumpDDL(conf.get(DB_NAME, "default"), conf.get("ignore.tables.filename")));
    return 0;
}
From source file:com.uber.hoodie.hadoop.realtime.HoodieRealtimeRecordReader.java
License:Apache License
public static boolean canSkipMerging(JobConf jobConf) {
    return Boolean.valueOf(jobConf.get(REALTIME_SKIP_MERGE_PROP, DEFAULT_REALTIME_SKIP_MERGE));
}
From source file:com.yolodata.tbana.hadoop.mapred.csv.CSVNLineInputFormat.java
License:Open Source License
@Override
public RecordReader<LongWritable, List<Text>> getRecordReader(InputSplit inputSplit, JobConf jobConf,
        Reporter reporter) throws IOException {
    String quote = jobConf.get(CSVLineRecordReader.FORMAT_DELIMITER, CSVLineRecordReader.DEFAULT_DELIMITER);
    String separator = jobConf.get(CSVLineRecordReader.FORMAT_SEPARATOR, CSVLineRecordReader.DEFAULT_SEPARATOR);
    if (quote == null || separator == null) {
        throw new IOException("CSVTextInputFormat: missing parameter delimiter");
    }
    reporter.setStatus(inputSplit.toString());
    CSVLineRecordReader reader = new CSVLineRecordReader();
    reader.initialize(inputSplit, jobConf);
    return reader;
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.hadoop.UIMAMapReduceBase.java
License:Open Source License
@Override
public void configure(JobConf job) {
    try {
        this.job = job;
        this.mapOutputValueClass = job.getMapOutputValueClass();
        this.outputValueClass = job.getOutputValueClass();
        this.samplingPropability = job.getInt("dkpro.map.samplingratio", 100);
        final EngineFactory engineFactory = (EngineFactory) Class
                .forName(job.get("dkpro.uima.factory", DkproHadoopDriver.class.getName())).newInstance();
        engineFactory.configure(job);
        final AnalysisEngineDescription engineDescription = getEngineDescription(engineFactory, job);
        // replace the $dir variable within the configuration.
        this.fs = FileSystem.get(job);
        this.localFS = FileSystem.getLocal(job);
        this.working_dir = new Path("uima_output_" + job.get("mapred.task.id"));
        final Path outputPath = FileOutputFormat.getOutputPath(job);
        this.results_dir = this.fs.startLocalOutput(outputPath, job.getLocalPath(this.working_dir.getName()));
        this.localFS.mkdirs(this.results_dir);
        final String[] resources = job.get("dkpro.resources", "").split(",");
        sLogger.info("Writing local data to: " + this.results_dir);
        this.resourceURIs = new TreeMap<String, URL>();
        for (final String resource : resources) {
            final URL r = job.getResource(resource);
            if (r != null && !resource.isEmpty()) {
                this.resourceURIs.put(resource, r);
            }
        }
        replaceRecursively(engineDescription);
        this.engine = createEngine(engineDescription);
    } catch (final Exception e) {
        sLogger.fatal("Error while configuring pipeline", e);
        e.printStackTrace();
        throw new RuntimeException(e);
    }
}
From source file:edu.stolaf.cs.wmrserver.streaming.PipeMapper.java
License:Apache License
public void configure(JobConf job) {
    super.configure(job);
    // disable the auto increment of the counter. For streaming, no of
    // processed records could be different(equal or less) than the no of
    // records input.
    SkipBadRecords.setAutoIncrMapperProcCount(job, false);
    skipping = job.getBoolean("mapred.skip.on", false);
    String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class)
            .getCanonicalName();
    ignoreKey = inputFormatClassName.equals(TextInputFormat.class.getCanonicalName());
    try {
        mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
        mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
        numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
    } catch (UnsupportedEncodingException e) {
        throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
    }
}