List of usage examples for org.apache.hadoop.mapred JobConf get
public String get(String name)
Parameter: name
Returns: the value of the name property, or null if no such property exists.
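Before the source-file examples, here is a minimal standalone sketch of the call. The property names ("example.greeting", "example.missing") are illustrative, not part of any real configuration file; the point is only the null-vs-default behavior of get.

import org.apache.hadoop.mapred.JobConf;

public class JobConfGetExample {
    public static void main(String[] args) {
        JobConf jobConf = new JobConf();
        jobConf.set("example.greeting", "hello");            // hypothetical property name

        String greeting = jobConf.get("example.greeting");   // "hello"
        String missing = jobConf.get("example.missing");     // null: no such property

        // get(name, defaultValue) avoids the null check when a fallback is acceptable
        String withDefault = jobConf.get("example.missing", "fallback");

        System.out.println(greeting + ", " + missing + ", " + withDefault);
    }
}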
From source file:org.apache.cassandra.hadoop.ColumnFamilyInputFormat.java
License:Apache License
public org.apache.hadoop.mapred.RecordReader<ByteBuffer, SortedMap<ByteBuffer, ColumnFamilyRecordReader.Column>> getRecordReader(
        org.apache.hadoop.mapred.InputSplit split, JobConf jobConf, final Reporter reporter) throws IOException {
    TaskAttemptContext tac = HadoopCompat.newMapContext(jobConf,
            TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID)),
            null, null, null, new ReporterWrapper(reporter), null);

    ColumnFamilyRecordReader recordReader = new ColumnFamilyRecordReader(
            jobConf.getInt(CASSANDRA_HADOOP_MAX_KEY_SIZE, CASSANDRA_HADOOP_MAX_KEY_SIZE_DEFAULT));
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}
From source file:org.apache.cassandra.hadoop.cql3.CqlInputFormat.java
License:Apache License
public RecordReader<Long, Row> getRecordReader(InputSplit split, JobConf jobConf, final Reporter reporter)
        throws IOException {
    TaskAttemptContext tac = new TaskAttemptContext(jobConf,
            TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID))) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };

    CqlRecordReader recordReader = new CqlRecordReader();
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}
From source file:org.apache.cassandra.hadoop.cql3.CqlPagingInputFormat.java
License:Apache License
public RecordReader<Map<String, ByteBuffer>, Map<String, ByteBuffer>> getRecordReader(InputSplit split,
        JobConf jobConf, final Reporter reporter) throws IOException {
    TaskAttemptContext tac = new TaskAttemptContext(jobConf,
            TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID))) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };

    CqlPagingRecordReader recordReader = new CqlPagingRecordReader();
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}
From source file:org.apache.cassandra.hadoop2.ColumnFamilyInputFormat.java
License:Apache License
public org.apache.hadoop.mapred.RecordReader<ByteBuffer, SortedMap<ByteBuffer, Column>> getRecordReader(
        org.apache.hadoop.mapred.InputSplit split, JobConf jobConf, final Reporter reporter) throws IOException {
    TaskAttemptContext tac = new TaskAttemptContextImpl(jobConf,
            TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID))) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };

    ColumnFamilyRecordReader recordReader = new ColumnFamilyRecordReader(
            jobConf.getInt(CASSANDRA_HADOOP_MAX_KEY_SIZE, CASSANDRA_HADOOP_MAX_KEY_SIZE_DEFAULT));
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}
From source file:org.apache.cassandra.hadoop2.cql3.CqlPagingInputFormat.java
License:Apache License
public RecordReader<Map<String, ByteBuffer>, Map<String, ByteBuffer>> getRecordReader(InputSplit split,
        JobConf jobConf, final Reporter reporter) throws IOException {
    TaskAttemptContext tac = new TaskAttemptContextImpl(jobConf,
            TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID))) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };

    CqlPagingRecordReader recordReader = new CqlPagingRecordReader();
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}
From source file:org.apache.crunch.io.hbase.HFileTargetIT.java
License:Apache License
/**
 * We need to set the address of the JobHistory server, as it randomly picks an unused port
 * to listen on. Unfortunately, HBaseTestingUtility neither does that nor provides a way
 * for us to know the picked address. We have to access it using reflection.
 *
 * This is necessary when testing with MRv2, but does no harm to MRv1.
 */
private static void dirtyFixForJobHistoryServerAddress() {
    try {
        // Retrieve HBASE_TEST_UTILITY.mrCluster via reflection, as it is private.
        Field mrClusterField = HBaseTestingUtility.class.getDeclaredField("mrCluster");
        mrClusterField.setAccessible(true);
        MiniMRCluster mrCluster = (MiniMRCluster) mrClusterField.get(HBASE_TEST_UTILITY);
        JobConf jobConf = mrCluster.createJobConf();
        Configuration conf = HBASE_TEST_UTILITY.getConfiguration();
        String property = "mapreduce.jobhistory.address";
        String value = jobConf.get(property);
        if (value != null) { // may be null if we're running MRv1
            conf.set(property, value);
        }
    } catch (IllegalAccessException e) {
        throw new AssertionError(e);
    } catch (NoSuchFieldException e) {
        throw new AssertionError(e);
    }
}
From source file:org.apache.eagle.jpm.analyzer.mr.suggestion.MapReduceCompressionSettingProcessor.java
License:Apache License
@Override
public Result.ProcessorResult process(MapReduceAnalyzerEntity jobAnalysisEntity) {
    StringBuilder sb = new StringBuilder();
    List<String> optSettings = new ArrayList<>();

    JobConf jobconf = new JobConf(context.getJobconf());
    if (jobconf.getLong(NUM_REDUCES, 0) > 0) {
        if (!jobconf.getCompressMapOutput()) {
            optSettings.add(String.format("%s=true", MAP_OUTPUT_COMPRESS));
            sb.append("Please set " + MAP_OUTPUT_COMPRESS + " to true to reduce network IO.\n");
        } else {
            String codecClassName = jobconf.get(MAP_OUTPUT_COMPRESS_CODEC);
            if (!(codecClassName.endsWith("LzoCodec") || codecClassName.endsWith("SnappyCodec"))) {
                optSettings.add(String.format("%s=LzoCodec or SnappyCodec", MAP_OUTPUT_COMPRESS_CODEC));
                sb.append("Best practice: use LzoCodec or SnappyCodec for " + MAP_OUTPUT_COMPRESS_CODEC)
                        .append("\n");
            }
        }
    }

    if (!jobconf.getBoolean(FileOutputFormat.COMPRESS, false)) {
        optSettings.add(String.format("%s=true", FileOutputFormat.COMPRESS));
        sb.append(
                "Please set " + FileOutputFormat.COMPRESS + " to true to reduce disk usage and network IO.\n");
    } else {
        String codecName = jobconf.get(FileOutputFormat.COMPRESS_CODEC, "");
        String outputFileFormat = jobconf.get(OUTPUT_FORMAT_CLASS_ATTR, "");
        if ((codecName.endsWith("GzipCodec") || codecName.endsWith("SnappyCodec")
                || codecName.endsWith("DefaultCodec")) && outputFileFormat.endsWith("TextOutputFormat")) {
            sb.append("Best practice: don't use Gzip/Snappy/DefaultCodec with TextOutputFormat");
            sb.append(" as this will cause the output files to be unsplittable. ");
            sb.append("Please use LZO instead or ");
            sb.append("use a container file format such as SequenceFileOutputFormat.\n");
        }
    }

    if (sb.length() > 0) {
        return new Result.ProcessorResult(Result.RuleType.COMPRESS, Result.ResultLevel.INFO, sb.toString(),
                optSettings);
    }
    return null;
}
From source file:org.apache.flink.api.java.hadoop.mapred.utils.HadoopUtils.java
License:Apache License
/**
 * Merge HadoopConfiguration into JobConf. This is necessary for the HDFS configuration.
 */
public static void mergeHadoopConf(JobConf jobConf) {
    org.apache.hadoop.conf.Configuration hadoopConf = getHadoopConfiguration();
    for (Map.Entry<String, String> e : hadoopConf) {
        if (jobConf.get(e.getKey()) == null) {
            jobConf.set(e.getKey(), e.getValue());
        }
    }
}
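A rough usage sketch of the merge pattern above (not from the Flink sources; the key shown is just an example of a real Hadoop property): because mergeHadoopConf only copies entries for which jobConf.get returns null, values the job has already set are preserved.

// Illustrative only: assumes the mergeHadoopConf helper defined above.
JobConf jobConf = new JobConf();
jobConf.set("fs.defaultFS", "hdfs://localhost:9000"); // explicitly set keys win
HadoopUtils.mergeHadoopConf(jobConf);                 // copies only keys that are still null
String fsUri = jobConf.get("fs.defaultFS");           // unchanged by the merge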
From source file:org.apache.flink.batch.connectors.hive.HiveTableOutputFormat.java
License:Apache License
private HivePartitionWriter writerForLocation(String location) throws IOException {
    JobConf clonedConf = new JobConf(jobConf);
    clonedConf.set(OUTDIR, location);

    OutputFormat outputFormat;
    try {
        StorageDescriptor sd = hiveTablePartition.getStorageDescriptor();
        Class outputFormatClz = Class.forName(sd.getOutputFormat(), true,
                Thread.currentThread().getContextClassLoader());
        outputFormatClz = HiveFileFormatUtils.getOutputFormatSubstitute(outputFormatClz);
        outputFormat = (OutputFormat) outputFormatClz.newInstance();
    } catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) {
        throw new FlinkRuntimeException("Unable to instantiate the hadoop output format", e);
    }
    ReflectionUtils.setConf(outputFormat, clonedConf);

    OutputCommitter outputCommitter = clonedConf.getOutputCommitter();
    JobContext jobContext = new JobContextImpl(clonedConf, new JobID());
    outputCommitter.setupJob(jobContext);

    final boolean isCompressed = clonedConf.getBoolean(HiveConf.ConfVars.COMPRESSRESULT.varname, false);
    if (isCompressed) {
        String codecStr = clonedConf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC.varname);
        if (!StringUtils.isNullOrWhitespaceOnly(codecStr)) {
            try {
                Class<? extends CompressionCodec> codec = (Class<? extends CompressionCodec>) Class
                        .forName(codecStr, true, Thread.currentThread().getContextClassLoader());
                FileOutputFormat.setOutputCompressorClass(clonedConf, codec);
            } catch (ClassNotFoundException e) {
                throw new RuntimeException(e);
            }
        }
        String typeStr = clonedConf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE.varname);
        if (!StringUtils.isNullOrWhitespaceOnly(typeStr)) {
            SequenceFile.CompressionType style = SequenceFile.CompressionType.valueOf(typeStr);
            SequenceFileOutputFormat.setOutputCompressionType(clonedConf, style);
        }
    }

    String taskPartition = String.valueOf(clonedConf.getInt("mapreduce.task.partition", -1));
    Path taskPath = FileOutputFormat.getTaskOutputPath(clonedConf, taskPartition);
    FileSinkOperator.RecordWriter recordWriter;
    try {
        recordWriter = HiveFileFormatUtils.getRecordWriter(clonedConf, outputFormat, outputClass, isCompressed,
                tblProperties, taskPath, Reporter.NULL);
    } catch (HiveException e) {
        throw new IOException(e);
    }
    return new HivePartitionWriter(clonedConf, outputFormat, recordWriter, outputCommitter);
}
From source file:org.apache.hawq.pxf.plugins.json.JsonRecordReader.java
License:Apache License
/**
 * Create a new multi-line JSON object reader.
 *
 * @param conf
 *            Hadoop context
 * @param split
 *            HDFS split to start the reading from
 * @throws IOException IOException when reading the file
 */
public JsonRecordReader(JobConf conf, FileSplit split) throws IOException {
    this.jsonMemberName = conf.get(RECORD_MEMBER_IDENTIFIER);
    this.maxObjectLength = conf.getInt(RECORD_MAX_LENGTH, Integer.MAX_VALUE);

    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(conf);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream fileIn = fs.open(split.getPath());
    if (codec != null) {
        is = codec.createInputStream(fileIn);
        start = 0;
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            fileIn.seek(start);
        }
        is = fileIn;
    }

    parser = new PartitionedJsonParser(is);
    this.pos = start;
}