Example usage for org.apache.hadoop.mapred JobConf get

List of usage examples for org.apache.hadoop.mapred JobConf get

Introduction

On this page you can find example usage for org.apache.hadoop.mapred JobConf get.

Prototype

public String get(String name) 

Source Link

Document

Get the value of the name property, or null if no such property exists.
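For orientation, here is a minimal sketch (not taken from the projects below; the property names are hypothetical) showing that get returns the stored string or null, while the two-argument overload get(String, String) supplies a default:

import org.apache.hadoop.mapred.JobConf;

public class JobConfGetExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        conf.set("example.greeting", "hello");            // hypothetical property name

        String present = conf.get("example.greeting");    // "hello"
        String missing = conf.get("example.never.set");   // null -- property was never set
        String fallback = conf.get("example.never.set", "default-value"); // "default-value"

        System.out.println(present + " / " + missing + " / " + fallback);
    }
}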

Usage

From source file:org.apache.cassandra.hadoop.ColumnFamilyInputFormat.java

License:Apache License

public org.apache.hadoop.mapred.RecordReader<ByteBuffer, SortedMap<ByteBuffer, ColumnFamilyRecordReader.Column>> getRecordReader(
        org.apache.hadoop.mapred.InputSplit split, JobConf jobConf, final Reporter reporter)
        throws IOException {
    TaskAttemptContext tac = HadoopCompat.newMapContext(jobConf,
            TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID)), null, null, null, new ReporterWrapper(reporter),
            null);

    ColumnFamilyRecordReader recordReader = new ColumnFamilyRecordReader(
            jobConf.getInt(CASSANDRA_HADOOP_MAX_KEY_SIZE, CASSANDRA_HADOOP_MAX_KEY_SIZE_DEFAULT));
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}

From source file:org.apache.cassandra.hadoop.cql3.CqlInputFormat.java

License:Apache License

public RecordReader<Long, Row> getRecordReader(InputSplit split, JobConf jobConf, final Reporter reporter)
        throws IOException {
    TaskAttemptContext tac = new TaskAttemptContext(jobConf,
            TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID))) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };

    CqlRecordReader recordReader = new CqlRecordReader();
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}

From source file:org.apache.cassandra.hadoop.cql3.CqlPagingInputFormat.java

License:Apache License

public RecordReader<Map<String, ByteBuffer>, Map<String, ByteBuffer>> getRecordReader(InputSplit split,
        JobConf jobConf, final Reporter reporter) throws IOException {
    TaskAttemptContext tac = new TaskAttemptContext(jobConf,
            TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID))) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };

    CqlPagingRecordReader recordReader = new CqlPagingRecordReader();
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}

From source file:org.apache.cassandra.hadoop2.ColumnFamilyInputFormat.java

License:Apache License

public org.apache.hadoop.mapred.RecordReader<ByteBuffer, SortedMap<ByteBuffer, Column>> getRecordReader(
        org.apache.hadoop.mapred.InputSplit split, JobConf jobConf, final Reporter reporter)
        throws IOException {
    TaskAttemptContext tac = new TaskAttemptContextImpl(jobConf,
            TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID))) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };

    ColumnFamilyRecordReader recordReader = new ColumnFamilyRecordReader(
            jobConf.getInt(CASSANDRA_HADOOP_MAX_KEY_SIZE, CASSANDRA_HADOOP_MAX_KEY_SIZE_DEFAULT));
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}

From source file:org.apache.cassandra.hadoop2.cql3.CqlPagingInputFormat.java

License:Apache License

public RecordReader<Map<String, ByteBuffer>, Map<String, ByteBuffer>> getRecordReader(InputSplit split,
        JobConf jobConf, final Reporter reporter) throws IOException {
    TaskAttemptContext tac = new TaskAttemptContextImpl(jobConf,
            TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID))) {
        @Override
        public void progress() {
            reporter.progress();
        }
    };

    CqlPagingRecordReader recordReader = new CqlPagingRecordReader();
    recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
    return recordReader;
}

From source file:org.apache.crunch.io.hbase.HFileTargetIT.java

License:Apache License

/**
 * We need to set the address of the JobHistory server, as it randomly picks an unused
 * port to listen on. Unfortunately, HBaseTestingUtility neither does that nor provides a way
 * for us to know the picked address. We have to access it using reflection.
 *
 * This is necessary when testing with MRv2, but does no harm to MRv1.
 */
private static void dirtyFixForJobHistoryServerAddress() {
    try {
        // Retrieve HBASE_TEST_UTILITY.mrCluster via reflection, as it is private.
        Field mrClusterField = HBaseTestingUtility.class.getDeclaredField("mrCluster");
        mrClusterField.setAccessible(true);
        MiniMRCluster mrCluster = (MiniMRCluster) mrClusterField.get(HBASE_TEST_UTILITY);
        JobConf jobConf = mrCluster.createJobConf();
        Configuration conf = HBASE_TEST_UTILITY.getConfiguration();
        String proprety = "mapreduce.jobhistory.address";
        String value = jobConf.get(proprety);
        if (value != null) { // maybe null if we're running MRv1
            conf.set(proprety, value);
        }
    } catch (IllegalAccessException e) {
        throw new AssertionError(e);
    } catch (NoSuchFieldException e) {
        throw new AssertionError(e);
    }
}

From source file:org.apache.eagle.jpm.analyzer.mr.suggestion.MapReduceCompressionSettingProcessor.java

License:Apache License

@Override
public Result.ProcessorResult process(MapReduceAnalyzerEntity jobAnalysisEntity) {
    StringBuilder sb = new StringBuilder();
    List<String> optSettings = new ArrayList<>();

    JobConf jobconf = new JobConf(context.getJobconf());
    if (jobconf.getLong(NUM_REDUCES, 0) > 0) {
        if (!jobconf.getCompressMapOutput()) {
            optSettings.add(String.format("%s=true", MAP_OUTPUT_COMPRESS));
            sb.append("Please set " + MAP_OUTPUT_COMPRESS + " to true to reduce network IO.\n");
        } else {
            String codecClassName = jobconf.get(MAP_OUTPUT_COMPRESS_CODEC);
            if (!(codecClassName.endsWith("LzoCodec") || codecClassName.endsWith("SnappyCodec"))) {
                optSettings.add(String.format("%s=LzoCodec or SnappyCodec", MAP_OUTPUT_COMPRESS_CODEC));
                sb.append("Best practice: use LzoCodec or SnappyCodec for " + MAP_OUTPUT_COMPRESS_CODEC)
                        .append("\n");
            }
        }
    }

    if (!jobconf.getBoolean(FileOutputFormat.COMPRESS, false)) {
        optSettings.add(String.format("%s=true", FileOutputFormat.COMPRESS));
        sb.append(
                "Please set " + FileOutputFormat.COMPRESS + " to true to reduce disk usage and network IO.\n");
    } else {
        String codecName = jobconf.get(FileOutputFormat.COMPRESS_CODEC, "");
        String outputFileFormat = jobconf.get(OUTPUT_FORMAT_CLASS_ATTR, "");

        if ((codecName.endsWith("GzipCodec") || codecName.endsWith("SnappyCodec")
                || codecName.endsWith("DefaultCodec")) && outputFileFormat.endsWith("TextOutputFormat")) {
            sb.append("Best practice: don't use Gzip/Snappy/DefaultCodec with TextOutputFormat");
            sb.append(" as this will cause the output files to be unsplittable. ");
            sb.append("Please use LZO instead or ");
            sb.append("use a container file format such as SequenceFileOutputFormat.\n");
        }
    }

    if (sb.length() > 0) {
        return new Result.ProcessorResult(Result.RuleType.COMPRESS, Result.ResultLevel.INFO, sb.toString(),
                optSettings);
    }
    return null;
}

From source file:org.apache.flink.api.java.hadoop.mapred.utils.HadoopUtils.java

License:Apache License

/**
 * Merge HadoopConfiguration into JobConf. This is necessary for the HDFS configuration.
 */
public static void mergeHadoopConf(JobConf jobConf) {
    org.apache.hadoop.conf.Configuration hadoopConf = getHadoopConfiguration();
    for (Map.Entry<String, String> e : hadoopConf) {
        if (jobConf.get(e.getKey()) == null) {
            jobConf.set(e.getKey(), e.getValue());
        }
    }
}

From source file:org.apache.flink.batch.connectors.hive.HiveTableOutputFormat.java

License:Apache License

private HivePartitionWriter writerForLocation(String location) throws IOException {
    JobConf clonedConf = new JobConf(jobConf);
    clonedConf.set(OUTDIR, location);
    OutputFormat outputFormat;
    try {
        StorageDescriptor sd = hiveTablePartition.getStorageDescriptor();
        Class outputFormatClz = Class.forName(sd.getOutputFormat(), true,
                Thread.currentThread().getContextClassLoader());
        outputFormatClz = HiveFileFormatUtils.getOutputFormatSubstitute(outputFormatClz);
        outputFormat = (OutputFormat) outputFormatClz.newInstance();
    } catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) {
        throw new FlinkRuntimeException("Unable to instantiate the hadoop output format", e);
    }
    ReflectionUtils.setConf(outputFormat, clonedConf);
    OutputCommitter outputCommitter = clonedConf.getOutputCommitter();
    JobContext jobContext = new JobContextImpl(clonedConf, new JobID());
    outputCommitter.setupJob(jobContext);
    final boolean isCompressed = clonedConf.getBoolean(HiveConf.ConfVars.COMPRESSRESULT.varname, false);
    if (isCompressed) {
        String codecStr = clonedConf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC.varname);
        if (!StringUtils.isNullOrWhitespaceOnly(codecStr)) {
            try {
                Class<? extends CompressionCodec> codec = (Class<? extends CompressionCodec>) Class
                        .forName(codecStr, true, Thread.currentThread().getContextClassLoader());
                FileOutputFormat.setOutputCompressorClass(clonedConf, codec);
            } catch (ClassNotFoundException e) {
                throw new RuntimeException(e);
            }
        }
        String typeStr = clonedConf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE.varname);
        if (!StringUtils.isNullOrWhitespaceOnly(typeStr)) {
            SequenceFile.CompressionType style = SequenceFile.CompressionType.valueOf(typeStr);
            SequenceFileOutputFormat.setOutputCompressionType(clonedConf, style);
        }
    }
    String taskPartition = String.valueOf(clonedConf.getInt("mapreduce.task.partition", -1));
    Path taskPath = FileOutputFormat.getTaskOutputPath(clonedConf, taskPartition);
    FileSinkOperator.RecordWriter recordWriter;
    try {
        recordWriter = HiveFileFormatUtils.getRecordWriter(clonedConf, outputFormat, outputClass, isCompressed,
                tblProperties, taskPath, Reporter.NULL);
    } catch (HiveException e) {
        throw new IOException(e);
    }
    return new HivePartitionWriter(clonedConf, outputFormat, recordWriter, outputCommitter);
}

From source file:org.apache.hawq.pxf.plugins.json.JsonRecordReader.java

License:Apache License

/**
 * Create new multi-line json object reader.
 *
 * @param conf
 *            Hadoop context
 * @param split
 *            HDFS split to start the reading from
 * @throws IOException IOException when reading the file
 */
public JsonRecordReader(JobConf conf, FileSplit split) throws IOException {

    this.jsonMemberName = conf.get(RECORD_MEMBER_IDENTIFIER);
    this.maxObjectLength = conf.getInt(RECORD_MAX_LENGTH, Integer.MAX_VALUE);

    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(conf);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream fileIn = fs.open(split.getPath());
    if (codec != null) {
        is = codec.createInputStream(fileIn);
        start = 0;
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            fileIn.seek(start);
        }
        is = fileIn;
    }
    parser = new PartitionedJsonParser(is);
    this.pos = start;
}