List of usage examples for the org.apache.hadoop.mapred.JobConf constructor
public JobConf(boolean loadDefaults)
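Before the project examples below, here is a minimal standalone sketch (hypothetical, not taken from any of the source files listed here) of the constructor forms involved: JobConf(boolean) controls whether the default resources (core-default.xml, core-site.xml, ...) are loaded, while the JobConf(Configuration) and JobConf(Class) forms used throughout the examples copy an existing configuration or record the class whose jar should ship with the job.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;

public class JobConfConstructorSketch {
    public static void main(String[] args) {
        // Skip loading the default resources entirely.
        JobConf bare = new JobConf(false);

        // The forms seen in the examples below: copy an existing Configuration,
        // or let Hadoop locate the job jar from a class contained in it.
        Configuration base = new Configuration();
        JobConf fromConf = new JobConf(base);
        JobConf fromClass = new JobConf(JobConfConstructorSketch.class);

        System.out.println(fromConf.getJobName() + " / " + fromClass.getJar());
    }
}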
From source file:com.ebay.erl.mobius.core.SortProjectionConfigure.java
License:Apache License
SortProjectionConfigure(Configuration conf, Dataset aDataset) throws IOException {
    Configuration aJobConf = aDataset.createJobConf(assignedDatasetID);
    this.conf = new JobConf(Util.merge(conf, aJobConf));
    this.conf.set(ConfigureConstants.IS_SORT_JOB, "true");
    this.conf.set(ConfigureConstants.MAPPER_CLASS, aDataset.getMapper().getCanonicalName());
    this.aDataset = aDataset;
}
From source file:com.example.hadoop.hdfs.test.HdfsClient.java
License:Open Source License
public static JobConf config() {
    JobConf conf = new JobConf(HdfsClient.class);
    conf.setJobName("HdfsClient");
    conf.addResource("classpath:/hadoop/core-site.xml");
    conf.addResource("classpath:/hadoop/hdfs-site.xml");
    conf.addResource("classpath:/hadoop/mapred-site.xml");
    return conf;
}
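A typical way such a helper might be consumed (a hypothetical sketch, not part of the HdfsClient source above): since JobConf extends Configuration, the returned object can be handed directly to FileSystem.get.

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsClientUsageSketch {
    public static void main(String[] args) throws IOException {
        // JobConf is a Configuration, so it works anywhere a Configuration
        // is expected, e.g. when obtaining a FileSystem handle.
        FileSystem fs = FileSystem.get(HdfsClient.config());
        System.out.println(fs.exists(new Path("/tmp")));
    }
}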
From source file:com.example.hadoop.mapreduce.test.MapReduceTest.java
License:Open Source License
public static void main(String[] args) throws IOException {
    String input = HDFS_PATH + "/input/README.txt";
    String input2 = HDFS_PATH + "/input/README2.txt";
    String output = HDFS_PATH + "/test/output";

    // Remove the output directory if it already exists so the job can be rerun.
    if (HdfsClient.exists(output)) {
        HdfsClient.rm(output);
    }

    JobConf conf = new JobConf(MapReduceTest.class);
    conf.setJobName("MapReduceTest");
    conf.addResource("classpath:/hadoop/core-site.xml");
    conf.addResource("classpath:/hadoop/hdfs-site.xml");
    conf.addResource("classpath:/hadoop/mapred-site.xml");

    // Mapper output key/value types
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);

    // Job (reducer) output key/value types
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    // Mapper class
    conf.setMapperClass(MapperTest.class);
    // The combiner runs between the mapper and the reducer; here it reuses the reducer.
    conf.setCombinerClass(ReducerTest.class);
    // Reducer class
    conf.setReducerClass(ReducerTest.class);

    // Input and output formats
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    // Input paths and output path
    FileInputFormat.setInputPaths(conf, new Path[] { new Path(input), new Path(input2) });
    FileOutputFormat.setOutputPath(conf, new Path(output));

    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.facebook.hive.orc.TestInputOutputFormat.java
License:Apache License
@Test
public void testMROutput() throws Exception {
    JobConf job = new JobConf(conf);
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(NestedRow.class,
                ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    SerDe serde = new OrcSerde();
    OutputFormat<?, ?> outFormat = new OrcOutputFormat();
    RecordWriter writer = outFormat.getRecordWriter(fs, conf, testFilePath.toString(), Reporter.NULL);
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(1, 2, 3), inspector));
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(4, 5, 6), inspector));
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(7, 8, 9), inspector));
    writer.close(Reporter.NULL);
    serde = new OrcSerde();
    properties.setProperty("columns", "z,r");
    properties.setProperty("columns.types", "int:struct<x:int,y:int>");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);
    conf.set("hive.io.file.readcolumn.ids", "1");
    org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Object value = reader.createValue();
    int rowNum = 0;
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    StructObjectInspector inner = (StructObjectInspector) fields.get(1).getFieldObjectInspector();
    List<? extends StructField> inFields = inner.getAllStructFieldRefs();
    IntObjectInspector intInspector = (IntObjectInspector) inFields.get(0).getFieldObjectInspector();
    while (reader.next(key, value)) {
        assertEquals(null, inspector.getStructFieldData(value, fields.get(0)));
        Object sub = inspector.getStructFieldData(value, fields.get(1));
        assertEquals(3 * rowNum + 1, intInspector.get(inner.getStructFieldData(sub, inFields.get(0))));
        assertEquals(3 * rowNum + 2, intInspector.get(inner.getStructFieldData(sub, inFields.get(1))));
        rowNum += 1;
    }
    assertEquals(3, rowNum);
    reader.close();
}
From source file:com.facebook.hive.orc.TestInputOutputFormat.java
License:Apache License
@Test
public void testMROutput2() throws Exception {
    JobConf job = new JobConf(conf);
    // Test that you can set the output directory using this config
    job.set("mapred.work.output.dir", testFilePath.getParent().toString());
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class,
                ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    SerDe serde = new OrcSerde();
    OutputFormat<?, ?> outFormat = new OrcOutputFormat();
    RecordWriter writer = outFormat.getRecordWriter(fs, job, testFilePath.getName(), Reporter.NULL);
    writer.write(NullWritable.get(), serde.serialize(new StringRow("a"), inspector));
    writer.close(Reporter.NULL);
    serde = new OrcSerde();
    properties.setProperty("columns", "col");
    properties.setProperty("columns.types", "string");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);
    org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Object value = reader.createValue();
    int rowNum = 0;
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    reader.next(key, value);
    assertEquals("a", ((StringObjectInspector) fields.get(0).getFieldObjectInspector())
            .getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    reader.close();
}
From source file:com.facebook.hive.orc.TestInputOutputFormat.java
License:Apache License
@Test
public void testEmptyFile() throws Exception {
    JobConf job = new JobConf(conf);
    Properties properties = new Properties();
    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    FileSinkOperator.RecordWriter writer = outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
            properties, Reporter.NULL);
    writer.close(true);
    properties.setProperty("columns", "x,y");
    properties.setProperty("columns.types", "int:int");
    SerDe serde = new OrcSerde();
    serde.initialize(conf, properties);
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);
    // read the whole file
    conf.set("hive.io.file.readcolumn.ids", "0,1");
    org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Object value = reader.createValue();
    assertEquals(0.0, reader.getProgress(), 0.00001);
    assertEquals(0, reader.getPos());
    assertEquals(false, reader.next(key, value));
    reader.close();
    assertEquals(null, serde.getSerDeStats());
}
From source file:com.facebook.hive.orc.TestInputOutputFormat.java
License:Apache License
@Test
public void testDefaultTypes() throws Exception {
    JobConf job = new JobConf(conf);
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class,
                ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    SerDe serde = new OrcSerde();
    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    FileSinkOperator.RecordWriter writer = outFormat.getHiveRecordWriter(conf, testFilePath, StringRow.class,
            true, properties, Reporter.NULL);
    writer.write(serde.serialize(new StringRow("owen"), inspector));
    writer.write(serde.serialize(new StringRow("beth"), inspector));
    writer.write(serde.serialize(new StringRow("laurel"), inspector));
    writer.write(serde.serialize(new StringRow("hazen"), inspector));
    writer.write(serde.serialize(new StringRow("colin"), inspector));
    writer.write(serde.serialize(new StringRow("miles"), inspector));
    writer.close(true);
    serde = new OrcSerde();
    properties.setProperty("columns", "str,str2");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    assertEquals("struct<str:string,str2:string>", inspector.getTypeName());
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);
    // read the whole file
    org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Writable value = (Writable) reader.createValue();
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    StringObjectInspector strInspector = (StringObjectInspector) fields.get(0).getFieldObjectInspector();
    assertEquals(true, reader.next(key, value));
    assertEquals("owen", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("beth", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("laurel", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("hazen", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("colin", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("miles", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(false, reader.next(key, value));
    reader.close();
}
From source file:com.facebook.hive.orc.TestInputOutputFormat.java
License:Apache License
/**
 * Tests that passing null as the file system to getRecordWriter works; this is
 * to be compatible with the way Sequence and RC file tolerate nulls.
 * @throws Exception
 */
@Test
public void testNullFileSystem() throws Exception {
    conf.set("mapred.work.output.dir", testFilePath.getParent().toString());
    JobConf job = new JobConf(conf);
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class,
                ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    OrcSerde serde = new OrcSerde();
    OrcOutputFormat outFormat = new OrcOutputFormat();
    RecordWriter<NullWritable, OrcSerdeRow> writer = outFormat.getRecordWriter(null, conf,
            testFilePath.getName(), Reporter.NULL);
    writer.write(NullWritable.get(), (OrcSerdeRow) serde.serialize(new StringRow("a"), inspector));
    writer.write(NullWritable.get(), (OrcSerdeRow) serde.serialize(new StringRow("b"), inspector));
    writer.write(NullWritable.get(), (OrcSerdeRow) serde.serialize(new StringRow("c"), inspector));
    writer.close(Reporter.NULL);
    serde = new OrcSerde();
    properties.setProperty("columns", "str,str2");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    OrcInputFormat in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);
    // read the whole file
    org.apache.hadoop.mapred.RecordReader<NullWritable, OrcLazyRow> reader = in.getRecordReader(splits[0], conf,
            Reporter.NULL);
    NullWritable key = reader.createKey();
    OrcLazyRow value = (OrcLazyRow) reader.createValue();
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    StringObjectInspector strInspector = (StringObjectInspector) fields.get(0).getFieldObjectInspector();
    assertEquals(true, reader.next(key, value));
    assertEquals("a", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("b", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("c", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(false, reader.next(key, value));
    reader.close();
}
From source file:com.facebook.hiveio.common.HadoopUtils.java
License:Apache License
/**
 * Hack to configure InputFormats before they get used.
 * @param inputFormat InputFormat to configure
 * @param conf Configuration to use
 */
public static void configureInputFormat(InputFormat inputFormat, Configuration conf) {
    JobConf jobConf = new JobConf(conf);
    setJobConfIfPossible(inputFormat, jobConf);
    // TextInputFormat is not always JobConfigurable, so we need to explicitly
    // call this here to make sure it gets configured with the
    // compression codecs.
    if (inputFormat instanceof TextInputFormat) {
        ((TextInputFormat) inputFormat).configure(jobConf);
    }
}
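A hedged sketch of how this helper might be called (the TextInputFormat instance and the bare Configuration here are illustrative, not taken from the HiveIO source):

import com.facebook.hiveio.common.HadoopUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.TextInputFormat;

public class ConfigureInputFormatSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        TextInputFormat inputFormat = new TextInputFormat();
        // Make sure the input format sees the job settings (e.g. compression
        // codecs) before getSplits()/getRecordReader() are invoked.
        HadoopUtils.configureInputFormat(inputFormat, conf);
    }
}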
From source file:com.facebook.hiveio.input.HiveApiInputFormat.java
License:Apache License
/**
 * Compute splits from partitions
 *
 * @param conf Configuration
 * @param inputDesc Hive table input description
 * @param tableSchema schema for table
 * @param partitions list of input partitions
 * @return list of input splits
 * @throws IOException
 */
private List<InputSplit> computeSplits(Configuration conf, HiveInputDescription inputDesc,
        HiveTableSchema tableSchema, List<InputPartition> partitions) throws IOException {
    int partitionNum = 0;
    List<InputSplit> splits = Lists.newArrayList();
    int[] columnIds = computeColumnIds(inputDesc.getColumns(), tableSchema);
    for (InputPartition inputPartition : partitions) {
        org.apache.hadoop.mapred.InputFormat baseInputFormat = inputPartition.makeInputFormat(conf);
        HadoopUtils.setInputDir(conf, inputPartition.getLocation());
        org.apache.hadoop.mapred.InputSplit[] baseSplits = baseInputFormat.getSplits(new JobConf(conf),
                inputDesc.getNumSplits());
        LOG.info(
                "Requested {} splits from partition ({} out of {}) partition values: "
                        + "{}, got {} splits from inputFormat {}",
                inputDesc.getNumSplits(), partitionNum + 1, Iterables.size(partitions),
                inputPartition.getInputSplitData().getPartitionValues(), baseSplits.length,
                baseInputFormat.getClass().getCanonicalName());
        for (org.apache.hadoop.mapred.InputSplit baseSplit : baseSplits) {
            InputSplit split = new HInputSplit(baseInputFormat, baseSplit, tableSchema, columnIds,
                    inputPartition.getInputSplitData(), conf);
            splits.add(split);
        }
        partitionNum++;
    }
    return splits;
}