Example usage for org.apache.hadoop.mapred JobConf JobConf


Introduction

On this page you can find usage examples for the org.apache.hadoop.mapred.JobConf constructor.

Prototype

public JobConf(boolean loadDefaults) 

Document

A new map/reduce configuration where the behavior of reading from the default resources can be turned off.
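Before the examples, here is a minimal, self-contained sketch (not taken from any of the source files below) of what the loadDefaults flag controls; the class name JobConfDefaultsSketch and the property my.custom.key are made up for illustration.

import org.apache.hadoop.mapred.JobConf;

public class JobConfDefaultsSketch {
    public static void main(String[] args) {
        // loadDefaults = true: core-default.xml, core-site.xml and friends are
        // read from the classpath, as in most of the examples below.
        JobConf withDefaults = new JobConf(true);

        // loadDefaults = false: the default resources are skipped, so only
        // properties set explicitly (or added via addResource) are visible.
        JobConf withoutDefaults = new JobConf(false);
        withoutDefaults.set("my.custom.key", "value");

        System.out.println(withDefaults.get("fs.defaultFS"));     // from the default resources, if defined there
        System.out.println(withoutDefaults.get("fs.defaultFS"));  // null: defaults were not loaded
        System.out.println(withoutDefaults.get("my.custom.key")); // "value"
    }
}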

Usage

From source file:com.ebay.erl.mobius.core.SortProjectionConfigure.java

License:Apache License

SortProjectionConfigure(Configuration conf, Dataset aDataset) throws IOException {
    Configuration aJobConf = aDataset.createJobConf(assignedDatasetID);
    this.conf = new JobConf(Util.merge(conf, aJobConf));
    this.conf.set(ConfigureConstants.IS_SORT_JOB, "true");
    this.conf.set(ConfigureConstants.MAPPER_CLASS, aDataset.getMapper().getCanonicalName());
    this.aDataset = aDataset;
}

From source file:com.example.hadoop.hdfs.test.HdfsClient.java

License:Open Source License

public static JobConf config() {
    JobConf conf = new JobConf(HdfsClient.class);
    conf.setJobName("HdfsClient");
    conf.addResource("classpath:/hadoop/core-site.xml");
    conf.addResource("classpath:/hadoop/hdfs-site.xml");
    conf.addResource("classpath:/hadoop/mapred-site.xml");
    return conf;
}

From source file:com.example.hadoop.mapreduce.test.MapReduceTest.java

License:Open Source License

public static void main(String[] args) throws IOException {
    String input = HDFS_PATH + "/input/README.txt";
    String input2 = HDFS_PATH + "/input/README2.txt";
    String output = HDFS_PATH + "/test/output";

    // remove the MapReduce output directory if it already exists
    if (HdfsClient.exists(output)) {
        HdfsClient.rm(output);
    }

    JobConf conf = new JobConf(MapReduceTest.class);
    conf.setJobName("MapReduceTest");
    conf.addResource("classpath:/hadoop/core-site.xml");
    conf.addResource("classpath:/hadoop/hdfs-site.xml");
    conf.addResource("classpath:/hadoop/mapred-site.xml");

    // mapper output key/value types
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);

    // reducer (job) output key/value types
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    // mapper class
    conf.setMapperClass(MapperTest.class);
    // combiner: runs locally on mapper output before it is sent to the reducer
    conf.setCombinerClass(ReducerTest.class);
    // reducer class
    conf.setReducerClass(ReducerTest.class);

    // MapReduce input and output formats
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    // MapReduce input paths
    FileInputFormat.setInputPaths(conf, new Path[] { new Path(input), new Path(input2) });
    // MapReduce output path
    FileOutputFormat.setOutputPath(conf, new Path(output));

    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:com.facebook.hive.orc.TestInputOutputFormat.java

License:Apache License

@Test
public void testMROutput() throws Exception {
    JobConf job = new JobConf(conf);
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(NestedRow.class,
                ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    SerDe serde = new OrcSerde();
    OutputFormat<?, ?> outFormat = new OrcOutputFormat();
    RecordWriter writer = outFormat.getRecordWriter(fs, conf, testFilePath.toString(), Reporter.NULL);
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(1, 2, 3), inspector));
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(4, 5, 6), inspector));
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(7, 8, 9), inspector));
    writer.close(Reporter.NULL);
    serde = new OrcSerde();
    properties.setProperty("columns", "z,r");
    properties.setProperty("columns.types", "int:struct<x:int,y:int>");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);
    conf.set("hive.io.file.readcolumn.ids", "1");
    org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Object value = reader.createValue();
    int rowNum = 0;
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    StructObjectInspector inner = (StructObjectInspector) fields.get(1).getFieldObjectInspector();
    List<? extends StructField> inFields = inner.getAllStructFieldRefs();
    IntObjectInspector intInspector = (IntObjectInspector) inFields.get(0).getFieldObjectInspector();
    while (reader.next(key, value)) {
        assertEquals(null, inspector.getStructFieldData(value, fields.get(0)));
        Object sub = inspector.getStructFieldData(value, fields.get(1));
        assertEquals(3 * rowNum + 1, intInspector.get(inner.getStructFieldData(sub, inFields.get(0))));
        assertEquals(3 * rowNum + 2, intInspector.get(inner.getStructFieldData(sub, inFields.get(1))));
        rowNum += 1;
    }
    assertEquals(3, rowNum);
    reader.close();
}

From source file:com.facebook.hive.orc.TestInputOutputFormat.java

License:Apache License

@Test
public void testMROutput2() throws Exception {
    JobConf job = new JobConf(conf);
    // Test that you can set the output directory using this config
    job.set("mapred.work.output.dir", testFilePath.getParent().toString());
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class,
                ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    SerDe serde = new OrcSerde();
    OutputFormat<?, ?> outFormat = new OrcOutputFormat();
    RecordWriter writer = outFormat.getRecordWriter(fs, job, testFilePath.getName(), Reporter.NULL);
    writer.write(NullWritable.get(), serde.serialize(new StringRow("a"), inspector));
    writer.close(Reporter.NULL);
    serde = new OrcSerde();
    properties.setProperty("columns", "col");
    properties.setProperty("columns.types", "string");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);
    org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Object value = reader.createValue();
    int rowNum = 0;
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    reader.next(key, value);
    assertEquals("a", ((StringObjectInspector) fields.get(0).getFieldObjectInspector())
            .getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    reader.close();

}

From source file:com.facebook.hive.orc.TestInputOutputFormat.java

License:Apache License

@Test
public void testEmptyFile() throws Exception {
    JobConf job = new JobConf(conf);
    Properties properties = new Properties();
    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    FileSinkOperator.RecordWriter writer = outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
            properties, Reporter.NULL);
    writer.close(true);
    properties.setProperty("columns", "x,y");
    properties.setProperty("columns.types", "int:int");
    SerDe serde = new OrcSerde();
    serde.initialize(conf, properties);
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);

    // read the whole file
    conf.set("hive.io.file.readcolumn.ids", "0,1");
    org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Object value = reader.createValue();
    assertEquals(0.0, reader.getProgress(), 0.00001);
    assertEquals(0, reader.getPos());
    assertEquals(false, reader.next(key, value));
    reader.close();
    assertEquals(null, serde.getSerDeStats());
}

From source file:com.facebook.hive.orc.TestInputOutputFormat.java

License:Apache License

@Test
public void testDefaultTypes() throws Exception {
    JobConf job = new JobConf(conf);
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class,
                ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    SerDe serde = new OrcSerde();
    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    FileSinkOperator.RecordWriter writer = outFormat.getHiveRecordWriter(conf, testFilePath, StringRow.class,
            true, properties, Reporter.NULL);
    writer.write(serde.serialize(new StringRow("owen"), inspector));
    writer.write(serde.serialize(new StringRow("beth"), inspector));
    writer.write(serde.serialize(new StringRow("laurel"), inspector));
    writer.write(serde.serialize(new StringRow("hazen"), inspector));
    writer.write(serde.serialize(new StringRow("colin"), inspector));
    writer.write(serde.serialize(new StringRow("miles"), inspector));
    writer.close(true);
    serde = new OrcSerde();
    properties.setProperty("columns", "str,str2");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    assertEquals("struct<str:string,str2:string>", inspector.getTypeName());
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);

    // read the whole file
    org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Writable value = (Writable) reader.createValue();
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    StringObjectInspector strInspector = (StringObjectInspector) fields.get(0).getFieldObjectInspector();
    assertEquals(true, reader.next(key, value));
    assertEquals("owen",
            strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("beth",
            strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("laurel",
            strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("hazen",
            strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("colin",
            strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("miles",
            strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(false, reader.next(key, value));
    reader.close();
}

From source file:com.facebook.hive.orc.TestInputOutputFormat.java

License:Apache License

/**
 * Tests that passing null as the file system to getRecordWriter works; this is
 * to be compatible with the way SequenceFile and RCFile tolerate nulls.
 * @throws Exception
 */
@Test
public void testNullFileSystem() throws Exception {
    conf.set("mapred.work.output.dir", testFilePath.getParent().toString());
    JobConf job = new JobConf(conf);
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class,
                ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    OrcSerde serde = new OrcSerde();
    OrcOutputFormat outFormat = new OrcOutputFormat();
    RecordWriter<NullWritable, OrcSerdeRow> writer = outFormat.getRecordWriter(null, conf,
            testFilePath.getName(), Reporter.NULL);

    writer.write(NullWritable.get(), (OrcSerdeRow) serde.serialize(new StringRow("a"), inspector));
    writer.write(NullWritable.get(), (OrcSerdeRow) serde.serialize(new StringRow("b"), inspector));
    writer.write(NullWritable.get(), (OrcSerdeRow) serde.serialize(new StringRow("c"), inspector));
    writer.close(Reporter.NULL);
    serde = new OrcSerde();
    properties.setProperty("columns", "str,str2");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    OrcInputFormat in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);

    // read the whole file
    org.apache.hadoop.mapred.RecordReader<NullWritable, OrcLazyRow> reader = in.getRecordReader(splits[0], conf,
            Reporter.NULL);
    NullWritable key = reader.createKey();
    OrcLazyRow value = (OrcLazyRow) reader.createValue();
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    StringObjectInspector strInspector = (StringObjectInspector) fields.get(0).getFieldObjectInspector();
    assertEquals(true, reader.next(key, value));
    assertEquals("a", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("b", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("c", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(false, reader.next(key, value));
    reader.close();
}

From source file:com.facebook.hiveio.common.HadoopUtils.java

License:Apache License

/**
 * Hack to configure InputFormats before they get used.
 * @param inputFormat InputFormat to configure
 * @param conf Configuration to use
 */
public static void configureInputFormat(InputFormat inputFormat, Configuration conf) {
    JobConf jobConf = new JobConf(conf);
    setJobConfIfPossible(inputFormat, jobConf);
    // TextInputFormat is not always JobConfigurable, so we need to explicitly
    // call this here to make sure it gets configured with the
    // compression codecs.
    if (inputFormat instanceof TextInputFormat) {
        ((TextInputFormat) inputFormat).configure(jobConf);
    }
}

From source file:com.facebook.hiveio.input.HiveApiInputFormat.java

License:Apache License

/**
 * Compute splits from partitions
 *
 * @param conf Configuration
 * @param inputDesc Hive table input description
 * @param tableSchema schema for table
 * @param partitions list of input partitions
 * @return list of input splits
 * @throws IOException
 */
private List<InputSplit> computeSplits(Configuration conf, HiveInputDescription inputDesc,
        HiveTableSchema tableSchema, List<InputPartition> partitions) throws IOException {
    int partitionNum = 0;
    List<InputSplit> splits = Lists.newArrayList();

    int[] columnIds = computeColumnIds(inputDesc.getColumns(), tableSchema);

    for (InputPartition inputPartition : partitions) {
        org.apache.hadoop.mapred.InputFormat baseInputFormat = inputPartition.makeInputFormat(conf);
        HadoopUtils.setInputDir(conf, inputPartition.getLocation());

        org.apache.hadoop.mapred.InputSplit[] baseSplits = baseInputFormat.getSplits(new JobConf(conf),
                inputDesc.getNumSplits());
        LOG.info(
                "Requested {} splits from partition ({} out of {}) partition values: "
                        + "{}, got {} splits from inputFormat {}",
                inputDesc.getNumSplits(), partitionNum + 1, Iterables.size(partitions),
                inputPartition.getInputSplitData().getPartitionValues(), baseSplits.length,
                baseInputFormat.getClass().getCanonicalName());

        for (org.apache.hadoop.mapred.InputSplit baseSplit : baseSplits) {
            InputSplit split = new HInputSplit(baseInputFormat, baseSplit, tableSchema, columnIds,
                    inputPartition.getInputSplitData(), conf);
            splits.add(split);
        }

        partitionNum++;
    }
    return splits;
}