List of usage examples for the org.apache.hadoop.mapred.JobConf constructor
public JobConf(boolean loadDefaults)
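Before the project examples below, here is a minimal standalone sketch (hypothetical, not taken from any of the source files listed here) of the constructor forms involved: JobConf(boolean) controls whether the default resources (core-default.xml, core-site.xml, ...) are loaded, while the JobConf(Configuration) and JobConf(Class) forms used throughout the examples copy an existing configuration or record the class whose jar should ship with the job.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;

public class JobConfConstructorSketch {
    public static void main(String[] args) {
        // Skip loading the default resources entirely.
        JobConf bare = new JobConf(false);

        // The forms seen in the examples below: copy an existing Configuration,
        // or let Hadoop locate the job jar from a class contained in it.
        Configuration base = new Configuration();
        JobConf fromConf = new JobConf(base);
        JobConf fromClass = new JobConf(JobConfConstructorSketch.class);

        System.out.println(fromConf.getJobName() + " / " + fromClass.getJar());
    }
}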
From source file:com.ebay.erl.mobius.core.SortProjectionConfigure.java
License:Apache License
SortProjectionConfigure(Configuration conf, Dataset aDataset) throws IOException {
    Configuration aJobConf = aDataset.createJobConf(assignedDatasetID);
    this.conf = new JobConf(Util.merge(conf, aJobConf));
    this.conf.set(ConfigureConstants.IS_SORT_JOB, "true");
    this.conf.set(ConfigureConstants.MAPPER_CLASS, aDataset.getMapper().getCanonicalName());
    this.aDataset = aDataset;
}
From source file:com.example.hadoop.hdfs.test.HdfsClient.java
License:Open Source License
public static JobConf config() {
    JobConf conf = new JobConf(HdfsClient.class);
    conf.setJobName("HdfsClient");
    conf.addResource("classpath:/hadoop/core-site.xml");
    conf.addResource("classpath:/hadoop/hdfs-site.xml");
    conf.addResource("classpath:/hadoop/mapred-site.xml");
    return conf;
}
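A typical way such a helper might be consumed (a hypothetical sketch, not part of the HdfsClient source above): since JobConf extends Configuration, the returned object can be handed directly to FileSystem.get.

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsClientUsageSketch {
    public static void main(String[] args) throws IOException {
        // JobConf is a Configuration, so it works anywhere a Configuration
        // is expected, e.g. when obtaining a FileSystem handle.
        FileSystem fs = FileSystem.get(HdfsClient.config());
        System.out.println(fs.exists(new Path("/tmp")));
    }
}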
From source file:com.example.hadoop.mapreduce.test.MapReduceTest.java
License:Open Source License
public static void main(String[] args) throws IOException {
    String input = HDFS_PATH + "/input/README.txt";
    String input2 = HDFS_PATH + "/input/README2.txt";
    String output = HDFS_PATH + "/test/output";

    // Remove the output directory if it already exists so the job can be rerun.
    if (HdfsClient.exists(output)) {
        HdfsClient.rm(output);
    }

    JobConf conf = new JobConf(MapReduceTest.class);
    conf.setJobName("MapReduceTest");
    conf.addResource("classpath:/hadoop/core-site.xml");
    conf.addResource("classpath:/hadoop/hdfs-site.xml");
    conf.addResource("classpath:/hadoop/mapred-site.xml");

    // Mapper output key/value types
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);

    // Job (reducer) output key/value types
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    // Mapper class
    conf.setMapperClass(MapperTest.class);
    // The combiner runs between the mapper and the reducer; here it reuses the reducer.
    conf.setCombinerClass(ReducerTest.class);
    // Reducer class
    conf.setReducerClass(ReducerTest.class);

    // Input and output formats
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    // Input paths and output path
    FileInputFormat.setInputPaths(conf, new Path[] { new Path(input), new Path(input2) });
    FileOutputFormat.setOutputPath(conf, new Path(output));

    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.facebook.hive.orc.TestInputOutputFormat.java
License:Apache License
@Test
public void testMROutput() throws Exception {
    JobConf job = new JobConf(conf);
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(NestedRow.class,
                ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    SerDe serde = new OrcSerde();
    OutputFormat<?, ?> outFormat = new OrcOutputFormat();
    RecordWriter writer = outFormat.getRecordWriter(fs, conf, testFilePath.toString(), Reporter.NULL);
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(1, 2, 3), inspector));
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(4, 5, 6), inspector));
    writer.write(NullWritable.get(), serde.serialize(new NestedRow(7, 8, 9), inspector));
    writer.close(Reporter.NULL);
    serde = new OrcSerde();
    properties.setProperty("columns", "z,r");
    properties.setProperty("columns.types", "int:struct<x:int,y:int>");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);
    conf.set("hive.io.file.readcolumn.ids", "1");
    org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Object value = reader.createValue();
    int rowNum = 0;
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    StructObjectInspector inner = (StructObjectInspector) fields.get(1).getFieldObjectInspector();
    List<? extends StructField> inFields = inner.getAllStructFieldRefs();
    IntObjectInspector intInspector = (IntObjectInspector) inFields.get(0).getFieldObjectInspector();
    while (reader.next(key, value)) {
        assertEquals(null, inspector.getStructFieldData(value, fields.get(0)));
        Object sub = inspector.getStructFieldData(value, fields.get(1));
        assertEquals(3 * rowNum + 1, intInspector.get(inner.getStructFieldData(sub, inFields.get(0))));
        assertEquals(3 * rowNum + 2, intInspector.get(inner.getStructFieldData(sub, inFields.get(1))));
        rowNum += 1;
    }
    assertEquals(3, rowNum);
    reader.close();
}
From source file:com.facebook.hive.orc.TestInputOutputFormat.java
License:Apache License
@Test
public void testMROutput2() throws Exception {
    JobConf job = new JobConf(conf);
    // Test that you can set the output directory using this config
    job.set("mapred.work.output.dir", testFilePath.getParent().toString());
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class,
                ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    SerDe serde = new OrcSerde();
    OutputFormat<?, ?> outFormat = new OrcOutputFormat();
    RecordWriter writer = outFormat.getRecordWriter(fs, job, testFilePath.getName(), Reporter.NULL);
    writer.write(NullWritable.get(), serde.serialize(new StringRow("a"), inspector));
    writer.close(Reporter.NULL);
    serde = new OrcSerde();
    properties.setProperty("columns", "col");
    properties.setProperty("columns.types", "string");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);
    org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Object value = reader.createValue();
    int rowNum = 0;
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    reader.next(key, value);
    assertEquals("a", ((StringObjectInspector) fields.get(0).getFieldObjectInspector())
            .getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    reader.close();
}
From source file:com.facebook.hive.orc.TestInputOutputFormat.java
License:Apache License
@Test
public void testEmptyFile() throws Exception {
    JobConf job = new JobConf(conf);
    Properties properties = new Properties();
    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    FileSinkOperator.RecordWriter writer = outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
            properties, Reporter.NULL);
    writer.close(true);
    properties.setProperty("columns", "x,y");
    properties.setProperty("columns.types", "int:int");
    SerDe serde = new OrcSerde();
    serde.initialize(conf, properties);
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);
    // read the whole file
    conf.set("hive.io.file.readcolumn.ids", "0,1");
    org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Object value = reader.createValue();
    assertEquals(0.0, reader.getProgress(), 0.00001);
    assertEquals(0, reader.getPos());
    assertEquals(false, reader.next(key, value));
    reader.close();
    assertEquals(null, serde.getSerDeStats());
}
From source file:com.facebook.hive.orc.TestInputOutputFormat.java
License:Apache License
@Test
public void testDefaultTypes() throws Exception {
    JobConf job = new JobConf(conf);
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class,
                ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    SerDe serde = new OrcSerde();
    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    FileSinkOperator.RecordWriter writer = outFormat.getHiveRecordWriter(conf, testFilePath, StringRow.class,
            true, properties, Reporter.NULL);
    writer.write(serde.serialize(new StringRow("owen"), inspector));
    writer.write(serde.serialize(new StringRow("beth"), inspector));
    writer.write(serde.serialize(new StringRow("laurel"), inspector));
    writer.write(serde.serialize(new StringRow("hazen"), inspector));
    writer.write(serde.serialize(new StringRow("colin"), inspector));
    writer.write(serde.serialize(new StringRow("miles"), inspector));
    writer.close(true);
    serde = new OrcSerde();
    properties.setProperty("columns", "str,str2");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    assertEquals("struct<str:string,str2:string>", inspector.getTypeName());
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);
    // read the whole file
    org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Writable value = (Writable) reader.createValue();
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    StringObjectInspector strInspector = (StringObjectInspector) fields.get(0).getFieldObjectInspector();
    assertEquals(true, reader.next(key, value));
    assertEquals("owen", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("beth", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("laurel", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("hazen", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("colin", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("miles", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(false, reader.next(key, value));
    reader.close();
}
From source file:com.facebook.hive.orc.TestInputOutputFormat.java
License:Apache License
/**
 * Tests that passing null as the file system to getRecordWriter works; this is
 * to be compatible with the way Sequence and RC file tolerate nulls.
 * @throws Exception
 */
@Test
public void testNullFileSystem() throws Exception {
    conf.set("mapred.work.output.dir", testFilePath.getParent().toString());
    JobConf job = new JobConf(conf);
    Properties properties = new Properties();
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class,
                ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    OrcSerde serde = new OrcSerde();
    OrcOutputFormat outFormat = new OrcOutputFormat();
    RecordWriter<NullWritable, OrcSerdeRow> writer = outFormat.getRecordWriter(null, conf,
            testFilePath.getName(), Reporter.NULL);
    writer.write(NullWritable.get(), (OrcSerdeRow) serde.serialize(new StringRow("a"), inspector));
    writer.write(NullWritable.get(), (OrcSerdeRow) serde.serialize(new StringRow("b"), inspector));
    writer.write(NullWritable.get(), (OrcSerdeRow) serde.serialize(new StringRow("c"), inspector));
    writer.close(Reporter.NULL);
    serde = new OrcSerde();
    properties.setProperty("columns", "str,str2");
    serde.initialize(conf, properties);
    inspector = (StructObjectInspector) serde.getObjectInspector();
    OrcInputFormat in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);
    // read the whole file
    org.apache.hadoop.mapred.RecordReader<NullWritable, OrcLazyRow> reader = in.getRecordReader(splits[0], conf,
            Reporter.NULL);
    NullWritable key = reader.createKey();
    OrcLazyRow value = (OrcLazyRow) reader.createValue();
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    StringObjectInspector strInspector = (StringObjectInspector) fields.get(0).getFieldObjectInspector();
    assertEquals(true, reader.next(key, value));
    assertEquals("a", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("b", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(true, reader.next(key, value));
    assertEquals("c", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(false, reader.next(key, value));
    reader.close();
}
From source file:com.facebook.hiveio.common.HadoopUtils.java
License:Apache License
/**
 * Hack to configure InputFormats before they get used.
 * @param inputFormat InputFormat to configure
 * @param conf Configuration to use
 */
public static void configureInputFormat(InputFormat inputFormat, Configuration conf) {
    JobConf jobConf = new JobConf(conf);
    setJobConfIfPossible(inputFormat, jobConf);
    // TextInputFormat is not always JobConfigurable, so we need to explicitly
    // call this here to make sure it gets configured with the
    // compression codecs.
    if (inputFormat instanceof TextInputFormat) {
        ((TextInputFormat) inputFormat).configure(jobConf);
    }
}
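A hedged sketch of how this helper might be called (the TextInputFormat instance and the bare Configuration here are illustrative, not taken from the HiveIO source):

import com.facebook.hiveio.common.HadoopUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.TextInputFormat;

public class ConfigureInputFormatSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        TextInputFormat inputFormat = new TextInputFormat();
        // Make sure the input format sees the job settings (e.g. compression
        // codecs) before getSplits()/getRecordReader() are invoked.
        HadoopUtils.configureInputFormat(inputFormat, conf);
    }
}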
From source file:com.facebook.hiveio.input.HiveApiInputFormat.java
License:Apache License
/**
 * Compute splits from partitions
 *
 * @param conf Configuration
 * @param inputDesc Hive table input description
 * @param tableSchema schema for table
 * @param partitions list of input partitions
 * @return list of input splits
 * @throws IOException
 */
private List<InputSplit> computeSplits(Configuration conf, HiveInputDescription inputDesc,
        HiveTableSchema tableSchema, List<InputPartition> partitions) throws IOException {
    int partitionNum = 0;
    List<InputSplit> splits = Lists.newArrayList();
    int[] columnIds = computeColumnIds(inputDesc.getColumns(), tableSchema);
    for (InputPartition inputPartition : partitions) {
        org.apache.hadoop.mapred.InputFormat baseInputFormat = inputPartition.makeInputFormat(conf);
        HadoopUtils.setInputDir(conf, inputPartition.getLocation());
        org.apache.hadoop.mapred.InputSplit[] baseSplits = baseInputFormat.getSplits(new JobConf(conf),
                inputDesc.getNumSplits());
        LOG.info(
                "Requested {} splits from partition ({} out of {}) partition values: "
                        + "{}, got {} splits from inputFormat {}",
                inputDesc.getNumSplits(), partitionNum + 1, Iterables.size(partitions),
                inputPartition.getInputSplitData().getPartitionValues(), baseSplits.length,
                baseInputFormat.getClass().getCanonicalName());
        for (org.apache.hadoop.mapred.InputSplit baseSplit : baseSplits) {
            InputSplit split = new HInputSplit(baseInputFormat, baseSplit, tableSchema, columnIds,
                    inputPartition.getInputSplitData(), conf);
            splits.add(split);
        }
        partitionNum++;
    }
    return splits;
}