List of usage examples for org.apache.hadoop.mapred.JobConf.setOutputCommitter
public void setOutputCommitter(Class<? extends OutputCommitter> theClass)
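Before the project examples below, here is a minimal sketch of the call in context. It assumes the classic org.apache.hadoop.mapred API (Hadoop 1.x or later); the class names OutputCommitterExample and LoggingOutputCommitter are illustrative placeholders, not taken from any of the projects listed here.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputCommitter;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class OutputCommitterExample {

    // Custom committer: keeps FileOutputCommitter's behavior and logs when the job output is committed.
    public static class LoggingOutputCommitter extends FileOutputCommitter {
        @Override
        public void commitJob(JobContext context) throws IOException {
            super.commitJob(context);
            System.out.println("Job output committed for: " + context.getJobConf().getJobName());
        }
    }

    public static void main(String[] args) throws IOException {
        JobConf job = new JobConf(OutputCommitterExample.class);
        job.setJobName("output-committer-example");

        job.setInputFormat(TextInputFormat.class);
        job.setOutputFormat(TextOutputFormat.class);

        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        // Replace the default committer with the custom one; the class must have a no-arg constructor.
        job.setOutputCommitter(LoggingOutputCommitter.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        JobClient.runJob(job);
    }
}

The committer class is instantiated reflectively by the framework, which is why setOutputCommitter takes a Class rather than an instance.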
From source file:edu.umn.cs.spatialHadoop.temporal.RepartitionTemporal.java
License:Apache License
public static void repartitionMapReduce(Path[] inputPaths, Path outputPath, Shape stockShape, long blockSize,
        CellInfo[] cellInfos, String sindex, boolean overwrite) throws IOException {
    JobConf job = new JobConf(Repartition.class);
    job.setJobName("RepartitionTemporal");

    FileSystem outFs = outputPath.getFileSystem(job);

    // Overwrite output file
    if (outFs.exists(outputPath)) {
        if (overwrite)
            outFs.delete(outputPath, true);
        else
            throw new RuntimeException(
                    "Output file '" + outputPath + "' already exists and overwrite flag is not set");
    }

    // Decide which map function to use depending on the type of global index
    if (sindex.equals("rtree") || sindex.equals("str")) {
        // Repartition without replication
        job.setMapperClass(RepartitionMapNoReplication.class);
    } else {
        // Repartition with replication (grid and r+tree)
        job.setMapperClass(RepartitionMap.class);
    }
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(stockShape.getClass());

    CombinedSpatialInputFormat.setInputPaths(job, inputPaths);
    job.setInputFormat(CombinedSpatialInputFormat.class);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    FileOutputFormat.setOutputPath(job, outputPath);
    if (sindex.equals("grid") || sindex.equals("str") || sindex.equals("str+")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else if (sindex.equals("rtree") || sindex.equals("r+tree")) {
        // For now, the two types of local index are the same
        job.setOutputFormat(RTreeGridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }
    SpatialSite.setCells(job, cellInfos);
    job.setBoolean(SpatialSite.OVERWRITE, overwrite);

    // Set reduce function
    job.setReducerClass(RepartitionReduce.class);
    job.setNumReduceTasks(
            Math.max(1, Math.min(cellInfos.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));

    // Set output committer that combines output files together
    job.setOutputCommitter(RepartitionOutputCommitter.class);

    if (blockSize != 0) {
        job.setLong("dfs.block.size", blockSize);
        job.setLong("fs.local.block.size", blockSize);
    }

    JobClient.runJob(job);
}
From source file:org.apache.blur.hive.BlurHiveStorageHandler.java
License:Apache License
@Override
public void configureJobConf(TableDesc tableDesc, JobConf jobConf) {
    if (BlurSerDe.shouldUseMRWorkingPath(jobConf)) {
        String loadId = UUID.randomUUID().toString();
        jobConf.set(BlurSerDe.BLUR_MR_LOAD_ID, loadId);
        jobConf.setOutputCommitter(BlurHiveMRLoaderOutputCommitter.class);
    } else {
        try {
            String bulkId = UUID.randomUUID().toString();
            String connectionStr = jobConf.get(BlurSerDe.BLUR_CONTROLLER_CONNECTION_STR);
            Iface client = BlurClient.getClient(connectionStr);
            client.bulkMutateStart(bulkId);
            BlurHiveOutputFormat.setBulkId(jobConf, bulkId);
            jobConf.setOutputCommitter(BlurHiveOutputCommitter.class);
        } catch (BlurException e) {
            throw new RuntimeException(e);
        } catch (TException e) {
            throw new RuntimeException(e);
        }
    }
}
From source file:org.apache.hcatalog.hbase.TestHBaseBulkOutputFormat.java
License:Apache License
@Test
public void hbaseBulkOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException {
    String testName = "hbaseBulkOutputFormatTest";
    Path methodTestDir = new Path(getTestDir(), testName);
    LOG.info("starting: " + testName);

    String tableName = newTableName(testName).toLowerCase();
    String familyName = "my_family";
    byte[] familyNameBytes = Bytes.toBytes(familyName);

    // include hbase config in conf file
    Configuration conf = new Configuration(allConf);

    // create table
    conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName);
    conf.set("yarn.scheduler.capacity.root.queues", "default");
    conf.set("yarn.scheduler.capacity.root.default.capacity", "100");
    createTable(tableName, new String[] { familyName });

    String data[] = { "1,english:one,spanish:uno", "2,english:two,spanish:dos",
            "3,english:three,spanish:tres" };

    // input/output settings
    Path inputPath = new Path(methodTestDir, "mr_input");
    FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt"));
    for (String line : data)
        os.write(Bytes.toBytes(line + "\n"));
    os.close();
    Path interPath = new Path(methodTestDir, "inter");

    // create job
    JobConf job = new JobConf(conf);
    job.setWorkingDirectory(new Path(methodTestDir, "mr_work"));
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapWriteOldMapper.class);

    job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath);

    job.setOutputFormat(HBaseBulkOutputFormat.class);
    org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(job, interPath);
    job.setOutputCommitter(HBaseBulkOutputCommitter.class);

    // manually create transaction
    RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf);
    try {
        OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null);
        Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName));
        outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY,
                HCatUtil.serialize(txn));
        job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo));
    } finally {
        rm.close();
    }

    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(HCatRecord.class);

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(HCatRecord.class);

    job.setNumReduceTasks(0);

    RunningJob runJob = JobClient.runJob(job);
    runJob.waitForCompletion();
    assertTrue(runJob.isSuccessful());

    // verify
    HTable table = new HTable(conf, tableName);
    Scan scan = new Scan();
    scan.addFamily(familyNameBytes);
    ResultScanner scanner = table.getScanner(scan);
    int index = 0;
    for (Result result : scanner) {
        String vals[] = data[index].toString().split(",");
        for (int i = 1; i < vals.length; i++) {
            String pair[] = vals[i].split(":");
            assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0])));
            assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0]))));
        }
        index++;
    }
    // test if load count is the same
    assertEquals(data.length, index);
    // test if scratch directory was erased
    assertFalse(FileSystem.get(job).exists(interPath));
}
From source file:org.apache.parquet.hadoop.mapred.DeprecatedParquetOutputFormat.java
License:Apache License
public static void setAsOutputFormat(JobConf jobConf) {
    jobConf.setOutputFormat(DeprecatedParquetOutputFormat.class);
    jobConf.setOutputCommitter(MapredParquetOutputCommitter.class);
}
From source file:org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.java
License:Apache License
public static void setUpMultipleOutputs(JobConf job, byte[] resultIndexes, byte[] resultDimsUnknown,
        String[] outputs, OutputInfo[] outputInfos, boolean inBlockRepresentation, boolean mayContainCtable)
        throws Exception {
    if (resultIndexes.length != outputs.length)
        throw new Exception("number of outputs and result indexes does not match");
    if (outputs.length != outputInfos.length)
        throw new Exception("number of outputs and outputInfos indexes does not match");

    job.set(RESULT_INDEXES_CONFIG, MRJobConfiguration.getIndexesString(resultIndexes));
    job.set(RESULT_DIMS_UNKNOWN_CONFIG, MRJobConfiguration.getIndexesString(resultDimsUnknown));
    job.setStrings(OUTPUT_MATRICES_DIRS_CONFIG, outputs);
    job.setOutputCommitter(MultipleOutputCommitter.class);

    for (int i = 0; i < outputs.length; i++) {
        MapReduceTool.deleteFileIfExistOnHDFS(new Path(outputs[i]), job);
        if (mayContainCtable && resultDimsUnknown[i] == (byte) 1) {
            setOutputInfo(job, i, outputInfos[i], false);
        } else {
            setOutputInfo(job, i, outputInfos[i], inBlockRepresentation);
        }
        MultipleOutputs.addNamedOutput(job, Integer.toString(i), outputInfos[i].outputFormatClass,
                outputInfos[i].outputKeyClass, outputInfos[i].outputValueClass);
    }
    job.setOutputFormat(NullOutputFormat.class);

    // configure temp output
    Path tempOutputPath = new Path(constructTempOutputFilename());
    FileOutputFormat.setOutputPath(job, tempOutputPath);
    MapReduceTool.deleteFileIfExistOnHDFS(tempOutputPath, job);
}
From source file:org.commoncrawl.mapred.ec2.parser.EC2ParserTask.java
License:Open Source License
private static void parse(FileSystem fs, Configuration conf, ImmutableList<Path> paths) throws IOException {
    LOG.info("Need to Parse:" + paths.toString());

    // create output path
    long segmentId = System.currentTimeMillis();
    Path outputPath = new Path(S3N_BUCKET_PREFIX + SEGMENTS_PATH + Long.toString(segmentId));
    LOG.info("Starting Map-Reduce Job. SegmentId:" + segmentId + " OutputPath:" + outputPath);

    // run job...
    JobConf jobConf = new JobBuilder("parse job", conf)
            .inputs(paths).inputFormat(SequenceFileInputFormat.class).keyValue(Text.class, ParseOutput.class)
            .mapper(ParserMapper.class).maxMapAttempts(3).maxMapTaskFailures(100).speculativeExecution(true)
            .numReducers(0).outputFormat(ParserOutputFormat.class).output(outputPath)
            .minSplitSize(134217728 * 2).build();

    jobConf.set("fs.default.name", S3N_BUCKET_PREFIX);
    jobConf.setOutputCommitter(OutputCommitter.class);

    JobClient.runJob(jobConf);

    LOG.info("Job Finished. Writing Segments Manifest File");
    writeSegmentManifestFile(fs, segmentId, paths);
}
From source file:tachyon.client.keyvalue.hadoop.KeyValueOutputFormat.java
License:Apache License
/**
 * {@inheritDoc}
 * <p>
 * {@link KeyValueOutputCommitter} is forced to be used.
 * <p>
 * NOTE: This method is called immediately when job is submitted, so that modifications to the
 * {@link JobConf} are reflected in the whole job.
 */
@Override
public void checkOutputSpecs(FileSystem ignored, JobConf conf)
        throws FileAlreadyExistsException, InvalidJobConfException, IOException {
    super.checkOutputSpecs(ignored, conf);
    conf.setOutputCommitter(KeyValueOutputCommitter.class);
}
From source file:tachyon.examples.keyvalue.hadoop.CloneKeyValueStoreMapReduce.java
License:Apache License
/**
 * @param args two parameters, the first is the input key-value store path, the second is the
 *        output key-value store path
 * @throws Exception if any exception happens
 */
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(CloneKeyValueStoreMapReduce.class);
    conf.setJobName("clone key-value store");

    conf.setOutputKeyClass(BytesWritable.class);
    conf.setOutputValueClass(BytesWritable.class);

    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(KeyValueInputFormat.class);
    conf.setOutputFormat(KeyValueOutputFormat.class);
    conf.setOutputCommitter(KeyValueOutputCommitter.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}