List of usage examples for org.apache.hadoop.mapreduce.lib.output FileOutputFormat setOutputPath
public static void setOutputPath(Job job, Path outputDir)
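For orientation, a minimal sketch of a typical call site before the collected examples below. The class name SetOutputPathExample and the bare identity-job setup are illustrative, not drawn from the sources that follow. setOutputPath records the directory where the job's OutputFormat writes its part files; the directory must not already exist when the job is submitted, or output validation fails.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetOutputPathExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "set-output-path-example");
        job.setJarByClass(SetOutputPathExample.class);
        // No mapper/reducer set: Hadoop's identity mapper is used, which is
        // enough to demonstrate wiring up the input and output paths.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Input to read and output directory to create; the output directory
        // must not exist yet, or submission fails with FileAlreadyExistsException.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Invoked as, e.g.: hadoop jar example.jar SetOutputPathExample <input_dir> <output_dir>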
From source file:com.moz.fiji.mapreduce.output.HFileMapReduceJobOutput.java
License:Apache License
/** {@inheritDoc} */
@Override
public void configure(Job job) throws IOException {
    // Sets the Hadoop output format, Fiji output table, and number of reducers:
    super.configure(job);
    final Configuration conf = job.getConfiguration();

    // Fiji table context:
    conf.setClass(FijiConfKeys.FIJI_TABLE_CONTEXT_CLASS, HFileWriterContext.class, FijiTableContext.class);

    // Set the output path.
    FileOutputFormat.setOutputPath(job, mPath);

    // Configure the total order partitioner so generated HFile shards are contiguous and sorted.
    configurePartitioner(job, makeTableKeySplit(getOutputTableURI(), getNumReduceTasks(), conf));

    // Note: the HFile job output requires the reducer of the MapReduce job to be IdentityReducer.
    // This is enforced externally.
}
From source file:com.mozilla.grouperfish.pig.storage.DocumentVectorStorage.java
License:Apache License
@Override
public void setStoreLocation(String location, Job job) throws IOException {
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(VectorWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(location));
}
From source file:com.mozilla.grouperfish.pig.storage.LDACStorage.java
License:Apache License
@Override
public void setStoreLocation(String location, Job job) throws IOException {
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(location));
}
From source file:com.mozilla.grouperfish.transforms.coclustering.pig.storage.MahoutVectorStorage.java
License:Apache License
@Override
public void setStoreLocation(String location, Job job) throws IOException {
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(location));
}
From source file:com.mozilla.hadoop.Backup.java
License:Apache License
/**
 * @param args
 * @return
 * @throws IOException
 * @throws ParseException
 */
public Job initJob(String[] args) throws IOException, ParseException {
    Path inputPath = null;
    Path loadPath = null;
    String outputPath = null;
    boolean useSpecifiedPaths = false;
    for (int idx = 0; idx < args.length; idx++) {
        if ("-f".equals(args[idx])) {
            useSpecifiedPaths = true;
            loadPath = new Path(args[++idx]);
        } else if (idx == args.length - 1) {
            outputPath = args[idx];
        } else {
            inputPath = new Path(args[idx]);
        }
    }
    Path mrOutputPath = new Path(NAME + "-results");

    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.set("backup.input.path", inputPath.toString());
    conf.set("backup.output.path", outputPath);

    FileSystem inputFs = null;
    FileSystem outputFs = null;
    Path[] inputSources = null;
    try {
        inputFs = FileSystem.get(inputPath.toUri(), new Configuration());
        outputFs = FileSystem.get(getConf());
        if (useSpecifiedPaths) {
            inputSources = createInputSources(loadPaths(outputFs, loadPath), outputFs);
        } else {
            inputSources = createInputSources(getPaths(inputFs, inputPath, 0, 2), outputFs);
        }
    } finally {
        checkAndClose(inputFs);
        checkAndClose(outputFs);
    }

    Job job = new Job(getConf());
    job.setJobName(NAME);
    job.setJarByClass(Backup.class);

    job.setMapperClass(BackupMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(0);

    job.setInputFormatClass(TextInputFormat.class);
    for (Path source : inputSources) {
        System.out.println("Adding input path: " + source.toString());
        FileInputFormat.addInputPath(job, source);
    }
    FileOutputFormat.setOutputPath(job, mrOutputPath);

    return job;
}
From source file:com.mozilla.pig.storage.SeqFileMultiStorage.java
License:Apache License
@Override
public void setStoreLocation(String location, Job job) throws IOException {
    job.setOutputKeyClass(this.keyClass);
    Configuration conf = job.getConfiguration();
    if ("true".equals(conf.get("output.compression.enabled"))) {
        FileOutputFormat.setCompressOutput(job, true);
        String codec = conf.get("output.compression.codec");
        FileOutputFormat.setOutputCompressorClass(job,
                PigContext.resolveClassName(codec).asSubclass(CompressionCodec.class));
    }
    FileOutputFormat.setOutputPath(job, new Path(location));
}
From source file:com.mozilla.socorro.hadoop.CrashCountToHbase.java
License:LGPL
/**
 * @param args
 * @return
 * @throws IOException
 */
public Job initJob(String[] args) throws IOException {
    Job job = new Job(getConf());
    job.setJobName(NAME);
    job.setJarByClass(CrashCountToHbase.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));

    job.setMapperClass(CrashCountToHBaseMapper.class);
    job.setReducerClass(CrashCountToHBaseReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job;
}
From source file:com.mozilla.socorro.hadoop.CrashReportJob.java
License:LGPL
/**
 * @return
 * @throws IOException
 * @throws ParseException
 */
public static Job initJob(String jobName, Configuration conf, Class<?> mainClass,
        Class<? extends TableMapper> mapperClass, Class<? extends Reducer> combinerClass,
        Class<? extends Reducer> reducerClass, Map<byte[], byte[]> columns,
        Class<? extends WritableComparable> keyOut, Class<? extends Writable> valueOut,
        Path outputPath) throws IOException, ParseException {
    // Set both start/end time and start/stop row
    Calendar startCal = Calendar.getInstance();
    Calendar endCal = Calendar.getInstance();
    SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
    String startDateStr = conf.get(START_DATE);
    String endDateStr = conf.get(END_DATE);
    if (!StringUtils.isBlank(startDateStr)) {
        startCal.setTime(sdf.parse(startDateStr));
    }
    if (!StringUtils.isBlank(endDateStr)) {
        endCal.setTime(sdf.parse(endDateStr));
    }
    conf.setLong(START_TIME, startCal.getTimeInMillis());
    conf.setLong(END_TIME, DateUtil.getEndTimeAtResolution(endCal.getTimeInMillis(), Calendar.DATE));

    Job job = new Job(conf);
    job.setJobName(jobName);
    job.setJarByClass(mainClass);

    // input table configuration
    Scan[] scans = MultiScanTableMapReduceUtil.generateScans(startCal, endCal, columns, 100, false);
    MultiScanTableMapReduceUtil.initMultiScanTableMapperJob(TABLE_NAME_CRASH_REPORTS, scans, mapperClass,
            keyOut, valueOut, job);

    if (combinerClass != null) {
        job.setCombinerClass(combinerClass);
    }
    if (reducerClass != null) {
        job.setReducerClass(reducerClass);
    } else {
        job.setNumReduceTasks(0);
    }

    FileOutputFormat.setOutputPath(job, outputPath);

    return job;
}
From source file:com.msd.gin.halyard.tools.HalyardBulkLoad.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: bulkload [-D" + MRJobConfig.QUEUE_NAME + "=proofofconcepts] [-D"
                + SKIP_INVALID_PROPERTY + "=true] [-D" + SPLIT_BITS_PROPERTY + "=8] [-D"
                + DEFAULT_CONTEXT_PROPERTY + "=http://new_context] [-D" + OVERRIDE_CONTEXT_PROPERTY
                + "=true] <input_path(s)> <output_path> <table_name>");
        return -1;
    }
    TableMapReduceUtil.addDependencyJars(getConf(), NTriplesUtil.class, Rio.class, AbstractRDFHandler.class,
            RDFFormat.class, RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    if (SnappyCodec.isNativeCodeLoaded()) {
        getConf().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);
        getConf().setClass(MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);
    }
    getConf().setDouble(MRJobConfig.COMPLETED_MAPS_FOR_REDUCE_SLOWSTART, 1.0);
    getConf().setLong(MRJobConfig.TASK_TIMEOUT, 3600000L);
    getConf().setInt(MRJobConfig.IO_SORT_FACTOR, 100);
    getConf().setInt(MRJobConfig.IO_SORT_MB, 1000);
    getConf().setInt(FileInputFormat.SPLIT_MAXSIZE, 1000000000);
    getConf().setInt(LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY, 2048);

    Job job = Job.getInstance(getConf(), "HalyardBulkLoad -> " + args[1] + " -> " + args[2]);
    job.setJarByClass(HalyardBulkLoad.class);
    job.setMapperClass(RDFMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(RioFileInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);

    Map<String, Integer> contextSplitsMap = new HashMap<>();
    for (Map.Entry<String, String> me : getConf().getValByRegex(CONTEXT_SPLIT_REGEXP).entrySet()) {
        int splits = Integer.parseInt(me.getKey().substring(me.getKey().lastIndexOf('.') + 1));
        StringTokenizer stk = new StringTokenizer(me.getValue(), ",");
        while (stk.hasMoreTokens()) {
            contextSplitsMap.put(stk.nextToken(), splits);
        }
    }

    try (HTable hTable = HalyardTableUtils.getTable(getConf(), args[2], true,
            getConf().getInt(SPLIT_BITS_PROPERTY, 3), contextSplitsMap)) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(args[1]), hTable);
            LOG.info("Bulk Load Completed..");
            return 0;
        }
    }
    return -1;
}