List of usage examples for org.apache.hadoop.mapred JobConf setOutputFormat
public void setOutputFormat(Class<? extends OutputFormat> theClass)
From source file:fm.last.hadoop.programs.labs.trackstats.TrackStatisticsProgram.java
License:Apache License
/** * Create a JobConf for a Job that will calculate the number of unique listeners per track. * //from www . j a v a 2 s . c o m * @param inputDir The path to the folder containing the raw listening data files. * @return The unique listeners JobConf. */ private JobConf getUniqueListenersJobConf(Path inputDir) { log.info("Creating configuration for unique listeners Job"); // output results to a temporary intermediate folder, this will get deleted by start() method Path uniqueListenersOutput = new Path("uniqueListeners"); JobConf conf = new JobConf(TrackStatisticsProgram.class); conf.setOutputKeyClass(IntWritable.class); // track id conf.setOutputValueClass(IntWritable.class); // number of unique listeners conf.setInputFormat(TextInputFormat.class); // raw listening data conf.setOutputFormat(SequenceFileOutputFormat.class); conf.setMapperClass(UniqueListenersMapper.class); conf.setCombinerClass(UniqueListenersCombiner.class); conf.setReducerClass(UniqueListenersReducer.class); FileInputFormat.addInputPath(conf, inputDir); FileOutputFormat.setOutputPath(conf, uniqueListenersOutput); conf.setJobName("uniqueListeners"); return conf; }
From source file:fm.last.hadoop.programs.labs.trackstats.TrackStatisticsProgram.java
License:Apache License
/** * Creates a JobConf for a Job that will sum up the TrackStatistics per track. * //from ww w.j ava2 s .c o m * @param inputDir The path to the folder containing the raw input data files. * @return The sum JobConf. */ private JobConf getSumJobConf(Path inputDir) { log.info("Creating configuration for sum job"); // output results to a temporary intermediate folder, this will get deleted by start() method Path playsOutput = new Path("sum"); JobConf conf = new JobConf(TrackStatisticsProgram.class); conf.setOutputKeyClass(IntWritable.class); // track id conf.setOutputValueClass(TrackStats.class); // statistics for a track conf.setInputFormat(TextInputFormat.class); // raw listening data conf.setOutputFormat(SequenceFileOutputFormat.class); conf.setMapperClass(SumMapper.class); conf.setCombinerClass(SumReducer.class); conf.setReducerClass(SumReducer.class); FileInputFormat.addInputPath(conf, inputDir); FileOutputFormat.setOutputPath(conf, playsOutput); conf.setJobName("sum"); return conf; }
From source file:fm.last.hadoop.programs.labs.trackstats.TrackStatisticsProgram.java
License:Apache License
/** * Creates a JobConf for a Job that will merge the unique listeners and track statistics. * /*from ww w . j av a 2 s .co m*/ * @param outputPath The path for the results to be output to. * @param sumInputDir The path containing the data from the sum Job. * @param listenersInputDir The path containing the data from the unique listeners job. * @return The merge JobConf. */ private JobConf getMergeConf(Path outputPath, Path sumInputDir, Path listenersInputDir) { log.info("Creating configuration for merge job"); JobConf conf = new JobConf(TrackStatisticsProgram.class); conf.setOutputKeyClass(IntWritable.class); // track id conf.setOutputValueClass(TrackStats.class); // overall track statistics conf.setCombinerClass(SumReducer.class); // safe to re-use reducer as a combiner here conf.setReducerClass(SumReducer.class); conf.setOutputFormat(TextOutputFormat.class); FileOutputFormat.setOutputPath(conf, outputPath); MultipleInputs.addInputPath(conf, sumInputDir, SequenceFileInputFormat.class, IdentityMapper.class); MultipleInputs.addInputPath(conf, listenersInputDir, SequenceFileInputFormat.class, MergeListenersMapper.class); conf.setJobName("merge"); return conf; }
From source file:FormatStorage1.MergeFileUtil.java
License:Open Source License
public static void run(String inputdir, String outputdir, Configuration conf) throws IOException { JobConf job = new JobConf(conf); job.setJobName("MergeFileUtil"); job.setJarByClass(MergeFileUtil.class); FileSystem fs = null;//from w ww .j a v a 2 s . c om fs = FileSystem.get(job); if (fs.exists(new Path(outputdir))) { throw new IOException("outputdir: " + outputdir + " exist!!!"); } FileStatus[] fss = fs.listStatus(new Path(inputdir)); if (fss == null || fss.length <= 0) { throw new IOException("no input files"); } IFormatDataFile ifdf = new IFormatDataFile(job); ifdf.open(fss[0].getPath().toString()); job.set("ifdf.head.info", ifdf.fileInfo().head().toStr()); ifdf.close(); long wholesize = 0; for (FileStatus status : fss) { wholesize += status.getLen(); } job.setNumReduceTasks(0); FileInputFormat.setInputPaths(job, inputdir); FileOutputFormat.setOutputPath(job, new Path(outputdir)); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(IRecord.class); job.setMapperClass(MergeMap.class); job.setInputFormat(CombineFormatStorageFileInputFormat.class); job.setOutputFormat(MergeIFormatOutputFormat.class); JobClient jc = new JobClient(job); RunningJob rjob = jc.submitJob(job); try { String lastReport = ""; SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS"); long reportTime = System.currentTimeMillis(); long maxReportInterval = 3 * 1000; while (!rjob.isComplete()) { Thread.sleep(1000); int mapProgress = Math.round(rjob.mapProgress() * 100); int reduceProgress = Math.round(rjob.reduceProgress() * 100); String report = " map = " + mapProgress + "%, reduce = " + reduceProgress + "%"; if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) { String output = dateFormat.format(Calendar.getInstance().getTime()) + report; System.err.println(output); lastReport = report; reportTime = System.currentTimeMillis(); } } LOG.info(rjob.getJobState()); } catch (IOException e1) { e1.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } }
From source file:FormatStorage1.MergeFileUtil.java
License:Open Source License
public static void runold(String inputdir, String outputdir, Configuration conf) throws IOException { JobConf job = new JobConf(conf); job.setJobName("MergeFileUtil"); job.setJarByClass(MergeFileUtil.class); FileSystem fs = null;//from www . ja v a 2 s . c o m fs = FileSystem.get(job); if (fs.exists(new Path(outputdir))) { throw new IOException("outputdir: " + outputdir + " exist!!!"); } FileStatus[] fss = fs.listStatus(new Path(inputdir)); if (fss == null || fss.length <= 0) { throw new IOException("no input files"); } for (FileStatus status : fss) { if (status.isDir()) { throw new IOException("!!!input dir contains directory:\t" + status.getPath().toString()); } } IFormatDataFile ifdf = new IFormatDataFile(job); ifdf.open(fss[0].getPath().toString()); job.set("ifdf.head.info", ifdf.fileInfo().head().toStr()); ifdf.close(); long wholesize = 0; for (FileStatus status : fss) { wholesize += status.getLen(); } long fl = 512 * 1024 * 1024; int reduces = (int) (wholesize / fl + 1); job.setNumReduceTasks(reduces); FileInputFormat.setInputPaths(job, inputdir); FileOutputFormat.setOutputPath(job, new Path(outputdir)); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(IRecord.class); job.setMapperClass(MergeMap.class); job.setReducerClass(MergeReduce.class); job.setInputFormat(MergeIFormatInputFormat.class); job.setOutputFormat(MergeIFormatOutputFormat.class); JobClient jc = new JobClient(job); RunningJob rjob = jc.submitJob(job); try { String lastReport = ""; SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS"); long reportTime = System.currentTimeMillis(); long maxReportInterval = 3 * 1000; while (!rjob.isComplete()) { Thread.sleep(1000); int mapProgress = Math.round(rjob.mapProgress() * 100); int reduceProgress = Math.round(rjob.reduceProgress() * 100); String report = " map = " + mapProgress + "%, reduce = " + reduceProgress + "%"; if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) { String output = dateFormat.format(Calendar.getInstance().getTime()) + report; System.err.println(output); lastReport = report; reportTime = System.currentTimeMillis(); } } LOG.info(rjob.getJobState()); } catch (IOException e1) { e1.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } }
From source file:FormatStorage1.MergeFileUtil1.java
License:Open Source License
public static void run(String inputdir, String outputdir, Configuration conf) throws IOException { JobConf job = new JobConf(conf); job.setJobName("MergeFileUtil1"); job.setJarByClass(MergeFileUtil1.class); FileSystem fs = null;/*from www . j av a2 s. co m*/ fs = FileSystem.get(job); if (fs.exists(new Path(outputdir))) { throw new IOException("outputdir: " + outputdir + " exist!!!"); } FileStatus[] fss = fs.listStatus(new Path(inputdir)); if (fss == null || fss.length <= 0) { throw new IOException("no input files"); } IFormatDataFile ifdf = new IFormatDataFile(job); ifdf.open(fss[0].getPath().toString()); job.set("ifdf.head.info", ifdf.fileInfo().head().toStr()); ifdf.close(); long wholesize = 0; for (FileStatus status : fss) { wholesize += status.getLen(); } job.setNumReduceTasks(0); FileInputFormat.setInputPaths(job, inputdir); FileOutputFormat.setOutputPath(job, new Path(outputdir)); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(IRecord.class); job.setMapperClass(MergeMap.class); job.setInputFormat(CombineFormatStorageFileInputFormat.class); job.setOutputFormat(MergeIFormatOutputFormat1.class); JobClient jc = new JobClient(job); RunningJob rjob = jc.submitJob(job); try { String lastReport = ""; SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS"); long reportTime = System.currentTimeMillis(); long maxReportInterval = 3 * 1000; while (!rjob.isComplete()) { Thread.sleep(1000); int mapProgress = Math.round(rjob.mapProgress() * 100); int reduceProgress = Math.round(rjob.reduceProgress() * 100); String report = " map = " + mapProgress + "%, reduce = " + reduceProgress + "%"; if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) { String output = dateFormat.format(Calendar.getInstance().getTime()) + report; System.err.println(output); lastReport = report; reportTime = System.currentTimeMillis(); } } LOG.info(rjob.getJobState()); } catch (IOException e1) { e1.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } }
From source file:FormatStorage1.MergeFileUtil1.java
License:Open Source License
public static void runold(String inputdir, String outputdir, Configuration conf) throws IOException { JobConf job = new JobConf(conf); job.setJobName("MergeFileUtil"); job.setJarByClass(MergeFileUtil1.class); FileSystem fs = null;/*w w w . j av a2s. co m*/ fs = FileSystem.get(job); if (fs.exists(new Path(outputdir))) { throw new IOException("outputdir: " + outputdir + " exist!!!"); } FileStatus[] fss = fs.listStatus(new Path(inputdir)); if (fss == null || fss.length <= 0) { throw new IOException("no input files"); } for (FileStatus status : fss) { if (status.isDir()) { throw new IOException("!!!input dir contains directory:\t" + status.getPath().toString()); } } IFormatDataFile ifdf = new IFormatDataFile(job); ifdf.open(fss[0].getPath().toString()); job.set("ifdf.head.info", ifdf.fileInfo().head().toStr()); ifdf.close(); long wholesize = 0; for (FileStatus status : fss) { wholesize += status.getLen(); } long fl = 512 * 1024 * 1024; int reduces = (int) (wholesize / fl + 1); job.setNumReduceTasks(reduces); FileInputFormat.setInputPaths(job, inputdir); FileOutputFormat.setOutputPath(job, new Path(outputdir)); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(IRecord.class); job.setMapperClass(MergeMap.class); job.setReducerClass(MergeReduce.class); job.setInputFormat(MergeIFormatInputFormat.class); job.setOutputFormat(MergeIFormatOutputFormat.class); JobClient jc = new JobClient(job); RunningJob rjob = jc.submitJob(job); try { String lastReport = ""; SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS"); long reportTime = System.currentTimeMillis(); long maxReportInterval = 3 * 1000; while (!rjob.isComplete()) { Thread.sleep(1000); int mapProgress = Math.round(rjob.mapProgress() * 100); int reduceProgress = Math.round(rjob.reduceProgress() * 100); String report = " map = " + mapProgress + "%, reduce = " + reduceProgress + "%"; if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) { String output = dateFormat.format(Calendar.getInstance().getTime()) + report; System.err.println(output); lastReport = report; reportTime = System.currentTimeMillis(); } } LOG.info(rjob.getJobState()); } catch (IOException e1) { e1.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } }
From source file:fr.worf.hadoop.scratchpad.Scratch2MapReduce.java
License:Apache License
/** * @param args the command line arguments * @throws java.io.IOException//from ww w. j a v a 2s .c om */ public static void main(String[] args) throws IOException { JobConf job = new JobConf(Scratch2MapReduce.class); job.setJobName("wordcount"); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(Map.class); job.setCombinerClass(Reduce.class); job.setReducerClass(Reduce.class); job.setInputFormat(TextInputFormat.class); job.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(job, new Path("/home/slash/test/testfile1.txt")); FileOutputFormat.setOutputPath(job, new Path("/home/slash/test/testfile2.txt")); RunningJob runJob = JobClient.runJob(job); }
From source file:gov.nasa.jpl.memex.pooledtimeseries.GradientTimeSeries.java
License:Apache License
public static void main(String[] args) throws Exception { System.loadLibrary(Core.NATIVE_LIBRARY_NAME); Configuration baseConf = new Configuration(); baseConf.set("mapred.reduce.tasks", "0"); JobConf conf = new JobConf(baseConf, GradientTimeSeries.class); conf.setJobName("gradient_time_series"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Map.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(MultiFileOutput.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf);// ww w .ja v a 2 s . com }
From source file:gov.nasa.jpl.memex.pooledtimeseries.OpticalTimeSeries.java
License:Apache License
public static void main(String[] args) throws Exception { System.loadLibrary(Core.NATIVE_LIBRARY_NAME); LOG.info("Loaded- " + Core.NATIVE_LIBRARY_NAME); Configuration baseConf = new Configuration(); baseConf.set("mapred.reduce.tasks", "0"); JobConf conf = new JobConf(baseConf, OpticalTimeSeries.class); conf.setJobName("optical_time_series"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Map.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(MultiFileOutput.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf);/*from w w w . j av a 2 s.c o m*/ }