List of usage examples for org.apache.hadoop.mapred FileOutputFormat setCompressOutput
public static void setCompressOutput(JobConf conf, boolean compress)
From source file:hibench.PageRankDataGenerator.java
License:Apache License
private void createPageRankNodes() throws IOException { LOG.info("Creating PageRank nodes...", null); JobConf job = new JobConf(WebDataGen.class); String jobname = "Create " + paths.dname + " pagerank nodes"; job.setJobName(jobname);/* w w w. java 2 s . c o m*/ job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); FileInputFormat.setInputPaths(job, paths.getPath(DataPaths.LINKS)); job.setInputFormat(TextInputFormat.class); if (options.PAGERANK_NODE_BALANCE) { /*** * Balance the output order of nodes, to prevent the running * of pagerank bench from potential data skew */ job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(NullWritable.class); job.setMapperClass(BalancedLinkNodesMapper.class); job.setReducerClass(BalancedLinkNodesReducer.class); // job.setPartitionerClass(ModulusPartitioner.class); if (options.reds > 0) { job.setNumReduceTasks(options.reds); } else { job.setNumReduceTasks(DataOptions.getMaxNumReduce()); } } else { job.setMapOutputKeyClass(Text.class); job.setMapperClass(OutputLinkNodesMapper.class); job.setNumReduceTasks(0); } if (options.SEQUENCE_OUT) { job.setOutputFormat(SequenceFileOutputFormat.class); } else { job.setOutputFormat(TextOutputFormat.class); } if (null != options.codecClass) { job.set("mapred.output.compression.type", "BLOCK"); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, options.codecClass); } FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.VERTICALS)); LOG.info("Running Job: " + jobname); LOG.info("Links file " + paths.getPath(DataPaths.LINKS) + " as input"); LOG.info("Vertices file " + paths.getResult(DataPaths.VERTICALS) + " as output"); JobClient.runJob(job); LOG.info("Finished Running Job: " + jobname); LOG.info("Cleaning temp files..."); paths.cleanTempFiles(paths.getResult(DataPaths.VERTICALS)); }
From source file:hibench.PageRankDataGenerator.java
License:Apache License
/*** * Create pagerank edge table, output link A->B as <A, B> pairs * @throws IOException/*from w w w.j ava2 s. com*/ */ private void createPageRankLinks() throws IOException { LOG.info("Creating PageRank links", null); JobConf job = new JobConf(WebDataGen.class); String jobname = "Create " + paths.dname + " pagerank links"; job.setJobName(jobname); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setMapOutputKeyClass(Text.class); job.setNumReduceTasks(0); FileInputFormat.setInputPaths(job, paths.getPath(DataPaths.T_LINK_PAGE)); job.setInputFormat(TextInputFormat.class); job.setMapperClass(OutputLinkEdgesMapper.class); if (options.SEQUENCE_OUT) { job.setOutputFormat(SequenceFileOutputFormat.class); } else { job.setOutputFormat(TextOutputFormat.class); } if (null != options.codecClass) { job.set("mapred.output.compression.type", "BLOCK"); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, options.codecClass); } FileOutputFormat.setOutputPath(job, paths.getResult(DataPaths.EDGES)); LOG.info("Running Job: " + jobname); LOG.info("Table link-page " + paths.getPath(DataPaths.T_LINK_PAGE) + " as input"); LOG.info("Edges file " + paths.getResult(DataPaths.EDGES) + " as output"); JobClient.runJob(job); LOG.info("Finished Running Job: " + jobname); LOG.info("Cleaning temp files..."); paths.cleanTempFiles(paths.getResult(DataPaths.EDGES)); }
From source file:mapreduce.BigramCount.java
License:Apache License
/** * Runs this tool.//from w ww. j a va2s.co m */ public int run(String[] args) throws Exception { if (args.length != 2) { printUsage(); return -1; } String inputPath = args[0]; String outputPath = args[1]; int mapTasks = 1;//Integer.parseInt(args[2]); int reduceTasks = 1;//Integer.parseInt(args[3]); sLogger.info("Tool: BigramCount"); sLogger.info(" - input path: " + inputPath); sLogger.info(" - output path: " + outputPath); sLogger.info(" - number of mappers: " + mapTasks); sLogger.info(" - number of reducers: " + reduceTasks); JobConf conf = new JobConf(BigramCount.class); conf.setJobName("BigramCount"); conf.setNumMapTasks(mapTasks); conf.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); FileOutputFormat.setCompressOutput(conf, false); /** * Note that these must match the Class arguments given in the mapper */ conf.setOutputKeyClass(WordPair.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(MyMapper.class); conf.setPartitionerClass(MyPartitioner.class); conf.setCombinerClass(MyReducer.class); conf.setReducerClass(MyReducer.class); // Delete the output directory if it exists already Path outputDir = new Path(outputPath); FileSystem.get(outputDir.toUri(), conf).delete(outputDir, true); long startTime = System.currentTimeMillis(); JobClient.runJob(conf); sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:nthu.scopelab.tsqr.ssvd.BtJob.java
License:Apache License
public static void run(Configuration conf, Path[] inputPath, Path btPath, String qrfPath, int k, int p, int outerBlockHeight, int reduceTasks, boolean outputBBtProducts, String reduceSchedule, int mis) throws Exception { boolean outputQ = true; String stages[] = reduceSchedule.split(","); JobConf job = new JobConf(conf, BtJob.class); job.setInputFormat(SequenceFileInputFormat.class); job.setOutputFormat(SequenceFileOutputFormat.class); job.setInt(SCHEDULE_NUM, stages.length); job.setInt(PROP_OUTER_PROD_BLOCK_HEIGHT, outerBlockHeight); job.setInt(QJob.PROP_K, k);/* w ww . j av a 2 s . co m*/ job.setInt(QJob.PROP_P, p); job.setBoolean(QmultiplyJob.OUTPUT_Q, outputQ); job.setBoolean(PROP_OUPTUT_BBT_PRODUCTS, outputBBtProducts); job.set(QmultiplyJob.QRF_DIR, qrfPath); FileSystem.get(job).delete(btPath, true); FileOutputFormat.setOutputPath(job, btPath); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class); SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); job.setJobName("BtJob"); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(SparseRowBlockWritable.class); job.setOutputKeyClass(IntWritable.class); //job.setOutputValueClass(SparseRowBlockWritable.class); job.setOutputValueClass(VectorWritable.class); job.setMapperClass(BtMapper.class); job.setCombinerClass(OuterProductCombiner.class); job.setReducerClass(OuterProductReducer.class); fileGather fgather = new fileGather(inputPath, "", FileSystem.get(job)); mis = Checker.checkMis(mis, fgather.getInputSize(), FileSystem.get(job)); job.setNumMapTasks(fgather.recNumMapTasks(mis)); //job.setNumReduceTasks(0); job.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job, inputPath); if (outputQ) { MultipleOutputs.addNamedOutput(job, QmultiplyJob.Q_MAT, SequenceFileOutputFormat.class, IntWritable.class, LMatrixWritable.class); } if (outputBBtProducts) { MultipleOutputs.addNamedOutput(job, OUTPUT_BBT, SequenceFileOutputFormat.class, IntWritable.class, VectorWritable.class); } RunningJob rj = JobClient.runJob(job); System.out.println("Btjob Job ID: " + rj.getJobID().toString()); }
From source file:nthu.scopelab.tsqr.ssvd.itBtJob.java
License:Apache License
public static void run(Configuration conf, Path[] inputPath, Path btPath, String qrfPath, int k, int p, int outerBlockHeight, int reduceTasks, boolean outputBBtProducts, String reduceSchedule, int mis) throws Exception { boolean outputQ = true; String stages[] = reduceSchedule.split(","); JobConf job = new JobConf(conf, itBtJob.class); job.setInputFormat(SequenceFileInputFormat.class); job.setOutputFormat(SequenceFileOutputFormat.class); job.setInt(SCHEDULE_NUM, stages.length); job.setInt(PROP_OUTER_PROD_BLOCK_HEIGHT, outerBlockHeight); job.setInt(QJob.PROP_K, k);/*from w w w. j av a2 s . c o m*/ job.setInt(QJob.PROP_P, p); job.setBoolean(QmultiplyJob.OUTPUT_Q, outputQ); job.setBoolean(PROP_OUPTUT_BBT_PRODUCTS, outputBBtProducts); job.set(QmultiplyJob.QRF_DIR, qrfPath); FileSystem.get(job).delete(btPath, true); FileOutputFormat.setOutputPath(job, btPath); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class); SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); job.setJobName("itBtJob"); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(SparseRowBlockWritable.class); job.setOutputKeyClass(IntWritable.class); //job.setOutputValueClass(SparseRowBlockWritable.class); job.setOutputValueClass(VectorWritable.class); job.setMapperClass(BtMapper.class); job.setCombinerClass(OuterProductCombiner.class); job.setReducerClass(OuterProductReducer.class); fileGather fgather = new fileGather(inputPath, "", FileSystem.get(job)); mis = Checker.checkMis(mis, fgather.getInputSize(), FileSystem.get(job)); job.setNumMapTasks(fgather.recNumMapTasks(mis)); //job.setNumReduceTasks(0); job.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job, inputPath); if (outputQ) { MultipleOutputs.addNamedOutput(job, QmultiplyJob.Q_MAT, SequenceFileOutputFormat.class, IntWritable.class, LMatrixWritable.class); } if (outputBBtProducts) { MultipleOutputs.addNamedOutput(job, OUTPUT_BBT, SequenceFileOutputFormat.class, IntWritable.class, VectorWritable.class); } RunningJob rj = JobClient.runJob(job); System.out.println("itBtJob Job ID: " + rj.getJobID().toString()); }
From source file:nthu.scopelab.tsqr.ssvd.UJob.java
License:Apache License
public void start(Configuration conf, Path inputPathQ, Path inputUHatPath, Path sigmaPath, Path outputPath, int k, boolean uHalfSigma, int mis) throws ClassNotFoundException, InterruptedException, IOException { String input = ""; JobConf job = new JobConf(conf, UJob.class); jobclient = new JobClient(job); job.setJobName("UJob"); job.setInputFormat(SequenceFileInputFormat.class); job.setOutputFormat(SequenceFileOutputFormat.class); job.setMapperClass(MultiplyMapper.class); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(LMatrixWritable.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(LMatrixWritable.class); FileSystem fs = FileSystem.get(job); fileGather fgather = new fileGather( new Path(inputPathQ.toString().substring(0, inputPathQ.toString().lastIndexOf("/") - 1)), "Q-", fs); mis = Checker.checkMis(mis, fgather.getInputSize(), fs); job.setNumMapTasks(fgather.recNumMapTasks(mis)); job.setNumReduceTasks(0);//ww w . java2 s . c om job.set("mapreduce.output.basename", OUTPUT_U); job.set(PROP_UHAT_PATH, inputUHatPath.toString()); job.set(PROP_SIGMA_PATH, sigmaPath.toString()); if (uHalfSigma) { job.set(PROP_U_HALFSIGMA, "y"); } job.setInt(QJob.PROP_K, k); FileSystem.get(job).delete(outputPath, true); FileOutputFormat.setOutputPath(job, outputPath); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class); SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); FileInputFormat.setInputPaths(job, inputPathQ); //JobClient.runJob(job); jobid = jobclient.submitJob(job).getID(); }
From source file:org.apache.avro.mapred.TestAvroMultipleOutputs.java
License:Apache License
@SuppressWarnings("deprecation") public void testJob() throws Exception { JobConf job = new JobConf(); // private static final String UTF8 = "UTF-8"; String dir = System.getProperty("test.dir", ".") + "/mapred"; Path outputPath = new Path(dir + "/out"); outputPath.getFileSystem(job).delete(outputPath); WordCountUtil.writeLinesFile();//www. j av a2 s .co m job.setJobName("AvroMultipleOutputs"); AvroJob.setInputSchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputSchema(job, new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema()); AvroJob.setMapperClass(job, MapImpl.class); AvroJob.setReducerClass(job, ReduceImpl.class); FileInputFormat.setInputPaths(job, new Path(dir + "/in")); FileOutputFormat.setOutputPath(job, outputPath); FileOutputFormat.setCompressOutput(job, false); AvroMultipleOutputs.addNamedOutput(job, "myavro", AvroOutputFormat.class, new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema()); AvroMultipleOutputs.addNamedOutput(job, "myavro1", AvroOutputFormat.class, Schema.create(Schema.Type.STRING)); AvroMultipleOutputs.addNamedOutput(job, "myavro2", AvroOutputFormat.class, Schema.create(Schema.Type.STRING)); WordCountUtil.setMeta(job); JobClient.runJob(job); WordCountUtil.validateCountsFile(); }
From source file:org.apache.avro.mapred.TestAvroMultipleOutputs.java
License:Apache License
@SuppressWarnings("deprecation") public void testJob_noreducer() throws Exception { JobConf job = new JobConf(); job.setNumReduceTasks(0);// ww w.j av a 2 s .c o m // private static final String UTF8 = "UTF-8"; String dir = System.getProperty("test.dir", ".") + "/mapred"; Path outputPath = new Path(dir + "/out"); outputPath.getFileSystem(job).delete(outputPath); WordCountUtil.writeLinesFile(); job.setJobName("AvroMultipleOutputs_noreducer"); AvroJob.setInputSchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputSchema(job, new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema()); AvroJob.setMapperClass(job, MapImpl.class); FileInputFormat.setInputPaths(job, new Path(dir + "/in")); FileOutputFormat.setOutputPath(job, outputPath); FileOutputFormat.setCompressOutput(job, false); AvroMultipleOutputs.addNamedOutput(job, "myavro2", AvroOutputFormat.class, Schema.create(Schema.Type.STRING)); JobClient.runJob(job); }
From source file:org.apache.avro.mapred.TestWeather.java
License:Apache License
/** Uses default mapper with no reduces for a map-only identity job. */ @Test/*from w w w .j av a 2s. c o m*/ @SuppressWarnings("deprecation") public void testMapOnly() throws Exception { JobConf job = new JobConf(); String inDir = System.getProperty("share.dir", "../../../share") + "/test/data"; Path input = new Path(inDir + "/weather.avro"); Path output = new Path(System.getProperty("test.dir", "target/test") + "/weather-ident"); output.getFileSystem(job).delete(output); job.setJobName("identity map weather"); AvroJob.setInputSchema(job, Weather.SCHEMA$); AvroJob.setOutputSchema(job, Weather.SCHEMA$); FileInputFormat.setInputPaths(job, input); FileOutputFormat.setOutputPath(job, output); FileOutputFormat.setCompressOutput(job, true); job.setNumReduceTasks(0); // map-only JobClient.runJob(job); // check output is correct DatumReader<Weather> reader = new SpecificDatumReader<Weather>(); DataFileReader<Weather> check = new DataFileReader<Weather>(new File(inDir + "/weather.avro"), reader); DataFileReader<Weather> sorted = new DataFileReader<Weather>( new File(output.toString() + "/part-00000.avro"), reader); for (Weather w : sorted) assertEquals(check.next(), w); check.close(); sorted.close(); }
From source file:org.apache.avro.mapred.TestWeather.java
License:Apache License
@Test @SuppressWarnings("deprecation") public void testSort() throws Exception { JobConf job = new JobConf(); String inDir = System.getProperty("share.dir", "../../../share") + "/test/data"; Path input = new Path(inDir + "/weather.avro"); Path output = new Path(System.getProperty("test.dir", "target/test") + "/weather-sort"); output.getFileSystem(job).delete(output); job.setJobName("sort weather"); AvroJob.setInputSchema(job, Weather.SCHEMA$); AvroJob.setMapOutputSchema(job, Pair.getPairSchema(Weather.SCHEMA$, Schema.create(Type.NULL))); AvroJob.setOutputSchema(job, Weather.SCHEMA$); AvroJob.setMapperClass(job, SortMapper.class); AvroJob.setReducerClass(job, SortReducer.class); FileInputFormat.setInputPaths(job, input); FileOutputFormat.setOutputPath(job, output); FileOutputFormat.setCompressOutput(job, true); AvroJob.setOutputCodec(job, SNAPPY_CODEC); JobClient.runJob(job);//from w w w .j a v a2 s . c o m // check output is correct DatumReader<Weather> reader = new SpecificDatumReader<Weather>(); DataFileReader<Weather> check = new DataFileReader<Weather>(new File(inDir + "/weather-sorted.avro"), reader); DataFileReader<Weather> sorted = new DataFileReader<Weather>( new File(output.toString() + "/part-00000.avro"), reader); for (Weather w : sorted) assertEquals(check.next(), w); check.close(); sorted.close(); // check that AvroMapper and AvroReducer get close() and configure() called assertEquals(1, mapCloseCalls.get()); assertEquals(1, reducerCloseCalls.get()); assertEquals(1, mapConfigureCalls.get()); assertEquals(1, reducerConfigureCalls.get()); }