List of usage examples for org.apache.hadoop.mapreduce.lib.output.MultipleOutputs.setCountersEnabled
public static void setCountersEnabled(Job job, boolean enabled)
From source file:Analysis.A6_User_Differentiation_By_Age.Partition_Users_By_Age_Driver.java
/** * @param args the command line arguments *///from w ww .j av a 2 s . c om public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "Users by Age"); job.setJarByClass(Partition_Users_By_Age_Driver.class); job.setMapperClass(Partition_Users_By_Age_Mapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(Text.class); // partitioner class inclusion job.setPartitionerClass(Partition_Users_By_Age_Partitioner.class); // set multiple formats for custom naming partitioning MultipleOutputs.addNamedOutput(job, "ageBins", TextOutputFormat.class, Text.class, NullWritable.class); MultipleOutputs.setCountersEnabled(job, true); //11-17, 18-25, 26-35, 36-49,50-65,66-80, 81-99 // set num of reduce tasks based on partition we need (here we need 10 cos total no.of countries) job.setNumReduceTasks(8); job.setReducerClass(Partition_Users_By_Age_Reducer.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:Assignment5_P4_BinningPattern.Binning_IPAddress_By_DayDriver.java
/** * @param args the command line arguments *///w ww . j av a 2 s .co m public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "IP Address By Hour"); job.setJarByClass(Binning_IPAddress_By_DayDriver.class); job.setMapperClass(Binning_IPAddress_By_Day_Mapper.class); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(Text.class); MultipleOutputs.addNamedOutput(job, "textualBins", TextOutputFormat.class, NullWritable.class, Text.class); MultipleOutputs.addNamedOutput(job, "massaBins", TextOutputFormat.class, NullWritable.class, Text.class); MultipleOutputs.setCountersEnabled(job, true); // set num of reduce tasks to 0 job.setNumReduceTasks(0); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:binningbycategories.BinningbyCategories.java
/** * @param args the command line arguments * @throws java.lang.Exception//w ww. j av a 2s . c o m */ public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = new Job(conf, "Binning"); job.setJarByClass(BinningbyCategories.class); job.setMapperClass(YouTubeBinMapper.class); job.setNumReduceTasks(0); TextInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); // Configure the MultipleOutputs by adding an output called "bins" // With the proper output format and mapper key/value pairs MultipleOutputs.addNamedOutput(job, "bins", TextOutputFormat.class, Text.class, NullWritable.class); // Enable the counters for the job // If there is a significant number of different named outputs, this // should be disabled MultipleOutputs.setCountersEnabled(job, true); System.exit(job.waitForCompletion(true) ? 0 : 2); }
From source file:BinningByState.Driver.java
public static void main(String args[]) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "BinningByState"); MultipleOutputs.addNamedOutput(job, "bins", TextOutputFormat.class, Text.class, NullWritable.class); MultipleOutputs.setCountersEnabled(job, true); job.setJarByClass(Driver.class); job.setMapperClass(BinningMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(NullWritable.class); job.setNumReduceTasks(0);/* ww w .j a va 2 s . co m*/ // job.setOutputKeyClass(Text.class); // job.setOutputValueClass(NullWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.conversantmedia.mapreduce.tool.annotation.handler.NamedOutputAnnotationHandler.java
License:Apache License
@Override public void process(Annotation annotation, Job job, Object target) throws ToolException { NamedOutput namedOut = (NamedOutput) annotation; KeyValue kv = namedOut.type(); // If this is a MultipleOutputs member we're annotating, see if we can't // get the key/value from the parameters if there are any. Pair<Type, Type> kvTypePair = getGenericTypeParams(target); Class<?> keyClass = kv.key(); if (keyClass == void.class) { if (kvTypePair != null) { keyClass = (Class<?>) kvTypePair.getKey(); } else {/* www.j a v a 2 s . c o m*/ // fall back on job output key class keyClass = job.getOutputKeyClass(); } } Class<?> valueClass = kv.value(); if (valueClass == void.class) { if (kvTypePair != null) { valueClass = (Class<?>) kvTypePair.getValue(); } else { valueClass = job.getOutputValueClass(); } } String[] names = getNames(namedOut); for (String name : names) { name = (String) evaluateExpression(name); if (!configured.contains(name)) { MultipleOutputs.addNamedOutput(job, name, namedOut.format(), keyClass, valueClass); MultipleOutputs.setCountersEnabled(job, namedOut.countersEnabled()); configured.add(name); } } }
From source file:ipldataanalysis2.IPLDataAnalysis2.java
/** * @param args the command line arguments *///from w w w. j a va 2 s.c o m @Override public int run(String[] args) throws Exception { if (args.length != 2) { System.out.printf("Two parameters are required for Data Analysis for IPL- <input dir> <output dir>\n"); return -1; } Job job = new Job(getConf(), "Job1"); job.setJarByClass(IPLDataAnalysis2.class); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); MultipleOutputs.addNamedOutput(job, "bins", TextOutputFormat.class, Text.class, NullWritable.class); MultipleOutputs.setCountersEnabled(job, true); job.setMapperClass(DataAnalysisMapper.class); job.setNumReduceTasks(0); boolean success = job.waitForCompletion(true); return success ? 0 : 1; }
From source file:org.apache.rya.accumulo.mr.merge.CopyTool.java
License:Apache License
/**
 * Configures the job's output, either as Accumulo table output or as
 * Accumulo file output, depending on {@code useCopyFileOutput}.
 *
 * <p>The Accumulo connector, table-creation flag, default table name and
 * instance (mock or ZooKeeper) are always configured first. In file output
 * mode nothing further is configured when {@code baseOutputDir} is blank.
 * I/O failures while clearing or preparing the output directory are logged
 * and do not abort the setup.
 *
 * @param job         the job to configure
 * @param outputTable the default output table name; when blank in file
 *                    output mode, output is configured one directory level
 *                    higher and the job is expected to handle subdirectories
 * @throws AccumuloSecurityException declared by the Accumulo connector setup
 */
@Override
protected void setupAccumuloOutput(final Job job, final String outputTable) throws AccumuloSecurityException {
    AccumuloOutputFormat.setConnectorInfo(job, childUserName, new PasswordToken(childPwd));
    AccumuloOutputFormat.setCreateTables(job, true);
    AccumuloOutputFormat.setDefaultTableName(job, outputTable);
    if (childMock) {
        AccumuloOutputFormat.setMockInstance(job, childInstance);
    } else {
        AccumuloOutputFormat.setZooKeeperInstance(job,
                new ClientConfiguration().withInstance(childInstance).withZkHosts(childZk));
    }

    if (!useCopyFileOutput) {
        log.info("Using accumulo output format mode.");
        job.setOutputFormatClass(AccumuloOutputFormat.class);
        return;
    }

    log.info("Using file output format mode.");
    if (!StringUtils.isNotBlank(baseOutputDir)) {
        // No base directory configured: nothing more to set up.
        return;
    }

    final Path filesDir;
    final Path clearableDir;
    if (StringUtils.isNotBlank(outputTable)) {
        filesDir = getPath(baseOutputDir, outputTable, "files");
        clearableDir = filesDir.getParent();
        job.setOutputFormatClass(AccumuloFileOutputFormat.class);
    } else {
        // If table name is not given, configure output for one level higher:
        // it's up to the job to handle subdirectories. Make sure the parent
        // exists.
        filesDir = getPath(baseOutputDir);
        clearableDir = filesDir;
        LazyOutputFormat.setOutputFormatClass(job, AccumuloFileOutputFormat.class);
        MultipleOutputs.setCountersEnabled(job, true);
    }
    log.info("File output destination: " + filesDir);

    if (useCopyFileOutputDirectoryClear) {
        try {
            clearOutputDir(clearableDir);
        } catch (final IOException e) {
            log.error("Error clearing out output path.", e);
        }
    }

    // Create the parent of the files directory and open its permissions to everyone.
    try {
        final FileSystem fileSystem = FileSystem.get(conf);
        final Path parentDir = filesDir.getParent();
        fileSystem.mkdirs(parentDir);
        final FsPermission allAccess = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL);
        fileSystem.setPermission(filesDir.getParent(), allAccess);
    } catch (final IOException e) {
        log.error("Failed to set permission for output path.", e);
    }

    FileOutputFormat.setOutputPath(job, filesDir);

    if (!StringUtils.isNotBlank(compressionType)) {
        return;
    }
    if (isValidCompressionType(compressionType)) {
        log.info("File compression type: " + compressionType);
        AccumuloFileOutputFormat.setCompressionType(job, compressionType);
    } else {
        log.warn("Invalid compression type: " + compressionType);
    }
}
From source file:org.apache.rya.reasoning.mr.AbstractReasoningTool.java
License:Apache License
/** * Set up the MapReduce job to output a schema (TBox). *///from www. j a va 2 s. c o m protected void configureSchemaOutput() { Path outPath = MRReasoningUtils.getSchemaPath(job.getConfiguration()); SequenceFileOutputFormat.setOutputPath(job, outPath); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(SchemaWritable.class); LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class); MultipleOutputs.addNamedOutput(job, "schemaobj", SequenceFileOutputFormat.class, NullWritable.class, SchemaWritable.class); MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT, TextOutputFormat.class, Text.class, Text.class); MultipleOutputs.setCountersEnabled(job, true); }
From source file:org.apache.rya.reasoning.mr.AbstractReasoningTool.java
License:Apache License
/** * Set up a MapReduce job to output newly derived triples. * @param intermediate True if this is intermediate data. Outputs * to [base]-[iteration]-[temp]. *///w w w . j a v a 2 s.c om protected void configureDerivationOutput(boolean intermediate) { Path outPath; Configuration conf = job.getConfiguration(); int iteration = MRReasoningUtils.getCurrentIteration(conf); if (intermediate) { outPath = MRReasoningUtils.getOutputPath(conf, MRReasoningUtils.OUTPUT_BASE + iteration + MRReasoningUtils.TEMP_SUFFIX); } else { outPath = MRReasoningUtils.getOutputPath(conf, MRReasoningUtils.OUTPUT_BASE + iteration); } SequenceFileOutputFormat.setOutputPath(job, outPath); LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class); MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INTERMEDIATE_OUT, SequenceFileOutputFormat.class, Fact.class, NullWritable.class); MultipleOutputs.addNamedOutput(job, MRReasoningUtils.TERMINAL_OUT, SequenceFileOutputFormat.class, Fact.class, NullWritable.class); MultipleOutputs.addNamedOutput(job, MRReasoningUtils.SCHEMA_OUT, SequenceFileOutputFormat.class, Fact.class, NullWritable.class); MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INCONSISTENT_OUT, SequenceFileOutputFormat.class, Derivation.class, NullWritable.class); MultipleOutputs.setCountersEnabled(job, true); // Set up an output for diagnostic info, if needed MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT, TextOutputFormat.class, Text.class, Text.class); }
From source file:org.apache.rya.reasoning.mr.AbstractReasoningTool.java
License:Apache License
/** * Set up a MapReduce job to output human-readable text. *///from w w w .ja v a 2s .c om protected void configureTextOutput(String destination) { Path outPath; outPath = MRReasoningUtils.getOutputPath(job.getConfiguration(), destination); TextOutputFormat.setOutputPath(job, outPath); LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INTERMEDIATE_OUT, TextOutputFormat.class, NullWritable.class, Text.class); MultipleOutputs.addNamedOutput(job, MRReasoningUtils.TERMINAL_OUT, TextOutputFormat.class, NullWritable.class, Text.class); MultipleOutputs.addNamedOutput(job, MRReasoningUtils.SCHEMA_OUT, TextOutputFormat.class, NullWritable.class, Text.class); MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INCONSISTENT_OUT, TextOutputFormat.class, NullWritable.class, Text.class); MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT, TextOutputFormat.class, Text.class, Text.class); MultipleOutputs.setCountersEnabled(job, true); }