Example usage for org.apache.hadoop.mapreduce.lib.output.MultipleOutputs.setCountersEnabled

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.lib.output.MultipleOutputs.setCountersEnabled.

Prototype

public static void setCountersEnabled(Job job, boolean enabled)

Source Link

Document

Enables or disables counters for the named outputs.

Usage

From source file:Analysis.A6_User_Differentiation_By_Age.Partition_Users_By_Age_Driver.java

/**
 * Configures and runs the "Users by Age" MapReduce job, then terminates the
 * JVM with exit status 0 when the job reports success and 1 otherwise.
 *
 * @param args command line arguments: {@code args[0]} is the input path,
 *             {@code args[1]} is the output path
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(new Configuration(), "Users by Age");
    job.setJarByClass(Partition_Users_By_Age_Driver.class);

    // Input and output locations come straight from the command line.
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Map stage: IntWritable keys, Text values.
    job.setMapperClass(Partition_Users_By_Age_Mapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Custom partitioner routes map output to the reducers.
    // Age ranges of interest: 11-17, 18-25, 26-35, 36-49, 50-65, 66-80, 81-99.
    // NOTE(review): 8 reduce tasks are requested for the 7 ranges listed above;
    // presumably the extra one is a catch-all - confirm against the partitioner.
    job.setPartitionerClass(Partition_Users_By_Age_Partitioner.class);
    job.setNumReduceTasks(8);
    job.setReducerClass(Partition_Users_By_Age_Reducer.class);

    // Named output "ageBins" allows custom file naming; counters are enabled for it.
    MultipleOutputs.addNamedOutput(job, "ageBins", TextOutputFormat.class, Text.class, NullWritable.class);
    MultipleOutputs.setCountersEnabled(job, true);

    boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:Assignment5_P4_BinningPattern.Binning_IPAddress_By_DayDriver.java

/**
 * Configures and runs a map-only job that writes through the named outputs
 * "textualBins" and "massaBins", then terminates the JVM with exit status 0
 * when the job reports success and 1 otherwise.
 *
 * @param args command line arguments: {@code args[0]} is the input path,
 *             {@code args[1]} is the output path
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(new Configuration(), "IP Address By Hour");
    job.setJarByClass(Binning_IPAddress_By_DayDriver.class);

    // Input and output locations come straight from the command line.
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Map-only job: no reduce tasks are scheduled.
    job.setMapperClass(Binning_IPAddress_By_Day_Mapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setNumReduceTasks(0);

    // Two text named outputs, both with NullWritable keys and Text values.
    MultipleOutputs.addNamedOutput(job, "textualBins", TextOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, "massaBins", TextOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, true);

    boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:binningbycategories.BinningbyCategories.java

/**
 * Configures and runs the map-only "Binning" job, then terminates the JVM
 * with exit status 0 when the job reports success and 2 otherwise.
 *
 * @param args command line arguments: {@code args[0]} is the input path,
 *             {@code args[1]} is the output path
 * @throws java.lang.Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
    Job job = Job.getInstance(conf, "Binning");
    job.setJarByClass(BinningbyCategories.class);
    job.setMapperClass(YouTubeBinMapper.class);
    // Map-only job: no reduce tasks are scheduled.
    job.setNumReduceTasks(0);

    TextInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Configure the MultipleOutputs by adding an output called "bins"
    // with the proper output format and mapper key/value pairs.
    MultipleOutputs.addNamedOutput(job, "bins", TextOutputFormat.class, Text.class, NullWritable.class);

    // Enable the counters for the job.
    // If there is a significant number of different named outputs, this
    // should be disabled.
    MultipleOutputs.setCountersEnabled(job, true);

    System.exit(job.waitForCompletion(true) ? 0 : 2);
}

From source file:BinningByState.Driver.java

/**
 * Configures and runs the map-only "BinningByState" job, then terminates the
 * JVM with exit status 0 when the job reports success and 1 otherwise.
 *
 * @param args command line arguments: {@code args[0]} is the input path,
 *             {@code args[1]} is the output path
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(new Configuration(), "BinningByState");
    job.setJarByClass(Driver.class);

    // Named output "bins" with Text keys and NullWritable values; counters enabled.
    MultipleOutputs.addNamedOutput(job, "bins", TextOutputFormat.class, Text.class, NullWritable.class);
    MultipleOutputs.setCountersEnabled(job, true);

    // Map-only job: no reduce tasks are scheduled. Only the map output
    // key/value classes are set; the job output classes are left untouched.
    job.setMapperClass(BinningMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(0);

    // Input and output locations come straight from the command line.
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:com.conversantmedia.mapreduce.tool.annotation.handler.NamedOutputAnnotationHandler.java

License:Apache License

/**
 * Registers the named outputs described by a {@code NamedOutput} annotation
 * on the given job.
 *
 * <p>Key and value classes are taken from the annotation's {@code type()}.
 * When either is {@code void.class}, the generic type parameters resolved
 * from {@code target} are used if available; otherwise the job's own output
 * key/value class is used. Names already recorded in {@code configured} are
 * skipped.
 *
 * @param annotation the annotation to process; cast to {@code NamedOutput}
 * @param job        the job on which named outputs are registered
 * @param target     the annotated object, inspected for generic type parameters
 * @throws ToolException declared by this handler's contract
 */
@Override
public void process(Annotation annotation, Job job, Object target) throws ToolException {
    final NamedOutput namedOutput = (NamedOutput) annotation;
    final KeyValue declaredTypes = namedOutput.type();

    // If this is a MultipleOutputs member we're annotating, try to pick up
    // the key/value types from its generic parameters, if there are any.
    final Pair<Type, Type> genericTypes = getGenericTypeParams(target);

    Class<?> keyClass = declaredTypes.key();
    if (keyClass == void.class) {
        // Prefer the generic parameter; fall back on the job output key class.
        keyClass = (genericTypes != null) ? (Class<?>) genericTypes.getKey() : job.getOutputKeyClass();
    }

    Class<?> valueClass = declaredTypes.value();
    if (valueClass == void.class) {
        // Prefer the generic parameter; fall back on the job output value class.
        valueClass = (genericTypes != null) ? (Class<?>) genericTypes.getValue() : job.getOutputValueClass();
    }

    for (String rawName : getNames(namedOutput)) {
        final String resolvedName = (String) evaluateExpression(rawName);
        if (configured.contains(resolvedName)) {
            // Already registered; nothing to do for this name.
            continue;
        }
        MultipleOutputs.addNamedOutput(job, resolvedName, namedOutput.format(), keyClass, valueClass);
        MultipleOutputs.setCountersEnabled(job, namedOutput.countersEnabled());
        configured.add(resolvedName);
    }
}

From source file:ipldataanalysis2.IPLDataAnalysis2.java

/**
 * Configures and runs the map-only "Job1" job with a "bins" named output.
 *
 * @param args the command line arguments: {@code args[0]} is the input
 *             directory, {@code args[1]} is the output directory
 * @return -1 when the argument count is not exactly two, 0 when the job
 *         reports success, 1 when it reports failure
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {

    if (args.length != 2) {
        System.out.printf("Two parameters are required for Data Analysis for IPL- <input dir> <output dir>\n");
        return -1;
    }

    // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
    Job job = Job.getInstance(getConf(), "Job1");
    job.setJarByClass(IPLDataAnalysis2.class);
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    MultipleOutputs.addNamedOutput(job, "bins", TextOutputFormat.class, Text.class, NullWritable.class);
    MultipleOutputs.setCountersEnabled(job, true);
    job.setMapperClass(DataAnalysisMapper.class);
    // Map-only job: no reduce tasks are scheduled.
    job.setNumReduceTasks(0);
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;

}

From source file:org.apache.rya.accumulo.mr.merge.CopyTool.java

License:Apache License

/**
 * Configures the job's output, either directly to Accumulo or to files.
 *
 * <p>The Accumulo connection settings are always applied. When
 * {@code useCopyFileOutput} is false the job writes through
 * {@code AccumuloOutputFormat}. Otherwise, provided {@code baseOutputDir}
 * is not blank, the job writes {@code AccumuloFileOutputFormat} files under
 * that directory; I/O failures while clearing or preparing the directory
 * are logged and do not abort the setup.
 *
 * @param job         the job to configure
 * @param outputTable the default output table name; when blank in file mode,
 *                    output is configured one level higher and the job is
 *                    expected to handle subdirectories itself
 * @throws AccumuloSecurityException if raised while configuring the Accumulo output
 */
@Override
protected void setupAccumuloOutput(final Job job, final String outputTable) throws AccumuloSecurityException {
    AccumuloOutputFormat.setConnectorInfo(job, childUserName, new PasswordToken(childPwd));
    AccumuloOutputFormat.setCreateTables(job, true);
    AccumuloOutputFormat.setDefaultTableName(job, outputTable);
    if (childMock) {
        AccumuloOutputFormat.setMockInstance(job, childInstance);
    } else {
        AccumuloOutputFormat.setZooKeeperInstance(job,
                new ClientConfiguration().withInstance(childInstance).withZkHosts(childZk));
    }

    if (!useCopyFileOutput) {
        log.info("Using accumulo output format mode.");
        job.setOutputFormatClass(AccumuloOutputFormat.class);
        return;
    }

    log.info("Using file output format mode.");
    if (!StringUtils.isNotBlank(baseOutputDir)) {
        // No base directory configured: nothing further to set up.
        return;
    }

    final Path filesOutputPath;
    final Path baseOutputPath;
    if (StringUtils.isNotBlank(outputTable)) {
        filesOutputPath = getPath(baseOutputDir, outputTable, "files");
        baseOutputPath = filesOutputPath.getParent();
        job.setOutputFormatClass(AccumuloFileOutputFormat.class);
    } else {
        // If table name is not given, configure output for one level higher:
        // it's up to the job to handle subdirectories. Make sure the parent
        // exists.
        filesOutputPath = getPath(baseOutputDir);
        baseOutputPath = filesOutputPath;
        LazyOutputFormat.setOutputFormatClass(job, AccumuloFileOutputFormat.class);
        MultipleOutputs.setCountersEnabled(job, true);
    }
    log.info("File output destination: " + filesOutputPath);

    if (useCopyFileOutputDirectoryClear) {
        try {
            clearOutputDir(baseOutputPath);
        } catch (final IOException e) {
            // Best effort: a failed clear is logged and setup continues.
            log.error("Error clearing out output path.", e);
        }
    }

    try {
        // Create the parent of the output directory and open its permissions to all.
        final FileSystem fileSystem = FileSystem.get(conf);
        final Path parentDir = filesOutputPath.getParent();
        fileSystem.mkdirs(parentDir);
        fileSystem.setPermission(parentDir, new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
    } catch (final IOException e) {
        log.error("Failed to set permission for output path.", e);
    }
    FileOutputFormat.setOutputPath(job, filesOutputPath);

    if (StringUtils.isNotBlank(compressionType)) {
        if (!isValidCompressionType(compressionType)) {
            log.warn("Invalid compression type: " + compressionType);
        } else {
            log.info("File compression type: " + compressionType);
            AccumuloFileOutputFormat.setCompressionType(job, compressionType);
        }
    }
}

From source file:org.apache.rya.reasoning.mr.AbstractReasoningTool.java

License:Apache License

/**
 * Set up the MapReduce job to output a schema (TBox).
 *
 * <p>The output path comes from {@code MRReasoningUtils.getSchemaPath}. Two
 * named outputs are registered: "schemaobj" (sequence file, NullWritable
 * keys, SchemaWritable values) and the debug output (text), with counters
 * enabled.
 */
protected void configureSchemaOutput() {
    // Destination and record types of the job's main output.
    Path schemaPath = MRReasoningUtils.getSchemaPath(job.getConfiguration());
    SequenceFileOutputFormat.setOutputPath(job, schemaPath);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(SchemaWritable.class);

    // Set the plain format first, then register it through LazyOutputFormat.
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    // Named outputs: the schema object itself plus a text debug channel.
    MultipleOutputs.addNamedOutput(job, "schemaobj", SequenceFileOutputFormat.class,
            NullWritable.class, SchemaWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT, TextOutputFormat.class,
            Text.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, true);
}

From source file:org.apache.rya.reasoning.mr.AbstractReasoningTool.java

License:Apache License

/**
 * Set up a MapReduce job to output newly derived triples.
 *
 * <p>The output directory name is {@code OUTPUT_BASE} followed by the current
 * iteration number, with {@code TEMP_SUFFIX} appended for intermediate data.
 *
 * @param   intermediate    True if this is intermediate data, in which case
 *                          the temporary suffix is added to the directory name.
 */
protected void configureDerivationOutput(boolean intermediate) {
    Configuration conf = job.getConfiguration();
    int iteration = MRReasoningUtils.getCurrentIteration(conf);

    // Build the directory name once; only intermediate runs get the suffix.
    String dirName = MRReasoningUtils.OUTPUT_BASE + iteration;
    if (intermediate) {
        dirName = dirName + MRReasoningUtils.TEMP_SUFFIX;
    }
    Path outPath = MRReasoningUtils.getOutputPath(conf, dirName);

    SequenceFileOutputFormat.setOutputPath(job, outPath);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    // Sequence-file named outputs with NullWritable values.
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INTERMEDIATE_OUT,
            SequenceFileOutputFormat.class, Fact.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.TERMINAL_OUT,
            SequenceFileOutputFormat.class, Fact.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.SCHEMA_OUT,
            SequenceFileOutputFormat.class, Fact.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INCONSISTENT_OUT,
            SequenceFileOutputFormat.class, Derivation.class, NullWritable.class);
    MultipleOutputs.setCountersEnabled(job, true);

    // Set up an output for diagnostic info, if needed
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT, TextOutputFormat.class,
            Text.class, Text.class);
}

From source file:org.apache.rya.reasoning.mr.AbstractReasoningTool.java

License:Apache License

/**
 * Set up a MapReduce job to output human-readable text.
 *
 * @param destination name passed to {@code MRReasoningUtils.getOutputPath}
 *                    to resolve the output path
 */
protected void configureTextOutput(String destination) {
    Path textOutPath = MRReasoningUtils.getOutputPath(job.getConfiguration(), destination);
    TextOutputFormat.setOutputPath(job, textOutPath);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    // Text named outputs with NullWritable keys and Text values.
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INTERMEDIATE_OUT, TextOutputFormat.class,
            NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.TERMINAL_OUT, TextOutputFormat.class,
            NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.SCHEMA_OUT, TextOutputFormat.class,
            NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INCONSISTENT_OUT, TextOutputFormat.class,
            NullWritable.class, Text.class);

    // The debug output is the only one keyed by Text.
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT, TextOutputFormat.class,
            Text.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, true);
}