Usage examples for org.apache.hadoop.mapreduce.lib.output.MultipleOutputs.addNamedOutput
@SuppressWarnings("unchecked") public static void addNamedOutput(Job job, String namedOutput, Class<? extends OutputFormat> outputFormatClass, Class<?> keyClass, Class<?> valueClass)
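addNamedOutput only registers a named output on the job; the records themselves are emitted from the map or reduce task through a MultipleOutputs instance created in setup() and closed in cleanup(). Below is a minimal sketch of that consuming side, assuming a hypothetical reducer with Text keys and NullWritable values and a named output registered as "bins" (as in several of the drivers below):

import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

public class BinReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
    private MultipleOutputs<Text, NullWritable> mos;

    @Override
    protected void setup(Context context) {
        // Create one MultipleOutputs wrapper per task attempt.
        mos = new MultipleOutputs<Text, NullWritable>(context);
    }

    @Override
    protected void reduce(Text key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        for (NullWritable value : values) {
            // "bins" must match the name passed to addNamedOutput in the driver.
            mos.write("bins", key, value);
        }
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // Closing flushes the underlying record writers; skipping this loses output.
        mos.close();
    }
}

Note that the named output's key/value classes are those given to addNamedOutput, which may differ from the job's regular output classes.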
From source file:Analysis.A6_User_Differentiation_By_Age.Partition_Users_By_Age_Driver.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Users by Age");
    job.setJarByClass(Partition_Users_By_Age_Driver.class);
    job.setMapperClass(Partition_Users_By_Age_Mapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    // partitioner class inclusion
    job.setPartitionerClass(Partition_Users_By_Age_Partitioner.class);

    // register a named output for custom naming of the partitioned files
    MultipleOutputs.addNamedOutput(job, "ageBins", TextOutputFormat.class, Text.class, NullWritable.class);
    MultipleOutputs.setCountersEnabled(job, true);

    // age bins: 11-17, 18-25, 26-35, 36-49, 50-65, 66-80, 81-99
    // set the number of reduce tasks to match the number of partitions produced by the partitioner
    job.setNumReduceTasks(8);
    job.setReducerClass(Partition_Users_By_Age_Reducer.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:Assignment5_P4_BinningPattern.Binning_IPAddress_By_DayDriver.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "IP Address By Hour");
    job.setJarByClass(Binning_IPAddress_By_DayDriver.class);
    job.setMapperClass(Binning_IPAddress_By_Day_Mapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);

    MultipleOutputs.addNamedOutput(job, "textualBins", TextOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, "massaBins", TextOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, true);

    // map-only job: set the number of reduce tasks to 0
    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:binningbycategories.BinningbyCategories.java
/**
 * @param args the command line arguments
 * @throws java.lang.Exception
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "Binning");
    job.setJarByClass(BinningbyCategories.class);
    job.setMapperClass(YouTubeBinMapper.class);
    job.setNumReduceTasks(0);

    TextInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Configure the MultipleOutputs by adding an output called "bins"
    // with the proper output format and mapper key/value pairs
    MultipleOutputs.addNamedOutput(job, "bins", TextOutputFormat.class, Text.class, NullWritable.class);

    // Enable the counters for the job.
    // If there is a significant number of different named outputs, this
    // should be disabled.
    MultipleOutputs.setCountersEnabled(job, true);

    System.exit(job.waitForCompletion(true) ? 0 : 2);
}
From source file:BinningByState.Driver.java
public static void main(String args[]) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "BinningByState");
    MultipleOutputs.addNamedOutput(job, "bins", TextOutputFormat.class, Text.class, NullWritable.class);
    MultipleOutputs.setCountersEnabled(job, true);
    job.setJarByClass(Driver.class);
    job.setMapperClass(BinningMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(0);
    // job.setOutputKeyClass(Text.class);
    // job.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
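The binning drivers above run with zero reduce tasks, so the named-output writes happen in the mapper. A sketch of what a mapper such as BinningMapper might look like; the binning rule and record layout here are hypothetical, since the actual mapper source isn't shown on this page:

import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

public class BinningMapper extends Mapper<Object, Text, Text, NullWritable> {
    private MultipleOutputs<Text, NullWritable> mos;

    @Override
    protected void setup(Context context) {
        mos = new MultipleOutputs<Text, NullWritable>(context);
    }

    @Override
    protected void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        // Hypothetical binning rule: route each record by its first tab-separated field.
        String state = value.toString().split("\t")[0];
        // The optional fourth argument is a base output path; files appear as
        // bins/<state>-m-nnnnn under the job's output directory.
        mos.write("bins", value, NullWritable.get(), "bins/" + state);
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        mos.close();
    }
}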
From source file:cienciaCelularMR.Main.java
@Override
public int run(String[] args) throws Exception {
    for (int i = 0; i < args.length; i++) {
        System.out.println("Hadoop - arg[" + i + "] is: " + args[i]);
    }

    // YARN memory configuration
    Configuration conf = new Configuration();
    conf.set("mapreduce.map.memory.mb", "1400");
    conf.set("mapreduce.reduce.memory.mb", "2800");
    conf.set("mapreduce.map.java.opts", "-Xmx1120m");
    conf.set("mapreduce.reduce.java.opts", "-Xmx2240m");
    conf.set("yarn.app.mapreduce.am.resource.mb", "2800");
    conf.set("yarn.app.mapreduce.am.command-opts", "-Xmx2240m");
    conf.set("yarn.nodemanager.resource.memory-mb", "5040");
    conf.set("yarn.scheduler.minimum-allocation-mb", "1400");
    conf.set("yarn.scheduler.maximum-allocation-mb", "5040");
    conf.set("mapreduce.task.timeout", "18000000"); // 5 hours

    // Job creation
    Job job = Job.getInstance(conf);
    job.setInputFormatClass(WholeFileInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(args[5]));
    FileOutputFormat.setOutputPath(job, new Path(args[6]));

    // Alternate mapper outputs used to report status and errors
    MultipleOutputs.addNamedOutput(job, "controloutput", TextOutputFormat.class, KeyMcell.class, Text.class);
    MultipleOutputs.addNamedOutput(job, "errormcell", TextOutputFormat.class, KeyMcell.class, Text.class);

    // Files copied to the distributed cache on the nodes
    job.addCacheFile(new Path("wasb:///mcell.exe").toUri());
    job.addCacheFile(new Path("wasb:///fernet.exe").toUri());
    job.addCacheFile(new Path("wasb:///fernet.cfg").toUri());
    job.addCacheFile(new Path("wasb:///libconfig_d.dll").toUri());
    job.addCacheFile(new Path("wasb:///libtiff3.dll").toUri());
    job.addCacheFile(new Path("wasb:///jpeg62.dll").toUri());
    job.addCacheFile(new Path("wasb:///zlib1.dll").toUri());
    job.addCacheFile(new Path("wasb:///msvcr100d.dll").toUri());

    job.setJarByClass(Main.class);

    Configuration mapAConf = new Configuration(false);
    ChainMapper.addMapper(job, McellMapper.class, KeyMcell.class, BytesWritable.class, KeyMcell.class,
            Text.class, mapAConf);

    Configuration mapBConf = new Configuration(false);
    ChainMapper.addMapper(job, FernetMapper.class, KeyMcell.class, Text.class, KeyMcell.class,
            FernetOutput.class, mapBConf);

    job.setReducerClass(ResultReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);

    job.submit();
    return 0;
}
From source file:com.conversantmedia.mapreduce.tool.annotation.handler.NamedOutputAnnotationHandler.java
License:Apache License
@Override
public void process(Annotation annotation, Job job, Object target) throws ToolException {
    NamedOutput namedOut = (NamedOutput) annotation;
    KeyValue kv = namedOut.type();

    // If this is a MultipleOutputs member we're annotating, see if we can
    // get the key/value types from the generic parameters, if there are any.
    Pair<Type, Type> kvTypePair = getGenericTypeParams(target);

    Class<?> keyClass = kv.key();
    if (keyClass == void.class) {
        if (kvTypePair != null) {
            keyClass = (Class<?>) kvTypePair.getKey();
        } else {
            // fall back on the job's output key class
            keyClass = job.getOutputKeyClass();
        }
    }

    Class<?> valueClass = kv.value();
    if (valueClass == void.class) {
        if (kvTypePair != null) {
            valueClass = (Class<?>) kvTypePair.getValue();
        } else {
            valueClass = job.getOutputValueClass();
        }
    }

    String[] names = getNames(namedOut);
    for (String name : names) {
        name = (String) evaluateExpression(name);
        if (!configured.contains(name)) {
            MultipleOutputs.addNamedOutput(job, name, namedOut.format(), keyClass, valueClass);
            MultipleOutputs.setCountersEnabled(job, namedOut.countersEnabled());
            configured.add(name);
        }
    }
}
From source file:com.jhkt.playgroundArena.hadoop.tasks.jobs.AverageMultipleOutputJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, AverageMultipleOutputJob.class.getSimpleName());
    job.setJarByClass(AverageMultipleOutputJob.class);

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setJobName("Sample Multiple Output Job");
    job.setMapperClass(AverageMapper.class);
    job.setReducerClass(AverageMultipleOutputReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    MultipleOutputs.addNamedOutput(job, "greaterThan1000", TextOutputFormat.class, Text.class,
            DoubleWritable.class);
    MultipleOutputs.addNamedOutput(job, "lessThan1000", TextOutputFormat.class, Text.class,
            DoubleWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
}
From source file:com.mozilla.socorro.hadoop.CrashCount2.java
License:LGPL
/**
 * @param args
 * @return
 * @throws IOException
 * @throws ParseException
 */
public Job initJob(String[] args) throws IOException, ParseException {
    Map<byte[], byte[]> columns = new HashMap<byte[], byte[]>();
    columns.put(PROCESSED_DATA_BYTES, JSON_BYTES);
    Job job = CrashReportJob.initJob(NAME, getConf(), CrashCount2.class, CrashCount2Mapper.class,
            LongSumReducer.class, LongSumReducer.class, columns, Text.class, LongWritable.class,
            new Path(args[0]));
    MultipleOutputs.addNamedOutput(job, "strings", TextOutputFormat.class, Text.class, Text.class);
    return job;
}
From source file:com.mvdb.platform.action.VersionMerge.java
License:Apache License
public static void main(String[] args) throws Exception {
    logger.error("error1");
    logger.warn("warning1");
    logger.info("info1");
    logger.debug("debug1");
    logger.trace("trace1");
    ActionUtils.setUpInitFileProperty();
    // LoggerContext lc = (LoggerContext) LoggerFactory.getILoggerFactory();
    // StatusPrinter.print(lc);

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // Also add lastMergedTimeStamp and mergeUptoTimestamp and the passive db name, which would be mv1 or mv2
    if (otherArgs.length != 3) {
        System.err.println("Usage: versionmerge <customer-directory>");
        System.exit(2);
    }
    // Example: file:/home/umesh/.mvdb/etl/data/alpha
    // Example: hdfs://localhost:9000/data/alpha
    String customerDirectory = otherArgs[0];
    String lastMergedDirName = otherArgs[1];
    String lastCopiedDirName = otherArgs[2];

    org.apache.hadoop.conf.Configuration conf1 = new org.apache.hadoop.conf.Configuration();
    //conf1.addResource(new Path("/home/umesh/ops/hadoop-1.2.0/conf/core-site.xml"));
    FileSystem hdfsFileSystem = FileSystem.get(conf1);
    Path topPath = new Path(customerDirectory);

    // Clean the scratch db
    Path passiveDbPath = new Path(topPath, "db/mv1");
    Path tempDbPath = new Path(topPath, "db/tmp-" + (int) (Math.random() * 100000));
    if (hdfsFileSystem.exists(tempDbPath)) {
        boolean success = hdfsFileSystem.delete(tempDbPath, true);
        if (success == false) {
            System.err.println(String.format("Unable to delete temp directory %s", tempDbPath.toString()));
            System.exit(1);
        }
    }

    // The last three parameters are hardcoded; the nulls must be replaced later after changing input parameters.
    Path[] inputPaths = getInputPaths(hdfsFileSystem, topPath, lastMergedDirName, lastCopiedDirName, null);
    Set<String> tableNameSet = new HashSet<String>();
    for (Path path : inputPaths) {
        tableNameSet.add(path.getName());
    }

    Job job = new Job(conf, "versionmerge");
    job.setJarByClass(VersionMerge.class);
    job.setMapperClass(VersionMergeMapper.class);
    job.setReducerClass(VersionMergeReducer.class);
    job.setMapOutputKeyClass(MergeKey.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    String lastDirName = null;
    if (inputPaths != null && inputPaths.length > 1) {
        lastDirName = inputPaths[(inputPaths.length) - 2].getParent().getName();
    }
    for (Path inputPath : inputPaths) {
        FileInputFormat.addInputPath(job, inputPath);
    }
    FileOutputFormat.setOutputPath(job, tempDbPath);

    // Register one named output per table, so each table's merged records land in their own files
    for (String table : tableNameSet) {
        if (table.endsWith(".dat") == false) {
            continue;
        }
        table = table.replaceAll("-", "");
        table = table.replaceAll(".dat", "");
        MultipleOutputs.addNamedOutput(job, table, SequenceFileOutputFormat.class, Text.class,
                BytesWritable.class);
    }

    boolean success = job.waitForCompletion(true);
    System.out.println("Success:" + success);
    System.out.println(ManagementFactory.getRuntimeMXBean().getName());
    if (success && lastDirName != null) {
        ActionUtils.setConfigurationValue(new Path(customerDirectory).getName(),
                ConfigurationKeys.LAST_MERGE_TO_MVDB_DIRNAME, lastDirName);
    }
    //hdfsFileSystem.delete(passiveDbPath, true);
    //hdfsFileSystem.rename(tempDbPath, passiveDbPath);
    System.exit(success ? 0 : 1);
}
From source file:com.rw.legion.DefaultJob.java
License:Apache License
/**
 * Main method.
 *
 * @param args Arguments should be: 1) input path, 2) output path, 3)
 *        location of Legion objective file.
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Load the Legion objective from the JSON doc.
    Path path = new Path(args[2]);
    FileSystem fs = FileSystem.get(new URI(args[2]), conf);
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));

    String json = "";
    String line = br.readLine();
    while (line != null) {
        json += line;
        line = br.readLine();
    }
    br.close();

    /*
     * Save the JSON for the Legion objective to the Hadoop configuration,
     * so we can access it in other containers.
     */
    conf.setStrings("legion_objective", json);

    // De-serialize the objective so we can access the settings here.
    LegionObjective legionObjective = ObjectiveDeserializer.deserialize(json);

    // Start configuring the MapReduce job.
    Job hadoopJob = Job.getInstance(conf, "Legion");
    hadoopJob.setJarByClass(DefaultJob.class);
    hadoopJob.setMapperClass(DefaultMapper.class);
    LazyOutputFormat.setOutputFormatClass(hadoopJob, TextOutputFormat.class);

    // Compress the output to speed things up.
    TextOutputFormat.setCompressOutput(hadoopJob, true);
    TextOutputFormat.setOutputCompressorClass(hadoopJob, GzipCodec.class);

    // What input format do we use?
    try {
        @SuppressWarnings("unchecked")
        Class<? extends FileInputFormat<NullWritable, LegionRecord>> inputClass =
                (Class<? extends FileInputFormat<NullWritable, LegionRecord>>) Class
                        .forName(legionObjective.getInputFormat());
        hadoopJob.setInputFormatClass(inputClass);
    } catch (Exception e) {
        throw new JsonParseException("Problem loading input format class '"
                + legionObjective.getInputFormat() + "'");
    }

    // Should we set a max combined size?
    if (legionObjective.getMaxCombinedSize() != null) {
        CombineFileInputFormat.setMaxInputSplitSize(hadoopJob, legionObjective.getMaxCombinedSize());
    }

    /*
     * These are just static convenience methods, so it doesn't matter if
     * they come from the wrong class.
     */
    FileInputFormat.setInputDirRecursive(hadoopJob, true);
    FileInputFormat.addInputPath(hadoopJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(hadoopJob, new Path(args[1]));

    // A Legion objective can specify multiple output tables.
    for (OutputTable outputTable : legionObjective.getOutputTables()) {
        MultipleOutputs.addNamedOutput(hadoopJob, outputTable.getTitle(), TextOutputFormat.class,
                NullWritable.class, Text.class);
    }

    MultipleOutputs.addNamedOutput(hadoopJob, "skipped", TextOutputFormat.class, NullWritable.class,
            Text.class);

    hadoopJob.waitForCompletion(true);
}