Example usage for org.apache.hadoop.mapreduce.lib.output MultipleOutputs addNamedOutput

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.lib.output.MultipleOutputs.addNamedOutput.

Prototype

@SuppressWarnings("unchecked")
public static void addNamedOutput(Job job, String namedOutput, Class<? extends OutputFormat> outputFormatClass,
        Class<?> keyClass, Class<?> valueClass)

Source Link

Document

Adds a named output for the job.

Usage

From source file:org.apache.rya.reasoning.mr.AbstractReasoningTool.java

License:Apache License

/**
 * Configures the job to write human-readable text output.
 *
 * The output path is resolved from the job configuration and {@code destination},
 * {@code TextOutputFormat} is installed through {@code LazyOutputFormat}, the named
 * outputs are registered and the MultipleOutputs counters are switched on.
 *
 * @param destination value handed to {@code MRReasoningUtils.getOutputPath} to resolve the output path
 */
protected void configureTextOutput(String destination) {
    final Path outPath = MRReasoningUtils.getOutputPath(job.getConfiguration(), destination);
    TextOutputFormat.setOutputPath(job, outPath);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    // Named outputs registered with a NullWritable key and a Text value.
    final String[] nullKeyedOutputs = {
            MRReasoningUtils.INTERMEDIATE_OUT,
            MRReasoningUtils.TERMINAL_OUT,
            MRReasoningUtils.SCHEMA_OUT,
            MRReasoningUtils.INCONSISTENT_OUT
    };
    for (String outputName : nullKeyedOutputs) {
        MultipleOutputs.addNamedOutput(job, outputName, TextOutputFormat.class, NullWritable.class,
                Text.class);
    }

    // The debug output is the only one registered with a Text key.
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT, TextOutputFormat.class, Text.class,
            Text.class);

    MultipleOutputs.setCountersEnabled(job, true);
}

From source file:org.bgi.flexlab.gaea.tools.mapreduce.annotator.Annotator.java

License:Open Source License

/**
 * Configures and runs the "GaeaAnnotator" MapReduce job and, on success, merges the
 * part files of every sample into one gzip-compressed {@code <sample>.tsv.gz} file.
 *
 * One named output is registered per sample name read from the VCF headers found under
 * the configured {@code inputFilePath}. After a successful run the temporary job output
 * directory is deleted.
 *
 * @param arg0 command-line arguments; they are first filtered through {@code remainArgs}
 * @return {@code 0} if the job completed successfully, {@code 1} otherwise
 * @throws Exception if configuring the job, running it or merging its output fails
 */
public int runAnnotator(String[] arg0) throws Exception {

    Configuration conf = new Configuration();
    String[] remainArgs = remainArgs(arg0, conf);

    AnnotatorOptions options = new AnnotatorOptions();
    options.parse(remainArgs);
    options.setHadoopConf(remainArgs, conf);
    System.out.println("inputFilePath: " + conf.get("inputFilePath"));
    BioJob job = BioJob.getInstance(conf);

    if (options.isCachedRef()) {
        System.err.println("--------- isCachedRef --------");
    }
    // NOTE(review): the reference is distributed unconditionally, exactly as before. The
    // original indentation suggests it may have been meant to depend on isCachedRef() -- confirm.
    ReferenceShare.distributeCache(options.getReferenceSequencePath(), job);

    job.setHeader(new Path(options.getInput()), new Path(options.getOutput()));
    job.setJobName("GaeaAnnotator");
    job.setJarByClass(this.getClass());
    job.setMapperClass(AnnotationMapper.class);
    job.setReducerClass(AnnotationReducer.class);
    job.setNumReduceTasks(options.getReducerNum());

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(VcfLineWritable.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(MNLineInputFormat.class);

    // Collect the sample names from the header of every regular file in the input directory.
    List<String> sampleNames = new ArrayList<>();

    Path inputPath = new Path(conf.get("inputFilePath"));
    FileSystem fs = inputPath.getFileSystem(conf);
    FileStatus[] files = fs.listStatus(inputPath);

    for (FileStatus file : files) {
        System.out.println(file.getPath());

        if (file.isFile()) {
            SingleVCFHeader singleVcfHeader = new SingleVCFHeader();
            singleVcfHeader.readHeaderFrom(file.getPath(), fs);
            VCFHeader vcfHeader = singleVcfHeader.getHeader();
            sampleNames.addAll(vcfHeader.getSampleNamesInOrder());
        }
    }

    MNLineInputFormat.addInputPath(job, new Path(options.getInputFilePath()));
    MNLineInputFormat.setMinNumLinesToSplit(job, 1000);
    MNLineInputFormat.setMapperNum(job, options.getMapperNum());
    Path partTmp = new Path(options.getTmpPath());

    FileOutputFormat.setOutputPath(job, partTmp);
    // Register one named output per (modified) sample name.
    for (int i = 0; i < sampleNames.size(); i++) {
        System.out.println("sampleName " + i + ":" + SampleNameModifier.modify(sampleNames.get(i)));
        MultipleOutputs.addNamedOutput(job, SampleNameModifier.modify(sampleNames.get(i)),
                TextOutputFormat.class, NullWritable.class, Text.class);
    }

    if (!job.waitForCompletion(true)) {
        return 1;
    }

    // Merge the part files of each sample into a single gzip file in the output directory.
    FileSystem tmpFs = partTmp.getFileSystem(conf);
    for (String sampleName : sampleNames) {
        // try-with-resources: the gzip stream is closed even if reading a part file fails.
        try (GZIPOutputStream os = new GZIPOutputStream(
                new FileOutputStream(options.getOutputPath() + "/" + sampleName + ".tsv.gz"))) {
            final FileStatus[] parts = tmpFs.globStatus(new Path(options.getTmpPath()
                    + "/" + sampleName + "/part" + "-*-[0-9][0-9][0-9][0-9][0-9]*"));
            // Only the first line starting with '#' seen for this sample is copied;
            // every later '#' line (in this or any following part) is skipped.
            boolean writeHeader = true;
            for (FileStatus p : parts) {
                // Closing the reader also closes the underlying part-file stream,
                // which the previous version left open.
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(p.getPath().getFileSystem(conf).open(p.getPath())))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        if (line.startsWith("#")) {
                            if (writeHeader) {
                                os.write(line.getBytes());
                                os.write('\n');
                                writeHeader = false;
                            }
                            continue;
                        }
                        os.write(line.getBytes());
                        os.write('\n');
                    }
                }
            }
        }
    }
    tmpFs.delete(partTmp, true);

    return 0;
}

From source file:org.bgi.flexlab.gaea.tools.mapreduce.fastqqualitycontrol.FastqQualityControl.java

License:Open Source License

/**
 * Configures and runs the "GaeaFastqQC" job and merges its report on success.
 *
 * Inputs come either from the multi-sample list (when one is configured and non-empty)
 * or from the single fastq/adapter paths given in the options. Two named outputs,
 * {@code filterStatistic} and {@code qualFreqStatistic}, are registered.
 *
 * @param args command-line arguments; they are first filtered through {@code remainArgs}
 * @return {@code 0} if the job completed successfully, {@code 1} otherwise
 * @throws Exception if configuring or running the job, or merging the report, fails
 */
@Override
public int run(String[] args) throws Exception {
    BioJob job = BioJob.getInstance();
    Configuration conf = job.getConfiguration();
    String[] remainArgs = remainArgs(args, conf);

    FastqQualityControlOptions option = new FastqQualityControlOptions();
    option.parse(remainArgs);
    conf.setInt(FastqRecordReader.READ_NAME_TYPE, option.getReadType());
    // NOTE(review): the unfiltered args are passed here, while the options were parsed
    // from remainArgs -- confirm this is intended.
    option.setHadoopConf(args, conf);

    job.setJobName("GaeaFastqQC");
    job.setJarByClass(FastqQualityControl.class);
    job.setMapperClass(PairEndAggregatorMapper.class);
    job.setReducerClass(FastqQualityControlReducer.class);

    job.setInputFormatClass(FastqInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(option.getReducerNumber());
    job.setOutputKeyValue(Text.class, Text.class, NullWritable.class, Text.class);

    FastqMultipleSample sample = null;
    // Compare by content: the previous `!= ""` was a reference comparison, so an empty
    // list name that was not the interned literal was treated as a real sample list.
    if (option.getMultiSampleList() != null && !"".equals(option.getMultiSampleList())) {
        sample = new FastqMultipleSample(option.getMultiSampleList(), true);
        Map<String, FastqSample> sampleList = sample.getSampleList();

        for (FastqSample sl : sampleList.values()) {
            if (sl.getFastq1() != null) {
                MultipleInputs.addInputPath(job, new Path(sl.getFastq1()), FastqInputFormat.class);
            } else {
                // A sample without a first fastq file aborts the whole program.
                System.err.println(sl.getSampleName() + " has no fq1!");
                System.exit(1);
            }
            if (sl.getFastq2() != null) {
                MultipleInputs.addInputPath(job, new Path(sl.getFastq2()), FastqInputFormat.class);
            } else {
                // A missing second fastq file is only reported; processing continues.
                System.err.println(sl.getSampleName() + " is SE data!");
            }
            if (sl.getAdapter1() != null) {
                MultipleInputs.addInputPath(job, new Path(sl.getAdapter1()), AdaptorInputFormat.class);
            }
            if (sl.getAdapter2() != null) {
                MultipleInputs.addInputPath(job, new Path(sl.getAdapter2()), AdaptorInputFormat.class);
            }
        }
    } else {
        if (option.getInputFastq1() != null) {
            MultipleInputs.addInputPath(job, new Path(option.getInputFastq1()), FastqInputFormat.class);
        }
        if (option.getInputFastq2() != null) {
            MultipleInputs.addInputPath(job, new Path(option.getInputFastq2()), FastqInputFormat.class);
        }
        if (option.getAdapter1() != null) {
            MultipleInputs.addInputPath(job, new Path(option.getAdapter1()), AdaptorInputFormat.class);
        }
        if (option.getAdapter2() != null) {
            MultipleInputs.addInputPath(job, new Path(option.getAdapter2()), AdaptorInputFormat.class);
        }
    }

    Path outputPath = new Path(option.getOutputDirectory() + "/out_fq");
    FileOutputFormat.setOutputPath(job, outputPath);
    MultipleOutputs.addNamedOutput(job, "filterStatistic", TextOutputFormat.class, NullWritable.class,
            Text.class);
    MultipleOutputs.addNamedOutput(job, "qualFreqStatistic", TextOutputFormat.class, NullWritable.class,
            Text.class);

    if (job.waitForCompletion(true)) {
        // `sample` is null here when no multi-sample list was configured.
        FastqQualityControlReporterIO report = new FastqQualityControlReporterIO(sample,
                option.isMultiStatis());
        report.mergeReport(outputPath, conf, new Path(option.getOutputDirectory()));
        return 0;
    } else {
        return 1;
    }
}

From source file:org.bgi.flexlab.gaea.tools.mapreduce.realigner.Realigner.java

License:Open Source License

/**
 * Configures and runs the realigner / base-recalibrator job.
 *
 * The job name depends on which of the two stages is requested. When recalibration is
 * requested a named output for the recalibrator table is registered and, after a
 * successful run, the report table is merged.
 *
 * @param args command-line arguments; they are first filtered through {@code remainArgs}
 * @return {@code 1} if the job failed; otherwise the result of {@code mergeReportTable}
 *         when recalibration is requested, or {@code 0} when it is not
 * @throws IOException            propagated from job configuration or execution
 * @throws ClassNotFoundException propagated from job execution
 * @throws InterruptedException   propagated from job execution
 */
private int runRealigner(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    BioJob job = BioJob.getInstance();
    Configuration conf = job.getConfiguration();
    String[] remainArgs = remainArgs(args, conf);

    options = new RealignerExtendOptions();
    options.parse(remainArgs);
    option = options.getRealignerOptions();

    // Pick the job name from the requested stages; recalibration alone also
    // switches the job into base-recalibrator-only mode.
    final String name;
    if (options.isRecalibration() && !options.isRealignment()) {
        job.setOnlyBaseRecalibrator(true);
        name = "GaeaRecalibrator";
    } else if (options.isRealignment() && !options.isRecalibration()) {
        name = "GaeaRealigner";
    } else {
        name = "Gaea realigner and recalibrator";
    }

    if (option.isMultiSample()) {
        job.setMultipleSample();
    }

    job.setJobName(name);

    option.setHadoopConf(remainArgs, conf);

    header = job.setHeader(new Path(option.getRealignerInput()), new Path(options.getCommonOutput()));

    job.setAnySamInputFormat(option.getInputFormat());
    job.setOutputFormatClass(GaeaBamOutputFormat.class);
    job.setOutputKeyValue(WindowsBasedWritable.class, SamRecordWritable.class, NullWritable.class,
            SamRecordWritable.class);

    job.setJarByClass(Realigner.class);
    job.setWindowsBasicMapperClass(WindowsBasedSamRecordMapper.class, option.getWindowsSize(),
            option.getExtendSize());
    job.setReducerClass(RealignerReducer.class);
    job.setNumReduceTasks(option.getReducerNumber());

    FileInputFormat.setInputPaths(job, new Path(option.getRealignerInput()));
    FileOutputFormat.setOutputPath(job, new Path(option.getRealignerOutput()));

    if (options.isRecalibration()) {
        MultipleOutputs.addNamedOutput(job, RecalibratorContextWriter.RECALIBRATOR_TABLE_TAG,
                TextOutputFormat.class, NullWritable.class, Text.class);
    }

    if (!job.waitForCompletion(true)) {
        return 1;
    }
    if (!options.isRecalibration()) {
        return 0;
    }
    return mergeReportTable(options.getBqsrOptions(), header,
            options.getCommonOutput() + RECALIBRATOR_REPORT_TABLE_NAME);
}

From source file:org.bgi.flexlab.gaea.tools.mapreduce.vcf.sort.VCFSort.java

License:Open Source License

/**
 * Registers one named output for each id in {@code mVcfHeader.getFileName2ID().values()}
 * and stores the id-to-output-name mapping in the options.
 *
 * Output names are {@code "SortResult1"}, {@code "SortResult2"}, ... numbered in the
 * iteration order of that collection.
 *
 * @param mVcfHeader header object supplying the ids
 * @param job        job on which the named outputs are registered
 */
private void setMultiOutputs(MultipleVCFHeader mVcfHeader, BioJob job) {
    final Map<Integer, String> outputNameById = new HashMap<>();
    int sequence = 0;
    for (int fileId : mVcfHeader.getFileName2ID().values()) {
        sequence++;
        final String outputName = "SortResult" + sequence;
        outputNameById.put(fileId, outputName);
        MultipleOutputs.addNamedOutput(job, outputName, SortOutputFormat.class, NullWritable.class,
                VariantContextWritable.class);
    }
    options.setMultiOutputs(outputNameById);
}

From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java

License:Open Source License

/**
 * Runs the second ("phase 2") dictionary job: samples the input to build a total-order
 * partition file, runs the job, then reads four counters and writes them to the
 * dictionary counters file.
 *
 * If the dictionary output path already exists it is deleted when the delete option is
 * set; otherwise the JVM exits with status {@code -1}.
 *
 * @return the result of {@code job.waitForCompletion(true)}
 * @throws ClassNotFoundException propagated from sampling or job execution
 * @throws IOException            on file-system errors or while writing the counters file
 * @throws InterruptedException   propagated from sampling or job execution
 * @throws URISyntaxException     if the partition-file URI cannot be built
 */
protected boolean runDictionaryJob()
        throws ClassNotFoundException, IOException, InterruptedException, URISyntaxException {
    boolean jobOK;

    // if output path exists...
    if (this.dictionaryFS.exists(this.conf.getDictionaryOutputPath())) {
        if (this.conf.getDeleteDictionaryOutputPath()) { // ... and option provided, delete recursively
            this.dictionaryFS.delete(this.conf.getDictionaryOutputPath(), true);
        } else { // ... and option not provided, fail
            System.out.println("Dictionary output path does exist: " + this.conf.getDictionaryOutputPath());
            System.out.println("Select other path or use option -dd to overwrite");
            System.exit(-1);
        }
    }

    // Sample the SequenceInputFormat to do TotalSort and create final output
    Job job = new Job(this.conf.getConfigurationObject(), this.conf.getDictionaryJobName() + " phase 2");

    job.setJarByClass(HDTBuilderDriver.class);

    System.out.println("samples = " + this.conf.getDictionarySamplesPath());
    System.out.println("output = " + this.conf.getDictionaryOutputPath());

    FileInputFormat.addInputPath(job, this.conf.getDictionarySamplesPath());
    FileOutputFormat.setOutputPath(job, this.conf.getDictionaryOutputPath());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    // No mapper class is set, so the framework default is used.
    job.setCombinerClass(DictionaryCombiner.class);
    job.setPartitionerClass(TotalOrderPartitioner.class);
    job.setReducerClass(DictionaryReducer.class);

    job.setNumReduceTasks(this.conf.getDictionaryReducers());

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    // Build the partition file by sampling and add it to the distributed cache,
    // with a URI fragment of TotalOrderPartitioner.DEFAULT_PATH.
    System.out.println("Sampling started");
    InputSampler.writePartitionFile(job,
            new InputSampler.IntervalSampler<Text, Text>(this.conf.getSampleProbability()));
    String partitionFile = TotalOrderPartitioner.getPartitionFile(job.getConfiguration());
    URI partitionUri = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    DistributedCache.addCacheFile(partitionUri, job.getConfiguration());
    DistributedCache.createSymlink(job.getConfiguration());
    System.out.println("Sampling finished");

    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SHARED, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SUBJECTS, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.PREDICATES, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.OBJECTS, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    jobOK = job.waitForCompletion(true);

    // The counters are read and persisted regardless of whether the job succeeded.
    this.numShared = job.getCounters().findCounter(Counters.Shared).getValue();
    this.numSubjects = job.getCounters().findCounter(Counters.Subjects).getValue();
    this.numPredicates = job.getCounters().findCounter(Counters.Predicates).getValue();
    this.numObjects = job.getCounters().findCounter(Counters.Objects).getValue();

    BufferedWriter bufferedWriter = new BufferedWriter(
            new OutputStreamWriter(this.dictionaryFS.create(this.conf.getDictionaryCountersFile())));
    try {
        bufferedWriter.write(HDTBuilderConfiguration.SHARED + "=" + this.numShared + "\n");
        bufferedWriter.write(HDTBuilderConfiguration.SUBJECTS + "=" + this.numSubjects + "\n");
        bufferedWriter.write(HDTBuilderConfiguration.PREDICATES + "=" + this.numPredicates + "\n");
        bufferedWriter.write(HDTBuilderConfiguration.OBJECTS + "=" + this.numObjects + "\n");
    } finally {
        // Close even when a write fails so the underlying stream is not leaked.
        bufferedWriter.close();
    }

    return jobOK;
}

From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java

License:Open Source License

/**
 * Builds the dictionary with a single MapReduce job, then reads four counters and
 * writes them to the dictionary counters file.
 *
 * The JVM exits with status {@code -1} if the input path does not exist, or if the
 * dictionary output path exists and the delete option is not set.
 *
 * @return the result of {@code job.waitForCompletion(true)}
 * @throws ClassNotFoundException propagated from job execution
 * @throws IOException            on file-system errors or while writing the counters file
 * @throws InterruptedException   propagated from job execution
 * @throws URISyntaxException     declared for callers; nothing in this method constructs a URI
 */
protected boolean runDictionaryJobWithOneJob()
        throws ClassNotFoundException, IOException, InterruptedException, URISyntaxException {
    boolean jobOK;

    // if input path does not exists, fail
    if (!this.inputFS.exists(this.conf.getInputPath())) {
        System.out.println("Dictionary input path does not exist: " + this.conf.getInputPath());
        System.exit(-1);
    }

    // if output path exists...
    if (this.dictionaryFS.exists(this.conf.getDictionaryOutputPath())) {
        if (this.conf.getDeleteDictionaryOutputPath()) { // ... and option provided, delete recursively
            this.dictionaryFS.delete(this.conf.getDictionaryOutputPath(), true);
        } else { // ... and option not provided, fail
            System.out.println("Dictionary output path does exist: " + this.conf.getDictionaryOutputPath());
            System.out.println("Select other path or use option -dd to overwrite");
            System.exit(-1);
        }
    }

    // Launch job
    // NOTE(review): the job is named with getTriplesJobName() although it builds the
    // dictionary -- possibly a copy/paste slip; confirm before changing.
    Job job = new Job(this.conf.getConfigurationObject(), this.conf.getTriplesJobName());
    job.setJarByClass(HDTBuilderDriver.class);

    FileInputFormat.addInputPath(job, this.conf.getInputPath());
    FileOutputFormat.setOutputPath(job, this.conf.getDictionaryOutputPath());

    job.setInputFormatClass(LzoTextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    job.setMapperClass(DictionaryMapper.class);
    job.setCombinerClass(DictionaryCombiner.class);
    job.setReducerClass(DictionaryReducer.class);

    job.setNumReduceTasks(this.conf.getDictionaryReducers());

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SHARED, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SUBJECTS, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.PREDICATES, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.OBJECTS, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);

    jobOK = job.waitForCompletion(true);

    // The counters are read and persisted regardless of whether the job succeeded.
    this.numShared = job.getCounters().findCounter(Counters.Shared).getValue();
    this.numSubjects = job.getCounters().findCounter(Counters.Subjects).getValue();
    this.numPredicates = job.getCounters().findCounter(Counters.Predicates).getValue();
    this.numObjects = job.getCounters().findCounter(Counters.Objects).getValue();

    BufferedWriter bufferedWriter = new BufferedWriter(
            new OutputStreamWriter(this.dictionaryFS.create(this.conf.getDictionaryCountersFile())));
    try {
        bufferedWriter.write(HDTBuilderConfiguration.SHARED + "=" + this.numShared + "\n");
        bufferedWriter.write(HDTBuilderConfiguration.SUBJECTS + "=" + this.numSubjects + "\n");
        bufferedWriter.write(HDTBuilderConfiguration.PREDICATES + "=" + this.numPredicates + "\n");
        bufferedWriter.write(HDTBuilderConfiguration.OBJECTS + "=" + this.numObjects + "\n");
    } finally {
        // Close even when a write fails so the underlying stream is not leaked.
        bufferedWriter.close();
    }

    return jobOK;
}

From source file:pad.InitializationDriver.java

License:Apache License

/**
 * Executes the InitializationDriver job.
 *
 * <p>For an adjacency-list input the job is map-only and uses
 * {@code InitializationMapperAdjacency}.
 *
 * <p>For any other input type (the cliques list) the job uses
 * {@code InitializationMapperClique}, {@code InitializationCombinerNumNodes} and
 * {@code InitializationReducerNumNodes}, and registers a named output called
 * {@code MOS_OUTPUT_NAME}. Once the job has finished, entries of the output folder whose
 * name contains {@code "part"} are deleted, the content of the {@code MOS_OUTPUT_NAME}
 * sub-folder is moved up into the output folder and the sub-folder itself is removed.
 *
 * <p>In both cases the {@code NUM_CLIQUES} and {@code NUM_INITIAL_NODES} counters of
 * {@code UtilCounters} are copied into the corresponding fields after a successful run.
 *
 * @param args external arguments; only handed to {@code GenericOptionsParser}
 * @return {@code 1} if the job failed, {@code 0} otherwise
 * @throws Exception if configuring the job, running it or post-processing its output fails
 */
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // GenericOptionsParser invocation in order to suppress the hadoop warning.
    new GenericOptionsParser(conf, args);

    Job job = new Job(conf, "InitializationDriver");
    job.setJarByClass(InitializationDriver.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.addInputPath(job, this.input);
    FileOutputFormat.setOutputPath(job, this.output);

    if (this.type != InputType.ADJACENCY_LIST) {
        // Cliques list: the named output receives the edges list.
        MultipleOutputs.addNamedOutput(job, MOS_OUTPUT_NAME, SequenceFileOutputFormat.class, IntWritable.class,
                IntWritable.class);
        MultipleOutputs.setCountersEnabled(job, true);
        // Mapper, combiner and reducer used for the cliques-list input.
        job.setMapperClass(InitializationMapperClique.class);
        job.setCombinerClass(InitializationCombinerNumNodes.class);
        job.setReducerClass(InitializationReducerNumNodes.class);
    } else {
        // Adjacency list: a map-only job is enough to obtain the arcs list.
        job.setMapperClass(InitializationMapperAdjacency.class);
        job.setNumReduceTasks(0);
    }

    if (!job.waitForCompletion(verbose)) {
        return 1;
    }

    // Copy the counter values into the private fields.
    this.numCliques = job.getCounters().findCounter(UtilCounters.NUM_CLIQUES).getValue();
    this.numInitialNodes = job.getCounters().findCounter(UtilCounters.NUM_INITIAL_NODES).getValue();

    if (this.type != InputType.CLIQUES_LIST) {
        return 0;
    }

    FileSystem fs = FileSystem.get(conf);

    // Delete the regular outputs of the job (entries whose name contains "part").
    for (FileStatus regularOutput : fs.listStatus(this.output)) {
        if (regularOutput.getPath().getName().contains("part")) {
            fs.delete(regularOutput.getPath(), false);
        }
    }

    // Move the content of the named-output folder up into the output folder.
    for (FileStatus namedOutput : fs.listStatus(this.output.suffix("/" + MOS_OUTPUT_NAME))) {
        fs.rename(namedOutput.getPath(), this.output.suffix("/" + namedOutput.getPath().getName()));
    }

    // Remove the named-output folder itself.
    fs.delete(this.output.suffix("/" + MOS_OUTPUT_NAME), true);

    return 0;
}

From source file:Patterns.A3_Partitioning.Partition_Users_By_Country_Driver.java

/**
 * Configures and runs the "Users by Country" job, then exits the JVM with status
 * {@code 0} on success or {@code 1} on failure.
 *
 * @param args the command line arguments: {@code args[0]} is the input path and
 *             {@code args[1]} the output path; with fewer than two arguments a usage
 *             message is printed and the JVM exits with status {@code 2}
 * @throws IOException            propagated from job configuration or execution
 * @throws InterruptedException   propagated from job execution
 * @throws ClassNotFoundException propagated from job execution
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
    if (args.length < 2) {
        System.err.println("Usage: Partition_Users_By_Country_Driver <input path> <output path>");
        System.exit(2);
    }

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Users by Country");
    job.setJarByClass(Partition_Users_By_Country_Driver.class);

    job.setMapperClass(Partition_Users_By_Country_Mapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // partitioner class inclusion
    job.setPartitionerClass(Partition_Users_By_Country_Partitioner.class);

    // named output used for the per-country bins
    MultipleOutputs.addNamedOutput(job, "countryBins", TextOutputFormat.class, Text.class, NullWritable.class);
    MultipleOutputs.setCountersEnabled(job, true);

    // One reduce task per partition.
    // NOTE(review): the original comment mentioned 10 countries while 11 tasks are
    // configured -- confirm the number of partitions the partitioner produces.
    job.setNumReduceTasks(11);
    job.setReducerClass(Partition_Users_By_Country_Reducer.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:seoeun.hadoop.multipleoutputs.TestMRMultipleOutputs.java

License:Apache License

/**
 * Runs a small job that writes to a text and a sequence-file named output and checks
 * the content of {@code text-r-00000} and {@code sequence_B-r-00000} and, optionally,
 * the MultipleOutputs counters.
 *
 * @param withCounters whether MultipleOutputs counters are enabled and verified
 * @throws Exception if the job or any file access fails
 */
protected void _testMultipleOutputs(boolean withCounters) throws Exception {
    String input = "a\nb\nc\nd\ne\nc\nd\ne";

    Configuration conf = new Configuration();
    Job job = MapReduceTestUtil.createJob(conf, IN_DIR, OUT_DIR, 2, 1, input);

    job.setJobName("mo");
    MultipleOutputs.addNamedOutput(job, TEXT, TextOutputFormat.class, LongWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, SEQUENCE, SequenceFileOutputFormat.class, IntWritable.class,
            Text.class);

    MultipleOutputs.setCountersEnabled(job, withCounters);

    job.setMapperClass(MOMap.class);
    job.setReducerClass(MOReduce.class);

    job.waitForCompletion(true);

    // Count the named-output and value-based part files.
    // NOTE(review): the two assertions on these counts are disabled below, so the counts
    // are currently unused -- confirm whether they should be re-enabled.
    int namedOutputCount = 0;
    int valueBasedOutputCount = 0;
    FileSystem fs = OUT_DIR.getFileSystem(conf);
    FileStatus[] statuses = fs.listStatus(OUT_DIR);
    for (FileStatus status : statuses) {
        String fileName = status.getPath().getName();
        if (fileName.equals("text-m-00000") || fileName.equals("text-m-00001")
                || fileName.equals("text-r-00000") || fileName.equals("sequence_A-m-00000")
                || fileName.equals("sequence_A-m-00001") || fileName.equals("sequence_B-m-00000")
                || fileName.equals("sequence_B-m-00001") || fileName.equals("sequence_B-r-00000")
                || fileName.equals("sequence_C-r-00000")) {
            namedOutputCount++;
        } else if (fileName.equals("a-r-00000") || fileName.equals("b-r-00000") || fileName.equals("c-r-00000")
                || fileName.equals("d-r-00000") || fileName.equals("e-r-00000")) {
            valueBasedOutputCount++;
        }
    }
    //assertEquals(9, namedOutputCount);
    //assertEquals(5, valueBasedOutputCount);

    // assert TextOutputFormat files correctness
    BufferedReader reader = new BufferedReader(
            new InputStreamReader(fs.open(new Path(FileOutputFormat.getOutputPath(job), "text-r-00000"))));
    int count = 0;
    try {
        String line = reader.readLine();
        while (line != null) {
            assertTrue(line.endsWith(TEXT));
            line = reader.readLine();
            count++;
        }
    } finally {
        // Close even when an assertion fails so the stream is not leaked.
        reader.close();
    }
    assertFalse(count == 0);

    // assert SequenceOutputFormat files correctness
    SequenceFile.Reader seqReader = new SequenceFile.Reader(fs,
            new Path(FileOutputFormat.getOutputPath(job), "sequence_B-r-00000"), conf);
    count = 0;
    try {
        assertEquals(IntWritable.class, seqReader.getKeyClass());
        assertEquals(Text.class, seqReader.getValueClass());

        IntWritable key = new IntWritable();
        Text value = new Text();
        while (seqReader.next(key, value)) {
            assertEquals(SEQUENCE, value.toString());
            count++;
        }
    } finally {
        // Close even when an assertion fails so the reader is not leaked.
        seqReader.close();
    }
    assertFalse(count == 0);

    if (withCounters) {
        CounterGroup counters = job.getCounters().getGroup(MultipleOutputs.class.getName());
        assertEquals(9, counters.size());
        assertEquals(4, counters.findCounter(TEXT).getValue());
        assertEquals(2, counters.findCounter(SEQUENCE + "_A").getValue());
        assertEquals(4, counters.findCounter(SEQUENCE + "_B").getValue());
        assertEquals(2, counters.findCounter(SEQUENCE + "_C").getValue());
        assertEquals(2, counters.findCounter("a").getValue());
        assertEquals(2, counters.findCounter("b").getValue());
        assertEquals(4, counters.findCounter("c").getValue());
        assertEquals(4, counters.findCounter("d").getValue());
        assertEquals(4, counters.findCounter("e").getValue());
    }
}