Usage examples for org.apache.hadoop.mapreduce.lib.output.MultipleOutputs.addNamedOutput
@SuppressWarnings("unchecked") public static void addNamedOutput(Job job, String namedOutput, Class<? extends OutputFormat> outputFormatClass, Class<?> keyClass, Class<?> valueClass)
From source file:org.apache.rya.reasoning.mr.AbstractReasoningTool.java
License:Apache License
/**
 * Set up a MapReduce job to output human-readable text.
 */
protected void configureTextOutput(String destination) {
    Path outPath = MRReasoningUtils.getOutputPath(job.getConfiguration(), destination);
    TextOutputFormat.setOutputPath(job, outPath);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INTERMEDIATE_OUT, TextOutputFormat.class,
            NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.TERMINAL_OUT, TextOutputFormat.class,
            NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.SCHEMA_OUT, TextOutputFormat.class,
            NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INCONSISTENT_OUT, TextOutputFormat.class,
            NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT, TextOutputFormat.class,
            Text.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, true);
}
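Because this example calls setCountersEnabled(job, true), the record count of each named output can be read back after the job finishes from the counter group named after the MultipleOutputs class; the test case at the end of this page relies on the same convention. A short sketch, assuming the job above completed successfully:

    // Records written to the DEBUG_OUT named output, read from the
    // counter group that setCountersEnabled(job, true) populates.
    long debugRecords = job.getCounters()
            .getGroup(MultipleOutputs.class.getName())
            .findCounter(MRReasoningUtils.DEBUG_OUT)
            .getValue();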
From source file:org.bgi.flexlab.gaea.tools.mapreduce.annotator.Annotator.java
License:Open Source License
public int runAnnotator(String[] arg0) throws Exception {
    Configuration conf = new Configuration();
    String[] remainArgs = remainArgs(arg0, conf);
    AnnotatorOptions options = new AnnotatorOptions();
    options.parse(remainArgs);
    options.setHadoopConf(remainArgs, conf);
    System.out.println("inputFilePath: " + conf.get("inputFilePath"));

    BioJob job = BioJob.getInstance(conf);

    if (options.isCachedRef())
        System.err.println("--------- isCachedRef --------");
    ReferenceShare.distributeCache(options.getReferenceSequencePath(), job);
    job.setHeader(new Path(options.getInput()), new Path(options.getOutput()));
    job.setJobName("GaeaAnnotator");
    job.setJarByClass(this.getClass());
    job.setMapperClass(AnnotationMapper.class);
    job.setReducerClass(AnnotationReducer.class);
    job.setNumReduceTasks(options.getReducerNum());
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(VcfLineWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(MNLineInputFormat.class);

    // Collect the sample names from the headers of all input VCF files.
    List<String> sampleNames = new ArrayList<>();
    Path inputPath = new Path(conf.get("inputFilePath"));
    FileSystem fs = inputPath.getFileSystem(conf);
    FileStatus[] files = fs.listStatus(inputPath);
    for (FileStatus file : files) {
        System.out.println(file.getPath());
        if (file.isFile()) {
            SingleVCFHeader singleVcfHeader = new SingleVCFHeader();
            singleVcfHeader.readHeaderFrom(file.getPath(), fs);
            VCFHeader vcfHeader = singleVcfHeader.getHeader();
            sampleNames.addAll(vcfHeader.getSampleNamesInOrder());
        }
    }

    MNLineInputFormat.addInputPath(job, new Path(options.getInputFilePath()));
    MNLineInputFormat.setMinNumLinesToSplit(job, 1000);
    MNLineInputFormat.setMapperNum(job, options.getMapperNum());
    Path partTmp = new Path(options.getTmpPath());
    FileOutputFormat.setOutputPath(job, partTmp);

    // Register one named output per sample.
    for (int i = 0; i < sampleNames.size(); i++) {
        System.out.println("sampleName " + i + ":" + SampleNameModifier.modify(sampleNames.get(i)));
        MultipleOutputs.addNamedOutput(job, SampleNameModifier.modify(sampleNames.get(i)),
                TextOutputFormat.class, NullWritable.class, Text.class);
    }

    if (job.waitForCompletion(true)) {
        // Merge each sample's part files into a single gzipped TSV,
        // writing the "#"-prefixed header lines only once.
        for (int i = 0; i < sampleNames.size(); i++) {
            GZIPOutputStream os = new GZIPOutputStream(
                    new FileOutputStream(options.getOutputPath() + "/" + sampleNames.get(i) + ".tsv.gz"));
            final FileStatus[] parts = partTmp.getFileSystem(conf).globStatus(new Path(options.getTmpPath()
                    + "/" + sampleNames.get(i) + "/part" + "-*-[0-9][0-9][0-9][0-9][0-9]*"));
            boolean writeHeader = true;
            for (FileStatus p : parts) {
                FSDataInputStream dis = p.getPath().getFileSystem(conf).open(p.getPath());
                BufferedReader reader = new BufferedReader(new InputStreamReader(dis));
                String line;
                while ((line = reader.readLine()) != null) {
                    if (line.startsWith("#")) {
                        if (writeHeader) {
                            os.write(line.getBytes());
                            os.write('\n');
                            writeHeader = false;
                        }
                        continue;
                    }
                    os.write(line.getBytes());
                    os.write('\n');
                }
                reader.close();
            }
            os.close();
        }
        partTmp.getFileSystem(conf).delete(partTmp, true);
        return 0;
    } else {
        return 1;
    }
}
From source file:org.bgi.flexlab.gaea.tools.mapreduce.fastqqualitycontrol.FastqQualityControl.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    BioJob job = BioJob.getInstance();
    Configuration conf = job.getConfiguration();
    String[] remainArgs = remainArgs(args, conf);

    FastqQualityControlOptions option = new FastqQualityControlOptions();
    option.parse(remainArgs);
    conf.setInt(FastqRecordReader.READ_NAME_TYPE, option.getReadType());
    option.setHadoopConf(args, conf);

    job.setJobName("GaeaFastqQC");
    job.setJarByClass(FastqQualityControl.class);
    job.setMapperClass(PairEndAggregatorMapper.class);
    job.setReducerClass(FastqQualityControlReducer.class);
    job.setInputFormatClass(FastqInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setNumReduceTasks(option.getReducerNumber());
    job.setOutputKeyValue(Text.class, Text.class, NullWritable.class, Text.class);

    FastqMultipleSample sample = null;
    if (option.getMultiSampleList() != null && !option.getMultiSampleList().isEmpty()) {
        // Multi-sample mode: add FASTQ and adapter inputs for every sample.
        sample = new FastqMultipleSample(option.getMultiSampleList(), true);
        Map<String, FastqSample> sampleList = sample.getSampleList();
        for (FastqSample sl : sampleList.values()) {
            if (sl.getFastq1() != null) {
                MultipleInputs.addInputPath(job, new Path(sl.getFastq1()), FastqInputFormat.class);
            } else {
                System.err.println(sl.getSampleName() + " has no fq1!");
                System.exit(1);
            }
            if (sl.getFastq2() != null) {
                MultipleInputs.addInputPath(job, new Path(sl.getFastq2()), FastqInputFormat.class);
            } else {
                System.err.println(sl.getSampleName() + " is SE data!");
            }
            if (sl.getAdapter1() != null) {
                MultipleInputs.addInputPath(job, new Path(sl.getAdapter1()), AdaptorInputFormat.class);
            }
            if (sl.getAdapter2() != null) {
                MultipleInputs.addInputPath(job, new Path(sl.getAdapter2()), AdaptorInputFormat.class);
            }
        }
    } else {
        // Single-sample mode.
        if (option.getInputFastq1() != null) {
            MultipleInputs.addInputPath(job, new Path(option.getInputFastq1()), FastqInputFormat.class);
        }
        if (option.getInputFastq2() != null) {
            MultipleInputs.addInputPath(job, new Path(option.getInputFastq2()), FastqInputFormat.class);
        }
        if (option.getAdapter1() != null) {
            MultipleInputs.addInputPath(job, new Path(option.getAdapter1()), AdaptorInputFormat.class);
        }
        if (option.getAdapter2() != null) {
            MultipleInputs.addInputPath(job, new Path(option.getAdapter2()), AdaptorInputFormat.class);
        }
    }

    Path outputPath = new Path(option.getOutputDirectory() + "/out_fq");
    FileOutputFormat.setOutputPath(job, outputPath);
    MultipleOutputs.addNamedOutput(job, "filterStatistic", TextOutputFormat.class,
            NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, "qualFreqStatistic", TextOutputFormat.class,
            NullWritable.class, Text.class);

    if (job.waitForCompletion(true)) {
        FastqQualityControlReporterIO report = new FastqQualityControlReporterIO(sample,
                option.isMultiStatis());
        report.mergeReport(outputPath, conf, new Path(option.getOutputDirectory()));
        return 0;
    } else {
        return 1;
    }
}
From source file:org.bgi.flexlab.gaea.tools.mapreduce.realigner.Realigner.java
License:Open Source License
private int runRealigner(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    BioJob job = BioJob.getInstance();
    Configuration conf = job.getConfiguration();
    String[] remainArgs = remainArgs(args, conf);

    options = new RealignerExtendOptions();
    options.parse(remainArgs);
    option = options.getRealignerOptions();

    String jobName = "Gaea realigner and recalibrator";
    if (options.isRecalibration() && !options.isRealignment()) {
        job.setOnlyBaseRecalibrator(true);
        jobName = "GaeaRecalibrator";
    } else if (options.isRealignment() && !options.isRecalibration()) {
        jobName = "GaeaRealigner";
    }

    if (option.isMultiSample())
        job.setMultipleSample();
    job.setJobName(jobName);

    option.setHadoopConf(remainArgs, conf);
    header = job.setHeader(new Path(option.getRealignerInput()), new Path(options.getCommonOutput()));

    job.setAnySamInputFormat(option.getInputFormat());
    job.setOutputFormatClass(GaeaBamOutputFormat.class);
    job.setOutputKeyValue(WindowsBasedWritable.class, SamRecordWritable.class, NullWritable.class,
            SamRecordWritable.class);

    job.setJarByClass(Realigner.class);
    job.setWindowsBasicMapperClass(WindowsBasedSamRecordMapper.class, option.getWindowsSize(),
            option.getExtendSize());
    job.setReducerClass(RealignerReducer.class);
    job.setNumReduceTasks(option.getReducerNumber());

    FileInputFormat.setInputPaths(job, new Path(option.getRealignerInput()));
    FileOutputFormat.setOutputPath(job, new Path(option.getRealignerOutput()));

    if (options.isRecalibration())
        MultipleOutputs.addNamedOutput(job, RecalibratorContextWriter.RECALIBRATOR_TABLE_TAG,
                TextOutputFormat.class, NullWritable.class, Text.class);

    if (job.waitForCompletion(true)) {
        if (options.isRecalibration())
            return mergeReportTable(options.getBqsrOptions(), header,
                    options.getCommonOutput() + RECALIBRATOR_REPORT_TABLE_NAME);
        return 0;
    }
    return 1;
}
From source file:org.bgi.flexlab.gaea.tools.mapreduce.vcf.sort.VCFSort.java
License:Open Source License
private void setMultiOutputs(MultipleVCFHeader mVcfHeader, BioJob job) {
    // Register one named output ("SortResult1", "SortResult2", ...) per input VCF file ID.
    int i = 0;
    Map<Integer, String> multiOutputs = new HashMap<>();
    for (int id : mVcfHeader.getFileName2ID().values()) {
        multiOutputs.put(id, "SortResult" + ++i);
        MultipleOutputs.addNamedOutput(job, multiOutputs.get(id), SortOutputFormat.class,
                NullWritable.class, VariantContextWritable.class);
    }
    options.setMultiOutputs(multiOutputs);
}
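The named outputs here are generated dynamically, one per input VCF file. On the write side, which this snippet does not show, a task would presumably look up the name for a record's file ID and route the record accordingly; a hypothetical sketch, assuming a MultipleOutputs instance mos and access to the multiOutputs map:

    // Hypothetical routing: send each sorted record to the named output
    // registered for its source file ID.
    String namedOutput = multiOutputs.get(fileId);
    mos.write(namedOutput, NullWritable.get(), variantContextWritable);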
From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java
License:Open Source License
protected boolean runDictionaryJob()
        throws ClassNotFoundException, IOException, InterruptedException, URISyntaxException {
    boolean jobOK;
    Job job = null;
    BufferedWriter bufferedWriter;

    // If the output path already exists...
    if (this.dictionaryFS.exists(this.conf.getDictionaryOutputPath())) {
        if (this.conf.getDeleteDictionaryOutputPath()) {
            // ... and the delete option was provided, delete it recursively
            this.dictionaryFS.delete(this.conf.getDictionaryOutputPath(), true);
        } else {
            // ... and the option was not provided, fail
            System.out.println("Dictionary output path already exists: " + this.conf.getDictionaryOutputPath());
            System.out.println("Select another path or use option -dd to overwrite");
            System.exit(-1);
        }
    }

    // Sample the SequenceInputFormat to do TotalSort and create the final output
    job = new Job(this.conf.getConfigurationObject(), this.conf.getDictionaryJobName() + " phase 2");
    job.setJarByClass(HDTBuilderDriver.class);

    System.out.println("samples = " + this.conf.getDictionarySamplesPath());
    System.out.println("output = " + this.conf.getDictionaryOutputPath());

    FileInputFormat.addInputPath(job, this.conf.getDictionarySamplesPath());
    FileOutputFormat.setOutputPath(job, this.conf.getDictionaryOutputPath());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    // Identity Mapper
    // job.setMapperClass(Mapper.class);
    job.setCombinerClass(DictionaryCombiner.class);
    job.setPartitionerClass(TotalOrderPartitioner.class);
    job.setReducerClass(DictionaryReducer.class);
    job.setNumReduceTasks(this.conf.getDictionaryReducers());

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    System.out.println("Sampling started");
    InputSampler.writePartitionFile(job,
            new InputSampler.IntervalSampler<Text, Text>(this.conf.getSampleProbability()));
    String partitionFile = TotalOrderPartitioner.getPartitionFile(job.getConfiguration());
    URI partitionUri = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    DistributedCache.addCacheFile(partitionUri, job.getConfiguration());
    DistributedCache.createSymlink(job.getConfiguration());
    System.out.println("Sampling finished");

    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SHARED, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SUBJECTS, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.PREDICATES, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.OBJECTS, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    jobOK = job.waitForCompletion(true);

    this.numShared = job.getCounters().findCounter(Counters.Shared).getValue();
    this.numSubjects = job.getCounters().findCounter(Counters.Subjects).getValue();
    this.numPredicates = job.getCounters().findCounter(Counters.Predicates).getValue();
    this.numObjects = job.getCounters().findCounter(Counters.Objects).getValue();

    bufferedWriter = new BufferedWriter(
            new OutputStreamWriter(this.dictionaryFS.create(this.conf.getDictionaryCountersFile())));
    bufferedWriter.write(HDTBuilderConfiguration.SHARED + "=" + this.numShared + "\n");
    bufferedWriter.write(HDTBuilderConfiguration.SUBJECTS + "=" + this.numSubjects + "\n");
    bufferedWriter.write(HDTBuilderConfiguration.PREDICATES + "=" + this.numPredicates + "\n");
    bufferedWriter.write(HDTBuilderConfiguration.OBJECTS + "=" + this.numObjects + "\n");
    bufferedWriter.close();

    return jobOK;
}
From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java
License:Open Source License
protected boolean runDictionaryJobWithOneJob()
        throws ClassNotFoundException, IOException, InterruptedException, URISyntaxException {
    boolean jobOK;
    Job job = null;
    BufferedWriter bufferedWriter;

    // If the input path does not exist, fail
    if (!this.inputFS.exists(this.conf.getInputPath())) {
        System.out.println("Dictionary input path does not exist: " + this.conf.getInputPath());
        System.exit(-1);
    }

    // If the output path already exists...
    if (this.dictionaryFS.exists(this.conf.getDictionaryOutputPath())) {
        if (this.conf.getDeleteDictionaryOutputPath()) {
            // ... and the delete option was provided, delete it recursively
            this.dictionaryFS.delete(this.conf.getDictionaryOutputPath(), true);
        } else {
            // ... and the option was not provided, fail
            System.out.println("Dictionary output path already exists: " + this.conf.getDictionaryOutputPath());
            System.out.println("Select another path or use option -dd to overwrite");
            System.exit(-1);
        }
    }

    // Launch job
    job = new Job(this.conf.getConfigurationObject(), this.conf.getTriplesJobName());
    job.setJarByClass(HDTBuilderDriver.class);

    FileInputFormat.addInputPath(job, this.conf.getInputPath());
    FileOutputFormat.setOutputPath(job, this.conf.getDictionaryOutputPath());

    job.setInputFormatClass(LzoTextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    job.setMapperClass(DictionaryMapper.class);
    job.setCombinerClass(DictionaryCombiner.class);
    job.setReducerClass(DictionaryReducer.class);
    job.setNumReduceTasks(this.conf.getDictionaryReducers());

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SHARED, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SUBJECTS, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.PREDICATES, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.OBJECTS, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);

    jobOK = job.waitForCompletion(true);

    this.numShared = job.getCounters().findCounter(Counters.Shared).getValue();
    this.numSubjects = job.getCounters().findCounter(Counters.Subjects).getValue();
    this.numPredicates = job.getCounters().findCounter(Counters.Predicates).getValue();
    this.numObjects = job.getCounters().findCounter(Counters.Objects).getValue();

    bufferedWriter = new BufferedWriter(
            new OutputStreamWriter(this.dictionaryFS.create(this.conf.getDictionaryCountersFile())));
    bufferedWriter.write(HDTBuilderConfiguration.SHARED + "=" + this.numShared + "\n");
    bufferedWriter.write(HDTBuilderConfiguration.SUBJECTS + "=" + this.numSubjects + "\n");
    bufferedWriter.write(HDTBuilderConfiguration.PREDICATES + "=" + this.numPredicates + "\n");
    bufferedWriter.write(HDTBuilderConfiguration.OBJECTS + "=" + this.numObjects + "\n");
    bufferedWriter.close();

    return jobOK;
}
From source file:pad.InitializationDriver.java
License:Apache License
/**
 * Execute the InitializationDriver Job.
 *
 * If the input file format is an adjacency list, we can easily determine the initial number of nodes,
 * which equals the number of rows of the input file, while the number of cliques is zero.
 * In order to obtain the list of arcs from the adjacency list, we use \see InitializationMapperAdjacency
 * as Mapper and zero Reducers.
 *
 * If the input file format is a cliques list, we can easily determine the number of cliques,
 * which equals the number of rows of the input file.
 * In order to obtain an edge list from the cliques list, we use \see InitializationMapperClique
 * as Mapper. We store this result into a special folder \see MOS_OUTPUT_NAME.
 * Into the regular folder, this Mapper emits all the encountered nodes.
 * We use \see InitializationReducerNumNodes as Reducer in order to count the initial number of nodes,
 * counting all the distinct nodes found. The combiner (\see InitializationCombinerNumNodes) locally
 * reduces the number of duplicated nodes.
 * Once the value of the NUM_INITIAL_NODES counter (\see UtilCounters) is obtained, we delete the empty
 * files produced by the Reducer and move the real results into the main/regular folder.
 *
 * @param args array of external arguments, not used in this method
 * @return <c>1</c> if the InitializationDriver Job failed its execution; <c>0</c> if everything is ok.
 * @throws Exception
 */
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // GenericOptionsParser invocation in order to suppress the hadoop warning.
    new GenericOptionsParser(conf, args);
    Job job = new Job(conf, "InitializationDriver");
    job.setJarByClass(InitializationDriver.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.addInputPath(job, this.input);
    FileOutputFormat.setOutputPath(job, this.output);

    if (this.type == InputType.ADJACENCY_LIST) {
        // In order to obtain the arcs list from the adjacency list, we need only a Mapper task.
        job.setMapperClass(InitializationMapperAdjacency.class);
        job.setNumReduceTasks(0);
    } else {
        // Set up the special folder.
        MultipleOutputs.addNamedOutput(job, MOS_OUTPUT_NAME, SequenceFileOutputFormat.class,
                IntWritable.class, IntWritable.class);
        MultipleOutputs.setCountersEnabled(job, true);
        // In order to obtain the edges list from the cliques list, we need only a Mapper task,
        // and we save the result into the special folder.
        // Then, we need a Reducer task in order to count the initial number of nodes.
        job.setMapperClass(InitializationMapperClique.class);
        job.setCombinerClass(InitializationCombinerNumNodes.class);
        job.setReducerClass(InitializationReducerNumNodes.class);
    }

    if (!job.waitForCompletion(verbose))
        return 1;

    // Set the private variables from the counters' values
    this.numCliques = job.getCounters().findCounter(UtilCounters.NUM_CLIQUES).getValue();
    this.numInitialNodes = job.getCounters().findCounter(UtilCounters.NUM_INITIAL_NODES).getValue();

    if (this.type == InputType.CLIQUES_LIST) {
        FileSystem fs = FileSystem.get(conf);

        // Delete the empty outputs of the Job
        FileStatus[] filesStatus = fs.listStatus(this.output);
        for (FileStatus fileStatus : filesStatus)
            if (fileStatus.getPath().getName().contains("part"))
                fs.delete(fileStatus.getPath(), false);

        // Move the real outputs into the parent folder
        filesStatus = fs.listStatus(this.output.suffix("/" + MOS_OUTPUT_NAME));
        for (FileStatus fileStatus : filesStatus)
            fs.rename(fileStatus.getPath(), this.output.suffix("/" + fileStatus.getPath().getName()));

        // Delete the now-empty special folder
        fs.delete(this.output.suffix("/" + MOS_OUTPUT_NAME), true);
    }

    return 0;
}
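The cleanup phase above expects the MultipleOutputs results to land in a MOS_OUTPUT_NAME subfolder of the job output. That layout follows from the write side using the baseOutputPath overload of MultipleOutputs.write, along these lines (a hypothetical sketch; the actual mapper is not shown on this page):

    // Writing with a base output path relative to the job output directory
    // places the files under {output}/MOS_OUTPUT_NAME/part-*.
    mos.write(MOS_OUTPUT_NAME, key, value, MOS_OUTPUT_NAME + "/part");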
From source file:Patterns.A3_Partitioning.Partition_Users_By_Country_Driver.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Users by Country");
    job.setJarByClass(Partition_Users_By_Country_Driver.class);

    job.setMapperClass(Partition_Users_By_Country_Mapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // partitioner class inclusion
    job.setPartitionerClass(Partition_Users_By_Country_Partitioner.class);

    // set a named output for custom-named partition files
    MultipleOutputs.addNamedOutput(job, "countryBins", TextOutputFormat.class, Text.class,
            NullWritable.class);
    MultipleOutputs.setCountersEnabled(job, true);

    // set the number of reduce tasks to match the partitions we need
    // (10 countries in total, plus one extra bin, hence 11)
    job.setNumReduceTasks(11);
    job.setReducerClass(Partition_Users_By_Country_Reducer.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:seoeun.hadoop.multipleoutputs.TestMRMultipleOutputs.java
License:Apache License
protected void _testMultipleOutputs(boolean withCounters) throws Exception {
    String input = "a\nb\nc\nd\ne\nc\nd\ne";

    //Configuration conf = createJobConf();
    Configuration conf = new Configuration();
    Job job = MapReduceTestUtil.createJob(conf, IN_DIR, OUT_DIR, 2, 1, input);

    job.setJobName("mo");
    MultipleOutputs.addNamedOutput(job, TEXT, TextOutputFormat.class, LongWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, SEQUENCE, SequenceFileOutputFormat.class, IntWritable.class,
            Text.class);

    MultipleOutputs.setCountersEnabled(job, withCounters);

    job.setMapperClass(MOMap.class);
    job.setReducerClass(MOReduce.class);

    job.waitForCompletion(true);

    // assert number of named output part files
    int namedOutputCount = 0;
    int valueBasedOutputCount = 0;
    FileSystem fs = OUT_DIR.getFileSystem(conf);
    FileStatus[] statuses = fs.listStatus(OUT_DIR);
    for (FileStatus status : statuses) {
        String fileName = status.getPath().getName();
        if (fileName.equals("text-m-00000") || fileName.equals("text-m-00001")
                || fileName.equals("text-r-00000") || fileName.equals("sequence_A-m-00000")
                || fileName.equals("sequence_A-m-00001") || fileName.equals("sequence_B-m-00000")
                || fileName.equals("sequence_B-m-00001") || fileName.equals("sequence_B-r-00000")
                || fileName.equals("sequence_C-r-00000")) {
            namedOutputCount++;
        } else if (fileName.equals("a-r-00000") || fileName.equals("b-r-00000")
                || fileName.equals("c-r-00000") || fileName.equals("d-r-00000")
                || fileName.equals("e-r-00000")) {
            valueBasedOutputCount++;
        }
    }
    //assertEquals(9, namedOutputCount);
    //assertEquals(5, valueBasedOutputCount);

    // assert TextOutputFormat files correctness
    BufferedReader reader = new BufferedReader(
            new InputStreamReader(fs.open(new Path(FileOutputFormat.getOutputPath(job), "text-r-00000"))));
    int count = 0;
    String line = reader.readLine();
    while (line != null) {
        assertTrue(line.endsWith(TEXT));
        line = reader.readLine();
        count++;
    }
    reader.close();
    assertFalse(count == 0);

    // assert SequenceFileOutputFormat files correctness
    SequenceFile.Reader seqReader = new SequenceFile.Reader(fs,
            new Path(FileOutputFormat.getOutputPath(job), "sequence_B-r-00000"), conf);
    assertEquals(IntWritable.class, seqReader.getKeyClass());
    assertEquals(Text.class, seqReader.getValueClass());

    count = 0;
    IntWritable key = new IntWritable();
    Text value = new Text();
    while (seqReader.next(key, value)) {
        assertEquals(SEQUENCE, value.toString());
        count++;
    }
    seqReader.close();
    assertFalse(count == 0);

    if (withCounters) {
        CounterGroup counters = job.getCounters().getGroup(MultipleOutputs.class.getName());
        assertEquals(9, counters.size());
        assertEquals(4, counters.findCounter(TEXT).getValue());
        assertEquals(2, counters.findCounter(SEQUENCE + "_A").getValue());
        assertEquals(4, counters.findCounter(SEQUENCE + "_B").getValue());
        assertEquals(2, counters.findCounter(SEQUENCE + "_C").getValue());
        assertEquals(2, counters.findCounter("a").getValue());
        assertEquals(2, counters.findCounter("b").getValue());
        assertEquals(4, counters.findCounter("c").getValue());
        assertEquals(4, counters.findCounter("d").getValue());
        assertEquals(4, counters.findCounter("e").getValue());
    }
}