List of usage examples for org.apache.hadoop.mapreduce Job getJobName
public String getJobName()
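Before the real-world examples, here is a minimal, self-contained sketch (not drawn from any of the projects below; the class name and job name are made up for illustration) showing where getJobName() fits: the name supplied when the Job is created is what the getter returns, typically used for logging or for deriving per-job paths.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class JobNameExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // The name passed here is what getJobName() returns later.
        Job job = Job.getInstance(conf, "word-count-example");
        System.out.println("Configured job name: " + job.getJobName());
    }
}

The examples below follow the same pattern: the name is set at job construction (or via setJobName) and read back with getJobName() when logging submission status or building output directories.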
From source file:mvm.rya.accumulo.pig.AccumuloStorage.java
License:Apache License
@Override
public void setLocation(String location, Job job) throws IOException {
    if (logger.isDebugEnabled()) {
        logger.debug("Set Location[" + location + "] for job[" + job.getJobName() + "]");
    }
    conf = job.getConfiguration();
    setLocationFromUri(location, job);
    if (!ConfiguratorBase.isConnectorInfoSet(AccumuloInputFormat.class, conf)) {
        try {
            AccumuloInputFormat.setConnectorInfo(job, user, new PasswordToken(password.getBytes()));
        } catch (AccumuloSecurityException e) {
            throw new RuntimeException(e);
        }
        AccumuloInputFormat.setInputTableName(job, table);
        AccumuloInputFormat.setScanAuthorizations(job, authorizations);
        if (!mock) {
            AccumuloInputFormat.setZooKeeperInstance(job, inst, zookeepers);
        } else {
            AccumuloInputFormat.setMockInstance(job, inst);
        }
    }
    if (columnFamilyColumnQualifierPairs.size() > 0)
        AccumuloInputFormat.fetchColumns(job, columnFamilyColumnQualifierPairs);
    logger.info("Set ranges[" + ranges + "] for job[" + job.getJobName() + "] on table[" + table + "] "
            + "for columns[" + columnFamilyColumnQualifierPairs + "] with authorizations[" + authorizations + "]");
    if (ranges.size() == 0) {
        throw new IOException("Accumulo Range must be specified");
    }
    AccumuloInputFormat.setRanges(job, ranges);
}
From source file:org.apache.druid.indexer.DeterminePartitionsJob.java
License:Apache License
@Override
public boolean run() {
    try {
        /*
         * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
         * in the final segment.
         */
        if (!(config.getPartitionsSpec() instanceof SingleDimensionPartitionsSpec)) {
            throw new ISE(
                    "DeterminePartitionsJob can only be run for SingleDimensionPartitionsSpec, partitionSpec found [%s]",
                    config.getPartitionsSpec());
        }
        final SingleDimensionPartitionsSpec partitionsSpec = (SingleDimensionPartitionsSpec) config.getPartitionsSpec();

        if (!partitionsSpec.isAssumeGrouped()) {
            groupByJob = Job.getInstance(new Configuration(), StringUtils.format(
                    "%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals()));

            JobHelper.injectSystemProperties(groupByJob);
            config.addJobProperties(groupByJob);

            groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
            groupByJob.setMapOutputKeyClass(BytesWritable.class);
            groupByJob.setMapOutputValueClass(NullWritable.class);
            groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setOutputKeyClass(BytesWritable.class);
            groupByJob.setOutputValueClass(NullWritable.class);
            groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
            JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()),
                    JobHelper.distributedClassPath(config.makeIntermediatePath()), groupByJob);

            config.addInputPaths(groupByJob);
            config.intoConfiguration(groupByJob);
            FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

            groupByJob.submit();
            log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(),
                    groupByJob.getTrackingURL());

            // Store the jobId in the file
            if (groupByJob.getJobID() != null) {
                JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(), groupByJob.getJobID().toString());
            }

            try {
                if (!groupByJob.waitForCompletion(true)) {
                    log.error("Job failed: %s", groupByJob.getJobID());
                    failureCause = Utils.getFailureMessage(groupByJob, config.JSON_MAPPER);
                    return false;
                }
            } catch (IOException ioe) {
                if (!Utils.checkAppSuccessForJobIOException(ioe, groupByJob, config.isUseYarnRMJobStatusFallback())) {
                    throw ioe;
                }
            }
        } else {
            log.info("Skipping group-by job.");
        }

        /*
         * Read grouped data and determine appropriate partitions.
         */
        final Job dimSelectionJob = Job.getInstance(new Configuration(), StringUtils.format(
                "%s-determine_partitions_dimselection-%s", config.getDataSource(), config.getIntervals()));

        dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");

        JobHelper.injectSystemProperties(dimSelectionJob);
        config.addJobProperties(dimSelectionJob);

        if (!partitionsSpec.isAssumeGrouped()) {
            // Read grouped data from the groupByJob.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
            dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
            FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
        } else {
            // Directly read the source data, since we assume it's already grouped.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
            config.addInputPaths(dimSelectionJob);
        }

        SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob, DeterminePartitionsDimSelectionPartitioner.class);
        dimSelectionJob.setMapOutputValueClass(Text.class);
        dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
        dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
        dimSelectionJob.setOutputKeyClass(BytesWritable.class);
        dimSelectionJob.setOutputValueClass(Text.class);
        dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
        dimSelectionJob.setNumReduceTasks(config.getGranularitySpec().bucketIntervals().get().size());
        JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()),
                JobHelper.distributedClassPath(config.makeIntermediatePath()), dimSelectionJob);

        config.intoConfiguration(dimSelectionJob);
        FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());

        dimSelectionJob.submit();
        log.info("Job %s submitted, status available at: %s", dimSelectionJob.getJobName(),
                dimSelectionJob.getTrackingURL());

        // Store the jobId in the file
        if (dimSelectionJob.getJobID() != null) {
            JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(), dimSelectionJob.getJobID().toString());
        }

        try {
            if (!dimSelectionJob.waitForCompletion(true)) {
                log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
                failureCause = Utils.getFailureMessage(dimSelectionJob, config.JSON_MAPPER);
                return false;
            }
        } catch (IOException ioe) {
            if (!Utils.checkAppSuccessForJobIOException(ioe, dimSelectionJob, config.isUseYarnRMJobStatusFallback())) {
                throw ioe;
            }
        }

        /*
         * Load partitions determined by the previous job.
         */
        log.info("Job completed, loading up partitions for intervals[%s].", config.getSegmentGranularIntervals());
        FileSystem fileSystem = null;
        Map<Long, List<HadoopyShardSpec>> shardSpecs = new TreeMap<>();
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
            final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(segmentGranularity);
            if (fileSystem == null) {
                fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
            }
            if (Utils.exists(dimSelectionJob, fileSystem, partitionInfoPath)) {
                List<ShardSpec> specs = config.JSON_MAPPER.readValue(
                        Utils.openInputStream(dimSelectionJob, partitionInfoPath),
                        new TypeReference<List<ShardSpec>>() {
                        });

                List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
                for (int i = 0; i < specs.size(); ++i) {
                    actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
                    log.info("DateTime[%s], partition[%d], spec[%s]", segmentGranularity, i, actualSpecs.get(i));
                }

                shardSpecs.put(segmentGranularity.getStartMillis(), actualSpecs);
            } else {
                log.info("Path[%s] didn't exist!?", partitionInfoPath);
            }
        }
        config.setShardSpecs(shardSpecs);

        return true;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:org.apache.druid.indexer.updater.HadoopConverterJob.java
License:Apache License
public static void cleanup(Job job) throws IOException {
    final Path jobDir = getJobPath(job.getJobID(), job.getWorkingDirectory());
    final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
    RuntimeException e = null;
    try {
        JobHelper.deleteWithRetry(fs, jobDir, true);
    } catch (RuntimeException ex) {
        e = ex;
    }
    try {
        JobHelper.deleteWithRetry(fs, getJobClassPathDir(job.getJobName(), job.getWorkingDirectory()), true);
    } catch (RuntimeException ex) {
        if (e == null) {
            e = ex;
        } else {
            e.addSuppressed(ex);
        }
    }
    if (e != null) {
        throw e;
    }
}
From source file:org.apache.druid.indexer.updater.HadoopConverterJob.java
License:Apache License
public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
        jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }
    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
        throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);

    jobConf.setNumReduceTasks(0); // Map only. Number of map tasks determined by input format
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));

    setJobName(jobConf, segments);

    if (converterConfig.getJobPriority() != null) {
        jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }

    final Job job = Job.getInstance(jobConf);

    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);

    JobHelper.setupClasspath(JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
            JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
            job);

    Throwable throwable = null;
    try {
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        final boolean success = job.waitForCompletion(true);
        if (!success) {
            final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
            if (reports != null) {
                for (final TaskReport report : reports) {
                    log.error("Error in task [%s] : %s", report.getTaskId(),
                            Arrays.toString(report.getDiagnostics()));
                }
            }
            return null;
        }
        try {
            loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
            writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
        } catch (IOException ex) {
            log.error(ex, "Could not fetch counters");
        }
        final JobID jobID = job.getJobID();

        final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
        final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
        final List<Path> goodPaths = new ArrayList<>();
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            if (locatedFileStatus.isFile()) {
                final Path myPath = locatedFileStatus.getPath();
                if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
                    goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
                }
            }
        }
        if (goodPaths.isEmpty()) {
            log.warn("No good data found at [%s]", jobDir);
            return null;
        }
        final List<DataSegment> returnList = ImmutableList
                .copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {
                    @Nullable
                    @Override
                    public DataSegment apply(final Path input) {
                        try {
                            if (!fs.exists(input)) {
                                throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]",
                                        ConvertingOutputFormat.DATA_SUCCESS_KEY,
                                        ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
                            }
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                        try (final InputStream stream = fs.open(input)) {
                            return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                }));
        if (returnList.size() == segments.size()) {
            return returnList;
        } else {
            throw new ISE(
                    "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
                    segments.size(), returnList.size(), jobDir);
        }
    } catch (InterruptedException | ClassNotFoundException e) {
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (Throwable t) {
        throwable = t;
        throw t;
    } finally {
        try {
            cleanup(job);
        } catch (IOException e) {
            if (throwable != null) {
                throwable.addSuppressed(e);
            } else {
                log.error(e, "Could not clean up job [%s]", job.getJobID());
            }
        }
    }
}
From source file:org.apache.druid.indexer.Utils.java
License:Apache License
public static String getFailureMessage(Job failedJob, ObjectMapper jsonMapper) {
    try {
        Map<String, String> taskDiagsMap = new HashMap<>();
        TaskCompletionEvent[] completionEvents = failedJob.getTaskCompletionEvents(0, 100);
        for (TaskCompletionEvent tce : completionEvents) {
            String[] taskDiags = failedJob.getTaskDiagnostics(tce.getTaskAttemptId());
            String combinedTaskDiags = "";
            for (String taskDiag : taskDiags) {
                combinedTaskDiags += taskDiag;
            }
            taskDiagsMap.put(tce.getTaskAttemptId().toString(), combinedTaskDiags);
        }
        return jsonMapper.writeValueAsString(taskDiagsMap);
    } catch (IOException | InterruptedException ie) {
        log.error(ie, "couldn't get failure cause for job [%s]", failedJob.getJobName());
        return null;
    }
}
From source file:org.apache.gobblin.runtime.mapreduce.MRTask.java
License:Apache License
@Override
public void run() {
    try {
        Job job = createJob();

        if (job == null) {
            log.info("No MR job created. Skipping.");
            this.workingState = WorkUnitState.WorkingState.SUCCESSFUL;
            this.eventSubmitter.submit(Events.MR_JOB_SKIPPED);
            onSkippedMRJob();
            return;
        }

        job.submit();

        log.info("MR tracking URL {} for job {}", job.getTrackingURL(), job.getJobName());

        this.eventSubmitter.submit(Events.MR_JOB_STARTED_EVENT, Events.JOB_URL, job.getTrackingURL());
        job.waitForCompletion(false);
        this.mrJob = job;

        if (job.isSuccessful()) {
            this.eventSubmitter.submit(Events.MR_JOB_SUCCESSFUL, Events.JOB_URL, job.getTrackingURL());
            this.onMRTaskComplete(true, null);
        } else {
            this.eventSubmitter.submit(Events.MR_JOB_FAILED, Events.JOB_URL, job.getTrackingURL());
            this.onMRTaskComplete(false,
                    new IOException(String.format("MR Job:%s is not successful", job.getTrackingURL())));
        }
    } catch (Throwable t) {
        log.error("Failed to run MR job.", t);
        this.eventSubmitter.submit(Events.MR_JOB_FAILED, Events.FAILURE_CONTEXT, t.getMessage());
        this.onMRTaskComplete(false, t);
    }
}
From source file:org.apache.gora.tutorial.log.LogAnalytics.java
License:Apache License
/**
 * Creates and returns the {@link Job} for submitting to Hadoop mapreduce.
 * @param inStore the data store holding the input {@link Pageview}s
 * @param outStore the data store that the output {@link MetricDatum}s are persisted to
 * @param numReducer the number of reduce tasks
 * @return the configured {@link Job}
 * @throws IOException
 */
public Job createJob(DataStore<Long, Pageview> inStore, DataStore<String, MetricDatum> outStore,
        int numReducer) throws IOException {
    Job job = new Job(getConf());

    job.setJobName("Log Analytics");
    log.info("Creating Hadoop Job: " + job.getJobName());
    job.setNumReduceTasks(numReducer);
    job.setJarByClass(getClass());

    /* Mappers are initialized with GoraMapper.initMapper() or
     * GoraInputFormat.setInput() */
    GoraMapper.initMapperJob(job, inStore, TextLong.class, LongWritable.class, LogAnalyticsMapper.class, true);

    /* Reducers are initialized with GoraReducer#initReducer().
     * If the output is not to be persisted via Gora, any reducer
     * can be used instead. */
    GoraReducer.initReducerJob(job, outStore, LogAnalyticsReducer.class);

    return job;
}
From source file:org.apache.hcatalog.mapreduce.TestMultiOutputFormat.java
License:Apache License
/**
 * A test job that reads an input file and outputs each word and the index of
 * the word encountered to a text file and a sequence file with different key
 * values.
 */
@Test
public void testMultiOutputFormatWithoutReduce() throws Throwable {
    Job job = new Job(mrConf, "MultiOutNoReduce");
    job.setMapperClass(MultiOutWordIndexMapper.class);
    job.setJarByClass(this.getClass());
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(MultiOutputFormat.class);
    job.setNumReduceTasks(0);

    JobConfigurer configurer = MultiOutputFormat.createConfigurer(job);
    configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class);
    configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, IntWritable.class);
    Path outDir = new Path(workDir.getPath(), job.getJobName());
    FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1"));
    FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2"));

    String fileContent = "Hello World";
    String inputFile = createInputFile(fileContent);
    FileInputFormat.setInputPaths(job, new Path(inputFile));

    // Test for merging of configs
    DistributedCache.addFileToClassPath(new Path(inputFile), job.getConfiguration(), fs);
    String dummyFile = createInputFile("dummy file");
    DistributedCache.addFileToClassPath(new Path(dummyFile), configurer.getJob("out1").getConfiguration(), fs);
    // duplicate of the value. Merging should remove duplicates
    DistributedCache.addFileToClassPath(new Path(inputFile), configurer.getJob("out2").getConfiguration(), fs);

    configurer.configure();

    // Verify if the configs are merged
    Path[] fileClassPaths = DistributedCache.getFileClassPaths(job.getConfiguration());
    List<Path> fileClassPathsList = Arrays.asList(fileClassPaths);
    Assert.assertTrue(fileClassPathsList.contains(new Path(inputFile)));
    Assert.assertTrue(fileClassPathsList.contains(new Path(dummyFile)));

    URI[] cacheFiles = DistributedCache.getCacheFiles(job.getConfiguration());
    List<URI> cacheFilesList = Arrays.asList(cacheFiles);
    Assert.assertTrue(cacheFilesList.contains(new Path(inputFile).makeQualified(fs).toUri()));
    Assert.assertTrue(cacheFilesList.contains(new Path(dummyFile).makeQualified(fs).toUri()));

    Assert.assertTrue(job.waitForCompletion(true));

    Path textOutPath = new Path(outDir, "out1/part-m-00000");
    String[] textOutput = readFully(textOutPath).split("\n");
    Path seqOutPath = new Path(outDir, "out2/part-m-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf);
    Text key = new Text();
    IntWritable value = new IntWritable();
    String[] words = fileContent.split(" ");
    Assert.assertEquals(words.length, textOutput.length);
    LOG.info("Verifying file contents");
    for (int i = 0; i < words.length; i++) {
        Assert.assertEquals((i + 1) + "\t" + words[i], textOutput[i]);
        reader.next(key, value);
        Assert.assertEquals(words[i], key.toString());
        Assert.assertEquals((i + 1), value.get());
    }
    Assert.assertFalse(reader.next(key, value));
}
From source file:org.apache.hcatalog.mapreduce.TestMultiOutputFormat.java
License:Apache License
/**
 * A word count test job that reads an input file and outputs the count of
 * words to a text file and a sequence file with different key values.
 */
@Test
public void testMultiOutputFormatWithReduce() throws Throwable {
    Job job = new Job(mrConf, "MultiOutWithReduce");

    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(MultiOutWordCountReducer.class);
    job.setJarByClass(this.getClass());
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(MultiOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    JobConfigurer configurer = MultiOutputFormat.createConfigurer(job);

    configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class);
    configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, IntWritable.class);
    configurer.addOutputFormat("out3", NullOutputFormat.class, Text.class, IntWritable.class);
    Path outDir = new Path(workDir.getPath(), job.getJobName());
    FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1"));
    FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2"));

    configurer.configure();

    String fileContent = "Hello World Hello World World";
    String inputFile = createInputFile(fileContent);
    FileInputFormat.setInputPaths(job, new Path(inputFile));

    Assert.assertTrue(job.waitForCompletion(true));

    Path textOutPath = new Path(outDir, "out1/part-r-00000");
    String[] textOutput = readFully(textOutPath).split("\n");
    Path seqOutPath = new Path(outDir, "out2/part-r-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf);
    Text key = new Text();
    IntWritable value = new IntWritable();
    String[] words = "Hello World".split(" ");
    Assert.assertEquals(words.length, textOutput.length);
    for (int i = 0; i < words.length; i++) {
        Assert.assertEquals((i + 2) + "\t" + words[i], textOutput[i]);
        reader.next(key, value);
        Assert.assertEquals(words[i], key.toString());
        Assert.assertEquals((i + 2), value.get());
    }
    Assert.assertFalse(reader.next(key, value));
}
From source file:org.apache.hive.hcatalog.mapreduce.TestMultiOutputFormat.java
License:Apache License
/**
 * A test job that reads an input file and outputs each word and the index of
 * the word encountered to a text file and a sequence file with different key
 * values.
 */
@Test
public void testMultiOutputFormatWithoutReduce() throws Throwable {
    Job job = new Job(mrConf, "MultiOutNoReduce");
    job.setMapperClass(MultiOutWordIndexMapper.class);
    job.setJarByClass(this.getClass());
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(MultiOutputFormat.class);
    job.setNumReduceTasks(0);

    JobConfigurer configurer = MultiOutputFormat.createConfigurer(job);
    configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class);
    configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, IntWritable.class);
    Path outDir = new Path(workDir.getPath(), job.getJobName());
    FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1"));
    FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2"));

    String fileContent = "Hello World";
    String inputFile = createInputFile(fileContent);
    FileInputFormat.setInputPaths(job, new Path(inputFile));

    // Test for merging of configs
    DistributedCache.addFileToClassPath(new Path(inputFile), job.getConfiguration(), fs);
    String dummyFile = createInputFile("dummy file");
    DistributedCache.addFileToClassPath(new Path(dummyFile), configurer.getJob("out1").getConfiguration(), fs);
    // duplicate of the value. Merging should remove duplicates
    DistributedCache.addFileToClassPath(new Path(inputFile), configurer.getJob("out2").getConfiguration(), fs);

    configurer.configure();

    // Verify if the configs are merged
    Path[] fileClassPaths = DistributedCache.getFileClassPaths(job.getConfiguration());
    List<Path> fileClassPathsList = Arrays.asList(fileClassPaths);
    Assert.assertTrue("Cannot find " + (new Path(inputFile)) + " in " + fileClassPathsList,
            fileClassPathsList.contains(new Path(inputFile)));
    Assert.assertTrue("Cannot find " + (new Path(dummyFile)) + " in " + fileClassPathsList,
            fileClassPathsList.contains(new Path(dummyFile)));

    URI[] cacheFiles = DistributedCache.getCacheFiles(job.getConfiguration());
    List<URI> cacheFilesList = Arrays.asList(cacheFiles);
    URI inputFileURI = new Path(inputFile).makeQualified(fs).toUri();
    Assert.assertTrue("Cannot find " + inputFileURI + " in " + cacheFilesList,
            cacheFilesList.contains(inputFileURI));
    URI dummyFileURI = new Path(dummyFile).makeQualified(fs).toUri();
    Assert.assertTrue("Cannot find " + dummyFileURI + " in " + cacheFilesList,
            cacheFilesList.contains(dummyFileURI));

    Assert.assertTrue(job.waitForCompletion(true));

    Path textOutPath = new Path(outDir, "out1/part-m-00000");
    String[] textOutput = readFully(textOutPath).split("\n");
    Path seqOutPath = new Path(outDir, "out2/part-m-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf);
    Text key = new Text();
    IntWritable value = new IntWritable();
    String[] words = fileContent.split(" ");
    Assert.assertEquals(words.length, textOutput.length);
    LOG.info("Verifying file contents");
    for (int i = 0; i < words.length; i++) {
        Assert.assertEquals((i + 1) + "\t" + words[i], textOutput[i]);
        reader.next(key, value);
        Assert.assertEquals(words[i], key.toString());
        Assert.assertEquals((i + 1), value.get());
    }
    Assert.assertFalse(reader.next(key, value));
}