Example usage for org.apache.hadoop.mapreduce Job getJobName

List of usage examples for org.apache.hadoop.mapreduce Job getJobName

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce.Job#getJobName().

Prototype

public String getJobName() 

Document

Returns the user-specified job name.
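
Before the project examples below, here is a minimal, self-contained sketch of the call. The job name and configuration used here are illustrative assumptions, not taken from any of the projects listed under Usage.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetJobNameExample {
    public static void main(String[] args) throws Exception {
        // "log-analytics-example" is a hypothetical name chosen for illustration.
        Job job = Job.getInstance(new Configuration(), "log-analytics-example");
        // getJobName() returns the user-specified name supplied at creation time
        // (or later via setJobName), which is useful for logging and diagnostics.
        System.out.println("Created job: " + job.getJobName());
    }
}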

Usage

From source file:mvm.rya.accumulo.pig.AccumuloStorage.java

License:Apache License

@Override
public void setLocation(String location, Job job) throws IOException {
    if (logger.isDebugEnabled()) {
        logger.debug("Set Location[" + location + "] for job[" + job.getJobName() + "]");
    }
    conf = job.getConfiguration();
    setLocationFromUri(location, job);

    if (!ConfiguratorBase.isConnectorInfoSet(AccumuloInputFormat.class, conf)) {
        try {
            AccumuloInputFormat.setConnectorInfo(job, user, new PasswordToken(password.getBytes()));
        } catch (AccumuloSecurityException e) {
            throw new RuntimeException(e);
        }
        AccumuloInputFormat.setInputTableName(job, table);
        AccumuloInputFormat.setScanAuthorizations(job, authorizations);
        if (!mock) {
            AccumuloInputFormat.setZooKeeperInstance(job, inst, zookeepers);
        } else {
            AccumuloInputFormat.setMockInstance(job, inst);
        }
    }
    if (columnFamilyColumnQualifierPairs.size() > 0)
        AccumuloInputFormat.fetchColumns(job, columnFamilyColumnQualifierPairs);
    logger.info("Set ranges[" + ranges + "] for job[" + job.getJobName() + "] on table[" + table + "] "
            + "for columns[" + columnFamilyColumnQualifierPairs + "] with authorizations[" + authorizations
            + "]");

    if (ranges.size() == 0) {
        throw new IOException("Accumulo Range must be specified");
    }
    AccumuloInputFormat.setRanges(job, ranges);
}

From source file:org.apache.druid.indexer.DeterminePartitionsJob.java

License:Apache License

@Override
public boolean run() {
    try {
        /*
         * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
         * in the final segment.
         */

        if (!(config.getPartitionsSpec() instanceof SingleDimensionPartitionsSpec)) {
            throw new ISE(
                    "DeterminePartitionsJob can only be run for SingleDimensionPartitionsSpec, partitionSpec found [%s]",
                    config.getPartitionsSpec());
        }

        final SingleDimensionPartitionsSpec partitionsSpec = (SingleDimensionPartitionsSpec) config
                .getPartitionsSpec();

        if (!partitionsSpec.isAssumeGrouped()) {
            groupByJob = Job.getInstance(new Configuration(), StringUtils.format(
                    "%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals()));

            JobHelper.injectSystemProperties(groupByJob);
            config.addJobProperties(groupByJob);

            groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
            groupByJob.setMapOutputKeyClass(BytesWritable.class);
            groupByJob.setMapOutputValueClass(NullWritable.class);
            groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setOutputKeyClass(BytesWritable.class);
            groupByJob.setOutputValueClass(NullWritable.class);
            groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
            JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()),
                    JobHelper.distributedClassPath(config.makeIntermediatePath()), groupByJob);

            config.addInputPaths(groupByJob);
            config.intoConfiguration(groupByJob);
            FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

            groupByJob.submit();
            log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(),
                    groupByJob.getTrackingURL());

            // Store the jobId in the file
            if (groupByJob.getJobID() != null) {
                JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(), groupByJob.getJobID().toString());
            }

            try {
                if (!groupByJob.waitForCompletion(true)) {
                    log.error("Job failed: %s", groupByJob.getJobID());
                    failureCause = Utils.getFailureMessage(groupByJob, config.JSON_MAPPER);
                    return false;
                }
            } catch (IOException ioe) {
                if (!Utils.checkAppSuccessForJobIOException(ioe, groupByJob,
                        config.isUseYarnRMJobStatusFallback())) {
                    throw ioe;
                }
            }
        } else {
            log.info("Skipping group-by job.");
        }

        /*
         * Read grouped data and determine appropriate partitions.
         */
        final Job dimSelectionJob = Job.getInstance(new Configuration(), StringUtils.format(
                "%s-determine_partitions_dimselection-%s", config.getDataSource(), config.getIntervals()));

        dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");

        JobHelper.injectSystemProperties(dimSelectionJob);
        config.addJobProperties(dimSelectionJob);

        if (!partitionsSpec.isAssumeGrouped()) {
            // Read grouped data from the groupByJob.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
            dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
            FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
        } else {
            // Directly read the source data, since we assume it's already grouped.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
            config.addInputPaths(dimSelectionJob);
        }

        SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob,
                DeterminePartitionsDimSelectionPartitioner.class);
        dimSelectionJob.setMapOutputValueClass(Text.class);
        dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
        dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
        dimSelectionJob.setOutputKeyClass(BytesWritable.class);
        dimSelectionJob.setOutputValueClass(Text.class);
        dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
        dimSelectionJob.setNumReduceTasks(config.getGranularitySpec().bucketIntervals().get().size());
        JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()),
                JobHelper.distributedClassPath(config.makeIntermediatePath()), dimSelectionJob);

        config.intoConfiguration(dimSelectionJob);
        FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());

        dimSelectionJob.submit();
        log.info("Job %s submitted, status available at: %s", dimSelectionJob.getJobName(),
                dimSelectionJob.getTrackingURL());

        // Store the jobId in the file
        if (dimSelectionJob.getJobID() != null) {
            JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(), dimSelectionJob.getJobID().toString());
        }

        try {
            if (!dimSelectionJob.waitForCompletion(true)) {
                log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
                failureCause = Utils.getFailureMessage(dimSelectionJob, config.JSON_MAPPER);
                return false;
            }
        } catch (IOException ioe) {
            if (!Utils.checkAppSuccessForJobIOException(ioe, dimSelectionJob,
                    config.isUseYarnRMJobStatusFallback())) {
                throw ioe;
            }
        }

        /*
         * Load partitions determined by the previous job.
         */

        log.info("Job completed, loading up partitions for intervals[%s].",
                config.getSegmentGranularIntervals());
        FileSystem fileSystem = null;
        Map<Long, List<HadoopyShardSpec>> shardSpecs = new TreeMap<>();
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
            final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(segmentGranularity);
            if (fileSystem == null) {
                fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
            }
            if (Utils.exists(dimSelectionJob, fileSystem, partitionInfoPath)) {
                List<ShardSpec> specs = config.JSON_MAPPER.readValue(
                        Utils.openInputStream(dimSelectionJob, partitionInfoPath),
                        new TypeReference<List<ShardSpec>>() {
                        });

                List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
                for (int i = 0; i < specs.size(); ++i) {
                    actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
                    log.info("DateTime[%s], partition[%d], spec[%s]", segmentGranularity, i,
                            actualSpecs.get(i));
                }

                shardSpecs.put(segmentGranularity.getStartMillis(), actualSpecs);
            } else {
                log.info("Path[%s] didn't exist!?", partitionInfoPath);
            }
        }
        config.setShardSpecs(shardSpecs);

        return true;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file:org.apache.druid.indexer.updater.HadoopConverterJob.java

License:Apache License

public static void cleanup(Job job) throws IOException {
    final Path jobDir = getJobPath(job.getJobID(), job.getWorkingDirectory());
    final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
    RuntimeException e = null;
    try {
        JobHelper.deleteWithRetry(fs, jobDir, true);
    } catch (RuntimeException ex) {
        e = ex;
    }
    try {
        JobHelper.deleteWithRetry(fs, getJobClassPathDir(job.getJobName(), job.getWorkingDirectory()), true);
    } catch (RuntimeException ex) {
        if (e == null) {
            e = ex;
        } else {
            e.addSuppressed(ex);
        }
    }
    if (e != null) {
        throw e;
    }
}

From source file:org.apache.druid.indexer.updater.HadoopConverterJob.java

License:Apache License

public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
        jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }
    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
        throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);

    jobConf.setNumReduceTasks(0); // Map only. Number of map tasks determined by input format
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));

    setJobName(jobConf, segments);

    if (converterConfig.getJobPriority() != null) {
        jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }

    final Job job = Job.getInstance(jobConf);

    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);

    JobHelper.setupClasspath(JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
            JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
            job);

    Throwable throwable = null;
    try {
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        final boolean success = job.waitForCompletion(true);
        if (!success) {
            final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
            if (reports != null) {
                for (final TaskReport report : reports) {
                    log.error("Error in task [%s] : %s", report.getTaskId(),
                            Arrays.toString(report.getDiagnostics()));
                }
            }
            return null;
        }
        try {
            loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
            writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
        } catch (IOException ex) {
            log.error(ex, "Could not fetch counters");
        }
        final JobID jobID = job.getJobID();

        final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
        final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
        final List<Path> goodPaths = new ArrayList<>();
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            if (locatedFileStatus.isFile()) {
                final Path myPath = locatedFileStatus.getPath();
                if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
                    goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
                }
            }
        }
        if (goodPaths.isEmpty()) {
            log.warn("No good data found at [%s]", jobDir);
            return null;
        }
        final List<DataSegment> returnList = ImmutableList
                .copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {
                    @Nullable
                    @Override
                    public DataSegment apply(final Path input) {
                        try {
                            if (!fs.exists(input)) {
                                throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]",
                                        ConvertingOutputFormat.DATA_SUCCESS_KEY,
                                        ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
                            }
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                        try (final InputStream stream = fs.open(input)) {
                            return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                }));
        if (returnList.size() == segments.size()) {
            return returnList;
        } else {
            throw new ISE(
                    "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
                    segments.size(), returnList.size(), jobDir);
        }
    } catch (InterruptedException | ClassNotFoundException e) {
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (Throwable t) {
        throwable = t;
        throw t;
    } finally {
        try {
            cleanup(job);
        } catch (IOException e) {
            if (throwable != null) {
                throwable.addSuppressed(e);
            } else {
                log.error(e, "Could not clean up job [%s]", job.getJobID());
            }
        }
    }
}

From source file:org.apache.druid.indexer.Utils.java

License:Apache License

public static String getFailureMessage(Job failedJob, ObjectMapper jsonMapper) {
    try {
        Map<String, String> taskDiagsMap = new HashMap<>();
        TaskCompletionEvent[] completionEvents = failedJob.getTaskCompletionEvents(0, 100);
        for (TaskCompletionEvent tce : completionEvents) {
            String[] taskDiags = failedJob.getTaskDiagnostics(tce.getTaskAttemptId());
            String combinedTaskDiags = "";
            for (String taskDiag : taskDiags) {
                combinedTaskDiags += taskDiag;
            }
            taskDiagsMap.put(tce.getTaskAttemptId().toString(), combinedTaskDiags);
        }
        return jsonMapper.writeValueAsString(taskDiagsMap);
    } catch (IOException | InterruptedException ie) {
        log.error(ie, "couldn't get failure cause for job [%s]", failedJob.getJobName());
        return null;
    }
}

From source file:org.apache.gobblin.runtime.mapreduce.MRTask.java

License:Apache License

@Override
public void run() {

    try {
        Job job = createJob();

        if (job == null) {
            log.info("No MR job created. Skipping.");
            this.workingState = WorkUnitState.WorkingState.SUCCESSFUL;
            this.eventSubmitter.submit(Events.MR_JOB_SKIPPED);
            onSkippedMRJob();
            return;
        }

        job.submit();

        log.info("MR tracking URL {} for job {}", job.getTrackingURL(), job.getJobName());

        this.eventSubmitter.submit(Events.MR_JOB_STARTED_EVENT, Events.JOB_URL, job.getTrackingURL());
        job.waitForCompletion(false);
        this.mrJob = job;

        if (job.isSuccessful()) {
            this.eventSubmitter.submit(Events.MR_JOB_SUCCESSFUL, Events.JOB_URL, job.getTrackingURL());
            this.onMRTaskComplete(true, null);
        } else {
            this.eventSubmitter.submit(Events.MR_JOB_FAILED, Events.JOB_URL, job.getTrackingURL());
            this.onMRTaskComplete(false,
                    new IOException(String.format("MR Job:%s is not successful", job.getTrackingURL())));
        }
    } catch (Throwable t) {
        log.error("Failed to run MR job.", t);
        this.eventSubmitter.submit(Events.MR_JOB_FAILED, Events.FAILURE_CONTEXT, t.getMessage());
        this.onMRTaskComplete(false, t);
    }
}

From source file:org.apache.gora.tutorial.log.LogAnalytics.java

License:Apache License

/**
 * Creates and returns the {@link Job} for submitting to Hadoop mapreduce.
 * @param inStore input data store of {@link Pageview} records to analyze
 * @param outStore output data store that receives the aggregated {@link MetricDatum} results
 * @param numReducer number of reduce tasks to use
 * @return the configured {@link Job}
 * @throws IOException if the job cannot be created
 */
public Job createJob(DataStore<Long, Pageview> inStore, DataStore<String, MetricDatum> outStore, int numReducer)
        throws IOException {
    Job job = new Job(getConf());
    job.setJobName("Log Analytics");
    log.info("Creating Hadoop Job: " + job.getJobName());
    job.setNumReduceTasks(numReducer);
    job.setJarByClass(getClass());

    /* Mappers are initialized with GoraMapper.initMapper() or 
     * GoraInputFormat.setInput()*/
    GoraMapper.initMapperJob(job, inStore, TextLong.class, LongWritable.class, LogAnalyticsMapper.class, true);

    /* Reducers are initialized with GoraReducer#initReducer().
     * If the output is not to be persisted via Gora, any reducer 
     * can be used instead. */
    GoraReducer.initReducerJob(job, outStore, LogAnalyticsReducer.class);

    return job;
}

From source file:org.apache.hcatalog.mapreduce.TestMultiOutputFormat.java

License:Apache License

/**
 * A test job that reads a input file and outputs each word and the index of
 * the word encountered to a text file and sequence file with different key
 * values.
 */
@Test
public void testMultiOutputFormatWithoutReduce() throws Throwable {
    Job job = new Job(mrConf, "MultiOutNoReduce");
    job.setMapperClass(MultiOutWordIndexMapper.class);
    job.setJarByClass(this.getClass());
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(MultiOutputFormat.class);
    job.setNumReduceTasks(0);

    JobConfigurer configurer = MultiOutputFormat.createConfigurer(job);
    configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class);
    configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, IntWritable.class);
    Path outDir = new Path(workDir.getPath(), job.getJobName());
    FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1"));
    FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2"));

    String fileContent = "Hello World";
    String inputFile = createInputFile(fileContent);
    FileInputFormat.setInputPaths(job, new Path(inputFile));

    //Test for merging of configs
    DistributedCache.addFileToClassPath(new Path(inputFile), job.getConfiguration(), fs);
    String dummyFile = createInputFile("dummy file");
    DistributedCache.addFileToClassPath(new Path(dummyFile), configurer.getJob("out1").getConfiguration(), fs);
    // duplicate of the value. Merging should remove duplicates
    DistributedCache.addFileToClassPath(new Path(inputFile), configurer.getJob("out2").getConfiguration(), fs);

    configurer.configure();

    // Verify if the configs are merged
    Path[] fileClassPaths = DistributedCache.getFileClassPaths(job.getConfiguration());
    List<Path> fileClassPathsList = Arrays.asList(fileClassPaths);
    Assert.assertTrue(fileClassPathsList.contains(new Path(inputFile)));
    Assert.assertTrue(fileClassPathsList.contains(new Path(dummyFile)));

    URI[] cacheFiles = DistributedCache.getCacheFiles(job.getConfiguration());
    List<URI> cacheFilesList = Arrays.asList(cacheFiles);
    Assert.assertTrue(cacheFilesList.contains(new Path(inputFile).makeQualified(fs).toUri()));
    Assert.assertTrue(cacheFilesList.contains(new Path(dummyFile).makeQualified(fs).toUri()));

    Assert.assertTrue(job.waitForCompletion(true));

    Path textOutPath = new Path(outDir, "out1/part-m-00000");
    String[] textOutput = readFully(textOutPath).split("\n");
    Path seqOutPath = new Path(outDir, "out2/part-m-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf);
    Text key = new Text();
    IntWritable value = new IntWritable();
    String[] words = fileContent.split(" ");
    Assert.assertEquals(words.length, textOutput.length);
    LOG.info("Verifying file contents");
    for (int i = 0; i < words.length; i++) {
        Assert.assertEquals((i + 1) + "\t" + words[i], textOutput[i]);
        reader.next(key, value);
        Assert.assertEquals(words[i], key.toString());
        Assert.assertEquals((i + 1), value.get());
    }
    Assert.assertFalse(reader.next(key, value));
}

From source file:org.apache.hcatalog.mapreduce.TestMultiOutputFormat.java

License:Apache License

/**
 * A word count test job that reads a input file and outputs the count of
 * words to a text file and sequence file with different key values.
 */
@Test
public void testMultiOutputFormatWithReduce() throws Throwable {
    Job job = new Job(mrConf, "MultiOutWithReduce");

    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(MultiOutWordCountReducer.class);
    job.setJarByClass(this.getClass());
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(MultiOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    JobConfigurer configurer = MultiOutputFormat.createConfigurer(job);

    configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class);
    configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, IntWritable.class);
    configurer.addOutputFormat("out3", NullOutputFormat.class, Text.class, IntWritable.class);
    Path outDir = new Path(workDir.getPath(), job.getJobName());
    FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1"));
    FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2"));

    configurer.configure();

    String fileContent = "Hello World Hello World World";
    String inputFile = createInputFile(fileContent);
    FileInputFormat.setInputPaths(job, new Path(inputFile));

    Assert.assertTrue(job.waitForCompletion(true));

    Path textOutPath = new Path(outDir, "out1/part-r-00000");
    String[] textOutput = readFully(textOutPath).split("\n");
    Path seqOutPath = new Path(outDir, "out2/part-r-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf);
    Text key = new Text();
    IntWritable value = new IntWritable();
    String[] words = "Hello World".split(" ");
    Assert.assertEquals(words.length, textOutput.length);
    for (int i = 0; i < words.length; i++) {
        Assert.assertEquals((i + 2) + "\t" + words[i], textOutput[i]);
        reader.next(key, value);
        Assert.assertEquals(words[i], key.toString());
        Assert.assertEquals((i + 2), value.get());
    }
    Assert.assertFalse(reader.next(key, value));

}

From source file:org.apache.hive.hcatalog.mapreduce.TestMultiOutputFormat.java

License:Apache License

/**
 * A test job that reads a input file and outputs each word and the index of
 * the word encountered to a text file and sequence file with different key
 * values.
 */
@Test
public void testMultiOutputFormatWithoutReduce() throws Throwable {
    Job job = new Job(mrConf, "MultiOutNoReduce");
    job.setMapperClass(MultiOutWordIndexMapper.class);
    job.setJarByClass(this.getClass());
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(MultiOutputFormat.class);
    job.setNumReduceTasks(0);

    JobConfigurer configurer = MultiOutputFormat.createConfigurer(job);
    configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class);
    configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, IntWritable.class);
    Path outDir = new Path(workDir.getPath(), job.getJobName());
    FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1"));
    FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2"));

    String fileContent = "Hello World";
    String inputFile = createInputFile(fileContent);
    FileInputFormat.setInputPaths(job, new Path(inputFile));

    //Test for merging of configs
    DistributedCache.addFileToClassPath(new Path(inputFile), job.getConfiguration(), fs);
    String dummyFile = createInputFile("dummy file");
    DistributedCache.addFileToClassPath(new Path(dummyFile), configurer.getJob("out1").getConfiguration(), fs);
    // duplicate of the value. Merging should remove duplicates
    DistributedCache.addFileToClassPath(new Path(inputFile), configurer.getJob("out2").getConfiguration(), fs);

    configurer.configure();

    // Verify if the configs are merged
    Path[] fileClassPaths = DistributedCache.getFileClassPaths(job.getConfiguration());
    List<Path> fileClassPathsList = Arrays.asList(fileClassPaths);
    Assert.assertTrue("Cannot find " + (new Path(inputFile)) + " in " + fileClassPathsList,
            fileClassPathsList.contains(new Path(inputFile)));
    Assert.assertTrue("Cannot find " + (new Path(dummyFile)) + " in " + fileClassPathsList,
            fileClassPathsList.contains(new Path(dummyFile)));

    URI[] cacheFiles = DistributedCache.getCacheFiles(job.getConfiguration());
    List<URI> cacheFilesList = Arrays.asList(cacheFiles);
    URI inputFileURI = new Path(inputFile).makeQualified(fs).toUri();
    Assert.assertTrue("Cannot find " + inputFileURI + " in " + cacheFilesList,
            cacheFilesList.contains(inputFileURI));
    URI dummyFileURI = new Path(dummyFile).makeQualified(fs).toUri();
    Assert.assertTrue("Cannot find " + dummyFileURI + " in " + cacheFilesList,
            cacheFilesList.contains(dummyFileURI));

    Assert.assertTrue(job.waitForCompletion(true));

    Path textOutPath = new Path(outDir, "out1/part-m-00000");
    String[] textOutput = readFully(textOutPath).split("\n");
    Path seqOutPath = new Path(outDir, "out2/part-m-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf);
    Text key = new Text();
    IntWritable value = new IntWritable();
    String[] words = fileContent.split(" ");
    Assert.assertEquals(words.length, textOutput.length);
    LOG.info("Verifying file contents");
    for (int i = 0; i < words.length; i++) {
        Assert.assertEquals((i + 1) + "\t" + words[i], textOutput[i]);
        reader.next(key, value);
        Assert.assertEquals(words[i], key.toString());
        Assert.assertEquals((i + 1), value.get());
    }
    Assert.assertFalse(reader.next(key, value));
}