Example usage for org.apache.hadoop.mapreduce Job getJobName

List of usage examples for org.apache.hadoop.mapreduce Job getJobName

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce.Job#getJobName().

Prototype

public String getJobName() 

Document

Returns the user-specified job name.
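
Before the project examples below, here is a minimal, self-contained sketch of the call. The job name and configuration used here are illustrative assumptions, not taken from any of the projects listed under Usage.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetJobNameExample {
    public static void main(String[] args) throws Exception {
        // "log-analytics-example" is a hypothetical name chosen for illustration.
        Job job = Job.getInstance(new Configuration(), "log-analytics-example");
        // getJobName() returns the user-specified name supplied at creation time
        // (or later via setJobName), which is useful for logging and diagnostics.
        System.out.println("Created job: " + job.getJobName());
    }
}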

Usage

From source file:mvm.rya.accumulo.pig.AccumuloStorage.java

License:Apache License

@Override
public void setLocation(String location, Job job) throws IOException {
    if (logger.isDebugEnabled()) {
        logger.debug("Set Location[" + location + "] for job[" + job.getJobName() + "]");
    }
    conf = job.getConfiguration();
    setLocationFromUri(location, job);

    if (!ConfiguratorBase.isConnectorInfoSet(AccumuloInputFormat.class, conf)) {
        try {
            AccumuloInputFormat.setConnectorInfo(job, user, new PasswordToken(password.getBytes()));
        } catch (AccumuloSecurityException e) {
            throw new RuntimeException(e);
        }
        AccumuloInputFormat.setInputTableName(job, table);
        AccumuloInputFormat.setScanAuthorizations(job, authorizations);
        if (!mock) {
            AccumuloInputFormat.setZooKeeperInstance(job, inst, zookeepers);
        } else {
            AccumuloInputFormat.setMockInstance(job, inst);
        }
    }
    if (columnFamilyColumnQualifierPairs.size() > 0)
        AccumuloInputFormat.fetchColumns(job, columnFamilyColumnQualifierPairs);
    logger.info("Set ranges[" + ranges + "] for job[" + job.getJobName() + "] on table[" + table + "] "
            + "for columns[" + columnFamilyColumnQualifierPairs + "] with authorizations[" + authorizations
            + "]");

    if (ranges.size() == 0) {
        throw new IOException("Accumulo Range must be specified");
    }
    AccumuloInputFormat.setRanges(job, ranges);
}

From source file:org.apache.druid.indexer.DeterminePartitionsJob.java

License:Apache License

@Override
public boolean run() {
    try {
        /*
         * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
         * in the final segment.
         */

        if (!(config.getPartitionsSpec() instanceof SingleDimensionPartitionsSpec)) {
            throw new ISE(
                    "DeterminePartitionsJob can only be run for SingleDimensionPartitionsSpec, partitionSpec found [%s]",
                    config.getPartitionsSpec());
        }

        final SingleDimensionPartitionsSpec partitionsSpec = (SingleDimensionPartitionsSpec) config
                .getPartitionsSpec();

        if (!partitionsSpec.isAssumeGrouped()) {
            groupByJob = Job.getInstance(new Configuration(), StringUtils.format(
                    "%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals()));

            JobHelper.injectSystemProperties(groupByJob);
            config.addJobProperties(groupByJob);

            groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
            groupByJob.setMapOutputKeyClass(BytesWritable.class);
            groupByJob.setMapOutputValueClass(NullWritable.class);
            groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setOutputKeyClass(BytesWritable.class);
            groupByJob.setOutputValueClass(NullWritable.class);
            groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
            JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()),
                    JobHelper.distributedClassPath(config.makeIntermediatePath()), groupByJob);

            config.addInputPaths(groupByJob);
            config.intoConfiguration(groupByJob);
            FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

            groupByJob.submit();
            log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(),
                    groupByJob.getTrackingURL());

            // Store the jobId in the file
            if (groupByJob.getJobID() != null) {
                JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(), groupByJob.getJobID().toString());
            }

            try {
                if (!groupByJob.waitForCompletion(true)) {
                    log.error("Job failed: %s", groupByJob.getJobID());
                    failureCause = Utils.getFailureMessage(groupByJob, config.JSON_MAPPER);
                    return false;
                }
            } catch (IOException ioe) {
                if (!Utils.checkAppSuccessForJobIOException(ioe, groupByJob,
                        config.isUseYarnRMJobStatusFallback())) {
                    throw ioe;
                }
            }
        } else {
            log.info("Skipping group-by job.");
        }

        /*
         * Read grouped data and determine appropriate partitions.
         */
        final Job dimSelectionJob = Job.getInstance(new Configuration(), StringUtils.format(
                "%s-determine_partitions_dimselection-%s", config.getDataSource(), config.getIntervals()));

        dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");

        JobHelper.injectSystemProperties(dimSelectionJob);
        config.addJobProperties(dimSelectionJob);

        if (!partitionsSpec.isAssumeGrouped()) {
            // Read grouped data from the groupByJob.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
            dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
            FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
        } else {
            // Directly read the source data, since we assume it's already grouped.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
            config.addInputPaths(dimSelectionJob);
        }

        SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob,
                DeterminePartitionsDimSelectionPartitioner.class);
        dimSelectionJob.setMapOutputValueClass(Text.class);
        dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
        dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
        dimSelectionJob.setOutputKeyClass(BytesWritable.class);
        dimSelectionJob.setOutputValueClass(Text.class);
        dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
        dimSelectionJob.setNumReduceTasks(config.getGranularitySpec().bucketIntervals().get().size());
        JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()),
                JobHelper.distributedClassPath(config.makeIntermediatePath()), dimSelectionJob);

        config.intoConfiguration(dimSelectionJob);
        FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());

        dimSelectionJob.submit();
        log.info("Job %s submitted, status available at: %s", dimSelectionJob.getJobName(),
                dimSelectionJob.getTrackingURL());

        // Store the jobId in the file
        if (dimSelectionJob.getJobID() != null) {
            JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(), dimSelectionJob.getJobID().toString());
        }

        try {
            if (!dimSelectionJob.waitForCompletion(true)) {
                log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
                failureCause = Utils.getFailureMessage(dimSelectionJob, config.JSON_MAPPER);
                return false;
            }
        } catch (IOException ioe) {
            if (!Utils.checkAppSuccessForJobIOException(ioe, dimSelectionJob,
                    config.isUseYarnRMJobStatusFallback())) {
                throw ioe;
            }
        }

        /*
         * Load partitions determined by the previous job.
         */

        log.info("Job completed, loading up partitions for intervals[%s].",
                config.getSegmentGranularIntervals());
        FileSystem fileSystem = null;
        Map<Long, List<HadoopyShardSpec>> shardSpecs = new TreeMap<>();
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
            final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(segmentGranularity);
            if (fileSystem == null) {
                fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
            }
            if (Utils.exists(dimSelectionJob, fileSystem, partitionInfoPath)) {
                List<ShardSpec> specs = config.JSON_MAPPER.readValue(
                        Utils.openInputStream(dimSelectionJob, partitionInfoPath),
                        new TypeReference<List<ShardSpec>>() {
                        });

                List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
                for (int i = 0; i < specs.size(); ++i) {
                    actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
                    log.info("DateTime[%s], partition[%d], spec[%s]", segmentGranularity, i,
                            actualSpecs.get(i));
                }

                shardSpecs.put(segmentGranularity.getStartMillis(), actualSpecs);
            } else {
                log.info("Path[%s] didn't exist!?", partitionInfoPath);
            }
        }
        config.setShardSpecs(shardSpecs);

        return true;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file:org.apache.druid.indexer.updater.HadoopConverterJob.java

License:Apache License

public static void cleanup(Job job) throws IOException {
    final Path jobDir = getJobPath(job.getJobID(), job.getWorkingDirectory());
    final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
    RuntimeException e = null;
    try {
        JobHelper.deleteWithRetry(fs, jobDir, true);
    } catch (RuntimeException ex) {
        e = ex;
    }
    try {
        JobHelper.deleteWithRetry(fs, getJobClassPathDir(job.getJobName(), job.getWorkingDirectory()), true);
    } catch (RuntimeException ex) {
        if (e == null) {
            e = ex;
        } else {
            e.addSuppressed(ex);
        }
    }
    if (e != null) {
        throw e;
    }
}

From source file:org.apache.druid.indexer.updater.HadoopConverterJob.java

License:Apache License

public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
        jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }
    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
        throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);

    jobConf.setNumReduceTasks(0); // Map only. Number of map tasks determined by input format
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));

    setJobName(jobConf, segments);

    if (converterConfig.getJobPriority() != null) {
        jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }

    final Job job = Job.getInstance(jobConf);

    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);

    JobHelper.setupClasspath(JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
            JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
            job);

    Throwable throwable = null;
    try {
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        final boolean success = job.waitForCompletion(true);
        if (!success) {
            final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
            if (reports != null) {
                for (final TaskReport report : reports) {
                    log.error("Error in task [%s] : %s", report.getTaskId(),
                            Arrays.toString(report.getDiagnostics()));
                }
            }
            return null;
        }
        try {
            loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
            writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
        } catch (IOException ex) {
            log.error(ex, "Could not fetch counters");
        }
        final JobID jobID = job.getJobID();

        final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
        final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
        final List<Path> goodPaths = new ArrayList<>();
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            if (locatedFileStatus.isFile()) {
                final Path myPath = locatedFileStatus.getPath();
                if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
                    goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
                }
            }
        }
        if (goodPaths.isEmpty()) {
            log.warn("No good data found at [%s]", jobDir);
            return null;
        }
        final List<DataSegment> returnList = ImmutableList
                .copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {
                    @Nullable
                    @Override
                    public DataSegment apply(final Path input) {
                        try {
                            if (!fs.exists(input)) {
                                throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]",
                                        ConvertingOutputFormat.DATA_SUCCESS_KEY,
                                        ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
                            }
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                        try (final InputStream stream = fs.open(input)) {
                            return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                }));
        if (returnList.size() == segments.size()) {
            return returnList;
        } else {
            throw new ISE(
                    "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
                    segments.size(), returnList.size(), jobDir);
        }
    } catch (InterruptedException | ClassNotFoundException e) {
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (Throwable t) {
        throwable = t;
        throw t;
    } finally {
        try {
            cleanup(job);
        } catch (IOException e) {
            if (throwable != null) {
                throwable.addSuppressed(e);
            } else {
                log.error(e, "Could not clean up job [%s]", job.getJobID());
            }
        }
    }
}

From source file:org.apache.druid.indexer.Utils.java

License:Apache License

public static String getFailureMessage(Job failedJob, ObjectMapper jsonMapper) {
    try {
        Map<String, String> taskDiagsMap = new HashMap<>();
        TaskCompletionEvent[] completionEvents = failedJob.getTaskCompletionEvents(0, 100);
        for (TaskCompletionEvent tce : completionEvents) {
            String[] taskDiags = failedJob.getTaskDiagnostics(tce.getTaskAttemptId());
            String combinedTaskDiags = "";
            for (String taskDiag : taskDiags) {
                combinedTaskDiags += taskDiag;
            }
            taskDiagsMap.put(tce.getTaskAttemptId().toString(), combinedTaskDiags);
        }
        return jsonMapper.writeValueAsString(taskDiagsMap);
    } catch (IOException | InterruptedException ie) {
        log.error(ie, "couldn't get failure cause for job [%s]", failedJob.getJobName());
        return null;
    }
}

From source file:org.apache.gobblin.runtime.mapreduce.MRTask.java

License:Apache License

@Override
public void run() {

    try {
        Job job = createJob();

        if (job == null) {
            log.info("No MR job created. Skipping.");
            this.workingState = WorkUnitState.WorkingState.SUCCESSFUL;
            this.eventSubmitter.submit(Events.MR_JOB_SKIPPED);
            onSkippedMRJob();
            return;
        }

        job.submit();

        log.info("MR tracking URL {} for job {}", job.getTrackingURL(), job.getJobName());

        this.eventSubmitter.submit(Events.MR_JOB_STARTED_EVENT, Events.JOB_URL, job.getTrackingURL());
        job.waitForCompletion(false);
        this.mrJob = job;

        if (job.isSuccessful()) {
            this.eventSubmitter.submit(Events.MR_JOB_SUCCESSFUL, Events.JOB_URL, job.getTrackingURL());
            this.onMRTaskComplete(true, null);
        } else {
            this.eventSubmitter.submit(Events.MR_JOB_FAILED, Events.JOB_URL, job.getTrackingURL());
            this.onMRTaskComplete(false,
                    new IOException(String.format("MR Job:%s is not successful", job.getTrackingURL())));
        }
    } catch (Throwable t) {
        log.error("Failed to run MR job.", t);
        this.eventSubmitter.submit(Events.MR_JOB_FAILED, Events.FAILURE_CONTEXT, t.getMessage());
        this.onMRTaskComplete(false, t);
    }
}

From source file:org.apache.gora.tutorial.log.LogAnalytics.java

License:Apache License

/**
 * Creates and returns the {@link Job} for submitting to Hadoop mapreduce.
 * @param inStore input data store of {@link Pageview} records to analyze
 * @param outStore output data store that receives the aggregated {@link MetricDatum} results
 * @param numReducer number of reduce tasks to use
 * @return the configured {@link Job}
 * @throws IOException if the job cannot be created
 */
public Job createJob(DataStore<Long, Pageview> inStore, DataStore<String, MetricDatum> outStore, int numReducer)
        throws IOException {
    Job job = new Job(getConf());
    job.setJobName("Log Analytics");
    log.info("Creating Hadoop Job: " + job.getJobName());
    job.setNumReduceTasks(numReducer);
    job.setJarByClass(getClass());

    /* Mappers are initialized with GoraMapper.initMapper() or 
     * GoraInputFormat.setInput()*/
    GoraMapper.initMapperJob(job, inStore, TextLong.class, LongWritable.class, LogAnalyticsMapper.class, true);

    /* Reducers are initialized with GoraReducer#initReducer().
     * If the output is not to be persisted via Gora, any reducer 
     * can be used instead. */
    GoraReducer.initReducerJob(job, outStore, LogAnalyticsReducer.class);

    return job;
}

From source file:org.apache.hcatalog.mapreduce.TestMultiOutputFormat.java

License:Apache License

/**
 * A test job that reads a input file and outputs each word and the index of
 * the word encountered to a text file and sequence file with different key
 * values.
 */
@Test
public void testMultiOutputFormatWithoutReduce() throws Throwable {
    Job job = new Job(mrConf, "MultiOutNoReduce");
    job.setMapperClass(MultiOutWordIndexMapper.class);
    job.setJarByClass(this.getClass());
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(MultiOutputFormat.class);
    job.setNumReduceTasks(0);

    JobConfigurer configurer = MultiOutputFormat.createConfigurer(job);
    configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class);
    configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, IntWritable.class);
    Path outDir = new Path(workDir.getPath(), job.getJobName());
    FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1"));
    FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2"));

    String fileContent = "Hello World";
    String inputFile = createInputFile(fileContent);
    FileInputFormat.setInputPaths(job, new Path(inputFile));

    //Test for merging of configs
    DistributedCache.addFileToClassPath(new Path(inputFile), job.getConfiguration(), fs);
    String dummyFile = createInputFile("dummy file");
    DistributedCache.addFileToClassPath(new Path(dummyFile), configurer.getJob("out1").getConfiguration(), fs);
    // duplicate of the value. Merging should remove duplicates
    DistributedCache.addFileToClassPath(new Path(inputFile), configurer.getJob("out2").getConfiguration(), fs);

    configurer.configure();

    // Verify if the configs are merged
    Path[] fileClassPaths = DistributedCache.getFileClassPaths(job.getConfiguration());
    List<Path> fileClassPathsList = Arrays.asList(fileClassPaths);
    Assert.assertTrue(fileClassPathsList.contains(new Path(inputFile)));
    Assert.assertTrue(fileClassPathsList.contains(new Path(dummyFile)));

    URI[] cacheFiles = DistributedCache.getCacheFiles(job.getConfiguration());
    List<URI> cacheFilesList = Arrays.asList(cacheFiles);
    Assert.assertTrue(cacheFilesList.contains(new Path(inputFile).makeQualified(fs).toUri()));
    Assert.assertTrue(cacheFilesList.contains(new Path(dummyFile).makeQualified(fs).toUri()));

    Assert.assertTrue(job.waitForCompletion(true));

    Path textOutPath = new Path(outDir, "out1/part-m-00000");
    String[] textOutput = readFully(textOutPath).split("\n");
    Path seqOutPath = new Path(outDir, "out2/part-m-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf);
    Text key = new Text();
    IntWritable value = new IntWritable();
    String[] words = fileContent.split(" ");
    Assert.assertEquals(words.length, textOutput.length);
    LOG.info("Verifying file contents");
    for (int i = 0; i < words.length; i++) {
        Assert.assertEquals((i + 1) + "\t" + words[i], textOutput[i]);
        reader.next(key, value);
        Assert.assertEquals(words[i], key.toString());
        Assert.assertEquals((i + 1), value.get());
    }
    Assert.assertFalse(reader.next(key, value));
}

From source file:org.apache.hcatalog.mapreduce.TestMultiOutputFormat.java

License:Apache License

/**
 * A word count test job that reads a input file and outputs the count of
 * words to a text file and sequence file with different key values.
 */
@Test
public void testMultiOutputFormatWithReduce() throws Throwable {
    Job job = new Job(mrConf, "MultiOutWithReduce");

    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(MultiOutWordCountReducer.class);
    job.setJarByClass(this.getClass());
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(MultiOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    JobConfigurer configurer = MultiOutputFormat.createConfigurer(job);

    configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class);
    configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, IntWritable.class);
    configurer.addOutputFormat("out3", NullOutputFormat.class, Text.class, IntWritable.class);
    Path outDir = new Path(workDir.getPath(), job.getJobName());
    FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1"));
    FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2"));

    configurer.configure();

    String fileContent = "Hello World Hello World World";
    String inputFile = createInputFile(fileContent);
    FileInputFormat.setInputPaths(job, new Path(inputFile));

    Assert.assertTrue(job.waitForCompletion(true));

    Path textOutPath = new Path(outDir, "out1/part-r-00000");
    String[] textOutput = readFully(textOutPath).split("\n");
    Path seqOutPath = new Path(outDir, "out2/part-r-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf);
    Text key = new Text();
    IntWritable value = new IntWritable();
    String[] words = "Hello World".split(" ");
    Assert.assertEquals(words.length, textOutput.length);
    for (int i = 0; i < words.length; i++) {
        Assert.assertEquals((i + 2) + "\t" + words[i], textOutput[i]);
        reader.next(key, value);
        Assert.assertEquals(words[i], key.toString());
        Assert.assertEquals((i + 2), value.get());
    }
    Assert.assertFalse(reader.next(key, value));

}

From source file:org.apache.hive.hcatalog.mapreduce.TestMultiOutputFormat.java

License:Apache License

/**
 * A test job that reads a input file and outputs each word and the index of
 * the word encountered to a text file and sequence file with different key
 * values.
 */
@Test
public void testMultiOutputFormatWithoutReduce() throws Throwable {
    Job job = new Job(mrConf, "MultiOutNoReduce");
    job.setMapperClass(MultiOutWordIndexMapper.class);
    job.setJarByClass(this.getClass());
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(MultiOutputFormat.class);
    job.setNumReduceTasks(0);

    JobConfigurer configurer = MultiOutputFormat.createConfigurer(job);
    configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class);
    configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, IntWritable.class);
    Path outDir = new Path(workDir.getPath(), job.getJobName());
    FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1"));
    FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2"));

    String fileContent = "Hello World";
    String inputFile = createInputFile(fileContent);
    FileInputFormat.setInputPaths(job, new Path(inputFile));

    //Test for merging of configs
    DistributedCache.addFileToClassPath(new Path(inputFile), job.getConfiguration(), fs);
    String dummyFile = createInputFile("dummy file");
    DistributedCache.addFileToClassPath(new Path(dummyFile), configurer.getJob("out1").getConfiguration(), fs);
    // duplicate of the value. Merging should remove duplicates
    DistributedCache.addFileToClassPath(new Path(inputFile), configurer.getJob("out2").getConfiguration(), fs);

    configurer.configure();

    // Verify if the configs are merged
    Path[] fileClassPaths = DistributedCache.getFileClassPaths(job.getConfiguration());
    List<Path> fileClassPathsList = Arrays.asList(fileClassPaths);
    Assert.assertTrue("Cannot find " + (new Path(inputFile)) + " in " + fileClassPathsList,
            fileClassPathsList.contains(new Path(inputFile)));
    Assert.assertTrue("Cannot find " + (new Path(dummyFile)) + " in " + fileClassPathsList,
            fileClassPathsList.contains(new Path(dummyFile)));

    URI[] cacheFiles = DistributedCache.getCacheFiles(job.getConfiguration());
    List<URI> cacheFilesList = Arrays.asList(cacheFiles);
    URI inputFileURI = new Path(inputFile).makeQualified(fs).toUri();
    Assert.assertTrue("Cannot find " + inputFileURI + " in " + cacheFilesList,
            cacheFilesList.contains(inputFileURI));
    URI dummyFileURI = new Path(dummyFile).makeQualified(fs).toUri();
    Assert.assertTrue("Cannot find " + dummyFileURI + " in " + cacheFilesList,
            cacheFilesList.contains(dummyFileURI));

    Assert.assertTrue(job.waitForCompletion(true));

    Path textOutPath = new Path(outDir, "out1/part-m-00000");
    String[] textOutput = readFully(textOutPath).split("\n");
    Path seqOutPath = new Path(outDir, "out2/part-m-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf);
    Text key = new Text();
    IntWritable value = new IntWritable();
    String[] words = fileContent.split(" ");
    Assert.assertEquals(words.length, textOutput.length);
    LOG.info("Verifying file contents");
    for (int i = 0; i < words.length; i++) {
        Assert.assertEquals((i + 1) + "\t" + words[i], textOutput[i]);
        reader.next(key, value);
        Assert.assertEquals(words[i], key.toString());
        Assert.assertEquals((i + 1), value.get());
    }
    Assert.assertFalse(reader.next(key, value));
}