Example usage for org.apache.hadoop.mapreduce Job getOutputFormatClass

List of usage examples for org.apache.hadoop.mapreduce Job getOutputFormatClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job getOutputFormatClass.

Prototype

@SuppressWarnings("unchecked")
public Class<? extends OutputFormat<?, ?>> getOutputFormatClass() throws ClassNotFoundException 

Source Link

Document

Get the OutputFormat class for the job.

Usage

From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java

License:Apache License

/**
 * Creates a jar that contains everything that are needed for running the MapReduce program by Hadoop.
 *
 * @return a new {@link File} containing the job jar
 *///from   w ww  . ja v  a  2s.  c  o  m
private File buildJobJar(Job job, File tempDir) throws IOException, URISyntaxException {
    File jobJar = new File(tempDir, "job.jar");
    LOG.debug("Creating Job jar: {}", jobJar);

    // For local mode, nothing is needed in the job jar since we use the classloader in the configuration object.
    if (MapReduceTaskContextProvider.isLocal(job.getConfiguration())) {
        JarOutputStream output = new JarOutputStream(new FileOutputStream(jobJar));
        output.close();
        return jobJar;
    }

    // Excludes libraries that are for sure not needed.
    // Hadoop - Available from the cluster
    // Spark - MR never uses Spark
    final HadoopClassExcluder hadoopClassExcluder = new HadoopClassExcluder();
    ApplicationBundler appBundler = new ApplicationBundler(new ClassAcceptor() {
        @Override
        public boolean accept(String className, URL classUrl, URL classPathUrl) {
            if (className.startsWith("org.apache.spark")
                    || classPathUrl.toString().contains("spark-assembly")) {
                return false;
            }
            return hadoopClassExcluder.accept(className, classUrl, classPathUrl);
        }
    });
    Set<Class<?>> classes = Sets.newHashSet();
    classes.add(MapReduce.class);
    classes.add(MapperWrapper.class);
    classes.add(ReducerWrapper.class);

    // We only need to trace the Input/OutputFormat class due to MAPREDUCE-5957 so that those classes are included
    // in the job.jar and be available in the MR system classpath before our job classloader (ApplicationClassLoader)
    // take over the classloading.
    if (cConf.getBoolean(Constants.AppFabric.MAPREDUCE_INCLUDE_CUSTOM_CLASSES)) {
        try {
            Class<? extends InputFormat<?, ?>> inputFormatClass = job.getInputFormatClass();
            LOG.info("InputFormat class: {} {}", inputFormatClass, inputFormatClass.getClassLoader());
            classes.add(inputFormatClass);

            // If it is StreamInputFormat, also add the StreamEventCodec class as well.
            if (StreamInputFormat.class.isAssignableFrom(inputFormatClass)) {
                Class<? extends StreamEventDecoder> decoderType = StreamInputFormat
                        .getDecoderClass(job.getConfiguration());
                if (decoderType != null) {
                    classes.add(decoderType);
                }
            }
        } catch (Throwable t) {
            LOG.info("InputFormat class not found: {}", t.getMessage(), t);
            // Ignore
        }
        try {
            Class<? extends OutputFormat<?, ?>> outputFormatClass = job.getOutputFormatClass();
            LOG.info("OutputFormat class: {} {}", outputFormatClass, outputFormatClass.getClassLoader());
            classes.add(outputFormatClass);
        } catch (Throwable t) {
            LOG.info("OutputFormat class not found: {}", t.getMessage(), t);
            // Ignore
        }
    }
    // End of MAPREDUCE-5957.

    try {
        Class<?> hbaseTableUtilClass = HBaseTableUtilFactory.getHBaseTableUtilClass();
        classes.add(hbaseTableUtilClass);
    } catch (ProvisionException e) {
        LOG.warn("Not including HBaseTableUtil classes in submitted Job Jar since they are not available");
    }

    ClassLoader oldCLassLoader = ClassLoaders.setContextClassLoader(job.getConfiguration().getClassLoader());
    appBundler.createBundle(Locations.toLocation(jobJar), classes);
    ClassLoaders.setContextClassLoader(oldCLassLoader);

    LOG.info("Built MapReduce Job Jar at {}", jobJar.toURI());
    return jobJar;
}

From source file:co.nubetech.hiho.dedup.DedupJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    populateConfiguration(args);//ww w  .ja  v a 2s.co m
    try {
        checkMandatoryConfs();
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new Exception(e1);
    }
    Job job = new Job(conf);
    job.setJobName("Dedup job");
    job.setJarByClass(DedupJob.class);

    Class inputFormatClass = Class.forName(inputFormat);
    Class outputFormatClass = Class.forName(outputFormat);
    Class inputKeyClass = Class.forName(inputKeyClassName);
    Class inputValueClass = Class.forName(inputValueClassName);

    if (dedupBy.equals("key")) {
        job.setMapperClass(DedupKeyMapper.class);
        job.setReducerClass(DedupKeyReducer.class);
        job.setMapOutputValueClass(inputValueClass);
    } else if (dedupBy.equals("value")) {
        job.setMapperClass(DedupValueMapper.class);
        job.setReducerClass(DedupValueReducer.class);
        job.setMapOutputValueClass(inputKeyClass);
    }

    job.setInputFormatClass(inputFormatClass);
    if (inputFormat.equals("co.nubetech.hiho.dedup.DelimitedTextInputFormat")) {
        DelimitedTextInputFormat.setProperties(job, delimiter, column);
    }

    job.setMapOutputKeyClass(HihoTuple.class);

    job.setOutputKeyClass(inputKeyClass);
    job.setOutputValueClass(inputValueClass);
    job.setPartitionerClass(HihoHashPartitioner.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    try {
        logger.debug("Output format class is " + job.getOutputFormatClass());
        logger.debug("Class is " + ReflectionUtils
                .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());
        job.waitForCompletion(false);
        if (job.isComplete()) {
            Counters counters = job.getCounters();
            totalRecordsRead = counters.findCounter(DedupRecordCounter.TOTAL_RECORDS_READ).getValue();
            badRecords = counters.findCounter(DedupRecordCounter.BAD_RECORD).getValue();
            output = counters.findCounter(DedupRecordCounter.OUTPUT).getValue();
            duplicateRecords = totalRecordsRead - output;
            logger.info("Total records read are: " + totalRecordsRead);
            logger.info("Bad Records are: " + badRecords);
            logger.info("Output records are: " + output);
            logger.info("Duplicate records are: " + duplicateRecords);
        }

    } catch (Exception e) {
        e.printStackTrace();
    }
    return 0;
}

From source file:co.nubetech.hiho.job.DBQueryInputJob.java

License:Apache License

public void runJobs(Configuration conf, int jobCounter) throws IOException {

    try {//from   www.  j  a va2s. com
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new IOException(e1);
    }

    Job job = new Job(conf);
    for (Entry<String, String> entry : conf) {
        logger.warn("key, value " + entry.getKey() + "=" + entry.getValue());
    }

    // logger.debug("Number of maps " +
    // conf.getInt("mapred.map.tasks", 1));
    // conf.setInt(JobContext.NUM_MAPS,
    // conf.getInt("mapreduce.job.maps", 1));
    // job.getConfiguration().setInt("mapred.map.tasks", 4);
    job.getConfiguration().setInt(MRJobConfig.NUM_MAPS, conf.getInt(HIHOConf.NUMBER_MAPPERS, 1));
    logger.warn("Number of maps " + conf.getInt(MRJobConfig.NUM_MAPS, 1));

    job.setJobName("Import job");
    job.setJarByClass(DBQueryInputJob.class);

    String strategy = conf.get(HIHOConf.INPUT_OUTPUT_STRATEGY);
    OutputStrategyEnum os = OutputStrategyEnum.value(strategy);
    if (os == null) {
        throw new IllegalArgumentException("Wrong value of output strategy. Please correct");
    }
    if (os != OutputStrategyEnum.AVRO) {
        switch (os) {

        case DUMP: {
            // job.setMapperClass(DBImportMapper.class);
            break;
        }
        /*
         * case AVRO: { job.setMapperClass(DBInputAvroMapper.class); //
         * need avro in cp // job.setJarByClass(Schema.class); // need
         * jackson which is needed by avro - ugly! //
         * job.setJarByClass(ObjectMapper.class);
         * job.setMapOutputKeyClass(NullWritable.class);
         * job.setMapOutputValueClass(AvroValue.class);
         * job.setOutputKeyClass(NullWritable.class);
         * job.setOutputValueClass(AvroValue.class);
         * job.setOutputFormatClass(AvroOutputFormat.class);
         * 
         * AvroOutputFormat.setOutputPath(job, new
         * Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH))); break; }
         */
        case DELIMITED: {
            job.setMapperClass(DBInputDelimMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setOutputFormatClass(NoKeyOnlyValueOutputFormat.class);

            NoKeyOnlyValueOutputFormat.setOutputPath(job, new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
        }
        case JSON: {
            // job.setMapperClass(DBImportJsonMapper.class);
            // job.setJarByClass(ObjectMapper.class);
            break;
        }
        default: {
            job.setMapperClass(DBInputDelimMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setOutputFormatClass(NoKeyOnlyValueOutputFormat.class);

            NoKeyOnlyValueOutputFormat.setOutputPath(job, new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
            break;
        }
        }

        String inputQuery = conf.get(DBConfiguration.INPUT_QUERY);
        String inputBoundingQuery = conf.get(DBConfiguration.INPUT_BOUNDING_QUERY);
        logger.debug("About to set the params");
        DBQueryInputFormat.setInput(job, inputQuery, inputBoundingQuery, params);
        logger.debug("Set the params");

        job.setNumReduceTasks(0);

        try {
            // job.setJarByClass(Class.forName(conf.get(
            // org.apache.hadoop.mapred.lib.db.DBConfiguration.DRIVER_CLASS_PROPERTY)));
            logger.debug("OUTPUT format class is " + job.getOutputFormatClass());

            /*
             * org.apache.hadoop.mapreduce.OutputFormat<?, ?> output =
             * ReflectionUtils.newInstance(job.getOutputFormatClass(),
             * job.getConfiguration()); output.checkOutputSpecs(job);
             */
            logger.debug("Class is " + ReflectionUtils
                    .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());
            job.waitForCompletion(false);
            if (conf.get(HIHOConf.INPUT_OUTPUT_LOADTO) != null) {
                generateHiveScript(conf, job, jobCounter);
                generatePigScript(conf, job);
            }

        }
        /*
         * catch (HIHOException h) { h.printStackTrace(); }
         */
        catch (Exception e) {
            e.printStackTrace();
        } catch (HIHOException e) {
            e.printStackTrace();
        }
    }
    // avro to be handled differently, thanks to all the incompatibilities
    // in the apis.
    else {
        String inputQuery = conf.get(DBConfiguration.INPUT_QUERY);
        String inputBoundingQuery = conf.get(DBConfiguration.INPUT_BOUNDING_QUERY);
        logger.debug("About to set the params");
        // co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.setInput(job,
        // inputQuery, inputBoundingQuery, params);
        logger.debug("Set the params");

        JobConf jobConf = new JobConf(conf);

        try {
            GenericDBWritable queryWritable = getDBWritable(jobConf);
            Schema pair = DBMapper.getPairSchema(queryWritable.getColumns());

            AvroJob.setMapOutputSchema(jobConf, pair);
            GenericRecordAvroOutputFormat.setOutputPath(jobConf,
                    new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));

            co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.setInput(jobConf, inputQuery,
                    inputBoundingQuery, params);
            jobConf.setInputFormat(co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.class);
            jobConf.setMapperClass(DBInputAvroMapper.class);
            jobConf.setMapOutputKeyClass(NullWritable.class);
            jobConf.setMapOutputValueClass(AvroValue.class);
            jobConf.setOutputKeyClass(NullWritable.class);
            jobConf.setOutputValueClass(Text.class);
            jobConf.setOutputFormat(GenericRecordAvroOutputFormat.class);
            jobConf.setJarByClass(DBQueryInputJob.class);
            jobConf.setStrings("io.serializations",
                    "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization,org.apache.avro.mapred.AvroSerialization");
            jobConf.setNumReduceTasks(0);
            /*
             * jobConf.setOutputFormat(org.apache.hadoop.mapred.
             * SequenceFileOutputFormat.class);
             * org.apache.hadoop.mapred.SequenceFileOutputFormat
             * .setOutputPath(jobConf, new
             * Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
             */
            JobClient.runJob(jobConf);
        } catch (Throwable e) {
            e.printStackTrace();
        }

    }

}

From source file:co.nubetech.hiho.merge.MergeJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    populateConfiguration(args);/*from  w w  w .  j  a  v a2 s  .  c  om*/
    try {
        checkMandatoryConfs();
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new Exception(e1);
    }

    Class inputFormatClass = Class.forName(inputFormat);
    Class outputFormatClass = Class.forName(outputFormat);
    Class inputKeyClass = Class.forName(inputKeyClassName);
    Class inputValueClass = Class.forName(inputValueClassName);

    Configuration conf = getConf();
    conf.set(HIHOConf.MERGE_OLD_PATH, oldPath);
    conf.set(HIHOConf.MERGE_NEW_PATH, newPath);

    Job job = new Job(conf);
    job.setJobName("Merge job");
    job.setJarByClass(MergeJob.class);

    if (mergeBy.equals("key")) {
        job.setMapperClass(MergeKeyMapper.class);
        job.setReducerClass(MergeKeyReducer.class);

    } else if (mergeBy.equals("value")) {
        job.setMapperClass(MergeValueMapper.class);
        job.setReducerClass(MergeValueReducer.class);
    }

    job.setInputFormatClass(inputFormatClass);
    DelimitedTextInputFormat.setProperties(job, delimiter, column);
    job.setMapOutputKeyClass(HihoTuple.class);
    job.setMapOutputValueClass(HihoValue.class);

    job.setOutputKeyClass(inputKeyClass);
    job.setOutputValueClass(inputValueClass);
    FileInputFormat.setInputPaths(job, oldPath + "," + newPath);
    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    try {
        logger.debug("Output format class is " + job.getOutputFormatClass());
        logger.debug("Class is " + ReflectionUtils
                .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());
        job.waitForCompletion(false);
        if (job.isComplete()) {
            Counters counters = job.getCounters();
            totalRecordsOld = counters.findCounter(MergeRecordCounter.TOTAL_RECORDS_OLD).getValue();
            totalRecordsNew = counters.findCounter(MergeRecordCounter.TOTAL_RECORDS_NEW).getValue();
            badRecords = counters.findCounter(MergeRecordCounter.BAD_RECORD).getValue();
            output = counters.findCounter(MergeRecordCounter.OUTPUT).getValue();
            logger.info("Total old records read are: " + totalRecordsOld);
            logger.info("Total new records read are: " + totalRecordsNew);
            logger.info("Bad Records are: " + badRecords);
            logger.info("Output records are: " + output);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }

    return 0;
}

From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java

License:Apache License

private void runJob(Job job) throws ClassNotFoundException, IOException, InterruptedException {
    assert job.getJobID() != null;
    TaskID taskId = newMapTaskId(job.getJobID(), 0);
    Configuration conf = job.getConfiguration();
    OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf);
    OutputCommitter committer = output/*from  w w  w.j  a v a  2 s .c om*/
            .getOutputCommitter(newTaskAttemptContext(conf, newTaskAttemptId(taskId, 0)));
    boolean succeed = false;
    committer.setupJob(job);
    try {
        if (job.getNumReduceTasks() == 0) {
            runMap(job, null);
        } else {
            try (KeyValueSorter<?, ?> sorter = createSorter(job, job.getMapOutputKeyClass(),
                    job.getMapOutputValueClass())) {
                runMap(job, sorter);
                runReduce(job, sorter);
            }
        }
        committer.commitJob(job);
        succeed = true;
    } finally {
        if (succeed == false) {
            try {
                committer.abortJob(job, State.FAILED);
            } catch (IOException e) {
                LOG.error(MessageFormat.format("error occurred while aborting job: {0} ({1})", job.getJobID(),
                        job.getJobName()), e);
            }
        }
    }
}

From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java

License:Apache License

@SuppressWarnings({ "rawtypes", "unchecked" })
private void runMap(Job job, KeyValueSorter<?, ?> sorter)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = job.getConfiguration();
    InputFormat<?, ?> input = ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
    List<InputSplit> splits = input.getSplits(job);
    int serial = 1;
    for (InputSplit split : splits) {
        TaskAttemptID id = newTaskAttemptId(newMapTaskId(job.getJobID(), serial++), 0);
        Mapper<?, ?, ?, ?> mapper = ReflectionUtils.newInstance(job.getMapperClass(), conf);
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("starting mapper: {0}@{1} ({2}bytes)", //$NON-NLS-1$
                    mapper.getClass().getName(), id, split.getLength()));
        }//from  w  w  w.  j  a  v  a 2s .co  m
        TaskAttemptContext context = newTaskAttemptContext(conf, id);
        // we always obtain a new OutputFormat object / OutputFormat.getOutputCommiter() may be cached
        OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf);
        OutputCommitter committer = output.getOutputCommitter(context);
        committer.setupTask(context);
        boolean succeed = false;
        try (RecordReader<?, ?> reader = input.createRecordReader(split, newTaskAttemptContext(conf, id))) {
            RecordWriter<?, ?> writer;
            if (sorter != null) {
                writer = new ShuffleWriter(sorter);
            } else {
                writer = output.getRecordWriter(newTaskAttemptContext(conf, id));
            }
            try {
                Mapper.Context c = newMapperContext(conf, id, reader, writer, committer, split);
                reader.initialize(split, c);
                mapper.run(c);
            } finally {
                writer.close(newTaskAttemptContext(conf, id));
            }
            doCommitTask(context, committer);
            succeed = true;
        } finally {
            if (succeed == false) {
                doAbortTask(context, committer);
            }
        }
    }
}

From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunner.java

License:Apache License

@SuppressWarnings({ "unchecked", "rawtypes" })
private void runReduce(Job job, KeyValueSorter<?, ?> sorter)
        throws ClassNotFoundException, IOException, InterruptedException {
    Configuration conf = job.getConfiguration();
    OutputFormat<?, ?> output = ReflectionUtils.newInstance(job.getOutputFormatClass(), conf);
    TaskAttemptID id = newTaskAttemptId(newReduceTaskId(job.getJobID(), 1), 0);
    Reducer<?, ?, ?, ?> reducer = ReflectionUtils.newInstance(job.getReducerClass(), conf);
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format("starting reducer: {0}@{1} ({2}records, {3}bytes)", //$NON-NLS-1$
                reducer.getClass().getName(), id, sorter.getRecordCount(), sorter.getSizeInBytes()));
    }//from w  w  w  .  j  a  va  2 s.c  o m
    TaskAttemptContext context = newTaskAttemptContext(conf, id);
    OutputCommitter committer = output.getOutputCommitter(context);
    committer.setupTask(context);
    boolean succeed = false;
    try {
        ShuffleReader reader = new ShuffleReader(sorter, new Progress());
        try {
            RecordWriter<?, ?> writer = output.getRecordWriter(newTaskAttemptContext(conf, id));
            try {
                Reducer.Context c = newReducerContext(conf, id, reader, sorter.getKeyClass(),
                        sorter.getValueClass(), writer, committer, (RawComparator) job.getGroupingComparator());
                reducer.run(c);
            } finally {
                writer.close(newTaskAttemptContext(conf, id));
            }
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                LOG.warn(MessageFormat.format("error occurred while reducer mapper input: {0} ({1})", id,
                        job.getJobName()), e);
            }
        }
        doCommitTask(context, committer);
        succeed = true;
    } finally {
        if (succeed == false) {
            doAbortTask(context, committer);
        }
    }
}

From source file:com.baynote.kafka.hadoop.KafkaJobBuilderTest.java

License:Apache License

@Test
public void testConfigureWholeJob() throws Exception {
    // base configuration
    builder.setZkConnect("localhost:2181");
    builder.addQueueInput("queue_name", "group_name", MockMapper.class);
    builder.setTextFileOutputFormat("/a/hdfs/path");

    // extended configuration
    builder.setJobName("job_name");
    builder.setMapOutputKeyClass(Text.class);
    builder.setMapOutputValueClass(BytesWritable.class);
    builder.setReducerClass(MockReducer.class);
    builder.setTaskMemorySettings("-Xmx2048m");
    builder.setNumReduceTasks(100);/*from w w  w . j av  a 2  s .  com*/
    builder.setParitioner(MockPartitioner.class);
    builder.setKafkaFetchSizeBytes(1024);

    Job job = builder.configureJob(conf);

    assertEquals("job_name", job.getJobName());
    assertEquals(Text.class, job.getMapOutputKeyClass());
    assertEquals(BytesWritable.class, job.getMapOutputValueClass());
    assertEquals(MockReducer.class, job.getReducerClass());
    assertEquals(MockMapper.class, job.getMapperClass());
    assertEquals("-Xmx2048m", job.getConfiguration().get("mapred.child.java.opts"));
    assertEquals(100, job.getNumReduceTasks());
    assertEquals(MockPartitioner.class, job.getPartitionerClass());
    assertEquals(1024, KafkaInputFormat.getKafkaFetchSizeBytes(job.getConfiguration()));
    assertEquals(TextOutputFormat.class, job.getOutputFormatClass());
    assertEquals(KafkaInputFormat.class, job.getInputFormatClass());
    assertEquals("file:/a/hdfs/path", TextOutputFormat.getOutputPath(job).toString());

    builder.setJobName(null);
    builder.setSequenceFileOutputFormat();
    builder.setUseLazyOutput();
    builder.addQueueInput("queue_name_2", "group_name_2", MockMapper.class);

    job = builder.configureJob(conf);
    assertEquals(LazyOutputFormat.class, job.getOutputFormatClass());
    assertEquals(MultipleKafkaInputFormat.class, job.getInputFormatClass());
    assertEquals(DelegatingMapper.class, job.getMapperClass());
    assertEquals(BytesWritable.class, job.getOutputKeyClass());
    assertEquals(BytesWritable.class, job.getOutputValueClass());
    assertNotNull(SequenceFileOutputFormat.getOutputPath(job));
    assertNotNull(job.getJobName());

    // use s3
    builder.useS3("my_aws_key", "s3cr3t", "my-bucket");
    builder.setTextFileOutputFormat("/a/hdfs/path");
    job = builder.configureJob(conf);

    assertEquals("my_aws_key", job.getConfiguration().get("fs.s3n.awsAccessKeyId"));
    assertEquals("s3cr3t", job.getConfiguration().get("fs.s3n.awsSecretAccessKey"));
    assertEquals("my_aws_key", job.getConfiguration().get("fs.s3.awsAccessKeyId"));
    assertEquals("s3cr3t", job.getConfiguration().get("fs.s3.awsSecretAccessKey"));
}

From source file:com.cloudera.castagna.logparser.Utils.java

License:Apache License

public static void log(Job job, Logger log) throws ClassNotFoundException {
    log.debug("{} -> {} ({}, {}) -> {}#{} ({}, {}) -> {}",
            new Object[] { job.getInputFormatClass().getSimpleName(), job.getMapperClass().getSimpleName(),
                    job.getMapOutputKeyClass().getSimpleName(), job.getMapOutputValueClass().getSimpleName(),
                    job.getReducerClass().getSimpleName(), job.getNumReduceTasks(),
                    job.getOutputKeyClass().getSimpleName(), job.getOutputValueClass().getSimpleName(),
                    job.getOutputFormatClass().getSimpleName() });
    Path[] inputs = FileInputFormat.getInputPaths(job);
    Path output = FileOutputFormat.getOutputPath(job);
    log.debug("input: {}", inputs[0]);
    log.debug("output: {}", output);
}

From source file:com.example.Driver.java

License:Open Source License

public int run(String[] args) throws Exception {

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Your job name");

    job.setJarByClass(Driver.class);

    logger.info("job " + job.getJobName() + " [" + job.getJar() + "] started with the following arguments: "
            + Arrays.toString(args));

    if (args.length < 2) {
        logger.warn("to run this jar are necessary at 2 parameters \"" + job.getJar()
                + " input_files output_directory");
        return 1;
    }/*from   w  w w .  j a  v  a  2s. c  om*/

    job.setMapperClass(WordcountMapper.class);
    logger.info("mapper class is " + job.getMapperClass());

    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(IntWritable.class);
    logger.info("mapper output key class is " + job.getMapOutputKeyClass());
    logger.info("mapper output value class is " + job.getMapOutputValueClass());

    job.setReducerClass(WordcountReducer.class);
    logger.info("reducer class is " + job.getReducerClass());
    job.setCombinerClass(WordcountReducer.class);
    logger.info("combiner class is " + job.getCombinerClass());
    //When you are not runnign any Reducer
    //OR    job.setNumReduceTasks(0);
    //      logger.info("number of reduce task is " + job.getNumReduceTasks());

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    logger.info("output key class is " + job.getOutputKeyClass());
    logger.info("output value class is " + job.getOutputValueClass());

    job.setInputFormatClass(TextInputFormat.class);
    logger.info("input format class is " + job.getInputFormatClass());

    job.setOutputFormatClass(TextOutputFormat.class);
    logger.info("output format class is " + job.getOutputFormatClass());

    Path filePath = new Path(args[0]);
    logger.info("input path " + filePath);
    FileInputFormat.setInputPaths(job, filePath);

    Path outputPath = new Path(args[1]);
    logger.info("output path " + outputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.waitForCompletion(true);
    return 0;
}