Example usage for org.apache.hadoop.mapreduce Job getJobName

List of usage examples for org.apache.hadoop.mapreduce Job getJobName

Introduction

On this page you can find usage examples for org.apache.hadoop.mapreduce Job.getJobName().

Prototype

public String getJobName() 

Document

The user-specified job name.
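
Quick example

Before the project examples below, here is a minimal standalone sketch (not taken from any of the listed projects) showing that getJobName() simply returns the name supplied when the Job is created; the class name and the job name "example-job" are placeholders chosen for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class JobNameExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job.getInstance(conf, name) sets the user-specified job name.
        Job job = Job.getInstance(conf, "example-job");
        // getJobName() returns that same name, so this prints "Job name: example-job".
        System.out.println("Job name: " + job.getJobName());
    }
}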

Usage

From source file:com.marklogic.contentpump.ContentPump.java

License:Apache License

public static int runCommand(String[] args) throws IOException {
    // get command
    String cmd = args[0];
    if (cmd.equalsIgnoreCase("help")) {
        printUsage();
        return 1;
    } else if (cmd.equalsIgnoreCase("version")) {
        logVersions();
        return 1;
    }

    Command command = Command.forName(cmd);

    // get options arguments
    String[] optionArgs = Arrays.copyOfRange(args, 1, args.length);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Command: " + command);
        StringBuilder buf = new StringBuilder();
        for (String arg : optionArgs) {
            buf.append(arg);
            buf.append(' ');
        }
        LOG.debug("Arguments: " + buf);
    }

    // parse hadoop specific options
    Configuration conf = new Configuration();
    GenericOptionsParser genericParser = new GenericOptionsParser(conf, optionArgs);
    String[] remainingArgs = genericParser.getRemainingArgs();

    // parse command specific options
    CommandlineOptions options = new CommandlineOptions();
    command.configOptions(options);
    CommandLineParser parser = new GnuParser();
    CommandLine cmdline;
    try {
        cmdline = parser.parse(options, remainingArgs);
    } catch (Exception e) {
        LOG.error("Error parsing command arguments: ");
        LOG.error(e.getMessage());
        // Print the command usage message and exit.    
        command.printUsage(command, options.getPublicOptions());
        return 1; // Exit on exception here.
    }

    for (String arg : cmdline.getArgs()) {
        LOG.error("Unrecognized argument: " + arg);
        // Print the command usage message and exit.
        command.printUsage(command, options.getPublicOptions());
        return 1; // Exit on exception here.
    }

    // check running mode and hadoop conf dir configuration 
    String mode = cmdline.getOptionValue(MODE);
    String hadoopConfDir = System.getenv(HADOOP_CONFDIR_ENV_NAME);
    if (cmdline.hasOption(HADOOP_CONF_DIR)) {
        hadoopConfDir = cmdline.getOptionValue(HADOOP_CONF_DIR);
    }

    boolean distributed = hadoopConfDir != null && (mode == null || mode.equals(MODE_DISTRIBUTED));
    if (MODE_DISTRIBUTED.equalsIgnoreCase(mode) && !distributed) {
        LOG.error("Cannot run in distributed mode.  HADOOP_CONF_DIR is " + "not configured.");
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("Running in: " + (distributed ? "distributed " : "local") + "mode");
        if (distributed) {
            LOG.debug("HADOOP_CONF_DIR is set to " + hadoopConfDir);
        }
    }
    conf.set(EXECUTION_MODE, distributed ? MODE_DISTRIBUTED : MODE_LOCAL);

    if (distributed) {
        if (!cmdline.hasOption(SPLIT_INPUT) && Command.getInputType(cmdline).equals(InputType.DELIMITED_TEXT)) {
            conf.setBoolean(ConfigConstants.CONF_SPLIT_INPUT, true);
        }
        File hdConfDir = new File(hadoopConfDir);
        try {
            checkHadoopConfDir(hdConfDir);
        } catch (IllegalArgumentException e) {
            LOG.error("Error found with Hadoop home setting", e);
            System.err.println(e.getMessage());
            return 1;
        }
        // set new class loader based on Hadoop Conf Dir
        try {
            setClassLoader(hdConfDir, conf);
        } catch (Exception e) {
            LOG.error("Error configuring class loader", e);
            System.err.println(e.getMessage());
            return 1;
        }
    } else { // running in local mode
        // Tell Hadoop that we are running in local mode.  This is useful
        // when the user has Hadoop home or their Hadoop conf dir in their
        // classpath but want to run in local mode.
        conf.set(CONF_MAPREDUCE_JOBTRACKER_ADDRESS, "local");
    }

    // create job
    Job job = null;
    try {
        if (distributed) {
            // So far all jobs created by mlcp are map only,
            // so set number of reduce tasks to 0.
            conf.setInt("mapreduce.job.reduces", 0);
            // No speculative runs since speculative tasks don't get to 
            // clean up sessions properly
            conf.setBoolean("mapreduce.map.speculative", false);
        } else {
            // set working directory
            conf.set(CONF_MAPREDUCE_JOB_WORKING_DIR, System.getProperty("user.dir"));
        }
        job = command.createJob(conf, cmdline);
    } catch (Exception e) {
        // Print exception message.
        e.printStackTrace();
        return 1;
    }

    LOG.info("Job name: " + job.getJobName());
    // run job
    try {
        if (distributed) {
            // submit job
            submitJob(job);
        } else {
            runJobLocally(job, cmdline, command);
        }
        return 0;
    } catch (Exception e) {
        LOG.error("Error running a ContentPump job", e);
        e.printStackTrace(System.err);
        return 1;
    }
}

From source file:com.marklogic.contentpump.ContentPump.java

License:Apache License

private static void submitJob(Job job) throws Exception {
    String cpHome = System.getProperty(CONTENTPUMP_HOME_PROPERTY_NAME);

    // find job jar
    File cpHomeDir = new File(cpHome);
    FilenameFilter jobJarFilter = new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            if (name.endsWith(".jar") && name.startsWith(CONTENTPUMP_JAR_PREFIX)) {
                return true;
            } else {
                return false;
            }
        }
    };
    File[] cpJars = cpHomeDir.listFiles(jobJarFilter);
    if (cpJars == null || cpJars.length == 0) {
        throw new RuntimeException("Content Pump jar file " + "is not found under " + cpHome);
    }
    if (cpJars.length > 1) {
        throw new RuntimeException("More than one Content Pump jar file " + "are found under " + cpHome);
    }
    // set job jar
    Configuration conf = job.getConfiguration();
    conf.set("mapreduce.job.jar", cpJars[0].toURI().toURL().toString());

    // find lib jars
    FilenameFilter filter = new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            if (name.endsWith(".jar") && !name.startsWith("hadoop")) {
                return true;
            } else {
                return false;
            }
        }

    };

    // set lib jars
    StringBuilder jars = new StringBuilder();
    for (File jar : cpHomeDir.listFiles(filter)) {
        if (jars.length() > 0) {
            jars.append(',');
        }
        jars.append(jar.toURI().toURL().toString());
    }
    conf.set("tmpjars", jars.toString());
    if (LOG.isTraceEnabled())
        LOG.trace("LIBJARS:" + jars.toString());
    job.waitForCompletion(true);
    AuditUtil.auditMlcpFinish(conf, job.getJobName(), job.getCounters());
}

From source file:com.marklogic.contentpump.ContentPump.java

License:Apache License

private static void runJobLocally(Job job, CommandLine cmdline, Command cmd) throws Exception {
    LocalJobRunner runner = new LocalJobRunner(job, cmdline, cmd);
    runner.run();
    AuditUtil.auditMlcpFinish(job.getConfiguration(), job.getJobName(), runner.getReporter().counters);
}

From source file:com.metamx.druid.indexer.DeterminePartitionsJob.java

License:Open Source License

public boolean run() {
    try {
        /*
         * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
         * in the final segment.
         */

        if (!config.getPartitionsSpec().isAssumeGrouped()) {
            final Job groupByJob = new Job(new Configuration(), String.format(
                    "%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals()));

            injectSystemProperties(groupByJob);
            groupByJob.setInputFormatClass(TextInputFormat.class);
            groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
            groupByJob.setMapOutputKeyClass(BytesWritable.class);
            groupByJob.setMapOutputValueClass(NullWritable.class);
            groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setOutputKeyClass(BytesWritable.class);
            groupByJob.setOutputValueClass(NullWritable.class);
            groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
            groupByJob.setJarByClass(DeterminePartitionsJob.class);

            config.addInputPaths(groupByJob);
            config.intoConfiguration(groupByJob);
            FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

            groupByJob.submit();
            log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(),
                    groupByJob.getTrackingURL());

            if (!groupByJob.waitForCompletion(true)) {
                log.error("Job failed: %s", groupByJob.getJobID());
                return false;
            }
        } else {
            log.info("Skipping group-by job.");
        }

        /*
         * Read grouped data and determine appropriate partitions.
         */
        final Job dimSelectionJob = new Job(new Configuration(), String.format(
                "%s-determine_partitions_dimselection-%s", config.getDataSource(), config.getIntervals()));

        dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");

        injectSystemProperties(dimSelectionJob);

        if (!config.getPartitionsSpec().isAssumeGrouped()) {
            // Read grouped data from the groupByJob.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
            dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
            FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
        } else {
            // Directly read the source data, since we assume it's already grouped.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
            dimSelectionJob.setInputFormatClass(TextInputFormat.class);
            config.addInputPaths(dimSelectionJob);
        }

        SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob);
        dimSelectionJob.setMapOutputValueClass(Text.class);
        dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
        dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
        dimSelectionJob.setOutputKeyClass(BytesWritable.class);
        dimSelectionJob.setOutputValueClass(Text.class);
        dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
        dimSelectionJob.setJarByClass(DeterminePartitionsJob.class);

        config.intoConfiguration(dimSelectionJob);
        FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());

        dimSelectionJob.submit();
        log.info("Job %s submitted, status available at: %s", dimSelectionJob.getJobName(),
                dimSelectionJob.getTrackingURL());

        if (!dimSelectionJob.waitForCompletion(true)) {
            log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
            return false;
        }

        /*
         * Load partitions determined by the previous job.
         */

        log.info("Job completed, loading up partitions for intervals[%s].",
                config.getSegmentGranularIntervals());
        FileSystem fileSystem = null;
        Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals()) {
            DateTime bucket = segmentGranularity.getStart();

            final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(new Bucket(0, bucket, 0));
            if (fileSystem == null) {
                fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
            }
            if (fileSystem.exists(partitionInfoPath)) {
                List<ShardSpec> specs = config.jsonMapper.readValue(
                        Utils.openInputStream(dimSelectionJob, partitionInfoPath),
                        new TypeReference<List<ShardSpec>>() {
                        });

                List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
                for (int i = 0; i < specs.size(); ++i) {
                    actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
                    log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i));
                }

                shardSpecs.put(bucket, actualSpecs);
            } else {
                log.info("Path[%s] didn't exist!?", partitionInfoPath);
            }
        }
        config.setShardSpecs(shardSpecs);

        return true;
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}

From source file:com.metamx.druid.indexer.IndexGeneratorJob.java

License:Open Source License

public boolean run() {
    try {
        Job job = new Job(new Configuration(),
                String.format("%s-index-generator-%s", config.getDataSource(), config.getIntervals()));

        job.getConfiguration().set("io.sort.record.percent", "0.23");

        for (String propName : System.getProperties().stringPropertyNames()) {
            Configuration conf = job.getConfiguration();
            if (propName.startsWith("hadoop.")) {
                conf.set(propName.substring("hadoop.".length()), System.getProperty(propName));
            }
        }

        job.setInputFormatClass(TextInputFormat.class);

        job.setMapperClass(IndexGeneratorMapper.class);
        job.setMapOutputValueClass(Text.class);

        SortableBytes.useSortableBytesAsMapOutputKey(job);

        job.setNumReduceTasks(Iterables.size(config.getAllBuckets()));
        job.setPartitionerClass(IndexGeneratorPartitioner.class);

        job.setReducerClass(IndexGeneratorReducer.class);
        job.setOutputKeyClass(BytesWritable.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(IndexGeneratorOutputFormat.class);
        FileOutputFormat.setOutputPath(job, config.makeIntermediatePath());

        config.addInputPaths(job);
        config.intoConfiguration(job);

        job.setJarByClass(IndexGeneratorJob.class);

        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());

        boolean success = job.waitForCompletion(true);

        Counter invalidRowCount = job.getCounters()
                .findCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER);
        jobStats.setInvalidRowCount(invalidRowCount.getValue());

        return success;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file:com.moz.fiji.mapreduce.framework.JobHistoryFijiTable.java

License:Apache License

/**
 * Writes a job into the JobHistoryFijiTable.
 *
 * @param job The job to save.
 * @param startTime The time the job began, in milliseconds.
 * @param endTime The time the job ended, in milliseconds.
 * @throws IOException If there is an error writing to the table.
 */
public void recordJob(final Job job, final long startTime, final long endTime) throws IOException {
    recordJob(job.getJobID().toString(), job.getJobName(), startTime, endTime, job.isSuccessful(),
            job.getConfiguration(), getCounters(job), Collections.<String, String>emptyMap());
}

From source file:com.panguso.lc.analysis.format.Logcenter.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    context = new ClassPathXmlApplicationContext("applicationContext.xml");
    Properties prop = context.getBean("configProperties", Properties.class);
    // String time = new DateTime().toString("yyyyMMddHH");

    // hadoop.lib=/application/format/lib/
    // hadoop.conf=/application/format/conf/
    // hadoop.src=/log/src/
    // hadoop.dest=/log/dest/
    // hadoop.archive=/log/archive/
    libPath = prop.getProperty("hadoop.lib");
    confPath = prop.getProperty("hadoop.conf");
    srcPath = prop.getProperty("hadoop.src");
    destPath = prop.getProperty("hadoop.dest");
    archivePath = prop.getProperty("hadoop.archive");
    Configuration conf = getConf();
    logger.info("libPath=" + libPath);
    logger.info("confPath=" + confPath);
    logger.info("srcPath=" + srcPath);
    logger.info("destPath=" + destPath);
    logger.info("archivePath=" + archivePath);

    FileSystem fs = FileSystem.get(conf);
    // add library jars under libPath to the distributed cache classpath
    FileStatus[] fJars = fs.listStatus(new Path(libPath));
    for (FileStatus fileStatus : fJars) {
        String jar = libPath + fileStatus.getPath().getName();
        DistributedCache.addFileToClassPath(new Path(jar), conf, FileSystem.get(conf));
    }
    // add configuration files under confPath to the distributed cache classpath
    FileStatus[] fProp = fs.listStatus(new Path(confPath));
    for (FileStatus fileStatus : fProp) {
        DistributedCache.addArchiveToClassPath(new Path(confPath + fileStatus.getPath().getName()), conf,
                FileSystem.get(conf));
    }
    FileStatus[] fDirs = fs.listStatus(new Path(srcPath));
    if (fDirs != null && fDirs.length > 0) {
        for (FileStatus file : fDirs) {
            // dir
            String currentTime = file.getPath().getName();
            String srcPathWithTime = srcPath + currentTime + "/";
            String destPathWithTime = destPath + currentTime + "/";
            String archPathWithTime = archivePath + currentTime + "/";
            // skip time directories that have already been processed successfully
            if (analysisService.isSuccessful(currentTime)) {
                continue;
            }

            // delete any previous output for this time directory
            fs.delete(new Path(destPathWithTime), true);

            // optional check that the input path exists (left disabled below)
            // if (!fs.exists(new Path(srcPathWithTime))) {
            // logger.warn("outPath does not exist,inputPath=" +
            // srcPathWithTime);
            // analysisService.saveFailureJob(job.getJobName(),
            // currentTime);
            // return -1;
            // }
            // replace ';' with ':' in the job classpath entries
            Job job = new Job(conf);
            String jars = job.getConfiguration().get("mapred.job.classpath.files");
            job.getConfiguration().set("mapred.job.classpath.files", jars.replace(";", ":"));
            logger.info("current dir=" + currentTime);
            job.setJobName("format_" + currentTime);

            job.setJarByClass(Logcenter.class);
            job.setMapperClass(FormatAnalysisMapper.class);
            job.setReducerClass(FormatAnalysisReducer.class);
            job.setCombinerClass(FormatAnalysisReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            // job.setNumReduceTasks(0);
            FileInputFormat.addInputPath(job, new Path(srcPathWithTime));
            FileOutputFormat.setOutputPath(job, new Path(destPathWithTime));

            // run the job and wait for completion
            boolean result = false;
            try {
                result = job.waitForCompletion(true);
            } catch (FileAlreadyExistsException e) {
                logger.warn(e.getMessage(), e);
            }
            if (!result) {
                logger.warn("job execute failure!");
                analysisService.saveFailureJob(job.getJobName(), currentTime);
                continue;
                // return -1;
            }

            // archive the processed source data
            fs.delete(new Path(archPathWithTime), true);
            fs.rename(new Path(srcPathWithTime), new Path(archPathWithTime));
            analysisService.saveSuccessJob(job.getJobName(), currentTime);
        }
    }

    FileSystem.closeAll();
    return 0;
}

From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.MapReduceUtils.java

License:LGPL

/**
 * Wait for the completion of a job.
 * @param job the job to submit
 * @param jobDescription the description of the job
 * @param waitTimeInMillis waiting time in milliseconds between two checks of job completion
 * @param status step status
 * @param counterGroup group of the counter to log
 * @throws EoulsanException if the job fails or if an exception occurs while
 *           submitting or waiting for the end of the job
 */
public static void submitAndWaitForJob(final Job job, final String jobDescription, final int waitTimeInMillis,
        final TaskStatus status, final String counterGroup) throws EoulsanException {

    if (job == null) {
        throw new NullPointerException("The job is null");
    }

    if (jobDescription == null) {
        throw new NullPointerException("The jobDescription is null");
    }

    try {

        // Set the description of the context
        status.setDescription(job.getJobName());

        // Submit the job
        job.submit();

        // Add the Hadoop job to the list of jobs to kill if the workflow fails
        HadoopJobEmergencyStopTask.addHadoopJobEmergencyStopTask(job);

        // Wait for the completion of the job (non-verbose mode)
        job.waitForCompletion(false);

        // Remove the Hadoop job from the list of jobs to kill if the workflow fails
        HadoopJobEmergencyStopTask.removeHadoopJobEmergencyStopTask(job);

        // Check if the job has been successfully executed
        if (!job.isSuccessful()) {

            status.setProgressMessage("FAILED");

            throw new EoulsanException("Fail of the Hadoop job: " + job.getJobFile());
        }

        // Set the counters
        status.setCounters(new HadoopReporter(job.getCounters()), counterGroup);

    } catch (ClassNotFoundException | InterruptedException | IOException e) {
        throw new EoulsanException(e);
    }
}

From source file:gov.nasa.jpl.memex.pooledtimeseries.MeanChiSquareDistanceCalculation.java

License:Apache License

public static void main(String[] args) throws Exception {
    System.loadLibrary(Core.NATIVE_LIBRARY_NAME);

    Configuration baseConf = new Configuration();
    baseConf.set("mapreduce.job.maps", "96");
    baseConf.set("mapred.tasktracker.map.tasks.maximum", "96");

    JobConf conf = new JobConf();
    System.out.println("Before Map:" + conf.getNumMapTasks());
    conf.setNumMapTasks(96);
    System.out.println("After Map:" + conf.getNumMapTasks());

    Job job = Job.getInstance(baseConf);
    job.setJarByClass(MeanChiSquareDistanceCalculation.class);

    job.setJobName("mean_chi_square_calculation");
    System.out.println("Job ID" + job.getJobID());
    System.out.println("Track:" + baseConf.get("mapred.job.tracker"));
    System.out.println("Job Name" + job.getJobName());
    System.out.println(baseConf.get("mapreduce.job.maps"));
    System.out.println("Caching video-metric-bak.tgz");
    job.addCacheArchive(new URI("/user/pts/video-metric-bak.tgz"));
    URI[] cacheFiles = job.getCacheFiles();
    if (cacheFiles != null && cacheFiles.length > 0) {
        System.out.println("Cache file ->" + cacheFiles[0]);
    }
    System.out.println("Cached video-metric-bak.tgz");

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(DoubleWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.waitForCompletion(true);

}

From source file:gov.nasa.jpl.memex.pooledtimeseries.SimilarityCalculation.java

License:Apache License

public static void main(String[] args) throws Exception {
    System.loadLibrary(Core.NATIVE_LIBRARY_NAME);

    Configuration baseConf = new Configuration();
    baseConf.set("mapreduce.job.maps", "96");
    baseConf.set("mapreduce.job.reduces", "0");
    baseConf.set("mapred.tasktracker.map.tasks.maximum", "96");
    baseConf.set("meanDistsFilePath", args[2]);

    JobConf conf = new JobConf();
    System.out.println("Before Map:" + conf.getNumMapTasks());
    conf.setNumMapTasks(196);
    System.out.println("After Map:" + conf.getNumMapTasks());

    Job job = Job.getInstance(baseConf);
    System.out.println("Track: " + baseConf.get("mapred.job.tracker"));
    System.out.println("Job ID" + job.getJobID());
    System.out.println("Job Name" + job.getJobName());
    System.out.println(baseConf.get("mapreduce.job.maps"));
    job.setJarByClass(SimilarityCalculation.class);

    job.setJobName("similarity_calc");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(Map.class);

    job.waitForCompletion(true);
}