Example usage for org.apache.hadoop.mapreduce Job getJobName

List of usage examples for org.apache.hadoop.mapreduce Job getJobName

Introduction

On this page you can find usage examples for org.apache.hadoop.mapreduce Job.getJobName().

Prototype

public String getJobName() 

Document

The user-specified job name.
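
Quick example

Before the project examples below, here is a minimal standalone sketch (not taken from any of the listed projects) showing that getJobName() simply returns the name supplied when the Job is created; the class name and the job name "example-job" are placeholders chosen for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class JobNameExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job.getInstance(conf, name) sets the user-specified job name.
        Job job = Job.getInstance(conf, "example-job");
        // getJobName() returns that same name, so this prints "Job name: example-job".
        System.out.println("Job name: " + job.getJobName());
    }
}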

Usage

From source file:com.marklogic.contentpump.ContentPump.java

License:Apache License

public static int runCommand(String[] args) throws IOException {
    // get command
    String cmd = args[0];
    if (cmd.equalsIgnoreCase("help")) {
        printUsage();
        return 1;
    } else if (cmd.equalsIgnoreCase("version")) {
        logVersions();
        return 1;
    }

    Command command = Command.forName(cmd);

    // get options arguments
    String[] optionArgs = Arrays.copyOfRange(args, 1, args.length);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Command: " + command);
        StringBuilder buf = new StringBuilder();
        for (String arg : optionArgs) {
            buf.append(arg);
            buf.append(' ');
        }
        LOG.debug("Arguments: " + buf);
    }

    // parse hadoop specific options
    Configuration conf = new Configuration();
    GenericOptionsParser genericParser = new GenericOptionsParser(conf, optionArgs);
    String[] remainingArgs = genericParser.getRemainingArgs();

    // parse command specific options
    CommandlineOptions options = new CommandlineOptions();
    command.configOptions(options);
    CommandLineParser parser = new GnuParser();
    CommandLine cmdline;
    try {
        cmdline = parser.parse(options, remainingArgs);
    } catch (Exception e) {
        LOG.error("Error parsing command arguments: ");
        LOG.error(e.getMessage());
        // Print the command usage message and exit.    
        command.printUsage(command, options.getPublicOptions());
        return 1; // Exit on exception here.
    }

    for (String arg : cmdline.getArgs()) {
        LOG.error("Unrecognized argument: " + arg);
        // Print the command usage message and exit.
        command.printUsage(command, options.getPublicOptions());
        return 1; // Exit on exception here.
    }

    // check running mode and hadoop conf dir configuration 
    String mode = cmdline.getOptionValue(MODE);
    String hadoopConfDir = System.getenv(HADOOP_CONFDIR_ENV_NAME);
    if (cmdline.hasOption(HADOOP_CONF_DIR)) {
        hadoopConfDir = cmdline.getOptionValue(HADOOP_CONF_DIR);
    }

    boolean distributed = hadoopConfDir != null && (mode == null || mode.equals(MODE_DISTRIBUTED));
    if (MODE_DISTRIBUTED.equalsIgnoreCase(mode) && !distributed) {
        LOG.error("Cannot run in distributed mode.  HADOOP_CONF_DIR is " + "not configured.");
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("Running in: " + (distributed ? "distributed " : "local") + "mode");
        if (distributed) {
            LOG.debug("HADOOP_CONF_DIR is set to " + hadoopConfDir);
        }
    }
    conf.set(EXECUTION_MODE, distributed ? MODE_DISTRIBUTED : MODE_LOCAL);

    if (distributed) {
        if (!cmdline.hasOption(SPLIT_INPUT) && Command.getInputType(cmdline).equals(InputType.DELIMITED_TEXT)) {
            conf.setBoolean(ConfigConstants.CONF_SPLIT_INPUT, true);
        }
        File hdConfDir = new File(hadoopConfDir);
        try {
            checkHadoopConfDir(hdConfDir);
        } catch (IllegalArgumentException e) {
            LOG.error("Error found with Hadoop home setting", e);
            System.err.println(e.getMessage());
            return 1;
        }
        // set new class loader based on Hadoop Conf Dir
        try {
            setClassLoader(hdConfDir, conf);
        } catch (Exception e) {
            LOG.error("Error configuring class loader", e);
            System.err.println(e.getMessage());
            return 1;
        }
    } else { // running in local mode
        // Tell Hadoop that we are running in local mode.  This is useful
        // when the user has Hadoop home or their Hadoop conf dir in their
        // classpath but want to run in local mode.
        conf.set(CONF_MAPREDUCE_JOBTRACKER_ADDRESS, "local");
    }

    // create job
    Job job = null;
    try {
        if (distributed) {
            // So far all jobs created by mlcp are map only,
            // so set number of reduce tasks to 0.
            conf.setInt("mapreduce.job.reduces", 0);
            // No speculative runs since speculative tasks don't get to 
            // clean up sessions properly
            conf.setBoolean("mapreduce.map.speculative", false);
        } else {
            // set working directory
            conf.set(CONF_MAPREDUCE_JOB_WORKING_DIR, System.getProperty("user.dir"));
        }
        job = command.createJob(conf, cmdline);
    } catch (Exception e) {
        // Print exception message.
        e.printStackTrace();
        return 1;
    }

    LOG.info("Job name: " + job.getJobName());
    // run job
    try {
        if (distributed) {
            // submit job
            submitJob(job);
        } else {
            runJobLocally(job, cmdline, command);
        }
        return 0;
    } catch (Exception e) {
        LOG.error("Error running a ContentPump job", e);
        e.printStackTrace(System.err);
        return 1;
    }
}

From source file:com.marklogic.contentpump.ContentPump.java

License:Apache License

private static void submitJob(Job job) throws Exception {
    String cpHome = System.getProperty(CONTENTPUMP_HOME_PROPERTY_NAME);

    // find job jar
    File cpHomeDir = new File(cpHome);
    FilenameFilter jobJarFilter = new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            if (name.endsWith(".jar") && name.startsWith(CONTENTPUMP_JAR_PREFIX)) {
                return true;
            } else {
                return false;
            }
        }
    };
    File[] cpJars = cpHomeDir.listFiles(jobJarFilter);
    if (cpJars == null || cpJars.length == 0) {
        throw new RuntimeException("Content Pump jar file " + "is not found under " + cpHome);
    }
    if (cpJars.length > 1) {
        throw new RuntimeException("More than one Content Pump jar file " + "are found under " + cpHome);
    }
    // set job jar
    Configuration conf = job.getConfiguration();
    conf.set("mapreduce.job.jar", cpJars[0].toURI().toURL().toString());

    // find lib jars
    FilenameFilter filter = new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            if (name.endsWith(".jar") && !name.startsWith("hadoop")) {
                return true;
            } else {
                return false;
            }
        }

    };

    // set lib jars
    StringBuilder jars = new StringBuilder();
    for (File jar : cpHomeDir.listFiles(filter)) {
        if (jars.length() > 0) {
            jars.append(',');
        }
        jars.append(jar.toURI().toURL().toString());
    }
    conf.set("tmpjars", jars.toString());
    if (LOG.isTraceEnabled())
        LOG.trace("LIBJARS:" + jars.toString());
    job.waitForCompletion(true);
    AuditUtil.auditMlcpFinish(conf, job.getJobName(), job.getCounters());
}

From source file:com.marklogic.contentpump.ContentPump.java

License:Apache License

private static void runJobLocally(Job job, CommandLine cmdline, Command cmd) throws Exception {
    LocalJobRunner runner = new LocalJobRunner(job, cmdline, cmd);
    runner.run();
    AuditUtil.auditMlcpFinish(job.getConfiguration(), job.getJobName(), runner.getReporter().counters);
}

From source file:com.metamx.druid.indexer.DeterminePartitionsJob.java

License:Open Source License

public boolean run() {
    try {
        /*
         * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
         * in the final segment.
         */

        if (!config.getPartitionsSpec().isAssumeGrouped()) {
            final Job groupByJob = new Job(new Configuration(), String.format(
                    "%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals()));

            injectSystemProperties(groupByJob);
            groupByJob.setInputFormatClass(TextInputFormat.class);
            groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
            groupByJob.setMapOutputKeyClass(BytesWritable.class);
            groupByJob.setMapOutputValueClass(NullWritable.class);
            groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setOutputKeyClass(BytesWritable.class);
            groupByJob.setOutputValueClass(NullWritable.class);
            groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
            groupByJob.setJarByClass(DeterminePartitionsJob.class);

            config.addInputPaths(groupByJob);
            config.intoConfiguration(groupByJob);
            FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

            groupByJob.submit();
            log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(),
                    groupByJob.getTrackingURL());

            if (!groupByJob.waitForCompletion(true)) {
                log.error("Job failed: %s", groupByJob.getJobID());
                return false;
            }
        } else {
            log.info("Skipping group-by job.");
        }

        /*
         * Read grouped data and determine appropriate partitions.
         */
        final Job dimSelectionJob = new Job(new Configuration(), String.format(
                "%s-determine_partitions_dimselection-%s", config.getDataSource(), config.getIntervals()));

        dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");

        injectSystemProperties(dimSelectionJob);

        if (!config.getPartitionsSpec().isAssumeGrouped()) {
            // Read grouped data from the groupByJob.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
            dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
            FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
        } else {
            // Directly read the source data, since we assume it's already grouped.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
            dimSelectionJob.setInputFormatClass(TextInputFormat.class);
            config.addInputPaths(dimSelectionJob);
        }

        SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob);
        dimSelectionJob.setMapOutputValueClass(Text.class);
        dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
        dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
        dimSelectionJob.setOutputKeyClass(BytesWritable.class);
        dimSelectionJob.setOutputValueClass(Text.class);
        dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
        dimSelectionJob.setJarByClass(DeterminePartitionsJob.class);

        config.intoConfiguration(dimSelectionJob);
        FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());

        dimSelectionJob.submit();
        log.info("Job %s submitted, status available at: %s", dimSelectionJob.getJobName(),
                dimSelectionJob.getTrackingURL());

        if (!dimSelectionJob.waitForCompletion(true)) {
            log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
            return false;
        }

        /*
         * Load partitions determined by the previous job.
         */

        log.info("Job completed, loading up partitions for intervals[%s].",
                config.getSegmentGranularIntervals());
        FileSystem fileSystem = null;
        Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals()) {
            DateTime bucket = segmentGranularity.getStart();

            final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(new Bucket(0, bucket, 0));
            if (fileSystem == null) {
                fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
            }
            if (fileSystem.exists(partitionInfoPath)) {
                List<ShardSpec> specs = config.jsonMapper.readValue(
                        Utils.openInputStream(dimSelectionJob, partitionInfoPath),
                        new TypeReference<List<ShardSpec>>() {
                        });

                List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
                for (int i = 0; i < specs.size(); ++i) {
                    actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
                    log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i));
                }

                shardSpecs.put(bucket, actualSpecs);
            } else {
                log.info("Path[%s] didn't exist!?", partitionInfoPath);
            }
        }
        config.setShardSpecs(shardSpecs);

        return true;
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}

From source file:com.metamx.druid.indexer.IndexGeneratorJob.java

License:Open Source License

public boolean run() {
    try {
        Job job = new Job(new Configuration(),
                String.format("%s-index-generator-%s", config.getDataSource(), config.getIntervals()));

        job.getConfiguration().set("io.sort.record.percent", "0.23");

        for (String propName : System.getProperties().stringPropertyNames()) {
            Configuration conf = job.getConfiguration();
            if (propName.startsWith("hadoop.")) {
                conf.set(propName.substring("hadoop.".length()), System.getProperty(propName));
            }
        }

        job.setInputFormatClass(TextInputFormat.class);

        job.setMapperClass(IndexGeneratorMapper.class);
        job.setMapOutputValueClass(Text.class);

        SortableBytes.useSortableBytesAsMapOutputKey(job);

        job.setNumReduceTasks(Iterables.size(config.getAllBuckets()));
        job.setPartitionerClass(IndexGeneratorPartitioner.class);

        job.setReducerClass(IndexGeneratorReducer.class);
        job.setOutputKeyClass(BytesWritable.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(IndexGeneratorOutputFormat.class);
        FileOutputFormat.setOutputPath(job, config.makeIntermediatePath());

        config.addInputPaths(job);
        config.intoConfiguration(job);

        job.setJarByClass(IndexGeneratorJob.class);

        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());

        boolean success = job.waitForCompletion(true);

        Counter invalidRowCount = job.getCounters()
                .findCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER);
        jobStats.setInvalidRowCount(invalidRowCount.getValue());

        return success;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file:com.moz.fiji.mapreduce.framework.JobHistoryFijiTable.java

License:Apache License

/**
 * Writes a job into the JobHistoryFijiTable.
 *
 * @param job The job to save.
 * @param startTime The time the job began, in milliseconds.
 * @param endTime The time the job ended, in milliseconds.
 * @throws IOException If there is an error writing to the table.
 */
public void recordJob(final Job job, final long startTime, final long endTime) throws IOException {
    recordJob(job.getJobID().toString(), job.getJobName(), startTime, endTime, job.isSuccessful(),
            job.getConfiguration(), getCounters(job), Collections.<String, String>emptyMap());
}

From source file:com.panguso.lc.analysis.format.Logcenter.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    context = new ClassPathXmlApplicationContext("applicationContext.xml");
    Properties prop = context.getBean("configProperties", Properties.class);
    // String time = new DateTime().toString("yyyyMMddHH");

    // hadoop.lib=/application/format/lib/
    // hadoop.conf=/application/format/conf/
    // hadoop.src=/log/src/
    // hadoop.dest=/log/dest/
    // hadoop.archive=/log/archive/
    libPath = prop.getProperty("hadoop.lib");
    confPath = prop.getProperty("hadoop.conf");
    srcPath = prop.getProperty("hadoop.src");
    destPath = prop.getProperty("hadoop.dest");
    archivePath = prop.getProperty("hadoop.archive");
    Configuration conf = getConf();
    logger.info("libPath=" + libPath);
    logger.info("confPath=" + confPath);
    logger.info("srcPath=" + srcPath);
    logger.info("destPath=" + destPath);
    logger.info("archivePath=" + archivePath);

    FileSystem fs = FileSystem.get(conf);
    // add library jars under libPath to the distributed cache classpath
    FileStatus[] fJars = fs.listStatus(new Path(libPath));
    for (FileStatus fileStatus : fJars) {
        String jar = libPath + fileStatus.getPath().getName();
        DistributedCache.addFileToClassPath(new Path(jar), conf, FileSystem.get(conf));
    }
    // add configuration files under confPath to the distributed cache classpath
    FileStatus[] fProp = fs.listStatus(new Path(confPath));
    for (FileStatus fileStatus : fProp) {
        DistributedCache.addArchiveToClassPath(new Path(confPath + fileStatus.getPath().getName()), conf,
                FileSystem.get(conf));
    }
    FileStatus[] fDirs = fs.listStatus(new Path(srcPath));
    if (fDirs != null && fDirs.length > 0) {
        for (FileStatus file : fDirs) {
            // dir
            String currentTime = file.getPath().getName();
            String srcPathWithTime = srcPath + currentTime + "/";
            String destPathWithTime = destPath + currentTime + "/";
            String archPathWithTime = archivePath + currentTime + "/";
            // skip time directories that have already been processed successfully
            if (analysisService.isSuccessful(currentTime)) {
                continue;
            }

            // delete any previous output for this time directory
            fs.delete(new Path(destPathWithTime), true);

            // optional check that the input path exists (left disabled below)
            // if (!fs.exists(new Path(srcPathWithTime))) {
            // logger.warn("outPath does not exist,inputPath=" +
            // srcPathWithTime);
            // analysisService.saveFailureJob(job.getJobName(),
            // currentTime);
            // return -1;
            // }
            // replace ';' with ':' in the job classpath entries
            Job job = new Job(conf);
            String jars = job.getConfiguration().get("mapred.job.classpath.files");
            job.getConfiguration().set("mapred.job.classpath.files", jars.replace(";", ":"));
            logger.info("current dir=" + currentTime);
            job.setJobName("format_" + currentTime);

            job.setJarByClass(Logcenter.class);
            job.setMapperClass(FormatAnalysisMapper.class);
            job.setReducerClass(FormatAnalysisReducer.class);
            job.setCombinerClass(FormatAnalysisReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            // job.setNumReduceTasks(0);
            FileInputFormat.addInputPath(job, new Path(srcPathWithTime));
            FileOutputFormat.setOutputPath(job, new Path(destPathWithTime));

            // run the job and wait for completion
            boolean result = false;
            try {
                result = job.waitForCompletion(true);
            } catch (FileAlreadyExistsException e) {
                logger.warn(e.getMessage(), e);
            }
            if (!result) {
                logger.warn("job execute failure!");
                analysisService.saveFailureJob(job.getJobName(), currentTime);
                continue;
                // return -1;
            }

            // archive the processed source data
            fs.delete(new Path(archPathWithTime), true);
            fs.rename(new Path(srcPathWithTime), new Path(archPathWithTime));
            analysisService.saveSuccessJob(job.getJobName(), currentTime);
        }
    }

    FileSystem.closeAll();
    return 0;
}

From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.MapReduceUtils.java

License:LGPL

/**
 * Wait for the completion of a job.
 * @param job the job to submit
 * @param jobDescription the description of the job
 * @param waitTimeInMillis waiting time in milliseconds between two checks of job completion
 * @param status step status
 * @param counterGroup group of the counter to log
 * @throws EoulsanException if the job fails or if an exception occurs while
 *           submitting or waiting for the end of the job
 */
public static void submitAndWaitForJob(final Job job, final String jobDescription, final int waitTimeInMillis,
        final TaskStatus status, final String counterGroup) throws EoulsanException {

    if (job == null) {
        throw new NullPointerException("The job is null");
    }

    if (jobDescription == null) {
        throw new NullPointerException("The jobDescription is null");
    }

    try {

        // Set the description of the context
        status.setDescription(job.getJobName());

        // Submit the job
        job.submit();

        // Add the Hadoop job to the list of jobs to kill if the workflow fails
        HadoopJobEmergencyStopTask.addHadoopJobEmergencyStopTask(job);

        // Wait for the completion of the job (non-verbose mode)
        job.waitForCompletion(false);

        // Remove the Hadoop job from the list of jobs to kill if the workflow fails
        HadoopJobEmergencyStopTask.removeHadoopJobEmergencyStopTask(job);

        // Check if the job has been successfully executed
        if (!job.isSuccessful()) {

            status.setProgressMessage("FAILED");

            throw new EoulsanException("Fail of the Hadoop job: " + job.getJobFile());
        }

        // Set the counters
        status.setCounters(new HadoopReporter(job.getCounters()), counterGroup);

    } catch (ClassNotFoundException | InterruptedException | IOException e) {
        throw new EoulsanException(e);
    }
}

From source file:gov.nasa.jpl.memex.pooledtimeseries.MeanChiSquareDistanceCalculation.java

License:Apache License

public static void main(String[] args) throws Exception {
    System.loadLibrary(Core.NATIVE_LIBRARY_NAME);

    Configuration baseConf = new Configuration();
    baseConf.set("mapreduce.job.maps", "96");
    baseConf.set("mapred.tasktracker.map.tasks.maximum", "96");

    JobConf conf = new JobConf();
    System.out.println("Before Map:" + conf.getNumMapTasks());
    conf.setNumMapTasks(96);
    System.out.println("After Map:" + conf.getNumMapTasks());

    Job job = Job.getInstance(baseConf);
    job.setJarByClass(MeanChiSquareDistanceCalculation.class);

    job.setJobName("mean_chi_square_calculation");
    System.out.println("Job ID" + job.getJobID());
    System.out.println("Track:" + baseConf.get("mapred.job.tracker"));
    System.out.println("Job Name" + job.getJobName());
    System.out.println(baseConf.get("mapreduce.job.maps"));
    System.out.println("Caching video-metric-bak.tgz");
    job.addCacheArchive(new URI("/user/pts/video-metric-bak.tgz"));
    URI[] cacheFiles = job.getCacheFiles();
    if (cacheFiles != null && cacheFiles.length > 0) {
        System.out.println("Cache file ->" + cacheFiles[0]);
    }
    System.out.println("Cached video-metric-bak.tgz");

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(DoubleWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.waitForCompletion(true);

}

From source file:gov.nasa.jpl.memex.pooledtimeseries.SimilarityCalculation.java

License:Apache License

public static void main(String[] args) throws Exception {
    System.loadLibrary(Core.NATIVE_LIBRARY_NAME);

    Configuration baseConf = new Configuration();
    baseConf.set("mapreduce.job.maps", "96");
    baseConf.set("mapreduce.job.reduces", "0");
    baseConf.set("mapred.tasktracker.map.tasks.maximum", "96");
    baseConf.set("meanDistsFilePath", args[2]);

    JobConf conf = new JobConf();
    System.out.println("Before Map:" + conf.getNumMapTasks());
    conf.setNumMapTasks(196);
    System.out.println("After Map:" + conf.getNumMapTasks());

    Job job = Job.getInstance(baseConf);
    System.out.println("Track: " + baseConf.get("mapred.job.tracker"));
    System.out.println("Job ID" + job.getJobID());
    System.out.println("Job Name" + job.getJobName());
    System.out.println(baseConf.get("mapreduce.job.maps"));
    job.setJarByClass(SimilarityCalculation.class);

    job.setJobName("similarity_calc");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(Map.class);

    job.waitForCompletion(true);
}