Example usage for org.apache.hadoop.mapreduce Job getConfiguration

List of usage examples for org.apache.hadoop.mapreduce Job getConfiguration

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job getConfiguration.

Prototype

public Configuration getConfiguration() 

Source Link

Document

Return the configuration for the job.

Usage

From source file:bucket_sort.NLineInputFormat.java

License:Apache License

/**
 * Set the number of lines per split/*  ww w .  j av  a 2 s. co m*/
 * @param job the job to modify
 * @param numLines the number of lines per split
 */
public static void setNumLinesPerSplit(Job job, int numLines) {
    job.getConfiguration().setInt(LINES_PER_MAP, numLines);
}

From source file:bulkload.ImportTsv.java

License:Apache License

/**
 * Sets up the actual job./*from   w  ww .j  a v  a  2 s  .  c  o  m*/
 * 
 * @param conf
 *            The current configuration.
 * @param args
 *            The command line parameters.
 * @return The newly created job.
 * @throws IOException
 *             When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {

    Job job = null;
    try (Connection connection = ConnectionFactory.createConnection(conf)) {
        try (Admin admin = connection.getAdmin()) {
            // Support non-XML supported characters
            // by re-encoding the passed separator as a Base64 string.
            String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
            if (actualSeparator != null) {
                conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes()));
            }
            TableName tableName = TableName.valueOf(args[0]);
            if (!admin.tableExists(tableName)) {
                String errorMsg = format("Table '%s' does not exist.", tableName);
                LOG.error(errorMsg);
                throw new TableNotFoundException(errorMsg);
            }
            Path inputDir = new Path(args[1]);
            String jobName = conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName.getNameAsString());
            job = Job.getInstance(conf, jobName);
            job.setJarByClass(TsvImporter.class);
            FileInputFormat.setInputPaths(job, inputDir);
            job.setInputFormatClass(TextInputFormat.class);
            job.setMapperClass(TsvImporter.class);

            String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
            if (hfileOutPath != null) {
                try (HTable table = (HTable) connection.getTable(tableName)) {
                    Path outputDir = new Path(hfileOutPath);
                    FileSystem fs = FileSystem.get(conf);
                    if (fs.exists(outputDir)) {
                        if (!fs.delete(outputDir, true)) {
                            throw new IllegalStateException("delete path:" + outputDir + " failed");
                        }
                    }
                    FileOutputFormat.setOutputPath(job, outputDir);
                    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
                    job.setMapOutputValueClass(Put.class);
                    job.setReducerClass(PutSortReducer.class);
                    HFileOutputFormat2.configureIncrementalLoad(job, table, table);
                }
            } else {
                // No reducers. Just write straight to table. Call
                // initTableReducerJob
                // to set up the TableOutputFormat.
                TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
                job.setNumReduceTasks(0);

                //               TableMapReduceUtil.addDependencyJars(job);
                //               TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
                //                     com.google.common.base.Function.class /* Guava used by TsvParser */);
            }

            // Workaround to remove unnecessary hadoop dependencies
            String[] jars = job.getConfiguration().get("tmpjars").split(",", -1);
            StringBuilder filteredJars = new StringBuilder();
            for (String j : jars) {
                String[] parts = j.split("/", -1);
                String fileName = parts[parts.length - 1];
                if (fileName.indexOf("hadoop-") != 0) {
                    filteredJars.append(j);
                    filteredJars.append(",");
                }
            }
            job.getConfiguration().set("tmpjars", filteredJars.toString());
        }
    }

    return job;
}

From source file:byte_import.HexastoreBulkImport.java

License:Open Source License

public Job createSubmittableJob(String[] args) {
    TABLE_NAME = args[1];//from w  w  w . ja  v  a2s . c  o  m
    Job job = null;
    try {
        job = new Job(new Configuration(), NAME);
        job.setJarByClass(HexastoreBulkImport.class);
        job.setMapperClass(sampler.TotalOrderPrep.Map.class);
        job.setReducerClass(Reduce.class);
        job.setCombinerClass(Combiner.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(ImmutableBytesWritable.class);
        job.setPartitionerClass(TotalOrderPartitioner.class);
        //TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("/user/npapa/"+regions+"partitions/part-r-00000"));
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("partitions/part-r-00000"));
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(HFileOutputFormat.class);
        Path out = new Path("out");
        FileOutputFormat.setOutputPath(job, out);
        Configuration conf = new Configuration();
        FileSystem fs;
        try {
            fs = FileSystem.get(conf);
            if (fs.exists(out)) {
                fs.delete(out, true);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }

        HBaseAdmin hadmin = new HBaseAdmin(conf);
        HTableDescriptor desc = new HTableDescriptor(TABLE_NAME + "_stats");
        HColumnDescriptor family = new HColumnDescriptor("size");
        desc.addFamily(family);
        conf.setInt("zookeeper.session.timeout", 600000);
        if (hadmin.tableExists(TABLE_NAME + "_stats")) {
            //hadmin.disableTable(TABLE_NAME+"_stats");
            //hadmin.deleteTable(TABLE_NAME+"_stats");
        } else {
            hadmin.createTable(desc);
        }

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        //job.getConfiguration().setInt("mapred.map.tasks", 18);
        job.getConfiguration().set("h2rdf.tableName", TABLE_NAME);
        job.getConfiguration().setInt("mapred.reduce.tasks", (int) TotalOrderPrep.regions);
        job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
        job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
        job.getConfiguration().setInt("io.sort.mb", 100);
        job.getConfiguration().setInt("io.file.buffer.size", 131072);
        job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);
        //job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
        job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);
        job.getConfiguration().setInt("mapred.tasktracker.map.tasks.maximum", 5);
        job.getConfiguration().setInt("mapred.tasktracker.reduce.tasks.maximum", 5);
        //job.getConfiguration().setInt("io.sort.mb", 100);

    } catch (IOException e2) {
        e2.printStackTrace();
    }

    return job;
}

From source file:ca.uwaterloo.iss4e.hadoop.io.CartesianInputFormat.java

License:Open Source License

public static void setLeftInputInfo(Job job, Class<? extends FileInputFormat> inputFormat, String inputPath) {
    Configuration conf = job.getConfiguration();
    conf.set(LEFT_INPUT_FORMAT, inputFormat.getCanonicalName());
    conf.set(LEFT_INPUT_PATH, inputPath);
}

From source file:ca.uwaterloo.iss4e.hadoop.io.CartesianInputFormat.java

License:Open Source License

public static void setRightInputInfo(Job job, Class<? extends FileInputFormat> inputFormat, String inputPath) {
    Configuration conf = job.getConfiguration();
    conf.set(RIGHT_INPUT_FORMAT, inputFormat.getCanonicalName());
    conf.set(RIGHT_INPUT_PATH, inputPath);
}

From source file:cascading.flow.hadoop.MapReduceFlowPlatformTest.java

License:Open Source License

@Test
public void testCascade() throws IOException {
    getPlatform().copyFromLocal(inputFileApache);

    // Setup two standard cascading flows that will generate the input for the first MapReduceFlow
    Tap source1 = new Hfs(new TextLine(new Fields("offset", "line")), remove(inputFileApache, false));
    String sinkPath4 = getOutputPath("flow4");
    Tap sink1 = new Hfs(new TextLine(new Fields("offset", "line")), remove(sinkPath4, true), SinkMode.REPLACE);
    Flow firstFlow = getPlatform().getFlowConnector(getProperties()).connect(source1, sink1,
            new Pipe("first-flow"));

    String sinkPath5 = getOutputPath("flow5");
    Tap sink2 = new Hfs(new TextLine(new Fields("offset", "line")), remove(sinkPath5, true), SinkMode.REPLACE);
    Flow secondFlow = getPlatform().getFlowConnector(getProperties()).connect(sink1, sink2,
            new Pipe("second-flow"));

    JobConf defaultConf = HadoopPlanner.createJobConf(getProperties());

    JobConf firstConf = new JobConf(defaultConf);
    firstConf.setJobName("first-mr");

    firstConf.setOutputKeyClass(LongWritable.class);
    firstConf.setOutputValueClass(Text.class);

    firstConf.setMapperClass(IdentityMapper.class);
    firstConf.setReducerClass(IdentityReducer.class);

    firstConf.setInputFormat(TextInputFormat.class);
    firstConf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(firstConf, new Path(remove(sinkPath5, true)));
    String sinkPath1 = getOutputPath("flow1");
    FileOutputFormat.setOutputPath(firstConf, new Path(remove(sinkPath1, true)));

    Flow firstMR = new MapReduceFlow(firstConf, true);

    JobConf secondConf = new JobConf(defaultConf);
    secondConf.setJobName("second-mr");

    secondConf.setOutputKeyClass(LongWritable.class);
    secondConf.setOutputValueClass(Text.class);

    secondConf.setMapperClass(IdentityMapper.class);
    secondConf.setReducerClass(IdentityReducer.class);

    secondConf.setInputFormat(TextInputFormat.class);
    secondConf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(secondConf, new Path(remove(sinkPath1, true)));
    String sinkPath2 = getOutputPath("flow2");
    FileOutputFormat.setOutputPath(secondConf, new Path(remove(sinkPath2, true)));

    Flow secondMR = new MapReduceFlow(secondConf, true);

    Job job = new Job(defaultConf);
    job.setJobName("third-mr");

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(org.apache.hadoop.mapreduce.Mapper.class);
    job.setReducerClass(org.apache.hadoop.mapreduce.Reducer.class);

    job.setInputFormatClass(org.apache.hadoop.mapreduce.lib.input.TextInputFormat.class);
    job.setOutputFormatClass(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.class);
    job.getConfiguration().set("mapred.mapper.new-api", "true");
    job.getConfiguration().set("mapred.reducer.new-api", "true");

    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job, new Path(remove(sinkPath2, true)));
    String sinkPath3 = getOutputPath("flow3");
    org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job,
            new Path(remove(sinkPath3, true)));

    Flow thirdMR = new MapReduceFlow(new JobConf(job.getConfiguration()), true);

    CascadeConnector cascadeConnector = new CascadeConnector();

    // pass out of order
    Cascade cascade = cascadeConnector.connect(firstFlow, secondFlow, thirdMR, firstMR, secondMR);

    cascade.complete();//from   w ww . ja va2s.  com

    validateLength(new Hfs(new TextLine(), sinkPath3).openForRead(new HadoopFlowProcess(defaultConf)), 10);
}

From source file:cassandra_mapreduce.MapReduceCassandraDB.java

License:GNU General Public License

public int run(String[] args) throws Exception {

    String columnName = "value";
    getConf().set(CONF_COLUMN_NAME, columnName);
    getConf().set("mapred.job.tracker", args[0] + ":8021");
    Job job = new Job(getConf(), "Phase1");
    job.setJarByClass(MapReduceCassandraDB.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(ReducerToCassandra.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(ByteBuffer.class);
    job.setOutputValueClass(List.class);

    job.setInputFormatClass(ColumnFamilyInputFormat.class);
    job.setOutputFormatClass(ColumnFamilyOutputFormat.class);
    ConfigHelper.setRangeBatchSize(job.getConfiguration(), 800);
    ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);

    ConfigHelper.setRpcPort(job.getConfiguration(), "9160");
    ConfigHelper.setInitialAddress(job.getConfiguration(), args[0]);
    ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner");
    ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY);
    SlicePredicate predicate = new SlicePredicate()
            .setColumn_names(Arrays.asList(ByteBuffer.wrap(columnName.getBytes())));
    ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

    job.waitForCompletion(true);// w w  w. j av a2s  . com

    //Phase 2
    Job job2 = new Job(getConf(), "Phase2");
    job2.setJarByClass(MapReduceCassandraDB.class);
    job2.setMapperClass(Mapper2.class);
    job2.setReducerClass(Reducer2.class);
    job2.setMapOutputKeyClass(Text.class);
    job2.setMapOutputValueClass(IntWritable.class);
    job2.setOutputKeyClass(ByteBuffer.class);
    job2.setOutputValueClass(List.class);

    job2.setInputFormatClass(ColumnFamilyInputFormat.class);
    job2.setOutputFormatClass(ColumnFamilyOutputFormat.class);
    ConfigHelper.setOutputColumnFamily(job2.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY2);

    ConfigHelper.setRpcPort(job2.getConfiguration(), "9160");
    ConfigHelper.setInitialAddress(job2.getConfiguration(), args[0]);
    ConfigHelper.setPartitioner(job2.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner");
    ConfigHelper.setInputColumnFamily(job2.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
    SlicePredicate predicate2 = new SlicePredicate()
            .setColumn_names(Arrays.asList(ByteBuffer.wrap(columnName.getBytes())));
    ConfigHelper.setInputSlicePredicate(job2.getConfiguration(), predicate2);

    job2.waitForCompletion(true);

    //        job.setCombinerClass(IntSumReducer.class);
    //        job.setReducerClass(IntSumReducer.class);
    //        job.setOutputKeyClass(Text.class);
    //        job.setOutputValueClass(Text.class);
    //
    //        job.setInputFormatClass(ColumnFamilyInputFormat.class);
    //        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX));
    //        
    //        ConfigHelper.setRpcPort(job.getConfiguration(), "9160");
    //        ConfigHelper.setInitialAddress(job.getConfiguration(), args[0]);
    //        ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner");
    //        ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY);
    //        SlicePredicate predicate = new SlicePredicate().setColumn_names(Arrays.asList(ByteBuffer.wrap(columnName.getBytes())));
    //        ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);
    //
    //        job.waitForCompletion(true);

    return 0;
}

From source file:cc.slda.AnnotateDocuments.java

License:Apache License

/**
 * Runs this tool./*from   w ww .j  a  va 2s.c  o  m*/
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));
    options.addOption(OptionBuilder.withArgName(PCUTOFF).hasArg()
            .withDescription("probability of topic assignment").create(PCUTOFF));
    options.addOption(OptionBuilder.withArgName(INDEX).hasArg()
            .withDescription("path to data directory containing term and title indices").create(INDEX));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(INDEX)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    float cutoff = 0.9f;
    if (cmdline.hasOption(PCUTOFF)) {
        cutoff = Float.parseFloat(cmdline.getOptionValue(PCUTOFF));
    }
    LOG.info("Tool: " + AnnotateDocuments.class.getSimpleName());
    LOG.info(" - indices path: " + indexPath);
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);
    LOG.info(" - log(probCutoff): " + Math.log(cutoff));

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    Job job = Job.getInstance(conf);
    job.setJobName(AnnotateDocuments.class.getSimpleName());
    job.setJarByClass(AnnotateDocuments.class);

    String termIndex = indexPath + Path.SEPARATOR + TERM;
    String titleIndex = indexPath + Path.SEPARATOR + TITLE;

    Path termIndexPath = new Path(termIndex);
    Path titleIndexPath = new Path(titleIndex);

    Preconditions.checkArgument(fs.exists(termIndexPath), "Missing term index files... " + termIndexPath);
    DistributedCache.addCacheFile(termIndexPath.toUri(), job.getConfiguration());
    Preconditions.checkArgument(fs.exists(titleIndexPath), "Missing title index files... " + titleIndexPath);
    DistributedCache.addCacheFile(titleIndexPath.toUri(), job.getConfiguration());

    job.setNumReduceTasks(reduceTasks);
    conf.setFloat(PCUTOFF, cutoff);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(HMapSIW.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(HMapSIW.class);

    job.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file:cgl.hadoop.apps.runner.DataAnalysis.java

License:Open Source License

/**
 * Launch the MapReduce computation.//from www. ja v  a  2s . com
 * This method first, remove any previous working directories and create a new one
 * Then the data (file names) is copied to this new directory and launch the 
 * MapReduce (map-only though) computation.
 * @param numMapTasks - Number of map tasks.
 * @param numReduceTasks - Number of reduce tasks =0.
 * @param programDir - The directory where the Cap3 program is.
 * @param execName - Name of the executable.
 * @param dataDir - Directory where the data is located.
 * @param outputDir - Output directory to place the output.
 * @param cmdArgs - These are the command line arguments to the Cap3 program.
 * @throws Exception - Throws any exception occurs in this program.
 */
void launch(int numReduceTasks, String programDir, String execName, String workingDir, String databaseArchive,
        String databaseName, String dataDir, String outputDir, String cmdArgs) throws Exception {

    Configuration conf = new Configuration();
    Job job = new Job(conf, execName);

    // First get the file system handler, delete any previous files, add the
    // files and write the data to it, then pass its name as a parameter to
    // job
    Path hdMainDir = new Path(outputDir);
    FileSystem fs = FileSystem.get(conf);
    fs.delete(hdMainDir, true);

    Path hdOutDir = new Path(hdMainDir, "out");

    // Starting the data analysis.
    Configuration jc = job.getConfiguration();

    jc.set(WORKING_DIR, workingDir);
    jc.set(EXECUTABLE, execName);
    jc.set(PROGRAM_DIR, programDir); // this the name of the executable archive
    jc.set(DB_ARCHIVE, databaseArchive);
    jc.set(DB_NAME, databaseName);
    jc.set(PARAMETERS, cmdArgs);
    jc.set(OUTPUT_DIR, outputDir);

    // using distributed cache
    // flush it
    //DistributedCache.releaseCache(new URI(programDir), jc);
    //DistributedCache.releaseCache(new URI(databaseArchive), jc);
    //DistributedCache.purgeCache(jc);
    // reput the data into cache
    long startTime = System.currentTimeMillis();
    //DistributedCache.addCacheArchive(new URI(databaseArchive), jc);
    DistributedCache.addCacheArchive(new URI(programDir), jc);
    System.out.println(
            "Add Distributed Cache in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    FileInputFormat.setInputPaths(job, dataDir);
    FileOutputFormat.setOutputPath(job, hdOutDir);

    job.setJarByClass(DataAnalysis.class);
    job.setMapperClass(RunnerMap.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(DataFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numReduceTasks);

    startTime = System.currentTimeMillis();

    int exitStatus = job.waitForCompletion(true) ? 0 : 1;
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    //clean the cache

    System.exit(exitStatus);
}

From source file:cityhub.CityHub.java

@Override
public int run(String[] strings) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "ReduceJoin");
    job.setJarByClass(CityHub.class);

    MultipleInputs.addInputPath(job, new Path(strings[0]), TextInputFormat.class, JoinMapper1.class);
    MultipleInputs.addInputPath(job, new Path(strings[1]), TextInputFormat.class, JoinMapper2.class);
    job.getConfiguration().set("join.type", "innerjoin");

    job.setReducerClass(JoinReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(strings[2]));

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    boolean complete = job.waitForCompletion(true);
    Configuration conf1 = new Configuration();
    Job job2 = Job.getInstance(conf1, "chaining");
    if (complete) {
        job2.setJarByClass(CityHub.class);

        MultipleInputs.addInputPath(job2, new Path(strings[2]), TextInputFormat.class, JoinMapper3.class);
        MultipleInputs.addInputPath(job2, new Path(strings[3]), TextInputFormat.class, JoinMapper4.class);
        job2.getConfiguration().set("join.type", "innerjoin");

        job2.setReducerClass(JoinReducer1.class);
        job2.setOutputFormatClass(TextOutputFormat.class);

        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        TextOutputFormat.setOutputPath(job2, new Path(strings[4]));
    }//from  ww  w  .j a v a2s  .co m
    boolean success = job2.waitForCompletion(true);
    return success ? 0 : 4;

}