Example usage for org.apache.hadoop.mapreduce Job setOutputFormatClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job#setOutputFormatClass.

Prototype

public void setOutputFormatClass(Class<? extends OutputFormat> cls) throws IllegalStateException 

Document

Set the OutputFormat for the job.
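
Before the collected examples, here is a minimal pass-through driver sketch showing where the call sits in a typical job definition. PassThroughDriver, the job name, and the argument layout are illustrative; the identity Mapper and Reducer simply copy (offset, line) pairs from text input to text output.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class PassThroughDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "pass-through");
        job.setJarByClass(PassThroughDriver.class);

        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));

        job.setMapperClass(Mapper.class);   // identity mapper
        job.setReducerClass(Reducer.class); // identity reducer
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        // Must be called while the job is still being defined; after submission,
        // setOutputFormatClass throws IllegalStateException.
        job.setOutputFormatClass(TextOutputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}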

Usage

From source file:com.synerzip.analytics.commoncrawl.googleads.counter.GoogleAdsCounterJob.java

License:Apache License

/**
 * Configures and submits the Map Reduce Job to Hadoop
 */
public int run(String[] args) throws Exception {

    String inputPath = null;
    String outputPath = null;
    boolean overwrite = false;
    String s3AccessKey = null;
    String s3SecretKey = null;

    // Read the command line arguments. We're not using GenericOptionsParser
    // to prevent having to include commons.cli as a dependency.
    for (int index = 0; index < args.length; index++) {
        try {

            if (ARGNAME_INPATH.equals(args[index])) {
                inputPath = args[++index];
            } else if (ARGNAME_OUTPATH.equals(args[index])) {
                outputPath = args[++index];
            } else if (ARGNAME_S3ACCESSKEY.equals(args[index])) {
                s3AccessKey = args[++index];
            } else if (ARGNAME_S3SECRETKEY.equals(args[index])) {
                s3SecretKey = args[++index];
            } else if (ARGNAME_MAXFILES.equals(args[index])) {
                // FIXME - No use of static methods
                WarcFileFilter.setMax(Long.parseLong(args[++index]));
            } else if (ARGNAME_OVERWRITE.equals(args[index])) {
                overwrite = true;
            } else {
                LOG.warn("Unsupported argument: " + args[index]);
            }
        } catch (ArrayIndexOutOfBoundsException e) {
            usage();
            throw new IllegalArgumentException();
        }
    }

    if (inputPath == null || outputPath == null) {
        usage();
        throw new IllegalArgumentException();
    }

    if (inputPath.contains("s3n") && (s3AccessKey == null || s3SecretKey == null)) {
        usage();
        LOG.info("Please specify Access Key and Secret Key to access data on AWS S3 storage ");
        throw new IllegalArgumentException();
    }

    // Create the Hadoop job. Job.getInstance(conf) copies the Configuration, so
    // the S3 credentials must be set on conf before the job is created.
    Configuration conf = new Configuration();
    if (inputPath.contains("s3n") && s3AccessKey != null && s3SecretKey != null) {
        conf.set("AWS_ACCESS_KEY_ID", s3AccessKey);
        conf.set("AWS_SECRET_ACCESS_KEY", s3SecretKey);
    }
    Job job = Job.getInstance(conf);
    job.setJarByClass(GoogleAdsCounterJob.class);
    // Scan the provided input path for WARC files.
    LOG.info("setting input path to '" + inputPath + "'");

    WarcFileFilter.setFilter(FILEFILTER);
    FileInputFormat.addInputPath(job, new Path(inputPath));

    // FIXME - I see the problem that you want to give a dynamic number to a
    // static class. My question is, Is this really required, if we just
    // point to a file in s3 that should solve our problem
    FileInputFormat.setInputPathFilter(job, WarcFileFilter.class);

    // Delete the output path directory if it already exists and user wants
    // to overwrite it.
    if (overwrite) {
        LOG.info("clearing the output path at '" + outputPath + "'");
        FileSystem fs = FileSystem.get(new URI(outputPath), conf);
        if (fs.exists(new Path(outputPath))) {
            fs.delete(new Path(outputPath), true);
        }
    }

    // Set the path where final output 'part' files will be saved.
    LOG.info("setting output path to '" + outputPath + "'");
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    /*
     * // Defines additional single text based output 'GoogleAdClient' for
     * the job MultipleOutputs.addNamedOutput(job, "GoogleAdClient",
     * TextOutputFormat.class, Text.class,LongWritable.class );
     * 
     * // Defines additional text based output 'GoogleAdType' for the job
     * MultipleOutputs.addNamedOutput(job,
     * "GoogleAdType",TextOutputFormat.class, Text.class,
     * LongWritable.class);
     */
    // Set which InputFormat class to use.
    job.setInputFormatClass(WARCInputFormat.class);

    // Set which OutputFormat class to use.
    job.setOutputFormatClass(TextOutputFormat.class);

    /*
     * Using MultipleOutputs creates zero-sized default output e.g.: *
     * part-r-00000. To prevent this use LazyOutputFormat instead of
     * job.setOutputFormatClass(TextOutputFormat.class) in Hadoop job
     * configuration.
     */
    // LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    //   job.setPartitionerClass(GoogleAdsCounterPartitioner.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    //job.setNumReduceTasks(4);
    // Set the output data types.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Set which Mapper and Reducer classes to use.
    job.setMapperClass(GoogleAdsCounterMapper.class);
    // job.setMapperClass(CrawlMapper_AdStatsDetails.class);
    job.setReducerClass(GoogleAdsCounterReducer.class);

    // set combiner
    //job.setCombinerClass(GoogleAdsCounterReducer.class);

    // set job name
    job.setJobName("CommonCrawl Data Processing : Counting Google Ads");

    long startTime = System.currentTimeMillis();
    // waitForCompletion() must only be called once; reuse the result for logging.
    boolean completed = job.waitForCompletion(true);
    if (completed) {

        LOG.info("Job completion status : " + completed);
        long endTime = System.currentTimeMillis();

        long difference = endTime - startTime;
        LOG.info("Elapsed milliseconds: " + difference);
        Counter totalResponsePagesCounter = job.getCounters().findCounter(TestCounters.TOTALRESPONSEPAGES);
        LOG.info("totalResponsePagesCounter = " + totalResponsePagesCounter.getValue());

        Counter totalGoogleAdPagesCounter = job.getCounters().findCounter(TestCounters.TOTALGOOGLEADSPAGES);
        LOG.info("totalGoogleAdPagesCounter = " + totalGoogleAdPagesCounter.getValue());

        return 0;
    } else {
        return 1;
    }
}
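
The commented-out MultipleOutputs block and the in-code note about empty part-r-00000 files point at a common variation: register the named outputs and use LazyOutputFormat in place of the plain setOutputFormatClass call. A minimal sketch of that variant, reusing the named-output identifiers from the commented code (whether the project actually enables them is an assumption); both classes live in org.apache.hadoop.mapreduce.lib.output:

// Register the named outputs written by the reducer through MultipleOutputs.
MultipleOutputs.addNamedOutput(job, "GoogleAdClient", TextOutputFormat.class, Text.class, LongWritable.class);
MultipleOutputs.addNamedOutput(job, "GoogleAdType", TextOutputFormat.class, Text.class, LongWritable.class);

// LazyOutputFormat creates the default output files only when a record is actually
// written to them, so the empty part-r-NNNNN files mentioned in the comment never appear.
LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);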

From source file:com.talis.hadoop.rdf.collation.QuadsCollater.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    Configuration configuration = getConf();

    boolean useCompression = configuration.getBoolean(Constants.OPTION_USE_COMPRESSION,
            Constants.OPTION_USE_COMPRESSION_DEFAULT);
    if (useCompression) {
        configuration.setBoolean("mapred.compress.map.output", true);
        configuration.set("mapred.output.compression.type", "BLOCK");
        configuration.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    }

    boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERRIDE_OUTPUT,
            Constants.OPTION_OVERRIDE_OUTPUT_DEFAULT);
    FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration);
    if (overrideOutput) {
        fs.delete(new Path(args[1]), true);
    }

    Job job = new Job(configuration);
    job.setJobName(JOB_NAME);
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    FileOutputFormat.setCompressOutput(job, true);

    job.setInputFormatClass(NQuadsInputFormat.class);
    job.setMapperClass(CollationMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(QuadWritable.class);

    job.setReducerClass(CollationReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(QuadArrayWritable.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    if (LOG.isDebugEnabled())
        Utils.log(job, LOG);

    return job.waitForCompletion(true) ? 0 : 1;
}
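
The compression settings above (also used in the next two examples) rely on the old mapred.* property names, which still work but are deprecated in Hadoop 2.x. A sketch of the same configuration with the newer keys, assuming the same GzipCodec choice:

// Hadoop 2.x equivalents of the deprecated mapred.* compression properties.
configuration.setBoolean("mapreduce.map.output.compress", true);
configuration.set("mapreduce.output.fileoutputformat.compress.type", "BLOCK");
configuration.set("mapreduce.map.output.compress.codec",
        "org.apache.hadoop.io.compress.GzipCodec");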

From source file:com.talis.hadoop.rdf.merge.IndexMerge.java

License:Apache License

public int run(String[] args) throws Exception {

    Configuration configuration = getConf();

    boolean useCompression = configuration.getBoolean(Constants.OPTION_USE_COMPRESSION,
            Constants.OPTION_USE_COMPRESSION_DEFAULT);
    if (useCompression) {
        configuration.setBoolean("mapred.compress.map.output", true);
        configuration.set("mapred.output.compression.type", "BLOCK");
        configuration.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    }

    boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERRIDE_OUTPUT,
            Constants.OPTION_OVERRIDE_OUTPUT_DEFAULT);
    FileSystem fs = FileSystem.get(new Path(args[1]).toUri(), configuration);
    if (overrideOutput) {
        fs.delete(new Path(args[1]), true);
    }

    Job job = new Job(configuration);
    job.setJobName(JOB_NAME);
    job.setJarByClass(getClass());

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);
    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(IndexMergeReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setNumReduceTasks(1);

    if (LOG.isDebugEnabled())
        Utils.log(job, LOG);

    return job.waitForCompletion(true) ? 0 : -1;
}

From source file:com.talis.hadoop.rdf.solr.QuadsIndexer.java

License:Apache License

public int run(String[] args) throws Exception {

    Configuration configuration = getConf();

    boolean useCompression = configuration.getBoolean(Constants.OPTION_USE_COMPRESSION,
            Constants.OPTION_USE_COMPRESSION_DEFAULT);
    if (useCompression) {
        configuration.setBoolean("mapred.compress.map.output", true);
        configuration.set("mapred.output.compression.type", "BLOCK");
        configuration.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
    }

    boolean overrideOutput = configuration.getBoolean(Constants.OPTION_OVERRIDE_OUTPUT,
            Constants.OPTION_OVERRIDE_OUTPUT_DEFAULT);
    FileSystem outputFs = FileSystem.get(new Path(args[1]).toUri(), configuration);
    if (overrideOutput) {
        outputFs.delete(new Path(args[1]), true);
    }

    Job job = new Job(configuration);
    job.setJobName(JOB_NAME);
    job.setJarByClass(getClass());

    int shards = -1;
    boolean compressOutput = false;

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);
    Path solrConfig = new Path(args[2]);
    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);

    if (shards > 0) {
        job.setNumReduceTasks(shards);
    }

    job.setMapperClass(Mapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(QuadArrayWritable.class);

    job.setReducerClass(SolrReducer.class);
    SolrDocumentConverter.setSolrDocumentConverter(LiteralsIndexer.class, job.getConfiguration());

    job.setOutputFormatClass(SolrOutputFormat.class);

    String zipName = "solr.zip";
    FileSystem solrConfigFs = FileSystem.get(solrConfig.toUri(), configuration);
    final URI baseZipUrl = solrConfigFs.getUri().resolve(solrConfig.toString() + '#' + zipName);
    DistributedCache.addCacheArchive(baseZipUrl, job.getConfiguration());
    job.getConfiguration().set(SolrOutputFormat.SETUP_OK, solrConfig.toString());
    SolrOutputFormat.setOutputZipFormat(compressOutput, job.getConfiguration());

    if (LOG.isDebugEnabled())
        Utils.log(job, LOG);

    return job.waitForCompletion(true) ? 0 : -1;
}

From source file:com.telefonica.iot.tidoop.apiext.utils.CKANMapReduceExample.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    // check the number of arguments, show the usage if it is wrong
    if (args.length != 7) {
        showUsage();
        return -1;
    } // if

    // get the arguments
    String ckanHost = args[0];
    String ckanPort = args[1];
    boolean sslEnabled = args[2].equals("true");
    String ckanAPIKey = args[3];
    String ckanInputs = args[4];
    String ckanOutput = args[5];
    String splitsLength = args[6];

    // create and configure a MapReduce job
    Configuration conf = this.getConf();
    Job job = Job.getInstance(conf, "CKAN MapReduce test");
    job.setJarByClass(CKANMapReduceExample.class);
    job.setMapperClass(RecordSizeGetter.class);
    job.setCombinerClass(RecordSizeAdder.class);
    job.setReducerClass(RecordSizeAdder.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(CKANInputFormat.class);
    CKANInputFormat.setInput(job, ckanInputs);
    CKANInputFormat.setEnvironment(job, ckanHost, ckanPort, sslEnabled, ckanAPIKey);
    CKANInputFormat.setSplitsLength(job, splitsLength);
    job.setOutputFormatClass(CKANOutputFormat.class);
    CKANOutputFormat.setEnvironment(job, ckanHost, ckanPort, sslEnabled, ckanAPIKey);
    CKANOutputFormat.setOutputPkg(job, ckanOutput);

    // run the MapReduce job
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.teradata.benchto.generator.HiveTypesGenerator.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
            Option.builder("format").required().hasArg().desc("file format (orc, parquet or text)").build());
    options.addOption(Option.builder("type").required().hasArg().desc(
            "hive type to be generated (bigint, int, boolean, double, binary, date, timestamp, string, decimal or varchar)")
            .build());
    options.addOption(Option.builder("rows").required().hasArg().desc("total row count").build());
    options.addOption(Option.builder("mappers").required().hasArg().desc("total mappers count").build());
    options.addOption(Option.builder("path").hasArg()
            .desc("base path for generating files, default is: /benchmarks/benchto/types").build());
    options.addOption(Option.builder("regex").numberOfArgs(3)
            .desc("generate varchars from regex pattern, arguments are: pattern, min length, max length")
            .build());

    CommandLine line;
    String format;
    String hiveType;
    long numberOfRows;
    long numberOfFiles;
    String basePath;
    Optional<String> regexPattern = Optional.absent();
    Optional<Integer> regexMinLength = Optional.absent();
    Optional<Integer> regexMaxLength = Optional.absent();
    try {
        line = new DefaultParser().parse(options, args);
        format = line.getOptionValue("format");
        hiveType = line.getOptionValue("type");
        numberOfRows = parseLong(line.getOptionValue("rows"));
        numberOfFiles = parseLong(line.getOptionValue("mappers"));
        basePath = line.getOptionValue("path", "/benchmarks/benchto/types");
        if (line.hasOption("regex")) {
            String[] values = line.getOptionValues("regex");
            regexPattern = Optional.of(values[0]);
            regexMinLength = Optional.of(parseInt(values[1]));
            regexMaxLength = Optional.of(parseInt(values[2]));
        }
    } catch (Exception e) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("benchto-generator", options);
        throw e;
    }

    String jobName = format("GenerateData-%s-%s-%d", format, hiveType, numberOfRows);
    Path outputDir = new Path(format("%s/%s-%s/%d", basePath, format, hiveType, numberOfRows));
    Class<? extends OutputFormat> outputFormatClass = getOutputFormatClass(format);

    LOG.info("Generating " + numberOfRows + " " + hiveType + "s, directory: " + outputDir
            + ", number of files: " + numberOfFiles);

    Configuration configuration = new Configuration();
    configuration.set(FORMAT_PROPERTY_NAME, format);
    configuration.set(HIVE_TYPE_PROPERTY_NAME, hiveType);
    configuration.setLong(NUM_ROWS_PROPERTY_NAME, numberOfRows);
    configuration.setLong(NUM_MAPS, numberOfFiles);
    if (regexPattern.isPresent()) {
        configuration.set(REGEX_PATTERN, regexPattern.get());
        configuration.setInt(REGEX_MIN_LENGTH, regexMinLength.get());
        configuration.setInt(REGEX_MAX_LENGTH, regexMaxLength.get());
    }

    Job generatorJob = Job.getInstance(configuration, jobName);
    FileOutputFormat.setOutputPath(generatorJob, outputDir);
    ParquetOutputFormat.setWriteSupportClass(generatorJob, DataWritableWriteSupport.class);
    generatorJob.setJarByClass(HiveTypesGenerator.class);
    generatorJob.setMapperClass(HiveTypesMapper.class);
    generatorJob.setNumReduceTasks(0);
    generatorJob.setOutputKeyClass(NullWritable.class);
    generatorJob.setOutputValueClass(Writable.class);
    generatorJob.setInputFormatClass(CounterInputFormat.class);
    generatorJob.setOutputFormatClass(outputFormatClass);

    return generatorJob.waitForCompletion(true) ? 0 : 1;
}
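
The getOutputFormatClass(format) helper is not part of this excerpt; a hypothetical version that maps the -format option to an OutputFormat class might look like the sketch below. The concrete ORC and Parquet classes are assumptions based on the option description, not the project's actual choices.

// Hypothetical helper: pick an OutputFormat class for the "-format" option.
// The ORC/Parquet classes below are assumptions; the real generator may differ.
private static Class<? extends OutputFormat> getOutputFormatClass(String format) {
    switch (format) {
        case "text":
            return TextOutputFormat.class;
        case "parquet":
            // paired with ParquetOutputFormat.setWriteSupportClass(...) above
            return ParquetOutputFormat.class;
        case "orc":
            return OrcNewOutputFormat.class; // Hive's mapreduce-API ORC output format
        default:
            throw new IllegalArgumentException("Unsupported format: " + format);
    }
}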

From source file:com.teradata.compaction.mapreduce.MergeParquetFilesMR.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "MergeParquet");

    if (args.length != 2) {
        System.err.println("Usage: java -jar MergeParquetFilesMR path_to_input_folder path_to_output_folder ");
        System.exit(0);
    }

    final Path inputPath = new Path(args[0]);
    final Path out = new Path(args[1]);

    Schema schemaParquetFile = getBaseSchema(inputPath, conf);
    job.setJarByClass(MergeParquetFilesMR.class);
    job.setMapperClass(SampleParquetMapper.class);
    job.setReducerClass(SampleParquetReducer.class);
    job.setInputFormatClass(AvroParquetInputFormat.class);
    job.setOutputFormatClass(AvroParquetOutputFormat.class);
    job.setMapOutputKeyClass(NullWritable.class);

    AvroJob.setMapOutputValueSchema(job, schemaParquetFile);
    AvroParquetOutputFormat.setSchema(job, schemaParquetFile);
    FileInputFormat.addInputPath(job, inputPath);
    AvroParquetOutputFormat.setOutputPath(job, out);
    job.setNumReduceTasks(1);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.tfm.utad.reducerdata.ReducerDataPig.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss"); // lowercase yyyy = calendar year ("YYYY" is week-based year)
    Date date = new Date();

    Path inputPath = new Path("/home/jab/camus/reducer-data-pig");
    Path outputDir = new Path("/home/jab/camus/pigdata/" + sdf.format(date));

    // Create configuration
    Configuration conf = new Configuration(true);
    conf.set(FS_DEFAULT_FS, HDFS_LOCALHOST_LOCALDOMAIN);
    FileSystem fs = FileSystem.get(conf);
    Path filesPath = new Path(inputPath + "/*");
    FileStatus[] files = fs.globStatus(filesPath);

    // Create job
    Job job = new Job(conf, "ReducerDataPig");
    job.setJarByClass(ReducerDataPig.class);

    // Setup MapReduce
    job.setMapperClass(ReducerDataPigMapper.class);
    job.setReducerClass(ReducerDataPigReducer.class);
    job.setNumReduceTasks(1);

    // Specify key / value
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(ReducerPigKey.class);

    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Delete output if exists
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }

    // Execute job
    int code = job.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        Counters counters = job.getCounters();
        Counter malformedCounter = counters.findCounter(ReducerDataEnum.MALFORMED_DATA);
        LOG.info("Counter malformed data: " + malformedCounter.getValue());
        for (FileStatus fStatus : files) {
            LOG.info("File name:" + fStatus.getPath());
            if (fStatus.isFile()) {
                LOG.info("Removing file in path:" + fStatus.getPath());
                fs.delete(fStatus.getPath(), false);
            }
        }
    }
}

From source file:com.tfm.utad.reducerdata.ReducerDataVertica.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss"); // lowercase yyyy = calendar year ("YYYY" is week-based year)
    Date date = new Date();

    Path inputPath = new Path("/home/jab/camus/reducer-data-vertica");
    Path outputDir = new Path("/home/jab/camus/verticadb/" + sdf.format(date));

    // Create configuration
    Configuration conf = new Configuration(true);
    conf.set(FS_DEFAULT_FS, HDFS_LOCALHOST_LOCALDOMAIN);
    FileSystem fs = FileSystem.get(conf);
    Path filesPath = new Path(inputPath + "/*");
    FileStatus[] files = fs.globStatus(filesPath);

    // Create job
    Job job = new Job(conf, "ReducerDataVertica");
    job.setJarByClass(ReducerDataVertica.class);

    // Setup MapReduce
    job.setMapperClass(ReducerDataVerticaMapper.class);
    job.setReducerClass(ReducerDataVerticaReducer.class);
    job.setNumReduceTasks(1);

    // Specify key / value
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(ReducerVerticaValue.class);

    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Delete output if exists
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }

    // Execute job
    int code = job.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        Counters counters = job.getCounters();
        Counter malformedCounter = counters.findCounter(ReducerDataEnum.MALFORMED_DATA);
        LOG.info("Counter malformed data: " + malformedCounter.getValue());
        for (FileStatus fStatus : files) {
            LOG.info("File name:" + fStatus.getPath());
            if (fStatus.isFile()) {
                LOG.info("Removing file in path:" + fStatus.getPath());
                fs.delete(fStatus.getPath(), false);
            }
        }
    }
}

From source file:com.toddbodnar.simpleHadoop.distributedHadoopDriver.java

/**
 * Runs a job.
 *
 * @param theJob the MapReduceJob to be run
 * @param verbose if true, output progress information
 */
public static void run(MapReduceJob theJob, boolean verbose)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = GetConfiguration.get();
    Job job = Job.getInstance(conf, theJob.toString());
    job.setJarByClass(distributedHadoopDriver.class);

    job.setMapperClass(theJob.getMapper().getClass());
    job.setReducerClass(theJob.getReducer().getClass());

    job.setMapOutputKeyClass(theJob.getKeyType());
    job.setMapOutputValueClass(theJob.getValueType());

    theJob.writeConfig(job.getConfiguration());

    hdfsFile input = hdfsFile.transferToHDFS(theJob.getInput().getFile());
    if (!input.equals(theJob.getInput().getFile())) {
        garbage_collector.noteCreated(input);
    }
    if (theJob.getClass().equals(join.class)) {
        join jobLeftJoin = (join) theJob;

        hdfsFile input2 = hdfsFile.transferToHDFS(jobLeftJoin.getOtherInput().getFile());
        if (!input2.equals(jobLeftJoin.getOtherInput().getFile())) {
            garbage_collector.noteCreated(input2);
        }

        Mapper maps[] = jobLeftJoin.getMapperPairs();
        MultipleInputs.addInputPath(job, input.getPath(), TextInputFormat.class, maps[0].getClass());
        MultipleInputs.addInputPath(job, input2.getPath(), TextInputFormat.class, maps[1].getClass());
    } else {
        MultipleInputs.addInputPath(job, input.getPath(), TextInputFormat.class);
    }

    job.getConfiguration().set(TextOutputFormat.SEPERATOR, "");

    job.setOutputFormatClass(TextOutputFormat.class);

    //FileInputFormat.setInputPaths(job, new Path(theJob.getInput().getFile().getLocation()));
    Path out = new Path(settings.hdfs_prefix + "/TMP_TABLE_" + theJob.hashCode());
    FileOutputFormat.setOutputPath(job, out);

    boolean success = job.waitForCompletion(true);

    if (!success) {
        System.err.println("Error processing " + theJob);
        return;
    }

    FileSystem fs = FileSystem.get(GetConfiguration.get());

    fs.delete(new Path(out, "_SUCCESS"), false);

    table output = new table(new hdfsFile(out), theJob.getOutput().getColNames());
    output.setSeperator(theJob.getOutput().getSeperator());

    theJob.setOutput(output);

    garbage_collector.noteCreated(output.getFile());
}