Example usage for org.apache.hadoop.mapreduce Job setInputFormatClass

List of usage examples for org.apache.hadoop.mapreduce Job setInputFormatClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setInputFormatClass.

Prototype

public void setInputFormatClass(Class<? extends InputFormat> cls) throws IllegalStateException 

Source Link

Document

Set the InputFormat for the job.

Usage

From source file:com.asakusafw.thundergate.runtime.cache.mapreduce.CacheBuildClient.java

License:Apache License

private void create() throws InterruptedException, IOException {
    Job job = newJob();
    List<StageInput> inputList = new ArrayList<>();
    inputList.add(new StageInput(storage.getPatchContents("*").toString(), TemporaryInputFormat.class,
            CreateCacheMapper.class));
    StageInputDriver.set(job, inputList);
    job.setInputFormatClass(StageInputFormat.class);
    job.setMapperClass(StageInputMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(modelClass);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(modelClass);

    TemporaryOutputFormat.setOutputPath(job, getNextDirectory());
    job.setOutputFormatClass(TemporaryOutputFormat.class);
    job.getConfiguration().setClass("mapred.output.committer.class", LegacyBridgeOutputCommitter.class,
            org.apache.hadoop.mapred.OutputCommitter.class);

    job.setNumReduceTasks(0);/*from   w w w .  j  a  va 2 s .c om*/

    LOG.info(MessageFormat.format("applying patch (no join): {0} / (empty) -> {2}",
            storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
    try {
        boolean succeed = job.waitForCompletion(true);
        LOG.info(MessageFormat.format("applied patch (no join): succeed={0}, {1} / (empty) -> {3}", succeed,
                storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
        if (succeed == false) {
            throw new IOException(MessageFormat.format("failed to apply patch (no join): {0} / (empty) -> {2}",
                    storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
        }
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
    putMeta();
}

From source file:com.asp.tranlog.ImportTsv.java

License:Apache License

/**
 * Sets up the actual job./*w w  w.  ja  va  2  s .  co  m*/
 * 
 * @param conf
 *            The current configuration.
 * @param args
 *            The command line parameters.
 * @return The newly created job.
 * @throws IOException
 *             When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
        throws IOException, ClassNotFoundException {

    // Support non-XML supported characters
    // by re-encoding the passed separator as a Base64 string.
    String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
    if (actualSeparator != null) {
        conf.set(SEPARATOR_CONF_KEY, new String(Base64.encodeBytes(actualSeparator.getBytes())));
    }

    // See if a non-default Mapper was set
    String mapperClassName = conf.get(MAPPER_CONF_KEY);
    Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

    String tableName = args[0];
    Path inputDir = new Path(args[1]);
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(mapperClass);
    FileInputFormat.setInputPaths(job, inputDir);

    String inputCodec = conf.get(INPUT_LZO_KEY);
    if (inputCodec == null) {
        FileInputFormat.setMaxInputSplitSize(job, 67108864l); // max split
        // size =
        // 64m
        job.setInputFormatClass(TextInputFormat.class);
    } else {
        if (inputCodec.equalsIgnoreCase("lzo"))
            job.setInputFormatClass(LzoTextInputFormat.class);
        else {
            usage("not supported compression codec!");
            System.exit(-1);
        }
    }

    job.setMapperClass(mapperClass);

    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
    if (hfileOutPath != null) {
        HTable table = new HTable(conf, tableName);
        job.setReducerClass(PutSortReducer.class);
        Path outputDir = new Path(hfileOutPath);
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        HFileOutputFormat.configureIncrementalLoad(job, table);
    } else {
        // No reducers. Just write straight to table. Call
        // initTableReducerJob
        // to set up the TableOutputFormat.
        TableMapReduceUtil.initTableReducerJob(tableName, null, job);
        job.setNumReduceTasks(0);
    }

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), com.google.common.base.Function.class /*
                                                                                                       * Guava used by TsvParser
                                                                                                       */);
    return job;
}

From source file:com.avira.couchdoop.demo.ImportDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 1) {
        System.err.println("Usage: <output_path>");
        return 1;
    }/*w  w w . j  a  v  a  2s  .  c  om*/
    String output = args[0];

    Job job = Job.getInstance(getConf());
    job.setJarByClass(this.getClass());

    // User classpath takes precedence in favor of Hadoop classpath.
    // This is because the Couchbase client requires a newer version of
    // org.apache.httpcomponents:httpcore.
    job.setUserClassesTakesPrecedence(true);

    // Input
    job.setInputFormatClass(CouchbaseViewInputFormat.class);

    // Mapper
    job.setMapperClass(ImportMapper.class);

    // Reducer
    job.setNumReduceTasks(0);

    // Output
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job, new Path(output));

    if (!job.waitForCompletion(true)) {
        return 2;
    }

    return 0;
}

From source file:com.avira.couchdoop.imp.CouchbaseViewInputFormat.java

License:Apache License

public static void initJob(Job job, String urls, String bucket, String password, String designDocumentName,
        String viewName, String viewKeys) {
    job.setInputFormatClass(CouchbaseViewInputFormat.class);

    Configuration conf = job.getConfiguration();
    conf.set(CouchbaseArgs.ARG_COUCHBASE_URLS.getPropertyName(), urls);
    conf.set(CouchbaseArgs.ARG_COUCHBASE_BUCKET.getPropertyName(), bucket);
    conf.set(CouchbaseArgs.ARG_COUCHBASE_PASSWORD.getPropertyName(), password);
    conf.set(ImportViewArgs.ARG_DESIGNDOC_NAME.getPropertyName(), designDocumentName);
    conf.set(ImportViewArgs.ARG_VIEW_NAME.getPropertyName(), viewName);
    conf.set(ImportViewArgs.ARG_VIEW_KEYS.getPropertyName(), viewKeys);
}

From source file:com.avira.couchdoop.jobs.CouchbaseViewImporter.java

License:Apache License

public Job configureJob(Configuration conf, String output) throws IOException {
    conf.setInt("mapreduce.map.failures.maxpercent", 5);
    conf.setInt("mapred.max.map.failures.percent", 5);
    conf.setInt("mapred.max.tracker.failures", 20);

    Job job = Job.getInstance(conf);
    job.setJarByClass(CouchbaseViewImporter.class);

    // Input//from ww  w  . jav  a2  s .c  o  m
    job.setInputFormatClass(CouchbaseViewInputFormat.class);

    // Mapper
    job.setMapperClass(CouchbaseViewToFileMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // Reducer
    job.setNumReduceTasks(0);

    // Output
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job, new Path(output));

    return job;
}

From source file:com.avira.couchdoop.jobs.CouchbaseViewToHBaseImporter.java

License:Apache License

public Job configureJob(Configuration conf, String outputTable) throws IOException {
    conf.setInt("mapreduce.map.failures.maxpercent", 5);
    conf.setInt("mapred.max.map.failures.percent", 5);
    conf.setInt("mapred.max.tracker.failures", 20);

    Job job = Job.getInstance(conf);
    job.setJarByClass(CouchbaseViewToHBaseImporter.class);

    // Input/*w ww.j  ava 2 s.  co m*/
    job.setInputFormatClass(CouchbaseViewInputFormat.class);

    // Mapper
    job.setMapperClass(CouchbaseViewToHBaseMapper.class);

    // Reducer
    job.setNumReduceTasks(0);

    // Output
    TableMapReduceUtil.initTableReducerJob(outputTable, IdentityTableReducer.class, job);

    return job;
}

From source file:com.awcoleman.BouncyCastleGenericCDRHadoop.BasicDriverMapReduce.java

License:Apache License

public int run(String[] args) throws Exception {

    if (args.length < 2) {
        System.out.println("Missing input and output filenames. Exiting.");
        System.exit(1);//from  w  w  w .j  a  v  a  2 s . com
    }

    Job job = new Job(super.getConf());
    job.setJarByClass(BasicDriverMapReduce.class);
    job.setJobName("BasicDriver1");
    job.setMapperClass(BasicMapper.class);
    job.setReducerClass(BasicReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setInputFormatClass(RawFileAsBinaryInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.awcoleman.BouncyCastleGenericCDRHadoopWithWritable.BasicDriverMapReduce.java

License:Apache License

public int run(String[] args) throws Exception {

    if (args.length < 2) {
        System.out.println("Missing input and output filenames. Exiting.");
        System.exit(1);/*from  w w w .  j a  v a 2 s  .  c o m*/
    }

    @SuppressWarnings("deprecation")
    Job job = new Job(super.getConf());
    job.setJarByClass(BasicDriverMapReduce.class);
    job.setJobName("BasicDriverMapReduce");
    job.setMapperClass(BasicMapper.class);
    job.setReducerClass(BasicReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(CallDetailRecord.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setInputFormatClass(RawFileAsBinaryInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.awcoleman.ExampleJobSummaryLogWithOutput.BinRecToAvroRecDriver.java

License:Apache License

public int run(String[] args) throws Exception {

    String input = null;/*  w w  w  . j  a  v  a2s . co m*/
    String output = null;

    if (args.length < 2) {
        System.err.printf("Usage: %s <input> <output>\n", this.getClass().getSimpleName());
        return -1;
    } else {
        input = args[0];
        output = args[1];
    }

    Job job = Job.getInstance(getConf(), "BinRecToAvroRecDriver");
    Configuration conf = job.getConfiguration();

    //Add job log to hold Driver logging (and any summary info about the dataset,job, or counters we want to write)
    String fapath = createTempFileAppender(job);

    //get schema
    Schema outSchema = ReflectData.get().getSchema(com.awcoleman.examples.avro.BinRecForPartitions.class);
    job.getConfiguration().set("outSchema", outSchema.toString());

    //Job conf settings
    job.setJarByClass(BinRecToAvroRecDriver.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setInputFormatClass(BinRecInputFormat.class);
    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    AvroJob.setOutputKeySchema(job, outSchema);

    AvroJob.setMapOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setMapOutputValueSchema(job, outSchema);

    //Job output compression
    FileOutputFormat.setCompressOutput(job, true);
    job.getConfiguration().set(AvroJob.CONF_OUTPUT_CODEC, DataFileConstants.DEFLATE_CODEC);

    //Input and Output Paths
    FileInputFormat.setInputPaths(job, new Path(input));
    Path outPath = new Path(output);
    FileOutputFormat.setOutputPath(job, outPath);
    outPath.getFileSystem(conf).delete(outPath, true);

    boolean jobCompletionStatus = job.waitForCompletion(true);

    //Print Custom Counters before exiting
    Counters counters = job.getCounters();
    for (MYJOB_CNTRS customCounter : MYJOB_CNTRS.values()) {
        Counter thisCounter = counters.findCounter(customCounter);
        System.out.println("Custom Counter " + customCounter + "=" + thisCounter.getValue());
    }

    long mycnt1 = job.getCounters()
            .findCounter("com.awcoleman.TestingGettingContainerLogger.BinRecToAvroRecDriver$MYJOB_CNTRS",
                    "MYCNT1")
            .getValue();
    long mycnt2 = job.getCounters()
            .findCounter("com.awcoleman.TestingGettingContainerLogger.BinRecToAvroRecDriver$MYJOB_CNTRS",
                    "MYCNT2")
            .getValue();
    long mycnt3 = job.getCounters()
            .findCounter("com.awcoleman.TestingGettingContainerLogger.BinRecToAvroRecDriver$MYJOB_CNTRS",
                    "MYCNT3")
            .getValue();

    long myfakekpi = mycnt1 - mycnt2;

    String msgMyfakekpi = "The Fake KPI of the Dataset: " + String.format("%,d", myfakekpi);
    System.out.println(msgMyfakekpi);
    logger.info(msgMyfakekpi);

    //Finished, so move job log to HDFS in _log dir, clean
    copyTempFileAppenderToHDFSOutpath(job, fapath, output);

    return jobCompletionStatus ? 0 : 1;
}

From source file:com.bah.applefox.main.plugins.fulltextindex.FTLoader.java

License:Apache License

/**
 * run takes the comandline args as arguments (in this case from a
 * configuration file), creates a new job, configures it, initiates it,
 * waits for completion, and returns 0 if it is successful (1 if it is not)
 * /*from   w  w  w  . j  av  a 2 s.co  m*/
 * @param args
 *            the commandline arguments (in this case from a configuration
 *            file)
 * 
 * @return 0 if the job ran successfully and 1 if it isn't
 */
public int run(String[] args) throws Exception {
    try {
        // Initialize variables
        FTLoader.articleFile = args[8];
        FTLoader.maxNGrams = Integer.parseInt(args[9]);
        FTLoader.stopWords = getStopWords();
        FTLoader.dTable = args[10];
        FTLoader.urlCheckedTable = args[11];
        FTLoader.divsFile = args[20];
        FTLoader.exDivs = getExDivs();

        // Give the job a name
        String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis();

        // Create job and set the jar
        Job job = new Job(getConf(), jobName);
        job.setJarByClass(this.getClass());

        String urlTable = args[5];

        job.setInputFormatClass(AccumuloInputFormat.class);
        InputFormatBase.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
        InputFormatBase.setInputInfo(job.getConfiguration(), args[2], args[3].getBytes(), urlTable,
                new Authorizations());

        job.setMapperClass(MapperClass.class);
        job.setMapOutputKeyClass(Key.class);
        job.setMapOutputValueClass(Value.class);

        job.setReducerClass(ReducerClass.class);
        job.setNumReduceTasks(Integer.parseInt(args[4]));

        job.setOutputFormatClass(AccumuloOutputFormat.class);
        job.setOutputKeyClass(Key.class);
        job.setOutputValueClass(Value.class);

        AccumuloOutputFormat.setZooKeeperInstance(job.getConfiguration(), args[0], args[1]);
        AccumuloOutputFormat.setOutputInfo(job.getConfiguration(), args[2], args[3].getBytes(), true, urlTable);

        job.waitForCompletion(true);

        return job.isSuccessful() ? 0 : 1;
    } catch (IOException e) {
        if (e.getMessage() != null) {
            log.error(e.getMessage());
        } else {
            log.error(e.getStackTrace());
        }
    } catch (InterruptedException e) {
        if (e.getMessage() != null) {
            log.error(e.getMessage());
        } else {
            log.error(e.getStackTrace());
        }
    } catch (ClassNotFoundException e) {
        if (e.getMessage() != null) {
            log.error(e.getMessage());
        } else {
            log.error(e.getStackTrace());
        }
    }
    return 1;
}