Example usage for org.apache.hadoop.mapreduce Job setInputFormatClass

List of usage examples for org.apache.hadoop.mapreduce Job setInputFormatClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job setInputFormatClass.

Prototype

public void setInputFormatClass(Class<? extends InputFormat> cls) throws IllegalStateException 

Document

Set the InputFormat for the job. The call must be made before the job is submitted; once the job is running, it throws an IllegalStateException.
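
Before the usage examples taken from real projects, here is a minimal, self-contained driver sketch (not taken from any of the source files below) showing where setInputFormatClass fits in a typical driver. The class name SetInputFormatExample and the command-line input/output paths are illustrative assumptions.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class SetInputFormatExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "setInputFormatClass example");
        job.setJarByClass(SetInputFormatExample.class);

        // Choose how input splits are created and how records are read.
        // Must be set before the job is submitted, otherwise an
        // IllegalStateException is thrown.
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // No mapper or reducer classes are set, so the identity Mapper runs
        // and each input line is written back out as (offset, line).
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}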

Usage

From source file:com.pivotal.gfxd.demo.mapreduce.LoadAverage.java

License:Open Source License

/**
 * This method assumes fs.default.name is passed as args[0].
 *
 * @param args
 * @return 0 if the job succeeds, 1 otherwise
 * @throws Exception
 */
@Override
public int run(String[] args) throws Exception {
    System.out.println("Starting MapReduce Job");
    GfxdDataSerializable.initTypes();
    Configuration conf = new Configuration();
    //Configuration conf = getConf();

    Path outputPath = new Path("/output");
    String hdfsHomeDir = "/sensorStore"; //args[1];
    String tableName = "RAW_SENSOR";
    String outTableName = "LOAD_AVERAGES_SHADOW";
    String gfxdURL = conf.get("gemfirexd.url", "jdbc:gemfirexd://localhost:1527");

    // conf.set("fs.default.name", args[0]);
    String hdfsUrl = conf.get("fs.defaultFS");

    FileSystem hdfs = FileSystem.get(new URI(hdfsUrl), conf);

    // Retrieve last run timestamp
    long now = System.currentTimeMillis();
    long lastStart = getLastStart(hdfs);

    outputPath.getFileSystem(conf).delete(outputPath, true);

    conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
    conf.set(RowInputFormat.INPUT_TABLE, tableName);
    conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
    conf.setLong(RowInputFormat.START_TIME_MILLIS, lastStart);
    conf.setLong(RowInputFormat.END_TIME_MILLIS, now);

    conf.set(RowOutputFormat.OUTPUT_URL, gfxdURL);
    conf.set(RowOutputFormat.OUTPUT_TABLE, outTableName);

    // print config to troubleshoot possible issues
    // Configuration.dumpConfiguration(conf, new PrintWriter(System.out));

    Job job = Job.getInstance(conf, "LoadAverage");

    job.setNumReduceTasks(1);

    job.setInputFormatClass(RowInputFormat.class);

    // configure mapper and reducer
    job.setJarByClass(LoadAverage.class);
    job.setMapperClass(LoadAverageMapper.class);
    job.setReducerClass(LoadAverageReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LoadKey.class);

    TextOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(RowOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(LoadAverageModel.class);

    boolean jobSuccess = job.waitForCompletion(true);
    if (jobSuccess) {
        writeLastStart(hdfs, now);
    }

    return jobSuccess ? 0 : 1;
}

From source file:com.pivotal.hawq.mapreduce.demo.HAWQInputFormatDemo.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        printUsage();
    }

    String dbURL = args[0];
    String tableName = args[1];
    String outputPath = args[2];
    String username = (args.length >= 4) ? args[3] : null;
    String password = (args.length >= 5) ? args[4] : null;

    Job job = new Job(getConf(), "HAWQInputFormatDemo");
    job.setJarByClass(HAWQInputFormatDemo.class);

    job.setInputFormatClass(HAWQInputFormat.class);

    HAWQInputFormat.setInput(job.getConfiguration(), dbURL, username, password, tableName);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapperClass(HAWQEchoMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    long startTime = System.currentTimeMillis();
    int returnCode = job.waitForCompletion(true) ? 0 : 1;
    long endTime = System.currentTimeMillis();

    System.out.println("Time elapsed: " + (endTime - startTime) + " milliseconds");

    return returnCode;
}

From source file:com.pivotal.hawq.mapreduce.demo.HAWQInputFormatDemo2.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
    }

    String metadataFile = args[0];
    String outputPath = args[1];

    Job job = new Job(getConf(), "HAWQInputFormatDemo2");
    job.setJarByClass(HAWQInputFormatDemo2.class);

    job.setInputFormatClass(HAWQInputFormat.class);

    HAWQInputFormat.setInput(job.getConfiguration(), metadataFile);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapperClass(HAWQEchoMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    long startTime = System.currentTimeMillis();
    int returnCode = job.waitForCompletion(true) ? 0 : 1;
    long endTime = System.currentTimeMillis();

    System.out.println("Time elapsed: " + (endTime - startTime) + " milliseconds");

    return returnCode;
}

From source file:com.pivotal.hawq.mapreduce.MapReduceClusterDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 3 && args.length != 4) {
        System.err.printf("Usage: %s [generic options] <tableName> <dburl> <output> [<mapper_classname>]\n",
                getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    String tableName = args[0];
    String dbUrl = args[1];
    Path outputPath = new Path(args[2]);
    Class<? extends Mapper> mapperClass = (args.length == 3) ? HAWQTableMapper.class
            : (Class<? extends Mapper>) Class.forName(args[3]);

    // delete previous output
    FileSystem fs = FileSystem.get(getConf());
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    fs.close();

    Job job = new Job(getConf(), "job_read_" + tableName);
    job.setJarByClass(MapReduceClusterDriver.class);

    job.setInputFormatClass(HAWQInputFormat.class);
    HAWQInputFormat.setInput(job.getConfiguration(), dbUrl, null, null, tableName);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setMapperClass(mapperClass);
    job.setReducerClass(HAWQTableReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.pivotal.hawq.mapreduce.MapReduceLocalDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2 && args.length != 3) {
        System.err.printf("Usage: %s [generic options] <metadata_file> <output> [<mapper_classname>]\n",
                getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    String metadataFile = args[0];
    Path outputPath = new Path(args[1]);
    Class<? extends Mapper> mapperClass = (args.length == 2) ? HAWQTableMapper.class
            : (Class<? extends Mapper>) Class.forName(args[2]);

    // delete previous output
    FileSystem fs = FileSystem.getLocal(getConf());
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    fs.close();

    Job job = new Job(getConf());
    job.setJarByClass(MapReduceLocalDriver.class);

    job.setInputFormatClass(HAWQInputFormat.class);
    HAWQInputFormat.setInput(job.getConfiguration(), metadataFile);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setMapperClass(mapperClass);
    job.setReducerClass(HAWQTableReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.pivotal.hawq.mapreduce.pt.HAWQInputFormatPerformanceTest_TPCH.java

License:Apache License

private int runMapReduceJob() throws Exception {
    Path outputPath = new Path("/output");
    // delete previous output
    FileSystem fs = FileSystem.get(getConf());
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    fs.close();

    Job job = new Job(getConf());
    job.setJarByClass(HAWQInputFormatPerformanceTest_TPCH.class);

    job.setInputFormatClass(HAWQInputFormat.class);

    long startTime = System.currentTimeMillis();
    HAWQInputFormat.setInput(job.getConfiguration(), MRFormatConfiguration.TEST_DB_URL, null, null, tableName);
    metadataExtractTime = System.currentTimeMillis() - startTime;

    FileOutputFormat.setOutputPath(job, outputPath);

    job.setMapperClass(TPCHTableMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Void.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.placeiq.piqconnect.Runner.java

License:Apache License

private Job buildJob1(Path input1, Path input2, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);
    conf.set("mapred.output.compression.type", "BLOCK");

    Job job = new Job(conf, "data-piqid.piqconnect.IterationStage1");
    job.setJarByClass(Runner.class);

    job.setMapperClass(IterationStage1._Mapper.class);
    job.setReducerClass(IterationStage1._Reducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setMapOutputKeyClass(IterationStage1.JoinKey.class);
    job.setMapOutputValueClass(BlockWritable.class);
    job.setOutputKeyClass(VLongWritable.class);
    job.setOutputValueClass(BlockWritable.class);
    job.setGroupingComparatorClass(IterationStage1.IndexComparator.class);
    job.setPartitionerClass(IterationStage1.IndexPartitioner.class);
    job.setSortComparatorClass(IterationStage1.SortComparator.class);

    FileInputFormat.setInputPaths(job, input1, input2);
    SequenceFileOutputFormat.setOutputPath(job, output);
    SequenceFileOutputFormat.setCompressOutput(job, true);

    setCompression(job);

    return job;
}

From source file:com.placeiq.piqconnect.Runner.java

License:Apache License

private Job buildJob2(Path input, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);

    Job job = new Job(conf, "data-piqid.piqconnect.IterationStage2");
    job.setJarByClass(Runner.class);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(IterationStage2._Reducer.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(VLongWritable.class);
    job.setMapOutputValueClass(BlockWritable.class);
    job.setOutputKeyClass(BlockIndexWritable.class);
    job.setOutputValueClass(BlockWritable.class);
    job.setSortComparatorClass(VLongWritableComparator.class);

    SequenceFileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    FileOutputFormat.setCompressOutput(job, true);

    setCompression(job);
    return job;
}

From source file:com.placeiq.piqconnect.Runner.java

License:Apache License

private Job buildJob3(Path input, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);

    Job job = new Job(conf, "data-piqid.piqconnect.FinalResultBuilder");
    job.setJarByClass(Runner.class);

    job.setMapperClass(FinalResultBuilder._Mapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(VLongWritable.class);
    job.setOutputValueClass(VLongWritable.class);

    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    FileOutputFormat.setCompressOutput(job, true);

    setCompression(job);
    return job;
}

From source file:com.rim.logdriver.util.Cat.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 2) {
        System.out.println("usage: [genericOptions] input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    for (int i = 0; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Cat.class);
    jobConf.setIfUnset("mapred.job.name", "Cat Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(CatMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}