Example usage for org.apache.hadoop.mapreduce Job setInputFormatClass

List of usage examples for org.apache.hadoop.mapreduce Job setInputFormatClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job setInputFormatClass.

Prototype

public void setInputFormatClass(Class<? extends InputFormat> cls) throws IllegalStateException 

Document

Set the InputFormat for the job. The call must be made before the job is submitted; once the job is running, it throws an IllegalStateException.
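
Before the usage examples taken from real projects, here is a minimal, self-contained driver sketch (not taken from any of the source files below) showing where setInputFormatClass fits in a typical driver. The class name SetInputFormatExample and the command-line input/output paths are illustrative assumptions.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class SetInputFormatExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "setInputFormatClass example");
        job.setJarByClass(SetInputFormatExample.class);

        // Choose how input splits are created and how records are read.
        // Must be set before the job is submitted, otherwise an
        // IllegalStateException is thrown.
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // No mapper or reducer classes are set, so the identity Mapper runs
        // and each input line is written back out as (offset, line).
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}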

Usage

From source file:com.pivotal.gfxd.demo.mapreduce.LoadAverage.java

License:Open Source License

/**
 * This method assumes fs.default.name is passed as args[0].
 *
 * @param args
 * @return 0 if the job succeeds, 1 otherwise
 * @throws Exception
 */
@Override
public int run(String[] args) throws Exception {
    System.out.println("Starting MapReduce Job");
    GfxdDataSerializable.initTypes();
    Configuration conf = new Configuration();
    //Configuration conf = getConf();

    Path outputPath = new Path("/output");
    String hdfsHomeDir = "/sensorStore"; //args[1];
    String tableName = "RAW_SENSOR";
    String outTableName = "LOAD_AVERAGES_SHADOW";
    String gfxdURL = conf.get("gemfirexd.url", "jdbc:gemfirexd://localhost:1527");

    // conf.set("fs.default.name", args[0]);
    String hdfsUrl = conf.get("fs.defaultFS");

    FileSystem hdfs = FileSystem.get(new URI(hdfsUrl), conf);

    // Retrieve last run timestamp
    long now = System.currentTimeMillis();
    long lastStart = getLastStart(hdfs);

    outputPath.getFileSystem(conf).delete(outputPath, true);

    conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
    conf.set(RowInputFormat.INPUT_TABLE, tableName);
    conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
    conf.setLong(RowInputFormat.START_TIME_MILLIS, lastStart);
    conf.setLong(RowInputFormat.END_TIME_MILLIS, now);

    conf.set(RowOutputFormat.OUTPUT_URL, gfxdURL);
    conf.set(RowOutputFormat.OUTPUT_TABLE, outTableName);

    // print config to troubleshoot possible issues
    // Configuration.dumpConfiguration(conf, new PrintWriter(System.out));

    Job job = Job.getInstance(conf, "LoadAverage");

    job.setNumReduceTasks(1);

    job.setInputFormatClass(RowInputFormat.class);

    // configure mapper and reducer
    job.setJarByClass(LoadAverage.class);
    job.setMapperClass(LoadAverageMapper.class);
    job.setReducerClass(LoadAverageReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LoadKey.class);

    TextOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(RowOutputFormat.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(LoadAverageModel.class);

    boolean jobSuccess = job.waitForCompletion(true);
    if (jobSuccess) {
        writeLastStart(hdfs, now);
    }

    return jobSuccess ? 0 : 1;
}

From source file:com.pivotal.hawq.mapreduce.demo.HAWQInputFormatDemo.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        printUsage();
    }

    String dbURL = args[0];
    String tableName = args[1];
    String outputPath = args[2];
    String username = (args.length >= 4) ? args[3] : null;
    String password = (args.length >= 5) ? args[4] : null;

    Job job = new Job(getConf(), "HAWQInputFormatDemo");
    job.setJarByClass(HAWQInputFormatDemo.class);

    job.setInputFormatClass(HAWQInputFormat.class);

    HAWQInputFormat.setInput(job.getConfiguration(), dbURL, username, password, tableName);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapperClass(HAWQEchoMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    long startTime = System.currentTimeMillis();
    int returnCode = job.waitForCompletion(true) ? 0 : 1;
    long endTime = System.currentTimeMillis();

    System.out.println("Time elapsed: " + (endTime - startTime) + " milliseconds");

    return returnCode;
}

From source file:com.pivotal.hawq.mapreduce.demo.HAWQInputFormatDemo2.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
    }

    String metadataFile = args[0];
    String outputPath = args[1];

    Job job = new Job(getConf(), "HAWQInputFormatDemo2");
    job.setJarByClass(HAWQInputFormatDemo2.class);

    job.setInputFormatClass(HAWQInputFormat.class);

    HAWQInputFormat.setInput(job.getConfiguration(), metadataFile);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapperClass(HAWQEchoMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    long startTime = System.currentTimeMillis();
    int returnCode = job.waitForCompletion(true) ? 0 : 1;
    long endTime = System.currentTimeMillis();

    System.out.println("Time elapsed: " + (endTime - startTime) + " milliseconds");

    return returnCode;
}

From source file:com.pivotal.hawq.mapreduce.MapReduceClusterDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 3 && args.length != 4) {
        System.err.printf("Usage: %s [generic options] <tableName> <dburl> <output> [<mapper_classname>]\n",
                getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    String tableName = args[0];
    String dbUrl = args[1];
    Path outputPath = new Path(args[2]);
    Class<? extends Mapper> mapperClass = (args.length == 3) ? HAWQTableMapper.class
            : (Class<? extends Mapper>) Class.forName(args[3]);

    // delete previous output
    FileSystem fs = FileSystem.get(getConf());
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    fs.close();

    Job job = new Job(getConf(), "job_read_" + tableName);
    job.setJarByClass(MapReduceClusterDriver.class);

    job.setInputFormatClass(HAWQInputFormat.class);
    HAWQInputFormat.setInput(job.getConfiguration(), dbUrl, null, null, tableName);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setMapperClass(mapperClass);
    job.setReducerClass(HAWQTableReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.pivotal.hawq.mapreduce.MapReduceLocalDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2 && args.length != 3) {
        System.err.printf("Usage: %s [generic options] <metadata_file> <output> [<mapper_classname>]\n",
                getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    String metadataFile = args[0];
    Path outputPath = new Path(args[1]);
    Class<? extends Mapper> mapperClass = (args.length == 2) ? HAWQTableMapper.class
            : (Class<? extends Mapper>) Class.forName(args[2]);

    // delete previous output
    FileSystem fs = FileSystem.getLocal(getConf());
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    fs.close();

    Job job = new Job(getConf());
    job.setJarByClass(MapReduceLocalDriver.class);

    job.setInputFormatClass(HAWQInputFormat.class);
    HAWQInputFormat.setInput(job.getConfiguration(), metadataFile);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setMapperClass(mapperClass);
    job.setReducerClass(HAWQTableReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.pivotal.hawq.mapreduce.pt.HAWQInputFormatPerformanceTest_TPCH.java

License:Apache License

private int runMapReduceJob() throws Exception {
    Path outputPath = new Path("/output");
    // delete previous output
    FileSystem fs = FileSystem.get(getConf());
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    fs.close();

    Job job = new Job(getConf());
    job.setJarByClass(HAWQInputFormatPerformanceTest_TPCH.class);

    job.setInputFormatClass(HAWQInputFormat.class);

    long startTime = System.currentTimeMillis();
    HAWQInputFormat.setInput(job.getConfiguration(), MRFormatConfiguration.TEST_DB_URL, null, null, tableName);
    metadataExtractTime = System.currentTimeMillis() - startTime;

    FileOutputFormat.setOutputPath(job, outputPath);

    job.setMapperClass(TPCHTableMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Void.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.placeiq.piqconnect.Runner.java

License:Apache License

private Job buildJob1(Path input1, Path input2, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);
    conf.set("mapred.output.compression.type", "BLOCK");

    Job job = new Job(conf, "data-piqid.piqconnect.IterationStage1");
    job.setJarByClass(Runner.class);

    job.setMapperClass(IterationStage1._Mapper.class);
    job.setReducerClass(IterationStage1._Reducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setMapOutputKeyClass(IterationStage1.JoinKey.class);
    job.setMapOutputValueClass(BlockWritable.class);
    job.setOutputKeyClass(VLongWritable.class);
    job.setOutputValueClass(BlockWritable.class);
    job.setGroupingComparatorClass(IterationStage1.IndexComparator.class);
    job.setPartitionerClass(IterationStage1.IndexPartitioner.class);
    job.setSortComparatorClass(IterationStage1.SortComparator.class);

    FileInputFormat.setInputPaths(job, input1, input2);
    SequenceFileOutputFormat.setOutputPath(job, output);
    SequenceFileOutputFormat.setCompressOutput(job, true);

    setCompression(job);

    return job;
}

From source file:com.placeiq.piqconnect.Runner.java

License:Apache License

private Job buildJob2(Path input, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);

    Job job = new Job(conf, "data-piqid.piqconnect.IterationStage2");
    job.setJarByClass(Runner.class);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(IterationStage2._Reducer.class);
    job.setNumReduceTasks(numberOfReducers);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(VLongWritable.class);
    job.setMapOutputValueClass(BlockWritable.class);
    job.setOutputKeyClass(BlockIndexWritable.class);
    job.setOutputValueClass(BlockWritable.class);
    job.setSortComparatorClass(VLongWritableComparator.class);

    SequenceFileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    FileOutputFormat.setCompressOutput(job, true);

    setCompression(job);
    return job;
}

From source file:com.placeiq.piqconnect.Runner.java

License:Apache License

private Job buildJob3(Path input, Path output) throws Exception {
    Configuration conf = getConf();
    conf.setInt(Constants.PROP_BLOCK_SIZE, blockSize);

    Job job = new Job(conf, "data-piqid.piqconnect.FinalResultBuilder");
    job.setJarByClass(Runner.class);

    job.setMapperClass(FinalResultBuilder._Mapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(VLongWritable.class);
    job.setOutputValueClass(VLongWritable.class);

    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);
    FileOutputFormat.setCompressOutput(job, true);

    setCompression(job);
    return job;
}

From source file:com.rim.logdriver.util.Cat.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // The command line options
    List<Path> paths = new ArrayList<Path>();
    Path outputDir = null;

    // Load input files from the command line
    if (args.length < 2) {
        System.out.println("usage: [genericOptions] input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    for (int i = 0; i < args.length - 1; i++) {
        for (FileStatus f : fs.globStatus(new Path(args[i]))) {
            paths.add(f.getPath());
        }
    }
    outputDir = new Path(args[args.length - 1]);

    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Cat.class);
    jobConf.setIfUnset("mapred.job.name", "Cat Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(CatMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}