Example usage for org.apache.hadoop.mapreduce.lib.db DBInputFormat setInput

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce.lib.db DBInputFormat setInput.

Prototype

public static void setInput(Job job, Class<? extends DBWritable> inputClass, String tableName,
        String conditions, String orderBy, String... fieldNames)

Source Link

Document

Initializes the map-part of the job with the appropriate input settings.

Usage

From source file:cn.itcast.hadoop.mr.wordcount.DBCountPageView.java

License:Apache License

@Override
//Usage DBCountPageView [driverClass dburl]
public int run(String[] args) throws Exception {

    //?MySql//from w w w.j  av  a  2  s. c  om
    String driverClassName = DRIVER_CLASS;
    String url = DB_URL; //??

    //????
    if (args.length > 1) {
        driverClassName = args[0];
        url = args[1];
    }

    //driverClassNameurl??
    initialize(driverClassName, url);

    //hdfs?
    Configuration conf = getConf();

    //??
    DBConfiguration.configureDB(conf, driverClassName, url); //???

    //job
    Job job = Job.getInstance(conf);

    //job??
    job.setJobName("Count Pageviews of URLs");

    //job
    job.setJarByClass(DBCountPageView.class);

    //Map
    job.setMapperClass(PageviewMapper.class);

    //Combiner
    job.setCombinerClass(LongSumReducer.class);

    //reduce
    job.setReducerClass(PageviewReducer.class);

    //DB?
    //   setInput(Job job, Class<? extends DBWritable> inputClass, String tableName, String conditions, String orderBy, String... fieldNames)
    DBInputFormat.setInput(job, AccessRecord.class, "HAccess", null, "url", AccessFieldNames); //?

    //FileOutputFormat.setoutput ?
    DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames);//

    //Mapkey?
    job.setMapOutputKeyClass(Text.class);

    //MapValue?
    job.setMapOutputValueClass(LongWritable.class);

    //Reducekey?
    job.setOutputKeyClass(PageviewRecord.class);

    //Reducevalue?
    job.setOutputValueClass(NullWritable.class);

    int ret;//job?????
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;

        boolean correct = verify();
        if (!correct) {
            throw new RuntimeException("Evaluation was not correct!");
        }
    } finally {
        shutdown();
    }
    return ret;
}

From source file:com.phantom.hadoop.examples.DBCountPageView.java

License:Apache License

@Override
// Usage DBCountPageView [driverClass dburl]
public int run(String[] args) throws Exception {

    String driverClassName = DRIVER_CLASS;
    String url = DB_URL;/*from  ww w  .  ja v a2  s  .  c  o  m*/

    if (args.length > 1) {
        driverClassName = args[0];
        url = args[1];
    }

    initialize(driverClassName, url);
    Configuration conf = getConf();

    DBConfiguration.configureDB(conf, driverClassName, url);

    Job job = new Job(conf);

    job.setJobName("Count Pageviews of URLs");
    job.setJarByClass(DBCountPageView.class);
    job.setMapperClass(PageviewMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(PageviewReducer.class);

    DBInputFormat.setInput(job, AccessRecord.class, "Access", null, "url", AccessFieldNames);

    DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setOutputKeyClass(PageviewRecord.class);
    job.setOutputValueClass(NullWritable.class);
    int ret;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
        boolean correct = verify();
        if (!correct) {
            throw new RuntimeException("Evaluation was not correct!");
        }
    } finally {
        shutdown();
    }
    return ret;
}

From source file:hadoop.examples.DBCountPageView.java

License:Apache License

public int run(String[] args) throws Exception {

    String driverClassName = DRIVER_CLASS;
    String url = DB_URL;/*from w w w . j  a  va  2  s .c  o m*/

    if (args.length > 1) {
        driverClassName = args[0];
        url = args[1];
    }

    initialize(driverClassName, url);
    Configuration conf = getConf();

    DBConfiguration.configureDB(conf, driverClassName, url);

    Job job = new Job(conf);

    job.setJobName("Count Pageviews of URLs");
    job.setJarByClass(DBCountPageView.class);
    job.setMapperClass(PageviewMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(PageviewReducer.class);

    DBInputFormat.setInput(job, AccessRecord.class, "Access", null, "url", AccessFieldNames);

    DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setOutputKeyClass(PageviewRecord.class);
    job.setOutputValueClass(NullWritable.class);
    int ret;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
        boolean correct = verify();
        if (!correct) {
            throw new RuntimeException("Evaluation was not correct!");
        }
    } finally {
        shutdown();
    }
    return ret;
}

From source file:nl.sanoma.hdt.report.generator.ReportGeneratorDriver.java

License:Open Source License

/**
 * Job to import the values from DB to a MapFile in HDFS
 *
 * @param dBOutputPath the path where the MapFile output will be created
 * @return returns the exitCode of the job
 * @throws ConfigurationException// w  w w  . j av  a  2s.  c  o  m
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public Boolean importDBToMapFile(String dBOutputPath)
        throws ConfigurationException, IOException, InterruptedException, ClassNotFoundException {

    PropertiesConfiguration dBConfig = new PropertiesConfiguration(DB_CONFIG_FILE_NAME);
    //TODO: error handling if config file is missing

    //create MapFile from DB job
    Job job = new Job(getConf());
    Configuration conf = job.getConfiguration();
    job.setJobName("DB to MapFile import");
    job.setJarByClass(ReportGeneratorDriver.class);
    job.setMapperClass(DBToMapFileMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(DBInputFormat.class);
    job.setOutputFormatClass(MapFileOutputFormat.class);
    MapFileOutputFormat.setOutputPath(job, new Path(dBOutputPath));
    //TODO: add compression to MapFile
    job.setNumReduceTasks(0);
    DBConfiguration.configureDB(conf, dBConfig.getString(DB_CONFIG_DRIVER), dBConfig.getString(DB_CONFIG_URL),
            dBConfig.getString(DB_CONFIG_USER), dBConfig.getString(DB_CONFIG_PASSWORD));
    String[] fields = { "id", "name", "category", "price" };
    DBInputFormat.setInput(job, DBInputWritable.class, dBConfig.getString(DB_CONFIG_TABLE_NAME), null, "id",
            fields);

    return job.waitForCompletion(true);
}

From source file:org.apache.hadoop.examples.DBCountPageView.java

License:Apache License

@Override
//Usage DBCountPageView [driverClass dburl]
public int run(String[] args) throws Exception {

    String driverClassName = DRIVER_CLASS;
    String url = DB_URL;//from www  .j a  va 2  s.c om

    if (args.length > 1) {
        driverClassName = args[0];
        url = args[1];
    }

    initialize(driverClassName, url);
    Configuration conf = getConf();

    DBConfiguration.configureDB(conf, driverClassName, url);

    Job job = Job.getInstance(conf);

    job.setJobName("Count Pageviews of URLs");
    job.setJarByClass(DBCountPageView.class);
    job.setMapperClass(PageviewMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(PageviewReducer.class);

    DBInputFormat.setInput(job, AccessRecord.class, "HAccess", null, "url", AccessFieldNames);

    DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setOutputKeyClass(PageviewRecord.class);
    job.setOutputValueClass(NullWritable.class);
    int ret;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
        boolean correct = verify();
        if (!correct) {
            throw new RuntimeException("Evaluation was not correct!");
        }
    } finally {
        shutdown();
    }
    return ret;
}