Example usage for org.apache.hadoop.mapred JobConf set

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred JobConf set.

Prototype

public void set(String name, String value)

Source Link

Document

Set the value of the name property.

Usage

From source file:edu.yale.cs.hadoopdb.benchmark.GrepTaskDB.java

License:Apache License

@Override
protected JobConf configureJob(String... args) throws IOException {

    JobConf conf = new JobConf(GrepTaskDB.class);
    conf.setJobName("grep_db_job");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(Map.class);
    conf.setNumReduceTasks(0);// ww w .  ja  v a 2s. c  o  m

    // GREP arguments
    conf.setOutputFormat(TextOutputFormat.class);
    for (int i = 0; i < args.length; ++i) {
        if ("-pattern".equals(args[i]))
            conf.set("pattern", args[++i]);
        else if ("-output".equals(args[i]))
            conf.set("output", args[++i]);
    }

    // OUTPUT properties

    Path outputPath = new Path(conf.get("output"));
    System.out.println(conf.get("output"));
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    // DB properties
    conf.set(DBConst.DB_RELATION_ID, "grep");
    conf.set(DBConst.DB_RECORD_READER, DocumentsRecord.class.getName());
    conf.set(DBConst.DB_SQL_QUERY,
            "SELECT key1, field FROM grep WHERE field LIKE '%" + conf.get("pattern") + "%';");

    return conf;

}

From source file:edu.yale.cs.hadoopdb.benchmark.GrepTaskHDFS.java

License:Apache License

@Override
protected JobConf configureJob(String... args) throws IOException {

    JobConf conf = new JobConf(getConf(), this.getClass());
    conf.setJobName("grep_hdfs");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(Map.class);
    conf.setNumReduceTasks(0);/*from ww  w.ja  va  2  s .co m*/

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    if (args.length < 3) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    conf.set(GREP_PATTERN_PARAM, args[0]);

    FileInputFormat.setInputPaths(conf, new Path(args[1]));

    Path outputPath = new Path(args[2]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    return conf;

}

From source file:edu.yale.cs.hadoopdb.benchmark.JoinTaskDB.java

License:Apache License

@Override
protected JobConf configureJob(String... args) throws Exception {
    JobConf conf = new JobConf(JoinTaskDB.class);
    conf.setJobName("join_db");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);
    conf.setNumReduceTasks(1); // Because we look for 1 TOP value

    // join arguments
    conf.setOutputFormat(TextOutputFormat.class);
    for (int i = 0; i < args.length; ++i) {
        if ("-date_l".equals(args[i]))
            conf.set("date_l", args[++i]);
        else if ("-date_u".equals(args[i]))
            conf.set("date_u", args[++i]);
        else if ("-output".equals(args[i]))
            conf.set("output", args[++i]);
    }// w  ww  . j a  v a2s.co  m

    // OUTPUT properties
    Path outputPath = new Path(conf.get("output"));
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.set(DBConst.DB_RELATION_ID, "UserVisits");
    conf.set(DBConst.DB_RECORD_READER, JoinRecord.class.getName());

    String TABLE_R = "Rankings";
    String TABLE_UV = "UserVisits";

    conf.set(DBConst.DB_SQL_QUERY,
            "SELECT sourceIP, SUM(pageRank) as sumPageRank, COUNT(pageRank) as countPageRank, SUM(adRevenue) as totalRevenue "
                    + "FROM " + TABLE_R + " AS R, " + TABLE_UV + " AS UV " + "WHERE R.pageURL = UV.destURL "
                    + "AND UV.visitDate BETWEEN '" + conf.get("date_l") + "' AND '" + conf.get("date_u") + "' "
                    + "GROUP BY UV.sourceIP;");

    return conf;
}

From source file:edu.yale.cs.hadoopdb.benchmark.JoinTaskHDFS.java

License:Apache License

public int run(String[] args) throws Exception {

    long startTime = System.currentTimeMillis();

    if (args.length < 5) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }//  ww w . j  a  v  a 2s .c  om

    String dateFrom = args[0];
    String dateTo = args[1];
    String rankingsInputDir = args[2];
    String userVisitsInputDir = args[3];
    String outputDir = args[4];

    // output path (delete)
    Path outputPath = new Path(outputDir);
    HDFSUtil.deletePath(outputPath);

    // phase 1
    JobConf conf1 = new JobConf(this.getClass());
    conf1.setJobName("join_hdfs_phase1");
    Path p1Output = new Path(outputDir + "/phase1");
    FileOutputFormat.setOutputPath(conf1, p1Output);
    conf1.setInputFormat(TextInputFormat.class);
    conf1.setOutputFormat(TextOutputFormat.class);

    conf1.setOutputKeyClass(Text.class);
    conf1.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(conf1, new Path(rankingsInputDir), new Path(userVisitsInputDir));

    conf1.set(DATE_FROM_PARAM, dateFrom);
    conf1.set(DATE_TO_PARAM, dateTo);

    conf1.setMapperClass(Phase1Map.class);
    conf1.setReducerClass(Phase1Reduce.class);
    // conf1.setPartitionerClass(theClass)

    RunningJob job1 = JobClient.runJob(conf1);

    if (job1.isSuccessful()) {

        // phase 2

        JobConf conf2 = new JobConf(this.getClass());
        conf2.setJobName("join_hdfs_phase2");
        conf2.setInputFormat(KeyValueTextInputFormat.class);
        conf2.setOutputFormat(TextOutputFormat.class);

        conf2.setOutputKeyClass(Text.class);
        conf2.setOutputValueClass(Text.class);
        conf2.setMapperClass(IdentityMapper.class);
        conf2.setReducerClass(Phase2Reduce.class);

        Path p2Output = new Path(outputDir + "/phase2");
        FileOutputFormat.setOutputPath(conf2, p2Output);
        FileInputFormat.setInputPaths(conf2, p1Output);

        RunningJob job2 = JobClient.runJob(conf2);

        if (job2.isSuccessful()) {

            // phase 3

            JobConf conf3 = new JobConf(this.getClass());
            conf3.setJobName("join_hdfs_phase3");
            conf3.setNumReduceTasks(1);
            conf3.setInputFormat(KeyValueTextInputFormat.class);
            conf3.setOutputKeyClass(Text.class);
            conf3.setOutputValueClass(Text.class);
            conf3.setMapperClass(IdentityMapper.class);
            conf3.setReducerClass(Phase3Reduce.class);

            Path p3Output = new Path(outputDir + "/phase3");
            FileOutputFormat.setOutputPath(conf3, p3Output);
            FileInputFormat.setInputPaths(conf3, p2Output);

            RunningJob job3 = JobClient.runJob(conf3);

            if (!job3.isSuccessful()) {
                System.out.println("PHASE 3 FAILED!!!");
            }

        } else {
            System.out.println("PHASE 2 FAILED!!!");
        }

    } else {
        System.out.println("PHASE 1 FAILED!!!");
    }

    long endTime = System.currentTimeMillis();
    System.out.println("\nJOB TIME : " + (endTime - startTime) + " ms.\n");

    return 0;
}

From source file:edu.yale.cs.hadoopdb.benchmark.SelectionTaskDB.java

License:Apache License

@Override
protected JobConf configureJob(String... args) throws Exception {

    JobConf conf = new JobConf(this.getClass());
    conf.setJobName("selection_db");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapperClass(Map.class);
    conf.setNumReduceTasks(0);//  ww  w. j a  va  2  s  . co m

    if (args.length < 2) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    conf.set(PAGE_RANK_VALUE_PARAM, args[0]);

    // OUTPUT properties
    Path outputPath = new Path(args[1]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.set(DBConst.DB_RELATION_ID, "Rankings");
    conf.set(DBConst.DB_RECORD_READER, RankingsRecord.class.getName());
    conf.set(DBConst.DB_SQL_QUERY, "SELECT pageURL, pageRank FROM Rankings " + "WHERE pageRank > "
            + conf.get(PAGE_RANK_VALUE_PARAM) + ";");

    return conf;
}

From source file:edu.yale.cs.hadoopdb.benchmark.SelectionTaskHDFS.java

License:Apache License

@Override
protected JobConf configureJob(String... args) throws IOException {

    JobConf conf = new JobConf(getConf(), this.getClass());
    conf.setJobName("selection_hdfs");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setNumReduceTasks(0);/*  w ww  .ja va  2 s.co m*/

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    if (args.length < 3) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    conf.set(PAGE_RANK_VALUE_PARAM, args[0]);
    FileInputFormat.setInputPaths(conf, new Path(args[1]));

    // OUTPUT properties
    Path outputPath = new Path(args[2]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    return conf;

}

From source file:edu.yale.cs.hadoopdb.benchmark.UDFAggTaskDB.java

License:Apache License

@Override
protected JobConf configureJob(String... args) throws IOException {

    JobConf conf = new JobConf(this.getClass());
    conf.setJobName("udf_agg_db");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(LongWritable.class);

    conf.setMapperClass(Query4Map.class);
    conf.setCombinerClass(LongSumReducer.class);
    conf.setReducerClass(LongSumReducer.class);
    conf.setOutputFormat(TextOutputFormat.class);

    if (args.length < 1) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }/*from  w  w w .  java2 s  .c o  m*/

    // OUTPUT properties
    Path outputPath = new Path(args[0]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.set(DBConst.DB_RELATION_ID, "Documents");
    conf.set(DBConst.DB_RECORD_READER, DocumentRecord.class.getName());
    conf.set(DBConst.DB_SQL_QUERY, "SELECT url, contents FROM Documents;");

    return conf;

}

From source file:edu.yale.cs.hadoopdb.dataloader.GlobalHasher.java

License:Apache License

@Override
protected JobConf configureJob(String... args) throws Exception {

    JobConf conf = new JobConf(getConf(), this.getClass());
    conf.setJobName("GlobalHasher");

    conf.setMapOutputKeyClass(UnsortableInt.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(NullWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(GlobalHasher.Map.class);
    conf.setReducerClass(GlobalHasher.Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    if (args.length < 5) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }//w ww  . j  a  va  2 s. c  o m

    FileInputFormat.setInputPaths(conf, new Path(args[0]));

    // OUTPUT properties
    Path outputPath = new Path(args[1]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    int partNo = Integer.parseInt(args[2]);
    conf.setNumReduceTasks(partNo);

    conf.set(DELIMITER_PARAM, args[3]);

    int hashFieldPos = Integer.parseInt(args[4]);
    conf.setInt(HASH_FIELD_POS_PARAM, hashFieldPos);

    return conf;
}

From source file:edu.yale.cs.hadoopdb.exec.DBJobBase.java

License:Apache License

/**
 * Job config initialization (command-line params etc).  
 *///  w  w w  .  j  a  v  a  2  s  . c om
protected JobConf initConf(String[] args) throws Exception {

    List<String> other_args = new ArrayList<String>();

    Path configuration_file = null;
    boolean replication = false;

    for (int i = 0; i < args.length; ++i) {
        if (("-" + DBConst.DB_CONFIG_FILE).equals(args[i])) {
            configuration_file = new Path(args[++i]);
        } else if ("-replication".equals(args[i])) {
            replication = true;
        } else {
            other_args.add(args[i]);
        }
    }

    JobConf conf = null;

    conf = configureJob(other_args.toArray(new String[0]));
    LOG.info(conf.getJobName());
    LOG.info(conf.get(DBConst.DB_SQL_QUERY));

    if (conf.get(DBConst.DB_RELATION_ID) == null || conf.get(DBConst.DB_SQL_QUERY) == null
            || conf.get(DBConst.DB_RECORD_READER) == null) {
        throw new Exception(
                "ERROR: DB Job requires a relation, an SQL Query and a Record Reader class to be configured.\n"
                        + "Please specify using: conf.set(\"" + DBConst.DB_RELATION_ID
                        + "\", <relation name>), conf.set(\"" + DBConst.DB_SQL_QUERY + "\", <SQL QUERY>)\n"
                        + "and code an appropriate Record Reader and specify conf.set(\""
                        + DBConst.DB_RECORD_READER + "\", <Record reader class name>)\n");
    }

    if (replication) {
        conf.setBoolean(DBConst.DB_REPLICATION, true);
    }

    if (configuration_file == null) {
        if (conf.get(DBConst.DB_CONFIG_FILE) == null) {
            throw new Exception("No HadoopDB config file!");
        }
    } else {
        conf.set(DBConst.DB_CONFIG_FILE, configuration_file.toString());
    }

    setInputFormat(conf);

    return conf;
}

From source file:etl.cmd.test.XFsTestCase.java

License:Apache License

/**
 * Return a JobClient to the test JobTracker.
 *
 * @return a JobClient to the test JobTracker.
 * @throws HadoopAccessorException thrown if the JobClient could not be obtained.
 *///from   w w w.jav a 2  s. c  om
protected JobClient createJobClient() throws HadoopAccessorException {
    JobConf conf = has.createJobConf(getJobTrackerUri());
    conf.set("mapreduce.jobtracker.address", getJobTrackerUri());
    conf.set("fs.defaultFS", getNameNodeUri());

    return has.createJobClient(getTestUser(), conf);
}