Example usage of org.apache.hadoop.mapred.JobConf.setOutputValueClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf.setOutputValueClass.

Prototype

public void setOutputValueClass(Class<?> theClass)

Source Link

Document

Set the value class for job outputs.

Usage

From source file:edu.yale.cs.hadoopdb.benchmark.GrepTaskHDFS.java

License:Apache License

/**
 * Builds the configuration for the map-only "grep_hdfs" job.
 *
 * @param args {@code args[0]} is stored under {@code GREP_PATTERN_PARAM}, {@code args[1]} is the
 *             input path and {@code args[2]} is the output path
 * @return the populated job configuration
 * @throws IOException propagated from the helpers used while configuring the job
 * @throws RuntimeException if fewer than three arguments are supplied
 */
@Override
protected JobConf configureJob(String... args) throws IOException {

    final JobConf job = new JobConf(getConf(), this.getClass());
    job.setJobName("grep_hdfs");

    // Map-only job: the reduce task count is set to zero.
    job.setMapperClass(Map.class);
    job.setNumReduceTasks(0);

    // Text in, text out, with Text keys and values.
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    if (args.length < 3) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    final String pattern = args[0];
    final Path source = new Path(args[1]);
    final Path target = new Path(args[2]);

    job.set(GREP_PATTERN_PARAM, pattern);
    FileInputFormat.setInputPaths(job, source);

    // The output path is handed to HDFSUtil.deletePath before being registered
    // (presumably to clear output left by a previous run -- confirm in HDFSUtil).
    HDFSUtil.deletePath(target);
    FileOutputFormat.setOutputPath(job, target);

    return job;
}

From source file:edu.yale.cs.hadoopdb.benchmark.JoinTaskDB.java

License:Apache License

/**
 * Builds the configuration for the "join_db" job.
 *
 * <p>Recognised arguments are {@code -date_l <value>}, {@code -date_u <value>} and
 * {@code -output <path>}; any other argument is ignored. Each recognised value is stored in the
 * job configuration under the option name without its leading dash.
 *
 * @param args command line arguments as described above
 * @return the populated job configuration
 * @throws IllegalArgumentException if a recognised option has no value following it, or if
 *         {@code date_l}, {@code date_u} or {@code output} is absent from the configuration
 *         after the arguments have been processed
 * @throws Exception propagated from the helpers used while configuring the job
 */
@Override
protected JobConf configureJob(String... args) throws Exception {
    JobConf conf = new JobConf(JoinTaskDB.class);
    conf.setJobName("join_db");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);
    conf.setNumReduceTasks(1); // Because we look for 1 TOP value

    // join arguments
    conf.setOutputFormat(TextOutputFormat.class);
    for (int i = 0; i < args.length; ++i) {
        String option = args[i];
        if ("-date_l".equals(option) || "-date_u".equals(option) || "-output".equals(option)) {
            // A flag given as the very last argument has no value to consume.
            if (i + 1 >= args.length) {
                throw new IllegalArgumentException(
                        "Missing value for option " + option + " of " + JoinTaskDB.class);
            }
            conf.set(option.substring(1), args[++i]);
        }
    }

    // Fail early with a clear message instead of building a Path from null or
    // embedding the text "null" in the SQL query below.
    for (String required : new String[] { "date_l", "date_u", "output" }) {
        if (conf.get(required) == null) {
            throw new IllegalArgumentException(
                    "Missing required argument -" + required + " for " + JoinTaskDB.class);
        }
    }

    // OUTPUT properties
    Path outputPath = new Path(conf.get("output"));
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    String rankingsTable = "Rankings";
    String userVisitsTable = "UserVisits";

    conf.set(DBConst.DB_RELATION_ID, userVisitsTable);
    conf.set(DBConst.DB_RECORD_READER, JoinRecord.class.getName());

    // NOTE(review): the date bounds are concatenated into the SQL text verbatim; they are
    // expected to come from a trusted command line -- confirm before exposing this elsewhere.
    conf.set(DBConst.DB_SQL_QUERY,
            "SELECT sourceIP, SUM(pageRank) as sumPageRank, COUNT(pageRank) as countPageRank, SUM(adRevenue) as totalRevenue "
                    + "FROM " + rankingsTable + " AS R, " + userVisitsTable + " AS UV "
                    + "WHERE R.pageURL = UV.destURL "
                    + "AND UV.visitDate BETWEEN '" + conf.get("date_l") + "' AND '" + conf.get("date_u") + "' "
                    + "GROUP BY UV.sourceIP;");

    return conf;
}

From source file:edu.yale.cs.hadoopdb.benchmark.JoinTaskHDFS.java

License:Apache License

/**
 * Runs the three-phase HDFS join, each phase reading the previous phase's output.
 *
 * <p>Phase outputs are written to {@code <outputDir>/phase1}, {@code /phase2} and
 * {@code /phase3}. A later phase is only started when the previous one reports success.
 * The elapsed wall-clock time is printed in every case.
 *
 * @param args {@code args[0]} lower date bound, {@code args[1]} upper date bound,
 *             {@code args[2]} rankings input directory, {@code args[3]} user-visits input
 *             directory, {@code args[4]} output directory
 * @return 0 if all three phases report success, 1 if any phase reports failure
 * @throws RuntimeException if fewer than five arguments are supplied
 * @throws Exception propagated from job configuration or execution
 */
public int run(String[] args) throws Exception {

    long startTime = System.currentTimeMillis();

    if (args.length < 5) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    String dateFrom = args[0];
    String dateTo = args[1];
    String rankingsInputDir = args[2];
    String userVisitsInputDir = args[3];
    String outputDir = args[4];

    // output path (delete)
    Path outputPath = new Path(outputDir);
    HDFSUtil.deletePath(outputPath);

    Path p1Output = new Path(outputDir + "/phase1");
    Path p2Output = new Path(outputDir + "/phase2");
    Path p3Output = new Path(outputDir + "/phase3");

    // Non-zero once any phase has failed, so callers can detect the failure.
    int exitCode = 0;

    // phase 1
    JobConf conf1 = new JobConf(this.getClass());
    conf1.setJobName("join_hdfs_phase1");
    FileOutputFormat.setOutputPath(conf1, p1Output);
    conf1.setInputFormat(TextInputFormat.class);
    conf1.setOutputFormat(TextOutputFormat.class);

    conf1.setOutputKeyClass(Text.class);
    conf1.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(conf1, new Path(rankingsInputDir), new Path(userVisitsInputDir));

    conf1.set(DATE_FROM_PARAM, dateFrom);
    conf1.set(DATE_TO_PARAM, dateTo);

    conf1.setMapperClass(Phase1Map.class);
    conf1.setReducerClass(Phase1Reduce.class);

    RunningJob job1 = JobClient.runJob(conf1);
    if (!job1.isSuccessful()) {
        System.out.println("PHASE 1 FAILED!!!");
        exitCode = 1;
    }

    // phase 2
    if (exitCode == 0) {
        JobConf conf2 = new JobConf(this.getClass());
        conf2.setJobName("join_hdfs_phase2");
        conf2.setInputFormat(KeyValueTextInputFormat.class);
        conf2.setOutputFormat(TextOutputFormat.class);

        conf2.setOutputKeyClass(Text.class);
        conf2.setOutputValueClass(Text.class);
        conf2.setMapperClass(IdentityMapper.class);
        conf2.setReducerClass(Phase2Reduce.class);

        FileOutputFormat.setOutputPath(conf2, p2Output);
        FileInputFormat.setInputPaths(conf2, p1Output);

        RunningJob job2 = JobClient.runJob(conf2);
        if (!job2.isSuccessful()) {
            System.out.println("PHASE 2 FAILED!!!");
            exitCode = 1;
        }
    }

    // phase 3
    if (exitCode == 0) {
        JobConf conf3 = new JobConf(this.getClass());
        conf3.setJobName("join_hdfs_phase3");
        conf3.setNumReduceTasks(1);
        conf3.setInputFormat(KeyValueTextInputFormat.class);
        conf3.setOutputKeyClass(Text.class);
        conf3.setOutputValueClass(Text.class);
        conf3.setMapperClass(IdentityMapper.class);
        conf3.setReducerClass(Phase3Reduce.class);

        FileOutputFormat.setOutputPath(conf3, p3Output);
        FileInputFormat.setInputPaths(conf3, p2Output);

        RunningJob job3 = JobClient.runJob(conf3);
        if (!job3.isSuccessful()) {
            System.out.println("PHASE 3 FAILED!!!");
            exitCode = 1;
        }
    }

    long endTime = System.currentTimeMillis();
    System.out.println("\nJOB TIME : " + (endTime - startTime) + " ms.\n");

    return exitCode;
}

From source file:edu.yale.cs.hadoopdb.benchmark.SelectionTaskDB.java

License:Apache License

/**
 * Builds the configuration for the map-only "selection_db" job.
 *
 * @param args {@code args[0]} is stored under {@code PAGE_RANK_VALUE_PARAM} and used as the
 *             lower bound in the SQL query; {@code args[1]} is the output path
 * @return the populated job configuration
 * @throws RuntimeException if fewer than two arguments are supplied
 * @throws Exception propagated from the helpers used while configuring the job
 */
@Override
protected JobConf configureJob(String... args) throws Exception {

    final JobConf job = new JobConf(this.getClass());
    job.setJobName("selection_db");

    // Map-only job: the reduce task count is set to zero.
    job.setMapperClass(Map.class);
    job.setNumReduceTasks(0);

    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    if (args.length < 2) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    job.set(PAGE_RANK_VALUE_PARAM, args[0]);

    // Output location; it is handed to HDFSUtil.deletePath before being registered.
    final Path target = new Path(args[1]);
    HDFSUtil.deletePath(target);
    FileOutputFormat.setOutputPath(job, target);

    // Database side: relation, record reader and the selection query.
    job.set(DBConst.DB_RELATION_ID, "Rankings");
    job.set(DBConst.DB_RECORD_READER, RankingsRecord.class.getName());

    // The threshold is read back from the configuration rather than taken from args directly.
    final String threshold = job.get(PAGE_RANK_VALUE_PARAM);
    final String query = "SELECT pageURL, pageRank FROM Rankings " + "WHERE pageRank > " + threshold + ";";
    job.set(DBConst.DB_SQL_QUERY, query);

    return job;
}

From source file:edu.yale.cs.hadoopdb.benchmark.SelectionTaskHDFS.java

License:Apache License

/**
 * Builds the configuration for the map-only "selection_hdfs" job.
 *
 * @param args {@code args[0]} is stored under {@code PAGE_RANK_VALUE_PARAM}, {@code args[1]} is
 *             the input path and {@code args[2]} is the output path
 * @return the populated job configuration
 * @throws IOException propagated from the helpers used while configuring the job
 * @throws RuntimeException if fewer than three arguments are supplied
 */
@Override
protected JobConf configureJob(String... args) throws IOException {

    final JobConf job = new JobConf(getConf(), this.getClass());
    job.setJobName("selection_hdfs");

    // Map-only job: the reduce task count is set to zero.
    job.setMapperClass(Map.class);
    job.setNumReduceTasks(0);

    // Text in, text out, with Text keys and IntWritable values.
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    if (args.length < 3) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    final String pageRankThreshold = args[0];
    final Path source = new Path(args[1]);
    final Path target = new Path(args[2]);

    job.set(PAGE_RANK_VALUE_PARAM, pageRankThreshold);
    FileInputFormat.setInputPaths(job, source);

    // Output location; it is handed to HDFSUtil.deletePath before being registered.
    HDFSUtil.deletePath(target);
    FileOutputFormat.setOutputPath(job, target);

    return job;
}

From source file:edu.yale.cs.hadoopdb.benchmark.UDFAggTaskDB.java

License:Apache License

/**
 * Builds the configuration for the "udf_agg_db" job, which reads from the "Documents" relation
 * and uses {@code LongSumReducer} as both combiner and reducer.
 *
 * @param args {@code args[0]} is the output path
 * @return the populated job configuration
 * @throws IOException propagated from the helpers used while configuring the job
 * @throws RuntimeException if no argument is supplied
 */
@Override
protected JobConf configureJob(String... args) throws IOException {

    final JobConf job = new JobConf(this.getClass());
    job.setJobName("udf_agg_db");

    // Processing pipeline: mapper, then the same summing class as combiner and reducer.
    job.setMapperClass(Query4Map.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);

    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    if (args.length < 1) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    // Output location; it is handed to HDFSUtil.deletePath before being registered.
    final Path target = new Path(args[0]);
    HDFSUtil.deletePath(target);
    FileOutputFormat.setOutputPath(job, target);

    // Database side: relation, record reader and the query to run.
    job.set(DBConst.DB_RELATION_ID, "Documents");
    job.set(DBConst.DB_RECORD_READER, DocumentRecord.class.getName());
    job.set(DBConst.DB_SQL_QUERY, "SELECT url, contents FROM Documents;");

    return job;
}

From source file:edu.yale.cs.hadoopdb.benchmark.UDFAggTaskHDFS.java

License:Apache License

/**
 * Builds the configuration for the "udf_agg_hdfs" job, which reads text input and uses
 * {@code LongSumReducer} as both combiner and reducer.
 *
 * @param args {@code args[0]} is the input path and {@code args[1]} is the output path
 * @return the populated job configuration
 * @throws IOException propagated from the helpers used while configuring the job
 * @throws RuntimeException if fewer than two arguments are supplied
 */
@Override
protected JobConf configureJob(String... args) throws IOException {

    final JobConf job = new JobConf(this.getClass());
    job.setJobName("udf_agg_hdfs");

    // Processing pipeline: mapper, then the same summing class as combiner and reducer.
    job.setMapperClass(Map.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(LongSumReducer.class);

    // Text in, text out, with Text keys and LongWritable values.
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    if (args.length < 2) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    final Path source = new Path(args[0]);
    final Path target = new Path(args[1]);

    FileInputFormat.setInputPaths(job, source);

    // Output location; it is handed to HDFSUtil.deletePath before being registered.
    HDFSUtil.deletePath(target);
    FileOutputFormat.setOutputPath(job, target);

    return job;
}

From source file:edu.yale.cs.hadoopdb.dataloader.GlobalHasher.java

License:Apache License

/**
 * Builds the configuration for the "GlobalHasher" job.
 *
 * <p>The numeric arguments are parsed before the output path is passed to
 * {@code HDFSUtil.deletePath}, so a malformed number is reported before that step runs.
 *
 * @param args {@code args[0]} input path, {@code args[1]} output path, {@code args[2]} number
 *             of reduce tasks, {@code args[3]} value stored under {@code DELIMITER_PARAM},
 *             {@code args[4]} integer stored under {@code HASH_FIELD_POS_PARAM}
 * @return the populated job configuration
 * @throws RuntimeException if fewer than five arguments are supplied
 * @throws NumberFormatException if {@code args[2]} or {@code args[4]} is not an integer
 * @throws Exception propagated from the helpers used while configuring the job
 */
@Override
protected JobConf configureJob(String... args) throws Exception {

    JobConf conf = new JobConf(getConf(), this.getClass());
    conf.setJobName("GlobalHasher");

    conf.setMapOutputKeyClass(UnsortableInt.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(NullWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(GlobalHasher.Map.class);
    conf.setReducerClass(GlobalHasher.Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    if (args.length < 5) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    // Validate the numeric arguments up front, before anything is deleted.
    int partNo = Integer.parseInt(args[2]);
    int hashFieldPos = Integer.parseInt(args[4]);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));

    // OUTPUT properties
    Path outputPath = new Path(args[1]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.setNumReduceTasks(partNo);
    conf.set(DELIMITER_PARAM, args[3]);
    conf.setInt(HASH_FIELD_POS_PARAM, hashFieldPos);

    return conf;
}

From source file:eu.scape_project.tb.chutney.ChutneyDriver.java

License:Apache License

/**
 * Sets up and runs the job on Hadoop.
 *
 * @param args the passed-through command line arguments
 * @return 0 when the job ran without an {@link IOException}; -1 when help was requested, the
 *         arguments could not be parsed, an XML command line job was requested without XML
 *         code, or running the job threw an {@link IOException}; -2 when no input list was given
 */
public int run(String[] args) {

    final String usage = "hadoop jar TavernaHadoopWrapper.jar";

    CommandLineParser parser = new PosixParser();
    Options options = new Options();
    options.addOption("n", "jobname", true, "name to assign to the hadoop job");
    options.addOption("i", "inputlist", true,
            "text file containing list of input files (ensure no trailing carriage returns)");
    options.addOption("t", "jobtype", true,
            "type of job; CLJ (command line job), TSJ (Taverna Server job), TCL (Taverna command line job), XML (XML defined command line job), XWR (XML workflow report)");
    options.addOption("x", "xmlcode", true, "xml definition of job to run for XML jobs");
    options.addOption("h", "help", false, "help text");

    JobConf conf = new JobConf(ChutneyDriver.class);

    String input = null;
    String xmlcode = null;

    CommandLine com;
    try {
        com = parser.parse(options, args);
        if (com.hasOption("help")) {
            // Print the usage directly rather than throwing an exception to reach the catch block.
            new HelpFormatter().printHelp(usage, options);
            return -1;
        }

        String jobName = Settings.JOB_NAME + "default";
        if (com.hasOption("jobname")) {
            //set the job name to something better than the default
            jobName = Settings.JOB_NAME + com.getOptionValue("jobname");
        }
        conf.setJobName(jobName);

        // Unrecognised job type values fall back to the command line job.
        JobType jobType = JobType.CommandLineJob;
        if (com.hasOption("jobtype")) {
            String value = com.getOptionValue("jobtype").toUpperCase();
            if (value.equals(CommandLineJob.getShortJobType())) {
                jobType = CommandLineJob.getJobType();
            } else if (value.equals(TavernaCommandLineJob.getShortJobType())) {
                jobType = TavernaCommandLineJob.getJobType();
            } else if (value.equals(TavernaServerJob.getShortJobType())) {
                jobType = TavernaServerJob.getJobType();
            } else if (value.equals(XMLCommandLineJob.getShortJobType())) {
                jobType = XMLCommandLineJob.getJobType();
            } else if (value.equals(XMLWorkflowReport.getShortJobType())) {
                jobType = XMLWorkflowReport.getJobType();
            }
        }
        System.out.println("JobType: " + jobType.toString());
        conf.set(Settings.JOBTYPE_CONF_SETTING, jobType.toString());

        if (com.hasOption("xmlcode")) {
            xmlcode = com.getOptionValue("xmlcode");
            //if it is a local file get the full path
            File xmlFile = new File(xmlcode);
            if (xmlFile.exists()) {
                xmlcode = xmlFile.getAbsolutePath();
            }
            conf.set(Settings.XMLCODE_CONF_SETTING, xmlcode);
        }
        if (jobType == JobType.XMLCommandLineJob && xmlcode == null) {
            //i.e. no code specified
            System.out.println("No XML code specified on the command line");
            return -1;
        }

        if (com.hasOption("inputlist")) {
            input = com.getOptionValue("inputlist");
        }
        // Must be a reference comparison: calling equals() on a null input would throw a
        // NullPointerException instead of reaching this message.
        if (input == null) {
            System.out.println("no input given");
            return -2;
        }

    } catch (ParseException e) {
        new HelpFormatter().printHelp(usage, options);
        return -1;
    }

    //using matchbox it may take a while to process the jobs
    //set a longer timeout than the default (10 mins)
    //six hours should be more than enough :/        MMM*SS*MS
    //QAJob testing for 9 tests on ANJO files can take ~4.5hrs+
    conf.set("mapred.task.timeout", Integer.toString(360 * 60 * 1000));

    // The output directory is named after the job.
    FileInputFormat.setInputPaths(conf, new Path(input));
    FileOutputFormat.setOutputPath(conf, new Path(conf.getJobName()));

    //set the mapper to this class' mapper; no reducer class is configured
    conf.setMapperClass(Chutney.class);

    //this input format should split the input by one line per map by default.
    conf.setInputFormat(NLineInputFormat.class);
    conf.setInt("mapred.line.input.format.linespermap", 1);

    //sets how the output is written cf. OutputFormat
    //we can use nulloutputformat if we are writing our own output
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    //this sets maximum jvm reuse
    conf.set("mapred.job.reuse.jvm.num.tasks", "-1");

    //we only want one reduce task
    conf.setNumReduceTasks(1);

    try {
        JobClient.runJob(conf);
    } catch (IOException ioe) {
        ioe.printStackTrace();
        return -1;
    }

    return 0;
}

From source file:eu.stratosphere.hadoopcompatibility.HadoopDataSink.java

License:Apache License

/**
 * Creates a data sink that writes through the given Hadoop output format.
 *
 * <p>The format, job configuration and converter are wrapped in a
 * {@code HadoopOutputFormatWrapper} that is passed to the superclass constructor. The key and
 * value classes are then registered on the supplied {@code jobConf} as the job's output types.
 * The null checks run after the superclass constructor has already received the wrapper.
 *
 * @param hadoopFormat the Hadoop output format to wrap; checked for null
 * @param jobConf      the job configuration to use and update; checked for null
 * @param name         the name of this sink
 * @param input        the operators feeding this sink
 * @param conv         the converter handed to the output format wrapper
 * @param keyClass     the class registered as the job's output key class
 * @param valueClass   the class registered as the job's output value class
 */
@SuppressWarnings("deprecation")
public HadoopDataSink(OutputFormat<K, V> hadoopFormat, JobConf jobConf, String name,
        List<Operator<Record>> input, StratosphereTypeConverter<K, V> conv, Class<K> keyClass,
        Class<V> valueClass) {
    super(new HadoopOutputFormatWrapper<K, V>(hadoopFormat, jobConf, conv), input, name);

    Preconditions.checkNotNull(hadoopFormat);
    Preconditions.checkNotNull(jobConf);

    // Record the output types on the caller-supplied configuration, then retain it.
    jobConf.setOutputKeyClass(keyClass);
    jobConf.setOutputValueClass(valueClass);

    this.jobConf = jobConf;
    this.name = name;
}