Example usage for org.apache.hadoop.mapred JobConf setOutputValueClass

List of usage examples for org.apache.hadoop.mapred JobConf setOutputValueClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred JobConf setOutputValueClass.

Prototype

public void setOutputValueClass(Class<?> theClass) 

Source Link

Document

Set the value class for job outputs.

Usage

From source file:edu.yale.cs.hadoopdb.benchmark.GrepTaskHDFS.java

License:Apache License

@Override
protected JobConf configureJob(String... args) throws IOException {

    JobConf conf = new JobConf(getConf(), this.getClass());
    conf.setJobName("grep_hdfs");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(Map.class);
    conf.setNumReduceTasks(0);//from www.  ja  va2s  .  c om

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    if (args.length < 3) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    conf.set(GREP_PATTERN_PARAM, args[0]);

    FileInputFormat.setInputPaths(conf, new Path(args[1]));

    Path outputPath = new Path(args[2]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    return conf;

}

From source file:edu.yale.cs.hadoopdb.benchmark.JoinTaskDB.java

License:Apache License

@Override
protected JobConf configureJob(String... args) throws Exception {
    JobConf conf = new JobConf(JoinTaskDB.class);
    conf.setJobName("join_db");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);
    conf.setNumReduceTasks(1); // Because we look for 1 TOP value

    // join arguments
    conf.setOutputFormat(TextOutputFormat.class);
    for (int i = 0; i < args.length; ++i) {
        if ("-date_l".equals(args[i]))
            conf.set("date_l", args[++i]);
        else if ("-date_u".equals(args[i]))
            conf.set("date_u", args[++i]);
        else if ("-output".equals(args[i]))
            conf.set("output", args[++i]);
    }// ww  w  . ja v  a2s.co m

    // OUTPUT properties
    Path outputPath = new Path(conf.get("output"));
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.set(DBConst.DB_RELATION_ID, "UserVisits");
    conf.set(DBConst.DB_RECORD_READER, JoinRecord.class.getName());

    String TABLE_R = "Rankings";
    String TABLE_UV = "UserVisits";

    conf.set(DBConst.DB_SQL_QUERY,
            "SELECT sourceIP, SUM(pageRank) as sumPageRank, COUNT(pageRank) as countPageRank, SUM(adRevenue) as totalRevenue "
                    + "FROM " + TABLE_R + " AS R, " + TABLE_UV + " AS UV " + "WHERE R.pageURL = UV.destURL "
                    + "AND UV.visitDate BETWEEN '" + conf.get("date_l") + "' AND '" + conf.get("date_u") + "' "
                    + "GROUP BY UV.sourceIP;");

    return conf;
}

From source file:edu.yale.cs.hadoopdb.benchmark.JoinTaskHDFS.java

License:Apache License

public int run(String[] args) throws Exception {

    long startTime = System.currentTimeMillis();

    if (args.length < 5) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }/*from  ww  w. j a v  a 2 s  .co  m*/

    String dateFrom = args[0];
    String dateTo = args[1];
    String rankingsInputDir = args[2];
    String userVisitsInputDir = args[3];
    String outputDir = args[4];

    // output path (delete)
    Path outputPath = new Path(outputDir);
    HDFSUtil.deletePath(outputPath);

    // phase 1
    JobConf conf1 = new JobConf(this.getClass());
    conf1.setJobName("join_hdfs_phase1");
    Path p1Output = new Path(outputDir + "/phase1");
    FileOutputFormat.setOutputPath(conf1, p1Output);
    conf1.setInputFormat(TextInputFormat.class);
    conf1.setOutputFormat(TextOutputFormat.class);

    conf1.setOutputKeyClass(Text.class);
    conf1.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(conf1, new Path(rankingsInputDir), new Path(userVisitsInputDir));

    conf1.set(DATE_FROM_PARAM, dateFrom);
    conf1.set(DATE_TO_PARAM, dateTo);

    conf1.setMapperClass(Phase1Map.class);
    conf1.setReducerClass(Phase1Reduce.class);
    // conf1.setPartitionerClass(theClass)

    RunningJob job1 = JobClient.runJob(conf1);

    if (job1.isSuccessful()) {

        // phase 2

        JobConf conf2 = new JobConf(this.getClass());
        conf2.setJobName("join_hdfs_phase2");
        conf2.setInputFormat(KeyValueTextInputFormat.class);
        conf2.setOutputFormat(TextOutputFormat.class);

        conf2.setOutputKeyClass(Text.class);
        conf2.setOutputValueClass(Text.class);
        conf2.setMapperClass(IdentityMapper.class);
        conf2.setReducerClass(Phase2Reduce.class);

        Path p2Output = new Path(outputDir + "/phase2");
        FileOutputFormat.setOutputPath(conf2, p2Output);
        FileInputFormat.setInputPaths(conf2, p1Output);

        RunningJob job2 = JobClient.runJob(conf2);

        if (job2.isSuccessful()) {

            // phase 3

            JobConf conf3 = new JobConf(this.getClass());
            conf3.setJobName("join_hdfs_phase3");
            conf3.setNumReduceTasks(1);
            conf3.setInputFormat(KeyValueTextInputFormat.class);
            conf3.setOutputKeyClass(Text.class);
            conf3.setOutputValueClass(Text.class);
            conf3.setMapperClass(IdentityMapper.class);
            conf3.setReducerClass(Phase3Reduce.class);

            Path p3Output = new Path(outputDir + "/phase3");
            FileOutputFormat.setOutputPath(conf3, p3Output);
            FileInputFormat.setInputPaths(conf3, p2Output);

            RunningJob job3 = JobClient.runJob(conf3);

            if (!job3.isSuccessful()) {
                System.out.println("PHASE 3 FAILED!!!");
            }

        } else {
            System.out.println("PHASE 2 FAILED!!!");
        }

    } else {
        System.out.println("PHASE 1 FAILED!!!");
    }

    long endTime = System.currentTimeMillis();
    System.out.println("\nJOB TIME : " + (endTime - startTime) + " ms.\n");

    return 0;
}

From source file:edu.yale.cs.hadoopdb.benchmark.SelectionTaskDB.java

License:Apache License

@Override
protected JobConf configureJob(String... args) throws Exception {

    JobConf conf = new JobConf(this.getClass());
    conf.setJobName("selection_db");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapperClass(Map.class);
    conf.setNumReduceTasks(0);//w  w w  . j a v  a2  s.c  om

    if (args.length < 2) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    conf.set(PAGE_RANK_VALUE_PARAM, args[0]);

    // OUTPUT properties
    Path outputPath = new Path(args[1]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.set(DBConst.DB_RELATION_ID, "Rankings");
    conf.set(DBConst.DB_RECORD_READER, RankingsRecord.class.getName());
    conf.set(DBConst.DB_SQL_QUERY, "SELECT pageURL, pageRank FROM Rankings " + "WHERE pageRank > "
            + conf.get(PAGE_RANK_VALUE_PARAM) + ";");

    return conf;
}

From source file:edu.yale.cs.hadoopdb.benchmark.SelectionTaskHDFS.java

License:Apache License

@Override
protected JobConf configureJob(String... args) throws IOException {

    JobConf conf = new JobConf(getConf(), this.getClass());
    conf.setJobName("selection_hdfs");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setNumReduceTasks(0);/*from w w w .j a  v a 2  s .  co  m*/

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    if (args.length < 3) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    conf.set(PAGE_RANK_VALUE_PARAM, args[0]);
    FileInputFormat.setInputPaths(conf, new Path(args[1]));

    // OUTPUT properties
    Path outputPath = new Path(args[2]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    return conf;

}

From source file:edu.yale.cs.hadoopdb.benchmark.UDFAggTaskDB.java

License:Apache License

@Override
protected JobConf configureJob(String... args) throws IOException {

    JobConf conf = new JobConf(this.getClass());
    conf.setJobName("udf_agg_db");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(LongWritable.class);

    conf.setMapperClass(Query4Map.class);
    conf.setCombinerClass(LongSumReducer.class);
    conf.setReducerClass(LongSumReducer.class);
    conf.setOutputFormat(TextOutputFormat.class);

    if (args.length < 1) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }/*  w w w.  j  av a2  s  .  c o  m*/

    // OUTPUT properties
    Path outputPath = new Path(args[0]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.set(DBConst.DB_RELATION_ID, "Documents");
    conf.set(DBConst.DB_RECORD_READER, DocumentRecord.class.getName());
    conf.set(DBConst.DB_SQL_QUERY, "SELECT url, contents FROM Documents;");

    return conf;

}

From source file:edu.yale.cs.hadoopdb.benchmark.UDFAggTaskHDFS.java

License:Apache License

@Override
protected JobConf configureJob(String... args) throws IOException {

    JobConf conf = new JobConf(this.getClass());
    conf.setJobName("udf_agg_hdfs");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(LongWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(LongSumReducer.class);
    conf.setReducerClass(LongSumReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    if (args.length < 2) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }/*  w w w . ja v  a 2  s. com*/

    FileInputFormat.setInputPaths(conf, new Path(args[0]));

    // OUTPUT properties
    Path outputPath = new Path(args[1]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    return conf;

}

From source file:edu.yale.cs.hadoopdb.dataloader.GlobalHasher.java

License:Apache License

@Override
protected JobConf configureJob(String... args) throws Exception {

    JobConf conf = new JobConf(getConf(), this.getClass());
    conf.setJobName("GlobalHasher");

    conf.setMapOutputKeyClass(UnsortableInt.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(NullWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(GlobalHasher.Map.class);
    conf.setReducerClass(GlobalHasher.Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    if (args.length < 5) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }// ww  w  .  ja  v a2s.co  m

    FileInputFormat.setInputPaths(conf, new Path(args[0]));

    // OUTPUT properties
    Path outputPath = new Path(args[1]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    int partNo = Integer.parseInt(args[2]);
    conf.setNumReduceTasks(partNo);

    conf.set(DELIMITER_PARAM, args[3]);

    int hashFieldPos = Integer.parseInt(args[4]);
    conf.setInt(HASH_FIELD_POS_PARAM, hashFieldPos);

    return conf;
}

From source file:eu.scape_project.tb.chutney.ChutneyDriver.java

License:Apache License

/**
 * This method sets up and runs the job on Hadoop
 * @param args The passed through command line arguments
 */// ww w  .  ja  v a 2s.  c  o  m
public int run(String[] args) {

    CommandLineParser parser = new PosixParser();
    Options options = new Options();
    options.addOption("n", "jobname", true, "name to assign to the hadoop job");
    options.addOption("i", "inputlist", true,
            "text file containing list of input files (ensure no trailing carriage returns)");
    options.addOption("t", "jobtype", true,
            "type of job; CLJ (command line job), TSJ (Taverna Server job), TCL (Taverna command line job), XML (XML defined command line job), XWR (XML workflow report)");
    options.addOption("x", "xmlcode", true, "xml definition of job to run for XML jobs");
    options.addOption("h", "help", false, "help text");

    JobConf conf = new JobConf(ChutneyDriver.class);

    String input = null;
    String xmlcode = null;

    CommandLine com;
    try {
        com = parser.parse(options, args);
        if (com.hasOption("help")) {
            throw (new ParseException(""));
        }

        String jobName = Settings.JOB_NAME + "default";
        if (com.hasOption("jobname")) {
            //set the job name to something better than the default
            jobName = Settings.JOB_NAME + com.getOptionValue("jobname");
        }
        conf.setJobName(jobName);

        JobType jobType = JobType.CommandLineJob;
        if (com.hasOption("jobtype")) {
            String value = com.getOptionValue("jobtype").toUpperCase();
            if (value.equals(CommandLineJob.getShortJobType())) {
                jobType = CommandLineJob.getJobType();
            } else if (value.equals(TavernaCommandLineJob.getShortJobType())) {
                jobType = TavernaCommandLineJob.getJobType();
            } else if (value.equals(TavernaServerJob.getShortJobType())) {
                jobType = TavernaServerJob.getJobType();
            } else if (value.equals(XMLCommandLineJob.getShortJobType())) {
                jobType = XMLCommandLineJob.getJobType();
            } else if (value.equals(XMLWorkflowReport.getShortJobType())) {
                jobType = XMLWorkflowReport.getJobType();
            }
        }
        System.out.println("JobType: " + jobType.toString());
        conf.set(Settings.JOBTYPE_CONF_SETTING, jobType.toString());

        if (com.hasOption("xmlcode")) {
            //jobType == JobType.XMLCommandLineJob
            xmlcode = com.getOptionValue("xmlcode");
            //if it is a local file get the full path
            if (new File(xmlcode).exists())
                xmlcode = new File(xmlcode).getAbsolutePath();
            conf.set(Settings.XMLCODE_CONF_SETTING, xmlcode);
        }
        if ((jobType == JobType.XMLCommandLineJob) & (xmlcode == null)) {
            //i.e. no code specified
            System.out.println("No XML code specified on the command line");
            return -1;
        }

        if (com.hasOption("inputlist")) {
            input = com.getOptionValue("inputlist");
        }
        if (input.equals(null)) {
            System.out.println("no input given");
            return -2;
        }

    } catch (ParseException e) {
        HelpFormatter help = new HelpFormatter();
        help.printHelp("hadoop jar TavernaHadoopWrapper.jar", options);
        return -1;
    }

    //using matchbox it may take a while to process the jobs
    //set a longer timeout than the default (10 mins)
    //six hours should be more than enough :/        MMM*SS*MS
    //QAJob testing for 9 tests on ANJO files can take ~4.5hrs+
    conf.set("mapred.task.timeout", Integer.toString(360 * 60 * 1000));

    FileInputFormat.setInputPaths(conf, new Path(input));
    FileOutputFormat.setOutputPath(conf, new Path(conf.getJobName()));

    //set the mapper to this class' mapper
    conf.setMapperClass(Chutney.class);
    //we don't want to reduce
    //conf.setReducerClass(Reducer.class);

    //this input format should split the input by one line per map by default.
    conf.setInputFormat(NLineInputFormat.class);
    conf.setInt("mapred.line.input.format.linespermap", 1);

    //sets how the output is written cf. OutputFormat
    //we can use nulloutputformat if we are writing our own output
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    //this sets maximum jvm reuse
    conf.set("mapred.job.reuse.jvm.num.tasks", "-1");

    //we only want one reduce task
    conf.setNumReduceTasks(1);

    try {
        JobClient.runJob(conf);
    } catch (IOException ioe) {
        ioe.printStackTrace();
        return -1;
    }

    return 0;
}

From source file:eu.stratosphere.hadoopcompatibility.HadoopDataSink.java

License:Apache License

@SuppressWarnings("deprecation")
public HadoopDataSink(OutputFormat<K, V> hadoopFormat, JobConf jobConf, String name,
        List<Operator<Record>> input, StratosphereTypeConverter<K, V> conv, Class<K> keyClass,
        Class<V> valueClass) {
    super(new HadoopOutputFormatWrapper<K, V>(hadoopFormat, jobConf, conv), input, name);
    Preconditions.checkNotNull(hadoopFormat);
    Preconditions.checkNotNull(jobConf);
    this.name = name;
    this.jobConf = jobConf;
    jobConf.setOutputKeyClass(keyClass);
    jobConf.setOutputValueClass(valueClass);
}