Example usage for org.apache.hadoop.mapred JobConf setOutputValueClass

Introduction

On this page you can find example usage of org.apache.hadoop.mapred.JobConf.setOutputValueClass.

Prototype

public void setOutputValueClass(Class<?> theClass) 

Document

Set the value class for job outputs.
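
Before the collected examples below, here is a minimal, self-contained word-count sketch showing where setOutputValueClass fits in a classic org.apache.hadoop.mapred job configuration. The class names (WordCountExample, TokenMapper, SumReducer) and the command-line paths are illustrative, not taken from the sources below. Note that when no separate map output types are declared via setMapOutputKeyClass/setMapOutputValueClass, the types passed to setOutputKeyClass/setOutputValueClass also describe the mapper's output.

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class WordCountExample {

    // Emits (word, 1) for every token in the input line.
    public static class TokenMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output,
                Reporter reporter) throws IOException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                output.collect(word, ONE);
            }
        }
    }

    // Sums the counts collected for each word.
    public static class SumReducer extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf(WordCountExample.class);
        conf.setJobName("wordcount");

        // Declare the key/value types of the job output; because no separate map
        // output types are set, these also describe what the mapper emits.
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(TokenMapper.class);
        conf.setCombinerClass(SumReducer.class);
        conf.setReducerClass(SumReducer.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}

The value class typically needs to be a Writable (or otherwise handled by the configured serialization), and binary output formats such as SequenceFileOutputFormat enforce that the records written by the reducer match the declared class.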

Usage

From source file: edu.umd.cloud9.webgraph.driver.SortWebGraph.java

License: Apache License

public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
        return -1;
    }

    JobConf conf = new JobConf(getConf(), SortWebGraph.class);
    FileSystem fs = FileSystem.get(conf);

    String inputPath = args[0];
    String outputPath = args[1];
    int numberOfDocuments = Integer.parseInt(args[2]);
    int numMappers = 1;
    int numReducers = Integer.parseInt(args[3]);

    conf.setJobName("SortWebGraph");
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.setInt("mapred.task.timeout", 60000000);
    conf.set("mapreduce.map.memory.mb", "2048");
    conf.set("mapreduce.map.java.opts", "-Xmx2048m");
    conf.set("mapreduce.reduce.memory.mb", "2048");
    conf.set("mapreduce.reduce.java.opts", "-Xmx2048m");
    conf.set("mapreduce.task.timeout", "60000000");

    if (numberOfDocuments == 0) {
        numberOfDocuments = DEFAULT_NUMBER_OF_DOCUMENTS;
    }
    conf.setInt("Cloud9.NumberOfDocuments", numberOfDocuments);
    conf.setNumMapTasks(numMappers);
    conf.setNumReduceTasks(numReducers);
    conf.setMapperClass(IdentityMapper.class);
    conf.setPartitionerClass(Partition.class);
    conf.setReducerClass(IdentityReducer.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(ArrayListWritable.class);
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(ArrayListWritable.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(conf, true);
    SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);
    SequenceFileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    LOG.info("SortAnchorText");
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of documents: " + conf.getInt("Cloud9.NumberOfDocuments", DEFAULT_NUMBER_OF_DOCUMENTS));
    fs.delete(new Path(outputPath), true);
    JobClient.runJob(conf);
    return 0;
}

From source file: edu.umd.cloud9.webgraph.ExtractLinks.java

License: Apache License

public int runTool() throws Exception {

    JobConf conf = new JobConf(getConf(), ExtractLinks.class);
    FileSystem fs = FileSystem.get(conf);

    int numMappers = conf.getInt("Cloud9.Mappers", 1);
    int numReducers = conf.getInt("Cloud9.Reducers", 200);

    String inputPath = conf.get("Cloud9.InputPath");
    String outputPath = conf.get("Cloud9.OutputPath");
    String mappingFile = conf.get("Cloud9.DocnoMappingFile");

    if (!fs.exists(new Path(mappingFile)))
        throw new RuntimeException("Error: Docno mapping data file " + mappingFile + " doesn't exist!");

    DistributedCache.addCacheFile(new URI(mappingFile), conf);

    conf.setJobName("ExtractLinks");
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.setInt("mapred.task.timeout", 60000000);

    conf.setNumMapTasks(numMappers);
    conf.setNumReduceTasks(numReducers);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(ArrayListWritable.class);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    SequenceFileOutputFormat.setCompressOutput(conf, true);
    SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);

    SequenceFileInputFormat.setInputPaths(conf, inputPath);

    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    LOG.info("ExtractLinks");
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - mapping file: " + mappingFile);
    LOG.info(" - include internal links? " + conf.getBoolean("Cloud9.IncludeInternalLinks", false));

    if (!fs.exists(new Path(outputPath))) {
        JobClient.runJob(conf);
    } else {
        LOG.info(outputPath + " already exists! Skipping this step...");
    }

    return 0;
}

From source file: edu.umn.cs.spatialHadoop.operations.ConvexHull.java

License: Open Source License

public static void convexHullMapReduce(Path inFile, Path userOutPath, OperationsParams params)
        throws IOException {
    JobConf job = new JobConf(params, ConvexHull.class);
    Path outPath = userOutPath;
    FileSystem outFs = (userOutPath == null ? inFile : userOutPath).getFileSystem(job);
    Shape shape = params.getShape("shape");

    if (outPath == null) {
        do {
            outPath = new Path(inFile.toUri().getPath() + ".convex_hull_" + (int) (Math.random() * 1000000));
        } while (outFs.exists(outPath));
    } else {
        if (outFs.exists(outPath)) {
            if (params.getBoolean("overwrite", false)) {
                outFs.delete(outPath, true);
            } else {
                throw new RuntimeException("Output path already exists and -overwrite flag is not set");
            }
        }
    }

    job.setJobName("ConvexHull");
    job.setClass(SpatialSite.FilterClass, ConvexHullFilter.class, BlockFilter.class);
    job.setMapperClass(IdentityMapper.class);
    job.setCombinerClass(ConvexHullReducer.class);
    job.setReducerClass(ConvexHullReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(shape.getClass());
    job.setInputFormat(ShapeInputFormat.class);
    ShapeInputFormat.addInputPath(job, inFile);
    job.setOutputFormat(GridOutputFormat2.class);
    GridOutputFormat2.setOutputPath(job, outPath);

    JobClient.runJob(job);

    // If outputPath not set by user, automatically delete it
    if (userOutPath == null)
        outFs.delete(outPath, true);
}

From source file: edu.umn.cs.spatialHadoop.operations.Skyline.java

License: Open Source License

private static void skylineMapReduce(Path inFile, Path userOutPath, OperationsParams params)
        throws IOException {
    JobConf job = new JobConf(params, Skyline.class);
    Path outPath = userOutPath;
    FileSystem outFs = (userOutPath == null ? inFile : userOutPath).getFileSystem(job);
    Shape shape = params.getShape("shape");

    if (outPath == null) {
        do {
            outPath = new Path(inFile.toUri().getPath() + ".skyline_" + (int) (Math.random() * 1000000));
        } while (outFs.exists(outPath));
    }

    job.setJobName("Skyline");
    job.setClass(SpatialSite.FilterClass, SkylineFilter.class, BlockFilter.class);
    job.setMapperClass(IdentityMapper.class);
    job.setCombinerClass(SkylineReducer.class);
    job.setReducerClass(SkylineReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(shape.getClass());
    job.setInputFormat(ShapeIterInputFormat.class);
    ShapeInputFormat.addInputPath(job, inFile);
    job.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outPath);

    JobClient.runJob(job);

    // If outputPath not set by user, automatically delete it
    if (userOutPath == null)
        outFs.delete(outPath, true);
}

From source file: edu.umn.cs.sthadoop.operations.STJoin.java

License: Open Source License

/**
 * Runs the spatio-temporal join job over the given input.
 *
 * @param inputPath path to the input data
 * @param outputPath path where the join output is written
 * @param params operation parameters, including "timedistance" and "spacedistance"
 * @return always 0
 * @throws IOException if the file system cannot be accessed
 * @throws Exception if the MapReduce job fails
 * @throws InterruptedException if execution is interrupted
 */
private static long stJoin(Path inputPath, Path outputPath, OperationsParams params)
        throws IOException, Exception, InterruptedException {

    JobConf conf = new JobConf(new Configuration(), STJoin.class);
    FileSystem outfs = outputPath.getFileSystem(conf);
    outfs.delete(outputPath, true);
    conf.setJobName("STJoin");
    // pass params to the join map-reduce 
    conf.set("timedistance", params.get("timedistance"));
    conf.set("spacedistance", params.get("spacedistance"));
    //      conf.setMapOutputKeyClass(LongWritable.class);
    //      conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    // Mapper settings
    conf.setMapperClass(STJoinMap.class);
    //      conf.setReducerClass(STJoinReduce.class);
    //      conf.setCombinerClass(STJoinReduce.class);
    conf.setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true);
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);
    conf.setNumReduceTasks(0);
    JobClient.runJob(conf); // runJob blocks until the job completes
    outfs = inputPath.getFileSystem(conf);
    outfs.delete(inputPath, true);
    return 0;
}

From source file: edu.yale.cs.hadoopdb.benchmark.AggTaskLargeDB.java

License: Apache License

@Override
protected JobConf configureJob(String... args) throws Exception {

    JobConf conf = new JobConf(this.getClass());
    conf.setJobName("aggregation_db_large");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(DoubleWritable.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);

    if (args.length < 1) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    // OUTPUT properties
    Path outputPath = new Path(args[0]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.set(DBConst.DB_RELATION_ID, "UserVisits");
    conf.set(DBConst.DB_RECORD_READER, AggUserVisitsRecord.class.getName());
    conf.set(DBConst.DB_SQL_QUERY,
            "SELECT sourceIP, SUM(adRevenue) AS sumAdRevenue " + "FROM UserVisits GROUP BY sourceIP;");

    return conf;
}

From source file: edu.yale.cs.hadoopdb.benchmark.AggTaskLargeHDFS.java

License: Apache License

@Override
protected JobConf configureJob(String... args) throws IOException {

    JobConf conf = new JobConf(getConf(), this.getClass());
    conf.setJobName("aggregation_hdfs_large");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(DoubleWritable.class);

    conf.setMapperClass(AggTaskLargeHDFS.Map.class);
    conf.setCombinerClass(AggTaskLargeHDFS.Reduce.class);
    conf.setReducerClass(AggTaskLargeHDFS.Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    if (args.length < 2) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    FileInputFormat.setInputPaths(conf, new Path(args[0]));

    // OUTPUT properties
    Path outputPath = new Path(args[1]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    return conf;

}

From source file: edu.yale.cs.hadoopdb.benchmark.AggTaskSmallDB.java

License: Apache License

@Override
protected JobConf configureJob(String... args) throws Exception {

    JobConf conf = new JobConf(this.getClass());
    conf.setJobName("aggregation_db_small");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(DoubleWritable.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);

    if (args.length < 1) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    // OUTPUT properties
    Path outputPath = new Path(args[0]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.set(DBConst.DB_RELATION_ID, "UserVisits");
    conf.set(DBConst.DB_RECORD_READER, AggUserVisitsRecord.class.getName());
    conf.set(DBConst.DB_SQL_QUERY,
            "SELECT SUBSTRING(sourceIP, 1, 7) AS subSourceIP, SUM(adRevenue) AS sumAdRevenue FROM UserVisits GROUP BY subSourceIP;");

    return conf;
}

From source file: edu.yale.cs.hadoopdb.benchmark.AggTaskSmallHDFS.java

License: Apache License

@Override
protected JobConf configureJob(String... args) throws IOException {

    JobConf conf = new JobConf(this.getClass());
    conf.setJobName("aggregation_hdfs_small");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(DoubleWritable.class);

    conf.setMapperClass(AggTaskSmallHDFS.Map.class);
    conf.setCombinerClass(AggTaskSmallHDFS.Reduce.class);
    conf.setReducerClass(AggTaskSmallHDFS.Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    if (args.length < 2) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    FileInputFormat.setInputPaths(conf, new Path(args[0]));

    // OUTPUT properties
    Path outputPath = new Path(args[1]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    return conf;

}

From source file: edu.yale.cs.hadoopdb.benchmark.GrepTaskDB.java

License: Apache License

@Override
protected JobConf configureJob(String... args) throws IOException {

    JobConf conf = new JobConf(GrepTaskDB.class);
    conf.setJobName("grep_db_job");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(Map.class);
    conf.setNumReduceTasks(0);

    // GREP arguments
    conf.setOutputFormat(TextOutputFormat.class);
    for (int i = 0; i < args.length; ++i) {
        if ("-pattern".equals(args[i]))
            conf.set("pattern", args[++i]);
        else if ("-output".equals(args[i]))
            conf.set("output", args[++i]);
    }

    // OUTPUT properties

    Path outputPath = new Path(conf.get("output"));
    System.out.println(conf.get("output"));
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    // DB properties
    conf.set(DBConst.DB_RELATION_ID, "grep");
    conf.set(DBConst.DB_RECORD_READER, DocumentsRecord.class.getName());
    conf.set(DBConst.DB_SQL_QUERY,
            "SELECT key1, field FROM grep WHERE field LIKE '%" + conf.get("pattern") + "%';");

    return conf;

}