Example usage for org.apache.hadoop.mapred JobConf get

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred JobConf get.

Prototype

public String get(String name)

Source Link

Document

Get the value of the name property, null if no such property exists.

Usage

From source file:edu.umd.cloud9.webgraph.ClueExtractLinks.java

License:Apache License

public int runTool() throws Exception {
    JobConf conf = new JobConf(getConf(), ClueExtractLinks.class);
    FileSystem fs = FileSystem.get(conf);

    int numMappers = conf.getInt("Cloud9.Mappers", 1);
    int numReducers = conf.getInt("Cloud9.Reducers", 200);

    String inputPath = conf.get("Cloud9.InputPath");
    String outputPath = conf.get("Cloud9.OutputPath");
    String mappingFile = conf.get("Cloud9.DocnoMappingFile");

    if (!fs.exists(new Path(mappingFile))) {
        throw new RuntimeException("Error: Docno mapping data file " + mappingFile + " doesn't exist!");
    }//w w  w  . j a v a  2  s  .  co m

    DistributedCache.addCacheFile(new URI(mappingFile), conf);

    conf.setJobName("ClueExtractLinks");
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.setInt("mapred.task.timeout", 60000000);
    conf.set("mapreduce.map.memory.mb", "2048");
    conf.set("mapreduce.map.java.opts", "-Xmx2048m");
    conf.set("mapreduce.reduce.memory.mb", "2048");
    conf.set("mapreduce.reduce.java.opts", "-Xmx2048m");
    conf.set("mapreduce.task.timeout", "60000000");

    conf.setNumMapTasks(numMappers);
    conf.setNumReduceTasks(numReducers);
    // TODO: to read!!
    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(ArrayListWritable.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setCompressOutput(conf, true);
    SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);
    SequenceFileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    LOG.info("ClueExtractLinks");
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - mapping file: " + mappingFile);
    LOG.info(" - include internal links? " + conf.getBoolean("Cloud9.IncludeInternalLinks", false));

    if (!fs.exists(new Path(outputPath))) {
        JobClient.runJob(conf);
    } else {
        LOG.info(outputPath + " already exists! Skipping this step...");
    }
    return 0;
}

From source file:edu.umd.cloud9.webgraph.CollectHostnames.java

License:Apache License

public int runTool() throws Exception {

    JobConf conf = new JobConf(getConf(), CollectHostnames.class);
    FileSystem fs = FileSystem.get(conf);

    int numMappers = conf.getInt("Cloud9.Mappers", 1);
    int numReducers = conf.getInt("Cloud9.Reducers", 200);

    String inputPath = conf.get("Cloud9.InputPath");
    String outputPath = conf.get("Cloud9.OutputPath");

    conf.setJobName("CollectHostnames");
    conf.set("mapred.child.java.opts", "-Xmx4096m");
    conf.setInt("mapred.task.timeout", 60000000);

    conf.setNumMapTasks(numMappers);//  w ww  . j  a  v  a2 s  . com
    conf.setNumReduceTasks(numReducers);

    conf.setMapperClass(Map.class);
    conf.setPartitionerClass(Partition.class);
    conf.setReducerClass(Reduce.class);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(ArrayListWritable.class);

    conf.setMapOutputKeyClass(PairOfIntString.class);
    conf.setMapOutputValueClass(IntWritable.class);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    SequenceFileOutputFormat.setCompressOutput(conf, true);
    SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);

    SequenceFileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    sLogger.info("PropagateHostname");
    sLogger.info(" - input path: " + inputPath);
    sLogger.info(" - output path: " + outputPath);

    if (!fs.exists(new Path(outputPath))) {
        JobClient.runJob(conf);
    } else {
        sLogger.info(outputPath + " already exists! Skipping this step...");
    }

    return 0;
}

From source file:edu.umd.cloud9.webgraph.ComputeWeight.java

License:Apache License

public int runTool() throws Exception {

    JobConf conf = new JobConf(getConf(), ComputeWeight.class);
    FileSystem fs = FileSystem.get(conf);

    int numMappers = conf.getInt("Cloud9.Mappers", 1);
    int numReducers = conf.getInt("Cloud9.Reducers", 200);

    String inputPath = conf.get("Cloud9.InputPath");
    String outputPath = conf.get("Cloud9.OutputPath");

    conf.setJobName("ComputeWeights");
    conf.set("mapred.child.java.opts", "-Xmx4096m");
    conf.setInt("mapred.task.timeout", 60000000);

    conf.setNumMapTasks(numMappers);/*from w  ww  . j  a  v  a 2 s.  com*/
    conf.setNumReduceTasks(numReducers);

    conf.setMapperClass(Map.class);
    conf.setPartitionerClass(Partition.class);
    conf.setReducerClass(Reduce.class);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(ArrayListWritable.class);

    conf.setMapOutputKeyClass(PairOfInts.class);
    conf.setMapOutputValueClass(ArrayListWritable.class);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    SequenceFileOutputFormat.setCompressOutput(conf, true);
    SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);

    SequenceFileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    LOG.info("ComputeWeight");
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);

    if (!fs.exists(new Path(outputPath))) {
        JobClient.runJob(conf);
    } else {
        LOG.info(outputPath + " already exists! Skipping this step...");
    }

    return 0;
}

From source file:edu.umd.cloud9.webgraph.ExtractLinks.java

License:Apache License

public int runTool() throws Exception {

    JobConf conf = new JobConf(getConf(), ExtractLinks.class);
    FileSystem fs = FileSystem.get(conf);

    int numMappers = conf.getInt("Cloud9.Mappers", 1);
    int numReducers = conf.getInt("Cloud9.Reducers", 200);

    String inputPath = conf.get("Cloud9.InputPath");
    String outputPath = conf.get("Cloud9.OutputPath");
    String mappingFile = conf.get("Cloud9.DocnoMappingFile");

    if (!fs.exists(new Path(mappingFile)))
        throw new RuntimeException("Error: Docno mapping data file " + mappingFile + " doesn't exist!");

    DistributedCache.addCacheFile(new URI(mappingFile), conf);

    conf.setJobName("ExtractLinks");
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.setInt("mapred.task.timeout", 60000000);

    conf.setNumMapTasks(numMappers);//from   ww w  . jav  a2s.c o m
    conf.setNumReduceTasks(numReducers);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(ArrayListWritable.class);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    SequenceFileOutputFormat.setCompressOutput(conf, true);
    SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);

    SequenceFileInputFormat.setInputPaths(conf, inputPath);

    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    LOG.info("ExtractLinks");
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - mapping file: " + mappingFile);
    LOG.info(" - include internal links? " + conf.getBoolean("Cloud9.IncludeInternalLinks", false));

    if (!fs.exists(new Path(outputPath))) {
        JobClient.runJob(conf);
    } else {
        LOG.info(outputPath + " already exists! Skipping this step...");
    }

    return 0;
}

From source file:edu.umn.cs.spatialHadoop.operations.Indexer.java

License:Open Source License

private static RunningJob indexMapReduce(Path inPath, Path outPath, OperationsParams params)
        throws IOException, InterruptedException {
    JobConf job = new JobConf(params, Indexer.class);
    job.setJobName("Indexer");

    // Set input file MBR if not already set
    Rectangle inputMBR = (Rectangle) params.getShape("mbr");
    if (inputMBR == null)
        inputMBR = FileMBR.fileMBR(inPath, params);
    OperationsParams.setShape(job, "mbr", inputMBR);

    // Set input and output
    job.setInputFormat(ShapeIterInputFormat.class);
    ShapeIterInputFormat.setInputPaths(job, inPath);
    job.setOutputFormat(IndexOutputFormat.class);
    GridOutputFormat.setOutputPath(job, outPath);

    // Set the correct partitioner according to index type
    String index = job.get("sindex");
    if (index == null)
        throw new RuntimeException("Index type is not set");
    long t1 = System.currentTimeMillis();
    Partitioner partitioner = createPartitioner(inPath, outPath, job, index);
    Partitioner.setPartitioner(job, partitioner);
    long t2 = System.currentTimeMillis();
    System.out.println("Total time for space subdivision in millis: " + (t2 - t1));

    // Set mapper and reducer
    Shape shape = params.getShape("shape");
    job.setMapperClass(IndexMethods.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(shape.getClass());
    job.setReducerClass(IndexMethods.class);
    job.setOutputCommitter(IndexerOutputCommitter.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(5 * Math.max(1, clusterStatus.getMaxMapTasks()));
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));

    // Use multithreading in case the job is running locally
    job.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors());

    // Start the job
    if (params.getBoolean("background", false)) {
        // Run in background
        JobClient jc = new JobClient(job);
        return jc.submitJob(job);
    } else {/*from   w  ww .j a  v a 2s.c o m*/
        // Run and block until it is finished
        return JobClient.runJob(job);
    }
}

From source file:edu.yale.cs.hadoopdb.benchmark.GrepTaskDB.java

License:Apache License

@Override
protected JobConf configureJob(String... args) throws IOException {

    JobConf conf = new JobConf(GrepTaskDB.class);
    conf.setJobName("grep_db_job");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(Map.class);
    conf.setNumReduceTasks(0);//from   w w  w. ja  v a 2 s  .c  o  m

    // GREP arguments
    conf.setOutputFormat(TextOutputFormat.class);
    for (int i = 0; i < args.length; ++i) {
        if ("-pattern".equals(args[i]))
            conf.set("pattern", args[++i]);
        else if ("-output".equals(args[i]))
            conf.set("output", args[++i]);
    }

    // OUTPUT properties

    Path outputPath = new Path(conf.get("output"));
    System.out.println(conf.get("output"));
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    // DB properties
    conf.set(DBConst.DB_RELATION_ID, "grep");
    conf.set(DBConst.DB_RECORD_READER, DocumentsRecord.class.getName());
    conf.set(DBConst.DB_SQL_QUERY,
            "SELECT key1, field FROM grep WHERE field LIKE '%" + conf.get("pattern") + "%';");

    return conf;

}

From source file:edu.yale.cs.hadoopdb.benchmark.JoinTaskDB.java

License:Apache License

@Override
protected JobConf configureJob(String... args) throws Exception {
    JobConf conf = new JobConf(JoinTaskDB.class);
    conf.setJobName("join_db");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);
    conf.setNumReduceTasks(1); // Because we look for 1 TOP value

    // join arguments
    conf.setOutputFormat(TextOutputFormat.class);
    for (int i = 0; i < args.length; ++i) {
        if ("-date_l".equals(args[i]))
            conf.set("date_l", args[++i]);
        else if ("-date_u".equals(args[i]))
            conf.set("date_u", args[++i]);
        else if ("-output".equals(args[i]))
            conf.set("output", args[++i]);
    }//from w w w .j  a v  a2  s  . c o  m

    // OUTPUT properties
    Path outputPath = new Path(conf.get("output"));
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.set(DBConst.DB_RELATION_ID, "UserVisits");
    conf.set(DBConst.DB_RECORD_READER, JoinRecord.class.getName());

    String TABLE_R = "Rankings";
    String TABLE_UV = "UserVisits";

    conf.set(DBConst.DB_SQL_QUERY,
            "SELECT sourceIP, SUM(pageRank) as sumPageRank, COUNT(pageRank) as countPageRank, SUM(adRevenue) as totalRevenue "
                    + "FROM " + TABLE_R + " AS R, " + TABLE_UV + " AS UV " + "WHERE R.pageURL = UV.destURL "
                    + "AND UV.visitDate BETWEEN '" + conf.get("date_l") + "' AND '" + conf.get("date_u") + "' "
                    + "GROUP BY UV.sourceIP;");

    return conf;
}

From source file:edu.yale.cs.hadoopdb.benchmark.SelectionTaskDB.java

License:Apache License

@Override
protected JobConf configureJob(String... args) throws Exception {

    JobConf conf = new JobConf(this.getClass());
    conf.setJobName("selection_db");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapperClass(Map.class);
    conf.setNumReduceTasks(0);//from   w ww  . jav a 2s  .  c o m

    if (args.length < 2) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    conf.set(PAGE_RANK_VALUE_PARAM, args[0]);

    // OUTPUT properties
    Path outputPath = new Path(args[1]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.set(DBConst.DB_RELATION_ID, "Rankings");
    conf.set(DBConst.DB_RECORD_READER, RankingsRecord.class.getName());
    conf.set(DBConst.DB_SQL_QUERY, "SELECT pageURL, pageRank FROM Rankings " + "WHERE pageRank > "
            + conf.get(PAGE_RANK_VALUE_PARAM) + ";");

    return conf;
}

From source file:edu.yale.cs.hadoopdb.catalog.Catalog.java

License:Apache License

private Catalog(JobConf job) {

    try {//from w  w  w  .  j a v  a 2s . co  m
        FileSystem fs = FileSystem.get(job);
        Path config_file = new Path(job.get(DBConst.DB_CONFIG_FILE));
        xmlConfig = ConfigurationMapping.getInstance(fs.open(config_file));
        replication = job.getBoolean(DBConst.DB_REPLICATION, false);
    } catch (IOException e) {
        LOG.error(StringUtils.stringifyException(e));
    } catch (JAXBException e) {
        LOG.error(StringUtils.stringifyException(e));
    }
}

From source file:edu.yale.cs.hadoopdb.connector.AbstractDBRecordReader.java

License:Apache License

/**
 * Prepares a SQL query, if no preparer specified then returns the same sqlQuery
 *///from ww  w . ja  v  a  2 s.  c  o  m
protected String prepareSqlQuery(String sqlQuery, DBInputSplit split, JobConf conf) {
    String preparerClass = conf.get(DBConst.DB_SQL_PREPARER);
    if (preparerClass == null) {
        return sqlQuery;
    } else {
        try {
            SQLPreparer sqlPreparer = (SQLPreparer) ReflectionUtils.newInstance(Class.forName(preparerClass),
                    conf);
            return sqlPreparer.prepare(sqlQuery, split, conf);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}