List of usage examples for org.apache.hadoop.mapred JobConf get
public String get(String name)
Gets the value of the name property, or null if no such property exists.
From source file:edu.umd.cloud9.webgraph.ClueExtractLinks.java
License:Apache License
public int runTool() throws Exception { JobConf conf = new JobConf(getConf(), ClueExtractLinks.class); FileSystem fs = FileSystem.get(conf); int numMappers = conf.getInt("Cloud9.Mappers", 1); int numReducers = conf.getInt("Cloud9.Reducers", 200); String inputPath = conf.get("Cloud9.InputPath"); String outputPath = conf.get("Cloud9.OutputPath"); String mappingFile = conf.get("Cloud9.DocnoMappingFile"); if (!fs.exists(new Path(mappingFile))) { throw new RuntimeException("Error: Docno mapping data file " + mappingFile + " doesn't exist!"); }//w w w . j a v a 2 s . co m DistributedCache.addCacheFile(new URI(mappingFile), conf); conf.setJobName("ClueExtractLinks"); conf.set("mapred.child.java.opts", "-Xmx2048m"); conf.setInt("mapred.task.timeout", 60000000); conf.set("mapreduce.map.memory.mb", "2048"); conf.set("mapreduce.map.java.opts", "-Xmx2048m"); conf.set("mapreduce.reduce.memory.mb", "2048"); conf.set("mapreduce.reduce.java.opts", "-Xmx2048m"); conf.set("mapreduce.task.timeout", "60000000"); conf.setNumMapTasks(numMappers); conf.setNumReduceTasks(numReducers); // TODO: to read!! conf.setMapperClass(Map.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(ArrayListWritable.class); conf.setInputFormat(SequenceFileInputFormat.class); conf.setOutputFormat(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setCompressOutput(conf, true); SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK); SequenceFileInputFormat.setInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); LOG.info("ClueExtractLinks"); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - mapping file: " + mappingFile); LOG.info(" - include internal links? 
" + conf.getBoolean("Cloud9.IncludeInternalLinks", false)); if (!fs.exists(new Path(outputPath))) { JobClient.runJob(conf); } else { LOG.info(outputPath + " already exists! Skipping this step..."); } return 0; }
From source file:edu.umd.cloud9.webgraph.CollectHostnames.java
License:Apache License
public int runTool() throws Exception { JobConf conf = new JobConf(getConf(), CollectHostnames.class); FileSystem fs = FileSystem.get(conf); int numMappers = conf.getInt("Cloud9.Mappers", 1); int numReducers = conf.getInt("Cloud9.Reducers", 200); String inputPath = conf.get("Cloud9.InputPath"); String outputPath = conf.get("Cloud9.OutputPath"); conf.setJobName("CollectHostnames"); conf.set("mapred.child.java.opts", "-Xmx4096m"); conf.setInt("mapred.task.timeout", 60000000); conf.setNumMapTasks(numMappers);// w ww . j a v a2 s . com conf.setNumReduceTasks(numReducers); conf.setMapperClass(Map.class); conf.setPartitionerClass(Partition.class); conf.setReducerClass(Reduce.class); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(ArrayListWritable.class); conf.setMapOutputKeyClass(PairOfIntString.class); conf.setMapOutputValueClass(IntWritable.class); conf.setInputFormat(SequenceFileInputFormat.class); conf.setOutputFormat(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setCompressOutput(conf, true); SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK); SequenceFileInputFormat.setInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); sLogger.info("PropagateHostname"); sLogger.info(" - input path: " + inputPath); sLogger.info(" - output path: " + outputPath); if (!fs.exists(new Path(outputPath))) { JobClient.runJob(conf); } else { sLogger.info(outputPath + " already exists! Skipping this step..."); } return 0; }
From source file:edu.umd.cloud9.webgraph.ComputeWeight.java
License:Apache License
/**
 * Sets up the ComputeWeight job from the {@code Cloud9.*} configuration properties and
 * runs it, unless the output path is already present.
 *
 * @return always 0
 * @throws Exception if file-system access or the job itself fails
 */
public int runTool() throws Exception {
  JobConf conf = new JobConf(getConf(), ComputeWeight.class);
  FileSystem fs = FileSystem.get(conf);

  // Settings supplied by the caller through the configuration.
  int mapTasks = conf.getInt("Cloud9.Mappers", 1);
  int reduceTasks = conf.getInt("Cloud9.Reducers", 200);
  String inputPath = conf.get("Cloud9.InputPath");
  String outputPath = conf.get("Cloud9.OutputPath");

  // General job settings.
  conf.setJobName("ComputeWeights");
  conf.set("mapred.child.java.opts", "-Xmx4096m");
  conf.setInt("mapred.task.timeout", 60000000);
  conf.setNumMapTasks(mapTasks);
  conf.setNumReduceTasks(reduceTasks);

  // Processing classes.
  conf.setMapperClass(Map.class);
  conf.setPartitionerClass(Partition.class);
  conf.setReducerClass(Reduce.class);

  // Key/value types for the final and the intermediate output.
  conf.setOutputKeyClass(IntWritable.class);
  conf.setOutputValueClass(ArrayListWritable.class);
  conf.setMapOutputKeyClass(PairOfInts.class);
  conf.setMapOutputValueClass(ArrayListWritable.class);

  // Sequence files in, block-compressed sequence files out.
  conf.setInputFormat(SequenceFileInputFormat.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setCompressOutput(conf, true);
  SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);

  SequenceFileInputFormat.setInputPaths(conf, inputPath);
  Path outputDir = new Path(outputPath);
  FileOutputFormat.setOutputPath(conf, outputDir);

  LOG.info("ComputeWeight");
  LOG.info(" - input path: " + inputPath);
  LOG.info(" - output path: " + outputPath);

  // Nothing to do when a previous run already produced the output.
  if (fs.exists(outputDir)) {
    LOG.info(outputPath + " already exists! Skipping this step...");
    return 0;
  }

  JobClient.runJob(conf);
  return 0;
}
From source file:edu.umd.cloud9.webgraph.ExtractLinks.java
License:Apache License
public int runTool() throws Exception { JobConf conf = new JobConf(getConf(), ExtractLinks.class); FileSystem fs = FileSystem.get(conf); int numMappers = conf.getInt("Cloud9.Mappers", 1); int numReducers = conf.getInt("Cloud9.Reducers", 200); String inputPath = conf.get("Cloud9.InputPath"); String outputPath = conf.get("Cloud9.OutputPath"); String mappingFile = conf.get("Cloud9.DocnoMappingFile"); if (!fs.exists(new Path(mappingFile))) throw new RuntimeException("Error: Docno mapping data file " + mappingFile + " doesn't exist!"); DistributedCache.addCacheFile(new URI(mappingFile), conf); conf.setJobName("ExtractLinks"); conf.set("mapred.child.java.opts", "-Xmx2048m"); conf.setInt("mapred.task.timeout", 60000000); conf.setNumMapTasks(numMappers);//from ww w . jav a2s.c o m conf.setNumReduceTasks(numReducers); conf.setMapperClass(Map.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(ArrayListWritable.class); conf.setInputFormat(SequenceFileInputFormat.class); conf.setOutputFormat(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setCompressOutput(conf, true); SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK); SequenceFileInputFormat.setInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); LOG.info("ExtractLinks"); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - mapping file: " + mappingFile); LOG.info(" - include internal links? " + conf.getBoolean("Cloud9.IncludeInternalLinks", false)); if (!fs.exists(new Path(outputPath))) { JobClient.runJob(conf); } else { LOG.info(outputPath + " already exists! Skipping this step..."); } return 0; }
From source file:edu.umn.cs.spatialHadoop.operations.Indexer.java
License:Open Source License
private static RunningJob indexMapReduce(Path inPath, Path outPath, OperationsParams params) throws IOException, InterruptedException { JobConf job = new JobConf(params, Indexer.class); job.setJobName("Indexer"); // Set input file MBR if not already set Rectangle inputMBR = (Rectangle) params.getShape("mbr"); if (inputMBR == null) inputMBR = FileMBR.fileMBR(inPath, params); OperationsParams.setShape(job, "mbr", inputMBR); // Set input and output job.setInputFormat(ShapeIterInputFormat.class); ShapeIterInputFormat.setInputPaths(job, inPath); job.setOutputFormat(IndexOutputFormat.class); GridOutputFormat.setOutputPath(job, outPath); // Set the correct partitioner according to index type String index = job.get("sindex"); if (index == null) throw new RuntimeException("Index type is not set"); long t1 = System.currentTimeMillis(); Partitioner partitioner = createPartitioner(inPath, outPath, job, index); Partitioner.setPartitioner(job, partitioner); long t2 = System.currentTimeMillis(); System.out.println("Total time for space subdivision in millis: " + (t2 - t1)); // Set mapper and reducer Shape shape = params.getShape("shape"); job.setMapperClass(IndexMethods.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(shape.getClass()); job.setReducerClass(IndexMethods.class); job.setOutputCommitter(IndexerOutputCommitter.class); ClusterStatus clusterStatus = new JobClient(job).getClusterStatus(); job.setNumMapTasks(5 * Math.max(1, clusterStatus.getMaxMapTasks())); job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks())); // Use multithreading in case the job is running locally job.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors()); // Start the job if (params.getBoolean("background", false)) { // Run in background JobClient jc = new JobClient(job); return jc.submitJob(job); } else {/*from w ww .j a v a 2s.c o m*/ // Run and block until it is finished return JobClient.runJob(job); } }
From source file:edu.yale.cs.hadoopdb.benchmark.GrepTaskDB.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws IOException { JobConf conf = new JobConf(GrepTaskDB.class); conf.setJobName("grep_db_job"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Map.class); conf.setNumReduceTasks(0);//from w w w. ja v a 2 s .c o m // GREP arguments conf.setOutputFormat(TextOutputFormat.class); for (int i = 0; i < args.length; ++i) { if ("-pattern".equals(args[i])) conf.set("pattern", args[++i]); else if ("-output".equals(args[i])) conf.set("output", args[++i]); } // OUTPUT properties Path outputPath = new Path(conf.get("output")); System.out.println(conf.get("output")); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); // DB properties conf.set(DBConst.DB_RELATION_ID, "grep"); conf.set(DBConst.DB_RECORD_READER, DocumentsRecord.class.getName()); conf.set(DBConst.DB_SQL_QUERY, "SELECT key1, field FROM grep WHERE field LIKE '%" + conf.get("pattern") + "%';"); return conf; }
From source file:edu.yale.cs.hadoopdb.benchmark.JoinTaskDB.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws Exception { JobConf conf = new JobConf(JoinTaskDB.class); conf.setJobName("join_db"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Map.class); conf.setReducerClass(Reduce.class); conf.setNumReduceTasks(1); // Because we look for 1 TOP value // join arguments conf.setOutputFormat(TextOutputFormat.class); for (int i = 0; i < args.length; ++i) { if ("-date_l".equals(args[i])) conf.set("date_l", args[++i]); else if ("-date_u".equals(args[i])) conf.set("date_u", args[++i]); else if ("-output".equals(args[i])) conf.set("output", args[++i]); }//from w w w .j a v a2 s . c o m // OUTPUT properties Path outputPath = new Path(conf.get("output")); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); conf.set(DBConst.DB_RELATION_ID, "UserVisits"); conf.set(DBConst.DB_RECORD_READER, JoinRecord.class.getName()); String TABLE_R = "Rankings"; String TABLE_UV = "UserVisits"; conf.set(DBConst.DB_SQL_QUERY, "SELECT sourceIP, SUM(pageRank) as sumPageRank, COUNT(pageRank) as countPageRank, SUM(adRevenue) as totalRevenue " + "FROM " + TABLE_R + " AS R, " + TABLE_UV + " AS UV " + "WHERE R.pageURL = UV.destURL " + "AND UV.visitDate BETWEEN '" + conf.get("date_l") + "' AND '" + conf.get("date_u") + "' " + "GROUP BY UV.sourceIP;"); return conf; }
From source file:edu.yale.cs.hadoopdb.benchmark.SelectionTaskDB.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws Exception { JobConf conf = new JobConf(this.getClass()); conf.setJobName("selection_db"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapperClass(Map.class); conf.setNumReduceTasks(0);//from w ww . jav a 2s . c o m if (args.length < 2) { throw new RuntimeException("Incorrect arguments provided for " + this.getClass()); } conf.set(PAGE_RANK_VALUE_PARAM, args[0]); // OUTPUT properties Path outputPath = new Path(args[1]); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); conf.set(DBConst.DB_RELATION_ID, "Rankings"); conf.set(DBConst.DB_RECORD_READER, RankingsRecord.class.getName()); conf.set(DBConst.DB_SQL_QUERY, "SELECT pageURL, pageRank FROM Rankings " + "WHERE pageRank > " + conf.get(PAGE_RANK_VALUE_PARAM) + ";"); return conf; }
From source file:edu.yale.cs.hadoopdb.catalog.Catalog.java
License:Apache License
private Catalog(JobConf job) { try {//from w w w . j a v a 2s . co m FileSystem fs = FileSystem.get(job); Path config_file = new Path(job.get(DBConst.DB_CONFIG_FILE)); xmlConfig = ConfigurationMapping.getInstance(fs.open(config_file)); replication = job.getBoolean(DBConst.DB_REPLICATION, false); } catch (IOException e) { LOG.error(StringUtils.stringifyException(e)); } catch (JAXBException e) { LOG.error(StringUtils.stringifyException(e)); } }
From source file:edu.yale.cs.hadoopdb.connector.AbstractDBRecordReader.java
License:Apache License
/** * Prepares a SQL query, if no preparer specified then returns the same sqlQuery *///from ww w . ja v a 2 s. c o m protected String prepareSqlQuery(String sqlQuery, DBInputSplit split, JobConf conf) { String preparerClass = conf.get(DBConst.DB_SQL_PREPARER); if (preparerClass == null) { return sqlQuery; } else { try { SQLPreparer sqlPreparer = (SQLPreparer) ReflectionUtils.newInstance(Class.forName(preparerClass), conf); return sqlPreparer.prepare(sqlQuery, split, conf); } catch (Exception e) { throw new RuntimeException(e); } } }