List of usage examples for org.apache.hadoop.mapred JobConf setInt
public void setInt(String name, int value)
Set the value of the name property to an int.
From source file:ivory.smrf.retrieval.distributed.RunQueryBroker.java
License:Apache License
/**
 * Runs this tool.
 *
 * <p>Expects exactly five arguments, in order: the server config path, the
 * run tag, the queries file path, the results file path and the number of
 * hits. Once the config path has been listed successfully, a job with one
 * map task and no reduce tasks (mapper {@code Server}) is submitted.
 *
 * @param args command-line arguments
 * @return {@code 0} after the job has been submitted; {@code -1} if the
 *         argument count is wrong or listing the config path returns
 *         {@code null}
 * @throws Exception on file system, number parsing or job submission failure
 */
public int run(String[] args) throws Exception {
    if (args.length != 5) {
        printUsage();
        return -1;
    }

    final String configPath = args[0];
    final FileSystem fileSystem = FileSystem.get(getConf());
    sLogger.info("server config path: " + configPath);

    // A null listing is treated as "path does not exist".
    final FileStatus[] configListing = fileSystem.listStatus(new Path(configPath));
    if (configListing == null) {
        sLogger.info("Error: " + configPath + " not found!");
        return -1;
    }

    final String runtag = args[1];
    final String queriesFilePath = args[2];
    final String resultsFilePath = args[3];
    final int numHits = Integer.parseInt(args[4]);

    final JobConf job = new JobConf(getConf(), RunQueryBroker.class);
    job.setJobName("RunQueryBroker");

    // Job shape: a single mapper, no reducers, null input and output formats.
    job.setNumMapTasks(1);
    job.setNumReduceTasks(0);
    job.setInputFormat(NullInputFormat.class);
    job.setOutputFormat(NullOutputFormat.class);
    job.setMapperClass(Server.class);

    // Parameters handed to the mapper through the job configuration.
    job.set("QueriesFilePath", queriesFilePath);
    job.set("ConfigPath", configPath);
    job.set("ResultsFilePath", resultsFilePath);
    job.set("Runtag", runtag);
    job.setInt("NumHits", numHits);

    job.set("mapred.child.java.opts", "-Xmx2048m");

    new JobClient(job).submitJob(job);
    sLogger.info("runner started!");
    return 0;
}
From source file:ivory.smrf.retrieval.RunQueryBroker.java
License:Apache License
/**
 * Runs this tool.
 *
 * <p>Expects exactly five arguments, in order: the server config path, the
 * run tag, the queries file path, the results file path and the number of
 * hits. Once the config path has been listed successfully, a job with one
 * map task and no reduce tasks (mapper {@code Server}) is submitted.
 *
 * <p>The job configuration is derived from {@link #getConf()} so that any
 * options already applied to this tool's configuration (the same one used
 * for the file system check below) also reach the submitted job. Building
 * the {@code JobConf} from the class alone would silently drop them.
 *
 * @param args command-line arguments
 * @return {@code 0} after the job has been submitted; {@code -1} if the
 *         argument count is wrong or listing the config path returns
 *         {@code null}
 * @throws Exception on file system, number parsing or job submission failure
 */
public int run(String[] args) throws Exception {
    if (args.length != 5) {
        printUsage();
        return -1;
    }

    String configPath = args[0];

    FileSystem fs = FileSystem.get(getConf());

    sLogger.info("server config path: " + configPath);
    FileStatus[] stats = fs.listStatus(new Path(configPath));

    // A null listing is treated as "path does not exist".
    if (stats == null) {
        sLogger.info("Error: " + configPath + " not found!");
        return -1;
    }

    String runtag = args[1];
    String queriesFilePath = args[2];
    String resultsFilePath = args[3];
    int numHits = Integer.parseInt(args[4]);

    // Start from the tool's configuration rather than a fresh default one.
    JobConf conf = new JobConf(getConf(), RunQueryBroker.class);
    conf.setJobName("RunQueryBroker");

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(NullInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setMapperClass(Server.class);

    // Parameters handed to the mapper through the job configuration.
    conf.set("QueriesFilePath", queriesFilePath);
    conf.set("ConfigPath", configPath);
    conf.set("ResultsFilePath", resultsFilePath);
    conf.set("Runtag", runtag);
    conf.setInt("NumHits", numHits);

    conf.set("mapred.child.java.opts", "-Xmx2048m");

    JobClient client = new JobClient(conf);
    client.submitJob(conf);

    sLogger.info("runner started!");

    return 0;
}
From source file:job.uncombine.compressed.BigBuildInvertedIndex.java
License:Apache License
/** * Runs this tool./*from w w w . j a v a 2 s . c o m*/ */ public int run(String[] args) throws Exception { //long GB = 1024 * 1024 * 1024; //long totalDataSize = 1 * GB; int reduceNumArray[] = { 9, 18 }; int splitSizeMBArray[] = { 64, 128, 256 }; int xmxArray[] = { 1000, 2000, 3000, 4000 }; int xmsArray[] = { 0, 1 }; int ismbArray[] = { 200, 400, 600, 800 }; for (int splitIndex = 0; splitIndex < splitSizeMBArray.length; splitIndex++) { for (int reduceNumIndex = 0; reduceNumIndex < reduceNumArray.length; reduceNumIndex++) { for (int xmxIndex = 0; xmxIndex < xmxArray.length; xmxIndex++) { for (int xmsIndex = 0; xmsIndex < xmsArray.length; xmsIndex++) { for (int ismbIndex = 0; ismbIndex < ismbArray.length; ismbIndex++) { int reduceNum = reduceNumArray[reduceNumIndex]; int splitMB = splitSizeMBArray[splitIndex]; int xmx = xmxArray[xmxIndex]; int xms = xmsArray[xmsIndex] * xmx; int ismb = ismbArray[ismbIndex]; JobConf conf = new JobConf(getConf(), BigBuildInvertedIndex.class); conf.setLong("mapred.min.split.size", SplitTable.getMapred_min_split_size(splitMB)); conf.setLong("mapred.max.split.size", SplitTable.getMapred_max_split_size(splitMB)); //conf.setInt("my.sample.split.num", (int) (totalDataSize / (splitMB * 1024 * 1024))); conf.setInt("mapred.reduce.tasks", reduceNum); conf.setInt("io.sort.mb", ismb); if (xms == 0) conf.set("mapred.child.java.opts", "-Xmx" + xmx + "m"); else conf.set("mapred.child.java.opts", "-Xmx" + xmx + "m -Xms" + xms + "m"); conf.setInt("child.monitor.metrics.seconds", 2); conf.setInt("child.monitor.jvm.seconds", 2); conf.setInt("child.monitor.jstat.seconds", 2); conf.setJobName("BigBuildInvertedIndex " + splitMB + "MB " + conf.get("mapred.child.java.opts") + " ismb=" + ismb + " RN=" + reduceNum); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: BigBuildInvertedIndex <in> <out>"); System.exit(2); } conf.setMapOutputKeyClass(Text.class); 
conf.setMapOutputValueClass(PairOfInts.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(PairOfWritables.class); SequenceFileOutputFormat.setOutputCompressionType(conf, CompressionType.BLOCK); conf.setOutputFormat(MapFileOutputFormat.class); conf.setMapperClass(MyMapper.class); // conf.setCombinerClass(IdentityReducer.class); conf.setReducerClass(MyReducer.class); FileInputFormat.setInputPaths(conf, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(conf, new Path(otherArgs[1])); FileSystem.get(conf).delete(new Path(otherArgs[1]), true); try { JobClient.runJob(conf); } catch (IOException e) { e.printStackTrace(); } Thread.sleep(15000); } } } } } return 0; }
From source file:jobimtext.thesaurus.distributional.hadoop.mapreduce.SimCounts1WithFeatures.java
License:Apache License
/**
 * Configures and runs the job.
 *
 * <p>{@code args[0]} is the input path and {@code args[1]} the output path.
 * If more than three arguments are given, {@code args[3]} is parsed as an
 * integer and stored under the {@code threshold} key. The remaining settings
 * are fixed cluster and memory tuning values.
 *
 * @param args command-line arguments, also passed to
 *             {@code HadoopUtil.generateJobConf}
 * @throws Exception if configuration or job execution fails
 */
@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {
    final JobConf job = HadoopUtil.generateJobConf(args);

    // Key/value types emitted by the mapper.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    // The reducer doubles as the combiner.
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    if (args.length > 3) {
        job.setInt("threshold", Integer.parseInt(args[3]));
    }

    // Milliseconds before an unresponsive task is killed.
    job.set("mapred.task.timeout", "600000");

    // 134217728 bytes = 128 MB block size.
    job.set("dfs.block.size", "134217728");

    // Map task limits and default map task count.
    final int maptasks = 200;
    final String mapTaskCount = "" + maptasks;
    job.set("mapred.tasktracker.map.tasks.maximum", mapTaskCount);
    job.set("mapred.tasktracker.map", mapTaskCount);
    job.set("mapred.map.tasks", mapTaskCount);

    // Reduce task limits and default reduce task count.
    final int reducetasks = 20;
    final String reduceTaskCount = "" + reducetasks;
    job.set("mapred.tasktracker.reduce.tasks.maximum", reduceTaskCount);
    job.set("mapred.tasktracker.reduce", reduceTaskCount);
    job.set("mapred.reduce.tasks", reduceTaskCount);

    // Virtual memory allowed for the process tree of each map/reduce task.
    job.set("mapred.job.map.memory.mb", "4000");
    job.set("mapred.job.reduce.memory.mb", "4000");

    job.set("dfs.replication", "1");

    // Child heap and sort settings intended to reduce I/O load.
    job.set("mapred.child.java.opts", "-Xmx1400M");
    job.set("io.sort.mb", "300");
    job.set("io.sort.factor", "30");

    JobClient.runJob(job);
}
From source file:mapreduce.DosAttack.java
License:Apache License
private void issue() throws IOException { LOG.info("Starting DOS on url[{}] with clients[{}]", wsURL, numMappers); DosMapper.init(wsURL);// w ww . ja va 2 s . com JobConf job = new JobConf(DosAttack.class); job.setJarByClass(DosAttack.class); job.setJobName("DOS Attack"); job.setNumReduceTasks(0); job.setInputFormat(NullInputFormat.class); job.setOutputFormat(NullOutputFormat.class); job.setMapperClass(DosMapper.class); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(NullWritable.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(NullWritable.class); job.setNumMapTasks(numMappers); job.setInt(NUM_MAPPERS_KEY, numMappers); job.setInt(NUM_REQUESTS_KEY, numRequests); job.set(TARGET_URL_KEY, wsURL); JobClient.runJob(job); }
From source file:net.peacesoft.nutch.crawl.ReFetcher.java
License:Apache License
/**
 * Runs the fetch job for one segment.
 *
 * <p>Before the job is built, two time-related settings in the shared
 * configuration are rewritten as absolute timestamps. If outlink following
 * is enabled, the total number of outlinks to follow is logged.
 *
 * @param segment segment directory; its {@code CrawlDatum.GENERATE_DIR_NAME}
 *                subdirectory is the job input and the segment itself is the
 *                job output path
 * @param threads value stored under {@code fetcher.threads.fetch}
 * @throws IOException if the job fails
 */
public void fetch(Path segment, int threads) throws IOException {
    checkConfiguration();

    final SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    final long start = System.currentTimeMillis();
    if (LOG.isInfoEnabled()) {
        LOG.info("Fetcher: starting at " + timestampFormat.format(start));
        LOG.info("Fetcher: segment: " + segment);
    }

    // The time limit is anchored to the start of the whole job rather than to
    // a single task; otherwise a failed task would keep retrying.
    final long timeLimitMins = getConf().getLong("ReFetcher.timelimit.mins", -1);
    if (timeLimitMins != -1) {
        final long deadline = System.currentTimeMillis() + (timeLimitMins * 60 * 1000);
        LOG.info("Fetcher Timelimit set for : " + deadline);
        getConf().setLong("fetcher.timelimit", deadline);
    }

    // Point in time after which the throughput threshold feature is enabled.
    // The configured minutes are replaced by an absolute timestamp under the same key.
    final long checkAfterMins = getConf().getLong("fetcher.throughput.threshold.check.after", 10);
    final long checkAfter = System.currentTimeMillis() + (checkAfterMins * 60 * 1000);
    getConf().setLong("fetcher.throughput.threshold.check.after", checkAfter);

    final int maxOutlinkDepth = getConf().getInt("fetcher.follow.outlinks.depth", -1);
    if (maxOutlinkDepth > 0) {
        LOG.info("Fetcher: following outlinks up to depth: " + maxOutlinkDepth);

        final int linksPerDepth = getConf().getInt("fetcher.follow.outlinks.num.links", 4);
        final int depthDivisor = getConf().getInt("fetcher.follow.outlinks.depth.divisor", 2);

        int totalOutlinksToFollow = 0;
        for (int i = 0; i < maxOutlinkDepth; i++) {
            // NOTE(review): all operands are ints, so the division truncates before
            // the multiplication -- confirm a fractional quotient was not intended.
            totalOutlinksToFollow += depthDivisor / (i + 1) * linksPerDepth;
        }
        LOG.info("Fetcher: maximum outlinks to follow: " + totalOutlinksToFollow);
    }

    final JobConf job = new NutchJob(getConf());
    job.setJobName("fetch " + segment);
    job.setInt("fetcher.threads.fetch", threads);
    job.set(Nutch.SEGMENT_NAME_KEY, segment.getName());

    // For politeness, don't permit parallel execution of a single task.
    job.setSpeculativeExecution(false);

    // Input side.
    FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.GENERATE_DIR_NAME));
    job.setInputFormat(ReFetcher.InputFormat.class);
    job.setMapRunnerClass(ReFetcher.class);

    // Output side.
    FileOutputFormat.setOutputPath(job, segment);
    job.setOutputFormat(FetcherOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NutchWritable.class);

    JobClient.runJob(job);

    final long end = System.currentTimeMillis();
    LOG.info("Fetcher: finished at " + timestampFormat.format(end) + ", elapsed: "
            + TimingUtil.elapsedTime(start, end));
}
From source file:net.peacesoft.nutch.crawl.ReGenerator.java
License:Apache License
/** * Generate fetchlists in one or more segments. Whether to filter URLs or * not is read from the crawl.generate.filter property in the configuration * files. If the property is not found, the URLs are filtered. Same for the * normalisation.// w w w. j a va 2 s. c o m * * @param dbDir Crawl database directory * @param segments Segments directory * @param numLists Number of reduce tasks * @param topN Number of top URLs to be selected * @param curTime Current time in milliseconds * * @return Path to generated segment or null if no entries were selected * * @throws IOException When an I/O error occurs */ public Path[] generate(Path dbDir, Path segments, int numLists, long topN, long curTime, boolean filter, boolean norm, boolean force, int maxNumSegments) throws IOException { try { Path tempDir = new Path( getConf().get("mapred.temp.dir", ".") + "/generate-temp-" + System.currentTimeMillis()); Path lock = new Path(dbDir, CrawlDb.LOCK_NAME); FileSystem fs = FileSystem.get(getConf()); LockUtil.createLockFile(fs, lock, force); SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); long start = System.currentTimeMillis(); LOG.info("ReGenerator: starting at " + sdf.format(start)); LOG.info("ReGenerator: Selecting best-scoring urls due for fetch."); LOG.info("ReGenerator: filtering: " + filter); LOG.info("ReGenerator: normalizing: " + norm); if (topN != Long.MAX_VALUE) { LOG.info("ReGenerator: topN: " + topN); } if ("true".equals(getConf().get(GENERATE_MAX_PER_HOST_BY_IP))) { LOG.info( "ReGenerator: GENERATE_MAX_PER_HOST_BY_IP will be ignored, use partition.url.mode instead"); } // map to inverted subset due for fetch, sort by score JobConf job = new NutchJob(getConf()); job.setJobName("generate: select from " + dbDir); if (numLists == -1) { // for politeness make numLists = job.getNumMapTasks(); // a partition per fetch task } if ("local".equals(job.get("mapred.job.tracker")) && numLists != 1) { // override LOG.info("ReGenerator: jobtracker is 'local', 
generating exactly one partition."); numLists = 1; } job.setLong(GENERATOR_CUR_TIME, curTime); // record real generation time long generateTime = System.currentTimeMillis(); job.setLong(Nutch.GENERATE_TIME_KEY, generateTime); job.setLong(GENERATOR_TOP_N, topN); job.setBoolean(GENERATOR_FILTER, filter); job.setBoolean(GENERATOR_NORMALISE, norm); job.setInt(GENERATOR_MAX_NUM_SEGMENTS, maxNumSegments); FileInputFormat.addInputPath(job, new Path(dbDir, CrawlDb.CURRENT_NAME)); job.setInputFormat(SequenceFileInputFormat.class); job.setMapperClass(Selector.class); job.setPartitionerClass(Selector.class); job.setReducerClass(Selector.class); FileOutputFormat.setOutputPath(job, tempDir); job.setOutputFormat(SequenceFileOutputFormat.class); job.setOutputKeyClass(FloatWritable.class); job.setOutputKeyComparatorClass(DecreasingFloatComparator.class); job.setOutputValueClass(SelectorEntry.class); job.setOutputFormat(GeneratorOutputFormat.class); try { JobClient.runJob(job); } catch (IOException e) { throw e; } // read the subdirectories generated in the temp // output and turn them into segments List<Path> generatedSegments = new ArrayList<Path>(); FileStatus[] status = fs.listStatus(tempDir); try { for (FileStatus stat : status) { Path subfetchlist = stat.getPath(); if (!subfetchlist.getName().startsWith("fetchlist-")) { continue; } // start a new partition job for this segment Path newSeg = partitionSegment(fs, segments, subfetchlist, numLists); generatedSegments.add(newSeg); } } catch (Exception e) { LOG.warn("ReGenerator: exception while partitioning segments, exiting ..."); fs.delete(tempDir, true); return null; } if (generatedSegments.size() == 0) { LOG.warn("ReGenerator: 0 records selected for fetching, exiting ..."); LockUtil.removeLockFile(fs, lock); fs.delete(tempDir, true); return null; } if (getConf().getBoolean(GENERATE_UPDATE_CRAWLDB, false)) { // update the db from tempDir Path tempDir2 = new Path( getConf().get("mapred.temp.dir", ".") + "/generate-temp-" + 
System.currentTimeMillis()); job = new NutchJob(getConf()); job.setJobName("generate: updatedb " + dbDir); job.setLong(Nutch.GENERATE_TIME_KEY, generateTime); for (Path segmpaths : generatedSegments) { Path subGenDir = new Path(segmpaths, CrawlDatum.GENERATE_DIR_NAME); FileInputFormat.addInputPath(job, subGenDir); } FileInputFormat.addInputPath(job, new Path(dbDir, CrawlDb.CURRENT_NAME)); job.setInputFormat(SequenceFileInputFormat.class); job.setMapperClass(CrawlDbUpdater.class); job.setReducerClass(CrawlDbUpdater.class); job.setOutputFormat(MapFileOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(CrawlDatum.class); FileOutputFormat.setOutputPath(job, tempDir2); try { JobClient.runJob(job); CrawlDb.install(job, dbDir); } catch (IOException e) { LockUtil.removeLockFile(fs, lock); fs.delete(tempDir, true); fs.delete(tempDir2, true); throw e; } fs.delete(tempDir2, true); } LockUtil.removeLockFile(fs, lock); fs.delete(tempDir, true); long end = System.currentTimeMillis(); LOG.info("ReGenerator: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end)); Path[] patharray = new Path[generatedSegments.size()]; return generatedSegments.toArray(patharray); } catch (Exception ex) { LOG.error("ReGenerator generate error: " + ex.toString(), ex); return null; } }
From source file:nl.tudelft.graphalytics.mapreducev2.evo.DirectedForestFireModelJob.java
License:Apache License
@Override protected void setConfigurationParameters(JobConf jobConfiguration) { super.setConfigurationParameters(jobConfiguration); jobConfiguration.setLong(ForestFireModelUtils.MAX_ID, getParameters().getMaxId() + 1); jobConfiguration.setFloat(ForestFireModelUtils.P_RATIO, getParameters().getPRatio()); jobConfiguration.setFloat(ForestFireModelUtils.R_RATIO, getParameters().getRRatio()); jobConfiguration.set(ForestFireModelUtils.CURRENT_AMBASSADORS, ForestFireModelUtils.verticesIDsMap2String(burnedEdges)); if (getIteration() == 1) { if (getNumMappers() > 0) { jobConfiguration.setInt(ForestFireModelUtils.NEW_VERTICES_NR, getParameters().getNumNewVertices() / getNumMappers()); jobConfiguration.setInt(ForestFireModelUtils.ID_SHIFT, getNumMappers()); } else {//from w w w .j ava2 s.co m jobConfiguration.setInt(ForestFireModelUtils.NEW_VERTICES_NR, getParameters().getNumNewVertices()); jobConfiguration.setInt(ForestFireModelUtils.ID_SHIFT, 1024 * 1024); } jobConfiguration.setBoolean(ForestFireModelUtils.IS_INIT, true); } }
From source file:nl.tudelft.graphalytics.mapreducev2.evo.UndirectedForestFireModelJob.java
License:Apache License
/**
 * Adds the forest fire model settings to the job configuration.
 *
 * <p>The max id (plus one), the two ratios and the serialized
 * {@code burnedEdges} map are always written. On iteration 1 the number of
 * new vertices and the id shift are written as well, followed by the
 * {@code IS_INIT} flag. On iteration {@code maxIterations + 1} the
 * {@code IS_FINAL} flag is written instead.
 *
 * @param jobConfiguration the job configuration to populate
 */
@Override
protected void setConfigurationParameters(JobConf jobConfiguration) {
    super.setConfigurationParameters(jobConfiguration);

    jobConfiguration.setLong(ForestFireModelUtils.MAX_ID, getParameters().getMaxId() + 1);
    jobConfiguration.setFloat(ForestFireModelUtils.P_RATIO, getParameters().getPRatio());
    jobConfiguration.setFloat(ForestFireModelUtils.R_RATIO, getParameters().getRRatio());
    jobConfiguration.set(ForestFireModelUtils.CURRENT_AMBASSADORS,
            ForestFireModelUtils.verticesIDsMap2String(burnedEdges));

    if (getIteration() != 1) {
        // Not the first iteration: the only remaining case is the final one.
        if (getIteration() == getParameters().getMaxIterations() + 1) {
            jobConfiguration.setBoolean(ForestFireModelUtils.IS_FINAL, true);
        }
        return;
    }

    final int mappers = getNumMappers();
    final int newVerticesPerMapper;
    final int idShift;
    if (mappers > 0) {
        // Split the new vertices across mappers (integer division).
        newVerticesPerMapper = getParameters().getNumNewVertices() / mappers;
        idShift = mappers;
    } else {
        // No mapper count available: use the full amount and a fixed shift.
        newVerticesPerMapper = getParameters().getNumNewVertices();
        idShift = 1024 * 1024;
    }
    jobConfiguration.setInt(ForestFireModelUtils.NEW_VERTICES_NR, newVerticesPerMapper);
    jobConfiguration.setInt(ForestFireModelUtils.ID_SHIFT, idShift);

    jobConfiguration.setBoolean(ForestFireModelUtils.IS_INIT, true);
}
From source file:nthu.scopelab.tsqr.ssvd.ABtDenseOutJob.java
License:Apache License
/**
 * Configures and runs the job with no reduce tasks.
 *
 * <p>{@code k}, {@code p} and the Bt path are stored in the job
 * configuration. The number of map tasks is derived from the gathered input
 * files and the checked {@code mis} value.
 *
 * @param conf base configuration for the job
 * @param inputPath input paths of the job
 * @param inputBt path stored under {@code PROP_BT_PATH}
 * @param outputPath output path of the job
 * @param k value stored under {@code QJob.PROP_K}
 * @param p value stored under {@code QJob.PROP_P}
 * @param reduceTasks not used by this method
 * @param mis value adjusted by {@code Checker.checkMis} and then used to
 *            compute the number of map tasks
 * @throws Exception if configuration or job execution fails
 */
public static void run(Configuration conf, Path[] inputPath, Path inputBt, Path outputPath, int k, int p,
        int reduceTasks, int mis) throws Exception {
    final JobConf jobConf = new JobConf(conf, ABtDenseOutJob.class);
    jobConf.setJobName("ABtDenseOutJob");

    // Input and output formats.
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    // Parameters handed to the mapper through the job configuration.
    jobConf.setInt(QJob.PROP_K, k);
    jobConf.setInt(QJob.PROP_P, p);
    jobConf.set(PROP_BT_PATH, inputBt.toString());

    FileOutputFormat.setOutputPath(jobConf, outputPath);

    // Map output and job output share the same key/value types.
    jobConf.setMapOutputKeyClass(IntWritable.class);
    jobConf.setMapOutputValueClass(LMatrixWritable.class);
    jobConf.setOutputKeyClass(IntWritable.class);
    jobConf.setOutputValueClass(LMatrixWritable.class);

    jobConf.setMapperClass(ABtMapper.class);

    // The map task count comes from the input size and the checked mis value.
    final fileGather gatheredInput = new fileGather(inputPath, "", FileSystem.get(jobConf));
    mis = Checker.checkMis(mis, gatheredInput.getInputSize(), FileSystem.get(jobConf));
    jobConf.setNumMapTasks(gatheredInput.recNumMapTasks(mis));
    jobConf.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(jobConf, inputPath);

    RunningJob rj = JobClient.runJob(jobConf);
}