List of usage examples for org.apache.hadoop.mapred JobConf getInt
public int getInt(String name, int defaultValue)
Get the value of the name property as an int. If no such property exists, the provided defaultValue is returned; if the stored value is not a valid int, an error is thrown.
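For orientation before the project examples below, here is a minimal sketch of the common pattern: reading an int-valued tuning parameter in configure(JobConf) with a fallback default. The class name ExampleConfigurable and the property name "example.buffer.size" are hypothetical, used only for illustration; they do not come from any of the projects listed here.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;

// Minimal sketch (hypothetical names): read an int property during task setup.
public class ExampleConfigurable extends MapReduceBase {
    private int bufferSize;

    @Override
    public void configure(JobConf job) {
        // Falls back to 4096 when "example.buffer.size" is not set in the job configuration.
        bufferSize = job.getInt("example.buffer.size", 4096);
    }
}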
From source file:pathmerge.linear.MergePathH1Reducer.java
License:Apache License
public void configure(JobConf job) {
    mos = new MultipleOutputs(job);
    I_MERGE = Integer.parseInt(job.get("iMerge"));
    KMER_SIZE = job.getInt("sizeKmer", 0);
    outputValue = new MergePathValueWritable();
    kmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
    outputKmer = new VKmerBytesWritable(KMER_SIZE);
}
From source file:pathmerge.log.MergePathH2Reducer.java
License:Apache License
public void configure(JobConf job) {
    mos = new MultipleOutputs(job);
    I_MERGE = Integer.parseInt(job.get("iMerge"));
    KMER_SIZE = job.getInt("sizeKmer", 0);
    outputValue = new MergePathValueWritable();
    tmpOutputValue = new MergePathValueWritable();
    kmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
    outputKmer = new VKmerBytesWritable(KMER_SIZE);
    tmpKmer1 = new VKmerBytesWritable(KMER_SIZE);
    tmpKmer2 = new VKmerBytesWritable(KMER_SIZE);
}
From source file:redpoll.text.TermOutputFormat.java
License:Apache License
/**
 * Generate the output file name based on a given name and the input file name.
 * If the map input file does not exist (i.e. this is not for a map-only job),
 * the given name is returned unchanged. If the config value for
 * "num.of.trailing.legs.to.use" is not set, or is set to 0 or a negative value,
 * the given name is returned unchanged. Otherwise, return a file name
 * consisting of the N trailing legs of the input file name, where N is the
 * config value for "num.of.trailing.legs.to.use".
 *
 * @param job
 *          the job config
 * @param name
 *          the output file name
 * @return the output file name based on the given name and the input file name.
 */
protected static String getInputFileBasedOutputFileName(JobConf job, String name) {
    String infilepath = job.get("map.input.file");
    if (infilepath == null) {
        // if the map input file does not exist, then return the given name
        return name;
    }
    int numOfTrailingLegsToUse = job.getInt("mapred.outputformat.numOfTrailingLegs", 0);
    if (numOfTrailingLegsToUse <= 0) {
        return name;
    }
    Path infile = new Path(infilepath);
    Path parent = infile.getParent();
    String midName = infile.getName();
    Path outPath = new Path(midName);
    for (int i = 1; i < numOfTrailingLegsToUse; i++) {
        if (parent == null)
            break;
        midName = parent.getName();
        if (midName.length() == 0)
            break;
        parent = parent.getParent();
        outPath = new Path(midName, outPath);
    }
    return outPath.toString();
}
From source file:redpoll.text.TermReducer.java
License:Apache License
@Override
public void configure(JobConf job) {
    dfLimit = job.getInt("redpoll.text.df.limit", 3);
}
From source file:redpoll.text.TfIdfMapper.java
License:Apache License
@Override
public void configure(JobConf job) {
    try {
        terms = new HashMap<String, Integer>();
        DefaultStringifier<HashMap<String, Integer>> mapStringifier = new DefaultStringifier<HashMap<String, Integer>>(
                job, GenericsUtil.getClass(terms));
        String docFreqString = job.get("redpoll.text.terms");
        terms = mapStringifier.fromString(docFreqString);
        toalDocNum = job.getInt("redpoll.docs.num", 1024);
    } catch (IOException exp) {
        exp.printStackTrace();
    }
}
From source file:redpoll.text.TfIdfReducer.java
License:Apache License
@Override
public void configure(JobConf job) {
    totalTerms = job.getInt("redpoll.text.terms.num", 1024);
}
From source file:skewtune.mapreduce.STJobTracker.java
License:Apache License
@SuppressWarnings("unchecked") STJobTracker(final JobConf conf, String jobtrackerIndentifier) throws IOException, InterruptedException { // find the owner of the process // get the desired principal to load String keytabFilename = conf.get(JTConfig.JT_KEYTAB_FILE); UserGroupInformation.setConfiguration(conf); if (keytabFilename != null) { String desiredUser = conf.get(JTConfig.JT_USER_NAME, System.getProperty("user.name")); UserGroupInformation.loginUserFromKeytab(desiredUser, keytabFilename); mrOwner = UserGroupInformation.getLoginUser(); } else {//from ww w. j a va 2 s.c o m mrOwner = UserGroupInformation.getCurrentUser(); } supergroup = conf.get(MR_SUPERGROUP, "supergroup"); LOG.info("Starting jobtracker with owner as " + mrOwner.getShortUserName() + " and supergroup as " + supergroup); long secretKeyInterval = conf.getLong(MRConfig.DELEGATION_KEY_UPDATE_INTERVAL_KEY, MRConfig.DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT); long tokenMaxLifetime = conf.getLong(MRConfig.DELEGATION_TOKEN_MAX_LIFETIME_KEY, MRConfig.DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT); long tokenRenewInterval = conf.getLong(MRConfig.DELEGATION_TOKEN_RENEW_INTERVAL_KEY, MRConfig.DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT); secretManager = new DelegationTokenSecretManager(secretKeyInterval, tokenMaxLifetime, tokenRenewInterval, DELEGATION_TOKEN_GC_INTERVAL); secretManager.startThreads(); // // Grab some static constants // NUM_HEARTBEATS_IN_SECOND = conf.getInt(JT_HEARTBEATS_IN_SECOND, DEFAULT_NUM_HEARTBEATS_IN_SECOND); if (NUM_HEARTBEATS_IN_SECOND < MIN_NUM_HEARTBEATS_IN_SECOND) { NUM_HEARTBEATS_IN_SECOND = DEFAULT_NUM_HEARTBEATS_IN_SECOND; } HEARTBEATS_SCALING_FACTOR = conf.getFloat(JT_HEARTBEATS_SCALING_FACTOR, DEFAULT_HEARTBEATS_SCALING_FACTOR); if (HEARTBEATS_SCALING_FACTOR < MIN_HEARTBEATS_SCALING_FACTOR) { HEARTBEATS_SCALING_FACTOR = DEFAULT_HEARTBEATS_SCALING_FACTOR; } // whether to dump or not every heartbeat message even when DEBUG is enabled dumpHeartbeat = conf.getBoolean(JT_HEARTBEATS_DUMP, false); // This is a directory of temporary submission files. We delete it // on startup, and can delete any files that we're done with this.conf = conf; JobConf jobConf = new JobConf(conf); // Set ports, start RPC servers, setup security policy etc. 
InetSocketAddress addr = getAddress(conf); this.localMachine = addr.getHostName(); this.port = addr.getPort(); int handlerCount = conf.getInt(JT_IPC_HANDLER_COUNT, 10); this.interTrackerServer = RPC.getServer(SkewTuneClientProtocol.class, this, addr.getHostName(), addr.getPort(), handlerCount, false, conf, secretManager); if (LOG.isDebugEnabled()) { Properties p = System.getProperties(); for (Iterator it = p.keySet().iterator(); it.hasNext();) { String key = (String) it.next(); String val = p.getProperty(key); LOG.debug("Property '" + key + "' is " + val); } } InetSocketAddress infoSocAddr = NetUtils .createSocketAddr(conf.get(JT_HTTP_ADDRESS, String.format("%s:0", this.localMachine))); String infoBindAddress = infoSocAddr.getHostName(); int tmpInfoPort = infoSocAddr.getPort(); this.startTime = System.currentTimeMillis(); infoServer = new HttpServer("job", infoBindAddress, tmpInfoPort, tmpInfoPort == 0, conf); infoServer.setAttribute("job.tracker", this); infoServer.addServlet("jobcompletion", "/completion", JobCompletionServlet.class); infoServer.addServlet("taskspeculation", "/speculation", SpeculationEventServlet.class); infoServer.addServlet("skewreport", "/skew", SkewReportServlet.class); infoServer.addServlet("tasksplit", "/split/*", SplitTaskServlet.class); infoServer.addServlet("tasksplitV2", "/splitV2/*", SplitTaskV2Servlet.class); infoServer.start(); this.trackerIdentifier = jobtrackerIndentifier; // The rpc/web-server ports can be ephemeral ports... // ... ensure we have the correct info this.port = interTrackerServer.getListenerAddress().getPort(); this.conf.set(JT_IPC_ADDRESS, (this.localMachine + ":" + this.port)); LOG.info("JobTracker up at: " + this.port); this.infoPort = this.infoServer.getPort(); this.conf.set(JT_HTTP_ADDRESS, infoBindAddress + ":" + this.infoPort); LOG.info("JobTracker webserver: " + this.infoServer.getPort()); this.defaultNotificationUrl = String.format("http://%s:%d/completion?jobid=$jobId&status=$jobStatus", infoBindAddress, this.infoPort); LOG.info("JobTracker completion URI: " + defaultNotificationUrl); // this.defaultSpeculationEventUrl = String.format("http://%s:%d/speculation?taskid=$taskId&remainTime=$taskRemainTime",infoBindAddress,this.infoPort); this.defaultSpeculationEventUrl = String.format("http://%s:%d/speculation?jobid=$jobId", infoBindAddress, this.infoPort); LOG.info("JobTracker speculation event URI: " + defaultSpeculationEventUrl); this.defaultSkewReportUrl = String.format("http://%s:%d/skew", infoBindAddress, this.infoPort); LOG.info("JobTracker skew report event URI: " + defaultSkewReportUrl); this.trackerHttp = String.format("http://%s:%d", infoBindAddress, this.infoPort); while (!Thread.currentThread().isInterrupted()) { try { // if we haven't contacted the namenode go ahead and do it if (fs == null) { fs = mrOwner.doAs(new PrivilegedExceptionAction<FileSystem>() { @Override public FileSystem run() throws IOException { return FileSystem.get(conf); } }); } // clean up the system dir, which will only work if hdfs is out // of safe mode if (systemDir == null) { systemDir = new Path(getSystemDir()); } try { FileStatus systemDirStatus = fs.getFileStatus(systemDir); if (!systemDirStatus.getOwner().equals(mrOwner.getShortUserName())) { throw new AccessControlException( "The systemdir " + systemDir + " is not owned by " + mrOwner.getShortUserName()); } if (!systemDirStatus.getPermission().equals(SYSTEM_DIR_PERMISSION)) { LOG.warn("Incorrect permissions on " + systemDir + ". 
Setting it to " + SYSTEM_DIR_PERMISSION); fs.setPermission(systemDir, new FsPermission(SYSTEM_DIR_PERMISSION)); } else { break; } } catch (FileNotFoundException fnf) { } // ignore } catch (AccessControlException ace) { LOG.warn("Failed to operate on " + JTConfig.JT_SYSTEM_DIR + "(" + systemDir + ") because of permissions."); LOG.warn("Manually delete the " + JTConfig.JT_SYSTEM_DIR + "(" + systemDir + ") and then start the JobTracker."); LOG.warn("Bailing out ... "); throw ace; } catch (IOException ie) { LOG.info("problem cleaning system directory: " + systemDir, ie); } Thread.sleep(FS_ACCESS_RETRY_PERIOD); } if (Thread.currentThread().isInterrupted()) { throw new InterruptedException(); } // initialize cluster variable cluster = new Cluster(this.conf); // now create a job client proxy jtClient = (ClientProtocol) RPC.getProxy(ClientProtocol.class, ClientProtocol.versionID, JobTracker.getAddress(conf), mrOwner, this.conf, NetUtils.getSocketFactory(conf, ClientProtocol.class)); new SpeculativeScheduler().start(); // initialize task event fetcher new TaskCompletionEventFetcher().start(); // Same with 'localDir' except it's always on the local disk. asyncDiskService = new MRAsyncDiskService(FileSystem.getLocal(conf), conf.getLocalDirs()); asyncDiskService.moveAndDeleteFromEachVolume(SUBDIR); // keep at least one asynchronous worker per CPU core int numProcs = Runtime.getRuntime().availableProcessors(); LOG.info("# of available processors = " + numProcs); int maxFactor = conf.getInt(JT_MAX_ASYNC_WORKER_FACTOR, 2); asyncWorkers = new ThreadPoolExecutor(numProcs, numProcs * maxFactor, 30, TimeUnit.SECONDS, new SynchronousQueue<Runnable>(true), new ThreadPoolExecutor.CallerRunsPolicy()); speculativeSplit = conf.getBoolean(JT_SPECULATIVE_SPLIT, false); }
From source file:source.TeraSort.java
License:Apache License
public static int getOutputReplication(JobConf job) {
    return job.getInt(OUTPUT_REPLICATION, 1);
}
From source file:StorageEngineClient.CombineFileInputFormat.java
License:Open Source License
private void getMoreSplits(JobConf job, Path[] paths1, long maxSize, long minSizeNode, long minSizeRack,
        List<CombineFileSplit> splits) throws IOException, NullGzFileException {
    if (paths1.length == 0) {
        return;
    }
    Path[] paths = paths1;
    ArrayList<Path> splitable = new ArrayList<Path>();
    ArrayList<Path> unsplitable = new ArrayList<Path>();
    for (int i = 0; i < paths1.length; i++) {
        if (isSplitable(paths1[i].getFileSystem(job), paths1[i])) {
            splitable.add(paths1[i]);
        } else {
            unsplitable.add(paths1[i]);
        }
    }
    if (unsplitable.size() != 0) {
        paths = new Path[splitable.size()];
        splitable.toArray(paths);
    }

    OneFileInfo[] files;
    HashMap<String, List<OneBlockInfo>> rackToBlocks = new HashMap<String, List<OneBlockInfo>>();
    HashMap<OneBlockInfo, String[]> blockToNodes = new HashMap<OneBlockInfo, String[]>();
    HashMap<String, List<OneBlockInfo>> nodeToBlocks = new HashMap<String, List<OneBlockInfo>>();

    files = new OneFileInfo[paths.length];
    long totLength = 0;
    for (int i = 0; i < paths.length; i++) {
        files[i] = new OneFileInfo(paths[i], job, rackToBlocks, blockToNodes, nodeToBlocks);
        totLength += files[i].getLength();
    }

    for (Iterator<Map.Entry<String, List<OneBlockInfo>>> iter = nodeToBlocks.entrySet().iterator(); iter
            .hasNext();) {
        Map.Entry<String, List<OneBlockInfo>> onenode = iter.next();
        this.processsplit(job, onenode, blockToNodes, maxSize, minSizeNode, minSizeRack, splits, "node");
    }

    for (Iterator<Map.Entry<String, List<OneBlockInfo>>> iter = rackToBlocks.entrySet().iterator(); iter
            .hasNext();) {
        Map.Entry<String, List<OneBlockInfo>> onerack = iter.next();
        this.processsplit(job, onerack, blockToNodes, maxSize, minSizeNode, minSizeRack, splits, "rack");
    }

    this.processsplit(job, null, blockToNodes, maxSize, minSizeNode, minSizeRack, splits, "all");

    int maxFileNumPerSplit = job.getInt("hive.merge.inputfiles.maxFileNumPerSplit", 1000);
    HashSet<OneBlockInfo> hs = new HashSet<OneBlockInfo>();
    while (blockToNodes.size() > 0) {
        ArrayList<OneBlockInfo> validBlocks = new ArrayList<OneBlockInfo>();
        List<String> nodes = new ArrayList<String>();
        int filenum = 0;
        hs.clear();
        for (OneBlockInfo blockInfo : blockToNodes.keySet()) {
            validBlocks.add(blockInfo);
            filenum++;
            for (String host : blockInfo.hosts) {
                nodes.add(host);
            }
            hs.add(blockInfo);
            if (filenum >= maxFileNumPerSplit) {
                break;
            }
        }
        for (OneBlockInfo blockInfo : hs) {
            blockToNodes.remove(blockInfo);
        }
        this.addCreatedSplit(job, splits, nodes, validBlocks);
    }

    if (unsplitable.size() != 0) {
        HashMap<OneBlockInfo, String[]> fileToNodes = new HashMap<OneBlockInfo, String[]>();
        for (Path path : unsplitable) {
            FileSystem fs = path.getFileSystem(job);
            FileStatus stat = fs.getFileStatus(path);
            long len = fs.getFileStatus(path).getLen();
            BlockLocation[] locations = path.getFileSystem(job).getFileBlockLocations(stat, 0, len);
            if (locations.length == 0) {
                console.printError("The file " + path.toUri().toString() + " maybe is empty, please check it!");
                throw new NullGzFileException(
                        "The file " + path.toUri().toString() + " maybe is empty, please check it!");
            }
            LOG.info("unsplitable file:" + path.toUri().toString() + " length:" + len);
            OneBlockInfo oneblock = new OneBlockInfo(path, 0, len, locations[0].getHosts(),
                    locations[0].getTopologyPaths());
            fileToNodes.put(oneblock, locations[0].getHosts());
        }
        this.processsplitForUnsplit(job, null, fileToNodes, maxSize, minSizeNode, minSizeRack, splits, "all");
    }
}
From source file:StorageEngineClient.CombineFileInputFormat.java
License:Open Source License
private void getMoreSplitsWithStatus(JobConf job, Path[] paths1, Map<String, FileStatus> fileNameToStatus,
        long maxSize, long minSizeNode, long minSizeRack, List<CombineFileSplit> splits)
        throws IOException, NullGzFileException {
    if (paths1.length == 0) {
        return;
    }
    Path[] paths = paths1;
    ArrayList<Path> splitable = new ArrayList<Path>();
    ArrayList<Path> unsplitable = new ArrayList<Path>();
    for (int i = 0; i < paths1.length; i++) {
        if (isSplitable(paths1[i].getFileSystem(job), paths1[i])) {
            splitable.add(paths1[i]);
        } else {
            unsplitable.add(paths1[i]);
        }
    }
    if (unsplitable.size() != 0) {
        paths = new Path[splitable.size()];
        splitable.toArray(paths);
    }

    OneFileInfo[] files;
    HashMap<String, List<OneBlockInfo>> rackToBlocks = new HashMap<String, List<OneBlockInfo>>();
    HashMap<OneBlockInfo, String[]> blockToNodes = new HashMap<OneBlockInfo, String[]>();
    HashMap<String, List<OneBlockInfo>> nodeToBlocks = new HashMap<String, List<OneBlockInfo>>();

    files = new OneFileInfo[paths.length];
    long totLength = 0;
    for (int i = 0; i < paths.length; i++) {
        files[i] = new OneFileInfo(paths[i], fileNameToStatus.get(paths[i].toString()), job, rackToBlocks,
                blockToNodes, nodeToBlocks);
        totLength += files[i].getLength();
    }

    for (Iterator<Map.Entry<String, List<OneBlockInfo>>> iter = nodeToBlocks.entrySet().iterator(); iter
            .hasNext();) {
        Map.Entry<String, List<OneBlockInfo>> onenode = iter.next();
        this.processsplit(job, onenode, blockToNodes, maxSize, minSizeNode, minSizeRack, splits, "node");
    }

    for (Iterator<Map.Entry<String, List<OneBlockInfo>>> iter = rackToBlocks.entrySet().iterator(); iter
            .hasNext();) {
        Map.Entry<String, List<OneBlockInfo>> onerack = iter.next();
        this.processsplit(job, onerack, blockToNodes, maxSize, minSizeNode, minSizeRack, splits, "rack");
    }

    this.processsplit(job, null, blockToNodes, maxSize, minSizeNode, minSizeRack, splits, "all");

    int maxFileNumPerSplit = job.getInt("hive.merge.inputfiles.maxFileNumPerSplit", 1000);
    HashSet<OneBlockInfo> hs = new HashSet<OneBlockInfo>();
    while (blockToNodes.size() > 0) {
        ArrayList<OneBlockInfo> validBlocks = new ArrayList<OneBlockInfo>();
        List<String> nodes = new ArrayList<String>();
        int filenum = 0;
        hs.clear();
        for (OneBlockInfo blockInfo : blockToNodes.keySet()) {
            validBlocks.add(blockInfo);
            filenum++;
            for (String host : blockInfo.hosts) {
                nodes.add(host);
            }
            hs.add(blockInfo);
            if (filenum >= maxFileNumPerSplit) {
                break;
            }
        }
        for (OneBlockInfo blockInfo : hs) {
            blockToNodes.remove(blockInfo);
        }
        this.addCreatedSplit(job, splits, nodes, validBlocks);
    }

    if (unsplitable.size() != 0) {
        HashMap<OneBlockInfo, String[]> fileToNodes = new HashMap<OneBlockInfo, String[]>();
        for (Path path : unsplitable) {
            FileSystem fs = path.getFileSystem(job);
            FileStatus stat = fileNameToStatus.get(path.toString()); // fs.getFileStatus(path);
            long len = stat.getLen();
            BlockLocation[] locations = fs.getFileBlockLocations(stat, 0, len);
            if (locations.length == 0) {
                console.printError("The file " + path.toUri().toString() + " maybe is empty, please check it!");
                throw new NullGzFileException(
                        "The file " + path.toUri().toString() + " maybe is empty, please check it!");
            }
            LOG.info("unsplitable file:" + path.toUri().toString() + " length:" + len);
            OneBlockInfo oneblock = new OneBlockInfo(path, 0, len, locations[0].getHosts(),
                    locations[0].getTopologyPaths());
            fileToNodes.put(oneblock, locations[0].getHosts());
        }
        this.processsplitForUnsplit(job, null, fileToNodes, maxSize, minSizeNode, minSizeRack, splits, "all");
    }
}