List of usage examples for org.apache.hadoop.mapred.JobConf getBoolean

public boolean getBoolean(String name, boolean defaultValue)

Gets the value of the name property as a boolean.

Parameter: name - the property name.
Returns: the value of the property as a boolean, or defaultValue if the property is not set or is not a valid boolean.
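Before the per-project examples, here is a minimal, self-contained sketch of the call itself. The property name my.feature.enabled is invented purely for illustration; any string key works the same way.

import org.apache.hadoop.mapred.JobConf;

public class GetBooleanExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // The property is not set yet, so the supplied default is returned.
        boolean before = conf.getBoolean("my.feature.enabled", false); // false

        // After setBoolean, getBoolean returns the stored value and ignores the default.
        conf.setBoolean("my.feature.enabled", true);
        boolean after = conf.getBoolean("my.feature.enabled", false);  // true

        System.out.println(before + " " + after);
    }
}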
From source file:org.hypertable.hadoop.mapred.TextTableInputFormat.java
License:Open Source License
public void configure(JobConf job) {
    m_include_timestamps = job.getBoolean(INCLUDE_TIMESTAMPS, false);
    m_no_escape = job.getBoolean(NO_ESCAPE, false);
    try {
        m_base_spec = new ScanSpec();
        parseColumns(job);
        parseOptions(job);
        parseTimestampInterval(job);
        parseRowInterval(job);
        parseValueRegexps(job);
        parseColumnPredicate(job);
        System.err.println(m_base_spec);
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
    }
}
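A driver would normally set these flags on the JobConf before the framework invokes configure(). A minimal sketch, assuming INCLUDE_TIMESTAMPS and NO_ESCAPE are public constants of TextTableInputFormat (their visibility is an assumption here):

JobConf job = new JobConf();
// Ask the input format to emit cell timestamps and to skip output escaping.
job.setBoolean(TextTableInputFormat.INCLUDE_TIMESTAMPS, true);
job.setBoolean(TextTableInputFormat.NO_ESCAPE, true);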
From source file:org.mitre.ccv.mapred.CalculateKmerRevisedRelativeEntropy.java
License:Open Source License
/**
 * Start up a map-reduce job with the given parameters.
 *
 * @param jobConf
 * @param globalInput
 * @param cvInput
 * @param output
 * @param cleanLogs
 * @return
 * @throws java.lang.Exception
 */
public int initJob(JobConf jobConf, String globalInput, String cvInput, String output, boolean cleanLogs)
        throws Exception {
    JobConf conf = new JobConf(jobConf, CalculateKmerRevisedRelativeEntropy.class);
    conf.setJobName("CalculateKmerRevisedRelativeEntropy");

    /** Set up paths */
    String ts = FileUtils.getSimpleDate();
    String cvOutput = output + "_" + ts + COMPOSITION_VECTORS_KMER_POSTFIX;
    /** commaSeparatedPaths */
    String mergedInput = cvOutput + "," + globalInput;
    /** merged output */
    String mergedOutput = output + "_" + ts + MERGED_KMER_POSTFIX;

    /**
     * First, map all the CompositionVector's k-mers to Text as keys and
     * local k-mer/value pairs (KmerPiValuePairWritables) as values.
     */
    JobConf subConf = new JobConf(conf);
    subConf.setJobName("CalculateKmerRevisedRelativeEntropy-CompositionVectors");
    // setup mapper
    SequenceFileInputFormat.setInputPaths(subConf, cvInput);
    subConf.setInputFormat(SequenceFileInputFormat.class);
    subConf.setMapperClass(CompositionVectorMap.class);
    subConf.setOutputKeyClass(Text.class); // job output key class
    subConf.setOutputValueClass(StringDoublePairWritable.class); // job output value class
    // Uses default reducer (IdentityReducer)
    subConf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(subConf, new Path(cvOutput));
    LOG.info("Converting CompositionVectors to k-mer/pi-value pairs.");
    JobClient.runJob(subConf);

    /**
     * Second, map (merge) all the k-mer/pi-value pairs together in an
     * array of values (KmerPiValueArrayWritables).
     */
    subConf = new JobConf(conf);
    subConf.setJobName("CalculateKmerRevisedRelativeEntropy-Merging");
    // setup mapper
    SequenceFileInputFormat.setInputPaths(subConf, mergedInput);
    subConf.setInputFormat(SequenceFileInputFormat.class);
    subConf.setMapperClass(MergeMap.class);
    subConf.setOutputKeyClass(Text.class);
    subConf.setOutputValueClass(KmerPiValueArrayWritable.class);
    // setup combiner/reducer
    subConf.setCombinerClass(MergeReducer.class);
    subConf.setReducerClass(MergeReducer.class);
    subConf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(subConf, new Path(mergedOutput));
    LOG.info("Merging k-mers/pi-values from CompositionVectors and all sequences (global)");
    JobClient.runJob(subConf);

    /**
     * Third, calculate entropies (map-reduce)
     */
    subConf = new JobConf(conf);
    subConf.setJobName("CalculateKmerRevisedRelativeEntropy-RRE");
    // setup mapper
    SequenceFileInputFormat.setInputPaths(subConf, mergedOutput);
    subConf.setInputFormat(SequenceFileInputFormat.class);
    subConf.setMapperClass(EntropyMap.class);
    subConf.setOutputKeyClass(Text.class);
    subConf.setOutputValueClass(KmerEntropyPairWritable.class);
    // Setup Combiner and Reducer
    subConf.setCombinerClass(EntropyCombiner.class);
    subConf.setReducerClass(EntropyReducer.class);
    if (conf.getBoolean(TEXT_OUTPUT, false)) {
        FileOutputFormat.setOutputPath(subConf, new Path(output));
    } else {
        subConf.setOutputFormat(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputPath(subConf, new Path(output));
    }
    LOG.info("Calculating entropies");
    JobClient.runJob(subConf);

    /**
     * Remove tmp directories
     */
    Path tmp = new Path(cvOutput);
    FileSystem fs = tmp.getFileSystem(conf);
    fs.delete(tmp, true);
    tmp = new Path(mergedOutput);
    fs.delete(tmp, true);
    return 0;
}
From source file:org.mitre.ccv.mapred.SortKmerRevisedRelativeEntropies.java
License:Open Source License
/**
 * Start a job with the given configuration and parameters.
 *
 * <P>Setting the <code>jobConf</code> boolean property "ccv.sortrre.textoutput" results in
 * the output being text instead of the default binary {@link SequenceFile}.
 *
 * @param jobConf
 * @param input
 * @param output
 * @param cleanLogs
 * @return
 * @throws java.lang.Exception
 */
public int initJob(JobConf jobConf, String input, String output, boolean cleanLogs) throws Exception {
    JobConf conf = new JobConf(jobConf, SortKmerRevisedRelativeEntropies.class);
    conf.setJobName("SortKmerRevisedRelativeEntropies");
    // setup mapper
    SequenceFileInputFormat.setInputPaths(conf, input);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapperClass(ReverseSortMap.class);
    conf.setOutputKeyClass(KmerEntropyPairWritable.class); // job output key class
    conf.setOutputValueClass(Text.class); // job output value class
    // Uses default reducer (IdentityReducer)
    if (conf.getBoolean(TEXT_OUTPUT, false)) {
        FileOutputFormat.setOutputPath(conf, new Path(output));
    } else {
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputPath(conf, new Path(output));
    }
    JobClient.runJob(conf);
    return 0;
}
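As the javadoc above notes, the "ccv.sortrre.textoutput" property switches this job to plain-text output. A minimal sketch of a driver fragment that flips the flag before calling initJob; the instance construction and the input/output paths are invented for illustration:

JobConf jobConf = new JobConf();
// Request text output instead of the default SequenceFile output.
jobConf.setBoolean("ccv.sortrre.textoutput", true);

SortKmerRevisedRelativeEntropies sorter = new SortKmerRevisedRelativeEntropies();
sorter.initJob(jobConf, "/data/rre-input", "/data/rre-sorted", true);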
From source file:org.sf.xrime.preprocessing.pajek.PajekFormat2LabeledAdjBiSetVertex.java
License:Apache License
public void toBinaryData() {
    try {
        JobConf jobConf = new JobConf(new Configuration(), PajekFormat2LabeledAdjBiSetVertex.class);
        Path filePath = new Path(dstPath + "/part00000");
        Path path = new Path(jobConf.getWorkingDirectory(), filePath);
        FileSystem fs = path.getFileSystem(jobConf);
        CompressionCodec codec = null;
        CompressionType compressionType = CompressionType.NONE;
        if (jobConf.getBoolean("mapred.output.compress", false)) {
            // find the kind of compression to do
            String val = jobConf.get("mapred.output.compression.type", CompressionType.RECORD.toString());
            compressionType = CompressionType.valueOf(val);
            // find the right codec
            Class<? extends CompressionCodec> codecClass = DefaultCodec.class;
            String name = jobConf.get("mapred.output.compression.codec");
            if (name != null) {
                try {
                    codecClass = jobConf.getClassByName(name).asSubclass(CompressionCodec.class);
                } catch (ClassNotFoundException e) {
                    throw new IllegalArgumentException("Compression codec " + name + " was not found.", e);
                }
            }
            codec = ReflectionUtils.newInstance(codecClass, jobConf);
        }
        Set<String> keySet = vertexes.keySet();
        Iterator<String> iter = keySet.iterator();
        LabeledAdjBiSetVertex currentAdjVertex = new LabeledAdjBiSetVertex();
        SequenceFile.Writer out = SequenceFile.createWriter(fs, jobConf, path, Text.class,
                LabeledAdjBiSetVertex.class, compressionType, codec, null);
        while (iter.hasNext()) {
            currentAdjVertex = vertexes.get(iter.next());
            out.append(new Text(currentAdjVertex.getId()), currentAdjVertex);
        }
        out.close();
    } catch (IOException e) {
    }
}
From source file:org.sf.xrime.preprocessing.pajek.PajekFormat2WeightedLabeledAdjVertex.java
License:Apache License
public void toBinaryData() {
    try {
        JobConf jobConf = new JobConf(new Configuration(), PajekFormat2WeightedLabeledAdjVertex.class);
        Path filePath = new Path(dstPath + "/part00000");
        Path path = new Path(jobConf.getWorkingDirectory(), filePath);
        FileSystem fs = path.getFileSystem(jobConf);
        CompressionCodec codec = null;
        CompressionType compressionType = CompressionType.NONE;
        if (jobConf.getBoolean("mapred.output.compress", false)) {
            // find the kind of compression to do
            String val = jobConf.get("mapred.output.compression.type", CompressionType.RECORD.toString());
            compressionType = CompressionType.valueOf(val);
            // find the right codec
            Class<? extends CompressionCodec> codecClass = DefaultCodec.class;
            String name = jobConf.get("mapred.output.compression.codec");
            if (name != null) {
                try {
                    codecClass = jobConf.getClassByName(name).asSubclass(CompressionCodec.class);
                } catch (ClassNotFoundException e) {
                    throw new IllegalArgumentException("Compression codec " + name + " was not found.", e);
                }
            }
            codec = ReflectionUtils.newInstance(codecClass, jobConf);
        }
        Set<String> keySet = vertexes.keySet();
        Iterator<String> iter = keySet.iterator();
        LabeledAdjVertex currentAdjVertex = new LabeledAdjVertex();
        SequenceFile.Writer out = SequenceFile.createWriter(fs, jobConf, path, Text.class,
                LabeledAdjVertex.class, compressionType, codec, null);
        while (iter.hasNext()) {
            currentAdjVertex = vertexes.get(iter.next());
            out.append(new Text(currentAdjVertex.getId()), currentAdjVertex);
        }
        out.close();
    } catch (IOException e) {
    }
}
From source file:org.smartfrog.services.hadoop.mapreduce.terasort.TeraOutputFormat.java
License:Apache License
/**
 * Does the user want a final sync at close?
 */
public static boolean getFinalSync(JobConf conf) {
    return conf.getBoolean(TeraConstants.FINAL_SYNC_ATTRIBUTE, false);
}
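The getter above reads TeraConstants.FINAL_SYNC_ATTRIBUTE, so a driver enables the behaviour by setting that same key. A minimal sketch, assuming TeraConstants.FINAL_SYNC_ATTRIBUTE is visible to the calling code:

JobConf job = new JobConf();
// Request an explicit sync when each output file is closed.
job.setBoolean(TeraConstants.FINAL_SYNC_ATTRIBUTE, true);

boolean finalSync = TeraOutputFormat.getFinalSync(job); // true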
From source file:org.terrier.utility.io.HadoopUtility.java
License:Mozilla Public License
protected static void saveClassPathToJob(JobConf jobConf) throws IOException {
    logger.info("Copying classpath to job");
    if (jobConf.getBoolean("terrier.classpath.copied", false)) {
        return;
    }
    jobConf.setBoolean("terrier.classpath.copied", true);
    final String[] jars = findJarFiles(
            new String[] { System.getenv().get("CLASSPATH"), System.getProperty("java.class.path") });
    final FileSystem defFS = FileSystem.get(jobConf);
    for (String jarFile : jars) {
        //logger.debug("Adding " + jarFile + " to job class path");
        Path srcJarFilePath = new Path("file:///" + jarFile);
        String filename = srcJarFilePath.getName();
        Path tmpJarFilePath = makeTemporaryFile(jobConf, filename);
        defFS.copyFromLocalFile(srcJarFilePath, tmpJarFilePath);
        DistributedCache.addFileToClassPath(tmpJarFilePath, jobConf);
    }
    DistributedCache.createSymlink(jobConf);
}
From source file:org.wikimedia.wikihadoop.StreamWikiDumpInputFormat.java
License:Apache License
public RecordReader<Text, Text> getRecordReader(final InputSplit genericSplit, JobConf job, Reporter reporter)
        throws IOException {
    // handling non-standard record reader (likely StreamXmlRecordReader)
    FileSplit split = (FileSplit) genericSplit;
    LOG.info("getRecordReader start.....split=" + split);
    reporter.setStatus(split.toString());
    // Open the file and seek to the start of the split
    FileSystem fs = split.getPath().getFileSystem(job);
    String patt = job.get(KEY_EXCLUDE_PAGE_PATTERN);
    boolean prev = job.getBoolean(KEY_PREVIOUS_REVISION, true);
    return new MyRecordReader(split, reporter, job, fs,
            patt != null && !"".equals(patt) ? Pattern.compile(patt) : null, prev);
}
From source file:org.zuinnote.hadoop.bitcoin.format.BitcoinBlockFileInputFormat.java
License:Apache License
public void configure(JobConf conf) {
    this.compressionCodecs = new CompressionCodecFactory(conf);
    this.isSplitable = conf.getBoolean(this.CONF_ISSPLITABLE, this.DEFAULT_ISSPLITABLE);
}
From source file:skewtune.mapreduce.STJobTracker.java
License:Apache License
@SuppressWarnings("unchecked")
STJobTracker(final JobConf conf, String jobtrackerIndentifier) throws IOException, InterruptedException {
    // find the owner of the process
    // get the desired principal to load
    String keytabFilename = conf.get(JTConfig.JT_KEYTAB_FILE);
    UserGroupInformation.setConfiguration(conf);
    if (keytabFilename != null) {
        String desiredUser = conf.get(JTConfig.JT_USER_NAME, System.getProperty("user.name"));
        UserGroupInformation.loginUserFromKeytab(desiredUser, keytabFilename);
        mrOwner = UserGroupInformation.getLoginUser();
    } else {
        mrOwner = UserGroupInformation.getCurrentUser();
    }
    supergroup = conf.get(MR_SUPERGROUP, "supergroup");
    LOG.info("Starting jobtracker with owner as " + mrOwner.getShortUserName() + " and supergroup as "
            + supergroup);

    long secretKeyInterval = conf.getLong(MRConfig.DELEGATION_KEY_UPDATE_INTERVAL_KEY,
            MRConfig.DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT);
    long tokenMaxLifetime = conf.getLong(MRConfig.DELEGATION_TOKEN_MAX_LIFETIME_KEY,
            MRConfig.DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT);
    long tokenRenewInterval = conf.getLong(MRConfig.DELEGATION_TOKEN_RENEW_INTERVAL_KEY,
            MRConfig.DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT);
    secretManager = new DelegationTokenSecretManager(secretKeyInterval, tokenMaxLifetime, tokenRenewInterval,
            DELEGATION_TOKEN_GC_INTERVAL);
    secretManager.startThreads();

    //
    // Grab some static constants
    //
    NUM_HEARTBEATS_IN_SECOND = conf.getInt(JT_HEARTBEATS_IN_SECOND, DEFAULT_NUM_HEARTBEATS_IN_SECOND);
    if (NUM_HEARTBEATS_IN_SECOND < MIN_NUM_HEARTBEATS_IN_SECOND) {
        NUM_HEARTBEATS_IN_SECOND = DEFAULT_NUM_HEARTBEATS_IN_SECOND;
    }

    HEARTBEATS_SCALING_FACTOR = conf.getFloat(JT_HEARTBEATS_SCALING_FACTOR, DEFAULT_HEARTBEATS_SCALING_FACTOR);
    if (HEARTBEATS_SCALING_FACTOR < MIN_HEARTBEATS_SCALING_FACTOR) {
        HEARTBEATS_SCALING_FACTOR = DEFAULT_HEARTBEATS_SCALING_FACTOR;
    }

    // whether to dump or not every heartbeat message even when DEBUG is enabled
    dumpHeartbeat = conf.getBoolean(JT_HEARTBEATS_DUMP, false);

    // This is a directory of temporary submission files. We delete it
    // on startup, and can delete any files that we're done with
    this.conf = conf;
    JobConf jobConf = new JobConf(conf);

    // Set ports, start RPC servers, setup security policy etc.
    InetSocketAddress addr = getAddress(conf);
    this.localMachine = addr.getHostName();
    this.port = addr.getPort();

    int handlerCount = conf.getInt(JT_IPC_HANDLER_COUNT, 10);
    this.interTrackerServer = RPC.getServer(SkewTuneClientProtocol.class, this, addr.getHostName(),
            addr.getPort(), handlerCount, false, conf, secretManager);
    if (LOG.isDebugEnabled()) {
        Properties p = System.getProperties();
        for (Iterator it = p.keySet().iterator(); it.hasNext();) {
            String key = (String) it.next();
            String val = p.getProperty(key);
            LOG.debug("Property '" + key + "' is " + val);
        }
    }

    InetSocketAddress infoSocAddr = NetUtils
            .createSocketAddr(conf.get(JT_HTTP_ADDRESS, String.format("%s:0", this.localMachine)));
    String infoBindAddress = infoSocAddr.getHostName();
    int tmpInfoPort = infoSocAddr.getPort();
    this.startTime = System.currentTimeMillis();
    infoServer = new HttpServer("job", infoBindAddress, tmpInfoPort, tmpInfoPort == 0, conf);
    infoServer.setAttribute("job.tracker", this);
    infoServer.addServlet("jobcompletion", "/completion", JobCompletionServlet.class);
    infoServer.addServlet("taskspeculation", "/speculation", SpeculationEventServlet.class);
    infoServer.addServlet("skewreport", "/skew", SkewReportServlet.class);
    infoServer.addServlet("tasksplit", "/split/*", SplitTaskServlet.class);
    infoServer.addServlet("tasksplitV2", "/splitV2/*", SplitTaskV2Servlet.class);
    infoServer.start();

    this.trackerIdentifier = jobtrackerIndentifier;

    // The rpc/web-server ports can be ephemeral ports...
    // ... ensure we have the correct info
    this.port = interTrackerServer.getListenerAddress().getPort();
    this.conf.set(JT_IPC_ADDRESS, (this.localMachine + ":" + this.port));
    LOG.info("JobTracker up at: " + this.port);
    this.infoPort = this.infoServer.getPort();
    this.conf.set(JT_HTTP_ADDRESS, infoBindAddress + ":" + this.infoPort);
    LOG.info("JobTracker webserver: " + this.infoServer.getPort());
    this.defaultNotificationUrl = String.format("http://%s:%d/completion?jobid=$jobId&status=$jobStatus",
            infoBindAddress, this.infoPort);
    LOG.info("JobTracker completion URI: " + defaultNotificationUrl);
    // this.defaultSpeculationEventUrl = String.format("http://%s:%d/speculation?taskid=$taskId&remainTime=$taskRemainTime", infoBindAddress, this.infoPort);
    this.defaultSpeculationEventUrl = String.format("http://%s:%d/speculation?jobid=$jobId", infoBindAddress,
            this.infoPort);
    LOG.info("JobTracker speculation event URI: " + defaultSpeculationEventUrl);
    this.defaultSkewReportUrl = String.format("http://%s:%d/skew", infoBindAddress, this.infoPort);
    LOG.info("JobTracker skew report event URI: " + defaultSkewReportUrl);
    this.trackerHttp = String.format("http://%s:%d", infoBindAddress, this.infoPort);

    while (!Thread.currentThread().isInterrupted()) {
        try {
            // if we haven't contacted the namenode go ahead and do it
            if (fs == null) {
                fs = mrOwner.doAs(new PrivilegedExceptionAction<FileSystem>() {
                    @Override
                    public FileSystem run() throws IOException {
                        return FileSystem.get(conf);
                    }
                });
            }
            // clean up the system dir, which will only work if hdfs is out
            // of safe mode
            if (systemDir == null) {
                systemDir = new Path(getSystemDir());
            }
            try {
                FileStatus systemDirStatus = fs.getFileStatus(systemDir);
                if (!systemDirStatus.getOwner().equals(mrOwner.getShortUserName())) {
                    throw new AccessControlException("The systemdir " + systemDir + " is not owned by "
                            + mrOwner.getShortUserName());
                }
                if (!systemDirStatus.getPermission().equals(SYSTEM_DIR_PERMISSION)) {
                    LOG.warn("Incorrect permissions on " + systemDir + ". Setting it to "
                            + SYSTEM_DIR_PERMISSION);
                    fs.setPermission(systemDir, new FsPermission(SYSTEM_DIR_PERMISSION));
                } else {
                    break;
                }
            } catch (FileNotFoundException fnf) {
            } // ignore
        } catch (AccessControlException ace) {
            LOG.warn("Failed to operate on " + JTConfig.JT_SYSTEM_DIR + "(" + systemDir
                    + ") because of permissions.");
            LOG.warn("Manually delete the " + JTConfig.JT_SYSTEM_DIR + "(" + systemDir
                    + ") and then start the JobTracker.");
            LOG.warn("Bailing out ... ");
            throw ace;
        } catch (IOException ie) {
            LOG.info("problem cleaning system directory: " + systemDir, ie);
        }
        Thread.sleep(FS_ACCESS_RETRY_PERIOD);
    }

    if (Thread.currentThread().isInterrupted()) {
        throw new InterruptedException();
    }

    // initialize cluster variable
    cluster = new Cluster(this.conf);

    // now create a job client proxy
    jtClient = (ClientProtocol) RPC.getProxy(ClientProtocol.class, ClientProtocol.versionID,
            JobTracker.getAddress(conf), mrOwner, this.conf,
            NetUtils.getSocketFactory(conf, ClientProtocol.class));

    new SpeculativeScheduler().start();

    // initialize task event fetcher
    new TaskCompletionEventFetcher().start();

    // Same with 'localDir' except it's always on the local disk.
    asyncDiskService = new MRAsyncDiskService(FileSystem.getLocal(conf), conf.getLocalDirs());
    asyncDiskService.moveAndDeleteFromEachVolume(SUBDIR);

    // keep at least one asynchronous worker per CPU core
    int numProcs = Runtime.getRuntime().availableProcessors();
    LOG.info("# of available processors = " + numProcs);
    int maxFactor = conf.getInt(JT_MAX_ASYNC_WORKER_FACTOR, 2);
    asyncWorkers = new ThreadPoolExecutor(numProcs, numProcs * maxFactor, 30, TimeUnit.SECONDS,
            new SynchronousQueue<Runnable>(true), new ThreadPoolExecutor.CallerRunsPolicy());

    speculativeSplit = conf.getBoolean(JT_SPECULATIVE_SPLIT, false);
}