List of usage examples for org.apache.hadoop.mapred.JobConf getBoolean

public boolean getBoolean(String name, boolean defaultValue)

Gets the value of the name property as a boolean.

Parameter: name - the property name.
Returns: the value of the property as a boolean, or defaultValue if the property is not set or is not a valid boolean.
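Before the per-project examples, here is a minimal, self-contained sketch of the call itself. The property name my.feature.enabled is invented purely for illustration; any string key works the same way.

import org.apache.hadoop.mapred.JobConf;

public class GetBooleanExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // The property is not set yet, so the supplied default is returned.
        boolean before = conf.getBoolean("my.feature.enabled", false); // false

        // After setBoolean, getBoolean returns the stored value and ignores the default.
        conf.setBoolean("my.feature.enabled", true);
        boolean after = conf.getBoolean("my.feature.enabled", false);  // true

        System.out.println(before + " " + after);
    }
}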
From source file:org.hypertable.hadoop.mapred.TextTableInputFormat.java
License:Open Source License
public void configure(JobConf job) {
    m_include_timestamps = job.getBoolean(INCLUDE_TIMESTAMPS, false);
    m_no_escape = job.getBoolean(NO_ESCAPE, false);
    try {
        m_base_spec = new ScanSpec();
        parseColumns(job);
        parseOptions(job);
        parseTimestampInterval(job);
        parseRowInterval(job);
        parseValueRegexps(job);
        parseColumnPredicate(job);
        System.err.println(m_base_spec);
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(-1);
    }
}
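A driver would normally set these flags on the JobConf before the framework invokes configure(). A minimal sketch, assuming INCLUDE_TIMESTAMPS and NO_ESCAPE are public constants of TextTableInputFormat (their visibility is an assumption here):

JobConf job = new JobConf();
// Ask the input format to emit cell timestamps and to skip output escaping.
job.setBoolean(TextTableInputFormat.INCLUDE_TIMESTAMPS, true);
job.setBoolean(TextTableInputFormat.NO_ESCAPE, true);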
From source file:org.mitre.ccv.mapred.CalculateKmerRevisedRelativeEntropy.java
License:Open Source License
/**
 * Start up a map-reduce job with the given parameters.
 *
 * @param jobConf
 * @param globalInput
 * @param cvInput
 * @param output
 * @param cleanLogs
 * @return
 * @throws java.lang.Exception
 */
public int initJob(JobConf jobConf, String globalInput, String cvInput, String output, boolean cleanLogs)
        throws Exception {
    JobConf conf = new JobConf(jobConf, CalculateKmerRevisedRelativeEntropy.class);
    conf.setJobName("CalculateKmerRevisedRelativeEntropy");

    /** Set up paths */
    String ts = FileUtils.getSimpleDate();
    String cvOutput = output + "_" + ts + COMPOSITION_VECTORS_KMER_POSTFIX;
    /** commaSeparatedPaths */
    String mergedInput = cvOutput + "," + globalInput;
    /** merged output */
    String mergedOutput = output + "_" + ts + MERGED_KMER_POSTFIX;

    /**
     * First, map all the CompositionVector's k-mers to Text as keys and
     * local k-mer/value pairs (KmerPiValuePairWritables) as values.
     */
    JobConf subConf = new JobConf(conf);
    subConf.setJobName("CalculateKmerRevisedRelativeEntropy-CompositionVectors");
    // setup mapper
    SequenceFileInputFormat.setInputPaths(subConf, cvInput);
    subConf.setInputFormat(SequenceFileInputFormat.class);
    subConf.setMapperClass(CompositionVectorMap.class);
    subConf.setOutputKeyClass(Text.class); // job output key class
    subConf.setOutputValueClass(StringDoublePairWritable.class); // job output value class
    // Uses default reducer (IdentityReducer)
    subConf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(subConf, new Path(cvOutput));
    LOG.info("Converting CompositionVectors to k-mer/pi-value pairs.");
    JobClient.runJob(subConf);

    /**
     * Second, map (merge) all the k-mer/pi-value pairs together in an
     * array of values (KmerPiValueArrayWritables).
     */
    subConf = new JobConf(conf);
    subConf.setJobName("CalculateKmerRevisedRelativeEntropy-Merging");
    // setup mapper
    SequenceFileInputFormat.setInputPaths(subConf, mergedInput);
    subConf.setInputFormat(SequenceFileInputFormat.class);
    subConf.setMapperClass(MergeMap.class);
    subConf.setOutputKeyClass(Text.class);
    subConf.setOutputValueClass(KmerPiValueArrayWritable.class);
    // setup combiner/reducer
    subConf.setCombinerClass(MergeReducer.class);
    subConf.setReducerClass(MergeReducer.class);
    subConf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(subConf, new Path(mergedOutput));
    LOG.info("Merging k-mers/pi-values from CompositionVectors and all sequences (global)");
    JobClient.runJob(subConf);

    /**
     * Third, calculate entropies (map-reduce)
     */
    subConf = new JobConf(conf);
    subConf.setJobName("CalculateKmerRevisedRelativeEntropy-RRE");
    // setup mapper
    SequenceFileInputFormat.setInputPaths(subConf, mergedOutput);
    subConf.setInputFormat(SequenceFileInputFormat.class);
    subConf.setMapperClass(EntropyMap.class);
    subConf.setOutputKeyClass(Text.class);
    subConf.setOutputValueClass(KmerEntropyPairWritable.class);
    // Setup Combiner and Reducer
    subConf.setCombinerClass(EntropyCombiner.class);
    subConf.setReducerClass(EntropyReducer.class);
    if (conf.getBoolean(TEXT_OUTPUT, false)) {
        FileOutputFormat.setOutputPath(subConf, new Path(output));
    } else {
        subConf.setOutputFormat(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputPath(subConf, new Path(output));
    }
    LOG.info("Calculating entropies");
    JobClient.runJob(subConf);

    /**
     * Remove tmp directories
     */
    Path tmp = new Path(cvOutput);
    FileSystem fs = tmp.getFileSystem(conf);
    fs.delete(tmp, true);
    tmp = new Path(mergedOutput);
    fs.delete(tmp, true);
    return 0;
}
From source file:org.mitre.ccv.mapred.SortKmerRevisedRelativeEntropies.java
License:Open Source License
/**
 * Start a job with the given configuration and parameters.
 *
 * <P>Setting the <code>jobConf</code> boolean property "ccv.sortrre.textoutput" results in
 * the output being text instead of the default binary {@link SequenceFile}.
 *
 * @param jobConf
 * @param input
 * @param output
 * @param cleanLogs
 * @return
 * @throws java.lang.Exception
 */
public int initJob(JobConf jobConf, String input, String output, boolean cleanLogs) throws Exception {
    JobConf conf = new JobConf(jobConf, SortKmerRevisedRelativeEntropies.class);
    conf.setJobName("SortKmerRevisedRelativeEntropies");
    // setup mapper
    SequenceFileInputFormat.setInputPaths(conf, input);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapperClass(ReverseSortMap.class);
    conf.setOutputKeyClass(KmerEntropyPairWritable.class); // job output key class
    conf.setOutputValueClass(Text.class); // job output value class
    // Uses default reducer (IdentityReducer)
    if (conf.getBoolean(TEXT_OUTPUT, false)) {
        FileOutputFormat.setOutputPath(conf, new Path(output));
    } else {
        conf.setOutputFormat(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputPath(conf, new Path(output));
    }
    JobClient.runJob(conf);
    return 0;
}
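As the javadoc above notes, the "ccv.sortrre.textoutput" property switches this job to plain-text output. A minimal sketch of a driver fragment that flips the flag before calling initJob; the instance construction and the input/output paths are invented for illustration:

JobConf jobConf = new JobConf();
// Request text output instead of the default SequenceFile output.
jobConf.setBoolean("ccv.sortrre.textoutput", true);

SortKmerRevisedRelativeEntropies sorter = new SortKmerRevisedRelativeEntropies();
sorter.initJob(jobConf, "/data/rre-input", "/data/rre-sorted", true);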
From source file:org.sf.xrime.preprocessing.pajek.PajekFormat2LabeledAdjBiSetVertex.java
License:Apache License
public void toBinaryData() {
    try {
        JobConf jobConf = new JobConf(new Configuration(), PajekFormat2LabeledAdjBiSetVertex.class);
        Path filePath = new Path(dstPath + "/part00000");
        Path path = new Path(jobConf.getWorkingDirectory(), filePath);
        FileSystem fs = path.getFileSystem(jobConf);
        CompressionCodec codec = null;
        CompressionType compressionType = CompressionType.NONE;
        if (jobConf.getBoolean("mapred.output.compress", false)) {
            // find the kind of compression to do
            String val = jobConf.get("mapred.output.compression.type", CompressionType.RECORD.toString());
            compressionType = CompressionType.valueOf(val);
            // find the right codec
            Class<? extends CompressionCodec> codecClass = DefaultCodec.class;
            String name = jobConf.get("mapred.output.compression.codec");
            if (name != null) {
                try {
                    codecClass = jobConf.getClassByName(name).asSubclass(CompressionCodec.class);
                } catch (ClassNotFoundException e) {
                    throw new IllegalArgumentException("Compression codec " + name + " was not found.", e);
                }
            }
            codec = ReflectionUtils.newInstance(codecClass, jobConf);
        }
        Set<String> keySet = vertexes.keySet();
        Iterator<String> iter = keySet.iterator();
        LabeledAdjBiSetVertex currentAdjVertex = new LabeledAdjBiSetVertex();
        SequenceFile.Writer out = SequenceFile.createWriter(fs, jobConf, path, Text.class,
                LabeledAdjBiSetVertex.class, compressionType, codec, null);
        while (iter.hasNext()) {
            currentAdjVertex = vertexes.get(iter.next());
            out.append(new Text(currentAdjVertex.getId()), currentAdjVertex);
        }
        out.close();
    } catch (IOException e) {
    }
}
From source file:org.sf.xrime.preprocessing.pajek.PajekFormat2WeightedLabeledAdjVertex.java
License:Apache License
public void toBinaryData() {
    try {
        JobConf jobConf = new JobConf(new Configuration(), PajekFormat2WeightedLabeledAdjVertex.class);
        Path filePath = new Path(dstPath + "/part00000");
        Path path = new Path(jobConf.getWorkingDirectory(), filePath);
        FileSystem fs = path.getFileSystem(jobConf);
        CompressionCodec codec = null;
        CompressionType compressionType = CompressionType.NONE;
        if (jobConf.getBoolean("mapred.output.compress", false)) {
            // find the kind of compression to do
            String val = jobConf.get("mapred.output.compression.type", CompressionType.RECORD.toString());
            compressionType = CompressionType.valueOf(val);
            // find the right codec
            Class<? extends CompressionCodec> codecClass = DefaultCodec.class;
            String name = jobConf.get("mapred.output.compression.codec");
            if (name != null) {
                try {
                    codecClass = jobConf.getClassByName(name).asSubclass(CompressionCodec.class);
                } catch (ClassNotFoundException e) {
                    throw new IllegalArgumentException("Compression codec " + name + " was not found.", e);
                }
            }
            codec = ReflectionUtils.newInstance(codecClass, jobConf);
        }
        Set<String> keySet = vertexes.keySet();
        Iterator<String> iter = keySet.iterator();
        LabeledAdjVertex currentAdjVertex = new LabeledAdjVertex();
        SequenceFile.Writer out = SequenceFile.createWriter(fs, jobConf, path, Text.class,
                LabeledAdjVertex.class, compressionType, codec, null);
        while (iter.hasNext()) {
            currentAdjVertex = vertexes.get(iter.next());
            out.append(new Text(currentAdjVertex.getId()), currentAdjVertex);
        }
        out.close();
    } catch (IOException e) {
    }
}
From source file:org.smartfrog.services.hadoop.mapreduce.terasort.TeraOutputFormat.java
License:Apache License
/**
 * Does the user want a final sync at close?
 */
public static boolean getFinalSync(JobConf conf) {
    return conf.getBoolean(TeraConstants.FINAL_SYNC_ATTRIBUTE, false);
}
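The getter above reads TeraConstants.FINAL_SYNC_ATTRIBUTE, so a driver enables the behaviour by setting that same key. A minimal sketch, assuming TeraConstants.FINAL_SYNC_ATTRIBUTE is visible to the calling code:

JobConf job = new JobConf();
// Request an explicit sync when each output file is closed.
job.setBoolean(TeraConstants.FINAL_SYNC_ATTRIBUTE, true);

boolean finalSync = TeraOutputFormat.getFinalSync(job); // true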
From source file:org.terrier.utility.io.HadoopUtility.java
License:Mozilla Public License
protected static void saveClassPathToJob(JobConf jobConf) throws IOException {
    logger.info("Copying classpath to job");
    if (jobConf.getBoolean("terrier.classpath.copied", false)) {
        return;
    }
    jobConf.setBoolean("terrier.classpath.copied", true);
    final String[] jars = findJarFiles(
            new String[] { System.getenv().get("CLASSPATH"), System.getProperty("java.class.path") });
    final FileSystem defFS = FileSystem.get(jobConf);
    for (String jarFile : jars) {
        //logger.debug("Adding " + jarFile + " to job class path");
        Path srcJarFilePath = new Path("file:///" + jarFile);
        String filename = srcJarFilePath.getName();
        Path tmpJarFilePath = makeTemporaryFile(jobConf, filename);
        defFS.copyFromLocalFile(srcJarFilePath, tmpJarFilePath);
        DistributedCache.addFileToClassPath(tmpJarFilePath, jobConf);
    }
    DistributedCache.createSymlink(jobConf);
}
From source file:org.wikimedia.wikihadoop.StreamWikiDumpInputFormat.java
License:Apache License
public RecordReader<Text, Text> getRecordReader(final InputSplit genericSplit, JobConf job, Reporter reporter)
        throws IOException {
    // handling non-standard record reader (likely StreamXmlRecordReader)
    FileSplit split = (FileSplit) genericSplit;
    LOG.info("getRecordReader start.....split=" + split);
    reporter.setStatus(split.toString());
    // Open the file and seek to the start of the split
    FileSystem fs = split.getPath().getFileSystem(job);
    String patt = job.get(KEY_EXCLUDE_PAGE_PATTERN);
    boolean prev = job.getBoolean(KEY_PREVIOUS_REVISION, true);
    return new MyRecordReader(split, reporter, job, fs,
            patt != null && !"".equals(patt) ? Pattern.compile(patt) : null, prev);
}
From source file:org.zuinnote.hadoop.bitcoin.format.BitcoinBlockFileInputFormat.java
License:Apache License
public void configure(JobConf conf) {
    this.compressionCodecs = new CompressionCodecFactory(conf);
    this.isSplitable = conf.getBoolean(this.CONF_ISSPLITABLE, this.DEFAULT_ISSPLITABLE);
}
From source file:skewtune.mapreduce.STJobTracker.java
License:Apache License
@SuppressWarnings("unchecked")
STJobTracker(final JobConf conf, String jobtrackerIndentifier) throws IOException, InterruptedException {
    // find the owner of the process
    // get the desired principal to load
    String keytabFilename = conf.get(JTConfig.JT_KEYTAB_FILE);
    UserGroupInformation.setConfiguration(conf);
    if (keytabFilename != null) {
        String desiredUser = conf.get(JTConfig.JT_USER_NAME, System.getProperty("user.name"));
        UserGroupInformation.loginUserFromKeytab(desiredUser, keytabFilename);
        mrOwner = UserGroupInformation.getLoginUser();
    } else {
        mrOwner = UserGroupInformation.getCurrentUser();
    }
    supergroup = conf.get(MR_SUPERGROUP, "supergroup");
    LOG.info("Starting jobtracker with owner as " + mrOwner.getShortUserName() + " and supergroup as "
            + supergroup);

    long secretKeyInterval = conf.getLong(MRConfig.DELEGATION_KEY_UPDATE_INTERVAL_KEY,
            MRConfig.DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT);
    long tokenMaxLifetime = conf.getLong(MRConfig.DELEGATION_TOKEN_MAX_LIFETIME_KEY,
            MRConfig.DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT);
    long tokenRenewInterval = conf.getLong(MRConfig.DELEGATION_TOKEN_RENEW_INTERVAL_KEY,
            MRConfig.DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT);
    secretManager = new DelegationTokenSecretManager(secretKeyInterval, tokenMaxLifetime, tokenRenewInterval,
            DELEGATION_TOKEN_GC_INTERVAL);
    secretManager.startThreads();

    //
    // Grab some static constants
    //
    NUM_HEARTBEATS_IN_SECOND = conf.getInt(JT_HEARTBEATS_IN_SECOND, DEFAULT_NUM_HEARTBEATS_IN_SECOND);
    if (NUM_HEARTBEATS_IN_SECOND < MIN_NUM_HEARTBEATS_IN_SECOND) {
        NUM_HEARTBEATS_IN_SECOND = DEFAULT_NUM_HEARTBEATS_IN_SECOND;
    }

    HEARTBEATS_SCALING_FACTOR = conf.getFloat(JT_HEARTBEATS_SCALING_FACTOR, DEFAULT_HEARTBEATS_SCALING_FACTOR);
    if (HEARTBEATS_SCALING_FACTOR < MIN_HEARTBEATS_SCALING_FACTOR) {
        HEARTBEATS_SCALING_FACTOR = DEFAULT_HEARTBEATS_SCALING_FACTOR;
    }

    // whether to dump or not every heartbeat message even when DEBUG is enabled
    dumpHeartbeat = conf.getBoolean(JT_HEARTBEATS_DUMP, false);

    // This is a directory of temporary submission files. We delete it
    // on startup, and can delete any files that we're done with
    this.conf = conf;
    JobConf jobConf = new JobConf(conf);

    // Set ports, start RPC servers, setup security policy etc.
    InetSocketAddress addr = getAddress(conf);
    this.localMachine = addr.getHostName();
    this.port = addr.getPort();

    int handlerCount = conf.getInt(JT_IPC_HANDLER_COUNT, 10);
    this.interTrackerServer = RPC.getServer(SkewTuneClientProtocol.class, this, addr.getHostName(),
            addr.getPort(), handlerCount, false, conf, secretManager);
    if (LOG.isDebugEnabled()) {
        Properties p = System.getProperties();
        for (Iterator it = p.keySet().iterator(); it.hasNext();) {
            String key = (String) it.next();
            String val = p.getProperty(key);
            LOG.debug("Property '" + key + "' is " + val);
        }
    }

    InetSocketAddress infoSocAddr = NetUtils
            .createSocketAddr(conf.get(JT_HTTP_ADDRESS, String.format("%s:0", this.localMachine)));
    String infoBindAddress = infoSocAddr.getHostName();
    int tmpInfoPort = infoSocAddr.getPort();
    this.startTime = System.currentTimeMillis();
    infoServer = new HttpServer("job", infoBindAddress, tmpInfoPort, tmpInfoPort == 0, conf);
    infoServer.setAttribute("job.tracker", this);
    infoServer.addServlet("jobcompletion", "/completion", JobCompletionServlet.class);
    infoServer.addServlet("taskspeculation", "/speculation", SpeculationEventServlet.class);
    infoServer.addServlet("skewreport", "/skew", SkewReportServlet.class);
    infoServer.addServlet("tasksplit", "/split/*", SplitTaskServlet.class);
    infoServer.addServlet("tasksplitV2", "/splitV2/*", SplitTaskV2Servlet.class);
    infoServer.start();

    this.trackerIdentifier = jobtrackerIndentifier;

    // The rpc/web-server ports can be ephemeral ports...
    // ... ensure we have the correct info
    this.port = interTrackerServer.getListenerAddress().getPort();
    this.conf.set(JT_IPC_ADDRESS, (this.localMachine + ":" + this.port));
    LOG.info("JobTracker up at: " + this.port);
    this.infoPort = this.infoServer.getPort();
    this.conf.set(JT_HTTP_ADDRESS, infoBindAddress + ":" + this.infoPort);
    LOG.info("JobTracker webserver: " + this.infoServer.getPort());
    this.defaultNotificationUrl = String.format("http://%s:%d/completion?jobid=$jobId&status=$jobStatus",
            infoBindAddress, this.infoPort);
    LOG.info("JobTracker completion URI: " + defaultNotificationUrl);
    // this.defaultSpeculationEventUrl = String.format("http://%s:%d/speculation?taskid=$taskId&remainTime=$taskRemainTime", infoBindAddress, this.infoPort);
    this.defaultSpeculationEventUrl = String.format("http://%s:%d/speculation?jobid=$jobId", infoBindAddress,
            this.infoPort);
    LOG.info("JobTracker speculation event URI: " + defaultSpeculationEventUrl);
    this.defaultSkewReportUrl = String.format("http://%s:%d/skew", infoBindAddress, this.infoPort);
    LOG.info("JobTracker skew report event URI: " + defaultSkewReportUrl);
    this.trackerHttp = String.format("http://%s:%d", infoBindAddress, this.infoPort);

    while (!Thread.currentThread().isInterrupted()) {
        try {
            // if we haven't contacted the namenode go ahead and do it
            if (fs == null) {
                fs = mrOwner.doAs(new PrivilegedExceptionAction<FileSystem>() {
                    @Override
                    public FileSystem run() throws IOException {
                        return FileSystem.get(conf);
                    }
                });
            }
            // clean up the system dir, which will only work if hdfs is out
            // of safe mode
            if (systemDir == null) {
                systemDir = new Path(getSystemDir());
            }
            try {
                FileStatus systemDirStatus = fs.getFileStatus(systemDir);
                if (!systemDirStatus.getOwner().equals(mrOwner.getShortUserName())) {
                    throw new AccessControlException("The systemdir " + systemDir + " is not owned by "
                            + mrOwner.getShortUserName());
                }
                if (!systemDirStatus.getPermission().equals(SYSTEM_DIR_PERMISSION)) {
                    LOG.warn("Incorrect permissions on " + systemDir + ". Setting it to "
                            + SYSTEM_DIR_PERMISSION);
                    fs.setPermission(systemDir, new FsPermission(SYSTEM_DIR_PERMISSION));
                } else {
                    break;
                }
            } catch (FileNotFoundException fnf) {
            } // ignore
        } catch (AccessControlException ace) {
            LOG.warn("Failed to operate on " + JTConfig.JT_SYSTEM_DIR + "(" + systemDir
                    + ") because of permissions.");
            LOG.warn("Manually delete the " + JTConfig.JT_SYSTEM_DIR + "(" + systemDir
                    + ") and then start the JobTracker.");
            LOG.warn("Bailing out ... ");
            throw ace;
        } catch (IOException ie) {
            LOG.info("problem cleaning system directory: " + systemDir, ie);
        }
        Thread.sleep(FS_ACCESS_RETRY_PERIOD);
    }

    if (Thread.currentThread().isInterrupted()) {
        throw new InterruptedException();
    }

    // initialize cluster variable
    cluster = new Cluster(this.conf);

    // now create a job client proxy
    jtClient = (ClientProtocol) RPC.getProxy(ClientProtocol.class, ClientProtocol.versionID,
            JobTracker.getAddress(conf), mrOwner, this.conf,
            NetUtils.getSocketFactory(conf, ClientProtocol.class));

    new SpeculativeScheduler().start();

    // initialize task event fetcher
    new TaskCompletionEventFetcher().start();

    // Same with 'localDir' except it's always on the local disk.
    asyncDiskService = new MRAsyncDiskService(FileSystem.getLocal(conf), conf.getLocalDirs());
    asyncDiskService.moveAndDeleteFromEachVolume(SUBDIR);

    // keep at least one asynchronous worker per CPU core
    int numProcs = Runtime.getRuntime().availableProcessors();
    LOG.info("# of available processors = " + numProcs);
    int maxFactor = conf.getInt(JT_MAX_ASYNC_WORKER_FACTOR, 2);
    asyncWorkers = new ThreadPoolExecutor(numProcs, numProcs * maxFactor, 30, TimeUnit.SECONDS,
            new SynchronousQueue<Runnable>(true), new ThreadPoolExecutor.CallerRunsPolicy());

    speculativeSplit = conf.getBoolean(JT_SPECULATIVE_SPLIT, false);
}