Example usage for org.apache.hadoop.mapred JobConf getInt

List of usage examples for org.apache.hadoop.mapred JobConf getInt

Introduction

On this page you can find example usage of org.apache.hadoop.mapred JobConf getInt.

Prototype

public int getInt(String name, int defaultValue) 

Document

Get the value of the name property as an int.
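
Before the project-specific examples, here is a minimal sketch of the set/get round trip: the driver stores an int under a property name and a task later reads it back with getInt, falling back to the supplied default when the property is absent. The property names below are invented for illustration only.

import org.apache.hadoop.mapred.JobConf;

public class GetIntSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        job.setInt("my.example.kmer.size", 55);                  // driver side: store the value
        int kmerSize = job.getInt("my.example.kmer.size", 0);    // reads back 55
        int missing = job.getInt("my.example.not.set", 42);      // unset key, so the default 42 is returned
        System.out.println(kmerSize + " " + missing);            // prints "55 42"
    }
}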

Usage

From source file:pathmerge.linear.MergePathH1Reducer.java

License:Apache License

public void configure(JobConf job) {
    mos = new MultipleOutputs(job);
    I_MERGE = Integer.parseInt(job.get("iMerge"));
    KMER_SIZE = job.getInt("sizeKmer", 0);
    outputValue = new MergePathValueWritable();
    kmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
    outputKmer = new VKmerBytesWritable(KMER_SIZE);
}
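
The configure() method above only reads "iMerge" and "sizeKmer"; the job driver has to put them into the configuration before submission. A minimal sketch of that driver side, with values chosen purely for illustration (the driver code itself is not part of this listing):

JobConf conf = new JobConf(MergePathH1Reducer.class);
conf.set("iMerge", "1");         // read back in configure() via Integer.parseInt(job.get("iMerge"))
conf.setInt("sizeKmer", 55);     // read back in configure() via job.getInt("sizeKmer", 0)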

From source file:pathmerge.log.MergePathH2Reducer.java

License:Apache License

public void configure(JobConf job) {
    mos = new MultipleOutputs(job);
    I_MERGE = Integer.parseInt(job.get("iMerge"));
    KMER_SIZE = job.getInt("sizeKmer", 0);
    outputValue = new MergePathValueWritable();
    tmpOutputValue = new MergePathValueWritable();
    kmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
    outputKmer = new VKmerBytesWritable(KMER_SIZE);
    tmpKmer1 = new VKmerBytesWritable(KMER_SIZE);
    tmpKmer2 = new VKmerBytesWritable(KMER_SIZE);
}

From source file:redpoll.text.TermOutputFormat.java

License:Apache License

/**
 * Generate the output file name based on a given name and the input file name. If
 * the map input file does not exist (i.e. this is not a map-only job),
 * the given name is returned unchanged. If the config value for
 * "num.of.trailing.legs.to.use" is not set, or is set to 0 or a negative value, the
 * given name is returned unchanged. Otherwise, return a file name consisting of the
 * N trailing legs of the input file name, where N is the config value for
 * "num.of.trailing.legs.to.use".
 * 
 * @param job
 *          the job config
 * @param name
 *          the output file name
 * @return the output file name based on the given name and the input file name.
 */
protected static String getInputFileBasedOutputFileName(JobConf job, String name) {
    String infilepath = job.get("map.input.file");
    if (infilepath == null) {
        // if the map input file does not exists, then return the given name
        return name;
    }
    int numOfTrailingLegsToUse = job.getInt("mapred.outputformat.numOfTrailingLegs", 0);
    if (numOfTrailingLegsToUse <= 0) {
        return name;
    }
    Path infile = new Path(infilepath);
    Path parent = infile.getParent();
    String midName = infile.getName();
    Path outPath = new Path(midName);
    for (int i = 1; i < numOfTrailingLegsToUse; i++) {
        if (parent == null)
            break;
        midName = parent.getName();
        if (midName.length() == 0)
            break;
        parent = parent.getParent();
        outPath = new Path(midName, outPath);
    }
    return outPath.toString();
}
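
To make the trailing-legs behaviour concrete, here is a small hypothetical illustration, assuming it runs in the same class as the method above (the input path and the value 2 are invented):

JobConf job = new JobConf();
job.set("map.input.file", "/data/2024/01/part-00000");        // normally set by the framework for each map task
job.setInt("mapred.outputformat.numOfTrailingLegs", 2);
String out = getInputFileBasedOutputFileName(job, "part-00000");
// out == "01/part-00000": the given name is replaced by the last two legs of the input path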

From source file:redpoll.text.TermReducer.java

License:Apache License

@Override
public void configure(JobConf job) {
    dfLimit = job.getInt("redpoll.text.df.limit", 3);
}

From source file:redpoll.text.TfIdfMapper.java

License:Apache License

@Override
public void configure(JobConf job) {
    try {
        terms = new HashMap<String, Integer>();

        DefaultStringifier<HashMap<String, Integer>> mapStringifier = new DefaultStringifier<HashMap<String, Integer>>(
                job, GenericsUtil.getClass(terms));

        String docFreqString = job.get("redpoll.text.terms");
        terms = mapStringifier.fromString(docFreqString);
        toalDocNum = job.getInt("redpoll.docs.num", 1024);
    } catch (IOException exp) {
        exp.printStackTrace();
    }
}
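
The term map read back here must have been written into the configuration in the same stringified form by the driver. A hedged sketch of that producer side follows; the sample map entries and the document count are invented, and adding JavaSerialization to io.serializations is an assumption made so that DefaultStringifier can handle a plain HashMap:

import java.io.IOException;
import java.util.HashMap;
import org.apache.hadoop.io.DefaultStringifier;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.GenericsUtil;

public static void storeTermIndex(JobConf job) throws IOException {
    // assumption: JavaSerialization must be registered so a HashMap can be stringified
    job.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    HashMap<String, Integer> terms = new HashMap<String, Integer>();
    terms.put("hadoop", 0);                                        // sample entries
    terms.put("mapreduce", 1);
    DefaultStringifier<HashMap<String, Integer>> mapStringifier =
            new DefaultStringifier<HashMap<String, Integer>>(job, GenericsUtil.getClass(terms));
    job.set("redpoll.text.terms", mapStringifier.toString(terms)); // read back via fromString() in configure()
    job.setInt("redpoll.docs.num", 20000);                         // read back via getInt("redpoll.docs.num", 1024)
}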

From source file:redpoll.text.TfIdfReducer.java

License:Apache License

@Override
public void configure(JobConf job) {
    totalTerms = job.getInt("redpoll.text.terms.num", 1024);
}

From source file:skewtune.mapreduce.STJobTracker.java

License:Apache License

@SuppressWarnings("unchecked")
STJobTracker(final JobConf conf, String jobtrackerIdentifier) throws IOException, InterruptedException {
    // find the owner of the process
    // get the desired principal to load
    String keytabFilename = conf.get(JTConfig.JT_KEYTAB_FILE);
    UserGroupInformation.setConfiguration(conf);
    if (keytabFilename != null) {
        String desiredUser = conf.get(JTConfig.JT_USER_NAME, System.getProperty("user.name"));
        UserGroupInformation.loginUserFromKeytab(desiredUser, keytabFilename);
        mrOwner = UserGroupInformation.getLoginUser();
    } else {
        mrOwner = UserGroupInformation.getCurrentUser();
    }

    supergroup = conf.get(MR_SUPERGROUP, "supergroup");
    LOG.info("Starting jobtracker with owner as " + mrOwner.getShortUserName() + " and supergroup as "
            + supergroup);

    long secretKeyInterval = conf.getLong(MRConfig.DELEGATION_KEY_UPDATE_INTERVAL_KEY,
            MRConfig.DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT);
    long tokenMaxLifetime = conf.getLong(MRConfig.DELEGATION_TOKEN_MAX_LIFETIME_KEY,
            MRConfig.DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT);
    long tokenRenewInterval = conf.getLong(MRConfig.DELEGATION_TOKEN_RENEW_INTERVAL_KEY,
            MRConfig.DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT);
    secretManager = new DelegationTokenSecretManager(secretKeyInterval, tokenMaxLifetime, tokenRenewInterval,
            DELEGATION_TOKEN_GC_INTERVAL);
    secretManager.startThreads();

    //
    // Grab some static constants
    //

    NUM_HEARTBEATS_IN_SECOND = conf.getInt(JT_HEARTBEATS_IN_SECOND, DEFAULT_NUM_HEARTBEATS_IN_SECOND);
    if (NUM_HEARTBEATS_IN_SECOND < MIN_NUM_HEARTBEATS_IN_SECOND) {
        NUM_HEARTBEATS_IN_SECOND = DEFAULT_NUM_HEARTBEATS_IN_SECOND;
    }

    HEARTBEATS_SCALING_FACTOR = conf.getFloat(JT_HEARTBEATS_SCALING_FACTOR, DEFAULT_HEARTBEATS_SCALING_FACTOR);
    if (HEARTBEATS_SCALING_FACTOR < MIN_HEARTBEATS_SCALING_FACTOR) {
        HEARTBEATS_SCALING_FACTOR = DEFAULT_HEARTBEATS_SCALING_FACTOR;
    }

    // whether to dump or not every heartbeat message even when DEBUG is enabled
    dumpHeartbeat = conf.getBoolean(JT_HEARTBEATS_DUMP, false);

    // This is a directory of temporary submission files. We delete it
    // on startup, and can delete any files that we're done with
    this.conf = conf;
    JobConf jobConf = new JobConf(conf);

    // Set ports, start RPC servers, setup security policy etc.
    InetSocketAddress addr = getAddress(conf);
    this.localMachine = addr.getHostName();
    this.port = addr.getPort();

    int handlerCount = conf.getInt(JT_IPC_HANDLER_COUNT, 10);
    this.interTrackerServer = RPC.getServer(SkewTuneClientProtocol.class, this, addr.getHostName(),
            addr.getPort(), handlerCount, false, conf, secretManager);
    if (LOG.isDebugEnabled()) {
        Properties p = System.getProperties();
        for (Iterator it = p.keySet().iterator(); it.hasNext();) {
            String key = (String) it.next();
            String val = p.getProperty(key);
            LOG.debug("Property '" + key + "' is " + val);
        }
    }

    InetSocketAddress infoSocAddr = NetUtils
            .createSocketAddr(conf.get(JT_HTTP_ADDRESS, String.format("%s:0", this.localMachine)));
    String infoBindAddress = infoSocAddr.getHostName();
    int tmpInfoPort = infoSocAddr.getPort();
    this.startTime = System.currentTimeMillis();
    infoServer = new HttpServer("job", infoBindAddress, tmpInfoPort, tmpInfoPort == 0, conf);
    infoServer.setAttribute("job.tracker", this);
    infoServer.addServlet("jobcompletion", "/completion", JobCompletionServlet.class);
    infoServer.addServlet("taskspeculation", "/speculation", SpeculationEventServlet.class);
    infoServer.addServlet("skewreport", "/skew", SkewReportServlet.class);
    infoServer.addServlet("tasksplit", "/split/*", SplitTaskServlet.class);
    infoServer.addServlet("tasksplitV2", "/splitV2/*", SplitTaskV2Servlet.class);
    infoServer.start();

    this.trackerIdentifier = jobtrackerIdentifier;

    // The rpc/web-server ports can be ephemeral ports...
    // ... ensure we have the correct info
    this.port = interTrackerServer.getListenerAddress().getPort();
    this.conf.set(JT_IPC_ADDRESS, (this.localMachine + ":" + this.port));
    LOG.info("JobTracker up at: " + this.port);
    this.infoPort = this.infoServer.getPort();
    this.conf.set(JT_HTTP_ADDRESS, infoBindAddress + ":" + this.infoPort);
    LOG.info("JobTracker webserver: " + this.infoServer.getPort());
    this.defaultNotificationUrl = String.format("http://%s:%d/completion?jobid=$jobId&status=$jobStatus",
            infoBindAddress, this.infoPort);
    LOG.info("JobTracker completion URI: " + defaultNotificationUrl);
    //        this.defaultSpeculationEventUrl = String.format("http://%s:%d/speculation?taskid=$taskId&remainTime=$taskRemainTime",infoBindAddress,this.infoPort);
    this.defaultSpeculationEventUrl = String.format("http://%s:%d/speculation?jobid=$jobId", infoBindAddress,
            this.infoPort);
    LOG.info("JobTracker speculation event URI: " + defaultSpeculationEventUrl);
    this.defaultSkewReportUrl = String.format("http://%s:%d/skew", infoBindAddress, this.infoPort);
    LOG.info("JobTracker skew report event URI: " + defaultSkewReportUrl);
    this.trackerHttp = String.format("http://%s:%d", infoBindAddress, this.infoPort);

    while (!Thread.currentThread().isInterrupted()) {
        try {
            // if we haven't contacted the namenode go ahead and do it
            if (fs == null) {
                fs = mrOwner.doAs(new PrivilegedExceptionAction<FileSystem>() {
                    @Override
                    public FileSystem run() throws IOException {
                        return FileSystem.get(conf);
                    }
                });
            }

            // clean up the system dir, which will only work if hdfs is out
            // of safe mode
            if (systemDir == null) {
                systemDir = new Path(getSystemDir());
            }
            try {
                FileStatus systemDirStatus = fs.getFileStatus(systemDir);
                if (!systemDirStatus.getOwner().equals(mrOwner.getShortUserName())) {
                    throw new AccessControlException(
                            "The systemdir " + systemDir + " is not owned by " + mrOwner.getShortUserName());
                }
                if (!systemDirStatus.getPermission().equals(SYSTEM_DIR_PERMISSION)) {
                    LOG.warn("Incorrect permissions on " + systemDir + ". Setting it to "
                            + SYSTEM_DIR_PERMISSION);
                    fs.setPermission(systemDir, new FsPermission(SYSTEM_DIR_PERMISSION));
                } else {
                    break;
                }
            } catch (FileNotFoundException fnf) {
            } // ignore
        } catch (AccessControlException ace) {
            LOG.warn("Failed to operate on " + JTConfig.JT_SYSTEM_DIR + "(" + systemDir
                    + ") because of permissions.");
            LOG.warn("Manually delete the " + JTConfig.JT_SYSTEM_DIR + "(" + systemDir
                    + ") and then start the JobTracker.");
            LOG.warn("Bailing out ... ");
            throw ace;
        } catch (IOException ie) {
            LOG.info("problem cleaning system directory: " + systemDir, ie);
        }
        Thread.sleep(FS_ACCESS_RETRY_PERIOD);
    }

    if (Thread.currentThread().isInterrupted()) {
        throw new InterruptedException();
    }

    // initialize cluster variable
    cluster = new Cluster(this.conf);

    // now create a job client proxy
    jtClient = (ClientProtocol) RPC.getProxy(ClientProtocol.class, ClientProtocol.versionID,
            JobTracker.getAddress(conf), mrOwner, this.conf,
            NetUtils.getSocketFactory(conf, ClientProtocol.class));

    new SpeculativeScheduler().start();

    // initialize task event fetcher
    new TaskCompletionEventFetcher().start();

    // Same with 'localDir' except it's always on the local disk.
    asyncDiskService = new MRAsyncDiskService(FileSystem.getLocal(conf), conf.getLocalDirs());
    asyncDiskService.moveAndDeleteFromEachVolume(SUBDIR);

    // keep at least one asynchronous worker per CPU core
    int numProcs = Runtime.getRuntime().availableProcessors();
    LOG.info("# of available processors = " + numProcs);
    int maxFactor = conf.getInt(JT_MAX_ASYNC_WORKER_FACTOR, 2);
    asyncWorkers = new ThreadPoolExecutor(numProcs, numProcs * maxFactor, 30, TimeUnit.SECONDS,
            new SynchronousQueue<Runnable>(true), new ThreadPoolExecutor.CallerRunsPolicy());

    speculativeSplit = conf.getBoolean(JT_SPECULATIVE_SPLIT, false);
}
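
Most of the tunables read above come straight from the JobConf, so a caller can adjust them before constructing the tracker. A minimal sketch, assuming the key constants (defined in or near this class) are accessible and using invented values:

JobConf conf = new JobConf();
conf.setInt(JT_IPC_HANDLER_COUNT, 20);        // read back via conf.getInt(JT_IPC_HANDLER_COUNT, 10)
conf.setInt(JT_MAX_ASYNC_WORKER_FACTOR, 4);   // read back via conf.getInt(JT_MAX_ASYNC_WORKER_FACTOR, 2)
STJobTracker tracker = new STJobTracker(conf, "201405011200");   // identifier string is illustrative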

From source file:source.TeraSort.java

License:Apache License

public static int getOutputReplication(JobConf job) {
    return job.getInt(OUTPUT_REPLICATION, 1);
}
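
A short hedged sketch of the matching producer side: OUTPUT_REPLICATION is the property-name constant defined in this class, and without setting it getOutputReplication(job) falls back to 1 (the value 2 below is only an illustration).

JobConf job = new JobConf(TeraSort.class);
job.setInt(OUTPUT_REPLICATION, 2);
int replication = getOutputReplication(job);   // returns 2 instead of the default 1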

From source file:StorageEngineClient.CombineFileInputFormat.java

License:Open Source License

private void getMoreSplits(JobConf job, Path[] paths1, long maxSize, long minSizeNode, long minSizeRack,
        List<CombineFileSplit> splits) throws IOException, NullGzFileException {
    if (paths1.length == 0) {
        return;
    }

    Path[] paths = paths1;
    ArrayList<Path> splitable = new ArrayList<Path>();
    ArrayList<Path> unsplitable = new ArrayList<Path>();
    for (int i = 0; i < paths1.length; i++) {
        if (isSplitable(paths1[i].getFileSystem(job), paths1[i])) {
            splitable.add(paths1[i]);
        } else {
            unsplitable.add(paths1[i]);
        }
    }
    if (unsplitable.size() != 0) {
        paths = new Path[splitable.size()];
        splitable.toArray(paths);
    }

    OneFileInfo[] files;

    HashMap<String, List<OneBlockInfo>> rackToBlocks = new HashMap<String, List<OneBlockInfo>>();

    HashMap<OneBlockInfo, String[]> blockToNodes = new HashMap<OneBlockInfo, String[]>();

    HashMap<String, List<OneBlockInfo>> nodeToBlocks = new HashMap<String, List<OneBlockInfo>>();

    files = new OneFileInfo[paths.length];

    long totLength = 0;
    for (int i = 0; i < paths.length; i++) {
        files[i] = new OneFileInfo(paths[i], job, rackToBlocks, blockToNodes, nodeToBlocks);
        totLength += files[i].getLength();
    }

    for (Iterator<Map.Entry<String, List<OneBlockInfo>>> iter = nodeToBlocks.entrySet().iterator(); iter
            .hasNext();) {

        Map.Entry<String, List<OneBlockInfo>> onenode = iter.next();
        this.processsplit(job, onenode, blockToNodes, maxSize, minSizeNode, minSizeRack, splits, "node");
    }

    for (Iterator<Map.Entry<String, List<OneBlockInfo>>> iter = rackToBlocks.entrySet().iterator(); iter
            .hasNext();) {

        Map.Entry<String, List<OneBlockInfo>> onerack = iter.next();
        this.processsplit(job, onerack, blockToNodes, maxSize, minSizeNode, minSizeRack, splits, "rack");
    }

    this.processsplit(job, null, blockToNodes, maxSize, minSizeNode, minSizeRack, splits, "all");

    int maxFileNumPerSplit = job.getInt("hive.merge.inputfiles.maxFileNumPerSplit", 1000);

    HashSet<OneBlockInfo> hs = new HashSet<OneBlockInfo>();
    while (blockToNodes.size() > 0) {
        ArrayList<OneBlockInfo> validBlocks = new ArrayList<OneBlockInfo>();
        List<String> nodes = new ArrayList<String>();
        int filenum = 0;
        hs.clear();
        for (OneBlockInfo blockInfo : blockToNodes.keySet()) {
            validBlocks.add(blockInfo);
            filenum++;
            for (String host : blockInfo.hosts) {
                nodes.add(host);
            }
            hs.add(blockInfo);
            if (filenum >= maxFileNumPerSplit) {
                break;
            }
        }
        for (OneBlockInfo blockInfo : hs) {
            blockToNodes.remove(blockInfo);
        }
        this.addCreatedSplit(job, splits, nodes, validBlocks);
    }

    if (unsplitable.size() != 0) {

        HashMap<OneBlockInfo, String[]> fileToNodes = new HashMap<OneBlockInfo, String[]>();

        for (Path path : unsplitable) {
            FileSystem fs = path.getFileSystem(job);
            FileStatus stat = fs.getFileStatus(path);
            long len = fs.getFileStatus(path).getLen();
            BlockLocation[] locations = path.getFileSystem(job).getFileBlockLocations(stat, 0, len);
            if (locations.length == 0) {
                console.printError("The file " + path.toUri().toString() + " maybe is empty, please check it!");
                throw new NullGzFileException(
                        "The file " + path.toUri().toString() + " maybe is empty, please check it!");
            }

            LOG.info("unsplitable file:" + path.toUri().toString() + " length:" + len);

            OneBlockInfo oneblock = new OneBlockInfo(path, 0, len, locations[0].getHosts(),
                    locations[0].getTopologyPaths());
            fileToNodes.put(oneblock, locations[0].getHosts());
        }

        this.processsplitForUnsplit(job, null, fileToNodes, maxSize, minSizeNode, minSizeRack, splits, "all");
    }
}
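
The per-split file cap read above comes from the configuration, so a caller can tune how many small files get packed into one CombineFileSplit. A minimal sketch, assuming the job is configured before splits are computed (the value 5000 is only an illustration):

JobConf job = new JobConf();
job.setInt("hive.merge.inputfiles.maxFileNumPerSplit", 5000);   // read back via getInt(..., 1000) in getMoreSplits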

From source file:StorageEngineClient.CombineFileInputFormat.java

License:Open Source License

private void getMoreSplitsWithStatus(JobConf job, Path[] paths1, Map<String, FileStatus> fileNameToStatus,
        long maxSize, long minSizeNode, long minSizeRack, List<CombineFileSplit> splits)
        throws IOException, NullGzFileException {
    if (paths1.length == 0) {
        return;
    }

    Path[] paths = paths1;
    ArrayList<Path> splitable = new ArrayList<Path>();
    ArrayList<Path> unsplitable = new ArrayList<Path>();
    for (int i = 0; i < paths1.length; i++) {
        if (isSplitable(paths1[i].getFileSystem(job), paths1[i])) {
            splitable.add(paths1[i]);
        } else {
            unsplitable.add(paths1[i]);
        }
    }
    if (unsplitable.size() != 0) {
        paths = new Path[splitable.size()];
        splitable.toArray(paths);
    }

    OneFileInfo[] files;

    HashMap<String, List<OneBlockInfo>> rackToBlocks = new HashMap<String, List<OneBlockInfo>>();

    HashMap<OneBlockInfo, String[]> blockToNodes = new HashMap<OneBlockInfo, String[]>();

    HashMap<String, List<OneBlockInfo>> nodeToBlocks = new HashMap<String, List<OneBlockInfo>>();

    files = new OneFileInfo[paths.length];

    long totLength = 0;
    for (int i = 0; i < paths.length; i++) {
        files[i] = new OneFileInfo(paths[i], fileNameToStatus.get(paths[i].toString()), job, rackToBlocks,
                blockToNodes, nodeToBlocks);
        totLength += files[i].getLength();
    }

    for (Iterator<Map.Entry<String, List<OneBlockInfo>>> iter = nodeToBlocks.entrySet().iterator(); iter
            .hasNext();) {

        Map.Entry<String, List<OneBlockInfo>> onenode = iter.next();
        this.processsplit(job, onenode, blockToNodes, maxSize, minSizeNode, minSizeRack, splits, "node");
    }

    for (Iterator<Map.Entry<String, List<OneBlockInfo>>> iter = rackToBlocks.entrySet().iterator(); iter
            .hasNext();) {

        Map.Entry<String, List<OneBlockInfo>> onerack = iter.next();
        this.processsplit(job, onerack, blockToNodes, maxSize, minSizeNode, minSizeRack, splits, "rack");
    }

    this.processsplit(job, null, blockToNodes, maxSize, minSizeNode, minSizeRack, splits, "all");

    int maxFileNumPerSplit = job.getInt("hive.merge.inputfiles.maxFileNumPerSplit", 1000);

    HashSet<OneBlockInfo> hs = new HashSet<OneBlockInfo>();
    while (blockToNodes.size() > 0) {
        ArrayList<OneBlockInfo> validBlocks = new ArrayList<OneBlockInfo>();
        List<String> nodes = new ArrayList<String>();
        int filenum = 0;
        hs.clear();
        for (OneBlockInfo blockInfo : blockToNodes.keySet()) {
            validBlocks.add(blockInfo);
            filenum++;
            for (String host : blockInfo.hosts) {
                nodes.add(host);
            }
            hs.add(blockInfo);
            if (filenum >= maxFileNumPerSplit) {
                break;
            }
        }
        for (OneBlockInfo blockInfo : hs) {
            blockToNodes.remove(blockInfo);
        }
        this.addCreatedSplit(job, splits, nodes, validBlocks);
    }

    if (unsplitable.size() != 0) {

        HashMap<OneBlockInfo, String[]> fileToNodes = new HashMap<OneBlockInfo, String[]>();

        for (Path path : unsplitable) {
            FileSystem fs = path.getFileSystem(job);
            FileStatus stat = fileNameToStatus.get(path.toString());//fs.getFileStatus(path);
            long len = stat.getLen();
            BlockLocation[] locations = fs.getFileBlockLocations(stat, 0, len);
            if (locations.length == 0) {
                console.printError("The file " + path.toUri().toString() + " maybe is empty, please check it!");
                throw new NullGzFileException(
                        "The file " + path.toUri().toString() + " maybe is empty, please check it!");
            }

            LOG.info("unsplitable file:" + path.toUri().toString() + " length:" + len);

            OneBlockInfo oneblock = new OneBlockInfo(path, 0, len, locations[0].getHosts(),
                    locations[0].getTopologyPaths());
            fileToNodes.put(oneblock, locations[0].getHosts());
        }

        this.processsplitForUnsplit(job, null, fileToNodes, maxSize, minSizeNode, minSizeRack, splits, "all");
    }
}