Example usage for org.apache.hadoop.mapred JobConf setInt

List of usage examples for org.apache.hadoop.mapred JobConf setInt

Introduction

This page collects example usages of org.apache.hadoop.mapred.JobConf.setInt from open source projects.

Prototype

public void setInt(String name, int value) 

Document

Set the value of the name property to an int.
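
Before the usage examples, here is a minimal round-trip sketch (the property name my.example.threshold is illustrative, not taken from any example below): setInt stores the int as its decimal string form, and the matching getInt parses it back, returning the supplied default when the property is unset.

import org.apache.hadoop.mapred.JobConf;

public class SetIntExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // stored internally as the string "10"
        conf.setInt("my.example.threshold", 10);
        // parsed back to an int; the second argument is the default if unset
        int threshold = conf.getInt("my.example.threshold", 1);
        System.out.println("threshold = " + threshold); // prints: threshold = 10
    }
}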

Usage

From source file: ivory.smrf.retrieval.distributed.RunQueryBroker.java

License: Apache License

/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 5) {
        printUsage();
        return -1;
    }

    String configPath = args[0];
    FileSystem fs = FileSystem.get(getConf());

    sLogger.info("server config path: " + configPath);
    FileStatus[] stats = fs.listStatus(new Path(configPath));

    if (stats == null) {
        sLogger.info("Error: " + configPath + " not found!");
        return -1;
    }

    String runtag = args[1];
    String queriesFilePath = args[2];
    String resultsFilePath = args[3];
    int numHits = Integer.parseInt(args[4]);

    JobConf conf = new JobConf(getConf(), RunQueryBroker.class);
    conf.setJobName("RunQueryBroker");

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(NullInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setMapperClass(Server.class);

    conf.set("QueriesFilePath", queriesFilePath);
    conf.set("ConfigPath", configPath);
    conf.set("ResultsFilePath", resultsFilePath);
    conf.set("Runtag", runtag);
    conf.setInt("NumHits", numHits);

    conf.set("mapred.child.java.opts", "-Xmx2048m");

    JobClient client = new JobClient(conf);
    client.submitJob(conf);

    sLogger.info("runner started!");

    return 0;
}

From source file: ivory.smrf.retrieval.RunQueryBroker.java

License: Apache License

/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 5) {
        printUsage();
        return -1;
    }

    String configPath = args[0];
    FileSystem fs = FileSystem.get(getConf());

    sLogger.info("server config path: " + configPath);
    FileStatus[] stats = fs.listStatus(new Path(configPath));

    if (stats == null) {
        sLogger.info("Error: " + configPath + " not found!");
        return -1;
    }

    String runtag = args[1];
    String queriesFilePath = args[2];
    String resultsFilePath = args[3];
    int numHits = Integer.parseInt(args[4]);

    JobConf conf = new JobConf(RunQueryBroker.class);
    conf.setJobName("RunQueryBroker");

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(NullInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setMapperClass(Server.class);

    conf.set("QueriesFilePath", queriesFilePath);
    conf.set("ConfigPath", configPath);
    conf.set("ResultsFilePath", resultsFilePath);
    conf.set("Runtag", runtag);
    conf.setInt("NumHits", numHits);

    conf.set("mapred.child.java.opts", "-Xmx2048m");

    JobClient client = new JobClient(conf);
    client.submitJob(conf);

    sLogger.info("runner started!");

    return 0;
}

From source file: job.uncombine.compressed.BigBuildInvertedIndex.java

License: Apache License

/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {

    //long GB = 1024 * 1024 * 1024;
    //long totalDataSize = 1 * GB;

    int reduceNumArray[] = { 9, 18 };
    int splitSizeMBArray[] = { 64, 128, 256 };
    int xmxArray[] = { 1000, 2000, 3000, 4000 };
    int xmsArray[] = { 0, 1 };
    int ismbArray[] = { 200, 400, 600, 800 };

    for (int splitIndex = 0; splitIndex < splitSizeMBArray.length; splitIndex++) {
        for (int reduceNumIndex = 0; reduceNumIndex < reduceNumArray.length; reduceNumIndex++) {
            for (int xmxIndex = 0; xmxIndex < xmxArray.length; xmxIndex++) {
                for (int xmsIndex = 0; xmsIndex < xmsArray.length; xmsIndex++) {
                    for (int ismbIndex = 0; ismbIndex < ismbArray.length; ismbIndex++) {

                        int reduceNum = reduceNumArray[reduceNumIndex];
                        int splitMB = splitSizeMBArray[splitIndex];
                        int xmx = xmxArray[xmxIndex];
                        int xms = xmsArray[xmsIndex] * xmx;
                        int ismb = ismbArray[ismbIndex];

                        JobConf conf = new JobConf(getConf(), BigBuildInvertedIndex.class);

                        conf.setLong("mapred.min.split.size", SplitTable.getMapred_min_split_size(splitMB));
                        conf.setLong("mapred.max.split.size", SplitTable.getMapred_max_split_size(splitMB));

                        //conf.setInt("my.sample.split.num", (int) (totalDataSize / (splitMB * 1024 * 1024)));

                        conf.setInt("mapred.reduce.tasks", reduceNum);
                        conf.setInt("io.sort.mb", ismb);

                        if (xms == 0)
                            conf.set("mapred.child.java.opts", "-Xmx" + xmx + "m");
                        else
                            conf.set("mapred.child.java.opts", "-Xmx" + xmx + "m -Xms" + xms + "m");

                        conf.setInt("child.monitor.metrics.seconds", 2);
                        conf.setInt("child.monitor.jvm.seconds", 2);
                        conf.setInt("child.monitor.jstat.seconds", 2);

                        conf.setJobName("BigBuildInvertedIndex " + splitMB + "MB "
                                + conf.get("mapred.child.java.opts") + " ismb=" + ismb + " RN=" + reduceNum);

                        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
                        if (otherArgs.length != 2) {
                            System.err.println("Usage: BigBuildInvertedIndex <in> <out>");
                            System.exit(2);
                        }

                        conf.setMapOutputKeyClass(Text.class);
                        conf.setMapOutputValueClass(PairOfInts.class);
                        conf.setOutputKeyClass(Text.class);
                        conf.setOutputValueClass(PairOfWritables.class);
                        SequenceFileOutputFormat.setOutputCompressionType(conf, CompressionType.BLOCK);
                        conf.setOutputFormat(MapFileOutputFormat.class);

                        conf.setMapperClass(MyMapper.class);
                        // conf.setCombinerClass(IdentityReducer.class);
                        conf.setReducerClass(MyReducer.class);
                        FileInputFormat.setInputPaths(conf, new Path(otherArgs[0]));
                        FileOutputFormat.setOutputPath(conf, new Path(otherArgs[1]));

                        FileSystem.get(conf).delete(new Path(otherArgs[1]), true);

                        try {
                            JobClient.runJob(conf);
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                        Thread.sleep(15000);

                    }
                }
            }
        }
    }
    return 0;
}
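
An aside on the mapred.reduce.tasks call above: in the old mapred API, JobConf also has a dedicated setter that writes the same property, so the two calls below should be interchangeable here.

// equivalent ways to request reduceNum reduce tasks
conf.setInt("mapred.reduce.tasks", reduceNum);
conf.setNumReduceTasks(reduceNum);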

From source file: jobimtext.thesaurus.distributional.hadoop.mapreduce.SimCounts1WithFeatures.java

License: Apache License

@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {

    JobConf conf = HadoopUtil.generateJobConf(args);

    /* set the new defined type to be used */
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    if (args.length > 3) {
        conf.setInt("threshold", Integer.parseInt(args[3]));
    }
    /* number of milliseconds before killing an unresponsive task */
    conf.set("mapred.task.timeout", "600000");

    /* change to 128mb */
    conf.set("dfs.block.size", "134217728");

    /* set the maximum number of task per node */
    int maptasks = 200;

    /*
     * Number of map tasks to deploy on each machine. 0.5 to 2 *
     * (cores/node)
     */
    conf.set("mapred.tasktracker.map.tasks.maximum", "" + maptasks);
    conf.set("mapred.tasktracker.map", "" + maptasks);
    /*
     * The default number of map tasks per job. Typically set to a prime
     * several times greater than number of available hosts.
     */
    conf.set("mapred.map.tasks", "" + maptasks);

    int reducetasks = 20;

    conf.set("mapred.tasktracker.reduce.tasks.maximum", "" + reducetasks);
    conf.set("mapred.tasktracker.reduce", "" + reducetasks);
    conf.set("mapred.reduce.tasks", "" + reducetasks);

    /*
     * how much virtual memory the entire process tree of each map/reduce
     * task will use
     */
    conf.set("mapred.job.map.memory.mb", "4000");
    conf.set("mapred.job.reduce.memory.mb", "4000");

    conf.set("dfs.replication", "1");

    /*
     * reduce I/O load
     */
    conf.set("mapred.child.java.opts", "-Xmx1400M");

    conf.set("io.sort.mb", "300");
    conf.set("io.sort.factor", "30");

    JobClient.runJob(conf);

}
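
Note that this example mixes styles: threshold goes through setInt, while the other numeric properties are written with conf.set and string concatenation. Since setInt simply stores the decimal string form, the calls below are equivalent, slightly more idiomatic substitutions.

conf.setInt("mapred.map.tasks", maptasks); // instead of conf.set("mapred.map.tasks", "" + maptasks)
conf.setInt("mapred.reduce.tasks", reducetasks); // instead of conf.set("mapred.reduce.tasks", "" + reducetasks)
conf.setInt("io.sort.mb", 300); // instead of conf.set("io.sort.mb", "300")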

From source file: mapreduce.DosAttack.java

License: Apache License

private void issue() throws IOException {
    LOG.info("Starting DOS on url[{}] with clients[{}]", wsURL, numMappers);
    DosMapper.init(wsURL);
    JobConf job = new JobConf(DosAttack.class);
    job.setJarByClass(DosAttack.class);
    job.setJobName("DOS Attack");
    job.setNumReduceTasks(0);
    job.setInputFormat(NullInputFormat.class);
    job.setOutputFormat(NullOutputFormat.class);
    job.setMapperClass(DosMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumMapTasks(numMappers);
    job.setInt(NUM_MAPPERS_KEY, numMappers);
    job.setInt(NUM_REQUESTS_KEY, numRequests);
    job.set(TARGET_URL_KEY, wsURL);
    JobClient.runJob(job);
}
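
The values stored here with setInt are intended to be read back on the task side with the matching getInt. A hedged sketch of what that might look like in the mapper's configure method; the fields and default values are assumptions, not taken from the real DosMapper.

private int numMappers;
private int numRequests;
private String targetUrl;

@Override
public void configure(JobConf job) {
    // read back what issue() stored; the defaults here are assumed
    numMappers = job.getInt(NUM_MAPPERS_KEY, 1);
    numRequests = job.getInt(NUM_REQUESTS_KEY, 0);
    targetUrl = job.get(TARGET_URL_KEY);
}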

From source file: net.peacesoft.nutch.crawl.ReFetcher.java

License: Apache License

public void fetch(Path segment, int threads) throws IOException {

    checkConfiguration();

    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long start = System.currentTimeMillis();
    if (LOG.isInfoEnabled()) {
        LOG.info("Fetcher: starting at " + sdf.format(start));
        LOG.info("Fetcher: segment: " + segment);
    }

    // set the actual time for the timelimit relative
    // to the beginning of the whole job and not of a specific task
    // otherwise it keeps trying again if a task fails
    long timelimit = getConf().getLong("ReFetcher.timelimit.mins", -1);
    if (timelimit != -1) {
        timelimit = System.currentTimeMillis() + (timelimit * 60 * 1000);
        LOG.info("Fetcher Timelimit set for : " + timelimit);
        getConf().setLong("fetcher.timelimit", timelimit);
    }

    // Set the time limit after which the throughput threshold feature is enabled
    timelimit = getConf().getLong("fetcher.throughput.threshold.check.after", 10);
    timelimit = System.currentTimeMillis() + (timelimit * 60 * 1000);
    getConf().setLong("fetcher.throughput.threshold.check.after", timelimit);

    int maxOutlinkDepth = getConf().getInt("fetcher.follow.outlinks.depth", -1);
    if (maxOutlinkDepth > 0) {
        LOG.info("Fetcher: following outlinks up to depth: " + Integer.toString(maxOutlinkDepth));

        int maxOutlinkDepthNumLinks = getConf().getInt("fetcher.follow.outlinks.num.links", 4);
        int outlinksDepthDivisor = getConf().getInt("fetcher.follow.outlinks.depth.divisor", 2);

        int totalOutlinksToFollow = 0;
        for (int i = 0; i < maxOutlinkDepth; i++) {
            totalOutlinksToFollow += (int) Math.floor(outlinksDepthDivisor / (i + 1) * maxOutlinkDepthNumLinks);
        }

        LOG.info("Fetcher: maximum outlinks to follow: " + Integer.toString(totalOutlinksToFollow));
    }

    JobConf job = new NutchJob(getConf());
    job.setJobName("fetch " + segment);

    job.setInt("fetcher.threads.fetch", threads);
    job.set(Nutch.SEGMENT_NAME_KEY, segment.getName());

    // for politeness, don't permit parallel execution of a single task
    job.setSpeculativeExecution(false);

    FileInputFormat.addInputPath(job, new Path(segment, CrawlDatum.GENERATE_DIR_NAME));
    job.setInputFormat(ReFetcher.InputFormat.class);

    job.setMapRunnerClass(ReFetcher.class);

    FileOutputFormat.setOutputPath(job, segment);
    job.setOutputFormat(FetcherOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NutchWritable.class);

    JobClient.runJob(job);

    long end = System.currentTimeMillis();
    LOG.info("Fetcher: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
}
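
On the task side, Nutch's fetcher reads this thread count back with the matching getInt; a one-line sketch of the counterpart call (the default value of 10 is an assumption here):

int threadCount = job.getInt("fetcher.threads.fetch", 10);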

From source file: net.peacesoft.nutch.crawl.ReGenerator.java

License: Apache License

/**
 * Generate fetchlists in one or more segments. Whether to filter URLs or
 * not is read from the crawl.generate.filter property in the configuration
 * files. If the property is not found, the URLs are filtered. Same for the
 * normalisation.
 *
 * @param dbDir Crawl database directory
 * @param segments Segments directory
 * @param numLists Number of reduce tasks
 * @param topN Number of top URLs to be selected
 * @param curTime Current time in milliseconds
 *
 * @return Path to generated segment or null if no entries were selected
 *
 * @throws IOException When an I/O error occurs
 */
public Path[] generate(Path dbDir, Path segments, int numLists, long topN, long curTime, boolean filter,
        boolean norm, boolean force, int maxNumSegments) throws IOException {
    try {
        Path tempDir = new Path(
                getConf().get("mapred.temp.dir", ".") + "/generate-temp-" + System.currentTimeMillis());

        Path lock = new Path(dbDir, CrawlDb.LOCK_NAME);
        FileSystem fs = FileSystem.get(getConf());
        LockUtil.createLockFile(fs, lock, force);

        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        long start = System.currentTimeMillis();
        LOG.info("ReGenerator: starting at " + sdf.format(start));
        LOG.info("ReGenerator: Selecting best-scoring urls due for fetch.");
        LOG.info("ReGenerator: filtering: " + filter);
        LOG.info("ReGenerator: normalizing: " + norm);
        if (topN != Long.MAX_VALUE) {
            LOG.info("ReGenerator: topN: " + topN);
        }

        if ("true".equals(getConf().get(GENERATE_MAX_PER_HOST_BY_IP))) {
            LOG.info(
                    "ReGenerator: GENERATE_MAX_PER_HOST_BY_IP will be ignored, use partition.url.mode instead");
        }

        // map to inverted subset due for fetch, sort by score
        JobConf job = new NutchJob(getConf());
        job.setJobName("generate: select from " + dbDir);

        if (numLists == -1) { // for politeness make
            numLists = job.getNumMapTasks(); // a partition per fetch task
        }
        if ("local".equals(job.get("mapred.job.tracker")) && numLists != 1) {
            // override
            LOG.info("ReGenerator: jobtracker is 'local', generating exactly one partition.");
            numLists = 1;
        }
        job.setLong(GENERATOR_CUR_TIME, curTime);
        // record real generation time
        long generateTime = System.currentTimeMillis();
        job.setLong(Nutch.GENERATE_TIME_KEY, generateTime);
        job.setLong(GENERATOR_TOP_N, topN);
        job.setBoolean(GENERATOR_FILTER, filter);
        job.setBoolean(GENERATOR_NORMALISE, norm);
        job.setInt(GENERATOR_MAX_NUM_SEGMENTS, maxNumSegments);

        FileInputFormat.addInputPath(job, new Path(dbDir, CrawlDb.CURRENT_NAME));
        job.setInputFormat(SequenceFileInputFormat.class);

        job.setMapperClass(Selector.class);
        job.setPartitionerClass(Selector.class);
        job.setReducerClass(Selector.class);

        FileOutputFormat.setOutputPath(job, tempDir);
        job.setOutputFormat(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(FloatWritable.class);
        job.setOutputKeyComparatorClass(DecreasingFloatComparator.class);
        job.setOutputValueClass(SelectorEntry.class);
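        // GeneratorOutputFormat below overrides the SequenceFileOutputFormat set above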
        job.setOutputFormat(GeneratorOutputFormat.class);

        try {
            JobClient.runJob(job);
        } catch (IOException e) {
            throw e;
        }

        // read the subdirectories generated in the temp
        // output and turn them into segments
        List<Path> generatedSegments = new ArrayList<Path>();

        FileStatus[] status = fs.listStatus(tempDir);
        try {
            for (FileStatus stat : status) {
                Path subfetchlist = stat.getPath();
                if (!subfetchlist.getName().startsWith("fetchlist-")) {
                    continue;
                }
                // start a new partition job for this segment
                Path newSeg = partitionSegment(fs, segments, subfetchlist, numLists);
                generatedSegments.add(newSeg);
            }
        } catch (Exception e) {
            LOG.warn("ReGenerator: exception while partitioning segments, exiting ...");
            fs.delete(tempDir, true);
            return null;
        }

        if (generatedSegments.size() == 0) {
            LOG.warn("ReGenerator: 0 records selected for fetching, exiting ...");
            LockUtil.removeLockFile(fs, lock);
            fs.delete(tempDir, true);
            return null;
        }

        if (getConf().getBoolean(GENERATE_UPDATE_CRAWLDB, false)) {
            // update the db from tempDir
            Path tempDir2 = new Path(
                    getConf().get("mapred.temp.dir", ".") + "/generate-temp-" + System.currentTimeMillis());

            job = new NutchJob(getConf());
            job.setJobName("generate: updatedb " + dbDir);
            job.setLong(Nutch.GENERATE_TIME_KEY, generateTime);
            for (Path segmpaths : generatedSegments) {
                Path subGenDir = new Path(segmpaths, CrawlDatum.GENERATE_DIR_NAME);
                FileInputFormat.addInputPath(job, subGenDir);
            }
            FileInputFormat.addInputPath(job, new Path(dbDir, CrawlDb.CURRENT_NAME));
            job.setInputFormat(SequenceFileInputFormat.class);
            job.setMapperClass(CrawlDbUpdater.class);
            job.setReducerClass(CrawlDbUpdater.class);
            job.setOutputFormat(MapFileOutputFormat.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(CrawlDatum.class);
            FileOutputFormat.setOutputPath(job, tempDir2);
            try {
                JobClient.runJob(job);
                CrawlDb.install(job, dbDir);
            } catch (IOException e) {
                LockUtil.removeLockFile(fs, lock);
                fs.delete(tempDir, true);
                fs.delete(tempDir2, true);
                throw e;
            }
            fs.delete(tempDir2, true);
        }

        LockUtil.removeLockFile(fs, lock);
        fs.delete(tempDir, true);

        long end = System.currentTimeMillis();
        LOG.info("ReGenerator: finished at " + sdf.format(end) + ", elapsed: "
                + TimingUtil.elapsedTime(start, end));

        Path[] patharray = new Path[generatedSegments.size()];
        return generatedSegments.toArray(patharray);
    } catch (Exception ex) {
        LOG.error("ReGenerator generate error: " + ex.toString(), ex);
        return null;
    }
}

From source file: nl.tudelft.graphalytics.mapreducev2.evo.DirectedForestFireModelJob.java

License: Apache License

@Override
protected void setConfigurationParameters(JobConf jobConfiguration) {
    super.setConfigurationParameters(jobConfiguration);
    jobConfiguration.setLong(ForestFireModelUtils.MAX_ID, getParameters().getMaxId() + 1);
    jobConfiguration.setFloat(ForestFireModelUtils.P_RATIO, getParameters().getPRatio());
    jobConfiguration.setFloat(ForestFireModelUtils.R_RATIO, getParameters().getRRatio());
    jobConfiguration.set(ForestFireModelUtils.CURRENT_AMBASSADORS,
            ForestFireModelUtils.verticesIDsMap2String(burnedEdges));

    if (getIteration() == 1) {
        if (getNumMappers() > 0) {
            jobConfiguration.setInt(ForestFireModelUtils.NEW_VERTICES_NR,
                    getParameters().getNumNewVertices() / getNumMappers());
            jobConfiguration.setInt(ForestFireModelUtils.ID_SHIFT, getNumMappers());
        } else {
            jobConfiguration.setInt(ForestFireModelUtils.NEW_VERTICES_NR, getParameters().getNumNewVertices());
            jobConfiguration.setInt(ForestFireModelUtils.ID_SHIFT, 1024 * 1024);
        }
        jobConfiguration.setBoolean(ForestFireModelUtils.IS_INIT, true);
    }
}

From source file: nl.tudelft.graphalytics.mapreducev2.evo.UndirectedForestFireModelJob.java

License: Apache License

@Override
protected void setConfigurationParameters(JobConf jobConfiguration) {
    super.setConfigurationParameters(jobConfiguration);
    jobConfiguration.setLong(ForestFireModelUtils.MAX_ID, getParameters().getMaxId() + 1);
    jobConfiguration.setFloat(ForestFireModelUtils.P_RATIO, getParameters().getPRatio());
    jobConfiguration.setFloat(ForestFireModelUtils.R_RATIO, getParameters().getRRatio());
    jobConfiguration.set(ForestFireModelUtils.CURRENT_AMBASSADORS,
            ForestFireModelUtils.verticesIDsMap2String(burnedEdges));

    if (getIteration() == 1) {
        if (getNumMappers() > 0) {
            jobConfiguration.setInt(ForestFireModelUtils.NEW_VERTICES_NR,
                    getParameters().getNumNewVertices() / getNumMappers());
            jobConfiguration.setInt(ForestFireModelUtils.ID_SHIFT, getNumMappers());
        } else {
            jobConfiguration.setInt(ForestFireModelUtils.NEW_VERTICES_NR, getParameters().getNumNewVertices());
            jobConfiguration.setInt(ForestFireModelUtils.ID_SHIFT, 1024 * 1024);
        }
        jobConfiguration.setBoolean(ForestFireModelUtils.IS_INIT, true);
    } else if (getIteration() == getParameters().getMaxIterations() + 1) {
        jobConfiguration.setBoolean(ForestFireModelUtils.IS_FINAL, true);
    }
}

From source file: nthu.scopelab.tsqr.ssvd.ABtDenseOutJob.java

License: Apache License

public static void run(Configuration conf, Path[] inputPath, Path inputBt, Path outputPath, int k, int p,
        int reduceTasks, int mis) throws Exception {

    JobConf job = new JobConf(conf, ABtDenseOutJob.class);
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);

    job.setInt(QJob.PROP_K, k);
    job.setInt(QJob.PROP_P, p);
    job.set(PROP_BT_PATH, inputBt.toString());

    FileOutputFormat.setOutputPath(job, outputPath);
    job.setJobName("ABtDenseOutJob");

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(LMatrixWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(LMatrixWritable.class);

    job.setMapperClass(ABtMapper.class);

    fileGather fgather = new fileGather(inputPath, "", FileSystem.get(job));
    mis = Checker.checkMis(mis, fgather.getInputSize(), FileSystem.get(job));
    job.setNumMapTasks(fgather.recNumMapTasks(mis));

    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, inputPath);

    RunningJob rj = JobClient.runJob(job);
}