Example usage for org.apache.hadoop.mapred JobConf setNumMapTasks

List of usage examples for org.apache.hadoop.mapred JobConf setNumMapTasks

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf.setNumMapTasks(int).

Prototype

public void setNumMapTasks(int n) 

Document

Set the number of map tasks for this job.
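
Below is a minimal, self-contained sketch (not taken from the usage examples that follow) showing how setNumMapTasks is typically called while configuring a JobConf. The class name and input/output paths are placeholders; note that the value passed to setNumMapTasks is only a hint to the framework, since the actual number of map tasks is ultimately determined by the InputFormat's splits.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class SetNumMapTasksExample {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(SetNumMapTasksExample.class);
        conf.setJobName("SetNumMapTasksExample");

        // Request roughly 10 map tasks; the framework treats this as a hint.
        conf.setNumMapTasks(10);
        conf.setNumReduceTasks(1);

        // With the default TextInputFormat and identity mapper/reducer,
        // the job output is LongWritable keys and Text values.
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        // Placeholder paths; replace with real HDFS locations.
        FileInputFormat.setInputPaths(conf, new Path("/user/hadoop/input"));
        FileOutputFormat.setOutputPath(conf, new Path("/user/hadoop/output"));

        JobClient.runJob(conf);
    }
}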

Usage

From source file:ivory.server.RunRetrievalBroker.java

License:Apache License

/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return -1;
    }

    String configPath = args[0];

    FileSystem fs = FileSystem.get(getConf());

    String ids = "";

    sLogger.info("Starting retrieval broker...");
    sLogger.info("server config path: " + configPath);
    FileStatus[] stats = fs.listStatus(new Path(configPath));

    if (stats == null) {
        sLogger.info("Error: " + configPath + " not found!");
        return -1;
    }

    String scoreMergeModel = args[1];
    if (!scoreMergeModel.equals("sort") && !scoreMergeModel.equals("normalize")) {
        throw new RuntimeException("Unsupported score merging model: " + args[1]);
    }

    for (int i = 0; i < stats.length; i++) {
        String s = stats[i].getPath().toString();
        if (!s.endsWith(".host"))
            continue;

        String sid = s.substring(s.lastIndexOf("/") + 1, s.lastIndexOf(".host"));
        sLogger.info("sid=" + sid + ", host=" + s);

        if (ids.length() != 0)
            ids += ";";

        ids += sid;
    }

    JobConf conf = new JobConf(RunRetrievalBroker.class);
    conf.setJobName("RetrievalBroker");

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(NullInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setMapperClass(ServerMapper.class);

    conf.set("serverIDs", ids);
    conf.set("ServerAddressPath", configPath);
    conf.set("ScoreMergeModel", scoreMergeModel);
    conf.set("mapred.child.java.opts", "-Xmx2048m");

    fs.delete(new Path(appendPath(configPath, "broker.ready")), true);

    JobClient client = new JobClient(conf);
    client.submitJob(conf);

    sLogger.info("broker started!");

    while (true) {
        String f = appendPath(configPath, "broker.ready");
        if (fs.exists(new Path(f))) {
            break;
        }

        Thread.sleep(5000);
    }

    String s = FSProperty.readString(FileSystem.get(conf), appendPath(configPath, "broker.ready"));
    sLogger.info("broker ready at " + s);

    return 0;
}

From source file:ivory.smrf.retrieval.distributed.RunDistributedRetrievalServers.java

License:Apache License

/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        printUsage();
        return -1;
    }

    String configFile = args[0];

    FileSystem fs = FileSystem.get(getConf());

    Document d = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(fs.open(new Path(configFile)));

    sLogger.info("Reading configuration to determine number of servers to launch:");
    List<String> sids = new ArrayList<String>();
    NodeList servers = d.getElementsByTagName("server");
    for (int i = 0; i < servers.getLength(); i++) {
        Node node = servers.item(i);

        // get server id
        String sid = XMLTools.getAttributeValue(node, "id", null);
        if (sid == null) {
            throw new Exception("Must specify a query id attribute for every server!");
        }

        sLogger.info(" - sid: " + sid);
        sids.add(sid);
    }

    int port = 7000;
    int numServers = sids.size();
    String configPath = args[1];

    if (fs.exists(new Path(configPath))) {
        fs.delete(new Path(configPath), true);
    }

    String fname = appendPath(configPath, "config-" + numServers + ".txt");
    sLogger.info("Writing configuration to: " + fname);
    StringBuffer sb = new StringBuffer();
    for (int n = 0; n < numServers; n++) {
        port++;
        sb.append(sids.get(n) + " " + port + "\n");
    }

    FSDataOutputStream out = fs.create(new Path(fname), true);
    out.writeBytes(sb.toString());
    out.close();

    JobConf conf = new JobConf(getConf(), RetrievalServer.class);

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(NLineInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setMapperClass(ServerMapper.class);

    FileInputFormat.setInputPaths(conf, new Path(fname));

    conf.set("Ivory.ConfigFile", configFile);
    conf.set("Ivory.ConfigPath", configPath);
    conf.setJobName("RetrievalServers");
    //conf.set("mapred.child.java.opts", "-Xmx2048m");
    conf.set("mapred.child.java.opts", "-Xmx2048m");
    // conf.set("mapred.job.queue.name", "search");

    JobClient client = new JobClient(conf);
    client.submitJob(conf);

    sLogger.info("Waiting for servers to start up...");

    // poll HDFS for hostnames and ports
    boolean allStarted = true;
    do {
        allStarted = true;
        for (int n = 0; n < numServers; n++) {
            String f = appendPath(configPath, sids.get(n) + ".host");
            if (!fs.exists(new Path(f))) {
                allStarted = false;
            }
        }
        Thread.sleep(10000);
        sLogger.info(" ...");
    } while (!allStarted);

    // poll HDFS for ready signal that the index is ready
    boolean allReady = true;
    do {
        allReady = true;
        for (int n = 0; n < numServers; n++) {
            String f = appendPath(configPath, sids.get(n) + ".ready");
            if (!fs.exists(new Path(f))) {
                allReady = false;
            }
        }
        Thread.sleep(10000);
        sLogger.info(" ...");
    } while (!allReady);

    sLogger.info("All servers ready!");
    sLogger.info("Host information:");
    for (int n = 0; n < numServers; n++) {
        String f = appendPath(configPath, sids.get(n) + ".host");
        sLogger.info(" sid=" + sids.get(n) + ", " + FSProperty.readString(fs, f));
    }

    return 0;
}

From source file:ivory.smrf.retrieval.distributed.RunQueryBroker.java

License:Apache License

/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 5) {
        printUsage();
        return -1;
    }

    String configPath = args[0];
    FileSystem fs = FileSystem.get(getConf());

    sLogger.info("server config path: " + configPath);
    FileStatus[] stats = fs.listStatus(new Path(configPath));

    if (stats == null) {
        sLogger.info("Error: " + configPath + " not found!");
        return -1;
    }

    String runtag = args[1];
    String queriesFilePath = args[2];
    String resultsFilePath = args[3];
    int numHits = Integer.parseInt(args[4]);

    JobConf conf = new JobConf(getConf(), RunQueryBroker.class);
    conf.setJobName("RunQueryBroker");

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(NullInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setMapperClass(Server.class);

    conf.set("QueriesFilePath", queriesFilePath);
    conf.set("ConfigPath", configPath);
    conf.set("ResultsFilePath", resultsFilePath);
    conf.set("Runtag", runtag);
    conf.setInt("NumHits", numHits);

    conf.set("mapred.child.java.opts", "-Xmx2048m");

    JobClient client = new JobClient(conf);
    client.submitJob(conf);

    sLogger.info("runner started!");

    return 0;
}

From source file:ivory.smrf.retrieval.distributed.RunRetrievalBroker.java

License:Apache License

/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return -1;
    }

    String configPath = args[0];

    FileSystem fs = FileSystem.get(getConf());

    String ids = "";

    sLogger.info("Starting retrieval broker...");
    sLogger.info("server config path: " + configPath);
    FileStatus[] stats = fs.listStatus(new Path(configPath));

    if (stats == null) {
        sLogger.info("Error: " + configPath + " not found!");
        return -1;
    }

    String scoreMergeModel = args[1];
    if (!scoreMergeModel.equals("sort") && !scoreMergeModel.equals("normalize")) {
        throw new RuntimeException("Unsupported score merging model: " + args[1]);
    }

    for (int i = 0; i < stats.length; i++) {
        String s = stats[i].getPath().toString();
        if (!s.endsWith(".host"))
            continue;

        String sid = s.substring(s.lastIndexOf("/") + 1, s.lastIndexOf(".host"));
        sLogger.info("sid=" + sid + ", host=" + s);

        if (ids.length() != 0)
            ids += ";";

        ids += sid;
    }

    JobConf conf = new JobConf(getConf(), RunRetrievalBroker.class);
    conf.setJobName("RetrievalBroker");

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(NullInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setMapperClass(ServerMapper.class);

    conf.set("serverIDs", ids);
    conf.set("ServerAddressPath", configPath);
    conf.set("ScoreMergeModel", scoreMergeModel);
    conf.set("mapred.child.java.opts", "-Xmx2048m");

    fs.delete(new Path(appendPath(configPath, "broker.ready")), true);

    JobClient client = new JobClient(conf);
    client.submitJob(conf);

    sLogger.info("broker started!");

    while (true) {
        String f = appendPath(configPath, "broker.ready");
        if (fs.exists(new Path(f))) {
            break;
        }

        Thread.sleep(5000);
    }

    String s = FSProperty.readString(FileSystem.get(conf), appendPath(configPath, "broker.ready"));
    sLogger.info("broker ready at " + s);

    return 0;
}

From source file:ivory.smrf.retrieval.RunQueryBroker.java

License:Apache License

/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 5) {
        printUsage();
        return -1;
    }

    String configPath = args[0];
    FileSystem fs = FileSystem.get(getConf());

    sLogger.info("server config path: " + configPath);
    FileStatus[] stats = fs.listStatus(new Path(configPath));

    if (stats == null) {
        sLogger.info("Error: " + configPath + " not found!");
        return -1;
    }

    String runtag = args[1];
    String queriesFilePath = args[2];
    String resultsFilePath = args[3];
    int numHits = Integer.parseInt(args[4]);

    JobConf conf = new JobConf(RunQueryBroker.class);
    conf.setJobName("RunQueryBroker");

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(NullInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setMapperClass(Server.class);

    conf.set("QueriesFilePath", queriesFilePath);
    conf.set("ConfigPath", configPath);
    conf.set("ResultsFilePath", resultsFilePath);
    conf.set("Runtag", runtag);
    conf.setInt("NumHits", numHits);

    conf.set("mapred.child.java.opts", "-Xmx2048m");

    JobClient client = new JobClient(conf);
    client.submitJob(conf);

    sLogger.info("runner started!");

    return 0;
}

From source file:ivory.smrf.retrieval.RunQueryHDFS.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.out.println("usage: [queries-file] [models-file]");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String argsStr = Joiner.on(";").join(args);

    JobConf conf = new JobConf(getConf(), RunQueryHDFS.class);
    conf.setJobName("RunQueryHDFS");

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(NullInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setMapperClass(QueryRunner.class);

    conf.set("args", argsStr);
    conf.set("mapred.child.java.opts", "-Xmx16g");

    LOG.info("argsStr: " + argsStr);

    JobClient client = new JobClient(conf);
    client.submitJob(conf);

    LOG.info("runner started!");

    return 0;
}

From source file:mapreduce.BigramCount.java

License:Apache License

/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];

    int mapTasks = 1;//Integer.parseInt(args[2]);
    int reduceTasks = 1;//Integer.parseInt(args[3]);

    sLogger.info("Tool: BigramCount");
    sLogger.info(" - input path: " + inputPath);
    sLogger.info(" - output path: " + outputPath);
    sLogger.info(" - number of mappers: " + mapTasks);
    sLogger.info(" - number of reducers: " + reduceTasks);

    JobConf conf = new JobConf(BigramCount.class);
    conf.setJobName("BigramCount");

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    /**
     *  Note that these must match the Class arguments given in the mapper 
     */
    conf.setOutputKeyClass(WordPair.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MyMapper.class);
    conf.setPartitionerClass(MyPartitioner.class);
    conf.setCombinerClass(MyReducer.class);
    conf.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already
    Path outputDir = new Path(outputPath);
    FileSystem.get(outputDir.toUri(), conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    JobClient.runJob(conf);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file:mapreduce.DosAttack.java

License:Apache License

private void issue() throws IOException {
    LOG.info("Starting DOS on url[{}] with clients[{}]", wsURL, numMappers);
    DosMapper.init(wsURL);
    JobConf job = new JobConf(DosAttack.class);
    job.setJarByClass(DosAttack.class);
    job.setJobName("DOS Attack");
    job.setNumReduceTasks(0);
    job.setInputFormat(NullInputFormat.class);
    job.setOutputFormat(NullOutputFormat.class);
    job.setMapperClass(DosMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumMapTasks(numMappers);
    job.setInt(NUM_MAPPERS_KEY, numMappers);
    job.setInt(NUM_REQUESTS_KEY, numRequests);
    job.set(TARGET_URL_KEY, wsURL);
    JobClient.runJob(job);
}

From source file:map_reduce.MapReduce_OptimizedBrandesAdditions_DO_JUNG.java

License:Open Source License

@SuppressWarnings("deprecation")
@Override
public int run(String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("Usage:\n");
        System.exit(1);
    }

    //       Job job = new Job(super.getConf());

    //      READ IN ALL COMMAND LINE ARGUMENTS
    //      EXAMPLE: 
    // hadoop jar MapReduce_OptimizedBrandesAdditions_DO_JUNG.jar
    // -libjars collections-generic-4.01.jar,jung-graph-impl-2.0.1.jar,jung-api-2.0.1.jar
    // -Dmapred.job.map.memory.mb=4096
    // -Dmapred.job.reduce.memory.mb=4096
    // -Dmapred.child.java.opts=-Xmx3500m
    // -Dmapreduce.task.timeout=60000000
    // -Dmapreduce.job.queuename=QUEUENAME
    // input_iterbrandes_additions_nocomb_10k_1 output_iterbrandes_additions_nocomb_10k_1
    // 10 1 10000 55245 10k 10k_randedges 100 1 false times/ betweenness/

    int m = -1;

    // input path to use on hdfs
    Path inputPath = new Path(args[++m]);

    // output path to use on hdfs
    Path outputPath = new Path(args[++m]);

    // number of Mappers to split the sources: e.g., 1, 10, 100 etc.
    // rule of thumb: the larger the graph (i.e., number of roots to test), the larger should be this number.
    int numOfMaps = Integer.parseInt(args[++m]);

    // number of Reducers to collect the output
    int numOfReduce = Integer.parseInt(args[++m]);

    // Number of vertices in graph
    int N = Integer.parseInt(args[++m]);

    // Number of edges in graph
    int M = Integer.parseInt(args[++m]);

    // Graph file (edge list, tab delimited) (full path)
    String graph = args[++m];

    // File with edges to be added (tab delimited) (full path)
    // Note: this version handles only edges between existing vertices in the graph.
    String random_edges = args[++m];

    // Number of random edges added
    int re = Integer.parseInt(args[++m]);

    // Experiment iteration (in case of multiple experiments)
    int iter = Integer.parseInt(args[++m]);

    // Use combiner or not (true/false)
    Boolean comb = Boolean.valueOf(args[++m]);

    // Output path for file with stats
    String statsoutputpath = args[++m];

    // Output path for file with final betweenness values
    String betoutputpath = args[++m];

    //      BEGIN INITIALIZATION

    JobConf conf = new JobConf(getConf(), MapReduce_OptimizedBrandesAdditions_DO_JUNG.class);
    FileSystem fs = FileSystem.get(conf);

    String setup = "_additions_edges" + re + "_maps" + numOfMaps + "_comb" + comb;
    conf.setJobName("OptimizedBrandesAdditionsDOJung_" + graph + setup + "_" + iter);
    conf.set("HDFS_GRAPH", graph + setup);
    conf.set("HDFS_Random_Edges", random_edges + setup);
    conf.set("output", outputPath.getName());
    conf.set("setup", setup);

    //      CREATE INPUT FILES FOR MAPPERS

    // cast to double so the division is not truncated before Math.ceil
    int numOfTasksperMap = (int) Math.ceil((double) N / numOfMaps);
    //generate an input file for each map task
    for (int i = 0; i < numOfMaps - 1; i++) {
        Path file = new Path(inputPath, "part-r-" + i);
        IntWritable start = new IntWritable(i * numOfTasksperMap);
        IntWritable end = new IntWritable((i * numOfTasksperMap) + numOfTasksperMap - 1);

        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class,
                IntWritable.class, CompressionType.NONE);
        try {
            writer.append(start, end);
        } finally {
            writer.close();
        }
        System.out.println("Wrote input for Map #" + i + ": " + start + " - " + end);
    }

    // last mapper takes what is left
    Path file = new Path(inputPath, "part-r-" + (numOfMaps - 1));
    IntWritable start = new IntWritable((numOfMaps - 1) * numOfTasksperMap);
    IntWritable end = new IntWritable(N - 1);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, IntWritable.class,
            CompressionType.NONE);
    try {
        writer.append(start, end);
    } finally {
        writer.close();
    }
    System.out.println("Wrote input for Map #" + (numOfMaps - 1) + ": " + start + " - " + end);

    //      COPY FILES TO MAPPERS
    System.out.println("Copying graph to cache");
    String LOCAL_GRAPH = graph;
    Path hdfsPath = new Path(graph + setup);

    // upload the file to hdfs. Overwrite any existing copy.
    fs.copyFromLocalFile(false, true, new Path(LOCAL_GRAPH), hdfsPath);
    DistributedCache.addCacheFile(hdfsPath.toUri(), conf);

    System.out.println("Copying random edges to cache");
    String LOCAL_Random_Edges = random_edges;
    hdfsPath = new Path(random_edges + setup);

    // upload the file to hdfs. Overwrite any existing copy.
    fs.copyFromLocalFile(false, true, new Path(LOCAL_Random_Edges), hdfsPath);
    DistributedCache.addCacheFile(hdfsPath.toUri(), conf);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(DoubleWritable.class);

    conf.setMapperClass(IterBrandesMapper.class);
    conf.setNumMapTasks(numOfMaps);

    if (comb)
        conf.setCombinerClass(IterBrandesReducer.class);

    conf.setReducerClass(IterBrandesReducer.class);
    conf.setNumReduceTasks(numOfReduce);

    // turn off speculative execution, because DFS doesn't handle multiple writers to the same file.
    conf.setSpeculativeExecution(false);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    // conf.set("mapred.job.name", "APS-" + outputPath.getName());
    conf.setNumTasksToExecutePerJvm(-1); // JVM reuse

    System.out.println("Starting the execution...! Pray!! \n");
    long time1 = System.nanoTime();
    RunningJob rj = JobClient.runJob(conf);
    long time2 = System.nanoTime();

    //      READ OUTPUT FILES

    System.out.println("\nFinished and now reading/writing Betweenness Output...\n");

    // Assuming 1 reducer.
    Path inFile = new Path(outputPath, "part-00000");
    IntWritable id = new IntWritable();
    DoubleWritable betweenness = new DoubleWritable();
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);

    FileWriter fw = new FileWriter(new File(betoutputpath + graph + setup + "_betweenness_" + iter));
    try {
        int i = 0;
        for (; i < (N + M + re); i++) {
            reader.next(id, betweenness);
            fw.write(id + "\t" + betweenness + "\n");
            fw.flush();
        }
    } finally {
        reader.close();
        fw.close();
    }

    System.out.println("\nWriting times Output...\n");

    fw = new FileWriter(new File(statsoutputpath + graph + setup + "_times_" + iter));

    fw.write("Total-time:\t" + (time2 - time1) + "\n");
    fw.write("total-map\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("SLOTS_MILLIS_MAPS") + "\n");
    fw.write("total-reduce\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("SLOTS_MILLIS_REDUCES") + "\n");
    fw.write("total-cpu-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("CPU_MILLISECONDS") + "\n");
    fw.write("total-gc-mr\t"
            + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter").getCounter("GC_TIME_MILLIS")
            + "\n");
    fw.write("total-phy-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("PHYSICAL_MEMORY_BYTES") + "\n");
    fw.write("total-vir-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("VIRTUAL_MEMORY_BYTES") + "\n");
    fw.write("brandes\t" + rj.getCounters().getGroup("TimeForBrandes").getCounter("exectime_initial_brandes")
            + "\n");
    fw.write("reduce\t" + rj.getCounters().getGroup("TimeForReduce").getCounter("reduceafteralledges") + "\n");
    fw.flush();

    try {
        Iterator<Counters.Counter> counters = rj.getCounters().getGroup("TimeForRandomEdges").iterator();
        while (counters.hasNext()) {
            Counter cc = counters.next();
            fw.write(cc.getName() + "\t" + cc.getCounter() + "\n");
            fw.flush();
        }
    } finally {
        fw.close();
    }

    return 0;
}

From source file:map_reduce.MapReduce_OptimizedBrandesDeletions_DO_JUNG.java

License:Open Source License

@SuppressWarnings("deprecation")
@Override
public int run(String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("Usage:\n");
        System.exit(1);
    }

    //       Job job = new Job(super.getConf());

    //      READ IN ALL COMMAND LINE ARGUMENTS
    //      EXAMPLE: 
    // hadoop jar MapReduce_OptimizedBrandesDeletions_DO_JUNG.jar
    // -libjars collections-generic-4.01.jar,jung-graph-impl-2.0.1.jar,jung-api-2.0.1.jar
    // -Dmapred.job.map.memory.mb=4096
    // -Dmapred.job.reduce.memory.mb=4096
    // -Dmapred.child.java.opts=-Xmx3500m
    // -Dmapreduce.task.timeout=60000000
    // -Dmapreduce.job.queuename=QUEUENAME
    // input_iterbrandes_deletions_nocomb_10k_1 output_iterbrandes_deletions_nocomb_10k_1
    // 10 1 10000 55245 10k 10k_randedges 100 1 false times/ betweenness/

    int m = -1;

    // input path to use on hdfs
    Path inputPath = new Path(args[++m]);

    // output path to use on hdfs
    Path outputPath = new Path(args[++m]);

    // number of Mappers to split the sources: e.g., 1, 10, 100 etc.
    // rule of thumb: the larger the graph (i.e., number of roots to test), the larger should be this number.
    int numOfMaps = Integer.parseInt(args[++m]);

    // number of Reducers to collect the output
    int numOfReduce = Integer.parseInt(args[++m]);

    // Number of vertices in graph
    int N = Integer.parseInt(args[++m]);

    // Number of edges in graph
    int M = Integer.parseInt(args[++m]);

    // Graph file (edge list, tab delimited) (full path)
    String graph = args[++m];

    // File with edges to be added (tab delimited) (full path)
    // Note: this version handles only edges between existing vertices in the graph.
    String random_edges = args[++m];

    // Number of random edges added
    int re = Integer.parseInt(args[++m]);

    // Experiment iteration (in case of multiple experiments)
    int iter = Integer.parseInt(args[++m]);

    // Use combiner or not (true/false)
    Boolean comb = Boolean.valueOf(args[++m]);

    // Output path for file with stats
    String statsoutputpath = args[++m];

    // Output path for file with final betweenness values
    String betoutputpath = args[++m];

    //      BEGIN INITIALIZATION

    JobConf conf = new JobConf(getConf(), MapReduce_OptimizedBrandesDeletions_DO_JUNG.class);
    FileSystem fs = FileSystem.get(conf);

    String setup = "_deletions_edges" + re + "_maps" + numOfMaps + "_comb" + comb;
    conf.setJobName("OptimizedBrandesDeletionsDOJung_" + graph + setup + "_" + iter);
    conf.set("HDFS_GRAPH", graph + setup);
    conf.set("HDFS_Random_Edges", random_edges + setup);
    conf.set("output", outputPath.getName());
    conf.set("setup", setup);

    //      CREATE INPUT FILES FOR MAPPERS

    // cast to double so the division is not truncated before Math.ceil
    int numOfTasksperMap = (int) Math.ceil((double) N / numOfMaps);
    //generate an input file for each map task
    for (int i = 0; i < numOfMaps - 1; i++) {
        Path file = new Path(inputPath, "part-r-" + i);
        IntWritable start = new IntWritable(i * numOfTasksperMap);
        IntWritable end = new IntWritable((i * numOfTasksperMap) + numOfTasksperMap - 1);

        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class,
                IntWritable.class, CompressionType.NONE);
        try {
            writer.append(start, end);
        } finally {
            writer.close();
        }
        System.out.println("Wrote input for Map #" + i + ": " + start + " - " + end);
    }

    // last mapper takes what is left
    Path file = new Path(inputPath, "part-r-" + (numOfMaps - 1));
    IntWritable start = new IntWritable((numOfMaps - 1) * numOfTasksperMap);
    IntWritable end = new IntWritable(N - 1);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, IntWritable.class,
            CompressionType.NONE);
    try {
        writer.append(start, end);
    } finally {
        writer.close();
    }
    System.out.println("Wrote input for Map #" + (numOfMaps - 1) + ": " + start + " - " + end);

    //      COPY FILES TO MAPPERS
    System.out.println("Copying graph to cache");
    String LOCAL_GRAPH = graph;
    Path hdfsPath = new Path(graph + setup);

    // upload the file to hdfs. Overwrite any existing copy.
    fs.copyFromLocalFile(false, true, new Path(LOCAL_GRAPH), hdfsPath);
    DistributedCache.addCacheFile(hdfsPath.toUri(), conf);

    System.out.println("Copying random edges to cache");
    String LOCAL_Random_Edges = random_edges;
    hdfsPath = new Path(random_edges + setup);

    // upload the file to hdfs. Overwrite any existing copy.
    fs.copyFromLocalFile(false, true, new Path(LOCAL_Random_Edges), hdfsPath);
    DistributedCache.addCacheFile(hdfsPath.toUri(), conf);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(DoubleWritable.class);

    conf.setMapperClass(IterBrandesMapper.class);
    conf.setNumMapTasks(numOfMaps);

    if (comb)
        conf.setCombinerClass(IterBrandesReducer.class);

    conf.setReducerClass(IterBrandesReducer.class);
    conf.setNumReduceTasks(numOfReduce);

    // turn off speculative execution, because DFS doesn't handle multiple writers to the same file.
    conf.setSpeculativeExecution(false);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    // conf.set("mapred.job.name", "APS-" + outputPath.getName());
    conf.setNumTasksToExecutePerJvm(-1); // JVM reuse

    System.out.println("Starting the execution...! Pray!! \n");
    long time1 = System.nanoTime();
    RunningJob rj = JobClient.runJob(conf);
    long time2 = System.nanoTime();

    //      READ OUTPUT FILES

    System.out.println("\nFinished and now reading/writing Betweenness Output...\n");

    // Assuming 1 reducer.
    Path inFile = new Path(outputPath, "part-00000");
    IntWritable id = new IntWritable();
    DoubleWritable betweenness = new DoubleWritable();
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);

    FileWriter fw = new FileWriter(new File(betoutputpath + graph + setup + "_betweenness_" + iter));
    try {
        int i = 0;
        for (; i < (N + (M - re)); i++) {
            reader.next(id, betweenness);
            fw.write(id + "\t" + betweenness + "\n");
            fw.flush();
        }
    } finally {
        reader.close();
        fw.close();
    }

    System.out.println("\nWriting times Output...\n");

    fw = new FileWriter(new File(statsoutputpath + graph + setup + "_times_" + iter));

    fw.write("Total-time:\t" + (time2 - time1) + "\n");
    fw.write("total-map\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("SLOTS_MILLIS_MAPS") + "\n");
    fw.write("total-reduce\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("SLOTS_MILLIS_REDUCES") + "\n");
    fw.write("total-cpu-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("CPU_MILLISECONDS") + "\n");
    fw.write("total-gc-mr\t"
            + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter").getCounter("GC_TIME_MILLIS")
            + "\n");
    fw.write("total-phy-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("PHYSICAL_MEMORY_BYTES") + "\n");
    fw.write("total-vir-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("VIRTUAL_MEMORY_BYTES") + "\n");
    fw.write("brandes\t" + rj.getCounters().getGroup("TimeForBrandes").getCounter("exectime_initial_brandes")
            + "\n");
    fw.write("reduce\t" + rj.getCounters().getGroup("TimeForReduce").getCounter("reduceafteralledges") + "\n");
    fw.flush();

    try {
        Iterator<Counters.Counter> counters = rj.getCounters().getGroup("TimeForRandomEdges").iterator();
        while (counters.hasNext()) {
            Counter cc = counters.next();
            fw.write(cc.getName() + "\t" + cc.getCounter() + "\n");
            fw.flush();
        }
    } finally {
        fw.close();
    }

    return 0;
}