Example usage for org.apache.hadoop.mapreduce Job submit

Introduction

On this page you can find example usages of the org.apache.hadoop.mapreduce Job submit() method.

Prototype

public void submit() throws IOException, InterruptedException, ClassNotFoundException 

Document

Submit the job to the cluster and return immediately.
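
Because submit() returns as soon as the job has been handed off to the cluster, the caller is responsible for monitoring progress (or for simply firing and forgetting, as several examples below do when a "background" flag is set). The following minimal sketch is not taken from any of the examples below; MyMapper, MyReducer, and the input/output paths are hypothetical placeholders.

// Minimal sketch (assumed names): submit asynchronously, then poll for completion.
public static void runAsync(Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(conf, "example-async-job");
    job.setJarByClass(MyMapper.class);        // MyMapper/MyReducer are hypothetical classes
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path("input"));      // placeholder paths
    FileOutputFormat.setOutputPath(job, new Path("output"));

    job.submit();                             // hands the job to the cluster and returns
    while (!job.isComplete()) {               // poll the cluster for job status
        Thread.sleep(5000);
    }
    System.out.println(job.getJobID() + (job.isSuccessful() ? " succeeded" : " failed"));

    // Blocking alternative used by most of the examples below:
    // job.waitForCompletion(true);           // submits (if not yet submitted) and waits
}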

Usage

From source file: edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques.java

License: BSD License

/**
 * @param args
 * @throws ParseException 
 */
@SuppressWarnings({ "deprecation" })
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Options options = new Options();

    Option forceOption = new Option("f", "force", false,
            "force the computation of the relationship " + "even if files already exist");
    forceOption.setRequired(false);
    options.addOption(forceOption);

    Option g1Option = new Option("g1", "first-group", true, "set first group of datasets");
    g1Option.setRequired(true);
    g1Option.setArgName("FIRST GROUP");
    g1Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g1Option);

    Option g2Option = new Option("g2", "second-group", true, "set second group of datasets");
    g2Option.setRequired(false);
    g2Option.setArgName("SECOND GROUP");
    g2Option.setArgs(Option.UNLIMITED_VALUES);
    options.addOption(g2Option);

    Option machineOption = new Option("m", "machine", true, "machine identifier");
    machineOption.setRequired(true);
    machineOption.setArgName("MACHINE");
    machineOption.setArgs(1);
    options.addOption(machineOption);

    Option nodesOption = new Option("n", "nodes", true, "number of nodes");
    nodesOption.setRequired(true);
    nodesOption.setArgName("NODES");
    nodesOption.setArgs(1);
    options.addOption(nodesOption);

    Option s3Option = new Option("s3", "s3", false, "data on Amazon S3");
    s3Option.setRequired(false);
    options.addOption(s3Option);

    Option awsAccessKeyIdOption = new Option("aws_id", "aws-id", true,
            "aws access key id; " + "this is required if the execution is on aws");
    awsAccessKeyIdOption.setRequired(false);
    awsAccessKeyIdOption.setArgName("AWS-ACCESS-KEY-ID");
    awsAccessKeyIdOption.setArgs(1);
    options.addOption(awsAccessKeyIdOption);

    Option awsSecretAccessKeyOption = new Option("aws_key", "aws-key", true,
            "aws secret access key; " + "this is required if the execution is on aws");
    awsSecretAccessKeyOption.setRequired(false);
    awsSecretAccessKeyOption.setArgName("AWS-SECRET-ACCESS-KEY");
    awsSecretAccessKeyOption.setArgs(1);
    options.addOption(awsSecretAccessKeyOption);

    Option bucketOption = new Option("b", "s3-bucket", true,
            "bucket on s3; " + "this is required if the execution is on aws");
    bucketOption.setRequired(false);
    bucketOption.setArgName("S3-BUCKET");
    bucketOption.setArgs(1);
    options.addOption(bucketOption);

    Option helpOption = new Option("h", "help", false, "display this message");
    helpOption.setRequired(false);
    options.addOption(helpOption);

    HelpFormatter formatter = new HelpFormatter();
    CommandLineParser parser = new PosixParser();
    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
    } catch (ParseException e) {
        formatter.printHelp(
                "hadoop jar data-polygamy.jar "
                        + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques",
                options, true);
        System.exit(0);
    }

    if (cmd.hasOption("h")) {
        formatter.printHelp(
                "hadoop jar data-polygamy.jar "
                        + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques",
                options, true);
        System.exit(0);
    }

    boolean s3 = cmd.hasOption("s3");
    String s3bucket = "";
    String awsAccessKeyId = "";
    String awsSecretAccessKey = "";

    if (s3) {
        if ((!cmd.hasOption("aws_id")) || (!cmd.hasOption("aws_key")) || (!cmd.hasOption("b"))) {
            System.out.println(
                    "Arguments 'aws_id', 'aws_key', and 'b'" + " are mandatory if execution is on AWS.");
            formatter.printHelp(
                    "hadoop jar data-polygamy.jar "
                            + "edu.nyu.vida.data_polygamy.standard_techniques.CorrelationTechniques",
                    options, true);
            System.exit(0);
        }
        s3bucket = cmd.getOptionValue("b");
        awsAccessKeyId = cmd.getOptionValue("aws_id");
        awsSecretAccessKey = cmd.getOptionValue("aws_key");
    }

    boolean snappyCompression = false;
    boolean bzip2Compression = false;
    String machine = cmd.getOptionValue("m");
    int nbNodes = Integer.parseInt(cmd.getOptionValue("n"));

    Configuration s3conf = new Configuration();
    if (s3) {
        s3conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        s3conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        s3conf.set("bucket", s3bucket);
    }

    Path path = null;
    FileSystem fs = FileSystem.get(new Configuration());

    ArrayList<String> shortDataset = new ArrayList<String>();
    ArrayList<String> firstGroup = new ArrayList<String>();
    ArrayList<String> secondGroup = new ArrayList<String>();
    HashMap<String, String> datasetAgg = new HashMap<String, String>();

    boolean removeExistingFiles = cmd.hasOption("f");

    String[] firstGroupCmd = cmd.getOptionValues("g1");
    String[] secondGroupCmd = cmd.hasOption("g2") ? cmd.getOptionValues("g2") : new String[0];
    addDatasets(firstGroupCmd, firstGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);
    addDatasets(secondGroupCmd, secondGroup, shortDataset, datasetAgg, path, fs, s3conf, s3, s3bucket);

    if (shortDataset.size() == 0) {
        System.out.println("No datasets to process.");
        System.exit(0);
    }

    if (firstGroup.isEmpty()) {
        System.out.println("First group of datasets (G1) is empty. " + "Doing G1 = G2.");
        firstGroup.addAll(secondGroup);
    }

    if (secondGroup.isEmpty()) {
        System.out.println("Second group of datasets (G2) is empty. " + "Doing G2 = G1.");
        secondGroup.addAll(firstGroup);
    }

    // getting dataset ids

    String datasetNames = "";
    String datasetIds = "";
    HashMap<String, String> datasetId = new HashMap<String, String>();
    Iterator<String> it = shortDataset.iterator();
    while (it.hasNext()) {
        datasetId.put(it.next(), null);
    }

    if (s3) {
        path = new Path(s3bucket + FrameworkUtils.datasetsIndexDir);
        fs = FileSystem.get(path.toUri(), s3conf);
    } else {
        path = new Path(fs.getHomeDirectory() + "/" + FrameworkUtils.datasetsIndexDir);
    }
    BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)));
    String line = br.readLine();
    while (line != null) {
        String[] dt = line.split("\t");
        if (datasetId.containsKey(dt[0])) {
            datasetId.put(dt[0], dt[1]);
            datasetNames += dt[0] + ",";
            datasetIds += dt[1] + ",";
        }
        line = br.readLine();
    }
    br.close();
    if (s3)
        fs.close();

    datasetNames = datasetNames.substring(0, datasetNames.length() - 1);
    datasetIds = datasetIds.substring(0, datasetIds.length() - 1);
    it = shortDataset.iterator();
    while (it.hasNext()) {
        String dataset = it.next();
        if (datasetId.get(dataset) == null) {
            System.out.println("No dataset id for " + dataset);
            System.exit(0);
        }
    }

    String firstGroupStr = "";
    String secondGroupStr = "";
    for (String dataset : firstGroup) {
        firstGroupStr += datasetId.get(dataset) + ",";
    }
    for (String dataset : secondGroup) {
        secondGroupStr += datasetId.get(dataset) + ",";
    }
    firstGroupStr = firstGroupStr.substring(0, firstGroupStr.length() - 1);
    secondGroupStr = secondGroupStr.substring(0, secondGroupStr.length() - 1);

    FrameworkUtils.createDir(s3bucket + FrameworkUtils.correlationTechniquesDir, s3conf, s3);

    String dataAttributesInputDirs = "";
    String noRelationship = "";

    HashSet<String> dirs = new HashSet<String>();

    String dataset1;
    String dataset2;
    String datasetId1;
    String datasetId2;
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {

            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }

            datasetId1 = datasetId.get(dataset1);
            datasetId2 = datasetId.get(dataset2);

            if (dataset1.equals(dataset2))
                continue;
            String correlationOutputFileName = s3bucket + FrameworkUtils.correlationTechniquesDir + "/"
                    + dataset1 + "-" + dataset2 + "/";

            if (removeExistingFiles) {
                FrameworkUtils.removeFile(correlationOutputFileName, s3conf, s3);
            }
            if (!FrameworkUtils.fileExists(correlationOutputFileName, s3conf, s3)) {
                dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset1);
                dirs.add(s3bucket + FrameworkUtils.aggregatesDir + "/" + dataset2);
            } else {
                noRelationship += datasetId1 + "-" + datasetId2 + ",";
            }
        }
    }

    if (dirs.isEmpty()) {
        System.out.println("All the relationships were already computed.");
        System.out.println("Use -f in the beginning of the command line to force the computation.");
        System.exit(0);
    }

    for (String dir : dirs) {
        dataAttributesInputDirs += dir + ",";
    }

    Configuration conf = new Configuration();
    Machine machineConf = new Machine(machine, nbNodes);

    String jobName = "correlation";
    String correlationOutputDir = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/";

    FrameworkUtils.removeFile(correlationOutputDir, s3conf, s3);

    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg", datasetAgg.get(shortDataset.get(i)));
    }
    for (int i = 0; i < shortDataset.size(); i++) {
        conf.set("dataset-" + datasetId.get(shortDataset.get(i)) + "-agg-size",
                Integer.toString(datasetAgg.get(shortDataset.get(i)).split(",").length));
    }
    conf.set("dataset-keys", datasetIds);
    conf.set("dataset-names", datasetNames);
    conf.set("first-group", firstGroupStr);
    conf.set("second-group", secondGroupStr);
    conf.set("main-dataset-id", datasetId.get(shortDataset.get(0)));
    if (noRelationship.length() > 0) {
        conf.set("no-relationship", noRelationship.substring(0, noRelationship.length() - 1));
    }

    conf.set("mapreduce.tasktracker.map.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.tasktracker.reduce.tasks.maximum", String.valueOf(machineConf.getMaximumTasks()));
    conf.set("mapreduce.jobtracker.maxtasks.perjob", "-1");
    conf.set("mapreduce.reduce.shuffle.parallelcopies", "20");
    conf.set("mapreduce.input.fileinputformat.split.minsize", "0");
    conf.set("mapreduce.task.io.sort.mb", "200");
    conf.set("mapreduce.task.io.sort.factor", "100");
    conf.set("mapreduce.task.timeout", "2400000");

    if (s3) {
        machineConf.setMachineConfiguration(conf);
        conf.set("fs.s3.awsAccessKeyId", awsAccessKeyId);
        conf.set("fs.s3.awsSecretAccessKey", awsSecretAccessKey);
        conf.set("bucket", s3bucket);
    }

    if (snappyCompression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    }
    if (bzip2Compression) {
        conf.set("mapreduce.map.output.compress", "true");
        conf.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
        //conf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    }

    Job job = new Job(conf);
    job.setJobName(jobName);

    job.setMapOutputKeyClass(PairAttributeWritable.class);
    job.setMapOutputValueClass(SpatioTemporalValueWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(CorrelationTechniquesMapper.class);
    job.setReducerClass(CorrelationTechniquesReducer.class);
    job.setNumReduceTasks(machineConf.getNumberReduces());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job,
            dataAttributesInputDirs.substring(0, dataAttributesInputDirs.length() - 1));
    FileOutputFormat.setOutputPath(job, new Path(correlationOutputDir));

    job.setJarByClass(CorrelationTechniques.class);

    long start = System.currentTimeMillis();
    job.submit();
    job.waitForCompletion(true);
    System.out.println(jobName + "\t" + (System.currentTimeMillis() - start));

    // moving files to the right place
    for (int i = 0; i < firstGroup.size(); i++) {
        for (int j = 0; j < secondGroup.size(); j++) {

            if (Integer.parseInt(datasetId.get(firstGroup.get(i))) < Integer
                    .parseInt(datasetId.get(secondGroup.get(j)))) {
                dataset1 = firstGroup.get(i);
                dataset2 = secondGroup.get(j);
            } else {
                dataset1 = secondGroup.get(j);
                dataset2 = firstGroup.get(i);
            }

            if (dataset1.equals(dataset2))
                continue;

            String from = s3bucket + FrameworkUtils.correlationTechniquesDir + "/tmp/" + dataset1 + "-"
                    + dataset2 + "/";
            String to = s3bucket + FrameworkUtils.correlationTechniquesDir + "/" + dataset1 + "-" + dataset2
                    + "/";
            FrameworkUtils.renameFile(from, to, s3conf, s3);
        }
    }
}

From source file: edu.umn.cs.spatialHadoop.delaunay.DelaunayTriangulation.java

License: Open Source License

/**
 * Run the DT algorithm in MapReduce
 * @param inPaths
 * @param outPath
 * @param params
 * @return
 * @throws IOException 
 * @throws ClassNotFoundException 
 * @throws InterruptedException 
 */
public static Job delaunayMapReduce(Path[] inPaths, Path outPath, OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(params, "Delaunay Triangulation");
    job.setJarByClass(DelaunayTriangulation.class);

    // Set map and reduce
    job.setMapperClass(DelaunayMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Triangulation.class);
    job.setReducerClass(DelaunayReduce.class);

    // Set input and output
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inPaths);
    job.setOutputFormatClass(DelaunayTriangulationOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outPath);

    // Set column boundaries to define the boundaries of each reducer
    SpatialSite.splitReduceSpace(job, inPaths, params);

    // Submit the job
    if (!params.getBoolean("background", false)) {
        job.waitForCompletion(params.getBoolean("verbose", false));
        if (!job.isSuccessful())
            throw new RuntimeException("Job failed!");
    } else {
        job.submit();
    }
    return job;
}

From source file: edu.umn.cs.spatialHadoop.indexing.Indexer.java

License: Open Source License

private static Job indexMapReduce(Path inPath, Path outPath, OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(params, "Indexer");
    Configuration conf = job.getConfiguration();
    job.setJarByClass(Indexer.class);

    // Set input file MBR if not already set
    Rectangle inputMBR = (Rectangle) OperationsParams.getShape(conf, "mbr");
    if (inputMBR == null) {
        inputMBR = FileMBR.fileMBR(inPath, new OperationsParams(conf));
        OperationsParams.setShape(conf, "mbr", inputMBR);
    }

    // Set the correct partitioner according to index type
    String index = conf.get("sindex");
    if (index == null)
        throw new RuntimeException("Index type is not set");
    long t1 = System.currentTimeMillis();
    setLocalIndexer(conf, index);
    Partitioner partitioner = createPartitioner(inPath, outPath, conf, index);
    Partitioner.setPartitioner(conf, partitioner);

    long t2 = System.currentTimeMillis();
    System.out.println("Total time for space subdivision in millis: " + (t2 - t1));

    // Set mapper and reducer
    Shape shape = OperationsParams.getShape(conf, "shape");
    job.setMapperClass(PartitionerMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(shape.getClass());
    job.setReducerClass(PartitionerReduce.class);
    // Set input and output
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inPath);
    job.setOutputFormatClass(IndexOutputFormat.class);
    IndexOutputFormat.setOutputPath(job, outPath);
    // Set number of reduce tasks according to cluster status
    ClusterStatus clusterStatus = new JobClient(new JobConf()).getClusterStatus();
    job.setNumReduceTasks(Math.max(1,
            Math.min(partitioner.getPartitionCount(), (clusterStatus.getMaxReduceTasks() * 9) / 10)));

    // Use multithreading in case the job is running locally
    conf.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors());

    // Start the job
    if (conf.getBoolean("background", false)) {
        // Run in background
        job.submit();
    } else {
        job.waitForCompletion(conf.getBoolean("verbose", false));
    }
    return job;
}

From source file: edu.umn.cs.spatialHadoop.operations.ClosestPair.java

License: Open Source License

public static Job closestPairMapReduce(Path[] inPaths, Path outPath, OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(params, "Closest Pair");
    job.setJarByClass(ClosestPair.class);
    Shape shape = params.getShape("shape");

    // Set map and reduce
    job.setMapperClass(ClosestPairMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(shape.getClass());
    job.setReducerClass(ClosestPairReduce.class);

    // Set input and output
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inPaths);
    job.setOutputFormatClass(ClosestPairOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outPath);

    // Set column boundaries to define the boundaries of each reducer
    SpatialSite.splitReduceSpace(job, inPaths, params);

    // Submit the job
    if (!params.getBoolean("background", false)) {
        job.waitForCompletion(params.getBoolean("verbose", false));
        if (!job.isSuccessful())
            throw new RuntimeException("Job failed!");
    } else {
        job.submit();
    }
    return job;
}

From source file: edu.umn.cs.spatialHadoop.operations.FarthestPair.java

License: Open Source License

public static Job farthestPairMapReduce(Path inFile, Path outPath, OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(params, "FarthestPair");
    job.setJarByClass(FarthestPair.class);
    job.setMapperClass(FarthestPairMap.class);
    job.setReducerClass(FarthestPairReducer.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(PairDistance.class);
    job.setInputFormatClass(SpatialInputFormat3.class);
    // Add input file twice to treat it as a binary function
    SpatialInputFormat3.addInputPath(job, inFile);
    job.setOutputFormatClass(TextOutputFormat3.class);
    TextOutputFormat3.setOutputPath(job, outPath);

    // Calculate an initial lower bound on the farthest pair from the global index
    FileSystem fs = inFile.getFileSystem(params);
    GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, inFile);
    if (gindex != null) {
        double tightLowerBound = 0;
        for (Partition p1 : gindex) {
            for (Partition p2 : gindex) {
                double lb = computeLB(p1, p2);
                if (lb > tightLowerBound)
                    tightLowerBound = lb;
            }
        }
        job.getConfiguration().setFloat(FarthestPairLowerBound, (float) tightLowerBound);
    }

    // Start the job
    if (params.getBoolean("background", false)) {
        // Run in background
        job.submit();
    } else {
        job.waitForCompletion(params.getBoolean("verbose", false));
    }
    return job;
}

From source file: edu.umn.cs.spatialHadoop.operations.RangeQuery.java

License: Open Source License

public static Job rangeQueryMapReduce(Path inFile, Path outFile, OperationsParams params)
        throws IOException, ClassNotFoundException, InterruptedException {
    // Use the built-in range filter of the input format
    params.set(SpatialInputFormat3.InputQueryRange, params.get("rect"));
    // Use multithreading in case it is running locally
    params.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors());

    Job job = new Job(params, "RangeQuery");
    job.setJarByClass(RangeQuery.class);
    job.setNumReduceTasks(0);

    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inFile);

    job.setMapperClass(RangeQueryMap.class);

    if (params.getBoolean("output", true) && outFile != null) {
        job.setOutputFormatClass(TextOutputFormat3.class);
        TextOutputFormat3.setOutputPath(job, outFile);
    } else {
        // Skip writing the output for the sake of debugging
        job.setOutputFormatClass(NullOutputFormat.class);
    }
    // Submit the job
    if (!params.getBoolean("background", false)) {
        job.waitForCompletion(false);
    } else {
        job.submit();
    }
    return job;
}

From source file: edu.umn.cs.spatialHadoop.operations.Sampler2.java

License: Open Source License

public static Job sampleMapReduce(Path[] files, Path output, OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(params, "Sampler2");
    job.setJarByClass(Sampler2.class);

    // Set input and output
    job.setInputFormatClass(SampleInputFormat.class);
    SampleInputFormat.setInputPaths(job, files);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, output);

    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setNumReduceTasks(0); // No reducer needed

    // Start the job
    if (params.getBoolean("background", false)) {
        // Run in background
        job.submit();
    } else {
        job.waitForCompletion(params.getBoolean("verbose", false));
    }
    return job;
}

From source file: edu.umn.cs.spatialHadoop.operations.UltimateUnion.java

License: Open Source License

private static Job ultimateUnionMapReduce(Path input, Path output, OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(params, "UltimateUnion");
    job.setJarByClass(UltimateUnion.class);

    // Set map and reduce
    job.setMapperClass(UltimateUnionMap.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(OGCJTSShape.class);
    job.setNumReduceTasks(0);

    // Set input and output
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.addInputPath(job, input);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, output);

    // Submit the job
    if (!params.getBoolean("background", false)) {
        job.waitForCompletion(false);
        if (!job.isSuccessful())
            throw new RuntimeException("Job failed!");
    } else {
        job.submit();
    }
    return job;
}

From source file: edu.umn.cs.spatialHadoop.operations.Union.java

License: Open Source License

private static Job unionMapReduce(Path input, Path output, OperationsParams params)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(params, "BasicUnion");
    job.setJarByClass(Union.class);

    // Set map and reduce
    job.setMapperClass(UnionMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(OGCJTSShape.class);
    job.setReducerClass(UnionReduce.class);
    SpatialSite.splitReduceSpace(job, new Path[] { input }, params);

    // Set input and output
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.addInputPath(job, input);

    job.setOutputFormatClass(UnionOutputFormat.class);
    TextOutputFormat.setOutputPath(job, output);

    // Submit the job
    if (!params.getBoolean("background", false)) {
        job.waitForCompletion(false);
        if (!job.isSuccessful())
            throw new RuntimeException("Job failed!");
    } else {
        job.submit();
    }
    return job;
}

From source file: edu.umn.cs.spatialHadoop.visualization.MultilevelPlot.java

License: Open Source License

private static Job plotMapReduce(Path[] inFiles, Path outFile, Class<? extends Plotter> plotterClass,
        OperationsParams params) throws IOException, InterruptedException, ClassNotFoundException {
    Plotter plotter;
    try {
        plotter = plotterClass.newInstance();
    } catch (InstantiationException e) {
        throw new RuntimeException("Error creating rastierizer", e);
    } catch (IllegalAccessException e) {
        throw new RuntimeException("Error creating rastierizer", e);
    }

    Job job = new Job(params, "MultilevelPlot");
    job.setJarByClass(SingleLevelPlot.class);
    // Set plotter
    Configuration conf = job.getConfiguration();
    Plotter.setPlotter(conf, plotterClass);
    // Set input file MBR
    Rectangle inputMBR = (Rectangle) params.getShape("mbr");
    if (inputMBR == null)
        inputMBR = FileMBR.fileMBR(inFiles, params);

    // Adjust width and height if aspect ratio is to be kept
    if (params.getBoolean("keepratio", true)) {
        // Expand input file to a rectangle for compatibility with the pyramid
        // structure
        if (inputMBR.getWidth() > inputMBR.getHeight()) {
            inputMBR.y1 -= (inputMBR.getWidth() - inputMBR.getHeight()) / 2;
            inputMBR.y2 = inputMBR.y1 + inputMBR.getWidth();
        } else {
            inputMBR.x1 -= (inputMBR.getHeight() - inputMBR.getWidth()) / 2;
            inputMBR.x2 = inputMBR.x1 + inputMBR.getHeight();
        }
    }
    OperationsParams.setShape(conf, InputMBR, inputMBR);

    // Set input and output
    job.setInputFormatClass(SpatialInputFormat3.class);
    SpatialInputFormat3.setInputPaths(job, inFiles);
    if (conf.getBoolean("output", true)) {
        job.setOutputFormatClass(PyramidOutputFormat2.class);
        PyramidOutputFormat2.setOutputPath(job, outFile);
    } else {
        job.setOutputFormatClass(NullOutputFormat.class);
    }

    // Set mapper, reducer and committer
    String partitionTechnique = params.get("partition", "flat");
    if (partitionTechnique.equalsIgnoreCase("flat")) {
        // Use flat partitioning
        job.setMapperClass(FlatPartitionMap.class);
        job.setMapOutputKeyClass(TileIndex.class);
        job.setMapOutputValueClass(plotter.getCanvasClass());
        job.setReducerClass(FlatPartitionReduce.class);
    } else if (partitionTechnique.equalsIgnoreCase("pyramid")) {
        // Use pyramid partitioning
        Shape shape = params.getShape("shape");
        job.setMapperClass(PyramidPartitionMap.class);
        job.setMapOutputKeyClass(TileIndex.class);
        job.setMapOutputValueClass(shape.getClass());
        job.setReducerClass(PyramidPartitionReduce.class);
    } else {
        throw new RuntimeException("Unknown partitioning technique '" + partitionTechnique + "'");
    }
    // Set number of reducers
    job.setNumReduceTasks(
            Math.max(1, new JobClient(new JobConf()).getClusterStatus().getMaxReduceTasks() * 7 / 8));
    // Use multithreading in case the job is running locally
    conf.setInt(LocalJobRunner.LOCAL_MAX_MAPS, Runtime.getRuntime().availableProcessors());

    // Start the job
    if (params.getBoolean("background", false)) {
        job.submit();
    } else {
        job.waitForCompletion(false);
    }
    return job;
}