Example usage for org.apache.hadoop.mapred JobConf getJobName

Introduction

On this page you can find example usages of org.apache.hadoop.mapred JobConf getJobName.

Prototype

public String getJobName()

Source Link

Document

Get the user-specified job name.

Usage

From source file:com.ebay.erl.mobius.core.mapred.ConfigurableJob.java

License:Apache License

/**
 * Builds the partition file consumed by {@link TotalOrderPartitioner} from a
 * local sample of the job input.
 * <p>
 * The samples are fetched from <code>sampler</code>, sorted with the job's
 * output key comparator and scanned to select boundary keys so that each
 * reducer is assigned a block of samples of about the same size. A key whose
 * repeat count exceeds twice the average block size is written as a boundary
 * of its own. When the number of distinct boundaries plus one differs from
 * the configured number of reducers, the reducer count of <code>job</code>
 * is changed to match.
 * <p>
 * If <code>mobius.print.sample</code> is <code>true</code>, the sorted
 * samples are additionally dumped, gzip-compressed, to the local file named
 * by <code>mobius.sample.file</code> (default <code>./samples.txt.gz</code>).
 *
 * @param job     job configuration; its partition file location and,
 *                possibly, its number of reduce tasks are updated
 * @param sampler provides the samples of the job input
 * @throws IllegalArgumentException if the job is configured with no reduce
 *                                  tasks, as no average block size can be
 *                                  computed then
 * @throws RuntimeException         wrapping any {@link IOException} raised
 *                                  while sampling or writing
 */
private static void writePartitionFile(JobConf job, Sampler sampler) {
    try {
        ////////////////////////////////////////////////
        // first, getting samples from the data sources
        ////////////////////////////////////////////////
        LOGGER.info("Running local sampling for job [" + job.getJobName() + "]");
        InputFormat inf = job.getInputFormat();
        Object[] samples = sampler.getSample(inf, job);
        LOGGER.info("Samples retrieved, sorting...");

        ////////////////////////////////////////////////
        // sort the samples
        ////////////////////////////////////////////////
        RawComparator comparator = job.getOutputKeyComparator();
        Arrays.sort(samples, comparator);

        if (job.getBoolean("mobius.print.sample", false)) {
            PrintWriter pw = new PrintWriter(
                    new OutputStreamWriter(new GZIPOutputStream(new BufferedOutputStream(new FileOutputStream(
                            new File(job.get("mobius.sample.file", "./samples.txt.gz")))))));
            try {
                for (Object obj : samples) {
                    pw.println(obj);
                }
            } finally {
                // close() flushes; doing it in finally releases the file
                // handle even when printing a sample fails.
                pw.close();
            }
        }

        ////////////////////////////////////////////////
        // start to write partition files
        ////////////////////////////////////////////////

        FileSystem fs = FileSystem.get(job);
        Path partitionFile = fs.makeQualified(new Path(TotalOrderPartitioner.getPartitionFile(job)));
        // never overwrite an existing partition file, pick a fresh name instead
        while (fs.exists(partitionFile)) {
            partitionFile = new Path(partitionFile.toString() + "." + System.currentTimeMillis());
        }
        fs.deleteOnExit(partitionFile);
        TotalOrderPartitioner.setPartitionFile(job, partitionFile);
        LOGGER.info("write partition file to:" + partitionFile.toString());

        int reducersNbr = job.getNumReduceTasks();
        if (reducersNbr <= 0) {
            // fail with a clear message before the partition file is created,
            // instead of a bare division-by-zero further down
            throw new IllegalArgumentException("Cannot compute partition boundaries for job ["
                    + job.getJobName() + "]: number of reduce tasks must be positive, but was " + reducersNbr);
        }
        Set<Object> wroteSamples = new HashSet<Object>();

        // floating point division: integer division would truncate the
        // average before it is stored in the float
        float avgReduceSize = (float) samples.length / reducersNbr;

        SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, partitionFile, Tuple.class,
                NullWritable.class);
        try {
            int lastBegin = 0;
            for (int i = 0; i < samples.length;) {
                // trying to distribute the load for every reducer evenly,
                // dividing the <code>samples</code> into a set of blocks
                // separated by boundaries, objects that selected from the
                // <code>samples</code> array, and each blocks should have
                // about the same size.

                // find the last index of element that equals to samples[i], as
                // such element might appear multiple times in the samples.
                int upperBound = Util.findUpperBound(samples, samples[i], comparator);

                int lowerBound = i;

                // the repeat time of samples[i], if the key itself is too big
                // select it as boundary
                int currentElemSize = upperBound - lowerBound + 1;

                if (currentElemSize > avgReduceSize * 2) // greater than two times of average reducer size
                {
                    // the current element is too big, greater than
                    // two times of the <code>avgReduceSize</code>,
                    // put itself as boundary
                    writer.append(((DataJoinKey) samples[i]).getKey(), NullWritable.get());
                    wroteSamples.add(((DataJoinKey) samples[i]).getKey());

                    // immediate put the next element to the boundary,
                    // the next element starts at <code>upperBound+1</code>,
                    // to prevent the current one consume even more.
                    if (upperBound + 1 < samples.length) {
                        writer.append(((DataJoinKey) samples[upperBound + 1]).getKey(), NullWritable.get());
                        wroteSamples.add(((DataJoinKey) samples[upperBound + 1]).getKey());

                        // move on to the element after the run of
                        // <code>samples[upperBound+1]</code>
                        lastBegin = Util.findUpperBound(samples, samples[upperBound + 1], comparator) + 1;
                        i = lastBegin;
                    } else {
                        break;
                    }
                } else {
                    // current element is small enough to be consider
                    // with previous group
                    int size = upperBound - lastBegin;
                    if (size > avgReduceSize) {
                        // by including the current elements, we have
                        // found a block that's big enough, select it
                        // as boundary
                        writer.append(((DataJoinKey) samples[i]).getKey(), NullWritable.get());
                        wroteSamples.add(((DataJoinKey) samples[i]).getKey());

                        i = upperBound + 1;
                        lastBegin = i;
                    } else {
                        i = upperBound + 1;
                    }
                }
            }
        } finally {
            // release the partition file even when appending a boundary fails
            writer.close();
        }

        // if the number of wrote samples doesn't equals to number of
        // reducer minus one, then it means the key spaces is too small
        // hence TotalOrderPartitioner won't work, it works only if
        // the partition boundaries are distinct.
        //
        // we need to change the number of reducers
        if (wroteSamples.size() + 1 != reducersNbr) {
            LOGGER.info("Write complete, but key space is too small, sample size=" + wroteSamples.size()
                    + ", reducer size:" + (reducersNbr));
            LOGGER.info("Set the reducer size to:" + (wroteSamples.size() + 1));

            // add 1 because the wrote samples define boundary, ex, if
            // the sample size is two with two element [300, 1000], then
            // there should be 3 reducers, one for handling i<300, one
            // for 300<=i<1000, and another one for 1000<=i
            job.setNumReduceTasks((wroteSamples.size() + 1));
        }
    } catch (IOException e) {
        LOGGER.error(e.getMessage(), e);
        throw new RuntimeException(e);
    }
}

From source file:com.ricemap.spateDB.core.SpatialSite.java

License:Apache License

/**
 * Serializes the given cells to a temporary file, registers that file in the
 * distributed cache of the job and stores its name in the job configuration
 * under <code>OUTPUT_CELLS</code>.
 * <p>
 * The file is named <code>&lt;job name&gt;_&lt;random number&gt;.cells</code>;
 * a new random number is drawn until the name does not exist yet. The file
 * starts with the number of cells, followed by each cell as written by
 * {@link CellInfo#write}. It is registered for deletion when the file system
 * is closed.
 *
 * @param job       job configuration to update
 * @param cellsInfo cells to store; must not be <code>null</code>
 * @throws IOException if the file cannot be created or written
 */
public static void setCells(JobConf job, CellInfo[] cellsInfo) throws IOException {
    FileSystem fs = FileSystem.get(job);
    Path tempFile;
    do {
        tempFile = new Path(job.getJobName() + "_" + (int) (Math.random() * 1000000) + ".cells");
    } while (fs.exists(tempFile));

    FSDataOutputStream out = fs.create(tempFile);
    try {
        out.writeInt(cellsInfo.length);
        for (CellInfo cell : cellsInfo) {
            cell.write(out);
        }
    } finally {
        // close in finally so the stream is released even when a cell fails to serialize
        out.close();
    }

    fs.deleteOnExit(tempFile);

    DistributedCache.addCacheFile(tempFile.toUri(), job);
    job.set(OUTPUT_CELLS, tempFile.getName());
    LOG.info("Partitioning file into " + cellsInfo.length + " cells");
}

From source file:com.scaleoutsoftware.soss.hserver.HServerJobClient.java

License:Apache License

public HServerJobClient(JobConf conf) throws IOException {
    //super(conf);  -- do not attempt to initialize cluster
    jobID = JobID.forName("job_" + conf.getJobName() + "_0");
    jobConf = conf;/*  w  ww .ja v  a2  s. c om*/
}

From source file:com.scaleoutsoftware.soss.hserver.HServerJobClient.java

License:Apache License

/**
 * Starts the given job on a background thread and returns immediately with a
 * handle to it.
 * <p>
 * The job runs through {@link JobScheduler} under the ID
 * <code>job_&lt;name&gt;_0</code>. Once the run ends, successfully or not,
 * the grid is unloaded if <code>unloadGrid</code> is set.
 *
 * @param job configuration of the job to run
 * @return handle wrapping the job ID and the future of the background run
 * @throws IOException declared by the overridden method
 */
@Override
public RunningJob submitJob(final JobConf job) throws IOException {
    ensureInvocationGridPresent();

    ExecutorService worker = Executors.newSingleThreadExecutor();
    final JobID submittedId = JobID.forName("job_" + job.getJobName() + "_0");

    Callable<Object> jobTask = new Callable<Object>() {
        @Override
        public Object call() throws Exception {
            try {
                JobScheduler.getInstance().runOldApiJob(job, submittedId, sortEnabled, null, grid);
                return null;
            } finally {
                if (unloadGrid) {
                    grid.unload();
                }
            }
        }
    };
    Future pending = worker.submit(jobTask);

    // no further tasks are accepted; the executor terminates once the job task is done
    worker.shutdown();

    return new HServerRunningJob(submittedId, pending);
}

From source file:edu.brown.cs.mapreduce.BenchmarkBase.java

License:Open Source License

/**
 * Prints a summary of the job (name, input paths, output path, task counts
 * and benchmark options) to standard output, runs the job with
 * {@link JobClient#runJob} and prints its start time, end time and duration.
 * The configuration is then remembered in <code>last_job</code>.
 *
 * @param _conf configuration of the job to run
 * @throws Exception if running the job fails
 */
public void runJob(JobConf _conf) throws Exception {
    StringBuilder summary = new StringBuilder();
    summary.append("BenchmarkBase(").append(_conf.getJobName()).append(")\n").append("\tInput Path:  {");

    // comma-separated list of input paths
    String separator = "";
    for (Path inputPath : FileInputFormat.getInputPaths(_conf)) {
        summary.append(separator).append(inputPath.toString());
        separator = ", ";
    }
    summary.append("}\n");

    summary.append("\tOutput Path: ").append(FileOutputFormat.getOutputPath(_conf)).append("\n");
    summary.append("\tMap Jobs:    ").append(_conf.getNumMapTasks()).append("\n");
    summary.append("\tReduce Jobs: ").append(_conf.getNumReduceTasks()).append("\n");
    summary.append("\tProperties:  " + this.options);
    System.out.println(summary.toString());

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(_conf);
    Date finishTime = new Date();
    System.out.println("Job ended: " + finishTime);

    long elapsedMillis = finishTime.getTime() - startTime.getTime();
    System.out.println("The job took " + (elapsedMillis / 1000.0f) + " seconds.");

    this.last_job = _conf;
}

From source file:edu.brown.cs.mapreduce.benchmarks.Benchmark3.java

License:Open Source License

/**
 * Configures and runs the three phases of this benchmark one after another.
 * <p>
 * Each phase gets its job configuration from <code>base.getJobConf()</code>
 * and appends <code>.Phase1</code>, <code>.Phase2</code> or
 * <code>.Phase3</code> to the job name. Phase outputs go to the
 * <code>phase1</code>, <code>phase2</code> and <code>phase3</code>
 * sub-directories of the base output path; phase 2 reads the output of
 * phase 1 and phase 3 reads the output of phase 2.
 * <p>
 * The process exits with status 1 when the start date or stop date property
 * is missing from the benchmark options.
 * <p>
 * NOTE(review): all three configurations are obtained and configured before
 * the first job runs. Whether <code>base.getJobConf()</code> returns a fresh
 * object per call is not visible here -- confirm, since a shared instance
 * would make the phase settings overwrite each other.
 *
 * @param args command line arguments, handed to {@link BenchmarkBase}
 * @return always 0
 * @throws Exception if creating the benchmark base or running a phase fails
 */
public int run(String[] args) throws Exception {
    BenchmarkBase base = new BenchmarkBase(this.getConf(), this.getClass(), args);

    // Only the start time is printed here; each base.runJob() call reports
    // its own start/end times.
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);

    // -------------------------------------------
    // Phase #1
    // -------------------------------------------
    JobConf p1_job = base.getJobConf();
    p1_job.setJobName(p1_job.getJobName() + ".Phase1");
    Path p1_output = new Path(base.getOutputPath().toString() + "/phase1");
    FileOutputFormat.setOutputPath(p1_job, p1_output);

    //
    // Make sure we have our properties
    //
    String required[] = { BenchmarkBase.PROPERTY_START_DATE, BenchmarkBase.PROPERTY_STOP_DATE };
    for (String req : required) {
        if (!base.getOptions().containsKey(req)) {
            System.err.println("ERROR: The property '" + req + "' is not set");
            System.exit(1);
        }
    } // FOR

    // Input format depends on whether the benchmark works on sequence files;
    // mapper/reducer implementations depend on whether it works on tuple data.
    p1_job.setInputFormat(
            base.getSequenceFile() ? SequenceFileInputFormat.class : KeyValueTextInputFormat.class);
    if (base.getSequenceFile())
        p1_job.setOutputFormat(SequenceFileOutputFormat.class);
    p1_job.setOutputKeyClass(Text.class);
    p1_job.setOutputValueClass(Text.class);
    p1_job.setMapperClass(
            base.getTupleData() ? edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TupleWritableMap.class
                    : edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TextMap.class);
    p1_job.setReducerClass(
            base.getTupleData() ? edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TupleWritableReduce.class
                    : edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TextReduce.class);
    p1_job.setCompressMapOutput(base.getCompress());

    // -------------------------------------------
    // Phase #2
    // -------------------------------------------
    JobConf p2_job = base.getJobConf();
    p2_job.setJobName(p2_job.getJobName() + ".Phase2");
    p2_job.setInputFormat(
            base.getSequenceFile() ? SequenceFileInputFormat.class : KeyValueTextInputFormat.class);
    if (base.getSequenceFile())
        p2_job.setOutputFormat(SequenceFileOutputFormat.class);
    p2_job.setOutputKeyClass(Text.class);
    p2_job.setOutputValueClass(Text.class);
    p2_job.setMapperClass(IdentityMapper.class);
    p2_job.setReducerClass(
            base.getTupleData() ? edu.brown.cs.mapreduce.benchmarks.benchmark3.phase2.TupleWritableReduce.class
                    : edu.brown.cs.mapreduce.benchmarks.benchmark3.phase2.TextReduce.class);
    p2_job.setCompressMapOutput(base.getCompress());
    p2_job.setNumMapTasks(60);

    // -------------------------------------------
    // Phase #3
    // -------------------------------------------
    JobConf p3_job = base.getJobConf();
    p3_job.setJobName(p3_job.getJobName() + ".Phase3");
    // a single reduce task for the last phase
    p3_job.setNumReduceTasks(1);
    p3_job.setInputFormat(
            base.getSequenceFile() ? SequenceFileInputFormat.class : KeyValueTextInputFormat.class);
    p3_job.setOutputKeyClass(Text.class);
    p3_job.setOutputValueClass(Text.class);
    //p3_job.setMapperClass(Phase3Map.class);
    p3_job.setMapperClass(IdentityMapper.class);
    p3_job.setReducerClass(
            base.getTupleData() ? edu.brown.cs.mapreduce.benchmarks.benchmark3.phase3.TupleWritableReduce.class
                    : edu.brown.cs.mapreduce.benchmarks.benchmark3.phase3.TextReduce.class);

    //
    // Execute #1
    //
    base.runJob(p1_job);

    //
    // Execute #2: reads the output of phase 1
    //
    Path p2_output = new Path(base.getOutputPath().toString() + "/phase2");
    FileOutputFormat.setOutputPath(p2_job, p2_output);
    FileInputFormat.setInputPaths(p2_job, p1_output);
    base.runJob(p2_job);

    //
    // Execute #3: reads the output of phase 2
    //
    Path p3_output = new Path(base.getOutputPath().toString() + "/phase3");
    FileOutputFormat.setOutputPath(p3_job, p3_output);
    FileInputFormat.setInputPaths(p3_job, p2_output);
    base.runJob(p3_job);

    // There does need to be a combine if (base.getCombine()) base.runCombine();

    return 0;
}

From source file:edu.uci.ics.fuzzyjoin.hadoop.FuzzyJoinDriver.java

License:Apache License

/**
 * Prints a summary of the job (name, input paths, output path, task counts
 * and the effective fuzzy-join properties) to standard output, runs the job
 * with {@link JobClient#runJob} and prints its start time, end time and
 * duration.
 * <p>
 * The job jar is set from {@link FuzzyJoinDriver} before anything else.
 *
 * @param job configuration of the job to run
 * @throws IOException if running the job fails
 */
public static void run(JobConf job) throws IOException {
    job.setJarByClass(FuzzyJoinDriver.class);

    // continuation lines of a multi-valued entry are aligned under the first value
    final String continuation = "\n                ";

    //
    // print info
    //
    StringBuilder summary = new StringBuilder();
    summary.append("FuzzyJoinDriver(").append(job.getJobName()).append(")\n").append("  Input Path:  {");
    String separator = "";
    for (Path inputPath : FileInputFormat.getInputPaths(job)) {
        summary.append(separator).append(inputPath.toString());
        separator = continuation;
    }
    summary.append("}\n");
    summary.append("  Output Path: ").append(FileOutputFormat.getOutputPath(job)).append("\n");
    summary.append("  Map Jobs:    ").append(job.getNumMapTasks()).append("\n");
    summary.append("  Reduce Jobs: ").append(job.getNumReduceTasks()).append("\n");
    summary.append("  Properties:  {");

    // each entry is { property name, default shown when the job does not set it }
    String[][] properties = {
            { FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY, FuzzyJoinConfig.SIMILARITY_NAME_VALUE },
            { FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY, "" + FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE },
            { FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE },
            { TOKENS_PACKAGE_PROPERTY, TOKENS_PACKAGE_VALUE },
            { TOKENS_LENGTHSTATS_PROPERTY, "" + TOKENS_LENGTHSTATS_VALUE },
            { RIDPAIRS_GROUP_CLASS_PROPERTY, RIDPAIRS_GROUP_CLASS_VALUE },
            { RIDPAIRS_GROUP_FACTOR_PROPERTY, "" + RIDPAIRS_GROUP_FACTOR_VALUE },
            { FuzzyJoinConfig.DATA_TOKENS_PROPERTY, "" },
            { DATA_JOININDEX_PROPERTY, "" }, };
    separator = "";
    for (String[] property : properties) {
        summary.append(separator);
        summary.append(property[0]).append("=").append(job.get(property[0], property[1]));
        separator = continuation;
    }
    summary.append("}");
    System.out.println(summary.toString());

    //
    // run job
    //
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(job);
    Date finishTime = new Date();
    System.out.println("Job ended: " + finishTime);

    long elapsedMillis = finishTime.getTime() - startTime.getTime();
    System.out.println("The job took " + (elapsedMillis / 1000.0f) + " seconds.");
}

From source file:edu.uci.ics.hyracks.hadoop.compat.driver.CompatibilityLayer.java

License:Apache License

/**
 * Converts a single Hadoop job into a Hyracks job specification and submits
 * it through the Hyracks client.
 * <p>
 * The application name is the job name followed by the current time in
 * milliseconds.
 *
 * @param conf     configuration of the Hadoop job to submit
 * @param userLibs user libraries passed on to the Hyracks client
 * @return handle of the submitted job
 * @throws Exception if building the specification or submitting it fails
 */
public HyracksRunningJob submitJob(JobConf conf, Set<String> userLibs) throws Exception {
    // the adapter takes a list of jobs; wrap the single one
    List<JobConf> singleJob = new ArrayList<JobConf>();
    singleJob.add(conf);

    String appName = conf.getJobName() + System.currentTimeMillis();
    JobSpecification jobSpec = hadoopAdapter.getJobSpecification(singleJob);
    return hyracksClient.submitJob(appName, jobSpec, userLibs);
}

From source file:edu.ucsb.cs.hybrid.HybridDriver.java

License:Apache License

/**
 * Prints a summary of the job (name, input paths, output path and similarity
 * threshold) to standard error, runs the job with {@link JobClient#runJob}
 * and prints how long it took.
 *
 * @param job configuration of the job to run
 * @throws IOException if running the job fails
 */
public static void run(JobConf job) throws IOException {
    StringBuilder summary = new StringBuilder(
            stars() + "\n  Running job:  " + job.getJobName() + "\n  Input Path:   {");

    // one input path per line, continuation lines indented
    String separator = "";
    for (Path inputPath : FileInputFormat.getInputPaths(job)) {
        summary.append(separator).append(inputPath.toString());
        separator = "\n                ";
    }
    summary.append("}\n");
    summary.append("  Output Path:  ").append(FileOutputFormat.getOutputPath(job)).append("\n");
    summary.append("  Threshold:    ")
            .append(job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE)).append("\n");
    System.err.println(summary.toString());

    Date startTime = new Date();
    JobClient.runJob(job);
    Date finishTime = new Date();

    long elapsedMillis = finishTime.getTime() - startTime.getTime();
    System.err.println("Similarity job took " + (elapsedMillis / 1000.0f) + " seconds.");
}

From source file:edu.ucsb.cs.knn.KnnDriver.java

License:Apache License

/**
 * Prints a summary of the configured job (name, input paths, output path,
 * task counts and threshold) to standard output and then runs the job with
 * {@link JobClient#runJob}.
 *
 * @param job configuration of the job to run
 * @throws IOException if running the job fails
 */
public static void run(JobConf job) throws IOException {

    // use the jar that contains this driver as the job jar
    job.setJarByClass(KnnDriver.class);

    StringBuilder summary = new StringBuilder(
            stars() + "\nKnnDriver(" + job.getJobName() + ")\n" + "  Input Path:  {");

    // one input path per line, continuation lines indented
    String separator = "";
    for (Path inputPath : FileInputFormat.getInputPaths(job)) {
        summary.append(separator).append(inputPath.toString());
        separator = "\n                ";
    }
    summary.append("}\n");
    summary.append("  Output Path: ").append(FileOutputFormat.getOutputPath(job)).append("\n");
    summary.append("  Map Tasks:    ").append(job.getNumMapTasks()).append("\n");
    summary.append("  Reduce Tasks: ").append(job.getNumReduceTasks()).append("\n");
    summary.append("  Threshold:    ").append(job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE)).append("\n");
    System.out.println(summary.toString());

    //
    // run job
    //
    JobClient.runJob(job);
}