Example usage for org.apache.hadoop.mapred JobClient submitJob

List of usage examples for org.apache.hadoop.mapred JobClient submitJob

Introduction

On this page you can find example usages of org.apache.hadoop.mapred JobClient submitJob.

Prototype

public RunningJob submitJob(final JobConf conf) throws FileNotFoundException, IOException 

Document

Submit a job to the MR system.
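
A minimal sketch of the call pattern (assuming a fully configured JobConf; the mapper, reducer, and I/O settings are omitted here and shown in the examples below, and the class name SubmitJobSketch is hypothetical):

import java.io.IOException;

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class SubmitJobSketch {
    public static void main(String[] args) throws IOException {
        // Assumes input/output paths, mapper, and reducer are configured elsewhere.
        JobConf conf = new JobConf(SubmitJobSketch.class);
        JobClient jc = new JobClient(conf);
        try {
            // submitJob() returns immediately; use the RunningJob handle to poll status.
            RunningJob running = jc.submitJob(conf);
            System.out.println("Submitted job " + running.getID());
        } finally {
            jc.close();
        }
    }
}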

Usage

From source file:DataJoinJob.java

License:Apache License

/**
 * Submit/run a map/reduce job and poll until it completes.
 *
 * @param job the configured job to submit
 * @return true for success
 * @throws IOException if the job cannot be submitted or monitored
 */
public static boolean runJob(JobConf job) throws IOException {
    JobClient jc = new JobClient(job);
    boolean success = true;
    RunningJob running = null;
    try {
        running = jc.submitJob(job);
        JobID jobId = running.getID();
        System.out.println("Job " + jobId + " is submitted");
        while (!running.isComplete()) {
            System.out.println("Job " + jobId + " is still running.");
            try {
                Thread.sleep(60000);
            } catch (InterruptedException e) {
                // ignore the interrupt and keep polling
            }
            running = jc.getJob(jobId);
        }
        success = running.isSuccessful();
    } finally {
        if (!success && (running != null)) {
            running.killJob();
        }
        jc.close();
    }
    return success;
}
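
When no custom polling or kill-on-failure logic is needed, the static helper JobClient.runJob(JobConf) gives the same submit-and-wait behavior in a single blocking call; it prints progress as the job runs and throws an IOException if the job fails:

    RunningJob running = JobClient.runJob(job);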

From source file:Text2FormatStorageMR.java

License:Open Source License

@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {

    if (args.length != 2) {
        System.out.println("FormatFileMR <input> <output>");
        System.exit(-1);//from www  . ja v a  2 s .c  o m
    }

    JobConf conf = new JobConf(FormatStorageMR.class);

    conf.setJobName("Text2FormatMR");

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(4);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Unit.Record.class);

    conf.setMapperClass(TextFileTestMapper.class);
    conf.setReducerClass(FormatFileTestReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(FormatStorageOutputFormat.class);
    conf.set("mapred.output.compress", "flase");

    Head head = new Head();
    initHead(head);

    head.toJobConf(conf);

    FileInputFormat.setInputPaths(conf, args[0]);
    Path outputPath = new Path(args[1]);
    FileOutputFormat.setOutputPath(conf, outputPath);

    FileSystem fs = outputPath.getFileSystem(conf);
    fs.delete(outputPath, true);

    JobClient jc = new JobClient(conf);
    RunningJob rj = jc.submitJob(conf);

    String lastReport = "";
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS");
    long reportTime = System.currentTimeMillis();
    long maxReportInterval = 3 * 1000;
    while (!rj.isComplete()) {
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            // ignore the interrupt and keep polling
        }

        int mapProgress = Math.round(rj.mapProgress() * 100);
        int reduceProgress = Math.round(rj.reduceProgress() * 100);

        String report = " map = " + mapProgress + "%,  reduce = " + reduceProgress + "%";

        if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) {

            String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
            System.out.println(output);
            lastReport = report;
            reportTime = System.currentTimeMillis();
        }
    }

    System.exit(0);

}

From source file:Text2ColumntStorageMR.java

License:Open Source License

@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {

    if (args.length != 3) {
        System.out.println("Text2ColumnStorageMR <input> <output> <columnStorageMode>");
        System.exit(-1);
    }

    JobConf conf = new JobConf(Text2ColumntStorageMR.class);

    conf.setJobName("Text2ColumnStorageMR");

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(4);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Unit.Record.class);

    conf.setMapperClass(TextFileMapper.class);
    conf.setReducerClass(ColumnStorageReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat((Class<? extends OutputFormat>) ColumnStorageHiveOutputFormat.class);
    conf.set("mapred.output.compress", "flase");

    Head head = new Head();
    initHead(head);

    head.toJobConf(conf);

    int bt = Integer.valueOf(args[2]);

    FileInputFormat.setInputPaths(conf, args[0]);
    Path outputPath = new Path(args[1]);
    FileOutputFormat.setOutputPath(conf, outputPath);

    FileSystem fs = outputPath.getFileSystem(conf);
    fs.delete(outputPath, true);

    JobClient jc = new JobClient(conf);
    RunningJob rj = jc.submitJob(conf);

    String lastReport = "";
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS");
    long reportTime = System.currentTimeMillis();
    long maxReportInterval = 3 * 1000;
    while (!rj.isComplete()) {
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            // ignore the interrupt and keep polling
        }

        int mapProgress = Math.round(rj.mapProgress() * 100);
        int reduceProgress = Math.round(rj.reduceProgress() * 100);

        String report = " map = " + mapProgress + "%,  reduce = " + reduceProgress + "%";

        if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) {

            String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
            System.out.println(output);
            lastReport = report;
            reportTime = System.currentTimeMillis();
        }
    }

    System.exit(0);

}

From source file:FormatStorage2ColumnStorageMR.java

License:Open Source License

@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {

    if (args.length != 2) {
        System.out.println("FormatStorage2ColumnStorageMR <input> <output>");
        System.exit(-1);
    }

    JobConf conf = new JobConf(FormatStorageMR.class);

    conf.setJobName("FormatStorage2ColumnStorageMR");

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(4);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Unit.Record.class);

    conf.setMapperClass(FormatStorageMapper.class);
    conf.setReducerClass(ColumnStorageReducer.class);

    conf.setInputFormat(FormatStorageInputFormat.class);
    conf.set("mapred.output.compress", "flase");

    Head head = new Head();
    initHead(head);

    head.toJobConf(conf);

    FileInputFormat.setInputPaths(conf, args[0]);
    Path outputPath = new Path(args[1]);
    FileOutputFormat.setOutputPath(conf, outputPath);

    FileSystem fs = outputPath.getFileSystem(conf);
    fs.delete(outputPath, true);

    JobClient jc = new JobClient(conf);
    RunningJob rj = jc.submitJob(conf);

    String lastReport = "";
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS");
    long reportTime = System.currentTimeMillis();
    long maxReportInterval = 3 * 1000;
    while (!rj.isComplete()) {
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            // ignore the interrupt and keep polling
        }

        int mapProgress = Math.round(rj.mapProgress() * 100);
        int reduceProgress = Math.round(rj.reduceProgress() * 100);

        String report = " map = " + mapProgress + "%,  reduce = " + reduceProgress + "%";

        if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) {

            String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
            System.out.println(output);
            lastReport = report;
            reportTime = System.currentTimeMillis();
        }
    }

    System.exit(0);

}

From source file:com.cloudera.circus.test.TestXTest.java

License:Open Source License

@Test
@TestHadoop
public void testHadoopMapReduce() throws Exception {
    JobConf conf = getHadoopConf();
    FileSystem fs = FileSystem.get(conf);
    JobClient jobClient = new JobClient(conf);
    try {
        Path inputDir = new Path(getHadoopTestDir(), "input");
        Path outputDir = new Path(getHadoopTestDir(), "output");

        fs.mkdirs(inputDir);
        Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
        writer.write("a\n");
        writer.write("b\n");
        writer.write("c\n");
        writer.close();

        JobConf jobConf = getHadoopConf();
        jobConf.setInt("mapred.map.tasks", 1);
        jobConf.setInt("mapred.map.max.attempts", 1);
        jobConf.setInt("mapred.reduce.max.attempts", 1);
        jobConf.set("mapred.input.dir", inputDir.toString());
        jobConf.set("mapred.output.dir", outputDir.toString());
        final RunningJob runningJob = jobClient.submitJob(jobConf);
        waitFor(60 * 1000, true, new Predicate() {
            @Override
            public boolean evaluate() throws Exception {
                return runningJob.isComplete();
            }
        });
        Assert.assertTrue(runningJob.isSuccessful());
        Assert.assertTrue(fs.exists(new Path(outputDir, "part-00000")));
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(fs.open(new Path(outputDir, "part-00000"))));
        Assert.assertTrue(reader.readLine().trim().endsWith("a"));
        Assert.assertTrue(reader.readLine().trim().endsWith("b"));
        Assert.assertTrue(reader.readLine().trim().endsWith("c"));
        Assert.assertNull(reader.readLine());
        reader.close();
    } finally {
        fs.close();
        jobClient.close();
    }
}

From source file:com.google.mr4c.hadoop.HadoopAlgoRunner.java

License:Open Source License

private void submitJob() throws IOException {
    // most of this method copies JobClient.runJob()
    // addition here is logging the job URI
    JobClient client = new JobClient(m_jobConf);
    RunningJob job = client.submitJob(m_jobConf);
    m_log.info("Job URL is [{}]", job.getTrackingURL());
    try {
        if (!client.monitorAndPrintJob(m_jobConf, job)) {
            throw new IOException("Job failed!");
        }
    } catch (InterruptedException ie) {
        Thread.currentThread().interrupt();
    }
}

From source file:com.ibm.jaql.lang.expr.hadoop.Util.java

License:Apache License

public static void submitJob(JsonString submitClassName, JobConf conf) throws Exception {
    JobClient jc = new JobClient(conf);
    RunningJob rj = jc.submitJob(conf);
    String sc = JsonUtil.printToString(submitClassName);

    // log to status that a MR job is starting
    mrStatusStart(sc);

    // log to status vital MR job information
    mrStatusInfo(sc, JsonUtil.printToString(new JsonString(rj.getID().toString())),
            JsonUtil.printToString(new JsonString(rj.getJobName())),
            JsonUtil.printToString(new JsonString(rj.getTrackingURL())));
    //STATUS_LOG.info("MAP-REDUCE INFO: " + rj.getID() + "," + rj.getJobName() + "," + rj.getTrackingURL());

    boolean failed = false;
    try {
        if (!jc.monitorAndPrintJob(conf, rj)) {
            LOG.error(new IOException("Job failed!"));
            failed = true;
            //throw new IOException("Job failed!");
        }
    } catch (InterruptedException ie) {
        Thread.currentThread().interrupt();
    }

    try {
        if (rj.isSuccessful()) {
            logAllTaskSyslogs(rj, true);
        } else {
            logAllTaskSyslogs(rj, false);
        }
    } catch (Throwable t) {
        // log it, but do not stop the world for this
        LOG.error(t);
    }

    // log to status that a MR job is stopping
    mrStatusStop(sc);

    // if the job failed, then throw an exception
    if (failed) {
        throw new IOException("Job failed!");
    }
}

From source file:com.liferay.hadoop.action.HadoopJob.java

License:Open Source License

public String doExecute(HttpServletRequest request, HttpServletResponse response) throws Exception {

    response.setContentType(ContentTypes.TEXT_PLAIN_UTF8);

    PrintWriter writer = response.getWriter();

    FileSystem fileSystem = HadoopManager.getFileSystem();

    JobClient jobClient = HadoopManager.getJobClient();

    writer.println("-- Job Status --");

    Path inputPath = new Path("/index/*/*");
    Path outputPath = new Path("/wordcount/results");

    try {
        if (_runningJob == null) {
            writer.println("Creating job");

            if (fileSystem.exists(_jobPath)) {
                fileSystem.delete(_jobPath, false);
            }

            if (!fileSystem.exists(_jobPath)) {
                writer.println("Deploying the job code to cluster");

                FSDataOutputStream outputStream = null;

                try {
                    outputStream = fileSystem.create(_jobPath);

                    ServletContext servletContext = HadoopManager.getServletContext();

                    InputStream inputStream = servletContext.getResourceAsStream("/WEB-INF/lib/hadoop-job.jar");

                    StreamUtil.transfer(inputStream, outputStream, false);
                } finally {
                    StreamUtil.cleanUp(outputStream);
                }

                writer.println("Job code deployed to cluster");
            }

            if (fileSystem.exists(outputPath)) {
                writer.println("A previous job output was found, backing it up");

                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            _jobConf = HadoopManager.createNewJobConf();

            _jobConf.setJobName("Word Count");

            writer.println("Job '" + _jobConf.getJobName() + "' is being configured");

            _jobConf.setJarByClass(Map.class);
            _jobConf.setOutputKeyClass(Text.class);
            _jobConf.setOutputValueClass(IntWritable.class);
            _jobConf.setMapperClass(Map.class);
            _jobConf.setCombinerClass(Reduce.class);
            _jobConf.setReducerClass(Reduce.class);
            _jobConf.setInputFormat(TextInputFormat.class);
            _jobConf.setOutputFormat(TextOutputFormat.class);

            writer.println("Job code deployed to distributed cache's classpath");

            DistributedCache.addArchiveToClassPath(_jobPath, _jobConf, fileSystem);

            FileInputFormat.setInputPaths(_jobConf, inputPath);
            FileOutputFormat.setOutputPath(_jobConf, outputPath);

            writer.println("Submitting job the first time");

            _runningJob = jobClient.submitJob(_jobConf);

            writer.println("Job submitted");
        }

        int jobState = _runningJob.getJobState();

        writer.println(
                "Job status: " + jobState + " (RUNNING = 1, SUCCEEDED = 2, FAILED = 3, PREP = 4, KILLED = 5)");

        if ((jobState != JobStatus.RUNNING) && (jobState != JobStatus.PREP)) {

            writer.println("Re-issuing the job");

            if (fileSystem.exists(outputPath)) {
                writer.println("A previous job output was found, backing it up");

                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            writer.println("Submitting job the first time");

            _runningJob = jobClient.submitJob(_jobConf);

            writer.println("Job submitted");
        }
    } catch (Exception ioe) {
        writer.println("Job error: ");

        ioe.printStackTrace(writer);
    }

    writer.flush();
    writer.close();

    return null;
}

From source file:com.liferay.hadoop.util.HadoopManager.java

License:Open Source License

public static void runJob(StoreEvent storeEvent) throws IOException {
    FileSystem fileSystem = getFileSystem();

    if (_servletContext == null) {
        return;
    }

    JobClient jobClient = getJobClient();

    Path inputPath = new Path("/index".concat(storeEvent.getRootPath().toString()).concat("/*"));
    Path outputPath = new Path("/wordcount".concat(storeEvent.getRootPath().toString()).concat("/results"));

    try {
        if (_runningJob == null) {
            if (!fileSystem.exists(_jobPath)) {
                FSDataOutputStream outputStream = null;

                try {
                    outputStream = fileSystem.create(_jobPath);

                    InputStream inputStream = _servletContext
                            .getResourceAsStream("/WEB-INF/lib/hadoop-job.jar");

                    StreamUtil.transfer(inputStream, outputStream, false);
                } finally {
                    StreamUtil.cleanUp(outputStream);
                }
            }

            if (fileSystem.exists(outputPath)) {
                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            _jobConf = new JobConf(_sharedJobConf);

            _jobConf.setJobName("Word Count");
            _jobConf.setJarByClass(Map.class);
            _jobConf.setOutputKeyClass(Text.class);
            _jobConf.setOutputValueClass(IntWritable.class);
            _jobConf.setMapperClass(Map.class);
            _jobConf.setCombinerClass(Reduce.class);
            _jobConf.setReducerClass(Reduce.class);
            _jobConf.setInputFormat(TextInputFormat.class);
            _jobConf.setOutputFormat(TextOutputFormat.class);

            DistributedCache.addArchiveToClassPath(_jobPath, _jobConf, fileSystem);

            FileInputFormat.setInputPaths(_jobConf, inputPath);
            FileOutputFormat.setOutputPath(_jobConf, outputPath);

            _runningJob = jobClient.submitJob(_jobConf);
        }

        int jobState = _runningJob.getJobState();

        if ((jobState != JobStatus.RUNNING) && (jobState != JobStatus.PREP)) {

            System.out.println("Re-issuing the word count job.");

            if (fileSystem.exists(outputPath)) {
                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            _runningJob = jobClient.submitJob(_jobConf);
        }
    } catch (Exception ioe) {
        ioe.printStackTrace();
    }
}

From source file:com.mycompany.app.TestStagingDirectoryPermissions.java

License:Apache License

@Test
public void perms() throws IOException, InterruptedException {
    MiniDFSCluster minidfs = null;
    FileSystem fs = null;
    MiniMRClientCluster minimr = null;
    try {
        Configuration conf = new Configuration(true);
        conf.set("fs.permission.umask-mode", "0077");
        minidfs = new MiniDFSCluster.Builder(conf).build();
        minidfs.waitActive();

        fs = minidfs.getFileSystem();
        conf.set(FileSystem.FS_DEFAULT_NAME_KEY, fs.getUri().toString());
        Path p = path("/in");
        fs.mkdirs(p);

        FSDataOutputStream os = fs.create(new Path(p, "input.txt"));
        os.write("hello!".getBytes("UTF-8"));
        os.close();

        String user = UserGroupInformation.getCurrentUser().getUserName();
        Path home = new Path("/User/" + user);
        fs.mkdirs(home);
        minimr = MiniMRClientClusterFactory.create(this.getClass(), 1, conf);
        JobConf job = new JobConf(minimr.getConfig());

        job.setJobName("PermsTest");
        JobClient client = new JobClient(job);
        FileInputFormat.addInputPath(job, p);
        FileOutputFormat.setOutputPath(job, path("/out"));
        job.setInputFormat(TextInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setMapperClass(MySleepMapper.class);

        job.setNumReduceTasks(1);
        RunningJob submittedJob = client.submitJob(job);

        // Sleep for a bit to let localization finish
        System.out.println("Sleeping...");
        Thread.sleep(3 * 1000L);
        System.out.println("Done sleeping...");
        assertFalse(UserGroupInformation.isSecurityEnabled());

        Path stagingRoot = path("/tmp/hadoop-yarn/staging/" + user + "/.staging/");
        assertTrue(fs.exists(stagingRoot));
        assertEquals(1, fs.listStatus(stagingRoot).length);
        Path staging = fs.listStatus(stagingRoot)[0].getPath();
        Path jobXml = path(staging + "/job.xml");

        assertTrue(fs.exists(jobXml));

        FileStatus fileStatus = fs.getFileStatus(jobXml);
        System.out.println("job.xml permission = " + fileStatus.getPermission());
        assertTrue(fileStatus.getPermission().getOtherAction().implies(FsAction.READ));
        assertTrue(fileStatus.getPermission().getGroupAction().implies(FsAction.READ));

        submittedJob.waitForCompletion();
    } finally {
        if (minimr != null) {
            minimr.stop();
        }
        if (fs != null) {
            fs.close();
        }
        if (minidfs != null) {
            minidfs.shutdown(true);
        }
    }
}