List of usage examples for org.apache.hadoop.mapred JobClient submitJob
public RunningJob submitJob(final JobConf conf) throws FileNotFoundException, IOException
From source file:DataJoinJob.java
License:Apache License
/** * Submit/run a map/reduce job./*from w w w . j a v a 2 s . c o m*/ * * @param job * @return true for success * @throws IOException */ public static boolean runJob(JobConf job) throws IOException { JobClient jc = new JobClient(job); boolean sucess = true; RunningJob running = null; try { running = jc.submitJob(job); JobID jobId = running.getID(); System.out.println("Job " + jobId + " is submitted"); while (!running.isComplete()) { System.out.println("Job " + jobId + " is still running."); try { Thread.sleep(60000); } catch (InterruptedException e) { } running = jc.getJob(jobId); } sucess = running.isSuccessful(); } finally { if (!sucess && (running != null)) { running.killJob(); } jc.close(); } return sucess; }
From source file:Text2FormatStorageMR.java
License:Open Source License
@SuppressWarnings("deprecation") public static void main(String[] args) throws Exception { if (args.length != 2) { System.out.println("FormatFileMR <input> <output>"); System.exit(-1);//from www . ja v a 2 s .c o m } JobConf conf = new JobConf(FormatStorageMR.class); conf.setJobName("Text2FormatMR"); conf.setNumMapTasks(1); conf.setNumReduceTasks(4); conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(Unit.Record.class); conf.setMapperClass(TextFileTestMapper.class); conf.setReducerClass(FormatFileTestReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(FormatStorageOutputFormat.class); conf.set("mapred.output.compress", "flase"); Head head = new Head(); initHead(head); head.toJobConf(conf); FileInputFormat.setInputPaths(conf, args[0]); Path outputPath = new Path(args[1]); FileOutputFormat.setOutputPath(conf, outputPath); FileSystem fs = outputPath.getFileSystem(conf); fs.delete(outputPath, true); JobClient jc = new JobClient(conf); RunningJob rj = null; rj = jc.submitJob(conf); String lastReport = ""; SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS"); long reportTime = System.currentTimeMillis(); long maxReportInterval = 3 * 1000; while (!rj.isComplete()) { try { Thread.sleep(1000); } catch (InterruptedException e) { } int mapProgress = Math.round(rj.mapProgress() * 100); int reduceProgress = Math.round(rj.reduceProgress() * 100); String report = " map = " + mapProgress + "%, reduce = " + reduceProgress + "%"; if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) { String output = dateFormat.format(Calendar.getInstance().getTime()) + report; System.out.println(output); lastReport = report; reportTime = System.currentTimeMillis(); } } System.exit(0); }
From source file:Text2ColumntStorageMR.java
License:Open Source License
/**
 * Configures and submits the "Text2ColumnStorageMR" job (TextInputFormat
 * input, ColumnStorageHiveOutputFormat output) and prints map/reduce
 * progress until the job completes.
 *
 * <p>The output path is deleted recursively before the job is submitted.
 * The process exits with status 0 when the job succeeded and 1 otherwise
 * (including when waiting was interrupted); -1 on wrong usage.
 *
 * @param args {@code <input> <output> <columnStorageMode>}
 * @throws Exception if configuring, submitting or polling the job fails;
 *         a NumberFormatException if the third argument is not an integer
 */
@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {
    if (args.length != 3) {
        System.out.println("Text2ColumnStorageMR <input> <output> <columnStorageMode>");
        System.exit(-1);
    }

    JobConf conf = new JobConf(Text2ColumntStorageMR.class);

    conf.setJobName("Text2ColumnStorageMR");

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(4);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Unit.Record.class);

    conf.setMapperClass(TextFileMapper.class);
    conf.setReducerClass(ColumnStorageReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat((Class<? extends OutputFormat>) ColumnStorageHiveOutputFormat.class);
    // Was misspelled "flase", which is not a valid boolean value.
    conf.set("mapred.output.compress", "false");

    Head head = new Head();
    initHead(head);

    head.toJobConf(conf);

    // The mode is only validated as an integer; it is not used further in
    // this method.
    // NOTE(review): presumably it should be stored in the job configuration
    // for the reducer/output format - confirm against the original project.
    Integer.parseInt(args[2]);

    FileInputFormat.setInputPaths(conf, args[0]);
    Path outputPath = new Path(args[1]);
    FileOutputFormat.setOutputPath(conf, outputPath);

    // Remove any previous output so the job can write to the same path.
    FileSystem fs = outputPath.getFileSystem(conf);
    fs.delete(outputPath, true);

    JobClient jc = new JobClient(conf);
    boolean interrupted = false;
    boolean succeeded = false;
    try {
        RunningJob rj = jc.submitJob(conf);

        String lastReport = "";
        // "HH" = 24-hour clock; "hh" without an AM/PM marker is ambiguous.
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS");
        long reportTime = System.currentTimeMillis();
        long maxReportInterval = 3 * 1000;

        while (!rj.isComplete()) {
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                // Stop waiting instead of silently ignoring the interrupt.
                interrupted = true;
                break;
            }

            int mapProgress = Math.round(rj.mapProgress() * 100);
            int reduceProgress = Math.round(rj.reduceProgress() * 100);

            String report = " map = " + mapProgress + "%,  reduce = " + reduceProgress + "%";

            // Print when progress changed, or at least every maxReportInterval ms.
            if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) {
                String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
                System.out.println(output);
                lastReport = report;
                reportTime = System.currentTimeMillis();
            }
        }

        succeeded = !interrupted && rj.isSuccessful();
    } finally {
        jc.close();
    }

    if (interrupted) {
        Thread.currentThread().interrupt();
    }
    // Report the job outcome through the exit status.
    System.exit(succeeded ? 0 : 1);
}
From source file:FormatStorage2ColumnStorageMR.java
License:Open Source License
/**
 * Configures and submits the "FormatStorage2ColumnStorageMR" job
 * (FormatStorageInputFormat input, ColumnStorageReducer as reducer) and
 * prints map/reduce progress until the job completes.
 *
 * <p>The output path is deleted recursively before the job is submitted.
 * The process exits with status 0 when the job succeeded and 1 otherwise
 * (including when waiting was interrupted); -1 on wrong usage.
 *
 * @param args {@code <input> <output>}
 * @throws Exception if configuring, submitting or polling the job fails
 */
@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.out.println("FormatStorage2ColumnStorageMR <input> <output>");
        System.exit(-1);
    }

    JobConf conf = new JobConf(FormatStorageMR.class);

    conf.setJobName("FormatStorage2ColumnStorageMR");

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(4);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Unit.Record.class);

    conf.setMapperClass(FormatStorageMapper.class);
    conf.setReducerClass(ColumnStorageReducer.class);

    conf.setInputFormat(FormatStorageInputFormat.class);
    // Was misspelled "flase", which is not a valid boolean value.
    conf.set("mapred.output.compress", "false");

    Head head = new Head();
    initHead(head);

    head.toJobConf(conf);

    FileInputFormat.setInputPaths(conf, args[0]);
    Path outputPath = new Path(args[1]);
    FileOutputFormat.setOutputPath(conf, outputPath);

    // Remove any previous output so the job can write to the same path.
    FileSystem fs = outputPath.getFileSystem(conf);
    fs.delete(outputPath, true);

    JobClient jc = new JobClient(conf);
    boolean interrupted = false;
    boolean succeeded = false;
    try {
        RunningJob rj = jc.submitJob(conf);

        String lastReport = "";
        // "HH" = 24-hour clock; "hh" without an AM/PM marker is ambiguous.
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS");
        long reportTime = System.currentTimeMillis();
        long maxReportInterval = 3 * 1000;

        while (!rj.isComplete()) {
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                // Stop waiting instead of silently ignoring the interrupt.
                interrupted = true;
                break;
            }

            int mapProgress = Math.round(rj.mapProgress() * 100);
            int reduceProgress = Math.round(rj.reduceProgress() * 100);

            String report = " map = " + mapProgress + "%,  reduce = " + reduceProgress + "%";

            // Print when progress changed, or at least every maxReportInterval ms.
            if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) {
                String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
                System.out.println(output);
                lastReport = report;
                reportTime = System.currentTimeMillis();
            }
        }

        succeeded = !interrupted && rj.isSuccessful();
    } finally {
        jc.close();
    }

    if (interrupted) {
        Thread.currentThread().interrupt();
    }
    // Report the job outcome through the exit status.
    System.exit(succeeded ? 0 : 1);
}
From source file:com.cloudera.circus.test.TestXTest.java
License:Open Source License
@Test @TestHadoop// w w w .j a v a 2 s . co m public void testHadoopMapReduce() throws Exception { JobConf conf = getHadoopConf(); FileSystem fs = FileSystem.get(conf); JobClient jobClient = new JobClient(conf); try { Path inputDir = new Path(getHadoopTestDir(), "input"); Path outputDir = new Path(getHadoopTestDir(), "output"); fs.mkdirs(inputDir); Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt"))); writer.write("a\n"); writer.write("b\n"); writer.write("c\n"); writer.close(); JobConf jobConf = getHadoopConf(); jobConf.setInt("mapred.map.tasks", 1); jobConf.setInt("mapred.map.max.attempts", 1); jobConf.setInt("mapred.reduce.max.attempts", 1); jobConf.set("mapred.input.dir", inputDir.toString()); jobConf.set("mapred.output.dir", outputDir.toString()); final RunningJob runningJob = jobClient.submitJob(jobConf); waitFor(60 * 1000, true, new Predicate() { @Override public boolean evaluate() throws Exception { return runningJob.isComplete(); } }); Assert.assertTrue(runningJob.isSuccessful()); Assert.assertTrue(fs.exists(new Path(outputDir, "part-00000"))); BufferedReader reader = new BufferedReader( new InputStreamReader(fs.open(new Path(outputDir, "part-00000")))); Assert.assertTrue(reader.readLine().trim().endsWith("a")); Assert.assertTrue(reader.readLine().trim().endsWith("b")); Assert.assertTrue(reader.readLine().trim().endsWith("c")); Assert.assertNull(reader.readLine()); reader.close(); } finally { fs.close(); jobClient.close(); } }
From source file:com.google.mr4c.hadoop.HadoopAlgoRunner.java
License:Open Source License
private void submitJob() throws IOException { // most of this method copies JobClient.runJob() // addition here is logging the job URI JobClient client = new JobClient(m_jobConf); RunningJob job = client.submitJob(m_jobConf); m_log.info("Job URL is [{}]", job.getTrackingURL()); try {//w w w . j av a 2 s. c om if (!client.monitorAndPrintJob(m_jobConf, job)) { throw new IOException("Job failed!"); } } catch (InterruptedException ie) { Thread.currentThread().interrupt(); } }
From source file:com.ibm.jaql.lang.expr.hadoop.Util.java
License:Apache License
public static void submitJob(JsonString submitClassName, JobConf conf) throws Exception { JobClient jc = new JobClient(conf); RunningJob rj = jc.submitJob(conf); String sc = JsonUtil.printToString(submitClassName); // log to status that a MR job is starting mrStatusStart(sc);/*from ww w.j a v a2s . co m*/ // log to status vital MR job information mrStatusInfo(sc, JsonUtil.printToString(new JsonString(rj.getID().toString())), JsonUtil.printToString(new JsonString(rj.getJobName())), JsonUtil.printToString(new JsonString(rj.getTrackingURL()))); //STATUS_LOG.info("MAP-REDUCE INFO: " + rj.getID() + "," + rj.getJobName() + "," + rj.getTrackingURL()); boolean failed = false; try { if (!jc.monitorAndPrintJob(conf, rj)) { LOG.error(new IOException("Job failed!")); failed = true; //throw new IOException("Job failed!"); } } catch (InterruptedException ie) { Thread.currentThread().interrupt(); } try { if (rj.isSuccessful()) { logAllTaskSyslogs(rj, true); } else { logAllTaskSyslogs(rj, false); } } catch (Throwable t) { // log it, but do not stop the world for this LOG.error(t); } // log to status that a MR job is stopping mrStatusStop(sc); // if the job failed, then throw an exception if (failed) { throw new IOException("Job failed!"); } }
From source file:com.liferay.hadoop.action.HadoopJob.java
License:Open Source License
public String doExecute(HttpServletRequest request, HttpServletResponse response) throws Exception { response.setContentType(ContentTypes.TEXT_PLAIN_UTF8); PrintWriter writer = response.getWriter(); FileSystem fileSystem = HadoopManager.getFileSystem(); JobClient jobClient = HadoopManager.getJobClient(); writer.println("-- Job Status --"); Path inputPath = new Path("/index/*/*"); Path outputPath = new Path("/wordcount/results"); try {//www . j a va 2s . co m if (_runningJob == null) { writer.println("Creating job"); if (fileSystem.exists(_jobPath)) { fileSystem.delete(_jobPath, false); } if (!fileSystem.exists(_jobPath)) { writer.println("Deploying the job code to cluster"); FSDataOutputStream outputStream = null; try { outputStream = fileSystem.create(_jobPath); ServletContext servletContext = HadoopManager.getServletContext(); InputStream inputStream = servletContext.getResourceAsStream("/WEB-INF/lib/hadoop-job.jar"); StreamUtil.transfer(inputStream, outputStream, false); } finally { StreamUtil.cleanUp(outputStream); } writer.println("Job code deployed to cluster"); } if (fileSystem.exists(outputPath)) { writer.println("A previous job output was found, backing it up"); fileSystem.rename(outputPath, outputPath.getParent().suffix("/.results-" + System.currentTimeMillis())); } _jobConf = HadoopManager.createNewJobConf(); _jobConf.setJobName("Word Count"); writer.println("Job '" + _jobConf.getJobName() + "' is being configured"); _jobConf.setJarByClass(Map.class); _jobConf.setOutputKeyClass(Text.class); _jobConf.setOutputValueClass(IntWritable.class); _jobConf.setMapperClass(Map.class); _jobConf.setCombinerClass(Reduce.class); _jobConf.setReducerClass(Reduce.class); _jobConf.setInputFormat(TextInputFormat.class); _jobConf.setOutputFormat(TextOutputFormat.class); writer.println("Job code deployed to distributed cache's classpath"); DistributedCache.addArchiveToClassPath(_jobPath, _jobConf, fileSystem); FileInputFormat.setInputPaths(_jobConf, inputPath); 
FileOutputFormat.setOutputPath(_jobConf, outputPath); writer.println("Submitting job the first time"); _runningJob = jobClient.submitJob(_jobConf); writer.println("Job submitted"); } int jobState = _runningJob.getJobState(); writer.println( "Job status: " + jobState + " (RUNNING = 1, SUCCEEDED = 2, FAILED = 3, PREP = 4, KILLED = 5)"); if ((jobState != JobStatus.RUNNING) && (jobState != JobStatus.PREP)) { writer.println("Re-issuing the job"); if (fileSystem.exists(outputPath)) { writer.println("A previous job output was found, backing it up"); fileSystem.rename(outputPath, outputPath.getParent().suffix("/.results-" + System.currentTimeMillis())); } writer.println("Submitting job the first time"); _runningJob = jobClient.submitJob(_jobConf); writer.println("Job submitted"); } } catch (Exception ioe) { writer.println("Job error: "); ioe.printStackTrace(writer); } writer.flush(); writer.close(); return null; }
From source file:com.liferay.hadoop.util.HadoopManager.java
License:Open Source License
public static void runJob(StoreEvent storeEvent) throws IOException { FileSystem fileSystem = getFileSystem(); if (_servletContext == null) { return;//from ww w .java 2 s .c om } JobClient jobClient = getJobClient(); Path inputPath = new Path("/index".concat(storeEvent.getRootPath().toString()).concat("/*")); Path outputPath = new Path("/wordcount".concat(storeEvent.getRootPath().toString()).concat("/results")); try { if (_runningJob == null) { if (!fileSystem.exists(_jobPath)) { FSDataOutputStream outputStream = null; try { outputStream = fileSystem.create(_jobPath); InputStream inputStream = _servletContext .getResourceAsStream("/WEB-INF/lib/hadoop-job.jar"); StreamUtil.transfer(inputStream, outputStream, false); } finally { StreamUtil.cleanUp(outputStream); } } if (fileSystem.exists(outputPath)) { fileSystem.rename(outputPath, outputPath.getParent().suffix("/.results-" + System.currentTimeMillis())); } _jobConf = new JobConf(_sharedJobConf); _jobConf.setJobName("Word Count"); _jobConf.setJarByClass(Map.class); _jobConf.setOutputKeyClass(Text.class); _jobConf.setOutputValueClass(IntWritable.class); _jobConf.setMapperClass(Map.class); _jobConf.setCombinerClass(Reduce.class); _jobConf.setReducerClass(Reduce.class); _jobConf.setInputFormat(TextInputFormat.class); _jobConf.setOutputFormat(TextOutputFormat.class); DistributedCache.addArchiveToClassPath(_jobPath, _jobConf, fileSystem); FileInputFormat.setInputPaths(_jobConf, inputPath); FileOutputFormat.setOutputPath(_jobConf, outputPath); _runningJob = jobClient.submitJob(_jobConf); } int jobState = _runningJob.getJobState(); if ((jobState != JobStatus.RUNNING) && (jobState != JobStatus.PREP)) { System.out.println("Re-issuing the word count job."); if (fileSystem.exists(outputPath)) { fileSystem.rename(outputPath, outputPath.getParent().suffix("/.results-" + System.currentTimeMillis())); } _runningJob = jobClient.submitJob(_jobConf); } } catch (Exception ioe) { ioe.printStackTrace(); } }
From source file:com.mycompany.app.TestStagingDirectoryPermissions.java
License:Apache License
/**
 * Starts a mini DFS cluster with umask 0077 and a mini MR cluster, submits
 * a job and checks that the job.xml written to the user's staging directory
 * is readable by group and others.
 */
@Test
public void perms() throws IOException, InterruptedException {
    MiniDFSCluster dfsCluster = null;
    FileSystem dfs = null;
    MiniMRClientCluster mrCluster = null;
    try {
        Configuration baseConf = new Configuration(true);
        baseConf.set("fs.permission.umask-mode", "0077");
        dfsCluster = new MiniDFSCluster.Builder(baseConf).build();
        dfsCluster.waitActive();

        dfs = dfsCluster.getFileSystem();
        baseConf.set(FileSystem.FS_DEFAULT_NAME_KEY, dfs.getUri().toString());

        // Job input: a single small text file.
        Path inputDir = path("/in");
        dfs.mkdirs(inputDir);
        FSDataOutputStream inputFile = dfs.create(new Path(inputDir, "input.txt"));
        inputFile.write("hello!".getBytes("UTF-8"));
        inputFile.close();

        String userName = UserGroupInformation.getCurrentUser().getUserName();
        Path homeDir = new Path("/User/" + userName);
        dfs.mkdirs(homeDir);

        mrCluster = MiniMRClientClusterFactory.create(this.getClass(), 1, baseConf);
        JobConf jobConf = new JobConf(mrCluster.getConfig());
        jobConf.setJobName("PermsTest");
        JobClient jobClient = new JobClient(jobConf);

        FileInputFormat.addInputPath(jobConf, inputDir);
        FileOutputFormat.setOutputPath(jobConf, path("/out"));
        jobConf.setInputFormat(TextInputFormat.class);
        jobConf.setMapOutputKeyClass(Text.class);
        jobConf.setMapOutputValueClass(Text.class);
        jobConf.setOutputKeyClass(Text.class);
        jobConf.setOutputValueClass(Text.class);
        jobConf.setMapperClass(MySleepMapper.class);
        jobConf.setNumReduceTasks(1);

        RunningJob running = jobClient.submitJob(jobConf);

        // Sleep for a bit to let localization finish
        System.out.println("Sleeping...");
        Thread.sleep(3000L);
        System.out.println("Done sleeping...");
        assertFalse(UserGroupInformation.isSecurityEnabled());

        // Exactly one job directory is expected under the staging root.
        Path stagingRoot = path("/tmp/hadoop-yarn/staging/" + userName + "/.staging/");
        assertTrue(dfs.exists(stagingRoot));
        assertEquals(1, dfs.listStatus(stagingRoot).length);

        Path jobStagingDir = dfs.listStatus(stagingRoot)[0].getPath();
        Path jobXml = path(jobStagingDir + "/job.xml");
        assertTrue(dfs.exists(jobXml));

        FileStatus jobXmlStatus = dfs.getFileStatus(jobXml);
        System.out.println("job.xml permission = " + jobXmlStatus.getPermission());
        assertTrue(jobXmlStatus.getPermission().getOtherAction().implies(FsAction.READ));
        assertTrue(jobXmlStatus.getPermission().getGroupAction().implies(FsAction.READ));

        running.waitForCompletion();
    } finally {
        if (mrCluster != null) {
            mrCluster.stop();
        }
        if (dfs != null) {
            dfs.close();
        }
        if (dfsCluster != null) {
            dfsCluster.shutdown(true);
        }
    }
}