List of usage examples for org.apache.hadoop.mapreduce Job submit
public void submit() throws IOException, InterruptedException, ClassNotFoundException
From source file:TestBAM.java
License:Open Source License
public int run(String[] args) throws Exception { final Configuration conf = getConf(); conf.set(MyOutputFormat.HEADER_FROM_FILE, args[0]); DistributedCache.addFileToClassPath(new Path("hdfs:///libjars/hadoop-bam-7.0.0-jar-with-dependencies.jar"), conf);//from w w w. j a v a2 s. c om final Job job = new Job(conf); job.setJarByClass(TestBAM.class); job.setMapperClass(TestBAMMapper.class); job.setReducerClass(TestBAMReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(SAMRecordWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(SAMRecordWritable.class); job.setInputFormatClass(AnySAMInputFormat.class); job.setOutputFormatClass(TestBAM.MyOutputFormat.class); org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job, new Path(args[0])); org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job, new Path(args[1])); job.submit(); if (!job.waitForCompletion(true)) { System.err.println("sort :: Job failed."); return 1; } return 0; }
From source file:BU.MET.CS755.SpeciesIterDriver2.java
static boolean MRGraphBuilder(String args[], int iterCnt) { Job theJob = null; conf = new JobConf(SpeciesIterDriver2.class); conf.setJobName("Species Graph Builder"); conf.setNumReduceTasks(5);/* w w w . j a v a 2 s . co m*/ conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(SpeciesGraphBuilderMapper.class); conf.setReducerClass(SpeciesGraphBuilderReducer.class); // Reading in XML. conf.setInputFormat(StreamInputFormat.class); conf.set("stream.recordreader.class", "org.apache.hadoop.streaming.StreamXmlRecordReader"); // Look for the <page> record in the XML. conf.set("stream.recordreader.begin", "<page>"); conf.set("stream.recordreader.end", "</page>"); inputpath = args[0]; outputpath = args[1] + iterCnt; FileInputFormat.setInputPaths(conf, new Path(inputpath)); FileOutputFormat.setOutputPath(conf, new Path(outputpath)); try { theJob = new Job(conf, "SpeciesIter"); theJob.submit(); } catch (Exception e) { e.printStackTrace(); } try { if (theJob != null) { theJob.waitForCompletion(true); } } catch (Exception e) { e.printStackTrace(); } return true; }
From source file:cienciaCelularMR.Main.java
@Override public int run(String[] args) throws Exception { for (int i = 0; i < args.length; i++) { System.out.println("Hadoop - arg[" + i + "] es: " + args[i]); }//from w w w.ja v a 2 s. c o m //Configuracin de memoria de YARN Configuration conf = new Configuration(); conf.set("mapreduce.map.memory.mb", "1400"); conf.set("mapreduce.reduce.memory.mb", "2800"); conf.set("mapreduce.map.java.opts", "-Xmx1120m"); conf.set("mapreduce.reduce.java.opts", "-Xmx2240m"); conf.set("yarn.app.mapreduce.am.resource.mb", "2800"); conf.set("yarn.app.mapreduce.am.command-opts", "-Xmx2240m"); conf.set("yarn.nodemanager.resource.memory-mb", "5040"); conf.set("yarn.scheduler.minimum-allocation-mb", "1400"); conf.set("yarn.scheduler.maximum-allocation-mb", "5040"); conf.set("mapreduce.task.timeout", "18000000");//5 horas //Creacin del Job Job job = Job.getInstance(conf); job.setInputFormatClass(WholeFileInputFormat.class); FileInputFormat.setInputPaths(job, new Path(args[5])); FileOutputFormat.setOutputPath(job, new Path(args[6])); //Salidas alternativas de Mapper para brindar informacin MultipleOutputs.addNamedOutput(job, "controloutput", TextOutputFormat.class, KeyMcell.class, Text.class); MultipleOutputs.addNamedOutput(job, "errormcell", TextOutputFormat.class, KeyMcell.class, Text.class); //Archivos copiados a cache de los nodos job.addCacheFile(new Path("wasb:///mcell.exe").toUri()); job.addCacheFile(new Path("wasb:///fernet.exe").toUri()); job.addCacheFile(new Path("wasb:///fernet.cfg").toUri()); job.addCacheFile(new Path("wasb:///libconfig_d.dll").toUri()); job.addCacheFile(new Path("wasb:///libtiff3.dll").toUri()); job.addCacheFile(new Path("wasb:///jpeg62.dll").toUri()); job.addCacheFile(new Path("wasb:///zlib1.dll").toUri()); job.addCacheFile(new Path("wasb:///msvcr100d.dll").toUri()); job.setJarByClass(Main.class); Configuration mapAConf = new Configuration(false); ChainMapper.addMapper(job, McellMapper.class, KeyMcell.class, BytesWritable.class, KeyMcell.class, Text.class, 
mapAConf); Configuration mapBConf = new Configuration(false); ChainMapper.addMapper(job, FernetMapper.class, KeyMcell.class, Text.class, KeyMcell.class, FernetOutput.class, mapBConf); job.setReducerClass(ResultReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(BytesWritable.class); job.submit(); return 0; }
From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java
License:Apache License
/**
 * Prepares the MapReduce job and submits it.
 *
 * <p>In order, this method creates a local temporary directory, creates the {@code Job},
 * installs a {@code MapReduceClassLoader} on the job configuration and as the context
 * class loader, calls {@code beforeSubmit}, localizes user resources, overrides the job
 * name, builds the job jar and, when not running in local mode, localizes the plugin
 * archive, program jar, logback jar, launcher jar and framework archive. It then starts a
 * long transaction, records the context in the job configuration and submits the job.
 *
 * <p>{@code cleanupTask} is re-assigned several times, each time wrapping the previous task
 * together with a newly created resource. On any failure the error is logged,
 * {@code cleanupTask} is run and the original throwable is rethrown.
 *
 * @throws Exception if any preparation step or the job submission fails
 */
@Override
protected void startUp() throws Exception {
    // Creates a temporary directory locally for storing all generated files.
    File tempDir = createTempDirectory();
    cleanupTask = createCleanupTask(tempDir);

    try {
        Job job = createJob(new File(tempDir, "mapreduce"));
        Configuration mapredConf = job.getConfiguration();

        classLoader = new MapReduceClassLoader(injector, cConf, mapredConf, context.getProgram().getClassLoader(), context.getPlugins(), context.getPluginInstantiator());
        // Chain the class loader onto the cleanup task created for the temp directory.
        cleanupTask = createCleanupTask(cleanupTask, classLoader);

        mapredConf.setClassLoader(new WeakReferenceDelegatorClassLoader(classLoader));
        ClassLoaders.setContextClassLoader(mapredConf.getClassLoader());

        context.setJob(job);

        beforeSubmit(job);

        // Localize additional resources that users have requested via BasicMapReduceContext.localize methods
        Map<String, String> localizedUserResources = localizeUserResources(job, tempDir);

        // Override user-defined job name, since we set it and depend on the name.
        // https://issues.cask.co/browse/CDAP-2441
        String jobName = job.getJobName();
        if (!jobName.isEmpty()) {
            LOG.warn("Job name {} is being overridden.", jobName);
        }
        job.setJobName(getJobName(context));

        // Create a temporary location for storing all generated files through the LocationFactory.
        Location tempLocation = createTempLocationDirectory();
        cleanupTask = createCleanupTask(cleanupTask, tempLocation);

        // For local mode, everything is in the configuration classloader already, hence no need to create new jar
        if (!MapReduceTaskContextProvider.isLocal(mapredConf)) {
            // After calling beforeSubmit, we know what plugins are needed for the program, hence construct the proper
            // ClassLoader from here and use it for setting up the job
            Location pluginArchive = createPluginArchive(tempLocation);
            if (pluginArchive != null) {
                job.addCacheArchive(pluginArchive.toURI());
                mapredConf.set(Constants.Plugin.ARCHIVE, pluginArchive.getName());
            }
        }

        // set resources for the job
        TaskType.MAP.setResources(mapredConf, context.getMapperResources());
        TaskType.REDUCE.setResources(mapredConf, context.getReducerResources());

        // replace user's Mapper & Reducer's with our wrappers in job config
        MapperWrapper.wrap(job);
        ReducerWrapper.wrap(job);

        // packaging job jar which includes cdap classes with dependencies
        File jobJar = buildJobJar(job, tempDir);
        job.setJar(jobJar.toURI().toString());

        // In local mode the original program jar location is used as-is.
        Location programJar = programJarLocation;
        if (!MapReduceTaskContextProvider.isLocal(mapredConf)) {
            // Copy and localize the program jar in distributed mode
            programJar = copyProgramJar(tempLocation);
            job.addCacheFile(programJar.toURI());

            List<String> classpath = new ArrayList<>();

            // Localize logback.xml
            Location logbackLocation = createLogbackJar(tempLocation);
            if (logbackLocation != null) {
                job.addCacheFile(logbackLocation.toURI());
                classpath.add(logbackLocation.getName());
            }

            // Generate and localize the launcher jar to control the classloader of MapReduce containers processes
            classpath.add("job.jar/lib/*");
            classpath.add("job.jar/classes");
            Location launcherJar = createLauncherJar( Joiner.on(",").join(MapReduceContainerHelper.getMapReduceClassPath(mapredConf, classpath)), tempLocation);
            job.addCacheFile(launcherJar.toURI());

            // The only thing in the container classpath is the launcher.jar
            // The MapReduceContainerLauncher inside the launcher.jar will create a MapReduceClassLoader and launch
            // the actual MapReduce AM/Task from that
            // We explicitly localize the mr-framework, but do not use it with the classpath
            URI frameworkURI = MapReduceContainerHelper.getFrameworkURI(mapredConf);
            if (frameworkURI != null) {
                job.addCacheArchive(frameworkURI);
            }

            mapredConf.unset(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH);
            mapredConf.set(MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH, launcherJar.getName());
            mapredConf.set(YarnConfiguration.YARN_APPLICATION_CLASSPATH, launcherJar.getName());
        }

        MapReduceContextConfig contextConfig = new MapReduceContextConfig(mapredConf);
        // We start long-running tx to be used by mapreduce job tasks.
        Transaction tx = txClient.startLong();
        try {
            // We remember tx, so that we can re-use it in mapreduce tasks
            // NOTE(review): despite its name, cConfCopy is the same reference as cConf, not a copy - confirm intent.
            CConfiguration cConfCopy = cConf;
            contextConfig.set(context, cConfCopy, tx, programJar.toURI(), localizedUserResources);

            LOG.info("Submitting MapReduce Job: {}", context);
            // submits job and returns immediately. Shouldn't need to set context ClassLoader.
            job.submit();

            // Only record the job and transaction on this instance after a successful submit.
            this.job = job;
            this.transaction = tx;
        } catch (Throwable t) {
            // The transaction was started but the job was not submitted; invalidate it before rethrowing.
            Transactions.invalidateQuietly(txClient, tx);
            throw t;
        }
    } catch (Throwable t) {
        LOG.error("Exception when submitting MapReduce Job: {}", context, t);
        // Run whatever cleanup has been chained so far, then propagate the failure.
        cleanupTask.run();
        throw t;
    }
}
From source file:com.accumulobook.advanced.mapreduce.WordCount.java
License:Apache License
@Override public int run(String[] args) throws Exception { Job job = Job.getInstance(new Configuration()); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(WordCountMapper.class); job.setCombinerClass(WordCountCombiner.class); job.setReducerClass(WordCountReducer.class); // input/*w w w . j a v a 2 s . co m*/ job.setInputFormatClass(AccumuloInputFormat.class); ClientConfiguration zkiConfig = new ClientConfiguration().withInstance(args[0]).withZkHosts(args[1]); AccumuloInputFormat.setInputTableName(job, WikipediaConstants.ARTICLES_TABLE); List<Pair<Text, Text>> columns = new ArrayList<>(); columns.add(new Pair(WikipediaConstants.CONTENTS_FAMILY_TEXT, new Text(""))); AccumuloInputFormat.fetchColumns(job, columns); AccumuloInputFormat.setZooKeeperInstance(job, zkiConfig); AccumuloInputFormat.setConnectorInfo(job, args[2], new PasswordToken(args[3])); // output job.setOutputFormatClass(AccumuloOutputFormat.class); BatchWriterConfig config = new BatchWriterConfig(); AccumuloOutputFormat.setBatchWriterOptions(job, config); AccumuloOutputFormat.setZooKeeperInstance(job, zkiConfig); AccumuloOutputFormat.setConnectorInfo(job, args[2], new PasswordToken(args[3])); AccumuloOutputFormat.setDefaultTableName(job, WikipediaConstants.WORD_COUNT_TABLE); AccumuloOutputFormat.setCreateTables(job, true); job.setJarByClass(WordCount.class); job.submit(); return 0; }
From source file:com.basho.riak.hadoop.RiakWordCount.java
License:Apache License
/**
 * Configures the "Riak-WordCount" job against five local Riak locations, submits it and
 * waits for it to finish.
 *
 * <p>Keys are listed from the {@code wordcount} bucket and results are written to the
 * {@code wordcount_out} bucket. The command-line arguments are not used.
 *
 * @param args command-line arguments (ignored)
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured, submitted or monitored
 */
public int run(String[] args) throws Exception {
    // The original built a 10,000-element key array here that was never read; it has been removed.
    Configuration conf = getConf();
    conf = RiakConfig.setKeyLister(conf, new BucketKeyLister("wordcount"));
    conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", 11087));
    conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", 12087));
    conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", 13087));
    conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", 14087));
    conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", 15087));
    conf = RiakConfig.setOutputBucket(conf, "wordcount_out");
    conf = RiakConfig.setHadoopClusterSize(conf, 4);

    Job job = new Job(conf, "Riak-WordCount");

    job.setJarByClass(RiakWordCount.class);

    job.setInputFormatClass(RiakInputFormat.class);
    job.setMapperClass(TokenCounterMapper.class);

    job.setReducerClass(TokenCounterReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputFormatClass(RiakOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(WordCountResult.class);

    job.setNumReduceTasks(4);

    job.submit();
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.blackberry.logdriver.util.Cat.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); // Configuration processed by ToolRunner // If run by Oozie, then load the Oozie conf too if (System.getProperty("oozie.action.conf.xml") != null) { conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml"))); }//from w w w .j a v a2 s . c o m FileSystem fs = FileSystem.get(conf); // The command line options List<Path> paths = new ArrayList<Path>(); Path outputDir = null; // Load input files from the command line if (args.length < 2) { System.out.println("usage: [genericOptions] input [input ...] output"); System.exit(1); } // Get the files we need from the command line. for (int i = 0; i < args.length - 1; i++) { for (FileStatus f : fs.globStatus(new Path(args[i]))) { paths.add(f.getPath()); } } outputDir = new Path(args[args.length - 1]); @SuppressWarnings("deprecation") Job job = new Job(conf); Configuration jobConf = job.getConfiguration(); job.setJarByClass(Cat.class); jobConf.setIfUnset("mapred.job.name", "Cat Files"); // To propagate credentials within Oozie if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) { jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION")); } // Good output separators include things that are unsupported by XML. So we // just send the byte value of the character through. The restriction here // is that it can't be more than 1 byte when UTF-8 encoded, since it will be // read by Pig which only deals with single byte separators. 
{ String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR); byte[] bytes = outputSeparator.getBytes(UTF_8); if (bytes.length != 1) { LOG.error("The output separator must be a single byte in UTF-8."); return 1; } jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0])); } job.setInputFormatClass(BoomInputFormat.class); job.setMapperClass(CatMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(NullWritable.class); job.setNumReduceTasks(0); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, outputDir); for (Path path : paths) { BoomInputFormat.addInputPath(job, path); } // Run the job. if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) { return job.waitForCompletion(true) ? 0 : 1; } else { job.submit(); return 0; } }
From source file:com.blackberry.logdriver.util.FastSearch.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); // Configuration processed by ToolRunner // If run by Oozie, then load the Oozie conf too if (System.getProperty("oozie.action.conf.xml") != null) { conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml"))); }/* w w w. j a va 2 s . c o m*/ FileSystem fs = FileSystem.get(conf); // The command line options String searchString = null; List<Path> paths = new ArrayList<Path>(); Path outputDir = null; // Load input files from the command line if (args.length < 3) { System.out.println("usage: [genericOptions] searchString input [input ...] output"); System.exit(1); } // Get the files we need from the command line. searchString = args[0]; for (int i = 1; i < args.length - 1; i++) { for (FileStatus f : fs.globStatus(new Path(args[i]))) { paths.add(f.getPath()); } } outputDir = new Path(args[args.length - 1]); @SuppressWarnings("deprecation") Job job = new Job(conf); Configuration jobConf = job.getConfiguration(); job.setJarByClass(FastSearch.class); jobConf.setIfUnset("mapred.job.name", "Search Files"); // To propagate credentials within Oozie if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) { jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION")); } // Good output separators include things that are unsupported by XML. So we // just send the byte value of the character through. The restriction here // is that it can't be more than 1 byte when UTF-8 encoded, since it will be // read by Pig which only deals with single byte separators. 
{ String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR); byte[] bytes = outputSeparator.getBytes(UTF_8); if (bytes.length != 1) { LOG.error("The output separator must be a single byte in UTF-8."); return 1; } jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0])); } jobConf.set("logdriver.search.string", Base64.encodeBase64String(searchString.getBytes("UTF-8"))); job.setInputFormatClass(AvroBlockInputFormat.class); job.setMapperClass(SearchMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(NullWritable.class); job.setNumReduceTasks(0); // And set the output as usual job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, outputDir); for (Path path : paths) { AvroBlockInputFormat.addInputPath(job, path); } // Run the job. if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) { return job.waitForCompletion(true) ? 0 : 1; } else { job.submit(); return 0; } }
From source file:com.blackberry.logdriver.util.Grep.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); // Configuration processed by ToolRunner // If run by Oozie, then load the Oozie conf too if (System.getProperty("oozie.action.conf.xml") != null) { conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml"))); }//from w ww . ja va 2 s . c o m FileSystem fs = FileSystem.get(conf); // The command line options String regex = null; List<Path> paths = new ArrayList<Path>(); Path outputDir = null; // Load input files from the command line if (args.length < 3) { System.out.println("usage: [genericOptions] regex input [input ...] output"); System.exit(1); } // Get the files we need from the command line. regex = args[0]; for (int i = 1; i < args.length - 1; i++) { for (FileStatus f : fs.globStatus(new Path(args[i]))) { paths.add(f.getPath()); } } outputDir = new Path(args[args.length - 1]); @SuppressWarnings("deprecation") Job job = new Job(conf); Configuration jobConf = job.getConfiguration(); job.setJarByClass(Grep.class); jobConf.setIfUnset("mapred.job.name", "Grep Files"); // To propagate credentials within Oozie if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) { jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION")); } // Good output separators include things that are unsupported by XML. So we // just send the byte value of the character through. The restriction here // is that it can't be more than 1 byte when UTF-8 encoded, since it will be // read by Pig which only deals with single byte separators. 
{ String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR); byte[] bytes = outputSeparator.getBytes(UTF_8); if (bytes.length != 1) { LOG.error("The output separator must be a single byte in UTF-8."); return 1; } jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0])); } jobConf.set("logdriver.grep.regex", Base64.encodeBase64String(regex.getBytes("UTF-8"))); job.setInputFormatClass(BoomInputFormat.class); job.setMapperClass(GrepMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(NullWritable.class); job.setNumReduceTasks(0); // And set the output as usual job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, outputDir); for (Path path : paths) { BoomInputFormat.addInputPath(job, path); } // Run the job. if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) { return job.waitForCompletion(true) ? 0 : 1; } else { job.submit(); return 0; } }
From source file:com.blackberry.logdriver.util.MultiSearch.java
License:Apache License
@Override public int run(String[] args) throws Exception { Configuration conf = getConf(); // Configuration processed by ToolRunner // If run by Oozie, then load the Oozie conf too if (System.getProperty("oozie.action.conf.xml") != null) { conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml"))); }//from w w w . j ava 2s . co m FileSystem fs = FileSystem.get(conf); // The command line options String searchStringDir = null; List<Path> paths = new ArrayList<Path>(); Path outputDir = null; // Load input files from the command line if (args.length < 3) { System.out.println("usage: [genericOptions] searchStringDirectory input [input ...] output"); System.exit(1); } // Get the files we need from the command line. searchStringDir = args[0]; // We are going to be reading all the files in this directory a lot. So // let's up the replication factor by a lot so that they're easy to read. for (FileStatus f : fs.listStatus(new Path(searchStringDir))) { fs.setReplication(f.getPath(), (short) 16); } for (int i = 1; i < args.length - 1; i++) { for (FileStatus f : fs.globStatus(new Path(args[i]))) { paths.add(f.getPath()); } } outputDir = new Path(args[args.length - 1]); @SuppressWarnings("deprecation") Job job = new Job(conf); Configuration jobConf = job.getConfiguration(); job.setJarByClass(MultiSearch.class); jobConf.setIfUnset("mapred.job.name", "MultiSearch"); // To propagate credentials within Oozie if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) { jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION")); } // Good output separators include things that are unsupported by XML. So we // just send the byte value of the character through. The restriction here // is that it can't be more than 1 byte when UTF-8 encoded, since it will be // read by Pig which only deals with single byte separators. 
{ String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR); byte[] bytes = outputSeparator.getBytes(UTF_8); if (bytes.length != 1) { LOG.error("The output separator must be a single byte in UTF-8."); return 1; } jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0])); } jobConf.set("logdriver.search.string.dir", searchStringDir); // This search is generally too fast to make good use of 128MB blocks, so // let's set the value to 256MB (if it's not set already) if (jobConf.get("mapred.max.split.size") == null) { jobConf.setLong("mapred.max.split.size", 256 * 1024 * 1024); } job.setInputFormatClass(AvroBlockInputFormat.class); job.setMapperClass(SearchMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(NullWritable.class); job.setNumReduceTasks(0); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, outputDir); for (Path path : paths) { AvroBlockInputFormat.addInputPath(job, path); } // Run the job. if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) { return job.waitForCompletion(true) ? 0 : 1; } else { job.submit(); return 0; } }