List of usage examples for org.apache.hadoop.mapreduce.Job.addCacheFile
public void addCacheFile(URI uri)
From source file:mx.iteso.msc.examples.FaceCount.java
License:Apache License
public int run(String[] args) throws Exception { // Check input arguments if (args.length != 2) { System.out.println("Usage: FaceCount <input HIB> <output directory>"); System.exit(0);/*ww w . ja va2 s . c om*/ } // Initialize and configure MapReduce job Job job = Job.getInstance(); // Set input format class which parses the input HIB and spawns map tasks job.setInputFormatClass(HibInputFormat.class); // Set the driver, mapper, and reducer classes which express the computation job.setJarByClass(FaceCount.class); job.setMapperClass(FaceCountMapper.class); job.setReducerClass(FaceCountReducer.class); // Set the types for the key/value pairs passed to/from map and reduce layers job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); // Set the input and output paths on the HDFS FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); // add cascade file job.addCacheFile(new URI("/hipi/OpenCV/lbpcascade_frontalface.xml#lbpcascade_frontalface.xml")); // Execute the MapReduce job and block until it complets boolean success = job.waitForCompletion(true); // Return success or failure return success ? 0 : 1; }
From source file:nl.utwente.bigdata.PageRank.java
License:Apache License
public static void run(String[] args) throws Exception { Configuration conf = new Configuration(); //Process args GenericOptionsParser parser = new GenericOptionsParser(conf, args); String[] otherArgs = parser.getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: pageRank <in> [<in>...] <out> <retweetpath> <pagerankpath"); System.exit(2);/*from www. j a v a 2 s .c om*/ } conf.set("mapreduce.job.reduce.slowstart.completedmaps", "1"); //Setup the job Job job = Job.getInstance(conf, "Twitter Reader"); job.setJarByClass(PageRank.class); job.setMapperClass(PageRankMapper.class); job.setReducerClass(PageRankReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(MapWritable.class); //Load input files for (int i = 0; i < otherArgs.length - 3; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } //Load output file FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 3])); //Load previous results FileSystem fs = FileSystem.get(new Configuration()); FileStatus[] status = fs.listStatus(new Path("hdfs:" + otherArgs[otherArgs.length - 2])); for (FileStatus s : status) { job.addCacheFile(s.getPath().toUri()); } String str = new String(otherArgs[otherArgs.length - 1]); if (!str.equals("overslaan")) { FileStatus[] status2 = fs.listStatus(new Path("hdfs:" + otherArgs[otherArgs.length - 1])); for (FileStatus s2 : status2) { job.addCacheFile(s2.getPath().toUri()); } } boolean succesful = job.waitForCompletion(true); }
From source file:nl.utwente.bigdata.TwitterExample.java
License:Apache License
/**
 * Entry point: configures the Twitter example job and exits with its status.
 *
 * <p>Expected arguments after any generic Hadoop options: {@code <in> [<in>...] <out>}.
 * Exits with 2 on a usage error, 0 if the job succeeds and 1 if it fails.
 *
 * @param args command-line arguments, including any generic Hadoop options
 * @throws Exception if the job cannot be configured or submitted
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: exampleTwitter <in> [<in>...] <out>");
        System.exit(2);
    }

    // Job.getInstance replaces the deprecated Job(Configuration, String) constructor
    Job job = Job.getInstance(conf, "Twitter Reader");
    job.addCacheFile(new Path("players.txt").toUri());
    job.setJarByClass(TwitterExample.class);
    job.setMapperClass(ExampleMapper.class);
    job.setReducerClass(ExampleReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Every argument but the last is an input path; the last one is the output path
    int outputIndex = otherArgs.length - 1;
    for (int i = 0; i < outputIndex; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[outputIndex]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:org.apache.accumulo.hadoop.mapreduce.partition.RangePartitioner.java
License:Apache License
/** * Sets the hdfs file name to use, containing a newline separated list of Base64 encoded split * points that represent ranges for partitioning *///from ww w.j a v a 2 s . co m public static void setSplitFile(Job job, String file) { URI uri = new Path(file).toUri(); job.addCacheFile(uri); job.getConfiguration().set(CUTFILE_KEY, uri.getPath()); }
From source file:org.apache.hadoop.examples.terasort.TeraSort.java
License:Apache License
/**
 * Configures and runs the TeraSort job.
 *
 * <p>Unless the simple partitioner is selected, a partition file is written under the output
 * directory, registered as a cache file, and the total-order partitioner is used.
 *
 * @param args exactly two entries: the input directory and the output directory
 * @return 0 on success, 1 if the job fails, 2 on a usage error, -1 if the partition file
 *     could not be written
 * @throws Exception if the job cannot be configured or submitted
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        usage();
        return 2;
    }
    LOG.info("starting");
    Job job = Job.getInstance(getConf());
    Path inputDir = new Path(args[0]);
    Path outputDir = new Path(args[1]);
    boolean useSimplePartitioner = getUseSimplePartitioner(job);
    TeraInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraSort");
    job.setJarByClass(TeraSort.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(TeraInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    if (useSimplePartitioner) {
        job.setPartitionerClass(SimplePartitioner.class);
    } else {
        long start = System.currentTimeMillis();
        Path partitionFile = new Path(outputDir, TeraInputFormat.PARTITION_FILENAME);
        // The fragment repeats the partition file name as the name of the cache entry
        URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME);
        try {
            TeraInputFormat.writePartitionFile(job, partitionFile);
        } catch (Throwable e) {
            // Pass the throwable itself so the stack trace is logged; the message alone
            // may be null and hides where the failure happened.
            LOG.error(e.getMessage(), e);
            return -1;
        }
        job.addCacheFile(partitionUri);
        long end = System.currentTimeMillis();
        System.out.println("Spent " + (end - start) + "ms computing partitions.");
        job.setPartitionerClass(TotalOrderPartitioner.class);
    }

    job.getConfiguration().setInt("dfs.replication", getOutputReplication(job));
    int ret = job.waitForCompletion(true) ? 0 : 1;
    LOG.info("done");
    return ret;
}
From source file:org.apache.ignite.internal.processors.hadoop.impl.HadoopTeraSortTest.java
License:Apache License
/** * Creates Job instance and sets up necessary properties for it. * @param conf The Job config./*www . ja v a 2 s.co m*/ * @return The job. * @throws Exception On error. */ private Job setupConfig(JobConf conf) throws Exception { Job job = Job.getInstance(conf); Path inputDir = new Path(generateOutDir); Path outputDir = new Path(sortOutDir); boolean useSimplePartitioner = TeraSort.getUseSimplePartitioner(job); TeraInputFormat.setInputPaths(job, inputDir); FileOutputFormat.setOutputPath(job, outputDir); job.setJobName("TeraSort"); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(TeraInputFormat.class); job.setOutputFormatClass(TeraOutputFormat.class); if (useSimplePartitioner) job.setPartitionerClass(TeraSort.SimplePartitioner.class); else { long start = System.currentTimeMillis(); Path partFile = new Path(outputDir, PARTITION_FILENAME); URI partUri = new URI(partFile.toString() + "#" + PARTITION_FILENAME); try { TeraInputFormat.writePartitionFile(job, partFile); } catch (Throwable e) { throw new RuntimeException(e); } job.addCacheFile(partUri); long end = System.currentTimeMillis(); System.out.println("Spent " + (end - start) + "ms computing partitions. " + "Partition file added to distributed cache: " + partUri); job.setPartitionerClass(getTeraSortTotalOrderPartitioner()/*TeraSort.TotalOrderPartitioner.class*/); } job.getConfiguration().setInt("dfs.replication", TeraSort.getOutputReplication(job)); /* TeraOutputFormat.setFinalSync(job, true); */ Method m = TeraOutputFormat.class.getDeclaredMethod("setFinalSync", JobContext.class, boolean.class); m.setAccessible(true); m.invoke(null, job, true); return job; }
From source file:org.bgi.flexlab.gaea.data.structure.header.MultipleVCFHeader.java
License:Open Source License
public boolean distributeCacheVcfHeader(String outputPath, Job job, Configuration conf) { writeHeaderToHDFS(outputPath, conf); try {//ww w . j a v a2 s .com job.addCacheFile(new URI(conf.get(GaeaVCFHeader.VCF_HEADER_PROPERTY) + "#VcfHeaderObj")); } catch (URISyntaxException e) { e.printStackTrace(); return false; } return true; }
From source file:org.bgi.flexlab.gaea.data.structure.memoryshare.WholeGenomeShare.java
License:Open Source License
public static boolean distributeCache(String chrList, Job job, String cacheName) throws IOException, URISyntaxException { job.addCacheFile(new URI(chrList + "#" + cacheName)); Configuration conf = job.getConfiguration(); Path refPath = new Path(chrList); FileSystem fs = refPath.getFileSystem(conf); FSDataInputStream refin = fs.open(refPath); LineReader in = new LineReader(refin); Text line = new Text(); String chrFile = ""; String[] chrs = new String[3]; while ((in.readLine(line)) != 0) { chrFile = line.toString();//from w ww . j a v a 2 s. co m chrs = chrFile.split("\t"); File fileTest = new File(chrs[1]); if (fileTest.isFile()) { chrs[1] = "file://" + chrs[1]; } job.addCacheFile(new URI(chrs[1] + "#" + chrs[0])); } in.close(); refin.close(); return true; }
From source file:Patterns.A5_MapSideJoinByDistributedCache.Distributed_InnerJoin_Driver.java
/**
 * Runs the "Inner Join User/Artist Data" job with no reduce tasks; the user profile file is
 * registered as a cache file under the link name {@code user}.
 *
 * <p>Exits with 0 if the job succeeds, 2 if it fails and 1 on a usage error.
 *
 * @param args the command line arguments: {@code <input path> <output path>}
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    // Both paths are indexed below; fail with a usage message instead of an
    // ArrayIndexOutOfBoundsException.
    if (args.length < 2) {
        System.err.println("Usage: Distributed_InnerJoin_Driver <input path> <output path>");
        System.exit(1);
    }

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Inner Join User/Artist Data");
    job.setJarByClass(Distributed_InnerJoin_Driver.class);
    job.setMapperClass(Distributed_InnerJoin_Mapper.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    try {
        job.addCacheFile(new URI(
                "/home/chintan/IdeaProjects/AdvancedDBMS/music-project/inputUserTaste/userid-profile.tsv#user"));
    } catch (URISyntaxException e) {
        // Without the cache file there is nothing to join against, so stop here rather
        // than submit a job that cannot work.
        throw new IllegalStateException("Invalid cache file URI", e);
    }

    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 2);
}
From source file:terasort.TeraSort.java
License:Apache License
/**
 * Configures and runs the TeraSort job, deleting any existing output directory first.
 *
 * <p>Unless the simple partitioner is selected, a partition file is written under the output
 * directory, registered as a cache file, and the total-order partitioner is used.
 *
 * @param args the input directory followed by the output directory
 * @return 0 on success, 1 if the job fails, 2 on a usage error, -1 if the partition file
 *     could not be written
 * @throws Exception if the job cannot be configured or submitted
 */
public int run(String[] args) throws Exception {
    // Validate before touching the filesystem: both paths are indexed below and the
    // output directory is deleted recursively.
    if (args.length < 2) {
        System.err.println("Usage: terasort <in> <out>");
        return 2;
    }
    LOG.info("starting");
    Job job = Job.getInstance(getConf());
    Path inputDir = new Path(args[0]);
    Path outputDir = new Path(args[1]);

    // Remove output left over from a previous run
    FileSystem fs = FileSystem.get(getConf());
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }

    boolean useSimplePartitioner = getUseSimplePartitioner(job);
    TeraInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraSort");
    job.setJarByClass(TeraSort.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(TeraInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    if (useSimplePartitioner) {
        job.setPartitionerClass(SimplePartitioner.class);
    } else {
        long start = System.currentTimeMillis();
        Path partitionFile = new Path(outputDir, TeraInputFormat.PARTITION_FILENAME);
        // The fragment repeats the partition file name as the name of the cache entry
        URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME);
        try {
            TeraInputFormat.writePartitionFile(job, partitionFile);
        } catch (Throwable e) {
            // Pass the throwable itself so the stack trace is logged; the message alone
            // may be null and hides where the failure happened.
            LOG.error(e.getMessage(), e);
            return -1;
        }
        job.addCacheFile(partitionUri);
        long end = System.currentTimeMillis();
        System.out.println("Spent " + (end - start) + "ms computing partitions.");
        job.setPartitionerClass(TotalOrderPartitioner.class);
    }

    job.getConfiguration().setInt("dfs.replication", getOutputReplication(job));
    TeraOutputFormat.setFinalSync(job, true);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    LOG.info("done");
    return ret;
}